From e47c9d869a6fbba2b0405137558570b3afb40a13 Mon Sep 17 00:00:00 2001 From: jeremiah Date: Fri, 19 Apr 2024 14:43:46 +0800 Subject: [PATCH] FEAT(loongarch): Support loongarch with 11.0.22 --story=117064695 --- make/CompileJavaModules.gmk | 3 + make/autoconf/hotspot.m4 | 31 +- make/autoconf/platform.m4 | 12 + .../cpu/aarch64/c1_LIRAssembler_aarch64.cpp | 8 +- .../cpu/aarch64/c1_LIRGenerator_aarch64.cpp | 19 +- src/hotspot/cpu/aarch64/c1_LIR_aarch64.cpp | 21 + src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp | 7 + src/hotspot/cpu/arm/c1_LIRGenerator_arm.cpp | 19 +- src/hotspot/cpu/arm/c1_LIR_arm.cpp | 21 + .../abstractInterpreter_loongarch.cpp | 132 + .../cpu/loongarch/assembler_loongarch.cpp | 849 + .../cpu/loongarch/assembler_loongarch.hpp | 2827 ++++ .../loongarch/assembler_loongarch.inline.hpp | 33 + src/hotspot/cpu/loongarch/bytes_loongarch.hpp | 73 + .../loongarch/c1_CodeStubs_loongarch_64.cpp | 344 + .../cpu/loongarch/c1_Defs_loongarch.hpp | 79 + .../loongarch/c1_FpuStackSim_loongarch.hpp | 32 + .../loongarch/c1_FpuStackSim_loongarch_64.cpp | 31 + .../cpu/loongarch/c1_FrameMap_loongarch.hpp | 143 + .../loongarch/c1_FrameMap_loongarch_64.cpp | 354 + .../loongarch/c1_LIRAssembler_loongarch.hpp | 83 + .../c1_LIRAssembler_loongarch_64.cpp | 3387 ++++ .../c1_LIRGenerator_loongarch_64.cpp | 1396 ++ .../cpu/loongarch/c1_LIR_loongarch_64.cpp | 75 + .../cpu/loongarch/c1_LinearScan_loongarch.hpp | 70 + .../loongarch/c1_LinearScan_loongarch_64.cpp | 33 + .../loongarch/c1_MacroAssembler_loongarch.hpp | 112 + .../c1_MacroAssembler_loongarch_64.cpp | 344 + .../loongarch/c1_Runtime1_loongarch_64.cpp | 1138 ++ .../cpu/loongarch/c1_globals_loongarch.hpp | 71 + .../cpu/loongarch/c2_globals_loongarch.hpp | 94 + .../cpu/loongarch/c2_init_loongarch.cpp | 37 + .../cpu/loongarch/codeBuffer_loongarch.hpp | 35 + .../cpu/loongarch/compiledIC_loongarch.cpp | 148 + src/hotspot/cpu/loongarch/copy_loongarch.hpp | 77 + .../cpu/loongarch/depChecker_loongarch.cpp | 30 + .../cpu/loongarch/depChecker_loongarch.hpp | 31 + .../cpu/loongarch/disassembler_loongarch.hpp | 37 + src/hotspot/cpu/loongarch/frame_loongarch.cpp | 690 + src/hotspot/cpu/loongarch/frame_loongarch.hpp | 171 + .../cpu/loongarch/frame_loongarch.inline.hpp | 252 + .../gc/g1/g1BarrierSetAssembler_loongarch.cpp | 523 + .../gc/g1/g1BarrierSetAssembler_loongarch.hpp | 71 + .../shared/barrierSetAssembler_loongarch.cpp | 255 + .../shared/barrierSetAssembler_loongarch.hpp | 88 + ...cardTableBarrierSetAssembler_loongarch.cpp | 140 + ...cardTableBarrierSetAssembler_loongarch.hpp | 44 + .../modRefBarrierSetAssembler_loongarch.cpp | 53 + .../modRefBarrierSetAssembler_loongarch.hpp | 54 + .../loongarch/globalDefinitions_loongarch.hpp | 53 + .../cpu/loongarch/globals_loongarch.hpp | 109 + .../cpu/loongarch/icBuffer_loongarch.cpp | 92 + .../cpu/loongarch/icache_loongarch.cpp | 42 + .../cpu/loongarch/icache_loongarch.hpp | 41 + .../cpu/loongarch/interp_masm_loongarch.hpp | 281 + .../loongarch/interp_masm_loongarch_64.cpp | 2043 +++ .../cpu/loongarch/interpreterRT_loongarch.hpp | 62 + .../loongarch/interpreterRT_loongarch_64.cpp | 273 + .../loongarch/javaFrameAnchor_loongarch.hpp | 87 + .../jniFastGetField_loongarch_64.cpp | 166 + .../cpu/loongarch/jniTypes_loongarch.hpp | 144 + .../jvmciCodeInstaller_loongarch.cpp | 199 + src/hotspot/cpu/loongarch/loongarch.ad | 25 + src/hotspot/cpu/loongarch/loongarch_64.ad | 13917 ++++++++++++++++ .../loongarch/macroAssembler_loongarch.cpp | 4567 +++++ .../loongarch/macroAssembler_loongarch.hpp | 825 + 
.../macroAssembler_loongarch.inline.hpp | 34 + .../macroAssembler_loongarch_trig.cpp | 1625 ++ .../cpu/loongarch/methodHandles_loongarch.cpp | 564 + .../cpu/loongarch/methodHandles_loongarch.hpp | 62 + .../cpu/loongarch/nativeInst_loongarch.cpp | 511 + .../cpu/loongarch/nativeInst_loongarch.hpp | 528 + .../cpu/loongarch/registerMap_loongarch.hpp | 47 + .../register_definitions_loongarch.cpp | 103 + .../cpu/loongarch/register_loongarch.cpp | 59 + .../cpu/loongarch/register_loongarch.hpp | 495 + .../cpu/loongarch/relocInfo_loongarch.cpp | 132 + .../cpu/loongarch/relocInfo_loongarch.hpp | 44 + .../cpu/loongarch/runtime_loongarch_64.cpp | 191 + .../loongarch/sharedRuntime_loongarch_64.cpp | 3621 ++++ .../loongarch/stubGenerator_loongarch_64.cpp | 4804 ++++++ .../cpu/loongarch/stubRoutines_loongarch.hpp | 67 + .../loongarch/stubRoutines_loongarch_64.cpp | 178 + ...templateInterpreterGenerator_loongarch.cpp | 2269 +++ .../cpu/loongarch/templateTable_loongarch.hpp | 43 + .../loongarch/templateTable_loongarch_64.cpp | 4115 +++++ .../cpu/loongarch/vmStructs_loongarch.hpp | 61 + .../loongarch/vm_version_ext_loongarch.cpp | 85 + .../loongarch/vm_version_ext_loongarch.hpp | 54 + .../cpu/loongarch/vm_version_loongarch.cpp | 397 + .../cpu/loongarch/vm_version_loongarch.hpp | 292 + src/hotspot/cpu/loongarch/vmreg_loongarch.cpp | 53 + src/hotspot/cpu/loongarch/vmreg_loongarch.hpp | 58 + .../cpu/loongarch/vmreg_loongarch.inline.hpp | 39 + .../loongarch/vtableStubs_loongarch_64.cpp | 322 + .../cpu/mips/abstractInterpreter_mips.cpp | 132 + src/hotspot/cpu/mips/assembler_mips.cpp | 759 + src/hotspot/cpu/mips/assembler_mips.hpp | 1789 ++ .../cpu/mips/assembler_mips.inline.hpp | 33 + src/hotspot/cpu/mips/bytes_mips.hpp | 181 + src/hotspot/cpu/mips/c2_globals_mips.hpp | 95 + src/hotspot/cpu/mips/c2_init_mips.cpp | 34 + src/hotspot/cpu/mips/codeBuffer_mips.hpp | 35 + src/hotspot/cpu/mips/compiledIC_mips.cpp | 151 + src/hotspot/cpu/mips/copy_mips.hpp | 77 + src/hotspot/cpu/mips/depChecker_mips.cpp | 30 + src/hotspot/cpu/mips/depChecker_mips.hpp | 31 + src/hotspot/cpu/mips/disassembler_mips.hpp | 37 + src/hotspot/cpu/mips/frame_mips.cpp | 690 + src/hotspot/cpu/mips/frame_mips.hpp | 215 + src/hotspot/cpu/mips/frame_mips.inline.hpp | 238 + .../mips/gc/g1/g1BarrierSetAssembler_mips.cpp | 364 + .../mips/gc/g1/g1BarrierSetAssembler_mips.hpp | 71 + .../gc/shared/barrierSetAssembler_mips.cpp | 194 + .../gc/shared/barrierSetAssembler_mips.hpp | 83 + .../cardTableBarrierSetAssembler_mips.cpp | 147 + .../cardTableBarrierSetAssembler_mips.hpp | 42 + .../shared/modRefBarrierSetAssembler_mips.cpp | 53 + .../shared/modRefBarrierSetAssembler_mips.hpp | 54 + .../cpu/mips/globalDefinitions_mips.hpp | 45 + src/hotspot/cpu/mips/globals_mips.hpp | 137 + src/hotspot/cpu/mips/icBuffer_mips.cpp | 88 + src/hotspot/cpu/mips/icache_mips.cpp | 41 + src/hotspot/cpu/mips/icache_mips.hpp | 41 + src/hotspot/cpu/mips/interp_masm_mips.hpp | 276 + src/hotspot/cpu/mips/interp_masm_mips_64.cpp | 2126 +++ src/hotspot/cpu/mips/interpreterRT_mips.hpp | 60 + .../cpu/mips/interpreterRT_mips_64.cpp | 252 + src/hotspot/cpu/mips/javaFrameAnchor_mips.hpp | 87 + .../cpu/mips/jniFastGetField_mips_64.cpp | 167 + src/hotspot/cpu/mips/jniTypes_mips.hpp | 144 + src/hotspot/cpu/mips/macroAssembler_mips.cpp | 4257 +++++ src/hotspot/cpu/mips/macroAssembler_mips.hpp | 818 + .../cpu/mips/macroAssembler_mips.inline.hpp | 34 + src/hotspot/cpu/mips/methodHandles_mips.cpp | 576 + src/hotspot/cpu/mips/methodHandles_mips.hpp | 62 + src/hotspot/cpu/mips/mips.ad | 25 + 
src/hotspot/cpu/mips/mips_64.ad | 12243 ++++++++++++++ src/hotspot/cpu/mips/nativeInst_mips.cpp | 1821 ++ src/hotspot/cpu/mips/nativeInst_mips.hpp | 734 + src/hotspot/cpu/mips/registerMap_mips.hpp | 47 + .../cpu/mips/register_definitions_mips.cpp | 103 + src/hotspot/cpu/mips/register_mips.cpp | 52 + src/hotspot/cpu/mips/register_mips.hpp | 341 + src/hotspot/cpu/mips/relocInfo_mips.cpp | 160 + src/hotspot/cpu/mips/relocInfo_mips.hpp | 44 + src/hotspot/cpu/mips/runtime_mips_64.cpp | 198 + .../cpu/mips/sharedRuntime_mips_64.cpp | 3879 +++++ .../cpu/mips/stubGenerator_mips_64.cpp | 2162 +++ src/hotspot/cpu/mips/stubRoutines_mips.hpp | 59 + src/hotspot/cpu/mips/stubRoutines_mips_64.cpp | 35 + .../templateInterpreterGenerator_mips.cpp | 2149 +++ src/hotspot/cpu/mips/templateTable_mips.hpp | 43 + .../cpu/mips/templateTable_mips_64.cpp | 4688 ++++++ src/hotspot/cpu/mips/vmStructs_mips.hpp | 68 + src/hotspot/cpu/mips/vm_version_ext_mips.cpp | 90 + src/hotspot/cpu/mips/vm_version_ext_mips.hpp | 54 + src/hotspot/cpu/mips/vm_version_mips.cpp | 516 + src/hotspot/cpu/mips/vm_version_mips.hpp | 221 + src/hotspot/cpu/mips/vmreg_mips.cpp | 51 + src/hotspot/cpu/mips/vmreg_mips.hpp | 56 + src/hotspot/cpu/mips/vmreg_mips.inline.hpp | 38 + src/hotspot/cpu/mips/vtableStubs_mips_64.cpp | 340 + src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp | 7 + src/hotspot/cpu/ppc/c1_LIRGenerator_ppc.cpp | 20 +- src/hotspot/cpu/ppc/c1_LIR_ppc.cpp | 21 + src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp | 7 + src/hotspot/cpu/s390/c1_LIRGenerator_s390.cpp | 19 +- src/hotspot/cpu/s390/c1_LIR_s390.cpp | 20 + .../cpu/sparc/c1_LIRAssembler_sparc.cpp | 6 + .../cpu/sparc/c1_LIRGenerator_sparc.cpp | 18 +- src/hotspot/cpu/sparc/c1_LIR_sparc.cpp | 21 + src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp | 7 + src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp | 16 +- src/hotspot/cpu/x86/c1_LIR_x86.cpp | 21 + .../cpu/x86/gc/z/zBarrierSetAssembler_x86.cpp | 3 +- .../cpu/x86/gc/z/zBarrierSetAssembler_x86.hpp | 3 +- src/hotspot/os/linux/os_linux.cpp | 8 + .../assembler_linux_loongarch.cpp | 24 + .../atomic_linux_loongarch.hpp | 160 + .../bytes_linux_loongarch.inline.hpp | 37 + .../copy_linux_loongarch.inline.hpp | 125 + .../globals_linux_loongarch.hpp | 43 + .../os_cpu/linux_loongarch/linux_loongarch.s | 25 + .../orderAccess_linux_loongarch.hpp | 51 + .../linux_loongarch/os_linux_loongarch.cpp | 710 + .../linux_loongarch/os_linux_loongarch.hpp | 38 + .../prefetch_linux_loongarch.inline.hpp | 56 + .../thread_linux_loongarch.cpp | 116 + .../thread_linux_loongarch.hpp | 66 + .../vmStructs_linux_loongarch.hpp | 55 + .../vm_version_linux_loongarch.cpp | 93 + .../linux_mips/assembler_linux_mips.cpp | 24 + .../os_cpu/linux_mips/atomic_linux_mips.hpp | 191 + .../linux_mips/bytes_linux_mips.inline.hpp | 37 + .../linux_mips/copy_linux_mips.inline.hpp | 125 + .../os_cpu/linux_mips/globals_linux_mips.hpp | 51 + src/hotspot/os_cpu/linux_mips/linux_mips.s | 25 + .../linux_mips/orderAccess_linux_mips.hpp | 51 + .../os_cpu/linux_mips/os_linux_mips.cpp | 1020 ++ .../os_cpu/linux_mips/os_linux_mips.hpp | 39 + .../linux_mips/prefetch_linux_mips.inline.hpp | 58 + .../os_cpu/linux_mips/thread_linux_mips.cpp | 117 + .../os_cpu/linux_mips/thread_linux_mips.hpp | 66 + .../linux_mips/vmStructs_linux_mips.hpp | 55 + .../linux_mips/vm_version_linux_mips.cpp | 28 + .../linux_x86/gc/z/zGlobals_linux_x86.hpp | 2 + src/hotspot/share/asm/codeBuffer.cpp | 7 + src/hotspot/share/c1/c1_Compiler.cpp | 8 +- src/hotspot/share/c1/c1_LIR.cpp | 167 +- src/hotspot/share/c1/c1_LIR.hpp | 123 +- 
src/hotspot/share/c1/c1_LIRAssembler.cpp | 12 + src/hotspot/share/c1/c1_LIRAssembler.hpp | 3 + src/hotspot/share/c1/c1_LIRGenerator.cpp | 136 +- src/hotspot/share/c1/c1_LIRGenerator.hpp | 8 +- src/hotspot/share/c1/c1_LinearScan.cpp | 76 +- src/hotspot/share/code/nmethod.cpp | 12 +- src/hotspot/share/code/relocInfo.cpp | 4 +- src/hotspot/share/code/relocInfo.hpp | 18 +- src/hotspot/share/code/vtableStubs.cpp | 11 + src/hotspot/share/gc/g1/c1/g1BarrierSetC1.cpp | 8 +- .../share/gc/g1/g1FullGCMarker.inline.hpp | 7 + .../gc/parallel/psPromotionManager.inline.hpp | 17 +- .../share/gc/parallel/psScavenge.inline.hpp | 5 +- .../share/gc/shared/c1/barrierSetC1.cpp | 12 +- .../gc/shared/c1/cardTableBarrierSetC1.cpp | 3 +- .../shenandoah/c1/shenandoahBarrierSetC1.cpp | 6 +- src/hotspot/share/gc/z/c1/zBarrierSetC1.cpp | 19 +- .../share/interpreter/interpreterRuntime.cpp | 8 +- .../share/interpreter/interpreterRuntime.hpp | 8 +- .../templateInterpreterGenerator.hpp | 10 +- .../share/jfr/utilities/jfrBigEndian.hpp | 8 +- src/hotspot/share/jvmci/vmStructs_jvmci.cpp | 35 + src/hotspot/share/memory/metaspace.cpp | 9 +- src/hotspot/share/oops/oop.inline.hpp | 8 +- src/hotspot/share/opto/compile.hpp | 2 +- src/hotspot/share/opto/output.cpp | 43 + src/hotspot/share/opto/type.cpp | 12 + src/hotspot/share/runtime/java.cpp | 1 + src/hotspot/share/runtime/objectMonitor.cpp | 10 + src/hotspot/share/runtime/os.cpp | 9 +- .../share/runtime/sharedRuntimeTrig.cpp | 15 + .../share/utilities/globalDefinitions.hpp | 9 + src/hotspot/share/utilities/macros.hpp | 56 + .../native/libsaproc/LinuxDebuggerLocal.c | 74 +- .../linux/native/libsaproc/libproc.h | 13 +- .../linux/native/libsaproc/ps_proc.c | 8 +- .../classes/sun/jvm/hotspot/HotSpotAgent.java | 12 + .../MachineDescriptionLOONGARCH64.java | 41 + .../debugger/MachineDescriptionMIPS64.java | 41 + .../debugger/linux/LinuxCDebugger.java | 26 +- .../linux/LinuxThreadContextFactory.java | 14 +- .../loongarch64/LinuxLOONGARCH64CFrame.java | 92 + .../LinuxLOONGARCH64ThreadContext.java | 47 + .../linux/mips64/LinuxMIPS64CFrame.java | 80 + .../mips64/LinuxMIPS64ThreadContext.java | 47 + .../loongarch64/LOONGARCH64ThreadContext.java | 128 + .../debugger/mips64/MIPS64ThreadContext.java | 128 + .../hotspot/debugger/posix/elf/ELFHeader.java | 8 + .../debugger/proc/ProcDebuggerLocal.java | 6 + .../loongarch64/ProcLOONGARCH64Thread.java | 92 + .../ProcLOONGARCH64ThreadContext.java | 47 + .../ProcLOONGARCH64ThreadFactory.java | 45 + .../proc/mips64/ProcMIPS64Thread.java | 92 + .../proc/mips64/ProcMIPS64ThreadContext.java | 47 + .../proc/mips64/ProcMIPS64ThreadFactory.java | 45 + .../debugger/remote/RemoteDebuggerClient.java | 18 + .../loongarch64/RemoteLOONGARCH64Thread.java | 54 + .../RemoteLOONGARCH64ThreadContext.java | 51 + .../RemoteLOONGARCH64ThreadFactory.java | 45 + .../remote/mips64/RemoteMIPS64Thread.java | 54 + .../mips64/RemoteMIPS64ThreadContext.java | 51 + .../mips64/RemoteMIPS64ThreadFactory.java | 45 + .../sun/jvm/hotspot/runtime/Threads.java | 12 + .../LinuxLOONGARCH64JavaThreadPDAccess.java | 133 + .../LinuxMIPS64JavaThreadPDAccess.java | 133 + .../LOONGARCH64CurrentFrameGuess.java | 250 + .../runtime/loongarch64/LOONGARCH64Frame.java | 526 + .../LOONGARCH64JavaCallWrapper.java | 57 + .../loongarch64/LOONGARCH64RegisterMap.java | 52 + .../mips64/MIPS64CurrentFrameGuess.java | 217 + .../hotspot/runtime/mips64/MIPS64Frame.java | 537 + .../runtime/mips64/MIPS64JavaCallWrapper.java | 57 + .../runtime/mips64/MIPS64RegisterMap.java | 52 + 
.../jvm/hotspot/utilities/PlatformInfo.java | 15 +- ...LoongArch64HotSpotJVMCIBackendFactory.java | 220 + .../LoongArch64HotSpotRegisterConfig.java | 297 + .../LoongArch64HotSpotVMConfig.java | 77 + .../ci/hotspot/loongarch64/package-info.java | 28 + .../jdk/vm/ci/loongarch64/LoongArch64.java | 247 + .../vm/ci/loongarch64/LoongArch64Kind.java | 163 + .../jdk/vm/ci/loongarch64/package-info.java | 28 + .../share/classes/module-info.java | 7 + src/utils/hsdis/Makefile | 3 + .../TestAESIntrinsicsOnSupportedConfig.java | 8 +- .../TestAESIntrinsicsOnUnsupportedConfig.java | 8 +- .../testcases/GenericTestCaseForOtherCPU.java | 13 +- .../vm/ci/code/test/CodeInstallationTest.java | 4 + .../jdk/vm/ci/code/test/DataPatchTest.java | 5 +- .../code/test/InterpreterFrameSizeTest.java | 5 +- .../code/test/MaxOopMapStackOffsetTest.java | 5 +- .../jdk/vm/ci/code/test/NativeCallTest.java | 5 +- .../code/test/SimpleCodeInstallationTest.java | 5 +- .../vm/ci/code/test/SimpleDebugInfoTest.java | 5 +- .../code/test/VirtualObjectDebugInfoTest.java | 5 +- .../loongarch64/LoongArch64TestAssembler.java | 568 + .../argumentcorruption/CheckLongArgs.java | 7 + .../criticalnatives/lookup/LookUp.java | 7 + .../sha/predicate/IntrinsicPredicates.java | 14 +- .../ReservedStack/ReservedStackTest.java | 8 +- ...stMutuallyExclusivePlatformPredicates.java | 2 +- .../jdk/jfr/event/os/TestCPUInformation.java | 10 +- test/jdk/sun/security/pkcs11/PKCS11Test.java | 9 + test/lib/jdk/test/lib/Platform.java | 14 + .../bench/java/lang/RotateBenchmark.java | 87 + .../bench/vm/compiler/MacroLogicOpt.java | 125 + 316 files changed, 113186 insertions(+), 204 deletions(-) create mode 100644 src/hotspot/cpu/loongarch/abstractInterpreter_loongarch.cpp create mode 100644 src/hotspot/cpu/loongarch/assembler_loongarch.cpp create mode 100644 src/hotspot/cpu/loongarch/assembler_loongarch.hpp create mode 100644 src/hotspot/cpu/loongarch/assembler_loongarch.inline.hpp create mode 100644 src/hotspot/cpu/loongarch/bytes_loongarch.hpp create mode 100644 src/hotspot/cpu/loongarch/c1_CodeStubs_loongarch_64.cpp create mode 100644 src/hotspot/cpu/loongarch/c1_Defs_loongarch.hpp create mode 100644 src/hotspot/cpu/loongarch/c1_FpuStackSim_loongarch.hpp create mode 100644 src/hotspot/cpu/loongarch/c1_FpuStackSim_loongarch_64.cpp create mode 100644 src/hotspot/cpu/loongarch/c1_FrameMap_loongarch.hpp create mode 100644 src/hotspot/cpu/loongarch/c1_FrameMap_loongarch_64.cpp create mode 100644 src/hotspot/cpu/loongarch/c1_LIRAssembler_loongarch.hpp create mode 100644 src/hotspot/cpu/loongarch/c1_LIRAssembler_loongarch_64.cpp create mode 100644 src/hotspot/cpu/loongarch/c1_LIRGenerator_loongarch_64.cpp create mode 100644 src/hotspot/cpu/loongarch/c1_LIR_loongarch_64.cpp create mode 100644 src/hotspot/cpu/loongarch/c1_LinearScan_loongarch.hpp create mode 100644 src/hotspot/cpu/loongarch/c1_LinearScan_loongarch_64.cpp create mode 100644 src/hotspot/cpu/loongarch/c1_MacroAssembler_loongarch.hpp create mode 100644 src/hotspot/cpu/loongarch/c1_MacroAssembler_loongarch_64.cpp create mode 100644 src/hotspot/cpu/loongarch/c1_Runtime1_loongarch_64.cpp create mode 100644 src/hotspot/cpu/loongarch/c1_globals_loongarch.hpp create mode 100644 src/hotspot/cpu/loongarch/c2_globals_loongarch.hpp create mode 100644 src/hotspot/cpu/loongarch/c2_init_loongarch.cpp create mode 100644 src/hotspot/cpu/loongarch/codeBuffer_loongarch.hpp create mode 100644 src/hotspot/cpu/loongarch/compiledIC_loongarch.cpp create mode 100644 src/hotspot/cpu/loongarch/copy_loongarch.hpp create mode 100644 
src/hotspot/cpu/loongarch/depChecker_loongarch.cpp create mode 100644 src/hotspot/cpu/loongarch/depChecker_loongarch.hpp create mode 100644 src/hotspot/cpu/loongarch/disassembler_loongarch.hpp create mode 100644 src/hotspot/cpu/loongarch/frame_loongarch.cpp create mode 100644 src/hotspot/cpu/loongarch/frame_loongarch.hpp create mode 100644 src/hotspot/cpu/loongarch/frame_loongarch.inline.hpp create mode 100644 src/hotspot/cpu/loongarch/gc/g1/g1BarrierSetAssembler_loongarch.cpp create mode 100644 src/hotspot/cpu/loongarch/gc/g1/g1BarrierSetAssembler_loongarch.hpp create mode 100644 src/hotspot/cpu/loongarch/gc/shared/barrierSetAssembler_loongarch.cpp create mode 100644 src/hotspot/cpu/loongarch/gc/shared/barrierSetAssembler_loongarch.hpp create mode 100644 src/hotspot/cpu/loongarch/gc/shared/cardTableBarrierSetAssembler_loongarch.cpp create mode 100644 src/hotspot/cpu/loongarch/gc/shared/cardTableBarrierSetAssembler_loongarch.hpp create mode 100644 src/hotspot/cpu/loongarch/gc/shared/modRefBarrierSetAssembler_loongarch.cpp create mode 100644 src/hotspot/cpu/loongarch/gc/shared/modRefBarrierSetAssembler_loongarch.hpp create mode 100644 src/hotspot/cpu/loongarch/globalDefinitions_loongarch.hpp create mode 100644 src/hotspot/cpu/loongarch/globals_loongarch.hpp create mode 100644 src/hotspot/cpu/loongarch/icBuffer_loongarch.cpp create mode 100644 src/hotspot/cpu/loongarch/icache_loongarch.cpp create mode 100644 src/hotspot/cpu/loongarch/icache_loongarch.hpp create mode 100644 src/hotspot/cpu/loongarch/interp_masm_loongarch.hpp create mode 100644 src/hotspot/cpu/loongarch/interp_masm_loongarch_64.cpp create mode 100644 src/hotspot/cpu/loongarch/interpreterRT_loongarch.hpp create mode 100644 src/hotspot/cpu/loongarch/interpreterRT_loongarch_64.cpp create mode 100644 src/hotspot/cpu/loongarch/javaFrameAnchor_loongarch.hpp create mode 100644 src/hotspot/cpu/loongarch/jniFastGetField_loongarch_64.cpp create mode 100644 src/hotspot/cpu/loongarch/jniTypes_loongarch.hpp create mode 100644 src/hotspot/cpu/loongarch/jvmciCodeInstaller_loongarch.cpp create mode 100644 src/hotspot/cpu/loongarch/loongarch.ad create mode 100644 src/hotspot/cpu/loongarch/loongarch_64.ad create mode 100644 src/hotspot/cpu/loongarch/macroAssembler_loongarch.cpp create mode 100644 src/hotspot/cpu/loongarch/macroAssembler_loongarch.hpp create mode 100644 src/hotspot/cpu/loongarch/macroAssembler_loongarch.inline.hpp create mode 100644 src/hotspot/cpu/loongarch/macroAssembler_loongarch_trig.cpp create mode 100644 src/hotspot/cpu/loongarch/methodHandles_loongarch.cpp create mode 100644 src/hotspot/cpu/loongarch/methodHandles_loongarch.hpp create mode 100644 src/hotspot/cpu/loongarch/nativeInst_loongarch.cpp create mode 100644 src/hotspot/cpu/loongarch/nativeInst_loongarch.hpp create mode 100644 src/hotspot/cpu/loongarch/registerMap_loongarch.hpp create mode 100644 src/hotspot/cpu/loongarch/register_definitions_loongarch.cpp create mode 100644 src/hotspot/cpu/loongarch/register_loongarch.cpp create mode 100644 src/hotspot/cpu/loongarch/register_loongarch.hpp create mode 100644 src/hotspot/cpu/loongarch/relocInfo_loongarch.cpp create mode 100644 src/hotspot/cpu/loongarch/relocInfo_loongarch.hpp create mode 100644 src/hotspot/cpu/loongarch/runtime_loongarch_64.cpp create mode 100644 src/hotspot/cpu/loongarch/sharedRuntime_loongarch_64.cpp create mode 100644 src/hotspot/cpu/loongarch/stubGenerator_loongarch_64.cpp create mode 100644 src/hotspot/cpu/loongarch/stubRoutines_loongarch.hpp create mode 100644 
src/hotspot/cpu/loongarch/stubRoutines_loongarch_64.cpp create mode 100644 src/hotspot/cpu/loongarch/templateInterpreterGenerator_loongarch.cpp create mode 100644 src/hotspot/cpu/loongarch/templateTable_loongarch.hpp create mode 100644 src/hotspot/cpu/loongarch/templateTable_loongarch_64.cpp create mode 100644 src/hotspot/cpu/loongarch/vmStructs_loongarch.hpp create mode 100644 src/hotspot/cpu/loongarch/vm_version_ext_loongarch.cpp create mode 100644 src/hotspot/cpu/loongarch/vm_version_ext_loongarch.hpp create mode 100644 src/hotspot/cpu/loongarch/vm_version_loongarch.cpp create mode 100644 src/hotspot/cpu/loongarch/vm_version_loongarch.hpp create mode 100644 src/hotspot/cpu/loongarch/vmreg_loongarch.cpp create mode 100644 src/hotspot/cpu/loongarch/vmreg_loongarch.hpp create mode 100644 src/hotspot/cpu/loongarch/vmreg_loongarch.inline.hpp create mode 100644 src/hotspot/cpu/loongarch/vtableStubs_loongarch_64.cpp create mode 100644 src/hotspot/cpu/mips/abstractInterpreter_mips.cpp create mode 100644 src/hotspot/cpu/mips/assembler_mips.cpp create mode 100644 src/hotspot/cpu/mips/assembler_mips.hpp create mode 100644 src/hotspot/cpu/mips/assembler_mips.inline.hpp create mode 100644 src/hotspot/cpu/mips/bytes_mips.hpp create mode 100644 src/hotspot/cpu/mips/c2_globals_mips.hpp create mode 100644 src/hotspot/cpu/mips/c2_init_mips.cpp create mode 100644 src/hotspot/cpu/mips/codeBuffer_mips.hpp create mode 100644 src/hotspot/cpu/mips/compiledIC_mips.cpp create mode 100644 src/hotspot/cpu/mips/copy_mips.hpp create mode 100644 src/hotspot/cpu/mips/depChecker_mips.cpp create mode 100644 src/hotspot/cpu/mips/depChecker_mips.hpp create mode 100644 src/hotspot/cpu/mips/disassembler_mips.hpp create mode 100644 src/hotspot/cpu/mips/frame_mips.cpp create mode 100644 src/hotspot/cpu/mips/frame_mips.hpp create mode 100644 src/hotspot/cpu/mips/frame_mips.inline.hpp create mode 100644 src/hotspot/cpu/mips/gc/g1/g1BarrierSetAssembler_mips.cpp create mode 100644 src/hotspot/cpu/mips/gc/g1/g1BarrierSetAssembler_mips.hpp create mode 100644 src/hotspot/cpu/mips/gc/shared/barrierSetAssembler_mips.cpp create mode 100644 src/hotspot/cpu/mips/gc/shared/barrierSetAssembler_mips.hpp create mode 100644 src/hotspot/cpu/mips/gc/shared/cardTableBarrierSetAssembler_mips.cpp create mode 100644 src/hotspot/cpu/mips/gc/shared/cardTableBarrierSetAssembler_mips.hpp create mode 100644 src/hotspot/cpu/mips/gc/shared/modRefBarrierSetAssembler_mips.cpp create mode 100644 src/hotspot/cpu/mips/gc/shared/modRefBarrierSetAssembler_mips.hpp create mode 100644 src/hotspot/cpu/mips/globalDefinitions_mips.hpp create mode 100644 src/hotspot/cpu/mips/globals_mips.hpp create mode 100644 src/hotspot/cpu/mips/icBuffer_mips.cpp create mode 100644 src/hotspot/cpu/mips/icache_mips.cpp create mode 100644 src/hotspot/cpu/mips/icache_mips.hpp create mode 100644 src/hotspot/cpu/mips/interp_masm_mips.hpp create mode 100644 src/hotspot/cpu/mips/interp_masm_mips_64.cpp create mode 100644 src/hotspot/cpu/mips/interpreterRT_mips.hpp create mode 100644 src/hotspot/cpu/mips/interpreterRT_mips_64.cpp create mode 100644 src/hotspot/cpu/mips/javaFrameAnchor_mips.hpp create mode 100644 src/hotspot/cpu/mips/jniFastGetField_mips_64.cpp create mode 100644 src/hotspot/cpu/mips/jniTypes_mips.hpp create mode 100644 src/hotspot/cpu/mips/macroAssembler_mips.cpp create mode 100644 src/hotspot/cpu/mips/macroAssembler_mips.hpp create mode 100644 src/hotspot/cpu/mips/macroAssembler_mips.inline.hpp create mode 100644 src/hotspot/cpu/mips/methodHandles_mips.cpp create mode 
100644 src/hotspot/cpu/mips/methodHandles_mips.hpp create mode 100644 src/hotspot/cpu/mips/mips.ad create mode 100644 src/hotspot/cpu/mips/mips_64.ad create mode 100644 src/hotspot/cpu/mips/nativeInst_mips.cpp create mode 100644 src/hotspot/cpu/mips/nativeInst_mips.hpp create mode 100644 src/hotspot/cpu/mips/registerMap_mips.hpp create mode 100644 src/hotspot/cpu/mips/register_definitions_mips.cpp create mode 100644 src/hotspot/cpu/mips/register_mips.cpp create mode 100644 src/hotspot/cpu/mips/register_mips.hpp create mode 100644 src/hotspot/cpu/mips/relocInfo_mips.cpp create mode 100644 src/hotspot/cpu/mips/relocInfo_mips.hpp create mode 100644 src/hotspot/cpu/mips/runtime_mips_64.cpp create mode 100644 src/hotspot/cpu/mips/sharedRuntime_mips_64.cpp create mode 100644 src/hotspot/cpu/mips/stubGenerator_mips_64.cpp create mode 100644 src/hotspot/cpu/mips/stubRoutines_mips.hpp create mode 100644 src/hotspot/cpu/mips/stubRoutines_mips_64.cpp create mode 100644 src/hotspot/cpu/mips/templateInterpreterGenerator_mips.cpp create mode 100644 src/hotspot/cpu/mips/templateTable_mips.hpp create mode 100644 src/hotspot/cpu/mips/templateTable_mips_64.cpp create mode 100644 src/hotspot/cpu/mips/vmStructs_mips.hpp create mode 100644 src/hotspot/cpu/mips/vm_version_ext_mips.cpp create mode 100644 src/hotspot/cpu/mips/vm_version_ext_mips.hpp create mode 100644 src/hotspot/cpu/mips/vm_version_mips.cpp create mode 100644 src/hotspot/cpu/mips/vm_version_mips.hpp create mode 100644 src/hotspot/cpu/mips/vmreg_mips.cpp create mode 100644 src/hotspot/cpu/mips/vmreg_mips.hpp create mode 100644 src/hotspot/cpu/mips/vmreg_mips.inline.hpp create mode 100644 src/hotspot/cpu/mips/vtableStubs_mips_64.cpp create mode 100644 src/hotspot/os_cpu/linux_loongarch/assembler_linux_loongarch.cpp create mode 100644 src/hotspot/os_cpu/linux_loongarch/atomic_linux_loongarch.hpp create mode 100644 src/hotspot/os_cpu/linux_loongarch/bytes_linux_loongarch.inline.hpp create mode 100644 src/hotspot/os_cpu/linux_loongarch/copy_linux_loongarch.inline.hpp create mode 100644 src/hotspot/os_cpu/linux_loongarch/globals_linux_loongarch.hpp create mode 100644 src/hotspot/os_cpu/linux_loongarch/linux_loongarch.s create mode 100644 src/hotspot/os_cpu/linux_loongarch/orderAccess_linux_loongarch.hpp create mode 100644 src/hotspot/os_cpu/linux_loongarch/os_linux_loongarch.cpp create mode 100644 src/hotspot/os_cpu/linux_loongarch/os_linux_loongarch.hpp create mode 100644 src/hotspot/os_cpu/linux_loongarch/prefetch_linux_loongarch.inline.hpp create mode 100644 src/hotspot/os_cpu/linux_loongarch/thread_linux_loongarch.cpp create mode 100644 src/hotspot/os_cpu/linux_loongarch/thread_linux_loongarch.hpp create mode 100644 src/hotspot/os_cpu/linux_loongarch/vmStructs_linux_loongarch.hpp create mode 100644 src/hotspot/os_cpu/linux_loongarch/vm_version_linux_loongarch.cpp create mode 100644 src/hotspot/os_cpu/linux_mips/assembler_linux_mips.cpp create mode 100644 src/hotspot/os_cpu/linux_mips/atomic_linux_mips.hpp create mode 100644 src/hotspot/os_cpu/linux_mips/bytes_linux_mips.inline.hpp create mode 100644 src/hotspot/os_cpu/linux_mips/copy_linux_mips.inline.hpp create mode 100644 src/hotspot/os_cpu/linux_mips/globals_linux_mips.hpp create mode 100644 src/hotspot/os_cpu/linux_mips/linux_mips.s create mode 100644 src/hotspot/os_cpu/linux_mips/orderAccess_linux_mips.hpp create mode 100644 src/hotspot/os_cpu/linux_mips/os_linux_mips.cpp create mode 100644 src/hotspot/os_cpu/linux_mips/os_linux_mips.hpp create mode 100644 
src/hotspot/os_cpu/linux_mips/prefetch_linux_mips.inline.hpp create mode 100644 src/hotspot/os_cpu/linux_mips/thread_linux_mips.cpp create mode 100644 src/hotspot/os_cpu/linux_mips/thread_linux_mips.hpp create mode 100644 src/hotspot/os_cpu/linux_mips/vmStructs_linux_mips.hpp create mode 100644 src/hotspot/os_cpu/linux_mips/vm_version_linux_mips.cpp create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionLOONGARCH64.java create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionMIPS64.java create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/loongarch64/LinuxLOONGARCH64CFrame.java create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/loongarch64/LinuxLOONGARCH64ThreadContext.java create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/mips64/LinuxMIPS64CFrame.java create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/mips64/LinuxMIPS64ThreadContext.java create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/loongarch64/LOONGARCH64ThreadContext.java create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/mips64/MIPS64ThreadContext.java create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/loongarch64/ProcLOONGARCH64Thread.java create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/loongarch64/ProcLOONGARCH64ThreadContext.java create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/loongarch64/ProcLOONGARCH64ThreadFactory.java create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/mips64/ProcMIPS64Thread.java create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/mips64/ProcMIPS64ThreadContext.java create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/mips64/ProcMIPS64ThreadFactory.java create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/loongarch64/RemoteLOONGARCH64Thread.java create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/loongarch64/RemoteLOONGARCH64ThreadContext.java create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/loongarch64/RemoteLOONGARCH64ThreadFactory.java create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/mips64/RemoteMIPS64Thread.java create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/mips64/RemoteMIPS64ThreadContext.java create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/mips64/RemoteMIPS64ThreadFactory.java create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_loongarch64/LinuxLOONGARCH64JavaThreadPDAccess.java create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_mips64/LinuxMIPS64JavaThreadPDAccess.java create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64CurrentFrameGuess.java create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64Frame.java create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64JavaCallWrapper.java create mode 100644 
src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64RegisterMap.java create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64CurrentFrameGuess.java create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64Frame.java create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64JavaCallWrapper.java create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64RegisterMap.java create mode 100644 src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.hotspot.loongarch64/src/jdk/vm/ci/hotspot/loongarch64/LoongArch64HotSpotJVMCIBackendFactory.java create mode 100644 src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.hotspot.loongarch64/src/jdk/vm/ci/hotspot/loongarch64/LoongArch64HotSpotRegisterConfig.java create mode 100644 src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.hotspot.loongarch64/src/jdk/vm/ci/hotspot/loongarch64/LoongArch64HotSpotVMConfig.java create mode 100644 src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.hotspot.loongarch64/src/jdk/vm/ci/hotspot/loongarch64/package-info.java create mode 100644 src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.loongarch64/src/jdk/vm/ci/loongarch64/LoongArch64.java create mode 100644 src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.loongarch64/src/jdk/vm/ci/loongarch64/LoongArch64Kind.java create mode 100644 src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.loongarch64/src/jdk/vm/ci/loongarch64/package-info.java create mode 100644 test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/loongarch64/LoongArch64TestAssembler.java create mode 100644 test/micro/org/openjdk/bench/java/lang/RotateBenchmark.java create mode 100644 test/micro/org/openjdk/bench/vm/compiler/MacroLogicOpt.java diff --git a/make/CompileJavaModules.gmk b/make/CompileJavaModules.gmk index 46fb9b4219b..c6d8b24fc40 100644 --- a/make/CompileJavaModules.gmk +++ b/make/CompileJavaModules.gmk @@ -430,6 +430,7 @@ jdk.internal.vm.ci_ADD_JAVAC_FLAGS += -parameters -Xlint:-exports -XDstringConca jdk.internal.vm.compiler_ADD_JAVAC_FLAGS += -parameters -XDstringConcat=inline \ --add-exports jdk.internal.vm.ci/jdk.vm.ci.aarch64=jdk.internal.vm.compiler \ + --add-exports jdk.internal.vm.ci/jdk.vm.ci.loongarch64=jdk.internal.vm.compiler \ --add-exports jdk.internal.vm.ci/jdk.vm.ci.amd64=jdk.internal.vm.compiler \ --add-exports jdk.internal.vm.ci/jdk.vm.ci.code=jdk.internal.vm.compiler \ --add-exports jdk.internal.vm.ci/jdk.vm.ci.code.site=jdk.internal.vm.compiler \ @@ -437,6 +438,7 @@ jdk.internal.vm.compiler_ADD_JAVAC_FLAGS += -parameters -XDstringConcat=inline \ --add-exports jdk.internal.vm.ci/jdk.vm.ci.common=jdk.internal.vm.compiler \ --add-exports jdk.internal.vm.ci/jdk.vm.ci.hotspot=jdk.internal.vm.compiler \ --add-exports jdk.internal.vm.ci/jdk.vm.ci.hotspot.aarch64=jdk.internal.vm.compiler \ + --add-exports jdk.internal.vm.ci/jdk.vm.ci.hotspot.loongarch64=jdk.internal.vm.compiler \ --add-exports jdk.internal.vm.ci/jdk.vm.ci.hotspot.amd64=jdk.internal.vm.compiler \ --add-exports jdk.internal.vm.ci/jdk.vm.ci.hotspot.sparc=jdk.internal.vm.compiler \ --add-exports jdk.internal.vm.ci/jdk.vm.ci.meta=jdk.internal.vm.compiler \ @@ -456,6 +458,7 @@ jdk.internal.vm.compiler_EXCLUDES += \ org.graalvm.compiler.api.directives.test \ org.graalvm.compiler.api.test \ org.graalvm.compiler.asm.aarch64.test \ + org.graalvm.compiler.asm.loongarch64.test \ org.graalvm.compiler.asm.amd64.test \ org.graalvm.compiler.asm.sparc.test \ 
org.graalvm.compiler.asm.test \ diff --git a/make/autoconf/hotspot.m4 b/make/autoconf/hotspot.m4 index 9bb34363e5c..c95a2447ef4 100644 --- a/make/autoconf/hotspot.m4 +++ b/make/autoconf/hotspot.m4 @@ -34,6 +34,12 @@ DEPRECATED_JVM_FEATURES="trace" # All valid JVM variants VALID_JVM_VARIANTS="server client minimal core zero custom" +# +# This file has been modified by Loongson Technology in 2021. These +# modifications are Copyright (c) 2020, 2021, Loongson Technology, and are made +# available on the same license terms set forth above. +# + ############################################################################### # Check if the specified JVM variant should be built. To be used in shell if # constructs, like this: @@ -340,6 +346,26 @@ AC_DEFUN_ONCE([HOTSPOT_SETUP_JVM_FEATURES], HOTSPOT_TARGET_CPU_ARCH=arm fi + # Override hotspot cpu definitions for MIPS and LOONGARCH platforms + if test "x$OPENJDK_TARGET_CPU" = xmips64el && test "x$HOTSPOT_TARGET_CPU" != xzero; then + HOTSPOT_TARGET_CPU=mips_64 + HOTSPOT_TARGET_CPU_ARCH=mips + elif test "x$OPENJDK_TARGET_CPU" = xloongarch64 && test "x$HOTSPOT_TARGET_CPU" != xzero; then + HOTSPOT_TARGET_CPU=loongarch_64 + HOTSPOT_TARGET_CPU_ARCH=loongarch + fi + + # Disable compiler1 on linux-mips and linux-loongarch + if ! (HOTSPOT_CHECK_JVM_FEATURE(compiler1)); then + AC_MSG_CHECKING([if compiler1 should be built, $JVM_FEATURES]) + if test "x$OPENJDK_TARGET_OS" = "xlinux" && test "x$HOTSPOT_TARGET_CPU_ARCH" = "xmips"; then + DISABLED_JVM_FEATURES="$DISABLED_JVM_FEATURES compiler1" + AC_MSG_RESULT([no, platform not supported]) + else + AC_MSG_RESULT([yes]) + fi + fi + # Verify that dependencies are met for explicitly set features. if HOTSPOT_CHECK_JVM_FEATURE(jvmti) && ! HOTSPOT_CHECK_JVM_FEATURE(services); then AC_MSG_ERROR([Specified JVM feature 'jvmti' requires feature 'services']) @@ -424,10 +450,11 @@ AC_DEFUN_ONCE([HOTSPOT_SETUP_JVM_FEATURES], JVM_FEATURES_jvmci="" INCLUDE_JVMCI="false" else - # Only enable jvmci on x86_64, sparcv9 and aarch64 + # Only enable jvmci on x86_64, sparcv9, aarch64 and loongarch64 if test "x$OPENJDK_TARGET_CPU" = "xx86_64" || \ test "x$OPENJDK_TARGET_CPU" = "xsparcv9" || \ - test "x$OPENJDK_TARGET_CPU" = "xaarch64" ; then + test "x$OPENJDK_TARGET_CPU" = "xaarch64" || \ + test "x$OPENJDK_TARGET_CPU" = "xloongarch64" ; then AC_MSG_RESULT([yes]) JVM_FEATURES_jvmci="jvmci" INCLUDE_JVMCI="true" diff --git a/make/autoconf/platform.m4 b/make/autoconf/platform.m4 index 5d1d9efa399..815180ea966 100644 --- a/make/autoconf/platform.m4 +++ b/make/autoconf/platform.m4 @@ -23,6 +23,12 @@ # questions. # +# +# This file has been modified by Loongson Technology in 2021. These +# modifications are Copyright (c) 2018, 2021, Loongson Technology, and are made +# available on the same license terms set forth above. +# + # Support macro for PLATFORM_EXTRACT_TARGET_AND_BUILD. # Converts autoconf style CPU name to OpenJDK style, into # VAR_CPU, VAR_CPU_ARCH, VAR_CPU_BITS and VAR_CPU_ENDIAN. @@ -554,6 +560,12 @@ AC_DEFUN([PLATFORM_SETUP_LEGACY_VARS_HELPER], HOTSPOT_$1_CPU_DEFINE=PPC64 elif test "x$OPENJDK_$1_CPU" = xppc64le; then HOTSPOT_$1_CPU_DEFINE=PPC64 + elif test "x$OPENJDK_$1_CPU" = xmips64; then + HOTSPOT_$1_CPU_DEFINE=MIPS64 + elif test "x$OPENJDK_$1_CPU" = xmips64el; then + HOTSPOT_$1_CPU_DEFINE=MIPS64 + elif test "x$OPENJDK_$1_CPU" = xloongarch64; then + HOTSPOT_$1_CPU_DEFINE=LOONGARCH64 # The cpu defines below are for zero, we don't support them directly. 
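For context on where the LOONGARCH64 define added above ends up: the diffstat also touches src/hotspot/share/utilities/macros.hpp, which presumably gains per-CPU convenience macros in the same style as the existing AARCH64_ONLY/NOT_AARCH64 pair. A minimal hypothetical C++ sketch of that pattern (macro names assumed, not copied from the macros.hpp hunk itself):

#ifdef LOONGARCH64
// Compiled in only when platform.m4 sets HOTSPOT_TARGET_CPU_DEFINE=LOONGARCH64.
#define LOONGARCH64_ONLY(code) code
#define NOT_LOONGARCH64(code)
#else
#define LOONGARCH64_ONLY(code)
#define NOT_LOONGARCH64(code) code
#endif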
elif test "x$OPENJDK_$1_CPU" = xsparc; then diff --git a/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp index fdd2c0ca3d7..318191233af 100644 --- a/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp @@ -1123,7 +1123,9 @@ void LIR_Assembler::emit_opBranch(LIR_OpBranch* op) { } } - +void LIR_Assembler::emit_opCmpBranch(LIR_OpCmpBranch* op) { + ShouldNotReachHere(); +} void LIR_Assembler::emit_opConvert(LIR_OpConvert* op) { LIR_Opr src = op->in_opr(); @@ -1663,6 +1665,10 @@ void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, L __ csel(result->as_register(), opr1->as_register(), opr2->as_register(), acond); } +void LIR_Assembler::cmp_cmove(LIR_Condition condition, LIR_Opr left, LIR_Opr right, LIR_Opr src1, LIR_Opr src2, LIR_Opr result, BasicType type) { + ShouldNotReachHere(); +} + void LIR_Assembler::arith_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest, CodeEmitInfo* info, bool pop_fpu_stack) { assert(info == NULL, "should never be used, idiv/irem and ldiv/lrem not handled by this method"); diff --git a/src/hotspot/cpu/aarch64/c1_LIRGenerator_aarch64.cpp b/src/hotspot/cpu/aarch64/c1_LIRGenerator_aarch64.cpp index cebc1e410dc..816226c068b 100644 --- a/src/hotspot/cpu/aarch64/c1_LIRGenerator_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/c1_LIRGenerator_aarch64.cpp @@ -260,18 +260,29 @@ void LIRGenerator::increment_counter(LIR_Address* addr, int step) { __ store(reg, addr); } -void LIRGenerator::cmp_mem_int(LIR_Condition condition, LIR_Opr base, int disp, int c, CodeEmitInfo* info) { +template +void LIRGenerator::cmp_mem_int_branch(LIR_Condition condition, LIR_Opr base, int disp, int c, T tgt, CodeEmitInfo* info) { LIR_Opr reg = new_register(T_INT); __ load(generate_address(base, disp, T_INT), reg, info); - __ cmp(condition, reg, LIR_OprFact::intConst(c)); + __ cmp_branch(condition, reg, LIR_OprFact::intConst(c), T_INT, tgt); } -void LIRGenerator::cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, CodeEmitInfo* info) { +// Explicit instantiation for all supported types. +template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, Label*, CodeEmitInfo*); +template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, BlockBegin*, CodeEmitInfo*); +template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, CodeStub*, CodeEmitInfo*); + +template +void LIRGenerator::cmp_reg_mem_branch(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, T tgt, CodeEmitInfo* info) { LIR_Opr reg1 = new_register(T_INT); __ load(generate_address(base, disp, type), reg1, info); - __ cmp(condition, reg, reg1); + __ cmp_branch(condition, reg, reg1, type, tgt); } +// Explicit instantiation for all supported types. 
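A note on the templated cmp_*_branch hooks introduced here: on targets with condition flags (aarch64, arm, x86, ...) they simply expand to the existing cmp()-then-branch() pair, as the surrounding hunks show, while a flags-less target such as LoongArch or MIPS is expected to append one fused node handled by the new LIR_Assembler::emit_opCmpBranch hook. A plausible sketch of that fused form follows; the LIR_OpCmpBranch constructor signature is an assumption, not taken from this section:

template <typename T>
void LIR_List::cmp_branch(LIR_Condition condition, LIR_Opr left, LIR_Opr right,
                          BasicType type, T tgt, CodeEmitInfo* info) {
  // A single LIR op carries operands, condition and branch target, so the backend
  // can emit one compare-and-branch instruction instead of a flag-setting cmp
  // followed by a separate branch.
  append(new LIR_OpCmpBranch(condition, left, right, tgt, info));
}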
+template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, Label*, CodeEmitInfo*); +template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, BlockBegin*, CodeEmitInfo*); +template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, CodeStub*, CodeEmitInfo*); bool LIRGenerator::strength_reduce_multiply(LIR_Opr left, jint c, LIR_Opr result, LIR_Opr tmp) { diff --git a/src/hotspot/cpu/aarch64/c1_LIR_aarch64.cpp b/src/hotspot/cpu/aarch64/c1_LIR_aarch64.cpp index ce75dc552a9..74c4b7e5565 100644 --- a/src/hotspot/cpu/aarch64/c1_LIR_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/c1_LIR_aarch64.cpp @@ -52,3 +52,24 @@ void LIR_Address::verify() const { "wrong type for addresses"); } #endif // PRODUCT + +template +void LIR_List::cmp_branch(LIR_Condition condition, LIR_Opr left, LIR_Opr right, BasicType type, T tgt, CodeEmitInfo* info) { + cmp(condition, left, right, info); + branch(condition, type, tgt); +} + +// Explicit instantiation for all supported types. +template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, Label*, CodeEmitInfo*); +template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, BlockBegin*, CodeEmitInfo*); +template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, CodeStub*, CodeEmitInfo*); + +void LIR_List::cmp_branch(LIR_Condition condition, LIR_Opr left, LIR_Opr right, BasicType type, BlockBegin* block, BlockBegin* unordered) { + cmp(condition, left, right); + branch(condition, type, block, unordered); +} + +void LIR_List::cmp_cmove(LIR_Condition condition, LIR_Opr left, LIR_Opr right, LIR_Opr src1, LIR_Opr src2, LIR_Opr dst, BasicType type) { + cmp(condition, left, right); + cmove(condition, src1, src2, dst, type); +} diff --git a/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp b/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp index f0a7229aa18..29db21f975e 100644 --- a/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp +++ b/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp @@ -1150,6 +1150,9 @@ void LIR_Assembler::emit_opBranch(LIR_OpBranch* op) { __ b(*(op->label()), acond); } +void LIR_Assembler::emit_opCmpBranch(LIR_OpCmpBranch* op) { + ShouldNotReachHere(); +} void LIR_Assembler::emit_opConvert(LIR_OpConvert* op) { LIR_Opr src = op->in_opr(); @@ -3082,6 +3085,10 @@ void LIR_Assembler::emit_arraycopy(LIR_OpArrayCopy* op) { __ bind(*stub->continuation()); } +void LIR_Assembler::cmp_cmove(LIR_Condition condition, LIR_Opr left, LIR_Opr right, LIR_Opr src1, LIR_Opr src2, LIR_Opr result, BasicType type) { + ShouldNotReachHere(); +} + #ifdef ASSERT // emit run-time assertion void LIR_Assembler::emit_assert(LIR_OpAssert* op) { diff --git a/src/hotspot/cpu/arm/c1_LIRGenerator_arm.cpp b/src/hotspot/cpu/arm/c1_LIRGenerator_arm.cpp index b05fc876f27..b3c1afe69a3 100644 --- a/src/hotspot/cpu/arm/c1_LIRGenerator_arm.cpp +++ b/src/hotspot/cpu/arm/c1_LIRGenerator_arm.cpp @@ -423,18 +423,27 @@ void LIRGenerator::increment_counter(LIR_Address* addr, int step) { __ move(temp, addr); } - -void LIRGenerator::cmp_mem_int(LIR_Condition condition, LIR_Opr base, int disp, int c, CodeEmitInfo* info) { +template +void LIRGenerator::cmp_mem_int_branch(LIR_Condition condition, LIR_Opr base, int disp, int c, T tgt, CodeEmitInfo* info) { __ load(new LIR_Address(base, disp, T_INT), FrameMap::LR_opr, info); - __ cmp(condition, FrameMap::LR_opr, c); + __ cmp_branch(condition, FrameMap::LR_opr, c, T_INT, tgt); } +// Explicit instantiation 
for all supported types. +template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, Label*, CodeEmitInfo*); +template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, BlockBegin*, CodeEmitInfo*); +template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, CodeStub*, CodeEmitInfo*); -void LIRGenerator::cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, CodeEmitInfo* info) { +template +void LIRGenerator::cmp_reg_mem_branch(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, T tgt, CodeEmitInfo* info) { __ load(new LIR_Address(base, disp, type), FrameMap::LR_opr, info); - __ cmp(condition, reg, FrameMap::LR_opr); + __ cmp_branch(condition, reg, FrameMap::LR_opr, type, tgt); } +// Explicit instantiation for all supported types. +template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, Label*, CodeEmitInfo*); +template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, BlockBegin*, CodeEmitInfo*); +template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, CodeStub*, CodeEmitInfo*); bool LIRGenerator::strength_reduce_multiply(LIR_Opr left, jint c, LIR_Opr result, LIR_Opr tmp) { assert(left != result, "should be different registers"); diff --git a/src/hotspot/cpu/arm/c1_LIR_arm.cpp b/src/hotspot/cpu/arm/c1_LIR_arm.cpp index 806da320209..5305fe371e1 100644 --- a/src/hotspot/cpu/arm/c1_LIR_arm.cpp +++ b/src/hotspot/cpu/arm/c1_LIR_arm.cpp @@ -84,3 +84,24 @@ void LIR_Address::verify() const { #endif // AARCH64 } #endif // PRODUCT + +template +void LIR_List::cmp_branch(LIR_Condition condition, LIR_Opr left, LIR_Opr right, BasicType type, T tgt, CodeEmitInfo* info) { + cmp(condition, left, right, info); + branch(condition, type, tgt); +} + +// Explicit instantiation for all supported types. +template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, Label*, CodeEmitInfo*); +template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, BlockBegin*, CodeEmitInfo*); +template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, CodeStub*, CodeEmitInfo*); + +void LIR_List::cmp_branch(LIR_Condition condition, LIR_Opr left, LIR_Opr right, BasicType type, BlockBegin* block, BlockBegin* unordered) { + cmp(condition, left, right); + branch(condition, type, block, unordered); +} + +void LIR_List::cmp_cmove(LIR_Condition condition, LIR_Opr left, LIR_Opr right, LIR_Opr src1, LIR_Opr src2, LIR_Opr dst, BasicType type) { + cmp(condition, left, right); + cmove(condition, src1, src2, dst, type); +} diff --git a/src/hotspot/cpu/loongarch/abstractInterpreter_loongarch.cpp b/src/hotspot/cpu/loongarch/abstractInterpreter_loongarch.cpp new file mode 100644 index 00000000000..0412b99537e --- /dev/null +++ b/src/hotspot/cpu/loongarch/abstractInterpreter_loongarch.cpp @@ -0,0 +1,132 @@ +/* + * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "ci/ciMethod.hpp" +#include "interpreter/interpreter.hpp" +#include "runtime/frame.inline.hpp" + +// asm based interpreter deoptimization helpers +int AbstractInterpreter::size_activation(int max_stack, + int temps, + int extra_args, + int monitors, + int callee_params, + int callee_locals, + bool is_top_frame) { + // Note: This calculation must exactly parallel the frame setup + // in AbstractInterpreterGenerator::generate_method_entry. + + // fixed size of an interpreter frame: + int overhead = frame::java_frame_sender_sp_offset - + frame::interpreter_frame_initial_sp_offset; + // Our locals were accounted for by the caller (or last_frame_adjust + // on the transistion) Since the callee parameters already account + // for the callee's params we only need to account for the extra + // locals. + int size = overhead + + (callee_locals - callee_params)*Interpreter::stackElementWords + + monitors * frame::interpreter_frame_monitor_size() + + temps* Interpreter::stackElementWords + extra_args; + + return size; +} + +// How much stack a method activation needs in words. +int AbstractInterpreter::size_top_interpreter_activation(Method* method) { + + const int entry_size = frame::interpreter_frame_monitor_size(); + + // total overhead size: entry_size + (saved ebp thru expr stack bottom). + // be sure to change this if you add/subtract anything to/from the overhead area + const int overhead_size = -(frame::interpreter_frame_initial_sp_offset) + entry_size; + + const int stub_code = 6; // see generate_call_stub + // return overhead_size + method->max_locals() + method->max_stack() + stub_code; + const int method_stack = (method->max_locals() + method->max_stack()) * + Interpreter::stackElementWords; + return overhead_size + method_stack + stub_code; +} + +void AbstractInterpreter::layout_activation(Method* method, + int tempcount, + int popframe_extra_args, + int moncount, + int caller_actual_parameters, + int callee_param_count, + int callee_locals, + frame* caller, + frame* interpreter_frame, + bool is_top_frame, + bool is_bottom_frame) { + // Note: This calculation must exactly parallel the frame setup + // in AbstractInterpreterGenerator::generate_method_entry. + // If interpreter_frame!=NULL, set up the method, locals, and monitors. + // The frame interpreter_frame, if not NULL, is guaranteed to be the + // right size, as determined by a previous call to this method. 
+ // It is also guaranteed to be walkable even though it is in a skeletal state + + // fixed size of an interpreter frame: + + int max_locals = method->max_locals() * Interpreter::stackElementWords; + int extra_locals = (method->max_locals() - method->size_of_parameters()) * Interpreter::stackElementWords; + +#ifdef ASSERT + assert(caller->sp() == interpreter_frame->sender_sp(), "Frame not properly walkable(2)"); +#endif + + interpreter_frame->interpreter_frame_set_method(method); + // NOTE the difference in using sender_sp and interpreter_frame_sender_sp + // interpreter_frame_sender_sp is the original sp of the caller (the unextended_sp) + // and sender_sp is fp+8 + intptr_t* locals = interpreter_frame->sender_sp() + max_locals - 1; + +#ifdef ASSERT + if (caller->is_interpreted_frame()) { + assert(locals < caller->fp() + frame::interpreter_frame_initial_sp_offset, "bad placement"); + } +#endif + + interpreter_frame->interpreter_frame_set_locals(locals); + BasicObjectLock* montop = interpreter_frame->interpreter_frame_monitor_begin(); + BasicObjectLock* monbot = montop - moncount; + interpreter_frame->interpreter_frame_set_monitor_end(montop - moncount); + + //set last sp; + intptr_t* esp = (intptr_t*) monbot - tempcount*Interpreter::stackElementWords - + popframe_extra_args; + interpreter_frame->interpreter_frame_set_last_sp(esp); + // All frames but the initial interpreter frame we fill in have a + // value for sender_sp that allows walking the stack but isn't + // truly correct. Correct the value here. + // + if (extra_locals != 0 && + interpreter_frame->sender_sp() == interpreter_frame->interpreter_frame_sender_sp() ) { + interpreter_frame->set_interpreter_frame_sender_sp(caller->sp() + extra_locals); + } + *interpreter_frame->interpreter_frame_cache_addr() = method->constants()->cache(); + *interpreter_frame->interpreter_frame_mirror_addr() = method->method_holder()->java_mirror(); +} + diff --git a/src/hotspot/cpu/loongarch/assembler_loongarch.cpp b/src/hotspot/cpu/loongarch/assembler_loongarch.cpp new file mode 100644 index 00000000000..e6e62cccad0 --- /dev/null +++ b/src/hotspot/cpu/loongarch/assembler_loongarch.cpp @@ -0,0 +1,849 @@ +/* + * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "asm/assembler.hpp" +#include "asm/assembler.inline.hpp" +#include "gc/shared/cardTableBarrierSet.hpp" +#include "gc/shared/collectedHeap.inline.hpp" +#include "interpreter/interpreter.hpp" +#include "memory/resourceArea.hpp" +#include "prims/methodHandles.hpp" +#include "runtime/biasedLocking.hpp" +#include "runtime/objectMonitor.hpp" +#include "runtime/os.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#include "utilities/macros.hpp" + +#ifdef PRODUCT +#define BLOCK_COMMENT(str) /* nothing */ +#define STOP(error) stop(error) +#else +#define BLOCK_COMMENT(str) block_comment(str) +#define STOP(error) block_comment(error); stop(error) +#endif + +#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") +// Implementation of AddressLiteral + +AddressLiteral::AddressLiteral(address target, relocInfo::relocType rtype) { + _is_lval = false; + _target = target; + _rspec = rspec_from_rtype(rtype, target); +} + +// Implementation of Address + +Address Address::make_array(ArrayAddress adr) { + AddressLiteral base = adr.base(); + Address index = adr.index(); + assert(index._disp == 0, "must not have disp"); // maybe it can? + Address array(index._base, index._index, index._scale, (intptr_t) base.target()); + array._rspec = base._rspec; + return array; +} + +// exceedingly dangerous constructor +Address::Address(address loc, RelocationHolder spec) { + _base = noreg; + _index = noreg; + _scale = no_scale; + _disp = (intptr_t) loc; + _rspec = spec; +} + + +int Assembler::is_int_mask(int x) { + int xx = x; + int count = 0; + + while (x != 0) { + x &= (x - 1); + count++; + } + + if ((1<> 12)); + if (split_low12(disp)) + ori(AT, AT, split_low12(disp)); + + if (scale == 0) { + add_d(AT, AT, index); + } else { + alsl_d(AT, index, AT, scale - 1); + } + ldx_b(dst, base, AT); + } + } else { + if (is_simm(disp, 12)) { + ld_b(dst, base, disp); + } else { + lu12i_w(AT, split_low20(disp >> 12)); + if (split_low12(disp)) + ori(AT, AT, split_low12(disp)); + ldx_b(dst, base, AT); + } + } +} + +void Assembler::ld_bu(Register rd, Address src) { + Register dst = rd; + Register base = src.base(); + Register index = src.index(); + + int scale = src.scale(); + int disp = src.disp(); + + if (index != noreg) { + if (is_simm(disp, 12)) { + if (scale == 0) { + if (disp == 0) { + ldx_bu(dst, base, index); + } else { + add_d(AT, base, index); + ld_bu(dst, AT, disp); + } + } else { + alsl_d(AT, index, base, scale - 1); + ld_bu(dst, AT, disp); + } + } else { + lu12i_w(AT, split_low20(disp >> 12)); + if (split_low12(disp)) + ori(AT, AT, split_low12(disp)); + + if (scale == 0) { + add_d(AT, AT, index); + } else { + alsl_d(AT, index, AT, scale - 1); + } + ldx_bu(dst, base, AT); + } + } else { + if (is_simm(disp, 12)) { + ld_bu(dst, base, disp); + } else { + lu12i_w(AT, split_low20(disp >> 12)); + if (split_low12(disp)) + ori(AT, AT, split_low12(disp)); + ldx_bu(dst, base, AT); + } + } +} + +void Assembler::ld_d(Register rd, Address src){ + Register dst = rd; + Register base = src.base(); + Register index = src.index(); + + int scale = src.scale(); + int disp = src.disp(); + + if (index != noreg) { + if (is_simm(disp, 12)) { + if (scale == 0) { + if (disp == 0) { + ldx_d(dst, base, index); + } else { + add_d(AT, base, index); + ld_d(dst, AT, disp); + } + } else { + alsl_d(AT, index, base, scale - 1); + ld_d(dst, AT, disp); + } + } else if (is_simm(disp, 16) && !(disp & 3)) { + if (scale == 0) { + add_d(AT, base, index); + } else { + alsl_d(AT, index, 
base, scale - 1); + } + ldptr_d(dst, AT, disp); + } else { + lu12i_w(AT, split_low20(disp >> 12)); + if (split_low12(disp)) + ori(AT, AT, split_low12(disp)); + + if (scale == 0) { + add_d(AT, AT, index); + } else { + alsl_d(AT, index, AT, scale - 1); + } + ldx_d(dst, base, AT); + } + } else { + if (is_simm(disp, 12)) { + ld_d(dst, base, disp); + } else if (is_simm(disp, 16) && !(disp & 3)) { + ldptr_d(dst, base, disp); + } else { + lu12i_w(AT, split_low20(disp >> 12)); + if (split_low12(disp)) + ori(AT, AT, split_low12(disp)); + ldx_d(dst, base, AT); + } + } +} + +void Assembler::ld_h(Register rd, Address src){ + Register dst = rd; + Register base = src.base(); + Register index = src.index(); + + int scale = src.scale(); + int disp = src.disp(); + + if (index != noreg) { + if (is_simm(disp, 12)) { + if (scale == 0) { + if (disp == 0) { + ldx_h(dst, base, index); + } else { + add_d(AT, base, index); + ld_h(dst, AT, disp); + } + } else { + alsl_d(AT, index, base, scale - 1); + ld_h(dst, AT, disp); + } + } else { + lu12i_w(AT, split_low20(disp >> 12)); + if (split_low12(disp)) + ori(AT, AT, split_low12(disp)); + + if (scale == 0) { + add_d(AT, AT, index); + } else { + alsl_d(AT, index, AT, scale - 1); + } + ldx_h(dst, base, AT); + } + } else { + if (is_simm(disp, 12)) { + ld_h(dst, base, disp); + } else { + lu12i_w(AT, split_low20(disp >> 12)); + if (split_low12(disp)) + ori(AT, AT, split_low12(disp)); + ldx_h(dst, base, AT); + } + } +} + +void Assembler::ld_hu(Register rd, Address src){ + Register dst = rd; + Register base = src.base(); + Register index = src.index(); + + int scale = src.scale(); + int disp = src.disp(); + + if (index != noreg) { + if (is_simm(disp, 12)) { + if (scale == 0) { + if (disp == 0) { + ldx_hu(dst, base, index); + } else { + add_d(AT, base, index); + ld_hu(dst, AT, disp); + } + } else { + alsl_d(AT, index, base, scale - 1); + ld_hu(dst, AT, disp); + } + } else { + lu12i_w(AT, split_low20(disp >> 12)); + if (split_low12(disp)) + ori(AT, AT, split_low12(disp)); + + if (scale == 0) { + add_d(AT, AT, index); + } else { + alsl_d(AT, index, AT, scale - 1); + } + ldx_hu(dst, base, AT); + } + } else { + if (is_simm(disp, 12)) { + ld_hu(dst, base, disp); + } else { + lu12i_w(AT, split_low20(disp >> 12)); + if (split_low12(disp)) + ori(AT, AT, split_low12(disp)); + ldx_hu(dst, base, AT); + } + } +} + +void Assembler::ll_w(Register rd, Address src){ + assert(src.index() == NOREG, "index is unimplemented"); + ll_w(rd, src.base(), src.disp()); +} + +void Assembler::ll_d(Register rd, Address src){ + assert(src.index() == NOREG, "index is unimplemented"); + ll_d(rd, src.base(), src.disp()); +} + +void Assembler::ld_w(Register rd, Address src){ + Register dst = rd; + Register base = src.base(); + Register index = src.index(); + + int scale = src.scale(); + int disp = src.disp(); + + if (index != noreg) { + if (is_simm(disp, 12)) { + if (scale == 0) { + if (disp == 0) { + ldx_w(dst, base, index); + } else { + add_d(AT, base, index); + ld_w(dst, AT, disp); + } + } else { + alsl_d(AT, index, base, scale - 1); + ld_w(dst, AT, disp); + } + } else if (is_simm(disp, 16) && !(disp & 3)) { + if (scale == 0) { + add_d(AT, base, index); + } else { + alsl_d(AT, index, base, scale - 1); + } + ldptr_w(dst, AT, disp); + } else { + lu12i_w(AT, split_low20(disp >> 12)); + if (split_low12(disp)) + ori(AT, AT, split_low12(disp)); + + if (scale == 0) { + add_d(AT, AT, index); + } else { + alsl_d(AT, index, AT, scale - 1); + } + ldx_w(dst, base, AT); + } + } else { + if (is_simm(disp, 12)) { + 
ld_w(dst, base, disp); + } else if (is_simm(disp, 16) && !(disp & 3)) { + ldptr_w(dst, base, disp); + } else { + lu12i_w(AT, split_low20(disp >> 12)); + if (split_low12(disp)) + ori(AT, AT, split_low12(disp)); + ldx_w(dst, base, AT); + } + } +} + +void Assembler::ld_wu(Register rd, Address src){ + Register dst = rd; + Register base = src.base(); + Register index = src.index(); + + int scale = src.scale(); + int disp = src.disp(); + + if (index != noreg) { + if (is_simm(disp, 12)) { + if (scale == 0) { + if (disp == 0) { + ldx_wu(dst, base, index); + } else { + add_d(AT, base, index); + ld_wu(dst, AT, disp); + } + } else { + alsl_d(AT, index, base, scale - 1); + ld_wu(dst, AT, disp); + } + } else { + lu12i_w(AT, split_low20(disp >> 12)); + if (split_low12(disp)) + ori(AT, AT, split_low12(disp)); + + if (scale == 0) { + add_d(AT, AT, index); + } else { + alsl_d(AT, index, AT, scale - 1); + } + ldx_wu(dst, base, AT); + } + } else { + if (is_simm(disp, 12)) { + ld_wu(dst, base, disp); + } else { + lu12i_w(AT, split_low20(disp >> 12)); + if (split_low12(disp)) + ori(AT, AT, split_low12(disp)); + ldx_wu(dst, base, AT); + } + } +} + +void Assembler::st_b(Register rd, Address dst) { + Register src = rd; + Register base = dst.base(); + Register index = dst.index(); + + int scale = dst.scale(); + int disp = dst.disp(); + + if (index != noreg) { + assert_different_registers(src, AT); + if (is_simm(disp, 12)) { + if (scale == 0) { + if (disp == 0) { + stx_b(src, base, index); + } else { + add_d(AT, base, index); + st_b(src, AT, disp); + } + } else { + alsl_d(AT, index, base, scale - 1); + st_b(src, AT, disp); + } + } else { + lu12i_w(AT, split_low20(disp >> 12)); + if (split_low12(disp)) + ori(AT, AT, split_low12(disp)); + + if (scale == 0) { + add_d(AT, AT, index); + } else { + alsl_d(AT, index, AT, scale - 1); + } + stx_b(src, base, AT); + } + } else { + if (is_simm(disp, 12)) { + st_b(src, base, disp); + } else { + assert_different_registers(src, AT); + lu12i_w(AT, split_low20(disp >> 12)); + if (split_low12(disp)) + ori(AT, AT, split_low12(disp)); + stx_b(src, base, AT); + } + } +} + +void Assembler::sc_w(Register rd, Address dst) { + assert(dst.index() == NOREG, "index is unimplemented"); + sc_w(rd, dst.base(), dst.disp()); +} + +void Assembler::sc_d(Register rd, Address dst) { + assert(dst.index() == NOREG, "index is unimplemented"); + sc_d(rd, dst.base(), dst.disp()); +} + +void Assembler::st_d(Register rd, Address dst) { + Register src = rd; + Register base = dst.base(); + Register index = dst.index(); + + int scale = dst.scale(); + int disp = dst.disp(); + + if (index != noreg) { + assert_different_registers(src, AT); + if (is_simm(disp, 12)) { + if (scale == 0) { + if (disp == 0) { + stx_d(src, base, index); + } else { + add_d(AT, base, index); + st_d(src, AT, disp); + } + } else { + alsl_d(AT, index, base, scale - 1); + st_d(src, AT, disp); + } + } else if (is_simm(disp, 16) && !(disp & 3)) { + if (scale == 0) { + add_d(AT, base, index); + } else { + alsl_d(AT, index, base, scale - 1); + } + stptr_d(src, AT, disp); + } else { + lu12i_w(AT, split_low20(disp >> 12)); + if (split_low12(disp)) + ori(AT, AT, split_low12(disp)); + + if (scale == 0) { + add_d(AT, AT, index); + } else { + alsl_d(AT, index, AT, scale - 1); + } + stx_d(src, base, AT); + } + } else { + if (is_simm(disp, 12)) { + st_d(src, base, disp); + } else if (is_simm(disp, 16) && !(disp & 3)) { + stptr_d(src, base, disp); + } else { + assert_different_registers(src, AT); + lu12i_w(AT, split_low20(disp >> 12)); + if 
(split_low12(disp)) + ori(AT, AT, split_low12(disp)); + stx_d(src, base, AT); + } + } +} + +void Assembler::st_h(Register rd, Address dst) { + Register src = rd; + Register base = dst.base(); + Register index = dst.index(); + + int scale = dst.scale(); + int disp = dst.disp(); + + if (index != noreg) { + assert_different_registers(src, AT); + if (is_simm(disp, 12)) { + if (scale == 0) { + if (disp == 0) { + stx_h(src, base, index); + } else { + add_d(AT, base, index); + st_h(src, AT, disp); + } + } else { + alsl_d(AT, index, base, scale - 1); + st_h(src, AT, disp); + } + } else { + lu12i_w(AT, split_low20(disp >> 12)); + if (split_low12(disp)) + ori(AT, AT, split_low12(disp)); + + if (scale == 0) { + add_d(AT, AT, index); + } else { + alsl_d(AT, index, AT, scale - 1); + } + stx_h(src, base, AT); + } + } else { + if (is_simm(disp, 12)) { + st_h(src, base, disp); + } else { + assert_different_registers(src, AT); + lu12i_w(AT, split_low20(disp >> 12)); + if (split_low12(disp)) + ori(AT, AT, split_low12(disp)); + stx_h(src, base, AT); + } + } +} + +void Assembler::st_w(Register rd, Address dst) { + Register src = rd; + Register base = dst.base(); + Register index = dst.index(); + + int scale = dst.scale(); + int disp = dst.disp(); + + if (index != noreg) { + assert_different_registers(src, AT); + if (is_simm(disp, 12)) { + if (scale == 0) { + if (disp == 0) { + stx_w(src, base, index); + } else { + add_d(AT, base, index); + st_w(src, AT, disp); + } + } else { + alsl_d(AT, index, base, scale - 1); + st_w(src, AT, disp); + } + } else if (is_simm(disp, 16) && !(disp & 3)) { + if (scale == 0) { + add_d(AT, base, index); + } else { + alsl_d(AT, index, base, scale - 1); + } + stptr_w(src, AT, disp); + } else { + lu12i_w(AT, split_low20(disp >> 12)); + if (split_low12(disp)) + ori(AT, AT, split_low12(disp)); + + if (scale == 0) { + add_d(AT, AT, index); + } else { + alsl_d(AT, index, AT, scale - 1); + } + stx_w(src, base, AT); + } + } else { + if (is_simm(disp, 12)) { + st_w(src, base, disp); + } else if (is_simm(disp, 16) && !(disp & 3)) { + stptr_w(src, base, disp); + } else { + assert_different_registers(src, AT); + lu12i_w(AT, split_low20(disp >> 12)); + if (split_low12(disp)) + ori(AT, AT, split_low12(disp)); + stx_w(src, base, AT); + } + } +} + +void Assembler::fld_s(FloatRegister fd, Address src) { + Register base = src.base(); + Register index = src.index(); + + int scale = src.scale(); + int disp = src.disp(); + + if (index != noreg) { + if (is_simm(disp, 12)) { + if (scale == 0) { + if (disp == 0) { + fldx_s(fd, base, index); + } else { + add_d(AT, base, index); + fld_s(fd, AT, disp); + } + } else { + alsl_d(AT, index, base, scale - 1); + fld_s(fd, AT, disp); + } + } else { + lu12i_w(AT, split_low20(disp >> 12)); + if (split_low12(disp)) + ori(AT, AT, split_low12(disp)); + + if (scale == 0) { + add_d(AT, AT, index); + } else { + alsl_d(AT, index, AT, scale - 1); + } + fldx_s(fd, base, AT); + } + } else { + if (is_simm(disp, 12)) { + fld_s(fd, base, disp); + } else { + lu12i_w(AT, split_low20(disp >> 12)); + if (split_low12(disp)) + ori(AT, AT, split_low12(disp)); + fldx_s(fd, base, AT); + } + } +} + +void Assembler::fld_d(FloatRegister fd, Address src) { + Register base = src.base(); + Register index = src.index(); + + int scale = src.scale(); + int disp = src.disp(); + + if (index != noreg) { + if (is_simm(disp, 12)) { + if (scale == 0) { + if (disp == 0) { + fldx_d(fd, base, index); + } else { + add_d(AT, base, index); + fld_d(fd, AT, disp); + } + } else { + alsl_d(AT, index, base, scale 
- 1); + fld_d(fd, AT, disp); + } + } else { + lu12i_w(AT, split_low20(disp >> 12)); + if (split_low12(disp)) + ori(AT, AT, split_low12(disp)); + + if (scale == 0) { + add_d(AT, AT, index); + } else { + alsl_d(AT, index, AT, scale - 1); + } + fldx_d(fd, base, AT); + } + } else { + if (is_simm(disp, 12)) { + fld_d(fd, base, disp); + } else { + lu12i_w(AT, split_low20(disp >> 12)); + if (split_low12(disp)) + ori(AT, AT, split_low12(disp)); + fldx_d(fd, base, AT); + } + } +} + +void Assembler::fst_s(FloatRegister fd, Address dst) { + Register base = dst.base(); + Register index = dst.index(); + + int scale = dst.scale(); + int disp = dst.disp(); + + if (index != noreg) { + if (is_simm(disp, 12)) { + if (scale == 0) { + if (disp == 0) { + fstx_s(fd, base, index); + } else { + add_d(AT, base, index); + fst_s(fd, AT, disp); + } + } else { + alsl_d(AT, index, base, scale - 1); + fst_s(fd, AT, disp); + } + } else { + lu12i_w(AT, split_low20(disp >> 12)); + if (split_low12(disp)) + ori(AT, AT, split_low12(disp)); + + if (scale == 0) { + add_d(AT, AT, index); + } else { + alsl_d(AT, index, AT, scale - 1); + } + fstx_s(fd, base, AT); + } + } else { + if (is_simm(disp, 12)) { + fst_s(fd, base, disp); + } else { + lu12i_w(AT, split_low20(disp >> 12)); + if (split_low12(disp)) + ori(AT, AT, split_low12(disp)); + fstx_s(fd, base, AT); + } + } +} + +void Assembler::fst_d(FloatRegister fd, Address dst) { + Register base = dst.base(); + Register index = dst.index(); + + int scale = dst.scale(); + int disp = dst.disp(); + + if (index != noreg) { + if (is_simm(disp, 12)) { + if (scale == 0) { + if (disp == 0) { + fstx_d(fd, base, index); + } else { + add_d(AT, base, index); + fst_d(fd, AT, disp); + } + } else { + alsl_d(AT, index, base, scale - 1); + fst_d(fd, AT, disp); + } + } else { + lu12i_w(AT, split_low20(disp >> 12)); + if (split_low12(disp)) + ori(AT, AT, split_low12(disp)); + + if (scale == 0) { + add_d(AT, AT, index); + } else { + alsl_d(AT, index, AT, scale - 1); + } + fstx_d(fd, base, AT); + } + } else { + if (is_simm(disp, 12)) { + fst_d(fd, base, disp); + } else { + lu12i_w(AT, split_low20(disp >> 12)); + if (split_low12(disp)) + ori(AT, AT, split_low12(disp)); + fstx_d(fd, base, AT); + } + } +} diff --git a/src/hotspot/cpu/loongarch/assembler_loongarch.hpp b/src/hotspot/cpu/loongarch/assembler_loongarch.hpp new file mode 100644 index 00000000000..179da7bd0ef --- /dev/null +++ b/src/hotspot/cpu/loongarch/assembler_loongarch.hpp @@ -0,0 +1,2827 @@ +/* + * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_LOONGARCH_ASSEMBLER_LOONGARCH_HPP +#define CPU_LOONGARCH_ASSEMBLER_LOONGARCH_HPP + +#include "asm/register.hpp" +#include "runtime/vm_version.hpp" + +class BiasedLockingCounters; + + +// Note: A register location is represented via a Register, not +// via an address for efficiency & simplicity reasons. + +class ArrayAddress; + +class Address { + public: + enum ScaleFactor { + no_scale = -1, + times_1 = 0, + times_2 = 1, + times_4 = 2, + times_8 = 3, + times_ptr = times_8 + }; + static ScaleFactor times(int size) { + assert(size >= 1 && size <= 8 && is_power_of_2(size), "bad scale size"); + if (size == 8) return times_8; + if (size == 4) return times_4; + if (size == 2) return times_2; + return times_1; + } + + private: + Register _base; + Register _index; + ScaleFactor _scale; + int _disp; + RelocationHolder _rspec; + + // Easily misused constructors make them private + Address(address loc, RelocationHolder spec); + Address(int disp, address loc, relocInfo::relocType rtype); + Address(int disp, address loc, RelocationHolder spec); + + public: + + // creation + Address() + : _base(noreg), + _index(noreg), + _scale(no_scale), + _disp(0) { + } + + // No default displacement otherwise Register can be implicitly + // converted to 0(Register) which is quite a different animal. + + Address(Register base, int disp = 0) + : _base(base), + _index(noreg), + _scale(no_scale), + _disp(disp) { + assert_different_registers(_base, AT); + } + + Address(Register base, Register index, ScaleFactor scale, int disp = 0) + : _base (base), + _index(index), + _scale(scale), + _disp (disp) { + assert(!index->is_valid() == (scale == Address::no_scale), "inconsistent address"); + assert_different_registers(_base, _index, AT); + } + + // The following two overloads are used in connection with the + // ByteSize type (see sizes.hpp). They simplify the use of + // ByteSize'd arguments in assembly code. Note that their equivalent + // for the optimized build are the member functions with int disp + // argument since ByteSize is mapped to an int type in that case. + // + // Note: DO NOT introduce similar overloaded functions for WordSize + // arguments as in the optimized mode, both ByteSize and WordSize + // are mapped to the same type and thus the compiler cannot make a + // distinction anymore (=> compiler errors). 
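+
+  // Illustrative sketch only (not part of the original change): typical ways call
+  // sites build operands with the public constructors above. The register names
+  // T0 and T1 are assumed to be the temporary registers declared in
+  // register_loongarch.hpp for this port.
+  //
+  //   Address simple(T0, 16);                         // T0 + 16
+  //   Address indexed(T0, T1, Address::times_8, 24);  // T0 + (T1 << 3) + 24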
+ +#ifdef ASSERT + Address(Register base, ByteSize disp) + : _base(base), + _index(noreg), + _scale(no_scale), + _disp(in_bytes(disp)) { + assert_different_registers(_base, AT); + } + + Address(Register base, Register index, ScaleFactor scale, ByteSize disp) + : _base(base), + _index(index), + _scale(scale), + _disp(in_bytes(disp)) { + assert(!index->is_valid() == (scale == Address::no_scale), "inconsistent address"); + assert_different_registers(_base, _index, AT); + } +#endif // ASSERT + + // accessors + bool uses(Register reg) const { return _base == reg || _index == reg; } + Register base() const { return _base; } + Register index() const { return _index; } + ScaleFactor scale() const { return _scale; } + int disp() const { return _disp; } + + static Address make_array(ArrayAddress); + + friend class Assembler; + friend class MacroAssembler; + friend class LIR_Assembler; // base/index/scale/disp +}; + +// Calling convention +class Argument { + public: + enum { + n_register_parameters = 8, // 8 integer registers used to pass parameters + n_float_register_parameters = 8 // 8 float registers used to pass parameters + }; +}; + +// +// AddressLiteral has been split out from Address because operands of this type +// need to be treated specially on 32bit vs. 64bit platforms. By splitting it out +// the few instructions that need to deal with address literals are unique and the +// MacroAssembler does not have to implement every instruction in the Assembler +// in order to search for address literals that may need special handling depending +// on the instruction and the platform. As small step on the way to merging i486/amd64 +// directories. +// +class AddressLiteral { + friend class ArrayAddress; + RelocationHolder _rspec; + // Typically we use AddressLiterals we want to use their rval + // However in some situations we want the lval (effect address) of the item. + // We provide a special factory for making those lvals. + bool _is_lval; + + // If the target is far we'll need to load the ea of this to + // a register to reach it. Otherwise if near we can do rip + // relative addressing. 
+ + address _target; + + protected: + // creation + AddressLiteral() + : _is_lval(false), + _target(NULL) + {} + + public: + + + AddressLiteral(address target, relocInfo::relocType rtype); + + AddressLiteral(address target, RelocationHolder const& rspec) + : _rspec(rspec), + _is_lval(false), + _target(target) + {} + + AddressLiteral addr() { + AddressLiteral ret = *this; + ret._is_lval = true; + return ret; + } + + + private: + + address target() { return _target; } + bool is_lval() { return _is_lval; } + + relocInfo::relocType reloc() const { return _rspec.type(); } + const RelocationHolder& rspec() const { return _rspec; } + + friend class Assembler; + friend class MacroAssembler; + friend class Address; + friend class LIR_Assembler; + RelocationHolder rspec_from_rtype(relocInfo::relocType rtype, address addr) { + switch (rtype) { + case relocInfo::external_word_type: + return external_word_Relocation::spec(addr); + case relocInfo::internal_word_type: + return internal_word_Relocation::spec(addr); + case relocInfo::opt_virtual_call_type: + return opt_virtual_call_Relocation::spec(); + case relocInfo::static_call_type: + return static_call_Relocation::spec(); + case relocInfo::runtime_call_type: + return runtime_call_Relocation::spec(); + case relocInfo::poll_type: + case relocInfo::poll_return_type: + return Relocation::spec_simple(rtype); + case relocInfo::none: + case relocInfo::oop_type: + // Oops are a special case. Normally they would be their own section + // but in cases like icBuffer they are literals in the code stream that + // we don't have a section for. We use none so that we get a literal address + // which is always patchable. + return RelocationHolder(); + default: + ShouldNotReachHere(); + return RelocationHolder(); + } + } + +}; + +// Convience classes +class RuntimeAddress: public AddressLiteral { + + public: + + RuntimeAddress(address target) : AddressLiteral(target, relocInfo::runtime_call_type) {} + +}; + +class OopAddress: public AddressLiteral { + + public: + + OopAddress(address target) : AddressLiteral(target, relocInfo::oop_type){} + +}; + +class ExternalAddress: public AddressLiteral { + + public: + + ExternalAddress(address target) : AddressLiteral(target, relocInfo::external_word_type){} + +}; + +class InternalAddress: public AddressLiteral { + + public: + + InternalAddress(address target) : AddressLiteral(target, relocInfo::internal_word_type) {} + +}; + +// x86 can do array addressing as a single operation since disp can be an absolute +// address amd64 can't. We create a class that expresses the concept but does extra +// magic on amd64 to get the final result + +class ArrayAddress { + private: + + AddressLiteral _base; + Address _index; + + public: + + ArrayAddress() {}; + ArrayAddress(AddressLiteral base, Address index): _base(base), _index(index) {}; + AddressLiteral base() { return _base; } + Address index() { return _index; } + +}; + +// The LoongArch Assembler: Pure assembler doing NO optimizations on the instruction +// level ; i.e., what you write is what you get. The Assembler is generating code into +// a CodeBuffer. 
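+//
+// A sketch of what the Address-based load/store helpers in assembler_loongarch.cpp
+// emit (see ld_d(Register, Address) earlier in this patch): a displacement outside
+// the signed 12-bit immediate range is first materialized into the scratch register
+// AT, then the access falls back to the register-indexed form, e.g.
+//
+//   lu12i_w(AT, split_low20(disp >> 12));  // bits 31..12 of the displacement
+//   ori(AT, AT, split_low12(disp));        // low 12 bits, when non-zero
+//   ldx_d(dst, base, AT);                  // dst = *(base + AT)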
+ +class Assembler : public AbstractAssembler { + friend class AbstractAssembler; // for the non-virtual hack + friend class LIR_Assembler; // as_Address() + friend class StubGenerator; + + public: + // 22-bit opcode, highest 22 bits: bits[31...10] + enum ops22 { + clo_w_op = 0b0000000000000000000100, + clz_w_op = 0b0000000000000000000101, + cto_w_op = 0b0000000000000000000110, + ctz_w_op = 0b0000000000000000000111, + clo_d_op = 0b0000000000000000001000, + clz_d_op = 0b0000000000000000001001, + cto_d_op = 0b0000000000000000001010, + ctz_d_op = 0b0000000000000000001011, + revb_2h_op = 0b0000000000000000001100, + revb_4h_op = 0b0000000000000000001101, + revb_2w_op = 0b0000000000000000001110, + revb_d_op = 0b0000000000000000001111, + revh_2w_op = 0b0000000000000000010000, + revh_d_op = 0b0000000000000000010001, + bitrev_4b_op = 0b0000000000000000010010, + bitrev_8b_op = 0b0000000000000000010011, + bitrev_w_op = 0b0000000000000000010100, + bitrev_d_op = 0b0000000000000000010101, + ext_w_h_op = 0b0000000000000000010110, + ext_w_b_op = 0b0000000000000000010111, + rdtimel_w_op = 0b0000000000000000011000, + rdtimeh_w_op = 0b0000000000000000011001, + rdtime_d_op = 0b0000000000000000011010, + cpucfg_op = 0b0000000000000000011011, + fabs_s_op = 0b0000000100010100000001, + fabs_d_op = 0b0000000100010100000010, + fneg_s_op = 0b0000000100010100000101, + fneg_d_op = 0b0000000100010100000110, + flogb_s_op = 0b0000000100010100001001, + flogb_d_op = 0b0000000100010100001010, + fclass_s_op = 0b0000000100010100001101, + fclass_d_op = 0b0000000100010100001110, + fsqrt_s_op = 0b0000000100010100010001, + fsqrt_d_op = 0b0000000100010100010010, + frecip_s_op = 0b0000000100010100010101, + frecip_d_op = 0b0000000100010100010110, + frsqrt_s_op = 0b0000000100010100011001, + frsqrt_d_op = 0b0000000100010100011010, + fmov_s_op = 0b0000000100010100100101, + fmov_d_op = 0b0000000100010100100110, + movgr2fr_w_op = 0b0000000100010100101001, + movgr2fr_d_op = 0b0000000100010100101010, + movgr2frh_w_op = 0b0000000100010100101011, + movfr2gr_s_op = 0b0000000100010100101101, + movfr2gr_d_op = 0b0000000100010100101110, + movfrh2gr_s_op = 0b0000000100010100101111, + movgr2fcsr_op = 0b0000000100010100110000, + movfcsr2gr_op = 0b0000000100010100110010, + movfr2cf_op = 0b0000000100010100110100, + movcf2fr_op = 0b0000000100010100110101, + movgr2cf_op = 0b0000000100010100110110, + movcf2gr_op = 0b0000000100010100110111, + fcvt_s_d_op = 0b0000000100011001000110, + fcvt_d_s_op = 0b0000000100011001001001, + ftintrm_w_s_op = 0b0000000100011010000001, + ftintrm_w_d_op = 0b0000000100011010000010, + ftintrm_l_s_op = 0b0000000100011010001001, + ftintrm_l_d_op = 0b0000000100011010001010, + ftintrp_w_s_op = 0b0000000100011010010001, + ftintrp_w_d_op = 0b0000000100011010010010, + ftintrp_l_s_op = 0b0000000100011010011001, + ftintrp_l_d_op = 0b0000000100011010011010, + ftintrz_w_s_op = 0b0000000100011010100001, + ftintrz_w_d_op = 0b0000000100011010100010, + ftintrz_l_s_op = 0b0000000100011010101001, + ftintrz_l_d_op = 0b0000000100011010101010, + ftintrne_w_s_op = 0b0000000100011010110001, + ftintrne_w_d_op = 0b0000000100011010110010, + ftintrne_l_s_op = 0b0000000100011010111001, + ftintrne_l_d_op = 0b0000000100011010111010, + ftint_w_s_op = 0b0000000100011011000001, + ftint_w_d_op = 0b0000000100011011000010, + ftint_l_s_op = 0b0000000100011011001001, + ftint_l_d_op = 0b0000000100011011001010, + ffint_s_w_op = 0b0000000100011101000100, + ffint_s_l_op = 0b0000000100011101000110, + ffint_d_w_op = 0b0000000100011101001000, + ffint_d_l_op = 
0b0000000100011101001010, + frint_s_op = 0b0000000100011110010001, + frint_d_op = 0b0000000100011110010010, + iocsrrd_b_op = 0b0000011001001000000000, + iocsrrd_h_op = 0b0000011001001000000001, + iocsrrd_w_op = 0b0000011001001000000010, + iocsrrd_d_op = 0b0000011001001000000011, + iocsrwr_b_op = 0b0000011001001000000100, + iocsrwr_h_op = 0b0000011001001000000101, + iocsrwr_w_op = 0b0000011001001000000110, + iocsrwr_d_op = 0b0000011001001000000111, + vpcnt_b_op = 0b0111001010011100001000, + vpcnt_h_op = 0b0111001010011100001001, + vpcnt_w_op = 0b0111001010011100001010, + vpcnt_d_op = 0b0111001010011100001011, + vneg_b_op = 0b0111001010011100001100, + vneg_h_op = 0b0111001010011100001101, + vneg_w_op = 0b0111001010011100001110, + vneg_d_op = 0b0111001010011100001111, + vfclass_s_op = 0b0111001010011100110101, + vfclass_d_op = 0b0111001010011100110110, + vfsqrt_s_op = 0b0111001010011100111001, + vfsqrt_d_op = 0b0111001010011100111010, + vfrint_s_op = 0b0111001010011101001101, + vfrint_d_op = 0b0111001010011101001110, + vfrintrm_s_op = 0b0111001010011101010001, + vfrintrm_d_op = 0b0111001010011101010010, + vfrintrp_s_op = 0b0111001010011101010101, + vfrintrp_d_op = 0b0111001010011101010110, + vfrintrz_s_op = 0b0111001010011101011001, + vfrintrz_d_op = 0b0111001010011101011010, + vfrintrne_s_op = 0b0111001010011101011101, + vfrintrne_d_op = 0b0111001010011101011110, + vfcvtl_s_h_op = 0b0111001010011101111010, + vfcvth_s_h_op = 0b0111001010011101111011, + vfcvtl_d_s_op = 0b0111001010011101111100, + vfcvth_d_s_op = 0b0111001010011101111101, + vffint_s_w_op = 0b0111001010011110000000, + vffint_s_wu_op = 0b0111001010011110000001, + vffint_d_l_op = 0b0111001010011110000010, + vffint_d_lu_op = 0b0111001010011110000011, + vffintl_d_w_op = 0b0111001010011110000100, + vffinth_d_w_op = 0b0111001010011110000101, + vftint_w_s_op = 0b0111001010011110001100, + vftint_l_d_op = 0b0111001010011110001101, + vftintrm_w_s_op = 0b0111001010011110001110, + vftintrm_l_d_op = 0b0111001010011110001111, + vftintrp_w_s_op = 0b0111001010011110010000, + vftintrp_l_d_op = 0b0111001010011110010001, + vftintrz_w_s_op = 0b0111001010011110010010, + vftintrz_l_d_op = 0b0111001010011110010011, + vftintrne_w_s_op = 0b0111001010011110010100, + vftintrne_l_d_op = 0b0111001010011110010101, + vftint_wu_s = 0b0111001010011110010110, + vftint_lu_d = 0b0111001010011110010111, + vftintrz_wu_f = 0b0111001010011110011100, + vftintrz_lu_d = 0b0111001010011110011101, + vftintl_l_s_op = 0b0111001010011110100000, + vftinth_l_s_op = 0b0111001010011110100001, + vftintrml_l_s_op = 0b0111001010011110100010, + vftintrmh_l_s_op = 0b0111001010011110100011, + vftintrpl_l_s_op = 0b0111001010011110100100, + vftintrph_l_s_op = 0b0111001010011110100101, + vftintrzl_l_s_op = 0b0111001010011110100110, + vftintrzh_l_s_op = 0b0111001010011110100111, + vftintrnel_l_s_op = 0b0111001010011110101000, + vftintrneh_l_s_op = 0b0111001010011110101001, + vreplgr2vr_b_op = 0b0111001010011111000000, + vreplgr2vr_h_op = 0b0111001010011111000001, + vreplgr2vr_w_op = 0b0111001010011111000010, + vreplgr2vr_d_op = 0b0111001010011111000011, + xvpcnt_b_op = 0b0111011010011100001000, + xvpcnt_h_op = 0b0111011010011100001001, + xvpcnt_w_op = 0b0111011010011100001010, + xvpcnt_d_op = 0b0111011010011100001011, + xvneg_b_op = 0b0111011010011100001100, + xvneg_h_op = 0b0111011010011100001101, + xvneg_w_op = 0b0111011010011100001110, + xvneg_d_op = 0b0111011010011100001111, + xvfclass_s_op = 0b0111011010011100110101, + xvfclass_d_op = 0b0111011010011100110110, + xvfsqrt_s_op = 
0b0111011010011100111001, + xvfsqrt_d_op = 0b0111011010011100111010, + xvfrint_s_op = 0b0111011010011101001101, + xvfrint_d_op = 0b0111011010011101001110, + xvfrintrm_s_op = 0b0111011010011101010001, + xvfrintrm_d_op = 0b0111011010011101010010, + xvfrintrp_s_op = 0b0111011010011101010101, + xvfrintrp_d_op = 0b0111011010011101010110, + xvfrintrz_s_op = 0b0111011010011101011001, + xvfrintrz_d_op = 0b0111011010011101011010, + xvfrintrne_s_op = 0b0111011010011101011101, + xvfrintrne_d_op = 0b0111011010011101011110, + xvfcvtl_s_h_op = 0b0111011010011101111010, + xvfcvth_s_h_op = 0b0111011010011101111011, + xvfcvtl_d_s_op = 0b0111011010011101111100, + xvfcvth_d_s_op = 0b0111011010011101111101, + xvffint_s_w_op = 0b0111011010011110000000, + xvffint_s_wu_op = 0b0111011010011110000001, + xvffint_d_l_op = 0b0111011010011110000010, + xvffint_d_lu_op = 0b0111011010011110000011, + xvffintl_d_w_op = 0b0111011010011110000100, + xvffinth_d_w_op = 0b0111011010011110000101, + xvftint_w_s_op = 0b0111011010011110001100, + xvftint_l_d_op = 0b0111011010011110001101, + xvftintrm_w_s_op = 0b0111011010011110001110, + xvftintrm_l_d_op = 0b0111011010011110001111, + xvftintrp_w_s_op = 0b0111011010011110010000, + xvftintrp_l_d_op = 0b0111011010011110010001, + xvftintrz_w_s_op = 0b0111011010011110010010, + xvftintrz_l_d_op = 0b0111011010011110010011, + xvftintrne_w_s_op = 0b0111011010011110010100, + xvftintrne_l_d_op = 0b0111011010011110010101, + xvftint_wu_s = 0b0111011010011110010110, + xvftint_lu_d = 0b0111011010011110010111, + xvftintrz_wu_f = 0b0111011010011110011100, + xvftintrz_lu_d = 0b0111011010011110011101, + xvftintl_l_s_op = 0b0111011010011110100000, + xvftinth_l_s_op = 0b0111011010011110100001, + xvftintrml_l_s_op = 0b0111011010011110100010, + xvftintrmh_l_s_op = 0b0111011010011110100011, + xvftintrpl_l_s_op = 0b0111011010011110100100, + xvftintrph_l_s_op = 0b0111011010011110100101, + xvftintrzl_l_s_op = 0b0111011010011110100110, + xvftintrzh_l_s_op = 0b0111011010011110100111, + xvftintrnel_l_s_op = 0b0111011010011110101000, + xvftintrneh_l_s_op = 0b0111011010011110101001, + xvreplgr2vr_b_op = 0b0111011010011111000000, + xvreplgr2vr_h_op = 0b0111011010011111000001, + xvreplgr2vr_w_op = 0b0111011010011111000010, + xvreplgr2vr_d_op = 0b0111011010011111000011, + vext2xv_h_b_op = 0b0111011010011111000100, + vext2xv_w_b_op = 0b0111011010011111000101, + vext2xv_d_b_op = 0b0111011010011111000110, + vext2xv_w_h_op = 0b0111011010011111000111, + vext2xv_d_h_op = 0b0111011010011111001000, + vext2xv_d_w_op = 0b0111011010011111001001, + vext2xv_hu_bu_op = 0b0111011010011111001010, + vext2xv_wu_bu_op = 0b0111011010011111001011, + vext2xv_du_bu_op = 0b0111011010011111001100, + vext2xv_wu_hu_op = 0b0111011010011111001101, + vext2xv_du_hu_op = 0b0111011010011111001110, + vext2xv_du_wu_op = 0b0111011010011111001111, + xvreplve0_b_op = 0b0111011100000111000000, + xvreplve0_h_op = 0b0111011100000111100000, + xvreplve0_w_op = 0b0111011100000111110000, + xvreplve0_d_op = 0b0111011100000111111000, + xvreplve0_q_op = 0b0111011100000111111100, + + unknow_ops22 = 0b1111111111111111111111 + }; + + // 21-bit opcode, highest 21 bits: bits[31...11] + enum ops21 { + vinsgr2vr_d_op = 0b011100101110101111110, + vpickve2gr_d_op = 0b011100101110111111110, + vpickve2gr_du_op = 0b011100101111001111110, + vreplvei_d_op = 0b011100101111011111110, + + unknow_ops21 = 0b111111111111111111111 + }; + + // 20-bit opcode, highest 20 bits: bits[31...12] + enum ops20 { + vinsgr2vr_w_op = 0b01110010111010111110, + vpickve2gr_w_op = 
0b01110010111011111110, + vpickve2gr_wu_op = 0b01110010111100111110, + vreplvei_w_op = 0b01110010111101111110, + xvinsgr2vr_d_op = 0b01110110111010111110, + xvpickve2gr_d_op = 0b01110110111011111110, + xvpickve2gr_du_op = 0b01110110111100111110, + xvinsve0_d_op = 0b01110110111111111110, + xvpickve_d_op = 0b01110111000000111110, + + unknow_ops20 = 0b11111111111111111111 + }; + + // 19-bit opcode, highest 19 bits: bits[31...13] + enum ops19 { + vrotri_b_op = 0b0111001010100000001, + vinsgr2vr_h_op = 0b0111001011101011110, + vpickve2gr_h_op = 0b0111001011101111110, + vpickve2gr_hu_op = 0b0111001011110011110, + vreplvei_h_op = 0b0111001011110111110, + vbitclri_b_op = 0b0111001100010000001, + vbitseti_b_op = 0b0111001100010100001, + vbitrevi_b_op = 0b0111001100011000001, + vslli_b_op = 0b0111001100101100001, + vsrli_b_op = 0b0111001100110000001, + vsrai_b_op = 0b0111001100110100001, + xvrotri_b_op = 0b0111011010100000001, + xvinsgr2vr_w_op = 0b0111011011101011110, + xvpickve2gr_w_op = 0b0111011011101111110, + xvpickve2gr_wu_op = 0b0111011011110011110, + xvinsve0_w_op = 0b0111011011111111110, + xvpickve_w_op = 0b0111011100000011110, + xvbitclri_b_op = 0b0111011100010000001, + xvbitseti_b_op = 0b0111011100010100001, + xvbitrevi_b_op = 0b0111011100011000001, + xvslli_b_op = 0b0111011100101100001, + xvsrli_b_op = 0b0111011100110000001, + xvsrai_b_op = 0b0111011100110100001, + + unknow_ops19 = 0b1111111111111111111 + }; + + // 18-bit opcode, highest 18 bits: bits[31...14] + enum ops18 { + vrotri_h_op = 0b011100101010000001, + vinsgr2vr_b_op = 0b011100101110101110, + vpickve2gr_b_op = 0b011100101110111110, + vpickve2gr_bu_op = 0b011100101111001110, + vreplvei_b_op = 0b011100101111011110, + vbitclri_h_op = 0b011100110001000001, + vbitseti_h_op = 0b011100110001010001, + vbitrevi_h_op = 0b011100110001100001, + vslli_h_op = 0b011100110010110001, + vsrli_h_op = 0b011100110011000001, + vsrai_h_op = 0b011100110011010001, + vsrlni_b_h_op = 0b011100110100000001, + xvrotri_h_op = 0b011101101010000001, + xvbitclri_h_op = 0b011101110001000001, + xvbitseti_h_op = 0b011101110001010001, + xvbitrevi_h_op = 0b011101110001100001, + xvslli_h_op = 0b011101110010110001, + xvsrli_h_op = 0b011101110011000001, + xvsrai_h_op = 0b011101110011010001, + + unknow_ops18 = 0b111111111111111111 + }; + + // 17-bit opcode, highest 17 bits: bits[31...15] + enum ops17 { + asrtle_d_op = 0b00000000000000010, + asrtgt_d_op = 0b00000000000000011, + add_w_op = 0b00000000000100000, + add_d_op = 0b00000000000100001, + sub_w_op = 0b00000000000100010, + sub_d_op = 0b00000000000100011, + slt_op = 0b00000000000100100, + sltu_op = 0b00000000000100101, + maskeqz_op = 0b00000000000100110, + masknez_op = 0b00000000000100111, + nor_op = 0b00000000000101000, + and_op = 0b00000000000101001, + or_op = 0b00000000000101010, + xor_op = 0b00000000000101011, + orn_op = 0b00000000000101100, + andn_op = 0b00000000000101101, + sll_w_op = 0b00000000000101110, + srl_w_op = 0b00000000000101111, + sra_w_op = 0b00000000000110000, + sll_d_op = 0b00000000000110001, + srl_d_op = 0b00000000000110010, + sra_d_op = 0b00000000000110011, + rotr_w_op = 0b00000000000110110, + rotr_d_op = 0b00000000000110111, + mul_w_op = 0b00000000000111000, + mulh_w_op = 0b00000000000111001, + mulh_wu_op = 0b00000000000111010, + mul_d_op = 0b00000000000111011, + mulh_d_op = 0b00000000000111100, + mulh_du_op = 0b00000000000111101, + mulw_d_w_op = 0b00000000000111110, + mulw_d_wu_op = 0b00000000000111111, + div_w_op = 0b00000000001000000, + mod_w_op = 0b00000000001000001, + div_wu_op = 
0b00000000001000010, + mod_wu_op = 0b00000000001000011, + div_d_op = 0b00000000001000100, + mod_d_op = 0b00000000001000101, + div_du_op = 0b00000000001000110, + mod_du_op = 0b00000000001000111, + crc_w_b_w_op = 0b00000000001001000, + crc_w_h_w_op = 0b00000000001001001, + crc_w_w_w_op = 0b00000000001001010, + crc_w_d_w_op = 0b00000000001001011, + crcc_w_b_w_op = 0b00000000001001100, + crcc_w_h_w_op = 0b00000000001001101, + crcc_w_w_w_op = 0b00000000001001110, + crcc_w_d_w_op = 0b00000000001001111, + break_op = 0b00000000001010100, + fadd_s_op = 0b00000001000000001, + fadd_d_op = 0b00000001000000010, + fsub_s_op = 0b00000001000000101, + fsub_d_op = 0b00000001000000110, + fmul_s_op = 0b00000001000001001, + fmul_d_op = 0b00000001000001010, + fdiv_s_op = 0b00000001000001101, + fdiv_d_op = 0b00000001000001110, + fmax_s_op = 0b00000001000010001, + fmax_d_op = 0b00000001000010010, + fmin_s_op = 0b00000001000010101, + fmin_d_op = 0b00000001000010110, + fmaxa_s_op = 0b00000001000011001, + fmaxa_d_op = 0b00000001000011010, + fmina_s_op = 0b00000001000011101, + fmina_d_op = 0b00000001000011110, + fscaleb_s_op = 0b00000001000100001, + fscaleb_d_op = 0b00000001000100010, + fcopysign_s_op = 0b00000001000100101, + fcopysign_d_op = 0b00000001000100110, + ldx_b_op = 0b00111000000000000, + ldx_h_op = 0b00111000000001000, + ldx_w_op = 0b00111000000010000, + ldx_d_op = 0b00111000000011000, + stx_b_op = 0b00111000000100000, + stx_h_op = 0b00111000000101000, + stx_w_op = 0b00111000000110000, + stx_d_op = 0b00111000000111000, + ldx_bu_op = 0b00111000001000000, + ldx_hu_op = 0b00111000001001000, + ldx_wu_op = 0b00111000001010000, + fldx_s_op = 0b00111000001100000, + fldx_d_op = 0b00111000001101000, + fstx_s_op = 0b00111000001110000, + fstx_d_op = 0b00111000001111000, + vldx_op = 0b00111000010000000, + vstx_op = 0b00111000010001000, + xvldx_op = 0b00111000010010000, + xvstx_op = 0b00111000010011000, + amswap_w_op = 0b00111000011000000, + amswap_d_op = 0b00111000011000001, + amadd_w_op = 0b00111000011000010, + amadd_d_op = 0b00111000011000011, + amand_w_op = 0b00111000011000100, + amand_d_op = 0b00111000011000101, + amor_w_op = 0b00111000011000110, + amor_d_op = 0b00111000011000111, + amxor_w_op = 0b00111000011001000, + amxor_d_op = 0b00111000011001001, + ammax_w_op = 0b00111000011001010, + ammax_d_op = 0b00111000011001011, + ammin_w_op = 0b00111000011001100, + ammin_d_op = 0b00111000011001101, + ammax_wu_op = 0b00111000011001110, + ammax_du_op = 0b00111000011001111, + ammin_wu_op = 0b00111000011010000, + ammin_du_op = 0b00111000011010001, + amswap_db_w_op = 0b00111000011010010, + amswap_db_d_op = 0b00111000011010011, + amadd_db_w_op = 0b00111000011010100, + amadd_db_d_op = 0b00111000011010101, + amand_db_w_op = 0b00111000011010110, + amand_db_d_op = 0b00111000011010111, + amor_db_w_op = 0b00111000011011000, + amor_db_d_op = 0b00111000011011001, + amxor_db_w_op = 0b00111000011011010, + amxor_db_d_op = 0b00111000011011011, + ammax_db_w_op = 0b00111000011011100, + ammax_db_d_op = 0b00111000011011101, + ammin_db_w_op = 0b00111000011011110, + ammin_db_d_op = 0b00111000011011111, + ammax_db_wu_op = 0b00111000011100000, + ammax_db_du_op = 0b00111000011100001, + ammin_db_wu_op = 0b00111000011100010, + ammin_db_du_op = 0b00111000011100011, + dbar_op = 0b00111000011100100, + ibar_op = 0b00111000011100101, + fldgt_s_op = 0b00111000011101000, + fldgt_d_op = 0b00111000011101001, + fldle_s_op = 0b00111000011101010, + fldle_d_op = 0b00111000011101011, + fstgt_s_op = 0b00111000011101100, + fstgt_d_op = 0b00111000011101101, + 
fstle_s_op = 0b00111000011101110, + fstle_d_op = 0b00111000011101111, + ldgt_b_op = 0b00111000011110000, + ldgt_h_op = 0b00111000011110001, + ldgt_w_op = 0b00111000011110010, + ldgt_d_op = 0b00111000011110011, + ldle_b_op = 0b00111000011110100, + ldle_h_op = 0b00111000011110101, + ldle_w_op = 0b00111000011110110, + ldle_d_op = 0b00111000011110111, + stgt_b_op = 0b00111000011111000, + stgt_h_op = 0b00111000011111001, + stgt_w_op = 0b00111000011111010, + stgt_d_op = 0b00111000011111011, + stle_b_op = 0b00111000011111100, + stle_h_op = 0b00111000011111101, + stle_w_op = 0b00111000011111110, + stle_d_op = 0b00111000011111111, + vseq_b_op = 0b01110000000000000, + vseq_h_op = 0b01110000000000001, + vseq_w_op = 0b01110000000000010, + vseq_d_op = 0b01110000000000011, + vsle_b_op = 0b01110000000000100, + vsle_h_op = 0b01110000000000101, + vsle_w_op = 0b01110000000000110, + vsle_d_op = 0b01110000000000111, + vsle_bu_op = 0b01110000000001000, + vsle_hu_op = 0b01110000000001001, + vsle_wu_op = 0b01110000000001010, + vsle_du_op = 0b01110000000001011, + vslt_b_op = 0b01110000000001100, + vslt_h_op = 0b01110000000001101, + vslt_w_op = 0b01110000000001110, + vslt_d_op = 0b01110000000001111, + vslt_bu_op = 0b01110000000010000, + vslt_hu_op = 0b01110000000010001, + vslt_wu_op = 0b01110000000010010, + vslt_du_op = 0b01110000000010011, + vadd_b_op = 0b01110000000010100, + vadd_h_op = 0b01110000000010101, + vadd_w_op = 0b01110000000010110, + vadd_d_op = 0b01110000000010111, + vsub_b_op = 0b01110000000011000, + vsub_h_op = 0b01110000000011001, + vsub_w_op = 0b01110000000011010, + vsub_d_op = 0b01110000000011011, + vabsd_b_op = 0b01110000011000000, + vabsd_h_op = 0b01110000011000001, + vabsd_w_op = 0b01110000011000010, + vabsd_d_op = 0b01110000011000011, + vmax_b_op = 0b01110000011100000, + vmax_h_op = 0b01110000011100001, + vmax_w_op = 0b01110000011100010, + vmax_d_op = 0b01110000011100011, + vmin_b_op = 0b01110000011100100, + vmin_h_op = 0b01110000011100101, + vmin_w_op = 0b01110000011100110, + vmin_d_op = 0b01110000011100111, + vmul_b_op = 0b01110000100001000, + vmul_h_op = 0b01110000100001001, + vmul_w_op = 0b01110000100001010, + vmul_d_op = 0b01110000100001011, + vmuh_b_op = 0b01110000100001100, + vmuh_h_op = 0b01110000100001101, + vmuh_w_op = 0b01110000100001110, + vmuh_d_op = 0b01110000100001111, + vmuh_bu_op = 0b01110000100010000, + vmuh_hu_op = 0b01110000100010001, + vmuh_wu_op = 0b01110000100010010, + vmuh_du_op = 0b01110000100010011, + vmulwev_h_b_op = 0b01110000100100000, + vmulwev_w_h_op = 0b01110000100100001, + vmulwev_d_w_op = 0b01110000100100010, + vmulwev_q_d_op = 0b01110000100100011, + vmulwod_h_b_op = 0b01110000100100100, + vmulwod_w_h_op = 0b01110000100100101, + vmulwod_d_w_op = 0b01110000100100110, + vmulwod_q_d_op = 0b01110000100100111, + vmadd_b_op = 0b01110000101010000, + vmadd_h_op = 0b01110000101010001, + vmadd_w_op = 0b01110000101010010, + vmadd_d_op = 0b01110000101010011, + vmsub_b_op = 0b01110000101010100, + vmsub_h_op = 0b01110000101010101, + vmsub_w_op = 0b01110000101010110, + vmsub_d_op = 0b01110000101010111, + vsll_b_op = 0b01110000111010000, + vsll_h_op = 0b01110000111010001, + vsll_w_op = 0b01110000111010010, + vsll_d_op = 0b01110000111010011, + vsrl_b_op = 0b01110000111010100, + vsrl_h_op = 0b01110000111010101, + vsrl_w_op = 0b01110000111010110, + vsrl_d_op = 0b01110000111010111, + vsra_b_op = 0b01110000111011000, + vsra_h_op = 0b01110000111011001, + vsra_w_op = 0b01110000111011010, + vsra_d_op = 0b01110000111011011, + vrotr_b_op = 0b01110000111011100, + vrotr_h_op = 
0b01110000111011101, + vrotr_w_op = 0b01110000111011110, + vrotr_d_op = 0b01110000111011111, + vbitclr_b_op = 0b01110001000011000, + vbitclr_h_op = 0b01110001000011001, + vbitclr_w_op = 0b01110001000011010, + vbitclr_d_op = 0b01110001000011011, + vbitset_b_op = 0b01110001000011100, + vbitset_h_op = 0b01110001000011101, + vbitset_w_op = 0b01110001000011110, + vbitset_d_op = 0b01110001000011111, + vbitrev_b_op = 0b01110001000100000, + vbitrev_h_op = 0b01110001000100001, + vbitrev_w_op = 0b01110001000100010, + vbitrev_d_op = 0b01110001000100011, + vand_v_op = 0b01110001001001100, + vor_v_op = 0b01110001001001101, + vxor_v_op = 0b01110001001001110, + vnor_v_op = 0b01110001001001111, + vandn_v_op = 0b01110001001010000, + vorn_v_op = 0b01110001001010001, + vadd_q_op = 0b01110001001011010, + vsub_q_op = 0b01110001001011011, + vfadd_s_op = 0b01110001001100001, + vfadd_d_op = 0b01110001001100010, + vfsub_s_op = 0b01110001001100101, + vfsub_d_op = 0b01110001001100110, + vfmul_s_op = 0b01110001001110001, + vfmul_d_op = 0b01110001001110010, + vfdiv_s_op = 0b01110001001110101, + vfdiv_d_op = 0b01110001001110110, + vfmax_s_op = 0b01110001001111001, + vfmax_d_op = 0b01110001001111010, + vfmin_s_op = 0b01110001001111101, + vfmin_d_op = 0b01110001001111110, + vfcvt_h_s_op = 0b01110001010001100, + vfcvt_s_d_op = 0b01110001010001101, + vffint_s_l_op = 0b01110001010010000, + vftint_w_d_op = 0b01110001010010011, + vftintrm_w_d_op = 0b01110001010010100, + vftintrp_w_d_op = 0b01110001010010101, + vftintrz_w_d_op = 0b01110001010010110, + vftintrne_w_d_op = 0b01110001010010111, + vshuf_h_op = 0b01110001011110101, + vshuf_w_op = 0b01110001011110110, + vshuf_d_op = 0b01110001011110111, + vslti_bu_op = 0b01110010100010000, + vslti_hu_op = 0b01110010100010001, + vslti_wu_op = 0b01110010100010010, + vslti_du_op = 0b01110010100010011, + vaddi_bu_op = 0b01110010100010100, + vaddi_hu_op = 0b01110010100010101, + vaddi_wu_op = 0b01110010100010110, + vaddi_du_op = 0b01110010100010111, + vsubi_bu_op = 0b01110010100011000, + vsubi_hu_op = 0b01110010100011001, + vsubi_wu_op = 0b01110010100011010, + vsubi_du_op = 0b01110010100011011, + vrotri_w_op = 0b01110010101000001, + vbitclri_w_op = 0b01110011000100001, + vbitseti_w_op = 0b01110011000101001, + vbitrevi_w_op = 0b01110011000110001, + vslli_w_op = 0b01110011001011001, + vsrli_w_op = 0b01110011001100001, + vsrai_w_op = 0b01110011001101001, + vsrlni_h_w_op = 0b01110011010000001, + xvseq_b_op = 0b01110100000000000, + xvseq_h_op = 0b01110100000000001, + xvseq_w_op = 0b01110100000000010, + xvseq_d_op = 0b01110100000000011, + xvsle_b_op = 0b01110100000000100, + xvsle_h_op = 0b01110100000000101, + xvsle_w_op = 0b01110100000000110, + xvsle_d_op = 0b01110100000000111, + xvsle_bu_op = 0b01110100000001000, + xvsle_hu_op = 0b01110100000001001, + xvsle_wu_op = 0b01110100000001010, + xvsle_du_op = 0b01110100000001011, + xvslt_b_op = 0b01110100000001100, + xvslt_h_op = 0b01110100000001101, + xvslt_w_op = 0b01110100000001110, + xvslt_d_op = 0b01110100000001111, + xvslt_bu_op = 0b01110100000010000, + xvslt_hu_op = 0b01110100000010001, + xvslt_wu_op = 0b01110100000010010, + xvslt_du_op = 0b01110100000010011, + xvadd_b_op = 0b01110100000010100, + xvadd_h_op = 0b01110100000010101, + xvadd_w_op = 0b01110100000010110, + xvadd_d_op = 0b01110100000010111, + xvsub_b_op = 0b01110100000011000, + xvsub_h_op = 0b01110100000011001, + xvsub_w_op = 0b01110100000011010, + xvsub_d_op = 0b01110100000011011, + xvabsd_b_op = 0b01110100011000000, + xvabsd_h_op = 0b01110100011000001, + xvabsd_w_op = 
0b01110100011000010, + xvabsd_d_op = 0b01110100011000011, + xvmax_b_op = 0b01110100011100000, + xvmax_h_op = 0b01110100011100001, + xvmax_w_op = 0b01110100011100010, + xvmax_d_op = 0b01110100011100011, + xvmin_b_op = 0b01110100011100100, + xvmin_h_op = 0b01110100011100101, + xvmin_w_op = 0b01110100011100110, + xvmin_d_op = 0b01110100011100111, + xvmul_b_op = 0b01110100100001000, + xvmul_h_op = 0b01110100100001001, + xvmul_w_op = 0b01110100100001010, + xvmul_d_op = 0b01110100100001011, + xvmuh_b_op = 0b01110100100001100, + xvmuh_h_op = 0b01110100100001101, + xvmuh_w_op = 0b01110100100001110, + xvmuh_d_op = 0b01110100100001111, + xvmuh_bu_op = 0b01110100100010000, + xvmuh_hu_op = 0b01110100100010001, + xvmuh_wu_op = 0b01110100100010010, + xvmuh_du_op = 0b01110100100010011, + xvmulwev_h_b_op = 0b01110100100100000, + xvmulwev_w_h_op = 0b01110100100100001, + xvmulwev_d_w_op = 0b01110100100100010, + xvmulwev_q_d_op = 0b01110100100100011, + xvmulwod_h_b_op = 0b01110100100100100, + xvmulwod_w_h_op = 0b01110100100100101, + xvmulwod_d_w_op = 0b01110100100100110, + xvmulwod_q_d_op = 0b01110100100100111, + xvmadd_b_op = 0b01110100101010000, + xvmadd_h_op = 0b01110100101010001, + xvmadd_w_op = 0b01110100101010010, + xvmadd_d_op = 0b01110100101010011, + xvmsub_b_op = 0b01110100101010100, + xvmsub_h_op = 0b01110100101010101, + xvmsub_w_op = 0b01110100101010110, + xvmsub_d_op = 0b01110100101010111, + xvsll_b_op = 0b01110100111010000, + xvsll_h_op = 0b01110100111010001, + xvsll_w_op = 0b01110100111010010, + xvsll_d_op = 0b01110100111010011, + xvsrl_b_op = 0b01110100111010100, + xvsrl_h_op = 0b01110100111010101, + xvsrl_w_op = 0b01110100111010110, + xvsrl_d_op = 0b01110100111010111, + xvsra_b_op = 0b01110100111011000, + xvsra_h_op = 0b01110100111011001, + xvsra_w_op = 0b01110100111011010, + xvsra_d_op = 0b01110100111011011, + xvrotr_b_op = 0b01110100111011100, + xvrotr_h_op = 0b01110100111011101, + xvrotr_w_op = 0b01110100111011110, + xvrotr_d_op = 0b01110100111011111, + xvbitclr_b_op = 0b01110101000011000, + xvbitclr_h_op = 0b01110101000011001, + xvbitclr_w_op = 0b01110101000011010, + xvbitclr_d_op = 0b01110101000011011, + xvbitset_b_op = 0b01110101000011100, + xvbitset_h_op = 0b01110101000011101, + xvbitset_w_op = 0b01110101000011110, + xvbitset_d_op = 0b01110101000011111, + xvbitrev_b_op = 0b01110101000100000, + xvbitrev_h_op = 0b01110101000100001, + xvbitrev_w_op = 0b01110101000100010, + xvbitrev_d_op = 0b01110101000100011, + xvand_v_op = 0b01110101001001100, + xvor_v_op = 0b01110101001001101, + xvxor_v_op = 0b01110101001001110, + xvnor_v_op = 0b01110101001001111, + xvandn_v_op = 0b01110101001010000, + xvorn_v_op = 0b01110101001010001, + xvadd_q_op = 0b01110101001011010, + xvsub_q_op = 0b01110101001011011, + xvfadd_s_op = 0b01110101001100001, + xvfadd_d_op = 0b01110101001100010, + xvfsub_s_op = 0b01110101001100101, + xvfsub_d_op = 0b01110101001100110, + xvfmul_s_op = 0b01110101001110001, + xvfmul_d_op = 0b01110101001110010, + xvfdiv_s_op = 0b01110101001110101, + xvfdiv_d_op = 0b01110101001110110, + xvfmax_s_op = 0b01110101001111001, + xvfmax_d_op = 0b01110101001111010, + xvfmin_s_op = 0b01110101001111101, + xvfmin_d_op = 0b01110101001111110, + xvfcvt_h_s_op = 0b01110101010001100, + xvfcvt_s_d_op = 0b01110101010001101, + xvffint_s_l_op = 0b01110101010010000, + xvftint_w_d_op = 0b01110101010010011, + xvftintrm_w_d_op = 0b01110101010010100, + xvftintrp_w_d_op = 0b01110101010010101, + xvftintrz_w_d_op = 0b01110101010010110, + xvftintrne_w_d_op = 0b01110101010010111, + xvshuf_h_op = 0b01110101011110101, + 
xvshuf_w_op = 0b01110101011110110, + xvshuf_d_op = 0b01110101011110111, + xvperm_w_op = 0b01110101011111010, + xvslti_bu_op = 0b01110110100010000, + xvslti_hu_op = 0b01110110100010001, + xvslti_wu_op = 0b01110110100010010, + xvslti_du_op = 0b01110110100010011, + xvaddi_bu_op = 0b01110110100010100, + xvaddi_hu_op = 0b01110110100010101, + xvaddi_wu_op = 0b01110110100010110, + xvaddi_du_op = 0b01110110100010111, + xvsubi_bu_op = 0b01110110100011000, + xvsubi_hu_op = 0b01110110100011001, + xvsubi_wu_op = 0b01110110100011010, + xvsubi_du_op = 0b01110110100011011, + xvrotri_w_op = 0b01110110101000001, + xvbitclri_w_op = 0b01110111000100001, + xvbitseti_w_op = 0b01110111000101001, + xvbitrevi_w_op = 0b01110111000110001, + xvslli_w_op = 0b01110111001011001, + xvsrli_w_op = 0b01110111001100001, + xvsrai_w_op = 0b01110111001101001, + + unknow_ops17 = 0b11111111111111111 + }; + + // 16-bit opcode, highest 16 bits: bits[31...16] + enum ops16 { + vrotri_d_op = 0b0111001010100001, + vbitclri_d_op = 0b0111001100010001, + vbitseti_d_op = 0b0111001100010101, + vbitrevi_d_op = 0b0111001100011001, + vslli_d_op = 0b0111001100101101, + vsrli_d_op = 0b0111001100110001, + vsrai_d_op = 0b0111001100110101, + vsrlni_w_d_op = 0b0111001101000001, + xvrotri_d_op = 0b0111011010100001, + xvbitclri_d_op = 0b0111011100010001, + xvbitseti_d_op = 0b0111011100010101, + xvbitrevi_d_op = 0b0111011100011001, + xvslli_d_op = 0b0111011100101101, + xvsrli_d_op = 0b0111011100110001, + xvsrai_d_op = 0b0111011100110101, + + unknow_ops16 = 0b1111111111111111 + }; + + // 15-bit opcode, highest 15 bits: bits[31...17] + enum ops15 { + vsrlni_d_q_op = 0b011100110100001, + + unknow_ops15 = 0b111111111111111 + }; + + // 14-bit opcode, highest 14 bits: bits[31...18] + enum ops14 { + alsl_w_op = 0b00000000000001, + bytepick_w_op = 0b00000000000010, + bytepick_d_op = 0b00000000000011, + alsl_d_op = 0b00000000001011, + slli_op = 0b00000000010000, + srli_op = 0b00000000010001, + srai_op = 0b00000000010010, + rotri_op = 0b00000000010011, + lddir_op = 0b00000110010000, + ldpte_op = 0b00000110010001, + vshuf4i_b_op = 0b01110011100100, + vshuf4i_h_op = 0b01110011100101, + vshuf4i_w_op = 0b01110011100110, + vshuf4i_d_op = 0b01110011100111, + vandi_b_op = 0b01110011110100, + vori_b_op = 0b01110011110101, + vxori_b_op = 0b01110011110110, + vnori_b_op = 0b01110011110111, + vldi_op = 0b01110011111000, + vpermi_w_op = 0b01110011111001, + xvshuf4i_b_op = 0b01110111100100, + xvshuf4i_h_op = 0b01110111100101, + xvshuf4i_w_op = 0b01110111100110, + xvshuf4i_d_op = 0b01110111100111, + xvandi_b_op = 0b01110111110100, + xvori_b_op = 0b01110111110101, + xvxori_b_op = 0b01110111110110, + xvnori_b_op = 0b01110111110111, + xvldi_op = 0b01110111111000, + xvpermi_w_op = 0b01110111111001, + xvpermi_d_op = 0b01110111111010, + xvpermi_q_op = 0b01110111111011, + + unknow_ops14 = 0b11111111111111 + }; + + // 12-bit opcode, highest 12 bits: bits[31...20] + enum ops12 { + fmadd_s_op = 0b000010000001, + fmadd_d_op = 0b000010000010, + fmsub_s_op = 0b000010000101, + fmsub_d_op = 0b000010000110, + fnmadd_s_op = 0b000010001001, + fnmadd_d_op = 0b000010001010, + fnmsub_s_op = 0b000010001101, + fnmsub_d_op = 0b000010001110, + vfmadd_s_op = 0b000010010001, + vfmadd_d_op = 0b000010010010, + vfmsub_s_op = 0b000010010101, + vfmsub_d_op = 0b000010010110, + vfnmadd_s_op = 0b000010011001, + vfnmadd_d_op = 0b000010011010, + vfnmsub_s_op = 0b000010011101, + vfnmsub_d_op = 0b000010011110, + xvfmadd_s_op = 0b000010100001, + xvfmadd_d_op = 0b000010100010, + xvfmsub_s_op = 0b000010100101, + 
xvfmsub_d_op = 0b000010100110, + xvfnmadd_s_op = 0b000010101001, + xvfnmadd_d_op = 0b000010101010, + xvfnmsub_s_op = 0b000010101101, + xvfnmsub_d_op = 0b000010101110, + fcmp_cond_s_op = 0b000011000001, + fcmp_cond_d_op = 0b000011000010, + vfcmp_cond_s_op = 0b000011000101, + vfcmp_cond_d_op = 0b000011000110, + xvfcmp_cond_s_op = 0b000011001001, + xvfcmp_cond_d_op = 0b000011001010, + fsel_op = 0b000011010000, + vbitsel_v_op = 0b000011010001, + xvbitsel_v_op = 0b000011010010, + vshuf_b_op = 0b000011010101, + xvshuf_b_op = 0b000011010110, + + unknow_ops12 = 0b111111111111 + }; + + // 10-bit opcode, highest 10 bits: bits[31...22] + enum ops10 { + bstr_w_op = 0b0000000001, + bstrins_d_op = 0b0000000010, + bstrpick_d_op = 0b0000000011, + slti_op = 0b0000001000, + sltui_op = 0b0000001001, + addi_w_op = 0b0000001010, + addi_d_op = 0b0000001011, + lu52i_d_op = 0b0000001100, + andi_op = 0b0000001101, + ori_op = 0b0000001110, + xori_op = 0b0000001111, + ld_b_op = 0b0010100000, + ld_h_op = 0b0010100001, + ld_w_op = 0b0010100010, + ld_d_op = 0b0010100011, + st_b_op = 0b0010100100, + st_h_op = 0b0010100101, + st_w_op = 0b0010100110, + st_d_op = 0b0010100111, + ld_bu_op = 0b0010101000, + ld_hu_op = 0b0010101001, + ld_wu_op = 0b0010101010, + preld_op = 0b0010101011, + fld_s_op = 0b0010101100, + fst_s_op = 0b0010101101, + fld_d_op = 0b0010101110, + fst_d_op = 0b0010101111, + vld_op = 0b0010110000, + vst_op = 0b0010110001, + xvld_op = 0b0010110010, + xvst_op = 0b0010110011, + ldl_w_op = 0b0010111000, + ldr_w_op = 0b0010111001, + + unknow_ops10 = 0b1111111111 + }; + + // 8-bit opcode, highest 8 bits: bits[31...22] + enum ops8 { + ll_w_op = 0b00100000, + sc_w_op = 0b00100001, + ll_d_op = 0b00100010, + sc_d_op = 0b00100011, + ldptr_w_op = 0b00100100, + stptr_w_op = 0b00100101, + ldptr_d_op = 0b00100110, + stptr_d_op = 0b00100111, + + unknow_ops8 = 0b11111111 + }; + + // 7-bit opcode, highest 7 bits: bits[31...25] + enum ops7 { + lu12i_w_op = 0b0001010, + lu32i_d_op = 0b0001011, + pcaddi_op = 0b0001100, + pcalau12i_op = 0b0001101, + pcaddu12i_op = 0b0001110, + pcaddu18i_op = 0b0001111, + + unknow_ops7 = 0b1111111 + }; + + // 6-bit opcode, highest 6 bits: bits[31...25] + enum ops6 { + addu16i_d_op = 0b000100, + beqz_op = 0b010000, + bnez_op = 0b010001, + bccondz_op = 0b010010, + jirl_op = 0b010011, + b_op = 0b010100, + bl_op = 0b010101, + beq_op = 0b010110, + bne_op = 0b010111, + blt_op = 0b011000, + bge_op = 0b011001, + bltu_op = 0b011010, + bgeu_op = 0b011011, + + unknow_ops6 = 0b111111 + }; + + enum fcmp_cond { + fcmp_caf = 0x00, + fcmp_cun = 0x08, + fcmp_ceq = 0x04, + fcmp_cueq = 0x0c, + fcmp_clt = 0x02, + fcmp_cult = 0x0a, + fcmp_cle = 0x06, + fcmp_cule = 0x0e, + fcmp_cne = 0x10, + fcmp_cor = 0x14, + fcmp_cune = 0x18, + fcmp_saf = 0x01, + fcmp_sun = 0x09, + fcmp_seq = 0x05, + fcmp_sueq = 0x0d, + fcmp_slt = 0x03, + fcmp_sult = 0x0b, + fcmp_sle = 0x07, + fcmp_sule = 0x0f, + fcmp_sne = 0x11, + fcmp_sor = 0x15, + fcmp_sune = 0x19 + }; + + enum Condition { + zero , + notZero , + equal , + notEqual , + less , + lessEqual , + greater , + greaterEqual , + below , + belowEqual , + above , + aboveEqual + }; + + static const int LogInstructionSize = 2; + static const int InstructionSize = 1 << LogInstructionSize; + + enum WhichOperand { + // input to locate_operand, and format code for relocations + imm_operand = 0, // embedded 32-bit|64-bit immediate operand + disp32_operand = 1, // embedded 32-bit displacement or address + call32_operand = 2, // embedded 32-bit self-relative displacement + narrow_oop_operand = 3, // 
embedded 32-bit immediate narrow oop
+     _WhichOperand_limit = 4
+   };
+
+   static int low  (int x, int l) { return bitfield(x, 0, l); }
+   static int low16(int x)        { return low(x, 16); }
+   static int low26(int x)        { return low(x, 26); }
+
+   static int high  (int x, int l) { return bitfield(x, 32-l, l); }
+   static int high16(int x)        { return high(x, 16); }
+   static int high6 (int x)        { return high(x, 6); }
+
+
+   static ALWAYSINLINE void patch(address a, int length, uint32_t val) {
+     guarantee(val < (1ULL << length), "Field too big for insn");
+     guarantee(length > 0, "length > 0");
+     unsigned target = *(unsigned *)a;
+     target = (target >> length) << length;
+     target |= val;
+     *(unsigned *)a = target;
+   }
+
+ protected:
+   // helper methods for instruction emission
+
+   // 2R-type
+   //  31                    10 9      5 4     0
+   //  |        opcode         |   rj   |  rd   |
+   static inline int insn_RR  (int op, int rj, int rd) { return (op<<10) | (rj<<5) | rd; }
+
+   // 3R-type
+   //  31            15 14    10 9      5 4     0
+   //  |    opcode     |   rk   |   rj   |  rd   |
+   static inline int insn_RRR (int op, int rk, int rj, int rd) { return (op<<15) | (rk<<10) | (rj<<5) | rd; }
+
+   // 4R-type
+   //  31       20 19     15 14    10 9      5 4     0
+   //  |  opcode  |   ra    |   rk   |   rj   |  rd   |
+   static inline int insn_RRRR (int op, int ra, int rk, int rj, int rd) { return (op<<20) | (ra << 15) | (rk<<10) | (rj<<5) | rd; }
+
+   // 2RI1-type
+   //  31                 11 10 9      5 4     0
+   //  |       opcode       | I1 |  vj   |  rd   |
+   static inline int insn_I1RR (int op, int ui1, int vj, int rd) { assert(is_uimm(ui1, 1), "not a unsigned 1-bit int"); return (op<<11) | (low(ui1, 1)<<10) | (vj<<5) | rd; }
+
+   // 2RI2-type
+   //  31               12 11 10 9      5 4     0
+   //  |      opcode      |  I2  |  vj   |  rd   |
+   static inline int insn_I2RR (int op, int ui2, int vj, int rd) { assert(is_uimm(ui2, 2), "not a unsigned 2-bit int"); return (op<<12) | (low(ui2, 2)<<10) | (vj<<5) | rd; }
+
+   // 2RI3-type
+   //  31             13 12   10 9      5 4     0
+   //  |     opcode     |  I3   |  vj   |  vd   |
+   static inline int insn_I3RR (int op, int ui3, int vj, int vd) { assert(is_uimm(ui3, 3), "not a unsigned 3-bit int"); return (op<<13) | (low(ui3, 3)<<10) | (vj<<5) | vd; }
+
+   // 2RI4-type
+   //  31           14 13    10 9      5 4     0
+   //  |    opcode    |   I4   |  vj   |  vd   |
+   static inline int insn_I4RR (int op, int ui4, int vj, int vd) { assert(is_uimm(ui4, 4), "not a unsigned 4-bit int"); return (op<<14) | (low(ui4, 4)<<10) | (vj<<5) | vd; }
+
+   // 2RI5-type
+   //  31          15 14     10 9      5 4     0
+   //  |   opcode    |   I5    |  vj   |  vd   |
+   static inline int insn_I5RR (int op, int ui5, int vj, int vd) { assert(is_uimm(ui5, 5), "not a unsigned 5-bit int"); return (op<<15) | (low(ui5, 5)<<10) | (vj<<5) | vd; }
+
+   // 2RI6-type
+   //  31         16 15      10 9      5 4     0
+   //  |   opcode   |    I6    |  vj   |  vd   |
+   static inline int insn_I6RR (int op, int ui6, int vj, int vd) { assert(is_uimm(ui6, 6), "not a unsigned 6-bit int"); return (op<<16) | (low(ui6, 6)<<10) | (vj<<5) | vd; }
+
+   // 2RI7-type
+   //  31        17 16       10 9      5 4     0
+   //  |  opcode   |    I7     |  vj   |  vd   |
+   static inline int insn_I7RR (int op, int ui7, int vj, int vd) { assert(is_uimm(ui7, 7), "not a unsigned 7-bit int"); return (op<<17) | (low(ui7, 7)<<10) | (vj<<5) | vd; }
+
+   // 2RI8-type
+   //  31       18 17         10 9      5 4     0
+   //  |  opcode  |     I8      |  rj   |  rd   |
+   static inline int insn_I8RR (int op, int imm8, int rj, int rd) { /*assert(is_simm(imm8, 8), "not a signed 8-bit int");*/ return (op<<18) | (low(imm8, 8)<<10) | (rj<<5) | rd; }
+
+   // 2RI12-type
+   //  31     22 21            10 9      5 4     0
+   //  | opcode |      I12       |  rj   |  rd   |
+   static inline int insn_I12RR(int op, int imm12, int rj, int rd) { /* assert(is_simm(imm12, 12), "not a signed 12-bit int");*/ return (op<<22) | (low(imm12, 12)<<10) | (rj<<5) | rd; }
+
+
+   // 2RI14-type
+
// 31 24 23 10 9 5 4 0 + // | opcode | I14 | rj | rd | + static inline int insn_I14RR(int op, int imm14, int rj, int rd) { assert(is_simm(imm14, 14), "not a signed 14-bit int"); return (op<<24) | (low(imm14, 14)<<10) | (rj<<5) | rd; } + + // 2RI16-type + // 31 26 25 10 9 5 4 0 + // | opcode | I16 | rj | rd | + static inline int insn_I16RR(int op, int imm16, int rj, int rd) { assert(is_simm16(imm16), "not a signed 16-bit int"); return (op<<26) | (low16(imm16)<<10) | (rj<<5) | rd; } + + // 1RI13-type (?) + // 31 18 17 5 4 0 + // | opcode | I13 | vd | + static inline int insn_I13R (int op, int imm13, int vd) { assert(is_simm(imm13, 13), "not a signed 13-bit int"); return (op<<18) | (low(imm13, 13)<<5) | vd; } + + // 1RI20-type (?) + // 31 25 24 5 4 0 + // | opcode | I20 | rd | + static inline int insn_I20R (int op, int imm20, int rd) { assert(is_simm(imm20, 20), "not a signed 20-bit int"); return (op<<25) | (low(imm20, 20)<<5) | rd; } + + // 1RI21-type + // 31 26 25 10 9 5 4 0 + // | opcode | I21[15:0] | rj |I21[20:16]| + static inline int insn_IRI(int op, int imm21, int rj) { assert(is_simm(imm21, 21), "not a signed 21-bit int"); return (op << 26) | (low16(imm21) << 10) | (rj << 5) | low(imm21 >> 16, 5); } + + // I26-type + // 31 26 25 10 9 0 + // | opcode | I26[15:0] | I26[25:16] | + static inline int insn_I26(int op, int imm26) { assert(is_simm(imm26, 26), "not a signed 26-bit int"); return (op << 26) | (low16(imm26) << 10) | low(imm26 >> 16, 10); } + + // imm15 + // 31 15 14 0 + // | opcode | I15 | + static inline int insn_I15 (int op, int imm15) { assert(is_uimm(imm15, 15), "not a unsigned 15-bit int"); return (op<<15) | low(imm15, 15); } + + + // get the offset field of beq, bne, blt[u], bge[u] instruction + int offset16(address entry) { + assert(is_simm16((entry - pc()) / 4), "change this code"); + if (!is_simm16((entry - pc()) / 4)) { + tty->print_cr("!!! is_simm16: %lx", (entry - pc()) / 4); + } + return (entry - pc()) / 4; + } + + // get the offset field of beqz, bnez instruction + int offset21(address entry) { + assert(is_simm((int)(entry - pc()) / 4, 21), "change this code"); + if (!is_simm((int)(entry - pc()) / 4, 21)) { + tty->print_cr("!!! is_simm21: %lx", (entry - pc()) / 4); + } + return (entry - pc()) / 4; + } + + // get the offset field of b instruction + int offset26(address entry) { + assert(is_simm((int)(entry - pc()) / 4, 26), "change this code"); + if (!is_simm((int)(entry - pc()) / 4, 26)) { + tty->print_cr("!!! 
is_simm26: %lx", (entry - pc()) / 4);
+     }
+     return (entry - pc()) / 4;
+   }
+
+ public:
+   using AbstractAssembler::offset;
+
+   // sign-expand x, where bit h is the sign bit
+   static int expand(int x, int h) { return -(x & (1<<h)) | x; }
+
+   static int split_low16(int x) {
+     return (x & 0xffff);
+   }
+
+   // Convert 16-bit x to a sign-extended 16-bit integer
+   static int simm16(int x) {
+     assert(x == (x & 0xFFFF), "must be 16-bit only");
+     return (x << 16) >> 16;
+   }
+
+   static int split_high16(int x) {
+     return ( (x >> 16) + ((x & 0x8000) != 0) ) & 0xffff;
+   }
+
+   static int split_low20(int x) {
+     return (x & 0xfffff);
+   }
+
+   // Convert 20-bit x to a sign-extended 20-bit integer
+   static int simm20(int x) {
+     assert(x == (x & 0xFFFFF), "must be 20-bit only");
+     return (x << 12) >> 12;
+   }
+
+   static int split_low12(int x) {
+     return (x & 0xfff);
+   }
+
+   static inline void split_simm32(jlong si32, jint& si12, jint& si20) {
+     si12 = ((jint)(si32 & 0xfff) << 20) >> 20;
+     si32 += (si32 & 0x800) << 1;
+     si20 = si32 >> 12;
+   }
+
+   static inline void split_simm38(jlong si38, jint& si18, jint& si20) {
+     si18 = ((jint)(si38 & 0x3ffff) << 14) >> 14;
+     si38 += (si38 & 0x20000) << 1;
+     si20 = si38 >> 18;
+   }
+
+   // Convert 12-bit x to a sign-extended 12-bit integer
+   static int simm12(int x) {
+     assert(x == (x & 0xFFF), "must be 12-bit only");
+     return (x << 20) >> 20;
+   }
+
+   // Convert 26-bit x to a sign-extended 26-bit integer
+   static int simm26(int x) {
+     assert(x == (x & 0x3FFFFFF), "must be 26-bit only");
+     return (x << 6) >> 6;
+   }
+
+   static intptr_t merge(intptr_t x0, intptr_t x12) {
+     // lu12i, ori
+     return (((x12 << 12) | x0) << 32) >> 32;
+   }
+
+   static intptr_t merge(intptr_t x0, intptr_t x12, intptr_t x32) {
+     // lu32i, lu12i, ori
+     return (((x32 << 32) | (x12 << 12) | x0) << 12) >> 12;
+   }
+
+   static intptr_t merge(intptr_t x0, intptr_t x12, intptr_t x32, intptr_t x52) {
+     // lu52i, lu32i, lu12i, ori
+     return (x52 << 52) | (x32 << 32) | (x12 << 12) | x0;
+   }
+
+   // Test if x is within the signed immediate range for nbits.
+   static bool is_simm(int x, unsigned int nbits) {
+     assert(0 < nbits && nbits < 32, "out of bounds");
+     const int min      = -( ((int)1) << nbits-1 );
+     const int maxplus1 =  ( ((int)1) << nbits-1 );
+     return min <= x && x < maxplus1;
+   }
+
+   static bool is_simm(jlong x, unsigned int nbits) {
+     assert(0 < nbits && nbits < 64, "out of bounds");
+     const jlong min      = -( ((jlong)1) << nbits-1 );
+     const jlong maxplus1 =  ( ((jlong)1) << nbits-1 );
+     return min <= x && x < maxplus1;
+   }
+
+   static bool is_simm16(int x)  { return is_simm(x, 16); }
+   static bool is_simm16(long x) { return is_simm((jlong)x, (unsigned int)16); }
+
+   // Test if x is within the unsigned immediate range for nbits.
+   static bool is_uimm(int x, unsigned int nbits) {
+     assert(0 < nbits && nbits < 32, "out of bounds");
+     const int maxplus1 = ( ((int)1) << nbits );
+     return 0 <= x && x < maxplus1;
+   }
+
+   static bool is_uimm(jlong x, unsigned int nbits) {
+     assert(0 < nbits && nbits < 64, "out of bounds");
+     const jlong maxplus1 = ( ((jlong)1) << nbits );
+     return 0 <= x && x < maxplus1;
+   }
+
+ public:
+
+   void flush() {
+     AbstractAssembler::flush();
+   }
+
+   inline void emit_int32(int x) {
+     AbstractAssembler::emit_int32(x);
+   }
+
+   inline void emit_data(int x) { emit_int32(x); }
+   inline void emit_data(int x, relocInfo::relocType rtype) {
+     relocate(rtype);
+     emit_int32(x);
+   }
+
+   inline void emit_data(int x, RelocationHolder const& rspec) {
+     relocate(rspec);
+     emit_int32(x);
+   }
+
+
+   // Generic instructions
+   // Does 32bit or 64bit as needed for the platform.
In some sense these + // belong in macro assembler but there is no need for both varieties to exist + + void clo_w (Register rd, Register rj) { emit_int32(insn_RR(clo_w_op, (int)rj->encoding(), (int)rd->encoding())); } + void clz_w (Register rd, Register rj) { emit_int32(insn_RR(clz_w_op, (int)rj->encoding(), (int)rd->encoding())); } + void cto_w (Register rd, Register rj) { emit_int32(insn_RR(cto_w_op, (int)rj->encoding(), (int)rd->encoding())); } + void ctz_w (Register rd, Register rj) { emit_int32(insn_RR(ctz_w_op, (int)rj->encoding(), (int)rd->encoding())); } + void clo_d (Register rd, Register rj) { emit_int32(insn_RR(clo_d_op, (int)rj->encoding(), (int)rd->encoding())); } + void clz_d (Register rd, Register rj) { emit_int32(insn_RR(clz_d_op, (int)rj->encoding(), (int)rd->encoding())); } + void cto_d (Register rd, Register rj) { emit_int32(insn_RR(cto_d_op, (int)rj->encoding(), (int)rd->encoding())); } + void ctz_d (Register rd, Register rj) { emit_int32(insn_RR(ctz_d_op, (int)rj->encoding(), (int)rd->encoding())); } + + void revb_2h(Register rd, Register rj) { emit_int32(insn_RR(revb_2h_op, (int)rj->encoding(), (int)rd->encoding())); } + void revb_4h(Register rd, Register rj) { emit_int32(insn_RR(revb_4h_op, (int)rj->encoding(), (int)rd->encoding())); } + void revb_2w(Register rd, Register rj) { emit_int32(insn_RR(revb_2w_op, (int)rj->encoding(), (int)rd->encoding())); } + void revb_d (Register rd, Register rj) { emit_int32(insn_RR( revb_d_op, (int)rj->encoding(), (int)rd->encoding())); } + void revh_2w(Register rd, Register rj) { emit_int32(insn_RR(revh_2w_op, (int)rj->encoding(), (int)rd->encoding())); } + void revh_d (Register rd, Register rj) { emit_int32(insn_RR( revh_d_op, (int)rj->encoding(), (int)rd->encoding())); } + + void bitrev_4b(Register rd, Register rj) { emit_int32(insn_RR(bitrev_4b_op, (int)rj->encoding(), (int)rd->encoding())); } + void bitrev_8b(Register rd, Register rj) { emit_int32(insn_RR(bitrev_8b_op, (int)rj->encoding(), (int)rd->encoding())); } + void bitrev_w (Register rd, Register rj) { emit_int32(insn_RR(bitrev_w_op, (int)rj->encoding(), (int)rd->encoding())); } + void bitrev_d (Register rd, Register rj) { emit_int32(insn_RR(bitrev_d_op, (int)rj->encoding(), (int)rd->encoding())); } + + void ext_w_h(Register rd, Register rj) { emit_int32(insn_RR(ext_w_h_op, (int)rj->encoding(), (int)rd->encoding())); } + void ext_w_b(Register rd, Register rj) { emit_int32(insn_RR(ext_w_b_op, (int)rj->encoding(), (int)rd->encoding())); } + + void rdtimel_w(Register rd, Register rj) { emit_int32(insn_RR(rdtimel_w_op, (int)rj->encoding(), (int)rd->encoding())); } + void rdtimeh_w(Register rd, Register rj) { emit_int32(insn_RR(rdtimeh_w_op, (int)rj->encoding(), (int)rd->encoding())); } + void rdtime_d(Register rd, Register rj) { emit_int32(insn_RR(rdtime_d_op, (int)rj->encoding(), (int)rd->encoding())); } + + void cpucfg(Register rd, Register rj) { emit_int32(insn_RR(cpucfg_op, (int)rj->encoding(), (int)rd->encoding())); } + + void asrtle_d (Register rj, Register rk) { emit_int32(insn_RRR(asrtle_d_op , (int)rk->encoding(), (int)rj->encoding(), 0)); } + void asrtgt_d (Register rj, Register rk) { emit_int32(insn_RRR(asrtgt_d_op , (int)rk->encoding(), (int)rj->encoding(), 0)); } + + void alsl_w(Register rd, Register rj, Register rk, int sa2) { assert(is_uimm(sa2, 2), "not a unsigned 2-bit int"); emit_int32(insn_I8RR(alsl_w_op, ( (0 << 7) | (sa2 << 5) | (int)rk->encoding() ), (int)rj->encoding(), (int)rd->encoding())); } + void alsl_wu(Register rd, Register rj, Register rk, int 
sa2) { assert(is_uimm(sa2, 2), "not a unsigned 2-bit int"); emit_int32(insn_I8RR(alsl_w_op, ( (1 << 7) | (sa2 << 5) | (int)rk->encoding() ), (int)rj->encoding(), (int)rd->encoding())); } + void bytepick_w(Register rd, Register rj, Register rk, int sa2) { assert(is_uimm(sa2, 2), "not a unsigned 2-bit int"); emit_int32(insn_I8RR(bytepick_w_op, ( (0 << 7) | (sa2 << 5) | (int)rk->encoding() ), (int)rj->encoding(), (int)rd->encoding())); } + void bytepick_d(Register rd, Register rj, Register rk, int sa3) { assert(is_uimm(sa3, 3), "not a unsigned 3-bit int"); emit_int32(insn_I8RR(bytepick_d_op, ( (sa3 << 5) | (int)rk->encoding() ), (int)rj->encoding(), (int)rd->encoding())); } + + void add_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(add_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void add_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(add_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void sub_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(sub_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void sub_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(sub_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void slt (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(slt_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void sltu (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(sltu_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + + void maskeqz (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(maskeqz_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void masknez (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(masknez_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + + void nor (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(nor_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void AND (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(and_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void OR (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(or_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void XOR (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(xor_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void orn (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(orn_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void andn(Register rd, Register rj, Register rk) { emit_int32(insn_RRR(andn_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + + void sll_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(sll_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void srl_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(srl_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void sra_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(sra_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void sll_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(sll_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void srl_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(srl_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + 
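All of the register-register emitters in this block reduce to insn_RRR() above, which packs a 17-bit major opcode and three 5-bit register numbers into a single 32-bit instruction word. A minimal standalone sketch of that packing and the matching field extraction (outside HotSpot; the opcode value and register numbers below are placeholders for illustration, not real LoongArch encodings):

    #include <cstdint>

    // Same field layout as insn_RRR: | opcode (31..15) | rk (14..10) | rj (9..5) | rd (4..0) |
    constexpr uint32_t pack_rrr(uint32_t op, uint32_t rk, uint32_t rj, uint32_t rd) {
      return (op << 15) | (rk << 10) | (rj << 5) | rd;
    }

    constexpr uint32_t kFakeOp = 0x1ABCD;                    // made-up 17-bit opcode
    constexpr uint32_t word    = pack_rrr(kFakeOp, 6, 5, 4); // rk=6, rj=5, rd=4 (made up)

    static_assert(( word        & 0x1F) == 4,       "rd occupies bits [4:0]");
    static_assert(((word >> 5)  & 0x1F) == 5,       "rj occupies bits [9:5]");
    static_assert(((word >> 10) & 0x1F) == 6,       "rk occupies bits [14:10]");
    static_assert( (word >> 15)         == kFakeOp, "opcode fills bits [31:15]");
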
void sra_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(sra_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + + void rotr_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(rotr_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void rotr_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(rotr_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + + void mul_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mul_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void mulh_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mulh_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void mulh_wu (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mulh_wu_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void mul_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mul_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void mulh_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mulh_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void mulh_du (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mulh_du_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void mulw_d_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mulw_d_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void mulw_d_wu (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mulw_d_wu_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + + void div_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(div_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void mod_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mod_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void div_wu(Register rd, Register rj, Register rk) { emit_int32(insn_RRR(div_wu_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void mod_wu(Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mod_wu_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void div_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(div_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void mod_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mod_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void div_du(Register rd, Register rj, Register rk) { emit_int32(insn_RRR(div_du_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void mod_du(Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mod_du_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + + void crc_w_b_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(crc_w_b_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void crc_w_h_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(crc_w_h_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void crc_w_w_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(crc_w_w_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void crc_w_d_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(crc_w_d_w_op, (int)rk->encoding(), (int)rj->encoding(), 
(int)rd->encoding())); } + void crcc_w_b_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(crcc_w_b_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void crcc_w_h_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(crcc_w_h_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void crcc_w_w_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(crcc_w_w_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void crcc_w_d_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(crcc_w_d_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + + void brk(int code) { assert(is_uimm(code, 15), "not a unsigned 15-bit int"); emit_int32(insn_I15(break_op, code)); } + + void alsl_d(Register rd, Register rj, Register rk, int sa2) { assert(is_uimm(sa2, 2), "not a unsigned 2-bit int"); emit_int32(insn_I8RR(alsl_d_op, ( (sa2 << 5) | (int)rk->encoding() ), (int)rj->encoding(), (int)rd->encoding())); } + + void slli_w(Register rd, Register rj, int ui5) { assert(is_uimm(ui5, 5), "not a unsigned 5-bit int"); emit_int32(insn_I8RR(slli_op, ( (0b001 << 5) | ui5 ), (int)rj->encoding(), (int)rd->encoding())); } + void slli_d(Register rd, Register rj, int ui6) { assert(is_uimm(ui6, 6), "not a unsigned 6-bit int"); emit_int32(insn_I8RR(slli_op, ( (0b01 << 6) | ui6 ), (int)rj->encoding(), (int)rd->encoding())); } + void srli_w(Register rd, Register rj, int ui5) { assert(is_uimm(ui5, 5), "not a unsigned 5-bit int"); emit_int32(insn_I8RR(srli_op, ( (0b001 << 5) | ui5 ), (int)rj->encoding(), (int)rd->encoding())); } + void srli_d(Register rd, Register rj, int ui6) { assert(is_uimm(ui6, 6), "not a unsigned 6-bit int"); emit_int32(insn_I8RR(srli_op, ( (0b01 << 6) | ui6 ), (int)rj->encoding(), (int)rd->encoding())); } + void srai_w(Register rd, Register rj, int ui5) { assert(is_uimm(ui5, 5), "not a unsigned 5-bit int"); emit_int32(insn_I8RR(srai_op, ( (0b001 << 5) | ui5 ), (int)rj->encoding(), (int)rd->encoding())); } + void srai_d(Register rd, Register rj, int ui6) { assert(is_uimm(ui6, 6), "not a unsigned 6-bit int"); emit_int32(insn_I8RR(srai_op, ( (0b01 << 6) | ui6 ), (int)rj->encoding(), (int)rd->encoding())); } + void rotri_w(Register rd, Register rj, int ui5) { assert(is_uimm(ui5, 5), "not a unsigned 5-bit int"); emit_int32(insn_I8RR(rotri_op, ( (0b001 << 5) | ui5 ), (int)rj->encoding(), (int)rd->encoding())); } + void rotri_d(Register rd, Register rj, int ui6) { assert(is_uimm(ui6, 6), "not a unsigned 6-bit int"); emit_int32(insn_I8RR(rotri_op, ( (0b01 << 6) | ui6 ), (int)rj->encoding(), (int)rd->encoding())); } + + void bstrins_w (Register rd, Register rj, int msbw, int lsbw) { assert(is_uimm(msbw, 5) && is_uimm(lsbw, 5), "not a unsigned 5-bit int"); emit_int32(insn_I12RR(bstr_w_op, ( (1<<11) | (low(msbw, 5)<<6) | (0<<5) | low(lsbw, 5) ), (int)rj->encoding(), (int)rd->encoding())); } + void bstrpick_w (Register rd, Register rj, int msbw, int lsbw) { assert(is_uimm(msbw, 5) && is_uimm(lsbw, 5), "not a unsigned 5-bit int"); emit_int32(insn_I12RR(bstr_w_op, ( (1<<11) | (low(msbw, 5)<<6) | (1<<5) | low(lsbw, 5) ), (int)rj->encoding(), (int)rd->encoding())); } + void bstrins_d (Register rd, Register rj, int msbd, int lsbd) { assert(is_uimm(msbd, 6) && is_uimm(lsbd, 6), "not a unsigned 6-bit int"); emit_int32(insn_I12RR(bstrins_d_op, ( (low(msbd, 6)<<6) | low(lsbd, 6) ), (int)rj->encoding(), (int)rd->encoding())); } + void bstrpick_d (Register rd, Register rj, int msbd, int lsbd) { 
assert(is_uimm(msbd, 6) && is_uimm(lsbd, 6), "not a unsigned 6-bit int"); emit_int32(insn_I12RR(bstrpick_d_op, ( (low(msbd, 6)<<6) | low(lsbd, 6) ), (int)rj->encoding(), (int)rd->encoding())); } + + void fadd_s (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fadd_s_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } + void fadd_d (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fadd_d_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } + void fsub_s (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fsub_s_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } + void fsub_d (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fsub_d_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } + void fmul_s (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fmul_s_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } + void fmul_d (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fmul_d_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } + void fdiv_s (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fdiv_s_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } + void fdiv_d (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fdiv_d_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } + void fmax_s (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fmax_s_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } + void fmax_d (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fmax_d_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } + void fmin_s (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fmin_s_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } + void fmin_d (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fmin_d_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } + void fmaxa_s (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fmaxa_s_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } + void fmaxa_d (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fmaxa_d_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } + void fmina_s (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fmina_s_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } + void fmina_d (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fmina_d_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } + + void fscaleb_s (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fscaleb_s_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } + void fscaleb_d (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fscaleb_d_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } + void fcopysign_s (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fcopysign_s_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } + void fcopysign_d (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fcopysign_d_op, 
(int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } + + void fabs_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fabs_s_op, (int)fj->encoding(), (int)fd->encoding())); } + void fabs_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fabs_d_op, (int)fj->encoding(), (int)fd->encoding())); } + void fneg_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fneg_s_op, (int)fj->encoding(), (int)fd->encoding())); } + void fneg_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fneg_d_op, (int)fj->encoding(), (int)fd->encoding())); } + void flogb_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(flogb_s_op, (int)fj->encoding(), (int)fd->encoding())); } + void flogb_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(flogb_d_op, (int)fj->encoding(), (int)fd->encoding())); } + void fclass_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fclass_s_op, (int)fj->encoding(), (int)fd->encoding())); } + void fclass_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fclass_d_op, (int)fj->encoding(), (int)fd->encoding())); } + void fsqrt_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fsqrt_s_op, (int)fj->encoding(), (int)fd->encoding())); } + void fsqrt_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fsqrt_d_op, (int)fj->encoding(), (int)fd->encoding())); } + void frecip_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(frecip_s_op, (int)fj->encoding(), (int)fd->encoding())); } + void frecip_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(frecip_d_op, (int)fj->encoding(), (int)fd->encoding())); } + void frsqrt_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(frsqrt_s_op, (int)fj->encoding(), (int)fd->encoding())); } + void frsqrt_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(frsqrt_d_op, (int)fj->encoding(), (int)fd->encoding())); } + void fmov_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fmov_s_op, (int)fj->encoding(), (int)fd->encoding())); } + void fmov_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fmov_d_op, (int)fj->encoding(), (int)fd->encoding())); } + + void movgr2fr_w (FloatRegister fd, Register rj) { emit_int32(insn_RR(movgr2fr_w_op, (int)rj->encoding(), (int)fd->encoding())); } + void movgr2fr_d (FloatRegister fd, Register rj) { emit_int32(insn_RR(movgr2fr_d_op, (int)rj->encoding(), (int)fd->encoding())); } + void movgr2frh_w(FloatRegister fd, Register rj) { emit_int32(insn_RR(movgr2frh_w_op, (int)rj->encoding(), (int)fd->encoding())); } + void movfr2gr_s (Register rd, FloatRegister fj) { emit_int32(insn_RR(movfr2gr_s_op, (int)fj->encoding(), (int)rd->encoding())); } + void movfr2gr_d (Register rd, FloatRegister fj) { emit_int32(insn_RR(movfr2gr_d_op, (int)fj->encoding(), (int)rd->encoding())); } + void movfrh2gr_s(Register rd, FloatRegister fj) { emit_int32(insn_RR(movfrh2gr_s_op, (int)fj->encoding(), (int)rd->encoding())); } + void movgr2fcsr (int fcsr, Register rj) { assert(is_uimm(fcsr, 2), "not a unsigned 2-bit init: fcsr0-fcsr3"); emit_int32(insn_RR(movgr2fcsr_op, (int)rj->encoding(), fcsr)); } + void movfcsr2gr (Register rd, int fcsr) { assert(is_uimm(fcsr, 2), "not a unsigned 2-bit init: fcsr0-fcsr3"); emit_int32(insn_RR(movfcsr2gr_op, fcsr, (int)rd->encoding())); } + void movfr2cf (ConditionalFlagRegister cd, FloatRegister fj) { emit_int32(insn_RR(movfr2cf_op, (int)fj->encoding(), (int)cd->encoding())); } + void movcf2fr (FloatRegister fd, ConditionalFlagRegister cj) { emit_int32(insn_RR(movcf2fr_op, 
(int)cj->encoding(), (int)fd->encoding())); } + void movgr2cf (ConditionalFlagRegister cd, Register rj) { emit_int32(insn_RR(movgr2cf_op, (int)rj->encoding(), (int)cd->encoding())); } + void movcf2gr (Register rd, ConditionalFlagRegister cj) { emit_int32(insn_RR(movcf2gr_op, (int)cj->encoding(), (int)rd->encoding())); } + + void fcvt_s_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fcvt_s_d_op, (int)fj->encoding(), (int)fd->encoding())); } + void fcvt_d_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fcvt_d_s_op, (int)fj->encoding(), (int)fd->encoding())); } + + void ftintrm_w_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrm_w_s_op, (int)fj->encoding(), (int)fd->encoding())); } + void ftintrm_w_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrm_w_d_op, (int)fj->encoding(), (int)fd->encoding())); } + void ftintrm_l_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrm_l_s_op, (int)fj->encoding(), (int)fd->encoding())); } + void ftintrm_l_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrm_l_d_op, (int)fj->encoding(), (int)fd->encoding())); } + void ftintrp_w_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrp_w_s_op, (int)fj->encoding(), (int)fd->encoding())); } + void ftintrp_w_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrp_w_d_op, (int)fj->encoding(), (int)fd->encoding())); } + void ftintrp_l_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrp_l_s_op, (int)fj->encoding(), (int)fd->encoding())); } + void ftintrp_l_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrp_l_d_op, (int)fj->encoding(), (int)fd->encoding())); } + void ftintrz_w_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrz_w_s_op, (int)fj->encoding(), (int)fd->encoding())); } + void ftintrz_w_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrz_w_d_op, (int)fj->encoding(), (int)fd->encoding())); } + void ftintrz_l_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrz_l_s_op, (int)fj->encoding(), (int)fd->encoding())); } + void ftintrz_l_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrz_l_d_op, (int)fj->encoding(), (int)fd->encoding())); } + void ftintrne_w_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrne_w_s_op, (int)fj->encoding(), (int)fd->encoding())); } + void ftintrne_w_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrne_w_d_op, (int)fj->encoding(), (int)fd->encoding())); } + void ftintrne_l_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrne_l_s_op, (int)fj->encoding(), (int)fd->encoding())); } + void ftintrne_l_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrne_l_d_op, (int)fj->encoding(), (int)fd->encoding())); } + void ftint_w_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftint_w_s_op, (int)fj->encoding(), (int)fd->encoding())); } + void ftint_w_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftint_w_d_op, (int)fj->encoding(), (int)fd->encoding())); } + void ftint_l_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftint_l_s_op, (int)fj->encoding(), (int)fd->encoding())); } + void ftint_l_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftint_l_d_op, (int)fj->encoding(), (int)fd->encoding())); } + void ffint_s_w(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ffint_s_w_op, (int)fj->encoding(), (int)fd->encoding())); } + void ffint_s_l(FloatRegister fd, FloatRegister fj) { 
emit_int32(insn_RR(ffint_s_l_op, (int)fj->encoding(), (int)fd->encoding())); } + void ffint_d_w(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ffint_d_w_op, (int)fj->encoding(), (int)fd->encoding())); } + void ffint_d_l(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ffint_d_l_op, (int)fj->encoding(), (int)fd->encoding())); } + void frint_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(frint_s_op, (int)fj->encoding(), (int)fd->encoding())); } + void frint_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(frint_d_op, (int)fj->encoding(), (int)fd->encoding())); } + + void slti (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(slti_op, si12, (int)rj->encoding(), (int)rd->encoding())); } + void sltui (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(sltui_op, si12, (int)rj->encoding(), (int)rd->encoding())); } + void addi_w(Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(addi_w_op, si12, (int)rj->encoding(), (int)rd->encoding())); } + void addi_d(Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(addi_d_op, si12, (int)rj->encoding(), (int)rd->encoding())); } + void lu52i_d(Register rd, Register rj, int si12) { /*assert(is_simm(si12, 12), "not a signed 12-bit int");*/ emit_int32(insn_I12RR(lu52i_d_op, simm12(si12), (int)rj->encoding(), (int)rd->encoding())); } + void andi (Register rd, Register rj, int ui12) { assert(is_uimm(ui12, 12), "not a unsigned 12-bit int"); emit_int32(insn_I12RR(andi_op, ui12, (int)rj->encoding(), (int)rd->encoding())); } + void ori (Register rd, Register rj, int ui12) { assert(is_uimm(ui12, 12), "not a unsigned 12-bit int"); emit_int32(insn_I12RR(ori_op, ui12, (int)rj->encoding(), (int)rd->encoding())); } + void xori (Register rd, Register rj, int ui12) { assert(is_uimm(ui12, 12), "not a unsigned 12-bit int"); emit_int32(insn_I12RR(xori_op, ui12, (int)rj->encoding(), (int)rd->encoding())); } + + void fmadd_s (FloatRegister fd, FloatRegister fj, FloatRegister fk, FloatRegister fa) { emit_int32(insn_RRRR(fmadd_s_op , (int)fa->encoding(), (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } + void fmadd_d (FloatRegister fd, FloatRegister fj, FloatRegister fk, FloatRegister fa) { emit_int32(insn_RRRR(fmadd_d_op , (int)fa->encoding(), (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } + void fmsub_s (FloatRegister fd, FloatRegister fj, FloatRegister fk, FloatRegister fa) { emit_int32(insn_RRRR(fmsub_s_op , (int)fa->encoding(), (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } + void fmsub_d (FloatRegister fd, FloatRegister fj, FloatRegister fk, FloatRegister fa) { emit_int32(insn_RRRR(fmsub_d_op , (int)fa->encoding(), (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } + void fnmadd_s (FloatRegister fd, FloatRegister fj, FloatRegister fk, FloatRegister fa) { emit_int32(insn_RRRR(fnmadd_s_op , (int)fa->encoding(), (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } + void fnmadd_d (FloatRegister fd, FloatRegister fj, FloatRegister fk, FloatRegister fa) { emit_int32(insn_RRRR(fnmadd_d_op , (int)fa->encoding(), (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } + void fnmsub_s (FloatRegister fd, FloatRegister fj, FloatRegister fk, FloatRegister fa) { emit_int32(insn_RRRR(fnmsub_s_op , 
(int)fa->encoding(), (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } + void fnmsub_d (FloatRegister fd, FloatRegister fj, FloatRegister fk, FloatRegister fa) { emit_int32(insn_RRRR(fnmsub_d_op , (int)fa->encoding(), (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } + + void fcmp_caf_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_caf, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } + void fcmp_cun_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_cun , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } + void fcmp_ceq_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_ceq , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } + void fcmp_cueq_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_cueq, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } + void fcmp_clt_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_clt , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } + void fcmp_cult_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_cult, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } + void fcmp_cle_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_cle , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } + void fcmp_cule_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_cule, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } + void fcmp_cne_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_cne , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } + void fcmp_cor_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_cor , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } + void fcmp_cune_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_cune, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } + void fcmp_saf_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_saf , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } + void fcmp_sun_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_sun , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } + void fcmp_seq_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_seq , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } + void fcmp_sueq_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_sueq, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } + void fcmp_slt_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_slt , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } + void fcmp_sult_s (ConditionalFlagRegister 
cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_sult, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } + void fcmp_sle_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_sle , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } + void fcmp_sule_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_sule, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } + void fcmp_sne_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_sne , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } + void fcmp_sor_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_sor , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } + void fcmp_sune_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_sune, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } + + void fcmp_caf_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_caf, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } + void fcmp_cun_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_cun , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } + void fcmp_ceq_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_ceq , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } + void fcmp_cueq_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_cueq, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } + void fcmp_clt_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_clt , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } + void fcmp_cult_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_cult, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } + void fcmp_cle_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_cle , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } + void fcmp_cule_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_cule, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } + void fcmp_cne_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_cne , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } + void fcmp_cor_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_cor , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } + void fcmp_cune_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_cune, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } + void fcmp_saf_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_saf , (int)fk->encoding(), (int)fj->encoding(), 
(int)cd->encoding())); } + void fcmp_sun_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_sun , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } + void fcmp_seq_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_seq , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } + void fcmp_sueq_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_sueq, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } + void fcmp_slt_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_slt , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } + void fcmp_sult_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_sult, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } + void fcmp_sle_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_sle , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } + void fcmp_sule_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_sule, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } + void fcmp_sne_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_sne , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } + void fcmp_sor_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_sor , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } + void fcmp_sune_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_sune, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } + + void fsel (FloatRegister fd, FloatRegister fj, FloatRegister fk, ConditionalFlagRegister ca) { emit_int32(insn_RRRR(fsel_op, (int)ca->encoding(), (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } + + void addu16i_d(Register rj, Register rd, int si16) { assert(is_simm(si16, 16), "not a signed 16-bit int"); emit_int32(insn_I16RR(addu16i_d_op, si16, (int)rj->encoding(), (int)rd->encoding())); } + + void lu12i_w(Register rj, int si20) { /*assert(is_simm(si20, 20), "not a signed 20-bit int");*/ emit_int32(insn_I20R(lu12i_w_op, simm20(si20), (int)rj->encoding())); } + void lu32i_d(Register rj, int si20) { /*assert(is_simm(si20, 20), "not a signed 20-bit int");*/ emit_int32(insn_I20R(lu32i_d_op, simm20(si20), (int)rj->encoding())); } + void pcaddi(Register rj, int si20) { assert(is_simm(si20, 20), "not a signed 20-bit int"); emit_int32(insn_I20R(pcaddi_op, si20, (int)rj->encoding())); } + void pcalau12i(Register rj, int si20) { assert(is_simm(si20, 20), "not a signed 20-bit int"); emit_int32(insn_I20R(pcalau12i_op, si20, (int)rj->encoding())); } + void pcaddu12i(Register rj, int si20) { assert(is_simm(si20, 20), "not a signed 20-bit int"); emit_int32(insn_I20R(pcaddu12i_op, si20, (int)rj->encoding())); } + void pcaddu18i(Register rj, int si20) { assert(is_simm(si20, 20), "not a signed 20-bit int"); emit_int32(insn_I20R(pcaddu18i_op, si20, (int)rj->encoding())); } + + void ll_w (Register rd, Register rj, int si16) { assert(is_simm(si16, 16) && ((si16 & 0x3) == 0), "not a signed 16-bit 
int"); emit_int32(insn_I14RR(ll_w_op, si16>>2, (int)rj->encoding(), (int)rd->encoding())); } + void sc_w (Register rd, Register rj, int si16) { assert(is_simm(si16, 16) && ((si16 & 0x3) == 0), "not a signed 16-bit int"); emit_int32(insn_I14RR(sc_w_op, si16>>2, (int)rj->encoding(), (int)rd->encoding())); } + void ll_d (Register rd, Register rj, int si16) { assert(is_simm(si16, 16) && ((si16 & 0x3) == 0), "not a signed 16-bit int"); emit_int32(insn_I14RR(ll_d_op, si16>>2, (int)rj->encoding(), (int)rd->encoding())); } + void sc_d (Register rd, Register rj, int si16) { assert(is_simm(si16, 16) && ((si16 & 0x3) == 0), "not a signed 16-bit int"); emit_int32(insn_I14RR(sc_d_op, si16>>2, (int)rj->encoding(), (int)rd->encoding())); } + void ldptr_w (Register rd, Register rj, int si16) { assert(is_simm(si16, 16) && ((si16 & 0x3) == 0), "not a signed 16-bit int"); emit_int32(insn_I14RR(ldptr_w_op, si16>>2, (int)rj->encoding(), (int)rd->encoding())); } + void stptr_w (Register rd, Register rj, int si16) { assert(is_simm(si16, 16) && ((si16 & 0x3) == 0), "not a signed 16-bit int"); emit_int32(insn_I14RR(stptr_w_op, si16>>2, (int)rj->encoding(), (int)rd->encoding())); } + void ldptr_d (Register rd, Register rj, int si16) { assert(is_simm(si16, 16) && ((si16 & 0x3) == 0), "not a signed 16-bit int"); emit_int32(insn_I14RR(ldptr_d_op, si16>>2, (int)rj->encoding(), (int)rd->encoding())); } + void stptr_d (Register rd, Register rj, int si16) { assert(is_simm(si16, 16) && ((si16 & 0x3) == 0), "not a signed 16-bit int"); emit_int32(insn_I14RR(stptr_d_op, si16>>2, (int)rj->encoding(), (int)rd->encoding())); } + + void ld_b (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(ld_b_op, si12, (int)rj->encoding(), (int)rd->encoding())); } + void ld_h (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(ld_h_op, si12, (int)rj->encoding(), (int)rd->encoding())); } + void ld_w (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(ld_w_op, si12, (int)rj->encoding(), (int)rd->encoding())); } + void ld_d (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(ld_d_op, si12, (int)rj->encoding(), (int)rd->encoding())); } + void st_b (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(st_b_op, si12, (int)rj->encoding(), (int)rd->encoding())); } + void st_h (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(st_h_op, si12, (int)rj->encoding(), (int)rd->encoding())); } + void st_w (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(st_w_op, si12, (int)rj->encoding(), (int)rd->encoding())); } + void st_d (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(st_d_op, si12, (int)rj->encoding(), (int)rd->encoding())); } + void ld_bu (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(ld_bu_op, si12, (int)rj->encoding(), (int)rd->encoding())); } + void ld_hu (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(ld_hu_op, si12, (int)rj->encoding(), (int)rd->encoding())); } + void ld_wu (Register rd, Register rj, int si12) { 
assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(ld_wu_op, si12, (int)rj->encoding(), (int)rd->encoding())); } + void preld (int hint, Register rj, int si12) { assert(is_uimm(hint, 5), "not a unsigned 5-bit int"); assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(preld_op, si12, (int)rj->encoding(), hint)); } + void fld_s (FloatRegister fd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(fld_s_op, si12, (int)rj->encoding(), (int)fd->encoding())); } + void fst_s (FloatRegister fd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(fst_s_op, si12, (int)rj->encoding(), (int)fd->encoding())); } + void fld_d (FloatRegister fd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(fld_d_op, si12, (int)rj->encoding(), (int)fd->encoding())); } + void fst_d (FloatRegister fd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(fst_d_op, si12, (int)rj->encoding(), (int)fd->encoding())); } + void ldl_w (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(ldl_w_op, si12, (int)rj->encoding(), (int)rd->encoding())); } + void ldr_w (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(ldr_w_op, si12, (int)rj->encoding(), (int)rd->encoding())); } + + void ldx_b (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldx_b_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void ldx_h (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldx_h_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void ldx_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldx_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void ldx_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldx_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void stx_b (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(stx_b_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void stx_h (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(stx_h_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void stx_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(stx_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void stx_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(stx_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void ldx_bu (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldx_bu_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void ldx_hu (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldx_hu_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void ldx_wu (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldx_wu_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void fldx_s (FloatRegister fd, Register rj, Register rk) { emit_int32(insn_RRR(fldx_s_op, (int)rk->encoding(), (int)rj->encoding(), (int)fd->encoding())); } + void fldx_d (FloatRegister fd, Register rj, Register rk) { emit_int32(insn_RRR(fldx_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)fd->encoding())); } + void fstx_s (FloatRegister fd, 
Register rj, Register rk) { emit_int32(insn_RRR(fstx_s_op, (int)rk->encoding(), (int)rj->encoding(), (int)fd->encoding())); }
+   void fstx_d (FloatRegister fd, Register rj, Register rk) { emit_int32(insn_RRR(fstx_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)fd->encoding())); }
+
+   void ld_b  (Register rd, Address src);
+   void ld_bu (Register rd, Address src);
+   void ld_d  (Register rd, Address src);
+   void ld_h  (Register rd, Address src);
+   void ld_hu (Register rd, Address src);
+   void ll_w  (Register rd, Address src);
+   void ll_d  (Register rd, Address src);
+   void ld_wu (Register rd, Address src);
+   void ld_w  (Register rd, Address src);
+   void st_b  (Register rd, Address dst);
+   void st_d  (Register rd, Address dst);
+   void st_w  (Register rd, Address dst);
+   void sc_w  (Register rd, Address dst);
+   void sc_d  (Register rd, Address dst);
+   void st_h  (Register rd, Address dst);
+   void fld_s (FloatRegister fd, Address src);
+   void fld_d (FloatRegister fd, Address src);
+   void fst_s (FloatRegister fd, Address dst);
+   void fst_d (FloatRegister fd, Address dst);
+
+   void amswap_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amswap_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
+   void amswap_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amswap_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
+   void amadd_w  (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amadd_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
+   void amadd_d  (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amadd_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
+   void amand_w  (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amand_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
+   void amand_d  (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amand_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
+   void amor_w   (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amor_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
+   void amor_d   (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amor_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
+   void amxor_w  (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amxor_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
+   void amxor_d  (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amxor_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
+   void ammax_w  (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammax_w_op, (int)rk->encoding(),
+ void ammax_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammax_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
+ void ammin_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammin_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
+ void ammin_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammin_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
+ void ammax_wu (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammax_wu_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
+ void ammax_du (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammax_du_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
+ void ammin_wu (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammin_wu_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
+ void ammin_du (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammin_du_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
+ void amswap_db_w(Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amswap_db_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
+ void amswap_db_d(Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amswap_db_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
+ void amadd_db_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amadd_db_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
+ void amadd_db_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amadd_db_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
+ void amand_db_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amand_db_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
+ void amand_db_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amand_db_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
+ void amor_db_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amor_db_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
+ void amor_db_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amor_db_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
+ void amxor_db_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amxor_db_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
+ void amxor_db_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amxor_db_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
+ void ammax_db_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammax_db_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
+ void ammax_db_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammax_db_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
+ void ammin_db_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammin_db_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
+ void ammin_db_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammin_db_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
+ void ammax_db_wu(Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammax_db_wu_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
+ void ammax_db_du(Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammax_db_du_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
+ void ammin_db_wu(Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammin_db_wu_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
+ void ammin_db_du(Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammin_db_du_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
+
+ void dbar(int hint) {
+   assert(is_uimm(hint, 15), "not a unsigned 15-bit int");
+
+   if (os::is_ActiveCoresMP())
+     andi(R0, R0, 0);
+   else
+     emit_int32(insn_I15(dbar_op, hint));
+ }
+ void ibar(int hint) { assert(is_uimm(hint, 15), "not a unsigned 15-bit int"); emit_int32(insn_I15(ibar_op, hint)); }
+
+ void fldgt_s (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(fldgt_s_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
+ void fldgt_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(fldgt_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
+ void fldle_s (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(fldle_s_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
+ void fldle_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(fldle_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
+ void fstgt_s (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(fstgt_s_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
+ void fstgt_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(fstgt_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
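The AM* encoders above correspond to LoongArch's atomic read-modify-write instructions: rd receives the previous memory value at [rj] while memory is updated using rk (which is why the assert_different_registers checks require rd to differ from both sources), and the _db forms carry additional barrier semantics; dbar() itself is downgraded to a plain nop (andi R0, R0, 0) when os::is_ActiveCoresMP() holds. As a rough sketch of how a caller might drive these encoders (illustration only, not part of the patch; the wrapper name, the Assembler* parameter, and the register choices are hypothetical placeholders):

// Sketch only, assuming the encoder declarations above live in HotSpot's Assembler class.
void emit_fetch_and_add_w(Assembler* masm, Register old_val, Register addend, Register addr) {
  // old_val <- former 32-bit value at [addr]; [addr] <- old value + addend,
  // performed atomically with barrier semantics (the _db form).
  // The asserts above require old_val to differ from both addend and addr.
  masm->amadd_db_w(old_val, addend, addr);
}

The same hint space feeds the membar() helper defined a little further down, which combines the mask bits as dbar(Ordering | (~hint & 0xf)); for example, membar(StoreLoad) with StoreLoad = 0b0110 works out to dbar(0b10000 | 0b1001) = dbar(0b11001).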
+ void fstle_s (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(fstle_s_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void fstle_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(fstle_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + + void ldgt_b (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldgt_b_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void ldgt_h (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldgt_h_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void ldgt_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldgt_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void ldgt_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldgt_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void ldle_b (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldle_b_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void ldle_h (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldle_h_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void ldle_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldle_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void ldle_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldle_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void stgt_b (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(stgt_b_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void stgt_h (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(stgt_h_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void stgt_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(stgt_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void stgt_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(stgt_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void stle_b (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(stle_b_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void stle_h (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(stle_h_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void stle_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(stle_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void stle_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(stle_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + + void beqz(Register rj, int offs) { assert(is_simm(offs, 21), "not a signed 21-bit int"); emit_int32(insn_IRI(beqz_op, offs, (int)rj->encoding())); } + void bnez(Register rj, int offs) { assert(is_simm(offs, 21), "not a signed 21-bit int"); emit_int32(insn_IRI(bnez_op, offs, (int)rj->encoding())); } + void bceqz(ConditionalFlagRegister cj, int offs) { assert(is_simm(offs, 21), "not a signed 21-bit int"); emit_int32(insn_IRI(bccondz_op, offs, ( (0b00<<3) | (int)cj->encoding()))); } + void bcnez(ConditionalFlagRegister cj, int offs) { assert(is_simm(offs, 21), "not a signed 21-bit int"); emit_int32(insn_IRI(bccondz_op, offs, ( (0b01<<3) | (int)cj->encoding()))); } + + void jirl(Register rd, Register rj, int offs) { assert(is_simm(offs, 18) && ((offs & 3) == 0), 
"not a signed 18-bit int"); emit_int32(insn_I16RR(jirl_op, offs >> 2, (int)rj->encoding(), (int)rd->encoding())); } + + void b(int offs) { assert(is_simm(offs, 26), "not a signed 26-bit int"); emit_int32(insn_I26(b_op, offs)); } + void bl(int offs) { assert(is_simm(offs, 26), "not a signed 26-bit int"); emit_int32(insn_I26(bl_op, offs)); } + + + void beq(Register rj, Register rd, int offs) { assert(is_simm(offs, 16), "not a signed 16-bit int"); emit_int32(insn_I16RR(beq_op, offs, (int)rj->encoding(), (int)rd->encoding())); } + void bne(Register rj, Register rd, int offs) { assert(is_simm(offs, 16), "not a signed 16-bit int"); emit_int32(insn_I16RR(bne_op, offs, (int)rj->encoding(), (int)rd->encoding())); } + void blt(Register rj, Register rd, int offs) { assert(is_simm(offs, 16), "not a signed 16-bit int"); emit_int32(insn_I16RR(blt_op, offs, (int)rj->encoding(), (int)rd->encoding())); } + void bge(Register rj, Register rd, int offs) { assert(is_simm(offs, 16), "not a signed 16-bit int"); emit_int32(insn_I16RR(bge_op, offs, (int)rj->encoding(), (int)rd->encoding())); } + void bltu(Register rj, Register rd, int offs) { assert(is_simm(offs, 16), "not a signed 16-bit int"); emit_int32(insn_I16RR(bltu_op, offs, (int)rj->encoding(), (int)rd->encoding())); } + void bgeu(Register rj, Register rd, int offs) { assert(is_simm(offs, 16), "not a signed 16-bit int"); emit_int32(insn_I16RR(bgeu_op, offs, (int)rj->encoding(), (int)rd->encoding())); } + + void beq (Register rj, Register rd, address entry) { beq (rj, rd, offset16(entry)); } + void bne (Register rj, Register rd, address entry) { bne (rj, rd, offset16(entry)); } + void blt (Register rj, Register rd, address entry) { blt (rj, rd, offset16(entry)); } + void bge (Register rj, Register rd, address entry) { bge (rj, rd, offset16(entry)); } + void bltu (Register rj, Register rd, address entry) { bltu (rj, rd, offset16(entry)); } + void bgeu (Register rj, Register rd, address entry) { bgeu (rj, rd, offset16(entry)); } + void beqz (Register rj, address entry) { beqz (rj, offset21(entry)); } + void bnez (Register rj, address entry) { bnez (rj, offset21(entry)); } + void b(address entry) { b(offset26(entry)); } + void bl(address entry) { bl(offset26(entry)); } + void bceqz(ConditionalFlagRegister cj, address entry) { bceqz(cj, offset21(entry)); } + void bcnez(ConditionalFlagRegister cj, address entry) { bcnez(cj, offset21(entry)); } + + void beq (Register rj, Register rd, Label& L) { beq (rj, rd, target(L)); } + void bne (Register rj, Register rd, Label& L) { bne (rj, rd, target(L)); } + void blt (Register rj, Register rd, Label& L) { blt (rj, rd, target(L)); } + void bge (Register rj, Register rd, Label& L) { bge (rj, rd, target(L)); } + void bltu (Register rj, Register rd, Label& L) { bltu (rj, rd, target(L)); } + void bgeu (Register rj, Register rd, Label& L) { bgeu (rj, rd, target(L)); } + void beqz (Register rj, Label& L) { beqz (rj, target(L)); } + void bnez (Register rj, Label& L) { bnez (rj, target(L)); } + void b(Label& L) { b(target(L)); } + void bl(Label& L) { bl(target(L)); } + void bceqz(ConditionalFlagRegister cj, Label& L) { bceqz(cj, target(L)); } + void bcnez(ConditionalFlagRegister cj, Label& L) { bcnez(cj, target(L)); } + + typedef enum { + // hint[4] + Completion = 0, + Ordering = (1 << 4), + + // The bitwise-not of the below constants is corresponding to the hint. This is convenient for OR operation. 
+ // hint[3:2] and hint[1:0] + LoadLoad = ((1 << 3) | (1 << 1)), + LoadStore = ((1 << 3) | (1 << 0)), + StoreLoad = ((1 << 2) | (1 << 1)), + StoreStore = ((1 << 2) | (1 << 0)), + AnyAny = ((3 << 2) | (3 << 0)), + } Membar_mask_bits; + + // Serializes memory and blows flags + void membar(Membar_mask_bits hint) { + assert((hint & (3 << 0)) != 0, "membar mask unsupported!"); + assert((hint & (3 << 2)) != 0, "membar mask unsupported!"); + dbar(Ordering | (~hint & 0xf)); + } + + // LSX and LASX +#define ASSERT_LSX assert(UseLSX, ""); +#define ASSERT_LASX assert(UseLASX, ""); + + void vadd_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vadd_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vadd_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vadd_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vadd_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vadd_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vadd_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vadd_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vadd_q(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vadd_q_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvadd_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvadd_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvadd_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvadd_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvadd_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvadd_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvadd_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvadd_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvadd_q(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvadd_q_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vsub_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsub_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vsub_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsub_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vsub_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsub_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vsub_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsub_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vsub_q(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsub_q_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvsub_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsub_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvsub_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { 
ASSERT_LASX emit_int32(insn_RRR(xvsub_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvsub_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsub_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvsub_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsub_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvsub_q(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsub_q_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vaddi_bu(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vaddi_bu_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } + void vaddi_hu(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vaddi_hu_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } + void vaddi_wu(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vaddi_wu_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } + void vaddi_du(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vaddi_du_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } + void xvaddi_bu(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvaddi_bu_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } + void xvaddi_hu(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvaddi_hu_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } + void xvaddi_wu(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvaddi_wu_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } + void xvaddi_du(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvaddi_du_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } + + void vsubi_bu(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vsubi_bu_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } + void vsubi_hu(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vsubi_hu_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } + void vsubi_wu(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vsubi_wu_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } + void vsubi_du(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vsubi_du_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } + void xvsubi_bu(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvsubi_bu_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } + void xvsubi_hu(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvsubi_hu_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } + void xvsubi_wu(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvsubi_wu_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } + void xvsubi_du(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvsubi_du_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } + + void vneg_b(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vneg_b_op, (int)vj->encoding(), (int)vd->encoding())); } + void vneg_h(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vneg_h_op, (int)vj->encoding(), (int)vd->encoding())); } + void vneg_w(FloatRegister vd, 
FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vneg_w_op, (int)vj->encoding(), (int)vd->encoding())); } + void vneg_d(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vneg_d_op, (int)vj->encoding(), (int)vd->encoding())); } + void xvneg_b(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvneg_b_op, (int)xj->encoding(), (int)xd->encoding())); } + void xvneg_h(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvneg_h_op, (int)xj->encoding(), (int)xd->encoding())); } + void xvneg_w(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvneg_w_op, (int)xj->encoding(), (int)xd->encoding())); } + void xvneg_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvneg_d_op, (int)xj->encoding(), (int)xd->encoding())); } + + void vabsd_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vabsd_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vabsd_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vabsd_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vabsd_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vabsd_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vabsd_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vabsd_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvabsd_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvabsd_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvabsd_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvabsd_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvabsd_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvabsd_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvabsd_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvabsd_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vmax_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmax_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vmax_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmax_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vmax_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmax_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vmax_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmax_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvmax_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmax_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvmax_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmax_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvmax_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmax_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + 
void xvmax_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmax_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vmin_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmin_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vmin_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmin_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vmin_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmin_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vmin_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmin_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvmin_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmin_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvmin_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmin_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvmin_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmin_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvmin_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmin_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vmul_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmul_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vmul_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmul_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vmul_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmul_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vmul_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmul_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvmul_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmul_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvmul_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmul_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvmul_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmul_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvmul_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmul_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vmuh_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmuh_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vmuh_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmuh_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vmuh_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmuh_w_op, 
(int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vmuh_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmuh_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvmuh_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmuh_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvmuh_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmuh_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvmuh_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmuh_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvmuh_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmuh_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vmuh_bu(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmuh_bu_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vmuh_hu(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmuh_hu_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vmuh_wu(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmuh_wu_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vmuh_du(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmuh_du_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvmuh_bu(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmuh_bu_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvmuh_hu(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmuh_hu_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvmuh_wu(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmuh_wu_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvmuh_du(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmuh_du_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vmulwev_h_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmulwev_h_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vmulwev_w_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmulwev_w_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vmulwev_d_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmulwev_d_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vmulwev_q_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmulwev_q_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvmulwev_h_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmulwev_h_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvmulwev_w_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmulwev_w_h_op, (int)xk->encoding(), 
(int)xj->encoding(), (int)xd->encoding())); } + void xvmulwev_d_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmulwev_d_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvmulwev_q_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmulwev_q_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vmulwod_h_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmulwod_h_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vmulwod_w_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmulwod_w_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vmulwod_d_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmulwod_d_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vmulwod_q_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmulwod_q_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvmulwod_h_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmulwod_h_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvmulwod_w_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmulwod_w_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvmulwod_d_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmulwod_d_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvmulwod_q_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmulwod_q_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vmadd_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmadd_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vmadd_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmadd_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vmadd_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmadd_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vmadd_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmadd_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvmadd_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmadd_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvmadd_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmadd_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvmadd_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmadd_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvmadd_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmadd_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vmsub_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmsub_b_op, 
(int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vmsub_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmsub_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vmsub_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmsub_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vmsub_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmsub_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvmsub_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmsub_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvmsub_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmsub_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvmsub_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmsub_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvmsub_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmsub_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vext2xv_h_b(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(vext2xv_h_b_op, (int)xj->encoding(), (int)xd->encoding())); } + void vext2xv_w_b(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(vext2xv_w_b_op, (int)xj->encoding(), (int)xd->encoding())); } + void vext2xv_d_b(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(vext2xv_d_b_op, (int)xj->encoding(), (int)xd->encoding())); } + void vext2xv_w_h(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(vext2xv_w_h_op, (int)xj->encoding(), (int)xd->encoding())); } + void vext2xv_d_h(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(vext2xv_d_h_op, (int)xj->encoding(), (int)xd->encoding())); } + void vext2xv_d_w(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(vext2xv_d_w_op, (int)xj->encoding(), (int)xd->encoding())); } + + void vext2xv_hu_bu(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(vext2xv_hu_bu_op, (int)xj->encoding(), (int)xd->encoding())); } + void vext2xv_wu_bu(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(vext2xv_wu_bu_op, (int)xj->encoding(), (int)xd->encoding())); } + void vext2xv_du_bu(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(vext2xv_du_bu_op, (int)xj->encoding(), (int)xd->encoding())); } + void vext2xv_wu_hu(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(vext2xv_wu_hu_op, (int)xj->encoding(), (int)xd->encoding())); } + void vext2xv_du_hu(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(vext2xv_du_hu_op, (int)xj->encoding(), (int)xd->encoding())); } + void vext2xv_du_wu(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(vext2xv_du_wu_op, (int)xj->encoding(), (int)xd->encoding())); } + + void vldi(FloatRegister vd, int i13) { ASSERT_LSX emit_int32(insn_I13R( vldi_op, i13, (int)vd->encoding())); } + void xvldi(FloatRegister xd, int i13) { ASSERT_LASX emit_int32(insn_I13R(xvldi_op, i13, (int)xd->encoding())); } + + void vand_v(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vand_v_op, (int)vk->encoding(), 
(int)vj->encoding(), (int)vd->encoding())); } + void xvand_v(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvand_v_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vor_v(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vor_v_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvor_v(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvor_v_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vxor_v(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vxor_v_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvxor_v(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvxor_v_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vnor_v(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vnor_v_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvnor_v(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvnor_v_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vandn_v(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vandn_v_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvandn_v(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvandn_v_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vorn_v(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vorn_v_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvorn_v(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvorn_v_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vandi_b(FloatRegister vd, FloatRegister vj, int ui8) { ASSERT_LSX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR( vandi_b_op, ui8, (int)vj->encoding(), (int)vd->encoding())); } + void xvandi_b(FloatRegister xd, FloatRegister xj, int ui8) { ASSERT_LASX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR(xvandi_b_op, ui8, (int)xj->encoding(), (int)xd->encoding())); } + + void vori_b(FloatRegister vd, FloatRegister vj, int ui8) { ASSERT_LSX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR( vori_b_op, ui8, (int)vj->encoding(), (int)vd->encoding())); } + void xvori_b(FloatRegister xd, FloatRegister xj, int ui8) { ASSERT_LASX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR(xvori_b_op, ui8, (int)xj->encoding(), (int)xd->encoding())); } + + void vxori_b(FloatRegister vd, FloatRegister vj, int ui8) { ASSERT_LSX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR( vxori_b_op, ui8, (int)vj->encoding(), (int)vd->encoding())); } + void xvxori_b(FloatRegister xd, FloatRegister xj, int ui8) { ASSERT_LASX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR(xvxori_b_op, ui8, (int)xj->encoding(), (int)xd->encoding())); } + + void vnori_b(FloatRegister vd, FloatRegister vj, int ui8) { ASSERT_LSX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR( vnori_b_op, ui8, (int)vj->encoding(), (int)vd->encoding())); } + void 
xvnori_b(FloatRegister xd, FloatRegister xj, int ui8) { ASSERT_LASX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR(xvnori_b_op, ui8, (int)xj->encoding(), (int)xd->encoding())); } + + void vsll_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsll_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vsll_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsll_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vsll_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsll_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vsll_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsll_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvsll_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsll_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvsll_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsll_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvsll_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsll_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvsll_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsll_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vslli_b(FloatRegister vd, FloatRegister vj, int ui3) { ASSERT_LSX emit_int32(insn_I3RR( vslli_b_op, ui3, (int)vj->encoding(), (int)vd->encoding())); } + void vslli_h(FloatRegister vd, FloatRegister vj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR( vslli_h_op, ui4, (int)vj->encoding(), (int)vd->encoding())); } + void vslli_w(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vslli_w_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } + void vslli_d(FloatRegister vd, FloatRegister vj, int ui6) { ASSERT_LSX emit_int32(insn_I6RR( vslli_d_op, ui6, (int)vj->encoding(), (int)vd->encoding())); } + void xvslli_b(FloatRegister xd, FloatRegister xj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvslli_b_op, ui3, (int)xj->encoding(), (int)xd->encoding())); } + void xvslli_h(FloatRegister xd, FloatRegister xj, int ui4) { ASSERT_LASX emit_int32(insn_I4RR(xvslli_h_op, ui4, (int)xj->encoding(), (int)xd->encoding())); } + void xvslli_w(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvslli_w_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } + void xvslli_d(FloatRegister xd, FloatRegister xj, int ui6) { ASSERT_LASX emit_int32(insn_I6RR(xvslli_d_op, ui6, (int)xj->encoding(), (int)xd->encoding())); } + + void vsrl_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsrl_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vsrl_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsrl_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vsrl_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsrl_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vsrl_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX 
emit_int32(insn_RRR( vsrl_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvsrl_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsrl_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvsrl_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsrl_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvsrl_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsrl_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvsrl_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsrl_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vsrli_b(FloatRegister vd, FloatRegister vj, int ui3) { ASSERT_LSX emit_int32(insn_I3RR( vsrli_b_op, ui3, (int)vj->encoding(), (int)vd->encoding())); } + void vsrli_h(FloatRegister vd, FloatRegister vj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR( vsrli_h_op, ui4, (int)vj->encoding(), (int)vd->encoding())); } + void vsrli_w(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vsrli_w_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } + void vsrli_d(FloatRegister vd, FloatRegister vj, int ui6) { ASSERT_LSX emit_int32(insn_I6RR( vsrli_d_op, ui6, (int)vj->encoding(), (int)vd->encoding())); } + void xvsrli_b(FloatRegister xd, FloatRegister xj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvsrli_b_op, ui3, (int)xj->encoding(), (int)xd->encoding())); } + void xvsrli_h(FloatRegister xd, FloatRegister xj, int ui4) { ASSERT_LASX emit_int32(insn_I4RR(xvsrli_h_op, ui4, (int)xj->encoding(), (int)xd->encoding())); } + void xvsrli_w(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvsrli_w_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } + void xvsrli_d(FloatRegister xd, FloatRegister xj, int ui6) { ASSERT_LASX emit_int32(insn_I6RR(xvsrli_d_op, ui6, (int)xj->encoding(), (int)xd->encoding())); } + + void vsra_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsra_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vsra_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsra_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vsra_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsra_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vsra_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsra_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvsra_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsra_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvsra_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsra_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvsra_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsra_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvsra_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsra_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + 
void vsrai_b(FloatRegister vd, FloatRegister vj, int ui3) { ASSERT_LSX emit_int32(insn_I3RR( vsrai_b_op, ui3, (int)vj->encoding(), (int)vd->encoding())); } + void vsrai_h(FloatRegister vd, FloatRegister vj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR( vsrai_h_op, ui4, (int)vj->encoding(), (int)vd->encoding())); } + void vsrai_w(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vsrai_w_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } + void vsrai_d(FloatRegister vd, FloatRegister vj, int ui6) { ASSERT_LSX emit_int32(insn_I6RR( vsrai_d_op, ui6, (int)vj->encoding(), (int)vd->encoding())); } + void xvsrai_b(FloatRegister xd, FloatRegister xj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvsrai_b_op, ui3, (int)xj->encoding(), (int)xd->encoding())); } + void xvsrai_h(FloatRegister xd, FloatRegister xj, int ui4) { ASSERT_LASX emit_int32(insn_I4RR(xvsrai_h_op, ui4, (int)xj->encoding(), (int)xd->encoding())); } + void xvsrai_w(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvsrai_w_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } + void xvsrai_d(FloatRegister xd, FloatRegister xj, int ui6) { ASSERT_LASX emit_int32(insn_I6RR(xvsrai_d_op, ui6, (int)xj->encoding(), (int)xd->encoding())); } + + void vrotr_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vrotr_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vrotr_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vrotr_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vrotr_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vrotr_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vrotr_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vrotr_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvrotr_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvrotr_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvrotr_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvrotr_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvrotr_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvrotr_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvrotr_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvrotr_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vrotri_b(FloatRegister vd, FloatRegister vj, int ui3) { ASSERT_LSX emit_int32(insn_I3RR( vrotri_b_op, ui3, (int)vj->encoding(), (int)vd->encoding())); } + void vrotri_h(FloatRegister vd, FloatRegister vj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR( vrotri_h_op, ui4, (int)vj->encoding(), (int)vd->encoding())); } + void vrotri_w(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vrotri_w_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } + void vrotri_d(FloatRegister vd, FloatRegister vj, int ui6) { ASSERT_LSX emit_int32(insn_I6RR( vrotri_d_op, ui6, (int)vj->encoding(), (int)vd->encoding())); } + void xvrotri_b(FloatRegister xd, FloatRegister xj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvrotri_b_op, ui3, (int)xj->encoding(), (int)xd->encoding())); } + void 
xvrotri_h(FloatRegister xd, FloatRegister xj, int ui4) { ASSERT_LASX emit_int32(insn_I4RR(xvrotri_h_op, ui4, (int)xj->encoding(), (int)xd->encoding())); } + void xvrotri_w(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvrotri_w_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } + void xvrotri_d(FloatRegister xd, FloatRegister xj, int ui6) { ASSERT_LASX emit_int32(insn_I6RR(xvrotri_d_op, ui6, (int)xj->encoding(), (int)xd->encoding())); } + + void vsrlni_b_h(FloatRegister vd, FloatRegister vj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR( vsrlni_b_h_op, ui4, (int)vj->encoding(), (int)vd->encoding())); } + void vsrlni_h_w(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vsrlni_h_w_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } + void vsrlni_w_d(FloatRegister vd, FloatRegister vj, int ui6) { ASSERT_LSX emit_int32(insn_I6RR( vsrlni_w_d_op, ui6, (int)vj->encoding(), (int)vd->encoding())); } + void vsrlni_d_q(FloatRegister vd, FloatRegister vj, int ui7) { ASSERT_LSX emit_int32(insn_I7RR( vsrlni_d_q_op, ui7, (int)vj->encoding(), (int)vd->encoding())); } + + void vpcnt_b(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vpcnt_b_op, (int)vj->encoding(), (int)vd->encoding())); } + void vpcnt_h(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vpcnt_h_op, (int)vj->encoding(), (int)vd->encoding())); } + void vpcnt_w(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vpcnt_w_op, (int)vj->encoding(), (int)vd->encoding())); } + void vpcnt_d(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vpcnt_d_op, (int)vj->encoding(), (int)vd->encoding())); } + void xvpcnt_b(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvpcnt_b_op, (int)xj->encoding(), (int)xd->encoding())); } + void xvpcnt_h(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvpcnt_h_op, (int)xj->encoding(), (int)xd->encoding())); } + void xvpcnt_w(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvpcnt_w_op, (int)xj->encoding(), (int)xd->encoding())); } + void xvpcnt_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvpcnt_d_op, (int)xj->encoding(), (int)xd->encoding())); } + + void vbitclr_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitclr_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vbitclr_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitclr_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vbitclr_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitclr_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vbitclr_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitclr_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvbitclr_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitclr_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvbitclr_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitclr_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvbitclr_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitclr_w_op, (int)xk->encoding(), 
(int)xj->encoding(), (int)xd->encoding())); } + void xvbitclr_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitclr_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vbitclri_b(FloatRegister vd, FloatRegister vj, int ui3) { ASSERT_LSX emit_int32(insn_I3RR( vbitclri_b_op, ui3, (int)vj->encoding(), (int)vd->encoding())); } + void vbitclri_h(FloatRegister vd, FloatRegister vj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR( vbitclri_h_op, ui4, (int)vj->encoding(), (int)vd->encoding())); } + void vbitclri_w(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vbitclri_w_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } + void vbitclri_d(FloatRegister vd, FloatRegister vj, int ui6) { ASSERT_LSX emit_int32(insn_I6RR( vbitclri_d_op, ui6, (int)vj->encoding(), (int)vd->encoding())); } + void xvbitclri_b(FloatRegister xd, FloatRegister xj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvbitclri_b_op, ui3, (int)xj->encoding(), (int)xd->encoding())); } + void xvbitclri_h(FloatRegister xd, FloatRegister xj, int ui4) { ASSERT_LASX emit_int32(insn_I4RR(xvbitclri_h_op, ui4, (int)xj->encoding(), (int)xd->encoding())); } + void xvbitclri_w(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvbitclri_w_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } + void xvbitclri_d(FloatRegister xd, FloatRegister xj, int ui6) { ASSERT_LASX emit_int32(insn_I6RR(xvbitclri_d_op, ui6, (int)xj->encoding(), (int)xd->encoding())); } + + void vbitset_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitset_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vbitset_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitset_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vbitset_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitset_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vbitset_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitset_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvbitset_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitset_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvbitset_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitset_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvbitset_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitset_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvbitset_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitset_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vbitseti_b(FloatRegister vd, FloatRegister vj, int ui3) { ASSERT_LSX emit_int32(insn_I3RR( vbitseti_b_op, ui3, (int)vj->encoding(), (int)vd->encoding())); } + void vbitseti_h(FloatRegister vd, FloatRegister vj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR( vbitseti_h_op, ui4, (int)vj->encoding(), (int)vd->encoding())); } + void vbitseti_w(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vbitseti_w_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } + void 
vbitseti_d(FloatRegister vd, FloatRegister vj, int ui6) { ASSERT_LSX emit_int32(insn_I6RR( vbitseti_d_op, ui6, (int)vj->encoding(), (int)vd->encoding())); } + void xvbitseti_b(FloatRegister xd, FloatRegister xj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvbitseti_b_op, ui3, (int)xj->encoding(), (int)xd->encoding())); } + void xvbitseti_h(FloatRegister xd, FloatRegister xj, int ui4) { ASSERT_LASX emit_int32(insn_I4RR(xvbitseti_h_op, ui4, (int)xj->encoding(), (int)xd->encoding())); } + void xvbitseti_w(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvbitseti_w_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } + void xvbitseti_d(FloatRegister xd, FloatRegister xj, int ui6) { ASSERT_LASX emit_int32(insn_I6RR(xvbitseti_d_op, ui6, (int)xj->encoding(), (int)xd->encoding())); } + + void vbitrev_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitrev_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vbitrev_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitrev_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vbitrev_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitrev_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vbitrev_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitrev_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvbitrev_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitrev_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvbitrev_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitrev_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvbitrev_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitrev_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvbitrev_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitrev_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vbitrevi_b(FloatRegister vd, FloatRegister vj, int ui3) { ASSERT_LSX emit_int32(insn_I3RR( vbitrevi_b_op, ui3, (int)vj->encoding(), (int)vd->encoding())); } + void vbitrevi_h(FloatRegister vd, FloatRegister vj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR( vbitrevi_h_op, ui4, (int)vj->encoding(), (int)vd->encoding())); } + void vbitrevi_w(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vbitrevi_w_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } + void vbitrevi_d(FloatRegister vd, FloatRegister vj, int ui6) { ASSERT_LSX emit_int32(insn_I6RR( vbitrevi_d_op, ui6, (int)vj->encoding(), (int)vd->encoding())); } + void xvbitrevi_b(FloatRegister xd, FloatRegister xj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvbitrevi_b_op, ui3, (int)xj->encoding(), (int)xd->encoding())); } + void xvbitrevi_h(FloatRegister xd, FloatRegister xj, int ui4) { ASSERT_LASX emit_int32(insn_I4RR(xvbitrevi_h_op, ui4, (int)xj->encoding(), (int)xd->encoding())); } + void xvbitrevi_w(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvbitrevi_w_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } + void xvbitrevi_d(FloatRegister xd, FloatRegister xj, int ui6) { ASSERT_LASX 
emit_int32(insn_I6RR(xvbitrevi_d_op, ui6, (int)xj->encoding(), (int)xd->encoding())); } + + void vfadd_s(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfadd_s_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfadd_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfadd_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvfadd_s(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfadd_s_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfadd_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfadd_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vfsub_s(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfsub_s_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfsub_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfsub_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvfsub_s(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfsub_s_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfsub_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfsub_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vfmul_s(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfmul_s_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfmul_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfmul_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvfmul_s(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfmul_s_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfmul_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfmul_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vfdiv_s(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfdiv_s_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfdiv_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfdiv_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvfdiv_s(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfdiv_s_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfdiv_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfdiv_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vfmadd_s(FloatRegister vd, FloatRegister vj, FloatRegister vk, FloatRegister va) { ASSERT_LSX emit_int32(insn_RRRR( vfmadd_s_op, (int)va->encoding(), (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfmadd_d(FloatRegister vd, FloatRegister vj, FloatRegister vk, FloatRegister va) { ASSERT_LSX emit_int32(insn_RRRR( vfmadd_d_op, (int)va->encoding(), (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvfmadd_s(FloatRegister xd, FloatRegister xj, FloatRegister xk, 
FloatRegister xa) { ASSERT_LASX emit_int32(insn_RRRR(xvfmadd_s_op, (int)xa->encoding(), (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfmadd_d(FloatRegister xd, FloatRegister xj, FloatRegister xk, FloatRegister xa) { ASSERT_LASX emit_int32(insn_RRRR(xvfmadd_d_op, (int)xa->encoding(), (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vfmsub_s(FloatRegister vd, FloatRegister vj, FloatRegister vk, FloatRegister va) { ASSERT_LSX emit_int32(insn_RRRR( vfmsub_s_op, (int)va->encoding(), (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfmsub_d(FloatRegister vd, FloatRegister vj, FloatRegister vk, FloatRegister va) { ASSERT_LSX emit_int32(insn_RRRR( vfmsub_d_op, (int)va->encoding(), (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvfmsub_s(FloatRegister xd, FloatRegister xj, FloatRegister xk, FloatRegister xa) { ASSERT_LASX emit_int32(insn_RRRR(xvfmsub_s_op, (int)xa->encoding(), (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfmsub_d(FloatRegister xd, FloatRegister xj, FloatRegister xk, FloatRegister xa) { ASSERT_LASX emit_int32(insn_RRRR(xvfmsub_d_op, (int)xa->encoding(), (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vfnmadd_s(FloatRegister vd, FloatRegister vj, FloatRegister vk, FloatRegister va) { ASSERT_LSX emit_int32(insn_RRRR( vfnmadd_s_op, (int)va->encoding(), (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfnmadd_d(FloatRegister vd, FloatRegister vj, FloatRegister vk, FloatRegister va) { ASSERT_LSX emit_int32(insn_RRRR( vfnmadd_d_op, (int)va->encoding(), (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvfnmadd_s(FloatRegister xd, FloatRegister xj, FloatRegister xk, FloatRegister xa) { ASSERT_LASX emit_int32(insn_RRRR(xvfnmadd_s_op, (int)xa->encoding(), (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfnmadd_d(FloatRegister xd, FloatRegister xj, FloatRegister xk, FloatRegister xa) { ASSERT_LASX emit_int32(insn_RRRR(xvfnmadd_d_op, (int)xa->encoding(), (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vfnmsub_s(FloatRegister vd, FloatRegister vj, FloatRegister vk, FloatRegister va) { ASSERT_LSX emit_int32(insn_RRRR( vfnmsub_s_op, (int)va->encoding(), (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfnmsub_d(FloatRegister vd, FloatRegister vj, FloatRegister vk, FloatRegister va) { ASSERT_LSX emit_int32(insn_RRRR( vfnmsub_d_op, (int)va->encoding(), (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvfnmsub_s(FloatRegister xd, FloatRegister xj, FloatRegister xk, FloatRegister xa) { ASSERT_LASX emit_int32(insn_RRRR(xvfnmsub_s_op, (int)xa->encoding(), (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfnmsub_d(FloatRegister xd, FloatRegister xj, FloatRegister xk, FloatRegister xa) { ASSERT_LASX emit_int32(insn_RRRR(xvfnmsub_d_op, (int)xa->encoding(), (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vfmax_s(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfmax_s_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfmax_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfmax_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvfmax_s(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX 
emit_int32(insn_RRR(xvfmax_s_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfmax_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfmax_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vfmin_s(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfmin_s_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfmin_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfmin_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvfmin_s(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfmin_s_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfmin_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfmin_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vfclass_s(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfclass_s_op, (int)vj->encoding(), (int)vd->encoding())); } + void vfclass_d(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfclass_d_op, (int)vj->encoding(), (int)vd->encoding())); } + void xvfclass_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfclass_s_op, (int)xj->encoding(), (int)xd->encoding())); } + void xvfclass_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfclass_d_op, (int)xj->encoding(), (int)xd->encoding())); } + + void vfsqrt_s(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfsqrt_s_op, (int)vj->encoding(), (int)vd->encoding())); } + void vfsqrt_d(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfsqrt_d_op, (int)vj->encoding(), (int)vd->encoding())); } + void xvfsqrt_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfsqrt_s_op, (int)xj->encoding(), (int)xd->encoding())); } + void xvfsqrt_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfsqrt_d_op, (int)xj->encoding(), (int)xd->encoding())); } + + void vfcvtl_s_h(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vfcvtl_s_h_op, (int)rj->encoding(), (int)vd->encoding())); } + void vfcvtl_d_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vfcvtl_d_s_op, (int)rj->encoding(), (int)vd->encoding())); } + void xvfcvtl_s_h(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfcvtl_s_h_op, (int)xj->encoding(), (int)xd->encoding())); } + void xvfcvtl_d_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfcvtl_d_s_op, (int)xj->encoding(), (int)xd->encoding())); } + + void vfcvth_s_h(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vfcvth_s_h_op, (int)rj->encoding(), (int)vd->encoding())); } + void vfcvth_d_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vfcvth_d_s_op, (int)rj->encoding(), (int)vd->encoding())); } + void xvfcvth_s_h(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfcvth_s_h_op, (int)xj->encoding(), (int)xd->encoding())); } + void xvfcvth_d_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfcvth_d_s_op, (int)xj->encoding(), (int)xd->encoding())); } + + void vfcvt_h_s(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfcvt_h_s_op, (int)vk->encoding(), (int)vj->encoding(), 
(int)vd->encoding())); } + void vfcvt_s_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfcvt_s_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvfcvt_h_s(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfcvt_h_s_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfcvt_s_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfcvt_s_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vfrintrne_s(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfrintrne_s_op, (int)vj->encoding(), (int)vd->encoding())); } + void vfrintrne_d(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfrintrne_d_op, (int)vj->encoding(), (int)vd->encoding())); } + void xvfrintrne_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfrintrne_s_op, (int)xj->encoding(), (int)xd->encoding())); } + void xvfrintrne_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfrintrne_d_op, (int)xj->encoding(), (int)xd->encoding())); } + + void vfrintrz_s(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfrintrz_s_op, (int)vj->encoding(), (int)vd->encoding())); } + void vfrintrz_d(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfrintrz_d_op, (int)vj->encoding(), (int)vd->encoding())); } + void xvfrintrz_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfrintrz_s_op, (int)xj->encoding(), (int)xd->encoding())); } + void xvfrintrz_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfrintrz_d_op, (int)xj->encoding(), (int)xd->encoding())); } + + void vfrintrp_s(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfrintrp_s_op, (int)vj->encoding(), (int)vd->encoding())); } + void vfrintrp_d(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfrintrp_d_op, (int)vj->encoding(), (int)vd->encoding())); } + void xvfrintrp_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfrintrp_s_op, (int)xj->encoding(), (int)xd->encoding())); } + void xvfrintrp_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfrintrp_d_op, (int)xj->encoding(), (int)xd->encoding())); } + + void vfrintrm_s(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfrintrm_s_op, (int)vj->encoding(), (int)vd->encoding())); } + void vfrintrm_d(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfrintrm_d_op, (int)vj->encoding(), (int)vd->encoding())); } + void xvfrintrm_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfrintrm_s_op, (int)xj->encoding(), (int)xd->encoding())); } + void xvfrintrm_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfrintrm_d_op, (int)xj->encoding(), (int)xd->encoding())); } + + void vfrint_s(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfrint_s_op, (int)vj->encoding(), (int)vd->encoding())); } + void vfrint_d(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfrint_d_op, (int)vj->encoding(), (int)vd->encoding())); } + void xvfrint_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfrint_s_op, (int)xj->encoding(), (int)xd->encoding())); } + void xvfrint_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfrint_d_op, (int)xj->encoding(), 
(int)xd->encoding())); } + + void vftintrne_w_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrne_w_s_op, (int)rj->encoding(), (int)vd->encoding())); } + void vftintrne_l_d(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrne_l_d_op, (int)rj->encoding(), (int)vd->encoding())); } + void xvftintrne_w_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrne_w_s_op, (int)xj->encoding(), (int)xd->encoding())); } + void xvftintrne_l_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrne_l_d_op, (int)xj->encoding(), (int)xd->encoding())); } + + void vftintrz_w_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrz_w_s_op, (int)rj->encoding(), (int)vd->encoding())); } + void vftintrz_l_d(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrz_l_d_op, (int)rj->encoding(), (int)vd->encoding())); } + void xvftintrz_w_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrz_w_s_op, (int)xj->encoding(), (int)xd->encoding())); } + void xvftintrz_l_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrz_l_d_op, (int)xj->encoding(), (int)xd->encoding())); } + + void vftintrp_w_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrp_w_s_op, (int)rj->encoding(), (int)vd->encoding())); } + void vftintrp_l_d(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrp_l_d_op, (int)rj->encoding(), (int)vd->encoding())); } + void xvftintrp_w_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrp_w_s_op, (int)xj->encoding(), (int)xd->encoding())); } + void xvftintrp_l_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrp_l_d_op, (int)xj->encoding(), (int)xd->encoding())); } + + void vftintrm_w_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrm_w_s_op, (int)rj->encoding(), (int)vd->encoding())); } + void vftintrm_l_d(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrm_l_d_op, (int)rj->encoding(), (int)vd->encoding())); } + void xvftintrm_w_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrm_w_s_op, (int)xj->encoding(), (int)xd->encoding())); } + void xvftintrm_l_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrm_l_d_op, (int)xj->encoding(), (int)xd->encoding())); } + + void vftint_w_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftint_w_s_op, (int)rj->encoding(), (int)vd->encoding())); } + void vftint_l_d(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftint_l_d_op, (int)rj->encoding(), (int)vd->encoding())); } + void xvftint_w_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftint_w_s_op, (int)xj->encoding(), (int)xd->encoding())); } + void xvftint_l_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftint_l_d_op, (int)xj->encoding(), (int)xd->encoding())); } + + void vftintrne_w_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vftintrne_w_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvftintrne_w_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvftintrne_w_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vftintrz_w_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) 
{ ASSERT_LSX emit_int32(insn_RRR( vftintrz_w_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvftintrz_w_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvftintrz_w_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vftintrp_w_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vftintrp_w_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvftintrp_w_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvftintrp_w_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vftintrm_w_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vftintrm_w_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvftintrm_w_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvftintrm_w_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vftint_w_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vftint_w_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvftint_w_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvftint_w_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vftintrnel_l_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrnel_l_s_op, (int)rj->encoding(), (int)vd->encoding())); } + void xvftintrnel_l_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrnel_l_s_op, (int)xj->encoding(), (int)xd->encoding())); } + + void vftintrneh_l_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrneh_l_s_op, (int)rj->encoding(), (int)vd->encoding())); } + void xvftintrneh_l_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrneh_l_s_op, (int)xj->encoding(), (int)xd->encoding())); } + + void vftintrzl_l_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrzl_l_s_op, (int)rj->encoding(), (int)vd->encoding())); } + void xvftintrzl_l_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrzl_l_s_op, (int)xj->encoding(), (int)xd->encoding())); } + + void vftintrzh_l_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrzh_l_s_op, (int)rj->encoding(), (int)vd->encoding())); } + void xvftintrzh_l_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrzh_l_s_op, (int)xj->encoding(), (int)xd->encoding())); } + + void vftintrpl_l_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrpl_l_s_op, (int)rj->encoding(), (int)vd->encoding())); } + void xvftintrpl_l_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrpl_l_s_op, (int)xj->encoding(), (int)xd->encoding())); } + + void vftintrph_l_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrph_l_s_op, (int)rj->encoding(), (int)vd->encoding())); } + void xvftintrph_l_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrph_l_s_op, (int)xj->encoding(), (int)xd->encoding())); } + + void vftintrml_l_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrml_l_s_op, (int)rj->encoding(), (int)vd->encoding())); } + void xvftintrml_l_s(FloatRegister xd, 
FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrml_l_s_op, (int)xj->encoding(), (int)xd->encoding())); } + + void vftintrmh_l_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrmh_l_s_op, (int)rj->encoding(), (int)vd->encoding())); } + void xvftintrmh_l_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrmh_l_s_op, (int)xj->encoding(), (int)xd->encoding())); } + + void vftintl_l_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintl_l_s_op, (int)rj->encoding(), (int)vd->encoding())); } + void xvftintl_l_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintl_l_s_op, (int)xj->encoding(), (int)xd->encoding())); } + + void vftinth_l_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftinth_l_s_op, (int)rj->encoding(), (int)vd->encoding())); } + void xvftinth_l_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftinth_l_s_op, (int)xj->encoding(), (int)xd->encoding())); } + + void vffint_s_w(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vffint_s_w_op, (int)rj->encoding(), (int)vd->encoding())); } + void vffint_d_l(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vffint_d_l_op, (int)rj->encoding(), (int)vd->encoding())); } + void xvffint_s_w(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvffint_s_w_op, (int)xj->encoding(), (int)xd->encoding())); } + void xvffint_d_l(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvffint_d_l_op, (int)xj->encoding(), (int)xd->encoding())); } + + void vffint_s_l(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vffint_s_l_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvffint_s_l(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvffint_s_l_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vffintl_d_w(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vffintl_d_w_op, (int)rj->encoding(), (int)vd->encoding())); } + void xvffintl_d_w(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvffintl_d_w_op, (int)xj->encoding(), (int)xd->encoding())); } + + void vffinth_d_w(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vffinth_d_w_op, (int)rj->encoding(), (int)vd->encoding())); } + void xvffinth_d_w(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvffinth_d_w_op, (int)xj->encoding(), (int)xd->encoding())); } + + void vseq_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vseq_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vseq_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vseq_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vseq_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vseq_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vseq_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vseq_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvseq_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvseq_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvseq_h(FloatRegister 
xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvseq_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvseq_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvseq_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvseq_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvseq_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vsle_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsle_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vsle_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsle_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vsle_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsle_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vsle_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsle_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvsle_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsle_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvsle_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsle_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvsle_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsle_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvsle_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsle_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vsle_bu(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsle_bu_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vsle_hu(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsle_hu_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vsle_wu(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsle_wu_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vsle_du(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsle_du_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvsle_bu(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsle_bu_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvsle_hu(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsle_hu_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvsle_wu(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsle_wu_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvsle_du(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsle_du_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vslt_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vslt_b_op, 
(int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vslt_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vslt_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vslt_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vslt_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vslt_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vslt_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvslt_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvslt_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvslt_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvslt_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvslt_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvslt_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvslt_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvslt_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vslt_bu(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vslt_bu_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vslt_hu(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vslt_hu_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vslt_wu(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vslt_wu_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vslt_du(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vslt_du_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvslt_bu(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvslt_bu_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvslt_hu(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvslt_hu_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvslt_wu(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvslt_wu_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvslt_du(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvslt_du_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vslti_bu(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vslti_bu_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } + void vslti_hu(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vslti_hu_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } + void vslti_wu(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vslti_wu_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } + void vslti_du(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vslti_du_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } + void xvslti_bu(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvslti_bu_op, 
ui5, (int)xj->encoding(), (int)xd->encoding())); } + void xvslti_hu(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvslti_hu_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } + void xvslti_wu(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvslti_wu_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } + void xvslti_du(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvslti_du_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } + + void vfcmp_caf_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_caf , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfcmp_cun_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_cun , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfcmp_ceq_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_ceq , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfcmp_cueq_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_cueq, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfcmp_clt_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_clt , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfcmp_cult_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_cult, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfcmp_cle_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_cle , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfcmp_cule_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_cule, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfcmp_cne_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_cne , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfcmp_cor_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_cor , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfcmp_cune_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_cune, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfcmp_saf_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_saf , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfcmp_sun_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_sun , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfcmp_seq_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_seq , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfcmp_sueq_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_sueq, 
(int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfcmp_slt_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_slt , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfcmp_sult_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_sult, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfcmp_sle_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_sle , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfcmp_sule_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_sule, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfcmp_sne_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_sne , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfcmp_sor_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_sor , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfcmp_sune_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_sune, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + + void vfcmp_caf_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_caf , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfcmp_cun_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_cun , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfcmp_ceq_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_ceq , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfcmp_cueq_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_cueq, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfcmp_clt_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_clt , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfcmp_cult_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_cult, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfcmp_cle_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_cle , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfcmp_cule_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_cule, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfcmp_cne_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_cne , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfcmp_cor_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_cor , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void 
vfcmp_cune_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_cune, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfcmp_saf_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_saf , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfcmp_sun_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_sun , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfcmp_seq_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_seq , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfcmp_sueq_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_sueq, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfcmp_slt_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_slt , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfcmp_sult_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_sult, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfcmp_sle_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_sle , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfcmp_sule_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_sule, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfcmp_sne_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_sne , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfcmp_sor_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_sor , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfcmp_sune_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_sune, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + + void xvfcmp_caf_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_caf , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfcmp_cun_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_cun , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfcmp_ceq_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_ceq , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfcmp_cueq_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_cueq, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfcmp_clt_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_clt , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfcmp_cult_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) 
{ ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_cult, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfcmp_cle_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_cle , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfcmp_cule_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_cule, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfcmp_cne_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_cne , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfcmp_cor_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_cor , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfcmp_cune_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_cune, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfcmp_saf_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_saf , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfcmp_sun_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_sun , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfcmp_seq_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_seq , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfcmp_sueq_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_sueq, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfcmp_slt_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_slt , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfcmp_sult_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_sult, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfcmp_sle_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_sle , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfcmp_sule_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_sule, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfcmp_sne_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_sne , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfcmp_sor_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_sor , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfcmp_sune_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_sune, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void xvfcmp_caf_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX 
emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_caf , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfcmp_cun_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_cun , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfcmp_ceq_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_ceq , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfcmp_cueq_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_cueq, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfcmp_clt_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_clt , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfcmp_cult_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_cult, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfcmp_cle_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_cle , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfcmp_cule_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_cule, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfcmp_cne_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_cne , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfcmp_cor_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_cor , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfcmp_cune_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_cune, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfcmp_saf_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_saf , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfcmp_sun_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_sun , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfcmp_seq_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_seq , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfcmp_sueq_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_sueq, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfcmp_slt_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_slt , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfcmp_sult_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_sult, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfcmp_sle_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, 
fcmp_sle , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfcmp_sule_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_sule, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfcmp_sne_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_sne , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfcmp_sor_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_sor , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfcmp_sune_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_sune, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vbitsel_v(FloatRegister vd, FloatRegister vj, FloatRegister vk, FloatRegister va) { ASSERT_LSX emit_int32(insn_RRRR( vbitsel_v_op, (int)va->encoding(), (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvbitsel_v(FloatRegister xd, FloatRegister xj, FloatRegister xk, FloatRegister xa) { ASSERT_LASX emit_int32(insn_RRRR(xvbitsel_v_op, (int)xa->encoding(), (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vinsgr2vr_b(FloatRegister vd, Register rj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR( vinsgr2vr_b_op, ui4, (int)rj->encoding(), (int)vd->encoding())); } + void vinsgr2vr_h(FloatRegister vd, Register rj, int ui3) { ASSERT_LSX emit_int32(insn_I3RR( vinsgr2vr_h_op, ui3, (int)rj->encoding(), (int)vd->encoding())); } + void vinsgr2vr_w(FloatRegister vd, Register rj, int ui2) { ASSERT_LSX emit_int32(insn_I2RR( vinsgr2vr_w_op, ui2, (int)rj->encoding(), (int)vd->encoding())); } + void vinsgr2vr_d(FloatRegister vd, Register rj, int ui1) { ASSERT_LSX emit_int32(insn_I1RR( vinsgr2vr_d_op, ui1, (int)rj->encoding(), (int)vd->encoding())); } + + void xvinsgr2vr_w(FloatRegister xd, Register rj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvinsgr2vr_w_op, ui3, (int)rj->encoding(), (int)xd->encoding())); } + void xvinsgr2vr_d(FloatRegister xd, Register rj, int ui2) { ASSERT_LASX emit_int32(insn_I2RR(xvinsgr2vr_d_op, ui2, (int)rj->encoding(), (int)xd->encoding())); } + + void vpickve2gr_b(Register rd, FloatRegister vj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR( vpickve2gr_b_op, ui4, (int)vj->encoding(), (int)rd->encoding())); } + void vpickve2gr_h(Register rd, FloatRegister vj, int ui3) { ASSERT_LSX emit_int32(insn_I3RR( vpickve2gr_h_op, ui3, (int)vj->encoding(), (int)rd->encoding())); } + void vpickve2gr_w(Register rd, FloatRegister vj, int ui2) { ASSERT_LSX emit_int32(insn_I2RR( vpickve2gr_w_op, ui2, (int)vj->encoding(), (int)rd->encoding())); } + void vpickve2gr_d(Register rd, FloatRegister vj, int ui1) { ASSERT_LSX emit_int32(insn_I1RR( vpickve2gr_d_op, ui1, (int)vj->encoding(), (int)rd->encoding())); } + + void vpickve2gr_bu(Register rd, FloatRegister vj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR( vpickve2gr_bu_op, ui4, (int)vj->encoding(), (int)rd->encoding())); } + void vpickve2gr_hu(Register rd, FloatRegister vj, int ui3) { ASSERT_LSX emit_int32(insn_I3RR( vpickve2gr_hu_op, ui3, (int)vj->encoding(), (int)rd->encoding())); } + void vpickve2gr_wu(Register rd, FloatRegister vj, int ui2) { ASSERT_LSX emit_int32(insn_I2RR( vpickve2gr_wu_op, ui2, (int)vj->encoding(), (int)rd->encoding())); } + void vpickve2gr_du(Register rd, FloatRegister vj, int ui1) { 
ASSERT_LSX emit_int32(insn_I1RR( vpickve2gr_du_op, ui1, (int)vj->encoding(), (int)rd->encoding())); } + + void xvpickve2gr_w(Register rd, FloatRegister xj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvpickve2gr_w_op, ui3, (int)xj->encoding(), (int)rd->encoding())); } + void xvpickve2gr_d(Register rd, FloatRegister xj, int ui2) { ASSERT_LASX emit_int32(insn_I2RR(xvpickve2gr_d_op, ui2, (int)xj->encoding(), (int)rd->encoding())); } + + void xvpickve2gr_wu(Register rd, FloatRegister xj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvpickve2gr_wu_op, ui3, (int)xj->encoding(), (int)rd->encoding())); } + void xvpickve2gr_du(Register rd, FloatRegister xj, int ui2) { ASSERT_LASX emit_int32(insn_I2RR(xvpickve2gr_du_op, ui2, (int)xj->encoding(), (int)rd->encoding())); } + + void vreplgr2vr_b(FloatRegister vd, Register rj) { ASSERT_LSX emit_int32(insn_RR( vreplgr2vr_b_op, (int)rj->encoding(), (int)vd->encoding())); } + void vreplgr2vr_h(FloatRegister vd, Register rj) { ASSERT_LSX emit_int32(insn_RR( vreplgr2vr_h_op, (int)rj->encoding(), (int)vd->encoding())); } + void vreplgr2vr_w(FloatRegister vd, Register rj) { ASSERT_LSX emit_int32(insn_RR( vreplgr2vr_w_op, (int)rj->encoding(), (int)vd->encoding())); } + void vreplgr2vr_d(FloatRegister vd, Register rj) { ASSERT_LSX emit_int32(insn_RR( vreplgr2vr_d_op, (int)rj->encoding(), (int)vd->encoding())); } + void xvreplgr2vr_b(FloatRegister xd, Register rj) { ASSERT_LASX emit_int32(insn_RR(xvreplgr2vr_b_op, (int)rj->encoding(), (int)xd->encoding())); } + void xvreplgr2vr_h(FloatRegister xd, Register rj) { ASSERT_LASX emit_int32(insn_RR(xvreplgr2vr_h_op, (int)rj->encoding(), (int)xd->encoding())); } + void xvreplgr2vr_w(FloatRegister xd, Register rj) { ASSERT_LASX emit_int32(insn_RR(xvreplgr2vr_w_op, (int)rj->encoding(), (int)xd->encoding())); } + void xvreplgr2vr_d(FloatRegister xd, Register rj) { ASSERT_LASX emit_int32(insn_RR(xvreplgr2vr_d_op, (int)rj->encoding(), (int)xd->encoding())); } + + void vreplvei_b(FloatRegister vd, FloatRegister vj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR(vreplvei_b_op, ui4, (int)vj->encoding(), (int)vd->encoding())); } + void vreplvei_h(FloatRegister vd, FloatRegister vj, int ui3) { ASSERT_LSX emit_int32(insn_I3RR(vreplvei_h_op, ui3, (int)vj->encoding(), (int)vd->encoding())); } + void vreplvei_w(FloatRegister vd, FloatRegister vj, int ui2) { ASSERT_LSX emit_int32(insn_I2RR(vreplvei_w_op, ui2, (int)vj->encoding(), (int)vd->encoding())); } + void vreplvei_d(FloatRegister vd, FloatRegister vj, int ui1) { ASSERT_LSX emit_int32(insn_I1RR(vreplvei_d_op, ui1, (int)vj->encoding(), (int)vd->encoding())); } + + void xvreplve0_b(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvreplve0_b_op, (int)xj->encoding(), (int)xd->encoding())); } + void xvreplve0_h(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvreplve0_h_op, (int)xj->encoding(), (int)xd->encoding())); } + void xvreplve0_w(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvreplve0_w_op, (int)xj->encoding(), (int)xd->encoding())); } + void xvreplve0_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvreplve0_d_op, (int)xj->encoding(), (int)xd->encoding())); } + void xvreplve0_q(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvreplve0_q_op, (int)xj->encoding(), (int)xd->encoding())); } + + void xvinsve0_w(FloatRegister xd, FloatRegister xj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvinsve0_w_op, ui3, (int)xj->encoding(), (int)xd->encoding())); } + void 
xvinsve0_d(FloatRegister xd, FloatRegister xj, int ui2) { ASSERT_LASX emit_int32(insn_I2RR(xvinsve0_d_op, ui2, (int)xj->encoding(), (int)xd->encoding())); } + + void xvpickve_w(FloatRegister xd, FloatRegister xj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvpickve_w_op, ui3, (int)xj->encoding(), (int)xd->encoding())); } + void xvpickve_d(FloatRegister xd, FloatRegister xj, int ui2) { ASSERT_LASX emit_int32(insn_I2RR(xvpickve_d_op, ui2, (int)xj->encoding(), (int)xd->encoding())); } + + void vshuf_b(FloatRegister vd, FloatRegister vj, FloatRegister vk, FloatRegister va) { ASSERT_LSX emit_int32(insn_RRRR( vshuf_b_op, (int)va->encoding(), (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvshuf_b(FloatRegister xd, FloatRegister xj, FloatRegister xk, FloatRegister xa) { ASSERT_LASX emit_int32(insn_RRRR(xvshuf_b_op, (int)xa->encoding(), (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vshuf_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vshuf_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vshuf_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vshuf_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vshuf_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vshuf_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + + void xvshuf_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvshuf_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvshuf_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvshuf_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvshuf_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvshuf_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void xvperm_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvperm_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vshuf4i_b(FloatRegister vd, FloatRegister vj, int ui8) { ASSERT_LSX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR( vshuf4i_b_op, ui8, (int)vj->encoding(), (int)vd->encoding())); } + void vshuf4i_h(FloatRegister vd, FloatRegister vj, int ui8) { ASSERT_LSX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR( vshuf4i_h_op, ui8, (int)vj->encoding(), (int)vd->encoding())); } + void vshuf4i_w(FloatRegister vd, FloatRegister vj, int ui8) { ASSERT_LSX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR( vshuf4i_w_op, ui8, (int)vj->encoding(), (int)vd->encoding())); } + void xvshuf4i_b(FloatRegister xd, FloatRegister xj, int ui8) { ASSERT_LASX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR(xvshuf4i_b_op, ui8, (int)xj->encoding(), (int)xd->encoding())); } + void xvshuf4i_h(FloatRegister xd, FloatRegister xj, int ui8) { ASSERT_LASX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR(xvshuf4i_h_op, ui8, (int)xj->encoding(), (int)xd->encoding())); } + void xvshuf4i_w(FloatRegister xd, FloatRegister xj, int ui8) { ASSERT_LASX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR(xvshuf4i_w_op, ui8, (int)xj->encoding(), (int)xd->encoding())); } + + void vshuf4i_d(FloatRegister 
vd, FloatRegister vj, int ui8) { ASSERT_LSX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR( vshuf4i_d_op, ui8, (int)vj->encoding(), (int)vd->encoding())); } + void xvshuf4i_d(FloatRegister xd, FloatRegister xj, int ui8) { ASSERT_LASX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR(xvshuf4i_d_op, ui8, (int)xj->encoding(), (int)xd->encoding())); } + + void vpermi_w(FloatRegister vd, FloatRegister vj, int ui8) { ASSERT_LSX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR( vpermi_w_op, ui8, (int)vj->encoding(), (int)vd->encoding())); } + void xvpermi_w(FloatRegister xd, FloatRegister xj, int ui8) { ASSERT_LASX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR(xvpermi_w_op, ui8, (int)xj->encoding(), (int)xd->encoding())); } + + void xvpermi_d(FloatRegister xd, FloatRegister xj, int ui8) { ASSERT_LASX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR(xvpermi_d_op, ui8, (int)xj->encoding(), (int)xd->encoding())); } + + void xvpermi_q(FloatRegister xd, FloatRegister xj, int ui8) { ASSERT_LASX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR(xvpermi_q_op, ui8, (int)xj->encoding(), (int)xd->encoding())); } + + void vld(FloatRegister vd, Register rj, int si12) { ASSERT_LSX assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR( vld_op, si12, (int)rj->encoding(), (int)vd->encoding()));} + void xvld(FloatRegister xd, Register rj, int si12) { ASSERT_LASX assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(xvld_op, si12, (int)rj->encoding(), (int)xd->encoding()));} + + void vst(FloatRegister vd, Register rj, int si12) { ASSERT_LSX assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR( vst_op, si12, (int)rj->encoding(), (int)vd->encoding()));} + void xvst(FloatRegister xd, Register rj, int si12) { ASSERT_LASX assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(xvst_op, si12, (int)rj->encoding(), (int)xd->encoding()));} + + void vldx(FloatRegister vd, Register rj, Register rk) { ASSERT_LSX emit_int32(insn_RRR( vldx_op, (int)rk->encoding(), (int)rj->encoding(), (int)vd->encoding())); } + void xvldx(FloatRegister xd, Register rj, Register rk) { ASSERT_LASX emit_int32(insn_RRR(xvldx_op, (int)rk->encoding(), (int)rj->encoding(), (int)xd->encoding())); } + + void vstx(FloatRegister vd, Register rj, Register rk) { ASSERT_LSX emit_int32(insn_RRR( vstx_op, (int)rk->encoding(), (int)rj->encoding(), (int)vd->encoding())); } + void xvstx(FloatRegister xd, Register rj, Register rk) { ASSERT_LASX emit_int32(insn_RRR(xvstx_op, (int)rk->encoding(), (int)rj->encoding(), (int)xd->encoding())); } + +#undef ASSERT_LSX +#undef ASSERT_LASX + +public: + // Creation + Assembler(CodeBuffer* code) : AbstractAssembler(code) {} + + // Decoding + static address locate_operand(address inst, WhichOperand which); + static address locate_next_instruction(address inst); +}; + +#endif // CPU_LOONGARCH_ASSEMBLER_LOONGARCH_HPP diff --git a/src/hotspot/cpu/loongarch/assembler_loongarch.inline.hpp b/src/hotspot/cpu/loongarch/assembler_loongarch.inline.hpp new file mode 100644 index 00000000000..9ca0cd45047 --- /dev/null +++ b/src/hotspot/cpu/loongarch/assembler_loongarch.inline.hpp @@ -0,0 +1,33 @@ +/* + * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. 
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_LOONGARCH_ASSEMBLER_LOONGARCH_INLINE_HPP +#define CPU_LOONGARCH_ASSEMBLER_LOONGARCH_INLINE_HPP + +#include "asm/assembler.inline.hpp" +#include "asm/codeBuffer.hpp" +#include "code/codeCache.hpp" + +#endif // CPU_LOONGARCH_ASSEMBLER_LOONGARCH_INLINE_HPP diff --git a/src/hotspot/cpu/loongarch/bytes_loongarch.hpp b/src/hotspot/cpu/loongarch/bytes_loongarch.hpp new file mode 100644 index 00000000000..c15344eb390 --- /dev/null +++ b/src/hotspot/cpu/loongarch/bytes_loongarch.hpp @@ -0,0 +1,73 @@ +/* + * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_LOONGARCH_BYTES_LOONGARCH_HPP +#define CPU_LOONGARCH_BYTES_LOONGARCH_HPP + +#include "memory/allocation.hpp" + +class Bytes: AllStatic { + public: + // Returns true if the byte ordering used by Java is different from the native byte ordering + // of the underlying machine. For example, this is true for Intel x86, but false for Solaris + // on Sparc. 
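+ // Note (added comment): LoongArch64 is little-endian, while Java class-file data
+ // (and the values handled by get_Java_*/put_Java_* below) are big-endian, so the
+ // two orderings differ. For example, the byte pair {0x12, 0x34} denotes 0x1234 in
+ // Java ordering but reads as 0x3412 through get_native_u2.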
+ // we use LoongArch, so return true + static inline bool is_Java_byte_ordering_different(){ return true; } + + + // Efficient reading and writing of unaligned unsigned data in platform-specific byte ordering + // (no special code is needed since LoongArch CPUs can access unaligned data) + static inline u2 get_native_u2(address p) { return *(u2*)p; } + static inline u4 get_native_u4(address p) { return *(u4*)p; } + static inline u8 get_native_u8(address p) { return *(u8*)p; } + + static inline void put_native_u2(address p, u2 x) { *(u2*)p = x; } + static inline void put_native_u4(address p, u4 x) { *(u4*)p = x; } + static inline void put_native_u8(address p, u8 x) { *(u8*)p = x; } + + + // Efficient reading and writing of unaligned unsigned data in Java + // byte ordering (i.e. big-endian ordering). Byte-order reversal is + // needed since LoongArch64 CPUs use little-endian format. + static inline u2 get_Java_u2(address p) { return swap_u2(get_native_u2(p)); } + static inline u4 get_Java_u4(address p) { return swap_u4(get_native_u4(p)); } + static inline u8 get_Java_u8(address p) { return swap_u8(get_native_u8(p)); } + + static inline void put_Java_u2(address p, u2 x) { put_native_u2(p, swap_u2(x)); } + static inline void put_Java_u4(address p, u4 x) { put_native_u4(p, swap_u4(x)); } + static inline void put_Java_u8(address p, u8 x) { put_native_u8(p, swap_u8(x)); } + + + // Efficient swapping of byte ordering + static inline u2 swap_u2(u2 x); // compiler-dependent implementation + static inline u4 swap_u4(u4 x); // compiler-dependent implementation + static inline u8 swap_u8(u8 x); +}; + + +// The following header contains the implementations of swap_u2, swap_u4, and swap_u8[_base] +#include OS_CPU_HEADER_INLINE(bytes) + +#endif // CPU_LOONGARCH_BYTES_LOONGARCH_HPP diff --git a/src/hotspot/cpu/loongarch/c1_CodeStubs_loongarch_64.cpp b/src/hotspot/cpu/loongarch/c1_CodeStubs_loongarch_64.cpp new file mode 100644 index 00000000000..c0eeb639626 --- /dev/null +++ b/src/hotspot/cpu/loongarch/c1_CodeStubs_loongarch_64.cpp @@ -0,0 +1,344 @@ +/* + * Copyright (c) 1999, 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "c1/c1_CodeStubs.hpp" +#include "c1/c1_FrameMap.hpp" +#include "c1/c1_LIRAssembler.hpp" +#include "c1/c1_MacroAssembler.hpp" +#include "c1/c1_Runtime1.hpp" +#include "classfile/javaClasses.hpp" +#include "nativeInst_loongarch.hpp" +#include "runtime/sharedRuntime.hpp" +#include "vmreg_loongarch.inline.hpp" + +#define __ ce->masm()-> + +void CounterOverflowStub::emit_code(LIR_Assembler* ce) { + __ bind(_entry); + Metadata *m = _method->as_constant_ptr()->as_metadata(); + __ mov_metadata(SCR2, m); + ce->store_parameter(SCR2, 1); + ce->store_parameter(_bci, 0); + __ call(Runtime1::entry_for(Runtime1::counter_overflow_id), relocInfo::runtime_call_type); + ce->add_call_info_here(_info); + ce->verify_oop_map(_info); + __ b(_continuation); +} + +RangeCheckStub::RangeCheckStub(CodeEmitInfo* info, LIR_Opr index, LIR_Opr array) + : _index(index), _array(array), _throw_index_out_of_bounds_exception(false) { + assert(info != NULL, "must have info"); + _info = new CodeEmitInfo(info); +} + +RangeCheckStub::RangeCheckStub(CodeEmitInfo* info, LIR_Opr index) + : _index(index), _array(NULL), _throw_index_out_of_bounds_exception(true) { + assert(info != NULL, "must have info"); + _info = new CodeEmitInfo(info); +} + +void RangeCheckStub::emit_code(LIR_Assembler* ce) { + __ bind(_entry); + if (_info->deoptimize_on_exception()) { + address a = Runtime1::entry_for(Runtime1::predicate_failed_trap_id); + __ call(a, relocInfo::runtime_call_type); + ce->add_call_info_here(_info); + ce->verify_oop_map(_info); + debug_only(__ should_not_reach_here()); + return; + } + + if (_index->is_cpu_register()) { + __ move(SCR1, _index->as_register()); + } else { + __ li(SCR1, _index->as_jint()); + } + Runtime1::StubID stub_id; + if (_throw_index_out_of_bounds_exception) { + stub_id = Runtime1::throw_index_exception_id; + } else { + assert(_array != NULL, "sanity"); + __ move(SCR2, _array->as_pointer_register()); + stub_id = Runtime1::throw_range_check_failed_id; + } + __ call(Runtime1::entry_for(stub_id), relocInfo::runtime_call_type); + ce->add_call_info_here(_info); + ce->verify_oop_map(_info); + debug_only(__ should_not_reach_here()); +} + +PredicateFailedStub::PredicateFailedStub(CodeEmitInfo* info) { + _info = new CodeEmitInfo(info); +} + +void PredicateFailedStub::emit_code(LIR_Assembler* ce) { + __ bind(_entry); + address a = Runtime1::entry_for(Runtime1::predicate_failed_trap_id); + __ call(a, relocInfo::runtime_call_type); + ce->add_call_info_here(_info); + ce->verify_oop_map(_info); + debug_only(__ should_not_reach_here()); +} + +void DivByZeroStub::emit_code(LIR_Assembler* ce) { + if (_offset != -1) { + ce->compilation()->implicit_exception_table()->append(_offset, __ offset()); + } + __ bind(_entry); + __ call(Runtime1::entry_for(Runtime1::throw_div0_exception_id), relocInfo::runtime_call_type); + ce->add_call_info_here(_info); + ce->verify_oop_map(_info); +#ifdef ASSERT + __ should_not_reach_here(); +#endif +} + +// Implementation of NewInstanceStub + +NewInstanceStub::NewInstanceStub(LIR_Opr klass_reg, LIR_Opr result, ciInstanceKlass* klass, + CodeEmitInfo* info, Runtime1::StubID stub_id) { + _result = result; + _klass = klass; + _klass_reg = klass_reg; + _info = new CodeEmitInfo(info); + assert(stub_id == Runtime1::new_instance_id || + stub_id == Runtime1::fast_new_instance_id || + stub_id == Runtime1::fast_new_instance_init_check_id, + "need new_instance id"); + _stub_id = stub_id; +} + +void 
NewInstanceStub::emit_code(LIR_Assembler* ce) { + assert(__ rsp_offset() == 0, "frame size should be fixed"); + __ bind(_entry); + __ move(A3, _klass_reg->as_register()); + __ call(Runtime1::entry_for(_stub_id), relocInfo::runtime_call_type); + ce->add_call_info_here(_info); + ce->verify_oop_map(_info); + assert(_result->as_register() == A0, "result must be in A0"); + __ b(_continuation); +} + +// Implementation of NewTypeArrayStub + +NewTypeArrayStub::NewTypeArrayStub(LIR_Opr klass_reg, LIR_Opr length, LIR_Opr result, + CodeEmitInfo* info) { + _klass_reg = klass_reg; + _length = length; + _result = result; + _info = new CodeEmitInfo(info); +} + +void NewTypeArrayStub::emit_code(LIR_Assembler* ce) { + assert(__ rsp_offset() == 0, "frame size should be fixed"); + __ bind(_entry); + assert(_length->as_register() == S0, "length must be in S0"); + assert(_klass_reg->as_register() == A3, "klass_reg must be in A3"); + __ call(Runtime1::entry_for(Runtime1::new_type_array_id), relocInfo::runtime_call_type); + ce->add_call_info_here(_info); + ce->verify_oop_map(_info); + assert(_result->as_register() == A0, "result must be in A0"); + __ b(_continuation); +} + +// Implementation of NewObjectArrayStub + +NewObjectArrayStub::NewObjectArrayStub(LIR_Opr klass_reg, LIR_Opr length, LIR_Opr result, + CodeEmitInfo* info) { + _klass_reg = klass_reg; + _result = result; + _length = length; + _info = new CodeEmitInfo(info); +} + +void NewObjectArrayStub::emit_code(LIR_Assembler* ce) { + assert(__ rsp_offset() == 0, "frame size should be fixed"); + __ bind(_entry); + assert(_length->as_register() == S0, "length must be in S0"); + assert(_klass_reg->as_register() == A3, "klass_reg must be in A3"); + __ call(Runtime1::entry_for(Runtime1::new_object_array_id), relocInfo::runtime_call_type); + ce->add_call_info_here(_info); + ce->verify_oop_map(_info); + assert(_result->as_register() == A0, "result must be in A0"); + __ b(_continuation); +} + +// Implementation of MonitorAccessStubs + +MonitorEnterStub::MonitorEnterStub(LIR_Opr obj_reg, LIR_Opr lock_reg, CodeEmitInfo* info) + : MonitorAccessStub(obj_reg, lock_reg) { + _info = new CodeEmitInfo(info); +} + +void MonitorEnterStub::emit_code(LIR_Assembler* ce) { + assert(__ rsp_offset() == 0, "frame size should be fixed"); + __ bind(_entry); + ce->store_parameter(_obj_reg->as_register(), 1); + ce->store_parameter(_lock_reg->as_register(), 0); + Runtime1::StubID enter_id; + if (ce->compilation()->has_fpu_code()) { + enter_id = Runtime1::monitorenter_id; + } else { + enter_id = Runtime1::monitorenter_nofpu_id; + } + __ call(Runtime1::entry_for(enter_id), relocInfo::runtime_call_type); + ce->add_call_info_here(_info); + ce->verify_oop_map(_info); + __ b(_continuation); +} + +void MonitorExitStub::emit_code(LIR_Assembler* ce) { + __ bind(_entry); + if (_compute_lock) { + // lock_reg was destroyed by fast unlocking attempt => recompute it + ce->monitor_address(_monitor_ix, _lock_reg); + } + ce->store_parameter(_lock_reg->as_register(), 0); + // note: non-blocking leaf routine => no call info needed + Runtime1::StubID exit_id; + if (ce->compilation()->has_fpu_code()) { + exit_id = Runtime1::monitorexit_id; + } else { + exit_id = Runtime1::monitorexit_nofpu_id; + } + __ lipc(RA, _continuation); + __ jmp(Runtime1::entry_for(exit_id), relocInfo::runtime_call_type); +} + +// Implementation of patching: +// - Copy the code at given offset to an inlined buffer (first the bytes, then the number of bytes) +// - Replace original code with a call to the stub +// At Runtime: +// - call to stub, jump to
runtime +// - in runtime: preserve all registers (rspecially objects, i.e., source and destination object) +// - in runtime: after initializing class, restore original code, reexecute instruction + +int PatchingStub::_patch_info_offset = -NativeGeneralJump::instruction_size; + +void PatchingStub::align_patch_site(MacroAssembler* masm) { +} + +void PatchingStub::emit_code(LIR_Assembler* ce) { + assert(false, "LoongArch64 should not use C1 runtime patching"); +} + +void DeoptimizeStub::emit_code(LIR_Assembler* ce) { + __ bind(_entry); + ce->store_parameter(_trap_request, 0); + __ call(Runtime1::entry_for(Runtime1::deoptimize_id), relocInfo::runtime_call_type); + ce->add_call_info_here(_info); + DEBUG_ONLY(__ should_not_reach_here()); +} + +void ImplicitNullCheckStub::emit_code(LIR_Assembler* ce) { + address a; + if (_info->deoptimize_on_exception()) { + // Deoptimize, do not throw the exception, because it is probably wrong to do it here. + a = Runtime1::entry_for(Runtime1::predicate_failed_trap_id); + } else { + a = Runtime1::entry_for(Runtime1::throw_null_pointer_exception_id); + } + + ce->compilation()->implicit_exception_table()->append(_offset, __ offset()); + __ bind(_entry); + __ call(a, relocInfo::runtime_call_type); + ce->add_call_info_here(_info); + ce->verify_oop_map(_info); + debug_only(__ should_not_reach_here()); +} + +void SimpleExceptionStub::emit_code(LIR_Assembler* ce) { + assert(__ rsp_offset() == 0, "frame size should be fixed"); + + __ bind(_entry); + // pass the object in a scratch register because all other registers + // must be preserved + if (_obj->is_cpu_register()) { + __ move(SCR1, _obj->as_register()); + } + __ call(Runtime1::entry_for(_stub), relocInfo::runtime_call_type); + ce->add_call_info_here(_info); + debug_only(__ should_not_reach_here()); +} + +void ArrayCopyStub::emit_code(LIR_Assembler* ce) { + //---------------slow case: call to native----------------- + __ bind(_entry); + // Figure out where the args should go + // This should really convert the IntrinsicID to the Method* and signature + // but I don't know how to do that. 
+ // + VMRegPair args[5]; + BasicType signature[5] = { T_OBJECT, T_INT, T_OBJECT, T_INT, T_INT}; + SharedRuntime::java_calling_convention(signature, args, 5, true); + + // push parameters + // (src, src_pos, dest, destPos, length) + Register r[5]; + r[0] = src()->as_register(); + r[1] = src_pos()->as_register(); + r[2] = dst()->as_register(); + r[3] = dst_pos()->as_register(); + r[4] = length()->as_register(); + + // next registers will get stored on the stack + for (int i = 0; i < 5 ; i++ ) { + VMReg r_1 = args[i].first(); + if (r_1->is_stack()) { + int st_off = r_1->reg2stack() * wordSize; + __ stptr_d (r[i], SP, st_off); + } else { + assert(r[i] == args[i].first()->as_Register(), "Wrong register for arg "); + } + } + + ce->align_call(lir_static_call); + + ce->emit_static_call_stub(); + if (ce->compilation()->bailed_out()) { + return; // CodeCache is full + } + AddressLiteral resolve(SharedRuntime::get_resolve_static_call_stub(), + relocInfo::static_call_type); + address call = __ trampoline_call(resolve); + if (call == NULL) { + ce->bailout("trampoline stub overflow"); + return; + } + ce->add_call_info_here(info()); + +#ifndef PRODUCT + if (PrintC1Statistics) { + __ li(SCR2, (address)&Runtime1::_arraycopy_slowcase_cnt); + __ increment(Address(SCR2)); + } +#endif + + __ b(_continuation); +} + +#undef __ diff --git a/src/hotspot/cpu/loongarch/c1_Defs_loongarch.hpp b/src/hotspot/cpu/loongarch/c1_Defs_loongarch.hpp new file mode 100644 index 00000000000..1140e44431d --- /dev/null +++ b/src/hotspot/cpu/loongarch/c1_Defs_loongarch.hpp @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2000, 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef CPU_LOONGARCH_C1_DEFS_LOONGARCH_HPP +#define CPU_LOONGARCH_C1_DEFS_LOONGARCH_HPP + +// native word offsets from memory address (little endian) +enum { + pd_lo_word_offset_in_bytes = 0, + pd_hi_word_offset_in_bytes = BytesPerWord +}; + +// explicit rounding operations are required to implement the strictFP mode +enum { + pd_strict_fp_requires_explicit_rounding = false +}; + +// FIXME: There are no callee-saved + +// registers +enum { + pd_nof_cpu_regs_frame_map = RegisterImpl::number_of_registers, // number of registers used during code emission + pd_nof_fpu_regs_frame_map = FloatRegisterImpl::number_of_registers, // number of registers used during code emission + + pd_nof_caller_save_cpu_regs_frame_map = 15, // number of registers killed by calls + pd_nof_caller_save_fpu_regs_frame_map = 32, // number of registers killed by calls + + pd_first_callee_saved_reg = pd_nof_caller_save_cpu_regs_frame_map, + pd_last_callee_saved_reg = 21, + + pd_last_allocatable_cpu_reg = pd_nof_caller_save_cpu_regs_frame_map - 1, + + pd_nof_cpu_regs_reg_alloc = pd_nof_caller_save_cpu_regs_frame_map, // number of registers that are visible to register allocator + pd_nof_fpu_regs_reg_alloc = 32, // number of registers that are visible to register allocator + + pd_nof_cpu_regs_linearscan = 32, // number of registers visible to linear scan + pd_nof_fpu_regs_linearscan = pd_nof_fpu_regs_frame_map, // number of registers visible to linear scan + pd_nof_xmm_regs_linearscan = 0, // don't have vector registers + pd_first_cpu_reg = 0, + pd_last_cpu_reg = pd_nof_cpu_regs_reg_alloc - 1, + pd_first_byte_reg = 0, + pd_last_byte_reg = pd_nof_cpu_regs_reg_alloc - 1, + pd_first_fpu_reg = pd_nof_cpu_regs_frame_map, + pd_last_fpu_reg = pd_first_fpu_reg + 31, + + pd_first_callee_saved_fpu_reg = 24 + pd_first_fpu_reg, + pd_last_callee_saved_fpu_reg = 31 + pd_first_fpu_reg, +}; + +// Encoding of float value in debug info. This is true on x86 where +// floats are extended to doubles when stored in the stack, false for +// LoongArch64 where floats and doubles are stored in their native form. +enum { + pd_float_saved_as_double = false +}; + +#endif // CPU_LOONGARCH_C1_DEFS_LOONGARCH_HPP diff --git a/src/hotspot/cpu/loongarch/c1_FpuStackSim_loongarch.hpp b/src/hotspot/cpu/loongarch/c1_FpuStackSim_loongarch.hpp new file mode 100644 index 00000000000..bd8578c72a8 --- /dev/null +++ b/src/hotspot/cpu/loongarch/c1_FpuStackSim_loongarch.hpp @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2005, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2021, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_LOONGARCH_C1_FPUSTACKSIM_LOONGARCH_HPP +#define CPU_LOONGARCH_C1_FPUSTACKSIM_LOONGARCH_HPP + +// No FPU stack on LoongArch +class FpuStackSim; + +#endif // CPU_LOONGARCH_C1_FPUSTACKSIM_LOONGARCH_HPP diff --git a/src/hotspot/cpu/loongarch/c1_FpuStackSim_loongarch_64.cpp b/src/hotspot/cpu/loongarch/c1_FpuStackSim_loongarch_64.cpp new file mode 100644 index 00000000000..1a89c437a83 --- /dev/null +++ b/src/hotspot/cpu/loongarch/c1_FpuStackSim_loongarch_64.cpp @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2005, 2017, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +//-------------------------------------------------------- +// FpuStackSim +//-------------------------------------------------------- + +// No FPU stack on LoongArch64 +#include "precompiled.hpp" diff --git a/src/hotspot/cpu/loongarch/c1_FrameMap_loongarch.hpp b/src/hotspot/cpu/loongarch/c1_FrameMap_loongarch.hpp new file mode 100644 index 00000000000..4f0cf053617 --- /dev/null +++ b/src/hotspot/cpu/loongarch/c1_FrameMap_loongarch.hpp @@ -0,0 +1,143 @@ +/* + * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef CPU_LOONGARCH_C1_FRAMEMAP_LOONGARCH_HPP +#define CPU_LOONGARCH_C1_FRAMEMAP_LOONGARCH_HPP + +// On LoongArch64 the frame looks as follows: +// +// +-----------------------------+---------+----------------------------------------+----------------+----------- +// | size_arguments-nof_reg_args | 2 words | size_locals-size_arguments+numreg_args | _size_monitors | spilling . +// +-----------------------------+---------+----------------------------------------+----------------+----------- + + public: + static const int pd_c_runtime_reserved_arg_size; + + enum { + first_available_sp_in_frame = 0, + frame_pad_in_bytes = 16, + nof_reg_args = 8 + }; + + public: + static LIR_Opr receiver_opr; + + static LIR_Opr r0_opr; + static LIR_Opr ra_opr; + static LIR_Opr tp_opr; + static LIR_Opr sp_opr; + static LIR_Opr a0_opr; + static LIR_Opr a1_opr; + static LIR_Opr a2_opr; + static LIR_Opr a3_opr; + static LIR_Opr a4_opr; + static LIR_Opr a5_opr; + static LIR_Opr a6_opr; + static LIR_Opr a7_opr; + static LIR_Opr t0_opr; + static LIR_Opr t1_opr; + static LIR_Opr t2_opr; + static LIR_Opr t3_opr; + static LIR_Opr t4_opr; + static LIR_Opr t5_opr; + static LIR_Opr t6_opr; + static LIR_Opr t7_opr; + static LIR_Opr t8_opr; + static LIR_Opr rx_opr; + static LIR_Opr fp_opr; + static LIR_Opr s0_opr; + static LIR_Opr s1_opr; + static LIR_Opr s2_opr; + static LIR_Opr s3_opr; + static LIR_Opr s4_opr; + static LIR_Opr s5_opr; + static LIR_Opr s6_opr; + static LIR_Opr s7_opr; + static LIR_Opr s8_opr; + + static LIR_Opr ra_oop_opr; + static LIR_Opr a0_oop_opr; + static LIR_Opr a1_oop_opr; + static LIR_Opr a2_oop_opr; + static LIR_Opr a3_oop_opr; + static LIR_Opr a4_oop_opr; + static LIR_Opr a5_oop_opr; + static LIR_Opr a6_oop_opr; + static LIR_Opr a7_oop_opr; + static LIR_Opr t0_oop_opr; + static LIR_Opr t1_oop_opr; + static LIR_Opr t2_oop_opr; + static LIR_Opr t3_oop_opr; + static LIR_Opr t4_oop_opr; + static LIR_Opr t5_oop_opr; + static LIR_Opr t6_oop_opr; + static LIR_Opr t7_oop_opr; + static LIR_Opr t8_oop_opr; + static LIR_Opr fp_oop_opr; + static LIR_Opr s0_oop_opr; + static LIR_Opr s1_oop_opr; + static LIR_Opr s2_oop_opr; + static LIR_Opr s3_oop_opr; + static LIR_Opr s4_oop_opr; + static LIR_Opr s5_oop_opr; + static LIR_Opr s6_oop_opr; + static LIR_Opr s7_oop_opr; + static LIR_Opr s8_oop_opr; + + static LIR_Opr scr1_opr; + static LIR_Opr scr2_opr; + static LIR_Opr scr1_long_opr; + static LIR_Opr scr2_long_opr; + + static LIR_Opr a0_metadata_opr; + static LIR_Opr a1_metadata_opr; + static LIR_Opr a2_metadata_opr; + static LIR_Opr a3_metadata_opr; + static LIR_Opr a4_metadata_opr; + static LIR_Opr a5_metadata_opr; + + static LIR_Opr long0_opr; + static LIR_Opr long1_opr; + static LIR_Opr fpu0_float_opr; + static LIR_Opr fpu0_double_opr; + + static LIR_Opr as_long_opr(Register r) { + return LIR_OprFact::double_cpu(cpu_reg2rnr(r), cpu_reg2rnr(r)); + } + static LIR_Opr as_pointer_opr(Register r) { + return LIR_OprFact::double_cpu(cpu_reg2rnr(r), cpu_reg2rnr(r)); + } + + // VMReg name for spilled physical FPU stack slot n + static VMReg fpu_regname (int n); + + static bool is_caller_save_register(LIR_Opr opr) { return true; } + static bool is_caller_save_register(Register r) { return true; } + + static int nof_caller_save_cpu_regs() { return pd_nof_caller_save_cpu_regs_frame_map; } + static int last_cpu_reg() { return pd_last_cpu_reg; } + static int last_byte_reg() { return pd_last_byte_reg; } + +#endif // CPU_LOONGARCH_C1_FRAMEMAP_LOONGARCH_HPP diff --git 
a/src/hotspot/cpu/loongarch/c1_FrameMap_loongarch_64.cpp b/src/hotspot/cpu/loongarch/c1_FrameMap_loongarch_64.cpp new file mode 100644 index 00000000000..3b608990714 --- /dev/null +++ b/src/hotspot/cpu/loongarch/c1_FrameMap_loongarch_64.cpp @@ -0,0 +1,354 @@ +/* + * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "c1/c1_FrameMap.hpp" +#include "c1/c1_LIR.hpp" +#include "runtime/sharedRuntime.hpp" +#include "vmreg_loongarch.inline.hpp" + +#define T0 RT0 +#define T1 RT1 +#define T2 RT2 +#define T3 RT3 +#define T4 RT4 +#define T5 RT5 +#define T6 RT6 +#define T7 RT7 +#define T8 RT8 + +LIR_Opr FrameMap::map_to_opr(BasicType type, VMRegPair* reg, bool) { + LIR_Opr opr = LIR_OprFact::illegalOpr; + VMReg r_1 = reg->first(); + VMReg r_2 = reg->second(); + if (r_1->is_stack()) { + // Convert stack slot to an SP offset + // The calling convention does not count the SharedRuntime::out_preserve_stack_slots() value + // so we must add it in here. 
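+ // Added note: the byte offset is (stack-slot index + reserved outgoing-argument
+ // slots) scaled by the 4-byte VMReg stack slot size, addressed relative to SP.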
+ int st_off = (r_1->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size; + opr = LIR_OprFact::address(new LIR_Address(sp_opr, st_off, type)); + } else if (r_1->is_Register()) { + Register reg = r_1->as_Register(); + if (r_2->is_Register() && (type == T_LONG || type == T_DOUBLE)) { + Register reg2 = r_2->as_Register(); + assert(reg2 == reg, "must be same register"); + opr = as_long_opr(reg); + } else if (is_reference_type(type)) { + opr = as_oop_opr(reg); + } else if (type == T_METADATA) { + opr = as_metadata_opr(reg); + } else if (type == T_ADDRESS) { + opr = as_address_opr(reg); + } else { + opr = as_opr(reg); + } + } else if (r_1->is_FloatRegister()) { + assert(type == T_DOUBLE || type == T_FLOAT, "wrong type"); + int num = r_1->as_FloatRegister()->encoding(); + if (type == T_FLOAT) { + opr = LIR_OprFact::single_fpu(num); + } else { + opr = LIR_OprFact::double_fpu(num); + } + } else { + ShouldNotReachHere(); + } + return opr; +} + +LIR_Opr FrameMap::r0_opr; +LIR_Opr FrameMap::ra_opr; +LIR_Opr FrameMap::tp_opr; +LIR_Opr FrameMap::sp_opr; +LIR_Opr FrameMap::a0_opr; +LIR_Opr FrameMap::a1_opr; +LIR_Opr FrameMap::a2_opr; +LIR_Opr FrameMap::a3_opr; +LIR_Opr FrameMap::a4_opr; +LIR_Opr FrameMap::a5_opr; +LIR_Opr FrameMap::a6_opr; +LIR_Opr FrameMap::a7_opr; +LIR_Opr FrameMap::t0_opr; +LIR_Opr FrameMap::t1_opr; +LIR_Opr FrameMap::t2_opr; +LIR_Opr FrameMap::t3_opr; +LIR_Opr FrameMap::t4_opr; +LIR_Opr FrameMap::t5_opr; +LIR_Opr FrameMap::t6_opr; +LIR_Opr FrameMap::t7_opr; +LIR_Opr FrameMap::t8_opr; +LIR_Opr FrameMap::rx_opr; +LIR_Opr FrameMap::fp_opr; +LIR_Opr FrameMap::s0_opr; +LIR_Opr FrameMap::s1_opr; +LIR_Opr FrameMap::s2_opr; +LIR_Opr FrameMap::s3_opr; +LIR_Opr FrameMap::s4_opr; +LIR_Opr FrameMap::s5_opr; +LIR_Opr FrameMap::s6_opr; +LIR_Opr FrameMap::s7_opr; +LIR_Opr FrameMap::s8_opr; + +LIR_Opr FrameMap::receiver_opr; + +LIR_Opr FrameMap::ra_oop_opr; +LIR_Opr FrameMap::a0_oop_opr; +LIR_Opr FrameMap::a1_oop_opr; +LIR_Opr FrameMap::a2_oop_opr; +LIR_Opr FrameMap::a3_oop_opr; +LIR_Opr FrameMap::a4_oop_opr; +LIR_Opr FrameMap::a5_oop_opr; +LIR_Opr FrameMap::a6_oop_opr; +LIR_Opr FrameMap::a7_oop_opr; +LIR_Opr FrameMap::t0_oop_opr; +LIR_Opr FrameMap::t1_oop_opr; +LIR_Opr FrameMap::t2_oop_opr; +LIR_Opr FrameMap::t3_oop_opr; +LIR_Opr FrameMap::t4_oop_opr; +LIR_Opr FrameMap::t5_oop_opr; +LIR_Opr FrameMap::t6_oop_opr; +LIR_Opr FrameMap::t7_oop_opr; +LIR_Opr FrameMap::t8_oop_opr; +LIR_Opr FrameMap::fp_oop_opr; +LIR_Opr FrameMap::s0_oop_opr; +LIR_Opr FrameMap::s1_oop_opr; +LIR_Opr FrameMap::s2_oop_opr; +LIR_Opr FrameMap::s3_oop_opr; +LIR_Opr FrameMap::s4_oop_opr; +LIR_Opr FrameMap::s5_oop_opr; +LIR_Opr FrameMap::s6_oop_opr; +LIR_Opr FrameMap::s7_oop_opr; +LIR_Opr FrameMap::s8_oop_opr; + +LIR_Opr FrameMap::scr1_opr; +LIR_Opr FrameMap::scr2_opr; +LIR_Opr FrameMap::scr1_long_opr; +LIR_Opr FrameMap::scr2_long_opr; + +LIR_Opr FrameMap::a0_metadata_opr; +LIR_Opr FrameMap::a1_metadata_opr; +LIR_Opr FrameMap::a2_metadata_opr; +LIR_Opr FrameMap::a3_metadata_opr; +LIR_Opr FrameMap::a4_metadata_opr; +LIR_Opr FrameMap::a5_metadata_opr; + +LIR_Opr FrameMap::long0_opr; +LIR_Opr FrameMap::long1_opr; +LIR_Opr FrameMap::fpu0_float_opr; +LIR_Opr FrameMap::fpu0_double_opr; + +LIR_Opr FrameMap::_caller_save_cpu_regs[] = { 0 }; +LIR_Opr FrameMap::_caller_save_fpu_regs[] = { 0 }; + +//-------------------------------------------------------- +// FrameMap +//-------------------------------------------------------- + +void FrameMap::initialize() { + assert(!_init_done, "once"); + int i = 0; + + // 
caller save register + map_register(i, A0); a0_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, A1); a1_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, A2); a2_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, A3); a3_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, A4); a4_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, A5); a5_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, A6); a6_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, A7); a7_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, T0); t0_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, T1); t1_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, T2); t2_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, T3); t3_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, T5); t5_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, T6); t6_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, T8); t8_opr = LIR_OprFact::single_cpu(i); i++; + + // callee save register + map_register(i, S0); s0_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, S1); s1_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, S2); s2_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, S3); s3_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, S4); s4_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, S7); s7_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, S8); s8_opr = LIR_OprFact::single_cpu(i); i++; + + // special register + map_register(i, S5); s5_opr = LIR_OprFact::single_cpu(i); i++; // heapbase + map_register(i, S6); s6_opr = LIR_OprFact::single_cpu(i); i++; // thread + map_register(i, TP); tp_opr = LIR_OprFact::single_cpu(i); i++; // tp + map_register(i, FP); fp_opr = LIR_OprFact::single_cpu(i); i++; // fp + map_register(i, RA); ra_opr = LIR_OprFact::single_cpu(i); i++; // ra + map_register(i, SP); sp_opr = LIR_OprFact::single_cpu(i); i++; // sp + + // tmp register + map_register(i, T7); t7_opr = LIR_OprFact::single_cpu(i); i++; // scr1 + map_register(i, T4); t4_opr = LIR_OprFact::single_cpu(i); i++; // scr2 + + scr1_opr = t7_opr; + scr2_opr = t4_opr; + scr1_long_opr = LIR_OprFact::double_cpu(t7_opr->cpu_regnr(), t7_opr->cpu_regnr()); + scr2_long_opr = LIR_OprFact::double_cpu(t4_opr->cpu_regnr(), t4_opr->cpu_regnr()); + + long0_opr = LIR_OprFact::double_cpu(a0_opr->cpu_regnr(), a0_opr->cpu_regnr()); + long1_opr = LIR_OprFact::double_cpu(a1_opr->cpu_regnr(), a1_opr->cpu_regnr()); + + fpu0_float_opr = LIR_OprFact::single_fpu(0); + fpu0_double_opr = LIR_OprFact::double_fpu(0); + + // scr1, scr2 not included + _caller_save_cpu_regs[0] = a0_opr; + _caller_save_cpu_regs[1] = a1_opr; + _caller_save_cpu_regs[2] = a2_opr; + _caller_save_cpu_regs[3] = a3_opr; + _caller_save_cpu_regs[4] = a4_opr; + _caller_save_cpu_regs[5] = a5_opr; + _caller_save_cpu_regs[6] = a6_opr; + _caller_save_cpu_regs[7] = a7_opr; + _caller_save_cpu_regs[8] = t0_opr; + _caller_save_cpu_regs[9] = t1_opr; + _caller_save_cpu_regs[10] = t2_opr; + _caller_save_cpu_regs[11] = t3_opr; + _caller_save_cpu_regs[12] = t5_opr; + _caller_save_cpu_regs[13] = t6_opr; + _caller_save_cpu_regs[14] = t8_opr; + + for (int i = 0; i < 8; i++) { + _caller_save_fpu_regs[i] = LIR_OprFact::single_fpu(i); + } + + _init_done = true; + + ra_oop_opr = as_oop_opr(RA); + a0_oop_opr = as_oop_opr(A0); + a1_oop_opr = as_oop_opr(A1); + a2_oop_opr = as_oop_opr(A2); + a3_oop_opr = as_oop_opr(A3); + a4_oop_opr = as_oop_opr(A4); + a5_oop_opr = as_oop_opr(A5); + a6_oop_opr = as_oop_opr(A6); + a7_oop_opr = 
as_oop_opr(A7); + t0_oop_opr = as_oop_opr(T0); + t1_oop_opr = as_oop_opr(T1); + t2_oop_opr = as_oop_opr(T2); + t3_oop_opr = as_oop_opr(T3); + t4_oop_opr = as_oop_opr(T4); + t5_oop_opr = as_oop_opr(T5); + t6_oop_opr = as_oop_opr(T6); + t7_oop_opr = as_oop_opr(T7); + t8_oop_opr = as_oop_opr(T8); + fp_oop_opr = as_oop_opr(FP); + s0_oop_opr = as_oop_opr(S0); + s1_oop_opr = as_oop_opr(S1); + s2_oop_opr = as_oop_opr(S2); + s3_oop_opr = as_oop_opr(S3); + s4_oop_opr = as_oop_opr(S4); + s5_oop_opr = as_oop_opr(S5); + s6_oop_opr = as_oop_opr(S6); + s7_oop_opr = as_oop_opr(S7); + s8_oop_opr = as_oop_opr(S8); + + a0_metadata_opr = as_metadata_opr(A0); + a1_metadata_opr = as_metadata_opr(A1); + a2_metadata_opr = as_metadata_opr(A2); + a3_metadata_opr = as_metadata_opr(A3); + a4_metadata_opr = as_metadata_opr(A4); + a5_metadata_opr = as_metadata_opr(A5); + + sp_opr = as_pointer_opr(SP); + fp_opr = as_pointer_opr(FP); + + VMRegPair regs; + BasicType sig_bt = T_OBJECT; + SharedRuntime::java_calling_convention(&sig_bt, ®s, 1, true); + receiver_opr = as_oop_opr(regs.first()->as_Register()); + + for (int i = 0; i < nof_caller_save_fpu_regs; i++) { + _caller_save_fpu_regs[i] = LIR_OprFact::single_fpu(i); + } +} + +Address FrameMap::make_new_address(ByteSize sp_offset) const { + // for sp, based address use this: + // return Address(sp, in_bytes(sp_offset) - (framesize() - 2) * 4); + return Address(SP, in_bytes(sp_offset)); +} + +// ----------------mapping----------------------- +// all mapping is based on fp addressing, except for simple leaf methods where we access +// the locals sp based (and no frame is built) + +// Frame for simple leaf methods (quick entries) +// +// +----------+ +// | ret addr | <- TOS +// +----------+ +// | args | +// | ...... | + +// Frame for standard methods +// +// | .........| <- TOS +// | locals | +// +----------+ +// | old fp, | <- RFP +// +----------+ +// | ret addr | +// +----------+ +// | args | +// | .........| + +// For OopMaps, map a local variable or spill index to an VMRegImpl name. +// This is the offset from sp() in the frame of the slot for the index, +// skewed by VMRegImpl::stack0 to indicate a stack location (vs.a register.) +// +// framesize + +// stack0 stack0 0 <- VMReg +// | | | +// ...........|..............|.............| +// 0 1 2 3 x x 4 5 6 ... | <- local indices +// ^ ^ sp() ( x x indicate link +// | | and return addr) +// arguments non-argument locals + +VMReg FrameMap::fpu_regname(int n) { + // Return the OptoReg name for the fpu stack slot "n" + // A spilled fpu stack slot comprises to two single-word OptoReg's. + return as_FloatRegister(n)->as_VMReg(); +} + +LIR_Opr FrameMap::stack_pointer() { + return FrameMap::sp_opr; +} + +// JSR 292 +LIR_Opr FrameMap::method_handle_invoke_SP_save_opr() { + return LIR_OprFact::illegalOpr; // Not needed on LoongArch64 +} + +bool FrameMap::validate_frame() { + return true; +} diff --git a/src/hotspot/cpu/loongarch/c1_LIRAssembler_loongarch.hpp b/src/hotspot/cpu/loongarch/c1_LIRAssembler_loongarch.hpp new file mode 100644 index 00000000000..40d9408f1fa --- /dev/null +++ b/src/hotspot/cpu/loongarch/c1_LIRAssembler_loongarch.hpp @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2000, 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_LOONGARCH_C1_LIRASSEMBLER_LOONGARCH_HPP +#define CPU_LOONGARCH_C1_LIRASSEMBLER_LOONGARCH_HPP + +// ArrayCopyStub needs access to bailout +friend class ArrayCopyStub; + + private: + int array_element_size(BasicType type) const; + + void arith_fpu_implementation(LIR_Code code, int left_index, int right_index, + int dest_index, bool pop_fpu_stack); + + // helper functions which checks for overflow and sets bailout if it + // occurs. Always returns a valid embeddable pointer but in the + // bailout case the pointer won't be to unique storage. + address float_constant(float f); + address double_constant(double d); + + address int_constant(jlong n); + + bool is_literal_address(LIR_Address* addr); + + // Ensure we have a valid Address (base+offset) to a stack-slot. + Address stack_slot_address(int index, uint shift, int adjust = 0); + + // Record the type of the receiver in ReceiverTypeData + void type_profile_helper(Register mdo, ciMethodData *md, ciProfileData *data, + Register recv, Label* update_done); + void add_debug_info_for_branch(address adr, CodeEmitInfo* info); + + void casw(Register addr, Register newval, Register cmpval, bool sign); + void casl(Register addr, Register newval, Register cmpval); + + void poll_for_safepoint(relocInfo::relocType rtype, CodeEmitInfo* info = NULL); + + static const int max_tableswitches = 20; + struct tableswitch switches[max_tableswitches]; + int tableswitch_count; + + void init() { tableswitch_count = 0; } + + void deoptimize_trap(CodeEmitInfo *info); + + enum { + // call stub: CompiledStaticCall::to_interp_stub_size() + + // CompiledStaticCall::to_trampoline_stub_size() + _call_stub_size = 13 * NativeInstruction::nop_instruction_size, + _call_aot_stub_size = 0, + _exception_handler_size = DEBUG_ONLY(1*K) NOT_DEBUG(175), + _deopt_handler_size = 7 * NativeInstruction::nop_instruction_size + }; + +public: + void store_parameter(Register r, int offset_from_sp_in_words); + void store_parameter(jint c, int offset_from_sp_in_words); + void store_parameter(jobject c, int offset_from_sp_in_words); + +#endif // CPU_LOONGARCH_C1_LIRASSEMBLER_LOONGARCH_HPP diff --git a/src/hotspot/cpu/loongarch/c1_LIRAssembler_loongarch_64.cpp b/src/hotspot/cpu/loongarch/c1_LIRAssembler_loongarch_64.cpp new file mode 100644 index 00000000000..c989e25c3a5 --- /dev/null +++ b/src/hotspot/cpu/loongarch/c1_LIRAssembler_loongarch_64.cpp @@ -0,0 +1,3387 @@ +/* + * Copyright (c) 2000, 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2021, Loongson Technology. All rights reserved. 
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "asm/assembler.hpp" +#include "c1/c1_CodeStubs.hpp" +#include "c1/c1_Compilation.hpp" +#include "c1/c1_LIRAssembler.hpp" +#include "c1/c1_MacroAssembler.hpp" +#include "c1/c1_Runtime1.hpp" +#include "c1/c1_ValueStack.hpp" +#include "ci/ciArrayKlass.hpp" +#include "ci/ciInstance.hpp" +#include "code/compiledIC.hpp" +#include "gc/shared/collectedHeap.hpp" +#include "gc/shared/gc_globals.hpp" +#include "nativeInst_loongarch.hpp" +#include "oops/objArrayKlass.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#include "vmreg_loongarch.inline.hpp" + +#define T0 RT0 +#define T1 RT1 +#define T2 RT2 +#define T3 RT3 +#define T5 RT5 +#define T6 RT6 +#define T7 RT7 +#define T8 RT8 + +#ifndef PRODUCT +#define COMMENT(x) do { __ block_comment(x); } while (0) +#else +#define COMMENT(x) +#endif + +NEEDS_CLEANUP // remove this definitions? 
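+ // Added note: the __ shorthand defined below expands to _masm->, so the assembler
+ // calls in this file read as __ instruction(...).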
+ +#define __ _masm-> + +static void select_different_registers(Register preserve, Register extra, + Register &tmp1, Register &tmp2) { + if (tmp1 == preserve) { + assert_different_registers(tmp1, tmp2, extra); + tmp1 = extra; + } else if (tmp2 == preserve) { + assert_different_registers(tmp1, tmp2, extra); + tmp2 = extra; + } + assert_different_registers(preserve, tmp1, tmp2); +} + +static void select_different_registers(Register preserve, Register extra, + Register &tmp1, Register &tmp2, + Register &tmp3) { + if (tmp1 == preserve) { + assert_different_registers(tmp1, tmp2, tmp3, extra); + tmp1 = extra; + } else if (tmp2 == preserve) { + assert_different_registers(tmp1, tmp2, tmp3, extra); + tmp2 = extra; + } else if (tmp3 == preserve) { + assert_different_registers(tmp1, tmp2, tmp3, extra); + tmp3 = extra; + } + assert_different_registers(preserve, tmp1, tmp2, tmp3); +} + +bool LIR_Assembler::is_small_constant(LIR_Opr opr) { Unimplemented(); return false; } + +LIR_Opr LIR_Assembler::receiverOpr() { + return FrameMap::receiver_opr; +} + +LIR_Opr LIR_Assembler::osrBufferPointer() { + return FrameMap::as_pointer_opr(receiverOpr()->as_register()); +} + +//--------------fpu register translations----------------------- + +address LIR_Assembler::float_constant(float f) { + address const_addr = __ float_constant(f); + if (const_addr == NULL) { + bailout("const section overflow"); + return __ code()->consts()->start(); + } else { + return const_addr; + } +} + +address LIR_Assembler::double_constant(double d) { + address const_addr = __ double_constant(d); + if (const_addr == NULL) { + bailout("const section overflow"); + return __ code()->consts()->start(); + } else { + return const_addr; + } +} + +void LIR_Assembler::vtable_call(LIR_OpJavaCall* op) { + ShouldNotReachHere(); +} + +void LIR_Assembler::set_24bit_FPU() { Unimplemented(); } + +void LIR_Assembler::reset_FPU() { Unimplemented(); } + +void LIR_Assembler::fpop() { Unimplemented(); } + +void LIR_Assembler::fxch(int i) { Unimplemented(); } + +void LIR_Assembler::fld(int i) { Unimplemented(); } + +void LIR_Assembler::ffree(int i) { Unimplemented(); } + +void LIR_Assembler::breakpoint() { Unimplemented(); } + +void LIR_Assembler::push(LIR_Opr opr) { Unimplemented(); } + +void LIR_Assembler::pop(LIR_Opr opr) { Unimplemented(); } + +bool LIR_Assembler::is_literal_address(LIR_Address* addr) { Unimplemented(); return false; } + +static Register as_reg(LIR_Opr op) { + return op->is_double_cpu() ? 
op->as_register_lo() : op->as_register(); +} + +static jlong as_long(LIR_Opr data) { + jlong result; + switch (data->type()) { + case T_INT: + result = (data->as_jint()); + break; + case T_LONG: + result = (data->as_jlong()); + break; + default: + ShouldNotReachHere(); + result = 0; // unreachable + } + return result; +} + +Address LIR_Assembler::as_Address(LIR_Address* addr) { + Register base = addr->base()->as_pointer_register(); + LIR_Opr opr = addr->index(); + if (opr->is_cpu_register()) { + Register index; + if (opr->is_single_cpu()) + index = opr->as_register(); + else + index = opr->as_register_lo(); + assert(addr->disp() == 0, "must be"); + return Address(base, index, Address::ScaleFactor(addr->scale())); + } else { + assert(addr->scale() == 0, "must be"); + return Address(base, addr->disp()); + } + return Address(); +} + +Address LIR_Assembler::as_Address_hi(LIR_Address* addr) { + ShouldNotReachHere(); + return Address(); +} + +Address LIR_Assembler::as_Address_lo(LIR_Address* addr) { + return as_Address(addr); // Ouch + // FIXME: This needs to be much more clever. See x86. +} + +// Ensure a valid Address (base + offset) to a stack-slot. If stack access is +// not encodable as a base + (immediate) offset, generate an explicit address +// calculation to hold the address in a temporary register. +Address LIR_Assembler::stack_slot_address(int index, uint size, int adjust) { + precond(size == 4 || size == 8); + Address addr = frame_map()->address_for_slot(index, adjust); + precond(addr.index() == noreg); + precond(addr.base() == SP); + precond(addr.disp() > 0); + uint mask = size - 1; + assert((addr.disp() & mask) == 0, "scaled offsets only"); + return addr; +} + +void LIR_Assembler::osr_entry() { + offsets()->set_value(CodeOffsets::OSR_Entry, code_offset()); + BlockBegin* osr_entry = compilation()->hir()->osr_entry(); + ValueStack* entry_state = osr_entry->state(); + int number_of_locks = entry_state->locks_size(); + + // we jump here if osr happens with the interpreter + // state set up to continue at the beginning of the + // loop that triggered osr - in particular, we have + // the following registers setup: + // + // A2: osr buffer + // + + // build frame + ciMethod* m = compilation()->method(); + __ build_frame(initial_frame_size_in_bytes(), bang_size_in_bytes()); + + // OSR buffer is + // + // locals[nlocals-1..0] + // monitors[0..number_of_locks] + // + // locals is a direct copy of the interpreter frame so in the osr buffer + // so first slot in the local array is the last local from the interpreter + // and last slot is local[0] (receiver) from the interpreter + // + // Similarly with locks. The first lock slot in the osr buffer is the nth lock + // from the interpreter frame, the nth lock slot in the osr buffer is 0th lock + // in the interpreter frame (the method lock if a sync method) + + // Initialize monitors in the compiled activation. + // A2: pointer to osr buffer + // + // All other registers are dead at this point and the locals will be + // copied into place by code emitted in the IR. + + Register OSR_buf = osrBufferPointer()->as_pointer_register(); + { + assert(frame::interpreter_frame_monitor_size() == BasicObjectLock::size(), "adjust code below"); + int monitor_offset = BytesPerWord * method()->max_locals() + (2 * BytesPerWord) * (number_of_locks - 1); + // SharedRuntime::OSR_migration_begin() packs BasicObjectLocks in + // the OSR buffer using 2 word entries: first the lock and then + // the oop. 
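+ // Added note: the loop below copies each (lock, oop) pair out of the OSR buffer
+ // into the corresponding monitor slot of the compiled frame, using S0 as a temporary.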
+ for (int i = 0; i < number_of_locks; i++) { + int slot_offset = monitor_offset - ((i * 2) * BytesPerWord); +#ifdef ASSERT + // verify the interpreter's monitor has a non-null object + { + Label L; + __ ld_ptr(SCR1, Address(OSR_buf, slot_offset + 1 * BytesPerWord)); + __ bnez(SCR1, L); + __ stop("locked object is NULL"); + __ bind(L); + } +#endif + __ ld_ptr(S0, Address(OSR_buf, slot_offset + 0)); + __ st_ptr(S0, frame_map()->address_for_monitor_lock(i)); + __ ld_ptr(S0, Address(OSR_buf, slot_offset + 1*BytesPerWord)); + __ st_ptr(S0, frame_map()->address_for_monitor_object(i)); + } + } +} + +// inline cache check; done before the frame is built. +int LIR_Assembler::check_icache() { + Register receiver = FrameMap::receiver_opr->as_register(); + Register ic_klass = IC_Klass; + int start_offset = __ offset(); + Label dont; + + __ verify_oop(receiver); + + // explicit NULL check not needed since load from [klass_offset] causes a trap + // check against inline cache + assert(!MacroAssembler::needs_explicit_null_check(oopDesc::klass_offset_in_bytes()), + "must add explicit null check"); + + __ load_klass(SCR2, receiver); + __ beq(SCR2, ic_klass, dont); + + // if icache check fails, then jump to runtime routine + // Note: RECEIVER must still contain the receiver! + __ jmp(SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type); + + // We align the verified entry point unless the method body + // (including its inline cache check) will fit in a single 64-byte + // icache line. + if (!method()->is_accessor() || __ offset() - start_offset > 4 * 4) { + // force alignment after the cache check. + __ align(CodeEntryAlignment); + } + + __ bind(dont); + return start_offset; +} + +void LIR_Assembler::jobject2reg(jobject o, Register reg) { + if (o == NULL) { + __ move(reg, R0); + } else { + int oop_index = __ oop_recorder()->find_index(o); + RelocationHolder rspec = oop_Relocation::spec(oop_index); + __ relocate(rspec); + __ patchable_li52(reg, (long)o); + } +} + +void LIR_Assembler::deoptimize_trap(CodeEmitInfo *info) { + address target = NULL; + + switch (patching_id(info)) { + case PatchingStub::access_field_id: + target = Runtime1::entry_for(Runtime1::access_field_patching_id); + break; + case PatchingStub::load_klass_id: + target = Runtime1::entry_for(Runtime1::load_klass_patching_id); + break; + case PatchingStub::load_mirror_id: + target = Runtime1::entry_for(Runtime1::load_mirror_patching_id); + break; + case PatchingStub::load_appendix_id: + target = Runtime1::entry_for(Runtime1::load_appendix_patching_id); + break; + default: ShouldNotReachHere(); + } + + __ call(target, relocInfo::runtime_call_type); + add_call_info_here(info); +} + +void LIR_Assembler::jobject2reg_with_patching(Register reg, CodeEmitInfo *info) { + deoptimize_trap(info); +} + +// This specifies the rsp decrement needed to build the frame +int LIR_Assembler::initial_frame_size_in_bytes() const { + // if rounding, must let FrameMap know! 
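+  // No rounding is applied here: the FrameMap-computed frame size is returned
+  // as-is, and build_frame() decrements SP by exactly this amount.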
+ return in_bytes(frame_map()->framesize_in_bytes()); +} + +int LIR_Assembler::emit_exception_handler() { + // if the last instruction is a call (typically to do a throw which + // is coming at the end after block reordering) the return address + // must still point into the code area in order to avoid assertion + // failures when searching for the corresponding bci => add a nop + // (was bug 5/14/1999 - gri) + __ nop(); + + // generate code for exception handler + address handler_base = __ start_a_stub(exception_handler_size()); + if (handler_base == NULL) { + // not enough space left for the handler + bailout("exception handler overflow"); + return -1; + } + + int offset = code_offset(); + + // the exception oop and pc are in A0, and A1 + // no other registers need to be preserved, so invalidate them + __ invalidate_registers(false, true, true, true, true, true); + + // check that there is really an exception + __ verify_not_null_oop(A0); + + // search an exception handler (A0: exception oop, A1: throwing pc) + __ call(Runtime1::entry_for(Runtime1::handle_exception_from_callee_id), relocInfo::runtime_call_type); + __ should_not_reach_here(); + guarantee(code_offset() - offset <= exception_handler_size(), "overflow"); + __ end_a_stub(); + + return offset; +} + +// Emit the code to remove the frame from the stack in the exception unwind path. +int LIR_Assembler::emit_unwind_handler() { +#ifndef PRODUCT + if (CommentedAssembly) { + _masm->block_comment("Unwind handler"); + } +#endif + + int offset = code_offset(); + + // Fetch the exception from TLS and clear out exception related thread state + __ ld_ptr(A0, Address(TREG, JavaThread::exception_oop_offset())); + __ st_ptr(R0, Address(TREG, JavaThread::exception_oop_offset())); + __ st_ptr(R0, Address(TREG, JavaThread::exception_pc_offset())); + + __ bind(_unwind_handler_entry); + __ verify_not_null_oop(V0); + if (method()->is_synchronized() || compilation()->env()->dtrace_method_probes()) { + __ move(S0, V0); // Preserve the exception + } + + // Perform needed unlocking + MonitorExitStub* stub = NULL; + if (method()->is_synchronized()) { + monitor_address(0, FrameMap::a0_opr); + stub = new MonitorExitStub(FrameMap::a0_opr, true, 0); + __ unlock_object(A5, A4, A0, *stub->entry()); + __ bind(*stub->continuation()); + } + + if (compilation()->env()->dtrace_method_probes()) { + __ mov_metadata(A1, method()->constant_encoding()); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), TREG, A1); + } + + if (method()->is_synchronized() || compilation()->env()->dtrace_method_probes()) { + __ move(A0, S0); // Restore the exception + } + + // remove the activation and dispatch to the unwind handler + __ block_comment("remove_frame and dispatch to the unwind handler"); + __ remove_frame(initial_frame_size_in_bytes()); + __ jmp(Runtime1::entry_for(Runtime1::unwind_exception_id), relocInfo::runtime_call_type); + + // Emit the slow path assembly + if (stub != NULL) { + stub->emit_code(this); + } + + return offset; +} + +int LIR_Assembler::emit_deopt_handler() { + // if the last instruction is a call (typically to do a throw which + // is coming at the end after block reordering) the return address + // must still point into the code area in order to avoid assertion + // failures when searching for the corresponding bci => add a nop + // (was bug 5/14/1999 - gri) + __ nop(); + + // generate code for exception handler + address handler_base = __ start_a_stub(deopt_handler_size()); + if (handler_base == NULL) { + // not enough space 
left for the handler + bailout("deopt handler overflow"); + return -1; + } + + int offset = code_offset(); + + __ call(SharedRuntime::deopt_blob()->unpack(), relocInfo::runtime_call_type); + guarantee(code_offset() - offset <= deopt_handler_size(), "overflow"); + __ end_a_stub(); + + return offset; +} + +void LIR_Assembler::add_debug_info_for_branch(address adr, CodeEmitInfo* info) { + _masm->code_section()->relocate(adr, relocInfo::poll_type); + int pc_offset = code_offset(); + flush_debug_info(pc_offset); + info->record_debug_info(compilation()->debug_info_recorder(), pc_offset); + if (info->exception_handlers() != NULL) { + compilation()->add_exception_handlers_for_pco(pc_offset, info->exception_handlers()); + } +} + +void LIR_Assembler::return_op(LIR_Opr result) { + assert(result->is_illegal() || !result->is_single_cpu() || result->as_register() == V0, + "word returns are in V0,"); + + // Pop the stack before the safepoint code + __ remove_frame(initial_frame_size_in_bytes()); + + if (StackReservedPages > 0 && compilation()->has_reserved_stack_access()) { + __ reserved_stack_check(); + } + + if (SafepointMechanism::uses_thread_local_poll()) { + __ ld_ptr(SCR2, Address(TREG, JavaThread::polling_page_offset())); + } else { + __ li(SCR2, os::get_polling_page()); + } + __ relocate(relocInfo::poll_return_type); + __ ld_w(SCR1, SCR2, 0); + __ jr(RA); +} + +int LIR_Assembler::safepoint_poll(LIR_Opr tmp, CodeEmitInfo* info) { + guarantee(info != NULL, "Shouldn't be NULL"); + if (SafepointMechanism::uses_thread_local_poll()) { + __ ld_ptr(SCR2, Address(TREG, JavaThread::polling_page_offset())); + } else { + __ li(SCR2, os::get_polling_page()); + } + add_debug_info_for_branch(info); // This isn't just debug info: it's the oop map + __ relocate(relocInfo::poll_type); + __ ld_w(SCR1, SCR2, 0); + return __ offset(); +} + +void LIR_Assembler::move_regs(Register from_reg, Register to_reg) { + __ move(to_reg, from_reg); +} + +void LIR_Assembler::swap_reg(Register a, Register b) { Unimplemented(); } + +void LIR_Assembler::const2reg(LIR_Opr src, LIR_Opr dest, LIR_PatchCode patch_code, CodeEmitInfo* info) { + assert(src->is_constant(), "should not call otherwise"); + assert(dest->is_register(), "should not call otherwise"); + LIR_Const* c = src->as_constant_ptr(); + + switch (c->type()) { + case T_INT: + assert(patch_code == lir_patch_none, "no patching handled here"); + __ li(dest->as_register(), c->as_jint()); + break; + case T_ADDRESS: + assert(patch_code == lir_patch_none, "no patching handled here"); + __ li(dest->as_register(), c->as_jint()); + break; + case T_LONG: + assert(patch_code == lir_patch_none, "no patching handled here"); + __ li(dest->as_register_lo(), (intptr_t)c->as_jlong()); + break; + case T_OBJECT: + if (patch_code == lir_patch_none) { + jobject2reg(c->as_jobject(), dest->as_register()); + } else { + jobject2reg_with_patching(dest->as_register(), info); + } + break; + case T_METADATA: + if (patch_code != lir_patch_none) { + klass2reg_with_patching(dest->as_register(), info); + } else { + __ mov_metadata(dest->as_register(), c->as_metadata()); + } + break; + case T_FLOAT: + __ lea(SCR1, InternalAddress(float_constant(c->as_jfloat()))); + __ fld_s(dest->as_float_reg(), SCR1, 0); + break; + case T_DOUBLE: + __ lea(SCR1, InternalAddress(double_constant(c->as_jdouble()))); + __ fld_d(dest->as_double_reg(), SCR1, 0); + break; + default: + ShouldNotReachHere(); + } +} + +void LIR_Assembler::const2stack(LIR_Opr src, LIR_Opr dest) { + LIR_Const* c = src->as_constant_ptr(); + switch 
(c->type()) { + case T_OBJECT: + if (!c->as_jobject()) + __ st_ptr(R0, frame_map()->address_for_slot(dest->single_stack_ix())); + else { + const2reg(src, FrameMap::scr1_opr, lir_patch_none, NULL); + reg2stack(FrameMap::scr1_opr, dest, c->type(), false); + } + break; + case T_ADDRESS: + const2reg(src, FrameMap::scr1_opr, lir_patch_none, NULL); + reg2stack(FrameMap::scr1_opr, dest, c->type(), false); + case T_INT: + case T_FLOAT: + if (c->as_jint_bits() == 0) + __ st_w(R0, frame_map()->address_for_slot(dest->single_stack_ix())); + else { + __ li(SCR2, c->as_jint_bits()); + __ st_w(SCR2, frame_map()->address_for_slot(dest->single_stack_ix())); + } + break; + case T_LONG: + case T_DOUBLE: + if (c->as_jlong_bits() == 0) + __ st_ptr(R0, frame_map()->address_for_slot(dest->double_stack_ix(), + lo_word_offset_in_bytes)); + else { + __ li(SCR2, (intptr_t)c->as_jlong_bits()); + __ st_ptr(SCR2, frame_map()->address_for_slot(dest->double_stack_ix(), + lo_word_offset_in_bytes)); + } + break; + default: + ShouldNotReachHere(); + } +} + +void LIR_Assembler::const2mem(LIR_Opr src, LIR_Opr dest, BasicType type, + CodeEmitInfo* info, bool wide) { + assert(src->is_constant(), "should not call otherwise"); + LIR_Const* c = src->as_constant_ptr(); + LIR_Address* to_addr = dest->as_address_ptr(); + + void (Assembler::* insn)(Register Rt, Address adr); + + switch (type) { + case T_ADDRESS: + assert(c->as_jint() == 0, "should be"); + insn = &Assembler::st_d; + break; + case T_LONG: + assert(c->as_jlong() == 0, "should be"); + insn = &Assembler::st_d; + break; + case T_INT: + assert(c->as_jint() == 0, "should be"); + insn = &Assembler::st_w; + break; + case T_OBJECT: + case T_ARRAY: + assert(c->as_jobject() == 0, "should be"); + if (UseCompressedOops && !wide) { + insn = &Assembler::st_w; + } else { + insn = &Assembler::st_d; + } + break; + case T_CHAR: + case T_SHORT: + assert(c->as_jint() == 0, "should be"); + insn = &Assembler::st_h; + break; + case T_BOOLEAN: + case T_BYTE: + assert(c->as_jint() == 0, "should be"); + insn = &Assembler::st_b; + break; + default: + ShouldNotReachHere(); + insn = &Assembler::st_d; // unreachable + } + + if (info) add_debug_info_for_null_check_here(info); + (_masm->*insn)(R0, as_Address(to_addr)); +} + +void LIR_Assembler::reg2reg(LIR_Opr src, LIR_Opr dest) { + assert(src->is_register(), "should not call otherwise"); + assert(dest->is_register(), "should not call otherwise"); + + // move between cpu-registers + if (dest->is_single_cpu()) { + if (src->type() == T_LONG) { + // Can do LONG -> OBJECT + move_regs(src->as_register_lo(), dest->as_register()); + return; + } + assert(src->is_single_cpu(), "must match"); + if (src->type() == T_OBJECT) { + __ verify_oop(src->as_register()); + } + move_regs(src->as_register(), dest->as_register()); + } else if (dest->is_double_cpu()) { + if (is_reference_type(src->type())) { + // Surprising to me but we can see move of a long to t_object + __ verify_oop(src->as_register()); + move_regs(src->as_register(), dest->as_register_lo()); + return; + } + assert(src->is_double_cpu(), "must match"); + Register f_lo = src->as_register_lo(); + Register f_hi = src->as_register_hi(); + Register t_lo = dest->as_register_lo(); + Register t_hi = dest->as_register_hi(); + assert(f_hi == f_lo, "must be same"); + assert(t_hi == t_lo, "must be same"); + move_regs(f_lo, t_lo); + } else if (dest->is_single_fpu()) { + __ fmov_s(dest->as_float_reg(), src->as_float_reg()); + } else if (dest->is_double_fpu()) { + __ fmov_d(dest->as_double_reg(), src->as_double_reg()); + 
} else { + ShouldNotReachHere(); + } +} + +void LIR_Assembler::reg2stack(LIR_Opr src, LIR_Opr dest, BasicType type, bool pop_fpu_stack) { + precond(src->is_register() && dest->is_stack()); + + uint const c_sz32 = sizeof(uint32_t); + uint const c_sz64 = sizeof(uint64_t); + + if (src->is_single_cpu()) { + int index = dest->single_stack_ix(); + if (is_reference_type(type)) { + __ st_ptr(src->as_register(), stack_slot_address(index, c_sz64)); + __ verify_oop(src->as_register()); + } else if (type == T_METADATA || type == T_DOUBLE || type == T_ADDRESS) { + __ st_ptr(src->as_register(), stack_slot_address(index, c_sz64)); + } else { + __ st_w(src->as_register(), stack_slot_address(index, c_sz32)); + } + } else if (src->is_double_cpu()) { + int index = dest->double_stack_ix(); + Address dest_addr_LO = stack_slot_address(index, c_sz64, lo_word_offset_in_bytes); + __ st_ptr(src->as_register_lo(), dest_addr_LO); + } else if (src->is_single_fpu()) { + int index = dest->single_stack_ix(); + __ fst_s(src->as_float_reg(), stack_slot_address(index, c_sz32)); + } else if (src->is_double_fpu()) { + int index = dest->double_stack_ix(); + __ fst_d(src->as_double_reg(), stack_slot_address(index, c_sz64)); + } else { + ShouldNotReachHere(); + } +} + +void LIR_Assembler::reg2mem(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, + CodeEmitInfo* info, bool pop_fpu_stack, bool wide, bool /* unaligned */) { + LIR_Address* to_addr = dest->as_address_ptr(); + PatchingStub* patch = NULL; + Register compressed_src = SCR2; + + if (patch_code != lir_patch_none) { + deoptimize_trap(info); + return; + } + + if (is_reference_type(type)) { + __ verify_oop(src->as_register()); + + if (UseCompressedOops && !wide) { + __ encode_heap_oop(compressed_src, src->as_register()); + } else { + compressed_src = src->as_register(); + } + } + + int null_check_here = code_offset(); + switch (type) { + case T_FLOAT: + __ fst_s(src->as_float_reg(), as_Address(to_addr)); + break; + case T_DOUBLE: + __ fst_d(src->as_double_reg(), as_Address(to_addr)); + break; + case T_ARRAY: // fall through + case T_OBJECT: // fall through + if (UseCompressedOops && !wide) { + __ st_w(compressed_src, as_Address(to_addr)); + } else { + __ st_ptr(compressed_src, as_Address(to_addr)); + } + break; + case T_METADATA: + // We get here to store a method pointer to the stack to pass to + // a dtrace runtime call. This can't work on 64 bit with + // compressed klass ptrs: T_METADATA can be a compressed klass + // ptr or a 64 bit method pointer. 
+ ShouldNotReachHere(); + __ st_ptr(src->as_register(), as_Address(to_addr)); + break; + case T_ADDRESS: + __ st_ptr(src->as_register(), as_Address(to_addr)); + break; + case T_INT: + __ st_w(src->as_register(), as_Address(to_addr)); + break; + case T_LONG: + __ st_ptr(src->as_register_lo(), as_Address_lo(to_addr)); + break; + case T_BYTE: // fall through + case T_BOOLEAN: + __ st_b(src->as_register(), as_Address(to_addr)); + break; + case T_CHAR: // fall through + case T_SHORT: + __ st_h(src->as_register(), as_Address(to_addr)); + break; + default: + ShouldNotReachHere(); + } + if (info != NULL) { + add_debug_info_for_null_check(null_check_here, info); + } +} + +void LIR_Assembler::stack2reg(LIR_Opr src, LIR_Opr dest, BasicType type) { + precond(src->is_stack() && dest->is_register()); + + uint const c_sz32 = sizeof(uint32_t); + uint const c_sz64 = sizeof(uint64_t); + + if (dest->is_single_cpu()) { + int index = src->single_stack_ix(); + if (is_reference_type(type)) { + __ ld_ptr(dest->as_register(), stack_slot_address(index, c_sz64)); + __ verify_oop(dest->as_register()); + } else if (type == T_METADATA || type == T_ADDRESS) { + __ ld_ptr(dest->as_register(), stack_slot_address(index, c_sz64)); + } else { + __ ld_w(dest->as_register(), stack_slot_address(index, c_sz32)); + } + } else if (dest->is_double_cpu()) { + int index = src->double_stack_ix(); + Address src_addr_LO = stack_slot_address(index, c_sz64, lo_word_offset_in_bytes); + __ ld_ptr(dest->as_register_lo(), src_addr_LO); + } else if (dest->is_single_fpu()) { + int index = src->single_stack_ix(); + __ fld_s(dest->as_float_reg(), stack_slot_address(index, c_sz32)); + } else if (dest->is_double_fpu()) { + int index = src->double_stack_ix(); + __ fld_d(dest->as_double_reg(), stack_slot_address(index, c_sz64)); + } else { + ShouldNotReachHere(); + } +} + +void LIR_Assembler::klass2reg_with_patching(Register reg, CodeEmitInfo* info) { + address target = NULL; + + switch (patching_id(info)) { + case PatchingStub::access_field_id: + target = Runtime1::entry_for(Runtime1::access_field_patching_id); + break; + case PatchingStub::load_klass_id: + target = Runtime1::entry_for(Runtime1::load_klass_patching_id); + break; + case PatchingStub::load_mirror_id: + target = Runtime1::entry_for(Runtime1::load_mirror_patching_id); + break; + case PatchingStub::load_appendix_id: + target = Runtime1::entry_for(Runtime1::load_appendix_patching_id); + break; + default: ShouldNotReachHere(); + } + + __ call(target, relocInfo::runtime_call_type); + add_call_info_here(info); +} + +void LIR_Assembler::stack2stack(LIR_Opr src, LIR_Opr dest, BasicType type) { + LIR_Opr temp; + + if (type == T_LONG || type == T_DOUBLE) + temp = FrameMap::scr1_long_opr; + else + temp = FrameMap::scr1_opr; + + stack2reg(src, temp, src->type()); + reg2stack(temp, dest, dest->type(), false); +} + +void LIR_Assembler::mem2reg(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, + CodeEmitInfo* info, bool wide, bool /* unaligned */) { + LIR_Address* addr = src->as_address_ptr(); + LIR_Address* from_addr = src->as_address_ptr(); + + if (addr->base()->type() == T_OBJECT) { + __ verify_oop(addr->base()->as_pointer_register()); + } + + if (patch_code != lir_patch_none) { + deoptimize_trap(info); + return; + } + + if (info != NULL) { + add_debug_info_for_null_check_here(info); + } + int null_check_here = code_offset(); + switch (type) { + case T_FLOAT: + __ fld_s(dest->as_float_reg(), as_Address(from_addr)); + break; + case T_DOUBLE: + __ fld_d(dest->as_double_reg(), 
as_Address(from_addr)); + break; + case T_ARRAY: // fall through + case T_OBJECT: // fall through + if (UseCompressedOops && !wide) { + __ ld_wu(dest->as_register(), as_Address(from_addr)); + } else { + __ ld_ptr(dest->as_register(), as_Address(from_addr)); + } + break; + case T_METADATA: + // We get here to store a method pointer to the stack to pass to + // a dtrace runtime call. This can't work on 64 bit with + // compressed klass ptrs: T_METADATA can be a compressed klass + // ptr or a 64 bit method pointer. + ShouldNotReachHere(); + __ ld_ptr(dest->as_register(), as_Address(from_addr)); + break; + case T_ADDRESS: + // FIXME: OMG this is a horrible kludge. Any offset from an + // address that matches klass_offset_in_bytes() will be loaded + // as a word, not a long. + if (UseCompressedClassPointers && addr->disp() == oopDesc::klass_offset_in_bytes()) { + __ ld_wu(dest->as_register(), as_Address(from_addr)); + } else { + __ ld_ptr(dest->as_register(), as_Address(from_addr)); + } + break; + case T_INT: + __ ld_w(dest->as_register(), as_Address(from_addr)); + break; + case T_LONG: + __ ld_ptr(dest->as_register_lo(), as_Address_lo(from_addr)); + break; + case T_BYTE: + __ ld_b(dest->as_register(), as_Address(from_addr)); + break; + case T_BOOLEAN: + __ ld_bu(dest->as_register(), as_Address(from_addr)); + break; + case T_CHAR: + __ ld_hu(dest->as_register(), as_Address(from_addr)); + break; + case T_SHORT: + __ ld_h(dest->as_register(), as_Address(from_addr)); + break; + default: + ShouldNotReachHere(); + } + + if (is_reference_type(type)) { + if (UseCompressedOops && !wide) { + __ decode_heap_oop(dest->as_register()); + } + + if (!UseZGC) { + // Load barrier has not yet been applied, so ZGC can't verify the oop here + __ verify_oop(dest->as_register()); + } + } else if (type == T_ADDRESS && addr->disp() == oopDesc::klass_offset_in_bytes()) { + if (UseCompressedClassPointers) { + __ decode_klass_not_null(dest->as_register()); + } + } +} + +int LIR_Assembler::array_element_size(BasicType type) const { + int elem_size = type2aelembytes(type); + return exact_log2(elem_size); +} + +void LIR_Assembler::emit_op3(LIR_Op3* op) { + switch (op->code()) { + case lir_idiv: + case lir_irem: + arithmetic_idiv(op->code(), op->in_opr1(), op->in_opr2(), op->in_opr3(), + op->result_opr(), op->info()); + break; + case lir_fmad: + __ fmadd_d(op->result_opr()->as_double_reg(), op->in_opr1()->as_double_reg(), + op->in_opr2()->as_double_reg(), op->in_opr3()->as_double_reg()); + break; + case lir_fmaf: + __ fmadd_s(op->result_opr()->as_float_reg(), op->in_opr1()->as_float_reg(), + op->in_opr2()->as_float_reg(), op->in_opr3()->as_float_reg()); + break; + default: + ShouldNotReachHere(); + break; + } +} + +void LIR_Assembler::emit_opBranch(LIR_OpBranch* op) { +#ifdef ASSERT + assert(op->block() == NULL || op->block()->label() == op->label(), "wrong label"); + if (op->block() != NULL) _branch_target_blocks.append(op->block()); + assert(op->cond() == lir_cond_always, "must be"); +#endif + + if (op->info() != NULL) + add_debug_info_for_branch(op->info()); + + __ b_far(*(op->label())); +} + +void LIR_Assembler::emit_opCmpBranch(LIR_OpCmpBranch* op) { +#ifdef ASSERT + assert(op->block() == NULL || op->block()->label() == op->label(), "wrong label"); + if (op->block() != NULL) _branch_target_blocks.append(op->block()); + if (op->ublock() != NULL) _branch_target_blocks.append(op->ublock()); +#endif + + if (op->info() != NULL) { + assert(op->in_opr1()->is_address() || op->in_opr2()->is_address(), + "shouldn't be 
codeemitinfo for non-address operands"); + add_debug_info_for_null_check_here(op->info()); // exception possible + } + + Label& L = *(op->label()); + Assembler::Condition acond; + LIR_Opr opr1 = op->in_opr1(); + LIR_Opr opr2 = op->in_opr2(); + assert(op->condition() != lir_cond_always, "must be"); + + if (op->code() == lir_cmp_float_branch) { + bool is_unordered = (op->ublock() == op->block()); + if (opr1->is_single_fpu()) { + FloatRegister reg1 = opr1->as_float_reg(); + assert(opr2->is_single_fpu(), "expect single float register"); + FloatRegister reg2 = opr2->as_float_reg(); + switch(op->condition()) { + case lir_cond_equal: + if (is_unordered) + __ fcmp_cueq_s(FCC0, reg1, reg2); + else + __ fcmp_ceq_s(FCC0, reg1, reg2); + break; + case lir_cond_notEqual: + if (is_unordered) + __ fcmp_cune_s(FCC0, reg1, reg2); + else + __ fcmp_cne_s(FCC0, reg1, reg2); + break; + case lir_cond_less: + if (is_unordered) + __ fcmp_cult_s(FCC0, reg1, reg2); + else + __ fcmp_clt_s(FCC0, reg1, reg2); + break; + case lir_cond_lessEqual: + if (is_unordered) + __ fcmp_cule_s(FCC0, reg1, reg2); + else + __ fcmp_cle_s(FCC0, reg1, reg2); + break; + case lir_cond_greaterEqual: + if (is_unordered) + __ fcmp_cule_s(FCC0, reg2, reg1); + else + __ fcmp_cle_s(FCC0, reg2, reg1); + break; + case lir_cond_greater: + if (is_unordered) + __ fcmp_cult_s(FCC0, reg2, reg1); + else + __ fcmp_clt_s(FCC0, reg2, reg1); + break; + default: + ShouldNotReachHere(); + } + } else if (opr1->is_double_fpu()) { + FloatRegister reg1 = opr1->as_double_reg(); + assert(opr2->is_double_fpu(), "expect double float register"); + FloatRegister reg2 = opr2->as_double_reg(); + switch(op->condition()) { + case lir_cond_equal: + if (is_unordered) + __ fcmp_cueq_d(FCC0, reg1, reg2); + else + __ fcmp_ceq_d(FCC0, reg1, reg2); + break; + case lir_cond_notEqual: + if (is_unordered) + __ fcmp_cune_d(FCC0, reg1, reg2); + else + __ fcmp_cne_d(FCC0, reg1, reg2); + break; + case lir_cond_less: + if (is_unordered) + __ fcmp_cult_d(FCC0, reg1, reg2); + else + __ fcmp_clt_d(FCC0, reg1, reg2); + break; + case lir_cond_lessEqual: + if (is_unordered) + __ fcmp_cule_d(FCC0, reg1, reg2); + else + __ fcmp_cle_d(FCC0, reg1, reg2); + break; + case lir_cond_greaterEqual: + if (is_unordered) + __ fcmp_cule_d(FCC0, reg2, reg1); + else + __ fcmp_cle_d(FCC0, reg2, reg1); + break; + case lir_cond_greater: + if (is_unordered) + __ fcmp_cult_d(FCC0, reg2, reg1); + else + __ fcmp_clt_d(FCC0, reg2, reg1); + break; + default: + ShouldNotReachHere(); + } + } else { + ShouldNotReachHere(); + } + __ bcnez(FCC0, L); + } else { + if (opr1->is_constant() && opr2->is_single_cpu()) { + // tableswitch + Unimplemented(); + } else if (opr1->is_single_cpu() || opr1->is_double_cpu()) { + Register reg1 = as_reg(opr1); + Register reg2 = noreg; + jlong imm2 = 0; + if (opr2->is_single_cpu()) { + // cpu register - cpu register + reg2 = opr2->as_register(); + } else if (opr2->is_double_cpu()) { + // cpu register - cpu register + reg2 = opr2->as_register_lo(); + } else if (opr2->is_constant()) { + switch(opr2->type()) { + case T_INT: + case T_ADDRESS: + imm2 = opr2->as_constant_ptr()->as_jint(); + break; + case T_LONG: + imm2 = opr2->as_constant_ptr()->as_jlong(); + break; + case T_METADATA: + imm2 = (intptr_t)opr2->as_constant_ptr()->as_metadata(); + break; + case T_OBJECT: + case T_ARRAY: + if (opr2->as_constant_ptr()->as_jobject() != NULL) { + reg2 = SCR1; + jobject2reg(opr2->as_constant_ptr()->as_jobject(), reg2); + } else { + reg2 = R0; + } + break; + default: + ShouldNotReachHere(); + break; + } + 
} else { + ShouldNotReachHere(); + } + if (reg2 == noreg) { + if (imm2 == 0) { + reg2 = R0; + } else { + reg2 = SCR1; + __ li(reg2, imm2); + } + } + switch (op->condition()) { + case lir_cond_equal: + __ beq_far(reg1, reg2, L); break; + case lir_cond_notEqual: + __ bne_far(reg1, reg2, L); break; + case lir_cond_less: + __ blt_far(reg1, reg2, L, true); break; + case lir_cond_lessEqual: + __ bge_far(reg2, reg1, L, true); break; + case lir_cond_greaterEqual: + __ bge_far(reg1, reg2, L, true); break; + case lir_cond_greater: + __ blt_far(reg2, reg1, L, true); break; + case lir_cond_belowEqual: + __ bge_far(reg2, reg1, L, false); break; + case lir_cond_aboveEqual: + __ bge_far(reg1, reg2, L, false); break; + default: + ShouldNotReachHere(); + } + } + } +} + +void LIR_Assembler::emit_opConvert(LIR_OpConvert* op) { + LIR_Opr src = op->in_opr(); + LIR_Opr dest = op->result_opr(); + LIR_Opr tmp = op->tmp(); + + switch (op->bytecode()) { + case Bytecodes::_i2f: + __ movgr2fr_w(dest->as_float_reg(), src->as_register()); + __ ffint_s_w(dest->as_float_reg(), dest->as_float_reg()); + break; + case Bytecodes::_i2d: + __ movgr2fr_w(dest->as_double_reg(), src->as_register()); + __ ffint_d_w(dest->as_double_reg(), dest->as_double_reg()); + break; + case Bytecodes::_l2d: + __ movgr2fr_d(dest->as_double_reg(), src->as_register_lo()); + __ ffint_d_l(dest->as_double_reg(), dest->as_double_reg()); + break; + case Bytecodes::_l2f: + __ movgr2fr_d(dest->as_float_reg(), src->as_register_lo()); + __ ffint_s_l(dest->as_float_reg(), dest->as_float_reg()); + break; + case Bytecodes::_f2d: + __ fcvt_d_s(dest->as_double_reg(), src->as_float_reg()); + break; + case Bytecodes::_d2f: + __ fcvt_s_d(dest->as_float_reg(), src->as_double_reg()); + break; + case Bytecodes::_i2c: + __ bstrpick_w(dest->as_register(), src->as_register(), 15, 0); + break; + case Bytecodes::_i2l: + _masm->block_comment("FIXME: This could be a no-op"); + __ slli_w(dest->as_register_lo(), src->as_register(), 0); + break; + case Bytecodes::_i2s: + __ ext_w_h(dest->as_register(), src->as_register()); + break; + case Bytecodes::_i2b: + __ ext_w_b(dest->as_register(), src->as_register()); + break; + case Bytecodes::_l2i: + __ slli_w(dest->as_register(), src->as_register_lo(), 0); + break; + case Bytecodes::_d2l: + __ ftintrz_l_d(tmp->as_double_reg(), src->as_double_reg()); + __ movfr2gr_d(dest->as_register_lo(), tmp->as_double_reg()); + break; + case Bytecodes::_f2i: + __ ftintrz_w_s(tmp->as_float_reg(), src->as_float_reg()); + __ movfr2gr_s(dest->as_register(), tmp->as_float_reg()); + break; + case Bytecodes::_f2l: + __ ftintrz_l_s(tmp->as_float_reg(), src->as_float_reg()); + __ movfr2gr_d(dest->as_register_lo(), tmp->as_float_reg()); + break; + case Bytecodes::_d2i: + __ ftintrz_w_d(tmp->as_double_reg(), src->as_double_reg()); + __ movfr2gr_s(dest->as_register(), tmp->as_double_reg()); + break; + default: ShouldNotReachHere(); + } +} + +void LIR_Assembler::emit_alloc_obj(LIR_OpAllocObj* op) { + if (op->init_check()) { + __ ld_bu(SCR1, Address(op->klass()->as_register(), InstanceKlass::init_state_offset())); + __ li(SCR2, InstanceKlass::fully_initialized); + add_debug_info_for_null_check_here(op->stub()->info()); + __ bne_far(SCR1, SCR2, *op->stub()->entry()); + } + __ allocate_object(op->obj()->as_register(), op->tmp1()->as_register(), + op->tmp2()->as_register(), op->header_size(), + op->object_size(), op->klass()->as_register(), + *op->stub()->entry()); + __ bind(*op->stub()->continuation()); +} + +void LIR_Assembler::emit_alloc_array(LIR_OpAllocArray* 
op) { + Register len = op->len()->as_register(); + if (UseSlowPath || + (!UseFastNewObjectArray && is_reference_type(op->type())) || + (!UseFastNewTypeArray && !is_reference_type(op->type()))) { + __ b(*op->stub()->entry()); + } else { + Register tmp1 = op->tmp1()->as_register(); + Register tmp2 = op->tmp2()->as_register(); + Register tmp3 = op->tmp3()->as_register(); + if (len == tmp1) { + tmp1 = tmp3; + } else if (len == tmp2) { + tmp2 = tmp3; + } else if (len == tmp3) { + // everything is ok + } else { + __ move(tmp3, len); + } + __ allocate_array(op->obj()->as_register(), len, tmp1, tmp2, + arrayOopDesc::header_size(op->type()), + array_element_size(op->type()), + op->klass()->as_register(), + *op->stub()->entry()); + } + __ bind(*op->stub()->continuation()); +} + +void LIR_Assembler::type_profile_helper(Register mdo, ciMethodData *md, ciProfileData *data, + Register recv, Label* update_done) { + for (uint i = 0; i < ReceiverTypeData::row_limit(); i++) { + Label next_test; + // See if the receiver is receiver[n]. + __ lea(SCR2, Address(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_offset(i)))); + __ ld_ptr(SCR1, Address(SCR2)); + __ bne(recv, SCR1, next_test); + Address data_addr(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_count_offset(i))); + __ ld_ptr(SCR2, data_addr); + __ addi_d(SCR2, SCR2, DataLayout::counter_increment); + __ st_ptr(SCR2, data_addr); + __ b(*update_done); + __ bind(next_test); + } + + // Didn't find receiver; find next empty slot and fill it in + for (uint i = 0; i < ReceiverTypeData::row_limit(); i++) { + Label next_test; + __ lea(SCR2, Address(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_offset(i)))); + Address recv_addr(SCR2); + __ ld_ptr(SCR1, recv_addr); + __ bnez(SCR1, next_test); + __ st_ptr(recv, recv_addr); + __ li(SCR1, DataLayout::counter_increment); + __ lea(SCR2, Address(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_count_offset(i)))); + __ st_ptr(SCR1, Address(SCR2)); + __ b(*update_done); + __ bind(next_test); + } +} + +void LIR_Assembler::emit_typecheck_helper(LIR_OpTypeCheck *op, Label* success, + Label* failure, Label* obj_is_null) { + // we always need a stub for the failure case. + CodeStub* stub = op->stub(); + Register obj = op->object()->as_register(); + Register k_RInfo = op->tmp1()->as_register(); + Register klass_RInfo = op->tmp2()->as_register(); + Register dst = op->result_opr()->as_register(); + ciKlass* k = op->klass(); + Register Rtmp1 = noreg; + + // check if it needs to be profiled + ciMethodData* md; + ciProfileData* data; + + const bool should_profile = op->should_profile(); + + if (should_profile) { + ciMethod* method = op->profiled_method(); + assert(method != NULL, "Should have method"); + int bci = op->profiled_bci(); + md = method->method_data_or_null(); + assert(md != NULL, "Sanity"); + data = md->bci_to_data(bci); + assert(data != NULL, "need data for type check"); + assert(data->is_ReceiverTypeData(), "need ReceiverTypeData for type check"); + } + + Label profile_cast_success, profile_cast_failure; + Label *success_target = should_profile ? &profile_cast_success : success; + Label *failure_target = should_profile ? 
&profile_cast_failure : failure; + + if (obj == k_RInfo) { + k_RInfo = dst; + } else if (obj == klass_RInfo) { + klass_RInfo = dst; + } + if (k->is_loaded() && !UseCompressedClassPointers) { + select_different_registers(obj, dst, k_RInfo, klass_RInfo); + } else { + Rtmp1 = op->tmp3()->as_register(); + select_different_registers(obj, dst, k_RInfo, klass_RInfo, Rtmp1); + } + + assert_different_registers(obj, k_RInfo, klass_RInfo); + + if (should_profile) { + Label not_null; + __ bnez(obj, not_null); + // Object is null; update MDO and exit + Register mdo = klass_RInfo; + __ mov_metadata(mdo, md->constant_encoding()); + Address data_addr = Address(mdo, md->byte_offset_of_slot(data, DataLayout::flags_offset())); + __ ld_bu(SCR2, data_addr); + __ ori(SCR2, SCR2, BitData::null_seen_byte_constant()); + __ st_b(SCR2, data_addr); + __ b(*obj_is_null); + __ bind(not_null); + } else { + __ beqz(obj, *obj_is_null); + } + + if (!k->is_loaded()) { + klass2reg_with_patching(k_RInfo, op->info_for_patch()); + } else { + __ mov_metadata(k_RInfo, k->constant_encoding()); + } + __ verify_oop(obj); + + if (op->fast_check()) { + // get object class + // not a safepoint as obj null check happens earlier + __ load_klass(SCR2, obj); + __ bne_far(SCR2, k_RInfo, *failure_target); + // successful cast, fall through to profile or jump + } else { + // get object class + // not a safepoint as obj null check happens earlier + __ load_klass(klass_RInfo, obj); + if (k->is_loaded()) { + // See if we get an immediate positive hit + __ ld_ptr(SCR1, Address(klass_RInfo, int64_t(k->super_check_offset()))); + if ((juint)in_bytes(Klass::secondary_super_cache_offset()) != k->super_check_offset()) { + __ bne_far(k_RInfo, SCR1, *failure_target); + // successful cast, fall through to profile or jump + } else { + // See if we get an immediate positive hit + __ beq_far(k_RInfo, SCR1, *success_target); + // check for self + __ beq_far(klass_RInfo, k_RInfo, *success_target); + + __ addi_d(SP, SP, -2 * wordSize); + __ st_ptr(k_RInfo, Address(SP, 0 * wordSize)); + __ st_ptr(klass_RInfo, Address(SP, 1 * wordSize)); + __ call(Runtime1::entry_for(Runtime1::slow_subtype_check_id), relocInfo::runtime_call_type); + __ ld_ptr(klass_RInfo, Address(SP, 0 * wordSize)); + __ addi_d(SP, SP, 2 * wordSize); + // result is a boolean + __ beqz(klass_RInfo, *failure_target); + // successful cast, fall through to profile or jump + } + } else { + // perform the fast part of the checking logic + __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, success_target, failure_target, NULL); + // call out-of-line instance of __ check_klass_subtype_slow_path(...): + __ addi_d(SP, SP, -2 * wordSize); + __ st_ptr(k_RInfo, Address(SP, 0 * wordSize)); + __ st_ptr(klass_RInfo, Address(SP, 1 * wordSize)); + __ call(Runtime1::entry_for(Runtime1::slow_subtype_check_id), relocInfo::runtime_call_type); + __ ld_ptr(k_RInfo, Address(SP, 0 * wordSize)); + __ ld_ptr(klass_RInfo, Address(SP, 1 * wordSize)); + __ addi_d(SP, SP, 2 * wordSize); + // result is a boolean + __ beqz(k_RInfo, *failure_target); + // successful cast, fall through to profile or jump + } + } + if (should_profile) { + Register mdo = klass_RInfo, recv = k_RInfo; + __ bind(profile_cast_success); + __ mov_metadata(mdo, md->constant_encoding()); + __ load_klass(recv, obj); + Label update_done; + type_profile_helper(mdo, md, data, recv, success); + __ b(*success); + + __ bind(profile_cast_failure); + __ mov_metadata(mdo, md->constant_encoding()); + Address counter_addr = Address(mdo, 
md->byte_offset_of_slot(data, CounterData::count_offset())); + __ ld_ptr(SCR2, counter_addr); + __ addi_d(SCR2, SCR2, -DataLayout::counter_increment); + __ st_ptr(SCR2, counter_addr); + __ b(*failure); + } + __ b(*success); +} + +void LIR_Assembler::emit_opTypeCheck(LIR_OpTypeCheck* op) { + const bool should_profile = op->should_profile(); + + LIR_Code code = op->code(); + if (code == lir_store_check) { + Register value = op->object()->as_register(); + Register array = op->array()->as_register(); + Register k_RInfo = op->tmp1()->as_register(); + Register klass_RInfo = op->tmp2()->as_register(); + Register Rtmp1 = op->tmp3()->as_register(); + CodeStub* stub = op->stub(); + + // check if it needs to be profiled + ciMethodData* md; + ciProfileData* data; + + if (should_profile) { + ciMethod* method = op->profiled_method(); + assert(method != NULL, "Should have method"); + int bci = op->profiled_bci(); + md = method->method_data_or_null(); + assert(md != NULL, "Sanity"); + data = md->bci_to_data(bci); + assert(data != NULL, "need data for type check"); + assert(data->is_ReceiverTypeData(), "need ReceiverTypeData for type check"); + } + Label profile_cast_success, profile_cast_failure, done; + Label *success_target = should_profile ? &profile_cast_success : &done; + Label *failure_target = should_profile ? &profile_cast_failure : stub->entry(); + + if (should_profile) { + Label not_null; + __ bnez(value, not_null); + // Object is null; update MDO and exit + Register mdo = klass_RInfo; + __ mov_metadata(mdo, md->constant_encoding()); + Address data_addr = Address(mdo, md->byte_offset_of_slot(data, DataLayout::flags_offset())); + __ ld_bu(SCR2, data_addr); + __ ori(SCR2, SCR2, BitData::null_seen_byte_constant()); + __ st_b(SCR2, data_addr); + __ b(done); + __ bind(not_null); + } else { + __ beqz(value, done); + } + + add_debug_info_for_null_check_here(op->info_for_exception()); + __ load_klass(k_RInfo, array); + __ load_klass(klass_RInfo, value); + + // get instance klass (it's already uncompressed) + __ ld_ptr(k_RInfo, Address(k_RInfo, ObjArrayKlass::element_klass_offset())); + // perform the fast part of the checking logic + __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, success_target, failure_target, NULL); + // call out-of-line instance of __ check_klass_subtype_slow_path(...): + __ addi_d(SP, SP, -2 * wordSize); + __ st_ptr(k_RInfo, Address(SP, 0 * wordSize)); + __ st_ptr(klass_RInfo, Address(SP, 1 * wordSize)); + __ call(Runtime1::entry_for(Runtime1::slow_subtype_check_id), relocInfo::runtime_call_type); + __ ld_ptr(k_RInfo, Address(SP, 0 * wordSize)); + __ ld_ptr(klass_RInfo, Address(SP, 1 * wordSize)); + __ addi_d(SP, SP, 2 * wordSize); + // result is a boolean + __ beqz(k_RInfo, *failure_target); + // fall through to the success case + + if (should_profile) { + Register mdo = klass_RInfo, recv = k_RInfo; + __ bind(profile_cast_success); + __ mov_metadata(mdo, md->constant_encoding()); + __ load_klass(recv, value); + Label update_done; + type_profile_helper(mdo, md, data, recv, &done); + __ b(done); + + __ bind(profile_cast_failure); + __ mov_metadata(mdo, md->constant_encoding()); + Address counter_addr(mdo, md->byte_offset_of_slot(data, CounterData::count_offset())); + __ lea(SCR2, counter_addr); + __ ld_ptr(SCR1, Address(SCR2)); + __ addi_d(SCR1, SCR1, -DataLayout::counter_increment); + __ st_ptr(SCR1, Address(SCR2)); + __ b(*stub->entry()); + } + + __ bind(done); + } else if (code == lir_checkcast) { + Register obj = op->object()->as_register(); + Register dst = 
op->result_opr()->as_register(); + Label success; + emit_typecheck_helper(op, &success, op->stub()->entry(), &success); + __ bind(success); + if (dst != obj) { + __ move(dst, obj); + } + } else if (code == lir_instanceof) { + Register obj = op->object()->as_register(); + Register dst = op->result_opr()->as_register(); + Label success, failure, done; + emit_typecheck_helper(op, &success, &failure, &failure); + __ bind(failure); + __ move(dst, R0); + __ b(done); + __ bind(success); + __ li(dst, 1); + __ bind(done); + } else { + ShouldNotReachHere(); + } +} + +void LIR_Assembler::casw(Register addr, Register newval, Register cmpval, bool sign) { + __ cmpxchg32(Address(addr, 0), cmpval, newval, SCR1, sign, + /* retold */ false, /* barrier */ true); +} + +void LIR_Assembler::casl(Register addr, Register newval, Register cmpval) { + __ cmpxchg(Address(addr, 0), cmpval, newval, SCR1, + /* retold */ false, /* barrier */ true); +} + +void LIR_Assembler::emit_compare_and_swap(LIR_OpCompareAndSwap* op) { + assert(VM_Version::supports_cx8(), "wrong machine"); + Register addr; + if (op->addr()->is_register()) { + addr = as_reg(op->addr()); + } else { + assert(op->addr()->is_address(), "what else?"); + LIR_Address* addr_ptr = op->addr()->as_address_ptr(); + assert(addr_ptr->disp() == 0, "need 0 disp"); + assert(addr_ptr->index() == LIR_OprDesc::illegalOpr(), "need 0 index"); + addr = as_reg(addr_ptr->base()); + } + Register newval = as_reg(op->new_value()); + Register cmpval = as_reg(op->cmp_value()); + + if (op->code() == lir_cas_obj) { + if (UseCompressedOops) { + Register t1 = op->tmp1()->as_register(); + assert(op->tmp1()->is_valid(), "must be"); + __ encode_heap_oop(t1, cmpval); + cmpval = t1; + __ encode_heap_oop(SCR2, newval); + newval = SCR2; + casw(addr, newval, cmpval, false); + } else { + casl(addr, newval, cmpval); + } + } else if (op->code() == lir_cas_int) { + casw(addr, newval, cmpval, true); + } else { + casl(addr, newval, cmpval); + } +} + +void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, + LIR_Opr result, BasicType type) { + Unimplemented(); +} + +void LIR_Assembler::cmp_cmove(LIR_Condition condition, LIR_Opr left, LIR_Opr right, + LIR_Opr src1, LIR_Opr src2, LIR_Opr result, BasicType type) { + assert(result->is_single_cpu() || result->is_double_cpu(), "expect single register for result"); + assert(left->is_single_cpu() || left->is_double_cpu(), "must be"); + Register regd = (result->type() == T_LONG) ? 
result->as_register_lo() : result->as_register(); + Register regl = as_reg(left); + Register regr = noreg; + Register reg1 = noreg; + Register reg2 = noreg; + jlong immr = 0; + + // comparison operands + if (right->is_single_cpu()) { + // cpu register - cpu register + regr = right->as_register(); + } else if (right->is_double_cpu()) { + // cpu register - cpu register + regr = right->as_register_lo(); + } else if (right->is_constant()) { + switch(right->type()) { + case T_INT: + case T_ADDRESS: + immr = right->as_constant_ptr()->as_jint(); + break; + case T_LONG: + immr = right->as_constant_ptr()->as_jlong(); + break; + case T_METADATA: + immr = (intptr_t)right->as_constant_ptr()->as_metadata(); + break; + case T_OBJECT: + case T_ARRAY: + if (right->as_constant_ptr()->as_jobject() != NULL) { + regr = SCR1; + jobject2reg(right->as_constant_ptr()->as_jobject(), regr); + } else { + immr = 0; + } + break; + default: + ShouldNotReachHere(); + break; + } + } else { + ShouldNotReachHere(); + } + + if (regr == noreg) { + switch (condition) { + case lir_cond_equal: + case lir_cond_notEqual: + if (!Assembler::is_simm(-immr, 12)) { + regr = SCR1; + __ li(regr, immr); + } + break; + default: + if (!Assembler::is_simm(immr, 12)) { + regr = SCR1; + __ li(regr, immr); + } + } + } + + // special cases + if (src1->is_constant() && src2->is_constant()) { + jlong val1 = 0, val2 = 0; + if (src1->type() == T_INT && src2->type() == T_INT) { + val1 = src1->as_jint(); + val2 = src2->as_jint(); + } else if (src1->type() == T_LONG && src2->type() == T_LONG) { + val1 = src1->as_jlong(); + val2 = src2->as_jlong(); + } + if (val1 == 0 && val2 == 1) { + if (regr == noreg) { + switch (condition) { + case lir_cond_equal: + if (immr == 0) { + __ sltu(regd, R0, regl); + } else { + __ addi_d(SCR1, regl, -immr); + __ li(regd, 1); + __ maskeqz(regd, regd, SCR1); + } + break; + case lir_cond_notEqual: + if (immr == 0) { + __ sltu(regd, R0, regl); + __ xori(regd, regd, 1); + } else { + __ addi_d(SCR1, regl, -immr); + __ li(regd, 1); + __ masknez(regd, regd, SCR1); + } + break; + case lir_cond_less: + __ slti(regd, regl, immr); + __ xori(regd, regd, 1); + break; + case lir_cond_lessEqual: + if (immr == 0) { + __ slt(regd, R0, regl); + } else { + __ li(SCR1, immr); + __ slt(regd, SCR1, regl); + } + break; + case lir_cond_greater: + if (immr == 0) { + __ slt(regd, R0, regl); + } else { + __ li(SCR1, immr); + __ slt(regd, SCR1, regl); + } + __ xori(regd, regd, 1); + break; + case lir_cond_greaterEqual: + __ slti(regd, regl, immr); + break; + case lir_cond_belowEqual: + if (immr == 0) { + __ sltu(regd, R0, regl); + } else { + __ li(SCR1, immr); + __ sltu(regd, SCR1, regl); + } + break; + case lir_cond_aboveEqual: + __ sltui(regd, regl, immr); + break; + default: + ShouldNotReachHere(); + } + } else { + switch (condition) { + case lir_cond_equal: + __ sub_d(SCR1, regl, regr); + __ li(regd, 1); + __ maskeqz(regd, regd, SCR1); + break; + case lir_cond_notEqual: + __ sub_d(SCR1, regl, regr); + __ li(regd, 1); + __ masknez(regd, regd, SCR1); + break; + case lir_cond_less: + __ slt(regd, regl, regr); + __ xori(regd, regd, 1); + break; + case lir_cond_lessEqual: + __ slt(regd, regr, regl); + break; + case lir_cond_greater: + __ slt(regd, regr, regl); + __ xori(regd, regd, 1); + break; + case lir_cond_greaterEqual: + __ slt(regd, regl, regr); + break; + case lir_cond_belowEqual: + __ sltu(regd, regr, regl); + break; + case lir_cond_aboveEqual: + __ sltu(regd, regl, regr); + break; + default: + ShouldNotReachHere(); + } + } + return; + } else 
if (val1 == 1 && val2 == 0) { + if (regr == noreg) { + switch (condition) { + case lir_cond_equal: + if (immr == 0) { + __ sltu(regd, R0, regl); + __ xori(regd, regd, 1); + } else { + __ addi_d(SCR1, regl, -immr); + __ li(regd, 1); + __ masknez(regd, regd, SCR1); + } + break; + case lir_cond_notEqual: + if (immr == 0) { + __ sltu(regd, R0, regl); + } else { + __ addi_d(SCR1, regl, -immr); + __ li(regd, 1); + __ maskeqz(regd, regd, SCR1); + } + break; + case lir_cond_less: + __ slti(regd, regl, immr); + break; + case lir_cond_lessEqual: + if (immr == 0) { + __ slt(regd, R0, regl); + } else { + __ li(SCR1, immr); + __ slt(regd, SCR1, regl); + } + __ xori(regd, regd, 1); + break; + case lir_cond_greater: + if (immr == 0) { + __ slt(regd, R0, regl); + } else { + __ li(SCR1, immr); + __ slt(regd, SCR1, regl); + } + break; + case lir_cond_greaterEqual: + __ slti(regd, regl, immr); + __ xori(regd, regd, 1); + break; + case lir_cond_belowEqual: + if (immr == 0) { + __ sltu(regd, R0, regl); + } else { + __ li(SCR1, immr); + __ sltu(regd, SCR1, regl); + } + __ xori(regd, regd, 1); + break; + case lir_cond_aboveEqual: + __ sltui(regd, regl, immr); + __ xori(regd, regd, 1); + break; + default: + ShouldNotReachHere(); + } + } else { + switch (condition) { + case lir_cond_equal: + __ sub_d(SCR1, regl, regr); + __ li(regd, 1); + __ masknez(regd, regd, SCR1); + break; + case lir_cond_notEqual: + __ sub_d(SCR1, regl, regr); + __ li(regd, 1); + __ maskeqz(regd, regd, SCR1); + break; + case lir_cond_less: + __ slt(regd, regl, regr); + break; + case lir_cond_lessEqual: + __ slt(regd, regr, regl); + __ xori(regd, regd, 1); + break; + case lir_cond_greater: + __ slt(regd, regr, regl); + break; + case lir_cond_greaterEqual: + __ slt(regd, regl, regr); + __ xori(regd, regd, 1); + break; + case lir_cond_belowEqual: + __ sltu(regd, regr, regl); + __ xori(regd, regd, 1); + break; + case lir_cond_aboveEqual: + __ sltu(regd, regl, regr); + __ xori(regd, regd, 1); + break; + default: + ShouldNotReachHere(); + } + } + return; + } + } + + // cmp + if (regr == noreg) { + switch (condition) { + case lir_cond_equal: + __ addi_d(SCR2, regl, -immr); + break; + case lir_cond_notEqual: + __ addi_d(SCR2, regl, -immr); + break; + case lir_cond_less: + __ slti(SCR2, regl, immr); + break; + case lir_cond_lessEqual: + __ li(SCR1, immr); + __ slt(SCR2, SCR1, regl); + break; + case lir_cond_greater: + __ li(SCR1, immr); + __ slt(SCR2, SCR1, regl); + break; + case lir_cond_greaterEqual: + __ slti(SCR2, regl, immr); + break; + case lir_cond_belowEqual: + __ li(SCR1, immr); + __ sltu(SCR2, SCR1, regl); + break; + case lir_cond_aboveEqual: + __ sltui(SCR2, regl, immr); + break; + default: + ShouldNotReachHere(); + } + } else { + switch (condition) { + case lir_cond_equal: + __ sub_d(SCR2, regl, regr); + break; + case lir_cond_notEqual: + __ sub_d(SCR2, regl, regr); + break; + case lir_cond_less: + __ slt(SCR2, regl, regr); + break; + case lir_cond_lessEqual: + __ slt(SCR2, regr, regl); + break; + case lir_cond_greater: + __ slt(SCR2, regr, regl); + break; + case lir_cond_greaterEqual: + __ slt(SCR2, regl, regr); + break; + case lir_cond_belowEqual: + __ sltu(SCR2, regr, regl); + break; + case lir_cond_aboveEqual: + __ sltu(SCR2, regl, regr); + break; + default: + ShouldNotReachHere(); + } + } + + // value operands + if (src1->is_stack()) { + stack2reg(src1, result, result->type()); + reg1 = regd; + } else if (src1->is_constant()) { + const2reg(src1, result, lir_patch_none, NULL); + reg1 = regd; + } else { + reg1 = (src1->type() == T_LONG) 
? src1->as_register_lo() : src1->as_register(); + } + + if (src2->is_stack()) { + stack2reg(src2, FrameMap::scr1_opr, result->type()); + reg2 = SCR1; + } else if (src2->is_constant()) { + LIR_Opr tmp = src2->type() == T_LONG ? FrameMap::scr1_long_opr : FrameMap::scr1_opr; + const2reg(src2, tmp, lir_patch_none, NULL); + reg2 = SCR1; + } else { + reg2 = (src2->type() == T_LONG) ? src2->as_register_lo() : src2->as_register(); + } + + // cmove + switch (condition) { + case lir_cond_equal: + __ masknez(regd, reg1, SCR2); + __ maskeqz(SCR2, reg2, SCR2); + break; + case lir_cond_notEqual: + __ maskeqz(regd, reg1, SCR2); + __ masknez(SCR2, reg2, SCR2); + break; + case lir_cond_less: + __ maskeqz(regd, reg1, SCR2); + __ masknez(SCR2, reg2, SCR2); + break; + case lir_cond_lessEqual: + __ masknez(regd, reg1, SCR2); + __ maskeqz(SCR2, reg2, SCR2); + break; + case lir_cond_greater: + __ maskeqz(regd, reg1, SCR2); + __ masknez(SCR2, reg2, SCR2); + break; + case lir_cond_greaterEqual: + __ masknez(regd, reg1, SCR2); + __ maskeqz(SCR2, reg2, SCR2); + break; + case lir_cond_belowEqual: + __ masknez(regd, reg1, SCR2); + __ maskeqz(SCR2, reg2, SCR2); + break; + case lir_cond_aboveEqual: + __ masknez(regd, reg1, SCR2); + __ maskeqz(SCR2, reg2, SCR2); + break; + default: + ShouldNotReachHere(); + } + + __ OR(regd, regd, SCR2); +} + +void LIR_Assembler::arith_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest, + CodeEmitInfo* info, bool pop_fpu_stack) { + assert(info == NULL, "should never be used, idiv/irem and ldiv/lrem not handled by this method"); + + if (left->is_single_cpu()) { + Register lreg = left->as_register(); + Register dreg = as_reg(dest); + + if (right->is_single_cpu()) { + // cpu register - cpu register + assert(left->type() == T_INT && right->type() == T_INT && dest->type() == T_INT, "should be"); + Register rreg = right->as_register(); + switch (code) { + case lir_add: __ add_w (dest->as_register(), lreg, rreg); break; + case lir_sub: __ sub_w (dest->as_register(), lreg, rreg); break; + case lir_mul: __ mul_w (dest->as_register(), lreg, rreg); break; + default: ShouldNotReachHere(); + } + } else if (right->is_double_cpu()) { + Register rreg = right->as_register_lo(); + // single_cpu + double_cpu: can happen with obj+long + assert(code == lir_add || code == lir_sub, "mismatched arithmetic op"); + switch (code) { + case lir_add: __ add_d(dreg, lreg, rreg); break; + case lir_sub: __ sub_d(dreg, lreg, rreg); break; + default: ShouldNotReachHere(); + } + } else if (right->is_constant()) { + // cpu register - constant + jlong c; + + // FIXME: This is fugly: we really need to factor all this logic. 
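+      // The right-hand constant is widened to a jlong and then folded directly
+      // into an addi_w/addi_d (negated for lir_sub); adding zero back into the
+      // same register is recognized below and elided as a nop.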
+ switch(right->type()) { + case T_LONG: + c = right->as_constant_ptr()->as_jlong(); + break; + case T_INT: + case T_ADDRESS: + c = right->as_constant_ptr()->as_jint(); + break; + default: + ShouldNotReachHere(); + c = 0; // unreachable + break; + } + + assert(code == lir_add || code == lir_sub, "mismatched arithmetic op"); + if (c == 0 && dreg == lreg) { + COMMENT("effective nop elided"); + return; + } + + switch(left->type()) { + case T_INT: + switch (code) { + case lir_add: __ addi_w(dreg, lreg, c); break; + case lir_sub: __ addi_w(dreg, lreg, -c); break; + default: ShouldNotReachHere(); + } + break; + case T_OBJECT: + case T_ADDRESS: + switch (code) { + case lir_add: __ addi_d(dreg, lreg, c); break; + case lir_sub: __ addi_d(dreg, lreg, -c); break; + default: ShouldNotReachHere(); + } + break; + default: + ShouldNotReachHere(); + } + } else { + ShouldNotReachHere(); + } + } else if (left->is_double_cpu()) { + Register lreg_lo = left->as_register_lo(); + + if (right->is_double_cpu()) { + // cpu register - cpu register + Register rreg_lo = right->as_register_lo(); + switch (code) { + case lir_add: __ add_d(dest->as_register_lo(), lreg_lo, rreg_lo); break; + case lir_sub: __ sub_d(dest->as_register_lo(), lreg_lo, rreg_lo); break; + case lir_mul: __ mul_d(dest->as_register_lo(), lreg_lo, rreg_lo); break; + case lir_div: __ div_d(dest->as_register_lo(), lreg_lo, rreg_lo); break; + case lir_rem: __ mod_d(dest->as_register_lo(), lreg_lo, rreg_lo); break; + default: ShouldNotReachHere(); + } + + } else if (right->is_constant()) { + jlong c = right->as_constant_ptr()->as_jlong(); + Register dreg = as_reg(dest); + switch (code) { + case lir_add: + case lir_sub: + if (c == 0 && dreg == lreg_lo) { + COMMENT("effective nop elided"); + return; + } + code == lir_add ? 
__ addi_d(dreg, lreg_lo, c) : __ addi_d(dreg, lreg_lo, -c); + break; + case lir_div: + assert(c > 0 && is_power_of_2_long(c), "divisor must be power-of-2 constant"); + if (c == 1) { + // move lreg_lo to dreg if divisor is 1 + __ move(dreg, lreg_lo); + } else { + unsigned int shift = exact_log2_long(c); + // use scr1 as intermediate result register + __ srai_d(SCR1, lreg_lo, 63); + __ srli_d(SCR1, SCR1, 64 - shift); + __ add_d(SCR1, lreg_lo, SCR1); + __ srai_d(dreg, SCR1, shift); + } + break; + case lir_rem: + assert(c > 0 && is_power_of_2_long(c), "divisor must be power-of-2 constant"); + if (c == 1) { + // move 0 to dreg if divisor is 1 + __ move(dreg, R0); + } else { + // use scr1/2 as intermediate result register + __ sub_d(SCR1, R0, lreg_lo); + __ slt(SCR2, SCR1, R0); + __ andi(dreg, lreg_lo, c - 1); + __ andi(SCR1, SCR1, c - 1); + __ sub_d(SCR1, R0, SCR1); + __ maskeqz(dreg, dreg, SCR2); + __ masknez(SCR1, SCR1, SCR2); + __ OR(dreg, dreg, SCR1); + } + break; + default: + ShouldNotReachHere(); + } + } else { + ShouldNotReachHere(); + } + } else if (left->is_single_fpu()) { + assert(right->is_single_fpu(), "right hand side of float arithmetics needs to be float register"); + switch (code) { + case lir_add: __ fadd_s (dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break; + case lir_sub: __ fsub_s (dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break; + case lir_mul: __ fmul_s (dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break; + case lir_div: __ fdiv_s (dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break; + default: ShouldNotReachHere(); + } + } else if (left->is_double_fpu()) { + if (right->is_double_fpu()) { + // fpu register - fpu register + switch (code) { + case lir_add: __ fadd_d (dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break; + case lir_sub: __ fsub_d (dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break; + case lir_mul_strictfp: // fall through + case lir_mul: __ fmul_d (dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break; + case lir_div_strictfp: // fall through + case lir_div: __ fdiv_d (dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break; + default: ShouldNotReachHere(); + } + } else { + if (right->is_constant()) { + ShouldNotReachHere(); + } + ShouldNotReachHere(); + } + } else if (left->is_single_stack() || left->is_address()) { + assert(left == dest, "left and dest must be equal"); + ShouldNotReachHere(); + } else { + ShouldNotReachHere(); + } +} + +void LIR_Assembler::arith_fpu_implementation(LIR_Code code, int left_index, int right_index, + int dest_index, bool pop_fpu_stack) { + Unimplemented(); +} + +void LIR_Assembler::intrinsic_op(LIR_Code code, LIR_Opr value, LIR_Opr unused, LIR_Opr dest, LIR_Op* op) { + switch(code) { + case lir_abs : __ fabs_d(dest->as_double_reg(), value->as_double_reg()); break; + case lir_sqrt: __ fsqrt_d(dest->as_double_reg(), value->as_double_reg()); break; + default : ShouldNotReachHere(); + } +} + +void LIR_Assembler::logic_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dst) { + assert(left->is_single_cpu() || left->is_double_cpu(), "expect single or double register"); + Register Rleft = left->is_single_cpu() ? 
left->as_register() : left->as_register_lo(); + + if (dst->is_single_cpu()) { + Register Rdst = dst->as_register(); + if (right->is_constant()) { + switch (code) { + case lir_logic_and: + if (Assembler::is_uimm(right->as_jint(), 12)) { + __ andi(Rdst, Rleft, right->as_jint()); + } else { + __ li(AT, right->as_jint()); + __ AND(Rdst, Rleft, AT); + } + break; + case lir_logic_or: __ ori(Rdst, Rleft, right->as_jint()); break; + case lir_logic_xor: __ xori(Rdst, Rleft, right->as_jint()); break; + default: ShouldNotReachHere(); break; + } + } else { + Register Rright = right->is_single_cpu() ? right->as_register() : right->as_register_lo(); + switch (code) { + case lir_logic_and: __ AND(Rdst, Rleft, Rright); break; + case lir_logic_or: __ OR(Rdst, Rleft, Rright); break; + case lir_logic_xor: __ XOR(Rdst, Rleft, Rright); break; + default: ShouldNotReachHere(); break; + } + } + } else { + Register Rdst = dst->as_register_lo(); + if (right->is_constant()) { + switch (code) { + case lir_logic_and: + if (Assembler::is_uimm(right->as_jlong(), 12)) { + __ andi(Rdst, Rleft, right->as_jlong()); + } else { + // We can guarantee that transform from HIR LogicOp is in range of + // uimm(12), but the common code directly generates LIR LogicAnd, + // and the right-operand is mask with all ones in the high bits. + __ li(AT, right->as_jlong()); + __ AND(Rdst, Rleft, AT); + } + break; + case lir_logic_or: __ ori(Rdst, Rleft, right->as_jlong()); break; + case lir_logic_xor: __ xori(Rdst, Rleft, right->as_jlong()); break; + default: ShouldNotReachHere(); break; + } + } else { + Register Rright = right->is_single_cpu() ? right->as_register() : right->as_register_lo(); + switch (code) { + case lir_logic_and: __ AND(Rdst, Rleft, Rright); break; + case lir_logic_or: __ OR(Rdst, Rleft, Rright); break; + case lir_logic_xor: __ XOR(Rdst, Rleft, Rright); break; + default: ShouldNotReachHere(); break; + } + } + } +} + +void LIR_Assembler::arithmetic_idiv(LIR_Code code, LIR_Opr left, LIR_Opr right, + LIR_Opr illegal, LIR_Opr result, CodeEmitInfo* info) { + // opcode check + assert((code == lir_idiv) || (code == lir_irem), "opcode must be idiv or irem"); + bool is_irem = (code == lir_irem); + + // operand check + assert(left->is_single_cpu(), "left must be register"); + assert(right->is_single_cpu() || right->is_constant(), "right must be register or constant"); + assert(result->is_single_cpu(), "result must be register"); + Register lreg = left->as_register(); + Register dreg = result->as_register(); + + // power-of-2 constant check and codegen + if (right->is_constant()) { + int c = right->as_constant_ptr()->as_jint(); + assert(c > 0 && is_power_of_2(c), "divisor must be power-of-2 constant"); + if (is_irem) { + if (c == 1) { + // move 0 to dreg if divisor is 1 + __ move(dreg, R0); + } else { + // use scr1/2 as intermediate result register + __ sub_w(SCR1, R0, lreg); + __ slt(SCR2, SCR1, R0); + __ andi(dreg, lreg, c - 1); + __ andi(SCR1, SCR1, c - 1); + __ sub_w(SCR1, R0, SCR1); + __ maskeqz(dreg, dreg, SCR2); + __ masknez(SCR1, SCR1, SCR2); + __ OR(dreg, dreg, SCR1); + } + } else { + if (c == 1) { + // move lreg to dreg if divisor is 1 + __ move(dreg, lreg); + } else { + unsigned int shift = exact_log2(c); + // use scr1 as intermediate result register + __ srai_w(SCR1, lreg, 31); + __ srli_w(SCR1, SCR1, 32 - shift); + __ add_w(SCR1, lreg, SCR1); + __ srai_w(dreg, SCR1, shift); + } + } + } else { + Register rreg = right->as_register(); + if (is_irem) + __ mod_w(dreg, lreg, rreg); + else + __ div_w(dreg, lreg, rreg); + } +} 
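+
+// Note added in review (editorial sketch, not part of the original port): the
+// power-of-2 constant paths above avoid a divide instruction. In C terms, for a
+// 32-bit dividend n and divisor 2^k the idiv path computes
+//
+//   int32_t bias = (int32_t)((uint32_t)(n >> 31) >> (32 - k)); // 2^k - 1 if n < 0, else 0
+//   int32_t q    = (n + bias) >> k;                            // rounds toward zero
+//
+// and the irem path computes  n > 0 ? (n & (2^k - 1)) : -((-n) & (2^k - 1)),
+// using maskeqz/masknez to select between the two cases without a branch. The
+// 64-bit variants in arith_op emit the same sequences with shift amounts 63/64.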
+ +void LIR_Assembler::comp_op(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Op2* op) { + Unimplemented(); +} + +void LIR_Assembler::comp_fl2i(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dst, LIR_Op2* op){ + if (code == lir_cmp_fd2i || code == lir_ucmp_fd2i) { + bool is_unordered_less = (code == lir_ucmp_fd2i); + if (left->is_single_fpu()) { + if (is_unordered_less) { + __ fcmp_clt_s(FCC0, right->as_float_reg(), left->as_float_reg()); + __ fcmp_cult_s(FCC1, left->as_float_reg(), right->as_float_reg()); + } else { + __ fcmp_cult_s(FCC0, right->as_float_reg(), left->as_float_reg()); + __ fcmp_clt_s(FCC1, left->as_float_reg(), right->as_float_reg()); + } + } else if (left->is_double_fpu()) { + if (is_unordered_less) { + __ fcmp_clt_d(FCC0, right->as_double_reg(), left->as_double_reg()); + __ fcmp_cult_d(FCC1, left->as_double_reg(), right->as_double_reg()); + } else { + __ fcmp_cult_d(FCC0, right->as_double_reg(), left->as_double_reg()); + __ fcmp_clt_d(FCC1, left->as_double_reg(), right->as_double_reg()); + } + } else { + ShouldNotReachHere(); + } + __ movcf2gr(dst->as_register(), FCC0); + __ movcf2gr(SCR1, FCC1); + __ sub_d(dst->as_register(), dst->as_register(), SCR1); + } else if (code == lir_cmp_l2i) { + __ slt(SCR1, left->as_register_lo(), right->as_register_lo()); + __ slt(dst->as_register(), right->as_register_lo(), left->as_register_lo()); + __ sub_d(dst->as_register(), dst->as_register(), SCR1); + } else { + ShouldNotReachHere(); + } +} + +void LIR_Assembler::align_call(LIR_Code code) {} + +void LIR_Assembler::call(LIR_OpJavaCall* op, relocInfo::relocType rtype) { + address call = __ trampoline_call(AddressLiteral(op->addr(), rtype)); + if (call == NULL) { + bailout("trampoline stub overflow"); + return; + } + add_call_info(code_offset(), op->info()); +} + +void LIR_Assembler::ic_call(LIR_OpJavaCall* op) { + address call = __ ic_call(op->addr()); + if (call == NULL) { + bailout("trampoline stub overflow"); + return; + } + add_call_info(code_offset(), op->info()); +} + +void LIR_Assembler::emit_static_call_stub() { + address call_pc = __ pc(); + address stub = __ start_a_stub(call_stub_size()); + if (stub == NULL) { + bailout("static call stub overflow"); + return; + } + + int start = __ offset(); + + __ relocate(static_stub_Relocation::spec(call_pc)); + + // Code stream for loading method may be changed. + __ ibar(0); + + // Rmethod contains Method*, it should be relocated for GC + // static stub relocation also tags the Method* in the code-stream. + __ mov_metadata(Rmethod, NULL); + // This is recognized as unresolved by relocs/nativeInst/ic code + __ patchable_jump(__ pc()); + + assert(__ offset() - start + CompiledStaticCall::to_trampoline_stub_size() <= call_stub_size(), + "stub too big"); + __ end_a_stub(); +} + +void LIR_Assembler::throw_op(LIR_Opr exceptionPC, LIR_Opr exceptionOop, CodeEmitInfo* info) { + assert(exceptionOop->as_register() == A0, "must match"); + assert(exceptionPC->as_register() == A1, "must match"); + + // exception object is not added to oop map by LinearScan + // (LinearScan assumes that no oops are in fixed registers) + info->add_register_oop(exceptionOop); + Runtime1::StubID unwind_id; + + // get current pc information + // pc is only needed if the method has an exception handler, the unwind code does not need it. + if (compilation()->debug_info_recorder()->last_pc_offset() == __ offset()) { + // As no instructions have been generated yet for this LIR node it's + // possible that an oop map already exists for the current offset. 
+    // In that case insert a dummy NOP here to ensure all oop map PCs
+    // are unique. See JDK-8237483.
+    __ nop();
+  }
+  Label L;
+  int pc_for_athrow_offset = __ offset();
+  __ bind(L);
+  __ lipc(exceptionPC->as_register(), L);
+  add_call_info(pc_for_athrow_offset, info); // for exception handler
+
+  __ verify_not_null_oop(A0);
+  // search an exception handler (A0: exception oop, A1: throwing pc)
+  if (compilation()->has_fpu_code()) {
+    unwind_id = Runtime1::handle_exception_id;
+  } else {
+    unwind_id = Runtime1::handle_exception_nofpu_id;
+  }
+  __ call(Runtime1::entry_for(unwind_id), relocInfo::runtime_call_type);
+
+  // FIXME: enough room for two byte trap ????
+  __ nop();
+}
+
+void LIR_Assembler::unwind_op(LIR_Opr exceptionOop) {
+  assert(exceptionOop->as_register() == A0, "must match");
+  __ b(_unwind_handler_entry);
+}
+
+void LIR_Assembler::shift_op(LIR_Code code, LIR_Opr left, LIR_Opr count, LIR_Opr dest, LIR_Opr tmp) {
+  Register lreg = left->is_single_cpu() ? left->as_register() : left->as_register_lo();
+  Register dreg = dest->is_single_cpu() ? dest->as_register() : dest->as_register_lo();
+
+  switch (left->type()) {
+    case T_INT: {
+      switch (code) {
+        case lir_shl: __ sll_w(dreg, lreg, count->as_register()); break;
+        case lir_shr: __ sra_w(dreg, lreg, count->as_register()); break;
+        case lir_ushr: __ srl_w(dreg, lreg, count->as_register()); break;
+        default: ShouldNotReachHere(); break;
+      }
+      break;
+    case T_LONG:
+    case T_ADDRESS:
+    case T_OBJECT:
+      switch (code) {
+        case lir_shl: __ sll_d(dreg, lreg, count->as_register()); break;
+        case lir_shr: __ sra_d(dreg, lreg, count->as_register()); break;
+        case lir_ushr: __ srl_d(dreg, lreg, count->as_register()); break;
+        default: ShouldNotReachHere(); break;
+      }
+      break;
+    default:
+      ShouldNotReachHere();
+      break;
+    }
+  }
+}
+
+void LIR_Assembler::shift_op(LIR_Code code, LIR_Opr left, jint count, LIR_Opr dest) {
+  Register dreg = dest->is_single_cpu() ? dest->as_register() : dest->as_register_lo();
+  Register lreg = left->is_single_cpu() ?
left->as_register() : left->as_register_lo(); + + switch (left->type()) { + case T_INT: { + switch (code) { + case lir_shl: __ slli_w(dreg, lreg, count); break; + case lir_shr: __ srai_w(dreg, lreg, count); break; + case lir_ushr: __ srli_w(dreg, lreg, count); break; + default: ShouldNotReachHere(); break; + } + break; + case T_LONG: + case T_ADDRESS: + case T_OBJECT: + switch (code) { + case lir_shl: __ slli_d(dreg, lreg, count); break; + case lir_shr: __ srai_d(dreg, lreg, count); break; + case lir_ushr: __ srli_d(dreg, lreg, count); break; + default: ShouldNotReachHere(); break; + } + break; + default: + ShouldNotReachHere(); + break; + } + } +} + +void LIR_Assembler::store_parameter(Register r, int offset_from_sp_in_words) { + assert(offset_from_sp_in_words >= 0, "invalid offset from sp"); + int offset_from_sp_in_bytes = offset_from_sp_in_words * BytesPerWord; + assert(offset_from_sp_in_bytes < frame_map()->reserved_argument_area_size(), "invalid offset"); + __ st_ptr(r, Address(SP, offset_from_sp_in_bytes)); +} + +void LIR_Assembler::store_parameter(jint c, int offset_from_sp_in_words) { + assert(offset_from_sp_in_words >= 0, "invalid offset from sp"); + int offset_from_sp_in_bytes = offset_from_sp_in_words * BytesPerWord; + assert(offset_from_sp_in_bytes < frame_map()->reserved_argument_area_size(), "invalid offset"); + __ li(SCR2, c); + __ st_ptr(SCR2, Address(SP, offset_from_sp_in_bytes)); +} + +void LIR_Assembler::store_parameter(jobject o, int offset_from_sp_in_words) { + ShouldNotReachHere(); +} + +// This code replaces a call to arraycopy; no exception may +// be thrown in this code, they must be thrown in the System.arraycopy +// activation frame; we could save some checks if this would not be the case +void LIR_Assembler::emit_arraycopy(LIR_OpArrayCopy* op) { + Register j_rarg0 = T0; + Register j_rarg1 = A0; + Register j_rarg2 = A1; + Register j_rarg3 = A2; + Register j_rarg4 = A3; + + ciArrayKlass* default_type = op->expected_type(); + Register src = op->src()->as_register(); + Register dst = op->dst()->as_register(); + Register src_pos = op->src_pos()->as_register(); + Register dst_pos = op->dst_pos()->as_register(); + Register length = op->length()->as_register(); + Register tmp = op->tmp()->as_register(); + + CodeStub* stub = op->stub(); + int flags = op->flags(); + BasicType basic_type = default_type != NULL ? 
default_type->element_type()->basic_type() : T_ILLEGAL; + if (is_reference_type(basic_type)) + basic_type = T_OBJECT; + + // if we don't know anything, just go through the generic arraycopy + if (default_type == NULL) { + Label done; + assert(src == T0 && src_pos == A0, "mismatch in calling convention"); + + // Save the arguments in case the generic arraycopy fails and we + // have to fall back to the JNI stub + __ st_ptr(dst, Address(SP, 0 * BytesPerWord)); + __ st_ptr(dst_pos, Address(SP, 1 * BytesPerWord)); + __ st_ptr(length, Address(SP, 2 * BytesPerWord)); + __ st_ptr(src_pos, Address(SP, 3 * BytesPerWord)); + __ st_ptr(src, Address(SP, 4 * BytesPerWord)); + + address copyfunc_addr = StubRoutines::generic_arraycopy(); + assert(copyfunc_addr != NULL, "generic arraycopy stub required"); + + // The arguments are in java calling convention so we shift them + // to C convention + assert_different_registers(A4, j_rarg0, j_rarg1, j_rarg2, j_rarg3); + __ move(A4, j_rarg4); + assert_different_registers(A3, j_rarg0, j_rarg1, j_rarg2); + __ move(A3, j_rarg3); + assert_different_registers(A2, j_rarg0, j_rarg1); + __ move(A2, j_rarg2); + assert_different_registers(A1, j_rarg0); + __ move(A1, j_rarg1); + __ move(A0, j_rarg0); +#ifndef PRODUCT + if (PrintC1Statistics) { + __ li(SCR2, (address)&Runtime1::_generic_arraycopystub_cnt); + __ increment(SCR2, 1); + } +#endif + __ call(copyfunc_addr, relocInfo::runtime_call_type); + + __ beqz(A0, *stub->continuation()); + __ move(tmp, A0); + + // Reload values from the stack so they are where the stub + // expects them. + __ ld_ptr(dst, Address(SP, 0 * BytesPerWord)); + __ ld_ptr(dst_pos, Address(SP, 1 * BytesPerWord)); + __ ld_ptr(length, Address(SP, 2 * BytesPerWord)); + __ ld_ptr(src_pos, Address(SP, 3 * BytesPerWord)); + __ ld_ptr(src, Address(SP, 4 * BytesPerWord)); + + // tmp is -1^K where K == partial copied count + __ nor(SCR1, tmp, R0); + // adjust length down and src/end pos up by partial copied count + __ sub_w(length, length, SCR1); + __ add_w(src_pos, src_pos, SCR1); + __ add_w(dst_pos, dst_pos, SCR1); + __ b(*stub->entry()); + + __ bind(*stub->continuation()); + return; + } + + assert(default_type != NULL && default_type->is_array_klass() && default_type->is_loaded(), + "must be true at this point"); + + int elem_size = type2aelembytes(basic_type); + Address::ScaleFactor scale = Address::times(elem_size); + + Address src_length_addr = Address(src, arrayOopDesc::length_offset_in_bytes()); + Address dst_length_addr = Address(dst, arrayOopDesc::length_offset_in_bytes()); + Address src_klass_addr = Address(src, oopDesc::klass_offset_in_bytes()); + Address dst_klass_addr = Address(dst, oopDesc::klass_offset_in_bytes()); + + // test for NULL + if (flags & LIR_OpArrayCopy::src_null_check) { + __ beqz(src, *stub->entry()); + } + if (flags & LIR_OpArrayCopy::dst_null_check) { + __ beqz(dst, *stub->entry()); + } + + // If the compiler was not able to prove that exact type of the source or the destination + // of the arraycopy is an array type, check at runtime if the source or the destination is + // an instance type. 
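+  // (Editorial note: the layout_helper test below relies on Klass::layout_helper()
+  // being negative for array klasses and >= Klass::_lh_neutral_value (0) for
+  // instance klasses, so branching when layout_helper >= _lh_neutral_value sends
+  // non-array operands to the slow-path stub.)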
+ if (flags & LIR_OpArrayCopy::type_check) { + if (!(flags & LIR_OpArrayCopy::LIR_OpArrayCopy::dst_objarray)) { + __ load_klass(tmp, dst); + __ ld_w(SCR1, Address(tmp, in_bytes(Klass::layout_helper_offset()))); + __ li(SCR2, (jlong) Klass::_lh_neutral_value); + __ bge_far(SCR1, SCR2, *stub->entry(), true); + } + + if (!(flags & LIR_OpArrayCopy::LIR_OpArrayCopy::src_objarray)) { + __ load_klass(tmp, src); + __ ld_w(SCR1, Address(tmp, in_bytes(Klass::layout_helper_offset()))); + __ li(SCR2, (jlong) Klass::_lh_neutral_value); + __ bge_far(SCR1, SCR2, *stub->entry(), true); + } + } + + // check if negative + if (flags & LIR_OpArrayCopy::src_pos_positive_check) { + __ blt_far(src_pos, R0, *stub->entry(), true); + } + if (flags & LIR_OpArrayCopy::dst_pos_positive_check) { + __ blt_far(dst_pos, R0, *stub->entry(), true); + } + + if (flags & LIR_OpArrayCopy::length_positive_check) { + __ blt_far(length, R0, *stub->entry(), true); + } + + if (flags & LIR_OpArrayCopy::src_range_check) { + __ add_w(tmp, src_pos, length); + __ ld_wu(SCR1, src_length_addr); + __ blt_far(SCR1, tmp, *stub->entry(), false); + } + if (flags & LIR_OpArrayCopy::dst_range_check) { + __ add_w(tmp, dst_pos, length); + __ ld_wu(SCR1, dst_length_addr); + __ blt_far(SCR1, tmp, *stub->entry(), false); + } + + if (flags & LIR_OpArrayCopy::type_check) { + // We don't know the array types are compatible + if (basic_type != T_OBJECT) { + // Simple test for basic type arrays + if (UseCompressedClassPointers) { + __ ld_wu(tmp, src_klass_addr); + __ ld_wu(SCR1, dst_klass_addr); + } else { + __ ld_ptr(tmp, src_klass_addr); + __ ld_ptr(SCR1, dst_klass_addr); + } + __ bne_far(tmp, SCR1, *stub->entry()); + } else { + // For object arrays, if src is a sub class of dst then we can + // safely do the copy. + Label cont, slow; + + __ addi_d(SP, SP, -2 * wordSize); + __ st_ptr(dst, Address(SP, 0 * wordSize)); + __ st_ptr(src, Address(SP, 1 * wordSize)); + + __ load_klass(src, src); + __ load_klass(dst, dst); + + __ check_klass_subtype_fast_path(src, dst, tmp, &cont, &slow, NULL); + + __ addi_d(SP, SP, -2 * wordSize); + __ st_ptr(dst, Address(SP, 0 * wordSize)); + __ st_ptr(src, Address(SP, 1 * wordSize)); + __ call(Runtime1::entry_for(Runtime1::slow_subtype_check_id), relocInfo::runtime_call_type); + __ ld_ptr(dst, Address(SP, 0 * wordSize)); + __ ld_ptr(src, Address(SP, 1 * wordSize)); + __ addi_d(SP, SP, 2 * wordSize); + + __ bnez(dst, cont); + + __ bind(slow); + __ ld_ptr(dst, Address(SP, 0 * wordSize)); + __ ld_ptr(src, Address(SP, 1 * wordSize)); + __ addi_d(SP, SP, 2 * wordSize); + + address copyfunc_addr = StubRoutines::checkcast_arraycopy(); + if (copyfunc_addr != NULL) { // use stub if available + // src is not a sub class of dst so we have to do a + // per-element check. + + int mask = LIR_OpArrayCopy::src_objarray|LIR_OpArrayCopy::dst_objarray; + if ((flags & mask) != mask) { + // Check that at least both of them object arrays. 
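+          // (Editorial clarification: at this point only one side is known to be an
+          // object array; the klass of the other side is loaded below and its
+          // layout_helper is compared against the objArray layout_helper at runtime.)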
+ assert(flags & mask, "one of the two should be known to be an object array"); + + if (!(flags & LIR_OpArrayCopy::src_objarray)) { + __ load_klass(tmp, src); + } else if (!(flags & LIR_OpArrayCopy::dst_objarray)) { + __ load_klass(tmp, dst); + } + int lh_offset = in_bytes(Klass::layout_helper_offset()); + Address klass_lh_addr(tmp, lh_offset); + jint objArray_lh = Klass::array_layout_helper(T_OBJECT); + __ ld_w(SCR1, klass_lh_addr); + __ li(SCR2, objArray_lh); + __ XOR(SCR1, SCR1, SCR2); + __ bnez(SCR1, *stub->entry()); + } + + // Spill because stubs can use any register they like and it's + // easier to restore just those that we care about. + __ st_ptr(dst, Address(SP, 0 * BytesPerWord)); + __ st_ptr(dst_pos, Address(SP, 1 * BytesPerWord)); + __ st_ptr(length, Address(SP, 2 * BytesPerWord)); + __ st_ptr(src_pos, Address(SP, 3 * BytesPerWord)); + __ st_ptr(src, Address(SP, 4 * BytesPerWord)); + + __ lea(A0, Address(src, src_pos, scale)); + __ addi_d(A0, A0, arrayOopDesc::base_offset_in_bytes(basic_type)); + assert_different_registers(A0, dst, dst_pos, length); + __ load_klass(A4, dst); + assert_different_registers(A4, dst, dst_pos, length); + __ lea(A1, Address(dst, dst_pos, scale)); + __ addi_d(A1, A1, arrayOopDesc::base_offset_in_bytes(basic_type)); + assert_different_registers(A1, length); + __ bstrpick_d(A2, length, 31, 0); + __ ld_ptr(A4, Address(A4, ObjArrayKlass::element_klass_offset())); + __ ld_w(A3, Address(A4, Klass::super_check_offset_offset())); + __ call(copyfunc_addr, relocInfo::runtime_call_type); + +#ifndef PRODUCT + if (PrintC1Statistics) { + Label failed; + __ bnez(A0, failed); + __ li(SCR2, (address)&Runtime1::_arraycopy_checkcast_cnt); + __ increment(SCR2, 1); + __ bind(failed); + } +#endif + + __ beqz(A0, *stub->continuation()); + +#ifndef PRODUCT + if (PrintC1Statistics) { + __ li(SCR2, (address)&Runtime1::_arraycopy_checkcast_attempt_cnt); + __ increment(SCR2, 1); + } +#endif + assert_different_registers(dst, dst_pos, length, src_pos, src, tmp, SCR1); + __ move(tmp, A0); + + // Restore previously spilled arguments + __ ld_ptr(dst, Address(SP, 0 * BytesPerWord)); + __ ld_ptr(dst_pos, Address(SP, 1 * BytesPerWord)); + __ ld_ptr(length, Address(SP, 2 * BytesPerWord)); + __ ld_ptr(src_pos, Address(SP, 3 * BytesPerWord)); + __ ld_ptr(src, Address(SP, 4 * BytesPerWord)); + + // return value is -1^K where K is partial copied count + __ nor(SCR1, tmp, R0); + // adjust length down and src/end pos up by partial copied count + __ sub_w(length, length, SCR1); + __ add_w(src_pos, src_pos, SCR1); + __ add_w(dst_pos, dst_pos, SCR1); + } + + __ b(*stub->entry()); + + __ bind(cont); + __ ld_ptr(dst, Address(SP, 0 * wordSize)); + __ ld_ptr(src, Address(SP, 1 * wordSize)); + __ addi_d(SP, SP, 2 * wordSize); + } + } + +#ifdef ASSERT + if (basic_type != T_OBJECT || !(flags & LIR_OpArrayCopy::type_check)) { + // Sanity check the known type with the incoming class. For the + // primitive case the types must match exactly with src.klass and + // dst.klass each exactly matching the default type. For the + // object array case, if no type check is needed then either the + // dst type is exactly the expected type and the src type is a + // subtype which we can't check or src is the same array as dst + // but not necessarily exactly of type default_type. 
+ Label known_ok, halt; + __ mov_metadata(tmp, default_type->constant_encoding()); + if (UseCompressedClassPointers) { + __ encode_klass_not_null(tmp); + } + + if (basic_type != T_OBJECT) { + + if (UseCompressedClassPointers) { + __ ld_wu(SCR1, dst_klass_addr); + } else { + __ ld_ptr(SCR1, dst_klass_addr); + } + __ bne(tmp, SCR1, halt); + if (UseCompressedClassPointers) { + __ ld_wu(SCR1, src_klass_addr); + } else { + __ ld_ptr(SCR1, src_klass_addr); + } + __ beq(tmp, SCR1, known_ok); + } else { + if (UseCompressedClassPointers) { + __ ld_wu(SCR1, dst_klass_addr); + } else { + __ ld_ptr(SCR1, dst_klass_addr); + } + __ beq(tmp, SCR1, known_ok); + __ beq(src, dst, known_ok); + } + __ bind(halt); + __ stop("incorrect type information in arraycopy"); + __ bind(known_ok); + } +#endif + +#ifndef PRODUCT + if (PrintC1Statistics) { + __ li(SCR2, Runtime1::arraycopy_count_address(basic_type)); + __ increment(SCR2, 1); + } +#endif + + __ lea(A0, Address(src, src_pos, scale)); + __ addi_d(A0, A0, arrayOopDesc::base_offset_in_bytes(basic_type)); + assert_different_registers(A0, dst, dst_pos, length); + __ lea(A1, Address(dst, dst_pos, scale)); + __ addi_d(A1, A1, arrayOopDesc::base_offset_in_bytes(basic_type)); + assert_different_registers(A1, length); + __ bstrpick_d(A2, length, 31, 0); + + bool disjoint = (flags & LIR_OpArrayCopy::overlapping) == 0; + bool aligned = (flags & LIR_OpArrayCopy::unaligned) == 0; + const char *name; + address entry = StubRoutines::select_arraycopy_function(basic_type, aligned, disjoint, name, false); + + CodeBlob *cb = CodeCache::find_blob(entry); + if (cb) { + __ call(entry, relocInfo::runtime_call_type); + } else { + __ call_VM_leaf(entry, 3); + } + + __ bind(*stub->continuation()); +} + +void LIR_Assembler::emit_lock(LIR_OpLock* op) { + Register obj = op->obj_opr()->as_register(); // may not be an oop + Register hdr = op->hdr_opr()->as_register(); + Register lock = op->lock_opr()->as_register(); + if (!UseFastLocking) { + __ b(*op->stub()->entry()); + } else if (op->code() == lir_lock) { + Register scratch = noreg; + if (UseBiasedLocking) { + scratch = op->scratch_opr()->as_register(); + } + assert(BasicLock::displaced_header_offset_in_bytes() == 0, + "lock_reg must point to the displaced header"); + // add debug info for NullPointerException only if one is possible + int null_check_offset = __ lock_object(hdr, obj, lock, scratch, *op->stub()->entry()); + if (op->info() != NULL) { + add_debug_info_for_null_check(null_check_offset, op->info()); + } + // done + } else if (op->code() == lir_unlock) { + assert(BasicLock::displaced_header_offset_in_bytes() == 0, + "lock_reg must point to the displaced header"); + __ unlock_object(hdr, obj, lock, *op->stub()->entry()); + } else { + Unimplemented(); + } + __ bind(*op->stub()->continuation()); +} + +void LIR_Assembler::emit_profile_call(LIR_OpProfileCall* op) { + ciMethod* method = op->profiled_method(); + ciMethod* callee = op->profiled_callee(); + int bci = op->profiled_bci(); + + // Update counter for all call types + ciMethodData* md = method->method_data_or_null(); + assert(md != NULL, "Sanity"); + ciProfileData* data = md->bci_to_data(bci); + assert(data != NULL && data->is_CounterData(), "need CounterData for calls"); + assert(op->mdo()->is_single_cpu(), "mdo must be allocated"); + Register mdo = op->mdo()->as_register(); + __ mov_metadata(mdo, md->constant_encoding()); + Address counter_addr(mdo, md->byte_offset_of_slot(data, CounterData::count_offset())); + // Perform additional virtual call profiling for 
invokevirtual and
+  // invokeinterface bytecodes
+  if (op->should_profile_receiver_type()) {
+    assert(op->recv()->is_single_cpu(), "recv must be allocated");
+    Register recv = op->recv()->as_register();
+    assert_different_registers(mdo, recv);
+    assert(data->is_VirtualCallData(), "need VirtualCallData for virtual calls");
+    ciKlass* known_klass = op->known_holder();
+    if (C1OptimizeVirtualCallProfiling && known_klass != NULL) {
+      // We know the type that will be seen at this call site; we can
+      // statically update the MethodData* rather than needing to do
+      // dynamic tests on the receiver type
+
+      // NOTE: we should probably put a lock around this search to
+      // avoid collisions by concurrent compilations
+      ciVirtualCallData* vc_data = (ciVirtualCallData*) data;
+      uint i;
+      for (i = 0; i < VirtualCallData::row_limit(); i++) {
+        ciKlass* receiver = vc_data->receiver(i);
+        if (known_klass->equals(receiver)) {
+          Address data_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i)));
+          __ ld_ptr(SCR2, data_addr);
+          __ addi_d(SCR2, SCR2, DataLayout::counter_increment);
+          __ st_ptr(SCR2, data_addr);
+          return;
+        }
+      }
+
+      // Receiver type not found in profile data; select an empty slot
+
+      // Note that this is less efficient than it should be because it
+      // always does a write to the receiver part of the
+      // VirtualCallData rather than just the first time
+      for (i = 0; i < VirtualCallData::row_limit(); i++) {
+        ciKlass* receiver = vc_data->receiver(i);
+        if (receiver == NULL) {
+          Address recv_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_offset(i)));
+          __ mov_metadata(SCR2, known_klass->constant_encoding());
+          __ lea(SCR1, recv_addr);
+          __ st_ptr(SCR2, SCR1, 0);
+          Address data_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i)));
+          __ ld_ptr(SCR2, data_addr);
+          __ addi_d(SCR2, SCR2, DataLayout::counter_increment);
+          __ st_ptr(SCR2, data_addr);
+          return;
+        }
+      }
+    } else {
+      __ load_klass(recv, recv);
+      Label update_done;
+      type_profile_helper(mdo, md, data, recv, &update_done);
+      // Receiver did not match any saved receiver and there is no empty row for it.
+      // Increment total counter to indicate polymorphic case.
+ __ ld_ptr(SCR2, counter_addr); + __ addi_d(SCR2, SCR2, DataLayout::counter_increment); + __ st_ptr(SCR2, counter_addr); + + __ bind(update_done); + } + } else { + // Static call + __ ld_ptr(SCR2, counter_addr); + __ addi_d(SCR2, SCR2, DataLayout::counter_increment); + __ st_ptr(SCR2, counter_addr); + } +} + +void LIR_Assembler::emit_delay(LIR_OpDelay*) { + Unimplemented(); +} + +void LIR_Assembler::monitor_address(int monitor_no, LIR_Opr dst) { + __ lea(dst->as_register(), frame_map()->address_for_monitor_lock(monitor_no)); +} + +void LIR_Assembler::emit_updatecrc32(LIR_OpUpdateCRC32* op) { + assert(op->crc()->is_single_cpu(), "crc must be register"); + assert(op->val()->is_single_cpu(), "byte value must be register"); + assert(op->result_opr()->is_single_cpu(), "result must be register"); + Register crc = op->crc()->as_register(); + Register val = op->val()->as_register(); + Register res = op->result_opr()->as_register(); + + assert_different_registers(val, crc, res); + __ li(res, StubRoutines::crc_table_addr()); + __ nor(crc, crc, R0); // ~crc + __ update_byte_crc32(crc, val, res); + __ nor(res, crc, R0); // ~crc +} + +void LIR_Assembler::emit_profile_type(LIR_OpProfileType* op) { + COMMENT("emit_profile_type {"); + Register obj = op->obj()->as_register(); + Register tmp = op->tmp()->as_pointer_register(); + Address mdo_addr = as_Address(op->mdp()->as_address_ptr()); + ciKlass* exact_klass = op->exact_klass(); + intptr_t current_klass = op->current_klass(); + bool not_null = op->not_null(); + bool no_conflict = op->no_conflict(); + + Label update, next, none; + + bool do_null = !not_null; + bool exact_klass_set = exact_klass != NULL && ciTypeEntries::valid_ciklass(current_klass) == exact_klass; + bool do_update = !TypeEntries::is_type_unknown(current_klass) && !exact_klass_set; + + assert(do_null || do_update, "why are we here?"); + assert(!TypeEntries::was_null_seen(current_klass) || do_update, "why are we here?"); + assert(mdo_addr.base() != SCR1, "wrong register"); + + __ verify_oop(obj); + + if (tmp != obj) { + __ move(tmp, obj); + } + if (do_null) { + __ bnez(tmp, update); + if (!TypeEntries::was_null_seen(current_klass)) { + __ ld_ptr(SCR2, mdo_addr); + __ ori(SCR2, SCR2, TypeEntries::null_seen); + __ st_ptr(SCR2, mdo_addr); + } + if (do_update) { +#ifndef ASSERT + __ b(next); + } +#else + __ b(next); + } + } else { + __ bnez(tmp, update); + __ stop("unexpected null obj"); +#endif + } + + __ bind(update); + + if (do_update) { +#ifdef ASSERT + if (exact_klass != NULL) { + Label ok; + __ load_klass(tmp, tmp); + __ mov_metadata(SCR1, exact_klass->constant_encoding()); + __ XOR(SCR1, tmp, SCR1); + __ beqz(SCR1, ok); + __ stop("exact klass and actual klass differ"); + __ bind(ok); + } +#endif + if (!no_conflict) { + if (exact_klass == NULL || TypeEntries::is_type_none(current_klass)) { + if (exact_klass != NULL) { + __ mov_metadata(tmp, exact_klass->constant_encoding()); + } else { + __ load_klass(tmp, tmp); + } + + __ ld_ptr(SCR2, mdo_addr); + __ XOR(tmp, tmp, SCR2); + assert(TypeEntries::type_klass_mask == -4, "must be"); + __ bstrpick_d(SCR1, tmp, 63, 2); + // klass seen before, nothing to do. The unknown bit may have been + // set already but no need to check. + __ beqz(SCR1, next); + + __ andi(SCR1, tmp, TypeEntries::type_unknown); + __ bnez(SCR1, next); // already unknown. Nothing to do anymore. 
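+      // (Editorial note: the profile cell at mdo_addr packs a Klass* together with
+      // two flag bits in its low bits -- TypeEntries::null_seen (bit 0) and
+      // TypeEntries::type_unknown (bit 1). That is why the klass comparisons above
+      // and below look only at bits 63..2 via bstrpick_d, matching the
+      // type_klass_mask == -4 and type_mask == -2 asserts.)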
+ + if (TypeEntries::is_type_none(current_klass)) { + __ beqz(SCR2, none); + __ li(SCR1, (u1)TypeEntries::null_seen); + __ beq(SCR2, SCR1, none); + // There is a chance that the checks above (re-reading profiling + // data from memory) fail if another thread has just set the + // profiling to this obj's klass + membar_acquire(); + __ ld_ptr(SCR2, mdo_addr); + __ XOR(tmp, tmp, SCR2); + assert(TypeEntries::type_klass_mask == -4, "must be"); + __ bstrpick_d(SCR1, tmp, 63, 2); + __ beqz(SCR1, next); + } + } else { + assert(ciTypeEntries::valid_ciklass(current_klass) != NULL && + ciTypeEntries::valid_ciklass(current_klass) != exact_klass, "conflict only"); + + __ ld_ptr(tmp, mdo_addr); + __ andi(SCR2, tmp, TypeEntries::type_unknown); + __ bnez(SCR2, next); // already unknown. Nothing to do anymore. + } + + // different than before. Cannot keep accurate profile. + __ ld_ptr(SCR2, mdo_addr); + __ ori(SCR2, SCR2, TypeEntries::type_unknown); + __ st_ptr(SCR2, mdo_addr); + + if (TypeEntries::is_type_none(current_klass)) { + __ b(next); + + __ bind(none); + // first time here. Set profile type. + __ st_ptr(tmp, mdo_addr); + } + } else { + // There's a single possible klass at this profile point + assert(exact_klass != NULL, "should be"); + if (TypeEntries::is_type_none(current_klass)) { + __ mov_metadata(tmp, exact_klass->constant_encoding()); + __ ld_ptr(SCR2, mdo_addr); + __ XOR(tmp, tmp, SCR2); + assert(TypeEntries::type_klass_mask == -4, "must be"); + __ bstrpick_d(SCR1, tmp, 63, 2); + __ beqz(SCR1, next); +#ifdef ASSERT + { + Label ok; + __ ld_ptr(SCR1, mdo_addr); + __ beqz(SCR1, ok); + __ li(SCR2, (u1)TypeEntries::null_seen); + __ beq(SCR1, SCR2, ok); + // may have been set by another thread + membar_acquire(); + __ mov_metadata(SCR1, exact_klass->constant_encoding()); + __ ld_ptr(SCR2, mdo_addr); + __ XOR(SCR2, SCR1, SCR2); + assert(TypeEntries::type_mask == -2, "must be"); + __ bstrpick_d(SCR2, SCR2, 63, 1); + __ beqz(SCR2, ok); + + __ stop("unexpected profiling mismatch"); + __ bind(ok); + } +#endif + // first time here. Set profile type. + __ st_ptr(tmp, mdo_addr); + } else { + assert(ciTypeEntries::valid_ciklass(current_klass) != NULL && + ciTypeEntries::valid_ciklass(current_klass) != exact_klass, "inconsistent"); + + __ ld_ptr(tmp, mdo_addr); + __ andi(SCR1, tmp, TypeEntries::type_unknown); + __ bnez(SCR1, next); // already unknown. Nothing to do anymore. + + __ ori(tmp, tmp, TypeEntries::type_unknown); + __ st_ptr(tmp, mdo_addr); + // FIXME: Write barrier needed here? 
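+          // (Editorial note: other ports carry the same FIXME; the value stored here
+          // is a Klass* (metadata) rather than an oop, so a GC write barrier should
+          // not be required, but the question is left open as in the original.)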
+ } + } + + __ bind(next); + } + COMMENT("} emit_profile_type"); +} + +void LIR_Assembler::align_backward_branch_target() {} + +void LIR_Assembler::negate(LIR_Opr left, LIR_Opr dest, LIR_Opr tmp) { + // tmp must be unused + assert(tmp->is_illegal(), "wasting a register if tmp is allocated"); + + if (left->is_single_cpu()) { + assert(dest->is_single_cpu(), "expect single result reg"); + __ sub_w(dest->as_register(), R0, left->as_register()); + } else if (left->is_double_cpu()) { + assert(dest->is_double_cpu(), "expect double result reg"); + __ sub_d(dest->as_register_lo(), R0, left->as_register_lo()); + } else if (left->is_single_fpu()) { + assert(dest->is_single_fpu(), "expect single float result reg"); + __ fneg_s(dest->as_float_reg(), left->as_float_reg()); + } else { + assert(left->is_double_fpu(), "expect double float operand reg"); + assert(dest->is_double_fpu(), "expect double float result reg"); + __ fneg_d(dest->as_double_reg(), left->as_double_reg()); + } +} + +void LIR_Assembler::leal(LIR_Opr addr, LIR_Opr dest, LIR_PatchCode patch_code, + CodeEmitInfo* info) { + if (patch_code != lir_patch_none) { + deoptimize_trap(info); + return; + } + + __ lea(dest->as_register_lo(), as_Address(addr->as_address_ptr())); +} + +void LIR_Assembler::rt_call(LIR_Opr result, address dest, const LIR_OprList* args, + LIR_Opr tmp, CodeEmitInfo* info) { + assert(!tmp->is_valid(), "don't need temporary"); + __ call(dest, relocInfo::runtime_call_type); + if (info != NULL) { + add_call_info_here(info); + } +} + +void LIR_Assembler::volatile_move_op(LIR_Opr src, LIR_Opr dest, BasicType type, + CodeEmitInfo* info) { + if (dest->is_address() || src->is_address()) { + move_op(src, dest, type, lir_patch_none, info, + /*pop_fpu_stack*/false, /*unaligned*/false, /*wide*/false); + } else { + ShouldNotReachHere(); + } +} + +#ifdef ASSERT +// emit run-time assertion +void LIR_Assembler::emit_assert(LIR_OpAssert* op) { + assert(op->code() == lir_assert, "must be"); + Label ok; + + if (op->in_opr1()->is_valid()) { + assert(op->in_opr2()->is_valid(), "both operands must be valid"); + assert(op->in_opr1()->is_cpu_register() || op->in_opr2()->is_cpu_register(), "must be"); + Register reg1 = as_reg(op->in_opr1()); + Register reg2 = as_reg(op->in_opr2()); + switch (op->condition()) { + case lir_cond_equal: __ beq(reg1, reg2, ok); break; + case lir_cond_notEqual: __ bne(reg1, reg2, ok); break; + case lir_cond_less: __ blt(reg1, reg2, ok); break; + case lir_cond_lessEqual: __ bge(reg2, reg1, ok); break; + case lir_cond_greaterEqual: __ bge(reg1, reg2, ok); break; + case lir_cond_greater: __ blt(reg2, reg1, ok); break; + case lir_cond_belowEqual: __ bgeu(reg2, reg1, ok); break; + case lir_cond_aboveEqual: __ bgeu(reg1, reg2, ok); break; + default: ShouldNotReachHere(); + } + } else { + assert(op->in_opr2()->is_illegal(), "both operands must be illegal"); + assert(op->condition() == lir_cond_always, "no other conditions allowed"); + } + if (op->halt()) { + const char* str = __ code_string(op->msg()); + __ stop(str); + } else { + breakpoint(); + } + __ bind(ok); +} +#endif + +#ifndef PRODUCT +#define COMMENT(x) do { __ block_comment(x); } while (0) +#else +#define COMMENT(x) +#endif + +void LIR_Assembler::membar() { + COMMENT("membar"); + __ membar(Assembler::AnyAny); +} + +void LIR_Assembler::membar_acquire() { + __ membar(Assembler::Membar_mask_bits(Assembler::LoadLoad | Assembler::LoadStore)); +} + +void LIR_Assembler::membar_release() { + __ membar(Assembler::Membar_mask_bits(Assembler::LoadStore|Assembler::StoreStore)); 
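+  // (Editorial note: with these masks, membar_acquire orders earlier loads before
+  // later loads and stores (LoadLoad | LoadStore), and membar_release orders earlier
+  // loads and stores before later stores (LoadStore | StoreStore); how the mask is
+  // lowered to an actual barrier instruction is left to the underlying membar()
+  // implementation in the assembler.)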
+} + +void LIR_Assembler::membar_loadload() { + __ membar(Assembler::LoadLoad); +} + +void LIR_Assembler::membar_storestore() { + __ membar(MacroAssembler::StoreStore); +} + +void LIR_Assembler::membar_loadstore() { + __ membar(MacroAssembler::LoadStore); +} + +void LIR_Assembler::membar_storeload() { + __ membar(MacroAssembler::StoreLoad); +} + +void LIR_Assembler::on_spin_wait() { + Unimplemented(); +} + +void LIR_Assembler::get_thread(LIR_Opr result_reg) { + __ move(result_reg->as_register(), TREG); +} + +void LIR_Assembler::peephole(LIR_List *lir) { +} + +void LIR_Assembler::atomic_op(LIR_Code code, LIR_Opr src, LIR_Opr data, + LIR_Opr dest, LIR_Opr tmp_op) { + Address addr = as_Address(src->as_address_ptr()); + BasicType type = src->type(); + Register dst = as_reg(dest); + Register tmp = as_reg(tmp_op); + bool is_oop = is_reference_type(type); + + if (Assembler::is_simm(addr.disp(), 12)) { + __ addi_d(tmp, addr.base(), addr.disp()); + } else { + __ li(tmp, addr.disp()); + __ add_d(tmp, addr.base(), tmp); + } + if (addr.index() != noreg) { + if (addr.scale() > Address::times_1) + __ alsl_d(tmp, addr.index(), tmp, addr.scale() - 1); + else + __ add_d(tmp, tmp, addr.index()); + } + + switch(type) { + case T_INT: + break; + case T_LONG: + break; + case T_OBJECT: + case T_ARRAY: + if (UseCompressedOops) { + // unsigned int + } else { + // long + } + break; + default: + ShouldNotReachHere(); + } + + if (code == lir_xadd) { + Register inc = noreg; + if (data->is_constant()) { + inc = SCR1; + __ li(inc, as_long(data)); + } else { + inc = as_reg(data); + } + switch(type) { + case T_INT: + __ amadd_db_w(dst, inc, tmp); + break; + case T_LONG: + __ amadd_db_d(dst, inc, tmp); + break; + case T_OBJECT: + case T_ARRAY: + if (UseCompressedOops) { + __ amadd_db_w(dst, inc, tmp); + __ lu32i_d(dst, 0); + } else { + __ amadd_db_d(dst, inc, tmp); + } + break; + default: + ShouldNotReachHere(); + } + } else if (code == lir_xchg) { + Register obj = as_reg(data); + if (is_oop && UseCompressedOops) { + __ encode_heap_oop(SCR2, obj); + obj = SCR2; + } + switch(type) { + case T_INT: + __ amswap_db_w(dst, obj, tmp); + break; + case T_LONG: + __ amswap_db_d(dst, obj, tmp); + break; + case T_OBJECT: + case T_ARRAY: + if (UseCompressedOops) { + __ amswap_db_w(dst, obj, tmp); + __ lu32i_d(dst, 0); + } else { + __ amswap_db_d(dst, obj, tmp); + } + break; + default: + ShouldNotReachHere(); + } + if (is_oop && UseCompressedOops) { + __ decode_heap_oop(dst); + } + } else { + ShouldNotReachHere(); + } +} + +#undef __ diff --git a/src/hotspot/cpu/loongarch/c1_LIRGenerator_loongarch_64.cpp b/src/hotspot/cpu/loongarch/c1_LIRGenerator_loongarch_64.cpp new file mode 100644 index 00000000000..72a80f37c4f --- /dev/null +++ b/src/hotspot/cpu/loongarch/c1_LIRGenerator_loongarch_64.cpp @@ -0,0 +1,1396 @@ +/* + * Copyright (c) 2005, 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2021, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "c1/c1_Compilation.hpp" +#include "c1/c1_FrameMap.hpp" +#include "c1/c1_Instruction.hpp" +#include "c1/c1_LIRAssembler.hpp" +#include "c1/c1_LIRGenerator.hpp" +#include "c1/c1_Runtime1.hpp" +#include "c1/c1_ValueStack.hpp" +#include "ci/ciArray.hpp" +#include "ci/ciObjArrayKlass.hpp" +#include "ci/ciTypeArrayKlass.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#include "vmreg_loongarch.inline.hpp" + +#ifdef ASSERT +#define __ gen()->lir(__FILE__, __LINE__)-> +#else +#define __ gen()->lir()-> +#endif + +// Item will be loaded into a byte register; Intel only +void LIRItem::load_byte_item() { + load_item(); +} + +void LIRItem::load_nonconstant() { + LIR_Opr r = value()->operand(); + if (r->is_constant()) { + _result = r; + } else { + load_item(); + } +} + +//-------------------------------------------------------------- +// LIRGenerator +//-------------------------------------------------------------- + +LIR_Opr LIRGenerator::exceptionOopOpr() { return FrameMap::a0_oop_opr; } +LIR_Opr LIRGenerator::exceptionPcOpr() { return FrameMap::a1_opr; } +LIR_Opr LIRGenerator::divInOpr() { Unimplemented(); return LIR_OprFact::illegalOpr; } +LIR_Opr LIRGenerator::divOutOpr() { Unimplemented(); return LIR_OprFact::illegalOpr; } +LIR_Opr LIRGenerator::remOutOpr() { Unimplemented(); return LIR_OprFact::illegalOpr; } +LIR_Opr LIRGenerator::shiftCountOpr() { Unimplemented(); return LIR_OprFact::illegalOpr; } +LIR_Opr LIRGenerator::syncLockOpr() { return new_register(T_INT); } +LIR_Opr LIRGenerator::syncTempOpr() { return FrameMap::a0_opr; } +LIR_Opr LIRGenerator::getThreadTemp() { return LIR_OprFact::illegalOpr; } + +LIR_Opr LIRGenerator::result_register_for(ValueType* type, bool callee) { + LIR_Opr opr; + switch (type->tag()) { + case intTag: opr = FrameMap::a0_opr; break; + case objectTag: opr = FrameMap::a0_oop_opr; break; + case longTag: opr = FrameMap::long0_opr; break; + case floatTag: opr = FrameMap::fpu0_float_opr; break; + case doubleTag: opr = FrameMap::fpu0_double_opr; break; + case addressTag: + default: ShouldNotReachHere(); return LIR_OprFact::illegalOpr; + } + + assert(opr->type_field() == as_OprType(as_BasicType(type)), "type mismatch"); + return opr; +} + +LIR_Opr LIRGenerator::rlock_byte(BasicType type) { + LIR_Opr reg = new_register(T_INT); + set_vreg_flag(reg, LIRGenerator::byte_reg); + return reg; +} + +//--------- loading items into registers -------------------------------- + +bool LIRGenerator::can_store_as_constant(Value v, BasicType type) const { + if (v->type()->as_IntConstant() != NULL) { + return v->type()->as_IntConstant()->value() == 0L; + } else if (v->type()->as_LongConstant() != NULL) { + return v->type()->as_LongConstant()->value() == 0L; + } else if (v->type()->as_ObjectConstant() != NULL) { + return v->type()->as_ObjectConstant()->value()->is_null_object(); + } else { + return false; + } +} + +bool 
LIRGenerator::can_inline_as_constant(Value v) const { + // FIXME: Just a guess + if (v->type()->as_IntConstant() != NULL) { + return Assembler::is_simm(v->type()->as_IntConstant()->value(), 12); + } else if (v->type()->as_LongConstant() != NULL) { + return v->type()->as_LongConstant()->value() == 0L; + } else if (v->type()->as_ObjectConstant() != NULL) { + return v->type()->as_ObjectConstant()->value()->is_null_object(); + } else { + return false; + } +} + +bool LIRGenerator::can_inline_as_constant(LIR_Const* c) const { return false; } + +LIR_Opr LIRGenerator::safepoint_poll_register() { + return LIR_OprFact::illegalOpr; +} + +LIR_Address* LIRGenerator::generate_address(LIR_Opr base, LIR_Opr index, + int shift, int disp, BasicType type) { + assert(base->is_register(), "must be"); + intx large_disp = disp; + + // accumulate fixed displacements + if (index->is_constant()) { + LIR_Const *constant = index->as_constant_ptr(); + if (constant->type() == T_INT) { + large_disp += index->as_jint() << shift; + } else { + assert(constant->type() == T_LONG, "should be"); + jlong c = index->as_jlong() << shift; + if ((jlong)((jint)c) == c) { + large_disp += c; + index = LIR_OprFact::illegalOpr; + } else { + LIR_Opr tmp = new_register(T_LONG); + __ move(index, tmp); + index = tmp; + // apply shift and displacement below + } + } + } + + if (index->is_register()) { + // apply the shift and accumulate the displacement + if (shift > 0) { + LIR_Opr tmp = new_pointer_register(); + __ shift_left(index, shift, tmp); + index = tmp; + } + if (large_disp != 0) { + LIR_Opr tmp = new_pointer_register(); + if (Assembler::is_simm(large_disp, 12)) { + __ add(index, LIR_OprFact::intptrConst(large_disp), tmp); + index = tmp; + } else { + __ move(LIR_OprFact::intptrConst(large_disp), tmp); + __ add(tmp, index, tmp); + index = tmp; + } + large_disp = 0; + } + } else if (large_disp != 0 && !Assembler::is_simm(large_disp, 12)) { + // index is illegal so replace it with the displacement loaded into a register + index = new_pointer_register(); + __ move(LIR_OprFact::intptrConst(large_disp), index); + large_disp = 0; + } + + // at this point we either have base + index or base + displacement + if (large_disp == 0 && index->is_register()) { + return new LIR_Address(base, index, type); + } else { + assert(Assembler::is_simm(large_disp, 12), "must be"); + return new LIR_Address(base, large_disp, type); + } +} + +LIR_Address* LIRGenerator::emit_array_address(LIR_Opr array_opr, LIR_Opr index_opr, BasicType type) { + int offset_in_bytes = arrayOopDesc::base_offset_in_bytes(type); + int elem_size = type2aelembytes(type); + int shift = exact_log2(elem_size); + + LIR_Address* addr; + if (index_opr->is_constant()) { + addr = new LIR_Address(array_opr, offset_in_bytes + (intx)(index_opr->as_jint()) * elem_size, type); + } else { + if (offset_in_bytes) { + LIR_Opr tmp = new_pointer_register(); + __ add(array_opr, LIR_OprFact::intConst(offset_in_bytes), tmp); + array_opr = tmp; + offset_in_bytes = 0; + } + addr = new LIR_Address(array_opr, index_opr, LIR_Address::scale(type), offset_in_bytes, type); + } + return addr; +} + +LIR_Opr LIRGenerator::load_immediate(int x, BasicType type) { + LIR_Opr r; + if (type == T_LONG) { + r = LIR_OprFact::longConst(x); + if (!Assembler::is_simm(x, 12)) { + LIR_Opr tmp = new_register(type); + __ move(r, tmp); + return tmp; + } + } else if (type == T_INT) { + r = LIR_OprFact::intConst(x); + if (!Assembler::is_simm(x, 12)) { + // This is all rather nasty. 
We don't know whether our constant
+      // is required for a logical or an arithmetic operation, so we
+      // don't know what the range of valid values is!!
+      LIR_Opr tmp = new_register(type);
+      __ move(r, tmp);
+      return tmp;
+    }
+  } else {
+    ShouldNotReachHere();
+    r = NULL; // unreachable
+  }
+  return r;
+}
+
+void LIRGenerator::increment_counter(address counter, BasicType type, int step) {
+  LIR_Opr pointer = new_pointer_register();
+  __ move(LIR_OprFact::intptrConst(counter), pointer);
+  LIR_Address* addr = new LIR_Address(pointer, type);
+  increment_counter(addr, step);
+}
+
+void LIRGenerator::increment_counter(LIR_Address* addr, int step) {
+  LIR_Opr imm = NULL;
+  switch(addr->type()) {
+    case T_INT:
+      imm = LIR_OprFact::intConst(step);
+      break;
+    case T_LONG:
+      imm = LIR_OprFact::longConst(step);
+      break;
+    default:
+      ShouldNotReachHere();
+  }
+  LIR_Opr reg = new_register(addr->type());
+  __ load(addr, reg);
+  __ add(reg, imm, reg);
+  __ store(reg, addr);
+}
+
+template <typename T>
+void LIRGenerator::cmp_mem_int_branch(LIR_Condition condition, LIR_Opr base,
+                                      int disp, int c, T tgt, CodeEmitInfo* info) {
+  LIR_Opr reg = new_register(T_INT);
+  __ load(generate_address(base, disp, T_INT), reg, info);
+  __ cmp_branch(condition, reg, LIR_OprFact::intConst(c), T_INT, tgt);
+}
+
+// Explicit instantiation for all supported types.
+template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, Label*, CodeEmitInfo*);
+template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, BlockBegin*, CodeEmitInfo*);
+template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, CodeStub*, CodeEmitInfo*);
+
+template <typename T>
+void LIRGenerator::cmp_reg_mem_branch(LIR_Condition condition, LIR_Opr reg, LIR_Opr base,
+                                      int disp, BasicType type, T tgt, CodeEmitInfo* info) {
+  LIR_Opr reg1 = new_register(T_INT);
+  __ load(generate_address(base, disp, type), reg1, info);
+  __ cmp_branch(condition, reg, reg1, type, tgt);
+}
+
+// Explicit instantiation for all supported types.
+template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, Label*, CodeEmitInfo*); +template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, BlockBegin*, CodeEmitInfo*); +template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, CodeStub*, CodeEmitInfo*); + +bool LIRGenerator::strength_reduce_multiply(LIR_Opr left, jint c, LIR_Opr result, LIR_Opr tmp) { + if (is_power_of_2(c - 1)) { + __ shift_left(left, exact_log2(c - 1), tmp); + __ add(tmp, left, result); + return true; + } else if (is_power_of_2(c + 1)) { + __ shift_left(left, exact_log2(c + 1), tmp); + __ sub(tmp, left, result); + return true; + } else { + return false; + } +} + +void LIRGenerator::store_stack_parameter (LIR_Opr item, ByteSize offset_from_sp) { + BasicType type = item->type(); + __ store(item, new LIR_Address(FrameMap::sp_opr, in_bytes(offset_from_sp), type)); +} + +void LIRGenerator::array_store_check(LIR_Opr value, LIR_Opr array, CodeEmitInfo* store_check_info, + ciMethod* profiled_method, int profiled_bci) { + LIR_Opr tmp1 = new_register(objectType); + LIR_Opr tmp2 = new_register(objectType); + LIR_Opr tmp3 = new_register(objectType); + __ store_check(value, array, tmp1, tmp2, tmp3, store_check_info, profiled_method, profiled_bci); +} + +//---------------------------------------------------------------------- +// visitor functions +//---------------------------------------------------------------------- + +void LIRGenerator::do_MonitorEnter(MonitorEnter* x) { + assert(x->is_pinned(),""); + LIRItem obj(x->obj(), this); + obj.load_item(); + + set_no_result(x); + + // "lock" stores the address of the monitor stack slot, so this is not an oop + LIR_Opr lock = new_register(T_INT); + // Need a scratch register for biased locking + LIR_Opr scratch = LIR_OprFact::illegalOpr; + if (UseBiasedLocking) { + scratch = new_register(T_INT); + } + + CodeEmitInfo* info_for_exception = NULL; + if (x->needs_null_check()) { + info_for_exception = state_for(x); + } + // this CodeEmitInfo must not have the xhandlers because here the + // object is already locked (xhandlers expect object to be unlocked) + CodeEmitInfo* info = state_for(x, x->state(), true); + monitor_enter(obj.result(), lock, syncTempOpr(), scratch, + x->monitor_no(), info_for_exception, info); +} + +void LIRGenerator::do_MonitorExit(MonitorExit* x) { + assert(x->is_pinned(),""); + + LIRItem obj(x->obj(), this); + obj.dont_load_item(); + + LIR_Opr lock = new_register(T_INT); + LIR_Opr obj_temp = new_register(T_INT); + set_no_result(x); + monitor_exit(obj_temp, lock, syncTempOpr(), LIR_OprFact::illegalOpr, x->monitor_no()); +} + +void LIRGenerator::do_NegateOp(NegateOp* x) { + LIRItem from(x->x(), this); + from.load_item(); + LIR_Opr result = rlock_result(x); + __ negate (from.result(), result); +} + +// for _fadd, _fmul, _fsub, _fdiv, _frem +// _dadd, _dmul, _dsub, _ddiv, _drem +void LIRGenerator::do_ArithmeticOp_FPU(ArithmeticOp* x) { + if (x->op() == Bytecodes::_frem || x->op() == Bytecodes::_drem) { + // float remainder is implemented as a direct call into the runtime + LIRItem right(x->x(), this); + LIRItem left(x->y(), this); + + BasicTypeList signature(2); + if (x->op() == Bytecodes::_frem) { + signature.append(T_FLOAT); + signature.append(T_FLOAT); + } else { + signature.append(T_DOUBLE); + signature.append(T_DOUBLE); + } + CallingConvention* cc = frame_map()->c_calling_convention(&signature); + + const LIR_Opr result_reg = 
result_register_for(x->type()); + left.load_item_force(cc->at(1)); + right.load_item(); + + __ move(right.result(), cc->at(0)); + + address entry; + if (x->op() == Bytecodes::_frem) { + entry = CAST_FROM_FN_PTR(address, SharedRuntime::frem); + } else { + entry = CAST_FROM_FN_PTR(address, SharedRuntime::drem); + } + + LIR_Opr result = rlock_result(x); + __ call_runtime_leaf(entry, getThreadTemp(), result_reg, cc->args()); + __ move(result_reg, result); + return; + } + + LIRItem left(x->x(), this); + LIRItem right(x->y(), this); + LIRItem* left_arg = &left; + LIRItem* right_arg = &right; + + // Always load right hand side. + right.load_item(); + + if (!left.is_register()) + left.load_item(); + + LIR_Opr reg = rlock(x); + + arithmetic_op_fpu(x->op(), reg, left.result(), right.result(), x->is_strictfp()); + + set_result(x, round_item(reg)); +} + +// for _ladd, _lmul, _lsub, _ldiv, _lrem +void LIRGenerator::do_ArithmeticOp_Long(ArithmeticOp* x) { + // missing test if instr is commutative and if we should swap + LIRItem left(x->x(), this); + LIRItem right(x->y(), this); + + if (x->op() == Bytecodes::_ldiv || x->op() == Bytecodes::_lrem) { + left.load_item(); + bool need_zero_check = true; + if (right.is_constant()) { + jlong c = right.get_jlong_constant(); + // no need to do div-by-zero check if the divisor is a non-zero constant + if (c != 0) need_zero_check = false; + // do not load right if the divisor is a power-of-2 constant + if (c > 0 && is_power_of_2(c) && Assembler::is_uimm(c - 1, 12)) { + right.dont_load_item(); + } else { + right.load_item(); + } + } else { + right.load_item(); + } + if (need_zero_check) { + CodeEmitInfo* info = state_for(x); + CodeStub* stub = new DivByZeroStub(info); + __ cmp_branch(lir_cond_equal, right.result(), LIR_OprFact::longConst(0), T_LONG, stub); + } + + rlock_result(x); + switch (x->op()) { + case Bytecodes::_lrem: + __ rem (left.result(), right.result(), x->operand()); + break; + case Bytecodes::_ldiv: + __ div (left.result(), right.result(), x->operand()); + break; + default: + ShouldNotReachHere(); + break; + } + } else { + assert(x->op() == Bytecodes::_lmul || x->op() == Bytecodes::_ladd || x->op() == Bytecodes::_lsub, + "expect lmul, ladd or lsub"); + // add, sub, mul + left.load_item(); + if (!right.is_register()) { + if (x->op() == Bytecodes::_lmul || !right.is_constant() || + (x->op() == Bytecodes::_ladd && !Assembler::is_simm(right.get_jlong_constant(), 12)) || + (x->op() == Bytecodes::_lsub && !Assembler::is_simm(-right.get_jlong_constant(), 12))) { + right.load_item(); + } else { // add, sub + assert(x->op() == Bytecodes::_ladd || x->op() == Bytecodes::_lsub, "expect ladd or lsub"); + // don't load constants to save register + right.load_nonconstant(); + } + } + rlock_result(x); + arithmetic_op_long(x->op(), x->operand(), left.result(), right.result(), NULL); + } +} + +// for: _iadd, _imul, _isub, _idiv, _irem +void LIRGenerator::do_ArithmeticOp_Int(ArithmeticOp* x) { + // Test if instr is commutative and if we should swap + LIRItem left(x->x(), this); + LIRItem right(x->y(), this); + LIRItem* left_arg = &left; + LIRItem* right_arg = &right; + if (x->is_commutative() && left.is_stack() && right.is_register()) { + // swap them if left is real stack (or cached) and right is real register(not cached) + left_arg = &right; + right_arg = &left; + } + + left_arg->load_item(); + + // do not need to load right, as we can handle stack and constants + if (x->op() == Bytecodes::_idiv || x->op() == Bytecodes::_irem) { + rlock_result(x); + bool 
need_zero_check = true; + if (right.is_constant()) { + jint c = right.get_jint_constant(); + // no need to do div-by-zero check if the divisor is a non-zero constant + if (c != 0) need_zero_check = false; + // do not load right if the divisor is a power-of-2 constant + if (c > 0 && is_power_of_2(c) && Assembler::is_uimm(c - 1, 12)) { + right_arg->dont_load_item(); + } else { + right_arg->load_item(); + } + } else { + right_arg->load_item(); + } + if (need_zero_check) { + CodeEmitInfo* info = state_for(x); + CodeStub* stub = new DivByZeroStub(info); + __ cmp_branch(lir_cond_equal, right_arg->result(), LIR_OprFact::longConst(0), T_INT, stub); + } + + LIR_Opr ill = LIR_OprFact::illegalOpr; + if (x->op() == Bytecodes::_irem) { + __ irem(left_arg->result(), right_arg->result(), x->operand(), ill, NULL); + } else if (x->op() == Bytecodes::_idiv) { + __ idiv(left_arg->result(), right_arg->result(), x->operand(), ill, NULL); + } + } else if (x->op() == Bytecodes::_iadd || x->op() == Bytecodes::_isub) { + if (right.is_constant() && + ((x->op() == Bytecodes::_iadd && Assembler::is_simm(right.get_jint_constant(), 12)) || + (x->op() == Bytecodes::_isub && Assembler::is_simm(-right.get_jint_constant(), 12)))) { + right.load_nonconstant(); + } else { + right.load_item(); + } + rlock_result(x); + arithmetic_op_int(x->op(), x->operand(), left_arg->result(), right_arg->result(), LIR_OprFact::illegalOpr); + } else { + assert (x->op() == Bytecodes::_imul, "expect imul"); + if (right.is_constant()) { + jint c = right.get_jint_constant(); + if (c > 0 && c < max_jint && (is_power_of_2(c) || is_power_of_2(c - 1) || is_power_of_2(c + 1))) { + right_arg->dont_load_item(); + } else { + // Cannot use constant op. + right_arg->load_item(); + } + } else { + right.load_item(); + } + rlock_result(x); + arithmetic_op_int(x->op(), x->operand(), left_arg->result(), right_arg->result(), new_register(T_INT)); + } +} + +void LIRGenerator::do_ArithmeticOp(ArithmeticOp* x) { + // when an operand with use count 1 is the left operand, then it is + // likely that no move for 2-operand-LIR-form is necessary + if (x->is_commutative() && x->y()->as_Constant() == NULL && x->x()->use_count() > x->y()->use_count()) { + x->swap_operands(); + } + + ValueTag tag = x->type()->tag(); + assert(x->x()->type()->tag() == tag && x->y()->type()->tag() == tag, "wrong parameters"); + switch (tag) { + case floatTag: + case doubleTag: do_ArithmeticOp_FPU(x); return; + case longTag: do_ArithmeticOp_Long(x); return; + case intTag: do_ArithmeticOp_Int(x); return; + default: ShouldNotReachHere(); return; + } +} + +// _ishl, _lshl, _ishr, _lshr, _iushr, _lushr +void LIRGenerator::do_ShiftOp(ShiftOp* x) { + LIRItem left(x->x(), this); + LIRItem right(x->y(), this); + + left.load_item(); + + rlock_result(x); + if (right.is_constant()) { + right.dont_load_item(); + int c; + switch (x->op()) { + case Bytecodes::_ishl: + c = right.get_jint_constant() & 0x1f; + __ shift_left(left.result(), c, x->operand()); + break; + case Bytecodes::_ishr: + c = right.get_jint_constant() & 0x1f; + __ shift_right(left.result(), c, x->operand()); + break; + case Bytecodes::_iushr: + c = right.get_jint_constant() & 0x1f; + __ unsigned_shift_right(left.result(), c, x->operand()); + break; + case Bytecodes::_lshl: + c = right.get_jint_constant() & 0x3f; + __ shift_left(left.result(), c, x->operand()); + break; + case Bytecodes::_lshr: + c = right.get_jint_constant() & 0x3f; + __ shift_right(left.result(), c, x->operand()); + break; + case Bytecodes::_lushr: + c = 
right.get_jint_constant() & 0x3f; + __ unsigned_shift_right(left.result(), c, x->operand()); + break; + default: + ShouldNotReachHere(); + } + } else { + right.load_item(); + LIR_Opr tmp = new_register(T_INT); + switch (x->op()) { + case Bytecodes::_ishl: + __ logical_and(right.result(), LIR_OprFact::intConst(0x1f), tmp); + __ shift_left(left.result(), tmp, x->operand(), tmp); + break; + case Bytecodes::_ishr: + __ logical_and(right.result(), LIR_OprFact::intConst(0x1f), tmp); + __ shift_right(left.result(), tmp, x->operand(), tmp); + break; + case Bytecodes::_iushr: + __ logical_and(right.result(), LIR_OprFact::intConst(0x1f), tmp); + __ unsigned_shift_right(left.result(), tmp, x->operand(), tmp); + break; + case Bytecodes::_lshl: + __ logical_and(right.result(), LIR_OprFact::intConst(0x3f), tmp); + __ shift_left(left.result(), tmp, x->operand(), tmp); + break; + case Bytecodes::_lshr: + __ logical_and(right.result(), LIR_OprFact::intConst(0x3f), tmp); + __ shift_right(left.result(), tmp, x->operand(), tmp); + break; + case Bytecodes::_lushr: + __ logical_and(right.result(), LIR_OprFact::intConst(0x3f), tmp); + __ unsigned_shift_right(left.result(), tmp, x->operand(), tmp); + break; + default: + ShouldNotReachHere(); + } + } +} + +// _iand, _land, _ior, _lor, _ixor, _lxor +void LIRGenerator::do_LogicOp(LogicOp* x) { + LIRItem left(x->x(), this); + LIRItem right(x->y(), this); + + left.load_item(); + + rlock_result(x); + if (right.is_constant() + && ((right.type()->tag() == intTag + && Assembler::is_uimm(right.get_jint_constant(), 12)) + || (right.type()->tag() == longTag + && Assembler::is_uimm(right.get_jlong_constant(), 12)))) { + right.dont_load_item(); + } else { + right.load_item(); + } + switch (x->op()) { + case Bytecodes::_iand: + case Bytecodes::_land: + __ logical_and(left.result(), right.result(), x->operand()); break; + case Bytecodes::_ior: + case Bytecodes::_lor: + __ logical_or (left.result(), right.result(), x->operand()); break; + case Bytecodes::_ixor: + case Bytecodes::_lxor: + __ logical_xor(left.result(), right.result(), x->operand()); break; + default: Unimplemented(); + } +} + +// _lcmp, _fcmpl, _fcmpg, _dcmpl, _dcmpg +void LIRGenerator::do_CompareOp(CompareOp* x) { + LIRItem left(x->x(), this); + LIRItem right(x->y(), this); + ValueTag tag = x->x()->type()->tag(); + if (tag == longTag) { + left.set_destroys_register(); + } + left.load_item(); + right.load_item(); + LIR_Opr reg = rlock_result(x); + + if (x->x()->type()->is_float_kind()) { + Bytecodes::Code code = x->op(); + __ fcmp2int(left.result(), right.result(), reg, + (code == Bytecodes::_fcmpl || code == Bytecodes::_dcmpl)); + } else if (x->x()->type()->tag() == longTag) { + __ lcmp2int(left.result(), right.result(), reg); + } else { + Unimplemented(); + } +} + +LIR_Opr LIRGenerator::atomic_cmpxchg(BasicType type, LIR_Opr addr, + LIRItem& cmp_value, LIRItem& new_value) { + LIR_Opr ill = LIR_OprFact::illegalOpr; // for convenience + new_value.load_item(); + cmp_value.load_item(); + LIR_Opr result = new_register(T_INT); + if (is_reference_type(type)) { + __ cas_obj(addr, cmp_value.result(), new_value.result(), + new_register(T_INT), new_register(T_INT), result); + } else if (type == T_INT) { + __ cas_int(addr->as_address_ptr()->base(), cmp_value.result(), + new_value.result(), ill, ill); + } else if (type == T_LONG) { + __ cas_long(addr->as_address_ptr()->base(), cmp_value.result(), + new_value.result(), ill, ill); + } else { + ShouldNotReachHere(); + Unimplemented(); + } + __ move(FrameMap::scr1_opr, result); + 
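+  // Note: the LIR assembler is expected to materialize the compare-and-swap
+  // success flag in SCR1, which the move above copies into the virtual
+  // result register.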
return result; +} + +LIR_Opr LIRGenerator::atomic_xchg(BasicType type, LIR_Opr addr, LIRItem& value) { + bool is_oop = is_reference_type(type); + LIR_Opr result = new_register(type); + value.load_item(); + assert(type == T_INT || is_oop || type == T_LONG , "unexpected type"); + LIR_Opr tmp = new_register(T_INT); + __ xchg(addr, value.result(), result, tmp); + return result; +} + +LIR_Opr LIRGenerator::atomic_add(BasicType type, LIR_Opr addr, LIRItem& value) { + LIR_Opr result = new_register(type); + value.load_item(); + assert(type == T_INT || type == T_LONG , "unexpected type"); + LIR_Opr tmp = new_register(T_INT); + __ xadd(addr, value.result(), result, tmp); + return result; +} + +void LIRGenerator::do_MathIntrinsic(Intrinsic* x) { + assert(x->number_of_arguments() == 1 || (x->number_of_arguments() == 2 && x->id() == vmIntrinsics::_dpow), + "wrong type"); + if (x->id() == vmIntrinsics::_dexp || x->id() == vmIntrinsics::_dlog || + x->id() == vmIntrinsics::_dpow || x->id() == vmIntrinsics::_dcos || + x->id() == vmIntrinsics::_dsin || x->id() == vmIntrinsics::_dtan || + x->id() == vmIntrinsics::_dlog10) { + do_LibmIntrinsic(x); + return; + } + switch (x->id()) { + case vmIntrinsics::_dabs: + case vmIntrinsics::_dsqrt: { + assert(x->number_of_arguments() == 1, "wrong type"); + LIRItem value(x->argument_at(0), this); + value.load_item(); + LIR_Opr dst = rlock_result(x); + + switch (x->id()) { + case vmIntrinsics::_dsqrt: + __ sqrt(value.result(), dst, LIR_OprFact::illegalOpr); + break; + case vmIntrinsics::_dabs: + __ abs(value.result(), dst, LIR_OprFact::illegalOpr); + break; + default: + ShouldNotReachHere(); + } + break; + } + default: + ShouldNotReachHere(); + } +} + +void LIRGenerator::do_LibmIntrinsic(Intrinsic* x) { + LIRItem value(x->argument_at(0), this); + value.set_destroys_register(); + + LIR_Opr calc_result = rlock_result(x); + LIR_Opr result_reg = result_register_for(x->type()); + + CallingConvention* cc = NULL; + + if (x->id() == vmIntrinsics::_dpow) { + LIRItem value1(x->argument_at(1), this); + + value1.set_destroys_register(); + + BasicTypeList signature(2); + signature.append(T_DOUBLE); + signature.append(T_DOUBLE); + cc = frame_map()->c_calling_convention(&signature); + value.load_item_force(cc->at(0)); + value1.load_item_force(cc->at(1)); + } else { + BasicTypeList signature(1); + signature.append(T_DOUBLE); + cc = frame_map()->c_calling_convention(&signature); + value.load_item_force(cc->at(0)); + } + + switch (x->id()) { + case vmIntrinsics::_dexp: + if (StubRoutines::dexp() != NULL) { + __ call_runtime_leaf(StubRoutines::dexp(), getThreadTemp(), result_reg, cc->args()); + } else { + __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dexp), getThreadTemp(), result_reg, cc->args()); + } + break; + case vmIntrinsics::_dlog: + if (StubRoutines::dlog() != NULL) { + __ call_runtime_leaf(StubRoutines::dlog(), getThreadTemp(), result_reg, cc->args()); + } else { + __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dlog), getThreadTemp(), result_reg, cc->args()); + } + break; + case vmIntrinsics::_dlog10: + if (StubRoutines::dlog10() != NULL) { + __ call_runtime_leaf(StubRoutines::dlog10(), getThreadTemp(), result_reg, cc->args()); + } else { + __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dlog10), getThreadTemp(), result_reg, cc->args()); + } + break; + case vmIntrinsics::_dpow: + if (StubRoutines::dpow() != NULL) { + __ call_runtime_leaf(StubRoutines::dpow(), getThreadTemp(), result_reg, cc->args()); + } else { + __ 
call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dpow), getThreadTemp(), result_reg, cc->args()); + } + break; + case vmIntrinsics::_dsin: + if (StubRoutines::dsin() != NULL) { + __ call_runtime_leaf(StubRoutines::dsin(), getThreadTemp(), result_reg, cc->args()); + } else { + __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dsin), getThreadTemp(), result_reg, cc->args()); + } + break; + case vmIntrinsics::_dcos: + if (StubRoutines::dcos() != NULL) { + __ call_runtime_leaf(StubRoutines::dcos(), getThreadTemp(), result_reg, cc->args()); + } else { + __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dcos), getThreadTemp(), result_reg, cc->args()); + } + break; + case vmIntrinsics::_dtan: + if (StubRoutines::dtan() != NULL) { + __ call_runtime_leaf(StubRoutines::dtan(), getThreadTemp(), result_reg, cc->args()); + } else { + __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtan), getThreadTemp(), result_reg, cc->args()); + } + break; + default: ShouldNotReachHere(); + } + __ move(result_reg, calc_result); +} + +void LIRGenerator::do_ArrayCopy(Intrinsic* x) { + Register j_rarg0 = RT0; + Register j_rarg1 = A0; + Register j_rarg2 = A1; + Register j_rarg3 = A2; + Register j_rarg4 = A3; + Register j_rarg5 = A4; + + assert(x->number_of_arguments() == 5, "wrong type"); + + // Make all state_for calls early since they can emit code + CodeEmitInfo* info = state_for(x, x->state()); + + LIRItem src(x->argument_at(0), this); + LIRItem src_pos(x->argument_at(1), this); + LIRItem dst(x->argument_at(2), this); + LIRItem dst_pos(x->argument_at(3), this); + LIRItem length(x->argument_at(4), this); + + // operands for arraycopy must use fixed registers, otherwise + // LinearScan will fail allocation (because arraycopy always needs a + // call) + + // The java calling convention will give us enough registers + // so that on the stub side the args will be perfect already. + // On the other slow/special case side we call C and the arg + // positions are not similar enough to pick one as the best. 
+ // Also because the java calling convention is a "shifted" version + // of the C convention we can process the java args trivially into C + // args without worry of overwriting during the xfer + + src.load_item_force (FrameMap::as_oop_opr(j_rarg0)); + src_pos.load_item_force (FrameMap::as_opr(j_rarg1)); + dst.load_item_force (FrameMap::as_oop_opr(j_rarg2)); + dst_pos.load_item_force (FrameMap::as_opr(j_rarg3)); + length.load_item_force (FrameMap::as_opr(j_rarg4)); + + LIR_Opr tmp = FrameMap::as_opr(j_rarg5); + + set_no_result(x); + + int flags; + ciArrayKlass* expected_type; + arraycopy_helper(x, &flags, &expected_type); + + __ arraycopy(src.result(), src_pos.result(), dst.result(), dst_pos.result(), + length.result(), tmp, expected_type, flags, info); // does add_safepoint +} + +void LIRGenerator::do_update_CRC32(Intrinsic* x) { + assert(UseCRC32Intrinsics, "why are we here?"); + // Make all state_for calls early since they can emit code + LIR_Opr result = rlock_result(x); + int flags = 0; + switch (x->id()) { + case vmIntrinsics::_updateCRC32: { + LIRItem crc(x->argument_at(0), this); + LIRItem val(x->argument_at(1), this); + // val is destroyed by update_crc32 + val.set_destroys_register(); + crc.load_item(); + val.load_item(); + __ update_crc32(crc.result(), val.result(), result); + break; + } + case vmIntrinsics::_updateBytesCRC32: + case vmIntrinsics::_updateByteBufferCRC32: { + bool is_updateBytes = (x->id() == vmIntrinsics::_updateBytesCRC32); + + LIRItem crc(x->argument_at(0), this); + LIRItem buf(x->argument_at(1), this); + LIRItem off(x->argument_at(2), this); + LIRItem len(x->argument_at(3), this); + buf.load_item(); + off.load_nonconstant(); + + LIR_Opr index = off.result(); + int offset = is_updateBytes ? arrayOopDesc::base_offset_in_bytes(T_BYTE) : 0; + if(off.result()->is_constant()) { + index = LIR_OprFact::illegalOpr; + offset += off.result()->as_jint(); + } + LIR_Opr base_op = buf.result(); + + if (index->is_valid()) { + LIR_Opr tmp = new_register(T_LONG); + __ convert(Bytecodes::_i2l, index, tmp); + index = tmp; + } + + if (offset) { + LIR_Opr tmp = new_pointer_register(); + __ add(base_op, LIR_OprFact::intConst(offset), tmp); + base_op = tmp; + offset = 0; + } + + LIR_Address* a = new LIR_Address(base_op, index, offset, T_BYTE); + BasicTypeList signature(3); + signature.append(T_INT); + signature.append(T_ADDRESS); + signature.append(T_INT); + CallingConvention* cc = frame_map()->c_calling_convention(&signature); + const LIR_Opr result_reg = result_register_for(x->type()); + + LIR_Opr addr = new_pointer_register(); + __ leal(LIR_OprFact::address(a), addr); + + crc.load_item_force(cc->at(0)); + __ move(addr, cc->at(1)); + len.load_item_force(cc->at(2)); + + __ call_runtime_leaf(StubRoutines::updateBytesCRC32(), getThreadTemp(), result_reg, cc->args()); + __ move(result_reg, result); + + break; + } + default: { + ShouldNotReachHere(); + } + } +} + +void LIRGenerator::do_update_CRC32C(Intrinsic* x) { + assert(UseCRC32CIntrinsics, "why are we here?"); + // Make all state_for calls early since they can emit code + LIR_Opr result = rlock_result(x); + int flags = 0; + switch (x->id()) { + case vmIntrinsics::_updateBytesCRC32C: + case vmIntrinsics::_updateDirectByteBufferCRC32C: { + bool is_updateBytes = (x->id() == vmIntrinsics::_updateBytesCRC32C); + int offset = is_updateBytes ? 
arrayOopDesc::base_offset_in_bytes(T_BYTE) : 0; + + LIRItem crc(x->argument_at(0), this); + LIRItem buf(x->argument_at(1), this); + LIRItem off(x->argument_at(2), this); + LIRItem end(x->argument_at(3), this); + + buf.load_item(); + off.load_nonconstant(); + end.load_nonconstant(); + + // len = end - off + LIR_Opr len = end.result(); + LIR_Opr tmpA = new_register(T_INT); + LIR_Opr tmpB = new_register(T_INT); + __ move(end.result(), tmpA); + __ move(off.result(), tmpB); + __ sub(tmpA, tmpB, tmpA); + len = tmpA; + + LIR_Opr index = off.result(); + if(off.result()->is_constant()) { + index = LIR_OprFact::illegalOpr; + offset += off.result()->as_jint(); + } + LIR_Opr base_op = buf.result(); + + if (index->is_valid()) { + LIR_Opr tmp = new_register(T_LONG); + __ convert(Bytecodes::_i2l, index, tmp); + index = tmp; + } + + if (offset) { + LIR_Opr tmp = new_pointer_register(); + __ add(base_op, LIR_OprFact::intConst(offset), tmp); + base_op = tmp; + offset = 0; + } + + LIR_Address* a = new LIR_Address(base_op, index, offset, T_BYTE); + BasicTypeList signature(3); + signature.append(T_INT); + signature.append(T_ADDRESS); + signature.append(T_INT); + CallingConvention* cc = frame_map()->c_calling_convention(&signature); + const LIR_Opr result_reg = result_register_for(x->type()); + + LIR_Opr addr = new_pointer_register(); + __ leal(LIR_OprFact::address(a), addr); + + crc.load_item_force(cc->at(0)); + __ move(addr, cc->at(1)); + __ move(len, cc->at(2)); + + __ call_runtime_leaf(StubRoutines::updateBytesCRC32C(), getThreadTemp(), result_reg, cc->args()); + __ move(result_reg, result); + + break; + } + default: { + ShouldNotReachHere(); + } + } +} + +void LIRGenerator::do_FmaIntrinsic(Intrinsic* x) { + assert(x->number_of_arguments() == 3, "wrong type"); + assert(UseFMA, "Needs FMA instructions support."); + LIRItem value(x->argument_at(0), this); + LIRItem value1(x->argument_at(1), this); + LIRItem value2(x->argument_at(2), this); + + value.load_item(); + value1.load_item(); + value2.load_item(); + + LIR_Opr calc_input = value.result(); + LIR_Opr calc_input1 = value1.result(); + LIR_Opr calc_input2 = value2.result(); + LIR_Opr calc_result = rlock_result(x); + + switch (x->id()) { + case vmIntrinsics::_fmaD: + __ fmad(calc_input, calc_input1, calc_input2, calc_result); + break; + case vmIntrinsics::_fmaF: + __ fmaf(calc_input, calc_input1, calc_input2, calc_result); + break; + default: + ShouldNotReachHere(); + } +} + +void LIRGenerator::do_vectorizedMismatch(Intrinsic* x) { + fatal("vectorizedMismatch intrinsic is not implemented on this platform"); +} + +// _i2l, _i2f, _i2d, _l2i, _l2f, _l2d, _f2i, _f2l, _f2d, _d2i, _d2l, _d2f +// _i2b, _i2c, _i2s +void LIRGenerator::do_Convert(Convert* x) { + LIRItem value(x->value(), this); + value.load_item(); + LIR_Opr input = value.result(); + LIR_Opr result = rlock(x); + + // arguments of lir_convert + LIR_Opr conv_input = input; + LIR_Opr conv_result = result; + + switch (x->op()) { + case Bytecodes::_f2i: + case Bytecodes::_f2l: + __ convert(x->op(), conv_input, conv_result, NULL, new_register(T_FLOAT)); + break; + case Bytecodes::_d2i: + case Bytecodes::_d2l: + __ convert(x->op(), conv_input, conv_result, NULL, new_register(T_DOUBLE)); + break; + default: + __ convert(x->op(), conv_input, conv_result); + break; + } + + assert(result->is_virtual(), "result must be virtual register"); + set_result(x, result); +} + +void LIRGenerator::do_NewInstance(NewInstance* x) { +#ifndef PRODUCT + if (PrintNotLoaded && !x->klass()->is_loaded()) { + tty->print_cr(" 
###class not loaded at new bci %d", x->printable_bci()); + } +#endif + CodeEmitInfo* info = state_for(x, x->state()); + LIR_Opr reg = result_register_for(x->type()); + new_instance(reg, x->klass(), x->is_unresolved(), + FrameMap::t0_oop_opr, + FrameMap::t1_oop_opr, + FrameMap::a4_oop_opr, + LIR_OprFact::illegalOpr, + FrameMap::a3_metadata_opr, info); + LIR_Opr result = rlock_result(x); + __ move(reg, result); +} + +void LIRGenerator::do_NewTypeArray(NewTypeArray* x) { + CodeEmitInfo* info = state_for(x, x->state()); + + LIRItem length(x->length(), this); + length.load_item_force(FrameMap::s0_opr); + + LIR_Opr reg = result_register_for(x->type()); + LIR_Opr tmp1 = FrameMap::t0_oop_opr; + LIR_Opr tmp2 = FrameMap::t1_oop_opr; + LIR_Opr tmp3 = FrameMap::a5_oop_opr; + LIR_Opr tmp4 = reg; + LIR_Opr klass_reg = FrameMap::a3_metadata_opr; + LIR_Opr len = length.result(); + BasicType elem_type = x->elt_type(); + + __ metadata2reg(ciTypeArrayKlass::make(elem_type)->constant_encoding(), klass_reg); + + CodeStub* slow_path = new NewTypeArrayStub(klass_reg, len, reg, info); + __ allocate_array(reg, len, tmp1, tmp2, tmp3, tmp4, elem_type, klass_reg, slow_path); + + LIR_Opr result = rlock_result(x); + __ move(reg, result); +} + +void LIRGenerator::do_NewObjectArray(NewObjectArray* x) { + LIRItem length(x->length(), this); + // in case of patching (i.e., object class is not yet loaded), we need to reexecute the instruction + // and therefore provide the state before the parameters have been consumed + CodeEmitInfo* patching_info = NULL; + if (!x->klass()->is_loaded() || PatchALot) { + patching_info = state_for(x, x->state_before()); + } + + CodeEmitInfo* info = state_for(x, x->state()); + + LIR_Opr reg = result_register_for(x->type()); + LIR_Opr tmp1 = FrameMap::t0_oop_opr; + LIR_Opr tmp2 = FrameMap::t1_oop_opr; + LIR_Opr tmp3 = FrameMap::a5_oop_opr; + LIR_Opr tmp4 = reg; + LIR_Opr klass_reg = FrameMap::a3_metadata_opr; + + length.load_item_force(FrameMap::s0_opr); + LIR_Opr len = length.result(); + + CodeStub* slow_path = new NewObjectArrayStub(klass_reg, len, reg, info); + ciKlass* obj = (ciKlass*) ciObjArrayKlass::make(x->klass()); + if (obj == ciEnv::unloaded_ciobjarrayklass()) { + BAILOUT("encountered unloaded_ciobjarrayklass due to out of memory error"); + } + klass2reg_with_patching(klass_reg, obj, patching_info); + __ allocate_array(reg, len, tmp1, tmp2, tmp3, tmp4, T_OBJECT, klass_reg, slow_path); + + LIR_Opr result = rlock_result(x); + __ move(reg, result); +} + +void LIRGenerator::do_NewMultiArray(NewMultiArray* x) { + Values* dims = x->dims(); + int i = dims->length(); + LIRItemList* items = new LIRItemList(i, i, NULL); + while (i-- > 0) { + LIRItem* size = new LIRItem(dims->at(i), this); + items->at_put(i, size); + } + + // Evaluate state_for early since it may emit code. + CodeEmitInfo* patching_info = NULL; + if (!x->klass()->is_loaded() || PatchALot) { + patching_info = state_for(x, x->state_before()); + + // Cannot re-use same xhandlers for multiple CodeEmitInfos, so + // clone all handlers (NOTE: Usually this is handled transparently + // by the CodeEmitInfo cloning logic in CodeStub constructors but + // is done explicitly here because a stub isn't being used). 
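+  // The clone below therefore gives this instruction its own XHandlers list,
+  // so the CodeEmitInfo created above (patching_info) and the one created
+  // below (info) do not end up sharing the same handler objects.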
+ x->set_exception_handlers(new XHandlers(x->exception_handlers())); + } + CodeEmitInfo* info = state_for(x, x->state()); + + i = dims->length(); + while (i-- > 0) { + LIRItem* size = items->at(i); + size->load_item(); + + store_stack_parameter(size->result(), in_ByteSize(i*4)); + } + + LIR_Opr klass_reg = FrameMap::a0_metadata_opr; + klass2reg_with_patching(klass_reg, x->klass(), patching_info); + + LIR_Opr rank = FrameMap::s0_opr; + __ move(LIR_OprFact::intConst(x->rank()), rank); + LIR_Opr varargs = FrameMap::a2_opr; + __ move(FrameMap::sp_opr, varargs); + LIR_OprList* args = new LIR_OprList(3); + args->append(klass_reg); + args->append(rank); + args->append(varargs); + LIR_Opr reg = result_register_for(x->type()); + __ call_runtime(Runtime1::entry_for(Runtime1::new_multi_array_id), + LIR_OprFact::illegalOpr, + reg, args, info); + + LIR_Opr result = rlock_result(x); + __ move(reg, result); +} + +void LIRGenerator::do_BlockBegin(BlockBegin* x) { + // nothing to do for now +} + +void LIRGenerator::do_CheckCast(CheckCast* x) { + LIRItem obj(x->obj(), this); + + CodeEmitInfo* patching_info = NULL; + if (!x->klass()->is_loaded() || + (PatchALot && !x->is_incompatible_class_change_check() && + !x->is_invokespecial_receiver_check())) { + // must do this before locking the destination register as an oop register, + // and before the obj is loaded (the latter is for deoptimization) + patching_info = state_for(x, x->state_before()); + } + obj.load_item(); + + // info for exceptions + CodeEmitInfo* info_for_exception = + (x->needs_exception_state() ? state_for(x) : + state_for(x, x->state_before(), true /*ignore_xhandler*/)); + + CodeStub* stub; + if (x->is_incompatible_class_change_check()) { + assert(patching_info == NULL, "can't patch this"); + stub = new SimpleExceptionStub(Runtime1::throw_incompatible_class_change_error_id, + LIR_OprFact::illegalOpr, info_for_exception); + } else if (x->is_invokespecial_receiver_check()) { + assert(patching_info == NULL, "can't patch this"); + stub = new DeoptimizeStub(info_for_exception, + Deoptimization::Reason_class_check, + Deoptimization::Action_none); + } else { + stub = new SimpleExceptionStub(Runtime1::throw_class_cast_exception_id, + obj.result(), info_for_exception); + } + LIR_Opr reg = rlock_result(x); + LIR_Opr tmp3 = LIR_OprFact::illegalOpr; + if (!x->klass()->is_loaded() || UseCompressedClassPointers) { + tmp3 = new_register(objectType); + } + __ checkcast(reg, obj.result(), x->klass(), + new_register(objectType), new_register(objectType), tmp3, + x->direct_compare(), info_for_exception, patching_info, stub, + x->profiled_method(), x->profiled_bci()); +} + +void LIRGenerator::do_InstanceOf(InstanceOf* x) { + LIRItem obj(x->obj(), this); + + // result and test object may not be in same register + LIR_Opr reg = rlock_result(x); + CodeEmitInfo* patching_info = NULL; + if ((!x->klass()->is_loaded() || PatchALot)) { + // must do this before locking the destination register as an oop register + patching_info = state_for(x, x->state_before()); + } + obj.load_item(); + LIR_Opr tmp3 = LIR_OprFact::illegalOpr; + if (!x->klass()->is_loaded() || UseCompressedClassPointers) { + tmp3 = new_register(objectType); + } + __ instanceof(reg, obj.result(), x->klass(), + new_register(objectType), new_register(objectType), tmp3, + x->direct_compare(), patching_info, x->profiled_method(), x->profiled_bci()); +} + +void LIRGenerator::do_If(If* x) { + assert(x->number_of_sux() == 2, "inconsistency"); + ValueTag tag = x->x()->type()->tag(); + bool is_safepoint = 
x->is_safepoint(); + + If::Condition cond = x->cond(); + + LIRItem xitem(x->x(), this); + LIRItem yitem(x->y(), this); + LIRItem* xin = &xitem; + LIRItem* yin = &yitem; + + if (tag == longTag) { + // for longs, only conditions "eql", "neq", "lss", "geq" are valid; + // mirror for other conditions + if (cond == If::gtr || cond == If::leq) { + cond = Instruction::mirror(cond); + xin = &yitem; + yin = &xitem; + } + xin->set_destroys_register(); + } + xin->load_item(); + + if (tag == longTag) { + if (yin->is_constant() && yin->get_jlong_constant() == 0) { + yin->dont_load_item(); + } else { + yin->load_item(); + } + } else if (tag == intTag) { + if (yin->is_constant() && yin->get_jint_constant() == 0) { + yin->dont_load_item(); + } else { + yin->load_item(); + } + } else { + yin->load_item(); + } + + set_no_result(x); + + LIR_Opr left = xin->result(); + LIR_Opr right = yin->result(); + + // add safepoint before generating condition code so it can be recomputed + if (x->is_safepoint()) { + // increment backedge counter if needed + increment_backedge_counter_conditionally(lir_cond(cond), left, right, state_for(x, x->state_before()), + x->tsux()->bci(), x->fsux()->bci(), x->profiled_bci()); + __ safepoint(LIR_OprFact::illegalOpr, state_for(x, x->state_before())); + } + + // Generate branch profiling. Profiling code doesn't kill flags. + profile_branch(x, cond, left, right); + move_to_phi(x->state()); + if (x->x()->type()->is_float_kind()) { + __ cmp_branch(lir_cond(cond), left, right, right->type(), x->tsux(), x->usux()); + } else { + __ cmp_branch(lir_cond(cond), left, right, right->type(), x->tsux()); + } + assert(x->default_sux() == x->fsux(), "wrong destination above"); + __ jump(x->default_sux()); +} + +LIR_Opr LIRGenerator::getThreadPointer() { + return FrameMap::as_pointer_opr(TREG); +} + +void LIRGenerator::trace_block_entry(BlockBegin* block) { Unimplemented(); } + +void LIRGenerator::volatile_field_store(LIR_Opr value, LIR_Address* address, + CodeEmitInfo* info) { + __ volatile_store_mem_reg(value, address, info); +} + +void LIRGenerator::volatile_field_load(LIR_Address* address, LIR_Opr result, + CodeEmitInfo* info) { + // 8179954: We need to make sure that the code generated for + // volatile accesses forms a sequentially-consistent set of + // operations when combined with STLR and LDAR. Without a leading + // membar it's possible for a simple Dekker test to fail if loads + // use LD;DMB but stores use STLR. This can happen if C2 compiles + // the stores in one method and C1 compiles the loads in another. + if (!UseBarriersForVolatile) { + __ membar(); + } + __ volatile_load_mem_reg(address, result, info); +} diff --git a/src/hotspot/cpu/loongarch/c1_LIR_loongarch_64.cpp b/src/hotspot/cpu/loongarch/c1_LIR_loongarch_64.cpp new file mode 100644 index 00000000000..6bb15fbf1da --- /dev/null +++ b/src/hotspot/cpu/loongarch/c1_LIR_loongarch_64.cpp @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2016, 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/register.hpp"
+#include "c1/c1_LIR.hpp"
+
+FloatRegister LIR_OprDesc::as_float_reg() const {
+  return as_FloatRegister(fpu_regnr());
+}
+
+FloatRegister LIR_OprDesc::as_double_reg() const {
+  return as_FloatRegister(fpu_regnrLo());
+}
+
+// Reg2 unused.
+LIR_Opr LIR_OprFact::double_fpu(int reg1, int reg2) {
+  assert(as_FloatRegister(reg2) == fnoreg, "Not used on this platform");
+  return (LIR_Opr)(intptr_t)((reg1 << LIR_OprDesc::reg1_shift) |
+                             (reg1 << LIR_OprDesc::reg2_shift) |
+                             LIR_OprDesc::double_type |
+                             LIR_OprDesc::fpu_register |
+                             LIR_OprDesc::double_size);
+}
+
+#ifndef PRODUCT
+void LIR_Address::verify() const {
+  assert(base()->is_cpu_register(), "wrong base operand");
+  assert(index()->is_illegal() || index()->is_double_cpu() ||
+         index()->is_single_cpu(), "wrong index operand");
+  assert(base()->type() == T_ADDRESS || base()->type() == T_OBJECT ||
+         base()->type() == T_LONG || base()->type() == T_METADATA,
+         "wrong type for addresses");
+}
+#endif // PRODUCT
+
+template <typename T>
+void LIR_List::cmp_branch(LIR_Condition condition, LIR_Opr left, LIR_Opr right, BasicType type, T tgt, CodeEmitInfo* info) {
+  append(new LIR_OpCmpBranch(condition, left, right, tgt, info));
+}
+
+// Explicit instantiation for all supported types.
+template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, Label*, CodeEmitInfo*);
+template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, BlockBegin*, CodeEmitInfo*);
+template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, CodeStub*, CodeEmitInfo*);
+
+void LIR_List::cmp_branch(LIR_Condition condition, LIR_Opr left, LIR_Opr right, BasicType type, BlockBegin* block, BlockBegin* unordered) {
+  append(new LIR_OpCmpBranch(condition, left, right, block, unordered));
+}
+
+void LIR_List::cmp_cmove(LIR_Condition condition, LIR_Opr left, LIR_Opr right, LIR_Opr src1, LIR_Opr src2, LIR_Opr dst, BasicType type) {
+  append(new LIR_Op4(lir_cmp_cmove, condition, left, right, src1, src2, dst, type));
+}
diff --git a/src/hotspot/cpu/loongarch/c1_LinearScan_loongarch.hpp b/src/hotspot/cpu/loongarch/c1_LinearScan_loongarch.hpp
new file mode 100644
index 00000000000..f15dacafeba
--- /dev/null
+++ b/src/hotspot/cpu/loongarch/c1_LinearScan_loongarch.hpp
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2005, 2021, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2021, Loongson Technology. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_LOONGARCH_C1_LINEARSCAN_LOONGARCH_HPP +#define CPU_LOONGARCH_C1_LINEARSCAN_LOONGARCH_HPP + +inline bool LinearScan::is_processed_reg_num(int reg_num) { + return reg_num <= FrameMap::last_cpu_reg() || reg_num >= pd_nof_cpu_regs_frame_map; +} + +inline int LinearScan::num_physical_regs(BasicType type) { + return 1; +} + +inline bool LinearScan::requires_adjacent_regs(BasicType type) { + return false; +} + +inline bool LinearScan::is_caller_save(int assigned_reg) { + assert(assigned_reg >= 0 && assigned_reg < nof_regs, "should call this only for registers"); + if (assigned_reg < pd_first_callee_saved_reg) + return true; + if (assigned_reg > pd_last_callee_saved_reg && assigned_reg < pd_first_callee_saved_fpu_reg) + return true; + if (assigned_reg > pd_last_callee_saved_fpu_reg && assigned_reg < pd_last_fpu_reg) + return true; + return false; +} + +inline void LinearScan::pd_add_temps(LIR_Op* op) {} + +// Implementation of LinearScanWalker +inline bool LinearScanWalker::pd_init_regs_for_alloc(Interval* cur) { + if (allocator()->gen()->is_vreg_flag_set(cur->reg_num(), LIRGenerator::callee_saved)) { + assert(cur->type() != T_FLOAT && cur->type() != T_DOUBLE, "cpu regs only"); + _first_reg = pd_first_callee_saved_reg; + _last_reg = pd_last_callee_saved_reg; + return true; + } else if (cur->type() == T_INT || cur->type() == T_LONG || cur->type() == T_OBJECT || + cur->type() == T_ADDRESS || cur->type() == T_METADATA) { + _first_reg = pd_first_cpu_reg; + _last_reg = pd_last_allocatable_cpu_reg; + return true; + } + return false; +} + +#endif // CPU_LOONGARCH_C1_LINEARSCAN_LOONGARCH_HPP diff --git a/src/hotspot/cpu/loongarch/c1_LinearScan_loongarch_64.cpp b/src/hotspot/cpu/loongarch/c1_LinearScan_loongarch_64.cpp new file mode 100644 index 00000000000..219b2e3671c --- /dev/null +++ b/src/hotspot/cpu/loongarch/c1_LinearScan_loongarch_64.cpp @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2005, 2011, Oracle and/or its affiliates. All rights reserved. All rights reserved. + * Copyright (c) 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "c1/c1_Instruction.hpp" +#include "c1/c1_LinearScan.hpp" +#include "utilities/bitMap.inline.hpp" + +void LinearScan::allocate_fpu_stack() { + // No FPU stack on LoongArch64 +} diff --git a/src/hotspot/cpu/loongarch/c1_MacroAssembler_loongarch.hpp b/src/hotspot/cpu/loongarch/c1_MacroAssembler_loongarch.hpp new file mode 100644 index 00000000000..38ff4c58369 --- /dev/null +++ b/src/hotspot/cpu/loongarch/c1_MacroAssembler_loongarch.hpp @@ -0,0 +1,112 @@ +/* + * Copyright (c) 1999, 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef CPU_LOONGARCH_C1_MACROASSEMBLER_LOONGARCH_HPP +#define CPU_LOONGARCH_C1_MACROASSEMBLER_LOONGARCH_HPP + +using MacroAssembler::build_frame; +using MacroAssembler::null_check; + +// C1_MacroAssembler contains high-level macros for C1 + + private: + int _rsp_offset; // track rsp changes + // initialization + void pd_init() { _rsp_offset = 0; } + + public: + void try_allocate( + Register obj, // result: pointer to object after successful allocation + Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise + int con_size_in_bytes, // object size in bytes if known at compile time + Register t1, // temp register + Register t2, // temp register + Label& slow_case // continuation point if fast allocation fails + ); + + void initialize_header(Register obj, Register klass, Register len, Register t1, Register t2); + void initialize_body(Register obj, Register len_in_bytes, int hdr_size_in_bytes, Register t1, Register t2); + + // locking + // hdr : must be A0, contents destroyed + // obj : must point to the object to lock, contents preserved + // disp_hdr: must point to the displaced header location, contents preserved + // scratch : scratch register, contents destroyed + // returns code offset at which to add null check debug information + int lock_object (Register swap, Register obj, Register disp_hdr, Register scratch, Label& slow_case); + + // unlocking + // hdr : contents destroyed + // obj : must point to the object to lock, contents preserved + // disp_hdr: must be A0 & must point to the displaced header location, contents destroyed + void unlock_object(Register swap, Register obj, Register lock, Label& slow_case); + + void initialize_object( + Register obj, // result: pointer to object after successful allocation + Register klass, // object klass + Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise + int con_size_in_bytes, // object size in bytes if known at compile time + Register t1, // temp register + Register t2, // temp register + bool is_tlab_allocated // the object was allocated in a TLAB; relevant for the implementation of ZeroTLAB + ); + + // allocation of fixed-size objects + // (can also be used to allocate fixed-size arrays, by setting + // hdr_size correctly and storing the array length afterwards) + // obj : will contain pointer to allocated object + // t1, t2 : scratch registers - contents destroyed + // header_size: size of object header in words + // object_size: total size of object in words + // slow_case : exit to slow case implementation if fast allocation fails + void allocate_object(Register obj, Register t1, Register t2, int header_size, + int object_size, Register klass, Label& slow_case); + + enum { + max_array_allocation_length = 0x00FFFFFF + }; + + // allocation of arrays + // obj : will contain pointer to allocated object + // len : array length in number of elements + // t : scratch register - contents destroyed + // header_size: size of object header in words + // f : element scale factor + // slow_case : exit to slow case implementation if fast allocation fails + void allocate_array(Register obj, Register len, Register t, Register t2, int header_size, + int f, Register klass, Label& slow_case); + + int rsp_offset() const { return _rsp_offset; } + void set_rsp_offset(int n) { _rsp_offset = n; } + + void invalidate_registers(bool inv_a0, bool inv_s0, bool inv_a2, bool inv_a3, + bool inv_a4, bool inv_a5) PRODUCT_RETURN; + + // This platform only uses 
signal-based null checks. The Label is not needed. + void null_check(Register r, Label *Lnull = NULL) { MacroAssembler::null_check(r); } + + void load_parameter(int offset_in_words, Register reg); + +#endif // CPU_LOONGARCH_C1_MACROASSEMBLER_LOONGARCH_HPP diff --git a/src/hotspot/cpu/loongarch/c1_MacroAssembler_loongarch_64.cpp b/src/hotspot/cpu/loongarch/c1_MacroAssembler_loongarch_64.cpp new file mode 100644 index 00000000000..17ff93a595d --- /dev/null +++ b/src/hotspot/cpu/loongarch/c1_MacroAssembler_loongarch_64.cpp @@ -0,0 +1,344 @@ +/* + * Copyright (c) 1999, 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2021, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "c1/c1_MacroAssembler.hpp" +#include "c1/c1_Runtime1.hpp" +#include "gc/shared/barrierSetAssembler.hpp" +#include "gc/shared/collectedHeap.hpp" +#include "interpreter/interpreter.hpp" +#include "oops/arrayOop.hpp" +#include "oops/markOop.hpp" +#include "runtime/basicLock.hpp" +#include "runtime/biasedLocking.hpp" +#include "runtime/os.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" + +#define T0 RT0 +#define T1 RT1 + +int C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr, Register scratch, Label& slow_case) { + const int aligned_mask = BytesPerWord -1; + const int hdr_offset = oopDesc::mark_offset_in_bytes(); + assert(hdr != obj && hdr != disp_hdr && obj != disp_hdr, "registers must be different"); + int null_check_offset = -1; + Label done; + + verify_oop(obj); + + // save object being locked into the BasicObjectLock + st_ptr(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); + + if (UseBiasedLocking) { + assert(scratch != noreg, "should have scratch register at this point"); + null_check_offset = biased_locking_enter(disp_hdr, obj, hdr, scratch, false, done, &slow_case); + } else { + null_check_offset = offset(); + } + + // Load object header + ld_ptr(hdr, Address(obj, hdr_offset)); + // and mark it as unlocked + ori(hdr, hdr, markOopDesc::unlocked_value); + // save unlocked object header into the displaced header location on the stack + st_ptr(hdr, Address(disp_hdr, 0)); + // test if object header is still the same (i.e. 
unlocked), and if so, store the + // displaced header address in the object header - if it is not the same, get the + // object header instead + lea(SCR2, Address(obj, hdr_offset)); + cmpxchg(Address(SCR2, 0), hdr, disp_hdr, SCR1, true, false, done); + // if the object header was the same, we're done + // if the object header was not the same, it is now in the hdr register + // => test if it is a stack pointer into the same stack (recursive locking), i.e.: + // + // 1) (hdr & aligned_mask) == 0 + // 2) sp <= hdr + // 3) hdr <= sp + page_size + // + // these 3 tests can be done by evaluating the following expression: + // + // (hdr - sp) & (aligned_mask - page_size) + // + // assuming both the stack pointer and page_size have their least + // significant 2 bits cleared and page_size is a power of 2 + sub_d(hdr, hdr, SP); + li(SCR1, aligned_mask - os::vm_page_size()); + andr(hdr, hdr, SCR1); + // for recursive locking, the result is zero => save it in the displaced header + // location (NULL in the displaced hdr location indicates recursive locking) + st_ptr(hdr, Address(disp_hdr, 0)); + // otherwise we don't care about the result and handle locking via runtime call + bnez(hdr, slow_case); + // done + bind(done); + if (PrintBiasedLockingStatistics) { + atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, SCR1, SCR2); + } + return null_check_offset; +} + +void C1_MacroAssembler::unlock_object(Register hdr, Register obj, Register disp_hdr, Label& slow_case) { + const int aligned_mask = BytesPerWord -1; + const int hdr_offset = oopDesc::mark_offset_in_bytes(); + assert(hdr != obj && hdr != disp_hdr && obj != disp_hdr, "registers must be different"); + Label done; + + if (UseBiasedLocking) { + // load object + ld_ptr(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); + biased_locking_exit(obj, hdr, done); + } + + // load displaced header + ld_ptr(hdr, Address(disp_hdr, 0)); + // if the loaded hdr is NULL we had recursive locking + // if we had recursive locking, we are done + beqz(hdr, done); + if (!UseBiasedLocking) { + // load object + ld_ptr(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); + } + verify_oop(obj); + // test if object header is pointing to the displaced header, and if so, restore + // the displaced header in the object - if the object header is not pointing to + // the displaced header, get the object header instead + // if the object header was not pointing to the displaced header, + // we do unlocking via runtime call + if (hdr_offset) { + lea(SCR1, Address(obj, hdr_offset)); + cmpxchg(Address(SCR1, 0), disp_hdr, hdr, SCR2, false, false, done, &slow_case); + } else { + cmpxchg(Address(obj, 0), disp_hdr, hdr, SCR2, false, false, done, &slow_case); + } + // done + bind(done); +} + +// Defines obj, preserves var_size_in_bytes +void C1_MacroAssembler::try_allocate(Register obj, Register var_size_in_bytes, + int con_size_in_bytes, Register t1, Register t2, + Label& slow_case) { + if (UseTLAB) { + tlab_allocate(obj, var_size_in_bytes, con_size_in_bytes, t1, t2, slow_case); + } else { + eden_allocate(obj, var_size_in_bytes, con_size_in_bytes, t1, slow_case); + } +} + +void C1_MacroAssembler::initialize_header(Register obj, Register klass, Register len, + Register t1, Register t2) { + assert_different_registers(obj, klass, len); + if (UseBiasedLocking && !len->is_valid()) { + assert_different_registers(obj, klass, len, t1, t2); + ld_ptr(t1, Address(klass, Klass::prototype_header_offset())); + } else { + // This assumes that all prototype 
bits fit in an int32_t + li(t1, (int32_t)(intptr_t)markOopDesc::prototype()); + } + st_ptr(t1, Address(obj, oopDesc::mark_offset_in_bytes())); + + if (UseCompressedClassPointers) { // Take care not to kill klass + encode_klass_not_null(t1, klass); + st_w(t1, Address(obj, oopDesc::klass_offset_in_bytes())); + } else { + st_ptr(klass, Address(obj, oopDesc::klass_offset_in_bytes())); + } + + if (len->is_valid()) { + st_w(len, Address(obj, arrayOopDesc::length_offset_in_bytes())); + } else if (UseCompressedClassPointers) { + store_klass_gap(obj, R0); + } +} + +// preserves obj, destroys len_in_bytes +// +// Scratch registers: t1 = T0, t2 = T1 +// +void C1_MacroAssembler::initialize_body(Register obj, Register len_in_bytes, + int hdr_size_in_bytes, Register t1, Register t2) { + assert(hdr_size_in_bytes >= 0, "header size must be positive or 0"); + assert(t1 == T0 && t2 == T1, "must be"); + Label done; + + // len_in_bytes is positive and ptr sized + addi_d(len_in_bytes, len_in_bytes, -hdr_size_in_bytes); + beqz(len_in_bytes, done); + + // zero_words() takes ptr in t1 and count in bytes in t2 + lea(t1, Address(obj, hdr_size_in_bytes)); + addi_d(t2, len_in_bytes, -BytesPerWord); + + Label loop; + bind(loop); + stx_d(R0, t1, t2); + addi_d(t2, t2, -BytesPerWord); + bge(t2, R0, loop); + + bind(done); +} + +void C1_MacroAssembler::allocate_object(Register obj, Register t1, Register t2, int header_size, + int object_size, Register klass, Label& slow_case) { + assert_different_registers(obj, t1, t2); + assert(header_size >= 0 && object_size >= header_size, "illegal sizes"); + + try_allocate(obj, noreg, object_size * BytesPerWord, t1, t2, slow_case); + + initialize_object(obj, klass, noreg, object_size * HeapWordSize, t1, t2, UseTLAB); +} + +// Scratch registers: t1 = T0, t2 = T1 +void C1_MacroAssembler::initialize_object(Register obj, Register klass, Register var_size_in_bytes, + int con_size_in_bytes, Register t1, Register t2, + bool is_tlab_allocated) { + assert((con_size_in_bytes & MinObjAlignmentInBytesMask) == 0, + "con_size_in_bytes is not multiple of alignment"); + const int hdr_size_in_bytes = instanceOopDesc::header_size() * HeapWordSize; + + initialize_header(obj, klass, noreg, t1, t2); + + if (!(UseTLAB && ZeroTLAB && is_tlab_allocated)) { + // clear rest of allocated space + const Register index = t2; + if (var_size_in_bytes != noreg) { + move(index, var_size_in_bytes); + initialize_body(obj, index, hdr_size_in_bytes, t1, t2); + } else if (con_size_in_bytes > hdr_size_in_bytes) { + con_size_in_bytes -= hdr_size_in_bytes; + lea(t1, Address(obj, hdr_size_in_bytes)); + Label loop; + li(SCR1, con_size_in_bytes - BytesPerWord); + bind(loop); + stx_d(R0, t1, SCR1); + addi_d(SCR1, SCR1, -BytesPerWord); + bge(SCR1, R0, loop); + } + } + + membar(StoreStore); + + if (CURRENT_ENV->dtrace_alloc_probes()) { + assert(obj == A0, "must be"); + call(Runtime1::entry_for(Runtime1::dtrace_object_alloc_id), relocInfo::runtime_call_type); + } + + verify_oop(obj); +} + +void C1_MacroAssembler::allocate_array(Register obj, Register len, Register t1, Register t2, + int header_size, int f, Register klass, Label& slow_case) { + assert_different_registers(obj, len, t1, t2, klass); + + // determine alignment mask + assert(!(BytesPerWord & 1), "must be a multiple of 2 for masking code to work"); + + // check for negative or excessive length + li(SCR1, (int32_t)max_array_allocation_length); + bge_far(len, SCR1, slow_case, false); + + const Register arr_size = t2; // okay to be the same + // align object end + li(arr_size, 
(int32_t)header_size * BytesPerWord + MinObjAlignmentInBytesMask); + slli_w(SCR1, len, f); + add_d(arr_size, arr_size, SCR1); + bstrins_d(arr_size, R0, exact_log2(MinObjAlignmentInBytesMask + 1) - 1, 0); + + try_allocate(obj, arr_size, 0, t1, t2, slow_case); + + initialize_header(obj, klass, len, t1, t2); + + // clear rest of allocated space + initialize_body(obj, arr_size, header_size * BytesPerWord, t1, t2); + + membar(StoreStore); + + if (CURRENT_ENV->dtrace_alloc_probes()) { + assert(obj == A0, "must be"); + call(Runtime1::entry_for(Runtime1::dtrace_object_alloc_id), relocInfo::runtime_call_type); + } + + verify_oop(obj); +} + +void C1_MacroAssembler::build_frame(int framesize, int bang_size_in_bytes) { + assert(bang_size_in_bytes >= framesize, "stack bang size incorrect"); + // Make sure there is enough stack space for this method's activation. + // Note that we do this before creating a frame. + generate_stack_overflow_check(bang_size_in_bytes); + MacroAssembler::build_frame(framesize); +} + +void C1_MacroAssembler::remove_frame(int framesize) { + MacroAssembler::remove_frame(framesize); +} + +void C1_MacroAssembler::verified_entry() { + // If we have to make this method not-entrant we'll overwrite its + // first instruction with a jump. For this action to be legal we + // must ensure that this first instruction is a b, bl, nop, break. + // Make it a NOP. + nop(); +} + +void C1_MacroAssembler::load_parameter(int offset_in_words, Register reg) { + // rbp, + 0: link + // + 1: return address + // + 2: argument with offset 0 + // + 3: argument with offset 1 + // + 4: ... + + ld_ptr(reg, Address(FP, (offset_in_words + 2) * BytesPerWord)); +} + +#ifndef PRODUCT +void C1_MacroAssembler::verify_stack_oop(int stack_offset) { + if (!VerifyOops) return; + verify_oop_addr(Address(SP, stack_offset), "oop"); +} + +void C1_MacroAssembler::verify_not_null_oop(Register r) { + if (!VerifyOops) return; + Label not_null; + bnez(r, not_null); + stop("non-null oop required"); + bind(not_null); + verify_oop(r); +} + +void C1_MacroAssembler::invalidate_registers(bool inv_a0, bool inv_s0, bool inv_a2, + bool inv_a3, bool inv_a4, bool inv_a5) { +#ifdef ASSERT + static int nn; + if (inv_a0) li(A0, 0xDEAD); + if (inv_s0) li(S0, 0xDEAD); + if (inv_a2) li(A2, nn++); + if (inv_a3) li(A3, 0xDEAD); + if (inv_a4) li(A4, 0xDEAD); + if (inv_a5) li(A5, 0xDEAD); +#endif +} +#endif // ifndef PRODUCT diff --git a/src/hotspot/cpu/loongarch/c1_Runtime1_loongarch_64.cpp b/src/hotspot/cpu/loongarch/c1_Runtime1_loongarch_64.cpp new file mode 100644 index 00000000000..aaa708f71e0 --- /dev/null +++ b/src/hotspot/cpu/loongarch/c1_Runtime1_loongarch_64.cpp @@ -0,0 +1,1138 @@ +/* + * Copyright (c) 1999, 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/assembler.hpp" +#include "c1/c1_CodeStubs.hpp" +#include "c1/c1_Defs.hpp" +#include "c1/c1_MacroAssembler.hpp" +#include "c1/c1_Runtime1.hpp" +#include "compiler/disassembler.hpp" +#include "compiler/oopMap.hpp" +#include "gc/shared/cardTable.hpp" +#include "gc/shared/cardTableBarrierSet.hpp" +#include "gc/shared/collectedHeap.hpp" +#include "interpreter/interpreter.hpp" +#include "memory/universe.hpp" +#include "nativeInst_loongarch.hpp" +#include "oops/compiledICHolder.hpp" +#include "oops/oop.inline.hpp" +#include "prims/jvmtiExport.hpp" +#include "register_loongarch.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/signature.hpp" +#include "runtime/stubRoutines.hpp" +#include "runtime/vframe.hpp" +#include "runtime/vframeArray.hpp" +#include "vmreg_loongarch.inline.hpp" + +#define T0 RT0 +#define T1 RT1 +#define T2 RT2 +#define T3 RT3 +#define T5 RT5 +#define T6 RT6 +#define T8 RT8 + +// Implementation of StubAssembler + +int StubAssembler::call_RT(Register oop_result1, Register metadata_result, address entry, int args_size) { + // setup registers + assert(!(oop_result1->is_valid() || metadata_result->is_valid()) || oop_result1 != metadata_result, + "registers must be different"); + assert(oop_result1 != TREG && metadata_result != TREG, "registers must be different"); + assert(args_size >= 0, "illegal args_size"); + bool align_stack = false; + + move(A0, TREG); + set_num_rt_args(0); // Nothing on stack + + Label retaddr; + set_last_Java_frame(SP, FP, retaddr); + + // do the call + call(entry, relocInfo::runtime_call_type); + bind(retaddr); + int call_offset = offset(); + // verify callee-saved register +#ifdef ASSERT + { Label L; + get_thread(SCR1); + beq(TREG, SCR1, L); + stop("StubAssembler::call_RT: TREG not callee saved?"); + bind(L); + } +#endif + reset_last_Java_frame(true); + + // check for pending exceptions + { Label L; + // check for pending exceptions (java_thread is set upon return) + ld_ptr(SCR1, Address(TREG, in_bytes(Thread::pending_exception_offset()))); + beqz(SCR1, L); + // exception pending => remove activation and forward to exception handler + // make sure that the vm_results are cleared + if (oop_result1->is_valid()) { + st_ptr(R0, Address(TREG, JavaThread::vm_result_offset())); + } + if (metadata_result->is_valid()) { + st_ptr(R0, Address(TREG, JavaThread::vm_result_2_offset())); + } + if (frame_size() == no_frame_size) { + leave(); + jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); + } else if (_stub_id == Runtime1::forward_exception_id) { + should_not_reach_here(); + } else { + jmp(Runtime1::entry_for(Runtime1::forward_exception_id), relocInfo::runtime_call_type); + } + bind(L); + } + // get oop results if there are any and reset the values in the thread + if (oop_result1->is_valid()) { + get_vm_result(oop_result1, TREG); + } + if (metadata_result->is_valid()) { + get_vm_result_2(metadata_result, TREG); + } + return call_offset; +} + +int StubAssembler::call_RT(Register oop_result1, Register metadata_result, + address entry, Register arg1) { + move(A1, arg1); + return 
call_RT(oop_result1, metadata_result, entry, 1);
+}
+
+int StubAssembler::call_RT(Register oop_result1, Register metadata_result,
+                           address entry, Register arg1, Register arg2) {
+  if (A1 == arg2) {
+    if (A2 == arg1) {
+      move(SCR1, arg1);
+      move(arg1, arg2);
+      move(arg2, SCR1);
+    } else {
+      move(A2, arg2);
+      move(A1, arg1);
+    }
+  } else {
+    move(A1, arg1);
+    move(A2, arg2);
+  }
+  return call_RT(oop_result1, metadata_result, entry, 2);
+}
+
+int StubAssembler::call_RT(Register oop_result1, Register metadata_result,
+                           address entry, Register arg1, Register arg2, Register arg3) {
+  // if there is any conflict use the stack
+  if (arg1 == A2 || arg1 == A3 ||
+      arg2 == A1 || arg2 == A3 ||
+      arg3 == A1 || arg3 == A2) {
+    addi_d(SP, SP, -4 * wordSize);
+    st_ptr(arg1, Address(SP, 0 * wordSize));
+    st_ptr(arg2, Address(SP, 1 * wordSize));
+    st_ptr(arg3, Address(SP, 2 * wordSize));
+    ld_ptr(arg1, Address(SP, 0 * wordSize));
+    ld_ptr(arg2, Address(SP, 1 * wordSize));
+    ld_ptr(arg3, Address(SP, 2 * wordSize));
+    addi_d(SP, SP, 4 * wordSize);
+  } else {
+    move(A1, arg1);
+    move(A2, arg2);
+    move(A3, arg3);
+  }
+  return call_RT(oop_result1, metadata_result, entry, 3);
+}
+
+enum return_state_t {
+  does_not_return, requires_return
+};
+
+// Implementation of StubFrame
+
+class StubFrame: public StackObj {
+ private:
+  StubAssembler* _sasm;
+  bool _return_state;
+
+ public:
+  StubFrame(StubAssembler* sasm, const char* name, bool must_gc_arguments,
+            return_state_t return_state=requires_return);
+  void load_argument(int offset_in_words, Register reg);
+
+  ~StubFrame();
+};
+
+void StubAssembler::prologue(const char* name, bool must_gc_arguments) {
+  set_info(name, must_gc_arguments);
+  enter();
+}
+
+void StubAssembler::epilogue() {
+  leave();
+  jr(RA);
+}
+
+#define __ _sasm->
+
+StubFrame::StubFrame(StubAssembler* sasm, const char* name, bool must_gc_arguments,
+                     return_state_t return_state) {
+  _sasm = sasm;
+  _return_state = return_state;
+  __ prologue(name, must_gc_arguments);
+}
+
+// load parameters that were stored with LIR_Assembler::store_parameter
+// Note: offsets for store_parameter and load_argument must match
+void StubFrame::load_argument(int offset_in_words, Register reg) {
+  __ load_parameter(offset_in_words, reg);
+}
+
+StubFrame::~StubFrame() {
+  if (_return_state == requires_return) {
+    __ epilogue();
+  } else {
+    __ should_not_reach_here();
+  }
+}
+
+#undef __
+
+// Implementation of Runtime1
+
+#define __ sasm->
+
+const int float_regs_as_doubles_size_in_slots = pd_nof_fpu_regs_frame_map * 2;
+
+// Stack layout for saving/restoring all the registers needed during a runtime
+// call (this includes deoptimization)
+// Note that users of this frame may well have arguments to some runtime
+// while these values are on the stack. These positions neglect those arguments
+// but the code in save_live_registers will take the argument count into
+// account.
+//
+
+enum reg_save_layout {
+  reg_save_frame_size = 32 /* float */ + 30 /* integer, except zr, tp */
+};
+
+// Save off registers which might be killed by calls into the runtime.
+// Tries to be smart about FP registers. In particular we separate
+// saving and describing the FPU registers for deoptimization since we
+// have to save the FPU registers twice if we describe them. The
+// deopt blob is the only thing which needs to describe FPU registers.
+// In all other cases it should be sufficient to simply save their
+// current value.
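+//
+// The save area built by save_live_registers() below holds the 32 double
+// FP registers at [SP + 0 .. SP + 31 * wordSize], followed by the
+// general-purpose registers r4..r31 (zr, ra, tp and sp are not saved).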
+ +static int cpu_reg_save_offsets[FrameMap::nof_cpu_regs]; +static int fpu_reg_save_offsets[FrameMap::nof_fpu_regs]; +static int reg_save_size_in_words; +static int frame_size_in_bytes = -1; + +static OopMap* generate_oop_map(StubAssembler* sasm, bool save_fpu_registers) { + int frame_size_in_bytes = reg_save_frame_size * BytesPerWord; + sasm->set_frame_size(frame_size_in_bytes / BytesPerWord); + int frame_size_in_slots = frame_size_in_bytes / VMRegImpl::stack_slot_size; + OopMap* oop_map = new OopMap(frame_size_in_slots, 0); + + for (int i = A0->encoding(); i <= T8->encoding(); i++) { + Register r = as_Register(i); + if (i != SCR1->encoding() && i != SCR2->encoding()) { + int sp_offset = cpu_reg_save_offsets[i]; + oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset), r->as_VMReg()); + } + } + + if (save_fpu_registers) { + for (int i = 0; i < FrameMap::nof_fpu_regs; i++) { + FloatRegister r = as_FloatRegister(i); + int sp_offset = fpu_reg_save_offsets[i]; + oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset), r->as_VMReg()); + } + } + + return oop_map; +} + +static OopMap* save_live_registers(StubAssembler* sasm, + bool save_fpu_registers = true) { + __ block_comment("save_live_registers"); + + // integer registers except zr & ra & tp & sp + __ addi_d(SP, SP, -(32 - 4 + 32) * wordSize); + + for (int i = 4; i < 32; i++) + __ st_ptr(as_Register(i), Address(SP, (32 + i - 4) * wordSize)); + + if (save_fpu_registers) { + for (int i = 0; i < 32; i++) + __ fst_d(as_FloatRegister(i), Address(SP, i * wordSize)); + } + + return generate_oop_map(sasm, save_fpu_registers); +} + +static void restore_live_registers(StubAssembler* sasm, bool restore_fpu_registers = true) { + if (restore_fpu_registers) { + for (int i = 0; i < 32; i ++) + __ fld_d(as_FloatRegister(i), Address(SP, i * wordSize)); + } + + for (int i = 4; i < 32; i++) + __ ld_ptr(as_Register(i), Address(SP, (32 + i - 4) * wordSize)); + + __ addi_d(SP, SP, (32 - 4 + 32) * wordSize); +} + +static void restore_live_registers_except_a0(StubAssembler* sasm, bool restore_fpu_registers = true) { + if (restore_fpu_registers) { + for (int i = 0; i < 32; i ++) + __ fld_d(as_FloatRegister(i), Address(SP, i * wordSize)); + } + + for (int i = 5; i < 32; i++) + __ ld_ptr(as_Register(i), Address(SP, (32 + i - 4) * wordSize)); + + __ addi_d(SP, SP, (32 - 4 + 32) * wordSize); +} + +void Runtime1::initialize_pd() { + int sp_offset = 0; + int i; + + // all float registers are saved explicitly + assert(FrameMap::nof_fpu_regs == 32, "double registers not handled here"); + for (i = 0; i < FrameMap::nof_fpu_regs; i++) { + fpu_reg_save_offsets[i] = sp_offset; + sp_offset += 2; // SP offsets are in halfwords + } + + for (i = 4; i < FrameMap::nof_cpu_regs; i++) { + Register r = as_Register(i); + cpu_reg_save_offsets[i] = sp_offset; + sp_offset += 2; // SP offsets are in halfwords + } +} + +// target: the entry point of the method that creates and posts the exception oop +// has_argument: true if the exception needs arguments (passed in SCR1 and SCR2) + +OopMapSet* Runtime1::generate_exception_throw(StubAssembler* sasm, address target, + bool has_argument) { + // make a frame and preserve the caller's caller-save registers + OopMap* oop_map = save_live_registers(sasm); + int call_offset; + if (!has_argument) { + call_offset = __ call_RT(noreg, noreg, target); + } else { + __ move(A1, SCR1); + __ move(A2, SCR2); + call_offset = __ call_RT(noreg, noreg, target); + } + OopMapSet* oop_maps = new OopMapSet(); + oop_maps->add_gc_map(call_offset, oop_map); + 
return oop_maps; +} + +OopMapSet* Runtime1::generate_handle_exception(StubID id, StubAssembler *sasm) { + __ block_comment("generate_handle_exception"); + + // incoming parameters + const Register exception_oop = A0; + const Register exception_pc = A1; + // other registers used in this stub + + // Save registers, if required. + OopMapSet* oop_maps = new OopMapSet(); + OopMap* oop_map = NULL; + switch (id) { + case forward_exception_id: + // We're handling an exception in the context of a compiled frame. + // The registers have been saved in the standard places. Perform + // an exception lookup in the caller and dispatch to the handler + // if found. Otherwise unwind and dispatch to the callers + // exception handler. + oop_map = generate_oop_map(sasm, 1 /*thread*/); + + // load and clear pending exception oop into A0 + __ ld_ptr(exception_oop, Address(TREG, Thread::pending_exception_offset())); + __ st_ptr(R0, Address(TREG, Thread::pending_exception_offset())); + + // load issuing PC (the return address for this stub) into A1 + __ ld_ptr(exception_pc, Address(FP, 1 * BytesPerWord)); + + // make sure that the vm_results are cleared (may be unnecessary) + __ st_ptr(R0, Address(TREG, JavaThread::vm_result_offset())); + __ st_ptr(R0, Address(TREG, JavaThread::vm_result_2_offset())); + break; + case handle_exception_nofpu_id: + case handle_exception_id: + // At this point all registers MAY be live. + oop_map = save_live_registers(sasm, id != handle_exception_nofpu_id); + break; + case handle_exception_from_callee_id: { + // At this point all registers except exception oop (A0) and + // exception pc (RA) are dead. + const int frame_size = 2 /*fp, return address*/; + oop_map = new OopMap(frame_size * VMRegImpl::slots_per_word, 0); + sasm->set_frame_size(frame_size); + break; + } + default: ShouldNotReachHere(); + } + + // verify that only A0 and A1 are valid at this time + __ invalidate_registers(false, true, true, true, true, true); + // verify that A0 contains a valid exception + __ verify_not_null_oop(exception_oop); + +#ifdef ASSERT + // check that fields in JavaThread for exception oop and issuing pc are + // empty before writing to them + Label oop_empty; + __ ld_ptr(SCR1, Address(TREG, JavaThread::exception_oop_offset())); + __ beqz(SCR1, oop_empty); + __ stop("exception oop already set"); + __ bind(oop_empty); + + Label pc_empty; + __ ld_ptr(SCR1, Address(TREG, JavaThread::exception_pc_offset())); + __ beqz(SCR1, pc_empty); + __ stop("exception pc already set"); + __ bind(pc_empty); +#endif + + // save exception oop and issuing pc into JavaThread + // (exception handler will load it from here) + __ st_ptr(exception_oop, Address(TREG, JavaThread::exception_oop_offset())); + __ st_ptr(exception_pc, Address(TREG, JavaThread::exception_pc_offset())); + + // patch throwing pc into return address (has bci & oop map) + __ st_ptr(exception_pc, Address(FP, 1 * BytesPerWord)); + + // compute the exception handler. + // the exception oop and the throwing pc are read from the fields in JavaThread + int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, exception_handler_for_pc)); + oop_maps->add_gc_map(call_offset, oop_map); + + // A0: handler address + // will be the deopt blob if nmethod was deoptimized while we looked up + // handler regardless of whether handler existed in the nmethod. 
+ + // only A0 is valid at this time, all other registers have been destroyed by the runtime call + __ invalidate_registers(false, true, true, true, true, true); + + // patch the return address, this stub will directly return to the exception handler + __ st_ptr(A0, Address(FP, 1 * BytesPerWord)); + + switch (id) { + case forward_exception_id: + case handle_exception_nofpu_id: + case handle_exception_id: + // Restore the registers that were saved at the beginning. + restore_live_registers(sasm, id != handle_exception_nofpu_id); + break; + case handle_exception_from_callee_id: + break; + default: ShouldNotReachHere(); + } + + return oop_maps; +} + +void Runtime1::generate_unwind_exception(StubAssembler *sasm) { + // incoming parameters + const Register exception_oop = A0; + // callee-saved copy of exception_oop during runtime call + const Register exception_oop_callee_saved = S0; + // other registers used in this stub + const Register exception_pc = A1; + const Register handler_addr = A3; + + // verify that only A0, is valid at this time + __ invalidate_registers(false, true, true, true, true, true); + +#ifdef ASSERT + // check that fields in JavaThread for exception oop and issuing pc are empty + Label oop_empty; + __ ld_ptr(SCR1, Address(TREG, JavaThread::exception_oop_offset())); + __ beqz(SCR1, oop_empty); + __ stop("exception oop must be empty"); + __ bind(oop_empty); + + Label pc_empty; + __ ld_ptr(SCR1, Address(TREG, JavaThread::exception_pc_offset())); + __ beqz(SCR1, pc_empty); + __ stop("exception pc must be empty"); + __ bind(pc_empty); +#endif + + // Save our return address because + // exception_handler_for_return_address will destroy it. We also + // save exception_oop + __ addi_d(SP, SP, -2 * wordSize); + __ st_ptr(RA, Address(SP, 0 * wordSize)); + __ st_ptr(exception_oop, Address(SP, 1 * wordSize)); + + // search the exception handler address of the caller (using the return address) + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), TREG, RA); + // V0: exception handler address of the caller + + // Only V0 is valid at this time; all other registers have been + // destroyed by the call. + __ invalidate_registers(false, true, true, true, false, true); + + // move result of call into correct register + __ move(handler_addr, A0); + + // get throwing pc (= return address). + // RA has been destroyed by the call + __ ld_ptr(RA, Address(SP, 0 * wordSize)); + __ ld_ptr(exception_oop, Address(SP, 1 * wordSize)); + __ addi_d(SP, SP, 2 * wordSize); + __ move(A1, RA); + + __ verify_not_null_oop(exception_oop); + + // continue at exception handler (return address removed) + // note: do *not* remove arguments when unwinding the + // activation since the caller assumes having + // all arguments on the stack when entering the + // runtime to determine the exception handler + // (GC happens at call site with arguments!) + // A0: exception oop + // A1: throwing pc + // A3: exception handler + __ jr(handler_addr); +} + +OopMapSet* Runtime1::generate_patching(StubAssembler* sasm, address target) { + // use the maximum number of runtime-arguments here because it is difficult to + // distinguish each RT-Call. + // Note: This number affects also the RT-Call in generate_handle_exception because + // the oop-map is shared for all calls. 
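+  //
+  // Reading aid for the code below (informal; the code is authoritative):
+  // save all live registers, call `target` with TREG as the only argument
+  // while last_Java_frame is set, then either tail-jump to the deopt blob's
+  // re-execution entry (when the runtime reports the nmethod was deoptimized)
+  // or stop, since any other outcome is unexpected here.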
+ DeoptimizationBlob* deopt_blob = SharedRuntime::deopt_blob(); + assert(deopt_blob != NULL, "deoptimization blob must have been created"); + + OopMap* oop_map = save_live_registers(sasm); + + __ move(A0, TREG); + Label retaddr; + __ set_last_Java_frame(SP, FP, retaddr); + // do the call + __ call(target, relocInfo::runtime_call_type); + __ bind(retaddr); + OopMapSet* oop_maps = new OopMapSet(); + oop_maps->add_gc_map(__ offset(), oop_map); + // verify callee-saved register +#ifdef ASSERT + { Label L; + __ get_thread(SCR1); + __ beq(TREG, SCR1, L); + __ stop("StubAssembler::call_RT: rthread not callee saved?"); + __ bind(L); + } +#endif + + __ reset_last_Java_frame(true); + +#ifdef ASSERT + // check that fields in JavaThread for exception oop and issuing pc are empty + Label oop_empty; + __ ld_ptr(SCR1, Address(TREG, Thread::pending_exception_offset())); + __ beqz(SCR1, oop_empty); + __ stop("exception oop must be empty"); + __ bind(oop_empty); + + Label pc_empty; + __ ld_ptr(SCR1, Address(TREG, JavaThread::exception_pc_offset())); + __ beqz(SCR1, pc_empty); + __ stop("exception pc must be empty"); + __ bind(pc_empty); +#endif + + // Runtime will return true if the nmethod has been deoptimized, this is the + // expected scenario and anything else is an error. Note that we maintain a + // check on the result purely as a defensive measure. + Label no_deopt; + __ beqz(A0, no_deopt); // Have we deoptimized? + + // Perform a re-execute. The proper return address is already on the stack, + // we just need to restore registers, pop all of our frame but the return + // address and jump to the deopt blob. + restore_live_registers(sasm); + __ leave(); + __ jmp(deopt_blob->unpack_with_reexecution(), relocInfo::runtime_call_type); + + __ bind(no_deopt); + __ stop("deopt not performed"); + + return oop_maps; +} + +OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) { + // for better readability + const bool must_gc_arguments = true; + const bool dont_gc_arguments = false; + + // default value; overwritten for some optimized stubs that are called + // from methods that do not use the fpu + bool save_fpu_registers = true; + + // stub code & info for the different stubs + OopMapSet* oop_maps = NULL; + OopMap* oop_map = NULL; + switch (id) { + { + case forward_exception_id: + { + oop_maps = generate_handle_exception(id, sasm); + __ leave(); + __ jr(RA); + } + break; + + case throw_div0_exception_id: + { + StubFrame f(sasm, "throw_div0_exception", dont_gc_arguments, does_not_return); + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_div0_exception), false); + } + break; + + case throw_null_pointer_exception_id: + { + StubFrame f(sasm, "throw_null_pointer_exception", dont_gc_arguments, does_not_return); + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_null_pointer_exception), false); + } + break; + + case new_instance_id: + case fast_new_instance_id: + case fast_new_instance_init_check_id: + { + Register klass = A3; // Incoming + Register obj = A0; // Result + + if (id == new_instance_id) { + __ set_info("new_instance", dont_gc_arguments); + } else if (id == fast_new_instance_id) { + __ set_info("fast new_instance", dont_gc_arguments); + } else { + assert(id == fast_new_instance_init_check_id, "bad StubID"); + __ set_info("fast new_instance init check", dont_gc_arguments); + } + + // If TLAB is disabled, see if there is support for inlining contiguous + // allocations. + // Otherwise, just go to the slow path. 
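+        // Reading aid (informal; derived from the code that follows): the
+        // inline fast path checks, for the init_check variant, that the klass
+        // is fully initialized, loads the instance size from the layout
+        // helper, allocates from eden via eden_allocate(), and initializes
+        // the object; any failure branches to slow_path, which falls through
+        // to the call_RT(new_instance) call below.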
+ if ((id == fast_new_instance_id || id == fast_new_instance_init_check_id) && + !UseTLAB && Universe::heap()->supports_inline_contig_alloc()) { + Label slow_path; + Register obj_size = S0; + Register t1 = T0; + Register t2 = T1; + assert_different_registers(klass, obj, obj_size, t1, t2); + + __ addi_d(SP, SP, -2 * wordSize); + __ st_ptr(S0, Address(SP, 0)); + + if (id == fast_new_instance_init_check_id) { + // make sure the klass is initialized + __ ld_bu(SCR1, Address(klass, InstanceKlass::init_state_offset())); + __ li(SCR2, InstanceKlass::fully_initialized); + __ bne_far(SCR1, SCR2, slow_path); + } + +#ifdef ASSERT + // assert object can be fast path allocated + { + Label ok, not_ok; + __ ld_w(obj_size, Address(klass, Klass::layout_helper_offset())); + __ bge(R0, obj_size, not_ok); // make sure it's an instance (LH > 0) + __ andi(SCR1, obj_size, Klass::_lh_instance_slow_path_bit); + __ beqz(SCR1, ok); + __ bind(not_ok); + __ stop("assert(can be fast path allocated)"); + __ should_not_reach_here(); + __ bind(ok); + } +#endif // ASSERT + + // get the instance size (size is postive so movl is fine for 64bit) + __ ld_w(obj_size, Address(klass, Klass::layout_helper_offset())); + + __ eden_allocate(obj, obj_size, 0, t1, slow_path); + + __ initialize_object(obj, klass, obj_size, 0, t1, t2, /* is_tlab_allocated */ false); + __ verify_oop(obj); + __ ld_ptr(S0, Address(SP, 0)); + __ addi_d(SP, SP, 2 * wordSize); + __ jr(RA); + + __ bind(slow_path); + __ ld_ptr(S0, Address(SP, 0)); + __ addi_d(SP, SP, 2 * wordSize); + } + + __ enter(); + OopMap* map = save_live_registers(sasm); + int call_offset = __ call_RT(obj, noreg, CAST_FROM_FN_PTR(address, new_instance), klass); + oop_maps = new OopMapSet(); + oop_maps->add_gc_map(call_offset, map); + restore_live_registers_except_a0(sasm); + __ verify_oop(obj); + __ leave(); + __ jr(RA); + + // A0,: new instance + } + + break; + + case counter_overflow_id: + { + Register bci = A0, method = A1; + __ enter(); + OopMap* map = save_live_registers(sasm); + // Retrieve bci + __ ld_w(bci, Address(FP, 2 * BytesPerWord)); + // And a pointer to the Method* + __ ld_d(method, Address(FP, 3 * BytesPerWord)); + int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, counter_overflow), bci, method); + oop_maps = new OopMapSet(); + oop_maps->add_gc_map(call_offset, map); + restore_live_registers(sasm); + __ leave(); + __ jr(RA); + } + break; + + case new_type_array_id: + case new_object_array_id: + { + Register length = S0; // Incoming + Register klass = A3; // Incoming + Register obj = A0; // Result + + if (id == new_type_array_id) { + __ set_info("new_type_array", dont_gc_arguments); + } else { + __ set_info("new_object_array", dont_gc_arguments); + } + +#ifdef ASSERT + // assert object type is really an array of the proper kind + { + Label ok; + Register t0 = obj; + __ ld_w(t0, Address(klass, Klass::layout_helper_offset())); + __ srai_w(t0, t0, Klass::_lh_array_tag_shift); + int tag = ((id == new_type_array_id) + ? Klass::_lh_array_tag_type_value + : Klass::_lh_array_tag_obj_value); + __ li(SCR1, tag); + __ beq(t0, SCR1, ok); + __ stop("assert(is an array klass)"); + __ should_not_reach_here(); + __ bind(ok); + } +#endif // ASSERT + + // If TLAB is disabled, see if there is support for inlining contiguous + // allocations. + // Otherwise, just go to the slow path. 
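+        // Reading aid (informal; derived from the code that follows): the
+        // inline fast path bounds the length against
+        // max_array_allocation_length, computes the allocation size from the
+        // layout helper (header size plus length scaled by the element size,
+        // rounded up to the object alignment), allocates from eden, then
+        // initializes the header and clears the body; otherwise it branches
+        // to slow_path and the call_RT slow case below is used.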
+ if (!UseTLAB && Universe::heap()->supports_inline_contig_alloc()) { + Register arr_size = A5; + Register t1 = T0; + Register t2 = T1; + Label slow_path; + assert_different_registers(length, klass, obj, arr_size, t1, t2); + + // check that array length is small enough for fast path. + __ li(SCR1, C1_MacroAssembler::max_array_allocation_length); + __ blt_far(SCR1, length, slow_path, false); + + // get the allocation size: round_up(hdr + length << (layout_helper & 0x1F)) + // since size is positive ldrw does right thing on 64bit + __ ld_w(t1, Address(klass, Klass::layout_helper_offset())); + // since size is positive movw does right thing on 64bit + __ move(arr_size, length); + __ sll_w(arr_size, length, t1); + __ bstrpick_d(t1, t1, Klass::_lh_header_size_shift + + exact_log2(Klass::_lh_header_size_mask + 1) - 1, + Klass::_lh_header_size_shift); + __ add_d(arr_size, arr_size, t1); + __ addi_d(arr_size, arr_size, MinObjAlignmentInBytesMask); // align up + __ bstrins_d(arr_size, R0, exact_log2(MinObjAlignmentInBytesMask + 1) - 1, 0); + + __ eden_allocate(obj, arr_size, 0, t1, slow_path); // preserves arr_size + + __ initialize_header(obj, klass, length, t1, t2); + __ ld_bu(t1, Address(klass, in_bytes(Klass::layout_helper_offset()) + (Klass::_lh_header_size_shift / BitsPerByte))); + assert(Klass::_lh_header_size_shift % BitsPerByte == 0, "bytewise"); + assert(Klass::_lh_header_size_mask <= 0xFF, "bytewise"); + __ andi(t1, t1, Klass::_lh_header_size_mask); + __ sub_d(arr_size, arr_size, t1); // body length + __ add_d(t1, t1, obj); // body start + __ initialize_body(t1, arr_size, 0, t1, t2); + __ membar(Assembler::StoreStore); + __ verify_oop(obj); + + __ jr(RA); + + __ bind(slow_path); + } + + __ enter(); + OopMap* map = save_live_registers(sasm); + int call_offset; + if (id == new_type_array_id) { + call_offset = __ call_RT(obj, noreg, CAST_FROM_FN_PTR(address, new_type_array), klass, length); + } else { + call_offset = __ call_RT(obj, noreg, CAST_FROM_FN_PTR(address, new_object_array), klass, length); + } + + oop_maps = new OopMapSet(); + oop_maps->add_gc_map(call_offset, map); + restore_live_registers_except_a0(sasm); + + __ verify_oop(obj); + __ leave(); + __ jr(RA); + + // A0: new array + } + break; + + case new_multi_array_id: + { + StubFrame f(sasm, "new_multi_array", dont_gc_arguments); + // A0,: klass + // S0,: rank + // A2: address of 1st dimension + OopMap* map = save_live_registers(sasm); + __ move(A1, A0); + __ move(A3, A2); + __ move(A2, S0); + int call_offset = __ call_RT(A0, noreg, CAST_FROM_FN_PTR(address, new_multi_array), A1, A2, A3); + + oop_maps = new OopMapSet(); + oop_maps->add_gc_map(call_offset, map); + restore_live_registers_except_a0(sasm); + + // A0,: new multi array + __ verify_oop(A0); + } + break; + + case register_finalizer_id: + { + __ set_info("register_finalizer", dont_gc_arguments); + + // This is called via call_runtime so the arguments + // will be place in C abi locations + + __ verify_oop(A0); + + // load the klass and check the has finalizer flag + Label register_finalizer; + Register t = A5; + __ load_klass(t, A0); + __ ld_w(t, Address(t, Klass::access_flags_offset())); + __ li(SCR1, JVM_ACC_HAS_FINALIZER); + __ andr(SCR1, t, SCR1); + __ bnez(SCR1, register_finalizer); + __ jr(RA); + + __ bind(register_finalizer); + __ enter(); + OopMap* oop_map = save_live_registers(sasm); + int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, SharedRuntime::register_finalizer), A0); + oop_maps = new OopMapSet(); + oop_maps->add_gc_map(call_offset, 
oop_map); + + // Now restore all the live registers + restore_live_registers(sasm); + + __ leave(); + __ jr(RA); + } + break; + + case throw_class_cast_exception_id: + { + StubFrame f(sasm, "throw_class_cast_exception", dont_gc_arguments, does_not_return); + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_class_cast_exception), true); + } + break; + + case throw_incompatible_class_change_error_id: + { + StubFrame f(sasm, "throw_incompatible_class_cast_exception", dont_gc_arguments, does_not_return); + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_incompatible_class_change_error), false); + } + break; + + case slow_subtype_check_id: + { + // Typical calling sequence: + // __ push(klass_RInfo); // object klass or other subclass + // __ push(sup_k_RInfo); // array element klass or other superclass + // __ bl(slow_subtype_check); + // Note that the subclass is pushed first, and is therefore deepest. + enum layout { + a0_off, a0_off_hi, + a2_off, a2_off_hi, + a4_off, a4_off_hi, + a5_off, a5_off_hi, + sup_k_off, sup_k_off_hi, + klass_off, klass_off_hi, + framesize, + result_off = sup_k_off + }; + + __ set_info("slow_subtype_check", dont_gc_arguments); + __ addi_d(SP, SP, -4 * wordSize); + __ st_ptr(A0, Address(SP, a0_off * VMRegImpl::stack_slot_size)); + __ st_ptr(A2, Address(SP, a2_off * VMRegImpl::stack_slot_size)); + __ st_ptr(A4, Address(SP, a4_off * VMRegImpl::stack_slot_size)); + __ st_ptr(A5, Address(SP, a5_off * VMRegImpl::stack_slot_size)); + + // This is called by pushing args and not with C abi + __ ld_ptr(A4, Address(SP, klass_off * VMRegImpl::stack_slot_size)); // subclass + __ ld_ptr(A0, Address(SP, sup_k_off * VMRegImpl::stack_slot_size)); // superclass + + Label miss; + __ check_klass_subtype_slow_path(A4, A0, A2, A5, NULL, &miss); + + // fallthrough on success: + __ li(SCR1, 1); + __ st_ptr(SCR1, Address(SP, result_off * VMRegImpl::stack_slot_size)); // result + __ ld_ptr(A0, Address(SP, a0_off * VMRegImpl::stack_slot_size)); + __ ld_ptr(A2, Address(SP, a2_off * VMRegImpl::stack_slot_size)); + __ ld_ptr(A4, Address(SP, a4_off * VMRegImpl::stack_slot_size)); + __ ld_ptr(A5, Address(SP, a5_off * VMRegImpl::stack_slot_size)); + __ addi_d(SP, SP, 4 * wordSize); + __ jr(RA); + + __ bind(miss); + __ st_ptr(R0, Address(SP, result_off * VMRegImpl::stack_slot_size)); // result + __ ld_ptr(A0, Address(SP, a0_off * VMRegImpl::stack_slot_size)); + __ ld_ptr(A2, Address(SP, a2_off * VMRegImpl::stack_slot_size)); + __ ld_ptr(A4, Address(SP, a4_off * VMRegImpl::stack_slot_size)); + __ ld_ptr(A5, Address(SP, a5_off * VMRegImpl::stack_slot_size)); + __ addi_d(SP, SP, 4 * wordSize); + __ jr(RA); + } + break; + + case monitorenter_nofpu_id: + save_fpu_registers = false; + // fall through + case monitorenter_id: + { + StubFrame f(sasm, "monitorenter", dont_gc_arguments); + OopMap* map = save_live_registers(sasm, save_fpu_registers); + + // Called with store_parameter and not C abi + + f.load_argument(1, A0); // A0,: object + f.load_argument(0, A1); // A1,: lock address + + int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, monitorenter), A0, A1); + + oop_maps = new OopMapSet(); + oop_maps->add_gc_map(call_offset, map); + restore_live_registers(sasm, save_fpu_registers); + } + break; + + case monitorexit_nofpu_id: + save_fpu_registers = false; + // fall through + case monitorexit_id: + { + StubFrame f(sasm, "monitorexit", dont_gc_arguments); + OopMap* map = save_live_registers(sasm, save_fpu_registers); + + // Called with 
store_parameter and not C abi + + f.load_argument(0, A0); // A0,: lock address + + // note: really a leaf routine but must setup last java sp + // => use call_RT for now (speed can be improved by + // doing last java sp setup manually) + int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, monitorexit), A0); + + oop_maps = new OopMapSet(); + oop_maps->add_gc_map(call_offset, map); + restore_live_registers(sasm, save_fpu_registers); + } + break; + + case deoptimize_id: + { + StubFrame f(sasm, "deoptimize", dont_gc_arguments, does_not_return); + OopMap* oop_map = save_live_registers(sasm); + f.load_argument(0, A1); + int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, deoptimize), A1); + + oop_maps = new OopMapSet(); + oop_maps->add_gc_map(call_offset, oop_map); + restore_live_registers(sasm); + DeoptimizationBlob* deopt_blob = SharedRuntime::deopt_blob(); + assert(deopt_blob != NULL, "deoptimization blob must have been created"); + __ leave(); + __ jmp(deopt_blob->unpack_with_reexecution(), relocInfo::runtime_call_type); + } + break; + + case throw_range_check_failed_id: + { + StubFrame f(sasm, "range_check_failed", dont_gc_arguments, does_not_return); + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_range_check_exception), true); + } + break; + + case unwind_exception_id: + { + __ set_info("unwind_exception", dont_gc_arguments); + // note: no stubframe since we are about to leave the current + // activation and we are calling a leaf VM function only. + generate_unwind_exception(sasm); + } + break; + + case access_field_patching_id: + { + StubFrame f(sasm, "access_field_patching", dont_gc_arguments, does_not_return); + // we should set up register map + oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, access_field_patching)); + } + break; + + case load_klass_patching_id: + { + StubFrame f(sasm, "load_klass_patching", dont_gc_arguments, does_not_return); + // we should set up register map + oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_klass_patching)); + } + break; + + case load_mirror_patching_id: + { + StubFrame f(sasm, "load_mirror_patching", dont_gc_arguments, does_not_return); + // we should set up register map + oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_mirror_patching)); + } + break; + + case load_appendix_patching_id: + { + StubFrame f(sasm, "load_appendix_patching", dont_gc_arguments, does_not_return); + // we should set up register map + oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_appendix_patching)); + } + break; + + case handle_exception_nofpu_id: + case handle_exception_id: + { + StubFrame f(sasm, "handle_exception", dont_gc_arguments); + oop_maps = generate_handle_exception(id, sasm); + } + break; + + case handle_exception_from_callee_id: + { + StubFrame f(sasm, "handle_exception_from_callee", dont_gc_arguments); + oop_maps = generate_handle_exception(id, sasm); + } + break; + + case throw_index_exception_id: + { + StubFrame f(sasm, "index_range_check_failed", dont_gc_arguments, does_not_return); + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_index_exception), true); + } + break; + + case throw_array_store_exception_id: + { + StubFrame f(sasm, "throw_array_store_exception", dont_gc_arguments, does_not_return); + // tos + 0: link + // + 1: return address + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_array_store_exception), true); + } + break; + + case predicate_failed_trap_id: + { + 
StubFrame f(sasm, "predicate_failed_trap", dont_gc_arguments, does_not_return); + + OopMap* map = save_live_registers(sasm); + + int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, predicate_failed_trap)); + oop_maps = new OopMapSet(); + oop_maps->add_gc_map(call_offset, map); + restore_live_registers(sasm); + __ leave(); + DeoptimizationBlob* deopt_blob = SharedRuntime::deopt_blob(); + assert(deopt_blob != NULL, "deoptimization blob must have been created"); + + __ jmp(deopt_blob->unpack_with_reexecution(), relocInfo::runtime_call_type); + } + break; + + case dtrace_object_alloc_id: + { + // A0: object + StubFrame f(sasm, "dtrace_object_alloc", dont_gc_arguments); + save_live_registers(sasm); + + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_object_alloc), A0); + + restore_live_registers(sasm); + } + break; + + default: + { + StubFrame f(sasm, "unimplemented entry", dont_gc_arguments, does_not_return); + __ li(A0, (int)id); + __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, unimplemented_entry), A0); + } + break; + } + } + return oop_maps; +} + +#undef __ + +const char *Runtime1::pd_name_for_address(address entry) { + Unimplemented(); + return 0; +} diff --git a/src/hotspot/cpu/loongarch/c1_globals_loongarch.hpp b/src/hotspot/cpu/loongarch/c1_globals_loongarch.hpp new file mode 100644 index 00000000000..164016e123e --- /dev/null +++ b/src/hotspot/cpu/loongarch/c1_globals_loongarch.hpp @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2000, 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2021, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_LOONGARCH_C1_GLOBALS_LOONGARCH_HPP +#define CPU_LOONGARCH_C1_GLOBALS_LOONGARCH_HPP + +#include "utilities/globalDefinitions.hpp" +#include "utilities/macros.hpp" + +// Sets the default values for platform dependent flags used by the client compiler. 
+// (see c1_globals.hpp) + +#ifndef COMPILER2 +define_pd_global(bool, BackgroundCompilation, true ); +define_pd_global(bool, UseTLAB, true ); +define_pd_global(bool, ResizeTLAB, true ); +define_pd_global(bool, InlineIntrinsics, true ); +define_pd_global(bool, PreferInterpreterNativeStubs, false); +define_pd_global(bool, ProfileTraps, false); +define_pd_global(bool, UseOnStackReplacement, true ); +define_pd_global(bool, TieredCompilation, false); +define_pd_global(intx, CompileThreshold, 1500 ); + +define_pd_global(intx, OnStackReplacePercentage, 933 ); +define_pd_global(intx, FreqInlineSize, 325 ); +define_pd_global(intx, NewSizeThreadIncrease, 4*K ); +define_pd_global(intx, InitialCodeCacheSize, 160*K); +define_pd_global(intx, ReservedCodeCacheSize, 32*M ); +define_pd_global(intx, NonProfiledCodeHeapSize, 13*M ); +define_pd_global(intx, ProfiledCodeHeapSize, 14*M ); +define_pd_global(intx, NonNMethodCodeHeapSize, 5*M ); +define_pd_global(bool, ProfileInterpreter, false); +define_pd_global(intx, CodeCacheExpansionSize, 32*K ); +define_pd_global(uintx, CodeCacheMinBlockLength, 1); +define_pd_global(uintx, CodeCacheMinimumUseSpace, 400*K); +define_pd_global(uintx, MetaspaceSize, 12*M ); +define_pd_global(bool, NeverActAsServerClassMachine, true ); +define_pd_global(uint64_t,MaxRAM, 1ULL*G); +define_pd_global(bool, CICompileOSR, true ); +#endif // !COMPILER2 +define_pd_global(bool, UseTypeProfile, false); +define_pd_global(bool, RoundFPResults, true ); + +define_pd_global(bool, LIRFillDelaySlots, false); +define_pd_global(bool, OptimizeSinglePrecision, true ); +define_pd_global(bool, CSEArrayLength, false); +define_pd_global(bool, TwoOperandLIRForm, false ); + +#endif // CPU_LOONGARCH_C1_GLOBALS_LOONGARCH_HPP diff --git a/src/hotspot/cpu/loongarch/c2_globals_loongarch.hpp b/src/hotspot/cpu/loongarch/c2_globals_loongarch.hpp new file mode 100644 index 00000000000..27a4ec5229c --- /dev/null +++ b/src/hotspot/cpu/loongarch/c2_globals_loongarch.hpp @@ -0,0 +1,94 @@ +/* + * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_LOONGARCH_C2_GLOBALS_LOONGARCH_HPP +#define CPU_LOONGARCH_C2_GLOBALS_LOONGARCH_HPP + +#include "utilities/globalDefinitions.hpp" +#include "utilities/macros.hpp" + +// Sets the default values for platform dependent flags used by the server compiler. +// (see c2_globals.hpp). Alpha-sorted. 
+define_pd_global(bool, BackgroundCompilation, true); +define_pd_global(bool, UseTLAB, true); +define_pd_global(bool, ResizeTLAB, true); +define_pd_global(bool, CICompileOSR, true); +define_pd_global(bool, InlineIntrinsics, true); +define_pd_global(bool, PreferInterpreterNativeStubs, false); +define_pd_global(bool, ProfileTraps, true); +define_pd_global(bool, UseOnStackReplacement, true); +#ifdef CC_INTERP +define_pd_global(bool, ProfileInterpreter, false); +#else +define_pd_global(bool, ProfileInterpreter, true); +#endif // CC_INTERP +define_pd_global(bool, TieredCompilation, true); +define_pd_global(intx, CompileThreshold, 10000); +define_pd_global(intx, BackEdgeThreshold, 100000); + +define_pd_global(intx, OnStackReplacePercentage, 140); +define_pd_global(intx, ConditionalMoveLimit, 3); +define_pd_global(intx, FLOATPRESSURE, 6); +define_pd_global(intx, FreqInlineSize, 325); +define_pd_global(intx, MinJumpTableSize, 10); +define_pd_global(intx, INTPRESSURE, 13); +define_pd_global(intx, InteriorEntryAlignment, 16); +define_pd_global(intx, NewSizeThreadIncrease, ScaleForWordSize(4*K)); +define_pd_global(intx, LoopUnrollLimit, 60); +define_pd_global(intx, LoopPercentProfileLimit, 10); +// InitialCodeCacheSize derived from specjbb2000 run. +define_pd_global(intx, InitialCodeCacheSize, 2496*K); // Integral multiple of CodeCacheExpansionSize +define_pd_global(intx, CodeCacheExpansionSize, 64*K); + +// Ergonomics related flags +define_pd_global(uint64_t,MaxRAM, 128ULL*G); +define_pd_global(intx, RegisterCostAreaRatio, 16000); + +// Peephole and CISC spilling both break the graph, and so makes the +// scheduler sick. +define_pd_global(bool, OptoPeephole, false); +define_pd_global(bool, UseCISCSpill, false); +define_pd_global(bool, OptoScheduling, false); +define_pd_global(bool, OptoBundling, false); +define_pd_global(bool, OptoRegScheduling, false); +define_pd_global(bool, SuperWordLoopUnrollAnalysis, true); +define_pd_global(bool, IdealizeClearArrayNode, true); + +define_pd_global(intx, ReservedCodeCacheSize, 48*M); +define_pd_global(intx, NonProfiledCodeHeapSize, 21*M); +define_pd_global(intx, ProfiledCodeHeapSize, 22*M); +define_pd_global(intx, NonNMethodCodeHeapSize, 5*M ); +define_pd_global(uintx, CodeCacheMinBlockLength, 4); +define_pd_global(uintx, CodeCacheMinimumUseSpace, 400*K); + +define_pd_global(bool, TrapBasedRangeChecks, false); + +// Heap related flags +define_pd_global(uintx,MetaspaceSize, ScaleForWordSize(16*M)); + +// Ergonomics related flags +define_pd_global(bool, NeverActAsServerClassMachine, false); + +#endif // CPU_LOONGARCH_C2_GLOBALS_LOONGARCH_HPP diff --git a/src/hotspot/cpu/loongarch/c2_init_loongarch.cpp b/src/hotspot/cpu/loongarch/c2_init_loongarch.cpp new file mode 100644 index 00000000000..ec78b942d40 --- /dev/null +++ b/src/hotspot/cpu/loongarch/c2_init_loongarch.cpp @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "opto/compile.hpp" +#include "opto/node.hpp" + +// processor dependent initialization for LoongArch + +extern void reg_mask_init(); + +void Compile::pd_compiler2_init() { + guarantee(CodeEntryAlignment >= InteriorEntryAlignment, "" ); + reg_mask_init(); +} diff --git a/src/hotspot/cpu/loongarch/codeBuffer_loongarch.hpp b/src/hotspot/cpu/loongarch/codeBuffer_loongarch.hpp new file mode 100644 index 00000000000..653d95806bf --- /dev/null +++ b/src/hotspot/cpu/loongarch/codeBuffer_loongarch.hpp @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_LOONGARCH_CODEBUFFER_LOONGARCH_HPP +#define CPU_LOONGARCH_CODEBUFFER_LOONGARCH_HPP + +private: + void pd_initialize() {} + +public: + void flush_bundle(bool start_new_bundle) {} + +#endif // CPU_LOONGARCH_CODEBUFFER_LOONGARCH_HPP diff --git a/src/hotspot/cpu/loongarch/compiledIC_loongarch.cpp b/src/hotspot/cpu/loongarch/compiledIC_loongarch.cpp new file mode 100644 index 00000000000..d063d5d93ef --- /dev/null +++ b/src/hotspot/cpu/loongarch/compiledIC_loongarch.cpp @@ -0,0 +1,148 @@ +/* + * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "code/compiledIC.hpp" +#include "code/icBuffer.hpp" +#include "code/nmethod.hpp" +#include "memory/resourceArea.hpp" +#include "runtime/mutexLocker.hpp" +#include "runtime/safepoint.hpp" + +// ---------------------------------------------------------------------------- + +#define __ _masm. +address CompiledStaticCall::emit_to_interp_stub(CodeBuffer &cbuf, address mark) { + precond(cbuf.stubs()->start() != badAddress); + precond(cbuf.stubs()->end() != badAddress); + + if (mark == NULL) { + mark = cbuf.insts_mark(); // get mark within main instrs section + } + + // Note that the code buffer's insts_mark is always relative to insts. + // That's why we must use the macroassembler to generate a stub. + MacroAssembler _masm(&cbuf); + + address base = __ start_a_stub(CompiledStaticCall::to_interp_stub_size()); + if (base == NULL) return NULL; // CodeBuffer::expand failed + // static stub relocation stores the instruction address of the call + + __ relocate(static_stub_Relocation::spec(mark), 0); + + // Code stream for loading method may be changed. + __ ibar(0); + + // Rmethod contains methodOop, it should be relocated for GC + // static stub relocation also tags the methodOop in the code-stream. + __ mov_metadata(Rmethod, NULL); + // This is recognized as unresolved by relocs/nativeInst/ic code + + cbuf.set_insts_mark(); + __ patchable_jump(__ pc()); + // Update current stubs pointer and restore code_end. + __ end_a_stub(); + return base; +} +#undef __ + +int CompiledStaticCall::to_interp_stub_size() { + return NativeInstruction::nop_instruction_size + NativeMovConstReg::instruction_size + NativeGeneralJump::instruction_size; +} + +int CompiledStaticCall::to_trampoline_stub_size() { + return NativeInstruction::nop_instruction_size + NativeCallTrampolineStub::instruction_size; +} + +// Relocation entries for call stub, compiled java to interpreter. +int CompiledStaticCall::reloc_to_interp_stub() { + return 16; +} + +void CompiledDirectStaticCall::set_to_interpreted(const methodHandle& callee, address entry) { + address stub = find_stub(false /* is_aot */); + guarantee(stub != NULL, "stub not found"); + + if (TraceICs) { + ResourceMark rm; + tty->print_cr("CompiledDirectStaticCall@" INTPTR_FORMAT ": set_to_interpreted %s", + p2i(instruction_address()), + callee->name_and_sig_as_C_string()); + } + + // Creation also verifies the object. + NativeMovConstReg* method_holder = nativeMovConstReg_at(stub + NativeInstruction::nop_instruction_size); + NativeGeneralJump* jump = nativeGeneralJump_at(method_holder->next_instruction_address()); + + assert(method_holder->data() == 0 || method_holder->data() == (intptr_t)callee(), + "a) MT-unsafe modification of inline cache"); + assert(jump->jump_destination() == (address)-1 || jump->jump_destination() == entry, + "b) MT-unsafe modification of inline cache"); + + // Update stub. + method_holder->set_data((intptr_t)callee()); + jump->set_jump_destination(entry); + + // Update jump to call. 
+ set_destination_mt_safe(stub); +} + +void CompiledDirectStaticCall::set_stub_to_clean(static_stub_Relocation* static_stub) { + assert (CompiledIC_lock->is_locked() || SafepointSynchronize::is_at_safepoint(), "mt unsafe call"); + // Reset stub. + address stub = static_stub->addr(); + assert(stub != NULL, "stub not found"); + // Creation also verifies the object. + NativeMovConstReg* method_holder = nativeMovConstReg_at(stub + NativeInstruction::nop_instruction_size); + NativeGeneralJump* jump = nativeGeneralJump_at(method_holder->next_instruction_address()); + method_holder->set_data(0); + jump->set_jump_destination(jump->instruction_address()); +} + +//----------------------------------------------------------------------------- +// Non-product mode code +#ifndef PRODUCT + +void CompiledDirectStaticCall::verify() { + // Verify call. + _call->verify(); + if (os::is_MP()) { + _call->verify_alignment(); + } + + // Verify stub. + address stub = find_stub(false /* is_aot */); + assert(stub != NULL, "no stub found for static call"); + // Creation also verifies the object. + NativeMovConstReg* method_holder = nativeMovConstReg_at(stub + NativeInstruction::nop_instruction_size); + NativeGeneralJump* jump = nativeGeneralJump_at(method_holder->next_instruction_address()); + + + // Verify state. + assert(is_clean() || is_call_to_compiled() || is_call_to_interpreted(), "sanity check"); +} + +#endif // !PRODUCT diff --git a/src/hotspot/cpu/loongarch/copy_loongarch.hpp b/src/hotspot/cpu/loongarch/copy_loongarch.hpp new file mode 100644 index 00000000000..54b847a7369 --- /dev/null +++ b/src/hotspot/cpu/loongarch/copy_loongarch.hpp @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_LOONGARCH_COPY_LOONGARCH_HPP +#define CPU_LOONGARCH_COPY_LOONGARCH_HPP + +// Inline functions for memory copy and fill. + +// Contains inline asm implementations +#include OS_CPU_HEADER_INLINE(copy) + +// Template for atomic, element-wise copy. 
+template <class T>
+static void copy_conjoint_atomic(const T* from, T* to, size_t count) {
+  if (from > to) {
+    while (count-- > 0) {
+      // Copy forwards
+      *to++ = *from++;
+    }
+  } else {
+    from += count - 1;
+    to += count - 1;
+    while (count-- > 0) {
+      // Copy backwards
+      *to-- = *from--;
+    }
+  }
+}
+
+
+static void pd_fill_to_words(HeapWord* tohw, size_t count, juint value) {
+  julong* to = (julong*) tohw;
+  julong v = ((julong) value << 32) | value;
+  while (count-- > 0) {
+    *to++ = v;
+  }
+}
+
+static void pd_fill_to_aligned_words(HeapWord* tohw, size_t count, juint value) {
+  pd_fill_to_words(tohw, count, value);
+}
+
+static void pd_fill_to_bytes(void* to, size_t count, jubyte value) {
+  (void)memset(to, value, count);
+}
+
+static void pd_zero_to_words(HeapWord* tohw, size_t count) {
+  pd_fill_to_words(tohw, count, 0);
+}
+
+static void pd_zero_to_bytes(void* to, size_t count) {
+  (void)memset(to, 0, count);
+}
+
+#endif //CPU_LOONGARCH_COPY_LOONGARCH_HPP
diff --git a/src/hotspot/cpu/loongarch/depChecker_loongarch.cpp b/src/hotspot/cpu/loongarch/depChecker_loongarch.cpp
new file mode 100644
index 00000000000..e4a92d10352
--- /dev/null
+++ b/src/hotspot/cpu/loongarch/depChecker_loongarch.cpp
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "compiler/disassembler.hpp"
+#include "depChecker_loongarch.hpp"
+
+// Nothing to do on LoongArch
diff --git a/src/hotspot/cpu/loongarch/depChecker_loongarch.hpp b/src/hotspot/cpu/loongarch/depChecker_loongarch.hpp
new file mode 100644
index 00000000000..29c292a74ae
--- /dev/null
+++ b/src/hotspot/cpu/loongarch/depChecker_loongarch.hpp
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_LOONGARCH_DEPCHECKER_LOONGARCH_HPP +#define CPU_LOONGARCH_DEPCHECKER_LOONGARCH_HPP + +// Nothing to do on LoongArch + +#endif // CPU_LOONGARCH_DEPCHECKER_LOONGARCH_HPP diff --git a/src/hotspot/cpu/loongarch/disassembler_loongarch.hpp b/src/hotspot/cpu/loongarch/disassembler_loongarch.hpp new file mode 100644 index 00000000000..04359bc172e --- /dev/null +++ b/src/hotspot/cpu/loongarch/disassembler_loongarch.hpp @@ -0,0 +1,37 @@ +/* + * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_LOONGARCH_DISASSEMBLER_LOONGARCH_HPP +#define CPU_LOONGARCH_DISASSEMBLER_LOONGARCH_HPP + + static int pd_instruction_alignment() { + return sizeof(int); + } + + static const char* pd_cpu_opts() { + return "gpr-names=64"; + } + +#endif // CPU_LOONGARCH_DISASSEMBLER_LOONGARCH_HPP diff --git a/src/hotspot/cpu/loongarch/frame_loongarch.cpp b/src/hotspot/cpu/loongarch/frame_loongarch.cpp new file mode 100644 index 00000000000..6f6d34e0264 --- /dev/null +++ b/src/hotspot/cpu/loongarch/frame_loongarch.cpp @@ -0,0 +1,690 @@ +/* + * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "interpreter/interpreter.hpp" +#include "memory/resourceArea.hpp" +#include "oops/markOop.hpp" +#include "oops/method.hpp" +#include "oops/oop.inline.hpp" +#include "prims/methodHandles.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/handles.inline.hpp" +#include "runtime/javaCalls.hpp" +#include "runtime/monitorChunk.hpp" +#include "runtime/signature.hpp" +#include "runtime/stubCodeGenerator.hpp" +#include "runtime/stubRoutines.hpp" +#include "vmreg_loongarch.inline.hpp" + +#ifdef ASSERT +void RegisterMap::check_location_valid() { +} +#endif + + +// Profiling/safepoint support +// for Profiling - acting on another frame. walks sender frames +// if valid. +// frame profile_find_Java_sender_frame(JavaThread *thread); + +bool frame::safe_for_sender(JavaThread *thread) { + address sp = (address)_sp; + address fp = (address)_fp; + address unextended_sp = (address)_unextended_sp; + + // consider stack guards when trying to determine "safe" stack pointers + static size_t stack_guard_size = os::uses_stack_guard_pages() ? + JavaThread::stack_red_zone_size() + JavaThread::stack_yellow_zone_size() : 0; + size_t usable_stack_size = thread->stack_size() - stack_guard_size; + + // sp must be within the usable part of the stack (not in guards) + bool sp_safe = (sp < thread->stack_base()) && + (sp >= thread->stack_base() - usable_stack_size); + + + if (!sp_safe) { + return false; + } + + // unextended sp must be within the stack and above or equal sp + bool unextended_sp_safe = (unextended_sp < thread->stack_base()) && + (unextended_sp >= sp); + + if (!unextended_sp_safe) { + return false; + } + + // an fp must be within the stack and above (but not equal) sp + // second evaluation on fp+ is added to handle situation where fp is -1 + bool fp_safe = (fp < thread->stack_base() && (fp > sp) && (((fp + (java_frame_return_addr_offset * sizeof(void*))) < thread->stack_base()))); + + // We know sp/unextended_sp are safe only fp is questionable here + + // If the current frame is known to the code cache then we can attempt to + // construct the sender and do some validation of it. This goes a long way + // toward eliminating issues when we get in frame construction code + + if (_cb != NULL ) { + + // First check if frame is complete and tester is reliable + // Unfortunately we can only check frame complete for runtime stubs and nmethod + // other generic buffer blobs are more problematic so we just assume they are + // ok. adapter blobs never have a frame complete and are never ok. + + if (!_cb->is_frame_complete_at(_pc)) { + if (_cb->is_compiled() || _cb->is_adapter_blob() || _cb->is_runtime_stub()) { + return false; + } + } + + // Could just be some random pointer within the codeBlob + if (!_cb->code_contains(_pc)) { + return false; + } + + // Entry frame checks + if (is_entry_frame()) { + // an entry frame must have a valid fp. 
+ return fp_safe && is_entry_frame_valid(thread); + } + + intptr_t* sender_sp = NULL; + intptr_t* sender_unextended_sp = NULL; + address sender_pc = NULL; + intptr_t* saved_fp = NULL; + + if (is_interpreted_frame()) { + // fp must be safe + if (!fp_safe) { + return false; + } + + sender_pc = (address) this->fp()[java_frame_return_addr_offset]; + // for interpreted frames, the value below is the sender "raw" sp, + // which can be different from the sender unextended sp (the sp seen + // by the sender) because of current frame local variables + sender_sp = (intptr_t*) addr_at(java_frame_sender_sp_offset); + sender_unextended_sp = (intptr_t*) this->fp()[interpreter_frame_sender_sp_offset]; + saved_fp = (intptr_t*) this->fp()[java_frame_link_offset]; + + } else { + // must be some sort of compiled/runtime frame + // fp does not have to be safe (although it could be check for c1?) + + // check for a valid frame_size, otherwise we are unlikely to get a valid sender_pc + if (_cb->frame_size() <= 0) { + return false; + } + + sender_sp = _unextended_sp + _cb->frame_size(); + // Is sender_sp safe? + if ((address)sender_sp >= thread->stack_base()) { + return false; + } + sender_unextended_sp = sender_sp; + // On LA the return_address is always the word on the stack + sender_pc = (address) *(sender_sp-1); + // Note: frame::java_frame_sender_sp_offset is only valid for compiled frame + saved_fp = (intptr_t*) *(sender_sp - frame::java_frame_sender_sp_offset); + } + + + // If the potential sender is the interpreter then we can do some more checking + if (Interpreter::contains(sender_pc)) { + + // FP is always saved in a recognizable place in any code we generate. However + // only if the sender is interpreted/call_stub (c1 too?) are we certain that the saved FP + // is really a frame pointer. 
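+    // Whatever was saved there, only accept it if it lies within this thread's stack
+    // and above the candidate sender_sp before using it to build the sender frame.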
+ + bool saved_fp_safe = ((address)saved_fp < thread->stack_base()) && (saved_fp > sender_sp); + + if (!saved_fp_safe) { + return false; + } + + // construct the potential sender + + frame sender(sender_sp, sender_unextended_sp, saved_fp, sender_pc); + + return sender.is_interpreted_frame_valid(thread); + + } + + // We must always be able to find a recognizable pc + CodeBlob* sender_blob = CodeCache::find_blob_unsafe(sender_pc); + if (sender_pc == NULL || sender_blob == NULL) { + return false; + } + + // Could be a zombie method + if (sender_blob->is_zombie() || sender_blob->is_unloaded()) { + return false; + } + + // Could just be some random pointer within the codeBlob + if (!sender_blob->code_contains(sender_pc)) { + return false; + } + + // We should never be able to see an adapter if the current frame is something from code cache + if (sender_blob->is_adapter_blob()) { + return false; + } + + // Could be the call_stub + if (StubRoutines::returns_to_call_stub(sender_pc)) { + bool saved_fp_safe = ((address)saved_fp < thread->stack_base()) && (saved_fp > sender_sp); + + if (!saved_fp_safe) { + return false; + } + + // construct the potential sender + + frame sender(sender_sp, sender_unextended_sp, saved_fp, sender_pc); + + // Validate the JavaCallWrapper an entry frame must have + address jcw = (address)sender.entry_frame_call_wrapper(); + + bool jcw_safe = (jcw < thread->stack_base()) && (jcw > (address)sender.fp()); + + return jcw_safe; + } + + CompiledMethod* nm = sender_blob->as_compiled_method_or_null(); + if (nm != NULL) { + if (nm->is_deopt_mh_entry(sender_pc) || nm->is_deopt_entry(sender_pc) || + nm->method()->is_method_handle_intrinsic()) { + return false; + } + } + + // If the frame size is 0 something (or less) is bad because every nmethod has a non-zero frame size + // because the return address counts against the callee's frame. + + if (sender_blob->frame_size() <= 0) { + assert(!sender_blob->is_compiled(), "should count return address at least"); + return false; + } + + // We should never be able to see anything here except an nmethod. If something in the + // code cache (current frame) is called by an entity within the code cache that entity + // should not be anything but the call stub (already covered), the interpreter (already covered) + // or an nmethod. + + if (!sender_blob->is_compiled()) { + return false; + } + + // Could put some more validation for the potential non-interpreted sender + // frame we'd create by calling sender if I could think of any. Wait for next crash in forte... + + // One idea is seeing if the sender_pc we have is one that we'd expect to call to current cb + + // We've validated the potential sender that would be created + return true; + } + + // Must be native-compiled frame. Since sender will try and use fp to find + // linkages it must be safe + + if (!fp_safe) { + return false; + } + + // Will the pc we fetch be non-zero (which we'll find at the oldest frame) + + if ( (address) this->fp()[java_frame_return_addr_offset] == NULL) return false; + + + // could try and do some more potential verification of native frame if we could think of some... 
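+  // Every unsafe case above has already returned false; what remains is treated as a
+  // walkable native frame.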
+ + return true; + +} + +void frame::patch_pc(Thread* thread, address pc) { + address* pc_addr = &(((address*) sp())[-1]); + if (TracePcPatching) { + tty->print_cr("patch_pc at address " INTPTR_FORMAT " [" INTPTR_FORMAT " -> " INTPTR_FORMAT "]", + p2i(pc_addr), p2i(*pc_addr), p2i(pc)); + } + // Either the return address is the original one or we are going to + // patch in the same address that's already there. + assert(_pc == *pc_addr || pc == *pc_addr, "must be"); + *pc_addr = pc; + _cb = CodeCache::find_blob(pc); + address original_pc = CompiledMethod::get_deopt_original_pc(this); + if (original_pc != NULL) { + assert(original_pc == _pc, "expected original PC to be stored before patching"); + _deopt_state = is_deoptimized; + // leave _pc as is + } else { + _deopt_state = not_deoptimized; + _pc = pc; + } +} + +bool frame::is_interpreted_frame() const { + return Interpreter::contains(pc()); +} + +int frame::frame_size(RegisterMap* map) const { + frame sender = this->sender(map); + return sender.sp() - sp(); +} + +intptr_t* frame::entry_frame_argument_at(int offset) const { + // convert offset to index to deal with tsi + int index = (Interpreter::expr_offset_in_bytes(offset)/wordSize); + // Entry frame's arguments are always in relation to unextended_sp() + return &unextended_sp()[index]; +} + +// sender_sp +#ifdef CC_INTERP +intptr_t* frame::interpreter_frame_sender_sp() const { + assert(is_interpreted_frame(), "interpreted frame expected"); + // QQQ why does this specialize method exist if frame::sender_sp() does same thing? + // seems odd and if we always know interpreted vs. non then sender_sp() is really + // doing too much work. + return get_interpreterState()->sender_sp(); +} + +// monitor elements + +BasicObjectLock* frame::interpreter_frame_monitor_begin() const { + return get_interpreterState()->monitor_base(); +} + +BasicObjectLock* frame::interpreter_frame_monitor_end() const { + return (BasicObjectLock*) get_interpreterState()->stack_base(); +} + +#else // CC_INTERP + +intptr_t* frame::interpreter_frame_sender_sp() const { + assert(is_interpreted_frame(), "interpreted frame expected"); + return (intptr_t*) at(interpreter_frame_sender_sp_offset); +} + +void frame::set_interpreter_frame_sender_sp(intptr_t* sender_sp) { + assert(is_interpreted_frame(), "interpreted frame expected"); + int_at_put(interpreter_frame_sender_sp_offset, (intptr_t) sender_sp); +} + + +// monitor elements + +BasicObjectLock* frame::interpreter_frame_monitor_begin() const { + return (BasicObjectLock*) addr_at(interpreter_frame_monitor_block_bottom_offset); +} + +BasicObjectLock* frame::interpreter_frame_monitor_end() const { + BasicObjectLock* result = (BasicObjectLock*) *addr_at(interpreter_frame_monitor_block_top_offset); + // make sure the pointer points inside the frame + assert((intptr_t) fp() > (intptr_t) result, "result must < than frame pointer"); + assert((intptr_t) sp() <= (intptr_t) result, "result must >= than stack pointer"); + return result; +} + +void frame::interpreter_frame_set_monitor_end(BasicObjectLock* value) { + *((BasicObjectLock**)addr_at(interpreter_frame_monitor_block_top_offset)) = value; +} + +// Used by template based interpreter deoptimization +void frame::interpreter_frame_set_last_sp(intptr_t* sp) { + *((intptr_t**)addr_at(interpreter_frame_last_sp_offset)) = sp; +} +#endif // CC_INTERP + +frame frame::sender_for_entry_frame(RegisterMap* map) const { + assert(map != NULL, "map must be set"); + // Java frame called from C; skip all C frames and return top C + // frame of that 
chunk as the sender + JavaFrameAnchor* jfa = entry_frame_call_wrapper()->anchor(); + assert(!entry_frame_is_first(), "next Java fp must be non zero"); + assert(jfa->last_Java_sp() > sp(), "must be above this frame on stack"); + map->clear(); + assert(map->include_argument_oops(), "should be set by clear"); + if (jfa->last_Java_pc() != NULL ) { + frame fr(jfa->last_Java_sp(), jfa->last_Java_fp(), jfa->last_Java_pc()); + return fr; + } + frame fr(jfa->last_Java_sp(), jfa->last_Java_fp()); + return fr; +} + +frame frame::sender_for_interpreter_frame(RegisterMap* map) const { + // sp is the raw sp from the sender after adapter or interpreter extension + intptr_t* sender_sp = this->sender_sp(); + + // This is the sp before any possible extension (adapter/locals). + intptr_t* unextended_sp = interpreter_frame_sender_sp(); + + // The interpreter and compiler(s) always save FP in a known + // location on entry. We must record where that location is + // so this if FP was live on callout from c2 we can find + // the saved copy no matter what it called. + + // Since the interpreter always saves FP if we record where it is then + // we don't have to always save FP on entry and exit to c2 compiled + // code, on entry will be enough. +#ifdef COMPILER2_OR_JVMCI + if (map->update_map()) { + update_map_with_saved_link(map, (intptr_t**) addr_at(java_frame_link_offset)); + } +#endif // COMPILER2_OR_JVMCI + return frame(sender_sp, unextended_sp, link(), sender_pc()); +} + + +//------------------------------------------------------------------------------ +// frame::verify_deopt_original_pc +// +// Verifies the calculated original PC of a deoptimization PC for the +// given unextended SP. The unextended SP might also be the saved SP +// for MethodHandle call sites. +#ifdef ASSERT +void frame::verify_deopt_original_pc(CompiledMethod* nm, intptr_t* unextended_sp) { + frame fr; + + // This is ugly but it's better than to change {get,set}_original_pc + // to take an SP value as argument. And it's only a debugging + // method anyway. + fr._unextended_sp = unextended_sp; + + address original_pc = nm->get_original_pc(&fr); + assert(nm->insts_contains(original_pc), + "original PC must be in the main code section of the the compiled method (or must be immediately following it)"); +} +#endif + + +//------------------------------------------------------------------------------ +// frame::adjust_unextended_sp +void frame::adjust_unextended_sp() { + // On LoongArch, sites calling method handle intrinsics and lambda forms are treated + // as any other call site. Therefore, no special action is needed when we are + // returning to any of these call sites. + + if (_cb != NULL) { + CompiledMethod* sender_cm = _cb->as_compiled_method_or_null(); + if (sender_cm != NULL) { + // If the sender PC is a deoptimization point, get the original PC. + if (sender_cm->is_deopt_entry(_pc) || + sender_cm->is_deopt_mh_entry(_pc)) { + DEBUG_ONLY(verify_deopt_original_pc(sender_cm, _unextended_sp)); + } + } + } +} + +//------------------------------------------------------------------------------ +// frame::update_map_with_saved_link +void frame::update_map_with_saved_link(RegisterMap* map, intptr_t** link_addr) { + // The interpreter and compiler(s) always save fp in a known + // location on entry. We must record where that location is + // so that if fp was live on callout from c2 we can find + // the saved copy no matter what it called. 
+ + // Since the interpreter always saves fp if we record where it is then + // we don't have to always save fp on entry and exit to c2 compiled + // code, on entry will be enough. + map->set_location(FP->as_VMReg(), (address) link_addr); + // this is weird "H" ought to be at a higher address however the + // oopMaps seems to have the "H" regs at the same address and the + // vanilla register. + // XXXX make this go away + if (true) { + map->set_location(FP->as_VMReg()->next(), (address) link_addr); + } +} + +//------------------------------sender_for_compiled_frame----------------------- +frame frame::sender_for_compiled_frame(RegisterMap* map) const { + assert(map != NULL, "map must be set"); + + // frame owned by optimizing compiler + assert(_cb->frame_size() >= 0, "must have non-zero frame size"); + + intptr_t* sender_sp = unextended_sp() + _cb->frame_size(); + intptr_t* unextended_sp = sender_sp; + + // On Loongson the return_address is always the word on the stack + // the fp in compiler points to sender fp, but in interpreter, fp points to return address, + // so getting sender for compiled frame is not same as interpreter frame. + // we hard code here temporarily + // spark + address sender_pc = (address) *(sender_sp-1); + + intptr_t** saved_fp_addr = (intptr_t**) (sender_sp - frame::java_frame_sender_sp_offset); + + if (map->update_map()) { + // Tell GC to use argument oopmaps for some runtime stubs that need it. + // For C1, the runtime stub might not have oop maps, so set this flag + // outside of update_register_map. + map->set_include_argument_oops(_cb->caller_must_gc_arguments(map->thread())); + if (_cb->oop_maps() != NULL) { + OopMapSet::update_register_map(this, map); + } + + // Since the prolog does the save and restore of epb there is no oopmap + // for it so we must fill in its location as if there was an oopmap entry + // since if our caller was compiled code there could be live jvm state in it. + update_map_with_saved_link(map, saved_fp_addr); + } + assert(sender_sp != sp(), "must have changed"); + return frame(sender_sp, unextended_sp, *saved_fp_addr, sender_pc); +} + +frame frame::sender(RegisterMap* map) const { + // Default is we done have to follow them. The sender_for_xxx will + // update it accordingly + map->set_include_argument_oops(false); + + if (is_entry_frame()) return sender_for_entry_frame(map); + if (is_interpreted_frame()) return sender_for_interpreter_frame(map); + assert(_cb == CodeCache::find_blob(pc()),"Must be the same"); + + if (_cb != NULL) { + return sender_for_compiled_frame(map); + } + // Must be native-compiled frame, i.e. the marshaling code for native + // methods that exists in the core system. + return frame(sender_sp(), link(), sender_pc()); +} + +bool frame::is_interpreted_frame_valid(JavaThread* thread) const { +// QQQ +#ifdef CC_INTERP +#else + assert(is_interpreted_frame(), "Not an interpreted frame"); + // These are reasonable sanity checks + if (fp() == 0 || (intptr_t(fp()) & (wordSize-1)) != 0) { + return false; + } + if (sp() == 0 || (intptr_t(sp()) & (wordSize-1)) != 0) { + return false; + } + if (fp() + interpreter_frame_initial_sp_offset < sp()) { + return false; + } + // These are hacks to keep us out of trouble. 
+ // The problem with these is that they mask other problems + if (fp() <= sp()) { // this attempts to deal with unsigned comparison above + return false; + } + + // do some validation of frame elements + + // first the method + + Method* m = safe_interpreter_frame_method(); + + // validate the method we'd find in this potential sender + if (!Method::is_valid_method(m)) return false; + + // stack frames shouldn't be much larger than max_stack elements + + //if (fp() - sp() > 1024 + m->max_stack()*Interpreter::stackElementSize()) { + if (fp() - sp() > 4096) { // stack frames shouldn't be large. + return false; + } + + // validate bci/bcp + + address bcp = interpreter_frame_bcp(); + if (m->validate_bci_from_bcp(bcp) < 0) { + return false; + } + + // validate ConstantPoolCache* + + ConstantPoolCache* cp = *interpreter_frame_cache_addr(); + + if (MetaspaceObj::is_valid(cp) == false) return false; + + // validate locals + + address locals = (address) *interpreter_frame_locals_addr(); + + if (locals > thread->stack_base() || locals < (address) fp()) return false; + + // We'd have to be pretty unlucky to be mislead at this point + +#endif // CC_INTERP + return true; +} + +BasicType frame::interpreter_frame_result(oop* oop_result, jvalue* value_result) { +#ifdef CC_INTERP + // Needed for JVMTI. The result should always be in the interpreterState object + assert(false, "NYI"); + interpreterState istate = get_interpreterState(); +#endif // CC_INTERP + assert(is_interpreted_frame(), "interpreted frame expected"); + Method* method = interpreter_frame_method(); + BasicType type = method->result_type(); + + intptr_t* tos_addr; + if (method->is_native()) { + // Prior to calling into the runtime to report the method_exit the possible + // return value is pushed to the native stack. If the result is a jfloat/jdouble + // then ST0 is saved. See the note in generate_native_result + tos_addr = (intptr_t*)sp(); + if (type == T_FLOAT || type == T_DOUBLE) { + tos_addr += 2; + } + } else { + tos_addr = (intptr_t*)interpreter_frame_tos_address(); + } + + switch (type) { + case T_OBJECT : + case T_ARRAY : { + oop obj; + if (method->is_native()) { +#ifdef CC_INTERP + obj = istate->_oop_temp; +#else + obj = cast_to_oop(at(interpreter_frame_oop_temp_offset)); +#endif // CC_INTERP + } else { + oop* obj_p = (oop*)tos_addr; + obj = (obj_p == NULL) ? 
(oop)NULL : *obj_p; + } + assert(obj == NULL || Universe::heap()->is_in(obj), "sanity check"); + *oop_result = obj; + break; + } + case T_BOOLEAN : value_result->z = *(jboolean*)tos_addr; break; + case T_BYTE : value_result->b = *(jbyte*)tos_addr; break; + case T_CHAR : value_result->c = *(jchar*)tos_addr; break; + case T_SHORT : value_result->s = *(jshort*)tos_addr; break; + case T_INT : value_result->i = *(jint*)tos_addr; break; + case T_LONG : value_result->j = *(jlong*)tos_addr; break; + case T_FLOAT : value_result->f = *(jfloat*)tos_addr; break; + case T_DOUBLE : value_result->d = *(jdouble*)tos_addr; break; + case T_VOID : /* Nothing to do */ break; + default : ShouldNotReachHere(); + } + + return type; +} + + +intptr_t* frame::interpreter_frame_tos_at(jint offset) const { + int index = (Interpreter::expr_offset_in_bytes(offset)/wordSize); + return &interpreter_frame_tos_address()[index]; +} + +#ifndef PRODUCT + +#define DESCRIBE_FP_OFFSET(name) \ + values.describe(frame_no, fp() + frame::name##_offset, #name) + +void frame::describe_pd(FrameValues& values, int frame_no) { + if (is_interpreted_frame()) { + DESCRIBE_FP_OFFSET(interpreter_frame_sender_sp); + DESCRIBE_FP_OFFSET(interpreter_frame_last_sp); + DESCRIBE_FP_OFFSET(interpreter_frame_method); + DESCRIBE_FP_OFFSET(interpreter_frame_mirror); + DESCRIBE_FP_OFFSET(interpreter_frame_mdp); + DESCRIBE_FP_OFFSET(interpreter_frame_cache); + DESCRIBE_FP_OFFSET(interpreter_frame_locals); + DESCRIBE_FP_OFFSET(interpreter_frame_bcp); + DESCRIBE_FP_OFFSET(interpreter_frame_initial_sp); + } +} +#endif + +intptr_t *frame::initial_deoptimization_info() { + // used to reset the saved FP + return fp(); +} + +intptr_t* frame::real_fp() const { + if (_cb != NULL) { + // use the frame size if valid + int size = _cb->frame_size(); + if (size > 0) { + return unextended_sp() + size; + } + } + // else rely on fp() + assert(! is_compiled_frame(), "unknown compiled frame size"); + return fp(); +} + +#ifndef PRODUCT +// This is a generic constructor which is only used by pns() in debug.cpp. +frame::frame(void* sp, void* fp, void* pc) { + init((intptr_t*)sp, (intptr_t*)fp, (address)pc); +} + +void frame::pd_ps() {} +#endif diff --git a/src/hotspot/cpu/loongarch/frame_loongarch.hpp b/src/hotspot/cpu/loongarch/frame_loongarch.hpp new file mode 100644 index 00000000000..b16389b3a32 --- /dev/null +++ b/src/hotspot/cpu/loongarch/frame_loongarch.hpp @@ -0,0 +1,171 @@ +/* + * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_LOONGARCH_FRAME_LOONGARCH_HPP +#define CPU_LOONGARCH_FRAME_LOONGARCH_HPP + +#include "runtime/synchronizer.hpp" + +// A frame represents a physical stack frame (an activation). Frames can be +// C or Java frames, and the Java frames can be interpreted or compiled. +// In contrast, vframes represent source-level activations, so that one physical frame +// can correspond to multiple source level frames because of inlining. +// A frame is comprised of {pc, fp, sp} +// ------------------------------ Asm interpreter ---------------------------------------- +// Layout of asm interpreter frame: +// Low +// [expression stack ] * <- sp +// [monitors ] \ +// ... | monitor block size +// [monitors ] / +// [monitor block size ] +// [byte code index/pointr] = bcx() bcx_offset +// [pointer to locals ] = locals() locals_offset +// [constant pool cache ] = cache() cache_offset +// [methodData ] = mdp() mdx_offset +// [methodOop ] = method() method_offset +// [last sp ] = last_sp() last_sp_offset +// [old stack pointer ] (sender_sp) sender_sp_offset +// [old frame pointer ] <- fp = link() +// [return pc ] +// [oop temp ] (only for native calls) +// [locals and parameters ] +// High <- sender sp +// ------------------------------ Asm interpreter ---------------------------------------- +// +// ------------------------------ Native (C frame) --------------------------------------- +// Layout of C frame: +// High +// | +// - <----- fp <- sender sp +// fp -8 | [ra] = sender_pc() +// fp-16 | [fp (sender)] = link() +// | [...] +// | +// - <----- sp +// | +// v +// Low +// ------------------------------ Native (C frame) --------------------------------------- + + public: + enum { + pc_return_offset = 0, + + // Java frames + java_frame_link_offset = 0, + java_frame_return_addr_offset = 1, + java_frame_sender_sp_offset = 2, + + // Native frames + native_frame_link_offset = -2, + native_frame_return_addr_offset = -1, + native_frame_sender_sp_offset = 0, + + // Interpreter frames + interpreter_frame_result_handler_offset = 3, // for native calls only + interpreter_frame_oop_temp_offset = 2, // for native calls only + + interpreter_frame_sender_fp_offset = 0, + interpreter_frame_sender_sp_offset = -1, + // outgoing sp before a call to an invoked method + interpreter_frame_last_sp_offset = interpreter_frame_sender_sp_offset - 1, + interpreter_frame_locals_offset = interpreter_frame_last_sp_offset - 1, + interpreter_frame_method_offset = interpreter_frame_locals_offset - 1, + interpreter_frame_mirror_offset = interpreter_frame_method_offset - 1, + interpreter_frame_mdp_offset = interpreter_frame_mirror_offset - 1, + interpreter_frame_cache_offset = interpreter_frame_mdp_offset - 1, + interpreter_frame_bcp_offset = interpreter_frame_cache_offset - 1, + interpreter_frame_initial_sp_offset = interpreter_frame_bcp_offset - 1, + + interpreter_frame_monitor_block_top_offset = interpreter_frame_initial_sp_offset, + interpreter_frame_monitor_block_bottom_offset = interpreter_frame_initial_sp_offset, + + // Entry frames + entry_frame_call_wrapper_offset = -9, + + // Native frames + + native_frame_initial_param_offset = 2 + + }; + + intptr_t ptr_at(int offset) const { + return *ptr_at_addr(offset); + } + + void ptr_at_put(int offset, intptr_t value) { + *ptr_at_addr(offset) = value; + } + + private: + // an additional field beyond _sp and 
_pc: + intptr_t* _fp; // frame pointer + // The interpreter and adapters will extend the frame of the caller. + // Since oopMaps are based on the sp of the caller before extension + // we need to know that value. However in order to compute the address + // of the return address we need the real "raw" sp. Since sparc already + // uses sp() to mean "raw" sp and unextended_sp() to mean the caller's + // original sp we use that convention. + + intptr_t* _unextended_sp; + void adjust_unextended_sp(); + + intptr_t* ptr_at_addr(int offset) const { + return (intptr_t*) addr_at(offset); + } +#ifdef ASSERT + // Used in frame::sender_for_{interpreter,compiled}_frame + static void verify_deopt_original_pc(CompiledMethod* nm, intptr_t* unextended_sp); +#endif + + public: + // Constructors + + frame(intptr_t* sp, intptr_t* fp, address pc); + + frame(intptr_t* sp, intptr_t* unextended_sp, intptr_t* fp, address pc); + + frame(intptr_t* sp, intptr_t* fp); + + void init(intptr_t* sp, intptr_t* fp, address pc); + + // accessors for the instance variables + intptr_t* fp() const { return _fp; } + + inline address* sender_pc_addr() const; + + // expression stack tos if we are nested in a java call + intptr_t* interpreter_frame_last_sp() const; + + // helper to update a map with callee-saved FP + static void update_map_with_saved_link(RegisterMap* map, intptr_t** link_addr); + + // deoptimization support + void interpreter_frame_set_last_sp(intptr_t* sp); + + static jint interpreter_frame_expression_stack_direction() { return -1; } + +#endif // CPU_LOONGARCH_FRAME_LOONGARCH_HPP diff --git a/src/hotspot/cpu/loongarch/frame_loongarch.inline.hpp b/src/hotspot/cpu/loongarch/frame_loongarch.inline.hpp new file mode 100644 index 00000000000..1ddc038eea2 --- /dev/null +++ b/src/hotspot/cpu/loongarch/frame_loongarch.inline.hpp @@ -0,0 +1,252 @@ +/* + * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef CPU_LOONGARCH_FRAME_LOONGARCH_INLINE_HPP +#define CPU_LOONGARCH_FRAME_LOONGARCH_INLINE_HPP + +#include "code/codeCache.hpp" +#include "code/vmreg.inline.hpp" + +// Inline functions for Loongson frames: + +// Constructors: + +inline frame::frame() { + _pc = NULL; + _sp = NULL; + _unextended_sp = NULL; + _fp = NULL; + _cb = NULL; + _deopt_state = unknown; +} + +inline void frame::init(intptr_t* sp, intptr_t* fp, address pc) { + _sp = sp; + _unextended_sp = sp; + _fp = fp; + _pc = pc; + assert(pc != NULL, "no pc?"); + _cb = CodeCache::find_blob(pc); + adjust_unextended_sp(); + + address original_pc = CompiledMethod::get_deopt_original_pc(this); + if (original_pc != NULL) { + _pc = original_pc; + _deopt_state = is_deoptimized; + } else { + _deopt_state = not_deoptimized; + } +} + +inline frame::frame(intptr_t* sp, intptr_t* fp, address pc) { + init(sp, fp, pc); +} + +inline frame::frame(intptr_t* sp, intptr_t* unextended_sp, intptr_t* fp, address pc) { + _sp = sp; + _unextended_sp = unextended_sp; + _fp = fp; + _pc = pc; + assert(pc != NULL, "no pc?"); + _cb = CodeCache::find_blob(pc); + adjust_unextended_sp(); + + address original_pc = CompiledMethod::get_deopt_original_pc(this); + if (original_pc != NULL) { + _pc = original_pc; + _deopt_state = is_deoptimized; + } else { + _deopt_state = not_deoptimized; + } +} + +inline frame::frame(intptr_t* sp, intptr_t* fp) { + _sp = sp; + _unextended_sp = sp; + _fp = fp; + _pc = (address)(sp[-1]); + + // Here's a sticky one. This constructor can be called via AsyncGetCallTrace + // when last_Java_sp is non-null but the pc fetched is junk. If we are truly + // unlucky the junk value could be to a zombied method and we'll die on the + // find_blob call. This is also why we can have no asserts on the validity + // of the pc we find here. AsyncGetCallTrace -> pd_get_top_frame_for_signal_handler + // -> pd_last_frame should use a specialized version of pd_last_frame which could + // call a specilaized frame constructor instead of this one. + // Then we could use the assert below. However this assert is of somewhat dubious + // value. + // assert(_pc != NULL, "no pc?"); + + _cb = CodeCache::find_blob(_pc); + adjust_unextended_sp(); + address original_pc = CompiledMethod::get_deopt_original_pc(this); + if (original_pc != NULL) { + _pc = original_pc; + _deopt_state = is_deoptimized; + } else { + _deopt_state = not_deoptimized; + } +} + +// Accessors + +inline bool frame::equal(frame other) const { + bool ret = sp() == other.sp() + && unextended_sp() == other.unextended_sp() + && fp() == other.fp() + && pc() == other.pc(); + assert(!ret || ret && cb() == other.cb() && _deopt_state == other._deopt_state, "inconsistent construction"); + return ret; +} + +// Return unique id for this frame. The id must have a value where we can distinguish +// identity and younger/older relationship. NULL represents an invalid (incomparable) +// frame. 
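+// The unextended sp serves as the id: the stack grows toward lower addresses, so a
+// numerically smaller id denotes a younger (more recent) activation, which is exactly
+// what is_younger()/is_older() below compare.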
+inline intptr_t* frame::id(void) const { return unextended_sp(); } + +// Relationals on frames based +// Return true if the frame is younger (more recent activation) than the frame represented by id +inline bool frame::is_younger(intptr_t* id) const { assert(this->id() != NULL && id != NULL, "NULL frame id"); + return this->id() < id ; } + +// Return true if the frame is older (less recent activation) than the frame represented by id +inline bool frame::is_older(intptr_t* id) const { assert(this->id() != NULL && id != NULL, "NULL frame id"); + return this->id() > id ; } + + + +inline intptr_t* frame::link() const { + if (is_java_frame()) + return (intptr_t*) *(intptr_t **)addr_at(java_frame_link_offset); + return (intptr_t*) *(intptr_t **)addr_at(native_frame_link_offset); +} + +inline intptr_t* frame::link_or_null() const { + intptr_t** ptr = is_java_frame() ? (intptr_t **)addr_at(java_frame_link_offset) + : (intptr_t **)addr_at(native_frame_link_offset); + return os::is_readable_pointer(ptr) ? *ptr : NULL; +} + +inline intptr_t* frame::unextended_sp() const { return _unextended_sp; } + +// Return address: + +inline address* frame::sender_pc_addr() const { + if (is_java_frame()) + return (address*) addr_at(java_frame_return_addr_offset); + return (address*) addr_at(native_frame_return_addr_offset); +} + +inline address frame::sender_pc() const { return *sender_pc_addr(); } + +inline intptr_t* frame::sender_sp() const { + if (is_java_frame()) + return addr_at(java_frame_sender_sp_offset); + return addr_at(native_frame_sender_sp_offset); +} + +inline intptr_t** frame::interpreter_frame_locals_addr() const { + return (intptr_t**)addr_at(interpreter_frame_locals_offset); +} + +inline intptr_t* frame::interpreter_frame_last_sp() const { + return *(intptr_t**)addr_at(interpreter_frame_last_sp_offset); +} + +inline intptr_t* frame::interpreter_frame_bcp_addr() const { + return (intptr_t*)addr_at(interpreter_frame_bcp_offset); +} + + +inline intptr_t* frame::interpreter_frame_mdp_addr() const { + return (intptr_t*)addr_at(interpreter_frame_mdp_offset); +} + + + +// Constant pool cache + +inline ConstantPoolCache** frame::interpreter_frame_cache_addr() const { + return (ConstantPoolCache**)addr_at(interpreter_frame_cache_offset); +} + +// Method + +inline Method** frame::interpreter_frame_method_addr() const { + return (Method**)addr_at(interpreter_frame_method_offset); +} + +// Mirror + +inline oop* frame::interpreter_frame_mirror_addr() const { + return (oop*)addr_at(interpreter_frame_mirror_offset); +} + +// top of expression stack +inline intptr_t* frame::interpreter_frame_tos_address() const { + intptr_t* last_sp = interpreter_frame_last_sp(); + if (last_sp == NULL ) { + return sp(); + } else { + // sp() may have been extended by an adapter + assert(last_sp <= (intptr_t*)interpreter_frame_monitor_end(), "bad tos"); + return last_sp; + } +} + +inline oop* frame::interpreter_frame_temp_oop_addr() const { + return (oop *)(fp() + interpreter_frame_oop_temp_offset); +} + +inline int frame::interpreter_frame_monitor_size() { + return BasicObjectLock::size(); +} + + +// expression stack +// (the max_stack arguments are used by the GC; see class FrameClosure) + +inline intptr_t* frame::interpreter_frame_expression_stack() const { + intptr_t* monitor_end = (intptr_t*) interpreter_frame_monitor_end(); + return monitor_end-1; +} + +// Entry frames + +inline JavaCallWrapper** frame::entry_frame_call_wrapper_addr() const { + return (JavaCallWrapper**)addr_at(entry_frame_call_wrapper_offset); +} + +// 
Compiled frames + +inline oop frame::saved_oop_result(RegisterMap* map) const { + return *((oop*) map->location(V0->as_VMReg())); +} + +inline void frame::set_saved_oop_result(RegisterMap* map, oop obj) { + *((oop*) map->location(V0->as_VMReg())) = obj; +} + +#endif // CPU_LOONGARCH_FRAME_LOONGARCH_INLINE_HPP diff --git a/src/hotspot/cpu/loongarch/gc/g1/g1BarrierSetAssembler_loongarch.cpp b/src/hotspot/cpu/loongarch/gc/g1/g1BarrierSetAssembler_loongarch.cpp new file mode 100644 index 00000000000..e1e4748c491 --- /dev/null +++ b/src/hotspot/cpu/loongarch/gc/g1/g1BarrierSetAssembler_loongarch.cpp @@ -0,0 +1,523 @@ +/* + * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "gc/g1/g1BarrierSet.hpp" +#include "gc/g1/g1BarrierSetAssembler.hpp" +#include "gc/g1/g1BarrierSetRuntime.hpp" +#include "gc/g1/g1CardTable.hpp" +#include "gc/g1/g1ThreadLocalData.hpp" +#include "gc/g1/heapRegion.hpp" +#include "interpreter/interp_masm.hpp" +#include "runtime/sharedRuntime.hpp" +#include "utilities/macros.hpp" +#ifdef COMPILER1 +#include "c1/c1_LIRAssembler.hpp" +#include "c1/c1_MacroAssembler.hpp" +#include "gc/g1/c1/g1BarrierSetC1.hpp" +#endif + +#define __ masm-> + +void G1BarrierSetAssembler::gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators, + Register addr, Register count, RegSet saved_regs) { + bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0; + + if (!dest_uninitialized) { +#ifndef OPT_THREAD + Register thread = T9; + __ get_thread(thread); +#else + Register thread = TREG; +#endif + + Label filtered; + Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset())); + // Is marking active? 
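+    // (The SATB active flag is a 4-byte or 1-byte field depending on the build, hence
+    // the width check before picking the load instruction.)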
+ if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { + __ ld_w(AT, in_progress); + } else { + assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); + __ ld_b(AT, in_progress); + } + + __ beqz(AT, filtered); + + __ push(saved_regs); + if (count == A0) { + if (addr == A1) { + __ move(AT, A0); + __ move(A0, A1); + __ move(A1, AT); + } else { + __ move(A1, count); + __ move(A0, addr); + } + } else { + __ move(A0, addr); + __ move(A1, count); + } + if (UseCompressedOops) { + __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_pre_narrow_oop_entry), 2); + } else { + __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_pre_oop_entry), 2); + } + __ pop(saved_regs); + + __ bind(filtered); + } +} + +void G1BarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, + Register addr, Register count, Register tmp, RegSet saved_regs) { + __ push(saved_regs); + if (count == A0) { + assert_different_registers(A1, addr); + __ move(A1, count); + __ move(A0, addr); + } else { + assert_different_registers(A0, count); + __ move(A0, addr); + __ move(A1, count); + } + __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_post_entry), 2); + __ pop(saved_regs); +} + +void G1BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, + Register dst, Address src, Register tmp1, Register tmp_thread) { + bool on_oop = type == T_OBJECT || type == T_ARRAY; + bool on_weak = (decorators & ON_WEAK_OOP_REF) != 0; + bool on_phantom = (decorators & ON_PHANTOM_OOP_REF) != 0; + bool on_reference = on_weak || on_phantom; + ModRefBarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread); + if (on_oop && on_reference) { + const Register thread = TREG; +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + // RA is live. It must be saved around calls. + __ enter(); // barrier may call runtime + // Generate the G1 pre-barrier code to log the value of + // the referent field in an SATB buffer. + g1_write_barrier_pre(masm /* masm */, + noreg /* obj */, + dst /* pre_val */, + thread /* thread */, + tmp1 /* tmp */, + true /* tosca_live */, + true /* expand_call */); + __ leave(); + } +} + +void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm, + Register obj, + Register pre_val, + Register thread, + Register tmp, + bool tosca_live, + bool expand_call) { + // If expand_call is true then we expand the call_VM_leaf macro + // directly to skip generating the check by + // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp. + + assert(thread == TREG, "must be"); + + Label done; + Label runtime; + + assert(pre_val != noreg, "check this code"); + + if (obj != noreg) { + assert_different_registers(obj, pre_val, tmp); + assert(pre_val != V0, "check this code"); + } + + Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset())); + Address index(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset())); + Address buffer(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset())); + + // Is marking active? + if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { + __ ld_w(AT, in_progress); + } else { + assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); + __ ld_b(AT, in_progress); + } + __ beqz(AT, done); + + // Do we need to load the previous value? 
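+  // (obj == noreg means the caller has already placed the previous value in pre_val,
+  // as load_at() above does for the referent-field barrier.)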
+ if (obj != noreg) { + __ load_heap_oop(pre_val, Address(obj, 0)); + } + + // Is the previous value null? + __ beqz(pre_val, done); + + // Can we store original value in the thread's buffer? + // Is index == 0? + // (The index field is typed as size_t.) + + __ ld_d(tmp, index); + __ beqz(tmp, runtime); + + __ addi_d(tmp, tmp, -1 * wordSize); + __ st_d(tmp, index); + __ ld_d(AT, buffer); + + // Record the previous value + __ stx_d(pre_val, tmp, AT); + __ b(done); + + __ bind(runtime); + // save the live input values + if (tosca_live) __ push(V0); + + if (obj != noreg && obj != V0) __ push(obj); + + if (pre_val != V0) __ push(pre_val); + + // Calling the runtime using the regular call_VM_leaf mechanism generates + // code (generated by InterpreterMacroAssember::call_VM_leaf_base) + // that checks that the *(ebp+frame::interpreter_frame_last_sp) == NULL. + // + // If we care generating the pre-barrier without a frame (e.g. in the + // intrinsified Reference.get() routine) then ebp might be pointing to + // the caller frame and so this check will most likely fail at runtime. + // + // Expanding the call directly bypasses the generation of the check. + // So when we do not have have a full interpreter frame on the stack + // expand_call should be passed true. + + if (expand_call) { + assert(pre_val != A1, "smashed arg"); + if (thread != A1) __ move(A1, thread); + if (pre_val != A0) __ move(A0, pre_val); + __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, thread); + } else { + __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, thread); + } + + // save the live input values + if (pre_val != V0) + __ pop(pre_val); + + if (obj != noreg && obj != V0) + __ pop(obj); + + if (tosca_live) __ pop(V0); + + __ bind(done); +} + +void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm, + Register store_addr, + Register new_val, + Register thread, + Register tmp, + Register tmp2) { + assert_different_registers(tmp, tmp2, AT); + assert(thread == TREG, "must be"); + + Address queue_index(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset())); + Address buffer(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset())); + + CardTableBarrierSet* ct = barrier_set_cast(BarrierSet::barrier_set()); + assert(sizeof(*ct->card_table()->byte_map_base()) == sizeof(jbyte), "adjust this code"); + + Label done; + Label runtime; + + // Does store cross heap regions? + __ xorr(AT, store_addr, new_val); + __ srli_d(AT, AT, HeapRegion::LogOfHRGrainBytes); + __ beqz(AT, done); + + // crosses regions, storing NULL? + __ beqz(new_val, done); + + // storing region crossing non-NULL, is card already dirty? + const Register card_addr = tmp; + const Register cardtable = tmp2; + + __ move(card_addr, store_addr); + __ srli_d(card_addr, card_addr, CardTable::card_shift); + // Do not use ExternalAddress to load 'byte_map_base', since 'byte_map_base' is NOT + // a valid address and therefore is not properly handled by the relocation code. + __ li(cardtable, (intptr_t)ct->card_table()->byte_map_base()); + __ add_d(card_addr, card_addr, cardtable); + + __ ld_bu(AT, card_addr, 0); + __ addi_d(AT, AT, -1 * (int)G1CardTable::g1_young_card_val()); + __ beqz(AT, done); + + assert((int)CardTable::dirty_card_val() == 0, "must be 0"); + + __ membar(__ StoreLoad); + __ ld_bu(AT, card_addr, 0); + __ beqz(AT, done); + + // storing a region crossing, non-NULL oop, card is clean. + // dirty card and log. 
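+  // (dirty_card_val() is asserted to be 0 above, so storing the zero register R0
+  // is what marks the card dirty.)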
+ __ st_b(R0, card_addr, 0); + + __ ld_d(AT, queue_index); + __ beqz(AT, runtime); + __ addi_d(AT, AT, -1 * wordSize); + __ st_d(AT, queue_index); + __ ld_d(tmp2, buffer); + __ ld_d(AT, queue_index); + __ stx_d(card_addr, tmp2, AT); + __ b(done); + + __ bind(runtime); + // save the live input values + __ push(store_addr); + __ push(new_val); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), card_addr, TREG); + __ pop(new_val); + __ pop(store_addr); + + __ bind(done); +} + +void G1BarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, + Address dst, Register val, Register tmp1, Register tmp2) { + bool in_heap = (decorators & IN_HEAP) != 0; + bool as_normal = (decorators & AS_NORMAL) != 0; + assert((decorators & IS_DEST_UNINITIALIZED) == 0, "unsupported"); + + bool needs_pre_barrier = as_normal; + bool needs_post_barrier = val != noreg && in_heap; + + Register tmp3 = RT3; + Register rthread = TREG; + // flatten object address if needed + // We do it regardless of precise because we need the registers + if (dst.index() == noreg && dst.disp() == 0) { + if (dst.base() != tmp3) { + __ move(tmp3, dst.base()); + } + } else { + __ lea(tmp3, dst); + } + + if (needs_pre_barrier) { + g1_write_barrier_pre(masm /*masm*/, + tmp3 /* obj */, + tmp2 /* pre_val */, + rthread /* thread */, + tmp1 /* tmp */, + val != noreg /* tosca_live */, + false /* expand_call */); + } + if (val == noreg) { + BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp3, 0), val, noreg, noreg); + } else { + Register new_val = val; + if (needs_post_barrier) { + // G1 barrier needs uncompressed oop for region cross check. + if (UseCompressedOops) { + new_val = tmp2; + __ move(new_val, val); + } + } + BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp3, 0), val, noreg, noreg); + if (needs_post_barrier) { + g1_write_barrier_post(masm /*masm*/, + tmp3 /* store_adr */, + new_val /* new_val */, + rthread /* thread */, + tmp1 /* tmp */, + tmp2 /* tmp2 */); + } + } +} + +#ifdef COMPILER1 + +#undef __ +#define __ ce->masm()-> + +void G1BarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrierStub* stub) { + G1BarrierSetC1* bs = (G1BarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1(); + // At this point we know that marking is in progress. + // If do_load() is true then we have to emit the + // load of the previous value; otherwise it has already + // been loaded into _pre_val. 
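+  // This is the out-of-line C1 slow path; control only reaches stub->entry() when the
+  // inline fast-path check decides the pre-barrier really has to run.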
+ + __ bind(*stub->entry()); + + assert(stub->pre_val()->is_register(), "Precondition."); + + Register pre_val_reg = stub->pre_val()->as_register(); + + if (stub->do_load()) { + ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /*wide*/, false /*unaligned*/); + } + __ beqz(pre_val_reg, *stub->continuation()); + ce->store_parameter(stub->pre_val()->as_register(), 0); + __ call(bs->pre_barrier_c1_runtime_code_blob()->code_begin(), relocInfo::runtime_call_type); + __ b(*stub->continuation()); +} + +void G1BarrierSetAssembler::gen_post_barrier_stub(LIR_Assembler* ce, G1PostBarrierStub* stub) { + G1BarrierSetC1* bs = (G1BarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1(); + __ bind(*stub->entry()); + assert(stub->addr()->is_register(), "Precondition."); + assert(stub->new_val()->is_register(), "Precondition."); + Register new_val_reg = stub->new_val()->as_register(); + __ beqz(new_val_reg, *stub->continuation()); + ce->store_parameter(stub->addr()->as_pointer_register(), 0); + __ call(bs->post_barrier_c1_runtime_code_blob()->code_begin(), relocInfo::runtime_call_type); + __ b(*stub->continuation()); +} + +#undef __ + +#define __ sasm-> + +void G1BarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm) { + __ prologue("g1_pre_barrier", false); + + // arg0 : previous value of memory + + BarrierSet* bs = BarrierSet::barrier_set(); + + const Register pre_val = A0; + const Register thread = TREG; + const Register tmp = SCR2; + + Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset())); + Address queue_index(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset())); + Address buffer(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset())); + + Label done; + Label runtime; + + // Is marking still active? + if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { + __ ld_w(tmp, in_progress); + } else { + assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); + __ ld_b(tmp, in_progress); + } + __ beqz(tmp, done); + + // Can we store original value in the thread's buffer? + __ ld_ptr(tmp, queue_index); + __ beqz(tmp, runtime); + + __ addi_d(tmp, tmp, -wordSize); + __ st_ptr(tmp, queue_index); + __ ld_ptr(SCR1, buffer); + __ add_d(tmp, tmp, SCR1); + __ load_parameter(0, SCR1); + __ st_ptr(SCR1, Address(tmp, 0)); + __ b(done); + + __ bind(runtime); + __ pushad(); + __ load_parameter(0, pre_val); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, thread); + __ popad(); + __ bind(done); + + __ epilogue(); +} + +void G1BarrierSetAssembler::generate_c1_post_barrier_runtime_stub(StubAssembler* sasm) { + __ prologue("g1_post_barrier", false); + + // arg0: store_address + Address store_addr(FP, 2 * BytesPerWord); + + BarrierSet* bs = BarrierSet::barrier_set(); + CardTableBarrierSet* ctbs = barrier_set_cast(bs); + CardTable* ct = ctbs->card_table(); + + Label done; + Label runtime; + + // At this point we know new_value is non-NULL and the new_value crosses regions. + // Must check to see if card is already dirty + + const Register thread = TREG; + + Address queue_index(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset())); + Address buffer(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset())); + + const Register card_offset = SCR2; + // RA is free here, so we can use it to hold the byte_map_base. 
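+  // (Presumably safe because the stub prologue has already saved the return address,
+  // so RA is just a scratch value until the epilogue restores it.)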
+ const Register byte_map_base = RA; + + assert_different_registers(card_offset, byte_map_base, SCR1); + + __ load_parameter(0, card_offset); + __ srli_d(card_offset, card_offset, CardTable::card_shift); + __ load_byte_map_base(byte_map_base); + __ ldx_bu(SCR1, byte_map_base, card_offset); + __ addi_d(SCR1, SCR1, -(int)G1CardTable::g1_young_card_val()); + __ beqz(SCR1, done); + + assert((int)CardTable::dirty_card_val() == 0, "must be 0"); + + __ membar(__ StoreLoad); + __ ldx_bu(SCR1, byte_map_base, card_offset); + __ beqz(SCR1, done); + + // storing region crossing non-NULL, card is clean. + // dirty card and log. + __ stx_b(R0, byte_map_base, card_offset); + + // Convert card offset into an address in card_addr + Register card_addr = card_offset; + __ add_d(card_addr, byte_map_base, card_addr); + + __ ld_ptr(SCR1, queue_index); + __ beqz(SCR1, runtime); + __ addi_d(SCR1, SCR1, -wordSize); + __ st_ptr(SCR1, queue_index); + + // Reuse RA to hold buffer_addr + const Register buffer_addr = RA; + + __ ld_ptr(buffer_addr, buffer); + __ stx_d(card_addr, buffer_addr, SCR1); + __ b(done); + + __ bind(runtime); + __ pushad(); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), card_addr, thread); + __ popad(); + __ bind(done); + __ epilogue(); +} + +#undef __ + +#endif // COMPILER1 diff --git a/src/hotspot/cpu/loongarch/gc/g1/g1BarrierSetAssembler_loongarch.hpp b/src/hotspot/cpu/loongarch/gc/g1/g1BarrierSetAssembler_loongarch.hpp new file mode 100644 index 00000000000..745046ac0cc --- /dev/null +++ b/src/hotspot/cpu/loongarch/gc/g1/g1BarrierSetAssembler_loongarch.hpp @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef CPU_LOONGARCH_GC_G1_G1BARRIERSETASSEMBLER_LOONGARCH_HPP +#define CPU_LOONGARCH_GC_G1_G1BARRIERSETASSEMBLER_LOONGARCH_HPP + +#include "asm/macroAssembler.hpp" +#include "gc/shared/modRefBarrierSetAssembler.hpp" + +class LIR_Assembler; +class StubAssembler; +class G1PreBarrierStub; +class G1PostBarrierStub; + +class G1BarrierSetAssembler: public ModRefBarrierSetAssembler { + protected: + virtual void gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators, Register addr, Register count, RegSet saved_regs); + virtual void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, Register addr, Register count, Register tmp, RegSet saved_regs); + + void g1_write_barrier_pre(MacroAssembler* masm, + Register obj, + Register pre_val, + Register thread, + Register tmp, + bool tosca_live, + bool expand_call); + + void g1_write_barrier_post(MacroAssembler* masm, + Register store_addr, + Register new_val, + Register thread, + Register tmp, + Register tmp2); + + virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, + Address dst, Register val, Register tmp1, Register tmp2); + + public: + void gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrierStub* stub); + void gen_post_barrier_stub(LIR_Assembler* ce, G1PostBarrierStub* stub); + + void generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm); + void generate_c1_post_barrier_runtime_stub(StubAssembler* sasm); + + virtual void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, + Register dst, Address src, Register tmp1, Register tmp_thread); +}; + +#endif // CPU_LOONGARCH_GC_G1_G1BARRIERSETASSEMBLER_LOONGARCH_HPP diff --git a/src/hotspot/cpu/loongarch/gc/shared/barrierSetAssembler_loongarch.cpp b/src/hotspot/cpu/loongarch/gc/shared/barrierSetAssembler_loongarch.cpp new file mode 100644 index 00000000000..a890cd3f626 --- /dev/null +++ b/src/hotspot/cpu/loongarch/gc/shared/barrierSetAssembler_loongarch.cpp @@ -0,0 +1,255 @@ +/* + * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2018, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "gc/shared/barrierSetAssembler.hpp" +#include "gc/shared/collectedHeap.hpp" +#include "interpreter/interp_masm.hpp" +#include "runtime/jniHandles.hpp" +#include "runtime/thread.hpp" + +#define __ masm-> + +void BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, + Register dst, Address src, Register tmp1, Register tmp_thread) { + // RA is live. It must be saved around calls. + + bool in_heap = (decorators & IN_HEAP) != 0; + bool in_native = (decorators & IN_NATIVE) != 0; + bool is_not_null = (decorators & IS_NOT_NULL) != 0; + + switch (type) { + case T_OBJECT: + case T_ARRAY: { + if (in_heap) { + if (UseCompressedOops) { + __ ld_wu(dst, src); + if (is_not_null) { + __ decode_heap_oop_not_null(dst); + } else { + __ decode_heap_oop(dst); + } + } else + { + __ ld_ptr(dst, src); + } + } else { + assert(in_native, "why else?"); + __ ld_ptr(dst, src); + } + break; + } + case T_BOOLEAN: __ ld_bu (dst, src); break; + case T_BYTE: __ ld_b (dst, src); break; + case T_CHAR: __ ld_hu (dst, src); break; + case T_SHORT: __ ld_h (dst, src); break; + case T_INT: __ ld_w (dst, src); break; + case T_LONG: __ ld_d (dst, src); break; + case T_ADDRESS: __ ld_ptr(dst, src); break; + case T_FLOAT: + assert(dst == noreg, "only to ftos"); + __ fld_s(FSF, src); + break; + case T_DOUBLE: + assert(dst == noreg, "only to dtos"); + __ fld_d(FSF, src); + break; + default: Unimplemented(); + } +} + +void BarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, + Address dst, Register val, Register tmp1, Register tmp2) { + bool in_heap = (decorators & IN_HEAP) != 0; + bool in_native = (decorators & IN_NATIVE) != 0; + bool is_not_null = (decorators & IS_NOT_NULL) != 0; + + switch (type) { + case T_OBJECT: + case T_ARRAY: { + if (in_heap) { + if (val == noreg) { + assert(!is_not_null, "inconsistent access"); + if (UseCompressedOops) { + __ st_w(R0, dst); + } else { + __ st_d(R0, dst); + } + } else { + if (UseCompressedOops) { + assert(!dst.uses(val), "not enough registers"); + if (is_not_null) { + __ encode_heap_oop_not_null(val); + } else { + __ encode_heap_oop(val); + } + __ st_w(val, dst); + } else + { + __ st_ptr(val, dst); + } + } + } else { + assert(in_native, "why else?"); + assert(val != noreg, "not supported"); + __ st_ptr(val, dst); + } + break; + } + case T_BOOLEAN: + __ andi(val, val, 0x1); // boolean is true if LSB is 1 + __ st_b(val, dst); + break; + case T_BYTE: + __ st_b(val, dst); + break; + case T_SHORT: + __ st_h(val, dst); + break; + case T_CHAR: + __ st_h(val, dst); + break; + case T_INT: + __ st_w(val, dst); + break; + case T_LONG: + __ st_d(val, dst); + break; + case T_FLOAT: + assert(val == noreg, "only tos"); + __ fst_s(FSF, dst); + break; + case T_DOUBLE: + assert(val == noreg, "only tos"); + __ fst_d(FSF, dst); + break; + case T_ADDRESS: + __ st_ptr(val, dst); + break; + default: Unimplemented(); + } +} + +void BarrierSetAssembler::obj_equals(MacroAssembler* masm, + Register obj1, Address obj2) { + Unimplemented(); +} + +void BarrierSetAssembler::obj_equals(MacroAssembler* masm, + Register obj1, Register obj2) { + Unimplemented(); +} + +void BarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env, + Register obj, Register tmp, Label& slowpath) { + __ clear_jweak_tag(obj); + __ ld_ptr(obj, Address(obj, 0)); +} + +// Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes. 
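+// Bump-pointer allocation in the thread-local allocation buffer: obj receives the current
+// TLAB top, the top is advanced by the requested size, and slow_case is taken when the
+// new top would run past the TLAB end.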
+void BarrierSetAssembler::tlab_allocate(MacroAssembler* masm, Register obj, + Register var_size_in_bytes, + int con_size_in_bytes, + Register t1, + Register t2, + Label& slow_case) { + assert_different_registers(obj, t2); + assert_different_registers(obj, var_size_in_bytes); + Register end = t2; + + // verify_tlab(); + + __ ld_ptr(obj, Address(TREG, JavaThread::tlab_top_offset())); + if (var_size_in_bytes == noreg) { + __ lea(end, Address(obj, con_size_in_bytes)); + } else { + __ lea(end, Address(obj, var_size_in_bytes, Address::times_1, 0)); + } + __ ld_ptr(SCR1, Address(TREG, JavaThread::tlab_end_offset())); + __ blt_far(SCR1, end, slow_case, false); + + // update the tlab top pointer + __ st_ptr(end, Address(TREG, JavaThread::tlab_top_offset())); + + // recover var_size_in_bytes if necessary + if (var_size_in_bytes == end) { + __ sub_d(var_size_in_bytes, var_size_in_bytes, obj); + } + // verify_tlab(); +} + +// Defines obj, preserves var_size_in_bytes +void BarrierSetAssembler::eden_allocate(MacroAssembler* masm, Register obj, + Register var_size_in_bytes, + int con_size_in_bytes, + Register t1, + Label& slow_case) { + assert_different_registers(obj, var_size_in_bytes, t1); + if (!Universe::heap()->supports_inline_contig_alloc()) { + __ b_far(slow_case); + } else { + Register end = t1; + Register heap_end = SCR2; + Label retry; + __ bind(retry); + + __ li(SCR1, (address)Universe::heap()->end_addr()); + __ ld_d(heap_end, SCR1, 0); + + // Get the current top of the heap + __ li(SCR1, (address) Universe::heap()->top_addr()); + __ ll_d(obj, SCR1, 0); + + // Adjust it my the size of our new object + if (var_size_in_bytes == noreg) + __ addi_d(end, obj, con_size_in_bytes); + else + __ add_d(end, obj, var_size_in_bytes); + + // if end < obj then we wrapped around high memory + __ blt_far(end, obj, slow_case, false); + __ blt_far(heap_end, end, slow_case, false); + + // If heap top hasn't been changed by some other thread, update it. + __ sc_d(end, SCR1, 0); + __ beqz(end, retry); + + incr_allocated_bytes(masm, var_size_in_bytes, con_size_in_bytes, t1); + } +} + +void BarrierSetAssembler::incr_allocated_bytes(MacroAssembler* masm, + Register var_size_in_bytes, + int con_size_in_bytes, + Register t1) { + assert(t1->is_valid(), "need temp reg"); + + __ ld_ptr(t1, Address(TREG, in_bytes(JavaThread::allocated_bytes_offset()))); + if (var_size_in_bytes->is_valid()) + __ add_d(t1, t1, var_size_in_bytes); + else + __ addi_d(t1, t1, con_size_in_bytes); + __ st_ptr(t1, Address(TREG, in_bytes(JavaThread::allocated_bytes_offset()))); +} diff --git a/src/hotspot/cpu/loongarch/gc/shared/barrierSetAssembler_loongarch.hpp b/src/hotspot/cpu/loongarch/gc/shared/barrierSetAssembler_loongarch.hpp new file mode 100644 index 00000000000..a7ebbfaabba --- /dev/null +++ b/src/hotspot/cpu/loongarch/gc/shared/barrierSetAssembler_loongarch.hpp @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2018, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_LOONGARCH_GC_SHARED_BARRIERSETASSEMBLER_LOONGARCH_HPP +#define CPU_LOONGARCH_GC_SHARED_BARRIERSETASSEMBLER_LOONGARCH_HPP + +#include "asm/macroAssembler.hpp" +#include "gc/shared/barrierSet.hpp" +#include "memory/allocation.hpp" +#include "oops/access.hpp" + +class InterpreterMacroAssembler; + +class BarrierSetAssembler: public CHeapObj { +private: + void incr_allocated_bytes(MacroAssembler* masm, + Register var_size_in_bytes, + int con_size_in_bytes, + Register t1); + +public: + virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, + Register dst, Register count, RegSet saved_regs) {} + virtual void arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, + Register dst, Register count, Register scratch, RegSet saved_regs) {} + + virtual void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, + Register dst, Address src, Register tmp1, Register tmp_thread); + virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, + Address dst, Register val, Register tmp1, Register tmp2); + + + virtual void obj_equals(MacroAssembler* masm, + Register obj1, Register obj2); + virtual void obj_equals(MacroAssembler* masm, + Register obj1, Address obj2); + + virtual void resolve(MacroAssembler* masm, DecoratorSet decorators, Register obj) { + // Default implementation does not need to do anything. + } + + // Support for jniFastGetField to try resolving a jobject/jweak in native + virtual void try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env, + Register obj, Register tmp, Label& slowpath); + + virtual void tlab_allocate(MacroAssembler* masm, + Register obj, // result: pointer to object after successful allocation + Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise + int con_size_in_bytes, // object size in bytes if known at compile time + Register t1, // temp register + Register t2, // temp register + Label& slow_case // continuation point if fast allocation fails + ); + + void eden_allocate(MacroAssembler* masm, + Register obj, // result: pointer to object after successful allocation + Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise + int con_size_in_bytes, // object size in bytes if known at compile time + Register t1, // temp register + Label& slow_case // continuation point if fast allocation fails + ); + + virtual void barrier_stubs_init() {} +}; + +#endif // CPU_LOONGARCH_GC_SHARED_BARRIERSETASSEMBLER_LOONGARCH_HPP diff --git a/src/hotspot/cpu/loongarch/gc/shared/cardTableBarrierSetAssembler_loongarch.cpp b/src/hotspot/cpu/loongarch/gc/shared/cardTableBarrierSetAssembler_loongarch.cpp new file mode 100644 index 00000000000..d09e9a75a7e --- /dev/null +++ b/src/hotspot/cpu/loongarch/gc/shared/cardTableBarrierSetAssembler_loongarch.cpp @@ -0,0 +1,140 @@ +/* + * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. 
+ * Copyright (c) 2018, 2023, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "gc/shared/barrierSet.hpp" +#include "gc/shared/cardTable.hpp" +#include "gc/shared/cardTableBarrierSet.hpp" +#include "gc/shared/cardTableBarrierSetAssembler.hpp" + +#define __ masm-> + +#define T4 RT4 + +#ifdef PRODUCT +#define BLOCK_COMMENT(str) /* nothing */ +#else +#define BLOCK_COMMENT(str) __ block_comment(str) +#endif + +#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") + +#define TIMES_OOP (UseCompressedOops ? Address::times_4 : Address::times_8) + +void CardTableBarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, + Register addr, Register count, Register tmp, + RegSet saved_regs) { + BarrierSet *bs = BarrierSet::barrier_set(); + CardTableBarrierSet* ctbs = barrier_set_cast(bs); + CardTable* ct = ctbs->card_table(); + assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code"); + intptr_t disp = (intptr_t) ct->byte_map_base(); + + Label L_loop, L_done; + const Register end = count; + assert_different_registers(addr, end); + + __ beq(count, R0, L_done); // zero count - nothing to do + + if (ct->scanned_concurrently()) __ membar(__ StoreStore); + + __ li(tmp, disp); + + __ lea(end, Address(addr, count, TIMES_OOP, 0)); // end == addr+count*oop_size + __ addi_d(end, end, -BytesPerHeapOop); // end - 1 to make inclusive + __ shr(addr, CardTable::card_shift); + __ shr(end, CardTable::card_shift); + __ sub_d(end, end, addr); // end --> cards count + + __ add_d(addr, addr, tmp); + + __ BIND(L_loop); + __ stx_b(R0, addr, count); + __ addi_d(count, count, -1); + __ bge(count, R0, L_loop); + + __ BIND(L_done); +} + +void CardTableBarrierSetAssembler::store_check(MacroAssembler* masm, Register obj, Address dst) { + // Does a store check for the oop in register obj. The content of + // register obj is destroyed afterwards. 
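+  // In effect the code below computes and dirties the card covering obj:
+  //   card = ct->byte_map_base() + (obj >> CardTable::card_shift);
+  //   *card = CardTable::dirty_card_val();   // 0
+  // With UseCondCardMark the store is skipped when the card is already dirty.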
+ BarrierSet* bs = BarrierSet::barrier_set(); + + CardTableBarrierSet* ctbs = barrier_set_cast(bs); + CardTable* ct = ctbs->card_table(); + assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code"); + + __ shr(obj, CardTable::card_shift); + + Address card_addr; + + intptr_t byte_map_base = (intptr_t)ct->byte_map_base(); + Register tmp = T4; + assert_different_registers(tmp, obj); + __ li(tmp, byte_map_base); + __ add_d(tmp, tmp, obj); + + assert(CardTable::dirty_card_val() == 0, "must be"); + + jbyte dirty = CardTable::dirty_card_val(); + if (UseCondCardMark) { + Label L_already_dirty; + __ membar(__ StoreLoad); + __ ld_b(AT, tmp, 0); + __ addi_d(AT, AT, -1 * dirty); + __ beq(AT, R0, L_already_dirty); + __ st_b(R0, tmp, 0); + __ bind(L_already_dirty); + } else { + if (ct->scanned_concurrently()) { + __ membar(Assembler::StoreStore); + } + __ st_b(R0, tmp, 0); + } +} + +void CardTableBarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, + Address dst, Register val, Register tmp1, Register tmp2) { + bool in_heap = (decorators & IN_HEAP) != 0; + + bool is_array = (decorators & IS_ARRAY) != 0; + bool on_anonymous = (decorators & ON_UNKNOWN_OOP_REF) != 0; + bool precise = is_array || on_anonymous; + + bool needs_post_barrier = val != noreg && in_heap; + + BarrierSetAssembler::store_at(masm, decorators, type, dst, val, noreg, noreg); + if (needs_post_barrier) { + // flatten object address if needed + if (!precise || (dst.index() == noreg && dst.disp() == 0)) { + store_check(masm, dst.base(), dst); + } else { + __ lea(tmp1, dst); + store_check(masm, tmp1, dst); + } + } +} diff --git a/src/hotspot/cpu/loongarch/gc/shared/cardTableBarrierSetAssembler_loongarch.hpp b/src/hotspot/cpu/loongarch/gc/shared/cardTableBarrierSetAssembler_loongarch.hpp new file mode 100644 index 00000000000..b37c2ba0bc9 --- /dev/null +++ b/src/hotspot/cpu/loongarch/gc/shared/cardTableBarrierSetAssembler_loongarch.hpp @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2018, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef CPU_LOONGARCH_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_LOONGARCH_HPP +#define CPU_LOONGARCH_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_LOONGARCH_HPP + +#include "asm/macroAssembler.hpp" +#include "gc/shared/modRefBarrierSetAssembler.hpp" + +class CardTableBarrierSetAssembler: public ModRefBarrierSetAssembler { +protected: + void store_check(MacroAssembler* masm, Register obj, Address dst); + + virtual void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, + Register addr, Register count, Register tmp, + RegSet saved_regs); + + virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, + Address dst, Register val, Register tmp1, Register tmp2); +}; + +#endif // CPU_LOONGARCH_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_LOONGARCH_HPP diff --git a/src/hotspot/cpu/loongarch/gc/shared/modRefBarrierSetAssembler_loongarch.cpp b/src/hotspot/cpu/loongarch/gc/shared/modRefBarrierSetAssembler_loongarch.cpp new file mode 100644 index 00000000000..14c41ea7906 --- /dev/null +++ b/src/hotspot/cpu/loongarch/gc/shared/modRefBarrierSetAssembler_loongarch.cpp @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2018, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "gc/shared/modRefBarrierSetAssembler.hpp" + +#define __ masm-> + +void ModRefBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, + Register dst, Register count, RegSet saved_regs) { + if (is_oop) { + gen_write_ref_array_pre_barrier(masm, decorators, dst, count, saved_regs); + } +} + +void ModRefBarrierSetAssembler::arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, + Register dst, Register count, Register scratch, RegSet saved_regs) { + if (is_oop) { + gen_write_ref_array_post_barrier(masm, decorators, dst, count, scratch, saved_regs); + } +} + +void ModRefBarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, + Address dst, Register val, Register tmp1, Register tmp2) { + if (type == T_OBJECT || type == T_ARRAY) { + oop_store_at(masm, decorators, type, dst, val, tmp1, tmp2); + } else { + BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2); + } +} diff --git a/src/hotspot/cpu/loongarch/gc/shared/modRefBarrierSetAssembler_loongarch.hpp b/src/hotspot/cpu/loongarch/gc/shared/modRefBarrierSetAssembler_loongarch.hpp new file mode 100644 index 00000000000..8043220effb --- /dev/null +++ b/src/hotspot/cpu/loongarch/gc/shared/modRefBarrierSetAssembler_loongarch.hpp @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2018, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_LOONGARCH_GC_SHARED_MODREFBARRIERSETASSEMBLER_LOONGARCH_HPP +#define CPU_LOONGARCH_GC_SHARED_MODREFBARRIERSETASSEMBLER_LOONGARCH_HPP + +#include "asm/macroAssembler.hpp" +#include "gc/shared/barrierSetAssembler.hpp" + +// The ModRefBarrierSetAssembler filters away accesses on BasicTypes other +// than T_OBJECT/T_ARRAY (oops). The oop accesses call one of the protected +// accesses, which are overridden in the concrete BarrierSetAssembler. 
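+// The pre/post array-copy hooks below default to no-ops and oop_store_at is
+// pure virtual, so a concrete subclass (the card-table or G1 assembler in
+// this port) only supplies the barriers it actually needs.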
+ +class ModRefBarrierSetAssembler: public BarrierSetAssembler { +protected: + virtual void gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators, + Register addr, Register count, RegSet saved_regs) {} + virtual void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, + Register addr, Register count, Register tmp, RegSet saved_regs) {} + virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, + Address dst, Register val, Register tmp1, Register tmp2) = 0; +public: + virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, + Register dst, Register count, RegSet saved_regs); + virtual void arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, + Register dst, Register count, Register scratch, RegSet saved_regs); + + virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, + Address dst, Register val, Register tmp1, Register tmp2); +}; + +#endif // CPU_LOONGARCH_GC_SHARED_MODREFBARRIERSETASSEMBLER_LOONGARCH_HPP diff --git a/src/hotspot/cpu/loongarch/globalDefinitions_loongarch.hpp b/src/hotspot/cpu/loongarch/globalDefinitions_loongarch.hpp new file mode 100644 index 00000000000..dc21d001cc4 --- /dev/null +++ b/src/hotspot/cpu/loongarch/globalDefinitions_loongarch.hpp @@ -0,0 +1,53 @@ +/* + * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_LOONGARCH_GLOBALDEFINITIONS_LOONGARCH_HPP +#define CPU_LOONGARCH_GLOBALDEFINITIONS_LOONGARCH_HPP +// Size of LoongArch Instructions +const int BytesPerInstWord = 4; + +const int StackAlignmentInBytes = (2*wordSize); + +// Indicates whether the C calling conventions require that +// 32-bit integer argument values are properly extended to 64 bits. +// If set, SharedRuntime::c_calling_convention() must adapt +// signatures accordingly. +const bool CCallingConventionRequiresIntsAsLongs = false; + +#define SUPPORTS_NATIVE_CX8 + +// FIXME: LA +// This makes the games we play when patching difficult, so when we +// come across an access that needs patching we deoptimize. There are +// ways we can avoid this, but these would slow down C1-compiled code +// in the default case. We could revisit this decision if we get any +// evidence that it's worth doing. 
+#define DEOPTIMIZE_WHEN_PATCHING + +#define SUPPORT_RESERVED_STACK_AREA + +#define THREAD_LOCAL_POLL + +#endif // CPU_LOONGARCH_GLOBALDEFINITIONS_LOONGARCH_HPP diff --git a/src/hotspot/cpu/loongarch/globals_loongarch.hpp b/src/hotspot/cpu/loongarch/globals_loongarch.hpp new file mode 100644 index 00000000000..e6b758b5543 --- /dev/null +++ b/src/hotspot/cpu/loongarch/globals_loongarch.hpp @@ -0,0 +1,109 @@ +/* + * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_LOONGARCH_GLOBALS_LOONGARCH_HPP +#define CPU_LOONGARCH_GLOBALS_LOONGARCH_HPP + +#include "utilities/globalDefinitions.hpp" +#include "utilities/macros.hpp" + +// Sets the default values for platform dependent flags used by the runtime system. +// (see globals.hpp) + +define_pd_global(bool, ShareVtableStubs, true); +define_pd_global(bool, NeedsDeoptSuspend, false); // only register window machines need this + +define_pd_global(bool, ImplicitNullChecks, true); // Generate code for implicit null checks +define_pd_global(bool, TrapBasedNullChecks, false); // Not needed on x86. +define_pd_global(bool, UncommonNullCast, true); // Uncommon-trap NULLs passed to check cast + +define_pd_global(uintx, CodeCacheSegmentSize, 64 TIERED_ONLY(+64)); // Tiered compilation has large code-entry alignment. 
+define_pd_global(intx, CodeEntryAlignment, 16); +define_pd_global(intx, OptoLoopAlignment, 16); +define_pd_global(intx, InlineFrequencyCount, 100); +define_pd_global(intx, InlineSmallCode, 2000); + +#define DEFAULT_STACK_YELLOW_PAGES (2) +#define DEFAULT_STACK_RED_PAGES (1) +#define DEFAULT_STACK_SHADOW_PAGES (20 DEBUG_ONLY(+4)) +#define DEFAULT_STACK_RESERVED_PAGES (1) + +#define MIN_STACK_YELLOW_PAGES DEFAULT_STACK_YELLOW_PAGES +#define MIN_STACK_RED_PAGES DEFAULT_STACK_RED_PAGES +#define MIN_STACK_SHADOW_PAGES DEFAULT_STACK_SHADOW_PAGES +#define MIN_STACK_RESERVED_PAGES (0) +define_pd_global(intx, StackReservedPages, DEFAULT_STACK_RESERVED_PAGES); + +define_pd_global(intx, StackYellowPages, 2); +define_pd_global(intx, StackRedPages, 1); +define_pd_global(intx, StackShadowPages, DEFAULT_STACK_SHADOW_PAGES); + +define_pd_global(bool, RewriteBytecodes, true); +define_pd_global(bool, RewriteFrequentPairs, true); +define_pd_global(bool, UseMembar, true); +// GC Ergo Flags +define_pd_global(intx, CMSYoungGenPerWorker, 64*M); // default max size of CMS young gen, per GC worker thread + +define_pd_global(uintx, TypeProfileLevel, 111); + +define_pd_global(bool, CompactStrings, true); + +define_pd_global(bool, PreserveFramePointer, false); + +define_pd_global(intx, InitArrayShortSize, 8*BytesPerLong); + +define_pd_global(bool, ThreadLocalHandshakes, true); +// Only c2 cares about this at the moment +define_pd_global(intx, AllocatePrefetchStyle, 2); +define_pd_global(intx, AllocatePrefetchDistance, -1); + +#define ARCH_FLAGS(develop, \ + product, \ + diagnostic, \ + experimental, \ + notproduct, \ + range, \ + constraint, \ + writeable) \ + \ + product(bool, UseCodeCacheAllocOpt, true, \ + "Allocate code cache within 32-bit memory address space") \ + \ + product(bool, UseLSX, false, \ + "Use LSX 128-bit vector instructions") \ + \ + product(bool, UseLASX, false, \ + "Use LASX 256-bit vector instructions") \ + \ + product(bool, UseBarriersForVolatile, false, \ + "Use memory barriers to implement volatile accesses") \ + \ + product(bool, UseCRC32, false, \ + "Use CRC32 instructions for CRC32 computation") \ + \ + product(bool, UseActiveCoresMP, false, \ + "Eliminate barriers for single active cpu") + +#endif // CPU_LOONGARCH_GLOBALS_LOONGARCH_HPP diff --git a/src/hotspot/cpu/loongarch/icBuffer_loongarch.cpp b/src/hotspot/cpu/loongarch/icBuffer_loongarch.cpp new file mode 100644 index 00000000000..7b976948271 --- /dev/null +++ b/src/hotspot/cpu/loongarch/icBuffer_loongarch.cpp @@ -0,0 +1,92 @@ +/* + * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "code/icBuffer.hpp" +#include "gc/shared/collectedHeap.inline.hpp" +#include "interpreter/bytecodes.hpp" +#include "memory/resourceArea.hpp" +#include "nativeInst_loongarch.hpp" +#include "oops/oop.inline.hpp" + +#define T0 RT0 +#define T1 RT1 +#define T2 RT2 +#define T3 RT3 +#define T4 RT4 +#define T5 RT5 +#define T6 RT6 +#define T7 RT7 +#define T8 RT8 + +int InlineCacheBuffer::ic_stub_code_size() { + return NativeMovConstReg::instruction_size + + NativeGeneralJump::instruction_size + + 1; + // so that code_end can be set in CodeBuffer + // 64bit 15 = 6 + 8 bytes + 1 byte + // 32bit 7 = 2 + 4 bytes + 1 byte +} + + +// we use T1 as cached oop(klass) now. this is the target of virtual call, +// when reach here, the receiver in T0 +// refer to shareRuntime_loongarch.cpp,gen_i2c2i_adapters +void InlineCacheBuffer::assemble_ic_buffer_code(address code_begin, void* cached_value, + address entry_point) { + ResourceMark rm; + CodeBuffer code(code_begin, ic_stub_code_size()); + MacroAssembler* masm = new MacroAssembler(&code); + // note: even though the code contains an embedded oop, we do not need reloc info + // because + // (1) the oop is old (i.e., doesn't matter for scavenges) + // (2) these ICStubs are removed *before* a GC happens, so the roots disappear + // assert(cached_oop == NULL || cached_oop->is_perm(), "must be perm oop"); +#define __ masm-> + __ patchable_li52(T1, (long)cached_value); + // TODO: confirm reloc + __ jmp(entry_point, relocInfo::runtime_call_type); + __ flush(); +#undef __ +} + + +address InlineCacheBuffer::ic_buffer_entry_point(address code_begin) { + NativeMovConstReg* move = nativeMovConstReg_at(code_begin); // creation also verifies the object + NativeGeneralJump* jump = nativeGeneralJump_at(move->next_instruction_address()); + return jump->jump_destination(); +} + + +void* InlineCacheBuffer::ic_buffer_cached_value(address code_begin) { + // creation also verifies the object + NativeMovConstReg* move = nativeMovConstReg_at(code_begin); + // Verifies the jump + NativeGeneralJump* jump = nativeGeneralJump_at(move->next_instruction_address()); + void* o= (void*)move->data(); + return o; +} diff --git a/src/hotspot/cpu/loongarch/icache_loongarch.cpp b/src/hotspot/cpu/loongarch/icache_loongarch.cpp new file mode 100644 index 00000000000..1ae7e5376cf --- /dev/null +++ b/src/hotspot/cpu/loongarch/icache_loongarch.cpp @@ -0,0 +1,42 @@ +/* + * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "runtime/icache.hpp" + +void ICacheStubGenerator::generate_icache_flush(ICache::flush_icache_stub_t* flush_icache_stub) +{ +#define __ _masm-> + StubCodeMark mark(this, "ICache", "flush_icache_stub"); + address start = __ pc(); + + __ ibar(0); + __ ori(V0, A2, 0); + __ jr(RA); + + *flush_icache_stub = (ICache::flush_icache_stub_t)start; +#undef __ +} diff --git a/src/hotspot/cpu/loongarch/icache_loongarch.hpp b/src/hotspot/cpu/loongarch/icache_loongarch.hpp new file mode 100644 index 00000000000..3a180549fc6 --- /dev/null +++ b/src/hotspot/cpu/loongarch/icache_loongarch.hpp @@ -0,0 +1,41 @@ +/* + * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_LOONGARCH_ICACHE_LOONGARCH_HPP +#define CPU_LOONGARCH_ICACHE_LOONGARCH_HPP + +// Interface for updating the instruction cache. Whenever the VM modifies +// code, part of the processor instruction cache potentially has to be flushed. + +class ICache : public AbstractICache { + public: + enum { + stub_size = 3 * BytesPerInstWord, // Size of the icache flush stub in bytes + line_size = 32, // flush instruction affects a dword + log2_line_size = 5 // log2(line_size) + }; +}; + +#endif // CPU_LOONGARCH_ICACHE_LOONGARCH_HPP diff --git a/src/hotspot/cpu/loongarch/interp_masm_loongarch.hpp b/src/hotspot/cpu/loongarch/interp_masm_loongarch.hpp new file mode 100644 index 00000000000..53a06ba7fd3 --- /dev/null +++ b/src/hotspot/cpu/loongarch/interp_masm_loongarch.hpp @@ -0,0 +1,281 @@ +/* + * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_LOONGARCH_INTERP_MASM_LOONGARCH_64_HPP +#define CPU_LOONGARCH_INTERP_MASM_LOONGARCH_64_HPP + +#include "asm/assembler.hpp" +#include "asm/macroAssembler.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "interpreter/invocationCounter.hpp" +#include "runtime/frame.hpp" + +// This file specializes the assember with interpreter-specific macros + +typedef ByteSize (*OffsetFunction)(uint); + +class InterpreterMacroAssembler: public MacroAssembler { +#ifndef CC_INTERP + private: + + Register _locals_register; // register that contains the pointer to the locals + Register _bcp_register; // register that contains the bcp + + protected: + // Interpreter specific version of call_VM_base + virtual void call_VM_leaf_base(address entry_point, + int number_of_arguments); + + virtual void call_VM_base(Register oop_result, + Register java_thread, + Register last_java_sp, + address entry_point, + int number_of_arguments, + bool check_exceptions); + + // base routine for all dispatches + void dispatch_base(TosState state, address* table, bool verifyoop = true, bool generate_poll = false); +#endif // CC_INTERP + + public: + void jump_to_entry(address entry); + // narrow int return value + void narrow(Register result); + + InterpreterMacroAssembler(CodeBuffer* code) : MacroAssembler(code), _locals_register(LVP), _bcp_register(BCP) {} + + void get_2_byte_integer_at_bcp(Register reg, Register tmp, int offset); + void get_4_byte_integer_at_bcp(Register reg, int offset); + + virtual void check_and_handle_popframe(Register java_thread); + virtual void check_and_handle_earlyret(Register java_thread); + + void load_earlyret_value(TosState state); + +#ifdef CC_INTERP + void save_bcp() { /* not needed in c++ interpreter and harmless */ } + void restore_bcp() { /* not needed in c++ interpreter and harmless */ } + + // Helpers for runtime call arguments/results + void get_method(Register reg); + +#else + + // Interpreter-specific registers + void save_bcp() { + st_d(BCP, FP, frame::interpreter_frame_bcp_offset * wordSize); + } + + void restore_bcp() { + ld_d(BCP, FP, frame::interpreter_frame_bcp_offset * wordSize); + } + + void restore_locals() { + ld_d(LVP, FP, frame::interpreter_frame_locals_offset * wordSize); + } + + // Helpers for runtime call arguments/results + void get_method(Register reg) { + ld_d(reg, FP, frame::interpreter_frame_method_offset * wordSize); + } + + void get_const(Register reg){ + get_method(reg); + ld_d(reg, reg, in_bytes(Method::const_offset())); + } + + void get_constant_pool(Register reg) { + get_const(reg); + ld_d(reg, reg, in_bytes(ConstMethod::constants_offset())); + } + + void get_constant_pool_cache(Register reg) { + get_constant_pool(reg); + ld_d(reg, reg, ConstantPool::cache_offset_in_bytes()); + } + + void get_cpool_and_tags(Register cpool, 
Register tags) { + get_constant_pool(cpool); + ld_d(tags, cpool, ConstantPool::tags_offset_in_bytes()); + } + + void get_unsigned_2_byte_index_at_bcp(Register reg, int bcp_offset); + void get_cache_and_index_at_bcp(Register cache, Register index, int bcp_offset, size_t index_size = sizeof(u2)); + void get_cache_and_index_and_bytecode_at_bcp(Register cache, Register index, Register bytecode, int byte_no, int bcp_offset, size_t index_size = sizeof(u2)); + void get_cache_entry_pointer_at_bcp(Register cache, Register tmp, int bcp_offset, size_t index_size = sizeof(u2)); + void get_cache_index_at_bcp(Register index, int bcp_offset, size_t index_size = sizeof(u2)); + void get_method_counters(Register method, Register mcs, Label& skip); + + // load cpool->resolved_references(index); + void load_resolved_reference_at_index(Register result, Register index, Register tmp); + + // load cpool->resolved_klass_at(index) + void load_resolved_klass_at_index(Register cpool, // the constant pool (corrupted on return) + Register index, // the constant pool index (corrupted on return) + Register klass); // contains the Klass on return + + void pop_ptr( Register r = FSR); + void pop_i( Register r = FSR); + void pop_l( Register r = FSR); + void pop_f(FloatRegister r = FSF); + void pop_d(FloatRegister r = FSF); + + void push_ptr( Register r = FSR); + void push_i( Register r = FSR); + void push_l( Register r = FSR); + void push_f(FloatRegister r = FSF); + void push_d(FloatRegister r = FSF); + + void pop(Register r ) { ((MacroAssembler*)this)->pop(r); } + + void push(Register r ) { ((MacroAssembler*)this)->push(r); } + + void pop(TosState state); // transition vtos -> state + void push(TosState state); // transition state -> vtos + + void empty_expression_stack() { + ld_d(SP, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); + // NULL last_sp until next java call + st_d(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); + } + + // Super call_VM calls - correspond to MacroAssembler::call_VM(_leaf) calls + void load_ptr(int n, Register val); + void store_ptr(int n, Register val); + + // Generate a subtype check: branch to ok_is_subtype if sub_klass is + // a subtype of super_klass. + //void gen_subtype_check( Register sub_klass, Label &ok_is_subtype ); + void gen_subtype_check( Register Rsup_klass, Register sub_klass, Label &ok_is_subtype ); + + // Dispatching + void dispatch_prolog(TosState state, int step = 0); + void dispatch_epilog(TosState state, int step = 0); + void dispatch_only(TosState state, bool generate_poll = false); + void dispatch_only_normal(TosState state); + void dispatch_only_noverify(TosState state); + void dispatch_next(TosState state, int step = 0, bool generate_poll = false); + void dispatch_via (TosState state, address* table); + + // jump to an invoked target + void prepare_to_jump_from_interpreted(); + void jump_from_interpreted(Register method, Register temp); + + + // Returning from interpreted functions + // + // Removes the current activation (incl. unlocking of monitors) + // and sets up the return address. This code is also used for + // exception unwindwing. In that case, we do not want to throw + // IllegalMonitorStateExceptions, since that might get us into an + // infinite rethrow exception loop. + // Additionally this code is used for popFrame and earlyReturn. + // In popFrame case we want to skip throwing an exception, + // installing an exception, and notifying jvmdi. 
+ // In earlyReturn case we only want to skip throwing an exception + // and installing an exception. + void remove_activation(TosState state, Register ret_addr, + bool throw_monitor_exception = true, + bool install_monitor_exception = true, + bool notify_jvmdi = true); +#endif // CC_INTERP + + // Object locking + void lock_object (Register lock_reg); + void unlock_object(Register lock_reg); + +#ifndef CC_INTERP + + // Interpreter profiling operations + void set_method_data_pointer_for_bcp(); + void test_method_data_pointer(Register mdp, Label& zero_continue); + void verify_method_data_pointer(); + + void set_mdp_data_at(Register mdp_in, int constant, Register value); + void increment_mdp_data_at(Address data, bool decrement = false); + void increment_mdp_data_at(Register mdp_in, int constant, + bool decrement = false); + void increment_mdp_data_at(Register mdp_in, Register reg, int constant, + bool decrement = false); + void increment_mask_and_jump(Address counter_addr, + int increment, int mask, + Register scratch, bool preloaded, + Condition cond, Label* where); + void set_mdp_flag_at(Register mdp_in, int flag_constant); + void test_mdp_data_at(Register mdp_in, int offset, Register value, + Register test_value_out, + Label& not_equal_continue); + + void record_klass_in_profile(Register receiver, Register mdp, + Register reg2, bool is_virtual_call); + void record_klass_in_profile_helper(Register receiver, Register mdp, + Register reg2, int start_row, + Label& done, bool is_virtual_call); + + void record_item_in_profile_helper(Register item, Register mdp, + Register reg2, int start_row, Label& done, int total_rows, + OffsetFunction item_offset_fn, OffsetFunction item_count_offset_fn, + int non_profiled_offset); + void update_mdp_by_offset(Register mdp_in, int offset_of_offset); + void update_mdp_by_offset(Register mdp_in, Register reg, int offset_of_disp); + void update_mdp_by_constant(Register mdp_in, int constant); + void update_mdp_for_ret(Register return_bci); + + void profile_taken_branch(Register mdp, Register bumped_count); + void profile_not_taken_branch(Register mdp); + void profile_call(Register mdp); + void profile_final_call(Register mdp); + void profile_virtual_call(Register receiver, Register mdp, + Register scratch2, + bool receiver_can_be_null = false); + void profile_called_method(Register method, Register mdp, Register reg2) NOT_JVMCI_RETURN; + void profile_ret(Register return_bci, Register mdp); + void profile_null_seen(Register mdp); + void profile_typecheck(Register mdp, Register klass, Register scratch); + void profile_typecheck_failed(Register mdp); + void profile_switch_default(Register mdp); + void profile_switch_case(Register index_in_scratch, Register mdp, + Register scratch2); + + // Debugging + // only if +VerifyOops && state == atos + void verify_oop(Register reg, TosState state = atos); + // only if +VerifyFPU && (state == ftos || state == dtos) + void verify_FPU(int stack_depth, TosState state = ftos); + + void profile_obj_type(Register obj, const Address& mdo_addr); + void profile_arguments_type(Register mdp, Register callee, Register tmp, bool is_virtual); + void profile_return_type(Register mdp, Register ret, Register tmp); + void profile_parameters_type(Register mdp, Register tmp1, Register tmp2); +#endif // !CC_INTERP + + typedef enum { NotifyJVMTI, SkipNotifyJVMTI } NotifyMethodExitMode; + + // support for jvmti/dtrace + void notify_method_entry(); + void notify_method_exit(TosState state, NotifyMethodExitMode mode); +}; + +#endif // 
CPU_LOONGARCH_INTERP_MASM_LOONGARCH_64_HPP diff --git a/src/hotspot/cpu/loongarch/interp_masm_loongarch_64.cpp b/src/hotspot/cpu/loongarch/interp_masm_loongarch_64.cpp new file mode 100644 index 00000000000..c533a576526 --- /dev/null +++ b/src/hotspot/cpu/loongarch/interp_masm_loongarch_64.cpp @@ -0,0 +1,2043 @@ +/* + * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "gc/shared/barrierSet.hpp" +#include "gc/shared/barrierSetAssembler.hpp" +#include "interp_masm_loongarch.hpp" +#include "interpreter/interpreter.hpp" +#include "interpreter/interpreterRuntime.hpp" +#include "oops/arrayOop.hpp" +#include "oops/markOop.hpp" +#include "oops/methodData.hpp" +#include "oops/method.hpp" +#include "prims/jvmtiExport.hpp" +#include "prims/jvmtiThreadState.hpp" +#include "runtime/basicLock.hpp" +#include "runtime/biasedLocking.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/safepointMechanism.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/thread.inline.hpp" + +#define T0 RT0 +#define T1 RT1 +#define T2 RT2 +#define T3 RT3 +#define T4 RT4 +#define T5 RT5 +#define T6 RT6 +#define T7 RT7 +#define T8 RT8 + +// Implementation of InterpreterMacroAssembler + +#ifdef CC_INTERP +void InterpreterMacroAssembler::get_method(Register reg) { +} +#endif // CC_INTERP + +void InterpreterMacroAssembler::get_2_byte_integer_at_bcp(Register reg, Register tmp, int offset) { + if (UseUnalignedAccesses) { + ld_hu(reg, BCP, offset); + } else { + ld_bu(reg, BCP, offset); + ld_bu(tmp, BCP, offset + 1); + bstrins_d(reg, tmp, 15, 8); + } +} + +void InterpreterMacroAssembler::get_4_byte_integer_at_bcp(Register reg, int offset) { + if (UseUnalignedAccesses) { + ld_wu(reg, BCP, offset); + } else { + ldr_w(reg, BCP, offset); + ldl_w(reg, BCP, offset + 3); + lu32i_d(reg, 0); + } +} + +void InterpreterMacroAssembler::jump_to_entry(address entry) { + assert(entry, "Entry must have been generated by now"); + jmp(entry); +} + +#ifndef CC_INTERP + +void InterpreterMacroAssembler::call_VM_leaf_base(address entry_point, + int number_of_arguments) { + // interpreter specific + // + // Note: No need to save/restore bcp & locals pointer + // since these are callee saved registers and no blocking/ + // GC can happen in leaf calls. + // Further Note: DO NOT save/restore bcp/locals. 
If a caller has + // already saved them so that it can use BCP/LVP as temporaries + // then a save/restore here will DESTROY the copy the caller + // saved! There used to be a save_bcp() that only happened in + // the ASSERT path (no restore_bcp). Which caused bizarre failures + // when jvm built with ASSERTs. +#ifdef ASSERT + save_bcp(); + { + Label L; + ld_d(AT,FP,frame::interpreter_frame_last_sp_offset * wordSize); + beq(AT,R0,L); + stop("InterpreterMacroAssembler::call_VM_leaf_base: last_sp != NULL"); + bind(L); + } +#endif + // super call + MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments); + // interpreter specific + // Used to ASSERT that BCP/LVP were equal to frame's bcp/locals + // but since they may not have been saved (and we don't want to + // save them here (see note above) the assert is invalid. +} + +void InterpreterMacroAssembler::call_VM_base(Register oop_result, + Register java_thread, + Register last_java_sp, + address entry_point, + int number_of_arguments, + bool check_exceptions) { + // interpreter specific + // + // Note: Could avoid restoring locals ptr (callee saved) - however doesn't + // really make a difference for these runtime calls, since they are + // slow anyway. Btw., bcp must be saved/restored since it may change + // due to GC. + assert(java_thread == noreg , "not expecting a precomputed java thread"); + save_bcp(); +#ifdef ASSERT + { + Label L; + ld_d(AT, FP, frame::interpreter_frame_last_sp_offset * wordSize); + beq(AT, R0, L); + stop("InterpreterMacroAssembler::call_VM_base: last_sp != NULL"); + bind(L); + } +#endif /* ASSERT */ + // super call + MacroAssembler::call_VM_base(oop_result, java_thread, last_java_sp, + entry_point, number_of_arguments, + check_exceptions); + // interpreter specific + restore_bcp(); + restore_locals(); +} + + +void InterpreterMacroAssembler::check_and_handle_popframe(Register java_thread) { + if (JvmtiExport::can_pop_frame()) { + Label L; + // Initiate popframe handling only if it is not already being + // processed. If the flag has the popframe_processing bit set, it + // means that this code is called *during* popframe handling - we + // don't want to reenter. + // This method is only called just after the call into the vm in + // call_VM_base, so the arg registers are available. 
+ // Not clear if any other register is available, so load AT twice + assert(AT != java_thread, "check"); + ld_w(AT, java_thread, in_bytes(JavaThread::popframe_condition_offset())); + andi(AT, AT, JavaThread::popframe_pending_bit); + beq(AT, R0, L); + + ld_w(AT, java_thread, in_bytes(JavaThread::popframe_condition_offset())); + andi(AT, AT, JavaThread::popframe_processing_bit); + bne(AT, R0, L); + call_VM_leaf(CAST_FROM_FN_PTR(address, Interpreter::remove_activation_preserving_args_entry)); + jr(V0); + bind(L); + } +} + + +void InterpreterMacroAssembler::load_earlyret_value(TosState state) { +#ifndef OPT_THREAD + Register thread = T8; + get_thread(thread); +#else + Register thread = TREG; +#endif + ld_ptr(T8, thread, in_bytes(JavaThread::jvmti_thread_state_offset())); + const Address tos_addr (T8, in_bytes(JvmtiThreadState::earlyret_tos_offset())); + const Address oop_addr (T8, in_bytes(JvmtiThreadState::earlyret_oop_offset())); + const Address val_addr (T8, in_bytes(JvmtiThreadState::earlyret_value_offset())); + //V0, oop_addr,V1,val_addr + switch (state) { + case atos: + ld_ptr(V0, oop_addr); + st_ptr(R0, oop_addr); + verify_oop(V0, state); + break; + case ltos: + ld_ptr(V0, val_addr); // fall through + break; + case btos: // fall through + case ztos: // fall through + case ctos: // fall through + case stos: // fall through + case itos: + ld_w(V0, val_addr); + break; + case ftos: + fld_s(F0, T8, in_bytes(JvmtiThreadState::earlyret_value_offset())); + break; + case dtos: + fld_d(F0, T8, in_bytes(JvmtiThreadState::earlyret_value_offset())); + break; + case vtos: /* nothing to do */ break; + default : ShouldNotReachHere(); + } + // Clean up tos value in the thread object + li(AT, (int)ilgl); + st_w(AT, tos_addr); + st_w(R0, T8, in_bytes(JvmtiThreadState::earlyret_value_offset())); +} + + +void InterpreterMacroAssembler::check_and_handle_earlyret(Register java_thread) { + if (JvmtiExport::can_force_early_return()) { + Label L; + Register tmp = T4; + + assert(java_thread != AT, "check"); + assert(java_thread != tmp, "check"); + ld_ptr(AT, java_thread, in_bytes(JavaThread::jvmti_thread_state_offset())); + beq(AT, R0, L); + + // Initiate earlyret handling only if it is not already being processed. + // If the flag has the earlyret_processing bit set, it means that this code + // is called *during* earlyret handling - we don't want to reenter. + ld_w(AT, AT, in_bytes(JvmtiThreadState::earlyret_state_offset())); + li(tmp, JvmtiThreadState::earlyret_pending); + bne(tmp, AT, L); + + // Call Interpreter::remove_activation_early_entry() to get the address of the + // same-named entrypoint in the generated interpreter code. 
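+    // The earlyret TosState is reloaded from the JvmtiThreadState and passed
+    // as the only argument; the returned entry address comes back in V0 and
+    // is jumped to directly.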
+ ld_ptr(tmp, java_thread, in_bytes(JavaThread::jvmti_thread_state_offset())); + ld_w(AT, tmp, in_bytes(JvmtiThreadState::earlyret_tos_offset())); + move(A0, AT); + call_VM_leaf(CAST_FROM_FN_PTR(address, Interpreter::remove_activation_early_entry), A0); + jr(V0); + bind(L); + } +} + + +void InterpreterMacroAssembler::get_unsigned_2_byte_index_at_bcp(Register reg, + int bcp_offset) { + assert(bcp_offset >= 0, "bcp is still pointing to start of bytecode"); + ld_bu(AT, BCP, bcp_offset); + ld_bu(reg, BCP, bcp_offset + 1); + bstrins_w(reg, AT, 15, 8); +} + + +void InterpreterMacroAssembler::get_cache_index_at_bcp(Register index, + int bcp_offset, + size_t index_size) { + assert(bcp_offset > 0, "bcp is still pointing to start of bytecode"); + if (index_size == sizeof(u2)) { + get_2_byte_integer_at_bcp(index, AT, bcp_offset); + } else if (index_size == sizeof(u4)) { + get_4_byte_integer_at_bcp(index, bcp_offset); + // Check if the secondary index definition is still ~x, otherwise + // we have to change the following assembler code to calculate the + // plain index. + assert(ConstantPool::decode_invokedynamic_index(~123) == 123, "else change next line"); + nor(index, index, R0); + slli_w(index, index, 0); + } else if (index_size == sizeof(u1)) { + ld_bu(index, BCP, bcp_offset); + } else { + ShouldNotReachHere(); + } +} + + +void InterpreterMacroAssembler::get_cache_and_index_at_bcp(Register cache, + Register index, + int bcp_offset, + size_t index_size) { + assert_different_registers(cache, index); + get_cache_index_at_bcp(index, bcp_offset, index_size); + ld_d(cache, FP, frame::interpreter_frame_cache_offset * wordSize); + assert(sizeof(ConstantPoolCacheEntry) == 4 * wordSize, "adjust code below"); + assert(exact_log2(in_words(ConstantPoolCacheEntry::size())) == 2, "else change next line"); + shl(index, 2); +} + + +void InterpreterMacroAssembler::get_cache_and_index_and_bytecode_at_bcp(Register cache, + Register index, + Register bytecode, + int byte_no, + int bcp_offset, + size_t index_size) { + get_cache_and_index_at_bcp(cache, index, bcp_offset, index_size); + // We use a 32-bit load here since the layout of 64-bit words on + // little-endian machines allow us that. 
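+  // In effect: bytecode = (indices_word >> ((1 + byte_no) * BitsPerByte))
+  //                       & ConstantPoolCacheEntry::bytecode_1_mask;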
+ alsl_d(AT, index, cache, Address::times_ptr - 1); + ld_w(bytecode, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::indices_offset())); + if(os::is_MP()) { + membar(Assembler::Membar_mask_bits(LoadLoad|LoadStore)); + } + + const int shift_count = (1 + byte_no) * BitsPerByte; + assert((byte_no == TemplateTable::f1_byte && shift_count == ConstantPoolCacheEntry::bytecode_1_shift) || + (byte_no == TemplateTable::f2_byte && shift_count == ConstantPoolCacheEntry::bytecode_2_shift), + "correct shift count"); + srli_d(bytecode, bytecode, shift_count); + assert(ConstantPoolCacheEntry::bytecode_1_mask == ConstantPoolCacheEntry::bytecode_2_mask, "common mask"); + li(AT, ConstantPoolCacheEntry::bytecode_1_mask); + andr(bytecode, bytecode, AT); +} + +void InterpreterMacroAssembler::get_cache_entry_pointer_at_bcp(Register cache, + Register tmp, + int bcp_offset, + size_t index_size) { + assert(bcp_offset > 0, "bcp is still pointing to start of bytecode"); + assert(cache != tmp, "must use different register"); + get_cache_index_at_bcp(tmp, bcp_offset, index_size); + assert(sizeof(ConstantPoolCacheEntry) == 4 * wordSize, "adjust code below"); + // convert from field index to ConstantPoolCacheEntry index + // and from word offset to byte offset + assert(exact_log2(in_bytes(ConstantPoolCacheEntry::size_in_bytes())) == 2 + LogBytesPerWord, "else change next line"); + shl(tmp, 2 + LogBytesPerWord); + ld_d(cache, FP, frame::interpreter_frame_cache_offset * wordSize); + // skip past the header + addi_d(cache, cache, in_bytes(ConstantPoolCache::base_offset())); + add_d(cache, cache, tmp); +} + +void InterpreterMacroAssembler::get_method_counters(Register method, + Register mcs, Label& skip) { + Label has_counters; + ld_d(mcs, method, in_bytes(Method::method_counters_offset())); + bne(mcs, R0, has_counters); + call_VM(noreg, CAST_FROM_FN_PTR(address, + InterpreterRuntime::build_method_counters), method); + ld_d(mcs, method, in_bytes(Method::method_counters_offset())); + beq(mcs, R0, skip); // No MethodCounters allocated, OutOfMemory + bind(has_counters); +} + +// Load object from cpool->resolved_references(index) +void InterpreterMacroAssembler::load_resolved_reference_at_index( + Register result, Register index, Register tmp) { + assert_different_registers(result, index); + // convert from field index to resolved_references() index and from + // word index to byte offset. Since this is a java object, it can be compressed + shl(index, LogBytesPerHeapOop); + + get_constant_pool(result); + // load pointer for resolved_references[] objArray + ld_d(result, result, ConstantPool::cache_offset_in_bytes()); + ld_d(result, result, ConstantPoolCache::resolved_references_offset_in_bytes()); + resolve_oop_handle(result, tmp); + // Add in the index + add_d(result, result, index); + load_heap_oop(result, Address(result, arrayOopDesc::base_offset_in_bytes(T_OBJECT)), tmp); +} + +// load cpool->resolved_klass_at(index) +void InterpreterMacroAssembler::load_resolved_klass_at_index(Register cpool, + Register index, Register klass) { + alsl_d(AT, index, cpool, Address::times_ptr - 1); + ld_h(index, AT, sizeof(ConstantPool)); + Register resolved_klasses = cpool; + ld_ptr(resolved_klasses, Address(cpool, ConstantPool::resolved_klasses_offset_in_bytes())); + alsl_d(AT, index, resolved_klasses, Address::times_ptr - 1); + ld_d(klass, AT, Array::base_offset_in_bytes()); +} + +// Resets LVP to locals. Register sub_klass cannot be any of the above. 
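+// Branches to ok_is_subtype if Rsub_klass is a subtype of Rsup_klass and falls
+// through otherwise; klass profiling and failed-check profiling are interleaved
+// with the check, using T1 and T4 as scratch (see the asserts below).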
+void InterpreterMacroAssembler::gen_subtype_check( Register Rsup_klass, Register Rsub_klass, Label &ok_is_subtype ) { + + assert( Rsub_klass != Rsup_klass, "Rsup_klass holds superklass" ); + assert( Rsub_klass != T1, "T1 holds 2ndary super array length" ); + assert( Rsub_klass != T0, "T0 holds 2ndary super array scan ptr" ); + // Profile the not-null value's klass. + // Here T4 and T1 are used as temporary registers. + profile_typecheck(T4, Rsub_klass, T1); // blows T4, reloads T1 + + // Do the check. + check_klass_subtype(Rsub_klass, Rsup_klass, T1, ok_is_subtype); // blows T1 + + // Profile the failure of the check. + profile_typecheck_failed(T4); // blows T4 + +} + + + +// Java Expression Stack + +void InterpreterMacroAssembler::pop_ptr(Register r) { + ld_d(r, SP, 0); + addi_d(SP, SP, Interpreter::stackElementSize); +} + +void InterpreterMacroAssembler::pop_i(Register r) { + ld_w(r, SP, 0); + addi_d(SP, SP, Interpreter::stackElementSize); +} + +void InterpreterMacroAssembler::pop_l(Register r) { + ld_d(r, SP, 0); + addi_d(SP, SP, 2 * Interpreter::stackElementSize); +} + +void InterpreterMacroAssembler::pop_f(FloatRegister r) { + fld_s(r, SP, 0); + addi_d(SP, SP, Interpreter::stackElementSize); +} + +void InterpreterMacroAssembler::pop_d(FloatRegister r) { + fld_d(r, SP, 0); + addi_d(SP, SP, 2 * Interpreter::stackElementSize); +} + +void InterpreterMacroAssembler::push_ptr(Register r) { + addi_d(SP, SP, - Interpreter::stackElementSize); + st_d(r, SP, 0); +} + +void InterpreterMacroAssembler::push_i(Register r) { + // For compatibility reason, don't change to sw. + addi_d(SP, SP, - Interpreter::stackElementSize); + st_d(r, SP, 0); +} + +void InterpreterMacroAssembler::push_l(Register r) { + addi_d(SP, SP, -2 * Interpreter::stackElementSize); + st_d(r, SP, 0); + st_d(R0, SP, Interpreter::stackElementSize); +} + +void InterpreterMacroAssembler::push_f(FloatRegister r) { + addi_d(SP, SP, - Interpreter::stackElementSize); + fst_s(r, SP, 0); +} + +void InterpreterMacroAssembler::push_d(FloatRegister r) { + addi_d(SP, SP, -2 * Interpreter::stackElementSize); + fst_d(r, SP, 0); + st_d(R0, SP, Interpreter::stackElementSize); +} + +void InterpreterMacroAssembler::pop(TosState state) { + switch (state) { + case atos: pop_ptr(); break; + case btos: + case ztos: + case ctos: + case stos: + case itos: pop_i(); break; + case ltos: pop_l(); break; + case ftos: pop_f(); break; + case dtos: pop_d(); break; + case vtos: /* nothing to do */ break; + default: ShouldNotReachHere(); + } + verify_oop(FSR, state); +} + +//FSR=V0,SSR=V1 +void InterpreterMacroAssembler::push(TosState state) { + verify_oop(FSR, state); + switch (state) { + case atos: push_ptr(); break; + case btos: + case ztos: + case ctos: + case stos: + case itos: push_i(); break; + case ltos: push_l(); break; + case ftos: push_f(); break; + case dtos: push_d(); break; + case vtos: /* nothing to do */ break; + default : ShouldNotReachHere(); + } +} + +void InterpreterMacroAssembler::load_ptr(int n, Register val) { + ld_d(val, SP, Interpreter::expr_offset_in_bytes(n)); +} + +void InterpreterMacroAssembler::store_ptr(int n, Register val) { + st_d(val, SP, Interpreter::expr_offset_in_bytes(n)); +} + +// Jump to from_interpreted entry of a call unless single stepping is possible +// in this thread in which case we must call the i2i entry +void InterpreterMacroAssembler::jump_from_interpreted(Register method, Register temp) { + // record last_sp + move(Rsender, SP); + st_d(SP, FP, frame::interpreter_frame_last_sp_offset * wordSize); + + if 
(JvmtiExport::can_post_interpreter_events()) { + Label run_compiled_code; + // JVMTI events, such as single-stepping, are implemented partly by avoiding running + // compiled code in threads for which the event is enabled. Check here for + // interp_only_mode if these events CAN be enabled. +#ifndef OPT_THREAD + Register thread = temp; + get_thread(temp); +#else + Register thread = TREG; +#endif + // interp_only is an int, on little endian it is sufficient to test the byte only + // Is a cmpl faster? + ld_w(AT, thread, in_bytes(JavaThread::interp_only_mode_offset())); + beq(AT, R0, run_compiled_code); + ld_d(AT, method, in_bytes(Method::interpreter_entry_offset())); + jr(AT); + bind(run_compiled_code); + } + + ld_d(AT, method, in_bytes(Method::from_interpreted_offset())); + jr(AT); +} + + +// The following two routines provide a hook so that an implementation +// can schedule the dispatch in two parts. LoongArch64 does not do this. +void InterpreterMacroAssembler::dispatch_prolog(TosState state, int step) { + // Nothing LoongArch64 specific to be done here +} + +void InterpreterMacroAssembler::dispatch_epilog(TosState state, int step) { + dispatch_next(state, step); +} + +// assume the next bytecode in T8. +void InterpreterMacroAssembler::dispatch_base(TosState state, + address* table, + bool verifyoop, + bool generate_poll) { + Register thread = TREG; +#ifndef OPT_THREAD + get_thread(thread); +#endif + + if (VerifyActivationFrameSize) { + Label L; + + sub_d(T2, FP, SP); + int min_frame_size = (frame::java_frame_link_offset - + frame::interpreter_frame_initial_sp_offset) * wordSize; + addi_d(T2, T2, -min_frame_size); + bge(T2, R0, L); + stop("broken stack frame"); + bind(L); + } + // FIXME: I do not know which register should pass to verify_oop + if (verifyoop) verify_oop(FSR, state); + + Label safepoint; + address* const safepoint_table = Interpreter::safept_table(state); + bool needs_thread_local_poll = generate_poll && + SafepointMechanism::uses_thread_local_poll() && table != safepoint_table; + + if (needs_thread_local_poll) { + NOT_PRODUCT(block_comment("Thread-local Safepoint poll")); + ld_d(T3, thread, in_bytes(Thread::polling_page_offset())); + andi(T3, T3, SafepointMechanism::poll_bit()); + bne(T3, R0, safepoint); + } + + if((long)table >= (long)Interpreter::dispatch_table(btos) && + (long)table <= (long)Interpreter::dispatch_table(vtos)) { + int table_size = (long)Interpreter::dispatch_table(itos) - + (long)Interpreter::dispatch_table(stos); + int table_offset = ((int)state - (int)itos) * table_size; + + // S8 points to the starting address of Interpreter::dispatch_table(itos). + // See StubGenerator::generate_call_stub(address& return_address) for the initialization of S8. 
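+    // The dispatch target is table[Rnext]. Since S8 already holds
+    // dispatch_table(itos), the entry address is S8 + Rnext * wordSize plus
+    // table_offset, which selects the table for the current TosState.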
+    if (table_offset != 0) {
+      if (is_simm(table_offset, 12)) {
+        alsl_d(T3, Rnext, S8, LogBytesPerWord - 1);
+        ld_d(T3, T3, table_offset);
+      } else {
+        li(T2, table_offset);
+        alsl_d(T3, Rnext, S8, LogBytesPerWord - 1);
+        ldx_d(T3, T2, T3);
+      }
+    } else {
+      slli_d(T2, Rnext, LogBytesPerWord);
+      ldx_d(T3, S8, T2);
+    }
+  } else {
+    li(T3, (long)table);
+    slli_d(T2, Rnext, LogBytesPerWord);
+    ldx_d(T3, T2, T3);
+  }
+  jr(T3);
+
+  if (needs_thread_local_poll) {
+    bind(safepoint);
+    li(T3, (long)safepoint_table);
+    slli_d(T2, Rnext, LogBytesPerWord);
+    ldx_d(T3, T3, T2);
+    jr(T3);
+  }
+}
+
+void InterpreterMacroAssembler::dispatch_only(TosState state, bool generate_poll) {
+  dispatch_base(state, Interpreter::dispatch_table(state), true, generate_poll);
+}
+
+void InterpreterMacroAssembler::dispatch_only_normal(TosState state) {
+  dispatch_base(state, Interpreter::normal_table(state));
+}
+
+void InterpreterMacroAssembler::dispatch_only_noverify(TosState state) {
+  dispatch_base(state, Interpreter::normal_table(state), false);
+}
+
+
+void InterpreterMacroAssembler::dispatch_next(TosState state, int step, bool generate_poll) {
+  // load next bytecode
+  ld_bu(Rnext, BCP, step);
+  increment(BCP, step);
+  dispatch_base(state, Interpreter::dispatch_table(state), true, generate_poll);
+}
+
+void InterpreterMacroAssembler::dispatch_via(TosState state, address* table) {
+  // load current bytecode
+  ld_bu(Rnext, BCP, 0);
+  dispatch_base(state, table);
+}
+
+// remove activation
+//
+// Unlock the receiver if this is a synchronized method.
+// Unlock any Java monitors from synchronized blocks.
+// Remove the activation from the stack.
+//
+// If there are locked Java monitors
+//    If throw_monitor_exception
+//       throws IllegalMonitorStateException
+//    Else if install_monitor_exception
+//       installs IllegalMonitorStateException
+//    Else
+//       no error processing
+// used registers: T1, T2, T3, T8
+// T1 : thread, method access flags
+// T2 : monitor entry pointer
+// T3 : method, monitor top
+// T8 : unlock flag
+void InterpreterMacroAssembler::remove_activation(
+        TosState state,
+        Register ret_addr,
+        bool throw_monitor_exception,
+        bool install_monitor_exception,
+        bool notify_jvmdi) {
+  // Note: Registers V0, V1 and F0, F1 may be in use for the result
+  // check if synchronized method
+  Label unlocked, unlock, no_unlock;
+
+  // get the value of _do_not_unlock_if_synchronized into T8
+#ifndef OPT_THREAD
+  Register thread = T1;
+  get_thread(thread);
+#else
+  Register thread = TREG;
+#endif
+  ld_b(T8, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset()));
+  // reset the flag
+  st_b(R0, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset()));
+  // get method access flags
+  ld_d(T3, FP, frame::interpreter_frame_method_offset * wordSize);
+  ld_w(T1, T3, in_bytes(Method::access_flags_offset()));
+  andi(T1, T1, JVM_ACC_SYNCHRONIZED);
+  beq(T1, R0, unlocked);
+
+  // Don't unlock anything if the _do_not_unlock_if_synchronized flag is set.
+  bne(T8, R0, no_unlock);
+  // unlock monitor
+  push(state); // save result
+
+  // BasicObjectLock will be first in list, since this is a
+  // synchronized method. However, need to check that the object has
+  // not been unlocked by an explicit monitorexit bytecode.
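+  // The first monitor sits just below interpreter_frame_initial_sp, so its
+  // BasicObjectLock address can be computed directly from FP.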
+ addi_d(c_rarg0, FP, frame::interpreter_frame_initial_sp_offset * wordSize + - (int)sizeof(BasicObjectLock)); + // address of first monitor + ld_d(T1, c_rarg0, BasicObjectLock::obj_offset_in_bytes()); + bne(T1, R0, unlock); + pop(state); + if (throw_monitor_exception) { + // Entry already unlocked, need to throw exception + // I think LA do not need empty_FPU_stack + // remove possible return value from FPU-stack, otherwise stack could overflow + empty_FPU_stack(); + call_VM(NOREG, CAST_FROM_FN_PTR(address, + InterpreterRuntime::throw_illegal_monitor_state_exception)); + should_not_reach_here(); + } else { + // Monitor already unlocked during a stack unroll. If requested, + // install an illegal_monitor_state_exception. Continue with + // stack unrolling. + if (install_monitor_exception) { + // remove possible return value from FPU-stack, + // otherwise stack could overflow + empty_FPU_stack(); + call_VM(NOREG, CAST_FROM_FN_PTR(address, + InterpreterRuntime::new_illegal_monitor_state_exception)); + + } + + b(unlocked); + } + + bind(unlock); + unlock_object(c_rarg0); + pop(state); + + // Check that for block-structured locking (i.e., that all locked + // objects has been unlocked) + bind(unlocked); + + // V0, V1: Might contain return value + + // Check that all monitors are unlocked + { + Label loop, exception, entry, restart; + const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; + const Address monitor_block_top(FP, + frame::interpreter_frame_monitor_block_top_offset * wordSize); + + bind(restart); + // points to current entry, starting with top-most entry + ld_d(c_rarg0, monitor_block_top); + // points to word before bottom of monitor block + addi_d(T3, FP, frame::interpreter_frame_initial_sp_offset * wordSize); + b(entry); + + // Entry already locked, need to throw exception + bind(exception); + + if (throw_monitor_exception) { + // Throw exception + // remove possible return value from FPU-stack, + // otherwise stack could overflow + empty_FPU_stack(); + MacroAssembler::call_VM(NOREG, CAST_FROM_FN_PTR(address, + InterpreterRuntime::throw_illegal_monitor_state_exception)); + should_not_reach_here(); + } else { + // Stack unrolling. 
Unlock object and install illegal_monitor_exception + // Unlock does not block, so don't have to worry about the frame + // We don't have to preserve c_rarg0, since we are going to + // throw an exception + + push(state); + unlock_object(c_rarg0); + pop(state); + + if (install_monitor_exception) { + empty_FPU_stack(); + call_VM(NOREG, CAST_FROM_FN_PTR(address, + InterpreterRuntime::new_illegal_monitor_state_exception)); + } + + b(restart); + } + + bind(loop); + ld_d(T1, c_rarg0, BasicObjectLock::obj_offset_in_bytes()); + bne(T1, R0, exception);// check if current entry is used + + addi_d(c_rarg0, c_rarg0, entry_size);// otherwise advance to next entry + bind(entry); + bne(c_rarg0, T3, loop); // check if bottom reached + } + + bind(no_unlock); + + // jvmpi support (jvmdi does not generate MethodExit on exception / popFrame) + if (notify_jvmdi) { + notify_method_exit(state, NotifyJVMTI); // preserve TOSCA + } else { + notify_method_exit(state, SkipNotifyJVMTI); // preserve TOSCA + } + + // remove activation + ld_d(TSR, FP, frame::interpreter_frame_sender_sp_offset * wordSize); + if (StackReservedPages > 0) { + // testing if reserved zone needs to be re-enabled + Label no_reserved_zone_enabling; + + ld_d(AT, Address(thread, JavaThread::reserved_stack_activation_offset())); + sub_d(AT, TSR, AT); + bge(R0, AT, no_reserved_zone_enabling); + + call_VM_leaf( + CAST_FROM_FN_PTR(address, SharedRuntime::enable_stack_reserved_zone), thread); + call_VM(noreg, CAST_FROM_FN_PTR(address, + InterpreterRuntime::throw_delayed_StackOverflowError)); + should_not_reach_here(); + + bind(no_reserved_zone_enabling); + } + ld_d(ret_addr, FP, frame::java_frame_return_addr_offset * wordSize); + ld_d(FP, FP, frame::interpreter_frame_sender_fp_offset * wordSize); + move(SP, TSR); // set sp to sender sp +} + +#endif // CC_INTERP + +// Lock object +// +// Args: +// c_rarg0: BasicObjectLock to be used for locking +// +// Kills: +// T1 +// T2 +void InterpreterMacroAssembler::lock_object(Register lock_reg) { + assert(lock_reg == c_rarg0, "The argument is only for looks. 
It must be c_rarg0"); + + if (UseHeavyMonitors) { + call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), lock_reg); + } else { + Label done, slow_case; + const Register tmp_reg = T2; + const Register scr_reg = T1; + const int obj_offset = BasicObjectLock::obj_offset_in_bytes(); + const int lock_offset = BasicObjectLock::lock_offset_in_bytes (); + const int mark_offset = lock_offset + BasicLock::displaced_header_offset_in_bytes(); + + // Load object pointer into scr_reg + ld_d(scr_reg, lock_reg, obj_offset); + + if (UseBiasedLocking) { + // Note: we use noreg for the temporary register since it's hard + // to come up with a free register on all incoming code paths + biased_locking_enter(lock_reg, scr_reg, tmp_reg, noreg, false, done, &slow_case); + } + + // Load (object->mark() | 1) into tmp_reg + ld_d(AT, scr_reg, 0); + ori(tmp_reg, AT, 1); + + // Save (object->mark() | 1) into BasicLock's displaced header + st_d(tmp_reg, lock_reg, mark_offset); + + assert(lock_offset == 0, "displached header must be first word in BasicObjectLock"); + + if (PrintBiasedLockingStatistics) { + Label succ, fail; + cmpxchg(Address(scr_reg, 0), tmp_reg, lock_reg, AT, true, false, succ, &fail); + bind(succ); + atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, AT, scr_reg); + b(done); + bind(fail); + } else { + cmpxchg(Address(scr_reg, 0), tmp_reg, lock_reg, AT, true, false, done); + } + + // Test if the oopMark is an obvious stack pointer, i.e., + // 1) (mark & 3) == 0, and + // 2) SP <= mark < SP + os::pagesize() + // + // These 3 tests can be done by evaluating the following + // expression: ((mark - sp) & (3 - os::vm_page_size())), + // assuming both stack pointer and pagesize have their + // least significant 2 bits clear. + // NOTE: the oopMark is in tmp_reg as the result of cmpxchg + sub_d(tmp_reg, tmp_reg, SP); + li(AT, 7 - os::vm_page_size()); + andr(tmp_reg, tmp_reg, AT); + // Save the test result, for recursive case, the result is zero + st_d(tmp_reg, lock_reg, mark_offset); + if (PrintBiasedLockingStatistics) { + bnez(tmp_reg, slow_case); + atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, AT, scr_reg); + } + beqz(tmp_reg, done); + + bind(slow_case); + // Call the runtime routine for slow case + call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), lock_reg); + + bind(done); + } +} + +// Unlocks an object. Used in monitorexit bytecode and +// remove_activation. Throws an IllegalMonitorException if object is +// not locked by current thread. +// +// Args: +// c_rarg0: BasicObjectLock for lock +// +// Kills: +// T1 +// T2 +// T3 +// Throw an IllegalMonitorException if object is not locked by current thread +void InterpreterMacroAssembler::unlock_object(Register lock_reg) { + assert(lock_reg == c_rarg0, "The argument is only for looks. 
It must be c_rarg0"); + + if (UseHeavyMonitors) { + call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), lock_reg); + } else { + Label done; + const Register tmp_reg = T1; + const Register scr_reg = T2; + const Register hdr_reg = T3; + + save_bcp(); // Save in case of exception + + // Convert from BasicObjectLock structure to object and BasicLock structure + // Store the BasicLock address into tmp_reg + addi_d(tmp_reg, lock_reg, BasicObjectLock::lock_offset_in_bytes()); + + // Load oop into scr_reg + ld_d(scr_reg, lock_reg, BasicObjectLock::obj_offset_in_bytes()); + // free entry + st_d(R0, lock_reg, BasicObjectLock::obj_offset_in_bytes()); + if (UseBiasedLocking) { + biased_locking_exit(scr_reg, hdr_reg, done); + } + + // Load the old header from BasicLock structure + ld_d(hdr_reg, tmp_reg, BasicLock::displaced_header_offset_in_bytes()); + // zero for recursive case + beqz(hdr_reg, done); + + // Atomic swap back the old header + cmpxchg(Address(scr_reg, 0), tmp_reg, hdr_reg, AT, false, false, done); + + // Call the runtime routine for slow case. + st_d(scr_reg, lock_reg, BasicObjectLock::obj_offset_in_bytes()); // restore obj + call_VM(NOREG, + CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), + lock_reg); + + bind(done); + + restore_bcp(); + } +} + +#ifndef CC_INTERP + +void InterpreterMacroAssembler::test_method_data_pointer(Register mdp, + Label& zero_continue) { + assert(ProfileInterpreter, "must be profiling interpreter"); + ld_d(mdp, Address(FP, frame::interpreter_frame_mdp_offset * wordSize)); + beq(mdp, R0, zero_continue); +} + + +// Set the method data pointer for the current bcp. +void InterpreterMacroAssembler::set_method_data_pointer_for_bcp() { + assert(ProfileInterpreter, "must be profiling interpreter"); + Label set_mdp; + + // V0 and T0 will be used as two temporary registers. + push2(V0, T0); + + get_method(T0); + // Test MDO to avoid the call if it is NULL. + ld_d(V0, T0, in_bytes(Method::method_data_offset())); + beq(V0, R0, set_mdp); + + // method: T0 + // bcp: BCP --> S0 + call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::bcp_to_di), T0, BCP); + // mdi: V0 + // mdo is guaranteed to be non-zero here, we checked for it before the call. + get_method(T0); + ld_d(T0, T0, in_bytes(Method::method_data_offset())); + addi_d(T0, T0, in_bytes(MethodData::data_offset())); + add_d(V0, T0, V0); + bind(set_mdp); + st_d(V0, FP, frame::interpreter_frame_mdp_offset * wordSize); + pop2(V0, T0); +} + +void InterpreterMacroAssembler::verify_method_data_pointer() { + assert(ProfileInterpreter, "must be profiling interpreter"); +#ifdef ASSERT + Label verify_continue; + Register method = T5; + Register mdp = T6; + Register tmp = A0; + push(method); + push(mdp); + push(tmp); + test_method_data_pointer(mdp, verify_continue); // If mdp is zero, continue + get_method(method); + + // If the mdp is valid, it will point to a DataLayout header which is + // consistent with the bcp. The converse is highly probable also. 
+ ld_hu(tmp, mdp, in_bytes(DataLayout::bci_offset())); + ld_d(AT, method, in_bytes(Method::const_offset())); + add_d(tmp, tmp, AT); + addi_d(tmp, tmp, in_bytes(ConstMethod::codes_offset())); + beq(tmp, BCP, verify_continue); + call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::verify_mdp), method, BCP, mdp); + bind(verify_continue); + pop(tmp); + pop(mdp); + pop(method); +#endif // ASSERT +} + + +void InterpreterMacroAssembler::set_mdp_data_at(Register mdp_in, + int constant, + Register value) { + assert(ProfileInterpreter, "must be profiling interpreter"); + Address data(mdp_in, constant); + st_d(value, data); +} + + +void InterpreterMacroAssembler::increment_mdp_data_at(Register mdp_in, + int constant, + bool decrement) { + // Counter address + Address data(mdp_in, constant); + + increment_mdp_data_at(data, decrement); +} + +void InterpreterMacroAssembler::increment_mdp_data_at(Address data, + bool decrement) { + assert(ProfileInterpreter, "must be profiling interpreter"); + // %%% this does 64bit counters at best it is wasting space + // at worst it is a rare bug when counters overflow + Register tmp = S0; + push(tmp); + if (decrement) { + assert(DataLayout::counter_increment == 1, "flow-free idiom only works with 1"); + // Decrement the register. + ld_d(AT, data); + sltu(tmp, R0, AT); + sub_d(AT, AT, tmp); + st_d(AT, data); + } else { + assert(DataLayout::counter_increment == 1, "flow-free idiom only works with 1"); + // Increment the register. + ld_d(AT, data); + addi_d(tmp, AT, DataLayout::counter_increment); + sltu(tmp, R0, tmp); + add_d(AT, AT, tmp); + st_d(AT, data); + } + pop(tmp); +} + + +void InterpreterMacroAssembler::increment_mdp_data_at(Register mdp_in, + Register reg, + int constant, + bool decrement) { + Register tmp = S0; + push(tmp); + if (decrement) { + assert(Assembler::is_simm(constant, 12), "constant is not a simm12 !"); + assert(DataLayout::counter_increment == 1, "flow-free idiom only works with 1"); + // Decrement the register. + add_d(tmp, mdp_in, reg); + ld_d(AT, tmp, constant); + sltu(tmp, R0, AT); + sub_d(AT, AT, tmp); + add_d(tmp, mdp_in, reg); + st_d(AT, tmp, constant); + } else { + assert(Assembler::is_simm(constant, 12), "constant is not a simm12 !"); + assert(DataLayout::counter_increment == 1, "flow-free idiom only works with 1"); + // Increment the register. + add_d(tmp, mdp_in, reg); + ld_d(AT, tmp, constant); + addi_d(tmp, AT, DataLayout::counter_increment); + sltu(tmp, R0, tmp); + add_d(AT, AT, tmp); + add_d(tmp, mdp_in, reg); + st_d(AT, tmp, constant); + } + pop(tmp); +} + +void InterpreterMacroAssembler::set_mdp_flag_at(Register mdp_in, + int flag_byte_constant) { + assert(ProfileInterpreter, "must be profiling interpreter"); + int header_offset = in_bytes(DataLayout::header_offset()); + int header_bits = DataLayout::flag_mask_to_header_mask(flag_byte_constant); + // Set the flag + ld_w(AT, Address(mdp_in, header_offset)); + if(Assembler::is_simm(header_bits, 12)) { + ori(AT, AT, header_bits); + } else { + push(T8); + // T8 is used as a temporary register. 
+ li(T8, header_bits); + orr(AT, AT, T8); + pop(T8); + } + st_w(AT, Address(mdp_in, header_offset)); +} + + +void InterpreterMacroAssembler::test_mdp_data_at(Register mdp_in, + int offset, + Register value, + Register test_value_out, + Label& not_equal_continue) { + assert(ProfileInterpreter, "must be profiling interpreter"); + if (test_value_out == noreg) { + ld_d(AT, Address(mdp_in, offset)); + bne(AT, value, not_equal_continue); + } else { + // Put the test value into a register, so caller can use it: + ld_d(test_value_out, Address(mdp_in, offset)); + bne(value, test_value_out, not_equal_continue); + } +} + + +void InterpreterMacroAssembler::update_mdp_by_offset(Register mdp_in, + int offset_of_disp) { + assert(ProfileInterpreter, "must be profiling interpreter"); + assert(Assembler::is_simm(offset_of_disp, 12), "offset is not an simm12"); + ld_d(AT, mdp_in, offset_of_disp); + add_d(mdp_in, mdp_in, AT); + st_d(mdp_in, Address(FP, frame::interpreter_frame_mdp_offset * wordSize)); +} + + +void InterpreterMacroAssembler::update_mdp_by_offset(Register mdp_in, + Register reg, + int offset_of_disp) { + assert(ProfileInterpreter, "must be profiling interpreter"); + add_d(AT, reg, mdp_in); + assert(Assembler::is_simm(offset_of_disp, 12), "offset is not an simm12"); + ld_d(AT, AT, offset_of_disp); + add_d(mdp_in, mdp_in, AT); + st_d(mdp_in, Address(FP, frame::interpreter_frame_mdp_offset * wordSize)); +} + + +void InterpreterMacroAssembler::update_mdp_by_constant(Register mdp_in, + int constant) { + assert(ProfileInterpreter, "must be profiling interpreter"); + if(Assembler::is_simm(constant, 12)) { + addi_d(mdp_in, mdp_in, constant); + } else { + li(AT, constant); + add_d(mdp_in, mdp_in, AT); + } + st_d(mdp_in, Address(FP, frame::interpreter_frame_mdp_offset * wordSize)); +} + + +void InterpreterMacroAssembler::update_mdp_for_ret(Register return_bci) { + assert(ProfileInterpreter, "must be profiling interpreter"); + push(return_bci); // save/restore across call_VM + call_VM(noreg, + CAST_FROM_FN_PTR(address, InterpreterRuntime::update_mdp_for_ret), + return_bci); + pop(return_bci); +} + + +void InterpreterMacroAssembler::profile_taken_branch(Register mdp, + Register bumped_count) { + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + // Otherwise, assign to mdp + test_method_data_pointer(mdp, profile_continue); + + // We are taking a branch. Increment the taken count. + // We inline increment_mdp_data_at to return bumped_count in a register + //increment_mdp_data_at(mdp, in_bytes(JumpData::taken_offset())); + ld_d(bumped_count, mdp, in_bytes(JumpData::taken_offset())); + assert(DataLayout::counter_increment == 1, "flow-free idiom only works with 1"); + addi_d(AT, bumped_count, DataLayout::counter_increment); + sltu(AT, R0, AT); + add_d(bumped_count, bumped_count, AT); + st_d(bumped_count, mdp, in_bytes(JumpData::taken_offset())); // Store back out + // The method data pointer needs to be updated to reflect the new target. + update_mdp_by_offset(mdp, in_bytes(JumpData::displacement_offset())); + bind(profile_continue); + } +} + + +void InterpreterMacroAssembler::profile_not_taken_branch(Register mdp) { + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + // We are taking a branch. Increment the not taken count. 
+ increment_mdp_data_at(mdp, in_bytes(BranchData::not_taken_offset())); + + // The method data pointer needs to be updated to correspond to + // the next bytecode + update_mdp_by_constant(mdp, in_bytes(BranchData::branch_data_size())); + bind(profile_continue); + } +} + + +void InterpreterMacroAssembler::profile_call(Register mdp) { + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + // We are making a call. Increment the count. + increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); + + // The method data pointer needs to be updated to reflect the new target. + update_mdp_by_constant(mdp, in_bytes(CounterData::counter_data_size())); + bind(profile_continue); + } +} + + +void InterpreterMacroAssembler::profile_final_call(Register mdp) { + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + // We are making a call. Increment the count. + increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); + + // The method data pointer needs to be updated to reflect the new target. + update_mdp_by_constant(mdp, + in_bytes(VirtualCallData:: + virtual_call_data_size())); + bind(profile_continue); + } +} + + +void InterpreterMacroAssembler::profile_virtual_call(Register receiver, + Register mdp, + Register reg2, + bool receiver_can_be_null) { + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + Label skip_receiver_profile; + if (receiver_can_be_null) { + Label not_null; + bnez(receiver, not_null); + // We are making a call. Increment the count. + increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); + b(skip_receiver_profile); + bind(not_null); + } + + // Record the receiver type. + record_klass_in_profile(receiver, mdp, reg2, true); + bind(skip_receiver_profile); + + // The method data pointer needs to be updated to reflect the new target. + update_mdp_by_constant(mdp, + in_bytes(VirtualCallData:: + virtual_call_data_size())); + bind(profile_continue); + } +} + +#if INCLUDE_JVMCI +void InterpreterMacroAssembler::profile_called_method(Register method, Register mdp, Register reg2) { + assert_different_registers(method, mdp, reg2); + if (ProfileInterpreter && MethodProfileWidth > 0) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + Label done; + record_item_in_profile_helper(method, mdp, reg2, 0, done, MethodProfileWidth, + &VirtualCallData::method_offset, &VirtualCallData::method_count_offset, in_bytes(VirtualCallData::nonprofiled_receiver_count_offset())); + bind(done); + + update_mdp_by_constant(mdp, in_bytes(VirtualCallData::virtual_call_data_size())); + bind(profile_continue); + } +} +#endif // INCLUDE_JVMCI + +// This routine creates a state machine for updating the multi-row +// type profile at a virtual call site (or other type-sensitive bytecode). +// The machine visits each row (of receiver/count) until the receiver type +// is found, or until it runs out of rows. At the same time, it remembers +// the location of the first empty row. (An empty row records null for its +// receiver, and can be allocated for a newly-observed receiver type.) 
+// Because there are two degrees of freedom in the state, a simple linear +// search will not work; it must be a decision tree. Hence this helper +// function is recursive, to generate the required tree structured code. +// It's the interpreter, so we are trading off code space for speed. +// See below for example code. +void InterpreterMacroAssembler::record_klass_in_profile_helper( + Register receiver, Register mdp, + Register reg2, int start_row, + Label& done, bool is_virtual_call) { + if (TypeProfileWidth == 0) { + if (is_virtual_call) { + increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); + } +#if INCLUDE_JVMCI + else if (EnableJVMCI) { + increment_mdp_data_at(mdp, in_bytes(ReceiverTypeData::nonprofiled_receiver_count_offset())); + } +#endif // INCLUDE_JVMCI + } else { + int non_profiled_offset = -1; + if (is_virtual_call) { + non_profiled_offset = in_bytes(CounterData::count_offset()); + } +#if INCLUDE_JVMCI + else if (EnableJVMCI) { + non_profiled_offset = in_bytes(ReceiverTypeData::nonprofiled_receiver_count_offset()); + } +#endif // INCLUDE_JVMCI + + record_item_in_profile_helper(receiver, mdp, reg2, 0, done, TypeProfileWidth, + &VirtualCallData::receiver_offset, &VirtualCallData::receiver_count_offset, non_profiled_offset); + } +} + +void InterpreterMacroAssembler::record_item_in_profile_helper(Register item, Register mdp, + Register reg2, int start_row, Label& done, int total_rows, + OffsetFunction item_offset_fn, OffsetFunction item_count_offset_fn, + int non_profiled_offset) { + int last_row = total_rows - 1; + assert(start_row <= last_row, "must be work left to do"); + // Test this row for both the item and for null. + // Take any of three different outcomes: + // 1. found item => increment count and goto done + // 2. found null => keep looking for case 1, maybe allocate this cell + // 3. found something else => keep looking for cases 1 and 2 + // Case 3 is handled by a recursive call. + for (int row = start_row; row <= last_row; row++) { + Label next_test; + bool test_for_null_also = (row == start_row); + + // See if the receiver is item[n]. + int item_offset = in_bytes(item_offset_fn(row)); + test_mdp_data_at(mdp, item_offset, item, + (test_for_null_also ? reg2 : noreg), + next_test); + // (Reg2 now contains the item from the CallData.) + + // The receiver is item[n]. Increment count[n]. + int count_offset = in_bytes(item_count_offset_fn(row)); + increment_mdp_data_at(mdp, count_offset); + b(done); + bind(next_test); + + if (test_for_null_also) { + Label found_null; + // Failed the equality check on item[n]... Test for null. + if (start_row == last_row) { + // The only thing left to do is handle the null case. + if (non_profiled_offset >= 0) { + beqz(reg2, found_null); + // Item did not match any saved item and there is no empty row for it. + // Increment total counter to indicate polymorphic case. + increment_mdp_data_at(mdp, non_profiled_offset); + b(done); + bind(found_null); + } else { + bnez(reg2, done); + } + break; + } + // Since null is rare, make it be the branch-taken case. + beqz(reg2, found_null); + + // Put all the "Case 3" tests here. + record_item_in_profile_helper(item, mdp, reg2, start_row + 1, done, total_rows, + item_offset_fn, item_count_offset_fn, non_profiled_offset); + + // Found a null. Keep searching for a matching item, + // but remember that this is an empty (unused) slot. + bind(found_null); + } + } + + // In the fall-through case, we found no matching item, but we + // observed the item[start_row] is NULL. 
+ + // Fill in the item field and increment the count. + int item_offset = in_bytes(item_offset_fn(start_row)); + set_mdp_data_at(mdp, item_offset, item); + int count_offset = in_bytes(item_count_offset_fn(start_row)); + li(reg2, DataLayout::counter_increment); + set_mdp_data_at(mdp, count_offset, reg2); + if (start_row > 0) { + b(done); + } +} + +// Example state machine code for three profile rows: +// // main copy of decision tree, rooted at row[1] +// if (row[0].rec == rec) { row[0].incr(); goto done; } +// if (row[0].rec != NULL) { +// // inner copy of decision tree, rooted at row[1] +// if (row[1].rec == rec) { row[1].incr(); goto done; } +// if (row[1].rec != NULL) { +// // degenerate decision tree, rooted at row[2] +// if (row[2].rec == rec) { row[2].incr(); goto done; } +// if (row[2].rec != NULL) { goto done; } // overflow +// row[2].init(rec); goto done; +// } else { +// // remember row[1] is empty +// if (row[2].rec == rec) { row[2].incr(); goto done; } +// row[1].init(rec); goto done; +// } +// } else { +// // remember row[0] is empty +// if (row[1].rec == rec) { row[1].incr(); goto done; } +// if (row[2].rec == rec) { row[2].incr(); goto done; } +// row[0].init(rec); goto done; +// } +// done: + +void InterpreterMacroAssembler::record_klass_in_profile(Register receiver, + Register mdp, Register reg2, + bool is_virtual_call) { + assert(ProfileInterpreter, "must be profiling"); + Label done; + + record_klass_in_profile_helper(receiver, mdp, reg2, 0, done, is_virtual_call); + + bind (done); +} + +void InterpreterMacroAssembler::profile_ret(Register return_bci, + Register mdp) { + if (ProfileInterpreter) { + Label profile_continue; + uint row; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + // Update the total ret count. + increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); + + for (row = 0; row < RetData::row_limit(); row++) { + Label next_test; + + // See if return_bci is equal to bci[n]: + test_mdp_data_at(mdp, + in_bytes(RetData::bci_offset(row)), + return_bci, noreg, + next_test); + + // return_bci is equal to bci[n]. Increment the count. + increment_mdp_data_at(mdp, in_bytes(RetData::bci_count_offset(row))); + + // The method data pointer needs to be updated to reflect the new target. + update_mdp_by_offset(mdp, + in_bytes(RetData::bci_displacement_offset(row))); + b(profile_continue); + bind(next_test); + } + + update_mdp_for_ret(return_bci); + + bind(profile_continue); + } +} + + +void InterpreterMacroAssembler::profile_null_seen(Register mdp) { + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + set_mdp_flag_at(mdp, BitData::null_seen_byte_constant()); + + // The method data pointer needs to be updated. + int mdp_delta = in_bytes(BitData::bit_data_size()); + if (TypeProfileCasts) { + mdp_delta = in_bytes(VirtualCallData::virtual_call_data_size()); + } + update_mdp_by_constant(mdp, mdp_delta); + + bind(profile_continue); + } +} + + +void InterpreterMacroAssembler::profile_typecheck_failed(Register mdp) { + if (ProfileInterpreter && TypeProfileCasts) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + int count_offset = in_bytes(CounterData::count_offset()); + // Back up the address, since we have already bumped the mdp. 
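+    // profile_typecheck() advanced mdp past the whole VirtualCallData, so
+    // index backwards from the current mdp to reach its count cell.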
+ count_offset -= in_bytes(VirtualCallData::virtual_call_data_size()); + + // *Decrement* the counter. We expect to see zero or small negatives. + increment_mdp_data_at(mdp, count_offset, true); + + bind (profile_continue); + } +} + + +void InterpreterMacroAssembler::profile_typecheck(Register mdp, Register klass, Register reg2) { + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + // The method data pointer needs to be updated. + int mdp_delta = in_bytes(BitData::bit_data_size()); + if (TypeProfileCasts) { + mdp_delta = in_bytes(VirtualCallData::virtual_call_data_size()); + + // Record the object type. + record_klass_in_profile(klass, mdp, reg2, false); + } + update_mdp_by_constant(mdp, mdp_delta); + + bind(profile_continue); + } +} + + +void InterpreterMacroAssembler::profile_switch_default(Register mdp) { + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + // Update the default case count + increment_mdp_data_at(mdp, + in_bytes(MultiBranchData::default_count_offset())); + + // The method data pointer needs to be updated. + update_mdp_by_offset(mdp, + in_bytes(MultiBranchData:: + default_displacement_offset())); + + bind(profile_continue); + } +} + + +void InterpreterMacroAssembler::profile_switch_case(Register index, + Register mdp, + Register reg2) { + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + // Build the base (index * per_case_size_in_bytes()) + + // case_array_offset_in_bytes() + li(reg2, in_bytes(MultiBranchData::per_case_size())); + mul_d(index, index, reg2); + addi_d(index, index, in_bytes(MultiBranchData::case_array_offset())); + + // Update the case count + increment_mdp_data_at(mdp, + index, + in_bytes(MultiBranchData::relative_count_offset())); + + // The method data pointer needs to be updated. + update_mdp_by_offset(mdp, + index, + in_bytes(MultiBranchData:: + relative_displacement_offset())); + + bind(profile_continue); + } +} + + +void InterpreterMacroAssembler::narrow(Register result) { + // Get method->_constMethod->_result_type + ld_d(T4, FP, frame::interpreter_frame_method_offset * wordSize); + ld_d(T4, T4, in_bytes(Method::const_offset())); + ld_bu(T4, T4, in_bytes(ConstMethod::result_type_offset())); + + Label done, notBool, notByte, notChar; + + // common case first + addi_d(AT, T4, -T_INT); + beq(AT, R0, done); + + // mask integer result to narrower return type. 
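+  // T4 holds the declared result type: booleans keep only bit 0, bytes and
+  // shorts are sign-extended, chars are zero-extended to 16 bits, and T_INT
+  // needs no conversion.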
+ addi_d(AT, T4, -T_BOOLEAN); + bne(AT, R0, notBool); + andi(result, result, 0x1); + beq(R0, R0, done); + + bind(notBool); + addi_d(AT, T4, -T_BYTE); + bne(AT, R0, notByte); + ext_w_b(result, result); + beq(R0, R0, done); + + bind(notByte); + addi_d(AT, T4, -T_CHAR); + bne(AT, R0, notChar); + bstrpick_d(result, result, 15, 0); + beq(R0, R0, done); + + bind(notChar); + ext_w_h(result, result); + + // Nothing to do for T_INT + bind(done); +} + + +void InterpreterMacroAssembler::profile_obj_type(Register obj, const Address& mdo_addr) { + Label update, next, none; + + verify_oop(obj); + + if (mdo_addr.index() != noreg) { + guarantee(T0 != mdo_addr.base(), "The base register will be corrupted !"); + guarantee(T0 != mdo_addr.index(), "The index register will be corrupted !"); + push(T0); + alsl_d(T0, mdo_addr.index(), mdo_addr.base(), mdo_addr.scale() - 1); + } + + bnez(obj, update); + + if (mdo_addr.index() == noreg) { + ld_d(AT, mdo_addr); + } else { + ld_d(AT, T0, mdo_addr.disp()); + } + ori(AT, AT, TypeEntries::null_seen); + if (mdo_addr.index() == noreg) { + st_d(AT, mdo_addr); + } else { + st_d(AT, T0, mdo_addr.disp()); + } + + b(next); + + bind(update); + load_klass(obj, obj); + + if (mdo_addr.index() == noreg) { + ld_d(AT, mdo_addr); + } else { + ld_d(AT, T0, mdo_addr.disp()); + } + xorr(obj, obj, AT); + + assert(TypeEntries::type_klass_mask == -4, "must be"); + bstrpick_d(AT, obj, 63, 2); + beqz(AT, next); + + andi(AT, obj, TypeEntries::type_unknown); + bnez(AT, next); + + if (mdo_addr.index() == noreg) { + ld_d(AT, mdo_addr); + } else { + ld_d(AT, T0, mdo_addr.disp()); + } + beqz(AT, none); + + addi_d(AT, AT, -(TypeEntries::null_seen)); + beqz(AT, none); + + // There is a chance that the checks above (re-reading profiling + // data from memory) fail if another thread has just set the + // profiling to this obj's klass + if (mdo_addr.index() == noreg) { + ld_d(AT, mdo_addr); + } else { + ld_d(AT, T0, mdo_addr.disp()); + } + xorr(obj, obj, AT); + assert(TypeEntries::type_klass_mask == -4, "must be"); + bstrpick_d(AT, obj, 63, 2); + beqz(AT, next); + + // different than before. Cannot keep accurate profile. + if (mdo_addr.index() == noreg) { + ld_d(AT, mdo_addr); + } else { + ld_d(AT, T0, mdo_addr.disp()); + } + ori(AT, AT, TypeEntries::type_unknown); + if (mdo_addr.index() == noreg) { + st_d(AT, mdo_addr); + } else { + st_d(AT, T0, mdo_addr.disp()); + } + b(next); + + bind(none); + // first time here. Set profile type. + if (mdo_addr.index() == noreg) { + st_d(obj, mdo_addr); + } else { + st_d(obj, T0, mdo_addr.disp()); + } + + bind(next); + if (mdo_addr.index() != noreg) { + pop(T0); + } +} + +void InterpreterMacroAssembler::profile_arguments_type(Register mdp, Register callee, Register tmp, bool is_virtual) { + if (!ProfileInterpreter) { + return; + } + + if (MethodData::profile_arguments() || MethodData::profile_return()) { + Label profile_continue; + + test_method_data_pointer(mdp, profile_continue); + + int off_to_start = is_virtual ? in_bytes(VirtualCallData::virtual_call_data_size()) : in_bytes(CounterData::counter_data_size()); + + ld_b(AT, mdp, in_bytes(DataLayout::tag_offset()) - off_to_start); + li(tmp, is_virtual ? 
DataLayout::virtual_call_type_data_tag : DataLayout::call_type_data_tag); + bne(tmp, AT, profile_continue); + + + if (MethodData::profile_arguments()) { + Label done; + int off_to_args = in_bytes(TypeEntriesAtCall::args_data_offset()); + if (Assembler::is_simm(off_to_args, 12)) { + addi_d(mdp, mdp, off_to_args); + } else { + li(AT, off_to_args); + add_d(mdp, mdp, AT); + } + + + for (int i = 0; i < TypeProfileArgsLimit; i++) { + if (i > 0 || MethodData::profile_return()) { + // If return value type is profiled we may have no argument to profile + ld_d(tmp, mdp, in_bytes(TypeEntriesAtCall::cell_count_offset())-off_to_args); + + if (Assembler::is_simm(-1 * i * TypeStackSlotEntries::per_arg_count(), 12)) { + addi_w(tmp, tmp, -1 * i * TypeStackSlotEntries::per_arg_count()); + } else { + li(AT, i*TypeStackSlotEntries::per_arg_count()); + sub_w(tmp, tmp, AT); + } + + li(AT, TypeStackSlotEntries::per_arg_count()); + blt(tmp, AT, done); + } + ld_d(tmp, callee, in_bytes(Method::const_offset())); + + ld_hu(tmp, tmp, in_bytes(ConstMethod::size_of_parameters_offset())); + + // stack offset o (zero based) from the start of the argument + // list, for n arguments translates into offset n - o - 1 from + // the end of the argument list + ld_d(AT, mdp, in_bytes(TypeEntriesAtCall::stack_slot_offset(i))-off_to_args); + sub_d(tmp, tmp, AT); + + addi_w(tmp, tmp, -1); + + Address arg_addr = argument_address(tmp); + ld_d(tmp, arg_addr); + + Address mdo_arg_addr(mdp, in_bytes(TypeEntriesAtCall::argument_type_offset(i))-off_to_args); + profile_obj_type(tmp, mdo_arg_addr); + + int to_add = in_bytes(TypeStackSlotEntries::per_arg_size()); + if (Assembler::is_simm(to_add, 12)) { + addi_d(mdp, mdp, to_add); + } else { + li(AT, to_add); + add_d(mdp, mdp, AT); + } + + off_to_args += to_add; + } + + if (MethodData::profile_return()) { + ld_d(tmp, mdp, in_bytes(TypeEntriesAtCall::cell_count_offset())-off_to_args); + + int tmp_arg_counts = TypeProfileArgsLimit*TypeStackSlotEntries::per_arg_count(); + if (Assembler::is_simm(-1 * tmp_arg_counts, 12)) { + addi_w(tmp, tmp, -1 * tmp_arg_counts); + } else { + li(AT, tmp_arg_counts); + sub_w(mdp, mdp, AT); + } + } + + bind(done); + + if (MethodData::profile_return()) { + // We're right after the type profile for the last + // argument. tmp is the number of cells left in the + // CallTypeData/VirtualCallTypeData to reach its end. Non null + // if there's a return to profile. 
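+        // Scale that cell count by DataLayout::cell_size to get a byte offset
+        // before bumping mdp past the remaining cells.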
+        assert(ReturnTypeEntry::static_cell_count() < TypeStackSlotEntries::per_arg_count(), "can't move past ret type");
+        slli_w(tmp, tmp, exact_log2(DataLayout::cell_size));
+        add_d(mdp, mdp, tmp);
+      }
+      st_d(mdp, FP, frame::interpreter_frame_mdp_offset * wordSize);
+    } else {
+      assert(MethodData::profile_return(), "either profile call args or call ret");
+      update_mdp_by_constant(mdp, in_bytes(TypeEntriesAtCall::return_only_size()));
+    }
+
+    // mdp points right after the end of the
+    // CallTypeData/VirtualCallTypeData, right after the cells for the
+    // return value type if there's one
+
+    bind(profile_continue);
+  }
+}
+
+void InterpreterMacroAssembler::profile_return_type(Register mdp, Register ret, Register tmp) {
+  assert_different_registers(mdp, ret, tmp, _bcp_register);
+  if (ProfileInterpreter && MethodData::profile_return()) {
+    Label profile_continue, done;
+
+    test_method_data_pointer(mdp, profile_continue);
+
+    if (MethodData::profile_return_jsr292_only()) {
+      assert(Method::intrinsic_id_size_in_bytes() == 2, "assuming Method::_intrinsic_id is u2");
+
+      // If we don't profile all invoke bytecodes we must make sure
+      // it's a bytecode we indeed profile. We can't go back to the
+      // beginning of the ProfileData we intend to update to check its
+      // type because we're right after it and we don't know its
+      // length
+      Label do_profile;
+      ld_b(tmp, _bcp_register, 0);
+      addi_d(AT, tmp, -1 * Bytecodes::_invokedynamic);
+      beqz(AT, do_profile);
+      addi_d(AT, tmp, -1 * Bytecodes::_invokehandle);
+      beqz(AT, do_profile);
+
+      get_method(tmp);
+      ld_hu(tmp, tmp, Method::intrinsic_id_offset_in_bytes());
+      li(AT, vmIntrinsics::_compiledLambdaForm);
+      bne(tmp, AT, profile_continue);
+
+      bind(do_profile);
+    }
+
+    Address mdo_ret_addr(mdp, -in_bytes(ReturnTypeEntry::size()));
+    add_d(tmp, ret, R0);
+    profile_obj_type(tmp, mdo_ret_addr);
+
+    bind(profile_continue);
+  }
+}
+
+void InterpreterMacroAssembler::profile_parameters_type(Register mdp, Register tmp1, Register tmp2) {
+  guarantee(T4 == tmp1, "You are required to use T4 as the index register for LoongArch!");
+
+  if (ProfileInterpreter && MethodData::profile_parameters()) {
+    Label profile_continue, done;
+
+    test_method_data_pointer(mdp, profile_continue);
+
+    // Load the offset of the area within the MDO used for
+    // parameters. If it's negative we're not profiling any parameters
+    ld_w(tmp1, mdp, in_bytes(MethodData::parameters_type_data_di_offset()) - in_bytes(MethodData::data_offset()));
+    blt(tmp1, R0, profile_continue);
+
+    // Compute a pointer to the area for parameters from the offset
+    // and move the pointer to the slot for the last
+    // parameters. Collect profiling from last parameter down.
+ // mdo start + parameters offset + array length - 1 + add_d(mdp, mdp, tmp1); + ld_d(tmp1, mdp, in_bytes(ArrayData::array_len_offset())); + decrement(tmp1, TypeStackSlotEntries::per_arg_count()); + + + Label loop; + bind(loop); + + int off_base = in_bytes(ParametersTypeData::stack_slot_offset(0)); + int type_base = in_bytes(ParametersTypeData::type_offset(0)); + Address::ScaleFactor per_arg_scale = Address::times(DataLayout::cell_size); + Address arg_type(mdp, tmp1, per_arg_scale, type_base); + + // load offset on the stack from the slot for this parameter + alsl_d(AT, tmp1, mdp, per_arg_scale - 1); + ld_d(tmp2, AT, off_base); + + sub_d(tmp2, R0, tmp2); + + // read the parameter from the local area + slli_d(AT, tmp2, Interpreter::logStackElementSize); + ldx_d(tmp2, AT, _locals_register); + + // profile the parameter + profile_obj_type(tmp2, arg_type); + + // go to next parameter + decrement(tmp1, TypeStackSlotEntries::per_arg_count()); + blt(R0, tmp1, loop); + + bind(profile_continue); + } +} + +void InterpreterMacroAssembler::verify_oop(Register reg, TosState state) { + if (state == atos) { + MacroAssembler::verify_oop(reg); + } +} + +void InterpreterMacroAssembler::verify_FPU(int stack_depth, TosState state) { +} +#endif // !CC_INTERP + + +void InterpreterMacroAssembler::notify_method_entry() { + // Whenever JVMTI is interp_only_mode, method entry/exit events are sent to + // track stack depth. If it is possible to enter interp_only_mode we add + // the code to check if the event should be sent. + Register tempreg = T0; +#ifndef OPT_THREAD + Register thread = T8; + get_thread(thread); +#else + Register thread = TREG; +#endif + if (JvmtiExport::can_post_interpreter_events()) { + Label L; + ld_w(tempreg, thread, in_bytes(JavaThread::interp_only_mode_offset())); + beq(tempreg, R0, L); + call_VM(noreg, CAST_FROM_FN_PTR(address, + InterpreterRuntime::post_method_entry)); + bind(L); + } + + { + SkipIfEqual skip_if(this, &DTraceMethodProbes, 0); + get_method(S3); + call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry), + //Rthread, + thread, + //Rmethod); + S3); + } +} + +void InterpreterMacroAssembler::notify_method_exit( + TosState state, NotifyMethodExitMode mode) { + Register tempreg = T0; +#ifndef OPT_THREAD + Register thread = T8; + get_thread(thread); +#else + Register thread = TREG; +#endif + // Whenever JVMTI is interp_only_mode, method entry/exit events are sent to + // track stack depth. If it is possible to enter interp_only_mode we add + // the code to check if the event should be sent. + if (mode == NotifyJVMTI && JvmtiExport::can_post_interpreter_events()) { + Label skip; + // Note: frame::interpreter_frame_result has a dependency on how the + // method result is saved across the call to post_method_exit. If this + // is changed then the interpreter_frame_result implementation will + // need to be updated too. + + // template interpreter will leave it on the top of the stack. + push(state); + ld_w(tempreg, thread, in_bytes(JavaThread::interp_only_mode_offset())); + beq(tempreg, R0, skip); + call_VM(noreg, + CAST_FROM_FN_PTR(address, InterpreterRuntime::post_method_exit)); + bind(skip); + pop(state); + } + + { + // Dtrace notification + SkipIfEqual skip_if(this, &DTraceMethodProbes, 0); + push(state); + get_method(S3); + call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), + //Rthread, Rmethod); + thread, S3); + pop(state); + } +} + +// Jump if ((*counter_addr += increment) & mask) satisfies the condition. 
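+// Only the Assembler::zero condition is implemented below; any other
+// condition hits unimplemented().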
+void InterpreterMacroAssembler::increment_mask_and_jump(Address counter_addr, + int increment, int mask, + Register scratch, bool preloaded, + Condition cond, Label* where) { + assert_different_registers(scratch, AT); + + if (!preloaded) { + ld_w(scratch, counter_addr); + } + addi_w(scratch, scratch, increment); + st_w(scratch, counter_addr); + + li(AT, mask); + andr(scratch, scratch, AT); + + if (cond == Assembler::zero) { + beq(scratch, R0, *where); + } else { + unimplemented(); + } +} diff --git a/src/hotspot/cpu/loongarch/interpreterRT_loongarch.hpp b/src/hotspot/cpu/loongarch/interpreterRT_loongarch.hpp new file mode 100644 index 00000000000..d53d951a160 --- /dev/null +++ b/src/hotspot/cpu/loongarch/interpreterRT_loongarch.hpp @@ -0,0 +1,62 @@ +/* + * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_LOONGARCH_INTERPRETERRT_LOONGARCH_HPP +#define CPU_LOONGARCH_INTERPRETERRT_LOONGARCH_HPP + +// This is included in the middle of class Interpreter. +// Do not include files here. + +// native method calls + +class SignatureHandlerGenerator: public NativeSignatureIterator { + private: + MacroAssembler* _masm; + unsigned int _num_fp_args; + unsigned int _num_int_args; + int _stack_offset; + + void move(int from_offset, int to_offset); + void box(int from_offset, int to_offset); + void pass_int(); + void pass_long(); + void pass_object(); + void pass_float(); + void pass_double(); + + public: + // Creation + SignatureHandlerGenerator(const methodHandle& method, CodeBuffer* buffer); + + // Code generation + void generate(uint64_t fingerprint); + + // Code generation support + static Register from(); + static Register to(); + static Register temp(); +}; + +#endif // CPU_LOONGARCH_INTERPRETERRT_LOONGARCH_HPP diff --git a/src/hotspot/cpu/loongarch/interpreterRT_loongarch_64.cpp b/src/hotspot/cpu/loongarch/interpreterRT_loongarch_64.cpp new file mode 100644 index 00000000000..e2f31997b73 --- /dev/null +++ b/src/hotspot/cpu/loongarch/interpreterRT_loongarch_64.cpp @@ -0,0 +1,273 @@ +/* + * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "interpreter/interpreter.hpp" +#include "interpreter/interpreterRuntime.hpp" +#include "memory/allocation.inline.hpp" +#include "memory/universe.hpp" +#include "oops/method.hpp" +#include "oops/oop.inline.hpp" +#include "runtime/handles.inline.hpp" +#include "runtime/icache.hpp" +#include "runtime/interfaceSupport.inline.hpp" +#include "runtime/signature.hpp" + +#define __ _masm-> + +#define T0 RT0 +#define T1 RT1 +#define T2 RT2 +#define T3 RT3 +#define T4 RT4 +#define T5 RT5 +#define T6 RT6 +#define T7 RT7 +#define T8 RT8 + +// Implementation of SignatureHandlerGenerator +InterpreterRuntime::SignatureHandlerGenerator::SignatureHandlerGenerator( + const methodHandle& method, CodeBuffer* buffer) : NativeSignatureIterator(method) { + _masm = new MacroAssembler(buffer); + _num_int_args = (method->is_static() ? 1 : 0); + _num_fp_args = 0; + _stack_offset = 0; +} + +void InterpreterRuntime::SignatureHandlerGenerator::move(int from_offset, int to_offset) { + __ ld_d(temp(), from(), Interpreter::local_offset_in_bytes(from_offset)); + __ st_d(temp(), to(), to_offset * longSize); +} + +void InterpreterRuntime::SignatureHandlerGenerator::box(int from_offset, int to_offset) { + __ addi_d(temp(), from(),Interpreter::local_offset_in_bytes(from_offset) ); + __ ld_w(AT, from(), Interpreter::local_offset_in_bytes(from_offset) ); + + __ maskeqz(temp(), temp(), AT); + __ st_w(temp(), to(), to_offset * wordSize); +} + +void InterpreterRuntime::SignatureHandlerGenerator::generate(uint64_t fingerprint) { + // generate code to handle arguments + iterate(fingerprint); + // return result handler + __ li(V0, AbstractInterpreter::result_handler(method()->result_type())); + // return + __ jr(RA); + + __ flush(); +} + +void InterpreterRuntime::SignatureHandlerGenerator::pass_int() { + if (_num_int_args < Argument::n_register_parameters - 1) { + __ ld_w(as_Register(++_num_int_args + A0->encoding()), from(), Interpreter::local_offset_in_bytes(offset())); + } else { + __ ld_w(AT, from(), Interpreter::local_offset_in_bytes(offset())); + __ st_w(AT, to(), _stack_offset); + _stack_offset += wordSize; + } +} + +// the jvm specifies that long type takes 2 stack spaces, so in do_long(), _offset += 2. 
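+// Hence the 64-bit value is read from Interpreter::local_offset_in_bytes(offset() + 1) below.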
+void InterpreterRuntime::SignatureHandlerGenerator::pass_long() { + if (_num_int_args < Argument::n_register_parameters - 1) { + __ ld_d(as_Register(++_num_int_args + A0->encoding()), from(), Interpreter::local_offset_in_bytes(offset() + 1)); + } else { + __ ld_d(AT, from(), Interpreter::local_offset_in_bytes(offset() + 1)); + __ st_d(AT, to(), _stack_offset); + _stack_offset += wordSize; + } +} + +void InterpreterRuntime::SignatureHandlerGenerator::pass_object() { + if (_num_int_args < Argument::n_register_parameters - 1) { + Register reg = as_Register(++_num_int_args + A0->encoding()); + if (_num_int_args == 1) { + assert(offset() == 0, "argument register 1 can only be (non-null) receiver"); + __ addi_d(reg, from(), Interpreter::local_offset_in_bytes(offset())); + } else { + __ ld_d(reg, from(), Interpreter::local_offset_in_bytes(offset())); + __ addi_d(AT, from(), Interpreter::local_offset_in_bytes(offset())); + __ maskeqz(reg, AT, reg); + } + } else { + __ ld_d(temp(), from(), Interpreter::local_offset_in_bytes(offset())); + __ addi_d(AT, from(), Interpreter::local_offset_in_bytes(offset())); + __ maskeqz(temp(), AT, temp()); + __ st_d(temp(), to(), _stack_offset); + _stack_offset += wordSize; + } +} + +void InterpreterRuntime::SignatureHandlerGenerator::pass_float() { + if (_num_fp_args < Argument::n_float_register_parameters) { + __ fld_s(as_FloatRegister(_num_fp_args++), from(), Interpreter::local_offset_in_bytes(offset())); + } else if (_num_int_args < Argument::n_register_parameters - 1) { + __ ld_w(as_Register(++_num_int_args + A0->encoding()), from(), Interpreter::local_offset_in_bytes(offset())); + } else { + __ ld_w(AT, from(), Interpreter::local_offset_in_bytes(offset())); + __ st_w(AT, to(), _stack_offset); + _stack_offset += wordSize; + } +} + +// the jvm specifies that double type takes 2 stack spaces, so in do_double(), _offset += 2. 
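+// As with pass_float() above, pass_double() first tries a floating-point
+// argument register; when those are exhausted it falls back to an integer
+// argument register (the LoongArch64 calling convention allows FP arguments
+// to travel in GPRs) and finally to the outgoing stack slot at _stack_offset.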
+void InterpreterRuntime::SignatureHandlerGenerator::pass_double() { + if (_num_fp_args < Argument::n_float_register_parameters) { + __ fld_d(as_FloatRegister(_num_fp_args++), from(), Interpreter::local_offset_in_bytes(offset() + 1)); + } else if (_num_int_args < Argument::n_register_parameters - 1) { + __ ld_d(as_Register(++_num_int_args + A0->encoding()), from(), Interpreter::local_offset_in_bytes(offset() + 1)); + } else { + __ ld_d(AT, from(), Interpreter::local_offset_in_bytes(offset() + 1)); + __ st_d(AT, to(), _stack_offset); + _stack_offset += wordSize; + } +} + + +Register InterpreterRuntime::SignatureHandlerGenerator::from() { return LVP; } +Register InterpreterRuntime::SignatureHandlerGenerator::to() { return SP; } +Register InterpreterRuntime::SignatureHandlerGenerator::temp() { return T8; } + +// Implementation of SignatureHandlerLibrary + +void SignatureHandlerLibrary::pd_set_handler(address handler) {} + + +class SlowSignatureHandler + : public NativeSignatureIterator { + private: + address _from; + intptr_t* _to; + intptr_t* _int_args; + intptr_t* _fp_args; + intptr_t* _fp_identifiers; + unsigned int _num_int_args; + unsigned int _num_fp_args; + + virtual void pass_int() + { + jint from_obj = *(jint *)(_from+Interpreter::local_offset_in_bytes(0)); + _from -= Interpreter::stackElementSize; + + if (_num_int_args < Argument::n_register_parameters - 1) { + *_int_args++ = from_obj; + _num_int_args++; + } else { + *_to++ = from_obj; + } + } + + virtual void pass_long() + { + intptr_t from_obj = *(intptr_t*)(_from+Interpreter::local_offset_in_bytes(1)); + _from -= 2 * Interpreter::stackElementSize; + + if (_num_int_args < Argument::n_register_parameters - 1) { + *_int_args++ = from_obj; + _num_int_args++; + } else { + *_to++ = from_obj; + } + } + + virtual void pass_object() + { + intptr_t *from_addr = (intptr_t*)(_from + Interpreter::local_offset_in_bytes(0)); + _from -= Interpreter::stackElementSize; + + if (_num_int_args < Argument::n_register_parameters - 1) { + *_int_args++ = (*from_addr == 0) ? NULL : (intptr_t) from_addr; + _num_int_args++; + } else { + *_to++ = (*from_addr == 0) ? NULL : (intptr_t) from_addr; + } + } + + virtual void pass_float() + { + jint from_obj = *(jint *)(_from+Interpreter::local_offset_in_bytes(0)); + _from -= Interpreter::stackElementSize; + + if (_num_fp_args < Argument::n_float_register_parameters) { + *_fp_args++ = from_obj; + _num_fp_args++; + } else if (_num_int_args < Argument::n_register_parameters - 1) { + *_int_args++ = from_obj; + _num_int_args++; + } else { + *_to++ = from_obj; + } + } + + virtual void pass_double() + { + intptr_t from_obj = *(intptr_t*)(_from+Interpreter::local_offset_in_bytes(1)); + _from -= 2*Interpreter::stackElementSize; + + if (_num_fp_args < Argument::n_float_register_parameters) { + *_fp_args++ = from_obj; + *_fp_identifiers |= (1 << _num_fp_args); // mark as double + _num_fp_args++; + } else if (_num_int_args < Argument::n_register_parameters - 1) { + *_int_args++ = from_obj; + _num_int_args++; + } else { + *_to++ = from_obj; + } + } + + public: + SlowSignatureHandler(methodHandle method, address from, intptr_t* to) + : NativeSignatureIterator(method) + { + _from = from; + _to = to; + + // see TemplateInterpreterGenerator::generate_slow_signature_handler() + _int_args = to - (method->is_static() ? 15 : 16); + _fp_args = to - 8; + _fp_identifiers = to - 9; + *(int*) _fp_identifiers = 0; + _num_int_args = (method->is_static() ? 
1 : 0); + _num_fp_args = 0; + } +}; + + +IRT_ENTRY(address, + InterpreterRuntime::slow_signature_handler(JavaThread* thread, + Method* method, + intptr_t* from, + intptr_t* to)) + methodHandle m(thread, (Method*)method); + assert(m->is_native(), "sanity check"); + + // handle arguments + SlowSignatureHandler(m, (address)from, to).iterate(UCONST64(-1)); + + // return result handler + return Interpreter::result_handler(m->result_type()); +IRT_END diff --git a/src/hotspot/cpu/loongarch/javaFrameAnchor_loongarch.hpp b/src/hotspot/cpu/loongarch/javaFrameAnchor_loongarch.hpp new file mode 100644 index 00000000000..6814fa44a03 --- /dev/null +++ b/src/hotspot/cpu/loongarch/javaFrameAnchor_loongarch.hpp @@ -0,0 +1,87 @@ +/* + * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_LOONGARCH_JAVAFRAMEANCHOR_LOONGARCH_HPP +#define CPU_LOONGARCH_JAVAFRAMEANCHOR_LOONGARCH_HPP + +private: + + // FP value associated with _last_Java_sp: + intptr_t* volatile _last_Java_fp; // pointer is volatile not what it points to + +public: + // Each arch must define reset, save, restore + // These are used by objects that only care about: + // 1 - initializing a new state (thread creation, javaCalls) + // 2 - saving a current state (javaCalls) + // 3 - restoring an old state (javaCalls) + + void clear(void) { + // clearing _last_Java_sp must be first + _last_Java_sp = NULL; + // fence? 
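+    // Because sp is cleared before fp and pc, any reader that still observes a
+    // non-NULL _last_Java_sp also sees the old, still-consistent fp/pc values.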
+ _last_Java_fp = NULL; + _last_Java_pc = NULL; + } + + void copy(JavaFrameAnchor* src) { + // In order to make sure the transition state is valid for "this" + // We must clear _last_Java_sp before copying the rest of the new data + // + // Hack Alert: Temporary bugfix for 4717480/4721647 + // To act like previous version (pd_cache_state) don't NULL _last_Java_sp + // unless the value is changing + // + if (_last_Java_sp != src->_last_Java_sp) + _last_Java_sp = NULL; + + _last_Java_fp = src->_last_Java_fp; + _last_Java_pc = src->_last_Java_pc; + // Must be last so profiler will always see valid frame if has_last_frame() is true + _last_Java_sp = src->_last_Java_sp; + } + + // Always walkable + bool walkable(void) { return true; } + // Never any thing to do since we are always walkable and can find address of return addresses + void make_walkable(JavaThread* thread) { } + + intptr_t* last_Java_sp(void) const { return _last_Java_sp; } + + address last_Java_pc(void) { return _last_Java_pc; } + +private: + + static ByteSize last_Java_fp_offset() { return byte_offset_of(JavaFrameAnchor, _last_Java_fp); } + +public: + + void set_last_Java_sp(intptr_t* sp) { _last_Java_sp = sp; } + + intptr_t* last_Java_fp(void) { return _last_Java_fp; } + // Assert (last_Java_sp == NULL || fp == NULL) + void set_last_Java_fp(intptr_t* fp) { _last_Java_fp = fp; } + +#endif // CPU_LOONGARCH_JAVAFRAMEANCHOR_LOONGARCH_HPP diff --git a/src/hotspot/cpu/loongarch/jniFastGetField_loongarch_64.cpp b/src/hotspot/cpu/loongarch/jniFastGetField_loongarch_64.cpp new file mode 100644 index 00000000000..dbcdb7a6a46 --- /dev/null +++ b/src/hotspot/cpu/loongarch/jniFastGetField_loongarch_64.cpp @@ -0,0 +1,166 @@ +/* + * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "code/codeBlob.hpp" +#include "gc/shared/barrierSet.hpp" +#include "gc/shared/barrierSetAssembler.hpp" +#include "memory/resourceArea.hpp" +#include "prims/jniFastGetField.hpp" +#include "prims/jvm_misc.hpp" +#include "runtime/safepoint.hpp" + +#define __ masm-> + +#define T0 RT0 +#define T1 RT1 +#define T2 RT2 +#define T3 RT3 +#define T4 RT4 +#define T5 RT5 +#define T6 RT6 +#define T7 RT7 +#define T8 RT8 + +#define BUFFER_SIZE 30*wordSize + +// Instead of issuing lfence for LoadLoad barrier, we create data dependency +// between loads, which is more efficient than lfence. 
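+// Shape of the fast path emitted below:
+//   1. load the safepoint counter and branch to the slow case if its low bit
+//      is set (a safepoint is in progress);
+//   2. resolve the jobject and compute the field address from the jfieldID;
+//   3. reload the counter and branch to the slow case if it changed;
+//   4. perform the field load (recorded in speculative_load_pclist so a fault
+//      can be redirected to the slow case) and return.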
+ +address JNI_FastGetField::generate_fast_get_int_field0(BasicType type) { + const char *name = NULL; + switch (type) { + case T_BOOLEAN: name = "jni_fast_GetBooleanField"; break; + case T_BYTE: name = "jni_fast_GetByteField"; break; + case T_CHAR: name = "jni_fast_GetCharField"; break; + case T_SHORT: name = "jni_fast_GetShortField"; break; + case T_INT: name = "jni_fast_GetIntField"; break; + case T_LONG: name = "jni_fast_GetLongField"; break; + case T_FLOAT: name = "jni_fast_GetFloatField"; break; + case T_DOUBLE: name = "jni_fast_GetDoubleField"; break; + default: ShouldNotReachHere(); + } + ResourceMark rm; + BufferBlob* blob = BufferBlob::create(name, BUFFER_SIZE); + CodeBuffer cbuf(blob); + MacroAssembler* masm = new MacroAssembler(&cbuf); + address fast_entry = __ pc(); + + Label slow; + + // return pc RA + // jni env A0 + // obj A1 + // jfieldID A2 + + address counter_addr = SafepointSynchronize::safepoint_counter_addr(); + __ li(AT, (long)counter_addr); + __ ld_w(T1, AT, 0); + + // Parameters(A0~A3) should not be modified, since they will be used in slow path + __ andi(AT, T1, 1); + __ bne(AT, R0, slow); + + __ move(T0, A1); + // Both T0 and T4 are clobbered by try_resolve_jobject_in_native. + BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); + bs->try_resolve_jobject_in_native(masm, /* jni_env */ A0, T0, T4, slow); + + __ srli_d(T2, A2, 2); // offset + __ add_d(T0, T0, T2); + + __ li(AT, (long)counter_addr); + __ ld_w(AT, AT, 0); + __ bne(T1, AT, slow); + + assert(count < LIST_CAPACITY, "LIST_CAPACITY too small"); + speculative_load_pclist[count] = __ pc(); + switch (type) { + case T_BOOLEAN: __ ld_bu (V0, T0, 0); break; + case T_BYTE: __ ld_b (V0, T0, 0); break; + case T_CHAR: __ ld_hu (V0, T0, 0); break; + case T_SHORT: __ ld_h (V0, T0, 0); break; + case T_INT: __ ld_w (V0, T0, 0); break; + case T_LONG: __ ld_d (V0, T0, 0); break; + case T_FLOAT: __ fld_s (F0, T0, 0); break; + case T_DOUBLE: __ fld_d (F0, T0, 0); break; + default: ShouldNotReachHere(); + } + + __ jr(RA); + + slowcase_entry_pclist[count++] = __ pc(); + __ bind (slow); + address slow_case_addr = NULL; + switch (type) { + case T_BOOLEAN: slow_case_addr = jni_GetBooleanField_addr(); break; + case T_BYTE: slow_case_addr = jni_GetByteField_addr(); break; + case T_CHAR: slow_case_addr = jni_GetCharField_addr(); break; + case T_SHORT: slow_case_addr = jni_GetShortField_addr(); break; + case T_INT: slow_case_addr = jni_GetIntField_addr(); break; + case T_LONG: slow_case_addr = jni_GetLongField_addr(); break; + case T_FLOAT: slow_case_addr = jni_GetFloatField_addr(); break; + case T_DOUBLE: slow_case_addr = jni_GetDoubleField_addr(); break; + default: ShouldNotReachHere(); + } + __ jmp(slow_case_addr); + + __ flush (); + + return fast_entry; +} + +address JNI_FastGetField::generate_fast_get_boolean_field() { + return generate_fast_get_int_field0(T_BOOLEAN); +} + +address JNI_FastGetField::generate_fast_get_byte_field() { + return generate_fast_get_int_field0(T_BYTE); +} + +address JNI_FastGetField::generate_fast_get_char_field() { + return generate_fast_get_int_field0(T_CHAR); +} + +address JNI_FastGetField::generate_fast_get_short_field() { + return generate_fast_get_int_field0(T_SHORT); +} + +address JNI_FastGetField::generate_fast_get_int_field() { + return generate_fast_get_int_field0(T_INT); +} + +address JNI_FastGetField::generate_fast_get_long_field() { + return generate_fast_get_int_field0(T_LONG); +} + +address JNI_FastGetField::generate_fast_get_float_field() { + return 
generate_fast_get_int_field0(T_FLOAT); +} + +address JNI_FastGetField::generate_fast_get_double_field() { + return generate_fast_get_int_field0(T_DOUBLE); +} diff --git a/src/hotspot/cpu/loongarch/jniTypes_loongarch.hpp b/src/hotspot/cpu/loongarch/jniTypes_loongarch.hpp new file mode 100644 index 00000000000..b281f863728 --- /dev/null +++ b/src/hotspot/cpu/loongarch/jniTypes_loongarch.hpp @@ -0,0 +1,144 @@ +/* + * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_LOONGARCH_JNITYPES_LOONGARCH_HPP +#define CPU_LOONGARCH_JNITYPES_LOONGARCH_HPP + +#include "jni.h" +#include "memory/allocation.hpp" +#include "oops/oop.hpp" + +// This file holds platform-dependent routines used to write primitive jni +// types to the array of arguments passed into JavaCalls::call + +class JNITypes : AllStatic { + // These functions write a java primitive type (in native format) + // to a java stack slot array to be passed as an argument to JavaCalls:calls. + // I.e., they are functionally 'push' operations if they have a 'pos' + // formal parameter. Note that jlong's and jdouble's are written + // _in reverse_ of the order in which they appear in the interpreter + // stack. This is because call stubs (see stubGenerator_sparc.cpp) + // reverse the argument list constructed by JavaCallArguments (see + // javaCalls.hpp). + +private: + + // 32bit Helper routines. + static inline void put_int2r(jint *from, intptr_t *to) { *(jint *)(to++) = from[1]; + *(jint *)(to ) = from[0]; } + static inline void put_int2r(jint *from, intptr_t *to, int& pos) { put_int2r(from, to + pos); pos += 2; } + +public: + // In LOOGNARCH64, the sizeof intptr_t is 8 bytes, and each unit in JavaCallArguments::_value_buffer[] + // is 8 bytes. + // If we only write the low 4 bytes with (jint *), the high 4-bits will be left with uncertain values. + // Then, in JavaCallArguments::parameters(), the whole 8 bytes of a T_INT parameter is loaded. + // This error occurs in ReflectInvoke.java + // The parameter of DD(int) should be 4 instead of 0x550000004. 
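+  // For example (hypothetical slot contents): if the 8-byte slot still holds
+  // 0x0000000500000000 from earlier use and only the low 4 bytes are stored
+  // through a (jint *), reloading the whole slot yields 0x0000000500000004
+  // instead of 4; storing the widened value through (intptr_t *), as put_int()
+  // below does, rewrites the entire slot.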
+ // + // See: [runtime/javaCalls.hpp] + + static inline void put_int(jint from, intptr_t *to) { *(intptr_t *)(to + 0 ) = from; } + static inline void put_int(jint from, intptr_t *to, int& pos) { *(intptr_t *)(to + pos++) = from; } + static inline void put_int(jint *from, intptr_t *to, int& pos) { *(intptr_t *)(to + pos++) = *from; } + + // Longs are stored in native format in one JavaCallArgument slot at + // *(to). + // In theory, *(to + 1) is an empty slot. But, for several Java2D testing programs (TestBorderLayout, SwingTest), + // *(to + 1) must contains a copy of the long value. Otherwise it will corrupts. + static inline void put_long(jlong from, intptr_t *to) { + *(jlong*) (to + 1) = from; + *(jlong*) (to) = from; + } + + // A long parameter occupies two slot. + // It must fit the layout rule in methodHandle. + // + // See: [runtime/reflection.cpp] Reflection::invoke() + // assert(java_args.size_of_parameters() == method->size_of_parameters(), "just checking"); + + static inline void put_long(jlong from, intptr_t *to, int& pos) { + *(jlong*) (to + 1 + pos) = from; + *(jlong*) (to + pos) = from; + pos += 2; + } + + static inline void put_long(jlong *from, intptr_t *to, int& pos) { + *(jlong*) (to + 1 + pos) = *from; + *(jlong*) (to + pos) = *from; + pos += 2; + } + + // Oops are stored in native format in one JavaCallArgument slot at *to. + static inline void put_obj(oop from, intptr_t *to) { *(oop *)(to + 0 ) = from; } + static inline void put_obj(oop from, intptr_t *to, int& pos) { *(oop *)(to + pos++) = from; } + static inline void put_obj(oop *from, intptr_t *to, int& pos) { *(oop *)(to + pos++) = *from; } + + // Floats are stored in native format in one JavaCallArgument slot at *to. + static inline void put_float(jfloat from, intptr_t *to) { *(jfloat *)(to + 0 ) = from; } + static inline void put_float(jfloat from, intptr_t *to, int& pos) { *(jfloat *)(to + pos++) = from; } + static inline void put_float(jfloat *from, intptr_t *to, int& pos) { *(jfloat *)(to + pos++) = *from; } + +#undef _JNI_SLOT_OFFSET +#define _JNI_SLOT_OFFSET 0 + + // Longs are stored in native format in one JavaCallArgument slot at + // *(to). + // In theory, *(to + 1) is an empty slot. But, for several Java2D testing programs (TestBorderLayout, SwingTest), + // *(to + 1) must contains a copy of the long value. Otherwise it will corrupts. + static inline void put_double(jdouble from, intptr_t *to) { + *(jdouble*) (to + 1) = from; + *(jdouble*) (to) = from; + } + + // A long parameter occupies two slot. + // It must fit the layout rule in methodHandle. + // + // See: [runtime/reflection.cpp] Reflection::invoke() + // assert(java_args.size_of_parameters() == method->size_of_parameters(), "just checking"); + + static inline void put_double(jdouble from, intptr_t *to, int& pos) { + *(jdouble*) (to + 1 + pos) = from; + *(jdouble*) (to + pos) = from; + pos += 2; + } + + static inline void put_double(jdouble *from, intptr_t *to, int& pos) { + *(jdouble*) (to + 1 + pos) = *from; + *(jdouble*) (to + pos) = *from; + pos += 2; + } + + // The get_xxx routines, on the other hand, actually _do_ fetch + // java primitive types from the interpreter stack. 
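+  // On this 64-bit port _JNI_SLOT_OFFSET is 0, so get_long() and get_double()
+  // read the 64-bit value directly from the first of the two slots.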
+ static inline jint get_int (intptr_t *from) { return *(jint *) from; } + static inline jlong get_long (intptr_t *from) { return *(jlong *) (from + _JNI_SLOT_OFFSET); } + static inline oop get_obj (intptr_t *from) { return *(oop *) from; } + static inline jfloat get_float (intptr_t *from) { return *(jfloat *) from; } + static inline jdouble get_double(intptr_t *from) { return *(jdouble *)(from + _JNI_SLOT_OFFSET); } +#undef _JNI_SLOT_OFFSET +}; + +#endif // CPU_LOONGARCH_JNITYPES_LOONGARCH_HPP diff --git a/src/hotspot/cpu/loongarch/jvmciCodeInstaller_loongarch.cpp b/src/hotspot/cpu/loongarch/jvmciCodeInstaller_loongarch.cpp new file mode 100644 index 00000000000..ea481c7fa6c --- /dev/null +++ b/src/hotspot/cpu/loongarch/jvmciCodeInstaller_loongarch.cpp @@ -0,0 +1,199 @@ +/* + * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "jvmci/jvmciCodeInstaller.hpp" +#include "jvmci/jvmciRuntime.hpp" +#include "jvmci/jvmciCompilerToVM.hpp" +#include "jvmci/jvmciJavaClasses.hpp" +#include "oops/oop.inline.hpp" +#include "runtime/handles.inline.hpp" +#include "runtime/sharedRuntime.hpp" +#include "vmreg_loongarch.inline.hpp" + +jint CodeInstaller::pd_next_offset(NativeInstruction* inst, jint pc_offset, Handle method, TRAPS) { + if (inst->is_int_branch() || inst->is_float_branch()) { + return pc_offset + NativeInstruction::nop_instruction_size; + } else if (inst->is_call()) { + return pc_offset + NativeCall::instruction_size; + } else if (inst->is_far_call()) { + return pc_offset + NativeFarCall::instruction_size; + } else if (inst->is_jump()) { + return pc_offset + NativeGeneralJump::instruction_size; + } else if (inst->is_lu12iw_lu32id()) { + // match LoongArch64TestAssembler.java emitCall + // lu12i_w; lu32i_d; jirl + return pc_offset + 3 * NativeInstruction::nop_instruction_size; + } else { + JVMCI_ERROR_0("unsupported type of instruction for call site"); + } + return 0; +} + +void CodeInstaller::pd_patch_OopConstant(int pc_offset, Handle constant, TRAPS) { + address pc = _instructions->start() + pc_offset; + Handle obj(THREAD, HotSpotObjectConstantImpl::object(constant)); + jobject value = JNIHandles::make_local(obj()); + if (HotSpotObjectConstantImpl::compressed(constant)) { + NativeMovConstReg* move = nativeMovConstReg_at(pc); + move->set_data((intptr_t)(CompressedOops::encode(cast_to_oop(cast_from_oop
(obj()))))); + int oop_index = _oop_recorder->find_index(value); + RelocationHolder rspec = oop_Relocation::spec(oop_index); + _instructions->relocate(pc, rspec, Assembler::narrow_oop_operand); + } else { + NativeMovConstReg* move = nativeMovConstReg_at(pc); + move->set_data((intptr_t)(cast_from_oop
(obj()))); + int oop_index = _oop_recorder->find_index(value); + RelocationHolder rspec = oop_Relocation::spec(oop_index); + _instructions->relocate(pc, rspec); + } +} + +void CodeInstaller::pd_patch_MetaspaceConstant(int pc_offset, Handle constant, TRAPS) { + address pc = _instructions->start() + pc_offset; + if (HotSpotMetaspaceConstantImpl::compressed(constant)) { + NativeMovConstReg* move = nativeMovConstReg_at(pc); + narrowKlass narrowOop = record_narrow_metadata_reference(_instructions, pc, constant, CHECK); + move->set_data((intptr_t) narrowOop); + TRACE_jvmci_3("relocating (narrow metaspace constant) at " PTR_FORMAT "/0x%x", p2i(pc), narrowOop); + } else { + NativeMovConstReg* move = nativeMovConstReg_at(pc); + void* reference = record_metadata_reference(_instructions, pc, constant, CHECK); + move->set_data((intptr_t) reference); + TRACE_jvmci_3("relocating (metaspace constant) at " PTR_FORMAT "/" PTR_FORMAT, p2i(pc), p2i(reference)); + } +} + +void CodeInstaller::pd_patch_DataSectionReference(int pc_offset, int data_offset, TRAPS) { + address pc = _instructions->start() + pc_offset; + NativeInstruction* inst = nativeInstruction_at(pc); + if (inst->is_pcaddu12i_add()) { + address dest = _constants->start() + data_offset; + _instructions->relocate(pc, section_word_Relocation::spec((address) dest, CodeBuffer::SECT_CONSTS)); + TRACE_jvmci_3("relocating at " PTR_FORMAT " (+%d) with destination at %d", p2i(pc), pc_offset, data_offset); + } else { + JVMCI_ERROR("unknown load or move instruction at " PTR_FORMAT, p2i(pc)); + } +} + +void CodeInstaller::pd_relocate_ForeignCall(NativeInstruction* inst, jlong foreign_call_destination, TRAPS) { + address pc = (address) inst; + if (inst->is_call()) { + NativeCall* call = nativeCall_at(pc); + call->set_destination((address) foreign_call_destination); + _instructions->relocate(call->instruction_address(), runtime_call_Relocation::spec()); + } else if (inst->is_far_call()) { + NativeFarCall* call = nativeFarCall_at(pc); + call->set_destination((address) foreign_call_destination); + _instructions->relocate(call->instruction_address(), runtime_call_Relocation::spec()); + } else if (inst->is_jump()) { + NativeGeneralJump* jump = nativeGeneralJump_at(pc); + jump->set_jump_destination((address) foreign_call_destination); + _instructions->relocate(jump->instruction_address(), runtime_call_Relocation::spec()); + } else if (inst->is_lu12iw_lu32id()) { + // match emitCall of LoongArch64TestAssembler.java + // lu12i_w; lu32i_d; jirl + MacroAssembler::pd_patch_instruction((address)inst, (address)foreign_call_destination); + } else { + JVMCI_ERROR("unknown call or jump instruction at " PTR_FORMAT, p2i(pc)); + } + TRACE_jvmci_3("relocating (foreign call) at " PTR_FORMAT, p2i(inst)); +} + +void CodeInstaller::pd_relocate_JavaMethod(CodeBuffer &cbuf, Handle hotspot_method, jint pc_offset, TRAPS) { +#ifdef ASSERT + Method* method = NULL; + // we need to check, this might also be an unresolved method + if (hotspot_method->is_a(HotSpotResolvedJavaMethodImpl::klass())) { + method = getMethodFromHotSpotMethod(hotspot_method()); + } +#endif + switch (_next_call_type) { + case INLINE_INVOKE: + break; + case INVOKEVIRTUAL: + case INVOKEINTERFACE: { + assert(!method->is_static(), "cannot call static method with invokeinterface"); + NativeCall* call = nativeCall_at(_instructions->start() + pc_offset); + _instructions->relocate(call->instruction_address(), virtual_call_Relocation::spec(_invoke_mark_pc)); + call->trampoline_jump(cbuf, 
SharedRuntime::get_resolve_virtual_call_stub()); + break; + } + case INVOKESTATIC: { + assert(method->is_static(), "cannot call non-static method with invokestatic"); + NativeCall* call = nativeCall_at(_instructions->start() + pc_offset); + _instructions->relocate(call->instruction_address(), relocInfo::static_call_type); + call->trampoline_jump(cbuf, SharedRuntime::get_resolve_static_call_stub()); + break; + } + case INVOKESPECIAL: { + assert(!method->is_static(), "cannot call static method with invokespecial"); + NativeCall* call = nativeCall_at(_instructions->start() + pc_offset); + _instructions->relocate(call->instruction_address(), relocInfo::opt_virtual_call_type); + call->trampoline_jump(cbuf, SharedRuntime::get_resolve_opt_virtual_call_stub()); + break; + } + default: + JVMCI_ERROR("invalid _next_call_type value"); + break; + } +} + +void CodeInstaller::pd_relocate_poll(address pc, jint mark, TRAPS) { + switch (mark) { + case POLL_NEAR: + JVMCI_ERROR("unimplemented"); + break; + case POLL_FAR: + _instructions->relocate(pc, relocInfo::poll_type); + break; + case POLL_RETURN_NEAR: + JVMCI_ERROR("unimplemented"); + break; + case POLL_RETURN_FAR: + _instructions->relocate(pc, relocInfo::poll_return_type); + break; + default: + JVMCI_ERROR("invalid mark value"); + break; + } +} + +// convert JVMCI register indices (as used in oop maps) to HotSpot registers +VMReg CodeInstaller::get_hotspot_reg(jint jvmci_reg, TRAPS) { + if (jvmci_reg < RegisterImpl::number_of_registers) { + return as_Register(jvmci_reg)->as_VMReg(); + } else { + jint floatRegisterNumber = jvmci_reg - RegisterImpl::number_of_registers; + if (floatRegisterNumber >= 0 && floatRegisterNumber < FloatRegisterImpl::number_of_registers) { + return as_FloatRegister(floatRegisterNumber)->as_VMReg(); + } + JVMCI_ERROR_NULL("invalid register number: %d", jvmci_reg); + } +} + +bool CodeInstaller::is_general_purpose_reg(VMReg hotspotRegister) { + return !hotspotRegister->is_FloatRegister(); +} diff --git a/src/hotspot/cpu/loongarch/loongarch.ad b/src/hotspot/cpu/loongarch/loongarch.ad new file mode 100644 index 00000000000..80dff0c7626 --- /dev/null +++ b/src/hotspot/cpu/loongarch/loongarch.ad @@ -0,0 +1,25 @@ +// +// Copyright (c) 2011, 2012, Oracle and/or its affiliates. All rights reserved. +// Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. +// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +// +// This code is free software; you can redistribute it and/or modify it +// under the terms of the GNU General Public License version 2 only, as +// published by the Free Software Foundation. +// +// This code is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// version 2 for more details (a copy is included in the LICENSE file that +// accompanied this code). +// +// You should have received a copy of the GNU General Public License version +// 2 along with this work; if not, write to the Free Software Foundation, +// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +// +// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +// or visit www.oracle.com if you need additional information or have any +// questions. 
+// +// + diff --git a/src/hotspot/cpu/loongarch/loongarch_64.ad b/src/hotspot/cpu/loongarch/loongarch_64.ad new file mode 100644 index 00000000000..cc3824a4021 --- /dev/null +++ b/src/hotspot/cpu/loongarch/loongarch_64.ad @@ -0,0 +1,13917 @@ +// +// Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. +// Copyright (c) 2015, 2023, Loongson Technology. All rights reserved. +// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +// +// This code is free software; you can redistribute it and/or modify it +// under the terms of the GNU General Public License version 2 only, as +// published by the Free Software Foundation. +// +// This code is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// version 2 for more details (a copy is included in the LICENSE file that +// accompanied this code). +// +// You should have received a copy of the GNU General Public License version +// 2 along with this work; if not, write to the Free Software Foundation, +// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +// +// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +// or visit www.oracle.com if you need additional information or have any +// questions. +// +// + +// GodSon3 Architecture Description File + +//----------REGISTER DEFINITION BLOCK------------------------------------------ +// This information is used by the matcher and the register allocator to +// describe individual registers and classes of registers within the target +// archtecture. + +// format: +// reg_def name (call convention, c-call convention, ideal type, encoding); +// call convention : +// NS = No-Save +// SOC = Save-On-Call +// SOE = Save-On-Entry +// AS = Always-Save +// ideal type : +// see opto/opcodes.hpp for more info +// reg_class name (reg, ...); +// alloc_class name (reg, ...); +register %{ + +// General Registers +// Integer Registers + reg_def R0 ( NS, NS, Op_RegI, 0, R0->as_VMReg()); + reg_def R0_H ( NS, NS, Op_RegI, 0, R0->as_VMReg()->next()); + reg_def RA ( NS, NS, Op_RegI, 1, RA->as_VMReg()); + reg_def RA_H ( NS, NS, Op_RegI, 1, RA->as_VMReg()->next()); + reg_def TP ( NS, NS, Op_RegI, 2, TP->as_VMReg()); + reg_def TP_H ( NS, NS, Op_RegI, 2, TP->as_VMReg()->next()); + reg_def SP ( NS, NS, Op_RegI, 3, SP->as_VMReg()); + reg_def SP_H ( NS, NS, Op_RegI, 3, SP->as_VMReg()->next()); + reg_def A0 (SOC, SOC, Op_RegI, 4, A0->as_VMReg()); + reg_def A0_H (SOC, SOC, Op_RegI, 4, A0->as_VMReg()->next()); + reg_def A1 (SOC, SOC, Op_RegI, 5, A1->as_VMReg()); + reg_def A1_H (SOC, SOC, Op_RegI, 5, A1->as_VMReg()->next()); + reg_def A2 (SOC, SOC, Op_RegI, 6, A2->as_VMReg()); + reg_def A2_H (SOC, SOC, Op_RegI, 6, A2->as_VMReg()->next()); + reg_def A3 (SOC, SOC, Op_RegI, 7, A3->as_VMReg()); + reg_def A3_H (SOC, SOC, Op_RegI, 7, A3->as_VMReg()->next()); + reg_def A4 (SOC, SOC, Op_RegI, 8, A4->as_VMReg()); + reg_def A4_H (SOC, SOC, Op_RegI, 8, A4->as_VMReg()->next()); + reg_def A5 (SOC, SOC, Op_RegI, 9, A5->as_VMReg()); + reg_def A5_H (SOC, SOC, Op_RegI, 9, A5->as_VMReg()->next()); + reg_def A6 (SOC, SOC, Op_RegI, 10, A6->as_VMReg()); + reg_def A6_H (SOC, SOC, Op_RegI, 10, A6->as_VMReg()->next()); + reg_def A7 (SOC, SOC, Op_RegI, 11, A7->as_VMReg()); + reg_def A7_H (SOC, SOC, Op_RegI, 11, A7->as_VMReg()->next()); + reg_def T0 (SOC, SOC, Op_RegI, 12, T0->as_VMReg()); + reg_def T0_H (SOC, SOC, Op_RegI, 12, 
T0->as_VMReg()->next()); + reg_def T1 (SOC, SOC, Op_RegI, 13, T1->as_VMReg()); + reg_def T1_H (SOC, SOC, Op_RegI, 13, T1->as_VMReg()->next()); + reg_def T2 (SOC, SOC, Op_RegI, 14, T2->as_VMReg()); + reg_def T2_H (SOC, SOC, Op_RegI, 14, T2->as_VMReg()->next()); + reg_def T3 (SOC, SOC, Op_RegI, 15, T3->as_VMReg()); + reg_def T3_H (SOC, SOC, Op_RegI, 15, T3->as_VMReg()->next()); + reg_def T4 (SOC, SOC, Op_RegI, 16, T4->as_VMReg()); + reg_def T4_H (SOC, SOC, Op_RegI, 16, T4->as_VMReg()->next()); + reg_def T5 (SOC, SOC, Op_RegI, 17, T5->as_VMReg()); + reg_def T5_H (SOC, SOC, Op_RegI, 17, T5->as_VMReg()->next()); + reg_def T6 (SOC, SOC, Op_RegI, 18, T6->as_VMReg()); + reg_def T6_H (SOC, SOC, Op_RegI, 18, T6->as_VMReg()->next()); + reg_def T7 (SOC, SOC, Op_RegI, 19, T7->as_VMReg()); + reg_def T7_H (SOC, SOC, Op_RegI, 19, T7->as_VMReg()->next()); + reg_def T8 (SOC, SOC, Op_RegI, 20, T8->as_VMReg()); + reg_def T8_H (SOC, SOC, Op_RegI, 20, T8->as_VMReg()->next()); + reg_def RX ( NS, NS, Op_RegI, 21, RX->as_VMReg()); + reg_def RX_H ( NS, NS, Op_RegI, 21, RX->as_VMReg()->next()); + reg_def FP ( NS, NS, Op_RegI, 22, FP->as_VMReg()); + reg_def FP_H ( NS, NS, Op_RegI, 22, FP->as_VMReg()->next()); + reg_def S0 (SOC, SOE, Op_RegI, 23, S0->as_VMReg()); + reg_def S0_H (SOC, SOE, Op_RegI, 23, S0->as_VMReg()->next()); + reg_def S1 (SOC, SOE, Op_RegI, 24, S1->as_VMReg()); + reg_def S1_H (SOC, SOE, Op_RegI, 24, S1->as_VMReg()->next()); + reg_def S2 (SOC, SOE, Op_RegI, 25, S2->as_VMReg()); + reg_def S2_H (SOC, SOE, Op_RegI, 25, S2->as_VMReg()->next()); + reg_def S3 (SOC, SOE, Op_RegI, 26, S3->as_VMReg()); + reg_def S3_H (SOC, SOE, Op_RegI, 26, S3->as_VMReg()->next()); + reg_def S4 (SOC, SOE, Op_RegI, 27, S4->as_VMReg()); + reg_def S4_H (SOC, SOE, Op_RegI, 27, S4->as_VMReg()->next()); + reg_def S5 (SOC, SOE, Op_RegI, 28, S5->as_VMReg()); + reg_def S5_H (SOC, SOE, Op_RegI, 28, S5->as_VMReg()->next()); + reg_def S6 (SOC, SOE, Op_RegI, 29, S6->as_VMReg()); + reg_def S6_H (SOC, SOE, Op_RegI, 29, S6->as_VMReg()->next()); + reg_def S7 (SOC, SOE, Op_RegI, 30, S7->as_VMReg()); + reg_def S7_H (SOC, SOE, Op_RegI, 30, S7->as_VMReg()->next()); + reg_def S8 (SOC, SOE, Op_RegI, 31, S8->as_VMReg()); + reg_def S8_H (SOC, SOE, Op_RegI, 31, S8->as_VMReg()->next()); + + +// Floating/Vector registers. 
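+// Each FP register is modelled as eight 32-bit slices (Fn, Fn_H, Fn_J .. Fn_O)
+// so the same physical register can be viewed as a float (one slice), a double
+// (two), a 128-bit LSX vector (four) or a 256-bit LASX vector (all eight),
+// depending on the reg_class it is drawn from further below.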
+ reg_def F0 ( SOC, SOC, Op_RegF, 0, F0->as_VMReg() ); + reg_def F0_H ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()->next() ); + reg_def F0_J ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()->next(2) ); + reg_def F0_K ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()->next(3) ); + reg_def F0_L ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()->next(4) ); + reg_def F0_M ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()->next(5) ); + reg_def F0_N ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()->next(6) ); + reg_def F0_O ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()->next(7) ); + + reg_def F1 ( SOC, SOC, Op_RegF, 1, F1->as_VMReg() ); + reg_def F1_H ( SOC, SOC, Op_RegF, 1, F1->as_VMReg()->next() ); + reg_def F1_J ( SOC, SOC, Op_RegF, 1, F1->as_VMReg()->next(2) ); + reg_def F1_K ( SOC, SOC, Op_RegF, 1, F1->as_VMReg()->next(3) ); + reg_def F1_L ( SOC, SOC, Op_RegF, 1, F1->as_VMReg()->next(4) ); + reg_def F1_M ( SOC, SOC, Op_RegF, 1, F1->as_VMReg()->next(5) ); + reg_def F1_N ( SOC, SOC, Op_RegF, 1, F1->as_VMReg()->next(6) ); + reg_def F1_O ( SOC, SOC, Op_RegF, 1, F1->as_VMReg()->next(7) ); + + reg_def F2 ( SOC, SOC, Op_RegF, 2, F2->as_VMReg() ); + reg_def F2_H ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()->next() ); + reg_def F2_J ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()->next(2) ); + reg_def F2_K ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()->next(3) ); + reg_def F2_L ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()->next(4) ); + reg_def F2_M ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()->next(5) ); + reg_def F2_N ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()->next(6) ); + reg_def F2_O ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()->next(7) ); + + reg_def F3 ( SOC, SOC, Op_RegF, 3, F3->as_VMReg() ); + reg_def F3_H ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()->next() ); + reg_def F3_J ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()->next(2) ); + reg_def F3_K ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()->next(3) ); + reg_def F3_L ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()->next(4) ); + reg_def F3_M ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()->next(5) ); + reg_def F3_N ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()->next(6) ); + reg_def F3_O ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()->next(7) ); + + reg_def F4 ( SOC, SOC, Op_RegF, 4, F4->as_VMReg() ); + reg_def F4_H ( SOC, SOC, Op_RegF, 4, F4->as_VMReg()->next() ); + reg_def F4_J ( SOC, SOC, Op_RegF, 4, F4->as_VMReg()->next(2) ); + reg_def F4_K ( SOC, SOC, Op_RegF, 4, F4->as_VMReg()->next(3) ); + reg_def F4_L ( SOC, SOC, Op_RegF, 4, F4->as_VMReg()->next(4) ); + reg_def F4_M ( SOC, SOC, Op_RegF, 4, F4->as_VMReg()->next(5) ); + reg_def F4_N ( SOC, SOC, Op_RegF, 4, F4->as_VMReg()->next(6) ); + reg_def F4_O ( SOC, SOC, Op_RegF, 4, F4->as_VMReg()->next(7) ); + + reg_def F5 ( SOC, SOC, Op_RegF, 5, F5->as_VMReg() ); + reg_def F5_H ( SOC, SOC, Op_RegF, 5, F5->as_VMReg()->next() ); + reg_def F5_J ( SOC, SOC, Op_RegF, 5, F5->as_VMReg()->next(2) ); + reg_def F5_K ( SOC, SOC, Op_RegF, 5, F5->as_VMReg()->next(3) ); + reg_def F5_L ( SOC, SOC, Op_RegF, 5, F5->as_VMReg()->next(4) ); + reg_def F5_M ( SOC, SOC, Op_RegF, 5, F5->as_VMReg()->next(5) ); + reg_def F5_N ( SOC, SOC, Op_RegF, 5, F5->as_VMReg()->next(6) ); + reg_def F5_O ( SOC, SOC, Op_RegF, 5, F5->as_VMReg()->next(7) ); + + reg_def F6 ( SOC, SOC, Op_RegF, 6, F6->as_VMReg() ); + reg_def F6_H ( SOC, SOC, Op_RegF, 6, F6->as_VMReg()->next() ); + reg_def F6_J ( SOC, SOC, Op_RegF, 6, F6->as_VMReg()->next(2) ); + reg_def F6_K ( SOC, SOC, Op_RegF, 6, F6->as_VMReg()->next(3) ); + reg_def F6_L ( SOC, SOC, Op_RegF, 6, F6->as_VMReg()->next(4) ); + reg_def F6_M ( SOC, SOC, Op_RegF, 6, F6->as_VMReg()->next(5) ); + reg_def F6_N ( SOC, SOC, Op_RegF, 6, F6->as_VMReg()->next(6) 
); + reg_def F6_O ( SOC, SOC, Op_RegF, 6, F6->as_VMReg()->next(7) ); + + reg_def F7 ( SOC, SOC, Op_RegF, 7, F7->as_VMReg() ); + reg_def F7_H ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()->next() ); + reg_def F7_J ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()->next(2) ); + reg_def F7_K ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()->next(3) ); + reg_def F7_L ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()->next(4) ); + reg_def F7_M ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()->next(5) ); + reg_def F7_N ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()->next(6) ); + reg_def F7_O ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()->next(7) ); + + reg_def F8 ( SOC, SOC, Op_RegF, 8, F8->as_VMReg() ); + reg_def F8_H ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()->next() ); + reg_def F8_J ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()->next(2) ); + reg_def F8_K ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()->next(3) ); + reg_def F8_L ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()->next(4) ); + reg_def F8_M ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()->next(5) ); + reg_def F8_N ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()->next(6) ); + reg_def F8_O ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()->next(7) ); + + reg_def F9 ( SOC, SOC, Op_RegF, 9, F9->as_VMReg() ); + reg_def F9_H ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()->next() ); + reg_def F9_J ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()->next(2) ); + reg_def F9_K ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()->next(3) ); + reg_def F9_L ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()->next(4) ); + reg_def F9_M ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()->next(5) ); + reg_def F9_N ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()->next(6) ); + reg_def F9_O ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()->next(7) ); + + reg_def F10 ( SOC, SOC, Op_RegF, 10, F10->as_VMReg() ); + reg_def F10_H ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()->next() ); + reg_def F10_J ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()->next(2) ); + reg_def F10_K ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()->next(3) ); + reg_def F10_L ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()->next(4) ); + reg_def F10_M ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()->next(5) ); + reg_def F10_N ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()->next(6) ); + reg_def F10_O ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()->next(7) ); + + reg_def F11 ( SOC, SOC, Op_RegF, 11, F11->as_VMReg() ); + reg_def F11_H ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()->next() ); + reg_def F11_J ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()->next(2) ); + reg_def F11_K ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()->next(3) ); + reg_def F11_L ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()->next(4) ); + reg_def F11_M ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()->next(5) ); + reg_def F11_N ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()->next(6) ); + reg_def F11_O ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()->next(7) ); + + reg_def F12 ( SOC, SOC, Op_RegF, 12, F12->as_VMReg() ); + reg_def F12_H ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()->next() ); + reg_def F12_J ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()->next(2) ); + reg_def F12_K ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()->next(3) ); + reg_def F12_L ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()->next(4) ); + reg_def F12_M ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()->next(5) ); + reg_def F12_N ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()->next(6) ); + reg_def F12_O ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()->next(7) ); + + reg_def F13 ( SOC, SOC, Op_RegF, 13, F13->as_VMReg() ); + reg_def F13_H ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()->next() ); + reg_def F13_J ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()->next(2) ); + reg_def F13_K ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()->next(3) ); + reg_def F13_L ( SOC, SOC, Op_RegF, 
13, F13->as_VMReg()->next(4) ); + reg_def F13_M ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()->next(5) ); + reg_def F13_N ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()->next(6) ); + reg_def F13_O ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()->next(7) ); + + reg_def F14 ( SOC, SOC, Op_RegF, 14, F14->as_VMReg() ); + reg_def F14_H ( SOC, SOC, Op_RegF, 14, F14->as_VMReg()->next() ); + reg_def F14_J ( SOC, SOC, Op_RegF, 14, F14->as_VMReg()->next(2) ); + reg_def F14_K ( SOC, SOC, Op_RegF, 14, F14->as_VMReg()->next(3) ); + reg_def F14_L ( SOC, SOC, Op_RegF, 14, F14->as_VMReg()->next(4) ); + reg_def F14_M ( SOC, SOC, Op_RegF, 14, F14->as_VMReg()->next(5) ); + reg_def F14_N ( SOC, SOC, Op_RegF, 14, F14->as_VMReg()->next(6) ); + reg_def F14_O ( SOC, SOC, Op_RegF, 14, F14->as_VMReg()->next(7) ); + + reg_def F15 ( SOC, SOC, Op_RegF, 15, F15->as_VMReg() ); + reg_def F15_H ( SOC, SOC, Op_RegF, 15, F15->as_VMReg()->next() ); + reg_def F15_J ( SOC, SOC, Op_RegF, 15, F15->as_VMReg()->next(2) ); + reg_def F15_K ( SOC, SOC, Op_RegF, 15, F15->as_VMReg()->next(3) ); + reg_def F15_L ( SOC, SOC, Op_RegF, 15, F15->as_VMReg()->next(4) ); + reg_def F15_M ( SOC, SOC, Op_RegF, 15, F15->as_VMReg()->next(5) ); + reg_def F15_N ( SOC, SOC, Op_RegF, 15, F15->as_VMReg()->next(6) ); + reg_def F15_O ( SOC, SOC, Op_RegF, 15, F15->as_VMReg()->next(7) ); + + reg_def F16 ( SOC, SOC, Op_RegF, 16, F16->as_VMReg() ); + reg_def F16_H ( SOC, SOC, Op_RegF, 16, F16->as_VMReg()->next() ); + reg_def F16_J ( SOC, SOC, Op_RegF, 16, F16->as_VMReg()->next(2) ); + reg_def F16_K ( SOC, SOC, Op_RegF, 16, F16->as_VMReg()->next(3) ); + reg_def F16_L ( SOC, SOC, Op_RegF, 16, F16->as_VMReg()->next(4) ); + reg_def F16_M ( SOC, SOC, Op_RegF, 16, F16->as_VMReg()->next(5) ); + reg_def F16_N ( SOC, SOC, Op_RegF, 16, F16->as_VMReg()->next(6) ); + reg_def F16_O ( SOC, SOC, Op_RegF, 16, F16->as_VMReg()->next(7) ); + + reg_def F17 ( SOC, SOC, Op_RegF, 17, F17->as_VMReg() ); + reg_def F17_H ( SOC, SOC, Op_RegF, 17, F17->as_VMReg()->next() ); + reg_def F17_J ( SOC, SOC, Op_RegF, 17, F17->as_VMReg()->next(2) ); + reg_def F17_K ( SOC, SOC, Op_RegF, 17, F17->as_VMReg()->next(3) ); + reg_def F17_L ( SOC, SOC, Op_RegF, 17, F17->as_VMReg()->next(4) ); + reg_def F17_M ( SOC, SOC, Op_RegF, 17, F17->as_VMReg()->next(5) ); + reg_def F17_N ( SOC, SOC, Op_RegF, 17, F17->as_VMReg()->next(6) ); + reg_def F17_O ( SOC, SOC, Op_RegF, 17, F17->as_VMReg()->next(7) ); + + reg_def F18 ( SOC, SOC, Op_RegF, 18, F18->as_VMReg() ); + reg_def F18_H ( SOC, SOC, Op_RegF, 18, F18->as_VMReg()->next() ); + reg_def F18_J ( SOC, SOC, Op_RegF, 18, F18->as_VMReg()->next(2) ); + reg_def F18_K ( SOC, SOC, Op_RegF, 18, F18->as_VMReg()->next(3) ); + reg_def F18_L ( SOC, SOC, Op_RegF, 18, F18->as_VMReg()->next(4) ); + reg_def F18_M ( SOC, SOC, Op_RegF, 18, F18->as_VMReg()->next(5) ); + reg_def F18_N ( SOC, SOC, Op_RegF, 18, F18->as_VMReg()->next(6) ); + reg_def F18_O ( SOC, SOC, Op_RegF, 18, F18->as_VMReg()->next(7) ); + + reg_def F19 ( SOC, SOC, Op_RegF, 19, F19->as_VMReg() ); + reg_def F19_H ( SOC, SOC, Op_RegF, 19, F19->as_VMReg()->next() ); + reg_def F19_J ( SOC, SOC, Op_RegF, 19, F19->as_VMReg()->next(2) ); + reg_def F19_K ( SOC, SOC, Op_RegF, 19, F19->as_VMReg()->next(3) ); + reg_def F19_L ( SOC, SOC, Op_RegF, 19, F19->as_VMReg()->next(4) ); + reg_def F19_M ( SOC, SOC, Op_RegF, 19, F19->as_VMReg()->next(5) ); + reg_def F19_N ( SOC, SOC, Op_RegF, 19, F19->as_VMReg()->next(6) ); + reg_def F19_O ( SOC, SOC, Op_RegF, 19, F19->as_VMReg()->next(7) ); + + reg_def F20 ( SOC, SOC, Op_RegF, 20, F20->as_VMReg() ); + 
reg_def F20_H ( SOC, SOC, Op_RegF, 20, F20->as_VMReg()->next() ); + reg_def F20_J ( SOC, SOC, Op_RegF, 20, F20->as_VMReg()->next(2) ); + reg_def F20_K ( SOC, SOC, Op_RegF, 20, F20->as_VMReg()->next(3) ); + reg_def F20_L ( SOC, SOC, Op_RegF, 20, F20->as_VMReg()->next(4) ); + reg_def F20_M ( SOC, SOC, Op_RegF, 20, F20->as_VMReg()->next(5) ); + reg_def F20_N ( SOC, SOC, Op_RegF, 20, F20->as_VMReg()->next(6) ); + reg_def F20_O ( SOC, SOC, Op_RegF, 20, F20->as_VMReg()->next(7) ); + + reg_def F21 ( SOC, SOC, Op_RegF, 21, F21->as_VMReg() ); + reg_def F21_H ( SOC, SOC, Op_RegF, 21, F21->as_VMReg()->next() ); + reg_def F21_J ( SOC, SOC, Op_RegF, 21, F21->as_VMReg()->next(2) ); + reg_def F21_K ( SOC, SOC, Op_RegF, 21, F21->as_VMReg()->next(3) ); + reg_def F21_L ( SOC, SOC, Op_RegF, 21, F21->as_VMReg()->next(4) ); + reg_def F21_M ( SOC, SOC, Op_RegF, 21, F21->as_VMReg()->next(5) ); + reg_def F21_N ( SOC, SOC, Op_RegF, 21, F21->as_VMReg()->next(6) ); + reg_def F21_O ( SOC, SOC, Op_RegF, 21, F21->as_VMReg()->next(7) ); + + reg_def F22 ( SOC, SOC, Op_RegF, 22, F22->as_VMReg() ); + reg_def F22_H ( SOC, SOC, Op_RegF, 22, F22->as_VMReg()->next() ); + reg_def F22_J ( SOC, SOC, Op_RegF, 22, F22->as_VMReg()->next(2) ); + reg_def F22_K ( SOC, SOC, Op_RegF, 22, F22->as_VMReg()->next(3) ); + reg_def F22_L ( SOC, SOC, Op_RegF, 22, F22->as_VMReg()->next(4) ); + reg_def F22_M ( SOC, SOC, Op_RegF, 22, F22->as_VMReg()->next(5) ); + reg_def F22_N ( SOC, SOC, Op_RegF, 22, F22->as_VMReg()->next(6) ); + reg_def F22_O ( SOC, SOC, Op_RegF, 22, F22->as_VMReg()->next(7) ); + + reg_def F23 ( SOC, SOC, Op_RegF, 23, F23->as_VMReg() ); + reg_def F23_H ( SOC, SOC, Op_RegF, 23, F23->as_VMReg()->next() ); + reg_def F23_J ( SOC, SOC, Op_RegF, 23, F23->as_VMReg()->next(2) ); + reg_def F23_K ( SOC, SOC, Op_RegF, 23, F23->as_VMReg()->next(3) ); + reg_def F23_L ( SOC, SOC, Op_RegF, 23, F23->as_VMReg()->next(4) ); + reg_def F23_M ( SOC, SOC, Op_RegF, 23, F23->as_VMReg()->next(5) ); + reg_def F23_N ( SOC, SOC, Op_RegF, 23, F23->as_VMReg()->next(6) ); + reg_def F23_O ( SOC, SOC, Op_RegF, 23, F23->as_VMReg()->next(7) ); + + reg_def F24 ( SOC, SOE, Op_RegF, 24, F24->as_VMReg() ); + reg_def F24_H ( SOC, SOE, Op_RegF, 24, F24->as_VMReg()->next() ); + reg_def F24_J ( SOC, SOC, Op_RegF, 24, F24->as_VMReg()->next(2) ); + reg_def F24_K ( SOC, SOC, Op_RegF, 24, F24->as_VMReg()->next(3) ); + reg_def F24_L ( SOC, SOC, Op_RegF, 24, F24->as_VMReg()->next(4) ); + reg_def F24_M ( SOC, SOC, Op_RegF, 24, F24->as_VMReg()->next(5) ); + reg_def F24_N ( SOC, SOC, Op_RegF, 24, F24->as_VMReg()->next(6) ); + reg_def F24_O ( SOC, SOC, Op_RegF, 24, F24->as_VMReg()->next(7) ); + + reg_def F25 ( SOC, SOE, Op_RegF, 25, F25->as_VMReg() ); + reg_def F25_H ( SOC, SOE, Op_RegF, 25, F25->as_VMReg()->next() ); + reg_def F25_J ( SOC, SOC, Op_RegF, 25, F25->as_VMReg()->next(2) ); + reg_def F25_K ( SOC, SOC, Op_RegF, 25, F25->as_VMReg()->next(3) ); + reg_def F25_L ( SOC, SOC, Op_RegF, 25, F25->as_VMReg()->next(4) ); + reg_def F25_M ( SOC, SOC, Op_RegF, 25, F25->as_VMReg()->next(5) ); + reg_def F25_N ( SOC, SOC, Op_RegF, 25, F25->as_VMReg()->next(6) ); + reg_def F25_O ( SOC, SOC, Op_RegF, 25, F25->as_VMReg()->next(7) ); + + reg_def F26 ( SOC, SOE, Op_RegF, 26, F26->as_VMReg() ); + reg_def F26_H ( SOC, SOE, Op_RegF, 26, F26->as_VMReg()->next() ); + reg_def F26_J ( SOC, SOC, Op_RegF, 26, F26->as_VMReg()->next(2) ); + reg_def F26_K ( SOC, SOC, Op_RegF, 26, F26->as_VMReg()->next(3) ); + reg_def F26_L ( SOC, SOC, Op_RegF, 26, F26->as_VMReg()->next(4) ); + reg_def F26_M ( SOC, SOC, 
Op_RegF, 26, F26->as_VMReg()->next(5) ); + reg_def F26_N ( SOC, SOC, Op_RegF, 26, F26->as_VMReg()->next(6) ); + reg_def F26_O ( SOC, SOC, Op_RegF, 26, F26->as_VMReg()->next(7) ); + + reg_def F27 ( SOC, SOE, Op_RegF, 27, F27->as_VMReg() ); + reg_def F27_H ( SOC, SOE, Op_RegF, 27, F27->as_VMReg()->next() ); + reg_def F27_J ( SOC, SOC, Op_RegF, 27, F27->as_VMReg()->next(2) ); + reg_def F27_K ( SOC, SOC, Op_RegF, 27, F27->as_VMReg()->next(3) ); + reg_def F27_L ( SOC, SOC, Op_RegF, 27, F27->as_VMReg()->next(4) ); + reg_def F27_M ( SOC, SOC, Op_RegF, 27, F27->as_VMReg()->next(5) ); + reg_def F27_N ( SOC, SOC, Op_RegF, 27, F27->as_VMReg()->next(6) ); + reg_def F27_O ( SOC, SOC, Op_RegF, 27, F27->as_VMReg()->next(7) ); + + reg_def F28 ( SOC, SOE, Op_RegF, 28, F28->as_VMReg() ); + reg_def F28_H ( SOC, SOE, Op_RegF, 28, F28->as_VMReg()->next() ); + reg_def F28_J ( SOC, SOC, Op_RegF, 28, F28->as_VMReg()->next(2) ); + reg_def F28_K ( SOC, SOC, Op_RegF, 28, F28->as_VMReg()->next(3) ); + reg_def F28_L ( SOC, SOC, Op_RegF, 28, F28->as_VMReg()->next(4) ); + reg_def F28_M ( SOC, SOC, Op_RegF, 28, F28->as_VMReg()->next(5) ); + reg_def F28_N ( SOC, SOC, Op_RegF, 28, F28->as_VMReg()->next(6) ); + reg_def F28_O ( SOC, SOC, Op_RegF, 28, F28->as_VMReg()->next(7) ); + + reg_def F29 ( SOC, SOE, Op_RegF, 29, F29->as_VMReg() ); + reg_def F29_H ( SOC, SOE, Op_RegF, 29, F29->as_VMReg()->next() ); + reg_def F29_J ( SOC, SOC, Op_RegF, 29, F29->as_VMReg()->next(2) ); + reg_def F29_K ( SOC, SOC, Op_RegF, 29, F29->as_VMReg()->next(3) ); + reg_def F29_L ( SOC, SOC, Op_RegF, 29, F29->as_VMReg()->next(4) ); + reg_def F29_M ( SOC, SOC, Op_RegF, 29, F29->as_VMReg()->next(5) ); + reg_def F29_N ( SOC, SOC, Op_RegF, 29, F29->as_VMReg()->next(6) ); + reg_def F29_O ( SOC, SOC, Op_RegF, 29, F29->as_VMReg()->next(7) ); + + reg_def F30 ( SOC, SOE, Op_RegF, 30, F30->as_VMReg() ); + reg_def F30_H ( SOC, SOE, Op_RegF, 30, F30->as_VMReg()->next() ); + reg_def F30_J ( SOC, SOC, Op_RegF, 30, F30->as_VMReg()->next(2) ); + reg_def F30_K ( SOC, SOC, Op_RegF, 30, F30->as_VMReg()->next(3) ); + reg_def F30_L ( SOC, SOC, Op_RegF, 30, F30->as_VMReg()->next(4) ); + reg_def F30_M ( SOC, SOC, Op_RegF, 30, F30->as_VMReg()->next(5) ); + reg_def F30_N ( SOC, SOC, Op_RegF, 30, F30->as_VMReg()->next(6) ); + reg_def F30_O ( SOC, SOC, Op_RegF, 30, F30->as_VMReg()->next(7) ); + + reg_def F31 ( SOC, SOE, Op_RegF, 31, F31->as_VMReg() ); + reg_def F31_H ( SOC, SOE, Op_RegF, 31, F31->as_VMReg()->next() ); + reg_def F31_J ( SOC, SOC, Op_RegF, 31, F31->as_VMReg()->next(2) ); + reg_def F31_K ( SOC, SOC, Op_RegF, 31, F31->as_VMReg()->next(3) ); + reg_def F31_L ( SOC, SOC, Op_RegF, 31, F31->as_VMReg()->next(4) ); + reg_def F31_M ( SOC, SOC, Op_RegF, 31, F31->as_VMReg()->next(5) ); + reg_def F31_N ( SOC, SOC, Op_RegF, 31, F31->as_VMReg()->next(6) ); + reg_def F31_O ( SOC, SOC, Op_RegF, 31, F31->as_VMReg()->next(7) ); + + +// ---------------------------- +// Special Registers +//S6 is used for get_thread(S6) +//S5 is uesd for heapbase of compressed oop +alloc_class chunk0( + S7, S7_H, + S0, S0_H, + S1, S1_H, + S2, S2_H, + S4, S4_H, + S5, S5_H, + S6, S6_H, + S3, S3_H, + T2, T2_H, + T3, T3_H, + T8, T8_H, + T4, T4_H, + T1, T1_H, // inline_cache_reg + T6, T6_H, + A7, A7_H, + A6, A6_H, + A5, A5_H, + A4, A4_H, + T5, T5_H, + A3, A3_H, + A2, A2_H, + A1, A1_H, + A0, A0_H, + T0, T0_H, + S8, S8_H + RA, RA_H, + SP, SP_H, // stack_pointer + FP, FP_H, // frame_pointer + + // non-allocatable registers + T7, T7_H, + TP, TP_H, + RX, RX_H, + R0, R0_H, + ); + +// F23 is scratch reg 
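+// (F23 is therefore kept in the trailing non-allocatable group of chunk1 and
+//  left out of the allocatable float/double/vector register classes below.)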
+alloc_class chunk1( F0, F0_H, F0_J, F0_K, F0_L, F0_M, F0_N, F0_O, + F1, F1_H, F1_J, F1_K, F1_L, F1_M, F1_N, F1_O, + F2, F2_H, F2_J, F2_K, F2_L, F2_M, F2_N, F2_O, + F3, F3_H, F3_J, F3_K, F3_L, F3_M, F3_N, F3_O, + F4, F4_H, F4_J, F4_K, F4_L, F4_M, F4_N, F4_O, + F5, F5_H, F5_J, F5_K, F5_L, F5_M, F5_N, F5_O, + F6, F6_H, F6_J, F6_K, F6_L, F6_M, F6_N, F6_O, + F7, F7_H, F7_J, F7_K, F7_L, F7_M, F7_N, F7_O, + F8, F8_H, F8_J, F8_K, F8_L, F8_M, F8_N, F8_O, + F9, F9_H, F9_J, F9_K, F9_L, F9_M, F9_N, F9_O, + F10, F10_H, F10_J, F10_K, F10_L, F10_M, F10_N, F10_O, + F11, F11_H, F11_J, F11_K, F11_L, F11_M, F11_N, F11_O, + F12, F12_H, F12_J, F12_K, F12_L, F12_M, F12_N, F12_O, + F13, F13_H, F13_J, F13_K, F13_L, F13_M, F13_N, F13_O, + F14, F14_H, F14_J, F14_K, F14_L, F14_M, F14_N, F14_O, + F15, F15_H, F15_J, F15_K, F15_L, F15_M, F15_N, F15_O, + F16, F16_H, F16_J, F16_K, F16_L, F16_M, F16_N, F16_O, + F17, F17_H, F17_J, F17_K, F17_L, F17_M, F17_N, F17_O, + F18, F18_H, F18_J, F18_K, F18_L, F18_M, F18_N, F18_O, + F19, F19_H, F19_J, F19_K, F19_L, F19_M, F19_N, F19_O, + F20, F20_H, F20_J, F20_K, F20_L, F20_M, F20_N, F20_O, + F21, F21_H, F21_J, F21_K, F21_L, F21_M, F21_N, F21_O, + F22, F22_H, F22_J, F22_K, F22_L, F22_M, F22_N, F22_O, + F24, F24_H, F24_J, F24_K, F24_L, F24_M, F24_N, F24_O, + F25, F25_H, F25_J, F25_K, F25_L, F25_M, F25_N, F25_O, + F26, F26_H, F26_J, F26_K, F26_L, F26_M, F26_N, F26_O, + F27, F27_H, F27_J, F27_K, F27_L, F27_M, F27_N, F27_O, + F28, F28_H, F28_J, F28_K, F28_L, F28_M, F28_N, F28_O, + F29, F29_H, F29_J, F29_K, F29_L, F29_M, F29_N, F29_O, + F30, F30_H, F30_J, F30_K, F30_L, F30_M, F30_N, F30_O, + F31, F31_H, F31_J, F31_K, F31_L, F31_M, F31_N, F31_O, + + // non-allocatable registers + F23, F23_H, F23_J, F23_K, F23_L, F23_M, F23_N, F23_O, + ); + +reg_class s_reg( S0, S1, S2, S3, S4, S5, S6, S7 ); +reg_class s0_reg( S0 ); +reg_class s1_reg( S1 ); +reg_class s2_reg( S2 ); +reg_class s3_reg( S3 ); +reg_class s4_reg( S4 ); +reg_class s5_reg( S5 ); +reg_class s6_reg( S6 ); +reg_class s7_reg( S7 ); + +reg_class t_reg( T0, T1, T2, T3, T8, T4 ); +reg_class t0_reg( T0 ); +reg_class t1_reg( T1 ); +reg_class t2_reg( T2 ); +reg_class t3_reg( T3 ); +reg_class t8_reg( T8 ); +reg_class t4_reg( T4 ); + +reg_class a_reg( A0, A1, A2, A3, A4, A5, A6, A7 ); +reg_class a0_reg( A0 ); +reg_class a1_reg( A1 ); +reg_class a2_reg( A2 ); +reg_class a3_reg( A3 ); +reg_class a4_reg( A4 ); +reg_class a5_reg( A5 ); +reg_class a6_reg( A6 ); +reg_class a7_reg( A7 ); + +// TODO: LA +//reg_class v0_reg( A0 ); +//reg_class v1_reg( A1 ); + +reg_class sp_reg( SP, SP_H ); +reg_class fp_reg( FP, FP_H ); + +reg_class v0_long_reg( A0, A0_H ); +reg_class v1_long_reg( A1, A1_H ); +reg_class a0_long_reg( A0, A0_H ); +reg_class a1_long_reg( A1, A1_H ); +reg_class a2_long_reg( A2, A2_H ); +reg_class a3_long_reg( A3, A3_H ); +reg_class a4_long_reg( A4, A4_H ); +reg_class a5_long_reg( A5, A5_H ); +reg_class a6_long_reg( A6, A6_H ); +reg_class a7_long_reg( A7, A7_H ); +reg_class t0_long_reg( T0, T0_H ); +reg_class t1_long_reg( T1, T1_H ); +reg_class t2_long_reg( T2, T2_H ); +reg_class t3_long_reg( T3, T3_H ); +reg_class t8_long_reg( T8, T8_H ); +reg_class t4_long_reg( T4, T4_H ); +reg_class s0_long_reg( S0, S0_H ); +reg_class s1_long_reg( S1, S1_H ); +reg_class s2_long_reg( S2, S2_H ); +reg_class s3_long_reg( S3, S3_H ); +reg_class s4_long_reg( S4, S4_H ); +reg_class s5_long_reg( S5, S5_H ); +reg_class s6_long_reg( S6, S6_H ); +reg_class s7_long_reg( S7, S7_H ); + +//reg_class int_reg( S7, S0, S1, S2, S4, S3, T8, T2, T3, T1, T6, A7, A6, A5, 
A4, T5, A3, A2, A1, A0, T0 ); + +reg_class all_reg32( + S8, + S7, + S5, /* S5_heapbase */ + /* S6, S6 TREG */ + S4, + S3, + S2, + S1, + S0, + T8, + /* T7, AT */ + T6, + T5, + /* T4, jarl T4 */ + T3, + T2, + T1, + T0, + A7, + A6, + A5, + A4, + A3, + A2, + A1, + A0 ); + +reg_class int_reg %{ + return _ANY_REG32_mask; +%} + +reg_class no_Ax_int_reg( S7, S0, S1, S2, S4, S3, T8, T2, T3, T1, T6, T5, T0 ); + +reg_class p_reg %{ + return _PTR_REG_mask; +%} + +reg_class no_T8_p_reg( + S7, S7_H, + S0, S0_H, + S1, S1_H, + S2, S2_H, + S4, S4_H, + S3, S3_H, + T2, T2_H, + T3, T3_H, + T1, T1_H, + A7, A7_H, + A6, A6_H, + A5, A5_H, + A4, A4_H, + A3, A3_H, + A2, A2_H, + A1, A1_H, + A0, A0_H, + T0, T0_H + ); + +reg_class no_Ax_p_reg( + S7, S7_H, + S0, S0_H, + S1, S1_H, + S2, S2_H, + S4, S4_H, + S3, S3_H, + T2, T2_H, + T3, T3_H, + T1, T1_H, + T0, T0_H + ); + +reg_class all_reg( + S8, S8_H, + S7, S7_H, + /* S6, S6_H, S6 TREG */ + S5, S5_H, /* S5_heapbase */ + S4, S4_H, + S3, S3_H, + S2, S2_H, + S1, S1_H, + S0, S0_H, + T8, T8_H, + /* T7, T7_H, AT */ + T6, T6_H, + T5, T5_H, + /* T4, T4_H, jalr T4 */ + T3, T3_H, + T2, T2_H, + T1, T1_H, + T0, T0_H, + A7, A7_H, + A6, A6_H, + A5, A5_H, + A4, A4_H, + A3, A3_H, + A2, A2_H, + A1, A1_H, + A0, A0_H + ); + + +reg_class long_reg %{ + return _ANY_REG_mask; +%} + +// Floating point registers. +// F31 are not used as temporary registers in D2I +reg_class flt_reg( F0, F1, F2, F3, F4, F5, F6, F7, F8, F9, F10, F11, F12, F13, F14, F15, F16, F17, F18, F19, F20, F21, F22, F24, F25, F26, F27, F28, F29, F30, F31); + +reg_class dbl_reg( F0, F0_H, + F1, F1_H, + F2, F2_H, + F3, F3_H, + F4, F4_H, + F5, F5_H, + F6, F6_H, + F7, F7_H, + F8, F8_H, + F9, F9_H, + F10, F10_H, + F11, F11_H, + F12, F12_H, + F13, F13_H, + F14, F14_H, + F15, F15_H, + F16, F16_H, + F17, F17_H, + F18, F18_H, + F19, F19_H, + F20, F20_H, + F21, F21_H, + F22, F22_H, + F24, F24_H, + F25, F25_H, + F26, F26_H, + F27, F27_H, + F28, F28_H, + F29, F29_H, + F30, F30_H, + F31, F31_H); + +// Class for all 128bit vector registers +reg_class vectorx_reg( F0, F0_H, F0_J, F0_K, + F1, F1_H, F1_J, F1_K, + F2, F2_H, F2_J, F2_K, + F3, F3_H, F3_J, F3_K, + F4, F4_H, F4_J, F4_K, + F5, F5_H, F5_J, F5_K, + F6, F6_H, F6_J, F6_K, + F7, F7_H, F7_J, F7_K, + F8, F8_H, F8_J, F8_K, + F9, F9_H, F9_J, F9_K, + F10, F10_H, F10_J, F10_K, + F11, F11_H, F11_J, F11_K, + F12, F12_H, F12_J, F12_K, + F13, F13_H, F13_J, F13_K, + F14, F14_H, F14_J, F14_K, + F15, F15_H, F15_J, F15_K, + F16, F16_H, F16_J, F16_K, + F17, F17_H, F17_J, F17_K, + F18, F18_H, F18_J, F18_K, + F19, F19_H, F19_J, F19_K, + F20, F20_H, F20_J, F20_K, + F21, F21_H, F21_J, F21_K, + F22, F22_H, F22_J, F22_K, + F24, F24_H, F24_J, F24_K, + F25, F25_H, F25_J, F25_K, + F26, F26_H, F26_J, F26_K, + F27, F27_H, F27_J, F27_K, + F28, F28_H, F28_J, F28_K, + F29, F29_H, F29_J, F29_K, + F30, F30_H, F30_J, F30_K, + F31, F31_H, F31_J, F31_K); + +// Class for all 256bit vector registers +reg_class vectory_reg( F0, F0_H, F0_J, F0_K, F0_L, F0_M, F0_N, F0_O, + F1, F1_H, F1_J, F1_K, F1_L, F1_M, F1_N, F1_O, + F2, F2_H, F2_J, F2_K, F2_L, F2_M, F2_N, F2_O, + F3, F3_H, F3_J, F3_K, F3_L, F3_M, F3_N, F3_O, + F4, F4_H, F4_J, F4_K, F4_L, F4_M, F4_N, F4_O, + F5, F5_H, F5_J, F5_K, F5_L, F5_M, F5_N, F5_O, + F6, F6_H, F6_J, F6_K, F6_L, F6_M, F6_N, F6_O, + F7, F7_H, F7_J, F7_K, F7_L, F7_M, F7_N, F7_O, + F8, F8_H, F8_J, F8_K, F8_L, F8_M, F8_N, F8_O, + F9, F9_H, F9_J, F9_K, F9_L, F9_M, F9_N, F9_O, + F10, F10_H, F10_J, F10_K, F10_L, F10_M, F10_N, F10_O, + F11, F11_H, F11_J, F11_K, F11_L, F11_M, F11_N, F11_O, + F12, F12_H, F12_J, 
F12_K, F12_L, F12_M, F12_N, F12_O, + F13, F13_H, F13_J, F13_K, F13_L, F13_M, F13_N, F13_O, + F14, F14_H, F14_J, F14_K, F14_L, F14_M, F14_N, F14_O, + F15, F15_H, F15_J, F15_K, F15_L, F15_M, F15_N, F15_O, + F16, F16_H, F16_J, F16_K, F16_L, F16_M, F16_N, F16_O, + F17, F17_H, F17_J, F17_K, F17_L, F17_M, F17_N, F17_O, + F18, F18_H, F18_J, F18_K, F18_L, F18_M, F18_N, F18_O, + F19, F19_H, F19_J, F19_K, F19_L, F19_M, F19_N, F19_O, + F20, F20_H, F20_J, F20_K, F20_L, F20_M, F20_N, F20_O, + F21, F21_H, F21_J, F21_K, F21_L, F21_M, F21_N, F21_O, + F22, F22_H, F22_J, F22_K, F22_L, F22_M, F22_N, F22_O, + F24, F24_H, F24_J, F24_K, F24_L, F24_M, F24_N, F24_O, + F25, F25_H, F25_J, F25_K, F25_L, F25_M, F25_N, F25_O, + F26, F26_H, F26_J, F26_K, F26_L, F26_M, F26_N, F26_O, + F27, F27_H, F27_J, F27_K, F27_L, F27_M, F27_N, F27_O, + F28, F28_H, F28_J, F28_K, F28_L, F28_M, F28_N, F28_O, + F29, F29_H, F29_J, F29_K, F29_L, F29_M, F29_N, F29_O, + F30, F30_H, F30_J, F30_K, F30_L, F30_M, F30_N, F30_O, + F31, F31_H, F31_J, F31_K, F31_L, F31_M, F31_N, F31_O); + +// TODO: LA +//reg_class flt_arg0( F0 ); +//reg_class dbl_arg0( F0, F0_H ); +//reg_class dbl_arg1( F1, F1_H ); + +%} + +//----------DEFINITION BLOCK--------------------------------------------------- +// Define name --> value mappings to inform the ADLC of an integer valued name +// Current support includes integer values in the range [0, 0x7FFFFFFF] +// Format: +// int_def ( , ); +// Generated Code in ad_.hpp +// #define () +// // value == +// Generated code in ad_.cpp adlc_verification() +// assert( == , "Expect () to equal "); +// +definitions %{ + int_def DEFAULT_COST ( 100, 100); + int_def HUGE_COST (1000000, 1000000); + + // Memory refs are twice as expensive as run-of-the-mill. + int_def MEMORY_REF_COST ( 200, DEFAULT_COST * 2); + + // Branches are even more expensive. + int_def BRANCH_COST ( 300, DEFAULT_COST * 3); + // we use jr instruction to construct call, so more expensive + int_def CALL_COST ( 500, DEFAULT_COST * 5); +/* + int_def EQUAL ( 1, 1 ); + int_def NOT_EQUAL ( 2, 2 ); + int_def GREATER ( 3, 3 ); + int_def GREATER_EQUAL ( 4, 4 ); + int_def LESS ( 5, 5 ); + int_def LESS_EQUAL ( 6, 6 ); +*/ +%} + + + +//----------SOURCE BLOCK------------------------------------------------------- +// This is a block of C++ code which provides values, functions, and +// definitions necessary in the rest of the architecture description + +source_hpp %{ +// Header information of the source block. +// Method declarations/definitions which are used outside +// the ad-scope can conveniently be defined here. +// +// To keep related declarations/definitions/uses close together, +// we switch between source %{ }% and source_hpp %{ }% freely as needed. + +extern RegMask _ANY_REG32_mask; +extern RegMask _ANY_REG_mask; +extern RegMask _PTR_REG_mask; + +class CallStubImpl { + + //-------------------------------------------------------------- + //---< Used for optimization in Compile::shorten_branches >--- + //-------------------------------------------------------------- + + public: + // Size of call trampoline stub. 
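The definitions block above fixes the relative cost model the matcher uses: a memory reference costs twice the default ALU operation, a branch three times, and a call five times. A tiny sanity sketch of that arithmetic (illustrative only; the real values come from the int_def entries above):

    #include <cassert>

    // Mirror of the int_def cost table above.
    const int DEFAULT_COST    = 100;
    const int MEMORY_REF_COST = DEFAULT_COST * 2;  // 200
    const int BRANCH_COST     = DEFAULT_COST * 3;  // 300
    const int CALL_COST       = DEFAULT_COST * 5;  // 500

    int main() {
      // A load feeding a branch is costed the same as a single call.
      assert(MEMORY_REF_COST + BRANCH_COST == CALL_COST);
      return 0;
    }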
+ static uint size_call_trampoline() { + return 0; // no call trampolines on this platform + } + + // number of relocations needed by a call trampoline stub + static uint reloc_call_trampoline() { + return 0; // no call trampolines on this platform + } +}; + +class HandlerImpl { + + public: + + static int emit_exception_handler(CodeBuffer &cbuf); + static int emit_deopt_handler(CodeBuffer& cbuf); + + static uint size_exception_handler() { + // NativeCall instruction size is the same as NativeJump. + // exception handler starts out as jump and can be patched to + // a call be deoptimization. (4932387) + // Note that this value is also credited (in output.cpp) to + // the size of the code section. + int size = NativeFarCall::instruction_size; + const uintx m = 16 - 1; + return mask_bits(size + m, ~m); + //return round_to(size, 16); + } + + static uint size_deopt_handler() { + int size = NativeFarCall::instruction_size; + const uintx m = 16 - 1; + return mask_bits(size + m, ~m); + //return round_to(size, 16); + } +}; + +bool is_CAS(int opcode); +bool use_AMO(int opcode); + +bool unnecessary_acquire(const Node *barrier); +bool unnecessary_release(const Node *barrier); +bool unnecessary_volatile(const Node *barrier); +bool needs_releasing_store(const Node *store); + +%} // end source_hpp + +source %{ + +#define NO_INDEX 0 +#define RELOC_IMM64 Assembler::imm_operand +#define RELOC_DISP32 Assembler::disp32_operand + +#define V0_num A0_num +#define V0_H_num A0_H_num + +#define __ _masm. + +#define T0 RT0 +#define T1 RT1 +#define T2 RT2 +#define T3 RT3 +#define T4 RT4 +#define T5 RT5 +#define T6 RT6 +#define T7 RT7 +#define T8 RT8 + +RegMask _ANY_REG32_mask; +RegMask _ANY_REG_mask; +RegMask _PTR_REG_mask; + +void reg_mask_init() { + _ANY_REG32_mask = _ALL_REG32_mask; + _ANY_REG_mask = _ALL_REG_mask; + _PTR_REG_mask = _ALL_REG_mask; + + if (UseCompressedOops && (Universe::narrow_ptrs_base() != NULL)) { + _ANY_REG32_mask.Remove(OptoReg::as_OptoReg(r28->as_VMReg())); + _ANY_REG_mask.SUBTRACT(_S5_LONG_REG_mask); + _PTR_REG_mask.SUBTRACT(_S5_LONG_REG_mask); + } +} + +// Emit exception handler code. +// Stuff framesize into a register and call a VM stub routine. +int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) { + // Note that the code buffer's insts_mark is always relative to insts. + // That's why we must use the macroassembler to generate a handler. + MacroAssembler _masm(&cbuf); + address base = __ start_a_stub(size_exception_handler()); + if (base == NULL) { + ciEnv::current()->record_failure("CodeCache is full"); + return 0; // CodeBuffer::expand failed + } + + int offset = __ offset(); + + __ block_comment("; emit_exception_handler"); + + cbuf.set_insts_mark(); + __ relocate(relocInfo::runtime_call_type); + __ patchable_jump((address)OptoRuntime::exception_blob()->entry_point()); + assert(__ offset() - offset <= (int) size_exception_handler(), "overflow"); + __ end_a_stub(); + return offset; +} + +// Emit deopt handler code. +int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) { + // Note that the code buffer's insts_mark is always relative to insts. + // That's why we must use the macroassembler to generate a handler. 
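size_exception_handler() and size_deopt_handler() above round the far-call size up to a 16-byte boundary with mask_bits(size + m, ~m), where mask_bits(x, m) is HotSpot's bitwise-AND helper, so the expression is the usual power-of-two align-up idiom (the commented-out round_to would do the same). A small standalone illustration; the sample sizes are arbitrary and are not taken from NativeFarCall::instruction_size:

    #include <cassert>
    #include <cstdint>

    // align_up(size, align) == mask_bits(size + (align - 1), ~(align - 1))
    // for any power-of-two align.
    static uint64_t align_up(uint64_t size, uint64_t align) {
      uint64_t m = align - 1;
      return (size + m) & ~m;
    }

    int main() {
      assert(align_up(4,  16) == 16);  // a 4-byte stub still gets a 16-byte slot
      assert(align_up(8,  16) == 16);
      assert(align_up(16, 16) == 16);  // already aligned, unchanged
      assert(align_up(20, 16) == 32);
      return 0;
    }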
+ MacroAssembler _masm(&cbuf); + address base = __ start_a_stub(size_deopt_handler()); + if (base == NULL) { + ciEnv::current()->record_failure("CodeCache is full"); + return 0; // CodeBuffer::expand failed + } + + int offset = __ offset(); + + __ block_comment("; emit_deopt_handler"); + + cbuf.set_insts_mark(); + __ relocate(relocInfo::runtime_call_type); + __ patchable_call(SharedRuntime::deopt_blob()->unpack()); + assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow"); + __ end_a_stub(); + return offset; +} + + +const bool Matcher::match_rule_supported(int opcode) { + if (!has_match_rule(opcode)) + return false; + + return true; // Per default match rules are supported. +} + +const bool Matcher::match_rule_supported_vector(int opcode, int vlen) { + // identify extra cases that we might want to provide match rules for + // e.g. Op_ vector nodes and other intrinsics while guarding with vlen + bool ret_value = match_rule_supported(opcode); + + return ret_value; // Per default match rules are supported. +} + +const bool Matcher::has_predicated_vectors(void) { + return false; +} + +const int Matcher::float_pressure(int default_pressure_threshold) { + Unimplemented(); + return default_pressure_threshold; +} + +bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) { + const int safety_zone = 3 * BytesPerInstWord; + int offs = offset - br_size + 4; + // To be conservative on LoongArch + // branch node should be end with: + // branch inst + offs = (offs < 0 ? offs - safety_zone : offs + safety_zone) >> 2; + switch (rule) { + case jmpDir_long_rule: + case jmpDir_short_rule: + return Assembler::is_simm(offs, 26); + case jmpCon_flags_long_rule: + case jmpCon_flags_short_rule: + case branchConP_0_long_rule: + case branchConP_0_short_rule: + case branchConN2P_0_long_rule: + case branchConN2P_0_short_rule: + case cmpN_null_branch_long_rule: + case cmpN_null_branch_short_rule: + case branchConF_reg_reg_long_rule: + case branchConF_reg_reg_short_rule: + case branchConD_reg_reg_long_rule: + case branchConD_reg_reg_short_rule: + return Assembler::is_simm(offs, 21); + default: + return Assembler::is_simm(offs, 16); + } + return false; +} + + +// No additional cost for CMOVL. +const int Matcher::long_cmove_cost() { return 0; } + +// No CMOVF/CMOVD with SSE2 +const int Matcher::float_cmove_cost() { return ConditionalMoveLimit; } + +// Does the CPU require late expand (see block.cpp for description of late expand)? +const bool Matcher::require_postalloc_expand = false; + +// Do we need to mask the count passed to shift instructions or does +// the cpu only look at the lower 5/6 bits anyway? +const bool Matcher::need_masked_shift_count = false; + +bool Matcher::narrow_oop_use_complex_address() { + assert(UseCompressedOops, "only for compressed oops code"); + return false; +} + +bool Matcher::narrow_klass_use_complex_address() { + assert(UseCompressedClassPointers, "only for compressed klass code"); + return false; +} + +bool Matcher::const_oop_prefer_decode() { + // Prefer ConN+DecodeN over ConP. + return true; +} + +bool Matcher::const_klass_prefer_decode() { + // TODO: Either support matching DecodeNKlass (heap-based) in operand + // or condisider the following: + // Prefer ConNKlass+DecodeNKlass over ConP in simple compressed klass mode. 
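Matcher::is_short_branch_offset() above converts the byte offset into 4-byte instruction words (offs >> 2) and then checks it against the immediate width of the branch that will be emitted: 26 bits for the unconditional jmpDir forms, 21 bits for the compare-against-zero and FP branches, and 16 bits for everything else, which matches the two-register compare-and-branch encodings. A standalone range check in the spirit of Assembler::is_simm, showing the resulting byte reach (the word-to-byte conversion is the only assumption added here):

    #include <cassert>
    #include <cstdint>

    // Signed n-bit immediate check: value must lie in [-2^(n-1), 2^(n-1) - 1].
    static bool is_simm(int64_t x, unsigned bits) {
      int64_t lo = -(int64_t(1) << (bits - 1));
      int64_t hi =  (int64_t(1) << (bits - 1)) - 1;
      return lo <= x && x <= hi;
    }

    int main() {
      // Offsets are counted in 4-byte instruction words, so:
      assert( is_simm((128 * 1024 * 1024) / 4 - 1, 26));  // 26-bit: about +/-128 MB
      assert(!is_simm((128 * 1024 * 1024) / 4,     26));
      assert( is_simm((  4 * 1024 * 1024) / 4 - 1, 21));  // 21-bit: about +/-4 MB
      assert( is_simm((       128 * 1024) / 4 - 1, 16));  // 16-bit: about +/-128 KB
      return 0;
    }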
+ //return Universe::narrow_klass_base() == NULL; + return true; +} + +// This is UltraSparc specific, true just means we have fast l2f conversion +const bool Matcher::convL2FSupported(void) { + return true; +} + +// Vector ideal reg +const uint Matcher::vector_ideal_reg(int size) { + assert(MaxVectorSize == 16 || MaxVectorSize == 32, ""); + switch(size) { + case 16: return Op_VecX; + case 32: return Op_VecY; + } + ShouldNotReachHere(); + return 0; +} + +// Only lowest bits of xmm reg are used for vector shift count. +const uint Matcher::vector_shift_count_ideal_reg(int size) { + assert(MaxVectorSize == 16 || MaxVectorSize == 32, ""); + switch(size) { + case 16: return Op_VecX; + case 32: return Op_VecY; + } + ShouldNotReachHere(); + return 0; +} + + +const bool Matcher::convi2l_type_required = true; + +// Should the Matcher clone shifts on addressing modes, expecting them +// to be subsumed into complex addressing expressions or compute them +// into registers? +bool Matcher::clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) { + return clone_base_plus_offset_address(m, mstack, address_visited); +} + +void Compile::reshape_address(AddPNode* addp) { +} + +// Max vector size in bytes. 0 if not supported. +const int Matcher::vector_width_in_bytes(BasicType bt) { + return (int)MaxVectorSize; +} + +// Limits on vector size (number of elements) loaded into vector. +const int Matcher::max_vector_size(const BasicType bt) { + assert(is_java_primitive(bt), "only primitive type vectors"); + return vector_width_in_bytes(bt)/type2aelembytes(bt); +} + +const int Matcher::min_vector_size(const BasicType bt) { + int max_size = max_vector_size(bt); + int size = 0; + + if (UseLSX) size = 16; + size = size / type2aelembytes(bt); + return MIN2(size,max_size); +} + +// LoongArch supports misaligned vectors store/load? FIXME +const bool Matcher::misaligned_vectors_ok() { + return false; + //return !AlignVector; // can be changed by flag +} + +// Register for DIVI projection of divmodI +RegMask Matcher::divI_proj_mask() { + ShouldNotReachHere(); + return RegMask(); +} + +// Register for MODI projection of divmodI +RegMask Matcher::modI_proj_mask() { + ShouldNotReachHere(); + return RegMask(); +} + +// Register for DIVL projection of divmodL +RegMask Matcher::divL_proj_mask() { + ShouldNotReachHere(); + return RegMask(); +} + +int Matcher::regnum_to_fpu_offset(int regnum) { + return regnum - 32; // The FP registers are in the second chunk +} + + +const bool Matcher::isSimpleConstant64(jlong value) { + // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?. + return true; +} + + +// Return whether or not this register is ever used as an argument. This +// function is used on startup to build the trampoline stubs in generateOptoStub. +// Registers not mentioned will be killed by the VM call in the trampoline, and +// arguments in those registers not be available to the callee. 
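Matcher::vector_width_in_bytes() above simply reports MaxVectorSize, and max_vector_size() divides that by the element size, so a 128-bit LSX register holds 16 bytes, i.e. 4 ints, while a 256-bit LASX register holds 8 ints or 4 longs. A quick standalone check of that arithmetic; the single-character type labels here are just stand-ins, not HotSpot's BasicType:

    #include <cassert>

    // Bytes per element for a few Java primitive types.
    static int elem_bytes(char t) {
      switch (t) {
        case 'B': return 1;             // byte
        case 'S': return 2;             // short/char
        case 'I': case 'F': return 4;   // int/float
        case 'J': case 'D': return 8;   // long/double
      }
      return 0;
    }

    int main() {
      const int lsx  = 16;  // 128-bit vector registers
      const int lasx = 32;  // 256-bit vector registers
      assert(lsx  / elem_bytes('I') == 4);
      assert(lsx  / elem_bytes('B') == 16);
      assert(lasx / elem_bytes('I') == 8);
      assert(lasx / elem_bytes('J') == 4);
      return 0;
    }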
+bool Matcher::can_be_java_arg( int reg ) { + // Refer to: [sharedRuntime_loongarch_64.cpp] SharedRuntime::java_calling_convention() + if ( reg == T0_num || reg == T0_H_num + || reg == A0_num || reg == A0_H_num + || reg == A1_num || reg == A1_H_num + || reg == A2_num || reg == A2_H_num + || reg == A3_num || reg == A3_H_num + || reg == A4_num || reg == A4_H_num + || reg == A5_num || reg == A5_H_num + || reg == A6_num || reg == A6_H_num + || reg == A7_num || reg == A7_H_num ) + return true; + + if ( reg == F0_num || reg == F0_H_num + || reg == F1_num || reg == F1_H_num + || reg == F2_num || reg == F2_H_num + || reg == F3_num || reg == F3_H_num + || reg == F4_num || reg == F4_H_num + || reg == F5_num || reg == F5_H_num + || reg == F6_num || reg == F6_H_num + || reg == F7_num || reg == F7_H_num ) + return true; + + return false; +} + +bool Matcher::is_spillable_arg( int reg ) { + return can_be_java_arg(reg); +} + +bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) { + return false; +} + +// Register for MODL projection of divmodL +RegMask Matcher::modL_proj_mask() { + ShouldNotReachHere(); + return RegMask(); +} + +const RegMask Matcher::method_handle_invoke_SP_save_mask() { + return FP_REG_mask(); +} + +// LoongArch doesn't support AES intrinsics +const bool Matcher::pass_original_key_for_aes() { + return false; +} + +int CallStaticJavaDirectNode::compute_padding(int current_offset) const { + const uintx m = alignment_required() - 1; + return mask_bits(current_offset + m, ~m) - current_offset; +} + +int CallDynamicJavaDirectNode::compute_padding(int current_offset) const { + const uintx m = alignment_required() - 1; + return mask_bits(current_offset + m, ~m) - current_offset; +} + +int CallLeafNoFPDirectNode::compute_padding(int current_offset) const { + const uintx m = alignment_required() - 1; + return mask_bits(current_offset + m, ~m) - current_offset; +} + +int CallLeafDirectNode::compute_padding(int current_offset) const { + const uintx m = alignment_required() - 1; + return mask_bits(current_offset + m, ~m) - current_offset; +} + +int CallRuntimeDirectNode::compute_padding(int current_offset) const { + const uintx m = alignment_required() - 1; + return mask_bits(current_offset + m, ~m) - current_offset; +} + +// If CPU can load and store mis-aligned doubles directly then no fixup is +// needed. Else we split the double into 2 integer pieces and move it +// piece-by-piece. Only happens when passing doubles into C code as the +// Java calling convention forces doubles to be aligned. +const bool Matcher::misaligned_doubles_ok = false; +// Do floats take an entire double register or just half? +//const bool Matcher::float_in_double = true; +bool Matcher::float_in_double() { return false; } +// Do ints take an entire long register or just half? +const bool Matcher::int_in_long = true; +// Is it better to copy float constants, or load them directly from memory? +// Intel can load a float constant from a direct address, requiring no +// extra registers. Most RISCs will have to materialize an address into a +// register first, so they would do better to copy the constant from stack. +const bool Matcher::rematerialize_float_constants = false; +// Advertise here if the CPU requires explicit rounding operations +// to implement the UseStrictFP mode. +const bool Matcher::strict_fp_requires_explicit_rounding = false; +// false => size gets scaled to BytesPerLong, ok. 
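can_be_java_arg() above accepts exactly T0, A0 through A7 and F0 through F7, i.e. the registers that SharedRuntime::java_calling_convention() hands arguments in. A much simplified sketch of that shape, assuming integer/reference arguments fill A0..A7 in order and floating-point arguments fill F0..F7, with everything beyond that going to the stack; T0's special role and the real mixed int/FP assignment are defined by the shared runtime code, not here:

    #include <cstdio>

    // Simplified argument-register picker; the authoritative mapping lives in
    // sharedRuntime_loongarch_64.cpp, SharedRuntime::java_calling_convention().
    static const char* int_arg_reg(int idx) {
      static const char* regs[] = { "A0", "A1", "A2", "A3", "A4", "A5", "A6", "A7" };
      return idx < 8 ? regs[idx] : "stack";
    }

    static const char* fp_arg_reg(int idx) {
      static const char* regs[] = { "F0", "F1", "F2", "F3", "F4", "F5", "F6", "F7" };
      return idx < 8 ? regs[idx] : "stack";
    }

    int main() {
      printf("3rd int arg -> %s, 9th fp arg -> %s\n", int_arg_reg(2), fp_arg_reg(8));
      return 0;
    }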
+const bool Matcher::init_array_count_is_in_bytes = false; + +// Indicate if the safepoint node needs the polling page as an input. +// it does if the polling page is more than disp32 away. +bool SafePointNode::needs_polling_address_input() { + return SafepointMechanism::uses_thread_local_poll(); +} + +#ifndef PRODUCT +void MachBreakpointNode::format( PhaseRegAlloc *, outputStream* st ) const { + st->print("BRK"); +} +#endif + +void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc* ra_) const { + MacroAssembler _masm(&cbuf); + __ brk(5); +} + +uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const { + return MachNode::size(ra_); +} + + + +// !!!!! Special hack to get all type of calls to specify the byte offset +// from the start of the call to the point where the return address +// will point. +int MachCallStaticJavaNode::ret_addr_offset() { + // bl + return NativeCall::instruction_size; +} + +int MachCallDynamicJavaNode::ret_addr_offset() { + // lu12i_w IC_Klass, + // ori IC_Klass, + // lu32i_d IC_Klass + // lu52i_d IC_Klass + + // bl + return NativeMovConstReg::instruction_size + NativeCall::instruction_size; +} + +//============================================================================= + +// Figure out which register class each belongs in: rc_int, rc_float, rc_stack +enum RC { rc_bad, rc_int, rc_float, rc_stack }; +static enum RC rc_class( OptoReg::Name reg ) { + if( !OptoReg::is_valid(reg) ) return rc_bad; + if (OptoReg::is_stack(reg)) return rc_stack; + VMReg r = OptoReg::as_VMReg(reg); + if (r->is_Register()) return rc_int; + assert(r->is_FloatRegister(), "must be"); + return rc_float; +} + +// Helper methods for MachSpillCopyNode::implementation(). +static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo, + int src_hi, int dst_hi, uint ireg, outputStream* st) { + int size = 0; + if (cbuf) { + MacroAssembler _masm(cbuf); + int offset = __ offset(); + switch (ireg) { + case Op_VecX: + __ vori_b(as_FloatRegister(Matcher::_regEncode[dst_lo]), as_FloatRegister(Matcher::_regEncode[src_lo]), 0); + break; + case Op_VecY: + __ xvori_b(as_FloatRegister(Matcher::_regEncode[dst_lo]), as_FloatRegister(Matcher::_regEncode[src_lo]), 0); + break; + default: + ShouldNotReachHere(); + } +#ifndef PRODUCT + } else if (!do_size) { + switch (ireg) { + case Op_VecX: + st->print("vori.b %s, %s, 0\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]); + break; + case Op_VecY: + st->print("xvori.b %s, %s, 0\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]); + break; + default: + ShouldNotReachHere(); + } +#endif + } + size += 4; + return size; +} + +static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load, + int stack_offset, int reg, uint ireg, outputStream* st) { + int size = 0; + if (cbuf) { + MacroAssembler _masm(cbuf); + int offset = __ offset(); + if (is_load) { + switch (ireg) { + case Op_VecX: + __ vld(as_FloatRegister(Matcher::_regEncode[reg]), SP, stack_offset); + break; + case Op_VecY: + __ xvld(as_FloatRegister(Matcher::_regEncode[reg]), SP, stack_offset); + break; + default: + ShouldNotReachHere(); + } + } else { // store + switch (ireg) { + case Op_VecX: + __ vst(as_FloatRegister(Matcher::_regEncode[reg]), SP, stack_offset); + break; + case Op_VecY: + __ xvst(as_FloatRegister(Matcher::_regEncode[reg]), SP, stack_offset); + break; + default: + ShouldNotReachHere(); + } + } +#ifndef PRODUCT + } else if (!do_size) { + if (is_load) { + switch (ireg) { + case Op_VecX: + st->print("vld %s, [SP + %d]\t# spill", 
Matcher::regName[reg], stack_offset); + break; + case Op_VecY: + st->print("xvld %s, [SP + %d]\t# spill", Matcher::regName[reg], stack_offset); + break; + default: + ShouldNotReachHere(); + } + } else { // store + switch (ireg) { + case Op_VecX: + st->print("vst %s, [SP + %d]\t# spill", Matcher::regName[reg], stack_offset); + break; + case Op_VecY: + st->print("xvst %s, [SP + %d]\t# spill", Matcher::regName[reg], stack_offset); + break; + default: + ShouldNotReachHere(); + } + } +#endif + } + size += 4; + return size; +} + +static int vec_stack_to_stack_helper(CodeBuffer *cbuf, int src_offset, + int dst_offset, uint ireg, outputStream* st) { + int size = 0; + if (cbuf) { + MacroAssembler _masm(cbuf); + switch (ireg) { + case Op_VecX: + __ vld(F23, SP, src_offset); + __ vst(F23, SP, dst_offset); + break; + case Op_VecY: + __ xvld(F23, SP, src_offset); + __ xvst(F23, SP, dst_offset); + break; + default: + ShouldNotReachHere(); + } +#ifndef PRODUCT + } else { + switch (ireg) { + case Op_VecX: + st->print("vld f23, %d(sp)\n\t" + "vst f23, %d(sp)\t# 128-bit mem-mem spill", + src_offset, dst_offset); + break; + case Op_VecY: + st->print("xvld f23, %d(sp)\n\t" + "xvst f23, %d(sp)\t# 256-bit mem-mem spill", + src_offset, dst_offset); + break; + default: + ShouldNotReachHere(); + } +#endif + } + size += 8; + return size; +} + +uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const { + // Get registers to move + OptoReg::Name src_second = ra_->get_reg_second(in(1)); + OptoReg::Name src_first = ra_->get_reg_first(in(1)); + OptoReg::Name dst_second = ra_->get_reg_second(this ); + OptoReg::Name dst_first = ra_->get_reg_first(this ); + + enum RC src_second_rc = rc_class(src_second); + enum RC src_first_rc = rc_class(src_first); + enum RC dst_second_rc = rc_class(dst_second); + enum RC dst_first_rc = rc_class(dst_first); + + assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" ); + + // Generate spill code! 
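rc_class() above buckets every OptoReg into rc_stack, rc_int or rc_float, and the body of MachSpillCopyNode::implementation() that follows is one large case analysis over the (source, destination) pair. The sketch below summarizes which LoongArch instructions each pair ends up using, as a compact companion to the code that follows (plain C++ for illustration; the instruction names are the ones emitted below, vector copies are handled separately by the vld/vst and vori_b helpers above):

    #include <cstdio>

    enum RC { rc_bad, rc_int, rc_float, rc_stack };

    // Which instruction family the spill copy picks for a (src, dst) pair
    // (64-bit / 32-bit variants respectively).
    static const char* move_kind(RC src, RC dst) {
      if (src == rc_stack && dst == rc_stack) return "ld_d+st_d / ld_w+st_w via AT";
      if (src == rc_stack && dst == rc_int)   return "ld_d / ld_w (or ld_wu)";
      if (src == rc_stack && dst == rc_float) return "fld_d / fld_s";
      if (src == rc_int   && dst == rc_stack) return "st_d / st_w";
      if (src == rc_int   && dst == rc_int)   return "move / move_u32";
      if (src == rc_int   && dst == rc_float) return "movgr2fr_d / movgr2fr_w";
      if (src == rc_float && dst == rc_stack) return "fst_d / fst_s";
      if (src == rc_float && dst == rc_int)   return "movfr2gr_d / movfr2gr_s";
      if (src == rc_float && dst == rc_float) return "fmov_d / fmov_s";
      return "not a supported spill copy";
    }

    int main() {
      printf("stack -> gpr uses: %s\n", move_kind(rc_stack, rc_int));
      return 0;
    }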
+ + if( src_first == dst_first && src_second == dst_second ) + return 0; // Self copy, no move + + if (bottom_type()->isa_vect() != NULL) { + uint ireg = ideal_reg(); + assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity"); + if (src_first_rc == rc_stack && dst_first_rc == rc_stack) { + // mem -> mem + int src_offset = ra_->reg2offset(src_first); + int dst_offset = ra_->reg2offset(dst_first); + vec_stack_to_stack_helper(cbuf, src_offset, dst_offset, ireg, st); + } else if (src_first_rc == rc_float && dst_first_rc == rc_float) { + vec_mov_helper(cbuf, do_size, src_first, dst_first, src_second, dst_second, ireg, st); + } else if (src_first_rc == rc_float && dst_first_rc == rc_stack) { + int stack_offset = ra_->reg2offset(dst_first); + vec_spill_helper(cbuf, do_size, false, stack_offset, src_first, ireg, st); + } else if (src_first_rc == rc_stack && dst_first_rc == rc_float) { + int stack_offset = ra_->reg2offset(src_first); + vec_spill_helper(cbuf, do_size, true, stack_offset, dst_first, ireg, st); + } else { + ShouldNotReachHere(); + } + return 0; + } + + if (src_first_rc == rc_stack) { + // mem -> + if (dst_first_rc == rc_stack) { + // mem -> mem + assert(src_second != dst_first, "overlap"); + if ((src_first & 1) == 0 && src_first + 1 == src_second && + (dst_first & 1) == 0 && dst_first + 1 == dst_second) { + // 64-bit + int src_offset = ra_->reg2offset(src_first); + int dst_offset = ra_->reg2offset(dst_first); + if (cbuf) { + MacroAssembler _masm(cbuf); + __ ld_d(AT, Address(SP, src_offset)); + __ st_d(AT, Address(SP, dst_offset)); +#ifndef PRODUCT + } else { + st->print("\tld_d AT, [SP + #%d]\t# 64-bit mem-mem spill 1\n\t" + "st_d AT, [SP + #%d]", + src_offset, dst_offset); +#endif + } + } else { + // 32-bit + assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); + assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); + // No pushl/popl, so: + int src_offset = ra_->reg2offset(src_first); + int dst_offset = ra_->reg2offset(dst_first); + if (cbuf) { + MacroAssembler _masm(cbuf); + __ ld_w(AT, Address(SP, src_offset)); + __ st_w(AT, Address(SP, dst_offset)); +#ifndef PRODUCT + } else { + st->print("\tld_w AT, [SP + #%d] spill 2\n\t" + "st_w AT, [SP + #%d]\n\t", + src_offset, dst_offset); +#endif + } + } + return 0; + } else if (dst_first_rc == rc_int) { + // mem -> gpr + if ((src_first & 1) == 0 && src_first + 1 == src_second && + (dst_first & 1) == 0 && dst_first + 1 == dst_second) { + // 64-bit + int offset = ra_->reg2offset(src_first); + if (cbuf) { + MacroAssembler _masm(cbuf); + __ ld_d(as_Register(Matcher::_regEncode[dst_first]), Address(SP, offset)); +#ifndef PRODUCT + } else { + st->print("\tld_d %s, [SP + #%d]\t# spill 3", + Matcher::regName[dst_first], + offset); +#endif + } + } else { + // 32-bit + assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); + assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); + int offset = ra_->reg2offset(src_first); + if (cbuf) { + MacroAssembler _masm(cbuf); + if (this->ideal_reg() == Op_RegI) + __ ld_w(as_Register(Matcher::_regEncode[dst_first]), Address(SP, offset)); + else { + if (Assembler::is_simm(offset, 12)) { + __ ld_wu(as_Register(Matcher::_regEncode[dst_first]), Address(SP, offset)); + } else { + __ li(AT, offset); + __ ldx_wu(as_Register(Matcher::_regEncode[dst_first]), SP, AT); + } + } +#ifndef PRODUCT + } else { + if (this->ideal_reg() == Op_RegI) + st->print("\tld_w %s, [SP + #%d]\t# spill 4", + 
Matcher::regName[dst_first], + offset); + else + st->print("\tld_wu %s, [SP + #%d]\t# spill 5", + Matcher::regName[dst_first], + offset); +#endif + } + } + return 0; + } else if (dst_first_rc == rc_float) { + // mem-> xmm + if ((src_first & 1) == 0 && src_first + 1 == src_second && + (dst_first & 1) == 0 && dst_first + 1 == dst_second) { + // 64-bit + int offset = ra_->reg2offset(src_first); + if (cbuf) { + MacroAssembler _masm(cbuf); + __ fld_d( as_FloatRegister(Matcher::_regEncode[dst_first]), Address(SP, offset)); +#ifndef PRODUCT + } else { + st->print("\tfld_d %s, [SP + #%d]\t# spill 6", + Matcher::regName[dst_first], + offset); +#endif + } + } else { + // 32-bit + assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); + assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); + int offset = ra_->reg2offset(src_first); + if (cbuf) { + MacroAssembler _masm(cbuf); + __ fld_s( as_FloatRegister(Matcher::_regEncode[dst_first]), Address(SP, offset)); +#ifndef PRODUCT + } else { + st->print("\tfld_s %s, [SP + #%d]\t# spill 7", + Matcher::regName[dst_first], + offset); +#endif + } + } + } + return 0; + } else if (src_first_rc == rc_int) { + // gpr -> + if (dst_first_rc == rc_stack) { + // gpr -> mem + if ((src_first & 1) == 0 && src_first + 1 == src_second && + (dst_first & 1) == 0 && dst_first + 1 == dst_second) { + // 64-bit + int offset = ra_->reg2offset(dst_first); + if (cbuf) { + MacroAssembler _masm(cbuf); + __ st_d(as_Register(Matcher::_regEncode[src_first]), Address(SP, offset)); +#ifndef PRODUCT + } else { + st->print("\tst_d %s, [SP + #%d] # spill 8", + Matcher::regName[src_first], + offset); +#endif + } + } else { + // 32-bit + assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); + assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); + int offset = ra_->reg2offset(dst_first); + if (cbuf) { + MacroAssembler _masm(cbuf); + __ st_w(as_Register(Matcher::_regEncode[src_first]), Address(SP, offset)); +#ifndef PRODUCT + } else { + st->print("\tst_w %s, [SP + #%d]\t# spill 9", + Matcher::regName[src_first], offset); +#endif + } + } + return 0; + } else if (dst_first_rc == rc_int) { + // gpr -> gpr + if ((src_first & 1) == 0 && src_first + 1 == src_second && + (dst_first & 1) == 0 && dst_first + 1 == dst_second) { + // 64-bit + if (cbuf) { + MacroAssembler _masm(cbuf); + __ move(as_Register(Matcher::_regEncode[dst_first]), + as_Register(Matcher::_regEncode[src_first])); +#ifndef PRODUCT + } else { + st->print("\tmove(64bit) %s <-- %s\t# spill 10", + Matcher::regName[dst_first], + Matcher::regName[src_first]); +#endif + } + return 0; + } else { + // 32-bit + assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); + assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); + if (cbuf) { + MacroAssembler _masm(cbuf); + if (this->ideal_reg() == Op_RegI) + __ move_u32(as_Register(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first])); + else + __ add_d(as_Register(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]), R0); +#ifndef PRODUCT + } else { + st->print("\n\t"); + st->print("move(32-bit) %s <-- %s\t# spill 11", + Matcher::regName[dst_first], + Matcher::regName[src_first]); +#endif + } + return 0; + } + } else if (dst_first_rc == rc_float) { + // gpr -> xmm + if ((src_first & 1) == 0 && src_first + 1 == src_second && + (dst_first & 1) == 0 && dst_first + 1 == dst_second) { + // 64-bit + if (cbuf) 
{ + MacroAssembler _masm(cbuf); + __ movgr2fr_d(as_FloatRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first])); +#ifndef PRODUCT + } else { + st->print("\n\t"); + st->print("movgr2fr_d %s, %s\t# spill 12", + Matcher::regName[dst_first], + Matcher::regName[src_first]); +#endif + } + } else { + // 32-bit + assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); + assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); + if (cbuf) { + MacroAssembler _masm(cbuf); + __ movgr2fr_w(as_FloatRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first])); +#ifndef PRODUCT + } else { + st->print("\n\t"); + st->print("movgr2fr_w %s, %s\t# spill 13", + Matcher::regName[dst_first], + Matcher::regName[src_first]); +#endif + } + } + return 0; + } + } else if (src_first_rc == rc_float) { + // xmm -> + if (dst_first_rc == rc_stack) { + // xmm -> mem + if ((src_first & 1) == 0 && src_first + 1 == src_second && + (dst_first & 1) == 0 && dst_first + 1 == dst_second) { + // 64-bit + int offset = ra_->reg2offset(dst_first); + if (cbuf) { + MacroAssembler _masm(cbuf); + __ fst_d( as_FloatRegister(Matcher::_regEncode[src_first]), Address(SP, offset) ); +#ifndef PRODUCT + } else { + st->print("\n\t"); + st->print("fst_d %s, [SP + #%d]\t# spill 14", + Matcher::regName[src_first], + offset); +#endif + } + } else { + // 32-bit + assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); + assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); + int offset = ra_->reg2offset(dst_first); + if (cbuf) { + MacroAssembler _masm(cbuf); + __ fst_s(as_FloatRegister(Matcher::_regEncode[src_first]), Address(SP, offset)); +#ifndef PRODUCT + } else { + st->print("\n\t"); + st->print("fst_s %s, [SP + #%d]\t# spill 15", + Matcher::regName[src_first], + offset); +#endif + } + } + return 0; + } else if (dst_first_rc == rc_int) { + // xmm -> gpr + if ((src_first & 1) == 0 && src_first + 1 == src_second && + (dst_first & 1) == 0 && dst_first + 1 == dst_second) { + // 64-bit + if (cbuf) { + MacroAssembler _masm(cbuf); + __ movfr2gr_d( as_Register(Matcher::_regEncode[dst_first]), as_FloatRegister(Matcher::_regEncode[src_first])); +#ifndef PRODUCT + } else { + st->print("\n\t"); + st->print("movfr2gr_d %s, %s\t# spill 16", + Matcher::regName[dst_first], + Matcher::regName[src_first]); +#endif + } + } else { + // 32-bit + assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); + assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); + if (cbuf) { + MacroAssembler _masm(cbuf); + __ movfr2gr_s( as_Register(Matcher::_regEncode[dst_first]), as_FloatRegister(Matcher::_regEncode[src_first])); +#ifndef PRODUCT + } else { + st->print("\n\t"); + st->print("movfr2gr_s %s, %s\t# spill 17", + Matcher::regName[dst_first], + Matcher::regName[src_first]); +#endif + } + } + return 0; + } else if (dst_first_rc == rc_float) { + // xmm -> xmm + if ((src_first & 1) == 0 && src_first + 1 == src_second && + (dst_first & 1) == 0 && dst_first + 1 == dst_second) { + // 64-bit + if (cbuf) { + MacroAssembler _masm(cbuf); + __ fmov_d( as_FloatRegister(Matcher::_regEncode[dst_first]), as_FloatRegister(Matcher::_regEncode[src_first])); +#ifndef PRODUCT + } else { + st->print("\n\t"); + st->print("fmov_d %s <-- %s\t# spill 18", + Matcher::regName[dst_first], + Matcher::regName[src_first]); +#endif + } + } else { + // 32-bit + assert(!((src_first & 1) == 0 && src_first + 1 == 
src_second), "no transform"); + assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); + if (cbuf) { + MacroAssembler _masm(cbuf); + __ fmov_s( as_FloatRegister(Matcher::_regEncode[dst_first]), as_FloatRegister(Matcher::_regEncode[src_first])); +#ifndef PRODUCT + } else { + st->print("\n\t"); + st->print("fmov_s %s <-- %s\t# spill 19", + Matcher::regName[dst_first], + Matcher::regName[src_first]); +#endif + } + } + return 0; + } + } + + assert(0," foo "); + Unimplemented(); + return 0; +} + +#ifndef PRODUCT +void MachSpillCopyNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { + implementation( NULL, ra_, false, st ); +} +#endif + +void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { + implementation( &cbuf, ra_, false, NULL ); +} + +uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const { + return MachNode::size(ra_); +} + +//============================================================================= +#ifndef PRODUCT +void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { + Compile *C = ra_->C; + int framesize = C->frame_size_in_bytes(); + + assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); + + st->print_cr("addi_d SP, SP, %d # Rlease stack @ MachEpilogNode", framesize); + st->print("\t"); + st->print_cr("ld_d RA, SP, %d # Restore RA @ MachEpilogNode", -wordSize); + st->print("\t"); + st->print_cr("ld_d FP, SP, %d # Restore FP @ MachEpilogNode", -wordSize*2); + + if( do_polling() && C->is_method_compilation() ) { + st->print("\t"); + if (SafepointMechanism::uses_thread_local_poll()) { + st->print_cr("ld_d AT, poll_offset[thread] #polling_page_address\n\t" + "ld_w AT, [AT]\t" + "# Safepoint: poll for GC"); + } else { + st->print_cr("Poll Safepoint # MachEpilogNode"); + } + } +} +#endif + +void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { + Compile *C = ra_->C; + MacroAssembler _masm(&cbuf); + int framesize = C->frame_size_in_bytes(); + + assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); + + __ ld_d(RA, Address(SP, framesize - wordSize)); + __ ld_d(FP, Address(SP, framesize - wordSize * 2)); + if (Assembler::is_simm(framesize, 12)) { + __ addi_d(SP, SP, framesize); + } else { + __ li(AT, framesize); + __ add_d(SP, SP, AT); + } + + if (StackReservedPages > 0 && C->has_reserved_stack_access()) { + __ reserved_stack_check(); + } + + Register thread = TREG; +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + + if( do_polling() && C->is_method_compilation() ) { + if (SafepointMechanism::uses_thread_local_poll()) { + __ ld_d(AT, thread, in_bytes(Thread::polling_page_offset())); + __ relocate(relocInfo::poll_return_type); + __ ld_w(AT, AT, 0); + } else { + __ li(AT, (long)os::get_polling_page()); + __ relocate(relocInfo::poll_return_type); + __ ld_w(AT, AT, 0); + } + } +} + +uint MachEpilogNode::size(PhaseRegAlloc *ra_) const { + return MachNode::size(ra_); // too many variables; just compute it the hard way +} + +int MachEpilogNode::reloc() const { + return 0; // a large enough number +} + +const Pipeline * MachEpilogNode::pipeline() const { + return MachNode::pipeline_class(); +} + +int MachEpilogNode::safepoint_offset() const { return 0; } + +//============================================================================= + +#ifndef PRODUCT +void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { + int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); + int reg = ra_->get_reg_first(this); + st->print("ADDI_D 
%s, SP, %d @BoxLockNode",Matcher::regName[reg],offset); +} +#endif + + +uint BoxLockNode::size(PhaseRegAlloc *ra_) const { + int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); + + if (Assembler::is_simm(offset, 12)) + return 4; + else + return 3 * 4; +} + +void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { + MacroAssembler _masm(&cbuf); + int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); + int reg = ra_->get_encode(this); + + if (Assembler::is_simm(offset, 12)) { + __ addi_d(as_Register(reg), SP, offset); + } else { + __ lu12i_w(AT, Assembler::split_low20(offset >> 12)); + __ ori(AT, AT, Assembler::split_low12(offset)); + __ add_d(as_Register(reg), SP, AT); + } +} + +int MachCallRuntimeNode::ret_addr_offset() { + // pcaddu18i + // jirl + return NativeFarCall::instruction_size; +} + + +//============================================================================= +#ifndef PRODUCT +void MachNopNode::format( PhaseRegAlloc *, outputStream* st ) const { + st->print("NOP \t# %d bytes pad for loops and calls", 4 * _count); +} +#endif + +void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc * ) const { + MacroAssembler _masm(&cbuf); + int i = 0; + for(i = 0; i < _count; i++) + __ nop(); +} + +uint MachNopNode::size(PhaseRegAlloc *) const { + return 4 * _count; +} +const Pipeline* MachNopNode::pipeline() const { + return MachNode::pipeline_class(); +} + +//============================================================================= + +//============================================================================= +#ifndef PRODUCT +void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { + st->print_cr("load_klass(T4, T0)"); + st->print_cr("\tbeq(T4, iCache, L)"); + st->print_cr("\tjmp(SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type)"); + st->print_cr(" L:"); +} +#endif + + +void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { + MacroAssembler _masm(&cbuf); + int ic_reg = Matcher::inline_cache_reg_encode(); + Label L; + Register receiver = T0; + Register iCache = as_Register(ic_reg); + + __ load_klass(T4, receiver); + __ beq(T4, iCache, L); + __ jmp((address)SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type); + __ bind(L); +} + +uint MachUEPNode::size(PhaseRegAlloc *ra_) const { + return MachNode::size(ra_); +} + + + +//============================================================================= + +const RegMask& MachConstantBaseNode::_out_RegMask = P_REG_mask(); + +int Compile::ConstantTable::calculate_table_base_offset() const { + return 0; // absolute addressing, no offset +} + +bool MachConstantBaseNode::requires_postalloc_expand() const { return false; } +void MachConstantBaseNode::postalloc_expand(GrowableArray *nodes, PhaseRegAlloc *ra_) { + ShouldNotReachHere(); +} + +void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const { + Compile* C = ra_->C; + Compile::ConstantTable& constant_table = C->constant_table(); + MacroAssembler _masm(&cbuf); + + Register Rtoc = as_Register(ra_->get_encode(this)); + CodeSection* consts_section = cbuf.consts(); + int consts_size = consts_section->align_at_start(consts_section->size()); + assert(constant_table.size() == consts_size, "must be equal"); + + if (consts_section->size()) { + assert((CodeBuffer::SECT_CONSTS + 1) == CodeBuffer::SECT_INSTS, + "insts must be immediately follow consts"); + // Materialize the constant table base. 
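BoxLockNode above is a small example of the pattern this file uses whenever an immediate may not fit an instruction field: when the lock-slot offset fits a signed 12-bit field it is a single addi_d (4 bytes, as size() reports), otherwise it is materialized into AT with lu12i_w plus ori and added with add_d (12 bytes). A standalone sketch of both the size choice and why the 20/12 split reassembles any 32-bit offset (lu12i_w writes bits 31..12, ori fills bits 11..0 without sign extension); split_low20/split_low12 below are stand-ins for the macro assembler helpers of the same name:

    #include <cassert>
    #include <cstdint>

    static bool is_simm12(int32_t x) { return -2048 <= x && x <= 2047; }

    // Mirrors BoxLockNode::size(): one instruction or three.
    static int box_lock_code_size(int32_t offset) {
      return is_simm12(offset) ? 4 : 3 * 4;
    }

    // Rebuild an offset from the pieces lu12i_w + ori would load.
    static int32_t rebuild(int32_t offset) {
      int32_t  hi20 = offset >> 12;              // split_low20(offset >> 12), sign kept
      uint32_t lo12 = uint32_t(offset) & 0xfff;  // split_low12(offset)
      return int32_t((int64_t(hi20) << 12) | lo12);
    }

    int main() {
      assert(box_lock_code_size(2047) == 4);
      assert(box_lock_code_size(4096) == 12);
      assert(rebuild(0x12345) == 0x12345);
      assert(rebuild(-8)      == -8);
      return 0;
    }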
+ address baseaddr = cbuf.insts()->start() - consts_size + -(constant_table.table_base_offset()); + jint offs = (baseaddr - __ pc()) >> 2; + guarantee(Assembler::is_simm(offs, 20), "Not signed 20-bit offset"); + __ pcaddi(Rtoc, offs); + } +} + +uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const { + // pcaddi + return 1 * BytesPerInstWord; +} + +#ifndef PRODUCT +void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const { + Register r = as_Register(ra_->get_encode(this)); + st->print("pcaddi %s, &constanttable (constant table base) @ MachConstantBaseNode", r->name()); +} +#endif + + +//============================================================================= +#ifndef PRODUCT +void MachPrologNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { + Compile* C = ra_->C; + + int framesize = C->frame_size_in_bytes(); + int bangsize = C->bang_size_in_bytes(); + assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); + + // Calls to C2R adapters often do not accept exceptional returns. + // We require that their callers must bang for them. But be careful, because + // some VM calls (such as call site linkage) can use several kilobytes of + // stack. But the stack safety zone should account for that. + // See bugs 4446381, 4468289, 4497237. + if (C->need_stack_bang(bangsize)) { + st->print_cr("# stack bang"); st->print("\t"); + } + st->print("st_d RA, %d(SP) @ MachPrologNode\n\t", -wordSize); + st->print("st_d FP, %d(SP) @ MachPrologNode\n\t", -wordSize*2); + st->print("addi_d FP, SP, -%d \n\t", wordSize*2); + st->print("addi_d SP, SP, -%d \t",framesize); +} +#endif + + +void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { + Compile* C = ra_->C; + MacroAssembler _masm(&cbuf); + + int framesize = C->frame_size_in_bytes(); + int bangsize = C->bang_size_in_bytes(); + + assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); + +#ifdef ASSERT + address start = __ pc(); +#endif + + if (C->need_stack_bang(bangsize)) { + __ generate_stack_overflow_check(bangsize); + } + + if (Assembler::is_simm(-framesize, 12)) { + __ addi_d(SP, SP, -framesize); + } else { + __ li(AT, -framesize); + __ add_d(SP, SP, AT); + } + __ st_d(RA, Address(SP, framesize - wordSize)); + __ st_d(FP, Address(SP, framesize - wordSize * 2)); + if (Assembler::is_simm(framesize - wordSize * 2, 12)) { + __ addi_d(FP, SP, framesize - wordSize * 2); + } else { + __ li(AT, framesize - wordSize * 2); + __ add_d(FP, SP, AT); + } + + assert((__ pc() - start) >= 1 * BytesPerInstWord, "No enough room for patch_verified_entry"); + + C->set_frame_complete(cbuf.insts_size()); + if (C->has_mach_constant_base_node()) { + // NOTE: We set the table base offset here because users might be + // emitted before MachConstantBaseNode. 
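MachPrologNode::emit() above allocates the frame and then parks the return address and saved frame pointer in the top two words of the new frame (RA at framesize - wordSize, FP at framesize - 2 * wordSize), which is exactly where MachEpilogNode::emit() earlier reloads them from before releasing the frame. A small sketch of those slot offsets, assuming wordSize == 8 and 16-byte StackAlignmentInBytes as the asserts above require:

    #include <cassert>

    struct SavedSlots { int ra_offset; int fp_offset; };

    // Offsets (from the post-prologue SP) of the saved RA and FP words.
    static SavedSlots saved_slots(int framesize) {
      const int word_size = 8;        // 64-bit wordSize assumed
      assert((framesize & 15) == 0);  // 16-byte stack alignment assumed
      return { framesize - word_size, framesize - 2 * word_size };
    }

    int main() {
      SavedSlots s = saved_slots(64);
      assert(s.ra_offset == 56);   // st_d RA, Address(SP, 56)
      assert(s.fp_offset == 48);   // st_d FP, Address(SP, 48)
      return 0;
    }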
+ Compile::ConstantTable& constant_table = C->constant_table(); + constant_table.set_table_base_offset(constant_table.calculate_table_base_offset()); + } +} + + +uint MachPrologNode::size(PhaseRegAlloc *ra_) const { + return MachNode::size(ra_); // too many variables; just compute it the hard way +} + +int MachPrologNode::reloc() const { + return 0; // a large enough number +} + +bool is_CAS(int opcode) +{ + switch(opcode) { + // We handle these + case Op_CompareAndSwapI: + case Op_CompareAndSwapL: + case Op_CompareAndSwapP: + case Op_CompareAndSwapN: + case Op_GetAndSetI: + case Op_GetAndSetL: + case Op_GetAndSetP: + case Op_GetAndSetN: + case Op_GetAndAddI: + case Op_GetAndAddL: + return true; + default: + return false; + } +} + +bool use_AMO(int opcode) +{ + switch(opcode) { + // We handle these + case Op_StoreI: + case Op_StoreL: + case Op_StoreP: + case Op_StoreN: + case Op_StoreNKlass: + return true; + default: + return false; + } +} + +bool unnecessary_acquire(const Node *barrier) +{ + assert(barrier->is_MemBar(), "expecting a membar"); + + if (UseBarriersForVolatile) { + // we need to plant a dbar + return false; + } + + MemBarNode* mb = barrier->as_MemBar(); + + if (mb->trailing_load_store()) { + Node* load_store = mb->in(MemBarNode::Precedent); + assert(load_store->is_LoadStore(), "unexpected graph shape"); + return is_CAS(load_store->Opcode()); + } + + return false; +} + +bool unnecessary_release(const Node *n) +{ + assert((n->is_MemBar() && n->Opcode() == Op_MemBarRelease), "expecting a release membar"); + + if (UseBarriersForVolatile) { + // we need to plant a dbar + return false; + } + + MemBarNode *barrier = n->as_MemBar(); + + if (!barrier->leading()) { + return false; + } else { + Node* trailing = barrier->trailing_membar(); + MemBarNode* trailing_mb = trailing->as_MemBar(); + assert(trailing_mb->trailing(), "Not a trailing membar?"); + assert(trailing_mb->leading_membar() == n, "inconsistent leading/trailing membars"); + + Node* mem = trailing_mb->in(MemBarNode::Precedent); + if (mem->is_Store()) { + assert(mem->as_Store()->is_release(), ""); + assert(trailing_mb->Opcode() == Op_MemBarVolatile, ""); + return use_AMO(mem->Opcode()); + } else { + assert(mem->is_LoadStore(), ""); + assert(trailing_mb->Opcode() == Op_MemBarAcquire, ""); + return is_CAS(mem->Opcode()); + } + } + + return false; +} + +bool unnecessary_volatile(const Node *n) +{ + // assert n->is_MemBar(); + if (UseBarriersForVolatile) { + // we need to plant a dbar + return false; + } + + MemBarNode *mbvol = n->as_MemBar(); + + bool release = false; + if (mbvol->trailing_store()) { + Node* mem = mbvol->in(MemBarNode::Precedent); + release = use_AMO(mem->Opcode()); + } + + assert(!release || (mbvol->in(MemBarNode::Precedent)->is_Store() && mbvol->in(MemBarNode::Precedent)->as_Store()->is_release()), ""); +#ifdef ASSERT + if (release) { + Node* leading = mbvol->leading_membar(); + assert(leading->Opcode() == Op_MemBarRelease, ""); + assert(leading->as_MemBar()->leading_store(), ""); + assert(leading->as_MemBar()->trailing_membar() == mbvol, ""); + } +#endif + + return release; +} + +bool needs_releasing_store(const Node *n) +{ + // assert n->is_Store(); + if (UseBarriersForVolatile) { + // we use a normal store and dbar combination + return false; + } + + StoreNode *st = n->as_Store(); + + return st->trailing_membar() != NULL; +} + +%} + +//----------ENCODING BLOCK----------------------------------------------------- +// This block specifies the encoding classes used by the compiler to output +// byte streams. 
Encoding classes generate functions which are called by +// Machine Instruction Nodes in order to generate the bit encoding of the +// instruction. Operands specify their base encoding interface with the +// interface keyword. There are currently supported four interfaces, +// REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER. REG_INTER causes an +// operand to generate a function which returns its register number when +// queried. CONST_INTER causes an operand to generate a function which +// returns the value of the constant when queried. MEMORY_INTER causes an +// operand to generate four functions which return the Base Register, the +// Index Register, the Scale Value, and the Offset Value of the operand when +// queried. COND_INTER causes an operand to generate six functions which +// return the encoding code (ie - encoding bits for the instruction) +// associated with each basic boolean condition for a conditional instruction. +// Instructions specify two basic values for encoding. They use the +// ins_encode keyword to specify their encoding class (which must be one of +// the class names specified in the encoding block), and they use the +// opcode keyword to specify, in order, their primary, secondary, and +// tertiary opcode. Only the opcode sections which a particular instruction +// needs for encoding need to be specified. +encode %{ + + enc_class Java_To_Runtime (method meth) %{ // CALL Java_To_Runtime, Java_To_Runtime_Leaf + MacroAssembler _masm(&cbuf); + // This is the instruction starting address for relocation info. + __ block_comment("Java_To_Runtime"); + cbuf.set_insts_mark(); + __ relocate(relocInfo::runtime_call_type); + __ patchable_call((address)$meth$$method); + %} + + enc_class Java_Static_Call (method meth) %{ // JAVA STATIC CALL + // CALL to fixup routine. Fixup routine uses ScopeDesc info to determine + // who we intended to call. + MacroAssembler _masm(&cbuf); + address addr = (address)$meth$$method; + address call; + __ block_comment("Java_Static_Call"); + + if ( !_method ) { + // A call to a runtime wrapper, e.g. new, new_typeArray_Java, uncommon_trap. + call = __ trampoline_call(AddressLiteral(addr, relocInfo::runtime_call_type), &cbuf); + if (call == NULL) { + ciEnv::current()->record_failure("CodeCache is full"); + return; + } + } else { + int method_index = resolved_method_index(cbuf); + RelocationHolder rspec = _optimized_virtual ? 
opt_virtual_call_Relocation::spec(method_index) + : static_call_Relocation::spec(method_index); + call = __ trampoline_call(AddressLiteral(addr, rspec), &cbuf); + if (call == NULL) { + ciEnv::current()->record_failure("CodeCache is full"); + return; + } + // Emit stub for static call + address stub = CompiledStaticCall::emit_to_interp_stub(cbuf); + if (stub == NULL) { + ciEnv::current()->record_failure("CodeCache is full"); + return; + } + } + %} + + + // + // [Ref: LIR_Assembler::ic_call() ] + // + enc_class Java_Dynamic_Call (method meth) %{ // JAVA DYNAMIC CALL + MacroAssembler _masm(&cbuf); + __ block_comment("Java_Dynamic_Call"); + address call = __ ic_call((address)$meth$$method, resolved_method_index(cbuf)); + if (call == NULL) { + ciEnv::current()->record_failure("CodeCache is full"); + return; + } + %} + + + enc_class enc_PartialSubtypeCheck(mRegP result, mRegP sub, mRegP super, mRegI tmp) %{ + Register result = $result$$Register; + Register sub = $sub$$Register; + Register super = $super$$Register; + Register length = $tmp$$Register; + Register tmp = T4; + Label miss; + + // result may be the same as sub + // 47c B40: # B21 B41 <- B20 Freq: 0.155379 + // 47c partialSubtypeCheck result=S1, sub=S1, super=S3, length=S0 + // 4bc mov S2, NULL #@loadConP + // 4c0 beq S1, S2, B21 #@branchConP P=0.999999 C=-1.000000 + // + MacroAssembler _masm(&cbuf); + Label done; + __ check_klass_subtype_slow_path(sub, super, length, tmp, + NULL, &miss, + /*set_cond_codes:*/ true); + // Refer to X86_64's RDI + __ move(result, 0); + __ b(done); + + __ bind(miss); + __ li(result, 1); + __ bind(done); + %} + +%} + + +//---------LOONGARCH FRAME-------------------------------------------------------------- +// Definition of frame structure and management information. +// +// S T A C K L A Y O U T Allocators stack-slot number +// | (to get allocators register number +// G Owned by | | v add SharedInfo::stack0) +// r CALLER | | +// o | +--------+ pad to even-align allocators stack-slot +// w V | pad0 | numbers; owned by CALLER +// t -----------+--------+----> Matcher::_in_arg_limit, unaligned +// h ^ | in | 5 +// | | args | 4 Holes in incoming args owned by SELF +// | | old | | 3 +// | | SP-+--------+----> Matcher::_old_SP, even aligned +// v | | ret | 3 return address +// Owned by +--------+ +// Self | pad2 | 2 pad to align old SP +// | +--------+ 1 +// | | locks | 0 +// | +--------+----> SharedInfo::stack0, even aligned +// | | pad1 | 11 pad to align new SP +// | +--------+ +// | | | 10 +// | | spills | 9 spills +// V | | 8 (pad0 slot for callee) +// -----------+--------+----> Matcher::_out_arg_limit, unaligned +// ^ | out | 7 +// | | args | 6 Holes in outgoing args owned by CALLEE +// Owned by new | | +// Callee SP-+--------+----> Matcher::_new_SP, even aligned +// | | +// +// Note 1: Only region 8-11 is determined by the allocator. Region 0-5 is +// known from SELF's arguments and the Java calling convention. +// Region 6-7 is determined per call site. +// Note 2: If the calling convention leaves holes in the incoming argument +// area, those holes are owned by SELF. Holes in the outgoing area +// are owned by the CALLEE. Holes should not be nessecary in the +// incoming area, as the Java calling convention is completely under +// the control of the AD file. Doubles can be sorted and packed to +// avoid holes. Holes in the outgoing arguments may be nessecary for +// varargs C calling conventions. +// Note 3: Region 0-3 is even aligned, with pad2 as needed. 
Region 3-5 is +// even aligned with pad0 as needed. +// Region 6 is even aligned. Region 6-7 is NOT even aligned; +// region 6-11 is even aligned; it may be padded out more so that +// the region from SP to FP meets the minimum stack alignment. +// Note 4: For I2C adapters, the incoming FP may not meet the minimum stack +// alignment. Region 11, pad1, may be dynamically extended so that +// SP meets the minimum alignment. + + +frame %{ + + stack_direction(TOWARDS_LOW); + + // These two registers define part of the calling convention + // between compiled code and the interpreter. + // SEE StartI2CNode::calling_convention & StartC2INode::calling_convention & StartOSRNode::calling_convention + // for more information. + + inline_cache_reg(T1); // Inline Cache Register + interpreter_method_oop_reg(S3); // Method Oop Register when calling interpreter + + // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset] + cisc_spilling_operand_name(indOffset32); + + // Number of stack slots consumed by locking an object + // generate Compile::sync_stack_slots + sync_stack_slots(2); + + frame_pointer(SP); + + // Interpreter stores its frame pointer in a register which is + // stored to the stack by I2CAdaptors. + // I2CAdaptors convert from interpreted java to compiled java. + + interpreter_frame_pointer(FP); + + // generate Matcher::stack_alignment + stack_alignment(StackAlignmentInBytes); //wordSize = sizeof(char*); + + // Number of stack slots between incoming argument block and the start of + // a new frame. The PROLOG must add this many slots to the stack. The + // EPILOG must remove this many slots. + in_preserve_stack_slots(4); //Now VerifyStackAtCalls is defined as false ! Leave two stack slots for ra and fp + + // Number of outgoing stack slots killed above the out_preserve_stack_slots + // for calls to C. Supports the var-args backing area for register parms. + varargs_C_out_slots_killed(0); + + // The after-PROLOG location of the return address. Location of + // return address specifies a type (REG or STACK) and a number + // representing the register number (i.e. - use a register name) or + // stack slot. + // Ret Addr is on stack in slot 0 if no locks or verification or alignment. + // Otherwise, it is above the locks and verification slot and alignment word + //return_addr(STACK -1+ round_to(1+VerifyStackAtCalls+Compile::current()->sync()*Compile::current()->sync_stack_slots(),WordsPerLong)); + return_addr(REG RA); + + // Body of function which returns an integer array locating + // arguments either in registers or in stack slots. Passed an array + // of ideal registers called "sig" and a "length" count. Stack-slot + // offsets are based on outgoing arguments, i.e. a CALLER setting up + // arguments for a CALLEE. Incoming stack arguments are + // automatically biased by the preserve_stack_slots field above. + + + // will generated to Matcher::calling_convention(OptoRegPair *sig, uint length, bool is_outgoing) + // StartNode::calling_convention call this. + calling_convention %{ + SharedRuntime::java_calling_convention(sig_bt, regs, length, false); + %} + + + + + // Body of function which returns an integer array locating + // arguments either in registers or in stack slots. Passed an array + // of ideal registers called "sig" and a "length" count. Stack-slot + // offsets are based on outgoing arguments, i.e. a CALLER setting up + // arguments for a CALLEE. Incoming stack arguments are + // automatically biased by the preserve_stack_slots field above. 
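The frame block above counts in VMReg stack slots, which are 4 bytes each on a 64-bit VM, i.e. two slots per 8-byte word. Read that way, sync_stack_slots(2) is one word per lock, and in_preserve_stack_slots(4) is the two words the prologue keeps for RA and FP, matching the comment next to it. A trivial standalone check of that slot arithmetic; the 4-byte slot size is assumed here, it comes from VMRegImpl::stack_slot_size rather than from this file:

    #include <cassert>

    int main() {
      const int slot_size = 4;   // VMRegImpl::stack_slot_size (assumed)
      const int word_size = 8;   // 64-bit word

      assert(2 * slot_size == 1 * word_size);  // sync_stack_slots(2): one lock word
      assert(4 * slot_size == 2 * word_size);  // in_preserve_stack_slots(4): RA + FP
      return 0;
    }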
+ + + // SEE CallRuntimeNode::calling_convention for more information. + c_calling_convention %{ + (void) SharedRuntime::c_calling_convention(sig_bt, regs, /*regs2=*/NULL, length); + %} + + + // Location of C & interpreter return values + // register(s) contain(s) return value for Op_StartI2C and Op_StartOSR. + // SEE Matcher::match. + c_return_value %{ + assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" ); + /* -- , -- , Op_RegN, Op_RegI, Op_RegP, Op_RegF, Op_RegD, Op_RegL */ + static int lo[Op_RegL+1] = { 0, 0, V0_num, V0_num, V0_num, F0_num, F0_num, V0_num }; + static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, V0_H_num, OptoReg::Bad, F0_H_num, V0_H_num }; + return OptoRegPair(hi[ideal_reg],lo[ideal_reg]); + %} + + // Location of return values + // register(s) contain(s) return value for Op_StartC2I and Op_Start. + // SEE Matcher::match. + + return_value %{ + assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" ); + /* -- , -- , Op_RegN, Op_RegI, Op_RegP, Op_RegF, Op_RegD, Op_RegL */ + static int lo[Op_RegL+1] = { 0, 0, V0_num, V0_num, V0_num, F0_num, F0_num, V0_num }; + static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, V0_H_num, OptoReg::Bad, F0_H_num, V0_H_num}; + return OptoRegPair(hi[ideal_reg],lo[ideal_reg]); + %} + +%} + +//----------ATTRIBUTES--------------------------------------------------------- +//----------Operand Attributes------------------------------------------------- +op_attrib op_cost(0); // Required cost attribute + +//----------Instruction Attributes--------------------------------------------- +ins_attrib ins_cost(100); // Required cost attribute +ins_attrib ins_size(32); // Required size attribute (in bits) +ins_attrib ins_pc_relative(0); // Required PC Relative flag +ins_attrib ins_short_branch(0); // Required flag: is this instruction a + // non-matching short branch variant of some + // long branch? +ins_attrib ins_alignment(4); // Required alignment attribute (must be a power of 2) + // specifies the alignment that some part of the instruction (not + // necessarily the start) requires. If > 1, a compute_padding() + // function must be provided for the instruction + +//----------OPERANDS----------------------------------------------------------- +// Operand definitions must precede instruction definitions for correct parsing +// in the ADLC because operands constitute user defined types which are used in +// instruction definitions. + +// Vectors + +operand vecX() %{ + constraint(ALLOC_IN_RC(vectorx_reg)); + match(VecX); + + format %{ %} + interface(REG_INTER); +%} + +operand vecY() %{ + constraint(ALLOC_IN_RC(vectory_reg)); + match(VecY); + + format %{ %} + interface(REG_INTER); +%} + +// Flags register, used as output of compare instructions +operand FlagsReg() %{ + constraint(ALLOC_IN_RC(t0_reg)); + match(RegFlags); + + format %{ "T0" %} + interface(REG_INTER); +%} + +//----------Simple Operands---------------------------------------------------- +// TODO: Should we need to define some more special immediate number ? 
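The immediate operands defined next constrain constant nodes (ConI/ConL) to values that fit LoongArch immediate fields: unsigned 1- to 8-bit ranges, signed 10- and 12-bit ranges, plus a handful of specific values the matcher wants to recognize. A minimal, self-contained sketch of the range tests those predicates spell out; the is_simm/is_uimm helpers below are illustrative only, not the port's Assembler API:

// Standalone sketch of the range checks encoded by the immI*/immL* predicates
// that follow (helper names are illustrative, not HotSpot functions).
#include <cassert>
#include <cstdint>

// Signed immediate that fits in `bits` bits (two's complement).
static bool is_simm(int64_t v, unsigned bits) {
  int64_t lo = -(int64_t(1) << (bits - 1));
  int64_t hi =  (int64_t(1) << (bits - 1)) - 1;
  return lo <= v && v <= hi;
}

// Unsigned immediate that fits in `bits` bits.
static bool is_uimm(int64_t v, unsigned bits) {
  return 0 <= v && v < (int64_t(1) << bits);
}

int main() {
  assert(is_uimm(31, 5));       // immIU5/immLU5: 0..31 (e.g. 32-bit shift amounts)
  assert(!is_uimm(32, 5));
  assert(is_simm(-512, 10));    // immI10/immL10: signed 10-bit, -512..511
  assert(is_simm(-2048, 12));   // immI12/immL12: signed 12-bit, -2048..2047
  assert(is_simm(2047, 12));
  assert(!is_simm(2048, 12));
  return 0;
}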
+// Immediate Operands +// Integer Immediate +operand immI() %{ + match(ConI); + + op_cost(20); + format %{ %} + interface(CONST_INTER); +%} + +operand immIU1() %{ + predicate((0 <= n->get_int()) && (n->get_int() <= 1)); + match(ConI); + + op_cost(5); + format %{ %} + interface(CONST_INTER); +%} + +operand immIU2() %{ + predicate((0 <= n->get_int()) && (n->get_int() <= 3)); + match(ConI); + + op_cost(5); + format %{ %} + interface(CONST_INTER); +%} + +operand immIU3() %{ + predicate((0 <= n->get_int()) && (n->get_int() <= 7)); + match(ConI); + + op_cost(5); + format %{ %} + interface(CONST_INTER); +%} + +operand immIU4() %{ + predicate((0 <= n->get_int()) && (n->get_int() <= 15)); + match(ConI); + + op_cost(5); + format %{ %} + interface(CONST_INTER); +%} + +operand immIU5() %{ + predicate((0 <= n->get_int()) && (n->get_int() <= 31)); + match(ConI); + + op_cost(5); + format %{ %} + interface(CONST_INTER); +%} + +operand immIU6() %{ + predicate((0 <= n->get_int()) && (n->get_int() <= 63)); + match(ConI); + + op_cost(5); + format %{ %} + interface(CONST_INTER); +%} + +operand immIU8() %{ + predicate((0 <= n->get_int()) && (n->get_int() <= 255)); + match(ConI); + + op_cost(5); + format %{ %} + interface(CONST_INTER); +%} + +operand immI10() %{ + predicate((-512 <= n->get_int()) && (n->get_int() <= 511)); + match(ConI); + + op_cost(5); + format %{ %} + interface(CONST_INTER); +%} + +operand immI12() %{ + predicate((-2048 <= n->get_int()) && (n->get_int() <= 2047)); + match(ConI); + + op_cost(5); + format %{ %} + interface(CONST_INTER); +%} + +operand immI_M65536() %{ + predicate(n->get_int() == -65536); + match(ConI); + + op_cost(5); + format %{ %} + interface(CONST_INTER); +%} + +// Constant for decrement +operand immI_M1() %{ + predicate(n->get_int() == -1); + match(ConI); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// Constant for zero +operand immI_0() %{ + predicate(n->get_int() == 0); + match(ConI); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +operand immI_1() %{ + predicate(n->get_int() == 1); + match(ConI); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +operand immI_2() %{ + predicate(n->get_int() == 2); + match(ConI); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +operand immI_16() %{ + predicate(n->get_int() == 16); + match(ConI); + + format %{ %} + interface(CONST_INTER); +%} + +operand immI_24() %{ + predicate(n->get_int() == 24); + match(ConI); + + format %{ %} + interface(CONST_INTER); +%} + +// Constant for long shifts +operand immI_32() %{ + predicate(n->get_int() == 32); + match(ConI); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// Constant for byte-wide masking +operand immI_255() %{ + predicate(n->get_int() == 255); + match(ConI); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +operand immI_65535() %{ + predicate(n->get_int() == 65535); + match(ConI); + + op_cost(5); + format %{ %} + interface(CONST_INTER); +%} + +operand immI_MaxI() %{ + predicate(n->get_int() == 2147483647); + match(ConI); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +operand immI_M2047_2048() %{ + predicate((-2047 <= n->get_int()) && (n->get_int() <= 2048)); + match(ConI); + + op_cost(10); + format %{ %} + interface(CONST_INTER); +%} + +// Valid scale values for addressing modes +operand immI_0_3() %{ + predicate(0 <= n->get_int() && (n->get_int() <= 3)); + match(ConI); + + format %{ %} + interface(CONST_INTER); +%} + +operand immI_0_31() %{ + predicate(n->get_int() >= 0 && 
n->get_int() <= 31); + match(ConI); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +operand immI_0_4095() %{ + predicate(n->get_int() >= 0 && n->get_int() <= 4095); + match(ConI); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +operand immI_1_4() %{ + predicate(1 <= n->get_int() && (n->get_int() <= 4)); + match(ConI); + + format %{ %} + interface(CONST_INTER); +%} + +operand immI_32_63() %{ + predicate(n->get_int() >= 32 && n->get_int() <= 63); + match(ConI); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +operand immI_M128_255() %{ + predicate((-128 <= n->get_int()) && (n->get_int() <= 255)); + match(ConI); + + op_cost(5); + format %{ %} + interface(CONST_INTER); +%} + +// Operand for non-negtive integer mask +operand immI_nonneg_mask() %{ + predicate((n->get_int() >= 0) && (Assembler::is_int_mask(n->get_int()) != -1)); + match(ConI); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// Long Immediate +operand immL() %{ + match(ConL); + + op_cost(20); + format %{ %} + interface(CONST_INTER); +%} + +operand immLU5() %{ + predicate((0 <= n->get_long()) && (n->get_long() <= 31)); + match(ConL); + + op_cost(5); + format %{ %} + interface(CONST_INTER); +%} + +operand immL10() %{ + predicate((-512 <= n->get_long()) && (n->get_long() <= 511)); + match(ConL); + + op_cost(5); + format %{ %} + interface(CONST_INTER); +%} + +operand immL12() %{ + predicate((-2048 <= n->get_long()) && (n->get_long() <= 2047)); + match(ConL); + + op_cost(10); + format %{ %} + interface(CONST_INTER); +%} + +// Long Immediate 32-bit signed +operand immL32() +%{ + predicate(n->get_long() == (int)n->get_long()); + match(ConL); + + op_cost(15); + format %{ %} + interface(CONST_INTER); +%} + +// bit 3..6 zero +operand immL_M121() %{ + predicate(n->get_long() == -121L); + match(ConL); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +// bit 0..2 zero +operand immL_M8() %{ + predicate(n->get_long() == -8L); + match(ConL); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +// bit 1..2 zero +operand immL_M7() %{ + predicate(n->get_long() == -7L); + match(ConL); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +// bit 2 zero +operand immL_M5() %{ + predicate(n->get_long() == -5L); + match(ConL); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +// bit 0..1 zero +operand immL_M4() %{ + predicate(n->get_long() == -4L); + match(ConL); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +// Long Immediate zero +operand immL_0() %{ + predicate(n->get_long() == 0L); + match(ConL); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +operand immL_7() %{ + predicate(n->get_long() == 7L); + match(ConL); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +operand immL_MaxUI() %{ + predicate(n->get_long() == 0xFFFFFFFFL); + match(ConL); + op_cost(20); + + format %{ %} + interface(CONST_INTER); +%} + +operand immL_M2047_2048() %{ + predicate((-2047 <= n->get_long()) && (n->get_long() <= 2048)); + match(ConL); + + op_cost(10); + format %{ %} + interface(CONST_INTER); +%} + +operand immL_0_4095() %{ + predicate(n->get_long() >= 0 && n->get_long() <= 4095); + match(ConL); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +// Operand for non-negtive long mask +operand immL_nonneg_mask() %{ + predicate((n->get_long() >= 0) && (Assembler::is_jlong_mask(n->get_long()) != -1)); + match(ConL); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// Pointer Immediate +operand 
immP() %{ + match(ConP); + + op_cost(10); + format %{ %} + interface(CONST_INTER); +%} + +// NULL Pointer Immediate +operand immP_0() %{ + predicate(n->get_ptr() == 0); + match(ConP); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +// Pointer Immediate +operand immP_no_oop_cheap() %{ + predicate(!n->bottom_type()->isa_oop_ptr()); + match(ConP); + + op_cost(5); + // formats are generated automatically for constants and base registers + format %{ %} + interface(CONST_INTER); +%} + +// Pointer for polling page +operand immP_poll() %{ + predicate(n->get_ptr() != 0 && n->get_ptr() == (intptr_t)os::get_polling_page()); + match(ConP); + op_cost(5); + + format %{ %} + interface(CONST_INTER); +%} + +// Pointer Immediate +operand immN() %{ + match(ConN); + + op_cost(10); + format %{ %} + interface(CONST_INTER); +%} + +// NULL Pointer Immediate +operand immN_0() %{ + predicate(n->get_narrowcon() == 0); + match(ConN); + + op_cost(5); + format %{ %} + interface(CONST_INTER); +%} + +operand immNKlass() %{ + match(ConNKlass); + + op_cost(10); + format %{ %} + interface(CONST_INTER); +%} + +// Single-precision floating-point immediate +operand immF() %{ + match(ConF); + + op_cost(20); + format %{ %} + interface(CONST_INTER); +%} + +// Single-precision floating-point zero +operand immF_0() %{ + predicate(jint_cast(n->getf()) == 0); + match(ConF); + + op_cost(5); + format %{ %} + interface(CONST_INTER); +%} + +// Double-precision floating-point immediate +operand immD() %{ + match(ConD); + + op_cost(20); + format %{ %} + interface(CONST_INTER); +%} + +// Double-precision floating-point zero +operand immD_0() %{ + predicate(jlong_cast(n->getd()) == 0); + match(ConD); + + op_cost(5); + format %{ %} + interface(CONST_INTER); +%} + +// Register Operands +// Integer Register +operand mRegI() %{ + constraint(ALLOC_IN_RC(int_reg)); + match(RegI); + + format %{ %} + interface(REG_INTER); +%} + +operand no_Ax_mRegI() %{ + constraint(ALLOC_IN_RC(no_Ax_int_reg)); + match(RegI); + match(mRegI); + + format %{ %} + interface(REG_INTER); +%} + +operand mS0RegI() %{ + constraint(ALLOC_IN_RC(s0_reg)); + match(RegI); + match(mRegI); + + format %{ "S0" %} + interface(REG_INTER); +%} + +operand mS1RegI() %{ + constraint(ALLOC_IN_RC(s1_reg)); + match(RegI); + match(mRegI); + + format %{ "S1" %} + interface(REG_INTER); +%} + +operand mS3RegI() %{ + constraint(ALLOC_IN_RC(s3_reg)); + match(RegI); + match(mRegI); + + format %{ "S3" %} + interface(REG_INTER); +%} + +operand mS4RegI() %{ + constraint(ALLOC_IN_RC(s4_reg)); + match(RegI); + match(mRegI); + + format %{ "S4" %} + interface(REG_INTER); +%} + +operand mS5RegI() %{ + constraint(ALLOC_IN_RC(s5_reg)); + match(RegI); + match(mRegI); + + format %{ "S5" %} + interface(REG_INTER); +%} + +operand mS6RegI() %{ + constraint(ALLOC_IN_RC(s6_reg)); + match(RegI); + match(mRegI); + + format %{ "S6" %} + interface(REG_INTER); +%} + +operand mS7RegI() %{ + constraint(ALLOC_IN_RC(s7_reg)); + match(RegI); + match(mRegI); + + format %{ "S7" %} + interface(REG_INTER); +%} + + +operand mT0RegI() %{ + constraint(ALLOC_IN_RC(t0_reg)); + match(RegI); + match(mRegI); + + format %{ "T0" %} + interface(REG_INTER); +%} + +operand mT1RegI() %{ + constraint(ALLOC_IN_RC(t1_reg)); + match(RegI); + match(mRegI); + + format %{ "T1" %} + interface(REG_INTER); +%} + +operand mT2RegI() %{ + constraint(ALLOC_IN_RC(t2_reg)); + match(RegI); + match(mRegI); + + format %{ "T2" %} + interface(REG_INTER); +%} + +operand mT3RegI() %{ + constraint(ALLOC_IN_RC(t3_reg)); + match(RegI); + match(mRegI); + 
+ format %{ "T3" %} + interface(REG_INTER); +%} + +operand mT8RegI() %{ + constraint(ALLOC_IN_RC(t8_reg)); + match(RegI); + match(mRegI); + + format %{ "T8" %} + interface(REG_INTER); +%} + +operand mT4RegI() %{ + constraint(ALLOC_IN_RC(t4_reg)); + match(RegI); + match(mRegI); + + format %{ "T4" %} + interface(REG_INTER); +%} + +operand mA0RegI() %{ + constraint(ALLOC_IN_RC(a0_reg)); + match(RegI); + match(mRegI); + + format %{ "A0" %} + interface(REG_INTER); +%} + +operand mA1RegI() %{ + constraint(ALLOC_IN_RC(a1_reg)); + match(RegI); + match(mRegI); + + format %{ "A1" %} + interface(REG_INTER); +%} + +operand mA2RegI() %{ + constraint(ALLOC_IN_RC(a2_reg)); + match(RegI); + match(mRegI); + + format %{ "A2" %} + interface(REG_INTER); +%} + +operand mA3RegI() %{ + constraint(ALLOC_IN_RC(a3_reg)); + match(RegI); + match(mRegI); + + format %{ "A3" %} + interface(REG_INTER); +%} + +operand mA4RegI() %{ + constraint(ALLOC_IN_RC(a4_reg)); + match(RegI); + match(mRegI); + + format %{ "A4" %} + interface(REG_INTER); +%} + +operand mA5RegI() %{ + constraint(ALLOC_IN_RC(a5_reg)); + match(RegI); + match(mRegI); + + format %{ "A5" %} + interface(REG_INTER); +%} + +operand mA6RegI() %{ + constraint(ALLOC_IN_RC(a6_reg)); + match(RegI); + match(mRegI); + + format %{ "A6" %} + interface(REG_INTER); +%} + +operand mA7RegI() %{ + constraint(ALLOC_IN_RC(a7_reg)); + match(RegI); + match(mRegI); + + format %{ "A7" %} + interface(REG_INTER); +%} + +operand mRegN() %{ + constraint(ALLOC_IN_RC(int_reg)); + match(RegN); + + format %{ %} + interface(REG_INTER); +%} + +operand t0_RegN() %{ + constraint(ALLOC_IN_RC(t0_reg)); + match(RegN); + match(mRegN); + + format %{ %} + interface(REG_INTER); +%} + +operand t1_RegN() %{ + constraint(ALLOC_IN_RC(t1_reg)); + match(RegN); + match(mRegN); + + format %{ %} + interface(REG_INTER); +%} + +operand t3_RegN() %{ + constraint(ALLOC_IN_RC(t3_reg)); + match(RegN); + match(mRegN); + + format %{ %} + interface(REG_INTER); +%} + +operand t8_RegN() %{ + constraint(ALLOC_IN_RC(t8_reg)); + match(RegN); + match(mRegN); + + format %{ %} + interface(REG_INTER); +%} + +operand a0_RegN() %{ + constraint(ALLOC_IN_RC(a0_reg)); + match(RegN); + match(mRegN); + + format %{ %} + interface(REG_INTER); +%} + +operand a1_RegN() %{ + constraint(ALLOC_IN_RC(a1_reg)); + match(RegN); + match(mRegN); + + format %{ %} + interface(REG_INTER); +%} + +operand a2_RegN() %{ + constraint(ALLOC_IN_RC(a2_reg)); + match(RegN); + match(mRegN); + + format %{ %} + interface(REG_INTER); +%} + +operand a3_RegN() %{ + constraint(ALLOC_IN_RC(a3_reg)); + match(RegN); + match(mRegN); + + format %{ %} + interface(REG_INTER); +%} + +operand a4_RegN() %{ + constraint(ALLOC_IN_RC(a4_reg)); + match(RegN); + match(mRegN); + + format %{ %} + interface(REG_INTER); +%} + +operand a5_RegN() %{ + constraint(ALLOC_IN_RC(a5_reg)); + match(RegN); + match(mRegN); + + format %{ %} + interface(REG_INTER); +%} + +operand a6_RegN() %{ + constraint(ALLOC_IN_RC(a6_reg)); + match(RegN); + match(mRegN); + + format %{ %} + interface(REG_INTER); +%} + +operand a7_RegN() %{ + constraint(ALLOC_IN_RC(a7_reg)); + match(RegN); + match(mRegN); + + format %{ %} + interface(REG_INTER); +%} + +operand s0_RegN() %{ + constraint(ALLOC_IN_RC(s0_reg)); + match(RegN); + match(mRegN); + + format %{ %} + interface(REG_INTER); +%} + +operand s1_RegN() %{ + constraint(ALLOC_IN_RC(s1_reg)); + match(RegN); + match(mRegN); + + format %{ %} + interface(REG_INTER); +%} + +operand s2_RegN() %{ + constraint(ALLOC_IN_RC(s2_reg)); + match(RegN); + match(mRegN); + + 
format %{ %} + interface(REG_INTER); +%} + +operand s3_RegN() %{ + constraint(ALLOC_IN_RC(s3_reg)); + match(RegN); + match(mRegN); + + format %{ %} + interface(REG_INTER); +%} + +operand s4_RegN() %{ + constraint(ALLOC_IN_RC(s4_reg)); + match(RegN); + match(mRegN); + + format %{ %} + interface(REG_INTER); +%} + +operand s5_RegN() %{ + constraint(ALLOC_IN_RC(s5_reg)); + match(RegN); + match(mRegN); + + format %{ %} + interface(REG_INTER); +%} + +operand s6_RegN() %{ + constraint(ALLOC_IN_RC(s6_reg)); + match(RegN); + match(mRegN); + + format %{ %} + interface(REG_INTER); +%} + +operand s7_RegN() %{ + constraint(ALLOC_IN_RC(s7_reg)); + match(RegN); + match(mRegN); + + format %{ %} + interface(REG_INTER); +%} + +// Pointer Register +operand mRegP() %{ + constraint(ALLOC_IN_RC(p_reg)); + match(RegP); + match(a0_RegP); + + format %{ %} + interface(REG_INTER); +%} + +operand no_T8_mRegP() %{ + constraint(ALLOC_IN_RC(no_T8_p_reg)); + match(RegP); + match(mRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand no_Ax_mRegP() %{ + constraint(ALLOC_IN_RC(no_Ax_p_reg)); + match(RegP); + match(mRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand s1_RegP() +%{ + constraint(ALLOC_IN_RC(s1_long_reg)); + match(RegP); + match(mRegP); + match(no_T8_mRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand s3_RegP() +%{ + constraint(ALLOC_IN_RC(s3_long_reg)); + match(RegP); + match(mRegP); + match(no_T8_mRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand s4_RegP() +%{ + constraint(ALLOC_IN_RC(s4_long_reg)); + match(RegP); + match(mRegP); + match(no_T8_mRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand s5_RegP() +%{ + constraint(ALLOC_IN_RC(s5_long_reg)); + match(RegP); + match(mRegP); + match(no_T8_mRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand s6_RegP() +%{ + constraint(ALLOC_IN_RC(s6_long_reg)); + match(RegP); + match(mRegP); + match(no_T8_mRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand s7_RegP() +%{ + constraint(ALLOC_IN_RC(s7_long_reg)); + match(RegP); + match(mRegP); + match(no_T8_mRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand t0_RegP() +%{ + constraint(ALLOC_IN_RC(t0_long_reg)); + match(RegP); + match(mRegP); + match(no_T8_mRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand t1_RegP() +%{ + constraint(ALLOC_IN_RC(t1_long_reg)); + match(RegP); + match(mRegP); + match(no_T8_mRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand t2_RegP() +%{ + constraint(ALLOC_IN_RC(t2_long_reg)); + match(RegP); + match(mRegP); + match(no_T8_mRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand t3_RegP() +%{ + constraint(ALLOC_IN_RC(t3_long_reg)); + match(RegP); + match(mRegP); + match(no_T8_mRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand t8_RegP() +%{ + constraint(ALLOC_IN_RC(t8_long_reg)); + match(RegP); + match(mRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand a0_RegP() +%{ + constraint(ALLOC_IN_RC(a0_long_reg)); + match(RegP); + match(mRegP); + match(no_T8_mRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand a1_RegP() +%{ + constraint(ALLOC_IN_RC(a1_long_reg)); + match(RegP); + match(mRegP); + match(no_T8_mRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand a2_RegP() +%{ + constraint(ALLOC_IN_RC(a2_long_reg)); + match(RegP); + match(mRegP); + match(no_T8_mRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand a3_RegP() +%{ + constraint(ALLOC_IN_RC(a3_long_reg)); + match(RegP); + match(mRegP); + match(no_T8_mRegP); + + format %{ %} 
+ interface(REG_INTER); +%} + +operand a4_RegP() +%{ + constraint(ALLOC_IN_RC(a4_long_reg)); + match(RegP); + match(mRegP); + match(no_T8_mRegP); + + format %{ %} + interface(REG_INTER); +%} + + +operand a5_RegP() +%{ + constraint(ALLOC_IN_RC(a5_long_reg)); + match(RegP); + match(mRegP); + match(no_T8_mRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand a6_RegP() +%{ + constraint(ALLOC_IN_RC(a6_long_reg)); + match(RegP); + match(mRegP); + match(no_T8_mRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand a7_RegP() +%{ + constraint(ALLOC_IN_RC(a7_long_reg)); + match(RegP); + match(mRegP); + match(no_T8_mRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand v0_RegP() +%{ + constraint(ALLOC_IN_RC(v0_long_reg)); + match(RegP); + match(mRegP); + match(no_T8_mRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand v1_RegP() +%{ + constraint(ALLOC_IN_RC(v1_long_reg)); + match(RegP); + match(mRegP); + match(no_T8_mRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand mRegL() %{ + constraint(ALLOC_IN_RC(long_reg)); + match(RegL); + + format %{ %} + interface(REG_INTER); +%} + +operand mRegI2L(mRegI reg) %{ + match(ConvI2L reg); + + format %{ %} + interface(REG_INTER); +%} + +operand mRegL2I(mRegL reg) %{ + match(ConvL2I reg); + + format %{ %} + interface(REG_INTER); +%} + +operand v0RegL() %{ + constraint(ALLOC_IN_RC(v0_long_reg)); + match(RegL); + match(mRegL); + + format %{ %} + interface(REG_INTER); +%} + +operand v1RegL() %{ + constraint(ALLOC_IN_RC(v1_long_reg)); + match(RegL); + match(mRegL); + + format %{ %} + interface(REG_INTER); +%} + +operand a0RegL() %{ + constraint(ALLOC_IN_RC(a0_long_reg)); + match(RegL); + match(mRegL); + + format %{ "A0" %} + interface(REG_INTER); +%} + +operand a1RegL() %{ + constraint(ALLOC_IN_RC(a1_long_reg)); + match(RegL); + match(mRegL); + + format %{ %} + interface(REG_INTER); +%} + +operand a2RegL() %{ + constraint(ALLOC_IN_RC(a2_long_reg)); + match(RegL); + match(mRegL); + + format %{ %} + interface(REG_INTER); +%} + +operand a3RegL() %{ + constraint(ALLOC_IN_RC(a3_long_reg)); + match(RegL); + match(mRegL); + + format %{ %} + interface(REG_INTER); +%} + +operand t0RegL() %{ + constraint(ALLOC_IN_RC(t0_long_reg)); + match(RegL); + match(mRegL); + + format %{ %} + interface(REG_INTER); +%} + +operand t1RegL() %{ + constraint(ALLOC_IN_RC(t1_long_reg)); + match(RegL); + match(mRegL); + + format %{ %} + interface(REG_INTER); +%} + +operand t3RegL() %{ + constraint(ALLOC_IN_RC(t3_long_reg)); + match(RegL); + match(mRegL); + + format %{ %} + interface(REG_INTER); +%} + +operand t8RegL() %{ + constraint(ALLOC_IN_RC(t8_long_reg)); + match(RegL); + match(mRegL); + + format %{ %} + interface(REG_INTER); +%} + +operand a4RegL() %{ + constraint(ALLOC_IN_RC(a4_long_reg)); + match(RegL); + match(mRegL); + + format %{ %} + interface(REG_INTER); +%} + +operand a5RegL() %{ + constraint(ALLOC_IN_RC(a5_long_reg)); + match(RegL); + match(mRegL); + + format %{ %} + interface(REG_INTER); +%} + +operand a6RegL() %{ + constraint(ALLOC_IN_RC(a6_long_reg)); + match(RegL); + match(mRegL); + + format %{ %} + interface(REG_INTER); +%} + +operand a7RegL() %{ + constraint(ALLOC_IN_RC(a7_long_reg)); + match(RegL); + match(mRegL); + + format %{ %} + interface(REG_INTER); +%} + +operand s0RegL() %{ + constraint(ALLOC_IN_RC(s0_long_reg)); + match(RegL); + match(mRegL); + + format %{ %} + interface(REG_INTER); +%} + +operand s1RegL() %{ + constraint(ALLOC_IN_RC(s1_long_reg)); + match(RegL); + match(mRegL); + + format %{ %} + interface(REG_INTER); 
+%} + +operand s3RegL() %{ + constraint(ALLOC_IN_RC(s3_long_reg)); + match(RegL); + match(mRegL); + + format %{ %} + interface(REG_INTER); +%} + +operand s4RegL() %{ + constraint(ALLOC_IN_RC(s4_long_reg)); + match(RegL); + match(mRegL); + + format %{ %} + interface(REG_INTER); +%} + +operand s7RegL() %{ + constraint(ALLOC_IN_RC(s7_long_reg)); + match(RegL); + match(mRegL); + + format %{ %} + interface(REG_INTER); +%} + +// Floating register operands +operand regF() %{ + constraint(ALLOC_IN_RC(flt_reg)); + match(RegF); + + format %{ %} + interface(REG_INTER); +%} + +//Double Precision Floating register operands +operand regD() %{ + constraint(ALLOC_IN_RC(dbl_reg)); + match(RegD); + + format %{ %} + interface(REG_INTER); +%} + +//----------Memory Operands---------------------------------------------------- +// Indirect Memory Operand +operand indirect(mRegP reg) %{ + constraint(ALLOC_IN_RC(p_reg)); + match(reg); + + format %{ "[$reg] @ indirect" %} + interface(MEMORY_INTER) %{ + base($reg); + index(0x0); /* NO_INDEX */ + scale(0x0); + disp(0x0); + %} +%} + +// Indirect Memory Plus Short Offset Operand +operand indOffset12(mRegP reg, immL12 off) +%{ + constraint(ALLOC_IN_RC(p_reg)); + match(AddP reg off); + + op_cost(10); + format %{ "[$reg + $off (12-bit)] @ indOffset12" %} + interface(MEMORY_INTER) %{ + base($reg); + index(0x0); /* NO_INDEX */ + scale(0x0); + disp($off); + %} +%} + +operand indOffset12I2L(mRegP reg, immI12 off) +%{ + constraint(ALLOC_IN_RC(p_reg)); + match(AddP reg (ConvI2L off)); + + op_cost(10); + format %{ "[$reg + $off (12-bit)] @ indOffset12I2L" %} + interface(MEMORY_INTER) %{ + base($reg); + index(0x0); /* NO_INDEX */ + scale(0x0); + disp($off); + %} +%} + +// Indirect Memory Plus Index Register +operand indIndex(mRegP addr, mRegL index) %{ + constraint(ALLOC_IN_RC(p_reg)); + match(AddP addr index); + + op_cost(20); + format %{"[$addr + $index] @ indIndex" %} + interface(MEMORY_INTER) %{ + base($addr); + index($index); + scale(0x0); + disp(0x0); + %} +%} + +operand indIndexI2L(mRegP reg, mRegI ireg) +%{ + constraint(ALLOC_IN_RC(ptr_reg)); + match(AddP reg (ConvI2L ireg)); + op_cost(10); + format %{ "[$reg + $ireg] @ indIndexI2L" %} + interface(MEMORY_INTER) %{ + base($reg); + index($ireg); + scale(0x0); + disp(0x0); + %} +%} + +// Indirect Memory Operand +operand indirectNarrow(mRegN reg) +%{ + predicate(Universe::narrow_oop_shift() == 0); + constraint(ALLOC_IN_RC(p_reg)); + op_cost(10); + match(DecodeN reg); + + format %{ "[$reg] @ indirectNarrow" %} + interface(MEMORY_INTER) %{ + base($reg); + index(0x0); + scale(0x0); + disp(0x0); + %} +%} + +// Indirect Memory Plus Short Offset Operand +operand indOffset12Narrow(mRegN reg, immL12 off) +%{ + predicate(Universe::narrow_oop_shift() == 0); + constraint(ALLOC_IN_RC(p_reg)); + op_cost(10); + match(AddP (DecodeN reg) off); + + format %{ "[$reg + $off (12-bit)] @ indOffset12Narrow" %} + interface(MEMORY_INTER) %{ + base($reg); + index(0x0); + scale(0x0); + disp($off); + %} +%} + +//----------Conditional Branch Operands---------------------------------------- +// Comparison Op - This is the operation of the comparison, and is limited to +// the following set of codes: +// L (<), LE (<=), G (>), GE (>=), E (==), NE (!=) +// +// Other attributes of the comparison, such as unsignedness, are specified +// by the comparison instruction that sets a condition code flags register. +// That result is represented by a flags operand whose subtype is appropriate +// to the unsignedness (etc.) of the comparison. 
+// +// Later, the instruction which matches both the Comparison Op (a Bool) and +// the flags (produced by the Cmp) specifies the coding of the comparison op +// by matching a specific subtype of Bool operand below, such as cmpOp. + +// Comparision Code +operand cmpOp() %{ + match(Bool); + + format %{ "" %} + interface(COND_INTER) %{ + equal(0x01); + not_equal(0x02); + greater(0x03); + greater_equal(0x04); + less(0x05); + less_equal(0x06); + overflow(0x7); + no_overflow(0x8); + %} +%} + +operand cmpOpEqNe() %{ + match(Bool); + predicate(n->as_Bool()->_test._test == BoolTest::ne + || n->as_Bool()->_test._test == BoolTest::eq); + + format %{ "" %} + interface(COND_INTER) %{ + equal(0x01); + not_equal(0x02); + greater(0x03); + greater_equal(0x04); + less(0x05); + less_equal(0x06); + overflow(0x7); + no_overflow(0x8); + %} +%} + +//----------Special Memory Operands-------------------------------------------- +// Stack Slot Operand - This operand is used for loading and storing temporary +// values on the stack where a match requires a value to +// flow through memory. +operand stackSlotP(sRegP reg) %{ + constraint(ALLOC_IN_RC(stack_slots)); + // No match rule because this operand is only generated in matching + op_cost(50); + format %{ "[$reg]" %} + interface(MEMORY_INTER) %{ + base(0x1d); // SP + index(0x0); // No Index + scale(0x0); // No Scale + disp($reg); // Stack Offset + %} +%} + +operand stackSlotI(sRegI reg) %{ + constraint(ALLOC_IN_RC(stack_slots)); + // No match rule because this operand is only generated in matching + op_cost(50); + format %{ "[$reg]" %} + interface(MEMORY_INTER) %{ + base(0x1d); // SP + index(0x0); // No Index + scale(0x0); // No Scale + disp($reg); // Stack Offset + %} +%} + +operand stackSlotF(sRegF reg) %{ + constraint(ALLOC_IN_RC(stack_slots)); + // No match rule because this operand is only generated in matching + op_cost(50); + format %{ "[$reg]" %} + interface(MEMORY_INTER) %{ + base(0x1d); // SP + index(0x0); // No Index + scale(0x0); // No Scale + disp($reg); // Stack Offset + %} +%} + +operand stackSlotD(sRegD reg) %{ + constraint(ALLOC_IN_RC(stack_slots)); + // No match rule because this operand is only generated in matching + op_cost(50); + format %{ "[$reg]" %} + interface(MEMORY_INTER) %{ + base(0x1d); // SP + index(0x0); // No Index + scale(0x0); // No Scale + disp($reg); // Stack Offset + %} +%} + +operand stackSlotL(sRegL reg) %{ + constraint(ALLOC_IN_RC(stack_slots)); + // No match rule because this operand is only generated in matching + op_cost(50); + format %{ "[$reg]" %} + interface(MEMORY_INTER) %{ + base(0x1d); // SP + index(0x0); // No Index + scale(0x0); // No Scale + disp($reg); // Stack Offset + %} +%} + + +//------------------------OPERAND CLASSES-------------------------------------- +opclass memory( indirect, indOffset12, indOffset12I2L, indIndex, indIndexI2L, + indirectNarrow, indOffset12Narrow); +opclass memory_loadRange(indOffset12, indirect); + +opclass mRegLorI2L(mRegI2L, mRegL); +opclass mRegIorL2I( mRegI, mRegL2I); + +//----------PIPELINE----------------------------------------------------------- +// Rules which define the behavior of the target architectures pipeline. 
+ +pipeline %{ + + //----------ATTRIBUTES--------------------------------------------------------- + attributes %{ + fixed_size_instructions; // Fixed size instructions + max_instructions_per_bundle = 1; // 1 instruction per bundle + max_bundles_per_cycle = 4; // Up to 4 bundles per cycle + bundle_unit_size=4; + instruction_unit_size = 4; // An instruction is 4 bytes long + instruction_fetch_unit_size = 16; // The processor fetches one line + instruction_fetch_units = 1; // of 16 bytes + + // List of nop instructions + nops( MachNop ); + %} + + //----------RESOURCES---------------------------------------------------------- + // Resources are the functional units available to the machine + + resources(D1, D2, D3, D4, DECODE = D1 | D2 | D3| D4, ALU1, ALU2, ALU = ALU1 | ALU2, FPU1, FPU2, FPU = FPU1 | FPU2, MEM, BR); + + //----------PIPELINE DESCRIPTION----------------------------------------------- + // Pipeline Description specifies the stages in the machine's pipeline + + // IF: fetch + // ID: decode + // RD: read + // CA: caculate + // WB: write back + // CM: commit + + pipe_desc(IF, ID, RD, CA, WB, CM); + + + //----------PIPELINE CLASSES--------------------------------------------------- + // Pipeline Classes describe the stages in which input and output are + // referenced by the hardware pipeline. + + //No.1 Integer ALU reg-reg operation : dst <-- reg1 op reg2 + pipe_class ialu_regI_regI(mRegI dst, mRegI src1, mRegI src2) %{ + single_instruction; + src1 : RD(read); + src2 : RD(read); + dst : WB(write)+1; + DECODE : ID; + ALU : CA; + %} + + //No.19 Integer mult operation : dst <-- reg1 mult reg2 + pipe_class ialu_mult(mRegI dst, mRegI src1, mRegI src2) %{ + src1 : RD(read); + src2 : RD(read); + dst : WB(write)+5; + DECODE : ID; + ALU2 : CA; + %} + + pipe_class mulL_reg_reg(mRegL dst, mRegL src1, mRegL src2) %{ + src1 : RD(read); + src2 : RD(read); + dst : WB(write)+10; + DECODE : ID; + ALU2 : CA; + %} + + //No.19 Integer div operation : dst <-- reg1 div reg2 + pipe_class ialu_div(mRegI dst, mRegI src1, mRegI src2) %{ + src1 : RD(read); + src2 : RD(read); + dst : WB(write)+10; + DECODE : ID; + ALU2 : CA; + %} + + //No.19 Integer mod operation : dst <-- reg1 mod reg2 + pipe_class ialu_mod(mRegI dst, mRegI src1, mRegI src2) %{ + instruction_count(2); + src1 : RD(read); + src2 : RD(read); + dst : WB(write)+10; + DECODE : ID; + ALU2 : CA; + %} + + //No.15 Long ALU reg-reg operation : dst <-- reg1 op reg2 + pipe_class ialu_regL_regL(mRegL dst, mRegL src1, mRegL src2) %{ + instruction_count(2); + src1 : RD(read); + src2 : RD(read); + dst : WB(write); + DECODE : ID; + ALU : CA; + %} + + //No.18 Long ALU reg-imm16 operation : dst <-- reg1 op imm16 + pipe_class ialu_regL_imm16(mRegL dst, mRegL src) %{ + instruction_count(2); + src : RD(read); + dst : WB(write); + DECODE : ID; + ALU : CA; + %} + + //no.16 load Long from memory : + pipe_class ialu_loadL(mRegL dst, memory mem) %{ + instruction_count(2); + mem : RD(read); + dst : WB(write)+5; + DECODE : ID; + MEM : RD; + %} + + //No.17 Store Long to Memory : + pipe_class ialu_storeL(mRegL src, memory mem) %{ + instruction_count(2); + mem : RD(read); + src : RD(read); + DECODE : ID; + MEM : RD; + %} + + //No.2 Integer ALU reg-imm16 operation : dst <-- reg1 op imm16 + pipe_class ialu_regI_imm16(mRegI dst, mRegI src) %{ + single_instruction; + src : RD(read); + dst : WB(write); + DECODE : ID; + ALU : CA; + %} + + //No.3 Integer move operation : dst <-- reg + pipe_class ialu_regI_mov(mRegI dst, mRegI src) %{ + src : RD(read); + dst : WB(write); + DECODE : 
ID; + ALU : CA; + %} + + //No.4 No instructions : do nothing + pipe_class empty( ) %{ + instruction_count(0); + %} + + //No.5 UnConditional branch : + pipe_class pipe_jump( label labl ) %{ + multiple_bundles; + DECODE : ID; + BR : RD; + %} + + //No.6 ALU Conditional branch : + pipe_class pipe_alu_branch(mRegI src1, mRegI src2, label labl ) %{ + multiple_bundles; + src1 : RD(read); + src2 : RD(read); + DECODE : ID; + BR : RD; + %} + + //no.7 load integer from memory : + pipe_class ialu_loadI(mRegI dst, memory mem) %{ + mem : RD(read); + dst : WB(write)+3; + DECODE : ID; + MEM : RD; + %} + + //No.8 Store Integer to Memory : + pipe_class ialu_storeI(mRegI src, memory mem) %{ + mem : RD(read); + src : RD(read); + DECODE : ID; + MEM : RD; + %} + + + //No.10 Floating FPU reg-reg operation : dst <-- reg1 op reg2 + pipe_class fpu_regF_regF(regF dst, regF src1, regF src2) %{ + src1 : RD(read); + src2 : RD(read); + dst : WB(write); + DECODE : ID; + FPU : CA; + %} + + //No.22 Floating div operation : dst <-- reg1 div reg2 + pipe_class fpu_div(regF dst, regF src1, regF src2) %{ + src1 : RD(read); + src2 : RD(read); + dst : WB(write); + DECODE : ID; + FPU2 : CA; + %} + + pipe_class fcvt_I2D(regD dst, mRegI src) %{ + src : RD(read); + dst : WB(write); + DECODE : ID; + FPU1 : CA; + %} + + pipe_class fcvt_D2I(mRegI dst, regD src) %{ + src : RD(read); + dst : WB(write); + DECODE : ID; + FPU1 : CA; + %} + + pipe_class pipe_mfc1(mRegI dst, regD src) %{ + src : RD(read); + dst : WB(write); + DECODE : ID; + MEM : RD; + %} + + pipe_class pipe_mtc1(regD dst, mRegI src) %{ + src : RD(read); + dst : WB(write); + DECODE : ID; + MEM : RD(5); + %} + + //No.23 Floating sqrt operation : dst <-- reg1 sqrt reg2 + pipe_class fpu_sqrt(regF dst, regF src1, regF src2) %{ + multiple_bundles; + src1 : RD(read); + src2 : RD(read); + dst : WB(write); + DECODE : ID; + FPU2 : CA; + %} + + //No.11 Load Floating from Memory : + pipe_class fpu_loadF(regF dst, memory mem) %{ + instruction_count(1); + mem : RD(read); + dst : WB(write)+3; + DECODE : ID; + MEM : RD; + %} + + //No.12 Store Floating to Memory : + pipe_class fpu_storeF(regF src, memory mem) %{ + instruction_count(1); + mem : RD(read); + src : RD(read); + DECODE : ID; + MEM : RD; + %} + + //No.13 FPU Conditional branch : + pipe_class pipe_fpu_branch(regF src1, regF src2, label labl ) %{ + multiple_bundles; + src1 : RD(read); + src2 : RD(read); + DECODE : ID; + BR : RD; + %} + +//No.14 Floating FPU reg operation : dst <-- op reg + pipe_class fpu1_regF(regF dst, regF src) %{ + src : RD(read); + dst : WB(write); + DECODE : ID; + FPU : CA; + %} + + pipe_class long_memory_op() %{ + instruction_count(10); multiple_bundles; force_serialization; + fixed_latency(30); + %} + + pipe_class simple_call() %{ + instruction_count(10); multiple_bundles; force_serialization; + fixed_latency(200); + BR : RD; + %} + + pipe_class call() %{ + instruction_count(10); multiple_bundles; force_serialization; + fixed_latency(200); + %} + + //FIXME: + //No.9 Piple slow : for multi-instructions + pipe_class pipe_slow( ) %{ + instruction_count(20); + force_serialization; + multiple_bundles; + fixed_latency(50); + %} + +%} + + + +//----------INSTRUCTIONS------------------------------------------------------- +// +// match -- States which machine-independent subtree may be replaced +// by this instruction. 
+// ins_cost -- The estimated cost of this instruction is used by instruction +// selection to identify a minimum cost tree of machine +// instructions that matches a tree of machine-independent +// instructions. +// format -- A string providing the disassembly for this instruction. +// The value of an instruction's operand may be inserted +// by referring to it with a '$' prefix. +// opcode -- Three instruction opcodes may be provided. These are referred +// to within an encode class as $primary, $secondary, and $tertiary +// respectively. The primary opcode is commonly used to +// indicate the type of machine instruction, while secondary +// and tertiary are often used for prefix options or addressing +// modes. +// ins_encode -- A list of encode classes with parameters. The encode class +// name must have been defined in an 'enc_class' specification +// in the encode section of the architecture description. + + +// Load Integer +instruct loadI(mRegI dst, memory mem) %{ + match(Set dst (LoadI mem)); + + ins_cost(125); + format %{ "ld_w $dst, $mem #@loadI" %} + ins_encode %{ + __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_INT); + %} + ins_pipe( ialu_loadI ); +%} + +instruct loadI_convI2L(mRegL dst, memory mem) %{ + match(Set dst (ConvI2L (LoadI mem))); + + ins_cost(125); + format %{ "ld_w $dst, $mem #@loadI_convI2L" %} + ins_encode %{ + __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_INT); + %} + ins_pipe( ialu_loadI ); +%} + +// Load Integer (32 bit signed) to Byte (8 bit signed) +instruct loadI2B(mRegI dst, memory mem, immI_24 twentyfour) %{ + match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour)); + + ins_cost(125); + format %{ "ld_b $dst, $mem\t# int -> byte #@loadI2B" %} + ins_encode %{ + __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_BYTE); + %} + ins_pipe(ialu_loadI); +%} + +// Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned) +instruct loadI2UB(mRegI dst, memory mem, immI_255 mask) %{ + match(Set dst (AndI (LoadI mem) mask)); + + ins_cost(125); + format %{ "ld_bu $dst, $mem\t# int -> ubyte #@loadI2UB" %} + ins_encode %{ + __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_BYTE); + %} + ins_pipe(ialu_loadI); +%} + +// Load Integer (32 bit signed) to Short (16 bit signed) +instruct loadI2S(mRegI dst, memory mem, immI_16 sixteen) %{ + match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen)); + + ins_cost(125); + format %{ "ld_h $dst, $mem\t# int -> short #@loadI2S" %} + ins_encode %{ + __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_SHORT); + %} + ins_pipe(ialu_loadI); +%} + +// Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned) +instruct loadI2US(mRegI dst, memory mem, immI_65535 mask) %{ + match(Set dst (AndI (LoadI mem) mask)); + + ins_cost(125); + format %{ "ld_hu $dst, $mem\t# int -> ushort/char #@loadI2US" %} + ins_encode %{ + __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_SHORT); + %} + ins_pipe(ialu_loadI); +%} + +// Load Long. 
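The narrowing loads above (loadI2B/loadI2UB/loadI2S/loadI2US) fold a 32-bit load plus shift or mask into a single sub-word load. They rely on two facts: shifting left and then arithmetically right by 24 (or 16) sign-extends the low byte (halfword) while masking with 0xff (0xffff) zero-extends it, and, LoongArch being little-endian, that low byte or halfword sits at the same address the int would have been loaded from, so a single ld_b/ld_bu/ld_h/ld_hu covers the whole subtree. A small self-contained check of the arithmetic identities (plain C++, no HotSpot types):

#include <cassert>
#include <cstdint>

// Mirror of the Ideal subtrees; the cast through uint32_t keeps the left
// shift well-defined in C++ (the Ideal graph is two's-complement anyway).
static int32_t shl(int32_t x, int s) {
  return static_cast<int32_t>(static_cast<uint32_t>(x) << s);
}

int main() {
  for (int64_t v = -70000; v <= 70000; v += 7) {
    int32_t x = static_cast<int32_t>(v);
    assert((shl(x, 24) >> 24) == static_cast<int8_t>(x));    // loadI2B:  ld_b
    assert((x & 0xff)         == static_cast<uint8_t>(x));   // loadI2UB: ld_bu
    assert((shl(x, 16) >> 16) == static_cast<int16_t>(x));   // loadI2S:  ld_h
    assert((x & 0xffff)       == static_cast<uint16_t>(x));  // loadI2US: ld_hu
  }
  return 0;
}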
+instruct loadL(mRegL dst, memory mem) %{ +// predicate(!((LoadLNode*)n)->require_atomic_access()); + match(Set dst (LoadL mem)); + + ins_cost(250); + format %{ "ld_d $dst, $mem #@loadL" %} + ins_encode %{ + __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_LONG); + %} + ins_pipe( ialu_loadL ); +%} + +// Load Long - UNaligned +instruct loadL_unaligned(mRegL dst, memory mem) %{ + match(Set dst (LoadL_unaligned mem)); + + // FIXME: Need more effective ldl/ldr + ins_cost(450); + format %{ "ld_d $dst, $mem #@loadL_unaligned\n\t" %} + ins_encode %{ + __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_LONG); + %} + ins_pipe( ialu_loadL ); +%} + +// Store Long +instruct storeL_reg(memory mem, mRegL src) %{ + match(Set mem (StoreL mem src)); + predicate(!needs_releasing_store(n)); + + ins_cost(200); + format %{ "st_d $mem, $src #@storeL_reg\n" %} + ins_encode %{ + __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_LONG); + %} + ins_pipe( ialu_storeL ); +%} + +instruct storeL_reg_volatile(indirect mem, mRegL src) %{ + match(Set mem (StoreL mem src)); + + ins_cost(205); + format %{ "amswap_db_d R0, $src, $mem #@storeL_reg\n" %} + ins_encode %{ + __ amswap_db_d(R0, $src$$Register, as_Register($mem$$base)); + %} + ins_pipe( ialu_storeL ); +%} + +instruct storeL_immL_0(memory mem, immL_0 zero) %{ + match(Set mem (StoreL mem zero)); + predicate(!needs_releasing_store(n)); + + ins_cost(180); + format %{ "st_d zero, $mem #@storeL_immL_0" %} + ins_encode %{ + __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_LONG); + %} + ins_pipe( ialu_storeL ); +%} + +instruct storeL_immL_0_volatile(indirect mem, immL_0 zero) %{ + match(Set mem (StoreL mem zero)); + + ins_cost(185); + format %{ "amswap_db_d AT, R0, $mem #@storeL_immL_0" %} + ins_encode %{ + __ amswap_db_d(AT, R0, as_Register($mem$$base)); + %} + ins_pipe( ialu_storeL ); +%} + +// Load Compressed Pointer +instruct loadN(mRegN dst, memory mem) +%{ + match(Set dst (LoadN mem)); + + ins_cost(125); // XXX + format %{ "ld_wu $dst, $mem\t# compressed ptr @ loadN" %} + ins_encode %{ + relocInfo::relocType disp_reloc = $mem->disp_reloc(); + assert(disp_reloc == relocInfo::none, "cannot have disp"); + __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_INT); + %} + ins_pipe( ialu_loadI ); // XXX +%} + +instruct loadN2P(mRegP dst, memory mem) +%{ + match(Set dst (DecodeN (LoadN mem))); + predicate(Universe::narrow_oop_base() == NULL && Universe::narrow_oop_shift() == 0); + + ins_cost(125); // XXX + format %{ "ld_wu $dst, $mem\t# @ loadN2P" %} + ins_encode %{ + relocInfo::relocType disp_reloc = $mem->disp_reloc(); + assert(disp_reloc == relocInfo::none, "cannot have disp"); + __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_INT); + %} + ins_pipe( ialu_loadI ); // XXX +%} + +// Load Pointer +instruct loadP(mRegP dst, memory mem) %{ + match(Set dst (LoadP mem)); + + ins_cost(125); + format %{ "ld_d $dst, $mem #@loadP" %} + ins_encode %{ + relocInfo::relocType disp_reloc = $mem->disp_reloc(); + assert(disp_reloc == relocInfo::none, "cannot have disp"); + __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_LONG); + %} + ins_pipe( ialu_loadI ); +%} + +// Load Klass Pointer +instruct loadKlass(mRegP dst, memory 
mem) %{ + match(Set dst (LoadKlass mem)); + + ins_cost(125); + format %{ "MOV $dst,$mem @ loadKlass" %} + ins_encode %{ + relocInfo::relocType disp_reloc = $mem->disp_reloc(); + assert(disp_reloc == relocInfo::none, "cannot have disp"); + __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_LONG); + %} + ins_pipe( ialu_loadI ); +%} + +// Load narrow Klass Pointer +instruct loadNKlass(mRegN dst, memory mem) +%{ + match(Set dst (LoadNKlass mem)); + + ins_cost(125); // XXX + format %{ "ld_wu $dst, $mem\t# compressed klass ptr @ loadNKlass" %} + ins_encode %{ + relocInfo::relocType disp_reloc = $mem->disp_reloc(); + assert(disp_reloc == relocInfo::none, "cannot have disp"); + __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_INT); + %} + ins_pipe( ialu_loadI ); // XXX +%} + +instruct loadN2PKlass(mRegP dst, memory mem) +%{ + match(Set dst (DecodeNKlass (LoadNKlass mem))); + predicate(Universe::narrow_klass_base() == NULL && Universe::narrow_klass_shift() == 0); + + ins_cost(125); // XXX + format %{ "ld_wu $dst, $mem\t# compressed klass ptr @ loadN2PKlass" %} + ins_encode %{ + relocInfo::relocType disp_reloc = $mem->disp_reloc(); + assert(disp_reloc == relocInfo::none, "cannot have disp"); + __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_INT); + %} + ins_pipe( ialu_loadI ); // XXX +%} + +// Load Constant +instruct loadConI(mRegI dst, immI src) %{ + match(Set dst src); + + ins_cost(120); + format %{ "mov $dst, $src #@loadConI" %} + ins_encode %{ + Register dst = $dst$$Register; + int value = $src$$constant; + __ li(dst, value); + %} + ins_pipe( ialu_regI_regI ); +%} + + +instruct loadConL(mRegL dst, immL src) %{ + match(Set dst src); + ins_cost(120); + format %{ "li $dst, $src @ loadConL" %} + ins_encode %{ + __ li($dst$$Register, $src$$constant); + %} + ins_pipe(ialu_regL_regL); +%} + +// Load Range +instruct loadRange(mRegI dst, memory_loadRange mem) %{ + match(Set dst (LoadRange mem)); + + ins_cost(125); + format %{ "MOV $dst,$mem @ loadRange" %} + ins_encode %{ + __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_INT); + %} + ins_pipe( ialu_loadI ); +%} + + +instruct storeP(memory mem, mRegP src ) %{ + match(Set mem (StoreP mem src)); + predicate(!needs_releasing_store(n)); + + ins_cost(125); + format %{ "st_d $src, $mem #@storeP" %} + ins_encode %{ + __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_LONG); + %} + ins_pipe( ialu_storeI ); +%} + +instruct storeP_volatile(indirect mem, mRegP src ) %{ + match(Set mem (StoreP mem src)); + + ins_cost(130); + format %{ "amswap_db_d R0, $src, $mem #@storeP" %} + ins_encode %{ + __ amswap_db_d(R0, $src$$Register, as_Register($mem$$base)); + %} + ins_pipe( ialu_storeI ); +%} + +// Store NULL Pointer, mark word, or other simple pointer constant. 
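The storeP/storeP_volatile pair above (and the storeL/storeN variants around it) split on needs_releasing_store(n): ordinary stores go through a plain st_d/st_w, while stores that must be ordered, such as Java volatile writes, use amswap_db_d/amswap_db_w with the old value discarded into R0, i.e. an atomic swap performed purely for its built-in barrier. A rough C++ analogue of that split, illustrative only and not HotSpot code:

// Plain stores need no extra ordering (roughly the st_d case); "releasing"
// stores must not be reordered with surrounding accesses (roughly the
// amswap_db_d-with-R0 case, where the swapped-out old value is thrown away).
#include <atomic>
#include <cstdint>

std::atomic<intptr_t> slot{0};

void plain_store(intptr_t p)   { slot.store(p, std::memory_order_relaxed); }
void ordered_store(intptr_t p) { slot.store(p, std::memory_order_seq_cst); }

int main() { plain_store(1); ordered_store(2); return static_cast<int>(slot.load()); }

Note that the volatile forms take an indirect operand rather than the general memory opclass: the AM* instructions address memory through a base register only, with no displacement or index.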
+instruct storeImmP_immP_0(memory mem, immP_0 zero) %{ + match(Set mem (StoreP mem zero)); + predicate(!needs_releasing_store(n)); + + ins_cost(125); + format %{ "mov $mem, $zero #@storeImmP_0" %} + ins_encode %{ + __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_LONG); + %} + ins_pipe( ialu_storeI ); +%} + +instruct storeImmP_immP_0_volatile(indirect mem, immP_0 zero) %{ + match(Set mem (StoreP mem zero)); + + ins_cost(130); + format %{ "amswap_db_d AT, R0, $mem #@storeImmP_0" %} + ins_encode %{ + __ amswap_db_d(AT, R0, as_Register($mem$$base)); + %} + ins_pipe( ialu_storeI ); +%} + +// Store Compressed Pointer +instruct storeN(memory mem, mRegN src) +%{ + match(Set mem (StoreN mem src)); + predicate(!needs_releasing_store(n)); + + ins_cost(125); // XXX + format %{ "st_w $mem, $src\t# compressed ptr @ storeN" %} + ins_encode %{ + __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); + %} + ins_pipe( ialu_storeI ); +%} + +instruct storeN_volatile(indirect mem, mRegN src) +%{ + match(Set mem (StoreN mem src)); + + ins_cost(130); // XXX + format %{ "amswap_db_w R0, $src, $mem # compressed ptr @ storeN" %} + ins_encode %{ + __ amswap_db_w(R0, $src$$Register, as_Register($mem$$base)); + %} + ins_pipe( ialu_storeI ); +%} + +instruct storeP2N(memory mem, mRegP src) +%{ + match(Set mem (StoreN mem (EncodeP src))); + predicate(Universe::narrow_oop_base() == NULL && Universe::narrow_oop_shift() == 0); + predicate(Universe::narrow_oop_base() == NULL && Universe::narrow_oop_shift() == 0 && !needs_releasing_store(n)); + + ins_cost(125); // XXX + format %{ "st_w $mem, $src\t# @ storeP2N" %} + ins_encode %{ + __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); + %} + ins_pipe( ialu_storeI ); +%} + +instruct storeP2N_volatile(indirect mem, mRegP src) +%{ + match(Set mem (StoreN mem (EncodeP src))); + predicate(Universe::narrow_oop_base() == NULL && Universe::narrow_oop_shift() == 0); + + ins_cost(130); // XXX + format %{ "amswap_db_w R0, $src, $mem # @ storeP2N" %} + ins_encode %{ + __ amswap_db_w(R0, $src$$Register, as_Register($mem$$base)); + %} + ins_pipe( ialu_storeI ); +%} + +instruct storeNKlass(memory mem, mRegN src) +%{ + match(Set mem (StoreNKlass mem src)); + predicate(!needs_releasing_store(n)); + + ins_cost(125); // XXX + format %{ "st_w $mem, $src\t# compressed klass ptr @ storeNKlass" %} + ins_encode %{ + __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); + %} + ins_pipe( ialu_storeI ); +%} + +instruct storeNKlass_volatile(indirect mem, mRegN src) +%{ + match(Set mem (StoreNKlass mem src)); + + ins_cost(130); + format %{ "amswap_db_w R0, $src, $mem # compressed klass ptr @ storeNKlass" %} + ins_encode %{ + __ amswap_db_w(R0, $src$$Register, as_Register($mem$$base)); + %} + ins_pipe( ialu_storeI ); +%} + +instruct storeP2NKlass(memory mem, mRegP src) +%{ + match(Set mem (StoreNKlass mem (EncodePKlass src))); + predicate(Universe::narrow_klass_base() == NULL && Universe::narrow_klass_shift() == 0 && !needs_releasing_store(n)); + + ins_cost(125); // XXX + format %{ "st_w $mem, $src\t# @ storeP2NKlass" %} + ins_encode %{ + __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); + %} + ins_pipe( ialu_storeI ); +%} + +instruct storeP2NKlass_volatile(indirect mem, mRegP src) +%{ + match(Set mem (StoreNKlass mem (EncodePKlass 
src))); + predicate(Universe::narrow_klass_base() == NULL && Universe::narrow_klass_shift() == 0); + + ins_cost(130); + format %{ "amswap_db_w R0, $src, $mem # @ storeP2NKlass" %} + ins_encode %{ + __ amswap_db_w(R0, $src$$Register, as_Register($mem$$base)); + %} + ins_pipe( ialu_storeI ); +%} + +instruct storeImmN_immN_0(memory mem, immN_0 zero) +%{ + match(Set mem (StoreN mem zero)); + predicate(!needs_releasing_store(n)); + + ins_cost(125); // XXX + format %{ "storeN0 zero, $mem\t# compressed ptr" %} + ins_encode %{ + __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); + %} + ins_pipe( ialu_storeI ); +%} + +instruct storeImmN_immN_0_volatile(indirect mem, immN_0 zero) +%{ + match(Set mem (StoreN mem zero)); + + ins_cost(130); // XXX + format %{ "amswap_db_w AT, R0, $mem # compressed ptr" %} + ins_encode %{ + __ amswap_db_w(AT, R0, as_Register($mem$$base)); + %} + ins_pipe( ialu_storeI ); +%} + +// Store Byte +instruct storeB_immB_0(memory mem, immI_0 zero) %{ + match(Set mem (StoreB mem zero)); + + format %{ "mov $mem, zero #@storeB_immB_0" %} + ins_encode %{ + __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_BYTE); + %} + ins_pipe( ialu_storeI ); +%} + +instruct storeB(memory mem, mRegIorL2I src) %{ + match(Set mem (StoreB mem src)); + + ins_cost(125); + format %{ "st_b $src, $mem #@storeB" %} + ins_encode %{ + __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_BYTE); + %} + ins_pipe( ialu_storeI ); +%} + +// Load Byte (8bit signed) +instruct loadB(mRegI dst, memory mem) %{ + match(Set dst (LoadB mem)); + + ins_cost(125); + format %{ "ld_b $dst, $mem #@loadB" %} + ins_encode %{ + __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_BYTE); + %} + ins_pipe( ialu_loadI ); +%} + +instruct loadB_convI2L(mRegL dst, memory mem) %{ + match(Set dst (ConvI2L (LoadB mem))); + + ins_cost(125); + format %{ "ld_b $dst, $mem #@loadB_convI2L" %} + ins_encode %{ + __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_BYTE); + %} + ins_pipe( ialu_loadI ); +%} + +// Load Byte (8bit UNsigned) +instruct loadUB(mRegI dst, memory mem) %{ + match(Set dst (LoadUB mem)); + + ins_cost(125); + format %{ "ld_bu $dst, $mem #@loadUB" %} + ins_encode %{ + __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_BYTE); + %} + ins_pipe( ialu_loadI ); +%} + +instruct loadUB_convI2L(mRegL dst, memory mem) %{ + match(Set dst (ConvI2L (LoadUB mem))); + + ins_cost(125); + format %{ "ld_bu $dst, $mem #@loadUB_convI2L" %} + ins_encode %{ + __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_BYTE); + %} + ins_pipe( ialu_loadI ); +%} + +// Load Short (16bit signed) +instruct loadS(mRegI dst, memory mem) %{ + match(Set dst (LoadS mem)); + + ins_cost(125); + format %{ "ld_h $dst, $mem #@loadS" %} + ins_encode %{ + __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_SHORT); + %} + ins_pipe( ialu_loadI ); +%} + +// Load Short (16 bit signed) to Byte (8 bit signed) +instruct loadS2B(mRegI dst, memory mem, immI_24 twentyfour) %{ + match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour)); + + ins_cost(125); + format %{ "ld_b $dst, $mem\t# short -> byte #@loadS2B" %} + ins_encode %{ + __ loadstore_enc($dst$$Register, $mem$$base, 
$mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_BYTE); + %} + ins_pipe(ialu_loadI); +%} + +instruct loadS_convI2L(mRegL dst, memory mem) %{ + match(Set dst (ConvI2L (LoadS mem))); + + ins_cost(125); + format %{ "ld_h $dst, $mem #@loadS_convI2L" %} + ins_encode %{ + __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_SHORT); + %} + ins_pipe( ialu_loadI ); +%} + +// Store Integer Immediate +instruct storeI_immI_0(memory mem, immI_0 zero) %{ + match(Set mem (StoreI mem zero)); + predicate(!needs_releasing_store(n)); + + ins_cost(120); + format %{ "mov $mem, zero #@storeI_immI_0" %} + ins_encode %{ + __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); + %} + ins_pipe( ialu_storeI ); +%} + +instruct storeI_immI_0_volatile(indirect mem, immI_0 zero) %{ + match(Set mem (StoreI mem zero)); + + ins_cost(125); + format %{ "amswap_db_w AT, R0, $mem #@storeI_immI_0" %} + ins_encode %{ + __ amswap_db_w(AT, R0, as_Register($mem$$base)); + %} + ins_pipe( ialu_storeI ); +%} + +// Store Integer +instruct storeI(memory mem, mRegIorL2I src) %{ + match(Set mem (StoreI mem src)); + predicate(!needs_releasing_store(n)); + + ins_cost(125); + format %{ "st_w $mem, $src #@storeI" %} + ins_encode %{ + __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); + %} + ins_pipe( ialu_storeI ); +%} + +instruct storeI_volatile(indirect mem, mRegIorL2I src) %{ + match(Set mem (StoreI mem src)); + + ins_cost(130); + format %{ "amswap_db_w R0, $src, $mem #@storeI" %} + ins_encode %{ + __ amswap_db_w(R0, $src$$Register, as_Register($mem$$base)); + %} + ins_pipe( ialu_storeI ); +%} + +// Load Float +instruct loadF(regF dst, memory mem) %{ + match(Set dst (LoadF mem)); + + ins_cost(150); + format %{ "loadF $dst, $mem #@loadF" %} + ins_encode %{ + __ loadstore_enc($dst$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_FLOAT); + %} + ins_pipe( ialu_loadI ); +%} + +instruct loadConP_general(mRegP dst, immP src) %{ + match(Set dst src); + + ins_cost(120); + format %{ "li $dst, $src #@loadConP_general" %} + + ins_encode %{ + Register dst = $dst$$Register; + long* value = (long*)$src$$constant; + + if($src->constant_reloc() == relocInfo::metadata_type){ + int klass_index = __ oop_recorder()->find_index((Klass*)value); + RelocationHolder rspec = metadata_Relocation::spec(klass_index); + + __ relocate(rspec); + __ patchable_li52(dst, (long)value); + } else if($src->constant_reloc() == relocInfo::oop_type){ + int oop_index = __ oop_recorder()->find_index((jobject)value); + RelocationHolder rspec = oop_Relocation::spec(oop_index); + + __ relocate(rspec); + __ patchable_li52(dst, (long)value); + } else if ($src->constant_reloc() == relocInfo::none) { + __ li(dst, (long)value); + } + %} + + ins_pipe( ialu_regI_regI ); +%} + +instruct loadConP_no_oop_cheap(mRegP dst, immP_no_oop_cheap src) %{ + match(Set dst src); + + ins_cost(80); + format %{ "li $dst, $src @ loadConP_no_oop_cheap" %} + + ins_encode %{ + if ($src->constant_reloc() == relocInfo::metadata_type) { + __ mov_metadata($dst$$Register, (Metadata*)$src$$constant); + } else { + __ li($dst$$Register, $src$$constant); + } + %} + + ins_pipe(ialu_regI_regI); +%} + + +instruct loadConP_poll(mRegP dst, immP_poll src) %{ + match(Set dst src); + + ins_cost(50); + format %{ "li $dst, $src #@loadConP_poll" %} + + ins_encode %{ + Register dst = $dst$$Register; + intptr_t value = 
(intptr_t)$src$$constant; + + __ li(dst, (jlong)value); + %} + + ins_pipe( ialu_regI_regI ); +%} + +instruct loadConP_immP_0(mRegP dst, immP_0 src) +%{ + match(Set dst src); + + ins_cost(50); + format %{ "mov $dst, R0\t# ptr" %} + ins_encode %{ + Register dst_reg = $dst$$Register; + __ add_d(dst_reg, R0, R0); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct loadConN_immN_0(mRegN dst, immN_0 src) %{ + match(Set dst src); + format %{ "move $dst, R0\t# compressed NULL ptr" %} + ins_encode %{ + __ move($dst$$Register, R0); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct loadConN(mRegN dst, immN src) %{ + match(Set dst src); + + ins_cost(125); + format %{ "li $dst, $src\t# compressed ptr @ loadConN" %} + ins_encode %{ + Register dst = $dst$$Register; + __ set_narrow_oop(dst, (jobject)$src$$constant); + %} + ins_pipe( ialu_regI_regI ); // XXX +%} + +instruct loadConNKlass(mRegN dst, immNKlass src) %{ + match(Set dst src); + + ins_cost(125); + format %{ "li $dst, $src\t# compressed klass ptr @ loadConNKlass" %} + ins_encode %{ + Register dst = $dst$$Register; + __ set_narrow_klass(dst, (Klass*)$src$$constant); + %} + ins_pipe( ialu_regI_regI ); // XXX +%} + +//FIXME +// Tail Call; Jump from runtime stub to Java code. +// Also known as an 'interprocedural jump'. +// Target of jump will eventually return to caller. +// TailJump below removes the return address. +instruct TailCalljmpInd(mRegP jump_target, mRegP method_oop) %{ + match(TailCall jump_target method_oop ); + ins_cost(300); + format %{ "JMP $jump_target \t# @TailCalljmpInd" %} + + ins_encode %{ + Register target = $jump_target$$Register; + Register oop = $method_oop$$Register; + + // RA will be used in generate_forward_exception() + __ push(RA); + + __ move(S3, oop); + __ jr(target); + %} + + ins_pipe( pipe_jump ); +%} + +// Create exception oop: created by stack-crawling runtime code. +// Created exception is now available to this handler, and is setup +// just prior to jumping to this handler. No code emitted. +instruct CreateException( a0_RegP ex_oop ) +%{ + match(Set ex_oop (CreateEx)); + + // use the following format syntax + format %{ "# exception oop is in A0; no code emitted @CreateException" %} + ins_encode %{ + // X86 leaves this function empty + __ block_comment("CreateException is empty in LA"); + %} + ins_pipe( empty ); +// ins_pipe( pipe_jump ); +%} + + +/* The mechanism of exception handling is clear now. + +- Common try/catch: + [stubGenerator_loongarch.cpp] generate_forward_exception() + |- V0, V1 are created + |- T4 <= SharedRuntime::exception_handler_for_return_address + `- jr T4 + `- the caller's exception_handler + `- jr OptoRuntime::exception_blob + `- here +- Rethrow(e.g. 'unwind'): + * The callee: + |- an exception is triggered during execution + `- exits the callee method through RethrowException node + |- The callee pushes exception_oop(T0) and exception_pc(RA) + `- The callee jumps to OptoRuntime::rethrow_stub() + * In OptoRuntime::rethrow_stub: + |- The VM calls _rethrow_Java to determine the return address in the caller method + `- exits the stub with tailjmpInd + |- pops exception_oop(V0) and exception_pc(V1) + `- jumps to the return address(usually an exception_handler) + * The caller: + `- continues processing the exception_blob with V0/V1 +*/ + +// Rethrow exception: +// The exception oop will come in the first argument position. +// Then JUMP (not call) to the rethrow stub code. 
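(Editorial illustration, not part of the patch.) Any C2-compiled Java method that lets an exception escape takes the unwind path sketched in the comment block above: the method exits through the Rethrow node, OptoRuntime::rethrow_stub() asks the VM for the caller's handler, and control tail-jumps there with the exception oop/pc handed over in V0/V1. A minimal trigger is sketched below, assuming parse() becomes hot enough to be C2-compiled; class and method names are invented for the example.

    public class RethrowDemo {
        // parse() does not catch the NumberFormatException, so its compiled
        // frame unwinds through the Rethrow/rethrow_stub path described above.
        static int parse(String s) {
            return Integer.parseInt(s);
        }
        public static void main(String[] args) {
            for (int i = 0; i < 100_000; i++) {         // warm up so C2 compiles parse()
                try {
                    parse(i % 2 == 0 ? "42" : "nope");  // odd iterations throw
                } catch (NumberFormatException e) {
                    // handled in the caller; the callee still unwinds
                }
            }
        }
    }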
+instruct RethrowException() +%{ + match(Rethrow); + + // use the following format syntax + format %{ "JMP rethrow_stub #@RethrowException" %} + ins_encode %{ + __ block_comment("@ RethrowException"); + + cbuf.set_insts_mark(); + cbuf.relocate(cbuf.insts_mark(), runtime_call_Relocation::spec()); + + // call OptoRuntime::rethrow_stub to get the exception handler in parent method + __ patchable_jump((address)OptoRuntime::rethrow_stub()); + %} + ins_pipe( pipe_jump ); +%} + +// ============================================================================ +// Branch Instructions --- long offset versions + +// Jump Direct +instruct jmpDir_long(label labl) %{ + match(Goto); + effect(USE labl); + + ins_cost(300); + format %{ "JMP $labl #@jmpDir_long" %} + + ins_encode %{ + Label* L = $labl$$label; + __ jmp_far(*L); + %} + + ins_pipe( pipe_jump ); + //ins_pc_relative(1); +%} + +// Jump Direct Conditional - Label defines a relative address from Jcc+1 +instruct jmpLoopEnd_long(cmpOp cop, mRegI src1, mRegI src2, label labl) %{ + match(CountedLoopEnd cop (CmpI src1 src2)); + effect(USE labl); + + ins_cost(300); + format %{ "J$cop $src1, $src2, $labl\t# Loop end @ jmpLoopEnd_long" %} + ins_encode %{ + Register op1 = $src1$$Register; + Register op2 = $src2$$Register; + Label* L = $labl$$label; + int flag = $cop$$cmpcode; + + __ cmp_branch_long(flag, op1, op2, L, true /* signed */); + %} + ins_pipe( pipe_jump ); + ins_pc_relative(1); +%} + +instruct jmpLoopEnd_reg_immI_long(cmpOp cop, mRegI src1, immI src2, label labl) %{ + match(CountedLoopEnd cop (CmpI src1 src2)); + effect(USE labl); + + ins_cost(300); + format %{ "J$cop $src1, $src2, $labl\t# Loop end @ jmpLoopEnd_reg_immI_long" %} + ins_encode %{ + Register op1 = $src1$$Register; + Label* L = $labl$$label; + int flag = $cop$$cmpcode; + int val = $src2$$constant; + + if (val == 0) { + __ cmp_branch_long(flag, op1, R0, L, true /* signed */); + } else { + __ li(AT, val); + __ cmp_branch_long(flag, op1, AT, L, true /* signed */); + } + %} + ins_pipe( pipe_jump ); + ins_pc_relative(1); +%} + + +// This match pattern is created for StoreIConditional since I cannot match IfNode without a RegFlags! 
+instruct jmpCon_flags_long(cmpOpEqNe cop, FlagsReg cr, label labl) %{ + match(If cop cr); + effect(USE labl); + + ins_cost(300); + format %{ "J$cop $labl #LoongArch uses T0 as equivalent to eflag @jmpCon_flags_long" %} + + ins_encode %{ + Label* L = $labl$$label; + switch($cop$$cmpcode) { + case 0x01: //equal + __ bne_long($cr$$Register, R0, *L); + break; + case 0x02: //not equal + __ beq_long($cr$$Register, R0, *L); + break; + default: + Unimplemented(); + } + %} + + ins_pipe( pipe_jump ); + ins_pc_relative(1); +%} + +// Conditional jumps +instruct branchConP_0_long(cmpOpEqNe cmp, mRegP op1, immP_0 zero, label labl) %{ + match(If cmp (CmpP op1 zero)); + effect(USE labl); + + ins_cost(180); + format %{ "b$cmp $op1, R0, $labl #@branchConP_0_long" %} + + ins_encode %{ + Register op1 = $op1$$Register; + Label* L = $labl$$label; + int flag = $cmp$$cmpcode; + + __ cmp_branch_long(flag, op1, R0, L, true /* signed */); + %} + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); +%} + +instruct branchConN2P_0_long(cmpOpEqNe cmp, mRegN op1, immP_0 zero, label labl) %{ + match(If cmp (CmpP (DecodeN op1) zero)); + predicate(Universe::narrow_oop_base() == NULL && Universe::narrow_oop_shift() == 0); + effect(USE labl); + + ins_cost(180); + format %{ "b$cmp $op1, R0, $labl #@branchConN2P_0_long" %} + + ins_encode %{ + Register op1 = $op1$$Register; + Label* L = $labl$$label; + int flag = $cmp$$cmpcode; + + __ cmp_branch_long(flag, op1, R0, L, true /* signed */); + %} + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); +%} + + +instruct branchConP_long(cmpOp cmp, mRegP op1, mRegP op2, label labl) %{ + match(If cmp (CmpP op1 op2)); +// predicate(can_branch_register(_kids[0]->_leaf, _kids[1]->_leaf)); + effect(USE labl); + + ins_cost(200); + format %{ "b$cmp $op1, $op2, $labl #@branchConP_long" %} + + ins_encode %{ + Register op1 = $op1$$Register; + Register op2 = $op2$$Register; + Label* L = $labl$$label; + int flag = $cmp$$cmpcode; + + __ cmp_branch_long(flag, op1, op2, L, false /* unsigned */); + %} + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); +%} + +instruct cmpN_null_branch_long(cmpOpEqNe cmp, mRegN op1, immN_0 null, label labl) %{ + match(If cmp (CmpN op1 null)); + effect(USE labl); + + ins_cost(180); + format %{ "CMP $op1,0\t! compressed ptr\n\t" + "BP$cmp $labl @ cmpN_null_branch_long" %} + ins_encode %{ + Register op1 = $op1$$Register; + Label* L = $labl$$label; + int flag = $cmp$$cmpcode; + + __ cmp_branch_long(flag, op1, R0, L, true /* signed */); + %} +//TODO: pipe_branchP or create pipe_branchN LEE + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); +%} + +instruct cmpN_reg_branch_long(cmpOp cmp, mRegN op1, mRegN op2, label labl) %{ + match(If cmp (CmpN op1 op2)); + effect(USE labl); + + ins_cost(180); + format %{ "CMP $op1,$op2\t! 
compressed ptr\n\t" + "BP$cmp $labl @ cmpN_reg_branch_long" %} + ins_encode %{ + Register op1 = $op1$$Register; + Register op2 = $op2$$Register; + Label* L = $labl$$label; + int flag = $cmp$$cmpcode; + + __ cmp_branch_long(flag, op1, op2, L, false /* unsigned */); + %} + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); +%} + +instruct branchConIU_reg_reg_long(cmpOp cmp, mRegI src1, mRegI src2, label labl) %{ + match( If cmp (CmpU src1 src2) ); + effect(USE labl); + format %{ "BR$cmp $src1, $src2, $labl #@branchConIU_reg_reg_long" %} + + ins_encode %{ + Register op1 = $src1$$Register; + Register op2 = $src2$$Register; + Label* L = $labl$$label; + int flag = $cmp$$cmpcode; + + __ cmp_branch_long(flag, op1, op2, L, false /* unsigned */); + %} + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); +%} + + +instruct branchConIU_reg_imm_long(cmpOp cmp, mRegI src1, immI src2, label labl) %{ + match( If cmp (CmpU src1 src2) ); + effect(USE labl); + format %{ "BR$cmp $src1, $src2, $labl #@branchConIU_reg_imm_long" %} + + ins_encode %{ + Register op1 = $src1$$Register; + int val = $src2$$constant; + Label* L = $labl$$label; + int flag = $cmp$$cmpcode; + + if (val == 0) { + __ cmp_branch_long(flag, op1, R0, L, false /* unsigned */); + } else { + __ li(AT, val); + __ cmp_branch_long(flag, op1, AT, L, false /* unsigned */); + } + %} + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); +%} + +instruct branchConI_reg_reg_long(cmpOp cmp, mRegI src1, mRegI src2, label labl) %{ + match( If cmp (CmpI src1 src2) ); + effect(USE labl); + format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_reg_long" %} + + ins_encode %{ + Register op1 = $src1$$Register; + Register op2 = $src2$$Register; + Label* L = $labl$$label; + int flag = $cmp$$cmpcode; + + __ cmp_branch_long(flag, op1, op2, L, true /* signed */); + %} + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); +%} + +instruct branchConI_reg_imm_long(cmpOp cmp, mRegI src1, immI src2, label labl) %{ + match( If cmp (CmpI src1 src2) ); + effect(USE labl); + ins_cost(200); + format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_imm_long" %} + + ins_encode %{ + Register op1 = $src1$$Register; + int val = $src2$$constant; + Label* L = $labl$$label; + int flag = $cmp$$cmpcode; + + if (val == 0) { + __ cmp_branch_long(flag, op1, R0, L, true /* signed */); + } else { + __ li(AT, val); + __ cmp_branch_long(flag, op1, AT, L, true /* signed */); + } + %} + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); +%} + +instruct branchConL_regL_regL_long(cmpOp cmp, mRegLorI2L src1, mRegLorI2L src2, label labl) %{ + match( If cmp (CmpL src1 src2) ); + effect(USE labl); + format %{ "BR$cmp $src1, $src2, $labl #@branchConL_regL_regL_long" %} + ins_cost(250); + + ins_encode %{ + Register op1 = as_Register($src1$$reg); + Register op2 = as_Register($src2$$reg); + + Label* target = $labl$$label; + int flag = $cmp$$cmpcode; + + __ cmp_branch_long(flag, op1, op2, target, true /* signed */); + %} + + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); +%} + +instruct branchConUL_regL_regL_long(cmpOp cmp, mRegLorI2L src1, mRegLorI2L src2, label labl) %{ + match(If cmp (CmpUL src1 src2)); + effect(USE labl); + format %{ "BR$cmp $src1, $src2, $labl #@branchConUL_regL_regL_long" %} + ins_cost(250); + + ins_encode %{ + Register op1 = as_Register($src1$$reg); + Register op2 = as_Register($src2$$reg); + Label* target = $labl$$label; + int flag = $cmp$$cmpcode; + + __ cmp_branch_long(flag, op1, op2, target, false /* signed */); + %} + + ins_pc_relative(1); + ins_pipe(pipe_alu_branch); 
+%} + +instruct branchConL_regL_immL_long(cmpOp cmp, mRegL src1, immL src2, label labl) %{ + match( If cmp (CmpL src1 src2) ); + effect(USE labl); + format %{ "BR$cmp $src1, $src2, $labl #@branchConL_regL_immL_long" %} + ins_cost(180); + + ins_encode %{ + Register op1 = as_Register($src1$$reg); + Label* target = $labl$$label; + int flag = $cmp$$cmpcode; + long val = $src2$$constant; + + if (val == 0) { + __ cmp_branch_long(flag, op1, R0, target, true /* signed */); + } else { + __ li(AT, val); + __ cmp_branch_long(flag, op1, AT, target, true /* signed */); + } + %} + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); +%} + +instruct branchConUL_regL_immL_long(cmpOp cmp, mRegL src1, immL src2, label labl) %{ + match(If cmp (CmpUL src1 src2)); + effect(USE labl); + format %{ "BR$cmp $src1, $src2, $labl #@branchConUL_regL_immL_long" %} + ins_cost(180); + + ins_encode %{ + Register op1 = as_Register($src1$$reg); + long val = $src2$$constant; + Label* target = $labl$$label; + int flag = $cmp$$cmpcode; + + if (val == 0) { + __ cmp_branch_long(flag, op1, R0, target, false /* signed */); + } else { + __ li(AT, val); + __ cmp_branch_long(flag, op1, AT, target, false /* signed */); + } + %} + + ins_pc_relative(1); + ins_pipe(pipe_alu_branch); +%} + +//FIXME +instruct branchConF_reg_reg_long(cmpOp cmp, regF src1, regF src2, label labl) %{ + match( If cmp (CmpF src1 src2) ); + effect(USE labl); + format %{ "BR$cmp $src1, $src2, $labl #@branchConF_reg_reg_long" %} + + ins_encode %{ + FloatRegister reg_op1 = $src1$$FloatRegister; + FloatRegister reg_op2 = $src2$$FloatRegister; + Label* L = $labl$$label; + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: //equal + __ fcmp_ceq_s(FCC0, reg_op1, reg_op2); + __ bc1t_long(*L); + break; + case 0x02: //not_equal + __ fcmp_ceq_s(FCC0, reg_op1, reg_op2); + __ bc1f_long(*L); + break; + case 0x03: //greater + __ fcmp_cule_s(FCC0, reg_op1, reg_op2); + __ bc1f_long(*L); + break; + case 0x04: //greater_equal + __ fcmp_cult_s(FCC0, reg_op1, reg_op2); + __ bc1f_long(*L); + break; + case 0x05: //less + __ fcmp_cult_s(FCC0, reg_op1, reg_op2); + __ bc1t_long(*L); + break; + case 0x06: //less_equal + __ fcmp_cule_s(FCC0, reg_op1, reg_op2); + __ bc1t_long(*L); + break; + default: + Unimplemented(); + } + %} + + ins_pc_relative(1); + ins_pipe(pipe_slow); +%} + +instruct branchConD_reg_reg_long(cmpOp cmp, regD src1, regD src2, label labl) %{ + match( If cmp (CmpD src1 src2) ); + effect(USE labl); + format %{ "BR$cmp $src1, $src2, $labl #@branchConD_reg_reg_long" %} + + ins_encode %{ + FloatRegister reg_op1 = $src1$$FloatRegister; + FloatRegister reg_op2 = $src2$$FloatRegister; + Label* L = $labl$$label; + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: //equal + __ fcmp_ceq_d(FCC0, reg_op1, reg_op2); + __ bc1t_long(*L); + break; + case 0x02: //not_equal + // c_ueq_d cannot distinguish NaN from equal. Double.isNaN(Double) is implemented by 'f != f', so the use of c_ueq_d causes bugs. 
+ __ fcmp_ceq_d(FCC0, reg_op1, reg_op2); + __ bc1f_long(*L); + break; + case 0x03: //greater + __ fcmp_cule_d(FCC0, reg_op1, reg_op2); + __ bc1f_long(*L); + break; + case 0x04: //greater_equal + __ fcmp_cult_d(FCC0, reg_op1, reg_op2); + __ bc1f_long(*L); + break; + case 0x05: //less + __ fcmp_cult_d(FCC0, reg_op1, reg_op2); + __ bc1t_long(*L); + break; + case 0x06: //less_equal + __ fcmp_cule_d(FCC0, reg_op1, reg_op2); + __ bc1t_long(*L); + break; + default: + Unimplemented(); + } + %} + + ins_pc_relative(1); + ins_pipe(pipe_slow); +%} + + +// ============================================================================ +// Branch Instructions -- short offset versions + +// Jump Direct +instruct jmpDir_short(label labl) %{ + match(Goto); + effect(USE labl); + + ins_cost(300); + format %{ "JMP $labl #@jmpDir_short" %} + + ins_encode %{ + Label &L = *($labl$$label); + if(&L) + __ b(L); + else + __ b(int(0)); + %} + + ins_pipe( pipe_jump ); + ins_pc_relative(1); + ins_short_branch(1); +%} + +// Jump Direct Conditional - Label defines a relative address from Jcc+1 +instruct jmpLoopEnd_short(cmpOp cop, mRegI src1, mRegI src2, label labl) %{ + match(CountedLoopEnd cop (CmpI src1 src2)); + effect(USE labl); + + ins_cost(300); + format %{ "J$cop $src1, $src2, $labl\t# Loop end @ jmpLoopEnd_short" %} + ins_encode %{ + Register op1 = $src1$$Register; + Register op2 = $src2$$Register; + Label &L = *($labl$$label); + int flag = $cop$$cmpcode; + + __ cmp_branch_short(flag, op1, op2, L, true /* signed */); + %} + ins_pipe( pipe_jump ); + ins_pc_relative(1); + ins_short_branch(1); +%} + +instruct jmpLoopEnd_reg_immI_short(cmpOp cop, mRegI src1, immI src2, label labl) %{ + match(CountedLoopEnd cop (CmpI src1 src2)); + effect(USE labl); + + ins_cost(300); + format %{ "J$cop $src1, $src2, $labl\t# Loop end @ jmpLoopEnd_reg_immI_short" %} + ins_encode %{ + Register op1 = $src1$$Register; + Label &L = *($labl$$label); + int flag = $cop$$cmpcode; + int val = $src2$$constant; + + if (val == 0) { + __ cmp_branch_short(flag, op1, R0, L, true /* signed */); + } else { + __ li(AT, val); + __ cmp_branch_short(flag, op1, AT, L, true /* signed */); + } + %} + ins_pipe( pipe_jump ); + ins_pc_relative(1); + ins_short_branch(1); +%} + + +// This match pattern is created for StoreIConditional since I cannot match IfNode without a RegFlags! 
+instruct jmpCon_flags_short(cmpOpEqNe cop, FlagsReg cr, label labl) %{ + match(If cop cr); + effect(USE labl); + + ins_cost(300); + format %{ "J$cop $labl #LoongArch uses T0 as equivalent to eflag @jmpCon_flags_short" %} + + ins_encode %{ + Label &L = *($labl$$label); + switch($cop$$cmpcode) { + case 0x01: //equal + if (&L) + __ bnez($cr$$Register, L); + else + __ bnez($cr$$Register, (int)0); + break; + case 0x02: //not equal + if (&L) + __ beqz($cr$$Register, L); + else + __ beqz($cr$$Register, (int)0); + break; + default: + Unimplemented(); + } + %} + + ins_pipe( pipe_jump ); + ins_pc_relative(1); + ins_short_branch(1); +%} + +// Conditional jumps +instruct branchConP_0_short(cmpOpEqNe cmp, mRegP op1, immP_0 zero, label labl) %{ + match(If cmp (CmpP op1 zero)); + effect(USE labl); + + ins_cost(180); + format %{ "b$cmp $op1, R0, $labl #@branchConP_0_short" %} + + ins_encode %{ + Register op1 = $op1$$Register; + Label &L = *($labl$$label); + int flag = $cmp$$cmpcode; + + __ cmp_branchEqNe_off21(flag, op1, L); + %} + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); + ins_short_branch(1); +%} + +instruct branchConN2P_0_short(cmpOpEqNe cmp, mRegN op1, immP_0 zero, label labl) %{ + match(If cmp (CmpP (DecodeN op1) zero)); + predicate(Universe::narrow_oop_base() == NULL && Universe::narrow_oop_shift() == 0); + effect(USE labl); + + ins_cost(180); + format %{ "b$cmp $op1, R0, $labl #@branchConN2P_0_short" %} + + ins_encode %{ + Register op1 = $op1$$Register; + Label &L = *($labl$$label); + int flag = $cmp$$cmpcode; + + __ cmp_branchEqNe_off21(flag, op1, L); + %} + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); + ins_short_branch(1); +%} + + +instruct branchConP_short(cmpOp cmp, mRegP op1, mRegP op2, label labl) %{ + match(If cmp (CmpP op1 op2)); +// predicate(can_branch_register(_kids[0]->_leaf, _kids[1]->_leaf)); + effect(USE labl); + + ins_cost(200); + format %{ "b$cmp $op1, $op2, $labl #@branchConP_short" %} + + ins_encode %{ + Register op1 = $op1$$Register; + Register op2 = $op2$$Register; + Label &L = *($labl$$label); + int flag = $cmp$$cmpcode; + + __ cmp_branch_short(flag, op1, op2, L, false /* unsigned */); + %} + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); + ins_short_branch(1); +%} + +instruct cmpN_null_branch_short(cmpOp cmp, mRegN op1, immN_0 null, label labl) %{ + match(If cmp (CmpN op1 null)); + effect(USE labl); + + ins_cost(180); + format %{ "CMP $op1,0\t! compressed ptr\n\t" + "BP$cmp $labl @ cmpN_null_branch_short" %} + ins_encode %{ + Register op1 = $op1$$Register; + Label &L = *($labl$$label); + int flag = $cmp$$cmpcode; + + __ cmp_branchEqNe_off21(flag, op1, L); + %} +//TODO: pipe_branchP or create pipe_branchN LEE + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); + ins_short_branch(1); +%} + +instruct cmpN_reg_branch_short(cmpOp cmp, mRegN op1, mRegN op2, label labl) %{ + match(If cmp (CmpN op1 op2)); + effect(USE labl); + + ins_cost(180); + format %{ "CMP $op1,$op2\t! 
compressed ptr\n\t" + "BP$cmp $labl @ cmpN_reg_branch_short" %} + ins_encode %{ + Register op1 = $op1$$Register; + Register op2 = $op2$$Register; + Label &L = *($labl$$label); + int flag = $cmp$$cmpcode; + + __ cmp_branch_short(flag, op1, op2, L, false /* unsigned */); + %} + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); + ins_short_branch(1); +%} + +instruct branchConIU_reg_reg_short(cmpOp cmp, mRegI src1, mRegI src2, label labl) %{ + match( If cmp (CmpU src1 src2) ); + effect(USE labl); + format %{ "BR$cmp $src1, $src2, $labl #@branchConIU_reg_reg_short" %} + + ins_encode %{ + Register op1 = $src1$$Register; + Register op2 = $src2$$Register; + Label &L = *($labl$$label); + int flag = $cmp$$cmpcode; + + __ cmp_branch_short(flag, op1, op2, L, false /* unsigned */); + %} + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); + ins_short_branch(1); +%} + + +instruct branchConIU_reg_imm_short(cmpOp cmp, mRegI src1, immI src2, label labl) %{ + match( If cmp (CmpU src1 src2) ); + effect(USE labl); + format %{ "BR$cmp $src1, $src2, $labl #@branchConIU_reg_imm_short" %} + + ins_encode %{ + Register op1 = $src1$$Register; + int val = $src2$$constant; + Label &L = *($labl$$label); + int flag = $cmp$$cmpcode; + + if (val == 0) { + __ cmp_branch_short(flag, op1, R0, L, false /* unsigned */); + } else { + __ li(AT, val); + __ cmp_branch_short(flag, op1, AT, L, false /* unsigned */); + } + %} + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); + ins_short_branch(1); +%} + +instruct branchConI_reg_reg_short(cmpOp cmp, mRegI src1, mRegI src2, label labl) %{ + match( If cmp (CmpI src1 src2) ); + effect(USE labl); + format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_reg_short" %} + + ins_encode %{ + Register op1 = $src1$$Register; + Register op2 = $src2$$Register; + Label &L = *($labl$$label); + int flag = $cmp$$cmpcode; + + __ cmp_branch_short(flag, op1, op2, L, true /* signed */); + %} + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); + ins_short_branch(1); +%} + +instruct branchConI_reg_imm_short(cmpOp cmp, mRegI src1, immI src2, label labl) %{ + match( If cmp (CmpI src1 src2) ); + effect(USE labl); + ins_cost(200); + format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_imm_short" %} + + ins_encode %{ + Register op1 = $src1$$Register; + int val = $src2$$constant; + Label &L = *($labl$$label); + int flag = $cmp$$cmpcode; + + if (val == 0) { + __ cmp_branch_short(flag, op1, R0, L, true /* signed */); + } else { + __ li(AT, val); + __ cmp_branch_short(flag, op1, AT, L, true /* signed */); + } + %} + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); + ins_short_branch(1); +%} + +instruct branchConL_regL_regL_short(cmpOp cmp, mRegLorI2L src1, mRegLorI2L src2, label labl) %{ + match( If cmp (CmpL src1 src2) ); + effect(USE labl); + format %{ "BR$cmp $src1, $src2, $labl #@branchConL_regL_regL_short" %} + ins_cost(250); + + ins_encode %{ + Register op1 = as_Register($src1$$reg); + Register op2 = as_Register($src2$$reg); + + Label &target = *($labl$$label); + int flag = $cmp$$cmpcode; + + __ cmp_branch_short(flag, op1, op2, target, true /* signed */); + %} + + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); + ins_short_branch(1); +%} + +instruct branchConUL_regL_regL_short(cmpOp cmp, mRegLorI2L src1, mRegLorI2L src2, label labl) %{ + match(If cmp (CmpUL src1 src2)); + effect(USE labl); + format %{ "BR$cmp $src1, $src2, $labl #@branchConUL_regL_regL_short" %} + ins_cost(250); + + ins_encode %{ + Register op1 = as_Register($src1$$reg); + Register op2 = as_Register($src2$$reg); + Label& 
target = *($labl$$label); + int flag = $cmp$$cmpcode; + + __ cmp_branch_short(flag, op1, op2, target, false /* signed */); + %} + + ins_pc_relative(1); + ins_pipe(pipe_alu_branch); + ins_short_branch(1); +%} + +instruct branchConL_regL_immL_short(cmpOp cmp, mRegL src1, immL src2, label labl) %{ + match( If cmp (CmpL src1 src2) ); + effect(USE labl); + format %{ "BR$cmp $src1, $src2, $labl #@branchConL_regL_immL_short" %} + ins_cost(180); + + ins_encode %{ + Register op1 = as_Register($src1$$reg); + Label &target = *($labl$$label); + int flag = $cmp$$cmpcode; + long val = $src2$$constant; + + if (val == 0) { + __ cmp_branch_short(flag, op1, R0, target, true /* signed */); + } else { + __ li(AT, val); + __ cmp_branch_short(flag, op1, AT, target, true /* signed */); + } + %} + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); + ins_short_branch(1); +%} + +instruct branchConUL_regL_immL_short(cmpOp cmp, mRegL src1, immL src2, label labl) %{ + match(If cmp (CmpUL src1 src2)); + effect(USE labl); + format %{ "BR$cmp $src1, $src2, $labl #@branchConUL_regL_immL_short" %} + ins_cost(180); + + ins_encode %{ + Register op1 = as_Register($src1$$reg); + long val = $src2$$constant; + Label& target = *($labl$$label); + int flag = $cmp$$cmpcode; + + if (val == 0) { + __ cmp_branch_short(flag, op1, R0, target, false /* signed */); + } else { + __ li(AT, val); + __ cmp_branch_short(flag, op1, AT, target, false /* signed */); + } + %} + + ins_pc_relative(1); + ins_pipe(pipe_alu_branch); + ins_short_branch(1); +%} + +//FIXME +instruct branchConF_reg_reg_short(cmpOp cmp, regF src1, regF src2, label labl) %{ + match( If cmp (CmpF src1 src2) ); + effect(USE labl); + format %{ "BR$cmp $src1, $src2, $labl #@branchConF_reg_reg_short" %} + + ins_encode %{ + FloatRegister reg_op1 = $src1$$FloatRegister; + FloatRegister reg_op2 = $src2$$FloatRegister; + Label &L = *($labl$$label); + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: //equal + __ fcmp_ceq_s(FCC0, reg_op1, reg_op2); + if (&L) + __ bcnez(FCC0, L); + else + __ bcnez(FCC0, (int)0); + break; + case 0x02: //not_equal + __ fcmp_ceq_s(FCC0, reg_op1, reg_op2); + if (&L) + __ bceqz(FCC0, L); + else + __ bceqz(FCC0, (int)0); + break; + case 0x03: //greater + __ fcmp_cule_s(FCC0, reg_op1, reg_op2); + if(&L) + __ bceqz(FCC0, L); + else + __ bceqz(FCC0, (int)0); + break; + case 0x04: //greater_equal + __ fcmp_cult_s(FCC0, reg_op1, reg_op2); + if(&L) + __ bceqz(FCC0, L); + else + __ bceqz(FCC0, (int)0); + break; + case 0x05: //less + __ fcmp_cult_s(FCC0, reg_op1, reg_op2); + if(&L) + __ bcnez(FCC0, L); + else + __ bcnez(FCC0, (int)0); + break; + case 0x06: //less_equal + __ fcmp_cule_s(FCC0, reg_op1, reg_op2); + if(&L) + __ bcnez(FCC0, L); + else + __ bcnez(FCC0, (int)0); + break; + default: + Unimplemented(); + } + %} + + ins_pc_relative(1); + ins_pipe(pipe_fpu_branch); + ins_short_branch(1); +%} + +instruct branchConD_reg_reg_short(cmpOp cmp, regD src1, regD src2, label labl) %{ + match( If cmp (CmpD src1 src2) ); + effect(USE labl); + format %{ "BR$cmp $src1, $src2, $labl #@branchConD_reg_reg_short" %} + + ins_encode %{ + FloatRegister reg_op1 = $src1$$FloatRegister; + FloatRegister reg_op2 = $src2$$FloatRegister; + Label &L = *($labl$$label); + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: //equal + __ fcmp_ceq_d(FCC0, reg_op1, reg_op2); + if (&L) + __ bcnez(FCC0, L); + else + __ bcnez(FCC0, (int)0); + break; + case 0x02: //not_equal + // c_ueq_d cannot distinguish NaN from equal. 
Double.isNaN(Double) is implemented by 'f != f', so the use of c_ueq_d causes bugs. + __ fcmp_ceq_d(FCC0, reg_op1, reg_op2); + if (&L) + __ bceqz(FCC0, L); + else + __ bceqz(FCC0, (int)0); + break; + case 0x03: //greater + __ fcmp_cule_d(FCC0, reg_op1, reg_op2); + if(&L) + __ bceqz(FCC0, L); + else + __ bceqz(FCC0, (int)0); + break; + case 0x04: //greater_equal + __ fcmp_cult_d(FCC0, reg_op1, reg_op2); + if(&L) + __ bceqz(FCC0, L); + else + __ bceqz(FCC0, (int)0); + break; + case 0x05: //less + __ fcmp_cult_d(FCC0, reg_op1, reg_op2); + if(&L) + __ bcnez(FCC0, L); + else + __ bcnez(FCC0, (int)0); + break; + case 0x06: //less_equal + __ fcmp_cule_d(FCC0, reg_op1, reg_op2); + if(&L) + __ bcnez(FCC0, L); + else + __ bcnez(FCC0, (int)0); + break; + default: + Unimplemented(); + } + %} + + ins_pc_relative(1); + ins_pipe(pipe_fpu_branch); + ins_short_branch(1); +%} + +// =================== End of branch instructions ========================== + +// Call Runtime Instruction +instruct CallRuntimeDirect(method meth) %{ + match(CallRuntime ); + effect(USE meth); + + ins_cost(300); + format %{ "CALL,runtime #@CallRuntimeDirect" %} + ins_encode( Java_To_Runtime( meth ) ); + ins_pipe( pipe_slow ); + ins_alignment(4); +%} + + + +//------------------------MemBar Instructions------------------------------- +//Memory barrier flavors + +instruct unnecessary_membar_acquire() %{ + predicate(unnecessary_acquire(n)); + match(MemBarAcquire); + ins_cost(0); + + format %{ "membar_acquire (elided)" %} + + ins_encode %{ + __ block_comment("membar_acquire (elided)"); + %} + + ins_pipe(empty); +%} + +instruct membar_acquire() %{ + match(MemBarAcquire); + ins_cost(400); + + format %{ "MEMBAR-acquire @ membar_acquire" %} + ins_encode %{ + __ membar(Assembler::Membar_mask_bits(__ LoadLoad|__ LoadStore)); + %} + ins_pipe(empty); +%} + +instruct load_fence() %{ + match(LoadFence); + ins_cost(400); + + format %{ "MEMBAR @ load_fence" %} + ins_encode %{ + __ membar(Assembler::Membar_mask_bits(__ LoadLoad|__ LoadStore)); + %} + ins_pipe(pipe_slow); +%} + +instruct membar_acquire_lock() +%{ + match(MemBarAcquireLock); + ins_cost(0); + + size(0); + format %{ "MEMBAR-acquire (acquire as part of CAS in prior FastLock so empty encoding) @ membar_acquire_lock" %} + ins_encode(); + ins_pipe(empty); +%} + +instruct unnecessary_membar_release() %{ + predicate(unnecessary_release(n)); + match(MemBarRelease); + ins_cost(0); + + format %{ "membar_release (elided)" %} + + ins_encode %{ + __ block_comment("membar_release (elided)"); + %} + ins_pipe(pipe_slow); +%} + +instruct membar_release() %{ + match(MemBarRelease); + ins_cost(400); + + format %{ "MEMBAR-release @ membar_release" %} + + ins_encode %{ + // Attention: DO NOT DELETE THIS GUY! 
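    // (Editorial note, not part of the patch.) LoadStore|StoreStore is the
    // "release" half of the barrier: loads and stores issued before this fence
    // must complete before any store issued after it (e.g. a volatile field
    // write), which is why this membar must not be removed.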
+ __ membar(Assembler::Membar_mask_bits(__ LoadStore|__ StoreStore)); + %} + + ins_pipe(pipe_slow); +%} + +instruct store_fence() %{ + match(StoreFence); + ins_cost(400); + + format %{ "MEMBAR @ store_fence" %} + + ins_encode %{ + __ membar(Assembler::Membar_mask_bits(__ LoadStore|__ StoreStore)); + %} + + ins_pipe(pipe_slow); +%} + +instruct membar_release_lock() +%{ + match(MemBarReleaseLock); + ins_cost(0); + + size(0); + format %{ "MEMBAR-release-lock (release in FastUnlock so empty) @ membar_release_lock" %} + ins_encode(); + ins_pipe(empty); +%} + +instruct unnecessary_membar_volatile() %{ + predicate(unnecessary_volatile(n)); + match(MemBarVolatile); + ins_cost(0); + + format %{ "membar_volatile (elided)" %} + + ins_encode %{ + __ block_comment("membar_volatile (elided)"); + %} + + ins_pipe(pipe_slow); +%} + +instruct membar_volatile() %{ + match(MemBarVolatile); + ins_cost(400); + + format %{ "MEMBAR-volatile" %} + ins_encode %{ + if( !os::is_MP() ) return; // Not needed on single CPU + __ membar(__ StoreLoad); + + %} + ins_pipe(pipe_slow); +%} + +instruct membar_storestore() %{ + match(MemBarStoreStore); + + ins_cost(400); + format %{ "MEMBAR-storestore @ membar_storestore" %} + ins_encode %{ + __ membar(__ StoreStore); + %} + ins_pipe(empty); +%} + +//----------Move Instructions-------------------------------------------------- +instruct castX2P(mRegP dst, mRegL src) %{ + match(Set dst (CastX2P src)); + format %{ "castX2P $dst, $src @ castX2P" %} + ins_encode %{ + Register src = $src$$Register; + Register dst = $dst$$Register; + + if(src != dst) + __ move(dst, src); + %} + ins_cost(10); + ins_pipe( ialu_regI_mov ); +%} + +instruct castP2X(mRegL dst, mRegP src ) %{ + match(Set dst (CastP2X src)); + + format %{ "mov $dst, $src\t #@castP2X" %} + ins_encode %{ + Register src = $src$$Register; + Register dst = $dst$$Register; + + if(src != dst) + __ move(dst, src); + %} + ins_pipe( ialu_regI_mov ); +%} + +instruct MoveF2I_reg_reg(mRegI dst, regF src) %{ + match(Set dst (MoveF2I src)); + effect(DEF dst, USE src); + ins_cost(85); + format %{ "MoveF2I $dst, $src @ MoveF2I_reg_reg" %} + ins_encode %{ + Register dst = as_Register($dst$$reg); + FloatRegister src = as_FloatRegister($src$$reg); + + __ movfr2gr_s(dst, src); + %} + ins_pipe( pipe_slow ); +%} + +instruct MoveI2F_reg_reg(regF dst, mRegI src) %{ + match(Set dst (MoveI2F src)); + effect(DEF dst, USE src); + ins_cost(85); + format %{ "MoveI2F $dst, $src @ MoveI2F_reg_reg" %} + ins_encode %{ + Register src = as_Register($src$$reg); + FloatRegister dst = as_FloatRegister($dst$$reg); + + __ movgr2fr_w(dst, src); + %} + ins_pipe( pipe_slow ); +%} + +instruct MoveD2L_reg_reg(mRegL dst, regD src) %{ + match(Set dst (MoveD2L src)); + effect(DEF dst, USE src); + ins_cost(85); + format %{ "MoveD2L $dst, $src @ MoveD2L_reg_reg" %} + ins_encode %{ + Register dst = as_Register($dst$$reg); + FloatRegister src = as_FloatRegister($src$$reg); + + __ movfr2gr_d(dst, src); + %} + ins_pipe( pipe_slow ); +%} + +instruct MoveL2D_reg_reg(regD dst, mRegL src) %{ + match(Set dst (MoveL2D src)); + effect(DEF dst, USE src); + ins_cost(85); + format %{ "MoveL2D $dst, $src @ MoveL2D_reg_reg" %} + ins_encode %{ + FloatRegister dst = as_FloatRegister($dst$$reg); + Register src = as_Register($src$$reg); + + __ movgr2fr_d(dst, src); + %} + ins_pipe( pipe_slow ); +%} + +//----------Conditional Move--------------------------------------------------- +// Conditional move +instruct cmovI_cmpI_reg_reg(mRegI dst, mRegI src1, mRegI src2, cmpOp cop ) %{ + match(Set dst 
(CMoveI (Binary cop (CmpI src1 src2)) (Binary src1 src2))); + ins_cost(50); + format %{ + "CMP$cop $src1, $src2\t @cmovI_cmpI_reg_reg\n" + "\tCMOV $dst,$src1, $src2 \t @cmovI_cmpI_reg_reg" + %} + + ins_encode %{ + Register op1 = $src1$$Register; + Register op2 = $src2$$Register; + Register dst = $dst$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(op1, op2, dst, op1, op2, (MacroAssembler::CMCompare) flag, true); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovI_cmpI_reg_reg2(mRegI dst, mRegI src1, mRegI src2, cmpOp cop ) %{ + match(Set dst (CMoveI (Binary cop (CmpI src1 src2)) (Binary src2 src1))); + ins_cost(50); + format %{ + "CMP$cop $src1, $src2\t @cmovI_cmpI_reg_reg2\n" + "\tCMOV $dst,$src2, $src1 \t @cmovI_cmpI_reg_reg2" + %} + + ins_encode %{ + Register op1 = $src1$$Register; + Register op2 = $src2$$Register; + Register dst = $dst$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(op1, op2, dst, op2, op1, (MacroAssembler::CMCompare) flag, true); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovI_cmpI_dst_reg(mRegI dst, mRegI src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ + match(Set dst (CMoveI (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMP$cop $tmp1, $tmp2\t @cmovI_cmpI_dst_reg\n" + "\tCMOV $dst,$src \t @cmovI_cmpI_dst_reg" + %} + + ins_encode %{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovI_cmpP_reg_reg(mRegI dst, mRegI src, mRegP tmp1, mRegP tmp2, cmpOp cop ) %{ + match(Set dst (CMoveI (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMPU$cop $tmp1,$tmp2\t @cmovI_cmpP_reg_reg\n\t" + "CMOV $dst,$src\t @cmovI_cmpP_reg_reg" + %} + ins_encode %{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); + %} + + ins_pipe( pipe_slow ); +%} + + +instruct cmovI_cmpN_reg_reg(mRegI dst, mRegI src, mRegN tmp1, mRegN tmp2, cmpOp cop ) %{ + match(Set dst (CMoveI (Binary cop (CmpN tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMPU$cop $tmp1,$tmp2\t @cmovI_cmpN_reg_reg\n\t" + "CMOV $dst,$src\t @cmovI_cmpN_reg_reg" + %} + ins_encode %{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovP_cmpU_reg_reg(mRegP dst, mRegP src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ + match(Set dst (CMoveP (Binary cop (CmpU tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMPU$cop $tmp1,$tmp2\t @cmovP_cmpU_reg_reg\n\t" + "CMOV $dst,$src\t @cmovP_cmpU_reg_reg" + %} + ins_encode %{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovP_cmpF_reg_reg(mRegP dst, mRegP src, regF tmp1, regF tmp2, cmpOp cop, regD tmp3, regD tmp4) %{ + match(Set dst (CMoveP 
(Binary cop (CmpF tmp1 tmp2)) (Binary dst src))); + effect(TEMP tmp3, TEMP tmp4); + ins_cost(80); + format %{ + "CMP$cop $tmp1, $tmp2\t @cmovP_cmpF_reg_reg\n" + "\tCMOV $dst,$src \t @cmovP_cmpF_reg_reg" + %} + + ins_encode %{ + FloatRegister reg_op1 = $tmp1$$FloatRegister; + FloatRegister reg_op2 = $tmp2$$FloatRegister; + FloatRegister tmp1 = $tmp3$$FloatRegister; + FloatRegister tmp2 = $tmp4$$FloatRegister; + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(reg_op1, reg_op2, dst, src, tmp1, tmp2, (MacroAssembler::CMCompare) flag, true /* is_float */); + %} + ins_pipe( pipe_slow ); +%} + +instruct cmovP_cmpN_reg_reg(mRegP dst, mRegP src, mRegN tmp1, mRegN tmp2, cmpOp cop ) %{ + match(Set dst (CMoveP (Binary cop (CmpN tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMPU$cop $tmp1,$tmp2\t @cmovP_cmpN_reg_reg\n\t" + "CMOV $dst,$src\t @cmovP_cmpN_reg_reg" + %} + ins_encode %{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovN_cmpP_reg_reg(mRegN dst, mRegN src, mRegP tmp1, mRegP tmp2, cmpOp cop ) %{ + match(Set dst (CMoveN (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMPU$cop $tmp1,$tmp2\t @cmovN_cmpP_reg_reg\n\t" + "CMOV $dst,$src\t @cmovN_cmpP_reg_reg" + %} + ins_encode %{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovP_cmpD_reg_reg(mRegP dst, mRegP src, regD tmp1, regD tmp2, cmpOp cop, regD tmp3, regD tmp4) %{ + match(Set dst (CMoveP (Binary cop (CmpD tmp1 tmp2)) (Binary dst src))); + effect(TEMP tmp3, TEMP tmp4); + ins_cost(80); + format %{ + "CMP$cop $tmp1, $tmp2\t @cmovP_cmpD_reg_reg\n" + "\tCMOV $dst,$src \t @cmovP_cmpD_reg_reg" + %} + ins_encode %{ + FloatRegister reg_op1 = as_FloatRegister($tmp1$$reg); + FloatRegister reg_op2 = as_FloatRegister($tmp2$$reg); + FloatRegister tmp1 = $tmp3$$FloatRegister; + FloatRegister tmp2 = $tmp4$$FloatRegister; + Register dst = as_Register($dst$$reg); + Register src = as_Register($src$$reg); + int flag = $cop$$cmpcode; + + __ cmp_cmov(reg_op1, reg_op2, dst, src, tmp1, tmp2, (MacroAssembler::CMCompare) flag, false /* is_float */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovN_cmpN_reg_reg(mRegN dst, mRegN src, mRegN tmp1, mRegN tmp2, cmpOp cop ) %{ + match(Set dst (CMoveN (Binary cop (CmpN tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMPU$cop $tmp1,$tmp2\t @cmovN_cmpN_reg_reg\n\t" + "CMOV $dst,$src\t @cmovN_cmpN_reg_reg" + %} + ins_encode %{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovI_cmpU_reg_reg(mRegI dst, mRegI src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ + match(Set dst (CMoveI (Binary cop (CmpU tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMPU$cop $tmp1,$tmp2\t @cmovI_cmpU_reg_reg\n\t" + "CMOV $dst,$src\t @cmovI_cmpU_reg_reg" 
+ %} + ins_encode %{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovI_cmpL_reg_reg(mRegI dst, mRegIorL2I src, mRegLorI2L tmp1, mRegLorI2L tmp2, cmpOp cop ) %{ + match(Set dst (CMoveI (Binary cop (CmpL tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMP$cop $tmp1, $tmp2\t @cmovI_cmpL_reg_reg\n" + "\tCMOV $dst,$src \t @cmovI_cmpL_reg_reg" + %} + ins_encode %{ + Register opr1 = as_Register($tmp1$$reg); + Register opr2 = as_Register($tmp2$$reg); + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovI_cmpUL_reg_reg(mRegI dst, mRegIorL2I src, mRegLorI2L tmp1, mRegLorI2L tmp2, cmpOp cop) %{ + match(Set dst (CMoveI (Binary cop (CmpUL tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMP$cop $tmp1, $tmp2\t @cmovI_cmpUL_reg_reg\n" + "\tCMOV $dst,$src \t @cmovI_cmpUL_reg_reg" + %} + ins_encode %{ + Register opr1 = as_Register($tmp1$$reg); + Register opr2 = as_Register($tmp2$$reg); + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); + %} + + ins_pipe(pipe_slow); +%} + +instruct cmovP_cmpL_reg_reg(mRegP dst, mRegP src, mRegLorI2L tmp1, mRegLorI2L tmp2, cmpOp cop ) %{ + match(Set dst (CMoveP (Binary cop (CmpL tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMP$cop $tmp1, $tmp2\t @cmovP_cmpL_reg_reg\n" + "\tCMOV $dst,$src \t @cmovP_cmpL_reg_reg" + %} + ins_encode %{ + Register opr1 = as_Register($tmp1$$reg); + Register opr2 = as_Register($tmp2$$reg); + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovP_cmpUL_reg_reg(mRegP dst, mRegP src, mRegLorI2L tmp1, mRegLorI2L tmp2, cmpOp cop) %{ + match(Set dst (CMoveP (Binary cop (CmpUL tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMP$cop $tmp1, $tmp2\t @cmovP_cmpUL_reg_reg\n" + "\tCMOV $dst,$src \t @cmovP_cmpUL_reg_reg" + %} + ins_encode %{ + Register opr1 = as_Register($tmp1$$reg); + Register opr2 = as_Register($tmp2$$reg); + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); + %} + + ins_pipe(pipe_slow); +%} + +instruct cmovI_cmpD_reg_reg(mRegI dst, mRegI src, regD tmp1, regD tmp2, cmpOp cop, regD tmp3, regD tmp4) %{ + match(Set dst (CMoveI (Binary cop (CmpD tmp1 tmp2)) (Binary dst src))); + effect(TEMP tmp3, TEMP tmp4); + ins_cost(80); + format %{ + "CMP$cop $tmp1, $tmp2\t @cmovI_cmpD_reg_reg\n" + "\tCMOV $dst,$src \t @cmovI_cmpD_reg_reg" + %} + ins_encode %{ + FloatRegister reg_op1 = as_FloatRegister($tmp1$$reg); + FloatRegister reg_op2 = as_FloatRegister($tmp2$$reg); + FloatRegister tmp1 = $tmp3$$FloatRegister; + FloatRegister tmp2 = $tmp4$$FloatRegister; + Register dst = as_Register($dst$$reg); + Register src = as_Register($src$$reg); + int flag = $cop$$cmpcode; + + __ cmp_cmov(reg_op1, reg_op2, dst, 
src, tmp1, tmp2, (MacroAssembler::CMCompare) flag, false /* is_float */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovP_cmpP_reg_reg(mRegP dst, mRegP src, mRegP tmp1, mRegP tmp2, cmpOp cop ) %{ + match(Set dst (CMoveP (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMPU$cop $tmp1,$tmp2\t @cmovP_cmpP_reg_reg\n\t" + "CMOV $dst,$src\t @cmovP_cmpP_reg_reg" + %} + ins_encode %{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovP_cmpI_reg_reg(mRegP dst, mRegP src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ + match(Set dst (CMoveP (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMP$cop $tmp1,$tmp2\t @cmovP_cmpI_reg_reg\n\t" + "CMOV $dst,$src\t @cmovP_cmpI_reg_reg" + %} + ins_encode %{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovL_cmpP_reg_reg(mRegL dst, mRegL src, mRegP tmp1, mRegP tmp2, cmpOp cop ) %{ + match(Set dst (CMoveL (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMPU$cop $tmp1,$tmp2\t @cmovL_cmpP_reg_reg\n\t" + "CMOV $dst,$src\t @cmovL_cmpP_reg_reg" + %} + ins_encode %{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + Label L; + + __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovN_cmpU_reg_reg(mRegN dst, mRegN src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ + match(Set dst (CMoveN (Binary cop (CmpU tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMPU$cop $tmp1,$tmp2\t @cmovN_cmpU_reg_reg\n\t" + "CMOV $dst,$src\t @cmovN_cmpU_reg_reg" + %} + ins_encode %{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovN_cmpL_reg_reg(mRegN dst, mRegN src, mRegL tmp1, mRegL tmp2, cmpOp cop) %{ + match(Set dst (CMoveN (Binary cop (CmpL tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMP$cop $tmp1, $tmp2\t @cmovN_cmpL_reg_reg\n" + "\tCMOV $dst,$src \t @cmovN_cmpL_reg_reg" + %} + ins_encode %{ + Register opr1 = as_Register($tmp1$$reg); + Register opr2 = as_Register($tmp2$$reg); + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovN_cmpUL_reg_reg(mRegN dst, mRegN src, mRegL tmp1, mRegL tmp2, cmpOp cop) %{ + match(Set dst (CMoveN (Binary cop (CmpUL tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMP$cop $tmp1, $tmp2\t @cmovN_cmpUL_reg_reg\n" + "\tCMOV $dst,$src \t @cmovN_cmpUL_reg_reg" + %} + ins_encode %{ + Register opr1 = as_Register($tmp1$$reg); + Register opr2 = as_Register($tmp2$$reg); + Register dst = 
$dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); + %} + + ins_pipe(pipe_slow); +%} + +instruct cmovN_cmpI_reg_reg(mRegN dst, mRegN src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ + match(Set dst (CMoveN (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMP$cop $tmp1,$tmp2\t @cmovN_cmpI_reg_reg\n\t" + "CMOV $dst,$src\t @cmovN_cmpI_reg_reg" + %} + ins_encode %{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovL_cmpU_reg_reg(mRegL dst, mRegL src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ + match(Set dst (CMoveL (Binary cop (CmpU tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMPU$cop $tmp1,$tmp2\t @cmovL_cmpU_reg_reg\n\t" + "CMOV $dst,$src\t @cmovL_cmpU_reg_reg" + %} + ins_encode %{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovL_cmpF_reg_reg(mRegL dst, mRegL src, regF tmp1, regF tmp2, cmpOp cop, regD tmp3, regD tmp4) %{ + match(Set dst (CMoveL (Binary cop (CmpF tmp1 tmp2)) (Binary dst src))); + effect(TEMP tmp3, TEMP tmp4); + ins_cost(80); + format %{ + "CMP$cop $tmp1, $tmp2\t @cmovL_cmpF_reg_reg\n" + "\tCMOV $dst,$src \t @cmovL_cmpF_reg_reg" + %} + + ins_encode %{ + FloatRegister reg_op1 = $tmp1$$FloatRegister; + FloatRegister reg_op2 = $tmp2$$FloatRegister; + FloatRegister tmp1 = $tmp3$$FloatRegister; + FloatRegister tmp2 = $tmp4$$FloatRegister; + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(reg_op1, reg_op2, dst, src, tmp1, tmp2, (MacroAssembler::CMCompare) flag, true /* is_float */); + %} + ins_pipe( pipe_slow ); +%} + +instruct cmovL_cmpI_reg_reg(mRegL dst, mRegL src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ + match(Set dst (CMoveL (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMP$cop $tmp1, $tmp2\t @cmovL_cmpI_reg_reg\n" + "\tCMOV $dst,$src \t @cmovL_cmpI_reg_reg" + %} + + ins_encode %{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = as_Register($dst$$reg); + Register src = as_Register($src$$reg); + int flag = $cop$$cmpcode; + + __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovL_cmpL_reg_reg(mRegL dst, mRegL src1, mRegL src2, cmpOp cop ) %{ + match(Set dst (CMoveL (Binary cop (CmpL src1 src2)) (Binary src1 src2))); + ins_cost(50); + format %{ + "CMP$cop $src1, $src2\t @cmovL_cmpL_reg_reg\n" + "\tCMOV $dst,$src1, $src2 \t @cmovL_cmpL_reg_reg" + %} + + ins_encode %{ + Register op1 = $src1$$Register; + Register op2 = $src2$$Register; + Register dst = $dst$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(op1, op2, dst, op1, op2, (MacroAssembler::CMCompare) flag, true); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovL_cmpUL_reg_reg(mRegL dst, mRegL src1, mRegL src2, cmpOp cop) %{ + match(Set dst (CMoveL (Binary cop (CmpUL src1 src2)) (Binary src1 src2))); + ins_cost(50); + format %{ + 
"CMP$cop $src1, $src2\t @cmovL_cmpUL_reg_reg\n" + "\tCMOV $dst,$src1, $src2 \t @cmovL_cmpUL_reg_reg" + %} + + ins_encode %{ + Register op1 = $src1$$Register; + Register op2 = $src2$$Register; + Register dst = $dst$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(op1, op2, dst, op1, op2, (MacroAssembler::CMCompare) flag, false /* is_signed */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovL_cmpL_reg_reg2(mRegL dst, mRegL src1, mRegL src2, cmpOp cop ) %{ + match(Set dst (CMoveL (Binary cop (CmpL src1 src2)) (Binary src2 src1))); + ins_cost(50); + format %{ + "CMP$cop $src1, $src2\t @cmovL_cmpL_reg_reg2\n" + "\tCMOV $dst,$src2, $src1 \t @cmovL_cmpL_reg_reg2" + %} + + ins_encode %{ + Register op1 = $src1$$Register; + Register op2 = $src2$$Register; + Register dst = $dst$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(op1, op2, dst, op2, op1, (MacroAssembler::CMCompare) flag, true); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovL_cmpUL_reg_reg2(mRegL dst, mRegL src1, mRegL src2, cmpOp cop) %{ + match(Set dst (CMoveL (Binary cop (CmpUL src1 src2)) (Binary src2 src1))); + ins_cost(50); + format %{ + "CMP$cop $src1, $src2\t @cmovL_cmpUL_reg_reg2\n" + "\tCMOV $dst,$src2, $src1 \t @cmovL_cmpUL_reg_reg2" + %} + + ins_encode %{ + Register op1 = $src1$$Register; + Register op2 = $src2$$Register; + Register dst = $dst$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(op1, op2, dst, op2, op1, (MacroAssembler::CMCompare) flag, false /* is_signed */); + %} + + ins_pipe(pipe_slow); +%} + +instruct cmovL_cmpL_dst_reg(mRegL dst, mRegL src, mRegL tmp1, mRegL tmp2, cmpOp cop ) %{ + match(Set dst (CMoveL (Binary cop (CmpL tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMP$cop $tmp1, $tmp2\t @cmovL_cmpL_dst_reg\n" + "\tCMOV $dst,$src \t @cmovL_cmpL_dst_reg" + %} + ins_encode %{ + Register opr1 = as_Register($tmp1$$reg); + Register opr2 = as_Register($tmp2$$reg); + Register dst = as_Register($dst$$reg); + Register src = as_Register($src$$reg); + int flag = $cop$$cmpcode; + + __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovL_cmpUL_dst_reg(mRegL dst, mRegL src, mRegL tmp1, mRegL tmp2, cmpOp cop) %{ + match(Set dst (CMoveL (Binary cop (CmpUL tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMP$cop $tmp1, $tmp2\t @cmovL_cmpUL_dst_reg\n" + "\tCMOV $dst,$src \t @cmovL_cmpUL_dst_reg" + %} + ins_encode %{ + Register opr1 = as_Register($tmp1$$reg); + Register opr2 = as_Register($tmp2$$reg); + Register dst = as_Register($dst$$reg); + Register src = as_Register($src$$reg); + int flag = $cop$$cmpcode; + + __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); + %} + + ins_pipe(pipe_slow); +%} + +instruct cmovL_cmpN_reg_reg(mRegL dst, mRegL src, mRegN tmp1, mRegN tmp2, cmpOp cop ) %{ + match(Set dst (CMoveL (Binary cop (CmpN tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMPU$cop $tmp1,$tmp2\t @cmovL_cmpN_reg_reg\n\t" + "CMOV $dst,$src\t @cmovL_cmpN_reg_reg" + %} + ins_encode %{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); + %} + + ins_pipe( pipe_slow ); +%} + + +instruct cmovL_cmpD_reg_reg(mRegL dst, mRegL src, regD tmp1, regD tmp2, cmpOp cop, regD tmp3, regD tmp4) %{ + match(Set dst (CMoveL (Binary cop (CmpD tmp1 
tmp2)) (Binary dst src))); + effect(TEMP tmp3, TEMP tmp4); + ins_cost(80); + format %{ + "CMP$cop $tmp1, $tmp2\t @cmovL_cmpD_reg_reg\n" + "\tCMOV $dst,$src \t @cmovL_cmpD_reg_reg" + %} + ins_encode %{ + FloatRegister reg_op1 = as_FloatRegister($tmp1$$reg); + FloatRegister reg_op2 = as_FloatRegister($tmp2$$reg); + FloatRegister tmp1 = $tmp3$$FloatRegister; + FloatRegister tmp2 = $tmp4$$FloatRegister; + Register dst = as_Register($dst$$reg); + Register src = as_Register($src$$reg); + int flag = $cop$$cmpcode; + + __ cmp_cmov(reg_op1, reg_op2, dst, src, tmp1, tmp2, (MacroAssembler::CMCompare) flag, false /* is_float */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovD_cmpD_reg_reg(regD dst, regD src, regD tmp1, regD tmp2, cmpOp cop ) %{ + match(Set dst (CMoveD (Binary cop (CmpD tmp1 tmp2)) (Binary dst src))); + ins_cost(200); + format %{ + "CMP$cop $tmp1, $tmp2\t @cmovD_cmpD_reg_reg\n" + "\tCMOV $dst,$src \t @cmovD_cmpD_reg_reg" + %} + ins_encode %{ + FloatRegister reg_op1 = as_FloatRegister($tmp1$$reg); + FloatRegister reg_op2 = as_FloatRegister($tmp2$$reg); + FloatRegister dst = as_FloatRegister($dst$$reg); + FloatRegister src = as_FloatRegister($src$$reg); + int flag = $cop$$cmpcode; + + __ cmp_cmov(reg_op1, reg_op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_float */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovF_cmpI_reg_reg(regF dst, regF src, mRegI tmp1, mRegI tmp2, cmpOp cop, regF tmp3, regF tmp4) %{ + match(Set dst (CMoveF (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); + effect(TEMP tmp3, TEMP tmp4); + ins_cost(200); + format %{ + "CMP$cop $tmp1, $tmp2\t @cmovF_cmpI_reg_reg\n" + "\tCMOV $dst, $src \t @cmovF_cmpI_reg_reg" + %} + + ins_encode %{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + FloatRegister dst = as_FloatRegister($dst$$reg); + FloatRegister src = as_FloatRegister($src$$reg); + FloatRegister tmp1 = as_FloatRegister($tmp3$$reg); + FloatRegister tmp2 = as_FloatRegister($tmp4$$reg); + int flag = $cop$$cmpcode; + + __ cmp_cmov(op1, op2, dst, src, tmp1, tmp2, (MacroAssembler::CMCompare) flag); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovD_cmpI_reg_reg(regD dst, regD src, mRegI tmp1, mRegI tmp2, cmpOp cop, regF tmp3, regF tmp4) %{ + match(Set dst (CMoveD (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); + effect(TEMP tmp3, TEMP tmp4); + ins_cost(200); + format %{ + "CMP$cop $tmp1, $tmp2\t @cmovD_cmpI_reg_reg\n" + "\tCMOV $dst, $src \t @cmovD_cmpI_reg_reg" + %} + + ins_encode %{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + FloatRegister dst = as_FloatRegister($dst$$reg); + FloatRegister src = as_FloatRegister($src$$reg); + FloatRegister tmp1 = as_FloatRegister($tmp3$$reg); + FloatRegister tmp2 = as_FloatRegister($tmp4$$reg); + int flag = $cop$$cmpcode; + + __ cmp_cmov(op1, op2, dst, src, tmp1, tmp2, (MacroAssembler::CMCompare) flag); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovD_cmpP_reg_reg(regD dst, regD src, mRegP tmp1, mRegP tmp2, cmpOp cop, regF tmp3, regF tmp4) %{ + match(Set dst (CMoveD (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); + effect(TEMP tmp3, TEMP tmp4); + ins_cost(200); + format %{ + "CMP$cop $tmp1, $tmp2\t @cmovD_cmpP_reg_reg\n" + "\tCMOV $dst, $src \t @cmovD_cmpP_reg_reg" + %} + + ins_encode %{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + FloatRegister dst = as_FloatRegister($dst$$reg); + FloatRegister src = as_FloatRegister($src$$reg); + FloatRegister tmp1 = as_FloatRegister($tmp3$$reg); + FloatRegister tmp2 = 
as_FloatRegister($tmp4$$reg); + int flag = $cop$$cmpcode; + + // Use signed comparison here, because the most significant bit of the + // user-space virtual address must be 0. + __ cmp_cmov(op1, op2, dst, src, tmp1, tmp2, (MacroAssembler::CMCompare) flag); + %} + + ins_pipe( pipe_slow ); +%} + +//FIXME +instruct cmovI_cmpF_reg_reg(mRegI dst, mRegI src, regF tmp1, regF tmp2, cmpOp cop, regD tmp3, regD tmp4) %{ + match(Set dst (CMoveI (Binary cop (CmpF tmp1 tmp2)) (Binary dst src))); + effect(TEMP tmp3, TEMP tmp4); + ins_cost(80); + format %{ + "CMP$cop $tmp1, $tmp2\t @cmovI_cmpF_reg_reg\n" + "\tCMOV $dst,$src \t @cmovI_cmpF_reg_reg" + %} + + ins_encode %{ + FloatRegister reg_op1 = $tmp1$$FloatRegister; + FloatRegister reg_op2 = $tmp2$$FloatRegister; + FloatRegister tmp1 = $tmp3$$FloatRegister; + FloatRegister tmp2 = $tmp4$$FloatRegister; + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(reg_op1, reg_op2, dst, src, tmp1, tmp2, (MacroAssembler::CMCompare) flag, true /* is_float */); + %} + ins_pipe( pipe_slow ); +%} + +instruct cmovF_cmpF_reg_reg(regF dst, regF src, regF tmp1, regF tmp2, cmpOp cop ) %{ + match(Set dst (CMoveF (Binary cop (CmpF tmp1 tmp2)) (Binary dst src))); + ins_cost(200); + format %{ + "CMP$cop $tmp1, $tmp2\t @cmovF_cmpF_reg_reg\n" + "\tCMOV $dst,$src \t @cmovF_cmpF_reg_reg" + %} + + ins_encode %{ + FloatRegister reg_op1 = $tmp1$$FloatRegister; + FloatRegister reg_op2 = $tmp2$$FloatRegister; + FloatRegister dst = $dst$$FloatRegister; + FloatRegister src = $src$$FloatRegister; + int flag = $cop$$cmpcode; + + __ cmp_cmov(reg_op1, reg_op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_float */); + %} + ins_pipe( pipe_slow ); +%} + +// Manifest a CmpL result in an integer register. Very painful. +// This is the test to avoid. 
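+// The three-way compare below is built from two set-less-than results; a
+// minimal C++ sketch of the same computation (function name is illustrative):
+//
+//   int cmpL3(long long a, long long b) {
+//     int lt = (a < b) ? 1 : 0;   // slt AT, opr1, opr2
+//     int gt = (b < a) ? 1 : 0;   // slt dst, opr2, opr1
+//     return gt - lt;             // sub_d dst, dst, AT  ->  -1, 0 or 1
+//   }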
+instruct cmpL3_reg_reg(mRegI dst, mRegL src1, mRegL src2) %{
+  match(Set dst (CmpL3 src1 src2));
+  ins_cost(1000);
+  format %{ "cmpL3 $dst, $src1, $src2 @ cmpL3_reg_reg" %}
+  ins_encode %{
+    Register opr1 = as_Register($src1$$reg);
+    Register opr2 = as_Register($src2$$reg);
+    Register dst = as_Register($dst$$reg);
+
+    __ slt(AT, opr1, opr2);
+    __ slt(dst, opr2, opr1);
+    __ sub_d(dst, dst, AT);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+//
+// less_result = -1
+// greater_result = 1
+// equal_result = 0
+// nan_result = -1
+//
+instruct cmpF3_reg_reg(mRegI dst, regF src1, regF src2) %{
+  match(Set dst (CmpF3 src1 src2));
+  ins_cost(1000);
+  format %{ "cmpF3 $dst, $src1, $src2 @ cmpF3_reg_reg" %}
+  ins_encode %{
+    FloatRegister src1 = as_FloatRegister($src1$$reg);
+    FloatRegister src2 = as_FloatRegister($src2$$reg);
+    Register dst = as_Register($dst$$reg);
+
+    __ fcmp_clt_s(FCC0, src2, src1);
+    __ fcmp_cult_s(FCC1, src1, src2);
+    __ movcf2gr(dst, FCC0);
+    __ movcf2gr(AT, FCC1);
+    __ sub_d(dst, dst, AT);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct cmpD3_reg_reg(mRegI dst, regD src1, regD src2) %{
+  match(Set dst (CmpD3 src1 src2));
+  ins_cost(1000);
+  format %{ "cmpD3 $dst, $src1, $src2 @ cmpD3_reg_reg" %}
+  ins_encode %{
+    FloatRegister src1 = as_FloatRegister($src1$$reg);
+    FloatRegister src2 = as_FloatRegister($src2$$reg);
+    Register dst = as_Register($dst$$reg);
+
+    __ fcmp_clt_d(FCC0, src2, src1);
+    __ fcmp_cult_d(FCC1, src1, src2);
+    __ movcf2gr(dst, FCC0);
+    __ movcf2gr(AT, FCC1);
+    __ sub_d(dst, dst, AT);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct clear_array(t8RegL cnt, t3_RegP base, Universe dummy) %{
+  match(Set dummy (ClearArray cnt base));
+  effect(USE_KILL cnt, USE_KILL base);
+  format %{ "CLEAR_ARRAY base = $base, cnt = $cnt # Clear doublewords" %}
+  ins_encode %{
+    // Assume cnt is the number of doublewords (8-byte words) to clear,
+    // and base points to the starting address of the array.
+    Register base = $base$$Register;
+    Register cnt = $cnt$$Register;
+    Label Loop, done;
+
+    __ beq(cnt, R0, done);
+
+    __ bind(Loop);
+    __ st_d(R0, base, 0);
+    __ addi_d(cnt, cnt, -1);
+    __ addi_d(base, base, wordSize);
+    __ bne(cnt, R0, Loop);
+
+    __ bind(done);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct clear_array_imm(immL cnt, t3_RegP base, Universe dummy) %{
+  match(Set dummy (ClearArray cnt base));
+  effect(USE_KILL base);
+  format %{ "CLEAR_ARRAY base = $base, cnt = $cnt # Clear doublewords" %}
+  ins_encode %{
+    // Assume cnt is the number of doublewords (8-byte words) to clear,
+    // and base points to the starting address of the array.
+ Register base = $base$$Register; + long cnt = $cnt$$constant; + Label Loop, done; + + int tmp = cnt % 8; + int i = 0; + for (; i < tmp; i++) { + __ st_d(R0, base, i * 8); + } + if (cnt - tmp) { + __ li(AT, cnt); + __ alsl_d(AT, AT, base, 2); + __ addi_d(base, base, i * 8); + __ bind(Loop); + __ st_d(R0, base, 0); + __ st_d(R0, base, 8); + __ st_d(R0, base, 16); + __ st_d(R0, base, 24); + __ st_d(R0, base, 32); + __ st_d(R0, base, 40); + __ st_d(R0, base, 48); + __ st_d(R0, base, 56); + __ addi_d(base, base, 64); + __ blt(base, AT, Loop); + } + %} + ins_pipe( pipe_slow ); +%} + +instruct has_negatives(a4_RegP ary1, mA5RegI len, no_Ax_mRegI result) %{ + match(Set result (HasNegatives ary1 len)); + effect(USE_KILL ary1, USE_KILL len); + format %{ "has negatives byte[] ary1:$ary1, len:$len -> $result @ has_negatives" %} + + ins_encode %{ + __ has_negatives($ary1$$Register, $len$$Register, $result$$Register); + %} + + ins_pipe( pipe_slow ); +%} + +instruct string_indexofU_char(a4_RegP str1, mA5RegI cnt1, mA6RegI ch, no_Ax_mRegI result, mRegL tmp1, mRegL tmp2, mRegL tmp3) %{ + match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); + effect(USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP_DEF result, TEMP tmp1, TEMP tmp2, TEMP tmp3); + + format %{ "String IndexOf char[] $str1, len:$cnt1, char:$ch, res:$result, tmp1:$tmp1, tmp2:$tmp2, tmp3:$tmp3 -> $result @ string_indexof_char" %} + + ins_encode %{ + __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, + $result$$Register, $tmp1$$Register, $tmp2$$Register, + $tmp3$$Register); + %} + + ins_pipe( pipe_slow ); +%} + +instruct string_compareL(a4_RegP str1, mA5RegI cnt1, a6_RegP str2, mA7RegI cnt2, no_Ax_mRegI result) %{ + predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL); + match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); + effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2); + + format %{ "String Compare byte[] $str1[len: $cnt1], $str2[len: $cnt2] -> $result @ string_compareL" %} + ins_encode %{ + __ string_compare($str1$$Register, $str2$$Register, + $cnt1$$Register, $cnt2$$Register, $result$$Register, + StrIntrinsicNode::LL); + %} + + ins_pipe( pipe_slow ); +%} + +instruct string_compareU(a4_RegP str1, mA5RegI cnt1, a6_RegP str2, mA7RegI cnt2, no_Ax_mRegI result) %{ + predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU); + match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); + effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2); + + format %{ "String Compare char[] $str1[len: $cnt1], $str2[len: $cnt2] -> $result @ string_compareU" %} + ins_encode %{ + __ string_compare($str1$$Register, $str2$$Register, + $cnt1$$Register, $cnt2$$Register, $result$$Register, + StrIntrinsicNode::UU); + %} + + ins_pipe( pipe_slow ); +%} + +instruct string_compareLU(a4_RegP str1, mA5RegI cnt1, a6_RegP str2, mA7RegI cnt2, no_Ax_mRegI result) %{ + predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU); + match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); + effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2); + + format %{ "String Compare byte[] $str1[len: $cnt1], $str2[len: $cnt2] -> $result @ string_compareLU" %} + ins_encode %{ + __ string_compare($str1$$Register, $str2$$Register, + $cnt1$$Register, $cnt2$$Register, $result$$Register, + StrIntrinsicNode::LU); + %} + + ins_pipe( pipe_slow ); +%} + +instruct string_compareUL(a4_RegP str1, mA5RegI cnt1, a6_RegP str2, mA7RegI cnt2, no_Ax_mRegI result) %{ + 
predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL); + match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); + effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2); + + format %{ "String Compare byte[] $str1[len: $cnt1], $str2[len: $cnt2] -> $result @ string_compareUL" %} + ins_encode %{ + __ string_compare($str1$$Register, $str2$$Register, + $cnt1$$Register, $cnt2$$Register, $result$$Register, + StrIntrinsicNode::UL); + %} + + ins_pipe( pipe_slow ); +%} + +// fast char[] to byte[] compression +instruct string_compress(a4_RegP src, a5_RegP dst, mA6RegI len, no_Ax_mRegI result, + mRegL tmp1, mRegL tmp2, mRegL tmp3) +%{ + match(Set result (StrCompressedCopy src (Binary dst len))); + effect(USE_KILL src, USE_KILL dst, USE_KILL len, TEMP_DEF result, + TEMP tmp1, TEMP tmp2, TEMP tmp3); + + format %{ "String Compress $src,$dst -> $result @ string_compress " %} + ins_encode %{ + __ char_array_compress($src$$Register, $dst$$Register, $len$$Register, + $result$$Register, $tmp1$$Register, + $tmp2$$Register, $tmp3$$Register); + %} + ins_pipe( pipe_slow ); +%} + +// byte[] to char[] inflation +instruct string_inflate(Universe dummy, a4_RegP src, a5_RegP dst, mA6RegI len, + mRegL tmp1, mRegL tmp2) +%{ + match(Set dummy (StrInflatedCopy src (Binary dst len))); + effect(USE_KILL src, USE_KILL dst, USE_KILL len, TEMP tmp1, TEMP tmp2); + + format %{ "String Inflate $src,$dst @ string_inflate " %} + ins_encode %{ + __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register, + $tmp1$$Register, $tmp2$$Register); + %} + ins_pipe( pipe_slow ); +%} + +// intrinsic optimization +instruct string_equals(a4_RegP str1, a5_RegP str2, mA6RegI cnt, no_Ax_mRegI result, t8RegL tmp1, t3RegL tmp2) %{ + match(Set result (StrEquals (Binary str1 str2) cnt)); + effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp1, KILL tmp2); + + format %{ "String Equal $str1, $str2, len:$cnt, tmp1:$tmp1, tmp2:$tmp2 -> $result @ string_equals" %} + ins_encode %{ + __ arrays_equals($str1$$Register, $str2$$Register, + $cnt$$Register, $tmp1$$Register, $tmp2$$Register, $result$$Register, + false/* byte */); + %} + + ins_pipe( pipe_slow ); +%} + +//----------Arithmetic Instructions------------------------------------------- +//----------Addition Instructions--------------------------------------------- +instruct addI_Reg_Reg(mRegI dst, mRegIorL2I src1, mRegIorL2I src2) %{ + match(Set dst (AddI src1 src2)); + + format %{ "add $dst, $src1, $src2 #@addI_Reg_Reg" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + Register src2 = $src2$$Register; + __ add_w(dst, src1, src2); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct addI_Reg_imm(mRegI dst, mRegIorL2I src1, immI12 src2) %{ + match(Set dst (AddI src1 src2)); + + format %{ "add $dst, $src1, $src2 #@addI_Reg_imm12" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + int imm = $src2$$constant; + + __ addi_w(dst, src1, imm); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct addI_salI_Reg_Reg_immI_1_4(mRegI dst, mRegI src1, mRegI src2, immI_1_4 shift) %{ + match(Set dst (AddI src1 (LShiftI src2 shift))); + + format %{ "alsl $dst, $src1, $src2, $shift #@addI_salI_Reg_Reg_immI_1_4" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + Register src2 = $src2$$Register; + int sh = $shift$$constant; + __ alsl_w(dst, src2, src1, sh - 1); + %} + ins_pipe(ialu_regI_regI); +%} + +instruct addP_reg_reg(mRegP dst, mRegP src1, mRegLorI2L src2) %{ + 
match(Set dst (AddP src1 src2)); + + format %{ "ADD $dst, $src1, $src2 #@addP_reg_reg" %} + + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + Register src2 = $src2$$Register; + __ add_d(dst, src1, src2); + %} + + ins_pipe( ialu_regI_regI ); +%} + +instruct addP_reg_reg_M8(mRegP dst, mRegP src1, mRegLorI2L src2, immL_M8 M8) %{ + match(Set dst (AddP src1 (AndL src2 M8))); + format %{ "dadd $dst, $src1, $src2 #@addP_reg_reg_M8" %} + + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + Register src2 = $src2$$Register; + __ bstrins_d(src2, R0, 2, 0); + __ add_d(dst, src1, src2); + %} + + ins_pipe( ialu_regI_regI ); +%} + +instruct addP_reg_imm12(mRegP dst, mRegP src1, immL12 src2) %{ + match(Set dst (AddP src1 src2)); + + format %{ "ADD $dst, $src1, $src2 #@addP_reg_imm12" %} + ins_encode %{ + Register src1 = $src1$$Register; + long src2 = $src2$$constant; + Register dst = $dst$$Register; + + __ addi_d(dst, src1, src2); + %} + ins_pipe( ialu_regI_imm16 ); +%} + +instruct addP_salL_Reg_RegI2L_immI_1_4(mRegP dst, mRegP src1, mRegI src2, immI_1_4 shift) %{ + match(Set dst (AddP src1 (LShiftL (ConvI2L src2) shift))); + + format %{ "alsl $dst, $src1, $src2, $shift #@addP_salL_Reg_RegI2L_immI_1_4" %} + + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + Register src2 = $src2$$Register; + int sh = $shift$$constant; + __ alsl_d(dst, src2, src1, sh - 1); + %} + + ins_pipe(ialu_regI_regI); +%} + +// Add Long Register with Register +instruct addL_Reg_Reg(mRegL dst, mRegLorI2L src1, mRegLorI2L src2) %{ + match(Set dst (AddL src1 src2)); + ins_cost(200); + format %{ "ADD $dst, $src1, $src2 #@addL_Reg_Reg\t" %} + + ins_encode %{ + Register dst_reg = as_Register($dst$$reg); + Register src1_reg = as_Register($src1$$reg); + Register src2_reg = as_Register($src2$$reg); + + __ add_d(dst_reg, src1_reg, src2_reg); + %} + + ins_pipe( ialu_regL_regL ); +%} + +instruct addL_Reg_imm(mRegL dst, mRegLorI2L src1, immL12 src2) +%{ + match(Set dst (AddL src1 src2)); + + format %{ "ADD $dst, $src1, $src2 #@addL_Reg_imm " %} + ins_encode %{ + Register dst_reg = as_Register($dst$$reg); + Register src1_reg = as_Register($src1$$reg); + int src2_imm = $src2$$constant; + + __ addi_d(dst_reg, src1_reg, src2_imm); + %} + + ins_pipe( ialu_regL_regL ); +%} + +//----------Abs Instructions------------------------------------------- + +// Integer Absolute Instructions +instruct absI_rReg(mRegI dst, mRegI src) +%{ + match(Set dst (AbsI src)); + effect(TEMP dst); + format %{ "AbsI $dst, $src" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src$$Register; + + __ srai_w(AT, src, 31); + __ xorr(dst, src, AT); + __ sub_w(dst, dst, AT); + %} + + ins_pipe(ialu_regI_regI); +%} + +// Long Absolute Instructions +instruct absL_rReg(mRegL dst, mRegLorI2L src) +%{ + match(Set dst (AbsL src)); + effect(TEMP dst); + format %{ "AbsL $dst, $src" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src$$Register; + + __ srai_d(AT, src, 63); + __ xorr(dst, src, AT); + __ sub_d(dst, dst, AT); + %} + + ins_pipe(ialu_regL_regL); +%} + +//----------Subtraction Instructions------------------------------------------- +// Integer Subtraction Instructions +instruct subI_Reg_Reg(mRegI dst, mRegIorL2I src1, mRegIorL2I src2) %{ + match(Set dst (SubI src1 src2)); + ins_cost(100); + + format %{ "sub $dst, $src1, $src2 #@subI_Reg_Reg" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + Register 
src2 = $src2$$Register; + __ sub_w(dst, src1, src2); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct subI_Reg_immI_M2047_2048(mRegI dst, mRegIorL2I src1, immI_M2047_2048 src2) %{ + match(Set dst (SubI src1 src2)); + ins_cost(80); + + format %{ "sub $dst, $src1, $src2 #@subI_Reg_immI_M2047_2048" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + __ addi_w(dst, src1, -1 * $src2$$constant); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct negI_Reg(mRegI dst, immI_0 zero, mRegIorL2I src) %{ + match(Set dst (SubI zero src)); + ins_cost(80); + + format %{ "neg $dst, $src #@negI_Reg" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src$$Register; + __ sub_w(dst, R0, src); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct negL_Reg(mRegL dst, immL_0 zero, mRegLorI2L src) %{ + match(Set dst (SubL zero src)); + ins_cost(80); + + format %{ "neg $dst, $src #@negL_Reg" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src$$Register; + __ sub_d(dst, R0, src); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct subL_Reg_immL_M2047_2048(mRegL dst, mRegL src1, immL_M2047_2048 src2) %{ + match(Set dst (SubL src1 src2)); + ins_cost(80); + + format %{ "sub $dst, $src1, $src2 #@subL_Reg_immL_M2047_2048" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + __ addi_d(dst, src1, -1 * $src2$$constant); + %} + ins_pipe( ialu_regI_regI ); +%} + +// Subtract Long Register with Register. +instruct subL_Reg_Reg(mRegL dst, mRegLorI2L src1, mRegLorI2L src2) %{ + match(Set dst (SubL src1 src2)); + ins_cost(100); + format %{ "SubL $dst, $src1, $src2 @ subL_Reg_Reg" %} + ins_encode %{ + Register dst = as_Register($dst$$reg); + Register src1 = as_Register($src1$$reg); + Register src2 = as_Register($src2$$reg); + + __ sub_d(dst, src1, src2); + %} + ins_pipe( ialu_regL_regL ); +%} + +// Integer MOD with Register +instruct modI_Reg_Reg(mRegI dst, mRegIorL2I src1, mRegIorL2I src2) %{ + match(Set dst (ModI src1 src2)); + ins_cost(300); + format %{ "modi $dst, $src1, $src2 @ modI_Reg_Reg" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + Register src2 = $src2$$Register; + + __ mod_w(dst, src1, src2); + %} + + //ins_pipe( ialu_mod ); + ins_pipe( ialu_regI_regI ); +%} + +instruct modL_reg_reg(mRegL dst, mRegLorI2L src1, mRegLorI2L src2) %{ + match(Set dst (ModL src1 src2)); + format %{ "modL $dst, $src1, $src2 @modL_reg_reg" %} + + ins_encode %{ + Register dst = as_Register($dst$$reg); + Register op1 = as_Register($src1$$reg); + Register op2 = as_Register($src2$$reg); + + __ mod_d(dst, op1, op2); + %} + ins_pipe( pipe_slow ); +%} + +instruct mulI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ + match(Set dst (MulI src1 src2)); + + ins_cost(300); + format %{ "mul $dst, $src1, $src2 @ mulI_Reg_Reg" %} + ins_encode %{ + Register src1 = $src1$$Register; + Register src2 = $src2$$Register; + Register dst = $dst$$Register; + + __ mul_w(dst, src1, src2); + %} + ins_pipe( ialu_mult ); +%} + +instruct divI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ + match(Set dst (DivI src1 src2)); + + ins_cost(300); + format %{ "div $dst, $src1, $src2 @ divI_Reg_Reg" %} + ins_encode %{ + Register src1 = $src1$$Register; + Register src2 = $src2$$Register; + Register dst = $dst$$Register; + + __ div_w(dst, src1, src2); + + %} + ins_pipe( ialu_mod ); +%} + +instruct divF_Reg_Reg(regF dst, regF src1, regF src2) %{ + match(Set dst (DivF src1 src2)); + + ins_cost(300); + format %{ "divF $dst, $src1, $src2 
@ divF_Reg_Reg" %} + ins_encode %{ + FloatRegister src1 = $src1$$FloatRegister; + FloatRegister src2 = $src2$$FloatRegister; + FloatRegister dst = $dst$$FloatRegister; + + __ fdiv_s(dst, src1, src2); + %} + ins_pipe( pipe_slow ); +%} + +instruct divD_Reg_Reg(regD dst, regD src1, regD src2) %{ + match(Set dst (DivD src1 src2)); + + ins_cost(300); + format %{ "divD $dst, $src1, $src2 @ divD_Reg_Reg" %} + ins_encode %{ + FloatRegister src1 = $src1$$FloatRegister; + FloatRegister src2 = $src2$$FloatRegister; + FloatRegister dst = $dst$$FloatRegister; + + __ fdiv_d(dst, src1, src2); + %} + ins_pipe( pipe_slow ); +%} + +instruct mulL_reg_reg(mRegL dst, mRegLorI2L src1, mRegLorI2L src2) %{ + match(Set dst (MulL src1 src2)); + format %{ "mulL $dst, $src1, $src2 @mulL_reg_reg" %} + ins_encode %{ + Register dst = as_Register($dst$$reg); + Register op1 = as_Register($src1$$reg); + Register op2 = as_Register($src2$$reg); + + __ mul_d(dst, op1, op2); + %} + ins_pipe( pipe_slow ); +%} + +instruct mulHiL_reg_reg(mRegL dst, mRegL src1, mRegL src2) %{ + match(Set dst (MulHiL src1 src2)); + format %{ "mulHiL $dst, $src1, $src2 @mulL_reg_reg" %} + ins_encode %{ + Register dst = as_Register($dst$$reg); + Register op1 = as_Register($src1$$reg); + Register op2 = as_Register($src2$$reg); + + __ mulh_d(dst, op1, op2); + %} + ins_pipe( pipe_slow ); +%} + +instruct divL_reg_reg(mRegL dst, mRegL src1, mRegL src2) %{ + match(Set dst (DivL src1 src2)); + format %{ "divL $dst, $src1, $src2 @divL_reg_reg" %} + + ins_encode %{ + Register dst = as_Register($dst$$reg); + Register op1 = as_Register($src1$$reg); + Register op2 = as_Register($src2$$reg); + + __ div_d(dst, op1, op2); + %} + ins_pipe( pipe_slow ); +%} + +instruct addF_reg_reg(regF dst, regF src1, regF src2) %{ + match(Set dst (AddF src1 src2)); + format %{ "AddF $dst, $src1, $src2 @addF_reg_reg" %} + ins_encode %{ + FloatRegister src1 = as_FloatRegister($src1$$reg); + FloatRegister src2 = as_FloatRegister($src2$$reg); + FloatRegister dst = as_FloatRegister($dst$$reg); + + __ fadd_s(dst, src1, src2); + %} + ins_pipe( fpu_regF_regF ); +%} + +instruct subF_reg_reg(regF dst, regF src1, regF src2) %{ + match(Set dst (SubF src1 src2)); + format %{ "SubF $dst, $src1, $src2 @subF_reg_reg" %} + ins_encode %{ + FloatRegister src1 = as_FloatRegister($src1$$reg); + FloatRegister src2 = as_FloatRegister($src2$$reg); + FloatRegister dst = as_FloatRegister($dst$$reg); + + __ fsub_s(dst, src1, src2); + %} + ins_pipe( fpu_regF_regF ); +%} +instruct addD_reg_reg(regD dst, regD src1, regD src2) %{ + match(Set dst (AddD src1 src2)); + format %{ "AddD $dst, $src1, $src2 @addD_reg_reg" %} + ins_encode %{ + FloatRegister src1 = as_FloatRegister($src1$$reg); + FloatRegister src2 = as_FloatRegister($src2$$reg); + FloatRegister dst = as_FloatRegister($dst$$reg); + + __ fadd_d(dst, src1, src2); + %} + ins_pipe( fpu_regF_regF ); +%} + +instruct subD_reg_reg(regD dst, regD src1, regD src2) %{ + match(Set dst (SubD src1 src2)); + format %{ "SubD $dst, $src1, $src2 @subD_reg_reg" %} + ins_encode %{ + FloatRegister src1 = as_FloatRegister($src1$$reg); + FloatRegister src2 = as_FloatRegister($src2$$reg); + FloatRegister dst = as_FloatRegister($dst$$reg); + + __ fsub_d(dst, src1, src2); + %} + ins_pipe( fpu_regF_regF ); +%} + +instruct negF_reg(regF dst, regF src) %{ + match(Set dst (NegF src)); + format %{ "negF $dst, $src @negF_reg" %} + ins_encode %{ + FloatRegister src = as_FloatRegister($src$$reg); + FloatRegister dst = as_FloatRegister($dst$$reg); + + __ fneg_s(dst, src); + %} + ins_pipe( 
fpu_regF_regF ); +%} + +instruct negD_reg(regD dst, regD src) %{ + match(Set dst (NegD src)); + format %{ "negD $dst, $src @negD_reg" %} + ins_encode %{ + FloatRegister src = as_FloatRegister($src$$reg); + FloatRegister dst = as_FloatRegister($dst$$reg); + + __ fneg_d(dst, src); + %} + ins_pipe( fpu_regF_regF ); +%} + + +instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{ + match(Set dst (MulF src1 src2)); + format %{ "MULF $dst, $src1, $src2 @mulF_reg_reg" %} + ins_encode %{ + FloatRegister src1 = $src1$$FloatRegister; + FloatRegister src2 = $src2$$FloatRegister; + FloatRegister dst = $dst$$FloatRegister; + + __ fmul_s(dst, src1, src2); + %} + ins_pipe( fpu_regF_regF ); +%} + +// Mul two double precision floating piont number +instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{ + match(Set dst (MulD src1 src2)); + format %{ "MULD $dst, $src1, $src2 @mulD_reg_reg" %} + ins_encode %{ + FloatRegister src1 = $src1$$FloatRegister; + FloatRegister src2 = $src2$$FloatRegister; + FloatRegister dst = $dst$$FloatRegister; + + __ fmul_d(dst, src1, src2); + %} + ins_pipe( fpu_regF_regF ); +%} + +instruct absF_reg(regF dst, regF src) %{ + match(Set dst (AbsF src)); + ins_cost(100); + format %{ "absF $dst, $src @absF_reg" %} + ins_encode %{ + FloatRegister src = as_FloatRegister($src$$reg); + FloatRegister dst = as_FloatRegister($dst$$reg); + + __ fabs_s(dst, src); + %} + ins_pipe( fpu_regF_regF ); +%} + + +// intrinsics for math_native. +// AbsD SqrtD CosD SinD TanD LogD Log10D + +instruct absD_reg(regD dst, regD src) %{ + match(Set dst (AbsD src)); + ins_cost(100); + format %{ "absD $dst, $src @absD_reg" %} + ins_encode %{ + FloatRegister src = as_FloatRegister($src$$reg); + FloatRegister dst = as_FloatRegister($dst$$reg); + + __ fabs_d(dst, src); + %} + ins_pipe( fpu_regF_regF ); +%} + +instruct sqrtD_reg(regD dst, regD src) %{ + match(Set dst (SqrtD src)); + ins_cost(100); + format %{ "SqrtD $dst, $src @sqrtD_reg" %} + ins_encode %{ + FloatRegister src = as_FloatRegister($src$$reg); + FloatRegister dst = as_FloatRegister($dst$$reg); + + __ fsqrt_d(dst, src); + %} + ins_pipe( fpu_regF_regF ); +%} + +instruct sqrtF_reg(regF dst, regF src) %{ + match(Set dst (ConvD2F (SqrtD (ConvF2D src)))); + ins_cost(100); + format %{ "SqrtF $dst, $src @sqrtF_reg" %} + ins_encode %{ + FloatRegister src = as_FloatRegister($src$$reg); + FloatRegister dst = as_FloatRegister($dst$$reg); + + __ fsqrt_s(dst, src); + %} + ins_pipe( fpu_regF_regF ); +%} + +// src1 * src2 + src3 +instruct maddF_reg_reg(regF dst, regF src1, regF src2, regF src3) %{ + predicate(UseFMA); + match(Set dst (FmaF src3 (Binary src1 src2))); + + format %{ "fmadd_s $dst, $src1, $src2, $src3" %} + + ins_encode %{ + __ fmadd_s(as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg)); + %} + + ins_pipe(fpu_regF_regF); +%} + +// src1 * src2 + src3 +instruct maddD_reg_reg(regD dst, regD src1, regD src2, regD src3) %{ + predicate(UseFMA); + match(Set dst (FmaD src3 (Binary src1 src2))); + + format %{ "fmadd_d $dst, $src1, $src2, $src3" %} + + ins_encode %{ + __ fmadd_d(as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg)); + %} + + ins_pipe(fpu_regF_regF); +%} + +// src1 * src2 - src3 +instruct msubF_reg_reg(regF dst, regF src1, regF src2, regF src3, immF_0 zero) %{ + predicate(UseFMA); + match(Set dst (FmaF (NegF src3) (Binary src1 src2))); + + format %{ "fmsub_s $dst, $src1, $src2, $src3" %} + + ins_encode %{ + __ 
fmsub_s(as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg)); + %} + + ins_pipe(fpu_regF_regF); +%} + +// src1 * src2 - src3 +instruct msubD_reg_reg(regD dst, regD src1, regD src2, regD src3, immD_0 zero) %{ + predicate(UseFMA); + match(Set dst (FmaD (NegD src3) (Binary src1 src2))); + + format %{ "fmsub_d $dst, $src1, $src2, $src3" %} + + ins_encode %{ + __ fmsub_d(as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg)); + %} + + ins_pipe(fpu_regF_regF); +%} + +// -src1 * src2 - src3 +instruct mnaddF_reg_reg(regF dst, regF src1, regF src2, regF src3, immF_0 zero) %{ + predicate(UseFMA); + match(Set dst (FmaF (NegF src3) (Binary (NegF src1) src2))); + match(Set dst (FmaF (NegF src3) (Binary src1 (NegF src2)))); + + format %{ "fnmadds $dst, $src1, $src2, $src3" %} + + ins_encode %{ + __ fnmadd_s(as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg)); + %} + + ins_pipe(fpu_regF_regF); +%} + +// -src1 * src2 - src3 +instruct mnaddD_reg_reg(regD dst, regD src1, regD src2, regD src3, immD_0 zero) %{ + predicate(UseFMA); + match(Set dst (FmaD (NegD src3) (Binary (NegD src1) src2))); + match(Set dst (FmaD (NegD src3) (Binary src1 (NegD src2)))); + + format %{ "fnmaddd $dst, $src1, $src2, $src3" %} + + ins_encode %{ + __ fnmadd_d(as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg)); + %} + + ins_pipe(fpu_regF_regF); +%} + +// -src1 * src2 + src3 +instruct mnsubF_reg_reg(regF dst, regF src1, regF src2, regF src3) %{ + predicate(UseFMA); + match(Set dst (FmaF src3 (Binary (NegF src1) src2))); + match(Set dst (FmaF src3 (Binary src1 (NegF src2)))); + + format %{ "fnmsubs $dst, $src1, $src2, $src3" %} + + ins_encode %{ + __ fnmsub_s(as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg)); + %} + + ins_pipe(fpu_regF_regF); +%} + +// -src1 * src2 + src3 +instruct mnsubD_reg_reg(regD dst, regD src1, regD src2, regD src3) %{ + predicate(UseFMA); + match(Set dst (FmaD src3 (Binary (NegD src1) src2))); + match(Set dst (FmaD src3 (Binary src1 (NegD src2)))); + + format %{ "fnmsubd $dst, $src1, $src2, $src3" %} + + ins_encode %{ + __ fnmsub_d(as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg)); + %} + + ins_pipe(fpu_regF_regF); +%} + +instruct copySignF_reg(regF dst, regF src1, regF src2) %{ + match(Set dst (CopySignF src1 src2)); + effect(TEMP_DEF dst, USE src1, USE src2); + + format %{ "fcopysign_s $dst $src1 $src2 @ copySignF_reg" %} + + ins_encode %{ + __ fcopysign_s($dst$$FloatRegister, + $src1$$FloatRegister, + $src2$$FloatRegister); + %} + + ins_pipe( fpu_regF_regF ); +%} + +instruct copySignD_reg(regD dst, regD src1, regD src2, immD_0 zero) %{ + match(Set dst (CopySignD src1 (Binary src2 zero))); + effect(TEMP_DEF dst, USE src1, USE src2); + + format %{ "fcopysign_d $dst $src1 $src2 @ copySignD_reg" %} + + ins_encode %{ + __ fcopysign_d($dst$$FloatRegister, + $src1$$FloatRegister, + $src2$$FloatRegister); + %} + + ins_pipe( fpu_regF_regF ); +%} + +//----------------------------------Logical Instructions---------------------- +//__________________________________Integer Logical Instructions------------- + +//And Instuctions +// And Register with Immediate +instruct andI_Reg_imm_0_4095(mRegI dst, mRegI src1, 
immI_0_4095 src2) %{ + match(Set dst (AndI src1 src2)); + ins_cost(60); + + format %{ "and $dst, $src1, $src2 #@andI_Reg_imm_0_4095" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src1$$Register; + int val = $src2$$constant; + + __ andi(dst, src, val); + + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct andI_Reg_immI_nonneg_mask(mRegI dst, mRegI src1, immI_nonneg_mask mask) %{ + match(Set dst (AndI src1 mask)); + ins_cost(60); + + format %{ "and $dst, $src1, $mask #@andI_Reg_immI_nonneg_mask" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src1$$Register; + int size = Assembler::is_int_mask($mask$$constant); + + __ bstrpick_w(dst, src, size-1, 0); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct andL_Reg_immL_nonneg_mask(mRegL dst, mRegL src1, immL_nonneg_mask mask) %{ + match(Set dst (AndL src1 mask)); + ins_cost(60); + + format %{ "and $dst, $src1, $mask #@andL_Reg_immL_nonneg_mask" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src1$$Register; + int size = Assembler::is_jlong_mask($mask$$constant); + + __ bstrpick_d(dst, src, size-1, 0); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct xorI_Reg_imm_0_4095(mRegI dst, mRegI src1, immI_0_4095 src2) %{ + match(Set dst (XorI src1 src2)); + ins_cost(60); + + format %{ "xori $dst, $src1, $src2 #@xorI_Reg_imm_0_4095" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src1$$Register; + int val = $src2$$constant; + + __ xori(dst, src, val); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct xorI_Reg_immI_M1(mRegI dst, mRegIorL2I src1, immI_M1 M1) %{ + match(Set dst (XorI src1 M1)); + ins_cost(60); + + format %{ "xor $dst, $src1, $M1 #@xorI_Reg_immI_M1" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src1$$Register; + + __ orn(dst, R0, src); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct xorL_Reg_imm_0_4095(mRegL dst, mRegL src1, immL_0_4095 src2) %{ + match(Set dst (XorL src1 src2)); + ins_cost(60); + + format %{ "xori $dst, $src1, $src2 #@xorL_Reg_imm_0_4095" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src1$$Register; + int val = $src2$$constant; + + __ xori(dst, src, val); + %} + ins_pipe( ialu_regI_regI ); +%} + + +instruct lbu_and_lmask(mRegI dst, memory mem, immI_255 mask) %{ + match(Set dst (AndI mask (LoadB mem))); + ins_cost(60); + + format %{ "lhu $dst, $mem #@lbu_and_lmask" %} + ins_encode %{ + __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_BYTE); + %} + ins_pipe( ialu_loadI ); +%} + +instruct lbu_and_rmask(mRegI dst, memory mem, immI_255 mask) %{ + match(Set dst (AndI (LoadB mem) mask)); + ins_cost(60); + + format %{ "lhu $dst, $mem #@lbu_and_rmask" %} + ins_encode %{ + __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_BYTE); + %} + ins_pipe( ialu_loadI ); +%} + +instruct andI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ + match(Set dst (AndI src1 src2)); + + format %{ "and $dst, $src1, $src2 #@andI_Reg_Reg" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + Register src2 = $src2$$Register; + + __ andr(dst, src1, src2); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct andnI_Reg_nReg(mRegI dst, mRegI src1, mRegI src2, immI_M1 M1) %{ + match(Set dst (AndI src1 (XorI src2 M1))); + + format %{ "andn $dst, $src1, $src2 #@andnI_Reg_nReg" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + Register src2 = 
$src2$$Register; + + __ andn(dst, src1, src2); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct ornI_Reg_nReg(mRegI dst, mRegI src1, mRegI src2, immI_M1 M1) %{ + match(Set dst (OrI src1 (XorI src2 M1))); + + format %{ "orn $dst, $src1, $src2 #@ornI_Reg_nReg" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + Register src2 = $src2$$Register; + + __ orn(dst, src1, src2); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct andnI_nReg_Reg(mRegI dst, mRegI src1, mRegI src2, immI_M1 M1) %{ + match(Set dst (AndI (XorI src1 M1) src2)); + + format %{ "andn $dst, $src2, $src1 #@andnI_nReg_Reg" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + Register src2 = $src2$$Register; + + __ andn(dst, src2, src1); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct ornI_nReg_Reg(mRegI dst, mRegI src1, mRegI src2, immI_M1 M1) %{ + match(Set dst (OrI (XorI src1 M1) src2)); + + format %{ "orn $dst, $src2, $src1 #@ornI_nReg_Reg" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + Register src2 = $src2$$Register; + + __ orn(dst, src2, src1); + %} + ins_pipe( ialu_regI_regI ); +%} + +// And Long Register with Register +instruct andL_Reg_Reg(mRegL dst, mRegL src1, mRegLorI2L src2) %{ + match(Set dst (AndL src1 src2)); + format %{ "AND $dst, $src1, $src2 @ andL_Reg_Reg\n\t" %} + ins_encode %{ + Register dst_reg = as_Register($dst$$reg); + Register src1_reg = as_Register($src1$$reg); + Register src2_reg = as_Register($src2$$reg); + + __ andr(dst_reg, src1_reg, src2_reg); + %} + ins_pipe( ialu_regL_regL ); +%} + +instruct andL_Reg_imm_0_4095(mRegL dst, mRegL src1, immL_0_4095 src2) %{ + match(Set dst (AndL src1 src2)); + ins_cost(60); + + format %{ "and $dst, $src1, $src2 #@andL_Reg_imm_0_4095" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src1$$Register; + long val = $src2$$constant; + + __ andi(dst, src, val); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct andL2I_Reg_imm_0_4095(mRegI dst, mRegL src1, immL_0_4095 src2) %{ + match(Set dst (ConvL2I (AndL src1 src2))); + ins_cost(60); + + format %{ "and $dst, $src1, $src2 #@andL2I_Reg_imm_0_4095" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src1$$Register; + long val = $src2$$constant; + + __ andi(dst, src, val); + %} + ins_pipe( ialu_regI_regI ); +%} + + +instruct andL_Reg_immL_M8(mRegL dst, immL_M8 M8) %{ + match(Set dst (AndL dst M8)); + ins_cost(60); + + format %{ "and $dst, $dst, $M8 #@andL_Reg_immL_M8" %} + ins_encode %{ + Register dst = $dst$$Register; + + __ bstrins_d(dst, R0, 2, 0); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct andL_Reg_immL_M5(mRegL dst, immL_M5 M5) %{ + match(Set dst (AndL dst M5)); + ins_cost(60); + + format %{ "and $dst, $dst, $M5 #@andL_Reg_immL_M5" %} + ins_encode %{ + Register dst = $dst$$Register; + + __ bstrins_d(dst, R0, 2, 2); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct andL_Reg_immL_M7(mRegL dst, immL_M7 M7) %{ + match(Set dst (AndL dst M7)); + ins_cost(60); + + format %{ "and $dst, $dst, $M7 #@andL_Reg_immL_M7" %} + ins_encode %{ + Register dst = $dst$$Register; + + __ bstrins_d(dst, R0, 2, 1); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct andL_Reg_immL_M4(mRegL dst, immL_M4 M4) %{ + match(Set dst (AndL dst M4)); + ins_cost(60); + + format %{ "and $dst, $dst, $M4 #@andL_Reg_immL_M4" %} + ins_encode %{ + Register dst = $dst$$Register; + + __ bstrins_d(dst, R0, 1, 0); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct andL_Reg_immL_M121(mRegL dst, 
immL_M121 M121) %{
+  match(Set dst (AndL dst M121));
+  ins_cost(60);
+
+  format %{ "and $dst, $dst, $M121 #@andL_Reg_immL_M121" %}
+  ins_encode %{
+    Register dst = $dst$$Register;
+
+    __ bstrins_d(dst, R0, 6, 3);
+  %}
+  ins_pipe( ialu_regI_regI );
+%}
+
+// Or Long Register with Register
+instruct orL_Reg_Reg(mRegL dst, mRegLorI2L src1, mRegLorI2L src2) %{
+  match(Set dst (OrL src1 src2));
+  format %{ "OR $dst, $src1, $src2 @ orL_Reg_Reg\t" %}
+  ins_encode %{
+    Register dst_reg = $dst$$Register;
+    Register src1_reg = $src1$$Register;
+    Register src2_reg = $src2$$Register;
+
+    __ orr(dst_reg, src1_reg, src2_reg);
+  %}
+  ins_pipe( ialu_regL_regL );
+%}
+
+instruct orL_Reg_P2XReg(mRegL dst, mRegP src1, mRegLorI2L src2) %{
+  match(Set dst (OrL (CastP2X src1) src2));
+  format %{ "OR $dst, $src1, $src2 @ orL_Reg_P2XReg\t" %}
+  ins_encode %{
+    Register dst_reg = $dst$$Register;
+    Register src1_reg = $src1$$Register;
+    Register src2_reg = $src2$$Register;
+
+    __ orr(dst_reg, src1_reg, src2_reg);
+  %}
+  ins_pipe( ialu_regL_regL );
+%}
+
+// Xor Long Register with Register
+instruct xorL_Reg_Reg(mRegL dst, mRegL src1, mRegL src2) %{
+  match(Set dst (XorL src1 src2));
+  format %{ "XOR $dst, $src1, $src2 @ xorL_Reg_Reg\t" %}
+  ins_encode %{
+    Register dst_reg = as_Register($dst$$reg);
+    Register src1_reg = as_Register($src1$$reg);
+    Register src2_reg = as_Register($src2$$reg);
+
+    __ xorr(dst_reg, src1_reg, src2_reg);
+  %}
+  ins_pipe( ialu_regL_regL );
+%}
+
+// Shift Left by 5-bit immediate
+instruct salI_Reg_imm(mRegI dst, mRegIorL2I src, immIU5 shift) %{
+  match(Set dst (LShiftI src shift));
+
+  format %{ "SHL $dst, $src, $shift #@salI_Reg_imm" %}
+  ins_encode %{
+    Register src = $src$$Register;
+    Register dst = $dst$$Register;
+    int shamt = $shift$$constant;
+
+    __ slli_w(dst, src, shamt);
+  %}
+  ins_pipe( ialu_regI_regI );
+%}
+
+instruct salI_Reg_imm_and_M65536(mRegI dst, mRegI src, immI_16 shift, immI_M65536 mask) %{
+  match(Set dst (AndI (LShiftI src shift) mask));
+
+  format %{ "SHL $dst, $src, $shift #@salI_Reg_imm_and_M65536" %}
+  ins_encode %{
+    Register src = $src$$Register;
+    Register dst = $dst$$Register;
+
+    __ slli_w(dst, src, 16);
+  %}
+  ins_pipe( ialu_regI_regI );
+%}
+
+instruct land7_2_s(mRegI dst, mRegL src, immL_7 seven, immI_16 sixteen)
+%{
+  match(Set dst (RShiftI (LShiftI (ConvL2I (AndL src seven)) sixteen) sixteen));
+
+  format %{ "andi $dst, $src, 7\t# @land7_2_s" %}
+  ins_encode %{
+    Register src = $src$$Register;
+    Register dst = $dst$$Register;
+
+    __ andi(dst, src, 7);
+  %}
+  ins_pipe(ialu_regI_regI);
+%}
+
+// Shift Left by 16, followed by Arithmetic Shift Right by 16.
+// This idiom is used by the compiler for the i2s bytecode.
+instruct i2s(mRegI dst, mRegI src, immI_16 sixteen)
+%{
+  match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
+
+  format %{ "i2s $dst, $src\t# @i2s" %}
+  ins_encode %{
+    Register src = $src$$Register;
+    Register dst = $dst$$Register;
+
+    __ ext_w_h(dst, src);
+  %}
+  ins_pipe(ialu_regI_regI);
+%}
+
+// Shift Left by 24, followed by Arithmetic Shift Right by 24.
+// This idiom is used by the compiler for the i2b bytecode.
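+// A minimal sketch of the shift-pair idiom matched below, using Java shift
+// semantics (32-bit int, arithmetic >>); the encoder collapses it into a
+// single ext_w_b (function name is illustrative):
+//
+//   int i2b(int x) {
+//     return (x << 24) >> 24;   // sign-extend the low byte, i.e. (byte)x
+//   }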
+instruct i2b(mRegI dst, mRegI src, immI_24 twentyfour) +%{ + match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour)); + + format %{ "i2b $dst, $src\t# @i2b" %} + ins_encode %{ + Register src = $src$$Register; + Register dst = $dst$$Register; + + __ ext_w_b(dst, src); + %} + ins_pipe(ialu_regI_regI); +%} + + +instruct salI_RegL2I_imm(mRegI dst, mRegL src, immIU5 shift) %{ + match(Set dst (LShiftI (ConvL2I src) shift)); + + format %{ "SHL $dst, $src, $shift #@salI_RegL2I_imm" %} + ins_encode %{ + Register src = $src$$Register; + Register dst = $dst$$Register; + int shamt = $shift$$constant; + + __ slli_w(dst, src, shamt); + %} + ins_pipe( ialu_regI_regI ); +%} + +// Shift Left by 8-bit immediate +instruct salI_Reg_Reg(mRegI dst, mRegIorL2I src, mRegI shift) %{ + match(Set dst (LShiftI src shift)); + + format %{ "SHL $dst, $src, $shift #@salI_Reg_Reg" %} + ins_encode %{ + Register src = $src$$Register; + Register dst = $dst$$Register; + Register shamt = $shift$$Register; + __ sll_w(dst, src, shamt); + %} + ins_pipe( ialu_regI_regI ); +%} + + +// Shift Left Long 6-bit immI +instruct salL_Reg_imm(mRegL dst, mRegLorI2L src, immIU6 shift) %{ + match(Set dst (LShiftL src shift)); + ins_cost(100); + format %{ "salL $dst, $src, $shift @ salL_Reg_imm" %} + ins_encode %{ + Register src_reg = as_Register($src$$reg); + Register dst_reg = as_Register($dst$$reg); + int shamt = $shift$$constant; + + __ slli_d(dst_reg, src_reg, shamt); + %} + ins_pipe( ialu_regL_regL ); +%} + +// Shift Left Long +instruct salL_Reg_Reg(mRegL dst, mRegLorI2L src, mRegI shift) %{ + match(Set dst (LShiftL src shift)); + ins_cost(100); + format %{ "salL $dst, $src, $shift @ salL_Reg_Reg" %} + ins_encode %{ + Register src_reg = as_Register($src$$reg); + Register dst_reg = as_Register($dst$$reg); + + __ sll_d(dst_reg, src_reg, $shift$$Register); + %} + ins_pipe( ialu_regL_regL ); +%} + +// Shift Right Long 6-bit +instruct sarL_Reg_imm(mRegL dst, mRegLorI2L src, immIU6 shift) %{ + match(Set dst (RShiftL src shift)); + ins_cost(100); + format %{ "sarL $dst, $src, $shift @ sarL_Reg_imm" %} + ins_encode %{ + Register src_reg = as_Register($src$$reg); + Register dst_reg = as_Register($dst$$reg); + int shamt = $shift$$constant; + + __ srai_d(dst_reg, src_reg, shamt); + %} + ins_pipe( ialu_regL_regL ); +%} + +instruct sarL2I_Reg_immI_32_63(mRegI dst, mRegLorI2L src, immI_32_63 shift) %{ + match(Set dst (ConvL2I (RShiftL src shift))); + ins_cost(100); + format %{ "sarL $dst, $src, $shift @ sarL2I_Reg_immI_32_63" %} + ins_encode %{ + Register src_reg = as_Register($src$$reg); + Register dst_reg = as_Register($dst$$reg); + int shamt = $shift$$constant; + + __ srai_d(dst_reg, src_reg, shamt); + %} + ins_pipe( ialu_regL_regL ); +%} + +// Shift Right Long arithmetically +instruct sarL_Reg_Reg(mRegL dst, mRegLorI2L src, mRegI shift) %{ + match(Set dst (RShiftL src shift)); + ins_cost(100); + format %{ "sarL $dst, $src, $shift @ sarL_Reg_Reg" %} + ins_encode %{ + Register src_reg = as_Register($src$$reg); + Register dst_reg = as_Register($dst$$reg); + + __ sra_d(dst_reg, src_reg, $shift$$Register); + %} + ins_pipe( ialu_regL_regL ); +%} + +// Shift Right Long logically +instruct slrL_Reg_Reg(mRegL dst, mRegL src, mRegI shift) %{ + match(Set dst (URShiftL src shift)); + ins_cost(100); + format %{ "slrL $dst, $src, $shift @ slrL_Reg_Reg" %} + ins_encode %{ + Register src_reg = as_Register($src$$reg); + Register dst_reg = as_Register($dst$$reg); + + __ srl_d(dst_reg, src_reg, $shift$$Register); + %} + ins_pipe( ialu_regL_regL ); +%} + 
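+// The logical right-shift forms here and below differ from the arithmetic
+// ones only in what is shifted in from the left; a minimal C++ sketch,
+// assuming 64-bit long long, arithmetic >> on signed values and 0 <= s < 64
+// (function names are illustrative):
+//
+//   long long sra64(long long x, int s) { return x >> s; }                          // srai_d / sra_d
+//   long long srl64(long long x, int s) { return (long long)((unsigned long long)x >> s); }   // srli_d / srl_d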
+instruct slrL_Reg_immI_0_31(mRegL dst, mRegLorI2L src, immI_0_31 shift) %{ + match(Set dst (URShiftL src shift)); + ins_cost(80); + format %{ "slrL $dst, $src, $shift @ slrL_Reg_immI_0_31" %} + ins_encode %{ + Register src_reg = as_Register($src$$reg); + Register dst_reg = as_Register($dst$$reg); + int shamt = $shift$$constant; + + __ srli_d(dst_reg, src_reg, shamt); + %} + ins_pipe( ialu_regL_regL ); +%} + +instruct slrL_Reg_immI_0_31_and_max_int(mRegI dst, mRegLorI2L src, immI_0_31 shift, immI_MaxI max_int) %{ + match(Set dst (AndI (ConvL2I (URShiftL src shift)) max_int)); + ins_cost(80); + format %{ "bstrpick_d $dst, $src, $shift+30, shift @ slrL_Reg_immI_0_31_and_max_int" %} + ins_encode %{ + Register src_reg = as_Register($src$$reg); + Register dst_reg = as_Register($dst$$reg); + int shamt = $shift$$constant; + + __ bstrpick_d(dst_reg, src_reg, shamt+30, shamt); + %} + ins_pipe( ialu_regL_regL ); +%} + +instruct slrL_P2XReg_immI_0_31(mRegL dst, mRegP src, immI_0_31 shift) %{ + match(Set dst (URShiftL (CastP2X src) shift)); + ins_cost(80); + format %{ "slrL $dst, $src, $shift @ slrL_P2XReg_immI_0_31" %} + ins_encode %{ + Register src_reg = as_Register($src$$reg); + Register dst_reg = as_Register($dst$$reg); + int shamt = $shift$$constant; + + __ srli_d(dst_reg, src_reg, shamt); + %} + ins_pipe( ialu_regL_regL ); +%} + +instruct slrL_Reg_immI_32_63(mRegL dst, mRegLorI2L src, immI_32_63 shift) %{ + match(Set dst (URShiftL src shift)); + ins_cost(80); + format %{ "slrL $dst, $src, $shift @ slrL_Reg_immI_32_63" %} + ins_encode %{ + Register src_reg = as_Register($src$$reg); + Register dst_reg = as_Register($dst$$reg); + int shamt = $shift$$constant; + + __ srli_d(dst_reg, src_reg, shamt); + %} + ins_pipe( ialu_regL_regL ); +%} + +instruct slrL_Reg_immI_convL2I(mRegI dst, mRegLorI2L src, immI_32_63 shift) %{ + match(Set dst (ConvL2I (URShiftL src shift))); + predicate(n->in(1)->in(2)->get_int() > 32); + ins_cost(80); + format %{ "slrL $dst, $src, $shift @ slrL_Reg_immI_convL2I" %} + ins_encode %{ + Register src_reg = as_Register($src$$reg); + Register dst_reg = as_Register($dst$$reg); + int shamt = $shift$$constant; + + __ srli_d(dst_reg, src_reg, shamt); + %} + ins_pipe( ialu_regL_regL ); +%} + +instruct slrL_P2XReg_immI_32_63(mRegL dst, mRegP src, immI_32_63 shift) %{ + match(Set dst (URShiftL (CastP2X src) shift)); + ins_cost(80); + format %{ "slrL $dst, $src, $shift @ slrL_P2XReg_immI_32_63" %} + ins_encode %{ + Register src_reg = as_Register($src$$reg); + Register dst_reg = as_Register($dst$$reg); + int shamt = $shift$$constant; + + __ srli_d(dst_reg, src_reg, shamt); + %} + ins_pipe( ialu_regL_regL ); +%} + +// Xor Instructions +// Xor Register with Register +instruct xorI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ + match(Set dst (XorI src1 src2)); + + format %{ "XOR $dst, $src1, $src2 #@xorI_Reg_Reg" %} + + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + Register src2 = $src2$$Register; + __ xorr(dst, src1, src2); + %} + + ins_pipe( ialu_regI_regI ); +%} + +// Or Instructions +instruct orI_Reg_imm(mRegI dst, mRegI src1, immI_0_4095 src2) %{ + match(Set dst (OrI src1 src2)); + + format %{ "OR $dst, $src1, $src2 #@orI_Reg_imm" %} + ins_encode %{ + __ ori($dst$$Register, $src1$$Register, $src2$$constant); + %} + + ins_pipe( ialu_regI_regI ); +%} + +// Or Register with Register +instruct orI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ + match(Set dst (OrI src1 src2)); + + format %{ "OR $dst, $src1, $src2 #@orI_Reg_Reg" %} + ins_encode %{ + 
Register dst = $dst$$Register; + Register src1 = $src1$$Register; + Register src2 = $src2$$Register; + __ orr(dst, src1, src2); + %} + + ins_pipe( ialu_regI_regI ); +%} + +instruct rotI_shr_logical_Reg(mRegI dst, mRegI src, immI_0_31 rshift, immI_0_31 lshift, immI_1 one) %{ + match(Set dst (OrI (URShiftI src rshift) (LShiftI (AndI src one) lshift))); + predicate(32 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()))); + + format %{ "rotri_w $dst, $src, 1 ...\n\t" + "srli_w $dst, $dst, ($rshift-1) @ rotI_shr_logical_Reg" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src$$Register; + int rshift = $rshift$$constant; + + __ rotri_w(dst, src, 1); + if (rshift - 1) { + __ srli_w(dst, dst, rshift - 1); + } + %} + + ins_pipe( ialu_regI_regI ); +%} + +instruct orI_Reg_castP2X(mRegL dst, mRegL src1, mRegP src2) %{ + match(Set dst (OrI src1 (CastP2X src2))); + + format %{ "OR $dst, $src1, $src2 #@orI_Reg_castP2X" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + Register src2 = $src2$$Register; + __ orr(dst, src1, src2); + %} + + ins_pipe( ialu_regI_regI ); +%} + +// Logical Shift Right by 5-bit immediate +instruct shr_logical_Reg_imm(mRegI dst, mRegI src, immIU5 shift) %{ + match(Set dst (URShiftI src shift)); + //effect(KILL cr); + + format %{ "SRLI_W $dst, $src, $shift #@shr_logical_Reg_imm" %} + ins_encode %{ + Register src = $src$$Register; + Register dst = $dst$$Register; + int shift = $shift$$constant; + + __ srli_w(dst, src, shift); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct shr_logical_Reg_imm_nonneg_mask(mRegI dst, mRegI src, immI_0_31 shift, immI_nonneg_mask mask) %{ + match(Set dst (AndI (URShiftI src shift) mask)); + + format %{ "bstrpick_w $dst, $src, $shift+one-bits($mask)-1, shift #@shr_logical_Reg_imm_nonneg_mask" %} + ins_encode %{ + Register src = $src$$Register; + Register dst = $dst$$Register; + int pos = $shift$$constant; + int size = Assembler::is_int_mask($mask$$constant); + + __ bstrpick_w(dst, src, pos+size-1, pos); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct rolI_Reg_immI_0_31(mRegI dst, mRegI src, immI_0_31 lshift, immI_0_31 rshift) +%{ + predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f)); + match(Set dst (OrI (LShiftI src lshift) (URShiftI src rshift))); + + ins_cost(100); + format %{ "rotri_w $dst, $src, $rshift #@rolI_Reg_immI_0_31" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src$$Register; + int sa = $rshift$$constant; + + __ rotri_w(dst, src, sa); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct rolL_Reg_immI_0_31(mRegL dst, mRegLorI2L src, immI_32_63 lshift, immI_0_31 rshift) +%{ + predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f)); + match(Set dst (OrL (LShiftL src lshift) (URShiftL src rshift))); + + ins_cost(100); + format %{ "rotri_d $dst, $src, $rshift #@rolL_Reg_immI_0_31" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src$$Register; + int sa = $rshift$$constant; + + __ rotri_d(dst, src, sa); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct rolL_Reg_immI_32_63(mRegL dst, mRegLorI2L src, immI_0_31 lshift, immI_32_63 rshift) +%{ + predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f)); + match(Set dst (OrL (LShiftL src lshift) (URShiftL src rshift))); + + ins_cost(100); + format %{ "rotri_d $dst, $src, $rshift #@rolL_Reg_immI_32_63" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src$$Register; + int sa = 
$rshift$$constant; + + __ rotri_d(dst, src, sa); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct rorI_Reg_immI_0_31(mRegI dst, mRegI src, immI_0_31 rshift, immI_0_31 lshift) +%{ + predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f)); + match(Set dst (OrI (URShiftI src rshift) (LShiftI src lshift))); + + ins_cost(100); + format %{ "rotri_w $dst, $src, $rshift #@rorI_Reg_immI_0_31" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src$$Register; + int sa = $rshift$$constant; + + __ rotri_w(dst, src, sa); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct rorL_Reg_immI_0_31(mRegL dst, mRegLorI2L src, immI_0_31 rshift, immI_32_63 lshift) +%{ + predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f)); + match(Set dst (OrL (URShiftL src rshift) (LShiftL src lshift))); + + ins_cost(100); + format %{ "rotri_d $dst, $src, $rshift #@rorL_Reg_immI_0_31" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src$$Register; + int sa = $rshift$$constant; + + __ rotri_d(dst, src, sa); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct rorL_Reg_immI_32_63(mRegL dst, mRegLorI2L src, immI_32_63 rshift, immI_0_31 lshift) +%{ + predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f)); + match(Set dst (OrL (URShiftL src rshift) (LShiftL src lshift))); + + ins_cost(100); + format %{ "rotri_d $dst, $src, $rshift #@rorL_Reg_immI_32_63" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src$$Register; + int sa = $rshift$$constant; + + __ rotri_d(dst, src, sa); + %} + ins_pipe( ialu_regI_regI ); +%} + +// Logical Shift Right +instruct shr_logical_Reg_Reg(mRegI dst, mRegI src, mRegI shift) %{ + match(Set dst (URShiftI src shift)); + + format %{ "SRL_W $dst, $src, $shift #@shr_logical_Reg_Reg" %} + ins_encode %{ + Register src = $src$$Register; + Register dst = $dst$$Register; + Register shift = $shift$$Register; + __ srl_w(dst, src, shift); + %} + ins_pipe( ialu_regI_regI ); +%} + + +instruct shr_arith_Reg_imm(mRegI dst, mRegI src, immIU5 shift) %{ + match(Set dst (RShiftI src shift)); + // effect(KILL cr); + + format %{ "SRAI_W $dst, $src, $shift #@shr_arith_Reg_imm" %} + ins_encode %{ + Register src = $src$$Register; + Register dst = $dst$$Register; + int shift = $shift$$constant; + __ srai_w(dst, src, shift); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct shr_arith_Reg_Reg(mRegI dst, mRegI src, mRegI shift) %{ + match(Set dst (RShiftI src shift)); + // effect(KILL cr); + + format %{ "SRA_W $dst, $src, $shift #@shr_arith_Reg_Reg" %} + ins_encode %{ + Register src = $src$$Register; + Register dst = $dst$$Register; + Register shift = $shift$$Register; + __ sra_w(dst, src, shift); + %} + ins_pipe( ialu_regI_regI ); +%} + +//----------Convert Int to Boolean--------------------------------------------- + +instruct convI2B(mRegI dst, mRegI src) %{ + match(Set dst (Conv2B src)); + + ins_cost(100); + format %{ "convI2B $dst, $src @ convI2B" %} + ins_encode %{ + Register dst = as_Register($dst$$reg); + Register src = as_Register($src$$reg); + + if (dst != src) { + __ addi_d(dst, R0, 1); + __ maskeqz(dst, dst, src); + } else { + __ move(AT, src); + __ addi_d(dst, R0, 1); + __ maskeqz(dst, dst, AT); + } + %} + + ins_pipe( ialu_regL_regL ); +%} + +instruct convI2L_reg( mRegL dst, mRegI src) %{ + match(Set dst (ConvI2L src)); + + ins_cost(100); + format %{ "SLLI_W $dst, $src @ convI2L_reg\t" %} + + ins_encode %{ + Register dst = as_Register($dst$$reg); + Register src = 
as_Register($src$$reg); + + if(dst != src) __ slli_w(dst, src, 0); + %} + ins_pipe( ialu_regL_regL ); +%} + +instruct convL2I_reg( mRegI dst, mRegLorI2L src ) %{ + match(Set dst (ConvL2I src)); + + format %{ "MOV $dst, $src @ convL2I_reg" %} + ins_encode %{ + Register dst = as_Register($dst$$reg); + Register src = as_Register($src$$reg); + + __ slli_w(dst, src, 0); + %} + + ins_pipe( ialu_regI_regI ); +%} + +instruct convL2D_reg( regD dst, mRegL src ) %{ + match(Set dst (ConvL2D src)); + format %{ "convL2D $dst, $src @ convL2D_reg" %} + ins_encode %{ + Register src = as_Register($src$$reg); + FloatRegister dst = as_FloatRegister($dst$$reg); + + __ movgr2fr_d(dst, src); + __ ffint_d_l(dst, dst); + %} + + ins_pipe( pipe_slow ); +%} + + +// Convert double to int. +// If the double is NaN, stuff a zero in instead. +instruct convD2I_reg_reg(mRegI dst, regD src, regD tmp) %{ + match(Set dst (ConvD2I src)); + effect(USE src, TEMP tmp); + + format %{ "convd2i $dst, $src, using $tmp as TEMP @ convD2I_reg_reg" %} + + ins_encode %{ + __ ftintrz_w_d($tmp$$FloatRegister, $src$$FloatRegister); + __ movfr2gr_s($dst$$Register, $tmp$$FloatRegister); + %} + + ins_pipe( pipe_slow ); +%} + +instruct convD2L_reg_reg(mRegL dst, regD src, regD tmp) %{ + match(Set dst (ConvD2L src)); + effect(USE src, TEMP tmp); + + format %{ "convd2l $dst, $src, using $tmp as TEMP @ convD2L_reg_reg" %} + + ins_encode %{ + __ ftintrz_l_d($tmp$$FloatRegister, $src$$FloatRegister); + __ movfr2gr_d($dst$$Register, $tmp$$FloatRegister); + %} + + ins_pipe( pipe_slow ); +%} + +// Convert float to int. +// If the float is NaN, stuff a zero in instead. +instruct convF2I_reg_reg(mRegI dst, regF src, regF tmp) %{ + match(Set dst (ConvF2I src)); + effect(USE src, TEMP tmp); + + format %{ "convf2i $dst, $src, using $tmp as TEMP @ convF2I_reg_reg" %} + + ins_encode %{ + __ ftintrz_w_s($tmp$$FloatRegister, $src$$FloatRegister); + __ movfr2gr_s($dst$$Register, $tmp$$FloatRegister); + %} + + ins_pipe( pipe_slow ); +%} + +instruct convF2L_reg_reg(mRegL dst, regF src, regF tmp) %{ + match(Set dst (ConvF2L src)); + effect(USE src, TEMP tmp); + + format %{ "convf2l $dst, $src, using $tmp as TEMP @ convF2L_reg_reg" %} + + ins_encode %{ + __ ftintrz_l_s($tmp$$FloatRegister, $src$$FloatRegister); + __ movfr2gr_d($dst$$Register, $tmp$$FloatRegister); + %} + + ins_pipe( pipe_slow ); +%} + + +instruct convL2F_reg( regF dst, mRegL src ) %{ + match(Set dst (ConvL2F src)); + format %{ "convl2f $dst, $src @ convL2F_reg" %} + ins_encode %{ + FloatRegister dst = $dst$$FloatRegister; + Register src = as_Register($src$$reg); + Label L; + + __ movgr2fr_d(dst, src); + __ ffint_s_l(dst, dst); + %} + + ins_pipe( pipe_slow ); +%} + +instruct convI2F_reg( regF dst, mRegI src ) %{ + match(Set dst (ConvI2F src)); + format %{ "convi2f $dst, $src @ convI2F_reg" %} + ins_encode %{ + Register src = $src$$Register; + FloatRegister dst = $dst$$FloatRegister; + + __ movgr2fr_w(dst, src); + __ ffint_s_w(dst, dst); + %} + + ins_pipe( fpu_regF_regF ); +%} + +instruct cmpLTMask_immI_0( mRegI dst, mRegI p, immI_0 zero ) %{ + match(Set dst (CmpLTMask p zero)); + ins_cost(100); + + format %{ "srai_w $dst, $p, 31 @ cmpLTMask_immI_0" %} + ins_encode %{ + Register src = $p$$Register; + Register dst = $dst$$Register; + + __ srai_w(dst, src, 31); + %} + ins_pipe( pipe_slow ); +%} + + +instruct cmpLTMask( mRegI dst, mRegI p, mRegI q ) %{ + match(Set dst (CmpLTMask p q)); + ins_cost(400); + + format %{ "cmpLTMask $dst, $p, $q @ cmpLTMask" %} + ins_encode %{ + Register p = $p$$Register; + 
Register q = $q$$Register; + Register dst = $dst$$Register; + + __ slt(dst, p, q); + __ sub_d(dst, R0, dst); + %} + ins_pipe( pipe_slow ); +%} + +instruct convP2B(mRegI dst, mRegP src) %{ + match(Set dst (Conv2B src)); + + ins_cost(100); + format %{ "convP2B $dst, $src @ convP2B" %} + ins_encode %{ + Register dst = as_Register($dst$$reg); + Register src = as_Register($src$$reg); + + if (dst != src) { + __ addi_d(dst, R0, 1); + __ maskeqz(dst, dst, src); + } else { + __ move(AT, src); + __ addi_d(dst, R0, 1); + __ maskeqz(dst, dst, AT); + } + %} + + ins_pipe( ialu_regL_regL ); +%} + + +instruct convI2D_reg_reg(regD dst, mRegI src) %{ + match(Set dst (ConvI2D src)); + format %{ "conI2D $dst, $src @convI2D_reg" %} + ins_encode %{ + Register src = $src$$Register; + FloatRegister dst = $dst$$FloatRegister; + __ movgr2fr_w(dst ,src); + __ ffint_d_w(dst, dst); + %} + ins_pipe( fpu_regF_regF ); +%} + +instruct convF2D_reg_reg(regD dst, regF src) %{ + match(Set dst (ConvF2D src)); + format %{ "convF2D $dst, $src\t# @convF2D_reg_reg" %} + ins_encode %{ + FloatRegister dst = $dst$$FloatRegister; + FloatRegister src = $src$$FloatRegister; + + __ fcvt_d_s(dst, src); + %} + ins_pipe( fpu_regF_regF ); +%} + +instruct convD2F_reg_reg(regF dst, regD src) %{ + match(Set dst (ConvD2F src)); + format %{ "convD2F $dst, $src\t# @convD2F_reg_reg" %} + ins_encode %{ + FloatRegister dst = $dst$$FloatRegister; + FloatRegister src = $src$$FloatRegister; + + __ fcvt_s_d(dst, src); + %} + ins_pipe( fpu_regF_regF ); +%} + + +// Convert oop pointer into compressed form +instruct encodeHeapOop(mRegN dst, mRegP src) %{ + predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull); + match(Set dst (EncodeP src)); + format %{ "encode_heap_oop $dst,$src" %} + ins_encode %{ + Register src = $src$$Register; + Register dst = $dst$$Register; + + __ encode_heap_oop(dst, src); + %} + ins_pipe( ialu_regL_regL ); +%} + +instruct encodeHeapOop_not_null(mRegN dst, mRegP src) %{ + predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull); + match(Set dst (EncodeP src)); + format %{ "encode_heap_oop_not_null $dst,$src @ encodeHeapOop_not_null" %} + ins_encode %{ + __ encode_heap_oop_not_null($dst$$Register, $src$$Register); + %} + ins_pipe( ialu_regL_regL ); +%} + +instruct decodeHeapOop(mRegP dst, mRegN src) %{ + predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull && + n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant); + match(Set dst (DecodeN src)); + format %{ "decode_heap_oop $dst,$src @ decodeHeapOop" %} + ins_encode %{ + Register s = $src$$Register; + Register d = $dst$$Register; + + __ decode_heap_oop(d, s); + %} + ins_pipe( ialu_regL_regL ); +%} + +instruct decodeHeapOop_not_null(mRegP dst, mRegN src) %{ + predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull || + n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant); + match(Set dst (DecodeN src)); + format %{ "decode_heap_oop_not_null $dst,$src @ decodeHeapOop_not_null" %} + ins_encode %{ + Register s = $src$$Register; + Register d = $dst$$Register; + if (s != d) { + __ decode_heap_oop_not_null(d, s); + } else { + __ decode_heap_oop_not_null(d); + } + %} + ins_pipe( ialu_regL_regL ); +%} + +instruct encodeKlass_not_null(mRegN dst, mRegP src) %{ + match(Set dst (EncodePKlass src)); + format %{ "encode_heap_oop_not_null $dst,$src @ encodeKlass_not_null" %} + ins_encode %{ + __ encode_klass_not_null($dst$$Register, $src$$Register); + %} + ins_pipe( ialu_regL_regL ); +%} + +instruct decodeKlass_not_null(mRegP dst, mRegN src) %{ + 
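+  // Decode a compressed Klass* back into a full pointer. When src and dst are
+  // the same register the single-operand decode_klass_not_null(d) form is used
+  // so the decode happens in place; otherwise the two-operand form leaves src intact.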
match(Set dst (DecodeNKlass src)); + format %{ "decode_heap_klass_not_null $dst,$src" %} + ins_encode %{ + Register s = $src$$Register; + Register d = $dst$$Register; + if (s != d) { + __ decode_klass_not_null(d, s); + } else { + __ decode_klass_not_null(d); + } + %} + ins_pipe( ialu_regL_regL ); +%} + +//FIXME +instruct tlsLoadP(mRegP dst) %{ + match(Set dst (ThreadLocal)); + + ins_cost(0); + format %{ " get_thread in $dst #@tlsLoadP" %} + ins_encode %{ + Register dst = $dst$$Register; +#ifdef OPT_THREAD + __ move(dst, TREG); +#else + __ get_thread(dst); +#endif + %} + + ins_pipe( ialu_loadI ); +%} + + +instruct checkCastPP( mRegP dst ) %{ + match(Set dst (CheckCastPP dst)); + + format %{ "#checkcastPP of $dst (empty encoding) #@chekCastPP" %} + ins_encode( /*empty encoding*/ ); + ins_pipe( empty ); +%} + +instruct castPP(mRegP dst) +%{ + match(Set dst (CastPP dst)); + + size(0); + format %{ "# castPP of $dst" %} + ins_encode(/* empty encoding */); + ins_pipe(empty); +%} + +instruct castII( mRegI dst ) %{ + match(Set dst (CastII dst)); + format %{ "#castII of $dst empty encoding" %} + ins_encode( /*empty encoding*/ ); + ins_cost(0); + ins_pipe( empty ); +%} + +// Return Instruction +// Remove the return address & jump to it. +instruct Ret() %{ + match(Return); + format %{ "RET #@Ret" %} + + ins_encode %{ + __ jr(RA); + %} + + ins_pipe( pipe_jump ); +%} + + + +// Tail Jump; remove the return address; jump to target. +// TailCall above leaves the return address around. +// TailJump is used in only one place, the rethrow_Java stub (fancy_jump=2). +// ex_oop (Exception Oop) is needed in %o0 at the jump. As there would be a +// "restore" before this instruction (in Epilogue), we need to materialize it +// in %i0. +//FIXME +instruct tailjmpInd(no_Ax_mRegP jump_target, mRegP ex_oop) %{ + match( TailJump jump_target ex_oop ); + ins_cost(200); + format %{ "Jmp $jump_target ; ex_oop = $ex_oop #@tailjmpInd" %} + ins_encode %{ + Register target = $jump_target$$Register; + + // V0, V1 are indicated in: + // [stubGenerator_loongarch.cpp] generate_forward_exception() + // [runtime_loongarch.cpp] OptoRuntime::generate_exception_blob() + // + Register oop = $ex_oop$$Register; + Register exception_oop = V0; + Register exception_pc = V1; + + __ move(exception_pc, RA); + __ move(exception_oop, oop); + + __ jr(target); + %} + ins_pipe( pipe_jump ); +%} + +// ============================================================================ +// Procedure Call/Return Instructions +// Call Java Static Instruction +// Note: If this code changes, the corresponding ret_addr_offset() and +// compute_padding() functions will have to be adjusted. +instruct CallStaticJavaDirect(method meth) %{ + match(CallStaticJava); + effect(USE meth); + + ins_cost(300); + format %{ "CALL,static #@CallStaticJavaDirect " %} + ins_encode( Java_Static_Call( meth ) ); + ins_pipe( pipe_slow ); + ins_pc_relative(1); + ins_alignment(4); +%} + +// Call Java Dynamic Instruction +// Note: If this code changes, the corresponding ret_addr_offset() and +// compute_padding() functions will have to be adjusted. 
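+// As the format below shows, the Java_Dynamic_Call encoding first materializes
+// Universe::non_oop_word() in the inline-cache register as a placeholder; IC
+// resolution later patches the call site with the real cached value (see the
+// compiledIC code for this port).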
+instruct CallDynamicJavaDirect(method meth) %{ + match(CallDynamicJava); + effect(USE meth); + + ins_cost(300); + format %{"MOV IC_Klass, #Universe::non_oop_word()\n\t" + "CallDynamic @ CallDynamicJavaDirect" %} + ins_encode( Java_Dynamic_Call( meth ) ); + ins_pipe( pipe_slow ); + ins_pc_relative(1); + ins_alignment(4); +%} + +instruct CallLeafNoFPDirect(method meth) %{ + match(CallLeafNoFP); + effect(USE meth); + + ins_cost(300); + format %{ "CALL_LEAF_NOFP,runtime " %} + ins_encode(Java_To_Runtime(meth)); + ins_pipe( pipe_slow ); + ins_pc_relative(1); + ins_alignment(4); +%} + +// Prefetch instructions for allocation. + +instruct prefetchAlloc(memory mem) %{ + match(PrefetchAllocation mem); + ins_cost(125); + format %{ "preld $mem\t# Prefetch allocation @ prefetchAlloc" %} + ins_encode %{ + int base = $mem$$base; + int index = $mem$$index; + int scale = $mem$$scale; + int disp = $mem$$disp; + + if (index != 0) { + if (scale == 0) { + __ add_d(AT, as_Register(base), as_Register(index)); + } else { + __ alsl_d(AT, as_Register(index), as_Register(base), scale - 1); + } + + if (Assembler::is_simm(disp, 12)) { + __ preld(8, AT, disp); + } else { + __ li(T4, disp); + __ add_d(AT, AT, T4); + __ preld(8, AT, 0); + } + } else { + if (Assembler::is_simm(disp, 12)) { + __ preld(8, as_Register(base), disp); + } else { + __ li(T4, disp); + __ add_d(AT, as_Register(base), T4); + __ preld(8, AT, 0); + } + } + %} + ins_pipe(pipe_slow); +%} + +// Call runtime without safepoint +instruct CallLeafDirect(method meth) %{ + match(CallLeaf); + effect(USE meth); + + ins_cost(300); + format %{ "CALL_LEAF,runtime #@CallLeafDirect " %} + ins_encode(Java_To_Runtime(meth)); + ins_pipe( pipe_slow ); + ins_pc_relative(1); + ins_alignment(4); +%} + +// Load Char (16bit unsigned) +instruct loadUS(mRegI dst, memory mem) %{ + match(Set dst (LoadUS mem)); + + ins_cost(125); + format %{ "loadUS $dst,$mem @ loadC" %} + ins_encode %{ + __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_SHORT); + %} + ins_pipe( ialu_loadI ); +%} + +instruct loadUS_convI2L(mRegL dst, memory mem) %{ + match(Set dst (ConvI2L (LoadUS mem))); + + ins_cost(125); + format %{ "loadUS $dst,$mem @ loadUS_convI2L" %} + ins_encode %{ + __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_SHORT); + %} + ins_pipe( ialu_loadI ); +%} + +// Store Char (16bit unsigned) +instruct storeC(memory mem, mRegIorL2I src) %{ + match(Set mem (StoreC mem src)); + + ins_cost(125); + format %{ "storeC $src, $mem @ storeC" %} + ins_encode %{ + __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_CHAR); + %} + ins_pipe( ialu_loadI ); +%} + +instruct storeC_0(memory mem, immI_0 zero) %{ + match(Set mem (StoreC mem zero)); + + ins_cost(125); + format %{ "storeC $zero, $mem @ storeC_0" %} + ins_encode %{ + __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_SHORT); + %} + ins_pipe( ialu_loadI ); +%} + + +instruct loadConF_immF_0(regF dst, immF_0 zero) %{ + match(Set dst zero); + ins_cost(100); + + format %{ "mov $dst, zero @ loadConF_immF_0\n"%} + ins_encode %{ + FloatRegister dst = $dst$$FloatRegister; + + __ movgr2fr_w(dst, R0); + %} + ins_pipe( fpu_loadF ); +%} + + +instruct loadConF(regF dst, immF src) %{ + match(Set dst src); + ins_cost(125); + + format %{ "fld_s $dst, $constantoffset[$constanttablebase] # load FLOAT $src from table @ loadConF" %} + ins_encode %{ + int 
con_offset = $constantoffset($src); + + if (Assembler::is_simm(con_offset, 12)) { + __ fld_s($dst$$FloatRegister, $constanttablebase, con_offset); + } else { + __ li(AT, con_offset); + __ fldx_s($dst$$FloatRegister, $constanttablebase, AT); + } + %} + ins_pipe( fpu_loadF ); +%} + + +instruct loadConD_immD_0(regD dst, immD_0 zero) %{ + match(Set dst zero); + ins_cost(100); + + format %{ "mov $dst, zero @ loadConD_immD_0"%} + ins_encode %{ + FloatRegister dst = as_FloatRegister($dst$$reg); + + __ movgr2fr_d(dst, R0); + %} + ins_pipe( fpu_loadF ); +%} + +instruct loadConD(regD dst, immD src) %{ + match(Set dst src); + ins_cost(125); + + format %{ "fld_d $dst, $constantoffset[$constanttablebase] # load DOUBLE $src from table @ loadConD" %} + ins_encode %{ + int con_offset = $constantoffset($src); + + if (Assembler::is_simm(con_offset, 12)) { + __ fld_d($dst$$FloatRegister, $constanttablebase, con_offset); + } else { + __ li(AT, con_offset); + __ fldx_d($dst$$FloatRegister, $constanttablebase, AT); + } + %} + ins_pipe( fpu_loadF ); +%} + +// Store register Float value (it is faster than store from FPU register) +instruct storeF_reg( memory mem, regF src) %{ + match(Set mem (StoreF mem src)); + + ins_cost(50); + format %{ "store $mem, $src\t# store float @ storeF_reg" %} + ins_encode %{ + __ loadstore_enc($src$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_FLOAT); + %} + ins_pipe( fpu_storeF ); +%} + +instruct storeF_immF_0( memory mem, immF_0 zero) %{ + match(Set mem (StoreF mem zero)); + + ins_cost(40); + format %{ "store $mem, zero\t# store float @ storeF_immF_0" %} + ins_encode %{ + __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); + %} + ins_pipe( ialu_storeI ); +%} + +// Load Double +instruct loadD(regD dst, memory mem) %{ + match(Set dst (LoadD mem)); + + ins_cost(150); + format %{ "loadD $dst, $mem #@loadD" %} + ins_encode %{ + __ loadstore_enc($dst$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_DOUBLE); + %} + ins_pipe( ialu_loadI ); +%} + +// Load Double - UNaligned +instruct loadD_unaligned(regD dst, memory mem ) %{ + match(Set dst (LoadD_unaligned mem)); + ins_cost(250); + // FIXME: Need more effective ldl/ldr + format %{ "loadD_unaligned $dst, $mem #@loadD_unaligned" %} + ins_encode %{ + __ loadstore_enc($dst$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_DOUBLE); + %} + ins_pipe( ialu_loadI ); +%} + +instruct storeD_reg( memory mem, regD src) %{ + match(Set mem (StoreD mem src)); + + ins_cost(50); + format %{ "store $mem, $src\t# store float @ storeD_reg" %} + ins_encode %{ + __ loadstore_enc($src$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_DOUBLE); + %} + ins_pipe( fpu_storeF ); +%} + +instruct storeD_immD_0( memory mem, immD_0 zero) %{ + match(Set mem (StoreD mem zero)); + + ins_cost(40); + format %{ "store $mem, zero\t# store float @ storeD_immD_0" %} + ins_encode %{ + __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_LONG); + %} + ins_pipe( ialu_storeI ); +%} + +instruct loadSSI(mRegI dst, stackSlotI src) +%{ + match(Set dst src); + + ins_cost(125); + format %{ "ld_w $dst, $src\t# int stk @ loadSSI" %} + ins_encode %{ + guarantee( Assembler::is_simm($src$$disp, 12), "disp too long (loadSSI) !"); + __ ld_w($dst$$Register, SP, $src$$disp); + %} + ins_pipe(ialu_loadI); +%} + +instruct storeSSI(stackSlotI dst, mRegI src) +%{ + 
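+  // Stack-slot operands are SP-relative. C2 spill displacements are expected to
+  // fit the signed 12-bit immediate of the LoongArch load/store forms, which the
+  // guarantee() calls in these stack-slot rules check at code-emission time.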
match(Set dst src); + + ins_cost(100); + format %{ "st_w $dst, $src\t# int stk @ storeSSI" %} + ins_encode %{ + guarantee( Assembler::is_simm($dst$$disp, 12), "disp too long (storeSSI) !"); + __ st_w($src$$Register, SP, $dst$$disp); + %} + ins_pipe(ialu_storeI); +%} + +instruct loadSSL(mRegL dst, stackSlotL src) +%{ + match(Set dst src); + + ins_cost(125); + format %{ "ld_d $dst, $src\t# long stk @ loadSSL" %} + ins_encode %{ + guarantee( Assembler::is_simm($src$$disp, 12), "disp too long (loadSSL) !"); + __ ld_d($dst$$Register, SP, $src$$disp); + %} + ins_pipe(ialu_loadI); +%} + +instruct storeSSL(stackSlotL dst, mRegL src) +%{ + match(Set dst src); + + ins_cost(100); + format %{ "st_d $dst, $src\t# long stk @ storeSSL" %} + ins_encode %{ + guarantee( Assembler::is_simm($dst$$disp, 12), "disp too long (storeSSL) !"); + __ st_d($src$$Register, SP, $dst$$disp); + %} + ins_pipe(ialu_storeI); +%} + +instruct loadSSP(mRegP dst, stackSlotP src) +%{ + match(Set dst src); + + ins_cost(125); + format %{ "ld_d $dst, $src\t# ptr stk @ loadSSP" %} + ins_encode %{ + guarantee( Assembler::is_simm($src$$disp, 12), "disp too long (loadSSP) !"); + __ ld_d($dst$$Register, SP, $src$$disp); + %} + ins_pipe(ialu_loadI); +%} + +instruct storeSSP(stackSlotP dst, mRegP src) +%{ + match(Set dst src); + + ins_cost(100); + format %{ "sd $dst, $src\t# ptr stk @ storeSSP" %} + ins_encode %{ + guarantee( Assembler::is_simm($dst$$disp, 12), "disp too long (storeSSP) !"); + __ st_d($src$$Register, SP, $dst$$disp); + %} + ins_pipe(ialu_storeI); +%} + +instruct loadSSF(regF dst, stackSlotF src) +%{ + match(Set dst src); + + ins_cost(125); + format %{ "fld_s $dst, $src\t# float stk @ loadSSF" %} + ins_encode %{ + guarantee( Assembler::is_simm($src$$disp, 12), "disp too long (loadSSF) !"); + __ fld_s($dst$$FloatRegister, SP, $src$$disp); + %} + ins_pipe(ialu_loadI); +%} + +instruct storeSSF(stackSlotF dst, regF src) +%{ + match(Set dst src); + + ins_cost(100); + format %{ "fst_s $dst, $src\t# float stk @ storeSSF" %} + ins_encode %{ + guarantee( Assembler::is_simm($dst$$disp, 12), "disp too long (storeSSF) !"); + __ fst_s($src$$FloatRegister, SP, $dst$$disp); + %} + ins_pipe(fpu_storeF); +%} + +// Use the same format since predicate() can not be used here. 
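+// (loadSSD/storeSSD below mirror the float stack-slot rules, using fld_d/fst_d
+// with the same SP-relative simm12 displacement check.)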
+instruct loadSSD(regD dst, stackSlotD src) +%{ + match(Set dst src); + + ins_cost(125); + format %{ "fld_d $dst, $src\t# double stk @ loadSSD" %} + ins_encode %{ + guarantee( Assembler::is_simm($src$$disp, 12), "disp too long (loadSSD) !"); + __ fld_d($dst$$FloatRegister, SP, $src$$disp); + %} + ins_pipe(ialu_loadI); +%} + +instruct storeSSD(stackSlotD dst, regD src) +%{ + match(Set dst src); + + ins_cost(100); + format %{ "fst_d $dst, $src\t# double stk @ storeSSD" %} + ins_encode %{ + guarantee( Assembler::is_simm($dst$$disp, 12), "disp too long (storeSSD) !"); + __ fst_d($src$$FloatRegister, SP, $dst$$disp); + %} + ins_pipe(fpu_storeF); +%} + +instruct cmpFastLock(FlagsReg cr, mRegP object, mRegP box, mRegI tmp, mRegI scr) %{ + match(Set cr (FastLock object box)); + effect(TEMP tmp, TEMP scr); + ins_cost(300); + format %{ "FASTLOCK $cr <-- $object, $box, $tmp, $scr #@ cmpFastLock" %} + ins_encode %{ + __ fast_lock($object$$Register, $box$$Register, $cr$$Register, $tmp$$Register, $scr$$Register); + %} + + ins_pipe( pipe_slow ); + ins_pc_relative(1); +%} + +instruct cmpFastUnlock(FlagsReg cr, mRegP object, mRegP box, mRegI tmp, mRegI scr) %{ + match(Set cr (FastUnlock object box)); + effect(TEMP tmp, TEMP scr); + ins_cost(300); + format %{ "FASTUNLOCK $cr <-- $object, $box, $tmp #@cmpFastUnlock" %} + ins_encode %{ + __ fast_unlock($object$$Register, $box$$Register, $cr$$Register, $tmp$$Register, $scr$$Register); + %} + + ins_pipe( pipe_slow ); + ins_pc_relative(1); +%} + +// Store CMS card-mark Immediate 0 +instruct storeImmCM_order(memory mem, immI_0 zero) %{ + match(Set mem (StoreCM mem zero)); + predicate(UseConcMarkSweepGC && !UseCondCardMark); + ins_cost(100); + format %{ "StoreCM MEMBAR storestore\n\t" + "st_b $mem, zero\t! card-mark imm0" %} + ins_encode %{ + __ membar(__ StoreStore); + __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_BYTE); + %} + ins_pipe( ialu_storeI ); +%} + +instruct storeImmCM(memory mem, immI_0 zero) %{ + match(Set mem (StoreCM mem zero)); + + ins_cost(150); + format %{ "st_b $mem, zero\t! card-mark imm0" %} + ins_encode %{ + __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_BYTE); + %} + ins_pipe( ialu_storeI ); +%} + +// Die now +instruct ShouldNotReachHere( ) +%{ + match(Halt); + ins_cost(300); + + // Use the following format syntax + format %{ "ILLTRAP ;#@ShouldNotReachHere" %} + ins_encode %{ + if (is_reachable()) { + // Here we should emit illtrap! 
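+      // stop() halts the VM with the given message; it presumably ends in a
+      // trapping instruction, which is all the Halt node requires here.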
+ __ stop("ShouldNotReachHere"); + } + %} + ins_pipe( pipe_jump ); +%} + +instruct leaP12Narrow(mRegP dst, indOffset12Narrow mem) +%{ + predicate(Universe::narrow_oop_shift() == 0); + match(Set dst mem); + + ins_cost(110); + format %{ "leaq $dst, $mem\t# ptr off12narrow @ leaP12Narrow" %} + ins_encode %{ + Register dst = $dst$$Register; + Register base = as_Register($mem$$base); + int disp = $mem$$disp; + + __ addi_d(dst, base, disp); + %} + ins_pipe( ialu_regI_imm16 ); +%} + +instruct leaPIdxScale(mRegP dst, mRegP reg, mRegLorI2L lreg, immI_0_3 scale) +%{ + match(Set dst (AddP reg (LShiftL lreg scale))); + + ins_cost(110); + format %{ "leaq $dst, [$reg + $lreg << $scale]\t# @ leaPIdxScale" %} + ins_encode %{ + Register dst = $dst$$Register; + Register base = $reg$$Register; + Register index = $lreg$$Register; + int scale = $scale$$constant; + + if (scale == 0) { + __ add_d($dst$$Register, $reg$$Register, index); + } else { + __ alsl_d(dst, index, base, scale - 1); + } + %} + + ins_pipe( ialu_regI_imm16 ); +%} + + +// ============================================================================ +// The 2nd slow-half of a subtype check. Scan the subklass's 2ndary superklass +// array for an instance of the superklass. Set a hidden internal cache on a +// hit (cache is checked with exposed code in gen_subtype_check()). Return +// NZ for a miss or zero for a hit. The encoding ALSO sets flags. +instruct partialSubtypeCheck( mRegP result, no_T8_mRegP sub, no_T8_mRegP super, mT8RegI tmp ) %{ + match(Set result (PartialSubtypeCheck sub super)); + effect(KILL tmp); + ins_cost(1100); // slightly larger than the next version + format %{ "partialSubtypeCheck result=$result, sub=$sub, super=$super, tmp=$tmp " %} + + ins_encode( enc_PartialSubtypeCheck(result, sub, super, tmp) ); + ins_pipe( pipe_slow ); +%} + +// Conditional-store of the updated heap-top. +// Used during allocation of the shared heap. + +instruct storePConditional(memory heap_top_ptr, mRegP oldval, mRegP newval, FlagsReg cr) %{ + match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval))); + + format %{ "move AT, $newval\n\t" + "sc_d $heap_top_ptr, AT\t# (ptr) @storePConditional \n\t" + "move $cr, AT\n" %} + ins_encode%{ + Register oldval = $oldval$$Register; + Register newval = $newval$$Register; + Address addr(as_Register($heap_top_ptr$$base), $heap_top_ptr$$disp); + + int index = $heap_top_ptr$$index; + int scale = $heap_top_ptr$$scale; + int disp = $heap_top_ptr$$disp; + + guarantee(Assembler::is_simm(disp, 12), ""); + + if (index != 0) { + __ stop("in storePConditional: index != 0"); + } else { + __ move(AT, newval); + __ sc_d(AT, addr); + __ move($cr$$Register, AT); + } + %} + ins_pipe(long_memory_op); +%} + +// Conditional-store of an int value. +// AT flag is set on success, reset otherwise. 
+instruct storeIConditional(memory mem, mRegI oldval, mRegI newval, FlagsReg cr) %{
+  match(Set cr (StoreIConditional mem (Binary oldval newval)));
+  format %{ "CMPXCHG $newval, $mem, $oldval \t# @storeIConditional" %}
+
+  ins_encode %{
+    Register oldval = $oldval$$Register;
+    Register newval = $newval$$Register;
+    Register cr = $cr$$Register;
+    Address addr(as_Register($mem$$base), $mem$$disp);
+
+    int index = $mem$$index;
+    int scale = $mem$$scale;
+    int disp = $mem$$disp;
+
+    guarantee(Assembler::is_simm(disp, 12), "");
+
+    if (index != 0) {
+      __ stop("in storeIConditional: index != 0");
+    } else {
+      if (cr != addr.base() && cr != oldval && cr != newval) {
+        __ cmpxchg32(addr, oldval, newval, cr, true, false, true);
+      } else {
+        __ cmpxchg32(addr, oldval, newval, AT, true, false, true);
+        __ move(cr, AT);
+      }
+    }
+  %}
+
+  ins_pipe(long_memory_op);
+%}
+
+// Conditional-store of a long value.
+// The flag register (cr) is set on success, reset otherwise. Implemented with a CMPXCHG.
+instruct storeLConditional(memory mem, mRegL oldval, mRegL newval, FlagsReg cr)
+%{
+  match(Set cr (StoreLConditional mem (Binary oldval newval)));
+
+  format %{ "cmpxchg $mem, $newval\t# If $oldval == $mem then store $newval into $mem" %}
+  ins_encode%{
+    Register oldval = $oldval$$Register;
+    Register newval = $newval$$Register;
+    Register cr = $cr$$Register;
+    Address addr(as_Register($mem$$base), $mem$$disp);
+
+    int index = $mem$$index;
+    int scale = $mem$$scale;
+    int disp = $mem$$disp;
+
+    guarantee(Assembler::is_simm(disp, 12), "");
+
+    if (index != 0) {
+      __ stop("in storeLConditional: index != 0");
+    } else {
+      if (cr != addr.base() && cr != oldval && cr != newval) {
+        __ cmpxchg(addr, oldval, newval, cr, false, true);
+      } else {
+        __ cmpxchg(addr, oldval, newval, AT, false, true);
+        __ move(cr, AT);
+      }
+    }
+  %}
+  ins_pipe(long_memory_op);
+%}
+
+// Implement LoadPLocked. Must be ordered against changes of the memory location
+// by storePConditional. 
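+// loadPLocked emits ll_d (load-linked) and must pair with the sc_d emitted by
+// storePConditional above; C2 generates the pair when CAS-ing the shared heap
+// top during allocation.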
+instruct loadPLocked(mRegP dst, memory mem) %{ + match(Set dst (LoadPLocked mem)); + ins_cost(MEMORY_REF_COST); + + format %{ "ll_d $dst, $mem #@loadPLocked\n\t" %} + size(12); + ins_encode %{ + relocInfo::relocType disp_reloc = $mem->disp_reloc(); + assert(disp_reloc == relocInfo::none, "cannot have disp"); + __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_LINKED_LONG); + %} + ins_pipe( ialu_loadI ); +%} + + +instruct compareAndSwapI(mRegI res, mRegP mem_ptr, mRegI oldval, mRegI newval) %{ + match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval))); + ins_cost(3 * MEMORY_REF_COST); + format %{ "CMPXCHG $newval, [$mem_ptr], $oldval @ compareAndSwapI" %} + ins_encode %{ + Register newval = $newval$$Register; + Register oldval = $oldval$$Register; + Register res = $res$$Register; + Address addr($mem_ptr$$Register, 0); + + if (res != addr.base() && res != oldval && res != newval) { + __ cmpxchg32(addr, oldval, newval, res, true, false, true); + } else { + __ cmpxchg32(addr, oldval, newval, AT, true, false, true); + __ move(res, AT); + } + %} + ins_pipe(long_memory_op); +%} + +instruct compareAndSwapL(mRegI res, mRegP mem_ptr, mRegL oldval, mRegL newval) %{ + predicate(VM_Version::supports_cx8()); + match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval))); + ins_cost(3 * MEMORY_REF_COST); + format %{ "CMPXCHG $newval, [$mem_ptr], $oldval @ compareAndSwapL" %} + ins_encode %{ + Register newval = $newval$$Register; + Register oldval = $oldval$$Register; + Register res = $res$$Register; + Address addr($mem_ptr$$Register, 0); + + if (res != addr.base() && res != oldval && res != newval) { + __ cmpxchg(addr, oldval, newval, res, false, true); + } else { + __ cmpxchg(addr, oldval, newval, AT, false, true); + __ move(res, AT); + } + %} + ins_pipe(long_memory_op); +%} + +instruct compareAndSwapP(mRegI res, mRegP mem_ptr, mRegP oldval, mRegP newval) %{ + match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval))); + ins_cost(3 * MEMORY_REF_COST); + format %{ "CMPXCHG $newval, [$mem_ptr], $oldval @ compareAndSwapP" %} + ins_encode %{ + Register newval = $newval$$Register; + Register oldval = $oldval$$Register; + Register res = $res$$Register; + Address addr($mem_ptr$$Register, 0); + + if (res != addr.base() && res != oldval && res != newval) { + __ cmpxchg(addr, oldval, newval, res, false, true); + } else { + __ cmpxchg(addr, oldval, newval, AT, false, true); + __ move(res, AT); + } + %} + ins_pipe(long_memory_op); +%} + +instruct compareAndSwapN(mRegI res, mRegP mem_ptr, mRegN oldval, mRegN newval) %{ + match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval))); + ins_cost(3 * MEMORY_REF_COST); + format %{ "CMPXCHG $newval, [$mem_ptr], $oldval @ compareAndSwapN" %} + ins_encode %{ + Register newval = $newval$$Register; + Register oldval = $oldval$$Register; + Register res = $res$$Register; + Address addr($mem_ptr$$Register, 0); + + if (res != addr.base() && res != oldval && res != newval) { + __ cmpxchg32(addr, oldval, newval, res, false, false, true); + } else { + __ cmpxchg32(addr, oldval, newval, AT, false, false, true); + __ move(res, AT); + } + %} + ins_pipe(long_memory_op); +%} + +instruct get_and_setI(indirect mem, mRegI newv, mRegI prev) %{ + match(Set prev (GetAndSetI mem newv)); + ins_cost(2 * MEMORY_REF_COST); + format %{ "amswap_db_w $prev, $newv, [$mem]" %} + ins_encode %{ + Register prev = $prev$$Register; + Register newv = $newv$$Register; + Register addr = as_Register($mem$$base); + if (prev == newv || prev 
== addr) { + __ amswap_db_w(AT, newv, addr); + __ move(prev, AT); + } else { + __ amswap_db_w(prev, newv, addr); + } + %} + ins_pipe(pipe_slow); +%} + +instruct get_and_setL(indirect mem, mRegL newv, mRegL prev) %{ + match(Set prev (GetAndSetL mem newv)); + ins_cost(2 * MEMORY_REF_COST); + format %{ "amswap_db_d $prev, $newv, [$mem]" %} + ins_encode %{ + Register prev = $prev$$Register; + Register newv = $newv$$Register; + Register addr = as_Register($mem$$base); + if (prev == newv || prev == addr) { + __ amswap_db_d(AT, newv, addr); + __ move(prev, AT); + } else { + __ amswap_db_d(prev, newv, addr); + } + %} + ins_pipe(pipe_slow); +%} + +instruct get_and_setN(indirect mem, mRegN newv, mRegN prev) %{ + match(Set prev (GetAndSetN mem newv)); + ins_cost(2 * MEMORY_REF_COST); + format %{ "amswap_db_w $prev, $newv, [$mem]" %} + ins_encode %{ + Register prev = $prev$$Register; + Register newv = $newv$$Register; + Register addr = as_Register($mem$$base); + __ amswap_db_w(AT, newv, addr); + __ bstrpick_d(prev, AT, 31, 0); + %} + ins_pipe(pipe_slow); +%} + +instruct get_and_setP(indirect mem, mRegP newv, mRegP prev) %{ + match(Set prev (GetAndSetP mem newv)); + ins_cost(2 * MEMORY_REF_COST); + format %{ "amswap_db_d $prev, $newv, [$mem]" %} + ins_encode %{ + Register prev = $prev$$Register; + Register newv = $newv$$Register; + Register addr = as_Register($mem$$base); + if (prev == newv || prev == addr) { + __ amswap_db_d(AT, newv, addr); + __ move(prev, AT); + } else { + __ amswap_db_d(prev, newv, addr); + } + %} + ins_pipe(pipe_slow); +%} + +instruct get_and_addL(indirect mem, mRegL newval, mRegL incr) %{ + match(Set newval (GetAndAddL mem incr)); + ins_cost(2 * MEMORY_REF_COST + 1); + format %{ "amadd_db_d $newval, [$mem], $incr" %} + ins_encode %{ + Register newv = $newval$$Register; + Register incr = $incr$$Register; + Register addr = as_Register($mem$$base); + if (newv == incr || newv == addr) { + __ amadd_db_d(AT, incr, addr); + __ move(newv, AT); + } else { + __ amadd_db_d(newv, incr, addr); + } + %} + ins_pipe(pipe_slow); +%} + +instruct get_and_addL_no_res(indirect mem, Universe dummy, mRegL incr) %{ + predicate(n->as_LoadStore()->result_not_used()); + match(Set dummy (GetAndAddL mem incr)); + ins_cost(2 * MEMORY_REF_COST); + format %{ "amadd_db_d [$mem], $incr" %} + ins_encode %{ + __ amadd_db_d(R0, $incr$$Register, as_Register($mem$$base)); + %} + ins_pipe(pipe_slow); +%} + +instruct get_and_addI(indirect mem, mRegI newval, mRegIorL2I incr) %{ + match(Set newval (GetAndAddI mem incr)); + ins_cost(2 * MEMORY_REF_COST + 1); + format %{ "amadd_db_w $newval, [$mem], $incr" %} + ins_encode %{ + Register newv = $newval$$Register; + Register incr = $incr$$Register; + Register addr = as_Register($mem$$base); + if (newv == incr || newv == addr) { + __ amadd_db_w(AT, incr, addr); + __ move(newv, AT); + } else { + __ amadd_db_w(newv, incr, addr); + } + %} + ins_pipe(pipe_slow); +%} + +instruct get_and_addI_no_res(indirect mem, Universe dummy, mRegIorL2I incr) %{ + predicate(n->as_LoadStore()->result_not_used()); + match(Set dummy (GetAndAddI mem incr)); + ins_cost(2 * MEMORY_REF_COST); + format %{ "amadd_db_w [$mem], $incr" %} + ins_encode %{ + __ amadd_db_w(R0, $incr$$Register, as_Register($mem$$base)); + %} + ins_pipe(pipe_slow); +%} + +//----------Max and Min-------------------------------------------------------- + +// Min Register with Register (generic version) +instruct minI_Reg_Reg(mRegI dst, mRegI src) %{ + match(Set dst (MinI dst src)); + //effect(KILL flags); + ins_cost(80); + + format 
%{ "MIN $dst, $src @minI_Reg_Reg" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src$$Register; + + __ slt(AT, src, dst); + __ masknez(dst, dst, AT); + __ maskeqz(AT, src, AT); + __ OR(dst, dst, AT); + %} + + ins_pipe( pipe_slow ); +%} + +// Max Register with Register (generic version) +instruct maxI_Reg_Reg(mRegI dst, mRegI src) %{ + match(Set dst (MaxI dst src)); + ins_cost(80); + + format %{ "MAX $dst, $src @maxI_Reg_Reg" %} + + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src$$Register; + + __ slt(AT, dst, src); + __ masknez(dst, dst, AT); + __ maskeqz(AT, src, AT); + __ OR(dst, dst, AT); + %} + + ins_pipe( pipe_slow ); +%} + +instruct maxI_Reg_zero(mRegI dst, immI_0 zero) %{ + match(Set dst (MaxI dst zero)); + ins_cost(50); + + format %{ "MAX $dst, 0 @maxI_Reg_zero" %} + + ins_encode %{ + Register dst = $dst$$Register; + + __ slt(AT, dst, R0); + __ masknez(dst, dst, AT); + %} + + ins_pipe( pipe_slow ); +%} + +instruct zerox_long_reg_reg(mRegL dst, mRegL src, immL_MaxUI mask) +%{ + match(Set dst (AndL src mask)); + + format %{ "movl $dst, $src\t# zero-extend long @ zerox_long_reg_reg" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src$$Register; + + __ bstrpick_d(dst, src, 31, 0); + %} + ins_pipe(ialu_regI_regI); +%} + +instruct combine_i2l(mRegL dst, mRegI src1, immL_MaxUI mask, mRegI src2, immI_32 shift32) +%{ + match(Set dst (OrL (AndL (ConvI2L src1) mask) (LShiftL (ConvI2L src2) shift32))); + + format %{ "combine_i2l $dst, $src2(H), $src1(L) @ combine_i2l" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + Register src2 = $src2$$Register; + + if (src1 == dst) { + __ bstrins_d(dst, src2, 63, 32); + } else if (src2 == dst) { + __ slli_d(dst, dst, 32); + __ bstrins_d(dst, src1, 31, 0); + } else { + __ bstrpick_d(dst, src1, 31, 0); + __ bstrins_d(dst, src2, 63, 32); + } + %} + ins_pipe(ialu_regI_regI); +%} + +// Zero-extend convert int to long +instruct convI2L_reg_reg_zex(mRegL dst, mRegI src, immL_MaxUI mask) +%{ + match(Set dst (AndL (ConvI2L src) mask)); + + format %{ "movl $dst, $src\t# i2l zero-extend @ convI2L_reg_reg_zex" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src$$Register; + + __ bstrpick_d(dst, src, 31, 0); + %} + ins_pipe(ialu_regI_regI); +%} + +instruct convL2I2L_reg_reg_zex(mRegL dst, mRegL src, immL_MaxUI mask) +%{ + match(Set dst (AndL (ConvI2L (ConvL2I src)) mask)); + + format %{ "movl $dst, $src\t# i2l zero-extend @ convL2I2L_reg_reg_zex" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src$$Register; + + __ bstrpick_d(dst, src, 31, 0); + %} + ins_pipe(ialu_regI_regI); +%} + +// Match loading integer and casting it to unsigned int in long register. +// LoadI + ConvI2L + AndL 0xffffffff. 
+instruct loadUI2L_rmask(mRegL dst, memory mem, immL_MaxUI mask) %{ + match(Set dst (AndL (ConvI2L (LoadI mem)) mask)); + + format %{ "ld_wu $dst, $mem \t// zero-extend to long @ loadUI2L_rmask" %} + ins_encode %{ + relocInfo::relocType disp_reloc = $mem->disp_reloc(); + assert(disp_reloc == relocInfo::none, "cannot have disp"); + __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_INT); + %} + ins_pipe(ialu_loadI); +%} + +instruct loadUI2L_lmask(mRegL dst, memory mem, immL_MaxUI mask) %{ + match(Set dst (AndL mask (ConvI2L (LoadI mem)))); + + format %{ "ld_wu $dst, $mem \t// zero-extend to long @ loadUI2L_lmask" %} + ins_encode %{ + relocInfo::relocType disp_reloc = $mem->disp_reloc(); + assert(disp_reloc == relocInfo::none, "cannot have disp"); + __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_INT); + %} + ins_pipe(ialu_loadI); +%} + + +// ============================================================================ +// Safepoint Instruction + +instruct safePoint_poll() %{ + predicate(SafepointMechanism::uses_global_page_poll()); + match(SafePoint); + + ins_cost(105); + format %{ "poll for GC @ safePoint_poll" %} + + ins_encode %{ + __ block_comment("Safepoint:"); + __ li(T4, (long)os::get_polling_page()); + __ relocate(relocInfo::poll_type); + __ ld_w(AT, T4, 0); + %} + + ins_pipe( ialu_storeI ); +%} + +instruct safePoint_poll_tls(mRegP poll) %{ + match(SafePoint poll); + predicate(SafepointMechanism::uses_thread_local_poll()); + effect(USE poll); + + ins_cost(125); + format %{ "ld_w AT, [$poll]\t" + "Safepoint @ [$poll] : poll for GC" %} + size(4); + ins_encode %{ + Register poll_reg = $poll$$Register; + + __ block_comment("Safepoint:"); + __ relocate(relocInfo::poll_type); + address pre_pc = __ pc(); + __ ld_w(AT, poll_reg, 0); + assert(nativeInstruction_at(pre_pc)->is_safepoint_poll(), "must emit lw AT, [$poll]"); + %} + + ins_pipe( ialu_storeI ); +%} + +//----------Arithmetic Conversion Instructions--------------------------------- + +instruct roundFloat_nop(regF dst) +%{ + match(Set dst (RoundFloat dst)); + + ins_cost(0); + ins_encode(); + ins_pipe(empty); +%} + +instruct roundDouble_nop(regD dst) +%{ + match(Set dst (RoundDouble dst)); + + ins_cost(0); + ins_encode(); + ins_pipe(empty); +%} + +//----------BSWAP Instructions------------------------------------------------- +instruct bytes_reverse_int(mRegI dst, mRegIorL2I src) %{ + match(Set dst (ReverseBytesI src)); + + format %{ "RevB_I $dst, $src" %} + ins_encode %{ + __ revb_2w($dst$$Register, $src$$Register); + __ slli_w($dst$$Register, $dst$$Register, 0); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct bytes_reverse_long(mRegL dst, mRegL src) %{ + match(Set dst (ReverseBytesL src)); + + format %{ "RevB_L $dst, $src" %} + ins_encode %{ + __ revb_d($dst$$Register, $src$$Register); + %} + ins_pipe( ialu_regL_regL ); +%} + +instruct bytes_reverse_unsigned_short(mRegI dst, mRegIorL2I src) %{ + match(Set dst (ReverseBytesUS src)); + + format %{ "RevB_US $dst, $src" %} + ins_encode %{ + __ revb_2h($dst$$Register, $src$$Register); + __ bstrpick_d($dst$$Register, $dst$$Register, 15, 0); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct bytes_reverse_short(mRegI dst, mRegIorL2I src) %{ + match(Set dst (ReverseBytesS src)); + + format %{ "RevB_S $dst, $src" %} + ins_encode %{ + __ revb_2h($dst$$Register, $src$$Register); + __ ext_w_h($dst$$Register, $dst$$Register); + %} + ins_pipe( ialu_regI_regI ); +%} + +//---------- 
Zeros Count Instructions ------------------------------------------ +// CountLeadingZerosINode CountTrailingZerosINode +instruct countLeadingZerosI(mRegI dst, mRegIorL2I src) %{ + match(Set dst (CountLeadingZerosI src)); + + format %{ "clz_w $dst, $src\t# count leading zeros (int)" %} + ins_encode %{ + __ clz_w($dst$$Register, $src$$Register); + %} + ins_pipe( ialu_regL_regL ); +%} + +instruct countLeadingZerosL(mRegI dst, mRegL src) %{ + match(Set dst (CountLeadingZerosL src)); + + format %{ "clz_d $dst, $src\t# count leading zeros (long)" %} + ins_encode %{ + __ clz_d($dst$$Register, $src$$Register); + %} + ins_pipe( ialu_regL_regL ); +%} + +instruct countTrailingZerosI(mRegI dst, mRegIorL2I src) %{ + match(Set dst (CountTrailingZerosI src)); + + format %{ "ctz_w $dst, $src\t# count trailing zeros (int)" %} + ins_encode %{ + __ ctz_w($dst$$Register, $src$$Register); + %} + ins_pipe( ialu_regL_regL ); +%} + +instruct countTrailingZerosL(mRegI dst, mRegL src) %{ + match(Set dst (CountTrailingZerosL src)); + + format %{ "ctz_d $dst, $src\t# count trailing zeros (long)" %} + ins_encode %{ + __ ctz_d($dst$$Register, $src$$Register); + %} + ins_pipe( ialu_regL_regL ); +%} + +// ====================VECTOR INSTRUCTIONS===================================== + +// --------------------------------- Load ------------------------------------- + +instruct loadV16(vecX dst, memory mem) %{ + predicate(n->as_LoadVector()->memory_size() == 16); + match(Set dst (LoadVector mem)); + format %{ "vload $dst, $mem\t# @loadV16" %} + ins_encode %{ + __ loadstore_enc($dst$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_VECTORX); + %} + ins_pipe( pipe_slow ); +%} + +instruct loadV32(vecY dst, memory mem) %{ + predicate(n->as_LoadVector()->memory_size() == 32); + match(Set dst (LoadVector mem)); + format %{ "xvload $dst, $mem\t# @loadV32" %} + ins_encode %{ + __ loadstore_enc($dst$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_VECTORY); + %} + ins_pipe( pipe_slow ); +%} + +// --------------------------------- Store ------------------------------------ + +instruct storeV16(memory mem, vecX src) %{ + predicate(n->as_StoreVector()->memory_size() == 16); + match(Set mem (StoreVector mem src)); + format %{ "vstore $src, $mem\t# @storeV16" %} + ins_encode %{ + __ loadstore_enc($src$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_VECTORX); + %} + ins_pipe( pipe_slow ); +%} + +instruct storeV32(memory mem, vecY src) %{ + predicate(n->as_StoreVector()->memory_size() == 32); + match(Set mem (StoreVector mem src)); + format %{ "xvstore $src, $mem\t# @storeV32" %} + ins_encode %{ + __ loadstore_enc($src$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_VECTORY); + %} + ins_pipe( pipe_slow ); +%} + +// ------------------------------- Replicate ---------------------------------- + +instruct repl16B(vecX dst, mRegI src) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (ReplicateB src)); + format %{ "vreplgr2vr.b $dst, $src\t# @repl16B" %} + ins_encode %{ + __ vreplgr2vr_b($dst$$FloatRegister, $src$$Register); + %} + ins_pipe( pipe_slow ); +%} + +instruct repl16B_imm(vecX dst, immI_M128_255 imm) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (ReplicateB imm)); + format %{ "vldi $dst, $imm\t# @repl16B_imm" %} + ins_encode %{ + __ vldi($dst$$FloatRegister, ($imm$$constant & 0xff)); + %} + ins_pipe( pipe_slow ); +%} + +instruct repl8S(vecX dst, mRegI 
src) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (ReplicateS src)); + format %{ "vreplgr2vr.h $dst, $src\t# @repl8S" %} + ins_encode %{ + __ vreplgr2vr_h($dst$$FloatRegister, $src$$Register); + %} + ins_pipe( pipe_slow ); +%} + +instruct repl8S_imm(vecX dst, immI10 imm) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (ReplicateS imm)); + format %{ "vldi $dst, $imm\t# @repl8S_imm" %} + ins_encode %{ + __ vldi($dst$$FloatRegister, (0b001 << 10 ) | ($imm$$constant & 0x3ff)); + %} + ins_pipe( pipe_slow ); +%} + +instruct repl4I(vecX dst, mRegI src) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (ReplicateI src)); + format %{ "vreplgr2vr.w $dst, $src\t# @repl4I" %} + ins_encode %{ + __ vreplgr2vr_w($dst$$FloatRegister, $src$$Register); + %} + ins_pipe( pipe_slow ); +%} + +instruct repl4I_imm(vecX dst, immI10 imm) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (ReplicateI imm)); + format %{ "vldi $dst, $imm\t# @repl4I_imm" %} + ins_encode %{ + __ vldi($dst$$FloatRegister, (0b010 << 10 ) | ($imm$$constant & 0x3ff)); + %} + ins_pipe( pipe_slow ); +%} + +instruct repl2L(vecX dst, mRegL src) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (ReplicateL src)); + format %{ "vreplgr2vr.d $dst, $src\t# @repl2L" %} + ins_encode %{ + __ vreplgr2vr_d($dst$$FloatRegister, $src$$Register); + %} + ins_pipe( pipe_slow ); +%} + +instruct repl2L_imm(vecX dst, immL10 imm) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (ReplicateL imm)); + format %{ "vldi $dst, $imm\t# @repl2L_imm" %} + ins_encode %{ + __ vldi($dst$$FloatRegister, (0b011 << 10 ) | ($imm$$constant & 0x3ff)); + %} + ins_pipe( pipe_slow ); +%} + +instruct repl4F(vecX dst, regF src) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (ReplicateF src)); + format %{ "vreplvei.w $dst, $src, 0\t# @repl4F" %} + ins_encode %{ + __ vreplvei_w($dst$$FloatRegister, $src$$FloatRegister, 0); + %} + ins_pipe( pipe_slow ); +%} + +instruct repl2D(vecX dst, regD src) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (ReplicateD src)); + format %{ "vreplvei.d $dst, $src, 0\t# @repl2D" %} + ins_encode %{ + __ vreplvei_d($dst$$FloatRegister, $src$$FloatRegister, 0); + %} + ins_pipe( pipe_slow ); +%} + +instruct repl32B(vecY dst, mRegI src) %{ + predicate(n->as_Vector()->length() == 32); + match(Set dst (ReplicateB src)); + format %{ "xvreplgr2vr.b $dst, $src\t# @repl32B" %} + ins_encode %{ + __ xvreplgr2vr_b($dst$$FloatRegister, $src$$Register); + %} + ins_pipe( pipe_slow ); +%} + +instruct repl32B_imm(vecY dst, immI_M128_255 imm) %{ + predicate(n->as_Vector()->length() == 32); + match(Set dst (ReplicateB imm)); + format %{ "xvldi $dst, $imm\t# @repl32B_imm" %} + ins_encode %{ + __ xvldi($dst$$FloatRegister, ($imm$$constant & 0xff)); + %} + ins_pipe( pipe_slow ); +%} + +instruct repl16S(vecY dst, mRegI src) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (ReplicateS src)); + format %{ "xvreplgr2vr.h $dst, $src\t# @repl16S" %} + ins_encode %{ + __ xvreplgr2vr_h($dst$$FloatRegister, $src$$Register); + %} + ins_pipe( pipe_slow ); +%} + +instruct repl16S_imm(vecY dst, immI10 imm) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (ReplicateS imm)); + format %{ "xvldi $dst, $imm\t# @repl16S_imm" %} + ins_encode %{ + __ xvldi($dst$$FloatRegister, (0b001 << 10 ) | ($imm$$constant & 0x3ff)); + %} + ins_pipe( pipe_slow ); +%} + +instruct repl8I(vecY dst, mRegI src) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst 
(ReplicateI src)); + format %{ "xvreplgr2vr.w $dst, $src\t# @repl8I" %} + ins_encode %{ + __ xvreplgr2vr_w($dst$$FloatRegister, $src$$Register); + %} + ins_pipe( pipe_slow ); +%} + +instruct repl8I_imm(vecY dst, immI10 imm) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (ReplicateI imm)); + format %{ "xvldi $dst, $imm\t# @repl8I_imm" %} + ins_encode %{ + __ xvldi($dst$$FloatRegister, (0b010 << 10 ) | ($imm$$constant & 0x3ff)); + %} + ins_pipe( pipe_slow ); +%} + +instruct repl4L(vecY dst, mRegL src) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (ReplicateL src)); + format %{ "xvreplgr2vr.d $dst, $src\t# @repl4L" %} + ins_encode %{ + __ xvreplgr2vr_d($dst$$FloatRegister, $src$$Register); + %} + ins_pipe( pipe_slow ); +%} + +instruct repl4L_imm(vecY dst, immL10 imm) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (ReplicateL imm)); + format %{ "xvldi $dst, $imm\t# @repl4L_imm" %} + ins_encode %{ + __ xvldi($dst$$FloatRegister, (0b011 << 10 ) | ($imm$$constant & 0x3ff)); + %} + ins_pipe( pipe_slow ); +%} + +instruct repl8F(vecY dst, regF src) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (ReplicateF src)); + format %{ "xvreplve0.w $dst, $src\t# @repl8F" %} + ins_encode %{ + __ xvreplve0_w($dst$$FloatRegister, $src$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct repl4D(vecY dst, regD src) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (ReplicateD src)); + format %{ "xvreplve0.d $dst, $src\t# @repl4D" %} + ins_encode %{ + __ xvreplve0_d($dst$$FloatRegister, $src$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +// --------------------------------- ADD -------------------------------------- + +instruct add16B(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (AddVB src1 src2)); + format %{ "vadd.b $dst, $src1, $src2\t# @add16B" %} + ins_encode %{ + __ vadd_b($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct add16B_imm(vecX dst, vecX src, immIU5 imm) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (AddVB src (ReplicateB imm))); + format %{ "vaddi.bu $dst, $src, $imm\t# @add16B_imm" %} + ins_encode %{ + __ vaddi_bu($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); + %} + ins_pipe( pipe_slow ); +%} + +instruct add8S(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (AddVS src1 src2)); + format %{ "vadd.h $dst, $src1, $src2\t# @add8S" %} + ins_encode %{ + __ vadd_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct add8S_imm(vecX dst, vecX src, immIU5 imm) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (AddVS src (ReplicateS imm))); + format %{ "vaddi.hu $dst, $src, $imm\t# @add8S_imm" %} + ins_encode %{ + __ vaddi_hu($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); + %} + ins_pipe( pipe_slow ); +%} + +instruct add4I(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (AddVI src1 src2)); + format %{ "vadd.w $dst, $src1, src2\t# @add4I" %} + ins_encode %{ + __ vadd_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct add4I_imm(vecX dst, vecX src, immIU5 imm) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (AddVI src (ReplicateI imm))); + format %{ "vaddi.wu $dst, $src, $imm\t# @add4I_imm" %} + ins_encode %{ + __ 
vaddi_wu($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); + %} + ins_pipe( pipe_slow ); +%} + +instruct add2L(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (AddVL src1 src2)); + format %{ "vadd.d $dst, $src1, $src2\t# @add2L" %} + ins_encode %{ + __ vadd_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct add2L_imm(vecX dst, vecX src, immLU5 imm) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (AddVL src (ReplicateL imm))); + format %{ "vaddi.du $dst, $src, $imm\t# @add2L_imm" %} + ins_encode %{ + __ vaddi_du($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); + %} + ins_pipe( pipe_slow ); +%} + +instruct add4F(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (AddVF src1 src2)); + format %{ "vfadd.s $dst, $src1, $src2\t# @add4F" %} + ins_encode %{ + __ vfadd_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct add2D(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (AddVD src1 src2)); + format %{ "vfadd.d $dst, $src1, $src2\t# @add2D" %} + ins_encode %{ + __ vfadd_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct add32B(vecY dst, vecY src1, vecY src2) %{ + predicate(n->as_Vector()->length() == 32); + match(Set dst (AddVB src1 src2)); + format %{ "xvadd.b $dst, $src1, $src2\t# @add32B" %} + ins_encode %{ + __ xvadd_b($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct add32B_imm(vecY dst, vecY src, immIU5 imm) %{ + predicate(n->as_Vector()->length() == 32); + match(Set dst (AddVB src (ReplicateB imm))); + format %{ "xvaddi.bu $dst, $src, $imm\t# @add32B_imm" %} + ins_encode %{ + __ xvaddi_bu($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); + %} + ins_pipe( pipe_slow ); +%} + +instruct add16S(vecY dst, vecY src1, vecY src2) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (AddVS src1 src2)); + format %{ "xvadd.h $dst, $src1, $src2\t# @add16S" %} + ins_encode %{ + __ xvadd_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct add16S_imm(vecY dst, vecY src, immIU5 imm) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (AddVS src (ReplicateS imm))); + format %{ "xvaddi.hu $dst, $src, $imm\t# @add16S_imm" %} + ins_encode %{ + __ xvaddi_hu($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); + %} + ins_pipe( pipe_slow ); +%} + +instruct add8I(vecY dst, vecY src1, vecY src2) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (AddVI src1 src2)); + format %{ "xvadd.wu $dst, $src1, $src2\t# @add8I" %} + ins_encode %{ + __ xvadd_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct add8I_imm(vecY dst, vecY src, immIU5 imm) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (AddVI src (ReplicateI imm))); + format %{ "xvaddi.wu $dst, $src, $imm\t# @add8I_imm" %} + ins_encode %{ + __ xvaddi_wu($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); + %} + ins_pipe( pipe_slow ); +%} + +instruct add4L(vecY dst, vecY src1, vecY src2) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (AddVL src1 src2)); + format %{ "xvadd.d $dst, $src1, $src2\t# @add4L" %} + ins_encode %{ + __ 
xvadd_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct add4L_imm(vecY dst, vecY src, immLU5 imm) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (AddVL src (ReplicateL imm))); + format %{ "xvaddi.du $dst, $src, $imm\t# @add4L_imm" %} + ins_encode %{ + __ xvaddi_du($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); + %} + ins_pipe( pipe_slow ); +%} + +instruct add8F(vecY dst, vecY src1, vecY src2) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (AddVF src1 src2)); + format %{ "xvfadd.s $dst, $src1, $src2\t# @add8F" %} + ins_encode %{ + __ xvfadd_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct add4D(vecY dst, vecY src1, vecY src2) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (AddVD src1 src2)); + format %{ "xvfadd.d $dst, $src1, $src2\t# @add4D" %} + ins_encode %{ + __ xvfadd_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +// --------------------------------- SUB -------------------------------------- + +instruct sub16B(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (SubVB src1 src2)); + format %{ "vsub.b $dst, $src1, $src2\t# @sub16B" %} + ins_encode %{ + __ vsub_b($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct sub16B_imm(vecX dst, vecX src, immIU5 imm) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (SubVB src (ReplicateB imm))); + format %{ "vsubi.bu $dst, $src, $imm\t# @sub16B_imm" %} + ins_encode %{ + __ vsubi_bu($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); + %} + ins_pipe( pipe_slow ); +%} + +instruct sub8S(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (SubVS src1 src2)); + format %{ "vsub.h $dst, $src1, $src2\t# @sub8S" %} + ins_encode %{ + __ vsub_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct sub8S_imm(vecX dst, vecX src, immIU5 imm) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (SubVS src (ReplicateS imm))); + format %{ "vsubi.hu $dst, $src, $imm\t# @sub8S_imm" %} + ins_encode %{ + __ vsubi_hu($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); + %} + ins_pipe( pipe_slow ); +%} + +instruct sub4I(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (SubVI src1 src2)); + format %{ "vsub.w $dst, $src1, src2\t# @sub4I" %} + ins_encode %{ + __ vsub_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct sub4I_imm(vecX dst, vecX src, immIU5 imm) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (SubVI src (ReplicateI imm))); + format %{ "vsubi.wu $dst, $src, $imm\t# @sub4I_imm" %} + ins_encode %{ + __ vsubi_wu($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); + %} + ins_pipe( pipe_slow ); +%} + +instruct sub2L(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (SubVL src1 src2)); + format %{ "vsub.d $dst, $src1, $src2\t# @sub2L" %} + ins_encode %{ + __ vsub_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct sub2L_imm(vecX dst, vecX src, immLU5 imm) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (SubVL src (ReplicateL imm))); + 
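+  // Subtracting a replicated constant uses the unsigned-immediate form; immLU5
+  // keeps the constant within the 5-bit range that vsubi.du encodes.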
format %{ "vsubi.du $dst, $src, $imm\t# @sub2L_imm" %} + ins_encode %{ + __ vsubi_du($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); + %} + ins_pipe( pipe_slow ); +%} + +instruct sub4F(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (SubVF src1 src2)); + format %{ "vfsub.s $dst, $src1, $src2\t# @sub4F" %} + ins_encode %{ + __ vfsub_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct sub2D(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (SubVD src1 src2)); + format %{ "vfsub.d $dst, $src1, $src2\t# @sub2D" %} + ins_encode %{ + __ vfsub_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct sub32B(vecY dst, vecY src1, vecY src2) %{ + predicate(n->as_Vector()->length() == 32); + match(Set dst (SubVB src1 src2)); + format %{ "xvsub.b $dst, $src1, $src2\t# @sub32B" %} + ins_encode %{ + __ xvsub_b($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct sub32B_imm(vecY dst, vecY src, immIU5 imm) %{ + predicate(n->as_Vector()->length() == 32); + match(Set dst (SubVB src (ReplicateB imm))); + format %{ "xvsubi.bu $dst, $src, $imm\t# @sub32B_imm" %} + ins_encode %{ + __ xvsubi_bu($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); + %} + ins_pipe( pipe_slow ); +%} + +instruct sub16S(vecY dst, vecY src1, vecY src2) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (SubVS src1 src2)); + format %{ "xvsub.h $dst, $src1, $src2\t# @sub16S" %} + ins_encode %{ + __ xvsub_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct sub16S_imm(vecY dst, vecY src, immIU5 imm) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (SubVS src (ReplicateS imm))); + format %{ "xvsubi.hu $dst, $src, $imm\t# @sub16S_imm" %} + ins_encode %{ + __ xvsubi_hu($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); + %} + ins_pipe( pipe_slow ); +%} + +instruct sub8I(vecY dst, vecY src1, vecY src2) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (SubVI src1 src2)); + format %{ "xvsub.w $dst, $src1, $src2\t# @sub8I" %} + ins_encode %{ + __ xvsub_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct sub8I_imm(vecY dst, vecY src, immIU5 imm) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (SubVI src (ReplicateI imm))); + format %{ "xvsubi.wu $dst, $src, $imm\t# @sub8I_imm" %} + ins_encode %{ + __ xvsubi_wu($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); + %} + ins_pipe( pipe_slow ); +%} + +instruct sub4L(vecY dst, vecY src1, vecY src2) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (SubVL src1 src2)); + format %{ "xvsub.d $dst, $src1, $src2\t# @sub4L" %} + ins_encode %{ + __ xvsub_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct sub4L_imm(vecY dst, vecY src, immLU5 imm) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (SubVL src (ReplicateL imm))); + format %{ "xvsubi.du $dst, $src, $imm\t# @sub4L_imm" %} + ins_encode %{ + __ xvsubi_du($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); + %} + ins_pipe( pipe_slow ); +%} + +instruct sub8F(vecY dst, vecY src1, vecY src2) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (SubVF src1 src2)); + format %{ 
"xvfsub.s $dst, $src1, $src2\t# @sub8F" %} + ins_encode %{ + __ xvfsub_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct sub4D(vecY dst, vecY src1, vecY src2) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (SubVD src1 src2)); + format %{ "xvfsub.d $dst,$src1,$src2\t# @sub4D" %} + ins_encode %{ + __ xvfsub_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +// --------------------------------- MUL -------------------------------------- +instruct mul16B(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (MulVB src1 src2)); + format %{ "vmul.b $dst, $src1, $src2\t# @mul16B" %} + ins_encode %{ + __ vmul_b($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct mul8S(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (MulVS src1 src2)); + format %{ "vmul.h $dst, $src1, $src2\t# @mul8S" %} + ins_encode %{ + __ vmul_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct mul4I(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (MulVI src1 src2)); + format %{ "vmul.w $dst, $src1, $src2\t# @mul4I" %} + ins_encode %{ + __ vmul_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct mul2L(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (MulVL src1 src2)); + format %{ "vmul.d $dst, $src1, $src2\t# @mul2L" %} + ins_encode %{ + __ vmul_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct mul4F(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (MulVF src1 src2)); + format %{ "vfmul.s $dst, $src1, $src2\t# @mul4F" %} + ins_encode %{ + __ vfmul_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct mul2D(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (MulVD src1 src2)); + format %{ "vfmul.d $dst, $src1, $src2\t# @mul2D" %} + ins_encode %{ + __ vfmul_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct mul32B(vecY dst, vecY src1, vecY src2) %{ + predicate(n->as_Vector()->length() == 32); + match(Set dst (MulVB src1 src2)); + format %{ "xvmul.b $dst, $src1, $src2\t# @mul32B" %} + ins_encode %{ + __ xvmul_b($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct mul16S(vecY dst, vecY src1, vecY src2) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (MulVS src1 src2)); + format %{ "xvmul.h $dst, $src1, $src2\t# @mul16S" %} + ins_encode %{ + __ xvmul_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct mul8I(vecY dst, vecY src1, vecY src2) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (MulVI src1 src2)); + format %{ "xvmul.w $dst, $src1, $src2\t# @mul8I" %} + ins_encode %{ + __ xvmul_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct mul4L(vecY dst, vecY src1, vecY src2) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (MulVL src1 src2)); + 
format %{ "xvmul.d $dst, $src1, $src2\t# @mul4L" %} + ins_encode %{ + __ xvmul_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct mul8F(vecY dst, vecY src1, vecY src2) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (MulVF src1 src2)); + format %{ "xvfmul.s $dst, $src1, $src2\t# @mul8F" %} + ins_encode %{ + __ xvfmul_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct mul4D(vecY dst, vecY src1, vecY src2) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (MulVD src1 src2)); + format %{ "xvfmul.d $dst, $src1, $src2\t# @mul4D" %} + ins_encode %{ + __ xvfmul_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +// --------------------------------- DIV -------------------------------------- +instruct div4F(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (DivVF src1 src2)); + format %{ "vfdiv.s $dst, $src1, $src2\t# @div4F" %} + ins_encode %{ + __ vfdiv_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct div2D(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (DivVD src1 src2)); + format %{ "vfdiv.d $dst, $src1, $src2\t# @div2D" %} + ins_encode %{ + __ vfdiv_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct div8F(vecY dst, vecY src1, vecY src2) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (DivVF src1 src2)); + format %{ "xvfdiv.s $dst, $src1, $src2\t# @div8F" %} + ins_encode %{ + __ xvfdiv_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct div4D(vecY dst, vecY src1, vecY src2) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (DivVD src1 src2)); + format %{ "xvfdiv.d $dst, $src1, $src2\t# @div4D" %} + ins_encode %{ + __ xvfdiv_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +// --------------------------------- ABS -------------------------------------- + +instruct abs16B(vecX dst, vecX src) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (AbsVB src)); + effect(TEMP_DEF dst); + format %{ "vabs $dst, $src\t# @abs16B" %} + ins_encode %{ + __ vxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); + __ vabsd_b($dst$$FloatRegister, $src$$FloatRegister, $dst$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct abs8S(vecX dst, vecX src) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (AbsVS src)); + effect(TEMP_DEF dst); + format %{ "vabs $dst, $src\t# @abs8S" %} + ins_encode %{ + __ vxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); + __ vabsd_h($dst$$FloatRegister, $src$$FloatRegister, $dst$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct abs4I(vecX dst, vecX src) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (AbsVI src)); + effect(TEMP_DEF dst); + format %{ "vabs $dst, $src\t# @abs4I" %} + ins_encode %{ + __ vxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); + __ vabsd_w($dst$$FloatRegister, $src$$FloatRegister, $dst$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct abs2L(vecX dst, vecX src) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (AbsVL src)); + effect(TEMP_DEF dst); + format 
%{ "vabs $dst, $src\t# @abs2L" %} + ins_encode %{ + __ vxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); + __ vabsd_d($dst$$FloatRegister, $src$$FloatRegister, $dst$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct abs4F(vecX dst, vecX src) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (AbsVF src)); + format %{ "vbitclri.w $dst, $src\t# @abs4F" %} + ins_encode %{ + __ vbitclri_w($dst$$FloatRegister, $src$$FloatRegister, 0x1f); + %} + ins_pipe( pipe_slow ); +%} + +instruct abs2D(vecX dst, vecX src) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (AbsVD src)); + format %{ "vbitclri.d $dst, $src\t# @abs2D" %} + ins_encode %{ + __ vbitclri_d($dst$$FloatRegister, $src$$FloatRegister, 0x3f); + %} + ins_pipe( pipe_slow ); +%} + +instruct abs32B(vecY dst, vecY src) %{ + predicate(n->as_Vector()->length() == 32); + match(Set dst (AbsVB src)); + effect(TEMP_DEF dst); + format %{ "xvabs $dst, $src\t# @abs32B" %} + ins_encode %{ + __ xvxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); + __ xvabsd_b($dst$$FloatRegister, $src$$FloatRegister, $dst$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct abs16S(vecY dst, vecY src) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (AbsVS src)); + effect(TEMP_DEF dst); + format %{ "xvabs $dst, $src\t# @abs16S" %} + ins_encode %{ + __ xvxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); + __ xvabsd_h($dst$$FloatRegister, $src$$FloatRegister, $dst$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct abs8I(vecY dst, vecY src) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (AbsVI src)); + effect(TEMP_DEF dst); + format %{ "xvabs $dst, $src\t# @abs8I" %} + ins_encode %{ + __ xvxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); + __ xvabsd_w($dst$$FloatRegister, $src$$FloatRegister, $dst$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct abs4L(vecY dst, vecY src) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (AbsVL src)); + effect(TEMP_DEF dst); + format %{ "xvabs $dst, $src\t# @abs4L" %} + ins_encode %{ + __ xvxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); + __ xvabsd_d($dst$$FloatRegister, $src$$FloatRegister, $dst$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct abs8F(vecY dst, vecY src) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (AbsVF src)); + format %{ "xvbitclri.w $dst, $src\t# @abs8F" %} + ins_encode %{ + __ xvbitclri_w($dst$$FloatRegister, $src$$FloatRegister, 0x1f); + %} + ins_pipe( pipe_slow ); +%} + +instruct abs4D(vecY dst, vecY src) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (AbsVD src)); + format %{ "xvbitclri.d $dst, $src\t# @abs4D" %} + ins_encode %{ + __ xvbitclri_d($dst$$FloatRegister, $src$$FloatRegister, 0x3f); + %} + ins_pipe( pipe_slow ); +%} + +// --------------------------------- ABS DIFF --------------------------------- + +instruct absd4I(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (AbsVI (SubVI src1 src2))); + format %{ "vabsd.w $dst, $src1, $src2\t# @absd4I" %} + ins_encode %{ + __ vabsd_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct absd2L(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (AbsVL (SubVL src1 src2))); + format %{ "vabsd.d $dst, $src1, $src2\t# @absd2L" %} + ins_encode %{ + __ 
vabsd_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct absd8I(vecY dst, vecY src1, vecY src2) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (AbsVI (SubVI src1 src2))); + format %{ "xvabsd.w $dst, $src1, $src2\t# @absd8I" %} + ins_encode %{ + __ xvabsd_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct absd4L(vecY dst, vecY src1, vecY src2) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (AbsVL (SubVL src1 src2))); + format %{ "xvabsd.d $dst, $src1, $src2\t# @absd4L" %} + ins_encode %{ + __ xvabsd_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +// --------------------------------- MAX -------------------------------------- + +instruct max16B(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); + match(Set dst (MaxV src1 src2)); + format %{ "vmax.b $dst, $src1, $src2\t# @max16B" %} + ins_encode %{ + __ vmax_b($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct max8S(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); + match(Set dst (MaxV src1 src2)); + format %{ "vmax.h $dst, $src1, $src2\t# @max8S" %} + ins_encode %{ + __ vmax_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct max4I(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); + match(Set dst (MaxV src1 src2)); + format %{ "vmax.w $dst, $src1, $src2\t# @max4I" %} + ins_encode %{ + __ vmax_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct max2L(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); + match(Set dst (MaxV src1 src2)); + format %{ "vmax.d $dst, $src1, $src2\t# @max2L" %} + ins_encode %{ + __ vmax_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct max4F(vecX dst, vecX src1, vecX src2, vecX tmp1, vecX tmp2) %{ + predicate(n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); + match(Set dst (MaxV src1 src2)); + effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); + format %{ "vfmax $dst, $src1, $src2\t# TEMP($tmp1, $tmp2) @max4F" %} + ins_encode %{ + __ vfmax_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + __ vxor_v($tmp1$$FloatRegister, $tmp1$$FloatRegister, $tmp1$$FloatRegister); + __ vfdiv_s($tmp1$$FloatRegister, $tmp1$$FloatRegister, $tmp1$$FloatRegister); + __ vfcmp_cun_s($tmp2$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + __ vbitsel_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct max2D(vecX dst, vecX src1, vecX src2, vecX tmp1, vecX tmp2) %{ + predicate(n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); + match(Set dst (MaxV src1 src2)); + effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); + format %{ "vfmax $dst, $src1, $src2\t# TEMP($tmp1, $tmp2) @max2D" %} + ins_encode %{ + __ vfmax_d($dst$$FloatRegister, 
$src1$$FloatRegister, $src2$$FloatRegister); + __ vxor_v($tmp1$$FloatRegister, $tmp1$$FloatRegister, $tmp1$$FloatRegister); + __ vfdiv_d($tmp1$$FloatRegister, $tmp1$$FloatRegister, $tmp1$$FloatRegister); + __ vfcmp_cun_d($tmp2$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + __ vbitsel_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct max32B(vecY dst, vecY src1, vecY src2) %{ + predicate(n->as_Vector()->length() == 32 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); + match(Set dst (MaxV src1 src2)); + format %{ "xvmax.b $dst, $src1, $src2\t# @max32B" %} + ins_encode %{ + __ xvmax_b($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct max16S(vecY dst, vecY src1, vecY src2) %{ + predicate(n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); + match(Set dst (MaxV src1 src2)); + format %{ "xvmax.h $dst, $src1, $src2\t# @max16S" %} + ins_encode %{ + __ xvmax_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct max8I(vecY dst, vecY src1, vecY src2) %{ + predicate(n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); + match(Set dst (MaxV src1 src2)); + format %{ "xvmax.w $dst, $src1, $src2\t# @max8I" %} + ins_encode %{ + __ xvmax_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct max4L(vecY dst, vecY src1, vecY src2) %{ + predicate(n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); + match(Set dst (MaxV src1 src2)); + format %{ "xvmax.d $dst, $src1, $src2\t# @max4L" %} + ins_encode %{ + __ xvmax_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct max8F(vecY dst, vecY src1, vecY src2, vecY tmp1, vecY tmp2) %{ + predicate(n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); + match(Set dst (MaxV src1 src2)); + effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); + format %{ "xvfmax $dst, $src1, $src2\t# TEMP($tmp1, $tmp2) @max8F" %} + ins_encode %{ + __ xvfmax_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + __ xvxor_v($tmp1$$FloatRegister, $tmp1$$FloatRegister, $tmp1$$FloatRegister); + __ xvfdiv_s($tmp1$$FloatRegister, $tmp1$$FloatRegister, $tmp1$$FloatRegister); + __ xvfcmp_cun_s($tmp2$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + __ xvbitsel_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct max4D(vecY dst, vecY src1, vecY src2, vecY tmp1, vecY tmp2) %{ + predicate(n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); + match(Set dst (MaxV src1 src2)); + effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); + format %{ "xvfmax $dst, $src1, $src2\t# TEMP($tmp1, $tmp2) @max4D" %} + ins_encode %{ + __ xvfmax_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + __ xvxor_v($tmp1$$FloatRegister, $tmp1$$FloatRegister, $tmp1$$FloatRegister); + __ xvfdiv_d($tmp1$$FloatRegister, $tmp1$$FloatRegister, $tmp1$$FloatRegister); + __ xvfcmp_cun_d($tmp2$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + __ xvbitsel_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister); + %} + 
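+ // In the vector float/double max/min rules, the xor/fdiv pair above builds a
+ // NaN (0.0/0.0) in $tmp1 and the fcmp.cun compare leaves a per-lane unordered
+ // mask in $tmp2, so the final bitsel forces NaN into every lane where either
+ // input is NaN. Presumably this is to match Java's Math.max/min semantics,
+ // since plain [x]vfmax/[x]vfmin would return the non-NaN operand instead.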
ins_pipe( pipe_slow ); +%} + +// --------------------------------- MIN -------------------------------------- + +instruct min16B(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); + match(Set dst (MinV src1 src2)); + format %{ "vmin.b $dst, $src1, $src2\t# @min16B" %} + ins_encode %{ + __ vmin_b($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct min8S(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); + match(Set dst (MinV src1 src2)); + format %{ "vmin.h $dst, $src1, $src2\t# @min8S" %} + ins_encode %{ + __ vmin_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct min4I(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); + match(Set dst (MinV src1 src2)); + format %{ "vmin.w $dst, $src1, $src2\t# @min4I" %} + ins_encode %{ + __ vmin_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct min2L(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); + match(Set dst (MinV src1 src2)); + format %{ "vmin.d $dst, $src1, $src2\t# @min2L" %} + ins_encode %{ + __ vmin_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct min4F(vecX dst, vecX src1, vecX src2, vecX tmp1, vecX tmp2) %{ + predicate(n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); + match(Set dst (MinV src1 src2)); + effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); + format %{ "vfmin $dst, $src1, $src2\t# TEMP($tmp1, $tmp2) @min4F" %} + ins_encode %{ + __ vfmin_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + __ vxor_v($tmp1$$FloatRegister, $tmp1$$FloatRegister, $tmp1$$FloatRegister); + __ vfdiv_s($tmp1$$FloatRegister, $tmp1$$FloatRegister, $tmp1$$FloatRegister); + __ vfcmp_cun_s($tmp2$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + __ vbitsel_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct min2D(vecX dst, vecX src1, vecX src2, vecX tmp1, vecX tmp2) %{ + predicate(n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); + match(Set dst (MinV src1 src2)); + effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); + format %{ "vfmin $dst, $src1, $src2\t# TEMP($tmp1, $tmp2) @min2D" %} + ins_encode %{ + __ vfmin_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + __ vxor_v($tmp1$$FloatRegister, $tmp1$$FloatRegister, $tmp1$$FloatRegister); + __ vfdiv_d($tmp1$$FloatRegister, $tmp1$$FloatRegister, $tmp1$$FloatRegister); + __ vfcmp_cun_d($tmp2$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + __ vbitsel_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct min32B(vecY dst, vecY src1, vecY src2) %{ + predicate(n->as_Vector()->length() == 32 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); + match(Set dst (MinV src1 src2)); + format %{ "xvmin.b $dst, $src1, $src2\t# @min32B" %} + ins_encode %{ + __ xvmin_b($dst$$FloatRegister, 
$src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct min16S(vecY dst, vecY src1, vecY src2) %{ + predicate(n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); + match(Set dst (MinV src1 src2)); + format %{ "xvmin.h $dst, $src1, $src2\t# @min16S" %} + ins_encode %{ + __ xvmin_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct min8I(vecY dst, vecY src1, vecY src2) %{ + predicate(n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); + match(Set dst (MinV src1 src2)); + format %{ "xvmin.w $dst, $src1, $src2\t# @min8I" %} + ins_encode %{ + __ xvmin_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct min4L(vecY dst, vecY src1, vecY src2) %{ + predicate(n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); + match(Set dst (MinV src1 src2)); + format %{ "xvmin.d $dst, $src1, $src2\t# @min4L" %} + ins_encode %{ + __ xvmin_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct min8F(vecY dst, vecY src1, vecY src2, vecY tmp1, vecY tmp2) %{ + predicate(n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); + match(Set dst (MinV src1 src2)); + effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); + format %{ "xvfmin $dst, $src1, $src2\t# TEMP($tmp1, $tmp2) @min8F" %} + ins_encode %{ + __ xvfmin_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + __ xvxor_v($tmp1$$FloatRegister, $tmp1$$FloatRegister, $tmp1$$FloatRegister); + __ xvfdiv_s($tmp1$$FloatRegister, $tmp1$$FloatRegister, $tmp1$$FloatRegister); + __ xvfcmp_cun_s($tmp2$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + __ xvbitsel_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct min4D(vecY dst, vecY src1, vecY src2, vecY tmp1, vecY tmp2) %{ + predicate(n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); + match(Set dst (MinV src1 src2)); + effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); + format %{ "xvfmin $dst, $src1, $src2\t# TEMP($tmp1, $tmp2) @min4D" %} + ins_encode %{ + __ xvfmin_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + __ xvxor_v($tmp1$$FloatRegister, $tmp1$$FloatRegister, $tmp1$$FloatRegister); + __ xvfdiv_d($tmp1$$FloatRegister, $tmp1$$FloatRegister, $tmp1$$FloatRegister); + __ xvfcmp_cun_d($tmp2$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + __ xvbitsel_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +// --------------------------------- NEG -------------------------------------- + +instruct neg4F(vecX dst, vecX src) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (NegVF src)); + format %{ "vbitrevi.w $dst, $src\t# @neg4F" %} + ins_encode %{ + __ vbitrevi_w($dst$$FloatRegister, $src$$FloatRegister, 0x1f); + %} + ins_pipe( pipe_slow ); +%} + +instruct neg2D(vecX dst, vecX src) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (NegVD src)); + format %{ "vbitrevi.d $dst, $src\t# @neg2D" %} + ins_encode %{ + __ vbitrevi_d($dst$$FloatRegister, $src$$FloatRegister, 0x3f); + %} + ins_pipe( pipe_slow ); +%} + +instruct neg8F(vecY dst, vecY src) %{ + 
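+ // As with neg4F/neg2D above, vector FP negation simply flips the IEEE sign
+ // bit with [x]vbitrevi (bit 31 for float lanes, bit 63 for double lanes), in
+ // the same way the abs4F/abs2D/abs8F/abs4D rules clear it with [x]vbitclri.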
predicate(n->as_Vector()->length() == 8); + match(Set dst (NegVF src)); + format %{ "xvbitrevi.w $dst, $src\t# @neg8F" %} + ins_encode %{ + __ xvbitrevi_w($dst$$FloatRegister, $src$$FloatRegister, 0x1f); + %} + ins_pipe( pipe_slow ); +%} + +instruct neg4D(vecY dst, vecY src) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (NegVD src)); + format %{ "xvbitrevi.d $dst, $src\t# @neg4D" %} + ins_encode %{ + __ xvbitrevi_d($dst$$FloatRegister, $src$$FloatRegister, 0x3f); + %} + ins_pipe( pipe_slow ); +%} + +// --------------------------------- SQRT ------------------------------------- + +instruct sqrt4F(vecX dst, vecX src) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (SqrtVF src)); + format %{ "vfsqrt.s $dst, $src\t# @sqrt4F" %} + ins_encode %{ + __ vfsqrt_s($dst$$FloatRegister, $src$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct sqrt2D(vecX dst, vecX src) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (SqrtVD src)); + format %{ "vfsqrt.d $dst, $src\t# @sqrt2D" %} + ins_encode %{ + __ vfsqrt_d($dst$$FloatRegister, $src$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct sqrt8F(vecY dst, vecY src) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (SqrtVF src)); + format %{ "xvfsqrt.s $dst, $src\t# @sqrt8F" %} + ins_encode %{ + __ xvfsqrt_s($dst$$FloatRegister, $src$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct sqrt4D(vecY dst, vecY src) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (SqrtVD src)); + format %{ "xvfsqrt.d $dst, $src\t# @sqrt4D" %} + ins_encode %{ + __ xvfsqrt_d($dst$$FloatRegister, $src$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +// --------------------------------- MADD ------------------------------------- + +instruct madd16B(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (AddVB dst (MulVB src1 src2))); + format %{ "vmadd.b $dst, $src1, $src2\t# @madd16B" %} + ins_encode %{ + __ vmadd_b($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct madd8S(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (AddVS dst (MulVS src1 src2))); + format %{ "vmadd.h $dst, $src1, $src2\t# @madd8S" %} + ins_encode %{ + __ vmadd_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct madd4I(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (AddVI dst (MulVI src1 src2))); + format %{ "vmadd $dst, $src1, $src2\t# @madd4I" %} + ins_encode %{ + __ vmadd_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct madd2L(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (AddVL dst (MulVL src1 src2))); + format %{ "vmadd.d $dst, $src1, $src2\t# @madd2L" %} + ins_encode %{ + __ vmadd_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +// src1 * src2 + src3 +instruct madd4F(vecX dst, vecX src1, vecX src2, vecX src3) %{ + predicate(UseFMA && n->as_Vector()->length() == 4); + match(Set dst (FmaVF src3 (Binary src1 src2))); + format %{ "vfmadd.s $dst, $src1, $src2, $src3\t# @madd4F" %} + ins_encode %{ + __ vfmadd_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +// src1 * src2 + src3 +instruct madd2D(vecX 
dst, vecX src1, vecX src2, vecX src3) %{ + predicate(UseFMA && n->as_Vector()->length() == 2); + match(Set dst (FmaVD src3 (Binary src1 src2))); + format %{ "vfmadd.d $dst, $src1, $src2, $src3\t# @madd2D" %} + ins_encode %{ + __ vfmadd_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct madd32B(vecY dst, vecY src1, vecY src2) %{ + predicate(n->as_Vector()->length() == 32); + match(Set dst (AddVB dst (MulVB src1 src2))); + format %{ "xvmadd.b $dst, $src1, $src2\t# @madd32B" %} + ins_encode %{ + __ xvmadd_b($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct madd16S(vecY dst, vecY src1, vecY src2) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (AddVS dst (MulVS src1 src2))); + format %{ "xvmadd.h $dst, $src1, $src2\t# @madd16S" %} + ins_encode %{ + __ xvmadd_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct madd8I(vecY dst, vecY src1, vecY src2) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (AddVI dst (MulVI src1 src2))); + format %{ "xvmadd.w $dst, $src1, $src2\t# @madd8I" %} + ins_encode %{ + __ xvmadd_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct madd4L(vecY dst, vecY src1, vecY src2) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (AddVL dst (MulVL src1 src2))); + format %{ "xvmadd.d $dst, $src1, $src2\t# @madd4L" %} + ins_encode %{ + __ xvmadd_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +// src1 * src2 + src3 +instruct madd8F(vecY dst, vecY src1, vecY src2, vecY src3) %{ + predicate(UseFMA && n->as_Vector()->length() == 8); + match(Set dst (FmaVF src3 (Binary src1 src2))); + format %{ "xvfmadd.s $dst, $src1, $src2, $src3\t# @madd8F" %} + ins_encode %{ + __ xvfmadd_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +// src1 * src2 + src3 +instruct madd4D(vecY dst, vecY src1, vecY src2, vecY src3) %{ + predicate(UseFMA && n->as_Vector()->length() == 4); + match(Set dst (FmaVD src3 (Binary src1 src2))); + format %{ "xvfmadd.d $dst, $src1, $src2, $src3\t# @madd4D" %} + ins_encode %{ + __ xvfmadd_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +// --------------------------------- MSUB ------------------------------------- + +instruct msub16B(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (SubVB dst (MulVB src1 src2))); + format %{ "vmsub.b $dst, $src1, $src2\t# @msub16B" %} + ins_encode %{ + __ vmsub_b($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct msub8S(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (SubVS dst (MulVS src1 src2))); + format %{ "vmsub.h $dst, $src1, $src2\t# @msub8S" %} + ins_encode %{ + __ vmsub_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct msub4I(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (SubVI dst (MulVI src1 src2))); + format %{ "vmsub.w $dst, $src1, $src2\t# @msub4I" %} + ins_encode %{ + __ vmsub_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); 
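+ // [x]vmadd/[x]vmsub accumulate into their destination (dst = dst +/- src1 * src2),
+ // which is why the integer madd/msub rules reuse dst as both an input and the
+ // result, while the FP rules use the four-operand [x]vfmadd/[x]vfmsub forms
+ // with an explicit third source.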
+ %} + ins_pipe( pipe_slow ); +%} + +instruct msub2L(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (SubVL dst (MulVL src1 src2))); + format %{ "vmsub.d $dst, $src1, $src2\t# @msub2L" %} + ins_encode %{ + __ vmsub_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +// src1 * src2 - src3 +instruct msub4F(vecX dst, vecX src1, vecX src2, vecX src3) %{ + predicate(UseFMA && n->as_Vector()->length() == 4); + match(Set dst (FmaVF (NegVF src3) (Binary src1 src2))); + format %{ "vfmsub.s $dst, $src1, $src2, $src3\t# @msub4F" %} + ins_encode %{ + __ vfmsub_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +// src1 * src2 - src3 +instruct msub2D(vecX dst, vecX src1, vecX src2, vecX src3) %{ + predicate(UseFMA && n->as_Vector()->length() == 2); + match(Set dst (FmaVD (NegVD src3) (Binary src1 src2))); + format %{ "vfmsub.d $dst, $src1, $src2, $src3\t# @msub2D" %} + ins_encode %{ + __ vfmsub_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct msub32B(vecY dst, vecY src1, vecY src2) %{ + predicate(n->as_Vector()->length() == 32); + match(Set dst (SubVB dst (MulVB src1 src2))); + format %{ "xvmsub.b $dst, $src1, $src2\t# @msub32B" %} + ins_encode %{ + __ xvmsub_b($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct msub16S(vecY dst, vecY src1, vecY src2) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (SubVS dst (MulVS src1 src2))); + format %{ "xvmsub.h $dst, $src1, $src2\t# @msub16S" %} + ins_encode %{ + __ xvmsub_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct msub8I(vecY dst, vecY src1, vecY src2) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (SubVI dst (MulVI src1 src2))); + format %{ "xvmsub.w $dst, $src1, $src2\t# @msub8I" %} + ins_encode %{ + __ xvmsub_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct msub4L(vecY dst, vecY src1, vecY src2) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (SubVL dst (MulVL src1 src2))); + format %{ "xvmsub.d $dst, $src1, $src2\t# @msub4L" %} + ins_encode %{ + __ xvmsub_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +// src1 * src2 - src3 +instruct msub8F(vecY dst, vecY src1, vecY src2, vecY src3) %{ + predicate(UseFMA && n->as_Vector()->length() == 8); + match(Set dst (FmaVF (NegVF src3) (Binary src1 src2))); + format %{ "xvfmsub.s $dst, $src1, $src2, $src3\t# @msub8F" %} + ins_encode %{ + __ xvfmsub_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +// src1 * src2 - src3 +instruct msub4D(vecY dst, vecY src1, vecY src2, vecY src3) %{ + predicate(UseFMA && n->as_Vector()->length() == 4); + match(Set dst (FmaVD (NegVD src3) (Binary src1 src2))); + format %{ "xvfmsub.d $dst, $src1, $src2, $src3\t# @msub4D" %} + ins_encode %{ + __ xvfmsub_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +// --------------------------------- FNMADD ----------------------------------- + +// -src1 * src2 - src3 +instruct nmadd4F(vecX dst, vecX src1, vecX src2, vecX src3) %{ + predicate(UseFMA 
&& n->as_Vector()->length() == 4); + match(Set dst (FmaVF (NegVF src3) (Binary (NegVF src1) src2))); + match(Set dst (FmaVF (NegVF src3) (Binary src1 (NegVF src2)))); + format %{ "vfnmadd.s $dst, $src1, $src2, $src3\t# @nmadd4F" %} + ins_encode %{ + __ vfnmadd_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +// -src1 * src2 - src3 +instruct nmadd2D(vecX dst, vecX src1, vecX src2, vecX src3) %{ + predicate(UseFMA && n->as_Vector()->length() == 2); + match(Set dst (FmaVD (NegVD src3) (Binary (NegVD src1) src2))); + match(Set dst (FmaVD (NegVD src3) (Binary src1 (NegVD src2)))); + format %{ "vfnmadd.d $dst, $src1, $src2, $src3\t# @nmadd2D" %} + ins_encode %{ + __ vfnmadd_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +// -src1 * src2 - src3 +instruct nmadd8F(vecY dst, vecY src1, vecY src2, vecY src3) %{ + predicate(UseFMA && n->as_Vector()->length() == 8); + match(Set dst (FmaVF (NegVF src3) (Binary (NegVF src1) src2))); + match(Set dst (FmaVF (NegVF src3) (Binary src1 (NegVF src2)))); + format %{ "xvfnmadd.s $dst, $src1, $src2, $src3\t# @nmadd8F" %} + ins_encode %{ + __ xvfnmadd_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +// -src1 * src2 - src3 +instruct nmadd4D(vecY dst, vecY src1, vecY src2, vecY src3) %{ + predicate(UseFMA && n->as_Vector()->length() == 4); + match(Set dst (FmaVD (NegVD src3) (Binary (NegVD src1) src2))); + match(Set dst (FmaVD (NegVD src3) (Binary src1 (NegVD src2)))); + format %{ "xvfnmadd.d $dst, $src1, $src2, $src3\t# @nmadd4D" %} + ins_encode %{ + __ xvfnmadd_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +// --------------------------------- FNMSUB ----------------------------------- + +// -src1 * src2 + src3 +instruct nmsub4F(vecX dst, vecX src1, vecX src2, vecX src3) %{ + predicate(UseFMA && n->as_Vector()->length() == 4); + match(Set dst (FmaVF src3 (Binary (NegVF src1) src2))); + match(Set dst (FmaVF src3 (Binary src1 (NegVF src2)))); + format %{ "vfnmsub.s $dst, $src1, $src2, $src3\t# @nmsub4F" %} + ins_encode %{ + __ vfnmsub_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +// -src1 * src2 + src3 +instruct nmsub2D(vecX dst, vecX src1, vecX src2, vecX src3) %{ + predicate(UseFMA && n->as_Vector()->length() == 2); + match(Set dst (FmaVD src3 (Binary (NegVD src1) src2))); + match(Set dst (FmaVD src3 (Binary src1 (NegVD src2)))); + format %{ "vfnmsub.d $dst, $src1, $src2, $src3\t# @nmsub2D" %} + ins_encode %{ + __ vfnmsub_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +// -src1 * src2 + src3 +instruct nmsub8F(vecY dst, vecY src1, vecY src2, vecY src3) %{ + predicate(UseFMA && n->as_Vector()->length() == 8); + match(Set dst (FmaVF src3 (Binary (NegVF src1) src2))); + match(Set dst (FmaVF src3 (Binary src1 (NegVF src2)))); + format %{ "xvfnmsub.s $dst, $src1, $src2, $src3\t# @nmsub8F" %} + ins_encode %{ + __ xvfnmsub_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +// -src1 * src2 + src3 +instruct nmsub4D(vecY dst, vecY src1, vecY src2, vecY src3) %{ + predicate(UseFMA && n->as_Vector()->length() == 4); + match(Set dst 
(FmaVD src3 (Binary (NegVD src1) src2))); + match(Set dst (FmaVD src3 (Binary src1 (NegVD src2)))); + format %{ "xvfnmsub.d $dst, $src1, $src2, $src3\t# @nmsub4D" %} + ins_encode %{ + __ xvfnmsub_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +// ------------------------------ Shift --------------------------------------- + +instruct shiftcntX(vecX dst, mRegI cnt) %{ + predicate(n->as_Vector()->length_in_bytes() == 16); + match(Set dst (LShiftCntV cnt)); + match(Set dst (RShiftCntV cnt)); + format %{ "vreplgr2vr.b $dst, $cnt\t# @shiftcntX" %} + ins_encode %{ + __ vreplgr2vr_b($dst$$FloatRegister, $cnt$$Register); + %} + ins_pipe( pipe_slow ); +%} + +instruct shiftcntY(vecY dst, mRegI cnt) %{ + predicate(n->as_Vector()->length_in_bytes() == 32); + match(Set dst (LShiftCntV cnt)); + match(Set dst (RShiftCntV cnt)); + format %{ "xvreplgr2vr.b $dst, $cnt\t# @shiftcntY" %} + ins_encode %{ + __ xvreplgr2vr_b($dst$$FloatRegister, $cnt$$Register); + %} + ins_pipe( pipe_slow ); +%} + +// ------------------------------ LeftShift ----------------------------------- + +instruct sll16B(vecX dst, vecX src, vecX shift, vecX tmp) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (LShiftVB src shift)); + effect(TEMP_DEF dst, TEMP tmp); + format %{ "vsll $dst, $src, $shift\t# TEMP($tmp) @sll16B" %} + ins_encode %{ + __ vsll_b($tmp$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); + __ vslti_bu($dst$$FloatRegister, $shift$$FloatRegister, 0x8); + __ vand_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct sll16B_imm(vecX dst, vecX src, immI shift) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (LShiftVB src shift)); + format %{ "vslli.b $dst, $src, $shift\t# @sll16B_imm" %} + ins_encode %{ + if ($shift$$constant >= 8) { + __ vxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); + } else { + __ vslli_b($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); + } + %} + ins_pipe( pipe_slow ); +%} + +instruct sll8S(vecX dst, vecX src, vecX shift, vecX tmp) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (LShiftVS src shift)); + effect(TEMP_DEF dst, TEMP tmp); + format %{ "vsll $dst, $src, $shift\t# TEMP($tmp) @sll8S" %} + ins_encode %{ + __ vsll_h($tmp$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); + __ vslti_bu($dst$$FloatRegister, $shift$$FloatRegister, 0x10); + __ vand_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct sll8S_imm(vecX dst, vecX src, immI shift) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (LShiftVS src shift)); + format %{ "vslli.h $dst, $src, $shift\t# @sll8S_imm" %} + ins_encode %{ + if ($shift$$constant >= 16) { + __ vxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); + } else { + __ vslli_h($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); + } + %} + ins_pipe( pipe_slow ); +%} + +instruct sll4I(vecX dst, vecX src, vecX shift) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (LShiftVI src shift)); + format %{ "vsll.w $dst, $src, $shift\t# @sll4I" %} + ins_encode %{ + __ vsll_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct sll4I_imm(vecX dst, vecX src, immI shift) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (LShiftVI src shift)); + format %{ "vslli.w $dst, 
$src, $shift\t# @sll4I_imm" %} + ins_encode %{ + __ vslli_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); + %} + ins_pipe( pipe_slow ); +%} + +instruct sll2L(vecX dst, vecX src, vecX shift) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (LShiftVL src shift)); + format %{ "vsll.d $dst, $src, $shift\t# @sll2L" %} + ins_encode %{ + __ vsll_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct sll2L_imm(vecX dst, vecX src, immI shift) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (LShiftVL src shift)); + format %{ "vslli.d $dst, $src, $shift\t# @sll2L_imm" %} + ins_encode %{ + __ vslli_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); + %} + ins_pipe( pipe_slow ); +%} + +instruct sll32B(vecY dst, vecY src, vecY shift, vecY tmp) %{ + predicate(n->as_Vector()->length() == 32); + match(Set dst (LShiftVB src shift)); + effect(TEMP_DEF dst, TEMP tmp); + format %{ "xvsll $dst, $src, $shift\t# TEMP($tmp) @sll32B" %} + ins_encode %{ + __ xvsll_b($tmp$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); + __ xvslti_bu($dst$$FloatRegister, $shift$$FloatRegister, 0x8); + __ xvand_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct sll32B_imm(vecY dst, vecY src, immI shift) %{ + predicate(n->as_Vector()->length() == 32); + match(Set dst (LShiftVB src shift)); + format %{ "xvslli.b $dst, $src, $shift\t# @sll32B_imm" %} + ins_encode %{ + if ($shift$$constant >= 8) { + __ xvxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); + } else { + __ xvslli_b($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); + } + %} + ins_pipe( pipe_slow ); +%} + +instruct sll16S(vecY dst, vecY src, vecY shift, vecY tmp) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (LShiftVS src shift)); + effect(TEMP_DEF dst, TEMP tmp); + format %{ "xvsll $dst, $src, $shift\t# TEMP($tmp) @sll16S" %} + ins_encode %{ + __ xvsll_h($tmp$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); + __ xvslti_bu($dst$$FloatRegister, $shift$$FloatRegister, 0x10); + __ xvand_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct sll16S_imm(vecY dst, vecY src, immI shift) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (LShiftVS src shift)); + format %{ "xvslli.h $dst, $src, $shift\t# @sll16S_imm" %} + ins_encode %{ + if ($shift$$constant >= 16) { + __ xvxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); + } else { + __ xvslli_h($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); + } + %} + ins_pipe( pipe_slow ); +%} + +instruct sll8I(vecY dst, vecY src, vecY shift) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (LShiftVI src shift)); + format %{ "xvsll.w $dst, $src, $shift\t# @sll8I" %} + ins_encode %{ + __ xvsll_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct sll8I_imm(vecY dst, vecY src, immI shift) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (LShiftVI src shift)); + format %{ "xvslli.w $dst, $src, $shift\t# @sll8I_imm" %} + ins_encode %{ + __ xvslli_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); + %} + ins_pipe( pipe_slow ); +%} + +instruct sll4L(vecY dst, vecY src, vecY shift) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (LShiftVL src shift)); + format %{ "xvsll.d $dst, $src, $shift\t# 
@sll4L" %} + ins_encode %{ + __ xvsll_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct sll4L_imm(vecY dst, vecY src, immI shift) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (LShiftVL src shift)); + format %{ "xvslli.d $dst, $src, $shift\t# @sll4L_imm" %} + ins_encode %{ + __ xvslli_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); + %} + ins_pipe( pipe_slow ); +%} + +// ----------------------- LogicalRightShift ---------------------------------- + +instruct srl16B(vecX dst, vecX src, vecX shift, vecX tmp) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (URShiftVB src shift)); + effect(TEMP_DEF dst, TEMP tmp); + format %{ "vsrl $dst, $src, $shift\t# TEMP($tmp) @srl16B" %} + ins_encode %{ + __ vsrl_b($tmp$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); + __ vslti_bu($dst$$FloatRegister, $shift$$FloatRegister, 0x8); + __ vand_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct srl16B_imm(vecX dst, vecX src, immI shift) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (URShiftVB src shift)); + format %{ "vsrli.b $dst, $src, $shift\t# @srl16B_imm" %} + ins_encode %{ + if ($shift$$constant >= 8) { + __ vxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); + } else { + __ vsrli_b($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); + } + %} + ins_pipe( pipe_slow ); +%} + +instruct srl8S(vecX dst, vecX src, vecX shift, vecX tmp) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (URShiftVS src shift)); + effect(TEMP_DEF dst, TEMP tmp); + format %{ "vsrl $dst, $src, $shift\t# TEMP($tmp) @srl8S" %} + ins_encode %{ + __ vsrl_h($tmp$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); + __ vslti_bu($dst$$FloatRegister, $shift$$FloatRegister, 0x10); + __ vand_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct srl8S_imm(vecX dst, vecX src, immI shift) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (URShiftVS src shift)); + format %{ "vsrli.h $dst, $src, $shift\t# @srl8S_imm" %} + ins_encode %{ + if ($shift$$constant >= 16) { + __ vxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); + } else { + __ vsrli_h($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); + } + %} + ins_pipe( pipe_slow ); +%} + +instruct srl4I(vecX dst, vecX src, vecX shift) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (URShiftVI src shift)); + format %{ "vsrl.w $dst, $src, $shift\t# @srl4I" %} + ins_encode %{ + __ vsrl_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct srl4I_imm(vecX dst, vecX src, immI shift) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (URShiftVI src shift)); + format %{ "vsrli.w $dst, $src, $shift\t# @srl4I_imm" %} + ins_encode %{ + __ vsrli_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); + %} + ins_pipe( pipe_slow ); +%} + +instruct srl2L(vecX dst, vecX src, vecX shift) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (URShiftVL src shift)); + format %{ "vsrl.d $dst, $src, $shift\t# @srl2L" %} + ins_encode %{ + __ vsrl_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct srl2L_imm(vecX dst, vecX src, immI shift) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (URShiftVL 
src shift)); + format %{ "vsrli.d $dst, $src, $shift\t# @srl2L_imm" %} + ins_encode %{ + __ vsrli_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); + %} + ins_pipe( pipe_slow ); +%} + +instruct srl32B(vecY dst, vecY src, vecY shift, vecY tmp) %{ + predicate(n->as_Vector()->length() == 32); + match(Set dst (URShiftVB src shift)); + effect(TEMP_DEF dst, TEMP tmp); + format %{ "xvsrl $dst, $src, $shift\t# TEMP($tmp) @srl32B" %} + ins_encode %{ + __ xvsrl_b($tmp$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); + __ xvslti_bu($dst$$FloatRegister, $shift$$FloatRegister, 0x8); + __ xvand_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct srl32B_imm(vecY dst, vecY src, immI shift) %{ + predicate(n->as_Vector()->length() == 32); + match(Set dst (URShiftVB src shift)); + format %{ "xvsrli.b $dst, $src, $shift\t# @srl32B_imm" %} + ins_encode %{ + if ($shift$$constant >= 8) { + __ xvxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); + } else { + __ xvsrli_b($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); + } + %} + ins_pipe( pipe_slow ); +%} + +instruct srl16S(vecY dst, vecY src, vecY shift, vecY tmp) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (URShiftVS src shift)); + effect(TEMP_DEF dst, TEMP tmp); + format %{ "xvsrl $dst, $src, $shift\t# TEMP($tmp) @srl16S" %} + ins_encode %{ + __ xvsrl_h($tmp$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); + __ xvslti_bu($dst$$FloatRegister, $shift$$FloatRegister, 0x10); + __ xvand_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct srl16S_imm(vecY dst, vecY src, immI shift) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (URShiftVS src shift)); + format %{ "xvsrli.h $dst, $src, $shift\t# @srl16S_imm" %} + ins_encode %{ + if ($shift$$constant >= 16) { + __ xvxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); + } else { + __ xvsrli_h($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); + } + %} + ins_pipe( pipe_slow ); +%} + +instruct srl8I(vecY dst, vecY src, vecY shift) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (URShiftVI src shift)); + format %{ "xvsrl.w $dst, $src, $shift\t# @srl8I" %} + ins_encode %{ + __ xvsrl_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct srl8I_imm(vecY dst, vecY src, immI shift) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (URShiftVI src shift)); + format %{ "xvsrli.w $dst, $src, $shift\t# @srl8I_imm" %} + ins_encode %{ + __ xvsrli_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); + %} + ins_pipe( pipe_slow ); +%} + +instruct srl4L(vecY dst, vecY src, vecY shift) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (URShiftVL src shift)); + format %{ "xvsrl.d $dst, $src, $shift\t# @srl4L" %} + ins_encode %{ + __ xvsrl_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct srl4L_imm(vecY dst, vecY src, immI shift) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (URShiftVL src shift)); + format %{ "xvsrli.d $dst, $src, $shift\t# @srl4L_imm" %} + ins_encode %{ + __ xvsrli_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); + %} + ins_pipe( pipe_slow ); +%} + +// ------------------------- ArithmeticRightShift ----------------------------- + +instruct sra16B(vecX dst, vecX src, vecX 
shift, vecX tmp) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (RShiftVB src shift)); + effect(TEMP tmp); + format %{ "vsra $dst, $src, $shift\t# TEMP($tmp) @sra16B" %} + ins_encode %{ + __ vslti_bu($tmp$$FloatRegister, $shift$$FloatRegister, 0x8); + __ vorn_v($tmp$$FloatRegister, $shift$$FloatRegister, $tmp$$FloatRegister); + __ vsra_b($dst$$FloatRegister, $src$$FloatRegister, $tmp$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct sra16B_imm(vecX dst, vecX src, immI shift) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (RShiftVB src shift)); + format %{ "vsrai.b $dst, $src, $shift\t# @sra16B_imm" %} + ins_encode %{ + if ($shift$$constant >= 8) { + __ vsrai_b($dst$$FloatRegister, $src$$FloatRegister, 7); + } else { + __ vsrai_b($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); + } + %} + ins_pipe( pipe_slow ); +%} + +instruct sra8S(vecX dst, vecX src, vecX shift, vecX tmp) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (RShiftVS src shift)); + effect(TEMP tmp); + format %{ "vsra $dst, $src, $shift\t# TEMP($tmp) @sra8S" %} + ins_encode %{ + __ vslti_bu($tmp$$FloatRegister, $shift$$FloatRegister, 0x10); + __ vorn_v($tmp$$FloatRegister, $shift$$FloatRegister, $tmp$$FloatRegister); + __ vsra_h($dst$$FloatRegister, $src$$FloatRegister, $tmp$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct sra8S_imm(vecX dst, vecX src, immI shift) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (RShiftVS src shift)); + format %{ "vsrai.h $dst, $src, $shift\t# @sra8S_imm" %} + ins_encode %{ + if ($shift$$constant >= 16) { + __ vsrai_h($dst$$FloatRegister, $src$$FloatRegister, 15); + } else { + __ vsrai_h($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); + } + %} + ins_pipe( pipe_slow ); +%} + +instruct sra4I(vecX dst, vecX src, vecX shift) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (RShiftVI src shift)); + format %{ "vsra.w $dst, $src, $shift\t# @sra4I" %} + ins_encode %{ + __ vsra_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct sra4I_imm(vecX dst, vecX src, immI shift) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (RShiftVI src shift)); + format %{ "vsrai.w $dst, $src, $shift\t# @sra4I_imm" %} + ins_encode %{ + __ vsrai_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); + %} + ins_pipe( pipe_slow ); +%} + +instruct sra2L(vecX dst, vecX src, vecX shift) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (RShiftVL src shift)); + format %{ "vsra.d $dst, $src, $shift\t# @sra2L" %} + ins_encode %{ + __ vsra_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct sra2L_imm(vecX dst, vecX src, immI shift) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (RShiftVL src shift)); + format %{ "vsrai.d $dst, $src, $shift\t# @sra2L_imm" %} + ins_encode %{ + __ vsrai_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); + %} + ins_pipe( pipe_slow ); +%} + +instruct sra32B(vecY dst, vecY src, vecY shift, vecY tmp) %{ + predicate(n->as_Vector()->length() == 32); + match(Set dst (RShiftVB src shift)); + effect(TEMP tmp); + format %{ "xvsra $dst, $src, $shift\t# TEMP($tmp) @sra32B" %} + ins_encode %{ + __ xvslti_bu($tmp$$FloatRegister, $shift$$FloatRegister, 0x8); + __ xvorn_v($tmp$$FloatRegister, $shift$$FloatRegister, $tmp$$FloatRegister); + __ xvsra_b($dst$$FloatRegister, $src$$FloatRegister, 
$tmp$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct sra32B_imm(vecY dst, vecY src, immI shift) %{ + predicate(n->as_Vector()->length() == 32); + match(Set dst (RShiftVB src shift)); + format %{ "xvsrai.b $dst, $src, $shift\t# @sra32B_imm" %} + ins_encode %{ + if ($shift$$constant >= 8) { + __ xvsrai_b($dst$$FloatRegister, $src$$FloatRegister, 7); + } else { + __ xvsrai_b($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); + } + %} + ins_pipe( pipe_slow ); +%} + +instruct sra16S(vecY dst, vecY src, vecY shift, vecY tmp) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (RShiftVS src shift)); + effect(TEMP tmp); + format %{ "xvsra $dst, $src, $shift\t# TEMP($tmp) @sra16S" %} + ins_encode %{ + __ xvslti_bu($tmp$$FloatRegister, $shift$$FloatRegister, 0x10); + __ xvorn_v($tmp$$FloatRegister, $shift$$FloatRegister, $tmp$$FloatRegister); + __ xvsra_h($dst$$FloatRegister, $src$$FloatRegister, $tmp$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct sra16S_imm(vecY dst, vecY src, immI shift) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (RShiftVS src shift)); + format %{ "xvsrai.h $dst, $src, $shift\t# @sra16S_imm" %} + ins_encode %{ + if ($shift$$constant >= 16) { + __ xvsrai_h($dst$$FloatRegister, $src$$FloatRegister, 15); + } else { + __ xvsrai_h($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); + } + %} + ins_pipe( pipe_slow ); +%} + +instruct sra8I(vecY dst, vecY src, vecY shift) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (RShiftVI src shift)); + format %{ "xvsra.w $dst, $src, $shift\t# @sra8I" %} + ins_encode %{ + __ xvsra_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct sra8I_imm(vecY dst, vecY src, immI shift) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (RShiftVI src shift)); + format %{ "xvsrai.w $dst, $src, $shift\t# @sra8I_imm" %} + ins_encode %{ + __ xvsrai_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); + %} + ins_pipe( pipe_slow ); +%} + +instruct sra4L(vecY dst, vecY src, vecY shift) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (RShiftVL src shift)); + format %{ "xvsra.d $dst, $src, $shift\t# @sra4L" %} + ins_encode %{ + __ xvsra_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct sra4L_imm(vecY dst, vecY src, immI shift) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (RShiftVL src shift)); + format %{ "xvsrai.d $dst, $src, $shift\t# @sra4L_imm" %} + ins_encode %{ + __ xvsrai_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); + %} + ins_pipe( pipe_slow ); +%} + +// --------------------------------- AND -------------------------------------- + +instruct andV16(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length_in_bytes() == 16); + match(Set dst (AndV src1 src2)); + format %{ "vand.v $dst, $src1, $src2\t# @andV16" %} + ins_encode %{ + __ vand_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct and16B_imm(vecX dst, vecX src, immIU8 imm) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (AndV src (ReplicateB imm))); + format %{ "vandi.b $dst, $src, $imm\t# @and16B_imm" %} + ins_encode %{ + __ vandi_b($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); + %} + ins_pipe( pipe_slow ); +%} + +instruct andV32(vecY dst, vecY src1, vecY src2) %{ + predicate(n->as_Vector()->length_in_bytes() == 
32); + match(Set dst (AndV src1 src2)); + format %{ "xvand.v $dst, $src1, $src2\t# @andV32" %} + ins_encode %{ + __ xvand_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct and32B_imm(vecY dst, vecY src, immIU8 imm) %{ + predicate(n->as_Vector()->length() == 32); + match(Set dst (AndV src (ReplicateB imm))); + format %{ "xvandi.b $dst, $src, $imm\t# @and32B_imm" %} + ins_encode %{ + __ xvandi_b($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); + %} + ins_pipe( pipe_slow ); +%} + +// --------------------------------- OR --------------------------------------- + +instruct orV16(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length_in_bytes() == 16); + match(Set dst (OrV src1 src2)); + format %{ "vor.v $dst, $src1, $src2\t# @orV16" %} + ins_encode %{ + __ vor_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct or16B_imm(vecX dst, vecX src, immIU8 imm) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (OrV src (ReplicateB imm))); + format %{ "vori.b $dst, $src, $imm\t# @or16B_imm" %} + ins_encode %{ + __ vori_b($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); + %} + ins_pipe( pipe_slow ); +%} + +instruct orV32(vecY dst, vecY src1, vecY src2) %{ + predicate(n->as_Vector()->length_in_bytes() == 32); + match(Set dst (OrV src1 src2)); + format %{ "xvor.v $dst, $src1, $src2\t# @orV32" %} + ins_encode %{ + __ xvor_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct or32B_imm(vecY dst, vecY src, immIU8 imm) %{ + predicate(n->as_Vector()->length() == 32); + match(Set dst (OrV src (ReplicateB imm))); + format %{ "xvori.b $dst, $src, $imm\t# @or32B_imm" %} + ins_encode %{ + __ xvori_b($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); + %} + ins_pipe( pipe_slow ); +%} + +// --------------------------------- XOR -------------------------------------- + +instruct xorV16(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length_in_bytes() == 16); + match(Set dst (XorV src1 src2)); + format %{ "vxor.v $dst, $src1, $src2\t# @xorV16" %} + ins_encode %{ + __ vxor_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct xor16B_imm(vecX dst, vecX src, immIU8 imm) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (XorV src (ReplicateB imm))); + format %{ "vxori.b $dst, $src, $imm\t# @xor16B_imm" %} + ins_encode %{ + __ vxori_b($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); + %} + ins_pipe( pipe_slow ); +%} + +instruct xorV32(vecY dst, vecY src1, vecY src2) %{ + predicate(n->as_Vector()->length_in_bytes() == 32); + match(Set dst (XorV src1 src2)); + format %{ "xvxor.v $dst, $src1, $src2\t# @xorV32" %} + ins_encode %{ + __ xvxor_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct xor32B_imm(vecX dst, vecX src, immIU8 imm) %{ + predicate(n->as_Vector()->length() == 32); + match(Set dst (XorV src (ReplicateB imm))); + format %{ "xvxori.b $dst, $src, $imm\t# @xor32B_imm" %} + ins_encode %{ + __ xvxori_b($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); + %} + ins_pipe( pipe_slow ); +%} + +// --------------------------------- NOR -------------------------------------- + +instruct norV16(vecX dst, vecX src1, vecX src2, immI_M1 m1) %{ + predicate(n->as_Vector()->length_in_bytes() == 16); + match(Set dst (XorV (OrV src1 
src2) (ReplicateB m1))); + match(Set dst (XorV (OrV src1 src2) (ReplicateS m1))); + match(Set dst (XorV (OrV src1 src2) (ReplicateI m1))); + format %{ "vnor.v $dst, $src1, $src2\t# @norV16" %} + ins_encode %{ + __ vnor_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct nor16B_imm(vecX dst, vecX src, immIU8 imm, immI_M1 m1) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (XorV (OrV src (ReplicateB imm)) (ReplicateB m1))); + format %{ "vnori.b $dst, $src, $imm\t# @nor16B_imm" %} + ins_encode %{ + __ vnori_b($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); + %} + ins_pipe( pipe_slow ); +%} + +instruct norV32(vecY dst, vecY src1, vecY src2, immI_M1 m1) %{ + predicate(n->as_Vector()->length_in_bytes() == 32); + match(Set dst (XorV (OrV src1 src2) (ReplicateB m1))); + match(Set dst (XorV (OrV src1 src2) (ReplicateS m1))); + match(Set dst (XorV (OrV src1 src2) (ReplicateI m1))); + format %{ "xvnor.v $dst, $src1, $src2\t# @norV32" %} + ins_encode %{ + __ xvnor_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct nor32B_imm(vecY dst, vecY src, immIU8 imm, immI_M1 m1) %{ + predicate(n->as_Vector()->length() == 32); + match(Set dst (XorV (OrV src (ReplicateB imm)) (ReplicateB m1))); + format %{ "xvnori.b $dst, $src, $imm\t# @nor32B_imm" %} + ins_encode %{ + __ xvnori_b($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); + %} + ins_pipe( pipe_slow ); +%} + +// --------------------------------- ANDN ------------------------------------- + +instruct andnV16(vecX dst, vecX src1, vecX src2, immI_M1 m1) %{ + predicate(n->as_Vector()->length_in_bytes() == 16); + match(Set dst (AndV src2 (XorV src1 (ReplicateB m1)))); + match(Set dst (AndV src2 (XorV src1 (ReplicateS m1)))); + match(Set dst (AndV src2 (XorV src1 (ReplicateI m1)))); + format %{ "vandn.v $dst, $src1, $src2\t# @andnV16" %} + ins_encode %{ + __ vandn_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct andnV32(vecY dst, vecY src1, vecY src2, immI_M1 m1) %{ + predicate(n->as_Vector()->length_in_bytes() == 32); + match(Set dst (AndV src2 (XorV src1 (ReplicateB m1)))); + match(Set dst (AndV src2 (XorV src1 (ReplicateS m1)))); + match(Set dst (AndV src2 (XorV src1 (ReplicateI m1)))); + format %{ "xvandn.v $dst, $src1, $src2\t# @andnV32" %} + ins_encode %{ + __ xvandn_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +// --------------------------------- ORN -------------------------------------- + +instruct ornV16(vecX dst, vecX src1, vecX src2, immI_M1 m1) %{ + predicate(n->as_Vector()->length_in_bytes() == 16); + match(Set dst (OrV src1 (XorV src2 (ReplicateB m1)))); + match(Set dst (OrV src1 (XorV src2 (ReplicateS m1)))); + match(Set dst (OrV src1 (XorV src2 (ReplicateI m1)))); + format %{ "vorn.v $dst, $src1, $src2\t# @ornV16" %} + ins_encode %{ + __ vorn_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct ornV32(vecY dst, vecY src1, vecY src2, immI_M1 m1) %{ + predicate(n->as_Vector()->length_in_bytes() == 32); + match(Set dst (OrV src1 (XorV src2 (ReplicateB m1)))); + match(Set dst (OrV src1 (XorV src2 (ReplicateS m1)))); + match(Set dst (OrV src1 (XorV src2 (ReplicateI m1)))); + format %{ "xvorn.v $dst, $src1, $src2\t# @ornV32" %} + ins_encode %{ + __ xvorn_v($dst$$FloatRegister, $src1$$FloatRegister, 
$src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +// ----------------------------- Reduction Add -------------------------------- + +instruct reduce_add16B(mRegI dst, mRegI src, vecX vsrc, vecX tmp1, vecX tmp2) %{ + predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); + match(Set dst (AddReductionVI src vsrc)); + effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); + format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_add16B" %} + ins_encode %{ + __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_BYTE, this->ideal_Opcode(), 16); + %} + ins_pipe(pipe_slow); +%} + +instruct reduce_add8S(mRegI dst, mRegI src, vecX vsrc, vecX tmp1, vecX tmp2) %{ + predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); + match(Set dst (AddReductionVI src vsrc)); + effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); + format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_add8S" %} + ins_encode %{ + __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_SHORT, this->ideal_Opcode(), 16); + %} + ins_pipe(pipe_slow); +%} + +instruct reduce_add4I(mRegI dst, mRegI src, vecX vsrc, vecX tmp1, vecX tmp2) %{ + predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); + match(Set dst (AddReductionVI src vsrc)); + effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); + format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_add4I" %} + ins_encode %{ + __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_INT, this->ideal_Opcode(), 16); + %} + ins_pipe(pipe_slow); +%} + +instruct reduce_add2L(mRegL dst, mRegL src, vecX vsrc, vecX tmp) %{ + predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); + match(Set dst (AddReductionVL src vsrc)); + effect(TEMP_DEF dst, TEMP tmp); + format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp) @reduce_add2L" %} + ins_encode %{ + __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp$$FloatRegister, FNOREG, T_LONG, this->ideal_Opcode(), 16); + %} + ins_pipe(pipe_slow); +%} + +instruct reduce_add4F(regF dst, regF src, vecX vsrc, vecX tmp) %{ + predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); + match(Set dst (AddReductionVF src vsrc)); + effect(TEMP_DEF dst, TEMP tmp); + format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp) @reduce_add4F" %} + ins_encode %{ + __ reduce($dst$$FloatRegister, $src$$FloatRegister, $vsrc$$FloatRegister, $tmp$$FloatRegister, T_FLOAT, this->ideal_Opcode(), 16); + %} + ins_pipe(pipe_slow); +%} + +instruct reduce_add2D(regD dst, regD src, vecX vsrc, vecX tmp) %{ + predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); + match(Set dst (AddReductionVD src vsrc)); + effect(TEMP_DEF dst, TEMP tmp); + format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp) @reduce_add2D" %} + ins_encode %{ + __ reduce($dst$$FloatRegister, $src$$FloatRegister, $vsrc$$FloatRegister, $tmp$$FloatRegister, T_DOUBLE, this->ideal_Opcode(), 16); + %} + ins_pipe(pipe_slow); +%} + +instruct reduce_add32B(mRegI dst, mRegI src, vecY vsrc, vecY tmp1, vecY tmp2) %{ + predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); + match(Set dst (AddReductionVI src vsrc)); + effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); + format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_add32B" %} + ins_encode %{ + __ reduce($dst$$Register, $src$$Register, 
$vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_BYTE, this->ideal_Opcode(), 32); + %} + ins_pipe(pipe_slow); +%} + +instruct reduce_add16S(mRegI dst, mRegI src, vecY vsrc, vecY tmp1, vecY tmp2) %{ + predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); + match(Set dst (AddReductionVI src vsrc)); + effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); + format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_add16S" %} + ins_encode %{ + __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_SHORT, this->ideal_Opcode(), 32); + %} + ins_pipe(pipe_slow); +%} + +instruct reduce_add8I(mRegI dst, mRegI src, vecY vsrc, vecY tmp1, vecY tmp2) %{ + predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); + match(Set dst (AddReductionVI src vsrc)); + effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); + format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_add8I" %} + ins_encode %{ + __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_INT, this->ideal_Opcode(), 32); + %} + ins_pipe(pipe_slow); +%} + +instruct reduce_add4L(mRegL dst, mRegL src, vecY vsrc, vecY tmp1, vecY tmp2) %{ + predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); + match(Set dst (AddReductionVL src vsrc)); + effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); + format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_add4L" %} + ins_encode %{ + __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_LONG, this->ideal_Opcode(), 32); + %} + ins_pipe(pipe_slow); +%} + +instruct reduce_add8F(regF dst, regF src, vecY vsrc, vecY tmp) %{ + predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); + match(Set dst (AddReductionVF src vsrc)); + effect(TEMP_DEF dst, TEMP tmp); + format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp) @reduce_add8F" %} + ins_encode %{ + __ reduce($dst$$FloatRegister, $src$$FloatRegister, $vsrc$$FloatRegister, $tmp$$FloatRegister, T_FLOAT, this->ideal_Opcode(), 32); + %} + ins_pipe(pipe_slow); +%} + +instruct reduce_add4D(regD dst, regD src, vecY vsrc, vecY tmp) %{ + predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); + match(Set dst (AddReductionVD src vsrc)); + effect(TEMP_DEF dst, TEMP tmp); + format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp) @reduce_add4D" %} + ins_encode %{ + __ reduce($dst$$FloatRegister, $src$$FloatRegister, $vsrc$$FloatRegister, $tmp$$FloatRegister, T_DOUBLE, this->ideal_Opcode(), 32); + %} + ins_pipe(pipe_slow); +%} + +// ----------------------------- Reduction Mul -------------------------------- + +instruct reduce_mul16B(mRegI dst, mRegI src, vecX vsrc, vecX tmp1, vecX tmp2) %{ + predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); + match(Set dst (MulReductionVI src vsrc)); + effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); + format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_mul16B" %} + ins_encode %{ + __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_BYTE, this->ideal_Opcode(), 16); + %} + ins_pipe(pipe_slow); +%} + +instruct reduce_mul8S(mRegI dst, mRegI src, vecX vsrc, vecX tmp1, vecX tmp2) %{ + predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); + match(Set dst (MulReductionVI src vsrc)); + effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); + format %{ 
"reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_mul8S" %} + ins_encode %{ + __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_SHORT, this->ideal_Opcode(), 16); + %} + ins_pipe(pipe_slow); +%} + +instruct reduce_mul4I(mRegI dst, mRegI src, vecX vsrc, vecX tmp1, vecX tmp2) %{ + predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); + match(Set dst (MulReductionVI src vsrc)); + effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); + format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_mul4I" %} + ins_encode %{ + __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_INT, this->ideal_Opcode(), 16); + %} + ins_pipe(pipe_slow); +%} + +instruct reduce_mul2L(mRegL dst, mRegL src, vecX vsrc, vecX tmp) %{ + predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); + match(Set dst (MulReductionVL src vsrc)); + effect(TEMP_DEF dst, TEMP tmp); + format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp) @reduce_mul2L" %} + ins_encode %{ + __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp$$FloatRegister, FNOREG, T_LONG, this->ideal_Opcode(), 16); + %} + ins_pipe(pipe_slow); +%} + +instruct reduce_mul4F(regF dst, regF src, vecX vsrc, vecX tmp) %{ + predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); + match(Set dst (MulReductionVF src vsrc)); + effect(TEMP_DEF dst, TEMP tmp); + format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp) @reduce_mul4F" %} + ins_encode %{ + __ reduce($dst$$FloatRegister, $src$$FloatRegister, $vsrc$$FloatRegister, $tmp$$FloatRegister, T_FLOAT, this->ideal_Opcode(), 16); + %} + ins_pipe(pipe_slow); +%} + +instruct reduce_mul2D(regD dst, regD src, vecX vsrc, vecX tmp) %{ + predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); + match(Set dst (MulReductionVD src vsrc)); + effect(TEMP_DEF dst, TEMP tmp); + format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp) @reduce_mul2D" %} + ins_encode %{ + __ reduce($dst$$FloatRegister, $src$$FloatRegister, $vsrc$$FloatRegister, $tmp$$FloatRegister, T_DOUBLE, this->ideal_Opcode(), 16); + %} + ins_pipe(pipe_slow); +%} + +instruct reduce_mul32B(mRegI dst, mRegI src, vecY vsrc, vecY tmp1, vecY tmp2) %{ + predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); + match(Set dst (MulReductionVI src vsrc)); + effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); + format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_mul32B" %} + ins_encode %{ + __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_BYTE, this->ideal_Opcode(), 32); + %} + ins_pipe(pipe_slow); +%} + +instruct reduce_mul16S(mRegI dst, mRegI src, vecY vsrc, vecY tmp1, vecY tmp2) %{ + predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); + match(Set dst (MulReductionVI src vsrc)); + effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); + format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_mul16S" %} + ins_encode %{ + __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_SHORT, this->ideal_Opcode(), 32); + %} + ins_pipe(pipe_slow); +%} + +instruct reduce_mul8I(mRegI dst, mRegI src, vecY vsrc, vecY tmp1, vecY tmp2) %{ + predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); + match(Set dst (MulReductionVI src vsrc)); + effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); + format %{ "reduce 
$dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_mul8I" %} + ins_encode %{ + __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_INT, this->ideal_Opcode(), 32); + %} + ins_pipe(pipe_slow); +%} + +instruct reduce_mul4L(mRegL dst, mRegL src, vecY vsrc, vecY tmp1, vecY tmp2) %{ + predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); + match(Set dst (MulReductionVL src vsrc)); + effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); + format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_mul4L" %} + ins_encode %{ + __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_LONG, this->ideal_Opcode(), 32); + %} + ins_pipe(pipe_slow); +%} + +instruct reduce_mul8F(regF dst, regF src, vecY vsrc, vecY tmp) %{ + predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); + match(Set dst (MulReductionVF src vsrc)); + effect(TEMP_DEF dst, TEMP tmp); + format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp) @reduce_mul8F" %} + ins_encode %{ + __ reduce($dst$$FloatRegister, $src$$FloatRegister, $vsrc$$FloatRegister, $tmp$$FloatRegister, T_FLOAT, this->ideal_Opcode(), 32); + %} + ins_pipe(pipe_slow); +%} + +instruct reduce_mul4D(regD dst, regD src, vecY vsrc, vecY tmp) %{ + predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); + match(Set dst (MulReductionVD src vsrc)); + effect(TEMP_DEF dst, TEMP tmp); + format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp) @reduce_mul4D" %} + ins_encode %{ + __ reduce($dst$$FloatRegister, $src$$FloatRegister, $vsrc$$FloatRegister, $tmp$$FloatRegister, T_DOUBLE, this->ideal_Opcode(), 32); + %} + ins_pipe(pipe_slow); +%} + +// ----------------------------- Reduction Max -------------------------------- + +instruct reduce_max16B(mRegI dst, mRegI src, vecX vsrc, vecX tmp1, vecX tmp2) %{ + predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); + match(Set dst (MaxReductionV src vsrc)); + effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); + format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_max16B" %} + ins_encode %{ + __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_BYTE, this->ideal_Opcode(), 16); + %} + ins_pipe(pipe_slow); +%} + +instruct reduce_max8S(mRegI dst, mRegI src, vecX vsrc, vecX tmp1, vecX tmp2) %{ + predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); + match(Set dst (MaxReductionV src vsrc)); + effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); + format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_max8S" %} + ins_encode %{ + __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_SHORT, this->ideal_Opcode(), 16); + %} + ins_pipe(pipe_slow); +%} + +instruct reduce_max4I(mRegI dst, mRegI src, vecX vsrc, vecX tmp1, vecX tmp2) %{ + predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); + match(Set dst (MaxReductionV src vsrc)); + effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); + format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_max4I" %} + ins_encode %{ + __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_INT, this->ideal_Opcode(), 16); + %} + ins_pipe(pipe_slow); +%} + +instruct reduce_max2L(mRegL dst, mRegL src, vecX vsrc, vecX tmp) %{ + predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); + 
match(Set dst (MaxReductionV src vsrc)); + effect(TEMP_DEF dst, TEMP tmp); + format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp) @reduce_max2L" %} + ins_encode %{ + __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp$$FloatRegister, FNOREG, T_LONG, this->ideal_Opcode(), 16); + %} + ins_pipe(pipe_slow); +%} + +instruct reduce_max32B(mRegI dst, mRegI src, vecY vsrc, vecY tmp1, vecY tmp2) %{ + predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); + match(Set dst (MaxReductionV src vsrc)); + effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); + format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_max32B" %} + ins_encode %{ + __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_BYTE, this->ideal_Opcode(), 32); + %} + ins_pipe(pipe_slow); +%} + +instruct reduce_max16S(mRegI dst, mRegI src, vecY vsrc, vecY tmp1, vecY tmp2) %{ + predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); + match(Set dst (MaxReductionV src vsrc)); + effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); + format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_max16S" %} + ins_encode %{ + __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_SHORT, this->ideal_Opcode(), 32); + %} + ins_pipe(pipe_slow); +%} + +instruct reduce_max8I(mRegI dst, mRegI src, vecY vsrc, vecY tmp1, vecY tmp2) %{ + predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); + match(Set dst (MaxReductionV src vsrc)); + effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); + format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_max8I" %} + ins_encode %{ + __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_INT, this->ideal_Opcode(), 32); + %} + ins_pipe(pipe_slow); +%} + +instruct reduce_max4L(mRegL dst, mRegL src, vecY vsrc, vecY tmp1, vecY tmp2) %{ + predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); + match(Set dst (MaxReductionV src vsrc)); + effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); + format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_max4L" %} + ins_encode %{ + __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_LONG, this->ideal_Opcode(), 32); + %} + ins_pipe(pipe_slow); +%} + +// ----------------------------- Reduction Min -------------------------------- + +instruct reduce_min16B(mRegI dst, mRegI src, vecX vsrc, vecX tmp1, vecX tmp2) %{ + predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); + match(Set dst (MinReductionV src vsrc)); + effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); + format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_min16B" %} + ins_encode %{ + __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_BYTE, this->ideal_Opcode(), 16); + %} + ins_pipe(pipe_slow); +%} + +instruct reduce_min8S(mRegI dst, mRegI src, vecX vsrc, vecX tmp1, vecX tmp2) %{ + predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); + match(Set dst (MinReductionV src vsrc)); + effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); + format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_min8S" %} + ins_encode %{ + __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_SHORT, this->ideal_Opcode(), 16); + %} + ins_pipe(pipe_slow); 
+%} + +instruct reduce_min4I(mRegI dst, mRegI src, vecX vsrc, vecX tmp1, vecX tmp2) %{ + predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); + match(Set dst (MinReductionV src vsrc)); + effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); + format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_min4I" %} + ins_encode %{ + __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_INT, this->ideal_Opcode(), 16); + %} + ins_pipe(pipe_slow); +%} + +instruct reduce_min2L(mRegL dst, mRegL src, vecX vsrc, vecX tmp) %{ + predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); + match(Set dst (MinReductionV src vsrc)); + effect(TEMP_DEF dst, TEMP tmp); + format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp) @reduce_min2L" %} + ins_encode %{ + __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp$$FloatRegister, FNOREG, T_LONG, this->ideal_Opcode(), 16); + %} + ins_pipe(pipe_slow); +%} + +instruct reduce_min32B(mRegI dst, mRegI src, vecY vsrc, vecY tmp1, vecY tmp2) %{ + predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); + match(Set dst (MinReductionV src vsrc)); + effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); + format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_min32B" %} + ins_encode %{ + __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_BYTE, this->ideal_Opcode(), 32); + %} + ins_pipe(pipe_slow); +%} + +instruct reduce_min16S(mRegI dst, mRegI src, vecY vsrc, vecY tmp1, vecY tmp2) %{ + predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); + match(Set dst (MinReductionV src vsrc)); + effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); + format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_min16S" %} + ins_encode %{ + __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_SHORT, this->ideal_Opcode(), 32); + %} + ins_pipe(pipe_slow); +%} + +instruct reduce_min8I(mRegI dst, mRegI src, vecY vsrc, vecY tmp1, vecY tmp2) %{ + predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); + match(Set dst (MinReductionV src vsrc)); + effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); + format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_min8I" %} + ins_encode %{ + __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_INT, this->ideal_Opcode(), 32); + %} + ins_pipe(pipe_slow); +%} + +instruct reduce_min4L(mRegL dst, mRegL src, vecY vsrc, vecY tmp1, vecY tmp2) %{ + predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); + match(Set dst (MinReductionV src vsrc)); + effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); + format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_min4L" %} + ins_encode %{ + __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_LONG, this->ideal_Opcode(), 32); + %} + ins_pipe(pipe_slow); +%} + +// ------------------------------ RoundDoubleModeV ---------------------------- + +instruct round2D(vecX dst, vecX src, immI rmode) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (RoundDoubleModeV src rmode)); + format %{ "vfrint $dst, $src, $rmode\t# @round2D" %} + ins_encode %{ + DEBUG_ONLY(Unimplemented()); // unverified + switch ($rmode$$constant) { + case 0: __ vfrintrne_d($dst$$FloatRegister, $src$$FloatRegister); break; + 
case 1: __ vfrintrm_d($dst$$FloatRegister, $src$$FloatRegister); break; + case 2: __ vfrintrp_d($dst$$FloatRegister, $src$$FloatRegister); break; + } + %} + ins_pipe( pipe_slow ); +%} + +instruct round4D(vecY dst, vecY src, immI rmode) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (RoundDoubleModeV src rmode)); + format %{ "xvfrint $dst, $src, $rmode\t# @round4D" %} + ins_encode %{ + DEBUG_ONLY(Unimplemented()); // unverified + switch ($rmode$$constant) { + case 0: __ xvfrintrne_d($dst$$FloatRegister, $src$$FloatRegister); break; + case 1: __ xvfrintrm_d($dst$$FloatRegister, $src$$FloatRegister); break; + case 2: __ xvfrintrp_d($dst$$FloatRegister, $src$$FloatRegister); break; + } + %} + ins_pipe( pipe_slow ); +%} + +// ---------------------------- PopCount -------------------------------------- + +instruct popcount4I(vecX dst, vecX src) %{ + predicate(UsePopCountInstruction && n->as_Vector()->length() == 4); + match(Set dst (PopCountVI src)); + format %{ "vpcnt.w $dst, $src\t# @popcount4I" %} + ins_encode %{ + DEBUG_ONLY(Unimplemented()); // unverified + __ vpcnt_w($dst$$FloatRegister, $src$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct popcount8I(vecY dst, vecY src) %{ + predicate(UsePopCountInstruction && n->as_Vector()->length() == 8); + match(Set dst (PopCountVI src)); + format %{ "xvpcnt.w $dst, $src\t# @popcount8I" %} + ins_encode %{ + DEBUG_ONLY(Unimplemented()); // unverified + __ xvpcnt_w($dst$$FloatRegister, $src$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + + +//----------PEEPHOLE RULES----------------------------------------------------- +// These must follow all instruction definitions as they use the names +// defined in the instructions definitions. +// +// peepmatch ( root_instr_name [preceeding_instruction]* ); +// +// peepconstraint %{ +// (instruction_number.operand_name relational_op instruction_number.operand_name +// [, ...] ); +// // instruction numbers are zero-based using left to right order in peepmatch +// +// peepreplace ( instr_name ( [instruction_number.operand_name]* ) ); +// // provide an instruction_number.operand_name for each operand that appears +// // in the replacement instruction's match rule +// +// ---------VM FLAGS--------------------------------------------------------- +// +// All peephole optimizations can be turned off using -XX:-OptoPeephole +// +// Each peephole rule is given an identifying number starting with zero and +// increasing by one in the order seen by the parser. An individual peephole +// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=# +// on the command-line. 
+// +// ---------CURRENT LIMITATIONS---------------------------------------------- +// +// Only match adjacent instructions in same basic block +// Only equality constraints +// Only constraints between operands, not (0.dest_reg == EAX_enc) +// Only one replacement instruction +// +// ---------EXAMPLE---------------------------------------------------------- +// +// // pertinent parts of existing instructions in architecture description +// instruct movI(eRegI dst, eRegI src) %{ +// match(Set dst (CopyI src)); +// %} +// +// instruct incI_eReg(eRegI dst, immI_1 src, eFlagsReg cr) %{ +// match(Set dst (AddI dst src)); +// effect(KILL cr); +// %} +// +// // Change (inc mov) to lea +// peephole %{ +// // increment preceeded by register-register move +// peepmatch ( incI_eReg movI ); +// // require that the destination register of the increment +// // match the destination register of the move +// peepconstraint ( 0.dst == 1.dst ); +// // construct a replacement instruction that sets +// // the destination to ( move's source register + one ) +// peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) ); +// %} +// +// Implementation no longer uses movX instructions since +// machine-independent system no longer uses CopyX nodes. +// +// peephole %{ +// peepmatch ( incI_eReg movI ); +// peepconstraint ( 0.dst == 1.dst ); +// peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) ); +// %} +// +// peephole %{ +// peepmatch ( decI_eReg movI ); +// peepconstraint ( 0.dst == 1.dst ); +// peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) ); +// %} +// +// peephole %{ +// peepmatch ( addI_eReg_imm movI ); +// peepconstraint ( 0.dst == 1.dst ); +// peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) ); +// %} +// +// peephole %{ +// peepmatch ( addP_eReg_imm movP ); +// peepconstraint ( 0.dst == 1.dst ); +// peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) ); +// %} + +// // Change load of spilled value to only a spill +// instruct storeI(memory mem, eRegI src) %{ +// match(Set mem (StoreI mem src)); +// %} +// +// instruct loadI(eRegI dst, memory mem) %{ +// match(Set dst (LoadI mem)); +// %} +// +//peephole %{ +// peepmatch ( loadI storeI ); +// peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem ); +// peepreplace ( storeI( 1.mem 1.mem 1.src ) ); +//%} + +//----------SMARTSPILL RULES--------------------------------------------------- +// These must follow all instruction definitions as they use the names +// defined in the instructions definitions. + diff --git a/src/hotspot/cpu/loongarch/macroAssembler_loongarch.cpp b/src/hotspot/cpu/loongarch/macroAssembler_loongarch.cpp new file mode 100644 index 00000000000..9720fd176d0 --- /dev/null +++ b/src/hotspot/cpu/loongarch/macroAssembler_loongarch.cpp @@ -0,0 +1,4567 @@ +/* + * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2017, 2023, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "jvm.h" +#include "asm/assembler.hpp" +#include "asm/assembler.inline.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "compiler/disassembler.hpp" +#include "gc/shared/barrierSet.hpp" +#include "gc/shared/barrierSetAssembler.hpp" +#include "gc/shared/collectedHeap.inline.hpp" +#include "interpreter/interpreter.hpp" +#include "memory/resourceArea.hpp" +#include "memory/universe.hpp" +#include "nativeInst_loongarch.hpp" +#include "prims/methodHandles.hpp" +#include "runtime/biasedLocking.hpp" +#include "runtime/interfaceSupport.inline.hpp" +#include "runtime/objectMonitor.hpp" +#include "runtime/os.hpp" +#include "runtime/safepoint.hpp" +#include "runtime/safepointMechanism.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#include "utilities/macros.hpp" + +#ifdef COMPILER2 +#include "opto/compile.hpp" +#include "opto/intrinsicnode.hpp" +#endif + +#define T0 RT0 +#define T1 RT1 +#define T2 RT2 +#define T3 RT3 +#define T4 RT4 +#define T5 RT5 +#define T6 RT6 +#define T7 RT7 +#define T8 RT8 + +// Implementation of MacroAssembler + +intptr_t MacroAssembler::i[32] = {0}; +float MacroAssembler::f[32] = {0.0}; + +void MacroAssembler::print(outputStream *s) { + unsigned int k; + for(k=0; k<32; k++) { + s->print_cr("i%d = 0x%.16lx", k, i[k]); + } + s->cr(); + + for(k=0; k<32; k++) { + s->print_cr("f%d = %f", k, f[k]); + } + s->cr(); +} + +int MacroAssembler::i_offset(unsigned int k) { return (intptr_t)&((MacroAssembler*)0)->i[k]; } +int MacroAssembler::f_offset(unsigned int k) { return (intptr_t)&((MacroAssembler*)0)->f[k]; } + +void MacroAssembler::save_registers(MacroAssembler *masm) { +#define __ masm-> + for(int k=0; k<32; k++) { + __ st_w (as_Register(k), A0, i_offset(k)); + } + + for(int k=0; k<32; k++) { + __ fst_s (as_FloatRegister(k), A0, f_offset(k)); + } +#undef __ +} + +void MacroAssembler::restore_registers(MacroAssembler *masm) { +#define __ masm-> + for(int k=0; k<32; k++) { + __ ld_w (as_Register(k), A0, i_offset(k)); + } + + for(int k=0; k<32; k++) { + __ fld_s (as_FloatRegister(k), A0, f_offset(k)); + } +#undef __ +} + + +void MacroAssembler::pd_patch_instruction(address branch, address target) { + jint& stub_inst = *(jint*)branch; + jint *pc = (jint *)branch; + + if (high(stub_inst, 7) == pcaddu18i_op) { + // far: + // pcaddu18i reg, si20 + // jirl r0, reg, si18 + + assert(high(pc[1], 6) == jirl_op, "Not a branch label patch"); + jlong offs = target - branch; + CodeBuffer cb(branch, 2 * BytesPerInstWord); + MacroAssembler masm(&cb); + if (reachable_from_branch_short(offs)) { + // convert far to short +#define __ masm.
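+ // A short branch reaches roughly +/-128 MB here (signed 26-bit word offset, see reachable_from_branch_short), so the far sequence's two instruction slots are rewritten as b(target) plus a trailing nop to keep the same two-instruction footprint.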
+ __ b(target); + __ nop(); +#undef __ + } else { + masm.patchable_jump_far(R0, offs); + } + return; + } else if (high(stub_inst, 7) == pcaddi_op) { + // see MacroAssembler::set_last_Java_frame: + // pcaddi reg, si20 + + jint offs = (target - branch) >> 2; + guarantee(is_simm(offs, 20), "Not signed 20-bit offset"); + CodeBuffer cb(branch, 1 * BytesPerInstWord); + MacroAssembler masm(&cb); + masm.pcaddi(as_Register(low(stub_inst, 5)), offs); + return; + } else if (high(stub_inst, 7) == pcaddu12i_op) { + // pc-relative + jlong offs = target - branch; + guarantee(is_simm(offs, 32), "Not signed 32-bit offset"); + jint si12, si20; + jint& stub_instNext = *(jint*)(branch+4); + split_simm32(offs, si12, si20); + CodeBuffer cb(branch, 2 * BytesPerInstWord); + MacroAssembler masm(&cb); + masm.pcaddu12i(as_Register(low(stub_inst, 5)), si20); + masm.addi_d(as_Register(low((stub_instNext), 5)), as_Register(low((stub_instNext) >> 5, 5)), si12); + return; + } else if (high(stub_inst, 7) == lu12i_w_op) { + // long call (absolute) + CodeBuffer cb(branch, 3 * BytesPerInstWord); + MacroAssembler masm(&cb); + masm.call_long(target); + return; + } + + stub_inst = patched_branch(target - branch, stub_inst, 0); +} + +bool MacroAssembler::reachable_from_branch_short(jlong offs) { + if (ForceUnreachable) { + return false; + } + return is_simm(offs >> 2, 26); +} + +void MacroAssembler::patchable_jump_far(Register ra, jlong offs) { + jint si18, si20; + guarantee(is_simm(offs, 38), "Not signed 38-bit offset"); + split_simm38(offs, si18, si20); + pcaddu18i(T4, si20); + jirl(ra, T4, si18); +} + +void MacroAssembler::patchable_jump(address target, bool force_patchable) { + assert(ReservedCodeCacheSize < 4*G, "branch out of range"); + assert(CodeCache::find_blob(target) != NULL, + "destination of jump not found in code cache"); + if (force_patchable || patchable_branches()) { + jlong offs = target - pc(); + if (reachable_from_branch_short(offs)) { // Short jump + b(offset26(target)); + nop(); + } else { // Far jump + patchable_jump_far(R0, offs); + } + } else { // Real short jump + b(offset26(target)); + } +} + +void MacroAssembler::patchable_call(address target, address call_site) { + jlong offs = target - (call_site ? call_site : pc()); + if (reachable_from_branch_short(offs - BytesPerInstWord)) { // Short call + nop(); + bl((offs - BytesPerInstWord) >> 2); + } else { // Far call + patchable_jump_far(RA, offs); + } +} + +// Maybe emit a call via a trampoline. If the code cache is small +// trampolines won't be emitted. + +address MacroAssembler::trampoline_call(AddressLiteral entry, CodeBuffer *cbuf) { + assert(JavaThread::current()->is_Compiler_thread(), "just checking"); + assert(entry.rspec().type() == relocInfo::runtime_call_type + || entry.rspec().type() == relocInfo::opt_virtual_call_type + || entry.rspec().type() == relocInfo::static_call_type + || entry.rspec().type() == relocInfo::virtual_call_type, "wrong reloc type"); + + // We need a trampoline if branches are far. + if (far_branches()) { + bool in_scratch_emit_size = false; +#ifdef COMPILER2 + // We don't want to emit a trampoline if C2 is generating dummy + // code during its branch shortening phase. 
+ CompileTask* task = ciEnv::current()->task(); + in_scratch_emit_size = + (task != NULL && is_c2_compile(task->comp_level()) && + Compile::current()->in_scratch_emit_size()); +#endif + if (!in_scratch_emit_size) { + address stub = emit_trampoline_stub(offset(), entry.target()); + if (stub == NULL) { + postcond(pc() == badAddress); + return NULL; // CodeCache is full + } + } + } + + if (cbuf) cbuf->set_insts_mark(); + relocate(entry.rspec()); + if (!far_branches()) { + bl(entry.target()); + } else { + bl(pc()); + } + // just need to return a non-null address + postcond(pc() != badAddress); + return pc(); +} + +// Emit a trampoline stub for a call to a target which is too far away. +// +// code sequences: +// +// call-site: +// branch-and-link to or +// +// Related trampoline stub for this call site in the stub section: +// load the call target from the constant pool +// branch (RA still points to the call site above) + +address MacroAssembler::emit_trampoline_stub(int insts_call_instruction_offset, + address dest) { + // Start the stub + address stub = start_a_stub(NativeInstruction::nop_instruction_size + + NativeCallTrampolineStub::instruction_size); + if (stub == NULL) { + return NULL; // CodeBuffer::expand failed + } + + // Create a trampoline stub relocation which relates this trampoline stub + // with the call instruction at insts_call_instruction_offset in the + // instructions code-section. + align(wordSize); + relocate(trampoline_stub_Relocation::spec(code()->insts()->start() + + insts_call_instruction_offset)); + const int stub_start_offset = offset(); + + // Now, create the trampoline stub's code: + // - load the call + // - call + pcaddi(T4, 0); + ld_d(T4, T4, 16); + jr(T4); + nop(); //align + assert(offset() - stub_start_offset == NativeCallTrampolineStub::data_offset, + "should be"); + emit_int64((int64_t)dest); + + const address stub_start_addr = addr_at(stub_start_offset); + + NativeInstruction* ni = nativeInstruction_at(stub_start_addr); + assert(ni->is_NativeCallTrampolineStub_at(), "doesn't look like a trampoline"); + + end_a_stub(); + return stub_start_addr; +} + +void MacroAssembler::beq_far(Register rs, Register rt, address entry) { + if (is_simm16((entry - pc()) >> 2)) { // Short jump + beq(rs, rt, offset16(entry)); + } else { // Far jump + Label not_jump; + bne(rs, rt, not_jump); + b_far(entry); + bind(not_jump); + } +} + +void MacroAssembler::beq_far(Register rs, Register rt, Label& L) { + if (L.is_bound()) { + beq_far(rs, rt, target(L)); + } else { + Label not_jump; + bne(rs, rt, not_jump); + b_far(L); + bind(not_jump); + } +} + +void MacroAssembler::bne_far(Register rs, Register rt, address entry) { + if (is_simm16((entry - pc()) >> 2)) { // Short jump + bne(rs, rt, offset16(entry)); + } else { // Far jump + Label not_jump; + beq(rs, rt, not_jump); + b_far(entry); + bind(not_jump); + } +} + +void MacroAssembler::bne_far(Register rs, Register rt, Label& L) { + if (L.is_bound()) { + bne_far(rs, rt, target(L)); + } else { + Label not_jump; + beq(rs, rt, not_jump); + b_far(L); + bind(not_jump); + } +} + +void MacroAssembler::blt_far(Register rs, Register rt, address entry, bool is_signed) { + if (is_simm16((entry - pc()) >> 2)) { // Short jump + if (is_signed) { + blt(rs, rt, offset16(entry)); + } else { + bltu(rs, rt, offset16(entry)); + } + } else { // Far jump + Label not_jump; + if (is_signed) { + bge(rs, rt, not_jump); + } else { + bgeu(rs, rt, not_jump); + } + b_far(entry); + bind(not_jump); + } +} + +void MacroAssembler::blt_far(Register rs, Register rt, 
Label& L, bool is_signed) { + if (L.is_bound()) { + blt_far(rs, rt, target(L), is_signed); + } else { + Label not_jump; + if (is_signed) { + bge(rs, rt, not_jump); + } else { + bgeu(rs, rt, not_jump); + } + b_far(L); + bind(not_jump); + } +} + +void MacroAssembler::bge_far(Register rs, Register rt, address entry, bool is_signed) { + if (is_simm16((entry - pc()) >> 2)) { // Short jump + if (is_signed) { + bge(rs, rt, offset16(entry)); + } else { + bgeu(rs, rt, offset16(entry)); + } + } else { // Far jump + Label not_jump; + if (is_signed) { + blt(rs, rt, not_jump); + } else { + bltu(rs, rt, not_jump); + } + b_far(entry); + bind(not_jump); + } +} + +void MacroAssembler::bge_far(Register rs, Register rt, Label& L, bool is_signed) { + if (L.is_bound()) { + bge_far(rs, rt, target(L), is_signed); + } else { + Label not_jump; + if (is_signed) { + blt(rs, rt, not_jump); + } else { + bltu(rs, rt, not_jump); + } + b_far(L); + bind(not_jump); + } +} + +void MacroAssembler::beq_long(Register rs, Register rt, Label& L) { + Label not_taken; + bne(rs, rt, not_taken); + jmp_far(L); + bind(not_taken); +} + +void MacroAssembler::bne_long(Register rs, Register rt, Label& L) { + Label not_taken; + beq(rs, rt, not_taken); + jmp_far(L); + bind(not_taken); +} + +void MacroAssembler::bc1t_long(Label& L) { + Label not_taken; + bceqz(FCC0, not_taken); + jmp_far(L); + bind(not_taken); +} + +void MacroAssembler::blt_long(Register rs, Register rt, Label& L, bool is_signed) { + Label not_taken; + if (is_signed) { + bge(rs, rt, not_taken); + } else { + bgeu(rs, rt, not_taken); + } + jmp_far(L); + bind(not_taken); +} + +void MacroAssembler::bge_long(Register rs, Register rt, Label& L, bool is_signed) { + Label not_taken; + if (is_signed) { + blt(rs, rt, not_taken); + } else { + bltu(rs, rt, not_taken); + } + jmp_far(L); + bind(not_taken); +} + +void MacroAssembler::bc1f_long(Label& L) { + Label not_taken; + bcnez(FCC0, not_taken); + jmp_far(L); + bind(not_taken); +} + +void MacroAssembler::b_far(Label& L) { + if (L.is_bound()) { + b_far(target(L)); + } else { + L.add_patch_at(code(), locator()); + if (ForceUnreachable) { + patchable_jump_far(R0, 0); + } else { + b(0); + } + } +} + +void MacroAssembler::b_far(address entry) { + jlong offs = entry - pc(); + if (reachable_from_branch_short(offs)) { // Short jump + b(offset26(entry)); + } else { // Far jump + patchable_jump_far(R0, offs); + } +} + +void MacroAssembler::ld_ptr(Register rt, Register base, Register offset) { + ldx_d(rt, base, offset); +} + +void MacroAssembler::st_ptr(Register rt, Register base, Register offset) { + stx_d(rt, base, offset); +} + +Address MacroAssembler::as_Address(AddressLiteral adr) { + return Address(adr.target(), adr.rspec()); +} + +Address MacroAssembler::as_Address(ArrayAddress adr) { + return Address::make_array(adr); +} + +// tmp_reg1 and tmp_reg2 should be saved outside of atomic_inc32 (caller saved). +void MacroAssembler::atomic_inc32(address counter_addr, int inc, Register tmp_reg1, Register tmp_reg2) { + li(tmp_reg1, inc); + li(tmp_reg2, counter_addr); + amadd_w(R0, tmp_reg1, tmp_reg2); +} + +void MacroAssembler::reserved_stack_check() { + Register thread = TREG; +#ifndef OPT_THREAD + get_thread(thread); +#endif + // testing if reserved zone needs to be enabled + Label no_reserved_zone_enabling; + + ld_d(AT, Address(thread, JavaThread::reserved_stack_activation_offset())); + sub_d(AT, SP, AT); + blt(AT, R0, no_reserved_zone_enabling); + + enter(); // RA and FP are live. 
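+ // With the frame from enter() in place, call into the VM to re-enable the reserved zone, then drop the frame before jumping to the delayed StackOverflowError stub.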
+ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::enable_stack_reserved_zone), thread); + leave(); + + // We have already removed our own frame. + // throw_delayed_StackOverflowError will think that it's been + // called by our caller. + li(AT, (long)StubRoutines::throw_delayed_StackOverflowError_entry()); + jr(AT); + should_not_reach_here(); + + bind(no_reserved_zone_enabling); +} + +int MacroAssembler::biased_locking_enter(Register lock_reg, + Register obj_reg, + Register swap_reg, + Register tmp_reg, + bool swap_reg_contains_mark, + Label& done, + Label* slow_case, + BiasedLockingCounters* counters) { + assert(UseBiasedLocking, "why call this otherwise?"); + bool need_tmp_reg = false; + if (tmp_reg == noreg) { + need_tmp_reg = true; + tmp_reg = T4; + } + assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg, AT); + assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout"); + Address mark_addr (obj_reg, oopDesc::mark_offset_in_bytes()); + Address saved_mark_addr(lock_reg, 0); + + // Biased locking + // See whether the lock is currently biased toward our thread and + // whether the epoch is still valid + // Note that the runtime guarantees sufficient alignment of JavaThread + // pointers to allow age to be placed into low bits + // First check to see whether biasing is even enabled for this object + Label cas_label; + int null_check_offset = -1; + if (!swap_reg_contains_mark) { + null_check_offset = offset(); + ld_ptr(swap_reg, mark_addr); + } + + if (need_tmp_reg) { + push(tmp_reg); + } + move(tmp_reg, swap_reg); + andi(tmp_reg, tmp_reg, markOopDesc::biased_lock_mask_in_place); + addi_d(AT, R0, markOopDesc::biased_lock_pattern); + sub_d(AT, AT, tmp_reg); + if (need_tmp_reg) { + pop(tmp_reg); + } + + bne(AT, R0, cas_label); + + + // The bias pattern is present in the object's header. Need to check + // whether the bias owner and the epoch are both still current. + // Note that because there is no current thread register on LA we + // need to store off the mark word we read out of the object to + // avoid reloading it and needing to recheck invariants below. This + // store is unfortunate but it makes the overall code shorter and + // simpler. + st_ptr(swap_reg, saved_mark_addr); + if (need_tmp_reg) { + push(tmp_reg); + } + if (swap_reg_contains_mark) { + null_check_offset = offset(); + } + load_prototype_header(tmp_reg, obj_reg); + xorr(tmp_reg, tmp_reg, swap_reg); +#ifndef OPT_THREAD + get_thread(swap_reg); + xorr(swap_reg, swap_reg, tmp_reg); +#else + xorr(swap_reg, TREG, tmp_reg); +#endif + + li(AT, ~((int) markOopDesc::age_mask_in_place)); + andr(swap_reg, swap_reg, AT); + + if (PrintBiasedLockingStatistics) { + Label L; + bne(swap_reg, R0, L); + push(tmp_reg); + push(A0); + atomic_inc32((address)BiasedLocking::biased_lock_entry_count_addr(), 1, A0, tmp_reg); + pop(A0); + pop(tmp_reg); + bind(L); + } + if (need_tmp_reg) { + pop(tmp_reg); + } + beq(swap_reg, R0, done); + Label try_revoke_bias; + Label try_rebias; + + // At this point we know that the header has the bias pattern and + // that we are not the bias owner in the current epoch. We need to + // figure out more details about the state of the header in order to + // know what operations can be legally performed on the object's + // header. + + // If the low three bits in the xor result aren't clear, that means + // the prototype header is no longer biased and we have to revoke + // the bias on this object. 
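+ // For reference, the biased mark word layout (markOop.hpp) is [ thread | epoch:2 | age:4 | biased_lock:1 | lock:2 ], so biased_lock_mask_in_place selects the low three bits of the xor result computed above.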
+ + li(AT, markOopDesc::biased_lock_mask_in_place); + andr(AT, swap_reg, AT); + bne(AT, R0, try_revoke_bias); + // Biasing is still enabled for this data type. See whether the + // epoch of the current bias is still valid, meaning that the epoch + // bits of the mark word are equal to the epoch bits of the + // prototype header. (Note that the prototype header's epoch bits + // only change at a safepoint.) If not, attempt to rebias the object + // toward the current thread. Note that we must be absolutely sure + // that the current epoch is invalid in order to do this because + // otherwise the manipulations it performs on the mark word are + // illegal. + + li(AT, markOopDesc::epoch_mask_in_place); + andr(AT,swap_reg, AT); + bne(AT, R0, try_rebias); + // The epoch of the current bias is still valid but we know nothing + // about the owner; it might be set or it might be clear. Try to + // acquire the bias of the object using an atomic operation. If this + // fails we will go in to the runtime to revoke the object's bias. + // Note that we first construct the presumed unbiased header so we + // don't accidentally blow away another thread's valid bias. + + ld_ptr(swap_reg, saved_mark_addr); + + li(AT, markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place); + andr(swap_reg, swap_reg, AT); + + if (need_tmp_reg) { + push(tmp_reg); + } +#ifndef OPT_THREAD + get_thread(tmp_reg); + orr(tmp_reg, tmp_reg, swap_reg); +#else + orr(tmp_reg, TREG, swap_reg); +#endif + cmpxchg(Address(obj_reg, 0), swap_reg, tmp_reg, AT, false, false); + if (need_tmp_reg) { + pop(tmp_reg); + } + // If the biasing toward our thread failed, this means that + // another thread succeeded in biasing it toward itself and we + // need to revoke that bias. The revocation will occur in the + // interpreter runtime in the slow case. + if (PrintBiasedLockingStatistics) { + Label L; + bne(AT, R0, L); + push(tmp_reg); + push(A0); + atomic_inc32((address)BiasedLocking::anonymously_biased_lock_entry_count_addr(), 1, A0, tmp_reg); + pop(A0); + pop(tmp_reg); + bind(L); + } + if (slow_case != NULL) { + beq_far(AT, R0, *slow_case); + } + b(done); + + bind(try_rebias); + // At this point we know the epoch has expired, meaning that the + // current "bias owner", if any, is actually invalid. Under these + // circumstances _only_, we are allowed to use the current header's + // value as the comparison value when doing the cas to acquire the + // bias in the current epoch. In other words, we allow transfer of + // the bias from one thread to another directly in this situation. + // + // FIXME: due to a lack of registers we currently blow away the age + // bits in this situation. Should attempt to preserve them. + if (need_tmp_reg) { + push(tmp_reg); + } + load_prototype_header(tmp_reg, obj_reg); +#ifndef OPT_THREAD + get_thread(swap_reg); + orr(tmp_reg, tmp_reg, swap_reg); +#else + orr(tmp_reg, tmp_reg, TREG); +#endif + ld_ptr(swap_reg, saved_mark_addr); + + cmpxchg(Address(obj_reg, 0), swap_reg, tmp_reg, AT, false, false); + if (need_tmp_reg) { + pop(tmp_reg); + } + // If the biasing toward our thread failed, then another thread + // succeeded in biasing it toward itself and we need to revoke that + // bias. The revocation will occur in the runtime in the slow case. 
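+ // Statistics only: bump BiasedLocking::rebiased_lock_entry_count when -XX:+PrintBiasedLockingStatistics is enabled, saving AT and tmp_reg around the counter update.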
+ if (PrintBiasedLockingStatistics) { + Label L; + bne(AT, R0, L); + push(AT); + push(tmp_reg); + atomic_inc32((address)BiasedLocking::rebiased_lock_entry_count_addr(), 1, AT, tmp_reg); + pop(tmp_reg); + pop(AT); + bind(L); + } + if (slow_case != NULL) { + beq_far(AT, R0, *slow_case); + } + + b(done); + bind(try_revoke_bias); + // The prototype mark in the klass doesn't have the bias bit set any + // more, indicating that objects of this data type are not supposed + // to be biased any more. We are going to try to reset the mark of + // this object to the prototype value and fall through to the + // CAS-based locking scheme. Note that if our CAS fails, it means + // that another thread raced us for the privilege of revoking the + // bias of this particular object, so it's okay to continue in the + // normal locking code. + // + // FIXME: due to a lack of registers we currently blow away the age + // bits in this situation. Should attempt to preserve them. + ld_ptr(swap_reg, saved_mark_addr); + + if (need_tmp_reg) { + push(tmp_reg); + } + load_prototype_header(tmp_reg, obj_reg); + cmpxchg(Address(obj_reg, 0), swap_reg, tmp_reg, AT, false, false); + if (need_tmp_reg) { + pop(tmp_reg); + } + // Fall through to the normal CAS-based lock, because no matter what + // the result of the above CAS, some thread must have succeeded in + // removing the bias bit from the object's header. + if (PrintBiasedLockingStatistics) { + Label L; + bne(AT, R0, L); + push(AT); + push(tmp_reg); + atomic_inc32((address)BiasedLocking::revoked_lock_entry_count_addr(), 1, AT, tmp_reg); + pop(tmp_reg); + pop(AT); + bind(L); + } + + bind(cas_label); + return null_check_offset; +} + +void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, Label& done) { + assert(UseBiasedLocking, "why call this otherwise?"); + + // Check for biased locking unlock case, which is a no-op + // Note: we do not have to check the thread ID for two reasons. + // First, the interpreter checks for IllegalMonitorStateException at + // a higher level. Second, if the bias was revoked while we held the + // lock, the object could not be rebiased toward another thread, so + // the bias bit would be clear. + ld_d(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); + andi(temp_reg, temp_reg, markOopDesc::biased_lock_mask_in_place); + addi_d(AT, R0, markOopDesc::biased_lock_pattern); + + beq(AT, temp_reg, done); +} + +// the stack pointer adjustment is needed. 
see InterpreterMacroAssembler::super_call_VM_leaf +// this method will handle the stack problem, you need not to preserve the stack space for the argument now +void MacroAssembler::call_VM_leaf_base(address entry_point, int number_of_arguments) { + Label L, E; + + assert(number_of_arguments <= 4, "just check"); + + andi(AT, SP, 0xf); + beq(AT, R0, L); + addi_d(SP, SP, -8); + call(entry_point, relocInfo::runtime_call_type); + addi_d(SP, SP, 8); + b(E); + + bind(L); + call(entry_point, relocInfo::runtime_call_type); + bind(E); +} + +void MacroAssembler::jmp(address entry) { + jlong offs = entry - pc(); + if (reachable_from_branch_short(offs)) { // Short jump + b(offset26(entry)); + } else { // Far jump + patchable_jump_far(R0, offs); + } +} + +void MacroAssembler::jmp(address entry, relocInfo::relocType rtype) { + switch (rtype) { + case relocInfo::none: + jmp(entry); + break; + default: + { + InstructionMark im(this); + relocate(rtype); + patchable_jump(entry); + } + break; + } +} + +void MacroAssembler::jmp_far(Label& L) { + if (L.is_bound()) { + assert(target(L) != NULL, "jmp most probably wrong"); + patchable_jump(target(L), true /* force patchable */); + } else { + L.add_patch_at(code(), locator()); + patchable_jump_far(R0, 0); + } +} + +void MacroAssembler::mov_metadata(Address dst, Metadata* obj) { + int oop_index; + if (obj) { + oop_index = oop_recorder()->find_index(obj); + } else { + oop_index = oop_recorder()->allocate_metadata_index(obj); + } + relocate(metadata_Relocation::spec(oop_index)); + patchable_li52(AT, (long)obj); + st_d(AT, dst); +} + +void MacroAssembler::mov_metadata(Register dst, Metadata* obj) { + int oop_index; + if (obj) { + oop_index = oop_recorder()->find_index(obj); + } else { + oop_index = oop_recorder()->allocate_metadata_index(obj); + } + relocate(metadata_Relocation::spec(oop_index)); + patchable_li52(dst, (long)obj); +} + +void MacroAssembler::call(address entry) { + jlong offs = entry - pc(); + if (reachable_from_branch_short(offs)) { // Short call (pc-rel) + bl(offset26(entry)); + } else if (is_simm(offs, 38)) { // Far call (pc-rel) + patchable_jump_far(RA, offs); + } else { // Long call (absolute) + call_long(entry); + } +} + +void MacroAssembler::call(address entry, relocInfo::relocType rtype) { + switch (rtype) { + case relocInfo::none: + call(entry); + break; + case relocInfo::runtime_call_type: + if (!is_simm(entry - pc(), 38)) { + call_long(entry); + break; + } + // fallthrough + default: + { + InstructionMark im(this); + relocate(rtype); + patchable_call(entry); + } + break; + } +} + +void MacroAssembler::call(address entry, RelocationHolder& rh){ + switch (rh.type()) { + case relocInfo::none: + call(entry); + break; + case relocInfo::runtime_call_type: + if (!is_simm(entry - pc(), 38)) { + call_long(entry); + break; + } + // fallthrough + default: + { + InstructionMark im(this); + relocate(rh); + patchable_call(entry); + } + break; + } +} + +void MacroAssembler::call_long(address entry) { + jlong value = (jlong)entry; + lu12i_w(T4, split_low20(value >> 12)); + lu32i_d(T4, split_low20(value >> 32)); + jirl(RA, T4, split_low12(value)); +} + +address MacroAssembler::ic_call(address entry, jint method_index) { + RelocationHolder rh = virtual_call_Relocation::spec(pc(), method_index); + patchable_li52(IC_Klass, (long)Universe::non_oop_word()); + assert(entry != NULL, "call most probably wrong"); + InstructionMark im(this); + return trampoline_call(AddressLiteral(entry, rh)); +} + +void MacroAssembler::c2bool(Register r) { + sltu(r, R0, r); +} + 
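+// Debugging helpers: stop() and warn() below both route through
+// MacroAssembler::debug() to print their message; stop() additionally emits a
+// breakpoint (brk) after the call.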
+#ifndef PRODUCT +extern "C" void findpc(intptr_t x); +#endif + +void MacroAssembler::debug(char* msg/*, RegistersForDebugging* regs*/) { + if ( ShowMessageBoxOnError ) { + JavaThreadState saved_state = JavaThread::current()->thread_state(); + JavaThread::current()->set_thread_state(_thread_in_vm); + { + // In order to get locks work, we need to fake a in_VM state + ttyLocker ttyl; + ::tty->print_cr("EXECUTION STOPPED: %s\n", msg); + if (CountBytecodes || TraceBytecodes || StopInterpreterAt) { + BytecodeCounter::print(); + } + + } + ThreadStateTransition::transition(JavaThread::current(), _thread_in_vm, saved_state); + } + else + ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg); +} + + +void MacroAssembler::stop(const char* msg) { + li(A0, (long)msg); + call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type); + brk(17); +} + +void MacroAssembler::warn(const char* msg) { + pushad(); + li(A0, (long)msg); + push(S2); + li(AT, -(StackAlignmentInBytes)); + move(S2, SP); // use S2 as a sender SP holder + andr(SP, SP, AT); // align stack as required by ABI + call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type); + move(SP, S2); // use S2 as a sender SP holder + pop(S2); + popad(); +} + +void MacroAssembler::increment(Register reg, int imm) { + if (!imm) return; + if (is_simm(imm, 12)) { + addi_d(reg, reg, imm); + } else { + li(AT, imm); + add_d(reg, reg, AT); + } +} + +void MacroAssembler::decrement(Register reg, int imm) { + increment(reg, -imm); +} + +void MacroAssembler::increment(Address addr, int imm) { + if (!imm) return; + assert(is_simm(imm, 12), "must be"); + ld_ptr(AT, addr); + addi_d(AT, AT, imm); + st_ptr(AT, addr); +} + +void MacroAssembler::decrement(Address addr, int imm) { + increment(addr, -imm); +} + +void MacroAssembler::call_VM(Register oop_result, + address entry_point, + bool check_exceptions) { + call_VM_helper(oop_result, entry_point, 0, check_exceptions); +} + +void MacroAssembler::call_VM(Register oop_result, + address entry_point, + Register arg_1, + bool check_exceptions) { + if (arg_1!=A1) move(A1, arg_1); + call_VM_helper(oop_result, entry_point, 1, check_exceptions); +} + +void MacroAssembler::call_VM(Register oop_result, + address entry_point, + Register arg_1, + Register arg_2, + bool check_exceptions) { + if (arg_1!=A1) move(A1, arg_1); + if (arg_2!=A2) move(A2, arg_2); + assert(arg_2 != A1, "smashed argument"); + call_VM_helper(oop_result, entry_point, 2, check_exceptions); +} + +void MacroAssembler::call_VM(Register oop_result, + address entry_point, + Register arg_1, + Register arg_2, + Register arg_3, + bool check_exceptions) { + if (arg_1!=A1) move(A1, arg_1); + if (arg_2!=A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument"); + if (arg_3!=A3) move(A3, arg_3); assert(arg_3 != A1 && arg_3 != A2, "smashed argument"); + call_VM_helper(oop_result, entry_point, 3, check_exceptions); +} + +void MacroAssembler::call_VM(Register oop_result, + Register last_java_sp, + address entry_point, + int number_of_arguments, + bool check_exceptions) { + call_VM_base(oop_result, NOREG, last_java_sp, entry_point, number_of_arguments, check_exceptions); +} + +void MacroAssembler::call_VM(Register oop_result, + Register last_java_sp, + address entry_point, + Register arg_1, + bool check_exceptions) { + if (arg_1 != A1) move(A1, arg_1); + call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions); +} + +void MacroAssembler::call_VM(Register oop_result, + Register last_java_sp, + 
address entry_point,
+                             Register arg_1,
+                             Register arg_2,
+                             bool check_exceptions) {
+  if (arg_1 != A1) move(A1, arg_1);
+  if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument");
+  call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions);
+}
+
+void MacroAssembler::call_VM(Register oop_result,
+                             Register last_java_sp,
+                             address entry_point,
+                             Register arg_1,
+                             Register arg_2,
+                             Register arg_3,
+                             bool check_exceptions) {
+  if (arg_1 != A1) move(A1, arg_1);
+  if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument");
+  if (arg_3 != A3) move(A3, arg_3); assert(arg_3 != A1 && arg_3 != A2, "smashed argument");
+  call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions);
+}
+
+void MacroAssembler::call_VM_base(Register oop_result,
+                                  Register java_thread,
+                                  Register last_java_sp,
+                                  address entry_point,
+                                  int number_of_arguments,
+                                  bool check_exceptions) {
+  // determine java_thread register
+  if (!java_thread->is_valid()) {
+#ifndef OPT_THREAD
+    java_thread = T2;
+    get_thread(java_thread);
+#else
+    java_thread = TREG;
+#endif
+  }
+  // determine last_java_sp register
+  if (!last_java_sp->is_valid()) {
+    last_java_sp = SP;
+  }
+  // debugging support
+  assert(number_of_arguments >= 0 , "cannot have negative number of arguments");
+  assert(number_of_arguments <= 4 , "cannot have more than 4 arguments");
+  assert(java_thread != oop_result , "cannot use the same register for java_thread & oop_result");
+  assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp");
+
+  assert(last_java_sp != FP, "this code doesn't work for last_java_sp == fp, which currently can't portably work anyway since C2 doesn't save fp");
+
+  // set last Java frame before call
+  Label before_call;
+  bind(before_call);
+  set_last_Java_frame(java_thread, last_java_sp, FP, before_call);
+
+  // do the call
+  move(A0, java_thread);
+  call(entry_point, relocInfo::runtime_call_type);
+
+  // restore the thread (cannot use the pushed argument since arguments
+  // may be overwritten by C code generated by an optimizing compiler);
+  // however can use the register value directly if it is callee saved.
+#ifndef OPT_THREAD + get_thread(java_thread); +#else +#ifdef ASSERT + { + Label L; + get_thread(AT); + beq(java_thread, AT, L); + stop("MacroAssembler::call_VM_base: TREG not callee saved?"); + bind(L); + } +#endif +#endif + + // discard thread and arguments + ld_ptr(SP, java_thread, in_bytes(JavaThread::last_Java_sp_offset())); + // reset last Java frame + reset_last_Java_frame(java_thread, false); + + check_and_handle_popframe(java_thread); + check_and_handle_earlyret(java_thread); + if (check_exceptions) { + // check for pending exceptions (java_thread is set upon return) + Label L; + ld_d(AT, java_thread, in_bytes(Thread::pending_exception_offset())); + beq(AT, R0, L); + li(AT, target(before_call)); + push(AT); + jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); + bind(L); + } + + // get oop result if there is one and reset the value in the thread + if (oop_result->is_valid()) { + ld_d(oop_result, java_thread, in_bytes(JavaThread::vm_result_offset())); + st_d(R0, java_thread, in_bytes(JavaThread::vm_result_offset())); + verify_oop(oop_result); + } +} + +void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) { + move(V0, SP); + //we also reserve space for java_thread here + li(AT, -(StackAlignmentInBytes)); + andr(SP, SP, AT); + call_VM_base(oop_result, NOREG, V0, entry_point, number_of_arguments, check_exceptions); +} + +void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) { + call_VM_leaf_base(entry_point, number_of_arguments); +} + +void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) { + if (arg_0 != A0) move(A0, arg_0); + call_VM_leaf(entry_point, 1); +} + +void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) { + if (arg_0 != A0) move(A0, arg_0); + if (arg_1 != A1) move(A1, arg_1); assert(arg_1 != A0, "smashed argument"); + call_VM_leaf(entry_point, 2); +} + +void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) { + if (arg_0 != A0) move(A0, arg_0); + if (arg_1 != A1) move(A1, arg_1); assert(arg_1 != A0, "smashed argument"); + if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A0 && arg_2 != A1, "smashed argument"); + call_VM_leaf(entry_point, 3); +} + +void MacroAssembler::super_call_VM_leaf(address entry_point) { + MacroAssembler::call_VM_leaf_base(entry_point, 0); +} + +void MacroAssembler::super_call_VM_leaf(address entry_point, + Register arg_1) { + if (arg_1 != A0) move(A0, arg_1); + MacroAssembler::call_VM_leaf_base(entry_point, 1); +} + +void MacroAssembler::super_call_VM_leaf(address entry_point, + Register arg_1, + Register arg_2) { + if (arg_1 != A0) move(A0, arg_1); + if (arg_2 != A1) move(A1, arg_2); assert(arg_2 != A0, "smashed argument"); + MacroAssembler::call_VM_leaf_base(entry_point, 2); +} + +void MacroAssembler::super_call_VM_leaf(address entry_point, + Register arg_1, + Register arg_2, + Register arg_3) { + if (arg_1 != A0) move(A0, arg_1); + if (arg_2 != A1) move(A1, arg_2); assert(arg_2 != A0, "smashed argument"); + if (arg_3 != A2) move(A2, arg_3); assert(arg_3 != A0 && arg_3 != A1, "smashed argument"); + MacroAssembler::call_VM_leaf_base(entry_point, 3); +} + +void MacroAssembler::check_and_handle_earlyret(Register java_thread) { +} + +void MacroAssembler::check_and_handle_popframe(Register java_thread) { +} + +void MacroAssembler::null_check(Register reg, int offset) { + if (needs_explicit_null_check(offset)) { + // provoke OS NULL 
exception if reg = NULL by + // accessing M[reg] w/o changing any (non-CC) registers + // NOTE: cmpl is plenty here to provoke a segv + ld_w(AT, reg, 0); + } else { + // nothing to do, (later) access of M[reg + offset] + // will provoke OS NULL exception if reg = NULL + } +} + +void MacroAssembler::enter() { + push2(RA, FP); + move(FP, SP); +} + +void MacroAssembler::leave() { + move(SP, FP); + pop2(RA, FP); +} + +void MacroAssembler::build_frame(int framesize) { + assert(framesize >= 2 * wordSize, "framesize must include space for FP/RA"); + assert(framesize % (2 * wordSize) == 0, "must preserve 2 * wordSize alignment"); + if (Assembler::is_simm(-framesize, 12)) { + addi_d(SP, SP, -framesize); + st_ptr(FP, Address(SP, framesize - 2 * wordSize)); + st_ptr(RA, Address(SP, framesize - 1 * wordSize)); + if (PreserveFramePointer) + addi_d(FP, SP, framesize - 2 * wordSize); + } else { + addi_d(SP, SP, -2 * wordSize); + st_ptr(FP, Address(SP, 0 * wordSize)); + st_ptr(RA, Address(SP, 1 * wordSize)); + if (PreserveFramePointer) + move(FP, SP); + li(SCR1, framesize - 2 * wordSize); + sub_d(SP, SP, SCR1); + } +} + +void MacroAssembler::remove_frame(int framesize) { + assert(framesize >= 2 * wordSize, "framesize must include space for FP/RA"); + assert(framesize % (2*wordSize) == 0, "must preserve 2*wordSize alignment"); + if (Assembler::is_simm(framesize, 12)) { + ld_ptr(FP, Address(SP, framesize - 2 * wordSize)); + ld_ptr(RA, Address(SP, framesize - 1 * wordSize)); + addi_d(SP, SP, framesize); + } else { + li(SCR1, framesize - 2 * wordSize); + add_d(SP, SP, SCR1); + ld_ptr(FP, Address(SP, 0 * wordSize)); + ld_ptr(RA, Address(SP, 1 * wordSize)); + addi_d(SP, SP, 2 * wordSize); + } +} + +void MacroAssembler::unimplemented(const char* what) { + const char* buf = NULL; + { + ResourceMark rm; + stringStream ss; + ss.print("unimplemented: %s", what); + buf = code_string(ss.as_string()); + } + stop(buf); +} + +void MacroAssembler::get_thread(Register thread) { +#ifdef MINIMIZE_RAM_USAGE + Register tmp; + + if (thread == AT) + tmp = T4; + else + tmp = AT; + + move(thread, SP); + shr(thread, PAGE_SHIFT); + + push(tmp); + li(tmp, ((1UL << (SP_BITLENGTH - PAGE_SHIFT)) - 1)); + andr(thread, thread, tmp); + shl(thread, Address::times_ptr); // sizeof(Thread *) + li(tmp, (long)ThreadLocalStorage::sp_map_addr()); + add_d(tmp, tmp, thread); + ld_ptr(thread, tmp, 0); + pop(tmp); +#else + if (thread != V0) { + push(V0); + } + pushad_except_v0(); + + push(S5); + move(S5, SP); + li(AT, -StackAlignmentInBytes); + andr(SP, SP, AT); + // TODO: confirm reloc + call(CAST_FROM_FN_PTR(address, Thread::current), relocInfo::runtime_call_type); + move(SP, S5); + pop(S5); + + popad_except_v0(); + if (thread != V0) { + move(thread, V0); + pop(V0); + } +#endif // MINIMIZE_RAM_USAGE +} + +void MacroAssembler::reset_last_Java_frame(Register java_thread, bool clear_fp) { + // determine java_thread register + if (!java_thread->is_valid()) { +#ifndef OPT_THREAD + java_thread = T1; + get_thread(java_thread); +#else + java_thread = TREG; +#endif + } + // we must set sp to zero to clear frame + st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_sp_offset())); + // must clear fp, so that compiled frames are not confused; it is possible + // that we need it only for debugging + if(clear_fp) { + st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_fp_offset())); + } + + // Always clear the pc because it could have been set by make_walkable() + st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_pc_offset())); +} + +void 
MacroAssembler::reset_last_Java_frame(bool clear_fp) { + Register thread = TREG; +#ifndef OPT_THREAD + get_thread(thread); +#endif + // we must set sp to zero to clear frame + st_d(R0, Address(thread, JavaThread::last_Java_sp_offset())); + // must clear fp, so that compiled frames are not confused; it is + // possible that we need it only for debugging + if (clear_fp) { + st_d(R0, Address(thread, JavaThread::last_Java_fp_offset())); + } + + // Always clear the pc because it could have been set by make_walkable() + st_d(R0, Address(thread, JavaThread::last_Java_pc_offset())); +} + +// Write serialization page so VM thread can do a pseudo remote membar. +// We use the current thread pointer to calculate a thread specific +// offset to write to within the page. This minimizes bus traffic +// due to cache line collision. +void MacroAssembler::serialize_memory(Register thread, Register tmp) { + assert_different_registers(AT, tmp); + juint sps = os::get_serialize_page_shift_count(); + juint lsb = sps + 2; + juint msb = sps + log2_uint(os::vm_page_size()) - 1; + bstrpick_w(AT, thread, msb, lsb); + li(tmp, os::get_memory_serialize_page()); + alsl_d(tmp, AT, tmp, Address::times_2 - 1); + st_w(R0, tmp, 0); +} + +void MacroAssembler::safepoint_poll(Label& slow_path, Register thread_reg) { + if (SafepointMechanism::uses_thread_local_poll()) { + ld_d(AT, thread_reg, in_bytes(Thread::polling_page_offset())); + andi(AT, AT, SafepointMechanism::poll_bit()); + bne(AT, R0, slow_path); + } else { + li(AT, SafepointSynchronize::address_of_state()); + ld_w(AT, AT, 0); + addi_d(AT, AT, -SafepointSynchronize::_not_synchronized); + bne(AT, R0, slow_path); + } +} + +// Just like safepoint_poll, but use an acquiring load for thread- +// local polling. +// +// We need an acquire here to ensure that any subsequent load of the +// global SafepointSynchronize::_state flag is ordered after this load +// of the local Thread::_polling page. We don't want this poll to +// return false (i.e. not safepointing) and a later poll of the global +// SafepointSynchronize::_state spuriously to return true. +// +// This is to avoid a race when we're in a native->Java transition +// racing the code which wakes up from a safepoint. +// +void MacroAssembler::safepoint_poll_acquire(Label& slow_path, Register thread_reg) { + if (SafepointMechanism::uses_thread_local_poll()) { + ld_d(AT, thread_reg, in_bytes(Thread::polling_page_offset())); + membar(Assembler::Membar_mask_bits(LoadLoad|LoadStore)); + andi(AT, AT, SafepointMechanism::poll_bit()); + bne(AT, R0, slow_path); + } else { + safepoint_poll(slow_path, thread_reg); + } +} + +// Calls to C land +// +// When entering C land, the fp, & sp of the last Java frame have to be recorded +// in the (thread-local) JavaThread object. When leaving C land, the last Java fp +// has to be reset to 0. This is required to allow proper stack traversal. 
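+// set_last_Java_frame() records last_java_sp, last_java_fp and a pc (computed
+// from the label with lipc) into the JavaFrameAnchor of the JavaThread, so the
+// most recent Java frame can be found while running in native code.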
+void MacroAssembler::set_last_Java_frame(Register java_thread, + Register last_java_sp, + Register last_java_fp, + Label& last_java_pc) { + // determine java_thread register + if (!java_thread->is_valid()) { +#ifndef OPT_THREAD + java_thread = T2; + get_thread(java_thread); +#else + java_thread = TREG; +#endif + } + + // determine last_java_sp register + if (!last_java_sp->is_valid()) { + last_java_sp = SP; + } + + // last_java_fp is optional + if (last_java_fp->is_valid()) { + st_ptr(last_java_fp, java_thread, in_bytes(JavaThread::last_Java_fp_offset())); + } + + // last_java_pc + lipc(AT, last_java_pc); + st_ptr(AT, java_thread, in_bytes(JavaThread::frame_anchor_offset() + + JavaFrameAnchor::last_Java_pc_offset())); + + st_ptr(last_java_sp, java_thread, in_bytes(JavaThread::last_Java_sp_offset())); +} + +void MacroAssembler::set_last_Java_frame(Register last_java_sp, + Register last_java_fp, + Label& last_java_pc) { + set_last_Java_frame(NOREG, last_java_sp, last_java_fp, last_java_pc); +} + +// Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes. +void MacroAssembler::tlab_allocate(Register obj, + Register var_size_in_bytes, + int con_size_in_bytes, + Register t1, + Register t2, + Label& slow_case) { + BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); + bs->tlab_allocate(this, obj, var_size_in_bytes, con_size_in_bytes, t1, t2, slow_case); +} + +// Defines obj, preserves var_size_in_bytes +void MacroAssembler::eden_allocate(Register obj, + Register var_size_in_bytes, + int con_size_in_bytes, + Register t1, + Label& slow_case) { + BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); + bs->eden_allocate(this, obj, var_size_in_bytes, con_size_in_bytes, t1, slow_case); +} + + +void MacroAssembler::incr_allocated_bytes(Register thread, + Register var_size_in_bytes, + int con_size_in_bytes, + Register t1) { + if (!thread->is_valid()) { +#ifndef OPT_THREAD + assert(t1->is_valid(), "need temp reg"); + thread = t1; + get_thread(thread); +#else + thread = TREG; +#endif + } + + ld_ptr(AT, thread, in_bytes(JavaThread::allocated_bytes_offset())); + if (var_size_in_bytes->is_valid()) { + add_d(AT, AT, var_size_in_bytes); + } else { + addi_d(AT, AT, con_size_in_bytes); + } + st_ptr(AT, thread, in_bytes(JavaThread::allocated_bytes_offset())); +} + +void MacroAssembler::li(Register rd, jlong value) { + jlong hi12 = bitfield(value, 52, 12); + jlong lo52 = bitfield(value, 0, 52); + + if (hi12 != 0 && lo52 == 0) { + lu52i_d(rd, R0, hi12); + } else { + jlong hi20 = bitfield(value, 32, 20); + jlong lo20 = bitfield(value, 12, 20); + jlong lo12 = bitfield(value, 0, 12); + + if (lo20 == 0) { + ori(rd, R0, lo12); + } else if (bitfield(simm12(lo12), 12, 20) == lo20) { + addi_w(rd, R0, simm12(lo12)); + } else { + lu12i_w(rd, lo20); + if (lo12 != 0) + ori(rd, rd, lo12); + } + if (hi20 != bitfield(simm20(lo20), 20, 20)) + lu32i_d(rd, hi20); + if (hi12 != bitfield(simm20(hi20), 20, 12)) + lu52i_d(rd, rd, hi12); + } +} + +void MacroAssembler::patchable_li52(Register rd, jlong value) { + int count = 0; + + if (value <= max_jint && value >= min_jint) { + if (is_simm(value, 12)) { + addi_d(rd, R0, value); + count++; + } else { + lu12i_w(rd, split_low20(value >> 12)); + count++; + if (split_low12(value)) { + ori(rd, rd, split_low12(value)); + count++; + } + } + } else if (is_simm(value, 52)) { + lu12i_w(rd, split_low20(value >> 12)); + count++; + if (split_low12(value)) { + ori(rd, rd, split_low12(value)); + count++; + } + lu32i_d(rd, 
split_low20(value >> 32)); + count++; + } else { + tty->print_cr("value = 0x%lx", value); + guarantee(false, "Not supported yet !"); + } + + while (count < 3) { + nop(); + count++; + } +} + +void MacroAssembler::lipc(Register rd, Label& L) { + if (L.is_bound()) { + jint offs = (target(L) - pc()) >> 2; + guarantee(is_simm(offs, 20), "Not signed 20-bit offset"); + pcaddi(rd, offs); + } else { + InstructionMark im(this); + L.add_patch_at(code(), locator()); + pcaddi(rd, 0); + } +} + +void MacroAssembler::set_narrow_klass(Register dst, Klass* k) { + assert(UseCompressedClassPointers, "should only be used for compressed header"); + assert(oop_recorder() != NULL, "this assembler needs an OopRecorder"); + + int klass_index = oop_recorder()->find_index(k); + RelocationHolder rspec = metadata_Relocation::spec(klass_index); + long narrowKlass = (long)Klass::encode_klass(k); + + relocate(rspec, Assembler::narrow_oop_operand); + patchable_li52(dst, narrowKlass); +} + +void MacroAssembler::set_narrow_oop(Register dst, jobject obj) { + assert(UseCompressedOops, "should only be used for compressed header"); + assert(oop_recorder() != NULL, "this assembler needs an OopRecorder"); + + int oop_index = oop_recorder()->find_index(obj); + RelocationHolder rspec = oop_Relocation::spec(oop_index); + + relocate(rspec, Assembler::narrow_oop_operand); + patchable_li52(dst, oop_index); +} + +// ((OopHandle)result).resolve(); +void MacroAssembler::resolve_oop_handle(Register result, Register tmp) { + // OopHandle::resolve is an indirection. + access_load_at(T_OBJECT, IN_NATIVE, result, Address(result, 0), tmp, NOREG); +} + +void MacroAssembler::load_mirror(Register mirror, Register method, Register tmp) { + // get mirror + const int mirror_offset = in_bytes(Klass::java_mirror_offset()); + ld_ptr(mirror, method, in_bytes(Method::const_offset())); + ld_ptr(mirror, mirror, in_bytes(ConstMethod::constants_offset())); + ld_ptr(mirror, mirror, ConstantPool::pool_holder_offset_in_bytes()); + ld_ptr(mirror, mirror, mirror_offset); + resolve_oop_handle(mirror, tmp); +} + +void MacroAssembler::verify_oop(Register reg, const char* s) { + if (!VerifyOops) return; + + const char * b = NULL; + stringStream ss; + ss.print("verify_oop: %s: %s", reg->name(), s); + b = code_string(ss.as_string()); + + addi_d(SP, SP, -6 * wordSize); + st_ptr(SCR1, Address(SP, 0 * wordSize)); + st_ptr(SCR2, Address(SP, 1 * wordSize)); + st_ptr(RA, Address(SP, 2 * wordSize)); + st_ptr(A0, Address(SP, 3 * wordSize)); + st_ptr(A1, Address(SP, 4 * wordSize)); + + move(A1, reg); + patchable_li52(A0, (uintptr_t)(address)b); // Fixed size instructions + li(SCR2, StubRoutines::verify_oop_subroutine_entry_address()); + ld_ptr(SCR2, Address(SCR2)); + jalr(SCR2); + + ld_ptr(SCR1, Address(SP, 0 * wordSize)); + ld_ptr(SCR2, Address(SP, 1 * wordSize)); + ld_ptr(RA, Address(SP, 2 * wordSize)); + ld_ptr(A0, Address(SP, 3 * wordSize)); + ld_ptr(A1, Address(SP, 4 * wordSize)); + addi_d(SP, SP, 6 * wordSize); +} + +void MacroAssembler::verify_oop_addr(Address addr, const char* s) { + if (!VerifyOops) return; + + const char* b = NULL; + { + ResourceMark rm; + stringStream ss; + ss.print("verify_oop_addr: %s", s); + b = code_string(ss.as_string()); + } + + addi_d(SP, SP, -6 * wordSize); + st_ptr(SCR1, Address(SP, 0 * wordSize)); + st_ptr(SCR2, Address(SP, 1 * wordSize)); + st_ptr(RA, Address(SP, 2 * wordSize)); + st_ptr(A0, Address(SP, 3 * wordSize)); + st_ptr(A1, Address(SP, 4 * wordSize)); + + patchable_li52(A0, (uintptr_t)(address)b); // Fixed size instructions + // addr 
may contain sp so we will have to adjust it based on the + // pushes that we just did. + if (addr.uses(SP)) { + lea(A1, addr); + ld_ptr(A1, Address(A1, 6 * wordSize)); + } else { + ld_ptr(A1, addr); + } + + // call indirectly to solve generation ordering problem + li(SCR2, StubRoutines::verify_oop_subroutine_entry_address()); + ld_ptr(SCR2, Address(SCR2)); + jalr(SCR2); + + ld_ptr(SCR1, Address(SP, 0 * wordSize)); + ld_ptr(SCR2, Address(SP, 1 * wordSize)); + ld_ptr(RA, Address(SP, 2 * wordSize)); + ld_ptr(A0, Address(SP, 3 * wordSize)); + ld_ptr(A1, Address(SP, 4 * wordSize)); + addi_d(SP, SP, 6 * wordSize); +} + +// used registers : SCR1, SCR2 +void MacroAssembler::verify_oop_subroutine() { + // RA: ra + // A0: char* error message + // A1: oop object to verify + Label exit, error; + // increment counter + li(SCR2, (long)StubRoutines::verify_oop_count_addr()); + ld_w(SCR1, SCR2, 0); + addi_d(SCR1, SCR1, 1); + st_w(SCR1, SCR2, 0); + + // make sure object is 'reasonable' + beqz(A1, exit); // if obj is NULL it is ok + + // Check if the oop is in the right area of memory + // const int oop_mask = Universe::verify_oop_mask(); + // const int oop_bits = Universe::verify_oop_bits(); + const uintptr_t oop_mask = Universe::verify_oop_mask(); + const uintptr_t oop_bits = Universe::verify_oop_bits(); + li(SCR1, oop_mask); + andr(SCR2, A1, SCR1); + li(SCR1, oop_bits); + bne(SCR2, SCR1, error); + + // make sure klass is 'reasonable' + // add for compressedoops + load_klass(SCR2, A1); + beqz(SCR2, error); // if klass is NULL it is broken + // return if everything seems ok + bind(exit); + + jr(RA); + + // handle errors + bind(error); + pushad(); + call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type); + popad(); + jr(RA); +} + +void MacroAssembler::verify_tlab(Register t1, Register t2) { +#ifdef ASSERT + assert_different_registers(t1, t2, AT); + if (UseTLAB && VerifyOops) { + Label next, ok; + + get_thread(t1); + + ld_ptr(t2, t1, in_bytes(JavaThread::tlab_top_offset())); + ld_ptr(AT, t1, in_bytes(JavaThread::tlab_start_offset())); + bgeu(t2, AT, next); + + stop("assert(top >= start)"); + + bind(next); + ld_ptr(AT, t1, in_bytes(JavaThread::tlab_end_offset())); + bgeu(AT, t2, ok); + + stop("assert(top <= end)"); + + bind(ok); + + } +#endif +} + +RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr, + Register tmp, + int offset) { + //TODO: LA + guarantee(0, "LA not implemented yet"); + return RegisterOrConstant(tmp); +} + +void MacroAssembler::hswap(Register reg) { + //short + //andi(reg, reg, 0xffff); + srli_w(AT, reg, 8); + slli_w(reg, reg, 24); + srai_w(reg, reg, 16); + orr(reg, reg, AT); +} + +void MacroAssembler::huswap(Register reg) { + srli_d(AT, reg, 8); + slli_d(reg, reg, 24); + srli_d(reg, reg, 16); + orr(reg, reg, AT); + bstrpick_d(reg, reg, 15, 0); +} + +// something funny to do this will only one more register AT +// 32 bits +void MacroAssembler::swap(Register reg) { + srli_w(AT, reg, 8); + slli_w(reg, reg, 24); + orr(reg, reg, AT); + //reg : 4 1 2 3 + srli_w(AT, AT, 16); + xorr(AT, AT, reg); + andi(AT, AT, 0xff); + //AT : 0 0 0 1^3); + xorr(reg, reg, AT); + //reg : 4 1 2 1 + slli_w(AT, AT, 16); + xorr(reg, reg, AT); + //reg : 4 3 2 1 +} + +void MacroAssembler::cmpxchg(Address addr, Register oldval, Register newval, + Register resflag, bool retold, bool barrier) { + assert(oldval != resflag, "oldval != resflag"); + assert(newval != resflag, "newval != resflag"); + Label again, succ, fail; + + bind(again); + ll_d(resflag, addr); + 
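+  // LL/SC loop: bail out to 'fail' if the loaded value differs from oldval;
+  // otherwise attempt the store-conditional and retry if the reservation was lost.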
bne(resflag, oldval, fail); + move(resflag, newval); + sc_d(resflag, addr); + beqz(resflag, again); + b(succ); + + bind(fail); + if (barrier) + dbar(0x700); + if (retold && oldval != R0) + move(oldval, resflag); + move(resflag, R0); + bind(succ); +} + +void MacroAssembler::cmpxchg(Address addr, Register oldval, Register newval, + Register tmp, bool retold, bool barrier, Label& succ, Label* fail) { + assert(oldval != tmp, "oldval != tmp"); + assert(newval != tmp, "newval != tmp"); + Label again, neq; + + bind(again); + ll_d(tmp, addr); + bne(tmp, oldval, neq); + move(tmp, newval); + sc_d(tmp, addr); + beqz(tmp, again); + b(succ); + + bind(neq); + if (barrier) + dbar(0x700); + if (retold && oldval != R0) + move(oldval, tmp); + if (fail) + b(*fail); +} + +void MacroAssembler::cmpxchg32(Address addr, Register oldval, Register newval, + Register resflag, bool sign, bool retold, bool barrier) { + assert(oldval != resflag, "oldval != resflag"); + assert(newval != resflag, "newval != resflag"); + Label again, succ, fail; + + bind(again); + ll_w(resflag, addr); + if (!sign) + lu32i_d(resflag, 0); + bne(resflag, oldval, fail); + move(resflag, newval); + sc_w(resflag, addr); + beqz(resflag, again); + b(succ); + + bind(fail); + if (barrier) + dbar(0x700); + if (retold && oldval != R0) + move(oldval, resflag); + move(resflag, R0); + bind(succ); +} + +void MacroAssembler::cmpxchg32(Address addr, Register oldval, Register newval, Register tmp, + bool sign, bool retold, bool barrier, Label& succ, Label* fail) { + assert(oldval != tmp, "oldval != tmp"); + assert(newval != tmp, "newval != tmp"); + Label again, neq; + + bind(again); + ll_w(tmp, addr); + if (!sign) + lu32i_d(tmp, 0); + bne(tmp, oldval, neq); + move(tmp, newval); + sc_w(tmp, addr); + beqz(tmp, again); + b(succ); + + bind(neq); + if (barrier) + dbar(0x700); + if (retold && oldval != R0) + move(oldval, tmp); + if (fail) + b(*fail); +} + +// be sure the three register is different +void MacroAssembler::rem_s(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp) { + //TODO: LA + guarantee(0, "LA not implemented yet"); +} + +// be sure the three register is different +void MacroAssembler::rem_d(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp) { + //TODO: LA + guarantee(0, "LA not implemented yet"); +} + +#ifdef COMPILER2 +// Fast_Lock and Fast_Unlock used by C2 + +// Because the transitions from emitted code to the runtime +// monitorenter/exit helper stubs are so slow it's critical that +// we inline both the stack-locking fast-path and the inflated fast path. +// +// See also: cmpFastLock and cmpFastUnlock. +// +// What follows is a specialized inline transliteration of the code +// in slow_enter() and slow_exit(). If we're concerned about I$ bloat +// another option would be to emit TrySlowEnter and TrySlowExit methods +// at startup-time. These methods would accept arguments as +// (Obj, Self, box, Scratch) and return success-failure +// indications in the icc.ZFlag. Fast_Lock and Fast_Unlock would simply +// marshal the arguments and emit calls to TrySlowEnter and TrySlowExit. +// In practice, however, the # of lock sites is bounded and is usually small. +// Besides the call overhead, TrySlowEnter and TrySlowExit might suffer +// if the processor uses simple bimodal branch predictors keyed by EIP +// Since the helper routines would be called from multiple synchronization +// sites. 
+// +// An even better approach would be write "MonitorEnter()" and "MonitorExit()" +// in java - using j.u.c and unsafe - and just bind the lock and unlock sites +// to those specialized methods. That'd give us a mostly platform-independent +// implementation that the JITs could optimize and inline at their pleasure. +// Done correctly, the only time we'd need to cross to native could would be +// to park() or unpark() threads. We'd also need a few more unsafe operators +// to (a) prevent compiler-JIT reordering of non-volatile accesses, and +// (b) explicit barriers or fence operations. +// +// TODO: +// +// * Arrange for C2 to pass "Self" into Fast_Lock and Fast_Unlock in one of the registers (scr). +// This avoids manifesting the Self pointer in the Fast_Lock and Fast_Unlock terminals. +// Given TLAB allocation, Self is usually manifested in a register, so passing it into +// the lock operators would typically be faster than reifying Self. +// +// * Ideally I'd define the primitives as: +// fast_lock (nax Obj, nax box, res, tmp, nax scr) where tmp and scr are KILLED. +// fast_unlock (nax Obj, box, res, nax tmp) where tmp are KILLED +// Unfortunately ADLC bugs prevent us from expressing the ideal form. +// Instead, we're stuck with a rather awkward and brittle register assignments below. +// Furthermore the register assignments are overconstrained, possibly resulting in +// sub-optimal code near the synchronization site. +// +// * Eliminate the sp-proximity tests and just use "== Self" tests instead. +// Alternately, use a better sp-proximity test. +// +// * Currently ObjectMonitor._Owner can hold either an sp value or a (THREAD *) value. +// Either one is sufficient to uniquely identify a thread. +// TODO: eliminate use of sp in _owner and use get_thread(tr) instead. +// +// * Intrinsify notify() and notifyAll() for the common cases where the +// object is locked by the calling thread but the waitlist is empty. +// avoid the expensive JNI call to JVM_Notify() and JVM_NotifyAll(). +// +// * use jccb and jmpb instead of jcc and jmp to improve code density. +// But beware of excessive branch density on AMD Opterons. +// +// * Both Fast_Lock and Fast_Unlock set the ICC.ZF to indicate success +// or failure of the fast-path. If the fast-path fails then we pass +// control to the slow-path, typically in C. In Fast_Lock and +// Fast_Unlock we often branch to DONE_LABEL, just to find that C2 +// will emit a conditional branch immediately after the node. +// So we have branches to branches and lots of ICC.ZF games. +// Instead, it might be better to have C2 pass a "FailureLabel" +// into Fast_Lock and Fast_Unlock. In the case of success, control +// will drop through the node. ICC.ZF is undefined at exit. 
+// In the case of failure, the node will branch directly to the +// FailureLabel + +// obj: object to lock +// box: on-stack box address (displaced header location) +// tmp: tmp -- KILLED +// scr: tmp -- KILLED +void MacroAssembler::fast_lock(Register objReg, Register boxReg, Register resReg, + Register tmpReg, Register scrReg) { + Label IsInflated, DONE, DONE_SET; + + // Ensure the register assignents are disjoint + guarantee(objReg != boxReg, ""); + guarantee(objReg != tmpReg, ""); + guarantee(objReg != scrReg, ""); + guarantee(boxReg != tmpReg, ""); + guarantee(boxReg != scrReg, ""); + + block_comment("FastLock"); + + if (PrintBiasedLockingStatistics) { + atomic_inc32((address)BiasedLocking::total_entry_count_addr(), 1, tmpReg, scrReg); + } + + if (EmitSync & 1) { + move(AT, R0); + return; + } else + if (EmitSync & 2) { + Label DONE_LABEL ; + if (UseBiasedLocking) { + // Note: tmpReg maps to the swap_reg argument and scrReg to the tmp_reg argument. + biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL); + } + + ld_d(tmpReg, Address(objReg, 0)) ; // fetch markword + ori(tmpReg, tmpReg, 0x1); + st_d(tmpReg, Address(boxReg, 0)); // Anticipate successful CAS + + cmpxchg(Address(objReg, 0), tmpReg, boxReg, scrReg, true, false, DONE_LABEL); // Updates tmpReg + + // Recursive locking + sub_d(tmpReg, tmpReg, SP); + li(AT, (7 - os::vm_page_size() )); + andr(tmpReg, tmpReg, AT); + st_d(tmpReg, Address(boxReg, 0)); + bind(DONE_LABEL) ; + } else { + // Possible cases that we'll encounter in fast_lock + // ------------------------------------------------ + // * Inflated + // -- unlocked + // -- Locked + // = by self + // = by other + // * biased + // -- by Self + // -- by other + // * neutral + // * stack-locked + // -- by self + // = sp-proximity test hits + // = sp-proximity test generates false-negative + // -- by other + // + + // TODO: optimize away redundant LDs of obj->mark and improve the markword triage + // order to reduce the number of conditional branches in the most common cases. + // Beware -- there's a subtle invariant that fetch of the markword + // at [FETCH], below, will never observe a biased encoding (*101b). + // If this invariant is not held we risk exclusion (safety) failure. + if (UseBiasedLocking && !UseOptoBiasInlining) { + Label succ, fail; + biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, succ, NULL); + b(fail); + bind(succ); + li(resReg, 1); + b(DONE); + bind(fail); + } + + ld_d(tmpReg, Address(objReg, 0)); //Fetch the markword of the object. + andi(AT, tmpReg, markOopDesc::monitor_value); + bnez(AT, IsInflated); // inflated vs stack-locked|neutral|bias + + // Attempt stack-locking ... + ori(tmpReg, tmpReg, markOopDesc::unlocked_value); + st_d(tmpReg, Address(boxReg, 0)); // Anticipate successful CAS + + if (PrintBiasedLockingStatistics) { + Label SUCC, FAIL; + cmpxchg(Address(objReg, 0), tmpReg, boxReg, scrReg, true, false, SUCC, &FAIL); // Updates tmpReg + bind(SUCC); + atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, AT, scrReg); + li(resReg, 1); + b(DONE); + bind(FAIL); + } else { + // If cmpxchg is succ, then scrReg = 1 + cmpxchg(Address(objReg, 0), tmpReg, boxReg, scrReg, true, false, DONE_SET); // Updates tmpReg + } + + // Recursive locking + // The object is stack-locked: markword contains stack pointer to BasicLock. + // Locked by current thread if difference with current SP is less than one page. 
+ sub_d(tmpReg, tmpReg, SP); + li(AT, 7 - os::vm_page_size()); + andr(tmpReg, tmpReg, AT); + st_d(tmpReg, Address(boxReg, 0)); + + if (PrintBiasedLockingStatistics) { + Label L; + // tmpReg == 0 => BiasedLocking::_fast_path_entry_count++ + bnez(tmpReg, L); + atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, AT, scrReg); + bind(L); + } + + sltui(resReg, tmpReg, 1); // resReg = (tmpReg == 0) ? 1 : 0 + b(DONE); + + bind(IsInflated); + // The object's monitor m is unlocked iff m->owner == NULL, + // otherwise m->owner may contain a thread or a stack address. + + // TODO: someday avoid the ST-before-CAS penalty by + // relocating (deferring) the following ST. + // We should also think about trying a CAS without having + // fetched _owner. If the CAS is successful we may + // avoid an RTO->RTS upgrade on the $line. + // Without cast to int32_t a movptr will destroy r10 which is typically obj + li(AT, (int32_t)intptr_t(markOopDesc::unused_mark())); + st_d(AT, Address(boxReg, 0)); + + ld_d(AT, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes() - 2)); + // if (m->owner != 0) => AT = 0, goto slow path. + move(scrReg, R0); + bnez(AT, DONE_SET); + +#ifndef OPT_THREAD + get_thread(TREG) ; +#endif + // It's inflated and appears unlocked + addi_d(tmpReg, tmpReg, ObjectMonitor::owner_offset_in_bytes() - 2); + cmpxchg(Address(tmpReg, 0), R0, TREG, scrReg, false, false); + // Intentional fall-through into DONE ... + + bind(DONE_SET); + move(resReg, scrReg); + + // DONE is a hot target - we'd really like to place it at the + // start of cache line by padding with NOPs. + // See the AMD and Intel software optimization manuals for the + // most efficient "long" NOP encodings. + // Unfortunately none of our alignment mechanisms suffice. + bind(DONE); + // At DONE the resReg is set as follows ... + // Fast_Unlock uses the same protocol. + // resReg == 1 -> Success + // resREg == 0 -> Failure - force control through the slow-path + + // Avoid branch-to-branch on AMD processors + // This appears to be superstition. + if (EmitSync & 32) nop() ; + + } +} + +// obj: object to unlock +// box: box address (displaced header location), killed. +// tmp: killed tmp; cannot be obj nor box. +// +// Some commentary on balanced locking: +// +// Fast_Lock and Fast_Unlock are emitted only for provably balanced lock sites. +// Methods that don't have provably balanced locking are forced to run in the +// interpreter - such methods won't be compiled to use fast_lock and fast_unlock. +// The interpreter provides two properties: +// I1: At return-time the interpreter automatically and quietly unlocks any +// objects acquired the current activation (frame). Recall that the +// interpreter maintains an on-stack list of locks currently held by +// a frame. +// I2: If a method attempts to unlock an object that is not held by the +// the frame the interpreter throws IMSX. +// +// Lets say A(), which has provably balanced locking, acquires O and then calls B(). +// B() doesn't have provably balanced locking so it runs in the interpreter. +// Control returns to A() and A() unlocks O. By I1 and I2, above, we know that O +// is still locked by A(). +// +// The only other source of unbalanced locking would be JNI. The "Java Native Interface: +// Programmer's Guide and Specification" claims that an object locked by jni_monitorenter +// should not be unlocked by "normal" java-level locking and vice-versa. The specification +// doesn't specify what will occur if a program engages in such mixed-mode locking, however. 
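+// As in fast_lock(), resReg is set to 1 when the unlock fast path succeeds and
+// to 0 when control must be passed to the runtime slow path.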
+ +void MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register resReg, + Register tmpReg, Register scrReg) { + Label DONE, DONE_SET, Stacked, Inflated; + + guarantee(objReg != boxReg, ""); + guarantee(objReg != tmpReg, ""); + guarantee(objReg != scrReg, ""); + guarantee(boxReg != tmpReg, ""); + guarantee(boxReg != scrReg, ""); + + block_comment("FastUnlock"); + + if (EmitSync & 4) { + // Disable - inhibit all inlining. Force control through the slow-path + move(AT, R0); + return; + } else + if (EmitSync & 8) { + Label DONE_LABEL ; + if (UseBiasedLocking) { + biased_locking_exit(objReg, tmpReg, DONE_LABEL); + } + // classic stack-locking code ... + ld_d(tmpReg, Address(boxReg, 0)) ; + assert_different_registers(AT, tmpReg); + li(AT, 0x1); + beq(tmpReg, R0, DONE_LABEL) ; + + cmpxchg(Address(objReg, 0), boxReg, tmpReg, AT, false, false); + bind(DONE_LABEL); + } else { + Label CheckSucc; + + // Critically, the biased locking test must have precedence over + // and appear before the (box->dhw == 0) recursive stack-lock test. + if (UseBiasedLocking && !UseOptoBiasInlining) { + Label succ, fail; + biased_locking_exit(objReg, tmpReg, succ); + b(fail); + bind(succ); + li(resReg, 1); + b(DONE); + bind(fail); + } + + ld_d(tmpReg, Address(boxReg, 0)); // Examine the displaced header + sltui(AT, tmpReg, 1); + beqz(tmpReg, DONE_SET); // 0 indicates recursive stack-lock + + ld_d(tmpReg, Address(objReg, 0)); // Examine the object's markword + andi(AT, tmpReg, markOopDesc::monitor_value); + beqz(AT, Stacked); // Inflated? + + bind(Inflated); + // It's inflated. + // Despite our balanced locking property we still check that m->_owner == Self + // as java routines or native JNI code called by this thread might + // have released the lock. + // Refer to the comments in synchronizer.cpp for how we might encode extra + // state in _succ so we can avoid fetching EntryList|cxq. + // + // I'd like to add more cases in fast_lock() and fast_unlock() -- + // such as recursive enter and exit -- but we have to be wary of + // I$ bloat, T$ effects and BP$ effects. + // + // If there's no contention try a 1-0 exit. That is, exit without + // a costly MEMBAR or CAS. See synchronizer.cpp for details on how + // we detect and recover from the race that the 1-0 exit admits. + // + // Conceptually Fast_Unlock() must execute a STST|LDST "release" barrier + // before it STs null into _owner, releasing the lock. Updates + // to data protected by the critical section must be visible before + // we drop the lock (and thus before any other thread could acquire + // the lock and observe the fields protected by the lock). 
+#ifndef OPT_THREAD + get_thread(TREG); +#endif + + // It's inflated + ld_d(scrReg, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes() - 2)); + xorr(scrReg, scrReg, TREG); + + ld_d(AT, Address(tmpReg, ObjectMonitor::recursions_offset_in_bytes() - 2)); + orr(scrReg, scrReg, AT); + + move(AT, R0); + bnez(scrReg, DONE_SET); + + ld_d(scrReg, Address(tmpReg, ObjectMonitor::cxq_offset_in_bytes() - 2)); + ld_d(AT, Address(tmpReg, ObjectMonitor::EntryList_offset_in_bytes() - 2)); + orr(scrReg, scrReg, AT); + + move(AT, R0); + bnez(scrReg, DONE_SET); + + membar(Assembler::Membar_mask_bits(LoadStore|StoreStore)); + st_d(R0, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes() - 2)); + li(resReg, 1); + b(DONE); + + bind(Stacked); + ld_d(tmpReg, Address(boxReg, 0)); + cmpxchg(Address(objReg, 0), boxReg, tmpReg, AT, false, false); + + bind(DONE_SET); + move(resReg, AT); + + if (EmitSync & 65536) { + bind (CheckSucc); + } + + bind(DONE); + + // Avoid branch to branch on AMD processors + if (EmitSync & 32768) { nop() ; } + } +} +#endif // COMPILER2 + +void MacroAssembler::align(int modulus) { + while (offset() % modulus != 0) nop(); +} + + +void MacroAssembler::verify_FPU(int stack_depth, const char* s) { + //Unimplemented(); +} + +Register caller_saved_registers[] = {T7, T5, T6, A0, A1, A2, A3, A4, A5, A6, A7, T0, T1, T2, T3, T8, T4, S8, RA, FP}; +Register caller_saved_registers_except_v0[] = {T7, T5, T6, A1, A2, A3, A4, A5, A6, A7, T0, T1, T2, T3, T8, T4, S8, RA, FP}; + + //TODO: LA +//In LA, F0~23 are all caller-saved registers +FloatRegister caller_saved_fpu_registers[] = {F0, F12, F13}; + +// We preserve all caller-saved register +void MacroAssembler::pushad(){ + int i; + // Fixed-point registers + int len = sizeof(caller_saved_registers) / sizeof(caller_saved_registers[0]); + addi_d(SP, SP, -1 * len * wordSize); + for (i = 0; i < len; i++) { + st_d(caller_saved_registers[i], SP, (len - i - 1) * wordSize); + } + + // Floating-point registers + len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]); + addi_d(SP, SP, -1 * len * wordSize); + for (i = 0; i < len; i++) { + fst_d(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize); + } +}; + +void MacroAssembler::popad(){ + int i; + // Floating-point registers + int len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]); + for (i = 0; i < len; i++) + { + fld_d(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize); + } + addi_d(SP, SP, len * wordSize); + + // Fixed-point registers + len = sizeof(caller_saved_registers) / sizeof(caller_saved_registers[0]); + for (i = 0; i < len; i++) + { + ld_d(caller_saved_registers[i], SP, (len - i - 1) * wordSize); + } + addi_d(SP, SP, len * wordSize); +}; + +// We preserve all caller-saved register except V0 +void MacroAssembler::pushad_except_v0() { + int i; + // Fixed-point registers + int len = sizeof(caller_saved_registers_except_v0) / sizeof(caller_saved_registers_except_v0[0]); + addi_d(SP, SP, -1 * len * wordSize); + for (i = 0; i < len; i++) { + st_d(caller_saved_registers_except_v0[i], SP, (len - i - 1) * wordSize); + } + + // Floating-point registers + len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]); + addi_d(SP, SP, -1 * len * wordSize); + for (i = 0; i < len; i++) { + fst_d(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize); + } +} + +void MacroAssembler::popad_except_v0() { + int i; + // Floating-point registers + int len = sizeof(caller_saved_fpu_registers) / 
sizeof(caller_saved_fpu_registers[0]); + for (i = 0; i < len; i++) { + fld_d(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize); + } + addi_d(SP, SP, len * wordSize); + + // Fixed-point registers + len = sizeof(caller_saved_registers_except_v0) / sizeof(caller_saved_registers_except_v0[0]); + for (i = 0; i < len; i++) { + ld_d(caller_saved_registers_except_v0[i], SP, (len - i - 1) * wordSize); + } + addi_d(SP, SP, len * wordSize); +} + +void MacroAssembler::push2(Register reg1, Register reg2) { + addi_d(SP, SP, -16); + st_d(reg1, SP, 8); + st_d(reg2, SP, 0); +} + +void MacroAssembler::pop2(Register reg1, Register reg2) { + ld_d(reg1, SP, 8); + ld_d(reg2, SP, 0); + addi_d(SP, SP, 16); +} + +void MacroAssembler::push(unsigned int bitset) { + unsigned char regs[31]; + int count = 0; + + bitset >>= 1; + for (int reg = 1; reg < 31; reg++) { + if (1 & bitset) + regs[count++] = reg; + bitset >>= 1; + } + + addi_d(SP, SP, -align_up(count, 2) * wordSize); + for (int i = 0; i < count; i ++) + st_d(as_Register(regs[i]), SP, i * wordSize); +} + +void MacroAssembler::pop(unsigned int bitset) { + unsigned char regs[31]; + int count = 0; + + bitset >>= 1; + for (int reg = 1; reg < 31; reg++) { + if (1 & bitset) + regs[count++] = reg; + bitset >>= 1; + } + + for (int i = 0; i < count; i ++) + ld_d(as_Register(regs[i]), SP, i * wordSize); + addi_d(SP, SP, align_up(count, 2) * wordSize); +} + +// for UseCompressedOops Option +void MacroAssembler::load_klass(Register dst, Register src) { + if(UseCompressedClassPointers){ + ld_wu(dst, Address(src, oopDesc::klass_offset_in_bytes())); + decode_klass_not_null(dst); + } else { + ld_d(dst, src, oopDesc::klass_offset_in_bytes()); + } +} + +void MacroAssembler::store_klass(Register dst, Register src) { + if(UseCompressedClassPointers){ + encode_klass_not_null(src); + st_w(src, dst, oopDesc::klass_offset_in_bytes()); + } else { + st_d(src, dst, oopDesc::klass_offset_in_bytes()); + } +} + +void MacroAssembler::load_prototype_header(Register dst, Register src) { + load_klass(dst, src); + ld_d(dst, Address(dst, Klass::prototype_header_offset())); +} + +void MacroAssembler::store_klass_gap(Register dst, Register src) { + if (UseCompressedClassPointers) { + st_w(src, dst, oopDesc::klass_gap_offset_in_bytes()); + } +} + +void MacroAssembler::access_load_at(BasicType type, DecoratorSet decorators, Register dst, Address src, + Register tmp1, Register thread_tmp) { + BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); + decorators = AccessInternal::decorator_fixup(decorators); + bool as_raw = (decorators & AS_RAW) != 0; + if (as_raw) { + bs->BarrierSetAssembler::load_at(this, decorators, type, dst, src, tmp1, thread_tmp); + } else { + bs->load_at(this, decorators, type, dst, src, tmp1, thread_tmp); + } +} + +void MacroAssembler::access_store_at(BasicType type, DecoratorSet decorators, Address dst, Register src, + Register tmp1, Register tmp2) { + BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); + decorators = AccessInternal::decorator_fixup(decorators); + bool as_raw = (decorators & AS_RAW) != 0; + if (as_raw) { + bs->BarrierSetAssembler::store_at(this, decorators, type, dst, src, tmp1, tmp2); + } else { + bs->store_at(this, decorators, type, dst, src, tmp1, tmp2); + } +} + +void MacroAssembler::load_heap_oop(Register dst, Address src, Register tmp1, + Register thread_tmp, DecoratorSet decorators) { + access_load_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1, thread_tmp); +} + +// Doesn't do verfication, 
generates fixed size code +void MacroAssembler::load_heap_oop_not_null(Register dst, Address src, Register tmp1, + Register thread_tmp, DecoratorSet decorators) { + access_load_at(T_OBJECT, IN_HEAP | IS_NOT_NULL | decorators, dst, src, tmp1, thread_tmp); +} + +void MacroAssembler::store_heap_oop(Address dst, Register src, Register tmp1, + Register tmp2, DecoratorSet decorators) { + access_store_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1, tmp2); +} + +// Used for storing NULLs. +void MacroAssembler::store_heap_oop_null(Address dst) { + access_store_at(T_OBJECT, IN_HEAP, dst, noreg, noreg, noreg); +} + +#ifdef ASSERT +void MacroAssembler::verify_heapbase(const char* msg) { + assert (UseCompressedOops || UseCompressedClassPointers, "should be compressed"); + assert (Universe::heap() != NULL, "java heap should be initialized"); +} +#endif + +// Algorithm must match oop.inline.hpp encode_heap_oop. +void MacroAssembler::encode_heap_oop(Register r) { +#ifdef ASSERT + verify_heapbase("MacroAssembler::encode_heap_oop:heap base corrupted?"); +#endif + verify_oop(r, "broken oop in encode_heap_oop"); + if (Universe::narrow_oop_base() == NULL) { + if (Universe::narrow_oop_shift() != 0) { + assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); + shr(r, LogMinObjAlignmentInBytes); + } + return; + } + + sub_d(AT, r, S5_heapbase); + maskeqz(r, AT, r); + if (Universe::narrow_oop_shift() != 0) { + assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); + shr(r, LogMinObjAlignmentInBytes); + } +} + +void MacroAssembler::encode_heap_oop(Register dst, Register src) { +#ifdef ASSERT + verify_heapbase("MacroAssembler::encode_heap_oop:heap base corrupted?"); +#endif + verify_oop(src, "broken oop in encode_heap_oop"); + if (Universe::narrow_oop_base() == NULL) { + if (Universe::narrow_oop_shift() != 0) { + assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); + srli_d(dst, src, LogMinObjAlignmentInBytes); + } else { + if (dst != src) { + move(dst, src); + } + } + return; + } + + sub_d(AT, src, S5_heapbase); + maskeqz(dst, AT, src); + if (Universe::narrow_oop_shift() != 0) { + assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); + shr(dst, LogMinObjAlignmentInBytes); + } +} + +void MacroAssembler::encode_heap_oop_not_null(Register r) { + assert (UseCompressedOops, "should be compressed"); +#ifdef ASSERT + if (CheckCompressedOops) { + Label ok; + bne(r, R0, ok); + stop("null oop passed to encode_heap_oop_not_null"); + bind(ok); + } +#endif + verify_oop(r, "broken oop in encode_heap_oop_not_null"); + if (Universe::narrow_oop_base() != NULL) { + sub_d(r, r, S5_heapbase); + } + if (Universe::narrow_oop_shift() != 0) { + assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); + shr(r, LogMinObjAlignmentInBytes); + } +} + +void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) { + assert (UseCompressedOops, "should be compressed"); +#ifdef ASSERT + if (CheckCompressedOops) { + Label ok; + bne(src, R0, ok); + stop("null oop passed to encode_heap_oop_not_null2"); + bind(ok); + } +#endif + verify_oop(src, "broken oop in encode_heap_oop_not_null2"); + if (Universe::narrow_oop_base() == NULL) { + if (Universe::narrow_oop_shift() != 0) { + assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); + srli_d(dst, src, LogMinObjAlignmentInBytes); + } else { + if (dst != src) { + move(dst, src); + } + } + return; + } + 
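+  // Non-NULL heap base: subtract it, then apply the shift (below) if one is in use.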
+ sub_d(dst, src, S5_heapbase); + if (Universe::narrow_oop_shift() != 0) { + assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); + shr(dst, LogMinObjAlignmentInBytes); + } +} + +void MacroAssembler::decode_heap_oop(Register r) { +#ifdef ASSERT + verify_heapbase("MacroAssembler::decode_heap_oop corrupted?"); +#endif + if (Universe::narrow_oop_base() == NULL) { + if (Universe::narrow_oop_shift() != 0) { + assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); + shl(r, LogMinObjAlignmentInBytes); + } + return; + } + + move(AT, r); + if (Universe::narrow_oop_shift() != 0) { + assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); + if (LogMinObjAlignmentInBytes <= 4) { + alsl_d(r, r, S5_heapbase, LogMinObjAlignmentInBytes - 1); + } else { + shl(r, LogMinObjAlignmentInBytes); + add_d(r, r, S5_heapbase); + } + } else { + add_d(r, r, S5_heapbase); + } + maskeqz(r, r, AT); + verify_oop(r, "broken oop in decode_heap_oop"); +} + +void MacroAssembler::decode_heap_oop(Register dst, Register src) { +#ifdef ASSERT + verify_heapbase("MacroAssembler::decode_heap_oop corrupted?"); +#endif + if (Universe::narrow_oop_base() == NULL) { + if (Universe::narrow_oop_shift() != 0) { + assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); + slli_d(dst, src, LogMinObjAlignmentInBytes); + } else { + if (dst != src) { + move(dst, src); + } + } + return; + } + + Register cond; + if (dst == src) { + cond = AT; + move(cond, src); + } else { + cond = src; + } + if (Universe::narrow_oop_shift() != 0) { + assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); + if (LogMinObjAlignmentInBytes <= 4) { + alsl_d(dst, src, S5_heapbase, LogMinObjAlignmentInBytes - 1); + } else { + slli_d(dst, src, LogMinObjAlignmentInBytes); + add_d(dst, dst, S5_heapbase); + } + } else { + add_d(dst, src, S5_heapbase); + } + maskeqz(dst, dst, cond); + verify_oop(dst, "broken oop in decode_heap_oop"); +} + +void MacroAssembler::decode_heap_oop_not_null(Register r) { + // Note: it will change flags + assert(UseCompressedOops, "should only be used for compressed headers"); + assert(Universe::heap() != NULL, "java heap should be initialized"); + // Cannot assert, unverified entry point counts instructions (see .ad file) + // vtableStubs also counts instructions in pd_code_size_limit. + // Also do not verify_oop as this is called by verify_oop. + if (Universe::narrow_oop_shift() != 0) { + assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); + if (Universe::narrow_oop_base() != NULL) { + if (LogMinObjAlignmentInBytes <= 4) { + alsl_d(r, r, S5_heapbase, LogMinObjAlignmentInBytes - 1); + } else { + shl(r, LogMinObjAlignmentInBytes); + add_d(r, r, S5_heapbase); + } + } else { + shl(r, LogMinObjAlignmentInBytes); + } + } else { + assert(Universe::narrow_oop_base() == NULL, "sanity"); + } +} + +void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) { + assert(UseCompressedOops, "should only be used for compressed headers"); + assert(Universe::heap() != NULL, "java heap should be initialized"); + // Cannot assert, unverified entry point counts instructions (see .ad file) + // vtableStubs also counts instructions in pd_code_size_limit. + // Also do not verify_oop as this is called by verify_oop. 
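+  // Reconstruct the full oop: shift src left by the oop shift and, if the heap
+  // base is non-NULL, add it; alsl_d fuses the shift and add for shifts of at
+  // most 4.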
+ if (Universe::narrow_oop_shift() != 0) { + assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); + if (Universe::narrow_oop_base() != NULL) { + if (LogMinObjAlignmentInBytes <= 4) { + alsl_d(dst, src, S5_heapbase, LogMinObjAlignmentInBytes - 1); + } else { + slli_d(dst, src, LogMinObjAlignmentInBytes); + add_d(dst, dst, S5_heapbase); + } + } else { + slli_d(dst, src, LogMinObjAlignmentInBytes); + } + } else { + assert (Universe::narrow_oop_base() == NULL, "sanity"); + if (dst != src) { + move(dst, src); + } + } +} + +void MacroAssembler::encode_klass_not_null(Register r) { + if (Universe::narrow_klass_base() != NULL) { + if (((uint64_t)Universe::narrow_klass_base() & 0xffffffff) == 0 + && Universe::narrow_klass_shift() == 0) { + bstrpick_d(r, r, 31, 0); + return; + } + assert(r != AT, "Encoding a klass in AT"); + li(AT, (int64_t)Universe::narrow_klass_base()); + sub_d(r, r, AT); + } + if (Universe::narrow_klass_shift() != 0) { + assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); + shr(r, LogKlassAlignmentInBytes); + } +} + +void MacroAssembler::encode_klass_not_null(Register dst, Register src) { + if (dst == src) { + encode_klass_not_null(src); + } else { + if (Universe::narrow_klass_base() != NULL) { + if (((uint64_t)Universe::narrow_klass_base() & 0xffffffff) == 0 + && Universe::narrow_klass_shift() == 0) { + bstrpick_d(dst, src, 31, 0); + return; + } + li(dst, (int64_t)Universe::narrow_klass_base()); + sub_d(dst, src, dst); + if (Universe::narrow_klass_shift() != 0) { + assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); + shr(dst, LogKlassAlignmentInBytes); + } + } else { + if (Universe::narrow_klass_shift() != 0) { + assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); + srli_d(dst, src, LogKlassAlignmentInBytes); + } else { + move(dst, src); + } + } + } +} + +void MacroAssembler::decode_klass_not_null(Register r) { + assert(UseCompressedClassPointers, "should only be used for compressed headers"); + assert(r != AT, "Decoding a klass in AT"); + // Cannot assert, unverified entry point counts instructions (see .ad file) + // vtableStubs also counts instructions in pd_code_size_limit. + // Also do not verify_oop as this is called by verify_oop. + if (Universe::narrow_klass_base() != NULL) { + if (Universe::narrow_klass_shift() == 0) { + if (((uint64_t)Universe::narrow_klass_base() & 0xffffffff) == 0) { + lu32i_d(r, (uint64_t)Universe::narrow_klass_base() >> 32); + } else { + li(AT, (int64_t)Universe::narrow_klass_base()); + add_d(r, r, AT); + } + } else { + assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); + assert(LogKlassAlignmentInBytes == Address::times_8, "klass not aligned on 64bits?"); + li(AT, (int64_t)Universe::narrow_klass_base()); + alsl_d(r, r, AT, Address::times_8 - 1); + } + } else { + if (Universe::narrow_klass_shift() != 0) { + assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); + shl(r, LogKlassAlignmentInBytes); + } + } +} + +void MacroAssembler::decode_klass_not_null(Register dst, Register src) { + assert(UseCompressedClassPointers, "should only be used for compressed headers"); + if (dst == src) { + decode_klass_not_null(dst); + } else { + // Cannot assert, unverified entry point counts instructions (see .ad file) + // vtableStubs also counts instructions in pd_code_size_limit. + // Also do not verify_oop as this is called by verify_oop. 
+ if (Universe::narrow_klass_base() != NULL) { + if (Universe::narrow_klass_shift() == 0) { + if (((uint64_t)Universe::narrow_klass_base() & 0xffffffff) == 0) { + move(dst, src); + lu32i_d(dst, (uint64_t)Universe::narrow_klass_base() >> 32); + } else { + li(dst, (int64_t)Universe::narrow_klass_base()); + add_d(dst, dst, src); + } + } else { + assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); + assert(LogKlassAlignmentInBytes == Address::times_8, "klass not aligned on 64bits?"); + li(dst, (int64_t)Universe::narrow_klass_base()); + alsl_d(dst, src, dst, Address::times_8 - 1); + } + } else { + if (Universe::narrow_klass_shift() != 0) { + assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); + slli_d(dst, src, LogKlassAlignmentInBytes); + } else { + move(dst, src); + } + } + } +} + +void MacroAssembler::reinit_heapbase() { + if (UseCompressedOops || UseCompressedClassPointers) { + if (Universe::heap() != NULL) { + if (Universe::narrow_oop_base() == NULL) { + move(S5_heapbase, R0); + } else { + li(S5_heapbase, (int64_t)Universe::narrow_ptrs_base()); + } + } else { + li(S5_heapbase, (intptr_t)Universe::narrow_ptrs_base_addr()); + ld_d(S5_heapbase, S5_heapbase, 0); + } + } +} + +void MacroAssembler::check_klass_subtype(Register sub_klass, + Register super_klass, + Register temp_reg, + Label& L_success) { +//implement ind gen_subtype_check + Label L_failure; + check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg, &L_success, &L_failure, NULL); + check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, noreg, &L_success, NULL); + bind(L_failure); +} + +void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass, + Register super_klass, + Register temp_reg, + Label* L_success, + Label* L_failure, + Label* L_slow_path, + RegisterOrConstant super_check_offset) { + assert_different_registers(sub_klass, super_klass, temp_reg); + bool must_load_sco = (super_check_offset.constant_or_zero() == -1); + if (super_check_offset.is_register()) { + assert_different_registers(sub_klass, super_klass, + super_check_offset.as_register()); + } else if (must_load_sco) { + assert(temp_reg != noreg, "supply either a temp or a register offset"); + } + + Label L_fallthrough; + int label_nulls = 0; + if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } + if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } + if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; } + assert(label_nulls <= 1, "at most one NULL in the batch"); + + int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); + int sco_offset = in_bytes(Klass::super_check_offset_offset()); + // If the pointers are equal, we are done (e.g., String[] elements). + // This self-check enables sharing of secondary supertype arrays among + // non-primary types such as array-of-interface. Otherwise, each such + // type would need its own customized SSA. + // We move this check to the front of the fast path because many + // type checks are in fact trivially successful in this manner, + // so we get a nicely predicted branch right at the start of the check. 
+  beq(sub_klass, super_klass, *L_success);
+  // Check the supertype display:
+  if (must_load_sco) {
+    ld_wu(temp_reg, super_klass, sco_offset);
+    super_check_offset = RegisterOrConstant(temp_reg);
+  }
+  slli_d(AT, super_check_offset.register_or_noreg(), Address::times_1);
+  add_d(AT, sub_klass, AT);
+  ld_d(AT, AT, super_check_offset.constant_or_zero()*Address::times_1);
+
+  // This check has worked decisively for primary supers.
+  // Secondary supers are sought in the super_cache ('super_cache_addr').
+  // (Secondary supers are interfaces and very deeply nested subtypes.)
+  // This works in the same check above because of a tricky aliasing
+  // between the super_cache and the primary super display elements.
+  // (The 'super_check_addr' can address either, as the case requires.)
+  // Note that the cache is updated below if it does not help us find
+  // what we need immediately.
+  // So if it was a primary super, we can just fail immediately.
+  // Otherwise, it's the slow path for us (no success at this point).
+
+  if (super_check_offset.is_register()) {
+    beq(super_klass, AT, *L_success);
+    addi_d(AT, super_check_offset.as_register(), -sc_offset);
+    if (L_failure == &L_fallthrough) {
+      beq(AT, R0, *L_slow_path);
+    } else {
+      bne_far(AT, R0, *L_failure);
+      b(*L_slow_path);
+    }
+  } else if (super_check_offset.as_constant() == sc_offset) {
+    // Need a slow path; fast failure is impossible.
+    if (L_slow_path == &L_fallthrough) {
+      beq(super_klass, AT, *L_success);
+    } else {
+      bne(super_klass, AT, *L_slow_path);
+      b(*L_success);
+    }
+  } else {
+    // No slow path; it's a fast decision.
+    if (L_failure == &L_fallthrough) {
+      beq(super_klass, AT, *L_success);
+    } else {
+      bne_far(super_klass, AT, *L_failure);
+      b(*L_success);
+    }
+  }
+
+  bind(L_fallthrough);
+}
+
+void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,
+                                                   Register super_klass,
+                                                   Register temp_reg,
+                                                   Register temp2_reg,
+                                                   Label* L_success,
+                                                   Label* L_failure,
+                                                   bool set_cond_codes) {
+  if (temp2_reg == noreg)
+    temp2_reg = TSR;
+  assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg);
+#define IS_A_TEMP(reg) ((reg) == temp_reg || (reg) == temp2_reg)
+
+  Label L_fallthrough;
+  int label_nulls = 0;
+  if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; }
+  if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; }
+  assert(label_nulls <= 1, "at most one NULL in the batch");
+
+  // a couple of useful fields in sub_klass:
+  int ss_offset = in_bytes(Klass::secondary_supers_offset());
+  int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
+  Address secondary_supers_addr(sub_klass, ss_offset);
+  Address super_cache_addr( sub_klass, sc_offset);
+
+  // Do a linear scan of the secondary super-klass chain.
+  // This code is rarely used, so simplicity is a virtue here.
+  // The repne_scan instruction uses fixed registers, which we must spill.
+  // Don't worry too much about pre-existing connections with the input regs.
+
+#ifndef PRODUCT
+  int* pst_counter = &SharedRuntime::_partial_subtype_ctr;
+  ExternalAddress pst_counter_addr((address) pst_counter);
+#endif //PRODUCT
+
+  // We will consult the secondary-super array.
+  ld_d(temp_reg, secondary_supers_addr);
+  // Load the array length.
+  ld_w(temp2_reg, Address(temp_reg, Array<Klass*>::length_offset_in_bytes()));
+  // Skip to start of data.
+  addi_d(temp_reg, temp_reg, Array<Klass*>::base_offset_in_bytes());
+
+  Label Loop, subtype;
+  bind(Loop);
+  beq(temp2_reg, R0, *L_failure);
+  ld_d(AT, temp_reg, 0);
+  addi_d(temp_reg, temp_reg, 1 * wordSize);
+  beq(AT, super_klass, subtype);
+  addi_d(temp2_reg, temp2_reg, -1);
+  b(Loop);
+
+  bind(subtype);
+  st_d(super_klass, super_cache_addr);
+  if (L_success != &L_fallthrough) {
+    b(*L_success);
+  }
+
+  // Success. Cache the super we found and proceed in triumph.
+#undef IS_A_TEMP
+
+  bind(L_fallthrough);
+}
+
+void MacroAssembler::get_vm_result(Register oop_result, Register java_thread) {
+  ld_d(oop_result, Address(java_thread, JavaThread::vm_result_offset()));
+  st_d(R0, Address(java_thread, JavaThread::vm_result_offset()));
+  verify_oop(oop_result, "broken oop in call_VM_base");
+}
+
+void MacroAssembler::get_vm_result_2(Register metadata_result, Register java_thread) {
+  ld_d(metadata_result, Address(java_thread, JavaThread::vm_result_2_offset()));
+  st_d(R0, Address(java_thread, JavaThread::vm_result_2_offset()));
+}
+
+Address MacroAssembler::argument_address(RegisterOrConstant arg_slot,
+                                         int extra_slot_offset) {
+  // cf. TemplateTable::prepare_invoke(), if (load_receiver).
+  int stackElementSize = Interpreter::stackElementSize;
+  int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0);
+#ifdef ASSERT
+  int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1);
+  assert(offset1 - offset == stackElementSize, "correct arithmetic");
+#endif
+  Register scale_reg = NOREG;
+  Address::ScaleFactor scale_factor = Address::no_scale;
+  if (arg_slot.is_constant()) {
+    offset += arg_slot.as_constant() * stackElementSize;
+  } else {
+    scale_reg = arg_slot.as_register();
+    scale_factor = Address::times_8;
+  }
+  // We don't push RA on stack in prepare_invoke.
+  // offset += wordSize; // return PC is on stack
+  if(scale_reg==NOREG) return Address(SP, offset);
+  else {
+    alsl_d(scale_reg, scale_reg, SP, scale_factor - 1);
+    return Address(scale_reg, offset);
+  }
+}
+
+SkipIfEqual::~SkipIfEqual() {
+  _masm->bind(_label);
+}
+
+void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2) {
+  switch (size_in_bytes) {
+  case 8: ld_d(dst, src); break;
+  case 4: ld_w(dst, src); break;
+  case 2: is_signed ? ld_h(dst, src) : ld_hu(dst, src); break;
+  case 1: is_signed ? ld_b( dst, src) : ld_bu( dst, src); break;
+  default: ShouldNotReachHere();
+  }
+}
+
+void MacroAssembler::store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2) {
+  switch (size_in_bytes) {
+  case 8: st_d(src, dst); break;
+  case 4: st_w(src, dst); break;
+  case 2: st_h(src, dst); break;
+  case 1: st_b(src, dst); break;
+  default: ShouldNotReachHere();
+  }
+}
+
+// Look up the method for a megamorphic invokeinterface call.
+// The target method is determined by <intf_klass, itable_index>.
+// The receiver klass is in recv_klass.
+// On success, the result will be in method_result, and execution falls through.
+// On failure, execution transfers to the given label.
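For readers unfamiliar with itable dispatch, here is a rough, self-contained C++ model of the scan that lookup_interface_method (below) emits. The data structures and names (ItableEntry, lookup_interface_method_ref) are illustrative assumptions, not HotSpot's real Klass/itable layout:

    #include <cstddef>

    struct Method;                      // opaque for this sketch
    struct ItableEntry {
      const void*    interface;         // a null interface slot terminates the table
      Method* const* methods;           // method block for this interface
    };

    // Walk the (interface, methods) entries; on a hit, index the method block
    // with itable_index.  Returning null corresponds to the branch the real
    // code takes to L_no_such_interface.
    static Method* lookup_interface_method_ref(const ItableEntry* itable,
                                               const void* intf,
                                               size_t itable_index) {
      for (const ItableEntry* e = itable; e->interface != nullptr; ++e) {
        if (e->interface == intf) {
          return e->methods[itable_index];
        }
      }
      return nullptr;
    }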
+void MacroAssembler::lookup_interface_method(Register recv_klass, + Register intf_klass, + RegisterOrConstant itable_index, + Register method_result, + Register scan_temp, + Label& L_no_such_interface, + bool return_method) { + assert_different_registers(recv_klass, intf_klass, scan_temp, AT); + assert_different_registers(method_result, intf_klass, scan_temp, AT); + assert(recv_klass != method_result || !return_method, + "recv_klass can be destroyed when method isn't needed"); + + assert(itable_index.is_constant() || itable_index.as_register() == method_result, + "caller must use same register for non-constant itable index as for method"); + + // Compute start of first itableOffsetEntry (which is at the end of the vtable) + int vtable_base = in_bytes(Klass::vtable_start_offset()); + int itentry_off = itableMethodEntry::method_offset_in_bytes(); + int scan_step = itableOffsetEntry::size() * wordSize; + int vte_size = vtableEntry::size() * wordSize; + Address::ScaleFactor times_vte_scale = Address::times_ptr; + assert(vte_size == wordSize, "else adjust times_vte_scale"); + + ld_w(scan_temp, Address(recv_klass, Klass::vtable_length_offset())); + + // %%% Could store the aligned, prescaled offset in the klassoop. + alsl_d(scan_temp, scan_temp, recv_klass, times_vte_scale - 1); + addi_d(scan_temp, scan_temp, vtable_base); + + if (return_method) { + // Adjust recv_klass by scaled itable_index, so we can free itable_index. + if (itable_index.is_constant()) { + li(AT, (itable_index.as_constant() * itableMethodEntry::size() * wordSize) + itentry_off); + add_d(recv_klass, recv_klass, AT); + } else { + assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below"); + alsl_d(AT, itable_index.as_register(), recv_klass, (int)Address::times_ptr - 1); + addi_d(recv_klass, AT, itentry_off); + } + } + + Label search, found_method; + + ld_d(method_result, Address(scan_temp, itableOffsetEntry::interface_offset_in_bytes())); + beq(intf_klass, method_result, found_method); + + bind(search); + // Check that the previous entry is non-null. A null entry means that + // the receiver class doesn't implement the interface, and wasn't the + // same as when the caller was compiled. + beqz(method_result, L_no_such_interface); + addi_d(scan_temp, scan_temp, scan_step); + ld_d(method_result, Address(scan_temp, itableOffsetEntry::interface_offset_in_bytes())); + bne(intf_klass, method_result, search); + + bind(found_method); + if (return_method) { + // Got a hit. + ld_wu(scan_temp, Address(scan_temp, itableOffsetEntry::offset_offset_in_bytes())); + ldx_d(method_result, recv_klass, scan_temp); + } +} + +// virtual method calling +void MacroAssembler::lookup_virtual_method(Register recv_klass, + RegisterOrConstant vtable_index, + Register method_result) { + const int base = in_bytes(Klass::vtable_start_offset()); + assert(vtableEntry::size() * wordSize == wordSize, "else adjust the scaling in the code below"); + + if (vtable_index.is_constant()) { + li(AT, vtable_index.as_constant()); + alsl_d(AT, AT, recv_klass, Address::times_ptr - 1); + } else { + alsl_d(AT, vtable_index.as_register(), recv_klass, Address::times_ptr - 1); + } + + ld_d(method_result, AT, base + vtableEntry::method_offset_in_bytes()); +} + +#ifdef COMPILER2 +// Compare strings, used for char[] and byte[]. 
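As a reading aid for the string-compare intrinsic that follows, a scalar C++ reference of the same contract might look like the sketch below. It is illustrative only and takes element counts directly, whereas the stub first adjusts the incoming counts for UTF-16 data:

    #include <algorithm>
    #include <cstdint>

    // L/R are uint8_t for Latin-1 and uint16_t for UTF-16 elements, mirroring
    // the LL/LU/UL/UU encoding combinations handled by the stub.
    template <typename L, typename R>
    static int string_compare_ref(const L* s1, int len1, const R* s2, int len2) {
      int n = std::min(len1, len2);
      for (int i = 0; i < n; i++) {
        if (s1[i] != s2[i]) return int(s1[i]) - int(s2[i]);  // first differing element
      }
      return len1 - len2;  // equal prefix: the shorter string sorts first
    }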
+void MacroAssembler::string_compare(Register str1, Register str2, + Register cnt1, Register cnt2, Register result, + int ae) { + Label L, Loop, haveResult, done; + + bool isLL = ae == StrIntrinsicNode::LL; + bool isLU = ae == StrIntrinsicNode::LU; + bool isUL = ae == StrIntrinsicNode::UL; + + bool str1_isL = isLL || isLU; + bool str2_isL = isLL || isUL; + + if (!str1_isL) srli_w(cnt1, cnt1, 1); + if (!str2_isL) srli_w(cnt2, cnt2, 1); + + // compute the and difference of lengths (in result) + sub_d(result, cnt1, cnt2); // result holds the difference of two lengths + + // compute the shorter length (in cnt1) + bge(cnt2, cnt1, Loop); + move(cnt1, cnt2); + + // Now the shorter length is in cnt1 and cnt2 can be used as a tmp register + bind(Loop); // Loop begin + if (str1_isL) { + ld_bu(AT, str1, 0); + } else { + ld_hu(AT, str1, 0); + } + beq(cnt1, R0, done); + + // compare current character + if (str2_isL) { + ld_bu(cnt2, str2, 0); + } else { + ld_hu(cnt2, str2, 0); + } + addi_d(str1, str1, str1_isL ? 1 : 2); + bne(AT, cnt2, haveResult); + addi_d(str2, str2, str2_isL ? 1 : 2); + addi_d(cnt1, cnt1, -1); + b(Loop); + + bind(haveResult); + sub_d(result, AT, cnt2); + + bind(done); +} + +// Compare char[] or byte[] arrays or substrings. +void MacroAssembler::arrays_equals(Register str1, Register str2, + Register cnt, Register tmp1, Register tmp2, Register result, + bool is_char) { + Label Loop, LoopEnd, True, False; + + addi_d(result, R0, 1); + beq(str1, str2, True); // same char[] ? + beqz(cnt, True); + + addi_d(AT, R0, is_char ? wordSize/2 : wordSize); + bind(Loop); + blt(cnt, AT, LoopEnd); + ld_d(tmp1, str1, 0); + ld_d(tmp2, str2, 0); + bne(tmp1, tmp2, False); + addi_d(str1, str1, 8); + addi_d(str2, str2, 8); + addi_d(cnt, cnt, is_char ? -wordSize/2 : -wordSize); + b(Loop); + + bind(LoopEnd); + beqz(cnt, True); + // compare current character + if (is_char) { + ld_hu(tmp1, str1, 0); + ld_hu(tmp2, str2, 0); + } else { + ld_bu(tmp1, str1, 0); + ld_bu(tmp2, str2, 0); + } + bne(tmp1, tmp2, False); + addi_d(str1, str1, is_char ? 2 : 1); + addi_d(str2, str2, is_char ? 2 : 1); + addi_d(cnt, cnt, -1); + b(LoopEnd); + + bind(False); + addi_d(result, R0, 0); + + bind(True); +} +#endif // COMPILER2 + +void MacroAssembler::load_byte_map_base(Register reg) { + jbyte *byte_map_base = + ((CardTableBarrierSet*)(BarrierSet::barrier_set()))->card_table()->byte_map_base(); + + // Strictly speaking the byte_map_base isn't an address at all, and it might + // even be negative. It is thus materialised as a constant. + li(reg, (uint64_t)byte_map_base); +} + +// This method checks if provided byte array contains byte with highest bit set. +void MacroAssembler::has_negatives(Register ary1, Register len, Register result) { + Label Loop, End, Nega, Done; + + orr(result, R0, R0); + bge(R0, len, Done); + + li(AT, 0x8080808080808080); + + addi_d(len, len, -8); + blt(len, R0, End); + + bind(Loop); + ld_d(result, ary1, 0); + andr(result, result, AT); + bnez(result, Nega); + beqz(len, Done); + addi_d(len, len, -8); + addi_d(ary1, ary1, 8); + bge(len, R0, Loop); + + bind(End); + ld_d(result, ary1, 0); + slli_d(len, len, 3); + sub_d(len, R0, len); + sll_d(result, result, len); + andr(result, result, AT); + beqz(result, Done); + + bind(Nega); + ori(result, R0, 1); + + bind(Done); +} + +// Compress char[] to byte[]. len must be positive int. 
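The compression routine below packs UTF-16 chars into Latin-1 bytes, eight input bytes at a time, and bails out as soon as any char has bits set above 0xff. A scalar sketch of the same contract (a hypothetical helper, not the intrinsic itself):

    #include <cstdint>

    // Returns len on success (all chars were Latin-1) and 0 on failure,
    // matching the result-register convention of the stub below.
    static int char_array_compress_ref(const uint16_t* src, uint8_t* dst, int len) {
      for (int i = 0; i < len; i++) {
        if (src[i] & 0xff00) return 0;              // not Latin-1: take the Fail path
        dst[i] = static_cast<uint8_t>(src[i]);
      }
      return len;
    }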
+// jtreg: TestStringIntrinsicRangeChecks.java +void MacroAssembler::char_array_compress(Register src, Register dst, + Register len, Register result, + Register tmp1, Register tmp2, + Register tmp3) { + Label Loop, Done, Once, Fail; + + move(result, len); + bge(R0, result, Done); + + srli_w(AT, len, 2); + andi(len, len, 3); + + li(tmp3, 0xff00ff00ff00ff00); + + bind(Loop); + beqz(AT, Once); + ld_d(tmp1, src, 0); + andr(tmp2, tmp3, tmp1); // not latin-1, stop here + bnez(tmp2, Fail); + + // 0x00a100b200c300d4 -> 0x00000000a1b2c3d4 + srli_d(tmp2, tmp1, 8); + orr(tmp2, tmp2, tmp1); // 0x00a1a1b2b2c3c3d4 + bstrpick_d(tmp1, tmp2, 47, 32); // 0x0000a1b2 + slli_d(tmp1, tmp1, 16); // 0xa1b20000 + bstrins_d(tmp1, tmp2, 15, 0); // 0xa1b2c3d4 + + st_w(tmp1, dst, 0); + addi_w(AT, AT, -1); + addi_d(dst, dst, 4); + addi_d(src, src, 8); + b(Loop); + + bind(Once); + beqz(len, Done); + ld_d(AT, src, 0); + + bstrpick_d(tmp1, AT, 15, 0); + andr(tmp2, tmp3, tmp1); + bnez(tmp2, Fail); + st_b(tmp1, dst, 0); + addi_w(len, len, -1); + + beqz(len, Done); + bstrpick_d(tmp1, AT, 31, 16); + andr(tmp2, tmp3, tmp1); + bnez(tmp2, Fail); + st_b(tmp1, dst, 1); + addi_w(len, len, -1); + + beqz(len, Done); + bstrpick_d(tmp1, AT, 47, 32); + andr(tmp2, tmp3, tmp1); + bnez(tmp2, Fail); + st_b(tmp1, dst, 2); + b(Done); + + bind(Fail); + move(result, R0); + + bind(Done); +} + +// Inflate byte[] to char[]. len must be positive int. +// jtreg:test/jdk/sun/nio/cs/FindDecoderBugs.java +void MacroAssembler::byte_array_inflate(Register src, Register dst, Register len, + Register tmp1, Register tmp2) { + Label Loop, Once, Done; + + bge(R0, len, Done); + + srli_w(AT, len, 2); + andi(len, len, 3); + + bind(Loop); + beqz(AT, Once); + ld_wu(tmp1, src, 0); + + // 0x00000000a1b2c3d4 -> 0x00a100b200c300d4 + bstrpick_d(tmp2, tmp1, 7, 0); + srli_d(tmp1, tmp1, 8); + bstrins_d(tmp2, tmp1, 23, 16); + srli_d(tmp1, tmp1, 8); + bstrins_d(tmp2, tmp1, 39, 32); + srli_d(tmp1, tmp1, 8); + bstrins_d(tmp2, tmp1, 55, 48); + + st_d(tmp2, dst, 0); + addi_w(AT, AT, -1); + addi_d(dst, dst, 8); + addi_d(src, src, 4); + b(Loop); + + bind(Once); + beqz(len, Done); + ld_wu(tmp1, src, 0); + + bstrpick_d(tmp2, tmp1, 7, 0); + st_h(tmp2, dst, 0); + addi_w(len, len, -1); + + beqz(len, Done); + bstrpick_d(tmp2, tmp1, 15, 8); + st_h(tmp2, dst, 2); + addi_w(len, len, -1); + + beqz(len, Done); + bstrpick_d(tmp2, tmp1, 23, 16); + st_h(tmp2, dst, 4); + + bind(Done); +} + +void MacroAssembler::string_indexof_char(Register str1, Register cnt1, + Register ch, Register result, + Register tmp1, Register tmp2, + Register tmp3) +{ + Label CH1_LOOP, HAS_ZERO, DO1_SHORT, DO1_LOOP, NOMATCH, DONE; + + beqz(cnt1, NOMATCH); + + move(result, R0); + ori(tmp1, R0, 4); + blt(cnt1, tmp1, DO1_LOOP); + + // UTF-16 char occupies 16 bits + // ch -> chchchch + bstrins_d(ch, ch, 31, 16); + bstrins_d(ch, ch, 63, 32); + + li(tmp2, 0x0001000100010001); + li(tmp3, 0x7fff7fff7fff7fff); + + bind(CH1_LOOP); + ld_d(AT, str1, 0); + xorr(AT, ch, AT); + sub_d(tmp1, AT, tmp2); + orr(AT, AT, tmp3); + andn(tmp1, tmp1, AT); + bnez(tmp1, HAS_ZERO); + addi_d(str1, str1, 8); + addi_d(result, result, 4); + + // meet the end of string + beq(cnt1, result, NOMATCH); + + addi_d(tmp1, result, 4); + bge(tmp1, cnt1, DO1_SHORT); + b(CH1_LOOP); + + bind(HAS_ZERO); + ctz_d(tmp1, tmp1); + srli_d(tmp1, tmp1, 4); + add_d(result, result, tmp1); + b(DONE); + + // restore ch + bind(DO1_SHORT); + bstrpick_d(ch, ch, 15, 0); + + bind(DO1_LOOP); + ld_hu(tmp1, str1, 0); + beq(ch, tmp1, DONE); + addi_d(str1, str1, 2); + addi_d(result, 
result, 1); + blt(result, cnt1, DO1_LOOP); + + bind(NOMATCH); + addi_d(result, R0, -1); + + bind(DONE); +} + +void MacroAssembler::clear_jweak_tag(Register possibly_jweak) { + const int32_t inverted_jweak_mask = ~static_cast(JNIHandles::weak_tag_mask); + STATIC_ASSERT(inverted_jweak_mask == -2); // otherwise check this code + // The inverted mask is sign-extended + li(AT, inverted_jweak_mask); + andr(possibly_jweak, AT, possibly_jweak); +} + +void MacroAssembler::resolve_jobject(Register value, + Register thread, + Register tmp) { + assert_different_registers(value, thread, tmp); + Label done, not_weak; + beq(value, R0, done); // Use NULL as-is. + li(AT, JNIHandles::weak_tag_mask); // Test for jweak tag. + andr(AT, value, AT); + beq(AT, R0, not_weak); + // Resolve jweak. + access_load_at(T_OBJECT, IN_NATIVE | ON_PHANTOM_OOP_REF, + value, Address(value, -JNIHandles::weak_tag_value), tmp, thread); + verify_oop(value); + b(done); + bind(not_weak); + // Resolve (untagged) jobject. + access_load_at(T_OBJECT, IN_NATIVE, value, Address(value, 0), tmp, thread); + verify_oop(value); + bind(done); +} + +void MacroAssembler::lea(Register rd, Address src) { + Register dst = rd; + Register base = src.base(); + Register index = src.index(); + + int scale = src.scale(); + int disp = src.disp(); + + if (index == noreg) { + if (is_simm(disp, 12)) { + addi_d(dst, base, disp); + } else { + lu12i_w(AT, split_low20(disp >> 12)); + if (split_low12(disp)) + ori(AT, AT, split_low12(disp)); + add_d(dst, base, AT); + } + } else { + if (scale == 0) { + if (is_simm(disp, 12)) { + add_d(AT, base, index); + addi_d(dst, AT, disp); + } else { + lu12i_w(AT, split_low20(disp >> 12)); + if (split_low12(disp)) + ori(AT, AT, split_low12(disp)); + add_d(AT, base, AT); + add_d(dst, AT, index); + } + } else { + if (is_simm(disp, 12)) { + alsl_d(AT, index, base, scale - 1); + addi_d(dst, AT, disp); + } else { + lu12i_w(AT, split_low20(disp >> 12)); + if (split_low12(disp)) + ori(AT, AT, split_low12(disp)); + add_d(AT, AT, base); + alsl_d(dst, index, AT, scale - 1); + } + } + } +} + +void MacroAssembler::lea(Register dst, AddressLiteral adr) { + code_section()->relocate(pc(), adr.rspec()); + pcaddi(dst, (adr.target() - pc()) >> 2); +} + +int MacroAssembler::patched_branch(int dest_pos, int inst, int inst_pos) { + int v = (dest_pos - inst_pos) >> 2; + switch(high(inst, 6)) { + case beq_op: + case bne_op: + case blt_op: + case bge_op: + case bltu_op: + case bgeu_op: + assert(is_simm16(v), "must be simm16"); +#ifndef PRODUCT + if(!is_simm16(v)) + { + tty->print_cr("must be simm16"); + tty->print_cr("Inst: %x", inst); + } +#endif + + inst &= 0xfc0003ff; + inst |= ((v & 0xffff) << 10); + break; + case beqz_op: + case bnez_op: + case bccondz_op: + assert(is_simm(v, 21), "must be simm21"); +#ifndef PRODUCT + if(!is_simm(v, 21)) + { + tty->print_cr("must be simm21"); + tty->print_cr("Inst: %x", inst); + } +#endif + + inst &= 0xfc0003e0; + inst |= ( ((v & 0xffff) << 10) | ((v >> 16) & 0x1f) ); + break; + case b_op: + case bl_op: + assert(is_simm(v, 26), "must be simm26"); +#ifndef PRODUCT + if(!is_simm(v, 26)) + { + tty->print_cr("must be simm26"); + tty->print_cr("Inst: %x", inst); + } +#endif + + inst &= 0xfc000000; + inst |= ( ((v & 0xffff) << 10) | ((v >> 16) & 0x3ff) ); + break; + default: + ShouldNotReachHere(); + break; + } + return inst; +} + +void MacroAssembler::cmp_cmov(Register op1, + Register op2, + Register dst, + Register src1, + Register src2, + CMCompare cmp, + bool is_signed) { + switch (cmp) { + case EQ: + sub_d(AT, op1, 
op2); + if (dst == src2) { + masknez(dst, src2, AT); + maskeqz(AT, src1, AT); + } else { + maskeqz(dst, src1, AT); + masknez(AT, src2, AT); + } + break; + + case NE: + sub_d(AT, op1, op2); + if (dst == src2) { + maskeqz(dst, src2, AT); + masknez(AT, src1, AT); + } else { + masknez(dst, src1, AT); + maskeqz(AT, src2, AT); + } + break; + + case GT: + if (is_signed) { + slt(AT, op2, op1); + } else { + sltu(AT, op2, op1); + } + if(dst == src2) { + maskeqz(dst, src2, AT); + masknez(AT, src1, AT); + } else { + masknez(dst, src1, AT); + maskeqz(AT, src2, AT); + } + break; + case GE: + if (is_signed) { + slt(AT, op1, op2); + } else { + sltu(AT, op1, op2); + } + if(dst == src2) { + masknez(dst, src2, AT); + maskeqz(AT, src1, AT); + } else { + maskeqz(dst, src1, AT); + masknez(AT, src2, AT); + } + break; + + case LT: + if (is_signed) { + slt(AT, op1, op2); + } else { + sltu(AT, op1, op2); + } + if(dst == src2) { + maskeqz(dst, src2, AT); + masknez(AT, src1, AT); + } else { + masknez(dst, src1, AT); + maskeqz(AT, src2, AT); + } + break; + case LE: + if (is_signed) { + slt(AT, op2, op1); + } else { + sltu(AT, op2, op1); + } + if(dst == src2) { + masknez(dst, src2, AT); + maskeqz(AT, src1, AT); + } else { + maskeqz(dst, src1, AT); + masknez(AT, src2, AT); + } + break; + default: + Unimplemented(); + } + OR(dst, dst, AT); +} + +void MacroAssembler::cmp_cmov(Register op1, + Register op2, + Register dst, + Register src, + CMCompare cmp, + bool is_signed) { + switch (cmp) { + case EQ: + sub_d(AT, op1, op2); + maskeqz(dst, dst, AT); + masknez(AT, src, AT); + break; + + case NE: + sub_d(AT, op1, op2); + masknez(dst, dst, AT); + maskeqz(AT, src, AT); + break; + + case GT: + if (is_signed) { + slt(AT, op2, op1); + } else { + sltu(AT, op2, op1); + } + masknez(dst, dst, AT); + maskeqz(AT, src, AT); + break; + + case GE: + if (is_signed) { + slt(AT, op1, op2); + } else { + sltu(AT, op1, op2); + } + maskeqz(dst, dst, AT); + masknez(AT, src, AT); + break; + + case LT: + if (is_signed) { + slt(AT, op1, op2); + } else { + sltu(AT, op1, op2); + } + masknez(dst, dst, AT); + maskeqz(AT, src, AT); + break; + + case LE: + if (is_signed) { + slt(AT, op2, op1); + } else { + sltu(AT, op2, op1); + } + maskeqz(dst, dst, AT); + masknez(AT, src, AT); + break; + + default: + Unimplemented(); + } + OR(dst, dst, AT); +} + + +void MacroAssembler::cmp_cmov(FloatRegister op1, + FloatRegister op2, + Register dst, + Register src, + FloatRegister tmp1, + FloatRegister tmp2, + CMCompare cmp, + bool is_float) { + movgr2fr_d(tmp1, dst); + movgr2fr_d(tmp2, src); + + switch(cmp) { + case EQ: + if (is_float) { + fcmp_ceq_s(FCC0, op1, op2); + } else { + fcmp_ceq_d(FCC0, op1, op2); + } + fsel(tmp1, tmp1, tmp2, FCC0); + break; + + case NE: + if (is_float) { + fcmp_ceq_s(FCC0, op1, op2); + } else { + fcmp_ceq_d(FCC0, op1, op2); + } + fsel(tmp1, tmp2, tmp1, FCC0); + break; + + case GT: + if (is_float) { + fcmp_cule_s(FCC0, op1, op2); + } else { + fcmp_cule_d(FCC0, op1, op2); + } + fsel(tmp1, tmp2, tmp1, FCC0); + break; + + case GE: + if (is_float) { + fcmp_cult_s(FCC0, op1, op2); + } else { + fcmp_cult_d(FCC0, op1, op2); + } + fsel(tmp1, tmp2, tmp1, FCC0); + break; + + case LT: + if (is_float) { + fcmp_cult_s(FCC0, op1, op2); + } else { + fcmp_cult_d(FCC0, op1, op2); + } + fsel(tmp1, tmp1, tmp2, FCC0); + break; + + case LE: + if (is_float) { + fcmp_cule_s(FCC0, op1, op2); + } else { + fcmp_cule_d(FCC0, op1, op2); + } + fsel(tmp1, tmp1, tmp2, FCC0); + break; + + default: + Unimplemented(); + } + + movfr2gr_d(dst, tmp1); +} + +void 
MacroAssembler::cmp_cmov(FloatRegister op1, + FloatRegister op2, + FloatRegister dst, + FloatRegister src, + CMCompare cmp, + bool is_float) { + switch(cmp) { + case EQ: + if (!is_float) { + fcmp_ceq_d(FCC0, op1, op2); + } else { + fcmp_ceq_s(FCC0, op1, op2); + } + fsel(dst, dst, src, FCC0); + break; + + case NE: + if (!is_float) { + fcmp_ceq_d(FCC0, op1, op2); + } else { + fcmp_ceq_s(FCC0, op1, op2); + } + fsel(dst, src, dst, FCC0); + break; + + case GT: + if (!is_float) { + fcmp_cule_d(FCC0, op1, op2); + } else { + fcmp_cule_s(FCC0, op1, op2); + } + fsel(dst, src, dst, FCC0); + break; + + case GE: + if (!is_float) { + fcmp_cult_d(FCC0, op1, op2); + } else { + fcmp_cult_s(FCC0, op1, op2); + } + fsel(dst, src, dst, FCC0); + break; + + case LT: + if (!is_float) { + fcmp_cult_d(FCC0, op1, op2); + } else { + fcmp_cult_s(FCC0, op1, op2); + } + fsel(dst, dst, src, FCC0); + break; + + case LE: + if (!is_float) { + fcmp_cule_d(FCC0, op1, op2); + } else { + fcmp_cule_s(FCC0, op1, op2); + } + fsel(dst, dst, src, FCC0); + break; + + default: + Unimplemented(); + } +} + +void MacroAssembler::cmp_cmov(Register op1, + Register op2, + FloatRegister dst, + FloatRegister src, + FloatRegister tmp1, + FloatRegister tmp2, + CMCompare cmp) { + movgr2fr_w(tmp1, R0); + + switch (cmp) { + case EQ: + sub_d(AT, op1, op2); + movgr2fr_w(tmp2, AT); + fcmp_ceq_s(FCC0, tmp1, tmp2); + fsel(dst, dst, src, FCC0); + break; + + case NE: + sub_d(AT, op1, op2); + movgr2fr_w(tmp2, AT); + fcmp_ceq_s(FCC0, tmp1, tmp2); + fsel(dst, src, dst, FCC0); + break; + + case GT: + slt(AT, op2, op1); + movgr2fr_w(tmp2, AT); + fcmp_ceq_s(FCC0, tmp1, tmp2); + fsel(dst, src, dst, FCC0); + break; + + case GE: + slt(AT, op1, op2); + movgr2fr_w(tmp2, AT); + fcmp_ceq_s(FCC0, tmp1, tmp2); + fsel(dst, dst, src, FCC0); + break; + + case LT: + slt(AT, op1, op2); + movgr2fr_w(tmp2, AT); + fcmp_ceq_s(FCC0, tmp1, tmp2); + fsel(dst, src, dst, FCC0); + break; + + case LE: + slt(AT, op2, op1); + movgr2fr_w(tmp2, AT); + fcmp_ceq_s(FCC0, tmp1, tmp2); + fsel(dst, dst, src, FCC0); + break; + + default: + Unimplemented(); + } +} + +void MacroAssembler::loadstore(Register reg, Register base, int disp, int type) { + switch (type) { + case STORE_BYTE: st_b (reg, base, disp); break; + case STORE_CHAR: + case STORE_SHORT: st_h (reg, base, disp); break; + case STORE_INT: st_w (reg, base, disp); break; + case STORE_LONG: st_d (reg, base, disp); break; + case LOAD_BYTE: ld_b (reg, base, disp); break; + case LOAD_U_BYTE: ld_bu(reg, base, disp); break; + case LOAD_SHORT: ld_h (reg, base, disp); break; + case LOAD_U_SHORT: ld_hu(reg, base, disp); break; + case LOAD_INT: ld_w (reg, base, disp); break; + case LOAD_U_INT: ld_wu(reg, base, disp); break; + case LOAD_LONG: ld_d (reg, base, disp); break; + case LOAD_LINKED_LONG: + ll_d(reg, base, disp); + break; + default: + ShouldNotReachHere(); + } +} + +void MacroAssembler::loadstore(Register reg, Register base, Register disp, int type) { + switch (type) { + case STORE_BYTE: stx_b (reg, base, disp); break; + case STORE_CHAR: + case STORE_SHORT: stx_h (reg, base, disp); break; + case STORE_INT: stx_w (reg, base, disp); break; + case STORE_LONG: stx_d (reg, base, disp); break; + case LOAD_BYTE: ldx_b (reg, base, disp); break; + case LOAD_U_BYTE: ldx_bu(reg, base, disp); break; + case LOAD_SHORT: ldx_h (reg, base, disp); break; + case LOAD_U_SHORT: ldx_hu(reg, base, disp); break; + case LOAD_INT: ldx_w (reg, base, disp); break; + case LOAD_U_INT: ldx_wu(reg, base, disp); break; + case LOAD_LONG: ldx_d (reg, base, disp); break; + 
case LOAD_LINKED_LONG: + add_d(AT, base, disp); + ll_d(reg, AT, 0); + break; + default: + ShouldNotReachHere(); + } +} + +void MacroAssembler::loadstore(FloatRegister reg, Register base, int disp, int type) { + switch (type) { + case STORE_FLOAT: fst_s(reg, base, disp); break; + case STORE_DOUBLE: fst_d(reg, base, disp); break; + case STORE_VECTORX: vst (reg, base, disp); break; + case STORE_VECTORY: xvst (reg, base, disp); break; + case LOAD_FLOAT: fld_s(reg, base, disp); break; + case LOAD_DOUBLE: fld_d(reg, base, disp); break; + case LOAD_VECTORX: vld (reg, base, disp); break; + case LOAD_VECTORY: xvld (reg, base, disp); break; + default: + ShouldNotReachHere(); + } +} + +void MacroAssembler::loadstore(FloatRegister reg, Register base, Register disp, int type) { + switch (type) { + case STORE_FLOAT: fstx_s(reg, base, disp); break; + case STORE_DOUBLE: fstx_d(reg, base, disp); break; + case STORE_VECTORX: vstx (reg, base, disp); break; + case STORE_VECTORY: xvstx (reg, base, disp); break; + case LOAD_FLOAT: fldx_s(reg, base, disp); break; + case LOAD_DOUBLE: fldx_d(reg, base, disp); break; + case LOAD_VECTORX: vldx (reg, base, disp); break; + case LOAD_VECTORY: xvldx (reg, base, disp); break; + default: + ShouldNotReachHere(); + } +} + +#ifdef COMPILER2 +void MacroAssembler::reduce_ins_v(FloatRegister vec1, FloatRegister vec2, FloatRegister vec3, BasicType type, int opcode) { + switch (type) { + case T_BYTE: + switch (opcode) { + case Op_AddReductionVI: vadd_b(vec1, vec2, vec3); break; + case Op_MulReductionVI: vmul_b(vec1, vec2, vec3); break; + case Op_MaxReductionV: vmax_b(vec1, vec2, vec3); break; + case Op_MinReductionV: vmin_b(vec1, vec2, vec3); break; + default: + ShouldNotReachHere(); + } + break; + case T_SHORT: + switch (opcode) { + case Op_AddReductionVI: vadd_h(vec1, vec2, vec3); break; + case Op_MulReductionVI: vmul_h(vec1, vec2, vec3); break; + case Op_MaxReductionV: vmax_h(vec1, vec2, vec3); break; + case Op_MinReductionV: vmin_h(vec1, vec2, vec3); break; + default: + ShouldNotReachHere(); + } + break; + case T_INT: + switch (opcode) { + case Op_AddReductionVI: vadd_w(vec1, vec2, vec3); break; + case Op_MulReductionVI: vmul_w(vec1, vec2, vec3); break; + case Op_MaxReductionV: vmax_w(vec1, vec2, vec3); break; + case Op_MinReductionV: vmin_w(vec1, vec2, vec3); break; + default: + ShouldNotReachHere(); + } + break; + case T_LONG: + switch (opcode) { + case Op_AddReductionVL: vadd_d(vec1, vec2, vec3); break; + case Op_MulReductionVL: vmul_d(vec1, vec2, vec3); break; + case Op_MaxReductionV: vmax_d(vec1, vec2, vec3); break; + case Op_MinReductionV: vmin_d(vec1, vec2, vec3); break; + default: + ShouldNotReachHere(); + } + break; + default: + ShouldNotReachHere(); + } +} + +void MacroAssembler::reduce_ins_r(Register reg1, Register reg2, Register reg3, BasicType type, int opcode) { + switch (type) { + case T_BYTE: + case T_SHORT: + case T_INT: + switch (opcode) { + case Op_AddReductionVI: add_w(reg1, reg2, reg3); break; + case Op_MulReductionVI: mul_w(reg1, reg2, reg3); break; + default: + ShouldNotReachHere(); + } + break; + case T_LONG: + switch (opcode) { + case Op_AddReductionVL: add_d(reg1, reg2, reg3); break; + case Op_MulReductionVL: mul_d(reg1, reg2, reg3); break; + default: + ShouldNotReachHere(); + } + break; + default: + ShouldNotReachHere(); + } +} + +void MacroAssembler::reduce_ins_f(FloatRegister reg1, FloatRegister reg2, FloatRegister reg3, BasicType type, int opcode) { + switch (type) { + case T_FLOAT: + switch (opcode) { + case Op_AddReductionVF: fadd_s(reg1, reg2, 
reg3); break; + case Op_MulReductionVF: fmul_s(reg1, reg2, reg3); break; + default: + ShouldNotReachHere(); + } + break; + case T_DOUBLE: + switch (opcode) { + case Op_AddReductionVD: fadd_d(reg1, reg2, reg3); break; + case Op_MulReductionVD: fmul_d(reg1, reg2, reg3); break; + default: + ShouldNotReachHere(); + } + break; + default: + ShouldNotReachHere(); + } +} + +void MacroAssembler::reduce(Register dst, Register src, FloatRegister vsrc, FloatRegister tmp1, FloatRegister tmp2, BasicType type, int opcode, int vector_size) { + if (vector_size == 32) { + xvpermi_d(tmp1, vsrc, 0b00001110); + reduce_ins_v(tmp1, vsrc, tmp1, type, opcode); + vpermi_w(tmp2, tmp1, 0b00001110); + reduce_ins_v(tmp1, tmp2, tmp1, type, opcode); + } else if (vector_size == 16) { + vpermi_w(tmp1, vsrc, 0b00001110); + reduce_ins_v(tmp1, vsrc, tmp1, type, opcode); + } else { + ShouldNotReachHere(); + } + + if (type != T_LONG) { + vshuf4i_w(tmp2, tmp1, 0b00000001); + reduce_ins_v(tmp1, tmp2, tmp1, type, opcode); + if (type != T_INT) { + vshuf4i_h(tmp2, tmp1, 0b00000001); + reduce_ins_v(tmp1, tmp2, tmp1, type, opcode); + if (type != T_SHORT) { + vshuf4i_b(tmp2, tmp1, 0b00000001); + reduce_ins_v(tmp1, tmp2, tmp1, type, opcode); + } + } + } + + switch (type) { + case T_BYTE: vpickve2gr_b(dst, tmp1, 0); break; + case T_SHORT: vpickve2gr_h(dst, tmp1, 0); break; + case T_INT: vpickve2gr_w(dst, tmp1, 0); break; + case T_LONG: vpickve2gr_d(dst, tmp1, 0); break; + default: + ShouldNotReachHere(); + } + if (opcode == Op_MaxReductionV) { + slt(AT, dst, src); + masknez(dst, dst, AT); + maskeqz(AT, src, AT); + orr(dst, dst, AT); + } else if (opcode == Op_MinReductionV) { + slt(AT, src, dst); + masknez(dst, dst, AT); + maskeqz(AT, src, AT); + orr(dst, dst, AT); + } else { + reduce_ins_r(dst, dst, src, type, opcode); + } + switch (type) { + case T_BYTE: ext_w_b(dst, dst); break; + case T_SHORT: ext_w_h(dst, dst); break; + default: + break; + } +} + +void MacroAssembler::reduce(FloatRegister dst, FloatRegister src, FloatRegister vsrc, FloatRegister tmp, BasicType type, int opcode, int vector_size) { + if (vector_size == 32) { + switch (type) { + case T_FLOAT: + reduce_ins_f(dst, vsrc, src, type, opcode); + xvpickve_w(tmp, vsrc, 1); + reduce_ins_f(dst, tmp, dst, type, opcode); + xvpickve_w(tmp, vsrc, 2); + reduce_ins_f(dst, tmp, dst, type, opcode); + xvpickve_w(tmp, vsrc, 3); + reduce_ins_f(dst, tmp, dst, type, opcode); + xvpickve_w(tmp, vsrc, 4); + reduce_ins_f(dst, tmp, dst, type, opcode); + xvpickve_w(tmp, vsrc, 5); + reduce_ins_f(dst, tmp, dst, type, opcode); + xvpickve_w(tmp, vsrc, 6); + reduce_ins_f(dst, tmp, dst, type, opcode); + xvpickve_w(tmp, vsrc, 7); + reduce_ins_f(dst, tmp, dst, type, opcode); + break; + case T_DOUBLE: + reduce_ins_f(dst, vsrc, src, type, opcode); + xvpickve_d(tmp, vsrc, 1); + reduce_ins_f(dst, tmp, dst, type, opcode); + xvpickve_d(tmp, vsrc, 2); + reduce_ins_f(dst, tmp, dst, type, opcode); + xvpickve_d(tmp, vsrc, 3); + reduce_ins_f(dst, tmp, dst, type, opcode); + break; + default: + ShouldNotReachHere(); + } + } else if (vector_size == 16) { + switch (type) { + case T_FLOAT: + reduce_ins_f(dst, vsrc, src, type, opcode); + vpermi_w(tmp, vsrc, 0b00000001); + reduce_ins_f(dst, tmp, dst, type, opcode); + vpermi_w(tmp, vsrc, 0b00000010); + reduce_ins_f(dst, tmp, dst, type, opcode); + vpermi_w(tmp, vsrc, 0b00000011); + reduce_ins_f(dst, tmp, dst, type, opcode); + break; + case T_DOUBLE: + reduce_ins_f(dst, vsrc, src, type, opcode); + vpermi_w(tmp, vsrc, 0b00001110); + reduce_ins_f(dst, tmp, dst, type, opcode); + 
break; + default: + ShouldNotReachHere(); + } + } else { + ShouldNotReachHere(); + } +} +#endif // COMPILER2 + +/** + * Emits code to update CRC-32 with a byte value according to constants in table + * + * @param [in,out]crc Register containing the crc. + * @param [in]val Register containing the byte to fold into the CRC. + * @param [in]table Register containing the table of crc constants. + * + * uint32_t crc; + * val = crc_table[(val ^ crc) & 0xFF]; + * crc = val ^ (crc >> 8); +**/ +void MacroAssembler::update_byte_crc32(Register crc, Register val, Register table) { + xorr(val, val, crc); + andi(val, val, 0xff); + ld_w(val, Address(table, val, Address::times_4, 0)); + srli_w(crc, crc, 8); + xorr(crc, val, crc); +} + +/** + * @param crc register containing existing CRC (32-bit) + * @param buf register pointing to input byte buffer (byte*) + * @param len register containing number of bytes + * @param tmp scratch register +**/ +void MacroAssembler::kernel_crc32(Register crc, Register buf, Register len, Register tmp) { + Label CRC_by64_loop, CRC_by4_loop, CRC_by1_loop, CRC_less64, CRC_by64_pre, CRC_by32_loop, CRC_less32, L_exit; + assert_different_registers(crc, buf, len, tmp); + + nor(crc, crc, R0); + + addi_d(len, len, -64); + bge(len, R0, CRC_by64_loop); + addi_d(len, len, 64-4); + bge(len, R0, CRC_by4_loop); + addi_d(len, len, 4); + blt(R0, len, CRC_by1_loop); + b(L_exit); + + bind(CRC_by64_loop); + ld_d(tmp, buf, 0); + crc_w_d_w(crc, tmp, crc); + ld_d(tmp, buf, 8); + crc_w_d_w(crc, tmp, crc); + ld_d(tmp, buf, 16); + crc_w_d_w(crc, tmp, crc); + ld_d(tmp, buf, 24); + crc_w_d_w(crc, tmp, crc); + ld_d(tmp, buf, 32); + crc_w_d_w(crc, tmp, crc); + ld_d(tmp, buf, 40); + crc_w_d_w(crc, tmp, crc); + ld_d(tmp, buf, 48); + crc_w_d_w(crc, tmp, crc); + ld_d(tmp, buf, 56); + crc_w_d_w(crc, tmp, crc); + addi_d(buf, buf, 64); + addi_d(len, len, -64); + bge(len, R0, CRC_by64_loop); + addi_d(len, len, 64-4); + bge(len, R0, CRC_by4_loop); + addi_d(len, len, 4); + blt(R0, len, CRC_by1_loop); + b(L_exit); + + bind(CRC_by4_loop); + ld_w(tmp, buf, 0); + crc_w_w_w(crc, tmp, crc); + addi_d(buf, buf, 4); + addi_d(len, len, -4); + bge(len, R0, CRC_by4_loop); + addi_d(len, len, 4); + bge(R0, len, L_exit); + + bind(CRC_by1_loop); + ld_b(tmp, buf, 0); + crc_w_b_w(crc, tmp, crc); + addi_d(buf, buf, 1); + addi_d(len, len, -1); + blt(R0, len, CRC_by1_loop); + + bind(L_exit); + nor(crc, crc, R0); +} + +/** + * @param crc register containing existing CRC (32-bit) + * @param buf register pointing to input byte buffer (byte*) + * @param len register containing number of bytes + * @param tmp scratch register +**/ +void MacroAssembler::kernel_crc32c(Register crc, Register buf, Register len, Register tmp) { + Label CRC_by64_loop, CRC_by4_loop, CRC_by1_loop, CRC_less64, CRC_by64_pre, CRC_by32_loop, CRC_less32, L_exit; + assert_different_registers(crc, buf, len, tmp); + + addi_d(len, len, -64); + bge(len, R0, CRC_by64_loop); + addi_d(len, len, 64-4); + bge(len, R0, CRC_by4_loop); + addi_d(len, len, 4); + blt(R0, len, CRC_by1_loop); + b(L_exit); + + bind(CRC_by64_loop); + ld_d(tmp, buf, 0); + crcc_w_d_w(crc, tmp, crc); + ld_d(tmp, buf, 8); + crcc_w_d_w(crc, tmp, crc); + ld_d(tmp, buf, 16); + crcc_w_d_w(crc, tmp, crc); + ld_d(tmp, buf, 24); + crcc_w_d_w(crc, tmp, crc); + ld_d(tmp, buf, 32); + crcc_w_d_w(crc, tmp, crc); + ld_d(tmp, buf, 40); + crcc_w_d_w(crc, tmp, crc); + ld_d(tmp, buf, 48); + crcc_w_d_w(crc, tmp, crc); + ld_d(tmp, buf, 56); + crcc_w_d_w(crc, tmp, crc); + addi_d(buf, buf, 64); + addi_d(len, len, -64); + bge(len, 
R0, CRC_by64_loop); + addi_d(len, len, 64-4); + bge(len, R0, CRC_by4_loop); + addi_d(len, len, 4); + blt(R0, len, CRC_by1_loop); + b(L_exit); + + bind(CRC_by4_loop); + ld_w(tmp, buf, 0); + crcc_w_w_w(crc, tmp, crc); + addi_d(buf, buf, 4); + addi_d(len, len, -4); + bge(len, R0, CRC_by4_loop); + addi_d(len, len, 4); + bge(R0, len, L_exit); + + bind(CRC_by1_loop); + ld_b(tmp, buf, 0); + crcc_w_b_w(crc, tmp, crc); + addi_d(buf, buf, 1); + addi_d(len, len, -1); + blt(R0, len, CRC_by1_loop); + + bind(L_exit); +} + +#ifdef COMPILER2 +void MacroAssembler::cmp_branch_short(int flag, Register op1, Register op2, Label& L, bool is_signed) { + + switch(flag) { + case 0x01: //equal + beq(op1, op2, L); + break; + case 0x02: //not_equal + bne(op1, op2, L); + break; + case 0x03: //above + if (is_signed) + blt(op2, op1, L); + else + bltu(op2, op1, L); + break; + case 0x04: //above_equal + if (is_signed) + bge(op1, op2, L); + else + bgeu(op1, op2, L); + break; + case 0x05: //below + if (is_signed) + blt(op1, op2, L); + else + bltu(op1, op2, L); + break; + case 0x06: //below_equal + if (is_signed) + bge(op2, op1, L); + else + bgeu(op2, op1, L); + break; + default: + Unimplemented(); + } +} + +void MacroAssembler::cmp_branch_long(int flag, Register op1, Register op2, Label* L, bool is_signed) { + switch(flag) { + case 0x01: //equal + beq_long(op1, op2, *L); + break; + case 0x02: //not_equal + bne_long(op1, op2, *L); + break; + case 0x03: //above + if (is_signed) + blt_long(op2, op1, *L, true /* signed */); + else + blt_long(op2, op1, *L, false); + break; + case 0x04: //above_equal + if (is_signed) + bge_long(op1, op2, *L, true /* signed */); + else + bge_long(op1, op2, *L, false); + break; + case 0x05: //below + if (is_signed) + blt_long(op1, op2, *L, true /* signed */); + else + blt_long(op1, op2, *L, false); + break; + case 0x06: //below_equal + if (is_signed) + bge_long(op2, op1, *L, true /* signed */); + else + bge_long(op2, op1, *L, false); + break; + default: + Unimplemented(); + } +} + +void MacroAssembler::cmp_branchEqNe_off21(int flag, Register op1, Label& L) { + switch(flag) { + case 0x01: //equal + beqz(op1, L); + break; + case 0x02: //not_equal + bnez(op1, L); + break; + default: + Unimplemented(); + } +} +#endif // COMPILER2 + +void MacroAssembler::membar(Membar_mask_bits hint){ + address prev = pc() - NativeInstruction::sync_instruction_size; + address last = code()->last_insn(); + if (last != NULL && ((NativeInstruction*)last)->is_sync() && prev == last) { + code()->set_last_insn(NULL); + NativeMembar *membar = (NativeMembar*)prev; + // merged membar + // e.g. 
LoadLoad and LoadLoad|LoadStore to LoadLoad|LoadStore + membar->set_hint(membar->get_hint() & (~hint & 0xF)); + block_comment("merged membar"); + } else { + code()->set_last_insn(pc()); + Assembler::membar(hint); + } +} + +// Code for BigInteger::mulAdd intrinsic +// out = A0 +// in = A1 +// offset = A2 (already out.length-offset) +// len = A3 +// k = A4 +// +// pseudo code from java implementation: +// long kLong = k & LONG_MASK; +// carry = 0; +// offset = out.length-offset - 1; +// for (int j = len - 1; j >= 0; j--) { +// product = (in[j] & LONG_MASK) * kLong + (out[offset] & LONG_MASK) + carry; +// out[offset--] = (int)product; +// carry = product >>> 32; +// } +// return (int)carry; +void MacroAssembler::mul_add(Register out, Register in, Register offset, + Register len, Register k) { + Label L_tail_loop, L_unroll, L_end; + + move(SCR2, out); + move(out, R0); // should clear out + bge(R0, len, L_end); + + alsl_d(offset, offset, SCR2, LogBytesPerInt - 1); + alsl_d(in, len, in, LogBytesPerInt - 1); + + const int unroll = 16; + li(SCR2, unroll); + blt(len, SCR2, L_tail_loop); + + bind(L_unroll); + + addi_d(in, in, -unroll * BytesPerInt); + addi_d(offset, offset, -unroll * BytesPerInt); + + for (int i = unroll - 1; i >= 0; i--) { + ld_wu(SCR1, in, i * BytesPerInt); + mulw_d_wu(SCR1, SCR1, k); + add_d(out, out, SCR1); // out as scratch + ld_wu(SCR1, offset, i * BytesPerInt); + add_d(SCR1, SCR1, out); + st_w(SCR1, offset, i * BytesPerInt); + srli_d(out, SCR1, 32); // keep carry + } + + sub_w(len, len, SCR2); + bge(len, SCR2, L_unroll); + + bge(R0, len, L_end); // check tail + + bind(L_tail_loop); + + addi_d(in, in, -BytesPerInt); + ld_wu(SCR1, in, 0); + mulw_d_wu(SCR1, SCR1, k); + add_d(out, out, SCR1); // out as scratch + + addi_d(offset, offset, -BytesPerInt); + ld_wu(SCR1, offset, 0); + add_d(SCR1, SCR1, out); + st_w(SCR1, offset, 0); + + srli_d(out, SCR1, 32); // keep carry + + addi_w(len, len, -1); + blt(R0, len, L_tail_loop); + + bind(L_end); +} + diff --git a/src/hotspot/cpu/loongarch/macroAssembler_loongarch.hpp b/src/hotspot/cpu/loongarch/macroAssembler_loongarch.hpp new file mode 100644 index 00000000000..1f96557543b --- /dev/null +++ b/src/hotspot/cpu/loongarch/macroAssembler_loongarch.hpp @@ -0,0 +1,825 @@ +/* + * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef CPU_LOONGARCH_MACROASSEMBLER_LOONGARCH_HPP +#define CPU_LOONGARCH_MACROASSEMBLER_LOONGARCH_HPP + +#include "asm/assembler.hpp" +#include "runtime/rtmLocking.hpp" +#include "utilities/macros.hpp" + +// MacroAssembler extends Assembler by frequently used macros. +// +// Instructions for which a 'better' code sequence exists depending +// on arguments should also go in here. + +class MacroAssembler: public Assembler { + friend class LIR_Assembler; + friend class Runtime1; // as_Address() + + public: + // Compare code + typedef enum { + EQ = 0x01, + NE = 0x02, + GT = 0x03, + GE = 0x04, + LT = 0x05, + LE = 0x06 + } CMCompare; + + protected: + + // Support for VM calls + // + // This is the base routine called by the different versions of call_VM_leaf. The interpreter + // may customize this version by overriding it for its purposes (e.g., to save/restore + // additional registers when doing a VM call). + #define VIRTUAL virtual + + VIRTUAL void call_VM_leaf_base( + address entry_point, // the entry point + int number_of_arguments // the number of arguments to pop after the call + ); + + // This is the base routine called by the different versions of call_VM. The interpreter + // may customize this version by overriding it for its purposes (e.g., to save/restore + // additional registers when doing a VM call). + // + // If no java_thread register is specified (noreg) than TREG will be used instead. call_VM_base + // returns the register which contains the thread upon return. If a thread register has been + // specified, the return value will correspond to that register. If no last_java_sp is specified + // (noreg) than sp will be used instead. + VIRTUAL void call_VM_base( // returns the register containing the thread upon return + Register oop_result, // where an oop-result ends up if any; use noreg otherwise + Register java_thread, // the thread if computed before ; use noreg otherwise + Register last_java_sp, // to set up last_Java_frame in stubs; use noreg otherwise + address entry_point, // the entry point + int number_of_arguments, // the number of arguments (w/o thread) to pop after the call + bool check_exceptions // whether to check for pending exceptions after return + ); + + void call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions = true); + + // helpers for FPU flag access + // tmp is a temporary register, if none is available use noreg + + public: + MacroAssembler(CodeBuffer* code) : Assembler(code) {} + + // These routines should emit JVMTI PopFrame and ForceEarlyReturn handling code. + // The implementation is only non-empty for the InterpreterMacroAssembler, + // as only the interpreter handles PopFrame and ForceEarlyReturn requests. + virtual void check_and_handle_popframe(Register java_thread); + virtual void check_and_handle_earlyret(Register java_thread); + + Address as_Address(AddressLiteral adr); + Address as_Address(ArrayAddress adr); + + static intptr_t i[32]; + static float f[32]; + static void print(outputStream *s); + + static int i_offset(unsigned int k); + static int f_offset(unsigned int k); + + static void save_registers(MacroAssembler *masm); + static void restore_registers(MacroAssembler *masm); + + // Support for NULL-checks + // + // Generates code that causes a NULL OS exception if the content of reg is NULL. + // If the accessed location is M[reg + offset] and the offset is known, provide the + // offset. 
No explicit code generation is needed if the offset is within a certain + // range (0 <= offset <= page_size). + + void null_check(Register reg, int offset = -1); + static bool needs_explicit_null_check(intptr_t offset); + + // Required platform-specific helpers for Label::patch_instructions. + // They _shadow_ the declarations in AbstractAssembler, which are undefined. + static void pd_patch_instruction(address branch, address target); + + address emit_trampoline_stub(int insts_call_instruction_offset, address target); + + // Support for inc/dec with optimal instruction selection depending on value + // void incrementl(Register reg, int value = 1); + // void decrementl(Register reg, int value = 1); + + + // Alignment + void align(int modulus); + + + // Stack frame creation/removal + void enter(); + void leave(); + + // Frame creation and destruction shared between JITs. + void build_frame(int framesize); + void remove_frame(int framesize); + + // Support for getting the JavaThread pointer (i.e.; a reference to thread-local information) + // The pointer will be loaded into the thread register. + void get_thread(Register thread); + + + // Support for VM calls + // + // It is imperative that all calls into the VM are handled via the call_VM macros. + // They make sure that the stack linkage is setup correctly. call_VM's correspond + // to ENTRY/ENTRY_X entry points while call_VM_leaf's correspond to LEAF entry points. + + + void call_VM(Register oop_result, + address entry_point, + bool check_exceptions = true); + void call_VM(Register oop_result, + address entry_point, + Register arg_1, + bool check_exceptions = true); + void call_VM(Register oop_result, + address entry_point, + Register arg_1, Register arg_2, + bool check_exceptions = true); + void call_VM(Register oop_result, + address entry_point, + Register arg_1, Register arg_2, Register arg_3, + bool check_exceptions = true); + + // Overloadings with last_Java_sp + void call_VM(Register oop_result, + Register last_java_sp, + address entry_point, + int number_of_arguments = 0, + bool check_exceptions = true); + void call_VM(Register oop_result, + Register last_java_sp, + address entry_point, + Register arg_1, bool + check_exceptions = true); + void call_VM(Register oop_result, + Register last_java_sp, + address entry_point, + Register arg_1, Register arg_2, + bool check_exceptions = true); + void call_VM(Register oop_result, + Register last_java_sp, + address entry_point, + Register arg_1, Register arg_2, Register arg_3, + bool check_exceptions = true); + + void get_vm_result (Register oop_result, Register thread); + void get_vm_result_2(Register metadata_result, Register thread); + void call_VM_leaf(address entry_point, + int number_of_arguments = 0); + void call_VM_leaf(address entry_point, + Register arg_1); + void call_VM_leaf(address entry_point, + Register arg_1, Register arg_2); + void call_VM_leaf(address entry_point, + Register arg_1, Register arg_2, Register arg_3); + + // Super call_VM calls - correspond to MacroAssembler::call_VM(_leaf) calls + void super_call_VM_leaf(address entry_point); + void super_call_VM_leaf(address entry_point, Register arg_1); + void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2); + void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3); + + // last Java Frame (fills frame anchor) + void set_last_Java_frame(Register thread, + Register last_java_sp, + Register last_java_fp, + Label& last_java_pc); + + // thread in the default location 
(S6) + void set_last_Java_frame(Register last_java_sp, + Register last_java_fp, + Label& last_java_pc); + + void reset_last_Java_frame(Register thread, bool clear_fp); + + // thread in the default location (S6) + void reset_last_Java_frame(bool clear_fp); + + // jobjects + void clear_jweak_tag(Register possibly_jweak); + void resolve_jobject(Register value, Register thread, Register tmp); + + // C 'boolean' to Java boolean: x == 0 ? 0 : 1 + void c2bool(Register x); + + void resolve_oop_handle(Register result, Register tmp); + void load_mirror(Register dst, Register method, Register tmp); + + // oop manipulations + void load_klass(Register dst, Register src); + void store_klass(Register dst, Register src); + + void access_load_at(BasicType type, DecoratorSet decorators, Register dst, Address src, + Register tmp1, Register thread_tmp); + void access_store_at(BasicType type, DecoratorSet decorators, Address dst, Register src, + Register tmp1, Register tmp2); + + void load_heap_oop(Register dst, Address src, Register tmp1 = noreg, + Register thread_tmp = noreg, DecoratorSet decorators = 0); + void load_heap_oop_not_null(Register dst, Address src, Register tmp1 = noreg, + Register thread_tmp = noreg, DecoratorSet decorators = 0); + void store_heap_oop(Address dst, Register src, Register tmp1 = noreg, + Register tmp2 = noreg, DecoratorSet decorators = 0); + + // Used for storing NULL. All other oop constants should be + // stored using routines that take a jobject. + void store_heap_oop_null(Address dst); + + void load_prototype_header(Register dst, Register src); + + void store_klass_gap(Register dst, Register src); + + void encode_heap_oop(Register r); + void encode_heap_oop(Register dst, Register src); + void decode_heap_oop(Register r); + void decode_heap_oop(Register dst, Register src); + void encode_heap_oop_not_null(Register r); + void decode_heap_oop_not_null(Register r); + void encode_heap_oop_not_null(Register dst, Register src); + void decode_heap_oop_not_null(Register dst, Register src); + + void encode_klass_not_null(Register r); + void decode_klass_not_null(Register r); + void encode_klass_not_null(Register dst, Register src); + void decode_klass_not_null(Register dst, Register src); + + // if heap base register is used - reinit it with the correct value + void reinit_heapbase(); + + DEBUG_ONLY(void verify_heapbase(const char* msg);) + + void set_narrow_klass(Register dst, Klass* k); + void set_narrow_oop(Register dst, jobject obj); + + // Sign extension + void sign_extend_short(Register reg) { ext_w_h(reg, reg); } + void sign_extend_byte(Register reg) { ext_w_b(reg, reg); } + void rem_s(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp); + void rem_d(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp); + + // allocation + void eden_allocate( + Register obj, // result: pointer to object after successful allocation + Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise + int con_size_in_bytes, // object size in bytes if known at compile time + Register t1, // temp register + Label& slow_case // continuation point if fast allocation fails + ); + void tlab_allocate( + Register obj, // result: pointer to object after successful allocation + Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise + int con_size_in_bytes, // object size in bytes if known at compile time + Register t1, // temp register + Register t2, // temp register + Label& slow_case // 
continuation point if fast allocation fails + ); + void incr_allocated_bytes(Register thread, + Register var_size_in_bytes, int con_size_in_bytes, + Register t1 = noreg); + // interface method calling + void lookup_interface_method(Register recv_klass, + Register intf_klass, + RegisterOrConstant itable_index, + Register method_result, + Register scan_temp, + Label& no_such_interface, + bool return_method = true); + + // virtual method calling + void lookup_virtual_method(Register recv_klass, + RegisterOrConstant vtable_index, + Register method_result); + + // Test sub_klass against super_klass, with fast and slow paths. + + // The fast path produces a tri-state answer: yes / no / maybe-slow. + // One of the three labels can be NULL, meaning take the fall-through. + // If super_check_offset is -1, the value is loaded up from super_klass. + // No registers are killed, except temp_reg. + void check_klass_subtype_fast_path(Register sub_klass, + Register super_klass, + Register temp_reg, + Label* L_success, + Label* L_failure, + Label* L_slow_path, + RegisterOrConstant super_check_offset = RegisterOrConstant(-1)); + + // The rest of the type check; must be wired to a corresponding fast path. + // It does not repeat the fast path logic, so don't use it standalone. + // The temp_reg and temp2_reg can be noreg, if no temps are available. + // Updates the sub's secondary super cache as necessary. + // If set_cond_codes, condition codes will be Z on success, NZ on failure. + void check_klass_subtype_slow_path(Register sub_klass, + Register super_klass, + Register temp_reg, + Register temp2_reg, + Label* L_success, + Label* L_failure, + bool set_cond_codes = false); + + // Simplified, combined version, good for typical uses. + // Falls through on failure. + void check_klass_subtype(Register sub_klass, + Register super_klass, + Register temp_reg, + Label& L_success); + + + // Debugging + + // only if +VerifyOops + void verify_oop(Register reg, const char* s = "broken oop"); + void verify_oop_addr(Address addr, const char * s = "broken oop addr"); + void verify_oop_subroutine(); + // TODO: verify method and klass metadata (compare against vptr?) 
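+  // Note: _verify_method_ptr/_verify_klass_ptr below are empty placeholders on
+  // this port; the verify_method_ptr/verify_klass_ptr macros only keep call
+  // sites uniform with the other CPU ports.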
+ void _verify_method_ptr(Register reg, const char * msg, const char * file, int line) {} + void _verify_klass_ptr(Register reg, const char * msg, const char * file, int line){} + + #define verify_method_ptr(reg) _verify_method_ptr(reg, "broken method " #reg, __FILE__, __LINE__) + #define verify_klass_ptr(reg) _verify_klass_ptr(reg, "broken klass " #reg, __FILE__, __LINE__) + + // only if +VerifyFPU + void verify_FPU(int stack_depth, const char* s = "illegal FPU state"); + + // prints msg, dumps registers and stops execution + void stop(const char* msg); + + // prints msg and continues + void warn(const char* msg); + + static void debug(char* msg/*, RegistersForDebugging* regs*/); + static void debug64(char* msg, int64_t pc, int64_t regs[]); + + void untested() { stop("untested"); } + + void unimplemented(const char* what = ""); + + void should_not_reach_here() { stop("should not reach here"); } + + void print_CPU_state(); + + // Stack overflow checking + void bang_stack_with_offset(int offset) { + // stack grows down, caller passes positive offset + assert(offset > 0, "must bang with negative offset"); + if (offset <= 2048) { + st_w(A0, SP, -offset); + } else if (offset <= 32768 && !(offset & 3)) { + stptr_w(A0, SP, -offset); + } else { + li(AT, offset); + sub_d(AT, SP, AT); + st_w(A0, AT, 0); + } + } + + // Writes to stack successive pages until offset reached to check for + // stack overflow + shadow pages. Also, clobbers tmp + void bang_stack_size(Register size, Register tmp); + + // Check for reserved stack access in method being exited (for JIT) + void reserved_stack_check(); + + virtual RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr, + Register tmp, + int offset); + + // Support for serializing memory accesses between threads + void serialize_memory(Register thread, Register tmp); + + void safepoint_poll(Label& slow_path, Register thread_reg); + void safepoint_poll_acquire(Label& slow_path, Register thread_reg); + + //void verify_tlab(); + void verify_tlab(Register t1, Register t2); + + // Biased locking support + // lock_reg and obj_reg must be loaded up with the appropriate values. + // tmp_reg is optional. If it is supplied (i.e., != noreg) it will + // be killed; if not supplied, push/pop will be used internally to + // allocate a temporary (inefficient, avoid if possible). + // Optional slow case is for implementations (interpreter and C1) which branch to + // slow case directly. Leaves condition codes set for C2's Fast_Lock node. + // Returns offset of first potentially-faulting instruction for null + // check info (currently consumed only by C1). If + // swap_reg_contains_mark is true then returns -1 as it is assumed + // the calling code has already passed any potential faults. 
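+  //
+  // Rough usage sketch (illustrative only; the register choices, labels and the
+  // surrounding fast path are hypothetical, not a prescribed convention):
+  //
+  //   Label done, slow;
+  //   if (UseBiasedLocking) {
+  //     biased_locking_enter(lock_reg, obj_reg, swap_reg, tmp_reg,
+  //                          false /* swap_reg_contains_mark */, done, &slow);
+  //   }
+  //   // ... CAS-based stack-lock fast path ...
+  //   bind(slow);   // fall through to the runtime monitorenter path
+  //   bind(done);   // object is locked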
+ int biased_locking_enter(Register lock_reg, Register obj_reg, + Register swap_reg, Register tmp_reg, + bool swap_reg_contains_mark, + Label& done, Label* slow_case = NULL, + BiasedLockingCounters* counters = NULL); + void biased_locking_exit (Register obj_reg, Register temp_reg, Label& done); +#ifdef COMPILER2 + void cmp_branch_short(int flag, Register op1, Register op2, Label& L, bool is_signed); + void cmp_branch_long(int flag, Register op1, Register op2, Label* L, bool is_signed); + void cmp_branchEqNe_off21(int flag, Register op1, Label& L); + void fast_lock(Register obj, Register box, Register res, Register tmp, Register scr); + void fast_unlock(Register obj, Register box, Register res, Register tmp, Register scr); +#endif + + + // the follow two might use AT register, be sure you have no meanful data in AT before you call them + void increment(Register reg, int imm); + void decrement(Register reg, int imm); + void increment(Address addr, int imm = 1); + void decrement(Address addr, int imm = 1); + void shl(Register reg, int sa) { slli_d(reg, reg, sa); } + void shr(Register reg, int sa) { srli_d(reg, reg, sa); } + void sar(Register reg, int sa) { srai_d(reg, reg, sa); } + // Helper functions for statistics gathering. + void atomic_inc32(address counter_addr, int inc, Register tmp_reg1, Register tmp_reg2); + + // Calls + void call(address entry); + void call(address entry, relocInfo::relocType rtype); + void call(address entry, RelocationHolder& rh); + void call_long(address entry); + + address trampoline_call(AddressLiteral entry, CodeBuffer *cbuf = NULL); + + static const unsigned long branch_range = NOT_DEBUG(128 * M) DEBUG_ONLY(2 * M); + + static bool far_branches() { + if (ForceUnreachable) { + return true; + } else { + return ReservedCodeCacheSize > branch_range; + } + } + + // Emit the CompiledIC call idiom + address ic_call(address entry, jint method_index = 0); + + // Jumps + void jmp(address entry); + void jmp(address entry, relocInfo::relocType rtype); + void jmp_far(Label& L); // patchable + + /* branches may exceed 16-bit offset */ + void b_far(address entry); + void b_far(Label& L); + + void bne_far (Register rs, Register rt, address entry); + void bne_far (Register rs, Register rt, Label& L); + + void beq_far (Register rs, Register rt, address entry); + void beq_far (Register rs, Register rt, Label& L); + + void blt_far (Register rs, Register rt, address entry, bool is_signed); + void blt_far (Register rs, Register rt, Label& L, bool is_signed); + + void bge_far (Register rs, Register rt, address entry, bool is_signed); + void bge_far (Register rs, Register rt, Label& L, bool is_signed); + + // For C2 to support long branches + void beq_long (Register rs, Register rt, Label& L); + void bne_long (Register rs, Register rt, Label& L); + void blt_long (Register rs, Register rt, Label& L, bool is_signed); + void bge_long (Register rs, Register rt, Label& L, bool is_signed); + void bc1t_long (Label& L); + void bc1f_long (Label& L); + + static bool patchable_branches() { + const unsigned long branch_range = NOT_DEBUG(128 * M) DEBUG_ONLY(2 * M); + return ReservedCodeCacheSize > branch_range; + } + + static bool reachable_from_branch_short(jlong offs); + + void patchable_jump_far(Register ra, jlong offs); + void patchable_jump(address target, bool force_patchable = false); + void patchable_call(address target, address call_size = 0); + + // Floating + void generate_dsin_dcos(bool isCos, address npio2_hw, address two_over_pi, + address pio2, address dsin_coef, address 
dcos_coef);
+
+  // Data
+
+  // Load and store values by size and signed-ness
+  void load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2 = noreg);
+  void store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2 = noreg);
+
+  // ld_ptr will perform lw for 32 bit VMs and ld for 64 bit VMs
+  inline void ld_ptr(Register rt, Address a) {
+    ld_d(rt, a);
+  }
+
+  inline void ld_ptr(Register rt, Register base, int offset16) {
+    ld_d(rt, base, offset16);
+  }
+
+  // st_ptr will perform sw for 32 bit VMs and sd for 64 bit VMs
+  inline void st_ptr(Register rt, Address a) {
+    st_d(rt, a);
+  }
+
+  inline void st_ptr(Register rt, Register base, int offset16) {
+    st_d(rt, base, offset16);
+  }
+
+  void ld_ptr(Register rt, Register base, Register offset);
+  void st_ptr(Register rt, Register base, Register offset);
+
+  // swap the two bytes of the low 16-bit halfword
+  // this routine uses AT; be sure the high 16 bits of reg are zero
+  void hswap(Register reg);
+  void huswap(Register reg);
+
+  // convert a big-endian integer to a little-endian integer
+  void swap(Register reg);
+
+  void cmpxchg(Address addr, Register oldval, Register newval, Register resflag,
+               bool retold, bool barrier);
+  void cmpxchg(Address addr, Register oldval, Register newval, Register tmp,
+               bool retold, bool barrier, Label& succ, Label* fail = NULL);
+  void cmpxchg32(Address addr, Register oldval, Register newval, Register resflag,
+                 bool sign, bool retold, bool barrier);
+  void cmpxchg32(Address addr, Register oldval, Register newval, Register tmp,
+                 bool sign, bool retold, bool barrier, Label& succ, Label* fail = NULL);
+
+  void extend_sign(Register rh, Register rl) { /*stop("extend_sign");*/ guarantee(0, "LA not implemented yet");}
+  void neg(Register reg) { /*dsubu(reg, R0, reg);*/ guarantee(0, "LA not implemented yet");}
+  void push (Register reg)      { addi_d(SP, SP, -8); st_d (reg, SP, 0); }
+  void push (FloatRegister reg) { addi_d(SP, SP, -8); fst_d (reg, SP, 0); }
+  void pop  (Register reg)      { ld_d (reg, SP, 0); addi_d(SP, SP, 8); }
+  void pop  (FloatRegister reg) { fld_d (reg, SP, 0); addi_d(SP, SP, 8); }
+  void pop  ()                  { addi_d(SP, SP, 8); }
+  void pop2 ()                  { addi_d(SP, SP, 16); }
+  void push2(Register reg1, Register reg2);
+  void pop2 (Register reg1, Register reg2);
+  // we need two functions to save and restore the general registers
+  void pushad();
+  void popad();
+  void pushad_except_v0();
+  void popad_except_v0();
+  void push(RegSet regs) { if (regs.bits()) push(regs.bits()); }
+  void pop(RegSet regs) { if (regs.bits()) pop(regs.bits()); }
+
+  void li(Register rd, jlong value);
+  void li(Register rd, address addr) { li(rd, (long)addr); }
+  void patchable_li52(Register rd, jlong value);
+  void lipc(Register rd, Label& L);
+
+  void move(Register rd, Register rs)     { orr(rd, rs, R0); }
+  void move_u32(Register rd, Register rs) { add_w(rd, rs, R0); }
+  void mov_metadata(Register dst, Metadata* obj);
+  void mov_metadata(Address dst, Metadata* obj);
+
+  // Load the base of the cardtable byte map into reg.
+  void load_byte_map_base(Register reg);
+
+  // Code for java.lang.StringCoding::hasNegatives() intrinsic.
+  void has_negatives(Register ary1, Register len, Register result);
+
+  // Code for java.lang.StringUTF16::compress intrinsic.
+  void char_array_compress(Register src, Register dst, Register len,
+                           Register result, Register tmp1,
+                           Register tmp2, Register tmp3);
+
+  // Code for java.lang.StringLatin1::inflate intrinsic.
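+  // (Inflate widens each Latin-1 byte read from src into a 16-bit char
+  // written to dst; len is the number of elements to convert.)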
+ void byte_array_inflate(Register src, Register dst, Register len, + Register tmp1, Register tmp2); + + // Find index of char in UTF-16 string + void string_indexof_char(Register str1, Register cnt1, + Register ch, Register result, + Register tmp1, Register tmp2, + Register tmp3); + + //FIXME + void empty_FPU_stack(){/*need implemented*/}; + +#ifdef COMPILER2 + // Compare strings. + void string_compare(Register str1, Register str2, + Register cnt1, Register cnt2, Register result, + int ae); + + // Compare char[] or byte[] arrays. + void arrays_equals(Register str1, Register str2, + Register cnt, Register tmp1, Register tmp2, Register result, + bool is_char); +#endif + + // method handles (JSR 292) + Address argument_address(RegisterOrConstant arg_slot, int extra_slot_offset = 0); + + + // LA added: + void jr (Register reg) { jirl(R0, reg, 0); } + void jalr(Register reg) { jirl(RA, reg, 0); } + void nop () { andi(R0, R0, 0); } + void andr(Register rd, Register rj, Register rk) { AND(rd, rj, rk); } + void xorr(Register rd, Register rj, Register rk) { XOR(rd, rj, rk); } + void orr (Register rd, Register rj, Register rk) { OR(rd, rj, rk); } + void lea (Register rd, Address src); + void lea(Register dst, AddressLiteral adr); + static int patched_branch(int dest_pos, int inst, int inst_pos); + + // Conditional move + void cmp_cmov(Register op1, + Register op2, + Register dst, + Register src1, + Register src2, + CMCompare cmp = EQ, + bool is_signed = true); + void cmp_cmov(Register op1, + Register op2, + Register dst, + Register src, + CMCompare cmp = EQ, + bool is_signed = true); + void cmp_cmov(FloatRegister op1, + FloatRegister op2, + Register dst, + Register src, + FloatRegister tmp1, + FloatRegister tmp2, + CMCompare cmp = EQ, + bool is_float = true); + void cmp_cmov(FloatRegister op1, + FloatRegister op2, + FloatRegister dst, + FloatRegister src, + CMCompare cmp = EQ, + bool is_float = true); + void cmp_cmov(Register op1, + Register op2, + FloatRegister dst, + FloatRegister src, + FloatRegister tmp1, + FloatRegister tmp2, + CMCompare cmp = EQ); + + // CRC32 code for java.util.zip.CRC32::update() instrinsic. + void update_byte_crc32(Register crc, Register val, Register table); + + // CRC32 code for java.util.zip.CRC32::updateBytes() instrinsic. + void kernel_crc32(Register crc, Register buf, Register len, Register tmp); + + // CRC32C code for java.util.zip.CRC32C::updateBytes() instrinsic. + void kernel_crc32c(Register crc, Register buf, Register len, Register tmp); + + void membar(Membar_mask_bits hint); + + void bind(Label& L) { + Assembler::bind(L); + code()->clear_last_insn(); + } + + // Code for java.math.BigInteger::mulAdd intrinsic. 
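+  // mulAdd multiplies len 32-bit limbs of in by k and accumulates the product
+  // into out, returning the final carry. Rough reference sketch (illustrative
+  // only, not the generated code; exact index handling follows the Java-level
+  // intrinsic contract):
+  //
+  //   uint64_t carry = 0;
+  //   for (int i = len - 1; i >= 0; i--) {
+  //     uint64_t p = (uint64_t)(uint32_t)in[i] * (uint32_t)k
+  //                + (uint32_t)out[offset] + carry;
+  //     out[offset--] = (int)p;
+  //     carry = p >> 32;
+  //   }
+  //   return (int)carry;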
+  void mul_add(Register out, Register in, Register offset,
+               Register len, Register k);
+
+#undef VIRTUAL
+
+public:
+// Memory Data Type
+#define INT_TYPE 0x100
+#define FLOAT_TYPE 0x200
+#define SIGNED_TYPE 0x10
+#define UNSIGNED_TYPE 0x20
+
+  typedef enum {
+    LOAD_BYTE        = INT_TYPE | SIGNED_TYPE | 0x1,
+    LOAD_CHAR        = INT_TYPE | SIGNED_TYPE | 0x2,
+    LOAD_SHORT       = INT_TYPE | SIGNED_TYPE | 0x3,
+    LOAD_INT         = INT_TYPE | SIGNED_TYPE | 0x4,
+    LOAD_LONG        = INT_TYPE | SIGNED_TYPE | 0x5,
+    STORE_BYTE       = INT_TYPE | SIGNED_TYPE | 0x6,
+    STORE_CHAR       = INT_TYPE | SIGNED_TYPE | 0x7,
+    STORE_SHORT      = INT_TYPE | SIGNED_TYPE | 0x8,
+    STORE_INT        = INT_TYPE | SIGNED_TYPE | 0x9,
+    STORE_LONG       = INT_TYPE | SIGNED_TYPE | 0xa,
+    LOAD_LINKED_LONG = INT_TYPE | SIGNED_TYPE | 0xb,
+
+    LOAD_U_BYTE      = INT_TYPE | UNSIGNED_TYPE | 0x1,
+    LOAD_U_SHORT     = INT_TYPE | UNSIGNED_TYPE | 0x2,
+    LOAD_U_INT       = INT_TYPE | UNSIGNED_TYPE | 0x3,
+
+    LOAD_FLOAT       = FLOAT_TYPE | SIGNED_TYPE | 0x1,
+    LOAD_DOUBLE      = FLOAT_TYPE | SIGNED_TYPE | 0x2,
+    LOAD_VECTORX     = FLOAT_TYPE | SIGNED_TYPE | 0x3,
+    LOAD_VECTORY     = FLOAT_TYPE | SIGNED_TYPE | 0x4,
+    STORE_FLOAT      = FLOAT_TYPE | SIGNED_TYPE | 0x5,
+    STORE_DOUBLE     = FLOAT_TYPE | SIGNED_TYPE | 0x6,
+    STORE_VECTORX    = FLOAT_TYPE | SIGNED_TYPE | 0x7,
+    STORE_VECTORY    = FLOAT_TYPE | SIGNED_TYPE | 0x8
+  } CMLoadStoreDataType;
+
+  void loadstore_enc(Register reg, int base, int index, int scale, int disp, int type) {
+    assert((type & INT_TYPE), "must be General reg type");
+    loadstore_t(reg, base, index, scale, disp, type);
+  }
+
+  void loadstore_enc(FloatRegister reg, int base, int index, int scale, int disp, int type) {
+    assert((type & FLOAT_TYPE), "must be Float reg type");
+    loadstore_t(reg, base, index, scale, disp, type);
+  }
+
+#ifdef COMPILER2
+  void reduce(Register dst, Register src, FloatRegister vsrc, FloatRegister tmp1, FloatRegister tmp2, BasicType type, int opcode, int vector_size);
+  void reduce(FloatRegister dst, FloatRegister src, FloatRegister vsrc, FloatRegister tmp, BasicType type, int opcode, int vector_size);
+#endif
+
+private:
+  void push(unsigned int bitset);
+  void pop(unsigned int bitset);
+
+  template <typename T>
+  void loadstore_t(T reg, int base, int index, int scale, int disp, int type) {
+    if (index != 0) {
+      assert(((scale==0)&&(disp==0)), "only support base+index");
+      loadstore(reg, as_Register(base), as_Register(index), type);
+    } else {
+      loadstore(reg, as_Register(base), disp, type);
+    }
+  }
+  void loadstore(Register reg, Register base, int disp, int type);
+  void loadstore(Register reg, Register base, Register disp, int type);
+  void loadstore(FloatRegister reg, Register base, int disp, int type);
+  void loadstore(FloatRegister reg, Register base, Register disp, int type);
+
+#ifdef COMPILER2
+  void reduce_ins_v(FloatRegister vec1, FloatRegister vec2, FloatRegister vec3, BasicType type, int opcode);
+  void reduce_ins_r(Register reg1, Register reg2, Register reg3, BasicType type, int opcode);
+  void reduce_ins_f(FloatRegister reg1, FloatRegister reg2, FloatRegister reg3, BasicType type, int opcode);
+#endif
+  void generate_kernel_sin(FloatRegister x, bool iyIsOne, address dsin_coef);
+  void generate_kernel_cos(FloatRegister x, address dcos_coef);
+  void generate__ieee754_rem_pio2(address npio2_hw, address two_over_pi, address pio2);
+  void generate__kernel_rem_pio2(address two_over_pi, address pio2);
+};
+
+/**
+ * class SkipIfEqual:
+ *
+ * Instantiating this class will result in assembly code being output that will
+ * jump around any code emitted between the creation of the instance and its
+ * automatic destruction at the end of a scope block, depending on the value of + * the flag passed to the constructor, which will be checked at run-time. + */ +class SkipIfEqual { +private: + MacroAssembler* _masm; + Label _label; + +public: + inline SkipIfEqual(MacroAssembler* masm, const bool* flag_addr, bool value) + : _masm(masm) { + _masm->li(AT, (address)flag_addr); + _masm->ld_b(AT, AT, 0); + if (value) { + _masm->bne(AT, R0, _label); + } else { + _masm->beq(AT, R0, _label); + } + } + + ~SkipIfEqual(); +}; + +#ifdef ASSERT +inline bool AbstractAssembler::pd_check_instruction_mark() { return true; } +#endif + +struct tableswitch { + Register _reg; + int _insn_index; jint _first_key; jint _last_key; + Label _after; + Label _branches; +}; + +#endif // CPU_LOONGARCH_MACROASSEMBLER_LOONGARCH_HPP diff --git a/src/hotspot/cpu/loongarch/macroAssembler_loongarch.inline.hpp b/src/hotspot/cpu/loongarch/macroAssembler_loongarch.inline.hpp new file mode 100644 index 00000000000..49302590c37 --- /dev/null +++ b/src/hotspot/cpu/loongarch/macroAssembler_loongarch.inline.hpp @@ -0,0 +1,34 @@ +/* + * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2017, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_LOONGARCH_MACROASSEMBLER_LOONGARCH_INLINE_HPP +#define CPU_LOONGARCH_MACROASSEMBLER_LOONGARCH_INLINE_HPP + +#include "asm/assembler.inline.hpp" +#include "asm/macroAssembler.hpp" +#include "asm/codeBuffer.hpp" +#include "code/codeCache.hpp" + +#endif // CPU_LOONGARCH_MACROASSEMBLER_LOONGARCH_INLINE_HPP diff --git a/src/hotspot/cpu/loongarch/macroAssembler_loongarch_trig.cpp b/src/hotspot/cpu/loongarch/macroAssembler_loongarch_trig.cpp new file mode 100644 index 00000000000..3ed4c366510 --- /dev/null +++ b/src/hotspot/cpu/loongarch/macroAssembler_loongarch_trig.cpp @@ -0,0 +1,1625 @@ +/* Copyright (c) 2018, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2018, Cavium. All rights reserved. (By BELLSOFT) + * Copyright (c) 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/assembler.hpp" +#include "asm/assembler.inline.hpp" +#include "macroAssembler_loongarch.hpp" + +#define T0 RT0 +#define T1 RT1 +#define T2 RT2 +#define T3 RT3 +#define T4 RT4 +#define T5 RT5 +#define T6 RT6 +#define T7 RT7 +#define T8 RT8 + +// The following code is a optimized version of fdlibm sin/cos implementation +// (C code is in share/runtime/sharedRuntimeTrig.cpp) adapted for LOONGARCH64. + +// Please refer to sin/cos approximation via polynomial and +// trigonometric argument reduction techniques to the following literature: +// +// [1] Muller, Jean-Michel, Nicolas Brisebarre, Florent De Dinechin, +// Claude-Pierre Jeannerod, Vincent Lefevre, Guillaume Melquiond, +// Nathalie Revol, Damien Stehlé, and Serge Torres: +// Handbook of floating-point arithmetic. +// Springer Science & Business Media, 2009. +// [2] K. C. Ng +// Argument Reduction for Huge Arguments: Good to the Last Bit +// July 13, 1992, SunPro +// +// HOW TO READ THIS CODE: +// This code consists of several functions. Each function has following header: +// 1) Description +// 2) C-pseudo code with differences from fdlibm marked by comments starting +// with "NOTE". Check unmodified fdlibm code in +// share/runtime/SharedRuntimeTrig.cpp +// 3) Brief textual description of changes between fdlibm and current +// implementation along with optimization notes (if applicable) +// 4) Assumptions, input and output +// 5) (Optional) additional notes about intrinsic implementation +// Each function is separated in blocks which follow the pseudo-code structure +// +// HIGH-LEVEL ALGORITHM DESCRIPTION: +// - entry point: generate_dsin_dcos(...); +// - check corner cases: NaN, INF, tiny argument. +// - check if |x| < Pi/4. Then approximate sin/cos via polynomial (kernel_sin/kernel_cos) +// -- else proceed to argument reduction routine (__ieee754_rem_pio2) and +// use reduced argument to get result via kernel_sin/kernel_cos +// +// HIGH-LEVEL CHANGES BETWEEN INTRINSICS AND FDLIBM: +// 1) two_over_pi table fdlibm representation is int[], while intrinsic version +// has these int values converted to double representation to load converted +// double values directly (see stubRoutines_aarch4::_two_over_pi) +// 2) Several loops are unrolled and vectorized: see comments in code after +// labels: SKIP_F_LOAD, RECOMP_FOR1_CHECK, RECOMP_FOR2 +// 3) fdlibm npio2_hw table now has "prefix" with constants used in +// calculation. These constants are loaded from npio2_hw table instead of +// constructing it in code (see stubRoutines_loongarch64.cpp) +// 4) Polynomial coefficients for sin and cos are moved to table sin_coef +// and cos_coef to use the same optimization as in 3). It allows to load most of +// required constants via single instruction +// +// +// +///* __ieee754_rem_pio2(x,y) +// * +// * returns the remainder of x rem pi/2 in y[0]+y[1] (i.e. 
like x div pi/2) +// * x is input argument, y[] is hi and low parts of reduced argument (x) +// * uses __kernel_rem_pio2() +// */ +// // use tables(see stubRoutines_loongarch64.cpp): two_over_pi and modified npio2_hw +// +// BEGIN __ieee754_rem_pio2 PSEUDO CODE +// +//static int __ieee754_rem_pio2(double x, double *y) { +// double z,w,t,r,fn; +// double tx[3]; +// int e0,i,j,nx,n,ix,hx,i0; +// +// i0 = ((*(int*)&two24A)>>30)^1; /* high word index */ +// hx = *(i0+(int*)&x); /* high word of x */ +// ix = hx&0x7fffffff; +// if(ix<0x4002d97c) { /* |x| < 3pi/4, special case with n=+-1 */ +// if(hx>0) { +// z = x - pio2_1; +// if(ix!=0x3ff921fb) { /* 33+53 bit pi is good enough */ +// y[0] = z - pio2_1t; +// y[1] = (z-y[0])-pio2_1t; +// } else { /* near pi/2, use 33+33+53 bit pi */ +// z -= pio2_2; +// y[0] = z - pio2_2t; +// y[1] = (z-y[0])-pio2_2t; +// } +// return 1; +// } else { /* negative x */ +// z = x + pio2_1; +// if(ix!=0x3ff921fb) { /* 33+53 bit pi is good enough */ +// y[0] = z + pio2_1t; +// y[1] = (z-y[0])+pio2_1t; +// } else { /* near pi/2, use 33+33+53 bit pi */ +// z += pio2_2; +// y[0] = z + pio2_2t; +// y[1] = (z-y[0])+pio2_2t; +// } +// return -1; +// } +// } +// if(ix<=0x413921fb) { /* |x| ~<= 2^19*(pi/2), medium size */ +// t = fabsd(x); +// n = (int) (t*invpio2+half); +// fn = (double)n; +// r = t-fn*pio2_1; +// w = fn*pio2_1t; /* 1st round good to 85 bit */ +// // NOTE: y[0] = r-w; is moved from if/else below to be before "if" +// y[0] = r-w; +// if(n<32&&ix!=npio2_hw[n-1]) { +// // y[0] = r-w; /* quick check no cancellation */ // NOTE: moved earlier +// } else { +// j = ix>>20; +// // y[0] = r-w; // NOTE: moved earlier +// i = j-(((*(i0+(int*)&y[0]))>>20)&0x7ff); +// if(i>16) { /* 2nd iteration needed, good to 118 */ +// t = r; +// w = fn*pio2_2; +// r = t-w; +// w = fn*pio2_2t-((t-r)-w); +// y[0] = r-w; +// i = j-(((*(i0+(int*)&y[0]))>>20)&0x7ff); +// if(i>49) { /* 3rd iteration need, 151 bits acc */ +// t = r; /* will cover all possible cases */ +// w = fn*pio2_3; +// r = t-w; +// w = fn*pio2_3t-((t-r)-w); +// y[0] = r-w; +// } +// } +// } +// y[1] = (r-y[0])-w; +// if(hx<0) {y[0] = -y[0]; y[1] = -y[1]; return -n;} +// else return n; +// } +// /* +// * all other (large) arguments +// */ +// // NOTE: this check is removed, because it was checked in dsin/dcos +// // if(ix>=0x7ff00000) { /* x is inf or NaN */ +// // y[0]=y[1]=x-x; return 0; +// // } +// /* set z = scalbn(|x|,ilogb(x)-23) */ +// *(1-i0+(int*)&z) = *(1-i0+(int*)&x); +// e0 = (ix>>20)-1046; /* e0 = ilogb(z)-23; */ +// *(i0+(int*)&z) = ix - (e0<<20); +// +// // NOTE: "for" loop below in unrolled. See comments in asm code +// for(i=0;i<2;i++) { +// tx[i] = (double)((int)(z)); +// z = (z-tx[i])*two24A; +// } +// +// tx[2] = z; +// nx = 3; +// +// // NOTE: while(tx[nx-1]==zeroA) nx--; is unrolled. See comments in asm code +// while(tx[nx-1]==zeroA) nx--; /* skip zero term */ +// +// n = __kernel_rem_pio2(tx,y,e0,nx,2,two_over_pi); +// if(hx<0) {y[0] = -y[0]; y[1] = -y[1]; return -n;} +// return n; +//} +// +// END __ieee754_rem_pio2 PSEUDO CODE +// +// Changes between fdlibm and intrinsic for __ieee754_rem_pio2: +// 1. INF/NaN check for huge argument is removed in comparison with fdlibm +// code, because this check is already done in dcos/dsin code +// 2. Most constants are now loaded from table instead of direct initialization +// 3. Two loops are unrolled +// Assumptions: +// 1. Assume |X| >= PI/4 +// 2. Assume SCR1 = 0x3fe921fb00000000 (~ PI/4) +// 3. Assume ix = A3 +// Input and output: +// 1. 
Input: X = A0 +// 2. Return n in A2, y[0] == y0 == FA4, y[1] == y1 == FA5 +// NOTE: general purpose register names match local variable names in C code +// NOTE: fpu registers are actively reused. See comments in code about their usage +void MacroAssembler::generate__ieee754_rem_pio2(address npio2_hw, address two_over_pi, address pio2) { + const int64_t PIO2_1t = 0x3DD0B4611A626331ULL; + const int64_t PIO2_2 = 0x3DD0B4611A600000ULL; + const int64_t PIO2_2t = 0x3BA3198A2E037073ULL; + Label X_IS_NEGATIVE, X_IS_MEDIUM_OR_LARGE, X_IS_POSITIVE_LONG_PI, LARGE_ELSE, + REDUCTION_DONE, X_IS_MEDIUM_BRANCH_DONE, X_IS_LARGE, NX_SET, + X_IS_NEGATIVE_LONG_PI; + Register X = A0, n = A2, ix = A3, jv = A4, tmp5 = A5, jx = A6, + tmp3 = A7, iqBase = T0, ih = T1, i = T2; + FloatRegister v0 = FA0, v1 = FA1, v2 = FA2, v3 = FA3, v4 = FA4, v5 = FA5, v6 = FA6, v7 = FA7, + vt = FT1, v24 = FT8, v26 = FT10, v27 = FT11, v28 = FT12, v29 = FT13, v31 = FT15; + + push2(S0, S1); + + // initializing constants first + li(SCR1, 0x3ff921fb54400000); // PIO2_1 + li(SCR2, 0x4002d97c); // 3*PI/4 high word + movgr2fr_d(v1, SCR1); // v1 = PIO2_1 + bge(ix, SCR2, X_IS_MEDIUM_OR_LARGE); + + block_comment("if(ix<0x4002d97c) {... /* |x| ~< 3pi/4 */ "); { + blt(X, R0, X_IS_NEGATIVE); + + block_comment("if(hx>0) {"); { + fsub_d(v2, v0, v1); // v2 = z = x - pio2_1 + srli_d(SCR1, SCR1, 32); + li(n, 1); + beq(ix, SCR1, X_IS_POSITIVE_LONG_PI); + + block_comment("case: hx > 0 && ix!=0x3ff921fb {"); { /* 33+53 bit pi is good enough */ + li(SCR2, PIO2_1t); + movgr2fr_d(v27, SCR2); + fsub_d(v4, v2, v27); // v4 = y[0] = z - pio2_1t; + fsub_d(v5, v2, v4); + fsub_d(v5, v5, v27); // v5 = y[1] = (z-y[0])-pio2_1t + b(REDUCTION_DONE); + } + + block_comment("case: hx > 0 &*& ix==0x3ff921fb {"); { /* near pi/2, use 33+33+53 bit pi */ + bind(X_IS_POSITIVE_LONG_PI); + li(SCR1, PIO2_2); + li(SCR2, PIO2_2t); + movgr2fr_d(v27, SCR1); + movgr2fr_d(v6, SCR2); + fsub_d(v2, v2, v27); // z-= pio2_2 + fsub_d(v4, v2, v6); // y[0] = z - pio2_2t + fsub_d(v5, v2, v4); + fsub_d(v5, v5, v6); // v5 = (z - y[0]) - pio2_2t + b(REDUCTION_DONE); + } + } + + block_comment("case: hx <= 0)"); { + bind(X_IS_NEGATIVE); + fadd_d(v2, v0, v1); // v2 = z = x + pio2_1 + srli_d(SCR1, SCR1, 32); + li(n, -1); + beq(ix, SCR1, X_IS_NEGATIVE_LONG_PI); + + block_comment("case: hx <= 0 && ix!=0x3ff921fb) {"); { /* 33+53 bit pi is good enough */ + li(SCR2, PIO2_1t); + movgr2fr_d(v27, SCR2); + fadd_d(v4, v2, v27); // v4 = y[0] = z + pio2_1t; + fsub_d(v5, v2, v4); + fadd_d(v5, v5, v27); // v5 = y[1] = (z-y[0]) + pio2_1t + b(REDUCTION_DONE); + } + + block_comment("case: hx <= 0 && ix==0x3ff921fb"); { /* near pi/2, use 33+33+53 bit pi */ + bind(X_IS_NEGATIVE_LONG_PI); + li(SCR1, PIO2_2); + li(SCR2, PIO2_2t); + movgr2fr_d(v27, SCR1); + movgr2fr_d(v6, SCR2); + fadd_d(v2, v2, v27); // z += pio2_2 + fadd_d(v4, v2, v6); // y[0] = z + pio2_2t + fsub_d(v5, v2, v4); + fadd_d(v5, v5, v6); // v5 = (z - y[0]) + pio2_2t + b(REDUCTION_DONE); + } + } + } + bind(X_IS_MEDIUM_OR_LARGE); + li(SCR1, 0x413921fb); + blt(SCR1, ix, X_IS_LARGE); // ix < = 0x413921fb ? 
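+  // Medium-range reduction (|x| ~<= 2^19*(pi/2)) follows: n = (int)(|x|*invpio2 + 0.5),
+  // r = |x| - n*pio2_1, w = n*pio2_1t, with a second (pio2_2/pio2_2t) and, rarely,
+  // a third (pio2_3/pio2_3t) refinement step when cancellation costs too many bits.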
+ + block_comment("|x| ~<= 2^19*(pi/2), medium size"); { + li(ih, npio2_hw); + fld_d(v4, ih, 0); + fld_d(v5, ih, 8); + fld_d(v6, ih, 16); + fld_d(v7, ih, 24); + fabs_d(v31, v0); // v31 = t = |x| + addi_d(ih, ih, 64); + fmadd_d(v2, v31, v5, v4); // v2 = t * invpio2 + half (invpio2 = 53 bits of 2/pi, half = 0.5) + ftintrz_w_d(vt, v2); // n = (int) v2 + movfr2gr_s(n, vt); + vfrintrz_d(v2, v2); + fnmsub_d(v3, v2, v6, v31); // v3 = r = t - fn * pio2_1 + fmul_d(v26, v2, v7); // v26 = w = fn * pio2_1t + fsub_d(v4, v3, v26); // y[0] = r - w. Calculated before branch + li(SCR1, 32); + blt(SCR1, n, LARGE_ELSE); + addi_w(tmp5, n, -1); // tmp5 = n - 1 + alsl_d(tmp5, tmp5, ih, 2 - 1); + ld_w(jv, tmp5, 0); + bne(ix, jv, X_IS_MEDIUM_BRANCH_DONE); + + block_comment("else block for if(n<32&&ix!=npio2_hw[n-1])"); { + bind(LARGE_ELSE); + movfr2gr_d(jx, v4); + srli_d(tmp5, ix, 20); // j = ix >> 20 + slli_d(jx, jx, 1); + srli_d(tmp3, jx, 32 + 20 + 1); // r7 = j-(((*(i0+(int*)&y[0]))>>20)&0x7ff); + sub_d(tmp3, tmp5, tmp3); + + block_comment("if(i>16)"); { + li(SCR1, 16); + bge(SCR1, tmp3, X_IS_MEDIUM_BRANCH_DONE); + // i > 16. 2nd iteration needed + fld_d(v6, ih, -32); + fld_d(v7, ih, -24); + fmov_d(v28, v3); // t = r + fmul_d(v29, v2, v6); // w = v29 = fn * pio2_2 + fsub_d(v3, v28, v29); // r = t - w + fsub_d(v31, v28, v3); // v31 = (t - r) + fsub_d(v31, v29, v31); // v31 = w - (t - r) = - ((t - r) - w) + fmadd_d(v26, v2, v7, v31); // v26 = w = fn*pio2_2t - ((t - r) - w) + fsub_d(v4, v3, v26); // y[0] = r - w + movfr2gr_d(jx, v4); + slli_d(jx, jx, 1); + srli_d(tmp3, jx, 32 + 20 + 1); // r7 = j-(((*(i0+(int*)&y[0]))>>20)&0x7ff); + sub_d(tmp3, tmp5, tmp3); + + block_comment("if(i>49)"); { + li(SCR1, 49); + bge(SCR1, tmp3, X_IS_MEDIUM_BRANCH_DONE); + // 3rd iteration need, 151 bits acc + fld_d(v6, ih, -16); + fld_d(v7, ih, -8); + fmov_d(v28, v3); // save "r" + fmul_d(v29, v2, v6); // v29 = fn * pio2_3 + fsub_d(v3, v28, v29); // r = r - w + fsub_d(v31, v28, v3); // v31 = (t - r) + fsub_d(v31, v29, v31); // v31 = w - (t - r) = - ((t - r) - w) + fmadd_d(v26, v2, v7, v31); // v26 = w = fn*pio2_3t - ((t - r) - w) + fsub_d(v4, v3, v26); // y[0] = r - w + } + } + } + block_comment("medium x tail"); { + bind(X_IS_MEDIUM_BRANCH_DONE); + fsub_d(v5, v3, v4); // v5 = y[1] = (r - y[0]) + fsub_d(v5, v5, v26); // v5 = y[1] = (r - y[0]) - w + blt(R0, X, REDUCTION_DONE); + fneg_d(v4, v4); + sub_w(n, R0, n); + fneg_d(v5, v5); + b(REDUCTION_DONE); + } + } + + block_comment("all other (large) arguments"); { + bind(X_IS_LARGE); + srli_d(SCR1, ix, 20); // ix >> 20 + li(tmp5, 0x4170000000000000); + addi_w(SCR1, SCR1, -1046); // e0 + movgr2fr_d(v24, tmp5); // init two24A value + slli_w(jv, SCR1, 20); // ix - (e0<<20) + sub_w(jv, ix, jv); + slli_d(jv, jv, 32); + addi_w(SCR2, SCR1, -3); + bstrins_d(jv, X, 31, 0); // jv = z + li(i, 24); + movgr2fr_d(v26, jv); // v26 = z + + block_comment("unrolled for(i=0;i<2;i++) {tx[i] = (double)((int)(z));z = (z-tx[i])*two24A;}"); { + // tx[0,1,2] = v6,v7,v26 + vfrintrz_d(v6, v26); // v6 = (double)((int)v26) + div_w(jv, SCR2, i); // jv = (e0 - 3)/24 + fsub_d(v26, v26, v6); + addi_d(SP, SP, -560); + fmul_d(v26, v26, v24); + vfrintrz_d(v7, v26); // v7 = (double)((int)v26) + li(jx, 2); // calculate jx as nx - 1, which is initially 2. Not a part of unrolled loop + fsub_d(v26, v26, v7); + } + + block_comment("nx calculation with unrolled while(tx[nx-1]==zeroA) nx--;"); { + vxor_v(vt, vt, vt); + fcmp_cne_d(FCC0, v26, vt); // if NE then jx == 2. 
else it's 1 or 0 + addi_d(iqBase, SP, 480); // base of iq[] + fmul_d(v3, v26, v24); + bcnez(FCC0, NX_SET); + fcmp_cne_d(FCC0, v7, vt); // v7 == 0 => jx = 0. Else jx = 1 + movcf2gr(jx, FCC0); + } + bind(NX_SET); + generate__kernel_rem_pio2(two_over_pi, pio2); + // now we have y[0] = v4, y[1] = v5 and n = r2 + bge(X, R0, REDUCTION_DONE); + fneg_d(v4, v4); + fneg_d(v5, v5); + sub_w(n, R0, n); + } + bind(REDUCTION_DONE); + + pop2(S0, S1); +} + +///* +// * __kernel_rem_pio2(x,y,e0,nx,prec,ipio2) +// * double x[],y[]; int e0,nx,prec; int ipio2[]; +// * +// * __kernel_rem_pio2 return the last three digits of N with +// * y = x - N*pi/2 +// * so that |y| < pi/2. +// * +// * The method is to compute the integer (mod 8) and fraction parts of +// * (2/pi)*x without doing the full multiplication. In general we +// * skip the part of the product that are known to be a huge integer ( +// * more accurately, = 0 mod 8 ). Thus the number of operations are +// * independent of the exponent of the input. +// * +// * NOTE: 2/pi int representation is converted to double +// * // (2/pi) is represented by an array of 24-bit integers in ipio2[]. +// * +// * Input parameters: +// * x[] The input value (must be positive) is broken into nx +// * pieces of 24-bit integers in double precision format. +// * x[i] will be the i-th 24 bit of x. The scaled exponent +// * of x[0] is given in input parameter e0 (i.e., x[0]*2^e0 +// * match x's up to 24 bits. +// * +// * Example of breaking a double positive z into x[0]+x[1]+x[2]: +// * e0 = ilogb(z)-23 +// * z = scalbn(z,-e0) +// * for i = 0,1,2 +// * x[i] = floor(z) +// * z = (z-x[i])*2**24 +// * +// * +// * y[] ouput result in an array of double precision numbers. +// * The dimension of y[] is: +// * 24-bit precision 1 +// * 53-bit precision 2 +// * 64-bit precision 2 +// * 113-bit precision 3 +// * The actual value is the sum of them. Thus for 113-bit +// * precsion, one may have to do something like: +// * +// * long double t,w,r_head, r_tail; +// * t = (long double)y[2] + (long double)y[1]; +// * w = (long double)y[0]; +// * r_head = t+w; +// * r_tail = w - (r_head - t); +// * +// * e0 The exponent of x[0] +// * +// * nx dimension of x[] +// * +// * prec an interger indicating the precision: +// * 0 24 bits (single) +// * 1 53 bits (double) +// * 2 64 bits (extended) +// * 3 113 bits (quad) +// * +// * NOTE: ipio2[] array below is converted to double representation +// * //ipio2[] +// * // integer array, contains the (24*i)-th to (24*i+23)-th +// * // bit of 2/pi after binary point. The corresponding +// * // floating value is +// * +// * ipio2[i] * 2^(-24(i+1)). +// * +// * Here is the description of some local variables: +// * +// * jk jk+1 is the initial number of terms of ipio2[] needed +// * in the computation. The recommended value is 2,3,4, +// * 6 for single, double, extended,and quad. +// * +// * jz local integer variable indicating the number of +// * terms of ipio2[] used. +// * +// * jx nx - 1 +// * +// * jv index for pointing to the suitable ipio2[] for the +// * computation. In general, we want +// * ( 2^e0*x[0] * ipio2[jv-1]*2^(-24jv) )/8 +// * is an integer. Thus +// * e0-3-24*jv >= 0 or (e0-3)/24 >= jv +// * Hence jv = max(0,(e0-3)/24). +// * +// * jp jp+1 is the number of terms in PIo2[] needed, jp = jk. +// * +// * q[] double array with integral value, representing the +// * 24-bits chunk of the product of x and 2/pi. +// * +// * q0 the corresponding exponent of q[0]. Note that the +// * exponent for q[i] would be q0-24*i. 
+// *
+// *  PIo2[]  double precision array, obtained by cutting pi/2
+// *      into 24 bits chunks.
+// *
+// *  f[] ipio2[] in floating point
+// *
+// *  iq[]    integer array by breaking up q[] in 24-bits chunk.
+// *
+// *  fq[]    final product of x*(2/pi) in fq[0],..,fq[jk]
+// *
+// *  ih  integer. If >0 it indicates q[] is >= 0.5, hence
+// *      it also indicates the *sign* of the result.
+// *
+// */
+//
+// Use PIo2 table (see stubRoutines_loongarch64.cpp)
+//
+// BEGIN __kernel_rem_pio2 PSEUDO CODE
+//
+//static int __kernel_rem_pio2(double *x, double *y, int e0, int nx, int prec, /* NOTE: converted to double */ const double *ipio2 // const int *ipio2) {
+//  int jz,jx,jv,jp,jk,carry,n,iq[20],i,j,k,m,q0,ih;
+//  double z,fw,f[20],fq[20],q[20];
+//
+//    /* initialize jk*/
+//    // jk = init_jk[prec]; // NOTE: prec==2 for double. jk is always 4.
+//    jp = jk; // NOTE: always 4
+//
+//    /* determine jx,jv,q0, note that 3>q0 */
+//    jx = nx-1;
+//    jv = (e0-3)/24; if(jv<0) jv=0;
+//    q0 = e0-24*(jv+1);
+//
+//    /* set up f[0] to f[jx+jk] where f[jx+jk] = ipio2[jv+jk] */
+//    j = jv-jx; m = jx+jk;
+//
+//    // NOTE: split into two for-loops: one with zeroB and one with ipio2[j]. It
+//    //       allows the use of wider loads/stores
+//    for(i=0;i<=m;i++,j++) f[i] = (j<0)? zeroB : /* NOTE: converted to double */ ipio2[j]; //(double) ipio2[j];
+//
+//    // NOTE: unrolled and vectorized "for". See comments in asm code
+//    /* compute q[0],q[1],...q[jk] */
+//    for (i=0;i<=jk;i++) {
+//      for(j=0,fw=0.0;j<=jx;j++) fw += x[j]*f[jx+i-j]; q[i] = fw;
+//    }
+//
+//    jz = jk;
+//recompute:
+//    /* distill q[] into iq[] reversingly */
+//    for(i=0,j=jz,z=q[jz];j>0;i++,j--) {
+//      fw    = (double)((int)(twon24* z));
+//      iq[i] = (int)(z-two24B*fw);
+//      z     = q[j-1]+fw;
+//    }
+//
+//    /* compute n */
+//    z  = scalbnA(z,q0);   /* actual value of z */
+//    z -= 8.0*floor(z*0.125);    /* trim off integer >= 8 */
+//    n  = (int) z;
+//    z -= (double)n;
+//    ih = 0;
+//    if(q0>0) {  /* need iq[jz-1] to determine n */
+//      i = (iq[jz-1]>>(24-q0)); n += i;
+//      iq[jz-1] -= i<<(24-q0);
+//      ih = iq[jz-1]>>(23-q0);
+//    }
+//    else if(q0==0) ih = iq[jz-1]>>23;
+//    else if(z>=0.5) ih=2;
+//
+//    if(ih>0) {  /* q > 0.5 */
+//      n += 1; carry = 0;
+//      for(i=0;i<jz;i++) {  /* compute 1-q */
+//        j = iq[i];
+//        if(carry==0) {
+//          if(j!=0) {
+//            carry = 1; iq[i] = 0x1000000 - j;
+//          }
+//        } else iq[i] = 0xffffff - j;
+//      }
+//      if(q0>0) {  /* rare case: chance is 1 in 12 */
+//        switch(q0) {
+//        case 1:
+//          iq[jz-1] &= 0x7fffff; break;
+//        case 2:
+//          iq[jz-1] &= 0x3fffff; break;
+//        }
+//      }
+//      if(ih==2) {
+//        z = one - z;
+//        if(carry!=0) z -= scalbnA(one,q0);
+//      }
+//    }
+//
+//    /* check if recomputation is needed */
+//    if(z==zeroB) {
+//      j = 0;
+//      for (i=jz-1;i>=jk;i--) j |= iq[i];
+//      if(j==0) { /* need recomputation */
+//        for(k=1;iq[jk-k]==0;k++); /* k = no.
of terms needed */ +// +// for(i=jz+1;i<=jz+k;i++) { /* add q[jz+1] to q[jz+k] */ +// f[jx+i] = /* NOTE: converted to double */ ipio2[jv+i]; //(double) ipio2[jv+i]; +// for(j=0,fw=0.0;j<=jx;j++) fw += x[j]*f[jx+i-j]; +// q[i] = fw; +// } +// jz += k; +// goto recompute; +// } +// } +// +// /* chop off zero terms */ +// if(z==0.0) { +// jz -= 1; q0 -= 24; +// while(iq[jz]==0) { jz--; q0-=24;} +// } else { /* break z into 24-bit if necessary */ +// z = scalbnA(z,-q0); +// if(z>=two24B) { +// fw = (double)((int)(twon24*z)); +// iq[jz] = (int)(z-two24B*fw); +// jz += 1; q0 += 24; +// iq[jz] = (int) fw; +// } else iq[jz] = (int) z ; +// } +// +// /* convert integer "bit" chunk to floating-point value */ +// fw = scalbnA(one,q0); +// for(i=jz;i>=0;i--) { +// q[i] = fw*(double)iq[i]; fw*=twon24; +// } +// +// /* compute PIo2[0,...,jp]*q[jz,...,0] */ +// for(i=jz;i>=0;i--) { +// for(fw=0.0,k=0;k<=jp&&k<=jz-i;k++) fw += PIo2[k]*q[i+k]; +// fq[jz-i] = fw; +// } +// +// // NOTE: switch below is eliminated, because prec is always 2 for doubles +// /* compress fq[] into y[] */ +// //switch(prec) { +// //case 0: +// // fw = 0.0; +// // for (i=jz;i>=0;i--) fw += fq[i]; +// // y[0] = (ih==0)? fw: -fw; +// // break; +// //case 1: +// //case 2: +// fw = 0.0; +// for (i=jz;i>=0;i--) fw += fq[i]; +// y[0] = (ih==0)? fw: -fw; +// fw = fq[0]-fw; +// for (i=1;i<=jz;i++) fw += fq[i]; +// y[1] = (ih==0)? fw: -fw; +// // break; +// //case 3: /* painful */ +// // for (i=jz;i>0;i--) { +// // fw = fq[i-1]+fq[i]; +// // fq[i] += fq[i-1]-fw; +// // fq[i-1] = fw; +// // } +// // for (i=jz;i>1;i--) { +// // fw = fq[i-1]+fq[i]; +// // fq[i] += fq[i-1]-fw; +// // fq[i-1] = fw; +// // } +// // for (fw=0.0,i=jz;i>=2;i--) fw += fq[i]; +// // if(ih==0) { +// // y[0] = fq[0]; y[1] = fq[1]; y[2] = fw; +// // } else { +// // y[0] = -fq[0]; y[1] = -fq[1]; y[2] = -fw; +// // } +// //} +// return n&7; +//} +// +// END __kernel_rem_pio2 PSEUDO CODE +// +// Changes between fdlibm and intrinsic: +// 1. One loop is unrolled and vectorized (see comments in code) +// 2. One loop is split into 2 loops (see comments in code) +// 3. Non-double code is removed(last switch). Sevaral variables became +// constants because of that (see comments in code) +// 4. Use of jx, which is nx-1 instead of nx +// Assumptions: +// 1. Assume |X| >= PI/4 +// Input and output: +// 1. Input: X = A0, jx == nx - 1 == A6, e0 == SCR1 +// 2. Return n in A2, y[0] == y0 == FA4, y[1] == y1 == FA5 +// NOTE: general purpose register names match local variable names in C code +// NOTE: fpu registers are actively reused. 
See comments in code about their usage +void MacroAssembler::generate__kernel_rem_pio2(address two_over_pi, address pio2) { + Label Q_DONE, JX_IS_0, JX_IS_2, COMP_INNER_LOOP, RECOMP_FOR2, Q0_ZERO_CMP_LT, + RECOMP_CHECK_DONE_NOT_ZERO, Q0_ZERO_CMP_DONE, COMP_FOR, Q0_ZERO_CMP_EQ, + INIT_F_ZERO, RECOMPUTE, IH_FOR_INCREMENT, IH_FOR_STORE, RECOMP_CHECK_DONE, + Z_IS_LESS_THAN_TWO24B, Z_IS_ZERO, FW_Y1_NO_NEGATION, + RECOMP_FW_UPDATED, Z_ZERO_CHECK_DONE, FW_FOR1, IH_AFTER_SWITCH, IH_HANDLED, + CONVERTION_FOR, FW_Y0_NO_NEGATION, FW_FOR1_DONE, FW_FOR2, FW_FOR2_DONE, + IH_FOR, SKIP_F_LOAD, RECOMP_FOR1, RECOMP_FIRST_FOR, INIT_F_COPY, + RECOMP_FOR1_CHECK; + Register tmp2 = A1, n = A2, jv = A4, tmp5 = A5, jx = A6, + tmp3 = A7, iqBase = T0, ih = T1, i = T2, tmp1 = T3, + jz = S0, j = T5, twoOverPiBase = T6, tmp4 = S1, qBase = T8; + FloatRegister v0 = FA0, v1 = FA1, v2 = FA2, v3 = FA3, v4 = FA4, v5 = FA5, v6 = FA6, v7 = FA7, + vt = FT1, v17 = FT2, v18 = FT3, v19 = FT4, v20 = FT5, v21 = FT6, v22 = FT7, v24 = FT8, + v25 = FT9, v26 = FT10, v27 = FT11, v28 = FT12, v29 = FT13, v30 = FT14, v31 = FT15; + // jp = jk == init_jk[prec] = init_jk[2] == {2,3,4,6}[2] == 4 + // jx = nx - 1 + li(twoOverPiBase, two_over_pi); + slti(SCR2, jv, 0); + addi_w(tmp4, jx, 4); // tmp4 = m = jx + jk = jx + 4. jx is in {0,1,2} so m is in [4,5,6] + masknez(jv, jv, SCR2); + if (UseLASX) + xvxor_v(v26, v26, v26); + else + vxor_v(v26, v26, v26); + addi_w(tmp5, jv, 1); // jv+1 + sub_w(j, jv, jx); + addi_d(qBase, SP, 320); // base of q[] + mul_w(SCR2, i, tmp5); // q0 = e0-24*(jv+1) + sub_w(SCR1, SCR1, SCR2); + // use double f[20], fq[20], q[20], iq[20] on stack, which is + // (20 + 20 + 20) x 8 + 20 x 4 = 560 bytes. From lower to upper addresses it + // will contain f[20], fq[20], q[20], iq[20] + // now initialize f[20] indexes 0..m (inclusive) + // for(i=0;i<=m;i++,j++) f[i] = (j<0)? zeroB : /* NOTE: converted to double */ ipio2[j]; // (double) ipio2[j]; + move(tmp5, SP); + + block_comment("for(i=0;i<=m;i++,j++) f[i] = (j<0)? zeroB : /* NOTE: converted to double */ ipio2[j]; // (double) ipio2[j];"); { + xorr(i, i, i); + bge(j, R0, INIT_F_COPY); + bind(INIT_F_ZERO); + if (UseLASX) { + xvst(v26, tmp5, 0); + } else { + vst(v26, tmp5, 0); + vst(v26, tmp5, 16); + } + addi_d(tmp5, tmp5, 32); + addi_w(i, i, 4); + addi_w(j, j, 4); + blt(j, R0, INIT_F_ZERO); + sub_w(i, i, j); + move(j, R0); + bind(INIT_F_COPY); + alsl_d(tmp1, j, twoOverPiBase, 3 - 1); // ipio2[j] start address + if (UseLASX) { + xvld(v18, tmp1, 0); + xvld(v19, tmp1, 32); + } else { + vld(v18, tmp1, 0); + vld(v19, tmp1, 16); + vld(v20, tmp1, 32); + vld(v21, tmp1, 48); + } + alsl_d(tmp5, i, SP, 3 - 1); + if (UseLASX) { + xvst(v18, tmp5, 0); + xvst(v19, tmp5, 32); + } else { + vst(v18, tmp5, 0); + vst(v19, tmp5, 16); + vst(v20, tmp5, 32); + vst(v21, tmp5, 48); + } + } + // v18..v21 can actually contain f[0..7] + beqz(i, SKIP_F_LOAD); // i == 0 => f[i] == f[0] => already loaded + if (UseLASX) { + xvld(v18, SP, 0); // load f[0..7] + xvld(v19, SP, 32); + } else { + vld(v18, SP, 0); // load f[0..7] + vld(v19, SP, 16); + vld(v20, SP, 32); + vld(v21, SP, 48); + } + bind(SKIP_F_LOAD); + // calculate 2^q0 and 2^-q0, which we'll need further. + // q0 is exponent. 
So, calculate biased exponent(q0+1023) + sub_w(tmp4, R0, SCR1); + addi_w(tmp5, SCR1, 1023); + addi_w(tmp4, tmp4, 1023); + // Unroll following for(s) depending on jx in [0,1,2] + // for (i=0;i<=jk;i++) { + // for(j=0,fw=0.0;j<=jx;j++) fw += x[j]*f[jx+i-j]; q[i] = fw; + // } + // Unrolling for jx == 0 case: + // q[0] = x[0] * f[0] + // q[1] = x[0] * f[1] + // q[2] = x[0] * f[2] + // q[3] = x[0] * f[3] + // q[4] = x[0] * f[4] + // + // Vectorization for unrolled jx == 0 case: + // {q[0], q[1]} = {f[0], f[1]} * x[0] + // {q[2], q[3]} = {f[2], f[3]} * x[0] + // q[4] = f[4] * x[0] + // + // Unrolling for jx == 1 case: + // q[0] = x[0] * f[1] + x[1] * f[0] + // q[1] = x[0] * f[2] + x[1] * f[1] + // q[2] = x[0] * f[3] + x[1] * f[2] + // q[3] = x[0] * f[4] + x[1] * f[3] + // q[4] = x[0] * f[5] + x[1] * f[4] + // + // Vectorization for unrolled jx == 1 case: + // {q[0], q[1]} = {f[0], f[1]} * x[1] + // {q[2], q[3]} = {f[2], f[3]} * x[1] + // q[4] = f[4] * x[1] + // {q[0], q[1]} += {f[1], f[2]} * x[0] + // {q[2], q[3]} += {f[3], f[4]} * x[0] + // q[4] += f[5] * x[0] + // + // Unrolling for jx == 2 case: + // q[0] = x[0] * f[2] + x[1] * f[1] + x[2] * f[0] + // q[1] = x[0] * f[3] + x[1] * f[2] + x[2] * f[1] + // q[2] = x[0] * f[4] + x[1] * f[3] + x[2] * f[2] + // q[3] = x[0] * f[5] + x[1] * f[4] + x[2] * f[3] + // q[4] = x[0] * f[6] + x[1] * f[5] + x[2] * f[4] + // + // Vectorization for unrolled jx == 2 case: + // {q[0], q[1]} = {f[0], f[1]} * x[2] + // {q[2], q[3]} = {f[2], f[3]} * x[2] + // q[4] = f[4] * x[2] + // {q[0], q[1]} += {f[1], f[2]} * x[1] + // {q[2], q[3]} += {f[3], f[4]} * x[1] + // q[4] += f[5] * x[1] + // {q[0], q[1]} += {f[2], f[3]} * x[0] + // {q[2], q[3]} += {f[4], f[5]} * x[0] + // q[4] += f[6] * x[0] + block_comment("unrolled and vectorized computation of q[0]..q[jk]"); { + li(SCR2, 1); + slli_d(tmp5, tmp5, 52); // now it's 2^q0 double value + slli_d(tmp4, tmp4, 52); // now it's 2^-q0 double value + if (UseLASX) + xvpermi_d(v6, v6, 0); + else + vreplvei_d(v6, v6, 0); + blt(jx, SCR2, JX_IS_0); + addi_d(i, SP, 8); + if (UseLASX) { + xvld(v26, i, 0); // load f[1..4] + xvpermi_d(v3, v3, 0); + xvpermi_d(v7, v7, 0); + xvpermi_d(v20, v19, 85); + xvpermi_d(v21, v19, 170); + } else { + vld(v26, i, 0); // load f[1..4] + vld(v27, i, 16); + vreplvei_d(v3, v3, 0); + vreplvei_d(v7, v7, 0); + vreplvei_d(vt, v20, 1); + vreplvei_d(v21, v21, 0); + } + blt(SCR2, jx, JX_IS_2); + // jx == 1 + if (UseLASX) { + xvfmul_d(v28, v18, v7); // f[0,3] * x[1] + fmul_d(v30, v19, v7); // f[4] * x[1] + xvfmadd_d(v28, v26, v6, v28); + fmadd_d(v30, v6, v20, v30); // v30 += f[5] * x[0] + } else { + vfmul_d(v28, v18, v7); // f[0,1] * x[1] + vfmul_d(v29, v19, v7); // f[2,3] * x[1] + fmul_d(v30, v20, v7); // f[4] * x[1] + vfmadd_d(v28, v26, v6, v28); + vfmadd_d(v29, v27, v6, v29); + fmadd_d(v30, v6, vt, v30); // v30 += f[5] * x[0] + } + b(Q_DONE); + bind(JX_IS_2); + if (UseLASX) { + xvfmul_d(v28, v18, v3); // f[0,3] * x[2] + fmul_d(v30, v19, v3); // f[4] * x[2] + xvfmadd_d(v28, v26, v7, v28); + fmadd_d(v30, v7, v20, v30); // v30 += f[5] * x[1] + xvpermi_q(v18, v19, 3); + xvfmadd_d(v28, v18, v6, v28); + } else { + vfmul_d(v28, v18, v3); // f[0,1] * x[2] + vfmul_d(v29, v19, v3); // f[2,3] * x[2] + fmul_d(v30, v20, v3); // f[4] * x[2] + vfmadd_d(v28, v26, v7, v28); + vfmadd_d(v29, v27, v7, v29); + fmadd_d(v30, v7, vt, v30); // v30 += f[5] * x[1] + vfmadd_d(v28, v19, v6, v28); + vfmadd_d(v29, v20, v6, v29); + } + fmadd_d(v30, v6, v21, v30); // v30 += f[6] * x[0] + b(Q_DONE); + bind(JX_IS_0); + if (UseLASX) { + xvfmul_d(v28, 
v18, v6); // f[0,1] * x[0] + fmul_d(v30, v19, v6); // f[4] * x[0] + } else { + vfmul_d(v28, v18, v6); // f[0,1] * x[0] + vfmul_d(v29, v19, v6); // f[2,3] * x[0] + fmul_d(v30, v20, v6); // f[4] * x[0] + } + bind(Q_DONE); + if (UseLASX) { + xvst(v28, qBase, 0); // save calculated q[0]...q[jk] + } else { + vst(v28, qBase, 0); // save calculated q[0]...q[jk] + vst(v29, qBase, 16); + } + fst_d(v30, qBase, 32); + } + li(i, 0x3E70000000000000); + li(jz, 4); + movgr2fr_d(v17, i); // v17 = twon24 + movgr2fr_d(v30, tmp5); // 2^q0 + vldi(v21, -960); // 0.125 (0x3fc0000000000000) + vldi(v20, -992); // 8.0 (0x4020000000000000) + movgr2fr_d(v22, tmp4); // 2^-q0 + + block_comment("recompute loop"); { + bind(RECOMPUTE); + // for(i=0,j=jz,z=q[jz];j>0;i++,j--) { + // fw = (double)((int)(twon24* z)); + // iq[i] = (int)(z-two24A*fw); + // z = q[j-1]+fw; + // } + block_comment("distill q[] into iq[] reversingly"); { + xorr(i, i, i); + move(j, jz); + alsl_d(tmp2, jz, qBase, 3 - 1); // q[jz] address + fld_d(v18, tmp2, 0); // z = q[j] and moving address to q[j-1] + addi_d(tmp2, tmp2, -8); + bind(RECOMP_FIRST_FOR); + fld_d(v27, tmp2, 0); + addi_d(tmp2, tmp2, -8); + fmul_d(v29, v17, v18); // twon24*z + vfrintrz_d(v29, v29); // (double)(int) + fnmsub_d(v28, v24, v29, v18); // v28 = z-two24A*fw + ftintrz_w_d(vt, v28); // (int)(z-two24A*fw) + alsl_d(SCR2, i, iqBase, 2 - 1); + fst_s(vt, SCR2, 0); + fadd_d(v18, v27, v29); + addi_w(i, i, 1); + addi_w(j, j, -1); + blt(R0, j, RECOMP_FIRST_FOR); + } + // compute n + fmul_d(v18, v18, v30); + fmul_d(v2, v18, v21); + vfrintrm_d(v2, v2); // v2 = floor(v2) == rounding towards -inf + fnmsub_d(v18, v2, v20, v18); // z -= 8.0*floor(z*0.125); + li(ih, 2); + vfrintrz_d(v2, v18); // v2 = (double)((int)z) + ftintrz_w_d(vt, v18); // n = (int) z; + movfr2gr_s(n, vt); + fsub_d(v18, v18, v2); // z -= (double)n; + + block_comment("q0-dependent initialization"); { + blt(SCR1, R0, Q0_ZERO_CMP_LT); // if (q0 > 0) + addi_w(j, jz, -1); // j = jz - 1 + alsl_d(SCR2, j, iqBase, 2 - 1); + ld_w(tmp2, SCR2, 0); // tmp2 = iq[jz-1] + beq(SCR1, R0, Q0_ZERO_CMP_EQ); + li(tmp4, 24); + sub_w(tmp4, tmp4, SCR1); // == 24 - q0 + srl_w(i, tmp2, tmp4); // i = iq[jz-1] >> (24-q0) + sll_w(tmp5, i, tmp4); + sub_w(tmp2, tmp2, tmp5); // iq[jz-1] -= i<<(24-q0); + alsl_d(SCR2, j, iqBase, 2 - 1); + st_w(tmp2, SCR2, 0); // store iq[jz-1] + addi_w(SCR2, tmp4, -1); // == 23 - q0 + add_w(n, n, i); // n+=i + srl_w(ih, tmp2, SCR2); // ih = iq[jz-1] >> (23-q0) + b(Q0_ZERO_CMP_DONE); + bind(Q0_ZERO_CMP_EQ); + srli_d(ih, tmp2, 23); // ih = iq[z-1] >> 23 + b(Q0_ZERO_CMP_DONE); + bind(Q0_ZERO_CMP_LT); + vldi(v4, -928); // 0.5 (0x3fe0000000000000) + fcmp_clt_d(FCC0, v18, v4); + movcf2gr(SCR2, FCC0); + masknez(ih, ih, SCR2); // if (z<0.5) ih = 0 + } + bind(Q0_ZERO_CMP_DONE); + bge(R0, ih, IH_HANDLED); + + block_comment("if(ih>) {"); { + // use rscratch2 as carry + + block_comment("for(i=0;i0) {"); { + bge(R0, SCR1, IH_AFTER_SWITCH); + // tmp3 still has iq[jz-1] value. 
no need to reload + // now, zero high tmp3 bits (rscratch1 number of bits) + li(j, 0xffffffff); + addi_w(i, jz, -1); // set i to jz-1 + srl_d(j, j, SCR1); + srli_w(tmp1, j, 8); + andr(tmp3, tmp3, tmp1); // we have 24-bit-based constants + alsl_d(tmp1, i, iqBase, 2 - 1); + st_w(tmp3, tmp1, 0); // save iq[jz-1] + } + bind(IH_AFTER_SWITCH); + li(tmp1, 2); + bne(ih, tmp1, IH_HANDLED); + + block_comment("if(ih==2) {"); { + vldi(v25, -912); // 1.0 (0x3ff0000000000000) + fsub_d(v18, v25, v18); // z = one - z; + beqz(SCR2, IH_HANDLED); + fsub_d(v18, v18, v30); // z -= scalbnA(one,q0); + } + } + bind(IH_HANDLED); + // check if recomputation is needed + vxor_v(vt, vt, vt); + fcmp_cne_d(FCC0, v18, vt); + bcnez(FCC0, RECOMP_CHECK_DONE_NOT_ZERO); + + block_comment("if(z==zeroB) {"); { + + block_comment("for (i=jz-1;i>=jk;i--) j |= iq[i];"); { + addi_w(i, jz, -1); + xorr(j, j, j); + b(RECOMP_FOR1_CHECK); + bind(RECOMP_FOR1); + alsl_d(tmp1, i, iqBase, 2 - 1); + ld_w(tmp1, tmp1, 0); + orr(j, j, tmp1); + addi_w(i, i, -1); + bind(RECOMP_FOR1_CHECK); + li(SCR2, 4); + bge(i, SCR2, RECOMP_FOR1); + } + bnez(j, RECOMP_CHECK_DONE); + + block_comment("if(j==0) {"); { + // for(k=1;iq[jk-k]==0;k++); // let's unroll it. jk == 4. So, read + // iq[3], iq[2], iq[1], iq[0] until non-zero value + ld_d(tmp1, iqBase, 0); // iq[0..3] + ld_d(tmp3, iqBase, 8); + li(j, 2); + masknez(tmp1, tmp1, tmp3); // set register for further consideration + orr(tmp1, tmp1, tmp3); + masknez(j, j, tmp3); // set initial k. Use j as k + srli_d(SCR2, tmp1, 32); + sltu(SCR2, R0, SCR2); + addi_w(i, jz, 1); + add_w(j, j, SCR2); + + block_comment("for(i=jz+1;i<=jz+k;i++) {...}"); { + add_w(jz, i, j); // i = jz+1, j = k-1. j+i = jz+k (which is a new jz) + bind(RECOMP_FOR2); + add_w(tmp1, jv, i); + alsl_d(SCR2, tmp1, twoOverPiBase, 3 - 1); + fld_d(v29, SCR2, 0); + add_w(tmp2, jx, i); + alsl_d(SCR2, tmp2, SP, 3 - 1); + fst_d(v29, SCR2, 0); + // f[jx+i] = /* NOTE: converted to double */ ipio2[jv+i]; //(double) ipio2[jv+i]; + // since jx = 0, 1 or 2 we can unroll it: + // for(j=0,fw=0.0;j<=jx;j++) fw += x[j]*f[jx+i-j]; + // f[jx+i-j] == (for first iteration) f[jx+i], which is already v29 + alsl_d(tmp2, tmp2, SP, 3 - 1); // address of f[jx+i] + fld_d(v4, tmp2, -16); // load f[jx+i-2] and f[jx+i-1] + fld_d(v5, tmp2, -8); + fmul_d(v26, v6, v29); // initial fw + beqz(jx, RECOMP_FW_UPDATED); + fmadd_d(v26, v7, v5, v26); + li(SCR2, 1); + beq(jx, SCR2, RECOMP_FW_UPDATED); + fmadd_d(v26, v3, v4, v26); + bind(RECOMP_FW_UPDATED); + alsl_d(SCR2, i, qBase, 3 - 1); + fst_d(v26, SCR2, 0); // q[i] = fw; + addi_w(i, i, 1); + bge(jz, i, RECOMP_FOR2); // jz here is "old jz" + k + } + b(RECOMPUTE); + } + } + } + bind(RECOMP_CHECK_DONE); + // chop off zero terms + vxor_v(vt, vt, vt); + fcmp_ceq_d(FCC0, v18, vt); + bcnez(FCC0, Z_IS_ZERO); + + block_comment("else block of if(z==0.0) {"); { + bind(RECOMP_CHECK_DONE_NOT_ZERO); + fmul_d(v18, v18, v22); + fcmp_clt_d(FCC0, v18, v24); // v24 is stil two24A + bcnez(FCC0, Z_IS_LESS_THAN_TWO24B); + fmul_d(v1, v18, v17); // twon24*z + vfrintrz_d(v1, v1); // v1 = (double)(int)(v1) + fnmsub_d(v2, v24, v1, v18); + ftintrz_w_d(vt, v1); // (int)fw + movfr2gr_s(tmp3, vt); + ftintrz_w_d(vt, v2); // double to int + movfr2gr_s(tmp2, vt); + alsl_d(SCR2, jz, iqBase, 2 - 1); + st_w(tmp2, SCR2, 0); + addi_w(SCR1, SCR1, 24); + addi_w(jz, jz, 1); + st_w(tmp3, SCR2, 0); // iq[jz] = (int) fw + b(Z_ZERO_CHECK_DONE); + bind(Z_IS_LESS_THAN_TWO24B); + ftintrz_w_d(vt, v18); // (int)z + movfr2gr_s(tmp3, vt); + alsl_d(SCR2, jz, iqBase, 2 - 1); + st_w(tmp3, 
SCR2, 0); // iq[jz] = (int) z + b(Z_ZERO_CHECK_DONE); + } + + block_comment("if(z==0.0) {"); { + bind(Z_IS_ZERO); + addi_w(jz, jz, -1); + alsl_d(SCR2, jz, iqBase, 2 - 1); + ld_w(tmp1, SCR2, 0); + addi_w(SCR1, SCR1, -24); + beqz(tmp1, Z_IS_ZERO); + } + bind(Z_ZERO_CHECK_DONE); + // convert integer "bit" chunk to floating-point value + // v17 = twon24 + // update v30, which was scalbnA(1.0, ); + addi_w(tmp2, SCR1, 1023); // biased exponent + slli_d(tmp2, tmp2, 52); // put at correct position + move(i, jz); + movgr2fr_d(v30, tmp2); + + block_comment("for(i=jz;i>=0;i--) {q[i] = fw*(double)iq[i]; fw*=twon24;}"); { + bind(CONVERTION_FOR); + alsl_d(SCR2, i, iqBase, 2 - 1); + fld_s(v31, SCR2, 0); + vffintl_d_w(v31, v31); + fmul_d(v31, v31, v30); + alsl_d(SCR2, i, qBase, 3 - 1); + fst_d(v31, SCR2, 0); + fmul_d(v30, v30, v17); + addi_w(i, i, -1); + bge(i, R0, CONVERTION_FOR); + } + addi_d(SCR2, SP, 160); // base for fq + // reusing twoOverPiBase + li(twoOverPiBase, pio2); + + block_comment("compute PIo2[0,...,jp]*q[jz,...,0]. for(i=jz;i>=0;i--) {...}"); { + move(i, jz); + move(tmp2, R0); // tmp2 will keep jz - i == 0 at start + bind(COMP_FOR); + // for(fw=0.0,k=0;k<=jp&&k<=jz-i;k++) fw += PIo2[k]*q[i+k]; + vxor_v(v30, v30, v30); + alsl_d(tmp5, i, qBase, 3 - 1); // address of q[i+k] for k==0 + li(tmp3, 4); + slti(tmp4, tmp2, 5); + alsl_d(tmp1, i, qBase, 3 - 1); // used as q[i] address + masknez(tmp3, tmp3, tmp4); // min(jz - i, jp); + maskeqz(tmp4, tmp2, tmp4); + orr(tmp3, tmp3, tmp4); + move(tmp4, R0); // used as k + + block_comment("for(fw=0.0,k=0;k<=jp&&k<=jz-i;k++) fw += PIo2[k]*q[i+k];"); { + bind(COMP_INNER_LOOP); + alsl_d(tmp5, tmp4, tmp1, 3 - 1); + fld_d(v18, tmp5, 0); // q[i+k] + alsl_d(tmp5, tmp4, twoOverPiBase, 3 - 1); + fld_d(v19, tmp5, 0); // PIo2[k] + fmadd_d(v30, v18, v19, v30); // fw += PIo2[k]*q[i+k]; + addi_w(tmp4, tmp4, 1); // k++ + bge(tmp3, tmp4, COMP_INNER_LOOP); + } + alsl_d(tmp5, tmp2, SCR2, 3 - 1); + fst_d(v30, tmp5, 0); // fq[jz-i] + addi_d(tmp2, tmp2, 1); + addi_w(i, i, -1); + bge(i, R0, COMP_FOR); + } + + block_comment("switch(prec) {...}. case 2:"); { + // compress fq into y[] + // remember prec == 2 + + block_comment("for (i=jz;i>=0;i--) fw += fq[i];"); { + vxor_v(v4, v4, v4); + move(i, jz); + bind(FW_FOR1); + alsl_d(tmp5, i, SCR2, 3 - 1); + fld_d(v1, tmp5, 0); + addi_w(i, i, -1); + fadd_d(v4, v4, v1); + bge(i, R0, FW_FOR1); + } + bind(FW_FOR1_DONE); + // v1 contains fq[0]. so, keep it so far + fsub_d(v5, v1, v4); // fw = fq[0] - fw + beqz(ih, FW_Y0_NO_NEGATION); + fneg_d(v4, v4); + bind(FW_Y0_NO_NEGATION); + + block_comment("for (i=1;i<=jz;i++) fw += fq[i];"); { + li(i, 1); + blt(jz, i, FW_FOR2_DONE); + bind(FW_FOR2); + alsl_d(tmp5, i, SCR2, 3 - 1); + fld_d(v1, tmp5, 0); + addi_w(i, i, 1); + fadd_d(v5, v5, v1); + bge(jz, i, FW_FOR2); + } + bind(FW_FOR2_DONE); + beqz(ih, FW_Y1_NO_NEGATION); + fneg_d(v5, v5); + bind(FW_Y1_NO_NEGATION); + addi_d(SP, SP, 560); + } +} + +///* __kernel_sin( x, y, iy) +// * kernel sin function on [-pi/4, pi/4], pi/4 ~ 0.7854 +// * Input x is assumed to be bounded by ~pi/4 in magnitude. +// * Input y is the tail of x. +// * Input iy indicates whether y is 0. (if iy=0, y assume to be 0). +// * +// * Algorithm +// * 1. Since sin(-x) = -sin(x), we need only to consider positive x. +// * 2. if x < 2^-27 (hx<0x3e400000 0), return x with inexact if x!=0. +// * 3. sin(x) is approximated by a polynomial of degree 13 on +// * [0,pi/4] +// * 3 13 +// * sin(x) ~ x + S1*x + ... 
+ S6*x +// * where +// * +// * |sin(x) 2 4 6 8 10 12 | -58 +// * |----- - (1+S1*x +S2*x +S3*x +S4*x +S5*x +S6*x )| <= 2 +// * | x | +// * +// * 4. sin(x+y) = sin(x) + sin'(x')*y +// * ~ sin(x) + (1-x*x/2)*y +// * For better accuracy, let +// * 3 2 2 2 2 +// * r = x *(S2+x *(S3+x *(S4+x *(S5+x *S6)))) +// * then 3 2 +// * sin(x) = x + (S1*x + (x *(r-y/2)+y)) +// */ +//static const double +//S1 = -1.66666666666666324348e-01, /* 0xBFC55555, 0x55555549 */ +//S2 = 8.33333333332248946124e-03, /* 0x3F811111, 0x1110F8A6 */ +//S3 = -1.98412698298579493134e-04, /* 0xBF2A01A0, 0x19C161D5 */ +//S4 = 2.75573137070700676789e-06, /* 0x3EC71DE3, 0x57B1FE7D */ +//S5 = -2.50507602534068634195e-08, /* 0xBE5AE5E6, 0x8A2B9CEB */ +//S6 = 1.58969099521155010221e-10; /* 0x3DE5D93A, 0x5ACFD57C */ +// +// NOTE: S1..S6 were moved into a table: StubRoutines::la::_dsin_coef +// +// BEGIN __kernel_sin PSEUDO CODE +// +//static double __kernel_sin(double x, double y, bool iy) +//{ +// double z,r,v; +// +// // NOTE: not needed. moved to dsin/dcos +// //int ix; +// //ix = high(x)&0x7fffffff; /* high word of x */ +// +// // NOTE: moved to dsin/dcos +// //if(ix<0x3e400000) /* |x| < 2**-27 */ +// // {if((int)x==0) return x;} /* generate inexact */ +// +// z = x*x; +// v = z*x; +// r = S2+z*(S3+z*(S4+z*(S5+z*S6))); +// if(iy==0) return x+v*(S1+z*r); +// else return x-((z*(half*y-v*r)-y)-v*S1); +//} +// +// END __kernel_sin PSEUDO CODE +// +// Changes between fdlibm and intrinsic: +// 1. Removed |x| < 2**-27 check, because if was done earlier in dsin/dcos +// 2. Constants are now loaded from table dsin_coef +// 3. C code parameter "int iy" was modified to "bool iyIsOne", because +// iy is always 0 or 1. Also, iyIsOne branch was moved into +// generation phase instead of taking it during code execution +// Input ans output: +// 1. Input for generated function: X argument = x +// 2. Input for generator: x = register to read argument from, iyIsOne +// = flag to use low argument low part or not, dsin_coef = coefficients +// table address +// 3. Return sin(x) value in FA0 +void MacroAssembler::generate_kernel_sin(FloatRegister x, bool iyIsOne, address dsin_coef) { + FloatRegister y = FA5, z = FA6, v = FA7, r = FT0, s1 = FT1, s2 = FT2, + s3 = FT3, s4 = FT4, s5 = FT5, s6 = FT6, half = FT7; + li(SCR2, dsin_coef); + fld_d(s5, SCR2, 32); + fld_d(s6, SCR2, 40); + fmul_d(z, x, x); // z = x*x; + fld_d(s1, SCR2, 0); + fld_d(s2, SCR2, 8); + fld_d(s3, SCR2, 16); + fld_d(s4, SCR2, 24); + fmul_d(v, z, x); // v = z*x; + + block_comment("calculate r = S2+z*(S3+z*(S4+z*(S5+z*S6)))"); { + fmadd_d(r, z, s6, s5); + // initialize "half" in current block to utilize 2nd FPU. However, it's + // not a part of this block + vldi(half, -928); // 0.5 (0x3fe0000000000000) + fmadd_d(r, z, r, s4); + fmadd_d(r, z, r, s3); + fmadd_d(r, z, r, s2); + } + + if (!iyIsOne) { + // return x+v*(S1+z*r); + fmadd_d(s1, z, r, s1); + fmadd_d(FA0, v, s1, x); + } else { + // return x-((z*(half*y-v*r)-y)-v*S1); + fmul_d(s6, half, y); // half*y + fnmsub_d(s6, v, r, s6); // half*y-v*r + fnmsub_d(s6, z, s6, y); // y - z*(half*y-v*r) = - (z*(half*y-v*r)-y) + fmadd_d(s6, v, s1, s6); // - (z*(half*y-v*r)-y) + v*S1 == -((z*(half*y-v*r)-y)-v*S1) + fadd_d(FA0, x, s6); + } +} + +///* +// * __kernel_cos( x, y ) +// * kernel cos function on [-pi/4, pi/4], pi/4 ~ 0.785398164 +// * Input x is assumed to be bounded by ~pi/4 in magnitude. +// * Input y is the tail of x. +// * +// * Algorithm +// * 1. Since cos(-x) = cos(x), we need only to consider positive x. +// * 2. 
if x < 2^-27 (hx<0x3e400000 0), return 1 with inexact if x!=0. +// * 3. cos(x) is approximated by a polynomial of degree 14 on +// * [0,pi/4] +// * 4 14 +// * cos(x) ~ 1 - x*x/2 + C1*x + ... + C6*x +// * where the remez error is +// * +// * | 2 4 6 8 10 12 14 | -58 +// * |cos(x)-(1-.5*x +C1*x +C2*x +C3*x +C4*x +C5*x +C6*x )| <= 2 +// * | | +// * +// * 4 6 8 10 12 14 +// * 4. let r = C1*x +C2*x +C3*x +C4*x +C5*x +C6*x , then +// * cos(x) = 1 - x*x/2 + r +// * since cos(x+y) ~ cos(x) - sin(x)*y +// * ~ cos(x) - x*y, +// * a correction term is necessary in cos(x) and hence +// * cos(x+y) = 1 - (x*x/2 - (r - x*y)) +// * For better accuracy when x > 0.3, let qx = |x|/4 with +// * the last 32 bits mask off, and if x > 0.78125, let qx = 0.28125. +// * Then +// * cos(x+y) = (1-qx) - ((x*x/2-qx) - (r-x*y)). +// * Note that 1-qx and (x*x/2-qx) is EXACT here, and the +// * magnitude of the latter is at least a quarter of x*x/2, +// * thus, reducing the rounding error in the subtraction. +// */ +// +//static const double +//C1 = 4.16666666666666019037e-02, /* 0x3FA55555, 0x5555554C */ +//C2 = -1.38888888888741095749e-03, /* 0xBF56C16C, 0x16C15177 */ +//C3 = 2.48015872894767294178e-05, /* 0x3EFA01A0, 0x19CB1590 */ +//C4 = -2.75573143513906633035e-07, /* 0xBE927E4F, 0x809C52AD */ +//C5 = 2.08757232129817482790e-09, /* 0x3E21EE9E, 0xBDB4B1C4 */ +//C6 = -1.13596475577881948265e-11; /* 0xBDA8FAE9, 0xBE8838D4 */ +// +// NOTE: C1..C6 were moved into a table: StubRoutines::la::_dcos_coef +// +// BEGIN __kernel_cos PSEUDO CODE +// +//static double __kernel_cos(double x, double y) +//{ +// double a,h,z,r,qx=0; +// +// // NOTE: ix is already initialized in dsin/dcos. Reuse value from register +// //int ix; +// //ix = high(x)&0x7fffffff; /* ix = |x|'s high word*/ +// +// // NOTE: moved to dsin/dcos +// //if(ix<0x3e400000) { /* if x < 2**27 */ +// // if(((int)x)==0) return one; /* generate inexact */ +// //} +// +// z = x*x; +// r = z*(C1+z*(C2+z*(C3+z*(C4+z*(C5+z*C6))))); +// if(ix < 0x3FD33333) /* if |x| < 0.3 */ +// return one - (0.5*z - (z*r - x*y)); +// else { +// if(ix > 0x3fe90000) { /* x > 0.78125 */ +// qx = 0.28125; +// } else { +// set_high(&qx, ix-0x00200000); /* x/4 */ +// set_low(&qx, 0); +// } +// h = 0.5*z-qx; +// a = one-qx; +// return a - (h - (z*r-x*y)); +// } +//} +// +// END __kernel_cos PSEUDO CODE +// +// Changes between fdlibm and intrinsic: +// 1. Removed |x| < 2**-27 check, because if was done earlier in dsin/dcos +// 2. Constants are now loaded from table dcos_coef +// Input and output: +// 1. Input for generated function: X argument = x +// 2. Input for generator: x = register to read argument from, dcos_coef +// = coefficients table address +// 3. 
Return cos(x) value in FA0 +void MacroAssembler::generate_kernel_cos(FloatRegister x, address dcos_coef) { + Register ix = A3; + FloatRegister qx = FA1, h = FA2, a = FA3, y = FA5, z = FA6, r = FA7, C1 = FT0, + C2 = FT1, C3 = FT2, C4 = FT3, C5 = FT4, C6 = FT5, one = FT6, half = FT7; + Label IX_IS_LARGE, SET_QX_CONST, DONE, QX_SET; + li(SCR2, dcos_coef); + fld_d(C1, SCR2, 0); + fld_d(C2, SCR2, 8); + fld_d(C3, SCR2, 16); + fld_d(C4, SCR2, 24); + fld_d(C5, SCR2, 32); + fld_d(C6, SCR2, 40); + fmul_d(z, x, x); // z=x^2 + block_comment("calculate r = z*(C1+z*(C2+z*(C3+z*(C4+z*(C5+z*C6)))))"); { + fmadd_d(r, z, C6, C5); + vldi(half, -928); // 0.5 (0x3fe0000000000000) + fmadd_d(r, z, r, C4); + fmul_d(y, x, y); + fmadd_d(r, z, r, C3); + li(SCR1, 0x3FD33333); + fmadd_d(r, z, r, C2); + fmul_d(x, z, z); // x = z^2 + fmadd_d(r, z, r, C1); // r = C1+z(C2+z(C4+z(C5+z*C6))) + } + // need to multiply r by z to have "final" r value + vldi(one, -912); // 1.0 (0x3ff0000000000000) + bge(ix, SCR1, IX_IS_LARGE); + block_comment("if(ix < 0x3FD33333) return one - (0.5*z - (z*r - x*y))"); { + // return 1.0 - (0.5*z - (z*r - x*y)) = 1.0 - (0.5*z + (x*y - z*r)) + fnmsub_d(FA0, x, r, y); + fmadd_d(FA0, half, z, FA0); + fsub_d(FA0, one, FA0); + b(DONE); + } + block_comment("if(ix >= 0x3FD33333)"); { + bind(IX_IS_LARGE); + li(SCR2, 0x3FE90000); + blt(SCR2, ix, SET_QX_CONST); + block_comment("set_high(&qx, ix-0x00200000); set_low(&qx, 0);"); { + li(SCR2, 0x00200000); + sub_w(SCR2, ix, SCR2); + slli_d(SCR2, SCR2, 32); + movgr2fr_d(qx, SCR2); + } + b(QX_SET); + bind(SET_QX_CONST); + block_comment("if(ix > 0x3fe90000) qx = 0.28125;"); { + vldi(qx, -942); // 0.28125 (0x3fd2000000000000) + } + bind(QX_SET); + fmsub_d(C6, x, r, y); // z*r - xy + fmsub_d(h, half, z, qx); // h = 0.5*z - qx + fsub_d(a, one, qx); // a = 1-qx + fsub_d(C6, h, C6); // = h - (z*r - x*y) + fsub_d(FA0, a, C6); + } + bind(DONE); +} + +// generate_dsin_dcos creates stub for dsin and dcos +// Generation is done via single call because dsin and dcos code is almost the +// same(see C code below). These functions work as follows: +// 1) handle corner cases: |x| ~< pi/4, x is NaN or INF, |x| < 2**-27 +// 2) perform argument reduction if required +// 3) call kernel_sin or kernel_cos which approximate sin/cos via polynomial +// +// BEGIN dsin/dcos PSEUDO CODE +// +//dsin_dcos(jdouble x, bool isCos) { +// double y[2],z=0.0; +// int n, ix; +// +// /* High word of x. */ +// ix = high(x); +// +// /* |x| ~< pi/4 */ +// ix &= 0x7fffffff; +// if(ix <= 0x3fe921fb) return isCos ? __kernel_cos : __kernel_sin(x,z,0); +// +// /* sin/cos(Inf or NaN) is NaN */ +// else if (ix>=0x7ff00000) return x-x; +// else if (ix<0x3e400000) { /* if ix < 2**27 */ +// if(((int)x)==0) return isCos ? one : x; /* generate inexact */ +// } +// /* argument reduction needed */ +// else { +// n = __ieee754_rem_pio2(x,y); +// switch(n&3) { +// case 0: return isCos ? __kernel_cos(y[0],y[1]) : __kernel_sin(y[0],y[1], true); +// case 1: return isCos ? -__kernel_sin(y[0],y[1],true) : __kernel_cos(y[0],y[1]); +// case 2: return isCos ? -__kernel_cos(y[0],y[1]) : -__kernel_sin(y[0],y[1], true); +// default: +// return isCos ? __kernel_sin(y[0],y[1],1) : -__kernel_cos(y[0],y[1]); +// } +// } +//} +// END dsin/dcos PSEUDO CODE +// +// Changes between fdlibm and intrinsic: +// 1. Moved ix < 2**27 from kernel_sin/kernel_cos into dsin/dcos +// 2. Final switch use equivalent bit checks(tbz/tbnz) +// Input ans output: +// 1. Input for generated function: X = A0 +// 2. 
Input for generator: isCos = generate sin or cos, npio2_hw = address +// of npio2_hw table, two_over_pi = address of two_over_pi table, +// pio2 = address of pio2 table, dsin_coef = address of dsin_coef table, +// dcos_coef = address of dcos_coef table +// 3. Return result in FA0 +// NOTE: general purpose register names match local variable names in C code +void MacroAssembler::generate_dsin_dcos(bool isCos, address npio2_hw, + address two_over_pi, address pio2, + address dsin_coef, address dcos_coef) { + Label DONE, ARG_REDUCTION, TINY_X, RETURN_SIN, EARLY_CASE; + Register X = A0, absX = A1, n = A2, ix = A3; + FloatRegister y0 = FA4, y1 = FA5; + + block_comment("check |x| ~< pi/4, NaN, Inf and |x| < 2**-27 cases"); { + movfr2gr_d(X, FA0); + li(SCR2, 0x3e400000); + li(SCR1, 0x3fe921fb); // high word of pi/4. + bstrpick_d(absX, X, 62, 0); // absX + li(T0, 0x7ff0000000000000); + srli_d(ix, absX, 32); // set ix + blt(ix, SCR2, TINY_X); // handle tiny x (|x| < 2^-27) + bge(SCR1, ix, EARLY_CASE); // if(ix <= 0x3fe921fb) return + blt(absX, T0, ARG_REDUCTION); + // X is NaN or INF (i.e. 0x7FF* or 0xFFF*). Return NaN (mantissa != 0). + // Set last bit unconditionally to make it NaN + ori(T0, T0, 1); + movgr2fr_d(FA0, T0); + jr(RA); + } + block_comment("kernel_sin/kernel_cos: if(ix<0x3e400000) {}"); { + bind(TINY_X); + if (isCos) { + vldi(FA0, -912); // 1.0 (0x3ff0000000000000) + } + jr(RA); + } + bind(ARG_REDUCTION); /* argument reduction needed */ + block_comment("n = __ieee754_rem_pio2(x,y);"); { + generate__ieee754_rem_pio2(npio2_hw, two_over_pi, pio2); + } + block_comment("switch(n&3) {case ... }"); { + if (isCos) { + srli_w(T0, n, 1); + xorr(absX, n, T0); + andi(T0, n, 1); + bnez(T0, RETURN_SIN); + } else { + andi(T0, n, 1); + beqz(T0, RETURN_SIN); + } + generate_kernel_cos(y0, dcos_coef); + if (isCos) { + andi(T0, absX, 1); + beqz(T0, DONE); + } else { + andi(T0, n, 2); + beqz(T0, DONE); + } + fneg_d(FA0, FA0); + jr(RA); + bind(RETURN_SIN); + generate_kernel_sin(y0, true, dsin_coef); + if (isCos) { + andi(T0, absX, 1); + beqz(T0, DONE); + } else { + andi(T0, n, 2); + beqz(T0, DONE); + } + fneg_d(FA0, FA0); + jr(RA); + } + bind(EARLY_CASE); + vxor_v(y1, y1, y1); + if (isCos) { + generate_kernel_cos(FA0, dcos_coef); + } else { + generate_kernel_sin(FA0, false, dsin_coef); + } + bind(DONE); + jr(RA); +} diff --git a/src/hotspot/cpu/loongarch/methodHandles_loongarch.cpp b/src/hotspot/cpu/loongarch/methodHandles_loongarch.cpp new file mode 100644 index 00000000000..e517dcd415d --- /dev/null +++ b/src/hotspot/cpu/loongarch/methodHandles_loongarch.cpp @@ -0,0 +1,564 @@ +/* + * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code).
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "classfile/javaClasses.inline.hpp" +#include "interpreter/interpreter.hpp" +#include "interpreter/interpreterRuntime.hpp" +#include "memory/allocation.inline.hpp" +#include "prims/methodHandles.hpp" +#include "runtime/frame.inline.hpp" +#include "utilities/preserveException.hpp" + +#define __ _masm-> + +#define T0 RT0 +#define T1 RT1 +#define T2 RT2 +#define T3 RT3 +#define T4 RT4 +#define T5 RT5 +#define T8 RT8 + +#ifdef PRODUCT +#define BLOCK_COMMENT(str) // nothing +#define STOP(error) stop(error) +#else +#define BLOCK_COMMENT(str) __ block_comment(str) +#define STOP(error) block_comment(error); __ stop(error) +#endif + +#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") + +void MethodHandles::load_klass_from_Class(MacroAssembler* _masm, Register klass_reg) { + if (VerifyMethodHandles) + verify_klass(_masm, klass_reg, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_Class), + "MH argument is a Class"); + __ ld_d(klass_reg, Address(klass_reg, java_lang_Class::klass_offset_in_bytes())); +} + +#ifdef ASSERT +static int check_nonzero(const char* xname, int x) { + assert(x != 0, "%s should be nonzero", xname); + return x; +} +#define NONZERO(x) check_nonzero(#x, x) +#else //ASSERT +#define NONZERO(x) (x) +#endif //ASSERT + +#ifdef ASSERT +void MethodHandles::verify_klass(MacroAssembler* _masm, + Register obj, SystemDictionary::WKID klass_id, + const char* error_message) { +} + +void MethodHandles::verify_ref_kind(MacroAssembler* _masm, int ref_kind, Register member_reg, Register temp) { + Label L; + BLOCK_COMMENT("verify_ref_kind {"); + __ ld_w(temp, Address(member_reg, NONZERO(java_lang_invoke_MemberName::flags_offset_in_bytes()))); + __ srai_w(temp, temp, java_lang_invoke_MemberName::MN_REFERENCE_KIND_SHIFT); + __ li(AT, java_lang_invoke_MemberName::MN_REFERENCE_KIND_MASK); + __ andr(temp, temp, AT); + __ li(AT, ref_kind); + __ beq(temp, AT, L); + { char* buf = NEW_C_HEAP_ARRAY(char, 100, mtInternal); + jio_snprintf(buf, 100, "verify_ref_kind expected %x", ref_kind); + if (ref_kind == JVM_REF_invokeVirtual || + ref_kind == JVM_REF_invokeSpecial) + // could do this for all ref_kinds, but would explode assembly code size + trace_method_handle(_masm, buf); + __ STOP(buf); + } + BLOCK_COMMENT("} verify_ref_kind"); + __ bind(L); +} + +#endif //ASSERT + +void MethodHandles::jump_from_method_handle(MacroAssembler* _masm, Register method, Register temp, + bool for_compiler_entry) { + assert(method == Rmethod, "interpreter calling convention"); + + Label L_no_such_method; + __ beq(method, R0, L_no_such_method); + + __ verify_method_ptr(method); + + if (!for_compiler_entry && JvmtiExport::can_post_interpreter_events()) { + Label run_compiled_code; + // JVMTI events, such as single-stepping, are implemented partly by avoiding running + // compiled code in threads for which the event is enabled. Check here for + // interp_only_mode if these events CAN be enabled. + Register rthread = TREG; + // interp_only is an int, on little endian it is sufficient to test the byte only + // Is a cmpl faster? 
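+ // For clarity, the next few instructions are roughly equivalent to the
+ // following C-like sketch (illustrative pseudo code only, not generated
+ // code; the accessor names mirror the JavaThread/Method offsets used below):
+ //
+ //   if (thread->interp_only_mode() != 0) {
+ //     // JVMTI forces interpretation (e.g. single-stepping):
+ //     // enter the method through its interpreter entry instead.
+ //     goto *method->interpreter_entry();
+ //   }
+ //   // otherwise fall through to the compiled/interpreted entry chosen below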
+ __ ld_bu(AT, rthread, in_bytes(JavaThread::interp_only_mode_offset())); + __ beq(AT, R0, run_compiled_code); + __ ld_d(T4, method, in_bytes(Method::interpreter_entry_offset())); + __ jr(T4); + __ BIND(run_compiled_code); + } + + const ByteSize entry_offset = for_compiler_entry ? Method::from_compiled_offset() : + Method::from_interpreted_offset(); + __ ld_d(T4, method, in_bytes(entry_offset)); + __ jr(T4); + + __ bind(L_no_such_method); + address wrong_method = StubRoutines::throw_AbstractMethodError_entry(); + __ jmp(wrong_method, relocInfo::runtime_call_type); +} + +void MethodHandles::jump_to_lambda_form(MacroAssembler* _masm, + Register recv, Register method_temp, + Register temp2, + bool for_compiler_entry) { + BLOCK_COMMENT("jump_to_lambda_form {"); + // This is the initial entry point of a lazy method handle. + // After type checking, it picks up the invoker from the LambdaForm. + assert_different_registers(recv, method_temp, temp2); + assert(recv != noreg, "required register"); + assert(method_temp == Rmethod, "required register for loading method"); + + //NOT_PRODUCT({ FlagSetting fs(TraceMethodHandles, true); trace_method_handle(_masm, "LZMH"); }); + + // Load the invoker, as MH -> MH.form -> LF.vmentry + __ verify_oop(recv); + __ load_heap_oop(method_temp, Address(recv, NONZERO(java_lang_invoke_MethodHandle::form_offset_in_bytes())), temp2); + __ verify_oop(method_temp); + __ load_heap_oop(method_temp, Address(method_temp, NONZERO(java_lang_invoke_LambdaForm::vmentry_offset_in_bytes())), temp2); + __ verify_oop(method_temp); + __ load_heap_oop(method_temp, Address(method_temp, NONZERO(java_lang_invoke_MemberName::method_offset_in_bytes()))); + __ verify_oop(method_temp); + __ access_load_at(T_ADDRESS, IN_HEAP, method_temp, Address(method_temp, NONZERO(java_lang_invoke_ResolvedMethodName::vmtarget_offset_in_bytes())), noreg, noreg); + + if (VerifyMethodHandles && !for_compiler_entry) { + // make sure recv is already on stack + __ ld_d(temp2, Address(method_temp, Method::const_offset())); + __ load_sized_value(temp2, + Address(temp2, ConstMethod::size_of_parameters_offset()), + sizeof(u2), false); + // assert(sizeof(u2) == sizeof(Method::_size_of_parameters), ""); + Label L; + Address recv_addr = __ argument_address(temp2, -1); + __ ld_d(AT, recv_addr); + __ beq(recv, AT, L); + + recv_addr = __ argument_address(temp2, -1); + __ ld_d(V0, recv_addr); + __ STOP("receiver not on stack"); + __ BIND(L); + } + + jump_from_method_handle(_masm, method_temp, temp2, for_compiler_entry); + BLOCK_COMMENT("} jump_to_lambda_form"); +} + + +// Code generation +address MethodHandles::generate_method_handle_interpreter_entry(MacroAssembler* _masm, + vmIntrinsics::ID iid) { + const bool not_for_compiler_entry = false; // this is the interpreter entry + assert(is_signature_polymorphic(iid), "expected invoke iid"); + if (iid == vmIntrinsics::_invokeGeneric || + iid == vmIntrinsics::_compiledLambdaForm) { + // Perhaps surprisingly, the symbolic references visible to Java are not directly used. + // They are linked to Java-generated adapters via MethodHandleNatives.linkMethod. + // They all allow an appendix argument. 
+ __ stop("empty stubs make SG sick"); + return NULL; + } + + // Rmethod: Method* + // T4: argument locator (parameter slot count, added to sp) + // S7: used as temp to hold mh or receiver + Register t4_argp = T4; // argument list ptr, live on error paths + Register s7_mh = S7; // MH receiver; dies quickly and is recycled + Register rm_method = Rmethod; // eventual target of this invocation + + // here's where control starts out: + __ align(CodeEntryAlignment); + address entry_point = __ pc(); + + if (VerifyMethodHandles) { + assert(Method::intrinsic_id_size_in_bytes() == 2, "assuming Method::_intrinsic_id is u2"); + + Label L; + BLOCK_COMMENT("verify_intrinsic_id {"); + __ ld_hu(AT, rm_method, Method::intrinsic_id_offset_in_bytes()); + guarantee(Assembler::is_simm(iid, 12), "Oops, iid is not simm12! Change the instructions."); + __ addi_d(AT, AT, -1 * (int) iid); + __ beq(AT, R0, L); + if (iid == vmIntrinsics::_linkToVirtual || + iid == vmIntrinsics::_linkToSpecial) { + // could do this for all kinds, but would explode assembly code size + trace_method_handle(_masm, "bad Method*::intrinsic_id"); + } + __ STOP("bad Method*::intrinsic_id"); + __ bind(L); + BLOCK_COMMENT("} verify_intrinsic_id"); + } + + // First task: Find out how big the argument list is. + Address t4_first_arg_addr; + int ref_kind = signature_polymorphic_intrinsic_ref_kind(iid); + assert(ref_kind != 0 || iid == vmIntrinsics::_invokeBasic, "must be _invokeBasic or a linkTo intrinsic"); + if (ref_kind == 0 || MethodHandles::ref_kind_has_receiver(ref_kind)) { + __ ld_d(t4_argp, Address(rm_method, Method::const_offset())); + __ load_sized_value(t4_argp, + Address(t4_argp, ConstMethod::size_of_parameters_offset()), + sizeof(u2), false); + // assert(sizeof(u2) == sizeof(Method::_size_of_parameters), ""); + t4_first_arg_addr = __ argument_address(t4_argp, -1); + } else { + DEBUG_ONLY(t4_argp = noreg); + } + + if (!is_signature_polymorphic_static(iid)) { + __ ld_d(s7_mh, t4_first_arg_addr); + DEBUG_ONLY(t4_argp = noreg); + } + + // t4_first_arg_addr is live! + + trace_method_handle_interpreter_entry(_masm, iid); + + if (iid == vmIntrinsics::_invokeBasic) { + generate_method_handle_dispatch(_masm, iid, s7_mh, noreg, not_for_compiler_entry); + + } else { + // Adjust argument list by popping the trailing MemberName argument. + Register r_recv = noreg; + if (MethodHandles::ref_kind_has_receiver(ref_kind)) { + // Load the receiver (not the MH; the actual MemberName's receiver) up from the interpreter stack. 
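+ // In rough, C-like pseudo code (illustrative only, not generated code;
+ // first_arg_addr stands for the t4_first_arg_addr slot computed above),
+ // this linkTo* path does:
+ //
+ //   oop recv   = ref_kind_has_receiver(ref_kind) ? *first_arg_addr : NULL;
+ //   oop member = pop();   // trailing MemberName argument pushed by the caller
+ //   generate_method_handle_dispatch(iid, recv, member, /*for_compiler_entry*/ false);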
+ __ ld_d(r_recv = T2, t4_first_arg_addr); + } + DEBUG_ONLY(t4_argp = noreg); + Register rm_member = rm_method; // MemberName ptr; incoming method ptr is dead now + __ pop(rm_member); // extract last argument + generate_method_handle_dispatch(_masm, iid, r_recv, rm_member, not_for_compiler_entry); + } + + return entry_point; +} + +void MethodHandles::generate_method_handle_dispatch(MacroAssembler* _masm, + vmIntrinsics::ID iid, + Register receiver_reg, + Register member_reg, + bool for_compiler_entry) { + assert(is_signature_polymorphic(iid), "expected invoke iid"); + Register rm_method = Rmethod; // eventual target of this invocation + // temps used in this code are not used in *either* compiled or interpreted calling sequences + Register j_rarg0 = T0; + Register j_rarg1 = A0; + Register j_rarg2 = A1; + Register j_rarg3 = A2; + Register j_rarg4 = A3; + Register j_rarg5 = A4; + + Register temp1 = T8; + Register temp2 = T4; + Register temp3 = T5; + if (for_compiler_entry) { + assert(receiver_reg == (iid == vmIntrinsics::_linkToStatic ? noreg : j_rarg0), "only valid assignment"); + assert_different_registers(temp1, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5); + assert_different_registers(temp2, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5); + assert_different_registers(temp3, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5); + } + else { + assert_different_registers(temp1, temp2, temp3, saved_last_sp_register()); // don't trash lastSP + } + assert_different_registers(temp1, temp2, temp3, receiver_reg); + assert_different_registers(temp1, temp2, temp3, member_reg); + + if (iid == vmIntrinsics::_invokeBasic) { + // indirect through MH.form.vmentry.vmtarget + jump_to_lambda_form(_masm, receiver_reg, rm_method, temp1, for_compiler_entry); + + } else { + // The method is a member invoker used by direct method handles. + if (VerifyMethodHandles) { + // make sure the trailing argument really is a MemberName (caller responsibility) + verify_klass(_masm, member_reg, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_invoke_MemberName), + "MemberName required for invokeVirtual etc."); + } + + Address member_clazz( member_reg, NONZERO(java_lang_invoke_MemberName::clazz_offset_in_bytes())); + Address member_vmindex( member_reg, NONZERO(java_lang_invoke_MemberName::vmindex_offset_in_bytes())); + Address member_vmtarget( member_reg, NONZERO(java_lang_invoke_MemberName::method_offset_in_bytes())); + Address vmtarget_method( rm_method, NONZERO(java_lang_invoke_ResolvedMethodName::vmtarget_offset_in_bytes())); + + Register temp1_recv_klass = temp1; + if (iid != vmIntrinsics::_linkToStatic) { + __ verify_oop(receiver_reg); + if (iid == vmIntrinsics::_linkToSpecial) { + // Don't actually load the klass; just null-check the receiver. + __ null_check(receiver_reg); + } else { + // load receiver klass itself + __ null_check(receiver_reg, oopDesc::klass_offset_in_bytes()); + __ load_klass(temp1_recv_klass, receiver_reg); + __ verify_klass_ptr(temp1_recv_klass); + } + BLOCK_COMMENT("check_receiver {"); + // The receiver for the MemberName must be in receiver_reg. + // Check the receiver against the MemberName.clazz + if (VerifyMethodHandles && iid == vmIntrinsics::_linkToSpecial) { + // Did not load it above... 
+ __ load_klass(temp1_recv_klass, receiver_reg); + __ verify_klass_ptr(temp1_recv_klass); + } + if (VerifyMethodHandles && iid != vmIntrinsics::_linkToInterface) { + Label L_ok; + Register temp2_defc = temp2; + __ load_heap_oop(temp2_defc, member_clazz, temp3); + load_klass_from_Class(_masm, temp2_defc); + __ verify_klass_ptr(temp2_defc); + __ check_klass_subtype(temp1_recv_klass, temp2_defc, temp3, L_ok); + // If we get here, the type check failed! + __ STOP("receiver class disagrees with MemberName.clazz"); + __ bind(L_ok); + } + BLOCK_COMMENT("} check_receiver"); + } + if (iid == vmIntrinsics::_linkToSpecial || + iid == vmIntrinsics::_linkToStatic) { + DEBUG_ONLY(temp1_recv_klass = noreg); // these guys didn't load the recv_klass + } + + // Live registers at this point: + // member_reg - MemberName that was the trailing argument + // temp1_recv_klass - klass of stacked receiver, if needed + + Label L_incompatible_class_change_error; + switch (iid) { + case vmIntrinsics::_linkToSpecial: + if (VerifyMethodHandles) { + verify_ref_kind(_masm, JVM_REF_invokeSpecial, member_reg, temp3); + } + __ load_heap_oop(rm_method, member_vmtarget); + __ access_load_at(T_ADDRESS, IN_HEAP, rm_method, vmtarget_method, noreg, noreg); + break; + + case vmIntrinsics::_linkToStatic: + if (VerifyMethodHandles) { + verify_ref_kind(_masm, JVM_REF_invokeStatic, member_reg, temp3); + } + __ load_heap_oop(rm_method, member_vmtarget); + __ access_load_at(T_ADDRESS, IN_HEAP, rm_method, vmtarget_method, noreg, noreg); + break; + + case vmIntrinsics::_linkToVirtual: + { + // same as TemplateTable::invokevirtual, + // minus the CP setup and profiling: + + if (VerifyMethodHandles) { + verify_ref_kind(_masm, JVM_REF_invokeVirtual, member_reg, temp3); + } + + // pick out the vtable index from the MemberName, and then we can discard it: + Register temp2_index = temp2; + __ access_load_at(T_ADDRESS, IN_HEAP, temp2_index, member_vmindex, noreg, noreg); + if (VerifyMethodHandles) { + Label L_index_ok; + __ blt(R0, temp2_index, L_index_ok); + __ STOP("no virtual index"); + __ BIND(L_index_ok); + } + + // Note: The verifier invariants allow us to ignore MemberName.clazz and vmtarget + // at this point. And VerifyMethodHandles has already checked clazz, if needed. + + // get target Method* & entry point + __ lookup_virtual_method(temp1_recv_klass, temp2_index, rm_method); + break; + } + + case vmIntrinsics::_linkToInterface: + { + // same as TemplateTable::invokeinterface + // (minus the CP setup and profiling, with different argument motion) + if (VerifyMethodHandles) { + verify_ref_kind(_masm, JVM_REF_invokeInterface, member_reg, temp3); + } + + Register temp3_intf = temp3; + __ load_heap_oop(temp3_intf, member_clazz); + load_klass_from_Class(_masm, temp3_intf); + __ verify_klass_ptr(temp3_intf); + + Register rm_index = rm_method; + __ access_load_at(T_ADDRESS, IN_HEAP, rm_index, member_vmindex, noreg, noreg); + if (VerifyMethodHandles) { + Label L; + __ bge(rm_index, R0, L); + __ STOP("invalid vtable index for MH.invokeInterface"); + __ bind(L); + } + + // given intf, index, and recv klass, dispatch to the implementation method + __ lookup_interface_method(temp1_recv_klass, temp3_intf, + // note: next two args must be the same: + rm_index, rm_method, + temp2, + L_incompatible_class_change_error); + break; + } + + default: + fatal("unexpected intrinsic %d: %s", iid, vmIntrinsics::name_at(iid)); + break; + } + + // Live at this point: + // rm_method + + // After figuring out which concrete method to call, jump into it. 
+ // Note that this works in the interpreter with no data motion. + // But the compiled version will require that r_recv be shifted out. + __ verify_method_ptr(rm_method); + jump_from_method_handle(_masm, rm_method, temp1, for_compiler_entry); + + if (iid == vmIntrinsics::_linkToInterface) { + __ bind(L_incompatible_class_change_error); + address icce_entry = StubRoutines::throw_IncompatibleClassChangeError_entry(); + __ jmp(icce_entry, relocInfo::runtime_call_type); + } + } +} + +#ifndef PRODUCT +void trace_method_handle_stub(const char* adaptername, + oop mh, + intptr_t* saved_regs, + intptr_t* entry_sp) { + // called as a leaf from native code: do not block the JVM! + bool has_mh = (strstr(adaptername, "/static") == NULL && + strstr(adaptername, "linkTo") == NULL); // static linkers don't have MH + const char* mh_reg_name = has_mh ? "s7_mh" : "s7"; + tty->print_cr("MH %s %s=" PTR_FORMAT " sp=" PTR_FORMAT, + adaptername, mh_reg_name, + p2i(mh), p2i(entry_sp)); + + if (Verbose) { + tty->print_cr("Registers:"); + const int saved_regs_count = RegisterImpl::number_of_registers; + for (int i = 0; i < saved_regs_count; i++) { + Register r = as_Register(i); + // The registers are stored in reverse order on the stack (by pusha). + tty->print("%3s=" PTR_FORMAT, r->name(), saved_regs[((saved_regs_count - 1) - i)]); + if ((i + 1) % 4 == 0) { + tty->cr(); + } else { + tty->print(", "); + } + } + tty->cr(); + + { + // dumping last frame with frame::describe + + JavaThread* p = JavaThread::active(); + + ResourceMark rm; + PRESERVE_EXCEPTION_MARK; // may not be needed but is safer and inexpensive here + FrameValues values; + + // Note: We want to allow trace_method_handle from any call site. + // While trace_method_handle creates a frame, it may be entered + // without a PC on the stack top (e.g. not just after a call). + // Walking that frame could lead to failures due to that invalid PC. + // => carefully detect that frame when doing the stack walking + + // Current C frame + frame cur_frame = os::current_frame(); + + // Robust search of trace_calling_frame (independent of inlining). + // Assumes saved_regs comes from a pusha in the trace_calling_frame. + assert(cur_frame.sp() < saved_regs, "registers not saved on stack ?"); + frame trace_calling_frame = os::get_sender_for_C_frame(&cur_frame); + while (trace_calling_frame.fp() < saved_regs) { + trace_calling_frame = os::get_sender_for_C_frame(&trace_calling_frame); + } + + // safely create a frame and call frame::describe + intptr_t *dump_sp = trace_calling_frame.sender_sp(); + intptr_t *dump_fp = trace_calling_frame.link(); + + bool walkable = has_mh; // whether the traced frame should be walkable + + if (walkable) { + // The previous definition of walkable may have to be refined + // if new call sites cause the next frame constructor to start + // failing. Alternatively, frame constructors could be + // modified to support the current or future non walkable + // frames (but this is more intrusive and is not considered as + // part of this RFE, which will instead use a simpler output).
+ frame dump_frame = frame(dump_sp, dump_fp); + dump_frame.describe(values, 1); + } else { + // Stack may not be walkable (invalid PC above FP): + // Add descriptions without building a Java frame to avoid issues + values.describe(-1, dump_fp, "fp for #1 "); + values.describe(-1, dump_sp, "sp for #1"); + } + values.describe(-1, entry_sp, "raw top of stack"); + + tty->print_cr("Stack layout:"); + values.print(p); + } + if (has_mh && oopDesc::is_oop(mh)) { + mh->print(); + if (java_lang_invoke_MethodHandle::is_instance(mh)) { + if (java_lang_invoke_MethodHandle::form_offset_in_bytes() != 0) + java_lang_invoke_MethodHandle::form(mh)->print(); + } + } + } +} + +// The stub wraps the arguments in a struct on the stack to avoid +// dealing with the different calling conventions for passing 6 +// arguments. +struct MethodHandleStubArguments { + const char* adaptername; + oopDesc* mh; + intptr_t* saved_regs; + intptr_t* entry_sp; +}; +void trace_method_handle_stub_wrapper(MethodHandleStubArguments* args) { + trace_method_handle_stub(args->adaptername, + args->mh, + args->saved_regs, + args->entry_sp); +} + +void MethodHandles::trace_method_handle(MacroAssembler* _masm, const char* adaptername) { +} +#endif //PRODUCT diff --git a/src/hotspot/cpu/loongarch/methodHandles_loongarch.hpp b/src/hotspot/cpu/loongarch/methodHandles_loongarch.hpp new file mode 100644 index 00000000000..f84337424b9 --- /dev/null +++ b/src/hotspot/cpu/loongarch/methodHandles_loongarch.hpp @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2010, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +// Platform-specific definitions for method handles. +// These definitions are inlined into class MethodHandles. 
+ +// Adapters +enum /* platform_dependent_constants */ { + adapter_code_size = 32000 DEBUG_ONLY(+ 150000) +}; + +// Additional helper methods for MethodHandles code generation: +public: + static void load_klass_from_Class(MacroAssembler* _masm, Register klass_reg); + + static void verify_klass(MacroAssembler* _masm, + Register obj, SystemDictionary::WKID klass_id, + const char* error_message = "wrong klass") NOT_DEBUG_RETURN; + + static void verify_method_handle(MacroAssembler* _masm, Register mh_reg) { + verify_klass(_masm, mh_reg, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_invoke_MethodHandle), + "reference is a MH"); + } + + static void verify_ref_kind(MacroAssembler* _masm, int ref_kind, Register member_reg, Register temp) NOT_DEBUG_RETURN; + + // Similar to InterpreterMacroAssembler::jump_from_interpreted. + // Takes care of special dispatch from single stepping too. + static void jump_from_method_handle(MacroAssembler* _masm, Register method, Register temp, + bool for_compiler_entry); + + static void jump_to_lambda_form(MacroAssembler* _masm, + Register recv, Register method_temp, + Register temp2, + bool for_compiler_entry); + + static Register saved_last_sp_register() { + // Should be in sharedRuntime, not here. + return R3; + } diff --git a/src/hotspot/cpu/loongarch/nativeInst_loongarch.cpp b/src/hotspot/cpu/loongarch/nativeInst_loongarch.cpp new file mode 100644 index 00000000000..9234befae3a --- /dev/null +++ b/src/hotspot/cpu/loongarch/nativeInst_loongarch.cpp @@ -0,0 +1,511 @@ +/* + * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "code/codeCache.hpp" +#include "code/compiledIC.hpp" +#include "memory/resourceArea.hpp" +#include "nativeInst_loongarch.hpp" +#include "oops/oop.inline.hpp" +#include "runtime/handles.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#include "utilities/ostream.hpp" + +#ifndef PRODUCT +#include "compiler/disassembler.hpp" +#endif + +#include + +#define T0 RT0 +#define T1 RT1 +#define T2 RT2 +#define T3 RT3 +#define T4 RT4 +#define T5 RT5 +#define T6 RT6 +#define T7 RT7 +#define T8 RT8 + +void NativeInstruction::wrote(int offset) { + ICache::invalidate_word(addr_at(offset)); +} + +void NativeInstruction::set_long_at(int offset, long i) { + address addr = addr_at(offset); + *(long*)addr = i; + ICache::invalidate_range(addr, 8); +} + +bool NativeInstruction::is_int_branch() { + int op = Assembler::high(insn_word(), 6); + return op == Assembler::beqz_op || op == Assembler::bnez_op || + op == Assembler::beq_op || op == Assembler::bne_op || + op == Assembler::blt_op || op == Assembler::bge_op || + op == Assembler::bltu_op || op == Assembler::bgeu_op; +} + +bool NativeInstruction::is_float_branch() { + return Assembler::high(insn_word(), 6) == Assembler::bccondz_op; +} + +bool NativeInstruction::is_lu12iw_lu32id() const { + return Assembler::high(int_at(0), 7) == Assembler::lu12i_w_op && + Assembler::high(int_at(4), 7) == Assembler::lu32i_d_op; +} + +bool NativeInstruction::is_pcaddu12i_add() const { + return Assembler::high(int_at(0), 7) == Assembler::pcaddu12i_op && + Assembler::high(int_at(4), 10) == Assembler::addi_d_op; +} + +bool NativeCall::is_bl() const { + return Assembler::high(int_at(0), 6) == Assembler::bl_op; +} + +void NativeCall::verify() { + assert(is_bl(), "not a NativeCall"); +} + +address NativeCall::target_addr_for_bl(address orig_addr) const { + address addr = orig_addr ? orig_addr : addr_at(0); + + // bl + if (is_bl()) { + return addr + (Assembler::simm26(((int_at(0) & 0x3ff) << 16) | + ((int_at(0) >> 10) & 0xffff)) << 2); + } + + fatal("not a NativeCall"); + return NULL; +} + +address NativeCall::destination() const { + address addr = (address)this; + address destination = target_addr_for_bl(); + // Do we use a trampoline stub for this call? + // Trampoline stubs are located behind the main code. + if (destination > addr) { + // Filter out recursive method invocation (call to verified/unverified entry point). + CodeBlob* cb = CodeCache::find_blob_unsafe(addr); // Else we get assertion if nmethod is zombie. + assert(cb && cb->is_nmethod(), "sanity"); + nmethod *nm = (nmethod *)cb; + NativeInstruction* ni = nativeInstruction_at(destination); + if (nm->stub_contains(destination) && ni->is_NativeCallTrampolineStub_at()) { + // Yes we do, so get the destination from the trampoline stub. + const address trampoline_stub_addr = destination; + destination = nativeCallTrampolineStub_at(trampoline_stub_addr)->destination(); + } + } + return destination; +} + +// Similar to replace_mt_safe, but just changes the destination. The +// important thing is that free-running threads are able to execute this +// call instruction at all times. +// +// Used in the runtime linkage of calls; see class CompiledIC. +// +// Add parameter assert_lock to switch off assertion +// during code generation, where no patching lock is needed. 
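+// In outline (an informal sketch of the code below, not additional logic;
+// trampoline_stub() is shorthand for the get_trampoline()/
+// nativeCallTrampolineStub_at() pair used in the implementation):
+//
+//   if (!MacroAssembler::reachable_from_branch_short(dest - call_site)) {
+//     // dest is out of direct bl range: route the call via its trampoline stub
+//     trampoline_stub(call_site)->set_destination(dest);
+//     dest = trampoline_stub(call_site);   // bl will now target the trampoline
+//   }
+//   set_destination(dest);                 // re-assemble the single bl in place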
+void NativeCall::set_destination_mt_safe(address dest, bool assert_lock) { + assert(!assert_lock || + (Patching_lock->is_locked() || SafepointSynchronize::is_at_safepoint()), + "concurrent code patching"); + + ResourceMark rm; + address addr_call = addr_at(0); + bool reachable = MacroAssembler::reachable_from_branch_short(dest - addr_call); + assert(NativeCall::is_call_at(addr_call), "unexpected code at call site"); + + // Patch the call. + if (!reachable) { + address trampoline_stub_addr = get_trampoline(); + assert (trampoline_stub_addr != NULL, "we need a trampoline"); + guarantee(Assembler::is_simm((trampoline_stub_addr - addr_call) >> 2, 26), "cannot reach trampoline stub"); + + // Patch the constant in the call's trampoline stub. + NativeInstruction* ni = nativeInstruction_at(dest); + assert (! ni->is_NativeCallTrampolineStub_at(), "chained trampolines"); + nativeCallTrampolineStub_at(trampoline_stub_addr)->set_destination(dest); + dest = trampoline_stub_addr; + } + set_destination(dest); +} + +address NativeCall::get_trampoline() { + address call_addr = addr_at(0); + + CodeBlob *code = CodeCache::find_blob(call_addr); + assert(code != NULL, "Could not find the containing code blob"); + + address bl_destination + = nativeCall_at(call_addr)->target_addr_for_bl(); + NativeInstruction* ni = nativeInstruction_at(bl_destination); + if (code->contains(bl_destination) && + ni->is_NativeCallTrampolineStub_at()) + return bl_destination; + + if (code->is_nmethod()) { + return trampoline_stub_Relocation::get_trampoline_for(call_addr, (nmethod*)code); + } + + return NULL; +} + +void NativeCall::set_destination(address dest) { + address addr_call = addr_at(0); + CodeBuffer cb(addr_call, instruction_size); + MacroAssembler masm(&cb); + assert(is_call_at(addr_call), "unexpected call type"); + jlong offs = dest - addr_call; + masm.bl(offs >> 2); + ICache::invalidate_range(addr_call, instruction_size); +} + +// Generate a trampoline for a branch to dest. If there's no need for a +// trampoline, simply patch the call directly to dest. +address NativeCall::trampoline_jump(CodeBuffer &cbuf, address dest) { + MacroAssembler a(&cbuf); + address stub = NULL; + + if (a.far_branches() + && ! is_NativeCallTrampolineStub_at()) { + stub = a.emit_trampoline_stub(instruction_address() - cbuf.insts()->start(), dest); + } + + if (stub == NULL) { + // If we generated no stub, patch this call directly to dest. + // This will happen if we don't need far branches or if there + // already was a trampoline. + set_destination(dest); + } + + return stub; +} + +void NativeCall::print() { + tty->print_cr(PTR_FORMAT ": call " PTR_FORMAT, + p2i(instruction_address()), p2i(destination())); +} + +// Inserts a native call instruction at a given pc +void NativeCall::insert(address code_pos, address entry) { + //TODO: LA + guarantee(0, "LA not implemented yet"); +} + +// MT-safe patching of a call instruction. +// First patches first word of instruction to two jmp's that jmps to them +// selfs (spinlock). Then patches the last byte, and then atomicly replaces +// the jmp's with the first 4 byte of the new instruction. 
+void NativeCall::replace_mt_safe(address instr_addr, address code_buffer) { + Unimplemented(); +} + +bool NativeFarCall::is_short() const { + return Assembler::high(int_at(0), 10) == Assembler::andi_op && + Assembler::low(int_at(0), 22) == 0 && + Assembler::high(int_at(4), 6) == Assembler::bl_op; +} + +bool NativeFarCall::is_far() const { + return Assembler::high(int_at(0), 7) == Assembler::pcaddu18i_op && + Assembler::high(int_at(4), 6) == Assembler::jirl_op && + Assembler::low(int_at(4), 5) == RA->encoding(); +} + +address NativeFarCall::destination(address orig_addr) const { + address addr = orig_addr ? orig_addr : addr_at(0); + + if (is_short()) { + // short + return addr + BytesPerInstWord + + (Assembler::simm26(((int_at(4) & 0x3ff) << 16) | + ((int_at(4) >> 10) & 0xffff)) << 2); + } + + if (is_far()) { + // far + return addr + ((intptr_t)Assembler::simm20(int_at(0) >> 5 & 0xfffff) << 18) + + (Assembler::simm16(int_at(4) >> 10 & 0xffff) << 2); + } + + fatal("not a NativeFarCall"); + return NULL; +} + +void NativeFarCall::set_destination(address dest) { + address addr_call = addr_at(0); + CodeBuffer cb(addr_call, instruction_size); + MacroAssembler masm(&cb); + assert(is_far_call_at(addr_call), "unexpected call type"); + masm.patchable_call(dest, addr_call); + ICache::invalidate_range(addr_call, instruction_size); +} + +void NativeFarCall::verify() { + assert(is_short() || is_far(), "not a NativeFarcall"); +} + +//------------------------------------------------------------------- + +bool NativeMovConstReg::is_lu12iw_ori_lu32id() const { + return Assembler::high(int_at(0), 7) == Assembler::lu12i_w_op && + Assembler::high(int_at(4), 10) == Assembler::ori_op && + Assembler::high(int_at(8), 7) == Assembler::lu32i_d_op; +} + +bool NativeMovConstReg::is_lu12iw_lu32id_nop() const { + return Assembler::high(int_at(0), 7) == Assembler::lu12i_w_op && + Assembler::high(int_at(4), 7) == Assembler::lu32i_d_op && + Assembler::high(int_at(8), 10) == Assembler::andi_op; +} + +bool NativeMovConstReg::is_lu12iw_2nop() const { + return Assembler::high(int_at(0), 7) == Assembler::lu12i_w_op && + Assembler::high(int_at(4), 10) == Assembler::andi_op && + Assembler::high(int_at(8), 10) == Assembler::andi_op; +} + +bool NativeMovConstReg::is_lu12iw_ori_nop() const { + return Assembler::high(int_at(0), 7) == Assembler::lu12i_w_op && + Assembler::high(int_at(4), 10) == Assembler::ori_op && + Assembler::high(int_at(8), 10) == Assembler::andi_op; +} + +bool NativeMovConstReg::is_addid_2nop() const { + return Assembler::high(int_at(0), 10) == Assembler::addi_d_op && + Assembler::high(int_at(4), 10) == Assembler::andi_op && + Assembler::high(int_at(8), 10) == Assembler::andi_op; +} + +void NativeMovConstReg::verify() { + assert(is_li52(), "not a mov reg, imm52"); +} + +void NativeMovConstReg::print() { + tty->print_cr(PTR_FORMAT ": mov reg, " INTPTR_FORMAT, + p2i(instruction_address()), data()); +} + +intptr_t NativeMovConstReg::data() const { + if (is_lu12iw_ori_lu32id()) { + return Assembler::merge((intptr_t)((int_at(4) >> 10) & 0xfff), + (intptr_t)((int_at(0) >> 5) & 0xfffff), + (intptr_t)((int_at(8) >> 5) & 0xfffff)); + } + + if (is_lu12iw_lu32id_nop()) { + return Assembler::merge((intptr_t)0, + (intptr_t)((int_at(0) >> 5) & 0xfffff), + (intptr_t)((int_at(4) >> 5) & 0xfffff)); + } + + if (is_lu12iw_2nop()) { + return Assembler::merge((intptr_t)0, + (intptr_t)((int_at(0) >> 5) & 0xfffff)); + } + + if (is_lu12iw_ori_nop()) { + return Assembler::merge((intptr_t)((int_at(4) >> 10) & 0xfff), + 
(intptr_t)((int_at(0) >> 5) & 0xfffff)); + } + + if (is_addid_2nop()) { + return Assembler::simm12((int_at(0) >> 10) & 0xfff); + } + +#ifndef PRODUCT + Disassembler::decode(addr_at(0), addr_at(0) + 16, tty); +#endif + fatal("not a mov reg, imm52"); + return 0; // unreachable +} + +void NativeMovConstReg::set_data(intptr_t x, intptr_t o) { + CodeBuffer cb(addr_at(0), instruction_size); + MacroAssembler masm(&cb); + masm.patchable_li52(as_Register(int_at(0) & 0x1f), x); + ICache::invalidate_range(addr_at(0), instruction_size); + + // Find and replace the oop/metadata corresponding to this + // instruction in oops section. + CodeBlob* blob = CodeCache::find_blob_unsafe(instruction_address()); + nmethod* nm = blob->as_nmethod_or_null(); + if (nm != NULL) { + o = o ? o : x; + RelocIterator iter(nm, instruction_address(), next_instruction_address()); + while (iter.next()) { + if (iter.type() == relocInfo::oop_type) { + oop* oop_addr = iter.oop_reloc()->oop_addr(); + *oop_addr = cast_to_oop(o); + break; + } else if (iter.type() == relocInfo::metadata_type) { + Metadata** metadata_addr = iter.metadata_reloc()->metadata_addr(); + *metadata_addr = (Metadata*)o; + break; + } + } + } +} + +//------------------------------------------------------------------- + +int NativeMovRegMem::offset() const{ + //TODO: LA + guarantee(0, "LA not implemented yet"); + return 0; // mute compiler +} + +void NativeMovRegMem::set_offset(int x) { + //TODO: LA + guarantee(0, "LA not implemented yet"); +} + +void NativeMovRegMem::verify() { + //TODO: LA + guarantee(0, "LA not implemented yet"); +} + + +void NativeMovRegMem::print() { + //TODO: LA + guarantee(0, "LA not implemented yet"); +} + +bool NativeInstruction::is_sigill_zombie_not_entrant() { + return uint_at(0) == NativeIllegalInstruction::instruction_code; +} + +void NativeIllegalInstruction::insert(address code_pos) { + *(juint*)code_pos = instruction_code; + ICache::invalidate_range(code_pos, instruction_size); +} + +void NativeJump::verify() { + assert(is_short() || is_far(), "not a general jump instruction"); +} + +bool NativeJump::is_short() { + return Assembler::high(insn_word(), 6) == Assembler::b_op; +} + +bool NativeJump::is_far() { + return Assembler::high(int_at(0), 7) == Assembler::pcaddu18i_op && + Assembler::high(int_at(4), 6) == Assembler::jirl_op && + Assembler::low(int_at(4), 5) == R0->encoding(); +} + +address NativeJump::jump_destination(address orig_addr) { + address addr = orig_addr ? orig_addr : addr_at(0); + address ret = (address)-1; + + // short + if (is_short()) { + ret = addr + (Assembler::simm26(((int_at(0) & 0x3ff) << 16) | + ((int_at(0) >> 10) & 0xffff)) << 2); + return ret == instruction_address() ? (address)-1 : ret; + } + + // far + if (is_far()) { + ret = addr + ((intptr_t)Assembler::simm20(int_at(0) >> 5 & 0xfffff) << 18) + + (Assembler::simm16(int_at(4) >> 10 & 0xffff) << 2); + return ret == instruction_address() ? (address)-1 : ret; + } + + fatal("not a jump"); + return NULL; +} + +void NativeJump::set_jump_destination(address dest) { + OrderAccess::fence(); + + CodeBuffer cb(addr_at(0), instruction_size); + MacroAssembler masm(&cb); + masm.patchable_jump(dest); + ICache::invalidate_range(addr_at(0), instruction_size); +} + +void NativeGeneralJump::insert_unconditional(address code_pos, address entry) { + //TODO: LA + guarantee(0, "LA not implemented yet"); +} + +// MT-safe patching of a long jump instruction. +// First patches first word of instruction to two jmp's that jmps to them +// selfs (spinlock). 
Then patches the last byte, and then atomicly replaces +// the jmp's with the first 4 byte of the new instruction. +void NativeGeneralJump::replace_mt_safe(address instr_addr, address code_buffer) { + //TODO: LA + guarantee(0, "LA not implemented yet"); +} + +// Must ensure atomicity +void NativeJump::patch_verified_entry(address entry, address verified_entry, address dest) { + assert(dest == SharedRuntime::get_handle_wrong_method_stub(), "expected fixed destination of patch"); + jlong offs = dest - verified_entry; + + if (MacroAssembler::reachable_from_branch_short(offs)) { + CodeBuffer cb(verified_entry, 1 * BytesPerInstWord); + MacroAssembler masm(&cb); + masm.b(dest); + } else { + // We use an illegal instruction for marking a method as + // not_entrant or zombie + NativeIllegalInstruction::insert(verified_entry); + } + ICache::invalidate_range(verified_entry, 1 * BytesPerInstWord); +} + +bool NativeInstruction::is_dtrace_trap() { + //return (*(int32_t*)this & 0xff) == 0xcc; + Unimplemented(); + return false; +} + +bool NativeInstruction::is_safepoint_poll() { + // + // 390 li T2, 0x0000000000400000 #@loadConP + // 394 st_w [SP + #12], V1 # spill 9 + // 398 Safepoint @ [T2] : poll for GC @ safePoint_poll # spec.benchmarks.compress.Decompressor::decompress @ bci:224 L[0]=A6 L[1]=_ L[2]=sp + #28 L[3]=_ L[4]=V1 + // + // 0x000000ffe5815130: lu12i_w t2, 0x40 + // 0x000000ffe5815134: st_w v1, 0xc(sp) ; OopMap{a6=Oop off=920} + // ;*goto + // ; - spec.benchmarks.compress.Decompressor::decompress@224 (line 584) + // + // 0x000000ffe5815138: ld_w at, 0x0(t2) ;*goto <--- PC + // ; - spec.benchmarks.compress.Decompressor::decompress@224 (line 584) + // + + // Since there may be some spill instructions between the safePoint_poll and loadConP, + // we check the safepoint instruction like this. + return Assembler::high(insn_word(), 10) == Assembler::ld_w_op && + Assembler::low(insn_word(), 5) == AT->encoding(); +} diff --git a/src/hotspot/cpu/loongarch/nativeInst_loongarch.hpp b/src/hotspot/cpu/loongarch/nativeInst_loongarch.hpp new file mode 100644 index 00000000000..a6e9d4dd3c6 --- /dev/null +++ b/src/hotspot/cpu/loongarch/nativeInst_loongarch.hpp @@ -0,0 +1,528 @@ +/* + * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef CPU_LOONGARCH_NATIVEINST_LOONGARCH_HPP +#define CPU_LOONGARCH_NATIVEINST_LOONGARCH_HPP + +#include "asm/assembler.hpp" +#include "runtime/icache.hpp" +#include "runtime/os.hpp" +#include "runtime/orderAccess.hpp" +#include "runtime/safepointMechanism.hpp" + +// We have interfaces for the following instructions: +// - NativeInstruction +// - - NativeCall +// - - NativeMovConstReg +// - - NativeMovConstRegPatching +// - - NativeMovRegMem +// - - NativeMovRegMemPatching +// - - NativeIllegalOpCode +// - - NativeGeneralJump +// - - NativePushConst +// - - NativeTstRegMem + +// The base class for different kinds of native instruction abstractions. +// Provides the primitive operations to manipulate code relative to this. + +class NativeInstruction { + friend class Relocation; + + public: + enum loongarch_specific_constants { + nop_instruction_code = 0, + nop_instruction_size = 4, + sync_instruction_code = 0xf, + sync_instruction_size = 4 + }; + + bool is_nop() { guarantee(0, "LA not implemented yet"); return long_at(0) == nop_instruction_code; } + bool is_sync() { return Assembler::high(insn_word(), 17) == Assembler::dbar_op; } + bool is_dtrace_trap(); + inline bool is_call(); + inline bool is_far_call(); + inline bool is_illegal(); + bool is_jump(); + bool is_safepoint_poll(); + + // Helper func for jvmci + bool is_lu12iw_lu32id() const; + bool is_pcaddu12i_add() const; + + // LoongArch has no instruction to generate a illegal instrucion exception? + // But `break 11` is not illegal instruction for LoongArch. + static int illegal_instruction(); + + bool is_int_branch(); + bool is_float_branch(); + + inline bool is_NativeCallTrampolineStub_at(); + //We use an illegal instruction for marking a method as not_entrant or zombie. + bool is_sigill_zombie_not_entrant(); + + protected: + address addr_at(int offset) const { return address(this) + offset; } + address instruction_address() const { return addr_at(0); } + address next_instruction_address() const { return addr_at(BytesPerInstWord); } + address prev_instruction_address() const { return addr_at(-BytesPerInstWord); } + + s_char sbyte_at(int offset) const { return *(s_char*) addr_at(offset); } + u_char ubyte_at(int offset) const { return *(u_char*) addr_at(offset); } + + jint int_at(int offset) const { return *(jint*) addr_at(offset); } + juint uint_at(int offset) const { return *(juint*) addr_at(offset); } + + intptr_t ptr_at(int offset) const { return *(intptr_t*) addr_at(offset); } + + oop oop_at (int offset) const { return *(oop*) addr_at(offset); } + int long_at(int offset) const { return *(jint*)addr_at(offset); } + + + void set_char_at(int offset, char c) { *addr_at(offset) = (u_char)c; wrote(offset); } + void set_int_at(int offset, jint i) { *(jint*)addr_at(offset) = i; wrote(offset); } + void set_ptr_at (int offset, intptr_t ptr) { *(intptr_t*) addr_at(offset) = ptr; wrote(offset); } + void set_oop_at (int offset, oop o) { *(oop*) addr_at(offset) = o; wrote(offset); } + void set_long_at(int offset, long i); + + int insn_word() const { return long_at(0); } + + void wrote(int offset); + + public: + + // unit test stuff + static void test() {} // override for testing + + inline friend NativeInstruction* nativeInstruction_at(address address); +}; + +inline NativeInstruction* nativeInstruction_at(address address) { + NativeInstruction* inst = (NativeInstruction*)address; +#ifdef ASSERT + //inst->verify(); +#endif + return inst; +} + +inline NativeCall* nativeCall_at(address address); + +// The NativeCall is an abstraction 
for accessing/manipulating native call +// instructions (used to manipulate inline caches, primitive & dll calls, etc.). +class NativeCall: public NativeInstruction { + public: + enum loongarch_specific_constants { + instruction_offset = 0, + instruction_size = 1 * BytesPerInstWord, + return_address_offset = 1 * BytesPerInstWord, + displacement_offset = 0 + }; + + // We have only bl. + bool is_bl() const; + + address instruction_address() const { return addr_at(instruction_offset); } + + address next_instruction_address() const { + return addr_at(return_address_offset); + } + + address return_address() const { + return next_instruction_address(); + } + + address target_addr_for_bl(address orig_addr = 0) const; + address destination() const; + void set_destination(address dest); + + void verify_alignment() {} + void verify(); + void print(); + + // Creation + inline friend NativeCall* nativeCall_at(address address); + inline friend NativeCall* nativeCall_before(address return_address); + + static bool is_call_at(address instr) { + return nativeInstruction_at(instr)->is_call(); + } + + static bool is_call_before(address return_address) { + return is_call_at(return_address - return_address_offset); + } + + // MT-safe patching of a call instruction. + static void insert(address code_pos, address entry); + static void replace_mt_safe(address instr_addr, address code_buffer); + + // Similar to replace_mt_safe, but just changes the destination. The + // important thing is that free-running threads are able to execute + // this call instruction at all times. If the call is an immediate bl + // instruction we can simply rely on atomicity of 32-bit writes to + // make sure other threads will see no intermediate states. + + // We cannot rely on locks here, since the free-running threads must run at + // full speed. + // + // Used in the runtime linkage of calls; see class CompiledIC. + + // The parameter assert_lock disables the assertion during code generation. + void set_destination_mt_safe(address dest, bool assert_lock = true); + + address get_trampoline(); + address trampoline_jump(CodeBuffer &cbuf, address dest); +}; + +inline NativeCall* nativeCall_at(address address) { + NativeCall* call = (NativeCall*)(address - NativeCall::instruction_offset); +#ifdef ASSERT + call->verify(); +#endif + return call; +} + +inline NativeCall* nativeCall_before(address return_address) { + NativeCall* call = (NativeCall*)(return_address - NativeCall::return_address_offset); +#ifdef ASSERT + call->verify(); +#endif + return call; +} + +// The NativeFarCall is an abstraction for accessing/manipulating native +// call-anywhere instructions. +// Used to call native methods which may be loaded anywhere in the address +// space, possibly out of reach of a call instruction. +class NativeFarCall: public NativeInstruction { + public: + enum loongarch_specific_constants { + instruction_offset = 0, + instruction_size = 2 * BytesPerInstWord + }; + + address instruction_address() const { return addr_at(instruction_offset); } + + // We use MacroAssembler::patchable_call() for implementing a + // call-anywhere instruction. + bool is_short() const; + bool is_far() const; + + // Checks whether instr points at a NativeFarCall instruction. + static bool is_far_call_at(address address) { + return nativeInstruction_at(address)->is_far_call(); + } + + // Returns the NativeFarCall's destination. + address destination(address orig_addr = 0) const; + + // Sets the NativeFarCall's destination, not necessarily mt-safe. 
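+  //
+  // Rough usage sketch (illustrative only; `pc` and `new_target` are
+  // placeholder names, not part of this patch):
+  //   NativeFarCall* call = nativeFarCall_at(pc);   // pc points at the patchable_call site
+  //   call->set_destination(new_target);            // re-emits the 2-instruction sequence
+  //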
+ // Used when relocating code. + void set_destination(address dest); + + void verify(); +}; + +// Instantiates a NativeFarCall object starting at the given instruction +// address and returns the NativeFarCall object. +inline NativeFarCall* nativeFarCall_at(address address) { + NativeFarCall* call = (NativeFarCall*)address; +#ifdef ASSERT + call->verify(); +#endif + return call; +} + +// An interface for accessing/manipulating native set_oop imm, reg instructions +// (used to manipulate inlined data references, etc.). +class NativeMovConstReg: public NativeInstruction { + public: + enum loongarch_specific_constants { + instruction_offset = 0, + instruction_size = 3 * BytesPerInstWord, + next_instruction_offset = 3 * BytesPerInstWord, + }; + + int insn_word() const { return long_at(instruction_offset); } + address instruction_address() const { return addr_at(0); } + address next_instruction_address() const { return addr_at(next_instruction_offset); } + intptr_t data() const; + void set_data(intptr_t x, intptr_t o = 0); + + bool is_li52() const { + return is_lu12iw_ori_lu32id() || + is_lu12iw_lu32id_nop() || + is_lu12iw_2nop() || + is_lu12iw_ori_nop() || + is_addid_2nop(); + } + bool is_lu12iw_ori_lu32id() const; + bool is_lu12iw_lu32id_nop() const; + bool is_lu12iw_2nop() const; + bool is_lu12iw_ori_nop() const; + bool is_addid_2nop() const; + void verify(); + void print(); + + // unit test stuff + static void test() {} + + // Creation + inline friend NativeMovConstReg* nativeMovConstReg_at(address address); + inline friend NativeMovConstReg* nativeMovConstReg_before(address address); +}; + +inline NativeMovConstReg* nativeMovConstReg_at(address address) { + NativeMovConstReg* test = (NativeMovConstReg*)(address - NativeMovConstReg::instruction_offset); +#ifdef ASSERT + test->verify(); +#endif + return test; +} + +inline NativeMovConstReg* nativeMovConstReg_before(address address) { + NativeMovConstReg* test = (NativeMovConstReg*)(address - NativeMovConstReg::instruction_size - NativeMovConstReg::instruction_offset); +#ifdef ASSERT + test->verify(); +#endif + return test; +} + +class NativeMovConstRegPatching: public NativeMovConstReg { + private: + friend NativeMovConstRegPatching* nativeMovConstRegPatching_at(address address) { + NativeMovConstRegPatching* test = (NativeMovConstRegPatching*)(address - instruction_offset); + #ifdef ASSERT + test->verify(); + #endif + return test; + } +}; + +class NativeMovRegMem: public NativeInstruction { + public: + enum loongarch_specific_constants { + instruction_offset = 0, + instruction_size = 4, + hiword_offset = 4, + ldst_offset = 12, + immediate_size = 4, + ldst_size = 16 + }; + + address instruction_address() const { return addr_at(instruction_offset); } + + int num_bytes_to_end_of_patch() const { return instruction_offset + instruction_size; } + + int offset() const; + + void set_offset(int x); + + void add_offset_in_bytes(int add_offset) { set_offset ( ( offset() + add_offset ) ); } + + void verify(); + void print (); + + // unit test stuff + static void test() {} + + private: + inline friend NativeMovRegMem* nativeMovRegMem_at (address address); +}; + +inline NativeMovRegMem* nativeMovRegMem_at (address address) { + NativeMovRegMem* test = (NativeMovRegMem*)(address - NativeMovRegMem::instruction_offset); +#ifdef ASSERT + test->verify(); +#endif + return test; +} + +class NativeMovRegMemPatching: public NativeMovRegMem { + private: + friend NativeMovRegMemPatching* nativeMovRegMemPatching_at (address address) { + NativeMovRegMemPatching* test 
= (NativeMovRegMemPatching*)(address - instruction_offset); + #ifdef ASSERT + test->verify(); + #endif + return test; + } +}; + + +// Handles all kinds of jump on Loongson. +// short: +// b offs26 +// nop +// +// far: +// pcaddu18i reg, si20 +// jirl r0, reg, si18 +// +class NativeJump: public NativeInstruction { + public: + enum loongarch_specific_constants { + instruction_offset = 0, + instruction_size = 2 * BytesPerInstWord + }; + + bool is_short(); + bool is_far(); + + address instruction_address() const { return addr_at(instruction_offset); } + address jump_destination(address orig_addr = 0); + void set_jump_destination(address dest); + + // Creation + inline friend NativeJump* nativeJump_at(address address); + + // Insertion of native jump instruction + static void insert(address code_pos, address entry) { Unimplemented(); } + // MT-safe insertion of native jump at verified method entry + static void check_verified_entry_alignment(address entry, address verified_entry){} + static void patch_verified_entry(address entry, address verified_entry, address dest); + + void verify(); +}; + +inline NativeJump* nativeJump_at(address address) { + NativeJump* jump = (NativeJump*)(address - NativeJump::instruction_offset); + debug_only(jump->verify();) + return jump; +} + +class NativeGeneralJump: public NativeJump { + public: + // Creation + inline friend NativeGeneralJump* nativeGeneralJump_at(address address); + + // Insertion of native general jump instruction + static void insert_unconditional(address code_pos, address entry); + static void replace_mt_safe(address instr_addr, address code_buffer); +}; + +inline NativeGeneralJump* nativeGeneralJump_at(address address) { + NativeGeneralJump* jump = (NativeGeneralJump*)(address); + debug_only(jump->verify();) + return jump; +} + +class NativeIllegalInstruction: public NativeInstruction { +public: + enum loongarch_specific_constants { + instruction_code = 0xbadc0de0, // TODO: LA + // Temporary LoongArch reserved instruction + instruction_size = 4, + instruction_offset = 0, + next_instruction_offset = 4 + }; + + // Insert illegal opcode as specific address + static void insert(address code_pos); +}; + +inline bool NativeInstruction::is_illegal() { return insn_word() == illegal_instruction(); } + +inline bool NativeInstruction::is_call() { + NativeCall *call = (NativeCall*)instruction_address(); + return call->is_bl(); +} + +inline bool NativeInstruction::is_far_call() { + NativeFarCall *call = (NativeFarCall*)instruction_address(); + + // short + if (call->is_short()) { + return true; + } + + // far + if (call->is_far()) { + return true; + } + + return false; +} + +inline bool NativeInstruction::is_jump() +{ + NativeGeneralJump *jump = (NativeGeneralJump*)instruction_address(); + + // short + if (jump->is_short()) { + return true; + } + + // far + if (jump->is_far()) { + return true; + } + + return false; +} + +// Call trampoline stubs. +class NativeCallTrampolineStub : public NativeInstruction { + public: + + enum la_specific_constants { + instruction_size = 6 * 4, + instruction_offset = 0, + data_offset = 4 * 4, + next_instruction_offset = 6 * 4 + }; + + address destination() const { + return (address)ptr_at(data_offset); + } + + void set_destination(address new_destination) { + set_ptr_at(data_offset, (intptr_t)new_destination); + OrderAccess::fence(); + } +}; + +// Note: Other stubs must not begin with this pattern. 
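+//
+// Rough stub layout implied by the offsets above (illustrative sketch, not an
+// authoritative encoding):
+//   +0   pcaddi        - materialize the address of the data word
+//   +4   ld_d          - load the 8-byte destination (data_offset = 4 * 4)
+//   +8   jirl r0, ...  - jump to the loaded destination
+//   +16  destination   - read/written via ptr_at(data_offset) / set_ptr_at()
+//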
+inline bool NativeInstruction::is_NativeCallTrampolineStub_at() { + // pcaddi + // ld_d + // jirl + return Assembler::high(int_at(0), 7) == Assembler::pcaddi_op && + Assembler::high(int_at(4), 10) == Assembler::ld_d_op && + Assembler::high(int_at(8), 6) == Assembler::jirl_op && + Assembler::low(int_at(8), 5) == R0->encoding(); +} + +inline NativeCallTrampolineStub* nativeCallTrampolineStub_at(address addr) { + NativeInstruction* ni = nativeInstruction_at(addr); + assert(ni->is_NativeCallTrampolineStub_at(), "no call trampoline found"); + return (NativeCallTrampolineStub*)addr; +} + +class NativeMembar : public NativeInstruction { +public: + unsigned int get_hint() { return Assembler::low(insn_word(), 4); } + void set_hint(int hint) { Assembler::patch(addr_at(0), 4, hint); } +}; + +#endif // CPU_LOONGARCH_NATIVEINST_LOONGARCH_HPP diff --git a/src/hotspot/cpu/loongarch/registerMap_loongarch.hpp b/src/hotspot/cpu/loongarch/registerMap_loongarch.hpp new file mode 100644 index 00000000000..e9f0fc280d0 --- /dev/null +++ b/src/hotspot/cpu/loongarch/registerMap_loongarch.hpp @@ -0,0 +1,47 @@ +/* + * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_LOONGARCH_REGISTERMAP_LOONGARCH_HPP +#define CPU_LOONGARCH_REGISTERMAP_LOONGARCH_HPP + +// machine-dependent implemention for register maps + friend class frame; + + private: +#ifndef CORE + // This is the hook for finding a register in an "well-known" location, + // such as a register block of a predetermined format. + // Since there is none, we just return NULL. + // See registerMap_sparc.hpp for an example of grabbing registers + // from register save areas of a standard layout. + address pd_location(VMReg reg) const {return NULL;} +#endif + + // no PD state to clear or copy: + void pd_clear() {} + void pd_initialize() {} + void pd_initialize_from(const RegisterMap* map) {} + +#endif // CPU_LOONGARCH_REGISTERMAP_LOONGARCH_HPP diff --git a/src/hotspot/cpu/loongarch/register_definitions_loongarch.cpp b/src/hotspot/cpu/loongarch/register_definitions_loongarch.cpp new file mode 100644 index 00000000000..58f40b747c2 --- /dev/null +++ b/src/hotspot/cpu/loongarch/register_definitions_loongarch.cpp @@ -0,0 +1,103 @@ +/* + * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/assembler.hpp" +#include "asm/register.hpp" +#include "register_loongarch.hpp" +#ifdef TARGET_ARCH_MODEL_loongarch_32 +# include "interp_masm_loongarch_32.hpp" +#endif +#ifdef TARGET_ARCH_MODEL_loongarch_64 +# include "interp_masm_loongarch_64.hpp" +#endif + +REGISTER_DEFINITION(Register, noreg); +REGISTER_DEFINITION(Register, r0); +REGISTER_DEFINITION(Register, r1); +REGISTER_DEFINITION(Register, r2); +REGISTER_DEFINITION(Register, r3); +REGISTER_DEFINITION(Register, r4); +REGISTER_DEFINITION(Register, r5); +REGISTER_DEFINITION(Register, r6); +REGISTER_DEFINITION(Register, r7); +REGISTER_DEFINITION(Register, r8); +REGISTER_DEFINITION(Register, r9); +REGISTER_DEFINITION(Register, r10); +REGISTER_DEFINITION(Register, r11); +REGISTER_DEFINITION(Register, r12); +REGISTER_DEFINITION(Register, r13); +REGISTER_DEFINITION(Register, r14); +REGISTER_DEFINITION(Register, r15); +REGISTER_DEFINITION(Register, r16); +REGISTER_DEFINITION(Register, r17); +REGISTER_DEFINITION(Register, r18); +REGISTER_DEFINITION(Register, r19); +REGISTER_DEFINITION(Register, r20); +REGISTER_DEFINITION(Register, r21); +REGISTER_DEFINITION(Register, r22); +REGISTER_DEFINITION(Register, r23); +REGISTER_DEFINITION(Register, r24); +REGISTER_DEFINITION(Register, r25); +REGISTER_DEFINITION(Register, r26); +REGISTER_DEFINITION(Register, r27); +REGISTER_DEFINITION(Register, r28); +REGISTER_DEFINITION(Register, r29); +REGISTER_DEFINITION(Register, r30); +REGISTER_DEFINITION(Register, r31); + +REGISTER_DEFINITION(FloatRegister, fnoreg); +REGISTER_DEFINITION(FloatRegister, f0); +REGISTER_DEFINITION(FloatRegister, f1); +REGISTER_DEFINITION(FloatRegister, f2); +REGISTER_DEFINITION(FloatRegister, f3); +REGISTER_DEFINITION(FloatRegister, f4); +REGISTER_DEFINITION(FloatRegister, f5); +REGISTER_DEFINITION(FloatRegister, f6); +REGISTER_DEFINITION(FloatRegister, f7); +REGISTER_DEFINITION(FloatRegister, f8); +REGISTER_DEFINITION(FloatRegister, f9); +REGISTER_DEFINITION(FloatRegister, f10); +REGISTER_DEFINITION(FloatRegister, f11); +REGISTER_DEFINITION(FloatRegister, f12); +REGISTER_DEFINITION(FloatRegister, f13); +REGISTER_DEFINITION(FloatRegister, f14); +REGISTER_DEFINITION(FloatRegister, f15); +REGISTER_DEFINITION(FloatRegister, f16); +REGISTER_DEFINITION(FloatRegister, f17); +REGISTER_DEFINITION(FloatRegister, f18); +REGISTER_DEFINITION(FloatRegister, f19); +REGISTER_DEFINITION(FloatRegister, f20); +REGISTER_DEFINITION(FloatRegister, f21); +REGISTER_DEFINITION(FloatRegister, f22); +REGISTER_DEFINITION(FloatRegister, f23); +REGISTER_DEFINITION(FloatRegister, f24); 
+REGISTER_DEFINITION(FloatRegister, f25); +REGISTER_DEFINITION(FloatRegister, f26); +REGISTER_DEFINITION(FloatRegister, f27); +REGISTER_DEFINITION(FloatRegister, f28); +REGISTER_DEFINITION(FloatRegister, f29); +REGISTER_DEFINITION(FloatRegister, f30); +REGISTER_DEFINITION(FloatRegister, f31); diff --git a/src/hotspot/cpu/loongarch/register_loongarch.cpp b/src/hotspot/cpu/loongarch/register_loongarch.cpp new file mode 100644 index 00000000000..54d90167a52 --- /dev/null +++ b/src/hotspot/cpu/loongarch/register_loongarch.cpp @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "register_loongarch.hpp" + +const int ConcreteRegisterImpl::max_gpr = RegisterImpl::number_of_registers * RegisterImpl::max_slots_per_register; +const int ConcreteRegisterImpl::max_fpr = ConcreteRegisterImpl::max_gpr + + FloatRegisterImpl::number_of_registers * FloatRegisterImpl::max_slots_per_register; + + +const char* RegisterImpl::name() const { + const char* names[number_of_registers] = { + "zero", "ra", "tp", "sp", "a0/v0", "a1/v1", "a2", "a3", + "a4", "a5", "a6", "a7", "t0", "t1", "t2", "t3", + "t4", "t5", "t6", "t7", "t8", "x", "fp", "s0", + "s1", "s2", "s3", "s4", "s5", "s6", "s7", "s8" + }; + return is_valid() ? names[encoding()] : "noreg"; +} + +const char* FloatRegisterImpl::name() const { + const char* names[number_of_registers] = { + "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", + "f8", "f9", "f10", "f11", "f12", "f13", "f14", "f15", + "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23", + "f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31", + }; + return is_valid() ? names[encoding()] : "fnoreg"; +} + +const char* ConditionalFlagRegisterImpl::name() const { + const char* names[number_of_registers] = { + "fcc0", "fcc1", "fcc2", "fcc3", "fcc4", "fcc5", "fcc6", "fcc7", + }; + return is_valid() ? names[encoding()] : "fccnoreg"; +} diff --git a/src/hotspot/cpu/loongarch/register_loongarch.hpp b/src/hotspot/cpu/loongarch/register_loongarch.hpp new file mode 100644 index 00000000000..da876a50835 --- /dev/null +++ b/src/hotspot/cpu/loongarch/register_loongarch.hpp @@ -0,0 +1,495 @@ +/* + * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_LOONGARCH_REGISTER_LOONGARCH_HPP +#define CPU_LOONGARCH_REGISTER_LOONGARCH_HPP + +#include "asm/register.hpp" +#include "utilities/formatBuffer.hpp" + +class VMRegImpl; +typedef VMRegImpl* VMReg; + +// Use Register as shortcut +class RegisterImpl; +typedef RegisterImpl* Register; + +inline Register as_Register(int encoding) { + return (Register)(intptr_t) encoding; +} + +class RegisterImpl: public AbstractRegisterImpl { + public: + enum { + number_of_registers = 32, + max_slots_per_register = 2 + }; + + // derived registers, offsets, and addresses + Register successor() const { return as_Register(encoding() + 1); } + + // construction + inline friend Register as_Register(int encoding); + + VMReg as_VMReg(); + + // accessors + int encoding() const { assert(is_valid(), "invalid register (%d)", (int)(intptr_t)this ); return (intptr_t)this; } + bool is_valid() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_registers; } + const char* name() const; +}; + +// The integer registers of the LoongArch architecture +CONSTANT_REGISTER_DECLARATION(Register, noreg, (-1)); + + +CONSTANT_REGISTER_DECLARATION(Register, r0, (0)); +CONSTANT_REGISTER_DECLARATION(Register, r1, (1)); +CONSTANT_REGISTER_DECLARATION(Register, r2, (2)); +CONSTANT_REGISTER_DECLARATION(Register, r3, (3)); +CONSTANT_REGISTER_DECLARATION(Register, r4, (4)); +CONSTANT_REGISTER_DECLARATION(Register, r5, (5)); +CONSTANT_REGISTER_DECLARATION(Register, r6, (6)); +CONSTANT_REGISTER_DECLARATION(Register, r7, (7)); +CONSTANT_REGISTER_DECLARATION(Register, r8, (8)); +CONSTANT_REGISTER_DECLARATION(Register, r9, (9)); +CONSTANT_REGISTER_DECLARATION(Register, r10, (10)); +CONSTANT_REGISTER_DECLARATION(Register, r11, (11)); +CONSTANT_REGISTER_DECLARATION(Register, r12, (12)); +CONSTANT_REGISTER_DECLARATION(Register, r13, (13)); +CONSTANT_REGISTER_DECLARATION(Register, r14, (14)); +CONSTANT_REGISTER_DECLARATION(Register, r15, (15)); +CONSTANT_REGISTER_DECLARATION(Register, r16, (16)); +CONSTANT_REGISTER_DECLARATION(Register, r17, (17)); +CONSTANT_REGISTER_DECLARATION(Register, r18, (18)); +CONSTANT_REGISTER_DECLARATION(Register, r19, (19)); +CONSTANT_REGISTER_DECLARATION(Register, r20, (20)); +CONSTANT_REGISTER_DECLARATION(Register, r21, (21)); +CONSTANT_REGISTER_DECLARATION(Register, r22, (22)); +CONSTANT_REGISTER_DECLARATION(Register, r23, (23)); +CONSTANT_REGISTER_DECLARATION(Register, r24, (24)); +CONSTANT_REGISTER_DECLARATION(Register, r25, (25)); +CONSTANT_REGISTER_DECLARATION(Register, r26, (26)); +CONSTANT_REGISTER_DECLARATION(Register, r27, (27)); +CONSTANT_REGISTER_DECLARATION(Register, r28, 
(28)); +CONSTANT_REGISTER_DECLARATION(Register, r29, (29)); +CONSTANT_REGISTER_DECLARATION(Register, r30, (30)); +CONSTANT_REGISTER_DECLARATION(Register, r31, (31)); + +#ifndef DONT_USE_REGISTER_DEFINES +#define NOREG ((Register)(noreg_RegisterEnumValue)) + +#define R0 ((Register)(r0_RegisterEnumValue)) +#define R1 ((Register)(r1_RegisterEnumValue)) +#define R2 ((Register)(r2_RegisterEnumValue)) +#define R3 ((Register)(r3_RegisterEnumValue)) +#define R4 ((Register)(r4_RegisterEnumValue)) +#define R5 ((Register)(r5_RegisterEnumValue)) +#define R6 ((Register)(r6_RegisterEnumValue)) +#define R7 ((Register)(r7_RegisterEnumValue)) +#define R8 ((Register)(r8_RegisterEnumValue)) +#define R9 ((Register)(r9_RegisterEnumValue)) +#define R10 ((Register)(r10_RegisterEnumValue)) +#define R11 ((Register)(r11_RegisterEnumValue)) +#define R12 ((Register)(r12_RegisterEnumValue)) +#define R13 ((Register)(r13_RegisterEnumValue)) +#define R14 ((Register)(r14_RegisterEnumValue)) +#define R15 ((Register)(r15_RegisterEnumValue)) +#define R16 ((Register)(r16_RegisterEnumValue)) +#define R17 ((Register)(r17_RegisterEnumValue)) +#define R18 ((Register)(r18_RegisterEnumValue)) +#define R19 ((Register)(r19_RegisterEnumValue)) +#define R20 ((Register)(r20_RegisterEnumValue)) +#define R21 ((Register)(r21_RegisterEnumValue)) +#define R22 ((Register)(r22_RegisterEnumValue)) +#define R23 ((Register)(r23_RegisterEnumValue)) +#define R24 ((Register)(r24_RegisterEnumValue)) +#define R25 ((Register)(r25_RegisterEnumValue)) +#define R26 ((Register)(r26_RegisterEnumValue)) +#define R27 ((Register)(r27_RegisterEnumValue)) +#define R28 ((Register)(r28_RegisterEnumValue)) +#define R29 ((Register)(r29_RegisterEnumValue)) +#define R30 ((Register)(r30_RegisterEnumValue)) +#define R31 ((Register)(r31_RegisterEnumValue)) + + +#define RA R1 +#define TP R2 +#define SP R3 +#define A0 R4 +#define A1 R5 +#define A2 R6 +#define A3 R7 +#define A4 R8 +#define A5 R9 +#define A6 R10 +#define A7 R11 +#define RT0 R12 +#define RT1 R13 +#define RT2 R14 +#define RT3 R15 +#define RT4 R16 +#define RT5 R17 +#define RT6 R18 +#define RT7 R19 +#define RT8 R20 +#define RX R21 +#define FP R22 +#define S0 R23 +#define S1 R24 +#define S2 R25 +#define S3 R26 +#define S4 R27 +#define S5 R28 +#define S6 R29 +#define S7 R30 +#define S8 R31 + +#define c_rarg0 RT0 +#define c_rarg1 RT1 +#define Rmethod S3 +#define Rsender S4 +#define Rnext S1 + +#define V0 A0 +#define V1 A1 + +#define SCR1 RT7 +#define SCR2 RT4 + +//for interpreter frame +// bytecode pointer register +#define BCP S0 +// local variable pointer register +#define LVP S7 +// temperary callee saved register, we use this register to save the register maybe blowed cross call_VM +// be sure to save and restore its value in call_stub +#define TSR S2 + +#define OPT_THREAD 1 + +#define TREG S6 + +#define S5_heapbase S5 + +#define FSR V0 +#define SSR T6 +#define FSF FV0 + +#define RECEIVER T0 +#define IC_Klass T1 + +#define SHIFT_count T3 + +// ---------- Scratch Register ---------- +#define AT RT7 +#define fscratch F23 + +#endif // DONT_USE_REGISTER_DEFINES + +// Use FloatRegister as shortcut +class FloatRegisterImpl; +typedef FloatRegisterImpl* FloatRegister; + +inline FloatRegister as_FloatRegister(int encoding) { + return (FloatRegister)(intptr_t) encoding; +} + +// The implementation of floating point registers for the LoongArch architecture +class FloatRegisterImpl: public AbstractRegisterImpl { + public: + enum { + number_of_registers = 32, + save_slots_per_register = 2, + slots_per_lsx_register = 4, + 
slots_per_lasx_register = 8, + max_slots_per_register = 8 + }; + + // construction + inline friend FloatRegister as_FloatRegister(int encoding); + + VMReg as_VMReg(); + + // derived registers, offsets, and addresses + FloatRegister successor() const { return as_FloatRegister(encoding() + 1); } + + // accessors + int encoding() const { assert(is_valid(), "invalid register"); return (intptr_t)this; } + bool is_valid() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_registers; } + const char* name() const; + +}; + +CONSTANT_REGISTER_DECLARATION(FloatRegister, fnoreg , (-1)); + +CONSTANT_REGISTER_DECLARATION(FloatRegister, f0 , ( 0)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f1 , ( 1)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f2 , ( 2)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f3 , ( 3)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f4 , ( 4)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f5 , ( 5)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f6 , ( 6)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f7 , ( 7)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f8 , ( 8)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f9 , ( 9)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f10 , (10)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f11 , (11)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f12 , (12)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f13 , (13)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f14 , (14)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f15 , (15)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f16 , (16)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f17 , (17)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f18 , (18)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f19 , (19)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f20 , (20)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f21 , (21)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f22 , (22)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f23 , (23)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f24 , (24)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f25 , (25)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f26 , (26)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f27 , (27)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f28 , (28)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f29 , (29)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f30 , (30)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f31 , (31)); + +#ifndef DONT_USE_REGISTER_DEFINES +#define FNOREG ((FloatRegister)(fnoreg_FloatRegisterEnumValue)) +#define F0 ((FloatRegister)( f0_FloatRegisterEnumValue)) +#define F1 ((FloatRegister)( f1_FloatRegisterEnumValue)) +#define F2 ((FloatRegister)( f2_FloatRegisterEnumValue)) +#define F3 ((FloatRegister)( f3_FloatRegisterEnumValue)) +#define F4 ((FloatRegister)( f4_FloatRegisterEnumValue)) +#define F5 ((FloatRegister)( f5_FloatRegisterEnumValue)) +#define F6 ((FloatRegister)( f6_FloatRegisterEnumValue)) +#define F7 ((FloatRegister)( f7_FloatRegisterEnumValue)) +#define F8 ((FloatRegister)( f8_FloatRegisterEnumValue)) +#define F9 ((FloatRegister)( f9_FloatRegisterEnumValue)) +#define F10 ((FloatRegister)( f10_FloatRegisterEnumValue)) +#define F11 ((FloatRegister)( f11_FloatRegisterEnumValue)) +#define F12 ((FloatRegister)( f12_FloatRegisterEnumValue)) +#define F13 ((FloatRegister)( f13_FloatRegisterEnumValue)) +#define F14 ((FloatRegister)( f14_FloatRegisterEnumValue)) +#define F15 ((FloatRegister)( f15_FloatRegisterEnumValue)) +#define F16 ((FloatRegister)( 
f16_FloatRegisterEnumValue)) +#define F17 ((FloatRegister)( f17_FloatRegisterEnumValue)) +#define F18 ((FloatRegister)( f18_FloatRegisterEnumValue)) +#define F19 ((FloatRegister)( f19_FloatRegisterEnumValue)) +#define F20 ((FloatRegister)( f20_FloatRegisterEnumValue)) +#define F21 ((FloatRegister)( f21_FloatRegisterEnumValue)) +#define F22 ((FloatRegister)( f22_FloatRegisterEnumValue)) +#define F23 ((FloatRegister)( f23_FloatRegisterEnumValue)) +#define F24 ((FloatRegister)( f24_FloatRegisterEnumValue)) +#define F25 ((FloatRegister)( f25_FloatRegisterEnumValue)) +#define F26 ((FloatRegister)( f26_FloatRegisterEnumValue)) +#define F27 ((FloatRegister)( f27_FloatRegisterEnumValue)) +#define F28 ((FloatRegister)( f28_FloatRegisterEnumValue)) +#define F29 ((FloatRegister)( f29_FloatRegisterEnumValue)) +#define F30 ((FloatRegister)( f30_FloatRegisterEnumValue)) +#define F31 ((FloatRegister)( f31_FloatRegisterEnumValue)) + +#define FA0 F0 +#define FA1 F1 +#define FA2 F2 +#define FA3 F3 +#define FA4 F4 +#define FA5 F5 +#define FA6 F6 +#define FA7 F7 + +#define FV0 F0 +#define FV1 F1 + +#define FT0 F8 +#define FT1 F9 +#define FT2 F10 +#define FT3 F11 +#define FT4 F12 +#define FT5 F13 +#define FT6 F14 +#define FT7 F15 +#define FT8 F16 +#define FT9 F17 +#define FT10 F18 +#define FT11 F19 +#define FT12 F20 +#define FT13 F21 +#define FT14 F22 +#define FT15 F23 + +#define FS0 F24 +#define FS1 F25 +#define FS2 F26 +#define FS3 F27 +#define FS4 F28 +#define FS5 F29 +#define FS6 F30 +#define FS7 F31 + +#endif // DONT_USE_REGISTER_DEFINES + +// Use ConditionalFlagRegister as shortcut +class ConditionalFlagRegisterImpl; +typedef ConditionalFlagRegisterImpl* ConditionalFlagRegister; + +inline ConditionalFlagRegister as_ConditionalFlagRegister(int encoding) { + return (ConditionalFlagRegister)(intptr_t) encoding; +} + +// The implementation of floating point registers for the LoongArch architecture +class ConditionalFlagRegisterImpl: public AbstractRegisterImpl { + public: + enum { +// conditionalflag_arg_base = 12, + number_of_registers = 8 + }; + + // construction + inline friend ConditionalFlagRegister as_ConditionalFlagRegister(int encoding); + + VMReg as_VMReg(); + + // derived registers, offsets, and addresses + ConditionalFlagRegister successor() const { return as_ConditionalFlagRegister(encoding() + 1); } + + // accessors + int encoding() const { assert(is_valid(), "invalid register"); return (intptr_t)this; } + bool is_valid() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_registers; } + const char* name() const; + +}; + +CONSTANT_REGISTER_DECLARATION(ConditionalFlagRegister, fccnoreg , (-1)); + +CONSTANT_REGISTER_DECLARATION(ConditionalFlagRegister, fcc0 , ( 0)); +CONSTANT_REGISTER_DECLARATION(ConditionalFlagRegister, fcc1 , ( 1)); +CONSTANT_REGISTER_DECLARATION(ConditionalFlagRegister, fcc2 , ( 2)); +CONSTANT_REGISTER_DECLARATION(ConditionalFlagRegister, fcc3 , ( 3)); +CONSTANT_REGISTER_DECLARATION(ConditionalFlagRegister, fcc4 , ( 4)); +CONSTANT_REGISTER_DECLARATION(ConditionalFlagRegister, fcc5 , ( 5)); +CONSTANT_REGISTER_DECLARATION(ConditionalFlagRegister, fcc6 , ( 6)); +CONSTANT_REGISTER_DECLARATION(ConditionalFlagRegister, fcc7 , ( 7)); + +#ifndef DONT_USE_REGISTER_DEFINES +#define FCCNOREG ((ConditionalFlagRegister)(fccnoreg_ConditionalFlagRegisterEnumValue)) +#define FCC0 ((ConditionalFlagRegister)( fcc0_ConditionalFlagRegisterEnumValue)) +#define FCC1 ((ConditionalFlagRegister)( fcc1_ConditionalFlagRegisterEnumValue)) +#define FCC2 ((ConditionalFlagRegister)( 
fcc2_ConditionalFlagRegisterEnumValue))
+#define FCC3 ((ConditionalFlagRegister)( fcc3_ConditionalFlagRegisterEnumValue))
+#define FCC4 ((ConditionalFlagRegister)( fcc4_ConditionalFlagRegisterEnumValue))
+#define FCC5 ((ConditionalFlagRegister)( fcc5_ConditionalFlagRegisterEnumValue))
+#define FCC6 ((ConditionalFlagRegister)( fcc6_ConditionalFlagRegisterEnumValue))
+#define FCC7 ((ConditionalFlagRegister)( fcc7_ConditionalFlagRegisterEnumValue))
+
+#endif // DONT_USE_REGISTER_DEFINES
+
+// Need to know the total number of registers of all sorts for SharedInfo.
+// Define a class that exports it.
+class ConcreteRegisterImpl : public AbstractRegisterImpl {
+ public:
+  enum {
+  // A big enough number for C2: all the registers plus flags
+  // This number must be large enough to cover REG_COUNT (defined by c2) registers.
+  // There is no requirement that any ordering here matches any ordering c2 gives
+  // its optoregs.
+    number_of_registers = RegisterImpl::max_slots_per_register * RegisterImpl::number_of_registers +
+                          FloatRegisterImpl::max_slots_per_register * FloatRegisterImpl::number_of_registers
+  };
+
+  static const int max_gpr;
+  static const int max_fpr;
+};
+
+// A set of registers
+template <class RegImpl = Register>
+class AbstractRegSet {
+  uint32_t _bitset;
+
+  AbstractRegSet(uint32_t bitset) : _bitset(bitset) { }
+
+public:
+
+  AbstractRegSet() : _bitset(0) { }
+
+  AbstractRegSet(RegImpl r1) : _bitset(1 << r1->encoding()) { }
+
+  AbstractRegSet operator+(const AbstractRegSet aSet) const {
+    AbstractRegSet result(_bitset | aSet._bitset);
+    return result;
+  }
+
+  AbstractRegSet operator-(const AbstractRegSet aSet) const {
+    AbstractRegSet result(_bitset & ~aSet._bitset);
+    return result;
+  }
+
+  AbstractRegSet &operator+=(const AbstractRegSet aSet) {
+    *this = *this + aSet;
+    return *this;
+  }
+
+  AbstractRegSet &operator-=(const AbstractRegSet aSet) {
+    *this = *this - aSet;
+    return *this;
+  }
+
+  static AbstractRegSet of(RegImpl r1) {
+    return AbstractRegSet(r1);
+  }
+
+  static AbstractRegSet of(RegImpl r1, RegImpl r2) {
+    return of(r1) + r2;
+  }
+
+  static AbstractRegSet of(RegImpl r1, RegImpl r2, RegImpl r3) {
+    return of(r1, r2) + r3;
+  }
+
+  static AbstractRegSet of(RegImpl r1, RegImpl r2, RegImpl r3, RegImpl r4) {
+    return of(r1, r2, r3) + r4;
+  }
+
+  static AbstractRegSet of(RegImpl r1, RegImpl r2, RegImpl r3, RegImpl r4, RegImpl r5) {
+    return of(r1, r2, r3, r4) + r5;
+  }
+
+  static AbstractRegSet range(RegImpl start, RegImpl end) {
+    uint32_t bits = ~0;
+    bits <<= start->encoding();
+    bits <<= 31 - end->encoding();
+    bits >>= 31 - end->encoding();
+
+    return AbstractRegSet(bits);
+  }
+
+  uint32_t bits() const { return _bitset; }
+};
+
+typedef AbstractRegSet<Register> RegSet;
+
+#endif //CPU_LOONGARCH_REGISTER_LOONGARCH_HPP
diff --git a/src/hotspot/cpu/loongarch/relocInfo_loongarch.cpp b/src/hotspot/cpu/loongarch/relocInfo_loongarch.cpp
new file mode 100644
index 00000000000..1caba436995
--- /dev/null
+++ b/src/hotspot/cpu/loongarch/relocInfo_loongarch.cpp
@@ -0,0 +1,132 @@
+/*
+ * Copyright (c) 1998, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "code/relocInfo.hpp" +#include "compiler/disassembler.hpp" +#include "nativeInst_loongarch.hpp" +#include "oops/compressedOops.inline.hpp" +#include "oops/oop.hpp" +#include "oops/klass.inline.hpp" +#include "runtime/safepoint.hpp" + + +void Relocation::pd_set_data_value(address x, intptr_t o, bool verify_only) { + x += o; + typedef Assembler::WhichOperand WhichOperand; + WhichOperand which = (WhichOperand) format(); // that is, disp32 or imm, call32, narrow oop + assert(which == Assembler::disp32_operand || + which == Assembler::narrow_oop_operand || + which == Assembler::imm_operand, "format unpacks ok"); + if (type() == relocInfo::internal_word_type || + type() == relocInfo::section_word_type) { + MacroAssembler::pd_patch_instruction(addr(), x); + } else if (which == Assembler::imm_operand) { + if (verify_only) { + assert(nativeMovConstReg_at(addr())->data() == (long)x, "instructions must match"); + } else { + nativeMovConstReg_at(addr())->set_data((intptr_t)(x)); + } + } else if (which == Assembler::narrow_oop_operand) { + // both compressed oops and compressed classes look the same + if (Universe::heap()->is_in_reserved((oop)x)) { + if (verify_only) { + assert(nativeMovConstReg_at(addr())->data() == (long)CompressedOops::encode((oop)x), "instructions must match"); + } else { + nativeMovConstReg_at(addr())->set_data((intptr_t)(CompressedOops::encode(oop(x))), (intptr_t)(x)); + } + } else { + if (verify_only) { + assert(nativeMovConstReg_at(addr())->data() == (long)Klass::encode_klass((Klass*)x), "instructions must match"); + } else { + nativeMovConstReg_at(addr())->set_data((intptr_t)(Klass::encode_klass((Klass*)x)), (intptr_t)(x)); + } + } + } else { + // Note: Use runtime_call_type relocations for call32_operand. + assert(0, "call32_operand not supported in LoongArch64"); + } +} + + +address Relocation::pd_call_destination(address orig_addr) { + NativeInstruction* ni = nativeInstruction_at(addr()); + if (ni->is_far_call()) { + return nativeFarCall_at(addr())->destination(orig_addr); + } else if (ni->is_call()) { + address trampoline = nativeCall_at(addr())->get_trampoline(); + if (trampoline) { + return nativeCallTrampolineStub_at(trampoline)->destination(); + } else { + address new_addr = nativeCall_at(addr())->target_addr_for_bl(orig_addr); + // If call is branch to self, don't try to relocate it, just leave it + // as branch to self. This happens during code generation if the code + // buffer expands. It will be relocated to the trampoline above once + // code generation is complete. + return (new_addr == orig_addr) ? 
addr() : new_addr; + } + } else if (ni->is_jump()) { + return nativeGeneralJump_at(addr())->jump_destination(orig_addr); + } else { + tty->print_cr("\nError!\ncall destination: " INTPTR_FORMAT, p2i(addr())); + Disassembler::decode(addr() - 10 * BytesPerInstWord, addr() + 10 * BytesPerInstWord, tty); + ShouldNotReachHere(); + return NULL; + } +} + +void Relocation::pd_set_call_destination(address x) { + NativeInstruction* ni = nativeInstruction_at(addr()); + if (ni->is_far_call()) { + nativeFarCall_at(addr())->set_destination(x); + } else if (ni->is_call()) { + address trampoline = nativeCall_at(addr())->get_trampoline(); + if (trampoline) { + nativeCall_at(addr())->set_destination_mt_safe(x, false); + } else { + nativeCall_at(addr())->set_destination(x); + } + } else if (ni->is_jump()) { + nativeGeneralJump_at(addr())->set_jump_destination(x); + } else { + ShouldNotReachHere(); + } +} + +address* Relocation::pd_address_in_code() { + return (address*)addr(); +} + +address Relocation::pd_get_address_from_code() { + NativeMovConstReg* ni = nativeMovConstReg_at(addr()); + return (address)ni->data(); +} + +void poll_Relocation::fix_relocation_after_move(const CodeBuffer* src, CodeBuffer* dest) { +} + +void metadata_Relocation::pd_fix_value(address x) { +} diff --git a/src/hotspot/cpu/loongarch/relocInfo_loongarch.hpp b/src/hotspot/cpu/loongarch/relocInfo_loongarch.hpp new file mode 100644 index 00000000000..c85ca4963f3 --- /dev/null +++ b/src/hotspot/cpu/loongarch/relocInfo_loongarch.hpp @@ -0,0 +1,44 @@ +/* + * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_LOONGARCH_RELOCINFO_LOONGARCH_HPP +#define CPU_LOONGARCH_RELOCINFO_LOONGARCH_HPP + + // machine-dependent parts of class relocInfo + private: + enum { + // Since LoongArch instructions are whole words, + // the two low-order offset bits can always be discarded. + offset_unit = 4, + + // imm_oop_operand vs. narrow_oop_operand + format_width = 2 + }; + + public: + + static bool mustIterateImmediateOopsInCode() { return false; } + +#endif // CPU_LOONGARCH_RELOCINFO_LOONGARCH_HPP diff --git a/src/hotspot/cpu/loongarch/runtime_loongarch_64.cpp b/src/hotspot/cpu/loongarch/runtime_loongarch_64.cpp new file mode 100644 index 00000000000..334c783b377 --- /dev/null +++ b/src/hotspot/cpu/loongarch/runtime_loongarch_64.cpp @@ -0,0 +1,191 @@ +/* + * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. 
+ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#ifdef COMPILER2 +#include "asm/macroAssembler.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "classfile/systemDictionary.hpp" +#include "code/vmreg.hpp" +#include "interpreter/interpreter.hpp" +#include "opto/runtime.hpp" +#include "runtime/interfaceSupport.inline.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#include "runtime/vframeArray.hpp" +#include "utilities/globalDefinitions.hpp" +#include "vmreg_loongarch.inline.hpp" +#endif + +#define __ masm-> + +#define T0 RT0 +#define T1 RT1 +#define T2 RT2 +#define T3 RT3 +#define T4 RT4 +#define T5 RT5 +#define T6 RT6 +#define T7 RT7 +#define T8 RT8 + +//-------------- generate_exception_blob ----------- +// creates _exception_blob. +// The exception blob is jumped to from a compiled method. +// (see emit_exception_handler in sparc.ad file) +// +// Given an exception pc at a call we call into the runtime for the +// handler in this method. This handler might merely restore state +// (i.e. callee save registers) unwind the frame and jump to the +// exception handler for the nmethod if there is no Java level handler +// for the nmethod. +// +// This code is entered with a jump, and left with a jump. +// +// Arguments: +// V0: exception oop +// V1: exception pc +// +// Results: +// A0: exception oop +// A1: exception pc in caller or ??? +// jumps to: exception handler of caller +// +// Note: the exception pc MUST be at a call (precise debug information) +// +// [stubGenerator_loongarch_64.cpp] generate_forward_exception() +// |- V0, V1 are created +// |- T4 <= SharedRuntime::exception_handler_for_return_address +// `- jr T4 +// `- the caller's exception_handler +// `- jr OptoRuntime::exception_blob +// `- here +// +void OptoRuntime::generate_exception_blob() { + // Capture info about frame layout + enum layout { + fp_off, + return_off, // slot for return address + framesize + }; + + // allocate space for the code + ResourceMark rm; + // setup code generation tools + CodeBuffer buffer("exception_blob", 5120, 5120); + MacroAssembler* masm = new MacroAssembler(&buffer); + + address start = __ pc(); + + __ addi_d(SP, SP, -1 * framesize * wordSize); // Prolog! + + // this frame will be treated as the original caller method. + // So, the return pc should be filled with the original exception pc. 
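+  // Resulting frame layout (illustrative), per the `layout` enum above:
+  //   SP + fp_off * wordSize      -> saved FP
+  //   SP + return_off * wordSize  -> original exception pc (stored from V1 below)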
+ // ref: X86's implementation + __ st_d(V1, SP, return_off * wordSize); // return address + __ st_d(FP, SP, fp_off * wordSize); + + // Save callee saved registers. None for UseSSE=0, + // floats-only for UseSSE=1, and doubles for UseSSE=2. + + __ addi_d(FP, SP, fp_off * wordSize); + + // Store exception in Thread object. We cannot pass any arguments to the + // handle_exception call, since we do not want to make any assumption + // about the size of the frame where the exception happened in. + Register thread = TREG; + +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + + __ st_d(V0, Address(thread, JavaThread::exception_oop_offset())); + __ st_d(V1, Address(thread, JavaThread::exception_pc_offset())); + + // This call does all the hard work. It checks if an exception handler + // exists in the method. + // If so, it returns the handler address. + // If not, it prepares for stack-unwinding, restoring the callee-save + // registers of the frame being removed. + Label L; + address the_pc = __ pc(); + __ bind(L); + __ set_last_Java_frame(thread, NOREG, NOREG, L); + + __ li(AT, -(StackAlignmentInBytes)); + __ andr(SP, SP, AT); // Fix stack alignment as required by ABI + + __ move(A0, thread); + // TODO: confirm reloc + __ call((address)OptoRuntime::handle_exception_C, relocInfo::runtime_call_type); + + // Set an oopmap for the call site + OopMapSet *oop_maps = new OopMapSet(); + + oop_maps->add_gc_map(the_pc - start, new OopMap(framesize, 0)); + +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + __ reset_last_Java_frame(thread, true); + + // Pop self-frame. + __ leave(); // Epilog! + + // V0: exception handler + + // We have a handler in V0, (could be deopt blob) + __ move(T4, V0); + +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + // Get the exception + __ ld_d(A0, Address(thread, JavaThread::exception_oop_offset())); + // Get the exception pc in case we are deoptimized + __ ld_d(A1, Address(thread, JavaThread::exception_pc_offset())); +#ifdef ASSERT + __ st_d(R0, Address(thread, JavaThread::exception_handler_pc_offset())); + __ st_d(R0, Address(thread, JavaThread::exception_pc_offset())); +#endif + // Clear the exception oop so GC no longer processes it as a root. + __ st_d(R0, Address(thread, JavaThread::exception_oop_offset())); + + // Fix seg fault when running: + // Eclipse + Plugin + Debug As + // This is the only condition where C2 calls SharedRuntime::generate_deopt_blob() + // + __ move(V0, A0); + __ move(V1, A1); + + // V0: exception oop + // T4: exception handler + // A1: exception pc + __ jr(T4); + + // make sure all code is generated + masm->flush(); + _exception_blob = ExceptionBlob::create(&buffer, oop_maps, framesize); +} diff --git a/src/hotspot/cpu/loongarch/sharedRuntime_loongarch_64.cpp b/src/hotspot/cpu/loongarch/sharedRuntime_loongarch_64.cpp new file mode 100644 index 00000000000..bc91ee005eb --- /dev/null +++ b/src/hotspot/cpu/loongarch/sharedRuntime_loongarch_64.cpp @@ -0,0 +1,3621 @@ +/* + * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "code/debugInfoRec.hpp" +#include "code/icBuffer.hpp" +#include "code/vtableStubs.hpp" +#include "interpreter/interpreter.hpp" +#include "nativeInst_loongarch.hpp" +#include "oops/compiledICHolder.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/vframeArray.hpp" +#include "vmreg_loongarch.inline.hpp" +#ifdef COMPILER2 +#include "opto/runtime.hpp" +#endif +#if INCLUDE_JVMCI +#include "jvmci/jvmciJavaClasses.hpp" +#endif + +#include + +#define __ masm-> + +#define T0 RT0 +#define T1 RT1 +#define T2 RT2 +#define T3 RT3 +#define T4 RT4 +#define T5 RT5 +#define T6 RT6 +#define T7 RT7 +#define T8 RT8 + +const int StackAlignmentInSlots = StackAlignmentInBytes / VMRegImpl::stack_slot_size; + +class RegisterSaver { + // Capture info about frame layout + enum layout { + fpr0_off = 0, + fpr1_off, + fpr2_off, + fpr3_off, + fpr4_off, + fpr5_off, + fpr6_off, + fpr7_off, + fpr8_off, + fpr9_off, + fpr10_off, + fpr11_off, + fpr12_off, + fpr13_off, + fpr14_off, + fpr15_off, + fpr16_off, + fpr17_off, + fpr18_off, + fpr19_off, + fpr20_off, + fpr21_off, + fpr22_off, + fpr23_off, + fpr24_off, + fpr25_off, + fpr26_off, + fpr27_off, + fpr28_off, + fpr29_off, + fpr30_off, + fpr31_off, + a0_off, + a1_off, + a2_off, + a3_off, + a4_off, + a5_off, + a6_off, + a7_off, + t0_off, + t1_off, + t2_off, + t3_off, + t4_off, + t5_off, + t6_off, + t7_off, + t8_off, + s0_off, + s1_off, + s2_off, + s3_off, + s4_off, + s5_off, + s6_off, + s7_off, + s8_off, + fp_off, + ra_off, + fpr_size = fpr31_off - fpr0_off + 1, + gpr_size = ra_off - a0_off + 1, + }; + + const bool _save_vectors; + public: + RegisterSaver(bool save_vectors) : _save_vectors(save_vectors) {} + + OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words); + void restore_live_registers(MacroAssembler* masm); + + int slots_save() { + int slots = gpr_size * VMRegImpl::slots_per_word; + + if (_save_vectors && UseLASX) + slots += FloatRegisterImpl::slots_per_lasx_register * fpr_size; + else if (_save_vectors && UseLSX) + slots += FloatRegisterImpl::slots_per_lsx_register * fpr_size; + else + slots += FloatRegisterImpl::save_slots_per_register * fpr_size; + + return slots; + } + + int gpr_offset(int off) { + int slots_per_fpr = FloatRegisterImpl::save_slots_per_register; + int slots_per_gpr = VMRegImpl::slots_per_word; + + if (_save_vectors && UseLASX) + slots_per_fpr = FloatRegisterImpl::slots_per_lasx_register; + else if (_save_vectors && UseLSX) + slots_per_fpr = FloatRegisterImpl::slots_per_lsx_register; + + return (fpr_size * slots_per_fpr + (off - a0_off) * slots_per_gpr) * VMRegImpl::stack_slot_size; + } + + int fpr_offset(int off) { + int slots_per_fpr = 
FloatRegisterImpl::save_slots_per_register; + + if (_save_vectors && UseLASX) + slots_per_fpr = FloatRegisterImpl::slots_per_lasx_register; + else if (_save_vectors && UseLSX) + slots_per_fpr = FloatRegisterImpl::slots_per_lsx_register; + + return off * slots_per_fpr * VMRegImpl::stack_slot_size; + } + + int ra_offset() { return gpr_offset(ra_off); } + int t5_offset() { return gpr_offset(t5_off); } + int s3_offset() { return gpr_offset(s3_off); } + int v0_offset() { return gpr_offset(a0_off); } + int v1_offset() { return gpr_offset(a1_off); } + + int fpr0_offset() { return fpr_offset(fpr0_off); } + int fpr1_offset() { return fpr_offset(fpr1_off); } + + // During deoptimization only the result register need to be restored + // all the other values have already been extracted. + void restore_result_registers(MacroAssembler* masm); +}; + +OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words) { + + // Always make the frame size 16-byte aligned + int frame_size_in_bytes = align_up(additional_frame_words * wordSize + slots_save() * VMRegImpl::stack_slot_size, StackAlignmentInBytes); + // OopMap frame size is in compiler stack slots (jint's) not bytes or words + int frame_size_in_slots = frame_size_in_bytes / VMRegImpl::stack_slot_size; + // The caller will allocate additional_frame_words + int additional_frame_slots = additional_frame_words * wordSize / VMRegImpl::stack_slot_size; + // CodeBlob frame size is in words. + int frame_size_in_words = frame_size_in_bytes / wordSize; + + *total_frame_words = frame_size_in_words; + + OopMapSet *oop_maps = new OopMapSet(); + OopMap* map = new OopMap(frame_size_in_slots, 0); + + // save registers + __ addi_d(SP, SP, -slots_save() * VMRegImpl::stack_slot_size); + + for (int i = 0; i < fpr_size; i++) { + FloatRegister fpr = as_FloatRegister(i); + int off = fpr_offset(i); + + if (_save_vectors && UseLASX) + __ xvst(fpr, SP, off); + else if (_save_vectors && UseLSX) + __ vst(fpr, SP, off); + else + __ fst_d(fpr, SP, off); + map->set_callee_saved(VMRegImpl::stack2reg(off / VMRegImpl::stack_slot_size + additional_frame_slots), fpr->as_VMReg()); + } + + for (int i = a0_off; i <= a7_off; i++) { + Register gpr = as_Register(A0->encoding() + (i - a0_off)); + int off = gpr_offset(i); + + __ st_d(gpr, SP, gpr_offset(i)); + map->set_callee_saved(VMRegImpl::stack2reg(off / VMRegImpl::stack_slot_size + additional_frame_slots), gpr->as_VMReg()); + } + + for (int i = t0_off; i <= t6_off; i++) { + Register gpr = as_Register(T0->encoding() + (i - t0_off)); + int off = gpr_offset(i); + + __ st_d(gpr, SP, gpr_offset(i)); + map->set_callee_saved(VMRegImpl::stack2reg(off / VMRegImpl::stack_slot_size + additional_frame_slots), gpr->as_VMReg()); + } + __ st_d(T8, SP, gpr_offset(t8_off)); + map->set_callee_saved(VMRegImpl::stack2reg(gpr_offset(t8_off) / VMRegImpl::stack_slot_size + additional_frame_slots), T8->as_VMReg()); + + for (int i = s0_off; i <= s8_off; i++) { + Register gpr = as_Register(S0->encoding() + (i - s0_off)); + int off = gpr_offset(i); + + __ st_d(gpr, SP, gpr_offset(i)); + map->set_callee_saved(VMRegImpl::stack2reg(off / VMRegImpl::stack_slot_size + additional_frame_slots), gpr->as_VMReg()); + } + + __ st_d(FP, SP, gpr_offset(fp_off)); + map->set_callee_saved(VMRegImpl::stack2reg(gpr_offset(fp_off) / VMRegImpl::stack_slot_size + additional_frame_slots), FP->as_VMReg()); + __ st_d(RA, SP, gpr_offset(ra_off)); + map->set_callee_saved(VMRegImpl::stack2reg(gpr_offset(ra_off) / VMRegImpl::stack_slot_size 
+ additional_frame_slots), RA->as_VMReg()); + + __ addi_d(FP, SP, gpr_offset(fp_off)); + + return map; +} + + +// Pop the current frame and restore all the registers that we +// saved. +void RegisterSaver::restore_live_registers(MacroAssembler* masm) { + for (int i = 0; i < fpr_size; i++) { + FloatRegister fpr = as_FloatRegister(i); + int off = fpr_offset(i); + + if (_save_vectors && UseLASX) + __ xvld(fpr, SP, off); + else if (_save_vectors && UseLSX) + __ vld(fpr, SP, off); + else + __ fld_d(fpr, SP, off); + } + + for (int i = a0_off; i <= a7_off; i++) { + Register gpr = as_Register(A0->encoding() + (i - a0_off)); + int off = gpr_offset(i); + + __ ld_d(gpr, SP, gpr_offset(i)); + } + + for (int i = t0_off; i <= t6_off; i++) { + Register gpr = as_Register(T0->encoding() + (i - t0_off)); + int off = gpr_offset(i); + + __ ld_d(gpr, SP, gpr_offset(i)); + } + __ ld_d(T8, SP, gpr_offset(t8_off)); + + for (int i = s0_off; i <= s8_off; i++) { + Register gpr = as_Register(S0->encoding() + (i - s0_off)); + int off = gpr_offset(i); + + __ ld_d(gpr, SP, gpr_offset(i)); + } + + __ ld_d(FP, SP, gpr_offset(fp_off)); + __ ld_d(RA, SP, gpr_offset(ra_off)); + + __ addi_d(SP, SP, slots_save() * VMRegImpl::stack_slot_size); +} + +// Pop the current frame and restore the registers that might be holding +// a result. +void RegisterSaver::restore_result_registers(MacroAssembler* masm) { + // Just restore result register. Only used by deoptimization. By + // now any callee save register that needs to be restore to a c2 + // caller of the deoptee has been extracted into the vframeArray + // and will be stuffed into the c2i adapter we create for later + // restoration so only result registers need to be restored here. + + __ ld_d(V0, SP, gpr_offset(a0_off)); + __ ld_d(V1, SP, gpr_offset(a1_off)); + + __ fld_d(F0, SP, fpr_offset(fpr0_off)); + __ fld_d(F1, SP, fpr_offset(fpr1_off)); + + __ addi_d(SP, SP, gpr_offset(ra_off)); +} + +// Is vector's size (in bytes) bigger than a size saved by default? +// 8 bytes registers are saved by default using fld/fst instructions. +bool SharedRuntime::is_wide_vector(int size) { + return size > 8; +} + +size_t SharedRuntime::trampoline_size() { + return 32; +} + +void SharedRuntime::generate_trampoline(MacroAssembler *masm, address destination) { + // trampoline is not in CodeCache + __ li(T4, (long)destination); + __ jr(T4); +} + +// The java_calling_convention describes stack locations as ideal slots on +// a frame with no abi restrictions. Since we must observe abi restrictions +// (like the placement of the register window) the slots must be biased by +// the following value. + +static int reg2offset_in(VMReg r) { + // Account for saved fp and return address + // This should really be in_preserve_stack_slots + return (r->reg2stack() + 2 * VMRegImpl::slots_per_word) * VMRegImpl::stack_slot_size; // + 2 * VMRegImpl::stack_slot_size); +} + +static int reg2offset_out(VMReg r) { + return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size; +} + +// --------------------------------------------------------------------------- +// Read the array of BasicTypes from a signature, and compute where the +// arguments should go. Values in the VMRegPair regs array refer to 4-byte +// quantities. Values less than SharedInfo::stack0 are registers, those above +// refer to 4-byte stack slots. All stack slots are based off of the stack pointer +// as framesizes are fixed. +// VMRegImpl::stack0 refers to the first slot 0(sp). 
+// and VMRegImpl::stack0+1 refers to the memory word 4-byes higher. Register +// up to RegisterImpl::number_of_registers) are the 32-bit +// integer registers. + +// Pass first five oop/int args in registers T0, A0 - A3. +// Pass float/double/long args in stack. +// Doubles have precedence, so if you pass a mix of floats and doubles +// the doubles will grab the registers before the floats will. + +// Note: the INPUTS in sig_bt are in units of Java argument words, which are +// either 32-bit or 64-bit depending on the build. The OUTPUTS are in 32-bit +// units regardless of build. + + +// --------------------------------------------------------------------------- +// The compiled Java calling convention. +// Pass first five oop/int args in registers T0, A0 - A3. +// Pass float/double/long args in stack. +// Doubles have precedence, so if you pass a mix of floats and doubles +// the doubles will grab the registers before the floats will. + +int SharedRuntime::java_calling_convention(const BasicType *sig_bt, + VMRegPair *regs, + int total_args_passed, + int is_outgoing) { + + // Create the mapping between argument positions and registers. + static const Register INT_ArgReg[Argument::n_register_parameters + 1] = { + T0, A0, A1, A2, A3, A4, A5, A6, A7 + }; + static const FloatRegister FP_ArgReg[Argument::n_float_register_parameters] = { + FA0, FA1, FA2, FA3, FA4, FA5, FA6, FA7 + }; + + uint int_args = 0; + uint fp_args = 0; + uint stk_args = 0; // inc by 2 each time + + for (int i = 0; i < total_args_passed; i++) { + switch (sig_bt[i]) { + case T_VOID: + // halves of T_LONG or T_DOUBLE + assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half"); + regs[i].set_bad(); + break; + case T_BOOLEAN: + case T_CHAR: + case T_BYTE: + case T_SHORT: + case T_INT: + if (int_args < Argument::n_register_parameters + 1) { + regs[i].set1(INT_ArgReg[int_args++]->as_VMReg()); + } else { + regs[i].set1(VMRegImpl::stack2reg(stk_args)); + stk_args += 2; + } + break; + case T_LONG: + assert(sig_bt[i + 1] == T_VOID, "expecting half"); + // fall through + case T_OBJECT: + case T_ARRAY: + case T_ADDRESS: + if (int_args < Argument::n_register_parameters + 1) { + regs[i].set2(INT_ArgReg[int_args++]->as_VMReg()); + } else { + regs[i].set2(VMRegImpl::stack2reg(stk_args)); + stk_args += 2; + } + break; + case T_FLOAT: + if (fp_args < Argument::n_float_register_parameters) { + regs[i].set1(FP_ArgReg[fp_args++]->as_VMReg()); + } else { + regs[i].set1(VMRegImpl::stack2reg(stk_args)); + stk_args += 2; + } + break; + case T_DOUBLE: + assert(sig_bt[i + 1] == T_VOID, "expecting half"); + if (fp_args < Argument::n_float_register_parameters) { + regs[i].set2(FP_ArgReg[fp_args++]->as_VMReg()); + } else { + regs[i].set2(VMRegImpl::stack2reg(stk_args)); + stk_args += 2; + } + break; + default: + ShouldNotReachHere(); + break; + } + } + + return round_to(stk_args, 2); +} + +// Patch the callers callsite with entry to compiled code if it exists. +static void patch_callers_callsite(MacroAssembler *masm) { + Label L; + __ ld_ptr(AT, Rmethod, in_bytes(Method::code_offset())); + __ beq(AT, R0, L); + // Schedule the branch target address early. 
+ // Call into the VM to patch the caller, then jump to compiled callee + // T5 isn't live so capture return address while we easily can + __ move(T5, RA); + + __ pushad(); +#ifdef COMPILER2 + // C2 may leave the stack dirty if not in SSE2+ mode + __ empty_FPU_stack(); +#endif + + // VM needs caller's callsite + // VM needs target method + + __ move(A0, Rmethod); + __ move(A1, T5); + // we should preserve the return address + __ move(TSR, SP); + __ li(AT, -(StackAlignmentInBytes)); // align the stack + __ andr(SP, SP, AT); + __ call(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite), + relocInfo::runtime_call_type); + + __ move(SP, TSR); + __ popad(); + __ bind(L); +} + +static void gen_c2i_adapter(MacroAssembler *masm, + int total_args_passed, + int comp_args_on_stack, + const BasicType *sig_bt, + const VMRegPair *regs, + Label& skip_fixup) { + + // Before we get into the guts of the C2I adapter, see if we should be here + // at all. We've come from compiled code and are attempting to jump to the + // interpreter, which means the caller made a static call to get here + // (vcalls always get a compiled target if there is one). Check for a + // compiled target. If there is one, we need to patch the caller's call. + // However we will run interpreted if we come thru here. The next pass + // thru the call site will run compiled. If we ran compiled here then + // we can (theorectically) do endless i2c->c2i->i2c transitions during + // deopt/uncommon trap cycles. If we always go interpreted here then + // we can have at most one and don't need to play any tricks to keep + // from endlessly growing the stack. + // + // Actually if we detected that we had an i2c->c2i transition here we + // ought to be able to reset the world back to the state of the interpreted + // call and not bother building another interpreter arg area. We don't + // do that at this point. + + patch_callers_callsite(masm); + __ bind(skip_fixup); + +#ifdef COMPILER2 + __ empty_FPU_stack(); +#endif + //this is for native ? + // Since all args are passed on the stack, total_args_passed * interpreter_ + // stack_element_size is the + // space we need. + int extraspace = total_args_passed * Interpreter::stackElementSize; + + // stack is aligned, keep it that way + extraspace = round_to(extraspace, 2*wordSize); + + // Get return address + __ move(T5, RA); + // set senderSP value + //refer to interpreter_loongarch.cpp:generate_asm_entry + __ move(Rsender, SP); + __ addi_d(SP, SP, -extraspace); + + // Now write the args into the outgoing interpreter space + for (int i = 0; i < total_args_passed; i++) { + if (sig_bt[i] == T_VOID) { + assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half"); + continue; + } + + // st_off points to lowest address on stack. 
+ int st_off = ((total_args_passed - 1) - i) * Interpreter::stackElementSize; + // Say 4 args: + // i st_off + // 0 12 T_LONG + // 1 8 T_VOID + // 2 4 T_OBJECT + // 3 0 T_BOOL + VMReg r_1 = regs[i].first(); + VMReg r_2 = regs[i].second(); + if (!r_1->is_valid()) { + assert(!r_2->is_valid(), ""); + continue; + } + if (r_1->is_stack()) { + // memory to memory use fpu stack top + int ld_off = r_1->reg2stack() * VMRegImpl::stack_slot_size + extraspace; + if (!r_2->is_valid()) { + __ ld_ptr(AT, Address(SP, ld_off)); + __ st_ptr(AT, Address(SP, st_off)); + + } else { + + + int next_off = st_off - Interpreter::stackElementSize; + __ ld_ptr(AT, Address(SP, ld_off)); + __ st_ptr(AT, Address(SP, st_off)); + + // Ref to is_Register condition + if(sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) + __ st_ptr(AT, SP, st_off - 8); + } + } else if (r_1->is_Register()) { + Register r = r_1->as_Register(); + if (!r_2->is_valid()) { + __ st_d(r, SP, st_off); + } else { + //FIXME, LA will not enter here + // long/double in gpr + __ st_d(r, SP, st_off); + // In [java/util/zip/ZipFile.java] + // + // private static native long open(String name, int mode, long lastModified); + // private static native int getTotal(long jzfile); + // + // We need to transfer T_LONG paramenters from a compiled method to a native method. + // It's a complex process: + // + // Caller -> lir_static_call -> gen_resolve_stub + // -> -- resolve_static_call_C + // `- gen_c2i_adapter() [*] + // | + // `- AdapterHandlerLibrary::get_create_apapter_index + // -> generate_native_entry + // -> InterpreterRuntime::SignatureHandlerGenerator::pass_long [**] + // + // In [**], T_Long parameter is stored in stack as: + // + // (high) + // | | + // ----------- + // | 8 bytes | + // | (void) | + // ----------- + // | 8 bytes | + // | (long) | + // ----------- + // | | + // (low) + // + // However, the sequence is reversed here: + // + // (high) + // | | + // ----------- + // | 8 bytes | + // | (long) | + // ----------- + // | 8 bytes | + // | (void) | + // ----------- + // | | + // (low) + // + // So I stored another 8 bytes in the T_VOID slot. It then can be accessed from generate_native_entry(). + // + if (sig_bt[i] == T_LONG) + __ st_d(r, SP, st_off - 8); + } + } else if (r_1->is_FloatRegister()) { + assert(sig_bt[i] == T_FLOAT || sig_bt[i] == T_DOUBLE, "Must be a float register"); + + FloatRegister fr = r_1->as_FloatRegister(); + if (sig_bt[i] == T_FLOAT) + __ fst_s(fr, SP, st_off); + else { + __ fst_d(fr, SP, st_off); + __ fst_d(fr, SP, st_off - 8); // T_DOUBLE needs two slots + } + } + } + + // Schedule the branch target address early. + __ ld_ptr(AT, Rmethod, in_bytes(Method::interpreter_entry_offset()) ); + // And repush original return address + __ move(RA, T5); + __ jr (AT); +} + +void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm, + int total_args_passed, + int comp_args_on_stack, + const BasicType *sig_bt, + const VMRegPair *regs) { + + // Generate an I2C adapter: adjust the I-frame to make space for the C-frame + // layout. Lesp was saved by the calling I-frame and will be restored on + // return. Meanwhile, outgoing arg space is all owned by the callee + // C-frame, so we can mangle it at will. After adjusting the frame size, + // hoist register arguments and repack other args according to the compiled + // code convention. Finally, end in a jump to the compiled code. The entry + // point address is the start of the buffer. + + // We will only enter here from an interpreted frame and never from after + // passing thru a c2i. 
Azul allowed this but we do not. If we lose the + // race and use a c2i we will remain interpreted for the race loser(s). + // This removes all sorts of headaches on the LA side and also eliminates + // the possibility of having c2i -> i2c -> c2i -> ... endless transitions. + + __ move(T4, SP); + + // Cut-out for having no stack args. Since up to 2 int/oop args are passed + // in registers, we will occasionally have no stack args. + int comp_words_on_stack = 0; + if (comp_args_on_stack) { + // Sig words on the stack are greater-than VMRegImpl::stack0. Those in + // registers are below. By subtracting stack0, we either get a negative + // number (all values in registers) or the maximum stack slot accessed. + // int comp_args_on_stack = VMRegImpl::reg2stack(max_arg); + // Convert 4-byte stack slots to words. + // did LA need round? FIXME + comp_words_on_stack = round_to(comp_args_on_stack*4, wordSize)>>LogBytesPerWord; + // Round up to miminum stack alignment, in wordSize + comp_words_on_stack = round_to(comp_words_on_stack, 2); + __ addi_d(SP, SP, -comp_words_on_stack * wordSize); + } + + // Align the outgoing SP + __ li(AT, -(StackAlignmentInBytes)); + __ andr(SP, SP, AT); + // push the return address on the stack (note that pushing, rather + // than storing it, yields the correct frame alignment for the callee) + // Put saved SP in another register + const Register saved_sp = T5; + __ move(saved_sp, T4); + + + // Will jump to the compiled code just as if compiled code was doing it. + // Pre-load the register-jump target early, to schedule it better. + __ ld_d(T4, Rmethod, in_bytes(Method::from_compiled_offset())); + +#if INCLUDE_JVMCI + if (EnableJVMCI) { + // check if this call should be routed towards a specific entry point + __ ld_d(AT, Address(TREG, in_bytes(JavaThread::jvmci_alternate_call_target_offset()))); + Label no_alternative_target; + __ beqz(AT, no_alternative_target); + __ move(T4, AT); + __ st_d(R0, Address(TREG, in_bytes(JavaThread::jvmci_alternate_call_target_offset()))); + __ bind(no_alternative_target); + } +#endif // INCLUDE_JVMCI + + // Now generate the shuffle code. Pick up all register args and move the + // rest through the floating point stack top. + for (int i = 0; i < total_args_passed; i++) { + if (sig_bt[i] == T_VOID) { + // Longs and doubles are passed in native word order, but misaligned + // in the 32-bit build. + assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half"); + continue; + } + + // Pick up 0, 1 or 2 words from SP+offset. + + assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(), "scrambled load targets?"); + // Load in argument order going down. + int ld_off = (total_args_passed -1 - i)*Interpreter::stackElementSize; + // Point to interpreter value (vs. tag) + int next_off = ld_off - Interpreter::stackElementSize; + VMReg r_1 = regs[i].first(); + VMReg r_2 = regs[i].second(); + if (!r_1->is_valid()) { + assert(!r_2->is_valid(), ""); + continue; + } + if (r_1->is_stack()) { + // Convert stack slot to an SP offset (+ wordSize to + // account for return address ) + // NOTICE HERE!!!! I sub a wordSize here + int st_off = regs[i].first()->reg2stack()*VMRegImpl::stack_slot_size; + //+ wordSize; + + if (!r_2->is_valid()) { + __ ld_d(AT, saved_sp, ld_off); + __ st_d(AT, SP, st_off); + } else { + // Interpreter local[n] == MSW, local[n+1] == LSW however locals + // are accessed as negative so LSW is at LOW address + + // ld_off is MSW so get LSW + // st_off is LSW (i.e. 
reg.first()) + + // [./org/eclipse/swt/graphics/GC.java] + // void drawImageXRender(Image srcImage, int srcX, int srcY, int srcWidth, int srcHeight, + // int destX, int destY, int destWidth, int destHeight, + // boolean simple, + // int imgWidth, int imgHeight, + // long maskPixmap, <-- Pass T_LONG in stack + // int maskType); + // Before this modification, Eclipse displays icons with solid black background. + // + __ ld_d(AT, saved_sp, ld_off); + if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) + __ ld_d(AT, saved_sp, ld_off - 8); + __ st_d(AT, SP, st_off); + } + } else if (r_1->is_Register()) { // Register argument + Register r = r_1->as_Register(); + if (r_2->is_valid()) { + // Remember r_1 is low address (and LSB on LA) + // So r_2 gets loaded from high address regardless of the platform + assert(r_2->as_Register() == r_1->as_Register(), ""); + __ ld_d(r, saved_sp, ld_off); + + // + // For T_LONG type, the real layout is as below: + // + // (high) + // | | + // ----------- + // | 8 bytes | + // | (void) | + // ----------- + // | 8 bytes | + // | (long) | + // ----------- + // | | + // (low) + // + // We should load the low-8 bytes. + // + if (sig_bt[i] == T_LONG) + __ ld_d(r, saved_sp, ld_off - 8); + } else { + __ ld_w(r, saved_sp, ld_off); + } + } else if (r_1->is_FloatRegister()) { // Float Register + assert(sig_bt[i] == T_FLOAT || sig_bt[i] == T_DOUBLE, "Must be a float register"); + + FloatRegister fr = r_1->as_FloatRegister(); + if (sig_bt[i] == T_FLOAT) + __ fld_s(fr, saved_sp, ld_off); + else { + __ fld_d(fr, saved_sp, ld_off); + __ fld_d(fr, saved_sp, ld_off - 8); + } + } + } + + // 6243940 We might end up in handle_wrong_method if + // the callee is deoptimized as we race thru here. If that + // happens we don't want to take a safepoint because the + // caller frame will look interpreted and arguments are now + // "compiled" so it is much better to make this transition + // invisible to the stack walking code. Unfortunately if + // we try and find the callee by normal means a safepoint + // is possible. So we stash the desired callee in the thread + // and the vm will find there should this case occur. +#ifndef OPT_THREAD + Register thread = T8; + __ get_thread(thread); +#else + Register thread = TREG; +#endif + __ st_d(Rmethod, thread, in_bytes(JavaThread::callee_target_offset())); + + // move methodOop to T5 in case we end up in an c2i adapter. + // the c2i adapters expect methodOop in T5 (c2) because c2's + // resolve stubs return the result (the method) in T5. + // I'd love to fix this. + __ move(T5, Rmethod); + __ jr(T4); +} + +// --------------------------------------------------------------- +AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm, + int total_args_passed, + int comp_args_on_stack, + const BasicType *sig_bt, + const VMRegPair *regs, + AdapterFingerPrint* fingerprint) { + address i2c_entry = __ pc(); + + gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs); + + // ------------------------------------------------------------------------- + // Generate a C2I adapter. On entry we know G5 holds the methodOop. The + // args start out packed in the compiled layout. They need to be unpacked + // into the interpreter layout. This will almost always require some stack + // space. We grow the current (compiled) stack, then repack the args. We + // finally end in a jump to the generic interpreter entry point. 
On exit
+  // from the interpreter, the interpreter will restore our SP (lest the
+  // compiled code, which relies solely on SP and not FP, get sick).
+
+  address c2i_unverified_entry = __ pc();
+  Label skip_fixup;
+  {
+    Register holder = T1;
+    Register receiver = T0;
+    Register temp = T8;
+    address ic_miss = SharedRuntime::get_ic_miss_stub();
+
+    Label missed;
+
+    // add for compressedoops
+    __ load_klass(temp, receiver);
+
+    __ ld_ptr(AT, holder, CompiledICHolder::holder_klass_offset());
+    __ ld_ptr(Rmethod, holder, CompiledICHolder::holder_metadata_offset());
+    __ bne(AT, temp, missed);
+    // Method might have been compiled since the call site was patched to
+    // interpreted; if that is the case treat it as a miss so we can get
+    // the call site corrected.
+    __ ld_ptr(AT, Rmethod, in_bytes(Method::code_offset()));
+    __ beq(AT, R0, skip_fixup);
+    __ bind(missed);
+
+    __ jmp(ic_miss, relocInfo::runtime_call_type);
+  }
+  address c2i_entry = __ pc();
+
+  gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup);
+
+  __ flush();
+  return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry);
+}
+
+int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
+                                        VMRegPair *regs,
+                                        VMRegPair *regs2,
+                                        int total_args_passed) {
+  assert(regs2 == NULL, "not needed on LA");
+  // Return the number of VMReg stack_slots needed for the args.
+  // This value does not include an abi space (like register window
+  // save area).
+
+  // We return the amount of VMRegImpl stack slots we need to reserve for all
+  // the arguments NOT counting out_preserve_stack_slots. Since we always
+  // have space for storing at least 6 registers to memory we start with that.
+  // See int_stk_helper for a further discussion.
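+  // Illustrative worked example (hypothetical signature, shown only to make
+  // the assignment policy below concrete): for a C signature of
+  //   (JNIEnv*, jclass, jint, jlong, then nine jfloat arguments)
+  // the loop assigns JNIEnv* -> A0, jclass -> A1, jint -> A2, jlong -> A3,
+  // the first eight FP arguments -> FA0-FA7, the ninth FP argument -> the
+  // next free integer register (A4), and any further arguments would be
+  // placed on the stack, two VMRegImpl slots each.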
+ static const Register INT_ArgReg[Argument::n_register_parameters] = { + A0, A1, A2, A3, A4, A5, A6, A7 + }; + static const FloatRegister FP_ArgReg[Argument::n_float_register_parameters] = { + FA0, FA1, FA2, FA3, FA4, FA5, FA6, FA7 + }; + uint int_args = 0; + uint fp_args = 0; + uint stk_args = 0; // inc by 2 each time + +// Example: +// n java.lang.UNIXProcess::forkAndExec +// private native int forkAndExec(byte[] prog, +// byte[] argBlock, int argc, +// byte[] envBlock, int envc, +// byte[] dir, +// boolean redirectErrorStream, +// FileDescriptor stdin_fd, +// FileDescriptor stdout_fd, +// FileDescriptor stderr_fd) +// JNIEXPORT jint JNICALL +// Java_java_lang_UNIXProcess_forkAndExec(JNIEnv *env, +// jobject process, +// jbyteArray prog, +// jbyteArray argBlock, jint argc, +// jbyteArray envBlock, jint envc, +// jbyteArray dir, +// jboolean redirectErrorStream, +// jobject stdin_fd, +// jobject stdout_fd, +// jobject stderr_fd) +// +// ::c_calling_convention +// 0: // env <-- a0 +// 1: L // klass/obj <-- t0 => a1 +// 2: [ // prog[] <-- a0 => a2 +// 3: [ // argBlock[] <-- a1 => a3 +// 4: I // argc <-- a2 => a4 +// 5: [ // envBlock[] <-- a3 => a5 +// 6: I // envc <-- a4 => a5 +// 7: [ // dir[] <-- a5 => a7 +// 8: Z // redirectErrorStream <-- a6 => sp[0] +// 9: L // stdin <-- a7 => sp[8] +// 10: L // stdout fp[16] => sp[16] +// 11: L // stderr fp[24] => sp[24] +// + for (int i = 0; i < total_args_passed; i++) { + switch (sig_bt[i]) { + case T_VOID: // Halves of longs and doubles + assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half"); + regs[i].set_bad(); + break; + case T_BOOLEAN: + case T_CHAR: + case T_BYTE: + case T_SHORT: + case T_INT: + if (int_args < Argument::n_register_parameters) { + regs[i].set1(INT_ArgReg[int_args++]->as_VMReg()); + } else { + regs[i].set1(VMRegImpl::stack2reg(stk_args)); + stk_args += 2; + } + break; + case T_LONG: + assert(sig_bt[i + 1] == T_VOID, "expecting half"); + // fall through + case T_OBJECT: + case T_ARRAY: + case T_ADDRESS: + case T_METADATA: + if (int_args < Argument::n_register_parameters) { + regs[i].set2(INT_ArgReg[int_args++]->as_VMReg()); + } else { + regs[i].set2(VMRegImpl::stack2reg(stk_args)); + stk_args += 2; + } + break; + case T_FLOAT: + if (fp_args < Argument::n_float_register_parameters) { + regs[i].set1(FP_ArgReg[fp_args++]->as_VMReg()); + } else if (int_args < Argument::n_register_parameters) { + regs[i].set1(INT_ArgReg[int_args++]->as_VMReg()); + } else { + regs[i].set1(VMRegImpl::stack2reg(stk_args)); + stk_args += 2; + } + break; + case T_DOUBLE: + assert(sig_bt[i + 1] == T_VOID, "expecting half"); + if (fp_args < Argument::n_float_register_parameters) { + regs[i].set2(FP_ArgReg[fp_args++]->as_VMReg()); + } else if (int_args < Argument::n_register_parameters) { + regs[i].set2(INT_ArgReg[int_args++]->as_VMReg()); + } else { + regs[i].set2(VMRegImpl::stack2reg(stk_args)); + stk_args += 2; + } + break; + default: + ShouldNotReachHere(); + break; + } + } + + return round_to(stk_args, 2); +} + +// --------------------------------------------------------------------------- +void SharedRuntime::save_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) { + // We always ignore the frame_slots arg and just use the space just below frame pointer + // which by this time is free to use + switch (ret_type) { + case T_FLOAT: + __ fst_s(FSF, FP, -wordSize); + break; + case T_DOUBLE: + __ fst_d(FSF, FP, -wordSize ); + break; + case T_VOID: break; + case T_LONG: + __ st_d(V0, FP, -wordSize); 
+ break; + case T_OBJECT: + case T_ARRAY: + __ st_d(V0, FP, -wordSize); + break; + default: { + __ st_w(V0, FP, -wordSize); + } + } +} + +void SharedRuntime::restore_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) { + // We always ignore the frame_slots arg and just use the space just below frame pointer + // which by this time is free to use + switch (ret_type) { + case T_FLOAT: + __ fld_s(FSF, FP, -wordSize); + break; + case T_DOUBLE: + __ fld_d(FSF, FP, -wordSize ); + break; + case T_LONG: + __ ld_d(V0, FP, -wordSize); + break; + case T_VOID: break; + case T_OBJECT: + case T_ARRAY: + __ ld_d(V0, FP, -wordSize); + break; + default: { + __ ld_w(V0, FP, -wordSize); + } + } +} + +static void save_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) { + for ( int i = first_arg ; i < arg_count ; i++ ) { + if (args[i].first()->is_Register()) { + __ push(args[i].first()->as_Register()); + } else if (args[i].first()->is_FloatRegister()) { + __ push(args[i].first()->as_FloatRegister()); + } + } +} + +static void restore_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) { + for ( int i = arg_count - 1 ; i >= first_arg ; i-- ) { + if (args[i].first()->is_Register()) { + __ pop(args[i].first()->as_Register()); + } else if (args[i].first()->is_FloatRegister()) { + __ pop(args[i].first()->as_FloatRegister()); + } + } +} + +// A simple move of integer like type +static void simple_move32(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { + if (src.first()->is_stack()) { + if (dst.first()->is_stack()) { + // stack to stack + __ ld_w(AT, FP, reg2offset_in(src.first())); + __ st_d(AT, SP, reg2offset_out(dst.first())); + } else { + // stack to reg + __ ld_w(dst.first()->as_Register(), FP, reg2offset_in(src.first())); + } + } else if (dst.first()->is_stack()) { + // reg to stack + __ st_d(src.first()->as_Register(), SP, reg2offset_out(dst.first())); + } else { + if (dst.first() != src.first()){ + __ move(dst.first()->as_Register(), src.first()->as_Register()); + } + } +} + +// An oop arg. Must pass a handle not the oop itself +static void object_move(MacroAssembler* masm, + OopMap* map, + int oop_handle_offset, + int framesize_in_slots, + VMRegPair src, + VMRegPair dst, + bool is_receiver, + int* receiver_offset) { + + // must pass a handle. 
First figure out the location we use as a handle + + if (src.first()->is_stack()) { + // Oop is already on the stack as an argument + Register rHandle = T5; + Label nil; + __ xorr(rHandle, rHandle, rHandle); + __ ld_d(AT, FP, reg2offset_in(src.first())); + __ beq(AT, R0, nil); + __ lea(rHandle, Address(FP, reg2offset_in(src.first()))); + __ bind(nil); + if(dst.first()->is_stack())__ st_d( rHandle, SP, reg2offset_out(dst.first())); + else __ move( (dst.first())->as_Register(), rHandle); + + int offset_in_older_frame = src.first()->reg2stack() + SharedRuntime::out_preserve_stack_slots(); + map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + framesize_in_slots)); + if (is_receiver) { + *receiver_offset = (offset_in_older_frame + framesize_in_slots) * VMRegImpl::stack_slot_size; + } + } else { + // Oop is in an a register we must store it to the space we reserve + // on the stack for oop_handles + const Register rOop = src.first()->as_Register(); + assert( (rOop->encoding() >= A0->encoding()) && (rOop->encoding() <= T0->encoding()),"wrong register"); + const Register rHandle = T5; + //Important: refer to java_calling_convertion + int oop_slot = (rOop->encoding() - A0->encoding()) * VMRegImpl::slots_per_word + oop_handle_offset; + int offset = oop_slot*VMRegImpl::stack_slot_size; + Label skip; + __ st_d( rOop , SP, offset ); + map->set_oop(VMRegImpl::stack2reg(oop_slot)); + __ xorr( rHandle, rHandle, rHandle); + __ beq(rOop, R0, skip); + __ lea(rHandle, Address(SP, offset)); + __ bind(skip); + // Store the handle parameter + if(dst.first()->is_stack())__ st_d( rHandle, SP, reg2offset_out(dst.first())); + else __ move((dst.first())->as_Register(), rHandle); + + if (is_receiver) { + *receiver_offset = offset; + } + } +} + +// A float arg may have to do float reg int reg conversion +static void float_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { + assert(!src.second()->is_valid() && !dst.second()->is_valid(), "bad float_move"); + if (src.first()->is_stack()) { + // stack to stack/reg + if (dst.first()->is_stack()) { + __ ld_w(AT, FP, reg2offset_in(src.first())); + __ st_w(AT, SP, reg2offset_out(dst.first())); + } else if (dst.first()->is_FloatRegister()) { + __ fld_s(dst.first()->as_FloatRegister(), FP, reg2offset_in(src.first())); + } else { + __ ld_w(dst.first()->as_Register(), FP, reg2offset_in(src.first())); + } + } else { + // reg to stack/reg + if(dst.first()->is_stack()) { + __ fst_s(src.first()->as_FloatRegister(), SP, reg2offset_out(dst.first())); + } else if (dst.first()->is_FloatRegister()) { + __ fmov_s(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister()); + } else { + __ movfr2gr_s(dst.first()->as_Register(), src.first()->as_FloatRegister()); + } + } +} + +// A long move +static void long_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { + + // The only legal possibility for a long_move VMRegPair is: + // 1: two stack slots (possibly unaligned) + // as neither the java or C calling convention will use registers + // for longs. 
+ if (src.first()->is_stack()) { + assert(src.second()->is_stack() && dst.second()->is_stack(), "must be all stack"); + if( dst.first()->is_stack()){ + __ ld_d(AT, FP, reg2offset_in(src.first())); + __ st_d(AT, SP, reg2offset_out(dst.first())); + } else { + __ ld_d(dst.first()->as_Register(), FP, reg2offset_in(src.first())); + } + } else { + if( dst.first()->is_stack()){ + __ st_d(src.first()->as_Register(), SP, reg2offset_out(dst.first())); + } else { + __ move(dst.first()->as_Register(), src.first()->as_Register()); + } + } +} + +// A double move +static void double_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { + + // The only legal possibilities for a double_move VMRegPair are: + // The painful thing here is that like long_move a VMRegPair might be + + // Because of the calling convention we know that src is either + // 1: a single physical register (xmm registers only) + // 2: two stack slots (possibly unaligned) + // dst can only be a pair of stack slots. + + if (src.first()->is_stack()) { + // source is all stack + if( dst.first()->is_stack()){ + __ ld_d(AT, FP, reg2offset_in(src.first())); + __ st_d(AT, SP, reg2offset_out(dst.first())); + } else if (dst.first()->is_FloatRegister()) { + __ fld_d(dst.first()->as_FloatRegister(), FP, reg2offset_in(src.first())); + } else { + __ ld_d(dst.first()->as_Register(), FP, reg2offset_in(src.first())); + } + } else { + // reg to stack/reg + // No worries about stack alignment + if( dst.first()->is_stack()){ + __ fst_d(src.first()->as_FloatRegister(), SP, reg2offset_out(dst.first())); + } else if (dst.first()->is_FloatRegister()) { + __ fmov_d(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister()); + } else { + __ movfr2gr_d(dst.first()->as_Register(), src.first()->as_FloatRegister()); + } + } +} + +static void verify_oop_args(MacroAssembler* masm, + methodHandle method, + const BasicType* sig_bt, + const VMRegPair* regs) { + Register temp_reg = T4; // not part of any compiled calling seq + if (VerifyOops) { + for (int i = 0; i < method->size_of_parameters(); i++) { + if (sig_bt[i] == T_OBJECT || + sig_bt[i] == T_ARRAY) { + VMReg r = regs[i].first(); + assert(r->is_valid(), "bad oop arg"); + if (r->is_stack()) { + __ ld_d(temp_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize)); + __ verify_oop(temp_reg); + } else { + __ verify_oop(r->as_Register()); + } + } + } + } +} + +static void gen_special_dispatch(MacroAssembler* masm, + methodHandle method, + const BasicType* sig_bt, + const VMRegPair* regs) { + verify_oop_args(masm, method, sig_bt, regs); + vmIntrinsics::ID iid = method->intrinsic_id(); + + // Now write the args into the outgoing interpreter space + bool has_receiver = false; + Register receiver_reg = noreg; + int member_arg_pos = -1; + Register member_reg = noreg; + int ref_kind = MethodHandles::signature_polymorphic_intrinsic_ref_kind(iid); + if (ref_kind != 0) { + member_arg_pos = method->size_of_parameters() - 1; // trailing MemberName argument + member_reg = S3; // known to be free at this point + has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind); + } else if (iid == vmIntrinsics::_invokeBasic) { + has_receiver = true; + } else { + fatal("unexpected intrinsic id %d", iid); + } + + if (member_reg != noreg) { + // Load the member_arg into register, if necessary. 
+ SharedRuntime::check_member_name_argument_is_last_argument(method, sig_bt, regs); + VMReg r = regs[member_arg_pos].first(); + if (r->is_stack()) { + __ ld_d(member_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size)); + } else { + // no data motion is needed + member_reg = r->as_Register(); + } + } + + if (has_receiver) { + // Make sure the receiver is loaded into a register. + assert(method->size_of_parameters() > 0, "oob"); + assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object"); + VMReg r = regs[0].first(); + assert(r->is_valid(), "bad receiver arg"); + if (r->is_stack()) { + // Porting note: This assumes that compiled calling conventions always + // pass the receiver oop in a register. If this is not true on some + // platform, pick a temp and load the receiver from stack. + fatal("receiver always in a register"); + receiver_reg = SSR; // known to be free at this point + __ ld_d(receiver_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size)); + } else { + // no data motion is needed + receiver_reg = r->as_Register(); + } + } + + // Figure out which address we are really jumping to: + MethodHandles::generate_method_handle_dispatch(masm, iid, + receiver_reg, member_reg, /*for_compiler_entry:*/ true); +} + +// --------------------------------------------------------------------------- +// Generate a native wrapper for a given method. The method takes arguments +// in the Java compiled code convention, marshals them to the native +// convention (handlizes oops, etc), transitions to native, makes the call, +// returns to java state (possibly blocking), unhandlizes any result and +// returns. +nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler* masm, + const methodHandle& method, + int compile_id, + BasicType* in_sig_bt, + VMRegPair* in_regs, + BasicType ret_type, + address critical_entry) { + if (method->is_method_handle_intrinsic()) { + vmIntrinsics::ID iid = method->intrinsic_id(); + intptr_t start = (intptr_t)__ pc(); + int vep_offset = ((intptr_t)__ pc()) - start; + gen_special_dispatch(masm, + method, + in_sig_bt, + in_regs); + assert(((intptr_t)__ pc() - start - vep_offset) >= 1 * BytesPerInstWord, + "valid size for make_non_entrant"); + int frame_complete = ((intptr_t)__ pc()) - start; // not complete, period + __ flush(); + int stack_slots = SharedRuntime::out_preserve_stack_slots(); // no out slots at all, actually + return nmethod::new_native_nmethod(method, + compile_id, + masm->code(), + vep_offset, + frame_complete, + stack_slots / VMRegImpl::slots_per_word, + in_ByteSize(-1), + in_ByteSize(-1), + (OopMapSet*)NULL); + } + + bool is_critical_native = true; + address native_func = critical_entry; + if (native_func == NULL) { + native_func = method->native_function(); + is_critical_native = false; + } + assert(native_func != NULL, "must have function"); + + // Native nmethod wrappers never take possesion of the oop arguments. + // So the caller will gc the arguments. The only thing we need an + // oopMap for is if the call is static + // + // An OopMap for lock (and class if static), and one for the VM call itself + OopMapSet *oop_maps = new OopMapSet(); + + // We have received a description of where all the java arg are located + // on entry to the wrapper. We need to convert these args to where + // the jni function will expect them. 
To figure out where they go + // we convert the java signature to a C signature by inserting + // the hidden arguments as arg[0] and possibly arg[1] (static method) + + const int total_in_args = method->size_of_parameters(); + int total_c_args = total_in_args; + if (!is_critical_native) { + total_c_args += 1; + if (method->is_static()) { + total_c_args++; + } + } else { + for (int i = 0; i < total_in_args; i++) { + if (in_sig_bt[i] == T_ARRAY) { + total_c_args++; + } + } + } + + BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args); + VMRegPair* out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args); + BasicType* in_elem_bt = NULL; + + int argc = 0; + if (!is_critical_native) { + out_sig_bt[argc++] = T_ADDRESS; + if (method->is_static()) { + out_sig_bt[argc++] = T_OBJECT; + } + + for (int i = 0; i < total_in_args ; i++ ) { + out_sig_bt[argc++] = in_sig_bt[i]; + } + } else { + Thread* THREAD = Thread::current(); + in_elem_bt = NEW_RESOURCE_ARRAY(BasicType, total_in_args); + SignatureStream ss(method->signature()); + for (int i = 0; i < total_in_args ; i++ ) { + if (in_sig_bt[i] == T_ARRAY) { + // Arrays are passed as int, elem* pair + out_sig_bt[argc++] = T_INT; + out_sig_bt[argc++] = T_ADDRESS; + Symbol* atype = ss.as_symbol(CHECK_NULL); + const char* at = atype->as_C_string(); + if (strlen(at) == 2) { + assert(at[0] == '[', "must be"); + switch (at[1]) { + case 'B': in_elem_bt[i] = T_BYTE; break; + case 'C': in_elem_bt[i] = T_CHAR; break; + case 'D': in_elem_bt[i] = T_DOUBLE; break; + case 'F': in_elem_bt[i] = T_FLOAT; break; + case 'I': in_elem_bt[i] = T_INT; break; + case 'J': in_elem_bt[i] = T_LONG; break; + case 'S': in_elem_bt[i] = T_SHORT; break; + case 'Z': in_elem_bt[i] = T_BOOLEAN; break; + default: ShouldNotReachHere(); + } + } + } else { + out_sig_bt[argc++] = in_sig_bt[i]; + in_elem_bt[i] = T_VOID; + } + if (in_sig_bt[i] != T_VOID) { + assert(in_sig_bt[i] == ss.type(), "must match"); + ss.next(); + } + } + } + + // Now figure out where the args must be stored and how much stack space + // they require (neglecting out_preserve_stack_slots but space for storing + // the 1st six register arguments). It's weird see int_stk_helper. + // + int out_arg_slots; + out_arg_slots = c_calling_convention(out_sig_bt, out_regs, NULL, total_c_args); + + // Compute framesize for the wrapper. We need to handlize all oops in + // registers. We must create space for them here that is disjoint from + // the windowed save area because we have no control over when we might + // flush the window again and overwrite values that gc has since modified. + // (The live window race) + // + // We always just allocate 6 word for storing down these object. This allow + // us to simply record the base and use the Ireg number to decide which + // slot to use. (Note that the reg number is the inbound number not the + // outbound number). + // We must shuffle args to match the native convention, and include var-args space. + + // Calculate the total number of stack slots we will need. + + // First count the abi requirement plus all of the outgoing args + int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots; + + // Now the space for the inbound oop handle area + int total_save_slots = 9 * VMRegImpl::slots_per_word; // 9 arguments passed in registers + if (is_critical_native) { + // Critical natives may have to call out so they need a save area + // for register arguments. 
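+    // Worked example (hypothetical, for illustration): a critical native taking
+    // (jint, jlong, jfloat, jdouble) entirely in registers counts
+    // single_slots = 2 (jint, jfloat) and double_slots = 2 (jlong, jdouble),
+    // so total_save_slots = 2 * 2 + 2 = 6 VMRegImpl slots; because
+    // double_slots != 0, stack_slots is then rounded up to an even slot count
+    // before the save area is laid out.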
+ int double_slots = 0; + int single_slots = 0; + for ( int i = 0; i < total_in_args; i++) { + if (in_regs[i].first()->is_Register()) { + const Register reg = in_regs[i].first()->as_Register(); + switch (in_sig_bt[i]) { + case T_BOOLEAN: + case T_BYTE: + case T_SHORT: + case T_CHAR: + case T_INT: single_slots++; break; + case T_ARRAY: + case T_LONG: double_slots++; break; + default: ShouldNotReachHere(); + } + } else if (in_regs[i].first()->is_FloatRegister()) { + switch (in_sig_bt[i]) { + case T_FLOAT: single_slots++; break; + case T_DOUBLE: double_slots++; break; + default: ShouldNotReachHere(); + } + } + } + total_save_slots = double_slots * 2 + single_slots; + // align the save area + if (double_slots != 0) { + stack_slots = round_to(stack_slots, 2); + } + } + + int oop_handle_offset = stack_slots; + stack_slots += total_save_slots; + + // Now any space we need for handlizing a klass if static method + + int klass_slot_offset = 0; + int klass_offset = -1; + int lock_slot_offset = 0; + bool is_static = false; + + if (method->is_static()) { + klass_slot_offset = stack_slots; + stack_slots += VMRegImpl::slots_per_word; + klass_offset = klass_slot_offset * VMRegImpl::stack_slot_size; + is_static = true; + } + + // Plus a lock if needed + + if (method->is_synchronized()) { + lock_slot_offset = stack_slots; + stack_slots += VMRegImpl::slots_per_word; + } + + // Now a place to save return value or as a temporary for any gpr -> fpr moves + // + 2 for return address (which we own) and saved fp + stack_slots += 2 + 9 * VMRegImpl::slots_per_word; // (T0, A0, A1, A2, A3, A4, A5, A6, A7) + + // Ok The space we have allocated will look like: + // + // + // FP-> | | + // |---------------------| + // | 2 slots for moves | + // |---------------------| + // | lock box (if sync) | + // |---------------------| <- lock_slot_offset + // | klass (if static) | + // |---------------------| <- klass_slot_offset + // | oopHandle area | + // |---------------------| <- oop_handle_offset + // | outbound memory | + // | based arguments | + // | | + // |---------------------| + // | vararg area | + // |---------------------| + // | | + // SP-> | out_preserved_slots | + // + // + + + // Now compute actual number of stack words we need rounding to make + // stack properly aligned. + stack_slots = round_to(stack_slots, StackAlignmentInSlots); + + int stack_size = stack_slots * VMRegImpl::stack_slot_size; + + intptr_t start = (intptr_t)__ pc(); + + + + // First thing make an ic check to see if we should even be here + address ic_miss = SharedRuntime::get_ic_miss_stub(); + + // We are free to use all registers as temps without saving them and + // restoring them except fp. fp is the only callee save register + // as far as the interpreter and the compiler(s) are concerned. + + //refer to register_loongarch.hpp:IC_Klass + const Register ic_reg = T1; + const Register receiver = T0; + + Label hit; + Label exception_pending; + + __ verify_oop(receiver); + //add for compressedoops + __ load_klass(T4, receiver); + __ beq(T4, ic_reg, hit); + __ jmp(ic_miss, relocInfo::runtime_call_type); + __ bind(hit); + + int vep_offset = ((intptr_t)__ pc()) - start; +#ifdef COMPILER1 + if (InlineObjectHash && method->intrinsic_id() == vmIntrinsics::_hashCode) { + // Object.hashCode can pull the hashCode from the header word + // instead of doing a full VM transition once it's been computed. + // Since hashCode is usually polymorphic at call sites we can't do + // this optimization at the call site without a lot of work. 
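+    // Sketch of the fast path emitted below: load the mark word, bail out to
+    // slowCase if the object is locked (or biased, when UseBiasedLocking is
+    // on) or if no hash has been installed yet; otherwise mask the hash field
+    // with markOopDesc::hash_mask_in_place, shift it down by
+    // markOopDesc::hash_shift and return it in V0 without entering the VM.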
+ Label slowCase; + Register receiver = T0; + Register result = V0; + __ ld_d ( result, receiver, oopDesc::mark_offset_in_bytes()); + // check if locked + __ andi(AT, result, markOopDesc::unlocked_value); + __ beq(AT, R0, slowCase); + if (UseBiasedLocking) { + // Check if biased and fall through to runtime if so + __ andi (AT, result, markOopDesc::biased_lock_bit_in_place); + __ bne(AT, R0, slowCase); + } + // get hash + __ li(AT, markOopDesc::hash_mask_in_place); + __ andr (AT, result, AT); + // test if hashCode exists + __ beq (AT, R0, slowCase); + __ shr(result, markOopDesc::hash_shift); + __ jr(RA); + __ bind (slowCase); + } +#endif // COMPILER1 + + // Generate stack overflow check + if (UseStackBanging) { + __ bang_stack_with_offset((int)JavaThread::stack_shadow_zone_size()); + } + + // The instruction at the verified entry point must be 4 bytes or longer + // because it can be patched on the fly by make_non_entrant. + if (((intptr_t)__ pc() - start - vep_offset) < 1 * BytesPerInstWord) { + __ nop(); + } + + // Generate a new frame for the wrapper. + // do LA need this ? +#ifndef OPT_THREAD + __ get_thread(TREG); +#endif + __ st_ptr(SP, TREG, in_bytes(JavaThread::last_Java_sp_offset())); + __ li(AT, -(StackAlignmentInBytes)); + __ andr(SP, SP, AT); + + __ enter(); + // -2 because return address is already present and so is saved fp + __ addi_d(SP, SP, -1 * (stack_size - 2*wordSize)); + + // Frame is now completed as far a size and linkage. + + int frame_complete = ((intptr_t)__ pc()) - start; + + // Calculate the difference between sp and fp. We need to know it + // after the native call because on windows Java Natives will pop + // the arguments and it is painful to do sp relative addressing + // in a platform independent way. So after the call we switch to + // fp relative addressing. + //FIXME actually , the fp_adjustment may not be the right, because andr(sp, sp, at) may change + //the SP + int fp_adjustment = stack_size - 2*wordSize; + +#ifdef COMPILER2 + // C2 may leave the stack dirty if not in SSE2+ mode + __ empty_FPU_stack(); +#endif + + // Compute the fp offset for any slots used after the jni call + + int lock_slot_fp_offset = (lock_slot_offset*VMRegImpl::stack_slot_size) - fp_adjustment; + // We use TREG as a thread pointer because it is callee save and + // if we load it once it is usable thru the entire wrapper + const Register thread = TREG; + + // We use S4 as the oop handle for the receiver/klass + // It is callee save so it survives the call to native + + const Register oop_handle_reg = S4; + if (is_critical_native) { + Unimplemented(); + // check_needs_gc_for_critical_native(masm, stack_slots, total_c_args, total_in_args, + // oop_handle_offset, oop_maps, in_regs, in_sig_bt); + } + +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + + // + // We immediately shuffle the arguments so that any vm call we have to + // make from here on out (sync slow path, jvmpi, etc.) we will have + // captured the oops from our caller and have a valid oopMap for + // them. + + // ----------------- + // The Grand Shuffle + // + // Natives require 1 or 2 extra arguments over the normal ones: the JNIEnv* + // and, if static, the class mirror instead of a receiver. This pretty much + // guarantees that register layout will not match (and LA doesn't use reg + // parms though amd does). Since the native abi doesn't use register args + // and the java conventions does we don't have to worry about collisions. + // All of our moved are reg->stack or stack->stack. 
+  // We ignore the extra arguments during the shuffle and handle them at the
+  // last moment. The shuffle is described by the two calling convention
+  // vectors we have in our possession. We simply walk the java vector to
+  // get the source locations and the c vector to get the destinations.
+
+  int c_arg = method->is_static() ? 2 : 1;
+
+  // Record sp-based slot for receiver on stack for non-static methods
+  int receiver_offset = -1;
+
+  // This is a trick. We double the stack slots so we can claim
+  // the oops in the caller's frame. Since we are sure to have
+  // more args than the caller doubling is enough to make
+  // sure we can capture all the incoming oop args from the
+  // caller.
+  //
+  OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
+
+  // Mark location of fp (someday)
+  // map->set_callee_saved(VMRegImpl::stack2reg( stack_slots - 2), stack_slots * 2, 0, vmreg(fp));
+
+#ifdef ASSERT
+  bool reg_destroyed[RegisterImpl::number_of_registers];
+  bool freg_destroyed[FloatRegisterImpl::number_of_registers];
+  for ( int r = 0 ; r < RegisterImpl::number_of_registers ; r++ ) {
+    reg_destroyed[r] = false;
+  }
+  for ( int f = 0 ; f < FloatRegisterImpl::number_of_registers ; f++ ) {
+    freg_destroyed[f] = false;
+  }
+
+#endif /* ASSERT */
+
+  // This may iterate in two different directions depending on the
+  // kind of native it is. The reason is that for regular JNI natives
+  // the incoming and outgoing registers are offset upwards and for
+  // critical natives they are offset down.
+  GrowableArray<int> arg_order(2 * total_in_args);
+  VMRegPair tmp_vmreg;
+  tmp_vmreg.set2(T8->as_VMReg());
+
+  if (!is_critical_native) {
+    for (int i = total_in_args - 1, c_arg = total_c_args - 1; i >= 0; i--, c_arg--) {
+      arg_order.push(i);
+      arg_order.push(c_arg);
+    }
+  } else {
+    // Compute a valid move order, using tmp_vmreg to break any cycles
+    Unimplemented();
+    // ComputeMoveOrder cmo(total_in_args, in_regs, total_c_args, out_regs, in_sig_bt, arg_order, tmp_vmreg);
+  }
+
+  int temploc = -1;
+  for (int ai = 0; ai < arg_order.length(); ai += 2) {
+    int i = arg_order.at(ai);
+    int c_arg = arg_order.at(ai + 1);
+    __ block_comment(err_msg("move %d -> %d", i, c_arg));
+    if (c_arg == -1) {
+      assert(is_critical_native, "should only be required for critical natives");
+      // This arg needs to be moved to a temporary
+      __ move(tmp_vmreg.first()->as_Register(), in_regs[i].first()->as_Register());
+      in_regs[i] = tmp_vmreg;
+      temploc = i;
+      continue;
+    } else if (i == -1) {
+      assert(is_critical_native, "should only be required for critical natives");
+      // Read from the temporary location
+      assert(temploc != -1, "must be valid");
+      i = temploc;
+      temploc = -1;
+    }
+#ifdef ASSERT
+    if (in_regs[i].first()->is_Register()) {
+      assert(!reg_destroyed[in_regs[i].first()->as_Register()->encoding()], "destroyed reg!");
+    } else if (in_regs[i].first()->is_FloatRegister()) {
+      assert(!freg_destroyed[in_regs[i].first()->as_FloatRegister()->encoding()], "destroyed reg!");
+    }
+    if (out_regs[c_arg].first()->is_Register()) {
+      reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true;
+    } else if (out_regs[c_arg].first()->is_FloatRegister()) {
+      freg_destroyed[out_regs[c_arg].first()->as_FloatRegister()->encoding()] = true;
+    }
+#endif /* ASSERT */
+    switch (in_sig_bt[i]) {
+      case T_ARRAY:
+        if (is_critical_native) {
+          Unimplemented();
+          // unpack_array_argument(masm, in_regs[i], in_elem_bt[i], out_regs[c_arg + 1], out_regs[c_arg]);
+          c_arg++;
+#ifdef ASSERT
+          if (out_regs[c_arg].first()->is_Register()) {
+            reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true;
+          } else if (out_regs[c_arg].first()->is_FloatRegister()) {
+            freg_destroyed[out_regs[c_arg].first()->as_FloatRegister()->encoding()] = true;
+          }
+#endif
+          break;
+        }
+      case T_OBJECT:
+        assert(!is_critical_native, "no oop arguments");
+        object_move(masm, map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg],
+                    ((i == 0) && (!is_static)),
+                    &receiver_offset);
+        break;
+      case T_VOID:
+        break;
+
+      case T_FLOAT:
+        float_move(masm, in_regs[i], out_regs[c_arg]);
+        break;
+
+      case T_DOUBLE:
+        assert( i + 1 < total_in_args &&
+                in_sig_bt[i + 1] == T_VOID &&
+                out_sig_bt[c_arg+1] == T_VOID, "bad arg list");
+        double_move(masm, in_regs[i], out_regs[c_arg]);
+        break;
+
+      case T_LONG :
+        long_move(masm, in_regs[i], out_regs[c_arg]);
+        break;
+
+      case T_ADDRESS: assert(false, "found T_ADDRESS in java args");
+
+      default:
+        simple_move32(masm, in_regs[i], out_regs[c_arg]);
+    }
+  }
+
+  // point c_arg at the first arg that is already loaded in case we
+  // need to spill before we call out
+  c_arg = total_c_args - total_in_args;
+  // Pre-load a static method's oop. Used both by locking code and
+  // the normal JNI call code.
+
+  __ move(oop_handle_reg, A1);
+
+  if (method->is_static() && !is_critical_native) {
+
+    // load oop into a register
+    int oop_index = __ oop_recorder()->find_index(JNIHandles::make_local(
+        (method->method_holder())->java_mirror()));
+
+
+    RelocationHolder rspec = oop_Relocation::spec(oop_index);
+    __ relocate(rspec);
+    __ patchable_li52(oop_handle_reg, (long)JNIHandles::make_local((method->method_holder())->java_mirror()));
+    // Now handlize the static class mirror; it's known not-null.
+    __ st_d(oop_handle_reg, SP, klass_offset);
+    map->set_oop(VMRegImpl::stack2reg(klass_slot_offset));
+
+    // Now get the handle
+    __ lea(oop_handle_reg, Address(SP, klass_offset));
+    // store the klass handle as second argument
+    __ move(A1, oop_handle_reg);
+    // and protect the arg if we must spill
+    c_arg--;
+  }
+
+  // Change state to native (we save the return address in the thread, since it might not
+  // be pushed on the stack when we do a stack traversal). It is enough that the pc()
+  // points into the right code segment. It does not have to be the correct return pc.
+  // We use the same pc/oopMap repeatedly when we call out
+
+  Label native_return;
+  __ set_last_Java_frame(SP, noreg, native_return);
+
+  // We have all of the arguments setup at this point. We must not touch any register
+  // argument registers at this point (what if we save/restore them when there are no oops?)
+ { + SkipIfEqual skip_if(masm, &DTraceMethodProbes, 0); + save_args(masm, total_c_args, c_arg, out_regs); + int metadata_index = __ oop_recorder()->find_index(method()); + RelocationHolder rspec = metadata_Relocation::spec(metadata_index); + __ relocate(rspec); + __ patchable_li52(AT, (long)(method())); + + __ call_VM_leaf( + CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry), + thread, AT); + + restore_args(masm, total_c_args, c_arg, out_regs); + } + + // These are register definitions we need for locking/unlocking + const Register swap_reg = T8; // Must use T8 for cmpxchg instruction + const Register obj_reg = T4; // Will contain the oop + //const Register lock_reg = T6; // Address of compiler lock object (BasicLock) + const Register lock_reg = c_rarg0; // Address of compiler lock object (BasicLock) + + + + Label slow_path_lock; + Label lock_done; + + // Lock a synchronized method + if (method->is_synchronized()) { + assert(!is_critical_native, "unhandled"); + + const int mark_word_offset = BasicLock::displaced_header_offset_in_bytes(); + + // Get the handle (the 2nd argument) + __ move(oop_handle_reg, A1); + + // Get address of the box + __ lea(lock_reg, Address(FP, lock_slot_fp_offset)); + + // Load the oop from the handle + __ ld_d(obj_reg, oop_handle_reg, 0); + + if (UseBiasedLocking) { + // Note that oop_handle_reg is trashed during this call + __ biased_locking_enter(lock_reg, obj_reg, swap_reg, A1, false, lock_done, &slow_path_lock); + } + + // Load immediate 1 into swap_reg %T8 + __ li(swap_reg, 1); + + __ ld_d(AT, obj_reg, 0); + __ orr(swap_reg, swap_reg, AT); + + __ st_d(swap_reg, lock_reg, mark_word_offset); + __ cmpxchg(Address(obj_reg, 0), swap_reg, lock_reg, AT, true, false, lock_done); + // Test if the oopMark is an obvious stack pointer, i.e., + // 1) (mark & 3) == 0, and + // 2) sp <= mark < mark + os::pagesize() + // These 3 tests can be done by evaluating the following + // expression: ((mark - sp) & (3 - os::vm_page_size())), + // assuming both stack pointer and pagesize have their + // least significant 2 bits clear. + // NOTE: the oopMark is in swap_reg %T8 as the result of cmpxchg + + __ sub_d(swap_reg, swap_reg, SP); + __ li(AT, 3 - os::vm_page_size()); + __ andr(swap_reg , swap_reg, AT); + // Save the test result, for recursive case, the result is zero + __ st_d(swap_reg, lock_reg, mark_word_offset); + __ bne(swap_reg, R0, slow_path_lock); + // Slow path will re-enter here + __ bind(lock_done); + + if (UseBiasedLocking) { + // Re-fetch oop_handle_reg as we trashed it above + __ move(A1, oop_handle_reg); + } + } + + + // Finally just about ready to make the JNI call + + + // get JNIEnv* which is first argument to native + if (!is_critical_native) { + __ addi_d(A0, thread, in_bytes(JavaThread::jni_environment_offset())); + } + + // Example: Java_java_lang_ref_Finalizer_invokeFinalizeMethod(JNIEnv *env, jclass clazz, jobject ob) + // Load the second arguments into A1 + //__ ld(A1, SP , wordSize ); // klass + + // Now set thread in native + __ addi_d(AT, R0, _thread_in_native); + if (os::is_MP()) { + __ membar(Assembler::Membar_mask_bits(__ LoadStore|__ StoreStore)); // store release + } + __ st_w(AT, thread, in_bytes(JavaThread::thread_state_offset())); + // do the call + __ call(native_func, relocInfo::runtime_call_type); + __ bind(native_return); + + oop_maps->add_gc_map(((intptr_t)__ pc()) - start, map); + + // WARNING - on Windows Java Natives use pascal calling convention and pop the + // arguments off of the stack. 
We could just re-adjust the stack pointer here
+  // and continue to do SP relative addressing but we instead switch to FP
+  // relative addressing.
+
+  // Unpack native results.
+  switch (ret_type) {
+  case T_BOOLEAN: __ c2bool(V0);                break;
+  case T_CHAR   : __ bstrpick_d(V0, V0, 15, 0); break;
+  case T_BYTE   : __ sign_extend_byte (V0);     break;
+  case T_SHORT  : __ sign_extend_short(V0);     break;
+  case T_INT    : break; // nothing to do
+  case T_DOUBLE :
+  case T_FLOAT  :
+    // Result is in the FP result register, we'll save it as needed
+    break;
+  case T_ARRAY:                 // Really a handle
+  case T_OBJECT:                // Really a handle
+    break; // can't de-handlize until after safepoint check
+  case T_VOID: break;
+  case T_LONG: break;
+  default     : ShouldNotReachHere();
+  }
+  // Switch thread to "native transition" state before reading the synchronization state.
+  // This additional state is necessary because reading and testing the synchronization
+  // state is not atomic w.r.t. GC, as this scenario demonstrates:
+  // Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted.
+  // VM thread changes sync state to synchronizing and suspends threads for GC.
+  // Thread A is resumed to finish this native method, but doesn't block here since it
+  // didn't see any synchronization in progress, and escapes.
+  __ addi_d(AT, R0, _thread_in_native_trans);
+  if (os::is_MP()) {
+    __ membar(Assembler::Membar_mask_bits(__ LoadStore|__ StoreStore)); // store release
+  }
+  __ st_w(AT, thread, in_bytes(JavaThread::thread_state_offset()));
+
+  if (os::is_MP()) {
+    if (UseMembar) {
+      // Force this write out before the read below
+      __ membar(__ AnyAny);
+    } else {
+      // Write serialization page so VM thread can do a pseudo remote membar.
+      // We use the current thread pointer to calculate a thread specific
+      // offset to write to within the page. This minimizes bus traffic
+      // due to cache line collision.
+      __ serialize_memory(thread, T5);
+    }
+  }
+
+  Label after_transition;
+
+  // check for safepoint operation in progress and/or pending suspend requests
+  {
+    Label Continue;
+    Label slow_path;
+
+    __ safepoint_poll_acquire(slow_path, thread);
+    __ ld_w(AT, thread, in_bytes(JavaThread::suspend_flags_offset()));
+    __ beq(AT, R0, Continue);
+    __ bind(slow_path);
+
+    // Don't use call_VM as it will see a possible pending exception and forward it
+    // and never return here, preventing us from clearing _last_native_pc down below.
+    //
+    save_native_result(masm, ret_type, stack_slots);
+    __ move(A0, thread);
+    __ addi_d(SP, SP, -wordSize);
+    __ push(S2);
+    __ li(AT, -(StackAlignmentInBytes));
+    __ move(S2, SP);     // use S2 as a sender SP holder
+    __ andr(SP, SP, AT); // align stack as required by ABI
+    if (!is_critical_native) {
+      __ call(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans), relocInfo::runtime_call_type);
+    } else {
+      __ call(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans_and_transition), relocInfo::runtime_call_type);
+    }
+    __ move(SP, S2);     // use S2 as a sender SP holder
+    __ pop(S2);
+    __ addi_d(SP, SP, wordSize);
+    // Restore any method result value
+    restore_native_result(masm, ret_type, stack_slots);
+
+    if (is_critical_native) {
+      // The call above performed the transition to thread_in_Java so
+      // skip the transition logic below.
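+      // Illustrative state sequence (regular JNI natives take the full path):
+      //   _thread_in_native -> _thread_in_native_trans -> _thread_in_Java
+      // For critical natives the runtime call above already finished the last
+      // step, so the unconditional branch below jumps over the store of
+      // _thread_in_Java and rejoins at after_transition.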
+ __ beq(R0, R0, after_transition); + } + + __ bind(Continue); + } + + // change thread state + __ addi_d(AT, R0, _thread_in_Java); + if (os::is_MP()) { + __ membar(Assembler::Membar_mask_bits(__ LoadStore|__ StoreStore)); // store release + } + __ st_w(AT, thread, in_bytes(JavaThread::thread_state_offset())); + __ bind(after_transition); + Label reguard; + Label reguard_done; + __ ld_w(AT, thread, in_bytes(JavaThread::stack_guard_state_offset())); + __ addi_d(AT, AT, -JavaThread::stack_guard_yellow_reserved_disabled); + __ beq(AT, R0, reguard); + // slow path reguard re-enters here + __ bind(reguard_done); + + // Handle possible exception (will unlock if necessary) + + // native result if any is live + + // Unlock + Label slow_path_unlock; + Label unlock_done; + if (method->is_synchronized()) { + + Label done; + + // Get locked oop from the handle we passed to jni + __ ld_d( obj_reg, oop_handle_reg, 0); + if (UseBiasedLocking) { + __ biased_locking_exit(obj_reg, T8, done); + + } + + // Simple recursive lock? + + __ ld_d(AT, FP, lock_slot_fp_offset); + __ beq(AT, R0, done); + // Must save FSF if if it is live now because cmpxchg must use it + if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) { + save_native_result(masm, ret_type, stack_slots); + } + + // get old displaced header + __ ld_d (T8, FP, lock_slot_fp_offset); + // get address of the stack lock + __ addi_d (c_rarg0, FP, lock_slot_fp_offset); + // Atomic swap old header if oop still contains the stack lock + __ cmpxchg(Address(obj_reg, 0), c_rarg0, T8, AT, false, false, unlock_done, &slow_path_unlock); + + // slow path re-enters here + __ bind(unlock_done); + if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) { + restore_native_result(masm, ret_type, stack_slots); + } + + __ bind(done); + + } + { + SkipIfEqual skip_if(masm, &DTraceMethodProbes, 0); + // Tell dtrace about this method exit + save_native_result(masm, ret_type, stack_slots); + int metadata_index = __ oop_recorder()->find_index( (method())); + RelocationHolder rspec = metadata_Relocation::spec(metadata_index); + __ relocate(rspec); + __ patchable_li52(AT, (long)(method())); + + __ call_VM_leaf( + CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), + thread, AT); + restore_native_result(masm, ret_type, stack_slots); + } + + // We can finally stop using that last_Java_frame we setup ages ago + + __ reset_last_Java_frame(false); + + // Unpack oop result, e.g. JNIHandles::resolve value. + if (ret_type == T_OBJECT || ret_type == T_ARRAY) { + __ resolve_jobject(V0, thread, T4); + } + + if (CheckJNICalls) { + // clear_pending_jni_exception_check + __ st_d(R0, thread, in_bytes(JavaThread::pending_jni_exception_check_fn_offset())); + } + + if (!is_critical_native) { + // reset handle block + __ ld_d(AT, thread, in_bytes(JavaThread::active_handles_offset())); + __ st_w(R0, AT, JNIHandleBlock::top_offset_in_bytes()); + } + + if (!is_critical_native) { + // Any exception pending? + __ ld_d(AT, thread, in_bytes(Thread::pending_exception_offset())); + __ bne(AT, R0, exception_pending); + } + // no exception, we're almost done + + // check that only result value is on FPU stack + __ verify_FPU(ret_type == T_FLOAT || ret_type == T_DOUBLE ? 
1 : 0, "native_wrapper normal exit"); + + // Return +#ifndef OPT_THREAD + __ get_thread(TREG); +#endif + //__ ld_ptr(SP, TREG, in_bytes(JavaThread::last_Java_sp_offset())); + __ leave(); + + __ jr(RA); + // Unexpected paths are out of line and go here + // Slow path locking & unlocking + if (method->is_synchronized()) { + + // BEGIN Slow path lock + __ bind(slow_path_lock); + + // protect the args we've loaded + save_args(masm, total_c_args, c_arg, out_regs); + + // has last_Java_frame setup. No exceptions so do vanilla call not call_VM + // args are (oop obj, BasicLock* lock, JavaThread* thread) + + __ move(A0, obj_reg); + __ move(A1, lock_reg); + __ move(A2, thread); + __ addi_d(SP, SP, - 3*wordSize); + + __ li(AT, -(StackAlignmentInBytes)); + __ move(S2, SP); // use S2 as a sender SP holder + __ andr(SP, SP, AT); // align stack as required by ABI + + __ call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C), relocInfo::runtime_call_type); + __ move(SP, S2); + __ addi_d(SP, SP, 3*wordSize); + + restore_args(masm, total_c_args, c_arg, out_regs); + +#ifdef ASSERT + { Label L; + __ ld_d(AT, thread, in_bytes(Thread::pending_exception_offset())); + __ beq(AT, R0, L); + __ stop("no pending exception allowed on exit from monitorenter"); + __ bind(L); + } +#endif + __ b(lock_done); + // END Slow path lock + + // BEGIN Slow path unlock + __ bind(slow_path_unlock); + + // Slow path unlock + + if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) { + save_native_result(masm, ret_type, stack_slots); + } + // Save pending exception around call to VM (which contains an EXCEPTION_MARK) + + __ ld_d(AT, thread, in_bytes(Thread::pending_exception_offset())); + __ push(AT); + __ st_d(R0, thread, in_bytes(Thread::pending_exception_offset())); + + __ li(AT, -(StackAlignmentInBytes)); + __ move(S2, SP); // use S2 as a sender SP holder + __ andr(SP, SP, AT); // align stack as required by ABI + + // should be a peal + // +wordSize because of the push above + __ addi_d(A1, FP, lock_slot_fp_offset); + + __ move(A0, obj_reg); + __ move(A2, thread); + __ addi_d(SP, SP, -2*wordSize); + __ call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C), + relocInfo::runtime_call_type); + __ addi_d(SP, SP, 2*wordSize); + __ move(SP, S2); +#ifdef ASSERT + { + Label L; + __ ld_d( AT, thread, in_bytes(Thread::pending_exception_offset())); + __ beq(AT, R0, L); + __ stop("no pending exception allowed on exit complete_monitor_unlocking_C"); + __ bind(L); + } +#endif /* ASSERT */ + + __ pop(AT); + __ st_d(AT, thread, in_bytes(Thread::pending_exception_offset())); + if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) { + restore_native_result(masm, ret_type, stack_slots); + } + __ b(unlock_done); + // END Slow path unlock + + } + + // SLOW PATH Reguard the stack if needed + + __ bind(reguard); + save_native_result(masm, ret_type, stack_slots); + __ call(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages), + relocInfo::runtime_call_type); + restore_native_result(masm, ret_type, stack_slots); + __ b(reguard_done); + + // BEGIN EXCEPTION PROCESSING + if (!is_critical_native) { + // Forward the exception + __ bind(exception_pending); + + // remove possible return value from FPU register stack + __ empty_FPU_stack(); + + // pop our frame + //forward_exception_entry need return address on stack + __ move(SP, FP); + __ pop(FP); + + // and forward the exception + __ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); + } + __ flush(); + + nmethod *nm = 
nmethod::new_native_nmethod(method, + compile_id, + masm->code(), + vep_offset, + frame_complete, + stack_slots / VMRegImpl::slots_per_word, + (is_static ? in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)), + in_ByteSize(lock_slot_offset*VMRegImpl::stack_slot_size), + oop_maps); + + if (is_critical_native) { + nm->set_lazy_critical_native(true); + } + return nm; +} + +#ifdef HAVE_DTRACE_H +// --------------------------------------------------------------------------- +// Generate a dtrace nmethod for a given signature. The method takes arguments +// in the Java compiled code convention, marshals them to the native +// abi and then leaves nops at the position you would expect to call a native +// function. When the probe is enabled the nops are replaced with a trap +// instruction that dtrace inserts and the trace will cause a notification +// to dtrace. +// +// The probes are only able to take primitive types and java/lang/String as +// arguments. No other java types are allowed. Strings are converted to utf8 +// strings so that from dtrace point of view java strings are converted to C +// strings. There is an arbitrary fixed limit on the total space that a method +// can use for converting the strings. (256 chars per string in the signature). +// So any java string larger then this is truncated. + +static int fp_offset[ConcreteRegisterImpl::number_of_registers] = { 0 }; +static bool offsets_initialized = false; + +static VMRegPair reg64_to_VMRegPair(Register r) { + VMRegPair ret; + if (wordSize == 8) { + ret.set2(r->as_VMReg()); + } else { + ret.set_pair(r->successor()->as_VMReg(), r->as_VMReg()); + } + return ret; +} + + +nmethod *SharedRuntime::generate_dtrace_nmethod(MacroAssembler *masm, + methodHandle method) { + + + // generate_dtrace_nmethod is guarded by a mutex so we are sure to + // be single threaded in this method. + assert(AdapterHandlerLibrary_lock->owned_by_self(), "must be"); + + // Fill in the signature array, for the calling-convention call. + int total_args_passed = method->size_of_parameters(); + + BasicType* in_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_args_passed); + VMRegPair *in_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_args_passed); + + // The signature we are going to use for the trap that dtrace will see + // java/lang/String is converted. We drop "this" and any other object + // is converted to NULL. (A one-slot java/lang/Long object reference + // is converted to a two-slot long, which is why we double the allocation). 
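+  // For example (an illustrative signature, not one taken from this file): a
+  // static probe declared as (Ljava/lang/String;JF)V would be mapped by the
+  // loop below to
+  //   in_sig_bt : T_OBJECT, T_LONG, T_VOID, T_FLOAT
+  //   out_sig_bt: T_ADDRESS, T_LONG, T_VOID, T_INT
+  // i.e. the String becomes a C string pointer, the long keeps its T_VOID
+  // filler slot, and the float is widened to an int for the C convention.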
+ BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_args_passed * 2); + VMRegPair* out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_args_passed * 2); + + int i=0; + int total_strings = 0; + int first_arg_to_pass = 0; + int total_c_args = 0; + + // Skip the receiver as dtrace doesn't want to see it + if( !method->is_static() ) { + in_sig_bt[i++] = T_OBJECT; + first_arg_to_pass = 1; + } + + SignatureStream ss(method->signature()); + for ( ; !ss.at_return_type(); ss.next()) { + BasicType bt = ss.type(); + in_sig_bt[i++] = bt; // Collect remaining bits of signature + out_sig_bt[total_c_args++] = bt; + if( bt == T_OBJECT) { + symbolOop s = ss.as_symbol_or_null(); + if (s == vmSymbols::java_lang_String()) { + total_strings++; + out_sig_bt[total_c_args-1] = T_ADDRESS; + } else if (s == vmSymbols::java_lang_Boolean() || + s == vmSymbols::java_lang_Byte()) { + out_sig_bt[total_c_args-1] = T_BYTE; + } else if (s == vmSymbols::java_lang_Character() || + s == vmSymbols::java_lang_Short()) { + out_sig_bt[total_c_args-1] = T_SHORT; + } else if (s == vmSymbols::java_lang_Integer() || + s == vmSymbols::java_lang_Float()) { + out_sig_bt[total_c_args-1] = T_INT; + } else if (s == vmSymbols::java_lang_Long() || + s == vmSymbols::java_lang_Double()) { + out_sig_bt[total_c_args-1] = T_LONG; + out_sig_bt[total_c_args++] = T_VOID; + } + } else if ( bt == T_LONG || bt == T_DOUBLE ) { + in_sig_bt[i++] = T_VOID; // Longs & doubles take 2 Java slots + // We convert double to long + out_sig_bt[total_c_args-1] = T_LONG; + out_sig_bt[total_c_args++] = T_VOID; + } else if ( bt == T_FLOAT) { + // We convert float to int + out_sig_bt[total_c_args-1] = T_INT; + } + } + + assert(i==total_args_passed, "validly parsed signature"); + + // Now get the compiled-Java layout as input arguments + int comp_args_on_stack; + comp_args_on_stack = SharedRuntime::java_calling_convention( + in_sig_bt, in_regs, total_args_passed, false); + + // We have received a description of where all the java arg are located + // on entry to the wrapper. We need to convert these args to where + // the a native (non-jni) function would expect them. To figure out + // where they go we convert the java signature to a C signature and remove + // T_VOID for any long/double we might have received. + + + // Now figure out where the args must be stored and how much stack space + // they require (neglecting out_preserve_stack_slots but space for storing + // the 1st six register arguments). It's weird see int_stk_helper. + + int out_arg_slots; + out_arg_slots = c_calling_convention(out_sig_bt, out_regs, NULL, total_c_args); + + // Calculate the total number of stack slots we will need. + + // First count the abi requirement plus all of the outgoing args + int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots; + + // Plus a temp for possible converion of float/double/long register args + + int conversion_temp = stack_slots; + stack_slots += 2; + + + // Now space for the string(s) we must convert + + int string_locs = stack_slots; + stack_slots += total_strings * + (max_dtrace_string_size / VMRegImpl::stack_slot_size); + + // Ok The space we have allocated will look like: + // + // + // FP-> | | + // |---------------------| + // | string[n] | + // |---------------------| <- string_locs[n] + // | string[n-1] | + // |---------------------| <- string_locs[n-1] + // | ... | + // | ... 
| + // |---------------------| <- string_locs[1] + // | string[0] | + // |---------------------| <- string_locs[0] + // | temp | + // |---------------------| <- conversion_temp + // | outbound memory | + // | based arguments | + // | | + // |---------------------| + // | | + // SP-> | out_preserved_slots | + // + // + + // Now compute actual number of stack words we need rounding to make + // stack properly aligned. + stack_slots = round_to(stack_slots, 4 * VMRegImpl::slots_per_word); + + int stack_size = stack_slots * VMRegImpl::stack_slot_size; + intptr_t start = (intptr_t)__ pc(); + + // First thing make an ic check to see if we should even be here + + { + Label L; + const Register temp_reg = G3_scratch; + Address ic_miss(temp_reg, SharedRuntime::get_ic_miss_stub()); + __ verify_oop(O0); + __ ld_ptr(O0, oopDesc::klass_offset_in_bytes(), temp_reg); + __ cmp(temp_reg, G5_inline_cache_reg); + __ brx(Assembler::equal, true, Assembler::pt, L); + + __ jump_to(ic_miss, 0); + __ align(CodeEntryAlignment); + __ bind(L); + } + + int vep_offset = ((intptr_t)__ pc()) - start; + + // The instruction at the verified entry point must be 4 bytes or longer + // because it can be patched on the fly by make_non_entrant. The stack bang + // instruction fits that requirement. + + // Generate stack overflow check before creating frame + __ generate_stack_overflow_check(stack_size); + + assert(((intptr_t)__ pc() - start - vep_offset) >= 1 * BytesPerInstWord, + "valid size for make_non_entrant"); + + // Generate a new frame for the wrapper. + __ save(SP, -stack_size, SP); + + // Frame is now completed as far a size and linkage. + + int frame_complete = ((intptr_t)__ pc()) - start; + +#ifdef ASSERT + bool reg_destroyed[RegisterImpl::number_of_registers]; + bool freg_destroyed[FloatRegisterImpl::number_of_registers]; + for ( int r = 0 ; r < RegisterImpl::number_of_registers ; r++ ) { + reg_destroyed[r] = false; + } + for ( int f = 0 ; f < FloatRegisterImpl::number_of_registers ; f++ ) { + freg_destroyed[f] = false; + } + +#endif /* ASSERT */ + + VMRegPair zero; + const Register g0 = G0; // without this we get a compiler warning (why??) 
+ zero.set2(g0->as_VMReg()); + + int c_arg, j_arg; + + Register conversion_off = noreg; + + for (j_arg = first_arg_to_pass, c_arg = 0 ; + j_arg < total_args_passed ; j_arg++, c_arg++ ) { + + VMRegPair src = in_regs[j_arg]; + VMRegPair dst = out_regs[c_arg]; + +#ifdef ASSERT + if (src.first()->is_Register()) { + assert(!reg_destroyed[src.first()->as_Register()->encoding()], "ack!"); + } else if (src.first()->is_FloatRegister()) { + assert(!freg_destroyed[src.first()->as_FloatRegister()->encoding( + FloatRegisterImpl::S)], "ack!"); + } + if (dst.first()->is_Register()) { + reg_destroyed[dst.first()->as_Register()->encoding()] = true; + } else if (dst.first()->is_FloatRegister()) { + freg_destroyed[dst.first()->as_FloatRegister()->encoding( + FloatRegisterImpl::S)] = true; + } +#endif /* ASSERT */ + + switch (in_sig_bt[j_arg]) { + case T_ARRAY: + case T_OBJECT: + { + if (out_sig_bt[c_arg] == T_BYTE || out_sig_bt[c_arg] == T_SHORT || + out_sig_bt[c_arg] == T_INT || out_sig_bt[c_arg] == T_LONG) { + // need to unbox a one-slot value + Register in_reg = L0; + Register tmp = L2; + if ( src.first()->is_reg() ) { + in_reg = src.first()->as_Register(); + } else { + assert(Assembler::is_simm13(reg2offset(src.first()) + STACK_BIAS), + "must be"); + __ ld_ptr(FP, reg2offset(src.first()) + STACK_BIAS, in_reg); + } + // If the final destination is an acceptable register + if ( dst.first()->is_reg() ) { + if ( dst.is_single_phys_reg() || out_sig_bt[c_arg] != T_LONG ) { + tmp = dst.first()->as_Register(); + } + } + + Label skipUnbox; + if ( wordSize == 4 && out_sig_bt[c_arg] == T_LONG ) { + __ mov(G0, tmp->successor()); + } + __ mov(G0, tmp); + __ br_null(in_reg, true, Assembler::pn, skipUnbox); + + BasicType bt = out_sig_bt[c_arg]; + int box_offset = java_lang_boxing_object::value_offset_in_bytes(bt); + switch (bt) { + case T_BYTE: + __ ldub(in_reg, box_offset, tmp); break; + case T_SHORT: + __ lduh(in_reg, box_offset, tmp); break; + case T_INT: + __ ld(in_reg, box_offset, tmp); break; + case T_LONG: + __ ld_long(in_reg, box_offset, tmp); break; + default: ShouldNotReachHere(); + } + + __ bind(skipUnbox); + // If tmp wasn't final destination copy to final destination + if (tmp == L2) { + VMRegPair tmp_as_VM = reg64_to_VMRegPair(L2); + if (out_sig_bt[c_arg] == T_LONG) { + long_move(masm, tmp_as_VM, dst); + } else { + move32_64(masm, tmp_as_VM, out_regs[c_arg]); + } + } + if (out_sig_bt[c_arg] == T_LONG) { + assert(out_sig_bt[c_arg+1] == T_VOID, "must be"); + ++c_arg; // move over the T_VOID to keep the loop indices in sync + } + } else if (out_sig_bt[c_arg] == T_ADDRESS) { + Register s = + src.first()->is_reg() ? src.first()->as_Register() : L2; + Register d = + dst.first()->is_reg() ? dst.first()->as_Register() : L2; + + // We store the oop now so that the conversion pass can reach + // while in the inner frame. This will be the only store if + // the oop is NULL. 
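+          // Four cases below, depending on where src and dst live:
+          //   reg   -> reg   : plain mov
+          //   reg   -> stack : st_ptr to the out-arg slot
+          //   stack -> reg   : ld_ptr from the caller's frame
+          //   stack -> stack : ld_ptr into L2, then st_ptr to the out-arg slot
+          // (L2 doubles as the scratch register whenever one side is in memory.)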
+ if (s != L2) { + // src is register + if (d != L2) { + // dst is register + __ mov(s, d); + } else { + assert(Assembler::is_simm13(reg2offset(dst.first()) + + STACK_BIAS), "must be"); + __ st_ptr(s, SP, reg2offset(dst.first()) + STACK_BIAS); + } + } else { + // src not a register + assert(Assembler::is_simm13(reg2offset(src.first()) + + STACK_BIAS), "must be"); + __ ld_ptr(FP, reg2offset(src.first()) + STACK_BIAS, d); + if (d == L2) { + assert(Assembler::is_simm13(reg2offset(dst.first()) + + STACK_BIAS), "must be"); + __ st_ptr(d, SP, reg2offset(dst.first()) + STACK_BIAS); + } + } + } else if (out_sig_bt[c_arg] != T_VOID) { + // Convert the arg to NULL + if (dst.first()->is_reg()) { + __ mov(G0, dst.first()->as_Register()); + } else { + assert(Assembler::is_simm13(reg2offset(dst.first()) + + STACK_BIAS), "must be"); + __ st_ptr(G0, SP, reg2offset(dst.first()) + STACK_BIAS); + } + } + } + break; + case T_VOID: + break; + + case T_FLOAT: + if (src.first()->is_stack()) { + // Stack to stack/reg is simple + move32_64(masm, src, dst); + } else { + if (dst.first()->is_reg()) { + // freg -> reg + int off = + STACK_BIAS + conversion_temp * VMRegImpl::stack_slot_size; + Register d = dst.first()->as_Register(); + if (Assembler::is_simm13(off)) { + __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(), + SP, off); + __ ld(SP, off, d); + } else { + if (conversion_off == noreg) { + __ set(off, L6); + conversion_off = L6; + } + __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(), + SP, conversion_off); + __ ld(SP, conversion_off , d); + } + } else { + // freg -> mem + int off = STACK_BIAS + reg2offset(dst.first()); + if (Assembler::is_simm13(off)) { + __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(), + SP, off); + } else { + if (conversion_off == noreg) { + __ set(off, L6); + conversion_off = L6; + } + __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(), + SP, conversion_off); + } + } + } + break; + + case T_DOUBLE: + assert( j_arg + 1 < total_args_passed && + in_sig_bt[j_arg + 1] == T_VOID && + out_sig_bt[c_arg+1] == T_VOID, "bad arg list"); + if (src.first()->is_stack()) { + // Stack to stack/reg is simple + long_move(masm, src, dst); + } else { + Register d = dst.first()->is_reg() ? dst.first()->as_Register() : L2; + + // Destination could be an odd reg on 32bit in which case + // we can't load direct to the destination. 
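+          // i.e. on a 32-bit VM a 64-bit value needs an even/odd register pair,
+          // so if the chosen destination register is odd the value is bounced
+          // through the L2 temp and long_move() places it afterwards.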
+ + if (!d->is_even() && wordSize == 4) { + d = L2; + } + int off = STACK_BIAS + conversion_temp * VMRegImpl::stack_slot_size; + if (Assembler::is_simm13(off)) { + __ stf(FloatRegisterImpl::D, src.first()->as_FloatRegister(), + SP, off); + __ ld_long(SP, off, d); + } else { + if (conversion_off == noreg) { + __ set(off, L6); + conversion_off = L6; + } + __ stf(FloatRegisterImpl::D, src.first()->as_FloatRegister(), + SP, conversion_off); + __ ld_long(SP, conversion_off, d); + } + if (d == L2) { + long_move(masm, reg64_to_VMRegPair(L2), dst); + } + } + break; + + case T_LONG : + // 32bit can't do a split move of something like g1 -> O0, O1 + // so use a memory temp + if (src.is_single_phys_reg() && wordSize == 4) { + Register tmp = L2; + if (dst.first()->is_reg() && + (wordSize == 8 || dst.first()->as_Register()->is_even())) { + tmp = dst.first()->as_Register(); + } + + int off = STACK_BIAS + conversion_temp * VMRegImpl::stack_slot_size; + if (Assembler::is_simm13(off)) { + __ stx(src.first()->as_Register(), SP, off); + __ ld_long(SP, off, tmp); + } else { + if (conversion_off == noreg) { + __ set(off, L6); + conversion_off = L6; + } + __ stx(src.first()->as_Register(), SP, conversion_off); + __ ld_long(SP, conversion_off, tmp); + } + + if (tmp == L2) { + long_move(masm, reg64_to_VMRegPair(L2), dst); + } + } else { + long_move(masm, src, dst); + } + break; + + case T_ADDRESS: assert(false, "found T_ADDRESS in java args"); + + default: + move32_64(masm, src, dst); + } + } + + + // If we have any strings we must store any register based arg to the stack + // This includes any still live xmm registers too. + + if (total_strings > 0 ) { + + // protect all the arg registers + __ save_frame(0); + __ mov(G2_thread, L7_thread_cache); + const Register L2_string_off = L2; + + // Get first string offset + __ set(string_locs * VMRegImpl::stack_slot_size, L2_string_off); + + for (c_arg = 0 ; c_arg < total_c_args ; c_arg++ ) { + if (out_sig_bt[c_arg] == T_ADDRESS) { + + VMRegPair dst = out_regs[c_arg]; + const Register d = dst.first()->is_reg() ? + dst.first()->as_Register()->after_save() : noreg; + + // It's a string the oop and it was already copied to the out arg + // position + if (d != noreg) { + __ mov(d, O0); + } else { + assert(Assembler::is_simm13(reg2offset(dst.first()) + STACK_BIAS), + "must be"); + __ ld_ptr(FP, reg2offset(dst.first()) + STACK_BIAS, O0); + } + Label skip; + + __ add_d(FP, L2_string_off, O1); + __ br_null(O0, false, Assembler::pn, skip); + + if (d != noreg) { + __ mov(O1, d); + } else { + assert(Assembler::is_simm13(reg2offset(dst.first()) + STACK_BIAS), + "must be"); + __ st_ptr(O1, FP, reg2offset(dst.first()) + STACK_BIAS); + } + + __ addi_d(L2_string_off, max_dtrace_string_size, L2_string_off); + __ call(CAST_FROM_FN_PTR(address, SharedRuntime::get_utf), + relocInfo::runtime_call_type); + + __ bind(skip); + + } + + } + __ mov(L7_thread_cache, G2_thread); + __ restore(); + + } + + + // Ok now we are done. 
Need to place the nop that dtrace wants in order to + // patch in the trap + + int patch_offset = ((intptr_t)__ pc()) - start; + + __ nop(); + + + // Return + + __ restore(); + __ ret(); + + __ flush(); + nmethod *nm = nmethod::new_dtrace_nmethod( + method, masm->code(), vep_offset, patch_offset, frame_complete, + stack_slots / VMRegImpl::slots_per_word); + return nm; +} + +#endif // HAVE_DTRACE_H + +// this function returns the adjust size (in number of words) to a c2i adapter +// activation for use during deoptimization +int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals) { + return (callee_locals - callee_parameters) * Interpreter::stackElementWords; +} + +// "Top of Stack" slots that may be unused by the calling convention but must +// otherwise be preserved. +// On Intel these are not necessary and the value can be zero. +// On Sparc this describes the words reserved for storing a register window +// when an interrupt occurs. +uint SharedRuntime::out_preserve_stack_slots() { + return 0; +} + +//------------------------------generate_deopt_blob---------------------------- +// Ought to generate an ideal graph & compile, but here's some SPARC ASM +// instead. +void SharedRuntime::generate_deopt_blob() { + // allocate space for the code + ResourceMark rm; + // setup code generation tools + int pad = 0; +#if INCLUDE_JVMCI + if (EnableJVMCI) { + pad += 512; // Increase the buffer size when compiling for JVMCI + } +#endif + //CodeBuffer buffer ("deopt_blob", 4000, 2048); + CodeBuffer buffer ("deopt_blob", 8000+pad, 2048); // FIXME for debug + MacroAssembler* masm = new MacroAssembler( & buffer); + int frame_size_in_words; + OopMap* map = NULL; + // Account for the extra args we place on the stack + // by the time we call fetch_unroll_info + const int additional_words = 2; // deopt kind, thread + + OopMapSet *oop_maps = new OopMapSet(); + RegisterSaver reg_save(COMPILER2_OR_JVMCI != 0); + + address start = __ pc(); + Label cont; + // we use S3 for DeOpt reason register + Register reason = S3; + // use S6 for thread register + Register thread = TREG; + // use S7 for fetch_unroll_info returned UnrollBlock + Register unroll = S7; + // Prolog for non exception case! + + // We have been called from the deopt handler of the deoptee. + // + // deoptee: + // ... + // call X + // ... + // deopt_handler: call_deopt_stub + // cur. return pc --> ... + // + // So currently RA points behind the call in the deopt handler. + // We adjust it such that it points to the start of the deopt handler. + // The return_pc has been stored in the frame of the deoptee and + // will replace the address of the deopt_handler in the call + // to Deoptimization::fetch_unroll_info below. + + // HandlerImpl::size_deopt_handler() + __ addi_d(RA, RA, - NativeFarCall::instruction_size); + // Save everything in sight. 
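+  // This save is the first of several: each entry point of the blob (the
+  // normal entry, reexecute_offset, the JVMCI uncommon-trap entries when
+  // INCLUDE_JVMCI is enabled, and exception_offset / exception_in_tls_offset)
+  // saves the full register set and loads its own Deoptimization::UnpackType
+  // into `reason` before falling into the common `cont` path below.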
+ map = reg_save.save_live_registers(masm, additional_words, &frame_size_in_words); + // Normal deoptimization + __ li(reason, Deoptimization::Unpack_deopt); + __ b(cont); + + int reexecute_offset = __ pc() - start; +#if INCLUDE_JVMCI && !defined(COMPILER1) + if (EnableJVMCI && UseJVMCICompiler) { + // JVMCI does not use this kind of deoptimization + __ should_not_reach_here(); + } +#endif + + // Reexecute case + // return address is the pc describes what bci to do re-execute at + + // No need to update map as each call to save_live_registers will produce identical oopmap + (void) reg_save.save_live_registers(masm, additional_words, &frame_size_in_words); + __ li(reason, Deoptimization::Unpack_reexecute); + __ b(cont); + +#if INCLUDE_JVMCI + Label after_fetch_unroll_info_call; + int implicit_exception_uncommon_trap_offset = 0; + int uncommon_trap_offset = 0; + + if (EnableJVMCI) { + implicit_exception_uncommon_trap_offset = __ pc() - start; + + __ ld_d(RA, Address(TREG, in_bytes(JavaThread::jvmci_implicit_exception_pc_offset()))); + __ st_d(R0, Address(TREG, in_bytes(JavaThread::jvmci_implicit_exception_pc_offset()))); + + uncommon_trap_offset = __ pc() - start; + + // Save everything in sight. + (void) reg_save.save_live_registers(masm, additional_words, &frame_size_in_words); + __ addi_d(SP, SP, -additional_words * wordSize); + // fetch_unroll_info needs to call last_java_frame() + Label retaddr; + __ set_last_Java_frame(NOREG, NOREG, retaddr); + + __ ld_w(A1, Address(TREG, in_bytes(JavaThread::pending_deoptimization_offset()))); + __ li(AT, -1); + __ st_w(AT, Address(TREG, in_bytes(JavaThread::pending_deoptimization_offset()))); + + __ li(reason, (int32_t)Deoptimization::Unpack_reexecute); + __ move(A0, TREG); + __ move(A2, reason); // exec mode + __ call((address)Deoptimization::uncommon_trap, relocInfo::runtime_call_type); + __ bind(retaddr); + oop_maps->add_gc_map( __ pc()-start, map->deep_copy()); + __ addi_d(SP, SP, additional_words * wordSize); + + __ reset_last_Java_frame(false); + + __ b(after_fetch_unroll_info_call); + } // EnableJVMCI +#endif // INCLUDE_JVMCI + + int exception_offset = __ pc() - start; + // Prolog for exception case + + // all registers are dead at this entry point, except for V0 and + // V1 which contain the exception oop and exception pc + // respectively. Set them in TLS and fall thru to the + // unpack_with_exception_in_tls entry point. + +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + __ st_ptr(V1, thread, in_bytes(JavaThread::exception_pc_offset())); + __ st_ptr(V0, thread, in_bytes(JavaThread::exception_oop_offset())); + int exception_in_tls_offset = __ pc() - start; + // new implementation because exception oop is now passed in JavaThread + + // Prolog for exception case + // All registers must be preserved because they might be used by LinearScan + // Exceptiop oop and throwing PC are passed in JavaThread + // tos: stack at point of call to method that threw the exception (i.e. only + // args are on the stack, no return address) + + // Return address will be patched later with the throwing pc. The correct value is not + // available now because loading it from memory would destroy registers. + // Save everything in sight. 
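+  // At this point the exception oop and throwing pc have already been stored
+  // into the JavaThread (see exception_in_tls_offset above); after the register
+  // save below, the throwing pc is pulled back out of TLS and patched into the
+  // frame as the return address, then the exception-pc slot is cleared.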
+ // No need to update map as each call to save_live_registers will produce identical oopmap + (void) reg_save.save_live_registers(masm, additional_words, &frame_size_in_words); + + // Now it is safe to overwrite any register + // store the correct deoptimization type + __ li(reason, Deoptimization::Unpack_exception); + // load throwing pc from JavaThread and patch it as the return address + // of the current frame. Then clear the field in JavaThread +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + __ ld_ptr(V1, thread, in_bytes(JavaThread::exception_pc_offset())); + __ st_ptr(V1, SP, reg_save.ra_offset()); //save ra + __ st_ptr(R0, thread, in_bytes(JavaThread::exception_pc_offset())); + + +#ifdef ASSERT + // verify that there is really an exception oop in JavaThread + __ ld_ptr(AT, thread, in_bytes(JavaThread::exception_oop_offset())); + __ verify_oop(AT); + // verify that there is no pending exception + Label no_pending_exception; + __ ld_ptr(AT, thread, in_bytes(Thread::pending_exception_offset())); + __ beq(AT, R0, no_pending_exception); + __ stop("must not have pending exception here"); + __ bind(no_pending_exception); +#endif + __ bind(cont); + // Compiled code leaves the floating point stack dirty, empty it. + __ empty_FPU_stack(); + + + // Call C code. Need thread and this frame, but NOT official VM entry + // crud. We cannot block on this call, no GC can happen. +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + + __ move(A0, thread); + __ move(A1, reason); // exec_mode + __ addi_d(SP, SP, -additional_words * wordSize); + + Label retaddr; + __ set_last_Java_frame(NOREG, NOREG, retaddr); + + // Call fetch_unroll_info(). Need thread and this frame, but NOT official VM entry - cannot block on + // this call, no GC can happen. Call should capture return values. + + // TODO: confirm reloc + __ call(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info), relocInfo::runtime_call_type); + __ bind(retaddr); + oop_maps->add_gc_map(__ pc() - start, map); + __ addi_d(SP, SP, additional_words * wordSize); +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + __ reset_last_Java_frame(false); + +#if INCLUDE_JVMCI + if (EnableJVMCI) { + __ bind(after_fetch_unroll_info_call); + } +#endif + + // Load UnrollBlock into S7 + __ move(unroll, V0); + + + // Move the unpack kind to a safe place in the UnrollBlock because + // we are very short of registers + + Address unpack_kind(unroll, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()); + __ st_w(reason, unpack_kind); + // save the unpack_kind value + // Retrieve the possible live values (return values) + // All callee save registers representing jvm state + // are now in the vframeArray. + + Label noException; + __ li(AT, Deoptimization::Unpack_exception); + __ bne(AT, reason, noException);// Was exception pending? + __ ld_ptr(V0, thread, in_bytes(JavaThread::exception_oop_offset())); + __ ld_ptr(V1, thread, in_bytes(JavaThread::exception_pc_offset())); + __ st_ptr(R0, thread, in_bytes(JavaThread::exception_pc_offset())); + __ st_ptr(R0, thread, in_bytes(JavaThread::exception_oop_offset())); + + __ verify_oop(V0); + + // Overwrite the result registers with the exception results. + __ st_ptr(V0, SP, reg_save.v0_offset()); + __ st_ptr(V1, SP, reg_save.v1_offset()); + + __ bind(noException); + + + // Stack is back to only having register save data on the stack. + // Now restore the result registers. Everything else is either dead or captured + // in the vframeArray. 
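+  // What follows, roughly: pop the deoptimized frame using
+  // size_of_deoptimized_frame, then walk the UnrollBlock's frame_sizes[] /
+  // frame_pcs[] arrays, pushing one skeletal interpreter frame per entry
+  // (pc and fp by hand, then the body), and finally re-push a self-frame
+  // before calling Deoptimization::unpack_frames() to fill the skeletons in.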
+
+  reg_save.restore_result_registers(masm);
+  // All of the register save area has been popped off the stack. Only the
+  // return address remains.
+  // Pop all the frames we must move/replace.
+  // Frame picture (youngest to oldest)
+  // 1: self-frame (no frame link)
+  // 2: deopting frame (no frame link)
+  // 3: caller of deopting frame (could be compiled/interpreted).
+  //
+  // Note: by leaving the return address of self-frame on the stack
+  // and using the size of frame 2 to adjust the stack
+  // when we are done, the return to frame 3 will still be on the stack.
+
+  // register for the sender's sp
+  Register sender_sp = Rsender;
+  // register for frame pcs
+  Register pcs = T0;
+  // register for frame sizes
+  Register sizes = T1;
+  // register for frame count
+  Register count = T3;
+
+  // Pop deoptimized frame
+  __ ld_w(AT, unroll, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes());
+  __ add_d(SP, SP, AT);
+  // sp should be pointing at the return address to the caller (3)
+
+  // Load array of frame pcs into pcs
+  __ ld_ptr(pcs, unroll, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes());
+  __ addi_d(SP, SP, wordSize);  // trash the old pc
+  // Load array of frame sizes into sizes (T1)
+  __ ld_ptr(sizes, unroll, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes());
+
+
+
+  // Load count of frames into count (T3)
+  __ ld_w(count, unroll, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes());
+  // Pick up the initial fp we should save
+  __ ld_d(FP, unroll, Deoptimization::UnrollBlock::initial_info_offset_in_bytes());
+  // Now adjust the caller's stack to make up for the extra locals
+  // but record the original sp so that we can save it in the skeletal interpreter
+  // frame and the stack walking of interpreter_sender will get the unextended sp
+  // value and not the "real" sp value.
+  __ move(sender_sp, SP);
+  __ ld_w(AT, unroll, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes());
+  __ sub_d(SP, SP, AT);
+
+  Label loop;
+  __ bind(loop);
+  __ ld_d(T2, sizes, 0);            // Load frame size
+  __ ld_ptr(AT, pcs, 0);            // save return address
+  __ addi_d(T2, T2, -2 * wordSize); // we'll push pc and fp, by hand
+  __ push2(AT, FP);
+  __ move(FP, SP);
+  __ sub_d(SP, SP, T2);             // Prolog!
+  // This value is corrected by layout_activation_impl
+  __ st_d(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize);
+  __ st_d(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize); // Make it walkable
+  __ move(sender_sp, SP);           // pass to next frame
+  __ addi_d(count, count, -1);      // decrement counter
+  __ addi_d(sizes, sizes, wordSize); // Bump array pointer (sizes)
+  __ addi_d(pcs, pcs, wordSize);     // Bump array pointer (pcs)
+  __ bne(count, R0, loop);
+  __ ld_d(AT, pcs, 0);              // frame_pcs[number_of_frames] = Interpreter::deopt_entry(vtos, 0);
+  // Re-push self-frame
+  __ push2(AT, FP);
+  __ move(FP, SP);
+  __ st_d(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize);
+  __ st_d(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize);
+  __ addi_d(SP, SP, -(frame_size_in_words - 2 - additional_words) * wordSize);
+
+  // Restore frame locals after moving the frame
+  __ st_d(V0, SP, reg_save.v0_offset());
+  __ st_d(V1, SP, reg_save.v1_offset());
+  __ fst_d(F0, SP, reg_save.fpr0_offset());
+  __ fst_d(F1, SP, reg_save.fpr1_offset());
+
+  // Call unpack_frames(). Need thread and this frame, but NOT official VM entry - cannot block on
+  // this call, no GC can happen.
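+  // Assuming the usual prototype
+  //   BasicType Deoptimization::unpack_frames(JavaThread* thread, int exec_mode)
+  // A0 carries the thread and A1 the exec mode recorded in `reason` above.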
+ __ move(A1, reason); // exec_mode +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + __ move(A0, thread); // thread + __ addi_d(SP, SP, (-additional_words) *wordSize); + + // set last_Java_sp, last_Java_fp + Label L; + address the_pc = __ pc(); + __ bind(L); + __ set_last_Java_frame(NOREG, FP, L); + + __ li(AT, -(StackAlignmentInBytes)); + __ andr(SP, SP, AT); // Fix stack alignment as required by ABI + + __ call(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames), relocInfo::runtime_call_type); + // Revert SP alignment after call since we're going to do some SP relative addressing below + __ ld_d(SP, thread, in_bytes(JavaThread::last_Java_sp_offset())); + // Set an oopmap for the call site + oop_maps->add_gc_map(the_pc - start, new OopMap(frame_size_in_words, 0)); + + __ push(V0); + +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + __ reset_last_Java_frame(true); + + // Collect return values + __ ld_d(V0, SP, reg_save.v0_offset() + (additional_words + 1) * wordSize); + __ ld_d(V1, SP, reg_save.v1_offset() + (additional_words + 1) * wordSize); + // Pop float stack and store in local + __ fld_d(F0, SP, reg_save.fpr0_offset() + (additional_words + 1) * wordSize); + __ fld_d(F1, SP, reg_save.fpr1_offset() + (additional_words + 1) * wordSize); + + //FIXME, + // Clear floating point stack before returning to interpreter + __ empty_FPU_stack(); + //FIXME, we should consider about float and double + // Push a float or double return value if necessary. + __ leave(); + + // Jump to interpreter + __ jr(RA); + + masm->flush(); + _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, frame_size_in_words); + _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset); +#if INCLUDE_JVMCI + if (EnableJVMCI) { + _deopt_blob->set_uncommon_trap_offset(uncommon_trap_offset); + _deopt_blob->set_implicit_exception_uncommon_trap_offset(implicit_exception_uncommon_trap_offset); + } +#endif +} + +#ifdef COMPILER2 + +//------------------------------generate_uncommon_trap_blob-------------------- +// Ought to generate an ideal graph & compile, but here's some SPARC ASM +// instead. +void SharedRuntime::generate_uncommon_trap_blob() { + // allocate space for the code + ResourceMark rm; + // setup code generation tools + CodeBuffer buffer ("uncommon_trap_blob", 512*80 , 512*40 ); + MacroAssembler* masm = new MacroAssembler(&buffer); + + enum frame_layout { + fp_off, fp_off2, + return_off, return_off2, + framesize + }; + assert(framesize % 4 == 0, "sp not 16-byte aligned"); + address start = __ pc(); + + // S8 be used in C2 + __ li(S8, (long)Interpreter::dispatch_table(itos)); + // Push self-frame. + __ addi_d(SP, SP, -framesize * BytesPerInt); + + __ st_d(RA, SP, return_off * BytesPerInt); + __ st_d(FP, SP, fp_off * BytesPerInt); + + __ addi_d(FP, SP, fp_off * BytesPerInt); + + // Clear the floating point exception stack + __ empty_FPU_stack(); + + Register thread = TREG; + +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + // set last_Java_sp + Label retaddr; + __ set_last_Java_frame(NOREG, FP, retaddr); + // Call C code. Need thread but NOT official VM entry + // crud. We cannot block on this call, no GC can happen. Call should + // capture callee-saved registers as well as return values. 
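+  // Assuming the usual prototype
+  //   UnrollBlock* Deoptimization::uncommon_trap(JavaThread*, jint trap_request, jint exec_mode)
+  // the moves below pass the thread in A0, the trap request (already in T0) in
+  // A1, and Unpack_uncommon_trap as the exec mode in A2; the returned
+  // UnrollBlock* comes back in V0 and is copied into S7 further down.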
+ __ move(A0, thread); + // argument already in T0 + __ move(A1, T0); + __ addi_d(A2, R0, Deoptimization::Unpack_uncommon_trap); + __ call((address)Deoptimization::uncommon_trap, relocInfo::runtime_call_type); + __ bind(retaddr); + + // Set an oopmap for the call site + OopMapSet *oop_maps = new OopMapSet(); + OopMap* map = new OopMap( framesize, 0 ); + + oop_maps->add_gc_map(__ pc() - start, map); + +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + __ reset_last_Java_frame(false); + + // Load UnrollBlock into S7 + Register unroll = S7; + __ move(unroll, V0); + +#ifdef ASSERT + { Label L; + __ ld_ptr(AT, unroll, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()); + __ li(T4, Deoptimization::Unpack_uncommon_trap); + __ beq(AT, T4, L); + __ stop("SharedRuntime::generate_deopt_blob: expected Unpack_uncommon_trap"); + __ bind(L); + } +#endif + + // Pop all the frames we must move/replace. + // + // Frame picture (youngest to oldest) + // 1: self-frame (no frame link) + // 2: deopting frame (no frame link) + // 3: possible-i2c-adapter-frame + // 4: caller of deopting frame (could be compiled/interpreted. If interpreted we will create an + // and c2i here) + + __ addi_d(SP, SP, framesize * BytesPerInt); + + // Pop deoptimized frame + __ ld_w(AT, unroll, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes()); + __ add_d(SP, SP, AT); + + // register for frame pcs + Register pcs = T8; + // register for frame sizes + Register sizes = T4; + // register for frame count + Register count = T3; + // register for the sender's sp + Register sender_sp = T1; + + // sp should be pointing at the return address to the caller (4) + // Load array of frame pcs + __ ld_d(pcs, unroll, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()); + + // Load array of frame sizes + __ ld_d(sizes, unroll, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes()); + __ ld_wu(count, unroll, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes()); + + // Pick up the initial fp we should save + __ ld_d(FP, unroll, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()); + // Now adjust the caller's stack to make up for the extra locals + // but record the original sp so that we can save it in the skeletal interpreter + // frame and the stack walking of interpreter_sender will get the unextended sp + // value and not the "real" sp value. + + __ move(sender_sp, SP); + __ ld_w(AT, unroll, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes()); + __ sub_d(SP, SP, AT); + // Push interpreter frames in a loop + Label loop; + __ bind(loop); + __ ld_d(T2, sizes, 0); // Load frame size + __ ld_d(AT, pcs, 0); // save return address + __ addi_d(T2, T2, -2*wordSize); // we'll push pc and fp, by hand + __ push2(AT, FP); + __ move(FP, SP); + __ sub_d(SP, SP, T2); // Prolog! 
+ // This value is corrected by layout_activation_impl + __ st_d(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); + __ st_d(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize);// Make it walkable + __ move(sender_sp, SP); // pass to next frame + __ addi_d(count, count, -1); // decrement counter + __ addi_d(sizes, sizes, wordSize); // Bump array pointer (sizes) + __ addi_d(pcs, pcs, wordSize); // Bump array pointer (pcs) + __ bne(count, R0, loop); + + __ ld_d(RA, pcs, 0); + + // Re-push self-frame + // save old & set new FP + // save final return address + __ enter(); + + // Use FP because the frames look interpreted now + // Save "the_pc" since it cannot easily be retrieved using the last_java_SP after we aligned SP. + // Don't need the precise return PC here, just precise enough to point into this code blob. + Label L; + address the_pc = __ pc(); + __ bind(L); + __ set_last_Java_frame(NOREG, FP, L); + + __ li(AT, -(StackAlignmentInBytes)); + __ andr(SP, SP, AT); // Fix stack alignment as required by ABI + + // Call C code. Need thread but NOT official VM entry + // crud. We cannot block on this call, no GC can happen. Call should + // restore return values to their stack-slots with the new SP. + __ move(A0, thread); + __ li(A1, Deoptimization::Unpack_uncommon_trap); + __ call((address)Deoptimization::unpack_frames, relocInfo::runtime_call_type); + // Set an oopmap for the call site + oop_maps->add_gc_map(the_pc - start, new OopMap(framesize, 0)); + + __ reset_last_Java_frame(true); + + // Pop self-frame. + __ leave(); // Epilog! + + // Jump to interpreter + __ jr(RA); + // ------------- + // make sure all code is generated + masm->flush(); + _uncommon_trap_blob = UncommonTrapBlob::create(&buffer, oop_maps, framesize / 2); +} + +#endif // COMPILER2 + +//------------------------------generate_handler_blob------------------- +// +// Generate a special Compile2Runtime blob that saves all registers, and sets +// up an OopMap and calls safepoint code to stop the compiled code for +// a safepoint. +// +// This blob is jumped to (via a breakpoint and the signal handler) from a +// safepoint in compiled code. + +SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_type) { + + // Account for thread arg in our frame + const int additional_words = 0; + int frame_size_in_words; + + assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before"); + + ResourceMark rm; + OopMapSet *oop_maps = new OopMapSet(); + OopMap* map; + + // allocate space for the code + // setup code generation tools + CodeBuffer buffer ("handler_blob", 2048, 512); + MacroAssembler* masm = new MacroAssembler( &buffer); + + const Register thread = TREG; + address start = __ pc(); + bool cause_return = (poll_type == POLL_AT_RETURN); + RegisterSaver reg_save(poll_type == POLL_AT_VECTOR_LOOP /* save_vectors */); + +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + + map = reg_save.save_live_registers(masm, additional_words, &frame_size_in_words); + +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + + // The following is basically a call_VM. However, we need the precise + // address of the call in order to generate an oopmap. Hence, we do all the + // work outselvs. 
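+  // Concretely: set_last_Java_frame() publishes the anchor, the raw call is
+  // made, and the pc bound at `retaddr` is what gets registered in the oopmap
+  // via add_gc_map(__ pc() - start, map), so the safepoint machinery can map
+  // this exact return address back to the saved-register layout.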
+ + Label retaddr; + __ set_last_Java_frame(NOREG, NOREG, retaddr); + + if (!cause_return) { + // overwrite the return address pushed by save_live_registers + // Additionally, TSR is a callee-saved register so we can look at + // it later to determine if someone changed the return address for + // us! + __ ld_ptr(TSR, thread, in_bytes(JavaThread::saved_exception_pc_offset())); + __ st_ptr(TSR, SP, reg_save.ra_offset()); + } + + // Do the call + __ move(A0, thread); + // TODO: confirm reloc + __ call(call_ptr, relocInfo::runtime_call_type); + __ bind(retaddr); + + // Set an oopmap for the call site. This oopmap will map all + // oop-registers and debug-info registers as callee-saved. This + // will allow deoptimization at this safepoint to find all possible + // debug-info recordings, as well as let GC find all oops. + oop_maps->add_gc_map(__ pc() - start, map); + + Label noException; + + // Clear last_Java_sp again + __ reset_last_Java_frame(false); + + __ ld_ptr(AT, thread, in_bytes(Thread::pending_exception_offset())); + __ beq(AT, R0, noException); + + // Exception pending + + reg_save.restore_live_registers(masm); + //forward_exception_entry need return address on the stack + __ push(RA); + // TODO: confirm reloc + __ jmp((address)StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); + + // No exception case + __ bind(noException); + + Label no_adjust, bail; + if (SafepointMechanism::uses_thread_local_poll() && !cause_return) { + // If our stashed return pc was modified by the runtime we avoid touching it + __ ld_ptr(AT, SP, reg_save.ra_offset()); + __ bne(AT, TSR, no_adjust); + +#ifdef ASSERT + // Verify the correct encoding of the poll we're about to skip. + // See NativeInstruction::is_safepoint_poll() + __ ld_wu(AT, TSR, 0); + __ push(T5); + __ li(T5, 0xffc0001f); + __ andr(AT, AT, T5); + __ li(T5, 0x28800013); + __ xorr(AT, AT, T5); + __ pop(T5); + __ bne(AT, R0, bail); +#endif + // Adjust return pc forward to step over the safepoint poll instruction + __ addi_d(RA, TSR, 4); // NativeInstruction::instruction_size=4 + __ st_ptr(RA, SP, reg_save.ra_offset()); + } + + __ bind(no_adjust); + // Normal exit, register restoring and exit + reg_save.restore_live_registers(masm); + __ jr(RA); + +#ifdef ASSERT + __ bind(bail); + __ stop("Attempting to adjust pc to skip safepoint poll but the return point is not what we expected"); +#endif + + // Make sure all code is generated + masm->flush(); + // Fill-out other meta info + return SafepointBlob::create(&buffer, oop_maps, frame_size_in_words); +} + +// +// generate_resolve_blob - call resolution (static/virtual/opt-virtual/ic-miss +// +// Generate a stub that calls into vm to find out the proper destination +// of a java call. All the argument registers are live at this point +// but since this is generic code we don't know what they are and the caller +// must do any gc of the args. +// +RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const char* name) { + assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before"); + + // allocate space for the code + ResourceMark rm; + + //CodeBuffer buffer(name, 1000, 512); + //FIXME. 
code_size + CodeBuffer buffer(name, 2000, 2048); + MacroAssembler* masm = new MacroAssembler(&buffer); + + int frame_size_words; + RegisterSaver reg_save(false /* save_vectors */); + //we put the thread in A0 + + OopMapSet *oop_maps = new OopMapSet(); + OopMap* map = NULL; + + address start = __ pc(); + map = reg_save.save_live_registers(masm, 0, &frame_size_words); + + + int frame_complete = __ offset(); +#ifndef OPT_THREAD + const Register thread = T8; + __ get_thread(thread); +#else + const Register thread = TREG; +#endif + + __ move(A0, thread); + Label retaddr; + __ set_last_Java_frame(noreg, FP, retaddr); + // align the stack before invoke native + __ li(AT, -(StackAlignmentInBytes)); + __ andr(SP, SP, AT); + + // TODO: confirm reloc + __ call(destination, relocInfo::runtime_call_type); + __ bind(retaddr); + + // Set an oopmap for the call site. + // We need this not only for callee-saved registers, but also for volatile + // registers that the compiler might be keeping live across a safepoint. + oop_maps->add_gc_map(__ pc() - start, map); + // V0 contains the address we are going to jump to assuming no exception got installed +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + __ ld_ptr(SP, thread, in_bytes(JavaThread::last_Java_sp_offset())); + // clear last_Java_sp + __ reset_last_Java_frame(true); + // check for pending exceptions + Label pending; + __ ld_ptr(AT, thread, in_bytes(Thread::pending_exception_offset())); + __ bne(AT, R0, pending); + // get the returned Method* + __ get_vm_result_2(Rmethod, thread); + __ st_ptr(Rmethod, SP, reg_save.s3_offset()); + __ st_ptr(V0, SP, reg_save.t5_offset()); + reg_save.restore_live_registers(masm); + + // We are back the the original state on entry and ready to go the callee method. + __ jr(T5); + // Pending exception after the safepoint + + __ bind(pending); + + reg_save.restore_live_registers(masm); + + // exception pending => remove activation and forward to exception handler + //forward_exception_entry need return address on the stack + __ push(RA); +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + __ st_ptr(R0, thread, in_bytes(JavaThread::vm_result_offset())); + __ ld_ptr(V0, thread, in_bytes(Thread::pending_exception_offset())); + __ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); + // + // make sure all code is generated + masm->flush(); + RuntimeStub* tmp= RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_words, oop_maps, true); + return tmp; +} + +extern "C" int SpinPause() {return 0;} diff --git a/src/hotspot/cpu/loongarch/stubGenerator_loongarch_64.cpp b/src/hotspot/cpu/loongarch/stubGenerator_loongarch_64.cpp new file mode 100644 index 00000000000..7f73863b2e4 --- /dev/null +++ b/src/hotspot/cpu/loongarch/stubGenerator_loongarch_64.cpp @@ -0,0 +1,4804 @@ +/* + * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "gc/shared/barrierSet.hpp" +#include "gc/shared/barrierSetAssembler.hpp" +#include "interpreter/interpreter.hpp" +#include "nativeInst_loongarch.hpp" +#include "oops/instanceOop.hpp" +#include "oops/method.hpp" +#include "oops/objArrayKlass.hpp" +#include "oops/oop.inline.hpp" +#include "prims/methodHandles.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/handles.inline.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubCodeGenerator.hpp" +#include "runtime/stubRoutines.hpp" +#include "runtime/thread.inline.hpp" +#ifdef COMPILER2 +#include "opto/runtime.hpp" +#endif + +// Declaration and definition of StubGenerator (no .hpp file). +// For a more detailed description of the stub routine structure +// see the comment in stubRoutines.hpp + +#define __ _masm-> + +#define T0 RT0 +#define T1 RT1 +#define T2 RT2 +#define T3 RT3 +#define T4 RT4 +#define T5 RT5 +#define T6 RT6 +#define T7 RT7 +#define T8 RT8 + +#define TIMES_OOP (UseCompressedOops ? Address::times_4 : Address::times_8) + +//#ifdef PRODUCT +//#define BLOCK_COMMENT(str) /* nothing */ +//#else +//#define BLOCK_COMMENT(str) __ block_comment(str) +//#endif + +//#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") +const int MXCSR_MASK = 0xFFC0; // Mask out any pending exceptions + +// Stub Code definitions + +class StubGenerator: public StubCodeGenerator { + private: + + // This fig is not LA ABI. It is call Java from C ABI. + // Call stubs are used to call Java from C + // + // [ return_from_Java ] + // [ argument word n-1 ] <--- sp + // ... + // [ argument word 0 ] + // ... + // -8 [ S6 ] + // -7 [ S5 ] + // -6 [ S4 ] + // -5 [ S3 ] + // -4 [ S1 ] + // -3 [ TSR(S2) ] + // -2 [ LVP(S7) ] + // -1 [ BCP(S0) ] + // 0 [ saved fp ] <--- fp_after_call + // 1 [ return address ] + // 2 [ ptr. to call wrapper ] <--- a0 (old sp -->)fp + // 3 [ result ] <--- a1 + // 4 [ result_type ] <--- a2 + // 5 [ method ] <--- a3 + // 6 [ entry_point ] <--- a4 + // 7 [ parameters ] <--- a5 + // 8 [ parameter_size ] <--- a6 + // 9 [ thread ] <--- a7 + + // + // LA ABI does not save paras in sp. + // + // [ return_from_Java ] + // [ argument word n-1 ] <--- sp + // ... + // [ argument word 0 ] + //-22 [ F31 ] + // ... + //-15 [ F24 ] + //-14 [ S8 ] + //-13 [ thread ] + //-12 [ result_type ] <--- a2 + //-11 [ result ] <--- a1 + //-10 [ ] + // -9 [ ptr. to call wrapper ] <--- a0 + // -8 [ S6 ] + // -7 [ S5 ] + // -6 [ S4 ] + // -5 [ S3 ] + // -4 [ S1 ] + // -3 [ TSR(S2) ] + // -2 [ LVP(S7) ] + // -1 [ BCP(S0) ] + // 0 [ saved fp ] <--- fp_after_call + // 1 [ return address ] + // 2 [ ] <--- old sp + // + // Find a right place in the call_stub for S8. + // S8 will point to the starting point of Interpreter::dispatch_table(itos). + // It should be saved/restored before/after Java calls. 
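The slot indices in the layout above (and in the enum that follows) are word offsets relative to the FP established by enter(): negative offsets sit below the saved FP, positive ones above it. A tiny illustrative helper, not part of the patch, showing how such an index turns into an address:

  #include <cstdint>

  // Illustrative only: on LP64, intptr_t* arithmetic already scales by
  // wordSize (8), so slot(FP, thread_off) with thread_off == -13 is the
  // spilled thread argument and slot(FP, RA_off) with RA_off == +1 is the
  // saved return address.
  static intptr_t* frame_slot(intptr_t* fp, int word_offset) {
    return fp + word_offset;
  }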
+ // + enum call_stub_layout { + RA_off = 1, + FP_off = 0, + BCP_off = -1, + LVP_off = -2, + TSR_off = -3, + S1_off = -4, + S3_off = -5, + S4_off = -6, + S5_off = -7, + S6_off = -8, + call_wrapper_off = -9, + result_off = -11, + result_type_off = -12, + thread_off = -13, + S8_off = -14, + F24_off = -15, + F25_off = -16, + F26_off = -17, + F27_off = -18, + F28_off = -19, + F29_off = -20, + F30_off = -21, + F31_off = -22, + total_off = F31_off, + }; + + address generate_call_stub(address& return_address) { + assert((int)frame::entry_frame_call_wrapper_offset == (int)call_wrapper_off, "adjust this code"); + StubCodeMark mark(this, "StubRoutines", "call_stub"); + address start = __ pc(); + + // same as in generate_catch_exception()! + + // stub code + // save ra and fp + __ enter(); + // I think 14 is the max gap between argument and callee saved register + __ addi_d(SP, SP, total_off * wordSize); + __ st_d(BCP, FP, BCP_off * wordSize); + __ st_d(LVP, FP, LVP_off * wordSize); + __ st_d(TSR, FP, TSR_off * wordSize); + __ st_d(S1, FP, S1_off * wordSize); + __ st_d(S3, FP, S3_off * wordSize); + __ st_d(S4, FP, S4_off * wordSize); + __ st_d(S5, FP, S5_off * wordSize); + __ st_d(S6, FP, S6_off * wordSize); + __ st_d(A0, FP, call_wrapper_off * wordSize); + __ st_d(A1, FP, result_off * wordSize); + __ st_d(A2, FP, result_type_off * wordSize); + __ st_d(A7, FP, thread_off * wordSize); + __ st_d(S8, FP, S8_off * wordSize); + + __ fst_d(F24, FP, F24_off * wordSize); + __ fst_d(F25, FP, F25_off * wordSize); + __ fst_d(F26, FP, F26_off * wordSize); + __ fst_d(F27, FP, F27_off * wordSize); + __ fst_d(F28, FP, F28_off * wordSize); + __ fst_d(F29, FP, F29_off * wordSize); + __ fst_d(F30, FP, F30_off * wordSize); + __ fst_d(F31, FP, F31_off * wordSize); + + __ li(S8, (long)Interpreter::dispatch_table(itos)); + +#ifdef OPT_THREAD + __ move(TREG, A7); +#endif + //add for compressedoops + __ reinit_heapbase(); + +#ifdef ASSERT + // make sure we have no pending exceptions + { + Label L; + __ ld_d(AT, A7, in_bytes(Thread::pending_exception_offset())); + __ beq(AT, R0, L); + /* FIXME: I do not know how to realize stop in LA, do it in the future */ + __ stop("StubRoutines::call_stub: entered with pending exception"); + __ bind(L); + } +#endif + + // pass parameters if any + // A5: parameter + // A6: parameter_size + // T0: parameter_size_tmp(--) + // T2: offset(++) + // T3: tmp + Label parameters_done; + // judge if the parameter_size equals 0 + __ beq(A6, R0, parameters_done); + __ slli_d(AT, A6, Interpreter::logStackElementSize); + __ sub_d(SP, SP, AT); + __ li(AT, -StackAlignmentInBytes); + __ andr(SP, SP, AT); + // Copy Java parameters in reverse order (receiver last) + // Note that the argument order is inverted in the process + Label loop; + __ move(T0, A6); + __ move(T2, R0); + __ bind(loop); + + // get parameter + __ alsl_d(T3, T0, A5, LogBytesPerWord - 1); + __ ld_d(AT, T3, -wordSize); + __ alsl_d(T3, T2, SP, LogBytesPerWord - 1); + __ st_d(AT, T3, Interpreter::expr_offset_in_bytes(0)); + __ addi_d(T2, T2, 1); + __ addi_d(T0, T0, -1); + __ bne(T0, R0, loop); + // advance to next parameter + + // call Java function + __ bind(parameters_done); + + // receiver in V0, methodOop in Rmethod + + __ move(Rmethod, A3); + __ move(Rsender, SP); //set sender sp + __ jalr(A4); + return_address = __ pc(); + + Label common_return; + __ bind(common_return); + + // store result depending on type + // (everything that is not T_LONG, T_FLOAT or T_DOUBLE is treated as T_INT) + __ ld_d(T0, FP, result_off * wordSize); // result 
--> T0 + Label is_long, is_float, is_double, exit; + __ ld_d(T2, FP, result_type_off * wordSize); // result_type --> T2 + __ addi_d(T3, T2, (-1) * T_LONG); + __ beq(T3, R0, is_long); + __ addi_d(T3, T2, (-1) * T_FLOAT); + __ beq(T3, R0, is_float); + __ addi_d(T3, T2, (-1) * T_DOUBLE); + __ beq(T3, R0, is_double); + + // handle T_INT case + __ st_d(V0, T0, 0 * wordSize); + __ bind(exit); + + // restore + __ ld_d(BCP, FP, BCP_off * wordSize); + __ ld_d(LVP, FP, LVP_off * wordSize); + __ ld_d(S8, FP, S8_off * wordSize); + __ ld_d(TSR, FP, TSR_off * wordSize); + + __ ld_d(S1, FP, S1_off * wordSize); + __ ld_d(S3, FP, S3_off * wordSize); + __ ld_d(S4, FP, S4_off * wordSize); + __ ld_d(S5, FP, S5_off * wordSize); + __ ld_d(S6, FP, S6_off * wordSize); + + __ fld_d(F24, FP, F24_off * wordSize); + __ fld_d(F25, FP, F25_off * wordSize); + __ fld_d(F26, FP, F26_off * wordSize); + __ fld_d(F27, FP, F27_off * wordSize); + __ fld_d(F28, FP, F28_off * wordSize); + __ fld_d(F29, FP, F29_off * wordSize); + __ fld_d(F30, FP, F30_off * wordSize); + __ fld_d(F31, FP, F31_off * wordSize); + + __ leave(); + + // return + __ jr(RA); + + // handle return types different from T_INT + __ bind(is_long); + __ st_d(V0, T0, 0 * wordSize); + __ b(exit); + + __ bind(is_float); + __ fst_s(FV0, T0, 0 * wordSize); + __ b(exit); + + __ bind(is_double); + __ fst_d(FV0, T0, 0 * wordSize); + __ b(exit); + StubRoutines::la::set_call_stub_compiled_return(__ pc()); + __ b(common_return); + return start; + } + + // Return point for a Java call if there's an exception thrown in + // Java code. The exception is caught and transformed into a + // pending exception stored in JavaThread that can be tested from + // within the VM. + // + // Note: Usually the parameters are removed by the callee. In case + // of an exception crossing an activation frame boundary, that is + // not the case if the callee is compiled code => need to setup the + // sp. + // + // V0: exception oop + + address generate_catch_exception() { + StubCodeMark mark(this, "StubRoutines", "catch_exception"); + address start = __ pc(); + + Register thread = TREG; + + // get thread directly +#ifndef OPT_THREAD + __ ld_d(thread, FP, thread_off * wordSize); +#endif + +#ifdef ASSERT + // verify that threads correspond + { Label L; + __ get_thread(T8); + __ beq(T8, thread, L); + __ stop("StubRoutines::catch_exception: threads must correspond"); + __ bind(L); + } +#endif + // set pending exception + __ verify_oop(V0); + __ st_d(V0, thread, in_bytes(Thread::pending_exception_offset())); + __ li(AT, (long)__FILE__); + __ st_d(AT, thread, in_bytes(Thread::exception_file_offset ())); + __ li(AT, (long)__LINE__); + __ st_d(AT, thread, in_bytes(Thread::exception_line_offset ())); + + // complete return to VM + assert(StubRoutines::_call_stub_return_address != NULL, "_call_stub_return_address must have been generated before"); + __ jmp(StubRoutines::_call_stub_return_address, relocInfo::none); + return start; + } + + // Continuation point for runtime calls returning with a pending + // exception. The pending exception check happened in the runtime + // or native call stub. The pending exception in Thread is + // converted into a Java-level exception. + // + // Contract with Java-level exception handlers: + // V0: exception + // V1: throwing pc + // + // NOTE: At entry of this stub, exception-pc must be on stack !! 
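For orientation, a rough C-level outline of the forward-exception stub that follows (illustrative sketch only; in the generated code V0 carries the exception, V1 the throwing pc, T4 the handler address, and the throwing pc is popped off the stack):

  #include <cstddef>

  struct ThreadSketch { void* pending_exception; };
  typedef void* (*HandlerLookup)(ThreadSketch* thread, void* return_pc);

  static void forward_exception_outline(ThreadSketch* thread, void** sp,
                                        HandlerLookup exception_handler_for_return_address,
                                        void (*jump_to)(void* handler, void* exception, void* pc)) {
    void* throwing_pc = *sp;                                   // ld_d(A1, SP, 0)
    void* handler = exception_handler_for_return_address(thread, throwing_pc);
    void* exception = thread->pending_exception;               // load pending exception ...
    thread->pending_exception = nullptr;                       // ... and clear it
    jump_to(handler, exception, throwing_pc);                  // jr(T4)
  }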
+ + address generate_forward_exception() { + StubCodeMark mark(this, "StubRoutines", "forward exception"); + //Register thread = TREG; + Register thread = TREG; + address start = __ pc(); + + // Upon entry, the sp points to the return address returning into + // Java (interpreted or compiled) code; i.e., the return address + // throwing pc. + // + // Arguments pushed before the runtime call are still on the stack + // but the exception handler will reset the stack pointer -> + // ignore them. A potential result in registers can be ignored as + // well. + +#ifndef OPT_THREAD + __ get_thread(thread); +#endif +#ifdef ASSERT + // make sure this code is only executed if there is a pending exception + { + Label L; + __ ld_d(AT, thread, in_bytes(Thread::pending_exception_offset())); + __ bne(AT, R0, L); + __ stop("StubRoutines::forward exception: no pending exception (1)"); + __ bind(L); + } +#endif + + // compute exception handler into T4 + __ ld_d(A1, SP, 0); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), thread, A1); + __ move(T4, V0); + __ pop(V1); + +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + __ ld_d(V0, thread, in_bytes(Thread::pending_exception_offset())); + __ st_d(R0, thread, in_bytes(Thread::pending_exception_offset())); + +#ifdef ASSERT + // make sure exception is set + { + Label L; + __ bne(V0, R0, L); + __ stop("StubRoutines::forward exception: no pending exception (2)"); + __ bind(L); + } +#endif + + // continue at exception handler (return address removed) + // V0: exception + // T4: exception handler + // V1: throwing pc + __ verify_oop(V0); + __ jr(T4); + return start; + } + + // Non-destructive plausibility checks for oops + // + address generate_verify_oop() { + StubCodeMark mark(this, "StubRoutines", "verify_oop"); + address start = __ pc(); + __ verify_oop_subroutine(); + address end = __ pc(); + return start; + } + + // + // Generate stub for array fill. If "aligned" is true, the + // "to" address is assumed to be heapword aligned. 
+ // + // Arguments for generated stub: + // to: A0 + // value: A1 + // count: A2 treated as signed + // + address generate_fill(BasicType t, bool aligned, const char *name) { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", name); + address start = __ pc(); + + const Register to = A0; // source array address + const Register value = A1; // value + const Register count = A2; // elements count + + const Register end = T5; // source array address end + const Register tmp = T8; // temp register + + Label L_fill_elements; + + int shift = -1; + switch (t) { + case T_BYTE: + shift = 0; + __ slti(AT, count, 9); // Short arrays (<= 8 bytes) fill by element + __ bstrins_d(value, value, 15, 8); // 8 bit -> 16 bit + __ bstrins_d(value, value, 31, 16); // 16 bit -> 32 bit + __ bstrins_d(value, value, 63, 32); // 32 bit -> 64 bit + __ bnez(AT, L_fill_elements); + break; + case T_SHORT: + shift = 1; + __ slti(AT, count, 5); // Short arrays (<= 8 bytes) fill by element + __ bstrins_d(value, value, 31, 16); // 16 bit -> 32 bit + __ bstrins_d(value, value, 63, 32); // 32 bit -> 64 bit + __ bnez(AT, L_fill_elements); + break; + case T_INT: + shift = 2; + __ slti(AT, count, 3); // Short arrays (<= 8 bytes) fill by element + __ bstrins_d(value, value, 63, 32); // 32 bit -> 64 bit + __ bnez(AT, L_fill_elements); + break; + default: ShouldNotReachHere(); + } + + switch (t) { + case T_BYTE: + __ add_d(end, to, count); + break; + case T_SHORT: + case T_INT: + __ alsl_d(end, count, to, shift-1); + break; + default: ShouldNotReachHere(); + } + if (!aligned) { + __ st_d(value, to, 0); + __ bstrins_d(to, R0, 2, 0); + __ addi_d(to, to, 8); + } + __ st_d(value, end, -8); + __ bstrins_d(end, R0, 2, 0); + + // + // Fill large chunks + // + Label L_loop_begin, L_not_64bytes_fill, L_loop_end, L_jtab1, L_jtab2; + __ addi_d(AT, to, 64); + __ blt(end, AT, L_not_64bytes_fill); + __ addi_d(to, to, 64); + __ bind(L_loop_begin); + __ st_d(value, to, -8); + __ st_d(value, to, -16); + __ st_d(value, to, -24); + __ st_d(value, to, -32); + __ st_d(value, to, -40); + __ st_d(value, to, -48); + __ st_d(value, to, -56); + __ st_d(value, to, -64); + __ addi_d(to, to, 64); + __ bge(end, to, L_loop_begin); + __ addi_d(to, to, -64); + __ beq(to, end, L_loop_end); + + __ bind(L_not_64bytes_fill); + // There are 0 - 7 words + __ lipc(AT, L_jtab1); + __ sub_d(tmp, end, to); + __ alsl_d(AT, tmp, AT, 1); + __ jr(AT); + + __ bind(L_jtab1); + // 0: + __ jr(RA); + __ nop(); + __ nop(); + __ nop(); + __ nop(); + __ nop(); + __ nop(); + __ nop(); + + // 1: + __ st_d(value, to, 0); + __ jr(RA); + __ nop(); + __ nop(); + __ nop(); + __ nop(); + __ nop(); + __ nop(); + + // 2: + __ st_d(value, to, 0); + __ st_d(value, to, 8); + __ jr(RA); + __ nop(); + __ nop(); + __ nop(); + __ nop(); + __ nop(); + + // 3: + __ st_d(value, to, 0); + __ st_d(value, to, 8); + __ st_d(value, to, 16); + __ jr(RA); + __ nop(); + __ nop(); + __ nop(); + __ nop(); + + // 4: + __ st_d(value, to, 0); + __ st_d(value, to, 8); + __ st_d(value, to, 16); + __ st_d(value, to, 24); + __ jr(RA); + __ nop(); + __ nop(); + __ nop(); + + // 5: + __ st_d(value, to, 0); + __ st_d(value, to, 8); + __ st_d(value, to, 16); + __ st_d(value, to, 24); + __ st_d(value, to, 32); + __ jr(RA); + __ nop(); + __ nop(); + + // 6: + __ st_d(value, to, 0); + __ st_d(value, to, 8); + __ st_d(value, to, 16); + __ st_d(value, to, 24); + __ st_d(value, to, 32); + __ st_d(value, to, 40); + __ jr(RA); + __ nop(); + + // 7: + __ st_d(value, to, 0); + __ st_d(value, to, 8); + __ 
st_d(value, to, 16); + __ st_d(value, to, 24); + __ st_d(value, to, 32); + __ st_d(value, to, 40); + __ st_d(value, to, 48); + + __ bind(L_loop_end); + __ jr(RA); + + // Short arrays (<= 8 bytes) + __ bind(L_fill_elements); + __ lipc(AT, L_jtab2); + __ slli_d(tmp, count, 4 + shift); + __ add_d(AT, AT, tmp); + __ jr(AT); + + __ bind(L_jtab2); + // 0: + __ jr(RA); + __ nop(); + __ nop(); + __ nop(); + + // 1: + __ st_b(value, to, 0); + __ jr(RA); + __ nop(); + __ nop(); + + // 2: + __ st_h(value, to, 0); + __ jr(RA); + __ nop(); + __ nop(); + + // 3: + __ st_h(value, to, 0); + __ st_b(value, to, 2); + __ jr(RA); + __ nop(); + + // 4: + __ st_w(value, to, 0); + __ jr(RA); + __ nop(); + __ nop(); + + // 5: + __ st_w(value, to, 0); + __ st_b(value, to, 4); + __ jr(RA); + __ nop(); + + // 6: + __ st_w(value, to, 0); + __ st_h(value, to, 4); + __ jr(RA); + __ nop(); + + // 7: + __ st_w(value, to, 0); + __ st_w(value, to, 3); + __ jr(RA); + __ nop(); + + // 8: + __ st_d(value, to, 0); + __ jr(RA); + return start; + } + + // + // Generate overlap test for array copy stubs + // + // Input: + // A0 - source array address + // A1 - destination array address + // A2 - element count + // + // Temp: + // AT - destination array address - source array address + // T4 - element count * element size + // + void array_overlap_test(address no_overlap_target, int log2_elem_size) { + __ slli_d(T4, A2, log2_elem_size); + __ sub_d(AT, A1, A0); + __ bgeu(AT, T4, no_overlap_target); + } + + // disjoint large copy + void generate_disjoint_large_copy(Label &entry, const char *name) { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", name); + + Label loop, le32, le16, le8, lt8; + + __ bind(entry); + __ add_d(A3, A1, A2); + __ add_d(A2, A0, A2); + __ ld_d(A6, A0, 0); + __ ld_d(A7, A2, -8); + + __ andi(T1, A0, 7); + __ sub_d(T0, R0, T1); + __ addi_d(T0, T0, 8); + + __ add_d(A0, A0, T0); + __ add_d(A5, A1, T0); + + __ addi_d(A4, A2, -64); + __ bgeu(A0, A4, le32); + + __ bind(loop); + __ ld_d(T0, A0, 0); + __ ld_d(T1, A0, 8); + __ ld_d(T2, A0, 16); + __ ld_d(T3, A0, 24); + __ ld_d(T4, A0, 32); + __ ld_d(T5, A0, 40); + __ ld_d(T6, A0, 48); + __ ld_d(T7, A0, 56); + __ addi_d(A0, A0, 64); + __ st_d(T0, A5, 0); + __ st_d(T1, A5, 8); + __ st_d(T2, A5, 16); + __ st_d(T3, A5, 24); + __ st_d(T4, A5, 32); + __ st_d(T5, A5, 40); + __ st_d(T6, A5, 48); + __ st_d(T7, A5, 56); + __ addi_d(A5, A5, 64); + __ bltu(A0, A4, loop); + + __ bind(le32); + __ addi_d(A4, A2, -32); + __ bgeu(A0, A4, le16); + __ ld_d(T0, A0, 0); + __ ld_d(T1, A0, 8); + __ ld_d(T2, A0, 16); + __ ld_d(T3, A0, 24); + __ addi_d(A0, A0, 32); + __ st_d(T0, A5, 0); + __ st_d(T1, A5, 8); + __ st_d(T2, A5, 16); + __ st_d(T3, A5, 24); + __ addi_d(A5, A5, 32); + + __ bind(le16); + __ addi_d(A4, A2, -16); + __ bgeu(A0, A4, le8); + __ ld_d(T0, A0, 0); + __ ld_d(T1, A0, 8); + __ addi_d(A0, A0, 16); + __ st_d(T0, A5, 0); + __ st_d(T1, A5, 8); + __ addi_d(A5, A5, 16); + + __ bind(le8); + __ addi_d(A4, A2, -8); + __ bgeu(A0, A4, lt8); + __ ld_d(T0, A0, 0); + __ st_d(T0, A5, 0); + + __ bind(lt8); + __ st_d(A6, A1, 0); + __ st_d(A7, A3, -8); + __ move(A0, R0); + __ jr(RA); + } + + // disjoint large copy lsx + void generate_disjoint_large_copy_lsx(Label &entry, const char *name) { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", name); + + Label loop, le64, le32, le16, lt16; + + __ bind(entry); + __ add_d(A3, A1, A2); + __ add_d(A2, A0, A2); + __ vld(F0, A0, 0); + __ vld(F1, A2, -16); + + __ andi(T1, A0, 15); + __ sub_d(T0, R0, T1); + 
__ addi_d(T0, T0, 16); + + __ add_d(A0, A0, T0); + __ add_d(A5, A1, T0); + + __ addi_d(A4, A2, -128); + __ bgeu(A0, A4, le64); + + __ bind(loop); + __ vld(FT0, A0, 0); + __ vld(FT1, A0, 16); + __ vld(FT2, A0, 32); + __ vld(FT3, A0, 48); + __ vld(FT4, A0, 64); + __ vld(FT5, A0, 80); + __ vld(FT6, A0, 96); + __ vld(FT7, A0, 112); + __ addi_d(A0, A0, 128); + __ vst(FT0, A5, 0); + __ vst(FT1, A5, 16); + __ vst(FT2, A5, 32); + __ vst(FT3, A5, 48); + __ vst(FT4, A5, 64); + __ vst(FT5, A5, 80); + __ vst(FT6, A5, 96); + __ vst(FT7, A5, 112); + __ addi_d(A5, A5, 128); + __ bltu(A0, A4, loop); + + __ bind(le64); + __ addi_d(A4, A2, -64); + __ bgeu(A0, A4, le32); + __ vld(FT0, A0, 0); + __ vld(FT1, A0, 16); + __ vld(FT2, A0, 32); + __ vld(FT3, A0, 48); + __ addi_d(A0, A0, 64); + __ vst(FT0, A5, 0); + __ vst(FT1, A5, 16); + __ vst(FT2, A5, 32); + __ vst(FT3, A5, 48); + __ addi_d(A5, A5, 64); + + __ bind(le32); + __ addi_d(A4, A2, -32); + __ bgeu(A0, A4, le16); + __ vld(FT0, A0, 0); + __ vld(FT1, A0, 16); + __ addi_d(A0, A0, 32); + __ vst(FT0, A5, 0); + __ vst(FT1, A5, 16); + __ addi_d(A5, A5, 32); + + __ bind(le16); + __ addi_d(A4, A2, -16); + __ bgeu(A0, A4, lt16); + __ vld(FT0, A0, 0); + __ vst(FT0, A5, 0); + + __ bind(lt16); + __ vst(F0, A1, 0); + __ vst(F1, A3, -16); + + __ move(A0, R0); + __ jr(RA); + } + + // disjoint large copy lasx + void generate_disjoint_large_copy_lasx(Label &entry, const char *name) { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", name); + + Label loop, le128, le64, le32, lt32; + + __ bind(entry); + __ add_d(A3, A1, A2); + __ add_d(A2, A0, A2); + __ xvld(F0, A0, 0); + __ xvld(F1, A2, -32); + + __ andi(T1, A0, 31); + __ sub_d(T0, R0, T1); + __ addi_d(T0, T0, 32); + + __ add_d(A0, A0, T0); + __ add_d(A5, A1, T0); + + __ addi_d(A4, A2, -256); + __ bgeu(A0, A4, le128); + + __ bind(loop); + __ xvld(FT0, A0, 0); + __ xvld(FT1, A0, 32); + __ xvld(FT2, A0, 64); + __ xvld(FT3, A0, 96); + __ xvld(FT4, A0, 128); + __ xvld(FT5, A0, 160); + __ xvld(FT6, A0, 192); + __ xvld(FT7, A0, 224); + __ addi_d(A0, A0, 256); + __ xvst(FT0, A5, 0); + __ xvst(FT1, A5, 32); + __ xvst(FT2, A5, 64); + __ xvst(FT3, A5, 96); + __ xvst(FT4, A5, 128); + __ xvst(FT5, A5, 160); + __ xvst(FT6, A5, 192); + __ xvst(FT7, A5, 224); + __ addi_d(A5, A5, 256); + __ bltu(A0, A4, loop); + + __ bind(le128); + __ addi_d(A4, A2, -128); + __ bgeu(A0, A4, le64); + __ xvld(FT0, A0, 0); + __ xvld(FT1, A0, 32); + __ xvld(FT2, A0, 64); + __ xvld(FT3, A0, 96); + __ addi_d(A0, A0, 128); + __ xvst(FT0, A5, 0); + __ xvst(FT1, A5, 32); + __ xvst(FT2, A5, 64); + __ xvst(FT3, A5, 96); + __ addi_d(A5, A5, 128); + + __ bind(le64); + __ addi_d(A4, A2, -64); + __ bgeu(A0, A4, le32); + __ xvld(FT0, A0, 0); + __ xvld(FT1, A0, 32); + __ addi_d(A0, A0, 64); + __ xvst(FT0, A5, 0); + __ xvst(FT1, A5, 32); + __ addi_d(A5, A5, 64); + + __ bind(le32); + __ addi_d(A4, A2, -32); + __ bgeu(A0, A4, lt32); + __ xvld(FT0, A0, 0); + __ xvst(FT0, A5, 0); + + __ bind(lt32); + __ xvst(F0, A1, 0); + __ xvst(F1, A3, -32); + + __ move(A0, R0); + __ jr(RA); + } + + // conjoint large copy + void generate_conjoint_large_copy(Label &entry, const char *name) { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", name); + + Label loop, le32, le16, le8, lt8; + + __ bind(entry); + __ add_d(A3, A1, A2); + __ add_d(A2, A0, A2); + __ ld_d(A6, A0, 0); + __ ld_d(A7, A2, -8); + + __ andi(T1, A2, 7); + __ sub_d(A2, A2, T1); + __ sub_d(A5, A3, T1); + + __ addi_d(A4, A0, 64); + __ bgeu(A4, A2, le32); + + __ bind(loop); + __ 
ld_d(T0, A2, -8); + __ ld_d(T1, A2, -16); + __ ld_d(T2, A2, -24); + __ ld_d(T3, A2, -32); + __ ld_d(T4, A2, -40); + __ ld_d(T5, A2, -48); + __ ld_d(T6, A2, -56); + __ ld_d(T7, A2, -64); + __ addi_d(A2, A2, -64); + __ st_d(T0, A5, -8); + __ st_d(T1, A5, -16); + __ st_d(T2, A5, -24); + __ st_d(T3, A5, -32); + __ st_d(T4, A5, -40); + __ st_d(T5, A5, -48); + __ st_d(T6, A5, -56); + __ st_d(T7, A5, -64); + __ addi_d(A5, A5, -64); + __ bltu(A4, A2, loop); + + __ bind(le32); + __ addi_d(A4, A0, 32); + __ bgeu(A4, A2, le16); + __ ld_d(T0, A2, -8); + __ ld_d(T1, A2, -16); + __ ld_d(T2, A2, -24); + __ ld_d(T3, A2, -32); + __ addi_d(A2, A2, -32); + __ st_d(T0, A5, -8); + __ st_d(T1, A5, -16); + __ st_d(T2, A5, -24); + __ st_d(T3, A5, -32); + __ addi_d(A5, A5, -32); + + __ bind(le16); + __ addi_d(A4, A0, 16); + __ bgeu(A4, A2, le8); + __ ld_d(T0, A2, -8); + __ ld_d(T1, A2, -16); + __ addi_d(A2, A2, -16); + __ st_d(T0, A5, -8); + __ st_d(T1, A5, -16); + __ addi_d(A5, A5, -16); + + __ bind(le8); + __ addi_d(A4, A0, 8); + __ bgeu(A4, A2, lt8); + __ ld_d(T0, A2, -8); + __ st_d(T0, A5, -8); + + __ bind(lt8); + __ st_d(A6, A1, 0); + __ st_d(A7, A3, -8); + __ move(A0, R0); + __ jr(RA); + } + + // conjoint large copy lsx + void generate_conjoint_large_copy_lsx(Label &entry, const char *name) { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", name); + + Label loop, le64, le32, le16, lt16; + + __ bind(entry); + __ add_d(A3, A1, A2); + __ add_d(A2, A0, A2); + __ vld(F0, A0, 0); + __ vld(F1, A2, -16); + + __ andi(T1, A2, 15); + __ sub_d(A2, A2, T1); + __ sub_d(A5, A3, T1); + + __ addi_d(A4, A0, 128); + __ bgeu(A4, A2, le64); + + __ bind(loop); + __ vld(FT0, A2, -16); + __ vld(FT1, A2, -32); + __ vld(FT2, A2, -48); + __ vld(FT3, A2, -64); + __ vld(FT4, A2, -80); + __ vld(FT5, A2, -96); + __ vld(FT6, A2, -112); + __ vld(FT7, A2, -128); + __ addi_d(A2, A2, -128); + __ vst(FT0, A5, -16); + __ vst(FT1, A5, -32); + __ vst(FT2, A5, -48); + __ vst(FT3, A5, -64); + __ vst(FT4, A5, -80); + __ vst(FT5, A5, -96); + __ vst(FT6, A5, -112); + __ vst(FT7, A5, -128); + __ addi_d(A5, A5, -128); + __ bltu(A4, A2, loop); + + __ bind(le64); + __ addi_d(A4, A0, 64); + __ bgeu(A4, A2, le32); + __ vld(FT0, A2, -16); + __ vld(FT1, A2, -32); + __ vld(FT2, A2, -48); + __ vld(FT3, A2, -64); + __ addi_d(A2, A2, -64); + __ vst(FT0, A5, -16); + __ vst(FT1, A5, -32); + __ vst(FT2, A5, -48); + __ vst(FT3, A5, -64); + __ addi_d(A5, A5, -64); + + __ bind(le32); + __ addi_d(A4, A0, 32); + __ bgeu(A4, A2, le16); + __ vld(FT0, A2, -16); + __ vld(FT1, A2, -32); + __ addi_d(A2, A2, -32); + __ vst(FT0, A5, -16); + __ vst(FT1, A5, -32); + __ addi_d(A5, A5, -32); + + __ bind(le16); + __ addi_d(A4, A0, 16); + __ bgeu(A4, A2, lt16); + __ vld(FT0, A2, -16); + __ vst(FT0, A5, -16); + + __ bind(lt16); + __ vst(F0, A1, 0); + __ vst(F1, A3, -16); + + __ move(A0, R0); + __ jr(RA); + } + + // conjoint large copy lasx + void generate_conjoint_large_copy_lasx(Label &entry, const char *name) { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", name); + + Label loop, le128, le64, le32, lt32; + + __ bind(entry); + __ add_d(A3, A1, A2); + __ add_d(A2, A0, A2); + __ xvld(F0, A0, 0); + __ xvld(F1, A2, -32); + + __ andi(T1, A2, 31); + __ sub_d(A2, A2, T1); + __ sub_d(A5, A3, T1); + + __ addi_d(A4, A0, 256); + __ bgeu(A4, A2, le128); + + __ bind(loop); + __ xvld(FT0, A2, -32); + __ xvld(FT1, A2, -64); + __ xvld(FT2, A2, -96); + __ xvld(FT3, A2, -128); + __ xvld(FT4, A2, -160); + __ xvld(FT5, A2, -192); + __ xvld(FT6, A2, 
-224); + __ xvld(FT7, A2, -256); + __ addi_d(A2, A2, -256); + __ xvst(FT0, A5, -32); + __ xvst(FT1, A5, -64); + __ xvst(FT2, A5, -96); + __ xvst(FT3, A5, -128); + __ xvst(FT4, A5, -160); + __ xvst(FT5, A5, -192); + __ xvst(FT6, A5, -224); + __ xvst(FT7, A5, -256); + __ addi_d(A5, A5, -256); + __ bltu(A4, A2, loop); + + __ bind(le128); + __ addi_d(A4, A0, 128); + __ bgeu(A4, A2, le64); + __ xvld(FT0, A2, -32); + __ xvld(FT1, A2, -64); + __ xvld(FT2, A2, -96); + __ xvld(FT3, A2, -128); + __ addi_d(A2, A2, -128); + __ xvst(FT0, A5, -32); + __ xvst(FT1, A5, -64); + __ xvst(FT2, A5, -96); + __ xvst(FT3, A5, -128); + __ addi_d(A5, A5, -128); + + __ bind(le64); + __ addi_d(A4, A0, 64); + __ bgeu(A4, A2, le32); + __ xvld(FT0, A2, -32); + __ xvld(FT1, A2, -64); + __ addi_d(A2, A2, -64); + __ xvst(FT0, A5, -32); + __ xvst(FT1, A5, -64); + __ addi_d(A5, A5, -64); + + __ bind(le32); + __ addi_d(A4, A0, 32); + __ bgeu(A4, A2, lt32); + __ xvld(FT0, A2, -32); + __ xvst(FT0, A5, -32); + + __ bind(lt32); + __ xvst(F0, A1, 0); + __ xvst(F1, A3, -32); + + __ move(A0, R0); + __ jr(RA); + } + + // Byte small copy: less than { int:9, lsx:17, lasx:33 } elements. + void generate_byte_small_copy(Label &entry, const char *name) { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", name); + + Label L; + __ bind(entry); + __ lipc(AT, L); + __ slli_d(A2, A2, 5); + __ add_d(AT, AT, A2); + __ jr(AT); + + __ bind(L); + // 0: + __ move(A0, R0); + __ jr(RA); + __ nop(); + __ nop(); + __ nop(); + __ nop(); + __ nop(); + __ nop(); + + // 1: + __ ld_b(AT, A0, 0); + __ st_b(AT, A1, 0); + __ move(A0, R0); + __ jr(RA); + __ nop(); + __ nop(); + __ nop(); + __ nop(); + + // 2: + __ ld_h(AT, A0, 0); + __ st_h(AT, A1, 0); + __ move(A0, R0); + __ jr(RA); + __ nop(); + __ nop(); + __ nop(); + __ nop(); + + // 3: + __ ld_h(AT, A0, 0); + __ ld_b(A2, A0, 2); + __ st_h(AT, A1, 0); + __ st_b(A2, A1, 2); + __ move(A0, R0); + __ jr(RA); + __ nop(); + __ nop(); + + // 4: + __ ld_w(AT, A0, 0); + __ st_w(AT, A1, 0); + __ move(A0, R0); + __ jr(RA); + __ nop(); + __ nop(); + __ nop(); + __ nop(); + + // 5: + __ ld_w(AT, A0, 0); + __ ld_b(A2, A0, 4); + __ st_w(AT, A1, 0); + __ st_b(A2, A1, 4); + __ move(A0, R0); + __ jr(RA); + __ nop(); + __ nop(); + + // 6: + __ ld_w(AT, A0, 0); + __ ld_h(A2, A0, 4); + __ st_w(AT, A1, 0); + __ st_h(A2, A1, 4); + __ move(A0, R0); + __ jr(RA); + __ nop(); + __ nop(); + + // 7: + __ ld_w(AT, A0, 0); + __ ld_w(A2, A0, 3); + __ st_w(AT, A1, 0); + __ st_w(A2, A1, 3); + __ move(A0, R0); + __ jr(RA); + __ nop(); + __ nop(); + + // 8: + __ ld_d(AT, A0, 0); + __ st_d(AT, A1, 0); + __ move(A0, R0); + __ jr(RA); + + if (!UseLSX) + return; + + __ nop(); + __ nop(); + __ nop(); + __ nop(); + + // 9: + __ ld_d(AT, A0, 0); + __ ld_b(A2, A0, 8); + __ st_d(AT, A1, 0); + __ st_b(A2, A1, 8); + __ move(A0, R0); + __ jr(RA); + __ nop(); + __ nop(); + + // 10: + __ ld_d(AT, A0, 0); + __ ld_h(A2, A0, 8); + __ st_d(AT, A1, 0); + __ st_h(A2, A1, 8); + __ move(A0, R0); + __ jr(RA); + __ nop(); + __ nop(); + + // 11: + __ ld_d(AT, A0, 0); + __ ld_w(A2, A0, 7); + __ st_d(AT, A1, 0); + __ st_w(A2, A1, 7); + __ move(A0, R0); + __ jr(RA); + __ nop(); + __ nop(); + + // 12: + __ ld_d(AT, A0, 0); + __ ld_w(A2, A0, 8); + __ st_d(AT, A1, 0); + __ st_w(A2, A1, 8); + __ move(A0, R0); + __ jr(RA); + __ nop(); + __ nop(); + + // 13: + __ ld_d(AT, A0, 0); + __ ld_d(A2, A0, 5); + __ st_d(AT, A1, 0); + __ st_d(A2, A1, 5); + __ move(A0, R0); + __ jr(RA); + __ nop(); + __ nop(); + + // 14: + __ ld_d(AT, A0, 0); + __ ld_d(A2, A0, 6); + 
__ st_d(AT, A1, 0); + __ st_d(A2, A1, 6); + __ move(A0, R0); + __ jr(RA); + __ nop(); + __ nop(); + + // 15: + __ ld_d(AT, A0, 0); + __ ld_d(A2, A0, 7); + __ st_d(AT, A1, 0); + __ st_d(A2, A1, 7); + __ move(A0, R0); + __ jr(RA); + __ nop(); + __ nop(); + + // 16: + __ vld(F0, A0, 0); + __ vst(F0, A1, 0); + __ move(A0, R0); + __ jr(RA); + __ nop(); + __ nop(); + __ nop(); + __ nop(); + + if (!UseLASX) + return; + + // 17: + __ vld(F0, A0, 0); + __ ld_b(AT, A0, 16); + __ vst(F0, A1, 0); + __ st_b(AT, A1, 16); + __ move(A0, R0); + __ jr(RA); + __ nop(); + __ nop(); + + // 18: + __ vld(F0, A0, 0); + __ ld_h(AT, A0, 16); + __ vst(F0, A1, 0); + __ st_h(AT, A1, 16); + __ move(A0, R0); + __ jr(RA); + __ nop(); + __ nop(); + + // 19: + __ vld(F0, A0, 0); + __ ld_w(AT, A0, 15); + __ vst(F0, A1, 0); + __ st_w(AT, A1, 15); + __ move(A0, R0); + __ jr(RA); + __ nop(); + __ nop(); + + // 20: + __ vld(F0, A0, 0); + __ ld_w(AT, A0, 16); + __ vst(F0, A1, 0); + __ st_w(AT, A1, 16); + __ move(A0, R0); + __ jr(RA); + __ nop(); + __ nop(); + + // 21: + __ vld(F0, A0, 0); + __ ld_d(AT, A0, 13); + __ vst(F0, A1, 0); + __ st_d(AT, A1, 13); + __ move(A0, R0); + __ jr(RA); + __ nop(); + __ nop(); + + // 22: + __ vld(F0, A0, 0); + __ ld_d(AT, A0, 14); + __ vst(F0, A1, 0); + __ st_d(AT, A1, 14); + __ move(A0, R0); + __ jr(RA); + __ nop(); + __ nop(); + + // 23: + __ vld(F0, A0, 0); + __ ld_d(AT, A0, 15); + __ vst(F0, A1, 0); + __ st_d(AT, A1, 15); + __ move(A0, R0); + __ jr(RA); + __ nop(); + __ nop(); + + // 24: + __ vld(F0, A0, 0); + __ ld_d(AT, A0, 16); + __ vst(F0, A1, 0); + __ st_d(AT, A1, 16); + __ move(A0, R0); + __ jr(RA); + __ nop(); + __ nop(); + + // 25: + __ vld(F0, A0, 0); + __ vld(F1, A0, 9); + __ vst(F0, A1, 0); + __ vst(F1, A1, 9); + __ move(A0, R0); + __ jr(RA); + __ nop(); + __ nop(); + + // 26: + __ vld(F0, A0, 0); + __ vld(F1, A0, 10); + __ vst(F0, A1, 0); + __ vst(F1, A1, 10); + __ move(A0, R0); + __ jr(RA); + __ nop(); + __ nop(); + + // 27: + __ vld(F0, A0, 0); + __ vld(F1, A0, 11); + __ vst(F0, A1, 0); + __ vst(F1, A1, 11); + __ move(A0, R0); + __ jr(RA); + __ nop(); + __ nop(); + + // 28: + __ vld(F0, A0, 0); + __ vld(F1, A0, 12); + __ vst(F0, A1, 0); + __ vst(F1, A1, 12); + __ move(A0, R0); + __ jr(RA); + __ nop(); + __ nop(); + + // 29: + __ vld(F0, A0, 0); + __ vld(F1, A0, 13); + __ vst(F0, A1, 0); + __ vst(F1, A1, 13); + __ move(A0, R0); + __ jr(RA); + __ nop(); + __ nop(); + + // 30: + __ vld(F0, A0, 0); + __ vld(F1, A0, 14); + __ vst(F0, A1, 0); + __ vst(F1, A1, 14); + __ move(A0, R0); + __ jr(RA); + __ nop(); + __ nop(); + + // 31: + __ vld(F0, A0, 0); + __ vld(F1, A0, 15); + __ vst(F0, A1, 0); + __ vst(F1, A1, 15); + __ move(A0, R0); + __ jr(RA); + __ nop(); + __ nop(); + + // 32: + __ xvld(F0, A0, 0); + __ xvst(F0, A1, 0); + __ move(A0, R0); + __ jr(RA); + } + + // Arguments: + // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary + // ignored + // name - stub name string + // + // Inputs: + // A0 - source array address + // A1 - destination array address + // A2 - element count, treated as ssize_t, can be zero + // + // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries, + // we let the hardware handle it. The one to eight bytes within words, + // dwords or qwords that span cache line boundaries will still be loaded + // and stored atomically. + // + // Side Effects: + // disjoint_byte_copy_entry is set to the no-overlap entry point + // used by generate_conjoint_byte_copy(). 
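The byte-copy entry stubs below pick one of three paths: a per-count small table when the element count is under the limit for the widest available vector unit, an aligned large loop when both addresses are 8-byte aligned (and that entry was generated), and the unaligned large loop otherwise. A plain C++ sketch of that dispatch, for illustration only:

  #include <cstddef>
  #include <cstdint>

  enum CopyKind { SMALL, LARGE_ALIGNED, LARGE };

  // Mirrors: sltui(T0, A2, limit); bnez -> small;
  //          orr(T0, A0, A1); andi(T0, T0, 7); beqz -> large_aligned; b large
  static CopyKind classify_byte_copy(const void* src, void* dst, size_t count,
                                     bool use_lsx, bool use_lasx) {
    size_t small_limit = use_lasx ? 33 : (use_lsx ? 17 : 9);   // elements
    if (count < small_limit) return SMALL;
    uintptr_t bits = (uintptr_t)src | (uintptr_t)dst;
    return ((bits & 7) == 0) ? LARGE_ALIGNED : LARGE;
  }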
+ // + address generate_disjoint_byte_copy(bool aligned, Label &small, Label &large, + Label &large_aligned, const char * name) { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", name); + address start = __ pc(); + + if (UseLASX) + __ sltui(T0, A2, 33); + else if (UseLSX) + __ sltui(T0, A2, 17); + else + __ sltui(T0, A2, 9); + __ bnez(T0, small); + + if (large_aligned.is_bound()) { + __ orr(T0, A0, A1); + __ andi(T0, T0, 7); + __ beqz(T0, large_aligned); + } + + __ b(large); + + return start; + } + + // Arguments: + // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary + // ignored + // name - stub name string + // + // Inputs: + // A0 - source array address + // A1 - destination array address + // A2 - element count, treated as ssize_t, can be zero + // + // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries, + // we let the hardware handle it. The one to eight bytes within words, + // dwords or qwords that span cache line boundaries will still be loaded + // and stored atomically. + // + address generate_conjoint_byte_copy(bool aligned, Label &small, Label &large, + Label &large_aligned, const char *name) { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", name); + address start = __ pc(); + + array_overlap_test(StubRoutines::jbyte_disjoint_arraycopy(), 0); + + if (UseLASX) + __ sltui(T0, A2, 33); + else if (UseLSX) + __ sltui(T0, A2, 17); + else + __ sltui(T0, A2, 9); + __ bnez(T0, small); + + if (large_aligned.is_bound()) { + __ orr(T0, A0, A1); + __ andi(T0, T0, 7); + __ beqz(T0, large_aligned); + } + + __ b(large); + + return start; + } + + // Short small copy: less than { int:9, lsx:9, lasx:17 } elements. + void generate_short_small_copy(Label &entry, const char *name) { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", name); + + Label L; + __ bind(entry); + __ lipc(AT, L); + __ slli_d(A2, A2, 5); + __ add_d(AT, AT, A2); + __ jr(AT); + + __ bind(L); + // 0: + __ move(A0, R0); + __ jr(RA); + __ nop(); + __ nop(); + __ nop(); + __ nop(); + __ nop(); + __ nop(); + + // 1: + __ ld_h(AT, A0, 0); + __ st_h(AT, A1, 0); + __ move(A0, R0); + __ jr(RA); + __ nop(); + __ nop(); + __ nop(); + __ nop(); + + // 2: + __ ld_w(AT, A0, 0); + __ st_w(AT, A1, 0); + __ move(A0, R0); + __ jr(RA); + __ nop(); + __ nop(); + __ nop(); + __ nop(); + + // 3: + __ ld_w(AT, A0, 0); + __ ld_h(A2, A0, 4); + __ st_w(AT, A1, 0); + __ st_h(A2, A1, 4); + __ move(A0, R0); + __ jr(RA); + __ nop(); + __ nop(); + + // 4: + __ ld_d(AT, A0, 0); + __ st_d(AT, A1, 0); + __ move(A0, R0); + __ jr(RA); + __ nop(); + __ nop(); + __ nop(); + __ nop(); + + // 5: + __ ld_d(AT, A0, 0); + __ ld_h(A2, A0, 8); + __ st_d(AT, A1, 0); + __ st_h(A2, A1, 8); + __ move(A0, R0); + __ jr(RA); + __ nop(); + __ nop(); + + // 6: + __ ld_d(AT, A0, 0); + __ ld_w(A2, A0, 8); + __ st_d(AT, A1, 0); + __ st_w(A2, A1, 8); + __ move(A0, R0); + __ jr(RA); + __ nop(); + __ nop(); + + // 7: + __ ld_d(AT, A0, 0); + __ ld_d(A2, A0, 6); + __ st_d(AT, A1, 0); + __ st_d(A2, A1, 6); + __ move(A0, R0); + __ jr(RA); + __ nop(); + __ nop(); + + // 8: + if (UseLSX) { + __ vld(F0, A0, 0); + __ vst(F0, A1, 0); + __ move(A0, R0); + __ jr(RA); + __ nop(); + __ nop(); + } else { + __ ld_d(AT, A0, 0); + __ ld_d(A2, A0, 8); + __ st_d(AT, A1, 0); + __ st_d(A2, A1, 8); + __ move(A0, R0); + __ jr(RA); + } + + if (!UseLASX) + return; + + __ nop(); + __ nop(); + + // 9: + __ vld(F0, A0, 0); + __ ld_h(AT, A0, 16); + __ vst(F0, A1, 0); + __ st_h(AT, A1, 16); + __ 
move(A0, R0); + __ jr(RA); + __ nop(); + __ nop(); + + // 10: + __ vld(F0, A0, 0); + __ ld_w(AT, A0, 16); + __ vst(F0, A1, 0); + __ st_w(AT, A1, 16); + __ move(A0, R0); + __ jr(RA); + __ nop(); + __ nop(); + + // 11: + __ vld(F0, A0, 0); + __ ld_d(AT, A0, 14); + __ vst(F0, A1, 0); + __ st_d(AT, A1, 14); + __ move(A0, R0); + __ jr(RA); + __ nop(); + __ nop(); + + // 12: + __ vld(F0, A0, 0); + __ ld_d(AT, A0, 16); + __ vst(F0, A1, 0); + __ st_d(AT, A1, 16); + __ move(A0, R0); + __ jr(RA); + __ nop(); + __ nop(); + + // 13: + __ vld(F0, A0, 0); + __ vld(F1, A0, 10); + __ vst(F0, A1, 0); + __ vst(F1, A1, 10); + __ move(A0, R0); + __ jr(RA); + __ nop(); + __ nop(); + + // 14: + __ vld(F0, A0, 0); + __ vld(F1, A0, 12); + __ vst(F0, A1, 0); + __ vst(F1, A1, 12); + __ move(A0, R0); + __ jr(RA); + __ nop(); + __ nop(); + + // 15: + __ vld(F0, A0, 0); + __ vld(F1, A0, 14); + __ vst(F0, A1, 0); + __ vst(F1, A1, 14); + __ move(A0, R0); + __ jr(RA); + __ nop(); + __ nop(); + + // 16: + __ xvld(F0, A0, 0); + __ xvst(F0, A1, 0); + __ move(A0, R0); + __ jr(RA); + } + + // Arguments: + // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary + // ignored + // name - stub name string + // + // Inputs: + // A0 - source array address + // A1 - destination array address + // A2 - element count, treated as ssize_t, can be zero + // + // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries, + // we let the hardware handle it. The one to eight bytes within words, + // dwords or qwords that span cache line boundaries will still be loaded + // and stored atomically. + // + // Side Effects: + // disjoint_short_copy_entry is set to the no-overlap entry point + // used by generate_conjoint_short_copy(). + // + address generate_disjoint_short_copy(bool aligned, Label &small, Label &large, + Label &large_aligned, const char * name) { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", name); + address start = __ pc(); + + if (UseLASX) + __ sltui(T0, A2, 17); + else + __ sltui(T0, A2, 9); + __ bnez(T0, small); + + __ slli_d(A2, A2, 1); + + if (large_aligned.is_bound()) { + __ orr(T0, A0, A1); + __ andi(T0, T0, 7); + __ beqz(T0, large_aligned); + } + + __ b(large); + + return start; + } + + // Arguments: + // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary + // ignored + // name - stub name string + // + // Inputs: + // A0 - source array address + // A1 - destination array address + // A2 - element count, treated as ssize_t, can be zero + // + // If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we + // let the hardware handle it. The two or four words within dwords + // or qwords that span cache line boundaries will still be loaded + // and stored atomically. + // + address generate_conjoint_short_copy(bool aligned, Label &small, Label &large, + Label &large_aligned, const char *name) { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", name); + address start = __ pc(); + + array_overlap_test(StubRoutines::jshort_disjoint_arraycopy(), 1); + + if (UseLASX) + __ sltui(T0, A2, 17); + else + __ sltui(T0, A2, 9); + __ bnez(T0, small); + + __ slli_d(A2, A2, 1); + + if (large_aligned.is_bound()) { + __ orr(T0, A0, A1); + __ andi(T0, T0, 7); + __ beqz(T0, large_aligned); + } + + __ b(large); + + return start; + } + + // Int small copy: less than { int:7, lsx:7, lasx:9 } elements. 
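All of the *_small_copy stubs dispatch with a computed jump: each per-count case is padded with nops to exactly 8 LoongArch instructions (32 bytes), so the handler for a given count starts at the table base plus count * 32. The arithmetic behind the lipc/slli_d(…, 5)/add_d/jr sequence, as an illustrative one-liner:

  #include <cstdint>

  // entry = table_base + count * 32; each case occupies 8 * 4-byte instructions.
  static uint64_t small_copy_entry(uint64_t table_base, uint64_t count) {
    return table_base + (count << 5);
  }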
+ void generate_int_small_copy(Label &entry, const char *name) { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", name); + + Label L; + __ bind(entry); + __ lipc(AT, L); + __ slli_d(A2, A2, 5); + __ add_d(AT, AT, A2); + __ jr(AT); + + __ bind(L); + // 0: + __ move(A0, R0); + __ jr(RA); + __ nop(); + __ nop(); + __ nop(); + __ nop(); + __ nop(); + __ nop(); + + // 1: + __ ld_w(AT, A0, 0); + __ st_w(AT, A1, 0); + __ move(A0, R0); + __ jr(RA); + __ nop(); + __ nop(); + __ nop(); + __ nop(); + + // 2: + __ ld_d(AT, A0, 0); + __ st_d(AT, A1, 0); + __ move(A0, R0); + __ jr(RA); + __ nop(); + __ nop(); + __ nop(); + __ nop(); + + // 3: + __ ld_d(AT, A0, 0); + __ ld_w(A2, A0, 8); + __ st_d(AT, A1, 0); + __ st_w(A2, A1, 8); + __ move(A0, R0); + __ jr(RA); + __ nop(); + __ nop(); + + // 4: + if (UseLSX) { + __ vld(F0, A0, 0); + __ vst(F0, A1, 0); + __ move(A0, R0); + __ jr(RA); + __ nop(); + __ nop(); + } else { + __ ld_d(AT, A0, 0); + __ ld_d(A2, A0, 8); + __ st_d(AT, A1, 0); + __ st_d(A2, A1, 8); + __ move(A0, R0); + __ jr(RA); + } + __ nop(); + __ nop(); + + // 5: + if (UseLSX) { + __ vld(F0, A0, 0); + __ ld_w(AT, A0, 16); + __ vst(F0, A1, 0); + __ st_w(AT, A1, 16); + __ move(A0, R0); + __ jr(RA); + __ nop(); + __ nop(); + } else { + __ ld_d(AT, A0, 0); + __ ld_d(A2, A0, 8); + __ ld_w(A3, A0, 16); + __ st_d(AT, A1, 0); + __ st_d(A2, A1, 8); + __ st_w(A3, A1, 16); + __ move(A0, R0); + __ jr(RA); + } + + // 6: + if (UseLSX) { + __ vld(F0, A0, 0); + __ ld_d(AT, A0, 16); + __ vst(F0, A1, 0); + __ st_d(AT, A1, 16); + __ move(A0, R0); + __ jr(RA); + __ nop(); + __ nop(); + } else { + __ ld_d(AT, A0, 0); + __ ld_d(A2, A0, 8); + __ ld_d(A3, A0, 16); + __ st_d(AT, A1, 0); + __ st_d(A2, A1, 8); + __ st_d(A3, A1, 16); + __ move(A0, R0); + __ jr(RA); + } + + if (!UseLASX) + return; + + // 7: + __ vld(F0, A0, 0); + __ vld(F1, A0, 12); + __ vst(F0, A1, 0); + __ vst(F1, A1, 12); + __ move(A0, R0); + __ jr(RA); + __ nop(); + __ nop(); + + // 8: + __ xvld(F0, A0, 0); + __ xvst(F0, A1, 0); + __ move(A0, R0); + __ jr(RA); + } + + // Generate maybe oop copy + void gen_maybe_oop_copy(bool is_oop, bool disjoint, bool aligned, Label &small, + Label &large, Label &large_aligned, const char *name, + int small_limit, int log2_elem_size, bool dest_uninitialized = false) { + Label post, _large; + DecoratorSet decorators = 0; + BarrierSetAssembler *bs = NULL; + + if (is_oop) { + decorators = IN_HEAP | IS_ARRAY; + + if (disjoint) { + decorators |= ARRAYCOPY_DISJOINT; + } + + if (aligned) { + decorators |= ARRAYCOPY_ALIGNED; + } + + if (dest_uninitialized) { + decorators |= IS_DEST_UNINITIALIZED; + } + + __ addi_d(SP, SP, -4 * wordSize); + __ st_d(A2, SP, 3 * wordSize); + __ st_d(A1, SP, 2 * wordSize); + __ st_d(A0, SP, 1 * wordSize); + __ st_d(RA, SP, 0 * wordSize); + + bs = BarrierSet::barrier_set()->barrier_set_assembler(); + bs->arraycopy_prologue(_masm, decorators, is_oop, A1, A2, RegSet()); + + __ ld_d(A2, SP, 3 * wordSize); + __ ld_d(A1, SP, 2 * wordSize); + __ ld_d(A0, SP, 1 * wordSize); + } + + __ sltui(T0, A2, small_limit); + if (is_oop) { + __ beqz(T0, _large); + __ bl(small); + __ b(post); + } else { + __ bnez(T0, small); + } + + __ bind(_large); + __ slli_d(A2, A2, log2_elem_size); + + if (large_aligned.is_bound()) { + __ orr(T0, A0, A1); + __ andi(T0, T0, (1 << (log2_elem_size + 1)) - 1); + if (is_oop) { + Label skip; + __ bnez(T0, skip); + __ bl(large_aligned); + __ b(post); + __ bind(skip); + } else { + __ beqz(T0, large_aligned); + } + } + + if (is_oop) { + __ bl(large); + } else { + 
__ b(large); + } + + if (is_oop) { + __ bind(post); + __ ld_d(A2, SP, 3 * wordSize); + __ ld_d(A1, SP, 2 * wordSize); + + bs->arraycopy_epilogue(_masm, decorators, is_oop, A1, A2, T1, RegSet()); + + __ ld_d(RA, SP, 0 * wordSize); + __ addi_d(SP, SP, 4 * wordSize); + __ move(A0, R0); + __ jr(RA); + } + } + + // Arguments: + // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary + // ignored + // is_oop - true => oop array, so generate store check code + // name - stub name string + // + // Inputs: + // A0 - source array address + // A1 - destination array address + // A2 - element count, treated as ssize_t, can be zero + // + // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let + // the hardware handle it. The two dwords within qwords that span + // cache line boundaries will still be loaded and stored atomicly. + // + // Side Effects: + // disjoint_int_copy_entry is set to the no-overlap entry point + // used by generate_conjoint_int_oop_copy(). + // + address generate_disjoint_int_oop_copy(bool aligned, bool is_oop, Label &small, + Label &large, Label &large_aligned, const char *name, + int small_limit, bool dest_uninitialized = false) { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", name); + address start = __ pc(); + + gen_maybe_oop_copy(is_oop, true, aligned, small, large, large_aligned, + name, small_limit, 2, dest_uninitialized); + + return start; + } + + // Arguments: + // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary + // ignored + // is_oop - true => oop array, so generate store check code + // name - stub name string + // + // Inputs: + // A0 - source array address + // A1 - destination array address + // A2 - element count, treated as ssize_t, can be zero + // + // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let + // the hardware handle it. The two dwords within qwords that span + // cache line boundaries will still be loaded and stored atomicly. + // + address generate_conjoint_int_oop_copy(bool aligned, bool is_oop, Label &small, + Label &large, Label &large_aligned, const char *name, + int small_limit, bool dest_uninitialized = false) { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", name); + address start = __ pc(); + + if (is_oop) { + array_overlap_test(StubRoutines::oop_disjoint_arraycopy(), 2); + } else { + array_overlap_test(StubRoutines::jint_disjoint_arraycopy(), 2); + } + + gen_maybe_oop_copy(is_oop, false, aligned, small, large, large_aligned, + name, small_limit, 2, dest_uninitialized); + + return start; + } + + // Long small copy: less than { int:4, lsx:4, lasx:5 } elements. 
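The control flow of gen_maybe_oop_copy above, in outline (illustrative sketch, not part of the patch): for oop arrays the GC barrier prologue runs first and the small/large routines are reached with bl, so control returns for the barrier epilogue; for primitive arrays they are reached with a plain b and return straight to the caller. The element count is also scaled to bytes before the large paths.

  #include <cstddef>

  static void maybe_oop_copy_outline(bool is_oop, size_t count, size_t small_limit,
                                     void (*barrier_prologue)(),
                                     void (*copy_small)(), void (*copy_large)(),
                                     void (*barrier_epilogue)()) {
    if (is_oop) barrier_prologue();                 // arraycopy_prologue
    if (count < small_limit) copy_small();          // bl(small) / bnez -> small
    else                     copy_large();          // bl(large) / b(large)
    if (is_oop) barrier_epilogue();                 // arraycopy_epilogue
  }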
+ void generate_long_small_copy(Label &entry, const char *name) { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", name); + + Label L; + __ bind(entry); + __ lipc(AT, L); + __ slli_d(A2, A2, 5); + __ add_d(AT, AT, A2); + __ jr(AT); + + __ bind(L); + // 0: + __ move(A0, R0); + __ jr(RA); + __ nop(); + __ nop(); + __ nop(); + __ nop(); + __ nop(); + __ nop(); + + // 1: + __ ld_d(AT, A0, 0); + __ st_d(AT, A1, 0); + __ move(A0, R0); + __ jr(RA); + __ nop(); + __ nop(); + __ nop(); + __ nop(); + + // 2: + if (UseLSX) { + __ vld(F0, A0, 0); + __ vst(F0, A1, 0); + __ move(A0, R0); + __ jr(RA); + __ nop(); + __ nop(); + } else { + __ ld_d(AT, A0, 0); + __ ld_d(A2, A0, 8); + __ st_d(AT, A1, 0); + __ st_d(A2, A1, 8); + __ move(A0, R0); + __ jr(RA); + } + __ nop(); + __ nop(); + + // 3: + if (UseLSX) { + __ vld(F0, A0, 0); + __ ld_d(AT, A0, 16); + __ vst(F0, A1, 0); + __ st_d(AT, A1, 16); + __ move(A0, R0); + __ jr(RA); + __ nop(); + __ nop(); + } else { + __ ld_d(AT, A0, 0); + __ ld_d(A2, A0, 8); + __ ld_d(A3, A0, 16); + __ st_d(AT, A1, 0); + __ st_d(A2, A1, 8); + __ st_d(A3, A1, 16); + __ move(A0, R0); + __ jr(RA); + } + + if (!UseLASX) + return; + + // 4: + __ xvld(F0, A0, 0); + __ xvst(F0, A1, 0); + + __ move(A0, R0); + __ jr(RA); + } + + // Arguments: + // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary + // ignored + // is_oop - true => oop array, so generate store check code + // name - stub name string + // + // Inputs: + // A0 - source array address + // A1 - destination array address + // A2 - element count, treated as ssize_t, can be zero + // + // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let + // the hardware handle it. The two dwords within qwords that span + // cache line boundaries will still be loaded and stored atomicly. + // + // Side Effects: + // disjoint_int_copy_entry is set to the no-overlap entry point + // used by generate_conjoint_int_oop_copy(). + // + address generate_disjoint_long_oop_copy(bool aligned, bool is_oop, Label &small, + Label &large, Label &large_aligned, const char *name, + int small_limit, bool dest_uninitialized = false) { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", name); + address start = __ pc(); + + gen_maybe_oop_copy(is_oop, true, aligned, small, large, large_aligned, + name, small_limit, 3, dest_uninitialized); + + return start; + } + + // Arguments: + // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary + // ignored + // is_oop - true => oop array, so generate store check code + // name - stub name string + // + // Inputs: + // A0 - source array address + // A1 - destination array address + // A2 - element count, treated as ssize_t, can be zero + // + // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let + // the hardware handle it. The two dwords within qwords that span + // cache line boundaries will still be loaded and stored atomicly. 
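The conjoint entry points (including the one below) start with array_overlap_test, which branches to the disjoint entry whenever a simple forward copy is safe, i.e. when the unsigned distance from src to dst is at least the byte count; only otherwise does the backward-copying conjoint code run. A plain C++ statement of that test, for illustration:

  #include <cstddef>
  #include <cstdint>

  // Mirrors: slli_d(T4, A2, log2_elem_size); sub_d(AT, A1, A0); bgeu(AT, T4, no_overlap)
  static bool forward_copy_is_safe(const void* src, const void* dst,
                                   size_t count, int log2_elem_size) {
    uint64_t byte_count = (uint64_t)count << log2_elem_size;
    return (uint64_t)((uintptr_t)dst - (uintptr_t)src) >= byte_count;
  }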
+ // + address generate_conjoint_long_oop_copy(bool aligned, bool is_oop, Label &small, + Label &large, Label &large_aligned, const char *name, + int small_limit, bool dest_uninitialized = false) { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", name); + address start = __ pc(); + + if (is_oop) { + array_overlap_test(StubRoutines::oop_disjoint_arraycopy(), 3); + } else { + array_overlap_test(StubRoutines::jlong_disjoint_arraycopy(), 3); + } + + gen_maybe_oop_copy(is_oop, false, aligned, small, large, large_aligned, + name, small_limit, 3, dest_uninitialized); + + return start; + } + + // Helper for generating a dynamic type check. + // Smashes scratch1, scratch2. + void generate_type_check(Register sub_klass, + Register super_check_offset, + Register super_klass, + Register tmp1, + Register tmp2, + Label& L_success) { + assert_different_registers(sub_klass, super_check_offset, super_klass); + + __ block_comment("type_check:"); + + Label L_miss; + + __ check_klass_subtype_fast_path(sub_klass, super_klass, tmp1, &L_success, &L_miss, NULL, + super_check_offset); + __ check_klass_subtype_slow_path(sub_klass, super_klass, tmp1, tmp2, &L_success, NULL); + + // Fall through on failure! + __ bind(L_miss); + } + + // + // Generate checkcasting array copy stub + // + // Input: + // A0 - source array address + // A1 - destination array address + // A2 - element count, treated as ssize_t, can be zero + // A3 - size_t ckoff (super_check_offset) + // A4 - oop ckval (super_klass) + // + // Output: + // V0 == 0 - success + // V0 == -1^K - failure, where K is partial transfer count + // + address generate_checkcast_copy(const char *name, bool dest_uninitialized = false) { + Label L_load_element, L_store_element, L_do_card_marks, L_done, L_done_pop; + + // Input registers (after setup_arg_regs) + const Register from = A0; // source array address + const Register to = A1; // destination array address + const Register count = A2; // elementscount + const Register ckoff = A3; // super_check_offset + const Register ckval = A4; // super_klass + + RegSet wb_pre_saved_regs = RegSet::range(A0, A4); + RegSet wb_post_saved_regs = RegSet::of(count); + + // Registers used as temps (S0, S1, S2, S3 are save-on-entry) + const Register copied_oop = S0; // actual oop copied + const Register count_save = S1; // orig elementscount + const Register start_to = S2; // destination array start address + const Register oop_klass = S3; // oop._klass + const Register tmp1 = A5; + const Register tmp2 = A6; + + //--------------------------------------------------------------- + // Assembler stub will be used for this call to arraycopy + // if the two arrays are subtypes of Object[] but the + // destination array type is not equal to or a supertype + // of the source type. Each element must be separately + // checked. + + assert_different_registers(from, to, count, ckoff, ckval, start_to, + copied_oop, oop_klass, count_save); + + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", name); + address start = __ pc(); + + // caller guarantees that the arrays really are different + // otherwise, we would have to make conjoint checks + + // Caller of this entry point must set up the argument registers. + __ block_comment("Entry:"); + + // Empty array: Nothing to do. + __ beqz(count, L_done); + + __ push(RegSet::of(S0, S1, S2, S3, RA)); + +#ifdef ASSERT + __ block_comment("assert consistent ckoff/ckval"); + // The ckoff and ckval must be mutually consistent, + // even though caller generates both. 
+ { Label L; + int sco_offset = in_bytes(Klass::super_check_offset_offset()); + __ ld_w(start_to, Address(ckval, sco_offset)); + __ beq(ckoff, start_to, L); + __ stop("super_check_offset inconsistent"); + __ bind(L); + } +#endif //ASSERT + + DecoratorSet decorators = IN_HEAP | IS_ARRAY | ARRAYCOPY_CHECKCAST | ARRAYCOPY_DISJOINT; + bool is_oop = true; + if (dest_uninitialized) { + decorators |= IS_DEST_UNINITIALIZED; + } + + BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); + bs->arraycopy_prologue(_masm, decorators, is_oop, to, count, wb_pre_saved_regs); + + // save the original count + __ move(count_save, count); + + // Copy from low to high addresses + __ move(start_to, to); // Save destination array start address + __ b(L_load_element); + + // ======== begin loop ======== + // (Loop is rotated; its entry is L_load_element.) + // Loop control: + // for (; count != 0; count--) { + // copied_oop = load_heap_oop(from++); + // ... generate_type_check ...; + // store_heap_oop(to++, copied_oop); + // } + __ align(OptoLoopAlignment); + + __ bind(L_store_element); + __ store_heap_oop(Address(to, 0), copied_oop, tmp1, tmp2, AS_RAW); // store the oop + __ addi_d(to, to, UseCompressedOops ? 4 : 8); + __ addi_d(count, count, -1); + __ beqz(count, L_do_card_marks); + + // ======== loop entry is here ======== + __ bind(L_load_element); + __ load_heap_oop(copied_oop, Address(from, 0), tmp1, tmp2, AS_RAW); // load the oop + __ addi_d(from, from, UseCompressedOops ? 4 : 8); + __ beqz(copied_oop, L_store_element); + + __ load_klass(oop_klass, copied_oop); // query the object klass + generate_type_check(oop_klass, ckoff, ckval, tmp1, tmp2, L_store_element); + // ======== end loop ======== + + // Register count = remaining oops, count_orig = total oops. + // Emit GC store barriers for the oops we have copied and report + // their number to the caller. + + __ sub_d(tmp1, count_save, count); // K = partially copied oop count + __ nor(count, tmp1, R0); // report (-1^K) to caller + __ beqz(tmp1, L_done_pop); + + __ bind(L_do_card_marks); + + bs->arraycopy_epilogue(_masm, decorators, is_oop, start_to, count_save, tmp2, wb_post_saved_regs); + + __ bind(L_done_pop); + __ pop(RegSet::of(S0, S1, S2, S3, RA)); + +#ifndef PRODUCT + __ li(SCR2, (address)&SharedRuntime::_checkcast_array_copy_ctr); + __ increment(Address(SCR2, 0), 1); +#endif + + __ bind(L_done); + __ move(A0, count); + __ jr(RA); + + return start; + } + + // + // Generate 'unsafe' array copy stub + // Though just as safe as the other stubs, it takes an unscaled + // size_t argument instead of an element count. + // + // Input: + // A0 - source array address + // A1 - destination array address + // A2 - byte count, treated as ssize_t, can be zero + // + // Examines the alignment of the operands and dispatches + // to a long, int, short, or byte copy loop. 
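The unsafe-copy stub below selects the widest element width that divides the source address, the destination address and the byte count, then scales the byte count down to an element count for the chosen copy stub. A small illustrative C++ equivalent of that dispatch:

  #include <cstddef>
  #include <cstdint>

  // Mirrors: orr(AT, s, d); orr(AT, AT, count); test against 8/4/2-byte masks.
  // Returns log2 of the element size; count_for_stub = byte_count >> result.
  static int unsafe_copy_log2_elem(const void* src, const void* dst, size_t byte_count) {
    uintptr_t bits = (uintptr_t)src | (uintptr_t)dst | (uintptr_t)byte_count;
    if ((bits & 7) == 0) return 3;   // jlong copy
    if ((bits & 3) == 0) return 2;   // jint copy
    if ((bits & 1) == 0) return 1;   // jshort copy
    return 0;                        // jbyte copy
  }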
+ // + address generate_unsafe_copy(const char *name) { + Label L_long_aligned, L_int_aligned, L_short_aligned; + Register s = A0, d = A1, count = A2; + + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", name); + address start = __ pc(); + + __ orr(AT, s, d); + __ orr(AT, AT, count); + + __ andi(AT, AT, BytesPerLong-1); + __ beqz(AT, L_long_aligned); + __ andi(AT, AT, BytesPerInt-1); + __ beqz(AT, L_int_aligned); + __ andi(AT, AT, BytesPerShort-1); + __ beqz(AT, L_short_aligned); + __ b(StubRoutines::_jbyte_arraycopy); + + __ bind(L_short_aligned); + __ srli_d(count, count, LogBytesPerShort); // size => short_count + __ b(StubRoutines::_jshort_arraycopy); + __ bind(L_int_aligned); + __ srli_d(count, count, LogBytesPerInt); // size => int_count + __ b(StubRoutines::_jint_arraycopy); + __ bind(L_long_aligned); + __ srli_d(count, count, LogBytesPerLong); // size => long_count + __ b(StubRoutines::_jlong_arraycopy); + + return start; + } + + // Perform range checks on the proposed arraycopy. + // Kills temp, but nothing else. + // Also, clean the sign bits of src_pos and dst_pos. + void arraycopy_range_checks(Register src, // source array oop (A0) + Register src_pos, // source position (A1) + Register dst, // destination array oop (A2) + Register dst_pos, // destination position (A3) + Register length, + Register temp, + Label& L_failed) { + __ block_comment("arraycopy_range_checks:"); + + assert_different_registers(SCR1, temp); + + // if (src_pos + length > arrayOop(src)->length()) FAIL; + __ ld_w(SCR1, Address(src, arrayOopDesc::length_offset_in_bytes())); + __ add_w(temp, length, src_pos); + __ bltu(SCR1, temp, L_failed); + + // if (dst_pos + length > arrayOop(dst)->length()) FAIL; + __ ld_w(SCR1, Address(dst, arrayOopDesc::length_offset_in_bytes())); + __ add_w(temp, length, dst_pos); + __ bltu(SCR1, temp, L_failed); + + // Have to clean up high 32 bits of 'src_pos' and 'dst_pos'. + __ move(src_pos, src_pos); + __ move(dst_pos, dst_pos); + + __ block_comment("arraycopy_range_checks done"); + } + + // + // Generate generic array copy stubs + // + // Input: + // A0 - src oop + // A1 - src_pos (32-bits) + // A2 - dst oop + // A3 - dst_pos (32-bits) + // A4 - element count (32-bits) + // + // Output: + // V0 == 0 - success + // V0 == -1^K - failure, where K is partial transfer count + // + address generate_generic_copy(const char *name) { + Label L_failed, L_objArray; + Label L_copy_bytes, L_copy_shorts, L_copy_ints, L_copy_longs; + + // Input registers + const Register src = A0; // source array oop + const Register src_pos = A1; // source position + const Register dst = A2; // destination array oop + const Register dst_pos = A3; // destination position + const Register length = A4; + + // Registers used as temps + const Register dst_klass = A5; + + __ align(CodeEntryAlignment); + + StubCodeMark mark(this, "StubRoutines", name); + + address start = __ pc(); + +#ifndef PRODUCT + // bump this on entry, not on exit: + __ li(SCR2, (address)&SharedRuntime::_generic_array_copy_ctr); + __ increment(Address(SCR2, 0), 1); +#endif + + //----------------------------------------------------------------------- + // Assembler stub will be used for this call to arraycopy + // if the following conditions are met: + // + // (1) src and dst must not be null. + // (2) src_pos must not be negative. + // (3) dst_pos must not be negative. + // (4) length must not be negative. + // (5) src klass and dst klass should be the same and not NULL. + // (6) src and dst should be arrays.
+ // (7) src_pos + length must not exceed length of src. + // (8) dst_pos + length must not exceed length of dst. + // + + // if (src == NULL) return -1; + __ beqz(src, L_failed); + + // if (src_pos < 0) return -1; + __ blt(src_pos, R0, L_failed); + + // if (dst == NULL) return -1; + __ beqz(dst, L_failed); + + // if (dst_pos < 0) return -1; + __ blt(dst_pos, R0, L_failed); + + // registers used as temp + const Register scratch_length = T0; // elements count to copy + const Register scratch_src_klass = T1; // array klass + const Register lh = T2; // layout helper + const Register tmp1 = T3; + const Register tmp2 = T4; + + // if (length < 0) return -1; + __ move(scratch_length, length); // length (elements count, 32-bits value) + __ blt(scratch_length, R0, L_failed); + + __ load_klass(scratch_src_klass, src); +#ifdef ASSERT + // assert(src->klass() != NULL); + { + __ block_comment("assert klasses not null {"); + Label L1, L2; + __ bnez(scratch_src_klass, L2); // it is broken if klass is NULL + __ bind(L1); + __ stop("broken null klass"); + __ bind(L2); + __ load_klass(SCR2, dst); + __ beqz(SCR2, L1); // this would be broken also + __ block_comment("} assert klasses not null done"); + } +#endif + + // Load layout helper (32-bits) + // + // |array_tag| | header_size | element_type | |log2_element_size| + // 32 30 24 16 8 2 0 + // + // array_tag: typeArray = 0x3, objArray = 0x2, non-array = 0x0 + // + + const int lh_offset = in_bytes(Klass::layout_helper_offset()); + + // Handle objArrays completely differently... + const jint objArray_lh = Klass::array_layout_helper(T_OBJECT); + __ ld_w(lh, Address(scratch_src_klass, lh_offset)); + __ li(SCR1, objArray_lh); + __ xorr(SCR2, lh, SCR1); + __ beqz(SCR2, L_objArray); + + // if (src->klass() != dst->klass()) return -1; + __ load_klass(SCR2, dst); + __ xorr(SCR2, SCR2, scratch_src_klass); + __ bnez(SCR2, L_failed); + + // if (!src->is_Array()) return -1; + __ bge(lh, R0, L_failed); // i.e. (lh >= 0) + + // At this point, it is known to be a typeArray (array_tag 0x3). +#ifdef ASSERT + { + __ block_comment("assert primitive array {"); + Label L; + __ li(SCR2, (int)(Klass::_lh_array_tag_type_value << Klass::_lh_array_tag_shift)); + __ bge(lh, SCR2, L); + __ stop("must be a primitive array"); + __ bind(L); + __ block_comment("} assert primitive array done"); + } +#endif + + arraycopy_range_checks(src, src_pos, dst, dst_pos, scratch_length, SCR2, L_failed); + + // TypeArrayKlass + // + // src_addr = (src + array_header_in_bytes()) + (src_pos << log2elemsize); + // dst_addr = (dst + array_header_in_bytes()) + (dst_pos << log2elemsize); + // + + const Register scr1_offset = SCR1; // array offset + const Register elsize = lh; // element size + + __ bstrpick_d(scr1_offset, lh, Klass::_lh_header_size_shift + + exact_log2(Klass::_lh_header_size_mask+1) - 1, + Klass::_lh_header_size_shift); // array_offset + __ add_d(src, src, scr1_offset); // src array offset + __ add_d(dst, dst, scr1_offset); // dst array offset + __ block_comment("choose copy loop based on element size"); + + // next registers should be set before the jump to corresponding stub + const Register from = A0; // source array address + const Register to = A1; // destination array address + const Register count = A2; // elements count + + // 'from', 'to', 'count' registers should be set in such order + // since they are the same as 'src', 'src_pos', 'dst'. + + assert(Klass::_lh_log2_element_size_shift == 0, "fix this code"); + + // The possible values of elsize are 0-3, i.e. 
exact_log2(element + // size in bytes). We do a simple bitwise binary search. + __ bind(L_copy_bytes); + __ andi(tmp1, elsize, 2); + __ bnez(tmp1, L_copy_ints); + __ andi(tmp1, elsize, 1); + __ bnez(tmp1, L_copy_shorts); + __ lea(from, Address(src, src_pos, Address::times_1)); // src_addr + __ lea(to, Address(dst, dst_pos, Address::times_1)); // dst_addr + __ move(count, scratch_length); // length + __ b(StubRoutines::_jbyte_arraycopy); + + __ bind(L_copy_shorts); + __ lea(from, Address(src, src_pos, Address::times_2)); // src_addr + __ lea(to, Address(dst, dst_pos, Address::times_2)); // dst_addr + __ move(count, scratch_length); // length + __ b(StubRoutines::_jshort_arraycopy); + + __ bind(L_copy_ints); + __ andi(tmp1, elsize, 1); + __ bnez(tmp1, L_copy_longs); + __ lea(from, Address(src, src_pos, Address::times_4)); // src_addr + __ lea(to, Address(dst, dst_pos, Address::times_4)); // dst_addr + __ move(count, scratch_length); // length + __ b(StubRoutines::_jint_arraycopy); + + __ bind(L_copy_longs); +#ifdef ASSERT + { + __ block_comment("assert long copy {"); + Label L; + __ andi(lh, lh, Klass::_lh_log2_element_size_mask); // lh -> elsize + __ li(tmp1, LogBytesPerLong); + __ beq(elsize, tmp1, L); + __ stop("must be long copy, but elsize is wrong"); + __ bind(L); + __ block_comment("} assert long copy done"); + } +#endif + __ lea(from, Address(src, src_pos, Address::times_8)); // src_addr + __ lea(to, Address(dst, dst_pos, Address::times_8)); // dst_addr + __ move(count, scratch_length); // length + __ b(StubRoutines::_jlong_arraycopy); + + // ObjArrayKlass + __ bind(L_objArray); + // live at this point: scratch_src_klass, scratch_length, src[_pos], dst[_pos] + + Label L_plain_copy, L_checkcast_copy; + // test array classes for subtyping + __ load_klass(tmp1, dst); + __ bne(scratch_src_klass, tmp1, L_checkcast_copy); // usual case is exact equality + + // Identically typed arrays can be copied without element-wise checks. + arraycopy_range_checks(src, src_pos, dst, dst_pos, scratch_length, SCR2, L_failed); + + __ lea(from, Address(src, src_pos, Address::ScaleFactor(LogBytesPerHeapOop))); + __ addi_d(from, from, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); + __ lea(to, Address(dst, dst_pos, Address::ScaleFactor(LogBytesPerHeapOop))); + __ addi_d(to, to, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); + __ move(count, scratch_length); // length + __ bind(L_plain_copy); + __ b(StubRoutines::_oop_arraycopy); + + __ bind(L_checkcast_copy); + // live at this point: scratch_src_klass, scratch_length, tmp1 (dst_klass) + { + // Before looking at dst.length, make sure dst is also an objArray. + __ ld_w(SCR1, Address(tmp1, lh_offset)); + __ li(SCR2, objArray_lh); + __ xorr(SCR1, SCR1, SCR2); + __ bnez(SCR1, L_failed); + + // It is safe to examine both src.length and dst.length. + arraycopy_range_checks(src, src_pos, dst, dst_pos, scratch_length, tmp1, L_failed); + + __ load_klass(dst_klass, dst); // reload + + // Marshal the base address arguments now, freeing registers. 
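+      // That is:
+      //   from = src + arrayOopDesc::base_offset_in_bytes(T_OBJECT) + (src_pos << LogBytesPerHeapOop)
+      //   to   = dst + arrayOopDesc::base_offset_in_bytes(T_OBJECT) + (dst_pos << LogBytesPerHeapOop)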
+ __ lea(from, Address(src, src_pos, Address::ScaleFactor(LogBytesPerHeapOop))); + __ addi_d(from, from, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); + __ lea(to, Address(dst, dst_pos, Address::ScaleFactor(LogBytesPerHeapOop))); + __ addi_d(to, to, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); + __ move(count, length); // length (reloaded) + Register sco_temp = A3; // this register is free now + assert_different_registers(from, to, count, sco_temp, dst_klass, scratch_src_klass); + // assert_clean_int(count, sco_temp); + + // Generate the type check. + const int sco_offset = in_bytes(Klass::super_check_offset_offset()); + __ ld_w(sco_temp, Address(dst_klass, sco_offset)); + + // Smashes SCR1, SCR2 + generate_type_check(scratch_src_klass, sco_temp, dst_klass, tmp1, tmp2, L_plain_copy); + + // Fetch destination element klass from the ObjArrayKlass header. + int ek_offset = in_bytes(ObjArrayKlass::element_klass_offset()); + __ ld_d(dst_klass, Address(dst_klass, ek_offset)); + __ ld_w(sco_temp, Address(dst_klass, sco_offset)); + + // the checkcast_copy loop needs two extra arguments: + assert(A3 == sco_temp, "#3 already in place"); + // Set up arguments for checkcast_arraycopy. + __ move(A4, dst_klass); // dst.klass.element_klass + __ b(StubRoutines::_checkcast_arraycopy); + } + + __ bind(L_failed); + __ li(V0, -1); + __ jr(RA); + + return start; + } + + void generate_arraycopy_stubs() { + Label disjoint_large_copy, conjoint_large_copy; + Label disjoint_large_copy_lsx, conjoint_large_copy_lsx; + Label disjoint_large_copy_lasx, conjoint_large_copy_lasx; + Label byte_small_copy, short_small_copy, int_small_copy, long_small_copy; + Label none; + + generate_disjoint_large_copy(disjoint_large_copy, "disjoint_large_copy"); + generate_conjoint_large_copy(conjoint_large_copy, "conjoint_large_copy"); + if (UseLSX) { + generate_disjoint_large_copy_lsx(disjoint_large_copy_lsx, "disjoint_large_copy_lsx"); + generate_conjoint_large_copy_lsx(conjoint_large_copy_lsx, "conjoint_large_copy_lsx"); + } + if (UseLASX) { + generate_disjoint_large_copy_lasx(disjoint_large_copy_lasx, "disjoint_large_copy_lasx"); + generate_conjoint_large_copy_lasx(conjoint_large_copy_lasx, "conjoint_large_copy_lasx"); + } + generate_byte_small_copy(byte_small_copy, "jbyte_small_copy"); + generate_short_small_copy(short_small_copy, "jshort_small_copy"); + generate_int_small_copy(int_small_copy, "jint_small_copy"); + generate_long_small_copy(long_small_copy, "jlong_small_copy"); + + if (UseCompressedOops) { + if (UseLSX) { + StubRoutines::_oop_disjoint_arraycopy = generate_disjoint_int_oop_copy(false, true, int_small_copy, disjoint_large_copy_lsx, disjoint_large_copy, "oop_disjoint_arraycopy", 7); + StubRoutines::_oop_disjoint_arraycopy_uninit = generate_disjoint_int_oop_copy(false, true, int_small_copy, disjoint_large_copy_lsx, disjoint_large_copy, "oop_disjoint_arraycopy_uninit", 7, true); + } else { + StubRoutines::_oop_disjoint_arraycopy = generate_disjoint_int_oop_copy(false, true, int_small_copy, disjoint_large_copy, none, "oop_disjoint_arraycopy", 7); + StubRoutines::_oop_disjoint_arraycopy_uninit = generate_disjoint_int_oop_copy(false, true, int_small_copy, disjoint_large_copy, none, "oop_disjoint_arraycopy_uninit", 7, true); + } + if (UseLASX) { + StubRoutines::_oop_arraycopy = generate_conjoint_int_oop_copy(false, true, int_small_copy, conjoint_large_copy_lasx, conjoint_large_copy, "oop_arraycopy", 9); + StubRoutines::_oop_arraycopy_uninit = generate_conjoint_int_oop_copy(false, true, int_small_copy, 
conjoint_large_copy_lasx, conjoint_large_copy, "oop_arraycopy_uninit", 9, true); + } else if (UseLSX) { + StubRoutines::_oop_arraycopy = generate_conjoint_int_oop_copy(false, true, int_small_copy, conjoint_large_copy_lsx, conjoint_large_copy, "oop_arraycopy", 7); + StubRoutines::_oop_arraycopy_uninit = generate_conjoint_int_oop_copy(false, true, int_small_copy, conjoint_large_copy_lsx, conjoint_large_copy, "oop_arraycopy_uninit", 7, true); + } else { + StubRoutines::_oop_arraycopy = generate_conjoint_int_oop_copy(false, true, int_small_copy, conjoint_large_copy, none, "oop_arraycopy", 7); + StubRoutines::_oop_arraycopy_uninit = generate_conjoint_int_oop_copy(false, true, int_small_copy, conjoint_large_copy, none, "oop_arraycopy_uninit", 7, true); + } + } else { + if (UseLASX) { + StubRoutines::_oop_disjoint_arraycopy = generate_disjoint_long_oop_copy(false, true, long_small_copy, disjoint_large_copy, disjoint_large_copy_lasx, "oop_disjoint_arraycopy", 5); + StubRoutines::_oop_disjoint_arraycopy_uninit = generate_disjoint_long_oop_copy(false, true, long_small_copy, disjoint_large_copy, disjoint_large_copy_lasx, "oop_disjoint_arraycopy_uninit", 5, true); + StubRoutines::_oop_arraycopy = generate_conjoint_long_oop_copy(false, true, long_small_copy, conjoint_large_copy, conjoint_large_copy_lasx, "oop_arraycopy", 5); + StubRoutines::_oop_arraycopy_uninit = generate_conjoint_long_oop_copy(false, true, long_small_copy, conjoint_large_copy, conjoint_large_copy_lasx, "oop_arraycopy_uninit", 5, true); + } else if (UseLSX) { + StubRoutines::_oop_disjoint_arraycopy = generate_disjoint_long_oop_copy(false, true, long_small_copy, disjoint_large_copy, disjoint_large_copy_lsx, "oop_disjoint_arraycopy", 4); + StubRoutines::_oop_disjoint_arraycopy_uninit = generate_disjoint_long_oop_copy(false, true, long_small_copy, disjoint_large_copy, disjoint_large_copy_lsx, "oop_disjoint_arraycopy_uninit", 4, true); + StubRoutines::_oop_arraycopy = generate_conjoint_long_oop_copy(false, true, long_small_copy, conjoint_large_copy, conjoint_large_copy_lsx, "oop_arraycopy", 4); + StubRoutines::_oop_arraycopy_uninit = generate_conjoint_long_oop_copy(false, true, long_small_copy, conjoint_large_copy, conjoint_large_copy_lsx, "oop_arraycopy_uninit", 4, true); + } else { + StubRoutines::_oop_disjoint_arraycopy = generate_disjoint_long_oop_copy(false, true, long_small_copy, disjoint_large_copy, none, "oop_disjoint_arraycopy", 4); + StubRoutines::_oop_disjoint_arraycopy_uninit = generate_disjoint_long_oop_copy(false, true, long_small_copy, disjoint_large_copy, none, "oop_disjoint_arraycopy_uninit", 4, true); + StubRoutines::_oop_arraycopy = generate_conjoint_long_oop_copy(false, true, long_small_copy, conjoint_large_copy, none, "oop_arraycopy", 4); + StubRoutines::_oop_arraycopy_uninit = generate_conjoint_long_oop_copy(false, true, long_small_copy, conjoint_large_copy, conjoint_large_copy_lsx, "oop_arraycopy_uninit", 4, true); + } + } + + if (UseLASX) { + StubRoutines::_jbyte_disjoint_arraycopy = generate_disjoint_byte_copy(false, byte_small_copy, disjoint_large_copy_lasx, disjoint_large_copy_lsx, "jbyte_disjoint_arraycopy"); + StubRoutines::_jshort_disjoint_arraycopy = generate_disjoint_short_copy(false, short_small_copy, disjoint_large_copy_lasx, disjoint_large_copy, "jshort_disjoint_arraycopy"); + StubRoutines::_jint_disjoint_arraycopy = generate_disjoint_int_oop_copy(false, false, int_small_copy, disjoint_large_copy_lasx, disjoint_large_copy, "jint_disjoint_arraycopy", 9); + + StubRoutines::_jbyte_arraycopy = 
generate_conjoint_byte_copy(false, byte_small_copy, conjoint_large_copy_lasx, conjoint_large_copy_lsx, "jbyte_arraycopy"); + StubRoutines::_jshort_arraycopy = generate_conjoint_short_copy(false, short_small_copy, conjoint_large_copy_lasx, conjoint_large_copy, "jshort_arraycopy"); + StubRoutines::_jint_arraycopy = generate_conjoint_int_oop_copy(false, false, int_small_copy, conjoint_large_copy_lasx, conjoint_large_copy, "jint_arraycopy", 9); + } else if (UseLSX) { + StubRoutines::_jbyte_disjoint_arraycopy = generate_disjoint_byte_copy(false, byte_small_copy, disjoint_large_copy_lsx, none, "jbyte_disjoint_arraycopy"); + StubRoutines::_jshort_disjoint_arraycopy = generate_disjoint_short_copy(false, short_small_copy, disjoint_large_copy_lsx, disjoint_large_copy, "jshort_disjoint_arraycopy"); + StubRoutines::_jint_disjoint_arraycopy = generate_disjoint_int_oop_copy(false, false, int_small_copy, disjoint_large_copy_lsx, disjoint_large_copy, "jint_disjoint_arraycopy", 7); + + StubRoutines::_jbyte_arraycopy = generate_conjoint_byte_copy(false, byte_small_copy, conjoint_large_copy_lsx, none, "jbyte_arraycopy"); + StubRoutines::_jshort_arraycopy = generate_conjoint_short_copy(false, short_small_copy, conjoint_large_copy_lsx, conjoint_large_copy, "jshort_arraycopy"); + StubRoutines::_jint_arraycopy = generate_conjoint_int_oop_copy(false, false, int_small_copy, conjoint_large_copy_lsx, conjoint_large_copy, "jint_arraycopy", 7); + } else { + StubRoutines::_jbyte_disjoint_arraycopy = generate_disjoint_byte_copy(false, byte_small_copy, disjoint_large_copy, none, "jbyte_disjoint_arraycopy"); + StubRoutines::_jshort_disjoint_arraycopy = generate_disjoint_short_copy(false, short_small_copy, disjoint_large_copy, none, "jshort_disjoint_arraycopy"); + StubRoutines::_jint_disjoint_arraycopy = generate_disjoint_int_oop_copy(false, false, int_small_copy, disjoint_large_copy, none, "jint_disjoint_arraycopy", 7); + + StubRoutines::_jbyte_arraycopy = generate_conjoint_byte_copy(false, byte_small_copy, conjoint_large_copy, none, "jbyte_arraycopy"); + StubRoutines::_jshort_arraycopy = generate_conjoint_short_copy(false, short_small_copy, conjoint_large_copy, none, "jshort_arraycopy"); + StubRoutines::_jint_arraycopy = generate_conjoint_int_oop_copy(false, false, int_small_copy, conjoint_large_copy, none, "jint_arraycopy", 7); + } + + if (UseLASX) { + StubRoutines::_jlong_disjoint_arraycopy = generate_disjoint_long_oop_copy(false, false, long_small_copy, disjoint_large_copy, disjoint_large_copy_lasx, "jlong_disjoint_arraycopy", 5); + StubRoutines::_jlong_arraycopy = generate_conjoint_long_oop_copy(false, false, long_small_copy, conjoint_large_copy, conjoint_large_copy_lasx, "jlong_arraycopy", 5); + } else if (UseLSX) { + StubRoutines::_jlong_disjoint_arraycopy = generate_disjoint_long_oop_copy(false, false, long_small_copy, disjoint_large_copy, disjoint_large_copy_lsx, "jlong_disjoint_arraycopy", 4); + StubRoutines::_jlong_arraycopy = generate_conjoint_long_oop_copy(false, false, long_small_copy, conjoint_large_copy, conjoint_large_copy_lsx, "jlong_arraycopy", 4); + } else { + StubRoutines::_jlong_disjoint_arraycopy = generate_disjoint_long_oop_copy(false, false, long_small_copy, disjoint_large_copy, none, "jlong_disjoint_arraycopy", 4); + StubRoutines::_jlong_arraycopy = generate_conjoint_long_oop_copy(false, false, long_small_copy, conjoint_large_copy, none, "jlong_arraycopy", 4); + } + + // We don't generate specialized code for HeapWord-aligned source + // arrays, so just use the code we've already generated + 
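+    // (The large-copy loops selected above prefer LASX (256-bit SIMD) over LSX
+    //  (128-bit SIMD) over the plain scalar loop, depending on which feature
+    //  flags are enabled; the arrayof_ entries below therefore just alias the
+    //  entries generated above.)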
StubRoutines::_arrayof_jbyte_disjoint_arraycopy = StubRoutines::_jbyte_disjoint_arraycopy; + StubRoutines::_arrayof_jbyte_arraycopy = StubRoutines::_jbyte_arraycopy; + + StubRoutines::_arrayof_jshort_disjoint_arraycopy = StubRoutines::_jshort_disjoint_arraycopy; + StubRoutines::_arrayof_jshort_arraycopy = StubRoutines::_jshort_arraycopy; + + StubRoutines::_arrayof_jint_disjoint_arraycopy = StubRoutines::_jint_disjoint_arraycopy; + StubRoutines::_arrayof_jint_arraycopy = StubRoutines::_jint_arraycopy; + + StubRoutines::_arrayof_jlong_disjoint_arraycopy = StubRoutines::_jlong_disjoint_arraycopy; + StubRoutines::_arrayof_jlong_arraycopy = StubRoutines::_jlong_arraycopy; + + StubRoutines::_arrayof_oop_disjoint_arraycopy = StubRoutines::_oop_disjoint_arraycopy; + StubRoutines::_arrayof_oop_arraycopy = StubRoutines::_oop_arraycopy; + + StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit = StubRoutines::_oop_disjoint_arraycopy_uninit; + StubRoutines::_arrayof_oop_arraycopy_uninit = StubRoutines::_oop_arraycopy_uninit; + + StubRoutines::_checkcast_arraycopy = generate_checkcast_copy("checkcast_arraycopy"); + StubRoutines::_checkcast_arraycopy_uninit = generate_checkcast_copy("checkcast_arraycopy_uninit", true); + + StubRoutines::_unsafe_arraycopy = generate_unsafe_copy("unsafe_arraycopy"); + + StubRoutines::_generic_arraycopy = generate_generic_copy("generic_arraycopy"); + + StubRoutines::_jbyte_fill = generate_fill(T_BYTE, false, "jbyte_fill"); + StubRoutines::_jshort_fill = generate_fill(T_SHORT, false, "jshort_fill"); + StubRoutines::_jint_fill = generate_fill(T_INT, false, "jint_fill"); + StubRoutines::_arrayof_jbyte_fill = generate_fill(T_BYTE, true, "arrayof_jbyte_fill"); + StubRoutines::_arrayof_jshort_fill = generate_fill(T_SHORT, true, "arrayof_jshort_fill"); + StubRoutines::_arrayof_jint_fill = generate_fill(T_INT, true, "arrayof_jint_fill"); + } + + // Arguments: + // + // Inputs: + // A0 - source byte array address + // A1 - destination byte array address + // A2 - K (key) in little endian int array + // A3 - r vector byte array address + // A4 - input length + // + // Output: + // A0 - input length + // + address generate_aescrypt_encryptBlock(bool cbc) { + static const uint32_t ft_consts[256] = { + 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d, + 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554, + 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d, + 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a, + 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87, + 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b, + 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea, + 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b, + 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a, + 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f, + 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108, + 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f, + 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e, + 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5, + 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d, + 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f, + 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e, + 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb, + 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce, + 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497, + 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c, + 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed, + 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b, + 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a, + 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16, + 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594, + 
0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81, + 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3, + 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a, + 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504, + 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163, + 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d, + 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f, + 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739, + 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47, + 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395, + 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f, + 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883, + 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c, + 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76, + 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e, + 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4, + 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6, + 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b, + 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7, + 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0, + 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25, + 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818, + 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72, + 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651, + 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21, + 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85, + 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa, + 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12, + 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0, + 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9, + 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133, + 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7, + 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920, + 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a, + 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17, + 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8, + 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11, + 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a + }; + static const uint8_t fsb_consts[256] = { + 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, + 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76, + 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, + 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0, + 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, + 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15, + 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, + 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75, + 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, + 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84, + 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, + 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf, + 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, + 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8, + 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, + 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2, + 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, + 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73, + 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, + 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb, + 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, + 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79, + 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, + 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08, + 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, + 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a, + 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, + 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e, + 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, + 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf, + 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, + 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16 + }; + + 
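+    // ft_consts is the standard AES encryption T-table: entry x packs the GF(2^8)
+    // multiples {2*S[x], S[x], S[x], 3*S[x]} of the forward S-box value, so one
+    // table plus rotations covers SubBytes+ShiftRows+MixColumns for all columns.
+    // fsb_consts is the plain forward S-box, used only in the final round, which
+    // has no MixColumns step. Illustrative check (not part of the stub): with
+    // s = fsb_consts[x] and s2 = xtime(s), ft_consts[x] == (s2 << 24) | (s << 16)
+    // | (s << 8) | (s2 ^ s); e.g. x == 0 gives 0xc66363a5.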
__ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "aescrypt_encryptBlock"); + + // Allocate registers + Register src = A0; + Register dst = A1; + Register key = A2; + Register rve = A3; + Register srclen = A4; + Register keylen = T8; + Register srcend = A5; + Register keyold = A6; + Register t0 = A7; + Register t1, t2, t3, ftp; + Register xa[4] = { T0, T1, T2, T3 }; + Register ya[4] = { T4, T5, T6, T7 }; + + Label loop, tail, done; + address start = __ pc(); + + if (cbc) { + t1 = S0; + t2 = S1; + t3 = S2; + ftp = S3; + + __ beqz(srclen, done); + + __ addi_d(SP, SP, -4 * wordSize); + __ st_d(S3, SP, 3 * wordSize); + __ st_d(S2, SP, 2 * wordSize); + __ st_d(S1, SP, 1 * wordSize); + __ st_d(S0, SP, 0 * wordSize); + + __ add_d(srcend, src, srclen); + __ move(keyold, key); + } else { + t1 = A3; + t2 = A4; + t3 = A5; + ftp = A6; + } + + __ ld_w(keylen, key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)); + + // Round 1 + if (cbc) { + for (int i = 0; i < 4; i++) { + __ ld_w(xa[i], rve, 4 * i); + } + + __ bind(loop); + + for (int i = 0; i < 4; i++) { + __ ld_w(ya[i], src, 4 * i); + } + for (int i = 0; i < 4; i++) { + __ XOR(xa[i], xa[i], ya[i]); + } + } else { + for (int i = 0; i < 4; i++) { + __ ld_w(xa[i], src, 4 * i); + } + } + for (int i = 0; i < 4; i++) { + __ ld_w(ya[i], key, 4 * i); + } + for (int i = 0; i < 4; i++) { + __ revb_2h(xa[i], xa[i]); + } + for (int i = 0; i < 4; i++) { + __ rotri_w(xa[i], xa[i], 16); + } + for (int i = 0; i < 4; i++) { + __ XOR(xa[i], xa[i], ya[i]); + } + + __ li(ftp, (intptr_t)ft_consts); + + // Round 2 - (N-1) + for (int r = 0; r < 14; r++) { + Register *xp; + Register *yp; + + if (r & 1) { + xp = xa; + yp = ya; + } else { + xp = ya; + yp = xa; + } + + for (int i = 0; i < 4; i++) { + __ ld_w(xp[i], key, 4 * (4 * (r + 1) + i)); + } + + for (int i = 0; i < 4; i++) { + __ bstrpick_d(t0, yp[(i + 3) & 3], 7, 0); + __ bstrpick_d(t1, yp[(i + 2) & 3], 15, 8); + __ bstrpick_d(t2, yp[(i + 1) & 3], 23, 16); + __ bstrpick_d(t3, yp[(i + 0) & 3], 31, 24); + __ slli_w(t0, t0, 2); + __ slli_w(t1, t1, 2); + __ slli_w(t2, t2, 2); + __ slli_w(t3, t3, 2); + __ ldx_w(t0, ftp, t0); + __ ldx_w(t1, ftp, t1); + __ ldx_w(t2, ftp, t2); + __ ldx_w(t3, ftp, t3); + __ rotri_w(t0, t0, 24); + __ rotri_w(t1, t1, 16); + __ rotri_w(t2, t2, 8); + __ XOR(xp[i], xp[i], t0); + __ XOR(t0, t1, t2); + __ XOR(xp[i], xp[i], t3); + __ XOR(xp[i], xp[i], t0); + } + + if (r == 8) { + // AES 128 + __ li(t0, 44); + __ beq(t0, keylen, tail); + } else if (r == 10) { + // AES 192 + __ li(t0, 52); + __ beq(t0, keylen, tail); + } + } + + __ bind(tail); + __ li(ftp, (intptr_t)fsb_consts); + __ alsl_d(key, keylen, key, 2 - 1); + + // Round N + for (int i = 0; i < 4; i++) { + __ bstrpick_d(t0, ya[(i + 3) & 3], 7, 0); + __ bstrpick_d(t1, ya[(i + 2) & 3], 15, 8); + __ bstrpick_d(t2, ya[(i + 1) & 3], 23, 16); + __ bstrpick_d(t3, ya[(i + 0) & 3], 31, 24); + __ ldx_bu(t0, ftp, t0); + __ ldx_bu(t1, ftp, t1); + __ ldx_bu(t2, ftp, t2); + __ ldx_bu(t3, ftp, t3); + __ ld_w(xa[i], key, 4 * i - 16); + __ slli_w(t1, t1, 8); + __ slli_w(t2, t2, 16); + __ slli_w(t3, t3, 24); + __ XOR(xa[i], xa[i], t0); + __ XOR(t0, t1, t2); + __ XOR(xa[i], xa[i], t3); + __ XOR(xa[i], xa[i], t0); + } + + for (int i = 0; i < 4; i++) { + __ revb_2h(xa[i], xa[i]); + } + for (int i = 0; i < 4; i++) { + __ rotri_w(xa[i], xa[i], 16); + } + for (int i = 0; i < 4; i++) { + __ st_w(xa[i], dst, 4 * i); + } + + if (cbc) { + __ move(key, keyold); + __ addi_d(src, src, 16); + __ addi_d(dst, dst, 16); + 
__ blt(src, srcend, loop); + + for (int i = 0; i < 4; i++) { + __ st_w(xa[i], rve, 4 * i); + } + + __ ld_d(S3, SP, 3 * wordSize); + __ ld_d(S2, SP, 2 * wordSize); + __ ld_d(S1, SP, 1 * wordSize); + __ ld_d(S0, SP, 0 * wordSize); + __ addi_d(SP, SP, 4 * wordSize); + + __ bind(done); + __ move(A0, srclen); + } + + __ jr(RA); + + return start; + } + + address generate_mulAdd() { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "mulAdd"); + + address entry = __ pc(); + + const Register out = A0; + const Register in = A1; + const Register offset = A2; + const Register len = A3; + const Register k = A4; + + __ block_comment("Entry:"); + __ mul_add(out, in, offset, len, k); + __ jr(RA); + + return entry; + } + + // Arguments: + // + // Inputs: + // A0 - source byte array address + // A1 - destination byte array address + // A2 - K (key) in little endian int array + // A3 - r vector byte array address + // A4 - input length + // + // Output: + // A0 - input length + // + address generate_aescrypt_decryptBlock(bool cbc) { + static const uint32_t rt_consts[256] = { + 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96, + 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393, + 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25, + 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f, + 0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1, + 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6, + 0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da, + 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844, + 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd, + 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4, + 0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45, + 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94, + 0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7, + 0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a, + 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5, + 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c, + 0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1, + 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a, + 0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75, + 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051, + 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46, + 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff, + 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77, + 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb, + 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000, + 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e, + 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927, + 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a, + 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e, + 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16, + 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d, + 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8, + 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd, + 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34, + 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163, + 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120, + 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d, + 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0, + 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422, + 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef, + 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36, + 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4, + 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662, + 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5, + 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3, + 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b, + 0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8, + 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6, + 0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6, + 
0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0, + 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815, + 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f, + 0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df, + 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f, + 0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e, + 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713, + 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89, + 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c, + 0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf, + 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86, + 0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f, + 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541, + 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190, + 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742 + }; + static const uint8_t rsb_consts[256] = { + 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, + 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb, + 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, + 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb, + 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, + 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e, + 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, + 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25, + 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, + 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92, + 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, + 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84, + 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, + 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06, + 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, + 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b, + 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, + 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73, + 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, + 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e, + 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, + 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b, + 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, + 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4, + 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, + 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f, + 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, + 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef, + 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, + 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61, + 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, + 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d + }; + + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "aescrypt_decryptBlock"); + + // Allocate registers + Register src = A0; + Register dst = A1; + Register key = A2; + Register rve = A3; + Register srclen = A4; + Register keylen = T8; + Register srcend = A5; + Register t0 = A6; + Register t1 = A7; + Register t2, t3, rtp, rvp; + Register xa[4] = { T0, T1, T2, T3 }; + Register ya[4] = { T4, T5, T6, T7 }; + + Label loop, tail, done; + address start = __ pc(); + + if (cbc) { + t2 = S0; + t3 = S1; + rtp = S2; + rvp = S3; + + __ beqz(srclen, done); + + __ addi_d(SP, SP, -4 * wordSize); + __ st_d(S3, SP, 3 * wordSize); + __ st_d(S2, SP, 2 * wordSize); + __ st_d(S1, SP, 1 * wordSize); + __ st_d(S0, SP, 0 * wordSize); + + __ add_d(srcend, src, srclen); + __ move(rvp, rve); + } else { + t2 = A3; + t3 = A4; + rtp = A5; + } + + __ ld_w(keylen, key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)); + + __ bind(loop); + + // Round 1 + for (int i = 0; i < 4; i++) { + __ ld_w(xa[i], src, 4 * i); + } + for (int i = 0; i < 4; i++) { + __ ld_w(ya[i], key, 4 * (4 + i)); + } + for (int i = 0; i < 4; 
i++) { + __ revb_2h(xa[i], xa[i]); + } + for (int i = 0; i < 4; i++) { + __ rotri_w(xa[i], xa[i], 16); + } + for (int i = 0; i < 4; i++) { + __ XOR(xa[i], xa[i], ya[i]); + } + + __ li(rtp, (intptr_t)rt_consts); + + // Round 2 - (N-1) + for (int r = 0; r < 14; r++) { + Register *xp; + Register *yp; + + if (r & 1) { + xp = xa; + yp = ya; + } else { + xp = ya; + yp = xa; + } + + for (int i = 0; i < 4; i++) { + __ ld_w(xp[i], key, 4 * (4 * (r + 1) + 4 + i)); + } + + for (int i = 0; i < 4; i++) { + __ bstrpick_d(t0, yp[(i + 1) & 3], 7, 0); + __ bstrpick_d(t1, yp[(i + 2) & 3], 15, 8); + __ bstrpick_d(t2, yp[(i + 3) & 3], 23, 16); + __ bstrpick_d(t3, yp[(i + 0) & 3], 31, 24); + __ slli_w(t0, t0, 2); + __ slli_w(t1, t1, 2); + __ slli_w(t2, t2, 2); + __ slli_w(t3, t3, 2); + __ ldx_w(t0, rtp, t0); + __ ldx_w(t1, rtp, t1); + __ ldx_w(t2, rtp, t2); + __ ldx_w(t3, rtp, t3); + __ rotri_w(t0, t0, 24); + __ rotri_w(t1, t1, 16); + __ rotri_w(t2, t2, 8); + __ XOR(xp[i], xp[i], t0); + __ XOR(t0, t1, t2); + __ XOR(xp[i], xp[i], t3); + __ XOR(xp[i], xp[i], t0); + } + + if (r == 8) { + // AES 128 + __ li(t0, 44); + __ beq(t0, keylen, tail); + } else if (r == 10) { + // AES 192 + __ li(t0, 52); + __ beq(t0, keylen, tail); + } + } + + __ bind(tail); + __ li(rtp, (intptr_t)rsb_consts); + + // Round N + for (int i = 0; i < 4; i++) { + __ bstrpick_d(t0, ya[(i + 1) & 3], 7, 0); + __ bstrpick_d(t1, ya[(i + 2) & 3], 15, 8); + __ bstrpick_d(t2, ya[(i + 3) & 3], 23, 16); + __ bstrpick_d(t3, ya[(i + 0) & 3], 31, 24); + __ ldx_bu(t0, rtp, t0); + __ ldx_bu(t1, rtp, t1); + __ ldx_bu(t2, rtp, t2); + __ ldx_bu(t3, rtp, t3); + __ ld_w(xa[i], key, 4 * i); + __ slli_w(t1, t1, 8); + __ slli_w(t2, t2, 16); + __ slli_w(t3, t3, 24); + __ XOR(xa[i], xa[i], t0); + __ XOR(t0, t1, t2); + __ XOR(xa[i], xa[i], t3); + __ XOR(xa[i], xa[i], t0); + } + + if (cbc) { + for (int i = 0; i < 4; i++) { + __ ld_w(ya[i], rvp, 4 * i); + } + } + for (int i = 0; i < 4; i++) { + __ revb_2h(xa[i], xa[i]); + } + for (int i = 0; i < 4; i++) { + __ rotri_w(xa[i], xa[i], 16); + } + if (cbc) { + for (int i = 0; i < 4; i++) { + __ XOR(xa[i], xa[i], ya[i]); + } + } + for (int i = 0; i < 4; i++) { + __ st_w(xa[i], dst, 4 * i); + } + + if (cbc) { + __ move(rvp, src); + __ addi_d(src, src, 16); + __ addi_d(dst, dst, 16); + __ blt(src, srcend, loop); + + __ ld_d(t0, src, -16); + __ ld_d(t1, src, -8); + __ st_d(t0, rve, 0); + __ st_d(t1, rve, 8); + + __ ld_d(S3, SP, 3 * wordSize); + __ ld_d(S2, SP, 2 * wordSize); + __ ld_d(S1, SP, 1 * wordSize); + __ ld_d(S0, SP, 0 * wordSize); + __ addi_d(SP, SP, 4 * wordSize); + + __ bind(done); + __ move(A0, srclen); + } + + __ jr(RA); + + return start; + } + + // Arguments: + // + // Inputs: + // A0 - byte[] source+offset + // A1 - int[] SHA.state + // A2 - int offset + // A3 - int limit + // + void generate_sha1_implCompress(const char *name, address &entry, address &entry_mb) { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", name); + Label keys, loop; + + // Keys + __ bind(keys); + __ emit_int32(0x5a827999); + __ emit_int32(0x6ed9eba1); + __ emit_int32(0x8f1bbcdc); + __ emit_int32(0xca62c1d6); + + // Allocate registers + Register t0 = T5; + Register t1 = T6; + Register t2 = T7; + Register t3 = T8; + Register buf = A0; + Register state = A1; + Register ofs = A2; + Register limit = A3; + Register ka[4] = { A4, A5, A6, A7 }; + Register sa[5] = { T0, T1, T2, T3, T4 }; + + // Entry + entry = __ pc(); + __ move(ofs, R0); + __ move(limit, R0); + + // Entry MB + entry_mb = __ pc(); + + // Allocate scratch space 
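+    // (64 bytes: a 16-entry circular buffer for the SHA-1 message schedule,
+    //  addressed below as SP + (i & 0xF) * 4)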
+ __ addi_d(SP, SP, -64); + + // Load keys + __ lipc(t0, keys); + __ ld_w(ka[0], t0, 0); + __ ld_w(ka[1], t0, 4); + __ ld_w(ka[2], t0, 8); + __ ld_w(ka[3], t0, 12); + + __ bind(loop); + // Load arguments + __ ld_w(sa[0], state, 0); + __ ld_w(sa[1], state, 4); + __ ld_w(sa[2], state, 8); + __ ld_w(sa[3], state, 12); + __ ld_w(sa[4], state, 16); + + // 80 rounds of hashing + for (int i = 0; i < 80; i++) { + Register a = sa[(5 - (i % 5)) % 5]; + Register b = sa[(6 - (i % 5)) % 5]; + Register c = sa[(7 - (i % 5)) % 5]; + Register d = sa[(8 - (i % 5)) % 5]; + Register e = sa[(9 - (i % 5)) % 5]; + + if (i < 16) { + __ ld_w(t0, buf, i * 4); + __ revb_2h(t0, t0); + __ rotri_w(t0, t0, 16); + __ add_w(e, e, t0); + __ st_w(t0, SP, i * 4); + __ XOR(t0, c, d); + __ AND(t0, t0, b); + __ XOR(t0, t0, d); + } else { + __ ld_w(t0, SP, ((i - 3) & 0xF) * 4); + __ ld_w(t1, SP, ((i - 8) & 0xF) * 4); + __ ld_w(t2, SP, ((i - 14) & 0xF) * 4); + __ ld_w(t3, SP, ((i - 16) & 0xF) * 4); + __ XOR(t0, t0, t1); + __ XOR(t0, t0, t2); + __ XOR(t0, t0, t3); + __ rotri_w(t0, t0, 31); + __ add_w(e, e, t0); + __ st_w(t0, SP, (i & 0xF) * 4); + + if (i < 20) { + __ XOR(t0, c, d); + __ AND(t0, t0, b); + __ XOR(t0, t0, d); + } else if (i < 40 || i >= 60) { + __ XOR(t0, b, c); + __ XOR(t0, t0, d); + } else if (i < 60) { + __ OR(t0, c, d); + __ AND(t0, t0, b); + __ AND(t2, c, d); + __ OR(t0, t0, t2); + } + } + + __ rotri_w(b, b, 2); + __ add_w(e, e, t0); + __ add_w(e, e, ka[i / 20]); + __ rotri_w(t0, a, 27); + __ add_w(e, e, t0); + } + + // Save updated state + __ ld_w(t0, state, 0); + __ ld_w(t1, state, 4); + __ ld_w(t2, state, 8); + __ ld_w(t3, state, 12); + __ add_w(sa[0], sa[0], t0); + __ ld_w(t0, state, 16); + __ add_w(sa[1], sa[1], t1); + __ add_w(sa[2], sa[2], t2); + __ add_w(sa[3], sa[3], t3); + __ add_w(sa[4], sa[4], t0); + __ st_w(sa[0], state, 0); + __ st_w(sa[1], state, 4); + __ st_w(sa[2], state, 8); + __ st_w(sa[3], state, 12); + __ st_w(sa[4], state, 16); + + __ addi_w(ofs, ofs, 64); + __ addi_d(buf, buf, 64); + __ bge(limit, ofs, loop); + __ move(V0, ofs); // return ofs + + __ addi_d(SP, SP, 64); + __ jr(RA); + } + + // Arguments: + // + // Inputs: + // A0 - byte[] source+offset + // A1 - int[] SHA.state + // A2 - int offset + // A3 - int limit + // + void generate_sha256_implCompress(const char *name, address &entry, address &entry_mb) { + static const uint32_t round_consts[64] = { + 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, + 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, + 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, + 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, + 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, + 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, + 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, + 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967, + 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, + 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, + 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, + 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, + 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, + 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3, + 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, + 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2, + }; + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", name); + Label loop; + + // Allocate registers + Register t0 = A4; + Register t1 = A5; + Register t2 = A6; + Register t3 = A7; + Register buf = A0; + Register state = A1; + Register ofs = A2; + Register limit = A3; + Register kptr = T8; + Register sa[8] = { T0, T1, T2, T3, T4, 
T5, T6, T7 }; + + // Entry + entry = __ pc(); + __ move(ofs, R0); + __ move(limit, R0); + + // Entry MB + entry_mb = __ pc(); + + // Allocate scratch space + __ addi_d(SP, SP, -64); + + // Load keys base address + __ li(kptr, (intptr_t)round_consts); + + __ bind(loop); + // Load state + __ ld_w(sa[0], state, 0); + __ ld_w(sa[1], state, 4); + __ ld_w(sa[2], state, 8); + __ ld_w(sa[3], state, 12); + __ ld_w(sa[4], state, 16); + __ ld_w(sa[5], state, 20); + __ ld_w(sa[6], state, 24); + __ ld_w(sa[7], state, 28); + + // Do 64 rounds of hashing + for (int i = 0; i < 64; i++) { + Register a = sa[(0 - i) & 7]; + Register b = sa[(1 - i) & 7]; + Register c = sa[(2 - i) & 7]; + Register d = sa[(3 - i) & 7]; + Register e = sa[(4 - i) & 7]; + Register f = sa[(5 - i) & 7]; + Register g = sa[(6 - i) & 7]; + Register h = sa[(7 - i) & 7]; + + if (i < 16) { + __ ld_w(t1, buf, i * 4); + __ revb_2h(t1, t1); + __ rotri_w(t1, t1, 16); + } else { + __ ld_w(t0, SP, ((i - 15) & 0xF) * 4); + __ ld_w(t1, SP, ((i - 16) & 0xF) * 4); + __ ld_w(t2, SP, ((i - 7) & 0xF) * 4); + __ add_w(t1, t1, t2); + __ rotri_w(t2, t0, 18); + __ srli_w(t3, t0, 3); + __ rotri_w(t0, t0, 7); + __ XOR(t2, t2, t3); + __ XOR(t0, t0, t2); + __ add_w(t1, t1, t0); + __ ld_w(t0, SP, ((i - 2) & 0xF) * 4); + __ rotri_w(t2, t0, 19); + __ srli_w(t3, t0, 10); + __ rotri_w(t0, t0, 17); + __ XOR(t2, t2, t3); + __ XOR(t0, t0, t2); + __ add_w(t1, t1, t0); + } + + __ rotri_w(t2, e, 11); + __ rotri_w(t3, e, 25); + __ rotri_w(t0, e, 6); + __ XOR(t2, t2, t3); + __ XOR(t0, t0, t2); + __ XOR(t2, g, f); + __ ld_w(t3, kptr, i * 4); + __ AND(t2, t2, e); + __ XOR(t2, t2, g); + __ add_w(t0, t0, t2); + __ add_w(t0, t0, t3); + __ add_w(h, h, t1); + __ add_w(h, h, t0); + __ add_w(d, d, h); + __ rotri_w(t2, a, 13); + __ rotri_w(t3, a, 22); + __ rotri_w(t0, a, 2); + __ XOR(t2, t2, t3); + __ XOR(t0, t0, t2); + __ add_w(h, h, t0); + __ OR(t0, c, b); + __ AND(t2, c, b); + __ AND(t0, t0, a); + __ OR(t0, t0, t2); + __ add_w(h, h, t0); + __ st_w(t1, SP, (i & 0xF) * 4); + } + + // Add to state + __ ld_w(t0, state, 0); + __ ld_w(t1, state, 4); + __ ld_w(t2, state, 8); + __ ld_w(t3, state, 12); + __ add_w(sa[0], sa[0], t0); + __ add_w(sa[1], sa[1], t1); + __ add_w(sa[2], sa[2], t2); + __ add_w(sa[3], sa[3], t3); + __ ld_w(t0, state, 16); + __ ld_w(t1, state, 20); + __ ld_w(t2, state, 24); + __ ld_w(t3, state, 28); + __ add_w(sa[4], sa[4], t0); + __ add_w(sa[5], sa[5], t1); + __ add_w(sa[6], sa[6], t2); + __ add_w(sa[7], sa[7], t3); + __ st_w(sa[0], state, 0); + __ st_w(sa[1], state, 4); + __ st_w(sa[2], state, 8); + __ st_w(sa[3], state, 12); + __ st_w(sa[4], state, 16); + __ st_w(sa[5], state, 20); + __ st_w(sa[6], state, 24); + __ st_w(sa[7], state, 28); + + __ addi_w(ofs, ofs, 64); + __ addi_d(buf, buf, 64); + __ bge(limit, ofs, loop); + __ move(V0, ofs); // return ofs + + __ addi_d(SP, SP, 64); + __ jr(RA); + } + + // Do NOT delete this node which stands for stub routine placeholder + address generate_updateBytesCRC32() { + assert(UseCRC32Intrinsics, "need CRC32 instructions support"); + + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "updateBytesCRC32"); + + address start = __ pc(); + + const Register crc = A0; // crc + const Register buf = A1; // source java byte array address + const Register len = A2; // length + const Register tmp = A3; + + __ enter(); // required for proper stackwalking of RuntimeStub frame + + __ kernel_crc32(crc, buf, len, tmp); + + __ leave(); // required for proper stackwalking of RuntimeStub frame + __ jr(RA); + + return 
start; + } + + // Do NOT delete this node which stands for stub routine placeholder + address generate_updateBytesCRC32C() { + assert(UseCRC32CIntrinsics, "need CRC32C instructions support"); + + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "updateBytesCRC32C"); + + address start = __ pc(); + + const Register crc = A0; // crc + const Register buf = A1; // source java byte array address + const Register len = A2; // length + const Register tmp = A3; + + __ enter(); // required for proper stackwalking of RuntimeStub frame + + __ kernel_crc32c(crc, buf, len, tmp); + + __ leave(); // required for proper stackwalking of RuntimeStub frame + __ jr(RA); + + return start; + } + + address generate_dsin_dcos(bool isCos) { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", isCos ? "libmDcos" : "libmDsin"); + address start = __ pc(); + __ generate_dsin_dcos(isCos, (address)StubRoutines::la::_npio2_hw, + (address)StubRoutines::la::_two_over_pi, + (address)StubRoutines::la::_pio2, + (address)StubRoutines::la::_dsin_coef, + (address)StubRoutines::la::_dcos_coef); + return start; + } + + // add a function to implement SafeFetch32 and SafeFetchN + void generate_safefetch(const char* name, int size, address* entry, + address* fault_pc, address* continuation_pc) { + // safefetch signatures: + // int SafeFetch32(int* adr, int errValue); + // intptr_t SafeFetchN (intptr_t* adr, intptr_t errValue); + // + // arguments: + // A0 = adr + // A1 = errValue + // + // result: + // V0 = *adr or errValue + StubCodeMark mark(this, "StubRoutines", name); + + // Entry point, pc or function descriptor. + *entry = __ pc(); + + // Load *adr into A1, may fault. + *fault_pc = __ pc(); + switch (size) { + case 4: + // int32_t + __ ld_w(A1, A0, 0); + break; + case 8: + // int64_t + __ ld_d(A1, A0, 0); + break; + default: + ShouldNotReachHere(); + } + + // return errValue or *adr + *continuation_pc = __ pc(); + __ add_d(V0, A1, R0); + __ jr(RA); + } + + +#undef __ +#define __ masm-> + + // Continuation point for throwing of implicit exceptions that are + // not handled in the current activation. Fabricates an exception + // oop and initiates normal exception dispatching in this + // frame. Since we need to preserve callee-saved values (currently + // only for C2, but done for C1 as well) we need a callee-saved oop + // map and therefore have to make these stubs into RuntimeStubs + // rather than BufferBlobs. If the compiler needs all registers to + // be preserved between the fault point and the exception handler + // then it must assume responsibility for that in + // AbstractCompiler::continuation_for_implicit_null_exception or + // continuation_for_implicit_division_by_zero_exception. All other + // implicit exceptions (e.g., NullPointerException or + // AbstractMethodError on entry) are either at call sites or + // otherwise assume that stack unwinding will be initiated, so + // caller saved registers were assumed volatile in the compiler. + address generate_throw_exception(const char* name, + address runtime_entry, + bool restore_saved_exception_pc) { + // Information about frame layout at time of blocking runtime call. + // Note that we only have to preserve callee-saved registers since + // the compilers are responsible for supplying a continuation point + // if they expect all registers to be preserved.
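+  // In outline, the stub generated below saves the callee-saved S0-S7 registers,
+  // records last_Java_sp/last_Java_fp, calls runtime_entry with the current
+  // thread as its only argument, and then jumps to
+  // StubRoutines::forward_exception_entry() to dispatch the pending exception.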
+ enum layout { + thread_off, // last_java_sp + S7_off, // callee saved register sp + 1 + S6_off, // callee saved register sp + 2 + S5_off, // callee saved register sp + 3 + S4_off, // callee saved register sp + 4 + S3_off, // callee saved register sp + 5 + S2_off, // callee saved register sp + 6 + S1_off, // callee saved register sp + 7 + S0_off, // callee saved register sp + 8 + FP_off, + ret_address, + framesize + }; + + int insts_size = 2048; + int locs_size = 32; + + // CodeBuffer* code = new CodeBuffer(insts_size, locs_size, 0, 0, 0, false, + // NULL, NULL, NULL, false, NULL, name, false); + CodeBuffer code (name , insts_size, locs_size); + OopMapSet* oop_maps = new OopMapSet(); + MacroAssembler* masm = new MacroAssembler(&code); + + address start = __ pc(); + + // This is an inlined and slightly modified version of call_VM + // which has the ability to fetch the return PC out of + // thread-local storage and also sets up last_Java_sp slightly + // differently than the real call_VM +#ifndef OPT_THREAD + Register java_thread = TREG; + __ get_thread(java_thread); +#else + Register java_thread = TREG; +#endif + if (restore_saved_exception_pc) { + __ ld_d(RA, java_thread, in_bytes(JavaThread::saved_exception_pc_offset())); + } + __ enter(); // required for proper stackwalking of RuntimeStub frame + + __ addi_d(SP, SP, (-1) * (framesize-2) * wordSize); // prolog + __ st_d(S0, SP, S0_off * wordSize); + __ st_d(S1, SP, S1_off * wordSize); + __ st_d(S2, SP, S2_off * wordSize); + __ st_d(S3, SP, S3_off * wordSize); + __ st_d(S4, SP, S4_off * wordSize); + __ st_d(S5, SP, S5_off * wordSize); + __ st_d(S6, SP, S6_off * wordSize); + __ st_d(S7, SP, S7_off * wordSize); + + int frame_complete = __ pc() - start; + // push java thread (becomes first argument of C function) + __ st_d(java_thread, SP, thread_off * wordSize); + if (java_thread != A0) + __ move(A0, java_thread); + + // Set up last_Java_sp and last_Java_fp + Label before_call; + address the_pc = __ pc(); + __ bind(before_call); + __ set_last_Java_frame(java_thread, SP, FP, before_call); + // Align stack + __ li(AT, -(StackAlignmentInBytes)); + __ andr(SP, SP, AT); + + // Call runtime + // TODO: confirm reloc + __ call(runtime_entry, relocInfo::runtime_call_type); + // Generate oop map + OopMap* map = new OopMap(framesize, 0); + oop_maps->add_gc_map(the_pc - start, map); + + // restore the thread (cannot use the pushed argument since arguments + // may be overwritten by C code generated by an optimizing compiler); + // however can use the register value directly if it is callee saved. +#ifndef OPT_THREAD + __ get_thread(java_thread); +#endif + + __ ld_d(SP, java_thread, in_bytes(JavaThread::last_Java_sp_offset())); + __ reset_last_Java_frame(java_thread, true); + + // Restore callee save registers. 
This must be done after resetting the Java frame + __ ld_d(S0, SP, S0_off * wordSize); + __ ld_d(S1, SP, S1_off * wordSize); + __ ld_d(S2, SP, S2_off * wordSize); + __ ld_d(S3, SP, S3_off * wordSize); + __ ld_d(S4, SP, S4_off * wordSize); + __ ld_d(S5, SP, S5_off * wordSize); + __ ld_d(S6, SP, S6_off * wordSize); + __ ld_d(S7, SP, S7_off * wordSize); + + // discard arguments + __ move(SP, FP); // epilog + __ pop(FP); + // check for pending exceptions +#ifdef ASSERT + Label L; + __ ld_d(AT, java_thread, in_bytes(Thread::pending_exception_offset())); + __ bne(AT, R0, L); + __ should_not_reach_here(); + __ bind(L); +#endif //ASSERT + __ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); + RuntimeStub* stub = RuntimeStub::new_runtime_stub(name, + &code, + frame_complete, + framesize, + oop_maps, false); + return stub->entry_point(); + } + + class MontgomeryMultiplyGenerator : public MacroAssembler { + + Register Pa_base, Pb_base, Pn_base, Pm_base, inv, Rlen, Rlen2, Ra, Rb, Rm, + Rn, Iam, Ibn, Rhi_ab, Rlo_ab, Rhi_mn, Rlo_mn, t0, t1, t2, Ri, Rj; + + bool _squaring; + + public: + MontgomeryMultiplyGenerator (Assembler *as, bool squaring) + : MacroAssembler(as->code()), _squaring(squaring) { + + // Register allocation + + Register reg = A0; + Pa_base = reg; // Argument registers: + if (squaring) + Pb_base = Pa_base; + else + Pb_base = ++reg; + Pn_base = ++reg; + Rlen = ++reg; + inv = ++reg; + Rlen2 = inv; // Reuse inv + Pm_base = ++reg; + + // Working registers: + Ra = ++reg; // The current digit of a, b, n, and m. + Rb = ++reg; + Rm = ++reg; + Rn = ++reg; + + Iam = ++reg; // Index to the current/next digit of a, b, n, and m. + Ibn = ++reg; + + t0 = ++reg; // Three registers which form a + t1 = ++reg; // triple-precision accumuator. + t2 = ++reg; + + Ri = ++reg; // Inner and outer loop indexes. + Rj = ++reg; + + if (squaring) { + Rhi_ab = ++reg; // Product registers: low and high parts + reg = S0; + Rlo_ab = ++reg; // of a*b and m*n. + } else { + reg = S0; + Rhi_ab = reg; // Product registers: low and high parts + Rlo_ab = ++reg; // of a*b and m*n. 
+ } + + Rhi_mn = ++reg; + Rlo_mn = ++reg; + } + + private: + void enter() { + addi_d(SP, SP, -6 * wordSize); + st_d(FP, SP, 0 * wordSize); + move(FP, SP); + } + + void leave() { + addi_d(T0, FP, 6 * wordSize); + ld_d(FP, FP, 0 * wordSize); + move(SP, T0); + } + + void save_regs() { + if (!_squaring) + st_d(Rhi_ab, FP, 5 * wordSize); + st_d(Rlo_ab, FP, 4 * wordSize); + st_d(Rhi_mn, FP, 3 * wordSize); + st_d(Rlo_mn, FP, 2 * wordSize); + st_d(Pm_base, FP, 1 * wordSize); + } + + void restore_regs() { + if (!_squaring) + ld_d(Rhi_ab, FP, 5 * wordSize); + ld_d(Rlo_ab, FP, 4 * wordSize); + ld_d(Rhi_mn, FP, 3 * wordSize); + ld_d(Rlo_mn, FP, 2 * wordSize); + ld_d(Pm_base, FP, 1 * wordSize); + } + + template + void unroll_2(Register count, T block, Register tmp) { + Label loop, end, odd; + andi(tmp, count, 1); + bnez(tmp, odd); + beqz(count, end); + align(16); + bind(loop); + (this->*block)(); + bind(odd); + (this->*block)(); + addi_w(count, count, -2); + blt(R0, count, loop); + bind(end); + } + + template + void unroll_2(Register count, T block, Register d, Register s, Register tmp) { + Label loop, end, odd; + andi(tmp, count, 1); + bnez(tmp, odd); + beqz(count, end); + align(16); + bind(loop); + (this->*block)(d, s, tmp); + bind(odd); + (this->*block)(d, s, tmp); + addi_w(count, count, -2); + blt(R0, count, loop); + bind(end); + } + + void acc(Register Rhi, Register Rlo, + Register t0, Register t1, Register t2, Register t, Register c) { + add_d(t0, t0, Rlo); + OR(t, t1, Rhi); + sltu(c, t0, Rlo); + add_d(t1, t1, Rhi); + add_d(t1, t1, c); + sltu(c, t1, t); + add_d(t2, t2, c); + } + + void pre1(Register i) { + block_comment("pre1"); + // Iam = 0; + // Ibn = i; + + slli_w(Ibn, i, LogBytesPerWord); + + // Ra = Pa_base[Iam]; + // Rb = Pb_base[Ibn]; + // Rm = Pm_base[Iam]; + // Rn = Pn_base[Ibn]; + + ld_d(Ra, Pa_base, 0); + ldx_d(Rb, Pb_base, Ibn); + ld_d(Rm, Pm_base, 0); + ldx_d(Rn, Pn_base, Ibn); + + move(Iam, R0); + + // Zero the m*n result. + move(Rhi_mn, R0); + move(Rlo_mn, R0); + } + + // The core multiply-accumulate step of a Montgomery + // multiplication. The idea is to schedule operations as a + // pipeline so that instructions with long latencies (loads and + // multiplies) have time to complete before their results are + // used. This most benefits in-order implementations of the + // architecture but out-of-order ones also benefit. + void step() { + block_comment("step"); + // MACC(Ra, Rb, t0, t1, t2); + // Ra = Pa_base[++Iam]; + // Rb = Pb_base[--Ibn]; + addi_d(Iam, Iam, wordSize); + addi_d(Ibn, Ibn, -wordSize); + mul_d(Rlo_ab, Ra, Rb); + mulh_du(Rhi_ab, Ra, Rb); + acc(Rhi_mn, Rlo_mn, t0, t1, t2, Ra, Rb); // The pending m*n from the + // previous iteration. 
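+    // acc() is the accumulate half of the MACC step from the C model at the
+    // end of this class: the mul_d/mulh_du pair produces a 128-bit product and
+    // acc() folds it (one iteration later, to hide the multiply latency) into
+    // the 192-bit accumulator t2:t1:t0, using its last two register arguments
+    // only as scratch for the carries.  In C, approximately (assuming the
+    // GCC/Clang unsigned __int128 extension):
+    //
+    //   static void MACC(unsigned long a, unsigned long b,
+    //                    unsigned long *t0, unsigned long *t1, unsigned long *t2) {
+    //     unsigned __int128 p = (unsigned __int128)a * b + *t0;
+    //     *t0  = (unsigned long)p;
+    //     p    = (p >> 64) + *t1;
+    //     *t1  = (unsigned long)p;
+    //     *t2 += (unsigned long)(p >> 64);
+    //   }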
+ ldx_d(Ra, Pa_base, Iam); + ldx_d(Rb, Pb_base, Ibn); + + // MACC(Rm, Rn, t0, t1, t2); + // Rm = Pm_base[Iam]; + // Rn = Pn_base[Ibn]; + mul_d(Rlo_mn, Rm, Rn); + mulh_du(Rhi_mn, Rm, Rn); + acc(Rhi_ab, Rlo_ab, t0, t1, t2, Rm, Rn); + ldx_d(Rm, Pm_base, Iam); + ldx_d(Rn, Pn_base, Ibn); + } + + void post1() { + block_comment("post1"); + + // MACC(Ra, Rb, t0, t1, t2); + mul_d(Rlo_ab, Ra, Rb); + mulh_du(Rhi_ab, Ra, Rb); + acc(Rhi_mn, Rlo_mn, t0, t1, t2, Ra, Rb); // The pending m*n + acc(Rhi_ab, Rlo_ab, t0, t1, t2, Ra, Rb); + + // Pm_base[Iam] = Rm = t0 * inv; + mul_d(Rm, t0, inv); + stx_d(Rm, Pm_base, Iam); + + // MACC(Rm, Rn, t0, t1, t2); + // t0 = t1; t1 = t2; t2 = 0; + mulh_du(Rhi_mn, Rm, Rn); + +#ifndef PRODUCT + // assert(m[i] * n[0] + t0 == 0, "broken Montgomery multiply"); + { + mul_d(Rlo_mn, Rm, Rn); + add_d(Rlo_mn, t0, Rlo_mn); + Label ok; + beqz(Rlo_mn, ok); { + stop("broken Montgomery multiply"); + } bind(ok); + } +#endif + + // We have very carefully set things up so that + // m[i]*n[0] + t0 == 0 (mod b), so we don't have to calculate + // the lower half of Rm * Rn because we know the result already: + // it must be -t0. t0 + (-t0) must generate a carry iff + // t0 != 0. So, rather than do a mul and an adds we just set + // the carry flag iff t0 is nonzero. + // + // mul_d(Rlo_mn, Rm, Rn); + // add_d(t0, t0, Rlo_mn); + OR(Ra, t1, Rhi_mn); + sltu(Rb, R0, t0); + add_d(t0, t1, Rhi_mn); + add_d(t0, t0, Rb); + sltu(Rb, t0, Ra); + add_d(t1, t2, Rb); + move(t2, R0); + } + + void pre2(Register i, Register len) { + block_comment("pre2"); + + // Rj == i-len + sub_w(Rj, i, len); + + // Iam = i - len; + // Ibn = len; + slli_w(Iam, Rj, LogBytesPerWord); + slli_w(Ibn, len, LogBytesPerWord); + + // Ra = Pa_base[++Iam]; + // Rb = Pb_base[--Ibn]; + // Rm = Pm_base[++Iam]; + // Rn = Pn_base[--Ibn]; + addi_d(Iam, Iam, wordSize); + addi_d(Ibn, Ibn, -wordSize); + + ldx_d(Ra, Pa_base, Iam); + ldx_d(Rb, Pb_base, Ibn); + ldx_d(Rm, Pm_base, Iam); + ldx_d(Rn, Pn_base, Ibn); + + move(Rhi_mn, R0); + move(Rlo_mn, R0); + } + + void post2(Register i, Register len) { + block_comment("post2"); + + sub_w(Rj, i, len); + slli_w(Iam, Rj, LogBytesPerWord); + + add_d(t0, t0, Rlo_mn); // The pending m*n, low part + + // As soon as we know the least significant digit of our result, + // store it. + // Pm_base[i-len] = t0; + stx_d(t0, Pm_base, Iam); + + // t0 = t1; t1 = t2; t2 = 0; + OR(Ra, t1, Rhi_mn); + sltu(Rb, t0, Rlo_mn); + add_d(t0, t1, Rhi_mn); // The pending m*n, high part + add_d(t0, t0, Rb); + sltu(Rb, t0, Ra); + add_d(t1, t2, Rb); + move(t2, R0); + } + + // A carry in t0 after Montgomery multiplication means that we + // should subtract multiples of n from our result in m. We'll + // keep doing that until there is no carry. + void normalize(Register len) { + block_comment("normalize"); + // while (t0) + // t0 = sub(Pm_base, Pn_base, t0, len); + Label loop, post, again; + Register cnt = t1, i = t2, b = Ra, t = Rb; // Re-use registers; we're done with them now + beqz(t0, post); { + bind(again); { + move(i, R0); + move(b, R0); + slli_w(cnt, len, LogBytesPerWord); + align(16); + bind(loop); { + ldx_d(Rm, Pm_base, i); + ldx_d(Rn, Pn_base, i); + sltu(t, Rm, b); + sub_d(Rm, Rm, b); + sltu(b, Rm, Rn); + sub_d(Rm, Rm, Rn); + OR(b, b, t); + stx_d(Rm, Pm_base, i); + addi_w(i, i, BytesPerWord); + } blt(i, cnt, loop); + sub_d(t0, t0, b); + } bnez(t0, again); + } bind(post); + } + + // Move memory at s to d, reversing words. 
+ // Increments d to end of copied memory + // Destroys tmp1, tmp2, tmp3 + // Preserves len + // Leaves s pointing to the address which was in d at start + void reverse(Register d, Register s, Register len, Register tmp1, Register tmp2) { + assert(tmp1 < S0 && tmp2 < S0, "register corruption"); + + alsl_d(s, len, s, LogBytesPerWord - 1); + move(tmp1, len); + unroll_2(tmp1, &MontgomeryMultiplyGenerator::reverse1, d, s, tmp2); + slli_w(s, len, LogBytesPerWord); + sub_d(s, d, s); + } + + // where + void reverse1(Register d, Register s, Register tmp) { + ld_d(tmp, s, -wordSize); + addi_d(s, s, -wordSize); + addi_d(d, d, wordSize); + rotri_d(tmp, tmp, 32); + st_d(tmp, d, -wordSize); + } + + public: + /** + * Fast Montgomery multiplication. The derivation of the + * algorithm is in A Cryptographic Library for the Motorola + * DSP56000, Dusse and Kaliski, Proc. EUROCRYPT 90, pp. 230-237. + * + * Arguments: + * + * Inputs for multiplication: + * A0 - int array elements a + * A1 - int array elements b + * A2 - int array elements n (the modulus) + * A3 - int length + * A4 - int inv + * A5 - int array elements m (the result) + * + * Inputs for squaring: + * A0 - int array elements a + * A1 - int array elements n (the modulus) + * A2 - int length + * A3 - int inv + * A4 - int array elements m (the result) + * + */ + address generate_multiply() { + Label argh, nothing; + bind(argh); + stop("MontgomeryMultiply total_allocation must be <= 8192"); + + align(CodeEntryAlignment); + address entry = pc(); + + beqz(Rlen, nothing); + + enter(); + + // Make room. + sltui(Ra, Rlen, 513); + beqz(Ra, argh); + slli_w(Ra, Rlen, exact_log2(4 * sizeof (jint))); + sub_d(Ra, SP, Ra); + + srli_w(Rlen, Rlen, 1); // length in longwords = len/2 + + { + // Copy input args, reversing as we go. We use Ra as a + // temporary variable. + reverse(Ra, Pa_base, Rlen, t0, t1); + if (!_squaring) + reverse(Ra, Pb_base, Rlen, t0, t1); + reverse(Ra, Pn_base, Rlen, t0, t1); + } + + // Push all call-saved registers and also Pm_base which we'll need + // at the end. 
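+    // The reverse() calls just above repack each Java int array into the
+    // scratch area as 64-bit digits, least-significant digit first, by walking
+    // the source backwards and rotating every 64-bit word by 32 bits.  In C,
+    // approximately (a pointer sketch; the real code threads d, s and len
+    // through registers and leaves s pointing at the old d):
+    //
+    //   static unsigned long *reverse(unsigned long *d, const unsigned long *s, int len) {
+    //     const unsigned long *end = s + len;
+    //     while (end != s) {
+    //       unsigned long w = *--end;
+    //       *d++ = (w << 32) | (w >> 32);   // swap the two 32-bit int halves
+    //     }
+    //     return d;                         // advanced past the copied digits
+    //   }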
+ save_regs(); + +#ifndef PRODUCT + // assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply"); + { + ld_d(Rn, Pn_base, 0); + li(t0, -1); + mul_d(Rlo_mn, Rn, inv); + Label ok; + beq(Rlo_mn, t0, ok); { + stop("broken inverse in Montgomery multiply"); + } bind(ok); + } +#endif + + move(Pm_base, Ra); + + move(t0, R0); + move(t1, R0); + move(t2, R0); + + block_comment("for (int i = 0; i < len; i++) {"); + move(Ri, R0); { + Label loop, end; + bge(Ri, Rlen, end); + + bind(loop); + pre1(Ri); + + block_comment(" for (j = i; j; j--) {"); { + move(Rj, Ri); + unroll_2(Rj, &MontgomeryMultiplyGenerator::step, Rlo_ab); + } block_comment(" } // j"); + + post1(); + addi_w(Ri, Ri, 1); + blt(Ri, Rlen, loop); + bind(end); + block_comment("} // i"); + } + + block_comment("for (int i = len; i < 2*len; i++) {"); + move(Ri, Rlen); + slli_w(Rlen2, Rlen, 1); { + Label loop, end; + bge(Ri, Rlen2, end); + + bind(loop); + pre2(Ri, Rlen); + + block_comment(" for (j = len*2-i-1; j; j--) {"); { + sub_w(Rj, Rlen2, Ri); + addi_w(Rj, Rj, -1); + unroll_2(Rj, &MontgomeryMultiplyGenerator::step, Rlo_ab); + } block_comment(" } // j"); + + post2(Ri, Rlen); + addi_w(Ri, Ri, 1); + blt(Ri, Rlen2, loop); + bind(end); + } + block_comment("} // i"); + + normalize(Rlen); + + move(Ra, Pm_base); // Save Pm_base in Ra + restore_regs(); // Restore caller's Pm_base + + // Copy our result into caller's Pm_base + reverse(Pm_base, Ra, Rlen, t0, t1); + + leave(); + bind(nothing); + jr(RA); + + return entry; + } + // In C, approximately: + + // void + // montgomery_multiply(unsigned long Pa_base[], unsigned long Pb_base[], + // unsigned long Pn_base[], unsigned long Pm_base[], + // unsigned long inv, int len) { + // unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator + // unsigned long Ra, Rb, Rn, Rm; + // int i, Iam, Ibn; + + // assert(inv * Pn_base[0] == -1UL, "broken inverse in Montgomery multiply"); + + // for (i = 0; i < len; i++) { + // int j; + + // Iam = 0; + // Ibn = i; + + // Ra = Pa_base[Iam]; + // Rb = Pb_base[Iam]; + // Rm = Pm_base[Ibn]; + // Rn = Pn_base[Ibn]; + + // int iters = i; + // for (j = 0; iters--; j++) { + // assert(Ra == Pa_base[j] && Rb == Pb_base[i-j], "must be"); + // MACC(Ra, Rb, t0, t1, t2); + // Ra = Pa_base[++Iam]; + // Rb = pb_base[--Ibn]; + // assert(Rm == Pm_base[j] && Rn == Pn_base[i-j], "must be"); + // MACC(Rm, Rn, t0, t1, t2); + // Rm = Pm_base[++Iam]; + // Rn = Pn_base[--Ibn]; + // } + + // assert(Ra == Pa_base[i] && Rb == Pb_base[0], "must be"); + // MACC(Ra, Rb, t0, t1, t2); + // Pm_base[Iam] = Rm = t0 * inv; + // assert(Rm == Pm_base[i] && Rn == Pn_base[0], "must be"); + // MACC(Rm, Rn, t0, t1, t2); + + // assert(t0 == 0, "broken Montgomery multiply"); + + // t0 = t1; t1 = t2; t2 = 0; + // } + + // for (i = len; i < 2*len; i++) { + // int j; + + // Iam = i - len; + // Ibn = len; + + // Ra = Pa_base[++Iam]; + // Rb = Pb_base[--Ibn]; + // Rm = Pm_base[++Iam]; + // Rn = Pn_base[--Ibn]; + + // int iters = len*2-i-1; + // for (j = i-len+1; iters--; j++) { + // assert(Ra == Pa_base[j] && Rb == Pb_base[i-j], "must be"); + // MACC(Ra, Rb, t0, t1, t2); + // Ra = Pa_base[++Iam]; + // Rb = Pb_base[--Ibn]; + // assert(Rm == Pm_base[j] && Rn == Pn_base[i-j], "must be"); + // MACC(Rm, Rn, t0, t1, t2); + // Rm = Pm_base[++Iam]; + // Rn = Pn_base[--Ibn]; + // } + + // Pm_base[i-len] = t0; + // t0 = t1; t1 = t2; t2 = 0; + // } + + // while (t0) + // t0 = sub(Pm_base, Pn_base, t0, len); + // } + }; + + // Initialization + void generate_initial() { + // Generates all stubs and initializes 
the entry points + + //------------------------------------------------------------- + //----------------------------------------------------------- + // entry points that exist in all platforms + // Note: This is code that could be shared among different platforms - however the benefit seems to be smaller + // than the disadvantage of having a much more complicated generator structure. + // See also comment in stubRoutines.hpp. + StubRoutines::_forward_exception_entry = generate_forward_exception(); + StubRoutines::_call_stub_entry = generate_call_stub(StubRoutines::_call_stub_return_address); + // is referenced by megamorphic call + StubRoutines::_catch_exception_entry = generate_catch_exception(); + + StubRoutines::_throw_StackOverflowError_entry = + generate_throw_exception("StackOverflowError throw_exception", + CAST_FROM_FN_PTR(address, SharedRuntime::throw_StackOverflowError), + false); + StubRoutines::_throw_delayed_StackOverflowError_entry = + generate_throw_exception("delayed StackOverflowError throw_exception", + CAST_FROM_FN_PTR(address, SharedRuntime::throw_delayed_StackOverflowError), + false); + + if (UseCRC32Intrinsics) { + // set table address before stub generation which use it + StubRoutines::_crc_table_adr = (address)StubRoutines::la::_crc_table; + StubRoutines::_updateBytesCRC32 = generate_updateBytesCRC32(); + } + + if (UseCRC32CIntrinsics) { + StubRoutines::_updateBytesCRC32C = generate_updateBytesCRC32C(); + } + } + + void generate_all() { + // Generates all stubs and initializes the entry points + + // These entry points require SharedInfo::stack0 to be set up in + // non-core builds and need to be relocatable, so they each + // fabricate a RuntimeStub internally. + StubRoutines::_throw_AbstractMethodError_entry = generate_throw_exception("AbstractMethodError throw_exception", + CAST_FROM_FN_PTR(address, SharedRuntime::throw_AbstractMethodError), false); + + StubRoutines::_throw_IncompatibleClassChangeError_entry = generate_throw_exception("IncompatibleClassChangeError throw_exception", + CAST_FROM_FN_PTR(address, SharedRuntime:: throw_IncompatibleClassChangeError), false); + + StubRoutines::_throw_NullPointerException_at_call_entry = generate_throw_exception("NullPointerException at call throw_exception", + CAST_FROM_FN_PTR(address, SharedRuntime::throw_NullPointerException_at_call), false); + + // entry points that are platform specific + + // support for verify_oop (must happen after universe_init) + StubRoutines::_verify_oop_subroutine_entry = generate_verify_oop(); +#ifndef CORE + // arraycopy stubs used by compilers + generate_arraycopy_stubs(); +#endif + + if (UseLSX && vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dsin)) { + StubRoutines::_dsin = generate_dsin_dcos(/* isCos = */ false); + } + + if (UseLSX && vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dcos)) { + StubRoutines::_dcos = generate_dsin_dcos(/* isCos = */ true); + } + + // Safefetch stubs. 
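+    // SafeFetch32/SafeFetchN are leaf stubs that perform one load from a
+    // possibly unmapped address: if the load faults, the signal handler
+    // recognizes the recorded fault pc and resumes at the continuation pc,
+    // which returns the caller-supplied error value instead.  Approximate use
+    // from VM code (see the inline wrappers in stubRoutines.hpp):
+    //
+    //   int      v = SafeFetch32((int*)addr, -1);      // -1 if addr is unreadable
+    //   intptr_t p = SafeFetchN((intptr_t*)addr, 0);   //  0 if addr is unreadable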
+ generate_safefetch("SafeFetch32", sizeof(int), &StubRoutines::_safefetch32_entry, + &StubRoutines::_safefetch32_fault_pc, + &StubRoutines::_safefetch32_continuation_pc); + generate_safefetch("SafeFetchN", sizeof(intptr_t), &StubRoutines::_safefetchN_entry, + &StubRoutines::_safefetchN_fault_pc, + &StubRoutines::_safefetchN_continuation_pc); + +#ifdef COMPILER2 + if (UseMulAddIntrinsic) { + StubRoutines::_mulAdd = generate_mulAdd(); + } + + if (UseMontgomeryMultiplyIntrinsic) { + StubCodeMark mark(this, "StubRoutines", "montgomeryMultiply"); + MontgomeryMultiplyGenerator g(_masm, false /* squaring */); + StubRoutines::_montgomeryMultiply = g.generate_multiply(); + } + + if (UseMontgomerySquareIntrinsic) { + StubCodeMark mark(this, "StubRoutines", "montgomerySquare"); + MontgomeryMultiplyGenerator g(_masm, true /* squaring */); + // We use generate_multiply() rather than generate_square() + // because it's faster for the sizes of modulus we care about. + StubRoutines::_montgomerySquare = g.generate_multiply(); + } +#endif + + if (UseAESIntrinsics) { + StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock(false); + StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock(false); + StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_aescrypt_encryptBlock(true); + StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_aescrypt_decryptBlock(true); + } + + if (UseSHA1Intrinsics) { + generate_sha1_implCompress("sha1_implCompress", StubRoutines::_sha1_implCompress, StubRoutines::_sha1_implCompressMB); + } + + if (UseSHA256Intrinsics) { + generate_sha256_implCompress("sha256_implCompress", StubRoutines::_sha256_implCompress, StubRoutines::_sha256_implCompressMB); + } + } + + public: + StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) { + if (all) { + generate_all(); + } else { + generate_initial(); + } + } +}; // end class declaration + +void StubGenerator_generate(CodeBuffer* code, bool all) { + StubGenerator g(code, all); +} diff --git a/src/hotspot/cpu/loongarch/stubRoutines_loongarch.hpp b/src/hotspot/cpu/loongarch/stubRoutines_loongarch.hpp new file mode 100644 index 00000000000..0ab07e1e9e8 --- /dev/null +++ b/src/hotspot/cpu/loongarch/stubRoutines_loongarch.hpp @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef CPU_LOONGARCH_STUBROUTINES_LOONGARCH_64_HPP +#define CPU_LOONGARCH_STUBROUTINES_LOONGARCH_64_HPP + +// This file holds the platform specific parts of the StubRoutines +// definition. See stubRoutines.hpp for a description on how to +// extend it. + +static bool returns_to_call_stub(address return_pc){ + return return_pc == _call_stub_return_address||return_pc == la::get_call_stub_compiled_return(); +} + +enum platform_dependent_constants { + code_size1 = 20000, // simply increase if too small (assembler will crash if too small) + code_size2 = 60000 // simply increase if too small (assembler will crash if too small) +}; + +class la { + friend class StubGenerator; + friend class VMStructs; + private: + // If we call compiled code directly from the call stub we will + // need to adjust the return back to the call stub to a specialized + // piece of code that can handle compiled results and cleaning the fpu + // stack. The variable holds that location. + static address _call_stub_compiled_return; + static juint _crc_table[]; + // begin trigonometric tables block. See comments in .cpp file + static juint _npio2_hw[]; + static jdouble _two_over_pi[]; + static jdouble _pio2[]; + static jdouble _dsin_coef[]; + static jdouble _dcos_coef[]; + // end trigonometric tables block + +public: + // Call back points for traps in compiled code + static address get_call_stub_compiled_return() { return _call_stub_compiled_return; } + static void set_call_stub_compiled_return(address ret){ _call_stub_compiled_return = ret; } + +}; + +#endif // CPU_LOONGARCH_STUBROUTINES_LOONGARCH_64_HPP diff --git a/src/hotspot/cpu/loongarch/stubRoutines_loongarch_64.cpp b/src/hotspot/cpu/loongarch/stubRoutines_loongarch_64.cpp new file mode 100644 index 00000000000..1a6ea3bcdee --- /dev/null +++ b/src/hotspot/cpu/loongarch/stubRoutines_loongarch_64.cpp @@ -0,0 +1,178 @@ +/* + * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "runtime/deoptimization.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/stubRoutines.hpp" +#include "runtime/thread.inline.hpp" + +// a description of how to extend it, see the stubRoutines.hpp file. 
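+// The _crc_table below is the standard zlib CRC-32 lookup table; the CRC32
+// intrinsic entries consume it a byte at a time.  Illustrative C for a single
+// table step (the helper name is illustrative, not the exact macro-assembler
+// sequence; callers keep 'crc' bit-inverted around the loop):
+//
+//   static inline juint crc32_byte_step(juint crc, jubyte b, const juint* table) {
+//     return table[(crc ^ b) & 0xff] ^ (crc >> 8);
+//   }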
+ +//find the last fp value +address StubRoutines::la::_call_stub_compiled_return = NULL; + +/** + * crc_table[] from jdk/src/share/native/java/util/zip/zlib-1.2.5/crc32.h + */ +juint StubRoutines::la::_crc_table[] = +{ + 0x00000000UL, 0x77073096UL, 0xee0e612cUL, 0x990951baUL, 0x076dc419UL, + 0x706af48fUL, 0xe963a535UL, 0x9e6495a3UL, 0x0edb8832UL, 0x79dcb8a4UL, + 0xe0d5e91eUL, 0x97d2d988UL, 0x09b64c2bUL, 0x7eb17cbdUL, 0xe7b82d07UL, + 0x90bf1d91UL, 0x1db71064UL, 0x6ab020f2UL, 0xf3b97148UL, 0x84be41deUL, + 0x1adad47dUL, 0x6ddde4ebUL, 0xf4d4b551UL, 0x83d385c7UL, 0x136c9856UL, + 0x646ba8c0UL, 0xfd62f97aUL, 0x8a65c9ecUL, 0x14015c4fUL, 0x63066cd9UL, + 0xfa0f3d63UL, 0x8d080df5UL, 0x3b6e20c8UL, 0x4c69105eUL, 0xd56041e4UL, + 0xa2677172UL, 0x3c03e4d1UL, 0x4b04d447UL, 0xd20d85fdUL, 0xa50ab56bUL, + 0x35b5a8faUL, 0x42b2986cUL, 0xdbbbc9d6UL, 0xacbcf940UL, 0x32d86ce3UL, + 0x45df5c75UL, 0xdcd60dcfUL, 0xabd13d59UL, 0x26d930acUL, 0x51de003aUL, + 0xc8d75180UL, 0xbfd06116UL, 0x21b4f4b5UL, 0x56b3c423UL, 0xcfba9599UL, + 0xb8bda50fUL, 0x2802b89eUL, 0x5f058808UL, 0xc60cd9b2UL, 0xb10be924UL, + 0x2f6f7c87UL, 0x58684c11UL, 0xc1611dabUL, 0xb6662d3dUL, 0x76dc4190UL, + 0x01db7106UL, 0x98d220bcUL, 0xefd5102aUL, 0x71b18589UL, 0x06b6b51fUL, + 0x9fbfe4a5UL, 0xe8b8d433UL, 0x7807c9a2UL, 0x0f00f934UL, 0x9609a88eUL, + 0xe10e9818UL, 0x7f6a0dbbUL, 0x086d3d2dUL, 0x91646c97UL, 0xe6635c01UL, + 0x6b6b51f4UL, 0x1c6c6162UL, 0x856530d8UL, 0xf262004eUL, 0x6c0695edUL, + 0x1b01a57bUL, 0x8208f4c1UL, 0xf50fc457UL, 0x65b0d9c6UL, 0x12b7e950UL, + 0x8bbeb8eaUL, 0xfcb9887cUL, 0x62dd1ddfUL, 0x15da2d49UL, 0x8cd37cf3UL, + 0xfbd44c65UL, 0x4db26158UL, 0x3ab551ceUL, 0xa3bc0074UL, 0xd4bb30e2UL, + 0x4adfa541UL, 0x3dd895d7UL, 0xa4d1c46dUL, 0xd3d6f4fbUL, 0x4369e96aUL, + 0x346ed9fcUL, 0xad678846UL, 0xda60b8d0UL, 0x44042d73UL, 0x33031de5UL, + 0xaa0a4c5fUL, 0xdd0d7cc9UL, 0x5005713cUL, 0x270241aaUL, 0xbe0b1010UL, + 0xc90c2086UL, 0x5768b525UL, 0x206f85b3UL, 0xb966d409UL, 0xce61e49fUL, + 0x5edef90eUL, 0x29d9c998UL, 0xb0d09822UL, 0xc7d7a8b4UL, 0x59b33d17UL, + 0x2eb40d81UL, 0xb7bd5c3bUL, 0xc0ba6cadUL, 0xedb88320UL, 0x9abfb3b6UL, + 0x03b6e20cUL, 0x74b1d29aUL, 0xead54739UL, 0x9dd277afUL, 0x04db2615UL, + 0x73dc1683UL, 0xe3630b12UL, 0x94643b84UL, 0x0d6d6a3eUL, 0x7a6a5aa8UL, + 0xe40ecf0bUL, 0x9309ff9dUL, 0x0a00ae27UL, 0x7d079eb1UL, 0xf00f9344UL, + 0x8708a3d2UL, 0x1e01f268UL, 0x6906c2feUL, 0xf762575dUL, 0x806567cbUL, + 0x196c3671UL, 0x6e6b06e7UL, 0xfed41b76UL, 0x89d32be0UL, 0x10da7a5aUL, + 0x67dd4accUL, 0xf9b9df6fUL, 0x8ebeeff9UL, 0x17b7be43UL, 0x60b08ed5UL, + 0xd6d6a3e8UL, 0xa1d1937eUL, 0x38d8c2c4UL, 0x4fdff252UL, 0xd1bb67f1UL, + 0xa6bc5767UL, 0x3fb506ddUL, 0x48b2364bUL, 0xd80d2bdaUL, 0xaf0a1b4cUL, + 0x36034af6UL, 0x41047a60UL, 0xdf60efc3UL, 0xa867df55UL, 0x316e8eefUL, + 0x4669be79UL, 0xcb61b38cUL, 0xbc66831aUL, 0x256fd2a0UL, 0x5268e236UL, + 0xcc0c7795UL, 0xbb0b4703UL, 0x220216b9UL, 0x5505262fUL, 0xc5ba3bbeUL, + 0xb2bd0b28UL, 0x2bb45a92UL, 0x5cb36a04UL, 0xc2d7ffa7UL, 0xb5d0cf31UL, + 0x2cd99e8bUL, 0x5bdeae1dUL, 0x9b64c2b0UL, 0xec63f226UL, 0x756aa39cUL, + 0x026d930aUL, 0x9c0906a9UL, 0xeb0e363fUL, 0x72076785UL, 0x05005713UL, + 0x95bf4a82UL, 0xe2b87a14UL, 0x7bb12baeUL, 0x0cb61b38UL, 0x92d28e9bUL, + 0xe5d5be0dUL, 0x7cdcefb7UL, 0x0bdbdf21UL, 0x86d3d2d4UL, 0xf1d4e242UL, + 0x68ddb3f8UL, 0x1fda836eUL, 0x81be16cdUL, 0xf6b9265bUL, 0x6fb077e1UL, + 0x18b74777UL, 0x88085ae6UL, 0xff0f6a70UL, 0x66063bcaUL, 0x11010b5cUL, + 0x8f659effUL, 0xf862ae69UL, 0x616bffd3UL, 0x166ccf45UL, 0xa00ae278UL, + 0xd70dd2eeUL, 0x4e048354UL, 0x3903b3c2UL, 0xa7672661UL, 0xd06016f7UL, + 0x4969474dUL, 
0x3e6e77dbUL, 0xaed16a4aUL, 0xd9d65adcUL, 0x40df0b66UL, + 0x37d83bf0UL, 0xa9bcae53UL, 0xdebb9ec5UL, 0x47b2cf7fUL, 0x30b5ffe9UL, + 0xbdbdf21cUL, 0xcabac28aUL, 0x53b39330UL, 0x24b4a3a6UL, 0xbad03605UL, + 0xcdd70693UL, 0x54de5729UL, 0x23d967bfUL, 0xb3667a2eUL, 0xc4614ab8UL, + 0x5d681b02UL, 0x2a6f2b94UL, 0xb40bbe37UL, 0xc30c8ea1UL, 0x5a05df1bUL, + 0x2d02ef8dUL +}; + +ATTRIBUTE_ALIGNED(64) juint StubRoutines::la::_npio2_hw[] = { + // first, various coefficient values: 0.5, invpio2, pio2_1, pio2_1t, pio2_2, + // pio2_2t, pio2_3, pio2_3t + // This is a small optimization wich keeping double[8] values in int[] table + // to have less address calculation instructions + // + // invpio2: 53 bits of 2/pi (enough for cases when trigonometric argument is small) + // pio2_1: first 33 bit of pi/2 + // pio2_1t: pi/2 - pio2_1 + // pio2_2: second 33 bit of pi/2 + // pio2_2t: pi/2 - (pio2_1+pio2_2) + // pio2_3: third 33 bit of pi/2 + // pio2_3t: pi/2 - (pio2_1+pio2_2+pio2_3) + 0x00000000, 0x3fe00000, // 0.5 + 0x6DC9C883, 0x3FE45F30, // invpio2 = 6.36619772367581382433e-01 + 0x54400000, 0x3FF921FB, // pio2_1 = 1.57079632673412561417e+00 + 0x1A626331, 0x3DD0B461, // pio2_1t = 6.07710050650619224932e-11 + 0x1A600000, 0x3DD0B461, // pio2_2 = 6.07710050630396597660e-11 + 0x2E037073, 0x3BA3198A, // pio2_2t = 2.02226624879595063154e-21 + 0x2E000000, 0x3BA3198A, // pio2_3 = 2.02226624871116645580e-21 + 0x252049C1, 0x397B839A, // pio2_3t = 8.47842766036889956997e-32 + // now, npio2_hw itself + 0x3FF921FB, 0x400921FB, 0x4012D97C, 0x401921FB, 0x401F6A7A, 0x4022D97C, + 0x4025FDBB, 0x402921FB, 0x402C463A, 0x402F6A7A, 0x4031475C, 0x4032D97C, + 0x40346B9C, 0x4035FDBB, 0x40378FDB, 0x403921FB, 0x403AB41B, 0x403C463A, + 0x403DD85A, 0x403F6A7A, 0x40407E4C, 0x4041475C, 0x4042106C, 0x4042D97C, + 0x4043A28C, 0x40446B9C, 0x404534AC, 0x4045FDBB, 0x4046C6CB, 0x40478FDB, + 0x404858EB, 0x404921FB +}; + +// Coefficients for sin(x) polynomial approximation: S1..S6. +// See kernel_sin comments in macroAssembler_loongarch64_trig.cpp for details +ATTRIBUTE_ALIGNED(64) jdouble StubRoutines::la::_dsin_coef[] = { + -1.66666666666666324348e-01, // 0xBFC5555555555549 + 8.33333333332248946124e-03, // 0x3F8111111110F8A6 + -1.98412698298579493134e-04, // 0xBF2A01A019C161D5 + 2.75573137070700676789e-06, // 0x3EC71DE357B1FE7D + -2.50507602534068634195e-08, // 0xBE5AE5E68A2B9CEB + 1.58969099521155010221e-10 // 0x3DE5D93A5ACFD57C +}; + +// Coefficients for cos(x) polynomial approximation: C1..C6. +// See kernel_cos comments in macroAssembler_loongarch64_trig.cpp for details +ATTRIBUTE_ALIGNED(64) jdouble StubRoutines::la::_dcos_coef[] = { + 4.16666666666666019037e-02, // c0x3FA555555555554C + -1.38888888888741095749e-03, // 0xBF56C16C16C15177 + 2.48015872894767294178e-05, // 0x3EFA01A019CB1590 + -2.75573143513906633035e-07, // 0xBE927E4F809C52AD + 2.08757232129817482790e-09, // 0x3E21EE9EBDB4B1C4 + -1.13596475577881948265e-11 // 0xBDA8FAE9BE8838D4 +}; + +// Table of constants for 2/pi, 396 Hex digits (476 decimal) of 2/pi. +// Used in cases of very large argument. 396 hex digits is enough to support +// required precision. 
+// Converted to double to avoid unnecessary conversion in code +// NOTE: table looks like original int table: {0xA2F983, 0x6E4E44,...} with +// only (double) conversion added +ATTRIBUTE_ALIGNED(64) jdouble StubRoutines::la::_two_over_pi[] = { + (double)0xA2F983, (double)0x6E4E44, (double)0x1529FC, (double)0x2757D1, (double)0xF534DD, (double)0xC0DB62, + (double)0x95993C, (double)0x439041, (double)0xFE5163, (double)0xABDEBB, (double)0xC561B7, (double)0x246E3A, + (double)0x424DD2, (double)0xE00649, (double)0x2EEA09, (double)0xD1921C, (double)0xFE1DEB, (double)0x1CB129, + (double)0xA73EE8, (double)0x8235F5, (double)0x2EBB44, (double)0x84E99C, (double)0x7026B4, (double)0x5F7E41, + (double)0x3991D6, (double)0x398353, (double)0x39F49C, (double)0x845F8B, (double)0xBDF928, (double)0x3B1FF8, + (double)0x97FFDE, (double)0x05980F, (double)0xEF2F11, (double)0x8B5A0A, (double)0x6D1F6D, (double)0x367ECF, + (double)0x27CB09, (double)0xB74F46, (double)0x3F669E, (double)0x5FEA2D, (double)0x7527BA, (double)0xC7EBE5, + (double)0xF17B3D, (double)0x0739F7, (double)0x8A5292, (double)0xEA6BFB, (double)0x5FB11F, (double)0x8D5D08, + (double)0x560330, (double)0x46FC7B, (double)0x6BABF0, (double)0xCFBC20, (double)0x9AF436, (double)0x1DA9E3, + (double)0x91615E, (double)0xE61B08, (double)0x659985, (double)0x5F14A0, (double)0x68408D, (double)0xFFD880, + (double)0x4D7327, (double)0x310606, (double)0x1556CA, (double)0x73A8C9, (double)0x60E27B, (double)0xC08C6B, +}; + +// Pi over 2 value +ATTRIBUTE_ALIGNED(64) jdouble StubRoutines::la::_pio2[] = { + 1.57079625129699707031e+00, // 0x3FF921FB40000000 + 7.54978941586159635335e-08, // 0x3E74442D00000000 + 5.39030252995776476554e-15, // 0x3CF8469880000000 + 3.28200341580791294123e-22, // 0x3B78CC5160000000 + 1.27065575308067607349e-29, // 0x39F01B8380000000 + 1.22933308981111328932e-36, // 0x387A252040000000 + 2.73370053816464559624e-44, // 0x36E3822280000000 + 2.16741683877804819444e-51, // 0x3569F31D00000000 +}; diff --git a/src/hotspot/cpu/loongarch/templateInterpreterGenerator_loongarch.cpp b/src/hotspot/cpu/loongarch/templateInterpreterGenerator_loongarch.cpp new file mode 100644 index 00000000000..be1d28d4b83 --- /dev/null +++ b/src/hotspot/cpu/loongarch/templateInterpreterGenerator_loongarch.cpp @@ -0,0 +1,2269 @@ +/* + * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "gc/shared/barrierSetAssembler.hpp" +#include "interpreter/bytecodeHistogram.hpp" +#include "interpreter/interpreter.hpp" +#include "interpreter/interpreterRuntime.hpp" +#include "interpreter/interp_masm.hpp" +#include "interpreter/templateInterpreterGenerator.hpp" +#include "interpreter/templateTable.hpp" +#include "oops/arrayOop.hpp" +#include "oops/methodData.hpp" +#include "oops/method.hpp" +#include "oops/oop.inline.hpp" +#include "prims/jvmtiExport.hpp" +#include "prims/jvmtiThreadState.hpp" +#include "runtime/arguments.hpp" +#include "runtime/deoptimization.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#include "runtime/synchronizer.hpp" +#include "runtime/timer.hpp" +#include "runtime/vframeArray.hpp" +#include "utilities/debug.hpp" + +#define __ _masm-> + +#define T0 RT0 +#define T1 RT1 +#define T2 RT2 +#define T3 RT3 +#define T4 RT4 +#define T5 RT5 +#define T6 RT6 +#define T7 RT7 +#define T8 RT8 + +int TemplateInterpreter::InterpreterCodeSize = 500 * K; + +#ifdef PRODUCT +#define BLOCK_COMMENT(str) /* nothing */ +#else +#define BLOCK_COMMENT(str) __ block_comment(str) +#endif + +address TemplateInterpreterGenerator::generate_slow_signature_handler() { + address entry = __ pc(); + // Rmethod: method + // LVP: pointer to locals + // A3: first stack arg + __ move(A3, SP); + __ addi_d(SP, SP, -18 * wordSize); + __ st_d(RA, SP, 0); + __ call_VM(noreg, + CAST_FROM_FN_PTR(address, + InterpreterRuntime::slow_signature_handler), + Rmethod, LVP, A3); + + // V0: result handler + + // Stack layout: + // ... + // 18 stack arg0 <--- old sp + // 17 floatReg arg7 + // ... + // 10 floatReg arg0 + // 9 float/double identifiers + // 8 IntReg arg7 + // ... + // 2 IntReg arg1 + // 1 aligned slot + // SP: 0 return address + + // Do FP first so we can use A3 as temp + __ ld_d(A3, Address(SP, 9 * wordSize)); // float/double identifiers + + for (int i= 0; i < Argument::n_float_register_parameters; i++) { + FloatRegister floatreg = as_FloatRegister(i + FA0->encoding()); + Label isdouble, done; + + __ andi(AT, A3, 1 << i); + __ bnez(AT, isdouble); + __ fld_s(floatreg, SP, (10 + i) * wordSize); + __ b(done); + __ bind(isdouble); + __ fld_d(floatreg, SP, (10 + i) * wordSize); + __ bind(done); + } + + // A0 is for env. + // If the mothed is not static, A1 will be corrected in generate_native_entry. + for (int i= 1; i < Argument::n_register_parameters; i++) { + Register reg = as_Register(i + A0->encoding()); + __ ld_d(reg, SP, (1 + i) * wordSize); + } + + // A0/V0 contains the result from the call of + // InterpreterRuntime::slow_signature_handler so we don't touch it + // here. It will be loaded with the JNIEnv* later. + __ ld_d(RA, SP, 0); + __ addi_d(SP, SP, 18 * wordSize); + __ jr(RA); + return entry; +} + +/** + * Method entry for static native methods: + * int java.util.zip.CRC32.update(int crc, int b) + */ +address TemplateInterpreterGenerator::generate_CRC32_update_entry() { + if (UseCRC32Intrinsics) { + address entry = __ pc(); + + // rmethod: Method* + // Rsender: senderSP must preserved for slow path + // SP: args + + Label slow_path; + // If we need a safepoint check, generate full interpreter entry. 
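+    // These intrinsic entries build no interpreter frame, so they must not run
+    // across a safepoint; if one is in progress we fall back to the vanilla
+    // native entry, which does build a proper frame.  The guard below is, in C
+    // terms, approximately:
+    //
+    //   if (*SafepointSynchronize::address_of_state() != SafepointSynchronize::_not_synchronized)
+    //     goto slow_path;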
+ __ li(AT, SafepointSynchronize::_not_synchronized); + __ li(T8, (long)SafepointSynchronize::address_of_state()); + __ bne(T8, AT, slow_path); + + // We don't generate local frame and don't align stack because + // we call stub code and there is no safepoint on this path. + + const Register crc = A0; // crc + const Register val = A1; // source java byte value + const Register tbl = A2; // scratch + + // Arguments are reversed on java expression stack + __ ld_w(val, SP, 0); // byte value + __ ld_w(crc, SP, wordSize); // Initial CRC + + __ li(tbl, (long)StubRoutines::crc_table_addr()); + + __ nor(crc, crc, R0); // ~crc + __ update_byte_crc32(crc, val, tbl); + __ nor(crc, crc, R0); // ~crc + + // restore caller SP + __ move(SP, Rsender); + __ jr(RA); + + // generate a vanilla native entry as the slow path + __ bind(slow_path); + __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::native)); + return entry; + } + return NULL; +} + +/** + * Method entry for static native methods: + * int java.util.zip.CRC32.updateBytes(int crc, byte[] b, int off, int len) + * int java.util.zip.CRC32.updateByteBuffer(int crc, long buf, int off, int len) + */ +address TemplateInterpreterGenerator::generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind) { + if (UseCRC32Intrinsics) { + address entry = __ pc(); + + // rmethod: Method* + // Rsender: senderSP must preserved for slow path + // SP: args + + Label slow_path; + // If we need a safepoint check, generate full interpreter entry. + __ li(AT, SafepointSynchronize::_not_synchronized); + __ li(T8, (long)SafepointSynchronize::address_of_state()); + __ bne(T8, AT, slow_path); + + // We don't generate local frame and don't align stack because + // we call stub code and there is no safepoint on this path. 
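+    // Java expression stack on entry (arguments are pushed left to right, so
+    // the last argument is on top).  In C, approximately, for the byte[] case
+    // ('sp' taken as a byte pointer):
+    //
+    //   jint   len = *(jint*)  (sp + 0 * wordSize);
+    //   jint   off = *(jint*)  (sp + 1 * wordSize);
+    //   jbyte* buf = *(jbyte**)(sp + 2 * wordSize);
+    //   jint   crc = *(jint*)  (sp + 3 * wordSize);  // 4 * wordSize for the long-address variant
+    //   return kernel_crc32(crc, buf + off + arrayOopDesc::base_offset_in_bytes(T_BYTE), len);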
+ + const Register crc = A0; // crc + const Register buf = A1; // source java byte array address + const Register len = A2; // length + const Register tmp = A3; + + const Register off = len; // offset (never overlaps with 'len') + + // Arguments are reversed on java expression stack + // Calculate address of start element + __ ld_w(off, SP, wordSize); // int offset + __ ld_d(buf, SP, 2 * wordSize); // byte[] buf | long buf + __ add_d(buf, buf, off); // + offset + if (kind == Interpreter::java_util_zip_CRC32_updateByteBuffer) { + __ ld_w(crc, SP, 4 * wordSize); // long crc + } else { + __ addi_d(buf, buf, arrayOopDesc::base_offset_in_bytes(T_BYTE)); // + header size + __ ld_w(crc, SP, 3 * wordSize); // long crc + } + + // Can now load 'len' since we're finished with 'off' + __ ld_w(len, SP, 0); // length + + __ kernel_crc32(crc, buf, len, tmp); + + // restore caller SP + __ move(SP, Rsender); + __ jr(RA); + + // generate a vanilla native entry as the slow path + __ bind(slow_path); + __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::native)); + return entry; + } + return NULL; +} + +/** + * Method entry for intrinsic-candidate (non-native) methods: + * int java.util.zip.CRC32C.updateBytes(int crc, byte[] b, int off, int end) + * int java.util.zip.CRC32C.updateDirectByteBuffer(int crc, long buf, int off, int end) + * Unlike CRC32, CRC32C does not have any methods marked as native + * CRC32C also uses an "end" variable instead of the length variable CRC32 uses + */ +address TemplateInterpreterGenerator::generate_CRC32C_updateBytes_entry(AbstractInterpreter::MethodKind kind) { + if (UseCRC32CIntrinsics) { + address entry = __ pc(); + + const Register crc = A0; // initial crc + const Register buf = A1; // source java byte array address + const Register len = A2; // len argument to the kernel + const Register tmp = A3; + + const Register end = len; // index of last element to process + const Register off = crc; // offset + + __ ld_w(end, SP, 0); // int end + __ ld_w(off, SP, wordSize); // int offset + __ sub_w(len, end, off); // calculate length + __ ld_d(buf, SP, 2 * wordSize); // byte[] buf | long buf + __ add_d(buf, buf, off); // + offset + if (kind == Interpreter::java_util_zip_CRC32C_updateDirectByteBuffer) { + __ ld_w(crc, SP, 4 * wordSize); // int crc + } else { + __ addi_d(buf, buf, arrayOopDesc::base_offset_in_bytes(T_BYTE)); // + header size + __ ld_w(crc, SP, 3 * wordSize); // int crc + } + + __ kernel_crc32c(crc, buf, len, tmp); + + // restore caller SP + __ move(SP, Rsender); + __ jr(RA); + + return entry; + } + return NULL; +} + +// +// Various method entries +// + +address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::MethodKind kind) { + if (!InlineIntrinsics) return NULL; // Generate a vanilla entry + + // These don't need a safepoint check because they aren't virtually + // callable. We won't enter these intrinsics from compiled code. + // If in the future we added an intrinsic which was virtually callable + // we'd have to worry about how to safepoint so that this code is used. 
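+  // A double occupies two expression-stack slots and arguments are pushed left
+  // to right, so the topmost slots hold the last argument.  For Math.pow(x, y),
+  // in C terms (approximate; 'sp' taken as a byte pointer):
+  //
+  //   double y = *(double*)(sp + 0);
+  //   double x = *(double*)(sp + 2 * Interpreter::stackElementSize);
+  //
+  // which is why the pow case below loads FA1 from offset 0 and FA0 from
+  // 2 * stackElementSize, and fmaD reaches back 4 slots for its first operand.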
+ + // mathematical functions inlined by compiler + // (interpreter must provide identical implementation + // in order to avoid monotonicity bugs when switching + // from interpreter to compiler in the middle of some + // computation) + // + // stack: + // [ arg ] <-- sp + // [ arg ] + // retaddr in ra + + address entry_point = NULL; + switch (kind) { + case Interpreter::java_lang_math_abs: + entry_point = __ pc(); + __ fld_d(FA0, SP, 0); + __ fabs_d(F0, FA0); + __ move(SP, Rsender); + break; + case Interpreter::java_lang_math_sqrt: + entry_point = __ pc(); + __ fld_d(FA0, SP, 0); + __ fsqrt_d(F0, FA0); + __ move(SP, Rsender); + break; + case Interpreter::java_lang_math_sin : + case Interpreter::java_lang_math_cos : + case Interpreter::java_lang_math_tan : + case Interpreter::java_lang_math_log : + case Interpreter::java_lang_math_log10 : + case Interpreter::java_lang_math_exp : + entry_point = __ pc(); + __ fld_d(FA0, SP, 0); + __ move(SP, Rsender); + __ movgr2fr_d(FS0, RA); + __ movgr2fr_d(FS1, SP); + __ bstrins_d(SP, R0, exact_log2(StackAlignmentInBytes) - 1, 0); + generate_transcendental_entry(kind, 1); + __ movfr2gr_d(SP, FS1); + __ movfr2gr_d(RA, FS0); + break; + case Interpreter::java_lang_math_pow : + entry_point = __ pc(); + __ fld_d(FA0, SP, 2 * Interpreter::stackElementSize); + __ fld_d(FA1, SP, 0); + __ move(SP, Rsender); + __ movgr2fr_d(FS0, RA); + __ movgr2fr_d(FS1, SP); + __ bstrins_d(SP, R0, exact_log2(StackAlignmentInBytes) - 1, 0); + generate_transcendental_entry(kind, 2); + __ movfr2gr_d(SP, FS1); + __ movfr2gr_d(RA, FS0); + break; + case Interpreter::java_lang_math_fmaD : + if (UseFMA) { + entry_point = __ pc(); + __ fld_d(FA0, SP, 4 * Interpreter::stackElementSize); + __ fld_d(FA1, SP, 2 * Interpreter::stackElementSize); + __ fld_d(FA2, SP, 0); + __ fmadd_d(F0, FA0, FA1, FA2); + __ move(SP, Rsender); + } + break; + case Interpreter::java_lang_math_fmaF : + if (UseFMA) { + entry_point = __ pc(); + __ fld_s(FA0, SP, 2 * Interpreter::stackElementSize); + __ fld_s(FA1, SP, Interpreter::stackElementSize); + __ fld_s(FA2, SP, 0); + __ fmadd_s(F0, FA0, FA1, FA2); + __ move(SP, Rsender); + } + break; + default: + ; + } + if (entry_point) { + __ jr(RA); + } + + return entry_point; +} + + // double trigonometrics and transcendentals + // static jdouble dsin(jdouble x); + // static jdouble dcos(jdouble x); + // static jdouble dtan(jdouble x); + // static jdouble dlog(jdouble x); + // static jdouble dlog10(jdouble x); + // static jdouble dexp(jdouble x); + // static jdouble dpow(jdouble x, jdouble y); + +void TemplateInterpreterGenerator::generate_transcendental_entry(AbstractInterpreter::MethodKind kind, int fpargs) { + address fn; + switch (kind) { + case Interpreter::java_lang_math_sin : + if (StubRoutines::dsin() == NULL) { + fn = CAST_FROM_FN_PTR(address, SharedRuntime::dsin); + } else { + fn = CAST_FROM_FN_PTR(address, StubRoutines::dsin()); + } + break; + case Interpreter::java_lang_math_cos : + if (StubRoutines::dcos() == NULL) { + fn = CAST_FROM_FN_PTR(address, SharedRuntime::dcos); + } else { + fn = CAST_FROM_FN_PTR(address, StubRoutines::dcos()); + } + break; + case Interpreter::java_lang_math_tan : + if (StubRoutines::dtan() == NULL) { + fn = CAST_FROM_FN_PTR(address, SharedRuntime::dtan); + } else { + fn = CAST_FROM_FN_PTR(address, StubRoutines::dtan()); + } + break; + case Interpreter::java_lang_math_log : + if (StubRoutines::dlog() == NULL) { + fn = CAST_FROM_FN_PTR(address, SharedRuntime::dlog); + } else { + fn = CAST_FROM_FN_PTR(address, StubRoutines::dlog()); + 
} + break; + case Interpreter::java_lang_math_log10 : + if (StubRoutines::dlog10() == NULL) { + fn = CAST_FROM_FN_PTR(address, SharedRuntime::dlog10); + } else { + fn = CAST_FROM_FN_PTR(address, StubRoutines::dlog10()); + } + break; + case Interpreter::java_lang_math_exp : + if (StubRoutines::dexp() == NULL) { + fn = CAST_FROM_FN_PTR(address, SharedRuntime::dexp); + } else { + fn = CAST_FROM_FN_PTR(address, StubRoutines::dexp()); + } + break; + case Interpreter::java_lang_math_pow : + if (StubRoutines::dpow() == NULL) { + fn = CAST_FROM_FN_PTR(address, SharedRuntime::dpow); + } else { + fn = CAST_FROM_FN_PTR(address, StubRoutines::dpow()); + } + break; + default: + ShouldNotReachHere(); + fn = NULL; // unreachable + } + __ li(T4, fn); + __ jalr(T4); +} + +// Abstract method entry +// Attempt to execute abstract method. Throw exception +address TemplateInterpreterGenerator::generate_abstract_entry(void) { + + // Rmethod: methodOop + // V0: receiver (unused) + // Rsender : sender 's sp + address entry_point = __ pc(); + + // abstract method entry + // throw exception + // adjust stack to what a normal return would do + __ empty_expression_stack(); + __ restore_bcp(); + __ restore_locals(); + __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_AbstractMethodErrorWithMethod), Rmethod); + // the call_VM checks for exception, so we should never return here. + __ should_not_reach_here(); + + return entry_point; +} + + +const int method_offset = frame::interpreter_frame_method_offset * wordSize; +const int bci_offset = frame::interpreter_frame_bcp_offset * wordSize; +const int locals_offset = frame::interpreter_frame_locals_offset * wordSize; + +//----------------------------------------------------------------------------- + +address TemplateInterpreterGenerator::generate_StackOverflowError_handler() { + address entry = __ pc(); + +#ifdef ASSERT + { + Label L; + __ addi_d(T1, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); + __ sub_d(T1, T1, SP); // T1 = maximal sp for current fp + __ bge(T1, R0, L); // check if frame is complete + __ stop("interpreter frame not set up"); + __ bind(L); + } +#endif // ASSERT + // Restore bcp under the assumption that the current frame is still + // interpreted + __ restore_bcp(); + + // expression stack must be empty before entering the VM if an + // exception happened + __ empty_expression_stack(); + // throw exception + __ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_StackOverflowError)); + return entry; +} + +address TemplateInterpreterGenerator::generate_ArrayIndexOutOfBounds_handler() { + address entry = __ pc(); + // expression stack must be empty before entering the VM if an + // exception happened + __ empty_expression_stack(); + // ??? 
convention: expect array in register A1 + __ call_VM(noreg, CAST_FROM_FN_PTR(address, + InterpreterRuntime::throw_ArrayIndexOutOfBoundsException), A1, A2); + return entry; +} + +address TemplateInterpreterGenerator::generate_ClassCastException_handler() { + address entry = __ pc(); + // expression stack must be empty before entering the VM if an + // exception happened + __ empty_expression_stack(); + __ empty_FPU_stack(); + __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_ClassCastException), FSR); + return entry; +} + +address TemplateInterpreterGenerator::generate_exception_handler_common( + const char* name, const char* message, bool pass_oop) { + assert(!pass_oop || message == NULL, "either oop or message but not both"); + address entry = __ pc(); + + // expression stack must be empty before entering the VM if an exception happened + __ empty_expression_stack(); + // setup parameters + __ li(A1, (long)name); + if (pass_oop) { + __ call_VM(V0, + CAST_FROM_FN_PTR(address, InterpreterRuntime::create_klass_exception), A1, FSR); + } else { + __ li(A2, (long)message); + __ call_VM(V0, + CAST_FROM_FN_PTR(address, InterpreterRuntime::create_exception), A1, A2); + } + // throw exception + __ jmp(Interpreter::throw_exception_entry(), relocInfo::none); + return entry; +} + +address TemplateInterpreterGenerator::generate_return_entry_for(TosState state, int step, size_t index_size) { + + address entry = __ pc(); + // S8 be used in C2 + __ li(S8, (long)Interpreter::dispatch_table(itos)); + // Restore stack bottom in case i2c adjusted stack + __ ld_d(SP, Address(FP, frame::interpreter_frame_last_sp_offset * wordSize)); + // and NULL it as marker that sp is now tos until next java call + __ st_d(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); + + __ restore_bcp(); + __ restore_locals(); + + // mdp: T8 + // ret: FSR + // tmp: T4 + if (state == atos) { + Register mdp = T8; + Register tmp = T4; + __ profile_return_type(mdp, FSR, tmp); + } + + + const Register cache = T4; + const Register index = T3; + __ get_cache_and_index_at_bcp(cache, index, 1, index_size); + + const Register flags = cache; + __ alsl_d(AT, index, cache, Address::times_ptr - 1); + __ ld_w(flags, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset())); + __ andi(flags, flags, ConstantPoolCacheEntry::parameter_size_mask); + __ alsl_d(SP, flags, SP, Interpreter::logStackElementSize - 1); + + Register java_thread; +#ifndef OPT_THREAD + java_thread = T4; + __ get_thread(java_thread); +#else + java_thread = TREG; +#endif + + __ check_and_handle_popframe(java_thread); + __ check_and_handle_earlyret(java_thread); + + __ dispatch_next(state, step); + + return entry; +} + + +address TemplateInterpreterGenerator::generate_deopt_entry_for(TosState state, + int step, + address continuation) { + address entry = __ pc(); + // S8 be used in C2 + __ li(S8, (long)Interpreter::dispatch_table(itos)); + // NULL last_sp until next java call + __ st_d(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); + __ restore_bcp(); + __ restore_locals(); + +#if INCLUDE_JVMCI + // Check if we need to take lock at entry of synchronized method. This can + // only occur on method entry so emit it only for vtos with step 0. + if (EnableJVMCI && state == vtos && step == 0) { + Label L; + __ ld_b(AT, Address(TREG, JavaThread::pending_monitorenter_offset())); + __ beqz(AT, L); + // Clear flag. + __ st_b(R0, Address(TREG, JavaThread::pending_monitorenter_offset())); + // Take lock. 
+ lock_method(); + __ bind(L); + } else { +#ifdef ASSERT + if (EnableJVMCI) { + Label L; + __ ld_b(AT, Address(TREG, JavaThread::pending_monitorenter_offset())); + __ beqz(AT, L); + __ stop("unexpected pending monitor in deopt entry"); + __ bind(L); + } +#endif + } +#endif + + // handle exceptions + { + Label L; + const Register thread = TREG; +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + __ ld_d(AT, thread, in_bytes(Thread::pending_exception_offset())); + __ beq(AT, R0, L); + __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_pending_exception)); + __ should_not_reach_here(); + __ bind(L); + } + if (continuation == NULL) { + __ dispatch_next(state, step); + } else { + __ jump_to_entry(continuation); + } + return entry; +} + +int AbstractInterpreter::BasicType_as_index(BasicType type) { + int i = 0; + switch (type) { + case T_BOOLEAN: i = 0; break; + case T_CHAR : i = 1; break; + case T_BYTE : i = 2; break; + case T_SHORT : i = 3; break; + case T_INT : // fall through + case T_LONG : // fall through + case T_VOID : i = 4; break; + case T_FLOAT : i = 5; break; + case T_DOUBLE : i = 6; break; + case T_OBJECT : // fall through + case T_ARRAY : i = 7; break; + default : ShouldNotReachHere(); + } + assert(0 <= i && i < AbstractInterpreter::number_of_result_handlers, + "index out of bounds"); + return i; +} + + +address TemplateInterpreterGenerator::generate_result_handler_for( + BasicType type) { + address entry = __ pc(); + switch (type) { + case T_BOOLEAN: __ c2bool(V0); break; + case T_CHAR : __ bstrpick_d(V0, V0, 15, 0); break; + case T_BYTE : __ sign_extend_byte (V0); break; + case T_SHORT : __ sign_extend_short(V0); break; + case T_INT : /* nothing to do */ break; + case T_FLOAT : /* nothing to do */ break; + case T_DOUBLE : /* nothing to do */ break; + case T_OBJECT : + { + __ ld_d(V0, FP, frame::interpreter_frame_oop_temp_offset * wordSize); + __ verify_oop(V0); // and verify it + } + break; + default : ShouldNotReachHere(); + } + __ jr(RA); // return from result handler + return entry; +} + +address TemplateInterpreterGenerator::generate_safept_entry_for( + TosState state, + address runtime_entry) { + address entry = __ pc(); + __ push(state); + __ call_VM(noreg, runtime_entry); + __ dispatch_via(vtos, Interpreter::_normal_table.table_for(vtos)); + return entry; +} + + + +// Helpers for commoning out cases in the various type of method entries. +// + + +// increment invocation count & check for overflow +// +// Note: checking for negative value instead of overflow +// so we have a 'sticky' overflow test +// +// prerequisites : method in T0, invocation counter in T3 +void TemplateInterpreterGenerator::generate_counter_incr( + Label* overflow, + Label* profile_method, + Label* profile_method_continue) { + Label done; + // Note: In tiered we increment either counters in Method* or in MDO depending if we're profiling or not. + if (TieredCompilation) { + int increment = InvocationCounter::count_increment; + int mask = ((1 << Tier0InvokeNotifyFreqLog) - 1) << InvocationCounter::count_shift; + Label no_mdo; + if (ProfileInterpreter) { + // Are we profiling? 
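+      // increment_mask_and_jump below is, in C terms, approximately:
+      //
+      //   counter += InvocationCounter::count_increment;
+      //   if ((counter & mask) == 0)   // low Tier0InvokeNotifyFreqLog count bits all zero
+      //     goto overflow;             // notify the compilation policy
+      //
+      // applied to the MDO invocation counter when a MethodData exists,
+      // otherwise to the MethodCounters one.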
+ __ ld_d(FSR, Address(Rmethod, Method::method_data_offset())); + __ beqz(FSR, no_mdo); + // Increment counter in the MDO + const Address mdo_invocation_counter(FSR, in_bytes(MethodData::invocation_counter_offset()) + + in_bytes(InvocationCounter::counter_offset())); + __ increment_mask_and_jump(mdo_invocation_counter, increment, mask, T3, false, Assembler::zero, overflow); + __ b(done); + } + __ bind(no_mdo); + // Increment counter in MethodCounters + const Address invocation_counter(FSR, + MethodCounters::invocation_counter_offset() + + InvocationCounter::counter_offset()); + __ get_method_counters(Rmethod, FSR, done); + __ increment_mask_and_jump(invocation_counter, increment, mask, T3, false, Assembler::zero, overflow); + __ bind(done); + } else { // not TieredCompilation + const Address invocation_counter(FSR, in_bytes(MethodCounters::invocation_counter_offset()) + + in_bytes(InvocationCounter::counter_offset())); + const Address backedge_counter (FSR, in_bytes(MethodCounters::backedge_counter_offset()) + + in_bytes(InvocationCounter::counter_offset())); + + __ get_method_counters(Rmethod, FSR, done); + + if (ProfileInterpreter) { // %%% Merge this into methodDataOop + __ ld_w(T4, FSR, in_bytes(MethodCounters::interpreter_invocation_counter_offset())); + __ addi_d(T4, T4, 1); + __ st_w(T4, FSR, in_bytes(MethodCounters::interpreter_invocation_counter_offset())); + } + // Update standard invocation counters + __ ld_w(T3, invocation_counter); + __ increment(T3, InvocationCounter::count_increment); + __ st_w(T3, invocation_counter); // save invocation count + + __ ld_w(FSR, backedge_counter); // load backedge counter + __ li(AT, InvocationCounter::count_mask_value); // mask out the status bits + __ andr(FSR, FSR, AT); + + __ add_d(T3, T3, FSR); // add both counters + + if (ProfileInterpreter && profile_method != NULL) { + // Test to see if we should create a method data oop + if (Assembler::is_simm(InvocationCounter::InterpreterProfileLimit, 12)) { + __ slti(AT, T3, InvocationCounter::InterpreterProfileLimit); + __ bne_far(AT, R0, *profile_method_continue); + } else { + __ li(AT, (long)&InvocationCounter::InterpreterProfileLimit); + __ ld_w(AT, AT, 0); + __ blt_far(T3, AT, *profile_method_continue, true /* signed */); + } + + // if no method data exists, go to profile_method + __ test_method_data_pointer(FSR, *profile_method); + } + + if (Assembler::is_simm(CompileThreshold, 12)) { + __ srli_w(AT, T3, InvocationCounter::count_shift); + __ slti(AT, AT, CompileThreshold); + __ beq_far(AT, R0, *overflow); + } else { + __ li(AT, (long)&InvocationCounter::InterpreterInvocationLimit); + __ ld_w(AT, AT, 0); + __ bge_far(T3, AT, *overflow, true /* signed */); + } + + __ bind(done); + } +} + +void TemplateInterpreterGenerator::generate_counter_overflow(Label& do_continue) { + + // Asm interpreter on entry + // S7 - locals + // S0 - bcp + // Rmethod - method + // FP - interpreter frame + + // On return (i.e. jump to entry_point) + // Rmethod - method + // RA - return address of interpreter caller + // tos - the last parameter to Java method + // SP - sender_sp + + // the bcp is valid if and only if it's not null + __ call_VM(NOREG, CAST_FROM_FN_PTR(address, + InterpreterRuntime::frequency_counter_overflow), R0); + __ ld_d(Rmethod, FP, method_offset); + // Preserve invariant that S0/S7 contain bcp/locals of sender frame + __ b_far(do_continue); +} + +// See if we've got enough room on the stack for locals plus overhead. 
+// The expression stack grows down incrementally, so the normal guard +// page mechanism will work for that. +// +// NOTE: Since the additional locals are also always pushed (wasn't +// obvious in generate_method_entry) so the guard should work for them +// too. +// +// Args: +// T2: number of additional locals this frame needs (what we must check) +// T0: Method* +// +void TemplateInterpreterGenerator::generate_stack_overflow_check(void) { + // see if we've got enough room on the stack for locals plus overhead. + // the expression stack grows down incrementally, so the normal guard + // page mechanism will work for that. + // + // Registers live on entry: + // + // T0: Method* + // T2: number of additional locals this frame needs (what we must check) + + // NOTE: since the additional locals are also always pushed (wasn't obvious in + // generate_method_entry) so the guard should work for them too. + // + + const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; + + // total overhead size: entry_size + (saved fp thru expr stack bottom). + // be sure to change this if you add/subtract anything to/from the overhead area + const int overhead_size = -(frame::interpreter_frame_initial_sp_offset*wordSize) + + entry_size; + + const int page_size = os::vm_page_size(); + Label after_frame_check; + + // see if the frame is greater than one page in size. If so, + // then we need to verify there is enough stack space remaining + // for the additional locals. + __ li(AT, (page_size - overhead_size) / Interpreter::stackElementSize); + __ bge(AT, T2, after_frame_check); + + // compute sp as if this were going to be the last frame on + // the stack before the red zone +#ifndef OPT_THREAD + Register thread = T1; + __ get_thread(thread); +#else + Register thread = TREG; +#endif + + // locals + overhead, in bytes + __ slli_d(T3, T2, Interpreter::logStackElementSize); + __ addi_d(T3, T3, overhead_size); // locals * 4 + overhead_size --> T3 + +#ifdef ASSERT + Label stack_base_okay, stack_size_okay; + // verify that thread stack base is non-zero + __ ld_d(AT, thread, in_bytes(Thread::stack_base_offset())); + __ bne(AT, R0, stack_base_okay); + __ stop("stack base is zero"); + __ bind(stack_base_okay); + // verify that thread stack size is non-zero + __ ld_d(AT, thread, in_bytes(Thread::stack_size_offset())); + __ bne(AT, R0, stack_size_okay); + __ stop("stack size is zero"); + __ bind(stack_size_okay); +#endif + + // Add stack base to locals and subtract stack size + __ ld_d(AT, thread, in_bytes(Thread::stack_base_offset())); // stack_base --> AT + __ add_d(T3, T3, AT); // locals * 4 + overhead_size + stack_base--> T3 + __ ld_d(AT, thread, in_bytes(Thread::stack_size_offset())); // stack_size --> AT + __ sub_d(T3, T3, AT); // locals * 4 + overhead_size + stack_base - stack_size --> T3 + + // Use the bigger size for banging. + const int max_bang_size = (int)MAX2(JavaThread::stack_shadow_zone_size(), JavaThread::stack_guard_zone_size()); + + // add in the redzone and yellow size + __ li(AT, max_bang_size); + __ add_d(T3, T3, AT); + + // check against the current stack bottom + __ blt(T3, SP, after_frame_check); + + // Note: the restored frame is not necessarily interpreted. + // Use the shared runtime version of the StackOverflowError. 
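+  // The limit just computed in T3 is, in C terms, approximately (T2 holds the
+  // number of additional locals):
+  //
+  //   intptr_t limit = additional_locals * Interpreter::stackElementSize + overhead_size
+  //                  + thread->stack_base() - thread->stack_size()       // lowest usable address
+  //                  + MAX2(shadow_zone_size, guard_zone_size);          // stay clear of the guard pages
+  //   if (SP <= limit)                 // not enough room for the new frame
+  //     goto throw_StackOverflowError; // via the shared runtime stub below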
+  __ move(SP, Rsender);
+  assert(StubRoutines::throw_StackOverflowError_entry() != NULL, "stub not yet generated");
+  __ jmp(StubRoutines::throw_StackOverflowError_entry(), relocInfo::runtime_call_type);
+
+  // all done with frame size check
+  __ bind(after_frame_check);
+}
+
+// Allocate monitor and lock method (asm interpreter)
+// Rmethod - Method*
+void TemplateInterpreterGenerator::lock_method(void) {
+  // synchronize method
+  const int entry_size = frame::interpreter_frame_monitor_size() * wordSize;
+
+#ifdef ASSERT
+  { Label L;
+    __ ld_w(T0, Rmethod, in_bytes(Method::access_flags_offset()));
+    __ andi(T0, T0, JVM_ACC_SYNCHRONIZED);
+    __ bne(T0, R0, L);
+    __ stop("method doesn't need synchronization");
+    __ bind(L);
+  }
+#endif // ASSERT
+  // get synchronization object
+  {
+    Label done;
+    __ ld_w(T0, Rmethod, in_bytes(Method::access_flags_offset()));
+    __ andi(T2, T0, JVM_ACC_STATIC);
+    __ ld_d(T0, LVP, Interpreter::local_offset_in_bytes(0));
+    __ beq(T2, R0, done);
+    __ load_mirror(T0, Rmethod, T4);
+    __ bind(done);
+  }
+  // add space for monitor & lock
+  __ addi_d(SP, SP, (-1) * entry_size);  // add space for a monitor entry
+  __ st_d(SP, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize);
+  // set new monitor block top
+  __ st_d(T0, SP, BasicObjectLock::obj_offset_in_bytes());  // store object
+  // FIXME: I do not know what lock_object will do and what it will need
+  __ move(c_rarg0, SP);  // object address
+  __ lock_object(c_rarg0);
+}
+
+// Generate a fixed interpreter frame. This is identical setup for
+// interpreted methods and for native methods hence the shared code.
+void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call) {
+
+  // [ local var m-1 ] <--- sp
+  //   ...
+  // [ local var 0 ]
+  // [ argument word n-1 ] <--- T0(sender's sp)
+  //   ...
+ // [ argument word 0 ] <--- S7 + + // initialize fixed part of activation frame + // sender's sp in Rsender + int i = 0; + int frame_size = 10; +#ifndef CORE + ++frame_size; +#endif + __ addi_d(SP, SP, (-frame_size) * wordSize); + __ st_d(RA, SP, (frame_size - 1) * wordSize); // save return address + __ st_d(FP, SP, (frame_size - 2) * wordSize); // save sender's fp + __ addi_d(FP, SP, (frame_size - 2) * wordSize); + __ st_d(Rsender, FP, (-++i) * wordSize); // save sender's sp + __ st_d(R0, FP,(-++i) * wordSize); //save last_sp as null + __ st_d(LVP, FP, (-++i) * wordSize); // save locals offset + __ ld_d(BCP, Rmethod, in_bytes(Method::const_offset())); // get constMethodOop + __ addi_d(BCP, BCP, in_bytes(ConstMethod::codes_offset())); // get codebase + __ st_d(Rmethod, FP, (-++i) * wordSize); // save Method* + // Get mirror and store it in the frame as GC root for this Method* + __ load_mirror(T2, Rmethod, T4); + __ st_d(T2, FP, (-++i) * wordSize); // Mirror +#ifndef CORE + if (ProfileInterpreter) { + Label method_data_continue; + __ ld_d(AT, Rmethod, in_bytes(Method::method_data_offset())); + __ beq(AT, R0, method_data_continue); + __ addi_d(AT, AT, in_bytes(MethodData::data_offset())); + __ bind(method_data_continue); + __ st_d(AT, FP, (-++i) * wordSize); + } else { + __ st_d(R0, FP, (-++i) * wordSize); + } +#endif // !CORE + + __ ld_d(T2, Rmethod, in_bytes(Method::const_offset())); + __ ld_d(T2, T2, in_bytes(ConstMethod::constants_offset())); + __ ld_d(T2, T2, ConstantPool::cache_offset_in_bytes()); + __ st_d(T2, FP, (-++i) * wordSize); // set constant pool cache + if (native_call) { + __ st_d(R0, FP, (-++i) * wordSize); // no bcp + } else { + __ st_d(BCP, FP, (-++i) * wordSize); // set bcp + } + __ st_d(SP, FP, (-++i) * wordSize); // reserve word for pointer to expression stack bottom + assert(i + 2 == frame_size, "i + 2 should be equal to frame_size"); +} + +// End of helpers + +// Various method entries +//------------------------------------------------------------------------------------------------------------------------ +// +// + +// Method entry for java.lang.ref.Reference.get. +address TemplateInterpreterGenerator::generate_Reference_get_entry(void) { + // Code: _aload_0, _getfield, _areturn + // parameter size = 1 + // + // The code that gets generated by this routine is split into 2 parts: + // 1. The "intrinsified" code for G1 (or any SATB based GC), + // 2. The slow path - which is an expansion of the regular method entry. + // + // Notes:- + // * In the G1 code we do not check whether we need to block for + // a safepoint. If G1 is enabled then we must execute the specialized + // code for Reference.get (except when the Reference object is null) + // so that we can log the value in the referent field with an SATB + // update buffer. + // If the code for the getfield template is modified so that the + // G1 pre-barrier code is executed when the current method is + // Reference.get() then going through the normal method entry + // will be fine. + // * The G1 code can, however, check the receiver object (the instance + // of java.lang.Reference) and jump to the slow path if null. If the + // Reference object is null then we obviously cannot fetch the referent + // and so we don't need to call the G1 pre-barrier. Thus we can use the + // regular method entry code to generate the NPE. + // + // This code is based on generate_accessor_entry. + // + // Rmethod: Method* + // Rsender: senderSP must preserve for slow path, set SP to it on fast path + // RA is live. 
It must be saved around calls. + + address entry = __ pc(); + + const int referent_offset = java_lang_ref_Reference::referent_offset; + + Label slow_path; + const Register local_0 = A0; + // Check if local 0 != NULL + // If the receiver is null then it is OK to jump to the slow path. + __ ld_d(local_0, Address(SP, 0)); + __ beqz(local_0, slow_path); + + // Load the value of the referent field. + const Address field_address(local_0, referent_offset); + BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); + bs->load_at(_masm, IN_HEAP | ON_WEAK_OOP_REF, T_OBJECT, local_0, field_address, /*tmp1*/ T4, /*tmp2*/ noreg); + + // areturn + __ move(SP, Rsender); + __ jr(RA); + + // generate a vanilla interpreter entry as the slow path + __ bind(slow_path); + __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::zerolocals)); + return entry; +} + +// Interpreter stub for calling a native method. (asm interpreter) +// This sets up a somewhat different looking stack for calling the +// native method than the typical interpreter frame setup. +address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) { + // determine code generation flags + bool inc_counter = UseCompiler || CountCompiledCalls || LogTouchedMethods; + // Rsender: sender's sp + // Rmethod: Method* + address entry_point = __ pc(); + +#ifndef CORE + const Address invocation_counter(Rmethod,in_bytes(MethodCounters::invocation_counter_offset() + + InvocationCounter::counter_offset())); +#endif + // get parameter size (always needed) + // the size in the java stack + __ ld_d(V0, Rmethod, in_bytes(Method::const_offset())); + __ ld_hu(V0, V0, in_bytes(ConstMethod::size_of_parameters_offset())); + + // native calls don't need the stack size check since they have no expression stack + // and the arguments are already on the stack and we only add a handful of words + // to the stack + + // Rmethod: Method* + // V0: size of parameters + // Layout of frame at this point + // + // [ argument word n-1 ] <--- sp + // ... + // [ argument word 0 ] + + // for natives the size of locals is zero + + // compute beginning of parameters (S7) + __ slli_d(LVP, V0, Address::times_8); + __ addi_d(LVP, LVP, (-1) * wordSize); + __ add_d(LVP, LVP, SP); + + + // add 2 zero-initialized slots for native calls + // 1 slot for native oop temp offset (setup via runtime) + // 1 slot for static native result handler3 (setup via runtime) + __ push2(R0, R0); + + // Layout of frame at this point + // [ method holder mirror ] <--- sp + // [ result type info ] + // [ argument word n-1 ] <--- T0 + // ... + // [ argument word 0 ] <--- LVP + + +#ifndef CORE + if (inc_counter) __ ld_w(T3, invocation_counter); // (pre-)fetch invocation count +#endif + + // initialize fixed part of activation frame + generate_fixed_frame(true); + // after this function, the layout of frame is as following + // + // [ monitor block top ] <--- sp ( the top monitor entry ) + // [ byte code pointer (0) ] (if native, bcp = 0) + // [ constant pool cache ] + // [ Mirror ] + // [ Method* ] + // [ locals offset ] + // [ sender's sp ] + // [ sender's fp ] + // [ return address ] <--- fp + // [ method holder mirror ] + // [ result type info ] + // [ argumnet word n-1 ] <--- sender's sp + // ... 
+ // [ argument word 0 ] <--- S7 + + + // make sure method is native & not abstract +#ifdef ASSERT + __ ld_w(T0, Rmethod, in_bytes(Method::access_flags_offset())); + { + Label L; + __ andi(AT, T0, JVM_ACC_NATIVE); + __ bne(AT, R0, L); + __ stop("tried to execute native method as non-native"); + __ bind(L); + } + { + Label L; + __ andi(AT, T0, JVM_ACC_ABSTRACT); + __ beq(AT, R0, L); + __ stop("tried to execute abstract method in interpreter"); + __ bind(L); + } +#endif + + // Since at this point in the method invocation the exception handler + // would try to exit the monitor of synchronized methods which hasn't + // been entered yet, we set the thread local variable + // _do_not_unlock_if_synchronized to true. The remove_activation will + // check this flag. + Register thread = TREG; +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + __ li(AT, (int)true); + __ st_b(AT, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); + +#ifndef CORE + // increment invocation count & check for overflow + Label invocation_counter_overflow; + if (inc_counter) { + generate_counter_incr(&invocation_counter_overflow, NULL, NULL); + } + + Label continue_after_compile; + __ bind(continue_after_compile); +#endif // CORE + + bang_stack_shadow_pages(true); + + // reset the _do_not_unlock_if_synchronized flag +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + __ st_b(R0, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); + + // check for synchronized methods + // Must happen AFTER invocation_counter check and stack overflow check, + // so method is not locked if overflows. + if (synchronized) { + lock_method(); + } else { + // no synchronization necessary +#ifdef ASSERT + { + Label L; + __ ld_w(T0, Rmethod, in_bytes(Method::access_flags_offset())); + __ andi(AT, T0, JVM_ACC_SYNCHRONIZED); + __ beq(AT, R0, L); + __ stop("method needs synchronization"); + __ bind(L); + } +#endif + } + + // after method_lock, the layout of frame is as following + // + // [ monitor entry ] <--- sp + // ... + // [ monitor entry ] + // [ monitor block top ] ( the top monitor entry ) + // [ byte code pointer (0) ] (if native, bcp = 0) + // [ constant pool cache ] + // [ Mirror ] + // [ Method* ] + // [ locals offset ] + // [ sender's sp ] + // [ sender's fp ] + // [ return address ] <--- fp + // [ method holder mirror ] + // [ result type info ] + // [ argumnet word n-1 ] <--- ( sender's sp ) + // ... + // [ argument word 0 ] <--- S7 + + // start execution +#ifdef ASSERT + { + Label L; + __ ld_d(AT, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); + __ beq(AT, SP, L); + __ stop("broken stack frame setup in interpreter in asm"); + __ bind(L); + } +#endif + + // jvmti/jvmpi support + __ notify_method_entry(); + + // work registers + const Register method = Rmethod; + const Register t = T8; + + __ get_method(method); + { + Label L, Lstatic; + __ ld_d(t,method,in_bytes(Method::const_offset())); + __ ld_hu(t, t, in_bytes(ConstMethod::size_of_parameters_offset())); + // LoongArch ABI: caller does not reserve space for the register auguments. + // A0 and A1(if needed) + __ ld_w(AT, Rmethod, in_bytes(Method::access_flags_offset())); + __ andi(AT, AT, JVM_ACC_STATIC); + __ beq(AT, R0, Lstatic); + __ addi_d(t, t, 1); + __ bind(Lstatic); + __ addi_d(t, t, -7); + __ bge(R0, t, L); + __ slli_d(t, t, Address::times_8); + __ sub_d(SP, SP, t); + __ bind(L); + } + __ li(AT, -(StackAlignmentInBytes)); + __ andr(SP, SP, AT); + __ move(AT, SP); + // [ ] <--- sp + // ... 
(size of parameters - 8 ) + // [ monitor entry ] + // ... + // [ monitor entry ] + // [ monitor block top ] ( the top monitor entry ) + // [ byte code pointer (0) ] (if native, bcp = 0) + // [ constant pool cache ] + // [ Mirror ] + // [ Method* ] + // [ locals offset ] + // [ sender's sp ] + // [ sender's fp ] + // [ return address ] <--- fp + // [ method holder mirror ] + // [ result type info ] + // [ argumnet word n-1 ] <--- ( sender's sp ) + // ... + // [ argument word 0 ] <--- LVP + + // get signature handler + { + Label L; + __ ld_d(T4, method, in_bytes(Method::signature_handler_offset())); + __ bne(T4, R0, L); + __ call_VM(NOREG, CAST_FROM_FN_PTR(address, + InterpreterRuntime::prepare_native_call), method); + __ get_method(method); + __ ld_d(T4, method, in_bytes(Method::signature_handler_offset())); + __ bind(L); + } + + // call signature handler + // FIXME: when change codes in InterpreterRuntime, note this point + // from: begin of parameters + assert(InterpreterRuntime::SignatureHandlerGenerator::from() == LVP, "adjust this code"); + // to: current sp + assert(InterpreterRuntime::SignatureHandlerGenerator::to () == SP, "adjust this code"); + // temp: T3 + assert(InterpreterRuntime::SignatureHandlerGenerator::temp() == t , "adjust this code"); + + __ jalr(T4); + __ get_method(method); + + // + // if native function is static, and its second parameter has type length of double word, + // and first parameter has type length of word, we have to reserve one word + // for the first parameter, according to LoongArch abi. + // if native function is not static, and its third parameter has type length of double word, + // and second parameter has type length of word, we have to reserve one word for the second + // parameter. + // + + + // result handler is in V0 + // set result handler + __ st_d(V0, FP, (frame::interpreter_frame_result_handler_offset)*wordSize); + +#define FIRSTPARA_SHIFT_COUNT 5 +#define SECONDPARA_SHIFT_COUNT 9 +#define THIRDPARA_SHIFT_COUNT 13 +#define PARA_MASK 0xf + + // pass mirror handle if static call + { + Label L; + __ ld_w(t, method, in_bytes(Method::access_flags_offset())); + __ andi(AT, t, JVM_ACC_STATIC); + __ beq(AT, R0, L); + + // get mirror + __ load_mirror(t, method, T4); + // copy mirror into activation frame + __ st_d(t, FP, frame::interpreter_frame_oop_temp_offset * wordSize); + // pass handle to mirror + __ addi_d(t, FP, frame::interpreter_frame_oop_temp_offset * wordSize); + __ move(A1, t); + __ bind(L); + } + + // [ mthd holder mirror ptr ] <--- sp --------------------| (only for static method) + // [ ] | + // ... size of parameters(or +1) | + // [ monitor entry ] | + // ... | + // [ monitor entry ] | + // [ monitor block top ] ( the top monitor entry ) | + // [ byte code pointer (0) ] (if native, bcp = 0) | + // [ constant pool cache ] | + // [ Mirror ] | + // [ Method* ] | + // [ locals offset ] | + // [ sender's sp ] | + // [ sender's fp ] | + // [ return address ] <--- fp | + // [ method holder mirror ] <----------------------------| + // [ result type info ] + // [ argumnet word n-1 ] <--- ( sender's sp ) + // ... 
+ // [ argument word 0 ] <--- S7 + + // get native function entry point + { Label L; + __ ld_d(T4, method, in_bytes(Method::native_function_offset())); + __ li(T6, SharedRuntime::native_method_throw_unsatisfied_link_error_entry()); + __ bne(T6, T4, L); + __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::prepare_native_call), method); + __ get_method(method); + __ ld_d(T4, method, in_bytes(Method::native_function_offset())); + __ bind(L); + } + + // pass JNIEnv + // native function in T4 +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + __ addi_d(t, thread, in_bytes(JavaThread::jni_environment_offset())); + __ move(A0, t); + // [ jni environment ] <--- sp + // [ mthd holder mirror ptr ] ---------------------------->| (only for static method) + // [ ] | + // ... size of parameters | + // [ monitor entry ] | + // ... | + // [ monitor entry ] | + // [ monitor block top ] ( the top monitor entry ) | + // [ byte code pointer (0) ] (if native, bcp = 0) | + // [ constant pool cache ] | + // [ Mirror ] | + // [ Method* ] | + // [ locals offset ] | + // [ sender's sp ] | + // [ sender's fp ] | + // [ return address ] <--- fp | + // [ method holder mirror ] <----------------------------| + // [ result type info ] + // [ argumnet word n-1 ] <--- ( sender's sp ) + // ... + // [ argument word 0 ] <--- S7 + + // Set the last Java PC in the frame anchor to be the return address from + // the call to the native method: this will allow the debugger to + // generate an accurate stack trace. + Label native_return; + __ set_last_Java_frame(thread, SP, FP, native_return); + + // change thread state +#ifdef ASSERT + { + Label L; + __ ld_w(t, thread, in_bytes(JavaThread::thread_state_offset())); + __ addi_d(t, t, (-1) * _thread_in_Java); + __ beq(t, R0, L); + __ stop("Wrong thread state in native stub"); + __ bind(L); + } +#endif + + __ li(t, _thread_in_native); + if (os::is_MP()) { + __ membar(Assembler::Membar_mask_bits(__ LoadStore|__ StoreStore)); // store release + } + __ st_w(t, thread, in_bytes(JavaThread::thread_state_offset())); + + // call native method + __ jalr(T4); + __ bind(native_return); + // result potentially in V0 or F0 + + + // via _last_native_pc and not via _last_jave_sp + // NOTE: the order of theses push(es) is known to frame::interpreter_frame_result. + // If the order changes or anything else is added to the stack the code in + // interpreter_frame_result will have to be changed. + //FIXME, should modify here + // save return value to keep the value from being destroyed by other calls + __ push(dtos); + __ push(ltos); + + // change thread state +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + __ li(t, _thread_in_native_trans); + if (os::is_MP()) { + __ membar(Assembler::Membar_mask_bits(__ LoadStore|__ StoreStore)); // store release + } + __ st_w(t, thread, in_bytes(JavaThread::thread_state_offset())); + + if(os::is_MP()) { + if (UseMembar) { + // Force this write out before the read below + __ membar(__ AnyAny); + } else { + // Write serialization page so VM thread can do a pseudo remote membar. + // We use the current thread pointer to calculate a thread specific + // offset to write to within the page. This minimizes bus traffic + // due to cache line collision. 
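+      // In effect this is a pseudo store/load barrier: the write to the
+      // serialization page is intended to make the _thread_in_native_trans
+      // store above visible to the VM thread before the safepoint/suspend
+      // check that follows.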
+ __ serialize_memory(thread, A0); + } + } + + // check for safepoint operation in progress and/or pending suspend requests + { Label Continue; + + // Don't use call_VM as it will see a possible pending exception and forward it + // and never return here preventing us from clearing _last_native_pc down below. + // Also can't use call_VM_leaf either as it will check to see if BCP & LVP are + // preserved and correspond to the bcp/locals pointers. So we do a runtime call + // by hand. + // + Label slow_path; + + __ safepoint_poll_acquire(slow_path, thread); + __ ld_w(AT, thread, in_bytes(JavaThread::suspend_flags_offset())); + __ beq(AT, R0, Continue); + __ bind(slow_path); + __ move(A0, thread); + __ call(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans), + relocInfo::runtime_call_type); + +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + //add for compressedoops + __ reinit_heapbase(); + __ bind(Continue); + } + + // change thread state + __ li(t, _thread_in_Java); + if (os::is_MP()) { + __ membar(Assembler::Membar_mask_bits(__ LoadStore|__ StoreStore)); // store release + } + __ st_w(t, thread, in_bytes(JavaThread::thread_state_offset())); + __ reset_last_Java_frame(thread, true); + + if (CheckJNICalls) { + // clear_pending_jni_exception_check + __ st_d(R0, thread, in_bytes(JavaThread::pending_jni_exception_check_fn_offset())); + } + + // reset handle block + __ ld_d(t, thread, in_bytes(JavaThread::active_handles_offset())); + __ st_w(R0, t, JNIHandleBlock::top_offset_in_bytes()); + + // If result was an oop then unbox and save it in the frame + { + Label no_oop; + __ ld_d(AT, FP, frame::interpreter_frame_result_handler_offset*wordSize); + __ li(T0, AbstractInterpreter::result_handler(T_OBJECT)); + __ bne(AT, T0, no_oop); + __ pop(ltos); + // Unbox oop result, e.g. JNIHandles::resolve value. + __ resolve_jobject(V0, thread, T4); + __ st_d(V0, FP, (frame::interpreter_frame_oop_temp_offset)*wordSize); + // keep stack depth as expected by pushing oop which will eventually be discarded + __ push(ltos); + __ bind(no_oop); + } + { + Label no_reguard; + __ ld_w(t, thread, in_bytes(JavaThread::stack_guard_state_offset())); + __ li(AT, (u1)JavaThread::stack_guard_yellow_reserved_disabled); + __ bne(t, AT, no_reguard); + __ pushad(); + __ move(S5_heapbase, SP); + __ li(AT, -StackAlignmentInBytes); + __ andr(SP, SP, AT); + __ call(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages), relocInfo::runtime_call_type); + __ move(SP, S5_heapbase); + __ popad(); + //add for compressedoops + __ reinit_heapbase(); + __ bind(no_reguard); + } + // restore BCP to have legal interpreter frame, + // i.e., bci == 0 <=> BCP == code_base() + // Can't call_VM until bcp is within reasonable. + __ get_method(method); // method is junk from thread_in_native to now. + __ ld_d(BCP, method, in_bytes(Method::const_offset())); + __ lea(BCP, Address(BCP, in_bytes(ConstMethod::codes_offset()))); + // handle exceptions (exception handling will handle unlocking!) + { + Label L; + __ ld_d(t, thread, in_bytes(Thread::pending_exception_offset())); + __ beq(t, R0, L); + // Note: At some point we may want to unify this with the code used in + // call_VM_base(); + // i.e., we should use the StubRoutines::forward_exception code. For now this + // doesn't work here because the sp is not correctly set at this point. 
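+    // throw_pending_exception re-raises the exception that the native method
+    // left in Thread::pending_exception; control continues in the exception
+    // handling path and never returns here (hence the should_not_reach_here()
+    // below).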
+ __ MacroAssembler::call_VM(noreg, + CAST_FROM_FN_PTR(address, + InterpreterRuntime::throw_pending_exception)); + __ should_not_reach_here(); + __ bind(L); + } + + // do unlocking if necessary + { + Label L; + __ ld_w(t, method, in_bytes(Method::access_flags_offset())); + __ andi(t, t, JVM_ACC_SYNCHRONIZED); + __ addi_d(c_rarg0, FP, frame::interpreter_frame_initial_sp_offset * wordSize - (int)sizeof(BasicObjectLock)); + __ beq(t, R0, L); + // the code below should be shared with interpreter macro assembler implementation + { + Label unlock; + // BasicObjectLock will be first in list, + // since this is a synchronized method. However, need + // to check that the object has not been unlocked by + // an explicit monitorexit bytecode. + // address of first monitor + + __ ld_d(t, c_rarg0, BasicObjectLock::obj_offset_in_bytes()); + __ bne(t, R0, unlock); + + // Entry already unlocked, need to throw exception + __ MacroAssembler::call_VM(NOREG, CAST_FROM_FN_PTR(address, + InterpreterRuntime::throw_illegal_monitor_state_exception)); + __ should_not_reach_here(); + + __ bind(unlock); + __ unlock_object(c_rarg0); + } + __ bind(L); + } + + // jvmti/jvmpi support + // Note: This must happen _after_ handling/throwing any exceptions since + // the exception handler code notifies the runtime of method exits + // too. If this happens before, method entry/exit notifications are + // not properly paired (was bug - gri 11/22/99). + __ notify_method_exit(vtos, InterpreterMacroAssembler::NotifyJVMTI); + + // restore potential result in V0, + // call result handler to restore potential result in ST0 & handle result + + __ pop(ltos); + __ pop(dtos); + + __ ld_d(t, FP, (frame::interpreter_frame_result_handler_offset) * wordSize); + __ jalr(t); + + + // remove activation + __ ld_d(SP, FP, frame::interpreter_frame_sender_sp_offset * wordSize); // get sender sp + __ ld_d(RA, FP, frame::java_frame_return_addr_offset * wordSize); // get return address + __ ld_d(FP, FP, frame::interpreter_frame_sender_fp_offset * wordSize); // restore sender's fp + __ jr(RA); + +#ifndef CORE + if (inc_counter) { + // Handle overflow of counter and compile method + __ bind(invocation_counter_overflow); + generate_counter_overflow(continue_after_compile); + // entry_point is the beginning of this + // function and checks again for compiled code + } +#endif + return entry_point; +} + +void TemplateInterpreterGenerator::bang_stack_shadow_pages(bool native_call) { + // Quick & dirty stack overflow checking: bang the stack & handle trap. + // Note that we do the banging after the frame is setup, since the exception + // handling code expects to find a valid interpreter frame on the stack. + // Doing the banging earlier fails if the caller frame is not an interpreter + // frame. + // (Also, the exception throwing code expects to unlock any synchronized + // method receiever, so do the banging after locking the receiver.) + + // Bang each page in the shadow zone. We can't assume it's been done for + // an interpreter frame with greater than a page of locals, so each page + // needs to be checked. Only true for non-native. + if (UseStackBanging) { + const int page_size = os::vm_page_size(); + const int n_shadow_pages = ((int)JavaThread::stack_shadow_zone_size()) / page_size; + const int start_page = native_call ? 
n_shadow_pages : 1; + BLOCK_COMMENT("bang_stack_shadow_pages:"); + for (int pages = start_page; pages <= n_shadow_pages; pages++) { + __ bang_stack_with_offset(pages*page_size); + } + } +} + +// +// Generic interpreted method entry to (asm) interpreter +// +// Layout of frame just at the entry +// +// [ argument word n-1 ] <--- sp +// ... +// [ argument word 0 ] +// assume Method* in Rmethod before call this method. +// prerequisites to the generated stub : the callee Method* in Rmethod +// note you must save the caller bcp before call the generated stub +// +address TemplateInterpreterGenerator::generate_normal_entry(bool synchronized) { + // determine code generation flags + bool inc_counter = UseCompiler || CountCompiledCalls || LogTouchedMethods; + + // Rmethod: Method* + // Rsender: sender 's sp + address entry_point = __ pc(); + // S8 be used in C2 + __ li(S8, (long)Interpreter::dispatch_table(itos)); + const Address invocation_counter(Rmethod, + in_bytes(MethodCounters::invocation_counter_offset() + InvocationCounter::counter_offset())); + + // get parameter size (always needed) + __ ld_d(T3, Rmethod, in_bytes(Method::const_offset())); //T3 --> Rmethod._constMethod + __ ld_hu(V0, T3, in_bytes(ConstMethod::size_of_parameters_offset())); + + // Rmethod: Method* + // V0: size of parameters + // Rsender: sender 's sp ,could be different frome sp+ wordSize if we call via c2i + // get size of locals in words to T2 + __ ld_hu(T2, T3, in_bytes(ConstMethod::size_of_locals_offset())); + // T2 = no. of additional locals, locals include parameters + __ sub_d(T2, T2, V0); + + // see if we've got enough room on the stack for locals plus overhead. + // Layout of frame at this point + // + // [ argument word n-1 ] <--- sp + // ... + // [ argument word 0 ] + generate_stack_overflow_check(); + // after this function, the layout of frame does not change + + // compute beginning of parameters (LVP) + __ slli_d(LVP, V0, LogBytesPerWord); + __ addi_d(LVP, LVP, (-1) * wordSize); + __ add_d(LVP, LVP, SP); + + // T2 - # of additional locals + // allocate space for locals + // explicitly initialize locals + { + Label exit, loop; + __ beq(T2, R0, exit); + + __ bind(loop); + __ addi_d(SP, SP, (-1) * wordSize); + __ addi_d(T2, T2, -1); // until everything initialized + __ st_d(R0, SP, 0); // initialize local variables + __ bne(T2, R0, loop); + + __ bind(exit); + } + + // + // [ local var m-1 ] <--- sp + // ... + // [ local var 0 ] + // [ argument word n-1 ] <--- T0? + // ... + // [ argument word 0 ] <--- LVP + + // initialize fixed part of activation frame + + generate_fixed_frame(false); + + + // after this function, the layout of frame is as following + // + // [ monitor block top ] <--- sp ( the top monitor entry ) + // [ byte code pointer ] (if native, bcp = 0) + // [ constant pool cache ] + // [ Method* ] + // [ locals offset ] + // [ sender's sp ] + // [ sender's fp ] <--- fp + // [ return address ] + // [ local var m-1 ] + // ... + // [ local var 0 ] + // [ argumnet word n-1 ] <--- ( sender's sp ) + // ... 
+ // [ argument word 0 ] <--- LVP + + + // make sure method is not native & not abstract +#ifdef ASSERT + __ ld_d(AT, Rmethod, in_bytes(Method::access_flags_offset())); + { + Label L; + __ andi(T2, AT, JVM_ACC_NATIVE); + __ beq(T2, R0, L); + __ stop("tried to execute native method as non-native"); + __ bind(L); + } + { + Label L; + __ andi(T2, AT, JVM_ACC_ABSTRACT); + __ beq(T2, R0, L); + __ stop("tried to execute abstract method in interpreter"); + __ bind(L); + } +#endif + + // Since at this point in the method invocation the exception handler + // would try to exit the monitor of synchronized methods which hasn't + // been entered yet, we set the thread local variable + // _do_not_unlock_if_synchronized to true. The remove_activation will + // check this flag. + +#ifndef OPT_THREAD + Register thread = T8; + __ get_thread(thread); +#else + Register thread = TREG; +#endif + __ li(AT, (int)true); + __ st_b(AT, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); + +#ifndef CORE + + // mdp : T8 + // tmp1: T4 + // tmp2: T2 + __ profile_parameters_type(T8, T4, T2); + + // increment invocation count & check for overflow + Label invocation_counter_overflow; + Label profile_method; + Label profile_method_continue; + if (inc_counter) { + generate_counter_incr(&invocation_counter_overflow, + &profile_method, + &profile_method_continue); + if (ProfileInterpreter) { + __ bind(profile_method_continue); + } + } + + Label continue_after_compile; + __ bind(continue_after_compile); + +#endif // CORE + + bang_stack_shadow_pages(false); + + // reset the _do_not_unlock_if_synchronized flag +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + __ st_b(R0, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); + + // check for synchronized methods + // Must happen AFTER invocation_counter check and stack overflow check, + // so method is not locked if overflows. + // + if (synchronized) { + // Allocate monitor and lock method + lock_method(); + } else { + // no synchronization necessary +#ifdef ASSERT + { Label L; + __ ld_w(AT, Rmethod, in_bytes(Method::access_flags_offset())); + __ andi(T2, AT, JVM_ACC_SYNCHRONIZED); + __ beq(T2, R0, L); + __ stop("method needs synchronization"); + __ bind(L); + } +#endif + } + + // layout of frame after lock_method + // [ monitor entry ] <--- sp + // ... + // [ monitor entry ] + // [ monitor block top ] ( the top monitor entry ) + // [ byte code pointer ] (if native, bcp = 0) + // [ constant pool cache ] + // [ Method* ] + // [ locals offset ] + // [ sender's sp ] + // [ sender's fp ] + // [ return address ] <--- fp + // [ local var m-1 ] + // ... + // [ local var 0 ] + // [ argumnet word n-1 ] <--- ( sender's sp ) + // ... 
+ // [ argument word 0 ] <--- LVP + + + // start execution +#ifdef ASSERT + { + Label L; + __ ld_d(AT, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); + __ beq(AT, SP, L); + __ stop("broken stack frame setup in interpreter in native"); + __ bind(L); + } +#endif + + // jvmti/jvmpi support + __ notify_method_entry(); + + __ dispatch_next(vtos); + + // invocation counter overflow + if (inc_counter) { + if (ProfileInterpreter) { + // We have decided to profile this method in the interpreter + __ bind(profile_method); + __ call_VM(noreg, CAST_FROM_FN_PTR(address, + InterpreterRuntime::profile_method)); + __ set_method_data_pointer_for_bcp(); + __ get_method(Rmethod); + __ b(profile_method_continue); + } + // Handle overflow of counter and compile method + __ bind(invocation_counter_overflow); + generate_counter_overflow(continue_after_compile); + } + + return entry_point; +} + +//----------------------------------------------------------------------------- +// Exceptions + +void TemplateInterpreterGenerator::generate_throw_exception() { + // Entry point in previous activation (i.e., if the caller was + // interpreted) + Interpreter::_rethrow_exception_entry = __ pc(); + // Restore sp to interpreter_frame_last_sp even though we are going + // to empty the expression stack for the exception processing. + __ st_d(R0,FP, frame::interpreter_frame_last_sp_offset * wordSize); + + // V0: exception + // V1: return address/pc that threw exception + __ restore_bcp(); // BCP points to call/send + __ restore_locals(); + + //add for compressedoops + __ reinit_heapbase(); + // S8 be used in C2 + __ li(S8, (long)Interpreter::dispatch_table(itos)); + // Entry point for exceptions thrown within interpreter code + Interpreter::_throw_exception_entry = __ pc(); + // expression stack is undefined here + // V0: exception + // BCP: exception bcp + __ verify_oop(V0); + + // expression stack must be empty before entering the VM in case of an exception + __ empty_expression_stack(); + // find exception handler address and preserve exception oop + __ move(A1, V0); + __ call_VM(V1, CAST_FROM_FN_PTR(address, InterpreterRuntime::exception_handler_for_exception), A1); + // V0: exception handler entry point + // V1: preserved exception oop + // S0: bcp for exception handler + __ push(V1); // push exception which is now the only value on the stack + __ jr(V0); // jump to exception handler (may be _remove_activation_entry!) + + // If the exception is not handled in the current frame the frame is removed and + // the exception is rethrown (i.e. exception continuation is _rethrow_exception). + // + // Note: At this point the bci is still the bxi for the instruction which caused + // the exception and the expression stack is empty. Thus, for any VM calls + // at this point, GC will find a legal oop map (with empty expression stack). + + // In current activation + // V0: exception + // BCP: exception bcp + + // + // JVMTI PopFrame support + // + + Interpreter::_remove_activation_preserving_args_entry = __ pc(); + __ empty_expression_stack(); + // Set the popframe_processing bit in pending_popframe_condition indicating that we are + // currently handling popframe, so that call_VMs that may happen later do not trigger new + // popframe handling cycles. 
+#ifndef OPT_THREAD + Register thread = T2; + __ get_thread(T2); +#else + Register thread = TREG; +#endif + __ ld_w(T3, thread, in_bytes(JavaThread::popframe_condition_offset())); + __ ori(T3, T3, JavaThread::popframe_processing_bit); + __ st_w(T3, thread, in_bytes(JavaThread::popframe_condition_offset())); + +#ifndef CORE + { + // Check to see whether we are returning to a deoptimized frame. + // (The PopFrame call ensures that the caller of the popped frame is + // either interpreted or compiled and deoptimizes it if compiled.) + // In this case, we can't call dispatch_next() after the frame is + // popped, but instead must save the incoming arguments and restore + // them after deoptimization has occurred. + // + // Note that we don't compare the return PC against the + // deoptimization blob's unpack entry because of the presence of + // adapter frames in C2. + Label caller_not_deoptimized; + __ ld_d(A0, FP, frame::java_frame_return_addr_offset * wordSize); + __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::interpreter_contains), A0); + __ bne(V0, R0, caller_not_deoptimized); + + // Compute size of arguments for saving when returning to deoptimized caller + __ get_method(A1); + __ verify_oop(A1); + __ ld_d(A1, A1, in_bytes(Method::const_offset())); + __ ld_hu(A1, A1, in_bytes(ConstMethod::size_of_parameters_offset())); + __ shl(A1, Interpreter::logStackElementSize); + __ restore_locals(); + __ sub_d(A2, LVP, A1); + __ addi_d(A2, A2, wordSize); + // Save these arguments +#ifndef OPT_THREAD + __ get_thread(A0); +#else + __ move(A0, TREG); +#endif + __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::popframe_preserve_args), A0, A1, A2); + + __ remove_activation(vtos, T4, false, false, false); + + // Inform deoptimization that it is responsible for restoring these arguments +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + __ li(AT, JavaThread::popframe_force_deopt_reexecution_bit); + __ st_w(AT, thread, in_bytes(JavaThread::popframe_condition_offset())); + // Continue in deoptimization handler + __ jr(T4); + + __ bind(caller_not_deoptimized); + } +#endif /* !CORE */ + + __ remove_activation(vtos, T3, + /* throw_monitor_exception */ false, + /* install_monitor_exception */ false, + /* notify_jvmdi */ false); + + // Clear the popframe condition flag + // Finish with popframe handling + // A previous I2C followed by a deoptimization might have moved the + // outgoing arguments further up the stack. PopFrame expects the + // mutations to those outgoing arguments to be preserved and other + // constraints basically require this frame to look exactly as + // though it had previously invoked an interpreted activation with + // no space between the top of the expression stack (current + // last_sp) and the top of stack. Rather than force deopt to + // maintain this kind of invariant all the time we call a small + // fixup routine to move the mutated arguments onto the top of our + // expression stack if necessary. 
+ __ move(T8, SP); + __ ld_d(A2, FP, frame::interpreter_frame_last_sp_offset * wordSize); +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + // PC must point into interpreter here + Label L; + __ bind(L); + __ set_last_Java_frame(thread, noreg, FP, L); + __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::popframe_move_outgoing_args), thread, T8, A2); + __ reset_last_Java_frame(thread, true); + // Restore the last_sp and null it out + __ ld_d(SP, FP, frame::interpreter_frame_last_sp_offset * wordSize); + __ st_d(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); + + + + __ li(AT, JavaThread::popframe_inactive); + __ st_w(AT, thread, in_bytes(JavaThread::popframe_condition_offset())); + + // Finish with popframe handling + __ restore_bcp(); + __ restore_locals(); + // S8 be used in C2 + __ li(S8, (long)Interpreter::dispatch_table(itos)); +#ifndef CORE + // The method data pointer was incremented already during + // call profiling. We have to restore the mdp for the current bcp. + if (ProfileInterpreter) { + __ set_method_data_pointer_for_bcp(); + } +#endif // !CORE + // Clear the popframe condition flag +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + __ li(AT, JavaThread::popframe_inactive); + __ st_w(AT, thread, in_bytes(JavaThread::popframe_condition_offset())); + +#if INCLUDE_JVMTI + { + Label L_done; + + __ ld_bu(AT, BCP, 0); + __ addi_d(AT, AT, -1 * Bytecodes::_invokestatic); + __ bne(AT, R0, L_done); + + // The member name argument must be restored if _invokestatic is re-executed after a PopFrame call. + // Detect such a case in the InterpreterRuntime function and return the member name argument, or NULL. + + __ get_method(T4); + __ ld_d(T8, LVP, 0); + __ call_VM(T8, CAST_FROM_FN_PTR(address, InterpreterRuntime::member_name_arg_or_null), T8, T4, BCP); + + __ beq(T8, R0, L_done); + + __ st_d(T8, SP, 0); + __ bind(L_done); + } +#endif // INCLUDE_JVMTI + + __ dispatch_next(vtos); + // end of PopFrame support + + Interpreter::_remove_activation_entry = __ pc(); + + // preserve exception over this code sequence + __ pop(T0); +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + __ st_d(T0, thread, in_bytes(JavaThread::vm_result_offset())); + // remove the activation (without doing throws on illegalMonitorExceptions) + __ remove_activation(vtos, T3, false, true, false); + // restore exception + __ get_vm_result(T0, thread); + __ verify_oop(T0); + + // In between activations - previous activation type unknown yet + // compute continuation point - the continuation point expects + // the following registers set up: + // + // T0: exception + // T1: return address/pc that threw exception + // SP: expression stack of caller + // FP: fp of caller + __ push2(T0, T3); // save exception and return address + __ move(A1, T3); + __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), thread, A1); + __ move(T4, V0); // save exception handler + __ pop2(V0, V1); // restore return address and exception + + // Note that an "issuing PC" is actually the next PC after the call + __ jr(T4); // jump to exception handler of caller +} + + +// +// JVMTI ForceEarlyReturn support +// +address TemplateInterpreterGenerator::generate_earlyret_entry_for(TosState state) { + address entry = __ pc(); + __ restore_bcp(); + __ restore_locals(); + __ empty_expression_stack(); + __ empty_FPU_stack(); + __ load_earlyret_value(state); + +#ifndef OPT_THREAD + __ get_thread(TREG); +#endif + __ ld_ptr(T4, TREG, 
in_bytes(JavaThread::jvmti_thread_state_offset())); + const Address cond_addr(T4, in_bytes(JvmtiThreadState::earlyret_state_offset())); + // Clear the earlyret state + __ li(AT, JvmtiThreadState::earlyret_inactive); + __ st_w(AT, cond_addr); + __ membar(__ AnyAny);//no membar here for aarch64 + + + __ remove_activation(state, T0, + false, /* throw_monitor_exception */ + false, /* install_monitor_exception */ + true); /* notify_jvmdi */ + __ membar(__ AnyAny); + __ jr(T0); + + return entry; +} // end of ForceEarlyReturn support + + +//----------------------------------------------------------------------------- +// Helper for vtos entry point generation + +void TemplateInterpreterGenerator::set_vtos_entry_points(Template* t, + address& bep, + address& cep, + address& sep, + address& aep, + address& iep, + address& lep, + address& fep, + address& dep, + address& vep) { + assert(t->is_valid() && t->tos_in() == vtos, "illegal template"); + Label L; + fep = __ pc(); __ push(ftos); __ b(L); + dep = __ pc(); __ push(dtos); __ b(L); + lep = __ pc(); __ push(ltos); __ b(L); + aep =__ pc(); __ push(atos); __ b(L); + bep = cep = sep = + iep = __ pc(); __ push(itos); + vep = __ pc(); + __ bind(L); + generate_and_dispatch(t); +} + +//----------------------------------------------------------------------------- + +// Non-product code +#ifndef PRODUCT +address TemplateInterpreterGenerator::generate_trace_code(TosState state) { + address entry = __ pc(); + + // prepare expression stack + __ push(state); // save tosca + + // tos & tos2 + // trace_bytecode need actually 4 args, the last two is tos&tos2 + // this work fine for x86. but LA ABI calling convention will store A2-A3 + // to the stack position it think is the tos&tos2 + // when the expression stack have no more than 2 data, error occur. + __ ld_d(A2, SP, 0); + __ ld_d(A3, SP, 1 * wordSize); + + // pass arguments & call tracer + __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::trace_bytecode), RA, A2, A3); + __ move(RA, V0); // make sure return address is not destroyed by pop(state) + + // restore expression stack + __ pop(state); // restore tosca + + // return + __ jr(RA); + return entry; +} + +void TemplateInterpreterGenerator::count_bytecode() { + __ li(T8, (long)&BytecodeCounter::_counter_value); + __ ld_w(AT, T8, 0); + __ addi_d(AT, AT, 1); + __ st_w(AT, T8, 0); +} + +void TemplateInterpreterGenerator::histogram_bytecode(Template* t) { + __ li(T8, (long)&BytecodeHistogram::_counters[t->bytecode()]); + __ ld_w(AT, T8, 0); + __ addi_d(AT, AT, 1); + __ st_w(AT, T8, 0); +} + +void TemplateInterpreterGenerator::histogram_bytecode_pair(Template* t) { + __ li(T8, (long)&BytecodePairHistogram::_index); + __ ld_w(T4, T8, 0); + __ srli_d(T4, T4, BytecodePairHistogram::log2_number_of_codes); + __ li(T8, ((long)t->bytecode()) << BytecodePairHistogram::log2_number_of_codes); + __ orr(T4, T4, T8); + __ li(T8, (long)&BytecodePairHistogram::_index); + __ st_w(T4, T8, 0); + __ slli_d(T4, T4, 2); + __ li(T8, (long)BytecodePairHistogram::_counters); + __ add_d(T8, T8, T4); + __ ld_w(AT, T8, 0); + __ addi_d(AT, AT, 1); + __ st_w(AT, T8, 0); +} + + +void TemplateInterpreterGenerator::trace_bytecode(Template* t) { + // Call a little run-time stub to avoid blow-up for each bytecode. + // The run-time runtime saves the right registers, depending on + // the tosca in-state for the given template. 
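+  // Pick the trace stub generated for this template's tos-in state, so the
+  // stub saves and restores the matching tosca register(s) around the VM call.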
+ address entry = Interpreter::trace_code(t->tos_in()); + assert(entry != NULL, "entry must have been generated"); + __ call(entry, relocInfo::none); + //add for compressedoops + __ reinit_heapbase(); +} + + +void TemplateInterpreterGenerator::stop_interpreter_at() { + Label L; + __ li(T8, long(&BytecodeCounter::_counter_value)); + __ ld_w(T8, T8, 0); + __ li(AT, StopInterpreterAt); + __ bne(T8, AT, L); + __ brk(5); + __ bind(L); +} +#endif // !PRODUCT diff --git a/src/hotspot/cpu/loongarch/templateTable_loongarch.hpp b/src/hotspot/cpu/loongarch/templateTable_loongarch.hpp new file mode 100644 index 00000000000..ddb38faf446 --- /dev/null +++ b/src/hotspot/cpu/loongarch/templateTable_loongarch.hpp @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_LOONGARCH_TEMPLATETABLE_LOONGARCH_64_HPP +#define CPU_LOONGARCH_TEMPLATETABLE_LOONGARCH_64_HPP + + static void prepare_invoke(int byte_no, + Register method, // linked method (or i-klass) + Register index = noreg, // itable index, MethodType, etc. + Register recv = noreg, // if caller wants to see it + Register flags = noreg // if caller wants to test it + ); + static void invokevirtual_helper(Register index, Register recv, + Register flags); + static void volatile_barrier(); + + // Helpers + static void index_check(Register array, Register index); + static void index_check_without_pop(Register array, Register index); + +#endif // CPU_LOONGARCH_TEMPLATETABLE_LOONGARCH_64_HPP diff --git a/src/hotspot/cpu/loongarch/templateTable_loongarch_64.cpp b/src/hotspot/cpu/loongarch/templateTable_loongarch_64.cpp new file mode 100644 index 00000000000..4f1d226a1a6 --- /dev/null +++ b/src/hotspot/cpu/loongarch/templateTable_loongarch_64.cpp @@ -0,0 +1,4115 @@ +/* + * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "interpreter/interpreter.hpp" +#include "interpreter/interpreterRuntime.hpp" +#include "interpreter/interp_masm.hpp" +#include "interpreter/templateTable.hpp" +#include "memory/universe.hpp" +#include "oops/methodData.hpp" +#include "oops/objArrayKlass.hpp" +#include "oops/oop.inline.hpp" +#include "prims/methodHandles.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#include "runtime/synchronizer.hpp" +#include "utilities/macros.hpp" + + +#ifndef CC_INTERP + +#define __ _masm-> + +#define T0 RT0 +#define T1 RT1 +#define T2 RT2 +#define T3 RT3 +#define T4 RT4 +#define T6 RT6 +#define T8 RT8 + +// Platform-dependent initialization + +void TemplateTable::pd_initialize() { + // No LoongArch specific initialization +} + +// Address computation: local variables + +static inline Address iaddress(int n) { + return Address(LVP, Interpreter::local_offset_in_bytes(n)); +} + +static inline Address laddress(int n) { + return iaddress(n + 1); +} + +static inline Address faddress(int n) { + return iaddress(n); +} + +static inline Address daddress(int n) { + return laddress(n); +} + +static inline Address aaddress(int n) { + return iaddress(n); +} +static inline Address haddress(int n) { return iaddress(n + 0); } + + +static inline Address at_sp() { return Address(SP, 0); } +static inline Address at_sp_p1() { return Address(SP, 1 * wordSize); } +static inline Address at_sp_p2() { return Address(SP, 2 * wordSize); } + +// At top of Java expression stack which may be different than sp(). +// It isn't for category 1 objects. +static inline Address at_tos () { + Address tos = Address(SP, Interpreter::expr_offset_in_bytes(0)); + return tos; +} + +static inline Address at_tos_p1() { + return Address(SP, Interpreter::expr_offset_in_bytes(1)); +} + +static inline Address at_tos_p2() { + return Address(SP, Interpreter::expr_offset_in_bytes(2)); +} + +static inline Address at_tos_p3() { + return Address(SP, Interpreter::expr_offset_in_bytes(3)); +} + +// we use S0 as bcp, be sure you have bcp in S0 before you call any of the Template generator +Address TemplateTable::at_bcp(int offset) { + assert(_desc->uses_bcp(), "inconsistent uses_bcp information"); + return Address(BCP, offset); +} + +// Miscelaneous helper routines +// Store an oop (or NULL) at the address described by obj. 
+// If val == noreg this means store a NULL +static void do_oop_store(InterpreterMacroAssembler* _masm, + Address dst, + Register val, + DecoratorSet decorators = 0) { + assert(val == noreg || val == V0, "parameter is just for looks"); + __ store_heap_oop(dst, val, T4, T1, decorators); +} + +static void do_oop_load(InterpreterMacroAssembler* _masm, + Address src, + Register dst, + DecoratorSet decorators = 0) { + __ load_heap_oop(dst, src, T4, T1, decorators); +} + +// bytecode folding +void TemplateTable::patch_bytecode(Bytecodes::Code bc, Register bc_reg, + Register tmp_reg, bool load_bc_into_bc_reg/*=true*/, + int byte_no) { + if (!RewriteBytecodes) return; + Label L_patch_done; + + switch (bc) { + case Bytecodes::_fast_aputfield: + case Bytecodes::_fast_bputfield: + case Bytecodes::_fast_zputfield: + case Bytecodes::_fast_cputfield: + case Bytecodes::_fast_dputfield: + case Bytecodes::_fast_fputfield: + case Bytecodes::_fast_iputfield: + case Bytecodes::_fast_lputfield: + case Bytecodes::_fast_sputfield: + { + // We skip bytecode quickening for putfield instructions when + // the put_code written to the constant pool cache is zero. + // This is required so that every execution of this instruction + // calls out to InterpreterRuntime::resolve_get_put to do + // additional, required work. + assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range"); + assert(load_bc_into_bc_reg, "we use bc_reg as temp"); + __ get_cache_and_index_and_bytecode_at_bcp(tmp_reg, bc_reg, tmp_reg, byte_no, 1); + __ addi_d(bc_reg, R0, bc); + __ beq(tmp_reg, R0, L_patch_done); + } + break; + default: + assert(byte_no == -1, "sanity"); + // the pair bytecodes have already done the load. + if (load_bc_into_bc_reg) { + __ li(bc_reg, bc); + } + } + + if (JvmtiExport::can_post_breakpoint()) { + Label L_fast_patch; + // if a breakpoint is present we can't rewrite the stream directly + __ ld_bu(tmp_reg, at_bcp(0)); + __ li(AT, Bytecodes::_breakpoint); + __ bne(tmp_reg, AT, L_fast_patch); + + __ get_method(tmp_reg); + // Let breakpoint table handling rewrite to quicker bytecode + __ call_VM(NOREG, CAST_FROM_FN_PTR(address, + InterpreterRuntime::set_original_bytecode_at), tmp_reg, BCP, bc_reg); + + __ b(L_patch_done); + __ bind(L_fast_patch); + } + +#ifdef ASSERT + Label L_okay; + __ ld_bu(tmp_reg, at_bcp(0)); + __ li(AT, (int)Bytecodes::java_code(bc)); + __ beq(tmp_reg, AT, L_okay); + __ beq(tmp_reg, bc_reg, L_patch_done); + __ stop("patching the wrong bytecode"); + __ bind(L_okay); +#endif + + // patch bytecode + __ st_b(bc_reg, at_bcp(0)); + __ bind(L_patch_done); +} + + +// Individual instructions + +void TemplateTable::nop() { + transition(vtos, vtos); + // nothing to do +} + +void TemplateTable::shouldnotreachhere() { + transition(vtos, vtos); + __ stop("shouldnotreachhere bytecode"); +} + +void TemplateTable::aconst_null() { + transition(vtos, atos); + __ move(FSR, R0); +} + +void TemplateTable::iconst(int value) { + transition(vtos, itos); + if (value == 0) { + __ move(FSR, R0); + } else { + __ li(FSR, value); + } +} + +void TemplateTable::lconst(int value) { + transition(vtos, ltos); + if (value == 0) { + __ move(FSR, R0); + } else { + __ li(FSR, value); + } +} + +void TemplateTable::fconst(int value) { + transition(vtos, ftos); + switch( value ) { + case 0: __ movgr2fr_w(FSF, R0); return; + case 1: __ addi_d(AT, R0, 1); break; + case 2: __ addi_d(AT, R0, 2); break; + default: ShouldNotReachHere(); + } + __ movgr2fr_w(FSF, AT); + __ ffint_s_w(FSF, FSF); +} + +void TemplateTable::dconst(int value) { 
+ transition(vtos, dtos); + switch( value ) { + case 0: __ movgr2fr_d(FSF, R0); + return; + case 1: __ addi_d(AT, R0, 1); + __ movgr2fr_d(FSF, AT); + __ ffint_d_w(FSF, FSF); + break; + default: ShouldNotReachHere(); + } +} + +void TemplateTable::bipush() { + transition(vtos, itos); + __ ld_b(FSR, at_bcp(1)); +} + +void TemplateTable::sipush() { + transition(vtos, itos); + __ ld_b(FSR, BCP, 1); + __ ld_bu(AT, BCP, 2); + __ slli_d(FSR, FSR, 8); + __ orr(FSR, FSR, AT); +} + +// T1 : tags +// T2 : index +// T3 : cpool +// T8 : tag +void TemplateTable::ldc(bool wide) { + transition(vtos, vtos); + Label call_ldc, notFloat, notClass, notInt, Done; + // get index in cpool + if (wide) { + __ get_unsigned_2_byte_index_at_bcp(T2, 1); + } else { + __ ld_bu(T2, at_bcp(1)); + } + + __ get_cpool_and_tags(T3, T1); + + const int base_offset = ConstantPool::header_size() * wordSize; + const int tags_offset = Array::base_offset_in_bytes(); + + // get type + __ add_d(AT, T1, T2); + __ ld_b(T1, AT, tags_offset); + if(os::is_MP()) { + __ membar(Assembler::Membar_mask_bits(__ LoadLoad|__ LoadStore)); + } + //now T1 is the tag + + // unresolved class - get the resolved class + __ addi_d(AT, T1, - JVM_CONSTANT_UnresolvedClass); + __ beq(AT, R0, call_ldc); + + // unresolved class in error (resolution failed) - call into runtime + // so that the same error from first resolution attempt is thrown. + __ addi_d(AT, T1, -JVM_CONSTANT_UnresolvedClassInError); + __ beq(AT, R0, call_ldc); + + // resolved class - need to call vm to get java mirror of the class + __ addi_d(AT, T1, - JVM_CONSTANT_Class); + __ slli_d(T2, T2, Address::times_8); + __ bne(AT, R0, notClass); + + __ bind(call_ldc); + __ li(A1, wide); + call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::ldc), A1); + //__ push(atos); + __ addi_d(SP, SP, - Interpreter::stackElementSize); + __ st_d(FSR, SP, 0); + __ b(Done); + + __ bind(notClass); + __ addi_d(AT, T1, -JVM_CONSTANT_Float); + __ bne(AT, R0, notFloat); + // ftos + __ add_d(AT, T3, T2); + __ fld_s(FSF, AT, base_offset); + //__ push_f(); + __ addi_d(SP, SP, - Interpreter::stackElementSize); + __ fst_s(FSF, SP, 0); + __ b(Done); + + __ bind(notFloat); + __ addi_d(AT, T1, -JVM_CONSTANT_Integer); + __ bne(AT, R0, notInt); + // itos + __ add_d(T0, T3, T2); + __ ld_w(FSR, T0, base_offset); + __ push(itos); + __ b(Done); + + // assume the tag is for condy; if not, the VM runtime will tell us + __ bind(notInt); + condy_helper(Done); + + __ bind(Done); +} + +void TemplateTable::condy_helper(Label& Done) { + const Register obj = FSR; + const Register off = SSR; + const Register flags = T3; + const Register rarg = A1; + __ li(rarg, (int)bytecode()); + __ call_VM(obj, CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_ldc), rarg); + __ get_vm_result_2(flags, TREG); + // VMr = obj = base address to find primitive value to push + // VMr2 = flags = (tos, off) using format of CPCE::_flags + __ li(AT, ConstantPoolCacheEntry::field_index_mask); + __ andr(off, flags, AT); + __ add_d(obj, off, obj); + const Address field(obj, 0 * wordSize); + + // What sort of thing are we loading? 
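+  // The tos state sits in the upper bits of the CPCE flags word (above
+  // tos_state_shift); shift it down and compare it against itos/ftos/... below.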
+ __ srli_d(flags, flags, ConstantPoolCacheEntry::tos_state_shift); + ConstantPoolCacheEntry::verify_tos_state_shift(); + + switch (bytecode()) { + case Bytecodes::_ldc: + case Bytecodes::_ldc_w: + { + // tos in (itos, ftos, stos, btos, ctos, ztos) + Label notInt, notFloat, notShort, notByte, notChar, notBool; + __ addi_d(AT, flags, -itos); + __ bne(AT, R0, notInt); + // itos + __ ld_d(obj, field); + __ push(itos); + __ b(Done); + + __ bind(notInt); + __ addi_d(AT, flags, -ftos); + __ bne(AT, R0, notFloat); + // ftos + __ fld_s(FSF, field); + __ push(ftos); + __ b(Done); + + __ bind(notFloat); + __ addi_d(AT, flags, -stos); + __ bne(AT, R0, notShort); + // stos + __ ld_h(obj, field); + __ push(stos); + __ b(Done); + + __ bind(notShort); + __ addi_d(AT, flags, -btos); + __ bne(AT, R0, notByte); + // btos + __ ld_b(obj, field); + __ push(btos); + __ b(Done); + + __ bind(notByte); + __ addi_d(AT, flags, -ctos); + __ bne(AT, R0, notChar); + // ctos + __ ld_hu(obj, field); + __ push(ctos); + __ b(Done); + + __ bind(notChar); + __ addi_d(AT, flags, -ztos); + __ bne(AT, R0, notBool); + // ztos + __ ld_bu(obj, field); + __ push(ztos); + __ b(Done); + + __ bind(notBool); + break; + } + + case Bytecodes::_ldc2_w: + { + Label notLong, notDouble; + __ addi_d(AT, flags, -ltos); + __ bne(AT, R0, notLong); + // ltos + __ ld_d(obj, field); + __ push(ltos); + __ b(Done); + + __ bind(notLong); + __ addi_d(AT, flags, -dtos); + __ bne(AT, R0, notDouble); + // dtos + __ fld_d(FSF, field); + __ push(dtos); + __ b(Done); + + __ bind(notDouble); + break; + } + + default: + ShouldNotReachHere(); + } + + __ stop("bad ldc/condy"); +} + +// Fast path for caching oop constants. +void TemplateTable::fast_aldc(bool wide) { + transition(vtos, atos); + + Register result = FSR; + Register tmp = SSR; + Register rarg = A1; + int index_size = wide ? sizeof(u2) : sizeof(u1); + + Label resolved; + + // We are resolved if the resolved reference cache entry contains a + // non-null object (String, MethodType, etc.) + assert_different_registers(result, tmp); + __ get_cache_index_at_bcp(tmp, 1, index_size); + __ load_resolved_reference_at_index(result, tmp, T4); + __ bne(result, R0, resolved); + + address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_ldc); + // first time invocation - must resolve first + int i = (int)bytecode(); + __ li(rarg, i); + __ call_VM(result, entry, rarg); + + __ bind(resolved); + + { // Check for the null sentinel. + // If we just called the VM, it already did the mapping for us, + // but it's harmless to retry. 
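+    // Rough equivalent of the check below: an ldc constant that resolved to
+    // null is cached as the null sentinel, so that a zero cache entry still
+    // means "not resolved yet"; here the sentinel is mapped back to NULL:
+    //
+    //   if (result == Universe::the_null_sentinel()) result = NULL;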
+ Label notNull; + __ li(rarg, (long)Universe::the_null_sentinel_addr()); + __ ld_ptr(tmp, Address(rarg)); + __ bne(tmp, result, notNull); + __ xorr(result, result, result); // NULL object reference + __ bind(notNull); + } + + if (VerifyOops) { + __ verify_oop(result); + } +} + +// used register: T2, T3, T1 +// T2 : index +// T3 : cpool +// T1 : tag +void TemplateTable::ldc2_w() { + transition(vtos, vtos); + Label notDouble, notLong, Done; + + // get index in cpool + __ get_unsigned_2_byte_index_at_bcp(T2, 1); + + __ get_cpool_and_tags(T3, T1); + + const int base_offset = ConstantPool::header_size() * wordSize; + const int tags_offset = Array::base_offset_in_bytes(); + + // get type in T1 + __ add_d(AT, T1, T2); + __ ld_b(T1, AT, tags_offset); + + __ addi_d(AT, T1, -JVM_CONSTANT_Double); + __ bne(AT, R0, notDouble); + + // dtos + __ alsl_d(AT, T2, T3, Address::times_8 - 1); + __ fld_d(FSF, AT, base_offset); + __ push(dtos); + __ b(Done); + + __ bind(notDouble); + __ addi_d(AT, T1, -JVM_CONSTANT_Long); + __ bne(AT, R0, notLong); + + // ltos + __ slli_d(T2, T2, Address::times_8); + __ add_d(AT, T3, T2); + __ ld_d(FSR, AT, base_offset); + __ push(ltos); + __ b(Done); + + __ bind(notLong); + condy_helper(Done); + + __ bind(Done); +} + +// we compute the actual local variable address here +void TemplateTable::locals_index(Register reg, int offset) { + __ ld_bu(reg, at_bcp(offset)); + __ slli_d(reg, reg, Address::times_8); + __ sub_d(reg, LVP, reg); +} + +void TemplateTable::iload() { + iload_internal(); +} + +void TemplateTable::nofast_iload() { + iload_internal(may_not_rewrite); +} + +// this method will do bytecode folding of the two form: +// iload iload iload caload +// used register : T2, T3 +// T2 : bytecode +// T3 : folded code +void TemplateTable::iload_internal(RewriteControl rc) { + transition(vtos, itos); + if (RewriteFrequentPairs && rc == may_rewrite) { + Label rewrite, done; + // get the next bytecode in T2 + __ ld_bu(T2, at_bcp(Bytecodes::length_for(Bytecodes::_iload))); + // if _iload, wait to rewrite to iload2. We only want to rewrite the + // last two iloads in a pair. Comparing against fast_iload means that + // the next bytecode is neither an iload or a caload, and therefore + // an iload pair. + __ li(AT, Bytecodes::_iload); + __ beq(AT, T2, done); + + __ li(T3, Bytecodes::_fast_iload2); + __ li(AT, Bytecodes::_fast_iload); + __ beq(AT, T2, rewrite); + + // if _caload, rewrite to fast_icaload + __ li(T3, Bytecodes::_fast_icaload); + __ li(AT, Bytecodes::_caload); + __ beq(AT, T2, rewrite); + + // rewrite so iload doesn't check again. 
+ __ li(T3, Bytecodes::_fast_iload); + + // rewrite + // T3 : fast bytecode + __ bind(rewrite); + patch_bytecode(Bytecodes::_iload, T3, T2, false); + __ bind(done); + } + + // Get the local value into tos + locals_index(T2); + __ ld_w(FSR, T2, 0); +} + +// used register T2 +// T2 : index +void TemplateTable::fast_iload2() { + transition(vtos, itos); + locals_index(T2); + __ ld_w(FSR, T2, 0); + __ push(itos); + locals_index(T2, 3); + __ ld_w(FSR, T2, 0); +} + +// used register T2 +// T2 : index +void TemplateTable::fast_iload() { + transition(vtos, itos); + locals_index(T2); + __ ld_w(FSR, T2, 0); +} + +// used register T2 +// T2 : index +void TemplateTable::lload() { + transition(vtos, ltos); + locals_index(T2); + __ ld_d(FSR, T2, -wordSize); +} + +// used register T2 +// T2 : index +void TemplateTable::fload() { + transition(vtos, ftos); + locals_index(T2); + __ fld_s(FSF, T2, 0); +} + +// used register T2 +// T2 : index +void TemplateTable::dload() { + transition(vtos, dtos); + locals_index(T2); + __ fld_d(FSF, T2, -wordSize); +} + +// used register T2 +// T2 : index +void TemplateTable::aload() { + transition(vtos, atos); + locals_index(T2); + __ ld_d(FSR, T2, 0); +} + +void TemplateTable::locals_index_wide(Register reg) { + __ get_unsigned_2_byte_index_at_bcp(reg, 2); + __ slli_d(reg, reg, Address::times_8); + __ sub_d(reg, LVP, reg); +} + +// used register T2 +// T2 : index +void TemplateTable::wide_iload() { + transition(vtos, itos); + locals_index_wide(T2); + __ ld_d(FSR, T2, 0); +} + +// used register T2 +// T2 : index +void TemplateTable::wide_lload() { + transition(vtos, ltos); + locals_index_wide(T2); + __ ld_d(FSR, T2, -wordSize); +} + +// used register T2 +// T2 : index +void TemplateTable::wide_fload() { + transition(vtos, ftos); + locals_index_wide(T2); + __ fld_s(FSF, T2, 0); +} + +// used register T2 +// T2 : index +void TemplateTable::wide_dload() { + transition(vtos, dtos); + locals_index_wide(T2); + __ fld_d(FSF, T2, -wordSize); +} + +// used register T2 +// T2 : index +void TemplateTable::wide_aload() { + transition(vtos, atos); + locals_index_wide(T2); + __ ld_d(FSR, T2, 0); +} + +// we use A2 as the regiser for index, BE CAREFUL! 
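+// The unsigned compare in index_check_without_pop below folds both failure
+// cases into a single branch; in outline (not the emitted code):
+//
+//   if ((uint32_t) index >= (uint32_t) length)
+//     goto Interpreter::_throw_ArrayIndexOutOfBoundsException_entry;
+//
+// bltu treats a negative index as a huge unsigned value, so "index < 0" and
+// "index >= length" are both rejected by the one bltu/fall-through pair.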
+// we do not use reg 29 for now, leaving it free for later optimization +void TemplateTable::index_check(Register array, Register index) { + // Pop ptr into array + __ pop_ptr(array); + index_check_without_pop(array, index); +} + +void TemplateTable::index_check_without_pop(Register array, Register index) { + // destroys A2 + // check array + __ null_check(array, arrayOopDesc::length_offset_in_bytes()); + + // sign extend since tos (index) might contain garbage in upper bits + __ slli_w(index, index, 0); + + // check index + Label ok; + __ ld_w(AT, array, arrayOopDesc::length_offset_in_bytes()); + __ bltu(index, AT, ok); + + // throw_ArrayIndexOutOfBoundsException assumes the aberrant index is in A2 + if (A1 != array) __ move(A1, array); + if (A2 != index) __ move(A2, index); + __ jmp(Interpreter::_throw_ArrayIndexOutOfBoundsException_entry); + __ bind(ok); +} + +void TemplateTable::iaload() { + transition(itos, itos); + index_check(SSR, FSR); + __ alsl_d(FSR, FSR, SSR, 1); + __ access_load_at(T_INT, IN_HEAP | IS_ARRAY, FSR, Address(FSR, arrayOopDesc::base_offset_in_bytes(T_INT)), noreg, noreg); +} + +void TemplateTable::laload() { + transition(itos, ltos); + index_check(SSR, FSR); + __ alsl_d(T4, FSR, SSR, Address::times_8 - 1); + __ access_load_at(T_LONG, IN_HEAP | IS_ARRAY, FSR, Address(T4, arrayOopDesc::base_offset_in_bytes(T_LONG)), noreg, noreg); +} + +void TemplateTable::faload() { + transition(itos, ftos); + index_check(SSR, FSR); + __ alsl_d(FSR, FSR, SSR, Address::times_4 - 1); + __ access_load_at(T_FLOAT, IN_HEAP | IS_ARRAY, noreg, Address(FSR, arrayOopDesc::base_offset_in_bytes(T_FLOAT)), noreg, noreg); +} + +void TemplateTable::daload() { + transition(itos, dtos); + index_check(SSR, FSR); + __ alsl_d(T4, FSR, SSR, 2); + __ access_load_at(T_DOUBLE, IN_HEAP | IS_ARRAY, noreg, Address(T4, arrayOopDesc::base_offset_in_bytes(T_DOUBLE)), noreg, noreg); +} + +void TemplateTable::aaload() { + transition(itos, atos); + index_check(SSR, FSR); + __ alsl_d(FSR, FSR, SSR, (UseCompressedOops ?
Address::times_4 : Address::times_8) - 1); + //add for compressedoops + do_oop_load(_masm, + Address(FSR, arrayOopDesc::base_offset_in_bytes(T_OBJECT)), + FSR, + IS_ARRAY); +} + +void TemplateTable::baload() { + transition(itos, itos); + index_check(SSR, FSR); + __ add_d(FSR, SSR, FSR); + __ access_load_at(T_BYTE, IN_HEAP | IS_ARRAY, FSR, Address(FSR, arrayOopDesc::base_offset_in_bytes(T_BYTE)), noreg, noreg); +} + +void TemplateTable::caload() { + transition(itos, itos); + index_check(SSR, FSR); + __ alsl_d(FSR, FSR, SSR, Address::times_2 - 1); + __ access_load_at(T_CHAR, IN_HEAP | IS_ARRAY, FSR, Address(FSR, arrayOopDesc::base_offset_in_bytes(T_CHAR)), noreg, noreg); +} + +// iload followed by caload frequent pair +// used register : T2 +// T2 : index +void TemplateTable::fast_icaload() { + transition(vtos, itos); + // load index out of locals + locals_index(T2); + __ ld_w(FSR, T2, 0); + index_check(SSR, FSR); + __ alsl_d(FSR, FSR, SSR, 0); + __ access_load_at(T_CHAR, IN_HEAP | IS_ARRAY, FSR, Address(FSR, arrayOopDesc::base_offset_in_bytes(T_CHAR)), noreg, noreg); +} + +void TemplateTable::saload() { + transition(itos, itos); + index_check(SSR, FSR); + __ alsl_d(FSR, FSR, SSR, Address::times_2 - 1); + __ access_load_at(T_SHORT, IN_HEAP | IS_ARRAY, FSR, Address(FSR, arrayOopDesc::base_offset_in_bytes(T_SHORT)), noreg, noreg); +} + +void TemplateTable::iload(int n) { + transition(vtos, itos); + __ ld_w(FSR, iaddress(n)); +} + +void TemplateTable::lload(int n) { + transition(vtos, ltos); + __ ld_d(FSR, laddress(n)); +} + +void TemplateTable::fload(int n) { + transition(vtos, ftos); + __ fld_s(FSF, faddress(n)); +} + +void TemplateTable::dload(int n) { + transition(vtos, dtos); + __ fld_d(FSF, laddress(n)); +} + +void TemplateTable::aload(int n) { + transition(vtos, atos); + __ ld_d(FSR, aaddress(n)); +} + +void TemplateTable::aload_0() { + aload_0_internal(); +} + +void TemplateTable::nofast_aload_0() { + aload_0_internal(may_not_rewrite); +} + +// used register : T2, T3 +// T2 : bytecode +// T3 : folded code +void TemplateTable::aload_0_internal(RewriteControl rc) { + transition(vtos, atos); + // According to bytecode histograms, the pairs: + // + // _aload_0, _fast_igetfield + // _aload_0, _fast_agetfield + // _aload_0, _fast_fgetfield + // + // occur frequently. If RewriteFrequentPairs is set, the (slow) + // _aload_0 bytecode checks if the next bytecode is either + // _fast_igetfield, _fast_agetfield or _fast_fgetfield and then + // rewrites the current bytecode into a pair bytecode; otherwise it + // rewrites the current bytecode into _fast_aload_0 that doesn't do + // the pair check anymore. + // + // Note: If the next bytecode is _getfield, the rewrite must be + // delayed, otherwise we may miss an opportunity for a pair. 
+ // + // Also rewrite frequent pairs + // aload_0, aload_1 + // aload_0, iload_1 + // These bytecodes with a small amount of code are most profitable + // to rewrite + if (RewriteFrequentPairs && rc == may_rewrite) { + Label rewrite, done; + // get the next bytecode in T2 + __ ld_bu(T2, at_bcp(Bytecodes::length_for(Bytecodes::_aload_0))); + + // do actual aload_0 + aload(0); + + // if _getfield then wait with rewrite + __ li(AT, Bytecodes::_getfield); + __ beq(AT, T2, done); + + // if _igetfield then reqrite to _fast_iaccess_0 + assert(Bytecodes::java_code(Bytecodes::_fast_iaccess_0) == + Bytecodes::_aload_0, + "fix bytecode definition"); + __ li(T3, Bytecodes::_fast_iaccess_0); + __ li(AT, Bytecodes::_fast_igetfield); + __ beq(AT, T2, rewrite); + + // if _agetfield then reqrite to _fast_aaccess_0 + assert(Bytecodes::java_code(Bytecodes::_fast_aaccess_0) == + Bytecodes::_aload_0, + "fix bytecode definition"); + __ li(T3, Bytecodes::_fast_aaccess_0); + __ li(AT, Bytecodes::_fast_agetfield); + __ beq(AT, T2, rewrite); + + // if _fgetfield then reqrite to _fast_faccess_0 + assert(Bytecodes::java_code(Bytecodes::_fast_faccess_0) == + Bytecodes::_aload_0, + "fix bytecode definition"); + __ li(T3, Bytecodes::_fast_faccess_0); + __ li(AT, Bytecodes::_fast_fgetfield); + __ beq(AT, T2, rewrite); + + // else rewrite to _fast_aload0 + assert(Bytecodes::java_code(Bytecodes::_fast_aload_0) == + Bytecodes::_aload_0, + "fix bytecode definition"); + __ li(T3, Bytecodes::_fast_aload_0); + + // rewrite + __ bind(rewrite); + patch_bytecode(Bytecodes::_aload_0, T3, T2, false); + + __ bind(done); + } else { + aload(0); + } +} + +void TemplateTable::istore() { + transition(itos, vtos); + locals_index(T2); + __ st_w(FSR, T2, 0); +} + +void TemplateTable::lstore() { + transition(ltos, vtos); + locals_index(T2); + __ st_d(FSR, T2, -wordSize); +} + +void TemplateTable::fstore() { + transition(ftos, vtos); + locals_index(T2); + __ fst_s(FSF, T2, 0); +} + +void TemplateTable::dstore() { + transition(dtos, vtos); + locals_index(T2); + __ fst_d(FSF, T2, -wordSize); +} + +void TemplateTable::astore() { + transition(vtos, vtos); + __ pop_ptr(FSR); + locals_index(T2); + __ st_d(FSR, T2, 0); +} + +void TemplateTable::wide_istore() { + transition(vtos, vtos); + __ pop_i(FSR); + locals_index_wide(T2); + __ st_d(FSR, T2, 0); +} + +void TemplateTable::wide_lstore() { + transition(vtos, vtos); + __ pop_l(FSR); + locals_index_wide(T2); + __ st_d(FSR, T2, -wordSize); +} + +void TemplateTable::wide_fstore() { + wide_istore(); +} + +void TemplateTable::wide_dstore() { + wide_lstore(); +} + +void TemplateTable::wide_astore() { + transition(vtos, vtos); + __ pop_ptr(FSR); + locals_index_wide(T2); + __ st_d(FSR, T2, 0); +} + +// used register : T2 +void TemplateTable::iastore() { + transition(itos, vtos); + __ pop_i(SSR); // T2: array SSR: index + index_check(T2, SSR); // prefer index in SSR + __ alsl_d(T2, SSR, T2, Address::times_4 - 1); + __ access_store_at(T_INT, IN_HEAP | IS_ARRAY, Address(T2, arrayOopDesc::base_offset_in_bytes(T_INT)), FSR, noreg, noreg); +} + +// used register T2, T3 +void TemplateTable::lastore() { + transition(ltos, vtos); + __ pop_i (T2); + index_check(T3, T2); + __ alsl_d(T3, T2, T3, Address::times_8 - 1); + __ access_store_at(T_LONG, IN_HEAP | IS_ARRAY, Address(T3, arrayOopDesc::base_offset_in_bytes(T_LONG)), FSR, noreg, noreg); +} + +// used register T2 +void TemplateTable::fastore() { + transition(ftos, vtos); + __ pop_i(SSR); + index_check(T2, SSR); + __ alsl_d(T2, SSR, T2, Address::times_4 - 1); + __ 
access_store_at(T_FLOAT, IN_HEAP | IS_ARRAY, Address(T2, arrayOopDesc::base_offset_in_bytes(T_FLOAT)), noreg, noreg, noreg); +} + +// used register T2, T3 +void TemplateTable::dastore() { + transition(dtos, vtos); + __ pop_i (T2); + index_check(T3, T2); + __ alsl_d(T3, T2, T3, Address::times_8 - 1); + __ access_store_at(T_DOUBLE, IN_HEAP | IS_ARRAY, Address(T3, arrayOopDesc::base_offset_in_bytes(T_DOUBLE)), noreg, noreg, noreg); +} + +// used register : T2, T3, T8 +// T2 : array +// T3 : subklass +// T8 : supklass +void TemplateTable::aastore() { + Label is_null, ok_is_subtype, done; + transition(vtos, vtos); + // stack: ..., array, index, value + __ ld_d(FSR, at_tos()); // Value + __ ld_w(SSR, at_tos_p1()); // Index + __ ld_d(T2, at_tos_p2()); // Array + + // index_check(T2, SSR); + index_check_without_pop(T2, SSR); + // do array store check - check for NULL value first + __ beq(FSR, R0, is_null); + + // Move subklass into T3 + //add for compressedoops + __ load_klass(T3, FSR); + // Move superklass into T8 + //add for compressedoops + __ load_klass(T8, T2); + __ ld_d(T8, Address(T8, ObjArrayKlass::element_klass_offset())); + // Compress array+index*4+12 into a single register. T2 + __ alsl_d(T2, SSR, T2, (UseCompressedOops? Address::times_4 : Address::times_8) - 1); + __ addi_d(T2, T2, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); + + // Generate subtype check. + // Superklass in T8. Subklass in T3. + __ gen_subtype_check(T8, T3, ok_is_subtype); + // Come here on failure + // object is at FSR + __ jmp(Interpreter::_throw_ArrayStoreException_entry); + // Come here on success + __ bind(ok_is_subtype); + do_oop_store(_masm, Address(T2, 0), FSR, IS_ARRAY); + __ b(done); + + // Have a NULL in FSR, T2=array, SSR=index. Store NULL at ary[idx] + __ bind(is_null); + __ profile_null_seen(T4); + __ alsl_d(T2, SSR, T2, (UseCompressedOops? Address::times_4 : Address::times_8) - 1); + do_oop_store(_masm, Address(T2, arrayOopDesc::base_offset_in_bytes(T_OBJECT)), noreg, IS_ARRAY); + + __ bind(done); + __ addi_d(SP, SP, 3 * Interpreter::stackElementSize); +} + +void TemplateTable::bastore() { + transition(itos, vtos); + __ pop_i(SSR); + index_check(T2, SSR); + + // Need to check whether array is boolean or byte + // since both types share the bastore bytecode. 
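+  // What the layout_helper test below implements, roughly:
+  //
+  //   jint lh = array_klass->layout_helper();
+  //   if (lh & Klass::layout_helper_boolean_diffbit())  // boolean[] ?
+  //     value &= 1;                                      // keep only the low bit
+  //
+  // (a sketch; the diffbit is the single bit that differs between the
+  // layout_helper values of T_BOOLEAN and T_BYTE arrays)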
+ __ load_klass(T4, T2); + __ ld_w(T4, T4, in_bytes(Klass::layout_helper_offset())); + + int diffbit = Klass::layout_helper_boolean_diffbit(); + __ li(AT, diffbit); + + Label L_skip; + __ andr(AT, T4, AT); + __ beq(AT, R0, L_skip); + __ andi(FSR, FSR, 0x1); + __ bind(L_skip); + + __ add_d(SSR, T2, SSR); + __ access_store_at(T_BYTE, IN_HEAP | IS_ARRAY, Address(SSR, arrayOopDesc::base_offset_in_bytes(T_BYTE)), FSR, noreg, noreg); +} + +void TemplateTable::castore() { + transition(itos, vtos); + __ pop_i(SSR); + index_check(T2, SSR); + __ alsl_d(SSR, SSR, T2, Address::times_2 - 1); + __ access_store_at(T_CHAR, IN_HEAP | IS_ARRAY, Address(SSR, arrayOopDesc::base_offset_in_bytes(T_CHAR)), FSR, noreg, noreg); +} + +void TemplateTable::sastore() { + castore(); +} + +void TemplateTable::istore(int n) { + transition(itos, vtos); + __ st_w(FSR, iaddress(n)); +} + +void TemplateTable::lstore(int n) { + transition(ltos, vtos); + __ st_d(FSR, laddress(n)); +} + +void TemplateTable::fstore(int n) { + transition(ftos, vtos); + __ fst_s(FSF, faddress(n)); +} + +void TemplateTable::dstore(int n) { + transition(dtos, vtos); + __ fst_d(FSF, laddress(n)); +} + +void TemplateTable::astore(int n) { + transition(vtos, vtos); + __ pop_ptr(FSR); + __ st_d(FSR, aaddress(n)); +} + +void TemplateTable::pop() { + transition(vtos, vtos); + __ addi_d(SP, SP, Interpreter::stackElementSize); +} + +void TemplateTable::pop2() { + transition(vtos, vtos); + __ addi_d(SP, SP, 2 * Interpreter::stackElementSize); +} + +void TemplateTable::dup() { + transition(vtos, vtos); + // stack: ..., a + __ load_ptr(0, FSR); + __ push_ptr(FSR); + // stack: ..., a, a +} + +// blows FSR +void TemplateTable::dup_x1() { + transition(vtos, vtos); + // stack: ..., a, b + __ load_ptr(0, FSR); // load b + __ load_ptr(1, A5); // load a + __ store_ptr(1, FSR); // store b + __ store_ptr(0, A5); // store a + __ push_ptr(FSR); // push b + // stack: ..., b, a, b +} + +// blows FSR +void TemplateTable::dup_x2() { + transition(vtos, vtos); + // stack: ..., a, b, c + __ load_ptr(0, FSR); // load c + __ load_ptr(2, A5); // load a + __ store_ptr(2, FSR); // store c in a + __ push_ptr(FSR); // push c + // stack: ..., c, b, c, c + __ load_ptr(2, FSR); // load b + __ store_ptr(2, A5); // store a in b + // stack: ..., c, a, c, c + __ store_ptr(1, FSR); // store b in c + // stack: ..., c, a, b, c +} + +// blows FSR +void TemplateTable::dup2() { + transition(vtos, vtos); + // stack: ..., a, b + __ load_ptr(1, FSR); // load a + __ push_ptr(FSR); // push a + __ load_ptr(1, FSR); // load b + __ push_ptr(FSR); // push b + // stack: ..., a, b, a, b +} + +// blows FSR +void TemplateTable::dup2_x1() { + transition(vtos, vtos); + // stack: ..., a, b, c + __ load_ptr(0, T2); // load c + __ load_ptr(1, FSR); // load b + __ push_ptr(FSR); // push b + __ push_ptr(T2); // push c + // stack: ..., a, b, c, b, c + __ store_ptr(3, T2); // store c in b + // stack: ..., a, c, c, b, c + __ load_ptr(4, T2); // load a + __ store_ptr(2, T2); // store a in 2nd c + // stack: ..., a, c, a, b, c + __ store_ptr(4, FSR); // store b in a + // stack: ..., b, c, a, b, c + + // stack: ..., b, c, a, b, c +} + +// blows FSR, SSR +void TemplateTable::dup2_x2() { + transition(vtos, vtos); + // stack: ..., a, b, c, d + // stack: ..., a, b, c, d + __ load_ptr(0, T2); // load d + __ load_ptr(1, FSR); // load c + __ push_ptr(FSR); // push c + __ push_ptr(T2); // push d + // stack: ..., a, b, c, d, c, d + __ load_ptr(4, FSR); // load b + __ store_ptr(2, FSR); // store b in d + __ store_ptr(4, T2); // store 
d in b + // stack: ..., a, d, c, b, c, d + __ load_ptr(5, T2); // load a + __ load_ptr(3, FSR); // load c + __ store_ptr(3, T2); // store a in c + __ store_ptr(5, FSR); // store c in a + // stack: ..., c, d, a, b, c, d + + // stack: ..., c, d, a, b, c, d +} + +// blows FSR +void TemplateTable::swap() { + transition(vtos, vtos); + // stack: ..., a, b + + __ load_ptr(1, A5); // load a + __ load_ptr(0, FSR); // load b + __ store_ptr(0, A5); // store a in b + __ store_ptr(1, FSR); // store b in a + + // stack: ..., b, a +} + +void TemplateTable::iop2(Operation op) { + transition(itos, itos); + + __ pop_i(SSR); + switch (op) { + case add : __ add_w(FSR, SSR, FSR); break; + case sub : __ sub_w(FSR, SSR, FSR); break; + case mul : __ mul_w(FSR, SSR, FSR); break; + case _and : __ andr(FSR, SSR, FSR); break; + case _or : __ orr(FSR, SSR, FSR); break; + case _xor : __ xorr(FSR, SSR, FSR); break; + case shl : __ sll_w(FSR, SSR, FSR); break; + case shr : __ sra_w(FSR, SSR, FSR); break; + case ushr : __ srl_w(FSR, SSR, FSR); break; + default : ShouldNotReachHere(); + } +} + +// the result stored in FSR, SSR, +// used registers : T2, T3 +void TemplateTable::lop2(Operation op) { + transition(ltos, ltos); + __ pop_l(T2); + + switch (op) { + case add : __ add_d(FSR, T2, FSR); break; + case sub : __ sub_d(FSR, T2, FSR); break; + case _and: __ andr(FSR, T2, FSR); break; + case _or : __ orr(FSR, T2, FSR); break; + case _xor: __ xorr(FSR, T2, FSR); break; + default : ShouldNotReachHere(); + } +} + +// java require this bytecode could handle 0x80000000/-1, dont cause a overflow exception, +// the result is 0x80000000 +// the godson2 cpu do the same, so we need not handle this specially like x86 +void TemplateTable::idiv() { + transition(itos, itos); + Label not_zero; + + __ bne(FSR, R0, not_zero); + __ jmp(Interpreter::_throw_ArithmeticException_entry); + __ bind(not_zero); + + __ pop_i(SSR); + __ div_w(FSR, SSR, FSR); +} + +void TemplateTable::irem() { + transition(itos, itos); + Label not_zero; + __ pop_i(SSR); + + __ bne(FSR, R0, not_zero); + //__ brk(7); + __ jmp(Interpreter::_throw_ArithmeticException_entry); + + __ bind(not_zero); + __ mod_w(FSR, SSR, FSR); +} + +void TemplateTable::lmul() { + transition(ltos, ltos); + __ pop_l(T2); + __ mul_d(FSR, T2, FSR); +} + +// NOTE: i DONT use the Interpreter::_throw_ArithmeticException_entry +void TemplateTable::ldiv() { + transition(ltos, ltos); + Label normal; + + __ bne(FSR, R0, normal); + + //__ brk(7); //generate FPE + __ jmp(Interpreter::_throw_ArithmeticException_entry); + + __ bind(normal); + __ pop_l(A2); + __ div_d(FSR, A2, FSR); +} + +// NOTE: i DONT use the Interpreter::_throw_ArithmeticException_entry +void TemplateTable::lrem() { + transition(ltos, ltos); + Label normal; + + __ bne(FSR, R0, normal); + + __ jmp(Interpreter::_throw_ArithmeticException_entry); + + __ bind(normal); + __ pop_l (A2); + + __ mod_d(FSR, A2, FSR); +} + +// result in FSR +// used registers : T0 +void TemplateTable::lshl() { + transition(itos, ltos); + __ pop_l(T0); + __ sll_d(FSR, T0, FSR); +} + +// used registers : T0 +void TemplateTable::lshr() { + transition(itos, ltos); + __ pop_l(T0); + __ sra_d(FSR, T0, FSR); +} + +// used registers : T0 +void TemplateTable::lushr() { + transition(itos, ltos); + __ pop_l(T0); + __ srl_d(FSR, T0, FSR); +} + +// result in FSF +void TemplateTable::fop2(Operation op) { + transition(ftos, ftos); + switch (op) { + case add: + __ fld_s(fscratch, at_sp()); + __ fadd_s(FSF, fscratch, FSF); + break; + case sub: + __ fld_s(fscratch, at_sp()); + __ 
fsub_s(FSF, fscratch, FSF); + break; + case mul: + __ fld_s(fscratch, at_sp()); + __ fmul_s(FSF, fscratch, FSF); + break; + case div: + __ fld_s(fscratch, at_sp()); + __ fdiv_s(FSF, fscratch, FSF); + break; + case rem: + __ fmov_s(FA1, FSF); + __ fld_s(FA0, at_sp()); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::frem), 2); + break; + default : ShouldNotReachHere(); + } + + __ addi_d(SP, SP, 1 * wordSize); +} + +// result in SSF||FSF +// i dont handle the strict flags +void TemplateTable::dop2(Operation op) { + transition(dtos, dtos); + switch (op) { + case add: + __ fld_d(fscratch, at_sp()); + __ fadd_d(FSF, fscratch, FSF); + break; + case sub: + __ fld_d(fscratch, at_sp()); + __ fsub_d(FSF, fscratch, FSF); + break; + case mul: + __ fld_d(fscratch, at_sp()); + __ fmul_d(FSF, fscratch, FSF); + break; + case div: + __ fld_d(fscratch, at_sp()); + __ fdiv_d(FSF, fscratch, FSF); + break; + case rem: + __ fmov_d(FA1, FSF); + __ fld_d(FA0, at_sp()); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::drem), 2); + break; + default : ShouldNotReachHere(); + } + + __ addi_d(SP, SP, 2 * wordSize); +} + +void TemplateTable::ineg() { + transition(itos, itos); + __ sub_w(FSR, R0, FSR); +} + +void TemplateTable::lneg() { + transition(ltos, ltos); + __ sub_d(FSR, R0, FSR); +} + +void TemplateTable::fneg() { + transition(ftos, ftos); + __ fneg_s(FSF, FSF); +} + +void TemplateTable::dneg() { + transition(dtos, dtos); + __ fneg_d(FSF, FSF); +} + +// used registers : T2 +void TemplateTable::iinc() { + transition(vtos, vtos); + locals_index(T2); + __ ld_w(FSR, T2, 0); + __ ld_b(AT, at_bcp(2)); // get constant + __ add_d(FSR, FSR, AT); + __ st_w(FSR, T2, 0); +} + +// used register : T2 +void TemplateTable::wide_iinc() { + transition(vtos, vtos); + locals_index_wide(T2); + __ get_2_byte_integer_at_bcp(FSR, AT, 4); + __ hswap(FSR); + __ ld_w(AT, T2, 0); + __ add_d(FSR, AT, FSR); + __ st_w(FSR, T2, 0); +} + +void TemplateTable::convert() { + // Checking +#ifdef ASSERT + { + TosState tos_in = ilgl; + TosState tos_out = ilgl; + switch (bytecode()) { + case Bytecodes::_i2l: // fall through + case Bytecodes::_i2f: // fall through + case Bytecodes::_i2d: // fall through + case Bytecodes::_i2b: // fall through + case Bytecodes::_i2c: // fall through + case Bytecodes::_i2s: tos_in = itos; break; + case Bytecodes::_l2i: // fall through + case Bytecodes::_l2f: // fall through + case Bytecodes::_l2d: tos_in = ltos; break; + case Bytecodes::_f2i: // fall through + case Bytecodes::_f2l: // fall through + case Bytecodes::_f2d: tos_in = ftos; break; + case Bytecodes::_d2i: // fall through + case Bytecodes::_d2l: // fall through + case Bytecodes::_d2f: tos_in = dtos; break; + default : ShouldNotReachHere(); + } + switch (bytecode()) { + case Bytecodes::_l2i: // fall through + case Bytecodes::_f2i: // fall through + case Bytecodes::_d2i: // fall through + case Bytecodes::_i2b: // fall through + case Bytecodes::_i2c: // fall through + case Bytecodes::_i2s: tos_out = itos; break; + case Bytecodes::_i2l: // fall through + case Bytecodes::_f2l: // fall through + case Bytecodes::_d2l: tos_out = ltos; break; + case Bytecodes::_i2f: // fall through + case Bytecodes::_l2f: // fall through + case Bytecodes::_d2f: tos_out = ftos; break; + case Bytecodes::_i2d: // fall through + case Bytecodes::_l2d: // fall through + case Bytecodes::_f2d: tos_out = dtos; break; + default : ShouldNotReachHere(); + } + transition(tos_in, tos_out); + } +#endif // ASSERT + // Conversion + switch (bytecode()) { + case Bytecodes::_i2l: + __ 
slli_w(FSR, FSR, 0); + break; + case Bytecodes::_i2f: + __ movgr2fr_w(FSF, FSR); + __ ffint_s_w(FSF, FSF); + break; + case Bytecodes::_i2d: + __ movgr2fr_w(FSF, FSR); + __ ffint_d_w(FSF, FSF); + break; + case Bytecodes::_i2b: + __ ext_w_b(FSR, FSR); + break; + case Bytecodes::_i2c: + __ bstrpick_d(FSR, FSR, 15, 0); // zero the upper 48 bits + break; + case Bytecodes::_i2s: + __ ext_w_h(FSR, FSR); + break; + case Bytecodes::_l2i: + __ slli_w(FSR, FSR, 0); + break; + case Bytecodes::_l2f: + __ movgr2fr_d(FSF, FSR); + __ ffint_s_l(FSF, FSF); + break; + case Bytecodes::_l2d: + __ movgr2fr_d(FSF, FSR); + __ ffint_d_l(FSF, FSF); + break; + case Bytecodes::_f2i: + __ ftintrz_w_s(fscratch, FSF); + __ movfr2gr_s(FSR, fscratch); + break; + case Bytecodes::_f2l: + __ ftintrz_l_s(fscratch, FSF); + __ movfr2gr_d(FSR, fscratch); + break; + case Bytecodes::_f2d: + __ fcvt_d_s(FSF, FSF); + break; + case Bytecodes::_d2i: + __ ftintrz_w_d(fscratch, FSF); + __ movfr2gr_s(FSR, fscratch); + break; + case Bytecodes::_d2l: + __ ftintrz_l_d(fscratch, FSF); + __ movfr2gr_d(FSR, fscratch); + break; + case Bytecodes::_d2f: + __ fcvt_s_d(FSF, FSF); + break; + default : + ShouldNotReachHere(); + } +} + +void TemplateTable::lcmp() { + transition(ltos, itos); + + __ pop(T0); + __ pop(R0); + + __ slt(AT, T0, FSR); + __ slt(FSR, FSR, T0); + __ sub_d(FSR, FSR, AT); +} + +void TemplateTable::float_cmp(bool is_float, int unordered_result) { + if (is_float) { + __ fld_s(fscratch, at_sp()); + __ addi_d(SP, SP, 1 * wordSize); + + if (unordered_result < 0) { + __ fcmp_clt_s(FCC0, FSF, fscratch); + __ fcmp_cult_s(FCC1, fscratch, FSF); + } else { + __ fcmp_cult_s(FCC0, FSF, fscratch); + __ fcmp_clt_s(FCC1, fscratch, FSF); + } + } else { + __ fld_d(fscratch, at_sp()); + __ addi_d(SP, SP, 2 * wordSize); + + if (unordered_result < 0) { + __ fcmp_clt_d(FCC0, FSF, fscratch); + __ fcmp_cult_d(FCC1, fscratch, FSF); + } else { + __ fcmp_cult_d(FCC0, FSF, fscratch); + __ fcmp_clt_d(FCC1, fscratch, FSF); + } + } + + __ movcf2gr(FSR, FCC0); + __ movcf2gr(AT, FCC1); + __ sub_d(FSR, FSR, AT); +} + +// used registers : T3, A7, Rnext +// FSR : return bci, this is defined by the vm specification +// T2 : MDO taken count +// T3 : method +// A7 : offset +// Rnext : next bytecode, this is required by dispatch_base +void TemplateTable::branch(bool is_jsr, bool is_wide) { + __ get_method(T3); + __ profile_taken_branch(A7, T2); // only C2 meaningful + + const ByteSize be_offset = MethodCounters::backedge_counter_offset() + + InvocationCounter::counter_offset(); + const ByteSize inv_offset = MethodCounters::invocation_counter_offset() + + InvocationCounter::counter_offset(); + + // Load up A7 with the branch displacement + if (!is_wide) { + __ ld_b(A7, BCP, 1); + __ ld_bu(AT, BCP, 2); + __ slli_d(A7, A7, 8); + __ orr(A7, A7, AT); + } else { + __ get_4_byte_integer_at_bcp(A7, 1); + __ swap(A7); + } + + // Handle all the JSR stuff here, then exit. + // It's much shorter and cleaner than intermingling with the non-JSR + // normal-branch stuff occurring below.
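+  // For reference, the jsr path below pushes a bci, not a machine address.
+  // A sketch of what the addi_d/ld_d/sub_d sequence computes:
+  //
+  //   return_bci = (BCP + len) - (constMethod + ConstMethod::codes_offset())
+  //
+  // where constMethod is loaded from the method in T3 via Method::const_offset()
+  // and len is 3 for jsr and 5 for jsr_w, matching the (is_wide ? 5 : 3) term.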
+ if (is_jsr) { + // Pre-load the next target bytecode into Rnext + __ ldx_bu(Rnext, BCP, A7); + + // compute return address as bci in FSR + __ addi_d(FSR, BCP, (is_wide?5:3) - in_bytes(ConstMethod::codes_offset())); + __ ld_d(AT, T3, in_bytes(Method::const_offset())); + __ sub_d(FSR, FSR, AT); + // Adjust the bcp in BCP by the displacement in A7 + __ add_d(BCP, BCP, A7); + // jsr returns atos that is not an oop + // Push return address + __ push_i(FSR); + // jsr returns vtos + __ dispatch_only_noverify(vtos); + + return; + } + + // Normal (non-jsr) branch handling + + // Adjust the bcp in S0 by the displacement in T4 + __ add_d(BCP, BCP, A7); + + assert(UseLoopCounter || !UseOnStackReplacement, "on-stack-replacement requires loop counters"); + Label backedge_counter_overflow; + Label profile_method; + Label dispatch; + if (UseLoopCounter) { + // increment backedge counter for backward branches + // T3: method + // T4: target offset + // BCP: target bcp + // LVP: locals pointer + __ blt(R0, A7, dispatch); // check if forward or backward branch + + // check if MethodCounters exists + Label has_counters; + __ ld_d(AT, T3, in_bytes(Method::method_counters_offset())); // use AT as MDO, TEMP + __ bne(AT, R0, has_counters); + __ push2(T3, A7); + __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::build_method_counters), + T3); + __ pop2(T3, A7); + __ ld_d(AT, T3, in_bytes(Method::method_counters_offset())); // use AT as MDO, TEMP + __ beq(AT, R0, dispatch); + __ bind(has_counters); + + if (TieredCompilation) { + Label no_mdo; + int increment = InvocationCounter::count_increment; + int mask = ((1 << Tier0BackedgeNotifyFreqLog) - 1) << InvocationCounter::count_shift; + if (ProfileInterpreter) { + // Are we profiling? + __ ld_d(T0, Address(T3, in_bytes(Method::method_data_offset()))); + __ beq(T0, R0, no_mdo); + // Increment the MDO backedge counter + const Address mdo_backedge_counter(T0, in_bytes(MethodData::backedge_counter_offset()) + + in_bytes(InvocationCounter::counter_offset())); + __ increment_mask_and_jump(mdo_backedge_counter, increment, mask, + T1, false, Assembler::zero, &backedge_counter_overflow); + __ beq(R0, R0, dispatch); + } + __ bind(no_mdo); + // Increment backedge counter in MethodCounters* + __ ld_d(T0, Address(T3, Method::method_counters_offset())); + __ increment_mask_and_jump(Address(T0, be_offset), increment, mask, + T1, false, Assembler::zero, &backedge_counter_overflow); + if (!UseOnStackReplacement) { + __ bind(backedge_counter_overflow); + } + } else { + // increment back edge counter + __ ld_d(T1, T3, in_bytes(Method::method_counters_offset())); + __ ld_w(T0, T1, in_bytes(be_offset)); + __ increment(T0, InvocationCounter::count_increment); + __ st_w(T0, T1, in_bytes(be_offset)); + + // load invocation counter + __ ld_w(T1, T1, in_bytes(inv_offset)); + // buffer bit added, mask no needed + + // dadd backedge counter & invocation counter + __ add_d(T1, T1, T0); + + if (ProfileInterpreter) { + // Test to see if we should create a method data oop + // T1 : backedge counter & invocation counter + if (Assembler::is_simm(InvocationCounter::InterpreterProfileLimit, 12)) { + __ slti(AT, T1, InvocationCounter::InterpreterProfileLimit); + __ bne(AT, R0, dispatch); + } else { + __ li(AT, (long)&InvocationCounter::InterpreterProfileLimit); + __ ld_w(AT, AT, 0); + __ blt(T1, AT, dispatch); + } + + // if no method data exists, go to profile method + __ test_method_data_pointer(T1, profile_method); + + if (UseOnStackReplacement) { + if 
(Assembler::is_simm(InvocationCounter::InterpreterBackwardBranchLimit, 12)) { + __ slti(AT, T2, InvocationCounter::InterpreterBackwardBranchLimit); + __ bne(AT, R0, dispatch); + } else { + __ li(AT, (long)&InvocationCounter::InterpreterBackwardBranchLimit); + __ ld_w(AT, AT, 0); + __ blt(T2, AT, dispatch); + } + + // When ProfileInterpreter is on, the backedge_count comes + // from the MethodData, whose value does not get reset on + // the call to frequency_counter_overflow(). + // To avoid excessive calls to the overflow routine while + // the method is being compiled, add a second test to make + // sure the overflow function is called only once every + // overflow_frequency. + const int overflow_frequency = 1024; + __ andi(AT, T2, overflow_frequency-1); + __ beq(AT, R0, backedge_counter_overflow); + } + } else { + if (UseOnStackReplacement) { + // check T1, the sum of the counters, for overflow against the limit in AT + __ li(AT, (long)&InvocationCounter::InterpreterBackwardBranchLimit); + __ ld_w(AT, AT, 0); + __ bge(T1, AT, backedge_counter_overflow); + } + } + } + __ bind(dispatch); + } + + // Pre-load the next target bytecode into Rnext + __ ld_bu(Rnext, BCP, 0); + + // continue with the bytecode @ target + // FSR: return bci for jsr's, unused otherwise + // Rnext: target bytecode + // BCP: target bcp + __ dispatch_only(vtos, true); + + if (UseLoopCounter) { + if (ProfileInterpreter) { + // Out-of-line code to allocate method data oop. + __ bind(profile_method); + __ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::profile_method)); + __ set_method_data_pointer_for_bcp(); + __ b(dispatch); + } + + if (UseOnStackReplacement) { + // invocation counter overflow + __ bind(backedge_counter_overflow); + __ sub_d(A7, BCP, A7); // branch bcp + call_VM(NOREG, CAST_FROM_FN_PTR(address, + InterpreterRuntime::frequency_counter_overflow), A7); + + // V0: osr nmethod (osr ok) or NULL (osr not possible) + // V1: osr adapter frame return address + // LVP: locals pointer + // BCP: bcp + __ beq(V0, R0, dispatch); + // nmethod may have been invalidated (VM may block upon call_VM return) + __ ld_b(T3, V0, nmethod::state_offset()); + __ li(AT, nmethod::in_use); + __ bne(AT, T3, dispatch); + + // We have the address of an on stack replacement routine in V0. + // In preparation of invoking it, first we must migrate the locals + // and monitors from off the interpreter frame on the stack. + // Make sure to save the osr nmethod across the migration call; + // it will be preserved in Rnext.
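+      // The hand-off below, in outline (a sketch; register roles as used here):
+      //
+      //   Rnext = osr_nmethod;                                 // kept across the VM call
+      //   T0    = SharedRuntime::OSR_migration_begin(thread);  // packed locals + monitors
+      //   pop the interpreter frame and realign SP;
+      //   jump to the address loaded from Rnext at nmethod::osr_entry_point_offset();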
+ __ move(Rnext, V0); + const Register thread = TREG; +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + call_VM(noreg, CAST_FROM_FN_PTR(address, SharedRuntime::OSR_migration_begin)); + + // V0 is OSR buffer, move it to expected parameter location + // refer to osrBufferPointer in c1_LIRAssembler_loongarch.cpp + __ move(T0, V0); + + // pop the interpreter frame + __ ld_d(A7, Address(FP, frame::interpreter_frame_sender_sp_offset * wordSize)); + // remove frame anchor + __ leave(); + __ move(LVP, RA); + __ move(SP, A7); + + __ li(AT, -(StackAlignmentInBytes)); + __ andr(SP , SP , AT); + + // push the (possibly adjusted) return address + // refer to osr_entry in c1_LIRAssembler_loongarch.cpp + __ ld_d(AT, Rnext, nmethod::osr_entry_point_offset()); + __ jr(AT); + } + } +} + +void TemplateTable::if_0cmp(Condition cc) { + transition(itos, vtos); + // assume branch is more often taken than not (loops use backward branches) + Label not_taken; + switch(cc) { + case not_equal: + __ beq(FSR, R0, not_taken); + break; + case equal: + __ bne(FSR, R0, not_taken); + break; + case less: + __ bge(FSR, R0, not_taken); + break; + case less_equal: + __ blt(R0, FSR, not_taken); + break; + case greater: + __ bge(R0, FSR, not_taken); + break; + case greater_equal: + __ blt(FSR, R0, not_taken); + break; + } + + branch(false, false); + + __ bind(not_taken); + __ profile_not_taken_branch(FSR); +} + +void TemplateTable::if_icmp(Condition cc) { + transition(itos, vtos); + // assume branch is more often taken than not (loops use backward branches) + Label not_taken; + + __ pop_i(SSR); + switch(cc) { + case not_equal: + __ beq(SSR, FSR, not_taken); + break; + case equal: + __ bne(SSR, FSR, not_taken); + break; + case less: + __ bge(SSR, FSR, not_taken); + break; + case less_equal: + __ blt(FSR, SSR, not_taken); + break; + case greater: + __ bge(FSR, SSR, not_taken); + break; + case greater_equal: + __ blt(SSR, FSR, not_taken); + break; + } + + branch(false, false); + __ bind(not_taken); + __ profile_not_taken_branch(FSR); +} + +void TemplateTable::if_nullcmp(Condition cc) { + transition(atos, vtos); + // assume branch is more often taken than not (loops use backward branches) + Label not_taken; + switch(cc) { + case not_equal: + __ beq(FSR, R0, not_taken); + break; + case equal: + __ bne(FSR, R0, not_taken); + break; + default: + ShouldNotReachHere(); + } + + branch(false, false); + __ bind(not_taken); + __ profile_not_taken_branch(FSR); +} + + +void TemplateTable::if_acmp(Condition cc) { + transition(atos, vtos); + // assume branch is more often taken than not (loops use backward branches) + Label not_taken; + // __ ld_w(SSR, SP, 0); + __ pop_ptr(SSR); + switch(cc) { + case not_equal: + __ beq(SSR, FSR, not_taken); + break; + case equal: + __ bne(SSR, FSR, not_taken); + break; + default: + ShouldNotReachHere(); + } + + branch(false, false); + + __ bind(not_taken); + __ profile_not_taken_branch(FSR); +} + +// used registers : T1, T2, T3 +// T1 : method +// T2 : returb bci +void TemplateTable::ret() { + transition(vtos, vtos); + + locals_index(T2); + __ ld_d(T2, T2, 0); + __ profile_ret(T2, T3); + + __ get_method(T1); + __ ld_d(BCP, T1, in_bytes(Method::const_offset())); + __ add_d(BCP, BCP, T2); + __ addi_d(BCP, BCP, in_bytes(ConstMethod::codes_offset())); + + __ dispatch_next(vtos, 0, true); +} + +// used registers : T1, T2, T3 +// T1 : method +// T2 : returb bci +void TemplateTable::wide_ret() { + transition(vtos, vtos); + + locals_index_wide(T2); + __ ld_d(T2, T2, 0); // get return bci, compute return bcp + __ 
profile_ret(T2, T3); + + __ get_method(T1); + __ ld_d(BCP, T1, in_bytes(Method::const_offset())); + __ add_d(BCP, BCP, T2); + __ addi_d(BCP, BCP, in_bytes(ConstMethod::codes_offset())); + + __ dispatch_next(vtos, 0, true); +} + +// used register T2, T3, A7, Rnext +// T2 : bytecode pointer +// T3 : low +// A7 : high +// Rnext : dest bytecode, required by dispatch_base +void TemplateTable::tableswitch() { + Label default_case, continue_execution; + transition(itos, vtos); + + // align BCP + __ addi_d(T2, BCP, BytesPerInt); + __ li(AT, -BytesPerInt); + __ andr(T2, T2, AT); + + // load lo & hi + __ ld_w(T3, T2, 1 * BytesPerInt); + __ swap(T3); + __ ld_w(A7, T2, 2 * BytesPerInt); + __ swap(A7); + + // check against lo & hi + __ blt(FSR, T3, default_case); + __ blt(A7, FSR, default_case); + + // lookup dispatch offset, in A7 big endian + __ sub_d(FSR, FSR, T3); + __ alsl_d(AT, FSR, T2, Address::times_4 - 1); + __ ld_w(A7, AT, 3 * BytesPerInt); + __ profile_switch_case(FSR, T4, T3); + + __ bind(continue_execution); + __ swap(A7); + __ add_d(BCP, BCP, A7); + __ ld_bu(Rnext, BCP, 0); + __ dispatch_only(vtos, true); + + // handle default + __ bind(default_case); + __ profile_switch_default(FSR); + __ ld_w(A7, T2, 0); + __ b(continue_execution); +} + +void TemplateTable::lookupswitch() { + transition(itos, itos); + __ stop("lookupswitch bytecode should have been rewritten"); +} + +// used registers : T2, T3, A7, Rnext +// T2 : bytecode pointer +// T3 : pair index +// A7 : offset +// Rnext : dest bytecode +// the data after the opcode is the same as lookupswitch +// see Rewriter::rewrite_method for more information +void TemplateTable::fast_linearswitch() { + transition(itos, vtos); + Label loop_entry, loop, found, continue_execution; + + // swap FSR so we can avoid swapping the table entries + __ swap(FSR); + + // align BCP + __ addi_d(T2, BCP, BytesPerInt); + __ li(AT, -BytesPerInt); + __ andr(T2, T2, AT); + + // set counter + __ ld_w(T3, T2, BytesPerInt); + __ swap(T3); + __ b(loop_entry); + + // table search + __ bind(loop); + // get the entry value + __ alsl_d(AT, T3, T2, Address::times_8 - 1); + __ ld_w(AT, AT, 2 * BytesPerInt); + + // found? + __ beq(FSR, AT, found); + + __ bind(loop_entry); + Label L1; + __ bge(R0, T3, L1); + __ addi_d(T3, T3, -1); + __ b(loop); + __ bind(L1); + __ addi_d(T3, T3, -1); + + // default case + __ profile_switch_default(FSR); + __ ld_w(A7, T2, 0); + __ b(continue_execution); + + // entry found -> get offset + __ bind(found); + __ alsl_d(AT, T3, T2, Address::times_8 - 1); + __ ld_w(A7, AT, 3 * BytesPerInt); + __ profile_switch_case(T3, FSR, T2); + + // continue execution + __ bind(continue_execution); + __ swap(A7); + __ add_d(BCP, BCP, A7); + __ ld_bu(Rnext, BCP, 0); + __ dispatch_only(vtos, true); +} + +// used registers : T0, T1, T2, T3, A7, Rnext +// T2 : pairs address(array) +// Rnext : dest bytecode +// the data after the opcode is the same as lookupswitch +// see Rewriter::rewrite_method for more information +void TemplateTable::fast_binaryswitch() { + transition(itos, vtos); + // Implementation using the following core algorithm: + // + // int binary_search(int key, LookupswitchPair* array, int n) { + // // Binary search according to "Methodik des Programmierens" by + // // Edsger W. Dijkstra and W.H.J. Feijen, Addison Wesley Germany 1985. 
+ // int i = 0; + // int j = n; + // while (i+1 < j) { + // // invariant P: 0 <= i < j <= n and (a[i] <= key < a[j] or Q) + // // with Q: for all i: 0 <= i < n: key < a[i] + // // where a stands for the array and assuming that the (inexisting) + // // element a[n] is infinitely big. + // int h = (i + j) >> 1; + // // i < h < j + // if (key < array[h].fast_match()) { + // j = h; + // } else { + // i = h; + // } + // } + // // R: a[i] <= key < a[i+1] or Q + // // (i.e., if key is within array, i is the correct index) + // return i; + // } + + // register allocation + const Register array = T2; + const Register i = T3, j = A7; + const Register h = T1; + const Register temp = T0; + const Register key = FSR; + + // setup array + __ addi_d(array, BCP, 3*BytesPerInt); + __ li(AT, -BytesPerInt); + __ andr(array, array, AT); + + // initialize i & j + __ move(i, R0); + __ ld_w(j, array, - 1 * BytesPerInt); + // Convert j into native byteordering + __ swap(j); + + // and start + Label entry; + __ b(entry); + + // binary search loop + { + Label loop; + __ bind(loop); + // int h = (i + j) >> 1; + __ add_d(h, i, j); + __ srli_d(h, h, 1); + // if (key < array[h].fast_match()) { + // j = h; + // } else { + // i = h; + // } + // Convert array[h].match to native byte-ordering before compare + __ alsl_d(AT, h, array, Address::times_8 - 1); + __ ld_w(temp, AT, 0 * BytesPerInt); + __ swap(temp); + + __ slt(AT, key, temp); + __ maskeqz(i, i, AT); + __ masknez(temp, h, AT); + __ OR(i, i, temp); + __ masknez(j, j, AT); + __ maskeqz(temp, h, AT); + __ OR(j, j, temp); + + // while (i+1 < j) + __ bind(entry); + __ addi_d(h, i, 1); + __ blt(h, j, loop); + } + + // end of binary search, result index is i (must check again!) + Label default_case; + // Convert array[i].match to native byte-ordering before compare + __ alsl_d(AT, i, array, Address::times_8 - 1); + __ ld_w(temp, AT, 0 * BytesPerInt); + __ swap(temp); + __ bne(key, temp, default_case); + + // entry found -> j = offset + __ alsl_d(AT, i, array, Address::times_8 - 1); + __ ld_w(j, AT, 1 * BytesPerInt); + __ profile_switch_case(i, key, array); + __ swap(j); + + __ add_d(BCP, BCP, j); + __ ld_bu(Rnext, BCP, 0); + __ dispatch_only(vtos, true); + + // default case -> j = default offset + __ bind(default_case); + __ profile_switch_default(i); + __ ld_w(j, array, - 2 * BytesPerInt); + __ swap(j); + __ add_d(BCP, BCP, j); + __ ld_bu(Rnext, BCP, 0); + __ dispatch_only(vtos, true); +} + +void TemplateTable::_return(TosState state) { + transition(state, state); + assert(_desc->calls_vm(), + "inconsistent calls_vm information"); // call in remove_activation + + if (_desc->bytecode() == Bytecodes::_return_register_finalizer) { + assert(state == vtos, "only valid state"); + __ ld_d(T1, aaddress(0)); + __ load_klass(LVP, T1); + __ ld_w(LVP, LVP, in_bytes(Klass::access_flags_offset())); + __ li(AT, JVM_ACC_HAS_FINALIZER); + __ andr(AT, AT, LVP); + Label skip_register_finalizer; + __ beq(AT, R0, skip_register_finalizer); + __ call_VM(noreg, CAST_FROM_FN_PTR(address, + InterpreterRuntime::register_finalizer), T1); + __ bind(skip_register_finalizer); + } + + Register thread = TREG; +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + + if (SafepointMechanism::uses_thread_local_poll() && _desc->bytecode() != Bytecodes::_return_register_finalizer) { + Label no_safepoint; + NOT_PRODUCT(__ block_comment("Thread-local Safepoint poll")); + __ ld_b(AT, thread, in_bytes(Thread::polling_page_offset())); + __ andi(AT, AT, SafepointMechanism::poll_bit()); + __ beq(AT, R0, 
no_safepoint); + __ push(state); + __ call_VM(noreg, CAST_FROM_FN_PTR(address, + InterpreterRuntime::at_safepoint)); + __ pop(state); + __ bind(no_safepoint); + } + + // Narrow result if state is itos but result type is smaller. + // Need to narrow in the return bytecode rather than in generate_return_entry + // since compiled code callers expect the result to already be narrowed. + if (state == itos) { + __ narrow(FSR); + } + + __ remove_activation(state, T4); + __ membar(__ StoreStore); + + __ jr(T4); +} + +// We do not shift the index left by 2 bits in get_cache_and_index_at_bcp, +// because the index always has to be scaled before it is used. A +// ConstantPoolCacheEntry is 16 bytes long and index is the index into the +// ConstantPoolCache, so cache + base_offset() + index * 16 is the address of +// the corresponding ConstantPoolCacheEntry. +// used registers : T2 +// NOTE : the returned index must still be shifted left by 4 to form the address! +void TemplateTable::resolve_cache_and_index(int byte_no, + Register Rcache, + Register index, + size_t index_size) { + assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range"); + const Register temp = A1; + assert_different_registers(Rcache, index); + + Label resolved; + + Bytecodes::Code code = bytecode(); + switch (code) { + case Bytecodes::_nofast_getfield: code = Bytecodes::_getfield; break; + case Bytecodes::_nofast_putfield: code = Bytecodes::_putfield; break; + default: break; + } + + __ get_cache_and_index_and_bytecode_at_bcp(Rcache, index, temp, byte_no, 1, index_size); + // is resolved? + int i = (int)code; + __ addi_d(temp, temp, -i); + __ beq(temp, R0, resolved); + + // resolve first time through + address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_from_cache); + + __ li(temp, i); + __ call_VM(NOREG, entry, temp); + + // Update registers with resolved info + __ get_cache_and_index_at_bcp(Rcache, index, 1, index_size); + __ bind(resolved); +} +//END: LA + +// The Rcache and index registers must be set before the call +void TemplateTable::load_field_cp_cache_entry(Register obj, + Register cache, + Register index, + Register off, + Register flags, + bool is_static = false) { + assert_different_registers(cache, index, flags, off); + + ByteSize cp_base_offset = ConstantPoolCache::base_offset(); + // Field offset + __ alsl_d(AT, index, cache, Address::times_ptr - 1); + __ ld_d(off, AT, in_bytes(cp_base_offset + ConstantPoolCacheEntry::f2_offset())); + // Flags + __ ld_d(flags, AT, in_bytes(cp_base_offset + ConstantPoolCacheEntry::flags_offset())); + + // klass overwrite register + if (is_static) { + __ ld_d(obj, AT, in_bytes(cp_base_offset + ConstantPoolCacheEntry::f1_offset())); + const int mirror_offset = in_bytes(Klass::java_mirror_offset()); + __ ld_d(obj, Address(obj, mirror_offset)); + + __ resolve_oop_handle(obj, T4); + } +} + +// get the method, itable_index and flags of the current invoke +void TemplateTable::load_invoke_cp_cache_entry(int byte_no, + Register method, + Register itable_index, + Register flags, + bool is_invokevirtual, + bool is_invokevfinal, /*unused*/ + bool is_invokedynamic) { + // setup registers + const Register cache = T3; + const Register index = T1; + assert_different_registers(method, flags); + assert_different_registers(method, cache, index); + assert_different_registers(itable_index, flags); + assert_different_registers(itable_index, cache, index); + assert(is_invokevirtual == (byte_no == f2_byte), "is invokevirtual flag redundant"); + // determine constant pool cache field offsets + const int method_offset = in_bytes( +
ConstantPoolCache::base_offset() + + ((byte_no == f2_byte) + ? ConstantPoolCacheEntry::f2_offset() + : ConstantPoolCacheEntry::f1_offset())); + const int flags_offset = in_bytes(ConstantPoolCache::base_offset() + + ConstantPoolCacheEntry::flags_offset()); + // access constant pool cache fields + const int index_offset = in_bytes(ConstantPoolCache::base_offset() + + ConstantPoolCacheEntry::f2_offset()); + + size_t index_size = (is_invokedynamic ? sizeof(u4): sizeof(u2)); + resolve_cache_and_index(byte_no, cache, index, index_size); + + __ alsl_d(AT, index, cache, Address::times_ptr - 1); + __ ld_d(method, AT, method_offset); + + if (itable_index != NOREG) { + __ ld_d(itable_index, AT, index_offset); + } + __ ld_d(flags, AT, flags_offset); +} + +// The registers cache and index expected to be set before call. +// Correct values of the cache and index registers are preserved. +void TemplateTable::jvmti_post_field_access(Register cache, Register index, + bool is_static, bool has_tos) { + // do the JVMTI work here to avoid disturbing the register state below + // We use c_rarg registers here because we want to use the register used in + // the call to the VM + if (JvmtiExport::can_post_field_access()) { + // Check to see if a field access watch has been set before we + // take the time to call into the VM. + Label L1; + // kill FSR + Register tmp1 = T2; + Register tmp2 = T1; + Register tmp3 = T3; + assert_different_registers(cache, index, AT); + __ li(AT, (intptr_t)JvmtiExport::get_field_access_count_addr()); + __ ld_w(AT, AT, 0); + __ beq(AT, R0, L1); + + __ get_cache_and_index_at_bcp(tmp2, tmp3, 1); + + // cache entry pointer + __ addi_d(tmp2, tmp2, in_bytes(ConstantPoolCache::base_offset())); + __ alsl_d(tmp2, tmp3, tmp2, LogBytesPerWord - 1); + + if (is_static) { + __ move(tmp1, R0); + } else { + __ ld_d(tmp1, SP, 0); + __ verify_oop(tmp1); + } + // tmp1: object pointer or NULL + // tmp2: cache entry pointer + __ call_VM(NOREG, CAST_FROM_FN_PTR(address, + InterpreterRuntime::post_field_access), + tmp1, tmp2); + __ get_cache_and_index_at_bcp(cache, index, 1); + __ bind(L1); + } +} + +void TemplateTable::pop_and_check_object(Register r) { + __ pop_ptr(r); + __ null_check(r); // for field access must check obj. 
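+  // In outline (sketch): obj = pop_ptr(); if (obj == NULL) throw NPE; whether
+  // the NPE check is an explicit branch or an implicit trap recovered by the
+  // signal handler is decided inside MacroAssembler::null_check.
+  // verify_oop below only emits code when oop verification is enabled
+  // (debug builds with +VerifyOops); in product builds it is effectively a no-op.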
+ __ verify_oop(r); +} + +// used registers : T1, T2, T3, T1 +// T1 : flags +// T2 : off +// T3 : obj +// T1 : field address +// The flags 31, 30, 29, 28 together build a 4 bit number 0 to 8 with the +// following mapping to the TosState states: +// btos: 0 +// ctos: 1 +// stos: 2 +// itos: 3 +// ltos: 4 +// ftos: 5 +// dtos: 6 +// atos: 7 +// vtos: 8 +// see ConstantPoolCacheEntry::set_field for more info +void TemplateTable::getfield_or_static(int byte_no, bool is_static, RewriteControl rc) { + transition(vtos, vtos); + + const Register cache = T3; + const Register index = T0; + + const Register obj = T3; + const Register off = T2; + const Register flags = T1; + + const Register scratch = T8; + + resolve_cache_and_index(byte_no, cache, index, sizeof(u2)); + jvmti_post_field_access(cache, index, is_static, false); + load_field_cp_cache_entry(obj, cache, index, off, flags, is_static); + + { + __ li(scratch, 1 << ConstantPoolCacheEntry::is_volatile_shift); + __ andr(scratch, scratch, flags); + + Label notVolatile; + __ beq(scratch, R0, notVolatile); + __ membar(MacroAssembler::AnyAny); + __ bind(notVolatile); + } + + if (!is_static) pop_and_check_object(obj); + __ add_d(index, obj, off); + + const Address field(index, 0); + + Label Done, notByte, notBool, notInt, notShort, notChar, + notLong, notFloat, notObj, notDouble; + + assert(btos == 0, "change code, btos != 0"); + __ srli_d(flags, flags, ConstantPoolCacheEntry::tos_state_shift); + __ andi(flags, flags, ConstantPoolCacheEntry::tos_state_mask); + __ bne(flags, R0, notByte); + + // btos + __ access_load_at(T_BYTE, IN_HEAP, FSR, field, noreg, noreg); + __ push(btos); + + // Rewrite bytecode to be faster + if (!is_static && rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_bgetfield, T3, T2); + } + __ b(Done); + + + __ bind(notByte); + __ li(AT, ztos); + __ bne(flags, AT, notBool); + + // ztos + __ access_load_at(T_BOOLEAN, IN_HEAP, FSR, field, noreg, noreg); + __ push(ztos); + + // Rewrite bytecode to be faster + if (!is_static && rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_bgetfield, T3, T2); + } + __ b(Done); + + + __ bind(notBool); + __ li(AT, itos); + __ bne(flags, AT, notInt); + + // itos + __ access_load_at(T_INT, IN_HEAP, FSR, field, noreg, noreg); + __ push(itos); + + // Rewrite bytecode to be faster + if (!is_static && rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_igetfield, T3, T2); + } + __ b(Done); + + __ bind(notInt); + __ li(AT, atos); + __ bne(flags, AT, notObj); + + // atos + //add for compressedoops + do_oop_load(_masm, Address(index, 0), FSR, IN_HEAP); + __ push(atos); + + if (!is_static && rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_agetfield, T3, T2); + } + __ b(Done); + + __ bind(notObj); + __ li(AT, ctos); + __ bne(flags, AT, notChar); + + // ctos + __ access_load_at(T_CHAR, IN_HEAP, FSR, field, noreg, noreg); + __ push(ctos); + + if (!is_static && rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_cgetfield, T3, T2); + } + __ b(Done); + + __ bind(notChar); + __ li(AT, stos); + __ bne(flags, AT, notShort); + + // stos + __ access_load_at(T_SHORT, IN_HEAP, FSR, field, noreg, noreg); + __ push(stos); + + if (!is_static && rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_sgetfield, T3, T2); + } + __ b(Done); + + __ bind(notShort); + __ li(AT, ltos); + __ bne(flags, AT, notLong); + + // ltos + __ access_load_at(T_LONG, IN_HEAP | MO_RELAXED, FSR, field, noreg, noreg); + __ push(ltos); + + // Don't rewrite to _fast_lgetfield for potential volatile case. 
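+  // The volatile handling in this function amounts to a load-acquire around
+  // the field read; in outline:
+  //
+  //   if (is_volatile) membar(AnyAny);                 // before the access (above)
+  //   value = load of the field (one of the access_load_at cases)
+  //   if (is_volatile) membar(LoadLoad | LoadStore);   // after, at Done (below)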
+ __ b(Done); + + __ bind(notLong); + __ li(AT, ftos); + __ bne(flags, AT, notFloat); + + // ftos + __ access_load_at(T_FLOAT, IN_HEAP, noreg /* ftos */, field, noreg, noreg); + __ push(ftos); + + if (!is_static && rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_fgetfield, T3, T2); + } + __ b(Done); + + __ bind(notFloat); + __ li(AT, dtos); +#ifdef ASSERT + __ bne(flags, AT, notDouble); +#endif + + // dtos + __ access_load_at(T_DOUBLE, IN_HEAP, noreg /* dtos */, field, noreg, noreg); + __ push(dtos); + + if (!is_static && rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_dgetfield, T3, T2); + } + +#ifdef ASSERT + __ b(Done); + __ bind(notDouble); + __ stop("Bad state"); +#endif + + __ bind(Done); + + { + Label notVolatile; + __ beq(scratch, R0, notVolatile); + __ membar(Assembler::Membar_mask_bits(__ LoadLoad | __ LoadStore)); + __ bind(notVolatile); + } +} + +void TemplateTable::getfield(int byte_no) { + getfield_or_static(byte_no, false); +} + +void TemplateTable::nofast_getfield(int byte_no) { + getfield_or_static(byte_no, false, may_not_rewrite); +} + +void TemplateTable::getstatic(int byte_no) { + getfield_or_static(byte_no, true); +} + +// The registers cache and index expected to be set before call. +// The function may destroy various registers, just not the cache and index registers. +void TemplateTable::jvmti_post_field_mod(Register cache, Register index, bool is_static) { + transition(vtos, vtos); + + ByteSize cp_base_offset = ConstantPoolCache::base_offset(); + + if (JvmtiExport::can_post_field_modification()) { + // Check to see if a field modification watch has been set before + // we take the time to call into the VM. + Label L1; + //kill AT, T1, T2, T3, T4 + Register tmp1 = T2; + Register tmp2 = T1; + Register tmp3 = T3; + Register tmp4 = T4; + assert_different_registers(cache, index, tmp4); + + __ li(AT, JvmtiExport::get_field_modification_count_addr()); + __ ld_w(AT, AT, 0); + __ beq(AT, R0, L1); + + __ get_cache_and_index_at_bcp(tmp2, tmp4, 1); + + if (is_static) { + __ move(tmp1, R0); + } else { + // Life is harder. The stack holds the value on top, followed by + // the object. We don't know the size of the value, though; it + // could be one or two words depending on its type. As a result, + // we must find the type to determine where the object is. 
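// ---------------------------------------------------------------------------
// Editor's note (illustrative sketch, not part of the patch): what the slot
// arithmetic below computes. A long/double (ltos/dtos) value occupies two
// expression-stack slots, everything else one, so the receiver sits one or two
// slots above SP; the real code expresses the same thing with
// Interpreter::expr_offset_in_bytes(1) / (2).
static inline int receiver_slot_offset(int tos_state) {
  const int ltos = 4, dtos = 6;                        // values from the TosState table above
  return (tos_state == ltos || tos_state == dtos) ? 2  // two-slot value below the receiver
                                                  : 1; // one-slot value
}
// ---------------------------------------------------------------------------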
+ Label two_word, valsize_known; + __ alsl_d(AT, tmp4, tmp2, Address::times_8 - 1); + __ ld_d(tmp3, AT, in_bytes(cp_base_offset + + ConstantPoolCacheEntry::flags_offset())); + __ shr(tmp3, ConstantPoolCacheEntry::tos_state_shift); + + ConstantPoolCacheEntry::verify_tos_state_shift(); + __ move(tmp1, SP); + __ li(AT, ltos); + __ beq(tmp3, AT, two_word); + __ li(AT, dtos); + __ beq(tmp3, AT, two_word); + __ addi_d(tmp1, tmp1, Interpreter::expr_offset_in_bytes(1) ); + __ b(valsize_known); + + __ bind(two_word); + __ addi_d(tmp1, tmp1, Interpreter::expr_offset_in_bytes(2)); + + __ bind(valsize_known); + // setup object pointer + __ ld_d(tmp1, tmp1, 0 * wordSize); + } + // cache entry pointer + __ addi_d(tmp2, tmp2, in_bytes(cp_base_offset)); + __ alsl_d(tmp2, tmp4, tmp2, LogBytesPerWord - 1); + // object (tos) + __ move(tmp3, SP); + // tmp1: object pointer set up above (NULL if static) + // tmp2: cache entry pointer + // tmp3: jvalue object on the stack + __ call_VM(NOREG, + CAST_FROM_FN_PTR(address, + InterpreterRuntime::post_field_modification), + tmp1, tmp2, tmp3); + __ get_cache_and_index_at_bcp(cache, index, 1); + __ bind(L1); + } +} + +// used registers : T0, T1, T2, T3, T8 +// T1 : flags +// T2 : off +// T3 : obj +// T8 : volatile bit +// see ConstantPoolCacheEntry::set_field for more info +void TemplateTable::putfield_or_static(int byte_no, bool is_static, RewriteControl rc) { + transition(vtos, vtos); + + const Register cache = T3; + const Register index = T0; + const Register obj = T3; + const Register off = T2; + const Register flags = T1; + const Register bc = T3; + + const Register scratch = T8; + + resolve_cache_and_index(byte_no, cache, index, sizeof(u2)); + jvmti_post_field_mod(cache, index, is_static); + load_field_cp_cache_entry(obj, cache, index, off, flags, is_static); + + Label Done; + { + __ li(scratch, 1 << ConstantPoolCacheEntry::is_volatile_shift); + __ andr(scratch, scratch, flags); + + Label notVolatile; + __ beq(scratch, R0, notVolatile); + __ membar(Assembler::Membar_mask_bits(__ StoreStore | __ LoadStore)); + __ bind(notVolatile); + } + + + Label notByte, notBool, notInt, notShort, notChar, notLong, notFloat, notObj, notDouble; + + assert(btos == 0, "change code, btos != 0"); + + // btos + __ srli_d(flags, flags, ConstantPoolCacheEntry::tos_state_shift); + __ andi(flags, flags, ConstantPoolCacheEntry::tos_state_mask); + __ bne(flags, R0, notByte); + + __ pop(btos); + if (!is_static) { + pop_and_check_object(obj); + } + __ add_d(T4, obj, off); + __ access_store_at(T_BYTE, IN_HEAP, Address(T4), FSR, noreg, noreg); + + if (!is_static && rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_bputfield, bc, off, true, byte_no); + } + __ b(Done); + + // ztos + __ bind(notByte); + __ li(AT, ztos); + __ bne(flags, AT, notBool); + + __ pop(ztos); + if (!is_static) { + pop_and_check_object(obj); + } + __ add_d(T4, obj, off); + __ andi(FSR, FSR, 0x1); + __ access_store_at(T_BOOLEAN, IN_HEAP, Address(T4), FSR, noreg, noreg); + + if (!is_static && rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_zputfield, bc, off, true, byte_no); + } + __ b(Done); + + // itos + __ bind(notBool); + __ li(AT, itos); + __ bne(flags, AT, notInt); + + __ pop(itos); + if (!is_static) { + pop_and_check_object(obj); + } + __ add_d(T4, obj, off); + __ access_store_at(T_INT, IN_HEAP, Address(T4), FSR, noreg, noreg); + + if (!is_static && rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_iputfield, bc, off, true, byte_no); + } + __ b(Done); + + // atos + __ bind(notInt); + __ li(AT, atos); + 
__ bne(flags, AT, notObj); + + __ pop(atos); + if (!is_static) { + pop_and_check_object(obj); + } + + do_oop_store(_masm, Address(obj, off, Address::times_1, 0), FSR); + + if (!is_static && rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_aputfield, bc, off, true, byte_no); + } + __ b(Done); + + // ctos + __ bind(notObj); + __ li(AT, ctos); + __ bne(flags, AT, notChar); + + __ pop(ctos); + if (!is_static) { + pop_and_check_object(obj); + } + __ add_d(T4, obj, off); + __ access_store_at(T_CHAR, IN_HEAP, Address(T4), FSR, noreg, noreg); + if (!is_static && rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_cputfield, bc, off, true, byte_no); + } + __ b(Done); + + // stos + __ bind(notChar); + __ li(AT, stos); + __ bne(flags, AT, notShort); + + __ pop(stos); + if (!is_static) { + pop_and_check_object(obj); + } + __ add_d(T4, obj, off); + __ access_store_at(T_SHORT, IN_HEAP, Address(T4), FSR, noreg, noreg); + if (!is_static && rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_sputfield, bc, off, true, byte_no); + } + __ b(Done); + + // ltos + __ bind(notShort); + __ li(AT, ltos); + __ bne(flags, AT, notLong); + + __ pop(ltos); + if (!is_static) { + pop_and_check_object(obj); + } + __ add_d(T4, obj, off); + __ access_store_at(T_LONG, IN_HEAP, Address(T4), FSR, noreg, noreg); + if (!is_static && rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_lputfield, bc, off, true, byte_no); + } + __ b(Done); + + // ftos + __ bind(notLong); + __ li(AT, ftos); + __ bne(flags, AT, notFloat); + + __ pop(ftos); + if (!is_static) { + pop_and_check_object(obj); + } + __ add_d(T4, obj, off); + __ access_store_at(T_FLOAT, IN_HEAP, Address(T4), noreg, noreg, noreg); + if (!is_static && rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_fputfield, bc, off, true, byte_no); + } + __ b(Done); + + + // dtos + __ bind(notFloat); + __ li(AT, dtos); +#ifdef ASSERT + __ bne(flags, AT, notDouble); +#endif + + __ pop(dtos); + if (!is_static) { + pop_and_check_object(obj); + } + __ add_d(T4, obj, off); + __ access_store_at(T_DOUBLE, IN_HEAP, Address(T4), noreg, noreg, noreg); + if (!is_static && rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_dputfield, bc, off, true, byte_no); + } + +#ifdef ASSERT + __ b(Done); + + __ bind(notDouble); + __ stop("Bad state"); +#endif + + __ bind(Done); + + { + Label notVolatile; + __ beq(scratch, R0, notVolatile); + __ membar(Assembler::Membar_mask_bits(__ StoreLoad | __ StoreStore)); + __ bind(notVolatile); + } +} + +void TemplateTable::putfield(int byte_no) { + putfield_or_static(byte_no, false); +} + +void TemplateTable::nofast_putfield(int byte_no) { + putfield_or_static(byte_no, false, may_not_rewrite); +} + +void TemplateTable::putstatic(int byte_no) { + putfield_or_static(byte_no, true); +} + +// used registers : T1, T2, T3 +// T1 : cp_entry +// T2 : obj +// T3 : value pointer +void TemplateTable::jvmti_post_fast_field_mod() { + if (JvmtiExport::can_post_field_modification()) { + // Check to see if a field modification watch has been set before + // we take the time to call into the VM. 
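// ---------------------------------------------------------------------------
// Editor's note (illustrative sketch, not part of the patch): the guard below in
// plain terms -- the expensive call into the VM is taken only when at least one
// JVMTI field-modification watch is actually installed:
//
//   if (*JvmtiExport::get_field_modification_count_addr() == 0)  // no watches set
//     goto L2;                                                    // skip the VM call
// ---------------------------------------------------------------------------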
+ Label L2; + //kill AT, T1, T2, T3, T4 + Register tmp1 = T2; + Register tmp2 = T1; + Register tmp3 = T3; + Register tmp4 = T4; + __ li(AT, JvmtiExport::get_field_modification_count_addr()); + __ ld_w(tmp3, AT, 0); + __ beq(tmp3, R0, L2); + __ pop_ptr(tmp1); + __ verify_oop(tmp1); + __ push_ptr(tmp1); + switch (bytecode()) { // load values into the jvalue object + case Bytecodes::_fast_aputfield: __ push_ptr(FSR); break; + case Bytecodes::_fast_bputfield: // fall through + case Bytecodes::_fast_zputfield: // fall through + case Bytecodes::_fast_sputfield: // fall through + case Bytecodes::_fast_cputfield: // fall through + case Bytecodes::_fast_iputfield: __ push_i(FSR); break; + case Bytecodes::_fast_dputfield: __ push_d(FSF); break; + case Bytecodes::_fast_fputfield: __ push_f(); break; + case Bytecodes::_fast_lputfield: __ push_l(FSR); break; + default: ShouldNotReachHere(); + } + __ move(tmp3, SP); + // access constant pool cache entry + __ get_cache_entry_pointer_at_bcp(tmp2, FSR, 1); + __ verify_oop(tmp1); + // tmp1: object pointer copied above + // tmp2: cache entry pointer + // tmp3: jvalue object on the stack + __ call_VM(NOREG, + CAST_FROM_FN_PTR(address, + InterpreterRuntime::post_field_modification), + tmp1, tmp2, tmp3); + + switch (bytecode()) { // restore tos values + case Bytecodes::_fast_aputfield: __ pop_ptr(FSR); break; + case Bytecodes::_fast_bputfield: // fall through + case Bytecodes::_fast_zputfield: // fall through + case Bytecodes::_fast_sputfield: // fall through + case Bytecodes::_fast_cputfield: // fall through + case Bytecodes::_fast_iputfield: __ pop_i(FSR); break; + case Bytecodes::_fast_dputfield: __ pop_d(); break; + case Bytecodes::_fast_fputfield: __ pop_f(); break; + case Bytecodes::_fast_lputfield: __ pop_l(FSR); break; + } + __ bind(L2); + } +} + +// used registers : T2, T3, T1 +// T2 : index & off & field address +// T3 : cache & obj +// T1 : flags +void TemplateTable::fast_storefield(TosState state) { + transition(state, vtos); + + const Register scratch = T8; + + ByteSize base = ConstantPoolCache::base_offset(); + + jvmti_post_fast_field_mod(); + + // access constant pool cache + __ get_cache_and_index_at_bcp(T3, T2, 1); + + // Must prevent reordering of the following cp cache loads with bytecode load + __ membar(__ LoadLoad); + + // test for volatile with T1 + __ alsl_d(AT, T2, T3, Address::times_8 - 1); + __ ld_d(T1, AT, in_bytes(base + ConstantPoolCacheEntry::flags_offset())); + + // replace index with field offset from cache entry + __ ld_d(T2, AT, in_bytes(base + ConstantPoolCacheEntry::f2_offset())); + + Label Done; + { + __ li(scratch, 1 << ConstantPoolCacheEntry::is_volatile_shift); + __ andr(scratch, scratch, T1); + + Label notVolatile; + __ beq(scratch, R0, notVolatile); + __ membar(Assembler::Membar_mask_bits(__ StoreStore | __ LoadStore)); + __ bind(notVolatile); + } + + // Get object from stack + pop_and_check_object(T3); + + if (bytecode() != Bytecodes::_fast_aputfield) { + // field address + __ add_d(T2, T3, T2); + } + + // access field + switch (bytecode()) { + case Bytecodes::_fast_zputfield: + __ andi(FSR, FSR, 0x1); // boolean is true if LSB is 1 + __ access_store_at(T_BOOLEAN, IN_HEAP, Address(T2), FSR, noreg, noreg); + break; + case Bytecodes::_fast_bputfield: + __ access_store_at(T_BYTE, IN_HEAP, Address(T2), FSR, noreg, noreg); + break; + case Bytecodes::_fast_sputfield: + __ access_store_at(T_SHORT, IN_HEAP, Address(T2), FSR, noreg, noreg); + break; + case Bytecodes::_fast_cputfield: + __ access_store_at(T_CHAR, IN_HEAP, 
Address(T2), FSR, noreg, noreg); + break; + case Bytecodes::_fast_iputfield: + __ access_store_at(T_INT, IN_HEAP, Address(T2), FSR, noreg, noreg); + break; + case Bytecodes::_fast_lputfield: + __ access_store_at(T_LONG, IN_HEAP, Address(T2), FSR, noreg, noreg); + break; + case Bytecodes::_fast_fputfield: + __ access_store_at(T_FLOAT, IN_HEAP, Address(T2), noreg, noreg, noreg); + break; + case Bytecodes::_fast_dputfield: + __ access_store_at(T_DOUBLE, IN_HEAP, Address(T2), noreg, noreg, noreg); + break; + case Bytecodes::_fast_aputfield: + do_oop_store(_masm, Address(T3, T2, Address::times_1, 0), FSR); + break; + default: + ShouldNotReachHere(); + } + + { + Label notVolatile; + __ beq(scratch, R0, notVolatile); + __ membar(Assembler::Membar_mask_bits(__ StoreLoad | __ StoreStore)); + __ bind(notVolatile); + } +} + +// used registers : T2, T3, T1 +// T3 : cp_entry & cache +// T2 : index & offset +void TemplateTable::fast_accessfield(TosState state) { + transition(atos, state); + + const Register scratch = T8; + + // do the JVMTI work here to avoid disturbing the register state below + if (JvmtiExport::can_post_field_access()) { + // Check to see if a field access watch has been set before we take + // the time to call into the VM. + Label L1; + __ li(AT, (intptr_t)JvmtiExport::get_field_access_count_addr()); + __ ld_w(T3, AT, 0); + __ beq(T3, R0, L1); + // access constant pool cache entry + __ get_cache_entry_pointer_at_bcp(T3, T1, 1); + __ move(TSR, FSR); + __ verify_oop(FSR); + // FSR: object pointer copied above + // T3: cache entry pointer + __ call_VM(NOREG, + CAST_FROM_FN_PTR(address, InterpreterRuntime::post_field_access), + FSR, T3); + __ move(FSR, TSR); + __ bind(L1); + } + + // access constant pool cache + __ get_cache_and_index_at_bcp(T3, T2, 1); + + // Must prevent reordering of the following cp cache loads with bytecode load + __ membar(__ LoadLoad); + + // replace index with field offset from cache entry + __ alsl_d(AT, T2, T3, Address::times_8 - 1); + __ ld_d(T2, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::f2_offset())); + + { + __ ld_d(AT, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset())); + __ li(scratch, 1 << ConstantPoolCacheEntry::is_volatile_shift); + __ andr(scratch, scratch, AT); + + Label notVolatile; + __ beq(scratch, R0, notVolatile); + __ membar(MacroAssembler::AnyAny); + __ bind(notVolatile); + } + + // FSR: object + __ verify_oop(FSR); + __ null_check(FSR); + // field addresses + __ add_d(FSR, FSR, T2); + + // access field + switch (bytecode()) { + case Bytecodes::_fast_bgetfield: + __ access_load_at(T_BYTE, IN_HEAP, FSR, Address(FSR), noreg, noreg); + break; + case Bytecodes::_fast_sgetfield: + __ access_load_at(T_SHORT, IN_HEAP, FSR, Address(FSR), noreg, noreg); + break; + case Bytecodes::_fast_cgetfield: + __ access_load_at(T_CHAR, IN_HEAP, FSR, Address(FSR), noreg, noreg); + break; + case Bytecodes::_fast_igetfield: + __ access_load_at(T_INT, IN_HEAP, FSR, Address(FSR), noreg, noreg); + break; + case Bytecodes::_fast_lgetfield: + __ stop("should not be rewritten"); + break; + case Bytecodes::_fast_fgetfield: + __ access_load_at(T_FLOAT, IN_HEAP, noreg, Address(FSR), noreg, noreg); + break; + case Bytecodes::_fast_dgetfield: + __ access_load_at(T_DOUBLE, IN_HEAP, noreg, Address(FSR), noreg, noreg); + break; + case Bytecodes::_fast_agetfield: + do_oop_load(_masm, Address(FSR, 0), FSR, IN_HEAP); + __ verify_oop(FSR); + break; + default: + ShouldNotReachHere(); + } + + { + Label notVolatile; + __ 
beq(scratch, R0, notVolatile); + __ membar(Assembler::Membar_mask_bits(__ LoadLoad | __ LoadStore)); + __ bind(notVolatile); + } +} + +// generator for _fast_iaccess_0, _fast_aaccess_0, _fast_faccess_0 +// used registers : T1, T2, T3, T1 +// T1 : obj & field address +// T2 : off +// T3 : cache +// T1 : index +void TemplateTable::fast_xaccess(TosState state) { + transition(vtos, state); + + const Register scratch = T8; + + // get receiver + __ ld_d(T1, aaddress(0)); + // access constant pool cache + __ get_cache_and_index_at_bcp(T3, T2, 2); + __ alsl_d(AT, T2, T3, Address::times_8 - 1); + __ ld_d(T2, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::f2_offset())); + + { + __ ld_d(AT, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset())); + __ li(scratch, 1 << ConstantPoolCacheEntry::is_volatile_shift); + __ andr(scratch, scratch, AT); + + Label notVolatile; + __ beq(scratch, R0, notVolatile); + __ membar(MacroAssembler::AnyAny); + __ bind(notVolatile); + } + + // make sure exception is reported in correct bcp range (getfield is + // next instruction) + __ addi_d(BCP, BCP, 1); + __ null_check(T1); + __ add_d(T1, T1, T2); + + if (state == itos) { + __ access_load_at(T_INT, IN_HEAP, FSR, Address(T1), noreg, noreg); + } else if (state == atos) { + do_oop_load(_masm, Address(T1, 0), FSR, IN_HEAP); + __ verify_oop(FSR); + } else if (state == ftos) { + __ access_load_at(T_FLOAT, IN_HEAP, noreg, Address(T1), noreg, noreg); + } else { + ShouldNotReachHere(); + } + __ addi_d(BCP, BCP, -1); + + { + Label notVolatile; + __ beq(scratch, R0, notVolatile); + __ membar(Assembler::Membar_mask_bits(__ LoadLoad | __ LoadStore)); + __ bind(notVolatile); + } +} + + +//----------------------------------------------------------------------------- +// Calls + +void TemplateTable::count_calls(Register method, Register temp) { + // implemented elsewhere + ShouldNotReachHere(); +} + +// method, index, recv, flags: T1, T2, T3, T1 +// byte_no = 2 for _invokevirtual, 1 else +// T0 : return address +// get the method & index of the invoke, and push the return address of +// the invoke(first word in the frame) +// this address is where the return code jmp to. +// NOTE : this method will set T3&T1 as recv&flags +void TemplateTable::prepare_invoke(int byte_no, + Register method, // linked method (or i-klass) + Register index, // itable index, MethodType, etc. 
+ Register recv, // if caller wants to see it + Register flags // if caller wants to test it + ) { + // determine flags + const Bytecodes::Code code = bytecode(); + const bool is_invokeinterface = code == Bytecodes::_invokeinterface; + const bool is_invokedynamic = code == Bytecodes::_invokedynamic; + const bool is_invokehandle = code == Bytecodes::_invokehandle; + const bool is_invokevirtual = code == Bytecodes::_invokevirtual; + const bool is_invokespecial = code == Bytecodes::_invokespecial; + const bool load_receiver = (recv != noreg); + const bool save_flags = (flags != noreg); + assert(load_receiver == (code != Bytecodes::_invokestatic && code != Bytecodes::_invokedynamic),""); + assert(save_flags == (is_invokeinterface || is_invokevirtual), "need flags for vfinal"); + assert(flags == noreg || flags == T1, "error flags reg."); + assert(recv == noreg || recv == T3, "error recv reg."); + + // setup registers & access constant pool cache + if(recv == noreg) recv = T3; + if(flags == noreg) flags = T1; + assert_different_registers(method, index, recv, flags); + + // save 'interpreter return address' + __ save_bcp(); + + load_invoke_cp_cache_entry(byte_no, method, index, flags, is_invokevirtual, false, is_invokedynamic); + + if (is_invokedynamic || is_invokehandle) { + Label L_no_push; + __ li(AT, (1 << ConstantPoolCacheEntry::has_appendix_shift)); + __ andr(AT, AT, flags); + __ beq(AT, R0, L_no_push); + // Push the appendix as a trailing parameter. + // This must be done before we get the receiver, + // since the parameter_size includes it. + Register tmp = SSR; + __ push(tmp); + __ move(tmp, index); + assert(ConstantPoolCacheEntry::_indy_resolved_references_appendix_offset == 0, "appendix expected at index+0"); + __ load_resolved_reference_at_index(index, tmp, recv); + __ pop(tmp); + __ push(index); // push appendix (MethodType, CallSite, etc.) + __ bind(L_no_push); + } + + // load receiver if needed (after appendix is pushed so parameter size is correct) + // Note: no return address pushed yet + if (load_receiver) { + __ li(AT, ConstantPoolCacheEntry::parameter_size_mask); + __ andr(recv, flags, AT); + // Since we won't push RA on stack, no_return_pc_pushed_yet should be 0. 
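// ---------------------------------------------------------------------------
// Editor's note (illustrative sketch, not part of the patch): where the receiver
// lives at this point. The low bits of flags give parameter_size, the number of
// argument slots including the receiver; the receiver was pushed first, so with
// no return address on the stack yet it is the deepest of those slots, roughly:
//
//   recv = *(oop*)(SP + (parameter_size - 1) * wordSize);
// ---------------------------------------------------------------------------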
+ const int no_return_pc_pushed_yet = 0; // argument slot correction before we push return address + const int receiver_is_at_end = -1; // back off one slot to get receiver + Address recv_addr = __ argument_address(recv, no_return_pc_pushed_yet + receiver_is_at_end); + __ ld_d(recv, recv_addr); + __ verify_oop(recv); + } + if(save_flags) { + __ move(BCP, flags); + } + + // compute return type + __ srli_d(flags, flags, ConstantPoolCacheEntry::tos_state_shift); + __ andi(flags, flags, 0xf); + + // Make sure we don't need to mask flags for tos_state_shift after the above shift + ConstantPoolCacheEntry::verify_tos_state_shift(); + // load return address + { + const address table = (address) Interpreter::invoke_return_entry_table_for(code); + __ li(AT, (long)table); + __ alsl_d(AT, flags, AT, LogBytesPerWord - 1); + __ ld_d(RA, AT, 0); + } + + if (save_flags) { + __ move(flags, BCP); + __ restore_bcp(); + } +} + +// used registers : T0, T3, T1, T2 +// T3 : recv, this two register using convention is by prepare_invoke +// T1 : flags, klass +// Rmethod : method, index must be Rmethod +void TemplateTable::invokevirtual_helper(Register index, + Register recv, + Register flags) { + + assert_different_registers(index, recv, flags, T2); + + // Test for an invoke of a final method + Label notFinal; + __ li(AT, (1 << ConstantPoolCacheEntry::is_vfinal_shift)); + __ andr(AT, flags, AT); + __ beq(AT, R0, notFinal); + + Register method = index; // method must be Rmethod + assert(method == Rmethod, "methodOop must be Rmethod for interpreter calling convention"); + + // do the call - the index is actually the method to call + // the index is indeed methodOop, for this is vfinal, + // see ConstantPoolCacheEntry::set_method for more info + + // It's final, need a null check here! 
+ __ null_check(recv); + + // profile this call + __ profile_final_call(T2); + + // T2: tmp, used for mdp + // method: callee + // T4: tmp + // is_virtual: true + __ profile_arguments_type(T2, method, T4, true); + + __ jump_from_interpreted(method, T2); + + __ bind(notFinal); + + // get receiver klass + __ null_check(recv, oopDesc::klass_offset_in_bytes()); + __ load_klass(T2, recv); + + // profile this call + __ profile_virtual_call(T2, T0, T1); + + // get target methodOop & entry point + __ lookup_virtual_method(T2, index, method); + __ profile_arguments_type(T2, method, T4, true); + __ jump_from_interpreted(method, T2); +} + +void TemplateTable::invokevirtual(int byte_no) { + transition(vtos, vtos); + assert(byte_no == f2_byte, "use this argument"); + prepare_invoke(byte_no, Rmethod, NOREG, T3, T1); + // now recv & flags in T3, T1 + invokevirtual_helper(Rmethod, T3, T1); +} + +// T4 : entry +// Rmethod : method +void TemplateTable::invokespecial(int byte_no) { + transition(vtos, vtos); + assert(byte_no == f1_byte, "use this argument"); + prepare_invoke(byte_no, Rmethod, NOREG, T3); + // now recv & flags in T3, T1 + __ verify_oop(T3); + __ null_check(T3); + __ profile_call(T4); + + // T8: tmp, used for mdp + // Rmethod: callee + // T4: tmp + // is_virtual: false + __ profile_arguments_type(T8, Rmethod, T4, false); + + __ jump_from_interpreted(Rmethod, T4); + __ move(T0, T3); +} + +void TemplateTable::invokestatic(int byte_no) { + transition(vtos, vtos); + assert(byte_no == f1_byte, "use this argument"); + prepare_invoke(byte_no, Rmethod, NOREG); + + __ profile_call(T4); + + // T8: tmp, used for mdp + // Rmethod: callee + // T4: tmp + // is_virtual: false + __ profile_arguments_type(T8, Rmethod, T4, false); + + __ jump_from_interpreted(Rmethod, T4); +} + +// i have no idea what to do here, now. for future change. FIXME. +void TemplateTable::fast_invokevfinal(int byte_no) { + transition(vtos, vtos); + assert(byte_no == f2_byte, "use this argument"); + __ stop("fast_invokevfinal not used on LoongArch64"); +} + +// used registers : T0, T1, T2, T3, T1, A7 +// T0 : itable, vtable, entry +// T1 : interface +// T3 : receiver +// T1 : flags, klass +// Rmethod : index, method, this is required by interpreter_entry +void TemplateTable::invokeinterface(int byte_no) { + transition(vtos, vtos); + //this method will use T1-T4 and T0 + assert(byte_no == f1_byte, "use this argument"); + prepare_invoke(byte_no, T2, Rmethod, T3, T1); + // T2: reference klass (from f1) if interface method + // Rmethod: method (from f2) + // T3: receiver + // T1: flags + + // First check for Object case, then private interface method, + // then regular interface method. + + // Special case of invokeinterface called for virtual method of + // java.lang.Object. See cpCache.cpp for details. 
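// ---------------------------------------------------------------------------
// Editor's note (illustrative sketch, not part of the patch): the three-way
// dispatch implemented below, in plain terms. Only the named flag bits from
// ConstantPoolCacheEntry are relied on; their numeric values are not spelled out.
//
//   if (flags & (1 << is_forced_virtual_shift))   // java.lang.Object method
//     -> vtable dispatch via invokevirtual_helper()
//   else if (flags & (1 << is_vfinal_shift))      // private/final interface method
//     -> subtype check against the declaring interface, then a direct call
//   else                                          // regular interface method
//     -> receiver subtype check, then itable lookup via lookup_interface_method()
// ---------------------------------------------------------------------------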
+ Label notObjectMethod; + __ li(AT, (1 << ConstantPoolCacheEntry::is_forced_virtual_shift)); + __ andr(AT, T1, AT); + __ beq(AT, R0, notObjectMethod); + + invokevirtual_helper(Rmethod, T3, T1); + // no return from above + __ bind(notObjectMethod); + + Label no_such_interface; // for receiver subtype check + Register recvKlass; // used for exception processing + + // Check for private method invocation - indicated by vfinal + Label notVFinal; + __ li(AT, (1 << ConstantPoolCacheEntry::is_vfinal_shift)); + __ andr(AT, T1, AT); + __ beq(AT, R0, notVFinal); + + // Get receiver klass into FSR - also a null check + __ null_check(T3, oopDesc::klass_offset_in_bytes()); + __ load_klass(FSR, T3); + + Label subtype; + __ check_klass_subtype(FSR, T2, T0, subtype); + // If we get here the typecheck failed + recvKlass = T1; + __ move(recvKlass, FSR); + __ b(no_such_interface); + + __ bind(subtype); + + // do the call - rbx is actually the method to call + + __ profile_final_call(T1); + __ profile_arguments_type(T1, Rmethod, T0, true); + + __ jump_from_interpreted(Rmethod, T1); + // no return from above + __ bind(notVFinal); + + // Get receiver klass into T1 - also a null check + __ restore_locals(); + __ null_check(T3, oopDesc::klass_offset_in_bytes()); + __ load_klass(T1, T3); + + Label no_such_method; + + // Preserve method for throw_AbstractMethodErrorVerbose. + __ move(T3, Rmethod); + // Receiver subtype check against REFC. + // Superklass in T2. Subklass in T1. + __ lookup_interface_method(// inputs: rec. class, interface, itable index + T1, T2, noreg, + // outputs: scan temp. reg, scan temp. reg + T0, FSR, + no_such_interface, + /*return_method=*/false); + + + // profile this call + __ restore_bcp(); + __ profile_virtual_call(T1, T0, FSR); + + // Get declaring interface class from method, and itable index + __ ld_ptr(T2, Rmethod, in_bytes(Method::const_offset())); + __ ld_ptr(T2, T2, in_bytes(ConstMethod::constants_offset())); + __ ld_ptr(T2, T2, ConstantPool::pool_holder_offset_in_bytes()); + __ ld_w(Rmethod, Rmethod, in_bytes(Method::itable_index_offset())); + __ addi_d(Rmethod, Rmethod, (-1) * Method::itable_index_max); + __ sub_w(Rmethod, R0, Rmethod); + + // Preserve recvKlass for throw_AbstractMethodErrorVerbose. + __ move(FSR, T1); + __ lookup_interface_method(// inputs: rec. class, interface, itable index + FSR, T2, Rmethod, + // outputs: method, scan temp. reg + Rmethod, T0, + no_such_interface); + + // Rmethod: Method* to call + // T3: receiver + // Check for abstract method error + // Note: This should be done more efficiently via a throw_abstract_method_error + // interpreter entry point and a conditional jump to it in case of a null + // method. + __ beq(Rmethod, R0, no_such_method); + + __ profile_called_method(Rmethod, T0, T1); + __ profile_arguments_type(T1, Rmethod, T0, true); + + // do the call + // T3: receiver + // Rmethod: Method* + __ jump_from_interpreted(Rmethod, T1); + __ should_not_reach_here(); + + // exception handling code follows... + // note: must restore interpreter registers to canonical + // state for exception handling to work correctly! + + __ bind(no_such_method); + // throw exception + __ pop(Rmethod); // pop return address (pushed by prepare_invoke) + __ restore_bcp(); + __ restore_locals(); + // Pass arguments for generating a verbose error message. 
+ recvKlass = A1; + Register method = A2; + if (recvKlass != T1) { __ move(recvKlass, T1); } + if (method != T3) { __ move(method, T3); } + __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_AbstractMethodErrorVerbose), recvKlass, method); + // the call_VM checks for exception, so we should never return here. + __ should_not_reach_here(); + + __ bind(no_such_interface); + // throw exception + __ pop(Rmethod); // pop return address (pushed by prepare_invoke) + __ restore_bcp(); + __ restore_locals(); + // Pass arguments for generating a verbose error message. + if (recvKlass != T1) { __ move(recvKlass, T1); } + __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_IncompatibleClassChangeErrorVerbose), recvKlass, T2); + // the call_VM checks for exception, so we should never return here. + __ should_not_reach_here(); +} + + +void TemplateTable::invokehandle(int byte_no) { + transition(vtos, vtos); + assert(byte_no == f1_byte, "use this argument"); + const Register T2_method = Rmethod; + const Register FSR_mtype = FSR; + const Register T3_recv = T3; + + prepare_invoke(byte_no, T2_method, FSR_mtype, T3_recv); + //??__ verify_method_ptr(T2_method); + __ verify_oop(T3_recv); + __ null_check(T3_recv); + + // T4: MethodType object (from cpool->resolved_references[f1], if necessary) + // T2_method: MH.invokeExact_MT method (from f2) + + // Note: T4 is already pushed (if necessary) by prepare_invoke + + // FIXME: profile the LambdaForm also + __ profile_final_call(T4); + + // T8: tmp, used for mdp + // T2_method: callee + // T4: tmp + // is_virtual: true + __ profile_arguments_type(T8, T2_method, T4, true); + + __ jump_from_interpreted(T2_method, T4); +} + + void TemplateTable::invokedynamic(int byte_no) { + transition(vtos, vtos); + assert(byte_no == f1_byte, "use this argument"); + + const Register T2_callsite = T2; + + prepare_invoke(byte_no, Rmethod, T2_callsite); + + // T2: CallSite object (from cpool->resolved_references[f1]) + // Rmethod: MH.linkToCallSite method (from f2) + + // Note: T2_callsite is already pushed by prepare_invoke + // %%% should make a type profile for any invokedynamic that takes a ref argument + // profile this call + __ profile_call(T4); + + // T8: tmp, used for mdp + // Rmethod: callee + // T4: tmp + // is_virtual: false + __ profile_arguments_type(T8, Rmethod, T4, false); + + __ verify_oop(T2_callsite); + + __ jump_from_interpreted(Rmethod, T4); + } + +//----------------------------------------------------------------------------- +// Allocation +// T1 : tags & buffer end & thread +// T2 : object end +// T3 : klass +// T1 : object size +// A1 : cpool +// A2 : cp index +// return object in FSR +void TemplateTable::_new() { + transition(vtos, atos); + __ get_unsigned_2_byte_index_at_bcp(A2, 1); + + Label slow_case; + Label done; + Label initialize_header; + Label initialize_object; // including clearing the fields + Label allocate_shared; + + __ get_cpool_and_tags(A1, T1); + + // make sure the class we're about to instantiate has been resolved. 
+ // Note: slow_case does a pop of stack, which is why we loaded class/pushed above + const int tags_offset = Array::base_offset_in_bytes(); + __ add_d(T1, T1, A2); + __ ld_b(AT, T1, tags_offset); + if(os::is_MP()) { + __ membar(Assembler::Membar_mask_bits(__ LoadLoad|__ LoadStore)); + } + __ addi_d(AT, AT, -(int)JVM_CONSTANT_Class); + __ bne(AT, R0, slow_case); + + // get InstanceKlass + __ load_resolved_klass_at_index(A1, A2, T3); + + // make sure klass is initialized & doesn't have finalizer + // make sure klass is fully initialized + __ ld_hu(T1, T3, in_bytes(InstanceKlass::init_state_offset())); + __ addi_d(AT, T1, - (int)InstanceKlass::fully_initialized); + __ bne(AT, R0, slow_case); + + // has_finalizer + __ ld_w(T0, T3, in_bytes(Klass::layout_helper_offset()) ); + __ andi(AT, T0, Klass::_lh_instance_slow_path_bit); + __ bne(AT, R0, slow_case); + + // Allocate the instance + // 1) Try to allocate in the TLAB + // 2) if fail and the object is large allocate in the shared Eden + // 3) if the above fails (or is not applicable), go to a slow case + // (creates a new TLAB, etc.) + + const bool allow_shared_alloc = + Universe::heap()->supports_inline_contig_alloc(); + +#ifndef OPT_THREAD + const Register thread = T8; + if (UseTLAB || allow_shared_alloc) { + __ get_thread(thread); + } +#else + const Register thread = TREG; +#endif + + if (UseTLAB) { + // get tlab_top + __ ld_d(FSR, thread, in_bytes(JavaThread::tlab_top_offset())); + // get tlab_end + __ ld_d(AT, thread, in_bytes(JavaThread::tlab_end_offset())); + __ add_d(T2, FSR, T0); + __ blt(AT, T2, allow_shared_alloc ? allocate_shared : slow_case); + __ st_d(T2, thread, in_bytes(JavaThread::tlab_top_offset())); + + if (ZeroTLAB) { + // the fields have been already cleared + __ beq(R0, R0, initialize_header); + } else { + // initialize both the header and fields + __ beq(R0, R0, initialize_object); + } + } + + // Allocation in the shared Eden , if allowed + // T0 : instance size in words + if(allow_shared_alloc){ + __ bind(allocate_shared); + + Label done, retry; + Address heap_top(T1); + __ li(T1, (long)Universe::heap()->top_addr()); + __ ld_d(FSR, heap_top); + + __ bind(retry); + __ li(AT, (long)Universe::heap()->end_addr()); + __ ld_d(AT, AT, 0); + __ add_d(T2, FSR, T0); + __ blt(AT, T2, slow_case); + + // Compare FSR with the top addr, and if still equal, store the new + // top addr in T2 at the address of the top addr pointer. Sets AT if was + // equal, and clears it otherwise. Use lock prefix for atomicity on MPs. + // + // FSR: object begin + // T2: object end + // T0: instance size in words + + // if someone beat us on the allocation, try again, otherwise continue + __ cmpxchg(heap_top, FSR, T2, AT, true, true, done, &retry); + + __ bind(done); + __ incr_allocated_bytes(thread, T0, 0); + } + + if (UseTLAB || Universe::heap()->supports_inline_contig_alloc()) { + // The object is initialized before the header. If the object size is + // zero, go directly to the header initialization. + __ bind(initialize_object); + __ li(AT, - sizeof(oopDesc)); + __ add_d(T0, T0, AT); + __ beq(T0, R0, initialize_header); + + // initialize remaining object fields: T0 is a multiple of 2 + { + Label loop; + __ add_d(T1, FSR, T0); + + __ bind(loop); + __ addi_d(T1, T1, -oopSize); + __ st_d(R0, T1, sizeof(oopDesc)); + __ bne(T1, FSR, loop); // dont clear header + } + + // klass in T3, + // initialize object header only. 
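// ---------------------------------------------------------------------------
// Editor's note (illustrative sketch, not part of the patch): the TLAB fast path
// generated above, as stand-alone code. tlab_top/tlab_end stand in for the
// fields read from JavaThread; no synchronization is needed because the buffer
// is thread-local. After a successful allocation, the code below fills in the
// object header (mark word and klass pointer).
#include <cstddef>
#include <cstdint>
static inline void* tlab_allocate(uintptr_t& tlab_top, uintptr_t tlab_end, size_t size_in_bytes) {
  uintptr_t obj     = tlab_top;
  uintptr_t new_top = obj + size_in_bytes;
  if (new_top > tlab_end) return nullptr;   // fall back to shared Eden or the slow path
  tlab_top = new_top;                       // bump the allocation pointer
  return reinterpret_cast<void*>(obj);
}
// ---------------------------------------------------------------------------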
+ __ bind(initialize_header); + if (UseBiasedLocking) { + __ ld_d(AT, T3, in_bytes(Klass::prototype_header_offset())); + __ st_d(AT, FSR, oopDesc::mark_offset_in_bytes ()); + } else { + __ li(AT, (long)markOopDesc::prototype()); + __ st_d(AT, FSR, oopDesc::mark_offset_in_bytes()); + } + + __ store_klass_gap(FSR, R0); + __ store_klass(FSR, T3); + + { + SkipIfEqual skip_if(_masm, &DTraceAllocProbes, 0); + // Trigger dtrace event for fastpath + __ push(atos); + __ call_VM_leaf( + CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_object_alloc), FSR); + __ pop(atos); + + } + __ b(done); + } + + // slow case + __ bind(slow_case); + __ get_constant_pool(A1); + __ get_unsigned_2_byte_index_at_bcp(A2, 1); + call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::_new), A1, A2); + + // continue + __ bind(done); + __ membar(__ StoreStore); +} + +void TemplateTable::newarray() { + transition(itos, atos); + __ ld_bu(A1, at_bcp(1)); + // type, count + call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::newarray), A1, FSR); + __ membar(__ StoreStore); +} + +void TemplateTable::anewarray() { + transition(itos, atos); + __ get_2_byte_integer_at_bcp(A2, AT, 1); + __ huswap(A2); + __ get_constant_pool(A1); + // cp, index, count + call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::anewarray), A1, A2, FSR); + __ membar(__ StoreStore); +} + +void TemplateTable::arraylength() { + transition(atos, itos); + __ null_check(FSR, arrayOopDesc::length_offset_in_bytes()); + __ ld_w(FSR, FSR, arrayOopDesc::length_offset_in_bytes()); +} + +// when invoke gen_subtype_check, super in T3, sub in T2, object in FSR(it's always) +// T2 : sub klass +// T3 : cpool +// T3 : super klass +void TemplateTable::checkcast() { + transition(atos, atos); + Label done, is_null, ok_is_subtype, quicked, resolved; + __ beq(FSR, R0, is_null); + + // Get cpool & tags index + __ get_cpool_and_tags(T3, T1); + __ get_2_byte_integer_at_bcp(T2, AT, 1); + __ huswap(T2); + + // See if bytecode has already been quicked + __ add_d(AT, T1, T2); + __ ld_b(AT, AT, Array::base_offset_in_bytes()); + if(os::is_MP()) { + __ membar(Assembler::Membar_mask_bits(__ LoadLoad|__ LoadStore)); + } + __ addi_d(AT, AT, - (int)JVM_CONSTANT_Class); + __ beq(AT, R0, quicked); + + // In InterpreterRuntime::quicken_io_cc, lots of new classes may be loaded. + // Then, GC will move the object in V0 to another places in heap. + // Therefore, We should never save such an object in register. + // Instead, we should save it in the stack. It can be modified automatically by the GC thread. + // After GC, the object address in FSR is changed to a new place. + // + __ push(atos); + const Register thread = TREG; +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc)); + __ get_vm_result_2(T3, thread); + __ pop_ptr(FSR); + __ b(resolved); + + // klass already in cp, get superklass in T3 + __ bind(quicked); + __ load_resolved_klass_at_index(T3, T2, T3); + + __ bind(resolved); + + // get subklass in T2 + __ load_klass(T2, FSR); + // Superklass in T3. Subklass in T2. + __ gen_subtype_check(T3, T2, ok_is_subtype); + + // Come here on failure + // object is at FSR + __ jmp(Interpreter::_throw_ClassCastException_entry); + + // Come here on success + __ bind(ok_is_subtype); + + // Collect counts on whether this check-cast sees NULLs a lot or not. 
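// ---------------------------------------------------------------------------
// Editor's note (illustrative sketch, not part of the patch): the bytecode
// semantics that checkcast above and instanceof below implement, in Java terms:
//
//   checkcast:  if (obj != null && !klass.isInstance(obj)) throw new ClassCastException();
//               // obj itself stays on the stack
//   instanceof: push(obj != null && klass.isInstance(obj) ? 1 : 0);
// ---------------------------------------------------------------------------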
+ if (ProfileInterpreter) { + __ b(done); + __ bind(is_null); + __ profile_null_seen(T3); + } else { + __ bind(is_null); + } + __ bind(done); +} + +// T3 as cpool, T1 as tags, T2 as index +// object always in FSR, superklass in T3, subklass in T2 +void TemplateTable::instanceof() { + transition(atos, itos); + Label done, is_null, ok_is_subtype, quicked, resolved; + + __ beq(FSR, R0, is_null); + + // Get cpool & tags index + __ get_cpool_and_tags(T3, T1); + // get index + __ get_2_byte_integer_at_bcp(T2, AT, 1); + __ huswap(T2); + + // See if bytecode has already been quicked + // quicked + __ add_d(AT, T1, T2); + __ ld_b(AT, AT, Array::base_offset_in_bytes()); + if(os::is_MP()) { + __ membar(Assembler::Membar_mask_bits(__ LoadLoad|__ LoadStore)); + } + __ addi_d(AT, AT, - (int)JVM_CONSTANT_Class); + __ beq(AT, R0, quicked); + + __ push(atos); + const Register thread = TREG; +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc)); + __ get_vm_result_2(T3, thread); + __ pop_ptr(FSR); + __ b(resolved); + + // get superklass in T3, subklass in T2 + __ bind(quicked); + __ load_resolved_klass_at_index(T3, T2, T3); + + __ bind(resolved); + // get subklass in T2 + __ load_klass(T2, FSR); + + // Superklass in T3. Subklass in T2. + __ gen_subtype_check(T3, T2, ok_is_subtype); + __ move(FSR, R0); + // Come here on failure + __ b(done); + + // Come here on success + __ bind(ok_is_subtype); + __ li(FSR, 1); + + // Collect counts on whether this test sees NULLs a lot or not. + if (ProfileInterpreter) { + __ beq(R0, R0, done); + __ bind(is_null); + __ profile_null_seen(T3); + } else { + __ bind(is_null); // same as 'done' + } + __ bind(done); + // FSR = 0: obj == NULL or obj is not an instanceof the specified klass + // FSR = 1: obj != NULL and obj is an instanceof the specified klass +} + +//-------------------------------------------------------- +//-------------------------------------------- +// Breakpoints +void TemplateTable::_breakpoint() { + // Note: We get here even if we are single stepping.. + // jbug inists on setting breakpoints at every bytecode + // even if we are in single step mode. + + transition(vtos, vtos); + + // get the unpatched byte code + __ get_method(A1); + __ call_VM(NOREG, + CAST_FROM_FN_PTR(address, + InterpreterRuntime::get_original_bytecode_at), + A1, BCP); + __ move(Rnext, V0); // Rnext will be used in dispatch_only_normal + + // post the breakpoint event + __ get_method(A1); + __ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::_breakpoint), A1, BCP); + + // complete the execution of original bytecode + __ dispatch_only_normal(vtos); +} + +//----------------------------------------------------------------------------- +// Exceptions + +void TemplateTable::athrow() { + transition(atos, vtos); + __ null_check(FSR); + __ jmp(Interpreter::throw_exception_entry()); +} + +//----------------------------------------------------------------------------- +// Synchronization +// +// Note: monitorenter & exit are symmetric routines; which is reflected +// in the assembly code structure as well +// +// Stack layout: +// +// [expressions ] <--- SP = expression stack top +// .. +// [expressions ] +// [monitor entry] <--- monitor block top = expression stack bot +// .. +// [monitor entry] +// [frame data ] <--- monitor block bot +// ... 
+// [return addr ] <--- FP + +// we use T2 as monitor entry pointer, T3 as monitor top pointer, c_rarg0 as free slot pointer +// object always in FSR +void TemplateTable::monitorenter() { + transition(atos, vtos); + + // check for NULL object + __ null_check(FSR); + + const Address monitor_block_top(FP, frame::interpreter_frame_monitor_block_top_offset + * wordSize); + const int entry_size = (frame::interpreter_frame_monitor_size()* wordSize); + Label allocated; + + // initialize entry pointer + __ move(c_rarg0, R0); + + // find a free slot in the monitor block (result in c_rarg0) + { + Label entry, loop, exit, next; + __ ld_d(T2, monitor_block_top); + __ addi_d(T3, FP, frame::interpreter_frame_initial_sp_offset * wordSize); + __ b(entry); + + // free slot? + __ bind(loop); + __ ld_d(AT, T2, BasicObjectLock::obj_offset_in_bytes()); + __ bne(AT, R0, next); + __ move(c_rarg0, T2); + + __ bind(next); + __ beq(FSR, AT, exit); + __ addi_d(T2, T2, entry_size); + + __ bind(entry); + __ bne(T3, T2, loop); + __ bind(exit); + } + + __ bne(c_rarg0, R0, allocated); + + // allocate one if there's no free slot + { + Label entry, loop; + // 1. compute new pointers // SP: old expression stack top + __ ld_d(c_rarg0, monitor_block_top); + __ addi_d(SP, SP, -entry_size); + __ addi_d(c_rarg0, c_rarg0, -entry_size); + __ st_d(c_rarg0, monitor_block_top); + __ move(T3, SP); + __ b(entry); + + // 2. move expression stack contents + __ bind(loop); + __ ld_d(AT, T3, entry_size); + __ st_d(AT, T3, 0); + __ addi_d(T3, T3, wordSize); + __ bind(entry); + __ bne(T3, c_rarg0, loop); + } + + __ bind(allocated); + // Increment bcp to point to the next bytecode, + // so exception handling for async. exceptions work correctly. + // The object has already been poped from the stack, so the + // expression stack looks correct. + __ addi_d(BCP, BCP, 1); + __ st_d(FSR, c_rarg0, BasicObjectLock::obj_offset_in_bytes()); + __ lock_object(c_rarg0); + // check to make sure this monitor doesn't cause stack overflow after locking + __ save_bcp(); // in case of exception + __ generate_stack_overflow_check(0); + // The bcp has already been incremented. Just need to dispatch to next instruction. + + __ dispatch_next(vtos); +} + +// T2 : top +// c_rarg0 : entry +void TemplateTable::monitorexit() { + transition(atos, vtos); + + __ null_check(FSR); + + const int entry_size =(frame::interpreter_frame_monitor_size()* wordSize); + Label found; + + // find matching slot + { + Label entry, loop; + __ ld_d(c_rarg0, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); + __ addi_d(T2, FP, frame::interpreter_frame_initial_sp_offset * wordSize); + __ b(entry); + + __ bind(loop); + __ ld_d(AT, c_rarg0, BasicObjectLock::obj_offset_in_bytes()); + __ beq(FSR, AT, found); + __ addi_d(c_rarg0, c_rarg0, entry_size); + __ bind(entry); + __ bne(T2, c_rarg0, loop); + } + + // error handling. 
Unlocking was not block-structured + Label end; + __ call_VM(NOREG, CAST_FROM_FN_PTR(address, + InterpreterRuntime::throw_illegal_monitor_state_exception)); + __ should_not_reach_here(); + + // call run-time routine + // c_rarg0: points to monitor entry + __ bind(found); + __ move(TSR, FSR); + __ unlock_object(c_rarg0); + __ move(FSR, TSR); + __ bind(end); +} + + +// Wide instructions +void TemplateTable::wide() { + transition(vtos, vtos); + __ ld_bu(Rnext, at_bcp(1)); + __ slli_d(T4, Rnext, Address::times_8); + __ li(AT, (long)Interpreter::_wentry_point); + __ add_d(AT, T4, AT); + __ ld_d(T4, AT, 0); + __ jr(T4); +} + + +void TemplateTable::multianewarray() { + transition(vtos, atos); + // last dim is on top of stack; we want address of first one: + // first_addr = last_addr + (ndims - 1) * wordSize + __ ld_bu(A1, at_bcp(3)); // dimension + __ addi_d(A1, A1, -1); + __ alsl_d(A1, A1, SP, Address::times_8 - 1); // now A1 pointer to the count array on the stack + call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::multianewarray), A1); + __ ld_bu(AT, at_bcp(3)); + __ alsl_d(SP, AT, SP, Address::times_8 - 1); + __ membar(__ AnyAny);//no membar here for aarch64 +} +#endif // !CC_INTERP diff --git a/src/hotspot/cpu/loongarch/vmStructs_loongarch.hpp b/src/hotspot/cpu/loongarch/vmStructs_loongarch.hpp new file mode 100644 index 00000000000..5b9f7b78981 --- /dev/null +++ b/src/hotspot/cpu/loongarch/vmStructs_loongarch.hpp @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2001, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_LOONGARCH_VMSTRUCTS_LOONGARCH_HPP +#define CPU_LOONGARCH_VMSTRUCTS_LOONGARCH_HPP + +// These are the CPU-specific fields, types and integer +// constants required by the Serviceability Agent. This file is +// referenced by vmStructs.cpp. 
+ +#define VM_STRUCTS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \ + volatile_nonstatic_field(JavaFrameAnchor, _last_Java_fp, intptr_t*) \ + \ + + /* NOTE that we do not use the last_entry() macro here; it is used */ + /* in vmStructs__.hpp's VM_STRUCTS_OS_CPU macro (and must */ + /* be present there) */ + + +#define VM_TYPES_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) \ + + /* NOTE that we do not use the last_entry() macro here; it is used */ + /* in vmStructs__.hpp's VM_TYPES_OS_CPU macro (and must */ + /* be present there) */ + + +#define VM_INT_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) \ + + /* NOTE that we do not use the last_entry() macro here; it is used */ + /* in vmStructs__.hpp's VM_INT_CONSTANTS_OS_CPU macro (and must */ + /* be present there) */ + +#define VM_LONG_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) \ + + /* NOTE that we do not use the last_entry() macro here; it is used */ + /* in vmStructs__.hpp's VM_LONG_CONSTANTS_OS_CPU macro (and must */ + /* be present there) */ + +#endif // CPU_LOONGARCH_VMSTRUCTS_LOONGARCH_HPP diff --git a/src/hotspot/cpu/loongarch/vm_version_ext_loongarch.cpp b/src/hotspot/cpu/loongarch/vm_version_ext_loongarch.cpp new file mode 100644 index 00000000000..eb8f075c715 --- /dev/null +++ b/src/hotspot/cpu/loongarch/vm_version_ext_loongarch.cpp @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2013, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2018, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "memory/allocation.inline.hpp" +#include "runtime/os.inline.hpp" +#include "vm_version_ext_loongarch.hpp" + +// VM_Version_Ext statics +int VM_Version_Ext::_no_of_threads = 0; +int VM_Version_Ext::_no_of_cores = 0; +int VM_Version_Ext::_no_of_sockets = 0; +bool VM_Version_Ext::_initialized = false; +char VM_Version_Ext::_cpu_name[CPU_TYPE_DESC_BUF_SIZE] = {0}; +char VM_Version_Ext::_cpu_desc[CPU_DETAILED_DESC_BUF_SIZE] = {0}; + +void VM_Version_Ext::initialize_cpu_information(void) { + // do nothing if cpu info has been initialized + if (_initialized) { + return; + } + + _no_of_cores = os::processor_count(); + _no_of_threads = _no_of_cores; + _no_of_sockets = _no_of_cores; + snprintf(_cpu_name, CPU_TYPE_DESC_BUF_SIZE - 1, "LoongArch"); + snprintf(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE, "LoongArch %s", cpu_features()); + _initialized = true; +} + +int VM_Version_Ext::number_of_threads(void) { + initialize_cpu_information(); + return _no_of_threads; +} + +int VM_Version_Ext::number_of_cores(void) { + initialize_cpu_information(); + return _no_of_cores; +} + +int VM_Version_Ext::number_of_sockets(void) { + initialize_cpu_information(); + return _no_of_sockets; +} + +const char* VM_Version_Ext::cpu_name(void) { + initialize_cpu_information(); + char* tmp = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_TYPE_DESC_BUF_SIZE, mtTracing); + if (NULL == tmp) { + return NULL; + } + strncpy(tmp, _cpu_name, CPU_TYPE_DESC_BUF_SIZE); + return tmp; +} + +const char* VM_Version_Ext::cpu_description(void) { + initialize_cpu_information(); + char* tmp = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_DETAILED_DESC_BUF_SIZE, mtTracing); + if (NULL == tmp) { + return NULL; + } + strncpy(tmp, _cpu_desc, CPU_DETAILED_DESC_BUF_SIZE); + return tmp; +} diff --git a/src/hotspot/cpu/loongarch/vm_version_ext_loongarch.hpp b/src/hotspot/cpu/loongarch/vm_version_ext_loongarch.hpp new file mode 100644 index 00000000000..1a93123134c --- /dev/null +++ b/src/hotspot/cpu/loongarch/vm_version_ext_loongarch.hpp @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2016, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2018, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef CPU_LOONGARCH_VM_VERSION_EXT_LOONGARCH_HPP +#define CPU_LOONGARCH_VM_VERSION_EXT_LOONGARCH_HPP + +#include "runtime/vm_version.hpp" +#include "utilities/macros.hpp" + +class VM_Version_Ext : public VM_Version { + private: + static const size_t CPU_TYPE_DESC_BUF_SIZE = 256; + static const size_t CPU_DETAILED_DESC_BUF_SIZE = 4096; + + static int _no_of_threads; + static int _no_of_cores; + static int _no_of_sockets; + static bool _initialized; + static char _cpu_name[CPU_TYPE_DESC_BUF_SIZE]; + static char _cpu_desc[CPU_DETAILED_DESC_BUF_SIZE]; + + public: + static int number_of_threads(void); + static int number_of_cores(void); + static int number_of_sockets(void); + + static const char* cpu_name(void); + static const char* cpu_description(void); + static void initialize_cpu_information(void); +}; + +#endif // CPU_LOONGARCH_VM_VERSION_EXT_LOONGARCH_HPP diff --git a/src/hotspot/cpu/loongarch/vm_version_loongarch.cpp b/src/hotspot/cpu/loongarch/vm_version_loongarch.cpp new file mode 100644 index 00000000000..91151351661 --- /dev/null +++ b/src/hotspot/cpu/loongarch/vm_version_loongarch.cpp @@ -0,0 +1,397 @@ +/* + * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "memory/resourceArea.hpp" +#include "runtime/java.hpp" +#include "runtime/stubCodeGenerator.hpp" +#include "runtime/vm_version.hpp" +#ifdef TARGET_OS_FAMILY_linux +# include "os_linux.inline.hpp" +#endif + +#define T5 RT5 + +const char* VM_Version::_features_str = ""; +VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, }; +bool VM_Version::_cpu_info_is_initialized = false; + +static BufferBlob* stub_blob; +static const int stub_size = 600; + +extern "C" { + typedef void (*get_cpu_info_stub_t)(void*); +} +static get_cpu_info_stub_t get_cpu_info_stub = NULL; + + +class VM_Version_StubGenerator: public StubCodeGenerator { + public: + + VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {} + + address generate_get_cpu_info() { + assert(!VM_Version::cpu_info_is_initialized(), "VM_Version should not be initialized"); + StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub"); +# define __ _masm-> + + address start = __ pc(); + + __ enter(); + __ push(AT); + __ push(T5); + + __ li(AT, (long)0); + __ cpucfg(T5, AT); + __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id0_offset())); + + __ li(AT, 1); + __ cpucfg(T5, AT); + __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id1_offset())); + + __ li(AT, 2); + __ cpucfg(T5, AT); + __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id2_offset())); + + __ li(AT, 3); + __ cpucfg(T5, AT); + __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id3_offset())); + + __ li(AT, 4); + __ cpucfg(T5, AT); + __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id4_offset())); + + __ li(AT, 5); + __ cpucfg(T5, AT); + __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id5_offset())); + + __ li(AT, 6); + __ cpucfg(T5, AT); + __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id6_offset())); + + __ li(AT, 10); + __ cpucfg(T5, AT); + __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id10_offset())); + + __ li(AT, 11); + __ cpucfg(T5, AT); + __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id11_offset())); + + __ li(AT, 12); + __ cpucfg(T5, AT); + __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id12_offset())); + + __ li(AT, 13); + __ cpucfg(T5, AT); + __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id13_offset())); + + __ li(AT, 14); + __ cpucfg(T5, AT); + __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id14_offset())); + + __ pop(T5); + __ pop(AT); + __ leave(); + __ jr(RA); +# undef __ + return start; + }; +}; + +uint32_t VM_Version::get_feature_flags_by_cpucfg() { + uint32_t result = 0; + if (_cpuid_info.cpucfg_info_id1.bits.ARCH == 0b00 || _cpuid_info.cpucfg_info_id1.bits.ARCH == 0b01 ) { + result |= CPU_LA32; + } else if (_cpuid_info.cpucfg_info_id1.bits.ARCH == 0b10 ) { + result |= CPU_LA64; + } + + if (_cpuid_info.cpucfg_info_id2.bits.FP_CFG != 0) + result |= CPU_FP; + + if (_cpuid_info.cpucfg_info_id3.bits.CCDMA != 0) + result |= CPU_CCDMA; + if (_cpuid_info.cpucfg_info_id3.bits.LLDBAR != 0) + result |= CPU_LLDBAR; + if (_cpuid_info.cpucfg_info_id3.bits.SCDLY != 0) + result |= CPU_SCDLY; + if (_cpuid_info.cpucfg_info_id3.bits.LLEXC != 0) + result |= CPU_LLEXC; + + result |= CPU_ULSYNC; + + return result; +} + +void VM_Version::get_processor_features() { + + clean_cpuFeatures(); + + get_os_cpu_info(); + + get_cpu_info_stub(&_cpuid_info); + _features |= get_feature_flags_by_cpucfg(); + + _supports_cx8 = true; + + if (UseG1GC && FLAG_IS_DEFAULT(MaxGCPauseMillis)) { + 
FLAG_SET_DEFAULT(MaxGCPauseMillis, 150); + } + + if (supports_lsx()) { + if (FLAG_IS_DEFAULT(UseLSX)) { + FLAG_SET_DEFAULT(UseLSX, true); + } + } else if (UseLSX) { + warning("LSX instructions are not available on this CPU"); + FLAG_SET_DEFAULT(UseLSX, false); + } + + if (supports_lasx()) { + if (FLAG_IS_DEFAULT(UseLASX)) { + FLAG_SET_DEFAULT(UseLASX, true); + } + } else if (UseLASX) { + warning("LASX instructions are not available on this CPU"); + FLAG_SET_DEFAULT(UseLASX, false); + } + + if (UseLASX && !UseLSX) { + warning("LASX instructions depends on LSX, setting UseLASX to false"); + FLAG_SET_DEFAULT(UseLASX, false); + } + +#ifdef COMPILER2 + int max_vector_size = 0; + int min_vector_size = 0; + if (UseLASX) { + max_vector_size = 32; + min_vector_size = 16; + } + else if (UseLSX) { + max_vector_size = 16; + min_vector_size = 16; + } + + if (!FLAG_IS_DEFAULT(MaxVectorSize)) { + if (MaxVectorSize == 0) { + // do nothing + } else if (MaxVectorSize > max_vector_size) { + warning("MaxVectorSize must be at most %i on this platform", max_vector_size); + FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size); + } else if (MaxVectorSize < min_vector_size) { + warning("MaxVectorSize must be at least %i or 0 on this platform, setting to: %i", min_vector_size, min_vector_size); + FLAG_SET_DEFAULT(MaxVectorSize, min_vector_size); + } else if (!is_power_of_2(MaxVectorSize)) { + warning("MaxVectorSize must be a power of 2, setting to default: %i", max_vector_size); + FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size); + } + } else { + // If default, use highest supported configuration + FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size); + } +#endif + + char buf[256]; + + // A note on the _features_string format: + // There are jtreg tests checking the _features_string for various properties. + // For some strange reason, these tests require the string to contain + // only _lowercase_ characters. Keep that in mind when being surprised + // about the unusual notation of features - and when adding new ones. + // Features may have one comma at the end. + // Furthermore, use one, and only one, separator space between features. + // Multiple spaces are considered separate tokens, messing up everything. + jio_snprintf(buf, sizeof(buf), "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s, " + "0x%lx, fp_ver: %d, lvz_ver: %d, ", + (is_la64() ? "la64" : ""), + (is_la32() ? "la32" : ""), + (supports_lsx() ? ", lsx" : ""), + (supports_lasx() ? ", lasx" : ""), + (supports_crypto() ? ", crypto" : ""), + (supports_lam() ? ", am" : ""), + (supports_ual() ? ", ual" : ""), + (supports_lldbar() ? ", lldbar" : ""), + (supports_scdly() ? ", scdly" : ""), + (supports_llexc() ? ", llexc" : ""), + (supports_lbt_x86() ? ", lbt_x86" : ""), + (supports_lbt_arm() ? ", lbt_arm" : ""), + (supports_lbt_mips() ? ", lbt_mips" : ""), + (needs_llsync() ? ", needs_llsync" : ""), + (needs_tgtsync() ? ", needs_tgtsync": ""), + (needs_ulsync() ? 
", needs_ulsync": ""), + _cpuid_info.cpucfg_info_id0.bits.PRID, + _cpuid_info.cpucfg_info_id2.bits.FP_VER, + _cpuid_info.cpucfg_info_id2.bits.LVZ_VER); + _features_str = strdup(buf); + + assert(!is_la32(), "Should Not Reach Here, what is the cpu type?"); + assert( is_la64(), "Should be LoongArch64"); + + if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) { + FLAG_SET_DEFAULT(AllocatePrefetchStyle, 1); + } + + if (FLAG_IS_DEFAULT(AllocatePrefetchLines)) { + FLAG_SET_DEFAULT(AllocatePrefetchLines, 3); + } + + if (FLAG_IS_DEFAULT(AllocatePrefetchStepSize)) { + FLAG_SET_DEFAULT(AllocatePrefetchStepSize, 64); + } + + if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) { + FLAG_SET_DEFAULT(AllocatePrefetchDistance, 192); + } + + if (FLAG_IS_DEFAULT(AllocateInstancePrefetchLines)) { + FLAG_SET_DEFAULT(AllocateInstancePrefetchLines, 1); + } + + // Basic instructions are used to implement SHA Intrinsics on LA, so sha + // instructions support is not needed. + if (/*supports_crypto()*/ 1) { + if (FLAG_IS_DEFAULT(UseSHA)) { + FLAG_SET_DEFAULT(UseSHA, true); + } + } else if (UseSHA) { + warning("SHA instructions are not available on this CPU"); + FLAG_SET_DEFAULT(UseSHA, false); + } + + if (UseSHA/* && supports_crypto()*/) { + if (FLAG_IS_DEFAULT(UseSHA1Intrinsics)) { + FLAG_SET_DEFAULT(UseSHA1Intrinsics, true); + } + } else if (UseSHA1Intrinsics) { + warning("Intrinsics for SHA-1 crypto hash functions not available on this CPU."); + FLAG_SET_DEFAULT(UseSHA1Intrinsics, false); + } + + if (UseSHA/* && supports_crypto()*/) { + if (FLAG_IS_DEFAULT(UseSHA256Intrinsics)) { + FLAG_SET_DEFAULT(UseSHA256Intrinsics, true); + } + } else if (UseSHA256Intrinsics) { + warning("Intrinsics for SHA-224 and SHA-256 crypto hash functions not available on this CPU."); + FLAG_SET_DEFAULT(UseSHA256Intrinsics, false); + } + + if (UseSHA512Intrinsics) { + warning("Intrinsics for SHA-384 and SHA-512 crypto hash functions not available on this CPU."); + FLAG_SET_DEFAULT(UseSHA512Intrinsics, false); + } + + if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics)) { + FLAG_SET_DEFAULT(UseSHA, false); + } + + // Basic instructions are used to implement AES Intrinsics on LA, so AES + // instructions support is not needed. 
+ if (/*supports_crypto()*/ 1) { + if (FLAG_IS_DEFAULT(UseAES)) { + FLAG_SET_DEFAULT(UseAES, true); + } + } else if (UseAES) { + if (!FLAG_IS_DEFAULT(UseAES)) + warning("AES instructions are not available on this CPU"); + FLAG_SET_DEFAULT(UseAES, false); + } + + if (UseAES/* && supports_crypto()*/) { + if (FLAG_IS_DEFAULT(UseAESIntrinsics)) { + FLAG_SET_DEFAULT(UseAESIntrinsics, true); + } + } else if (UseAESIntrinsics) { + if (!FLAG_IS_DEFAULT(UseAESIntrinsics)) + warning("AES intrinsics are not available on this CPU"); + FLAG_SET_DEFAULT(UseAESIntrinsics, false); + } + + if (UseAESCTRIntrinsics) { + warning("AES/CTR intrinsics are not available on this CPU"); + FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false); + } + + if (FLAG_IS_DEFAULT(UseCRC32)) { + FLAG_SET_DEFAULT(UseCRC32, true); + } + + if (UseCRC32) { + if (FLAG_IS_DEFAULT(UseCRC32Intrinsics)) { + UseCRC32Intrinsics = true; + } + + if (FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) { + UseCRC32CIntrinsics = true; + } + } + +#ifdef COMPILER2 + if (FLAG_IS_DEFAULT(UseMulAddIntrinsic)) { + FLAG_SET_DEFAULT(UseMulAddIntrinsic, true); + } + + if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) { + UseMontgomeryMultiplyIntrinsic = true; + } + if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) { + UseMontgomerySquareIntrinsic = true; + } +#endif + + // This machine allows unaligned memory accesses + if (FLAG_IS_DEFAULT(UseUnalignedAccesses)) { + FLAG_SET_DEFAULT(UseUnalignedAccesses, true); + } + + if (FLAG_IS_DEFAULT(UseFMA)) { + FLAG_SET_DEFAULT(UseFMA, true); + } + + if (FLAG_IS_DEFAULT(UseCopySignIntrinsic)) { + FLAG_SET_DEFAULT(UseCopySignIntrinsic, true); + } + + UNSUPPORTED_OPTION(CriticalJNINatives); +} + +void VM_Version::initialize() { + ResourceMark rm; + // Making this stub must be FIRST use of assembler + + stub_blob = BufferBlob::create("get_cpu_info_stub", stub_size); + if (stub_blob == NULL) { + vm_exit_during_initialization("Unable to allocate get_cpu_info_stub"); + } + CodeBuffer c(stub_blob); + VM_Version_StubGenerator g(&c); + get_cpu_info_stub = CAST_TO_FN_PTR(get_cpu_info_stub_t, + g.generate_get_cpu_info()); + + get_processor_features(); +} diff --git a/src/hotspot/cpu/loongarch/vm_version_loongarch.hpp b/src/hotspot/cpu/loongarch/vm_version_loongarch.hpp new file mode 100644 index 00000000000..00b8e608a1d --- /dev/null +++ b/src/hotspot/cpu/loongarch/vm_version_loongarch.hpp @@ -0,0 +1,292 @@ +/* + * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2023, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef CPU_LOONGARCH_VM_VERSION_LOONGARCH_HPP +#define CPU_LOONGARCH_VM_VERSION_LOONGARCH_HPP + +#include "runtime/abstract_vm_version.hpp" +#include "runtime/globals_extension.hpp" +#include "utilities/sizes.hpp" + +class VM_Version: public Abstract_VM_Version { + friend class JVMCIVMStructs; + +public: + + union LoongArch_Cpucfg_Id0 { + uint32_t value; + struct { + uint32_t PRID : 32; + } bits; + }; + + union LoongArch_Cpucfg_Id1 { + uint32_t value; + struct { + uint32_t ARCH : 2, + PGMMU : 1, + IOCSR : 1, + PALEN : 8, + VALEN : 8, + UAL : 1, // unaligned access + RI : 1, + EP : 1, + RPLV : 1, + HP : 1, + IOCSR_BRD : 1, + MSG_INT : 1, + : 5; + } bits; + }; + + union LoongArch_Cpucfg_Id2 { + uint32_t value; + struct { + uint32_t FP_CFG : 1, // FP is used, use FP_CFG instead + FP_SP : 1, + FP_DP : 1, + FP_VER : 3, + LSX : 1, + LASX : 1, + COMPLEX : 1, + CRYPTO : 1, + LVZ : 1, + LVZ_VER : 3, + LLFTP : 1, + LLFTP_VER : 3, + LBT_X86 : 1, + LBT_ARM : 1, + LBT_MIPS : 1, + LSPW : 1, + LAM : 1, + : 9; + } bits; + }; + + union LoongArch_Cpucfg_Id3 { + uint32_t value; + struct { + uint32_t CCDMA : 1, + SFB : 1, + UCACC : 1, + LLEXC : 1, + SCDLY : 1, + LLDBAR : 1, + ITLBHMC : 1, + ICHMC : 1, + SPW_LVL : 3, + SPW_HP_HF : 1, + RVA : 1, + RVAMAXM1 : 4, + : 15; + } bits; + }; + + union LoongArch_Cpucfg_Id4 { + uint32_t value; + struct { + uint32_t CC_FREQ : 32; + } bits; + }; + + union LoongArch_Cpucfg_Id5 { + uint32_t value; + struct { + uint32_t CC_MUL : 16, + CC_DIV : 16; + } bits; + }; + + union LoongArch_Cpucfg_Id6 { + uint32_t value; + struct { + uint32_t PMP : 1, + PMVER : 3, + PMNUM : 4, + PMBITS : 6, + UPM : 1, + : 17; + } bits; + }; + + union LoongArch_Cpucfg_Id10 { + uint32_t value; + struct { + uint32_t L1IU_PRESENT : 1, + L1IU_UNIFY : 1, + L1D_PRESENT : 1, + L2IU_PRESENT : 1, + L2IU_UNIFY : 1, + L2IU_PRIVATE : 1, + L2IU_INCLUSIVE : 1, + L2D_PRESENT : 1, + L2D_PRIVATE : 1, + L2D_INCLUSIVE : 1, + L3IU_PRESENT : 1, + L3IU_UNIFY : 1, + L3IU_PRIVATE : 1, + L3IU_INCLUSIVE : 1, + L3D_PRESENT : 1, + L3D_PRIVATE : 1, + L3D_INCLUSIVE : 1, + : 15; + } bits; + }; + + union LoongArch_Cpucfg_Id11 { + uint32_t value; + struct { + uint32_t WAYM1 : 16, + INDEXMLOG2 : 8, + LINESIZELOG2 : 7, + : 1; + } bits; + }; + + union LoongArch_Cpucfg_Id12 { + uint32_t value; + struct { + uint32_t WAYM1 : 16, + INDEXMLOG2 : 8, + LINESIZELOG2 : 7, + : 1; + } bits; + }; + + union LoongArch_Cpucfg_Id13 { + uint32_t value; + struct { + uint32_t WAYM1 : 16, + INDEXMLOG2 : 8, + LINESIZELOG2 : 7, + : 1; + } bits; + }; + + union LoongArch_Cpucfg_Id14 { + uint32_t value; + struct { + uint32_t WAYM1 : 16, + INDEXMLOG2 : 8, + LINESIZELOG2 : 7, + : 1; + } bits; + }; + +protected: + + enum { + CPU_LAM = (1 << 1), + CPU_UAL = (1 << 2), + CPU_LSX = (1 << 4), + CPU_LASX = (1 << 5), + CPU_COMPLEX = (1 << 7), + CPU_CRYPTO = (1 << 8), + CPU_LBT_X86 = (1 << 10), + CPU_LBT_ARM = (1 << 11), + CPU_LBT_MIPS = (1 << 12), + // flags above must follow Linux HWCAP + CPU_LA32 = (1 << 13), + CPU_LA64 = (1 << 14), + CPU_FP = (1 << 15), + CPU_LLEXC = (1 << 16), + CPU_SCDLY = (1 << 17), + CPU_LLDBAR = (1 << 18), + CPU_CCDMA = (1 << 19), + CPU_LLSYNC = (1 << 20), + CPU_TGTSYNC = (1 << 21), + CPU_ULSYNC = (1 << 22), + + //////////////////////add some other feature here////////////////// + } cpuFeatureFlags; + + static const char* _features_str; + static bool _cpu_info_is_initialized; + + struct CpuidInfo { + LoongArch_Cpucfg_Id0 cpucfg_info_id0; + LoongArch_Cpucfg_Id1 cpucfg_info_id1; + LoongArch_Cpucfg_Id2 cpucfg_info_id2; + 
LoongArch_Cpucfg_Id3 cpucfg_info_id3; + LoongArch_Cpucfg_Id4 cpucfg_info_id4; + LoongArch_Cpucfg_Id5 cpucfg_info_id5; + LoongArch_Cpucfg_Id6 cpucfg_info_id6; + LoongArch_Cpucfg_Id10 cpucfg_info_id10; + LoongArch_Cpucfg_Id11 cpucfg_info_id11; + LoongArch_Cpucfg_Id12 cpucfg_info_id12; + LoongArch_Cpucfg_Id13 cpucfg_info_id13; + LoongArch_Cpucfg_Id14 cpucfg_info_id14; + }; + + // The actual cpuid info block + static CpuidInfo _cpuid_info; + + static uint32_t get_feature_flags_by_cpucfg(); + static void get_processor_features(); + static void get_os_cpu_info(); + +public: + // Offsets for cpuid asm stub + static ByteSize Loongson_Cpucfg_id0_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id0); } + static ByteSize Loongson_Cpucfg_id1_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id1); } + static ByteSize Loongson_Cpucfg_id2_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id2); } + static ByteSize Loongson_Cpucfg_id3_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id3); } + static ByteSize Loongson_Cpucfg_id4_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id4); } + static ByteSize Loongson_Cpucfg_id5_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id5); } + static ByteSize Loongson_Cpucfg_id6_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id6); } + static ByteSize Loongson_Cpucfg_id10_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id10); } + static ByteSize Loongson_Cpucfg_id11_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id11); } + static ByteSize Loongson_Cpucfg_id12_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id12); } + static ByteSize Loongson_Cpucfg_id13_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id13); } + static ByteSize Loongson_Cpucfg_id14_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id14); } + + static void clean_cpuFeatures() { _features = 0; } + + // Initialization + static void initialize(); + + static bool cpu_info_is_initialized() { return _cpu_info_is_initialized; } + + static bool is_la32() { return _features & CPU_LA32; } + static bool is_la64() { return _features & CPU_LA64; } + static bool supports_crypto() { return _features & CPU_CRYPTO; } + static bool supports_lsx() { return _features & CPU_LSX; } + static bool supports_lasx() { return _features & CPU_LASX; } + static bool supports_lam() { return _features & CPU_LAM; } + static bool supports_llexc() { return _features & CPU_LLEXC; } + static bool supports_scdly() { return _features & CPU_SCDLY; } + static bool supports_lldbar() { return _features & CPU_LLDBAR; } + static bool supports_ual() { return _features & CPU_UAL; } + static bool supports_lbt_x86() { return _features & CPU_LBT_X86; } + static bool supports_lbt_arm() { return _features & CPU_LBT_ARM; } + static bool supports_lbt_mips() { return _features & CPU_LBT_MIPS; } + static bool needs_llsync() { return !supports_lldbar(); } + static bool needs_tgtsync() { return 1; } + static bool needs_ulsync() { return 1; } + + static const char* cpu_features() { return _features_str; } +}; + +#endif // CPU_LOONGARCH_VM_VERSION_LOONGARCH_HPP diff --git a/src/hotspot/cpu/loongarch/vmreg_loongarch.cpp b/src/hotspot/cpu/loongarch/vmreg_loongarch.cpp new file mode 100644 index 00000000000..43caba5187e --- /dev/null +++ b/src/hotspot/cpu/loongarch/vmreg_loongarch.cpp @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2006, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. 
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/assembler.hpp" +#include "code/vmreg.hpp" + + + +void VMRegImpl::set_regName() { + Register reg = ::as_Register(0); + int i; + for (i = 0; i < ConcreteRegisterImpl::max_gpr ; ) { + for (int j = 0 ; j < RegisterImpl::max_slots_per_register ; j++) { + regName[i++] = reg->name(); + } + reg = reg->successor(); + } + + FloatRegister freg = ::as_FloatRegister(0); + for ( ; i < ConcreteRegisterImpl::max_fpr ; ) { + for (int j = 0 ; j < FloatRegisterImpl::max_slots_per_register ; j++) { + regName[i++] = freg->name(); + } + freg = freg->successor(); + } + + for ( ; i < ConcreteRegisterImpl::number_of_registers ; i ++ ) { + regName[i] = "NON-GPR-FPR"; + } +} diff --git a/src/hotspot/cpu/loongarch/vmreg_loongarch.hpp b/src/hotspot/cpu/loongarch/vmreg_loongarch.hpp new file mode 100644 index 00000000000..819eaff0bb3 --- /dev/null +++ b/src/hotspot/cpu/loongarch/vmreg_loongarch.hpp @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef CPU_LOONGARCH_VMREG_LOONGARCH_HPP +#define CPU_LOONGARCH_VMREG_LOONGARCH_HPP + +inline bool is_Register() { + return (unsigned int) value() < (unsigned int) ConcreteRegisterImpl::max_gpr; +} + +inline Register as_Register() { + assert( is_Register(), "must be"); + return ::as_Register(value() / RegisterImpl::max_slots_per_register); +} + +inline bool is_FloatRegister() { + return value() >= ConcreteRegisterImpl::max_gpr && value() < ConcreteRegisterImpl::max_fpr; +} + +inline FloatRegister as_FloatRegister() { + assert( is_FloatRegister() && is_even(value()), "must be" ); + return ::as_FloatRegister((value() - ConcreteRegisterImpl::max_gpr) / + FloatRegisterImpl::max_slots_per_register); +} + +inline bool is_concrete() { + assert(is_reg(), "must be"); + if (is_FloatRegister()) { + int base = value() - ConcreteRegisterImpl::max_gpr; + return base % FloatRegisterImpl::max_slots_per_register == 0; + } else { + return is_even(value()); + } +} + +#endif // CPU_LOONGARCH_VMREG_LOONGARCH_HPP diff --git a/src/hotspot/cpu/loongarch/vmreg_loongarch.inline.hpp b/src/hotspot/cpu/loongarch/vmreg_loongarch.inline.hpp new file mode 100644 index 00000000000..edb78e36daa --- /dev/null +++ b/src/hotspot/cpu/loongarch/vmreg_loongarch.inline.hpp @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2006, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_LOONGARCH_VMREG_LOONGARCH_INLINE_HPP +#define CPU_LOONGARCH_VMREG_LOONGARCH_INLINE_HPP + +inline VMReg RegisterImpl::as_VMReg() { + if( this==noreg ) return VMRegImpl::Bad(); + return VMRegImpl::as_VMReg(encoding() * RegisterImpl::max_slots_per_register); +} + +inline VMReg FloatRegisterImpl::as_VMReg() { + return VMRegImpl::as_VMReg((encoding() * FloatRegisterImpl::max_slots_per_register) + + ConcreteRegisterImpl::max_gpr); +} + +#endif // CPU_LOONGARCH_VMREG_LOONGARCH_INLINE_HPP diff --git a/src/hotspot/cpu/loongarch/vtableStubs_loongarch_64.cpp b/src/hotspot/cpu/loongarch/vtableStubs_loongarch_64.cpp new file mode 100644 index 00000000000..2c4b60653b3 --- /dev/null +++ b/src/hotspot/cpu/loongarch/vtableStubs_loongarch_64.cpp @@ -0,0 +1,322 @@ +/* + * Copyright (c) 2003, 2014, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "code/vtableStubs.hpp" +#include "interp_masm_loongarch.hpp" +#include "memory/resourceArea.hpp" +#include "oops/compiledICHolder.hpp" +#include "oops/klassVtable.hpp" +#include "runtime/sharedRuntime.hpp" +#include "vmreg_loongarch.inline.hpp" +#ifdef COMPILER2 +#include "opto/runtime.hpp" +#endif + + +// machine-dependent part of VtableStubs: create VtableStub of correct size and +// initialize its code + +#define __ masm-> + +#define T0 RT0 +#define T1 RT1 +#define T2 RT2 +#define T3 RT3 +#define T4 RT4 +#define T5 RT5 +#define T6 RT6 +#define T7 RT7 +#define T8 RT8 + +#ifndef PRODUCT +extern "C" void bad_compiled_vtable_index(JavaThread* thread, oop receiver, int index); +#endif + +// used by compiler only; reciever in T0. +// used registers : +// Rmethod : receiver klass & method +// NOTE: If this code is used by the C1, the receiver_location is always 0. +// when reach here, receiver in T0, klass in T8 +VtableStub* VtableStubs::create_vtable_stub(int vtable_index) { + // Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing. + const int stub_code_length = code_size_limit(true); + VtableStub* s = new(stub_code_length) VtableStub(true, vtable_index); + // Can be NULL if there is no free space in the code cache. + if (s == NULL) { + return NULL; + } + + // Count unused bytes in instruction sequences of variable size. + // We add them to the computed buffer size in order to avoid + // overflow in subsequently generated stubs. + address start_pc; + int slop_bytes = 0; + int slop_delta = 0; + int load_const_maxLen = 4*BytesPerInstWord; // load_const generates 4 instructions. Assume that as max size for li + // No variance was detected in vtable stub sizes. Setting index_dependent_slop == 0 will unveil any deviation from this observation. 
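// Worked example of the sizing discipline used in both stubs (the numbers are
// illustrative): load_const_maxLen charges the worst case of 4 instructions
// (16 bytes) to every li of a 64-bit constant. If a particular constant is
// materialized in, say, 2 instructions, the spare 8 bytes are banked:
//
//   start_pc   = __ pc();
//   __ li(AT, some_64_bit_constant);                        // placeholder constant; emits 2 of 4 possible instructions
//   slop_delta = load_const_maxLen - (__ pc() - start_pc);  // 16 - 8 = 8
//   slop_bytes += slop_delta;
//
// The accumulated slop_bytes is handed to bookkeeping() at the end, so the
// shared VtableStubs code can account for the bytes actually used against
// code_size_limit().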
+ const int index_dependent_slop = 0; + + ResourceMark rm; + CodeBuffer cb(s->entry_point(), stub_code_length); + MacroAssembler* masm = new MacroAssembler(&cb); + Register t1 = T8, t2 = Rmethod; +#if (!defined(PRODUCT) && defined(COMPILER2)) + if (CountCompiledCalls) { + start_pc = __ pc(); + __ li(AT, SharedRuntime::nof_megamorphic_calls_addr()); + slop_delta = load_const_maxLen - (__ pc() - start_pc); + slop_bytes += slop_delta; + assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta); + __ ld_w(t1, AT , 0); + __ addi_w(t1, t1, 1); + __ st_w(t1, AT,0); + } +#endif + + // get receiver (need to skip return address on top of stack) + //assert(receiver_location == T0->as_VMReg(), "receiver expected in T0"); + + // get receiver klass + address npe_addr = __ pc(); + __ load_klass(t1, T0); + +#ifndef PRODUCT + if (DebugVtables) { + Label L; + // check offset vs vtable length + __ ld_w(t2, t1, in_bytes(Klass::vtable_length_offset())); + assert(Assembler::is_simm16(vtable_index*vtableEntry::size()), "change this code"); + __ li(AT, vtable_index*vtableEntry::size()); + __ blt(AT, t2, L); + __ li(A2, vtable_index); + __ move(A1, A0); + + // VTABLE TODO: find upper bound for call_VM length. + start_pc = __ pc(); + __ call_VM(noreg, CAST_FROM_FN_PTR(address, bad_compiled_vtable_index), A1, A2); + const ptrdiff_t estimate = 512; + const ptrdiff_t codesize = __ pc() - start_pc; + slop_delta = estimate - codesize; // call_VM varies in length, depending on data + assert(slop_delta >= 0, "vtable #%d: Code size estimate (%d) for DebugVtables too small, required: %d", vtable_index, (int)estimate, (int)codesize); + __ bind(L); + } +#endif // PRODUCT + const Register method = Rmethod; + + // load methodOop and target address + start_pc = __ pc(); + // lookup_virtual_method generates 6 instructions (worst case) + __ lookup_virtual_method(t1, vtable_index, method); + slop_delta = 6*BytesPerInstWord - (int)(__ pc() - start_pc); + slop_bytes += slop_delta; + assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta); + +#ifndef PRODUCT + if (DebugVtables) { + Label L; + __ beq(method, R0, L); + __ ld_d(AT, method,in_bytes(Method::from_compiled_offset())); + __ bne(AT, R0, L); + __ stop("Vtable entry is NULL"); + __ bind(L); + } +#endif // PRODUCT + + // T8: receiver klass + // T0: receiver + // Rmethod: methodOop + // T4: entry + address ame_addr = __ pc(); + __ ld_ptr(T4, method,in_bytes(Method::from_compiled_offset())); + __ jr(T4); + masm->flush(); + slop_bytes += index_dependent_slop; // add'l slop for size variance due to large itable offsets + bookkeeping(masm, tty, s, npe_addr, ame_addr, true, vtable_index, slop_bytes, index_dependent_slop); + + return s; +} + + +// used registers : +// T1 T2 +// when reach here, the receiver in T0, klass in T1 +VtableStub* VtableStubs::create_itable_stub(int itable_index) { + // Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing. + const int stub_code_length = code_size_limit(false); + VtableStub* s = new(stub_code_length) VtableStub(false, itable_index); + // Can be NULL if there is no free space in the code cache. + if (s == NULL) { + return NULL; + } + // Count unused bytes in instruction sequences of variable size. + // We add them to the computed buffer size in order to avoid + // overflow in subsequently generated stubs. 
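// At dispatch time the vtable stub assembled above reduces to roughly the
// following (method_at_vtable() and jump_to() are shorthand for the effect of
// lookup_virtual_method and the final jr; the real code keeps the receiver in
// T0, its klass in T8 and the Method* in Rmethod, and never builds a frame):
//
//   Klass*  k      = receiver->klass();                  // npe_addr: implicit NULL check
//   Method* target = k->method_at_vtable(vtable_index);  // lookup_virtual_method
//   jump_to(target->from_compiled_entry());              // ame_addr: tail-jump into the callee
//
// The itable stub below follows the same slop accounting while performing a
// more involved, two-pass lookup.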
+ address start_pc; + int slop_bytes = 0; + int slop_delta = 0; + int load_const_maxLen = 4*BytesPerInstWord; // load_const generates 4 instructions. Assume that as max size for li + + ResourceMark rm; + CodeBuffer cb(s->entry_point(), stub_code_length); + MacroAssembler *masm = new MacroAssembler(&cb); + + // we use T8, T4, T2 as temparary register, they are free from register allocator + Register t1 = T8, t2 = T2, t3 = T4; + // Entry arguments: + // T1: Interface + // T0: Receiver + +#if (!defined(PRODUCT) && defined(COMPILER2)) + if (CountCompiledCalls) { + start_pc = __ pc(); + __ li(AT, SharedRuntime::nof_megamorphic_calls_addr()); + slop_delta = load_const_maxLen - (__ pc() - start_pc); + slop_bytes += slop_delta; + assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta); + __ ld_w(T8, AT, 0); + __ addi_w(T8, T8, 1); + __ st_w(T8, AT, 0); + } +#endif // PRODUCT + + const Register holder_klass_reg = T1; // declaring interface klass (DECC) + const Register resolved_klass_reg = Rmethod; // resolved interface klass (REFC) + const Register icholder_reg = T1; + + Label L_no_such_interface; + + __ ld_ptr(resolved_klass_reg, icholder_reg, CompiledICHolder::holder_klass_offset()); + __ ld_ptr(holder_klass_reg, icholder_reg, CompiledICHolder::holder_metadata_offset()); + + // get receiver klass (also an implicit null-check) + address npe_addr = __ pc(); + __ load_klass(t1, T0); + + // x86 use lookup_interface_method, but lookup_interface_method makes more instructions. + // No dynamic code size variance here, so slop_bytes is not needed. + const int base = in_bytes(Klass::vtable_start_offset()); + assert(vtableEntry::size() * wordSize == 8, "adjust the scaling in the code below"); + assert(Assembler::is_simm16(base), "change this code"); + __ addi_d(t2, t1, base); + __ ld_w(AT, t1, in_bytes(Klass::vtable_length_offset())); + __ alsl_d(t2, AT, t2, Address::times_8 - 1); + + __ move(t3, t2); + { + Label hit, entry; + + __ ld_ptr(AT, t3, itableOffsetEntry::interface_offset_in_bytes()); + __ beq(AT, resolved_klass_reg, hit); + + __ bind(entry); + // Check that the entry is non-null. A null entry means that + // the receiver class doesn't implement the interface, and wasn't the + // same as when the caller was compiled. + __ beqz(AT, L_no_such_interface); + + __ addi_d(t3, t3, itableOffsetEntry::size() * wordSize); + __ ld_ptr(AT, t3, itableOffsetEntry::interface_offset_in_bytes()); + __ bne(AT, resolved_klass_reg, entry); + + __ bind(hit); + } + + { + Label hit, entry; + + __ ld_ptr(AT, t2, itableOffsetEntry::interface_offset_in_bytes()); + __ beq(AT, holder_klass_reg, hit); + + __ bind(entry); + // Check that the entry is non-null. A null entry means that + // the receiver class doesn't implement the interface, and wasn't the + // same as when the caller was compiled. + __ beqz(AT, L_no_such_interface); + + __ addi_d(t2, t2, itableOffsetEntry::size() * wordSize); + __ ld_ptr(AT, t2, itableOffsetEntry::interface_offset_in_bytes()); + __ bne(AT, holder_klass_reg, entry); + + __ bind(hit); + } + + // We found a hit, move offset into T4 + __ ld_wu(t2, t2, itableOffsetEntry::offset_offset_in_bytes()); + + // Compute itableMethodEntry. 
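// The two scans above are the usual HotSpot itable lookup; in rough C++ terms
// (entry accessors simplified):
//
//   // pass 1: verify the receiver implements the resolved interface (REFC),
//   //         otherwise fall through to L_no_such_interface
//   for (e = first itableOffsetEntry of the receiver klass; ; e++) {
//     if (e->interface_klass() == NULL)           goto L_no_such_interface;
//     if (e->interface_klass() == resolved_klass) break;
//   }
//   // pass 2: find the declaring interface (DECC) and take its method-table offset
//   for (e = first itableOffsetEntry; ; e++) {
//     if (e->interface_klass() == NULL)           goto L_no_such_interface;
//     if (e->interface_klass() == holder_klass)   { offset = e->offset(); break; }
//   }
//   Method* m = *(Method**)((address)receiver_klass + offset
//                           + itable_index * itableMethodEntry::size() * wordSize
//                           + itableMethodEntry::method_offset_in_bytes());
//
// The code below computes exactly that method_offset and loads the Method*.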
+ const int method_offset = (itableMethodEntry::size() * wordSize * itable_index) + + itableMethodEntry::method_offset_in_bytes(); + + // Get methodOop and entrypoint for compiler + const Register method = Rmethod; + + start_pc = __ pc(); + __ li(AT, method_offset); + slop_delta = load_const_maxLen - (__ pc() - start_pc); + slop_bytes += slop_delta; + assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta); + __ add_d(AT, AT, t2); + __ ldx_d(method, t1, AT); + +#ifdef ASSERT + if (DebugVtables) { + Label L1; + __ beq(method, R0, L1); + __ ld_d(AT, method,in_bytes(Method::from_compiled_offset())); + __ bne(AT, R0, L1); + __ stop("methodOop is null"); + __ bind(L1); + } +#endif // ASSERT + + // Rmethod: methodOop + // T0: receiver + // T4: entry point + address ame_addr = __ pc(); + __ ld_ptr(T4, method, in_bytes(Method::from_compiled_offset())); + __ jr(T4); + + __ bind(L_no_such_interface); + // Handle IncompatibleClassChangeError in itable stubs. + // More detailed error message. + // We force resolving of the call site by jumping to the "handle + // wrong method" stub, and so let the interpreter runtime do all the + // dirty work. + assert(SharedRuntime::get_handle_wrong_method_stub() != NULL, "check initialization order"); + __ jmp((address)SharedRuntime::get_handle_wrong_method_stub(), relocInfo::runtime_call_type); + + masm->flush(); + bookkeeping(masm, tty, s, npe_addr, ame_addr, false, itable_index, slop_bytes, 0); + + return s; +} + +// NOTE : whenever you change the code above, dont forget to change the const here +int VtableStub::pd_code_alignment() { + const unsigned int icache_line_size = wordSize; + return icache_line_size; +} diff --git a/src/hotspot/cpu/mips/abstractInterpreter_mips.cpp b/src/hotspot/cpu/mips/abstractInterpreter_mips.cpp new file mode 100644 index 00000000000..73f021c9b7a --- /dev/null +++ b/src/hotspot/cpu/mips/abstractInterpreter_mips.cpp @@ -0,0 +1,132 @@ +/* + * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "ci/ciMethod.hpp" +#include "interpreter/interpreter.hpp" +#include "runtime/frame.inline.hpp" + +// asm based interpreter deoptimization helpers +int AbstractInterpreter::size_activation(int max_stack, + int temps, + int extra_args, + int monitors, + int callee_params, + int callee_locals, + bool is_top_frame) { + // Note: This calculation must exactly parallel the frame setup + // in AbstractInterpreterGenerator::generate_method_entry. + + // fixed size of an interpreter frame: + int overhead = frame::sender_sp_offset - + frame::interpreter_frame_initial_sp_offset; + // Our locals were accounted for by the caller (or last_frame_adjust + // on the transistion) Since the callee parameters already account + // for the callee's params we only need to account for the extra + // locals. + int size = overhead + + (callee_locals - callee_params)*Interpreter::stackElementWords + + monitors * frame::interpreter_frame_monitor_size() + + temps* Interpreter::stackElementWords + extra_args; + + return size; +} + +// How much stack a method activation needs in words. +int AbstractInterpreter::size_top_interpreter_activation(Method* method) { + + const int entry_size = frame::interpreter_frame_monitor_size(); + + // total overhead size: entry_size + (saved ebp thru expr stack bottom). + // be sure to change this if you add/subtract anything to/from the overhead area + const int overhead_size = -(frame::interpreter_frame_initial_sp_offset) + entry_size; + + const int stub_code = 6; // see generate_call_stub + // return overhead_size + method->max_locals() + method->max_stack() + stub_code; + const int method_stack = (method->max_locals() + method->max_stack()) * + Interpreter::stackElementWords; + return overhead_size + method_stack + stub_code; +} + +void AbstractInterpreter::layout_activation(Method* method, + int tempcount, + int popframe_extra_args, + int moncount, + int caller_actual_parameters, + int callee_param_count, + int callee_locals, + frame* caller, + frame* interpreter_frame, + bool is_top_frame, + bool is_bottom_frame) { + // Note: This calculation must exactly parallel the frame setup + // in AbstractInterpreterGenerator::generate_method_entry. + // If interpreter_frame!=NULL, set up the method, locals, and monitors. + // The frame interpreter_frame, if not NULL, is guaranteed to be the + // right size, as determined by a previous call to this method. 
+ // It is also guaranteed to be walkable even though it is in a skeletal state + + // fixed size of an interpreter frame: + + int max_locals = method->max_locals() * Interpreter::stackElementWords; + int extra_locals = (method->max_locals() - method->size_of_parameters()) * Interpreter::stackElementWords; + +#ifdef ASSERT + assert(caller->sp() == interpreter_frame->sender_sp(), "Frame not properly walkable(2)"); +#endif + + interpreter_frame->interpreter_frame_set_method(method); + // NOTE the difference in using sender_sp and interpreter_frame_sender_sp + // interpreter_frame_sender_sp is the original sp of the caller (the unextended_sp) + // and sender_sp is fp+8 + intptr_t* locals = interpreter_frame->sender_sp() + max_locals - 1; + +#ifdef ASSERT + if (caller->is_interpreted_frame()) { + assert(locals < caller->fp() + frame::interpreter_frame_initial_sp_offset, "bad placement"); + } +#endif + + interpreter_frame->interpreter_frame_set_locals(locals); + BasicObjectLock* montop = interpreter_frame->interpreter_frame_monitor_begin(); + BasicObjectLock* monbot = montop - moncount; + interpreter_frame->interpreter_frame_set_monitor_end(montop - moncount); + + //set last sp; + intptr_t* esp = (intptr_t*) monbot - tempcount*Interpreter::stackElementWords - + popframe_extra_args; + interpreter_frame->interpreter_frame_set_last_sp(esp); + // All frames but the initial interpreter frame we fill in have a + // value for sender_sp that allows walking the stack but isn't + // truly correct. Correct the value here. + // + if (extra_locals != 0 && + interpreter_frame->sender_sp() == interpreter_frame->interpreter_frame_sender_sp() ) { + interpreter_frame->set_interpreter_frame_sender_sp(caller->sp() + extra_locals); + } + *interpreter_frame->interpreter_frame_cache_addr() = method->constants()->cache(); + *interpreter_frame->interpreter_frame_mirror_addr() = method->method_holder()->java_mirror(); +} + diff --git a/src/hotspot/cpu/mips/assembler_mips.cpp b/src/hotspot/cpu/mips/assembler_mips.cpp new file mode 100644 index 00000000000..c8c7a5d4dff --- /dev/null +++ b/src/hotspot/cpu/mips/assembler_mips.cpp @@ -0,0 +1,759 @@ +/* + * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "asm/assembler.hpp" +#include "asm/assembler.inline.hpp" +#include "gc/shared/cardTableBarrierSet.hpp" +#include "gc/shared/collectedHeap.inline.hpp" +#include "interpreter/interpreter.hpp" +#include "memory/resourceArea.hpp" +#include "prims/methodHandles.hpp" +#include "runtime/biasedLocking.hpp" +#include "runtime/objectMonitor.hpp" +#include "runtime/os.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#include "utilities/macros.hpp" +#ifndef PRODUCT +#include "compiler/disassembler.hpp" +#endif + +#ifdef PRODUCT +#define BLOCK_COMMENT(str) /* nothing */ +#define STOP(error) stop(error) +#else +#define BLOCK_COMMENT(str) block_comment(str) +#define STOP(error) block_comment(error); stop(error) +#endif + +#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") +// Implementation of AddressLiteral + +AddressLiteral::AddressLiteral(address target, relocInfo::relocType rtype) { + _is_lval = false; + _target = target; + _rspec = rspec_from_rtype(rtype, target); +} + +// Implementation of Address + +Address Address::make_array(ArrayAddress adr) { + AddressLiteral base = adr.base(); + Address index = adr.index(); + assert(index._disp == 0, "must not have disp"); // maybe it can? + Address array(index._base, index._index, index._scale, (intptr_t) base.target()); + array._rspec = base._rspec; + return array; +} + +// exceedingly dangerous constructor +Address::Address(address loc, RelocationHolder spec) { + _base = noreg; + _index = noreg; + _scale = no_scale; + _disp = (intptr_t) loc; + _rspec = spec; +} + + +#define T0 RT0 +#define T1 RT1 +#define T2 RT2 +#define T3 RT3 +#define T8 RT8 +#define T9 RT9 + +// Implementation of Assembler +const char *Assembler::ops_name[] = { + "special", "regimm", "j", "jal", "beq", "bne", "blez", "bgtz", + "addi", "addiu", "slti", "sltiu", "andi", "ori", "xori", "lui", + "cop0", "cop1", "cop2", "cop3", "beql", "bnel", "bleql", "bgtzl", + "daddi", "daddiu", "ldl", "ldr", "", "", "", "", + "lb", "lh", "lwl", "lw", "lbu", "lhu", "lwr", "lwu", + "sb", "sh", "swl", "sw", "sdl", "sdr", "swr", "cache", + "ll", "lwc1", "", "", "lld", "ldc1", "", "ld", + "sc", "swc1", "", "", "scd", "sdc1", "", "sd" +}; + +const char* Assembler::special_name[] = { + "sll", "", "srl", "sra", "sllv", "", "srlv", "srav", + "jr", "jalr", "movz", "movn", "syscall", "break", "", "sync", + "mfhi", "mthi", "mflo", "mtlo", "dsll", "", "dsrl", "dsra", + "mult", "multu", "div", "divu", "dmult", "dmultu", "ddiv", "ddivu", + "add", "addu", "sub", "subu", "and", "or", "xor", "nor", + "", "", "slt", "sltu", "dadd", "daddu", "dsub", "dsubu", + "tge", "tgeu", "tlt", "tltu", "teq", "", "tne", "", + "dsll", "", "dsrl", "dsra", "dsll32", "", "dsrl32", "dsra32" +}; + +const char* Assembler::cop1_name[] = { + "add", "sub", "mul", "div", "sqrt", "abs", "mov", "neg", + "round.l", "trunc.l", "ceil.l", "floor.l", "round.w", "trunc.w", "ceil.w", "floor.w", + "", "", "", "", "", "", "", "", + "", "", "", "", "", "", "", "", + "", "", "", "", "", "", "", "", + "", "", "", "", "", "", "", "", + "c.f", "c.un", "c.eq", "c.ueq", "c.olt", "c.ult", "c.ole", "c.ule", + "c.sf", "c.ngle", "c.seq", "c.ngl", "c.lt", "c.nge", "c.le", "c.ngt" +}; + +const char* Assembler::cop1x_name[] = { + "lwxc1", "ldxc1", "", "", "", "luxc1", "", "", + "swxc1", "sdxc1", "", "", "", "suxc1", "", "prefx", + "", "", "", "", "", "", "alnv.ps", "", + "", "", "", "", "", "", "", "", + "madd.s", "madd.d", "", "", "", "", "madd.ps", "", + "msub.s", "msub.d", "", "", 
"", "", "msub.ps", "", + "nmadd.s", "nmadd.d", "", "", "", "", "nmadd.ps", "", + "nmsub.s", "nmsub.d", "", "", "", "", "nmsub.ps", "" +}; + +const char* Assembler::special2_name[] = { + "madd", "", "mul", "", "msub", "", "", "", + "", "", "", "", "", "", "", "", + "", "gsdmult", "", "", "gsdiv", "gsddiv", "", "", + "", "", "", "", "gsmod", "gsdmod", "", "", + "", "", "", "", "", "", "", "", + "", "", "", "", "", "", "", "", + "", "", "", "", "", "", "", "", + "", "", "", "", "", "", "", "" +}; + +const char* Assembler::special3_name[] = { + "ext", "", "", "", "ins", "dinsm", "dinsu", "dins", + "", "", "", "", "", "", "", "", + "", "", "", "", "", "", "", "", + "", "", "", "", "", "", "", "", + "bshfl", "", "", "", "", "", "", "", + "", "", "", "", "", "", "", "", + "", "", "", "", "", "", "", "", + "", "", "", "", "", "", "", "", +}; + +const char* Assembler::regimm_name[] = { + "bltz", "bgez", "bltzl", "bgezl", "", "", "", "", + "tgei", "tgeiu", "tlti", "tltiu", "teqi", "", "tnei", "", + "bltzal", "bgezal", "bltzall", "bgezall" +}; + +const char* Assembler::gs_ldc2_name[] = { + "gslbx", "gslhx", "gslwx", "gsldx", "", "", "gslwxc1", "gsldxc1" +}; + + +const char* Assembler::gs_lwc2_name[] = { + "", "", "", "", "", "", "", "", + "", "", "", "", "", "", "", "", + "gslble", "gslbgt", "gslhle", "gslhgt", "gslwle", "gslwgt", "gsldle", "gsldgt", + "", "", "", "gslwlec1", "gslwgtc1", "gsldlec1", "gsldgtc1", "",/*LWDIR, LWPTE, LDDIR and LDPTE have the same low 6 bits.*/ + "gslq", "" +}; + +const char* Assembler::gs_sdc2_name[] = { + "gssbx", "gsshx", "gsswx", "gssdx", "", "", "gsswxc1", "gssdxc1" +}; + +const char* Assembler::gs_swc2_name[] = { + "", "", "", "", "", "", "", "", + "", "", "", "", "", "", "", "", + "gssble", "gssbgt", "gsshle", "gsshgt", "gsswle", "gsswgt", "gssdle", "gssdgt", + "", "", "", "", "gsswlec1", "gsswgtc1", "gssdlec1", "gssdgtc1", + "gssq", "" +}; + +//misleading name, print only branch/jump instruction +void Assembler::print_instruction(int inst) { + const char *s; + switch( opcode(inst) ) { + default: + s = ops_name[opcode(inst)]; + break; + case special_op: + s = special_name[special(inst)]; + break; + case regimm_op: + s = special_name[rt(inst)]; + break; + } + + ::tty->print("%s", s); +} + +int Assembler::is_int_mask(int x) { + int xx = x; + int count = 0; + + while (x != 0) { + x &= (x - 1); + count++; + } + + if ((1<>2; + switch(opcode(inst)) { + case j_op: + case jal_op: + case lui_op: + case ori_op: + case daddiu_op: + ShouldNotReachHere(); + break; + default: + assert(is_simm16(v), "must be simm16"); +#ifndef PRODUCT + if (!is_simm16(v)) { + tty->print_cr("must be simm16"); + tty->print_cr("Inst: %x", inst); + } +#endif + + v = low16(v); + inst &= 0xffff0000; + break; + } + + return inst | v; +} + +int Assembler::branch_destination(int inst, int pos) { + int off = 0; + + switch(opcode(inst)) { + case j_op: + case jal_op: + assert(false, "should not use j/jal here"); + break; + default: + off = expand(low16(inst), 15); + break; + } + + return off ? 
pos + 4 + (off<<2) : 0; +} + +int AbstractAssembler::code_fill_byte() { + return 0x00; // illegal instruction 0x00000000 +} + +// Now the Assembler instruction (identical for 32/64 bits) + +void Assembler::lb(Register rt, Address src) { + assert(src.index() == NOREG, "index is unimplemented"); + lb(rt, src.base(), src.disp()); +} + +void Assembler::lbu(Register rt, Address src) { + assert(src.index() == NOREG, "index is unimplemented"); + lbu(rt, src.base(), src.disp()); +} + +void Assembler::ld(Register rt, Address dst){ + Register src = rt; + Register base = dst.base(); + Register index = dst.index(); + + int scale = dst.scale(); + int disp = dst.disp(); + + if (index != noreg) { + if (Assembler::is_simm16(disp)) { + if ( UseLEXT1 && Assembler::is_simm(disp, 8) ) { + if (scale == 0) { + gsldx(src, base, index, disp); + } else { + dsll(AT, index, scale); + gsldx(src, base, AT, disp); + } + } else { + if (scale == 0) { + daddu(AT, base, index); + } else { + dsll(AT, index, scale); + daddu(AT, base, AT); + } + ld(src, AT, disp); + } + } else { + if (scale == 0) { + lui(AT, split_low(disp >> 16)); + if (split_low(disp)) ori(AT, AT, split_low(disp)); + daddu(AT, AT, base); + if (UseLEXT1) { + gsldx(src, AT, index, 0); + } else { + daddu(AT, AT, index); + ld(src, AT, 0); + } + } else { + assert_different_registers(src, AT); + dsll(AT, index, scale); + daddu(AT, base, AT); + lui(src, split_low(disp >> 16)); + if (split_low(disp)) ori(src, src, split_low(disp)); + if (UseLEXT1) { + gsldx(src, AT, src, 0); + } else { + daddu(AT, AT, src); + ld(src, AT, 0); + } + } + } + } else { + if (Assembler::is_simm16(disp)) { + ld(src, base, disp); + } else { + lui(AT, split_low(disp >> 16)); + if (split_low(disp)) ori(AT, AT, split_low(disp)); + + if (UseLEXT1) { + gsldx(src, base, AT, 0); + } else { + daddu(AT, base, AT); + ld(src, AT, 0); + } + } + } +} + +void Assembler::ldl(Register rt, Address src){ + assert(src.index() == NOREG, "index is unimplemented"); + ldl(rt, src.base(), src.disp()); +} + +void Assembler::ldr(Register rt, Address src){ + assert(src.index() == NOREG, "index is unimplemented"); + ldr(rt, src.base(), src.disp()); +} + +void Assembler::lh(Register rt, Address src){ + assert(src.index() == NOREG, "index is unimplemented"); + lh(rt, src.base(), src.disp()); +} + +void Assembler::lhu(Register rt, Address src){ + assert(src.index() == NOREG, "index is unimplemented"); + lhu(rt, src.base(), src.disp()); +} + +void Assembler::ll(Register rt, Address src){ + assert(src.index() == NOREG, "index is unimplemented"); + ll(rt, src.base(), src.disp()); +} + +void Assembler::lld(Register rt, Address src){ + assert(src.index() == NOREG, "index is unimplemented"); + lld(rt, src.base(), src.disp()); +} + +void Assembler::lw(Register rt, Address dst){ + Register src = rt; + Register base = dst.base(); + Register index = dst.index(); + + int scale = dst.scale(); + int disp = dst.disp(); + + if (index != noreg) { + if (Assembler::is_simm16(disp)) { + if ( UseLEXT1 && Assembler::is_simm(disp, 8) ) { + if (scale == 0) { + gslwx(src, base, index, disp); + } else { + dsll(AT, index, scale); + gslwx(src, base, AT, disp); + } + } else { + if (scale == 0) { + daddu(AT, base, index); + } else { + dsll(AT, index, scale); + daddu(AT, base, AT); + } + lw(src, AT, disp); + } + } else { + if (scale == 0) { + lui(AT, split_low(disp >> 16)); + if (split_low(disp)) ori(AT, AT, split_low(disp)); + daddu(AT, AT, base); + if (UseLEXT1) { + gslwx(src, AT, index, 0); + } else { + daddu(AT, AT, index); + lw(src, AT, 0); + } 
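// Example of the expansion performed just above for a displacement outside the
// simm16 range (values illustrative). For disp == 0x12345678 with a zero scale
// the assembler first materializes the offset in AT:
//
//   lui   AT, 0x1234          // split_low(disp >> 16)
//   ori   AT, AT, 0x5678      // split_low(disp)
//   daddu AT, AT, base
//   gslwx rt, AT, index, 0    // with UseLEXT1
//   // or: daddu AT, AT, index ; lw rt, AT, 0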
+ } else { + assert_different_registers(src, AT); + dsll(AT, index, scale); + daddu(AT, base, AT); + lui(src, split_low(disp >> 16)); + if (split_low(disp)) ori(src, src, split_low(disp)); + if (UseLEXT1) { + gslwx(src, AT, src, 0); + } else { + daddu(AT, AT, src); + lw(src, AT, 0); + } + } + } + } else { + if (Assembler::is_simm16(disp)) { + lw(src, base, disp); + } else { + lui(AT, split_low(disp >> 16)); + if (split_low(disp)) ori(AT, AT, split_low(disp)); + + if (UseLEXT1) { + gslwx(src, base, AT, 0); + } else { + daddu(AT, base, AT); + lw(src, AT, 0); + } + } + } +} + +void Assembler::lea(Register rt, Address src) { + Register dst = rt; + Register base = src.base(); + Register index = src.index(); + + int scale = src.scale(); + int disp = src.disp(); + + if (index == noreg) { + if (is_simm16(disp)) { + daddiu(dst, base, disp); + } else { + lui(AT, split_low(disp >> 16)); + if (split_low(disp)) ori(AT, AT, split_low(disp)); + daddu(dst, base, AT); + } + } else { + if (scale == 0) { + if (is_simm16(disp)) { + daddu(AT, base, index); + daddiu(dst, AT, disp); + } else { + lui(AT, split_low(disp >> 16)); + if (split_low(disp)) ori(AT, AT, split_low(disp)); + daddu(AT, base, AT); + daddu(dst, AT, index); + } + } else { + if (is_simm16(disp)) { + dsll(AT, index, scale); + daddu(AT, AT, base); + daddiu(dst, AT, disp); + } else { + assert_different_registers(dst, AT); + lui(AT, split_low(disp >> 16)); + if (split_low(disp)) ori(AT, AT, split_low(disp)); + daddu(AT, AT, base); + dsll(dst, index, scale); + daddu(dst, dst, AT); + } + } + } +} + +void Assembler::lwl(Register rt, Address src){ + assert(src.index() == NOREG, "index is unimplemented"); + lwl(rt, src.base(), src.disp()); +} + +void Assembler::lwr(Register rt, Address src){ + assert(src.index() == NOREG, "index is unimplemented"); + lwr(rt, src.base(), src.disp()); +} + +void Assembler::lwu(Register rt, Address src){ + assert(src.index() == NOREG, "index is unimplemented"); + lwu(rt, src.base(), src.disp()); +} + +void Assembler::sb(Register rt, Address dst) { + assert(dst.index() == NOREG, "index is unimplemented"); + sb(rt, dst.base(), dst.disp()); +} + +void Assembler::sc(Register rt, Address dst) { + assert(dst.index() == NOREG, "index is unimplemented"); + sc(rt, dst.base(), dst.disp()); +} + +void Assembler::scd(Register rt, Address dst) { + assert(dst.index() == NOREG, "index is unimplemented"); + scd(rt, dst.base(), dst.disp()); +} + +void Assembler::sd(Register rt, Address dst) { + Register src = rt; + Register base = dst.base(); + Register index = dst.index(); + + int scale = dst.scale(); + int disp = dst.disp(); + + if (index != noreg) { + if (is_simm16(disp)) { + if ( UseLEXT1 && is_simm(disp, 8)) { + if (scale == 0) { + gssdx(src, base, index, disp); + } else { + assert_different_registers(rt, AT); + dsll(AT, index, scale); + gssdx(src, base, AT, disp); + } + } else { + assert_different_registers(rt, AT); + if (scale == 0) { + daddu(AT, base, index); + } else { + dsll(AT, index, scale); + daddu(AT, base, AT); + } + sd(src, AT, disp); + } + } else { + assert_different_registers(rt, AT); + if (scale == 0) { + lui(AT, split_low(disp >> 16)); + if (split_low(disp)) ori(AT, AT, split_low(disp)); + daddu(AT, AT, base); + if (UseLEXT1) { + gssdx(src, AT, index, 0); + } else { + daddu(AT, AT, index); + sd(src, AT, 0); + } + } else { + daddiu(SP, SP, -wordSize); + sd(T9, SP, 0); + + dsll(AT, index, scale); + daddu(AT, base, AT); + lui(T9, split_low(disp >> 16)); + if (split_low(disp)) ori(T9, T9, split_low(disp)); + daddu(AT, AT, 
T9); + ld(T9, SP, 0); + daddiu(SP, SP, wordSize); + sd(src, AT, 0); + } + } + } else { + if (is_simm16(disp)) { + sd(src, base, disp); + } else { + assert_different_registers(rt, AT); + lui(AT, split_low(disp >> 16)); + if (split_low(disp)) ori(AT, AT, split_low(disp)); + + if (UseLEXT1) { + gssdx(src, base, AT, 0); + } else { + daddu(AT, base, AT); + sd(src, AT, 0); + } + } + } +} + +void Assembler::sdl(Register rt, Address dst) { + assert(dst.index() == NOREG, "index is unimplemented"); + sdl(rt, dst.base(), dst.disp()); +} + +void Assembler::sdr(Register rt, Address dst) { + assert(dst.index() == NOREG, "index is unimplemented"); + sdr(rt, dst.base(), dst.disp()); +} + +void Assembler::sh(Register rt, Address dst) { + assert(dst.index() == NOREG, "index is unimplemented"); + sh(rt, dst.base(), dst.disp()); +} + +void Assembler::sw(Register rt, Address dst) { + Register src = rt; + Register base = dst.base(); + Register index = dst.index(); + + int scale = dst.scale(); + int disp = dst.disp(); + + if (index != noreg) { + if ( Assembler::is_simm16(disp) ) { + if ( UseLEXT1 && Assembler::is_simm(disp, 8) ) { + if (scale == 0) { + gsswx(src, base, index, disp); + } else { + assert_different_registers(rt, AT); + dsll(AT, index, scale); + gsswx(src, base, AT, disp); + } + } else { + assert_different_registers(rt, AT); + if (scale == 0) { + daddu(AT, base, index); + } else { + dsll(AT, index, scale); + daddu(AT, base, AT); + } + sw(src, AT, disp); + } + } else { + assert_different_registers(rt, AT); + if (scale == 0) { + lui(AT, split_low(disp >> 16)); + if (split_low(disp)) ori(AT, AT, split_low(disp)); + daddu(AT, AT, base); + if (UseLEXT1) { + gsswx(src, AT, index, 0); + } else { + daddu(AT, AT, index); + sw(src, AT, 0); + } + } else { + daddiu(SP, SP, -wordSize); + sd(T9, SP, 0); + + dsll(AT, index, scale); + daddu(AT, base, AT); + lui(T9, split_low(disp >> 16)); + if (split_low(disp)) ori(T9, T9, split_low(disp)); + daddu(AT, AT, T9); + ld(T9, SP, 0); + daddiu(SP, SP, wordSize); + sw(src, AT, 0); + } + } + } else { + if (Assembler::is_simm16(disp)) { + sw(src, base, disp); + } else { + assert_different_registers(rt, AT); + lui(AT, split_low(disp >> 16)); + if (split_low(disp)) ori(AT, AT, split_low(disp)); + + if (UseLEXT1) { + gsswx(src, base, AT, 0); + } else { + daddu(AT, base, AT); + sw(src, AT, 0); + } + } + } +} + +void Assembler::swl(Register rt, Address dst) { + assert(dst.index() == NOREG, "index is unimplemented"); + swl(rt, dst.base(), dst.disp()); +} + +void Assembler::swr(Register rt, Address dst) { + assert(dst.index() == NOREG, "index is unimplemented"); + swr(rt, dst.base(), dst.disp()); +} + +void Assembler::lwc1(FloatRegister rt, Address src) { + assert(src.index() == NOREG, "index is unimplemented"); + lwc1(rt, src.base(), src.disp()); +} + +void Assembler::ldc1(FloatRegister rt, Address src) { + assert(src.index() == NOREG, "index is unimplemented"); + ldc1(rt, src.base(), src.disp()); +} + +void Assembler::swc1(FloatRegister rt, Address dst) { + assert(dst.index() == NOREG, "index is unimplemented"); + swc1(rt, dst.base(), dst.disp()); +} + +void Assembler::sdc1(FloatRegister rt, Address dst) { + assert(dst.index() == NOREG, "index is unimplemented"); + sdc1(rt, dst.base(), dst.disp()); +} + +void Assembler::j(address entry) { + int dest = ((intptr_t)entry & (intptr_t)0xfffffff)>>2; + emit_long((j_op<<26) | dest); + has_delay_slot(); +} + +void Assembler::jal(address entry) { + int dest = ((intptr_t)entry & (intptr_t)0xfffffff)>>2; + emit_long((jal_op<<26) | dest); + 
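// Encoding note (illustrative arithmetic): j/jal keep a 28-bit region-relative
// target in 26 bits, dest = (entry & 0x0fffffff) >> 2, so the target must lie
// in the same 256MB region as the instruction after the jump, per the usual
// MIPS rule. For entry == 0x12000040, dest == 0x800010 and, assuming jal_op is
// the standard MIPS opcode 3 (cf. ops_name[] above), the emitted word is
// (3 << 26) | 0x800010 == 0x0c800010.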
has_delay_slot(); +} + +void Assembler::emit_long(int x) { // shadows AbstractAssembler::emit_long + check_delay(); + AbstractAssembler::emit_int32(x); +} + +inline void Assembler::emit_data(int x) { emit_long(x); } +inline void Assembler::emit_data(int x, relocInfo::relocType rtype) { + relocate(rtype); + emit_long(x); +} + +inline void Assembler::emit_data(int x, RelocationHolder const& rspec) { + relocate(rspec); + emit_long(x); +} + +inline void Assembler::check_delay() { +#ifdef CHECK_DELAY + guarantee(delay_state != at_delay_slot, "must say delayed() when filling delay slot"); + delay_state = no_delay; +#endif +} diff --git a/src/hotspot/cpu/mips/assembler_mips.hpp b/src/hotspot/cpu/mips/assembler_mips.hpp new file mode 100644 index 00000000000..102a7ba52fe --- /dev/null +++ b/src/hotspot/cpu/mips/assembler_mips.hpp @@ -0,0 +1,1789 @@ +/* + * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_MIPS_VM_ASSEMBLER_MIPS_HPP +#define CPU_MIPS_VM_ASSEMBLER_MIPS_HPP + +#include "asm/register.hpp" +#include "runtime/vm_version.hpp" + +class BiasedLockingCounters; + + +// Note: A register location is represented via a Register, not +// via an address for efficiency & simplicity reasons. + +class ArrayAddress; + +class Address { + public: + enum ScaleFactor { + no_scale = -1, + times_1 = 0, + times_2 = 1, + times_4 = 2, + times_8 = 3, + times_ptr = times_8 + }; + static ScaleFactor times(int size) { + assert(size >= 1 && size <= 8 && is_power_of_2(size), "bad scale size"); + if (size == 8) return times_8; + if (size == 4) return times_4; + if (size == 2) return times_2; + return times_1; + } + + private: + Register _base; + Register _index; + ScaleFactor _scale; + int _disp; + RelocationHolder _rspec; + + // Easily misused constructors make them private + Address(address loc, RelocationHolder spec); + Address(int disp, address loc, relocInfo::relocType rtype); + Address(int disp, address loc, RelocationHolder spec); + + public: + + // creation + Address() + : _base(noreg), + _index(noreg), + _scale(no_scale), + _disp(0) { + } + + // No default displacement otherwise Register can be implicitly + // converted to 0(Register) which is quite a different animal. 
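+  //
+  // A quick usage sketch (the register names are just the usual ones from this
+  // port, e.g. SP/A0/T9; substitute whatever the call site needs):
+  //
+  //   Address(SP, 2 * wordSize)               // SP + 16: plain base + displacement
+  //   Address(A0, T9, Address::times_8, 24)   // A0 + (T9 << 3) + 24: scaled index
+  //
+  // AT is the assembler's scratch register, so the asserts below refuse it as a
+  // base or an index.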
+ + Address(Register base, int disp = 0) + : _base(base), + _index(noreg), + _scale(no_scale), + _disp(disp) { + assert_different_registers(_base, AT); + } + + Address(Register base, Register index, ScaleFactor scale, int disp = 0) + : _base (base), + _index(index), + _scale(scale), + _disp (disp) { + assert(!index->is_valid() == (scale == Address::no_scale), "inconsistent address"); + assert_different_registers(_base, _index, AT); + } + + // The following two overloads are used in connection with the + // ByteSize type (see sizes.hpp). They simplify the use of + // ByteSize'd arguments in assembly code. Note that their equivalent + // for the optimized build are the member functions with int disp + // argument since ByteSize is mapped to an int type in that case. + // + // Note: DO NOT introduce similar overloaded functions for WordSize + // arguments as in the optimized mode, both ByteSize and WordSize + // are mapped to the same type and thus the compiler cannot make a + // distinction anymore (=> compiler errors). + +#ifdef ASSERT + Address(Register base, ByteSize disp) + : _base(base), + _index(noreg), + _scale(no_scale), + _disp(in_bytes(disp)) { + assert_different_registers(_base, AT); + } + + Address(Register base, Register index, ScaleFactor scale, ByteSize disp) + : _base(base), + _index(index), + _scale(scale), + _disp(in_bytes(disp)) { + assert(!index->is_valid() == (scale == Address::no_scale), "inconsistent address"); + assert_different_registers(_base, _index, AT); + } +#endif // ASSERT + + // accessors + bool uses(Register reg) const { return _base == reg || _index == reg; } + Register base() const { return _base; } + Register index() const { return _index; } + ScaleFactor scale() const { return _scale; } + int disp() const { return _disp; } + + static Address make_array(ArrayAddress); + + friend class Assembler; + friend class MacroAssembler; + friend class LIR_Assembler; // base/index/scale/disp +}; + +// Calling convention +class Argument { + private: + int _number; + public: + enum { + n_register_parameters = 8, // 8 integer registers used to pass parameters + n_float_register_parameters = 8 // 8 float registers used to pass parameters + }; + + Argument(int number):_number(number){ } + Argument successor() {return Argument(number() + 1);} + + int number()const {return _number;} + bool is_Register()const {return _number < n_register_parameters;} + bool is_FloatRegister()const {return _number < n_float_register_parameters;} + + Register as_Register()const { + assert(is_Register(), "must be a register argument"); + return ::as_Register(A0->encoding() + _number); + } + FloatRegister as_FloatRegister()const { + assert(is_FloatRegister(), "must be a float register argument"); + return ::as_FloatRegister(F12->encoding() + _number); + } + + Address as_caller_address()const {return Address(SP, (number() - n_register_parameters) * wordSize);} +}; + +// +// AddressLiteral has been split out from Address because operands of this type +// need to be treated specially on 32bit vs. 64bit platforms. By splitting it out +// the few instructions that need to deal with address literals are unique and the +// MacroAssembler does not have to implement every instruction in the Assembler +// in order to search for address literals that may need special handling depending +// on the instruction and the platform. As small step on the way to merging i486/amd64 +// directories. 
+// +class AddressLiteral { + friend class ArrayAddress; + RelocationHolder _rspec; + // Typically we use AddressLiterals we want to use their rval + // However in some situations we want the lval (effect address) of the item. + // We provide a special factory for making those lvals. + bool _is_lval; + + // If the target is far we'll need to load the ea of this to + // a register to reach it. Otherwise if near we can do rip + // relative addressing. + + address _target; + + protected: + // creation + AddressLiteral() + : _is_lval(false), + _target(NULL) + {} + + public: + + + AddressLiteral(address target, relocInfo::relocType rtype); + + AddressLiteral(address target, RelocationHolder const& rspec) + : _rspec(rspec), + _is_lval(false), + _target(target) + {} + + AddressLiteral addr() { + AddressLiteral ret = *this; + ret._is_lval = true; + return ret; + } + + + private: + + address target() { return _target; } + bool is_lval() { return _is_lval; } + + relocInfo::relocType reloc() const { return _rspec.type(); } + const RelocationHolder& rspec() const { return _rspec; } + + friend class Assembler; + friend class MacroAssembler; + friend class Address; + friend class LIR_Assembler; + RelocationHolder rspec_from_rtype(relocInfo::relocType rtype, address addr) { + switch (rtype) { + case relocInfo::external_word_type: + return external_word_Relocation::spec(addr); + case relocInfo::internal_word_type: + return internal_word_Relocation::spec(addr); + case relocInfo::opt_virtual_call_type: + return opt_virtual_call_Relocation::spec(); + case relocInfo::static_call_type: + return static_call_Relocation::spec(); + case relocInfo::runtime_call_type: + return runtime_call_Relocation::spec(); + case relocInfo::poll_type: + case relocInfo::poll_return_type: + return Relocation::spec_simple(rtype); + case relocInfo::none: + case relocInfo::oop_type: + // Oops are a special case. Normally they would be their own section + // but in cases like icBuffer they are literals in the code stream that + // we don't have a section for. We use none so that we get a literal address + // which is always patchable. + return RelocationHolder(); + default: + ShouldNotReachHere(); + return RelocationHolder(); + } + } + +}; + +// Convience classes +class RuntimeAddress: public AddressLiteral { + + public: + + RuntimeAddress(address target) : AddressLiteral(target, relocInfo::runtime_call_type) {} + +}; + +class OopAddress: public AddressLiteral { + + public: + + OopAddress(address target) : AddressLiteral(target, relocInfo::oop_type){} + +}; + +class ExternalAddress: public AddressLiteral { + + public: + + ExternalAddress(address target) : AddressLiteral(target, relocInfo::external_word_type){} + +}; + +class InternalAddress: public AddressLiteral { + + public: + + InternalAddress(address target) : AddressLiteral(target, relocInfo::internal_word_type) {} + +}; + +// x86 can do array addressing as a single operation since disp can be an absolute +// address amd64 can't. 
We create a class that expresses the concept but does extra +// magic on amd64 to get the final result + +class ArrayAddress { + private: + + AddressLiteral _base; + Address _index; + + public: + + ArrayAddress() {}; + ArrayAddress(AddressLiteral base, Address index): _base(base), _index(index) {}; + AddressLiteral base() { return _base; } + Address index() { return _index; } + +}; + +const int FPUStateSizeInWords = 512 / wordSize; + +// The MIPS LOONGSON Assembler: Pure assembler doing NO optimizations on the instruction +// level ; i.e., what you write is what you get. The Assembler is generating code into +// a CodeBuffer. + +class Assembler : public AbstractAssembler { + friend class AbstractAssembler; // for the non-virtual hack + friend class LIR_Assembler; // as_Address() + friend class StubGenerator; + + public: + enum Condition { + zero , + notZero , + equal , + notEqual , + less , + lessEqual , + greater , + greaterEqual , + below , + belowEqual , + above , + aboveEqual + }; + + static const int LogInstructionSize = 2; + static const int InstructionSize = 1 << LogInstructionSize; + + // opcode, highest 6 bits: bits[31...26] + enum ops { + special_op = 0x00, // special_ops + regimm_op = 0x01, // regimm_ops + j_op = 0x02, + jal_op = 0x03, + beq_op = 0x04, + bne_op = 0x05, + blez_op = 0x06, + bgtz_op = 0x07, + addiu_op = 0x09, + slti_op = 0x0a, + sltiu_op = 0x0b, + andi_op = 0x0c, + ori_op = 0x0d, + xori_op = 0x0e, + lui_op = 0x0f, + cop0_op = 0x10, // cop0_ops + cop1_op = 0x11, // cop1_ops + gs_cop2_op = 0x12, // gs_cop2_ops + cop1x_op = 0x13, // cop1x_ops + beql_op = 0x14, + bnel_op = 0x15, + blezl_op = 0x16, + bgtzl_op = 0x17, + daddiu_op = 0x19, + ldl_op = 0x1a, + ldr_op = 0x1b, + special2_op = 0x1c, // special2_ops + msa_op = 0x1e, // msa_ops + special3_op = 0x1f, // special3_ops + lb_op = 0x20, + lh_op = 0x21, + lwl_op = 0x22, + lw_op = 0x23, + lbu_op = 0x24, + lhu_op = 0x25, + lwr_op = 0x26, + lwu_op = 0x27, + sb_op = 0x28, + sh_op = 0x29, + swl_op = 0x2a, + sw_op = 0x2b, + sdl_op = 0x2c, + sdr_op = 0x2d, + swr_op = 0x2e, + cache_op = 0x2f, + ll_op = 0x30, + lwc1_op = 0x31, + gs_lwc2_op = 0x32, //gs_lwc2_ops + pref_op = 0x33, + lld_op = 0x34, + ldc1_op = 0x35, + gs_ldc2_op = 0x36, //gs_ldc2_ops + ld_op = 0x37, + sc_op = 0x38, + swc1_op = 0x39, + gs_swc2_op = 0x3a, //gs_swc2_ops + scd_op = 0x3c, + sdc1_op = 0x3d, + gs_sdc2_op = 0x3e, //gs_sdc2_ops + sd_op = 0x3f + }; + + static const char *ops_name[]; + + //special family, the opcode is in low 6 bits. 
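+  // For instance (an illustrative encoding, assuming the usual MIPS numbering
+  // A0=$4, A1=$5, A2=$6): daddu(A0, A1, A2) assembles rd=A0, rs=A1, rt=A2 into
+  //   (special_op << 26) | (5 << 21) | (6 << 16) | (4 << 11) | daddu_op
+  //   = 0x00a6202d, i.e. "daddu a0, a1, a2".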
+ enum special_ops { + sll_op = 0x00, + movci_op = 0x01, + srl_op = 0x02, + sra_op = 0x03, + sllv_op = 0x04, + srlv_op = 0x06, + srav_op = 0x07, + jr_op = 0x08, + jalr_op = 0x09, + movz_op = 0x0a, + movn_op = 0x0b, + syscall_op = 0x0c, + break_op = 0x0d, + sync_op = 0x0f, + mfhi_op = 0x10, + mthi_op = 0x11, + mflo_op = 0x12, + mtlo_op = 0x13, + dsllv_op = 0x14, + dsrlv_op = 0x16, + dsrav_op = 0x17, + mult_op = 0x18, + multu_op = 0x19, + div_op = 0x1a, + divu_op = 0x1b, + dmult_op = 0x1c, + dmultu_op = 0x1d, + ddiv_op = 0x1e, + ddivu_op = 0x1f, + addu_op = 0x21, + subu_op = 0x23, + and_op = 0x24, + or_op = 0x25, + xor_op = 0x26, + nor_op = 0x27, + slt_op = 0x2a, + sltu_op = 0x2b, + daddu_op = 0x2d, + dsubu_op = 0x2f, + tge_op = 0x30, + tgeu_op = 0x31, + tlt_op = 0x32, + tltu_op = 0x33, + teq_op = 0x34, + tne_op = 0x36, + dsll_op = 0x38, + dsrl_op = 0x3a, + dsra_op = 0x3b, + dsll32_op = 0x3c, + dsrl32_op = 0x3e, + dsra32_op = 0x3f + }; + + static const char* special_name[]; + + //regimm family, the opcode is in rt[16...20], 5 bits + enum regimm_ops { + bltz_op = 0x00, + bgez_op = 0x01, + bltzl_op = 0x02, + bgezl_op = 0x03, + tgei_op = 0x08, + tgeiu_op = 0x09, + tlti_op = 0x0a, + tltiu_op = 0x0b, + teqi_op = 0x0c, + tnei_op = 0x0e, + bltzal_op = 0x10, + bgezal_op = 0x11, + bltzall_op = 0x12, + bgezall_op = 0x13, + bposge32_op = 0x1c, + bposge64_op = 0x1d, + synci_op = 0x1f, + }; + + static const char* regimm_name[]; + + //cop0 family, the ops is in bits[25...21], 5 bits + enum cop0_ops { + mfc0_op = 0x00, + dmfc0_op = 0x01, + // + mxgc0_op = 0x03, //MFGC0, DMFGC0, MTGC0 + mtc0_op = 0x04, + dmtc0_op = 0x05, + rdpgpr_op = 0x0a, + inter_op = 0x0b, + wrpgpr_op = 0x0c + }; + + //cop1 family, the ops is in bits[25...21], 5 bits + enum cop1_ops { + mfc1_op = 0x00, + dmfc1_op = 0x01, + cfc1_op = 0x02, + mfhc1_op = 0x03, + mtc1_op = 0x04, + dmtc1_op = 0x05, + ctc1_op = 0x06, + mthc1_op = 0x07, + bc1f_op = 0x08, + single_fmt = 0x10, + double_fmt = 0x11, + word_fmt = 0x14, + long_fmt = 0x15, + ps_fmt = 0x16 + }; + + + //2 bist (bits[17...16]) of bc1x instructions (cop1) + enum bc_ops { + bcf_op = 0x0, + bct_op = 0x1, + bcfl_op = 0x2, + bctl_op = 0x3, + }; + + // low 6 bits of c_x_fmt instructions (cop1) + enum c_conds { + f_cond = 0x30, + un_cond = 0x31, + eq_cond = 0x32, + ueq_cond = 0x33, + olt_cond = 0x34, + ult_cond = 0x35, + ole_cond = 0x36, + ule_cond = 0x37, + sf_cond = 0x38, + ngle_cond = 0x39, + seq_cond = 0x3a, + ngl_cond = 0x3b, + lt_cond = 0x3c, + nge_cond = 0x3d, + le_cond = 0x3e, + ngt_cond = 0x3f + }; + + // low 6 bits of cop1 instructions + enum float_ops { + fadd_op = 0x00, + fsub_op = 0x01, + fmul_op = 0x02, + fdiv_op = 0x03, + fsqrt_op = 0x04, + fabs_op = 0x05, + fmov_op = 0x06, + fneg_op = 0x07, + froundl_op = 0x08, + ftruncl_op = 0x09, + fceill_op = 0x0a, + ffloorl_op = 0x0b, + froundw_op = 0x0c, + ftruncw_op = 0x0d, + fceilw_op = 0x0e, + ffloorw_op = 0x0f, + movf_f_op = 0x11, + movt_f_op = 0x11, + movz_f_op = 0x12, + movn_f_op = 0x13, + frecip_op = 0x15, + frsqrt_op = 0x16, + fcvts_op = 0x20, + fcvtd_op = 0x21, + fcvtw_op = 0x24, + fcvtl_op = 0x25, + fcvtps_op = 0x26, + fcvtspl_op = 0x28, + fpll_op = 0x2c, + fplu_op = 0x2d, + fpul_op = 0x2e, + fpuu_op = 0x2f + }; + + static const char* cop1_name[]; + + //cop1x family, the opcode is in low 6 bits. 
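+  // Sketch of the field layout (illustrative only): the fused multiply-add forms
+  // carry fr in the rs slot, so madd_s(F0, F2, F4, F6) (i.e. f0 = f4 * f6 + f2)
+  // would emit
+  //   (cop1x_op << 26) | (2 << 21) | (6 << 16) | (4 << 11) | (0 << 6) | madd_s_op
+  //   = 0x4c462020, i.e. "madd.s f0, f2, f4, f6".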
+ enum cop1x_ops { + lwxc1_op = 0x00, + ldxc1_op = 0x01, + luxc1_op = 0x05, + swxc1_op = 0x08, + sdxc1_op = 0x09, + suxc1_op = 0x0d, + prefx_op = 0x0f, + + alnv_ps_op = 0x1e, + madd_s_op = 0x20, + madd_d_op = 0x21, + madd_ps_op = 0x26, + msub_s_op = 0x28, + msub_d_op = 0x29, + msub_ps_op = 0x2e, + nmadd_s_op = 0x30, + nmadd_d_op = 0x31, + nmadd_ps_op = 0x36, + nmsub_s_op = 0x38, + nmsub_d_op = 0x39, + nmsub_ps_op = 0x3e + }; + + static const char* cop1x_name[]; + + //special2 family, the opcode is in low 6 bits. + enum special2_ops { + madd_op = 0x00, + maddu_op = 0x01, + mul_op = 0x02, + gs0x03_op = 0x03, + msub_op = 0x04, + msubu_op = 0x05, + gs0x06_op = 0x06, + gsemul2_op = 0x07, + gsemul3_op = 0x08, + gsemul4_op = 0x09, + gsemul5_op = 0x0a, + gsemul6_op = 0x0b, + gsemul7_op = 0x0c, + gsemul8_op = 0x0d, + gsemul9_op = 0x0e, + gsemul10_op = 0x0f, + gsmult_op = 0x10, + gsdmult_op = 0x11, + gsmultu_op = 0x12, + gsdmultu_op = 0x13, + gsdiv_op = 0x14, + gsddiv_op = 0x15, + gsdivu_op = 0x16, + gsddivu_op = 0x17, + gsmod_op = 0x1c, + gsdmod_op = 0x1d, + gsmodu_op = 0x1e, + gsdmodu_op = 0x1f, + clz_op = 0x20, + clo_op = 0x21, + xctx_op = 0x22, //ctz, cto, dctz, dcto, gsX + gsrxr_x_op = 0x23, //gsX + dclz_op = 0x24, + dclo_op = 0x25, + gsle_op = 0x26, + gsgt_op = 0x27, + gs86j_op = 0x28, + gsloop_op = 0x29, + gsaj_op = 0x2a, + gsldpc_op = 0x2b, + gs86set_op = 0x30, + gstm_op = 0x31, + gscvt_ld_op = 0x32, + gscvt_ud_op = 0x33, + gseflag_op = 0x34, + gscam_op = 0x35, + gstop_op = 0x36, + gssettag_op = 0x37, + gssdbbp_op = 0x38 + }; + + static const char* special2_name[]; + + // special3 family, the opcode is in low 6 bits. + enum special3_ops { + ext_op = 0x00, + dextm_op = 0x01, + dextu_op = 0x02, + dext_op = 0x03, + ins_op = 0x04, + dinsm_op = 0x05, + dinsu_op = 0x06, + dins_op = 0x07, + lxx_op = 0x0a, //lwx, lhx, lbux, ldx + insv_op = 0x0c, + dinsv_op = 0x0d, + ar1_op = 0x10, //MIPS DSP + cmp1_op = 0x11, //MIPS DSP + re1_op = 0x12, //MIPS DSP, re1_ops + sh1_op = 0x13, //MIPS DSP + ar2_op = 0x14, //MIPS DSP + cmp2_op = 0x15, //MIPS DSP + re2_op = 0x16, //MIPS DSP, re2_ops + sh2_op = 0x17, //MIPS DSP + ar3_op = 0x18, //MIPS DSP + bshfl_op = 0x20 //seb, seh + }; + + // re1_ops + enum re1_ops { + absq_s_qb_op = 0x01, + repl_qb_op = 0x02, + replv_qb_op = 0x03, + absq_s_ph_op = 0x09, + repl_ph_op = 0x0a, + replv_ph_op = 0x0b, + absq_s_w_op = 0x11, + bitrev_op = 0x1b + }; + + // re2_ops + enum re2_ops { + repl_ob_op = 0x02, + replv_ob_op = 0x03, + absq_s_qh_op = 0x09, + repl_qh_op = 0x0a, + replv_qh_op = 0x0b, + absq_s_pw_op = 0x11, + repl_pw_op = 0x12, + replv_pw_op = 0x13 + }; + + static const char* special3_name[]; + + // lwc2/gs_lwc2 family, the opcode is in low 6 bits. + enum gs_lwc2_ops { + gslble_op = 0x10, + gslbgt_op = 0x11, + gslhle_op = 0x12, + gslhgt_op = 0x13, + gslwle_op = 0x14, + gslwgt_op = 0x15, + gsldle_op = 0x16, + gsldgt_op = 0x17, + gslwlec1_op = 0x1c, + gslwgtc1_op = 0x1d, + gsldlec1_op = 0x1e, + gsldgtc1_op = 0x1f, + gslq_op = 0x20 + }; + + static const char* gs_lwc2_name[]; + + // ldc2/gs_ldc2 family, the opcode is in low 3 bits. + enum gs_ldc2_ops { + gslbx_op = 0x0, + gslhx_op = 0x1, + gslwx_op = 0x2, + gsldx_op = 0x3, + gslwxc1_op = 0x6, + gsldxc1_op = 0x7 + }; + + static const char* gs_ldc2_name[]; + + // swc2/gs_swc2 family, the opcode is in low 6 bits. 
+ enum gs_swc2_ops { + gssble_op = 0x10, + gssbgt_op = 0x11, + gsshle_op = 0x12, + gsshgt_op = 0x13, + gsswle_op = 0x14, + gsswgt_op = 0x15, + gssdle_op = 0x16, + gssdgt_op = 0x17, + gsswlec1_op = 0x1c, + gsswgtc1_op = 0x1d, + gssdlec1_op = 0x1e, + gssdgtc1_op = 0x1f, + gssq_op = 0x20 + }; + + static const char* gs_swc2_name[]; + + // sdc2/gs_sdc2 family, the opcode is in low 3 bits. + enum gs_sdc2_ops { + gssbx_op = 0x0, + gsshx_op = 0x1, + gsswx_op = 0x2, + gssdx_op = 0x3, + gsswxc1_op = 0x6, + gssdxc1_op = 0x7 + }; + + static const char* gs_sdc2_name[]; + + enum WhichOperand { + // input to locate_operand, and format code for relocations + imm_operand = 0, // embedded 32-bit|64-bit immediate operand + disp32_operand = 1, // embedded 32-bit displacement or address + call32_operand = 2, // embedded 32-bit self-relative displacement + narrow_oop_operand = 3, // embedded 32-bit immediate narrow oop + _WhichOperand_limit = 4 + }; + + static int opcode(int insn) { return (insn>>26)&0x3f; } + static int rs(int insn) { return (insn>>21)&0x1f; } + static int rt(int insn) { return (insn>>16)&0x1f; } + static int rd(int insn) { return (insn>>11)&0x1f; } + static int sa(int insn) { return (insn>>6)&0x1f; } + static int special(int insn) { return insn&0x3f; } + static int imm_off(int insn) { return (short)low16(insn); } + + static int low (int x, int l) { return bitfield(x, 0, l); } + static int low16(int x) { return low(x, 16); } + static int low26(int x) { return low(x, 26); } + + protected: + //help methods for instruction ejection + + // I-Type (Immediate) + // 31 26 25 21 20 16 15 0 + //| opcode | rs | rt | immediat | + //| | | | | + // 6 5 5 16 + static int insn_ORRI(int op, int rs, int rt, int imm) { assert(is_simm16(imm), "not a signed 16-bit int"); return (op<<26) | (rs<<21) | (rt<<16) | low16(imm); } + + // R-Type (Register) + // 31 26 25 21 20 16 15 11 10 6 5 0 + //| special | rs | rt | rd | 0 | opcode | + //| 0 0 0 0 0 0 | | | | 0 0 0 0 0 | | + // 6 5 5 5 5 6 + static int insn_RRRO(int rs, int rt, int rd, int op) { return (rs<<21) | (rt<<16) | (rd<<11) | op; } + static int insn_RRSO(int rt, int rd, int sa, int op) { return (rt<<16) | (rd<<11) | (sa<<6) | op; } + static int insn_RRCO(int rs, int rt, int code, int op) { return (rs<<21) | (rt<<16) | (code<<6) | op; } + + static int insn_COP0(int op, int rt, int rd) { return (cop0_op<<26) | (op<<21) | (rt<<16) | (rd<<11); } + static int insn_COP1(int op, int rt, int fs) { return (cop1_op<<26) | (op<<21) | (rt<<16) | (fs<<11); } + + static int insn_F3RO(int fmt, int ft, int fs, int fd, int func) { + return (cop1_op<<26) | (fmt<<21) | (ft<<16) | (fs<<11) | (fd<<6) | func; + } + static int insn_F3ROX(int fmt, int ft, int fs, int fd, int func) { + return (cop1x_op<<26) | (fmt<<21) | (ft<<16) | (fs<<11) | (fd<<6) | func; + } + + static int high (int x, int l) { return bitfield(x, 32-l, l); } + static int high16(int x) { return high(x, 16); } + static int high6 (int x) { return high(x, 6); } + + //get the offset field of jump/branch instruction + int offset(address entry) { + assert(is_simm16((entry - pc() - 4) / 4), "change this code"); + if (!is_simm16((entry - pc() - 4) / 4)) { + tty->print_cr("!!! 
is_simm16: %lx", (entry - pc() - 4) / 4); + } + return (entry - pc() - 4) / 4; + } + + +public: + using AbstractAssembler::offset; + + //sign expand with the sign bit is h + static int expand(int x, int h) { return -(x & (1<> 16; + } + + static int split_high(int x) { + return ( (x >> 16) + ((x & 0x8000) != 0) ) & 0xffff; + } + + static int merge(int low, int high) { + return expand(low, 15) + (high<<16); + } + + static intptr_t merge(intptr_t x0, intptr_t x16, intptr_t x32, intptr_t x48) { + return (x48 << 48) | (x32 << 32) | (x16 << 16) | x0; + } + + // Test if x is within signed immediate range for nbits. + static bool is_simm (int x, int nbits) { + assert(0 < nbits && nbits < 32, "out of bounds"); + const int min = -( ((int)1) << nbits-1 ); + const int maxplus1 = ( ((int)1) << nbits-1 ); + return min <= x && x < maxplus1; + } + + static bool is_simm(jlong x, unsigned int nbits) { + assert(0 < nbits && nbits < 64, "out of bounds"); + const jlong min = -( ((jlong)1) << nbits-1 ); + const jlong maxplus1 = ( ((jlong)1) << nbits-1 ); + return min <= x && x < maxplus1; + } + + // Test if x is within unsigned immediate range for nbits + static bool is_uimm(int x, unsigned int nbits) { + assert(0 < nbits && nbits < 32, "out of bounds"); + const int maxplus1 = ( ((int)1) << nbits ); + return 0 <= x && x < maxplus1; + } + + static bool is_uimm(jlong x, unsigned int nbits) { + assert(0 < nbits && nbits < 64, "out of bounds"); + const jlong maxplus1 = ( ((jlong)1) << nbits ); + return 0 <= x && x < maxplus1; + } + + static bool is_simm16(int x) { return is_simm(x, 16); } + static bool is_simm16(long x) { return is_simm((jlong)x, (unsigned int)16); } + + static bool fit_in_jal(address target, address pc) { + intptr_t mask = 0xfffffffff0000000; + return ((intptr_t)(pc + 4) & mask) == ((intptr_t)target & mask); + } + + bool fit_int_branch(address entry) { + return is_simm16(offset(entry)); + } + +protected: +#ifdef ASSERT + #define CHECK_DELAY +#endif +#ifdef CHECK_DELAY + enum Delay_state { no_delay, at_delay_slot, filling_delay_slot } delay_state; +#endif + +public: + void assert_not_delayed() { +#ifdef CHECK_DELAY + assert(delay_state == no_delay, "next instruction should not be a delay slot"); +#endif + } + +protected: + // Delay slot helpers + // cti is called when emitting control-transfer instruction, + // BEFORE doing the emitting. + // Only effective when assertion-checking is enabled. + + // called when emitting cti with a delay slot, AFTER emitting + void has_delay_slot() { +#ifdef CHECK_DELAY + assert(delay_state == no_delay, "just checking"); + delay_state = at_delay_slot; +#endif + } + +public: + Assembler* delayed() { +#ifdef CHECK_DELAY + guarantee( delay_state == at_delay_slot, "delayed instructition is not in delay slot"); + delay_state = filling_delay_slot; +#endif + return this; + } + + void flush() { +#ifdef CHECK_DELAY + guarantee( delay_state == no_delay, "ending code with a delay slot"); +#endif + AbstractAssembler::flush(); + } + + void emit_long(int); // shadows AbstractAssembler::emit_long + void emit_data(int); + void emit_data(int, RelocationHolder const&); + void emit_data(int, relocInfo::relocType rtype); + void check_delay(); + + + // Generic instructions + // Does 32bit or 64bit as needed for the platform. 
In some sense these + // belong in macro assembler but there is no need for both varieties to exist + + void addu32(Register rd, Register rs, Register rt){ emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), addu_op)); } + void addiu32(Register rt, Register rs, int imm) { emit_long(insn_ORRI(addiu_op, (int)rs->encoding(), (int)rt->encoding(), imm)); } + void addiu(Register rt, Register rs, int imm) { daddiu (rt, rs, imm);} + void addu(Register rd, Register rs, Register rt) { daddu (rd, rs, rt); } + + void andr(Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), and_op)); } + void andi(Register rt, Register rs, int imm) { emit_long(insn_ORRI(andi_op, (int)rs->encoding(), (int)rt->encoding(), simm16(imm))); } + + void beq (Register rs, Register rt, int off) { emit_long(insn_ORRI(beq_op, (int)rs->encoding(), (int)rt->encoding(), off)); has_delay_slot(); } + void beql (Register rs, Register rt, int off) { emit_long(insn_ORRI(beql_op, (int)rs->encoding(), (int)rt->encoding(), off)); has_delay_slot(); } + void bgez (Register rs, int off) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), bgez_op, off)); has_delay_slot(); } + void bgezal (Register rs, int off) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), bgezal_op, off)); has_delay_slot(); } + void bgezall(Register rs, int off) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), bgezall_op, off)); has_delay_slot(); } + void bgezl (Register rs, int off) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), bgezl_op, off)); has_delay_slot(); } + void bgtz (Register rs, int off) { emit_long(insn_ORRI(bgtz_op, (int)rs->encoding(), 0, off)); has_delay_slot(); } + void bgtzl (Register rs, int off) { emit_long(insn_ORRI(bgtzl_op, (int)rs->encoding(), 0, off)); has_delay_slot(); } + void blez (Register rs, int off) { emit_long(insn_ORRI(blez_op, (int)rs->encoding(), 0, off)); has_delay_slot(); } + void blezl (Register rs, int off) { emit_long(insn_ORRI(blezl_op, (int)rs->encoding(), 0, off)); has_delay_slot(); } + void bltz (Register rs, int off) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), bltz_op, off)); has_delay_slot(); } + void bltzal (Register rs, int off) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), bltzal_op, off)); has_delay_slot(); } + void bltzall(Register rs, int off) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), bltzall_op, off)); has_delay_slot(); } + void bltzl (Register rs, int off) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), bltzl_op, off)); has_delay_slot(); } + void bne (Register rs, Register rt, int off) { emit_long(insn_ORRI(bne_op, (int)rs->encoding(), (int)rt->encoding(), off)); has_delay_slot(); } + void bnel (Register rs, Register rt, int off) { emit_long(insn_ORRI(bnel_op, (int)rs->encoding(), (int)rt->encoding(), off)); has_delay_slot(); } + // two versions of brk: + // the brk(code) version is according to MIPS64 Architecture For Programmers Volume II: The MIPS64 Instruction Set + // the brk(code1, code2) is according to disassembler of hsdis (binutils-2.27) + // both versions work + void brk (int code) { assert(is_uimm(code, 20), "code is 20 bits"); emit_long( (low(code, 20)<<6) | break_op ); } + void brk (int code1, int code2) { assert(is_uimm(code1, 10) && is_uimm(code2, 10), "code is 20 bits"); emit_long( (low(code1, 10)<<16) | (low(code2, 10)<<6) | break_op ); } + + void beq (Register rs, Register rt, address entry) { beq(rs, rt, offset(entry)); } + void beql (Register 
rs, Register rt, address entry) { beql(rs, rt, offset(entry));} + void bgez (Register rs, address entry) { bgez (rs, offset(entry)); } + void bgezal (Register rs, address entry) { bgezal (rs, offset(entry)); } + void bgezall(Register rs, address entry) { bgezall(rs, offset(entry)); } + void bgezl (Register rs, address entry) { bgezl (rs, offset(entry)); } + void bgtz (Register rs, address entry) { bgtz (rs, offset(entry)); } + void bgtzl (Register rs, address entry) { bgtzl (rs, offset(entry)); } + void blez (Register rs, address entry) { blez (rs, offset(entry)); } + void blezl (Register rs, address entry) { blezl (rs, offset(entry)); } + void bltz (Register rs, address entry) { bltz (rs, offset(entry)); } + void bltzal (Register rs, address entry) { bltzal (rs, offset(entry)); } + void bltzall(Register rs, address entry) { bltzall(rs, offset(entry)); } + void bltzl (Register rs, address entry) { bltzl (rs, offset(entry)); } + void bne (Register rs, Register rt, address entry) { bne(rs, rt, offset(entry)); } + void bnel (Register rs, Register rt, address entry) { bnel(rs, rt, offset(entry)); } + + void beq (Register rs, Register rt, Label& L) { beq(rs, rt, target(L)); } + void beql (Register rs, Register rt, Label& L) { beql(rs, rt, target(L)); } + void bgez (Register rs, Label& L){ bgez (rs, target(L)); } + void bgezal (Register rs, Label& L){ bgezal (rs, target(L)); } + void bgezall(Register rs, Label& L){ bgezall(rs, target(L)); } + void bgezl (Register rs, Label& L){ bgezl (rs, target(L)); } + void bgtz (Register rs, Label& L){ bgtz (rs, target(L)); } + void bgtzl (Register rs, Label& L){ bgtzl (rs, target(L)); } + void blez (Register rs, Label& L){ blez (rs, target(L)); } + void blezl (Register rs, Label& L){ blezl (rs, target(L)); } + void bltz (Register rs, Label& L){ bltz (rs, target(L)); } + void bltzal (Register rs, Label& L){ bltzal (rs, target(L)); } + void bltzall(Register rs, Label& L){ bltzall(rs, target(L)); } + void bltzl (Register rs, Label& L){ bltzl (rs, target(L)); } + void bne (Register rs, Register rt, Label& L){ bne(rs, rt, target(L)); } + void bnel (Register rs, Register rt, Label& L){ bnel(rs, rt, target(L)); } + + void daddiu(Register rt, Register rs, int imm) { emit_long(insn_ORRI(daddiu_op, (int)rs->encoding(), (int)rt->encoding(), imm)); } + void daddu (Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), daddu_op)); } + void ddiv (Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), 0, ddiv_op)); } + void ddivu (Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), 0, ddivu_op)); } + + void movz (Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), movz_op)); } + void movn (Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), movn_op)); } + + void movt (Register rd, Register rs) { emit_long(((int)rs->encoding() << 21) | (1 << 16) | ((int)rd->encoding() << 11) | movci_op); } + void movf (Register rd, Register rs) { emit_long(((int)rs->encoding() << 21) | ((int)rd->encoding() << 11) | movci_op); } + + enum bshfl_ops { + seb_op = 0x10, + seh_op = 0x18 + }; + void seb (Register rd, Register rt) { emit_long((special3_op << 26) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | (seb_op << 6) | bshfl_op); } + void seh (Register rd, Register rt) { emit_long((special3_op << 
26) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | (seh_op << 6) | bshfl_op); } + + void ext (Register rt, Register rs, int pos, int size) { + guarantee((0 <= pos) && (pos < 32), "pos must be in [0, 32)"); + guarantee((0 < size) && (size <= 32), "size must be in (0, 32]"); + guarantee((0 < pos + size) && (pos + size <= 32), "pos + size must be in (0, 32]"); + + int lsb = pos; + int msbd = size - 1; + + emit_long((special3_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | (msbd << 11) | (lsb << 6) | ext_op); + } + + void dext (Register rt, Register rs, int pos, int size) { + guarantee((0 <= pos) && (pos < 32), "pos must be in [0, 32)"); + guarantee((0 < size) && (size <= 32), "size must be in (0, 32]"); + guarantee((0 < pos + size) && (pos + size <= 63), "pos + size must be in (0, 63]"); + + int lsb = pos; + int msbd = size - 1; + + emit_long((special3_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | (msbd << 11) | (lsb << 6) | dext_op); + } + + void dextm (Register rt, Register rs, int pos, int size) { + guarantee((0 <= pos) && (pos < 32), "pos must be in [0, 32)"); + guarantee((32 < size) && (size <= 64), "size must be in (32, 64]"); + guarantee((32 < pos + size) && (pos + size <= 64), "pos + size must be in (32, 64]"); + + int lsb = pos; + int msbd = size - 1 - 32; + + emit_long((special3_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | (msbd << 11) | (lsb << 6) | dextm_op); + } + + void rotr (Register rd, Register rt, int sa) { + emit_long((special_op << 26) | (1 << 21) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | (low(sa, 5) << 6) | srl_op); + } + + void drotr (Register rd, Register rt, int sa) { + emit_long((special_op << 26) | (1 << 21) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | (low(sa, 5) << 6) | dsrl_op); + } + + void drotr32 (Register rd, Register rt, int sa) { + emit_long((special_op << 26) | (1 << 21) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | (low(sa, 5) << 6) | dsrl32_op); + } + + void rotrv (Register rd, Register rt, Register rs) { + emit_long((special_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | (1 << 6) | srlv_op); + } + + void drotrv (Register rd, Register rt, Register rs) { + emit_long((special_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | (1 << 6) | dsrlv_op); + } + + void div (Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), 0, div_op)); } + void divu (Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), 0, divu_op)); } + void dmult (Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), 0, dmult_op)); } + void dmultu(Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), 0, dmultu_op)); } + void dsll (Register rd, Register rt , int sa) { emit_long(insn_RRSO((int)rt->encoding(), (int)rd->encoding(), low(sa, 5), dsll_op)); } + void dsllv (Register rd, Register rt, Register rs) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), dsllv_op)); } + void dsll32(Register rd, Register rt , int sa) { emit_long(insn_RRSO((int)rt->encoding(), (int)rd->encoding(), low(sa, 5), dsll32_op)); } + void dsra (Register rd, Register rt , int sa) { emit_long(insn_RRSO((int)rt->encoding(), (int)rd->encoding(), low(sa, 5), dsra_op)); } + void dsrav (Register rd, 
Register rt, Register rs) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), dsrav_op)); } + void dsra32(Register rd, Register rt , int sa) { emit_long(insn_RRSO((int)rt->encoding(), (int)rd->encoding(), low(sa, 5), dsra32_op)); } + void dsrl (Register rd, Register rt , int sa) { emit_long(insn_RRSO((int)rt->encoding(), (int)rd->encoding(), low(sa, 5), dsrl_op)); } + void dsrlv (Register rd, Register rt, Register rs) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), dsrlv_op)); } + void dsrl32(Register rd, Register rt , int sa) { emit_long(insn_RRSO((int)rt->encoding(), (int)rd->encoding(), low(sa, 5), dsrl32_op)); } + void dsubu (Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), dsubu_op)); } + + void b(int off) { beq(R0, R0, off); } + void b(address entry) { b(offset(entry)); } + void b(Label& L) { b(target(L)); } + + void j(address entry); + void jal(address entry); + + void jalr(Register rd, Register rs) { emit_long( ((int)rs->encoding()<<21) | ((int)rd->encoding()<<11) | jalr_op); has_delay_slot(); } + void jalr(Register rs) { jalr(RA, rs); } + void jalr() { jalr(RT9); } + + void jr(Register rs) { emit_long(((int)rs->encoding()<<21) | jr_op); has_delay_slot(); } + void jr_hb(Register rs) { emit_long(((int)rs->encoding()<<21) | (1 << 10) | jr_op); has_delay_slot(); } + + void lb (Register rt, Register base, int off) { emit_long(insn_ORRI(lb_op, (int)base->encoding(), (int)rt->encoding(), off)); } + void lbu(Register rt, Register base, int off) { emit_long(insn_ORRI(lbu_op, (int)base->encoding(), (int)rt->encoding(), off)); } + void ld (Register rt, Register base, int off) { emit_long(insn_ORRI(ld_op, (int)base->encoding(), (int)rt->encoding(), off)); } + void ldl(Register rt, Register base, int off) { emit_long(insn_ORRI(ldl_op, (int)base->encoding(), (int)rt->encoding(), off)); } + void ldr(Register rt, Register base, int off) { emit_long(insn_ORRI(ldr_op, (int)base->encoding(), (int)rt->encoding(), off)); } + void lh (Register rt, Register base, int off) { emit_long(insn_ORRI(lh_op, (int)base->encoding(), (int)rt->encoding(), off)); } + void lhu(Register rt, Register base, int off) { emit_long(insn_ORRI(lhu_op, (int)base->encoding(), (int)rt->encoding(), off)); } + void ll (Register rt, Register base, int off) { emit_long(insn_ORRI(ll_op, (int)base->encoding(), (int)rt->encoding(), off)); } + void lld(Register rt, Register base, int off) { emit_long(insn_ORRI(lld_op, (int)base->encoding(), (int)rt->encoding(), off)); } + void lui(Register rt, int imm) { emit_long(insn_ORRI(lui_op, 0, (int)rt->encoding(), simm16(imm))); } + void lw (Register rt, Register base, int off) { emit_long(insn_ORRI(lw_op, (int)base->encoding(), (int)rt->encoding(), off)); } + void lwl(Register rt, Register base, int off) { emit_long(insn_ORRI(lwl_op, (int)base->encoding(), (int)rt->encoding(), off)); } + void lwr(Register rt, Register base, int off) { emit_long(insn_ORRI(lwr_op, (int)base->encoding(), (int)rt->encoding(), off)); } + void lwu(Register rt, Register base, int off) { emit_long(insn_ORRI(lwu_op, (int)base->encoding(), (int)rt->encoding(), off)); } + + void lb (Register rt, Address src); + void lbu(Register rt, Address src); + void ld (Register rt, Address src); + void ldl(Register rt, Address src); + void ldr(Register rt, Address src); + void lh (Register rt, Address src); + void lhu(Register rt, Address src); + void ll (Register rt, Address src); + void 
lld(Register rt, Address src); + void lw (Register rt, Address src); + void lwl(Register rt, Address src); + void lwr(Register rt, Address src); + void lwu(Register rt, Address src); + void lea(Register rt, Address src); + void pref(int hint, Register base, int off) { emit_long(insn_ORRI(pref_op, (int)base->encoding(), low(hint, 5), low(off, 16))); } + + void mfhi (Register rd) { emit_long( ((int)rd->encoding()<<11) | mfhi_op ); } + void mflo (Register rd) { emit_long( ((int)rd->encoding()<<11) | mflo_op ); } + void mthi (Register rs) { emit_long( ((int)rs->encoding()<<21) | mthi_op ); } + void mtlo (Register rs) { emit_long( ((int)rs->encoding()<<21) | mtlo_op ); } + + void mult (Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), 0, mult_op)); } + void multu(Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), 0, multu_op)); } + + void nor(Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), nor_op)); } + + void orr(Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), or_op)); } + void ori(Register rt, Register rs, int imm) { emit_long(insn_ORRI(ori_op, (int)rs->encoding(), (int)rt->encoding(), simm16(imm))); } + + void sb (Register rt, Register base, int off) { emit_long(insn_ORRI(sb_op, (int)base->encoding(), (int)rt->encoding(), off)); } + void sc (Register rt, Register base, int off) { emit_long(insn_ORRI(sc_op, (int)base->encoding(), (int)rt->encoding(), off)); } + void scd (Register rt, Register base, int off) { emit_long(insn_ORRI(scd_op, (int)base->encoding(), (int)rt->encoding(), off)); } + void sd (Register rt, Register base, int off) { emit_long(insn_ORRI(sd_op, (int)base->encoding(), (int)rt->encoding(), off)); } + void sdl (Register rt, Register base, int off) { emit_long(insn_ORRI(sdl_op, (int)base->encoding(), (int)rt->encoding(), off)); } + void sdr (Register rt, Register base, int off) { emit_long(insn_ORRI(sdr_op, (int)base->encoding(), (int)rt->encoding(), off)); } + void sh (Register rt, Register base, int off) { emit_long(insn_ORRI(sh_op, (int)base->encoding(), (int)rt->encoding(), off)); } + void sll (Register rd, Register rt , int sa) { emit_long(insn_RRSO((int)rt->encoding(), (int)rd->encoding(), low(sa, 5), sll_op)); } + void sllv (Register rd, Register rt, Register rs) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), sllv_op)); } + void slt (Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), slt_op)); } + void slti (Register rt, Register rs, int imm) { emit_long(insn_ORRI(slti_op, (int)rs->encoding(), (int)rt->encoding(), imm)); } + void sltiu(Register rt, Register rs, int imm) { emit_long(insn_ORRI(sltiu_op, (int)rs->encoding(), (int)rt->encoding(), imm)); } + void sltu (Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), sltu_op)); } + void sra (Register rd, Register rt , int sa) { emit_long(insn_RRSO((int)rt->encoding(), (int)rd->encoding(), low(sa, 5), sra_op)); } + void srav (Register rd, Register rt, Register rs) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), srav_op)); } + void srl (Register rd, Register rt , int sa) { emit_long(insn_RRSO((int)rt->encoding(), (int)rd->encoding(), low(sa, 5), srl_op)); } + void srlv (Register 
rd, Register rt, Register rs) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), srlv_op)); } + + void subu (Register rd, Register rs, Register rt) { dsubu (rd, rs, rt); } + void subu32 (Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), subu_op)); } + void sw (Register rt, Register base, int off) { emit_long(insn_ORRI(sw_op, (int)base->encoding(), (int)rt->encoding(), off)); } + void swl (Register rt, Register base, int off) { emit_long(insn_ORRI(swl_op, (int)base->encoding(), (int)rt->encoding(), off)); } + void swr (Register rt, Register base, int off) { emit_long(insn_ORRI(swr_op, (int)base->encoding(), (int)rt->encoding(), off)); } + void synci(Register base, int off) { emit_long(insn_ORRI(regimm_op, (int)base->encoding(), synci_op, off)); } + void sync () { + if (os::is_ActiveCoresMP()) + emit_long(0); + else + emit_long(sync_op); + } + void syscall(int code) { emit_long( (code<<6) | syscall_op ); } + + void sb(Register rt, Address dst); + void sc(Register rt, Address dst); + void scd(Register rt, Address dst); + void sd(Register rt, Address dst); + void sdl(Register rt, Address dst); + void sdr(Register rt, Address dst); + void sh(Register rt, Address dst); + void sw(Register rt, Address dst); + void swl(Register rt, Address dst); + void swr(Register rt, Address dst); + + void teq (Register rs, Register rt, int code) { emit_long(insn_RRCO((int)rs->encoding(), (int)rt->encoding(), code, teq_op)); } + void teqi (Register rs, int imm) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), teqi_op, imm)); } + void tge (Register rs, Register rt, int code) { emit_long(insn_RRCO((int)rs->encoding(), (int)rt->encoding(), code, tge_op)); } + void tgei (Register rs, int imm) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), tgei_op, imm)); } + void tgeiu(Register rs, int imm) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), tgeiu_op, imm)); } + void tgeu (Register rs, Register rt, int code) { emit_long(insn_RRCO((int)rs->encoding(), (int)rt->encoding(), code, tgeu_op)); } + void tlt (Register rs, Register rt, int code) { emit_long(insn_RRCO((int)rs->encoding(), (int)rt->encoding(), code, tlt_op)); } + void tlti (Register rs, int imm) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), tlti_op, imm)); } + void tltiu(Register rs, int imm) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), tltiu_op, imm)); } + void tltu (Register rs, Register rt, int code) { emit_long(insn_RRCO((int)rs->encoding(), (int)rt->encoding(), code, tltu_op)); } + void tne (Register rs, Register rt, int code) { emit_long(insn_RRCO((int)rs->encoding(), (int)rt->encoding(), code, tne_op)); } + void tnei (Register rs, int imm) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), tnei_op, imm)); } + + void xorr(Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), xor_op)); } + void xori(Register rt, Register rs, int imm) { emit_long(insn_ORRI(xori_op, (int)rs->encoding(), (int)rt->encoding(), simm16(imm))); } + + void nop() { emit_long(0); } + + + + void ldc1(FloatRegister ft, Register base, int off) { emit_long(insn_ORRI(ldc1_op, (int)base->encoding(), (int)ft->encoding(), off)); } + void lwc1(FloatRegister ft, Register base, int off) { emit_long(insn_ORRI(lwc1_op, (int)base->encoding(), (int)ft->encoding(), off)); } + void ldc1(FloatRegister ft, Address src); + void lwc1(FloatRegister ft, Address src); + + //COP0 + void mfc0 
(Register rt, Register rd) { emit_long(insn_COP0( mfc0_op, (int)rt->encoding(), (int)rd->encoding())); } + void dmfc0 (Register rt, FloatRegister rd) { emit_long(insn_COP0(dmfc0_op, (int)rt->encoding(), (int)rd->encoding())); } + // MFGC0, DMFGC0, MTGC0, DMTGC0 not implemented yet + void mtc0 (Register rt, Register rd) { emit_long(insn_COP0( mtc0_op, (int)rt->encoding(), (int)rd->encoding())); } + void dmtc0 (Register rt, FloatRegister rd) { emit_long(insn_COP0(dmtc0_op, (int)rt->encoding(), (int)rd->encoding())); } + //COP0 end + + + //COP1 + void mfc1 (Register rt, FloatRegister fs) { emit_long(insn_COP1 (mfc1_op, (int)rt->encoding(), (int)fs->encoding())); } + void dmfc1(Register rt, FloatRegister fs) { emit_long(insn_COP1(dmfc1_op, (int)rt->encoding(), (int)fs->encoding())); } + void cfc1 (Register rt, int fs) { emit_long(insn_COP1( cfc1_op, (int)rt->encoding(), fs)); } + void mfhc1(Register rt, int fs) { emit_long(insn_COP1(mfhc1_op, (int)rt->encoding(), fs)); } + void mtc1 (Register rt, FloatRegister fs) { emit_long(insn_COP1( mtc1_op, (int)rt->encoding(), (int)fs->encoding())); } + void dmtc1(Register rt, FloatRegister fs) { emit_long(insn_COP1(dmtc1_op, (int)rt->encoding(), (int)fs->encoding())); } + void ctc1 (Register rt, FloatRegister fs) { emit_long(insn_COP1( ctc1_op, (int)rt->encoding(), (int)fs->encoding())); } + void ctc1 (Register rt, int fs) { emit_long(insn_COP1(ctc1_op, (int)rt->encoding(), fs)); } + void mthc1(Register rt, int fs) { emit_long(insn_COP1(mthc1_op, (int)rt->encoding(), fs)); } + + void bc1f (int off) { emit_long(insn_ORRI(cop1_op, bc1f_op, bcf_op, off)); has_delay_slot(); } + void bc1fl(int off) { emit_long(insn_ORRI(cop1_op, bc1f_op, bcfl_op, off)); has_delay_slot(); } + void bc1t (int off) { emit_long(insn_ORRI(cop1_op, bc1f_op, bct_op, off)); has_delay_slot(); } + void bc1tl(int off) { emit_long(insn_ORRI(cop1_op, bc1f_op, bctl_op, off)); has_delay_slot(); } + + void bc1f (address entry) { bc1f(offset(entry)); } + void bc1fl(address entry) { bc1fl(offset(entry)); } + void bc1t (address entry) { bc1t(offset(entry)); } + void bc1tl(address entry) { bc1tl(offset(entry)); } + + void bc1f (Label& L) { bc1f(target(L)); } + void bc1fl(Label& L) { bc1fl(target(L)); } + void bc1t (Label& L) { bc1t(target(L)); } + void bc1tl(Label& L) { bc1tl(target(L)); } + +//R0->encoding() is 0; INSN_SINGLE is enclosed by {} for ctags. 
+#define INSN_SINGLE(r1, r2, r3, op) \ + { emit_long(insn_F3RO(single_fmt, (int)r1->encoding(), (int)r2->encoding(), (int)r3->encoding(), op));} + void add_s (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, fd, fadd_op)} + void sub_s (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, fd, fsub_op)} + void mul_s (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, fd, fmul_op)} + void div_s (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, fd, fdiv_op)} + void sqrt_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, fsqrt_op)} + void abs_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, fabs_op)} + void mov_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, fmov_op)} + void neg_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, fneg_op)} + void round_l_s(FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, froundl_op)} + void trunc_l_s(FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, ftruncl_op)} + void ceil_l_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, fceill_op)} + void floor_l_s(FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, ffloorl_op)} + void round_w_s(FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, froundw_op)} + void trunc_w_s(FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, ftruncw_op)} + void ceil_w_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, fceilw_op)} + void floor_w_s(FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, ffloorw_op)} + //null + void movf_s(FloatRegister fs, FloatRegister fd, int cc = 0) { + assert(cc >= 0 && cc <= 7, "cc is 3 bits"); + emit_long((cop1_op<<26) | (single_fmt<<21) | (cc<<18) | ((int)fs->encoding()<<11) | ((int)fd->encoding()<<6) | movf_f_op );} + void movt_s(FloatRegister fs, FloatRegister fd, int cc = 0) { + assert(cc >= 0 && cc <= 7, "cc is 3 bits"); + emit_long((cop1_op<<26) | (single_fmt<<21) | (cc<<18) | 1<<16 | ((int)fs->encoding()<<11) | ((int)fd->encoding()<<6) | movf_f_op );} + void movz_s (FloatRegister fd, FloatRegister fs, Register rt) {INSN_SINGLE(rt, fs, fd, movz_f_op)} + void movn_s (FloatRegister fd, FloatRegister fs, Register rt) {INSN_SINGLE(rt, fs, fd, movn_f_op)} + //null + void recip_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, frecip_op)} + void rsqrt_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, frsqrt_op)} + //null + void cvt_d_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, fcvtd_op)} + //null + void cvt_w_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, fcvtw_op)} + void cvt_l_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, fcvtl_op)} + void cvt_ps_s(FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, fd, fcvtps_op)} + //null + void c_f_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, f_cond)} + void c_un_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, un_cond)} + void c_eq_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, eq_cond)} + void c_ueq_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, ueq_cond)} + void c_olt_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, olt_cond)} + void c_ult_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, ult_cond)} + void c_ole_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, ole_cond)} + void c_ule_s (FloatRegister fs, 
FloatRegister ft) {INSN_SINGLE(ft, fs, R0, ule_cond)} + void c_sf_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, sf_cond)} + void c_ngle_s(FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, ngle_cond)} + void c_seq_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, seq_cond)} + void c_ngl_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, ngl_cond)} + void c_lt_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, lt_cond)} + void c_nge_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, nge_cond)} + void c_le_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, le_cond)} + void c_ngt_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, ngt_cond)} + +#undef INSN_SINGLE + + +//R0->encoding() is 0; INSN_DOUBLE is enclosed by {} for ctags. +#define INSN_DOUBLE(r1, r2, r3, op) \ + { emit_long(insn_F3RO(double_fmt, (int)r1->encoding(), (int)r2->encoding(), (int)r3->encoding(), op));} + + void add_d (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, fd, fadd_op)} + void sub_d (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, fd, fsub_op)} + void mul_d (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, fd, fmul_op)} + void div_d (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, fd, fdiv_op)} + void sqrt_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, fsqrt_op)} + void abs_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, fabs_op)} + void mov_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, fmov_op)} + void neg_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, fneg_op)} + void round_l_d(FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, froundl_op)} + void trunc_l_d(FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, ftruncl_op)} + void ceil_l_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, fceill_op)} + void floor_l_d(FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, ffloorl_op)} + void round_w_d(FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, froundw_op)} + void trunc_w_d(FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, ftruncw_op)} + void ceil_w_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, fceilw_op)} + void floor_w_d(FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, ffloorw_op)} + //null + void movf_d(FloatRegister fs, FloatRegister fd, int cc = 0) { + assert(cc >= 0 && cc <= 7, "cc is 3 bits"); + emit_long((cop1_op<<26) | (double_fmt<<21) | (cc<<18) | ((int)fs->encoding()<<11) | ((int)fd->encoding()<<6) | movf_f_op );} + void movt_d(FloatRegister fs, FloatRegister fd, int cc = 0) { + assert(cc >= 0 && cc <= 7, "cc is 3 bits"); + emit_long((cop1_op<<26) | (double_fmt<<21) | (cc<<18) | 1<<16 | ((int)fs->encoding()<<11) | ((int)fd->encoding()<<6) | movf_f_op );} + void movz_d (FloatRegister fd, FloatRegister fs, Register rt) {INSN_DOUBLE(rt, fs, fd, movz_f_op)} + void movn_d (FloatRegister fd, FloatRegister fs, Register rt) {INSN_DOUBLE(rt, fs, fd, movn_f_op)} + //null + void recip_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, frecip_op)} + void rsqrt_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, frsqrt_op)} + //null + void cvt_s_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, fcvts_op)} + void cvt_l_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, fcvtl_op)} + //null + void 
cvt_w_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, fcvtw_op)} + //null + void c_f_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, f_cond)} + void c_un_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, un_cond)} + void c_eq_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, eq_cond)} + void c_ueq_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, ueq_cond)} + void c_olt_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, olt_cond)} + void c_ult_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, ult_cond)} + void c_ole_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, ole_cond)} + void c_ule_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, ule_cond)} + void c_sf_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, sf_cond)} + void c_ngle_d(FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, ngle_cond)} + void c_seq_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, seq_cond)} + void c_ngl_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, ngl_cond)} + void c_lt_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, lt_cond)} + void c_nge_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, nge_cond)} + void c_le_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, le_cond)} + void c_ngt_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, ngt_cond)} + +#undef INSN_DOUBLE + + + //null + void cvt_s_w(FloatRegister fd, FloatRegister fs) { emit_long(insn_F3RO(word_fmt, 0, (int)fs->encoding(), (int)fd->encoding(), fcvts_op)); } + void cvt_d_w(FloatRegister fd, FloatRegister fs) { emit_long(insn_F3RO(word_fmt, 0, (int)fs->encoding(), (int)fd->encoding(), fcvtd_op)); } + //null + void cvt_s_l(FloatRegister fd, FloatRegister fs) { emit_long(insn_F3RO(long_fmt, 0, (int)fs->encoding(), (int)fd->encoding(), fcvts_op)); } + void cvt_d_l(FloatRegister fd, FloatRegister fs) { emit_long(insn_F3RO(long_fmt, 0, (int)fs->encoding(), (int)fd->encoding(), fcvtd_op)); } + //null + + +//R0->encoding() is 0; INSN_PS is enclosed by {} for ctags. 
+#define INSN_PS(r1, r2, r3, op) \ + { emit_long(insn_F3RO(ps_fmt, (int)r1->encoding(), (int)r2->encoding(), (int)r3->encoding(), op));} + + void add_ps (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, fd, fadd_op)} + void sub_ps (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, fd, fsub_op)} + void mul_ps (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, fd, fmul_op)} + //null + void abs_ps (FloatRegister fd, FloatRegister fs) {INSN_PS(R0, fs, fd, fabs_op)} + void mov_ps (FloatRegister fd, FloatRegister fs) {INSN_PS(R0, fs, fd, fmov_op)} + void neg_ps (FloatRegister fd, FloatRegister fs) {INSN_PS(R0, fs, fd, fneg_op)} + //null + //void movf_ps(FloatRegister rd, FloatRegister rs, FPConditionCode cc) { unimplemented(" movf_ps")} + //void movt_ps(FloatRegister rd, FloatRegister rs, FPConditionCode cc) { unimplemented(" movt_ps") } + void movz_ps (FloatRegister fd, FloatRegister fs, Register rt) {INSN_PS(rt, fs, fd, movz_f_op)} + void movn_ps (FloatRegister fd, FloatRegister fs, Register rt) {INSN_PS(rt, fs, fd, movn_f_op)} + //null + void cvt_s_pu (FloatRegister fd, FloatRegister fs) {INSN_PS(R0, fs, fd, fcvts_op)} + //null + void cvt_s_pl (FloatRegister fd, FloatRegister fs) {INSN_PS(R0, fs, fd, fcvtspl_op)} + //null + void pll_ps (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, fd, fpll_op)} + void plu_ps (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, fd, fplu_op)} + void pul_ps (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, fd, fpul_op)} + void puu_ps (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, fd, fpuu_op)} + void c_f_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, f_cond)} + void c_un_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, un_cond)} + void c_eq_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, eq_cond)} + void c_ueq_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, ueq_cond)} + void c_olt_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, olt_cond)} + void c_ult_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, ult_cond)} + void c_ole_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, ole_cond)} + void c_ule_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, ule_cond)} + void c_sf_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, sf_cond)} + void c_ngle_ps(FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, ngle_cond)} + void c_seq_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, seq_cond)} + void c_ngl_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, ngl_cond)} + void c_lt_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, lt_cond)} + void c_nge_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, nge_cond)} + void c_le_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, le_cond)} + void c_ngt_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, ngt_cond)} + //null +#undef INSN_PS + //COP1 end + + + //COP1X +//R0->encoding() is 0; INSN_SINGLE is enclosed by {} for ctags. 
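A short note on operand order before the COP1X definitions below, since the C++ argument order (fd, fr, fs, ft) differs from the order in which the fields are emitted: on MIPS the four-register forms compute, in pseudo-code, madd: fd = (fs * ft) + fr, msub: fd = (fs * ft) - fr, with the n-variants negating the result. The host-side sketch below mirrors just that arithmetic (rounding behaviour of the intermediate product is deliberately out of scope) and is illustrative only.

#include <cassert>

// Host-side mirror of the 4-operand COP1X arithmetic; purely illustrative.
static double madd (double fr, double fs, double ft) { return   (fs * ft) + fr;  }
static double msub (double fr, double fs, double ft) { return   (fs * ft) - fr;  }
static double nmadd(double fr, double fs, double ft) { return -((fs * ft) + fr); }
static double nmsub(double fr, double fs, double ft) { return -((fs * ft) - fr); }

int main() {
  assert(madd (1.0, 2.0, 3.0) ==  7.0);   //  (2*3) + 1
  assert(msub (1.0, 2.0, 3.0) ==  5.0);   //  (2*3) - 1
  assert(nmadd(1.0, 2.0, 3.0) == -7.0);
  assert(nmsub(1.0, 2.0, 3.0) == -5.0);
  return 0;
}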
+#define INSN_COP1X(r0, r1, r2, r3, op) \ + { emit_long(insn_F3ROX((int)r0->encoding(), (int)r1->encoding(), (int)r2->encoding(), (int)r3->encoding(), op));} + void madd_s(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft) {INSN_COP1X(fr, ft, fs, fd, madd_s_op) } + void madd_d(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft) {INSN_COP1X(fr, ft, fs, fd, madd_d_op) } + void madd_ps(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft){INSN_COP1X(fr, ft, fs, fd, madd_ps_op) } + void msub_s(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft) {INSN_COP1X(fr, ft, fs, fd, msub_s_op) } + void msub_d(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft) {INSN_COP1X(fr, ft, fs, fd, msub_d_op) } + void msub_ps(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft){INSN_COP1X(fr, ft, fs, fd, msub_ps_op) } + void nmadd_s(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft) {INSN_COP1X(fr, ft, fs, fd, nmadd_s_op) } + void nmadd_d(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft) {INSN_COP1X(fr, ft, fs, fd, nmadd_d_op) } + void nmadd_ps(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft){INSN_COP1X(fr, ft, fs, fd, nmadd_ps_op) } + void nmsub_s(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft) {INSN_COP1X(fr, ft, fs, fd, nmsub_s_op) } + void nmsub_d(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft) {INSN_COP1X(fr, ft, fs, fd, nmsub_d_op) } + void nmsub_ps(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft){INSN_COP1X(fr, ft, fs, fd, nmsub_ps_op) } +#undef INSN_COP1X + //COP1X end + + //SPECIAL2 +//R0->encoding() is 0; INSN_PS is enclosed by {} for ctags. 
+#define INSN_S2(op) \ + { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | op);} + + void madd (Register rs, Register rt) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | madd_op); } + void maddu (Register rs, Register rt) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | maddu_op); } + void mul (Register rd, Register rs, Register rt) { INSN_S2(mul_op) } + void gsandn (Register rd, Register rs, Register rt) { INSN_S2((0x12 << 6) | gs0x03_op) } + void msub (Register rs, Register rt) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | msub_op); } + void msubu (Register rs, Register rt) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | msubu_op); } + void gsorn (Register rd, Register rs, Register rt) { INSN_S2((0x12 << 6) | gs0x06_op) } + + void gsmult (Register rd, Register rs, Register rt) { INSN_S2(gsmult_op) } + void gsdmult (Register rd, Register rs, Register rt) { INSN_S2(gsdmult_op) } + void gsmultu (Register rd, Register rs, Register rt) { INSN_S2(gsmultu_op) } + void gsdmultu(Register rd, Register rs, Register rt) { INSN_S2(gsdmultu_op)} + void gsdiv (Register rd, Register rs, Register rt) { INSN_S2(gsdiv_op) } + void gsddiv (Register rd, Register rs, Register rt) { INSN_S2(gsddiv_op) } + void gsdivu (Register rd, Register rs, Register rt) { INSN_S2(gsdivu_op) } + void gsddivu (Register rd, Register rs, Register rt) { INSN_S2(gsddivu_op) } + void gsmod (Register rd, Register rs, Register rt) { INSN_S2(gsmod_op) } + void gsdmod (Register rd, Register rs, Register rt) { INSN_S2(gsdmod_op) } + void gsmodu (Register rd, Register rs, Register rt) { INSN_S2(gsmodu_op) } + void gsdmodu (Register rd, Register rs, Register rt) { INSN_S2(gsdmodu_op) } + void clz (Register rd, Register rs) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rd->encoding() << 16) | ((int)rd->encoding() << 11) | clz_op); } + void clo (Register rd, Register rs) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rd->encoding() << 16) | ((int)rd->encoding() << 11) | clo_op); } + void ctz (Register rd, Register rs) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rd->encoding() << 16) | ((int)rd->encoding() << 11) | 0 << 6| xctx_op); } + void cto (Register rd, Register rs) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rd->encoding() << 16) | ((int)rd->encoding() << 11) | 1 << 6| xctx_op); } + void dctz(Register rd, Register rs) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rd->encoding() << 16) | ((int)rd->encoding() << 11) | 2 << 6| xctx_op); } + void dcto(Register rd, Register rs) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rd->encoding() << 16) | ((int)rd->encoding() << 11) | 3 << 6| xctx_op); } + void dclz(Register rd, Register rs) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rd->encoding() << 16) | ((int)rd->encoding() << 11) | dclz_op); } + void dclo(Register rd, Register rs) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rd->encoding() << 16) | ((int)rd->encoding() << 11) | dclo_op); } + +#undef INSN_S2 + + //SPECIAL3 +/* +// FIXME +#define is_0_to_32(a, b) \ + assert (a >= 0, " just a check"); \ + assert (a <= 0, " just a check"); \ + assert (b >= 0, " just a check"); \ + assert (b <= 0, 
" just a check"); \ + assert (a+b >= 0, " just a check"); \ + assert (a+b <= 0, " just a check"); + */ +#define is_0_to_32(a, b) + + void ins (Register rt, Register rs, int pos, int size) { is_0_to_32(pos, size); emit_long((special3_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | (low(pos+size-1, 5) << 11) | (low(pos, 5) << 6) | ins_op); } + void dinsm(Register rt, Register rs, int pos, int size) { is_0_to_32(pos, size); emit_long((special3_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | (low(pos+size-33, 5) << 11) | (low(pos, 5) << 6) | dinsm_op); } + void dinsu(Register rt, Register rs, int pos, int size) { is_0_to_32(pos, size); emit_long((special3_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | (low(pos+size-33, 5) << 11) | (low(pos-32, 5) << 6) | dinsu_op); } + void dins (Register rt, Register rs, int pos, int size) { + guarantee((0 <= pos) && (pos < 32), "pos must be in [0, 32)"); + guarantee((0 < size) && (size <= 32), "size must be in (0, 32]"); + guarantee((0 < pos + size) && (pos + size <= 32), "pos + size must be in (0, 32]"); + + emit_long((special3_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | (low(pos+size-1, 5) << 11) | (low(pos, 5) << 6) | dins_op); + } + + void repl_qb (Register rd, int const8) { assert(VM_Version::supports_dsp(), ""); emit_long((special3_op << 26) | (low(const8, 8) << 16) | ((int)rd->encoding() << 11) | repl_qb_op << 6 | re1_op); } + void replv_qb(Register rd, Register rt) { assert(VM_Version::supports_dsp(), ""); emit_long((special3_op << 26) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | replv_qb_op << 6 | re1_op ); } + void repl_ph (Register rd, int const10) { assert(VM_Version::supports_dsp(), ""); emit_long((special3_op << 26) | (low(const10, 10) << 16) | ((int)rd->encoding() << 11) | repl_ph_op << 6 | re1_op); } + void replv_ph(Register rd, Register rt) { assert(VM_Version::supports_dsp(), ""); emit_long((special3_op << 26) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | replv_ph_op << 6 | re1_op ); } + + void repl_ob (Register rd, int const8) { assert(VM_Version::supports_dsp(), ""); emit_long((special3_op << 26) | (low(const8, 8) << 16) | ((int)rd->encoding() << 11) | repl_ob_op << 6 | re2_op); } + void replv_ob(Register rd, Register rt) { assert(VM_Version::supports_dsp(), ""); emit_long((special3_op << 26) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | replv_ob_op << 6 | re2_op ); } + void repl_qh (Register rd, int const10) { assert(VM_Version::supports_dsp(), ""); emit_long((special3_op << 26) | (low(const10, 10) << 16) | ((int)rd->encoding() << 11) | repl_qh_op << 6 | re2_op); } + void replv_qh(Register rd, Register rt) { assert(VM_Version::supports_dsp(), ""); emit_long((special3_op << 26) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | replv_qh_op << 6 | re2_op ); } + void repl_pw (Register rd, int const10) { assert(VM_Version::supports_dsp(), ""); emit_long((special3_op << 26) | (low(const10, 10) << 16) | ((int)rd->encoding() << 11) | repl_pw_op << 6 | re2_op); } + void replv_pw(Register rd, Register rt) { assert(VM_Version::supports_dsp(), ""); emit_long((special3_op << 26) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | replv_pw_op << 6 | re2_op ); } + + void sdc1(FloatRegister ft, Register base, int off) { emit_long(insn_ORRI(sdc1_op, (int)base->encoding(), (int)ft->encoding(), off)); } + void sdc1(FloatRegister ft, Address dst); + void swc1(FloatRegister ft, Register 
base, int off) { emit_long(insn_ORRI(swc1_op, (int)base->encoding(), (int)ft->encoding(), off)); } + void swc1(FloatRegister ft, Address dst); + + + static void print_instruction(int); + int patched_branch(int dest_pos, int inst, int inst_pos); + int branch_destination(int inst, int pos); + + // Loongson extension + + // gssq/gslq/gssqc1/gslqc1: vAddr = sign_extend(offset << 4 ) + GPR[base]. Therefore, the off should be ">> 4". + void gslble(Register rt, Register base, Register bound) { + emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gslble_op); + } + + void gslbgt(Register rt, Register base, Register bound) { + emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gslbgt_op); + } + + void gslhle(Register rt, Register base, Register bound) { + emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gslhle_op); + } + + void gslhgt(Register rt, Register base, Register bound) { + emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gslhgt_op); + } + + void gslwle(Register rt, Register base, Register bound) { + emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gslwle_op); + } + + void gslwgt(Register rt, Register base, Register bound) { + emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gslwgt_op); + } + + void gsldle(Register rt, Register base, Register bound) { + emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gsldle_op); + } + + void gsldgt(Register rt, Register base, Register bound) { + emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gsldgt_op); + } + + void gslwlec1(FloatRegister rt, Register base, Register bound) { + emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gslwlec1_op); + } + + void gslwgtc1(FloatRegister rt, Register base, Register bound) { + emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gslwgtc1_op); + } + + void gsldlec1(FloatRegister rt, Register base, Register bound) { + emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gsldlec1_op); + } + + void gsldgtc1(FloatRegister rt, Register base, Register bound) { + emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gsldgtc1_op); + } + + void gslq(Register rq, Register rt, Register base, int off) { + assert(!(off & 0xF), "gslq: the low 4 bits of off must be 0"); + off = off >> 4; + assert(is_simm(off, 9),"gslq: off exceeds 9 bits"); + emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | 0 << 15 | (low(off, 9) << 6) | gslq_op | (int)rq->encoding() ); + } + + void gslqc1(FloatRegister rq, FloatRegister rt, Register base, int off) { + assert(!(off & 0xF), "gslqc1: the low 4 bits of off must be 
0"); + off = off >> 4; + assert(is_simm(off, 9),"gslqc1: off exceeds 9 bits"); + emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | 1 << 15 | (low(off, 9) << 6) | gslq_op | (int)rq->encoding() ); + } + + void gssble(Register rt, Register base, Register bound) { + emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gssble_op); + } + + void gssbgt(Register rt, Register base, Register bound) { + emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gssbgt_op); + } + + void gsshle(Register rt, Register base, Register bound) { + emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gsshle_op); + } + + void gsshgt(Register rt, Register base, Register bound) { + emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gsshgt_op); + } + + void gsswle(Register rt, Register base, Register bound) { + emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gsswle_op); + } + + void gsswgt(Register rt, Register base, Register bound) { + emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gsswgt_op); + } + + void gssdle(Register rt, Register base, Register bound) { + emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gssdle_op); + } + + void gssdgt(Register rt, Register base, Register bound) { + emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gssdgt_op); + } + + void gsswlec1(FloatRegister rt, Register base, Register bound) { + emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gsswlec1_op); + } + + void gsswgtc1(FloatRegister rt, Register base, Register bound) { + emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gsswgtc1_op); + } + + void gssdlec1(FloatRegister rt, Register base, Register bound) { + emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gssdlec1_op); + } + + void gssdgtc1(FloatRegister rt, Register base, Register bound) { + emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gssdgtc1_op); + } + + void gssq(Register rq, Register rt, Register base, int off) { + assert(!(off & 0xF), "gssq: the low 4 bits of off must be 0"); + off = off >> 4; + assert(is_simm(off, 9),"gssq: off exceeds 9 bits"); + emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | 0 << 15 | (low(off, 9) << 6) | gssq_op | (int)rq->encoding() ); + } + + void gssqc1(FloatRegister rq, FloatRegister rt, Register base, int off) { + assert(!(off & 0xF), "gssqc1: the low 4 bits of off must be 0"); + off = off >> 4; + assert(is_simm(off, 9),"gssqc1: off exceeds 9 bits"); + emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | 1 << 15 | 
(low(off, 9) << 6) | gssq_op | (int)rq->encoding() ); + } + + //LDC2 & SDC2 +#define INSN(OPS, OP) \ + assert(is_simm(off, 8), "NAME: off exceeds 8 bits"); \ + assert(UseLEXT1, "check UseLEXT1"); \ + emit_long( (OPS << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | \ + ((int)index->encoding() << 11) | (low(off, 8) << 3) | OP); + +#define INSN_LDC2(NAME, op) \ + void NAME(Register rt, Register base, Register index, int off) { \ + INSN(gs_ldc2_op, op) \ + } + +#define INSN_LDC2_F(NAME, op) \ + void NAME(FloatRegister rt, Register base, Register index, int off) { \ + INSN(gs_ldc2_op, op) \ + } + +#define INSN_SDC2(NAME, op) \ + void NAME(Register rt, Register base, Register index, int off) { \ + INSN(gs_sdc2_op, op) \ + } + +#define INSN_SDC2_F(NAME, op) \ + void NAME(FloatRegister rt, Register base, Register index, int off) { \ + INSN(gs_sdc2_op, op) \ + } + +/* + void gslbx(Register rt, Register base, Register index, int off) { + assert(is_simm(off, 8), "gslbx: off exceeds 8 bits"); + assert(UseLEXT1, "check UseLEXT1"); + emit_long( (gs_ldc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | + ((int)index->encoding() << 11) | (low(off, 8) << 3) | gslbx_op); + void gslbx(Register rt, Register base, Register index, int off) {INSN(gs_ldc2_op, gslbx_op);} + + INSN_LDC2(gslbx, gslbx_op) + INSN_LDC2(gslhx, gslhx_op) + INSN_LDC2(gslwx, gslwx_op) + INSN_LDC2(gsldx, gsldx_op) + INSN_LDC2_F(gslwxc1, gslwxc1_op) + INSN_LDC2_F(gsldxc1, gsldxc1_op) + + INSN_SDC2(gssbx, gssbx_op) + INSN_SDC2(gsshx, gsshx_op) + INSN_SDC2(gsswx, gsswx_op) + INSN_SDC2(gssdx, gssdx_op) + INSN_SDC2_F(gsswxc1, gsswxc1_op) + INSN_SDC2_F(gssdxc1, gssdxc1_op) +*/ + void gslbx(Register rt, Register base, Register index, int off) {INSN(gs_ldc2_op, gslbx_op) } + void gslhx(Register rt, Register base, Register index, int off) {INSN(gs_ldc2_op, gslhx_op) } + void gslwx(Register rt, Register base, Register index, int off) {INSN(gs_ldc2_op, gslwx_op) } + void gsldx(Register rt, Register base, Register index, int off) {INSN(gs_ldc2_op, gsldx_op) } + void gslwxc1(FloatRegister rt, Register base, Register index, int off) {INSN(gs_ldc2_op, gslwxc1_op) } + void gsldxc1(FloatRegister rt, Register base, Register index, int off) {INSN(gs_ldc2_op, gsldxc1_op) } + + void gssbx(Register rt, Register base, Register index, int off) {INSN(gs_sdc2_op, gssbx_op) } + void gsshx(Register rt, Register base, Register index, int off) {INSN(gs_sdc2_op, gsshx_op) } + void gsswx(Register rt, Register base, Register index, int off) {INSN(gs_sdc2_op, gsswx_op) } + void gssdx(Register rt, Register base, Register index, int off) {INSN(gs_sdc2_op, gssdx_op) } + void gsswxc1(FloatRegister rt, Register base, Register index, int off) {INSN(gs_sdc2_op, gsswxc1_op) } + void gssdxc1(FloatRegister rt, Register base, Register index, int off) {INSN(gs_sdc2_op, gssdxc1_op) } + +#undef INSN +#undef INSN_LDC2 +#undef INSN_LDC2_F +#undef INSN_SDC2 +#undef INSN_SDC2_F + + // cpucfg on Loongson CPUs above 3A4000 + void cpucfg(Register rd, Register rs) { emit_long((gs_lwc2_op << 26) | ((int)rs->encoding() << 21) | (0b01000 << 16) | ((int)rd->encoding() << 11) | ( 0b00100 << 6) | 0b011000);} + + enum Membar_mask_bits { + StoreStore = 1 << 3, + LoadStore = 1 << 2, + StoreLoad = 1 << 1, + LoadLoad = 1 << 0 + }; + + // Serializes memory and blows flags + void membar(Membar_mask_bits order_constraint) { + sync(); + } + +public: + // Creation + Assembler(CodeBuffer* code) : AbstractAssembler(code) { +#ifdef CHECK_DELAY + delay_state = 
no_delay; +#endif + } + + // Decoding + static address locate_operand(address inst, WhichOperand which); + static address locate_next_instruction(address inst); +}; + +#endif // CPU_MIPS_VM_ASSEMBLER_MIPS_HPP diff --git a/src/hotspot/cpu/mips/assembler_mips.inline.hpp b/src/hotspot/cpu/mips/assembler_mips.inline.hpp new file mode 100644 index 00000000000..f35a06fc4e2 --- /dev/null +++ b/src/hotspot/cpu/mips/assembler_mips.inline.hpp @@ -0,0 +1,33 @@ +/* + * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_MIPS_VM_ASSEMBLER_MIPS_INLINE_HPP +#define CPU_MIPS_VM_ASSEMBLER_MIPS_INLINE_HPP + +#include "asm/assembler.inline.hpp" +#include "asm/codeBuffer.hpp" +#include "code/codeCache.hpp" + +#endif // CPU_MIPS_VM_ASSEMBLER_MIPS_INLINE_HPP diff --git a/src/hotspot/cpu/mips/bytes_mips.hpp b/src/hotspot/cpu/mips/bytes_mips.hpp new file mode 100644 index 00000000000..4172db219b1 --- /dev/null +++ b/src/hotspot/cpu/mips/bytes_mips.hpp @@ -0,0 +1,181 @@ +/* + * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2019, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_MIPS_VM_BYTES_MIPS_HPP +#define CPU_MIPS_VM_BYTES_MIPS_HPP + +#include "memory/allocation.hpp" + +class Bytes: AllStatic { + public: + // Returns true if the byte ordering used by Java is different from the native byte ordering + // of the underlying machine. 
For example, this is true for Intel x86, but false for Solaris + // on Sparc. + // we use mipsel, so return true + static inline bool is_Java_byte_ordering_different(){ return true; } + + + // Efficient reading and writing of unaligned unsigned data in platform-specific byte ordering + // (no special code is needed since x86 CPUs can access unaligned data) + static inline u2 get_native_u2(address p) { + if ((intptr_t)p & 0x1) { + return ((u2)p[1] << 8) | (u2)p[0]; + } else { + return *(u2*)p; + } + } + + static inline u4 get_native_u4(address p) { + if ((intptr_t)p & 3) { + u4 res; + __asm__ __volatile__ ( + " .set push\n" + " .set mips64\n" + " .set noreorder\n" + + " lwr %[res], 0(%[addr]) \n" + " lwl %[res], 3(%[addr]) \n" + + " .set pop" + : [res] "=&r" (res) + : [addr] "r" (p) + : "memory" + ); + return res; + } else { + return *(u4*)p; + } + } + + static inline u8 get_native_u8(address p) { + u8 res; + u8 temp = 0; + // u4 tp;//tmp register + __asm__ __volatile__ ( + " .set push\n" + " .set mips64\n" + " .set noreorder\n" + " .set noat\n" + " andi $1,%[addr],0x7 \n" + " beqz $1,1f \n" + " nop \n" + " ldr %[temp], 0(%[addr]) \n" + " ldl %[temp], 7(%[addr]) \n" + " b 2f \n" + " nop \n" + " 1:\t ld %[temp],0(%[addr]) \n" + " 2:\t sd %[temp], %[res] \n" + + " .set at\n" + " .set pop\n" + : [addr]"=r"(p), [temp]"=r" (temp) + : "[addr]"(p), "[temp]" (temp), [res]"m" (*(volatile jint*)&res) + : "memory" + ); + + return res; + } + + //use mips unaligned load instructions + static inline void put_native_u2(address p, u2 x) { + if((intptr_t)p & 0x1) { + p[0] = (u_char)(x); + p[1] = (u_char)(x>>8); + } else { + *(u2*)p = x; + } + } + + static inline void put_native_u4(address p, u4 x) { + // refer to sparc implementation. + // Note that sparc is big-endian, while mips is little-endian + switch ( intptr_t(p) & 3 ) { + case 0: *(u4*)p = x; + break; + + case 2: ((u2*)p)[1] = x >> 16; + ((u2*)p)[0] = x; + break; + + default: ((u1*)p)[3] = x >> 24; + ((u1*)p)[2] = x >> 16; + ((u1*)p)[1] = x >> 8; + ((u1*)p)[0] = x; + break; + } + } + + static inline void put_native_u8(address p, u8 x) { + // refer to sparc implementation. + // Note that sparc is big-endian, while mips is little-endian + switch ( intptr_t(p) & 7 ) { + case 0: *(u8*)p = x; + break; + + case 4: ((u4*)p)[1] = x >> 32; + ((u4*)p)[0] = x; + break; + + case 2: ((u2*)p)[3] = x >> 48; + ((u2*)p)[2] = x >> 32; + ((u2*)p)[1] = x >> 16; + ((u2*)p)[0] = x; + break; + + default: ((u1*)p)[7] = x >> 56; + ((u1*)p)[6] = x >> 48; + ((u1*)p)[5] = x >> 40; + ((u1*)p)[4] = x >> 32; + ((u1*)p)[3] = x >> 24; + ((u1*)p)[2] = x >> 16; + ((u1*)p)[1] = x >> 8; + ((u1*)p)[0] = x; + } + } + + + // Efficient reading and writing of unaligned unsigned data in Java + // byte ordering (i.e. big-endian ordering). Byte-order reversal is + // needed since MIPS64EL CPUs use little-endian format. 
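As a quick illustration of the comment above, here is a host-portable sketch of the Java-order (big-endian) accessors built on top of little-endian native reads, mirroring the get_Java_u2/swap_u2 pair declared just below. The names reuse the patch's, but this is a standalone sketch, not the HotSpot code itself.

#include <cassert>
#include <cstdint>
#include <cstring>

// Standalone sketch: Java class-file data is big-endian, the native order on
// MIPS64EL is little-endian, so every Java-order access is a native access
// plus a byte swap.
static uint16_t swap_u2(uint16_t x) { return (uint16_t)((x >> 8) | (x << 8)); }

static uint16_t get_native_u2(const uint8_t* p) {
  uint16_t v;
  std::memcpy(&v, p, sizeof(v));            // unaligned-safe on the host
  return v;                                 // host (assumed little-endian) order
}

static uint16_t get_Java_u2(const uint8_t* p) { return swap_u2(get_native_u2(p)); }

int main() {
  const uint8_t buf[] = { 0x12, 0x34 };
  assert(get_native_u2(buf) == 0x3412);     // little-endian view of the bytes
  assert(get_Java_u2(buf)   == 0x1234);     // big-endian (Java) view
  return 0;
}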
+ static inline u2 get_Java_u2(address p) { return swap_u2(get_native_u2(p)); } + static inline u4 get_Java_u4(address p) { return swap_u4(get_native_u4(p)); } + static inline u8 get_Java_u8(address p) { return swap_u8(get_native_u8(p)); } + + static inline void put_Java_u2(address p, u2 x) { put_native_u2(p, swap_u2(x)); } + static inline void put_Java_u4(address p, u4 x) { put_native_u4(p, swap_u4(x)); } + static inline void put_Java_u8(address p, u8 x) { put_native_u8(p, swap_u8(x)); } + + + // Efficient swapping of byte ordering + static inline u2 swap_u2(u2 x); // compiler-dependent implementation + static inline u4 swap_u4(u4 x); // compiler-dependent implementation + static inline u8 swap_u8(u8 x); +}; + + +// The following header contains the implementations of swap_u2, swap_u4, and swap_u8[_base] +#include OS_CPU_HEADER_INLINE(bytes) + +#endif // CPU_MIPS_VM_BYTES_MIPS_HPP diff --git a/src/hotspot/cpu/mips/c2_globals_mips.hpp b/src/hotspot/cpu/mips/c2_globals_mips.hpp new file mode 100644 index 00000000000..ef11827abfe --- /dev/null +++ b/src/hotspot/cpu/mips/c2_globals_mips.hpp @@ -0,0 +1,95 @@ +/* + * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_MIPS_VM_C2_GLOBALS_MIPS_HPP +#define CPU_MIPS_VM_C2_GLOBALS_MIPS_HPP + +#include "utilities/globalDefinitions.hpp" +#include "utilities/macros.hpp" + +// Sets the default values for platform dependent flags used by the server compiler. +// (see c2_globals.hpp). Alpha-sorted. 
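One practical note on these pd defaults: on a built JDK the effective values (for example the TieredCompilation setting declared below) can be confirmed with java -XX:+PrintFlagsFinal -version, which is standard HotSpot behaviour rather than anything added by this patch.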
+define_pd_global(bool, BackgroundCompilation, true); +define_pd_global(bool, UseTLAB, true); +define_pd_global(bool, ResizeTLAB, true); +define_pd_global(bool, CICompileOSR, true); +define_pd_global(bool, InlineIntrinsics, true); +define_pd_global(bool, PreferInterpreterNativeStubs, false); +define_pd_global(bool, ProfileTraps, true); +define_pd_global(bool, UseOnStackReplacement, true); +#ifdef CC_INTERP +define_pd_global(bool, ProfileInterpreter, false); +#else +define_pd_global(bool, ProfileInterpreter, true); +#endif // CC_INTERP +// Disable C1 in server JIT +define_pd_global(bool, TieredCompilation, false); +define_pd_global(intx, CompileThreshold, 10000); +define_pd_global(intx, BackEdgeThreshold, 100000); + +define_pd_global(intx, OnStackReplacePercentage, 140); +define_pd_global(intx, ConditionalMoveLimit, 3); +define_pd_global(intx, FLOATPRESSURE, 6); +define_pd_global(intx, FreqInlineSize, 325); +define_pd_global(intx, MinJumpTableSize, 10); +define_pd_global(intx, INTPRESSURE, 13); +define_pd_global(intx, InteriorEntryAlignment, 16); +define_pd_global(intx, NewSizeThreadIncrease, ScaleForWordSize(4*K)); +define_pd_global(intx, LoopUnrollLimit, 60); +define_pd_global(intx, LoopPercentProfileLimit, 10); +// InitialCodeCacheSize derived from specjbb2000 run. +define_pd_global(intx, InitialCodeCacheSize, 2496*K); // Integral multiple of CodeCacheExpansionSize +define_pd_global(intx, CodeCacheExpansionSize, 64*K); + +// Ergonomics related flags +define_pd_global(uint64_t,MaxRAM, 128ULL*G); +define_pd_global(intx, RegisterCostAreaRatio, 16000); + +// Peephole and CISC spilling both break the graph, and so makes the +// scheduler sick. +define_pd_global(bool, OptoPeephole, false); +define_pd_global(bool, UseCISCSpill, false); +define_pd_global(bool, OptoScheduling, false); +define_pd_global(bool, OptoBundling, false); +define_pd_global(bool, OptoRegScheduling, false); +define_pd_global(bool, SuperWordLoopUnrollAnalysis, true); +define_pd_global(bool, IdealizeClearArrayNode, true); + +define_pd_global(intx, ReservedCodeCacheSize, 120*M); +define_pd_global(intx, NonProfiledCodeHeapSize, 57*M); +define_pd_global(intx, ProfiledCodeHeapSize, 58*M); +define_pd_global(intx, NonNMethodCodeHeapSize, 5*M ); +define_pd_global(uintx, CodeCacheMinBlockLength, 4); +define_pd_global(uintx, CodeCacheMinimumUseSpace, 400*K); + +define_pd_global(bool, TrapBasedRangeChecks, false); + +// Heap related flags +define_pd_global(uintx,MetaspaceSize, ScaleForWordSize(16*M)); + +// Ergonomics related flags +define_pd_global(bool, NeverActAsServerClassMachine, false); + +#endif // CPU_MIPS_VM_C2_GLOBALS_MIPS_HPP diff --git a/src/hotspot/cpu/mips/c2_init_mips.cpp b/src/hotspot/cpu/mips/c2_init_mips.cpp new file mode 100644 index 00000000000..e6d5815f424 --- /dev/null +++ b/src/hotspot/cpu/mips/c2_init_mips.cpp @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2019, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "opto/compile.hpp" +#include "opto/node.hpp" + +// processor dependent initialization for mips + +void Compile::pd_compiler2_init() { + guarantee(CodeEntryAlignment >= InteriorEntryAlignment, "" ); +} diff --git a/src/hotspot/cpu/mips/codeBuffer_mips.hpp b/src/hotspot/cpu/mips/codeBuffer_mips.hpp new file mode 100644 index 00000000000..3cc191006d4 --- /dev/null +++ b/src/hotspot/cpu/mips/codeBuffer_mips.hpp @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_MIPS_VM_CODEBUFFER_MIPS_HPP +#define CPU_MIPS_VM_CODEBUFFER_MIPS_HPP + +private: + void pd_initialize() {} + +public: + void flush_bundle(bool start_new_bundle) {} + +#endif // CPU_MIPS_VM_CODEBUFFER_MIPS_HPP diff --git a/src/hotspot/cpu/mips/compiledIC_mips.cpp b/src/hotspot/cpu/mips/compiledIC_mips.cpp new file mode 100644 index 00000000000..068ca4799d7 --- /dev/null +++ b/src/hotspot/cpu/mips/compiledIC_mips.cpp @@ -0,0 +1,151 @@ +/* + * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "code/compiledIC.hpp" +#include "code/icBuffer.hpp" +#include "code/nmethod.hpp" +#include "memory/resourceArea.hpp" +#include "runtime/mutexLocker.hpp" +#include "runtime/safepoint.hpp" + +// ---------------------------------------------------------------------------- + +#define __ _masm. +address CompiledStaticCall::emit_to_interp_stub(CodeBuffer &cbuf, address mark) { + + if (mark == NULL) { + mark = cbuf.insts_mark(); // get mark within main instrs section + } + + // Note that the code buffer's insts_mark is always relative to insts. + // That's why we must use the macroassembler to generate a stub. + MacroAssembler _masm(&cbuf); + + address base = __ start_a_stub(CompiledStaticCall::to_interp_stub_size()); + if (base == NULL) return NULL; // CodeBuffer::expand failed + // static stub relocation stores the instruction address of the call + + __ relocate(static_stub_Relocation::spec(mark), 0); + + // Code stream for loading method may be changed. + __ synci(R0, 0); + + // Rmethod contains methodOop, it should be relocated for GC + // static stub relocation also tags the methodOop in the code-stream. + __ mov_metadata(Rmethod, NULL); + // This is recognized as unresolved by relocs/nativeInst/ic code + + __ relocate(relocInfo::runtime_call_type); + + cbuf.set_insts_mark(); + address call_pc = (address)-1; + __ patchable_jump(call_pc); + __ align(16); + // Update current stubs pointer and restore code_end. + __ end_a_stub(); + return base; +} +#undef __ + +int CompiledStaticCall::to_interp_stub_size() { + int size = NativeInstruction::nop_instruction_size + NativeMovConstReg::instruction_size + NativeCall::instruction_size; + return round_to(size, 16); +} + +int CompiledStaticCall::to_trampoline_stub_size() { + return NativeInstruction::nop_instruction_size + NativeCallTrampolineStub::instruction_size; +} + +// Relocation entries for call stub, compiled java to interpreter. +int CompiledStaticCall::reloc_to_interp_stub() { + return 16; +} + +void CompiledDirectStaticCall::set_to_interpreted(const methodHandle& callee, address entry) { + address stub = find_stub(false /* is_aot */); + guarantee(stub != NULL, "stub not found"); + + if (TraceICs) { + ResourceMark rm; + tty->print_cr("CompiledDirectStaticCall@" INTPTR_FORMAT ": set_to_interpreted %s", + p2i(instruction_address()), + callee->name_and_sig_as_C_string()); + } + + // Creation also verifies the object. + NativeMovConstReg* method_holder = nativeMovConstReg_at(stub + NativeInstruction::nop_instruction_size); + NativeGeneralJump* jump = nativeGeneralJump_at(method_holder->next_instruction_address()); + + assert(method_holder->data() == 0 || method_holder->data() == (intptr_t)callee(), + "a) MT-unsafe modification of inline cache"); + assert(jump->jump_destination() == (address)-1 || jump->jump_destination() == entry, + "b) MT-unsafe modification of inline cache"); + + // Update stub. + method_holder->set_data((intptr_t)callee()); + jump->set_jump_destination(entry); + + // Update jump to call. 
+ set_destination_mt_safe(stub); +} + +void CompiledDirectStaticCall::set_stub_to_clean(static_stub_Relocation* static_stub) { + assert (CompiledIC_lock->is_locked() || SafepointSynchronize::is_at_safepoint(), "mt unsafe call"); + // Reset stub. + address stub = static_stub->addr(); + assert(stub != NULL, "stub not found"); + // Creation also verifies the object. + NativeMovConstReg* method_holder = nativeMovConstReg_at(stub + NativeInstruction::nop_instruction_size); + NativeGeneralJump* jump = nativeGeneralJump_at(method_holder->next_instruction_address()); + method_holder->set_data(0); + jump->set_jump_destination((address)-1); +} + +//----------------------------------------------------------------------------- +// Non-product mode code +#ifndef PRODUCT + +void CompiledDirectStaticCall::verify() { + // Verify call. + _call->verify(); + if (os::is_MP()) { + _call->verify_alignment(); + } + + // Verify stub. + address stub = find_stub(false /* is_aot */); + assert(stub != NULL, "no stub found for static call"); + // Creation also verifies the object. + NativeMovConstReg* method_holder = nativeMovConstReg_at(stub + NativeInstruction::nop_instruction_size); + NativeGeneralJump* jump = nativeGeneralJump_at(method_holder->next_instruction_address()); + + + // Verify state. + assert(is_clean() || is_call_to_compiled() || is_call_to_interpreted(), "sanity check"); +} + +#endif // !PRODUCT diff --git a/src/hotspot/cpu/mips/copy_mips.hpp b/src/hotspot/cpu/mips/copy_mips.hpp new file mode 100644 index 00000000000..dcc77adfec1 --- /dev/null +++ b/src/hotspot/cpu/mips/copy_mips.hpp @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_MIPS_VM_COPY_MIPS_HPP +#define CPU_MIPS_VM_COPY_MIPS_HPP + +// Inline functions for memory copy and fill. + +// Contains inline asm implementations +#include OS_CPU_HEADER_INLINE(copy) + +// Template for atomic, element-wise copy. 
+template +static void copy_conjoint_atomic(const T* from, T* to, size_t count) { + if (from > to) { + while (count-- > 0) { + // Copy forwards + *to++ = *from++; + } + } else { + from += count - 1; + to += count - 1; + while (count-- > 0) { + // Copy backwards + *to-- = *from--; + } + } +} + + +static void pd_fill_to_words(HeapWord* tohw, size_t count, juint value) { + julong* to = (julong*) tohw; + julong v = ((julong) value << 32) | value; + while (count-- > 0) { + *to++ = v; + } +} + +static void pd_fill_to_aligned_words(HeapWord* tohw, size_t count, juint value) { + pd_fill_to_words(tohw, count, value); +} + +static void pd_fill_to_bytes(void* to, size_t count, jubyte value) { + (void)memset(to, value, count); +} + +static void pd_zero_to_words(HeapWord* tohw, size_t count) { + pd_fill_to_words(tohw, count, 0); +} + +static void pd_zero_to_bytes(void* to, size_t count) { + (void)memset(to, 0, count); +} + +#endif //CPU_MIPS_VM_COPY_MIPS_HPP diff --git a/src/hotspot/cpu/mips/depChecker_mips.cpp b/src/hotspot/cpu/mips/depChecker_mips.cpp new file mode 100644 index 00000000000..756ccb68f9c --- /dev/null +++ b/src/hotspot/cpu/mips/depChecker_mips.cpp @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "compiler/disassembler.hpp" +#include "depChecker_mips.hpp" + +// Nothing to do on mips diff --git a/src/hotspot/cpu/mips/depChecker_mips.hpp b/src/hotspot/cpu/mips/depChecker_mips.hpp new file mode 100644 index 00000000000..11e52b4e8f8 --- /dev/null +++ b/src/hotspot/cpu/mips/depChecker_mips.hpp @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_MIPS_VM_DEPCHECKER_MIPS_HPP +#define CPU_MIPS_VM_DEPCHECKER_MIPS_HPP + +// Nothing to do on MIPS + +#endif // CPU_MIPS_VM_DEPCHECKER_MIPS_HPP diff --git a/src/hotspot/cpu/mips/disassembler_mips.hpp b/src/hotspot/cpu/mips/disassembler_mips.hpp new file mode 100644 index 00000000000..c5f3a8888dd --- /dev/null +++ b/src/hotspot/cpu/mips/disassembler_mips.hpp @@ -0,0 +1,37 @@ +/* + * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_MIPS_VM_DISASSEMBLER_MIPS_HPP +#define CPU_MIPS_VM_DISASSEMBLER_MIPS_HPP + + static int pd_instruction_alignment() { + return sizeof(int); + } + + static const char* pd_cpu_opts() { + return "gpr-names=64"; + } + +#endif // CPU_MIPS_VM_DISASSEMBLER_MIPS_HPP diff --git a/src/hotspot/cpu/mips/frame_mips.cpp b/src/hotspot/cpu/mips/frame_mips.cpp new file mode 100644 index 00000000000..d49bd6290d4 --- /dev/null +++ b/src/hotspot/cpu/mips/frame_mips.cpp @@ -0,0 +1,690 @@ +/* + * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "interpreter/interpreter.hpp" +#include "memory/resourceArea.hpp" +#include "oops/markOop.hpp" +#include "oops/method.hpp" +#include "oops/oop.inline.hpp" +#include "prims/methodHandles.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/handles.inline.hpp" +#include "runtime/javaCalls.hpp" +#include "runtime/monitorChunk.hpp" +#include "runtime/signature.hpp" +#include "runtime/stubCodeGenerator.hpp" +#include "runtime/stubRoutines.hpp" +#include "vmreg_mips.inline.hpp" + +#ifdef ASSERT +void RegisterMap::check_location_valid() { +} +#endif + + +// Profiling/safepoint support +// for Profiling - acting on another frame. walks sender frames +// if valid. +// frame profile_find_Java_sender_frame(JavaThread *thread); + +bool frame::safe_for_sender(JavaThread *thread) { + address sp = (address)_sp; + address fp = (address)_fp; + address unextended_sp = (address)_unextended_sp; + + // consider stack guards when trying to determine "safe" stack pointers + static size_t stack_guard_size = os::uses_stack_guard_pages() ? + JavaThread::stack_red_zone_size() + JavaThread::stack_yellow_zone_size() : 0; + size_t usable_stack_size = thread->stack_size() - stack_guard_size; + + // sp must be within the usable part of the stack (not in guards) + bool sp_safe = (sp < thread->stack_base()) && + (sp >= thread->stack_base() - usable_stack_size); + + + if (!sp_safe) { + return false; + } + + // unextended sp must be within the stack and above or equal sp + bool unextended_sp_safe = (unextended_sp < thread->stack_base()) && + (unextended_sp >= sp); + + if (!unextended_sp_safe) { + return false; + } + + // an fp must be within the stack and above (but not equal) sp + // second evaluation on fp+ is added to handle situation where fp is -1 + bool fp_safe = (fp < thread->stack_base() && (fp > sp) && (((fp + (return_addr_offset * sizeof(void*))) < thread->stack_base()))); + + // We know sp/unextended_sp are safe only fp is questionable here + + // If the current frame is known to the code cache then we can attempt to + // construct the sender and do some validation of it. This goes a long way + // toward eliminating issues when we get in frame construction code + + if (_cb != NULL ) { + + // First check if frame is complete and tester is reliable + // Unfortunately we can only check frame complete for runtime stubs and nmethod + // other generic buffer blobs are more problematic so we just assume they are + // ok. adapter blobs never have a frame complete and are never ok. + + if (!_cb->is_frame_complete_at(_pc)) { + if (_cb->is_compiled() || _cb->is_adapter_blob() || _cb->is_runtime_stub()) { + return false; + } + } + + // Could just be some random pointer within the codeBlob + if (!_cb->code_contains(_pc)) { + return false; + } + + // Entry frame checks + if (is_entry_frame()) { + // an entry frame must have a valid fp. 
+ return fp_safe && is_entry_frame_valid(thread); + } + + intptr_t* sender_sp = NULL; + intptr_t* sender_unextended_sp = NULL; + address sender_pc = NULL; + intptr_t* saved_fp = NULL; + + if (is_interpreted_frame()) { + // fp must be safe + if (!fp_safe) { + return false; + } + + sender_pc = (address) this->fp()[return_addr_offset]; + // for interpreted frames, the value below is the sender "raw" sp, + // which can be different from the sender unextended sp (the sp seen + // by the sender) because of current frame local variables + sender_sp = (intptr_t*) addr_at(sender_sp_offset); + sender_unextended_sp = (intptr_t*) this->fp()[interpreter_frame_sender_sp_offset]; + saved_fp = (intptr_t*) this->fp()[link_offset]; + + } else { + // must be some sort of compiled/runtime frame + // fp does not have to be safe (although it could be check for c1?) + + // check for a valid frame_size, otherwise we are unlikely to get a valid sender_pc + if (_cb->frame_size() <= 0) { + return false; + } + + sender_sp = _unextended_sp + _cb->frame_size(); + // Is sender_sp safe? + if ((address)sender_sp >= thread->stack_base()) { + return false; + } + sender_unextended_sp = sender_sp; + // On MIPS the return_address is always the word on the stack + sender_pc = (address) *(sender_sp-1); + // Note: frame::sender_sp_offset is only valid for compiled frame + saved_fp = (intptr_t*) *(sender_sp - frame::sender_sp_offset); + } + + + // If the potential sender is the interpreter then we can do some more checking + if (Interpreter::contains(sender_pc)) { + + // FP is always saved in a recognizable place in any code we generate. However + // only if the sender is interpreted/call_stub (c1 too?) are we certain that the saved FP + // is really a frame pointer. + + bool saved_fp_safe = ((address)saved_fp < thread->stack_base()) && (saved_fp > sender_sp); + + if (!saved_fp_safe) { + return false; + } + + // construct the potential sender + + frame sender(sender_sp, sender_unextended_sp, saved_fp, sender_pc); + + return sender.is_interpreted_frame_valid(thread); + + } + + // We must always be able to find a recognizable pc + CodeBlob* sender_blob = CodeCache::find_blob_unsafe(sender_pc); + if (sender_pc == NULL || sender_blob == NULL) { + return false; + } + + // Could be a zombie method + if (sender_blob->is_zombie() || sender_blob->is_unloaded()) { + return false; + } + + // Could just be some random pointer within the codeBlob + if (!sender_blob->code_contains(sender_pc)) { + return false; + } + + // We should never be able to see an adapter if the current frame is something from code cache + if (sender_blob->is_adapter_blob()) { + return false; + } + + // Could be the call_stub + if (StubRoutines::returns_to_call_stub(sender_pc)) { + bool saved_fp_safe = ((address)saved_fp < thread->stack_base()) && (saved_fp > sender_sp); + + if (!saved_fp_safe) { + return false; + } + + // construct the potential sender + + frame sender(sender_sp, sender_unextended_sp, saved_fp, sender_pc); + + // Validate the JavaCallWrapper an entry frame must have + address jcw = (address)sender.entry_frame_call_wrapper(); + + bool jcw_safe = (jcw < thread->stack_base()) && ( jcw > (address)sender.fp()); + + return jcw_safe; + } + + CompiledMethod* nm = sender_blob->as_compiled_method_or_null(); + if (nm != NULL) { + if (nm->is_deopt_mh_entry(sender_pc) || nm->is_deopt_entry(sender_pc) || + nm->method()->is_method_handle_intrinsic()) { + return false; + } + } + + // If the frame size is 0 something (or less) is bad because every nmethod has a non-zero 
frame size + // because the return address counts against the callee's frame. + + if (sender_blob->frame_size() <= 0) { + assert(!sender_blob->is_compiled(), "should count return address at least"); + return false; + } + + // We should never be able to see anything here except an nmethod. If something in the + // code cache (current frame) is called by an entity within the code cache that entity + // should not be anything but the call stub (already covered), the interpreter (already covered) + // or an nmethod. + + if (!sender_blob->is_compiled()) { + return false; + } + + // Could put some more validation for the potential non-interpreted sender + // frame we'd create by calling sender if I could think of any. Wait for next crash in forte... + + // One idea is seeing if the sender_pc we have is one that we'd expect to call to current cb + + // We've validated the potential sender that would be created + return true; + } + + // Must be native-compiled frame. Since sender will try and use fp to find + // linkages it must be safe + + if (!fp_safe) { + return false; + } + + // Will the pc we fetch be non-zero (which we'll find at the oldest frame) + + if ( (address) this->fp()[return_addr_offset] == NULL) return false; + + + // could try and do some more potential verification of native frame if we could think of some... + + return true; + +} + +void frame::patch_pc(Thread* thread, address pc) { + address* pc_addr = &(((address*) sp())[-1]); + if (TracePcPatching) { + tty->print_cr("patch_pc at address " INTPTR_FORMAT " [" INTPTR_FORMAT " -> " INTPTR_FORMAT "]", + p2i(pc_addr), p2i(*pc_addr), p2i(pc)); + } + // Either the return address is the original one or we are going to + // patch in the same address that's already there. + assert(_pc == *pc_addr || pc == *pc_addr, "must be"); + *pc_addr = pc; + _cb = CodeCache::find_blob(pc); + address original_pc = CompiledMethod::get_deopt_original_pc(this); + if (original_pc != NULL) { + assert(original_pc == _pc, "expected original PC to be stored before patching"); + _deopt_state = is_deoptimized; + // leave _pc as is + } else { + _deopt_state = not_deoptimized; + _pc = pc; + } +} + +bool frame::is_interpreted_frame() const { + return Interpreter::contains(pc()); +} + +int frame::frame_size(RegisterMap* map) const { + frame sender = this->sender(map); + return sender.sp() - sp(); +} + +intptr_t* frame::entry_frame_argument_at(int offset) const { + // convert offset to index to deal with tsi + int index = (Interpreter::expr_offset_in_bytes(offset)/wordSize); + // Entry frame's arguments are always in relation to unextended_sp() + return &unextended_sp()[index]; +} + +// sender_sp +#ifdef CC_INTERP +intptr_t* frame::interpreter_frame_sender_sp() const { + assert(is_interpreted_frame(), "interpreted frame expected"); + // QQQ why does this specialize method exist if frame::sender_sp() does same thing? + // seems odd and if we always know interpreted vs. non then sender_sp() is really + // doing too much work. 
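+ // (Note: in CC_INTERP builds the interpreter keeps its frame state in the interpreterState
+ // object rather than at fixed FP offsets, which is why this variant and the monitor
+ // accessors below delegate to get_interpreterState().)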
+ return get_interpreterState()->sender_sp(); +} + +// monitor elements + +BasicObjectLock* frame::interpreter_frame_monitor_begin() const { + return get_interpreterState()->monitor_base(); +} + +BasicObjectLock* frame::interpreter_frame_monitor_end() const { + return (BasicObjectLock*) get_interpreterState()->stack_base(); +} + +#else // CC_INTERP + +intptr_t* frame::interpreter_frame_sender_sp() const { + assert(is_interpreted_frame(), "interpreted frame expected"); + return (intptr_t*) at(interpreter_frame_sender_sp_offset); +} + +void frame::set_interpreter_frame_sender_sp(intptr_t* sender_sp) { + assert(is_interpreted_frame(), "interpreted frame expected"); + int_at_put(interpreter_frame_sender_sp_offset, (intptr_t) sender_sp); +} + + +// monitor elements + +BasicObjectLock* frame::interpreter_frame_monitor_begin() const { + return (BasicObjectLock*) addr_at(interpreter_frame_monitor_block_bottom_offset); +} + +BasicObjectLock* frame::interpreter_frame_monitor_end() const { + BasicObjectLock* result = (BasicObjectLock*) *addr_at(interpreter_frame_monitor_block_top_offset); + // make sure the pointer points inside the frame + assert((intptr_t) fp() > (intptr_t) result, "result must < than frame pointer"); + assert((intptr_t) sp() <= (intptr_t) result, "result must >= than stack pointer"); + return result; +} + +void frame::interpreter_frame_set_monitor_end(BasicObjectLock* value) { + *((BasicObjectLock**)addr_at(interpreter_frame_monitor_block_top_offset)) = value; +} + +// Used by template based interpreter deoptimization +void frame::interpreter_frame_set_last_sp(intptr_t* sp) { + *((intptr_t**)addr_at(interpreter_frame_last_sp_offset)) = sp; +} +#endif // CC_INTERP + +frame frame::sender_for_entry_frame(RegisterMap* map) const { + assert(map != NULL, "map must be set"); + // Java frame called from C; skip all C frames and return top C + // frame of that chunk as the sender + JavaFrameAnchor* jfa = entry_frame_call_wrapper()->anchor(); + assert(!entry_frame_is_first(), "next Java fp must be non zero"); + assert(jfa->last_Java_sp() > sp(), "must be above this frame on stack"); + map->clear(); + assert(map->include_argument_oops(), "should be set by clear"); + if (jfa->last_Java_pc() != NULL ) { + frame fr(jfa->last_Java_sp(), jfa->last_Java_fp(), jfa->last_Java_pc()); + return fr; + } + frame fr(jfa->last_Java_sp(), jfa->last_Java_fp()); + return fr; +} + +frame frame::sender_for_interpreter_frame(RegisterMap* map) const { + // sp is the raw sp from the sender after adapter or interpreter extension + intptr_t* sender_sp = this->sender_sp(); + + // This is the sp before any possible extension (adapter/locals). + intptr_t* unextended_sp = interpreter_frame_sender_sp(); + + // The interpreter and compiler(s) always save FP in a known + // location on entry. We must record where that location is + // so this if FP was live on callout from c2 we can find + // the saved copy no matter what it called. + + // Since the interpreter always saves FP if we record where it is then + // we don't have to always save FP on entry and exit to c2 compiled + // code, on entry will be enough. 
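+  // Illustrative note (an assumption about typical use, not generated code): recording the
+  // saved-FP slot in the RegisterMap below is what allows a stack walk such as
+  //   RegisterMap map(thread, true /* update_map */);
+  //   for (frame fr = thread->last_frame(); !fr.is_first_frame(); fr = fr.sender(&map)) { ... }
+  // to locate the caller's saved FP through the oop maps at each step.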
+#ifdef COMPILER2
+  if (map->update_map()) {
+    update_map_with_saved_link(map, (intptr_t**) addr_at(link_offset));
+  }
+#endif /* COMPILER2 */
+  return frame(sender_sp, unextended_sp, link(), sender_pc());
+}
+
+
+//------------------------------------------------------------------------------
+// frame::verify_deopt_original_pc
+//
+// Verifies the calculated original PC of a deoptimization PC for the
+// given unextended SP. The unextended SP might also be the saved SP
+// for MethodHandle call sites.
+#ifdef ASSERT
+void frame::verify_deopt_original_pc(CompiledMethod* nm, intptr_t* unextended_sp) {
+  frame fr;
+
+  // This is ugly but it's better than to change {get,set}_original_pc
+  // to take an SP value as argument. And it's only a debugging
+  // method anyway.
+  fr._unextended_sp = unextended_sp;
+
+  address original_pc = nm->get_original_pc(&fr);
+  assert(nm->insts_contains(original_pc),
+         "original PC must be in the main code section of the compiled method (or must be immediately following it)");
+}
+#endif
+
+
+//------------------------------------------------------------------------------
+// frame::adjust_unextended_sp
+void frame::adjust_unextended_sp() {
+  // On MIPS, sites calling method handle intrinsics and lambda forms are treated
+  // as any other call site. Therefore, no special action is needed when we are
+  // returning to any of these call sites.
+
+  if (_cb != NULL) {
+    CompiledMethod* sender_cm = _cb->as_compiled_method_or_null();
+    if (sender_cm != NULL) {
+      // If the sender PC is a deoptimization point, get the original PC.
+      if (sender_cm->is_deopt_entry(_pc) ||
+          sender_cm->is_deopt_mh_entry(_pc)) {
+        DEBUG_ONLY(verify_deopt_original_pc(sender_cm, _unextended_sp));
+      }
+    }
+  }
+}
+
+//------------------------------------------------------------------------------
+// frame::update_map_with_saved_link
+void frame::update_map_with_saved_link(RegisterMap* map, intptr_t** link_addr) {
+  // The interpreter and compiler(s) always save fp in a known
+  // location on entry. We must record where that location is
+  // so that if fp was live on callout from c2 we can find
+  // the saved copy no matter what it called.
+
+  // Since the interpreter always saves fp if we record where it is then
+  // we don't have to always save fp on entry and exit to c2 compiled
+  // code, on entry will be enough.
+  map->set_location(FP->as_VMReg(), (address) link_addr);
+  // this is weird: "H" ought to be at a higher address, however the
+  // oopMaps seem to have the "H" regs at the same address as the
+  // vanilla register.
+  // XXXX make this go away
+  if (true) {
+    map->set_location(FP->as_VMReg()->next(), (address) link_addr);
+  }
+}
+
+//------------------------------sender_for_compiled_frame-----------------------
+frame frame::sender_for_compiled_frame(RegisterMap* map) const {
+  assert(map != NULL, "map must be set");
+
+  // frame owned by optimizing compiler
+  assert(_cb->frame_size() >= 0, "must have non-zero frame size");
+
+  intptr_t* sender_sp = unextended_sp() + _cb->frame_size();
+  intptr_t* unextended_sp = sender_sp;
+
+  // On Loongson the return address is always the word just below the sender SP on the stack;
+  // in compiled code FP points to the sender's FP, but in the interpreter FP points to the return address,
+  // so finding the sender of a compiled frame is not the same as for an interpreter frame.
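+  // Layout at a compiled call site (sketch): sender_sp points just above the callee's fixed
+  // frame, the return address sits at sender_sp[-1] and the saved FP at
+  // sender_sp[-sender_sp_offset]; the loads below rely on exactly that.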
+ // we hard code here temporarily + // spark + address sender_pc = (address) *(sender_sp-1); + + intptr_t** saved_fp_addr = (intptr_t**) (sender_sp - frame::sender_sp_offset); + + if (map->update_map()) { + // Tell GC to use argument oopmaps for some runtime stubs that need it. + // For C1, the runtime stub might not have oop maps, so set this flag + // outside of update_register_map. + map->set_include_argument_oops(_cb->caller_must_gc_arguments(map->thread())); + if (_cb->oop_maps() != NULL) { + OopMapSet::update_register_map(this, map); + } + + // Since the prolog does the save and restore of epb there is no oopmap + // for it so we must fill in its location as if there was an oopmap entry + // since if our caller was compiled code there could be live jvm state in it. + update_map_with_saved_link(map, saved_fp_addr); + } + assert(sender_sp != sp(), "must have changed"); + return frame(sender_sp, unextended_sp, *saved_fp_addr, sender_pc); +} + +frame frame::sender(RegisterMap* map) const { + // Default is we done have to follow them. The sender_for_xxx will + // update it accordingly + map->set_include_argument_oops(false); + + if (is_entry_frame()) return sender_for_entry_frame(map); + if (is_interpreted_frame()) return sender_for_interpreter_frame(map); + assert(_cb == CodeCache::find_blob(pc()),"Must be the same"); + + if (_cb != NULL) { + return sender_for_compiled_frame(map); + } + // Must be native-compiled frame, i.e. the marshaling code for native + // methods that exists in the core system. + return frame(sender_sp(), link(), sender_pc()); +} + +bool frame::is_interpreted_frame_valid(JavaThread* thread) const { +// QQQ +#ifdef CC_INTERP +#else + assert(is_interpreted_frame(), "Not an interpreted frame"); + // These are reasonable sanity checks + if (fp() == 0 || (intptr_t(fp()) & (wordSize-1)) != 0) { + return false; + } + if (sp() == 0 || (intptr_t(sp()) & (wordSize-1)) != 0) { + return false; + } + if (fp() + interpreter_frame_initial_sp_offset < sp()) { + return false; + } + // These are hacks to keep us out of trouble. + // The problem with these is that they mask other problems + if (fp() <= sp()) { // this attempts to deal with unsigned comparison above + return false; + } + + // do some validation of frame elements + + // first the method + + Method* m = *interpreter_frame_method_addr(); + + // validate the method we'd find in this potential sender + if (!Method::is_valid_method(m)) return false; + + // stack frames shouldn't be much larger than max_stack elements + + //if (fp() - sp() > 1024 + m->max_stack()*Interpreter::stackElementSize()) { + if (fp() - sp() > 4096) { // stack frames shouldn't be large. + return false; + } + + // validate bci/bcp + + address bcp = interpreter_frame_bcp(); + if (m->validate_bci_from_bcp(bcp) < 0) { + return false; + } + + // validate ConstantPoolCache* + + ConstantPoolCache* cp = *interpreter_frame_cache_addr(); + + if (MetaspaceObj::is_valid(cp) == false) return false; + + // validate locals + + address locals = (address) *interpreter_frame_locals_addr(); + + if (locals > thread->stack_base() || locals < (address) fp()) return false; + + // We'd have to be pretty unlucky to be mislead at this point + +#endif // CC_INTERP + return true; +} + +BasicType frame::interpreter_frame_result(oop* oop_result, jvalue* value_result) { +#ifdef CC_INTERP + // Needed for JVMTI. 
The result should always be in the interpreterState object + assert(false, "NYI"); + interpreterState istate = get_interpreterState(); +#endif // CC_INTERP + assert(is_interpreted_frame(), "interpreted frame expected"); + Method* method = interpreter_frame_method(); + BasicType type = method->result_type(); + + intptr_t* tos_addr; + if (method->is_native()) { + // Prior to calling into the runtime to report the method_exit the possible + // return value is pushed to the native stack. If the result is a jfloat/jdouble + // then ST0 is saved. See the note in generate_native_result + tos_addr = (intptr_t*)sp(); + if (type == T_FLOAT || type == T_DOUBLE) { + tos_addr += 2; + } + } else { + tos_addr = (intptr_t*)interpreter_frame_tos_address(); + } + + switch (type) { + case T_OBJECT : + case T_ARRAY : { + oop obj; + if (method->is_native()) { +#ifdef CC_INTERP + obj = istate->_oop_temp; +#else + obj = cast_to_oop(at(interpreter_frame_oop_temp_offset)); +#endif // CC_INTERP + } else { + oop* obj_p = (oop*)tos_addr; + obj = (obj_p == NULL) ? (oop)NULL : *obj_p; + } + assert(obj == NULL || Universe::heap()->is_in(obj), "sanity check"); + *oop_result = obj; + break; + } + case T_BOOLEAN : value_result->z = *(jboolean*)tos_addr; break; + case T_BYTE : value_result->b = *(jbyte*)tos_addr; break; + case T_CHAR : value_result->c = *(jchar*)tos_addr; break; + case T_SHORT : value_result->s = *(jshort*)tos_addr; break; + case T_INT : value_result->i = *(jint*)tos_addr; break; + case T_LONG : value_result->j = *(jlong*)tos_addr; break; + case T_FLOAT : value_result->f = *(jfloat*)tos_addr; break; + case T_DOUBLE : value_result->d = *(jdouble*)tos_addr; break; + case T_VOID : /* Nothing to do */ break; + default : ShouldNotReachHere(); + } + + return type; +} + + +intptr_t* frame::interpreter_frame_tos_at(jint offset) const { + int index = (Interpreter::expr_offset_in_bytes(offset)/wordSize); + return &interpreter_frame_tos_address()[index]; +} + +#ifndef PRODUCT + +#define DESCRIBE_FP_OFFSET(name) \ + values.describe(frame_no, fp() + frame::name##_offset, #name) + +void frame::describe_pd(FrameValues& values, int frame_no) { + if (is_interpreted_frame()) { + DESCRIBE_FP_OFFSET(interpreter_frame_sender_sp); + DESCRIBE_FP_OFFSET(interpreter_frame_last_sp); + DESCRIBE_FP_OFFSET(interpreter_frame_method); + DESCRIBE_FP_OFFSET(interpreter_frame_mirror); + DESCRIBE_FP_OFFSET(interpreter_frame_mdp); + DESCRIBE_FP_OFFSET(interpreter_frame_cache); + DESCRIBE_FP_OFFSET(interpreter_frame_locals); + DESCRIBE_FP_OFFSET(interpreter_frame_bcp); + DESCRIBE_FP_OFFSET(interpreter_frame_initial_sp); + } +} +#endif + +intptr_t *frame::initial_deoptimization_info() { + // used to reset the saved FP + return fp(); +} + +intptr_t* frame::real_fp() const { + if (_cb != NULL) { + // use the frame size if valid + int size = _cb->frame_size(); + if (size > 0) { + return unextended_sp() + size; + } + } + // else rely on fp() + assert(! is_compiled_frame(), "unknown compiled frame size"); + return fp(); +} + +#ifndef PRODUCT +// This is a generic constructor which is only used by pns() in debug.cpp. +frame::frame(void* sp, void* fp, void* pc) { + init((intptr_t*)sp, (intptr_t*)fp, (address)pc); +} + +void frame::pd_ps() {} +#endif diff --git a/src/hotspot/cpu/mips/frame_mips.hpp b/src/hotspot/cpu/mips/frame_mips.hpp new file mode 100644 index 00000000000..bdbfa8aaa2d --- /dev/null +++ b/src/hotspot/cpu/mips/frame_mips.hpp @@ -0,0 +1,215 @@ +/* + * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. 
+ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_MIPS_VM_FRAME_MIPS_HPP +#define CPU_MIPS_VM_FRAME_MIPS_HPP + +#include "runtime/synchronizer.hpp" + +// A frame represents a physical stack frame (an activation). Frames can be +// C or Java frames, and the Java frames can be interpreted or compiled. +// In contrast, vframes represent source-level activations, so that one physical frame +// can correspond to multiple source level frames because of inlining. +// A frame is comprised of {pc, fp, sp} +// ------------------------------ Asm interpreter ---------------------------------------- +// Layout of asm interpreter frame: +// [expression stack ] * <- sp +// [monitors ] \ +// ... | monitor block size +// [monitors ] / +// [monitor block size ] +// [byte code index/pointr] = bcx() bcx_offset +// [pointer to locals ] = locals() locals_offset +// [constant pool cache ] = cache() cache_offset +// [methodData ] = mdp() mdx_offset +// [methodOop ] = method() method_offset +// [last sp ] = last_sp() last_sp_offset +// [old stack pointer ] (sender_sp) sender_sp_offset +// [old frame pointer ] <- fp = link() +// [return pc ] +// [oop temp ] (only for native calls) +// [locals and parameters ] +// <- sender sp +// ------------------------------ Asm interpreter ---------------------------------------- + +// ------------------------------ C++ interpreter ---------------------------------------- +// +// Layout of C++ interpreter frame: (While executing in BytecodeInterpreter::run) +// +// <- SP (current sp) +// [local variables ] BytecodeInterpreter::run local variables +// ... BytecodeInterpreter::run local variables +// [local variables ] BytecodeInterpreter::run local variables +// [old frame pointer ] fp [ BytecodeInterpreter::run's fp ] +// [return pc ] (return to frame manager) +// [interpreter_state* ] (arg to BytecodeInterpreter::run) -------------- +// [expression stack ] <- last_Java_sp | +// [... ] * <- interpreter_state.stack | +// [expression stack ] * <- interpreter_state.stack_base | +// [monitors ] \ | +// ... | monitor block size | +// [monitors ] / <- interpreter_state.monitor_base | +// [struct interpretState ] <-----------------------------------------| +// [return pc ] (return to callee of frame manager [1] +// [locals and parameters ] +// <- sender sp + +// [1] When the c++ interpreter calls a new method it returns to the frame +// manager which allocates a new frame on the stack. In that case there +// is no real callee of this newly allocated frame. 
The frame manager is +// aware of the additional frame(s) and will pop them as nested calls +// complete. Howevers tTo make it look good in the debugger the frame +// manager actually installs a dummy pc pointing to RecursiveInterpreterActivation +// with a fake interpreter_state* parameter to make it easy to debug +// nested calls. + +// Note that contrary to the layout for the assembly interpreter the +// expression stack allocated for the C++ interpreter is full sized. +// However this is not as bad as it seems as the interpreter frame_manager +// will truncate the unused space on succesive method calls. +// +// ------------------------------ C++ interpreter ---------------------------------------- + +// Layout of interpreter frame: +// +// [ monitor entry ] <--- sp +// ... +// [ monitor entry ] +// -9 [ monitor block top ] ( the top monitor entry ) +// -8 [ byte code pointer ] (if native, bcp = 0) +// -7 [ constant pool cache ] +// -6 [ methodData ] mdx_offset(not core only) +// -5 [ mirror ] +// -4 [ methodOop ] +// -3 [ locals offset ] +// -2 [ last_sp ] +// -1 [ sender's sp ] +// 0 [ sender's fp ] <--- fp +// 1 [ return address ] +// 2 [ oop temp offset ] (only for native calls) +// 3 [ result handler offset ] (only for native calls) +// 4 [ result type info ] (only for native calls) +// [ local var m-1 ] +// ... +// [ local var 0 ] +// [ argumnet word n-1 ] <--- ( sender's sp ) +// ... +// [ argument word 0 ] <--- S7 + + public: + enum { + pc_return_offset = 0, + // All frames + link_offset = 0, + return_addr_offset = 1, + // non-interpreter frames + sender_sp_offset = 2, + + // Interpreter frames + interpreter_frame_return_addr_offset = 1, + interpreter_frame_result_handler_offset = 3, // for native calls only + interpreter_frame_oop_temp_offset = 2, // for native calls only + + interpreter_frame_sender_fp_offset = 0, + interpreter_frame_sender_sp_offset = -1, + // outgoing sp before a call to an invoked method + interpreter_frame_last_sp_offset = interpreter_frame_sender_sp_offset - 1, + interpreter_frame_locals_offset = interpreter_frame_last_sp_offset - 1, + interpreter_frame_method_offset = interpreter_frame_locals_offset - 1, + interpreter_frame_mirror_offset = interpreter_frame_method_offset - 1, + interpreter_frame_mdp_offset = interpreter_frame_mirror_offset - 1, + interpreter_frame_cache_offset = interpreter_frame_mdp_offset - 1, + interpreter_frame_bcp_offset = interpreter_frame_cache_offset - 1, + interpreter_frame_initial_sp_offset = interpreter_frame_bcp_offset - 1, + + interpreter_frame_monitor_block_top_offset = interpreter_frame_initial_sp_offset, + interpreter_frame_monitor_block_bottom_offset = interpreter_frame_initial_sp_offset, + + // Entry frames + entry_frame_call_wrapper_offset = -9, + + // Native frames + + native_frame_initial_param_offset = 2 + + }; + + intptr_t ptr_at(int offset) const { + return *ptr_at_addr(offset); + } + + void ptr_at_put(int offset, intptr_t value) { + *ptr_at_addr(offset) = value; + } + + private: + // an additional field beyond _sp and _pc: + intptr_t* _fp; // frame pointer + // The interpreter and adapters will extend the frame of the caller. + // Since oopMaps are based on the sp of the caller before extension + // we need to know that value. However in order to compute the address + // of the return address we need the real "raw" sp. Since sparc already + // uses sp() to mean "raw" sp and unextended_sp() to mean the caller's + // original sp we use that convention. 
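+  // On this port the same convention applies: sp() is the raw SP (patch_pc, for example,
+  // finds the return address at sp()[-1]), while unextended_sp() is the caller's SP as the
+  // oopMaps expect it; _unextended_sp below caches that value.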
+ + intptr_t* _unextended_sp; + void adjust_unextended_sp(); + + intptr_t* ptr_at_addr(int offset) const { + return (intptr_t*) addr_at(offset); + } +#ifdef ASSERT + // Used in frame::sender_for_{interpreter,compiled}_frame + static void verify_deopt_original_pc(CompiledMethod* nm, intptr_t* unextended_sp); +#endif + + public: + // Constructors + + frame(intptr_t* sp, intptr_t* fp, address pc); + + frame(intptr_t* sp, intptr_t* unextended_sp, intptr_t* fp, address pc); + + frame(intptr_t* sp, intptr_t* fp); + + void init(intptr_t* sp, intptr_t* fp, address pc); + + // accessors for the instance variables + intptr_t* fp() const { return _fp; } + + inline address* sender_pc_addr() const; + + // expression stack tos if we are nested in a java call + intptr_t* interpreter_frame_last_sp() const; + + // helper to update a map with callee-saved FP + static void update_map_with_saved_link(RegisterMap* map, intptr_t** link_addr); + + // deoptimization support + void interpreter_frame_set_last_sp(intptr_t* sp); + + static jint interpreter_frame_expression_stack_direction() { return -1; } + +#endif // CPU_MIPS_VM_FRAME_MIPS_HPP diff --git a/src/hotspot/cpu/mips/frame_mips.inline.hpp b/src/hotspot/cpu/mips/frame_mips.inline.hpp new file mode 100644 index 00000000000..c408f01d69a --- /dev/null +++ b/src/hotspot/cpu/mips/frame_mips.inline.hpp @@ -0,0 +1,238 @@ +/* + * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef CPU_MIPS_VM_FRAME_MIPS_INLINE_HPP +#define CPU_MIPS_VM_FRAME_MIPS_INLINE_HPP + +#include "code/codeCache.hpp" +#include "code/vmreg.inline.hpp" + +// Inline functions for Loongson frames: + +// Constructors: + +inline frame::frame() { + _pc = NULL; + _sp = NULL; + _unextended_sp = NULL; + _fp = NULL; + _cb = NULL; + _deopt_state = unknown; +} + +inline void frame::init(intptr_t* sp, intptr_t* fp, address pc) { + _sp = sp; + _unextended_sp = sp; + _fp = fp; + _pc = pc; + assert(pc != NULL, "no pc?"); + _cb = CodeCache::find_blob(pc); + adjust_unextended_sp(); + + address original_pc = CompiledMethod::get_deopt_original_pc(this); + if (original_pc != NULL) { + _pc = original_pc; + _deopt_state = is_deoptimized; + } else { + _deopt_state = not_deoptimized; + } +} + +inline frame::frame(intptr_t* sp, intptr_t* fp, address pc) { + init(sp, fp, pc); +} + +inline frame::frame(intptr_t* sp, intptr_t* unextended_sp, intptr_t* fp, address pc) { + _sp = sp; + _unextended_sp = unextended_sp; + _fp = fp; + _pc = pc; + assert(pc != NULL, "no pc?"); + _cb = CodeCache::find_blob(pc); + adjust_unextended_sp(); + + address original_pc = CompiledMethod::get_deopt_original_pc(this); + if (original_pc != NULL) { + _pc = original_pc; + _deopt_state = is_deoptimized; + } else { + _deopt_state = not_deoptimized; + } +} + +inline frame::frame(intptr_t* sp, intptr_t* fp) { + _sp = sp; + _unextended_sp = sp; + _fp = fp; + _pc = (address)(sp[-1]); + + // Here's a sticky one. This constructor can be called via AsyncGetCallTrace + // when last_Java_sp is non-null but the pc fetched is junk. If we are truly + // unlucky the junk value could be to a zombied method and we'll die on the + // find_blob call. This is also why we can have no asserts on the validity + // of the pc we find here. AsyncGetCallTrace -> pd_get_top_frame_for_signal_handler + // -> pd_last_frame should use a specialized version of pd_last_frame which could + // call a specilaized frame constructor instead of this one. + // Then we could use the assert below. However this assert is of somewhat dubious + // value. + // assert(_pc != NULL, "no pc?"); + + _cb = CodeCache::find_blob(_pc); + adjust_unextended_sp(); + address original_pc = CompiledMethod::get_deopt_original_pc(this); + if (original_pc != NULL) { + _pc = original_pc; + _deopt_state = is_deoptimized; + } else { + _deopt_state = not_deoptimized; + } +} + +// Accessors + +inline bool frame::equal(frame other) const { + bool ret = sp() == other.sp() + && unextended_sp() == other.unextended_sp() + && fp() == other.fp() + && pc() == other.pc(); + assert(!ret || ret && cb() == other.cb() && _deopt_state == other._deopt_state, "inconsistent construction"); + return ret; +} + +// Return unique id for this frame. The id must have a value where we can distinguish +// identity and younger/older relationship. NULL represents an invalid (incomparable) +// frame. 
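+// (Ids are based on unextended_sp(): since the stack grows toward lower addresses, a
+// numerically smaller id denotes a more recent activation, which is the ordering that
+// is_younger()/is_older() below rely on.)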
+inline intptr_t* frame::id(void) const { return unextended_sp(); } + +// Relationals on frames based +// Return true if the frame is younger (more recent activation) than the frame represented by id +inline bool frame::is_younger(intptr_t* id) const { assert(this->id() != NULL && id != NULL, "NULL frame id"); + return this->id() < id ; } + +// Return true if the frame is older (less recent activation) than the frame represented by id +inline bool frame::is_older(intptr_t* id) const { assert(this->id() != NULL && id != NULL, "NULL frame id"); + return this->id() > id ; } + +inline intptr_t* frame::link() const { + return (intptr_t*) *(intptr_t **)addr_at(link_offset); +} + +inline intptr_t* frame::link_or_null() const { + intptr_t** ptr = (intptr_t **)addr_at(link_offset); + return os::is_readable_pointer(ptr) ? *ptr : NULL; +} + +inline intptr_t* frame::unextended_sp() const { return _unextended_sp; } + +// Return address: + +inline address* frame::sender_pc_addr() const { return (address*) addr_at( return_addr_offset); } +inline address frame::sender_pc() const { return *sender_pc_addr(); } + +inline intptr_t* frame::sender_sp() const { return addr_at( sender_sp_offset); } + +inline intptr_t** frame::interpreter_frame_locals_addr() const { + return (intptr_t**)addr_at(interpreter_frame_locals_offset); +} + +inline intptr_t* frame::interpreter_frame_last_sp() const { + return *(intptr_t**)addr_at(interpreter_frame_last_sp_offset); +} + +inline intptr_t* frame::interpreter_frame_bcp_addr() const { + return (intptr_t*)addr_at(interpreter_frame_bcp_offset); +} + + +inline intptr_t* frame::interpreter_frame_mdp_addr() const { + return (intptr_t*)addr_at(interpreter_frame_mdp_offset); +} + + + +// Constant pool cache + +inline ConstantPoolCache** frame::interpreter_frame_cache_addr() const { + return (ConstantPoolCache**)addr_at(interpreter_frame_cache_offset); +} + +// Method + +inline Method** frame::interpreter_frame_method_addr() const { + return (Method**)addr_at(interpreter_frame_method_offset); +} + +// Mirror + +inline oop* frame::interpreter_frame_mirror_addr() const { + return (oop*)addr_at(interpreter_frame_mirror_offset); +} + +// top of expression stack +inline intptr_t* frame::interpreter_frame_tos_address() const { + intptr_t* last_sp = interpreter_frame_last_sp(); + if (last_sp == NULL ) { + return sp(); + } else { + // sp() may have been extended by an adapter + assert(last_sp <= (intptr_t*)interpreter_frame_monitor_end(), "bad tos"); + return last_sp; + } +} + +inline oop* frame::interpreter_frame_temp_oop_addr() const { + return (oop *)(fp() + interpreter_frame_oop_temp_offset); +} + +inline int frame::interpreter_frame_monitor_size() { + return BasicObjectLock::size(); +} + + +// expression stack +// (the max_stack arguments are used by the GC; see class FrameClosure) + +inline intptr_t* frame::interpreter_frame_expression_stack() const { + intptr_t* monitor_end = (intptr_t*) interpreter_frame_monitor_end(); + return monitor_end-1; +} + +// Entry frames + +inline JavaCallWrapper** frame::entry_frame_call_wrapper_addr() const { + return (JavaCallWrapper**)addr_at(entry_frame_call_wrapper_offset); +} + +// Compiled frames + +inline oop frame::saved_oop_result(RegisterMap* map) const { + return *((oop*) map->location(V0->as_VMReg())); +} + +inline void frame::set_saved_oop_result(RegisterMap* map, oop obj) { + *((oop*) map->location(V0->as_VMReg())) = obj; +} + +#endif // CPU_MIPS_VM_FRAME_MIPS_INLINE_HPP diff --git a/src/hotspot/cpu/mips/gc/g1/g1BarrierSetAssembler_mips.cpp 
b/src/hotspot/cpu/mips/gc/g1/g1BarrierSetAssembler_mips.cpp new file mode 100644 index 00000000000..179f7703c87 --- /dev/null +++ b/src/hotspot/cpu/mips/gc/g1/g1BarrierSetAssembler_mips.cpp @@ -0,0 +1,364 @@ +/* + * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2018, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "gc/g1/g1BarrierSet.hpp" +#include "gc/g1/g1BarrierSetAssembler.hpp" +#include "gc/g1/g1BarrierSetRuntime.hpp" +#include "gc/g1/g1CardTable.hpp" +#include "gc/g1/g1ThreadLocalData.hpp" +#include "gc/g1/heapRegion.hpp" +#include "interpreter/interp_masm.hpp" +#include "runtime/sharedRuntime.hpp" +#include "utilities/macros.hpp" + +#define __ masm-> + +void G1BarrierSetAssembler::gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators, + Register addr, Register count) { + bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0; + + if (!dest_uninitialized) { +#ifndef OPT_THREAD + Register thread = T9; + __ get_thread(thread); +#else + Register thread = TREG; +#endif + + Label filtered; + Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset())); + // Is marking active? 
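+    // (The width of the SATB 'active' flag differs between builds, so either a 32-bit or an
+    // 8-bit load is used; both results are compared against zero below.)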
+ if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { + __ lw(AT, in_progress); + } else { + assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); + __ lb(AT, in_progress); + } + + __ beq(AT, R0, filtered); + __ delayed()->nop(); + + __ pushad(); // push registers + if (count == A0) { + if (addr == A1) { + __ move(AT, A0); + __ move(A0, A1); + __ move(A1, AT); + } else { + __ move(A1, count); + __ move(A0, addr); + } + } else { + __ move(A0, addr); + __ move(A1, count); + } + if (UseCompressedOops) { + __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_pre_narrow_oop_entry), 2); + } else { + __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_pre_oop_entry), 2); + } + __ popad(); + + __ bind(filtered); + } +} + +void G1BarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, + Register addr, Register count, Register tmp) { + __ pushad(); // push registers (overkill) + if (count == A0) { + assert_different_registers(A1, addr); + __ move(A1, count); + __ move(A0, addr); + } else { + assert_different_registers(A0, count); + __ move(A0, addr); + __ move(A1, count); + } + __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_post_entry), 2); + __ popad(); +} + +void G1BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, + Register dst, Address src, Register tmp1, Register tmp_thread) { + bool on_oop = type == T_OBJECT || type == T_ARRAY; + bool on_weak = (decorators & ON_WEAK_OOP_REF) != 0; + bool on_phantom = (decorators & ON_PHANTOM_OOP_REF) != 0; + bool on_reference = on_weak || on_phantom; + ModRefBarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread); + if (on_oop && on_reference) { + const Register thread = TREG; +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + // Generate the G1 pre-barrier code to log the value of + // the referent field in an SATB buffer. + g1_write_barrier_pre(masm /* masm */, + noreg /* obj */, + dst /* pre_val */, + thread /* thread */, + tmp1 /* tmp */, + true /* tosca_live */, + true /* expand_call */); + } +} + +void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm, + Register obj, + Register pre_val, + Register thread, + Register tmp, + bool tosca_live, + bool expand_call) { + // If expand_call is true then we expand the call_VM_leaf macro + // directly to skip generating the check by + // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp. + + assert(thread == TREG, "must be"); + + Label done; + Label runtime; + + assert(pre_val != noreg, "check this code"); + + if (obj != noreg) { + assert_different_registers(obj, pre_val, tmp); + assert(pre_val != V0, "check this code"); + } + + Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset())); + Address index(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset())); + Address buffer(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset())); + + // Is marking active? + if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { + __ lw(AT, in_progress); + } else { + assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); + __ lb(AT, in_progress); + } + __ beq(AT, R0, done); + __ delayed()->nop(); + + // Do we need to load the previous value? + if (obj != noreg) { + __ load_heap_oop(pre_val, Address(obj, 0)); + } + + // Is the previous value null? 
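+  // (A null previous value never needs to be recorded in the SATB buffer, so it is
+  // filtered out here before the queue index/buffer are touched.)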
+ __ beq(pre_val, R0, done); + __ delayed()->nop(); + + // Can we store original value in the thread's buffer? + // Is index == 0? + // (The index field is typed as size_t.) + + __ ld(tmp, index); + __ beq(tmp, R0, runtime); + __ delayed()->nop(); + + __ daddiu(tmp, tmp, -1 * wordSize); + __ sd(tmp, index); + __ ld(AT, buffer); + __ daddu(tmp, tmp, AT); + + // Record the previous value + __ sd(pre_val, tmp, 0); + __ beq(R0, R0, done); + __ delayed()->nop(); + + __ bind(runtime); + // save the live input values + if (tosca_live) __ push(V0); + + if (obj != noreg && obj != V0) __ push(obj); + + if (pre_val != V0) __ push(pre_val); + + // Calling the runtime using the regular call_VM_leaf mechanism generates + // code (generated by InterpreterMacroAssember::call_VM_leaf_base) + // that checks that the *(ebp+frame::interpreter_frame_last_sp) == NULL. + // + // If we care generating the pre-barrier without a frame (e.g. in the + // intrinsified Reference.get() routine) then ebp might be pointing to + // the caller frame and so this check will most likely fail at runtime. + // + // Expanding the call directly bypasses the generation of the check. + // So when we do not have have a full interpreter frame on the stack + // expand_call should be passed true. + + if (expand_call) { + assert(pre_val != A1, "smashed arg"); + if (thread != A1) __ move(A1, thread); + if (pre_val != A0) __ move(A0, pre_val); + __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, thread); + } else { + __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, thread); + } + + // save the live input values + if (pre_val != V0) + __ pop(pre_val); + + if (obj != noreg && obj != V0) + __ pop(obj); + + if (tosca_live) __ pop(V0); + + __ bind(done); +} + +void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm, + Register store_addr, + Register new_val, + Register thread, + Register tmp, + Register tmp2) { + assert_different_registers(tmp, tmp2, AT); + assert(thread == TREG, "must be"); + + Address queue_index(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset())); + Address buffer(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset())); + + CardTableBarrierSet* ct = barrier_set_cast(BarrierSet::barrier_set()); + assert(sizeof(*ct->card_table()->byte_map_base()) == sizeof(jbyte), "adjust this code"); + + Label done; + Label runtime; + + // Does store cross heap regions? + __ xorr(AT, store_addr, new_val); + __ dsrl(AT, AT, HeapRegion::LogOfHRGrainBytes); + __ beq(AT, R0, done); + __ delayed()->nop(); + + // crosses regions, storing NULL? + __ beq(new_val, R0, done); + __ delayed()->nop(); + + // storing region crossing non-NULL, is card already dirty? + const Register card_addr = tmp; + const Register cardtable = tmp2; + + __ move(card_addr, store_addr); + __ dsrl(card_addr, card_addr, CardTable::card_shift); + // Do not use ExternalAddress to load 'byte_map_base', since 'byte_map_base' is NOT + // a valid address and therefore is not properly handled by the relocation code. 
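+  // Card address computation (sketch): card_addr = byte_map_base + (store_addr >> card_shift),
+  // i.e. each card-table byte covers 2^card_shift bytes of heap (512 with the usual shift of 9).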
+ __ set64(cardtable, (intptr_t)ct->card_table()->byte_map_base()); + __ daddu(card_addr, card_addr, cardtable); + + __ lb(AT, card_addr, 0); + __ daddiu(AT, AT, -1 * (int)G1CardTable::g1_young_card_val()); + __ beq(AT, R0, done); + __ delayed()->nop(); + + __ sync(); + __ lb(AT, card_addr, 0); + __ daddiu(AT, AT, -1 * (int)G1CardTable::dirty_card_val()); + __ beq(AT, R0, done); + __ delayed()->nop(); + + // storing a region crossing, non-NULL oop, card is clean. + // dirty card and log. + __ move(AT, (int)G1CardTable::dirty_card_val()); + __ sb(AT, card_addr, 0); + + __ lw(AT, queue_index); + __ beq(AT, R0, runtime); + __ delayed()->nop(); + __ daddiu(AT, AT, -1 * wordSize); + __ sw(AT, queue_index); + __ ld(tmp2, buffer); + __ ld(AT, queue_index); + __ daddu(tmp2, tmp2, AT); + __ sd(card_addr, tmp2, 0); + __ beq(R0, R0, done); + __ delayed()->nop(); + + __ bind(runtime); + // save the live input values + __ push(store_addr); + __ push(new_val); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), card_addr, TREG); + __ pop(new_val); + __ pop(store_addr); + + __ bind(done); +} + +void G1BarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, + Address dst, Register val, Register tmp1, Register tmp2) { + bool in_heap = (decorators & IN_HEAP) != 0; + bool as_normal = (decorators & AS_NORMAL) != 0; + assert((decorators & IS_DEST_UNINITIALIZED) == 0, "unsupported"); + + bool needs_pre_barrier = as_normal; + bool needs_post_barrier = val != noreg && in_heap; + + Register tmp3 = RT3; + Register rthread = TREG; + // flatten object address if needed + // We do it regardless of precise because we need the registers + if (dst.index() == noreg && dst.disp() == 0) { + if (dst.base() != tmp3) { + __ move(tmp3, dst.base()); + } + } else { + __ lea(tmp3, dst); + } + + if (needs_pre_barrier) { + g1_write_barrier_pre(masm /*masm*/, + tmp3 /* obj */, + tmp2 /* pre_val */, + rthread /* thread */, + tmp1 /* tmp */, + val != noreg /* tosca_live */, + false /* expand_call */); + } + if (val == noreg) { + BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp3, 0), val, noreg, noreg); + } else { + Register new_val = val; + if (needs_post_barrier) { + // G1 barrier needs uncompressed oop for region cross check. + if (UseCompressedOops) { + new_val = tmp2; + __ move(new_val, val); + } + } + BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp3, 0), val, noreg, noreg); + if (needs_post_barrier) { + g1_write_barrier_post(masm /*masm*/, + tmp3 /* store_adr */, + new_val /* new_val */, + rthread /* thread */, + tmp1 /* tmp */, + tmp2 /* tmp2 */); + } + } +} diff --git a/src/hotspot/cpu/mips/gc/g1/g1BarrierSetAssembler_mips.hpp b/src/hotspot/cpu/mips/gc/g1/g1BarrierSetAssembler_mips.hpp new file mode 100644 index 00000000000..ec5c243c3f1 --- /dev/null +++ b/src/hotspot/cpu/mips/gc/g1/g1BarrierSetAssembler_mips.hpp @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2018, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_MIPS_GC_G1_G1BARRIERSETASSEMBLER_MIPS_HPP +#define CPU_MIPS_GC_G1_G1BARRIERSETASSEMBLER_MIPS_HPP + +#include "asm/macroAssembler.hpp" +#include "gc/shared/modRefBarrierSetAssembler.hpp" + +class LIR_Assembler; +class StubAssembler; +class G1PreBarrierStub; +class G1PostBarrierStub; + +class G1BarrierSetAssembler: public ModRefBarrierSetAssembler { + protected: + virtual void gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators, Register addr, Register count); + virtual void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, Register addr, Register count, Register tmp); + + void g1_write_barrier_pre(MacroAssembler* masm, + Register obj, + Register pre_val, + Register thread, + Register tmp, + bool tosca_live, + bool expand_call); + + void g1_write_barrier_post(MacroAssembler* masm, + Register store_addr, + Register new_val, + Register thread, + Register tmp, + Register tmp2); + + virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, + Address dst, Register val, Register tmp1, Register tmp2); + + public: + void gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrierStub* stub); + void gen_post_barrier_stub(LIR_Assembler* ce, G1PostBarrierStub* stub); + + void generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm); + void generate_c1_post_barrier_runtime_stub(StubAssembler* sasm); + + virtual void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, + Register dst, Address src, Register tmp1, Register tmp_thread); +}; + +#endif // CPU_MIPS_GC_G1_G1BARRIERSETASSEMBLER_MIPS_HPP diff --git a/src/hotspot/cpu/mips/gc/shared/barrierSetAssembler_mips.cpp b/src/hotspot/cpu/mips/gc/shared/barrierSetAssembler_mips.cpp new file mode 100644 index 00000000000..071debdc3a3 --- /dev/null +++ b/src/hotspot/cpu/mips/gc/shared/barrierSetAssembler_mips.cpp @@ -0,0 +1,194 @@ +/* + * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2018, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "gc/shared/barrierSetAssembler.hpp" +#include "gc/shared/collectedHeap.hpp" +#include "interpreter/interp_masm.hpp" +#include "runtime/jniHandles.hpp" +#include "runtime/thread.hpp" + +#define __ masm-> + +void BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, + Register dst, Address src, Register tmp1, Register tmp_thread) { + bool in_heap = (decorators & IN_HEAP) != 0; + bool in_native = (decorators & IN_NATIVE) != 0; + bool is_not_null = (decorators & IS_NOT_NULL) != 0; + + switch (type) { + case T_OBJECT: + case T_ARRAY: { + if (in_heap) { + if (UseCompressedOops) { + __ lwu(dst, src); + if (is_not_null) { + __ decode_heap_oop_not_null(dst); + } else { + __ decode_heap_oop(dst); + } + } else + { + __ ld_ptr(dst, src); + } + } else { + assert(in_native, "why else?"); + __ ld_ptr(dst, src); + } + break; + } + case T_BOOLEAN: __ lbu (dst, src); break; + case T_BYTE: __ lb (dst, src); break; + case T_CHAR: __ lhu (dst, src); break; + case T_SHORT: __ lh (dst, src); break; + case T_INT: __ lw (dst, src); break; + case T_LONG: __ ld (dst, src); break; + case T_ADDRESS: __ ld_ptr(dst, src); break; + case T_FLOAT: + assert(dst == noreg, "only to ftos"); + __ lwc1(FSF, src); + break; + case T_DOUBLE: + assert(dst == noreg, "only to dtos"); + __ ldc1(FSF, src); + break; + default: Unimplemented(); + } +} + +void BarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, + Address dst, Register val, Register tmp1, Register tmp2) { + bool in_heap = (decorators & IN_HEAP) != 0; + bool in_native = (decorators & IN_NATIVE) != 0; + bool is_not_null = (decorators & IS_NOT_NULL) != 0; + + switch (type) { + case T_OBJECT: + case T_ARRAY: { + if (in_heap) { + if (val == noreg) { + assert(!is_not_null, "inconsistent access"); + if (UseCompressedOops) { + __ sw(R0, dst); + } else { + __ sd(R0, dst); + } + } else { + if (UseCompressedOops) { + assert(!dst.uses(val), "not enough registers"); + if (is_not_null) { + __ encode_heap_oop_not_null(val); + } else { + __ encode_heap_oop(val); + } + __ sw(val, dst); + } else + { + __ st_ptr(val, dst); + } + } + } else { + assert(in_native, "why else?"); + assert(val != noreg, "not supported"); + __ st_ptr(val, dst); + } + break; + } + case T_BOOLEAN: + __ andi(val, val, 0x1); // boolean is true if LSB is 1 + __ sb(val, dst); + break; + case T_BYTE: + __ sb(val, dst); + break; + case T_SHORT: + __ sh(val, dst); + break; + case T_CHAR: + __ sh(val, dst); + break; + case T_INT: + __ sw(val, dst); + break; + case T_LONG: + __ sd(val, dst); + break; + case T_FLOAT: + assert(val == noreg, "only tos"); + __ swc1(FSF, dst); + break; + case T_DOUBLE: + assert(val == noreg, "only tos"); + __ sdc1(FSF, dst); + break; + case T_ADDRESS: + __ st_ptr(val, dst); + break; + default: Unimplemented(); + } +} + +void BarrierSetAssembler::obj_equals(MacroAssembler* masm, + Register obj1, Address obj2) { + Unimplemented(); +} + +void BarrierSetAssembler::obj_equals(MacroAssembler* masm, + Register obj1, Register obj2) { + Unimplemented(); +} + +void 
BarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env, + Register obj, Register tmp, Label& slowpath) { + __ clear_jweak_tag(obj); + __ ld_ptr(obj, Address(obj, 0)); +} + +void BarrierSetAssembler::tlab_allocate(MacroAssembler* masm, + Register thread, Register obj, + Register var_size_in_bytes, + int con_size_in_bytes, + Register t1, + Register t2, + Label& slow_case) { + Unimplemented(); +} + +// Defines obj, preserves var_size_in_bytes +void BarrierSetAssembler::eden_allocate(MacroAssembler* masm, + Register thread, Register obj, + Register var_size_in_bytes, + int con_size_in_bytes, + Register t1, + Label& slow_case) { + Unimplemented(); +} + +void BarrierSetAssembler::incr_allocated_bytes(MacroAssembler* masm, Register thread, + Register var_size_in_bytes, + int con_size_in_bytes, + Register t1) { + Unimplemented(); +} diff --git a/src/hotspot/cpu/mips/gc/shared/barrierSetAssembler_mips.hpp b/src/hotspot/cpu/mips/gc/shared/barrierSetAssembler_mips.hpp new file mode 100644 index 00000000000..b97ecbcca50 --- /dev/null +++ b/src/hotspot/cpu/mips/gc/shared/barrierSetAssembler_mips.hpp @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2018, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef CPU_MIPS_GC_SHARED_BARRIERSETASSEMBLER_MIPS_HPP +#define CPU_MIPS_GC_SHARED_BARRIERSETASSEMBLER_MIPS_HPP + +#include "asm/macroAssembler.hpp" +#include "gc/shared/barrierSet.hpp" +#include "memory/allocation.hpp" +#include "oops/access.hpp" + +class InterpreterMacroAssembler; + +class BarrierSetAssembler: public CHeapObj { +private: + void incr_allocated_bytes(MacroAssembler* masm, Register thread, + Register var_size_in_bytes, + int con_size_in_bytes, + Register t1); + +public: + virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, + Register dst, Register count, Register scratch = NOREG) {} + virtual void arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, + Register dst, Register count, Register scratch = NOREG) {} + + virtual void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, + Register dst, Address src, Register tmp1, Register tmp_thread); + virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, + Address dst, Register val, Register tmp1, Register tmp2); + + virtual void obj_equals(MacroAssembler* masm, + Register obj1, Register obj2); + virtual void obj_equals(MacroAssembler* masm, + Register obj1, Address obj2); + + virtual void resolve(MacroAssembler* masm, DecoratorSet decorators, Register obj) { + // Default implementation does not need to do anything. + } + + // Support for jniFastGetField to try resolving a jobject/jweak in native + virtual void try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env, + Register obj, Register tmp, Label& slowpath); + + virtual void tlab_allocate(MacroAssembler* masm, + Register thread, Register obj, + Register var_size_in_bytes, + int con_size_in_bytes, + Register t1, Register t2, + Label& slow_case); + virtual void eden_allocate(MacroAssembler* masm, + Register thread, Register obj, + Register var_size_in_bytes, + int con_size_in_bytes, + Register t1, + Label& slow_case); + + virtual void barrier_stubs_init() {} +}; + +#endif // CPU_MIPS_GC_SHARED_BARRIERSETASSEMBLER_MIPS_HPP diff --git a/src/hotspot/cpu/mips/gc/shared/cardTableBarrierSetAssembler_mips.cpp b/src/hotspot/cpu/mips/gc/shared/cardTableBarrierSetAssembler_mips.cpp new file mode 100644 index 00000000000..f33165334c7 --- /dev/null +++ b/src/hotspot/cpu/mips/gc/shared/cardTableBarrierSetAssembler_mips.cpp @@ -0,0 +1,147 @@ +/* + * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2018, 2023, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "gc/shared/barrierSet.hpp" +#include "gc/shared/cardTable.hpp" +#include "gc/shared/cardTableBarrierSet.hpp" +#include "gc/shared/cardTableBarrierSetAssembler.hpp" + +#define __ masm-> + +#define T9 RT9 + +#ifdef PRODUCT +#define BLOCK_COMMENT(str) /* nothing */ +#else +#define BLOCK_COMMENT(str) __ block_comment(str) +#endif + +#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") + +#define TIMES_OOP (UseCompressedOops ? Address::times_4 : Address::times_8) + +void CardTableBarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, + Register addr, Register count, Register tmp) { + BarrierSet *bs = BarrierSet::barrier_set(); + CardTableBarrierSet* ctbs = barrier_set_cast(bs); + CardTable* ct = ctbs->card_table(); + assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code"); + intptr_t disp = (intptr_t) ct->byte_map_base(); + + Label L_loop, L_done; + const Register end = count; + assert_different_registers(addr, end); + + __ beq(count, R0, L_done); // zero count - nothing to do + __ delayed()->nop(); + + if (ct->scanned_concurrently()) __ membar(Assembler::StoreStore); + + __ set64(tmp, disp); + + __ lea(end, Address(addr, count, TIMES_OOP, 0)); // end == addr+count*oop_size + __ daddiu(end, end, -BytesPerHeapOop); // end - 1 to make inclusive + __ shr(addr, CardTable::card_shift); + __ shr(end, CardTable::card_shift); + __ dsubu(end, end, addr); // end --> cards count + + __ daddu(addr, addr, tmp); + + __ BIND(L_loop); + if (UseLEXT1) { + __ gssbx(R0, addr, count, 0); + } else { + __ daddu(AT, addr, count); + __ sb(R0, AT, 0); + } + __ daddiu(count, count, -1); + __ bgez(count, L_loop); + __ delayed()->nop(); + + __ BIND(L_done); +} + +void CardTableBarrierSetAssembler::store_check(MacroAssembler* masm, Register obj, Address dst) { + // Does a store check for the oop in register obj. The content of + // register obj is destroyed afterwards. 
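For readers new to card-table barriers: the gen_write_ref_array_post_barrier loop above and the store_check routine that continues below both reduce to the same arithmetic, namely shifting the store address right by the card shift and dirtying one byte in the card byte map (the generated code stores register R0, matching the zero dirty value). A minimal stand-alone C++ sketch of that arithmetic, with made-up names (kCardShift, post_barrier, byte_map_base) rather than the patch's identifiers, and assuming the conventional 512-byte card size:

    #include <cstddef>
    #include <cstdint>
    #include <cstring>

    // One byte of the card table covers one 2^kCardShift-byte "card" of heap.
    static const int     kCardShift = 9;   // 512-byte cards, assumed for illustration
    static const uint8_t kDirty     = 0;   // the generated code stores R0, i.e. zero

    // Single-field post barrier, as in store_check: dirty the card of the field address.
    // byte_map_base is pre-biased so that (addr >> kCardShift) indexes it directly.
    inline void post_barrier(uint8_t* byte_map_base, const void* field_addr) {
      byte_map_base[reinterpret_cast<uintptr_t>(field_addr) >> kCardShift] = kDirty;
    }

    // Range post barrier, as in gen_write_ref_array_post_barrier: dirty every card
    // covered by 'count' heap oops starting at 'start'.
    inline void post_barrier_range(uint8_t* byte_map_base, const void* start,
                                   size_t count, size_t bytes_per_heap_oop) {
      uintptr_t first = reinterpret_cast<uintptr_t>(start) >> kCardShift;
      uintptr_t last  = (reinterpret_cast<uintptr_t>(start) + count * bytes_per_heap_oop - 1)
                        >> kCardShift;
      std::memset(byte_map_base + first, kDirty, last - first + 1);
    }

The assembly version does the same thing a card at a time, counting down from the last card to the first, which is why the generated loop subtracts the start card from the end card before entering the store loop.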
+ BarrierSet* bs = BarrierSet::barrier_set(); + + CardTableBarrierSet* ctbs = barrier_set_cast(bs); + CardTable* ct = ctbs->card_table(); + assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code"); + + __ shr(obj, CardTable::card_shift); + + Address card_addr; + + intptr_t byte_map_base = (intptr_t)ct->byte_map_base(); + Register tmp = T9; + assert_different_registers(tmp, obj); + __ li(tmp, byte_map_base); + __ addu(tmp, tmp, obj); + + assert(CardTable::dirty_card_val() == 0, "must be"); + + jbyte dirty = CardTable::dirty_card_val(); + if (UseCondCardMark) { + Label L_already_dirty; + __ membar(Assembler::StoreLoad); + __ lb(AT, tmp, 0); + __ addiu(AT, AT, -1 * dirty); + __ beq(AT, R0, L_already_dirty); + __ delayed()->nop(); + __ sb(R0, tmp, 0); + __ bind(L_already_dirty); + } else { + if (ct->scanned_concurrently()) { + __ membar(Assembler::StoreStore); + } + __ sb(R0, tmp, 0); + } +} + +void CardTableBarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, + Address dst, Register val, Register tmp1, Register tmp2) { + bool in_heap = (decorators & IN_HEAP) != 0; + + bool is_array = (decorators & IS_ARRAY) != 0; + bool on_anonymous = (decorators & ON_UNKNOWN_OOP_REF) != 0; + bool precise = is_array || on_anonymous; + + bool needs_post_barrier = val != noreg && in_heap; + + BarrierSetAssembler::store_at(masm, decorators, type, dst, val, noreg, noreg); + if (needs_post_barrier) { + // flatten object address if needed + if (!precise || (dst.index() == noreg && dst.disp() == 0)) { + store_check(masm, dst.base(), dst); + } else { + __ lea(tmp1, dst); + store_check(masm, tmp1, dst); + } + } +} diff --git a/src/hotspot/cpu/mips/gc/shared/cardTableBarrierSetAssembler_mips.hpp b/src/hotspot/cpu/mips/gc/shared/cardTableBarrierSetAssembler_mips.hpp new file mode 100644 index 00000000000..49c2a0ea80e --- /dev/null +++ b/src/hotspot/cpu/mips/gc/shared/cardTableBarrierSetAssembler_mips.hpp @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2018, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef CPU_MIPS_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_MIPS_HPP +#define CPU_MIPS_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_MIPS_HPP + +#include "asm/macroAssembler.hpp" +#include "gc/shared/modRefBarrierSetAssembler.hpp" + +class CardTableBarrierSetAssembler: public ModRefBarrierSetAssembler { +protected: + void store_check(MacroAssembler* masm, Register obj, Address dst); + + virtual void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, Register addr, Register count, Register tmp); + + virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, + Address dst, Register val, Register tmp1, Register tmp2); +}; + +#endif // CPU_MIPS_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_MIPS_HPP diff --git a/src/hotspot/cpu/mips/gc/shared/modRefBarrierSetAssembler_mips.cpp b/src/hotspot/cpu/mips/gc/shared/modRefBarrierSetAssembler_mips.cpp new file mode 100644 index 00000000000..765259e6266 --- /dev/null +++ b/src/hotspot/cpu/mips/gc/shared/modRefBarrierSetAssembler_mips.cpp @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2018, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "gc/shared/modRefBarrierSetAssembler.hpp" + +#define __ masm-> + +void ModRefBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, + Register dst, Register count, Register scratch) { + if (is_oop) { + gen_write_ref_array_pre_barrier(masm, decorators, dst, count); + } +} + +void ModRefBarrierSetAssembler::arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, + Register dst, Register count, Register scratch) { + if (is_oop) { + gen_write_ref_array_post_barrier(masm, decorators, dst, count, scratch); + } +} + +void ModRefBarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, + Address dst, Register val, Register tmp1, Register tmp2) { + if (type == T_OBJECT || type == T_ARRAY) { + oop_store_at(masm, decorators, type, dst, val, tmp1, tmp2); + } else { + BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2); + } +} diff --git a/src/hotspot/cpu/mips/gc/shared/modRefBarrierSetAssembler_mips.hpp b/src/hotspot/cpu/mips/gc/shared/modRefBarrierSetAssembler_mips.hpp new file mode 100644 index 00000000000..5320a4c0add --- /dev/null +++ b/src/hotspot/cpu/mips/gc/shared/modRefBarrierSetAssembler_mips.hpp @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2018, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_MIPS_GC_SHARED_MODREFBARRIERSETASSEMBLER_MIPS_HPP +#define CPU_MIPS_GC_SHARED_MODREFBARRIERSETASSEMBLER_MIPS_HPP + +#include "asm/macroAssembler.hpp" +#include "gc/shared/barrierSetAssembler.hpp" + +// The ModRefBarrierSetAssembler filters away accesses on BasicTypes other +// than T_OBJECT/T_ARRAY (oops). The oop accesses call one of the protected +// accesses, which are overridden in the concrete BarrierSetAssembler. 
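The comment above summarises the contract: ModRefBarrierSetAssembler owns the T_OBJECT/T_ARRAY filtering, and each concrete barrier set only supplies oop_store_at plus the optional array-copy hooks (the card-table assembler declared earlier in this patch is one such subclass). A stand-alone C++ model of that layering, with invented type and member names and no dependence on HotSpot headers:

    #include <cstdint>

    enum BasicType { T_INT, T_LONG, T_OBJECT, T_ARRAY };

    struct ModRefModel {
      // Type filter, shaped like the store_at shown above:
      // only oop-typed stores reach the barrier-specific path.
      void store_at(BasicType type, void** dst, void* val) {
        if (type == T_OBJECT || type == T_ARRAY) {
          oop_store_at(dst, val);
        } else {
          *dst = val;                 // plain store, no barrier needed
        }
      }
      virtual void oop_store_at(void** dst, void* val) = 0;
      virtual ~ModRefModel() {}
    };

    struct CardTableModel : ModRefModel {
      uint8_t* byte_map_base;
      void oop_store_at(void** dst, void* val) override {
        *dst = val;                                                // the store itself
        byte_map_base[reinterpret_cast<uintptr_t>(dst) >> 9] = 0;  // post barrier: dirty the card
      }
    };

In the patch the real dispatch carries more state (decorators, temp registers, compressed-oop handling), but the filtering shape is the same.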
+ +class ModRefBarrierSetAssembler: public BarrierSetAssembler { +protected: + virtual void gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators, + Register addr, Register count) {} + virtual void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, + Register addr, Register count, Register tmp) {} + virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, + Address dst, Register val, Register tmp1, Register tmp2) = 0; +public: + virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, + Register dst, Register count, Register scratch = NOREG); + virtual void arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, + Register dst, Register count, Register scratch = NOREG); + + virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, + Address dst, Register val, Register tmp1, Register tmp2); +}; + +#endif // CPU_MIPS_GC_SHARED_MODREFBARRIERSETASSEMBLER_MIPS_HPP diff --git a/src/hotspot/cpu/mips/globalDefinitions_mips.hpp b/src/hotspot/cpu/mips/globalDefinitions_mips.hpp new file mode 100644 index 00000000000..abf8141e8bc --- /dev/null +++ b/src/hotspot/cpu/mips/globalDefinitions_mips.hpp @@ -0,0 +1,45 @@ +/* + * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_MIPS_VM_GLOBALDEFINITIONS_MIPS_HPP +#define CPU_MIPS_VM_GLOBALDEFINITIONS_MIPS_HPP +// Size of MIPS Instructions +const int BytesPerInstWord = 4; + +const int StackAlignmentInBytes = (2*wordSize); + +// Indicates whether the C calling conventions require that +// 32-bit integer argument values are properly extended to 64 bits. +// If set, SharedRuntime::c_calling_convention() must adapt +// signatures accordingly. +const bool CCallingConventionRequiresIntsAsLongs = false; + +#define SUPPORTS_NATIVE_CX8 + +#define SUPPORT_RESERVED_STACK_AREA + +#define THREAD_LOCAL_POLL + +#endif // CPU_MIPS_VM_GLOBALDEFINITIONS_MIPS_HPP diff --git a/src/hotspot/cpu/mips/globals_mips.hpp b/src/hotspot/cpu/mips/globals_mips.hpp new file mode 100644 index 00000000000..3bcad005d1c --- /dev/null +++ b/src/hotspot/cpu/mips/globals_mips.hpp @@ -0,0 +1,137 @@ +/* + * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_MIPS_VM_GLOBALS_MIPS_HPP +#define CPU_MIPS_VM_GLOBALS_MIPS_HPP + +#include "utilities/globalDefinitions.hpp" +#include "utilities/macros.hpp" + +// Sets the default values for platform dependent flags used by the runtime system. +// (see globals.hpp) + +define_pd_global(bool, ShareVtableStubs, true); +define_pd_global(bool, NeedsDeoptSuspend, false); // only register window machines need this + +define_pd_global(bool, ImplicitNullChecks, true); // Generate code for implicit null checks +define_pd_global(bool, TrapBasedNullChecks, false); // Not needed on x86. +define_pd_global(bool, UncommonNullCast, true); // Uncommon-trap NULLs passed to check cast + +define_pd_global(uintx, CodeCacheSegmentSize, 64); +define_pd_global(intx, CodeEntryAlignment, 16); +define_pd_global(intx, OptoLoopAlignment, 16); +define_pd_global(intx, InlineFrequencyCount, 100); +// MIPS generates 3x instructions than X86 +define_pd_global(intx, InlineSmallCode, 4000); + +#define DEFAULT_STACK_YELLOW_PAGES (2) +#define DEFAULT_STACK_RED_PAGES (1) +#define DEFAULT_STACK_SHADOW_PAGES (20 DEBUG_ONLY(+4)) +#define DEFAULT_STACK_RESERVED_PAGES (1) +define_pd_global(uintx, TLABSize, 0); +define_pd_global(uintx, NewSize, 1024 * K); +define_pd_global(intx, PreInflateSpin, 10); + +define_pd_global(intx, PrefetchCopyIntervalInBytes, -1); +define_pd_global(intx, PrefetchScanIntervalInBytes, -1); +define_pd_global(intx, PrefetchFieldsAhead, -1); + +#define MIN_STACK_YELLOW_PAGES DEFAULT_STACK_YELLOW_PAGES +#define MIN_STACK_RED_PAGES DEFAULT_STACK_RED_PAGES +#define MIN_STACK_SHADOW_PAGES DEFAULT_STACK_SHADOW_PAGES +#define MIN_STACK_RESERVED_PAGES (0) +define_pd_global(intx, StackReservedPages, DEFAULT_STACK_RESERVED_PAGES); + +define_pd_global(intx, StackYellowPages, 2); +define_pd_global(intx, StackRedPages, 1); +define_pd_global(intx, StackShadowPages, DEFAULT_STACK_SHADOW_PAGES); + +define_pd_global(bool, RewriteBytecodes, true); +define_pd_global(bool, RewriteFrequentPairs, true); +define_pd_global(bool, UseMembar, true); +// GC Ergo Flags +define_pd_global(intx, CMSYoungGenPerWorker, 64*M); // default max size of CMS young gen, per GC worker thread + +define_pd_global(uintx, TypeProfileLevel, 111); + +define_pd_global(bool, CompactStrings, true); + +define_pd_global(bool, PreserveFramePointer, false); + +define_pd_global(intx, InitArrayShortSize, 8*BytesPerLong); + +define_pd_global(bool, ThreadLocalHandshakes, true); +// Only c2 cares about this at the moment +define_pd_global(intx, AllocatePrefetchStyle, 2); +define_pd_global(intx, AllocatePrefetchDistance, -1); + +#define ARCH_FLAGS(develop, \ 
+ product, \ + diagnostic, \ + experimental, \ + notproduct, \ + range, \ + constraint, \ + writeable) \ + \ + product(bool, UseLEXT1, false, \ + "Use LoongISA general EXTensions 1") \ + \ + product(bool, UseLEXT2, false, \ + "Use LoongISA general EXTensions 2") \ + \ + product(bool, UseLEXT3, false, \ + "Use LoongISA general EXTensions 3") \ + \ + product(bool, UseCodeCacheAllocOpt, true, \ + "Allocate code cache within 32-bit memory address space") \ + \ + product(intx, UseSyncLevel, 10000, \ + "The sync level on Loongson CPUs" \ + "UseSyncLevel == 10000, 111, for all Loongson CPUs, " \ + "UseSyncLevel == 4000, 101, maybe for GS464V" \ + "UseSyncLevel == 3000, 001, maybe for GS464V" \ + "UseSyncLevel == 2000, 011, maybe for GS464E/GS264" \ + "UseSyncLevel == 1000, 110, maybe for GS464") \ + \ + develop(bool, UseBoundCheckInstruction, false, \ + "Use bound check instruction") \ + \ + product(intx, SetFSFOFN, 999, \ + "Set the FS/FO/FN bits in FCSR" \ + "999 means FS/FO/FN will not be changed" \ + "=XYZ, with X:FS, Y:FO, Z:FN, X, Y and Z in 0=off, 1=on") \ + \ + /* assembler */ \ + product(bool, UseCountLeadingZerosInstructionMIPS64, true, \ + "Use count leading zeros instruction") \ + \ + product(bool, UseCountTrailingZerosInstructionMIPS64, false, \ + "Use count trailing zeros instruction") \ + \ + product(bool, UseActiveCoresMP, false, \ + "Eliminate barriers for single active cpu") + +#endif // CPU_MIPS_VM_GLOBALS_MIPS_HPP diff --git a/src/hotspot/cpu/mips/icBuffer_mips.cpp b/src/hotspot/cpu/mips/icBuffer_mips.cpp new file mode 100644 index 00000000000..6586c639653 --- /dev/null +++ b/src/hotspot/cpu/mips/icBuffer_mips.cpp @@ -0,0 +1,88 @@ +/* + * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "code/icBuffer.hpp" +#include "gc/shared/collectedHeap.inline.hpp" +#include "interpreter/bytecodes.hpp" +#include "memory/resourceArea.hpp" +#include "nativeInst_mips.hpp" +#include "oops/oop.inline.hpp" + +#define T0 RT0 +#define T1 RT1 +#define T2 RT2 +#define T3 RT3 +#define T8 RT8 +#define T9 RT9 + +int InlineCacheBuffer::ic_stub_code_size() { + return NativeMovConstReg::instruction_size + + NativeGeneralJump::instruction_size + + 1; + // so that code_end can be set in CodeBuffer + // 64bit 15 = 6 + 8 bytes + 1 byte + // 32bit 7 = 2 + 4 bytes + 1 byte +} + + +// we use T1 as cached oop(klass) now. 
this is the target of virtual call, +// when reach here, the receiver in T0 +// refer to shareRuntime_mips.cpp,gen_i2c2i_adapters +void InlineCacheBuffer::assemble_ic_buffer_code(address code_begin, void* cached_value, address entry_point) { + ResourceMark rm; + CodeBuffer code(code_begin, ic_stub_code_size()); + MacroAssembler* masm = new MacroAssembler(&code); + // note: even though the code contains an embedded oop, we do not need reloc info + // because + // (1) the oop is old (i.e., doesn't matter for scavenges) + // (2) these ICStubs are removed *before* a GC happens, so the roots disappear +// assert(cached_oop == NULL || cached_oop->is_perm(), "must be perm oop"); +#define __ masm-> + __ patchable_set48(T1, (long)cached_value); + + __ patchable_jump(entry_point); + __ flush(); +#undef __ +} + + +address InlineCacheBuffer::ic_buffer_entry_point(address code_begin) { + NativeMovConstReg* move = nativeMovConstReg_at(code_begin); // creation also verifies the object + NativeGeneralJump* jump = nativeGeneralJump_at(move->next_instruction_address()); + return jump->jump_destination(); +} + + +void* InlineCacheBuffer::ic_buffer_cached_value(address code_begin) { + // creation also verifies the object + NativeMovConstReg* move = nativeMovConstReg_at(code_begin); + // Verifies the jump + NativeGeneralJump* jump = nativeGeneralJump_at(move->next_instruction_address()); + void* o= (void*)move->data(); + return o; +} diff --git a/src/hotspot/cpu/mips/icache_mips.cpp b/src/hotspot/cpu/mips/icache_mips.cpp new file mode 100644 index 00000000000..e84e37358ba --- /dev/null +++ b/src/hotspot/cpu/mips/icache_mips.cpp @@ -0,0 +1,41 @@ +/* + * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "runtime/icache.hpp" + +void ICacheStubGenerator::generate_icache_flush(ICache::flush_icache_stub_t* flush_icache_stub) +{ +#define __ _masm-> + StubCodeMark mark(this, "ICache", "flush_icache_stub"); + address start = __ pc(); + + __ jr_hb(RA); + __ delayed()->ori(V0, A2, 0); + + *flush_icache_stub = (ICache::flush_icache_stub_t)start; +#undef __ +} diff --git a/src/hotspot/cpu/mips/icache_mips.hpp b/src/hotspot/cpu/mips/icache_mips.hpp new file mode 100644 index 00000000000..f90dee6eef7 --- /dev/null +++ b/src/hotspot/cpu/mips/icache_mips.hpp @@ -0,0 +1,41 @@ +/* + * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. 
+ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_MIPS_VM_ICACHE_MIPS_HPP +#define CPU_MIPS_VM_ICACHE_MIPS_HPP + +// Interface for updating the instruction cache. Whenever the VM modifies +// code, part of the processor instruction cache potentially has to be flushed. + +class ICache : public AbstractICache { + public: + enum { + stub_size = 2 * BytesPerInstWord, // Size of the icache flush stub in bytes + line_size = 32, // flush instruction affects a dword + log2_line_size = 5 // log2(line_size) + }; +}; + +#endif // CPU_MIPS_VM_ICACHE_MIPS_HPP diff --git a/src/hotspot/cpu/mips/interp_masm_mips.hpp b/src/hotspot/cpu/mips/interp_masm_mips.hpp new file mode 100644 index 00000000000..e526e39d53f --- /dev/null +++ b/src/hotspot/cpu/mips/interp_masm_mips.hpp @@ -0,0 +1,276 @@ +/* + * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef CPU_MIPS_VM_INTERP_MASM_MIPS_64_HPP +#define CPU_MIPS_VM_INTERP_MASM_MIPS_64_HPP + +#include "asm/assembler.hpp" +#include "asm/macroAssembler.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "interpreter/invocationCounter.hpp" +#include "runtime/frame.hpp" + +// This file specializes the assember with interpreter-specific macros + + +class InterpreterMacroAssembler: public MacroAssembler { +#ifndef CC_INTERP + private: + + Register _locals_register; // register that contains the pointer to the locals + Register _bcp_register; // register that contains the bcp + + protected: + // Interpreter specific version of call_VM_base + virtual void call_VM_leaf_base(address entry_point, + int number_of_arguments); + + virtual void call_VM_base(Register oop_result, + Register java_thread, + Register last_java_sp, + address entry_point, + int number_of_arguments, + bool check_exceptions); + + // base routine for all dispatches + void dispatch_base(TosState state, address* table, bool verifyoop = true, bool generate_poll = false); +#endif // CC_INTERP + + public: + void jump_to_entry(address entry); + // narrow int return value + void narrow(Register result); + + InterpreterMacroAssembler(CodeBuffer* code) : MacroAssembler(code), _locals_register(LVP), _bcp_register(BCP) {} + + void get_2_byte_integer_at_bcp(Register reg, Register tmp, int offset); + void get_4_byte_integer_at_bcp(Register reg, Register tmp, int offset); + + virtual void check_and_handle_popframe(Register java_thread); + virtual void check_and_handle_earlyret(Register java_thread); + + void load_earlyret_value(TosState state); + +#ifdef CC_INTERP + void save_bcp() { /* not needed in c++ interpreter and harmless */ } + void restore_bcp() { /* not needed in c++ interpreter and harmless */ } + + // Helpers for runtime call arguments/results + void get_method(Register reg); + +#else + + // Interpreter-specific registers + void save_bcp() { + sd(BCP, FP, frame::interpreter_frame_bcp_offset * wordSize); + } + + void restore_bcp() { + ld(BCP, FP, frame::interpreter_frame_bcp_offset * wordSize); + } + + void restore_locals() { + ld(LVP, FP, frame::interpreter_frame_locals_offset * wordSize); + } + + // Helpers for runtime call arguments/results + void get_method(Register reg) { + ld(reg, FP, frame::interpreter_frame_method_offset * wordSize); + } + + void get_const(Register reg){ + get_method(reg); + ld(reg, reg, in_bytes(Method::const_offset())); + } + + void get_constant_pool(Register reg) { + get_const(reg); + ld(reg, reg, in_bytes(ConstMethod::constants_offset())); + } + + void get_constant_pool_cache(Register reg) { + get_constant_pool(reg); + ld(reg, reg, ConstantPool::cache_offset_in_bytes()); + } + + void get_cpool_and_tags(Register cpool, Register tags) { + get_constant_pool(cpool); + ld(tags, cpool, ConstantPool::tags_offset_in_bytes()); + } + + void get_unsigned_2_byte_index_at_bcp(Register reg, int bcp_offset); + void get_cache_and_index_at_bcp(Register cache, Register index, int bcp_offset, size_t index_size = sizeof(u2)); + void get_cache_and_index_and_bytecode_at_bcp(Register cache, Register index, Register bytecode, int byte_no, int bcp_offset, size_t index_size = sizeof(u2)); + void get_cache_entry_pointer_at_bcp(Register cache, Register tmp, int bcp_offset, size_t index_size = sizeof(u2)); + void get_cache_index_at_bcp(Register index, int bcp_offset, size_t index_size = sizeof(u2)); + void get_method_counters(Register method, Register mcs, Label& skip); + + // load 
cpool->resolved_references(index); + void load_resolved_reference_at_index(Register result, Register index, Register tmp); + + // load cpool->resolved_klass_at(index) + void load_resolved_klass_at_index(Register cpool, // the constant pool (corrupted on return) + Register index, // the constant pool index (corrupted on return) + Register klass); // contains the Klass on return + + void pop_ptr( Register r = FSR); + void pop_i( Register r = FSR); + void pop_l( Register r = FSR); + void pop_f(FloatRegister r = FSF); + void pop_d(FloatRegister r = FSF); + + void push_ptr( Register r = FSR); + void push_i( Register r = FSR); + void push_l( Register r = FSR); + void push_f(FloatRegister r = FSF); + void push_d(FloatRegister r = FSF); + + void pop(Register r ) { ((MacroAssembler*)this)->pop(r); } + + void push(Register r ) { ((MacroAssembler*)this)->push(r); } + + void pop(TosState state); // transition vtos -> state + void push(TosState state); // transition state -> vtos + + void empty_expression_stack() { + ld(SP, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); + // NULL last_sp until next java call + sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); + } + + // Super call_VM calls - correspond to MacroAssembler::call_VM(_leaf) calls + void load_ptr(int n, Register val); + void store_ptr(int n, Register val); + + // Generate a subtype check: branch to ok_is_subtype if sub_klass is + // a subtype of super_klass. + //void gen_subtype_check( Register sub_klass, Label &ok_is_subtype ); + void gen_subtype_check( Register Rsup_klass, Register sub_klass, Label &ok_is_subtype ); + + // Dispatching + void dispatch_prolog(TosState state, int step = 0); + void dispatch_epilog(TosState state, int step = 0); + void dispatch_only(TosState state, bool generate_poll = false); + void dispatch_only_normal(TosState state); + void dispatch_only_noverify(TosState state); + void dispatch_next(TosState state, int step = 0, bool generate_poll = false); + void dispatch_via (TosState state, address* table); + + // jump to an invoked target + void prepare_to_jump_from_interpreted(); + void jump_from_interpreted(Register method, Register temp); + + + // Returning from interpreted functions + // + // Removes the current activation (incl. unlocking of monitors) + // and sets up the return address. This code is also used for + // exception unwindwing. In that case, we do not want to throw + // IllegalMonitorStateExceptions, since that might get us into an + // infinite rethrow exception loop. + // Additionally this code is used for popFrame and earlyReturn. + // In popFrame case we want to skip throwing an exception, + // installing an exception, and notifying jvmdi. + // In earlyReturn case we only want to skip throwing an exception + // and installing an exception. 
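Because the three boolean parameters described above interact in a way that is easy to misread, here is the same policy restated as a small stand-alone C++ outline (control flow and comments only, with an invented function name; it is a reading aid, not the patch's code):

    // Policy outline for remove_activation(), per the comment above.
    void remove_activation_outline(bool locked_monitor_found,
                                   bool throw_monitor_exception,
                                   bool install_monitor_exception,
                                   bool notify_jvmdi) {
      if (locked_monitor_found) {
        if (throw_monitor_exception) {
          // Normal return path with a still-locked monitor:
          // throw IllegalMonitorStateException and do not come back.
        } else {
          // Exception unwinding / popFrame / earlyReturn:
          // unlock the monitor and keep unwinding ...
          if (install_monitor_exception) {
            // ... optionally installing (not throwing) IllegalMonitorStateException.
          }
        }
      }
      if (notify_jvmdi) {
        // Post the JVMTI MethodExit notification; popFrame/earlyReturn skip it.
      }
      // Finally the activation itself is removed: restore sender SP/FP and the return address.
    }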
+ void remove_activation(TosState state, Register ret_addr, + bool throw_monitor_exception = true, + bool install_monitor_exception = true, + bool notify_jvmdi = true); +#endif // CC_INTERP + + // Object locking + void lock_object (Register lock_reg); + void unlock_object(Register lock_reg); + +#ifndef CC_INTERP + + // Interpreter profiling operations + void set_method_data_pointer_for_bcp(); + void test_method_data_pointer(Register mdp, Label& zero_continue); + void verify_method_data_pointer(); + + void set_mdp_data_at(Register mdp_in, int constant, Register value); + void increment_mdp_data_at(Address data, bool decrement = false); + void increment_mdp_data_at(Register mdp_in, int constant, + bool decrement = false); + void increment_mdp_data_at(Register mdp_in, Register reg, int constant, + bool decrement = false); + void increment_mask_and_jump(Address counter_addr, + int increment, int mask, + Register scratch, bool preloaded, + Condition cond, Label* where); + void set_mdp_flag_at(Register mdp_in, int flag_constant); + void test_mdp_data_at(Register mdp_in, int offset, Register value, + Register test_value_out, + Label& not_equal_continue); + + void record_klass_in_profile(Register receiver, Register mdp, + Register reg2, bool is_virtual_call); + void record_klass_in_profile_helper(Register receiver, Register mdp, + Register reg2, int start_row, + Label& done, bool is_virtual_call); + + void update_mdp_by_offset(Register mdp_in, int offset_of_offset); + void update_mdp_by_offset(Register mdp_in, Register reg, int offset_of_disp); + void update_mdp_by_constant(Register mdp_in, int constant); + void update_mdp_for_ret(Register return_bci); + + void profile_taken_branch(Register mdp, Register bumped_count); + void profile_not_taken_branch(Register mdp); + void profile_call(Register mdp); + void profile_final_call(Register mdp); + void profile_virtual_call(Register receiver, Register mdp, + Register scratch2, + bool receiver_can_be_null = false); + void profile_called_method(Register method, Register mdp, Register reg2) NOT_JVMCI_RETURN; + void profile_ret(Register return_bci, Register mdp); + void profile_null_seen(Register mdp); + void profile_typecheck(Register mdp, Register klass, Register scratch); + void profile_typecheck_failed(Register mdp); + void profile_switch_default(Register mdp); + void profile_switch_case(Register index_in_scratch, Register mdp, + Register scratch2); + + // Debugging + // only if +VerifyOops && state == atos + void verify_oop(Register reg, TosState state = atos); + // only if +VerifyFPU && (state == ftos || state == dtos) + void verify_FPU(int stack_depth, TosState state = ftos); + + void profile_obj_type(Register obj, const Address& mdo_addr); + void profile_arguments_type(Register mdp, Register callee, Register tmp, bool is_virtual); + void profile_return_type(Register mdp, Register ret, Register tmp); + void profile_parameters_type(Register mdp, Register tmp1, Register tmp2); +#endif // !CC_INTERP + + typedef enum { NotifyJVMTI, SkipNotifyJVMTI } NotifyMethodExitMode; + + // support for jvmti/dtrace + void notify_method_entry(); + void notify_method_exit(TosState state, NotifyMethodExitMode mode); +}; + +#endif // CPU_MIPS_VM_INTERP_MASM_MIPS_64_HPP diff --git a/src/hotspot/cpu/mips/interp_masm_mips_64.cpp b/src/hotspot/cpu/mips/interp_masm_mips_64.cpp new file mode 100644 index 00000000000..eb35bb0633e --- /dev/null +++ b/src/hotspot/cpu/mips/interp_masm_mips_64.cpp @@ -0,0 +1,2126 @@ +/* + * Copyright (c) 2003, 2013, Oracle and/or its affiliates. 
All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "gc/shared/barrierSet.hpp" +#include "gc/shared/barrierSetAssembler.hpp" +#include "interp_masm_mips.hpp" +#include "interpreter/interpreter.hpp" +#include "interpreter/interpreterRuntime.hpp" +#include "oops/arrayOop.hpp" +#include "oops/markOop.hpp" +#include "oops/methodData.hpp" +#include "oops/method.hpp" +#include "prims/jvmtiExport.hpp" +#include "prims/jvmtiThreadState.hpp" +#include "runtime/basicLock.hpp" +#include "runtime/biasedLocking.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/safepointMechanism.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/thread.inline.hpp" + +#define T0 RT0 +#define T1 RT1 +#define T2 RT2 +#define T3 RT3 +#define T8 RT8 +#define T9 RT9 + +// Implementation of InterpreterMacroAssembler + +#ifdef CC_INTERP +void InterpreterMacroAssembler::get_method(Register reg) { +} +#endif // CC_INTERP + +void InterpreterMacroAssembler::get_2_byte_integer_at_bcp(Register reg, Register tmp, int offset) { + // The runtime address of BCP may be unaligned. + // Refer to the SPARC implementation. + lbu(reg, BCP, offset+1); + lbu(tmp, BCP, offset); + dsll(reg, reg, 8); + daddu(reg, tmp, reg); +} + +void InterpreterMacroAssembler::get_4_byte_integer_at_bcp(Register reg, Register tmp, int offset) { + assert(reg != tmp, "need separate temp register"); + if (offset & 3) { // Offset unaligned? + lbu(reg, BCP, offset+3); + lbu(tmp, BCP, offset+2); + dsll(reg, reg, 8); + daddu(reg, tmp, reg); + lbu(tmp, BCP, offset+1); + dsll(reg, reg, 8); + daddu(reg, tmp, reg); + lbu(tmp, BCP, offset); + dsll(reg, reg, 8); + daddu(reg, tmp, reg); + } else { + lwu(reg, BCP, offset); + } +} + +void InterpreterMacroAssembler::jump_to_entry(address entry) { + assert(entry, "Entry must have been generated by now"); + jmp(entry); +} + +#ifndef CC_INTERP + +void InterpreterMacroAssembler::call_VM_leaf_base(address entry_point, + int number_of_arguments) { + // interpreter specific + // + // Note: No need to save/restore bcp & locals (r13 & r14) pointer + // since these are callee saved registers and no blocking/ + // GC can happen in leaf calls. + // Further Note: DO NOT save/restore bcp/locals. If a caller has + // already saved them so that it can use BCP/LVP as temporaries + // then a save/restore here will DESTROY the copy the caller + // saved! There used to be a save_bcp() that only happened in + // the ASSERT path (no restore_bcp). 
Which caused bizarre failures + // when jvm built with ASSERTs. +#ifdef ASSERT + save_bcp(); + { + Label L; + ld(AT,FP,frame::interpreter_frame_last_sp_offset * wordSize); + beq(AT,R0,L); + delayed()->nop(); + stop("InterpreterMacroAssembler::call_VM_leaf_base: last_sp != NULL"); + bind(L); + } +#endif + // super call + MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments); + // interpreter specific + // Used to ASSERT that BCP/LVP were equal to frame's bcp/locals + // but since they may not have been saved (and we don't want to + // save them here (see note above) the assert is invalid. +} + +void InterpreterMacroAssembler::call_VM_base(Register oop_result, + Register java_thread, + Register last_java_sp, + address entry_point, + int number_of_arguments, + bool check_exceptions) { + // interpreter specific + // + // Note: Could avoid restoring locals ptr (callee saved) - however doesn't + // really make a difference for these runtime calls, since they are + // slow anyway. Btw., bcp must be saved/restored since it may change + // due to GC. + assert(java_thread == noreg , "not expecting a precomputed java thread"); + save_bcp(); +#ifdef ASSERT + { + Label L; + ld(AT, FP, frame::interpreter_frame_last_sp_offset * wordSize); + beq(AT, R0, L); + delayed()->nop(); + stop("InterpreterMacroAssembler::call_VM_base: last_sp != NULL"); + bind(L); + } +#endif /* ASSERT */ + // super call + MacroAssembler::call_VM_base(oop_result, java_thread, last_java_sp, + entry_point, number_of_arguments, + check_exceptions); + // interpreter specific + restore_bcp(); + restore_locals(); +} + + +void InterpreterMacroAssembler::check_and_handle_popframe(Register java_thread) { + if (JvmtiExport::can_pop_frame()) { + Label L; + // Initiate popframe handling only if it is not already being + // processed. If the flag has the popframe_processing bit set, it + // means that this code is called *during* popframe handling - we + // don't want to reenter. + // This method is only called just after the call into the vm in + // call_VM_base, so the arg registers are available. 
+ // Not clear if any other register is available, so load AT twice + assert(AT != java_thread, "check"); + lw(AT, java_thread, in_bytes(JavaThread::popframe_condition_offset())); + andi(AT, AT, JavaThread::popframe_pending_bit); + beq(AT, R0, L); + delayed()->nop(); + + lw(AT, java_thread, in_bytes(JavaThread::popframe_condition_offset())); + andi(AT, AT, JavaThread::popframe_processing_bit); + bne(AT, R0, L); + delayed()->nop(); + call_VM_leaf(CAST_FROM_FN_PTR(address, Interpreter::remove_activation_preserving_args_entry)); + jr(V0); + delayed()->nop(); + bind(L); + } +} + + +void InterpreterMacroAssembler::load_earlyret_value(TosState state) { +#ifndef OPT_THREAD + Register thread = T8; + get_thread(thread); +#else + Register thread = TREG; +#endif + ld_ptr(T8, thread, in_bytes(JavaThread::jvmti_thread_state_offset())); + const Address tos_addr (T8, in_bytes(JvmtiThreadState::earlyret_tos_offset())); + const Address oop_addr (T8, in_bytes(JvmtiThreadState::earlyret_oop_offset())); + const Address val_addr (T8, in_bytes(JvmtiThreadState::earlyret_value_offset())); + //V0, oop_addr,V1,val_addr + switch (state) { + case atos: + ld_ptr(V0, oop_addr); + st_ptr(R0, oop_addr); + verify_oop(V0, state); + break; + case ltos: + ld_ptr(V0, val_addr); // fall through + break; + case btos: // fall through + case ztos: // fall through + case ctos: // fall through + case stos: // fall through + case itos: + lw(V0, val_addr); + break; + case ftos: + lwc1(F0, T8, in_bytes(JvmtiThreadState::earlyret_value_offset())); + break; + case dtos: + ldc1(F0, T8, in_bytes(JvmtiThreadState::earlyret_value_offset())); + break; + case vtos: /* nothing to do */ break; + default : ShouldNotReachHere(); + } + // Clean up tos value in the thread object + move(AT, (int)ilgl); + sw(AT, tos_addr); + sw(R0, T8, in_bytes(JvmtiThreadState::earlyret_value_offset())); +} + + +void InterpreterMacroAssembler::check_and_handle_earlyret(Register java_thread) { + if (JvmtiExport::can_force_early_return()) { + Label L; + Register tmp = T9; + + assert(java_thread != AT, "check"); + assert(java_thread != tmp, "check"); + ld_ptr(AT, java_thread, in_bytes(JavaThread::jvmti_thread_state_offset())); + beq(AT, R0, L); + delayed()->nop(); + + // Initiate earlyret handling only if it is not already being processed. + // If the flag has the earlyret_processing bit set, it means that this code + // is called *during* earlyret handling - we don't want to reenter. + lw(AT, AT, in_bytes(JvmtiThreadState::earlyret_state_offset())); + move(tmp, JvmtiThreadState::earlyret_pending); + bne(tmp, AT, L); + delayed()->nop(); + + // Call Interpreter::remove_activation_early_entry() to get the address of the + // same-named entrypoint in the generated interpreter code. 
+ ld_ptr(tmp, java_thread, in_bytes(JavaThread::jvmti_thread_state_offset())); + lw(AT, tmp, in_bytes(JvmtiThreadState::earlyret_tos_offset())); + move(A0, AT); + call_VM_leaf(CAST_FROM_FN_PTR(address, Interpreter::remove_activation_early_entry), A0); + jr(V0); + delayed()->nop(); + bind(L); + } +} + + +void InterpreterMacroAssembler::get_unsigned_2_byte_index_at_bcp(Register reg, + int bcp_offset) { + assert(bcp_offset >= 0, "bcp is still pointing to start of bytecode"); + lbu(AT, BCP, bcp_offset); + lbu(reg, BCP, bcp_offset + 1); + ins(reg, AT, 8, 8); +} + + +void InterpreterMacroAssembler::get_cache_index_at_bcp(Register index, + int bcp_offset, + size_t index_size) { + assert(bcp_offset > 0, "bcp is still pointing to start of bytecode"); + if (index_size == sizeof(u2)) { + get_2_byte_integer_at_bcp(index, AT, bcp_offset); + } else if (index_size == sizeof(u4)) { + get_4_byte_integer_at_bcp(index, AT, bcp_offset); + // Check if the secondary index definition is still ~x, otherwise + // we have to change the following assembler code to calculate the + // plain index. + assert(ConstantPool::decode_invokedynamic_index(~123) == 123, "else change next line"); + nor(index, index, R0); + sll(index, index, 0); + } else if (index_size == sizeof(u1)) { + lbu(index, BCP, bcp_offset); + } else { + ShouldNotReachHere(); + } +} + + +void InterpreterMacroAssembler::get_cache_and_index_at_bcp(Register cache, + Register index, + int bcp_offset, + size_t index_size) { + assert_different_registers(cache, index); + get_cache_index_at_bcp(index, bcp_offset, index_size); + ld(cache, FP, frame::interpreter_frame_cache_offset * wordSize); + assert(sizeof(ConstantPoolCacheEntry) == 4 * wordSize, "adjust code below"); + assert(exact_log2(in_words(ConstantPoolCacheEntry::size())) == 2, "else change next line"); + shl(index, 2); +} + + +void InterpreterMacroAssembler::get_cache_and_index_and_bytecode_at_bcp(Register cache, + Register index, + Register bytecode, + int byte_no, + int bcp_offset, + size_t index_size) { + get_cache_and_index_at_bcp(cache, index, bcp_offset, index_size); + // We use a 32-bit load here since the layout of 64-bit words on + // little-endian machines allow us that. 
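A small stand-alone illustration of the little-endian point made in the comment just above (hypothetical value; nothing here is taken from the patch): a 32-bit load at the base address of a 64-bit word observes its low half, which is where the index and bytecode bits extracted by the shifts below live, so the lw is sufficient.

    #include <cstdint>
    #include <cstring>

    int main() {
      uint64_t word = 0x1122334455667788ULL;   // stand-in for a 64-bit cache-entry field
      uint32_t low;
      std::memcpy(&low, &word, sizeof(low));   // a 32-bit load at the word's own address
      // On a little-endian target, as this port assumes, 'low' is 0x55667788,
      // i.e. the low half of the word.
      return low == 0x55667788u ? 0 : 1;
    }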
+ dsll(AT, index, Address::times_ptr); + daddu(AT, cache, AT); + lw(bytecode, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::indices_offset())); + if(os::is_MP()) { + sync(); // load acquire + } + + const int shift_count = (1 + byte_no) * BitsPerByte; + assert((byte_no == TemplateTable::f1_byte && shift_count == ConstantPoolCacheEntry::bytecode_1_shift) || + (byte_no == TemplateTable::f2_byte && shift_count == ConstantPoolCacheEntry::bytecode_2_shift), + "correct shift count"); + dsrl(bytecode, bytecode, shift_count); + assert(ConstantPoolCacheEntry::bytecode_1_mask == ConstantPoolCacheEntry::bytecode_2_mask, "common mask"); + move(AT, ConstantPoolCacheEntry::bytecode_1_mask); + andr(bytecode, bytecode, AT); +} + +void InterpreterMacroAssembler::get_cache_entry_pointer_at_bcp(Register cache, + Register tmp, + int bcp_offset, + size_t index_size) { + assert(bcp_offset > 0, "bcp is still pointing to start of bytecode"); + assert(cache != tmp, "must use different register"); + get_cache_index_at_bcp(tmp, bcp_offset, index_size); + assert(sizeof(ConstantPoolCacheEntry) == 4 * wordSize, "adjust code below"); + // convert from field index to ConstantPoolCacheEntry index + // and from word offset to byte offset + assert(exact_log2(in_bytes(ConstantPoolCacheEntry::size_in_bytes())) == 2 + LogBytesPerWord, "else change next line"); + shl(tmp, 2 + LogBytesPerWord); + ld(cache, FP, frame::interpreter_frame_cache_offset * wordSize); + // skip past the header + daddiu(cache, cache, in_bytes(ConstantPoolCache::base_offset())); + daddu(cache, cache, tmp); +} + +void InterpreterMacroAssembler::get_method_counters(Register method, + Register mcs, Label& skip) { + Label has_counters; + ld(mcs, method, in_bytes(Method::method_counters_offset())); + bne(mcs, R0, has_counters); + delayed()->nop(); + call_VM(noreg, CAST_FROM_FN_PTR(address, + InterpreterRuntime::build_method_counters), method); + ld(mcs, method, in_bytes(Method::method_counters_offset())); + beq(mcs, R0, skip); // No MethodCounters allocated, OutOfMemory + delayed()->nop(); + bind(has_counters); +} + +// Load object from cpool->resolved_references(index) +void InterpreterMacroAssembler::load_resolved_reference_at_index( + Register result, Register index, Register tmp) { + assert_different_registers(result, index); + // convert from field index to resolved_references() index and from + // word index to byte offset. 
Since this is a java object, it can be compressed + shl(index, LogBytesPerHeapOop); + + get_constant_pool(result); + // load pointer for resolved_references[] objArray + ld(result, result, ConstantPool::cache_offset_in_bytes()); + ld(result, result, ConstantPoolCache::resolved_references_offset_in_bytes()); + resolve_oop_handle(result, tmp); + // Add in the index + daddu(result, result, index); + load_heap_oop(result, Address(result, arrayOopDesc::base_offset_in_bytes(T_OBJECT)), tmp); +} + +// load cpool->resolved_klass_at(index) +void InterpreterMacroAssembler::load_resolved_klass_at_index(Register cpool, + Register index, Register klass) { + dsll(AT, index, Address::times_ptr); + if (UseLEXT1 && Assembler::is_simm(sizeof(ConstantPool), 8)) { + gslhx(index, cpool, AT, sizeof(ConstantPool)); + } else { + daddu(AT, cpool, AT); + lh(index, AT, sizeof(ConstantPool)); + } + Register resolved_klasses = cpool; + ld_ptr(resolved_klasses, Address(cpool, ConstantPool::resolved_klasses_offset_in_bytes())); + dsll(AT, index, Address::times_ptr); + daddu(AT, resolved_klasses, AT); + ld(klass, AT, Array::base_offset_in_bytes()); +} + +// Resets LVP to locals. Register sub_klass cannot be any of the above. +void InterpreterMacroAssembler::gen_subtype_check( Register Rsup_klass, Register Rsub_klass, Label &ok_is_subtype ) { + assert( Rsub_klass != Rsup_klass, "Rsup_klass holds superklass" ); + assert( Rsub_klass != T1, "T1 holds 2ndary super array length" ); + assert( Rsub_klass != T0, "T0 holds 2ndary super array scan ptr" ); + // Profile the not-null value's klass. + // Here T9 and T1 are used as temporary registers. + profile_typecheck(T9, Rsub_klass, T1); // blows T9, reloads T1 + + // Do the check. + check_klass_subtype(Rsub_klass, Rsup_klass, T1, ok_is_subtype); // blows T1 + + // Profile the failure of the check. + profile_typecheck_failed(T9); // blows T9 +} + + + +// Java Expression Stack + +void InterpreterMacroAssembler::pop_ptr(Register r) { + ld(r, SP, 0); + daddiu(SP, SP, Interpreter::stackElementSize); +} + +void InterpreterMacroAssembler::pop_i(Register r) { + lw(r, SP, 0); + daddiu(SP, SP, Interpreter::stackElementSize); +} + +void InterpreterMacroAssembler::pop_l(Register r) { + ld(r, SP, 0); + daddiu(SP, SP, 2 * Interpreter::stackElementSize); +} + +void InterpreterMacroAssembler::pop_f(FloatRegister r) { + lwc1(r, SP, 0); + daddiu(SP, SP, Interpreter::stackElementSize); +} + +void InterpreterMacroAssembler::pop_d(FloatRegister r) { + ldc1(r, SP, 0); + daddiu(SP, SP, 2 * Interpreter::stackElementSize); +} + +void InterpreterMacroAssembler::push_ptr(Register r) { + daddiu(SP, SP, - Interpreter::stackElementSize); + sd(r, SP, 0); +} + +void InterpreterMacroAssembler::push_i(Register r) { + // For compatibility reason, don't change to sw. 
+ daddiu(SP, SP, - Interpreter::stackElementSize); + sd(r, SP, 0); +} + +void InterpreterMacroAssembler::push_l(Register r) { + daddiu(SP, SP, -2 * Interpreter::stackElementSize); + sd(r, SP, 0); + sd(R0, SP, Interpreter::stackElementSize); +} + +void InterpreterMacroAssembler::push_f(FloatRegister r) { + daddiu(SP, SP, - Interpreter::stackElementSize); + swc1(r, SP, 0); +} + +void InterpreterMacroAssembler::push_d(FloatRegister r) { + daddiu(SP, SP, -2 * Interpreter::stackElementSize); + sdc1(r, SP, 0); + sd(R0, SP, Interpreter::stackElementSize); +} + +void InterpreterMacroAssembler::pop(TosState state) { + switch (state) { + case atos: pop_ptr(); break; + case btos: + case ztos: + case ctos: + case stos: + case itos: pop_i(); break; + case ltos: pop_l(); break; + case ftos: pop_f(); break; + case dtos: pop_d(); break; + case vtos: /* nothing to do */ break; + default: ShouldNotReachHere(); + } + verify_oop(FSR, state); +} + +//FSR=V0,SSR=V1 +void InterpreterMacroAssembler::push(TosState state) { + verify_oop(FSR, state); + switch (state) { + case atos: push_ptr(); break; + case btos: + case ztos: + case ctos: + case stos: + case itos: push_i(); break; + case ltos: push_l(); break; + case ftos: push_f(); break; + case dtos: push_d(); break; + case vtos: /* nothing to do */ break; + default : ShouldNotReachHere(); + } +} + + + +void InterpreterMacroAssembler::load_ptr(int n, Register val) { + ld(val, SP, Interpreter::expr_offset_in_bytes(n)); +} + +void InterpreterMacroAssembler::store_ptr(int n, Register val) { + sd(val, SP, Interpreter::expr_offset_in_bytes(n)); +} + +// Jump to from_interpreted entry of a call unless single stepping is possible +// in this thread in which case we must call the i2i entry +void InterpreterMacroAssembler::jump_from_interpreted(Register method, Register temp) { + // record last_sp + move(Rsender, SP); + sd(SP, FP, frame::interpreter_frame_last_sp_offset * wordSize); + + if (JvmtiExport::can_post_interpreter_events()) { + Label run_compiled_code; + // JVMTI events, such as single-stepping, are implemented partly by avoiding running + // compiled code in threads for which the event is enabled. Check here for + // interp_only_mode if these events CAN be enabled. +#ifndef OPT_THREAD + Register thread = temp; + get_thread(thread); +#else + Register thread = TREG; +#endif + // interp_only is an int, on little endian it is sufficient to test the byte only + // Is a cmpl faster? + lw(AT, thread, in_bytes(JavaThread::interp_only_mode_offset())); + beq(AT, R0, run_compiled_code); + delayed()->nop(); + ld(AT, method, in_bytes(Method::interpreter_entry_offset())); + jr(AT); + delayed()->nop(); + bind(run_compiled_code); + } + + ld(AT, method, in_bytes(Method::from_interpreted_offset())); + jr(AT); + delayed()->nop(); +} + + +// The following two routines provide a hook so that an implementation +// can schedule the dispatch in two parts. mips64 does not do this. +void InterpreterMacroAssembler::dispatch_prolog(TosState state, int step) { + // Nothing mips64 specific to be done here +} + +void InterpreterMacroAssembler::dispatch_epilog(TosState state, int step) { + dispatch_next(state, step); +} + +// assume the next bytecode in T8. 
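dispatch_base, which follows, is the heart of the template interpreter's threading: it scales the next bytecode to a word offset and jumps through one of the per-TosState dispatch tables, or through the safepoint table when the thread-local poll bit is set. Stripped of the MIPS addressing and the GP-relative fast path, the mechanism is a two-dimensional table lookup; a stand-alone C++ model with invented names and sizes assumed only for illustration:

    #include <cstdint>

    typedef void (*bytecode_handler_t)();               // address of generated template code

    enum { kNumTosStates = 10, kNumBytecodes = 256 };   // sizes assumed for illustration

    // Model of the per-state tables: one handler address per bytecode.
    static bytecode_handler_t dispatch_tables[kNumTosStates][kNumBytecodes];
    static bytecode_handler_t safepoint_tables[kNumTosStates][kNumBytecodes];

    inline bytecode_handler_t next_handler(int tos_state, const uint8_t* bcp, int step,
                                           bool poll_bit_set) {
      uint8_t next_bytecode = bcp[step];                      // lbu(Rnext, BCP, step)
      const bytecode_handler_t* table =
          poll_bit_set ? safepoint_tables[tos_state]          // safepoint variant of the table
                       : dispatch_tables[tos_state];          // normal dispatch table
      return table[next_bytecode];                            // dsll + ld + jr in the real code
    }

dispatch_next, further below, simply advances BCP by step before doing this lookup, which is the lbu/increment pair at its start.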
+void InterpreterMacroAssembler::dispatch_base(TosState state, + address* table, + bool verifyoop, + bool generate_poll) { + Register thread = TREG; +#ifndef OPT_THREAD + get_thread(thread); +#endif + + if (VerifyActivationFrameSize) { + Label L; + + dsubu(T2, FP, SP); + int min_frame_size = (frame::link_offset - + frame::interpreter_frame_initial_sp_offset) * wordSize; + daddiu(T2, T2, -min_frame_size); + bgez(T2, L); + delayed()->nop(); + stop("broken stack frame"); + bind(L); + } + // FIXME: I do not know which register should pass to verify_oop + if (verifyoop) verify_oop(FSR, state); + dsll(T2, Rnext, LogBytesPerWord); + + Label safepoint; + address* const safepoint_table = Interpreter::safept_table(state); + bool needs_thread_local_poll = generate_poll && + SafepointMechanism::uses_thread_local_poll() && table != safepoint_table; + + if (needs_thread_local_poll) { + NOT_PRODUCT(block_comment("Thread-local Safepoint poll")); + ld(T3, thread, in_bytes(Thread::polling_page_offset())); + andi(T3, T3, SafepointMechanism::poll_bit()); + bne(T3, R0, safepoint); + delayed()->nop(); + } + + if((long)table >= (long)Interpreter::dispatch_table(btos) && + (long)table <= (long)Interpreter::dispatch_table(vtos) + ) { + int table_size = (long)Interpreter::dispatch_table(itos) - (long)Interpreter::dispatch_table(stos); + int table_offset = ((int)state - (int)itos) * table_size; + + // GP points to the starting address of Interpreter::dispatch_table(itos). + // See StubGenerator::generate_call_stub(address& return_address) for the initialization of GP. + if(table_offset != 0) { + daddiu(T3, GP, table_offset); + if (UseLEXT1) { + gsldx(T3, T2, T3, 0); + } else { + daddu(T3, T2, T3); + ld(T3, T3, 0); + } + } else { + if (UseLEXT1) { + gsldx(T3, T2, GP, 0); + } else { + daddu(T3, T2, GP); + ld(T3, T3, 0); + } + } + } else { + li(T3, (long)table); + if (UseLEXT1) { + gsldx(T3, T2, T3, 0); + } else { + daddu(T3, T2, T3); + ld(T3, T3, 0); + } + } + jr(T3); + delayed()->nop(); + + if (needs_thread_local_poll) { + bind(safepoint); + li(T3, (long)safepoint_table); + if (UseLEXT1) { + gsldx(T3, T2, T3, 0); + } else { + daddu(T3, T2, T3); + ld(T3, T3, 0); + } + jr(T3); + delayed()->nop(); + } +} + +void InterpreterMacroAssembler::dispatch_only(TosState state, bool generate_poll) { + dispatch_base(state, Interpreter::dispatch_table(state), true, generate_poll); +} + +void InterpreterMacroAssembler::dispatch_only_normal(TosState state) { + dispatch_base(state, Interpreter::normal_table(state)); +} + +void InterpreterMacroAssembler::dispatch_only_noverify(TosState state) { + dispatch_base(state, Interpreter::normal_table(state), false); +} + + +void InterpreterMacroAssembler::dispatch_next(TosState state, int step, bool generate_poll) { + // load next bytecode (load before advancing r13 to prevent AGI) + lbu(Rnext, BCP, step); + increment(BCP, step); + dispatch_base(state, Interpreter::dispatch_table(state), true, generate_poll); +} + +void InterpreterMacroAssembler::dispatch_via(TosState state, address* table) { + // load current bytecode + lbu(Rnext, BCP, 0); + dispatch_base(state, table); +} + +// remove activation +// +// Unlock the receiver if this is a synchronized method. +// Unlock any Java monitors from syncronized blocks. +// Remove the activation from the stack. 
+// +// If there are locked Java monitors +// If throw_monitor_exception +// throws IllegalMonitorStateException +// Else if install_monitor_exception +// installs IllegalMonitorStateException +// Else +// no error processing +// used registers : T1, T2, T3, T8 +// T1 : thread, method access flags +// T2 : monitor entry pointer +// T3 : method, monitor top +// T8 : unlock flag +void InterpreterMacroAssembler::remove_activation( + TosState state, + Register ret_addr, + bool throw_monitor_exception, + bool install_monitor_exception, + bool notify_jvmdi) { + // Note: Registers V0, V1 and F0, F1 may be in use for the result + // check if synchronized method + Label unlocked, unlock, no_unlock; + + // get the value of _do_not_unlock_if_synchronized into T8 +#ifndef OPT_THREAD + Register thread = T1; + get_thread(thread); +#else + Register thread = TREG; +#endif + lb(T8, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); + // reset the flag + sb(R0, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); + // get method access flags + ld(T3, FP, frame::interpreter_frame_method_offset * wordSize); + lw(T1, T3, in_bytes(Method::access_flags_offset())); + andi(T1, T1, JVM_ACC_SYNCHRONIZED); + beq(T1, R0, unlocked); + delayed()->nop(); + + // Don't unlock anything if the _do_not_unlock_if_synchronized flag is set. + bne(T8, R0, no_unlock); + delayed()->nop(); + // unlock monitor + push(state); // save result + + // BasicObjectLock will be first in list, since this is a + // synchronized method. However, need to check that the object has + // not been unlocked by an explicit monitorexit bytecode. + daddiu(c_rarg0, FP, frame::interpreter_frame_initial_sp_offset * wordSize + - (int)sizeof(BasicObjectLock)); + // address of first monitor + ld(T1, c_rarg0, BasicObjectLock::obj_offset_in_bytes()); + bne(T1, R0, unlock); + delayed()->nop(); + pop(state); + if (throw_monitor_exception) { + // Entry already unlocked, need to throw exception + // I think mips do not need empty_FPU_stack + // remove possible return value from FPU-stack, otherwise stack could overflow + empty_FPU_stack(); + call_VM(NOREG, CAST_FROM_FN_PTR(address, + InterpreterRuntime::throw_illegal_monitor_state_exception)); + should_not_reach_here(); + } else { + // Monitor already unlocked during a stack unroll. If requested, + // install an illegal_monitor_state_exception. Continue with + // stack unrolling. 
+ if (install_monitor_exception) { + // remove possible return value from FPU-stack, + // otherwise stack could overflow + empty_FPU_stack(); + call_VM(NOREG, CAST_FROM_FN_PTR(address, + InterpreterRuntime::new_illegal_monitor_state_exception)); + + } + + b(unlocked); + delayed()->nop(); + } + + bind(unlock); + unlock_object(c_rarg0); + pop(state); + + // Check that for block-structured locking (i.e., that all locked + // objects has been unlocked) + bind(unlocked); + + // V0, V1: Might contain return value + + // Check that all monitors are unlocked + { + Label loop, exception, entry, restart; + const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; + const Address monitor_block_top(FP, + frame::interpreter_frame_monitor_block_top_offset * wordSize); + + bind(restart); + // points to current entry, starting with top-most entry + ld(c_rarg0, monitor_block_top); + // points to word before bottom of monitor block + daddiu(T3, FP, frame::interpreter_frame_initial_sp_offset * wordSize); + b(entry); + delayed()->nop(); + + // Entry already locked, need to throw exception + bind(exception); + + if (throw_monitor_exception) { + // Throw exception + // remove possible return value from FPU-stack, + // otherwise stack could overflow + empty_FPU_stack(); + MacroAssembler::call_VM(NOREG, CAST_FROM_FN_PTR(address, + InterpreterRuntime::throw_illegal_monitor_state_exception)); + should_not_reach_here(); + } else { + // Stack unrolling. Unlock object and install illegal_monitor_exception + // Unlock does not block, so don't have to worry about the frame + // We don't have to preserve c_rarg0, since we are going to + // throw an exception + + push(state); + unlock_object(c_rarg0); + pop(state); + + if (install_monitor_exception) { + empty_FPU_stack(); + call_VM(NOREG, CAST_FROM_FN_PTR(address, + InterpreterRuntime::new_illegal_monitor_state_exception)); + } + + b(restart); + delayed()->nop(); + } + + bind(loop); + ld(T1, c_rarg0, BasicObjectLock::obj_offset_in_bytes()); + bne(T1, R0, exception);// check if current entry is used + delayed()->nop(); + + daddiu(c_rarg0, c_rarg0, entry_size);// otherwise advance to next entry + bind(entry); + bne(c_rarg0, T3, loop); // check if bottom reached + delayed()->nop(); // if not at bottom then check this entry + } + + bind(no_unlock); + + // jvmpi support (jvmdi does not generate MethodExit on exception / popFrame) + if (notify_jvmdi) { + notify_method_exit(state, NotifyJVMTI); // preserve TOSCA + } else { + notify_method_exit(state, SkipNotifyJVMTI); // preserve TOSCA + } + + // remove activation + ld(TSR, FP, frame::interpreter_frame_sender_sp_offset * wordSize); + if (StackReservedPages > 0) { + // testing if reserved zone needs to be re-enabled + Label no_reserved_zone_enabling; + + ld(AT, Address(thread, JavaThread::reserved_stack_activation_offset())); + dsubu(AT, TSR, AT); + blez(AT, no_reserved_zone_enabling); + delayed()->nop(); + + call_VM_leaf( + CAST_FROM_FN_PTR(address, SharedRuntime::enable_stack_reserved_zone), thread); + call_VM(noreg, CAST_FROM_FN_PTR(address, + InterpreterRuntime::throw_delayed_StackOverflowError)); + should_not_reach_here(); + + bind(no_reserved_zone_enabling); + } + ld(ret_addr, FP, frame::interpreter_frame_return_addr_offset * wordSize); + ld(FP, FP, frame::interpreter_frame_sender_fp_offset * wordSize); + move(SP, TSR); // set sp to sender sp +} + +#endif // CC_INTERP + +// Lock object +// +// Args: +// c_rarg0: BasicObjectLock to be used for locking +// +// Kills: +// T1 +// T2 +void 
InterpreterMacroAssembler::lock_object(Register lock_reg) { + assert(lock_reg == c_rarg0, "The argument is only for looks. It must be c_rarg0"); + + if (UseHeavyMonitors) { + call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), lock_reg); + } else { + Label done, slow_case; + const Register tmp_reg = T2; + const Register scr_reg = T1; + const int obj_offset = BasicObjectLock::obj_offset_in_bytes(); + const int lock_offset = BasicObjectLock::lock_offset_in_bytes (); + const int mark_offset = lock_offset + BasicLock::displaced_header_offset_in_bytes(); + + // Load object pointer into scr_reg + ld(scr_reg, lock_reg, obj_offset); + + if (UseBiasedLocking) { + // Note: we use noreg for the temporary register since it's hard + // to come up with a free register on all incoming code paths + biased_locking_enter(lock_reg, scr_reg, tmp_reg, noreg, false, done, &slow_case); + } + + // Load (object->mark() | 1) into tmp_reg + ld(AT, scr_reg, 0); + ori(tmp_reg, AT, 1); + + // Save (object->mark() | 1) into BasicLock's displaced header + sd(tmp_reg, lock_reg, mark_offset); + + assert(lock_offset == 0, "displached header must be first word in BasicObjectLock"); + + if (PrintBiasedLockingStatistics) { + Label succ, fail; + cmpxchg(Address(scr_reg, 0), tmp_reg, lock_reg, AT, true, false, succ, &fail); + bind(succ); + atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, AT, scr_reg); + b(done); + delayed()->nop(); + bind(fail); + } else { + cmpxchg(Address(scr_reg, 0), tmp_reg, lock_reg, AT, true, false, done); + } + + // Test if the oopMark is an obvious stack pointer, i.e., + // 1) (mark & 3) == 0, and + // 2) SP <= mark < SP + os::pagesize() + // + // These 3 tests can be done by evaluating the following + // expression: ((mark - sp) & (3 - os::vm_page_size())), + // assuming both stack pointer and pagesize have their + // least significant 2 bits clear. + // NOTE: the oopMark is in tmp_reg as the result of cmpxchg + + dsubu(tmp_reg, tmp_reg, SP); + move(AT, 7 - os::vm_page_size()); + andr(tmp_reg, tmp_reg, AT); + // Save the test result, for recursive case, the result is zero + sd(tmp_reg, lock_reg, mark_offset); + if (PrintBiasedLockingStatistics) { + bne(tmp_reg, R0, slow_case); + delayed()->nop(); + atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, AT, scr_reg); + } + beq(tmp_reg, R0, done); + delayed()->nop(); + + bind(slow_case); + // Call the runtime routine for slow case + call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), lock_reg); + + bind(done); + } +} + + +// Unlocks an object. Used in monitorexit bytecode and +// remove_activation. Throws an IllegalMonitorException if object is +// not locked by current thread. +// +// Args: +// c_rarg0: BasicObjectLock for lock +// +// Kills: +// T1 +// T2 +// T3 +// Throw an IllegalMonitorException if object is not locked by current thread +void InterpreterMacroAssembler::unlock_object(Register lock_reg) { + assert(lock_reg == c_rarg0, "The argument is only for looks. 
It must be c_rarg0"); + + if (UseHeavyMonitors) { + call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), lock_reg); + } else { + Label done; + + const Register tmp_reg = T1; + const Register scr_reg = T2; + const Register hdr_reg = T3; + + save_bcp(); // Save in case of exception + + // Convert from BasicObjectLock structure to object and BasicLock structure + // Store the BasicLock address into %T2 + daddiu(tmp_reg, lock_reg, BasicObjectLock::lock_offset_in_bytes()); + + // Load oop into scr_reg(%T1) + ld(scr_reg, lock_reg, BasicObjectLock::obj_offset_in_bytes()); + // free entry + sd(R0, lock_reg, BasicObjectLock::obj_offset_in_bytes()); + if (UseBiasedLocking) { + biased_locking_exit(scr_reg, hdr_reg, done); + } + + // Load the old header from BasicLock structure + ld(hdr_reg, tmp_reg, BasicLock::displaced_header_offset_in_bytes()); + // zero for recursive case + beq(hdr_reg, R0, done); + delayed()->nop(); + + // Atomic swap back the old header + cmpxchg(Address(scr_reg, 0), tmp_reg, hdr_reg, AT, false, false, done); + + // Call the runtime routine for slow case. + sd(scr_reg, lock_reg, BasicObjectLock::obj_offset_in_bytes()); // restore obj + call_VM(NOREG, + CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), + lock_reg); + + bind(done); + + restore_bcp(); + } +} + +#ifndef CC_INTERP + +void InterpreterMacroAssembler::test_method_data_pointer(Register mdp, + Label& zero_continue) { + assert(ProfileInterpreter, "must be profiling interpreter"); + ld(mdp, Address(FP, frame::interpreter_frame_mdp_offset * wordSize)); + beq(mdp, R0, zero_continue); + delayed()->nop(); +} + + +// Set the method data pointer for the current bcp. +void InterpreterMacroAssembler::set_method_data_pointer_for_bcp() { + assert(ProfileInterpreter, "must be profiling interpreter"); + Label set_mdp; + + // V0 and T0 will be used as two temporary registers. + push2(V0, T0); + + get_method(T0); + // Test MDO to avoid the call if it is NULL. + ld(V0, T0, in_bytes(Method::method_data_offset())); + beq(V0, R0, set_mdp); + delayed()->nop(); + + // method: T0 + // bcp: BCP --> S0 + call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::bcp_to_di), T0, BCP); + // mdi: V0 + // mdo is guaranteed to be non-zero here, we checked for it before the call. + get_method(T0); + ld(T0, T0, in_bytes(Method::method_data_offset())); + daddiu(T0, T0, in_bytes(MethodData::data_offset())); + daddu(V0, T0, V0); + bind(set_mdp); + sd(V0, FP, frame::interpreter_frame_mdp_offset * wordSize); + pop2(V0, T0); +} + +void InterpreterMacroAssembler::verify_method_data_pointer() { + assert(ProfileInterpreter, "must be profiling interpreter"); +#ifdef ASSERT + Label verify_continue; + Register method = V0; + Register mdp = V1; + Register tmp = A0; + push(method); + push(mdp); + push(tmp); + test_method_data_pointer(mdp, verify_continue); // If mdp is zero, continue + get_method(method); + + // If the mdp is valid, it will point to a DataLayout header which is + // consistent with the bcp. The converse is highly probable also. 
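// --- Editor's note (illustrative sketch, not part of the patch) -------------
// The check below recomputes the bcp implied by the current mdp and compares
// it with the live BCP register. In plain C++ terms (pointer types simplified;
// the offsets are passed in rather than taken from the real HotSpot headers):
static bool mdp_implies_bcp(const unsigned char* mdp,
                            const unsigned char* const_method,
                            long bci_offset, long codes_offset,
                            const unsigned char* bcp) {
  // lhu(tmp, mdp, bci_offset): fetch the 16-bit bci stored in the DataLayout
  unsigned short bci = *(const unsigned short*)(mdp + bci_offset);
  // daddu/daddiu then beq(tmp, BCP, ...): ConstMethod* + codes_offset + bci
  return const_method + codes_offset + bci == bcp;
}
// -----------------------------------------------------------------------------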
+ lhu(tmp, mdp, in_bytes(DataLayout::bci_offset())); + ld(AT, method, in_bytes(Method::const_offset())); + daddu(tmp, tmp, AT); + daddiu(tmp, tmp, in_bytes(ConstMethod::codes_offset())); + beq(tmp, BCP, verify_continue); + delayed()->nop(); + call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::verify_mdp), method, BCP, mdp); + bind(verify_continue); + pop(tmp); + pop(mdp); + pop(method); +#endif // ASSERT +} + + +void InterpreterMacroAssembler::set_mdp_data_at(Register mdp_in, + int constant, + Register value) { + assert(ProfileInterpreter, "must be profiling interpreter"); + Address data(mdp_in, constant); + sd(value, data); +} + + +void InterpreterMacroAssembler::increment_mdp_data_at(Register mdp_in, + int constant, + bool decrement) { + // Counter address + Address data(mdp_in, constant); + + increment_mdp_data_at(data, decrement); +} + +void InterpreterMacroAssembler::increment_mdp_data_at(Address data, + bool decrement) { + assert(ProfileInterpreter, "must be profiling interpreter"); + // %%% this does 64bit counters at best it is wasting space + // at worst it is a rare bug when counters overflow + Register tmp = S0; + push(tmp); + if (decrement) { + assert(DataLayout::counter_increment == 1, "flow-free idiom only works with 1"); + // Decrement the register. + ld(AT, data); + sltu(tmp, R0, AT); + dsubu(AT, AT, tmp); + sd(AT, data); + } else { + assert(DataLayout::counter_increment == 1, "flow-free idiom only works with 1"); + // Increment the register. + ld(AT, data); + daddiu(tmp, AT, DataLayout::counter_increment); + sltu(tmp, R0, tmp); + daddu(AT, AT, tmp); + sd(AT, data); + } + pop(tmp); +} + + +void InterpreterMacroAssembler::increment_mdp_data_at(Register mdp_in, + Register reg, + int constant, + bool decrement) { + Register tmp = S0; + push(tmp); + if (decrement) { + assert(Assembler::is_simm16(constant), "constant is not a simm16 !"); + assert(DataLayout::counter_increment == 1, "flow-free idiom only works with 1"); + // Decrement the register. + daddu(tmp, mdp_in, reg); + ld(AT, tmp, constant); + sltu(tmp, R0, AT); + dsubu(AT, AT, tmp); + daddu(tmp, mdp_in, reg); + sd(AT, tmp, constant); + } else { + assert(Assembler::is_simm16(constant), "constant is not a simm16 !"); + assert(DataLayout::counter_increment == 1, "flow-free idiom only works with 1"); + // Increment the register. + daddu(tmp, mdp_in, reg); + ld(AT, tmp, constant); + daddiu(tmp, AT, DataLayout::counter_increment); + sltu(tmp, R0, tmp); + daddu(AT, AT, tmp); + daddu(tmp, mdp_in, reg); + sd(AT, tmp, constant); + } + pop(tmp); +} + +void InterpreterMacroAssembler::set_mdp_flag_at(Register mdp_in, + int flag_byte_constant) { + assert(ProfileInterpreter, "must be profiling interpreter"); + int header_offset = in_bytes(DataLayout::header_offset()); + int header_bits = DataLayout::flag_mask_to_header_mask(flag_byte_constant); + // Set the flag + lw(AT, Address(mdp_in, header_offset)); + if(Assembler::is_simm16(header_bits)) { + ori(AT, AT, header_bits); + } else { + push(T8); + // T8 is used as a temporary register. 
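// --- Editor's note (illustrative sketch, not part of the patch) -------------
// Whether the immediate fits in 16 bits (ori) or has to be materialized in T8
// first (orr), the flag update is the same read-modify-write of the 32-bit
// DataLayout header word:
static void set_header_flag_bits(unsigned int* header_word, unsigned int header_bits) {
  *header_word |= header_bits;   // lw + ori/orr + sw
}
// -----------------------------------------------------------------------------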
+ move(T8, header_bits); + orr(AT, AT, T8); + pop(T8); + } + sw(AT, Address(mdp_in, header_offset)); +} + + + +void InterpreterMacroAssembler::test_mdp_data_at(Register mdp_in, + int offset, + Register value, + Register test_value_out, + Label& not_equal_continue) { + assert(ProfileInterpreter, "must be profiling interpreter"); + if (test_value_out == noreg) { + ld(AT, Address(mdp_in, offset)); + bne(AT, value, not_equal_continue); + delayed()->nop(); + } else { + // Put the test value into a register, so caller can use it: + ld(test_value_out, Address(mdp_in, offset)); + bne(value, test_value_out, not_equal_continue); + delayed()->nop(); + } +} + + +void InterpreterMacroAssembler::update_mdp_by_offset(Register mdp_in, + int offset_of_disp) { + assert(ProfileInterpreter, "must be profiling interpreter"); + assert(Assembler::is_simm16(offset_of_disp), "offset is not an simm16"); + ld(AT, mdp_in, offset_of_disp); + daddu(mdp_in, mdp_in, AT); + sd(mdp_in, Address(FP, frame::interpreter_frame_mdp_offset * wordSize)); +} + + +void InterpreterMacroAssembler::update_mdp_by_offset(Register mdp_in, + Register reg, + int offset_of_disp) { + assert(ProfileInterpreter, "must be profiling interpreter"); + daddu(AT, reg, mdp_in); + assert(Assembler::is_simm16(offset_of_disp), "offset is not an simm16"); + ld(AT, AT, offset_of_disp); + daddu(mdp_in, mdp_in, AT); + sd(mdp_in, Address(FP, frame::interpreter_frame_mdp_offset * wordSize)); +} + + +void InterpreterMacroAssembler::update_mdp_by_constant(Register mdp_in, + int constant) { + assert(ProfileInterpreter, "must be profiling interpreter"); + if(Assembler::is_simm16(constant)) { + daddiu(mdp_in, mdp_in, constant); + } else { + move(AT, constant); + daddu(mdp_in, mdp_in, AT); + } + sd(mdp_in, Address(FP, frame::interpreter_frame_mdp_offset * wordSize)); +} + + +void InterpreterMacroAssembler::update_mdp_for_ret(Register return_bci) { + assert(ProfileInterpreter, "must be profiling interpreter"); + push(return_bci); // save/restore across call_VM + call_VM(noreg, + CAST_FROM_FN_PTR(address, InterpreterRuntime::update_mdp_for_ret), + return_bci); + pop(return_bci); +} + + +void InterpreterMacroAssembler::profile_taken_branch(Register mdp, + Register bumped_count) { + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + // Otherwise, assign to mdp + test_method_data_pointer(mdp, profile_continue); + + // We are taking a branch. Increment the taken count. + // We inline increment_mdp_data_at to return bumped_count in a register + //increment_mdp_data_at(mdp, in_bytes(JumpData::taken_offset())); + ld(bumped_count, mdp, in_bytes(JumpData::taken_offset())); + assert(DataLayout::counter_increment == 1, "flow-free idiom only works with 1"); + daddiu(AT, bumped_count, DataLayout::counter_increment); + sltu(AT, R0, AT); + daddu(bumped_count, bumped_count, AT); + sd(bumped_count, mdp, in_bytes(JumpData::taken_offset())); // Store back out + // The method data pointer needs to be updated to reflect the new target. + update_mdp_by_offset(mdp, in_bytes(JumpData::displacement_offset())); + bind(profile_continue); + } +} + + +void InterpreterMacroAssembler::profile_not_taken_branch(Register mdp) { + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + // We are taking a branch. Increment the not taken count. 
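// --- Editor's note (illustrative sketch, not part of the patch) -------------
// increment_mdp_data_at() above relies on the "flow-free" idiom: sltu turns
// the wrapped (or underflowed) case into a zero carry, so the counter
// saturates instead of wrapping, with no branch. The same idea in plain C++
// on 64-bit counter cells, as used by this port:
static unsigned long long saturating_increment(unsigned long long c) {
  unsigned long long next  = c + 1;         // daddiu
  unsigned long long carry = (next != 0);   // sltu(tmp, R0, tmp): 0 only on wrap-around
  return c + carry;                         // daddu: sticks at the maximum value
}
static unsigned long long saturating_decrement(unsigned long long c) {
  unsigned long long borrow = (c != 0);     // sltu(tmp, R0, AT): 0 only when already 0
  return c - borrow;                        // dsubu: never drops below 0
}
// -----------------------------------------------------------------------------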
+ increment_mdp_data_at(mdp, in_bytes(BranchData::not_taken_offset())); + + // The method data pointer needs to be updated to correspond to + // the next bytecode + update_mdp_by_constant(mdp, in_bytes(BranchData::branch_data_size())); + bind(profile_continue); + } +} + + +void InterpreterMacroAssembler::profile_call(Register mdp) { + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + // We are making a call. Increment the count. + increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); + + // The method data pointer needs to be updated to reflect the new target. + update_mdp_by_constant(mdp, in_bytes(CounterData::counter_data_size())); + bind(profile_continue); + } +} + + +void InterpreterMacroAssembler::profile_final_call(Register mdp) { + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + // We are making a call. Increment the count. + increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); + + // The method data pointer needs to be updated to reflect the new target. + update_mdp_by_constant(mdp, + in_bytes(VirtualCallData:: + virtual_call_data_size())); + bind(profile_continue); + } +} + + +void InterpreterMacroAssembler::profile_virtual_call(Register receiver, + Register mdp, + Register reg2, + bool receiver_can_be_null) { + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + Label skip_receiver_profile; + if (receiver_can_be_null) { + Label not_null; + bne(receiver, R0, not_null); + delayed()->nop(); + // We are making a call. Increment the count. + increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); + beq(R0, R0, skip_receiver_profile); + delayed()->nop(); + bind(not_null); + } + + // Record the receiver type. + record_klass_in_profile(receiver, mdp, reg2, true); + bind(skip_receiver_profile); + + // The method data pointer needs to be updated to reflect the new target. + update_mdp_by_constant(mdp, + in_bytes(VirtualCallData:: + virtual_call_data_size())); + bind(profile_continue); + } +} + +#if INCLUDE_JVMCI +void InterpreterMacroAssembler::profile_called_method(Register method, Register mdp, Register reg2) { + assert_different_registers(method, mdp, reg2); + if (ProfileInterpreter && MethodProfileWidth > 0) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + Label done; + record_item_in_profile_helper(method, mdp, reg2, 0, done, MethodProfileWidth, + &VirtualCallData::method_offset, &VirtualCallData::method_count_offset, in_bytes(VirtualCallData::nonprofiled_receiver_count_offset())); + bind(done); + + update_mdp_by_constant(mdp, in_bytes(VirtualCallData::virtual_call_data_size())); + bind(profile_continue); + } +} +#endif // INCLUDE_JVMCI + +// This routine creates a state machine for updating the multi-row +// type profile at a virtual call site (or other type-sensitive bytecode). +// The machine visits each row (of receiver/count) until the receiver type +// is found, or until it runs out of rows. At the same time, it remembers +// the location of the first empty row. (An empty row records null for its +// receiver, and can be allocated for a newly-observed receiver type.) 
+// Because there are two degrees of freedom in the state, a simple linear +// search will not work; it must be a decision tree. Hence this helper +// function is recursive, to generate the required tree structured code. +// It's the interpreter, so we are trading off code space for speed. +// See below for example code. +void InterpreterMacroAssembler::record_klass_in_profile_helper( + Register receiver, Register mdp, + Register reg2, int start_row, + Label& done, bool is_virtual_call) { + if (TypeProfileWidth == 0) { + if (is_virtual_call) { + increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); + } + return; + } + + int last_row = VirtualCallData::row_limit() - 1; + assert(start_row <= last_row, "must be work left to do"); + // Test this row for both the receiver and for null. + // Take any of three different outcomes: + // 1. found receiver => increment count and goto done + // 2. found null => keep looking for case 1, maybe allocate this cell + // 3. found something else => keep looking for cases 1 and 2 + // Case 3 is handled by a recursive call. + for (int row = start_row; row <= last_row; row++) { + Label next_test; + bool test_for_null_also = (row == start_row); + + // See if the receiver is receiver[n]. + int recvr_offset = in_bytes(VirtualCallData::receiver_offset(row)); + test_mdp_data_at(mdp, recvr_offset, receiver, + (test_for_null_also ? reg2 : noreg), + next_test); + // (Reg2 now contains the receiver from the CallData.) + + // The receiver is receiver[n]. Increment count[n]. + int count_offset = in_bytes(VirtualCallData::receiver_count_offset(row)); + increment_mdp_data_at(mdp, count_offset); + beq(R0, R0, done); + delayed()->nop(); + bind(next_test); + + if (test_for_null_also) { + Label found_null; + // Failed the equality check on receiver[n]... Test for null. + if (start_row == last_row) { + // The only thing left to do is handle the null case. + if (is_virtual_call) { + beq(reg2, R0, found_null); + delayed()->nop(); + // Receiver did not match any saved receiver and there is no empty row for it. + // Increment total counter to indicate polymorphic case. + increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); + beq(R0, R0, done); + delayed()->nop(); + bind(found_null); + } else { + bne(reg2, R0, done); + delayed()->nop(); + } + break; + } + // Since null is rare, make it be the branch-taken case. + beq(reg2, R0, found_null); + delayed()->nop(); + + // Put all the "Case 3" tests here. + record_klass_in_profile_helper(receiver, mdp, reg2, start_row + 1, done, is_virtual_call); + + // Found a null. Keep searching for a matching receiver, + // but remember that this is an empty (unused) slot. + bind(found_null); + } + } + + // In the fall-through case, we found no matching receiver, but we + // observed the receiver[start_row] is NULL. + + // Fill in the receiver field and increment the count. 
+ int recvr_offset = in_bytes(VirtualCallData::receiver_offset(start_row)); + set_mdp_data_at(mdp, recvr_offset, receiver); + int count_offset = in_bytes(VirtualCallData::receiver_count_offset(start_row)); + move(reg2, DataLayout::counter_increment); + set_mdp_data_at(mdp, count_offset, reg2); + if (start_row > 0) { + beq(R0, R0, done); + delayed()->nop(); + } +} + +// Example state machine code for three profile rows: +// // main copy of decision tree, rooted at row[1] +// if (row[0].rec == rec) { row[0].incr(); goto done; } +// if (row[0].rec != NULL) { +// // inner copy of decision tree, rooted at row[1] +// if (row[1].rec == rec) { row[1].incr(); goto done; } +// if (row[1].rec != NULL) { +// // degenerate decision tree, rooted at row[2] +// if (row[2].rec == rec) { row[2].incr(); goto done; } +// if (row[2].rec != NULL) { goto done; } // overflow +// row[2].init(rec); goto done; +// } else { +// // remember row[1] is empty +// if (row[2].rec == rec) { row[2].incr(); goto done; } +// row[1].init(rec); goto done; +// } +// } else { +// // remember row[0] is empty +// if (row[1].rec == rec) { row[1].incr(); goto done; } +// if (row[2].rec == rec) { row[2].incr(); goto done; } +// row[0].init(rec); goto done; +// } +// done: + +void InterpreterMacroAssembler::record_klass_in_profile(Register receiver, + Register mdp, Register reg2, + bool is_virtual_call) { + assert(ProfileInterpreter, "must be profiling"); + Label done; + + record_klass_in_profile_helper(receiver, mdp, reg2, 0, done, is_virtual_call); + + bind (done); +} + +void InterpreterMacroAssembler::profile_ret(Register return_bci, + Register mdp) { + if (ProfileInterpreter) { + Label profile_continue; + uint row; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + // Update the total ret count. + increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); + + for (row = 0; row < RetData::row_limit(); row++) { + Label next_test; + + // See if return_bci is equal to bci[n]: + test_mdp_data_at(mdp, + in_bytes(RetData::bci_offset(row)), + return_bci, noreg, + next_test); + + // return_bci is equal to bci[n]. Increment the count. + increment_mdp_data_at(mdp, in_bytes(RetData::bci_count_offset(row))); + + // The method data pointer needs to be updated to reflect the new target. + update_mdp_by_offset(mdp, + in_bytes(RetData::bci_displacement_offset(row))); + beq(R0, R0, profile_continue); + delayed()->nop(); + bind(next_test); + } + + update_mdp_for_ret(return_bci); + + bind(profile_continue); + } +} + + +void InterpreterMacroAssembler::profile_null_seen(Register mdp) { + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + set_mdp_flag_at(mdp, BitData::null_seen_byte_constant()); + + // The method data pointer needs to be updated. + int mdp_delta = in_bytes(BitData::bit_data_size()); + if (TypeProfileCasts) { + mdp_delta = in_bytes(VirtualCallData::virtual_call_data_size()); + } + update_mdp_by_constant(mdp, mdp_delta); + + bind(profile_continue); + } +} + + +void InterpreterMacroAssembler::profile_typecheck_failed(Register mdp) { + if (ProfileInterpreter && TypeProfileCasts) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + int count_offset = in_bytes(CounterData::count_offset()); + // Back up the address, since we have already bumped the mdp. 
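// --- Editor's note (illustrative sketch, not part of the patch) -------------
// When TypeProfileCasts is on, the cast bytecodes have already advanced mdp
// past a VirtualCallData-sized entry (see profile_typecheck below), so the
// counter that has to be un-bumped now sits at a negative displacement from
// the current mdp; that is what the adjusted count_offset below expresses:
static long* typecheck_counter_addr(char* mdp, long count_offset,
                                    long virtual_call_data_size) {
  return (long*)(mdp + (count_offset - virtual_call_data_size));  // negative displacement
}
// -----------------------------------------------------------------------------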
+ count_offset -= in_bytes(VirtualCallData::virtual_call_data_size()); + + // *Decrement* the counter. We expect to see zero or small negatives. + increment_mdp_data_at(mdp, count_offset, true); + + bind (profile_continue); + } +} + + +void InterpreterMacroAssembler::profile_typecheck(Register mdp, Register klass, Register reg2) { + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + // The method data pointer needs to be updated. + int mdp_delta = in_bytes(BitData::bit_data_size()); + if (TypeProfileCasts) { + mdp_delta = in_bytes(VirtualCallData::virtual_call_data_size()); + + // Record the object type. + record_klass_in_profile(klass, mdp, reg2, false); + } + update_mdp_by_constant(mdp, mdp_delta); + + bind(profile_continue); + } +} + + +void InterpreterMacroAssembler::profile_switch_default(Register mdp) { + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + // Update the default case count + increment_mdp_data_at(mdp, + in_bytes(MultiBranchData::default_count_offset())); + + // The method data pointer needs to be updated. + update_mdp_by_offset(mdp, + in_bytes(MultiBranchData:: + default_displacement_offset())); + + bind(profile_continue); + } +} + + +void InterpreterMacroAssembler::profile_switch_case(Register index, + Register mdp, + Register reg2) { + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + // Build the base (index * per_case_size_in_bytes()) + + // case_array_offset_in_bytes() + move(reg2, in_bytes(MultiBranchData::per_case_size())); + if (UseLEXT1) { + gsdmult(index, index, reg2); + } else { + dmult(index, reg2); + mflo(index); + } + daddiu(index, index, in_bytes(MultiBranchData::case_array_offset())); + + // Update the case count + increment_mdp_data_at(mdp, + index, + in_bytes(MultiBranchData::relative_count_offset())); + + // The method data pointer needs to be updated. + update_mdp_by_offset(mdp, + index, + in_bytes(MultiBranchData:: + relative_displacement_offset())); + + bind(profile_continue); + } +} + + +void InterpreterMacroAssembler::narrow(Register result) { + + // Get method->_constMethod->_result_type + ld(T9, FP, frame::interpreter_frame_method_offset * wordSize); + ld(T9, T9, in_bytes(Method::const_offset())); + lbu(T9, T9, in_bytes(ConstMethod::result_type_offset())); + + Label done, notBool, notByte, notChar; + + // common case first + addiu(AT, T9, -T_INT); + beq(AT, R0, done); + delayed()->nop(); + + // mask integer result to narrower return type. 
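// --- Editor's note (illustrative sketch, not part of the patch) -------------
// The masking below, expressed in plain C++ on the 32-bit view of the result
// register. The generated code compares against the BasicType constants; the
// JVM type letters are used here purely for readability, and seb/seh are the
// MIPS sign-extend-byte/halfword instructions:
static int narrow_result(int result, int result_type_letter) {
  switch (result_type_letter) {
    case 'Z': return result & 0x1;               // T_BOOLEAN: andi 0x1
    case 'B': return (int)(signed char)result;   // T_BYTE:    seb
    case 'C': return result & 0xFFFF;            // T_CHAR:    andi 0xFFFF
    case 'S': return (int)(short)result;         // T_SHORT:   seh
    default:  return result;                     // T_INT:     unchanged
  }
}
// -----------------------------------------------------------------------------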
+ addiu(AT, T9, -T_BOOLEAN); + bne(AT, R0, notBool); + delayed()->nop(); + andi(result, result, 0x1); + beq(R0, R0, done); + delayed()->nop(); + + bind(notBool); + addiu(AT, T9, -T_BYTE); + bne(AT, R0, notByte); + delayed()->nop(); + seb(result, result); + beq(R0, R0, done); + delayed()->nop(); + + bind(notByte); + addiu(AT, T9, -T_CHAR); + bne(AT, R0, notChar); + delayed()->nop(); + andi(result, result, 0xFFFF); + beq(R0, R0, done); + delayed()->nop(); + + bind(notChar); + seh(result, result); + + // Nothing to do for T_INT + bind(done); +} + + +void InterpreterMacroAssembler::profile_obj_type(Register obj, const Address& mdo_addr) { + Label update, next, none; + + verify_oop(obj); + + if (mdo_addr.index() != noreg) { + guarantee(T0 != mdo_addr.base(), "The base register will be corrupted !"); + guarantee(T0 != mdo_addr.index(), "The index register will be corrupted !"); + push(T0); + dsll(T0, mdo_addr.index(), mdo_addr.scale()); + daddu(T0, T0, mdo_addr.base()); + } + + bne(obj, R0, update); + delayed()->nop(); + + if (mdo_addr.index() == noreg) { + ld(AT, mdo_addr); + } else { + ld(AT, T0, mdo_addr.disp()); + } + ori(AT, AT, TypeEntries::null_seen); + if (mdo_addr.index() == noreg) { + sd(AT, mdo_addr); + } else { + sd(AT, T0, mdo_addr.disp()); + } + + beq(R0, R0, next); + delayed()->nop(); + + bind(update); + load_klass(obj, obj); + + if (mdo_addr.index() == noreg) { + ld(AT, mdo_addr); + } else { + ld(AT, T0, mdo_addr.disp()); + } + xorr(obj, obj, AT); + + assert(TypeEntries::type_klass_mask == -4, "must be"); + dextm(AT, obj, 2, 62); + beq(AT, R0, next); + delayed()->nop(); + + andi(AT, obj, TypeEntries::type_unknown); + bne(AT, R0, next); + delayed()->nop(); + + if (mdo_addr.index() == noreg) { + ld(AT, mdo_addr); + } else { + ld(AT, T0, mdo_addr.disp()); + } + beq(AT, R0, none); + delayed()->nop(); + + daddiu(AT, AT, -(TypeEntries::null_seen)); + beq(AT, R0, none); + delayed()->nop(); + + // There is a chance that the checks above (re-reading profiling + // data from memory) fail if another thread has just set the + // profiling to this obj's klass + if (mdo_addr.index() == noreg) { + ld(AT, mdo_addr); + } else { + ld(AT, T0, mdo_addr.disp()); + } + xorr(obj, obj, AT); + assert(TypeEntries::type_klass_mask == -4, "must be"); + dextm(AT, obj, 2, 62); + beq(AT, R0, next); + delayed()->nop(); + + // different than before. Cannot keep accurate profile. + if (mdo_addr.index() == noreg) { + ld(AT, mdo_addr); + } else { + ld(AT, T0, mdo_addr.disp()); + } + ori(AT, AT, TypeEntries::type_unknown); + if (mdo_addr.index() == noreg) { + sd(AT, mdo_addr); + } else { + sd(AT, T0, mdo_addr.disp()); + } + beq(R0, R0, next); + delayed()->nop(); + + bind(none); + // first time here. Set profile type. + if (mdo_addr.index() == noreg) { + sd(obj, mdo_addr); + } else { + sd(obj, T0, mdo_addr.disp()); + } + + bind(next); + if (mdo_addr.index() != noreg) { + pop(T0); + } +} + +void InterpreterMacroAssembler::profile_arguments_type(Register mdp, Register callee, Register tmp, bool is_virtual) { + if (!ProfileInterpreter) { + return; + } + + if (MethodData::profile_arguments() || MethodData::profile_return()) { + Label profile_continue; + + test_method_data_pointer(mdp, profile_continue); + + int off_to_start = is_virtual ? in_bytes(VirtualCallData::virtual_call_data_size()) : in_bytes(CounterData::counter_data_size()); + + lb(AT, mdp, in_bytes(DataLayout::tag_offset()) - off_to_start); + li(tmp, is_virtual ? 
DataLayout::virtual_call_type_data_tag : DataLayout::call_type_data_tag); + bne(tmp, AT, profile_continue); + delayed()->nop(); + + + if (MethodData::profile_arguments()) { + Label done; + int off_to_args = in_bytes(TypeEntriesAtCall::args_data_offset()); + if (Assembler::is_simm16(off_to_args)) { + daddiu(mdp, mdp, off_to_args); + } else { + move(AT, off_to_args); + daddu(mdp, mdp, AT); + } + + + for (int i = 0; i < TypeProfileArgsLimit; i++) { + if (i > 0 || MethodData::profile_return()) { + // If return value type is profiled we may have no argument to profile + ld(tmp, mdp, in_bytes(TypeEntriesAtCall::cell_count_offset())-off_to_args); + + if (Assembler::is_simm16(-1 * i * TypeStackSlotEntries::per_arg_count())) { + addiu32(tmp, tmp, -1 * i * TypeStackSlotEntries::per_arg_count()); + } else { + li(AT, i*TypeStackSlotEntries::per_arg_count()); + subu32(tmp, tmp, AT); + } + + li(AT, TypeStackSlotEntries::per_arg_count()); + slt(AT, tmp, AT); + bne(AT, R0, done); + delayed()->nop(); + } + ld(tmp, callee, in_bytes(Method::const_offset())); + + lhu(tmp, tmp, in_bytes(ConstMethod::size_of_parameters_offset())); + + // stack offset o (zero based) from the start of the argument + // list, for n arguments translates into offset n - o - 1 from + // the end of the argument list + ld(AT, mdp, in_bytes(TypeEntriesAtCall::stack_slot_offset(i))-off_to_args); + subu(tmp, tmp, AT); + + addiu32(tmp, tmp, -1); + + Address arg_addr = argument_address(tmp); + ld(tmp, arg_addr); + + Address mdo_arg_addr(mdp, in_bytes(TypeEntriesAtCall::argument_type_offset(i))-off_to_args); + profile_obj_type(tmp, mdo_arg_addr); + + int to_add = in_bytes(TypeStackSlotEntries::per_arg_size()); + if (Assembler::is_simm16(to_add)) { + daddiu(mdp, mdp, to_add); + } else { + move(AT, to_add); + daddu(mdp, mdp, AT); + } + + off_to_args += to_add; + } + + if (MethodData::profile_return()) { + ld(tmp, mdp, in_bytes(TypeEntriesAtCall::cell_count_offset())-off_to_args); + + int tmp_arg_counts = TypeProfileArgsLimit*TypeStackSlotEntries::per_arg_count(); + if (Assembler::is_simm16(-1 * tmp_arg_counts)) { + addiu32(tmp, tmp, -1 * tmp_arg_counts); + } else { + move(AT, tmp_arg_counts); + subu32(mdp, mdp, AT); + } + } + + bind(done); + + if (MethodData::profile_return()) { + // We're right after the type profile for the last + // argument. tmp is the number of cells left in the + // CallTypeData/VirtualCallTypeData to reach its end. Non null + // if there's a return to profile. 
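// --- Editor's note (illustrative sketch, not part of the patch) -------------
// The slot arithmetic used above when locating each argument: a zero-based
// stack slot o recorded in the profile, for a method with n parameter slots,
// is addressed as element n - o - 1 counted from the end of the argument list:
static int arg_index_from_end(int num_parameter_slots, int stack_slot) {
  return num_parameter_slots - stack_slot - 1;   // subu(tmp, tmp, AT); addiu32(tmp, tmp, -1)
}
// -----------------------------------------------------------------------------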
+ assert(ReturnTypeEntry::static_cell_count() < TypeStackSlotEntries::per_arg_count(), "can't move past ret type"); + sll(tmp, tmp, exact_log2(DataLayout::cell_size)); + daddu(mdp, mdp, tmp); + } + sd(mdp, FP, frame::interpreter_frame_mdp_offset * wordSize); + } else { + assert(MethodData::profile_return(), "either profile call args or call ret"); + update_mdp_by_constant(mdp, in_bytes(TypeEntriesAtCall::return_only_size())); + } + + // mdp points right after the end of the + // CallTypeData/VirtualCallTypeData, right after the cells for the + // return value type if there's one + + bind(profile_continue); + } +} + +void InterpreterMacroAssembler::profile_return_type(Register mdp, Register ret, Register tmp) { + assert_different_registers(mdp, ret, tmp, _bcp_register); + if (ProfileInterpreter && MethodData::profile_return()) { + Label profile_continue, done; + + test_method_data_pointer(mdp, profile_continue); + + if (MethodData::profile_return_jsr292_only()) { + assert(Method::intrinsic_id_size_in_bytes() == 2, "assuming Method::_intrinsic_id is u2"); + + // If we don't profile all invoke bytecodes we must make sure + // it's a bytecode we indeed profile. We can't go back to the + // begining of the ProfileData we intend to update to check its + // type because we're right after it and we don't known its + // length + Label do_profile; + lb(tmp, _bcp_register, 0); + daddiu(AT, tmp, -1 * Bytecodes::_invokedynamic); + beq(AT, R0, do_profile); + delayed()->daddiu(AT, tmp, -1 * Bytecodes::_invokehandle); + beq(AT, R0, do_profile); + delayed()->nop(); + + get_method(tmp); + lhu(tmp, tmp, Method::intrinsic_id_offset_in_bytes()); + li(AT, vmIntrinsics::_compiledLambdaForm); + bne(tmp, AT, profile_continue); + delayed()->nop(); + + bind(do_profile); + } + + Address mdo_ret_addr(mdp, -in_bytes(ReturnTypeEntry::size())); + daddu(tmp, ret, R0); + profile_obj_type(tmp, mdo_ret_addr); + + bind(profile_continue); + } +} + +void InterpreterMacroAssembler::profile_parameters_type(Register mdp, Register tmp1, Register tmp2) { + guarantee(T9 == tmp1, "You are reqired to use T9 as the index register for MIPS !"); + + if (ProfileInterpreter && MethodData::profile_parameters()) { + Label profile_continue, done; + + test_method_data_pointer(mdp, profile_continue); + + // Load the offset of the area within the MDO used for + // parameters. If it's negative we're not profiling any parameters + lw(tmp1, mdp, in_bytes(MethodData::parameters_type_data_di_offset()) - in_bytes(MethodData::data_offset())); + bltz(tmp1, profile_continue); + delayed()->nop(); + + // Compute a pointer to the area for parameters from the offset + // and move the pointer to the slot for the last + // parameters. Collect profiling from last parameter down. 
+ // mdo start + parameters offset + array length - 1 + daddu(mdp, mdp, tmp1); + ld(tmp1, mdp, in_bytes(ArrayData::array_len_offset())); + decrement(tmp1, TypeStackSlotEntries::per_arg_count()); + + + Label loop; + bind(loop); + + int off_base = in_bytes(ParametersTypeData::stack_slot_offset(0)); + int type_base = in_bytes(ParametersTypeData::type_offset(0)); + Address::ScaleFactor per_arg_scale = Address::times(DataLayout::cell_size); + Address arg_type(mdp, tmp1, per_arg_scale, type_base); + + // load offset on the stack from the slot for this parameter + dsll(AT, tmp1, per_arg_scale); + daddu(AT, AT, mdp); + ld(tmp2, AT, off_base); + + subu(tmp2, R0, tmp2); + + // read the parameter from the local area + dsll(AT, tmp2, Interpreter::logStackElementSize); + daddu(AT, AT, _locals_register); + ld(tmp2, AT, 0); + + // profile the parameter + profile_obj_type(tmp2, arg_type); + + // go to next parameter + decrement(tmp1, TypeStackSlotEntries::per_arg_count()); + bgtz(tmp1, loop); + delayed()->nop(); + + bind(profile_continue); + } +} + +void InterpreterMacroAssembler::verify_oop(Register reg, TosState state) { + if (state == atos) { + MacroAssembler::verify_oop(reg); + } +} + +void InterpreterMacroAssembler::verify_FPU(int stack_depth, TosState state) { +} +#endif // !CC_INTERP + + +void InterpreterMacroAssembler::notify_method_entry() { + // Whenever JVMTI is interp_only_mode, method entry/exit events are sent to + // track stack depth. If it is possible to enter interp_only_mode we add + // the code to check if the event should be sent. + Register tempreg = T0; +#ifndef OPT_THREAD + Register thread = T8; + get_thread(thread); +#else + Register thread = TREG; +#endif + if (JvmtiExport::can_post_interpreter_events()) { + Label L; + lw(tempreg, thread, in_bytes(JavaThread::interp_only_mode_offset())); + beq(tempreg, R0, L); + delayed()->nop(); + call_VM(noreg, CAST_FROM_FN_PTR(address, + InterpreterRuntime::post_method_entry)); + bind(L); + } + + { + SkipIfEqual skip_if(this, &DTraceMethodProbes, 0); + get_method(S3); + call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry), + //Rthread, + thread, + //Rmethod); + S3); + } + +} + +void InterpreterMacroAssembler::notify_method_exit( + TosState state, NotifyMethodExitMode mode) { + Register tempreg = T0; +#ifndef OPT_THREAD + Register thread = T8; + get_thread(thread); +#else + Register thread = TREG; +#endif + // Whenever JVMTI is interp_only_mode, method entry/exit events are sent to + // track stack depth. If it is possible to enter interp_only_mode we add + // the code to check if the event should be sent. + if (mode == NotifyJVMTI && JvmtiExport::can_post_interpreter_events()) { + Label skip; + // Note: frame::interpreter_frame_result has a dependency on how the + // method result is saved across the call to post_method_exit. If this + // is changed then the interpreter_frame_result implementation will + // need to be updated too. + + // template interpreter will leave it on the top of the stack. 
+ push(state); + lw(tempreg, thread, in_bytes(JavaThread::interp_only_mode_offset())); + beq(tempreg, R0, skip); + delayed()->nop(); + call_VM(noreg, + CAST_FROM_FN_PTR(address, InterpreterRuntime::post_method_exit)); + bind(skip); + pop(state); + } + + { + // Dtrace notification + SkipIfEqual skip_if(this, &DTraceMethodProbes, 0); + push(state); + get_method(S3); + call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), + //Rthread, Rmethod); + thread, S3); + pop(state); + } +} + +// Jump if ((*counter_addr += increment) & mask) satisfies the condition. +void InterpreterMacroAssembler::increment_mask_and_jump(Address counter_addr, + int increment, int mask, + Register scratch, bool preloaded, + Condition cond, Label* where) { + assert_different_registers(scratch, AT); + + if (!preloaded) { + lw(scratch, counter_addr); + } + addiu32(scratch, scratch, increment); + sw(scratch, counter_addr); + + move(AT, mask); + andr(scratch, scratch, AT); + + if (cond == Assembler::zero) { + beq(scratch, R0, *where); + delayed()->nop(); + } else { + unimplemented(); + } +} diff --git a/src/hotspot/cpu/mips/interpreterRT_mips.hpp b/src/hotspot/cpu/mips/interpreterRT_mips.hpp new file mode 100644 index 00000000000..054138ea42b --- /dev/null +++ b/src/hotspot/cpu/mips/interpreterRT_mips.hpp @@ -0,0 +1,60 @@ +/* + * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_MIPS_VM_INTERPRETERRT_MIPS_HPP +#define CPU_MIPS_VM_INTERPRETERRT_MIPS_HPP + +// This is included in the middle of class Interpreter. +// Do not include files here. 
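// --- Editor's note (illustrative sketch, not part of the patch) -------------
// The generator declared below is driven by NativeSignatureIterator: the
// iterator walks the method signature and invokes one pass_* hook per
// parameter. Conceptually, as a stand-alone walk over a JNI-style signature
// string (HotSpot drives this from the method's fingerprint/signature object,
// not from a raw string):
static void walk_signature(const char* sig) {   // e.g. "(IJLjava/lang/String;[D)V"
  for (const char* p = sig + 1; *p != ')'; p++) {
    switch (*p) {
      case 'Z': case 'B': case 'C': case 'S': case 'I': /* pass_int()    */ break;
      case 'J':                                         /* pass_long()   */ break;
      case 'F':                                         /* pass_float()  */ break;
      case 'D':                                         /* pass_double() */ break;
      case 'L': while (*p != ';') p++;                  /* pass_object() */ break;
      case '[': while (*p == '[') p++;
                if (*p == 'L') { while (*p != ';') p++; }
                /* arrays are passed as objects: pass_object() */          break;
    }
  }
}
// -----------------------------------------------------------------------------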
+ +// native method calls + +class SignatureHandlerGenerator: public NativeSignatureIterator { + private: + MacroAssembler* _masm; + + void move(int from_offset, int to_offset); + + void box(int from_offset, int to_offset); + void pass_int(); + void pass_long(); + void pass_object(); + void pass_float(); + void pass_double(); + + public: + // Creation + SignatureHandlerGenerator(const methodHandle& method, CodeBuffer* buffer); + + // Code generation + void generate(uint64_t fingerprint); + + // Code generation support + static Register from(); + static Register to(); + static Register temp(); +}; + +#endif // CPU_MIPS_VM_INTERPRETERRT_MIPS_HPP diff --git a/src/hotspot/cpu/mips/interpreterRT_mips_64.cpp b/src/hotspot/cpu/mips/interpreterRT_mips_64.cpp new file mode 100644 index 00000000000..e655b2a1a83 --- /dev/null +++ b/src/hotspot/cpu/mips/interpreterRT_mips_64.cpp @@ -0,0 +1,252 @@ +/* + * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "interpreter/interp_masm.hpp" +#include "interpreter/interpreter.hpp" +#include "interpreter/interpreterRuntime.hpp" +#include "memory/allocation.inline.hpp" +#include "memory/universe.hpp" +#include "oops/method.hpp" +#include "oops/oop.inline.hpp" +#include "runtime/handles.inline.hpp" +#include "runtime/icache.hpp" +#include "runtime/interfaceSupport.inline.hpp" +#include "runtime/signature.hpp" + +#define __ _masm-> + +#define T0 RT0 +#define T1 RT1 +#define T2 RT2 +#define T3 RT3 +#define T8 RT8 +#define T9 RT9 + +// Implementation of SignatureHandlerGenerator +InterpreterRuntime::SignatureHandlerGenerator::SignatureHandlerGenerator( + const methodHandle& method, CodeBuffer* buffer) : NativeSignatureIterator(method) { + _masm = new MacroAssembler(buffer); +} + +void InterpreterRuntime::SignatureHandlerGenerator::move(int from_offset, int to_offset) { + __ ld(temp(), from(), Interpreter::local_offset_in_bytes(from_offset)); + __ sd(temp(), to(), to_offset * longSize); +} + +void InterpreterRuntime::SignatureHandlerGenerator::box(int from_offset, int to_offset) { + __ addiu(temp(), from(),Interpreter::local_offset_in_bytes(from_offset) ); + __ lw(AT, from(), Interpreter::local_offset_in_bytes(from_offset) ); + + __ movz(temp(), R0, AT); + __ sw(temp(), to(), to_offset * wordSize); +} + +void InterpreterRuntime::SignatureHandlerGenerator::generate(uint64_t fingerprint) { + // generate code to handle arguments + iterate(fingerprint); + // return result handler + __ li(V0, AbstractInterpreter::result_handler(method()->result_type())); + // return + __ jr(RA); + __ delayed()->nop(); + + __ flush(); +} + +void InterpreterRuntime::SignatureHandlerGenerator::pass_int() { + Argument jni_arg(jni_offset()); + if(jni_arg.is_Register()) { + __ lw(jni_arg.as_Register(), from(), Interpreter::local_offset_in_bytes(offset())); + } else { + __ lw(temp(), from(), Interpreter::local_offset_in_bytes(offset())); + __ sw(temp(), jni_arg.as_caller_address()); + } +} + +// the jvm specifies that long type takes 2 stack spaces, so in do_long(), _offset += 2. +void InterpreterRuntime::SignatureHandlerGenerator::pass_long() { + Argument jni_arg(jni_offset()); + if(jni_arg.is_Register()) { + __ ld(jni_arg.as_Register(), from(), Interpreter::local_offset_in_bytes(offset() + 1)); + } else { + __ ld(temp(), from(), Interpreter::local_offset_in_bytes(offset() + 1)); + __ sd(temp(), jni_arg.as_caller_address()); + } +} + +void InterpreterRuntime::SignatureHandlerGenerator::pass_object() { + Argument jni_arg(jni_offset()); + + // the handle for a receiver will never be null + bool do_NULL_check = offset() != 0 || is_static(); + if (do_NULL_check) { + __ ld(AT, from(), Interpreter::local_offset_in_bytes(offset())); + __ daddiu((jni_arg.is_Register() ? jni_arg.as_Register() : temp()), from(), Interpreter::local_offset_in_bytes(offset())); + __ movz((jni_arg.is_Register() ? 
jni_arg.as_Register() : temp()), R0, AT); + } else { + __ daddiu(jni_arg.as_Register(), from(), Interpreter::local_offset_in_bytes(offset())); + } + + if (!jni_arg.is_Register()) + __ sd(temp(), jni_arg.as_caller_address()); +} + +void InterpreterRuntime::SignatureHandlerGenerator::pass_float() { + Argument jni_arg(jni_offset()); + if(jni_arg.is_Register()) { + __ lwc1(jni_arg.as_FloatRegister(), from(), Interpreter::local_offset_in_bytes(offset())); + } else { + __ lw(temp(), from(), Interpreter::local_offset_in_bytes(offset())); + __ sw(temp(), jni_arg.as_caller_address()); + } +} + +// the jvm specifies that double type takes 2 stack spaces, so in do_double(), _offset += 2. +void InterpreterRuntime::SignatureHandlerGenerator::pass_double() { + Argument jni_arg(jni_offset()); + if(jni_arg.is_Register()) { + __ ldc1(jni_arg.as_FloatRegister(), from(), Interpreter::local_offset_in_bytes(offset() + 1)); + } else { + __ ld(temp(), from(), Interpreter::local_offset_in_bytes(offset() + 1)); + __ sd(temp(), jni_arg.as_caller_address()); + } +} + + +Register InterpreterRuntime::SignatureHandlerGenerator::from() { return LVP; } +Register InterpreterRuntime::SignatureHandlerGenerator::to() { return SP; } +Register InterpreterRuntime::SignatureHandlerGenerator::temp() { return T8; } + +// Implementation of SignatureHandlerLibrary + +void SignatureHandlerLibrary::pd_set_handler(address handler) {} + + +class SlowSignatureHandler + : public NativeSignatureIterator { + private: + address _from; + intptr_t* _to; + intptr_t* _reg_args; + intptr_t* _fp_identifiers; + unsigned int _num_args; + + virtual void pass_int() + { + jint from_obj = *(jint *)(_from+Interpreter::local_offset_in_bytes(0)); + _from -= Interpreter::stackElementSize; + + if (_num_args < Argument::n_register_parameters) { + *_reg_args++ = from_obj; + _num_args++; + } else { + *_to++ = from_obj; + } + } + + virtual void pass_long() + { + intptr_t from_obj = *(intptr_t*)(_from+Interpreter::local_offset_in_bytes(1)); + _from -= 2 * Interpreter::stackElementSize; + + if (_num_args < Argument::n_register_parameters) { + *_reg_args++ = from_obj; + _num_args++; + } else { + *_to++ = from_obj; + } + } + + virtual void pass_object() + { + intptr_t *from_addr = (intptr_t*)(_from + Interpreter::local_offset_in_bytes(0)); + _from -= Interpreter::stackElementSize; + if (_num_args < Argument::n_register_parameters) { + *_reg_args++ = (*from_addr == 0) ? NULL : (intptr_t) from_addr; + _num_args++; + } else { + *_to++ = (*from_addr == 0) ? 
NULL : (intptr_t) from_addr; + } + } + + virtual void pass_float() + { + jint from_obj = *(jint *)(_from+Interpreter::local_offset_in_bytes(0)); + _from -= Interpreter::stackElementSize; + + if (_num_args < Argument::n_float_register_parameters) { + *_reg_args++ = from_obj; + *_fp_identifiers |= (0x01 << (_num_args*2)); // mark as float + _num_args++; + } else { + *_to++ = from_obj; + } + } + + virtual void pass_double() + { + intptr_t from_obj = *(intptr_t*)(_from+Interpreter::local_offset_in_bytes(1)); + _from -= 2*Interpreter::stackElementSize; + + if (_num_args < Argument::n_float_register_parameters) { + *_reg_args++ = from_obj; + *_fp_identifiers |= (0x3 << (_num_args*2)); // mark as double + _num_args++; + } else { + *_to++ = from_obj; + } + } + + public: + SlowSignatureHandler(methodHandle method, address from, intptr_t* to) + : NativeSignatureIterator(method) + { + _from = from; + _to = to; + + // see TemplateInterpreterGenerator::generate_slow_signature_handler() + _reg_args = to - Argument::n_register_parameters + jni_offset() - 1; + _fp_identifiers = to - 1; + *(int*) _fp_identifiers = 0; + _num_args = jni_offset(); + } +}; + + +IRT_ENTRY(address, + InterpreterRuntime::slow_signature_handler(JavaThread* thread, + Method* method, + intptr_t* from, + intptr_t* to)) + methodHandle m(thread, (Method*)method); + assert(m->is_native(), "sanity check"); + + // handle arguments + SlowSignatureHandler(m, (address)from, to).iterate(UCONST64(-1)); + + // return result handler + return Interpreter::result_handler(m->result_type()); +IRT_END diff --git a/src/hotspot/cpu/mips/javaFrameAnchor_mips.hpp b/src/hotspot/cpu/mips/javaFrameAnchor_mips.hpp new file mode 100644 index 00000000000..dccdf6a019c --- /dev/null +++ b/src/hotspot/cpu/mips/javaFrameAnchor_mips.hpp @@ -0,0 +1,87 @@ +/* + * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef CPU_MIPS_VM_JAVAFRAMEANCHOR_MIPS_HPP +#define CPU_MIPS_VM_JAVAFRAMEANCHOR_MIPS_HPP + +private: + + // FP value associated with _last_Java_sp: + intptr_t* volatile _last_Java_fp; // pointer is volatile not what it points to + +public: + // Each arch must define reset, save, restore + // These are used by objects that only care about: + // 1 - initializing a new state (thread creation, javaCalls) + // 2 - saving a current state (javaCalls) + // 3 - restoring an old state (javaCalls) + + void clear(void) { + // clearing _last_Java_sp must be first + _last_Java_sp = NULL; + // fence? + _last_Java_fp = NULL; + _last_Java_pc = NULL; + } + + void copy(JavaFrameAnchor* src) { + // In order to make sure the transition state is valid for "this" + // We must clear _last_Java_sp before copying the rest of the new data + // + // Hack Alert: Temporary bugfix for 4717480/4721647 + // To act like previous version (pd_cache_state) don't NULL _last_Java_sp + // unless the value is changing + // + if (_last_Java_sp != src->_last_Java_sp) + _last_Java_sp = NULL; + + _last_Java_fp = src->_last_Java_fp; + _last_Java_pc = src->_last_Java_pc; + // Must be last so profiler will always see valid frame if has_last_frame() is true + _last_Java_sp = src->_last_Java_sp; + } + + // Always walkable + bool walkable(void) { return true; } + // Never any thing to do since we are always walkable and can find address of return addresses + void make_walkable(JavaThread* thread) { } + + intptr_t* last_Java_sp(void) const { return _last_Java_sp; } + + address last_Java_pc(void) { return _last_Java_pc; } + +private: + + static ByteSize last_Java_fp_offset() { return byte_offset_of(JavaFrameAnchor, _last_Java_fp); } + +public: + + void set_last_Java_sp(intptr_t* sp) { _last_Java_sp = sp; } + + intptr_t* last_Java_fp(void) { return _last_Java_fp; } + // Assert (last_Java_sp == NULL || fp == NULL) + void set_last_Java_fp(intptr_t* fp) { _last_Java_fp = fp; } + +#endif // CPU_MIPS_VM_JAVAFRAMEANCHOR_MIPS_HPP diff --git a/src/hotspot/cpu/mips/jniFastGetField_mips_64.cpp b/src/hotspot/cpu/mips/jniFastGetField_mips_64.cpp new file mode 100644 index 00000000000..bba5b7eee87 --- /dev/null +++ b/src/hotspot/cpu/mips/jniFastGetField_mips_64.cpp @@ -0,0 +1,167 @@ +/* + * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "code/codeBlob.hpp" +#include "gc/shared/barrierSet.hpp" +#include "gc/shared/barrierSetAssembler.hpp" +#include "memory/resourceArea.hpp" +#include "prims/jniFastGetField.hpp" +#include "prims/jvm_misc.hpp" +#include "runtime/safepoint.hpp" + +#define __ masm-> + +#define T0 RT0 +#define T1 RT1 +#define T2 RT2 +#define T3 RT3 +#define T8 RT8 +#define T9 RT9 + +#define BUFFER_SIZE 30*wordSize + +// Instead of issuing lfence for LoadLoad barrier, we create data dependency +// between loads, which is more efficient than lfence. + +address JNI_FastGetField::generate_fast_get_int_field0(BasicType type) { + const char *name = NULL; + switch (type) { + case T_BOOLEAN: name = "jni_fast_GetBooleanField"; break; + case T_BYTE: name = "jni_fast_GetByteField"; break; + case T_CHAR: name = "jni_fast_GetCharField"; break; + case T_SHORT: name = "jni_fast_GetShortField"; break; + case T_INT: name = "jni_fast_GetIntField"; break; + case T_LONG: name = "jni_fast_GetLongField"; break; + case T_FLOAT: name = "jni_fast_GetFloatField"; break; + case T_DOUBLE: name = "jni_fast_GetDoubleField"; break; + default: ShouldNotReachHere(); + } + ResourceMark rm; + BufferBlob* blob = BufferBlob::create(name, BUFFER_SIZE); + CodeBuffer cbuf(blob); + MacroAssembler* masm = new MacroAssembler(&cbuf); + address fast_entry = __ pc(); + + Label slow; + + // return pc RA + // jni env A0 + // obj A1 + // jfieldID A2 + + address counter_addr = SafepointSynchronize::safepoint_counter_addr(); + __ set64(AT, (long)counter_addr); + __ lw(T1, AT, 0); + + // Parameters(A0~A3) should not be modified, since they will be used in slow path + __ andi(AT, T1, 1); + __ bne(AT, R0, slow); + __ delayed()->nop(); + + __ move(T0, A1); + // Both T0 and T9 are clobbered by try_resolve_jobject_in_native. 
+ BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); + bs->try_resolve_jobject_in_native(masm, /* jni_env */ A0, T0, T9, slow); + + __ dsrl(T2, A2, 2); // offset + __ daddu(T0, T0, T2); + + assert(count < LIST_CAPACITY, "LIST_CAPACITY too small"); + speculative_load_pclist[count] = __ pc(); + switch (type) { + case T_BOOLEAN: __ lbu (V0, T0, 0); break; + case T_BYTE: __ lb (V0, T0, 0); break; + case T_CHAR: __ lhu (V0, T0, 0); break; + case T_SHORT: __ lh (V0, T0, 0); break; + case T_INT: __ lw (V0, T0, 0); break; + case T_LONG: __ ld (V0, T0, 0); break; + case T_FLOAT: __ lwc1(F0, T0, 0); break; + case T_DOUBLE: __ ldc1(F0, T0, 0); break; + default: ShouldNotReachHere(); + } + + __ set64(AT, (long)counter_addr); + __ lw(AT, AT, 0); + __ bne(T1, AT, slow); + __ delayed()->nop(); + + __ jr(RA); + __ delayed()->nop(); + + slowcase_entry_pclist[count++] = __ pc(); + __ bind (slow); + address slow_case_addr = NULL; + switch (type) { + case T_BOOLEAN: slow_case_addr = jni_GetBooleanField_addr(); break; + case T_BYTE: slow_case_addr = jni_GetByteField_addr(); break; + case T_CHAR: slow_case_addr = jni_GetCharField_addr(); break; + case T_SHORT: slow_case_addr = jni_GetShortField_addr(); break; + case T_INT: slow_case_addr = jni_GetIntField_addr(); break; + case T_LONG: slow_case_addr = jni_GetLongField_addr(); break; + case T_FLOAT: slow_case_addr = jni_GetFloatField_addr(); break; + case T_DOUBLE: slow_case_addr = jni_GetDoubleField_addr(); break; + default: ShouldNotReachHere(); + } + __ jmp(slow_case_addr); + __ delayed()->nop(); + + __ flush (); + + return fast_entry; +} + +address JNI_FastGetField::generate_fast_get_boolean_field() { + return generate_fast_get_int_field0(T_BOOLEAN); +} + +address JNI_FastGetField::generate_fast_get_byte_field() { + return generate_fast_get_int_field0(T_BYTE); +} + +address JNI_FastGetField::generate_fast_get_char_field() { + return generate_fast_get_int_field0(T_CHAR); +} + +address JNI_FastGetField::generate_fast_get_short_field() { + return generate_fast_get_int_field0(T_SHORT); +} + +address JNI_FastGetField::generate_fast_get_int_field() { + return generate_fast_get_int_field0(T_INT); +} + +address JNI_FastGetField::generate_fast_get_long_field() { + return generate_fast_get_int_field0(T_LONG); +} + +address JNI_FastGetField::generate_fast_get_float_field() { + return generate_fast_get_int_field0(T_FLOAT); +} + +address JNI_FastGetField::generate_fast_get_double_field() { + return generate_fast_get_int_field0(T_DOUBLE); +} diff --git a/src/hotspot/cpu/mips/jniTypes_mips.hpp b/src/hotspot/cpu/mips/jniTypes_mips.hpp new file mode 100644 index 00000000000..e93237ffd92 --- /dev/null +++ b/src/hotspot/cpu/mips/jniTypes_mips.hpp @@ -0,0 +1,144 @@ +/* + * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_MIPS_VM_JNITYPES_MIPS_HPP +#define CPU_MIPS_VM_JNITYPES_MIPS_HPP + +#include "jni.h" +#include "memory/allocation.hpp" +#include "oops/oop.hpp" + +// This file holds platform-dependent routines used to write primitive jni +// types to the array of arguments passed into JavaCalls::call + +class JNITypes : AllStatic { + // These functions write a java primitive type (in native format) + // to a java stack slot array to be passed as an argument to JavaCalls:calls. + // I.e., they are functionally 'push' operations if they have a 'pos' + // formal parameter. Note that jlong's and jdouble's are written + // _in reverse_ of the order in which they appear in the interpreter + // stack. This is because call stubs (see stubGenerator_sparc.cpp) + // reverse the argument list constructed by JavaCallArguments (see + // javaCalls.hpp). + +private: + + // 32bit Helper routines. + static inline void put_int2r(jint *from, intptr_t *to) { *(jint *)(to++) = from[1]; + *(jint *)(to ) = from[0]; } + static inline void put_int2r(jint *from, intptr_t *to, int& pos) { put_int2r(from, to + pos); pos += 2; } + +public: + // In MIPS64, the sizeof intptr_t is 8 bytes, and each unit in JavaCallArguments::_value_buffer[] + // is 8 bytes. + // If we only write the low 4 bytes with (jint *), the high 4-bits will be left with uncertain values. + // Then, in JavaCallArguments::parameters(), the whole 8 bytes of a T_INT parameter is loaded. + // This error occurs in ReflectInvoke.java + // The parameter of DD(int) should be 4 instead of 0x550000004. + // + // See: [runtime/javaCalls.hpp] + + static inline void put_int(jint from, intptr_t *to) { *(intptr_t *)(to + 0 ) = from; } + static inline void put_int(jint from, intptr_t *to, int& pos) { *(intptr_t *)(to + pos++) = from; } + static inline void put_int(jint *from, intptr_t *to, int& pos) { *(intptr_t *)(to + pos++) = *from; } + + // Longs are stored in native format in one JavaCallArgument slot at + // *(to). + // In theory, *(to + 1) is an empty slot. But, for several Java2D testing programs (TestBorderLayout, SwingTest), + // *(to + 1) must contains a copy of the long value. Otherwise it will corrupts. + static inline void put_long(jlong from, intptr_t *to) { + *(jlong*) (to + 1) = from; + *(jlong*) (to) = from; + } + + // A long parameter occupies two slot. + // It must fit the layout rule in methodHandle. + // + // See: [runtime/reflection.cpp] Reflection::invoke() + // assert(java_args.size_of_parameters() == method->size_of_parameters(), "just checking"); + + static inline void put_long(jlong from, intptr_t *to, int& pos) { + *(jlong*) (to + 1 + pos) = from; + *(jlong*) (to + pos) = from; + pos += 2; + } + + static inline void put_long(jlong *from, intptr_t *to, int& pos) { + *(jlong*) (to + 1 + pos) = *from; + *(jlong*) (to + pos) = *from; + pos += 2; + } + + // Oops are stored in native format in one JavaCallArgument slot at *to. 
+ static inline void put_obj(oop from, intptr_t *to) { *(oop *)(to + 0 ) = from; } + static inline void put_obj(oop from, intptr_t *to, int& pos) { *(oop *)(to + pos++) = from; } + static inline void put_obj(oop *from, intptr_t *to, int& pos) { *(oop *)(to + pos++) = *from; } + + // Floats are stored in native format in one JavaCallArgument slot at *to. + static inline void put_float(jfloat from, intptr_t *to) { *(jfloat *)(to + 0 ) = from; } + static inline void put_float(jfloat from, intptr_t *to, int& pos) { *(jfloat *)(to + pos++) = from; } + static inline void put_float(jfloat *from, intptr_t *to, int& pos) { *(jfloat *)(to + pos++) = *from; } + +#undef _JNI_SLOT_OFFSET +#define _JNI_SLOT_OFFSET 0 + + // Longs are stored in native format in one JavaCallArgument slot at + // *(to). + // In theory, *(to + 1) is an empty slot. But, for several Java2D testing programs (TestBorderLayout, SwingTest), + // *(to + 1) must contains a copy of the long value. Otherwise it will corrupts. + static inline void put_double(jdouble from, intptr_t *to) { + *(jdouble*) (to + 1) = from; + *(jdouble*) (to) = from; + } + + // A long parameter occupies two slot. + // It must fit the layout rule in methodHandle. + // + // See: [runtime/reflection.cpp] Reflection::invoke() + // assert(java_args.size_of_parameters() == method->size_of_parameters(), "just checking"); + + static inline void put_double(jdouble from, intptr_t *to, int& pos) { + *(jdouble*) (to + 1 + pos) = from; + *(jdouble*) (to + pos) = from; + pos += 2; + } + + static inline void put_double(jdouble *from, intptr_t *to, int& pos) { + *(jdouble*) (to + 1 + pos) = *from; + *(jdouble*) (to + pos) = *from; + pos += 2; + } + + // The get_xxx routines, on the other hand, actually _do_ fetch + // java primitive types from the interpreter stack. + static inline jint get_int (intptr_t *from) { return *(jint *) from; } + static inline jlong get_long (intptr_t *from) { return *(jlong *) (from + _JNI_SLOT_OFFSET); } + static inline oop get_obj (intptr_t *from) { return *(oop *) from; } + static inline jfloat get_float (intptr_t *from) { return *(jfloat *) from; } + static inline jdouble get_double(intptr_t *from) { return *(jdouble *)(from + _JNI_SLOT_OFFSET); } +#undef _JNI_SLOT_OFFSET +}; + +#endif // CPU_MIPS_VM_JNITYPES_MIPS_HPP diff --git a/src/hotspot/cpu/mips/macroAssembler_mips.cpp b/src/hotspot/cpu/mips/macroAssembler_mips.cpp new file mode 100644 index 00000000000..cc868cae556 --- /dev/null +++ b/src/hotspot/cpu/mips/macroAssembler_mips.cpp @@ -0,0 +1,4257 @@ +/* + * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2017, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "jvm.h" +#include "asm/assembler.hpp" +#include "asm/assembler.inline.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "compiler/disassembler.hpp" +#include "gc/shared/barrierSet.hpp" +#include "gc/shared/barrierSetAssembler.hpp" +#include "gc/shared/collectedHeap.inline.hpp" +#include "interpreter/interpreter.hpp" +#include "memory/resourceArea.hpp" +#include "memory/universe.hpp" +#include "nativeInst_mips.hpp" +#include "prims/methodHandles.hpp" +#include "runtime/biasedLocking.hpp" +#include "runtime/interfaceSupport.inline.hpp" +#include "runtime/objectMonitor.hpp" +#include "runtime/os.hpp" +#include "runtime/safepoint.hpp" +#include "runtime/safepointMechanism.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#include "utilities/macros.hpp" + +#ifdef COMPILER2 +#include "opto/intrinsicnode.hpp" +#endif + +#define T0 RT0 +#define T1 RT1 +#define T2 RT2 +#define T3 RT3 +#define T8 RT8 +#define T9 RT9 + +// Implementation of MacroAssembler + +intptr_t MacroAssembler::i[32] = {0}; +float MacroAssembler::f[32] = {0.0}; + +void MacroAssembler::print(outputStream *s) { + unsigned int k; + for(k=0; k<32; k++) { + s->print_cr("i%d = 0x%.16lx", k, i[k]); + } + s->cr(); + + for(k=0; k<32; k++) { + s->print_cr("f%d = %f", k, f[k]); + } + s->cr(); +} + +int MacroAssembler::i_offset(unsigned int k) { return (intptr_t)&((MacroAssembler*)0)->i[k]; } +int MacroAssembler::f_offset(unsigned int k) { return (intptr_t)&((MacroAssembler*)0)->f[k]; } + +void MacroAssembler::save_registers(MacroAssembler *masm) { +#define __ masm-> + for(int k=0; k<32; k++) { + __ sw (as_Register(k), A0, i_offset(k)); + } + + for(int k=0; k<32; k++) { + __ swc1 (as_FloatRegister(k), A0, f_offset(k)); + } +#undef __ +} + +void MacroAssembler::restore_registers(MacroAssembler *masm) { +#define __ masm-> + for(int k=0; k<32; k++) { + __ lw (as_Register(k), A0, i_offset(k)); + } + + for(int k=0; k<32; k++) { + __ lwc1 (as_FloatRegister(k), A0, f_offset(k)); + } +#undef __ +} + + +void MacroAssembler::pd_patch_instruction(address branch, address target) { + jint& stub_inst = *(jint*) branch; + jint *pc = (jint *)branch; + + if((opcode(stub_inst) == special_op) && (special(stub_inst) == daddu_op)) { + //b_far: + // move(AT, RA); // daddu + // emit_long(insn_ORRI(regimm_op, 0, bgezal_op, 1)); + // nop(); + // lui(T9, 0); // to be patched + // ori(T9, 0); + // daddu(T9, T9, RA); + // move(RA, AT); + // jr(T9); + + assert(opcode(pc[3]) == lui_op + && opcode(pc[4]) == ori_op + && special(pc[5]) == daddu_op, "Not a branch label patch"); + if(!(opcode(pc[3]) == lui_op + && opcode(pc[4]) == ori_op + && special(pc[5]) == daddu_op)) { tty->print_cr("Not a branch label patch"); } + + int offset = target - branch; + if (!is_simm16(offset)) { + pc[3] = (pc[3] & 0xffff0000) | high16(offset - 12); + pc[4] = (pc[4] & 0xffff0000) | low16(offset - 12); + } else { + // revert to "beq + nop" + CodeBuffer cb(branch, 4 * 10); + MacroAssembler masm(&cb); +#define __ masm.
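+      // The label turned out to be reachable with a short branch, so the
+      // 8-instruction b_far template recorded above is simply overwritten,
+      // schematically:
+      //
+      //   b      target
+      //   nop                  // delay slot
+      //   nop x 6              // pad out the rest of the template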
+ __ b(target); + __ delayed()->nop(); + __ nop(); + __ nop(); + __ nop(); + __ nop(); + __ nop(); + __ nop(); + } + return; + } else if (special(pc[4]) == jr_op + && opcode(pc[4]) == special_op + && (((opcode(pc[0]) == lui_op) || opcode(pc[0]) == daddiu_op) || (opcode(pc[0]) == ori_op))) { + //jmp_far: + // patchable_set48(T9, target); + // jr(T9); + // nop(); + + CodeBuffer cb(branch, 4 * 4); + MacroAssembler masm(&cb); + masm.patchable_set48(T9, (long)(target)); + return; + } + +#ifndef PRODUCT + if (!is_simm16((target - branch - 4) >> 2)) { + tty->print_cr("Illegal patching: branch = " INTPTR_FORMAT ", target = " INTPTR_FORMAT, p2i(branch), p2i(target)); + tty->print_cr("======= Start decoding at branch = " INTPTR_FORMAT " =======", p2i(branch)); + Disassembler::decode(branch - 4 * 16, branch + 4 * 16, tty); + tty->print_cr("======= End of decoding ======="); + } +#endif + + stub_inst = patched_branch(target - branch, stub_inst, 0); +} + +static inline address first_cache_address() { + return CodeCache::low_bound() + sizeof(HeapBlock::Header); +} + +static inline address last_cache_address() { + return CodeCache::high_bound() - Assembler::InstructionSize; +} + +int MacroAssembler::call_size(address target, bool far, bool patchable) { + if (patchable) return 6 << Assembler::LogInstructionSize; + if (!far) return 2 << Assembler::LogInstructionSize; // jal + nop + return (insts_for_set64((jlong)target) + 2) << Assembler::LogInstructionSize; +} + +// Can we reach target using jal/j from anywhere +// in the code cache (because code can be relocated)? +bool MacroAssembler::reachable_from_cache(address target) { + address cl = first_cache_address(); + address ch = last_cache_address(); + + return (cl <= target) && (target <= ch) && fit_in_jal(cl, ch); +} + +bool MacroAssembler::reachable_from_cache() { + if (ForceUnreachable) { + return false; + } else { + address cl = first_cache_address(); + address ch = last_cache_address(); + + return fit_in_jal(cl, ch); + } +} + +void MacroAssembler::general_jump(address target) { + if (reachable_from_cache(target)) { + j(target); + delayed()->nop(); + } else { + set64(T9, (long)target); + jr(T9); + delayed()->nop(); + } +} + +int MacroAssembler::insts_for_general_jump(address target) { + if (reachable_from_cache(target)) { + //j(target); + //nop(); + return 2; + } else { + //set64(T9, (long)target); + //jr(T9); + //nop(); + return insts_for_set64((jlong)target) + 2; + } +} + +void MacroAssembler::patchable_jump(address target) { + if (reachable_from_cache(target)) { + nop(); + nop(); + nop(); + nop(); + j(target); + delayed()->nop(); + } else { + patchable_set48(T9, (long)target); + jr(T9); + delayed()->nop(); + } +} + +int MacroAssembler::insts_for_patchable_jump(address target) { + return 6; +} + +void MacroAssembler::general_call(address target) { + if (reachable_from_cache(target)) { + jal(target); + delayed()->nop(); + } else { + set64(T9, (long)target); + jalr(T9); + delayed()->nop(); + } +} + +int MacroAssembler::insts_for_general_call(address target) { + if (reachable_from_cache(target)) { + //jal(target); + //nop(); + return 2; + } else { + //set64(T9, (long)target); + //jalr(T9); + //nop(); + return insts_for_set64((jlong)target) + 2; + } +} + +void MacroAssembler::patchable_call(address target) { + if (reachable_from_cache(target)) { + nop(); + nop(); + nop(); + nop(); + jal(target); + delayed()->nop(); + } else { + patchable_set48(T9, (long)target); + jalr(T9); + delayed()->nop(); + } +} + +int 
MacroAssembler::insts_for_patchable_call(address target) { + return 6; +} + +// Maybe emit a call via a trampoline. If the code cache is small +// trampolines won't be emitted. + +address MacroAssembler::trampoline_call(AddressLiteral entry, CodeBuffer *cbuf) { + assert(JavaThread::current()->is_Compiler_thread(), "just checking"); + assert(entry.rspec().type() == relocInfo::runtime_call_type + || entry.rspec().type() == relocInfo::opt_virtual_call_type + || entry.rspec().type() == relocInfo::static_call_type + || entry.rspec().type() == relocInfo::virtual_call_type, "wrong reloc type"); + + address target = entry.target(); + if (!reachable_from_cache()) { + address stub = emit_trampoline_stub(offset(), target); + if (stub == NULL) { + return NULL; // CodeCache is full + } + } + + if (cbuf) cbuf->set_insts_mark(); + relocate(entry.rspec()); + + if (reachable_from_cache()) { + nop(); + nop(); + nop(); + nop(); + jal(target); + delayed()->nop(); + } else { + // load the call target from the trampoline stub + // branch + long dest = (long)pc(); + dest += (dest & 0x8000) << 1; + lui(T9, dest >> 32); + ori(T9, T9, split_low(dest >> 16)); + dsll(T9, T9, 16); + ld(T9, T9, simm16(split_low(dest))); + jalr(T9); + delayed()->nop(); + } + return pc(); +} + +// Emit a trampoline stub for a call to a target which is too far away. +address MacroAssembler::emit_trampoline_stub(int insts_call_instruction_offset, + address dest) { + // Max stub size: alignment nop, TrampolineStub. + address stub = start_a_stub(NativeInstruction::nop_instruction_size + + NativeCallTrampolineStub::instruction_size); + if (stub == NULL) { + return NULL; // CodeBuffer::expand failed + } + + // Create a trampoline stub relocation which relates this trampoline stub + // with the call instruction at insts_call_instruction_offset in the + // instructions code-section. 
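+  // Shape of the stub emitted below (illustration only):
+  //
+  //   <trampoline_stub_Relocation, pointing back at the call instruction
+  //    located at insts_call_instruction_offset>
+  //   .align wordSize
+  //   .dword dest              // 64-bit call target, loaded by the far call sequence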
+ align(wordSize); + relocate(trampoline_stub_Relocation::spec(code()->insts()->start() + + insts_call_instruction_offset)); + emit_int64((int64_t)dest); + end_a_stub(); + return stub; +} + +void MacroAssembler::beq_far(Register rs, Register rt, address entry) { + u_char * cur_pc = pc(); + + // Near/Far jump + if(is_simm16((entry - pc() - 4) / 4)) { + Assembler::beq(rs, rt, offset(entry)); + } else { + Label not_jump; + bne(rs, rt, not_jump); + delayed()->nop(); + + b_far(entry); + delayed()->nop(); + + bind(not_jump); + has_delay_slot(); + } +} + +void MacroAssembler::beq_far(Register rs, Register rt, Label& L) { + if (L.is_bound()) { + beq_far(rs, rt, target(L)); + } else { + u_char * cur_pc = pc(); + Label not_jump; + bne(rs, rt, not_jump); + delayed()->nop(); + + b_far(L); + delayed()->nop(); + + bind(not_jump); + has_delay_slot(); + } +} + +void MacroAssembler::bne_far(Register rs, Register rt, address entry) { + u_char * cur_pc = pc(); + + //Near/Far jump + if(is_simm16((entry - pc() - 4) / 4)) { + Assembler::bne(rs, rt, offset(entry)); + } else { + Label not_jump; + beq(rs, rt, not_jump); + delayed()->nop(); + + b_far(entry); + delayed()->nop(); + + bind(not_jump); + has_delay_slot(); + } +} + +void MacroAssembler::bne_far(Register rs, Register rt, Label& L) { + if (L.is_bound()) { + bne_far(rs, rt, target(L)); + } else { + u_char * cur_pc = pc(); + Label not_jump; + beq(rs, rt, not_jump); + delayed()->nop(); + + b_far(L); + delayed()->nop(); + + bind(not_jump); + has_delay_slot(); + } +} + +void MacroAssembler::beq_long(Register rs, Register rt, Label& L) { + Label not_taken; + + bne(rs, rt, not_taken); + delayed()->nop(); + + jmp_far(L); + + bind(not_taken); +} + +void MacroAssembler::bne_long(Register rs, Register rt, Label& L) { + Label not_taken; + + beq(rs, rt, not_taken); + delayed()->nop(); + + jmp_far(L); + + bind(not_taken); +} + +void MacroAssembler::bc1t_long(Label& L) { + Label not_taken; + + bc1f(not_taken); + delayed()->nop(); + + jmp_far(L); + + bind(not_taken); +} + +void MacroAssembler::bc1f_long(Label& L) { + Label not_taken; + + bc1t(not_taken); + delayed()->nop(); + + jmp_far(L); + + bind(not_taken); +} + +void MacroAssembler::b_far(Label& L) { + if (L.is_bound()) { + b_far(target(L)); + } else { + volatile address dest = target(L); +// +// MacroAssembler::pd_patch_instruction branch=55651ed514, target=55651ef6d8 +// 0x00000055651ed514: daddu at, ra, zero +// 0x00000055651ed518: [4110001]bgezal zero, 0x00000055651ed520 +// +// 0x00000055651ed51c: sll zero, zero, 0 +// 0x00000055651ed520: lui t9, 0x0 +// 0x00000055651ed524: ori t9, t9, 0x21b8 +// 0x00000055651ed528: daddu t9, t9, ra +// 0x00000055651ed52c: daddu ra, at, zero +// 0x00000055651ed530: jr t9 +// 0x00000055651ed534: sll zero, zero, 0 +// + move(AT, RA); + emit_long(insn_ORRI(regimm_op, 0, bgezal_op, 1)); + nop(); + lui(T9, 0); // to be patched + ori(T9, T9, 0); + daddu(T9, T9, RA); + move(RA, AT); + jr(T9); + } +} + +void MacroAssembler::b_far(address entry) { + u_char * cur_pc = pc(); + + // Near/Far jump + if(is_simm16((entry - pc() - 4) / 4)) { + b(offset(entry)); + } else { + // address must be bounded + move(AT, RA); + emit_long(insn_ORRI(regimm_op, 0, bgezal_op, 1)); + nop(); + li32(T9, entry - pc()); + daddu(T9, T9, RA); + move(RA, AT); + jr(T9); + } +} + +void MacroAssembler::ld_ptr(Register rt, Register base, Register offset) { + addu_long(AT, base, offset); + ld_ptr(rt, AT, 0); +} + +void MacroAssembler::st_ptr(Register rt, Register base, Register offset) { + guarantee(AT != rt, "AT must not 
equal rt"); + addu_long(AT, base, offset); + st_ptr(rt, AT, 0); +} + +Address MacroAssembler::as_Address(AddressLiteral adr) { + return Address(adr.target(), adr.rspec()); +} + +Address MacroAssembler::as_Address(ArrayAddress adr) { + return Address::make_array(adr); +} + +// tmp_reg1 and tmp_reg2 should be saved outside of atomic_inc32 (caller saved). +void MacroAssembler::atomic_inc32(address counter_addr, int inc, Register tmp_reg1, Register tmp_reg2) { + Label again; + + li(tmp_reg1, counter_addr); + bind(again); + if (UseSyncLevel >= 10000 || UseSyncLevel == 1000 || UseSyncLevel == 4000) sync(); + ll(tmp_reg2, tmp_reg1, 0); + addiu(tmp_reg2, tmp_reg2, inc); + sc(tmp_reg2, tmp_reg1, 0); + beq(tmp_reg2, R0, again); + delayed()->nop(); +} + +void MacroAssembler::reserved_stack_check() { + Register thread = TREG; +#ifndef OPT_THREAD + get_thread(thread); +#endif + // testing if reserved zone needs to be enabled + Label no_reserved_zone_enabling; + + ld(AT, Address(thread, JavaThread::reserved_stack_activation_offset())); + dsubu(AT, SP, AT); + bltz(AT, no_reserved_zone_enabling); + delayed()->nop(); + + enter(); // RA and FP are live. + call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::enable_stack_reserved_zone), thread); + leave(); + + // We have already removed our own frame. + // throw_delayed_StackOverflowError will think that it's been + // called by our caller. + li(AT, (long)StubRoutines::throw_delayed_StackOverflowError_entry()); + jr(AT); + delayed()->nop(); + should_not_reach_here(); + + bind(no_reserved_zone_enabling); +} + +int MacroAssembler::biased_locking_enter(Register lock_reg, + Register obj_reg, + Register swap_reg, + Register tmp_reg, + bool swap_reg_contains_mark, + Label& done, + Label* slow_case, + BiasedLockingCounters* counters) { + assert(UseBiasedLocking, "why call this otherwise?"); + bool need_tmp_reg = false; + if (tmp_reg == noreg) { + need_tmp_reg = true; + tmp_reg = T9; + } + assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg, AT); + assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout"); + Address mark_addr (obj_reg, oopDesc::mark_offset_in_bytes()); + Address saved_mark_addr(lock_reg, 0); + + // Biased locking + // See whether the lock is currently biased toward our thread and + // whether the epoch is still valid + // Note that the runtime guarantees sufficient alignment of JavaThread + // pointers to allow age to be placed into low bits + // First check to see whether biasing is even enabled for this object + Label cas_label; + int null_check_offset = -1; + if (!swap_reg_contains_mark) { + null_check_offset = offset(); + ld_ptr(swap_reg, mark_addr); + } + + if (need_tmp_reg) { + push(tmp_reg); + } + move(tmp_reg, swap_reg); + andi(tmp_reg, tmp_reg, markOopDesc::biased_lock_mask_in_place); + daddiu(AT, R0, markOopDesc::biased_lock_pattern); + dsubu(AT, AT, tmp_reg); + if (need_tmp_reg) { + pop(tmp_reg); + } + + bne(AT, R0, cas_label); + delayed()->nop(); + + + // The bias pattern is present in the object's header. Need to check + // whether the bias owner and the epoch are both still current. + // Note that because there is no current thread register on MIPS we + // need to store off the mark word we read out of the object to + // avoid reloading it and needing to recheck invariants below. This + // store is unfortunate but it makes the overall code shorter and + // simpler. 
+ st_ptr(swap_reg, saved_mark_addr); + if (need_tmp_reg) { + push(tmp_reg); + } + if (swap_reg_contains_mark) { + null_check_offset = offset(); + } + load_prototype_header(tmp_reg, obj_reg); + xorr(tmp_reg, tmp_reg, swap_reg); +#ifndef OPT_THREAD + get_thread(swap_reg); + xorr(swap_reg, swap_reg, tmp_reg); +#else + xorr(swap_reg, TREG, tmp_reg); +#endif + + move(AT, ~((int) markOopDesc::age_mask_in_place)); + andr(swap_reg, swap_reg, AT); + + if (PrintBiasedLockingStatistics) { + Label L; + bne(swap_reg, R0, L); + delayed()->nop(); + push(tmp_reg); + push(A0); + atomic_inc32((address)BiasedLocking::biased_lock_entry_count_addr(), 1, A0, tmp_reg); + pop(A0); + pop(tmp_reg); + bind(L); + } + if (need_tmp_reg) { + pop(tmp_reg); + } + beq(swap_reg, R0, done); + delayed()->nop(); + Label try_revoke_bias; + Label try_rebias; + + // At this point we know that the header has the bias pattern and + // that we are not the bias owner in the current epoch. We need to + // figure out more details about the state of the header in order to + // know what operations can be legally performed on the object's + // header. + + // If the low three bits in the xor result aren't clear, that means + // the prototype header is no longer biased and we have to revoke + // the bias on this object. + + move(AT, markOopDesc::biased_lock_mask_in_place); + andr(AT, swap_reg, AT); + bne(AT, R0, try_revoke_bias); + delayed()->nop(); + // Biasing is still enabled for this data type. See whether the + // epoch of the current bias is still valid, meaning that the epoch + // bits of the mark word are equal to the epoch bits of the + // prototype header. (Note that the prototype header's epoch bits + // only change at a safepoint.) If not, attempt to rebias the object + // toward the current thread. Note that we must be absolutely sure + // that the current epoch is invalid in order to do this because + // otherwise the manipulations it performs on the mark word are + // illegal. + + move(AT, markOopDesc::epoch_mask_in_place); + andr(AT,swap_reg, AT); + bne(AT, R0, try_rebias); + delayed()->nop(); + // The epoch of the current bias is still valid but we know nothing + // about the owner; it might be set or it might be clear. Try to + // acquire the bias of the object using an atomic operation. If this + // fails we will go in to the runtime to revoke the object's bias. + // Note that we first construct the presumed unbiased header so we + // don't accidentally blow away another thread's valid bias. + + ld_ptr(swap_reg, saved_mark_addr); + + move(AT, markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place); + andr(swap_reg, swap_reg, AT); + + if (need_tmp_reg) { + push(tmp_reg); + } +#ifndef OPT_THREAD + get_thread(tmp_reg); + orr(tmp_reg, tmp_reg, swap_reg); +#else + orr(tmp_reg, TREG, swap_reg); +#endif + cmpxchg(Address(obj_reg, 0), swap_reg, tmp_reg, AT, false, false); + if (need_tmp_reg) { + pop(tmp_reg); + } + // If the biasing toward our thread failed, this means that + // another thread succeeded in biasing it toward itself and we + // need to revoke that bias. The revocation will occur in the + // interpreter runtime in the slow case. 
+ if (PrintBiasedLockingStatistics) { + Label L; + bne(AT, R0, L); + delayed()->nop(); + push(tmp_reg); + push(A0); + atomic_inc32((address)BiasedLocking::anonymously_biased_lock_entry_count_addr(), 1, A0, tmp_reg); + pop(A0); + pop(tmp_reg); + bind(L); + } + if (slow_case != NULL) { + beq_far(AT, R0, *slow_case); + delayed()->nop(); + } + b(done); + delayed()->nop(); + + bind(try_rebias); + // At this point we know the epoch has expired, meaning that the + // current "bias owner", if any, is actually invalid. Under these + // circumstances _only_, we are allowed to use the current header's + // value as the comparison value when doing the cas to acquire the + // bias in the current epoch. In other words, we allow transfer of + // the bias from one thread to another directly in this situation. + // + // FIXME: due to a lack of registers we currently blow away the age + // bits in this situation. Should attempt to preserve them. + if (need_tmp_reg) { + push(tmp_reg); + } + load_prototype_header(tmp_reg, obj_reg); +#ifndef OPT_THREAD + get_thread(swap_reg); + orr(tmp_reg, tmp_reg, swap_reg); +#else + orr(tmp_reg, tmp_reg, TREG); +#endif + ld_ptr(swap_reg, saved_mark_addr); + + cmpxchg(Address(obj_reg, 0), swap_reg, tmp_reg, AT, false, false); + if (need_tmp_reg) { + pop(tmp_reg); + } + // If the biasing toward our thread failed, then another thread + // succeeded in biasing it toward itself and we need to revoke that + // bias. The revocation will occur in the runtime in the slow case. + if (PrintBiasedLockingStatistics) { + Label L; + bne(AT, R0, L); + delayed()->nop(); + push(AT); + push(tmp_reg); + atomic_inc32((address)BiasedLocking::rebiased_lock_entry_count_addr(), 1, AT, tmp_reg); + pop(tmp_reg); + pop(AT); + bind(L); + } + if (slow_case != NULL) { + beq_far(AT, R0, *slow_case); + delayed()->nop(); + } + + b(done); + delayed()->nop(); + bind(try_revoke_bias); + // The prototype mark in the klass doesn't have the bias bit set any + // more, indicating that objects of this data type are not supposed + // to be biased any more. We are going to try to reset the mark of + // this object to the prototype value and fall through to the + // CAS-based locking scheme. Note that if our CAS fails, it means + // that another thread raced us for the privilege of revoking the + // bias of this particular object, so it's okay to continue in the + // normal locking code. + // + // FIXME: due to a lack of registers we currently blow away the age + // bits in this situation. Should attempt to preserve them. + ld_ptr(swap_reg, saved_mark_addr); + + if (need_tmp_reg) { + push(tmp_reg); + } + load_prototype_header(tmp_reg, obj_reg); + cmpxchg(Address(obj_reg, 0), swap_reg, tmp_reg, AT, false, false); + if (need_tmp_reg) { + pop(tmp_reg); + } + // Fall through to the normal CAS-based lock, because no matter what + // the result of the above CAS, some thread must have succeeded in + // removing the bias bit from the object's header. + if (PrintBiasedLockingStatistics) { + Label L; + bne(AT, R0, L); + delayed()->nop(); + push(AT); + push(tmp_reg); + atomic_inc32((address)BiasedLocking::revoked_lock_entry_count_addr(), 1, AT, tmp_reg); + pop(tmp_reg); + pop(AT); + bind(L); + } + + bind(cas_label); + return null_check_offset; +} + +void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, Label& done) { + assert(UseBiasedLocking, "why call this otherwise?"); + + // Check for biased locking unlock case, which is a no-op + // Note: we do not have to check the thread ID for two reasons. 
+ // First, the interpreter checks for IllegalMonitorStateException at + // a higher level. Second, if the bias was revoked while we held the + // lock, the object could not be rebiased toward another thread, so + // the bias bit would be clear. + ld(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); + andi(temp_reg, temp_reg, markOopDesc::biased_lock_mask_in_place); + daddiu(AT, R0, markOopDesc::biased_lock_pattern); + + beq(AT, temp_reg, done); + delayed()->nop(); +} + +// the stack pointer adjustment is needed. see InterpreterMacroAssembler::super_call_VM_leaf +// this method will handle the stack problem, you need not to preserve the stack space for the argument now +void MacroAssembler::call_VM_leaf_base(address entry_point, int number_of_arguments) { + Label L, E; + + assert(number_of_arguments <= 4, "just check"); + + andi(AT, SP, 0xf); + beq(AT, R0, L); + delayed()->nop(); + daddiu(SP, SP, -8); + call(entry_point, relocInfo::runtime_call_type); + delayed()->nop(); + daddiu(SP, SP, 8); + b(E); + delayed()->nop(); + + bind(L); + call(entry_point, relocInfo::runtime_call_type); + delayed()->nop(); + bind(E); +} + + +void MacroAssembler::jmp(address entry) { + patchable_set48(T9, (long)entry); + jr(T9); +} + +void MacroAssembler::jmp(address entry, relocInfo::relocType rtype) { + switch (rtype) { + case relocInfo::runtime_call_type: + case relocInfo::none: + jmp(entry); + break; + default: + { + InstructionMark im(this); + relocate(rtype); + patchable_set48(T9, (long)entry); + jr(T9); + } + break; + } +} + +void MacroAssembler::jmp_far(Label& L) { + if (L.is_bound()) { + address entry = target(L); + assert(entry != NULL, "jmp most probably wrong"); + InstructionMark im(this); + + relocate(relocInfo::internal_word_type); + patchable_set48(T9, (long)entry); + } else { + InstructionMark im(this); + L.add_patch_at(code(), locator()); + + relocate(relocInfo::internal_word_type); + patchable_set48(T9, (long)pc()); + } + + jr(T9); + delayed()->nop(); +} +void MacroAssembler::mov_metadata(Address dst, Metadata* obj) { + int oop_index; + if (obj) { + oop_index = oop_recorder()->find_index(obj); + } else { + oop_index = oop_recorder()->allocate_metadata_index(obj); + } + relocate(metadata_Relocation::spec(oop_index)); + patchable_set48(AT, (long)obj); + sd(AT, dst); +} + +void MacroAssembler::mov_metadata(Register dst, Metadata* obj) { + int oop_index; + if (obj) { + oop_index = oop_recorder()->find_index(obj); + } else { + oop_index = oop_recorder()->allocate_metadata_index(obj); + } + relocate(metadata_Relocation::spec(oop_index)); + patchable_set48(dst, (long)obj); +} + +void MacroAssembler::call(address entry) { +// c/c++ code assume T9 is entry point, so we just always move entry to t9 +// maybe there is some more graceful method to handle this. FIXME +// For more info, see class NativeCall. 
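+  // Schematically, the patchable call sequence emitted here is:
+  //
+  //   lui   T9, entry[47:32]      \
+  //   ori   T9, T9, entry[31:16]   |  patchable_set48 (shorter constants are
+  //   dsll  T9, T9, 16             |  padded to 4 instructions with nops)
+  //   ori   T9, T9, entry[15:0]   /
+  //   jalr  T9                    // the caller fills the delay slot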
+ patchable_set48(T9, (long)entry); + jalr(T9); +} + +void MacroAssembler::call(address entry, relocInfo::relocType rtype) { + switch (rtype) { + case relocInfo::runtime_call_type: + case relocInfo::none: + call(entry); + break; + default: + { + InstructionMark im(this); + relocate(rtype); + call(entry); + } + break; + } +} + +void MacroAssembler::call(address entry, RelocationHolder& rh) +{ + switch (rh.type()) { + case relocInfo::runtime_call_type: + case relocInfo::none: + call(entry); + break; + default: + { + InstructionMark im(this); + relocate(rh); + call(entry); + } + break; + } +} + +void MacroAssembler::ic_call(address entry, jint method_index) { + RelocationHolder rh = virtual_call_Relocation::spec(pc(), method_index); + patchable_set48(IC_Klass, (long)Universe::non_oop_word()); + assert(entry != NULL, "call most probably wrong"); + InstructionMark im(this); + trampoline_call(AddressLiteral(entry, rh)); +} + +void MacroAssembler::c2bool(Register r) { + sltu(r, R0, r); +} + +#ifndef PRODUCT +extern "C" void findpc(intptr_t x); +#endif + +void MacroAssembler::debug(char* msg/*, RegistersForDebugging* regs*/) { + if ( ShowMessageBoxOnError ) { + JavaThreadState saved_state = JavaThread::current()->thread_state(); + JavaThread::current()->set_thread_state(_thread_in_vm); + { + // In order to get locks work, we need to fake a in_VM state + ttyLocker ttyl; + ::tty->print_cr("EXECUTION STOPPED: %s\n", msg); + if (CountBytecodes || TraceBytecodes || StopInterpreterAt) { + BytecodeCounter::print(); + } + + } + ThreadStateTransition::transition(JavaThread::current(), _thread_in_vm, saved_state); + } + else + ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg); +} + + +void MacroAssembler::stop(const char* msg) { + li(A0, (long)msg); + call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type); + delayed()->nop(); + brk(17); +} + +void MacroAssembler::warn(const char* msg) { + pushad(); + li(A0, (long)msg); + push(S2); + move(AT, -(StackAlignmentInBytes)); + move(S2, SP); // use S2 as a sender SP holder + andr(SP, SP, AT); // align stack as required by ABI + call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type); + delayed()->nop(); + move(SP, S2); // use S2 as a sender SP holder + pop(S2); + popad(); +} + +void MacroAssembler::increment(Register reg, int imm) { + if (!imm) return; + if (is_simm16(imm)) { + daddiu(reg, reg, imm); + } else { + move(AT, imm); + daddu(reg, reg, AT); + } +} + +void MacroAssembler::decrement(Register reg, int imm) { + increment(reg, -imm); +} + + +void MacroAssembler::call_VM(Register oop_result, + address entry_point, + bool check_exceptions) { + call_VM_helper(oop_result, entry_point, 0, check_exceptions); +} + +void MacroAssembler::call_VM(Register oop_result, + address entry_point, + Register arg_1, + bool check_exceptions) { + if (arg_1!=A1) move(A1, arg_1); + call_VM_helper(oop_result, entry_point, 1, check_exceptions); +} + +void MacroAssembler::call_VM(Register oop_result, + address entry_point, + Register arg_1, + Register arg_2, + bool check_exceptions) { + if (arg_1!=A1) move(A1, arg_1); + if (arg_2!=A2) move(A2, arg_2); + assert(arg_2 != A1, "smashed argument"); + call_VM_helper(oop_result, entry_point, 2, check_exceptions); +} + +void MacroAssembler::call_VM(Register oop_result, + address entry_point, + Register arg_1, + Register arg_2, + Register arg_3, + bool check_exceptions) { + if (arg_1!=A1) move(A1, arg_1); + if (arg_2!=A2) move(A2, arg_2); assert(arg_2 != A1, 
"smashed argument"); + if (arg_3!=A3) move(A3, arg_3); assert(arg_3 != A1 && arg_3 != A2, "smashed argument"); + call_VM_helper(oop_result, entry_point, 3, check_exceptions); +} + +void MacroAssembler::call_VM(Register oop_result, + Register last_java_sp, + address entry_point, + int number_of_arguments, + bool check_exceptions) { + call_VM_base(oop_result, NOREG, last_java_sp, entry_point, number_of_arguments, check_exceptions); +} + +void MacroAssembler::call_VM(Register oop_result, + Register last_java_sp, + address entry_point, + Register arg_1, + bool check_exceptions) { + if (arg_1 != A1) move(A1, arg_1); + call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions); +} + +void MacroAssembler::call_VM(Register oop_result, + Register last_java_sp, + address entry_point, + Register arg_1, + Register arg_2, + bool check_exceptions) { + if (arg_1 != A1) move(A1, arg_1); + if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument"); + call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions); +} + +void MacroAssembler::call_VM(Register oop_result, + Register last_java_sp, + address entry_point, + Register arg_1, + Register arg_2, + Register arg_3, + bool check_exceptions) { + if (arg_1 != A1) move(A1, arg_1); + if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument"); + if (arg_3 != A3) move(A3, arg_3); assert(arg_3 != A1 && arg_3 != A2, "smashed argument"); + call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions); +} + +void MacroAssembler::call_VM_base(Register oop_result, + Register java_thread, + Register last_java_sp, + address entry_point, + int number_of_arguments, + bool check_exceptions) { + + address before_call_pc; + // determine java_thread register + if (!java_thread->is_valid()) { +#ifndef OPT_THREAD + java_thread = T2; + get_thread(java_thread); +#else + java_thread = TREG; +#endif + } + // determine last_java_sp register + if (!last_java_sp->is_valid()) { + last_java_sp = SP; + } + // debugging support + assert(number_of_arguments >= 0 , "cannot have negative number of arguments"); + assert(number_of_arguments <= 4 , "cannot have negative number of arguments"); + assert(java_thread != oop_result , "cannot use the same register for java_thread & oop_result"); + assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp"); + + assert(last_java_sp != FP, "this code doesn't work for last_java_sp == fp, which currently can't portably work anyway since C2 doesn't save fp"); + + // set last Java frame before call + before_call_pc = (address)pc(); + set_last_Java_frame(java_thread, last_java_sp, FP, before_call_pc); + + // do the call + move(A0, java_thread); + call(entry_point, relocInfo::runtime_call_type); + delayed()->nop(); + + // restore the thread (cannot use the pushed argument since arguments + // may be overwritten by C code generated by an optimizing compiler); + // however can use the register value directly if it is callee saved. 
+#ifndef OPT_THREAD + get_thread(java_thread); +#else +#ifdef ASSERT + { + Label L; + get_thread(AT); + beq(java_thread, AT, L); + delayed()->nop(); + stop("MacroAssembler::call_VM_base: TREG not callee saved?"); + bind(L); + } +#endif +#endif + + // discard thread and arguments + ld_ptr(SP, java_thread, in_bytes(JavaThread::last_Java_sp_offset())); + // reset last Java frame + reset_last_Java_frame(java_thread, false); + + check_and_handle_popframe(java_thread); + check_and_handle_earlyret(java_thread); + if (check_exceptions) { + // check for pending exceptions (java_thread is set upon return) + Label L; + ld(AT, java_thread, in_bytes(Thread::pending_exception_offset())); + beq(AT, R0, L); + delayed()->nop(); + li(AT, before_call_pc); + push(AT); + jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); + delayed()->nop(); + bind(L); + } + + // get oop result if there is one and reset the value in the thread + if (oop_result->is_valid()) { + ld(oop_result, java_thread, in_bytes(JavaThread::vm_result_offset())); + sd(R0, java_thread, in_bytes(JavaThread::vm_result_offset())); + verify_oop(oop_result); + } +} + +void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) { + + move(V0, SP); + //we also reserve space for java_thread here + move(AT, -(StackAlignmentInBytes)); + andr(SP, SP, AT); + call_VM_base(oop_result, NOREG, V0, entry_point, number_of_arguments, check_exceptions); + +} + +void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) { + call_VM_leaf_base(entry_point, number_of_arguments); +} + +void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) { + if (arg_0 != A0) move(A0, arg_0); + call_VM_leaf(entry_point, 1); +} + +void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) { + if (arg_0 != A0) move(A0, arg_0); + if (arg_1 != A1) move(A1, arg_1); assert(arg_1 != A0, "smashed argument"); + call_VM_leaf(entry_point, 2); +} + +void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) { + if (arg_0 != A0) move(A0, arg_0); + if (arg_1 != A1) move(A1, arg_1); assert(arg_1 != A0, "smashed argument"); + if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A0 && arg_2 != A1, "smashed argument"); + call_VM_leaf(entry_point, 3); +} +void MacroAssembler::super_call_VM_leaf(address entry_point) { + MacroAssembler::call_VM_leaf_base(entry_point, 0); +} + + +void MacroAssembler::super_call_VM_leaf(address entry_point, + Register arg_1) { + if (arg_1 != A0) move(A0, arg_1); + MacroAssembler::call_VM_leaf_base(entry_point, 1); +} + + +void MacroAssembler::super_call_VM_leaf(address entry_point, + Register arg_1, + Register arg_2) { + if (arg_1 != A0) move(A0, arg_1); + if (arg_2 != A1) move(A1, arg_2); assert(arg_2 != A0, "smashed argument"); + MacroAssembler::call_VM_leaf_base(entry_point, 2); +} +void MacroAssembler::super_call_VM_leaf(address entry_point, + Register arg_1, + Register arg_2, + Register arg_3) { + if (arg_1 != A0) move(A0, arg_1); + if (arg_2 != A1) move(A1, arg_2); assert(arg_2 != A0, "smashed argument"); + if (arg_3 != A2) move(A2, arg_3); assert(arg_3 != A0 && arg_3 != A1, "smashed argument"); + MacroAssembler::call_VM_leaf_base(entry_point, 3); +} + +void MacroAssembler::check_and_handle_earlyret(Register java_thread) { +} + +void MacroAssembler::check_and_handle_popframe(Register java_thread) { +} + +void MacroAssembler::null_check(Register reg, int offset) { + if 
(needs_explicit_null_check(offset)) { + // provoke OS NULL exception if reg = NULL by + // accessing M[reg] w/o changing any (non-CC) registers + // NOTE: cmpl is plenty here to provoke a segv + lw(AT, reg, 0); + } else { + // nothing to do, (later) access of M[reg + offset] + // will provoke OS NULL exception if reg = NULL + } +} + +void MacroAssembler::enter() { + push2(RA, FP); + move(FP, SP); +} + +void MacroAssembler::leave() { + move(SP, FP); + pop2(RA, FP); +} + +void MacroAssembler::unimplemented(const char* what) { + const char* buf = NULL; + { + ResourceMark rm; + stringStream ss; + ss.print("unimplemented: %s", what); + buf = code_string(ss.as_string()); + } + stop(buf); +} + +void MacroAssembler::get_thread(Register thread) { +#ifdef MINIMIZE_RAM_USAGE +// +// In MIPS64, we don't use full 64-bit address space. +// Only a small range is actually used. +// +// Example: +// $ cat /proc/13352/maps +// 120000000-120010000 r-xp 00000000 08:01 41077 /mnt/openjdk6-mips-full/build/linux-mips64/j2sdk-image/bin/java +// 12001c000-120020000 rw-p 0000c000 08:01 41077 /mnt/openjdk6-mips-full/build/linux-mips64/j2sdk-image/bin/java +// 120020000-1208dc000 rwxp 00000000 00:00 0 [heap] +// 555d574000-555d598000 r-xp 00000000 08:01 2073768 /lib/ld-2.12.so +// 555d598000-555d59c000 rw-p 00000000 00:00 0 +// ...... +// 558b1f8000-558b23c000 rwxp 00000000 00:00 0 +// 558b23c000-558b248000 ---p 00000000 00:00 0 +// 558b248000-558b28c000 rwxp 00000000 00:00 0 +// ffff914000-ffff94c000 rwxp 00000000 00:00 0 [stack] +// ffffffc000-10000000000 r-xp 00000000 00:00 0 [vdso] +// +// All stacks are positioned at 0x55________. +// Therefore, we can utilize the same algorithm used in 32-bit. + // int index = ((uintptr_t)p >> PAGE_SHIFT) & ((1UL << (SP_BITLENGTH - PAGE_SHIFT)) - 1); + // Thread* thread = _sp_map[index]; + Register tmp; + + if (thread == AT) + tmp = T9; + else + tmp = AT; + + move(thread, SP); + shr(thread, PAGE_SHIFT); + + push(tmp); + li(tmp, ((1UL << (SP_BITLENGTH - PAGE_SHIFT)) - 1)); + andr(thread, thread, tmp); + shl(thread, Address::times_ptr); // sizeof(Thread *) + li48(tmp, (long)ThreadLocalStorage::sp_map_addr()); + addu(tmp, tmp, thread); + ld_ptr(thread, tmp, 0); + pop(tmp); +#else + if (thread != V0) { + push(V0); + } + pushad_except_v0(); + + push(S5); + move(S5, SP); + move(AT, -StackAlignmentInBytes); + andr(SP, SP, AT); + call(CAST_FROM_FN_PTR(address, Thread::current)); + //MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, Thread::current), 0); + delayed()->nop(); + move(SP, S5); + pop(S5); + + popad_except_v0(); + if (thread != V0) { + move(thread, V0); + pop(V0); + } +#endif // MINIMIZE_RAM_USAGE +} + +void MacroAssembler::reset_last_Java_frame(Register java_thread, bool clear_fp) { + // determine java_thread register + if (!java_thread->is_valid()) { +#ifndef OPT_THREAD + java_thread = T1; + get_thread(java_thread); +#else + java_thread = TREG; +#endif + } + // we must set sp to zero to clear frame + st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_sp_offset())); + // must clear fp, so that compiled frames are not confused; it is possible + // that we need it only for debugging + if(clear_fp) { + st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_fp_offset())); + } + + // Always clear the pc because it could have been set by make_walkable() + st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_pc_offset())); +} + +void MacroAssembler::reset_last_Java_frame(bool clear_fp) { + Register thread = TREG; +#ifndef OPT_THREAD + get_thread(thread); +#endif + 
// we must set sp to zero to clear frame + sd(R0, Address(thread, JavaThread::last_Java_sp_offset())); + // must clear fp, so that compiled frames are not confused; it is + // possible that we need it only for debugging + if (clear_fp) { + sd(R0, Address(thread, JavaThread::last_Java_fp_offset())); + } + + // Always clear the pc because it could have been set by make_walkable() + sd(R0, Address(thread, JavaThread::last_Java_pc_offset())); +} + +// Write serialization page so VM thread can do a pseudo remote membar. +// We use the current thread pointer to calculate a thread specific +// offset to write to within the page. This minimizes bus traffic +// due to cache line collision. +void MacroAssembler::serialize_memory(Register thread, Register tmp) { + int mask = os::vm_page_size() - sizeof(int); + assert_different_registers(AT, tmp); + assert(is_uimm(mask, 16), "Not a unsigned 16-bit"); + srl(AT, thread, os::get_serialize_page_shift_count()); + andi(AT, AT, mask); + li(tmp, os::get_memory_serialize_page()); + addu(tmp, tmp, AT); + sw(R0, tmp, 0); +} + +void MacroAssembler::safepoint_poll(Label& slow_path, Register thread_reg) { + if (SafepointMechanism::uses_thread_local_poll()) { + ld(AT, thread_reg, in_bytes(Thread::polling_page_offset())); + andi(AT, AT, SafepointMechanism::poll_bit()); + bne(AT, R0, slow_path); + delayed()->nop(); + } else { + li(AT, SafepointSynchronize::address_of_state()); + lw(AT, AT, 0); + addiu(AT, AT, -SafepointSynchronize::_not_synchronized); + bne(AT, R0, slow_path); + delayed()->nop(); + } +} + +// Just like safepoint_poll, but use an acquiring load for thread- +// local polling. +// +// We need an acquire here to ensure that any subsequent load of the +// global SafepointSynchronize::_state flag is ordered after this load +// of the local Thread::_polling page. We don't want this poll to +// return false (i.e. not safepointing) and a later poll of the global +// SafepointSynchronize::_state spuriously to return true. +// +// This is to avoid a race when we're in a native->Java transition +// racing the code which wakes up from a safepoint. +// +void MacroAssembler::safepoint_poll_acquire(Label& slow_path, Register thread_reg) { + if (SafepointMechanism::uses_thread_local_poll()) { + ld(AT, thread_reg, in_bytes(Thread::polling_page_offset())); + sync(); + andi(AT, AT, SafepointMechanism::poll_bit()); + bne(AT, R0, slow_path); + delayed()->nop(); + } else { + safepoint_poll(slow_path, thread_reg); + } +} + +// Calls to C land +// +// When entering C land, the fp, & sp of the last Java frame have to be recorded +// in the (thread-local) JavaThread object. When leaving C land, the last Java fp +// has to be reset to 0. This is required to allow proper stack traversal. 
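+// The anchor is published in a deliberate order so that a profiler walking the
+// stack never observes a half-built frame (see javaFrameAnchor_mips.hpp):
+//
+//   entering C land:  store last_Java_fp, then last_Java_pc, then last_Java_sp last;
+//   leaving C land:   clear last_Java_sp first, then fp and pc.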
+void MacroAssembler::set_last_Java_frame(Register java_thread, + Register last_java_sp, + Register last_java_fp, + address last_java_pc) { + // determine java_thread register + if (!java_thread->is_valid()) { +#ifndef OPT_THREAD + java_thread = T2; + get_thread(java_thread); +#else + java_thread = TREG; +#endif + } + // determine last_java_sp register + if (!last_java_sp->is_valid()) { + last_java_sp = SP; + } + + // last_java_fp is optional + if (last_java_fp->is_valid()) { + st_ptr(last_java_fp, java_thread, in_bytes(JavaThread::last_Java_fp_offset())); + } + + // last_java_pc is optional + if (last_java_pc != NULL) { + relocate(relocInfo::internal_word_type); + patchable_set48(AT, (long)last_java_pc); + st_ptr(AT, java_thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset())); + } + st_ptr(last_java_sp, java_thread, in_bytes(JavaThread::last_Java_sp_offset())); +} + +void MacroAssembler::set_last_Java_frame(Register last_java_sp, + Register last_java_fp, + address last_java_pc) { + // determine last_java_sp register + if (!last_java_sp->is_valid()) { + last_java_sp = SP; + } + + Register thread = TREG; +#ifndef OPT_THREAD + get_thread(thread); +#endif + // last_java_fp is optional + if (last_java_fp->is_valid()) { + sd(last_java_fp, Address(thread, JavaThread::last_Java_fp_offset())); + } + + // last_java_pc is optional + if (last_java_pc != NULL) { + relocate(relocInfo::internal_word_type); + patchable_set48(AT, (long)last_java_pc); + st_ptr(AT, thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset())); + } + + sd(last_java_sp, Address(thread, JavaThread::last_Java_sp_offset())); +} + +// Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes. +void MacroAssembler::tlab_allocate(Register obj, Register var_size_in_bytes, int con_size_in_bytes, + Register t1, Register t2, Label& slow_case) { + Unimplemented(); + //BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); + //bs->tlab_allocate(this, thread, obj, var_size_in_bytes, con_size_in_bytes, t1, t2, slow_case); +} + +// Defines obj, preserves var_size_in_bytes +void MacroAssembler::eden_allocate(Register obj, Register var_size_in_bytes, int con_size_in_bytes, + Register t1, Register t2, Label& slow_case) { + Unimplemented(); + //assert_different_registers(obj, var_size_in_bytes, t1, AT); + //BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); + //bs->eden_allocate(this, thread, obj, var_size_in_bytes, con_size_in_bytes, t1, slow_case); +} + +void MacroAssembler::incr_allocated_bytes(Register thread, + Register var_size_in_bytes, + int con_size_in_bytes, + Register t1) { + if (!thread->is_valid()) { +#ifndef OPT_THREAD + assert(t1->is_valid(), "need temp reg"); + thread = t1; + get_thread(thread); +#else + thread = TREG; +#endif + } + + ld_ptr(AT, thread, in_bytes(JavaThread::allocated_bytes_offset())); + if (var_size_in_bytes->is_valid()) { + addu(AT, AT, var_size_in_bytes); + } else { + addiu(AT, AT, con_size_in_bytes); + } + st_ptr(AT, thread, in_bytes(JavaThread::allocated_bytes_offset())); +} + +void MacroAssembler::li(Register rd, long imm) { + if (imm <= max_jint && imm >= min_jint) { + li32(rd, (int)imm); + } else if (julong(imm) <= 0xFFFFFFFF) { + assert_not_delayed(); + // lui sign-extends, so we can't use that. 
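+    // Worked example: imm = 0x80001234 must end up zero-extended in rd.
+    //   ori  rd, R0, 0x8000     // rd = 0x0000_0000_0000_8000
+    //   dsll rd, rd, 16         // rd = 0x0000_0000_8000_0000
+    //   ori  rd, rd, 0x1234     // rd = 0x0000_0000_8000_1234
+    // (lui rd, 0x8000 would have left rd sign-extended to 0xffff_ffff_8000_0000.)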
+ ori(rd, R0, julong(imm) >> 16); + dsll(rd, rd, 16); + ori(rd, rd, split_low(imm)); + } else if ((imm > 0) && is_simm16(imm >> 32)) { + // A 48-bit address + li48(rd, imm); + } else { + li64(rd, imm); + } +} + +void MacroAssembler::li32(Register reg, int imm) { + if (is_simm16(imm)) { + addiu(reg, R0, imm); + } else { + lui(reg, split_low(imm >> 16)); + if (split_low(imm)) + ori(reg, reg, split_low(imm)); + } +} + +void MacroAssembler::set64(Register d, jlong value) { + assert_not_delayed(); + + int hi = (int)(value >> 32); + int lo = (int)(value & ~0); + + if (value == lo) { // 32-bit integer + if (is_simm16(value)) { + daddiu(d, R0, value); + } else { + lui(d, split_low(value >> 16)); + if (split_low(value)) { + ori(d, d, split_low(value)); + } + } + } else if (hi == 0) { // hardware zero-extends to upper 32 + ori(d, R0, julong(value) >> 16); + dsll(d, d, 16); + if (split_low(value)) { + ori(d, d, split_low(value)); + } + } else if ((value> 0) && is_simm16(value >> 32)) { // li48 + // 4 insts + li48(d, value); + } else { // li64 + // 6 insts + li64(d, value); + } +} + + +int MacroAssembler::insts_for_set64(jlong value) { + int hi = (int)(value >> 32); + int lo = (int)(value & ~0); + + int count = 0; + + if (value == lo) { // 32-bit integer + if (is_simm16(value)) { + //daddiu(d, R0, value); + count++; + } else { + //lui(d, split_low(value >> 16)); + count++; + if (split_low(value)) { + //ori(d, d, split_low(value)); + count++; + } + } + } else if (hi == 0) { // hardware zero-extends to upper 32 + //ori(d, R0, julong(value) >> 16); + //dsll(d, d, 16); + count += 2; + if (split_low(value)) { + //ori(d, d, split_low(value)); + count++; + } + } else if ((value> 0) && is_simm16(value >> 32)) { // li48 + // 4 insts + //li48(d, value); + count += 4; + } else { // li64 + // 6 insts + //li64(d, value); + count += 6; + } + + return count; +} + +void MacroAssembler::patchable_set48(Register d, jlong value) { + assert_not_delayed(); + + int hi = (int)(value >> 32); + int lo = (int)(value & ~0); + + int count = 0; + + if (value == lo) { // 32-bit integer + if (is_simm16(value)) { + daddiu(d, R0, value); + count += 1; + } else { + lui(d, split_low(value >> 16)); + count += 1; + if (split_low(value)) { + ori(d, d, split_low(value)); + count += 1; + } + } + } else if (hi == 0) { // hardware zero-extends to upper 32 + ori(d, R0, julong(value) >> 16); + dsll(d, d, 16); + count += 2; + if (split_low(value)) { + ori(d, d, split_low(value)); + count += 1; + } + } else if ((value> 0) && is_simm16(value >> 32)) { // li48 + // 4 insts + li48(d, value); + count += 4; + } else { // li64 + tty->print_cr("value = 0x%lx", value); + guarantee(false, "Not supported yet !"); + } + + while (count < 4) { + nop(); + count++; + } +} + +void MacroAssembler::patchable_set32(Register d, jlong value) { + assert_not_delayed(); + + int hi = (int)(value >> 32); + int lo = (int)(value & ~0); + + int count = 0; + + if (value == lo) { // 32-bit integer + if (is_simm16(value)) { + daddiu(d, R0, value); + count += 1; + } else { + lui(d, split_low(value >> 16)); + count += 1; + if (split_low(value)) { + ori(d, d, split_low(value)); + count += 1; + } + } + } else if (hi == 0) { // hardware zero-extends to upper 32 + ori(d, R0, julong(value) >> 16); + dsll(d, d, 16); + count += 2; + if (split_low(value)) { + ori(d, d, split_low(value)); + count += 1; + } + } else { + tty->print_cr("value = 0x%lx", value); + guarantee(false, "Not supported yet !"); + } + + while (count < 3) { + nop(); + count++; + } +} + +void 
MacroAssembler::patchable_call32(Register d, jlong value) { + assert_not_delayed(); + + int hi = (int)(value >> 32); + int lo = (int)(value & ~0); + + int count = 0; + + if (value == lo) { // 32-bit integer + if (is_simm16(value)) { + daddiu(d, R0, value); + count += 1; + } else { + lui(d, split_low(value >> 16)); + count += 1; + if (split_low(value)) { + ori(d, d, split_low(value)); + count += 1; + } + } + } else { + tty->print_cr("value = 0x%lx", value); + guarantee(false, "Not supported yet !"); + } + + while (count < 2) { + nop(); + count++; + } +} + +void MacroAssembler::set_narrow_klass(Register dst, Klass* k) { + assert(UseCompressedClassPointers, "should only be used for compressed header"); + assert(oop_recorder() != NULL, "this assembler needs an OopRecorder"); + + int klass_index = oop_recorder()->find_index(k); + RelocationHolder rspec = metadata_Relocation::spec(klass_index); + long narrowKlass = (long)Klass::encode_klass(k); + + relocate(rspec, Assembler::narrow_oop_operand); + patchable_set48(dst, narrowKlass); +} + + +void MacroAssembler::set_narrow_oop(Register dst, jobject obj) { + assert(UseCompressedOops, "should only be used for compressed header"); + assert(oop_recorder() != NULL, "this assembler needs an OopRecorder"); + + int oop_index = oop_recorder()->find_index(obj); + RelocationHolder rspec = oop_Relocation::spec(oop_index); + + relocate(rspec, Assembler::narrow_oop_operand); + patchable_set48(dst, oop_index); +} + +// ((OopHandle)result).resolve(); +void MacroAssembler::resolve_oop_handle(Register result, Register tmp) { + // OopHandle::resolve is an indirection. + access_load_at(T_OBJECT, IN_NATIVE, result, Address(result, 0), tmp, NOREG); +} + +void MacroAssembler::load_mirror(Register mirror, Register method, Register tmp) { + // get mirror + const int mirror_offset = in_bytes(Klass::java_mirror_offset()); + ld_ptr(mirror, method, in_bytes(Method::const_offset())); + ld_ptr(mirror, mirror, in_bytes(ConstMethod::constants_offset())); + ld_ptr(mirror, mirror, ConstantPool::pool_holder_offset_in_bytes()); + ld_ptr(mirror, mirror, mirror_offset); + resolve_oop_handle(mirror, tmp); +} + +void MacroAssembler::li64(Register rd, long imm) { + assert_not_delayed(); + lui(rd, split_low(imm >> 48)); + ori(rd, rd, split_low(imm >> 32)); + dsll(rd, rd, 16); + ori(rd, rd, split_low(imm >> 16)); + dsll(rd, rd, 16); + ori(rd, rd, split_low(imm)); +} + +void MacroAssembler::li48(Register rd, long imm) { + assert_not_delayed(); + assert(is_simm16(imm >> 32), "Not a 48-bit address"); + lui(rd, imm >> 32); + ori(rd, rd, split_low(imm >> 16)); + dsll(rd, rd, 16); + ori(rd, rd, split_low(imm)); +} + +void MacroAssembler::verify_oop(Register reg, const char* s) { + if (!VerifyOops) return; + const char * b = NULL; + stringStream ss; + ss.print("verify_oop: %s: %s", reg->name(), s); + b = code_string(ss.as_string()); + pushad(); + move(A1, reg); + li(A0, (long)b); + li(AT, (long)StubRoutines::verify_oop_subroutine_entry_address()); + ld(T9, AT, 0); + jalr(T9); + delayed()->nop(); + popad(); +} + + +void MacroAssembler::verify_oop_addr(Address addr, const char* s) { + if (!VerifyOops) { + nop(); + return; + } + // Pass register number to verify_oop_subroutine + const char * b = NULL; + stringStream ss; + ss.print("verify_oop_addr: %s", s); + b = code_string(ss.as_string()); + + addiu(SP, SP, - 7 * wordSize); + st_ptr(T0, SP, 6 * wordSize); + st_ptr(T1, SP, 5 * wordSize); + st_ptr(RA, SP, 4 * wordSize); + st_ptr(A0, SP, 3 * wordSize); + st_ptr(A1, SP, 2 * wordSize); + st_ptr(AT, SP, 
1 * wordSize); + st_ptr(T9, SP, 0); + + // addr may contain sp so we will have to adjust it based on the + // pushes that we just did. + if (addr.uses(SP)) { + lea(A1, addr); + ld_ptr(A1, Address(A1, 7 * wordSize)); + } else { + ld_ptr(A1, addr); + } + li(A0, (long)b); + // call indirectly to solve generation ordering problem + li(AT, (long)StubRoutines::verify_oop_subroutine_entry_address()); + ld_ptr(T9, AT, 0); + jalr(T9); + delayed()->nop(); + ld_ptr(T0, SP, 6* wordSize); + ld_ptr(T1, SP, 5* wordSize); + ld_ptr(RA, SP, 4* wordSize); + ld_ptr(A0, SP, 3* wordSize); + ld_ptr(A1, SP, 2* wordSize); + ld_ptr(AT, SP, 1* wordSize); + ld_ptr(T9, SP, 0* wordSize); + addiu(SP, SP, 7 * wordSize); +} + +// used registers : T0, T1 +void MacroAssembler::verify_oop_subroutine() { + // RA: ra + // A0: char* error message + // A1: oop object to verify + + Label exit, error; + // increment counter + li(T0, (long)StubRoutines::verify_oop_count_addr()); + lw(AT, T0, 0); + daddiu(AT, AT, 1); + sw(AT, T0, 0); + + // make sure object is 'reasonable' + beq(A1, R0, exit); // if obj is NULL it is ok + delayed()->nop(); + + // Check if the oop is in the right area of memory + // const int oop_mask = Universe::verify_oop_mask(); + // const int oop_bits = Universe::verify_oop_bits(); + const uintptr_t oop_mask = Universe::verify_oop_mask(); + const uintptr_t oop_bits = Universe::verify_oop_bits(); + li(AT, oop_mask); + andr(T0, A1, AT); + li(AT, oop_bits); + bne(T0, AT, error); + delayed()->nop(); + + // make sure klass is 'reasonable' + // add for compressedoops + reinit_heapbase(); + // add for compressedoops + load_klass(T0, A1); + beq(T0, R0, error); // if klass is NULL it is broken + delayed()->nop(); + // return if everything seems ok + bind(exit); + + jr(RA); + delayed()->nop(); + + // handle errors + bind(error); + pushad(); + call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type); + delayed()->nop(); + popad(); + jr(RA); + delayed()->nop(); +} + +void MacroAssembler::verify_tlab(Register t1, Register t2) { +#ifdef ASSERT + assert_different_registers(t1, t2, AT); + if (UseTLAB && VerifyOops) { + Label next, ok; + + get_thread(t1); + + ld_ptr(t2, t1, in_bytes(JavaThread::tlab_top_offset())); + ld_ptr(AT, t1, in_bytes(JavaThread::tlab_start_offset())); + sltu(AT, t2, AT); + beq(AT, R0, next); + delayed()->nop(); + + stop("assert(top >= start)"); + + bind(next); + ld_ptr(AT, t1, in_bytes(JavaThread::tlab_end_offset())); + sltu(AT, AT, t2); + beq(AT, R0, ok); + delayed()->nop(); + + stop("assert(top <= end)"); + + bind(ok); + + } +#endif +} +RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr, + Register tmp, + int offset) { + intptr_t value = *delayed_value_addr; + if (value != 0) + return RegisterOrConstant(value + offset); + Unimplemented(); + //AddressLiteral a(delayed_value_addr); + // load indirectly to solve generation ordering problem + //movptr(tmp, ExternalAddress((address) delayed_value_addr)); + //ld(tmp, a); + if (offset != 0) + daddiu(tmp,tmp, offset); + + return RegisterOrConstant(tmp); +} + +void MacroAssembler::hswap(Register reg) { + //short + //andi(reg, reg, 0xffff); + srl(AT, reg, 8); + sll(reg, reg, 24); + sra(reg, reg, 16); + orr(reg, reg, AT); +} + +void MacroAssembler::huswap(Register reg) { + dsrl(AT, reg, 8); + dsll(reg, reg, 24); + dsrl(reg, reg, 16); + orr(reg, reg, AT); + andi(reg, reg, 0xffff); +} + +// something funny to do this will only one more register AT +// 32 bits +void MacroAssembler::swap(Register reg) { + 
srl(AT, reg, 8); + sll(reg, reg, 24); + orr(reg, reg, AT); + //reg : 4 1 2 3 + srl(AT, AT, 16); + xorr(AT, AT, reg); + andi(AT, AT, 0xff); + //AT : 0 0 0 1^3); + xorr(reg, reg, AT); + //reg : 4 1 2 1 + sll(AT, AT, 16); + xorr(reg, reg, AT); + //reg : 4 3 2 1 +} + +void MacroAssembler::cmpxchg(Address addr, Register oldval, Register newval, + Register resflag, bool retold, bool barrier) { + assert(oldval != resflag, "oldval != resflag"); + assert(newval != resflag, "newval != resflag"); + Label again, succ, fail; + bind(again); + lld(resflag, addr); + bne(resflag, oldval, fail); + delayed()->nop(); + move(resflag, newval); + scd(resflag, addr); + beq(resflag, R0, again); + delayed()->nop(); + b(succ); + delayed()->nop(); + bind(fail); + if (barrier) + sync(); + if (retold && oldval != R0) + move(oldval, resflag); + move(resflag, R0); + bind(succ); +} + +void MacroAssembler::cmpxchg(Address addr, Register oldval, Register newval, + Register tmp, bool retold, bool barrier, Label& succ, Label* fail) { + assert(oldval != tmp, "oldval != tmp"); + assert(newval != tmp, "newval != tmp"); + Label again, neq; + + bind(again); + lld(tmp, addr); + bne(tmp, oldval, neq); + delayed()->nop(); + move(tmp, newval); + scd(tmp, addr); + beq(tmp, R0, again); + delayed()->nop(); + b(succ); + delayed()->nop(); + + bind(neq); + if (barrier) + sync(); + if (retold && oldval != R0) + move(oldval, tmp); + if (fail) { + b(*fail); + delayed()->nop(); + } +} + + +void MacroAssembler::cmpxchg32(Address addr, Register oldval, Register newval, + Register resflag, bool sign, bool retold, bool barrier) { + assert(oldval != resflag, "oldval != resflag"); + assert(newval != resflag, "newval != resflag"); + Label again, succ, fail; + bind(again); + ll(resflag, addr); + if (!sign) + dinsu(resflag, R0, 32, 32); + bne(resflag, oldval, fail); + delayed()->nop(); + + move(resflag, newval); + sc(resflag, addr); + beq(resflag, R0, again); + delayed()->nop(); + b(succ); + delayed()->nop(); + + bind(fail); + if (barrier) + sync(); + if (retold && oldval != R0) + move(oldval, resflag); + move(resflag, R0); + bind(succ); +} + +void MacroAssembler::cmpxchg32(Address addr, Register oldval, Register newval, Register tmp, + bool sign, bool retold, bool barrier, Label& succ, Label* fail) { + assert(oldval != tmp, "oldval != tmp"); + assert(newval != tmp, "newval != tmp"); + Label again, neq; + + bind(again); + ll(tmp, addr); + if (!sign) + dinsu(tmp, R0, 32, 32); + bne(tmp, oldval, neq); + delayed()->nop(); + move(tmp, newval); + sc(tmp, addr); + beq(tmp, R0, again); + delayed()->nop(); + b(succ); + delayed()->nop(); + + bind(neq); + if (barrier) + sync(); + if (retold && oldval != R0) + move(oldval, tmp); + if (fail) { + b(*fail); + delayed()->nop(); + } +} + +void MacroAssembler::cmpxchg8(Register x_regLo, Register x_regHi, Address dest, Register c_regLo, Register c_regHi) { + Label done, again, nequal; + + Register x_reg = x_regLo; + dsll32(x_regHi, x_regHi, 0); + dsll32(x_regLo, x_regLo, 0); + dsrl32(x_regLo, x_regLo, 0); + orr(x_reg, x_regLo, x_regHi); + + Register c_reg = c_regLo; + dsll32(c_regHi, c_regHi, 0); + dsll32(c_regLo, c_regLo, 0); + dsrl32(c_regLo, c_regLo, 0); + orr(c_reg, c_regLo, c_regHi); + + bind(again); + + if (UseSyncLevel >= 10000 || UseSyncLevel == 1000 || UseSyncLevel == 4000) sync(); + lld(AT, dest); + bne(AT, c_reg, nequal); + delayed()->nop(); + + //move(AT, x_reg); + daddu(AT, x_reg, R0); + scd(AT, dest); + beq(AT, R0, again); + delayed()->nop(); + b(done); + delayed()->nop(); + + // not xchged + bind(nequal); 
+ sync(); + //move(c_reg, AT); + //move(AT, R0); + daddu(c_reg, AT, R0); + daddu(AT, R0, R0); + bind(done); +} + +// be sure the three register is different +void MacroAssembler::rem_s(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp) { + assert_different_registers(tmp, fs, ft); + div_s(tmp, fs, ft); + trunc_l_s(tmp, tmp); + cvt_s_l(tmp, tmp); + mul_s(tmp, tmp, ft); + sub_s(fd, fs, tmp); +} + +// be sure the three register is different +void MacroAssembler::rem_d(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp) { + assert_different_registers(tmp, fs, ft); + div_d(tmp, fs, ft); + trunc_l_d(tmp, tmp); + cvt_d_l(tmp, tmp); + mul_d(tmp, tmp, ft); + sub_d(fd, fs, tmp); +} + +#ifdef COMPILER2 +// Fast_Lock and Fast_Unlock used by C2 + +// Because the transitions from emitted code to the runtime +// monitorenter/exit helper stubs are so slow it's critical that +// we inline both the stack-locking fast-path and the inflated fast path. +// +// See also: cmpFastLock and cmpFastUnlock. +// +// What follows is a specialized inline transliteration of the code +// in slow_enter() and slow_exit(). If we're concerned about I$ bloat +// another option would be to emit TrySlowEnter and TrySlowExit methods +// at startup-time. These methods would accept arguments as +// (Obj, Self, box, Scratch) and return success-failure +// indications in the icc.ZFlag. Fast_Lock and Fast_Unlock would simply +// marshal the arguments and emit calls to TrySlowEnter and TrySlowExit. +// In practice, however, the # of lock sites is bounded and is usually small. +// Besides the call overhead, TrySlowEnter and TrySlowExit might suffer +// if the processor uses simple bimodal branch predictors keyed by EIP +// Since the helper routines would be called from multiple synchronization +// sites. +// +// An even better approach would be write "MonitorEnter()" and "MonitorExit()" +// in java - using j.u.c and unsafe - and just bind the lock and unlock sites +// to those specialized methods. That'd give us a mostly platform-independent +// implementation that the JITs could optimize and inline at their pleasure. +// Done correctly, the only time we'd need to cross to native could would be +// to park() or unpark() threads. We'd also need a few more unsafe operators +// to (a) prevent compiler-JIT reordering of non-volatile accesses, and +// (b) explicit barriers or fence operations. +// +// TODO: +// +// * Arrange for C2 to pass "Self" into Fast_Lock and Fast_Unlock in one of the registers (scr). +// This avoids manifesting the Self pointer in the Fast_Lock and Fast_Unlock terminals. +// Given TLAB allocation, Self is usually manifested in a register, so passing it into +// the lock operators would typically be faster than reifying Self. +// +// * Ideally I'd define the primitives as: +// fast_lock (nax Obj, nax box, res, tmp, nax scr) where tmp and scr are KILLED. +// fast_unlock (nax Obj, box, res, nax tmp) where tmp are KILLED +// Unfortunately ADLC bugs prevent us from expressing the ideal form. +// Instead, we're stuck with a rather awkward and brittle register assignments below. +// Furthermore the register assignments are overconstrained, possibly resulting in +// sub-optimal code near the synchronization site. +// +// * Eliminate the sp-proximity tests and just use "== Self" tests instead. +// Alternately, use a better sp-proximity test. +// +// * Currently ObjectMonitor._Owner can hold either an sp value or a (THREAD *) value. 
+// Either one is sufficient to uniquely identify a thread. +// TODO: eliminate use of sp in _owner and use get_thread(tr) instead. +// +// * Intrinsify notify() and notifyAll() for the common cases where the +// object is locked by the calling thread but the waitlist is empty. +// avoid the expensive JNI call to JVM_Notify() and JVM_NotifyAll(). +// +// * use jccb and jmpb instead of jcc and jmp to improve code density. +// But beware of excessive branch density on AMD Opterons. +// +// * Both Fast_Lock and Fast_Unlock set the ICC.ZF to indicate success +// or failure of the fast-path. If the fast-path fails then we pass +// control to the slow-path, typically in C. In Fast_Lock and +// Fast_Unlock we often branch to DONE_LABEL, just to find that C2 +// will emit a conditional branch immediately after the node. +// So we have branches to branches and lots of ICC.ZF games. +// Instead, it might be better to have C2 pass a "FailureLabel" +// into Fast_Lock and Fast_Unlock. In the case of success, control +// will drop through the node. ICC.ZF is undefined at exit. +// In the case of failure, the node will branch directly to the +// FailureLabel + + +// obj: object to lock +// box: on-stack box address (displaced header location) +// tmp: tmp -- KILLED +// scr: tmp -- KILLED +void MacroAssembler::fast_lock(Register objReg, Register boxReg, Register resReg, + Register tmpReg, Register scrReg) { + Label IsInflated, DONE, DONE_SET; + + // Ensure the register assignents are disjoint + guarantee(objReg != boxReg, ""); + guarantee(objReg != tmpReg, ""); + guarantee(objReg != scrReg, ""); + guarantee(boxReg != tmpReg, ""); + guarantee(boxReg != scrReg, ""); + + block_comment("FastLock"); + + if (PrintBiasedLockingStatistics) { + atomic_inc32((address)BiasedLocking::total_entry_count_addr(), 1, tmpReg, scrReg); + } + + if (EmitSync & 1) { + move(AT, 0x0); + return; + } else + if (EmitSync & 2) { + Label DONE_LABEL ; + if (UseBiasedLocking) { + // Note: tmpReg maps to the swap_reg argument and scrReg to the tmp_reg argument. + biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL); + } + + ld(tmpReg, Address(objReg, 0)) ; // fetch markword + ori(tmpReg, tmpReg, 0x1); + sd(tmpReg, Address(boxReg, 0)); // Anticipate successful CAS + + cmpxchg(Address(objReg, 0), tmpReg, boxReg, scrReg, true, false, DONE_LABEL); // Updates tmpReg + delayed()->nop(); + + // Recursive locking + dsubu(tmpReg, tmpReg, SP); + li(AT, (7 - os::vm_page_size() )); + andr(tmpReg, tmpReg, AT); + sd(tmpReg, Address(boxReg, 0)); + bind(DONE_LABEL) ; + } else { + // Possible cases that we'll encounter in fast_lock + // ------------------------------------------------ + // * Inflated + // -- unlocked + // -- Locked + // = by self + // = by other + // * biased + // -- by Self + // -- by other + // * neutral + // * stack-locked + // -- by self + // = sp-proximity test hits + // = sp-proximity test generates false-negative + // -- by other + // + + // TODO: optimize away redundant LDs of obj->mark and improve the markword triage + // order to reduce the number of conditional branches in the most common cases. + // Beware -- there's a subtle invariant that fetch of the markword + // at [FETCH], below, will never observe a biased encoding (*101b). + // If this invariant is not held we risk exclusion (safety) failure. 
+ if (UseBiasedLocking && !UseOptoBiasInlining) { + Label succ, fail; + biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, succ, NULL); + b(fail); + delayed()->nop(); + bind(succ); + b(DONE); + delayed()->ori(resReg, R0, 1); + bind(fail); + } + + ld(tmpReg, Address(objReg, 0)); //Fetch the markword of the object. + andi(AT, tmpReg, markOopDesc::monitor_value); + bne(AT, R0, IsInflated); // inflated vs stack-locked|neutral|bias + delayed()->nop(); + + // Attempt stack-locking ... + ori(tmpReg, tmpReg, markOopDesc::unlocked_value); + sd(tmpReg, Address(boxReg, 0)); // Anticipate successful CAS + + if (PrintBiasedLockingStatistics) { + Label SUCC, FAIL; + cmpxchg(Address(objReg, 0), tmpReg, boxReg, scrReg, true, false, SUCC, &FAIL); // Updates tmpReg + bind(SUCC); + atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, AT, scrReg); + b(DONE); + delayed()->ori(resReg, R0, 1); + bind(FAIL); + } else { + // If cmpxchg is succ, then scrReg = 1 + cmpxchg(Address(objReg, 0), tmpReg, boxReg, scrReg, true, false, DONE_SET); // Updates tmpReg + } + + // Recursive locking + // The object is stack-locked: markword contains stack pointer to BasicLock. + // Locked by current thread if difference with current SP is less than one page. + dsubu(tmpReg, tmpReg, SP); + li(AT, 7 - os::vm_page_size()); + andr(tmpReg, tmpReg, AT); + sd(tmpReg, Address(boxReg, 0)); + + if (PrintBiasedLockingStatistics) { + Label L; + // tmpReg == 0 => BiasedLocking::_fast_path_entry_count++ + bne(tmpReg, R0, L); + delayed()->nop(); + atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, AT, scrReg); + bind(L); + } + b(DONE); + delayed()->sltiu(resReg, tmpReg, 1); // resReg = (tmpReg == 0) ? 1 : 0 + + bind(IsInflated); + // The object's monitor m is unlocked iff m->owner == NULL, + // otherwise m->owner may contain a thread or a stack address. + + // TODO: someday avoid the ST-before-CAS penalty by + // relocating (deferring) the following ST. + // We should also think about trying a CAS without having + // fetched _owner. If the CAS is successful we may + // avoid an RTO->RTS upgrade on the $line. + // Without cast to int32_t a movptr will destroy r10 which is typically obj + li(AT, (int32_t)intptr_t(markOopDesc::unused_mark())); + sd(AT, Address(boxReg, 0)); + + ld(AT, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes() - 2)); + // if (m->owner != 0) => AT = 0, goto slow path. + bne(AT, R0, DONE_SET); + delayed()->ori(scrReg, R0, 0); + +#ifndef OPT_THREAD + get_thread(TREG); +#endif + // It's inflated and appears unlocked + cmpxchg(Address(tmpReg, ObjectMonitor::owner_offset_in_bytes() - 2), R0, TREG, scrReg, false, false) ; + // Intentional fall-through into DONE ... + + bind(DONE_SET); + move(resReg, scrReg); + + // DONE is a hot target - we'd really like to place it at the + // start of cache line by padding with NOPs. + // See the AMD and Intel software optimization manuals for the + // most efficient "long" NOP encodings. + // Unfortunately none of our alignment mechanisms suffice. + bind(DONE); + // At DONE the resReg is set as follows ... + // Fast_Unlock uses the same protocol. + // resReg == 1 -> Success + // resREg == 0 -> Failure - force control through the slow-path + + // Avoid branch-to-branch on AMD processors + // This appears to be superstition. + if (EmitSync & 32) nop() ; + + } +} + +// obj: object to unlock +// box: box address (displaced header location), killed. +// tmp: killed tmp; cannot be obj nor box. 
+// +// Some commentary on balanced locking: +// +// Fast_Lock and Fast_Unlock are emitted only for provably balanced lock sites. +// Methods that don't have provably balanced locking are forced to run in the +// interpreter - such methods won't be compiled to use fast_lock and fast_unlock. +// The interpreter provides two properties: +// I1: At return-time the interpreter automatically and quietly unlocks any +// objects acquired the current activation (frame). Recall that the +// interpreter maintains an on-stack list of locks currently held by +// a frame. +// I2: If a method attempts to unlock an object that is not held by the +// the frame the interpreter throws IMSX. +// +// Lets say A(), which has provably balanced locking, acquires O and then calls B(). +// B() doesn't have provably balanced locking so it runs in the interpreter. +// Control returns to A() and A() unlocks O. By I1 and I2, above, we know that O +// is still locked by A(). +// +// The only other source of unbalanced locking would be JNI. The "Java Native Interface: +// Programmer's Guide and Specification" claims that an object locked by jni_monitorenter +// should not be unlocked by "normal" java-level locking and vice-versa. The specification +// doesn't specify what will occur if a program engages in such mixed-mode locking, however. + +void MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register resReg, + Register tmpReg, Register scrReg) { + Label DONE, DONE_SET, Stacked, Inflated; + + guarantee(objReg != boxReg, ""); + guarantee(objReg != tmpReg, ""); + guarantee(objReg != scrReg, ""); + guarantee(boxReg != tmpReg, ""); + guarantee(boxReg != scrReg, ""); + + block_comment("FastUnlock"); + + if (EmitSync & 4) { + // Disable - inhibit all inlining. Force control through the slow-path + move(AT, 0x0); + return; + } else + if (EmitSync & 8) { + Label DONE_LABEL ; + if (UseBiasedLocking) { + biased_locking_exit(objReg, tmpReg, DONE_LABEL); + } + // classic stack-locking code ... + ld(tmpReg, Address(boxReg, 0)) ; + beq(tmpReg, R0, DONE_LABEL) ; + move(AT, 0x1); // delay slot + + cmpxchg(Address(objReg, 0), boxReg, tmpReg, AT, false, false); + bind(DONE_LABEL); + } else { + Label CheckSucc; + + // Critically, the biased locking test must have precedence over + // and appear before the (box->dhw == 0) recursive stack-lock test. + if (UseBiasedLocking && !UseOptoBiasInlining) { + Label succ, fail; + biased_locking_exit(objReg, tmpReg, succ); + b(fail); + delayed()->nop(); + bind(succ); + b(DONE); + delayed()->ori(resReg, R0, 1); + bind(fail); + } + + ld(tmpReg, Address(boxReg, 0)); // Examine the displaced header + beq(tmpReg, R0, DONE_SET); // 0 indicates recursive stack-lock + delayed()->sltiu(AT, tmpReg, 1); + + ld(tmpReg, Address(objReg, 0)); // Examine the object's markword + andi(AT, tmpReg, markOopDesc::monitor_value); + beq(AT, R0, Stacked); // Inflated? + delayed()->nop(); + + bind(Inflated); + // It's inflated. + // Despite our balanced locking property we still check that m->_owner == Self + // as java routines or native JNI code called by this thread might + // have released the lock. + // Refer to the comments in synchronizer.cpp for how we might encode extra + // state in _succ so we can avoid fetching EntryList|cxq. + // + // I'd like to add more cases in fast_lock() and fast_unlock() -- + // such as recursive enter and exit -- but we have to be wary of + // I$ bloat, T$ effects and BP$ effects. + // + // If there's no contention try a 1-0 exit. 
That is, exit without + // a costly MEMBAR or CAS. See synchronizer.cpp for details on how + // we detect and recover from the race that the 1-0 exit admits. + // + // Conceptually Fast_Unlock() must execute a STST|LDST "release" barrier + // before it STs null into _owner, releasing the lock. Updates + // to data protected by the critical section must be visible before + // we drop the lock (and thus before any other thread could acquire + // the lock and observe the fields protected by the lock). +#ifndef OPT_THREAD + get_thread(TREG); +#endif + + // It's inflated + ld(scrReg, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes() - 2)) ; + xorr(scrReg, scrReg, TREG); + + ld(AT, Address(tmpReg, ObjectMonitor::recursions_offset_in_bytes() - 2)) ; + orr(scrReg, scrReg, AT); + + bne(scrReg, R0, DONE_SET); + delayed()->ori(AT, R0, 0); + + ld(scrReg, Address(tmpReg, ObjectMonitor::cxq_offset_in_bytes() - 2)); + ld(AT, Address(tmpReg, ObjectMonitor::EntryList_offset_in_bytes() - 2)); + orr(scrReg, scrReg, AT); + + bne(scrReg, R0, DONE_SET); + delayed()->ori(AT, R0, 0); + + sync(); + sd(R0, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes() - 2)); + b(DONE); + delayed()->ori(resReg, R0, 1); + + bind(Stacked); + ld(tmpReg, Address(boxReg, 0)); + cmpxchg(Address(objReg, 0), boxReg, tmpReg, AT, false, false); + + bind(DONE_SET); + move(resReg, AT); + + if (EmitSync & 65536) { + bind (CheckSucc); + } + + bind(DONE); + + // Avoid branch to branch on AMD processors + if (EmitSync & 32768) { nop() ; } + } +} +#endif // COMPILER2 + +void MacroAssembler::align(int modulus) { + while (offset() % modulus != 0) nop(); +} + + +void MacroAssembler::verify_FPU(int stack_depth, const char* s) { + //Unimplemented(); +} + +Register caller_saved_registers[] = {AT, V0, V1, A0, A1, A2, A3, A4, A5, A6, A7, T0, T1, T2, T3, T8, T9, GP, RA, FP}; +Register caller_saved_registers_except_v0[] = {AT, V1, A0, A1, A2, A3, A4, A5, A6, A7, T0, T1, T2, T3, T8, T9, GP, RA, FP}; + +//In MIPS64, F0~23 are all caller-saved registers +FloatRegister caller_saved_fpu_registers[] = {F0, F12, F13}; + +// We preserve all caller-saved register +void MacroAssembler::pushad(){ + int i; + + // Fixed-point registers + int len = sizeof(caller_saved_registers) / sizeof(caller_saved_registers[0]); + daddiu(SP, SP, -1 * len * wordSize); + for (i = 0; i < len; i++) + { + sd(caller_saved_registers[i], SP, (len - i - 1) * wordSize); + } + + // Floating-point registers + len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]); + daddiu(SP, SP, -1 * len * wordSize); + for (i = 0; i < len; i++) + { + sdc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize); + } +}; + +void MacroAssembler::popad(){ + int i; + + // Floating-point registers + int len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]); + for (i = 0; i < len; i++) + { + ldc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize); + } + daddiu(SP, SP, len * wordSize); + + // Fixed-point registers + len = sizeof(caller_saved_registers) / sizeof(caller_saved_registers[0]); + for (i = 0; i < len; i++) + { + ld(caller_saved_registers[i], SP, (len - i - 1) * wordSize); + } + daddiu(SP, SP, len * wordSize); +}; + +// We preserve all caller-saved register except V0 +void MacroAssembler::pushad_except_v0() { + int i; + + // Fixed-point registers + int len = sizeof(caller_saved_registers_except_v0) / sizeof(caller_saved_registers_except_v0[0]); + daddiu(SP, SP, -1 * len * wordSize); + for (i = 0; i < len; i++) { + 
sd(caller_saved_registers_except_v0[i], SP, (len - i - 1) * wordSize); + } + + // Floating-point registers + len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]); + daddiu(SP, SP, -1 * len * wordSize); + for (i = 0; i < len; i++) { + sdc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize); + } +} + +void MacroAssembler::popad_except_v0() { + int i; + + // Floating-point registers + int len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]); + for (i = 0; i < len; i++) { + ldc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize); + } + daddiu(SP, SP, len * wordSize); + + // Fixed-point registers + len = sizeof(caller_saved_registers_except_v0) / sizeof(caller_saved_registers_except_v0[0]); + for (i = 0; i < len; i++) { + ld(caller_saved_registers_except_v0[i], SP, (len - i - 1) * wordSize); + } + daddiu(SP, SP, len * wordSize); +} + +void MacroAssembler::push2(Register reg1, Register reg2) { + daddiu(SP, SP, -16); + sd(reg1, SP, 8); + sd(reg2, SP, 0); +} + +void MacroAssembler::pop2(Register reg1, Register reg2) { + ld(reg1, SP, 8); + ld(reg2, SP, 0); + daddiu(SP, SP, 16); +} + +// for UseCompressedOops Option +void MacroAssembler::load_klass(Register dst, Register src) { + if(UseCompressedClassPointers){ + lwu(dst, Address(src, oopDesc::klass_offset_in_bytes())); + decode_klass_not_null(dst); + } else + ld(dst, src, oopDesc::klass_offset_in_bytes()); +} + +void MacroAssembler::store_klass(Register dst, Register src) { + if(UseCompressedClassPointers){ + encode_klass_not_null(src); + sw(src, dst, oopDesc::klass_offset_in_bytes()); + } else { + sd(src, dst, oopDesc::klass_offset_in_bytes()); + } +} + +void MacroAssembler::load_prototype_header(Register dst, Register src) { + load_klass(dst, src); + ld(dst, Address(dst, Klass::prototype_header_offset())); +} + +void MacroAssembler::store_klass_gap(Register dst, Register src) { + if (UseCompressedClassPointers) { + sw(src, dst, oopDesc::klass_gap_offset_in_bytes()); + } +} + +void MacroAssembler::access_load_at(BasicType type, DecoratorSet decorators, Register dst, Address src, + Register tmp1, Register thread_tmp) { + BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); + decorators = AccessInternal::decorator_fixup(decorators); + bool as_raw = (decorators & AS_RAW) != 0; + if (as_raw) { + bs->BarrierSetAssembler::load_at(this, decorators, type, dst, src, tmp1, thread_tmp); + } else { + bs->load_at(this, decorators, type, dst, src, tmp1, thread_tmp); + } +} + +void MacroAssembler::access_store_at(BasicType type, DecoratorSet decorators, Address dst, Register src, + Register tmp1, Register tmp2) { + BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); + decorators = AccessInternal::decorator_fixup(decorators); + bool as_raw = (decorators & AS_RAW) != 0; + if (as_raw) { + bs->BarrierSetAssembler::store_at(this, decorators, type, dst, src, tmp1, tmp2); + } else { + bs->store_at(this, decorators, type, dst, src, tmp1, tmp2); + } +} + +void MacroAssembler::load_heap_oop(Register dst, Address src, Register tmp1, + Register thread_tmp, DecoratorSet decorators) { + access_load_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1, thread_tmp); +} + +// Doesn't do verfication, generates fixed size code +void MacroAssembler::load_heap_oop_not_null(Register dst, Address src, Register tmp1, + Register thread_tmp, DecoratorSet decorators) { + access_load_at(T_OBJECT, IN_HEAP | IS_NOT_NULL | decorators, dst, src, tmp1, thread_tmp); +} + 
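The encode_heap_oop()/decode_heap_oop() family below implements the usual compressed-oops arithmetic; the movz() dances exist only to keep NULL mapping to NULL in both directions. A standalone model of that arithmetic, with base and shift passed as parameters in place of Universe::narrow_oop_base()/narrow_oop_shift():

#include <cstdint>

// Standalone model of the compressed-oop arithmetic emitted below.  'base'
// and 'shift' stand in for Universe::narrow_oop_base()/narrow_oop_shift().
uint32_t encode_oop(uint64_t oop, uint64_t base, int shift) {
  if (oop == 0) return 0;                        // NULL stays NULL (movz path)
  return (uint32_t)((oop - base) >> shift);      // dsubu + shr
}

uint64_t decode_oop(uint32_t narrow, uint64_t base, int shift) {
  if (narrow == 0) return 0;                     // NULL stays NULL (movz path)
  return ((uint64_t)narrow << shift) + base;     // shl + daddu
}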
+void MacroAssembler::store_heap_oop(Address dst, Register src, Register tmp1, + Register tmp2, DecoratorSet decorators) { + access_store_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1, tmp2); +} + +// Used for storing NULLs. +void MacroAssembler::store_heap_oop_null(Address dst) { + access_store_at(T_OBJECT, IN_HEAP, dst, noreg, noreg, noreg); +} + +#ifdef ASSERT +void MacroAssembler::verify_heapbase(const char* msg) { + assert (UseCompressedOops || UseCompressedClassPointers, "should be compressed"); + assert (Universe::heap() != NULL, "java heap should be initialized"); +} +#endif + + +// Algorithm must match oop.inline.hpp encode_heap_oop. +void MacroAssembler::encode_heap_oop(Register r) { +#ifdef ASSERT + verify_heapbase("MacroAssembler::encode_heap_oop:heap base corrupted?"); +#endif + verify_oop(r, "broken oop in encode_heap_oop"); + if (Universe::narrow_oop_base() == NULL) { + if (Universe::narrow_oop_shift() != 0) { + assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); + shr(r, LogMinObjAlignmentInBytes); + } + return; + } + + movz(r, S5_heapbase, r); + dsubu(r, r, S5_heapbase); + if (Universe::narrow_oop_shift() != 0) { + assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); + shr(r, LogMinObjAlignmentInBytes); + } +} + +void MacroAssembler::encode_heap_oop(Register dst, Register src) { +#ifdef ASSERT + verify_heapbase("MacroAssembler::encode_heap_oop:heap base corrupted?"); +#endif + verify_oop(src, "broken oop in encode_heap_oop"); + if (Universe::narrow_oop_base() == NULL) { + if (Universe::narrow_oop_shift() != 0) { + assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); + dsrl(dst, src, LogMinObjAlignmentInBytes); + } else { + if (dst != src) move(dst, src); + } + } else { + if (dst == src) { + movz(dst, S5_heapbase, dst); + dsubu(dst, dst, S5_heapbase); + if (Universe::narrow_oop_shift() != 0) { + assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); + shr(dst, LogMinObjAlignmentInBytes); + } + } else { + dsubu(dst, src, S5_heapbase); + if (Universe::narrow_oop_shift() != 0) { + assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); + shr(dst, LogMinObjAlignmentInBytes); + } + movz(dst, R0, src); + } + } +} + +void MacroAssembler::encode_heap_oop_not_null(Register r) { + assert (UseCompressedOops, "should be compressed"); +#ifdef ASSERT + if (CheckCompressedOops) { + Label ok; + bne(r, R0, ok); + delayed()->nop(); + stop("null oop passed to encode_heap_oop_not_null"); + bind(ok); + } +#endif + verify_oop(r, "broken oop in encode_heap_oop_not_null"); + if (Universe::narrow_oop_base() != NULL) { + dsubu(r, r, S5_heapbase); + } + if (Universe::narrow_oop_shift() != 0) { + assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); + shr(r, LogMinObjAlignmentInBytes); + } + +} + +void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) { + assert (UseCompressedOops, "should be compressed"); +#ifdef ASSERT + if (CheckCompressedOops) { + Label ok; + bne(src, R0, ok); + delayed()->nop(); + stop("null oop passed to encode_heap_oop_not_null2"); + bind(ok); + } +#endif + verify_oop(src, "broken oop in encode_heap_oop_not_null2"); + + if (Universe::narrow_oop_base() != NULL) { + dsubu(dst, src, S5_heapbase); + if (Universe::narrow_oop_shift() != 0) { + assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); + shr(dst, 
LogMinObjAlignmentInBytes); + } + } else { + if (Universe::narrow_oop_shift() != 0) { + assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); + dsrl(dst, src, LogMinObjAlignmentInBytes); + } else { + if (dst != src) move(dst, src); + } + } +} + +void MacroAssembler::decode_heap_oop(Register r) { +#ifdef ASSERT + verify_heapbase("MacroAssembler::decode_heap_oop corrupted?"); +#endif + if (Universe::narrow_oop_base() == NULL) { + if (Universe::narrow_oop_shift() != 0) { + assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); + shl(r, LogMinObjAlignmentInBytes); + } + } else { + move(AT, r); + if (Universe::narrow_oop_shift() != 0) { + assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); + shl(r, LogMinObjAlignmentInBytes); + } + daddu(r, r, S5_heapbase); + movz(r, R0, AT); + } + verify_oop(r, "broken oop in decode_heap_oop"); +} + +void MacroAssembler::decode_heap_oop(Register dst, Register src) { +#ifdef ASSERT + verify_heapbase("MacroAssembler::decode_heap_oop corrupted?"); +#endif + if (Universe::narrow_oop_base() == NULL) { + if (Universe::narrow_oop_shift() != 0) { + assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); + if (dst != src) nop(); // DON'T DELETE THIS GUY. + dsll(dst, src, LogMinObjAlignmentInBytes); + } else { + if (dst != src) move(dst, src); + } + } else { + if (dst == src) { + move(AT, dst); + if (Universe::narrow_oop_shift() != 0) { + assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); + shl(dst, LogMinObjAlignmentInBytes); + } + daddu(dst, dst, S5_heapbase); + movz(dst, R0, AT); + } else { + if (Universe::narrow_oop_shift() != 0) { + assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); + dsll(dst, src, LogMinObjAlignmentInBytes); + daddu(dst, dst, S5_heapbase); + } else { + daddu(dst, src, S5_heapbase); + } + movz(dst, R0, src); + } + } + verify_oop(dst, "broken oop in decode_heap_oop"); +} + +void MacroAssembler::decode_heap_oop_not_null(Register r) { + // Note: it will change flags + assert (UseCompressedOops, "should only be used for compressed headers"); + assert (Universe::heap() != NULL, "java heap should be initialized"); + // Cannot assert, unverified entry point counts instructions (see .ad file) + // vtableStubs also counts instructions in pd_code_size_limit. + // Also do not verify_oop as this is called by verify_oop. + if (Universe::narrow_oop_shift() != 0) { + assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); + shl(r, LogMinObjAlignmentInBytes); + if (Universe::narrow_oop_base() != NULL) { + daddu(r, r, S5_heapbase); + } + } else { + assert (Universe::narrow_oop_base() == NULL, "sanity"); + } +} + +void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) { + assert (UseCompressedOops, "should only be used for compressed headers"); + assert (Universe::heap() != NULL, "java heap should be initialized"); + + // Cannot assert, unverified entry point counts instructions (see .ad file) + // vtableStubs also counts instructions in pd_code_size_limit. + // Also do not verify_oop as this is called by verify_oop. 
+ //lea(dst, Address(S5_heapbase, src, Address::times_8, 0)); + if (Universe::narrow_oop_shift() != 0) { + assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); + if (LogMinObjAlignmentInBytes == Address::times_8) { + dsll(dst, src, LogMinObjAlignmentInBytes); + daddu(dst, dst, S5_heapbase); + } else { + dsll(dst, src, LogMinObjAlignmentInBytes); + if (Universe::narrow_oop_base() != NULL) { + daddu(dst, dst, S5_heapbase); + } + } + } else { + assert (Universe::narrow_oop_base() == NULL, "sanity"); + if (dst != src) { + move(dst, src); + } + } +} + +void MacroAssembler::encode_klass_not_null(Register r) { + if (Universe::narrow_klass_base() != NULL) { + assert(r != AT, "Encoding a klass in AT"); + set64(AT, (int64_t)Universe::narrow_klass_base()); + dsubu(r, r, AT); + } + if (Universe::narrow_klass_shift() != 0) { + assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); + shr(r, LogKlassAlignmentInBytes); + } +} + +void MacroAssembler::encode_klass_not_null(Register dst, Register src) { + if (dst == src) { + encode_klass_not_null(src); + } else { + if (Universe::narrow_klass_base() != NULL) { + set64(dst, (int64_t)Universe::narrow_klass_base()); + dsubu(dst, src, dst); + if (Universe::narrow_klass_shift() != 0) { + assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); + shr(dst, LogKlassAlignmentInBytes); + } + } else { + if (Universe::narrow_klass_shift() != 0) { + assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); + dsrl(dst, src, LogKlassAlignmentInBytes); + } else { + move(dst, src); + } + } + } +} + +// Function instr_size_for_decode_klass_not_null() counts the instructions +// generated by decode_klass_not_null(register r) and reinit_heapbase(), +// when (Universe::heap() != NULL). Hence, if the instructions they +// generate change, then this method needs to be updated. +int MacroAssembler::instr_size_for_decode_klass_not_null() { + assert (UseCompressedClassPointers, "only for compressed klass ptrs"); + if (Universe::narrow_klass_base() != NULL) { + // mov64 + addq + shlq? + mov64 (for reinit_heapbase()). + return (Universe::narrow_klass_shift() == 0 ? 4 * 9 : 4 * 10); + } else { + // longest load decode klass function, mov64, leaq + return (Universe::narrow_klass_shift() == 0 ? 4 * 0 : 4 * 1); + } +} + +void MacroAssembler::decode_klass_not_null(Register r) { + assert (UseCompressedClassPointers, "should only be used for compressed headers"); + assert(r != AT, "Decoding a klass in AT"); + // Cannot assert, unverified entry point counts instructions (see .ad file) + // vtableStubs also counts instructions in pd_code_size_limit. + // Also do not verify_oop as this is called by verify_oop. + if (Universe::narrow_klass_shift() != 0) { + assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); + shl(r, LogKlassAlignmentInBytes); + } + if (Universe::narrow_klass_base() != NULL) { + set64(AT, (int64_t)Universe::narrow_klass_base()); + daddu(r, r, AT); + //Not neccessary for MIPS at all. + //reinit_heapbase(); + } +} + +void MacroAssembler::decode_klass_not_null(Register dst, Register src) { + assert (UseCompressedClassPointers, "should only be used for compressed headers"); + + if (dst == src) { + decode_klass_not_null(dst); + } else { + // Cannot assert, unverified entry point counts instructions (see .ad file) + // vtableStubs also counts instructions in pd_code_size_limit. 
+ // Also do not verify_oop as this is called by verify_oop. + set64(dst, (int64_t)Universe::narrow_klass_base()); + if (Universe::narrow_klass_shift() != 0) { + assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); + assert(LogKlassAlignmentInBytes == Address::times_8, "klass not aligned on 64bits?"); + dsll(AT, src, Address::times_8); + daddu(dst, dst, AT); + } else { + daddu(dst, src, dst); + } + } +} + +void MacroAssembler::incrementl(Register reg, int value) { + if (value == min_jint) { + move(AT, value); + addu32(reg, reg, AT); + return; + } + if (value < 0) { decrementl(reg, -value); return; } + if (value == 0) { ; return; } + + move(AT, value); + addu32(reg, reg, AT); +} + +void MacroAssembler::decrementl(Register reg, int value) { + if (value == min_jint) { + move(AT, value); + subu32(reg, reg, AT); + return; + } + if (value < 0) { incrementl(reg, -value); return; } + if (value == 0) { ; return; } + + move(AT, value); + subu32(reg, reg, AT); +} + +void MacroAssembler::reinit_heapbase() { + if (UseCompressedOops || UseCompressedClassPointers) { + if (Universe::heap() != NULL) { + if (Universe::narrow_oop_base() == NULL) { + move(S5_heapbase, R0); + } else { + set64(S5_heapbase, (int64_t)Universe::narrow_ptrs_base()); + } + } else { + set64(S5_heapbase, (intptr_t)Universe::narrow_ptrs_base_addr()); + ld(S5_heapbase, S5_heapbase, 0); + } + } +} + +void MacroAssembler::check_klass_subtype(Register sub_klass, + Register super_klass, + Register temp_reg, + Label& L_success) { +//implement ind gen_subtype_check + Label L_failure; + check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg, &L_success, &L_failure, NULL); + check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, noreg, &L_success, NULL); + bind(L_failure); +} + +void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass, + Register super_klass, + Register temp_reg, + Label* L_success, + Label* L_failure, + Label* L_slow_path, + RegisterOrConstant super_check_offset) { + assert_different_registers(sub_klass, super_klass, temp_reg); + bool must_load_sco = (super_check_offset.constant_or_zero() == -1); + if (super_check_offset.is_register()) { + assert_different_registers(sub_klass, super_klass, + super_check_offset.as_register()); + } else if (must_load_sco) { + assert(temp_reg != noreg, "supply either a temp or a register offset"); + } + + Label L_fallthrough; + int label_nulls = 0; + if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } + if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } + if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; } + assert(label_nulls <= 1, "at most one NULL in the batch"); + + int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); + int sco_offset = in_bytes(Klass::super_check_offset_offset()); + // If the pointers are equal, we are done (e.g., String[] elements). + // This self-check enables sharing of secondary supertype arrays among + // non-primary types such as array-of-interface. Otherwise, each such + // type would need its own customized SSA. + // We move this check to the front of the fast path because many + // type checks are in fact trivially successful in this manner, + // so we get a nicely predicted branch right at the start of the check. 
+ beq(sub_klass, super_klass, *L_success); + delayed()->nop(); + // Check the supertype display: + if (must_load_sco) { + lwu(temp_reg, super_klass, sco_offset); + super_check_offset = RegisterOrConstant(temp_reg); + } + daddu(AT, sub_klass, super_check_offset.register_or_noreg()); + ld(AT, AT, super_check_offset.constant_or_zero()); + + // This check has worked decisively for primary supers. + // Secondary supers are sought in the super_cache ('super_cache_addr'). + // (Secondary supers are interfaces and very deeply nested subtypes.) + // This works in the same check above because of a tricky aliasing + // between the super_cache and the primary super display elements. + // (The 'super_check_addr' can address either, as the case requires.) + // Note that the cache is updated below if it does not help us find + // what we need immediately. + // So if it was a primary super, we can just fail immediately. + // Otherwise, it's the slow path for us (no success at this point). + + if (super_check_offset.is_register()) { + beq(super_klass, AT, *L_success); + delayed()->nop(); + addiu(AT, super_check_offset.as_register(), -sc_offset); + if (L_failure == &L_fallthrough) { + beq(AT, R0, *L_slow_path); + delayed()->nop(); + } else { + bne_far(AT, R0, *L_failure); + delayed()->nop(); + b(*L_slow_path); + delayed()->nop(); + } + } else if (super_check_offset.as_constant() == sc_offset) { + // Need a slow path; fast failure is impossible. + if (L_slow_path == &L_fallthrough) { + beq(super_klass, AT, *L_success); + delayed()->nop(); + } else { + bne(super_klass, AT, *L_slow_path); + delayed()->nop(); + b(*L_success); + delayed()->nop(); + } + } else { + // No slow path; it's a fast decision. + if (L_failure == &L_fallthrough) { + beq(super_klass, AT, *L_success); + delayed()->nop(); + } else { + bne_far(super_klass, AT, *L_failure); + delayed()->nop(); + b(*L_success); + delayed()->nop(); + } + } + + bind(L_fallthrough); + +} + + +void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass, + Register super_klass, + Register temp_reg, + Register temp2_reg, + Label* L_success, + Label* L_failure, + bool set_cond_codes) { + if (temp2_reg == noreg) + temp2_reg = TSR; + assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg); +#define IS_A_TEMP(reg) ((reg) == temp_reg || (reg) == temp2_reg) + + Label L_fallthrough; + int label_nulls = 0; + if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } + if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } + assert(label_nulls <= 1, "at most one NULL in the batch"); + + // a couple of useful fields in sub_klass: + int ss_offset = in_bytes(Klass::secondary_supers_offset()); + int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); + Address secondary_supers_addr(sub_klass, ss_offset); + Address super_cache_addr( sub_klass, sc_offset); + + // Do a linear scan of the secondary super-klass chain. + // This code is rarely used, so simplicity is a virtue here. + // The repne_scan instruction uses fixed registers, which we must spill. + // Don't worry too much about pre-existing connections with the input regs. + +#ifndef PRODUCT + int* pst_counter = &SharedRuntime::_partial_subtype_ctr; + ExternalAddress pst_counter_addr((address) pst_counter); +#endif //PRODUCT + + // We will consult the secondary-super array. + ld(temp_reg, secondary_supers_addr); + // Load the array length. + lw(temp2_reg, Address(temp_reg, Array::length_offset_in_bytes())); + // Skip to start of data. 
+ daddiu(temp_reg, temp_reg, Array::base_offset_in_bytes()); + + // OpenJDK8 never compresses klass pointers in secondary-super array. + Label Loop, subtype; + bind(Loop); + beq(temp2_reg, R0, *L_failure); + delayed()->nop(); + ld(AT, temp_reg, 0); + beq(AT, super_klass, subtype); + delayed()->daddiu(temp_reg, temp_reg, 1 * wordSize); + b(Loop); + delayed()->daddiu(temp2_reg, temp2_reg, -1); + + bind(subtype); + sd(super_klass, super_cache_addr); + if (L_success != &L_fallthrough) { + b(*L_success); + delayed()->nop(); + } + + // Success. Cache the super we found and proceed in triumph. +#undef IS_A_TEMP + + bind(L_fallthrough); +} + +void MacroAssembler::get_vm_result(Register oop_result, Register java_thread) { + ld(oop_result, Address(java_thread, JavaThread::vm_result_offset())); + sd(R0, Address(java_thread, JavaThread::vm_result_offset())); + verify_oop(oop_result, "broken oop in call_VM_base"); +} + +void MacroAssembler::get_vm_result_2(Register metadata_result, Register java_thread) { + ld(metadata_result, Address(java_thread, JavaThread::vm_result_2_offset())); + sd(R0, Address(java_thread, JavaThread::vm_result_2_offset())); +} + +Address MacroAssembler::argument_address(RegisterOrConstant arg_slot, + int extra_slot_offset) { + // cf. TemplateTable::prepare_invoke(), if (load_receiver). + int stackElementSize = Interpreter::stackElementSize; + int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0); +#ifdef ASSERT + int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1); + assert(offset1 - offset == stackElementSize, "correct arithmetic"); +#endif + Register scale_reg = NOREG; + Address::ScaleFactor scale_factor = Address::no_scale; + if (arg_slot.is_constant()) { + offset += arg_slot.as_constant() * stackElementSize; + } else { + scale_reg = arg_slot.as_register(); + scale_factor = Address::times_8; + } + // We don't push RA on stack in prepare_invoke. + // offset += wordSize; // return PC is on stack + if(scale_reg==NOREG) return Address(SP, offset); + else { + dsll(scale_reg, scale_reg, scale_factor); + daddu(scale_reg, SP, scale_reg); + return Address(scale_reg, offset); + } +} + +SkipIfEqual::~SkipIfEqual() { + _masm->bind(_label); +} + +void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2) { + switch (size_in_bytes) { + case 8: ld(dst, src); break; + case 4: lw(dst, src); break; + case 2: is_signed ? lh(dst, src) : lhu(dst, src); break; + case 1: is_signed ? lb( dst, src) : lbu( dst, src); break; + default: ShouldNotReachHere(); + } +} + +void MacroAssembler::store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2) { + switch (size_in_bytes) { + case 8: sd(src, dst); break; + case 4: sw(src, dst); break; + case 2: sh(src, dst); break; + case 1: sb(src, dst); break; + default: ShouldNotReachHere(); + } +} + +// Look up the method for a megamorphic invokeinterface call. +// The target method is determined by . +// The receiver klass is in recv_klass. +// On success, the result will be in method_result, and execution falls through. +// On failure, execution transfers to the given label. 
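A compact sketch of the scan that lookup_interface_method() below emits. The struct layout here is only an illustrative stand-in for HotSpot's itableOffsetEntry; the real code additionally scales itable_index to reach the itableMethodEntry once the offset has been found.

#include <cstddef>

// Illustrative stand-in for itableOffsetEntry: the interface klass plus the
// byte offset of that interface's method table within the receiver klass.
struct ItableOffsetEntry {
  const void* interface_klass;
  int         offset;
};

// Linear scan of the offset table, as in the peeled loop below: a NULL
// interface entry terminates the table and means "interface not implemented".
int find_itable_offset(const ItableOffsetEntry* entries, const void* intf) {
  for (size_t i = 0; ; i++) {
    if (entries[i].interface_klass == nullptr) return -1;   // L_no_such_interface
    if (entries[i].interface_klass == intf)    return entries[i].offset;
  }
}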
+void MacroAssembler::lookup_interface_method(Register recv_klass, + Register intf_klass, + RegisterOrConstant itable_index, + Register method_result, + Register scan_temp, + Label& L_no_such_interface, + bool return_method) { + assert_different_registers(recv_klass, intf_klass, scan_temp, AT); + assert_different_registers(method_result, intf_klass, scan_temp, AT); + assert(recv_klass != method_result || !return_method, + "recv_klass can be destroyed when method isn't needed"); + + assert(itable_index.is_constant() || itable_index.as_register() == method_result, + "caller must use same register for non-constant itable index as for method"); + + // Compute start of first itableOffsetEntry (which is at the end of the vtable) + int vtable_base = in_bytes(Klass::vtable_start_offset()); + int itentry_off = itableMethodEntry::method_offset_in_bytes(); + int scan_step = itableOffsetEntry::size() * wordSize; + int vte_size = vtableEntry::size() * wordSize; + Address::ScaleFactor times_vte_scale = Address::times_ptr; + assert(vte_size == wordSize, "else adjust times_vte_scale"); + + lw(scan_temp, Address(recv_klass, Klass::vtable_length_offset())); + + // %%% Could store the aligned, prescaled offset in the klassoop. + dsll(scan_temp, scan_temp, times_vte_scale); + daddu(scan_temp, recv_klass, scan_temp); + daddiu(scan_temp, scan_temp, vtable_base); + if (HeapWordsPerLong > 1) { + // Round up to align_object_offset boundary + // see code for InstanceKlass::start_of_itable! + round_to(scan_temp, BytesPerLong); + } + + if (return_method) { + // Adjust recv_klass by scaled itable_index, so we can free itable_index. + assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below"); + if (itable_index.is_constant()) { + set64(AT, (int)itable_index.is_constant()); + dsll(AT, AT, (int)Address::times_ptr); + } else { + dsll(AT, itable_index.as_register(), (int)Address::times_ptr); + } + daddu(AT, AT, recv_klass); + daddiu(recv_klass, AT, itentry_off); + } + + Label search, found_method; + + for (int peel = 1; peel >= 0; peel--) { + ld(method_result, Address(scan_temp, itableOffsetEntry::interface_offset_in_bytes())); + + if (peel) { + beq(intf_klass, method_result, found_method); + delayed()->nop(); + } else { + bne(intf_klass, method_result, search); + delayed()->nop(); + // (invert the test to fall through to found_method...) + } + + if (!peel) break; + + bind(search); + + // Check that the previous entry is non-null. A null entry means that + // the receiver class doesn't implement the interface, and wasn't the + // same as when the caller was compiled. + beq(method_result, R0, L_no_such_interface); + delayed()->nop(); + daddiu(scan_temp, scan_temp, scan_step); + } + + bind(found_method); + + if (return_method) { + // Got a hit. 
+ lw(scan_temp, Address(scan_temp, itableOffsetEntry::offset_offset_in_bytes())); + if (UseLEXT1) { + gsldx(method_result, recv_klass, scan_temp, 0); + } else { + daddu(AT, recv_klass, scan_temp); + ld(method_result, AT, 0); + } + } +} + +// virtual method calling +void MacroAssembler::lookup_virtual_method(Register recv_klass, + RegisterOrConstant vtable_index, + Register method_result) { + Register tmp = GP; + push(tmp); + + if (vtable_index.is_constant()) { + assert_different_registers(recv_klass, method_result, tmp); + } else { + assert_different_registers(recv_klass, method_result, vtable_index.as_register(), tmp); + } + const int base = in_bytes(Klass::vtable_start_offset()); + assert(vtableEntry::size() * wordSize == wordSize, "else adjust the scaling in the code below"); + if (vtable_index.is_constant()) { + set64(AT, vtable_index.as_constant()); + dsll(AT, AT, (int)Address::times_ptr); + } else { + dsll(AT, vtable_index.as_register(), (int)Address::times_ptr); + } + set64(tmp, base + vtableEntry::method_offset_in_bytes()); + daddu(tmp, tmp, AT); + daddu(tmp, tmp, recv_klass); + ld(method_result, tmp, 0); + + pop(tmp); +} + +void MacroAssembler::store_for_type_by_register(Register src_reg, Register tmp_reg, int disp, BasicType type, bool wide) { + switch (type) { + case T_LONG: + st_ptr(src_reg, tmp_reg, disp); + break; + case T_ARRAY: + case T_OBJECT: + if (UseCompressedOops && !wide) { + sw(src_reg, tmp_reg, disp); + } else { + st_ptr(src_reg, tmp_reg, disp); + } + break; + case T_ADDRESS: + st_ptr(src_reg, tmp_reg, disp); + break; + case T_INT: + sw(src_reg, tmp_reg, disp); + break; + case T_CHAR: + case T_SHORT: + sh(src_reg, tmp_reg, disp); + break; + case T_BYTE: + case T_BOOLEAN: + sb(src_reg, tmp_reg, disp); + break; + default: + ShouldNotReachHere(); + } +} + +void MacroAssembler::store_for_type(Register src_reg, Address addr, BasicType type, bool wide) { + Register tmp_reg = T9; + Register index_reg = addr.index(); + if (index_reg == NOREG) { + tmp_reg = NOREG; + } + + int scale = addr.scale(); + if (tmp_reg != NOREG && scale >= 0) { + dsll(tmp_reg, index_reg, scale); + } + + int disp = addr.disp(); + bool disp_is_simm16 = true; + if (!Assembler::is_simm16(disp)) { + disp_is_simm16 = false; + } + + Register base_reg = addr.base(); + if (tmp_reg != NOREG) { + assert_different_registers(tmp_reg, base_reg, index_reg); + } + + if (tmp_reg != NOREG) { + daddu(tmp_reg, base_reg, tmp_reg); + if (!disp_is_simm16) { + move(tmp_reg, disp); + daddu(tmp_reg, base_reg, tmp_reg); + } + store_for_type_by_register(src_reg, tmp_reg, disp_is_simm16 ? disp : 0, type, wide); + } else { + if (!disp_is_simm16) { + tmp_reg = T9; + assert_different_registers(tmp_reg, base_reg); + move(tmp_reg, disp); + daddu(tmp_reg, base_reg, tmp_reg); + } + store_for_type_by_register(src_reg, disp_is_simm16 ? base_reg : tmp_reg, disp_is_simm16 ? 
disp : 0, type, wide); + } +} + +void MacroAssembler::store_for_type_by_register(FloatRegister src_reg, Register tmp_reg, int disp, BasicType type) { + switch (type) { + case T_DOUBLE: + sdc1(src_reg, tmp_reg, disp); + break; + case T_FLOAT: + swc1(src_reg, tmp_reg, disp); + break; + default: + ShouldNotReachHere(); + } +} + +void MacroAssembler::store_for_type(FloatRegister src_reg, Address addr, BasicType type) { + Register tmp_reg = T9; + Register index_reg = addr.index(); + if (index_reg == NOREG) { + tmp_reg = NOREG; + } + + int scale = addr.scale(); + if (tmp_reg != NOREG && scale >= 0) { + dsll(tmp_reg, index_reg, scale); + } + + int disp = addr.disp(); + bool disp_is_simm16 = true; + if (!Assembler::is_simm16(disp)) { + disp_is_simm16 = false; + } + + Register base_reg = addr.base(); + if (tmp_reg != NOREG) { + assert_different_registers(tmp_reg, base_reg, index_reg); + } + + if (tmp_reg != NOREG) { + daddu(tmp_reg, base_reg, tmp_reg); + if (!disp_is_simm16) { + move(tmp_reg, disp); + daddu(tmp_reg, base_reg, tmp_reg); + } + store_for_type_by_register(src_reg, tmp_reg, disp_is_simm16 ? disp : 0, type); + } else { + if (!disp_is_simm16) { + tmp_reg = T9; + assert_different_registers(tmp_reg, base_reg); + move(tmp_reg, disp); + daddu(tmp_reg, base_reg, tmp_reg); + } + store_for_type_by_register(src_reg, disp_is_simm16 ? base_reg : tmp_reg, disp_is_simm16 ? disp : 0, type); + } +} + +void MacroAssembler::load_for_type_by_register(Register dst_reg, Register tmp_reg, int disp, BasicType type, bool wide) { + switch (type) { + case T_LONG: + ld_ptr(dst_reg, tmp_reg, disp); + break; + case T_ARRAY: + case T_OBJECT: + if (UseCompressedOops && !wide) { + lwu(dst_reg, tmp_reg, disp); + } else { + ld_ptr(dst_reg, tmp_reg, disp); + } + break; + case T_ADDRESS: + if (UseCompressedClassPointers && disp == oopDesc::klass_offset_in_bytes()) { + lwu(dst_reg, tmp_reg, disp); + } else { + ld_ptr(dst_reg, tmp_reg, disp); + } + break; + case T_INT: + lw(dst_reg, tmp_reg, disp); + break; + case T_CHAR: + lhu(dst_reg, tmp_reg, disp); + break; + case T_SHORT: + lh(dst_reg, tmp_reg, disp); + break; + case T_BYTE: + case T_BOOLEAN: + lb(dst_reg, tmp_reg, disp); + break; + default: + ShouldNotReachHere(); + } +} + +int MacroAssembler::load_for_type(Register dst_reg, Address addr, BasicType type, bool wide) { + int code_offset = 0; + Register tmp_reg = T9; + Register index_reg = addr.index(); + if (index_reg == NOREG) { + tmp_reg = NOREG; + } + + int scale = addr.scale(); + if (tmp_reg != NOREG && scale >= 0) { + dsll(tmp_reg, index_reg, scale); + } + + int disp = addr.disp(); + bool disp_is_simm16 = true; + if (!Assembler::is_simm16(disp)) { + disp_is_simm16 = false; + } + + Register base_reg = addr.base(); + if (tmp_reg != NOREG) { + assert_different_registers(tmp_reg, base_reg, index_reg); + } + + if (tmp_reg != NOREG) { + daddu(tmp_reg, base_reg, tmp_reg); + if (!disp_is_simm16) { + move(tmp_reg, disp); + daddu(tmp_reg, base_reg, tmp_reg); + } + code_offset = offset(); + load_for_type_by_register(dst_reg, tmp_reg, disp_is_simm16 ? disp : 0, type, wide); + } else { + if (!disp_is_simm16) { + tmp_reg = T9; + assert_different_registers(tmp_reg, base_reg); + move(tmp_reg, disp); + daddu(tmp_reg, base_reg, tmp_reg); + } + code_offset = offset(); + load_for_type_by_register(dst_reg, disp_is_simm16 ? base_reg : tmp_reg, disp_is_simm16 ? disp : 0, type, wide); + } + + return code_offset; +} + +#ifdef COMPILER2 +// Compare strings, used for char[] and byte[]. 
+void MacroAssembler::string_compare(Register str1, Register str2, + Register cnt1, Register cnt2, Register result, + int ae) { + Label L, Loop, haveResult, done; + + bool isLL = ae == StrIntrinsicNode::LL; + bool isLU = ae == StrIntrinsicNode::LU; + bool isUL = ae == StrIntrinsicNode::UL; + + bool str1_isL = isLL || isLU; + bool str2_isL = isLL || isUL; + + if (!str1_isL) srl(cnt1, cnt1, 1); + if (!str2_isL) srl(cnt2, cnt2, 1); + + // compute the and difference of lengths (in result) + subu(result, cnt1, cnt2); // result holds the difference of two lengths + + // compute the shorter length (in cnt1) + slt(AT, cnt2, cnt1); + movn(cnt1, cnt2, AT); + + // Now the shorter length is in cnt1 and cnt2 can be used as a tmp register + bind(Loop); // Loop begin + beq(cnt1, R0, done); + if (str1_isL) { + delayed()->lbu(AT, str1, 0); + } else { + delayed()->lhu(AT, str1, 0); + } + + // compare current character + if (str2_isL) { + lbu(cnt2, str2, 0); + } else { + lhu(cnt2, str2, 0); + } + bne(AT, cnt2, haveResult); + delayed()->addiu(str1, str1, str1_isL ? 1 : 2); + addiu(str2, str2, str2_isL ? 1 : 2); + b(Loop); + delayed()->addiu(cnt1, cnt1, -1); // Loop end + + bind(haveResult); + subu(result, AT, cnt2); + + bind(done); +} + +// Compare char[] or byte[] arrays or substrings. +void MacroAssembler::arrays_equals(Register str1, Register str2, + Register cnt, Register tmp, Register result, + bool is_char) { + Label Loop, True, False; + + beq(str1, str2, True); // same char[] ? + delayed()->daddiu(result, R0, 1); + + beq(cnt, R0, True); + delayed()->nop(); // count == 0 + + bind(Loop); + + // compare current character + if (is_char) { + lhu(AT, str1, 0); + lhu(tmp, str2, 0); + } else { + lbu(AT, str1, 0); + lbu(tmp, str2, 0); + } + bne(AT, tmp, False); + delayed()->addiu(str1, str1, is_char ? 2 : 1); + addiu(cnt, cnt, -1); + bne(cnt, R0, Loop); + delayed()->addiu(str2, str2, is_char ? 2 : 1); + + b(True); + delayed()->nop(); + + bind(False); + daddiu(result, R0, 0); + + bind(True); +} +#endif // COMPILER2 + +void MacroAssembler::load_for_type_by_register(FloatRegister dst_reg, Register tmp_reg, int disp, BasicType type) { + switch (type) { + case T_DOUBLE: + ldc1(dst_reg, tmp_reg, disp); + break; + case T_FLOAT: + lwc1(dst_reg, tmp_reg, disp); + break; + default: + ShouldNotReachHere(); + } +} + +int MacroAssembler::load_for_type(FloatRegister dst_reg, Address addr, BasicType type) { + int code_offset = 0; + Register tmp_reg = T9; + Register index_reg = addr.index(); + if (index_reg == NOREG) { + tmp_reg = NOREG; + } + + int scale = addr.scale(); + if (tmp_reg != NOREG && scale >= 0) { + dsll(tmp_reg, index_reg, scale); + } + + int disp = addr.disp(); + bool disp_is_simm16 = true; + if (!Assembler::is_simm16(disp)) { + disp_is_simm16 = false; + } + + Register base_reg = addr.base(); + if (tmp_reg != NOREG) { + assert_different_registers(tmp_reg, base_reg, index_reg); + } + + if (tmp_reg != NOREG) { + daddu(tmp_reg, base_reg, tmp_reg); + if (!disp_is_simm16) { + move(tmp_reg, disp); + daddu(tmp_reg, base_reg, tmp_reg); + } + code_offset = offset(); + load_for_type_by_register(dst_reg, tmp_reg, disp_is_simm16 ? disp : 0, type); + } else { + if (!disp_is_simm16) { + tmp_reg = T9; + assert_different_registers(tmp_reg, base_reg); + move(tmp_reg, disp); + daddu(tmp_reg, base_reg, tmp_reg); + } + code_offset = offset(); + load_for_type_by_register(dst_reg, disp_is_simm16 ? base_reg : tmp_reg, disp_is_simm16 ? 
disp : 0, type); + } + + return code_offset; +} + +void MacroAssembler::clear_jweak_tag(Register possibly_jweak) { + const int32_t inverted_jweak_mask = ~static_cast(JNIHandles::weak_tag_mask); + STATIC_ASSERT(inverted_jweak_mask == -2); // otherwise check this code + // The inverted mask is sign-extended + move(AT, inverted_jweak_mask); + andr(possibly_jweak, AT, possibly_jweak); +} + +void MacroAssembler::resolve_jobject(Register value, + Register thread, + Register tmp) { + assert_different_registers(value, thread, tmp); + Label done, not_weak; + beq(value, R0, done); // Use NULL as-is. + delayed()->nop(); + move(AT, JNIHandles::weak_tag_mask); // Test for jweak tag. + andr(AT, value, AT); + beq(AT, R0, not_weak); + delayed()->nop(); + // Resolve jweak. + access_load_at(T_OBJECT, IN_NATIVE | ON_PHANTOM_OOP_REF, + value, Address(value, -JNIHandles::weak_tag_value), tmp, thread); + verify_oop(value); + b(done); + delayed()->nop(); + bind(not_weak); + // Resolve (untagged) jobject. + access_load_at(T_OBJECT, IN_NATIVE, value, Address(value, 0), tmp, thread); + verify_oop(value); + bind(done); +} + +void MacroAssembler::cmp_cmov(Register op1, + Register op2, + Register dst, + Register src, + CMCompare cmp, + bool is_signed) { + switch (cmp) { + case EQ: + subu(AT, op1, op2); + movz(dst, src, AT); + break; + + case NE: + subu(AT, op1, op2); + movn(dst, src, AT); + break; + + case GT: + if (is_signed) { + slt(AT, op2, op1); + } else { + sltu(AT, op2, op1); + } + movn(dst, src, AT); + break; + + case GE: + if (is_signed) { + slt(AT, op1, op2); + } else { + sltu(AT, op1, op2); + } + movz(dst, src, AT); + break; + + case LT: + if (is_signed) { + slt(AT, op1, op2); + } else { + sltu(AT, op1, op2); + } + movn(dst, src, AT); + break; + + case LE: + if (is_signed) { + slt(AT, op2, op1); + } else { + sltu(AT, op2, op1); + } + movz(dst, src, AT); + break; + + default: + Unimplemented(); + } +} + +void MacroAssembler::cmp_cmov(FloatRegister op1, + FloatRegister op2, + Register dst, + Register src, + CMCompare cmp, + bool is_float) { + switch(cmp) { + case EQ: + if (is_float) { + c_eq_s(op1, op2); + } else { + c_eq_d(op1, op2); + } + movt(dst, src); + break; + + case NE: + if (is_float) { + c_eq_s(op1, op2); + } else { + c_eq_d(op1, op2); + } + movf(dst, src); + break; + + case GT: + if (is_float) { + c_ule_s(op1, op2); + } else { + c_ule_d(op1, op2); + } + movf(dst, src); + break; + + case GE: + if (is_float) { + c_ult_s(op1, op2); + } else { + c_ult_d(op1, op2); + } + movf(dst, src); + break; + + case LT: + if (is_float) { + c_ult_s(op1, op2); + } else { + c_ult_d(op1, op2); + } + movt(dst, src); + break; + + case LE: + if (is_float) { + c_ule_s(op1, op2); + } else { + c_ule_d(op1, op2); + } + movt(dst, src); + break; + + default: + Unimplemented(); + } +} + +void MacroAssembler::cmp_cmov(FloatRegister op1, + FloatRegister op2, + FloatRegister dst, + FloatRegister src, + CMCompare cmp, + bool is_float) { + switch(cmp) { + case EQ: + if (!is_float) { + c_eq_d(op1, op2); + movt_d(dst, src); + } else { + c_eq_s(op1, op2); + movt_s(dst, src); + } + break; + + case NE: + if (!is_float) { + c_eq_d(op1, op2); + movf_d(dst, src); + } else { + c_eq_s(op1, op2); + movf_s(dst, src); + } + break; + + case GT: + if (!is_float) { + c_ule_d(op1, op2); + movf_d(dst, src); + } else { + c_ule_s(op1, op2); + movf_s(dst, src); + } + break; + + case GE: + if (!is_float) { + c_ult_d(op1, op2); + movf_d(dst, src); + } else { + c_ult_s(op1, op2); + movf_s(dst, src); + } + break; + + case LT: + if (!is_float) { + c_ult_d(op1, 
op2); + movt_d(dst, src); + } else { + c_ult_s(op1, op2); + movt_s(dst, src); + } + break; + + case LE: + if (!is_float) { + c_ule_d(op1, op2); + movt_d(dst, src); + } else { + c_ule_s(op1, op2); + movt_s(dst, src); + } + break; + + default: + Unimplemented(); + } +} + +void MacroAssembler::cmp_cmov(Register op1, + Register op2, + FloatRegister dst, + FloatRegister src, + CMCompare cmp, + bool is_float) { + Label L; + + switch(cmp) { + case EQ: + bne(op1, op2, L); + delayed()->nop(); + if (is_float) { + mov_s(dst, src); + } else { + mov_d(dst, src); + } + bind(L); + break; + + case NE: + beq(op1, op2, L); + delayed()->nop(); + if (is_float) { + mov_s(dst, src); + } else { + mov_d(dst, src); + } + bind(L); + break; + + case GT: + slt(AT, op2, op1); + beq(AT, R0, L); + delayed()->nop(); + if (is_float) { + mov_s(dst, src); + } else { + mov_d(dst, src); + } + bind(L); + break; + + case GE: + slt(AT, op1, op2); + bne(AT, R0, L); + delayed()->nop(); + if (is_float) { + mov_s(dst, src); + } else { + mov_d(dst, src); + } + bind(L); + break; + + case LT: + slt(AT, op1, op2); + beq(AT, R0, L); + delayed()->nop(); + if (is_float) { + mov_s(dst, src); + } else { + mov_d(dst, src); + } + bind(L); + break; + + case LE: + slt(AT, op2, op1); + bne(AT, R0, L); + delayed()->nop(); + if (is_float) { + mov_s(dst, src); + } else { + mov_d(dst, src); + } + bind(L); + break; + + default: + Unimplemented(); + } +} + +void MacroAssembler::gs_loadstore(Register reg, Register base, Register index, int disp, int type) { + switch (type) { + case STORE_BYTE: + gssbx(reg, base, index, disp); + break; + case STORE_CHAR: + case STORE_SHORT: + gsshx(reg, base, index, disp); + break; + case STORE_INT: + gsswx(reg, base, index, disp); + break; + case STORE_LONG: + gssdx(reg, base, index, disp); + break; + case LOAD_BYTE: + gslbx(reg, base, index, disp); + break; + case LOAD_SHORT: + gslhx(reg, base, index, disp); + break; + case LOAD_INT: + gslwx(reg, base, index, disp); + break; + case LOAD_LONG: + gsldx(reg, base, index, disp); + break; + default: + ShouldNotReachHere(); + } +} + +void MacroAssembler::gs_loadstore(FloatRegister reg, Register base, Register index, int disp, int type) { + switch (type) { + case STORE_FLOAT: + gsswxc1(reg, base, index, disp); + break; + case STORE_DOUBLE: + gssdxc1(reg, base, index, disp); + break; + case LOAD_FLOAT: + gslwxc1(reg, base, index, disp); + break; + case LOAD_DOUBLE: + gsldxc1(reg, base, index, disp); + break; + default: + ShouldNotReachHere(); + } +} + +void MacroAssembler::loadstore(Register reg, Register base, int disp, int type) { + switch (type) { + case STORE_BYTE: + sb(reg, base, disp); + break; + case STORE_CHAR: + case STORE_SHORT: + sh(reg, base, disp); + break; + case STORE_INT: + sw(reg, base, disp); + break; + case STORE_LONG: + sd(reg, base, disp); + break; + case LOAD_BYTE: + lb(reg, base, disp); + break; + case LOAD_U_BYTE: + lbu(reg, base, disp); + break; + case LOAD_SHORT: + lh(reg, base, disp); + break; + case LOAD_U_SHORT: + lhu(reg, base, disp); + break; + case LOAD_INT: + lw(reg, base, disp); + break; + case LOAD_U_INT: + lwu(reg, base, disp); + break; + case LOAD_LONG: + ld(reg, base, disp); + break; + case LOAD_LINKED_LONG: + lld(reg, base, disp); + break; + default: + ShouldNotReachHere(); + } +} + +void MacroAssembler::loadstore(FloatRegister reg, Register base, int disp, int type) { + switch (type) { + case STORE_FLOAT: + swc1(reg, base, disp); + break; + case STORE_DOUBLE: + sdc1(reg, base, disp); + break; + case LOAD_FLOAT: + lwc1(reg, base, disp); + 
break; + case LOAD_DOUBLE: + ldc1(reg, base, disp); + break; + default: + ShouldNotReachHere(); + } +} diff --git a/src/hotspot/cpu/mips/macroAssembler_mips.hpp b/src/hotspot/cpu/mips/macroAssembler_mips.hpp new file mode 100644 index 00000000000..55ec29e91bf --- /dev/null +++ b/src/hotspot/cpu/mips/macroAssembler_mips.hpp @@ -0,0 +1,818 @@ +/* + * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_MIPS_VM_MACROASSEMBLER_MIPS_HPP +#define CPU_MIPS_VM_MACROASSEMBLER_MIPS_HPP + +#include "asm/assembler.hpp" +#include "runtime/rtmLocking.hpp" +#include "utilities/macros.hpp" + +// MacroAssembler extends Assembler by frequently used macros. +// +// Instructions for which a 'better' code sequence exists depending +// on arguments should also go in here. + +class MacroAssembler: public Assembler { + friend class LIR_Assembler; + friend class Runtime1; // as_Address() + + public: + // Compare code + typedef enum { + EQ = 0x01, + NE = 0x02, + GT = 0x03, + GE = 0x04, + LT = 0x05, + LE = 0x06 + } CMCompare; + + protected: + + // Support for VM calls + // + // This is the base routine called by the different versions of call_VM_leaf. The interpreter + // may customize this version by overriding it for its purposes (e.g., to save/restore + // additional registers when doing a VM call). + #define VIRTUAL virtual + + VIRTUAL void call_VM_leaf_base( + address entry_point, // the entry point + int number_of_arguments // the number of arguments to pop after the call + ); + + // This is the base routine called by the different versions of call_VM. The interpreter + // may customize this version by overriding it for its purposes (e.g., to save/restore + // additional registers when doing a VM call). + // + // If no java_thread register is specified (noreg) than TREG will be used instead. call_VM_base + // returns the register which contains the thread upon return. If a thread register has been + // specified, the return value will correspond to that register. If no last_java_sp is specified + // (noreg) than sp will be used instead. 
+ VIRTUAL void call_VM_base( // returns the register containing the thread upon return + Register oop_result, // where an oop-result ends up if any; use noreg otherwise + Register java_thread, // the thread if computed before ; use noreg otherwise + Register last_java_sp, // to set up last_Java_frame in stubs; use noreg otherwise + address entry_point, // the entry point + int number_of_arguments, // the number of arguments (w/o thread) to pop after the call + bool check_exceptions // whether to check for pending exceptions after return + ); + + void call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions = true); + + // helpers for FPU flag access + // tmp is a temporary register, if none is available use noreg + + public: + MacroAssembler(CodeBuffer* code) : Assembler(code) {} + + // These routines should emit JVMTI PopFrame and ForceEarlyReturn handling code. + // The implementation is only non-empty for the InterpreterMacroAssembler, + // as only the interpreter handles PopFrame and ForceEarlyReturn requests. + virtual void check_and_handle_popframe(Register java_thread); + virtual void check_and_handle_earlyret(Register java_thread); + + Address as_Address(AddressLiteral adr); + Address as_Address(ArrayAddress adr); + + static intptr_t i[32]; + static float f[32]; + static void print(outputStream *s); + + static int i_offset(unsigned int k); + static int f_offset(unsigned int k); + + static void save_registers(MacroAssembler *masm); + static void restore_registers(MacroAssembler *masm); + + // Support for NULL-checks + // + // Generates code that causes a NULL OS exception if the content of reg is NULL. + // If the accessed location is M[reg + offset] and the offset is known, provide the + // offset. No explicit code generation is needed if the offset is within a certain + // range (0 <= offset <= page_size). + + void null_check(Register reg, int offset = -1); + static bool needs_explicit_null_check(intptr_t offset); + + // Required platform-specific helpers for Label::patch_instructions. + // They _shadow_ the declarations in AbstractAssembler, which are undefined. + void pd_patch_instruction(address branch, address target); + + address emit_trampoline_stub(int insts_call_instruction_offset, address target); + + // Support for inc/dec with optimal instruction selection depending on value + void incrementl(Register reg, int value = 1); + void decrementl(Register reg, int value = 1); + + + // Alignment + void align(int modulus); + + + // Stack frame creation/removal + void enter(); + void leave(); + + // Support for getting the JavaThread pointer (i.e.; a reference to thread-local information) + // The pointer will be loaded into the thread register. + void get_thread(Register thread); + + + // Support for VM calls + // + // It is imperative that all calls into the VM are handled via the call_VM macros. + // They make sure that the stack linkage is setup correctly. call_VM's correspond + // to ENTRY/ENTRY_X entry points while call_VM_leaf's correspond to LEAF entry points. 
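+  // Illustrative call site (not part of this change): an interpreter stub would
+  // typically write something like
+  //   call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::prepare_native_call), Rmethod);
+  // which sets up the last Java frame, dispatches through call_VM_base, and by
+  // default checks for a pending exception on return.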
+ + + void call_VM(Register oop_result, + address entry_point, + bool check_exceptions = true); + void call_VM(Register oop_result, + address entry_point, + Register arg_1, + bool check_exceptions = true); + void call_VM(Register oop_result, + address entry_point, + Register arg_1, Register arg_2, + bool check_exceptions = true); + void call_VM(Register oop_result, + address entry_point, + Register arg_1, Register arg_2, Register arg_3, + bool check_exceptions = true); + + // Overloadings with last_Java_sp + void call_VM(Register oop_result, + Register last_java_sp, + address entry_point, + int number_of_arguments = 0, + bool check_exceptions = true); + void call_VM(Register oop_result, + Register last_java_sp, + address entry_point, + Register arg_1, bool + check_exceptions = true); + void call_VM(Register oop_result, + Register last_java_sp, + address entry_point, + Register arg_1, Register arg_2, + bool check_exceptions = true); + void call_VM(Register oop_result, + Register last_java_sp, + address entry_point, + Register arg_1, Register arg_2, Register arg_3, + bool check_exceptions = true); + + void get_vm_result (Register oop_result, Register thread); + void get_vm_result_2(Register metadata_result, Register thread); + void call_VM_leaf(address entry_point, + int number_of_arguments = 0); + void call_VM_leaf(address entry_point, + Register arg_1); + void call_VM_leaf(address entry_point, + Register arg_1, Register arg_2); + void call_VM_leaf(address entry_point, + Register arg_1, Register arg_2, Register arg_3); + + // Super call_VM calls - correspond to MacroAssembler::call_VM(_leaf) calls + void super_call_VM_leaf(address entry_point); + void super_call_VM_leaf(address entry_point, Register arg_1); + void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2); + void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3); + + // last Java Frame (fills frame anchor) + void set_last_Java_frame(Register thread, + Register last_java_sp, + Register last_java_fp, + address last_java_pc); + + // thread in the default location (S6) + void set_last_Java_frame(Register last_java_sp, + Register last_java_fp, + address last_java_pc); + + void reset_last_Java_frame(Register thread, bool clear_fp); + + // thread in the default location (S6) + void reset_last_Java_frame(bool clear_fp); + + // jobjects + void clear_jweak_tag(Register possibly_jweak); + void resolve_jobject(Register value, Register thread, Register tmp); + + // C 'boolean' to Java boolean: x == 0 ? 0 : 1 + void c2bool(Register x); + + void resolve_oop_handle(Register result, Register tmp); + void load_mirror(Register dst, Register method, Register tmp); + + // oop manipulations + void load_klass(Register dst, Register src); + void store_klass(Register dst, Register src); + + void access_load_at(BasicType type, DecoratorSet decorators, Register dst, Address src, + Register tmp1, Register thread_tmp); + void access_store_at(BasicType type, DecoratorSet decorators, Address dst, Register src, + Register tmp1, Register tmp2); + + void load_heap_oop(Register dst, Address src, Register tmp1 = noreg, + Register thread_tmp = noreg, DecoratorSet decorators = 0); + void load_heap_oop_not_null(Register dst, Address src, Register tmp1 = noreg, + Register thread_tmp = noreg, DecoratorSet decorators = 0); + void store_heap_oop(Address dst, Register src, Register tmp1 = noreg, + Register tmp2 = noreg, DecoratorSet decorators = 0); + + // Used for storing NULL. 
All other oop constants should be + // stored using routines that take a jobject. + void store_heap_oop_null(Address dst); + + void load_prototype_header(Register dst, Register src); + + void store_klass_gap(Register dst, Register src); + + void encode_heap_oop(Register r); + void encode_heap_oop(Register dst, Register src); + void decode_heap_oop(Register r); + void decode_heap_oop(Register dst, Register src); + void encode_heap_oop_not_null(Register r); + void decode_heap_oop_not_null(Register r); + void encode_heap_oop_not_null(Register dst, Register src); + void decode_heap_oop_not_null(Register dst, Register src); + + void encode_klass_not_null(Register r); + void decode_klass_not_null(Register r); + void encode_klass_not_null(Register dst, Register src); + void decode_klass_not_null(Register dst, Register src); + + // Returns the byte size of the instructions generated by decode_klass_not_null() + // when compressed klass pointers are being used. + static int instr_size_for_decode_klass_not_null(); + + // if heap base register is used - reinit it with the correct value + void reinit_heapbase(); + + DEBUG_ONLY(void verify_heapbase(const char* msg);) + + void set_narrow_klass(Register dst, Klass* k); + void set_narrow_oop(Register dst, jobject obj); + + + + + // Sign extension + void sign_extend_short(Register reg) { /*dsll32(reg, reg, 16); dsra32(reg, reg, 16);*/ seh(reg, reg); } + void sign_extend_byte(Register reg) { /*dsll32(reg, reg, 24); dsra32(reg, reg, 24);*/ seb(reg, reg); } + void rem_s(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp); + void rem_d(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp); + + // allocation + void eden_allocate( + Register obj, // result: pointer to object after successful allocation + Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise + int con_size_in_bytes, // object size in bytes if known at compile time + Register t1, // temp register + Register t2, + Label& slow_case // continuation point if fast allocation fails + ); + void tlab_allocate( + Register obj, // result: pointer to object after successful allocation + Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise + int con_size_in_bytes, // object size in bytes if known at compile time + Register t1, // temp register + Register t2, // temp register + Label& slow_case // continuation point if fast allocation fails + ); + void incr_allocated_bytes(Register thread, + Register var_size_in_bytes, int con_size_in_bytes, + Register t1 = noreg); + // interface method calling + void lookup_interface_method(Register recv_klass, + Register intf_klass, + RegisterOrConstant itable_index, + Register method_result, + Register scan_temp, + Label& no_such_interface, + bool return_method = true); + + // virtual method calling + void lookup_virtual_method(Register recv_klass, + RegisterOrConstant vtable_index, + Register method_result); + + // Test sub_klass against super_klass, with fast and slow paths. + + // The fast path produces a tri-state answer: yes / no / maybe-slow. + // One of the three labels can be NULL, meaning take the fall-through. + // If super_check_offset is -1, the value is loaded up from super_klass. + // No registers are killed, except temp_reg. 
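+  // Illustrative pairing of the two paths (hypothetical labels, not from this patch):
+  //   Label L_ok, L_bad, L_slow;
+  //   check_klass_subtype_fast_path(sub, super, tmp, &L_ok, &L_bad, &L_slow);
+  //   bind(L_slow);
+  //   check_klass_subtype_slow_path(sub, super, tmp, tmp2, &L_ok, &L_bad);
+  //   // control reaches L_ok only for subtypes, L_bad otherwise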
+ void check_klass_subtype_fast_path(Register sub_klass, + Register super_klass, + Register temp_reg, + Label* L_success, + Label* L_failure, + Label* L_slow_path, + RegisterOrConstant super_check_offset = RegisterOrConstant(-1)); + + // The rest of the type check; must be wired to a corresponding fast path. + // It does not repeat the fast path logic, so don't use it standalone. + // The temp_reg and temp2_reg can be noreg, if no temps are available. + // Updates the sub's secondary super cache as necessary. + // If set_cond_codes, condition codes will be Z on success, NZ on failure. + void check_klass_subtype_slow_path(Register sub_klass, + Register super_klass, + Register temp_reg, + Register temp2_reg, + Label* L_success, + Label* L_failure, + bool set_cond_codes = false); + + // Simplified, combined version, good for typical uses. + // Falls through on failure. + void check_klass_subtype(Register sub_klass, + Register super_klass, + Register temp_reg, + Label& L_success); + + + // Debugging + + // only if +VerifyOops + void verify_oop(Register reg, const char* s = "broken oop"); + void verify_oop_addr(Address addr, const char * s = "broken oop addr"); + void verify_oop_subroutine(); + // TODO: verify method and klass metadata (compare against vptr?) + void _verify_method_ptr(Register reg, const char * msg, const char * file, int line) {} + void _verify_klass_ptr(Register reg, const char * msg, const char * file, int line){} + + #define verify_method_ptr(reg) _verify_method_ptr(reg, "broken method " #reg, __FILE__, __LINE__) + #define verify_klass_ptr(reg) _verify_klass_ptr(reg, "broken klass " #reg, __FILE__, __LINE__) + + // only if +VerifyFPU + void verify_FPU(int stack_depth, const char* s = "illegal FPU state"); + + // prints msg, dumps registers and stops execution + void stop(const char* msg); + + // prints msg and continues + void warn(const char* msg); + + static void debug(char* msg/*, RegistersForDebugging* regs*/); + static void debug64(char* msg, int64_t pc, int64_t regs[]); + + void print_reg(Register reg); + void print_reg(FloatRegister reg); + + void untested() { stop("untested"); } + + void unimplemented(const char* what = ""); + + void should_not_reach_here() { stop("should not reach here"); } + + void print_CPU_state(); + + // Stack overflow checking + void bang_stack_with_offset(int offset) { + // stack grows down, caller passes positive offset + assert(offset > 0, "must bang with negative offset"); + if (offset <= 32768) { + sw(A0, SP, -offset); + } else { + li(AT, offset); + dsubu(AT, SP, AT); + sw(A0, AT, 0); + } + } + + // Writes to stack successive pages until offset reached to check for + // stack overflow + shadow pages. Also, clobbers tmp + void bang_stack_size(Register size, Register tmp); + + // Check for reserved stack access in method being exited (for JIT) + void reserved_stack_check(); + + virtual RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr, + Register tmp, + int offset); + + // Support for serializing memory accesses between threads + void serialize_memory(Register thread, Register tmp); + + void safepoint_poll(Label& slow_path, Register thread_reg); + void safepoint_poll_acquire(Label& slow_path, Register thread_reg); + + //void verify_tlab(); + void verify_tlab(Register t1, Register t2); + + // Biased locking support + // lock_reg and obj_reg must be loaded up with the appropriate values. + // tmp_reg is optional. 
If it is supplied (i.e., != noreg) it will + // be killed; if not supplied, push/pop will be used internally to + // allocate a temporary (inefficient, avoid if possible). + // Optional slow case is for implementations (interpreter and C1) which branch to + // slow case directly. Leaves condition codes set for C2's Fast_Lock node. + // Returns offset of first potentially-faulting instruction for null + // check info (currently consumed only by C1). If + // swap_reg_contains_mark is true then returns -1 as it is assumed + // the calling code has already passed any potential faults. + int biased_locking_enter(Register lock_reg, Register obj_reg, + Register swap_reg, Register tmp_reg, + bool swap_reg_contains_mark, + Label& done, Label* slow_case = NULL, + BiasedLockingCounters* counters = NULL); + void biased_locking_exit (Register obj_reg, Register temp_reg, Label& done); +#ifdef COMPILER2 + void fast_lock(Register obj, Register box, Register res, Register tmp, Register scr); + void fast_unlock(Register obj, Register box, Register res, Register tmp, Register scr); +#endif + + + // Arithmetics + // Regular vs. d* versions + inline void addu_long(Register rd, Register rs, Register rt) { + daddu(rd, rs, rt); + } + inline void addu_long(Register rd, Register rs, long imm32_64) { + daddiu(rd, rs, imm32_64); + } + + void round_to(Register reg, int modulus) { + assert_different_registers(reg, AT); + increment(reg, modulus - 1); + move(AT, - modulus); + andr(reg, reg, AT); + } + + // the follow two might use AT register, be sure you have no meanful data in AT before you call them + void increment(Register reg, int imm); + void decrement(Register reg, int imm); + + void shl(Register reg, int sa) { dsll(reg, reg, sa); } + void shr(Register reg, int sa) { dsrl(reg, reg, sa); } + void sar(Register reg, int sa) { dsra(reg, reg, sa); } + + // Helper functions for statistics gathering. 
+ void atomic_inc32(address counter_addr, int inc, Register tmp_reg1, Register tmp_reg2); + + // Calls + void call(address entry); + void call(address entry, relocInfo::relocType rtype); + void call(address entry, RelocationHolder& rh); + + address trampoline_call(AddressLiteral entry, CodeBuffer *cbuf = NULL); + + // Emit the CompiledIC call idiom + void ic_call(address entry, jint method_index = 0); + + // Jumps + void jmp(address entry); + void jmp(address entry, relocInfo::relocType rtype); + void jmp_far(Label& L); // always long jumps + + /* branches may exceed 16-bit offset */ + void b_far(address entry); + void b_far(Label& L); + + void bne_far (Register rs, Register rt, address entry); + void bne_far (Register rs, Register rt, Label& L); + + void beq_far (Register rs, Register rt, address entry); + void beq_far (Register rs, Register rt, Label& L); + + // For C2 to support long branches + void beq_long (Register rs, Register rt, Label& L); + void bne_long (Register rs, Register rt, Label& L); + void bc1t_long (Label& L); + void bc1f_long (Label& L); + + void patchable_call(address target); + void general_call(address target); + + void patchable_jump(address target); + void general_jump(address target); + + static int insts_for_patchable_call(address target); + static int insts_for_general_call(address target); + + static int insts_for_patchable_jump(address target); + static int insts_for_general_jump(address target); + + // Floating + // Data + + // Load and store values by size and signed-ness + void load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2 = noreg); + void store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2 = noreg); + + // ld_ptr will perform lw for 32 bit VMs and ld for 64 bit VMs + inline void ld_ptr(Register rt, Address a) { + ld(rt, a); + } + + inline void ld_ptr(Register rt, Register base, int offset16) { + ld(rt, base, offset16); + } + + // st_ptr will perform sw for 32 bit VMs and sd for 64 bit VMs + inline void st_ptr(Register rt, Address a) { + sd(rt, a); + } + + inline void st_ptr(Register rt, Register base, int offset16) { + sd(rt, base, offset16); + } + + void ld_ptr(Register rt, Register base, Register offset); + void st_ptr(Register rt, Register base, Register offset); + + // swap the two byte of the low 16-bit halfword + // this directive will use AT, be sure the high 16-bit of reg is zero + void hswap(Register reg); + void huswap(Register reg); + + // convert big endian integer to little endian integer + void swap(Register reg); + + // implement the x86 instruction semantic + // if c_reg == *dest then *dest <= x_reg + // else c_reg <= *dest + // the AT indicate if xchg occurred, 1 for xchged, else 0 + void cmpxchg(Address addr, Register oldval, Register newval, Register resflag, + bool retold, bool barrier); + void cmpxchg(Address addr, Register oldval, Register newval, Register tmp, + bool retold, bool barrier, Label& succ, Label* fail = NULL); + void cmpxchg32(Address addr, Register oldval, Register newval, Register resflag, + bool sign, bool retold, bool barrier); + void cmpxchg32(Address addr, Register oldval, Register newval, Register tmp, + bool sign, bool retold, bool barrier, Label& succ, Label* fail = NULL); + void cmpxchg8(Register x_regLo, Register x_regHi, Address dest, Register c_regLo, Register c_regHi); + + //pop & push + void extend_sign(Register rh, Register rl) { stop("extend_sign"); } + void neg(Register reg) { dsubu(reg, R0, reg); } + void push (Register reg) 
{ daddiu(SP, SP, -8); sd (reg, SP, 0); } + void push (FloatRegister reg) { daddiu(SP, SP, -8); sdc1(reg, SP, 0); } + void pop (Register reg) { ld (reg, SP, 0); daddiu(SP, SP, 8); } + void pop (FloatRegister reg) { ldc1(reg, SP, 0); daddiu(SP, SP, 8); } + void pop () { daddiu(SP, SP, 8); } + void pop2 () { daddiu(SP, SP, 16); } + void push2(Register reg1, Register reg2); + void pop2 (Register reg1, Register reg2); + void dpush (Register reg) { daddiu(SP, SP, -8); sd (reg, SP, 0); } + void dpop (Register reg) { ld (reg, SP, 0); daddiu(SP, SP, 8); } + //we need 2 fun to save and resotre general register + void pushad(); + void popad(); + void pushad_except_v0(); + void popad_except_v0(); + + //move an 32-bit immediate to Register + void move(Register reg, int imm32) { li32(reg, imm32); } + void li (Register rd, long imm); + void li (Register rd, address addr) { li(rd, (long)addr); } + //replace move(Register reg, int imm) + void li32(Register rd, int imm32); // sign-extends to 64 bits on mips64 + void set64(Register d, jlong value); + static int insts_for_set64(jlong value); + + void patchable_set48(Register d, jlong value); + void patchable_set32(Register d, jlong value); + + void patchable_call32(Register d, jlong value); + + static int call_size(address target, bool far, bool patchable); + + static bool reachable_from_cache(address target); + static bool reachable_from_cache(); + + + void dli(Register rd, long imm) { li(rd, imm); } + void li64(Register rd, long imm); + void li48(Register rd, long imm); + + void move(Register rd, Register rs) { daddu(rd, rs, R0); } + void move_u32(Register rd, Register rs) { addu32(rd, rs, R0); } + void dmove(Register rd, Register rs) { daddu(rd, rs, R0); } + void mov_metadata(Register dst, Metadata* obj); + void mov_metadata(Address dst, Metadata* obj); + + void store_for_type_by_register(Register src_reg, Register tmp_reg, int disp, BasicType type, bool wide); + void store_for_type_by_register(FloatRegister src_reg, Register tmp_reg, int disp, BasicType type); + void store_for_type(Register src_reg, Address addr, BasicType type = T_INT, bool wide = false); + void store_for_type(FloatRegister src_reg, Address addr, BasicType type = T_INT); + void load_for_type_by_register(Register dst_reg, Register tmp_reg, int disp, BasicType type, bool wide); + void load_for_type_by_register(FloatRegister dst_reg, Register tmp_reg, int disp, BasicType type); + int load_for_type(Register dst_reg, Address addr, BasicType type = T_INT, bool wide = false); + int load_for_type(FloatRegister dst_reg, Address addr, BasicType type = T_INT); + +#ifndef PRODUCT + static void pd_print_patched_instruction(address branch) { + jint stub_inst = *(jint*) branch; + print_instruction(stub_inst); + ::tty->print("%s", " (unresolved)"); + + } +#endif + + //FIXME + void empty_FPU_stack(){/*need implemented*/}; + +#ifdef COMPILER2 + // Compare strings. + void string_compare(Register str1, Register str2, + Register cnt1, Register cnt2, Register result, + int ae); + + // Compare char[] or byte[] arrays. 
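+  // arrays_equals leaves 1 in result when the ranges match and 0 otherwise; cnt,
+  // tmp, AT and both data pointers are clobbered. string_compare above follows the
+  // usual intrinsic contract and returns a signed difference in result.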
+  void arrays_equals(Register str1, Register str2,
+                     Register cnt, Register tmp, Register result,
+                     bool is_char);
+#endif
+
+  // method handles (JSR 292)
+  Address argument_address(RegisterOrConstant arg_slot, int extra_slot_offset = 0);
+
+  // Conditional move
+  void cmp_cmov(Register op1,
+                Register op2,
+                Register dst,
+                Register src,
+                CMCompare cmp = EQ,
+                bool is_signed = true);
+  void cmp_cmov(FloatRegister op1,
+                FloatRegister op2,
+                Register dst,
+                Register src,
+                CMCompare cmp = EQ,
+                bool is_float = true);
+  void cmp_cmov(FloatRegister op1,
+                FloatRegister op2,
+                FloatRegister dst,
+                FloatRegister src,
+                CMCompare cmp = EQ,
+                bool is_float = true);
+  void cmp_cmov(Register op1,
+                Register op2,
+                FloatRegister dst,
+                FloatRegister src,
+                CMCompare cmp = EQ,
+                bool is_float = true);
+
+#undef VIRTUAL
+
+public:
+
+// Memory Data Type
+#define INT_TYPE 0x100
+#define FLOAT_TYPE 0x200
+#define SIGNED_TYPE 0x10
+#define UNSIGNED_TYPE 0x20
+
+  typedef enum {
+    LOAD_BYTE        = INT_TYPE | SIGNED_TYPE | 0x1,
+    LOAD_CHAR        = INT_TYPE | SIGNED_TYPE | 0x2,
+    LOAD_SHORT       = INT_TYPE | SIGNED_TYPE | 0x3,
+    LOAD_INT         = INT_TYPE | SIGNED_TYPE | 0x4,
+    LOAD_LONG        = INT_TYPE | SIGNED_TYPE | 0x5,
+    STORE_BYTE       = INT_TYPE | SIGNED_TYPE | 0x6,
+    STORE_CHAR       = INT_TYPE | SIGNED_TYPE | 0x7,
+    STORE_SHORT      = INT_TYPE | SIGNED_TYPE | 0x8,
+    STORE_INT        = INT_TYPE | SIGNED_TYPE | 0x9,
+    STORE_LONG       = INT_TYPE | SIGNED_TYPE | 0xa,
+    LOAD_LINKED_LONG = INT_TYPE | SIGNED_TYPE | 0xb,
+
+    LOAD_U_BYTE      = INT_TYPE | UNSIGNED_TYPE | 0x1,
+    LOAD_U_SHORT     = INT_TYPE | UNSIGNED_TYPE | 0x2,
+    LOAD_U_INT       = INT_TYPE | UNSIGNED_TYPE | 0x3,
+
+    LOAD_FLOAT       = FLOAT_TYPE | SIGNED_TYPE | 0x1,
+    LOAD_DOUBLE      = FLOAT_TYPE | SIGNED_TYPE | 0x2,
+    STORE_FLOAT      = FLOAT_TYPE | SIGNED_TYPE | 0x3,
+    STORE_DOUBLE     = FLOAT_TYPE | SIGNED_TYPE | 0x4
+  } CMLoadStoreDataType;
+
+  void loadstore_enc(Register reg, int base, int index, int scale, int disp, int type) {
+    assert((type & INT_TYPE), "must be General reg type");
+    loadstore_t(reg, base, index, scale, disp, type);
+  }
+
+  void loadstore_enc(FloatRegister reg, int base, int index, int scale, int disp, int type) {
+    assert((type & FLOAT_TYPE), "must be Float reg type");
+    loadstore_t(reg, base, index, scale, disp, type);
+  }
+
+private:
+
+  template <typename T>
+  void loadstore_t(T reg, int base, int index, int scale, int disp, int type) {
+    if (index != 0) {
+      if (Assembler::is_simm16(disp)) {
+        if (UseLEXT1 && (type & SIGNED_TYPE) && Assembler::is_simm(disp, 8)) {
+          if (scale == 0) {
+            gs_loadstore(reg, as_Register(base), as_Register(index), disp, type);
+          } else {
+            dsll(AT, as_Register(index), scale);
+            gs_loadstore(reg, as_Register(base), AT, disp, type);
+          }
+        } else {
+          if (scale == 0) {
+            addu(AT, as_Register(base), as_Register(index));
+          } else {
+            dsll(AT, as_Register(index), scale);
+            addu(AT, as_Register(base), AT);
+          }
+          loadstore(reg, AT, disp, type);
+        }
+      } else {
+        if (scale == 0) {
+          addu(AT, as_Register(base), as_Register(index));
+        } else {
+          dsll(AT, as_Register(index), scale);
+          addu(AT, as_Register(base), AT);
+        }
+        move(RT9, disp);
+        if (UseLEXT1 && (type & SIGNED_TYPE)) {
+          gs_loadstore(reg, AT, RT9, 0, type);
+        } else {
+          addu(AT, AT, RT9);
+          loadstore(reg, AT, 0, type);
+        }
+      }
+    } else {
+      if (Assembler::is_simm16(disp)) {
+        loadstore(reg, as_Register(base), disp, type);
+      } else {
+        move(RT9, disp);
+        if (UseLEXT1 && (type & SIGNED_TYPE)) {
+          gs_loadstore(reg, as_Register(base), RT9, 0, type);
+        } else {
+          addu(AT, as_Register(base), RT9);
+          loadstore(reg, AT, 0, type);
+        }
+      }
+    }
+  }
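+  // The loadstore_t helper above picks the cheapest addressing form: a plain
+  // base+disp access when disp fits in a signed 16-bit immediate, the Loongson
+  // EXT (gs*) indexed forms when UseLEXT1 is enabled for the signed-typed
+  // encodings (with an 8-bit immediate in the small-displacement case), and
+  // otherwise it materializes the displacement in RT9 before the access, using
+  // AT as the address scratch register.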
+ void loadstore(Register reg, Register base, int disp, int type); + void loadstore(FloatRegister reg, Register base, int disp, int type); + void gs_loadstore(Register reg, Register base, Register index, int disp, int type); + void gs_loadstore(FloatRegister reg, Register base, Register index, int disp, int type); +}; + +/** + * class SkipIfEqual: + * + * Instantiating this class will result in assembly code being output that will + * jump around any code emitted between the creation of the instance and it's + * automatic destruction at the end of a scope block, depending on the value of + * the flag passed to the constructor, which will be checked at run-time. + */ +class SkipIfEqual { +private: + MacroAssembler* _masm; + Label _label; + +public: + inline SkipIfEqual(MacroAssembler* masm, const bool* flag_addr, bool value) + : _masm(masm) { + _masm->li(AT, (address)flag_addr); + _masm->lb(AT, AT, 0); + if (value) { + _masm->bne(AT, R0, _label); + } else { + _masm->beq(AT, R0, _label); + } + _masm->delayed()->nop(); + } + + ~SkipIfEqual(); +}; + +#ifdef ASSERT +inline bool AbstractAssembler::pd_check_instruction_mark() { return true; } +#endif + + +#endif // CPU_MIPS_VM_MACROASSEMBLER_MIPS_HPP diff --git a/src/hotspot/cpu/mips/macroAssembler_mips.inline.hpp b/src/hotspot/cpu/mips/macroAssembler_mips.inline.hpp new file mode 100644 index 00000000000..92c05fb726a --- /dev/null +++ b/src/hotspot/cpu/mips/macroAssembler_mips.inline.hpp @@ -0,0 +1,34 @@ +/* + * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2017, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_MIPS_VM_MACROASSEMBLER_MIPS_INLINE_HPP +#define CPU_MIPS_VM_MACROASSEMBLER_MIPS_INLINE_HPP + +#include "asm/assembler.inline.hpp" +#include "asm/macroAssembler.hpp" +#include "asm/codeBuffer.hpp" +#include "code/codeCache.hpp" + +#endif // CPU_MIPS_VM_MACROASSEMBLER_MIPS_INLINE_HPP diff --git a/src/hotspot/cpu/mips/methodHandles_mips.cpp b/src/hotspot/cpu/mips/methodHandles_mips.cpp new file mode 100644 index 00000000000..e9788ac52c3 --- /dev/null +++ b/src/hotspot/cpu/mips/methodHandles_mips.cpp @@ -0,0 +1,576 @@ +/* + * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "classfile/javaClasses.inline.hpp" +#include "interpreter/interpreter.hpp" +#include "interpreter/interpreterRuntime.hpp" +#include "memory/allocation.inline.hpp" +#include "prims/methodHandles.hpp" +#include "runtime/frame.inline.hpp" +#include "utilities/preserveException.hpp" + +#define __ _masm-> + +#define T0 RT0 +#define T1 RT1 +#define T2 RT2 +#define T3 RT3 +#define T8 RT8 +#define T9 RT9 + +#ifdef PRODUCT +#define BLOCK_COMMENT(str) // nothing +#define STOP(error) stop(error) +#else +#define BLOCK_COMMENT(str) __ block_comment(str) +#define STOP(error) block_comment(error); __ stop(error) +#endif + +#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") + +void MethodHandles::load_klass_from_Class(MacroAssembler* _masm, Register klass_reg) { + if (VerifyMethodHandles) + verify_klass(_masm, klass_reg, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_Class), + "MH argument is a Class"); + __ ld(klass_reg, Address(klass_reg, java_lang_Class::klass_offset_in_bytes())); +} + +#ifdef ASSERT +static int check_nonzero(const char* xname, int x) { + assert(x != 0, "%s should be nonzero", xname); + return x; +} +#define NONZERO(x) check_nonzero(#x, x) +#else //ASSERT +#define NONZERO(x) (x) +#endif //ASSERT + +#ifdef ASSERT +void MethodHandles::verify_klass(MacroAssembler* _masm, + Register obj, SystemDictionary::WKID klass_id, + const char* error_message) { +} + +void MethodHandles::verify_ref_kind(MacroAssembler* _masm, int ref_kind, Register member_reg, Register temp) { + Label L; + BLOCK_COMMENT("verify_ref_kind {"); + __ lw(temp, Address(member_reg, NONZERO(java_lang_invoke_MemberName::flags_offset_in_bytes()))); + __ sra(temp, temp, java_lang_invoke_MemberName::MN_REFERENCE_KIND_SHIFT); + __ move(AT, java_lang_invoke_MemberName::MN_REFERENCE_KIND_MASK); + __ andr(temp, temp, AT); + __ move(AT, ref_kind); + __ beq(temp, AT, L); + __ delayed()->nop(); + { char* buf = NEW_C_HEAP_ARRAY(char, 100, mtInternal); + jio_snprintf(buf, 100, "verify_ref_kind expected %x", ref_kind); + if (ref_kind == JVM_REF_invokeVirtual || + ref_kind == JVM_REF_invokeSpecial) + // could do this for all ref_kinds, but would explode assembly code size + trace_method_handle(_masm, buf); + __ STOP(buf); + } + BLOCK_COMMENT("} verify_ref_kind"); + __ bind(L); +} + +#endif //ASSERT + +void MethodHandles::jump_from_method_handle(MacroAssembler* _masm, Register method, Register temp, + bool for_compiler_entry) { + assert(method == Rmethod, "interpreter calling convention"); + + Label L_no_such_method; + __ beq(method, R0, 
L_no_such_method); + __ delayed()->nop(); + + __ verify_method_ptr(method); + + if (!for_compiler_entry && JvmtiExport::can_post_interpreter_events()) { + Label run_compiled_code; + // JVMTI events, such as single-stepping, are implemented partly by avoiding running + // compiled code in threads for which the event is enabled. Check here for + // interp_only_mode if these events CAN be enabled. + Register rthread = TREG; + // interp_only is an int, on little endian it is sufficient to test the byte only + // Is a cmpl faster? + __ lbu(AT, rthread, in_bytes(JavaThread::interp_only_mode_offset())); + __ beq(AT, R0, run_compiled_code); + __ delayed()->nop(); + __ ld(T9, method, in_bytes(Method::interpreter_entry_offset())); + __ jr(T9); + __ delayed()->nop(); + __ BIND(run_compiled_code); + } + + const ByteSize entry_offset = for_compiler_entry ? Method::from_compiled_offset() : + Method::from_interpreted_offset(); + __ ld(T9, method, in_bytes(entry_offset)); + __ jr(T9); + __ delayed()->nop(); + + __ bind(L_no_such_method); + address wrong_method = StubRoutines::throw_AbstractMethodError_entry(); + __ jmp(wrong_method, relocInfo::runtime_call_type); + __ delayed()->nop(); +} + +void MethodHandles::jump_to_lambda_form(MacroAssembler* _masm, + Register recv, Register method_temp, + Register temp2, + bool for_compiler_entry) { + BLOCK_COMMENT("jump_to_lambda_form {"); + // This is the initial entry point of a lazy method handle. + // After type checking, it picks up the invoker from the LambdaForm. + assert_different_registers(recv, method_temp, temp2); + assert(recv != noreg, "required register"); + assert(method_temp == Rmethod, "required register for loading method"); + + //NOT_PRODUCT({ FlagSetting fs(TraceMethodHandles, true); trace_method_handle(_masm, "LZMH"); }); + + // Load the invoker, as MH -> MH.form -> LF.vmentry + __ verify_oop(recv); + __ load_heap_oop(method_temp, Address(recv, NONZERO(java_lang_invoke_MethodHandle::form_offset_in_bytes())), temp2); + __ verify_oop(method_temp); + __ load_heap_oop(method_temp, Address(method_temp, NONZERO(java_lang_invoke_LambdaForm::vmentry_offset_in_bytes())), temp2); + __ verify_oop(method_temp); + __ load_heap_oop(method_temp, Address(method_temp, NONZERO(java_lang_invoke_MemberName::method_offset_in_bytes()))); + __ verify_oop(method_temp); + __ access_load_at(T_ADDRESS, IN_HEAP, method_temp, Address(method_temp, NONZERO(java_lang_invoke_ResolvedMethodName::vmtarget_offset_in_bytes())), noreg, noreg); + + if (VerifyMethodHandles && !for_compiler_entry) { + // make sure recv is already on stack + __ ld(temp2, Address(method_temp, Method::const_offset())); + __ load_sized_value(temp2, + Address(temp2, ConstMethod::size_of_parameters_offset()), + sizeof(u2), false); + // assert(sizeof(u2) == sizeof(Method::_size_of_parameters), ""); + Label L; + Address recv_addr = __ argument_address(temp2, -1); + __ ld(AT, recv_addr); + __ beq(recv, AT, L); + __ delayed()->nop(); + + recv_addr = __ argument_address(temp2, -1); + __ ld(V0, recv_addr); + __ STOP("receiver not on stack"); + __ BIND(L); + } + + jump_from_method_handle(_masm, method_temp, temp2, for_compiler_entry); + BLOCK_COMMENT("} jump_to_lambda_form"); +} + + +// Code generation +address MethodHandles::generate_method_handle_interpreter_entry(MacroAssembler* _masm, + vmIntrinsics::ID iid) { + const bool not_for_compiler_entry = false; // this is the interpreter entry + assert(is_signature_polymorphic(iid), "expected invoke iid"); + if (iid == vmIntrinsics::_invokeGeneric || + iid == 
vmIntrinsics::_compiledLambdaForm) { + // Perhaps surprisingly, the symbolic references visible to Java are not directly used. + // They are linked to Java-generated adapters via MethodHandleNatives.linkMethod. + // They all allow an appendix argument. + __ stop("empty stubs make SG sick"); + return NULL; + } + + // Rmethod: Method* + // T9: argument locator (parameter slot count, added to sp) + // S7: used as temp to hold mh or receiver + Register t9_argp = T9; // argument list ptr, live on error paths + Register s7_mh = S7; // MH receiver; dies quickly and is recycled + Register rm_method = Rmethod; // eventual target of this invocation + + // here's where control starts out: + __ align(CodeEntryAlignment); + address entry_point = __ pc(); + + if (VerifyMethodHandles) { + assert(Method::intrinsic_id_size_in_bytes() == 2, "assuming Method::_intrinsic_id is u2"); + + Label L; + BLOCK_COMMENT("verify_intrinsic_id {"); + __ lhu(AT, rm_method, Method::intrinsic_id_offset_in_bytes()); + guarantee(Assembler::is_simm16(iid), "Oops, iid is not simm16! Change the instructions."); + __ addiu(AT, AT, -1 * (int) iid); + __ beq(AT, R0, L); + __ delayed()->nop(); + if (iid == vmIntrinsics::_linkToVirtual || + iid == vmIntrinsics::_linkToSpecial) { + // could do this for all kinds, but would explode assembly code size + trace_method_handle(_masm, "bad Method*::intrinsic_id"); + } + __ STOP("bad Method*::intrinsic_id"); + __ bind(L); + BLOCK_COMMENT("} verify_intrinsic_id"); + } + + // First task: Find out how big the argument list is. + Address t9_first_arg_addr; + int ref_kind = signature_polymorphic_intrinsic_ref_kind(iid); + assert(ref_kind != 0 || iid == vmIntrinsics::_invokeBasic, "must be _invokeBasic or a linkTo intrinsic"); + if (ref_kind == 0 || MethodHandles::ref_kind_has_receiver(ref_kind)) { + __ ld(t9_argp, Address(rm_method, Method::const_offset())); + __ load_sized_value(t9_argp, + Address(t9_argp, ConstMethod::size_of_parameters_offset()), + sizeof(u2), false); + // assert(sizeof(u2) == sizeof(Method::_size_of_parameters), ""); + t9_first_arg_addr = __ argument_address(t9_argp, -1); + } else { + DEBUG_ONLY(t9_argp = noreg); + } + + if (!is_signature_polymorphic_static(iid)) { + __ ld(s7_mh, t9_first_arg_addr); + DEBUG_ONLY(t9_argp = noreg); + } + + // t9_first_arg_addr is live! + + trace_method_handle_interpreter_entry(_masm, iid); + + if (iid == vmIntrinsics::_invokeBasic) { + generate_method_handle_dispatch(_masm, iid, s7_mh, noreg, not_for_compiler_entry); + + } else { + // Adjust argument list by popping the trailing MemberName argument. + Register r_recv = noreg; + if (MethodHandles::ref_kind_has_receiver(ref_kind)) { + // Load the receiver (not the MH; the actual MemberName's receiver) up from the interpreter stack. 
+ __ ld(r_recv = T2, t9_first_arg_addr); + } + DEBUG_ONLY(t9_argp = noreg); + Register rm_member = rm_method; // MemberName ptr; incoming method ptr is dead now + __ pop(rm_member); // extract last argument + generate_method_handle_dispatch(_masm, iid, r_recv, rm_member, not_for_compiler_entry); + } + + return entry_point; +} + +void MethodHandles::generate_method_handle_dispatch(MacroAssembler* _masm, + vmIntrinsics::ID iid, + Register receiver_reg, + Register member_reg, + bool for_compiler_entry) { + assert(is_signature_polymorphic(iid), "expected invoke iid"); + Register rm_method = Rmethod; // eventual target of this invocation + // temps used in this code are not used in *either* compiled or interpreted calling sequences + Register j_rarg0 = T0; + Register j_rarg1 = A0; + Register j_rarg2 = A1; + Register j_rarg3 = A2; + Register j_rarg4 = A3; + Register j_rarg5 = A4; + + Register temp1 = T8; + Register temp2 = T9; + Register temp3 = V0; + if (for_compiler_entry) { + assert(receiver_reg == (iid == vmIntrinsics::_linkToStatic ? noreg : j_rarg0), "only valid assignment"); + assert_different_registers(temp1, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5); + assert_different_registers(temp2, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5); + assert_different_registers(temp3, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5); + } + else { + assert_different_registers(temp1, temp2, temp3, saved_last_sp_register()); // don't trash lastSP + } + assert_different_registers(temp1, temp2, temp3, receiver_reg); + assert_different_registers(temp1, temp2, temp3, member_reg); + + if (iid == vmIntrinsics::_invokeBasic) { + // indirect through MH.form.vmentry.vmtarget + jump_to_lambda_form(_masm, receiver_reg, rm_method, temp1, for_compiler_entry); + + } else { + // The method is a member invoker used by direct method handles. + if (VerifyMethodHandles) { + // make sure the trailing argument really is a MemberName (caller responsibility) + verify_klass(_masm, member_reg, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_invoke_MemberName), + "MemberName required for invokeVirtual etc."); + } + + Address member_clazz( member_reg, NONZERO(java_lang_invoke_MemberName::clazz_offset_in_bytes())); + Address member_vmindex( member_reg, NONZERO(java_lang_invoke_MemberName::vmindex_offset_in_bytes())); + Address member_vmtarget( member_reg, NONZERO(java_lang_invoke_MemberName::method_offset_in_bytes())); + Address vmtarget_method( rm_method, NONZERO(java_lang_invoke_ResolvedMethodName::vmtarget_offset_in_bytes())); + + Register temp1_recv_klass = temp1; + if (iid != vmIntrinsics::_linkToStatic) { + __ verify_oop(receiver_reg); + if (iid == vmIntrinsics::_linkToSpecial) { + // Don't actually load the klass; just null-check the receiver. + __ null_check(receiver_reg); + } else { + // load receiver klass itself + __ null_check(receiver_reg, oopDesc::klass_offset_in_bytes()); + __ load_klass(temp1_recv_klass, receiver_reg); + __ verify_klass_ptr(temp1_recv_klass); + } + BLOCK_COMMENT("check_receiver {"); + // The receiver for the MemberName must be in receiver_reg. + // Check the receiver against the MemberName.clazz + if (VerifyMethodHandles && iid == vmIntrinsics::_linkToSpecial) { + // Did not load it above... 
+ __ load_klass(temp1_recv_klass, receiver_reg); + __ verify_klass_ptr(temp1_recv_klass); + } + if (VerifyMethodHandles && iid != vmIntrinsics::_linkToInterface) { + Label L_ok; + Register temp2_defc = temp2; + __ load_heap_oop(temp2_defc, member_clazz, temp3); + load_klass_from_Class(_masm, temp2_defc); + __ verify_klass_ptr(temp2_defc); + __ check_klass_subtype(temp1_recv_klass, temp2_defc, temp3, L_ok); + // If we get here, the type check failed! + __ STOP("receiver class disagrees with MemberName.clazz"); + __ bind(L_ok); + } + BLOCK_COMMENT("} check_receiver"); + } + if (iid == vmIntrinsics::_linkToSpecial || + iid == vmIntrinsics::_linkToStatic) { + DEBUG_ONLY(temp1_recv_klass = noreg); // these guys didn't load the recv_klass + } + + // Live registers at this point: + // member_reg - MemberName that was the trailing argument + // temp1_recv_klass - klass of stacked receiver, if needed + + Label L_incompatible_class_change_error; + switch (iid) { + case vmIntrinsics::_linkToSpecial: + if (VerifyMethodHandles) { + verify_ref_kind(_masm, JVM_REF_invokeSpecial, member_reg, temp3); + } + __ load_heap_oop(rm_method, member_vmtarget); + __ access_load_at(T_ADDRESS, IN_HEAP, rm_method, vmtarget_method, noreg, noreg); + break; + + case vmIntrinsics::_linkToStatic: + if (VerifyMethodHandles) { + verify_ref_kind(_masm, JVM_REF_invokeStatic, member_reg, temp3); + } + __ load_heap_oop(rm_method, member_vmtarget); + __ access_load_at(T_ADDRESS, IN_HEAP, rm_method, vmtarget_method, noreg, noreg); + break; + + case vmIntrinsics::_linkToVirtual: + { + // same as TemplateTable::invokevirtual, + // minus the CP setup and profiling: + + if (VerifyMethodHandles) { + verify_ref_kind(_masm, JVM_REF_invokeVirtual, member_reg, temp3); + } + + // pick out the vtable index from the MemberName, and then we can discard it: + Register temp2_index = temp2; + __ access_load_at(T_ADDRESS, IN_HEAP, temp2_index, member_vmindex, noreg, noreg); + if (VerifyMethodHandles) { + Label L_index_ok; + __ slt(AT, R0, temp2_index); + __ bne(AT, R0, L_index_ok); + __ delayed()->nop(); + __ STOP("no virtual index"); + __ BIND(L_index_ok); + } + + // Note: The verifier invariants allow us to ignore MemberName.clazz and vmtarget + // at this point. And VerifyMethodHandles has already checked clazz, if needed. 
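+        // (Illustrative only: the lookup below is the classic vtable load, conceptually
+        //    rm_method = recv_klass->method_at_vtable(temp2_index);
+        //  i.e. a single load from recv_klass at vtable_start + index * vtableEntry size.
+        //  No MemberName fields are consulted any more at this point, per the note above.)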
+ + // get target Method* & entry point + __ lookup_virtual_method(temp1_recv_klass, temp2_index, rm_method); + break; + } + + case vmIntrinsics::_linkToInterface: + { + // same as TemplateTable::invokeinterface + // (minus the CP setup and profiling, with different argument motion) + if (VerifyMethodHandles) { + verify_ref_kind(_masm, JVM_REF_invokeInterface, member_reg, temp3); + } + + Register temp3_intf = temp3; + __ load_heap_oop(temp3_intf, member_clazz); + load_klass_from_Class(_masm, temp3_intf); + __ verify_klass_ptr(temp3_intf); + + Register rm_index = rm_method; + __ access_load_at(T_ADDRESS, IN_HEAP, rm_index, member_vmindex, noreg, noreg); + if (VerifyMethodHandles) { + Label L; + __ slt(AT, rm_index, R0); + __ beq(AT, R0, L); + __ delayed()->nop(); + __ STOP("invalid vtable index for MH.invokeInterface"); + __ bind(L); + } + + // given intf, index, and recv klass, dispatch to the implementation method + __ lookup_interface_method(temp1_recv_klass, temp3_intf, + // note: next two args must be the same: + rm_index, rm_method, + temp2, + L_incompatible_class_change_error); + break; + } + + default: + fatal("unexpected intrinsic %d: %s", iid, vmIntrinsics::name_at(iid)); + break; + } + + // Live at this point: + // rm_method + + // After figuring out which concrete method to call, jump into it. + // Note that this works in the interpreter with no data motion. + // But the compiled version will require that r_recv be shifted out. + __ verify_method_ptr(rm_method); + jump_from_method_handle(_masm, rm_method, temp1, for_compiler_entry); + + if (iid == vmIntrinsics::_linkToInterface) { + __ bind(L_incompatible_class_change_error); + address icce_entry= StubRoutines::throw_IncompatibleClassChangeError_entry(); + __ jmp(icce_entry, relocInfo::runtime_call_type); + __ delayed()->nop(); + } + } +} + +#ifndef PRODUCT +void trace_method_handle_stub(const char* adaptername, + oop mh, + intptr_t* saved_regs, + intptr_t* entry_sp) { + // called as a leaf from native code: do not block the JVM! + bool has_mh = (strstr(adaptername, "/static") == NULL && + strstr(adaptername, "linkTo") == NULL); // static linkers don't have MH + const char* mh_reg_name = has_mh ? "s7_mh" : "s7"; + tty->print_cr("MH %s %s=" PTR_FORMAT " sp=" PTR_FORMAT, + adaptername, mh_reg_name, + p2i(mh), p2i(entry_sp)); + + if (Verbose) { + tty->print_cr("Registers:"); + const int saved_regs_count = RegisterImpl::number_of_registers; + for (int i = 0; i < saved_regs_count; i++) { + Register r = as_Register(i); + // The registers are stored in reverse order on the stack (by pusha). + tty->print("%3s=" PTR_FORMAT, r->name(), saved_regs[((saved_regs_count - 1) - i)]); + if ((i + 1) % 4 == 0) { + tty->cr(); + } else { + tty->print(", "); + } + } + tty->cr(); + + { + // dumping last frame with frame::describe + + JavaThread* p = JavaThread::active(); + + ResourceMark rm; + PRESERVE_EXCEPTION_MARK; // may not be needed by safer and unexpensive here + FrameValues values; + + // Note: We want to allow trace_method_handle from any call site. + // While trace_method_handle creates a frame, it may be entered + // without a PC on the stack top (e.g. not just after a call). + // Walking that frame could lead to failures due to that invalid PC. + // => carefully detect that frame when doing the stack walking + + // Current C frame + frame cur_frame = os::current_frame(); + + // Robust search of trace_calling_frame (independant of inlining). + // Assumes saved_regs comes from a pusha in the trace_calling_frame. 
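+      // (In other words: starting from the current C frame, climb sender frames until one is
+      //  found whose fp lies at or above saved_regs; since saved_regs was pushed by the tracing
+      //  caller, that frame is the first one that can contain it.  The assert below only
+      //  sanity-checks that the register block really lives above the current sp.)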
+ assert(cur_frame.sp() < saved_regs, "registers not saved on stack ?"); + frame trace_calling_frame = os::get_sender_for_C_frame(&cur_frame); + while (trace_calling_frame.fp() < saved_regs) { + trace_calling_frame = os::get_sender_for_C_frame(&trace_calling_frame); + } + + // safely create a frame and call frame::describe + intptr_t *dump_sp = trace_calling_frame.sender_sp(); + intptr_t *dump_fp = trace_calling_frame.link(); + + bool walkable = has_mh; // whether the traced frame shoud be walkable + + if (walkable) { + // The previous definition of walkable may have to be refined + // if new call sites cause the next frame constructor to start + // failing. Alternatively, frame constructors could be + // modified to support the current or future non walkable + // frames (but this is more intrusive and is not considered as + // part of this RFE, which will instead use a simpler output). + frame dump_frame = frame(dump_sp, dump_fp); + dump_frame.describe(values, 1); + } else { + // Stack may not be walkable (invalid PC above FP): + // Add descriptions without building a Java frame to avoid issues + values.describe(-1, dump_fp, "fp for #1 "); + values.describe(-1, dump_sp, "sp for #1"); + } + values.describe(-1, entry_sp, "raw top of stack"); + + tty->print_cr("Stack layout:"); + values.print(p); + } + if (has_mh && oopDesc::is_oop(mh)) { + mh->print(); + if (java_lang_invoke_MethodHandle::is_instance(mh)) { + if (java_lang_invoke_MethodHandle::form_offset_in_bytes() != 0) + java_lang_invoke_MethodHandle::form(mh)->print(); + } + } + } +} + +// The stub wraps the arguments in a struct on the stack to avoid +// dealing with the different calling conventions for passing 6 +// arguments. +struct MethodHandleStubArguments { + const char* adaptername; + oopDesc* mh; + intptr_t* saved_regs; + intptr_t* entry_sp; +}; +void trace_method_handle_stub_wrapper(MethodHandleStubArguments* args) { + trace_method_handle_stub(args->adaptername, + args->mh, + args->saved_regs, + args->entry_sp); +} + +void MethodHandles::trace_method_handle(MacroAssembler* _masm, const char* adaptername) { +} +#endif //PRODUCT diff --git a/src/hotspot/cpu/mips/methodHandles_mips.hpp b/src/hotspot/cpu/mips/methodHandles_mips.hpp new file mode 100644 index 00000000000..03b65fc8ef2 --- /dev/null +++ b/src/hotspot/cpu/mips/methodHandles_mips.hpp @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2010, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +// Platform-specific definitions for method handles. +// These definitions are inlined into class MethodHandles. + +// Adapters +enum /* platform_dependent_constants */ { + adapter_code_size = 32000 DEBUG_ONLY(+ 150000) +}; + +// Additional helper methods for MethodHandles code generation: +public: + static void load_klass_from_Class(MacroAssembler* _masm, Register klass_reg); + + static void verify_klass(MacroAssembler* _masm, + Register obj, SystemDictionary::WKID klass_id, + const char* error_message = "wrong klass") NOT_DEBUG_RETURN; + + static void verify_method_handle(MacroAssembler* _masm, Register mh_reg) { + verify_klass(_masm, mh_reg, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_invoke_MethodHandle), + "reference is a MH"); + } + + static void verify_ref_kind(MacroAssembler* _masm, int ref_kind, Register member_reg, Register temp) NOT_DEBUG_RETURN; + + // Similar to InterpreterMacroAssembler::jump_from_interpreted. + // Takes care of special dispatch from single stepping too. + static void jump_from_method_handle(MacroAssembler* _masm, Register method, Register temp, + bool for_compiler_entry); + + static void jump_to_lambda_form(MacroAssembler* _masm, + Register recv, Register method_temp, + Register temp2, + bool for_compiler_entry); + + static Register saved_last_sp_register() { + // Should be in sharedRuntime, not here. + return I29; + } diff --git a/src/hotspot/cpu/mips/mips.ad b/src/hotspot/cpu/mips/mips.ad new file mode 100644 index 00000000000..3563bbe0e59 --- /dev/null +++ b/src/hotspot/cpu/mips/mips.ad @@ -0,0 +1,25 @@ +// +// Copyright (c) 2011, 2012, Oracle and/or its affiliates. All rights reserved. +// Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. +// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +// +// This code is free software; you can redistribute it and/or modify it +// under the terms of the GNU General Public License version 2 only, as +// published by the Free Software Foundation. +// +// This code is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// version 2 for more details (a copy is included in the LICENSE file that +// accompanied this code). +// +// You should have received a copy of the GNU General Public License version +// 2 along with this work; if not, write to the Free Software Foundation, +// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +// +// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +// or visit www.oracle.com if you need additional information or have any +// questions. +// +// + diff --git a/src/hotspot/cpu/mips/mips_64.ad b/src/hotspot/cpu/mips/mips_64.ad new file mode 100644 index 00000000000..b4acbd83f7f --- /dev/null +++ b/src/hotspot/cpu/mips/mips_64.ad @@ -0,0 +1,12243 @@ +// +// Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. +// Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. +// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +// +// This code is free software; you can redistribute it and/or modify it +// under the terms of the GNU General Public License version 2 only, as +// published by the Free Software Foundation. +// +// This code is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License +// version 2 for more details (a copy is included in the LICENSE file that +// accompanied this code). +// +// You should have received a copy of the GNU General Public License version +// 2 along with this work; if not, write to the Free Software Foundation, +// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +// +// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +// or visit www.oracle.com if you need additional information or have any +// questions. +// +// + +// GodSon3 Architecture Description File + +//----------REGISTER DEFINITION BLOCK------------------------------------------ +// This information is used by the matcher and the register allocator to +// describe individual registers and classes of registers within the target +// archtecture. + +// format: +// reg_def name (call convention, c-call convention, ideal type, encoding); +// call convention : +// NS = No-Save +// SOC = Save-On-Call +// SOE = Save-On-Entry +// AS = Always-Save +// ideal type : +// see opto/opcodes.hpp for more info +// reg_class name (reg, ...); +// alloc_class name (reg, ...); +register %{ + +// General Registers +// Integer Registers + reg_def R0 ( NS, NS, Op_RegI, 0, VMRegImpl::Bad()); + reg_def AT ( NS, NS, Op_RegI, 1, AT->as_VMReg()); + reg_def AT_H ( NS, NS, Op_RegI, 1, AT->as_VMReg()->next()); + reg_def V0 (SOC, SOC, Op_RegI, 2, V0->as_VMReg()); + reg_def V0_H (SOC, SOC, Op_RegI, 2, V0->as_VMReg()->next()); + reg_def V1 (SOC, SOC, Op_RegI, 3, V1->as_VMReg()); + reg_def V1_H (SOC, SOC, Op_RegI, 3, V1->as_VMReg()->next()); + reg_def A0 (SOC, SOC, Op_RegI, 4, A0->as_VMReg()); + reg_def A0_H (SOC, SOC, Op_RegI, 4, A0->as_VMReg()->next()); + reg_def A1 (SOC, SOC, Op_RegI, 5, A1->as_VMReg()); + reg_def A1_H (SOC, SOC, Op_RegI, 5, A1->as_VMReg()->next()); + reg_def A2 (SOC, SOC, Op_RegI, 6, A2->as_VMReg()); + reg_def A2_H (SOC, SOC, Op_RegI, 6, A2->as_VMReg()->next()); + reg_def A3 (SOC, SOC, Op_RegI, 7, A3->as_VMReg()); + reg_def A3_H (SOC, SOC, Op_RegI, 7, A3->as_VMReg()->next()); + reg_def A4 (SOC, SOC, Op_RegI, 8, A4->as_VMReg()); + reg_def A4_H (SOC, SOC, Op_RegI, 8, A4->as_VMReg()->next()); + reg_def A5 (SOC, SOC, Op_RegI, 9, A5->as_VMReg()); + reg_def A5_H (SOC, SOC, Op_RegI, 9, A5->as_VMReg()->next()); + reg_def A6 (SOC, SOC, Op_RegI, 10, A6->as_VMReg()); + reg_def A6_H (SOC, SOC, Op_RegI, 10, A6->as_VMReg()->next()); + reg_def A7 (SOC, SOC, Op_RegI, 11, A7->as_VMReg()); + reg_def A7_H (SOC, SOC, Op_RegI, 11, A7->as_VMReg()->next()); + reg_def T0 (SOC, SOC, Op_RegI, 12, T0->as_VMReg()); + reg_def T0_H (SOC, SOC, Op_RegI, 12, T0->as_VMReg()->next()); + reg_def T1 (SOC, SOC, Op_RegI, 13, T1->as_VMReg()); + reg_def T1_H (SOC, SOC, Op_RegI, 13, T1->as_VMReg()->next()); + reg_def T2 (SOC, SOC, Op_RegI, 14, T2->as_VMReg()); + reg_def T2_H (SOC, SOC, Op_RegI, 14, T2->as_VMReg()->next()); + reg_def T3 (SOC, SOC, Op_RegI, 15, T3->as_VMReg()); + reg_def T3_H (SOC, SOC, Op_RegI, 15, T3->as_VMReg()->next()); + reg_def S0 (SOC, SOE, Op_RegI, 16, S0->as_VMReg()); + reg_def S0_H (SOC, SOE, Op_RegI, 16, S0->as_VMReg()->next()); + reg_def S1 (SOC, SOE, Op_RegI, 17, S1->as_VMReg()); + reg_def S1_H (SOC, SOE, Op_RegI, 17, S1->as_VMReg()->next()); + reg_def S2 (SOC, SOE, Op_RegI, 18, S2->as_VMReg()); + reg_def S2_H (SOC, SOE, Op_RegI, 18, S2->as_VMReg()->next()); + reg_def S3 (SOC, SOE, Op_RegI, 19, S3->as_VMReg()); + reg_def S3_H (SOC, SOE, Op_RegI, 19, S3->as_VMReg()->next()); + reg_def S4 (SOC, SOE, Op_RegI, 20, S4->as_VMReg()); + reg_def 
S4_H (SOC, SOE, Op_RegI, 20, S4->as_VMReg()->next()); + reg_def S5 (SOC, SOE, Op_RegI, 21, S5->as_VMReg()); + reg_def S5_H (SOC, SOE, Op_RegI, 21, S5->as_VMReg()->next()); + reg_def S6 (SOC, SOE, Op_RegI, 22, S6->as_VMReg()); + reg_def S6_H (SOC, SOE, Op_RegI, 22, S6->as_VMReg()->next()); + reg_def S7 (SOC, SOE, Op_RegI, 23, S7->as_VMReg()); + reg_def S7_H (SOC, SOE, Op_RegI, 23, S7->as_VMReg()->next()); + reg_def T8 (SOC, SOC, Op_RegI, 24, T8->as_VMReg()); + reg_def T8_H (SOC, SOC, Op_RegI, 24, T8->as_VMReg()->next()); + reg_def T9 (SOC, SOC, Op_RegI, 25, T9->as_VMReg()); + reg_def T9_H (SOC, SOC, Op_RegI, 25, T9->as_VMReg()->next()); + +// Special Registers + reg_def K0 ( NS, NS, Op_RegI, 26, K0->as_VMReg()); + reg_def K1 ( NS, NS, Op_RegI, 27, K1->as_VMReg()); + reg_def GP ( NS, NS, Op_RegI, 28, GP->as_VMReg()); + reg_def GP_H ( NS, NS, Op_RegI, 28, GP->as_VMReg()->next()); + reg_def SP ( NS, NS, Op_RegI, 29, SP->as_VMReg()); + reg_def SP_H ( NS, NS, Op_RegI, 29, SP->as_VMReg()->next()); + reg_def FP ( NS, NS, Op_RegI, 30, FP->as_VMReg()); + reg_def FP_H ( NS, NS, Op_RegI, 30, FP->as_VMReg()->next()); + reg_def RA ( NS, NS, Op_RegI, 31, RA->as_VMReg()); + reg_def RA_H ( NS, NS, Op_RegI, 31, RA->as_VMReg()->next()); + +// Floating registers. +reg_def F0 ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()); +reg_def F0_H ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()->next()); +reg_def F1 ( SOC, SOC, Op_RegF, 1, F1->as_VMReg()); +reg_def F1_H ( SOC, SOC, Op_RegF, 1, F1->as_VMReg()->next()); +reg_def F2 ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()); +reg_def F2_H ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()->next()); +reg_def F3 ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()); +reg_def F3_H ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()->next()); +reg_def F4 ( SOC, SOC, Op_RegF, 4, F4->as_VMReg()); +reg_def F4_H ( SOC, SOC, Op_RegF, 4, F4->as_VMReg()->next()); +reg_def F5 ( SOC, SOC, Op_RegF, 5, F5->as_VMReg()); +reg_def F5_H ( SOC, SOC, Op_RegF, 5, F5->as_VMReg()->next()); +reg_def F6 ( SOC, SOC, Op_RegF, 6, F6->as_VMReg()); +reg_def F6_H ( SOC, SOC, Op_RegF, 6, F6->as_VMReg()->next()); +reg_def F7 ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()); +reg_def F7_H ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()->next()); +reg_def F8 ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()); +reg_def F8_H ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()->next()); +reg_def F9 ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()); +reg_def F9_H ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()->next()); +reg_def F10 ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()); +reg_def F10_H ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()->next()); +reg_def F11 ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()); +reg_def F11_H ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()->next()); +reg_def F12 ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()); +reg_def F12_H ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()->next()); +reg_def F13 ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()); +reg_def F13_H ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()->next()); +reg_def F14 ( SOC, SOC, Op_RegF, 14, F14->as_VMReg()); +reg_def F14_H ( SOC, SOC, Op_RegF, 14, F14->as_VMReg()->next()); +reg_def F15 ( SOC, SOC, Op_RegF, 15, F15->as_VMReg()); +reg_def F15_H ( SOC, SOC, Op_RegF, 15, F15->as_VMReg()->next()); +reg_def F16 ( SOC, SOC, Op_RegF, 16, F16->as_VMReg()); +reg_def F16_H ( SOC, SOC, Op_RegF, 16, F16->as_VMReg()->next()); +reg_def F17 ( SOC, SOC, Op_RegF, 17, F17->as_VMReg()); +reg_def F17_H ( SOC, SOC, Op_RegF, 17, F17->as_VMReg()->next()); +reg_def F18 ( SOC, SOC, Op_RegF, 18, F18->as_VMReg()); +reg_def F18_H ( SOC, SOC, Op_RegF, 18, F18->as_VMReg()->next()); +reg_def F19 ( SOC, SOC, 
Op_RegF, 19, F19->as_VMReg()); +reg_def F19_H ( SOC, SOC, Op_RegF, 19, F19->as_VMReg()->next()); +reg_def F20 ( SOC, SOC, Op_RegF, 20, F20->as_VMReg()); +reg_def F20_H ( SOC, SOC, Op_RegF, 20, F20->as_VMReg()->next()); +reg_def F21 ( SOC, SOC, Op_RegF, 21, F21->as_VMReg()); +reg_def F21_H ( SOC, SOC, Op_RegF, 21, F21->as_VMReg()->next()); +reg_def F22 ( SOC, SOC, Op_RegF, 22, F22->as_VMReg()); +reg_def F22_H ( SOC, SOC, Op_RegF, 22, F22->as_VMReg()->next()); +reg_def F23 ( SOC, SOC, Op_RegF, 23, F23->as_VMReg()); +reg_def F23_H ( SOC, SOC, Op_RegF, 23, F23->as_VMReg()->next()); +reg_def F24 ( SOC, SOC, Op_RegF, 24, F24->as_VMReg()); +reg_def F24_H ( SOC, SOC, Op_RegF, 24, F24->as_VMReg()->next()); +reg_def F25 ( SOC, SOC, Op_RegF, 25, F25->as_VMReg()); +reg_def F25_H ( SOC, SOC, Op_RegF, 25, F25->as_VMReg()->next()); +reg_def F26 ( SOC, SOC, Op_RegF, 26, F26->as_VMReg()); +reg_def F26_H ( SOC, SOC, Op_RegF, 26, F26->as_VMReg()->next()); +reg_def F27 ( SOC, SOC, Op_RegF, 27, F27->as_VMReg()); +reg_def F27_H ( SOC, SOC, Op_RegF, 27, F27->as_VMReg()->next()); +reg_def F28 ( SOC, SOC, Op_RegF, 28, F28->as_VMReg()); +reg_def F28_H ( SOC, SOC, Op_RegF, 28, F28->as_VMReg()->next()); +reg_def F29 ( SOC, SOC, Op_RegF, 29, F29->as_VMReg()); +reg_def F29_H ( SOC, SOC, Op_RegF, 29, F29->as_VMReg()->next()); +reg_def F30 ( SOC, SOC, Op_RegF, 30, F30->as_VMReg()); +reg_def F30_H ( SOC, SOC, Op_RegF, 30, F30->as_VMReg()->next()); +reg_def F31 ( SOC, SOC, Op_RegF, 31, F31->as_VMReg()); +reg_def F31_H ( SOC, SOC, Op_RegF, 31, F31->as_VMReg()->next()); + + +// ---------------------------- +// Special Registers +//S6 is used for get_thread(S6) +//S5 is uesd for heapbase of compressed oop +alloc_class chunk0( + S7, S7_H, + S0, S0_H, + S1, S1_H, + S2, S2_H, + S4, S4_H, + S5, S5_H, + S6, S6_H, + S3, S3_H, + T2, T2_H, + T3, T3_H, + T8, T8_H, + T9, T9_H, + T1, T1_H, // inline_cache_reg + V1, V1_H, + A7, A7_H, + A6, A6_H, + A5, A5_H, + A4, A4_H, + V0, V0_H, + A3, A3_H, + A2, A2_H, + A1, A1_H, + A0, A0_H, + T0, T0_H, + GP, GP_H + RA, RA_H, + SP, SP_H, // stack_pointer + FP, FP_H // frame_pointer + ); + +alloc_class chunk1( F0, F0_H, + F1, F1_H, + F2, F2_H, + F3, F3_H, + F4, F4_H, + F5, F5_H, + F6, F6_H, + F7, F7_H, + F8, F8_H, + F9, F9_H, + F10, F10_H, + F11, F11_H, + F20, F20_H, + F21, F21_H, + F22, F22_H, + F23, F23_H, + F24, F24_H, + F25, F25_H, + F26, F26_H, + F27, F27_H, + F28, F28_H, + F19, F19_H, + F18, F18_H, + F17, F17_H, + F16, F16_H, + F15, F15_H, + F14, F14_H, + F13, F13_H, + F12, F12_H, + F29, F29_H, + F30, F30_H, + F31, F31_H); + +reg_class s_reg( S0, S1, S2, S3, S4, S5, S6, S7 ); +reg_class s0_reg( S0 ); +reg_class s1_reg( S1 ); +reg_class s2_reg( S2 ); +reg_class s3_reg( S3 ); +reg_class s4_reg( S4 ); +reg_class s5_reg( S5 ); +reg_class s6_reg( S6 ); +reg_class s7_reg( S7 ); + +reg_class t_reg( T0, T1, T2, T3, T8, T9 ); +reg_class t0_reg( T0 ); +reg_class t1_reg( T1 ); +reg_class t2_reg( T2 ); +reg_class t3_reg( T3 ); +reg_class t8_reg( T8 ); +reg_class t9_reg( T9 ); + +reg_class a_reg( A0, A1, A2, A3, A4, A5, A6, A7 ); +reg_class a0_reg( A0 ); +reg_class a1_reg( A1 ); +reg_class a2_reg( A2 ); +reg_class a3_reg( A3 ); +reg_class a4_reg( A4 ); +reg_class a5_reg( A5 ); +reg_class a6_reg( A6 ); +reg_class a7_reg( A7 ); + +reg_class v0_reg( V0 ); +reg_class v1_reg( V1 ); + +reg_class sp_reg( SP, SP_H ); +reg_class fp_reg( FP, FP_H ); + +reg_class v0_long_reg( V0, V0_H ); +reg_class v1_long_reg( V1, V1_H ); +reg_class a0_long_reg( A0, A0_H ); +reg_class a1_long_reg( A1, A1_H ); +reg_class a2_long_reg( 
A2, A2_H ); +reg_class a3_long_reg( A3, A3_H ); +reg_class a4_long_reg( A4, A4_H ); +reg_class a5_long_reg( A5, A5_H ); +reg_class a6_long_reg( A6, A6_H ); +reg_class a7_long_reg( A7, A7_H ); +reg_class t0_long_reg( T0, T0_H ); +reg_class t1_long_reg( T1, T1_H ); +reg_class t2_long_reg( T2, T2_H ); +reg_class t3_long_reg( T3, T3_H ); +reg_class t8_long_reg( T8, T8_H ); +reg_class t9_long_reg( T9, T9_H ); +reg_class s0_long_reg( S0, S0_H ); +reg_class s1_long_reg( S1, S1_H ); +reg_class s2_long_reg( S2, S2_H ); +reg_class s3_long_reg( S3, S3_H ); +reg_class s4_long_reg( S4, S4_H ); +reg_class s5_long_reg( S5, S5_H ); +reg_class s6_long_reg( S6, S6_H ); +reg_class s7_long_reg( S7, S7_H ); + +reg_class int_reg( S7, S0, S1, S2, S4, S3, T8, T2, T3, T1, V1, A7, A6, A5, A4, V0, A3, A2, A1, A0, T0 ); + +reg_class no_Ax_int_reg( S7, S0, S1, S2, S4, S3, T8, T2, T3, T1, V1, V0, T0 ); + +reg_class p_reg( + S7, S7_H, + S0, S0_H, + S1, S1_H, + S2, S2_H, + S4, S4_H, + S3, S3_H, + T8, T8_H, + T2, T2_H, + T3, T3_H, + T1, T1_H, + A7, A7_H, + A6, A6_H, + A5, A5_H, + A4, A4_H, + A3, A3_H, + A2, A2_H, + A1, A1_H, + A0, A0_H, + T0, T0_H + ); + +reg_class no_T8_p_reg( + S7, S7_H, + S0, S0_H, + S1, S1_H, + S2, S2_H, + S4, S4_H, + S3, S3_H, + T2, T2_H, + T3, T3_H, + T1, T1_H, + A7, A7_H, + A6, A6_H, + A5, A5_H, + A4, A4_H, + A3, A3_H, + A2, A2_H, + A1, A1_H, + A0, A0_H, + T0, T0_H + ); + +reg_class long_reg( + S7, S7_H, + S0, S0_H, + S1, S1_H, + S2, S2_H, + S4, S4_H, + S3, S3_H, + T8, T8_H, + T2, T2_H, + T3, T3_H, + T1, T1_H, + A7, A7_H, + A6, A6_H, + A5, A5_H, + A4, A4_H, + A3, A3_H, + A2, A2_H, + A1, A1_H, + A0, A0_H, + T0, T0_H + ); + + +// Floating point registers. +// F31 are not used as temporary registers in D2I +reg_class flt_reg( F0, F1, F2, F3, F4, F5, F6, F7, F8, F9, F10, F11, F12, F13, F14, F15, F16, F17, F18, F19, F20, F21, F22, F23, F24, F25, F26, F27, F28, F29, F31); +reg_class dbl_reg( F0, F0_H, + F1, F1_H, + F2, F2_H, + F3, F3_H, + F4, F4_H, + F5, F5_H, + F6, F6_H, + F7, F7_H, + F8, F8_H, + F9, F9_H, + F10, F10_H, + F11, F11_H, + F12, F12_H, + F13, F13_H, + F14, F14_H, + F15, F15_H, + F16, F16_H, + F17, F17_H, + F18, F18_H, + F19, F19_H, + F20, F20_H, + F21, F21_H, + F22, F22_H, + F23, F23_H, + F24, F24_H, + F25, F25_H, + F26, F26_H, + F27, F27_H, + F28, F28_H, + F29, F29_H, + F31, F31_H); + +reg_class flt_arg0( F12 ); +reg_class dbl_arg0( F12, F12_H ); +reg_class dbl_arg1( F14, F14_H ); + +%} + +//----------DEFINITION BLOCK--------------------------------------------------- +// Define name --> value mappings to inform the ADLC of an integer valued name +// Current support includes integer values in the range [0, 0x7FFFFFFF] +// Format: +// int_def ( , ); +// Generated Code in ad_.hpp +// #define () +// // value == +// Generated code in ad_.cpp adlc_verification() +// assert( == , "Expect () to equal "); +// +definitions %{ + int_def DEFAULT_COST ( 100, 100); + int_def HUGE_COST (1000000, 1000000); + + // Memory refs are twice as expensive as run-of-the-mill. + int_def MEMORY_REF_COST ( 200, DEFAULT_COST * 2); + + // Branches are even more expensive. 
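+  // (Worked example: with DEFAULT_COST == 100, the matcher sees MEMORY_REF_COST == 200 above,
+  //  BRANCH_COST == 300 and CALL_COST == 500 below, so straight-line ALU code is preferred over
+  //  an extra branch or an out-of-line call whenever the cost model is consulted.  The adlc
+  //  merely verifies that the two columns of each int_def agree.)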
+ int_def BRANCH_COST ( 300, DEFAULT_COST * 3); + // we use jr instruction to construct call, so more expensive + int_def CALL_COST ( 500, DEFAULT_COST * 5); +/* + int_def EQUAL ( 1, 1 ); + int_def NOT_EQUAL ( 2, 2 ); + int_def GREATER ( 3, 3 ); + int_def GREATER_EQUAL ( 4, 4 ); + int_def LESS ( 5, 5 ); + int_def LESS_EQUAL ( 6, 6 ); +*/ +%} + + + +//----------SOURCE BLOCK------------------------------------------------------- +// This is a block of C++ code which provides values, functions, and +// definitions necessary in the rest of the architecture description + +source_hpp %{ +// Header information of the source block. +// Method declarations/definitions which are used outside +// the ad-scope can conveniently be defined here. +// +// To keep related declarations/definitions/uses close together, +// we switch between source %{ }% and source_hpp %{ }% freely as needed. + +class CallStubImpl { + + //-------------------------------------------------------------- + //---< Used for optimization in Compile::shorten_branches >--- + //-------------------------------------------------------------- + + public: + // Size of call trampoline stub. + static uint size_call_trampoline() { + return 0; // no call trampolines on this platform + } + + // number of relocations needed by a call trampoline stub + static uint reloc_call_trampoline() { + return 0; // no call trampolines on this platform + } +}; + +class HandlerImpl { + + public: + + static int emit_exception_handler(CodeBuffer &cbuf); + static int emit_deopt_handler(CodeBuffer& cbuf); + + static uint size_exception_handler() { + // NativeCall instruction size is the same as NativeJump. + // exception handler starts out as jump and can be patched to + // a call be deoptimization. (4932387) + // Note that this value is also credited (in output.cpp) to + // the size of the code section. + int size = NativeCall::instruction_size; + const uintx m = 16 - 1; + return mask_bits(size + m, ~m); + //return round_to(size, 16); + } + + static uint size_deopt_handler() { + int size = NativeCall::instruction_size; + const uintx m = 16 - 1; + return mask_bits(size + m, ~m); + //return round_to(size, 16); + } +}; + +%} // end source_hpp + +source %{ + +#define NO_INDEX 0 +#define RELOC_IMM64 Assembler::imm_operand +#define RELOC_DISP32 Assembler::disp32_operand + + +#define __ _masm. + +#define T0 RT0 +#define T1 RT1 +#define T2 RT2 +#define T3 RT3 +#define T8 RT8 +#define T9 RT9 + + +// Emit exception handler code. +// Stuff framesize into a register and call a VM stub routine. +int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) { + // Note that the code buffer's insts_mark is always relative to insts. + // That's why we must use the macroassembler to generate a handler. + MacroAssembler _masm(&cbuf); + address base = __ start_a_stub(size_exception_handler()); + if (base == NULL) { + ciEnv::current()->record_failure("CodeCache is full"); + return 0; // CodeBuffer::expand failed + } + + int offset = __ offset(); + + __ block_comment("; emit_exception_handler"); + + cbuf.set_insts_mark(); + __ relocate(relocInfo::runtime_call_type); + __ patchable_jump((address)OptoRuntime::exception_blob()->entry_point()); + __ align(16); + assert(__ offset() - offset <= (int) size_exception_handler(), "overflow"); + __ end_a_stub(); + return offset; +} + +// Emit deopt handler code. +int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) { + // Note that the code buffer's insts_mark is always relative to insts. 
+ // That's why we must use the macroassembler to generate a handler. + MacroAssembler _masm(&cbuf); + address base = __ start_a_stub(size_deopt_handler()); + if (base == NULL) { + ciEnv::current()->record_failure("CodeCache is full"); + return 0; // CodeBuffer::expand failed + } + + int offset = __ offset(); + + __ block_comment("; emit_deopt_handler"); + + cbuf.set_insts_mark(); + __ relocate(relocInfo::runtime_call_type); + __ patchable_call(SharedRuntime::deopt_blob()->unpack()); + __ align(16); + assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow"); + __ end_a_stub(); + return offset; +} + + +const bool Matcher::match_rule_supported(int opcode) { + if (!has_match_rule(opcode)) + return false; + + switch (opcode) { + //Op_CountLeadingZerosI Op_CountLeadingZerosL can be deleted, all MIPS CPUs support clz & dclz. + case Op_CountLeadingZerosI: + case Op_CountLeadingZerosL: + if (!UseCountLeadingZerosInstructionMIPS64) + return false; + break; + case Op_CountTrailingZerosI: + case Op_CountTrailingZerosL: + if (!UseCountTrailingZerosInstructionMIPS64) + return false; + break; + } + + return true; // Per default match rules are supported. +} + +const bool Matcher::match_rule_supported_vector(int opcode, int vlen) { + // TODO + // identify extra cases that we might want to provide match rules for + // e.g. Op_ vector nodes and other intrinsics while guarding with vlen + bool ret_value = match_rule_supported(opcode); + // Add rules here. + + return ret_value; // Per default match rules are supported. +} + +const bool Matcher::has_predicated_vectors(void) { + return false; +} + +const int Matcher::float_pressure(int default_pressure_threshold) { + Unimplemented(); + return default_pressure_threshold; +} + +bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) { + int offs = offset - br_size + 4; + // To be conservative on MIPS + // branch node should be end with: + // branch inst + // delay slot + const int safety_zone = 3 * BytesPerInstWord; + return Assembler::is_simm16((offs<0 ? offs-safety_zone : offs+safety_zone) >> 2); +} + + +// No additional cost for CMOVL. +const int Matcher::long_cmove_cost() { return 0; } + +// No CMOVF/CMOVD with SSE2 +const int Matcher::float_cmove_cost() { return ConditionalMoveLimit; } + +// Does the CPU require late expand (see block.cpp for description of late expand)? +const bool Matcher::require_postalloc_expand = false; + +// Do we need to mask the count passed to shift instructions or does +// the cpu only look at the lower 5/6 bits anyway? +const bool Matcher::need_masked_shift_count = false; + +bool Matcher::narrow_oop_use_complex_address() { + assert(UseCompressedOops, "only for compressed oops code"); + return false; +} + +bool Matcher::narrow_klass_use_complex_address() { + assert(UseCompressedClassPointers, "only for compressed klass code"); + return false; +} + +bool Matcher::const_oop_prefer_decode() { + // Prefer ConN+DecodeN over ConP. + return true; +} + +bool Matcher::const_klass_prefer_decode() { + // TODO: Either support matching DecodeNKlass (heap-based) in operand + // or condisider the following: + // Prefer ConNKlass+DecodeNKlass over ConP in simple compressed klass mode. + //return Universe::narrow_klass_base() == NULL; + return true; +} + +// This is UltraSparc specific, true just means we have fast l2f conversion +const bool Matcher::convL2FSupported(void) { + return true; +} + +// Max vector size in bytes. 0 if not supported. 
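+// (On this port that means vectors are either off (MaxVectorSize == 0) or exactly one 64-bit
+//  lane wide, mapping onto a double FP register (Op_VecD below).  So max_vector_size(), which
+//  is vector_width_in_bytes(bt) / type2aelembytes(bt), gives e.g. 8 byte lanes, 4 short lanes
+//  or 2 int lanes per vector.)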
+const int Matcher::vector_width_in_bytes(BasicType bt) { + if (MaxVectorSize == 0) + return 0; + assert(MaxVectorSize == 8, ""); + return 8; +} + +// Vector ideal reg +const uint Matcher::vector_ideal_reg(int size) { + assert(MaxVectorSize == 8, ""); + switch(size) { + case 8: return Op_VecD; + } + ShouldNotReachHere(); + return 0; +} + +// Only lowest bits of xmm reg are used for vector shift count. +const uint Matcher::vector_shift_count_ideal_reg(int size) { + fatal("vector shift is not supported"); + return Node::NotAMachineReg; +} + + +const bool Matcher::convi2l_type_required = true; + +// Should the Matcher clone shifts on addressing modes, expecting them +// to be subsumed into complex addressing expressions or compute them +// into registers? +bool Matcher::clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) { + return clone_base_plus_offset_address(m, mstack, address_visited); +} + +void Compile::reshape_address(AddPNode* addp) { +} + +// Limits on vector size (number of elements) loaded into vector. +const int Matcher::max_vector_size(const BasicType bt) { + assert(is_java_primitive(bt), "only primitive type vectors"); + return vector_width_in_bytes(bt)/type2aelembytes(bt); +} + +const int Matcher::min_vector_size(const BasicType bt) { + return max_vector_size(bt); // Same as max. +} + +// MIPS supports misaligned vectors store/load? FIXME +const bool Matcher::misaligned_vectors_ok() { + return false; + //return !AlignVector; // can be changed by flag +} + +// Register for DIVI projection of divmodI +RegMask Matcher::divI_proj_mask() { + ShouldNotReachHere(); + return RegMask(); +} + +// Register for MODI projection of divmodI +RegMask Matcher::modI_proj_mask() { + ShouldNotReachHere(); + return RegMask(); +} + +// Register for DIVL projection of divmodL +RegMask Matcher::divL_proj_mask() { + ShouldNotReachHere(); + return RegMask(); +} + +int Matcher::regnum_to_fpu_offset(int regnum) { + return regnum - 32; // The FP registers are in the second chunk +} + + +const bool Matcher::isSimpleConstant64(jlong value) { + // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?. + return true; +} + + +// Return whether or not this register is ever used as an argument. This +// function is used on startup to build the trampoline stubs in generateOptoStub. +// Registers not mentioned will be killed by the VM call in the trampoline, and +// arguments in those registers not be available to the callee. 
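+// (Concretely, for this port the convention checked below accepts T0 and A0..A7 as
+//  integer/pointer argument registers and F12..F19 as FP argument registers; every other
+//  register is treated as killed across the trampoline, per the note above.)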
+bool Matcher::can_be_java_arg( int reg ) { + // Refer to: [sharedRuntime_mips_64.cpp] SharedRuntime::java_calling_convention() + if ( reg == T0_num || reg == T0_H_num + || reg == A0_num || reg == A0_H_num + || reg == A1_num || reg == A1_H_num + || reg == A2_num || reg == A2_H_num + || reg == A3_num || reg == A3_H_num + || reg == A4_num || reg == A4_H_num + || reg == A5_num || reg == A5_H_num + || reg == A6_num || reg == A6_H_num + || reg == A7_num || reg == A7_H_num ) + return true; + + if ( reg == F12_num || reg == F12_H_num + || reg == F13_num || reg == F13_H_num + || reg == F14_num || reg == F14_H_num + || reg == F15_num || reg == F15_H_num + || reg == F16_num || reg == F16_H_num + || reg == F17_num || reg == F17_H_num + || reg == F18_num || reg == F18_H_num + || reg == F19_num || reg == F19_H_num ) + return true; + + return false; +} + +bool Matcher::is_spillable_arg( int reg ) { + return can_be_java_arg(reg); +} + +bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) { + return false; +} + +// Register for MODL projection of divmodL +RegMask Matcher::modL_proj_mask() { + ShouldNotReachHere(); + return RegMask(); +} + +const RegMask Matcher::method_handle_invoke_SP_save_mask() { + return FP_REG_mask(); +} + +// MIPS doesn't support AES intrinsics +const bool Matcher::pass_original_key_for_aes() { + return false; +} + +int CallStaticJavaDirectNode::compute_padding(int current_offset) const { + const uintx m = alignment_required() - 1; + return mask_bits(current_offset + m, ~m) - current_offset; +} + +int CallDynamicJavaDirectNode::compute_padding(int current_offset) const { + const uintx m = alignment_required() - 1; + return mask_bits(current_offset + m, ~m) - current_offset; +} + +int CallLeafNoFPDirectNode::compute_padding(int current_offset) const { + const uintx m = alignment_required() - 1; + return mask_bits(current_offset + m, ~m) - current_offset; +} + +int CallLeafDirectNode::compute_padding(int current_offset) const { + const uintx m = alignment_required() - 1; + return mask_bits(current_offset + m, ~m) - current_offset; +} + +int CallRuntimeDirectNode::compute_padding(int current_offset) const { + const uintx m = alignment_required() - 1; + return mask_bits(current_offset + m, ~m) - current_offset; +} + +// If CPU can load and store mis-aligned doubles directly then no fixup is +// needed. Else we split the double into 2 integer pieces and move it +// piece-by-piece. Only happens when passing doubles into C code as the +// Java calling convention forces doubles to be aligned. +const bool Matcher::misaligned_doubles_ok = false; +// Do floats take an entire double register or just half? +//const bool Matcher::float_in_double = true; +bool Matcher::float_in_double() { return false; } +// Do ints take an entire long register or just half? +const bool Matcher::int_in_long = true; +// Is it better to copy float constants, or load them directly from memory? +// Intel can load a float constant from a direct address, requiring no +// extra registers. Most RISCs will have to materialize an address into a +// register first, so they would do better to copy the constant from stack. +const bool Matcher::rematerialize_float_constants = false; +// Advertise here if the CPU requires explicit rounding operations +// to implement the UseStrictFP mode. +const bool Matcher::strict_fp_requires_explicit_rounding = false; +// false => size gets scaled to BytesPerLong, ok. 
+const bool Matcher::init_array_count_is_in_bytes = false; + +// Indicate if the safepoint node needs the polling page as an input. +// it does if the polling page is more than disp32 away. +bool SafePointNode::needs_polling_address_input() { + return SafepointMechanism::uses_thread_local_poll(); +} + +#ifndef PRODUCT +void MachBreakpointNode::format( PhaseRegAlloc *, outputStream* st ) const { + st->print("BRK"); +} +#endif + +void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc* ra_) const { + MacroAssembler _masm(&cbuf); + __ brk(5); +} + +uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const { + return MachNode::size(ra_); +} + + + +// !!!!! Special hack to get all type of calls to specify the byte offset +// from the start of the call to the point where the return address +// will point. +int MachCallStaticJavaNode::ret_addr_offset() { + //lui + //ori + //nop + //nop + //jalr + //nop + return 24; +} + +int MachCallDynamicJavaNode::ret_addr_offset() { + //lui IC_Klass, + //ori IC_Klass, + //dsll IC_Klass + //ori IC_Klass + + //lui T9 + //ori T9 + //nop + //nop + //jalr T9 + //nop + return 4 * 4 + 4 * 6; +} + +//============================================================================= + +// Figure out which register class each belongs in: rc_int, rc_float, rc_stack +enum RC { rc_bad, rc_int, rc_float, rc_stack }; +static enum RC rc_class( OptoReg::Name reg ) { + if( !OptoReg::is_valid(reg) ) return rc_bad; + if (OptoReg::is_stack(reg)) return rc_stack; + VMReg r = OptoReg::as_VMReg(reg); + if (r->is_Register()) return rc_int; + assert(r->is_FloatRegister(), "must be"); + return rc_float; +} + +uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const { + // Get registers to move + OptoReg::Name src_second = ra_->get_reg_second(in(1)); + OptoReg::Name src_first = ra_->get_reg_first(in(1)); + OptoReg::Name dst_second = ra_->get_reg_second(this ); + OptoReg::Name dst_first = ra_->get_reg_first(this ); + + enum RC src_second_rc = rc_class(src_second); + enum RC src_first_rc = rc_class(src_first); + enum RC dst_second_rc = rc_class(dst_second); + enum RC dst_first_rc = rc_class(dst_first); + + assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" ); + + // Generate spill code! 
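+  // (Road map for the cases below, using the usual MIPS mnemonics:
+  //    stack -> stack : ld/sd (64-bit) or lw/sw (32-bit), bounced through AT
+  //    stack -> gpr   : ld / lw / lwu          gpr -> stack : sd / sw
+  //    stack -> fpr   : ldc1 / lwc1            fpr -> stack : sdc1 / swc1
+  //    gpr   -> gpr   : move / move_u32        gpr -> fpr   : dmtc1 / mtc1
+  //    fpr   -> gpr   : dmfc1 / mfc1           fpr -> fpr   : mov_d / mov_s
+  //  A self-copy is recognized first and emits nothing.)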
+ + if( src_first == dst_first && src_second == dst_second ) + return 0; // Self copy, no move + + if (src_first_rc == rc_stack) { + // mem -> + if (dst_first_rc == rc_stack) { + // mem -> mem + assert(src_second != dst_first, "overlap"); + if ((src_first & 1) == 0 && src_first + 1 == src_second && + (dst_first & 1) == 0 && dst_first + 1 == dst_second) { + // 64-bit + int src_offset = ra_->reg2offset(src_first); + int dst_offset = ra_->reg2offset(dst_first); + if (cbuf) { + MacroAssembler _masm(cbuf); + __ ld(AT, Address(SP, src_offset)); + __ sd(AT, Address(SP, dst_offset)); +#ifndef PRODUCT + } else { + st->print("\n\t"); + st->print("ld AT, [SP + #%d]\t# 64-bit mem-mem spill 1\n\t" + "sd AT, [SP + #%d]", + src_offset, dst_offset); +#endif + } + } else { + // 32-bit + assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); + assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); + // No pushl/popl, so: + int src_offset = ra_->reg2offset(src_first); + int dst_offset = ra_->reg2offset(dst_first); + if (cbuf) { + MacroAssembler _masm(cbuf); + __ lw(AT, Address(SP, src_offset)); + __ sw(AT, Address(SP, dst_offset)); +#ifndef PRODUCT + } else { + st->print("\n\t"); + st->print("lw AT, [SP + #%d] spill 2\n\t" + "sw AT, [SP + #%d]\n\t", + src_offset, dst_offset); +#endif + } + } + return 0; + } else if (dst_first_rc == rc_int) { + // mem -> gpr + if ((src_first & 1) == 0 && src_first + 1 == src_second && + (dst_first & 1) == 0 && dst_first + 1 == dst_second) { + // 64-bit + int offset = ra_->reg2offset(src_first); + if (cbuf) { + MacroAssembler _masm(cbuf); + __ ld(as_Register(Matcher::_regEncode[dst_first]), Address(SP, offset)); +#ifndef PRODUCT + } else { + st->print("\n\t"); + st->print("ld %s, [SP + #%d]\t# spill 3", + Matcher::regName[dst_first], + offset); +#endif + } + } else { + // 32-bit + assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); + assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); + int offset = ra_->reg2offset(src_first); + if (cbuf) { + MacroAssembler _masm(cbuf); + if (this->ideal_reg() == Op_RegI) + __ lw(as_Register(Matcher::_regEncode[dst_first]), Address(SP, offset)); + else + __ lwu(as_Register(Matcher::_regEncode[dst_first]), Address(SP, offset)); +#ifndef PRODUCT + } else { + st->print("\n\t"); + if (this->ideal_reg() == Op_RegI) + st->print("lw %s, [SP + #%d]\t# spill 4", + Matcher::regName[dst_first], + offset); + else + st->print("lwu %s, [SP + #%d]\t# spill 5", + Matcher::regName[dst_first], + offset); +#endif + } + } + return 0; + } else if (dst_first_rc == rc_float) { + // mem-> xmm + if ((src_first & 1) == 0 && src_first + 1 == src_second && + (dst_first & 1) == 0 && dst_first + 1 == dst_second) { + // 64-bit + int offset = ra_->reg2offset(src_first); + if (cbuf) { + MacroAssembler _masm(cbuf); + __ ldc1( as_FloatRegister(Matcher::_regEncode[dst_first]), Address(SP, offset)); +#ifndef PRODUCT + } else { + st->print("\n\t"); + st->print("ldc1 %s, [SP + #%d]\t# spill 6", + Matcher::regName[dst_first], + offset); +#endif + } + } else { + // 32-bit + assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); + assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); + int offset = ra_->reg2offset(src_first); + if (cbuf) { + MacroAssembler _masm(cbuf); + __ lwc1( as_FloatRegister(Matcher::_regEncode[dst_first]), Address(SP, offset)); +#ifndef PRODUCT + } else { + st->print("\n\t"); + st->print("lwc1 %s, [SP + 
#%d]\t# spill 7", + Matcher::regName[dst_first], + offset); +#endif + } + } + return 0; + } + } else if (src_first_rc == rc_int) { + // gpr -> + if (dst_first_rc == rc_stack) { + // gpr -> mem + if ((src_first & 1) == 0 && src_first + 1 == src_second && + (dst_first & 1) == 0 && dst_first + 1 == dst_second) { + // 64-bit + int offset = ra_->reg2offset(dst_first); + if (cbuf) { + MacroAssembler _masm(cbuf); + __ sd(as_Register(Matcher::_regEncode[src_first]), Address(SP, offset)); +#ifndef PRODUCT + } else { + st->print("\n\t"); + st->print("sd %s, [SP + #%d] # spill 8", + Matcher::regName[src_first], + offset); +#endif + } + } else { + // 32-bit + assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); + assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); + int offset = ra_->reg2offset(dst_first); + if (cbuf) { + MacroAssembler _masm(cbuf); + __ sw(as_Register(Matcher::_regEncode[src_first]), Address(SP, offset)); +#ifndef PRODUCT + } else { + st->print("\n\t"); + st->print("sw %s, [SP + #%d]\t# spill 9", + Matcher::regName[src_first], offset); +#endif + } + } + return 0; + } else if (dst_first_rc == rc_int) { + // gpr -> gpr + if ((src_first & 1) == 0 && src_first + 1 == src_second && + (dst_first & 1) == 0 && dst_first + 1 == dst_second) { + // 64-bit + if (cbuf) { + MacroAssembler _masm(cbuf); + __ move(as_Register(Matcher::_regEncode[dst_first]), + as_Register(Matcher::_regEncode[src_first])); +#ifndef PRODUCT + } else { + st->print("\n\t"); + st->print("move(64bit) %s <-- %s\t# spill 10", + Matcher::regName[dst_first], + Matcher::regName[src_first]); +#endif + } + return 0; + } else { + // 32-bit + assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); + assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); + if (cbuf) { + MacroAssembler _masm(cbuf); + if (this->ideal_reg() == Op_RegI) + __ move_u32(as_Register(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first])); + else + __ daddu(as_Register(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]), R0); +#ifndef PRODUCT + } else { + st->print("\n\t"); + st->print("move(32-bit) %s <-- %s\t# spill 11", + Matcher::regName[dst_first], + Matcher::regName[src_first]); +#endif + } + return 0; + } + } else if (dst_first_rc == rc_float) { + // gpr -> xmm + if ((src_first & 1) == 0 && src_first + 1 == src_second && + (dst_first & 1) == 0 && dst_first + 1 == dst_second) { + // 64-bit + if (cbuf) { + MacroAssembler _masm(cbuf); + __ dmtc1(as_Register(Matcher::_regEncode[src_first]), as_FloatRegister(Matcher::_regEncode[dst_first])); +#ifndef PRODUCT + } else { + st->print("\n\t"); + st->print("dmtc1 %s, %s\t# spill 12", + Matcher::regName[dst_first], + Matcher::regName[src_first]); +#endif + } + } else { + // 32-bit + assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); + assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); + if (cbuf) { + MacroAssembler _masm(cbuf); + __ mtc1( as_Register(Matcher::_regEncode[src_first]), as_FloatRegister(Matcher::_regEncode[dst_first]) ); +#ifndef PRODUCT + } else { + st->print("\n\t"); + st->print("mtc1 %s, %s\t# spill 13", + Matcher::regName[dst_first], + Matcher::regName[src_first]); +#endif + } + } + return 0; + } + } else if (src_first_rc == rc_float) { + // xmm -> + if (dst_first_rc == rc_stack) { + // xmm -> mem + if ((src_first & 1) == 0 && src_first + 1 == src_second && + (dst_first & 1) == 0 && 
dst_first + 1 == dst_second) { + // 64-bit + int offset = ra_->reg2offset(dst_first); + if (cbuf) { + MacroAssembler _masm(cbuf); + __ sdc1( as_FloatRegister(Matcher::_regEncode[src_first]), Address(SP, offset) ); +#ifndef PRODUCT + } else { + st->print("\n\t"); + st->print("sdc1 %s, [SP + #%d]\t# spill 14", + Matcher::regName[src_first], + offset); +#endif + } + } else { + // 32-bit + assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); + assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); + int offset = ra_->reg2offset(dst_first); + if (cbuf) { + MacroAssembler _masm(cbuf); + __ swc1(as_FloatRegister(Matcher::_regEncode[src_first]), Address(SP, offset)); +#ifndef PRODUCT + } else { + st->print("\n\t"); + st->print("swc1 %s, [SP + #%d]\t# spill 15", + Matcher::regName[src_first], + offset); +#endif + } + } + return 0; + } else if (dst_first_rc == rc_int) { + // xmm -> gpr + if ((src_first & 1) == 0 && src_first + 1 == src_second && + (dst_first & 1) == 0 && dst_first + 1 == dst_second) { + // 64-bit + if (cbuf) { + MacroAssembler _masm(cbuf); + __ dmfc1( as_Register(Matcher::_regEncode[dst_first]), as_FloatRegister(Matcher::_regEncode[src_first])); +#ifndef PRODUCT + } else { + st->print("\n\t"); + st->print("dmfc1 %s, %s\t# spill 16", + Matcher::regName[dst_first], + Matcher::regName[src_first]); +#endif + } + } else { + // 32-bit + assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); + assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); + if (cbuf) { + MacroAssembler _masm(cbuf); + __ mfc1( as_Register(Matcher::_regEncode[dst_first]), as_FloatRegister(Matcher::_regEncode[src_first])); +#ifndef PRODUCT + } else { + st->print("\n\t"); + st->print("mfc1 %s, %s\t# spill 17", + Matcher::regName[dst_first], + Matcher::regName[src_first]); +#endif + } + } + return 0; + } else if (dst_first_rc == rc_float) { + // xmm -> xmm + if ((src_first & 1) == 0 && src_first + 1 == src_second && + (dst_first & 1) == 0 && dst_first + 1 == dst_second) { + // 64-bit + if (cbuf) { + MacroAssembler _masm(cbuf); + __ mov_d( as_FloatRegister(Matcher::_regEncode[dst_first]), as_FloatRegister(Matcher::_regEncode[src_first])); +#ifndef PRODUCT + } else { + st->print("\n\t"); + st->print("mov_d %s <-- %s\t# spill 18", + Matcher::regName[dst_first], + Matcher::regName[src_first]); +#endif + } + } else { + // 32-bit + assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); + assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); + if (cbuf) { + MacroAssembler _masm(cbuf); + __ mov_s( as_FloatRegister(Matcher::_regEncode[dst_first]), as_FloatRegister(Matcher::_regEncode[src_first])); +#ifndef PRODUCT + } else { + st->print("\n\t"); + st->print("mov_s %s <-- %s\t# spill 19", + Matcher::regName[dst_first], + Matcher::regName[src_first]); +#endif + } + } + return 0; + } + } + + assert(0," foo "); + Unimplemented(); + return 0; +} + +#ifndef PRODUCT +void MachSpillCopyNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { + implementation( NULL, ra_, false, st ); +} +#endif + +void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { + implementation( &cbuf, ra_, false, NULL ); +} + +uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const { + return MachNode::size(ra_); +} + +//============================================================================= +#ifndef PRODUCT +void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { 
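+  // (Epilogue shape, as a sketch: reload RA and FP from the two words at the top of the frame
+  //  (a single gslq when UseLEXT1 is on), release the frame with one daddiu on SP, then, for
+  //  method compilations that poll, read the polling word (thread-local poll or global page)
+  //  so a pending safepoint can trap the return.  emit() below follows the same outline.)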
+ Compile *C = ra_->C; + int framesize = C->frame_size_in_bytes(); + + assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); + + st->print_cr("daddiu SP, SP, %d # Rlease stack @ MachEpilogNode", framesize); + st->print("\t"); + if (UseLEXT1) { + st->print_cr("gslq RA, FP, SP, %d # Restore FP & RA @ MachEpilogNode", -wordSize*2); + } else { + st->print_cr("ld RA, SP, %d # Restore RA @ MachEpilogNode", -wordSize); + st->print("\t"); + st->print_cr("ld FP, SP, %d # Restore FP @ MachEpilogNode", -wordSize*2); + } + + if( do_polling() && C->is_method_compilation() ) { + st->print("\t"); + if (SafepointMechanism::uses_thread_local_poll()) { + st->print_cr("ld AT, poll_offset[thread] #polling_page_address\n\t" + "lw AT, [AT]\t" + "# Safepoint: poll for GC"); + } else { + st->print_cr("Poll Safepoint # MachEpilogNode"); + } + } +} +#endif + +void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { + Compile *C = ra_->C; + MacroAssembler _masm(&cbuf); + int framesize = C->frame_size_in_bytes(); + + assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); + assert(Assembler::is_simm16(framesize), "daddiu uses a signed 16-bit int"); + + if (UseLEXT1) { + __ gslq(RA, FP, SP, framesize - wordSize * 2); + } else { + __ ld(RA, SP, framesize - wordSize ); + __ ld(FP, SP, framesize - wordSize * 2); + } + __ daddiu(SP, SP, framesize); + + if (StackReservedPages > 0 && C->has_reserved_stack_access()) { + __ reserved_stack_check(); + } + + Register thread = TREG; +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + + if( do_polling() && C->is_method_compilation() ) { + if (SafepointMechanism::uses_thread_local_poll()) { + __ ld(AT, thread, in_bytes(Thread::polling_page_offset())); + __ relocate(relocInfo::poll_return_type); + __ lw(AT, AT, 0); + } else { + __ set64(AT, (long)os::get_polling_page()); + __ relocate(relocInfo::poll_return_type); + __ lw(AT, AT, 0); + } + } +} + +uint MachEpilogNode::size(PhaseRegAlloc *ra_) const { + return MachNode::size(ra_); // too many variables; just compute it the hard way fujie debug +} + +int MachEpilogNode::reloc() const { + return 0; // a large enough number +} + +const Pipeline * MachEpilogNode::pipeline() const { + return MachNode::pipeline_class(); +} + +int MachEpilogNode::safepoint_offset() const { return 0; } + +//============================================================================= + +#ifndef PRODUCT +void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { + int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); + int reg = ra_->get_reg_first(this); + st->print("ADDI %s, SP, %d @BoxLockNode",Matcher::regName[reg],offset); +} +#endif + + +uint BoxLockNode::size(PhaseRegAlloc *ra_) const { + return 4; +} + +void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { + MacroAssembler _masm(&cbuf); + int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); + int reg = ra_->get_encode(this); + + __ addiu(as_Register(reg), SP, offset); +} + + +//static int sizeof_FFree_Float_Stack_All = -1; + +int MachCallRuntimeNode::ret_addr_offset() { + //lui + //ori + //dsll + //ori + //jalr + //nop + assert(NativeCall::instruction_size == 24, "in MachCallRuntimeNode::ret_addr_offset()"); + return NativeCall::instruction_size; +} + + +//============================================================================= +#ifndef PRODUCT +void MachNopNode::format( PhaseRegAlloc *, outputStream* st ) const { + st->print("NOP \t# %d bytes pad for loops and calls", 4 * _count); 
+} +#endif + +void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc * ) const { + MacroAssembler _masm(&cbuf); + int i = 0; + for(i = 0; i < _count; i++) + __ nop(); +} + +uint MachNopNode::size(PhaseRegAlloc *) const { + return 4 * _count; +} +const Pipeline* MachNopNode::pipeline() const { + return MachNode::pipeline_class(); +} + +//============================================================================= + +//============================================================================= +#ifndef PRODUCT +void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { + st->print_cr("load_klass(T9, T0)"); + st->print_cr("\tbeq(T9, iCache, L)"); + st->print_cr("\tnop"); + st->print_cr("\tjmp(SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type)"); + st->print_cr("\tnop"); + st->print_cr("\tnop"); + st->print_cr(" L:"); +} +#endif + + +void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { + MacroAssembler _masm(&cbuf); + int ic_reg = Matcher::inline_cache_reg_encode(); + Label L; + Register receiver = T0; + Register iCache = as_Register(ic_reg); + + __ load_klass(T9, receiver); + __ beq(T9, iCache, L); + __ delayed()->nop(); + __ jmp((address)SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type); + __ delayed()->nop(); + __ bind(L); +} + +uint MachUEPNode::size(PhaseRegAlloc *ra_) const { + return MachNode::size(ra_); +} + + + +//============================================================================= + +const RegMask& MachConstantBaseNode::_out_RegMask = P_REG_mask(); + +int Compile::ConstantTable::calculate_table_base_offset() const { + return 0; // absolute addressing, no offset +} + +bool MachConstantBaseNode::requires_postalloc_expand() const { return false; } +void MachConstantBaseNode::postalloc_expand(GrowableArray *nodes, PhaseRegAlloc *ra_) { + ShouldNotReachHere(); +} + +void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const { + Compile* C = ra_->C; + Compile::ConstantTable& constant_table = C->constant_table(); + MacroAssembler _masm(&cbuf); + + Register Rtoc = as_Register(ra_->get_encode(this)); + CodeSection* consts_section = __ code()->consts(); + int consts_size = consts_section->align_at_start(consts_section->size()); + assert(constant_table.size() == consts_size, "must be equal"); + + if (consts_section->size()) { + // Materialize the constant table base. + address baseaddr = consts_section->start() + -(constant_table.table_base_offset()); + // RelocationHolder rspec = internal_word_Relocation::spec(baseaddr); + __ relocate(relocInfo::internal_word_type); + __ patchable_set48(Rtoc, (long)baseaddr); + } +} + +uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const { + // patchable_set48 (4 insts) + return 4 * 4; +} + +#ifndef PRODUCT +void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const { + Register r = as_Register(ra_->get_encode(this)); + st->print("patchable_set48 %s, &constanttable (constant table base) @ MachConstantBaseNode", r->name()); +} +#endif + + +//============================================================================= +#ifndef PRODUCT +void MachPrologNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { + Compile* C = ra_->C; + + int framesize = C->frame_size_in_bytes(); + int bangsize = C->bang_size_in_bytes(); + assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); + + // Calls to C2R adapters often do not accept exceptional returns. + // We require that their callers must bang for them. 
But be careful, because + // some VM calls (such as call site linkage) can use several kilobytes of + // stack. But the stack safety zone should account for that. + // See bugs 4446381, 4468289, 4497237. + if (C->need_stack_bang(bangsize)) { + st->print_cr("# stack bang"); st->print("\t"); + } + if (UseLEXT1) { + st->print("gssq RA, FP, %d(SP) @ MachPrologNode\n\t", -wordSize*2); + } else { + st->print("sd RA, %d(SP) @ MachPrologNode\n\t", -wordSize); + st->print("sd FP, %d(SP) @ MachPrologNode\n\t", -wordSize*2); + } + st->print("daddiu FP, SP, -%d \n\t", wordSize*2); + st->print("daddiu SP, SP, -%d \t",framesize); +} +#endif + + +void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { + Compile* C = ra_->C; + MacroAssembler _masm(&cbuf); + + int framesize = C->frame_size_in_bytes(); + int bangsize = C->bang_size_in_bytes(); + + assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); + assert(Assembler::is_simm16(-framesize), "daddiu uses a signed 16-bit int"); + + // Make enough room for patch_verified_entry + __ nop(); + __ nop(); + + if (C->need_stack_bang(bangsize)) { + __ generate_stack_overflow_check(bangsize); + } + + __ daddiu(SP, SP, -framesize); + if (UseLEXT1) { + __ gssq(RA, FP, SP, framesize - wordSize * 2); + } else { + __ sd(RA, SP, framesize - wordSize); + __ sd(FP, SP, framesize - wordSize * 2); + } + __ daddiu(FP, SP, framesize - wordSize * 2); + + C->set_frame_complete(cbuf.insts_size()); + if (C->has_mach_constant_base_node()) { + // NOTE: We set the table base offset here because users might be + // emitted before MachConstantBaseNode. + Compile::ConstantTable& constant_table = C->constant_table(); + constant_table.set_table_base_offset(constant_table.calculate_table_base_offset()); + } +} + + +uint MachPrologNode::size(PhaseRegAlloc *ra_) const { + return MachNode::size(ra_); // too many variables; just compute it the hard way +} + +int MachPrologNode::reloc() const { + return 0; // a large enough number +} + +%} + +//----------ENCODING BLOCK----------------------------------------------------- +// This block specifies the encoding classes used by the compiler to output +// byte streams. Encoding classes generate functions which are called by +// Machine Instruction Nodes in order to generate the bit encoding of the +// instruction. Operands specify their base encoding interface with the +// interface keyword. There are currently supported four interfaces, +// REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER. REG_INTER causes an +// operand to generate a function which returns its register number when +// queried. CONST_INTER causes an operand to generate a function which +// returns the value of the constant when queried. MEMORY_INTER causes an +// operand to generate four functions which return the Base Register, the +// Index Register, the Scale Value, and the Offset Value of the operand when +// queried. COND_INTER causes an operand to generate six functions which +// return the encoding code (ie - encoding bits for the instruction) +// associated with each basic boolean condition for a conditional instruction. +// Instructions specify two basic values for encoding. They use the +// ins_encode keyword to specify their encoding class (which must be one of +// the class names specified in the encoding block), and they use the +// opcode keyword to specify, in order, their primary, secondary, and +// tertiary opcode. Only the opcode sections which a particular instruction +// needs for encoding need to be specified. 
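+// Editorial sketch (not part of this port): a minimal enc_class written in the
+// same style as the ones below, to show how '$' substitution hands operand
+// encodings to the MacroAssembler.  The class name and operands here are
+// hypothetical and nothing in this file references them.
+//
+//   enc_class enc_example_move(mRegI dst, mRegI src) %{
+//     MacroAssembler _masm(&cbuf);               // cbuf comes from the ADLC-generated emitter
+//     __ move($dst$$Register, $src$$Register);   // REG_INTER operands resolve to Registers
+//   %}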
+encode %{ + + enc_class Java_To_Runtime (method meth) %{ // CALL Java_To_Runtime, Java_To_Runtime_Leaf + MacroAssembler _masm(&cbuf); + // This is the instruction starting address for relocation info. + __ block_comment("Java_To_Runtime"); + cbuf.set_insts_mark(); + __ relocate(relocInfo::runtime_call_type); + __ patchable_call((address)$meth$$method); + %} + + enc_class Java_Static_Call (method meth) %{ // JAVA STATIC CALL + // CALL to fixup routine. Fixup routine uses ScopeDesc info to determine + // who we intended to call. + MacroAssembler _masm(&cbuf); + address addr = (address)$meth$$method; + address call; + __ block_comment("Java_Static_Call"); + + if ( !_method ) { + // A call to a runtime wrapper, e.g. new, new_typeArray_Java, uncommon_trap. + call = __ trampoline_call(AddressLiteral(addr, relocInfo::runtime_call_type), &cbuf); + } else { + int method_index = resolved_method_index(cbuf); + RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index) + : static_call_Relocation::spec(method_index); + call = __ trampoline_call(AddressLiteral(addr, rspec), &cbuf); + + // Emit stub for static call + address stub = CompiledStaticCall::emit_to_interp_stub(cbuf); + if (stub == NULL) { + ciEnv::current()->record_failure("CodeCache is full"); + return; + } + } + if (call == NULL) { + ciEnv::current()->record_failure("CodeCache is full"); + return; + } + %} + + + // + // [Ref: LIR_Assembler::ic_call() ] + // + enc_class Java_Dynamic_Call (method meth) %{ // JAVA DYNAMIC CALL + MacroAssembler _masm(&cbuf); + __ block_comment("Java_Dynamic_Call"); + __ ic_call((address)$meth$$method, resolved_method_index(cbuf)); + %} + + + enc_class enc_PartialSubtypeCheck(mRegP result, mRegP sub, mRegP super, mRegI tmp) %{ + Register result = $result$$Register; + Register sub = $sub$$Register; + Register super = $super$$Register; + Register length = $tmp$$Register; + Register tmp = T9; + Label miss; + + // result may be the same as sub + // 47c B40: # B21 B41 <- B20 Freq: 0.155379 + // 47c partialSubtypeCheck result=S1, sub=S1, super=S3, length=S0 + // 4bc mov S2, NULL #@loadConP + // 4c0 beq S1, S2, B21 #@branchConP P=0.999999 C=-1.000000 + // + MacroAssembler _masm(&cbuf); + Label done; + __ check_klass_subtype_slow_path(sub, super, length, tmp, + NULL, &miss, + /*set_cond_codes:*/ true); + // Refer to X86_64's RDI + __ move(result, 0); + __ b(done); + __ delayed()->nop(); + + __ bind(miss); + __ move(result, 1); + __ bind(done); + %} + +%} + + +//---------MIPS FRAME-------------------------------------------------------------- +// Definition of frame structure and management information. 
+// +// S T A C K L A Y O U T Allocators stack-slot number +// | (to get allocators register number +// G Owned by | | v add SharedInfo::stack0) +// r CALLER | | +// o | +--------+ pad to even-align allocators stack-slot +// w V | pad0 | numbers; owned by CALLER +// t -----------+--------+----> Matcher::_in_arg_limit, unaligned +// h ^ | in | 5 +// | | args | 4 Holes in incoming args owned by SELF +// | | old | | 3 +// | | SP-+--------+----> Matcher::_old_SP, even aligned +// v | | ret | 3 return address +// Owned by +--------+ +// Self | pad2 | 2 pad to align old SP +// | +--------+ 1 +// | | locks | 0 +// | +--------+----> SharedInfo::stack0, even aligned +// | | pad1 | 11 pad to align new SP +// | +--------+ +// | | | 10 +// | | spills | 9 spills +// V | | 8 (pad0 slot for callee) +// -----------+--------+----> Matcher::_out_arg_limit, unaligned +// ^ | out | 7 +// | | args | 6 Holes in outgoing args owned by CALLEE +// Owned by new | | +// Callee SP-+--------+----> Matcher::_new_SP, even aligned +// | | +// +// Note 1: Only region 8-11 is determined by the allocator. Region 0-5 is +// known from SELF's arguments and the Java calling convention. +// Region 6-7 is determined per call site. +// Note 2: If the calling convention leaves holes in the incoming argument +// area, those holes are owned by SELF. Holes in the outgoing area +// are owned by the CALLEE. Holes should not be nessecary in the +// incoming area, as the Java calling convention is completely under +// the control of the AD file. Doubles can be sorted and packed to +// avoid holes. Holes in the outgoing arguments may be nessecary for +// varargs C calling conventions. +// Note 3: Region 0-3 is even aligned, with pad2 as needed. Region 3-5 is +// even aligned with pad0 as needed. +// Region 6 is even aligned. Region 6-7 is NOT even aligned; +// region 6-11 is even aligned; it may be padded out more so that +// the region from SP to FP meets the minimum stack alignment. +// Note 4: For I2C adapters, the incoming FP may not meet the minimum stack +// alignment. Region 11, pad1, may be dynamically extended so that +// SP meets the minimum alignment. + + +frame %{ + + stack_direction(TOWARDS_LOW); + + // These two registers define part of the calling convention + // between compiled code and the interpreter. + // SEE StartI2CNode::calling_convention & StartC2INode::calling_convention & StartOSRNode::calling_convention + // for more information. + + inline_cache_reg(T1); // Inline Cache Register + interpreter_method_oop_reg(S3); // Method Oop Register when calling interpreter + + // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset] + cisc_spilling_operand_name(indOffset32); + + // Number of stack slots consumed by locking an object + // generate Compile::sync_stack_slots + sync_stack_slots(2); + + frame_pointer(SP); + + // Interpreter stores its frame pointer in a register which is + // stored to the stack by I2CAdaptors. + // I2CAdaptors convert from interpreted java to compiled java. + + interpreter_frame_pointer(FP); + + // generate Matcher::stack_alignment + stack_alignment(StackAlignmentInBytes); //wordSize = sizeof(char*); + + // Number of stack slots between incoming argument block and the start of + // a new frame. The PROLOG must add this many slots to the stack. The + // EPILOG must remove this many slots. + in_preserve_stack_slots(4); //Now VerifyStackAtCalls is defined as false ! 
Leave two stack slots for ra and fp + + // Number of outgoing stack slots killed above the out_preserve_stack_slots + // for calls to C. Supports the var-args backing area for register parms. + varargs_C_out_slots_killed(0); + + // The after-PROLOG location of the return address. Location of + // return address specifies a type (REG or STACK) and a number + // representing the register number (i.e. - use a register name) or + // stack slot. + // Ret Addr is on stack in slot 0 if no locks or verification or alignment. + // Otherwise, it is above the locks and verification slot and alignment word + //return_addr(STACK -1+ round_to(1+VerifyStackAtCalls+Compile::current()->sync()*Compile::current()->sync_stack_slots(),WordsPerLong)); + return_addr(REG RA); + + // Body of function which returns an integer array locating + // arguments either in registers or in stack slots. Passed an array + // of ideal registers called "sig" and a "length" count. Stack-slot + // offsets are based on outgoing arguments, i.e. a CALLER setting up + // arguments for a CALLEE. Incoming stack arguments are + // automatically biased by the preserve_stack_slots field above. + + + // will generated to Matcher::calling_convention(OptoRegPair *sig, uint length, bool is_outgoing) + // StartNode::calling_convention call this. + calling_convention %{ + SharedRuntime::java_calling_convention(sig_bt, regs, length, false); + %} + + + + + // Body of function which returns an integer array locating + // arguments either in registers or in stack slots. Passed an array + // of ideal registers called "sig" and a "length" count. Stack-slot + // offsets are based on outgoing arguments, i.e. a CALLER setting up + // arguments for a CALLEE. Incoming stack arguments are + // automatically biased by the preserve_stack_slots field above. + + + // SEE CallRuntimeNode::calling_convention for more information. + c_calling_convention %{ + (void) SharedRuntime::c_calling_convention(sig_bt, regs, /*regs2=*/NULL, length); + %} + + + // Location of C & interpreter return values + // register(s) contain(s) return value for Op_StartI2C and Op_StartOSR. + // SEE Matcher::match. + c_return_value %{ + assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" ); + /* -- , -- , Op_RegN, Op_RegI, Op_RegP, Op_RegF, Op_RegD, Op_RegL */ + static int lo[Op_RegL+1] = { 0, 0, V0_num, V0_num, V0_num, F0_num, F0_num, V0_num }; + static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, V0_H_num, OptoReg::Bad, F0_H_num, V0_H_num }; + return OptoRegPair(hi[ideal_reg],lo[ideal_reg]); + %} + + // Location of return values + // register(s) contain(s) return value for Op_StartC2I and Op_Start. + // SEE Matcher::match. 
+ + return_value %{ + assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" ); + /* -- , -- , Op_RegN, Op_RegI, Op_RegP, Op_RegF, Op_RegD, Op_RegL */ + static int lo[Op_RegL+1] = { 0, 0, V0_num, V0_num, V0_num, F0_num, F0_num, V0_num }; + static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, V0_H_num, OptoReg::Bad, F0_H_num, V0_H_num}; + return OptoRegPair(hi[ideal_reg],lo[ideal_reg]); + %} + +%} + +//----------ATTRIBUTES--------------------------------------------------------- +//----------Operand Attributes------------------------------------------------- +op_attrib op_cost(0); // Required cost attribute + +//----------Instruction Attributes--------------------------------------------- +ins_attrib ins_cost(100); // Required cost attribute +ins_attrib ins_size(32); // Required size attribute (in bits) +ins_attrib ins_pc_relative(0); // Required PC Relative flag +ins_attrib ins_short_branch(0); // Required flag: is this instruction a + // non-matching short branch variant of some + // long branch? +ins_attrib ins_alignment(4); // Required alignment attribute (must be a power of 2) + // specifies the alignment that some part of the instruction (not + // necessarily the start) requires. If > 1, a compute_padding() + // function must be provided for the instruction + +//----------OPERANDS----------------------------------------------------------- +// Operand definitions must precede instruction definitions for correct parsing +// in the ADLC because operands constitute user defined types which are used in +// instruction definitions. + +// Vectors +operand vecD() %{ + constraint(ALLOC_IN_RC(dbl_reg)); + match(VecD); + + format %{ %} + interface(REG_INTER); +%} + +// Flags register, used as output of compare instructions +operand FlagsReg() %{ + constraint(ALLOC_IN_RC(t0_reg)); + match(RegFlags); + + format %{ "T0" %} + interface(REG_INTER); +%} + +//----------Simple Operands---------------------------------------------------- +// TODO: Should we need to define some more special immediate number ? 
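+// Editorial note (sketch, not part of this port): each immediate operand below
+// pairs a match(ConI/ConL/ConP/...) rule with a predicate over the constant's
+// value; the matcher only substitutes the operand when the predicate holds, and
+// the op_cost values bias selection toward the narrowest usable form.  A
+// hypothetical rule consuming immI16 (defined below) could then emit a single
+// 16-bit-immediate instruction:
+//
+//   instruct example_addI_imm16(mRegI dst, mRegI src, immI16 imm) %{
+//     match(Set dst (AddI src imm));
+//     ins_encode %{
+//       __ addiu($dst$$Register, $src$$Register, $imm$$constant);  // fits simm16 by the predicate
+//     %}
+//     ins_pipe(ialu_regI_imm16);
+//   %}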
+// Immediate Operands +// Integer Immediate +operand immI() %{ + match(ConI); + // TODO: should not match immI8 here LEE + match(immI8); + + op_cost(20); + format %{ %} + interface(CONST_INTER); +%} + +operand immI8() %{ + predicate((-128 <= n->get_int()) && (n->get_int() <= 127)); + match(ConI); + + op_cost(5); + format %{ %} + interface(CONST_INTER); +%} + +operand immI16() %{ + predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767)); + match(ConI); + + op_cost(10); + format %{ %} + interface(CONST_INTER); +%} + +operand immI_M65536() %{ + predicate(n->get_int() == -65536); + match(ConI); + + op_cost(5); + format %{ %} + interface(CONST_INTER); +%} + +// Constant for decrement +operand immI_M1() %{ + predicate(n->get_int() == -1); + match(ConI); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// Constant for test vs zero +operand immI_0() %{ + predicate(n->get_int() == 0); + match(ConI); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// Constant for increment +operand immI_1() %{ + predicate(n->get_int() == 1); + match(ConI); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// Constants for increment +operand immI_16() %{ + predicate(n->get_int() == 16); + match(ConI); + + format %{ %} + interface(CONST_INTER); +%} + +operand immI_24() %{ + predicate(n->get_int() == 24); + match(ConI); + + format %{ %} + interface(CONST_INTER); +%} + +// Constant for long shifts +operand immI_32() %{ + predicate(n->get_int() == 32); + match(ConI); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// Constant for byte-wide masking +operand immI_255() %{ + predicate(n->get_int() == 255); + match(ConI); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +operand immI_65535() %{ + predicate(n->get_int() == 65535); + match(ConI); + + op_cost(5); + format %{ %} + interface(CONST_INTER); +%} + +operand immI_MaxI() %{ + predicate(n->get_int() == 2147483647); + match(ConI); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +operand immI_M32767_32768() %{ + predicate((-32767 <= n->get_int()) && (n->get_int() <= 32768)); + match(ConI); + + op_cost(10); + format %{ %} + interface(CONST_INTER); +%} + +// Valid scale values for addressing modes +operand immI_0_3() %{ + predicate(0 <= n->get_int() && (n->get_int() <= 3)); + match(ConI); + + format %{ %} + interface(CONST_INTER); +%} + +operand immI_0_31() %{ + predicate(n->get_int() >= 0 && n->get_int() <= 31); + match(ConI); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +operand immI_0_32767() %{ + predicate(n->get_int() >= 0 && n->get_int() <= 32767); + match(ConI); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +operand immI_0_65535() %{ + predicate(n->get_int() >= 0 && n->get_int() <= 65535); + match(ConI); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +operand immI_32_63() %{ + predicate(n->get_int() >= 32 && n->get_int() <= 63); + match(ConI); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +// Operand for non-negtive integer mask +operand immI_nonneg_mask() %{ + predicate((n->get_int() >= 0) && (Assembler::is_int_mask(n->get_int()) != -1)); + match(ConI); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// Long Immediate +operand immL() %{ + match(ConL); + + op_cost(20); + format %{ %} + interface(CONST_INTER); +%} + +// Long Immediate 8-bit +operand immL8() %{ + predicate(-0x80L <= n->get_long() && n->get_long() < 0x80L); + match(ConL); + + op_cost(5); + format %{ %} + 
interface(CONST_INTER); +%} + +// Pointer for polling page +operand immP_poll() %{ + predicate(n->get_ptr() != 0 && n->get_ptr() == (intptr_t)os::get_polling_page()); + match(ConP); + op_cost(5); + + format %{ %} + interface(CONST_INTER); +%} + +operand immL16() %{ + predicate((-32768 <= n->get_long()) && (n->get_long() <= 32767)); + match(ConL); + + op_cost(10); + format %{ %} + interface(CONST_INTER); +%} + +// Long Immediate 32-bit signed +operand immL32() %{ + predicate(n->get_long() == (int)(n->get_long())); + match(ConL); + + op_cost(15); + format %{ %} + interface(CONST_INTER); +%} + +// bit 3..6 zero +operand immL_M121() %{ + predicate(n->get_long() == -121L); + match(ConL); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +// bit 0..2 zero +operand immL_M8() %{ + predicate(n->get_long() == -8L); + match(ConL); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +// bit 1..2 zero +operand immL_M7() %{ + predicate(n->get_long() == -7L); + match(ConL); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +// bit 2 zero +operand immL_M5() %{ + predicate(n->get_long() == -5L); + match(ConL); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +// bit 0..1 zero +operand immL_M4() %{ + predicate(n->get_long() == -4L); + match(ConL); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +operand immL_M1() %{ + predicate(n->get_long() == -1L); + match(ConL); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +// Long Immediate zero +operand immL_0() %{ + predicate(n->get_long() == 0L); + match(ConL); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +operand immL_7() %{ + predicate(n->get_long() == 7L); + match(ConL); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +// Long Immediate: low 32-bit mask +operand immL_MaxUI() %{ + predicate(n->get_long() == 0xFFFFFFFFL); + match(ConL); + op_cost(20); + + format %{ %} + interface(CONST_INTER); +%} + +operand immL_M32767_32768() %{ + predicate((-32767 <= n->get_long()) && (n->get_long() <= 32768)); + match(ConL); + + op_cost(10); + format %{ %} + interface(CONST_INTER); +%} + +operand immL_0_65535() %{ + predicate(n->get_long() >= 0 && n->get_long() <= 65535); + match(ConL); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +// Operand for non-negtive long mask +operand immL_nonneg_mask() %{ + predicate((n->get_long() >= 0) && (Assembler::is_jlong_mask(n->get_long()) != -1)); + match(ConL); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// Pointer Immediate +operand immP() %{ + match(ConP); + + op_cost(10); + format %{ %} + interface(CONST_INTER); +%} + +// NULL Pointer Immediate +operand immP_0() %{ + predicate(n->get_ptr() == 0); + match(ConP); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +// Pointer Immediate: 64-bit +operand immP_no_oop_cheap() %{ + predicate(!n->bottom_type()->isa_oop_ptr() && (MacroAssembler::insts_for_set64(n->get_ptr()) <= 3)); + match(ConP); + + op_cost(5); + // formats are generated automatically for constants and base registers + format %{ %} + interface(CONST_INTER); +%} + +// Pointer Immediate +operand immN() %{ + match(ConN); + + op_cost(10); + format %{ %} + interface(CONST_INTER); +%} + +operand immNKlass() %{ + match(ConNKlass); + + op_cost(10); + format %{ %} + interface(CONST_INTER); +%} + +// NULL Pointer Immediate +operand immN_0() %{ + predicate(n->get_narrowcon() == 0); + match(ConN); + + op_cost(5); + format %{ %} + interface(CONST_INTER); +%} + +// 
Single-precision floating-point immediate +operand immF() %{ + match(ConF); + + op_cost(20); + format %{ %} + interface(CONST_INTER); +%} + +// Single-precision floating-point zero +operand immF_0() %{ + predicate(jint_cast(n->getf()) == 0); + match(ConF); + + op_cost(5); + format %{ %} + interface(CONST_INTER); +%} + +// Double-precision floating-point immediate +operand immD() %{ + match(ConD); + + op_cost(20); + format %{ %} + interface(CONST_INTER); +%} + +// Double-precision floating-point zero +operand immD_0() %{ + predicate(jlong_cast(n->getd()) == 0); + match(ConD); + + op_cost(5); + format %{ %} + interface(CONST_INTER); +%} + +// Register Operands +// Integer Register +operand mRegI() %{ + constraint(ALLOC_IN_RC(int_reg)); + match(RegI); + + format %{ %} + interface(REG_INTER); +%} + +operand no_Ax_mRegI() %{ + constraint(ALLOC_IN_RC(no_Ax_int_reg)); + match(RegI); + match(mRegI); + + format %{ %} + interface(REG_INTER); +%} + +operand mS0RegI() %{ + constraint(ALLOC_IN_RC(s0_reg)); + match(RegI); + match(mRegI); + + format %{ "S0" %} + interface(REG_INTER); +%} + +operand mS1RegI() %{ + constraint(ALLOC_IN_RC(s1_reg)); + match(RegI); + match(mRegI); + + format %{ "S1" %} + interface(REG_INTER); +%} + +operand mS3RegI() %{ + constraint(ALLOC_IN_RC(s3_reg)); + match(RegI); + match(mRegI); + + format %{ "S3" %} + interface(REG_INTER); +%} + +operand mS4RegI() %{ + constraint(ALLOC_IN_RC(s4_reg)); + match(RegI); + match(mRegI); + + format %{ "S4" %} + interface(REG_INTER); +%} + +operand mS5RegI() %{ + constraint(ALLOC_IN_RC(s5_reg)); + match(RegI); + match(mRegI); + + format %{ "S5" %} + interface(REG_INTER); +%} + +operand mS6RegI() %{ + constraint(ALLOC_IN_RC(s6_reg)); + match(RegI); + match(mRegI); + + format %{ "S6" %} + interface(REG_INTER); +%} + +operand mS7RegI() %{ + constraint(ALLOC_IN_RC(s7_reg)); + match(RegI); + match(mRegI); + + format %{ "S7" %} + interface(REG_INTER); +%} + + +operand mT0RegI() %{ + constraint(ALLOC_IN_RC(t0_reg)); + match(RegI); + match(mRegI); + + format %{ "T0" %} + interface(REG_INTER); +%} + +operand mT1RegI() %{ + constraint(ALLOC_IN_RC(t1_reg)); + match(RegI); + match(mRegI); + + format %{ "T1" %} + interface(REG_INTER); +%} + +operand mT2RegI() %{ + constraint(ALLOC_IN_RC(t2_reg)); + match(RegI); + match(mRegI); + + format %{ "T2" %} + interface(REG_INTER); +%} + +operand mT3RegI() %{ + constraint(ALLOC_IN_RC(t3_reg)); + match(RegI); + match(mRegI); + + format %{ "T3" %} + interface(REG_INTER); +%} + +operand mT8RegI() %{ + constraint(ALLOC_IN_RC(t8_reg)); + match(RegI); + match(mRegI); + + format %{ "T8" %} + interface(REG_INTER); +%} + +operand mT9RegI() %{ + constraint(ALLOC_IN_RC(t9_reg)); + match(RegI); + match(mRegI); + + format %{ "T9" %} + interface(REG_INTER); +%} + +operand mA0RegI() %{ + constraint(ALLOC_IN_RC(a0_reg)); + match(RegI); + match(mRegI); + + format %{ "A0" %} + interface(REG_INTER); +%} + +operand mA1RegI() %{ + constraint(ALLOC_IN_RC(a1_reg)); + match(RegI); + match(mRegI); + + format %{ "A1" %} + interface(REG_INTER); +%} + +operand mA2RegI() %{ + constraint(ALLOC_IN_RC(a2_reg)); + match(RegI); + match(mRegI); + + format %{ "A2" %} + interface(REG_INTER); +%} + +operand mA3RegI() %{ + constraint(ALLOC_IN_RC(a3_reg)); + match(RegI); + match(mRegI); + + format %{ "A3" %} + interface(REG_INTER); +%} + +operand mA4RegI() %{ + constraint(ALLOC_IN_RC(a4_reg)); + match(RegI); + match(mRegI); + + format %{ "A4" %} + interface(REG_INTER); +%} + +operand mA5RegI() %{ + constraint(ALLOC_IN_RC(a5_reg)); + match(RegI); + 
match(mRegI); + + format %{ "A5" %} + interface(REG_INTER); +%} + +operand mA6RegI() %{ + constraint(ALLOC_IN_RC(a6_reg)); + match(RegI); + match(mRegI); + + format %{ "A6" %} + interface(REG_INTER); +%} + +operand mA7RegI() %{ + constraint(ALLOC_IN_RC(a7_reg)); + match(RegI); + match(mRegI); + + format %{ "A7" %} + interface(REG_INTER); +%} + +operand mV0RegI() %{ + constraint(ALLOC_IN_RC(v0_reg)); + match(RegI); + match(mRegI); + + format %{ "V0" %} + interface(REG_INTER); +%} + +operand mV1RegI() %{ + constraint(ALLOC_IN_RC(v1_reg)); + match(RegI); + match(mRegI); + + format %{ "V1" %} + interface(REG_INTER); +%} + +operand mRegN() %{ + constraint(ALLOC_IN_RC(int_reg)); + match(RegN); + + format %{ %} + interface(REG_INTER); +%} + +operand t0_RegN() %{ + constraint(ALLOC_IN_RC(t0_reg)); + match(RegN); + match(mRegN); + + format %{ %} + interface(REG_INTER); +%} + +operand t1_RegN() %{ + constraint(ALLOC_IN_RC(t1_reg)); + match(RegN); + match(mRegN); + + format %{ %} + interface(REG_INTER); +%} + +operand t3_RegN() %{ + constraint(ALLOC_IN_RC(t3_reg)); + match(RegN); + match(mRegN); + + format %{ %} + interface(REG_INTER); +%} + +operand t8_RegN() %{ + constraint(ALLOC_IN_RC(t8_reg)); + match(RegN); + match(mRegN); + + format %{ %} + interface(REG_INTER); +%} + +operand t9_RegN() %{ + constraint(ALLOC_IN_RC(t9_reg)); + match(RegN); + match(mRegN); + + format %{ %} + interface(REG_INTER); +%} + +operand a0_RegN() %{ + constraint(ALLOC_IN_RC(a0_reg)); + match(RegN); + match(mRegN); + + format %{ %} + interface(REG_INTER); +%} + +operand a1_RegN() %{ + constraint(ALLOC_IN_RC(a1_reg)); + match(RegN); + match(mRegN); + + format %{ %} + interface(REG_INTER); +%} + +operand a2_RegN() %{ + constraint(ALLOC_IN_RC(a2_reg)); + match(RegN); + match(mRegN); + + format %{ %} + interface(REG_INTER); +%} + +operand a3_RegN() %{ + constraint(ALLOC_IN_RC(a3_reg)); + match(RegN); + match(mRegN); + + format %{ %} + interface(REG_INTER); +%} + +operand a4_RegN() %{ + constraint(ALLOC_IN_RC(a4_reg)); + match(RegN); + match(mRegN); + + format %{ %} + interface(REG_INTER); +%} + +operand a5_RegN() %{ + constraint(ALLOC_IN_RC(a5_reg)); + match(RegN); + match(mRegN); + + format %{ %} + interface(REG_INTER); +%} + +operand a6_RegN() %{ + constraint(ALLOC_IN_RC(a6_reg)); + match(RegN); + match(mRegN); + + format %{ %} + interface(REG_INTER); +%} + +operand a7_RegN() %{ + constraint(ALLOC_IN_RC(a7_reg)); + match(RegN); + match(mRegN); + + format %{ %} + interface(REG_INTER); +%} + +operand s0_RegN() %{ + constraint(ALLOC_IN_RC(s0_reg)); + match(RegN); + match(mRegN); + + format %{ %} + interface(REG_INTER); +%} + +operand s1_RegN() %{ + constraint(ALLOC_IN_RC(s1_reg)); + match(RegN); + match(mRegN); + + format %{ %} + interface(REG_INTER); +%} + +operand s2_RegN() %{ + constraint(ALLOC_IN_RC(s2_reg)); + match(RegN); + match(mRegN); + + format %{ %} + interface(REG_INTER); +%} + +operand s3_RegN() %{ + constraint(ALLOC_IN_RC(s3_reg)); + match(RegN); + match(mRegN); + + format %{ %} + interface(REG_INTER); +%} + +operand s4_RegN() %{ + constraint(ALLOC_IN_RC(s4_reg)); + match(RegN); + match(mRegN); + + format %{ %} + interface(REG_INTER); +%} + +operand s5_RegN() %{ + constraint(ALLOC_IN_RC(s5_reg)); + match(RegN); + match(mRegN); + + format %{ %} + interface(REG_INTER); +%} + +operand s6_RegN() %{ + constraint(ALLOC_IN_RC(s6_reg)); + match(RegN); + match(mRegN); + + format %{ %} + interface(REG_INTER); +%} + +operand s7_RegN() %{ + constraint(ALLOC_IN_RC(s7_reg)); + match(RegN); + match(mRegN); + + format %{ %} + 
interface(REG_INTER); +%} + +operand v0_RegN() %{ + constraint(ALLOC_IN_RC(v0_reg)); + match(RegN); + match(mRegN); + + format %{ %} + interface(REG_INTER); +%} + +operand v1_RegN() %{ + constraint(ALLOC_IN_RC(v1_reg)); + match(RegN); + match(mRegN); + + format %{ %} + interface(REG_INTER); +%} + +// Pointer Register +operand mRegP() %{ + constraint(ALLOC_IN_RC(p_reg)); + match(RegP); + match(a0_RegP); + + format %{ %} + interface(REG_INTER); +%} + +operand no_T8_mRegP() %{ + constraint(ALLOC_IN_RC(no_T8_p_reg)); + match(RegP); + match(mRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand s1_RegP() +%{ + constraint(ALLOC_IN_RC(s1_long_reg)); + match(RegP); + match(mRegP); + match(no_T8_mRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand s3_RegP() +%{ + constraint(ALLOC_IN_RC(s3_long_reg)); + match(RegP); + match(mRegP); + match(no_T8_mRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand s4_RegP() +%{ + constraint(ALLOC_IN_RC(s4_long_reg)); + match(RegP); + match(mRegP); + match(no_T8_mRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand s5_RegP() +%{ + constraint(ALLOC_IN_RC(s5_long_reg)); + match(RegP); + match(mRegP); + match(no_T8_mRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand s6_RegP() +%{ + constraint(ALLOC_IN_RC(s6_long_reg)); + match(RegP); + match(mRegP); + match(no_T8_mRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand s7_RegP() +%{ + constraint(ALLOC_IN_RC(s7_long_reg)); + match(RegP); + match(mRegP); + match(no_T8_mRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand t0_RegP() +%{ + constraint(ALLOC_IN_RC(t0_long_reg)); + match(RegP); + match(mRegP); + match(no_T8_mRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand t1_RegP() +%{ + constraint(ALLOC_IN_RC(t1_long_reg)); + match(RegP); + match(mRegP); + match(no_T8_mRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand t2_RegP() +%{ + constraint(ALLOC_IN_RC(t2_long_reg)); + match(RegP); + match(mRegP); + match(no_T8_mRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand t3_RegP() +%{ + constraint(ALLOC_IN_RC(t3_long_reg)); + match(RegP); + match(mRegP); + match(no_T8_mRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand t8_RegP() +%{ + constraint(ALLOC_IN_RC(t8_long_reg)); + match(RegP); + match(mRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand t9_RegP() +%{ + constraint(ALLOC_IN_RC(t9_long_reg)); + match(RegP); + match(mRegP); + match(no_T8_mRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand a0_RegP() +%{ + constraint(ALLOC_IN_RC(a0_long_reg)); + match(RegP); + match(mRegP); + match(no_T8_mRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand a1_RegP() +%{ + constraint(ALLOC_IN_RC(a1_long_reg)); + match(RegP); + match(mRegP); + match(no_T8_mRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand a2_RegP() +%{ + constraint(ALLOC_IN_RC(a2_long_reg)); + match(RegP); + match(mRegP); + match(no_T8_mRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand a3_RegP() +%{ + constraint(ALLOC_IN_RC(a3_long_reg)); + match(RegP); + match(mRegP); + match(no_T8_mRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand a4_RegP() +%{ + constraint(ALLOC_IN_RC(a4_long_reg)); + match(RegP); + match(mRegP); + match(no_T8_mRegP); + + format %{ %} + interface(REG_INTER); +%} + + +operand a5_RegP() +%{ + constraint(ALLOC_IN_RC(a5_long_reg)); + match(RegP); + match(mRegP); + match(no_T8_mRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand a6_RegP() +%{ + 
constraint(ALLOC_IN_RC(a6_long_reg)); + match(RegP); + match(mRegP); + match(no_T8_mRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand a7_RegP() +%{ + constraint(ALLOC_IN_RC(a7_long_reg)); + match(RegP); + match(mRegP); + match(no_T8_mRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand v0_RegP() +%{ + constraint(ALLOC_IN_RC(v0_long_reg)); + match(RegP); + match(mRegP); + match(no_T8_mRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand v1_RegP() +%{ + constraint(ALLOC_IN_RC(v1_long_reg)); + match(RegP); + match(mRegP); + match(no_T8_mRegP); + + format %{ %} + interface(REG_INTER); +%} + +/* +operand mSPRegP(mRegP reg) %{ + constraint(ALLOC_IN_RC(sp_reg)); + match(reg); + + format %{ "SP" %} + interface(REG_INTER); +%} + +operand mFPRegP(mRegP reg) %{ + constraint(ALLOC_IN_RC(fp_reg)); + match(reg); + + format %{ "FP" %} + interface(REG_INTER); +%} +*/ + +operand mRegL() %{ + constraint(ALLOC_IN_RC(long_reg)); + match(RegL); + + format %{ %} + interface(REG_INTER); +%} + +operand v0RegL() %{ + constraint(ALLOC_IN_RC(v0_long_reg)); + match(RegL); + match(mRegL); + + format %{ %} + interface(REG_INTER); +%} + +operand v1RegL() %{ + constraint(ALLOC_IN_RC(v1_long_reg)); + match(RegL); + match(mRegL); + + format %{ %} + interface(REG_INTER); +%} + +operand a0RegL() %{ + constraint(ALLOC_IN_RC(a0_long_reg)); + match(RegL); + match(mRegL); + + format %{ "A0" %} + interface(REG_INTER); +%} + +operand a1RegL() %{ + constraint(ALLOC_IN_RC(a1_long_reg)); + match(RegL); + match(mRegL); + + format %{ %} + interface(REG_INTER); +%} + +operand a2RegL() %{ + constraint(ALLOC_IN_RC(a2_long_reg)); + match(RegL); + match(mRegL); + + format %{ %} + interface(REG_INTER); +%} + +operand a3RegL() %{ + constraint(ALLOC_IN_RC(a3_long_reg)); + match(RegL); + match(mRegL); + + format %{ %} + interface(REG_INTER); +%} + +operand t0RegL() %{ + constraint(ALLOC_IN_RC(t0_long_reg)); + match(RegL); + match(mRegL); + + format %{ %} + interface(REG_INTER); +%} + +operand t1RegL() %{ + constraint(ALLOC_IN_RC(t1_long_reg)); + match(RegL); + match(mRegL); + + format %{ %} + interface(REG_INTER); +%} + +operand t3RegL() %{ + constraint(ALLOC_IN_RC(t3_long_reg)); + match(RegL); + match(mRegL); + + format %{ %} + interface(REG_INTER); +%} + +operand t8RegL() %{ + constraint(ALLOC_IN_RC(t8_long_reg)); + match(RegL); + match(mRegL); + + format %{ %} + interface(REG_INTER); +%} + +operand a4RegL() %{ + constraint(ALLOC_IN_RC(a4_long_reg)); + match(RegL); + match(mRegL); + + format %{ %} + interface(REG_INTER); +%} + +operand a5RegL() %{ + constraint(ALLOC_IN_RC(a5_long_reg)); + match(RegL); + match(mRegL); + + format %{ %} + interface(REG_INTER); +%} + +operand a6RegL() %{ + constraint(ALLOC_IN_RC(a6_long_reg)); + match(RegL); + match(mRegL); + + format %{ %} + interface(REG_INTER); +%} + +operand a7RegL() %{ + constraint(ALLOC_IN_RC(a7_long_reg)); + match(RegL); + match(mRegL); + + format %{ %} + interface(REG_INTER); +%} + +operand s0RegL() %{ + constraint(ALLOC_IN_RC(s0_long_reg)); + match(RegL); + match(mRegL); + + format %{ %} + interface(REG_INTER); +%} + +operand s1RegL() %{ + constraint(ALLOC_IN_RC(s1_long_reg)); + match(RegL); + match(mRegL); + + format %{ %} + interface(REG_INTER); +%} + +operand s3RegL() %{ + constraint(ALLOC_IN_RC(s3_long_reg)); + match(RegL); + match(mRegL); + + format %{ %} + interface(REG_INTER); +%} + +operand s4RegL() %{ + constraint(ALLOC_IN_RC(s4_long_reg)); + match(RegL); + match(mRegL); + + format %{ %} + interface(REG_INTER); +%} + +operand s7RegL() %{ + 
constraint(ALLOC_IN_RC(s7_long_reg)); + match(RegL); + match(mRegL); + + format %{ %} + interface(REG_INTER); +%} + +// Floating register operands +operand regF() %{ + constraint(ALLOC_IN_RC(flt_reg)); + match(RegF); + + format %{ %} + interface(REG_INTER); +%} + +//Double Precision Floating register operands +operand regD() %{ + constraint(ALLOC_IN_RC(dbl_reg)); + match(RegD); + + format %{ %} + interface(REG_INTER); +%} + +//----------Memory Operands---------------------------------------------------- +// Indirect Memory Operand +operand indirect(mRegP reg) %{ + constraint(ALLOC_IN_RC(p_reg)); + match(reg); + + format %{ "[$reg] @ indirect" %} + interface(MEMORY_INTER) %{ + base($reg); + index(0x0); /* NO_INDEX */ + scale(0x0); + disp(0x0); + %} +%} + +// Indirect Memory Plus Short Offset Operand +operand indOffset8(mRegP reg, immL8 off) +%{ + constraint(ALLOC_IN_RC(p_reg)); + match(AddP reg off); + + op_cost(10); + format %{ "[$reg + $off (8-bit)] @ indOffset8" %} + interface(MEMORY_INTER) %{ + base($reg); + index(0x0); /* NO_INDEX */ + scale(0x0); + disp($off); + %} +%} + +// Indirect Memory Times Scale Plus Index Register +operand indIndexScale(mRegP reg, mRegL lreg, immI_0_3 scale) +%{ + predicate(UseLEXT1); + constraint(ALLOC_IN_RC(p_reg)); + match(AddP reg (LShiftL lreg scale)); + + op_cost(10); + format %{"[$reg + $lreg << $scale] @ indIndexScale" %} + interface(MEMORY_INTER) %{ + base($reg); + index($lreg); + scale($scale); + disp(0x0); + %} +%} + + +// [base + index + offset] +operand baseIndexOffset8(mRegP base, mRegL index, immL8 off) +%{ + predicate(UseLEXT1); + constraint(ALLOC_IN_RC(p_reg)); + op_cost(5); + match(AddP (AddP base index) off); + + format %{ "[$base + $index + $off (8-bit)] @ baseIndexOffset8" %} + interface(MEMORY_INTER) %{ + base($base); + index($index); + scale(0x0); + disp($off); + %} +%} + +// [base + index + offset] +operand baseIndexOffset8_convI2L(mRegP base, mRegI index, immL8 off) +%{ + predicate(UseLEXT1); + constraint(ALLOC_IN_RC(p_reg)); + op_cost(5); + match(AddP (AddP base (ConvI2L index)) off); + + format %{ "[$base + $index + $off (8-bit)] @ baseIndexOffset8_convI2L" %} + interface(MEMORY_INTER) %{ + base($base); + index($index); + scale(0x0); + disp($off); + %} +%} + +// [base + index<in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0); + op_cost(10); + match(AddP (AddP base (LShiftL (ConvI2L index) scale)) off); + + format %{ "[$base + $index << $scale + $off (8-bit)] @ basePosIndexScaleOffset8" %} + interface(MEMORY_INTER) %{ + base($base); + index($index); + scale($scale); + disp($off); + %} +%} + +//FIXME: I think it's better to limit the immI to be 16-bit at most! 
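+// Editorial sketch (not part of this port): the MEMORY_INTER fields declared by
+// these operands are what an ins_encode block later sees as $mem$$base,
+// $mem$$index, $mem$$scale and $mem$$disp.  For example, once the matcher folds
+// 'AddP reg (LShiftL lreg scale)' into indIndexScale above, the load/store
+// instructs further down encode it through exactly those four fields:
+//
+//   ins_encode %{
+//     __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale,
+//                      $mem$$disp, MacroAssembler::LOAD_INT);
+//   %}
+//
+// so a single "memory" operand class lets one instruct cover every addressing
+// shape declared in this section.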
+// Indirect Memory Plus Long Offset Operand +operand indOffset32(mRegP reg, immL32 off) %{ + constraint(ALLOC_IN_RC(p_reg)); + op_cost(20); + match(AddP reg off); + + format %{ "[$reg + $off (32-bit)] @ indOffset32" %} + interface(MEMORY_INTER) %{ + base($reg); + index(0x0); /* NO_INDEX */ + scale(0x0); + disp($off); + %} +%} + +// Indirect Memory Plus Index Register +operand indIndex(mRegP addr, mRegL index) %{ + constraint(ALLOC_IN_RC(p_reg)); + match(AddP addr index); + + op_cost(20); + format %{"[$addr + $index] @ indIndex" %} + interface(MEMORY_INTER) %{ + base($addr); + index($index); + scale(0x0); + disp(0x0); + %} +%} + +operand indirectNarrowKlass(mRegN reg) +%{ + predicate(Universe::narrow_klass_shift() == 0); + constraint(ALLOC_IN_RC(p_reg)); + op_cost(10); + match(DecodeNKlass reg); + + format %{ "[$reg] @ indirectNarrowKlass" %} + interface(MEMORY_INTER) %{ + base($reg); + index(0x0); + scale(0x0); + disp(0x0); + %} +%} + +operand indOffset8NarrowKlass(mRegN reg, immL8 off) +%{ + predicate(Universe::narrow_klass_shift() == 0); + constraint(ALLOC_IN_RC(p_reg)); + op_cost(10); + match(AddP (DecodeNKlass reg) off); + + format %{ "[$reg + $off (8-bit)] @ indOffset8NarrowKlass" %} + interface(MEMORY_INTER) %{ + base($reg); + index(0x0); + scale(0x0); + disp($off); + %} +%} + +operand indOffset32NarrowKlass(mRegN reg, immL32 off) +%{ + predicate(Universe::narrow_klass_shift() == 0); + constraint(ALLOC_IN_RC(p_reg)); + op_cost(10); + match(AddP (DecodeNKlass reg) off); + + format %{ "[$reg + $off (32-bit)] @ indOffset32NarrowKlass" %} + interface(MEMORY_INTER) %{ + base($reg); + index(0x0); + scale(0x0); + disp($off); + %} +%} + +operand indIndexOffsetNarrowKlass(mRegN reg, mRegL lreg, immL32 off) +%{ + predicate(UseLEXT1); + predicate(Universe::narrow_klass_shift() == 0); + constraint(ALLOC_IN_RC(p_reg)); + match(AddP (AddP (DecodeNKlass reg) lreg) off); + + op_cost(10); + format %{"[$reg + $off + $lreg] @ indIndexOffsetNarrowKlass" %} + interface(MEMORY_INTER) %{ + base($reg); + index($lreg); + scale(0x0); + disp($off); + %} +%} + +operand indIndexNarrowKlass(mRegN reg, mRegL lreg) +%{ + predicate(Universe::narrow_klass_shift() == 0); + constraint(ALLOC_IN_RC(p_reg)); + match(AddP (DecodeNKlass reg) lreg); + + op_cost(10); + format %{"[$reg + $lreg] @ indIndexNarrowKlass" %} + interface(MEMORY_INTER) %{ + base($reg); + index($lreg); + scale(0x0); + disp(0x0); + %} +%} + +// Indirect Memory Operand +operand indirectNarrow(mRegN reg) +%{ + predicate(Universe::narrow_oop_shift() == 0); + constraint(ALLOC_IN_RC(p_reg)); + op_cost(10); + match(DecodeN reg); + + format %{ "[$reg] @ indirectNarrow" %} + interface(MEMORY_INTER) %{ + base($reg); + index(0x0); + scale(0x0); + disp(0x0); + %} +%} + +// Indirect Memory Plus Short Offset Operand +operand indOffset8Narrow(mRegN reg, immL8 off) +%{ + predicate(Universe::narrow_oop_shift() == 0); + constraint(ALLOC_IN_RC(p_reg)); + op_cost(10); + match(AddP (DecodeN reg) off); + + format %{ "[$reg + $off (8-bit)] @ indOffset8Narrow" %} + interface(MEMORY_INTER) %{ + base($reg); + index(0x0); + scale(0x0); + disp($off); + %} +%} + +// Indirect Memory Plus Index Register Plus Offset Operand +operand indIndexOffset8Narrow(mRegN reg, mRegL lreg, immL8 off) +%{ + predicate((Universe::narrow_oop_shift() == 0) && UseLEXT1); + constraint(ALLOC_IN_RC(p_reg)); + match(AddP (AddP (DecodeN reg) lreg) off); + + op_cost(10); + format %{"[$reg + $off + $lreg] @ indIndexOffset8Narrow" %} + interface(MEMORY_INTER) %{ + base($reg); + index($lreg); + scale(0x0); + 
disp($off); + %} +%} + +//----------Conditional Branch Operands---------------------------------------- +// Comparison Op - This is the operation of the comparison, and is limited to +// the following set of codes: +// L (<), LE (<=), G (>), GE (>=), E (==), NE (!=) +// +// Other attributes of the comparison, such as unsignedness, are specified +// by the comparison instruction that sets a condition code flags register. +// That result is represented by a flags operand whose subtype is appropriate +// to the unsignedness (etc.) of the comparison. +// +// Later, the instruction which matches both the Comparison Op (a Bool) and +// the flags (produced by the Cmp) specifies the coding of the comparison op +// by matching a specific subtype of Bool operand below, such as cmpOpU. + +// Comparision Code +operand cmpOp() %{ + match(Bool); + + format %{ "" %} + interface(COND_INTER) %{ + equal(0x01); + not_equal(0x02); + greater(0x03); + greater_equal(0x04); + less(0x05); + less_equal(0x06); + overflow(0x7); + no_overflow(0x8); + %} +%} + + +// Comparision Code +// Comparison Code, unsigned compare. Used by FP also, with +// C2 (unordered) turned into GT or LT already. The other bits +// C0 and C3 are turned into Carry & Zero flags. +operand cmpOpU() %{ + match(Bool); + + format %{ "" %} + interface(COND_INTER) %{ + equal(0x01); + not_equal(0x02); + greater(0x03); + greater_equal(0x04); + less(0x05); + less_equal(0x06); + overflow(0x7); + no_overflow(0x8); + %} +%} + + +//----------Special Memory Operands-------------------------------------------- +// Stack Slot Operand - This operand is used for loading and storing temporary +// values on the stack where a match requires a value to +// flow through memory. +operand stackSlotP(sRegP reg) %{ + constraint(ALLOC_IN_RC(stack_slots)); + // No match rule because this operand is only generated in matching + op_cost(50); + format %{ "[$reg]" %} + interface(MEMORY_INTER) %{ + base(0x1d); // SP + index(0x0); // No Index + scale(0x0); // No Scale + disp($reg); // Stack Offset + %} +%} + +operand stackSlotI(sRegI reg) %{ + constraint(ALLOC_IN_RC(stack_slots)); + // No match rule because this operand is only generated in matching + op_cost(50); + format %{ "[$reg]" %} + interface(MEMORY_INTER) %{ + base(0x1d); // SP + index(0x0); // No Index + scale(0x0); // No Scale + disp($reg); // Stack Offset + %} +%} + +operand stackSlotF(sRegF reg) %{ + constraint(ALLOC_IN_RC(stack_slots)); + // No match rule because this operand is only generated in matching + op_cost(50); + format %{ "[$reg]" %} + interface(MEMORY_INTER) %{ + base(0x1d); // SP + index(0x0); // No Index + scale(0x0); // No Scale + disp($reg); // Stack Offset + %} +%} + +operand stackSlotD(sRegD reg) %{ + constraint(ALLOC_IN_RC(stack_slots)); + // No match rule because this operand is only generated in matching + op_cost(50); + format %{ "[$reg]" %} + interface(MEMORY_INTER) %{ + base(0x1d); // SP + index(0x0); // No Index + scale(0x0); // No Scale + disp($reg); // Stack Offset + %} +%} + +operand stackSlotL(sRegL reg) %{ + constraint(ALLOC_IN_RC(stack_slots)); + // No match rule because this operand is only generated in matching + op_cost(50); + format %{ "[$reg]" %} + interface(MEMORY_INTER) %{ + base(0x1d); // SP + index(0x0); // No Index + scale(0x0); // No Scale + disp($reg); // Stack Offset + %} +%} + + +//------------------------OPERAND CLASSES-------------------------------------- +//opclass memory( direct, indirect, indOffset16, indOffset32, indOffset32X, indIndexOffset ); +opclass memory( 
indirect, indirectNarrow, indOffset8, indOffset32, indIndex, indIndexScale, baseIndexOffset8, baseIndexOffset8_convI2L, indOffset8Narrow, indIndexOffset8Narrow); + + +//----------PIPELINE----------------------------------------------------------- +// Rules which define the behavior of the target architectures pipeline. + +pipeline %{ + + //----------ATTRIBUTES--------------------------------------------------------- + attributes %{ + fixed_size_instructions; // Fixed size instructions + branch_has_delay_slot; // branch have delay slot in gs2 + max_instructions_per_bundle = 1; // 1 instruction per bundle + max_bundles_per_cycle = 4; // Up to 4 bundles per cycle + bundle_unit_size=4; + instruction_unit_size = 4; // An instruction is 4 bytes long + instruction_fetch_unit_size = 16; // The processor fetches one line + instruction_fetch_units = 1; // of 16 bytes + + // List of nop instructions + nops( MachNop ); + %} + + //----------RESOURCES---------------------------------------------------------- + // Resources are the functional units available to the machine + + resources(D1, D2, D3, D4, DECODE = D1 | D2 | D3| D4, ALU1, ALU2, ALU = ALU1 | ALU2, FPU1, FPU2, FPU = FPU1 | FPU2, MEM, BR); + + //----------PIPELINE DESCRIPTION----------------------------------------------- + // Pipeline Description specifies the stages in the machine's pipeline + + // IF: fetch + // ID: decode + // RD: read + // CA: caculate + // WB: write back + // CM: commit + + pipe_desc(IF, ID, RD, CA, WB, CM); + + + //----------PIPELINE CLASSES--------------------------------------------------- + // Pipeline Classes describe the stages in which input and output are + // referenced by the hardware pipeline. + + //No.1 Integer ALU reg-reg operation : dst <-- reg1 op reg2 + pipe_class ialu_regI_regI(mRegI dst, mRegI src1, mRegI src2) %{ + single_instruction; + src1 : RD(read); + src2 : RD(read); + dst : WB(write)+1; + DECODE : ID; + ALU : CA; + %} + + //No.19 Integer mult operation : dst <-- reg1 mult reg2 + pipe_class ialu_mult(mRegI dst, mRegI src1, mRegI src2) %{ + src1 : RD(read); + src2 : RD(read); + dst : WB(write)+5; + DECODE : ID; + ALU2 : CA; + %} + + pipe_class mulL_reg_reg(mRegL dst, mRegL src1, mRegL src2) %{ + src1 : RD(read); + src2 : RD(read); + dst : WB(write)+10; + DECODE : ID; + ALU2 : CA; + %} + + //No.19 Integer div operation : dst <-- reg1 div reg2 + pipe_class ialu_div(mRegI dst, mRegI src1, mRegI src2) %{ + src1 : RD(read); + src2 : RD(read); + dst : WB(write)+10; + DECODE : ID; + ALU2 : CA; + %} + + //No.19 Integer mod operation : dst <-- reg1 mod reg2 + pipe_class ialu_mod(mRegI dst, mRegI src1, mRegI src2) %{ + instruction_count(2); + src1 : RD(read); + src2 : RD(read); + dst : WB(write)+10; + DECODE : ID; + ALU2 : CA; + %} + + //No.15 Long ALU reg-reg operation : dst <-- reg1 op reg2 + pipe_class ialu_regL_regL(mRegL dst, mRegL src1, mRegL src2) %{ + instruction_count(2); + src1 : RD(read); + src2 : RD(read); + dst : WB(write); + DECODE : ID; + ALU : CA; + %} + + //No.18 Long ALU reg-imm16 operation : dst <-- reg1 op imm16 + pipe_class ialu_regL_imm16(mRegL dst, mRegL src) %{ + instruction_count(2); + src : RD(read); + dst : WB(write); + DECODE : ID; + ALU : CA; + %} + + //no.16 load Long from memory : + pipe_class ialu_loadL(mRegL dst, memory mem) %{ + instruction_count(2); + mem : RD(read); + dst : WB(write)+5; + DECODE : ID; + MEM : RD; + %} + + //No.17 Store Long to Memory : + pipe_class ialu_storeL(mRegL src, memory mem) %{ + instruction_count(2); + mem : RD(read); + src : RD(read); + DECODE : 
ID; + MEM : RD; + %} + + //No.2 Integer ALU reg-imm16 operation : dst <-- reg1 op imm16 + pipe_class ialu_regI_imm16(mRegI dst, mRegI src) %{ + single_instruction; + src : RD(read); + dst : WB(write); + DECODE : ID; + ALU : CA; + %} + + //No.3 Integer move operation : dst <-- reg + pipe_class ialu_regI_mov(mRegI dst, mRegI src) %{ + src : RD(read); + dst : WB(write); + DECODE : ID; + ALU : CA; + %} + + //No.4 No instructions : do nothing + pipe_class empty( ) %{ + instruction_count(0); + %} + + //No.5 UnConditional branch : + pipe_class pipe_jump( label labl ) %{ + multiple_bundles; + DECODE : ID; + BR : RD; + %} + + //No.6 ALU Conditional branch : + pipe_class pipe_alu_branch(mRegI src1, mRegI src2, label labl ) %{ + multiple_bundles; + src1 : RD(read); + src2 : RD(read); + DECODE : ID; + BR : RD; + %} + + //no.7 load integer from memory : + pipe_class ialu_loadI(mRegI dst, memory mem) %{ + mem : RD(read); + dst : WB(write)+3; + DECODE : ID; + MEM : RD; + %} + + //No.8 Store Integer to Memory : + pipe_class ialu_storeI(mRegI src, memory mem) %{ + mem : RD(read); + src : RD(read); + DECODE : ID; + MEM : RD; + %} + + + //No.10 Floating FPU reg-reg operation : dst <-- reg1 op reg2 + pipe_class fpu_regF_regF(regF dst, regF src1, regF src2) %{ + src1 : RD(read); + src2 : RD(read); + dst : WB(write); + DECODE : ID; + FPU : CA; + %} + + //No.22 Floating div operation : dst <-- reg1 div reg2 + pipe_class fpu_div(regF dst, regF src1, regF src2) %{ + src1 : RD(read); + src2 : RD(read); + dst : WB(write); + DECODE : ID; + FPU2 : CA; + %} + + pipe_class fcvt_I2D(regD dst, mRegI src) %{ + src : RD(read); + dst : WB(write); + DECODE : ID; + FPU1 : CA; + %} + + pipe_class fcvt_D2I(mRegI dst, regD src) %{ + src : RD(read); + dst : WB(write); + DECODE : ID; + FPU1 : CA; + %} + + pipe_class pipe_mfc1(mRegI dst, regD src) %{ + src : RD(read); + dst : WB(write); + DECODE : ID; + MEM : RD; + %} + + pipe_class pipe_mtc1(regD dst, mRegI src) %{ + src : RD(read); + dst : WB(write); + DECODE : ID; + MEM : RD(5); + %} + + //No.23 Floating sqrt operation : dst <-- reg1 sqrt reg2 + pipe_class fpu_sqrt(regF dst, regF src1, regF src2) %{ + multiple_bundles; + src1 : RD(read); + src2 : RD(read); + dst : WB(write); + DECODE : ID; + FPU2 : CA; + %} + + //No.11 Load Floating from Memory : + pipe_class fpu_loadF(regF dst, memory mem) %{ + instruction_count(1); + mem : RD(read); + dst : WB(write)+3; + DECODE : ID; + MEM : RD; + %} + + //No.12 Store Floating to Memory : + pipe_class fpu_storeF(regF src, memory mem) %{ + instruction_count(1); + mem : RD(read); + src : RD(read); + DECODE : ID; + MEM : RD; + %} + + //No.13 FPU Conditional branch : + pipe_class pipe_fpu_branch(regF src1, regF src2, label labl ) %{ + multiple_bundles; + src1 : RD(read); + src2 : RD(read); + DECODE : ID; + BR : RD; + %} + +//No.14 Floating FPU reg operation : dst <-- op reg + pipe_class fpu1_regF(regF dst, regF src) %{ + src : RD(read); + dst : WB(write); + DECODE : ID; + FPU : CA; + %} + + pipe_class long_memory_op() %{ + instruction_count(10); multiple_bundles; force_serialization; + fixed_latency(30); + %} + + pipe_class simple_call() %{ + instruction_count(10); multiple_bundles; force_serialization; + fixed_latency(200); + BR : RD; + %} + + pipe_class call() %{ + instruction_count(10); multiple_bundles; force_serialization; + fixed_latency(200); + %} + + //FIXME: + //No.9 Piple slow : for multi-instructions + pipe_class pipe_slow( ) %{ + instruction_count(20); + force_serialization; + multiple_bundles; + fixed_latency(50); + %} + +%} + + + 
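+// Editorial note (an interpretation, not a statement from this port): the
+// operand timings in the pipe_classes above, e.g. 'dst : WB(write)+3' in
+// ialu_loadI, tell the C2 scheduler (through the adlc-generated pipeline
+// classes) how many cycles past the WB stage a result becomes available, so a
+// dependent instruction is kept at least that far behind its producer.  A
+// hypothetical load-use pair would therefore be spaced roughly as:
+//
+//   lw   T1, 0(A0)        // ialu_loadI: result usable at WB+3
+//   addu T2, T1, A1       // ialu_regI_regI: held back ~3 cycles by the latency above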
+//----------INSTRUCTIONS------------------------------------------------------- +// +// match -- States which machine-independent subtree may be replaced +// by this instruction. +// ins_cost -- The estimated cost of this instruction is used by instruction +// selection to identify a minimum cost tree of machine +// instructions that matches a tree of machine-independent +// instructions. +// format -- A string providing the disassembly for this instruction. +// The value of an instruction's operand may be inserted +// by referring to it with a '$' prefix. +// opcode -- Three instruction opcodes may be provided. These are referred +// to within an encode class as $primary, $secondary, and $tertiary +// respectively. The primary opcode is commonly used to +// indicate the type of machine instruction, while secondary +// and tertiary are often used for prefix options or addressing +// modes. +// ins_encode -- A list of encode classes with parameters. The encode class +// name must have been defined in an 'enc_class' specification +// in the encode section of the architecture description. + + +// Load Integer +instruct loadI(mRegI dst, memory mem) %{ + match(Set dst (LoadI mem)); + + ins_cost(125); + format %{ "lw $dst, $mem #@loadI" %} + ins_encode %{ + __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_INT); + %} + ins_pipe( ialu_loadI ); +%} + +instruct loadI_convI2L(mRegL dst, memory mem) %{ + match(Set dst (ConvI2L (LoadI mem))); + + ins_cost(125); + format %{ "lw $dst, $mem #@loadI_convI2L" %} + ins_encode %{ + __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_INT); + %} + ins_pipe( ialu_loadI ); +%} + +// Load Integer (32 bit signed) to Byte (8 bit signed) +instruct loadI2B(mRegI dst, memory mem, immI_24 twentyfour) %{ + match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour)); + + ins_cost(125); + format %{ "lb $dst, $mem\t# int -> byte #@loadI2B" %} + ins_encode %{ + __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_BYTE); + %} + ins_pipe(ialu_loadI); +%} + +// Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned) +instruct loadI2UB(mRegI dst, memory mem, immI_255 mask) %{ + match(Set dst (AndI (LoadI mem) mask)); + + ins_cost(125); + format %{ "lbu $dst, $mem\t# int -> ubyte #@loadI2UB" %} + ins_encode %{ + __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_BYTE); + %} + ins_pipe(ialu_loadI); +%} + +// Load Integer (32 bit signed) to Short (16 bit signed) +instruct loadI2S(mRegI dst, memory mem, immI_16 sixteen) %{ + match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen)); + + ins_cost(125); + format %{ "lh $dst, $mem\t# int -> short #@loadI2S" %} + ins_encode %{ + __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_SHORT); + %} + ins_pipe(ialu_loadI); +%} + +// Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned) +instruct loadI2US(mRegI dst, memory mem, immI_65535 mask) %{ + match(Set dst (AndI (LoadI mem) mask)); + + ins_cost(125); + format %{ "lhu $dst, $mem\t# int -> ushort/char #@loadI2US" %} + ins_encode %{ + __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_SHORT); + %} + ins_pipe(ialu_loadI); +%} + +// Load Long. 
+instruct loadL(mRegL dst, memory mem) %{ +// predicate(!((LoadLNode*)n)->require_atomic_access()); + match(Set dst (LoadL mem)); + + ins_cost(250); + format %{ "ld $dst, $mem #@loadL" %} + ins_encode %{ + __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_LONG); + %} + ins_pipe( ialu_loadL ); +%} + +// Load Long - UNaligned +instruct loadL_unaligned(mRegL dst, memory mem) %{ + match(Set dst (LoadL_unaligned mem)); + + // FIXME: Need more effective ldl/ldr + ins_cost(450); + format %{ "ld $dst, $mem #@loadL_unaligned\n\t" %} + ins_encode %{ + __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_LONG); + %} + ins_pipe( ialu_loadL ); +%} + +// Store Long +instruct storeL_reg(memory mem, mRegL src) %{ + match(Set mem (StoreL mem src)); + + ins_cost(200); + format %{ "sd $mem, $src #@storeL_reg\n" %} + ins_encode %{ + __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_LONG); + %} + ins_pipe( ialu_storeL ); +%} + +instruct storeL_immL_0(memory mem, immL_0 zero) %{ + match(Set mem (StoreL mem zero)); + + ins_cost(180); + format %{ "sd zero, $mem #@storeL_immL_0" %} + ins_encode %{ + __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_LONG); + %} + ins_pipe( ialu_storeL ); +%} + +// Load Compressed Pointer +instruct loadN(mRegN dst, memory mem) +%{ + match(Set dst (LoadN mem)); + + ins_cost(125); // XXX + format %{ "lwu $dst, $mem\t# compressed ptr @ loadN" %} + ins_encode %{ + relocInfo::relocType disp_reloc = $mem->disp_reloc(); + assert(disp_reloc == relocInfo::none, "cannot have disp"); + __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_INT); + %} + ins_pipe( ialu_loadI ); // XXX +%} + +instruct loadN2P(mRegP dst, memory mem) +%{ + match(Set dst (DecodeN (LoadN mem))); + predicate(Universe::narrow_oop_base() == NULL && Universe::narrow_oop_shift() == 0); + + ins_cost(125); // XXX + format %{ "lwu $dst, $mem\t# @ loadN2P" %} + ins_encode %{ + relocInfo::relocType disp_reloc = $mem->disp_reloc(); + assert(disp_reloc == relocInfo::none, "cannot have disp"); + __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_INT); + %} + ins_pipe( ialu_loadI ); // XXX +%} + +// Load Pointer +instruct loadP(mRegP dst, memory mem) %{ + match(Set dst (LoadP mem)); + + ins_cost(125); + format %{ "ld $dst, $mem #@loadP" %} + ins_encode %{ + relocInfo::relocType disp_reloc = $mem->disp_reloc(); + assert(disp_reloc == relocInfo::none, "cannot have disp"); + __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_LONG); + %} + ins_pipe( ialu_loadI ); +%} + +// Load Klass Pointer +instruct loadKlass(mRegP dst, memory mem) %{ + match(Set dst (LoadKlass mem)); + + ins_cost(125); + format %{ "MOV $dst,$mem @ loadKlass" %} + ins_encode %{ + relocInfo::relocType disp_reloc = $mem->disp_reloc(); + assert(disp_reloc == relocInfo::none, "cannot have disp"); + __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_LONG); + %} + ins_pipe( ialu_loadI ); +%} + +// Load narrow Klass Pointer +instruct loadNKlass(mRegN dst, memory mem) +%{ + match(Set dst (LoadNKlass mem)); + + ins_cost(125); // XXX + format %{ "lwu $dst, $mem\t# compressed klass ptr @ loadNKlass" %} + ins_encode %{ + relocInfo::relocType disp_reloc = 
$mem->disp_reloc(); + assert(disp_reloc == relocInfo::none, "cannot have disp"); + __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_INT); + %} + ins_pipe( ialu_loadI ); // XXX +%} + +instruct loadN2PKlass(mRegP dst, memory mem) +%{ + match(Set dst (DecodeNKlass (LoadNKlass mem))); + predicate(Universe::narrow_klass_base() == NULL && Universe::narrow_klass_shift() == 0); + + ins_cost(125); // XXX + format %{ "lwu $dst, $mem\t# compressed klass ptr @ loadN2PKlass" %} + ins_encode %{ + relocInfo::relocType disp_reloc = $mem->disp_reloc(); + assert(disp_reloc == relocInfo::none, "cannot have disp"); + __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_INT); + %} + ins_pipe( ialu_loadI ); // XXX +%} + +// Load Constant +instruct loadConI(mRegI dst, immI src) %{ + match(Set dst src); + + ins_cost(150); + format %{ "mov $dst, $src #@loadConI" %} + ins_encode %{ + Register dst = $dst$$Register; + int value = $src$$constant; + __ move(dst, value); + %} + ins_pipe( ialu_regI_regI ); +%} + + +instruct loadConL_set64(mRegL dst, immL src) %{ + match(Set dst src); + ins_cost(120); + format %{ "li $dst, $src @ loadConL_set64" %} + ins_encode %{ + __ set64($dst$$Register, $src$$constant); + %} + ins_pipe(ialu_regL_regL); +%} + +instruct loadConL16(mRegL dst, immL16 src) %{ + match(Set dst src); + ins_cost(105); + format %{ "mov $dst, $src #@loadConL16" %} + ins_encode %{ + Register dst_reg = as_Register($dst$$reg); + int value = $src$$constant; + __ daddiu(dst_reg, R0, value); + %} + ins_pipe( ialu_regL_regL ); +%} + + +instruct loadConL_immL_0(mRegL dst, immL_0 src) %{ + match(Set dst src); + ins_cost(100); + format %{ "mov $dst, zero #@loadConL_immL_0" %} + ins_encode %{ + Register dst_reg = as_Register($dst$$reg); + __ daddu(dst_reg, R0, R0); + %} + ins_pipe( ialu_regL_regL ); +%} + +// Load Range +instruct loadRange(mRegI dst, memory mem) %{ + match(Set dst (LoadRange mem)); + + ins_cost(125); + format %{ "MOV $dst,$mem @ loadRange" %} + ins_encode %{ + __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_INT); + %} + ins_pipe( ialu_loadI ); +%} + + +instruct storeP(memory mem, mRegP src ) %{ + match(Set mem (StoreP mem src)); + + ins_cost(125); + format %{ "sd $src, $mem #@storeP" %} + ins_encode %{ + __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_LONG); + %} + ins_pipe( ialu_storeI ); +%} + +// Store NULL Pointer, mark word, or other simple pointer constant. 
+instruct storeImmP_immP_0(memory mem, immP_0 zero) %{ + match(Set mem (StoreP mem zero)); + + ins_cost(125); + format %{ "mov $mem, $zero #@storeImmP_immP_0" %} + ins_encode %{ + __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_LONG); + %} + ins_pipe( ialu_storeI ); +%} + +// Store Compressed Pointer +instruct storeN(memory mem, mRegN src) +%{ + match(Set mem (StoreN mem src)); + + ins_cost(125); // XXX + format %{ "sw $mem, $src\t# compressed ptr @ storeN" %} + ins_encode %{ + __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); + %} + ins_pipe( ialu_storeI ); +%} + +instruct storeP2N(memory mem, mRegP src) +%{ + match(Set mem (StoreN mem (EncodeP src))); + predicate(Universe::narrow_oop_base() == NULL && Universe::narrow_oop_shift() == 0); + + ins_cost(125); // XXX + format %{ "sw $mem, $src\t# @ storeP2N" %} + ins_encode %{ + __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); + %} + ins_pipe( ialu_storeI ); +%} + +instruct storeNKlass(memory mem, mRegN src) +%{ + match(Set mem (StoreNKlass mem src)); + + ins_cost(125); // XXX + format %{ "sw $mem, $src\t# compressed klass ptr @ storeNKlass" %} + ins_encode %{ + __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); + %} + ins_pipe( ialu_storeI ); +%} + +instruct storeP2NKlass(memory mem, mRegP src) +%{ + match(Set mem (StoreNKlass mem (EncodePKlass src))); + predicate(Universe::narrow_klass_base() == NULL && Universe::narrow_klass_shift() == 0); + + ins_cost(125); // XXX + format %{ "sw $mem, $src\t# @ storeP2NKlass" %} + ins_encode %{ + __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); + %} + ins_pipe( ialu_storeI ); +%} + +instruct storeImmN_immN_0(memory mem, immN_0 zero) +%{ + match(Set mem (StoreN mem zero)); + + ins_cost(125); // XXX + format %{ "storeN0 zero, $mem\t# compressed ptr" %} + ins_encode %{ + __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); + %} + ins_pipe( ialu_storeI ); +%} + +// Store Byte +instruct storeB_immB_0(memory mem, immI_0 zero) %{ + match(Set mem (StoreB mem zero)); + + format %{ "mov $mem, zero #@storeB_immB_0" %} + ins_encode %{ + __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_BYTE); + %} + ins_pipe( ialu_storeI ); +%} + +instruct storeB(memory mem, mRegI src) %{ + match(Set mem (StoreB mem src)); + + ins_cost(125); + format %{ "sb $src, $mem #@storeB" %} + ins_encode %{ + __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_BYTE); + %} + ins_pipe( ialu_storeI ); +%} + +instruct storeB_convL2I(memory mem, mRegL src) %{ + match(Set mem (StoreB mem (ConvL2I src))); + + ins_cost(125); + format %{ "sb $src, $mem #@storeB_convL2I" %} + ins_encode %{ + __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_BYTE); + %} + ins_pipe( ialu_storeI ); +%} + +// Load Byte (8bit signed) +instruct loadB(mRegI dst, memory mem) %{ + match(Set dst (LoadB mem)); + + ins_cost(125); + format %{ "lb $dst, $mem #@loadB" %} + ins_encode %{ + __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_BYTE); + %} + ins_pipe( ialu_loadI ); +%} + +instruct loadB_convI2L(mRegL dst, memory mem) %{ + match(Set dst (ConvI2L 
(LoadB mem))); + + ins_cost(125); + format %{ "lb $dst, $mem #@loadB_convI2L" %} + ins_encode %{ + __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_BYTE); + %} + ins_pipe( ialu_loadI ); +%} + +// Load Byte (8bit UNsigned) +instruct loadUB(mRegI dst, memory mem) %{ + match(Set dst (LoadUB mem)); + + ins_cost(125); + format %{ "lbu $dst, $mem #@loadUB" %} + ins_encode %{ + __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_BYTE); + %} + ins_pipe( ialu_loadI ); +%} + +instruct loadUB_convI2L(mRegL dst, memory mem) %{ + match(Set dst (ConvI2L (LoadUB mem))); + + ins_cost(125); + format %{ "lbu $dst, $mem #@loadUB_convI2L" %} + ins_encode %{ + __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_BYTE); + %} + ins_pipe( ialu_loadI ); +%} + +// Load Short (16bit signed) +instruct loadS(mRegI dst, memory mem) %{ + match(Set dst (LoadS mem)); + + ins_cost(125); + format %{ "lh $dst, $mem #@loadS" %} + ins_encode %{ + __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_SHORT); + %} + ins_pipe( ialu_loadI ); +%} + +// Load Short (16 bit signed) to Byte (8 bit signed) +instruct loadS2B(mRegI dst, memory mem, immI_24 twentyfour) %{ + match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour)); + + ins_cost(125); + format %{ "lb $dst, $mem\t# short -> byte #@loadS2B" %} + ins_encode %{ + __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_BYTE); + %} + ins_pipe(ialu_loadI); +%} + +instruct loadS_convI2L(mRegL dst, memory mem) %{ + match(Set dst (ConvI2L (LoadS mem))); + + ins_cost(125); + format %{ "lh $dst, $mem #@loadS_convI2L" %} + ins_encode %{ + __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_SHORT); + %} + ins_pipe( ialu_loadI ); +%} + +// Store Integer Immediate +instruct storeI_immI_0(memory mem, immI_0 zero) %{ + match(Set mem (StoreI mem zero)); + + format %{ "mov $mem, zero #@storeI_immI_0" %} + ins_encode %{ + __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); + %} + ins_pipe( ialu_storeI ); +%} + +// Store Integer +instruct storeI(memory mem, mRegI src) %{ + match(Set mem (StoreI mem src)); + + ins_cost(125); + format %{ "sw $mem, $src #@storeI" %} + ins_encode %{ + __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); + %} + ins_pipe( ialu_storeI ); +%} + +instruct storeI_convL2I(memory mem, mRegL src) %{ + match(Set mem (StoreI mem (ConvL2I src))); + + ins_cost(125); + format %{ "sw $mem, $src #@storeI_convL2I" %} + ins_encode %{ + __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); + %} + ins_pipe( ialu_storeI ); +%} + +// Load Float +instruct loadF(regF dst, memory mem) %{ + match(Set dst (LoadF mem)); + + ins_cost(150); + format %{ "loadF $dst, $mem #@loadF" %} + ins_encode %{ + __ loadstore_enc($dst$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_FLOAT); + %} + ins_pipe( ialu_loadI ); +%} + +instruct loadConP_general(mRegP dst, immP src) %{ + match(Set dst src); + + ins_cost(120); + format %{ "li $dst, $src #@loadConP_general" %} + + ins_encode %{ + Register dst = $dst$$Register; + long* value = (long*)$src$$constant; + + if($src->constant_reloc() == 
relocInfo::metadata_type){ + int klass_index = __ oop_recorder()->find_index((Klass*)value); + RelocationHolder rspec = metadata_Relocation::spec(klass_index); + + __ relocate(rspec); + __ patchable_set48(dst, (long)value); + } else if($src->constant_reloc() == relocInfo::oop_type){ + int oop_index = __ oop_recorder()->find_index((jobject)value); + RelocationHolder rspec = oop_Relocation::spec(oop_index); + + __ relocate(rspec); + __ patchable_set48(dst, (long)value); + } else if ($src->constant_reloc() == relocInfo::none) { + __ set64(dst, (long)value); + } + %} + + ins_pipe( ialu_regI_regI ); +%} + +instruct loadConP_no_oop_cheap(mRegP dst, immP_no_oop_cheap src) %{ + match(Set dst src); + + ins_cost(80); + format %{ "li $dst, $src @ loadConP_no_oop_cheap" %} + + ins_encode %{ + __ set64($dst$$Register, $src$$constant); + %} + + ins_pipe(ialu_regI_regI); +%} + + +instruct loadConP_poll(mRegP dst, immP_poll src) %{ + match(Set dst src); + + ins_cost(50); + format %{ "li $dst, $src #@loadConP_poll" %} + + ins_encode %{ + Register dst = $dst$$Register; + intptr_t value = (intptr_t)$src$$constant; + + __ set64(dst, (jlong)value); + %} + + ins_pipe( ialu_regI_regI ); +%} + +instruct loadConP_immP_0(mRegP dst, immP_0 src) +%{ + match(Set dst src); + + ins_cost(50); + format %{ "mov $dst, R0\t# ptr" %} + ins_encode %{ + Register dst_reg = $dst$$Register; + __ daddu(dst_reg, R0, R0); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct loadConN_immN_0(mRegN dst, immN_0 src) %{ + match(Set dst src); + format %{ "move $dst, R0\t# compressed NULL ptr" %} + ins_encode %{ + __ move($dst$$Register, R0); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct loadConN(mRegN dst, immN src) %{ + match(Set dst src); + + ins_cost(125); + format %{ "li $dst, $src\t# compressed ptr @ loadConN" %} + ins_encode %{ + Register dst = $dst$$Register; + __ set_narrow_oop(dst, (jobject)$src$$constant); + %} + ins_pipe( ialu_regI_regI ); // XXX +%} + +instruct loadConNKlass(mRegN dst, immNKlass src) %{ + match(Set dst src); + + ins_cost(125); + format %{ "li $dst, $src\t# compressed klass ptr @ loadConNKlass" %} + ins_encode %{ + Register dst = $dst$$Register; + __ set_narrow_klass(dst, (Klass*)$src$$constant); + %} + ins_pipe( ialu_regI_regI ); // XXX +%} + +//FIXME +// Tail Call; Jump from runtime stub to Java code. +// Also known as an 'interprocedural jump'. +// Target of jump will eventually return to caller. +// TailJump below removes the return address. +instruct TailCalljmpInd(mRegP jump_target, mRegP method_oop) %{ + match(TailCall jump_target method_oop ); + ins_cost(300); + format %{ "JMP $jump_target \t# @TailCalljmpInd" %} + + ins_encode %{ + Register target = $jump_target$$Register; + Register oop = $method_oop$$Register; + + // RA will be used in generate_forward_exception() + __ push(RA); + + __ move(S3, oop); + __ jr(target); + __ delayed()->nop(); + %} + + ins_pipe( pipe_jump ); +%} + +// Create exception oop: created by stack-crawling runtime code. +// Created exception is now available to this handler, and is setup +// just prior to jumping to this handler. No code emitted. +instruct CreateException( a0_RegP ex_oop ) +%{ + match(Set ex_oop (CreateEx)); + + // use the following format syntax + format %{ "# exception oop is in A0; no code emitted @CreateException" %} + ins_encode %{ + // X86 leaves this function empty + __ block_comment("CreateException is empty in MIPS"); + %} + ins_pipe( empty ); +// ins_pipe( pipe_jump ); +%} + + +/* The mechanism of exception handling is clear now. 
+ +- Common try/catch: + [stubGenerator_mips.cpp] generate_forward_exception() + |- V0, V1 are created + |- T9 <= SharedRuntime::exception_handler_for_return_address + `- jr T9 + `- the caller's exception_handler + `- jr OptoRuntime::exception_blob + `- here +- Rethrow(e.g. 'unwind'): + * The callee: + |- an exception is triggered during execution + `- exits the callee method through RethrowException node + |- The callee pushes exception_oop(T0) and exception_pc(RA) + `- The callee jumps to OptoRuntime::rethrow_stub() + * In OptoRuntime::rethrow_stub: + |- The VM calls _rethrow_Java to determine the return address in the caller method + `- exits the stub with tailjmpInd + |- pops exception_oop(V0) and exception_pc(V1) + `- jumps to the return address(usually an exception_handler) + * The caller: + `- continues processing the exception_blob with V0/V1 +*/ + +// Rethrow exception: +// The exception oop will come in the first argument position. +// Then JUMP (not call) to the rethrow stub code. +instruct RethrowException() +%{ + match(Rethrow); + + // use the following format syntax + format %{ "JMP rethrow_stub #@RethrowException" %} + ins_encode %{ + __ block_comment("@ RethrowException"); + + cbuf.set_insts_mark(); + cbuf.relocate(cbuf.insts_mark(), runtime_call_Relocation::spec()); + + // call OptoRuntime::rethrow_stub to get the exception handler in parent method + __ patchable_jump((address)OptoRuntime::rethrow_stub()); + %} + ins_pipe( pipe_jump ); +%} + +// ============================================================================ +// Branch Instructions --- long offset versions + +// Jump Direct +instruct jmpDir_long(label labl) %{ + match(Goto); + effect(USE labl); + + ins_cost(300); + format %{ "JMP $labl #@jmpDir_long" %} + + ins_encode %{ + Label* L = $labl$$label; + __ jmp_far(*L); + %} + + ins_pipe( pipe_jump ); + //ins_pc_relative(1); +%} + +// Jump Direct Conditional - Label defines a relative address from Jcc+1 +instruct jmpLoopEnd_long(cmpOp cop, mRegI src1, mRegI src2, label labl) %{ + match(CountedLoopEnd cop (CmpI src1 src2)); + effect(USE labl); + + ins_cost(300); + format %{ "J$cop $src1, $src2, $labl\t# Loop end @ jmpLoopEnd_long" %} + ins_encode %{ + Register op1 = $src1$$Register; + Register op2 = $src2$$Register; + Label* L = $labl$$label; + int flag = $cop$$cmpcode; + + switch(flag) { + case 0x01: //equal + __ beq_long(op1, op2, *L); + break; + case 0x02: //not_equal + __ bne_long(op1, op2, *L); + break; + case 0x03: //above + __ slt(AT, op2, op1); + __ bne_long(AT, R0, *L); + break; + case 0x04: //above_equal + __ slt(AT, op1, op2); + __ beq_long(AT, R0, *L); + break; + case 0x05: //below + __ slt(AT, op1, op2); + __ bne_long(AT, R0, *L); + break; + case 0x06: //below_equal + __ slt(AT, op2, op1); + __ beq_long(AT, R0, *L); + break; + default: + Unimplemented(); + } + %} + ins_pipe( pipe_jump ); + ins_pc_relative(1); +%} + +instruct jmpLoopEnd_reg_immI_long(cmpOp cop, mRegI src1, immI src2, label labl) %{ + match(CountedLoopEnd cop (CmpI src1 src2)); + effect(USE labl); + + ins_cost(300); + format %{ "J$cop $src1, $src2, $labl\t# Loop end @ jmpLoopEnd_reg_immI_long" %} + ins_encode %{ + Register op1 = $src1$$Register; + Register op2 = AT; + Label* L = $labl$$label; + int flag = $cop$$cmpcode; + + __ move(op2, $src2$$constant); + + switch(flag) { + case 0x01: //equal + __ beq_long(op1, op2, *L); + break; + case 0x02: //not_equal + __ bne_long(op1, op2, *L); + break; + case 0x03: //above + __ slt(AT, op2, op1); + __ bne_long(AT, R0, *L); + break; + case 0x04: 
//above_equal + __ slt(AT, op1, op2); + __ beq_long(AT, R0, *L); + break; + case 0x05: //below + __ slt(AT, op1, op2); + __ bne_long(AT, R0, *L); + break; + case 0x06: //below_equal + __ slt(AT, op2, op1); + __ beq_long(AT, R0, *L); + break; + default: + Unimplemented(); + } + %} + ins_pipe( pipe_jump ); + ins_pc_relative(1); +%} + + +// This match pattern is created for StoreIConditional since I cannot match IfNode without a RegFlags! +instruct jmpCon_flags_long(cmpOp cop, FlagsReg cr, label labl) %{ + match(If cop cr); + effect(USE labl); + + ins_cost(300); + format %{ "J$cop $labl #mips uses T0 as equivalent to eflag @jmpCon_flags_long" %} + + ins_encode %{ + Label* L = $labl$$label; + switch($cop$$cmpcode) { + case 0x01: //equal + __ bne_long($cr$$Register, R0, *L); + break; + case 0x02: //not equal + __ beq_long($cr$$Register, R0, *L); + break; + default: + Unimplemented(); + } + %} + + ins_pipe( pipe_jump ); + ins_pc_relative(1); +%} + +// Conditional jumps +instruct branchConP_zero_long(cmpOpU cmp, mRegP op1, immP_0 zero, label labl) %{ + match(If cmp (CmpP op1 zero)); + effect(USE labl); + + ins_cost(180); + format %{ "b$cmp $op1, R0, $labl #@branchConP_zero_long" %} + + ins_encode %{ + Register op1 = $op1$$Register; + Register op2 = R0; + Label* L = $labl$$label; + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: //equal + __ beq_long(op1, op2, *L); + break; + case 0x02: //not_equal + __ bne_long(op1, op2, *L); + break; + default: + Unimplemented(); + } + %} + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); +%} + +instruct branchConN2P_zero_long(cmpOpU cmp, mRegN op1, immP_0 zero, label labl) %{ + match(If cmp (CmpP (DecodeN op1) zero)); + predicate(Universe::narrow_oop_base() == NULL && Universe::narrow_oop_shift() == 0); + effect(USE labl); + + ins_cost(180); + format %{ "b$cmp $op1, R0, $labl #@branchConN2P_zero_long" %} + + ins_encode %{ + Register op1 = $op1$$Register; + Register op2 = R0; + Label* L = $labl$$label; + int flag = $cmp$$cmpcode; + + switch(flag) + { + case 0x01: //equal + __ beq_long(op1, op2, *L); + break; + case 0x02: //not_equal + __ bne_long(op1, op2, *L); + break; + default: + Unimplemented(); + } + %} + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); +%} + + +instruct branchConP_long(cmpOpU cmp, mRegP op1, mRegP op2, label labl) %{ + match(If cmp (CmpP op1 op2)); +// predicate(can_branch_register(_kids[0]->_leaf, _kids[1]->_leaf)); + effect(USE labl); + + ins_cost(200); + format %{ "b$cmp $op1, $op2, $labl #@branchConP_long" %} + + ins_encode %{ + Register op1 = $op1$$Register; + Register op2 = $op2$$Register; + Label* L = $labl$$label; + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: //equal + __ beq_long(op1, op2, *L); + break; + case 0x02: //not_equal + __ bne_long(op1, op2, *L); + break; + case 0x03: //above + __ sltu(AT, op2, op1); + __ bne_long(R0, AT, *L); + break; + case 0x04: //above_equal + __ sltu(AT, op1, op2); + __ beq_long(AT, R0, *L); + break; + case 0x05: //below + __ sltu(AT, op1, op2); + __ bne_long(R0, AT, *L); + break; + case 0x06: //below_equal + __ sltu(AT, op2, op1); + __ beq_long(AT, R0, *L); + break; + default: + Unimplemented(); + } + %} + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); +%} + +instruct cmpN_null_branch_long(cmpOp cmp, mRegN op1, immN_0 null, label labl) %{ + match(If cmp (CmpN op1 null)); + effect(USE labl); + + ins_cost(180); + format %{ "CMP $op1,0\t! 
compressed ptr\n\t" + "BP$cmp $labl @ cmpN_null_branch_long" %} + ins_encode %{ + Register op1 = $op1$$Register; + Register op2 = R0; + Label* L = $labl$$label; + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: //equal + __ beq_long(op1, op2, *L); + break; + case 0x02: //not_equal + __ bne_long(op1, op2, *L); + break; + default: + Unimplemented(); + } + %} +//TODO: pipe_branchP or create pipe_branchN LEE + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); +%} + +instruct cmpN_reg_branch_long(cmpOp cmp, mRegN op1, mRegN op2, label labl) %{ + match(If cmp (CmpN op1 op2)); + effect(USE labl); + + ins_cost(180); + format %{ "CMP $op1,$op2\t! compressed ptr\n\t" + "BP$cmp $labl @ cmpN_reg_branch_long" %} + ins_encode %{ + Register op1_reg = $op1$$Register; + Register op2_reg = $op2$$Register; + Label* L = $labl$$label; + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: //equal + __ beq_long(op1_reg, op2_reg, *L); + break; + case 0x02: //not_equal + __ bne_long(op1_reg, op2_reg, *L); + break; + case 0x03: //above + __ sltu(AT, op2_reg, op1_reg); + __ bne_long(R0, AT, *L); + break; + case 0x04: //above_equal + __ sltu(AT, op1_reg, op2_reg); + __ beq_long(AT, R0, *L); + break; + case 0x05: //below + __ sltu(AT, op1_reg, op2_reg); + __ bne_long(R0, AT, *L); + break; + case 0x06: //below_equal + __ sltu(AT, op2_reg, op1_reg); + __ beq_long(AT, R0, *L); + break; + default: + Unimplemented(); + } + %} + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); +%} + +instruct branchConIU_reg_reg_long(cmpOpU cmp, mRegI src1, mRegI src2, label labl) %{ + match( If cmp (CmpU src1 src2) ); + effect(USE labl); + format %{ "BR$cmp $src1, $src2, $labl #@branchConIU_reg_reg_long" %} + + ins_encode %{ + Register op1 = $src1$$Register; + Register op2 = $src2$$Register; + Label* L = $labl$$label; + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: //equal + __ beq_long(op1, op2, *L); + break; + case 0x02: //not_equal + __ bne_long(op1, op2, *L); + break; + case 0x03: //above + __ sltu(AT, op2, op1); + __ bne_long(AT, R0, *L); + break; + case 0x04: //above_equal + __ sltu(AT, op1, op2); + __ beq_long(AT, R0, *L); + break; + case 0x05: //below + __ sltu(AT, op1, op2); + __ bne_long(AT, R0, *L); + break; + case 0x06: //below_equal + __ sltu(AT, op2, op1); + __ beq_long(AT, R0, *L); + break; + default: + Unimplemented(); + } + %} + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); +%} + + +instruct branchConIU_reg_imm_long(cmpOpU cmp, mRegI src1, immI src2, label labl) %{ + match( If cmp (CmpU src1 src2) ); + effect(USE labl); + format %{ "BR$cmp $src1, $src2, $labl #@branchConIU_reg_imm_long" %} + + ins_encode %{ + Register op1 = $src1$$Register; + int val = $src2$$constant; + Label* L = $labl$$label; + int flag = $cmp$$cmpcode; + + __ move(AT, val); + switch(flag) { + case 0x01: //equal + __ beq_long(op1, AT, *L); + break; + case 0x02: //not_equal + __ bne_long(op1, AT, *L); + break; + case 0x03: //above + __ sltu(AT, AT, op1); + __ bne_long(R0, AT, *L); + break; + case 0x04: //above_equal + __ sltu(AT, op1, AT); + __ beq_long(AT, R0, *L); + break; + case 0x05: //below + __ sltu(AT, op1, AT); + __ bne_long(R0, AT, *L); + break; + case 0x06: //below_equal + __ sltu(AT, AT, op1); + __ beq_long(AT, R0, *L); + break; + default: + Unimplemented(); + } + %} + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); +%} + +instruct branchConI_reg_reg_long(cmpOp cmp, mRegI src1, mRegI src2, label labl) %{ + match( If cmp (CmpI src1 src2) ); + effect(USE labl); + format %{ "BR$cmp $src1, $src2, $labl 
#@branchConI_reg_reg_long" %} + + ins_encode %{ + Register op1 = $src1$$Register; + Register op2 = $src2$$Register; + Label* L = $labl$$label; + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: //equal + __ beq_long(op1, op2, *L); + break; + case 0x02: //not_equal + __ bne_long(op1, op2, *L); + break; + case 0x03: //above + __ slt(AT, op2, op1); + __ bne_long(R0, AT, *L); + break; + case 0x04: //above_equal + __ slt(AT, op1, op2); + __ beq_long(AT, R0, *L); + break; + case 0x05: //below + __ slt(AT, op1, op2); + __ bne_long(R0, AT, *L); + break; + case 0x06: //below_equal + __ slt(AT, op2, op1); + __ beq_long(AT, R0, *L); + break; + default: + Unimplemented(); + } + %} + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); +%} + +instruct branchConI_reg_immI_0_long(cmpOp cmp, mRegI src1, immI_0 src2, label labl) %{ + match( If cmp (CmpI src1 src2) ); + effect(USE labl); + ins_cost(170); + format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_immI_0_long" %} + + ins_encode %{ + Register op1 = $src1$$Register; + Label* L = $labl$$label; + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: //equal + __ beq_long(op1, R0, *L); + break; + case 0x02: //not_equal + __ bne_long(op1, R0, *L); + break; + case 0x03: //greater + __ slt(AT, R0, op1); + __ bne_long(R0, AT, *L); + break; + case 0x04: //greater_equal + __ slt(AT, op1, R0); + __ beq_long(AT, R0, *L); + break; + case 0x05: //less + __ slt(AT, op1, R0); + __ bne_long(R0, AT, *L); + break; + case 0x06: //less_equal + __ slt(AT, R0, op1); + __ beq_long(AT, R0, *L); + break; + default: + Unimplemented(); + } + %} + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); +%} + +instruct branchConI_reg_imm_long(cmpOp cmp, mRegI src1, immI src2, label labl) %{ + match( If cmp (CmpI src1 src2) ); + effect(USE labl); + ins_cost(200); + format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_imm_long" %} + + ins_encode %{ + Register op1 = $src1$$Register; + int val = $src2$$constant; + Label* L = $labl$$label; + int flag = $cmp$$cmpcode; + + __ move(AT, val); + switch(flag) { + case 0x01: //equal + __ beq_long(op1, AT, *L); + break; + case 0x02: //not_equal + __ bne_long(op1, AT, *L); + break; + case 0x03: //greater + __ slt(AT, AT, op1); + __ bne_long(R0, AT, *L); + break; + case 0x04: //greater_equal + __ slt(AT, op1, AT); + __ beq_long(AT, R0, *L); + break; + case 0x05: //less + __ slt(AT, op1, AT); + __ bne_long(R0, AT, *L); + break; + case 0x06: //less_equal + __ slt(AT, AT, op1); + __ beq_long(AT, R0, *L); + break; + default: + Unimplemented(); + } + %} + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); +%} + +instruct branchConIU_reg_immI_0_long(cmpOpU cmp, mRegI src1, immI_0 zero, label labl) %{ + match( If cmp (CmpU src1 zero) ); + effect(USE labl); + format %{ "BR$cmp $src1, zero, $labl #@branchConIU_reg_immI_0_long" %} + + ins_encode %{ + Register op1 = $src1$$Register; + Label* L = $labl$$label; + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: //equal + __ beq_long(op1, R0, *L); + break; + case 0x02: //not_equal + __ bne_long(op1, R0, *L); + break; + case 0x03: //above + __ bne_long(R0, op1, *L); + break; + case 0x04: //above_equal + __ beq_long(R0, R0, *L); + break; + case 0x05: //below + return; + break; + case 0x06: //below_equal + __ beq_long(op1, R0, *L); + break; + default: + Unimplemented(); + } + %} + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); +%} + + +instruct branchConIU_reg_immI16_long(cmpOpU cmp, mRegI src1, immI16 src2, label labl) %{ + match( If cmp (CmpU src1 src2) ); + effect(USE labl); 
+ ins_cost(180); + format %{ "BR$cmp $src1, $src2, $labl #@branchConIU_reg_immI16_long" %} + + ins_encode %{ + Register op1 = $src1$$Register; + int val = $src2$$constant; + Label* L = $labl$$label; + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: //equal + __ move(AT, val); + __ beq_long(op1, AT, *L); + break; + case 0x02: //not_equal + __ move(AT, val); + __ bne_long(op1, AT, *L); + break; + case 0x03: //above + __ move(AT, val); + __ sltu(AT, AT, op1); + __ bne_long(R0, AT, *L); + break; + case 0x04: //above_equal + __ sltiu(AT, op1, val); + __ beq_long(AT, R0, *L); + break; + case 0x05: //below + __ sltiu(AT, op1, val); + __ bne_long(R0, AT, *L); + break; + case 0x06: //below_equal + __ move(AT, val); + __ sltu(AT, AT, op1); + __ beq_long(AT, R0, *L); + break; + default: + Unimplemented(); + } + %} + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); +%} + + +instruct branchConL_regL_regL_long(cmpOp cmp, mRegL src1, mRegL src2, label labl) %{ + match( If cmp (CmpL src1 src2) ); + effect(USE labl); + format %{ "BR$cmp $src1, $src2, $labl #@branchConL_regL_regL_long" %} + ins_cost(250); + + ins_encode %{ + Register opr1_reg = as_Register($src1$$reg); + Register opr2_reg = as_Register($src2$$reg); + + Label* target = $labl$$label; + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: //equal + __ beq_long(opr1_reg, opr2_reg, *target); + break; + + case 0x02: //not_equal + __ bne_long(opr1_reg, opr2_reg, *target); + break; + + case 0x03: //greater + __ slt(AT, opr2_reg, opr1_reg); + __ bne_long(AT, R0, *target); + break; + + case 0x04: //greater_equal + __ slt(AT, opr1_reg, opr2_reg); + __ beq_long(AT, R0, *target); + break; + + case 0x05: //less + __ slt(AT, opr1_reg, opr2_reg); + __ bne_long(AT, R0, *target); + break; + + case 0x06: //less_equal + __ slt(AT, opr2_reg, opr1_reg); + __ beq_long(AT, R0, *target); + break; + + default: + Unimplemented(); + } + %} + + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); +%} + +instruct branchConUL_regL_regL_long(cmpOp cmp, mRegL src1, mRegL src2, label labl) %{ + match(If cmp (CmpUL src1 src2)); + effect(USE labl); + format %{ "BR$cmp $src1, $src2, $labl #@branchConUL_regL_regL_long" %} + ins_cost(250); + + ins_encode %{ + Register opr1_reg = as_Register($src1$$reg); + Register opr2_reg = as_Register($src2$$reg); + Label* target = $labl$$label; + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: // equal + __ beq_long(opr1_reg, opr2_reg, *target); + break; + + case 0x02: // not_equal + __ bne_long(opr1_reg, opr2_reg, *target); + break; + + case 0x03: // greater + __ sltu(AT, opr2_reg, opr1_reg); + __ bne_long(AT, R0, *target); + break; + + case 0x04: // greater_equal + __ sltu(AT, opr1_reg, opr2_reg); + __ beq_long(AT, R0, *target); + break; + + case 0x05: // less + __ sltu(AT, opr1_reg, opr2_reg); + __ bne_long(AT, R0, *target); + break; + + case 0x06: // less_equal + __ sltu(AT, opr2_reg, opr1_reg); + __ beq_long(AT, R0, *target); + break; + + default: + Unimplemented(); + } + %} + + ins_pc_relative(1); + ins_pipe(pipe_alu_branch); +%} + +instruct branchConL_regL_immL_0_long(cmpOp cmp, mRegL src1, immL_0 zero, label labl) %{ + match( If cmp (CmpL src1 zero) ); + effect(USE labl); + format %{ "BR$cmp $src1, zero, $labl #@branchConL_regL_immL_0_long" %} + ins_cost(150); + + ins_encode %{ + Register opr1_reg = as_Register($src1$$reg); + Register opr2_reg = R0; + + Label* target = $labl$$label; + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: //equal + __ beq_long(opr1_reg, opr2_reg, *target); + break; + + 
case 0x02: //not_equal + __ bne_long(opr1_reg, opr2_reg, *target); + break; + + case 0x03: //greater + __ slt(AT, opr2_reg, opr1_reg); + __ bne_long(AT, R0, *target); + break; + + case 0x04: //greater_equal + __ slt(AT, opr1_reg, opr2_reg); + __ beq_long(AT, R0, *target); + break; + + case 0x05: //less + __ slt(AT, opr1_reg, opr2_reg); + __ bne_long(AT, R0, *target); + break; + + case 0x06: //less_equal + __ slt(AT, opr2_reg, opr1_reg); + __ beq_long(AT, R0, *target); + break; + + default: + Unimplemented(); + } + %} + + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); +%} + +instruct branchConUL_regL_immL_0_long(cmpOp cmp, mRegL src1, immL_0 zero, label labl) %{ + match(If cmp (CmpUL src1 zero)); + effect(USE labl); + format %{ "BR$cmp $src1, zero, $labl #@branchConUL_regL_immL_0_long" %} + ins_cost(150); + + ins_encode %{ + Register opr1_reg = as_Register($src1$$reg); + Register opr2_reg = R0; + Label* target = $labl$$label; + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: // equal + case 0x04: // greater_equal + case 0x06: // less_equal + __ beq_long(opr1_reg, opr2_reg, *target); + break; + + case 0x02: // not_equal + case 0x03: // greater + __ bne_long(opr1_reg, opr2_reg, *target); + break; + + case 0x05: // less + __ beq_long(R0, R0, *target); + break; + + default: + Unimplemented(); + } + %} + + ins_pc_relative(1); + ins_pipe(pipe_alu_branch); +%} + +instruct branchConL_regL_immL_long(cmpOp cmp, mRegL src1, immL src2, label labl) %{ + match( If cmp (CmpL src1 src2) ); + effect(USE labl); + format %{ "BR$cmp $src1, $src2, $labl #@branchConL_regL_immL_long" %} + ins_cost(180); + + ins_encode %{ + Register opr1_reg = as_Register($src1$$reg); + Register opr2_reg = AT; + + Label* target = $labl$$label; + int flag = $cmp$$cmpcode; + + __ set64(opr2_reg, $src2$$constant); + + switch(flag) { + case 0x01: //equal + __ beq_long(opr1_reg, opr2_reg, *target); + break; + + case 0x02: //not_equal + __ bne_long(opr1_reg, opr2_reg, *target); + break; + + case 0x03: //greater + __ slt(AT, opr2_reg, opr1_reg); + __ bne_long(AT, R0, *target); + break; + + case 0x04: //greater_equal + __ slt(AT, opr1_reg, opr2_reg); + __ beq_long(AT, R0, *target); + break; + + case 0x05: //less + __ slt(AT, opr1_reg, opr2_reg); + __ bne_long(AT, R0, *target); + break; + + case 0x06: //less_equal + __ slt(AT, opr2_reg, opr1_reg); + __ beq_long(AT, R0, *target); + break; + + default: + Unimplemented(); + } + %} + + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); +%} + +instruct branchConUL_regL_immL_long(cmpOp cmp, mRegL src1, immL src2, label labl) %{ + match(If cmp (CmpUL src1 src2)); + effect(USE labl); + format %{ "BR$cmp $src1, $src2, $labl #@branchConUL_regL_immL_long" %} + ins_cost(180); + + ins_encode %{ + Register opr1_reg = as_Register($src1$$reg); + Register opr2_reg = AT; + Label* target = $labl$$label; + int flag = $cmp$$cmpcode; + + __ set64(opr2_reg, $src2$$constant); + + switch(flag) { + case 0x01: // equal + __ beq_long(opr1_reg, opr2_reg, *target); + break; + + case 0x02: // not_equal + __ bne_long(opr1_reg, opr2_reg, *target); + break; + + case 0x03: // greater + __ sltu(AT, opr2_reg, opr1_reg); + __ bne_long(AT, R0, *target); + break; + + case 0x04: // greater_equal + __ sltu(AT, opr1_reg, opr2_reg); + __ beq_long(AT, R0, *target); + break; + + case 0x05: // less + __ sltu(AT, opr1_reg, opr2_reg); + __ bne_long(AT, R0, *target); + break; + + case 0x06: // less_equal + __ sltu(AT, opr2_reg, opr1_reg); + __ beq_long(AT, R0, *target); + break; + + default: + Unimplemented(); + } + %} + 
+ ins_pc_relative(1); + ins_pipe(pipe_alu_branch); +%} + +//FIXME +instruct branchConF_reg_reg_long(cmpOp cmp, regF src1, regF src2, label labl) %{ + match( If cmp (CmpF src1 src2) ); + effect(USE labl); + format %{ "BR$cmp $src1, $src2, $labl #@branchConF_reg_reg_long" %} + + ins_encode %{ + FloatRegister reg_op1 = $src1$$FloatRegister; + FloatRegister reg_op2 = $src2$$FloatRegister; + Label* L = $labl$$label; + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: // equal + __ c_eq_s(reg_op1, reg_op2); + __ bc1t_long(*L); + break; + case 0x02: // not_equal + __ c_eq_s(reg_op1, reg_op2); + __ bc1f_long(*L); + break; + case 0x03: // greater + __ c_ule_s(reg_op1, reg_op2); + __ bc1f_long(*L); + break; + case 0x04: // greater_equal + __ c_ult_s(reg_op1, reg_op2); + __ bc1f_long(*L); + break; + case 0x05: // less + __ c_ult_s(reg_op1, reg_op2); + __ bc1t_long(*L); + break; + case 0x06: // less_equal + __ c_ule_s(reg_op1, reg_op2); + __ bc1t_long(*L); + break; + default: + Unimplemented(); + } + %} + + ins_pc_relative(1); + ins_pipe(pipe_slow); +%} + +instruct branchConD_reg_reg_long(cmpOp cmp, regD src1, regD src2, label labl) %{ + match( If cmp (CmpD src1 src2) ); + effect(USE labl); + format %{ "BR$cmp $src1, $src2, $labl #@branchConD_reg_reg_long" %} + + ins_encode %{ + FloatRegister reg_op1 = $src1$$FloatRegister; + FloatRegister reg_op2 = $src2$$FloatRegister; + Label* L = $labl$$label; + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: // equal + __ c_eq_d(reg_op1, reg_op2); + __ bc1t_long(*L); + break; + case 0x02: // not_equal + // c_ueq_d cannot distinguish NaN from equal. Double.isNaN(Double) is implemented by 'f != f', so the use of c_ueq_d causes bugs. + __ c_eq_d(reg_op1, reg_op2); + __ bc1f_long(*L); + break; + case 0x03: // greater + __ c_ule_d(reg_op1, reg_op2); + __ bc1f_long(*L); + break; + case 0x04: // greater_equal + __ c_ult_d(reg_op1, reg_op2); + __ bc1f_long(*L); + break; + case 0x05: // less + __ c_ult_d(reg_op1, reg_op2); + __ bc1t_long(*L); + break; + case 0x06: // less_equal + __ c_ule_d(reg_op1, reg_op2); + __ bc1t_long(*L); + break; + default: + Unimplemented(); + } + %} + + ins_pc_relative(1); + ins_pipe(pipe_slow); +%} + + +// ============================================================================ +// Branch Instructions -- short offset versions + +// Jump Direct +instruct jmpDir_short(label labl) %{ + match(Goto); + effect(USE labl); + + ins_cost(300); + format %{ "JMP $labl #@jmpDir_short" %} + + ins_encode %{ + Label &L = *($labl$$label); + if(&L) + __ b(L); + else + __ b(int(0)); + __ delayed()->nop(); + %} + + ins_pipe( pipe_jump ); + ins_pc_relative(1); + ins_short_branch(1); +%} + +// Jump Direct Conditional - Label defines a relative address from Jcc+1 +instruct jmpLoopEnd_short(cmpOp cop, mRegI src1, mRegI src2, label labl) %{ + match(CountedLoopEnd cop (CmpI src1 src2)); + effect(USE labl); + + ins_cost(300); + format %{ "J$cop $src1, $src2, $labl\t# Loop end @ jmpLoopEnd_short" %} + ins_encode %{ + Register op1 = $src1$$Register; + Register op2 = $src2$$Register; + Label &L = *($labl$$label); + int flag = $cop$$cmpcode; + + switch(flag) { + case 0x01: //equal + if (&L) + __ beq(op1, op2, L); + else + __ beq(op1, op2, (int)0); + break; + case 0x02: //not_equal + if (&L) + __ bne(op1, op2, L); + else + __ bne(op1, op2, (int)0); + break; + case 0x03: //above + __ slt(AT, op2, op1); + if(&L) + __ bne(AT, R0, L); + else + __ bne(AT, R0, (int)0); + break; + case 0x04: //above_equal + __ slt(AT, op1, op2); + if(&L) + __ beq(AT, R0, L); + 
else + __ beq(AT, R0, (int)0); + break; + case 0x05: //below + __ slt(AT, op1, op2); + if(&L) + __ bne(AT, R0, L); + else + __ bne(AT, R0, (int)0); + break; + case 0x06: //below_equal + __ slt(AT, op2, op1); + if(&L) + __ beq(AT, R0, L); + else + __ beq(AT, R0, (int)0); + break; + default: + Unimplemented(); + } + __ delayed()->nop(); + %} + ins_pipe( pipe_jump ); + ins_pc_relative(1); + ins_short_branch(1); +%} + +instruct jmpLoopEnd_reg_immI_short(cmpOp cop, mRegI src1, immI src2, label labl) %{ + match(CountedLoopEnd cop (CmpI src1 src2)); + effect(USE labl); + + ins_cost(300); + format %{ "J$cop $src1, $src2, $labl\t# Loop end @ jmpLoopEnd_reg_immI_short" %} + ins_encode %{ + Register op1 = $src1$$Register; + Register op2 = AT; + Label &L = *($labl$$label); + int flag = $cop$$cmpcode; + + __ move(op2, $src2$$constant); + + switch(flag) { + case 0x01: //equal + if (&L) + __ beq(op1, op2, L); + else + __ beq(op1, op2, (int)0); + break; + case 0x02: //not_equal + if (&L) + __ bne(op1, op2, L); + else + __ bne(op1, op2, (int)0); + break; + case 0x03: //above + __ slt(AT, op2, op1); + if(&L) + __ bne(AT, R0, L); + else + __ bne(AT, R0, (int)0); + break; + case 0x04: //above_equal + __ slt(AT, op1, op2); + if(&L) + __ beq(AT, R0, L); + else + __ beq(AT, R0, (int)0); + break; + case 0x05: //below + __ slt(AT, op1, op2); + if(&L) + __ bne(AT, R0, L); + else + __ bne(AT, R0, (int)0); + break; + case 0x06: //below_equal + __ slt(AT, op2, op1); + if(&L) + __ beq(AT, R0, L); + else + __ beq(AT, R0, (int)0); + break; + default: + Unimplemented(); + } + __ delayed()->nop(); + %} + ins_pipe( pipe_jump ); + ins_pc_relative(1); + ins_short_branch(1); +%} + + +// This match pattern is created for StoreIConditional since I cannot match IfNode without a RegFlags! +instruct jmpCon_flags_short(cmpOp cop, FlagsReg cr, label labl) %{ + match(If cop cr); + effect(USE labl); + + ins_cost(300); + format %{ "J$cop $labl #mips uses T0 as equivalent to eflag @jmpCon_flags_short" %} + + ins_encode %{ + Label &L = *($labl$$label); + switch($cop$$cmpcode) { + case 0x01: //equal + if (&L) + __ bne($cr$$Register, R0, L); + else + __ bne($cr$$Register, R0, (int)0); + break; + case 0x02: //not equal + if (&L) + __ beq($cr$$Register, R0, L); + else + __ beq($cr$$Register, R0, (int)0); + break; + default: + Unimplemented(); + } + __ delayed()->nop(); + %} + + ins_pipe( pipe_jump ); + ins_pc_relative(1); + ins_short_branch(1); +%} + +// Conditional jumps +instruct branchConP_zero_short(cmpOpU cmp, mRegP op1, immP_0 zero, label labl) %{ + match(If cmp (CmpP op1 zero)); + effect(USE labl); + + ins_cost(180); + format %{ "b$cmp $op1, R0, $labl #@branchConP_zero_short" %} + + ins_encode %{ + Register op1 = $op1$$Register; + Register op2 = R0; + Label &L = *($labl$$label); + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: //equal + if (&L) + __ beq(op1, op2, L); + else + __ beq(op1, op2, (int)0); + break; + case 0x02: //not_equal + if (&L) + __ bne(op1, op2, L); + else + __ bne(op1, op2, (int)0); + break; + default: + Unimplemented(); + } + __ delayed()->nop(); + %} + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); + ins_short_branch(1); +%} + +instruct branchConN2P_zero_short(cmpOpU cmp, mRegN op1, immP_0 zero, label labl) %{ + match(If cmp (CmpP (DecodeN op1) zero)); + predicate(Universe::narrow_oop_base() == NULL && Universe::narrow_oop_shift() == 0); + effect(USE labl); + + ins_cost(180); + format %{ "b$cmp $op1, R0, $labl #@branchConN2P_zero_short" %} + + ins_encode %{ + Register op1 = $op1$$Register; + 
Register op2 = R0; + Label &L = *($labl$$label); + int flag = $cmp$$cmpcode; + + switch(flag) + { + case 0x01: //equal + if (&L) + __ beq(op1, op2, L); + else + __ beq(op1, op2, (int)0); + break; + case 0x02: //not_equal + if (&L) + __ bne(op1, op2, L); + else + __ bne(op1, op2, (int)0); + break; + default: + Unimplemented(); + } + __ delayed()->nop(); + %} + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); + ins_short_branch(1); +%} + + +instruct branchConP_short(cmpOpU cmp, mRegP op1, mRegP op2, label labl) %{ + match(If cmp (CmpP op1 op2)); +// predicate(can_branch_register(_kids[0]->_leaf, _kids[1]->_leaf)); + effect(USE labl); + + ins_cost(200); + format %{ "b$cmp $op1, $op2, $labl #@branchConP_short" %} + + ins_encode %{ + Register op1 = $op1$$Register; + Register op2 = $op2$$Register; + Label &L = *($labl$$label); + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: //equal + if (&L) + __ beq(op1, op2, L); + else + __ beq(op1, op2, (int)0); + break; + case 0x02: //not_equal + if (&L) + __ bne(op1, op2, L); + else + __ bne(op1, op2, (int)0); + break; + case 0x03: //above + __ sltu(AT, op2, op1); + if(&L) + __ bne(R0, AT, L); + else + __ bne(R0, AT, (int)0); + break; + case 0x04: //above_equal + __ sltu(AT, op1, op2); + if(&L) + __ beq(AT, R0, L); + else + __ beq(AT, R0, (int)0); + break; + case 0x05: //below + __ sltu(AT, op1, op2); + if(&L) + __ bne(R0, AT, L); + else + __ bne(R0, AT, (int)0); + break; + case 0x06: //below_equal + __ sltu(AT, op2, op1); + if(&L) + __ beq(AT, R0, L); + else + __ beq(AT, R0, (int)0); + break; + default: + Unimplemented(); + } + __ delayed()->nop(); + %} + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); + ins_short_branch(1); +%} + +instruct cmpN_null_branch_short(cmpOp cmp, mRegN op1, immN_0 null, label labl) %{ + match(If cmp (CmpN op1 null)); + effect(USE labl); + + ins_cost(180); + format %{ "CMP $op1,0\t! compressed ptr\n\t" + "BP$cmp $labl @ cmpN_null_branch_short" %} + ins_encode %{ + Register op1 = $op1$$Register; + Register op2 = R0; + Label &L = *($labl$$label); + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: //equal + if (&L) + __ beq(op1, op2, L); + else + __ beq(op1, op2, (int)0); + break; + case 0x02: //not_equal + if (&L) + __ bne(op1, op2, L); + else + __ bne(op1, op2, (int)0); + break; + default: + Unimplemented(); + } + __ delayed()->nop(); + %} +//TODO: pipe_branchP or create pipe_branchN LEE + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); + ins_short_branch(1); +%} + +instruct cmpN_reg_branch_short(cmpOp cmp, mRegN op1, mRegN op2, label labl) %{ + match(If cmp (CmpN op1 op2)); + effect(USE labl); + + ins_cost(180); + format %{ "CMP $op1,$op2\t! 
compressed ptr\n\t" + "BP$cmp $labl @ cmpN_reg_branch_short" %} + ins_encode %{ + Register op1_reg = $op1$$Register; + Register op2_reg = $op2$$Register; + Label &L = *($labl$$label); + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: //equal + if (&L) + __ beq(op1_reg, op2_reg, L); + else + __ beq(op1_reg, op2_reg, (int)0); + break; + case 0x02: //not_equal + if (&L) + __ bne(op1_reg, op2_reg, L); + else + __ bne(op1_reg, op2_reg, (int)0); + break; + case 0x03: //above + __ sltu(AT, op2_reg, op1_reg); + if(&L) + __ bne(R0, AT, L); + else + __ bne(R0, AT, (int)0); + break; + case 0x04: //above_equal + __ sltu(AT, op1_reg, op2_reg); + if(&L) + __ beq(AT, R0, L); + else + __ beq(AT, R0, (int)0); + break; + case 0x05: //below + __ sltu(AT, op1_reg, op2_reg); + if(&L) + __ bne(R0, AT, L); + else + __ bne(R0, AT, (int)0); + break; + case 0x06: //below_equal + __ sltu(AT, op2_reg, op1_reg); + if(&L) + __ beq(AT, R0, L); + else + __ beq(AT, R0, (int)0); + break; + default: + Unimplemented(); + } + __ delayed()->nop(); + %} + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); + ins_short_branch(1); +%} + +instruct branchConIU_reg_reg_short(cmpOpU cmp, mRegI src1, mRegI src2, label labl) %{ + match( If cmp (CmpU src1 src2) ); + effect(USE labl); + format %{ "BR$cmp $src1, $src2, $labl #@branchConIU_reg_reg_short" %} + + ins_encode %{ + Register op1 = $src1$$Register; + Register op2 = $src2$$Register; + Label &L = *($labl$$label); + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: //equal + if (&L) + __ beq(op1, op2, L); + else + __ beq(op1, op2, (int)0); + break; + case 0x02: //not_equal + if (&L) + __ bne(op1, op2, L); + else + __ bne(op1, op2, (int)0); + break; + case 0x03: //above + __ sltu(AT, op2, op1); + if(&L) + __ bne(AT, R0, L); + else + __ bne(AT, R0, (int)0); + break; + case 0x04: //above_equal + __ sltu(AT, op1, op2); + if(&L) + __ beq(AT, R0, L); + else + __ beq(AT, R0, (int)0); + break; + case 0x05: //below + __ sltu(AT, op1, op2); + if(&L) + __ bne(AT, R0, L); + else + __ bne(AT, R0, (int)0); + break; + case 0x06: //below_equal + __ sltu(AT, op2, op1); + if(&L) + __ beq(AT, R0, L); + else + __ beq(AT, R0, (int)0); + break; + default: + Unimplemented(); + } + __ delayed()->nop(); + %} + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); + ins_short_branch(1); +%} + + +instruct branchConIU_reg_imm_short(cmpOpU cmp, mRegI src1, immI src2, label labl) %{ + match( If cmp (CmpU src1 src2) ); + effect(USE labl); + format %{ "BR$cmp $src1, $src2, $labl #@branchConIU_reg_imm_short" %} + + ins_encode %{ + Register op1 = $src1$$Register; + int val = $src2$$constant; + Label &L = *($labl$$label); + int flag = $cmp$$cmpcode; + + __ move(AT, val); + switch(flag) { + case 0x01: //equal + if (&L) + __ beq(op1, AT, L); + else + __ beq(op1, AT, (int)0); + break; + case 0x02: //not_equal + if (&L) + __ bne(op1, AT, L); + else + __ bne(op1, AT, (int)0); + break; + case 0x03: //above + __ sltu(AT, AT, op1); + if(&L) + __ bne(R0, AT, L); + else + __ bne(R0, AT, (int)0); + break; + case 0x04: //above_equal + __ sltu(AT, op1, AT); + if(&L) + __ beq(AT, R0, L); + else + __ beq(AT, R0, (int)0); + break; + case 0x05: //below + __ sltu(AT, op1, AT); + if(&L) + __ bne(R0, AT, L); + else + __ bne(R0, AT, (int)0); + break; + case 0x06: //below_equal + __ sltu(AT, AT, op1); + if(&L) + __ beq(AT, R0, L); + else + __ beq(AT, R0, (int)0); + break; + default: + Unimplemented(); + } + __ delayed()->nop(); + %} + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); + ins_short_branch(1); +%} + 
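All of the conditional-branch instructs in this file, long and short alike, decode the cmpOp/cmpOpU operand's $cmpcode the same way: 0x01 equal, 0x02 not_equal, 0x03 greater/above, 0x04 greater_equal/above_equal, 0x05 less/below, 0x06 less_equal/below_equal. Equality maps straight onto beq/bne, while the ordered forms first materialize the comparison into AT with slt (signed) or sltu (unsigned) and then branch on AT against R0. The _long variants call beq_long/bne_long because a plain MIPS beq/bne only reaches a signed 18-bit byte offset, and the _short variants fill the branch delay slot with __ delayed()->nop(). The standalone sketch below (hypothetical helper names, not the emitter itself) simply evaluates those six conditions the way the emitted sequences do.

    // Illustrative only: evaluates the six $cmpcode conditions exactly as the
    // emitted MIPS sequences do (slt/sltu into AT, then bne/beq AT against R0).
    #include <cstdint>
    #include <cstdio>

    static bool slt_(int64_t a, int64_t b)    { return a < b; }  // slt  AT, a, b
    static bool sltu_(uint64_t a, uint64_t b) { return a < b; }  // sltu AT, a, b

    static bool branch_taken(int flag, int64_t op1, int64_t op2, bool is_unsigned) {
      switch (flag) {
        case 0x01: return op1 == op2;                                        // beq  op1, op2, L
        case 0x02: return op1 != op2;                                        // bne  op1, op2, L
        case 0x03: return is_unsigned ? sltu_(op2, op1) : slt_(op2, op1);    // slt(u) AT, op2, op1; bne AT, R0, L
        case 0x04: return is_unsigned ? !sltu_(op1, op2) : !slt_(op1, op2);  // slt(u) AT, op1, op2; beq AT, R0, L
        case 0x05: return is_unsigned ? sltu_(op1, op2) : slt_(op1, op2);    // slt(u) AT, op1, op2; bne AT, R0, L
        case 0x06: return is_unsigned ? !sltu_(op2, op1) : !slt_(op2, op1);  // slt(u) AT, op2, op1; beq AT, R0, L
        default:   return false;                                             // Unimplemented()
      }
    }

    int main() {
      printf("%d\n", branch_taken(0x05, -1, 1, false)); // signed less: taken
      printf("%d\n", branch_taken(0x05, -1, 1, true));  // unsigned below: not taken (-1 wraps to UINT64_MAX)
      return 0;
    }

Running this prints 1 then 0, matching the signed-less versus unsigned-below split that the separate CmpI/CmpU (and CmpL/CmpUL) rules above encode with slt versus sltu.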
+instruct branchConI_reg_reg_short(cmpOp cmp, mRegI src1, mRegI src2, label labl) %{ + match( If cmp (CmpI src1 src2) ); + effect(USE labl); + format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_reg_short" %} + + ins_encode %{ + Register op1 = $src1$$Register; + Register op2 = $src2$$Register; + Label &L = *($labl$$label); + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: //equal + if (&L) + __ beq(op1, op2, L); + else + __ beq(op1, op2, (int)0); + break; + case 0x02: //not_equal + if (&L) + __ bne(op1, op2, L); + else + __ bne(op1, op2, (int)0); + break; + case 0x03: //above + __ slt(AT, op2, op1); + if(&L) + __ bne(R0, AT, L); + else + __ bne(R0, AT, (int)0); + break; + case 0x04: //above_equal + __ slt(AT, op1, op2); + if(&L) + __ beq(AT, R0, L); + else + __ beq(AT, R0, (int)0); + break; + case 0x05: //below + __ slt(AT, op1, op2); + if(&L) + __ bne(R0, AT, L); + else + __ bne(R0, AT, (int)0); + break; + case 0x06: //below_equal + __ slt(AT, op2, op1); + if(&L) + __ beq(AT, R0, L); + else + __ beq(AT, R0, (int)0); + break; + default: + Unimplemented(); + } + __ delayed()->nop(); + %} + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); + ins_short_branch(1); +%} + +instruct branchConI_reg_immI_0_short(cmpOp cmp, mRegI src1, immI_0 src2, label labl) %{ + match( If cmp (CmpI src1 src2) ); + effect(USE labl); + ins_cost(170); + format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_immI_0_short" %} + + ins_encode %{ + Register op1 = $src1$$Register; + Label &L = *($labl$$label); + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: //equal + if (&L) + __ beq(op1, R0, L); + else + __ beq(op1, R0, (int)0); + break; + case 0x02: //not_equal + if (&L) + __ bne(op1, R0, L); + else + __ bne(op1, R0, (int)0); + break; + case 0x03: //greater + if(&L) + __ bgtz(op1, L); + else + __ bgtz(op1, (int)0); + break; + case 0x04: //greater_equal + if(&L) + __ bgez(op1, L); + else + __ bgez(op1, (int)0); + break; + case 0x05: //less + if(&L) + __ bltz(op1, L); + else + __ bltz(op1, (int)0); + break; + case 0x06: //less_equal + if(&L) + __ blez(op1, L); + else + __ blez(op1, (int)0); + break; + default: + Unimplemented(); + } + __ delayed()->nop(); + %} + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); + ins_short_branch(1); +%} + + +instruct branchConI_reg_imm_short(cmpOp cmp, mRegI src1, immI src2, label labl) %{ + match( If cmp (CmpI src1 src2) ); + effect(USE labl); + ins_cost(200); + format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_imm_short" %} + + ins_encode %{ + Register op1 = $src1$$Register; + int val = $src2$$constant; + Label &L = *($labl$$label); + int flag = $cmp$$cmpcode; + + __ move(AT, val); + switch(flag) { + case 0x01: //equal + if (&L) + __ beq(op1, AT, L); + else + __ beq(op1, AT, (int)0); + break; + case 0x02: //not_equal + if (&L) + __ bne(op1, AT, L); + else + __ bne(op1, AT, (int)0); + break; + case 0x03: //greater + __ slt(AT, AT, op1); + if(&L) + __ bne(R0, AT, L); + else + __ bne(R0, AT, (int)0); + break; + case 0x04: //greater_equal + __ slt(AT, op1, AT); + if(&L) + __ beq(AT, R0, L); + else + __ beq(AT, R0, (int)0); + break; + case 0x05: //less + __ slt(AT, op1, AT); + if(&L) + __ bne(R0, AT, L); + else + __ bne(R0, AT, (int)0); + break; + case 0x06: //less_equal + __ slt(AT, AT, op1); + if(&L) + __ beq(AT, R0, L); + else + __ beq(AT, R0, (int)0); + break; + default: + Unimplemented(); + } + __ delayed()->nop(); + %} + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); + ins_short_branch(1); +%} + +instruct branchConIU_reg_immI_0_short(cmpOpU 
cmp, mRegI src1, immI_0 zero, label labl) %{ + match( If cmp (CmpU src1 zero) ); + effect(USE labl); + format %{ "BR$cmp $src1, zero, $labl #@branchConIU_reg_immI_0_short" %} + + ins_encode %{ + Register op1 = $src1$$Register; + Label &L = *($labl$$label); + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: //equal + if (&L) + __ beq(op1, R0, L); + else + __ beq(op1, R0, (int)0); + break; + case 0x02: //not_equal + if (&L) + __ bne(op1, R0, L); + else + __ bne(op1, R0, (int)0); + break; + case 0x03: //above + if(&L) + __ bne(R0, op1, L); + else + __ bne(R0, op1, (int)0); + break; + case 0x04: //above_equal + if(&L) + __ beq(R0, R0, L); + else + __ beq(R0, R0, (int)0); + break; + case 0x05: //below + return; + break; + case 0x06: //below_equal + if(&L) + __ beq(op1, R0, L); + else + __ beq(op1, R0, (int)0); + break; + default: + Unimplemented(); + } + __ delayed()->nop(); + %} + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); + ins_short_branch(1); +%} + + +instruct branchConIU_reg_immI16_short(cmpOpU cmp, mRegI src1, immI16 src2, label labl) %{ + match( If cmp (CmpU src1 src2) ); + effect(USE labl); + ins_cost(180); + format %{ "BR$cmp $src1, $src2, $labl #@branchConIU_reg_immI16_short" %} + + ins_encode %{ + Register op1 = $src1$$Register; + int val = $src2$$constant; + Label &L = *($labl$$label); + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: //equal + __ move(AT, val); + if (&L) + __ beq(op1, AT, L); + else + __ beq(op1, AT, (int)0); + break; + case 0x02: //not_equal + __ move(AT, val); + if (&L) + __ bne(op1, AT, L); + else + __ bne(op1, AT, (int)0); + break; + case 0x03: //above + __ move(AT, val); + __ sltu(AT, AT, op1); + if(&L) + __ bne(R0, AT, L); + else + __ bne(R0, AT, (int)0); + break; + case 0x04: //above_equal + __ sltiu(AT, op1, val); + if(&L) + __ beq(AT, R0, L); + else + __ beq(AT, R0, (int)0); + break; + case 0x05: //below + __ sltiu(AT, op1, val); + if(&L) + __ bne(R0, AT, L); + else + __ bne(R0, AT, (int)0); + break; + case 0x06: //below_equal + __ move(AT, val); + __ sltu(AT, AT, op1); + if(&L) + __ beq(AT, R0, L); + else + __ beq(AT, R0, (int)0); + break; + default: + Unimplemented(); + } + __ delayed()->nop(); + %} + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); + ins_short_branch(1); +%} + + +instruct branchConL_regL_regL_short(cmpOp cmp, mRegL src1, mRegL src2, label labl) %{ + match( If cmp (CmpL src1 src2) ); + effect(USE labl); + format %{ "BR$cmp $src1, $src2, $labl #@branchConL_regL_regL_short" %} + ins_cost(250); + + ins_encode %{ + Register opr1_reg = as_Register($src1$$reg); + Register opr2_reg = as_Register($src2$$reg); + + Label &target = *($labl$$label); + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: //equal + if (&target) + __ beq(opr1_reg, opr2_reg, target); + else + __ beq(opr1_reg, opr2_reg, (int)0); + __ delayed()->nop(); + break; + + case 0x02: //not_equal + if(&target) + __ bne(opr1_reg, opr2_reg, target); + else + __ bne(opr1_reg, opr2_reg, (int)0); + __ delayed()->nop(); + break; + + case 0x03: //greater + __ slt(AT, opr2_reg, opr1_reg); + if(&target) + __ bne(AT, R0, target); + else + __ bne(AT, R0, (int)0); + __ delayed()->nop(); + break; + + case 0x04: //greater_equal + __ slt(AT, opr1_reg, opr2_reg); + if(&target) + __ beq(AT, R0, target); + else + __ beq(AT, R0, (int)0); + __ delayed()->nop(); + + break; + + case 0x05: //less + __ slt(AT, opr1_reg, opr2_reg); + if(&target) + __ bne(AT, R0, target); + else + __ bne(AT, R0, (int)0); + __ delayed()->nop(); + + break; + + case 0x06: //less_equal + 
__ slt(AT, opr2_reg, opr1_reg); + + if(&target) + __ beq(AT, R0, target); + else + __ beq(AT, R0, (int)0); + __ delayed()->nop(); + + break; + + default: + Unimplemented(); + } + %} + + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); + ins_short_branch(1); +%} + +instruct branchConUL_regL_regL_short(cmpOp cmp, mRegL src1, mRegL src2, label labl) %{ + match( If cmp (CmpUL src1 src2) ); + effect(USE labl); + format %{ "BR$cmp $src1, $src2, $labl #@branchConUL_regL_regL_short" %} + ins_cost(250); + + ins_encode %{ + Register opr1_reg = as_Register($src1$$reg); + Register opr2_reg = as_Register($src2$$reg); + Label &target = *($labl$$label); + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: // equal + if (&target) + __ beq(opr1_reg, opr2_reg, target); + else + __ beq(opr1_reg, opr2_reg, (int)0); + __ delayed()->nop(); + break; + + case 0x02: // not_equal + if(&target) + __ bne(opr1_reg, opr2_reg, target); + else + __ bne(opr1_reg, opr2_reg, (int)0); + __ delayed()->nop(); + break; + + case 0x03: // greater + __ sltu(AT, opr2_reg, opr1_reg); + if(&target) + __ bne(AT, R0, target); + else + __ bne(AT, R0, (int)0); + __ delayed()->nop(); + break; + + case 0x04: // greater_equal + __ sltu(AT, opr1_reg, opr2_reg); + if(&target) + __ beq(AT, R0, target); + else + __ beq(AT, R0, (int)0); + __ delayed()->nop(); + break; + + case 0x05: // less + __ sltu(AT, opr1_reg, opr2_reg); + if(&target) + __ bne(AT, R0, target); + else + __ bne(AT, R0, (int)0); + __ delayed()->nop(); + break; + + case 0x06: // less_equal + __ sltu(AT, opr2_reg, opr1_reg); + if(&target) + __ beq(AT, R0, target); + else + __ beq(AT, R0, (int)0); + __ delayed()->nop(); + break; + + default: + Unimplemented(); + } + %} + + ins_pc_relative(1); + ins_pipe(pipe_alu_branch); + ins_short_branch(1); +%} + +instruct branchConL_regL_immL_0_short(cmpOp cmp, mRegL src1, immL_0 zero, label labl) %{ + match( If cmp (CmpL src1 zero) ); + effect(USE labl); + format %{ "BR$cmp $src1, zero, $labl #@branchConL_regL_immL_0_short" %} + ins_cost(150); + + ins_encode %{ + Register opr1_reg = as_Register($src1$$reg); + Label &target = *($labl$$label); + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: //equal + if (&target) + __ beq(opr1_reg, R0, target); + else + __ beq(opr1_reg, R0, int(0)); + break; + + case 0x02: //not_equal + if(&target) + __ bne(opr1_reg, R0, target); + else + __ bne(opr1_reg, R0, (int)0); + break; + + case 0x03: //greater + if(&target) + __ bgtz(opr1_reg, target); + else + __ bgtz(opr1_reg, (int)0); + break; + + case 0x04: //greater_equal + if(&target) + __ bgez(opr1_reg, target); + else + __ bgez(opr1_reg, (int)0); + break; + + case 0x05: //less + __ slt(AT, opr1_reg, R0); + if(&target) + __ bne(AT, R0, target); + else + __ bne(AT, R0, (int)0); + break; + + case 0x06: //less_equal + if (&target) + __ blez(opr1_reg, target); + else + __ blez(opr1_reg, int(0)); + break; + + default: + Unimplemented(); + } + __ delayed()->nop(); + %} + + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); + ins_short_branch(1); +%} + +instruct branchConUL_regL_immL_0_short(cmpOp cmp, mRegL src1, immL_0 zero, label labl) %{ + match(If cmp (CmpUL src1 zero)); + effect(USE labl); + format %{ "BR$cmp $src1, zero, $labl #@branchConUL_regL_immL_0_short" %} + ins_cost(150); + + ins_encode %{ + Register opr1_reg = as_Register($src1$$reg); + Label &target = *($labl$$label); + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: // equal + case 0x04: // greater_equal + case 0x06: // less_equal + if (&target) + __ beq(opr1_reg, R0, 
target); + else + __ beq(opr1_reg, R0, int(0)); + break; + + case 0x02: // not_equal + case 0x03: // greater + if(&target) + __ bne(opr1_reg, R0, target); + else + __ bne(opr1_reg, R0, (int)0); + break; + + case 0x05: // less + if(&target) + __ beq(R0, R0, target); + else + __ beq(R0, R0, (int)0); + break; + + default: + Unimplemented(); + } + __ delayed()->nop(); + %} + + ins_pc_relative(1); + ins_pipe(pipe_alu_branch); + ins_short_branch(1); +%} + +instruct branchConL_regL_immL_short(cmpOp cmp, mRegL src1, immL src2, label labl) %{ + match( If cmp (CmpL src1 src2) ); + effect(USE labl); + format %{ "BR$cmp $src1, $src2, $labl #@branchConL_regL_immL_short" %} + ins_cost(180); + + ins_encode %{ + Register opr1_reg = as_Register($src1$$reg); + Register opr2_reg = AT; + + Label &target = *($labl$$label); + int flag = $cmp$$cmpcode; + + __ set64(opr2_reg, $src2$$constant); + + switch(flag) { + case 0x01: //equal + if (&target) + __ beq(opr1_reg, opr2_reg, target); + else + __ beq(opr1_reg, opr2_reg, (int)0); + break; + + case 0x02: //not_equal + if(&target) + __ bne(opr1_reg, opr2_reg, target); + else + __ bne(opr1_reg, opr2_reg, (int)0); + break; + + case 0x03: //greater + __ slt(AT, opr2_reg, opr1_reg); + if(&target) + __ bne(AT, R0, target); + else + __ bne(AT, R0, (int)0); + break; + + case 0x04: //greater_equal + __ slt(AT, opr1_reg, opr2_reg); + if(&target) + __ beq(AT, R0, target); + else + __ beq(AT, R0, (int)0); + break; + + case 0x05: //less + __ slt(AT, opr1_reg, opr2_reg); + if(&target) + __ bne(AT, R0, target); + else + __ bne(AT, R0, (int)0); + break; + + case 0x06: //less_equal + __ slt(AT, opr2_reg, opr1_reg); + if(&target) + __ beq(AT, R0, target); + else + __ beq(AT, R0, (int)0); + break; + + default: + Unimplemented(); + } + __ delayed()->nop(); + %} + + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); + ins_short_branch(1); +%} + +instruct branchConUL_regL_immL_short(cmpOp cmp, mRegL src1, immL src2, label labl) %{ + match(If cmp (CmpUL src1 src2)); + effect(USE labl); + format %{ "BR$cmp $src1, $src2, $labl #@branchConUL_regL_immL_short" %} + ins_cost(180); + + ins_encode %{ + Register opr1_reg = as_Register($src1$$reg); + Register opr2_reg = AT; + Label &target = *($labl$$label); + int flag = $cmp$$cmpcode; + + __ set64(opr2_reg, $src2$$constant); + + switch(flag) { + case 0x01: // equal + if (&target) + __ beq(opr1_reg, opr2_reg, target); + else + __ beq(opr1_reg, opr2_reg, (int)0); + break; + + case 0x02: // not_equal + if(&target) + __ bne(opr1_reg, opr2_reg, target); + else + __ bne(opr1_reg, opr2_reg, (int)0); + break; + + case 0x03: // greater + __ sltu(AT, opr2_reg, opr1_reg); + if(&target) + __ bne(AT, R0, target); + else + __ bne(AT, R0, (int)0); + break; + + case 0x04: // greater_equal + __ sltu(AT, opr1_reg, opr2_reg); + if(&target) + __ beq(AT, R0, target); + else + __ beq(AT, R0, (int)0); + break; + + case 0x05: // less + __ sltu(AT, opr1_reg, opr2_reg); + if(&target) + __ bne(AT, R0, target); + else + __ bne(AT, R0, (int)0); + break; + + case 0x06: // less_equal + __ sltu(AT, opr2_reg, opr1_reg); + if(&target) + __ beq(AT, R0, target); + else + __ beq(AT, R0, (int)0); + break; + + default: + Unimplemented(); + } + __ delayed()->nop(); + %} + + ins_pc_relative(1); + ins_pipe(pipe_alu_branch); + ins_short_branch(1); +%} + +//FIXME +instruct branchConF_reg_reg_short(cmpOp cmp, regF src1, regF src2, label labl) %{ + match( If cmp (CmpF src1 src2) ); + effect(USE labl); + format %{ "BR$cmp $src1, $src2, $labl #@branchConF_reg_reg_short" %} + + ins_encode %{ 
+ FloatRegister reg_op1 = $src1$$FloatRegister; + FloatRegister reg_op2 = $src2$$FloatRegister; + Label& L = *($labl$$label); + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: // equal + __ c_eq_s(reg_op1, reg_op2); + if (&L) + __ bc1t(L); + else + __ bc1t((int)0); + break; + case 0x02: // not_equal + __ c_eq_s(reg_op1, reg_op2); + if (&L) + __ bc1f(L); + else + __ bc1f((int)0); + break; + case 0x03: // greater + __ c_ule_s(reg_op1, reg_op2); + if(&L) + __ bc1f(L); + else + __ bc1f((int)0); + break; + case 0x04: // greater_equal + __ c_ult_s(reg_op1, reg_op2); + if(&L) + __ bc1f(L); + else + __ bc1f((int)0); + break; + case 0x05: // less + __ c_ult_s(reg_op1, reg_op2); + if(&L) + __ bc1t(L); + else + __ bc1t((int)0); + break; + case 0x06: // less_equal + __ c_ule_s(reg_op1, reg_op2); + if(&L) + __ bc1t(L); + else + __ bc1t((int)0); + break; + default: + Unimplemented(); + } + __ delayed()->nop(); + %} + + ins_pc_relative(1); + ins_pipe(pipe_fpu_branch); + ins_short_branch(1); +%} + +instruct branchConD_reg_reg_short(cmpOp cmp, regD src1, regD src2, label labl) %{ + match( If cmp (CmpD src1 src2) ); + effect(USE labl); + format %{ "BR$cmp $src1, $src2, $labl #@branchConD_reg_reg_short" %} + + ins_encode %{ + FloatRegister reg_op1 = $src1$$FloatRegister; + FloatRegister reg_op2 = $src2$$FloatRegister; + Label& L = *($labl$$label); + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: // equal + __ c_eq_d(reg_op1, reg_op2); + if (&L) + __ bc1t(L); + else + __ bc1t((int)0); + break; + case 0x02: // not_equal + // c_ueq_d cannot distinguish NaN from equal. Double.isNaN(Double) is implemented by 'f != f', so the use of c_ueq_d causes bugs. + __ c_eq_d(reg_op1, reg_op2); + if (&L) + __ bc1f(L); + else + __ bc1f((int)0); + break; + case 0x03: // greater + __ c_ule_d(reg_op1, reg_op2); + if(&L) + __ bc1f(L); + else + __ bc1f((int)0); + break; + case 0x04: // greater_equal + __ c_ult_d(reg_op1, reg_op2); + if(&L) + __ bc1f(L); + else + __ bc1f((int)0); + break; + case 0x05: // less + __ c_ult_d(reg_op1, reg_op2); + if(&L) + __ bc1t(L); + else + __ bc1t((int)0); + break; + case 0x06: // less_equal + __ c_ule_d(reg_op1, reg_op2); + if(&L) + __ bc1t(L); + else + __ bc1t((int)0); + break; + default: + Unimplemented(); + } + __ delayed()->nop(); + %} + + ins_pc_relative(1); + ins_pipe(pipe_fpu_branch); + ins_short_branch(1); +%} + +// =================== End of branch instructions ========================== + +// Call Runtime Instruction +instruct CallRuntimeDirect(method meth) %{ + match(CallRuntime ); + effect(USE meth); + + ins_cost(300); + format %{ "CALL,runtime #@CallRuntimeDirect" %} + ins_encode( Java_To_Runtime( meth ) ); + ins_pipe( pipe_slow ); + ins_alignment(16); +%} + + + +//------------------------MemBar Instructions------------------------------- +//Memory barrier flavors + +instruct membar_acquire() %{ + match(MemBarAcquire); + ins_cost(400); + + format %{ "MEMBAR-acquire @ membar_acquire" %} + ins_encode %{ + __ sync(); + %} + ins_pipe(empty); +%} + +instruct load_fence() %{ + match(LoadFence); + ins_cost(400); + + format %{ "MEMBAR @ load_fence" %} + ins_encode %{ + __ sync(); + %} + ins_pipe(pipe_slow); +%} + +instruct membar_acquire_lock() +%{ + match(MemBarAcquireLock); + ins_cost(0); + + size(0); + format %{ "MEMBAR-acquire (acquire as part of CAS in prior FastLock so empty encoding) @ membar_acquire_lock" %} + ins_encode(); + ins_pipe(empty); +%} + +instruct membar_release() %{ + match(MemBarRelease); + ins_cost(400); + + format %{ "MEMBAR-release @ membar_release" 
%} + + ins_encode %{ + // Attention: DO NOT DELETE THIS GUY! + __ sync(); + %} + + ins_pipe(pipe_slow); +%} + +instruct store_fence() %{ + match(StoreFence); + ins_cost(400); + + format %{ "MEMBAR @ store_fence" %} + + ins_encode %{ + __ sync(); + %} + + ins_pipe(pipe_slow); +%} + +instruct membar_release_lock() +%{ + match(MemBarReleaseLock); + ins_cost(0); + + size(0); + format %{ "MEMBAR-release-lock (release in FastUnlock so empty) @ membar_release_lock" %} + ins_encode(); + ins_pipe(empty); +%} + + +instruct membar_volatile() %{ + match(MemBarVolatile); + ins_cost(400); + + format %{ "MEMBAR-volatile" %} + ins_encode %{ + if( !os::is_MP() ) return; // Not needed on single CPU + __ sync(); + + %} + ins_pipe(pipe_slow); +%} + +instruct unnecessary_membar_volatile() %{ + match(MemBarVolatile); + predicate(Matcher::post_store_load_barrier(n)); + ins_cost(0); + + size(0); + format %{ "MEMBAR-volatile (unnecessary so empty encoding) @ unnecessary_membar_volatile" %} + ins_encode( ); + ins_pipe(empty); +%} + +instruct membar_storestore() %{ + match(MemBarStoreStore); + + ins_cost(400); + format %{ "MEMBAR-storestore @ membar_storestore" %} + ins_encode %{ + __ sync(); + %} + ins_pipe(empty); +%} + +//----------Move Instructions-------------------------------------------------- +instruct castX2P(mRegP dst, mRegL src) %{ + match(Set dst (CastX2P src)); + format %{ "castX2P $dst, $src @ castX2P" %} + ins_encode %{ + Register src = $src$$Register; + Register dst = $dst$$Register; + + if(src != dst) + __ move(dst, src); + %} + ins_cost(10); + ins_pipe( ialu_regI_mov ); +%} + +instruct castP2X(mRegL dst, mRegP src ) %{ + match(Set dst (CastP2X src)); + + format %{ "mov $dst, $src\t #@castP2X" %} + ins_encode %{ + Register src = $src$$Register; + Register dst = $dst$$Register; + + if(src != dst) + __ move(dst, src); + %} + ins_pipe( ialu_regI_mov ); +%} + +instruct MoveF2I_reg_reg(mRegI dst, regF src) %{ + match(Set dst (MoveF2I src)); + effect(DEF dst, USE src); + ins_cost(85); + format %{ "MoveF2I $dst, $src @ MoveF2I_reg_reg" %} + ins_encode %{ + Register dst = as_Register($dst$$reg); + FloatRegister src = as_FloatRegister($src$$reg); + + __ mfc1(dst, src); + %} + ins_pipe( pipe_slow ); +%} + +instruct MoveI2F_reg_reg(regF dst, mRegI src) %{ + match(Set dst (MoveI2F src)); + effect(DEF dst, USE src); + ins_cost(85); + format %{ "MoveI2F $dst, $src @ MoveI2F_reg_reg" %} + ins_encode %{ + Register src = as_Register($src$$reg); + FloatRegister dst = as_FloatRegister($dst$$reg); + + __ mtc1(src, dst); + %} + ins_pipe( pipe_slow ); +%} + +instruct MoveD2L_reg_reg(mRegL dst, regD src) %{ + match(Set dst (MoveD2L src)); + effect(DEF dst, USE src); + ins_cost(85); + format %{ "MoveD2L $dst, $src @ MoveD2L_reg_reg" %} + ins_encode %{ + Register dst = as_Register($dst$$reg); + FloatRegister src = as_FloatRegister($src$$reg); + + __ dmfc1(dst, src); + %} + ins_pipe( pipe_slow ); +%} + +instruct MoveL2D_reg_reg(regD dst, mRegL src) %{ + match(Set dst (MoveL2D src)); + effect(DEF dst, USE src); + ins_cost(85); + format %{ "MoveL2D $dst, $src @ MoveL2D_reg_reg" %} + ins_encode %{ + FloatRegister dst = as_FloatRegister($dst$$reg); + Register src = as_Register($src$$reg); + + __ dmtc1(src, dst); + %} + ins_pipe( pipe_slow ); +%} + +//----------Conditional Move--------------------------------------------------- +// Conditional move +instruct cmovI_cmpI_reg_reg(mRegI dst, mRegI src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ + match(Set dst (CMoveI (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + 
format %{ + "CMP$cop $tmp1, $tmp2\t @cmovI_cmpI_reg_reg\n" + "\tCMOV $dst,$src \t @cmovI_cmpI_reg_reg" + %} + + ins_encode %{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovI_cmpP_reg_reg(mRegI dst, mRegI src, mRegP tmp1, mRegP tmp2, cmpOpU cop ) %{ + match(Set dst (CMoveI (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMPU$cop $tmp1,$tmp2\t @cmovI_cmpP_reg_reg\n\t" + "CMOV $dst,$src\t @cmovI_cmpP_reg_reg" + %} + ins_encode %{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovI_cmpN_reg_reg(mRegI dst, mRegI src, mRegN tmp1, mRegN tmp2, cmpOpU cop ) %{ + match(Set dst (CMoveI (Binary cop (CmpN tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMPU$cop $tmp1,$tmp2\t @cmovI_cmpN_reg_reg\n\t" + "CMOV $dst,$src\t @cmovI_cmpN_reg_reg" + %} + ins_encode %{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovP_cmpU_reg_reg(mRegP dst, mRegP src, mRegI tmp1, mRegI tmp2, cmpOpU cop ) %{ + match(Set dst (CMoveP (Binary cop (CmpU tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMPU$cop $tmp1,$tmp2\t @cmovP_cmpU_reg_reg\n\t" + "CMOV $dst,$src\t @cmovP_cmpU_reg_reg" + %} + ins_encode %{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovP_cmpF_reg_reg(mRegP dst, mRegP src, regF tmp1, regF tmp2, cmpOp cop ) %{ + match(Set dst (CMoveP (Binary cop (CmpF tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMP$cop $tmp1, $tmp2\t @cmovP_cmpF_reg_reg\n" + "\tCMOV $dst,$src \t @cmovP_cmpF_reg_reg" + %} + + ins_encode %{ + FloatRegister reg_op1 = $tmp1$$FloatRegister; + FloatRegister reg_op2 = $tmp2$$FloatRegister; + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(reg_op1, reg_op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_float */); + %} + ins_pipe( pipe_slow ); +%} + +instruct cmovP_cmpN_reg_reg(mRegP dst, mRegP src, mRegN tmp1, mRegN tmp2, cmpOpU cop ) %{ + match(Set dst (CMoveP (Binary cop (CmpN tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMPU$cop $tmp1,$tmp2\t @cmovP_cmpN_reg_reg\n\t" + "CMOV $dst,$src\t @cmovP_cmpN_reg_reg" + %} + ins_encode %{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovN_cmpP_reg_reg(mRegN dst, mRegN src, mRegP tmp1, mRegP tmp2, cmpOpU cop ) %{ + match(Set 
dst (CMoveN (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMPU$cop $tmp1,$tmp2\t @cmovN_cmpP_reg_reg\n\t" + "CMOV $dst,$src\t @cmovN_cmpP_reg_reg" + %} + ins_encode %{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovP_cmpD_reg_reg(mRegP dst, mRegP src, regD tmp1, regD tmp2, cmpOp cop ) %{ + match(Set dst (CMoveP (Binary cop (CmpD tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMP$cop $tmp1, $tmp2\t @cmovP_cmpD_reg_reg\n" + "\tCMOV $dst,$src \t @cmovP_cmpD_reg_reg" + %} + ins_encode %{ + FloatRegister reg_op1 = as_FloatRegister($tmp1$$reg); + FloatRegister reg_op2 = as_FloatRegister($tmp2$$reg); + Register dst = as_Register($dst$$reg); + Register src = as_Register($src$$reg); + int flag = $cop$$cmpcode; + + __ cmp_cmov(reg_op1, reg_op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_float */); + %} + + ins_pipe( pipe_slow ); +%} + + +instruct cmovN_cmpN_reg_reg(mRegN dst, mRegN src, mRegN tmp1, mRegN tmp2, cmpOpU cop ) %{ + match(Set dst (CMoveN (Binary cop (CmpN tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMPU$cop $tmp1,$tmp2\t @cmovN_cmpN_reg_reg\n\t" + "CMOV $dst,$src\t @cmovN_cmpN_reg_reg" + %} + ins_encode %{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); + %} + + ins_pipe( pipe_slow ); +%} + + +instruct cmovI_cmpU_reg_reg(mRegI dst, mRegI src, mRegI tmp1, mRegI tmp2, cmpOpU cop ) %{ + match(Set dst (CMoveI (Binary cop (CmpU tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMPU$cop $tmp1,$tmp2\t @cmovI_cmpU_reg_reg\n\t" + "CMOV $dst,$src\t @cmovI_cmpU_reg_reg" + %} + ins_encode %{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovI_cmpL_reg_reg(mRegI dst, mRegI src, mRegL tmp1, mRegL tmp2, cmpOp cop ) %{ + match(Set dst (CMoveI (Binary cop (CmpL tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMP$cop $tmp1, $tmp2\t @cmovI_cmpL_reg_reg\n" + "\tCMOV $dst,$src \t @cmovI_cmpL_reg_reg" + %} + ins_encode %{ + Register opr1 = as_Register($tmp1$$reg); + Register opr2 = as_Register($tmp2$$reg); + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovI_cmpUL_reg_reg(mRegI dst, mRegI src, mRegL tmp1, mRegL tmp2, cmpOp cop) %{ + match(Set dst (CMoveI (Binary cop (CmpUL tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMP$cop $tmp1, $tmp2\t @cmovI_cmpUL_reg_reg\n" + "\tCMOV $dst,$src \t @cmovI_cmpUL_reg_reg" + %} + ins_encode %{ + Register opr1 = as_Register($tmp1$$reg); + Register opr2 = as_Register($tmp2$$reg); + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, false /* 
is_signed */); + %} + + ins_pipe(pipe_slow); +%} + +instruct cmovP_cmpL_reg_reg(mRegP dst, mRegP src, mRegL tmp1, mRegL tmp2, cmpOp cop ) %{ + match(Set dst (CMoveP (Binary cop (CmpL tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMP$cop $tmp1, $tmp2\t @cmovP_cmpL_reg_reg\n" + "\tCMOV $dst,$src \t @cmovP_cmpL_reg_reg" + %} + ins_encode %{ + Register opr1 = as_Register($tmp1$$reg); + Register opr2 = as_Register($tmp2$$reg); + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovP_cmpUL_reg_reg(mRegP dst, mRegP src, mRegL tmp1, mRegL tmp2, cmpOp cop) %{ + match(Set dst (CMoveP (Binary cop (CmpUL tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMP$cop $tmp1, $tmp2\t @cmovP_cmpUL_reg_reg\n" + "\tCMOV $dst,$src \t @cmovP_cmpUL_reg_reg" + %} + ins_encode %{ + Register opr1 = as_Register($tmp1$$reg); + Register opr2 = as_Register($tmp2$$reg); + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); + %} + + ins_pipe(pipe_slow); +%} + +instruct cmovI_cmpD_reg_reg(mRegI dst, mRegI src, regD tmp1, regD tmp2, cmpOp cop ) %{ + match(Set dst (CMoveI (Binary cop (CmpD tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMP$cop $tmp1, $tmp2\t @cmovI_cmpD_reg_reg\n" + "\tCMOV $dst,$src \t @cmovI_cmpD_reg_reg" + %} + ins_encode %{ + FloatRegister reg_op1 = as_FloatRegister($tmp1$$reg); + FloatRegister reg_op2 = as_FloatRegister($tmp2$$reg); + Register dst = as_Register($dst$$reg); + Register src = as_Register($src$$reg); + int flag = $cop$$cmpcode; + + __ cmp_cmov(reg_op1, reg_op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_float */); + %} + + ins_pipe( pipe_slow ); +%} + + +instruct cmovP_cmpP_reg_reg(mRegP dst, mRegP src, mRegP tmp1, mRegP tmp2, cmpOpU cop ) %{ + match(Set dst (CMoveP (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMPU$cop $tmp1,$tmp2\t @cmovP_cmpP_reg_reg\n\t" + "CMOV $dst,$src\t @cmovP_cmpP_reg_reg" + %} + ins_encode %{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovP_cmpI_reg_reg(mRegP dst, mRegP src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ + match(Set dst (CMoveP (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMP$cop $tmp1,$tmp2\t @cmovP_cmpI_reg_reg\n\t" + "CMOV $dst,$src\t @cmovP_cmpI_reg_reg" + %} + ins_encode %{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovL_cmpP_reg_reg(mRegL dst, mRegL src, mRegP tmp1, mRegP tmp2, cmpOpU cop ) %{ + match(Set dst (CMoveL (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMPU$cop $tmp1,$tmp2\t @cmovL_cmpP_reg_reg\n\t" + "CMOV $dst,$src\t @cmovL_cmpP_reg_reg" + %} + ins_encode %{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = 
$dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovN_cmpU_reg_reg(mRegN dst, mRegN src, mRegI tmp1, mRegI tmp2, cmpOpU cop ) %{ + match(Set dst (CMoveN (Binary cop (CmpU tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMPU$cop $tmp1,$tmp2\t @cmovN_cmpU_reg_reg\n\t" + "CMOV $dst,$src\t @cmovN_cmpU_reg_reg" + %} + ins_encode %{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovN_cmpL_reg_reg(mRegN dst, mRegN src, mRegL tmp1, mRegL tmp2, cmpOp cop) %{ + match(Set dst (CMoveN (Binary cop (CmpL tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMP$cop $tmp1, $tmp2\t @cmovN_cmpL_reg_reg\n" + "\tCMOV $dst,$src \t @cmovN_cmpL_reg_reg" + %} + ins_encode %{ + Register opr1 = as_Register($tmp1$$reg); + Register opr2 = as_Register($tmp2$$reg); + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovN_cmpUL_reg_reg(mRegN dst, mRegN src, mRegL tmp1, mRegL tmp2, cmpOp cop) %{ + match(Set dst (CMoveN (Binary cop (CmpUL tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMP$cop $tmp1, $tmp2\t @cmovN_cmpUL_reg_reg\n" + "\tCMOV $dst,$src \t @cmovN_cmpUL_reg_reg" + %} + ins_encode %{ + Register opr1 = as_Register($tmp1$$reg); + Register opr2 = as_Register($tmp2$$reg); + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); + %} + + ins_pipe(pipe_slow); +%} + +instruct cmovN_cmpI_reg_reg(mRegN dst, mRegN src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ + match(Set dst (CMoveN (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMP$cop $tmp1,$tmp2\t @cmovN_cmpI_reg_reg\n\t" + "CMOV $dst,$src\t @cmovN_cmpI_reg_reg" + %} + ins_encode %{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovL_cmpU_reg_reg(mRegL dst, mRegL src, mRegI tmp1, mRegI tmp2, cmpOpU cop ) %{ + match(Set dst (CMoveL (Binary cop (CmpU tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMPU$cop $tmp1,$tmp2\t @cmovL_cmpU_reg_reg\n\t" + "CMOV $dst,$src\t @cmovL_cmpU_reg_reg" + %} + ins_encode %{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovL_cmpF_reg_reg(mRegL dst, mRegL src, regF tmp1, regF tmp2, cmpOp cop ) %{ + match(Set dst (CMoveL (Binary cop (CmpF tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMP$cop $tmp1, $tmp2\t @cmovL_cmpF_reg_reg\n" + "\tCMOV $dst,$src \t @cmovL_cmpF_reg_reg" + %} + + ins_encode %{ + 
FloatRegister reg_op1 = $tmp1$$FloatRegister; + FloatRegister reg_op2 = $tmp2$$FloatRegister; + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(reg_op1, reg_op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_float */); + %} + ins_pipe( pipe_slow ); +%} + +instruct cmovL_cmpI_reg_reg(mRegL dst, mRegL src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ + match(Set dst (CMoveL (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMP$cop $tmp1, $tmp2\t @cmovL_cmpI_reg_reg\n" + "\tCMOV $dst,$src \t @cmovL_cmpI_reg_reg" + %} + + ins_encode %{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = as_Register($dst$$reg); + Register src = as_Register($src$$reg); + int flag = $cop$$cmpcode; + + __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovL_cmpL_reg_reg(mRegL dst, mRegL src, mRegL tmp1, mRegL tmp2, cmpOp cop ) %{ + match(Set dst (CMoveL (Binary cop (CmpL tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMP$cop $tmp1, $tmp2\t @cmovL_cmpL_reg_reg\n" + "\tCMOV $dst,$src \t @cmovL_cmpL_reg_reg" + %} + ins_encode %{ + Register opr1 = as_Register($tmp1$$reg); + Register opr2 = as_Register($tmp2$$reg); + Register dst = as_Register($dst$$reg); + Register src = as_Register($src$$reg); + int flag = $cop$$cmpcode; + + __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovL_cmpUL_reg_reg(mRegL dst, mRegL src, mRegL tmp1, mRegL tmp2, cmpOp cop) %{ + match(Set dst (CMoveL (Binary cop (CmpUL tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMP$cop $tmp1, $tmp2\t @cmovL_cmpUL_reg_reg\n" + "\tCMOV $dst,$src \t @cmovL_cmpUL_reg_reg" + %} + ins_encode %{ + Register opr1 = as_Register($tmp1$$reg); + Register opr2 = as_Register($tmp2$$reg); + Register dst = as_Register($dst$$reg); + Register src = as_Register($src$$reg); + int flag = $cop$$cmpcode; + + __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); + %} + + ins_pipe(pipe_slow); +%} + +instruct cmovL_cmpN_reg_reg(mRegL dst, mRegL src, mRegN tmp1, mRegN tmp2, cmpOpU cop ) %{ + match(Set dst (CMoveL (Binary cop (CmpN tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMPU$cop $tmp1,$tmp2\t @cmovL_cmpN_reg_reg\n\t" + "CMOV $dst,$src\t @cmovL_cmpN_reg_reg" + %} + ins_encode %{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); + %} + + ins_pipe( pipe_slow ); +%} + + +instruct cmovL_cmpD_reg_reg(mRegL dst, mRegL src, regD tmp1, regD tmp2, cmpOp cop ) %{ + match(Set dst (CMoveL (Binary cop (CmpD tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMP$cop $tmp1, $tmp2\t @cmovL_cmpD_reg_reg\n" + "\tCMOV $dst,$src \t @cmovL_cmpD_reg_reg" + %} + ins_encode %{ + FloatRegister reg_op1 = as_FloatRegister($tmp1$$reg); + FloatRegister reg_op2 = as_FloatRegister($tmp2$$reg); + Register dst = as_Register($dst$$reg); + Register src = as_Register($src$$reg); + int flag = $cop$$cmpcode; + + __ cmp_cmov(reg_op1, reg_op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_float */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovD_cmpD_reg_reg(regD dst, regD src, regD tmp1, regD tmp2, 
cmpOp cop ) %{ + match(Set dst (CMoveD (Binary cop (CmpD tmp1 tmp2)) (Binary dst src))); + ins_cost(200); + format %{ + "CMP$cop $tmp1, $tmp2\t @cmovD_cmpD_reg_reg\n" + "\tCMOV $dst,$src \t @cmovD_cmpD_reg_reg" + %} + ins_encode %{ + FloatRegister reg_op1 = as_FloatRegister($tmp1$$reg); + FloatRegister reg_op2 = as_FloatRegister($tmp2$$reg); + FloatRegister dst = as_FloatRegister($dst$$reg); + FloatRegister src = as_FloatRegister($src$$reg); + int flag = $cop$$cmpcode; + + __ cmp_cmov(reg_op1, reg_op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_float */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovF_cmpI_reg_reg(regF dst, regF src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ + match(Set dst (CMoveF (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); + ins_cost(200); + format %{ + "CMP$cop $tmp1, $tmp2\t @cmovF_cmpI_reg_reg\n" + "\tCMOV $dst, $src \t @cmovF_cmpI_reg_reg" + %} + + ins_encode %{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + FloatRegister dst = as_FloatRegister($dst$$reg); + FloatRegister src = as_FloatRegister($src$$reg); + int flag = $cop$$cmpcode; + + __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_float */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovD_cmpI_reg_reg(regD dst, regD src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ + match(Set dst (CMoveD (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); + ins_cost(200); + format %{ + "CMP$cop $tmp1, $tmp2\t @cmovD_cmpI_reg_reg\n" + "\tCMOV $dst, $src \t @cmovD_cmpI_reg_reg" + %} + + ins_encode %{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + FloatRegister dst = as_FloatRegister($dst$$reg); + FloatRegister src = as_FloatRegister($src$$reg); + int flag = $cop$$cmpcode; + + __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_float */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovD_cmpP_reg_reg(regD dst, regD src, mRegP tmp1, mRegP tmp2, cmpOp cop ) %{ + match(Set dst (CMoveD (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); + ins_cost(200); + format %{ + "CMP$cop $tmp1, $tmp2\t @cmovD_cmpP_reg_reg\n" + "\tCMOV $dst, $src \t @cmovD_cmpP_reg_reg" + %} + + ins_encode %{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + FloatRegister dst = as_FloatRegister($dst$$reg); + FloatRegister src = as_FloatRegister($src$$reg); + int flag = $cop$$cmpcode; + + __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_float */); + %} + + ins_pipe( pipe_slow ); +%} + +//FIXME +instruct cmovI_cmpF_reg_reg(mRegI dst, mRegI src, regF tmp1, regF tmp2, cmpOp cop ) %{ + match(Set dst (CMoveI (Binary cop (CmpF tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMP$cop $tmp1, $tmp2\t @cmovI_cmpF_reg_reg\n" + "\tCMOV $dst,$src \t @cmovI_cmpF_reg_reg" + %} + + ins_encode %{ + FloatRegister reg_op1 = $tmp1$$FloatRegister; + FloatRegister reg_op2 = $tmp2$$FloatRegister; + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(reg_op1, reg_op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_float */); + %} + ins_pipe( pipe_slow ); +%} + +instruct cmovF_cmpF_reg_reg(regF dst, regF src, regF tmp1, regF tmp2, cmpOp cop ) %{ + match(Set dst (CMoveF (Binary cop (CmpF tmp1 tmp2)) (Binary dst src))); + ins_cost(200); + format %{ + "CMP$cop $tmp1, $tmp2\t @cmovF_cmpF_reg_reg\n" + "\tCMOV $dst,$src \t @cmovF_cmpF_reg_reg" + %} + + ins_encode %{ + FloatRegister reg_op1 = $tmp1$$FloatRegister; + FloatRegister reg_op2 = 
$tmp2$$FloatRegister; + FloatRegister dst = $dst$$FloatRegister; + FloatRegister src = $src$$FloatRegister; + int flag = $cop$$cmpcode; + + __ cmp_cmov(reg_op1, reg_op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_float */); + %} + ins_pipe( pipe_slow ); +%} + +// Manifest a CmpL result in an integer register. Very painful. +// This is the test to avoid. +instruct cmpL3_reg_reg(mRegI dst, mRegL src1, mRegL src2) %{ + match(Set dst (CmpL3 src1 src2)); + ins_cost(1000); + format %{ "cmpL3 $dst, $src1, $src2 @ cmpL3_reg_reg" %} + ins_encode %{ + Register opr1 = as_Register($src1$$reg); + Register opr2 = as_Register($src2$$reg); + Register dst = as_Register($dst$$reg); + + __ slt(AT, opr1, opr2); + __ slt(dst, opr2, opr1); + __ subu(dst, dst, AT); + %} + ins_pipe( pipe_slow ); +%} + +// +// less_result = -1 +// greater_result = 1 +// equal_result = 0 +// nan_result = -1 +// +instruct cmpF3_reg_reg(mRegI dst, regF src1, regF src2) %{ + match(Set dst (CmpF3 src1 src2)); + ins_cost(1000); + format %{ "cmpF3 $dst, $src1, $src2 @ cmpF3_reg_reg" %} + ins_encode %{ + FloatRegister src1 = as_FloatRegister($src1$$reg); + FloatRegister src2 = as_FloatRegister($src2$$reg); + Register dst = as_Register($dst$$reg); + + __ ori(dst, R0, 1); + __ ori(AT, R0, 1); + __ c_olt_s(src2, src1); + __ movf(dst, R0); + __ c_ult_s(src1, src2); + __ movf(AT, R0); + __ subu(dst, dst, AT); + %} + ins_pipe( pipe_slow ); +%} + +instruct cmpD3_reg_reg(mRegI dst, regD src1, regD src2) %{ + match(Set dst (CmpD3 src1 src2)); + ins_cost(1000); + format %{ "cmpD3 $dst, $src1, $src2 @ cmpD3_reg_reg" %} + ins_encode %{ + FloatRegister src1 = as_FloatRegister($src1$$reg); + FloatRegister src2 = as_FloatRegister($src2$$reg); + Register dst = as_Register($dst$$reg); + + __ ori(dst, R0, 1); + __ ori(AT, R0, 1); + __ c_olt_d(src2, src1); + __ movf(dst, R0); + __ c_ult_d(src1, src2); + __ movf(AT, R0); + __ subu(dst, dst, AT); + %} + ins_pipe( pipe_slow ); +%} + +instruct clear_array(mRegL cnt, mRegP base, Universe dummy) %{ + match(Set dummy (ClearArray cnt base)); + format %{ "CLEAR_ARRAY base = $base, cnt = $cnt # Clear doublewords" %} + ins_encode %{ + //Assume cnt is the number of doublewords (not bytes) to be cleared, + //and base points to the starting address of the array.
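+    // AT is set to base in the first delay slot and advanced by wordSize in the loop's delay slot;
+    // T9 counts down the remaining doublewords, one zero doubleword stored per iteration.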
+ Register base = $base$$Register; + Register num = $cnt$$Register; + Label Loop, done; + + __ beq(num, R0, done); + __ delayed()->daddu(AT, base, R0); + + __ move(T9, num); /* T9 = words */ + + __ bind(Loop); + __ sd(R0, AT, 0); + __ daddiu(T9, T9, -1); + __ bne(T9, R0, Loop); + __ delayed()->daddiu(AT, AT, wordSize); + + __ bind(done); + %} + ins_pipe( pipe_slow ); +%} + +instruct string_compareL(a4_RegP str1, mA5RegI cnt1, a6_RegP str2, mA7RegI cnt2, no_Ax_mRegI result) %{ + predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL); + match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); + effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2); + + format %{ "String Compare byte[] $str1[len: $cnt1], $str2[len: $cnt2] -> $result @ string_compareL" %} + ins_encode %{ + __ string_compare($str1$$Register, $str2$$Register, + $cnt1$$Register, $cnt2$$Register, $result$$Register, + StrIntrinsicNode::LL); + %} + + ins_pipe( pipe_slow ); +%} + +instruct string_compareU(a4_RegP str1, mA5RegI cnt1, a6_RegP str2, mA7RegI cnt2, no_Ax_mRegI result) %{ + predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU); + match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); + effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2); + + format %{ "String Compare char[] $str1[len: $cnt1], $str2[len: $cnt2] -> $result @ string_compareU" %} + ins_encode %{ + __ string_compare($str1$$Register, $str2$$Register, + $cnt1$$Register, $cnt2$$Register, $result$$Register, + StrIntrinsicNode::UU); + %} + + ins_pipe( pipe_slow ); +%} + +instruct string_compareLU(a4_RegP str1, mA5RegI cnt1, a6_RegP str2, mA7RegI cnt2, no_Ax_mRegI result) %{ + predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU); + match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); + effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2); + + format %{ "String Compare byte[] $str1[len: $cnt1], $str2[len: $cnt2] -> $result @ string_compareLU" %} + ins_encode %{ + __ string_compare($str1$$Register, $str2$$Register, + $cnt1$$Register, $cnt2$$Register, $result$$Register, + StrIntrinsicNode::LU); + %} + + ins_pipe( pipe_slow ); +%} + +instruct string_compareUL(a4_RegP str1, mA5RegI cnt1, a6_RegP str2, mA7RegI cnt2, no_Ax_mRegI result) %{ + predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL); + match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); + effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2); + + format %{ "String Compare byte[] $str1[len: $cnt1], $str2[len: $cnt2] -> $result @ string_compareUL" %} + ins_encode %{ + __ string_compare($str1$$Register, $str2$$Register, + $cnt1$$Register, $cnt2$$Register, $result$$Register, + StrIntrinsicNode::UL); + %} + + ins_pipe( pipe_slow ); +%} + +// intrinsic optimization +instruct string_equals(a4_RegP str1, a5_RegP str2, mA6RegI cnt, mA7RegI temp, no_Ax_mRegI result) %{ + match(Set result (StrEquals (Binary str1 str2) cnt)); + effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL temp); + + format %{ "String Equal $str1, $str2, len:$cnt tmp:$temp -> $result @ string_equals" %} + ins_encode %{ + __ arrays_equals($str1$$Register, $str2$$Register, + $cnt$$Register, $temp$$Register, $result$$Register, + false/* byte */); + %} + + ins_pipe( pipe_slow ); +%} + +//----------Arithmetic Instructions------------------------------------------- +//----------Addition Instructions--------------------------------------------- +instruct addI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ + 
match(Set dst (AddI src1 src2)); + + format %{ "add $dst, $src1, $src2 #@addI_Reg_Reg" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + Register src2 = $src2$$Register; + __ addu32(dst, src1, src2); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct addI_Reg_imm(mRegI dst, mRegI src1, immI src2) %{ + match(Set dst (AddI src1 src2)); + + format %{ "add $dst, $src1, $src2 #@addI_Reg_imm" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + int imm = $src2$$constant; + + if(Assembler::is_simm16(imm)) { + __ addiu32(dst, src1, imm); + } else { + __ move(AT, imm); + __ addu32(dst, src1, AT); + } + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct addP_reg_reg(mRegP dst, mRegP src1, mRegL src2) %{ + match(Set dst (AddP src1 src2)); + + format %{ "dadd $dst, $src1, $src2 #@addP_reg_reg" %} + + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + Register src2 = $src2$$Register; + __ daddu(dst, src1, src2); + %} + + ins_pipe( ialu_regI_regI ); +%} + +instruct addP_reg_reg_convI2L(mRegP dst, mRegP src1, mRegI src2) %{ + match(Set dst (AddP src1 (ConvI2L src2))); + + format %{ "dadd $dst, $src1, $src2 #@addP_reg_reg_convI2L" %} + + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + Register src2 = $src2$$Register; + __ daddu(dst, src1, src2); + %} + + ins_pipe( ialu_regI_regI ); +%} + +instruct addP_reg_imm(mRegP dst, mRegP src1, immL16 src2) %{ + match(Set dst (AddP src1 src2)); + + format %{ "daddi $dst, $src1, $src2 #@addP_reg_imm" %} + ins_encode %{ + Register src1 = $src1$$Register; + long src2 = $src2$$constant; + Register dst = $dst$$Register; + + __ daddiu(dst, src1, src2); + %} + ins_pipe( ialu_regI_imm16 ); +%} + +// Add Long Register with Register +instruct addL_Reg_Reg(mRegL dst, mRegL src1, mRegL src2) %{ + match(Set dst (AddL src1 src2)); + ins_cost(200); + format %{ "ADD $dst, $src1, $src2 #@addL_Reg_Reg\t" %} + + ins_encode %{ + Register dst_reg = as_Register($dst$$reg); + Register src1_reg = as_Register($src1$$reg); + Register src2_reg = as_Register($src2$$reg); + + __ daddu(dst_reg, src1_reg, src2_reg); + %} + + ins_pipe( ialu_regL_regL ); +%} + +instruct addL_Reg_imm(mRegL dst, mRegL src1, immL16 src2) +%{ + match(Set dst (AddL src1 src2)); + + format %{ "ADD $dst, $src1, $src2 #@addL_Reg_imm " %} + ins_encode %{ + Register dst_reg = as_Register($dst$$reg); + Register src1_reg = as_Register($src1$$reg); + int src2_imm = $src2$$constant; + + __ daddiu(dst_reg, src1_reg, src2_imm); + %} + + ins_pipe( ialu_regL_regL ); +%} + +instruct addL_RegI2L_imm(mRegL dst, mRegI src1, immL16 src2) +%{ + match(Set dst (AddL (ConvI2L src1) src2)); + + format %{ "ADD $dst, $src1, $src2 #@addL_RegI2L_imm " %} + ins_encode %{ + Register dst_reg = as_Register($dst$$reg); + Register src1_reg = as_Register($src1$$reg); + int src2_imm = $src2$$constant; + + __ daddiu(dst_reg, src1_reg, src2_imm); + %} + + ins_pipe( ialu_regL_regL ); +%} + +instruct addL_RegI2L_Reg(mRegL dst, mRegI src1, mRegL src2) %{ + match(Set dst (AddL (ConvI2L src1) src2)); + ins_cost(200); + format %{ "ADD $dst, $src1, $src2 #@addL_RegI2L_Reg\t" %} + + ins_encode %{ + Register dst_reg = as_Register($dst$$reg); + Register src1_reg = as_Register($src1$$reg); + Register src2_reg = as_Register($src2$$reg); + + __ daddu(dst_reg, src1_reg, src2_reg); + %} + + ins_pipe( ialu_regL_regL ); +%} + +instruct addL_RegI2L_RegI2L(mRegL dst, mRegI src1, mRegI src2) %{ + match(Set dst (AddL (ConvI2L src1) 
(ConvI2L src2))); + ins_cost(200); + format %{ "ADD $dst, $src1, $src2 #@addL_RegI2L_RegI2L\t" %} + + ins_encode %{ + Register dst_reg = as_Register($dst$$reg); + Register src1_reg = as_Register($src1$$reg); + Register src2_reg = as_Register($src2$$reg); + + __ daddu(dst_reg, src1_reg, src2_reg); + %} + + ins_pipe( ialu_regL_regL ); +%} + +instruct addL_Reg_RegI2L(mRegL dst, mRegL src1, mRegI src2) %{ + match(Set dst (AddL src1 (ConvI2L src2))); + ins_cost(200); + format %{ "ADD $dst, $src1, $src2 #@addL_Reg_RegI2L\t" %} + + ins_encode %{ + Register dst_reg = as_Register($dst$$reg); + Register src1_reg = as_Register($src1$$reg); + Register src2_reg = as_Register($src2$$reg); + + __ daddu(dst_reg, src1_reg, src2_reg); + %} + + ins_pipe( ialu_regL_regL ); +%} + +//----------Abs Instructions------------------------------------------- + +// Integer Absolute Instructions +instruct absI_rReg(mRegI dst, mRegI src) +%{ + match(Set dst (AbsI src)); + effect(TEMP dst); + format %{ "AbsI $dst, $src" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src$$Register; + + __ sra(AT, src, 31); + __ xorr(dst, src, AT); + __ subu32(dst, dst, AT); + %} + + ins_pipe(ialu_regI_regI); +%} + +// Long Absolute Instructions +instruct absL_rReg(mRegL dst, mRegL src) +%{ + match(Set dst (AbsL src)); + effect(TEMP dst); + format %{ "AbsL $dst, $src" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src$$Register; + + __ dsra32(AT, src, 31); + __ xorr(dst, src, AT); + __ subu(dst, dst, AT); + %} + + ins_pipe(ialu_regL_regL); +%} + +//----------Subtraction Instructions------------------------------------------- +// Integer Subtraction Instructions +instruct subI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ + match(Set dst (SubI src1 src2)); + ins_cost(100); + + format %{ "sub $dst, $src1, $src2 #@subI_Reg_Reg" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + Register src2 = $src2$$Register; + __ subu32(dst, src1, src2); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct subI_Reg_immI_M32767_32768(mRegI dst, mRegI src1, immI_M32767_32768 src2) %{ + match(Set dst (SubI src1 src2)); + ins_cost(80); + + format %{ "sub $dst, $src1, $src2 #@subI_Reg_immI_M32767_32768" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + __ addiu32(dst, src1, -1 * $src2$$constant); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct negI_Reg(mRegI dst, immI_0 zero, mRegI src) %{ + match(Set dst (SubI zero src)); + ins_cost(80); + + format %{ "neg $dst, $src #@negI_Reg" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src$$Register; + __ subu32(dst, R0, src); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct negL_Reg(mRegL dst, immL_0 zero, mRegL src) %{ + match(Set dst (SubL zero src)); + ins_cost(80); + + format %{ "neg $dst, $src #@negL_Reg" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src$$Register; + __ subu(dst, R0, src); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct subL_Reg_immL_M32767_32768(mRegL dst, mRegL src1, immL_M32767_32768 src2) %{ + match(Set dst (SubL src1 src2)); + ins_cost(80); + + format %{ "sub $dst, $src1, $src2 #@subL_Reg_immL_M32767_32768" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + __ daddiu(dst, src1, -1 * $src2$$constant); + %} + ins_pipe( ialu_regI_regI ); +%} + +// Subtract Long Register with Register. 
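+// Note: the (ConvI2L ...) forms below feed the int register straight into the long subtract;
+// this relies on int values being kept sign-extended in 64-bit registers (the MIPS64 convention),
+// so no explicit sign-extension code is emitted for the ConvI2L.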
+instruct subL_Reg_Reg(mRegL dst, mRegL src1, mRegL src2) %{ + match(Set dst (SubL src1 src2)); + ins_cost(100); + format %{ "SubL $dst, $src1, $src2 @ subL_Reg_Reg" %} + ins_encode %{ + Register dst = as_Register($dst$$reg); + Register src1 = as_Register($src1$$reg); + Register src2 = as_Register($src2$$reg); + + __ subu(dst, src1, src2); + %} + ins_pipe( ialu_regL_regL ); +%} + +instruct subL_Reg_RegI2L(mRegL dst, mRegL src1, mRegI src2) %{ + match(Set dst (SubL src1 (ConvI2L src2))); + ins_cost(100); + format %{ "SubL $dst, $src1, $src2 @ subL_Reg_RegI2L" %} + ins_encode %{ + Register dst = as_Register($dst$$reg); + Register src1 = as_Register($src1$$reg); + Register src2 = as_Register($src2$$reg); + + __ subu(dst, src1, src2); + %} + ins_pipe( ialu_regL_regL ); +%} + +instruct subL_RegI2L_Reg(mRegL dst, mRegI src1, mRegL src2) %{ + match(Set dst (SubL (ConvI2L src1) src2)); + ins_cost(200); + format %{ "SubL $dst, $src1, $src2 @ subL_RegI2L_Reg" %} + ins_encode %{ + Register dst = as_Register($dst$$reg); + Register src1 = as_Register($src1$$reg); + Register src2 = as_Register($src2$$reg); + + __ subu(dst, src1, src2); + %} + ins_pipe( ialu_regL_regL ); +%} + +instruct subL_RegI2L_RegI2L(mRegL dst, mRegI src1, mRegI src2) %{ + match(Set dst (SubL (ConvI2L src1) (ConvI2L src2))); + ins_cost(200); + format %{ "SubL $dst, $src1, $src2 @ subL_RegI2L_RegI2L" %} + ins_encode %{ + Register dst = as_Register($dst$$reg); + Register src1 = as_Register($src1$$reg); + Register src2 = as_Register($src2$$reg); + + __ subu(dst, src1, src2); + %} + ins_pipe( ialu_regL_regL ); +%} + +// Integer MOD with Register +instruct modI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ + match(Set dst (ModI src1 src2)); + ins_cost(300); + format %{ "modi $dst, $src1, $src2 @ modI_Reg_Reg" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + Register src2 = $src2$$Register; + + //if (UseLEXT1) { + if (0) { + // Experiments show that gsmod is slower than div+mfhi. + // So I just disable it here.
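+      // (dead branch: gsmod, the UseLEXT1-guarded Loongson remainder instruction, is kept for reference)
+      // The live path below uses div, which leaves the quotient in LO and the remainder in HI; mfhi then reads the remainder.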
+ __ gsmod(dst, src1, src2); + } else { + __ div(src1, src2); + __ mfhi(dst); + } + %} + + //ins_pipe( ialu_mod ); + ins_pipe( ialu_regI_regI ); +%} + +instruct modL_reg_reg(mRegL dst, mRegL src1, mRegL src2) %{ + match(Set dst (ModL src1 src2)); + format %{ "modL $dst, $src1, $src2 @modL_reg_reg" %} + + ins_encode %{ + Register dst = as_Register($dst$$reg); + Register op1 = as_Register($src1$$reg); + Register op2 = as_Register($src2$$reg); + + if (UseLEXT1) { + __ gsdmod(dst, op1, op2); + } else { + __ ddiv(op1, op2); + __ mfhi(dst); + } + %} + ins_pipe( pipe_slow ); +%} + +instruct mulI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ + match(Set dst (MulI src1 src2)); + + ins_cost(300); + format %{ "mul $dst, $src1, $src2 @ mulI_Reg_Reg" %} + ins_encode %{ + Register src1 = $src1$$Register; + Register src2 = $src2$$Register; + Register dst = $dst$$Register; + + __ mul(dst, src1, src2); + %} + ins_pipe( ialu_mult ); +%} + +instruct maddI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2, mRegI src3) %{ + match(Set dst (AddI (MulI src1 src2) src3)); + + ins_cost(999); + format %{ "madd $dst, $src1 * $src2 + $src3 #@maddI_Reg_Reg" %} + ins_encode %{ + Register src1 = $src1$$Register; + Register src2 = $src2$$Register; + Register src3 = $src3$$Register; + Register dst = $dst$$Register; + + // Preload LO with src3; madd then accumulates src1*src2 into HI/LO and mflo reads back the low result. + __ mtlo(src3); + __ madd(src1, src2); + __ mflo(dst); + %} + ins_pipe( ialu_mult ); +%} + +instruct divI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ + match(Set dst (DivI src1 src2)); + + ins_cost(300); + format %{ "div $dst, $src1, $src2 @ divI_Reg_Reg" %} + ins_encode %{ + Register src1 = $src1$$Register; + Register src2 = $src2$$Register; + Register dst = $dst$$Register; + + // On MIPS, integer div does not raise an exception on a zero divisor, + // so we trap it manually: teq traps when src2 equals R0 (i.e. src2 == 0). + __ teq(R0, src2, 0x7); + + if (UseLEXT1) { + __ gsdiv(dst, src1, src2); + } else { + __ div(src1, src2); + + __ nop(); + __ nop(); + __ mflo(dst); + } + %} + ins_pipe( ialu_mod ); +%} + +instruct divF_Reg_Reg(regF dst, regF src1, regF src2) %{ + match(Set dst (DivF src1 src2)); + + ins_cost(300); + format %{ "divF $dst, $src1, $src2 @ divF_Reg_Reg" %} + ins_encode %{ + FloatRegister src1 = $src1$$FloatRegister; + FloatRegister src2 = $src2$$FloatRegister; + FloatRegister dst = $dst$$FloatRegister; + + /* Do we need to trap an exception manually here? */ + __ div_s(dst, src1, src2); + %} + ins_pipe( pipe_slow ); +%} + +instruct divD_Reg_Reg(regD dst, regD src1, regD src2) %{ + match(Set dst (DivD src1 src2)); + + ins_cost(300); + format %{ "divD $dst, $src1, $src2 @ divD_Reg_Reg" %} + ins_encode %{ + FloatRegister src1 = $src1$$FloatRegister; + FloatRegister src2 = $src2$$FloatRegister; + FloatRegister dst = $dst$$FloatRegister; + + /* Do we need to trap an exception manually here?
*/ + __ div_d(dst, src1, src2); + %} + ins_pipe( pipe_slow ); +%} + +instruct mulL_reg_reg(mRegL dst, mRegL src1, mRegL src2) %{ + match(Set dst (MulL src1 src2)); + format %{ "mulL $dst, $src1, $src2 @mulL_reg_reg" %} + ins_encode %{ + Register dst = as_Register($dst$$reg); + Register op1 = as_Register($src1$$reg); + Register op2 = as_Register($src2$$reg); + + if (UseLEXT1) { + __ gsdmult(dst, op1, op2); + } else { + __ dmult(op1, op2); + __ mflo(dst); + } + %} + ins_pipe( pipe_slow ); +%} + +instruct mulL_reg_regI2L(mRegL dst, mRegL src1, mRegI src2) %{ + match(Set dst (MulL src1 (ConvI2L src2))); + format %{ "mulL $dst, $src1, $src2 @mulL_reg_regI2L" %} + ins_encode %{ + Register dst = as_Register($dst$$reg); + Register op1 = as_Register($src1$$reg); + Register op2 = as_Register($src2$$reg); + + if (UseLEXT1) { + __ gsdmult(dst, op1, op2); + } else { + __ dmult(op1, op2); + __ mflo(dst); + } + %} + ins_pipe( pipe_slow ); +%} + +instruct divL_reg_reg(mRegL dst, mRegL src1, mRegL src2) %{ + match(Set dst (DivL src1 src2)); + format %{ "divL $dst, $src1, $src2 @divL_reg_reg" %} + + ins_encode %{ + Register dst = as_Register($dst$$reg); + Register op1 = as_Register($src1$$reg); + Register op2 = as_Register($src2$$reg); + + if (UseLEXT1) { + __ gsddiv(dst, op1, op2); + } else { + __ ddiv(op1, op2); + __ mflo(dst); + } + %} + ins_pipe( pipe_slow ); +%} + +instruct addF_reg_reg(regF dst, regF src1, regF src2) %{ + match(Set dst (AddF src1 src2)); + format %{ "AddF $dst, $src1, $src2 @addF_reg_reg" %} + ins_encode %{ + FloatRegister src1 = as_FloatRegister($src1$$reg); + FloatRegister src2 = as_FloatRegister($src2$$reg); + FloatRegister dst = as_FloatRegister($dst$$reg); + + __ add_s(dst, src1, src2); + %} + ins_pipe( fpu_regF_regF ); +%} + +instruct subF_reg_reg(regF dst, regF src1, regF src2) %{ + match(Set dst (SubF src1 src2)); + format %{ "SubF $dst, $src1, $src2 @subF_reg_reg" %} + ins_encode %{ + FloatRegister src1 = as_FloatRegister($src1$$reg); + FloatRegister src2 = as_FloatRegister($src2$$reg); + FloatRegister dst = as_FloatRegister($dst$$reg); + + __ sub_s(dst, src1, src2); + %} + ins_pipe( fpu_regF_regF ); +%} +instruct addD_reg_reg(regD dst, regD src1, regD src2) %{ + match(Set dst (AddD src1 src2)); + format %{ "AddD $dst, $src1, $src2 @addD_reg_reg" %} + ins_encode %{ + FloatRegister src1 = as_FloatRegister($src1$$reg); + FloatRegister src2 = as_FloatRegister($src2$$reg); + FloatRegister dst = as_FloatRegister($dst$$reg); + + __ add_d(dst, src1, src2); + %} + ins_pipe( fpu_regF_regF ); +%} + +instruct subD_reg_reg(regD dst, regD src1, regD src2) %{ + match(Set dst (SubD src1 src2)); + format %{ "SubD $dst, $src1, $src2 @subD_reg_reg" %} + ins_encode %{ + FloatRegister src1 = as_FloatRegister($src1$$reg); + FloatRegister src2 = as_FloatRegister($src2$$reg); + FloatRegister dst = as_FloatRegister($dst$$reg); + + __ sub_d(dst, src1, src2); + %} + ins_pipe( fpu_regF_regF ); +%} + +instruct negF_reg(regF dst, regF src) %{ + match(Set dst (NegF src)); + format %{ "negF $dst, $src @negF_reg" %} + ins_encode %{ + FloatRegister src = as_FloatRegister($src$$reg); + FloatRegister dst = as_FloatRegister($dst$$reg); + + __ neg_s(dst, src); + %} + ins_pipe( fpu_regF_regF ); +%} + +instruct negD_reg(regD dst, regD src) %{ + match(Set dst (NegD src)); + format %{ "negD $dst, $src @negD_reg" %} + ins_encode %{ + FloatRegister src = as_FloatRegister($src$$reg); + FloatRegister dst = as_FloatRegister($dst$$reg); + + __ neg_d(dst, src); + %} + ins_pipe( fpu_regF_regF ); +%} + + +instruct 
mulF_reg_reg(regF dst, regF src1, regF src2) %{ + match(Set dst (MulF src1 src2)); + format %{ "MULF $dst, $src1, $src2 @mulF_reg_reg" %} + ins_encode %{ + FloatRegister src1 = $src1$$FloatRegister; + FloatRegister src2 = $src2$$FloatRegister; + FloatRegister dst = $dst$$FloatRegister; + + __ mul_s(dst, src1, src2); + %} + ins_pipe( fpu_regF_regF ); +%} + +// Multiply two double-precision floating point numbers +instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{ + match(Set dst (MulD src1 src2)); + format %{ "MULD $dst, $src1, $src2 @mulD_reg_reg" %} + ins_encode %{ + FloatRegister src1 = $src1$$FloatRegister; + FloatRegister src2 = $src2$$FloatRegister; + FloatRegister dst = $dst$$FloatRegister; + + __ mul_d(dst, src1, src2); + %} + ins_pipe( fpu_regF_regF ); +%} + +instruct absF_reg(regF dst, regF src) %{ + match(Set dst (AbsF src)); + ins_cost(100); + format %{ "absF $dst, $src @absF_reg" %} + ins_encode %{ + FloatRegister src = as_FloatRegister($src$$reg); + FloatRegister dst = as_FloatRegister($dst$$reg); + + __ abs_s(dst, src); + %} + ins_pipe( fpu_regF_regF ); +%} + + +// intrinsics for math_native. +// AbsD SqrtD CosD SinD TanD LogD Log10D + +instruct absD_reg(regD dst, regD src) %{ + match(Set dst (AbsD src)); + ins_cost(100); + format %{ "absD $dst, $src @absD_reg" %} + ins_encode %{ + FloatRegister src = as_FloatRegister($src$$reg); + FloatRegister dst = as_FloatRegister($dst$$reg); + + __ abs_d(dst, src); + %} + ins_pipe( fpu_regF_regF ); +%} + +instruct sqrtD_reg(regD dst, regD src) %{ + match(Set dst (SqrtD src)); + ins_cost(100); + format %{ "SqrtD $dst, $src @sqrtD_reg" %} + ins_encode %{ + FloatRegister src = as_FloatRegister($src$$reg); + FloatRegister dst = as_FloatRegister($dst$$reg); + + __ sqrt_d(dst, src); + %} + ins_pipe( fpu_regF_regF ); +%} + +instruct sqrtF_reg(regF dst, regF src) %{ + match(Set dst (ConvD2F (SqrtD (ConvF2D src)))); + ins_cost(100); + format %{ "SqrtF $dst, $src @sqrtF_reg" %} + ins_encode %{ + FloatRegister src = as_FloatRegister($src$$reg); + FloatRegister dst = as_FloatRegister($dst$$reg); + + __ sqrt_s(dst, src); + %} + ins_pipe( fpu_regF_regF ); +%} + +// src1 * src2 + src3 +instruct maddF_reg_reg(regF dst, regF src1, regF src2, regF src3) %{ + predicate(UseFMA); + match(Set dst (FmaF src3 (Binary src1 src2))); + + format %{ "madd_s $dst, $src3, $src2, $src1" %} + + ins_encode %{ + __ madd_s(as_FloatRegister($dst$$reg), as_FloatRegister($src3$$reg), + as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg)); + %} + + ins_pipe(fpu_regF_regF); +%} + +// src1 * src2 + src3 +instruct maddD_reg_reg(regD dst, regD src1, regD src2, regD src3) %{ + predicate(UseFMA); + match(Set dst (FmaD src3 (Binary src1 src2))); + + format %{ "madd_d $dst, $src3, $src2, $src1" %} + + ins_encode %{ + __ madd_d(as_FloatRegister($dst$$reg), as_FloatRegister($src3$$reg), + as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg)); + %} + + ins_pipe(fpu_regF_regF); +%} + +// src1 * src2 - src3 +instruct msubF_reg_reg(regF dst, regF src1, regF src2, regF src3, immF_0 zero) %{ + predicate(UseFMA); + match(Set dst (FmaF (NegF src3) (Binary src1 src2))); + + format %{ "msub_s $dst, $src3, $src2, $src1" %} + + ins_encode %{ + __ msub_s(as_FloatRegister($dst$$reg), as_FloatRegister($src3$$reg), + as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg)); + %} + + ins_pipe(fpu_regF_regF); +%} + +// src1 * src2 - src3 +instruct msubD_reg_reg(regD dst, regD src1, regD src2, regD src3, immD_0 zero) %{ + predicate(UseFMA); + match(Set dst (FmaD (NegD src3) (Binary src1
src2))); + + format %{ "msub_d $dst, $src3, $src2, $src1" %} + + ins_encode %{ + __ msub_d(as_FloatRegister($dst$$reg), as_FloatRegister($src3$$reg), + as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg)); + %} + + ins_pipe(fpu_regF_regF); +%} + +// -src1 * src2 - src3 +instruct mnaddF_reg_reg(regF dst, regF src1, regF src2, regF src3, immF_0 zero) %{ + predicate(UseFMA); + match(Set dst (FmaF (NegF src3) (Binary (NegF src1) src2))); + match(Set dst (FmaF (NegF src3) (Binary src1 (NegF src2)))); + + format %{ "nmadds $dst, $src3, $src2, $src1" %} + + ins_encode %{ + __ nmadd_s(as_FloatRegister($dst$$reg), as_FloatRegister($src3$$reg), + as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg)); + %} + + ins_pipe(fpu_regF_regF); +%} + +// -src1 * src2 - src3 +instruct mnaddD_reg_reg(regD dst, regD src1, regD src2, regD src3, immD_0 zero) %{ + predicate(UseFMA); + match(Set dst (FmaD (NegD src3) (Binary (NegD src1) src2))); + match(Set dst (FmaD (NegD src3) (Binary src1 (NegD src2)))); + + format %{ "nmaddd $dst, $src3, $src2, $src1" %} + + ins_encode %{ + __ nmadd_d(as_FloatRegister($dst$$reg), as_FloatRegister($src3$$reg), + as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg)); + %} + + ins_pipe(fpu_regF_regF); +%} + +// -src1 * src2 + src3 +instruct mnsubF_reg_reg(regF dst, regF src1, regF src2, regF src3) %{ + predicate(UseFMA); + match(Set dst (FmaF src3 (Binary (NegF src1) src2))); + match(Set dst (FmaF src3 (Binary src1 (NegF src2)))); + + format %{ "nmsubs $dst, $src3, $src2, $src1" %} + + ins_encode %{ + __ nmsub_s(as_FloatRegister($dst$$reg), as_FloatRegister($src3$$reg), + as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg)); + %} + + ins_pipe(fpu_regF_regF); +%} + +// -src1 * src2 + src3 +instruct mnsubD_reg_reg(regD dst, regD src1, regD src2, regD src3) %{ + predicate(UseFMA); + match(Set dst (FmaD src3 (Binary (NegD src1) src2))); + match(Set dst (FmaD src3 (Binary src1 (NegD src2)))); + + format %{ "nmsubd $dst, $src3, $src2, $src1" %} + + ins_encode %{ + __ nmsub_d(as_FloatRegister($dst$$reg), as_FloatRegister($src3$$reg), + as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg)); + %} + + ins_pipe(fpu_regF_regF); +%} +//----------------------------------Logical Instructions---------------------- +//__________________________________Integer Logical Instructions------------- + +//And Instuctions +// And Register with Immediate +instruct andI_Reg_immI(mRegI dst, mRegI src1, immI src2) %{ + match(Set dst (AndI src1 src2)); + + format %{ "and $dst, $src1, $src2 #@andI_Reg_immI" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src1$$Register; + int val = $src2$$constant; + + __ move(AT, val); + __ andr(dst, src, AT); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct andI_Reg_imm_0_65535(mRegI dst, mRegI src1, immI_0_65535 src2) %{ + match(Set dst (AndI src1 src2)); + ins_cost(60); + + format %{ "and $dst, $src1, $src2 #@andI_Reg_imm_0_65535" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src1$$Register; + int val = $src2$$constant; + + __ andi(dst, src, val); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct andI_Reg_immI_nonneg_mask(mRegI dst, mRegI src1, immI_nonneg_mask mask) %{ + match(Set dst (AndI src1 mask)); + ins_cost(60); + + format %{ "and $dst, $src1, $mask #@andI_Reg_immI_nonneg_mask" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src1$$Register; + int size = Assembler::is_int_mask($mask$$constant); + + __ ext(dst, src, 0, size); + %} + ins_pipe( ialu_regI_regI ); +%} + 
+instruct andL_Reg_immL_nonneg_mask(mRegL dst, mRegL src1, immL_nonneg_mask mask) %{ + match(Set dst (AndL src1 mask)); + ins_cost(60); + + format %{ "and $dst, $src1, $mask #@andL_Reg_immL_nonneg_mask" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src1$$Register; + int size = Assembler::is_jlong_mask($mask$$constant); + + __ dext(dst, src, 0, size); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct xorI_Reg_imm_0_65535(mRegI dst, mRegI src1, immI_0_65535 src2) %{ + match(Set dst (XorI src1 src2)); + ins_cost(60); + + format %{ "xori $dst, $src1, $src2 #@xorI_Reg_imm_0_65535" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src1$$Register; + int val = $src2$$constant; + + __ xori(dst, src, val); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct xorI_Reg_immI_M1(mRegI dst, mRegI src1, immI_M1 M1) %{ + match(Set dst (XorI src1 M1)); + predicate(UseLEXT3); + ins_cost(60); + + format %{ "xor $dst, $src1, $M1 #@xorI_Reg_immI_M1" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src1$$Register; + + __ gsorn(dst, R0, src); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct xorL2I_Reg_immI_M1(mRegI dst, mRegL src1, immI_M1 M1) %{ + match(Set dst (XorI (ConvL2I src1) M1)); + predicate(UseLEXT3); + ins_cost(60); + + format %{ "xor $dst, $src1, $M1 #@xorL2I_Reg_immI_M1" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src1$$Register; + + __ gsorn(dst, R0, src); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct xorL_Reg_imm_0_65535(mRegL dst, mRegL src1, immL_0_65535 src2) %{ + match(Set dst (XorL src1 src2)); + ins_cost(60); + + format %{ "xori $dst, $src1, $src2 #@xorL_Reg_imm_0_65535" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src1$$Register; + int val = $src2$$constant; + + __ xori(dst, src, val); + %} + ins_pipe( ialu_regI_regI ); +%} + +/* +instruct xorL_Reg_immL_M1(mRegL dst, mRegL src1, immL_M1 M1) %{ + match(Set dst (XorL src1 M1)); + predicate(UseLEXT3); + ins_cost(60); + + format %{ "xor $dst, $src1, $M1 #@xorL_Reg_immL_M1" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src1$$Register; + + __ gsorn(dst, R0, src); + %} + ins_pipe( ialu_regI_regI ); +%} +*/ + +instruct lbu_and_lmask(mRegI dst, memory mem, immI_255 mask) %{ + match(Set dst (AndI mask (LoadB mem))); + ins_cost(60); + + format %{ "lhu $dst, $mem #@lbu_and_lmask" %} + ins_encode %{ + __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_BYTE); + %} + ins_pipe( ialu_loadI ); +%} + +instruct lbu_and_rmask(mRegI dst, memory mem, immI_255 mask) %{ + match(Set dst (AndI (LoadB mem) mask)); + ins_cost(60); + + format %{ "lhu $dst, $mem #@lbu_and_rmask" %} + ins_encode %{ + __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_BYTE); + %} + ins_pipe( ialu_loadI ); +%} + +instruct andI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ + match(Set dst (AndI src1 src2)); + + format %{ "and $dst, $src1, $src2 #@andI_Reg_Reg" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + Register src2 = $src2$$Register; + __ andr(dst, src1, src2); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct andnI_Reg_nReg(mRegI dst, mRegI src1, mRegI src2, immI_M1 M1) %{ + match(Set dst (AndI src1 (XorI src2 M1))); + predicate(UseLEXT3); + + format %{ "andn $dst, $src1, $src2 #@andnI_Reg_nReg" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + Register 
src2 = $src2$$Register; + + __ gsandn(dst, src1, src2); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct ornI_Reg_nReg(mRegI dst, mRegI src1, mRegI src2, immI_M1 M1) %{ + match(Set dst (OrI src1 (XorI src2 M1))); + predicate(UseLEXT3); + + format %{ "orn $dst, $src1, $src2 #@ornI_Reg_nReg" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + Register src2 = $src2$$Register; + + __ gsorn(dst, src1, src2); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct andnI_nReg_Reg(mRegI dst, mRegI src1, mRegI src2, immI_M1 M1) %{ + match(Set dst (AndI (XorI src1 M1) src2)); + predicate(UseLEXT3); + + format %{ "andn $dst, $src2, $src1 #@andnI_nReg_Reg" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + Register src2 = $src2$$Register; + + __ gsandn(dst, src2, src1); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct ornI_nReg_Reg(mRegI dst, mRegI src1, mRegI src2, immI_M1 M1) %{ + match(Set dst (OrI (XorI src1 M1) src2)); + predicate(UseLEXT3); + + format %{ "orn $dst, $src2, $src1 #@ornI_nReg_Reg" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + Register src2 = $src2$$Register; + + __ gsorn(dst, src2, src1); + %} + ins_pipe( ialu_regI_regI ); +%} + +// And Long Register with Register +instruct andL_Reg_Reg(mRegL dst, mRegL src1, mRegL src2) %{ + match(Set dst (AndL src1 src2)); + format %{ "AND $dst, $src1, $src2 @ andL_Reg_Reg\n\t" %} + ins_encode %{ + Register dst_reg = as_Register($dst$$reg); + Register src1_reg = as_Register($src1$$reg); + Register src2_reg = as_Register($src2$$reg); + + __ andr(dst_reg, src1_reg, src2_reg); + %} + ins_pipe( ialu_regL_regL ); +%} + +instruct andL_Reg_Reg_convI2L(mRegL dst, mRegL src1, mRegI src2) %{ + match(Set dst (AndL src1 (ConvI2L src2))); + format %{ "AND $dst, $src1, $src2 @ andL_Reg_Reg_convI2L\n\t" %} + ins_encode %{ + Register dst_reg = as_Register($dst$$reg); + Register src1_reg = as_Register($src1$$reg); + Register src2_reg = as_Register($src2$$reg); + + __ andr(dst_reg, src1_reg, src2_reg); + %} + ins_pipe( ialu_regL_regL ); +%} + +instruct andL_Reg_imm_0_65535(mRegL dst, mRegL src1, immL_0_65535 src2) %{ + match(Set dst (AndL src1 src2)); + ins_cost(60); + + format %{ "and $dst, $src1, $src2 #@andL_Reg_imm_0_65535" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src1$$Register; + long val = $src2$$constant; + + __ andi(dst, src, val); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct andL2I_Reg_imm_0_65535(mRegI dst, mRegL src1, immL_0_65535 src2) %{ + match(Set dst (ConvL2I (AndL src1 src2))); + ins_cost(60); + + format %{ "and $dst, $src1, $src2 #@andL2I_Reg_imm_0_65535" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src1$$Register; + long val = $src2$$constant; + + __ andi(dst, src, val); + %} + ins_pipe( ialu_regI_regI ); +%} + +/* +instruct andnL_Reg_nReg(mRegL dst, mRegL src1, mRegL src2, immL_M1 M1) %{ + match(Set dst (AndL src1 (XorL src2 M1))); + predicate(UseLEXT3); + + format %{ "andn $dst, $src1, $src2 #@andnL_Reg_nReg" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + Register src2 = $src2$$Register; + + __ gsandn(dst, src1, src2); + %} + ins_pipe( ialu_regI_regI ); +%} +*/ + +/* +instruct ornL_Reg_nReg(mRegL dst, mRegL src1, mRegL src2, immL_M1 M1) %{ + match(Set dst (OrL src1 (XorL src2 M1))); + predicate(UseLEXT3); + + format %{ "orn $dst, $src1, $src2 #@ornL_Reg_nReg" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = 
$src1$$Register; + Register src2 = $src2$$Register; + + __ gsorn(dst, src1, src2); + %} + ins_pipe( ialu_regI_regI ); +%} +*/ + +/* +instruct andnL_nReg_Reg(mRegL dst, mRegL src1, mRegL src2, immL_M1 M1) %{ + match(Set dst (AndL (XorL src1 M1) src2)); + predicate(UseLEXT3); + + format %{ "andn $dst, $src2, $src1 #@andnL_nReg_Reg" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + Register src2 = $src2$$Register; + + __ gsandn(dst, src2, src1); + %} + ins_pipe( ialu_regI_regI ); +%} +*/ + +/* +instruct ornL_nReg_Reg(mRegL dst, mRegL src1, mRegL src2, immL_M1 M1) %{ + match(Set dst (OrL (XorL src1 M1) src2)); + predicate(UseLEXT3); + + format %{ "orn $dst, $src2, $src1 #@ornL_nReg_Reg" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + Register src2 = $src2$$Register; + + __ gsorn(dst, src2, src1); + %} + ins_pipe( ialu_regI_regI ); +%} +*/ + +instruct andL_Reg_immL_M8(mRegL dst, immL_M8 M8) %{ + match(Set dst (AndL dst M8)); + ins_cost(60); + + format %{ "and $dst, $dst, $M8 #@andL_Reg_immL_M8" %} + ins_encode %{ + Register dst = $dst$$Register; + + __ dins(dst, R0, 0, 3); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct andL_Reg_immL_M5(mRegL dst, immL_M5 M5) %{ + match(Set dst (AndL dst M5)); + ins_cost(60); + + format %{ "and $dst, $dst, $M5 #@andL_Reg_immL_M5" %} + ins_encode %{ + Register dst = $dst$$Register; + + __ dins(dst, R0, 2, 1); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct andL_Reg_immL_M7(mRegL dst, immL_M7 M7) %{ + match(Set dst (AndL dst M7)); + ins_cost(60); + + format %{ "and $dst, $dst, $M7 #@andL_Reg_immL_M7" %} + ins_encode %{ + Register dst = $dst$$Register; + + __ dins(dst, R0, 1, 2); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct andL_Reg_immL_M4(mRegL dst, immL_M4 M4) %{ + match(Set dst (AndL dst M4)); + ins_cost(60); + + format %{ "and $dst, $dst, $M4 #@andL_Reg_immL_M4" %} + ins_encode %{ + Register dst = $dst$$Register; + + __ dins(dst, R0, 0, 2); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct andL_Reg_immL_M121(mRegL dst, immL_M121 M121) %{ + match(Set dst (AndL dst M121)); + ins_cost(60); + + format %{ "and $dst, $dst, $M121 #@andL_Reg_immL_M121" %} + ins_encode %{ + Register dst = $dst$$Register; + + __ dins(dst, R0, 3, 4); + %} + ins_pipe( ialu_regI_regI ); +%} + +// Or Long Register with Register +instruct orL_Reg_Reg(mRegL dst, mRegL src1, mRegL src2) %{ + match(Set dst (OrL src1 src2)); + format %{ "OR $dst, $src1, $src2 @ orL_Reg_Reg\t" %} + ins_encode %{ + Register dst_reg = $dst$$Register; + Register src1_reg = $src1$$Register; + Register src2_reg = $src2$$Register; + + __ orr(dst_reg, src1_reg, src2_reg); + %} + ins_pipe( ialu_regL_regL ); +%} + +instruct orL_Reg_P2XReg(mRegL dst, mRegP src1, mRegL src2) %{ + match(Set dst (OrL (CastP2X src1) src2)); + format %{ "OR $dst, $src1, $src2 @ orL_Reg_P2XReg\t" %} + ins_encode %{ + Register dst_reg = $dst$$Register; + Register src1_reg = $src1$$Register; + Register src2_reg = $src2$$Register; + + __ orr(dst_reg, src1_reg, src2_reg); + %} + ins_pipe( ialu_regL_regL ); +%} + +// Xor Long Register with Register +instruct xorL_Reg_Reg(mRegL dst, mRegL src1, mRegL src2) %{ + match(Set dst (XorL src1 src2)); + format %{ "XOR $dst, $src1, $src2 @ xorL_Reg_Reg\t" %} + ins_encode %{ + Register dst_reg = as_Register($dst$$reg); + Register src1_reg = as_Register($src1$$reg); + Register src2_reg = as_Register($src2$$reg); + + __ xorr(dst_reg, src1_reg, src2_reg); + %} + ins_pipe( ialu_regL_regL ); +%} + +// Shift Left by 8-bit 
immediate +instruct salI_Reg_imm(mRegI dst, mRegI src, immI8 shift) %{ + match(Set dst (LShiftI src shift)); + + format %{ "SHL $dst, $src, $shift #@salI_Reg_imm" %} + ins_encode %{ + Register src = $src$$Register; + Register dst = $dst$$Register; + int shamt = $shift$$constant; + + __ sll(dst, src, shamt); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct salL2I_Reg_imm(mRegI dst, mRegL src, immI8 shift) %{ + match(Set dst (LShiftI (ConvL2I src) shift)); + + format %{ "SHL $dst, $src, $shift #@salL2I_Reg_imm" %} + ins_encode %{ + Register src = $src$$Register; + Register dst = $dst$$Register; + int shamt = $shift$$constant; + + __ sll(dst, src, shamt); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct salI_Reg_imm_and_M65536(mRegI dst, mRegI src, immI_16 shift, immI_M65536 mask) %{ + match(Set dst (AndI (LShiftI src shift) mask)); + + format %{ "SHL $dst, $src, $shift #@salI_Reg_imm_and_M65536" %} + ins_encode %{ + Register src = $src$$Register; + Register dst = $dst$$Register; + + __ sll(dst, src, 16); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct land7_2_s(mRegI dst, mRegL src, immL_7 seven, immI_16 sixteen) +%{ + match(Set dst (RShiftI (LShiftI (ConvL2I (AndL src seven)) sixteen) sixteen)); + + format %{ "andi $dst, $src, 7\t# @land7_2_s" %} + ins_encode %{ + Register src = $src$$Register; + Register dst = $dst$$Register; + + __ andi(dst, src, 7); + %} + ins_pipe(ialu_regI_regI); +%} + +// Logical Shift Right by 16, followed by Arithmetic Shift Left by 16. +// This idiom is used by the compiler the i2s bytecode. +instruct i2s(mRegI dst, mRegI src, immI_16 sixteen) +%{ + match(Set dst (RShiftI (LShiftI src sixteen) sixteen)); + + format %{ "i2s $dst, $src\t# @i2s" %} + ins_encode %{ + Register src = $src$$Register; + Register dst = $dst$$Register; + + __ seh(dst, src); + %} + ins_pipe(ialu_regI_regI); +%} + +// Logical Shift Right by 24, followed by Arithmetic Shift Left by 24. +// This idiom is used by the compiler for the i2b bytecode. 
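For context on the two idioms here: the i2s rule above and the i2b rule that follows match a left shift followed by an arithmetic right shift by the same amount and fold the pair into the single MIPS sign-extension instructions seh and seb. A small C++ sketch of why that folding is legal (illustration only, not part of the patch; two's-complement narrowing assumed):

    #include <cstdint>

    // i2b: "(x << 24) >> 24" with an arithmetic right shift is sign extension
    // of the low byte, which is exactly what seb computes.
    int32_t i2b(int32_t x) { return (int32_t)(int8_t)(x & 0xff); }

    // i2s: the 16-bit analogue, lowered to seh.
    int32_t i2s(int32_t x) { return (int32_t)(int16_t)(x & 0xffff); }
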
+instruct i2b(mRegI dst, mRegI src, immI_24 twentyfour) +%{ + match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour)); + + format %{ "i2b $dst, $src\t# @i2b" %} + ins_encode %{ + Register src = $src$$Register; + Register dst = $dst$$Register; + + __ seb(dst, src); + %} + ins_pipe(ialu_regI_regI); +%} + + +instruct salI_RegL2I_imm(mRegI dst, mRegL src, immI8 shift) %{ + match(Set dst (LShiftI (ConvL2I src) shift)); + + format %{ "SHL $dst, $src, $shift #@salI_RegL2I_imm" %} + ins_encode %{ + Register src = $src$$Register; + Register dst = $dst$$Register; + int shamt = $shift$$constant; + + __ sll(dst, src, shamt); + %} + ins_pipe( ialu_regI_regI ); +%} + +// Shift Left by 8-bit immediate +instruct salI_Reg_Reg(mRegI dst, mRegI src, mRegI shift) %{ + match(Set dst (LShiftI src shift)); + + format %{ "SHL $dst, $src, $shift #@salI_Reg_Reg" %} + ins_encode %{ + Register src = $src$$Register; + Register dst = $dst$$Register; + Register shamt = $shift$$Register; + __ sllv(dst, src, shamt); + %} + ins_pipe( ialu_regI_regI ); +%} + + +// Shift Left Long +instruct salL_Reg_imm(mRegL dst, mRegL src, immI8 shift) %{ + match(Set dst (LShiftL src shift)); + ins_cost(100); + format %{ "salL $dst, $src, $shift @ salL_Reg_imm" %} + ins_encode %{ + Register src_reg = as_Register($src$$reg); + Register dst_reg = as_Register($dst$$reg); + int shamt = $shift$$constant; + + if (__ is_simm(shamt, 5)) + __ dsll(dst_reg, src_reg, shamt); + else { + int sa = Assembler::low(shamt, 6); + if (sa < 32) { + __ dsll(dst_reg, src_reg, sa); + } else { + __ dsll32(dst_reg, src_reg, sa - 32); + } + } + %} + ins_pipe( ialu_regL_regL ); +%} + +instruct salL_RegI2L_imm(mRegL dst, mRegI src, immI8 shift) %{ + match(Set dst (LShiftL (ConvI2L src) shift)); + ins_cost(100); + format %{ "salL $dst, $src, $shift @ salL_RegI2L_imm" %} + ins_encode %{ + Register src_reg = as_Register($src$$reg); + Register dst_reg = as_Register($dst$$reg); + int shamt = $shift$$constant; + + if (__ is_simm(shamt, 5)) + __ dsll(dst_reg, src_reg, shamt); + else { + int sa = Assembler::low(shamt, 6); + if (sa < 32) { + __ dsll(dst_reg, src_reg, sa); + } else { + __ dsll32(dst_reg, src_reg, sa - 32); + } + } + %} + ins_pipe( ialu_regL_regL ); +%} + +// Shift Left Long +instruct salL_Reg_Reg(mRegL dst, mRegL src, mRegI shift) %{ + match(Set dst (LShiftL src shift)); + ins_cost(100); + format %{ "salL $dst, $src, $shift @ salL_Reg_Reg" %} + ins_encode %{ + Register src_reg = as_Register($src$$reg); + Register dst_reg = as_Register($dst$$reg); + + __ dsllv(dst_reg, src_reg, $shift$$Register); + %} + ins_pipe( ialu_regL_regL ); +%} + +// Shift Right Long +instruct sarL_Reg_imm(mRegL dst, mRegL src, immI8 shift) %{ + match(Set dst (RShiftL src shift)); + ins_cost(100); + format %{ "sarL $dst, $src, $shift @ sarL_Reg_imm" %} + ins_encode %{ + Register src_reg = as_Register($src$$reg); + Register dst_reg = as_Register($dst$$reg); + int shamt = ($shift$$constant & 0x3f); + if (__ is_simm(shamt, 5)) + __ dsra(dst_reg, src_reg, shamt); + else { + int sa = Assembler::low(shamt, 6); + if (sa < 32) { + __ dsra(dst_reg, src_reg, sa); + } else { + __ dsra32(dst_reg, src_reg, sa - 32); + } + } + %} + ins_pipe( ialu_regL_regL ); +%} + +instruct sarL2I_Reg_immI_32_63(mRegI dst, mRegL src, immI_32_63 shift) %{ + match(Set dst (ConvL2I (RShiftL src shift))); + ins_cost(100); + format %{ "sarL $dst, $src, $shift @ sarL2I_Reg_immI_32_63" %} + ins_encode %{ + Register src_reg = as_Register($src$$reg); + Register dst_reg = as_Register($dst$$reg); + int shamt = 
$shift$$constant; + + __ dsra32(dst_reg, src_reg, shamt - 32); + %} + ins_pipe( ialu_regL_regL ); +%} + +// Shift Right Long arithmetically +instruct sarL_Reg_Reg(mRegL dst, mRegL src, mRegI shift) %{ + match(Set dst (RShiftL src shift)); + ins_cost(100); + format %{ "sarL $dst, $src, $shift @ sarL_Reg_Reg" %} + ins_encode %{ + Register src_reg = as_Register($src$$reg); + Register dst_reg = as_Register($dst$$reg); + + __ dsrav(dst_reg, src_reg, $shift$$Register); + %} + ins_pipe( ialu_regL_regL ); +%} + +// Shift Right Long logically +instruct slrL_Reg_Reg(mRegL dst, mRegL src, mRegI shift) %{ + match(Set dst (URShiftL src shift)); + ins_cost(100); + format %{ "slrL $dst, $src, $shift @ slrL_Reg_Reg" %} + ins_encode %{ + Register src_reg = as_Register($src$$reg); + Register dst_reg = as_Register($dst$$reg); + + __ dsrlv(dst_reg, src_reg, $shift$$Register); + %} + ins_pipe( ialu_regL_regL ); +%} + +instruct slrL_Reg_immI_0_31(mRegL dst, mRegL src, immI_0_31 shift) %{ + match(Set dst (URShiftL src shift)); + ins_cost(80); + format %{ "slrL $dst, $src, $shift @ slrL_Reg_immI_0_31" %} + ins_encode %{ + Register src_reg = as_Register($src$$reg); + Register dst_reg = as_Register($dst$$reg); + int shamt = $shift$$constant; + + __ dsrl(dst_reg, src_reg, shamt); + %} + ins_pipe( ialu_regL_regL ); +%} + +instruct slrL_Reg_immI_0_31_and_max_int(mRegI dst, mRegL src, immI_0_31 shift, immI_MaxI max_int) %{ + match(Set dst (AndI (ConvL2I (URShiftL src shift)) max_int)); + ins_cost(80); + format %{ "dext $dst, $src, $shift, 31 @ slrL_Reg_immI_0_31_and_max_int" %} + ins_encode %{ + Register src_reg = as_Register($src$$reg); + Register dst_reg = as_Register($dst$$reg); + int shamt = $shift$$constant; + + __ dext(dst_reg, src_reg, shamt, 31); + %} + ins_pipe( ialu_regL_regL ); +%} + +instruct slrL_P2XReg_immI_0_31(mRegL dst, mRegP src, immI_0_31 shift) %{ + match(Set dst (URShiftL (CastP2X src) shift)); + ins_cost(80); + format %{ "slrL $dst, $src, $shift @ slrL_P2XReg_immI_0_31" %} + ins_encode %{ + Register src_reg = as_Register($src$$reg); + Register dst_reg = as_Register($dst$$reg); + int shamt = $shift$$constant; + + __ dsrl(dst_reg, src_reg, shamt); + %} + ins_pipe( ialu_regL_regL ); +%} + +instruct slrL_Reg_immI_32_63(mRegL dst, mRegL src, immI_32_63 shift) %{ + match(Set dst (URShiftL src shift)); + ins_cost(80); + format %{ "slrL $dst, $src, $shift @ slrL_Reg_immI_32_63" %} + ins_encode %{ + Register src_reg = as_Register($src$$reg); + Register dst_reg = as_Register($dst$$reg); + int shamt = $shift$$constant; + + __ dsrl32(dst_reg, src_reg, shamt - 32); + %} + ins_pipe( ialu_regL_regL ); +%} + +instruct slrL_Reg_immI_convL2I(mRegI dst, mRegL src, immI_32_63 shift) %{ + match(Set dst (ConvL2I (URShiftL src shift))); + predicate(n->in(1)->in(2)->get_int() > 32); + ins_cost(80); + format %{ "slrL $dst, $src, $shift @ slrL_Reg_immI_convL2I" %} + ins_encode %{ + Register src_reg = as_Register($src$$reg); + Register dst_reg = as_Register($dst$$reg); + int shamt = $shift$$constant; + + __ dsrl32(dst_reg, src_reg, shamt - 32); + %} + ins_pipe( ialu_regL_regL ); +%} + +instruct slrL_P2XReg_immI_32_63(mRegL dst, mRegP src, immI_32_63 shift) %{ + match(Set dst (URShiftL (CastP2X src) shift)); + ins_cost(80); + format %{ "slrL $dst, $src, $shift @ slrL_P2XReg_immI_32_63" %} + ins_encode %{ + Register src_reg = as_Register($src$$reg); + Register dst_reg = as_Register($dst$$reg); + int shamt = $shift$$constant; + + __ dsrl32(dst_reg, src_reg, shamt - 32); + %} + ins_pipe( ialu_regL_regL ); +%} + +// Xor 
Instructions +// Xor Register with Register +instruct xorI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ + match(Set dst (XorI src1 src2)); + + format %{ "XOR $dst, $src1, $src2 #@xorI_Reg_Reg" %} + + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + Register src2 = $src2$$Register; + __ xorr(dst, src1, src2); + %} + + ins_pipe( ialu_regI_regI ); +%} + +// Or Instructions +instruct orI_Reg_imm(mRegI dst, mRegI src1, immI_0_32767 src2) %{ + match(Set dst (OrI src1 src2)); + + format %{ "OR $dst, $src1, $src2 #@orI_Reg_imm" %} + ins_encode %{ + __ ori($dst$$Register, $src1$$Register, $src2$$constant); + %} + + ins_pipe( ialu_regI_regI ); +%} +// Or Register with Register +instruct orI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ + match(Set dst (OrI src1 src2)); + + format %{ "OR $dst, $src1, $src2 #@orI_Reg_Reg" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + Register src2 = $src2$$Register; + __ orr(dst, src1, src2); + %} + + ins_pipe( ialu_regI_regI ); +%} + +instruct rotI_shr_logical_Reg(mRegI dst, mRegI src, immI_0_31 rshift, immI_0_31 lshift, immI_1 one) %{ + match(Set dst (OrI (URShiftI src rshift) (LShiftI (AndI src one) lshift))); + predicate(32 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()))); + + format %{ "rotr $dst, $src, 1 ...\n\t" + "srl $dst, $dst, ($rshift-1) @ rotI_shr_logical_Reg" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src$$Register; + int rshift = $rshift$$constant; + + __ rotr(dst, src, 1); + if (rshift - 1) { + __ srl(dst, dst, rshift - 1); + } + %} + + ins_pipe( ialu_regI_regI ); +%} + +instruct orI_Reg_castP2X(mRegL dst, mRegL src1, mRegP src2) %{ + match(Set dst (OrI src1 (CastP2X src2))); + + format %{ "OR $dst, $src1, $src2 #@orI_Reg_castP2X" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + Register src2 = $src2$$Register; + __ orr(dst, src1, src2); + %} + + ins_pipe( ialu_regI_regI ); +%} + +// Logical Shift Right by 8-bit immediate +instruct shr_logical_Reg_imm(mRegI dst, mRegI src, immI8 shift) %{ + match(Set dst (URShiftI src shift)); + //effect(KILL cr); + + format %{ "SRL $dst, $src, $shift #@shr_logical_Reg_imm" %} + ins_encode %{ + Register src = $src$$Register; + Register dst = $dst$$Register; + int shift = $shift$$constant; + + __ srl(dst, src, shift); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct shr_logical_Reg_imm_nonneg_mask(mRegI dst, mRegI src, immI_0_31 shift, immI_nonneg_mask mask) %{ + match(Set dst (AndI (URShiftI src shift) mask)); + + format %{ "ext $dst, $src, $shift, one-bits($mask) #@shr_logical_Reg_imm_nonneg_mask" %} + ins_encode %{ + Register src = $src$$Register; + Register dst = $dst$$Register; + int pos = $shift$$constant; + int size = Assembler::is_int_mask($mask$$constant); + + __ ext(dst, src, pos, size); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct rolI_Reg_immI_0_31(mRegI dst, immI_0_31 lshift, immI_0_31 rshift) +%{ + predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f)); + match(Set dst (OrI (LShiftI dst lshift) (URShiftI dst rshift))); + + ins_cost(100); + format %{ "rotr $dst, $dst, $rshift #@rolI_Reg_immI_0_31" %} + ins_encode %{ + Register dst = $dst$$Register; + int sa = $rshift$$constant; + + __ rotr(dst, dst, sa); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct rolL_Reg_immI_0_31(mRegL dst, mRegL src, immI_32_63 lshift, immI_0_31 rshift) +%{ + predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 
0x3f)); + match(Set dst (OrL (LShiftL src lshift) (URShiftL src rshift))); + + ins_cost(100); + format %{ "rotr $dst, $src, $rshift #@rolL_Reg_immI_0_31" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src$$Register; + int sa = $rshift$$constant; + + __ drotr(dst, src, sa); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct rolL_Reg_immI_32_63(mRegL dst, mRegL src, immI_0_31 lshift, immI_32_63 rshift) +%{ + predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f)); + match(Set dst (OrL (LShiftL src lshift) (URShiftL src rshift))); + + ins_cost(100); + format %{ "rotr $dst, $src, $rshift #@rolL_Reg_immI_32_63" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src$$Register; + int sa = $rshift$$constant; + + __ drotr32(dst, src, sa - 32); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct rorI_Reg_immI_0_31(mRegI dst, mRegI src, immI_0_31 rshift, immI_0_31 lshift) +%{ + predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f)); + match(Set dst (OrI (URShiftI src rshift) (LShiftI src lshift))); + + ins_cost(100); + format %{ "rotr $dst, $src, $rshift #@rorI_Reg_immI_0_31" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src$$Register; + int sa = $rshift$$constant; + + __ rotr(dst, src, sa); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct rorL_Reg_immI_0_31(mRegL dst, mRegL src, immI_0_31 rshift, immI_32_63 lshift) +%{ + predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f)); + match(Set dst (OrL (URShiftL src rshift) (LShiftL src lshift))); + + ins_cost(100); + format %{ "rotr $dst, $src, $rshift #@rorL_Reg_immI_0_31" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src$$Register; + int sa = $rshift$$constant; + + __ drotr(dst, src, sa); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct rorL_Reg_immI_32_63(mRegL dst, mRegL src, immI_32_63 rshift, immI_0_31 lshift) +%{ + predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f)); + match(Set dst (OrL (URShiftL src rshift) (LShiftL src lshift))); + + ins_cost(100); + format %{ "rotr $dst, $src, $rshift #@rorL_Reg_immI_32_63" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src$$Register; + int sa = $rshift$$constant; + + __ drotr32(dst, src, sa - 32); + %} + ins_pipe( ialu_regI_regI ); +%} + +// Logical Shift Right +instruct shr_logical_Reg_Reg(mRegI dst, mRegI src, mRegI shift) %{ + match(Set dst (URShiftI src shift)); + + format %{ "SRL $dst, $src, $shift #@shr_logical_Reg_Reg" %} + ins_encode %{ + Register src = $src$$Register; + Register dst = $dst$$Register; + Register shift = $shift$$Register; + __ srlv(dst, src, shift); + %} + ins_pipe( ialu_regI_regI ); +%} + + +instruct shr_arith_Reg_imm(mRegI dst, mRegI src, immI8 shift) %{ + match(Set dst (RShiftI src shift)); + // effect(KILL cr); + + format %{ "SRA $dst, $src, $shift #@shr_arith_Reg_imm" %} + ins_encode %{ + Register src = $src$$Register; + Register dst = $dst$$Register; + int shift = $shift$$constant; + __ sra(dst, src, shift); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct shr_arith_Reg_Reg(mRegI dst, mRegI src, mRegI shift) %{ + match(Set dst (RShiftI src shift)); + // effect(KILL cr); + + format %{ "SRA $dst, $src, $shift #@shr_arith_Reg_Reg" %} + ins_encode %{ + Register src = $src$$Register; + Register dst = $dst$$Register; + Register shift = $shift$$Register; + __ srav(dst, src, shift); + %} + ins_pipe( ialu_regI_regI ); +%} + +//----------Convert Int to 
Boolean--------------------------------------------- + +instruct convI2B(mRegI dst, mRegI src) %{ + match(Set dst (Conv2B src)); + + ins_cost(100); + format %{ "convI2B $dst, $src @ convI2B" %} + ins_encode %{ + Register dst = as_Register($dst$$reg); + Register src = as_Register($src$$reg); + + if (dst != src) { + __ daddiu(dst, R0, 1); + __ movz(dst, R0, src); + } else { + __ move(AT, src); + __ daddiu(dst, R0, 1); + __ movz(dst, R0, AT); + } + %} + + ins_pipe( ialu_regL_regL ); +%} + +instruct convI2L_reg( mRegL dst, mRegI src) %{ + match(Set dst (ConvI2L src)); + + ins_cost(100); + format %{ "SLL $dst, $src @ convI2L_reg\t" %} + ins_encode %{ + Register dst = as_Register($dst$$reg); + Register src = as_Register($src$$reg); + + if(dst != src) __ sll(dst, src, 0); + %} + ins_pipe( ialu_regL_regL ); +%} + + +instruct convL2I_reg( mRegI dst, mRegL src ) %{ + match(Set dst (ConvL2I src)); + + format %{ "MOV $dst, $src @ convL2I_reg" %} + ins_encode %{ + Register dst = as_Register($dst$$reg); + Register src = as_Register($src$$reg); + + __ sll(dst, src, 0); + %} + + ins_pipe( ialu_regI_regI ); +%} + +instruct convL2I2L_reg( mRegL dst, mRegL src ) %{ + match(Set dst (ConvI2L (ConvL2I src))); + + format %{ "sll $dst, $src, 0 @ convL2I2L_reg" %} + ins_encode %{ + Register dst = as_Register($dst$$reg); + Register src = as_Register($src$$reg); + + __ sll(dst, src, 0); + %} + + ins_pipe( ialu_regI_regI ); +%} + +instruct convL2D_reg( regD dst, mRegL src ) %{ + match(Set dst (ConvL2D src)); + format %{ "convL2D $dst, $src @ convL2D_reg" %} + ins_encode %{ + Register src = as_Register($src$$reg); + FloatRegister dst = as_FloatRegister($dst$$reg); + + __ dmtc1(src, dst); + __ cvt_d_l(dst, dst); + %} + + ins_pipe( pipe_slow ); +%} + + +instruct convD2L_reg_fast( mRegL dst, regD src ) %{ + match(Set dst (ConvD2L src)); + ins_cost(150); + format %{ "convD2L $dst, $src @ convD2L_reg_fast" %} + ins_encode %{ + Register dst = as_Register($dst$$reg); + FloatRegister src = as_FloatRegister($src$$reg); + + Label Done; + + __ trunc_l_d(F30, src); + // max_long: 0x7fffffffffffffff + // __ set64(AT, 0x7fffffffffffffff); + __ daddiu(AT, R0, -1); + __ dsrl(AT, AT, 1); + __ dmfc1(dst, F30); + + __ bne(dst, AT, Done); + __ delayed()->mtc1(R0, F30); + + __ cvt_d_w(F30, F30); + __ c_ult_d(src, F30); + __ bc1f(Done); + __ delayed()->daddiu(T9, R0, -1); + + __ c_un_d(src, src); //NaN? + __ subu(dst, T9, AT); + __ movt(dst, R0); + + __ bind(Done); + %} + + ins_pipe( pipe_slow ); +%} + + +instruct convD2L_reg_slow( mRegL dst, regD src ) %{ + match(Set dst (ConvD2L src)); + ins_cost(250); + format %{ "convD2L $dst, $src @ convD2L_reg_slow" %} + ins_encode %{ + Register dst = as_Register($dst$$reg); + FloatRegister src = as_FloatRegister($src$$reg); + + Label L; + + __ c_un_d(src, src); //NaN? + __ bc1t(L); + __ delayed(); + __ move(dst, R0); + + __ trunc_l_d(F30, src); + __ cfc1(AT, 31); + __ li(T9, 0x10000); + __ andr(AT, AT, T9); + __ beq(AT, R0, L); + __ delayed()->dmfc1(dst, F30); + + __ mov_d(F12, src); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::d2l), 1); + __ move(dst, V0); + __ bind(L); + %} + + ins_pipe( pipe_slow ); +%} + + +instruct convF2I_reg_fast( mRegI dst, regF src ) %{ + match(Set dst (ConvF2I src)); + ins_cost(150); + format %{ "convf2i $dst, $src @ convF2I_reg_fast" %} + ins_encode %{ + Register dreg = $dst$$Register; + FloatRegister fval = $src$$FloatRegister; + Label L; + + __ trunc_w_s(F30, fval); + __ move(AT, 0x7fffffff); + __ mfc1(dreg, F30); + __ c_un_s(fval, fval); //NaN? 
+ __ movt(dreg, R0); + + __ bne(AT, dreg, L); + __ delayed()->lui(T9, 0x8000); + + __ mfc1(AT, fval); + __ andr(AT, AT, T9); + + __ movn(dreg, T9, AT); + + __ bind(L); + + %} + + ins_pipe( pipe_slow ); +%} + + + +instruct convF2I_reg_slow( mRegI dst, regF src ) %{ + match(Set dst (ConvF2I src)); + ins_cost(250); + format %{ "convf2i $dst, $src @ convF2I_reg_slow" %} + ins_encode %{ + Register dreg = $dst$$Register; + FloatRegister fval = $src$$FloatRegister; + Label L; + + __ c_un_s(fval, fval); //NaN? + __ bc1t(L); + __ delayed(); + __ move(dreg, R0); + + __ trunc_w_s(F30, fval); + + /* Call SharedRuntime:f2i() to do valid convention */ + __ cfc1(AT, 31); + __ li(T9, 0x10000); + __ andr(AT, AT, T9); + __ beq(AT, R0, L); + __ delayed()->mfc1(dreg, F30); + + __ mov_s(F12, fval); + + //This bug was found when running ezDS's control-panel. + // J 982 C2 javax.swing.text.BoxView.layoutMajorAxis(II[I[I)V (283 bytes) @ 0x000000555c46aa74 + // + // An interger array index has been assigned to V0, and then changed from 1 to Integer.MAX_VALUE. + // V0 is corrupted during call_VM_leaf(), and should be preserved. + // + __ push(fval); + if(dreg != V0) { + __ push(V0); + } + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::f2i), 1); + if(dreg != V0) { + __ move(dreg, V0); + __ pop(V0); + } + __ pop(fval); + __ bind(L); + %} + + ins_pipe( pipe_slow ); +%} + + +instruct convF2L_reg_fast( mRegL dst, regF src ) %{ + match(Set dst (ConvF2L src)); + ins_cost(150); + format %{ "convf2l $dst, $src @ convF2L_reg_fast" %} + ins_encode %{ + Register dreg = $dst$$Register; + FloatRegister fval = $src$$FloatRegister; + Label L; + + __ trunc_l_s(F30, fval); + __ daddiu(AT, R0, -1); + __ dsrl(AT, AT, 1); + __ dmfc1(dreg, F30); + __ c_un_s(fval, fval); //NaN? + __ movt(dreg, R0); + + __ bne(AT, dreg, L); + __ delayed()->lui(T9, 0x8000); + + __ mfc1(AT, fval); + __ andr(AT, AT, T9); + + __ dsll32(T9, T9, 0); + __ movn(dreg, T9, AT); + + __ bind(L); + %} + + ins_pipe( pipe_slow ); +%} + + +instruct convF2L_reg_slow( mRegL dst, regF src ) %{ + match(Set dst (ConvF2L src)); + ins_cost(250); + format %{ "convf2l $dst, $src @ convF2L_reg_slow" %} + ins_encode %{ + Register dst = as_Register($dst$$reg); + FloatRegister fval = $src$$FloatRegister; + Label L; + + __ c_un_s(fval, fval); //NaN? 
+ __ bc1t(L); + __ delayed(); + __ move(dst, R0); + + __ trunc_l_s(F30, fval); + __ cfc1(AT, 31); + __ li(T9, 0x10000); + __ andr(AT, AT, T9); + __ beq(AT, R0, L); + __ delayed()->dmfc1(dst, F30); + + __ mov_s(F12, fval); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::f2l), 1); + __ move(dst, V0); + __ bind(L); + %} + + ins_pipe( pipe_slow ); +%} + +instruct convL2F_reg( regF dst, mRegL src ) %{ + match(Set dst (ConvL2F src)); + format %{ "convl2f $dst, $src @ convL2F_reg" %} + ins_encode %{ + FloatRegister dst = $dst$$FloatRegister; + Register src = as_Register($src$$reg); + Label L; + + __ dmtc1(src, dst); + __ cvt_s_l(dst, dst); + %} + + ins_pipe( pipe_slow ); +%} + +instruct convI2F_reg( regF dst, mRegI src ) %{ + match(Set dst (ConvI2F src)); + format %{ "convi2f $dst, $src @ convI2F_reg" %} + ins_encode %{ + Register src = $src$$Register; + FloatRegister dst = $dst$$FloatRegister; + + __ mtc1(src, dst); + __ cvt_s_w(dst, dst); + %} + + ins_pipe( fpu_regF_regF ); +%} + +instruct cmpLTMask_immI_0( mRegI dst, mRegI p, immI_0 zero ) %{ + match(Set dst (CmpLTMask p zero)); + ins_cost(100); + + format %{ "sra $dst, $p, 31 @ cmpLTMask_immI_0" %} + ins_encode %{ + Register src = $p$$Register; + Register dst = $dst$$Register; + + __ sra(dst, src, 31); + %} + ins_pipe( pipe_slow ); +%} + + +instruct cmpLTMask( mRegI dst, mRegI p, mRegI q ) %{ + match(Set dst (CmpLTMask p q)); + ins_cost(400); + + format %{ "cmpLTMask $dst, $p, $q @ cmpLTMask" %} + ins_encode %{ + Register p = $p$$Register; + Register q = $q$$Register; + Register dst = $dst$$Register; + + __ slt(dst, p, q); + __ subu(dst, R0, dst); + %} + ins_pipe( pipe_slow ); +%} + +instruct convP2B(mRegI dst, mRegP src) %{ + match(Set dst (Conv2B src)); + + ins_cost(100); + format %{ "convP2B $dst, $src @ convP2B" %} + ins_encode %{ + Register dst = as_Register($dst$$reg); + Register src = as_Register($src$$reg); + + if (dst != src) { + __ daddiu(dst, R0, 1); + __ movz(dst, R0, src); + } else { + __ move(AT, src); + __ daddiu(dst, R0, 1); + __ movz(dst, R0, AT); + } + %} + + ins_pipe( ialu_regL_regL ); +%} + + +instruct convI2D_reg_reg(regD dst, mRegI src) %{ + match(Set dst (ConvI2D src)); + format %{ "conI2D $dst, $src @convI2D_reg" %} + ins_encode %{ + Register src = $src$$Register; + FloatRegister dst = $dst$$FloatRegister; + __ mtc1(src, dst); + __ cvt_d_w(dst, dst); + %} + ins_pipe( fpu_regF_regF ); +%} + +instruct convF2D_reg_reg(regD dst, regF src) %{ + match(Set dst (ConvF2D src)); + format %{ "convF2D $dst, $src\t# @convF2D_reg_reg" %} + ins_encode %{ + FloatRegister dst = $dst$$FloatRegister; + FloatRegister src = $src$$FloatRegister; + + __ cvt_d_s(dst, src); + %} + ins_pipe( fpu_regF_regF ); +%} + +instruct convD2F_reg_reg(regF dst, regD src) %{ + match(Set dst (ConvD2F src)); + format %{ "convD2F $dst, $src\t# @convD2F_reg_reg" %} + ins_encode %{ + FloatRegister dst = $dst$$FloatRegister; + FloatRegister src = $src$$FloatRegister; + + __ cvt_s_d(dst, src); + %} + ins_pipe( fpu_regF_regF ); +%} + + +// Convert a double to an int. If the double is a NAN, stuff a zero in instead. 
+instruct convD2I_reg_reg_fast( mRegI dst, regD src ) %{ + match(Set dst (ConvD2I src)); + + ins_cost(150); + format %{ "convD2I $dst, $src\t# @ convD2I_reg_reg_fast" %} + + ins_encode %{ + FloatRegister src = $src$$FloatRegister; + Register dst = $dst$$Register; + + Label Done; + + __ trunc_w_d(F30, src); + // max_int: 2147483647 + __ move(AT, 0x7fffffff); + __ mfc1(dst, F30); + + __ bne(dst, AT, Done); + __ delayed()->mtc1(R0, F30); + + __ cvt_d_w(F30, F30); + __ c_ult_d(src, F30); + __ bc1f(Done); + __ delayed()->addiu(T9, R0, -1); + + __ c_un_d(src, src); //NaN? + __ subu32(dst, T9, AT); + __ movt(dst, R0); + + __ bind(Done); + %} + ins_pipe( pipe_slow ); +%} + + +instruct convD2I_reg_reg_slow( mRegI dst, regD src ) %{ + match(Set dst (ConvD2I src)); + + ins_cost(250); + format %{ "convD2I $dst, $src\t# @ convD2I_reg_reg_slow" %} + + ins_encode %{ + FloatRegister src = $src$$FloatRegister; + Register dst = $dst$$Register; + Label L; + + __ trunc_w_d(F30, src); + __ cfc1(AT, 31); + __ li(T9, 0x10000); + __ andr(AT, AT, T9); + __ beq(AT, R0, L); + __ delayed()->mfc1(dst, F30); + + __ mov_d(F12, src); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::d2i), 1); + __ move(dst, V0); + __ bind(L); + + %} + ins_pipe( pipe_slow ); +%} + +// Convert oop pointer into compressed form +instruct encodeHeapOop(mRegN dst, mRegP src) %{ + predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull); + match(Set dst (EncodeP src)); + format %{ "encode_heap_oop $dst,$src" %} + ins_encode %{ + Register src = $src$$Register; + Register dst = $dst$$Register; + + __ encode_heap_oop(dst, src); + %} + ins_pipe( ialu_regL_regL ); +%} + +instruct encodeHeapOop_not_null(mRegN dst, mRegP src) %{ + predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull); + match(Set dst (EncodeP src)); + format %{ "encode_heap_oop_not_null $dst,$src @ encodeHeapOop_not_null" %} + ins_encode %{ + __ encode_heap_oop_not_null($dst$$Register, $src$$Register); + %} + ins_pipe( ialu_regL_regL ); +%} + +instruct decodeHeapOop(mRegP dst, mRegN src) %{ + predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull && + n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant); + match(Set dst (DecodeN src)); + format %{ "decode_heap_oop $dst,$src @ decodeHeapOop" %} + ins_encode %{ + Register s = $src$$Register; + Register d = $dst$$Register; + + __ decode_heap_oop(d, s); + %} + ins_pipe( ialu_regL_regL ); +%} + +instruct decodeHeapOop_not_null(mRegP dst, mRegN src) %{ + predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull || + n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant); + match(Set dst (DecodeN src)); + format %{ "decode_heap_oop_not_null $dst,$src @ decodeHeapOop_not_null" %} + ins_encode %{ + Register s = $src$$Register; + Register d = $dst$$Register; + if (s != d) { + __ decode_heap_oop_not_null(d, s); + } else { + __ decode_heap_oop_not_null(d); + } + %} + ins_pipe( ialu_regL_regL ); +%} + +instruct encodeKlass_not_null(mRegN dst, mRegP src) %{ + match(Set dst (EncodePKlass src)); + format %{ "encode_heap_oop_not_null $dst,$src @ encodeKlass_not_null" %} + ins_encode %{ + __ encode_klass_not_null($dst$$Register, $src$$Register); + %} + ins_pipe( ialu_regL_regL ); +%} + +instruct decodeKlass_not_null(mRegP dst, mRegN src) %{ + match(Set dst (DecodeNKlass src)); + format %{ "decode_heap_klass_not_null $dst,$src" %} + ins_encode %{ + Register s = $src$$Register; + Register d = $dst$$Register; + if (s != d) { + __ decode_klass_not_null(d, s); + } else { + __ decode_klass_not_null(d); + } + 
%} + ins_pipe( ialu_regL_regL ); +%} + +//FIXME +instruct tlsLoadP(mRegP dst) %{ + match(Set dst (ThreadLocal)); + + ins_cost(0); + format %{ " get_thread in $dst #@tlsLoadP" %} + ins_encode %{ + Register dst = $dst$$Register; +#ifdef OPT_THREAD + __ move(dst, TREG); +#else + __ get_thread(dst); +#endif + %} + + ins_pipe( ialu_loadI ); +%} + + +instruct checkCastPP( mRegP dst ) %{ + match(Set dst (CheckCastPP dst)); + + format %{ "#checkcastPP of $dst (empty encoding) #@chekCastPP" %} + ins_encode( /*empty encoding*/ ); + ins_pipe( empty ); +%} + +instruct castPP(mRegP dst) +%{ + match(Set dst (CastPP dst)); + + size(0); + format %{ "# castPP of $dst" %} + ins_encode(/* empty encoding */); + ins_pipe(empty); +%} + +instruct castII( mRegI dst ) %{ + match(Set dst (CastII dst)); + format %{ "#castII of $dst empty encoding" %} + ins_encode( /*empty encoding*/ ); + ins_cost(0); + ins_pipe( empty ); +%} + +// Return Instruction +// Remove the return address & jump to it. +instruct Ret() %{ + match(Return); + format %{ "RET #@Ret" %} + + ins_encode %{ + __ jr(RA); + __ delayed()->nop(); + %} + + ins_pipe( pipe_jump ); +%} + +/* +// For Loongson CPUs, jr seems too slow, so this rule shouldn't be imported. +instruct jumpXtnd(mRegL switch_val) %{ + match(Jump switch_val); + + ins_cost(350); + + format %{ "load T9 <-- [$constanttablebase, $switch_val, $constantoffset] @ jumpXtnd\n\t" + "jr T9\n\t" + "nop" %} + ins_encode %{ + Register table_base = $constanttablebase; + int con_offset = $constantoffset; + Register switch_reg = $switch_val$$Register; + + if (UseLEXT1) { + if (Assembler::is_simm(con_offset, 8)) { + __ gsldx(T9, table_base, switch_reg, con_offset); + } else if (Assembler::is_simm16(con_offset)) { + __ daddu(T9, table_base, switch_reg); + __ ld(T9, T9, con_offset); + } else { + __ move(T9, con_offset); + __ daddu(AT, table_base, switch_reg); + __ gsldx(T9, AT, T9, 0); + } + } else { + if (Assembler::is_simm16(con_offset)) { + __ daddu(T9, table_base, switch_reg); + __ ld(T9, T9, con_offset); + } else { + __ move(T9, con_offset); + __ daddu(AT, table_base, switch_reg); + __ daddu(AT, T9, AT); + __ ld(T9, AT, 0); + } + } + + __ jr(T9); + __ delayed()->nop(); + + %} + ins_pipe(pipe_jump); +%} +*/ + + +// Tail Jump; remove the return address; jump to target. +// TailCall above leaves the return address around. +// TailJump is used in only one place, the rethrow_Java stub (fancy_jump=2). +// ex_oop (Exception Oop) is needed in %o0 at the jump. As there would be a +// "restore" before this instruction (in Epilogue), we need to materialize it +// in %i0. +//FIXME +instruct tailjmpInd(mRegP jump_target,mRegP ex_oop) %{ + match( TailJump jump_target ex_oop ); + ins_cost(200); + format %{ "Jmp $jump_target ; ex_oop = $ex_oop #@tailjmpInd" %} + ins_encode %{ + Register target = $jump_target$$Register; + + // V0, V1 are indicated in: + // [stubGenerator_mips.cpp] generate_forward_exception() + // [runtime_mips.cpp] OptoRuntime::generate_exception_blob() + // + Register oop = $ex_oop$$Register; + Register exception_oop = V0; + Register exception_pc = V1; + + __ move(exception_pc, RA); + __ move(exception_oop, oop); + + __ jr(target); + __ delayed()->nop(); + %} + ins_pipe( pipe_jump ); +%} + +// ============================================================================ +// Procedure Call/Return Instructions +// Call Java Static Instruction +// Note: If this code changes, the corresponding ret_addr_offset() and +// compute_padding() functions will have to be adjusted. 
+instruct CallStaticJavaDirect(method meth) %{ + match(CallStaticJava); + effect(USE meth); + + ins_cost(300); + format %{ "CALL,static #@CallStaticJavaDirect " %} + ins_encode( Java_Static_Call( meth ) ); + ins_pipe( pipe_slow ); + ins_pc_relative(1); + ins_alignment(16); +%} + +// Call Java Dynamic Instruction +// Note: If this code changes, the corresponding ret_addr_offset() and +// compute_padding() functions will have to be adjusted. +instruct CallDynamicJavaDirect(method meth) %{ + match(CallDynamicJava); + effect(USE meth); + + ins_cost(300); + format %{"MOV IC_Klass, #Universe::non_oop_word()\n\t" + "CallDynamic @ CallDynamicJavaDirect" %} + ins_encode( Java_Dynamic_Call( meth ) ); + ins_pipe( pipe_slow ); + ins_pc_relative(1); + ins_alignment(16); +%} + +instruct CallLeafNoFPDirect(method meth) %{ + match(CallLeafNoFP); + effect(USE meth); + + ins_cost(300); + format %{ "CALL_LEAF_NOFP,runtime " %} + ins_encode(Java_To_Runtime(meth)); + ins_pipe( pipe_slow ); + ins_pc_relative(1); + ins_alignment(16); +%} + +// Prefetch instructions for allocation. + +instruct prefetchAllocNTA( memory mem ) %{ + match(PrefetchAllocation mem); + ins_cost(125); + format %{ "pref $mem\t# Prefetch allocation @ prefetchAllocNTA" %} + ins_encode %{ + __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_BYTE); + %} + ins_pipe(pipe_slow); +%} + + +// Call runtime without safepoint +instruct CallLeafDirect(method meth) %{ + match(CallLeaf); + effect(USE meth); + + ins_cost(300); + format %{ "CALL_LEAF,runtime #@CallLeafDirect " %} + ins_encode(Java_To_Runtime(meth)); + ins_pipe( pipe_slow ); + ins_pc_relative(1); + ins_alignment(16); +%} + +// Load Char (16bit unsigned) +instruct loadUS(mRegI dst, memory mem) %{ + match(Set dst (LoadUS mem)); + + ins_cost(125); + format %{ "loadUS $dst,$mem @ loadC" %} + ins_encode %{ + __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_SHORT); + %} + ins_pipe( ialu_loadI ); +%} + +instruct loadUS_convI2L(mRegL dst, memory mem) %{ + match(Set dst (ConvI2L (LoadUS mem))); + + ins_cost(125); + format %{ "loadUS $dst,$mem @ loadUS_convI2L" %} + ins_encode %{ + __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_SHORT); + %} + ins_pipe( ialu_loadI ); +%} + +// Store Char (16bit unsigned) +instruct storeC(memory mem, mRegI src) %{ + match(Set mem (StoreC mem src)); + + ins_cost(125); + format %{ "storeC $src, $mem @ storeC" %} + ins_encode %{ + __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_CHAR); + %} + ins_pipe( ialu_loadI ); +%} + +instruct storeC_0(memory mem, immI_0 zero) %{ + match(Set mem (StoreC mem zero)); + + ins_cost(125); + format %{ "storeC $zero, $mem @ storeC_0" %} + ins_encode %{ + __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_SHORT); + %} + ins_pipe( ialu_loadI ); +%} + + +instruct loadConF_immF_0(regF dst, immF_0 zero) %{ + match(Set dst zero); + ins_cost(100); + + format %{ "mov $dst, zero @ loadConF_immF_0\n"%} + ins_encode %{ + FloatRegister dst = $dst$$FloatRegister; + + __ mtc1(R0, dst); + %} + ins_pipe( fpu_loadF ); +%} + + +instruct loadConF(regF dst, immF src) %{ + match(Set dst src); + ins_cost(125); + + format %{ "lwc1 $dst, $constantoffset[$constanttablebase] # load FLOAT $src from table @ loadConF" %} + ins_encode %{ + int con_offset = $constantoffset($src); + + if (Assembler::is_simm16(con_offset)) 
{ + __ lwc1($dst$$FloatRegister, $constanttablebase, con_offset); + } else { + __ set64(AT, con_offset); + if (UseLEXT1) { + __ gslwxc1($dst$$FloatRegister, $constanttablebase, AT, 0); + } else { + __ daddu(AT, $constanttablebase, AT); + __ lwc1($dst$$FloatRegister, AT, 0); + } + } + %} + ins_pipe( fpu_loadF ); +%} + + +instruct loadConD_immD_0(regD dst, immD_0 zero) %{ + match(Set dst zero); + ins_cost(100); + + format %{ "mov $dst, zero @ loadConD_immD_0"%} + ins_encode %{ + FloatRegister dst = as_FloatRegister($dst$$reg); + + __ dmtc1(R0, dst); + %} + ins_pipe( fpu_loadF ); +%} + +instruct loadConD(regD dst, immD src) %{ + match(Set dst src); + ins_cost(125); + + format %{ "ldc1 $dst, $constantoffset[$constanttablebase] # load DOUBLE $src from table @ loadConD" %} + ins_encode %{ + int con_offset = $constantoffset($src); + + if (Assembler::is_simm16(con_offset)) { + __ ldc1($dst$$FloatRegister, $constanttablebase, con_offset); + } else { + __ set64(AT, con_offset); + if (UseLEXT1) { + __ gsldxc1($dst$$FloatRegister, $constanttablebase, AT, 0); + } else { + __ daddu(AT, $constanttablebase, AT); + __ ldc1($dst$$FloatRegister, AT, 0); + } + } + %} + ins_pipe( fpu_loadF ); +%} + +// Store register Float value (it is faster than store from FPU register) +instruct storeF_reg( memory mem, regF src) %{ + match(Set mem (StoreF mem src)); + + ins_cost(50); + format %{ "store $mem, $src\t# store float @ storeF_reg" %} + ins_encode %{ + __ loadstore_enc($src$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_FLOAT); + %} + ins_pipe( fpu_storeF ); +%} + +instruct storeF_immF_0( memory mem, immF_0 zero) %{ + match(Set mem (StoreF mem zero)); + + ins_cost(40); + format %{ "store $mem, zero\t# store float @ storeF_immF_0" %} + ins_encode %{ + __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); + %} + ins_pipe( ialu_storeI ); +%} + +// Load Double +instruct loadD(regD dst, memory mem) %{ + match(Set dst (LoadD mem)); + + ins_cost(150); + format %{ "loadD $dst, $mem #@loadD" %} + ins_encode %{ + __ loadstore_enc($dst$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_DOUBLE); + %} + ins_pipe( ialu_loadI ); +%} + +// Load Double - UNaligned +instruct loadD_unaligned(regD dst, memory mem ) %{ + match(Set dst (LoadD_unaligned mem)); + ins_cost(250); + // FIXME: Need more effective ldl/ldr + format %{ "loadD_unaligned $dst, $mem #@loadD_unaligned" %} + ins_encode %{ + __ loadstore_enc($dst$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_DOUBLE); + %} + ins_pipe( ialu_loadI ); +%} + +instruct storeD_reg( memory mem, regD src) %{ + match(Set mem (StoreD mem src)); + + ins_cost(50); + format %{ "store $mem, $src\t# store float @ storeD_reg" %} + ins_encode %{ + __ loadstore_enc($src$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_DOUBLE); + %} + ins_pipe( fpu_storeF ); +%} + +instruct storeD_immD_0( memory mem, immD_0 zero) %{ + match(Set mem (StoreD mem zero)); + + ins_cost(40); + format %{ "store $mem, zero\t# store float @ storeD_immD_0" %} + ins_encode %{ + __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_LONG); + %} + ins_pipe( ialu_storeI ); +%} + +instruct loadSSI(mRegI dst, stackSlotI src) +%{ + match(Set dst src); + + ins_cost(125); + format %{ "lw $dst, $src\t# int stk @ loadSSI" %} + ins_encode %{ + guarantee( Assembler::is_simm16($src$$disp), "disp too long 
(loadSSI) !"); + __ lw($dst$$Register, SP, $src$$disp); + %} + ins_pipe(ialu_loadI); +%} + +instruct storeSSI(stackSlotI dst, mRegI src) +%{ + match(Set dst src); + + ins_cost(100); + format %{ "sw $dst, $src\t# int stk @ storeSSI" %} + ins_encode %{ + guarantee( Assembler::is_simm16($dst$$disp), "disp too long (storeSSI) !"); + __ sw($src$$Register, SP, $dst$$disp); + %} + ins_pipe(ialu_storeI); +%} + +instruct loadSSL(mRegL dst, stackSlotL src) +%{ + match(Set dst src); + + ins_cost(125); + format %{ "ld $dst, $src\t# long stk @ loadSSL" %} + ins_encode %{ + guarantee( Assembler::is_simm16($src$$disp), "disp too long (loadSSL) !"); + __ ld($dst$$Register, SP, $src$$disp); + %} + ins_pipe(ialu_loadI); +%} + +instruct storeSSL(stackSlotL dst, mRegL src) +%{ + match(Set dst src); + + ins_cost(100); + format %{ "sd $dst, $src\t# long stk @ storeSSL" %} + ins_encode %{ + guarantee( Assembler::is_simm16($dst$$disp), "disp too long (storeSSL) !"); + __ sd($src$$Register, SP, $dst$$disp); + %} + ins_pipe(ialu_storeI); +%} + +instruct loadSSP(mRegP dst, stackSlotP src) +%{ + match(Set dst src); + + ins_cost(125); + format %{ "ld $dst, $src\t# ptr stk @ loadSSP" %} + ins_encode %{ + guarantee( Assembler::is_simm16($src$$disp), "disp too long (loadSSP) !"); + __ ld($dst$$Register, SP, $src$$disp); + %} + ins_pipe(ialu_loadI); +%} + +instruct storeSSP(stackSlotP dst, mRegP src) +%{ + match(Set dst src); + + ins_cost(100); + format %{ "sd $dst, $src\t# ptr stk @ storeSSP" %} + ins_encode %{ + guarantee( Assembler::is_simm16($dst$$disp), "disp too long (storeSSP) !"); + __ sd($src$$Register, SP, $dst$$disp); + %} + ins_pipe(ialu_storeI); +%} + +instruct loadSSF(regF dst, stackSlotF src) +%{ + match(Set dst src); + + ins_cost(125); + format %{ "lwc1 $dst, $src\t# float stk @ loadSSF" %} + ins_encode %{ + guarantee( Assembler::is_simm16($src$$disp), "disp too long (loadSSF) !"); + __ lwc1($dst$$FloatRegister, SP, $src$$disp); + %} + ins_pipe(ialu_loadI); +%} + +instruct storeSSF(stackSlotF dst, regF src) +%{ + match(Set dst src); + + ins_cost(100); + format %{ "swc1 $dst, $src\t# float stk @ storeSSF" %} + ins_encode %{ + guarantee( Assembler::is_simm16($dst$$disp), "disp too long (storeSSF) !"); + __ swc1($src$$FloatRegister, SP, $dst$$disp); + %} + ins_pipe(fpu_storeF); +%} + +// Use the same format since predicate() can not be used here. 
+instruct loadSSD(regD dst, stackSlotD src) +%{ + match(Set dst src); + + ins_cost(125); + format %{ "ldc1 $dst, $src\t# double stk @ loadSSD" %} + ins_encode %{ + guarantee( Assembler::is_simm16($src$$disp), "disp too long (loadSSD) !"); + __ ldc1($dst$$FloatRegister, SP, $src$$disp); + %} + ins_pipe(ialu_loadI); +%} + +instruct storeSSD(stackSlotD dst, regD src) +%{ + match(Set dst src); + + ins_cost(100); + format %{ "sdc1 $dst, $src\t# double stk @ storeSSD" %} + ins_encode %{ + guarantee( Assembler::is_simm16($dst$$disp), "disp too long (storeSSD) !"); + __ sdc1($src$$FloatRegister, SP, $dst$$disp); + %} + ins_pipe(fpu_storeF); +%} + +instruct cmpFastLock(FlagsReg cr, mRegP object, mRegP box, mRegI tmp, mRegI scr) %{ + match(Set cr (FastLock object box)); + effect(TEMP tmp, TEMP scr); + ins_cost(300); + format %{ "FASTLOCK $cr <-- $object, $box, $tmp, $scr #@ cmpFastLock" %} + ins_encode %{ + __ fast_lock($object$$Register, $box$$Register, $cr$$Register, $tmp$$Register, $scr$$Register); + %} + + ins_pipe( pipe_slow ); + ins_pc_relative(1); +%} + +instruct cmpFastUnlock(FlagsReg cr, mRegP object, mRegP box, mRegI tmp, mRegI scr) %{ + match(Set cr (FastUnlock object box)); + effect(TEMP tmp, TEMP scr); + ins_cost(300); + format %{ "FASTUNLOCK $cr <-- $object, $box, $tmp #@cmpFastUnlock" %} + ins_encode %{ + __ fast_unlock($object$$Register, $box$$Register, $cr$$Register, $tmp$$Register, $scr$$Register); + %} + + ins_pipe( pipe_slow ); + ins_pc_relative(1); +%} + +// Store CMS card-mark Immediate 0 +instruct storeImmCM(memory mem, immI_0 zero) %{ + match(Set mem (StoreCM mem zero)); + + ins_cost(150); + format %{ "MEMBAR\n\t" + "sb $mem, zero\t! CMS card-mark imm0" %} + ins_encode %{ + __ sync(); + __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_BYTE); + %} + ins_pipe( ialu_storeI ); +%} + +// Die now +instruct ShouldNotReachHere( ) +%{ + match(Halt); + ins_cost(300); + + // Use the following format syntax + format %{ "ILLTRAP ;#@ShouldNotReachHere" %} + ins_encode %{ + // Here we should emit illtrap ! 
+ + __ stop("in ShoudNotReachHere"); + + %} + ins_pipe( pipe_jump ); +%} + +instruct leaP8Narrow(mRegP dst, indOffset8Narrow mem) +%{ + predicate(Universe::narrow_oop_shift() == 0); + match(Set dst mem); + + ins_cost(110); + format %{ "leaq $dst, $mem\t# ptr off8narrow @ leaP8Narrow" %} + ins_encode %{ + Register dst = $dst$$Register; + Register base = as_Register($mem$$base); + int disp = $mem$$disp; + + __ daddiu(dst, base, disp); + %} + ins_pipe( ialu_regI_imm16 ); +%} + +instruct leaPPosIdxScaleOff8(mRegP dst, basePosIndexScaleOffset8 mem) +%{ + match(Set dst mem); + + ins_cost(110); + format %{ "leaq $dst, $mem\t# @ PosIdxScaleOff8" %} + ins_encode %{ + Register dst = $dst$$Register; + Register base = as_Register($mem$$base); + Register index = as_Register($mem$$index); + int scale = $mem$$scale; + int disp = $mem$$disp; + + if (scale == 0) { + __ daddu(AT, base, index); + __ daddiu(dst, AT, disp); + } else { + __ dsll(AT, index, scale); + __ daddu(AT, base, AT); + __ daddiu(dst, AT, disp); + } + %} + + ins_pipe( ialu_regI_imm16 ); +%} + +instruct leaPIdxScale(mRegP dst, indIndexScale mem) +%{ + match(Set dst mem); + + ins_cost(110); + format %{ "leaq $dst, $mem\t# @ leaPIdxScale" %} + ins_encode %{ + Register dst = $dst$$Register; + Register base = as_Register($mem$$base); + Register index = as_Register($mem$$index); + int scale = $mem$$scale; + + if (scale == 0) { + __ daddu(dst, base, index); + } else { + __ dsll(AT, index, scale); + __ daddu(dst, base, AT); + } + %} + + ins_pipe( ialu_regI_imm16 ); +%} + + +// ============================================================================ +// The 2nd slow-half of a subtype check. Scan the subklass's 2ndary superklass +// array for an instance of the superklass. Set a hidden internal cache on a +// hit (cache is checked with exposed code in gen_subtype_check()). Return +// NZ for a miss or zero for a hit. The encoding ALSO sets flags. +instruct partialSubtypeCheck( mRegP result, no_T8_mRegP sub, no_T8_mRegP super, mT8RegI tmp ) %{ + match(Set result (PartialSubtypeCheck sub super)); + effect(KILL tmp); + ins_cost(1100); // slightly larger than the next version + format %{ "partialSubtypeCheck result=$result, sub=$sub, super=$super, tmp=$tmp " %} + + ins_encode( enc_PartialSubtypeCheck(result, sub, super, tmp) ); + ins_pipe( pipe_slow ); +%} + +// Conditional-store of the updated heap-top. +// Used during allocation of the shared heap. + +instruct storePConditional(memory heap_top_ptr, mRegP oldval, mRegP newval, FlagsReg cr) %{ + match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval))); + + format %{ "move AT, $newval\n\t" + "sc_d $heap_top_ptr, AT\t# (ptr) @storePConditional \n\t" + "move $cr, AT\n" %} + ins_encode%{ + Register oldval = $oldval$$Register; + Register newval = $newval$$Register; + Address addr(as_Register($heap_top_ptr$$base), $heap_top_ptr$$disp); + + int index = $heap_top_ptr$$index; + int scale = $heap_top_ptr$$scale; + int disp = $heap_top_ptr$$disp; + + guarantee(Assembler::is_simm16(disp), ""); + + if (index != 0) { + __ stop("in storePConditional: index != 0"); + } else { + __ move(AT, newval); + __ scd(AT, addr); + __ move($cr$$Register, AT); + } + %} + ins_pipe(long_memory_op); +%} + +// Conditional-store of an int value. +// AT flag is set on success, reset otherwise. 
+instruct storeIConditional(memory mem, mRegI oldval, mRegI newval, FlagsReg cr) %{
+  match(Set cr (StoreIConditional mem (Binary oldval newval)));
+  format %{ "CMPXCHG $newval, $mem, $oldval \t# @storeIConditional" %}
+
+  ins_encode %{
+    Register oldval = $oldval$$Register;
+    Register newval = $newval$$Register;
+    Register cr = $cr$$Register;
+    Address addr(as_Register($mem$$base), $mem$$disp);
+
+    int index = $mem$$index;
+    int scale = $mem$$scale;
+    int disp = $mem$$disp;
+
+    guarantee(Assembler::is_simm16(disp), "");
+
+    if (index != 0) {
+      __ stop("in storeIConditional: index != 0");
+    } else {
+      if (cr != addr.base() && cr != oldval && cr != newval) {
+        __ cmpxchg32(addr, oldval, newval, cr, true, false, true);
+      } else {
+        __ cmpxchg32(addr, oldval, newval, AT, true, false, true);
+        __ move(cr, AT);
+      }
+    }
+  %}
+
+  ins_pipe(long_memory_op);
+%}
+
+// Conditional-store of a long value.
+// AT flag is set on success, reset otherwise. Implemented with a CMPXCHG.
+instruct storeLConditional(memory mem, mRegL oldval, mRegL newval, FlagsReg cr)
+%{
+  match(Set cr (StoreLConditional mem (Binary oldval newval)));
+
+  format %{ "cmpxchg $mem, $newval\t# If $oldval == $mem then store $newval into $mem" %}
+  ins_encode %{
+    Register oldval = $oldval$$Register;
+    Register newval = $newval$$Register;
+    Register cr = $cr$$Register;
+    Address addr(as_Register($mem$$base), $mem$$disp);
+
+    int index = $mem$$index;
+    int scale = $mem$$scale;
+    int disp = $mem$$disp;
+
+    guarantee(Assembler::is_simm16(disp), "");
+
+    if (index != 0) {
+      __ stop("in storeLConditional: index != 0");
+    } else {
+      if (cr != addr.base() && cr != oldval && cr != newval) {
+        __ cmpxchg(addr, oldval, newval, cr, false, true);
+      } else {
+        __ cmpxchg(addr, oldval, newval, AT, false, true);
+        __ move(cr, AT);
+      }
+    }
+  %}
+  ins_pipe(long_memory_op);
+%}
+
+// Implement LoadPLocked. Must be ordered against changes of the memory location
+// by storePConditional.
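+// The expected instruction pattern, mirroring the formats of storePConditional
+// above and loadPLocked below, is roughly:
+//   lld    dst, [heap_top]        // loadPLocked
+//   ...    compute the new heap top in newval ...
+//   move   AT, newval
+//   scd    AT, [heap_top]         // storePConditional; AT == 1 iff the store hit
+//   move   cr, AT
+// with retry/slow-path selection driven by the value left in $cr.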
+instruct loadPLocked(mRegP dst, memory mem) %{ + match(Set dst (LoadPLocked mem)); + ins_cost(MEMORY_REF_COST); + + format %{ "lld $dst, $mem #@loadPLocked\n\t" %} + size(12); + ins_encode %{ + relocInfo::relocType disp_reloc = $mem->disp_reloc(); + assert(disp_reloc == relocInfo::none, "cannot have disp"); + __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_LINKED_LONG); + %} + ins_pipe( ialu_loadI ); +%} + + +instruct compareAndSwapI(mRegI res, mRegP mem_ptr, mRegI oldval, mRegI newval) %{ + match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval))); + format %{ "CMPXCHG $newval, [$mem_ptr], $oldval @ compareAndSwapI" %} + ins_encode %{ + Register newval = $newval$$Register; + Register oldval = $oldval$$Register; + Register res = $res$$Register; + Address addr($mem_ptr$$Register, 0); + + if (res != addr.base() && res != oldval && res != newval) { + __ cmpxchg32(addr, oldval, newval, res, true, false, true); + } else { + __ cmpxchg32(addr, oldval, newval, AT, true, false, true); + __ move(res, AT); + } + %} + ins_pipe(long_memory_op); +%} + +instruct compareAndSwapL(mRegI res, mRegP mem_ptr, mRegL oldval, mRegL newval) %{ + predicate(VM_Version::supports_cx8()); + match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval))); + format %{ "CMPXCHG $newval, [$mem_ptr], $oldval @ compareAndSwapL" %} + ins_encode %{ + Register newval = $newval$$Register; + Register oldval = $oldval$$Register; + Register res = $res$$Register; + Address addr($mem_ptr$$Register, 0); + + if (res != addr.base() && res != oldval && res != newval) { + __ cmpxchg(addr, oldval, newval, res, false, true); + } else { + __ cmpxchg(addr, oldval, newval, AT, false, true); + __ move(res, AT); + } + %} + ins_pipe(long_memory_op); +%} + +instruct compareAndSwapP(mRegI res, mRegP mem_ptr, mRegP oldval, mRegP newval) %{ + match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval))); + format %{ "CMPXCHG $newval, [$mem_ptr], $oldval @ compareAndSwapP" %} + ins_encode %{ + Register newval = $newval$$Register; + Register oldval = $oldval$$Register; + Register res = $res$$Register; + Address addr($mem_ptr$$Register, 0); + + if (res != addr.base() && res != oldval && res != newval) { + __ cmpxchg(addr, oldval, newval, res, false, true); + } else { + __ cmpxchg(addr, oldval, newval, AT, false, true); + __ move(res, AT); + } + %} + ins_pipe(long_memory_op); +%} + +instruct compareAndSwapN(mRegI res, mRegP mem_ptr, mRegN oldval, mRegN newval) %{ + match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval))); + format %{ "CMPXCHG $newval, [$mem_ptr], $oldval @ compareAndSwapN" %} + ins_encode %{ + Register newval = $newval$$Register; + Register oldval = $oldval$$Register; + Register res = $res$$Register; + Address addr($mem_ptr$$Register, 0); + + if (res != addr.base() && res != oldval && res != newval) { + __ cmpxchg32(addr, oldval, newval, res, false, false, true); + } else { + __ cmpxchg32(addr, oldval, newval, AT, false, false, true); + __ move(res, AT); + } + %} + ins_pipe(long_memory_op); +%} + +//----------Max and Min-------------------------------------------------------- +// Min Instructions +//// +// *** Min and Max using the conditional move are slower than the +// *** branch version on a Pentium III. +// // Conditional move for min +//instruct cmovI_reg_lt( eRegI op2, eRegI op1, eFlagsReg cr ) %{ +// effect( USE_DEF op2, USE op1, USE cr ); +// format %{ "CMOVlt $op2,$op1\t! 
min" %} +// opcode(0x4C,0x0F); +// ins_encode( OpcS, OpcP, RegReg( op2, op1 ) ); +// ins_pipe( pipe_cmov_reg ); +//%} +// +//// Min Register with Register (P6 version) +//instruct minI_eReg_p6( eRegI op1, eRegI op2 ) %{ +// predicate(VM_Version::supports_cmov() ); +// match(Set op2 (MinI op1 op2)); +// ins_cost(200); +// expand %{ +// eFlagsReg cr; +// compI_eReg(cr,op1,op2); +// cmovI_reg_lt(op2,op1,cr); +// %} +//%} + +// Min Register with Register (generic version) +instruct minI_Reg_Reg(mRegI dst, mRegI src) %{ + match(Set dst (MinI dst src)); + //effect(KILL flags); + ins_cost(80); + + format %{ "MIN $dst, $src @minI_Reg_Reg" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src$$Register; + + __ slt(AT, src, dst); + __ movn(dst, src, AT); + + %} + + ins_pipe( pipe_slow ); +%} + +// Max Register with Register +// *** Min and Max using the conditional move are slower than the +// *** branch version on a Pentium III. +// // Conditional move for max +//instruct cmovI_reg_gt( eRegI op2, eRegI op1, eFlagsReg cr ) %{ +// effect( USE_DEF op2, USE op1, USE cr ); +// format %{ "CMOVgt $op2,$op1\t! max" %} +// opcode(0x4F,0x0F); +// ins_encode( OpcS, OpcP, RegReg( op2, op1 ) ); +// ins_pipe( pipe_cmov_reg ); +//%} +// +// // Max Register with Register (P6 version) +//instruct maxI_eReg_p6( eRegI op1, eRegI op2 ) %{ +// predicate(VM_Version::supports_cmov() ); +// match(Set op2 (MaxI op1 op2)); +// ins_cost(200); +// expand %{ +// eFlagsReg cr; +// compI_eReg(cr,op1,op2); +// cmovI_reg_gt(op2,op1,cr); +// %} +//%} + +// Max Register with Register (generic version) +instruct maxI_Reg_Reg(mRegI dst, mRegI src) %{ + match(Set dst (MaxI dst src)); + ins_cost(80); + + format %{ "MAX $dst, $src @maxI_Reg_Reg" %} + + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src$$Register; + + __ slt(AT, dst, src); + __ movn(dst, src, AT); + + %} + + ins_pipe( pipe_slow ); +%} + +instruct maxI_Reg_zero(mRegI dst, immI_0 zero) %{ + match(Set dst (MaxI dst zero)); + ins_cost(50); + + format %{ "MAX $dst, 0 @maxI_Reg_zero" %} + + ins_encode %{ + Register dst = $dst$$Register; + + __ slt(AT, dst, R0); + __ movn(dst, R0, AT); + + %} + + ins_pipe( pipe_slow ); +%} + +instruct zerox_long_reg_reg(mRegL dst, mRegL src, immL_MaxUI mask) +%{ + match(Set dst (AndL src mask)); + + format %{ "movl $dst, $src\t# zero-extend long @ zerox_long_reg_reg" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src$$Register; + + __ dext(dst, src, 0, 32); + %} + ins_pipe(ialu_regI_regI); +%} + +instruct combine_i2l(mRegL dst, mRegI src1, immL_MaxUI mask, mRegI src2, immI_32 shift32) +%{ + match(Set dst (OrL (AndL (ConvI2L src1) mask) (LShiftL (ConvI2L src2) shift32))); + + format %{ "combine_i2l $dst, $src2(H), $src1(L) @ combine_i2l" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + Register src2 = $src2$$Register; + + if (src1 == dst) { + __ dinsu(dst, src2, 32, 32); + } else if (src2 == dst) { + __ dsll32(dst, dst, 0); + __ dins(dst, src1, 0, 32); + } else { + __ dext(dst, src1, 0, 32); + __ dinsu(dst, src2, 32, 32); + } + %} + ins_pipe(ialu_regI_regI); +%} + +// Zero-extend convert int to long +instruct convI2L_reg_reg_zex(mRegL dst, mRegI src, immL_MaxUI mask) +%{ + match(Set dst (AndL (ConvI2L src) mask)); + + format %{ "movl $dst, $src\t# i2l zero-extend @ convI2L_reg_reg_zex" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src$$Register; + + __ dext(dst, src, 0, 32); + %} + ins_pipe(ialu_regI_regI); +%} + +instruct 
convL2I2L_reg_reg_zex(mRegL dst, mRegL src, immL_MaxUI mask) +%{ + match(Set dst (AndL (ConvI2L (ConvL2I src)) mask)); + + format %{ "movl $dst, $src\t# i2l zero-extend @ convL2I2L_reg_reg_zex" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src$$Register; + + __ dext(dst, src, 0, 32); + %} + ins_pipe(ialu_regI_regI); +%} + +// Match loading integer and casting it to unsigned int in long register. +// LoadI + ConvI2L + AndL 0xffffffff. +instruct loadUI2L_rmask(mRegL dst, memory mem, immL_MaxUI mask) %{ + match(Set dst (AndL (ConvI2L (LoadI mem)) mask)); + + format %{ "lwu $dst, $mem \t// zero-extend to long @ loadUI2L_rmask" %} + ins_encode %{ + relocInfo::relocType disp_reloc = $mem->disp_reloc(); + assert(disp_reloc == relocInfo::none, "cannot have disp"); + __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_INT); + %} + ins_pipe(ialu_loadI); +%} + +instruct loadUI2L_lmask(mRegL dst, memory mem, immL_MaxUI mask) %{ + match(Set dst (AndL mask (ConvI2L (LoadI mem)))); + + format %{ "lwu $dst, $mem \t// zero-extend to long @ loadUI2L_lmask" %} + ins_encode %{ + relocInfo::relocType disp_reloc = $mem->disp_reloc(); + assert(disp_reloc == relocInfo::none, "cannot have disp"); + __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_INT); + %} + ins_pipe(ialu_loadI); +%} + + +// ============================================================================ +// Safepoint Instruction + +instruct safePoint_poll() %{ + predicate(SafepointMechanism::uses_global_page_poll()); + match(SafePoint); + + ins_cost(105); + format %{ "poll for GC @ safePoint_poll" %} + + ins_encode %{ + __ block_comment("Safepoint:"); + __ set64(T9, (long)os::get_polling_page()); + __ relocate(relocInfo::poll_type); + __ lw(AT, T9, 0); + %} + + ins_pipe( ialu_storeI ); +%} + +instruct safePoint_poll_tls(mRegP poll) %{ + match(SafePoint poll); + predicate(SafepointMechanism::uses_thread_local_poll()); + effect(USE poll); + + ins_cost(125); + format %{ "lw AT, [$poll]\t" + "Safepoint @ [$poll] : poll for GC" %} + size(4); + ins_encode %{ + Register poll_reg = $poll$$Register; + + __ block_comment("Safepoint:"); + __ relocate(relocInfo::poll_type); + address pre_pc = __ pc(); + __ lw(AT, poll_reg, 0); + assert(nativeInstruction_at(pre_pc)->is_safepoint_poll(), "must emit lw AT, [$poll]"); + %} + + ins_pipe( ialu_storeI ); +%} + +//----------Arithmetic Conversion Instructions--------------------------------- + +instruct roundFloat_nop(regF dst) +%{ + match(Set dst (RoundFloat dst)); + + ins_cost(0); + ins_encode(); + ins_pipe(empty); +%} + +instruct roundDouble_nop(regD dst) +%{ + match(Set dst (RoundDouble dst)); + + ins_cost(0); + ins_encode(); + ins_pipe(empty); +%} + +//---------- Zeros Count Instructions ------------------------------------------ +// CountLeadingZerosINode CountTrailingZerosINode +instruct countLeadingZerosI(mRegI dst, mRegI src) %{ + predicate(UseCountLeadingZerosInstructionMIPS64); + match(Set dst (CountLeadingZerosI src)); + + format %{ "clz $dst, $src\t# count leading zeros (int)" %} + ins_encode %{ + __ clz($dst$$Register, $src$$Register); + %} + ins_pipe( ialu_regL_regL ); +%} + +instruct countLeadingZerosL(mRegI dst, mRegL src) %{ + predicate(UseCountLeadingZerosInstructionMIPS64); + match(Set dst (CountLeadingZerosL src)); + + format %{ "dclz $dst, $src\t# count leading zeros (long)" %} + ins_encode %{ + __ dclz($dst$$Register, $src$$Register); + %} + ins_pipe( 
ialu_regL_regL ); +%} + +instruct countTrailingZerosI(mRegI dst, mRegI src) %{ + predicate(UseCountTrailingZerosInstructionMIPS64); + match(Set dst (CountTrailingZerosI src)); + + format %{ "ctz $dst, $src\t# count trailing zeros (int)" %} + ins_encode %{ + // ctz and dctz is gs instructions. + __ ctz($dst$$Register, $src$$Register); + %} + ins_pipe( ialu_regL_regL ); +%} + +instruct countTrailingZerosL(mRegI dst, mRegL src) %{ + predicate(UseCountTrailingZerosInstructionMIPS64); + match(Set dst (CountTrailingZerosL src)); + + format %{ "dcto $dst, $src\t# count trailing zeros (long)" %} + ins_encode %{ + __ dctz($dst$$Register, $src$$Register); + %} + ins_pipe( ialu_regL_regL ); +%} + +// ====================VECTOR INSTRUCTIONS===================================== + +// Load vectors (8 bytes long) +instruct loadV8(vecD dst, memory mem) %{ + predicate(n->as_LoadVector()->memory_size() == 8); + match(Set dst (LoadVector mem)); + ins_cost(125); + format %{ "load $dst, $mem\t! load vector (8 bytes)" %} + ins_encode %{ + __ loadstore_enc($dst$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_DOUBLE); + %} + ins_pipe( fpu_loadF ); +%} + +// Store vectors (8 bytes long) +instruct storeV8(memory mem, vecD src) %{ + predicate(n->as_StoreVector()->memory_size() == 8); + match(Set mem (StoreVector mem src)); + ins_cost(145); + format %{ "store $mem, $src\t! store vector (8 bytes)" %} + ins_encode %{ + __ loadstore_enc($src$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_DOUBLE); + %} + ins_pipe( fpu_storeF ); +%} + +instruct Repl8B_DSP(vecD dst, mRegI src) %{ + predicate(n->as_Vector()->length() == 8 && UseLEXT3); + match(Set dst (ReplicateB src)); + ins_cost(100); + format %{ "replv_ob AT, $src\n\t" + "dmtc1 AT, $dst\t! replicate8B" %} + ins_encode %{ + __ replv_ob(AT, $src$$Register); + __ dmtc1(AT, $dst$$FloatRegister); + %} + ins_pipe( pipe_mtc1 ); +%} + +instruct Repl8B(vecD dst, mRegI src) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (ReplicateB src)); + ins_cost(140); + format %{ "move AT, $src\n\t" + "dins AT, AT, 8, 8\n\t" + "dins AT, AT, 16, 16\n\t" + "dinsu AT, AT, 32, 32\n\t" + "dmtc1 AT, $dst\t! replicate8B" %} + ins_encode %{ + __ move(AT, $src$$Register); + __ dins(AT, AT, 8, 8); + __ dins(AT, AT, 16, 16); + __ dinsu(AT, AT, 32, 32); + __ dmtc1(AT, $dst$$FloatRegister); + %} + ins_pipe( pipe_mtc1 ); +%} + +instruct Repl8B_imm_DSP(vecD dst, immI con) %{ + predicate(n->as_Vector()->length() == 8 && UseLEXT3 && VM_Version::supports_dsp()); + match(Set dst (ReplicateB con)); + ins_cost(110); + format %{ "repl_ob AT, [$con]\n\t" + "dmtc1 AT, $dst,0x00\t! replicate8B($con)" %} + ins_encode %{ + int val = $con$$constant; + __ repl_ob(AT, val); + __ dmtc1(AT, $dst$$FloatRegister); + %} + ins_pipe( pipe_mtc1 ); +%} + +instruct Repl8B_imm(vecD dst, immI con) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (ReplicateB con)); + ins_cost(150); + format %{ "move AT, [$con]\n\t" + "dins AT, AT, 8, 8\n\t" + "dins AT, AT, 16, 16\n\t" + "dinsu AT, AT, 32, 32\n\t" + "dmtc1 AT, $dst,0x00\t! replicate8B($con)" %} + ins_encode %{ + __ move(AT, $con$$constant); + __ dins(AT, AT, 8, 8); + __ dins(AT, AT, 16, 16); + __ dinsu(AT, AT, 32, 32); + __ dmtc1(AT, $dst$$FloatRegister); + %} + ins_pipe( pipe_mtc1 ); +%} + +instruct Repl8B_zero(vecD dst, immI_0 zero) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (ReplicateB zero)); + ins_cost(90); + format %{ "dmtc1 R0, $dst\t! 
replicate8B zero" %} + ins_encode %{ + __ dmtc1(R0, $dst$$FloatRegister); + %} + ins_pipe( pipe_mtc1 ); +%} + +instruct Repl8B_M1(vecD dst, immI_M1 M1) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (ReplicateB M1)); + ins_cost(80); + format %{ "dmtc1 -1, $dst\t! replicate8B -1" %} + ins_encode %{ + __ nor(AT, R0, R0); + __ dmtc1(AT, $dst$$FloatRegister); + %} + ins_pipe( pipe_mtc1 ); +%} + +instruct Repl4S_DSP(vecD dst, mRegI src) %{ + predicate(n->as_Vector()->length() == 4 && UseLEXT3 && VM_Version::supports_dsp()); + match(Set dst (ReplicateS src)); + ins_cost(100); + format %{ "replv_qh AT, $src\n\t" + "dmtc1 AT, $dst\t! replicate4S" %} + ins_encode %{ + __ replv_qh(AT, $src$$Register); + __ dmtc1(AT, $dst$$FloatRegister); + %} + ins_pipe( pipe_mtc1 ); +%} + +instruct Repl4S(vecD dst, mRegI src) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (ReplicateS src)); + ins_cost(120); + format %{ "move AT, $src \n\t" + "dins AT, AT, 16, 16\n\t" + "dinsu AT, AT, 32, 32\n\t" + "dmtc1 AT, $dst\t! replicate4S" %} + ins_encode %{ + __ move(AT, $src$$Register); + __ dins(AT, AT, 16, 16); + __ dinsu(AT, AT, 32, 32); + __ dmtc1(AT, $dst$$FloatRegister); + %} + ins_pipe( pipe_mtc1 ); +%} + +instruct Repl4S_imm_DSP(vecD dst, immI con) %{ + predicate(n->as_Vector()->length() == 4 && UseLEXT3 && VM_Version::supports_dsp()); + match(Set dst (ReplicateS con)); + ins_cost(100); + format %{ "repl_qh AT, [$con]\n\t" + "dmtc1 AT, $dst\t! replicate4S($con)" %} + ins_encode %{ + int val = $con$$constant; + if ( Assembler::is_simm(val, 10)) { + //repl_qh supports 10 bits immediate + __ repl_qh(AT, val); + } else { + __ li32(AT, val); + __ replv_qh(AT, AT); + } + __ dmtc1(AT, $dst$$FloatRegister); + %} + ins_pipe( pipe_mtc1 ); +%} + +instruct Repl4S_imm(vecD dst, immI con) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (ReplicateS con)); + ins_cost(110); + format %{ "move AT, [$con]\n\t" + "dins AT, AT, 16, 16\n\t" + "dinsu AT, AT, 32, 32\n\t" + "dmtc1 AT, $dst\t! replicate4S($con)" %} + ins_encode %{ + __ move(AT, $con$$constant); + __ dins(AT, AT, 16, 16); + __ dinsu(AT, AT, 32, 32); + __ dmtc1(AT, $dst$$FloatRegister); + %} + ins_pipe( pipe_mtc1 ); +%} + +instruct Repl4S_zero(vecD dst, immI_0 zero) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (ReplicateS zero)); + format %{ "dmtc1 R0, $dst\t! replicate4S zero" %} + ins_encode %{ + __ dmtc1(R0, $dst$$FloatRegister); + %} + ins_pipe( pipe_mtc1 ); +%} + +instruct Repl4S_M1(vecD dst, immI_M1 M1) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (ReplicateS M1)); + format %{ "dmtc1 -1, $dst\t! replicate4S -1" %} + ins_encode %{ + __ nor(AT, R0, R0); + __ dmtc1(AT, $dst$$FloatRegister); + %} + ins_pipe( pipe_mtc1 ); +%} + +// Replicate integer (4 byte) scalar to be vector +instruct Repl2I(vecD dst, mRegI src) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (ReplicateI src)); + format %{ "dins AT, $src, 0, 32\n\t" + "dinsu AT, $src, 32, 32\n\t" + "dmtc1 AT, $dst\t! replicate2I" %} + ins_encode %{ + __ dins(AT, $src$$Register, 0, 32); + __ dinsu(AT, $src$$Register, 32, 32); + __ dmtc1(AT, $dst$$FloatRegister); + %} + ins_pipe( pipe_mtc1 ); +%} + +// Replicate integer (4 byte) scalar immediate to be vector by loading from const table. +instruct Repl2I_imm(vecD dst, immI con, mA7RegI tmp) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (ReplicateI con)); + effect(KILL tmp); + format %{ "li32 AT, [$con], 32\n\t" + "dinsu AT, AT\n\t" + "dmtc1 AT, $dst\t! 
replicate2I($con)" %} + ins_encode %{ + int val = $con$$constant; + __ li32(AT, val); + __ dinsu(AT, AT, 32, 32); + __ dmtc1(AT, $dst$$FloatRegister); + %} + ins_pipe( pipe_mtc1 ); +%} + +// Replicate integer (4 byte) scalar zero to be vector +instruct Repl2I_zero(vecD dst, immI_0 zero) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (ReplicateI zero)); + format %{ "dmtc1 R0, $dst\t! replicate2I zero" %} + ins_encode %{ + __ dmtc1(R0, $dst$$FloatRegister); + %} + ins_pipe( pipe_mtc1 ); +%} + +// Replicate integer (4 byte) scalar -1 to be vector +instruct Repl2I_M1(vecD dst, immI_M1 M1) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (ReplicateI M1)); + format %{ "dmtc1 -1, $dst\t! replicate2I -1, use AT" %} + ins_encode %{ + __ nor(AT, R0, R0); + __ dmtc1(AT, $dst$$FloatRegister); + %} + ins_pipe( pipe_mtc1 ); +%} + +// Replicate float (4 byte) scalar to be vector +instruct Repl2F(vecD dst, regF src) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (ReplicateF src)); + format %{ "cvt.ps $dst, $src, $src\t! replicate2F" %} + ins_encode %{ + __ cvt_ps_s($dst$$FloatRegister, $src$$FloatRegister, $src$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +// Replicate float (4 byte) scalar zero to be vector +instruct Repl2F_zero(vecD dst, immF_0 zero) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (ReplicateF zero)); + format %{ "dmtc1 R0, $dst\t! replicate2F zero" %} + ins_encode %{ + __ dmtc1(R0, $dst$$FloatRegister); + %} + ins_pipe( pipe_mtc1 ); +%} + + +// ====================VECTOR ARITHMETIC======================================= + +// --------------------------------- ADD -------------------------------------- + +// Floats vector add +// kernel does not have emulation of PS instructions yet, so PS instructions is disabled. +instruct vadd2F(vecD dst, vecD src) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (AddVF dst src)); + format %{ "add.ps $dst,$src\t! add packed2F" %} + ins_encode %{ + __ add_ps($dst$$FloatRegister, $dst$$FloatRegister, $src$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct vadd2F3(vecD dst, vecD src1, vecD src2) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (AddVF src1 src2)); + format %{ "add.ps $dst,$src1,$src2\t! add packed2F" %} + ins_encode %{ + __ add_ps($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( fpu_regF_regF ); +%} + +// --------------------------------- SUB -------------------------------------- + +// Floats vector sub +instruct vsub2F(vecD dst, vecD src) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (SubVF dst src)); + format %{ "sub.ps $dst,$src\t! sub packed2F" %} + ins_encode %{ + __ sub_ps($dst$$FloatRegister, $dst$$FloatRegister, $src$$FloatRegister); + %} + ins_pipe( fpu_regF_regF ); +%} + +// --------------------------------- MUL -------------------------------------- + +// Floats vector mul +instruct vmul2F(vecD dst, vecD src) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (MulVF dst src)); + format %{ "mul.ps $dst, $src\t! mul packed2F" %} + ins_encode %{ + __ mul_ps($dst$$FloatRegister, $dst$$FloatRegister, $src$$FloatRegister); + %} + ins_pipe( fpu_regF_regF ); +%} + +instruct vmul2F3(vecD dst, vecD src1, vecD src2) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (MulVF src1 src2)); + format %{ "mul.ps $dst, $src1, $src2\t! 
mul packed2F" %} + ins_encode %{ + __ mul_ps($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( fpu_regF_regF ); +%} + +// --------------------------------- DIV -------------------------------------- +// MIPS do not have div.ps + +// --------------------------------- MADD -------------------------------------- +// Floats vector madd +//instruct vmadd2F(vecD dst, vecD src1, vecD src2, vecD src3) %{ +// predicate(n->as_Vector()->length() == 2); +// match(Set dst (AddVF (MulVF src1 src2) src3)); +// ins_cost(50); +// format %{ "madd.ps $dst, $src3, $src1, $src2\t! madd packed2F" %} +// ins_encode %{ +// __ madd_ps($dst$$FloatRegister, $src3$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); +// %} +// ins_pipe( fpu_regF_regF ); +//%} + + +//----------PEEPHOLE RULES----------------------------------------------------- +// These must follow all instruction definitions as they use the names +// defined in the instructions definitions. +// +// peepmatch ( root_instr_name [preceeding_instruction]* ); +// +// peepconstraint %{ +// (instruction_number.operand_name relational_op instruction_number.operand_name +// [, ...] ); +// // instruction numbers are zero-based using left to right order in peepmatch +// +// peepreplace ( instr_name ( [instruction_number.operand_name]* ) ); +// // provide an instruction_number.operand_name for each operand that appears +// // in the replacement instruction's match rule +// +// ---------VM FLAGS--------------------------------------------------------- +// +// All peephole optimizations can be turned off using -XX:-OptoPeephole +// +// Each peephole rule is given an identifying number starting with zero and +// increasing by one in the order seen by the parser. An individual peephole +// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=# +// on the command-line. +// +// ---------CURRENT LIMITATIONS---------------------------------------------- +// +// Only match adjacent instructions in same basic block +// Only equality constraints +// Only constraints between operands, not (0.dest_reg == EAX_enc) +// Only one replacement instruction +// +// ---------EXAMPLE---------------------------------------------------------- +// +// // pertinent parts of existing instructions in architecture description +// instruct movI(eRegI dst, eRegI src) %{ +// match(Set dst (CopyI src)); +// %} +// +// instruct incI_eReg(eRegI dst, immI_1 src, eFlagsReg cr) %{ +// match(Set dst (AddI dst src)); +// effect(KILL cr); +// %} +// +// // Change (inc mov) to lea +// peephole %{ +// // increment preceeded by register-register move +// peepmatch ( incI_eReg movI ); +// // require that the destination register of the increment +// // match the destination register of the move +// peepconstraint ( 0.dst == 1.dst ); +// // construct a replacement instruction that sets +// // the destination to ( move's source register + one ) +// peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) ); +// %} +// +// Implementation no longer uses movX instructions since +// machine-independent system no longer uses CopyX nodes. 
+// +// peephole %{ +// peepmatch ( incI_eReg movI ); +// peepconstraint ( 0.dst == 1.dst ); +// peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) ); +// %} +// +// peephole %{ +// peepmatch ( decI_eReg movI ); +// peepconstraint ( 0.dst == 1.dst ); +// peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) ); +// %} +// +// peephole %{ +// peepmatch ( addI_eReg_imm movI ); +// peepconstraint ( 0.dst == 1.dst ); +// peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) ); +// %} +// +// peephole %{ +// peepmatch ( addP_eReg_imm movP ); +// peepconstraint ( 0.dst == 1.dst ); +// peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) ); +// %} + +// // Change load of spilled value to only a spill +// instruct storeI(memory mem, eRegI src) %{ +// match(Set mem (StoreI mem src)); +// %} +// +// instruct loadI(eRegI dst, memory mem) %{ +// match(Set dst (LoadI mem)); +// %} +// +//peephole %{ +// peepmatch ( loadI storeI ); +// peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem ); +// peepreplace ( storeI( 1.mem 1.mem 1.src ) ); +//%} + +//----------SMARTSPILL RULES--------------------------------------------------- +// These must follow all instruction definitions as they use the names +// defined in the instructions definitions. + diff --git a/src/hotspot/cpu/mips/nativeInst_mips.cpp b/src/hotspot/cpu/mips/nativeInst_mips.cpp new file mode 100644 index 00000000000..96a147eaa54 --- /dev/null +++ b/src/hotspot/cpu/mips/nativeInst_mips.cpp @@ -0,0 +1,1821 @@ +/* + * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "compiler/disassembler.hpp" +#include "code/codeCache.hpp" +#include "code/compiledIC.hpp" +#include "memory/resourceArea.hpp" +#include "nativeInst_mips.hpp" +#include "oops/oop.inline.hpp" +#include "runtime/handles.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#include "utilities/ostream.hpp" + +#include + +#define T0 RT0 +#define T1 RT1 +#define T2 RT2 +#define T3 RT3 +#define T8 RT8 +#define T9 RT9 + +void NativeInstruction::wrote(int offset) { + ICache::invalidate_word(addr_at(offset)); +} + +void NativeInstruction::set_long_at(int offset, long i) { + address addr = addr_at(offset); + *(long*)addr = i; + ICache::invalidate_range(addr, 8); +} + +static int illegal_instruction_bits = 0; + +int NativeInstruction::illegal_instruction() { + if (illegal_instruction_bits == 0) { + ResourceMark rm; + char buf[40]; + CodeBuffer cbuf((address)&buf[0], 20); + MacroAssembler* a = new MacroAssembler(&cbuf); + address ia = a->pc(); + a->brk(11); + int bits = *(int*)ia; + illegal_instruction_bits = bits; + } + return illegal_instruction_bits; +} + +bool NativeInstruction::is_int_branch() { + switch(Assembler::opcode(insn_word())) { + case Assembler::beq_op: + case Assembler::beql_op: + case Assembler::bgtz_op: + case Assembler::bgtzl_op: + case Assembler::blez_op: + case Assembler::blezl_op: + case Assembler::bne_op: + case Assembler::bnel_op: + return true; + case Assembler::regimm_op: + switch(Assembler::rt(insn_word())) { + case Assembler::bgez_op: + case Assembler::bgezal_op: + case Assembler::bgezall_op: + case Assembler::bgezl_op: + case Assembler::bltz_op: + case Assembler::bltzal_op: + case Assembler::bltzall_op: + case Assembler::bltzl_op: + return true; + } + } + + return false; +} + +bool NativeInstruction::is_float_branch() { + if (!is_op(Assembler::cop1_op) || + !is_rs((Register)Assembler::bc1f_op)) return false; + + switch(Assembler::rt(insn_word())) { + case Assembler::bcf_op: + case Assembler::bcfl_op: + case Assembler::bct_op: + case Assembler::bctl_op: + return true; + } + + return false; +} + + +void NativeCall::verify() { + // make sure code pattern is actually a call instruction + + // nop + // nop + // nop + // nop + // jal target + // nop + if ( is_nop() && + nativeInstruction_at(addr_at(4))->is_nop() && + nativeInstruction_at(addr_at(8))->is_nop() && + nativeInstruction_at(addr_at(12))->is_nop() && + is_op(int_at(16), Assembler::jal_op) && + nativeInstruction_at(addr_at(20))->is_nop() ) { + return; + } + + // jal targe + // nop + if ( is_op(int_at(0), Assembler::jal_op) && + nativeInstruction_at(addr_at(4))->is_nop() ) { + return; + } + + // li64 + if ( is_op(Assembler::lui_op) && + is_op(int_at(4), Assembler::ori_op) && + is_special_op(int_at(8), Assembler::dsll_op) && + is_op(int_at(12), Assembler::ori_op) && + is_special_op(int_at(16), Assembler::dsll_op) && + is_op(int_at(20), Assembler::ori_op) && + is_special_op(int_at(24), Assembler::jalr_op) ) { + return; + } + + //lui dst, imm16 + //ori dst, dst, imm16 + //dsll dst, dst, 16 + //ori dst, dst, imm16 + if ( is_op(Assembler::lui_op) && + is_op (int_at(4), Assembler::ori_op) && + is_special_op(int_at(8), Assembler::dsll_op) && + is_op (int_at(12), Assembler::ori_op) && + is_special_op(int_at(16), Assembler::jalr_op) ) { + return; + } + + //ori dst, R0, imm16 + //dsll dst, dst, 16 + //ori dst, dst, imm16 + //nop + if ( is_op(Assembler::ori_op) && + is_special_op(int_at(4), Assembler::dsll_op) && + is_op 
(int_at(8), Assembler::ori_op) && + nativeInstruction_at(addr_at(12))->is_nop() && + is_special_op(int_at(16), Assembler::jalr_op) ) { + return; + } + + //ori dst, R0, imm16 + //dsll dst, dst, 16 + //nop + //nop + if ( is_op(Assembler::ori_op) && + is_special_op(int_at(4), Assembler::dsll_op) && + nativeInstruction_at(addr_at(8))->is_nop() && + nativeInstruction_at(addr_at(12))->is_nop() && + is_special_op(int_at(16), Assembler::jalr_op) ) { + return; + } + + //daddiu dst, R0, imm16 + //nop + //nop + //nop + if ( is_op(Assembler::daddiu_op) && + nativeInstruction_at(addr_at(4))->is_nop() && + nativeInstruction_at(addr_at(8))->is_nop() && + nativeInstruction_at(addr_at(12))->is_nop() && + is_special_op(int_at(16), Assembler::jalr_op) ) { + return; + } + + // FIXME: why add jr_op here? + //daddiu dst, R0, imm16 + //nop + //nop + //nop + if ( is_op(Assembler::daddiu_op) && + nativeInstruction_at(addr_at(4))->is_nop() && + nativeInstruction_at(addr_at(8))->is_nop() && + nativeInstruction_at(addr_at(12))->is_nop() && + is_special_op(int_at(16), Assembler::jr_op) ) { + return; + } + + //lui dst, imm16 + //ori dst, dst, imm16 + //nop + //nop + if ( is_op(Assembler::lui_op) && + is_op (int_at(4), Assembler::ori_op) && + nativeInstruction_at(addr_at(8))->is_nop() && + nativeInstruction_at(addr_at(12))->is_nop() && + is_special_op(int_at(16), Assembler::jalr_op) ) { + return; + } + + //lui dst, imm16 + //nop + //nop + //nop + if ( is_op(Assembler::lui_op) && + nativeInstruction_at(addr_at(4))->is_nop() && + nativeInstruction_at(addr_at(8))->is_nop() && + nativeInstruction_at(addr_at(12))->is_nop() && + is_special_op(int_at(16), Assembler::jalr_op) ) { + return; + } + + //daddiu dst, R0, imm16 + //nop + if ( is_op(Assembler::daddiu_op) && + nativeInstruction_at(addr_at(4))->is_nop() && + is_special_op(int_at(8), Assembler::jalr_op) ) { + return; + } + + //lui dst, imm16 + //ori dst, dst, imm16 + if ( is_op(Assembler::lui_op) && + is_op (int_at(4), Assembler::ori_op) && + is_special_op(int_at(8), Assembler::jalr_op) ) { + return; + } + + //lui dst, imm16 + //nop + if ( is_op(Assembler::lui_op) && + nativeInstruction_at(addr_at(4))->is_nop() && + is_special_op(int_at(8), Assembler::jalr_op) ) { + return; + } + + if (nativeInstruction_at(addr_at(0))->is_trampoline_call()) + return; + + fatal("not a call"); +} + +address NativeCall::target_addr_for_insn() const { + // jal target + // nop + if ( is_op(int_at(0), Assembler::jal_op) && + nativeInstruction_at(addr_at(4))->is_nop()) { + int instr_index = int_at(0) & 0x3ffffff; + intptr_t target_high = ((intptr_t)addr_at(4)) & 0xfffffffff0000000; + intptr_t target = target_high | (instr_index << 2); + return (address)target; + } + + // nop + // nop + // nop + // nop + // jal target + // nop + if ( nativeInstruction_at(addr_at(0))->is_nop() && + nativeInstruction_at(addr_at(4))->is_nop() && + nativeInstruction_at(addr_at(8))->is_nop() && + nativeInstruction_at(addr_at(12))->is_nop() && + is_op(int_at(16), Assembler::jal_op) && + nativeInstruction_at(addr_at(20))->is_nop()) { + int instr_index = int_at(16) & 0x3ffffff; + intptr_t target_high = ((intptr_t)addr_at(20)) & 0xfffffffff0000000; + intptr_t target = target_high | (instr_index << 2); + return (address)target; + } + + // li64 + if ( is_op(Assembler::lui_op) && + is_op(int_at(4), Assembler::ori_op) && + is_special_op(int_at(8), Assembler::dsll_op) && + is_op(int_at(12), Assembler::ori_op) && + is_special_op(int_at(16), Assembler::dsll_op) && + is_op(int_at(20), Assembler::ori_op) ) { + + return 
(address)Assembler::merge( (intptr_t)(int_at(20) & 0xffff), + (intptr_t)(int_at(12) & 0xffff), + (intptr_t)(int_at(4) & 0xffff), + (intptr_t)(int_at(0) & 0xffff)); + } + + //lui dst, imm16 + //ori dst, dst, imm16 + //dsll dst, dst, 16 + //ori dst, dst, imm16 + if ( is_op(Assembler::lui_op) && + is_op (int_at(4), Assembler::ori_op) && + is_special_op(int_at(8), Assembler::dsll_op) && + is_op (int_at(12), Assembler::ori_op) ) { + + return (address)Assembler::merge( (intptr_t)(int_at(12) & 0xffff), + (intptr_t)(int_at(4) & 0xffff), + (intptr_t)(int_at(0) & 0xffff), + (intptr_t)0); + } + + //lui dst, imm16 + //ori dst, dst, imm16 + //dsll dst, dst, 16 + //ld dst, dst, imm16 + if ( is_op(Assembler::lui_op) && + is_op (int_at(4), Assembler::ori_op) && + is_special_op(int_at(8), Assembler::dsll_op) && + is_op (int_at(12), Assembler::ld_op) ) { + + address dest = (address)Assembler::merge( (intptr_t)0, + (intptr_t)(int_at(4) & 0xffff), + (intptr_t)(int_at(0) & 0xffff), + (intptr_t)0); + return dest + Assembler::simm16((intptr_t)int_at(12) & 0xffff); + } + + //ori dst, R0, imm16 + //dsll dst, dst, 16 + //ori dst, dst, imm16 + //nop + if ( is_op(Assembler::ori_op) && + is_special_op(int_at(4), Assembler::dsll_op) && + is_op (int_at(8), Assembler::ori_op) && + nativeInstruction_at(addr_at(12))->is_nop()) { + + return (address)Assembler::merge( (intptr_t)(int_at(8) & 0xffff), + (intptr_t)(int_at(0) & 0xffff), + (intptr_t)0, + (intptr_t)0); + } + + //ori dst, R0, imm16 + //dsll dst, dst, 16 + //nop + //nop + if ( is_op(Assembler::ori_op) && + is_special_op(int_at(4), Assembler::dsll_op) && + nativeInstruction_at(addr_at(8))->is_nop() && + nativeInstruction_at(addr_at(12))->is_nop()) { + + return (address)Assembler::merge( (intptr_t)(0), + (intptr_t)(int_at(0) & 0xffff), + (intptr_t)0, + (intptr_t)0); + } + + //daddiu dst, R0, imm16 + //nop + //nop <-- optional + //nop <-- optional + if ( is_op(Assembler::daddiu_op) && + nativeInstruction_at(addr_at(4))->is_nop() ) { + + int sign = int_at(0) & 0x8000; + if (sign == 0) { + return (address)Assembler::merge( (intptr_t)(int_at(0) & 0xffff), + (intptr_t)0, + (intptr_t)0, + (intptr_t)0); + } else { + return (address)Assembler::merge( (intptr_t)(int_at(0) & 0xffff), + (intptr_t)(0xffff), + (intptr_t)(0xffff), + (intptr_t)(0xffff)); + } + } + + //lui dst, imm16 + //ori dst, dst, imm16 + //nop <-- optional + //nop <-- optional + if ( is_op(Assembler::lui_op) && + is_op (int_at(4), Assembler::ori_op) ) { + + int sign = int_at(0) & 0x8000; + if (sign == 0) { + return (address)Assembler::merge( (intptr_t)(int_at(4) & 0xffff), + (intptr_t)(int_at(0) & 0xffff), + (intptr_t)0, + (intptr_t)0); + } else { + return (address)Assembler::merge( (intptr_t)(int_at(4) & 0xffff), + (intptr_t)(int_at(0) & 0xffff), + (intptr_t)(0xffff), + (intptr_t)(0xffff)); + } + } + + //lui dst, imm16 + //nop + //nop <-- optional + //nop <-- optional + if ( is_op(Assembler::lui_op) && + nativeInstruction_at(addr_at(4))->is_nop() ) { + + int sign = int_at(0) & 0x8000; + if (sign == 0) { + return (address)Assembler::merge( (intptr_t)0, + (intptr_t)(int_at(0) & 0xffff), + (intptr_t)0, + (intptr_t)0); + } else { + return (address)Assembler::merge( (intptr_t)0, + (intptr_t)(int_at(0) & 0xffff), + (intptr_t)(0xffff), + (intptr_t)(0xffff)); + } + } + + tty->print_cr("not a call: addr = " INTPTR_FORMAT , p2i(addr_at(0))); + tty->print_cr("======= Start decoding at addr = " INTPTR_FORMAT " =======", p2i(addr_at(0))); + Disassembler::decode(addr_at(0) - 2 * 4, addr_at(0) + 8 * 4, tty); + 
tty->print_cr("======= End of decoding ======="); + fatal("not a call"); + return NULL; // unreachable +} + +// Extract call destination from a NativeCall. The call might use a trampoline stub. +address NativeCall::destination() const { + address addr = (address)this; + address destination = target_addr_for_insn(); + // Do we use a trampoline stub for this call? + // Trampoline stubs are located behind the main code. + if (destination > addr) { + // Filter out recursive method invocation (call to verified/unverified entry point). + CodeBlob* cb = CodeCache::find_blob_unsafe(addr); // Else we get assertion if nmethod is zombie. + assert(cb && cb->is_nmethod(), "sanity"); + nmethod *nm = (nmethod *)cb; + NativeInstruction* ni = nativeInstruction_at(addr); + if (nm->stub_contains(destination) && ni->is_trampoline_call()) { + // Yes we do, so get the destination from the trampoline stub. + const address trampoline_stub_addr = destination; + destination = nativeCallTrampolineStub_at(trampoline_stub_addr)->destination(); + } + } + return destination; +} + +// Similar to replace_mt_safe, but just changes the destination. The +// important thing is that free-running threads are able to execute this +// call instruction at all times. +// +// Used in the runtime linkage of calls; see class CompiledIC. +// +// Add parameter assert_lock to switch off assertion +// during code generation, where no patching lock is needed. +void NativeCall::set_destination_mt_safe(address dest, bool assert_lock) { + assert(!assert_lock || + (Patching_lock->is_locked() || SafepointSynchronize::is_at_safepoint()), + "concurrent code patching"); + + ResourceMark rm; + address addr_call = addr_at(0); + assert(NativeCall::is_call_at(addr_call), "unexpected code at call site"); + // Patch the constant in the call's trampoline stub. 
+ if (MacroAssembler::reachable_from_cache()) { + set_destination(dest); + } else { + address trampoline_stub_addr = nativeCall_at(addr_call)->target_addr_for_insn(); + assert (get_trampoline() != NULL && trampoline_stub_addr == get_trampoline(), "we need a trampoline"); + nativeCallTrampolineStub_at(trampoline_stub_addr)->set_destination(dest); + } +} + +address NativeCall::get_trampoline() { + address call_addr = addr_at(0); + + CodeBlob *code = CodeCache::find_blob(call_addr); + assert(code != NULL, "Could not find the containing code blob"); + + if (code->is_nmethod()) { + return trampoline_stub_Relocation::get_trampoline_for(call_addr, (nmethod*)code); + } + return NULL; +} + +// manual implementation of GSSQ +// +// 00000001200009c0 : +// 1200009c0: 0085202d daddu a0, a0, a1 +// 1200009c4: e8860027 gssq a2, a3, 0(a0) +// 1200009c8: 03e00008 jr ra +// 1200009cc: 00000000 nop +// +typedef void (* atomic_store128_ptr)(long *addr, int offset, long low64, long hi64); + +static int *buf; + +static atomic_store128_ptr get_atomic_store128_func() { + assert(UseLEXT1, "UseLEXT1 must be true"); + static atomic_store128_ptr p = NULL; + if (p != NULL) + return p; + + buf = (int *)mmap(NULL, 1024, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANONYMOUS, + -1, 0); + buf[0] = 0x0085202d; + buf[1] = (0x3a << 26) | (4 << 21) | (6 << 16) | 0x27; /* gssq $a2, $a3, 0($a0) */ + buf[2] = 0x03e00008; + buf[3] = 0; + + asm("sync"); + p = (atomic_store128_ptr)buf; + return p; +} + +void NativeCall::patch_on_jal_only(address dst) { + long dest = ((long)dst - (((long)addr_at(4)) & 0xfffffffff0000000))>>2; + if ((dest >= 0) && (dest < (1<<26))) { + jint jal_inst = (Assembler::jal_op << 26) | dest; + set_int_at(0, jal_inst); + ICache::invalidate_range(addr_at(0), 4); + } else { + ShouldNotReachHere(); + } +} + +void NativeCall::patch_on_jal_gs(address dst) { + long dest = ((long)dst - (((long)addr_at(20)) & 0xfffffffff0000000))>>2; + if ((dest >= 0) && (dest < (1<<26))) { + jint jal_inst = (Assembler::jal_op << 26) | dest; + set_int_at(16, jal_inst); + ICache::invalidate_range(addr_at(16), 4); + } else { + ShouldNotReachHere(); + } +} + +void NativeCall::patch_on_jal(address dst) { + patch_on_jal_gs(dst); +} + +void NativeCall::patch_on_trampoline(address dest) { + assert(nativeInstruction_at(addr_at(0))->is_trampoline_call(), "unexpected code at call site"); + jlong dst = (jlong) dest; + //lui dst, imm16 + //ori dst, dst, imm16 + //dsll dst, dst, 16 + //ld dst, dst, imm16 + if ((dst> 0) && Assembler::is_simm16(dst >> 32)) { + dst += (dst & 0x8000) << 1; + set_int_at(0, (int_at(0) & 0xffff0000) | (Assembler::split_low(dst >> 32) & 0xffff)); + set_int_at(4, (int_at(4) & 0xffff0000) | (Assembler::split_low(dst >> 16) & 0xffff)); + set_int_at(12, (int_at(12) & 0xffff0000) | (Assembler::split_low(dst) & 0xffff)); + + ICache::invalidate_range(addr_at(0), 24); + } else { + ShouldNotReachHere(); + } +} + +void NativeCall::patch_on_jalr_gs(address dst) { + patch_set48_gs(dst); +} + +void NativeCall::patch_on_jalr(address dst) { + patch_set48(dst); +} + +void NativeCall::patch_set48_gs(address dest) { + jlong value = (jlong) dest; + int rt_reg = (int_at(0) & (0x1f << 16)); + + if (rt_reg == 0) rt_reg = 25 << 16; // r25 is T9 + + int rs_reg = rt_reg << 5; + int rd_reg = rt_reg >> 5; + + int hi = (int)(value >> 32); + int lo = (int)(value & ~0); + int count = 0; + int insts[4] = {0, 0, 0, 0}; + + if (value == lo) { // 32-bit integer + if (Assembler::is_simm16(value)) { + insts[count] = (Assembler::daddiu_op << 26) | 
rt_reg | Assembler::split_low(value); + count += 1; + } else { + insts[count] = (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 16); + count += 1; + if (Assembler::split_low(value)) { + insts[count] = (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value); + count += 1; + } + } + } else if (hi == 0) { // hardware zero-extends to upper 32 + insts[count] = (Assembler::ori_op << 26) | rt_reg | Assembler::split_low(julong(value) >> 16); + count += 1; + insts[count] = (Assembler::dsll_op) | rt_reg | rd_reg | (16 << 6); + count += 1; + if (Assembler::split_low(value)) { + insts[count] = (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value); + count += 1; + } + } else if ((value> 0) && Assembler::is_simm16(value >> 32)) { + insts[count] = (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 32); + count += 1; + insts[count] = (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value >> 16); + count += 1; + insts[count] = (Assembler::dsll_op) | rt_reg | rd_reg | (16 << 6); + count += 1; + insts[count] = (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value); + count += 1; + } else { + tty->print_cr("dest = 0x%lx", value); + guarantee(false, "Not supported yet !"); + } + + while (count < 4) { + insts[count] = 0; + count++; + } + + guarantee(((long)addr_at(0) % (BytesPerWord * 2)) == 0, "must be aligned"); + atomic_store128_ptr func = get_atomic_store128_func(); + (*func)((long *)addr_at(0), 0, *(long *)&insts[0], *(long *)&insts[2]); + + ICache::invalidate_range(addr_at(0), 16); +} + +void NativeCall::patch_set32_gs(address dest) { + jlong value = (jlong) dest; + int rt_reg = (int_at(0) & (0x1f << 16)); + + if (rt_reg == 0) rt_reg = 25 << 16; // r25 is T9 + + int rs_reg = rt_reg << 5; + int rd_reg = rt_reg >> 5; + + int hi = (int)(value >> 32); + int lo = (int)(value & ~0); + + int count = 0; + + int insts[2] = {0, 0}; + + if (value == lo) { // 32-bit integer + if (Assembler::is_simm16(value)) { + //daddiu(d, R0, value); + //set_int_at(count << 2, (Assembler::daddiu_op << 26) | rt_reg | Assembler::split_low(value)); + insts[count] = (Assembler::daddiu_op << 26) | rt_reg | Assembler::split_low(value); + count += 1; + } else { + //lui(d, split_low(value >> 16)); + //set_int_at(count << 2, (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 16)); + insts[count] = (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 16); + count += 1; + if (Assembler::split_low(value)) { + //ori(d, d, split_low(value)); + //set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value)); + insts[count] = (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value); + count += 1; + } + } + } else { + tty->print_cr("dest = 0x%lx", value); + guarantee(false, "Not supported yet !"); + } + + while (count < 2) { + //nop(); + //set_int_at(count << 2, 0); + insts[count] = 0; + count++; + } + + long inst = insts[1]; + inst = inst << 32; + inst = inst + insts[0]; + + set_long_at(0, inst); +} + +void NativeCall::patch_set48(address dest) { + jlong value = (jlong) dest; + int rt_reg = (int_at(0) & (0x1f << 16)); + + if (rt_reg == 0) rt_reg = 25 << 16; // r25 is T9 + + int rs_reg = rt_reg << 5; + int rd_reg = rt_reg >> 5; + + int hi = (int)(value >> 32); + int lo = (int)(value & ~0); + + int count = 0; + + if (value == lo) { // 32-bit integer + if (Assembler::is_simm16(value)) { + //daddiu(d, R0, value); + set_int_at(count << 2, (Assembler::daddiu_op << 
26) | rt_reg | Assembler::split_low(value)); + count += 1; + } else { + //lui(d, split_low(value >> 16)); + set_int_at(count << 2, (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 16)); + count += 1; + if (Assembler::split_low(value)) { + //ori(d, d, split_low(value)); + set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value)); + count += 1; + } + } + } else if (hi == 0) { // hardware zero-extends to upper 32 + //ori(d, R0, julong(value) >> 16); + set_int_at(count << 2, (Assembler::ori_op << 26) | rt_reg | Assembler::split_low(julong(value) >> 16)); + count += 1; + //dsll(d, d, 16); + set_int_at(count << 2, (Assembler::dsll_op) | rt_reg | rd_reg | (16 << 6)); + count += 1; + if (Assembler::split_low(value)) { + //ori(d, d, split_low(value)); + set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value)); + count += 1; + } + } else if ((value> 0) && Assembler::is_simm16(value >> 32)) { + //lui(d, value >> 32); + set_int_at(count << 2, (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 32)); + count += 1; + //ori(d, d, split_low(value >> 16)); + set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value >> 16)); + count += 1; + //dsll(d, d, 16); + set_int_at(count << 2, (Assembler::dsll_op) | rt_reg | rd_reg | (16 << 6)); + count += 1; + //ori(d, d, split_low(value)); + set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value)); + count += 1; + } else { + tty->print_cr("dest = 0x%lx", value); + guarantee(false, "Not supported yet !"); + } + + while (count < 4) { + //nop(); + set_int_at(count << 2, 0); + count++; + } + + ICache::invalidate_range(addr_at(0), 16); +} + +void NativeCall::patch_set32(address dest) { + patch_set32_gs(dest); +} + +void NativeCall::set_destination(address dest) { + OrderAccess::fence(); + + // li64 + if (is_special_op(int_at(16), Assembler::dsll_op)) { + int first_word = int_at(0); + set_int_at(0, 0x1000ffff); /* .1: b .1 */ + set_int_at(4, (int_at(4) & 0xffff0000) | (Assembler::split_low((intptr_t)dest >> 32) & 0xffff)); + set_int_at(12, (int_at(12) & 0xffff0000) | (Assembler::split_low((intptr_t)dest >> 16) & 0xffff)); + set_int_at(20, (int_at(20) & 0xffff0000) | (Assembler::split_low((intptr_t)dest) & 0xffff)); + set_int_at(0, (first_word & 0xffff0000) | (Assembler::split_low((intptr_t)dest >> 48) & 0xffff)); + ICache::invalidate_range(addr_at(0), 24); + } else if (is_op(int_at(16), Assembler::jal_op)) { + if (UseLEXT1) { + patch_on_jal_gs(dest); + } else { + patch_on_jal(dest); + } + } else if (is_op(int_at(0), Assembler::jal_op)) { + patch_on_jal_only(dest); + } else if (is_special_op(int_at(16), Assembler::jalr_op)) { + if (UseLEXT1) { + patch_on_jalr_gs(dest); + } else { + patch_on_jalr(dest); + } + } else if (is_special_op(int_at(8), Assembler::jalr_op)) { + guarantee(!os::is_MP() || (((long)addr_at(0) % 8) == 0), "destination must be aligned by 8"); + if (UseLEXT1) { + patch_set32_gs(dest); + } else { + patch_set32(dest); + } + ICache::invalidate_range(addr_at(0), 8); + } else { + fatal("not a call"); + } +} + +void NativeCall::print() { + tty->print_cr(PTR_FORMAT ": call " PTR_FORMAT, + p2i(instruction_address()), p2i(destination())); +} + +// Inserts a native call instruction at a given pc +void NativeCall::insert(address code_pos, address entry) { + NativeCall *call = nativeCall_at(code_pos); + CodeBuffer cb(call->addr_at(0), instruction_size); + MacroAssembler 
masm(&cb); +#define __ masm. + __ li48(T9, (long)entry); + __ jalr (); + __ delayed()->nop(); +#undef __ + + ICache::invalidate_range(call->addr_at(0), instruction_size); +} + +// MT-safe patching of a call instruction. +// First patches first word of instruction to two jmp's that jmps to them +// selfs (spinlock). Then patches the last byte, and then atomicly replaces +// the jmp's with the first 4 byte of the new instruction. +void NativeCall::replace_mt_safe(address instr_addr, address code_buffer) { + Unimplemented(); +} + +//------------------------------------------------------------------- + +void NativeMovConstReg::verify() { + // li64 + if ( is_op(Assembler::lui_op) && + is_op(int_at(4), Assembler::ori_op) && + is_special_op(int_at(8), Assembler::dsll_op) && + is_op(int_at(12), Assembler::ori_op) && + is_special_op(int_at(16), Assembler::dsll_op) && + is_op(int_at(20), Assembler::ori_op) ) { + return; + } + + //lui dst, imm16 + //ori dst, dst, imm16 + //dsll dst, dst, 16 + //ori dst, dst, imm16 + if ( is_op(Assembler::lui_op) && + is_op (int_at(4), Assembler::ori_op) && + is_special_op(int_at(8), Assembler::dsll_op) && + is_op (int_at(12), Assembler::ori_op) ) { + return; + } + + //ori dst, R0, imm16 + //dsll dst, dst, 16 + //ori dst, dst, imm16 + //nop + if ( is_op(Assembler::ori_op) && + is_special_op(int_at(4), Assembler::dsll_op) && + is_op (int_at(8), Assembler::ori_op) && + nativeInstruction_at(addr_at(12))->is_nop()) { + return; + } + + //ori dst, R0, imm16 + //dsll dst, dst, 16 + //nop + //nop + if ( is_op(Assembler::ori_op) && + is_special_op(int_at(4), Assembler::dsll_op) && + nativeInstruction_at(addr_at(8))->is_nop() && + nativeInstruction_at(addr_at(12))->is_nop()) { + return; + } + + //daddiu dst, R0, imm16 + //nop + //nop + //nop + if ( is_op(Assembler::daddiu_op) && + nativeInstruction_at(addr_at(4))->is_nop() && + nativeInstruction_at(addr_at(8))->is_nop() && + nativeInstruction_at(addr_at(12))->is_nop() ) { + return; + } + + //lui dst, imm16 + //ori dst, dst, imm16 + //nop + //nop + if ( is_op(Assembler::lui_op) && + is_op (int_at(4), Assembler::ori_op) && + nativeInstruction_at(addr_at(8))->is_nop() && + nativeInstruction_at(addr_at(12))->is_nop() ) { + return; + } + + //lui dst, imm16 + //nop + //nop + //nop + if ( is_op(Assembler::lui_op) && + nativeInstruction_at(addr_at(4))->is_nop() && + nativeInstruction_at(addr_at(8))->is_nop() && + nativeInstruction_at(addr_at(12))->is_nop() ) { + return; + } + + fatal("not a mov reg, imm64/imm48"); +} + +void NativeMovConstReg::print() { + tty->print_cr(PTR_FORMAT ": mov reg, " INTPTR_FORMAT, + p2i(instruction_address()), data()); +} + +intptr_t NativeMovConstReg::data() const { + // li64 + if ( is_op(Assembler::lui_op) && + is_op(int_at(4), Assembler::ori_op) && + is_special_op(int_at(8), Assembler::dsll_op) && + is_op(int_at(12), Assembler::ori_op) && + is_special_op(int_at(16), Assembler::dsll_op) && + is_op(int_at(20), Assembler::ori_op) ) { + + return Assembler::merge( (intptr_t)(int_at(20) & 0xffff), + (intptr_t)(int_at(12) & 0xffff), + (intptr_t)(int_at(4) & 0xffff), + (intptr_t)(int_at(0) & 0xffff)); + } + + //lui dst, imm16 + //ori dst, dst, imm16 + //dsll dst, dst, 16 + //ori dst, dst, imm16 + if ( is_op(Assembler::lui_op) && + is_op (int_at(4), Assembler::ori_op) && + is_special_op(int_at(8), Assembler::dsll_op) && + is_op (int_at(12), Assembler::ori_op) ) { + + return Assembler::merge( (intptr_t)(int_at(12) & 0xffff), + (intptr_t)(int_at(4) & 0xffff), + (intptr_t)(int_at(0) & 0xffff), + (intptr_t)0); + } + + 
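+  // Assembler::merge() is assumed to reassemble four 16-bit chunks, lowest
+  // chunk first, roughly:
+  //   intptr_t merge(intptr_t x0, intptr_t x16, intptr_t x32, intptr_t x48) {
+  //     return x0 | (x16 << 16) | (x32 << 32) | (x48 << 48);
+  //   }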
//ori dst, R0, imm16 + //dsll dst, dst, 16 + //ori dst, dst, imm16 + //nop + if ( is_op(Assembler::ori_op) && + is_special_op(int_at(4), Assembler::dsll_op) && + is_op (int_at(8), Assembler::ori_op) && + nativeInstruction_at(addr_at(12))->is_nop()) { + + return Assembler::merge( (intptr_t)(int_at(8) & 0xffff), + (intptr_t)(int_at(0) & 0xffff), + (intptr_t)0, + (intptr_t)0); + } + + //ori dst, R0, imm16 + //dsll dst, dst, 16 + //nop + //nop + if ( is_op(Assembler::ori_op) && + is_special_op(int_at(4), Assembler::dsll_op) && + nativeInstruction_at(addr_at(8))->is_nop() && + nativeInstruction_at(addr_at(12))->is_nop()) { + + return Assembler::merge( (intptr_t)(0), + (intptr_t)(int_at(0) & 0xffff), + (intptr_t)0, + (intptr_t)0); + } + + //daddiu dst, R0, imm16 + //nop + //nop + //nop + if ( is_op(Assembler::daddiu_op) && + nativeInstruction_at(addr_at(4))->is_nop() && + nativeInstruction_at(addr_at(8))->is_nop() && + nativeInstruction_at(addr_at(12))->is_nop() ) { + + int sign = int_at(0) & 0x8000; + if (sign == 0) { + return Assembler::merge( (intptr_t)(int_at(0) & 0xffff), + (intptr_t)0, + (intptr_t)0, + (intptr_t)0); + } else { + return Assembler::merge( (intptr_t)(int_at(0) & 0xffff), + (intptr_t)(0xffff), + (intptr_t)(0xffff), + (intptr_t)(0xffff)); + } + } + + //lui dst, imm16 + //ori dst, dst, imm16 + //nop + //nop + if ( is_op(Assembler::lui_op) && + is_op (int_at(4), Assembler::ori_op) && + nativeInstruction_at(addr_at(8))->is_nop() && + nativeInstruction_at(addr_at(12))->is_nop() ) { + + int sign = int_at(0) & 0x8000; + if (sign == 0) { + return Assembler::merge( (intptr_t)(int_at(4) & 0xffff), + (intptr_t)(int_at(0) & 0xffff), + (intptr_t)0, + (intptr_t)0); + } else { + return Assembler::merge( (intptr_t)(int_at(4) & 0xffff), + (intptr_t)(int_at(0) & 0xffff), + (intptr_t)(0xffff), + (intptr_t)(0xffff)); + } + } + + //lui dst, imm16 + //nop + //nop + //nop + if ( is_op(Assembler::lui_op) && + nativeInstruction_at(addr_at(4))->is_nop() && + nativeInstruction_at(addr_at(8))->is_nop() && + nativeInstruction_at(addr_at(12))->is_nop() ) { + + int sign = int_at(0) & 0x8000; + if (sign == 0) { + return Assembler::merge( (intptr_t)0, + (intptr_t)(int_at(0) & 0xffff), + (intptr_t)0, + (intptr_t)0); + } else { + return Assembler::merge( (intptr_t)0, + (intptr_t)(int_at(0) & 0xffff), + (intptr_t)(0xffff), + (intptr_t)(0xffff)); + } + } + + fatal("not a mov reg, imm64/imm48"); + return 0; // unreachable +} + +void NativeMovConstReg::patch_set48(intptr_t x) { + jlong value = (jlong) x; + int rt_reg = (int_at(0) & (0x1f << 16)); + int rs_reg = rt_reg << 5; + int rd_reg = rt_reg >> 5; + + int hi = (int)(value >> 32); + int lo = (int)(value & ~0); + + int count = 0; + + if (value == lo) { // 32-bit integer + if (Assembler::is_simm16(value)) { + //daddiu(d, R0, value); + set_int_at(count << 2, (Assembler::daddiu_op << 26) | rt_reg | Assembler::split_low(value)); + count += 1; + } else { + //lui(d, split_low(value >> 16)); + set_int_at(count << 2, (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 16)); + count += 1; + if (Assembler::split_low(value)) { + //ori(d, d, split_low(value)); + set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value)); + count += 1; + } + } + } else if (hi == 0) { // hardware zero-extends to upper 32 + set_int_at(count << 2, (Assembler::ori_op << 26) | rt_reg | Assembler::split_low(julong(value) >> 16)); + count += 1; + set_int_at(count << 2, (Assembler::dsll_op) | rt_reg | rd_reg | (16 << 6)); + count += 1; + if 
(Assembler::split_low(value)) { + set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value)); + count += 1; + } + } else if ((value> 0) && Assembler::is_simm16(value >> 32)) { + set_int_at(count << 2, (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 32)); + count += 1; + set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value >> 16)); + count += 1; + set_int_at(count << 2, (Assembler::dsll_op) | rt_reg | rd_reg | (16 << 6)); + count += 1; + set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value)); + count += 1; + } else { + tty->print_cr("value = 0x%lx", value); + guarantee(false, "Not supported yet !"); + } + + while (count < 4) { + set_int_at(count << 2, 0); + count++; + } +} + +void NativeMovConstReg::set_data(intptr_t x, intptr_t o) { + // li64 or li48 + if ((!nativeInstruction_at(addr_at(12))->is_nop()) && is_special_op(int_at(16), Assembler::dsll_op) && is_op(long_at(20), Assembler::ori_op)) { + set_int_at(0, (int_at(0) & 0xffff0000) | (Assembler::split_low((intptr_t)x >> 48) & 0xffff)); + set_int_at(4, (int_at(4) & 0xffff0000) | (Assembler::split_low((intptr_t)x >> 32) & 0xffff)); + set_int_at(12, (int_at(12) & 0xffff0000) | (Assembler::split_low((intptr_t)x >> 16) & 0xffff)); + set_int_at(20, (int_at(20) & 0xffff0000) | (Assembler::split_low((intptr_t)x) & 0xffff)); + } else { + patch_set48(x); + } + + ICache::invalidate_range(addr_at(0), 24); + + // Find and replace the oop/metadata corresponding to this + // instruction in oops section. + CodeBlob* blob = CodeCache::find_blob_unsafe(instruction_address()); + nmethod* nm = blob->as_nmethod_or_null(); + if (nm != NULL) { + o = o ? o : x; + RelocIterator iter(nm, instruction_address(), next_instruction_address()); + while (iter.next()) { + if (iter.type() == relocInfo::oop_type) { + oop* oop_addr = iter.oop_reloc()->oop_addr(); + *oop_addr = cast_to_oop(o); + break; + } else if (iter.type() == relocInfo::metadata_type) { + Metadata** metadata_addr = iter.metadata_reloc()->metadata_addr(); + *metadata_addr = (Metadata*)o; + break; + } + } + } +} + +//------------------------------------------------------------------- + +int NativeMovRegMem::offset() const{ + if (is_immediate()) + return (short)(int_at(instruction_offset)&0xffff); + else + return Assembler::merge(int_at(hiword_offset)&0xffff, long_at(instruction_offset)&0xffff); +} + +void NativeMovRegMem::set_offset(int x) { + if (is_immediate()) { + assert(Assembler::is_simm16(x), "just check"); + set_int_at(0, (int_at(0)&0xffff0000) | (x&0xffff) ); + if (is_64ldst()) { + assert(Assembler::is_simm16(x+4), "just check"); + set_int_at(4, (int_at(4)&0xffff0000) | ((x+4)&0xffff) ); + } + } else { + set_int_at(0, (int_at(0) & 0xffff0000) | (Assembler::split_high(x) & 0xffff)); + set_int_at(4, (int_at(4) & 0xffff0000) | (Assembler::split_low(x) & 0xffff)); + } + ICache::invalidate_range(addr_at(0), 8); +} + +void NativeMovRegMem::verify() { + int offset = 0; + + if ( Assembler::opcode(int_at(0)) == Assembler::lui_op ) { + + if ( Assembler::opcode(int_at(4)) != Assembler::ori_op ) { + fatal ("not a mov [reg+offs], reg instruction"); + } + + offset += 12; + } + + switch(Assembler::opcode(int_at(offset))) { + case Assembler::lb_op: + case Assembler::lbu_op: + case Assembler::lh_op: + case Assembler::lhu_op: + case Assembler::lw_op: + case Assembler::lwu_op: + case Assembler::ld_op: + case Assembler::lwc1_op: + case Assembler::ldc1_op: + case 
Assembler::sb_op: + case Assembler::sh_op: + case Assembler::sw_op: + case Assembler::sd_op: + case Assembler::swc1_op: + case Assembler::sdc1_op: + break; + default: + fatal ("not a mov [reg+offs], reg instruction"); + } +} + + +void NativeMovRegMem::print() { + tty->print_cr(PTR_FORMAT ": mov reg, [reg + %x]", p2i(instruction_address()), offset()); +} + +bool NativeInstruction::is_sigill_zombie_not_entrant() { + return uint_at(0) == NativeIllegalInstruction::instruction_code; +} + +void NativeIllegalInstruction::insert(address code_pos) { + *(juint*)code_pos = instruction_code; + ICache::invalidate_range(code_pos, instruction_size); +} + +void NativeJump::verify() { + assert(((NativeInstruction *)this)->is_jump() || + ((NativeInstruction *)this)->is_cond_jump(), "not a general jump instruction"); +} + +void NativeJump::patch_set48_gs(address dest) { + jlong value = (jlong) dest; + int rt_reg = (int_at(0) & (0x1f << 16)); + + if (rt_reg == 0) rt_reg = 25 << 16; // r25 is T9 + + int rs_reg = rt_reg << 5; + int rd_reg = rt_reg >> 5; + + int hi = (int)(value >> 32); + int lo = (int)(value & ~0); + + int count = 0; + + int insts[4] = {0, 0, 0, 0}; + + if (value == lo) { // 32-bit integer + if (Assembler::is_simm16(value)) { + insts[count] = (Assembler::daddiu_op << 26) | rt_reg | Assembler::split_low(value); + count += 1; + } else { + insts[count] = (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 16); + count += 1; + if (Assembler::split_low(value)) { + insts[count] = (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value); + count += 1; + } + } + } else if (hi == 0) { // hardware zero-extends to upper 32 + insts[count] = (Assembler::ori_op << 26) | rt_reg | Assembler::split_low(julong(value) >> 16); + count += 1; + insts[count] = (Assembler::dsll_op) | rt_reg | rd_reg | (16 << 6); + count += 1; + if (Assembler::split_low(value)) { + insts[count] = (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value); + count += 1; + } + } else if ((value> 0) && Assembler::is_simm16(value >> 32)) { + insts[count] = (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 32); + count += 1; + insts[count] = (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value >> 16); + count += 1; + insts[count] = (Assembler::dsll_op) | rt_reg | rd_reg | (16 << 6); + count += 1; + insts[count] = (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value); + count += 1; + } else { + tty->print_cr("dest = 0x%lx", value); + guarantee(false, "Not supported yet !"); + } + + while (count < 4) { + insts[count] = 0; + count++; + } + + guarantee(((long)addr_at(0) % (BytesPerWord * 2)) == 0, "must be aligned"); + atomic_store128_ptr func = get_atomic_store128_func(); + (*func)((long *)addr_at(0), 0, *(long *)&insts[0], *(long *)&insts[2]); + + ICache::invalidate_range(addr_at(0), 16); +} + +void NativeJump::patch_set48(address dest) { + jlong value = (jlong) dest; + int rt_reg = (int_at(0) & (0x1f << 16)); + int rs_reg = rt_reg << 5; + int rd_reg = rt_reg >> 5; + + int hi = (int)(value >> 32); + int lo = (int)(value & ~0); + + int count = 0; + + if (value == lo) { // 32-bit integer + if (Assembler::is_simm16(value)) { + set_int_at(count << 2, (Assembler::daddiu_op << 26) | rt_reg | Assembler::split_low(value)); + count += 1; + } else { + set_int_at(count << 2, (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 16)); + count += 1; + if (Assembler::split_low(value)) { + set_int_at(count << 2, (Assembler::ori_op << 26) | 
rs_reg | rt_reg | Assembler::split_low(value)); + count += 1; + } + } + } else if (hi == 0) { // hardware zero-extends to upper 32 + set_int_at(count << 2, (Assembler::ori_op << 26) | rt_reg | Assembler::split_low(julong(value) >> 16)); + count += 1; + set_int_at(count << 2, (Assembler::dsll_op) | rt_reg | rd_reg | (16 << 6)); + count += 1; + if (Assembler::split_low(value)) { + set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value)); + count += 1; + } + } else if ((value> 0) && Assembler::is_simm16(value >> 32)) { + set_int_at(count << 2, (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 32)); + count += 1; + set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value >> 16)); + count += 1; + set_int_at(count << 2, (Assembler::dsll_op) | rt_reg | rd_reg | (16 << 6)); + count += 1; + set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value)); + count += 1; + } else { + tty->print_cr("dest = 0x%lx", value); + guarantee(false, "Not supported yet !"); + } + + while (count < 4) { + set_int_at(count << 2, 0); + count++; + } + + ICache::invalidate_range(addr_at(0), 16); +} + +void NativeJump::patch_on_j_only(address dst) { + long dest = ((long)dst - (((long)addr_at(4)) & 0xfffffffff0000000))>>2; + if ((dest >= 0) && (dest < (1<<26))) { + jint j_inst = (Assembler::j_op << 26) | dest; + set_int_at(0, j_inst); + ICache::invalidate_range(addr_at(0), 4); + } else { + ShouldNotReachHere(); + } +} + + +void NativeJump::patch_on_j_gs(address dst) { + long dest = ((long)dst - (((long)addr_at(20)) & 0xfffffffff0000000))>>2; + if ((dest >= 0) && (dest < (1<<26))) { + jint j_inst = (Assembler::j_op << 26) | dest; + set_int_at(16, j_inst); + ICache::invalidate_range(addr_at(16), 4); + } else { + ShouldNotReachHere(); + } +} + +void NativeJump::patch_on_j(address dst) { + patch_on_j_gs(dst); +} + +void NativeJump::patch_on_jr_gs(address dst) { + patch_set48_gs(dst); + ICache::invalidate_range(addr_at(0), 16); +} + +void NativeJump::patch_on_jr(address dst) { + patch_set48(dst); + ICache::invalidate_range(addr_at(0), 16); +} + + +void NativeJump::set_jump_destination(address dest) { + OrderAccess::fence(); + + if (is_short()) { + assert(Assembler::is_simm16(dest-addr_at(4)), "change this code"); + set_int_at(0, (int_at(0) & 0xffff0000) | (dest - addr_at(4)) & 0xffff ); + ICache::invalidate_range(addr_at(0), 4); + } else if (is_b_far()) { + int offset = dest - addr_at(12); + set_int_at(12, (int_at(12) & 0xffff0000) | (offset >> 16)); + set_int_at(16, (int_at(16) & 0xffff0000) | (offset & 0xffff)); + } else { + if (is_op(int_at(16), Assembler::j_op)) { + if (UseLEXT1) { + patch_on_j_gs(dest); + } else { + patch_on_j(dest); + } + } else if (is_op(int_at(0), Assembler::j_op)) { + patch_on_j_only(dest); + } else if (is_special_op(int_at(16), Assembler::jr_op)) { + if (UseLEXT1) { + //guarantee(!os::is_MP() || (((long)addr_at(0) % 16) == 0), "destination must be aligned for GSSD"); + //patch_on_jr_gs(dest); + patch_on_jr(dest); + } else { + patch_on_jr(dest); + } + } else { + fatal("not a jump"); + } + } +} + +void NativeGeneralJump::insert_unconditional(address code_pos, address entry) { + CodeBuffer cb(code_pos, instruction_size); + MacroAssembler masm(&cb); +#define __ masm. + if (Assembler::is_simm16((entry - code_pos - 4) / 4)) { + __ b(entry); + __ delayed()->nop(); + } else { + // Attention: We have to use a relative jump here since PC reloc-operation isn't allowed here. 
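The relative form emitted below leans on a MIPS idiom: bgezal with rs = zero always branches and deposits the address of the branch plus 8 into RA, so the target can be rebuilt as RA plus a 32-bit displacement split across lui/ori. A standalone sketch of that arithmetic with hypothetical addresses (not the emitted code itself), assuming the displacement fits in 32 bits:

#include <cassert>
#include <cstdint>

int main() {
  uint64_t code_pos = 0x556809f198ULL;   // hypothetical buffer address
  uint64_t entry    = 0x55680312dcULL;   // hypothetical target, here behind us

  int32_t  disp = (int32_t)(entry - code_pos) - 8;   // what lui/ori will carry
  uint16_t hi   = (uint16_t)((uint32_t)disp >> 16);  // lui immediate
  uint16_t lo   = (uint16_t)disp;                    // ori immediate

  uint64_t ra      = code_pos + 8;                           // left in RA by bgezal
  int32_t  rebuilt = (int32_t)(((uint32_t)hi << 16) | lo);   // lui sign-extends, ori fills low bits
  assert(ra + (int64_t)rebuilt == entry);
  return 0;
}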
+ int offset = entry - code_pos; + + Label L; + __ bgezal(R0, L); + __ delayed()->lui(T9, (offset - 8) >> 16); + __ bind(L); + __ ori(T9, T9, (offset - 8) & 0xffff); + __ daddu(T9, T9, RA); + __ jr(T9); + __ delayed()->nop(); + } + +#undef __ + + ICache::invalidate_range(code_pos, instruction_size); +} + +bool NativeJump::is_b_far() { +// +// 0x000000556809f198: daddu at, ra, zero +// 0x000000556809f19c: [4110001]bgezal zero, 0x000000556809f1a4 +// +// 0x000000556809f1a0: nop +// 0x000000556809f1a4: lui t9, 0xfffffffd +// 0x000000556809f1a8: ori t9, t9, 0x14dc +// 0x000000556809f1ac: daddu t9, t9, ra +// 0x000000556809f1b0: daddu ra, at, zero +// 0x000000556809f1b4: jr t9 +// 0x000000556809f1b8: nop +// ;; ImplicitNullCheckStub slow case +// 0x000000556809f1bc: lui t9, 0x55 +// + return is_op(int_at(12), Assembler::lui_op); +} + +address NativeJump::jump_destination() { + if ( is_short() ) { + return addr_at(4) + Assembler::imm_off(int_at(instruction_offset)) * 4; + } + // Assembler::merge() is not correct in MIPS_64! + // + // Example: + // hi16 = 0xfffd, + // lo16 = f7a4, + // + // offset=0xfffdf7a4 (Right) + // Assembler::merge = 0xfffcf7a4 (Wrong) + // + if ( is_b_far() ) { + int hi16 = int_at(12)&0xffff; + int low16 = int_at(16)&0xffff; + address target = addr_at(12) + (hi16 << 16) + low16; + return target; + } + + // nop + // nop + // nop + // nop + // j target + // nop + if ( nativeInstruction_at(addr_at(0))->is_nop() && + nativeInstruction_at(addr_at(4))->is_nop() && + nativeInstruction_at(addr_at(8))->is_nop() && + nativeInstruction_at(addr_at(12))->is_nop() && + is_op(int_at(16), Assembler::j_op) && + nativeInstruction_at(addr_at(20))->is_nop()) { + int instr_index = int_at(16) & 0x3ffffff; + intptr_t target_high = ((intptr_t)addr_at(20)) & 0xfffffffff0000000; + intptr_t target = target_high | (instr_index << 2); + return (address)target; + } + + // j target + // nop + if ( is_op(int_at(0), Assembler::j_op) && + nativeInstruction_at(addr_at(4))->is_nop()) { + int instr_index = int_at(0) & 0x3ffffff; + intptr_t target_high = ((intptr_t)addr_at(4)) & 0xfffffffff0000000; + intptr_t target = target_high | (instr_index << 2); + return (address)target; + } + + // li64 + if ( is_op(Assembler::lui_op) && + is_op(int_at(4), Assembler::ori_op) && + is_special_op(int_at(8), Assembler::dsll_op) && + is_op(int_at(12), Assembler::ori_op) && + is_special_op(int_at(16), Assembler::dsll_op) && + is_op(int_at(20), Assembler::ori_op) ) { + + return (address)Assembler::merge( (intptr_t)(int_at(20) & 0xffff), + (intptr_t)(int_at(12) & 0xffff), + (intptr_t)(int_at(4) & 0xffff), + (intptr_t)(int_at(0) & 0xffff)); + } + + //lui dst, imm16 + //ori dst, dst, imm16 + //dsll dst, dst, 16 + //ori dst, dst, imm16 + if ( is_op(Assembler::lui_op) && + is_op (int_at(4), Assembler::ori_op) && + is_special_op(int_at(8), Assembler::dsll_op) && + is_op (int_at(12), Assembler::ori_op) ) { + + return (address)Assembler::merge( (intptr_t)(int_at(12) & 0xffff), + (intptr_t)(int_at(4) & 0xffff), + (intptr_t)(int_at(0) & 0xffff), + (intptr_t)0); + } + + //ori dst, R0, imm16 + //dsll dst, dst, 16 + //ori dst, dst, imm16 + //nop + if ( is_op(Assembler::ori_op) && + is_special_op(int_at(4), Assembler::dsll_op) && + is_op (int_at(8), Assembler::ori_op) && + nativeInstruction_at(addr_at(12))->is_nop()) { + + return (address)Assembler::merge( (intptr_t)(int_at(8) & 0xffff), + (intptr_t)(int_at(0) & 0xffff), + (intptr_t)0, + (intptr_t)0); + } + + //ori dst, R0, imm16 + //dsll dst, dst, 16 + //nop + //nop + if ( 
is_op(Assembler::ori_op) && + is_special_op(int_at(4), Assembler::dsll_op) && + nativeInstruction_at(addr_at(8))->is_nop() && + nativeInstruction_at(addr_at(12))->is_nop()) { + + return (address)Assembler::merge( (intptr_t)(0), + (intptr_t)(int_at(0) & 0xffff), + (intptr_t)0, + (intptr_t)0); + } + + //daddiu dst, R0, imm16 + //nop + //nop + //nop + if ( is_op(Assembler::daddiu_op) && + nativeInstruction_at(addr_at(4))->is_nop() && + nativeInstruction_at(addr_at(8))->is_nop() && + nativeInstruction_at(addr_at(12))->is_nop() ) { + + int sign = int_at(0) & 0x8000; + if (sign == 0) { + return (address)Assembler::merge( (intptr_t)(int_at(0) & 0xffff), + (intptr_t)0, + (intptr_t)0, + (intptr_t)0); + } else { + return (address)Assembler::merge( (intptr_t)(int_at(0) & 0xffff), + (intptr_t)(0xffff), + (intptr_t)(0xffff), + (intptr_t)(0xffff)); + } + } + + //lui dst, imm16 + //ori dst, dst, imm16 + //nop + //nop + if ( is_op(Assembler::lui_op) && + is_op (int_at(4), Assembler::ori_op) && + nativeInstruction_at(addr_at(8))->is_nop() && + nativeInstruction_at(addr_at(12))->is_nop() ) { + + int sign = int_at(0) & 0x8000; + if (sign == 0) { + return (address)Assembler::merge( (intptr_t)(int_at(4) & 0xffff), + (intptr_t)(int_at(0) & 0xffff), + (intptr_t)0, + (intptr_t)0); + } else { + return (address)Assembler::merge( (intptr_t)(int_at(4) & 0xffff), + (intptr_t)(int_at(0) & 0xffff), + (intptr_t)(0xffff), + (intptr_t)(0xffff)); + } + } + + //lui dst, imm16 + //nop + //nop + //nop + if ( is_op(Assembler::lui_op) && + nativeInstruction_at(addr_at(4))->is_nop() && + nativeInstruction_at(addr_at(8))->is_nop() && + nativeInstruction_at(addr_at(12))->is_nop() ) { + + int sign = int_at(0) & 0x8000; + if (sign == 0) { + return (address)Assembler::merge( (intptr_t)0, + (intptr_t)(int_at(0) & 0xffff), + (intptr_t)0, + (intptr_t)0); + } else { + return (address)Assembler::merge( (intptr_t)0, + (intptr_t)(int_at(0) & 0xffff), + (intptr_t)(0xffff), + (intptr_t)(0xffff)); + } + } + + fatal("not a jump"); + return NULL; // unreachable +} + +// MT-safe patching of a long jump instruction. +// First patches first word of instruction to two jmp's that jmps to them +// selfs (spinlock). Then patches the last byte, and then atomicly replaces +// the jmp's with the first 4 byte of the new instruction. 
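In the implementation that follows, the word at byte offset 16 is first replaced with the constant 0x1000fffb, which decodes as beq zero, zero, -5: a thread that reaches it keeps re-entering the site from the top until the final 8-byte store publishes the new tail. A standalone decode of that constant using the standard MIPS I-type field layout, for illustration only:

#include <cstdint>
#include <cstdio>

int main() {
  uint32_t insn   = 0x1000fffb;
  uint32_t opcode = insn >> 26;                // 0x04, i.e. beq
  uint32_t rs     = (insn >> 21) & 0x1f;       // 0, i.e. zero
  uint32_t rt     = (insn >> 16) & 0x1f;       // 0, i.e. zero
  int16_t  off    = (int16_t)(insn & 0xffff);  // -5 instruction slots

  // Branch target = address of the delay slot + (off << 2).
  // With the branch sitting at byte offset 16 of the call site:
  int64_t target_rel_to_site = 16 + 4 + (int64_t)off * 4;   // = 0, back to the start
  printf("opcode=%u rs=%u rt=%u off=%d target=site+%lld\n",
         opcode, rs, rt, off, (long long)target_rel_to_site);
  return 0;
}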
+void NativeGeneralJump::replace_mt_safe(address instr_addr, address code_buffer) { + NativeGeneralJump* h_jump = nativeGeneralJump_at (instr_addr); + assert((int)instruction_size == (int)NativeCall::instruction_size, + "note::Runtime1::patch_code uses NativeCall::instruction_size"); + + // ensure 100% atomicity + guarantee(!os::is_MP() || (((long)instr_addr % BytesPerWord) == 0), "destination must be aligned for SD"); + + int *p = (int *)instr_addr; + int jr_word = p[4]; + + p[4] = 0x1000fffb; /* .1: --; --; --; --; b .1; nop */ + memcpy(instr_addr, code_buffer, NativeCall::instruction_size - 8); + *(long *)(instr_addr + 16) = *(long *)(code_buffer + 16); +} + +// Must ensure atomicity +void NativeJump::patch_verified_entry(address entry, address verified_entry, address dest) { + assert(dest == SharedRuntime::get_handle_wrong_method_stub(), "expected fixed destination of patch"); + assert(nativeInstruction_at(verified_entry + BytesPerInstWord)->is_nop(), "mips64 cannot replace non-nop with jump"); + + if (MacroAssembler::reachable_from_cache(dest)) { + CodeBuffer cb(verified_entry, 1 * BytesPerInstWord); + MacroAssembler masm(&cb); + masm.j(dest); + } else { + // We use an illegal instruction for marking a method as + // not_entrant or zombie + NativeIllegalInstruction::insert(verified_entry); + } + + ICache::invalidate_range(verified_entry, 1 * BytesPerInstWord); +} + +bool NativeInstruction::is_jump() +{ + if ((int_at(0) & NativeGeneralJump::b_mask) == NativeGeneralJump::beq_opcode) + return true; + if (is_op(int_at(4), Assembler::lui_op)) // simplified b_far + return true; + if (is_op(int_at(12), Assembler::lui_op)) // original b_far + return true; + + // nop + // nop + // nop + // nop + // j target + // nop + if ( is_nop() && + nativeInstruction_at(addr_at(4))->is_nop() && + nativeInstruction_at(addr_at(8))->is_nop() && + nativeInstruction_at(addr_at(12))->is_nop() && + nativeInstruction_at(addr_at(16))->is_op(Assembler::j_op) && + nativeInstruction_at(addr_at(20))->is_nop() ) { + return true; + } + + if ( nativeInstruction_at(addr_at(0))->is_op(Assembler::j_op) && + nativeInstruction_at(addr_at(4))->is_nop() ) { + return true; + } + + // lui rd, imm(63...48); + // ori rd, rd, imm(47...32); + // dsll rd, rd, 16; + // ori rd, rd, imm(31...16); + // dsll rd, rd, 16; + // ori rd, rd, imm(15...0); + // jr rd + // nop + if (is_op(int_at(0), Assembler::lui_op) && + is_op(int_at(4), Assembler::ori_op) && + is_special_op(int_at(8), Assembler::dsll_op) && + is_op(int_at(12), Assembler::ori_op) && + is_special_op(int_at(16), Assembler::dsll_op) && + is_op(int_at(20), Assembler::ori_op) && + is_special_op(int_at(24), Assembler::jr_op)) { + return true; + } + + //lui dst, imm16 + //ori dst, dst, imm16 + //dsll dst, dst, 16 + //ori dst, dst, imm16 + if (is_op(int_at(0), Assembler::lui_op) && + is_op(int_at(4), Assembler::ori_op) && + is_special_op(int_at(8), Assembler::dsll_op) && + is_op(int_at(12), Assembler::ori_op) && + is_special_op(int_at(16), Assembler::jr_op)) { + return true; + } + + //ori dst, R0, imm16 + //dsll dst, dst, 16 + //ori dst, dst, imm16 + //nop + if ( is_op(Assembler::ori_op) && + is_special_op(int_at(4), Assembler::dsll_op) && + is_op (int_at(8), Assembler::ori_op) && + nativeInstruction_at(addr_at(12))->is_nop() && + is_special_op(int_at(16), Assembler::jr_op)) { + return true; + } + + //ori dst, R0, imm16 + //dsll dst, dst, 16 + //nop + //nop + if ( is_op(Assembler::ori_op) && + is_special_op(int_at(4), Assembler::dsll_op) && + nativeInstruction_at(addr_at(8))->is_nop() && 
+ nativeInstruction_at(addr_at(12))->is_nop() && + is_special_op(int_at(16), Assembler::jr_op)) { + return true; + } + + //daddiu dst, R0, imm16 + //nop + //nop + //nop + if ( is_op(Assembler::daddiu_op) && + nativeInstruction_at(addr_at(4))->is_nop() && + nativeInstruction_at(addr_at(8))->is_nop() && + nativeInstruction_at(addr_at(12))->is_nop() && + is_special_op(int_at(16), Assembler::jr_op)) { + return true; + } + + //lui dst, imm16 + //ori dst, dst, imm16 + //nop + //nop + if ( is_op(Assembler::lui_op) && + is_op (int_at(4), Assembler::ori_op) && + nativeInstruction_at(addr_at(8))->is_nop() && + nativeInstruction_at(addr_at(12))->is_nop() && + is_special_op(int_at(16), Assembler::jr_op)) { + return true; + } + + //lui dst, imm16 + //nop + //nop + //nop + if ( is_op(Assembler::lui_op) && + nativeInstruction_at(addr_at(4))->is_nop() && + nativeInstruction_at(addr_at(8))->is_nop() && + nativeInstruction_at(addr_at(12))->is_nop() && + is_special_op(int_at(16), Assembler::jr_op)) { + return true; + } + + return false; +} + +bool NativeInstruction::is_dtrace_trap() { + //return (*(int32_t*)this & 0xff) == 0xcc; + Unimplemented(); + return false; +} + +bool NativeInstruction::is_safepoint_poll() { + // + // 390 li T2, 0x0000000000400000 #@loadConP + // 394 sw [SP + #12], V1 # spill 9 + // 398 Safepoint @ [T2] : poll for GC @ safePoint_poll # spec.benchmarks.compress.Decompressor::decompress @ bci:224 L[0]=A6 L[1]=_ L[2]=sp + #28 L[3]=_ L[4]=V1 + // + // 0x000000ffe5815130: lui t2, 0x40 + // 0x000000ffe5815134: sw v1, 0xc(sp) ; OopMap{a6=Oop off=920} + // ;*goto + // ; - spec.benchmarks.compress.Decompressor::decompress@224 (line 584) + // + // 0x000000ffe5815138: lw at, 0x0(t2) ;*goto <--- PC + // ; - spec.benchmarks.compress.Decompressor::decompress@224 (line 584) + // + + // Since there may be some spill instructions between the safePoint_poll and loadConP, + // we check the safepoint instruction like the this. + return is_op(Assembler::lw_op) && is_rt(AT); +} diff --git a/src/hotspot/cpu/mips/nativeInst_mips.hpp b/src/hotspot/cpu/mips/nativeInst_mips.hpp new file mode 100644 index 00000000000..fb4f99c9c6b --- /dev/null +++ b/src/hotspot/cpu/mips/nativeInst_mips.hpp @@ -0,0 +1,734 @@ +/* + * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef CPU_MIPS_VM_NATIVEINST_MIPS_HPP +#define CPU_MIPS_VM_NATIVEINST_MIPS_HPP + +#include "asm/assembler.hpp" +#include "asm/macroAssembler.hpp" +#include "runtime/icache.hpp" +#include "runtime/os.hpp" +#include "runtime/safepointMechanism.hpp" + +// We have interfaces for the following instructions: +// - NativeInstruction +// - - NativeCall +// - - NativeMovConstReg +// - - NativeMovConstRegPatching +// - - NativeMovRegMem +// - - NativeMovRegMemPatching +// - - NativeJump +// - - NativeIllegalOpCode +// - - NativeGeneralJump +// - - NativeReturn +// - - NativeReturnX (return with argument) +// - - NativePushConst +// - - NativeTstRegMem + +// The base class for different kinds of native instruction abstractions. +// Provides the primitive operations to manipulate code relative to this. + +class NativeInstruction { + friend class Relocation; + + public: + enum mips_specific_constants { + nop_instruction_code = 0, + nop_instruction_size = 4, + sync_instruction_code = 0xf + }; + + bool is_nop() { return long_at(0) == nop_instruction_code; } + bool is_sync() { return long_at(0) == sync_instruction_code; } + bool is_dtrace_trap(); + inline bool is_call(); + inline bool is_illegal(); + inline bool is_return(); + bool is_jump(); + inline bool is_cond_jump(); + bool is_safepoint_poll(); + + //mips has no instruction to generate a illegal instrucion exception + //we define ours: break 11 + static int illegal_instruction(); + + bool is_int_branch(); + bool is_float_branch(); + + inline bool is_trampoline_call(); + + //We use an illegal instruction for marking a method as not_entrant or zombie. + bool is_sigill_zombie_not_entrant(); + + protected: + address addr_at(int offset) const { return address(this) + offset; } + address instruction_address() const { return addr_at(0); } + address next_instruction_address() const { return addr_at(BytesPerInstWord); } + address prev_instruction_address() const { return addr_at(-BytesPerInstWord); } + + s_char sbyte_at(int offset) const { return *(s_char*) addr_at(offset); } + u_char ubyte_at(int offset) const { return *(u_char*) addr_at(offset); } + + jint int_at(int offset) const { return *(jint*) addr_at(offset); } + juint uint_at(int offset) const { return *(juint*) addr_at(offset); } + + intptr_t ptr_at(int offset) const { return *(intptr_t*) addr_at(offset); } + + oop oop_at (int offset) const { return *(oop*) addr_at(offset); } + int long_at(int offset) const { return *(jint*)addr_at(offset); } + + + void set_char_at(int offset, char c) { *addr_at(offset) = (u_char)c; wrote(offset); } + void set_int_at(int offset, jint i) { *(jint*)addr_at(offset) = i; wrote(offset); } + void set_ptr_at (int offset, intptr_t ptr) { *(intptr_t*) addr_at(offset) = ptr; wrote(offset); } + void set_oop_at (int offset, oop o) { *(oop*) addr_at(offset) = o; wrote(offset); } + void set_long_at(int offset, long i); + + int insn_word() const { return long_at(0); } + static bool is_op (int insn, Assembler::ops op) { return Assembler::opcode(insn) == (int)op; } + bool is_op (Assembler::ops op) const { return is_op(insn_word(), op); } + bool is_rs (int insn, Register rs) const { return Assembler::rs(insn) == (int)rs->encoding(); } + bool is_rs (Register rs) const { return is_rs(insn_word(), rs); } + bool is_rt (int insn, Register rt) const { return Assembler::rt(insn) == (int)rt->encoding(); } + bool is_rt (Register rt) const { return is_rt(insn_word(), rt); } + + static bool is_special_op (int insn, Assembler::special_ops op) { + return is_op(insn, 
Assembler::special_op) && Assembler::special(insn)==(int)op; + } + bool is_special_op (Assembler::special_ops op) const { return is_special_op(insn_word(), op); } + + void wrote(int offset); + + public: + + // unit test stuff + static void test() {} // override for testing + + inline friend NativeInstruction* nativeInstruction_at(address address); +}; + +inline NativeInstruction* nativeInstruction_at(address address) { + NativeInstruction* inst = (NativeInstruction*)address; +#ifdef ASSERT + //inst->verify(); +#endif + return inst; +} + +inline NativeCall* nativeCall_at(address address); +// The NativeCall is an abstraction for accessing/manipulating native call imm32/imm64 +// instructions (used to manipulate inline caches, primitive & dll calls, etc.). +// MIPS has no call instruction with imm32/imm64. Usually, a call was done like this: +// 32 bits: +// lui rt, imm16 +// addiu rt, rt, imm16 +// jalr rt +// nop +// +// 64 bits: +// lui rd, imm(63...48); +// ori rd, rd, imm(47...32); +// dsll rd, rd, 16; +// ori rd, rd, imm(31...16); +// dsll rd, rd, 16; +// ori rd, rd, imm(15...0); +// jalr rd +// nop +// + +// we just consider the above for instruction as one call instruction +class NativeCall: public NativeInstruction { + public: + enum mips_specific_constants { + instruction_offset = 0, + instruction_size = 6 * BytesPerInstWord, + return_address_offset_short = 4 * BytesPerInstWord, + return_address_offset_long = 6 * BytesPerInstWord, + displacement_offset = 0 + }; + + address instruction_address() const { return addr_at(instruction_offset); } + + address next_instruction_address() const { + if (is_special_op(int_at(8), Assembler::jalr_op)) { + return addr_at(return_address_offset_short); + } else { + return addr_at(return_address_offset_long); + } + } + + address return_address() const { + return next_instruction_address(); + } + + address target_addr_for_insn() const; + address destination() const; + void set_destination(address dest); + + void patch_set48_gs(address dest); + void patch_set48(address dest); + + void patch_on_jalr_gs(address dest); + void patch_on_jalr(address dest); + + void patch_on_jal_gs(address dest); + void patch_on_jal(address dest); + + void patch_on_trampoline(address dest); + + void patch_on_jal_only(address dest); + + void patch_set32_gs(address dest); + void patch_set32(address dest); + + void verify_alignment() { } + void verify(); + void print(); + + // Creation + inline friend NativeCall* nativeCall_at(address address); + inline friend NativeCall* nativeCall_before(address return_address); + + static bool is_call_at(address instr) { + return nativeInstruction_at(instr)->is_call(); + } + + static bool is_call_before(address return_address) { + return is_call_at(return_address - return_address_offset_short) | is_call_at(return_address - return_address_offset_long); + } + + static bool is_call_to(address instr, address target) { + return nativeInstruction_at(instr)->is_call() && +nativeCall_at(instr)->destination() == target; + } + + // MT-safe patching of a call instruction. + static void insert(address code_pos, address entry); + + static void replace_mt_safe(address instr_addr, address code_buffer); + + // Similar to replace_mt_safe, but just changes the destination. The + // important thing is that free-running threads are able to execute + // this call instruction at all times. If the call is an immediate jal + // instruction we can simply rely on atomicity of 32-bit writes to + // make sure other threads will see no intermediate states. 
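A sketch of the single-word case described above, assuming the standard MIPS J-type layout for jal; this models the idea with std::atomic rather than the code cache and is not the HotSpot implementation:

#include <atomic>
#include <cstdint>

static std::atomic<uint32_t> call_site{0};   // stands in for the jal word in the code cache

static uint32_t encode_jal(uint64_t target) {
  // Standard MIPS J-type layout: opcode 3 in the top six bits,
  // then bits 27..2 of the in-region, word-aligned target.
  return (3u << 26) | (uint32_t)((target >> 2) & 0x03ffffffu);
}

static void retarget(uint64_t new_dest) {
  // One aligned 32-bit store: a concurrent reader sees either the old
  // or the new jal, never a mix. The real code would still flush icache.
  call_site.store(encode_jal(new_dest), std::memory_order_release);
}

int main() {
  retarget(0x46000);   // hypothetical in-region destination
  return 0;
}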
+ + // We cannot rely on locks here, since the free-running threads must run at + // full speed. + // + // Used in the runtime linkage of calls; see class CompiledIC. + + // The parameter assert_lock disables the assertion during code generation. + void set_destination_mt_safe(address dest, bool assert_lock = true); + + address get_trampoline(); +}; + +inline NativeCall* nativeCall_at(address address) { + NativeCall* call = (NativeCall*)(address - NativeCall::instruction_offset); +#ifdef ASSERT + call->verify(); +#endif + return call; +} + +inline NativeCall* nativeCall_before(address return_address) { + NativeCall* call = NULL; + if (NativeCall::is_call_at(return_address - NativeCall::return_address_offset_long)) { + call = (NativeCall*)(return_address - NativeCall::return_address_offset_long); + } else { + call = (NativeCall*)(return_address - NativeCall::return_address_offset_short); + } +#ifdef ASSERT + call->verify(); +#endif + return call; +} + +class NativeMovConstReg: public NativeInstruction { + public: + enum mips_specific_constants { + instruction_offset = 0, + instruction_size = 4 * BytesPerInstWord, + next_instruction_offset = 4 * BytesPerInstWord, + }; + + int insn_word() const { return long_at(instruction_offset); } + address instruction_address() const { return addr_at(0); } + address next_instruction_address() const { return addr_at(next_instruction_offset); } + intptr_t data() const; + void set_data(intptr_t x, intptr_t o = 0); + + void patch_set48(intptr_t x); + + void verify(); + void print(); + + // unit test stuff + static void test() {} + + // Creation + inline friend NativeMovConstReg* nativeMovConstReg_at(address address); + inline friend NativeMovConstReg* nativeMovConstReg_before(address address); +}; + +inline NativeMovConstReg* nativeMovConstReg_at(address address) { + NativeMovConstReg* test = (NativeMovConstReg*)(address - NativeMovConstReg::instruction_offset); +#ifdef ASSERT + test->verify(); +#endif + return test; +} + +inline NativeMovConstReg* nativeMovConstReg_before(address address) { + NativeMovConstReg* test = (NativeMovConstReg*)(address - NativeMovConstReg::instruction_size - NativeMovConstReg::instruction_offset); +#ifdef ASSERT + test->verify(); +#endif + return test; +} + +class NativeMovConstRegPatching: public NativeMovConstReg { + private: + friend NativeMovConstRegPatching* nativeMovConstRegPatching_at(address address) { + NativeMovConstRegPatching* test = (NativeMovConstRegPatching*)(address - instruction_offset); + #ifdef ASSERT + test->verify(); + #endif + return test; + } +}; + +// An interface for accessing/manipulating native moves of the form: +// lui AT, split_high(offset) +// addiu AT, split_low(offset) +// addu reg, reg, AT +// lb/lbu/sb/lh/lhu/sh/lw/sw/lwc1/swc1 dest, reg, 0 +// [lw/sw/lwc1/swc1 dest, reg, 4] +// or +// lb/lbu/sb/lh/lhu/sh/lw/sw/lwc1/swc1 dest, reg, offset +// [lw/sw/lwc1/swc1 dest, reg, offset+4] +// +// Warning: These routines must be able to handle any instruction sequences +// that are generated as a result of the load/store byte,word,long +// macros. + +class NativeMovRegMem: public NativeInstruction { + public: + enum mips_specific_constants { + instruction_offset = 0, + hiword_offset = 4, + ldst_offset = 12, + immediate_size = 4, + ldst_size = 16 + }; + + //offset is less than 16 bits. 
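In other words, the accessors below distinguish a short form, where the displacement sits directly in the signed 16-bit offset field of the load/store, from a long form that starts with lui because the displacement does not fit. A standalone sketch of that test and of the sign-extended decode; the encodings are hypothetical standard-MIPS examples:

#include <cassert>
#include <cstdint>

static uint32_t opcode(uint32_t insn) { return insn >> 26; }
static int16_t  imm16 (uint32_t insn) { return (int16_t)(insn & 0xffff); }

int main() {
  const uint32_t lui_op = 0x0f;
  uint32_t short_form = 0x8dc1fff8;              // lw at, -8(t2): offset held in the instruction
  uint32_t long_form  = (lui_op << 26) | 0x0001; // lui ...: offset too big, built in AT first

  assert(opcode(short_form) != lui_op);          // the "is_immediate" style test
  assert(imm16(short_form) == -8);               // sign-extended 16-bit displacement
  assert(opcode(long_form) == lui_op);
  return 0;
}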
+ bool is_immediate() const { return !is_op(long_at(instruction_offset), Assembler::lui_op); } + bool is_64ldst() const { + if (is_immediate()) { + return (Assembler::opcode(long_at(hiword_offset)) == Assembler::opcode(long_at(instruction_offset))) && + (Assembler::imm_off(long_at(hiword_offset)) == Assembler::imm_off(long_at(instruction_offset)) + wordSize); + } else { + return (Assembler::opcode(long_at(ldst_offset+hiword_offset)) == Assembler::opcode(long_at(ldst_offset))) && + (Assembler::imm_off(long_at(ldst_offset+hiword_offset)) == Assembler::imm_off(long_at(ldst_offset)) + wordSize); + } + } + + address instruction_address() const { return addr_at(instruction_offset); } + address next_instruction_address() const { + return addr_at( (is_immediate()? immediate_size : ldst_size) + (is_64ldst()? 4 : 0)); + } + + int offset() const; + + void set_offset(int x); + + void add_offset_in_bytes(int add_offset) { set_offset ( ( offset() + add_offset ) ); } + + void verify(); + void print (); + + // unit test stuff + static void test() {} + + private: + inline friend NativeMovRegMem* nativeMovRegMem_at (address address); +}; + +inline NativeMovRegMem* nativeMovRegMem_at (address address) { + NativeMovRegMem* test = (NativeMovRegMem*)(address - NativeMovRegMem::instruction_offset); +#ifdef ASSERT + test->verify(); +#endif + return test; +} + +class NativeMovRegMemPatching: public NativeMovRegMem { + private: + friend NativeMovRegMemPatching* nativeMovRegMemPatching_at (address address) { + NativeMovRegMemPatching* test = (NativeMovRegMemPatching*)(address - instruction_offset); + #ifdef ASSERT + test->verify(); + #endif + return test; + } +}; + + +// Handles all kinds of jump on Loongson. Long/far, conditional/unconditional +// 32 bits: +// far jump: +// lui reg, split_high(addr) +// addiu reg, split_low(addr) +// jr reg +// nop +// or +// beq ZERO, ZERO, offset +// nop +// + +//64 bits: +// far jump: +// lui rd, imm(63...48); +// ori rd, rd, imm(47...32); +// dsll rd, rd, 16; +// ori rd, rd, imm(31...16); +// dsll rd, rd, 16; +// ori rd, rd, imm(15...0); +// jalr rd +// nop +// +class NativeJump: public NativeInstruction { + public: + enum mips_specific_constants { + instruction_offset = 0, + beq_opcode = 0x10000000,//000100|00000|00000|offset + b_mask = 0xffff0000, + short_size = 8, + instruction_size = 6 * BytesPerInstWord + }; + + bool is_short() const { return (long_at(instruction_offset) & b_mask) == beq_opcode; } + bool is_b_far(); + address instruction_address() const { return addr_at(instruction_offset); } + address jump_destination(); + + void patch_set48_gs(address dest); + void patch_set48(address dest); + + void patch_on_jr_gs(address dest); + void patch_on_jr(address dest); + + void patch_on_j_gs(address dest); + void patch_on_j(address dest); + + void patch_on_j_only(address dest); + + void set_jump_destination(address dest); + + // Creation + inline friend NativeJump* nativeJump_at(address address); + + // Insertion of native jump instruction + static void insert(address code_pos, address entry) { Unimplemented(); } + // MT-safe insertion of native jump at verified method entry + static void check_verified_entry_alignment(address entry, address verified_entry) {} + static void patch_verified_entry(address entry, address verified_entry, address dest); + + void verify(); +}; + +inline NativeJump* nativeJump_at(address address) { + NativeJump* jump = (NativeJump*)(address - NativeJump::instruction_offset); + debug_only(jump->verify();) + return jump; +} + +class NativeGeneralJump: 
public NativeJump { + public: + // Creation + inline friend NativeGeneralJump* nativeGeneralJump_at(address address); + + // Insertion of native general jump instruction + static void insert_unconditional(address code_pos, address entry); + static void replace_mt_safe(address instr_addr, address code_buffer); +}; + +inline NativeGeneralJump* nativeGeneralJump_at(address address) { + NativeGeneralJump* jump = (NativeGeneralJump*)(address); + debug_only(jump->verify();) + return jump; +} + +class NativeIllegalInstruction: public NativeInstruction { +public: + enum mips_specific_constants { + instruction_code = 0x42000029, // mips reserved instruction + instruction_size = 4, + instruction_offset = 0, + next_instruction_offset = 4 + }; + + // Insert illegal opcode as specific address + static void insert(address code_pos); +}; + +// return instruction that does not pop values of the stack +// jr RA +// delay slot +class NativeReturn: public NativeInstruction { + public: + enum mips_specific_constants { + instruction_size = 8, + instruction_offset = 0, + next_instruction_offset = 8 + }; +}; + + + + +class NativeCondJump; +inline NativeCondJump* nativeCondJump_at(address address); +class NativeCondJump: public NativeInstruction { + public: + enum mips_specific_constants { + instruction_size = 16, + instruction_offset = 12, + next_instruction_offset = 20 + }; + + + int insn_word() const { return long_at(instruction_offset); } + address instruction_address() const { return addr_at(0); } + address next_instruction_address() const { return addr_at(next_instruction_offset); } + + // Creation + inline friend NativeCondJump* nativeCondJump_at(address address); + + address jump_destination() const { + return ::nativeCondJump_at(addr_at(12))->jump_destination(); + } + + void set_jump_destination(address dest) { + ::nativeCondJump_at(addr_at(12))->set_jump_destination(dest); + } + +}; + +inline NativeCondJump* nativeCondJump_at(address address) { + NativeCondJump* jump = (NativeCondJump*)(address); + return jump; +} + + + +inline bool NativeInstruction::is_illegal() { return insn_word() == illegal_instruction(); } + +inline bool NativeInstruction::is_call() { + // jal target + // nop + if ( nativeInstruction_at(addr_at(0))->is_op(Assembler::jal_op) && + nativeInstruction_at(addr_at(4))->is_nop() ) { + return true; + } + + // nop + // nop + // nop + // nop + // jal target + // nop + if ( is_nop() && + nativeInstruction_at(addr_at(4))->is_nop() && + nativeInstruction_at(addr_at(8))->is_nop() && + nativeInstruction_at(addr_at(12))->is_nop() && + nativeInstruction_at(addr_at(16))->is_op(Assembler::jal_op) && + nativeInstruction_at(addr_at(20))->is_nop() ) { + return true; + } + + // li64 + if ( is_op(Assembler::lui_op) && + is_op(int_at(4), Assembler::ori_op) && + is_special_op(int_at(8), Assembler::dsll_op) && + is_op(int_at(12), Assembler::ori_op) && + is_special_op(int_at(16), Assembler::dsll_op) && + is_op(int_at(20), Assembler::ori_op) && + is_special_op(int_at(24), Assembler::jalr_op) ) { + return true; + } + + //lui dst, imm16 + //ori dst, dst, imm16 + //dsll dst, dst, 16 + //ori dst, dst, imm16 + if ( is_op(Assembler::lui_op) && + is_op (int_at(4), Assembler::ori_op) && + is_special_op(int_at(8), Assembler::dsll_op) && + is_op (int_at(12), Assembler::ori_op) && + is_special_op(int_at(16), Assembler::jalr_op) ) { + return true; + } + + //ori dst, R0, imm16 + //dsll dst, dst, 16 + //ori dst, dst, imm16 + //nop + if ( is_op(Assembler::ori_op) && + is_special_op(int_at(4), Assembler::dsll_op) && + is_op 
(int_at(8), Assembler::ori_op) && + nativeInstruction_at(addr_at(12))->is_nop() && + is_special_op(int_at(16), Assembler::jalr_op) ) { + return true; + } + + //ori dst, R0, imm16 + //dsll dst, dst, 16 + //nop + //nop + if ( is_op(Assembler::ori_op) && + is_special_op(int_at(4), Assembler::dsll_op) && + nativeInstruction_at(addr_at(8))->is_nop() && + nativeInstruction_at(addr_at(12))->is_nop() && + is_special_op(int_at(16), Assembler::jalr_op) ) { + return true; + } + + //daddiu dst, R0, imm16 + //nop + //nop + //nop + if ( is_op(Assembler::daddiu_op) && + nativeInstruction_at(addr_at(4))->is_nop() && + nativeInstruction_at(addr_at(8))->is_nop() && + nativeInstruction_at(addr_at(12))->is_nop() && + is_special_op(int_at(16), Assembler::jalr_op) ) { + return true; + } + + //lui dst, imm16 + //ori dst, dst, imm16 + //nop + //nop + if ( is_op(Assembler::lui_op) && + is_op (int_at(4), Assembler::ori_op) && + nativeInstruction_at(addr_at(8))->is_nop() && + nativeInstruction_at(addr_at(12))->is_nop() && + is_special_op(int_at(16), Assembler::jalr_op) ) { + return true; + } + + //lui dst, imm16 + //nop + //nop + //nop + if ( is_op(Assembler::lui_op) && + nativeInstruction_at(addr_at(4))->is_nop() && + nativeInstruction_at(addr_at(8))->is_nop() && + nativeInstruction_at(addr_at(12))->is_nop() && + is_special_op(int_at(16), Assembler::jalr_op) ) { + return true; + } + + + //daddiu dst, R0, imm16 + //nop + if ( is_op(Assembler::daddiu_op) && + nativeInstruction_at(addr_at(4))->is_nop() && + is_special_op(int_at(8), Assembler::jalr_op) ) { + return true; + } + + //lui dst, imm16 + //ori dst, dst, imm16 + if ( is_op(Assembler::lui_op) && + is_op (int_at(4), Assembler::ori_op) && + is_special_op(int_at(8), Assembler::jalr_op) ) { + return true; + } + + //lui dst, imm16 + //nop + if ( is_op(Assembler::lui_op) && + nativeInstruction_at(addr_at(4))->is_nop() && + is_special_op(int_at(8), Assembler::jalr_op) ) { + return true; + } + + if(is_trampoline_call()) + return true; + + return false; + +} + +inline bool NativeInstruction::is_return() { return is_special_op(Assembler::jr_op) && is_rs(RA);} + +inline bool NativeInstruction::is_cond_jump() { return is_int_branch() || is_float_branch(); } + +// Call trampoline stubs. 
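The class below treats the first 8 bytes of the stub as a data slot holding the far destination (that is what ptr_at(0) reads), so retargeting a call through its trampoline is just one pointer-sized write. A stripped-down model of that layout, illustrative only:

#include <cstdint>
#include <cstring>

struct TrampolineSlotModel {
  unsigned char bytes[8];   // stands in for the stub's leading data word

  uint64_t destination() const {
    uint64_t d;
    std::memcpy(&d, bytes, sizeof d);
    return d;
  }
  void set_destination(uint64_t d) {
    std::memcpy(bytes, &d, sizeof d);
  }
};

int main() {
  TrampolineSlotModel stub{};
  stub.set_destination(0x556809f198ULL);   // hypothetical far target
  return stub.destination() == 0x556809f198ULL ? 0 : 1;
}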
+class NativeCallTrampolineStub : public NativeInstruction { + public: + + enum mips_specific_constants { + instruction_size = 2 * BytesPerInstWord, + instruction_offset = 0, + next_instruction_offset = 2 * BytesPerInstWord + }; + + address destination() const { + return (address)ptr_at(0); + } + + void set_destination(address new_destination) { + set_ptr_at(0, (intptr_t)new_destination); + } +}; + +inline bool NativeInstruction::is_trampoline_call() { + // lui dst, imm16 + // ori dst, dst, imm16 + // dsll dst, dst, 16 + // ld target, dst, imm16 + // jalr target + // nop + if ( is_op(Assembler::lui_op) && + is_op(int_at(4), Assembler::ori_op) && + is_special_op(int_at(8), Assembler::dsll_op) && + is_op(int_at(12), Assembler::ld_op) && + is_special_op(int_at(16), Assembler::jalr_op) && + nativeInstruction_at(addr_at(20))->is_nop() ) { + return true; + } + + return false; +} + +inline NativeCallTrampolineStub* nativeCallTrampolineStub_at(address addr) { + return (NativeCallTrampolineStub*)addr; +} +#endif // CPU_MIPS_VM_NATIVEINST_MIPS_HPP diff --git a/src/hotspot/cpu/mips/registerMap_mips.hpp b/src/hotspot/cpu/mips/registerMap_mips.hpp new file mode 100644 index 00000000000..7f800eb1070 --- /dev/null +++ b/src/hotspot/cpu/mips/registerMap_mips.hpp @@ -0,0 +1,47 @@ +/* + * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_MIPS_VM_REGISTERMAP_MIPS_HPP +#define CPU_MIPS_VM_REGISTERMAP_MIPS_HPP + +// machine-dependent implemention for register maps + friend class frame; + + private: +#ifndef CORE + // This is the hook for finding a register in an "well-known" location, + // such as a register block of a predetermined format. + // Since there is none, we just return NULL. + // See registerMap_sparc.hpp for an example of grabbing registers + // from register save areas of a standard layout. + address pd_location(VMReg reg) const {return NULL;} +#endif + + // no PD state to clear or copy: + void pd_clear() {} + void pd_initialize() {} + void pd_initialize_from(const RegisterMap* map) {} + +#endif // CPU_MIPS_VM_REGISTERMAP_MIPS_HPP diff --git a/src/hotspot/cpu/mips/register_definitions_mips.cpp b/src/hotspot/cpu/mips/register_definitions_mips.cpp new file mode 100644 index 00000000000..4af25318346 --- /dev/null +++ b/src/hotspot/cpu/mips/register_definitions_mips.cpp @@ -0,0 +1,103 @@ +/* + * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved. 
+ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/assembler.hpp" +#include "asm/register.hpp" +#include "register_mips.hpp" +#ifdef TARGET_ARCH_MODEL_mips_32 +# include "interp_masm_mips_32.hpp" +#endif +#ifdef TARGET_ARCH_MODEL_mips_64 +# include "interp_masm_mips_64.hpp" +#endif + +REGISTER_DEFINITION(Register, noreg); +REGISTER_DEFINITION(Register, i0); +REGISTER_DEFINITION(Register, i1); +REGISTER_DEFINITION(Register, i2); +REGISTER_DEFINITION(Register, i3); +REGISTER_DEFINITION(Register, i4); +REGISTER_DEFINITION(Register, i5); +REGISTER_DEFINITION(Register, i6); +REGISTER_DEFINITION(Register, i7); +REGISTER_DEFINITION(Register, i8); +REGISTER_DEFINITION(Register, i9); +REGISTER_DEFINITION(Register, i10); +REGISTER_DEFINITION(Register, i11); +REGISTER_DEFINITION(Register, i12); +REGISTER_DEFINITION(Register, i13); +REGISTER_DEFINITION(Register, i14); +REGISTER_DEFINITION(Register, i15); +REGISTER_DEFINITION(Register, i16); +REGISTER_DEFINITION(Register, i17); +REGISTER_DEFINITION(Register, i18); +REGISTER_DEFINITION(Register, i19); +REGISTER_DEFINITION(Register, i20); +REGISTER_DEFINITION(Register, i21); +REGISTER_DEFINITION(Register, i22); +REGISTER_DEFINITION(Register, i23); +REGISTER_DEFINITION(Register, i24); +REGISTER_DEFINITION(Register, i25); +REGISTER_DEFINITION(Register, i26); +REGISTER_DEFINITION(Register, i27); +REGISTER_DEFINITION(Register, i28); +REGISTER_DEFINITION(Register, i29); +REGISTER_DEFINITION(Register, i30); +REGISTER_DEFINITION(Register, i31); + +REGISTER_DEFINITION(FloatRegister, fnoreg); +REGISTER_DEFINITION(FloatRegister, f0); +REGISTER_DEFINITION(FloatRegister, f1); +REGISTER_DEFINITION(FloatRegister, f2); +REGISTER_DEFINITION(FloatRegister, f3); +REGISTER_DEFINITION(FloatRegister, f4); +REGISTER_DEFINITION(FloatRegister, f5); +REGISTER_DEFINITION(FloatRegister, f6); +REGISTER_DEFINITION(FloatRegister, f7); +REGISTER_DEFINITION(FloatRegister, f8); +REGISTER_DEFINITION(FloatRegister, f9); +REGISTER_DEFINITION(FloatRegister, f10); +REGISTER_DEFINITION(FloatRegister, f11); +REGISTER_DEFINITION(FloatRegister, f12); +REGISTER_DEFINITION(FloatRegister, f13); +REGISTER_DEFINITION(FloatRegister, f14); +REGISTER_DEFINITION(FloatRegister, f15); +REGISTER_DEFINITION(FloatRegister, f16); +REGISTER_DEFINITION(FloatRegister, f17); +REGISTER_DEFINITION(FloatRegister, f18); +REGISTER_DEFINITION(FloatRegister, f19); +REGISTER_DEFINITION(FloatRegister, f20); +REGISTER_DEFINITION(FloatRegister, f21); 
+REGISTER_DEFINITION(FloatRegister, f22); +REGISTER_DEFINITION(FloatRegister, f23); +REGISTER_DEFINITION(FloatRegister, f24); +REGISTER_DEFINITION(FloatRegister, f25); +REGISTER_DEFINITION(FloatRegister, f26); +REGISTER_DEFINITION(FloatRegister, f27); +REGISTER_DEFINITION(FloatRegister, f28); +REGISTER_DEFINITION(FloatRegister, f29); +REGISTER_DEFINITION(FloatRegister, f30); +REGISTER_DEFINITION(FloatRegister, f31); diff --git a/src/hotspot/cpu/mips/register_mips.cpp b/src/hotspot/cpu/mips/register_mips.cpp new file mode 100644 index 00000000000..4a9b22bfef2 --- /dev/null +++ b/src/hotspot/cpu/mips/register_mips.cpp @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "register_mips.hpp" + +const int ConcreteRegisterImpl::max_gpr = RegisterImpl::number_of_registers << 1; +const int ConcreteRegisterImpl::max_fpr = ConcreteRegisterImpl::max_gpr + + 2 * FloatRegisterImpl::number_of_registers; + +const char* RegisterImpl::name() const { + const char* names[number_of_registers] = { + "zero", "at", "v0", "v1", "a0", "a1", "a2", "a3", + "a4", "a5", "a6", "a7", "t0", "t1", "t2", "t3", + "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7", + "t8", "t9", "k0", "k1", "gp", "sp", "fp", "ra" + }; + return is_valid() ? names[encoding()] : "noreg"; +} + +const char* FloatRegisterImpl::name() const { + const char* names[number_of_registers] = { + "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", + "f8", "f9", "f10", "f11", "f12", "f13", "f14", "f15", + "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23", + "f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31", + }; + return is_valid() ? names[encoding()] : "fnoreg"; +} + diff --git a/src/hotspot/cpu/mips/register_mips.hpp b/src/hotspot/cpu/mips/register_mips.hpp new file mode 100644 index 00000000000..ea216fbcb9d --- /dev/null +++ b/src/hotspot/cpu/mips/register_mips.hpp @@ -0,0 +1,341 @@ +/* + * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_MIPS_VM_REGISTER_MIPS_HPP +#define CPU_MIPS_VM_REGISTER_MIPS_HPP + +#include "asm/register.hpp" +#include "utilities/formatBuffer.hpp" + +class VMRegImpl; +typedef VMRegImpl* VMReg; + +// Use Register as shortcut +class RegisterImpl; +typedef RegisterImpl* Register; + +inline Register as_Register(int encoding) { + return (Register)(intptr_t) encoding; +} + +class RegisterImpl: public AbstractRegisterImpl { + public: + enum { + number_of_registers = 32 + }; + + // derived registers, offsets, and addresses + Register successor() const { return as_Register(encoding() + 1); } + + // construction + inline friend Register as_Register(int encoding); + + VMReg as_VMReg(); + + // accessors + int encoding() const { assert(is_valid(), "invalid register (%d)", (int)(intptr_t)this ); return (intptr_t)this; } + bool is_valid() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_registers; } + const char* name() const; +}; + + +// The integer registers of the MIPS32 architecture +CONSTANT_REGISTER_DECLARATION(Register, noreg, (-1)); + + +CONSTANT_REGISTER_DECLARATION(Register, i0, (0)); +CONSTANT_REGISTER_DECLARATION(Register, i1, (1)); +CONSTANT_REGISTER_DECLARATION(Register, i2, (2)); +CONSTANT_REGISTER_DECLARATION(Register, i3, (3)); +CONSTANT_REGISTER_DECLARATION(Register, i4, (4)); +CONSTANT_REGISTER_DECLARATION(Register, i5, (5)); +CONSTANT_REGISTER_DECLARATION(Register, i6, (6)); +CONSTANT_REGISTER_DECLARATION(Register, i7, (7)); +CONSTANT_REGISTER_DECLARATION(Register, i8, (8)); +CONSTANT_REGISTER_DECLARATION(Register, i9, (9)); +CONSTANT_REGISTER_DECLARATION(Register, i10, (10)); +CONSTANT_REGISTER_DECLARATION(Register, i11, (11)); +CONSTANT_REGISTER_DECLARATION(Register, i12, (12)); +CONSTANT_REGISTER_DECLARATION(Register, i13, (13)); +CONSTANT_REGISTER_DECLARATION(Register, i14, (14)); +CONSTANT_REGISTER_DECLARATION(Register, i15, (15)); +CONSTANT_REGISTER_DECLARATION(Register, i16, (16)); +CONSTANT_REGISTER_DECLARATION(Register, i17, (17)); +CONSTANT_REGISTER_DECLARATION(Register, i18, (18)); +CONSTANT_REGISTER_DECLARATION(Register, i19, (19)); +CONSTANT_REGISTER_DECLARATION(Register, i20, (20)); +CONSTANT_REGISTER_DECLARATION(Register, i21, (21)); +CONSTANT_REGISTER_DECLARATION(Register, i22, (22)); +CONSTANT_REGISTER_DECLARATION(Register, i23, (23)); +CONSTANT_REGISTER_DECLARATION(Register, i24, (24)); +CONSTANT_REGISTER_DECLARATION(Register, i25, (25)); +CONSTANT_REGISTER_DECLARATION(Register, i26, (26)); +CONSTANT_REGISTER_DECLARATION(Register, i27, (27)); +CONSTANT_REGISTER_DECLARATION(Register, i28, (28)); +CONSTANT_REGISTER_DECLARATION(Register, i29, (29)); +CONSTANT_REGISTER_DECLARATION(Register, i30, (30)); +CONSTANT_REGISTER_DECLARATION(Register, i31, (31)); + +#ifndef DONT_USE_REGISTER_DEFINES +#define NOREG 
((Register)(noreg_RegisterEnumValue)) + +#define I0 ((Register)(i0_RegisterEnumValue)) +#define I1 ((Register)(i1_RegisterEnumValue)) +#define I2 ((Register)(i2_RegisterEnumValue)) +#define I3 ((Register)(i3_RegisterEnumValue)) +#define I4 ((Register)(i4_RegisterEnumValue)) +#define I5 ((Register)(i5_RegisterEnumValue)) +#define I6 ((Register)(i6_RegisterEnumValue)) +#define I7 ((Register)(i7_RegisterEnumValue)) +#define I8 ((Register)(i8_RegisterEnumValue)) +#define I9 ((Register)(i9_RegisterEnumValue)) +#define I10 ((Register)(i10_RegisterEnumValue)) +#define I11 ((Register)(i11_RegisterEnumValue)) +#define I12 ((Register)(i12_RegisterEnumValue)) +#define I13 ((Register)(i13_RegisterEnumValue)) +#define I14 ((Register)(i14_RegisterEnumValue)) +#define I15 ((Register)(i15_RegisterEnumValue)) +#define I16 ((Register)(i16_RegisterEnumValue)) +#define I17 ((Register)(i17_RegisterEnumValue)) +#define I18 ((Register)(i18_RegisterEnumValue)) +#define I19 ((Register)(i19_RegisterEnumValue)) +#define I20 ((Register)(i20_RegisterEnumValue)) +#define I21 ((Register)(i21_RegisterEnumValue)) +#define I22 ((Register)(i22_RegisterEnumValue)) +#define I23 ((Register)(i23_RegisterEnumValue)) +#define I24 ((Register)(i24_RegisterEnumValue)) +#define I25 ((Register)(i25_RegisterEnumValue)) +#define I26 ((Register)(i26_RegisterEnumValue)) +#define I27 ((Register)(i27_RegisterEnumValue)) +#define I28 ((Register)(i28_RegisterEnumValue)) +#define I29 ((Register)(i29_RegisterEnumValue)) +#define I30 ((Register)(i30_RegisterEnumValue)) +#define I31 ((Register)(i31_RegisterEnumValue)) + +#define R0 ((Register)(i0_RegisterEnumValue)) +#define AT ((Register)(i1_RegisterEnumValue)) +#define V0 ((Register)(i2_RegisterEnumValue)) +#define V1 ((Register)(i3_RegisterEnumValue)) +#define A0 ((Register)(i4_RegisterEnumValue)) +#define A1 ((Register)(i5_RegisterEnumValue)) +#define A2 ((Register)(i6_RegisterEnumValue)) +#define A3 ((Register)(i7_RegisterEnumValue)) +#define A4 ((Register)(i8_RegisterEnumValue)) +#define A5 ((Register)(i9_RegisterEnumValue)) +#define A6 ((Register)(i10_RegisterEnumValue)) +#define A7 ((Register)(i11_RegisterEnumValue)) +#define RT0 ((Register)(i12_RegisterEnumValue)) +#define RT1 ((Register)(i13_RegisterEnumValue)) +#define RT2 ((Register)(i14_RegisterEnumValue)) +#define RT3 ((Register)(i15_RegisterEnumValue)) +#define S0 ((Register)(i16_RegisterEnumValue)) +#define S1 ((Register)(i17_RegisterEnumValue)) +#define S2 ((Register)(i18_RegisterEnumValue)) +#define S3 ((Register)(i19_RegisterEnumValue)) +#define S4 ((Register)(i20_RegisterEnumValue)) +#define S5 ((Register)(i21_RegisterEnumValue)) +#define S6 ((Register)(i22_RegisterEnumValue)) +#define S7 ((Register)(i23_RegisterEnumValue)) +#define RT8 ((Register)(i24_RegisterEnumValue)) +#define RT9 ((Register)(i25_RegisterEnumValue)) +#define K0 ((Register)(i26_RegisterEnumValue)) +#define K1 ((Register)(i27_RegisterEnumValue)) +#define GP ((Register)(i28_RegisterEnumValue)) +#define SP ((Register)(i29_RegisterEnumValue)) +#define FP ((Register)(i30_RegisterEnumValue)) +#define S8 ((Register)(i30_RegisterEnumValue)) +#define RA ((Register)(i31_RegisterEnumValue)) + +#define c_rarg0 RT0 +#define c_rarg1 RT1 +#define Rmethod S3 +#define Rsender S4 +#define Rnext S1 + +/* +#define RT0 T0 +#define RT1 T1 +#define RT2 T2 +#define RT3 T3 +#define RT4 T8 +#define RT5 T9 +*/ + + +//for interpreter frame +// bytecode pointer register +#define BCP S0 +// local variable pointer register +#define LVP S7 +// temperary callee saved register, we use this 
register to save the register maybe blowed cross call_VM +// be sure to save and restore its value in call_stub +#define TSR S2 + +#define OPT_THREAD 1 + +#define TREG S6 + +#define S5_heapbase S5 + +#define mh_SP_save SP + +#define FSR V0 +#define SSR V1 +#define FSF F0 +#define SSF F1 +#define FTF F14 +#define STF F15 + +#define AFT F30 + +#define RECEIVER T0 +#define IC_Klass T1 + +#define SHIFT_count T3 + +#endif // DONT_USE_REGISTER_DEFINES + +// Use FloatRegister as shortcut +class FloatRegisterImpl; +typedef FloatRegisterImpl* FloatRegister; + +inline FloatRegister as_FloatRegister(int encoding) { + return (FloatRegister)(intptr_t) encoding; +} + +// The implementation of floating point registers for the architecture +class FloatRegisterImpl: public AbstractRegisterImpl { + public: + enum { + float_arg_base = 12, + number_of_registers = 32 + }; + + // construction + inline friend FloatRegister as_FloatRegister(int encoding); + + VMReg as_VMReg(); + + // derived registers, offsets, and addresses + FloatRegister successor() const { return as_FloatRegister(encoding() + 1); } + + // accessors + int encoding() const { assert(is_valid(), "invalid register"); return (intptr_t)this; } + bool is_valid() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_registers; } + const char* name() const; + +}; + +CONSTANT_REGISTER_DECLARATION(FloatRegister, fnoreg , (-1)); + +CONSTANT_REGISTER_DECLARATION(FloatRegister, f0 , ( 0)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f1 , ( 1)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f2 , ( 2)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f3 , ( 3)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f4 , ( 4)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f5 , ( 5)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f6 , ( 6)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f7 , ( 7)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f8 , ( 8)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f9 , ( 9)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f10 , (10)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f11 , (11)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f12 , (12)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f13 , (13)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f14 , (14)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f15 , (15)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f16 , (16)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f17 , (17)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f18 , (18)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f19 , (19)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f20 , (20)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f21 , (21)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f22 , (22)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f23 , (23)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f24 , (24)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f25 , (25)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f26 , (26)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f27 , (27)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f28 , (28)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f29 , (29)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f30 , (30)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f31 , (31)); + +#ifndef DONT_USE_REGISTER_DEFINES +#define FNOREG ((FloatRegister)(fnoreg_FloatRegisterEnumValue)) +#define F0 ((FloatRegister)( f0_FloatRegisterEnumValue)) +#define F1 ((FloatRegister)( f1_FloatRegisterEnumValue)) +#define F2 ((FloatRegister)( f2_FloatRegisterEnumValue)) 
+#define F3 ((FloatRegister)( f3_FloatRegisterEnumValue)) +#define F4 ((FloatRegister)( f4_FloatRegisterEnumValue)) +#define F5 ((FloatRegister)( f5_FloatRegisterEnumValue)) +#define F6 ((FloatRegister)( f6_FloatRegisterEnumValue)) +#define F7 ((FloatRegister)( f7_FloatRegisterEnumValue)) +#define F8 ((FloatRegister)( f8_FloatRegisterEnumValue)) +#define F9 ((FloatRegister)( f9_FloatRegisterEnumValue)) +#define F10 ((FloatRegister)( f10_FloatRegisterEnumValue)) +#define F11 ((FloatRegister)( f11_FloatRegisterEnumValue)) +#define F12 ((FloatRegister)( f12_FloatRegisterEnumValue)) +#define F13 ((FloatRegister)( f13_FloatRegisterEnumValue)) +#define F14 ((FloatRegister)( f14_FloatRegisterEnumValue)) +#define F15 ((FloatRegister)( f15_FloatRegisterEnumValue)) +#define F16 ((FloatRegister)( f16_FloatRegisterEnumValue)) +#define F17 ((FloatRegister)( f17_FloatRegisterEnumValue)) +#define F18 ((FloatRegister)( f18_FloatRegisterEnumValue)) +#define F19 ((FloatRegister)( f19_FloatRegisterEnumValue)) +#define F20 ((FloatRegister)( f20_FloatRegisterEnumValue)) +#define F21 ((FloatRegister)( f21_FloatRegisterEnumValue)) +#define F22 ((FloatRegister)( f22_FloatRegisterEnumValue)) +#define F23 ((FloatRegister)( f23_FloatRegisterEnumValue)) +#define F24 ((FloatRegister)( f24_FloatRegisterEnumValue)) +#define F25 ((FloatRegister)( f25_FloatRegisterEnumValue)) +#define F26 ((FloatRegister)( f26_FloatRegisterEnumValue)) +#define F27 ((FloatRegister)( f27_FloatRegisterEnumValue)) +#define F28 ((FloatRegister)( f28_FloatRegisterEnumValue)) +#define F29 ((FloatRegister)( f29_FloatRegisterEnumValue)) +#define F30 ((FloatRegister)( f30_FloatRegisterEnumValue)) +#define F31 ((FloatRegister)( f31_FloatRegisterEnumValue)) +#endif // DONT_USE_REGISTER_DEFINES + + +const int MIPS_ARGS_IN_REGS_NUM = 4; + +// Need to know the total number of registers of all sorts for SharedInfo. +// Define a class that exports it. +class ConcreteRegisterImpl : public AbstractRegisterImpl { + public: + enum { + // A big enough number for C2: all the registers plus flags + // This number must be large enough to cover REG_COUNT (defined by c2) registers. + // There is no requirement that any ordering here matches any ordering c2 gives + // it's optoregs. + number_of_registers = (RegisterImpl::number_of_registers + FloatRegisterImpl::number_of_registers) * 2 + }; + + static const int max_gpr; + static const int max_fpr; +}; + +#endif //CPU_MIPS_VM_REGISTER_MIPS_HPP diff --git a/src/hotspot/cpu/mips/relocInfo_mips.cpp b/src/hotspot/cpu/mips/relocInfo_mips.cpp new file mode 100644 index 00000000000..ff8028032b2 --- /dev/null +++ b/src/hotspot/cpu/mips/relocInfo_mips.cpp @@ -0,0 +1,160 @@ +/* + * Copyright (c) 1998, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "code/relocInfo.hpp" +#include "compiler/disassembler.hpp" +#include "nativeInst_mips.hpp" +#include "oops/compressedOops.inline.hpp" +#include "oops/oop.hpp" +#include "oops/oop.inline.hpp" +#include "runtime/safepoint.hpp" + + +void Relocation::pd_set_data_value(address x, intptr_t o, bool verify_only) { + x += o; + typedef Assembler::WhichOperand WhichOperand; + WhichOperand which = (WhichOperand) format(); // that is, disp32 or imm, call32, narrow oop + assert(which == Assembler::disp32_operand || + which == Assembler::narrow_oop_operand || + which == Assembler::imm_operand, "format unpacks ok"); + if (which == Assembler::imm_operand) { + if (verify_only) { + assert(nativeMovConstReg_at(addr())->data() == (long)x, "instructions must match"); + } else { + nativeMovConstReg_at(addr())->set_data((intptr_t)(x)); + } + } else if (which == Assembler::narrow_oop_operand) { + // both compressed oops and compressed classes look the same + if (Universe::heap()->is_in_reserved((oop)x)) { + if (verify_only) { + assert(nativeMovConstReg_at(addr())->data() == (long)CompressedOops::encode((oop)x), "instructions must match"); + } else { + nativeMovConstReg_at(addr())->set_data((intptr_t)(CompressedOops::encode(oop(x))), (intptr_t)(x)); + } + } else { + if (verify_only) { + assert(nativeMovConstReg_at(addr())->data() == (long)Klass::encode_klass((Klass*)x), "instructions must match"); + } else { + nativeMovConstReg_at(addr())->set_data((intptr_t)(Klass::encode_klass((Klass*)x)), (intptr_t)(x)); + } + } + } else { + // Note: Use runtime_call_type relocations for call32_operand. 
+    assert(0, "call32_operand not supported in MIPS64");
+  }
+}
+
+
+// Note: this relocation is not strictly needed on MIPS, since MIPS uses
+// absolute call targets; call relocation could probably be dropped here.
+address Relocation::pd_call_destination(address orig_addr) {
+  intptr_t adj = 0;
+  NativeInstruction* ni = nativeInstruction_at(addr());
+  if (ni->is_call()) {
+    if (!ni->is_trampoline_call()) {
+      return nativeCall_at(addr())->target_addr_for_insn();
+    } else {
+      address trampoline = nativeCall_at(addr())->get_trampoline();
+      if (trampoline) {
+        return nativeCallTrampolineStub_at(trampoline)->destination();
+      } else {
+        return (address) -1;
+      }
+    }
+  } else if (ni->is_jump()) {
+    return nativeGeneralJump_at(addr())->jump_destination() + adj;
+  } else if (ni->is_cond_jump()) {
+    return nativeCondJump_at(addr())->jump_destination() + adj;
+  } else {
+    tty->print_cr("\nError!\ncall destination: " INTPTR_FORMAT, p2i(addr()));
+    Disassembler::decode(addr() - 10 * 4, addr() + 10 * 4, tty);
+    ShouldNotReachHere();
+    return NULL;
+  }
+}
+
+
+void Relocation::pd_set_call_destination(address x) {
+  NativeInstruction* ni = nativeInstruction_at(addr());
+  if (ni->is_call()) {
+    NativeCall* call = nativeCall_at(addr());
+    if (!ni->is_trampoline_call()) {
+      call->set_destination(x);
+    } else {
+      address trampoline_stub_addr = call->get_trampoline();
+      if (trampoline_stub_addr != NULL) {
+        address orig = call->target_addr_for_insn();
+        if (orig != trampoline_stub_addr) {
+          call->patch_on_trampoline(trampoline_stub_addr);
+        }
+        call->set_destination_mt_safe(x, false);
+      }
+    }
+  } else if (ni->is_jump())
+    nativeGeneralJump_at(addr())->set_jump_destination(x);
+  else if (ni->is_cond_jump())
+    nativeCondJump_at(addr())->set_jump_destination(x);
+  else
+    { ShouldNotReachHere(); }
+
+  // Unresolved jumps are recognized by a destination of -1
+  // However 64bit can't actually produce such an address
+  // and encodes a jump to self but jump_destination will
+  // return a -1 as the signal. We must not relocate this
+  // jmp or the ic code will not see it as unresolved.
+}
+
+
+address* Relocation::pd_address_in_code() {
+  return (address*)addr();
+}
+
+
+address Relocation::pd_get_address_from_code() {
+  NativeMovConstReg* ni = nativeMovConstReg_at(addr());
+  return (address)ni->data();
+}
+
+
+void poll_Relocation::fix_relocation_after_move(const CodeBuffer* src, CodeBuffer* dest) {
+}
+
+/*
+void poll_return_Relocation::fix_relocation_after_move(const CodeBuffer* src, CodeBuffer* dest) {
+}
+*/
+
+void internal_pc_Relocation::fix_relocation_after_move(const CodeBuffer* src, CodeBuffer* dest) {
+  address target = 0;
+  NativeMovConstReg* ni = nativeMovConstReg_at(addr());
+  target = new_addr_for((address)ni->data(), src, dest);
+  ni->set_data((intptr_t)target);
+}
+
+void metadata_Relocation::pd_fix_value(address x) {
+}
diff --git a/src/hotspot/cpu/mips/relocInfo_mips.hpp b/src/hotspot/cpu/mips/relocInfo_mips.hpp
new file mode 100644
index 00000000000..1e1e170fd87
--- /dev/null
+++ b/src/hotspot/cpu/mips/relocInfo_mips.hpp
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_MIPS_VM_RELOCINFO_MIPS_HPP +#define CPU_MIPS_VM_RELOCINFO_MIPS_HPP + + // machine-dependent parts of class relocInfo + private: + enum { + // Since MIPS instructions are whole words, + // the two low-order offset bits can always be discarded. + offset_unit = 4, + + // imm_oop_operand vs. narrow_oop_operand + format_width = 2 + }; + + public: + + static bool mustIterateImmediateOopsInCode() { return false; } + +#endif // CPU_MIPS_VM_RELOCINFO_MIPS_HPP diff --git a/src/hotspot/cpu/mips/runtime_mips_64.cpp b/src/hotspot/cpu/mips/runtime_mips_64.cpp new file mode 100644 index 00000000000..2a0488cd015 --- /dev/null +++ b/src/hotspot/cpu/mips/runtime_mips_64.cpp @@ -0,0 +1,198 @@ +/* + * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2019, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#ifdef COMPILER2 +#include "asm/macroAssembler.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "classfile/systemDictionary.hpp" +#include "code/vmreg.hpp" +#include "interpreter/interpreter.hpp" +#include "opto/runtime.hpp" +#include "runtime/interfaceSupport.inline.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#include "runtime/vframeArray.hpp" +#include "utilities/globalDefinitions.hpp" +#include "vmreg_mips.inline.hpp" +#endif + +#define __ masm-> + +#define T0 RT0 +#define T1 RT1 +#define T2 RT2 +#define T3 RT3 +#define T8 RT8 +#define T9 RT9 + +//-------------- generate_exception_blob ----------- +// creates _exception_blob. +// The exception blob is jumped to from a compiled method. +// (see emit_exception_handler in sparc.ad file) +// +// Given an exception pc at a call we call into the runtime for the +// handler in this method. 
This handler might merely restore state +// (i.e. callee save registers) unwind the frame and jump to the +// exception handler for the nmethod if there is no Java level handler +// for the nmethod. +// +// This code is entered with a jump, and left with a jump. +// +// Arguments: +// V0: exception oop +// V1: exception pc +// +// Results: +// A0: exception oop +// A1: exception pc in caller or ??? +// jumps to: exception handler of caller +// +// Note: the exception pc MUST be at a call (precise debug information) +// +// [stubGenerator_mips.cpp] generate_forward_exception() +// |- V0, V1 are created +// |- T9 <= SharedRuntime::exception_handler_for_return_address +// `- jr T9 +// `- the caller's exception_handler +// `- jr OptoRuntime::exception_blob +// `- here +// +void OptoRuntime::generate_exception_blob() { + // Capture info about frame layout + enum layout { + fp_off, + return_off, // slot for return address + framesize + }; + + // allocate space for the code + ResourceMark rm; + // setup code generation tools + CodeBuffer buffer("exception_blob", 5120, 5120); + MacroAssembler* masm = new MacroAssembler(&buffer); + + + address start = __ pc(); + + __ daddiu(SP, SP, -1 * framesize * wordSize); // Prolog! + + // this frame will be treated as the original caller method. + // So, the return pc should be filled with the original exception pc. + // ref: X86's implementation + __ sd(V1, SP, return_off *wordSize); // return address + __ sd(FP, SP, fp_off *wordSize); + + // Save callee saved registers. None for UseSSE=0, + // floats-only for UseSSE=1, and doubles for UseSSE=2. + + __ daddiu(FP, SP, fp_off * wordSize); + + // Store exception in Thread object. We cannot pass any arguments to the + // handle_exception call, since we do not want to make any assumption + // about the size of the frame where the exception happened in. + Register thread = TREG; + +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + + __ sd(V0, Address(thread, JavaThread::exception_oop_offset())); + __ sd(V1, Address(thread, JavaThread::exception_pc_offset())); + + // This call does all the hard work. It checks if an exception handler + // exists in the method. + // If so, it returns the handler address. + // If not, it prepares for stack-unwinding, restoring the callee-save + // registers of the frame being removed. + __ set_last_Java_frame(thread, NOREG, NOREG, NULL); + + __ move(AT, -(StackAlignmentInBytes)); + __ andr(SP, SP, AT); // Fix stack alignment as required by ABI + + __ relocate(relocInfo::internal_pc_type); + + { + long save_pc = (long)__ pc() + 48; + __ patchable_set48(AT, save_pc); + } + __ sd(AT, thread, in_bytes(JavaThread::last_Java_pc_offset())); + + __ move(A0, thread); + __ patchable_set48(T9, (long)OptoRuntime::handle_exception_C); + __ jalr(T9); + __ delayed()->nop(); + + // Set an oopmap for the call site + OopMapSet *oop_maps = new OopMapSet(); + OopMap* map = new OopMap( framesize, 0 ); + + oop_maps->add_gc_map( __ offset(), map); + +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + __ reset_last_Java_frame(thread, true); + + // Pop self-frame. + __ leave(); // Epilog! 
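+
+  // leave() tears down the frame built in the prolog above; from here on,
+  // state reaches the handler only through registers and the JavaThread
+  // fields loaded below.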
+ + // V0: exception handler + + // We have a handler in V0, (could be deopt blob) + __ move(T9, V0); + +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + // Get the exception + __ ld(A0, Address(thread, JavaThread::exception_oop_offset())); + // Get the exception pc in case we are deoptimized + __ ld(A1, Address(thread, JavaThread::exception_pc_offset())); +#ifdef ASSERT + __ sd(R0, Address(thread, JavaThread::exception_handler_pc_offset())); + __ sd(R0, Address(thread, JavaThread::exception_pc_offset())); +#endif + // Clear the exception oop so GC no longer processes it as a root. + __ sd(R0, Address(thread, JavaThread::exception_oop_offset())); + + // Fix seg fault when running: + // Eclipse + Plugin + Debug As + // This is the only condition where C2 calls SharedRuntime::generate_deopt_blob() + // + __ move(V0, A0); + __ move(V1, A1); + + // V0: exception oop + // T9: exception handler + // A1: exception pc + __ jr(T9); + __ delayed()->nop(); + + // make sure all code is generated + masm->flush(); + + _exception_blob = ExceptionBlob::create(&buffer, oop_maps, framesize); +} diff --git a/src/hotspot/cpu/mips/sharedRuntime_mips_64.cpp b/src/hotspot/cpu/mips/sharedRuntime_mips_64.cpp new file mode 100644 index 00000000000..4a9791d4cbf --- /dev/null +++ b/src/hotspot/cpu/mips/sharedRuntime_mips_64.cpp @@ -0,0 +1,3879 @@ +/* + * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "code/debugInfoRec.hpp" +#include "code/icBuffer.hpp" +#include "code/nativeInst.hpp" +#include "code/vtableStubs.hpp" +#include "interpreter/interpreter.hpp" +#include "oops/compiledICHolder.hpp" +#include "oops/klass.inline.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/vframeArray.hpp" +#include "vmreg_mips.inline.hpp" +#ifdef COMPILER2 +#include "opto/runtime.hpp" +#endif + +#include + +#define __ masm-> + +#define T0 RT0 +#define T1 RT1 +#define T2 RT2 +#define T3 RT3 +#define T8 RT8 +#define T9 RT9 + +const int StackAlignmentInSlots = StackAlignmentInBytes / VMRegImpl::stack_slot_size; + +class RegisterSaver { + enum { FPU_regs_live = 32 }; + // Capture info about frame layout + enum layout { +#define DEF_LAYOUT_OFFS(regname) regname ## _off, regname ## H_off, + DEF_LAYOUT_OFFS(for_16_bytes_aligned) + DEF_LAYOUT_OFFS(fpr0) + DEF_LAYOUT_OFFS(fpr1) + DEF_LAYOUT_OFFS(fpr2) + DEF_LAYOUT_OFFS(fpr3) + DEF_LAYOUT_OFFS(fpr4) + DEF_LAYOUT_OFFS(fpr5) + DEF_LAYOUT_OFFS(fpr6) + DEF_LAYOUT_OFFS(fpr7) + DEF_LAYOUT_OFFS(fpr8) + DEF_LAYOUT_OFFS(fpr9) + DEF_LAYOUT_OFFS(fpr10) + DEF_LAYOUT_OFFS(fpr11) + DEF_LAYOUT_OFFS(fpr12) + DEF_LAYOUT_OFFS(fpr13) + DEF_LAYOUT_OFFS(fpr14) + DEF_LAYOUT_OFFS(fpr15) + DEF_LAYOUT_OFFS(fpr16) + DEF_LAYOUT_OFFS(fpr17) + DEF_LAYOUT_OFFS(fpr18) + DEF_LAYOUT_OFFS(fpr19) + DEF_LAYOUT_OFFS(fpr20) + DEF_LAYOUT_OFFS(fpr21) + DEF_LAYOUT_OFFS(fpr22) + DEF_LAYOUT_OFFS(fpr23) + DEF_LAYOUT_OFFS(fpr24) + DEF_LAYOUT_OFFS(fpr25) + DEF_LAYOUT_OFFS(fpr26) + DEF_LAYOUT_OFFS(fpr27) + DEF_LAYOUT_OFFS(fpr28) + DEF_LAYOUT_OFFS(fpr29) + DEF_LAYOUT_OFFS(fpr30) + DEF_LAYOUT_OFFS(fpr31) + + DEF_LAYOUT_OFFS(v0) + DEF_LAYOUT_OFFS(v1) + DEF_LAYOUT_OFFS(a0) + DEF_LAYOUT_OFFS(a1) + DEF_LAYOUT_OFFS(a2) + DEF_LAYOUT_OFFS(a3) + DEF_LAYOUT_OFFS(a4) + DEF_LAYOUT_OFFS(a5) + DEF_LAYOUT_OFFS(a6) + DEF_LAYOUT_OFFS(a7) + DEF_LAYOUT_OFFS(t0) + DEF_LAYOUT_OFFS(t1) + DEF_LAYOUT_OFFS(t2) + DEF_LAYOUT_OFFS(t3) + DEF_LAYOUT_OFFS(s0) + DEF_LAYOUT_OFFS(s1) + DEF_LAYOUT_OFFS(s2) + DEF_LAYOUT_OFFS(s3) + DEF_LAYOUT_OFFS(s4) + DEF_LAYOUT_OFFS(s5) + DEF_LAYOUT_OFFS(s6) + DEF_LAYOUT_OFFS(s7) + DEF_LAYOUT_OFFS(t8) + DEF_LAYOUT_OFFS(t9) + + DEF_LAYOUT_OFFS(gp) + DEF_LAYOUT_OFFS(fp) + DEF_LAYOUT_OFFS(return) + reg_save_size + }; + + public: + + static OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors =false ); + static void restore_live_registers(MacroAssembler* masm, bool restore_vectors = false); + static int raOffset(void) { return return_off / 2; } + //Rmethod + static int methodOffset(void) { return s3_off / 2; } + + static int v0Offset(void) { return v0_off / 2; } + static int v1Offset(void) { return v1_off / 2; } + + static int fpResultOffset(void) { return fpr0_off / 2; } + + // During deoptimization only the result register need to be restored + // all the other values have already been extracted. 
+ static void restore_result_registers(MacroAssembler* masm); +}; + +OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors ) { + + // Always make the frame size 16-byte aligned + int frame_size_in_bytes = round_to(additional_frame_words*wordSize + + reg_save_size*BytesPerInt, 16); + // OopMap frame size is in compiler stack slots (jint's) not bytes or words + int frame_size_in_slots = frame_size_in_bytes / BytesPerInt; + // The caller will allocate additional_frame_words + int additional_frame_slots = additional_frame_words*wordSize / BytesPerInt; + // CodeBlob frame size is in words. + int frame_size_in_words = frame_size_in_bytes / wordSize; + *total_frame_words = frame_size_in_words; + + // save registers + + __ daddiu(SP, SP, - reg_save_size * jintSize); + + __ sdc1(F0, SP, fpr0_off * jintSize); __ sdc1(F1, SP, fpr1_off * jintSize); + __ sdc1(F2, SP, fpr2_off * jintSize); __ sdc1(F3, SP, fpr3_off * jintSize); + __ sdc1(F4, SP, fpr4_off * jintSize); __ sdc1(F5, SP, fpr5_off * jintSize); + __ sdc1(F6, SP, fpr6_off * jintSize); __ sdc1(F7, SP, fpr7_off * jintSize); + __ sdc1(F8, SP, fpr8_off * jintSize); __ sdc1(F9, SP, fpr9_off * jintSize); + __ sdc1(F10, SP, fpr10_off * jintSize); __ sdc1(F11, SP, fpr11_off * jintSize); + __ sdc1(F12, SP, fpr12_off * jintSize); __ sdc1(F13, SP, fpr13_off * jintSize); + __ sdc1(F14, SP, fpr14_off * jintSize); __ sdc1(F15, SP, fpr15_off * jintSize); + __ sdc1(F16, SP, fpr16_off * jintSize); __ sdc1(F17, SP, fpr17_off * jintSize); + __ sdc1(F18, SP, fpr18_off * jintSize); __ sdc1(F19, SP, fpr19_off * jintSize); + __ sdc1(F20, SP, fpr20_off * jintSize); __ sdc1(F21, SP, fpr21_off * jintSize); + __ sdc1(F22, SP, fpr22_off * jintSize); __ sdc1(F23, SP, fpr23_off * jintSize); + __ sdc1(F24, SP, fpr24_off * jintSize); __ sdc1(F25, SP, fpr25_off * jintSize); + __ sdc1(F26, SP, fpr26_off * jintSize); __ sdc1(F27, SP, fpr27_off * jintSize); + __ sdc1(F28, SP, fpr28_off * jintSize); __ sdc1(F29, SP, fpr29_off * jintSize); + __ sdc1(F30, SP, fpr30_off * jintSize); __ sdc1(F31, SP, fpr31_off * jintSize); + __ sd(V0, SP, v0_off * jintSize); __ sd(V1, SP, v1_off * jintSize); + __ sd(A0, SP, a0_off * jintSize); __ sd(A1, SP, a1_off * jintSize); + __ sd(A2, SP, a2_off * jintSize); __ sd(A3, SP, a3_off * jintSize); + __ sd(A4, SP, a4_off * jintSize); __ sd(A5, SP, a5_off * jintSize); + __ sd(A6, SP, a6_off * jintSize); __ sd(A7, SP, a7_off * jintSize); + __ sd(T0, SP, t0_off * jintSize); + __ sd(T1, SP, t1_off * jintSize); + __ sd(T2, SP, t2_off * jintSize); + __ sd(T3, SP, t3_off * jintSize); + __ sd(S0, SP, s0_off * jintSize); + __ sd(S1, SP, s1_off * jintSize); + __ sd(S2, SP, s2_off * jintSize); + __ sd(S3, SP, s3_off * jintSize); + __ sd(S4, SP, s4_off * jintSize); + __ sd(S5, SP, s5_off * jintSize); + __ sd(S6, SP, s6_off * jintSize); + __ sd(S7, SP, s7_off * jintSize); + + __ sd(T8, SP, t8_off * jintSize); + __ sd(T9, SP, t9_off * jintSize); + + __ sd(GP, SP, gp_off * jintSize); + __ sd(FP, SP, fp_off * jintSize); + __ sd(RA, SP, return_off * jintSize); + __ daddiu(FP, SP, fp_off * jintSize); + + OopMapSet *oop_maps = new OopMapSet(); + //OopMap* map = new OopMap( frame_words, 0 ); + OopMap* map = new OopMap( frame_size_in_slots, 0 ); + + +//#define STACK_OFFSET(x) VMRegImpl::stack2reg((x) + additional_frame_words) +#define STACK_OFFSET(x) VMRegImpl::stack2reg((x) + additional_frame_slots) + map->set_callee_saved(STACK_OFFSET( v0_off), V0->as_VMReg()); + 
map->set_callee_saved(STACK_OFFSET( v1_off), V1->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( a0_off), A0->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( a1_off), A1->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( a2_off), A2->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( a3_off), A3->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( a4_off), A4->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( a5_off), A5->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( a6_off), A6->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( a7_off), A7->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( t0_off), T0->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( t1_off), T1->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( t2_off), T2->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( t3_off), T3->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( s0_off), S0->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( s1_off), S1->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( s2_off), S2->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( s3_off), S3->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( s4_off), S4->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( s5_off), S5->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( s6_off), S6->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( s7_off), S7->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( t8_off), T8->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( t9_off), T9->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( gp_off), GP->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( fp_off), FP->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( return_off), RA->as_VMReg()); + + map->set_callee_saved(STACK_OFFSET( fpr0_off), F0->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( fpr1_off), F1->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( fpr2_off), F2->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( fpr3_off), F3->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( fpr4_off), F4->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( fpr5_off), F5->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( fpr6_off), F6->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( fpr7_off), F7->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( fpr8_off), F8->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( fpr9_off), F9->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( fpr10_off), F10->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( fpr11_off), F11->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( fpr12_off), F12->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( fpr13_off), F13->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( fpr14_off), F14->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( fpr15_off), F15->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( fpr16_off), F16->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( fpr17_off), F17->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( fpr18_off), F18->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( fpr19_off), F19->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( fpr20_off), F20->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( fpr21_off), F21->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( fpr22_off), F22->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( fpr23_off), F23->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( fpr24_off), F24->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( fpr25_off), F25->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( fpr26_off), F26->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( fpr27_off), F27->as_VMReg()); + 
map->set_callee_saved(STACK_OFFSET( fpr28_off), F28->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( fpr29_off), F29->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( fpr30_off), F30->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( fpr31_off), F31->as_VMReg()); + +#undef STACK_OFFSET + return map; +} + + +// Pop the current frame and restore all the registers that we +// saved. +void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_vectors) { + __ ldc1(F0, SP, fpr0_off * jintSize); __ ldc1(F1, SP, fpr1_off * jintSize); + __ ldc1(F2, SP, fpr2_off * jintSize); __ ldc1(F3, SP, fpr3_off * jintSize); + __ ldc1(F4, SP, fpr4_off * jintSize); __ ldc1(F5, SP, fpr5_off * jintSize); + __ ldc1(F6, SP, fpr6_off * jintSize); __ ldc1(F7, SP, fpr7_off * jintSize); + __ ldc1(F8, SP, fpr8_off * jintSize); __ ldc1(F9, SP, fpr9_off * jintSize); + __ ldc1(F10, SP, fpr10_off * jintSize); __ ldc1(F11, SP, fpr11_off * jintSize); + __ ldc1(F12, SP, fpr12_off * jintSize); __ ldc1(F13, SP, fpr13_off * jintSize); + __ ldc1(F14, SP, fpr14_off * jintSize); __ ldc1(F15, SP, fpr15_off * jintSize); + __ ldc1(F16, SP, fpr16_off * jintSize); __ ldc1(F17, SP, fpr17_off * jintSize); + __ ldc1(F18, SP, fpr18_off * jintSize); __ ldc1(F19, SP, fpr19_off * jintSize); + __ ldc1(F20, SP, fpr20_off * jintSize); __ ldc1(F21, SP, fpr21_off * jintSize); + __ ldc1(F22, SP, fpr22_off * jintSize); __ ldc1(F23, SP, fpr23_off * jintSize); + __ ldc1(F24, SP, fpr24_off * jintSize); __ ldc1(F25, SP, fpr25_off * jintSize); + __ ldc1(F26, SP, fpr26_off * jintSize); __ ldc1(F27, SP, fpr27_off * jintSize); + __ ldc1(F28, SP, fpr28_off * jintSize); __ ldc1(F29, SP, fpr29_off * jintSize); + __ ldc1(F30, SP, fpr30_off * jintSize); __ ldc1(F31, SP, fpr31_off * jintSize); + + __ ld(V0, SP, v0_off * jintSize); __ ld(V1, SP, v1_off * jintSize); + __ ld(A0, SP, a0_off * jintSize); __ ld(A1, SP, a1_off * jintSize); + __ ld(A2, SP, a2_off * jintSize); __ ld(A3, SP, a3_off * jintSize); + __ ld(A4, SP, a4_off * jintSize); __ ld(A5, SP, a5_off * jintSize); + __ ld(A6, SP, a6_off * jintSize); __ ld(A7, SP, a7_off * jintSize); + __ ld(T0, SP, t0_off * jintSize); + __ ld(T1, SP, t1_off * jintSize); + __ ld(T2, SP, t2_off * jintSize); + __ ld(T3, SP, t3_off * jintSize); + __ ld(S0, SP, s0_off * jintSize); + __ ld(S1, SP, s1_off * jintSize); + __ ld(S2, SP, s2_off * jintSize); + __ ld(S3, SP, s3_off * jintSize); + __ ld(S4, SP, s4_off * jintSize); + __ ld(S5, SP, s5_off * jintSize); + __ ld(S6, SP, s6_off * jintSize); + __ ld(S7, SP, s7_off * jintSize); + + __ ld(T8, SP, t8_off * jintSize); + __ ld(T9, SP, t9_off * jintSize); + + __ ld(GP, SP, gp_off * jintSize); + __ ld(FP, SP, fp_off * jintSize); + __ ld(RA, SP, return_off * jintSize); + + __ addiu(SP, SP, reg_save_size * jintSize); +} + +// Pop the current frame and restore the registers that might be holding +// a result. +void RegisterSaver::restore_result_registers(MacroAssembler* masm) { + + // Just restore result register. Only used by deoptimization. By + // now any callee save register that needs to be restore to a c2 + // caller of the deoptee has been extracted into the vframeArray + // and will be stuffed into the c2i adapter we create for later + // restoration so only result registers need to be restored here. 
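+  //
+  // Concretely: reload V0/V1 and F0/F1 from the save area, then move SP up
+  // to the saved return-address slot.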
+ + __ ld(V0, SP, v0_off * jintSize); + __ ld(V1, SP, v1_off * jintSize); + __ ldc1(F0, SP, fpr0_off * jintSize); + __ ldc1(F1, SP, fpr1_off * jintSize); + __ addiu(SP, SP, return_off * jintSize); +} + +// Is vector's size (in bytes) bigger than a size saved by default? +// 16 bytes XMM registers are saved by default using fxsave/fxrstor instructions. +bool SharedRuntime::is_wide_vector(int size) { + return size > 16; +} + +size_t SharedRuntime::trampoline_size() { + return 32; +} + +void SharedRuntime::generate_trampoline(MacroAssembler *masm, address destination) { + // trampoline is not in CodeCache + __ set64(T9, (long)destination); + __ jr(T9); + __ delayed()->nop(); +} + +// The java_calling_convention describes stack locations as ideal slots on +// a frame with no abi restrictions. Since we must observe abi restrictions +// (like the placement of the register window) the slots must be biased by +// the following value. + +static int reg2offset_in(VMReg r) { + // Account for saved fp and return address + // This should really be in_preserve_stack_slots + return (r->reg2stack() + 2 * VMRegImpl::slots_per_word) * VMRegImpl::stack_slot_size; // + 2 * VMRegImpl::stack_slot_size); +} + +static int reg2offset_out(VMReg r) { + return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size; +} + +// --------------------------------------------------------------------------- +// Read the array of BasicTypes from a signature, and compute where the +// arguments should go. Values in the VMRegPair regs array refer to 4-byte +// quantities. Values less than SharedInfo::stack0 are registers, those above +// refer to 4-byte stack slots. All stack slots are based off of the stack pointer +// as framesizes are fixed. +// VMRegImpl::stack0 refers to the first slot 0(sp). +// and VMRegImpl::stack0+1 refers to the memory word 4-byes higher. Register +// up to RegisterImpl::number_of_registers) are the 32-bit +// integer registers. + +// Pass first five oop/int args in registers T0, A0 - A3. +// Pass float/double/long args in stack. +// Doubles have precedence, so if you pass a mix of floats and doubles +// the doubles will grab the registers before the floats will. + +// Note: the INPUTS in sig_bt are in units of Java argument words, which are +// either 32-bit or 64-bit depending on the build. The OUTPUTS are in 32-bit +// units regardless of build. + + +// --------------------------------------------------------------------------- +// The compiled Java calling convention. +// Pass first five oop/int args in registers T0, A0 - A3. +// Pass float/double/long args in stack. +// Doubles have precedence, so if you pass a mix of floats and doubles +// the doubles will grab the registers before the floats will. + +int SharedRuntime::java_calling_convention(const BasicType *sig_bt, + VMRegPair *regs, + int total_args_passed, + int is_outgoing) { + + // Create the mapping between argument positions and registers. 
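+  // Int/oop arguments are assigned to T0 and then A0..A6, float/double
+  // arguments to F12..F19; once those run out, each remaining argument is
+  // given two 32-bit stack slots (stk_args is bumped by 2 per spill).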
+ static const Register INT_ArgReg[Argument::n_register_parameters] = { + T0, A0, A1, A2, A3, A4, A5, A6 + }; + static const FloatRegister FP_ArgReg[Argument::n_float_register_parameters] = { + F12, F13, F14, F15, F16, F17, F18, F19 + }; + + uint args = 0; + uint stk_args = 0; // inc by 2 each time + + for (int i = 0; i < total_args_passed; i++) { + switch (sig_bt[i]) { + case T_VOID: + // halves of T_LONG or T_DOUBLE + assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half"); + regs[i].set_bad(); + break; + case T_BOOLEAN: + case T_CHAR: + case T_BYTE: + case T_SHORT: + case T_INT: + if (args < Argument::n_register_parameters) { + regs[i].set1(INT_ArgReg[args++]->as_VMReg()); + } else { + regs[i].set1(VMRegImpl::stack2reg(stk_args)); + stk_args += 2; + } + break; + case T_LONG: + assert(sig_bt[i + 1] == T_VOID, "expecting half"); + // fall through + case T_OBJECT: + case T_ARRAY: + case T_ADDRESS: + if (args < Argument::n_register_parameters) { + regs[i].set2(INT_ArgReg[args++]->as_VMReg()); + } else { + regs[i].set2(VMRegImpl::stack2reg(stk_args)); + stk_args += 2; + } + break; + case T_FLOAT: + if (args < Argument::n_float_register_parameters) { + regs[i].set1(FP_ArgReg[args++]->as_VMReg()); + } else { + regs[i].set1(VMRegImpl::stack2reg(stk_args)); + stk_args += 2; + } + break; + case T_DOUBLE: + assert(sig_bt[i + 1] == T_VOID, "expecting half"); + if (args < Argument::n_float_register_parameters) { + regs[i].set2(FP_ArgReg[args++]->as_VMReg()); + } else { + regs[i].set2(VMRegImpl::stack2reg(stk_args)); + stk_args += 2; + } + break; + default: + ShouldNotReachHere(); + break; + } + } + + return round_to(stk_args, 2); +} + +// Patch the callers callsite with entry to compiled code if it exists. +static void patch_callers_callsite(MacroAssembler *masm) { + Label L; + __ ld_ptr(AT, Rmethod, in_bytes(Method::code_offset())); + __ beq(AT, R0, L); + __ delayed()->nop(); + // Schedule the branch target address early. + // Call into the VM to patch the caller, then jump to compiled callee + // V0 isn't live so capture return address while we easily can + __ move(V0, RA); + + __ pushad(); +#ifdef COMPILER2 + // C2 may leave the stack dirty if not in SSE2+ mode + __ empty_FPU_stack(); +#endif + + // VM needs caller's callsite + // VM needs target method + + __ move(A0, Rmethod); + __ move(A1, V0); + // we should preserve the return address + __ move(TSR, SP); + __ move(AT, -(StackAlignmentInBytes)); // align the stack + __ andr(SP, SP, AT); + __ call(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite), + relocInfo::runtime_call_type); + + __ delayed()->nop(); + __ move(SP, TSR); + __ popad(); + __ bind(L); +} + +static void gen_c2i_adapter(MacroAssembler *masm, + int total_args_passed, + int comp_args_on_stack, + const BasicType *sig_bt, + const VMRegPair *regs, + Label& skip_fixup) { + + // Before we get into the guts of the C2I adapter, see if we should be here + // at all. We've come from compiled code and are attempting to jump to the + // interpreter, which means the caller made a static call to get here + // (vcalls always get a compiled target if there is one). Check for a + // compiled target. If there is one, we need to patch the caller's call. + // However we will run interpreted if we come thru here. The next pass + // thru the call site will run compiled. If we ran compiled here then + // we can (theorectically) do endless i2c->c2i->i2c transitions during + // deopt/uncommon trap cycles. 
If we always go interpreted here then + // we can have at most one and don't need to play any tricks to keep + // from endlessly growing the stack. + // + // Actually if we detected that we had an i2c->c2i transition here we + // ought to be able to reset the world back to the state of the interpreted + // call and not bother building another interpreter arg area. We don't + // do that at this point. + + patch_callers_callsite(masm); + __ bind(skip_fixup); + +#ifdef COMPILER2 + __ empty_FPU_stack(); +#endif + //this is for native ? + // Since all args are passed on the stack, total_args_passed * interpreter_ + // stack_element_size is the + // space we need. + int extraspace = total_args_passed * Interpreter::stackElementSize; + + // stack is aligned, keep it that way + extraspace = round_to(extraspace, 2*wordSize); + + // Get return address + __ move(V0, RA); + // set senderSP value + //refer to interpreter_mips.cpp:generate_asm_entry + __ move(Rsender, SP); + __ addiu(SP, SP, -extraspace); + + // Now write the args into the outgoing interpreter space + for (int i = 0; i < total_args_passed; i++) { + if (sig_bt[i] == T_VOID) { + assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half"); + continue; + } + + // st_off points to lowest address on stack. + int st_off = ((total_args_passed - 1) - i) * Interpreter::stackElementSize; + // Say 4 args: + // i st_off + // 0 12 T_LONG + // 1 8 T_VOID + // 2 4 T_OBJECT + // 3 0 T_BOOL + VMReg r_1 = regs[i].first(); + VMReg r_2 = regs[i].second(); + if (!r_1->is_valid()) { + assert(!r_2->is_valid(), ""); + continue; + } + if (r_1->is_stack()) { + // memory to memory use fpu stack top + int ld_off = r_1->reg2stack() * VMRegImpl::stack_slot_size + extraspace; + if (!r_2->is_valid()) { + __ ld_ptr(AT, SP, ld_off); + __ st_ptr(AT, SP, st_off); + + } else { + + + int next_off = st_off - Interpreter::stackElementSize; + __ ld_ptr(AT, SP, ld_off); + __ st_ptr(AT, SP, st_off); + + // Ref to is_Register condition + if(sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) + __ st_ptr(AT, SP, st_off - 8); + } + } else if (r_1->is_Register()) { + Register r = r_1->as_Register(); + if (!r_2->is_valid()) { + __ sd(r, SP, st_off); + } else { + //FIXME, mips will not enter here + // long/double in gpr + __ sd(r, SP, st_off); + // In [java/util/zip/ZipFile.java] + // + // private static native long open(String name, int mode, long lastModified); + // private static native int getTotal(long jzfile); + // + // We need to transfer T_LONG paramenters from a compiled method to a native method. + // It's a complex process: + // + // Caller -> lir_static_call -> gen_resolve_stub + // -> -- resolve_static_call_C + // `- gen_c2i_adapter() [*] + // | + // `- AdapterHandlerLibrary::get_create_apapter_index + // -> generate_native_entry + // -> InterpreterRuntime::SignatureHandlerGenerator::pass_long [**] + // + // In [**], T_Long parameter is stored in stack as: + // + // (high) + // | | + // ----------- + // | 8 bytes | + // | (void) | + // ----------- + // | 8 bytes | + // | (long) | + // ----------- + // | | + // (low) + // + // However, the sequence is reversed here: + // + // (high) + // | | + // ----------- + // | 8 bytes | + // | (long) | + // ----------- + // | 8 bytes | + // | (void) | + // ----------- + // | | + // (low) + // + // So I stored another 8 bytes in the T_VOID slot. It then can be accessed from generate_native_entry(). 
+ // + if (sig_bt[i] == T_LONG) + __ sd(r, SP, st_off - 8); + } + } else if (r_1->is_FloatRegister()) { + assert(sig_bt[i] == T_FLOAT || sig_bt[i] == T_DOUBLE, "Must be a float register"); + + FloatRegister fr = r_1->as_FloatRegister(); + if (sig_bt[i] == T_FLOAT) + __ swc1(fr, SP, st_off); + else { + __ sdc1(fr, SP, st_off); + __ sdc1(fr, SP, st_off - 8); // T_DOUBLE needs two slots + } + } + } + + // Schedule the branch target address early. + __ ld_ptr(AT, Rmethod, in_bytes(Method::interpreter_entry_offset()) ); + // And repush original return address + __ move(RA, V0); + __ jr (AT); + __ delayed()->nop(); +} + +void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm, + int total_args_passed, + int comp_args_on_stack, + const BasicType *sig_bt, + const VMRegPair *regs) { + + // Generate an I2C adapter: adjust the I-frame to make space for the C-frame + // layout. Lesp was saved by the calling I-frame and will be restored on + // return. Meanwhile, outgoing arg space is all owned by the callee + // C-frame, so we can mangle it at will. After adjusting the frame size, + // hoist register arguments and repack other args according to the compiled + // code convention. Finally, end in a jump to the compiled code. The entry + // point address is the start of the buffer. + + // We will only enter here from an interpreted frame and never from after + // passing thru a c2i. Azul allowed this but we do not. If we lose the + // race and use a c2i we will remain interpreted for the race loser(s). + // This removes all sorts of headaches on the mips side and also eliminates + // the possibility of having c2i -> i2c -> c2i -> ... endless transitions. + + + __ move(T9, SP); + + // Cut-out for having no stack args. Since up to 2 int/oop args are passed + // in registers, we will occasionally have no stack args. + int comp_words_on_stack = 0; + if (comp_args_on_stack) { + // Sig words on the stack are greater-than VMRegImpl::stack0. Those in + // registers are below. By subtracting stack0, we either get a negative + // number (all values in registers) or the maximum stack slot accessed. + // int comp_args_on_stack = VMRegImpl::reg2stack(max_arg); + // Convert 4-byte stack slots to words. + comp_words_on_stack = round_to(comp_args_on_stack*4, wordSize)>>LogBytesPerWord; + // Round up to miminum stack alignment, in wordSize + comp_words_on_stack = round_to(comp_words_on_stack, 2); + __ daddiu(SP, SP, -comp_words_on_stack * wordSize); + } + + // Align the outgoing SP + __ move(AT, -(StackAlignmentInBytes)); + __ andr(SP, SP, AT); + // push the return address on the stack (note that pushing, rather + // than storing it, yields the correct frame alignment for the callee) + // Put saved SP in another register + const Register saved_sp = V0; + __ move(saved_sp, T9); + + + // Will jump to the compiled code just as if compiled code was doing it. + // Pre-load the register-jump target early, to schedule it better. + __ ld(T9, Rmethod, in_bytes(Method::from_compiled_offset())); + + // Now generate the shuffle code. Pick up all register args and move the + // rest through the floating point stack top. + for (int i = 0; i < total_args_passed; i++) { + if (sig_bt[i] == T_VOID) { + // Longs and doubles are passed in native word order, but misaligned + // in the 32-bit build. + assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half"); + continue; + } + + // Pick up 0, 1 or 2 words from SP+offset. 
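+    // Each argument is read from the interpreter frame at saved_sp + ld_off
+    // and moved to wherever the compiled convention put it: an outgoing
+    // stack slot, a GPR, or an FPR. For T_LONG/T_DOUBLE the payload lives in
+    // the lower interpreter slot, hence the extra loads at ld_off - 8 below.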
+ + //assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(), "scrambled load targets?"); + // Load in argument order going down. + int ld_off = (total_args_passed -1 - i)*Interpreter::stackElementSize; + // Point to interpreter value (vs. tag) + int next_off = ld_off - Interpreter::stackElementSize; + VMReg r_1 = regs[i].first(); + VMReg r_2 = regs[i].second(); + if (!r_1->is_valid()) { + assert(!r_2->is_valid(), ""); + continue; + } + if (r_1->is_stack()) { + // Convert stack slot to an SP offset (+ wordSize to + // account for return address ) + // NOTICE HERE!!!! I sub a wordSize here + int st_off = regs[i].first()->reg2stack()*VMRegImpl::stack_slot_size; + //+ wordSize; + + if (!r_2->is_valid()) { + __ ld(AT, saved_sp, ld_off); + __ sd(AT, SP, st_off); + } else { + // Interpreter local[n] == MSW, local[n+1] == LSW however locals + // are accessed as negative so LSW is at LOW address + + // ld_off is MSW so get LSW + // st_off is LSW (i.e. reg.first()) + + // [./org/eclipse/swt/graphics/GC.java] + // void drawImageXRender(Image srcImage, int srcX, int srcY, int srcWidth, int srcHeight, + // int destX, int destY, int destWidth, int destHeight, + // boolean simple, + // int imgWidth, int imgHeight, + // long maskPixmap, <-- Pass T_LONG in stack + // int maskType); + // Before this modification, Eclipse displays icons with solid black background. + // + __ ld(AT, saved_sp, ld_off); + if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) + __ ld(AT, saved_sp, ld_off - 8); + __ sd(AT, SP, st_off); + } + } else if (r_1->is_Register()) { // Register argument + Register r = r_1->as_Register(); + if (r_2->is_valid()) { + // Remember r_1 is low address (and LSB on mips) + // So r_2 gets loaded from high address regardless of the platform + assert(r_2->as_Register() == r_1->as_Register(), ""); + __ ld(r, saved_sp, ld_off); + + // + // For T_LONG type, the real layout is as below: + // + // (high) + // | | + // ----------- + // | 8 bytes | + // | (void) | + // ----------- + // | 8 bytes | + // | (long) | + // ----------- + // | | + // (low) + // + // We should load the low-8 bytes. + // + if (sig_bt[i] == T_LONG) + __ ld(r, saved_sp, ld_off - 8); + } else { + __ lw(r, saved_sp, ld_off); + } + } else if (r_1->is_FloatRegister()) { // Float Register + assert(sig_bt[i] == T_FLOAT || sig_bt[i] == T_DOUBLE, "Must be a float register"); + + FloatRegister fr = r_1->as_FloatRegister(); + if (sig_bt[i] == T_FLOAT) + __ lwc1(fr, saved_sp, ld_off); + else { + __ ldc1(fr, saved_sp, ld_off); + __ ldc1(fr, saved_sp, ld_off - 8); + } + } + } + + // 6243940 We might end up in handle_wrong_method if + // the callee is deoptimized as we race thru here. If that + // happens we don't want to take a safepoint because the + // caller frame will look interpreted and arguments are now + // "compiled" so it is much better to make this transition + // invisible to the stack walking code. Unfortunately if + // we try and find the callee by normal means a safepoint + // is possible. So we stash the desired callee in the thread + // and the vm will find there should this case occur. +#ifndef OPT_THREAD + Register thread = T8; + __ get_thread(thread); +#else + Register thread = TREG; +#endif + __ sd(Rmethod, thread, in_bytes(JavaThread::callee_target_offset())); + + // move methodOop to V0 in case we end up in an c2i adapter. + // the c2i adapters expect methodOop in V0 (c2) because c2's + // resolve stubs return the result (the method) in V0. + // I'd love to fix this. 
+ __ move(V0, Rmethod); + __ jr(T9); + __ delayed()->nop(); +} + +// --------------------------------------------------------------- +AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm, + int total_args_passed, + int comp_args_on_stack, + const BasicType *sig_bt, + const VMRegPair *regs, + AdapterFingerPrint* fingerprint) { + address i2c_entry = __ pc(); + + gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs); + + // ------------------------------------------------------------------------- + // Generate a C2I adapter. On entry we know G5 holds the methodOop. The + // args start out packed in the compiled layout. They need to be unpacked + // into the interpreter layout. This will almost always require some stack + // space. We grow the current (compiled) stack, then repack the args. We + // finally end in a jump to the generic interpreter entry point. On exit + // from the interpreter, the interpreter will restore our SP (lest the + // compiled code, which relys solely on SP and not FP, get sick). + + address c2i_unverified_entry = __ pc(); + Label skip_fixup; + { + Register holder = T1; + Register receiver = T0; + Register temp = T8; + address ic_miss = SharedRuntime::get_ic_miss_stub(); + + Label missed; + + //add for compressedoops + __ load_klass(temp, receiver); + + __ ld_ptr(AT, holder, CompiledICHolder::holder_klass_offset()); + __ ld_ptr(Rmethod, holder, CompiledICHolder::holder_metadata_offset()); + __ bne(AT, temp, missed); + __ delayed()->nop(); + // Method might have been compiled since the call site was patched to + // interpreted if that is the case treat it as a miss so we can get + // the call site corrected. + __ ld_ptr(AT, Rmethod, in_bytes(Method::code_offset())); + __ beq(AT, R0, skip_fixup); + __ delayed()->nop(); + __ bind(missed); + + __ jmp(ic_miss, relocInfo::runtime_call_type); + __ delayed()->nop(); + } + + address c2i_entry = __ pc(); + + gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup); + + __ flush(); + return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry); +} + +int SharedRuntime::c_calling_convention(const BasicType *sig_bt, + VMRegPair *regs, + VMRegPair *regs2, + int total_args_passed) { + assert(regs2 == NULL, "not needed on MIPS"); + // Return the number of VMReg stack_slots needed for the args. + // This value does not include an abi space (like register window + // save area). + + // We return the amount of VMReg stack slots we need to reserve for all + // the arguments NOT counting out_preserve_stack_slots. Since we always + // have space for storing at least 6 registers to memory we start with that. + // See int_stk_helper for a further discussion. + // We return the amount of VMRegImpl stack slots we need to reserve for all + // the arguments NOT counting out_preserve_stack_slots. 
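+  // Integer args use A0..A7 and float args use F12..F19; a single 'args' counter
+  // indexes both register files, and overflow args take two 32-bit VMReg stack
+  // slots (one 64-bit word) each.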
+ static const Register INT_ArgReg[Argument::n_register_parameters] = { + A0, A1, A2, A3, A4, A5, A6, A7 + }; + static const FloatRegister FP_ArgReg[Argument::n_float_register_parameters] = { + F12, F13, F14, F15, F16, F17, F18, F19 + }; + uint args = 0; + uint stk_args = 0; // inc by 2 each time + +// Example: +// n java.lang.UNIXProcess::forkAndExec +// private native int forkAndExec(byte[] prog, +// byte[] argBlock, int argc, +// byte[] envBlock, int envc, +// byte[] dir, +// boolean redirectErrorStream, +// FileDescriptor stdin_fd, +// FileDescriptor stdout_fd, +// FileDescriptor stderr_fd) +// JNIEXPORT jint JNICALL +// Java_java_lang_UNIXProcess_forkAndExec(JNIEnv *env, +// jobject process, +// jbyteArray prog, +// jbyteArray argBlock, jint argc, +// jbyteArray envBlock, jint envc, +// jbyteArray dir, +// jboolean redirectErrorStream, +// jobject stdin_fd, +// jobject stdout_fd, +// jobject stderr_fd) +// +// ::c_calling_convention +// 0: // env <-- a0 +// 1: L // klass/obj <-- t0 => a1 +// 2: [ // prog[] <-- a0 => a2 +// 3: [ // argBlock[] <-- a1 => a3 +// 4: I // argc <-- a2 => a4 +// 5: [ // envBlock[] <-- a3 => a5 +// 6: I // envc <-- a4 => a5 +// 7: [ // dir[] <-- a5 => a7 +// 8: Z // redirectErrorStream <-- a6 => sp[0] +// 9: L // stdin fp[16] => sp[8] +// 10: L // stdout fp[24] => sp[16] +// 11: L // stderr fp[32] => sp[24] +// + for (int i = 0; i < total_args_passed; i++) { + switch (sig_bt[i]) { + case T_VOID: // Halves of longs and doubles + assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half"); + regs[i].set_bad(); + break; + case T_BOOLEAN: + case T_CHAR: + case T_BYTE: + case T_SHORT: + case T_INT: + if (args < Argument::n_register_parameters) { + regs[i].set1(INT_ArgReg[args++]->as_VMReg()); + } else { + regs[i].set1(VMRegImpl::stack2reg(stk_args)); + stk_args += 2; + } + break; + case T_LONG: + assert(sig_bt[i + 1] == T_VOID, "expecting half"); + // fall through + case T_OBJECT: + case T_ARRAY: + case T_ADDRESS: + case T_METADATA: + if (args < Argument::n_register_parameters) { + regs[i].set2(INT_ArgReg[args++]->as_VMReg()); + } else { + regs[i].set2(VMRegImpl::stack2reg(stk_args)); + stk_args += 2; + } + break; + case T_FLOAT: + if (args < Argument::n_float_register_parameters) { + regs[i].set1(FP_ArgReg[args++]->as_VMReg()); + } else { + regs[i].set1(VMRegImpl::stack2reg(stk_args)); + stk_args += 2; + } + break; + case T_DOUBLE: + assert(sig_bt[i + 1] == T_VOID, "expecting half"); + if (args < Argument::n_float_register_parameters) { + regs[i].set2(FP_ArgReg[args++]->as_VMReg()); + } else { + regs[i].set2(VMRegImpl::stack2reg(stk_args)); + stk_args += 2; + } + break; + default: + ShouldNotReachHere(); + break; + } + } + + return round_to(stk_args, 2); +} + +// --------------------------------------------------------------------------- +void SharedRuntime::save_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) { + // We always ignore the frame_slots arg and just use the space just below frame pointer + // which by this time is free to use + switch (ret_type) { + case T_FLOAT: + __ swc1(FSF, FP, -wordSize); + break; + case T_DOUBLE: + __ sdc1(FSF, FP, -wordSize ); + break; + case T_VOID: break; + case T_LONG: + __ sd(V0, FP, -wordSize); + break; + case T_OBJECT: + case T_ARRAY: + __ sd(V0, FP, -wordSize); + break; + default: { + __ sw(V0, FP, -wordSize); + } + } +} + +void SharedRuntime::restore_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) { + // We always ignore the frame_slots arg 
and just use the space just below frame pointer + // which by this time is free to use + switch (ret_type) { + case T_FLOAT: + __ lwc1(FSF, FP, -wordSize); + break; + case T_DOUBLE: + __ ldc1(FSF, FP, -wordSize ); + break; + case T_LONG: + __ ld(V0, FP, -wordSize); + break; + case T_VOID: break; + case T_OBJECT: + case T_ARRAY: + __ ld(V0, FP, -wordSize); + break; + default: { + __ lw(V0, FP, -wordSize); + } + } +} + +static void save_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) { + for ( int i = first_arg ; i < arg_count ; i++ ) { + if (args[i].first()->is_Register()) { + __ push(args[i].first()->as_Register()); + } else if (args[i].first()->is_FloatRegister()) { + __ push(args[i].first()->as_FloatRegister()); + } + } +} + +static void restore_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) { + for ( int i = arg_count - 1 ; i >= first_arg ; i-- ) { + if (args[i].first()->is_Register()) { + __ pop(args[i].first()->as_Register()); + } else if (args[i].first()->is_FloatRegister()) { + __ pop(args[i].first()->as_FloatRegister()); + } + } +} + +// A simple move of integer like type +static void simple_move32(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { + if (src.first()->is_stack()) { + if (dst.first()->is_stack()) { + // stack to stack + __ lw(AT, FP, reg2offset_in(src.first())); + __ sd(AT, SP, reg2offset_out(dst.first())); + } else { + // stack to reg + __ lw(dst.first()->as_Register(), FP, reg2offset_in(src.first())); + } + } else if (dst.first()->is_stack()) { + // reg to stack + __ sd(src.first()->as_Register(), SP, reg2offset_out(dst.first())); + } else { + if (dst.first() != src.first()){ + __ move(dst.first()->as_Register(), src.first()->as_Register()); // fujie error:dst.first() + } + } +} + +// An oop arg. Must pass a handle not the oop itself +static void object_move(MacroAssembler* masm, + OopMap* map, + int oop_handle_offset, + int framesize_in_slots, + VMRegPair src, + VMRegPair dst, + bool is_receiver, + int* receiver_offset) { + + // must pass a handle. First figure out the location we use as a handle + + //FIXME, for mips, dst can be register + if (src.first()->is_stack()) { + // Oop is already on the stack as an argument + Register rHandle = V0; + Label nil; + __ xorr(rHandle, rHandle, rHandle); + __ ld(AT, FP, reg2offset_in(src.first())); + __ beq(AT, R0, nil); + __ delayed()->nop(); + __ lea(rHandle, Address(FP, reg2offset_in(src.first()))); + __ bind(nil); + if(dst.first()->is_stack())__ sd( rHandle, SP, reg2offset_out(dst.first())); + else __ move( (dst.first())->as_Register(), rHandle); + //if dst is register + //FIXME, do mips need out preserve stack slots? 
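+    // Record the incoming stack oop in the oop map; the slot index is biased by
+    // framesize_in_slots because the offset is relative to the caller's frame.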
+    int offset_in_older_frame = src.first()->reg2stack()
+                              + SharedRuntime::out_preserve_stack_slots();
+    map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + framesize_in_slots));
+    if (is_receiver) {
+      *receiver_offset = (offset_in_older_frame
+                          + framesize_in_slots) * VMRegImpl::stack_slot_size;
+    }
+  } else {
+    // Oop is in a register; we must store it to the space we reserve
+    // on the stack for oop_handles
+    const Register rOop = src.first()->as_Register();
+    assert( (rOop->encoding() >= A0->encoding()) && (rOop->encoding() <= T0->encoding()),"wrong register");
+    const Register rHandle = V0;
+    //Important: refer to java_calling_convention
+    int oop_slot = (rOop->encoding() - A0->encoding()) * VMRegImpl::slots_per_word + oop_handle_offset;
+    int offset = oop_slot*VMRegImpl::stack_slot_size;
+    Label skip;
+    __ sd( rOop , SP, offset );
+    map->set_oop(VMRegImpl::stack2reg(oop_slot));
+    __ xorr( rHandle, rHandle, rHandle);
+    __ beq(rOop, R0, skip);
+    __ delayed()->nop();
+    __ lea(rHandle, Address(SP, offset));
+    __ bind(skip);
+    // Store the handle parameter
+    if(dst.first()->is_stack())__ sd( rHandle, SP, reg2offset_out(dst.first()));
+    else                       __ move((dst.first())->as_Register(), rHandle);
+    //if dst is register
+
+    if (is_receiver) {
+      *receiver_offset = offset;
+    }
+  }
+}
+
+// A float arg may have to do a float reg to int reg conversion
+static void float_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
+  assert(!src.second()->is_valid() && !dst.second()->is_valid(), "bad float_move");
+
+  if (src.first()->is_stack()) {
+    if (dst.first()->is_stack()) {
+      __ lw(AT, FP, reg2offset_in(src.first()));
+      __ sw(AT, SP, reg2offset_out(dst.first()));
+    }
+    else
+      __ lwc1(dst.first()->as_FloatRegister(), FP, reg2offset_in(src.first()));
+  } else {
+    // reg to stack
+    if(dst.first()->is_stack())
+      __ swc1(src.first()->as_FloatRegister(), SP, reg2offset_out(dst.first()));
+    else
+      __ mov_s(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister());
+  }
+}
+
+// A long move
+static void long_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
+
+  // The only legal possibility for a long_move VMRegPair is:
+  // 1: two stack slots (possibly unaligned)
+  // as neither the Java nor the C calling convention will use registers
+  // for longs.
+
+  if (src.first()->is_stack()) {
+    assert(src.second()->is_stack() && dst.second()->is_stack(), "must be all stack");
+    if( dst.first()->is_stack()){
+      __ ld(AT, FP, reg2offset_in(src.first()));
+      __ sd(AT, SP, reg2offset_out(dst.first()));
+    } else {
+      __ ld( (dst.first())->as_Register() , FP, reg2offset_in(src.first()));
+    }
+  } else {
+    if( dst.first()->is_stack()){
+      __ sd( (src.first())->as_Register(), SP, reg2offset_out(dst.first()));
+    } else {
+      __ move( (dst.first())->as_Register() , (src.first())->as_Register());
+    }
+  }
+}
+
+// A double move
+static void double_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
+
+  // The only legal possibilities for a double_move VMRegPair are:
+  //   1: a single physical register (an FPU register)
+  //   2: two stack slots (possibly unaligned)
+  // dst can only be a pair of stack slots.
+ + + if (src.first()->is_stack()) { + // source is all stack + if( dst.first()->is_stack()){ + __ ld(AT, FP, reg2offset_in(src.first())); + __ sd(AT, SP, reg2offset_out(dst.first())); + } else { + __ ldc1( (dst.first())->as_FloatRegister(), FP, reg2offset_in(src.first())); + } + + } else { + // reg to stack + // No worries about stack alignment + if( dst.first()->is_stack()){ + __ sdc1(src.first()->as_FloatRegister(), SP, reg2offset_out(dst.first())); + } + else + __ mov_d( dst.first()->as_FloatRegister(), src.first()->as_FloatRegister()); + + } +} + +static void verify_oop_args(MacroAssembler* masm, + methodHandle method, + const BasicType* sig_bt, + const VMRegPair* regs) { + Register temp_reg = T9; // not part of any compiled calling seq + if (VerifyOops) { + for (int i = 0; i < method->size_of_parameters(); i++) { + if (sig_bt[i] == T_OBJECT || + sig_bt[i] == T_ARRAY) { + VMReg r = regs[i].first(); + assert(r->is_valid(), "bad oop arg"); + if (r->is_stack()) { + __ ld(temp_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize)); + __ verify_oop(temp_reg); + } else { + __ verify_oop(r->as_Register()); + } + } + } + } +} + +static void gen_special_dispatch(MacroAssembler* masm, + methodHandle method, + const BasicType* sig_bt, + const VMRegPair* regs) { + verify_oop_args(masm, method, sig_bt, regs); + vmIntrinsics::ID iid = method->intrinsic_id(); + + // Now write the args into the outgoing interpreter space + bool has_receiver = false; + Register receiver_reg = noreg; + int member_arg_pos = -1; + Register member_reg = noreg; + int ref_kind = MethodHandles::signature_polymorphic_intrinsic_ref_kind(iid); + if (ref_kind != 0) { + member_arg_pos = method->size_of_parameters() - 1; // trailing MemberName argument + member_reg = S3; // known to be free at this point + has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind); + } else if (iid == vmIntrinsics::_invokeBasic) { + has_receiver = true; + } else { + fatal("unexpected intrinsic id %d", iid); + } + + if (member_reg != noreg) { + // Load the member_arg into register, if necessary. + SharedRuntime::check_member_name_argument_is_last_argument(method, sig_bt, regs); + VMReg r = regs[member_arg_pos].first(); + if (r->is_stack()) { + __ ld(member_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size)); + } else { + // no data motion is needed + member_reg = r->as_Register(); + } + } + + if (has_receiver) { + // Make sure the receiver is loaded into a register. + assert(method->size_of_parameters() > 0, "oob"); + assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object"); + VMReg r = regs[0].first(); + assert(r->is_valid(), "bad receiver arg"); + if (r->is_stack()) { + // Porting note: This assumes that compiled calling conventions always + // pass the receiver oop in a register. If this is not true on some + // platform, pick a temp and load the receiver from stack. + fatal("receiver always in a register"); + receiver_reg = SSR; // known to be free at this point + __ ld(receiver_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size)); + } else { + // no data motion is needed + receiver_reg = r->as_Register(); + } + } + + // Figure out which address we are really jumping to: + MethodHandles::generate_method_handle_dispatch(masm, iid, + receiver_reg, member_reg, /*for_compiler_entry:*/ true); +} + +// --------------------------------------------------------------------------- +// Generate a native wrapper for a given method. 
The method takes arguments
+// in the Java compiled code convention, marshals them to the native
+// convention (handlizes oops, etc), transitions to native, makes the call,
+// returns to java state (possibly blocking), unhandlizes any result and
+// returns.
+nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
+                                                const methodHandle& method,
+                                                int compile_id,
+                                                BasicType* in_sig_bt,
+                                                VMRegPair* in_regs,
+                                                BasicType ret_type,
+                                                address critical_entry) {
+  if (method->is_method_handle_intrinsic()) {
+    vmIntrinsics::ID iid = method->intrinsic_id();
+    intptr_t start = (intptr_t)__ pc();
+    int vep_offset = ((intptr_t)__ pc()) - start;
+    // Make enough room for patch_verified_entry
+    __ nop();
+    __ nop();
+    gen_special_dispatch(masm,
+                         method,
+                         in_sig_bt,
+                         in_regs);
+    int frame_complete = ((intptr_t)__ pc()) - start;  // not complete, period
+    __ flush();
+    int stack_slots = SharedRuntime::out_preserve_stack_slots();  // no out slots at all, actually
+    return nmethod::new_native_nmethod(method,
+                                       compile_id,
+                                       masm->code(),
+                                       vep_offset,
+                                       frame_complete,
+                                       stack_slots / VMRegImpl::slots_per_word,
+                                       in_ByteSize(-1),
+                                       in_ByteSize(-1),
+                                       (OopMapSet*)NULL);
+  }
+  bool is_critical_native = true;
+  address native_func = critical_entry;
+  if (native_func == NULL) {
+    native_func = method->native_function();
+    is_critical_native = false;
+  }
+  assert(native_func != NULL, "must have function");
+
+  // Native nmethod wrappers never take possession of the oop arguments.
+  // So the caller will gc the arguments. The only thing we need an
+  // oopMap for is if the call is static
+  //
+  // An OopMap for lock (and class if static), and one for the VM call itself
+  OopMapSet *oop_maps = new OopMapSet();
+
+  // We have received a description of where all the Java args are located
+  // on entry to the wrapper. We need to convert these args to where
+  // the jni function will expect them.
To figure out where they go + // we convert the java signature to a C signature by inserting + // the hidden arguments as arg[0] and possibly arg[1] (static method) + + const int total_in_args = method->size_of_parameters(); + int total_c_args = total_in_args; + if (!is_critical_native) { + total_c_args += 1; + if (method->is_static()) { + total_c_args++; + } + } else { + for (int i = 0; i < total_in_args; i++) { + if (in_sig_bt[i] == T_ARRAY) { + total_c_args++; + } + } + } + + BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args); + VMRegPair* out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args); + BasicType* in_elem_bt = NULL; + + int argc = 0; + if (!is_critical_native) { + out_sig_bt[argc++] = T_ADDRESS; + if (method->is_static()) { + out_sig_bt[argc++] = T_OBJECT; + } + + for (int i = 0; i < total_in_args ; i++ ) { + out_sig_bt[argc++] = in_sig_bt[i]; + } + } else { + Thread* THREAD = Thread::current(); + in_elem_bt = NEW_RESOURCE_ARRAY(BasicType, total_in_args); + SignatureStream ss(method->signature()); + for (int i = 0; i < total_in_args ; i++ ) { + if (in_sig_bt[i] == T_ARRAY) { + // Arrays are passed as int, elem* pair + out_sig_bt[argc++] = T_INT; + out_sig_bt[argc++] = T_ADDRESS; + Symbol* atype = ss.as_symbol(CHECK_NULL); + const char* at = atype->as_C_string(); + if (strlen(at) == 2) { + assert(at[0] == '[', "must be"); + switch (at[1]) { + case 'B': in_elem_bt[i] = T_BYTE; break; + case 'C': in_elem_bt[i] = T_CHAR; break; + case 'D': in_elem_bt[i] = T_DOUBLE; break; + case 'F': in_elem_bt[i] = T_FLOAT; break; + case 'I': in_elem_bt[i] = T_INT; break; + case 'J': in_elem_bt[i] = T_LONG; break; + case 'S': in_elem_bt[i] = T_SHORT; break; + case 'Z': in_elem_bt[i] = T_BOOLEAN; break; + default: ShouldNotReachHere(); + } + } + } else { + out_sig_bt[argc++] = in_sig_bt[i]; + in_elem_bt[i] = T_VOID; + } + if (in_sig_bt[i] != T_VOID) { + assert(in_sig_bt[i] == ss.type(), "must match"); + ss.next(); + } + } + } + + // Now figure out where the args must be stored and how much stack space + // they require (neglecting out_preserve_stack_slots but space for storing + // the 1st six register arguments). It's weird see int_stk_helper. + // + int out_arg_slots; + out_arg_slots = c_calling_convention(out_sig_bt, out_regs, NULL, total_c_args); + + // Compute framesize for the wrapper. We need to handlize all oops in + // registers. We must create space for them here that is disjoint from + // the windowed save area because we have no control over when we might + // flush the window again and overwrite values that gc has since modified. + // (The live window race) + // + // We always just allocate 6 word for storing down these object. This allow + // us to simply record the base and use the Ireg number to decide which + // slot to use. (Note that the reg number is the inbound number not the + // outbound number). + // We must shuffle args to match the native convention, and include var-args space. + + // Calculate the total number of stack slots we will need. + + // First count the abi requirement plus all of the outgoing args + int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots; + + // Now the space for the inbound oop handle area + int total_save_slots = 9 * VMRegImpl::slots_per_word; // 9 arguments passed in registers + if (is_critical_native) { + // Critical natives may have to call out so they need a save area + // for register arguments. 
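+    // Count the register arguments that may need spilling: one 32-bit slot for
+    // each int-like or float arg, two slots for longs, doubles and arrays.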
+ int double_slots = 0; + int single_slots = 0; + for ( int i = 0; i < total_in_args; i++) { + if (in_regs[i].first()->is_Register()) { + const Register reg = in_regs[i].first()->as_Register(); + switch (in_sig_bt[i]) { + case T_BOOLEAN: + case T_BYTE: + case T_SHORT: + case T_CHAR: + case T_INT: single_slots++; break; + case T_ARRAY: + case T_LONG: double_slots++; break; + default: ShouldNotReachHere(); + } + } else if (in_regs[i].first()->is_FloatRegister()) { + switch (in_sig_bt[i]) { + case T_FLOAT: single_slots++; break; + case T_DOUBLE: double_slots++; break; + default: ShouldNotReachHere(); + } + } + } + total_save_slots = double_slots * 2 + single_slots; + // align the save area + if (double_slots != 0) { + stack_slots = round_to(stack_slots, 2); + } + } + + int oop_handle_offset = stack_slots; + stack_slots += total_save_slots; + + // Now any space we need for handlizing a klass if static method + + int klass_slot_offset = 0; + int klass_offset = -1; + int lock_slot_offset = 0; + bool is_static = false; + + if (method->is_static()) { + klass_slot_offset = stack_slots; + stack_slots += VMRegImpl::slots_per_word; + klass_offset = klass_slot_offset * VMRegImpl::stack_slot_size; + is_static = true; + } + + // Plus a lock if needed + + if (method->is_synchronized()) { + lock_slot_offset = stack_slots; + stack_slots += VMRegImpl::slots_per_word; + } + + // Now a place to save return value or as a temporary for any gpr -> fpr moves + // + 2 for return address (which we own) and saved fp + stack_slots += 2 + 9 * VMRegImpl::slots_per_word; // (T0, A0, A1, A2, A3, A4, A5, A6, A7) + + // Ok The space we have allocated will look like: + // + // + // FP-> | | + // |---------------------| + // | 2 slots for moves | + // |---------------------| + // | lock box (if sync) | + // |---------------------| <- lock_slot_offset + // | klass (if static) | + // |---------------------| <- klass_slot_offset + // | oopHandle area | + // |---------------------| <- oop_handle_offset + // | outbound memory | + // | based arguments | + // | | + // |---------------------| + // | vararg area | + // |---------------------| + // | | + // SP-> | out_preserved_slots | + // + // + + + // Now compute actual number of stack words we need rounding to make + // stack properly aligned. + stack_slots = round_to(stack_slots, StackAlignmentInSlots); + + int stack_size = stack_slots * VMRegImpl::stack_slot_size; + + intptr_t start = (intptr_t)__ pc(); + + + + // First thing make an ic check to see if we should even be here + address ic_miss = SharedRuntime::get_ic_miss_stub(); + + // We are free to use all registers as temps without saving them and + // restoring them except fp. fp is the only callee save register + // as far as the interpreter and the compiler(s) are concerned. + + //refer to register_mips.hpp:IC_Klass + const Register ic_reg = T1; + const Register receiver = T0; + + Label hit; + Label exception_pending; + + __ verify_oop(receiver); + //add for compressedoops + __ load_klass(T9, receiver); + __ beq(T9, ic_reg, hit); + __ delayed()->nop(); + __ jmp(ic_miss, relocInfo::runtime_call_type); + __ delayed()->nop(); + __ bind(hit); + + int vep_offset = ((intptr_t)__ pc()) - start; + + // Make enough room for patch_verified_entry + __ nop(); + __ nop(); + + // Generate stack overflow check + if (UseStackBanging) { + __ bang_stack_with_offset((int)JavaThread::stack_shadow_zone_size()); + } + + // Generate a new frame for the wrapper. + // do mips need this ? 
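+  // Save the current SP in last_Java_sp and align SP as the ABI requires before
+  // building the wrapper frame (enter() plus the remaining stack_size below).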
+#ifndef OPT_THREAD + __ get_thread(TREG); +#endif + __ st_ptr(SP, TREG, in_bytes(JavaThread::last_Java_sp_offset())); + __ move(AT, -(StackAlignmentInBytes)); + __ andr(SP, SP, AT); + + __ enter(); + // -2 because return address is already present and so is saved fp + __ addiu(SP, SP, -1 * (stack_size - 2*wordSize)); + + // Frame is now completed as far a size and linkage. + + int frame_complete = ((intptr_t)__ pc()) - start; + + // Calculate the difference between sp and fp. We need to know it + // after the native call because on windows Java Natives will pop + // the arguments and it is painful to do sp relative addressing + // in a platform independent way. So after the call we switch to + // fp relative addressing. + //FIXME actually , the fp_adjustment may not be the right, because andr(sp, sp, at) may change + //the SP + int fp_adjustment = stack_size - 2*wordSize; + +#ifdef COMPILER2 + // C2 may leave the stack dirty if not in SSE2+ mode + __ empty_FPU_stack(); +#endif + + // Compute the fp offset for any slots used after the jni call + + int lock_slot_fp_offset = (lock_slot_offset*VMRegImpl::stack_slot_size) - fp_adjustment; + // We use TREG as a thread pointer because it is callee save and + // if we load it once it is usable thru the entire wrapper + const Register thread = TREG; + + // We use S4 as the oop handle for the receiver/klass + // It is callee save so it survives the call to native + + const Register oop_handle_reg = S4; + if (is_critical_native) { + Unimplemented(); + // check_needs_gc_for_critical_native(masm, stack_slots, total_c_args, total_in_args, + // oop_handle_offset, oop_maps, in_regs, in_sig_bt); + } + +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + + // + // We immediately shuffle the arguments so that any vm call we have to + // make from here on out (sync slow path, jvmpi, etc.) we will have + // captured the oops from our caller and have a valid oopMap for + // them. + + // ----------------- + // The Grand Shuffle + // + // Natives require 1 or 2 extra arguments over the normal ones: the JNIEnv* + // and, if static, the class mirror instead of a receiver. This pretty much + // guarantees that register layout will not match (and mips doesn't use reg + // parms though amd does). Since the native abi doesn't use register args + // and the java conventions does we don't have to worry about collisions. + // All of our moved are reg->stack or stack->stack. + // We ignore the extra arguments during the shuffle and handle them at the + // last moment. The shuffle is described by the two calling convention + // vectors we have in our possession. We simply walk the java vector to + // get the source locations and the c vector to get the destinations. + + int c_arg = method->is_static() ? 2 : 1 ; + + // Record sp-based slot for receiver on stack for non-static methods + int receiver_offset = -1; + + // This is a trick. We double the stack slots so we can claim + // the oops in the caller's frame. Since we are sure to have + // more args than the caller doubling is enough to make + // sure we can capture all the incoming oop args from the + // caller. 
+  //
+  OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
+
+  // Mark location of fp (someday)
+  // map->set_callee_saved(VMRegImpl::stack2reg( stack_slots - 2), stack_slots * 2, 0, vmreg(fp));
+
+#ifdef ASSERT
+  bool reg_destroyed[RegisterImpl::number_of_registers];
+  bool freg_destroyed[FloatRegisterImpl::number_of_registers];
+  for ( int r = 0 ; r < RegisterImpl::number_of_registers ; r++ ) {
+    reg_destroyed[r] = false;
+  }
+  for ( int f = 0 ; f < FloatRegisterImpl::number_of_registers ; f++ ) {
+    freg_destroyed[f] = false;
+  }
+
+#endif /* ASSERT */
+
+  // This may iterate in two different directions depending on the
+  // kind of native it is. The reason is that for regular JNI natives
+  // the incoming and outgoing registers are offset upwards and for
+  // critical natives they are offset down.
+  GrowableArray<int> arg_order(2 * total_in_args);
+  VMRegPair tmp_vmreg;
+  tmp_vmreg.set2(T8->as_VMReg());
+
+  if (!is_critical_native) {
+    for (int i = total_in_args - 1, c_arg = total_c_args - 1; i >= 0; i--, c_arg--) {
+      arg_order.push(i);
+      arg_order.push(c_arg);
+    }
+  } else {
+    // Compute a valid move order, using tmp_vmreg to break any cycles
+    Unimplemented();
+    // ComputeMoveOrder cmo(total_in_args, in_regs, total_c_args, out_regs, in_sig_bt, arg_order, tmp_vmreg);
+  }
+
+  int temploc = -1;
+  for (int ai = 0; ai < arg_order.length(); ai += 2) {
+    int i = arg_order.at(ai);
+    int c_arg = arg_order.at(ai + 1);
+    __ block_comment(err_msg("move %d -> %d", i, c_arg));
+    if (c_arg == -1) {
+      assert(is_critical_native, "should only be required for critical natives");
+      // This arg needs to be moved to a temporary
+      __ move(tmp_vmreg.first()->as_Register(), in_regs[i].first()->as_Register());
+      in_regs[i] = tmp_vmreg;
+      temploc = i;
+      continue;
+    } else if (i == -1) {
+      assert(is_critical_native, "should only be required for critical natives");
+      // Read from the temporary location
+      assert(temploc != -1, "must be valid");
+      i = temploc;
+      temploc = -1;
+    }
+#ifdef ASSERT
+    if (in_regs[i].first()->is_Register()) {
+      assert(!reg_destroyed[in_regs[i].first()->as_Register()->encoding()], "destroyed reg!");
+    } else if (in_regs[i].first()->is_FloatRegister()) {
+      assert(!freg_destroyed[in_regs[i].first()->as_FloatRegister()->encoding()], "destroyed reg!");
+    }
+    if (out_regs[c_arg].first()->is_Register()) {
+      reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true;
+    } else if (out_regs[c_arg].first()->is_FloatRegister()) {
+      freg_destroyed[out_regs[c_arg].first()->as_FloatRegister()->encoding()] = true;
+    }
+#endif /* ASSERT */
+    switch (in_sig_bt[i]) {
+      case T_ARRAY:
+        if (is_critical_native) {
+          Unimplemented();
+          // unpack_array_argument(masm, in_regs[i], in_elem_bt[i], out_regs[c_arg + 1], out_regs[c_arg]);
+          c_arg++;
+#ifdef ASSERT
+          if (out_regs[c_arg].first()->is_Register()) {
+            reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true;
+          } else if (out_regs[c_arg].first()->is_FloatRegister()) {
+            freg_destroyed[out_regs[c_arg].first()->as_FloatRegister()->encoding()] = true;
+          }
+#endif
+          break;
+        }
+      case T_OBJECT:
+        assert(!is_critical_native, "no oop arguments");
+        object_move(masm, map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg],
+                    ((i == 0) && (!is_static)),
+                    &receiver_offset);
+        break;
+      case T_VOID:
+        break;
+
+      case T_FLOAT:
+        float_move(masm, in_regs[i], out_regs[c_arg]);
+        break;
+
+      case T_DOUBLE:
+        assert( i + 1 < total_in_args &&
+                in_sig_bt[i + 1] == T_VOID &&
+                out_sig_bt[c_arg+1] == T_VOID, "bad arg list");
+
double_move(masm, in_regs[i], out_regs[c_arg]); + break; + + case T_LONG : + long_move(masm, in_regs[i], out_regs[c_arg]); + break; + + case T_ADDRESS: assert(false, "found T_ADDRESS in java args"); + + default: + simple_move32(masm, in_regs[i], out_regs[c_arg]); + } + } + + // point c_arg at the first arg that is already loaded in case we + // need to spill before we call out + c_arg = total_c_args - total_in_args; + // Pre-load a static method's oop. Used both by locking code and + // the normal JNI call code. + + __ move(oop_handle_reg, A1); + + if (method->is_static() && !is_critical_native) { + + // load opp into a register + int oop_index = __ oop_recorder()->find_index(JNIHandles::make_local( + (method->method_holder())->java_mirror())); + + + RelocationHolder rspec = oop_Relocation::spec(oop_index); + __ relocate(rspec); + __ patchable_set48(oop_handle_reg, (long)JNIHandles::make_local((method->method_holder())->java_mirror())); + // Now handlize the static class mirror it's known not-null. + __ sd( oop_handle_reg, SP, klass_offset); + map->set_oop(VMRegImpl::stack2reg(klass_slot_offset)); + + // Now get the handle + __ lea(oop_handle_reg, Address(SP, klass_offset)); + // store the klass handle as second argument + __ move(A1, oop_handle_reg); + // and protect the arg if we must spill + c_arg--; + } + + // Change state to native (we save the return address in the thread, since it might not + // be pushed on the stack when we do a a stack traversal). It is enough that the pc() + // points into the right code segment. It does not have to be the correct return pc. + // We use the same pc/oopMap repeatedly when we call out + + intptr_t the_pc = (intptr_t) __ pc(); + oop_maps->add_gc_map(the_pc - start, map); + + __ set_last_Java_frame(SP, noreg, NULL); + __ relocate(relocInfo::internal_pc_type); + { + intptr_t save_pc = (intptr_t)the_pc ; + __ patchable_set48(AT, save_pc); + } + __ sd(AT, thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset())); + + + // We have all of the arguments setup at this point. We must not touch any register + // argument registers at this point (what if we save/restore them there are no oop? 
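+  // Notify DTrace of method entry (guarded by DTraceMethodProbes); the outgoing
+  // args are saved around the leaf call and restored afterwards.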
+ { + SkipIfEqual skip_if(masm, &DTraceMethodProbes, 0); + save_args(masm, total_c_args, c_arg, out_regs); + int metadata_index = __ oop_recorder()->find_index(method()); + RelocationHolder rspec = metadata_Relocation::spec(metadata_index); + __ relocate(rspec); + __ patchable_set48(AT, (long)(method())); + + __ call_VM_leaf( + CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry), + thread, AT); + + restore_args(masm, total_c_args, c_arg, out_regs); + } + + // These are register definitions we need for locking/unlocking + const Register swap_reg = T8; // Must use T8 for cmpxchg instruction + const Register obj_reg = T9; // Will contain the oop + //const Register lock_reg = T6; // Address of compiler lock object (BasicLock) + const Register lock_reg = c_rarg0; // Address of compiler lock object (BasicLock) + + + + Label slow_path_lock; + Label lock_done; + + // Lock a synchronized method + if (method->is_synchronized()) { + assert(!is_critical_native, "unhandled"); + + const int mark_word_offset = BasicLock::displaced_header_offset_in_bytes(); + + // Get the handle (the 2nd argument) + __ move(oop_handle_reg, A1); + + // Get address of the box + __ lea(lock_reg, Address(FP, lock_slot_fp_offset)); + + // Load the oop from the handle + __ ld(obj_reg, oop_handle_reg, 0); + + if (UseBiasedLocking) { + // Note that oop_handle_reg is trashed during this call + __ biased_locking_enter(lock_reg, obj_reg, swap_reg, A1, false, lock_done, &slow_path_lock); + } + + // Load immediate 1 into swap_reg %T8 + __ move(swap_reg, 1); + + __ ld(AT, obj_reg, 0); + __ orr(swap_reg, swap_reg, AT); + + __ sd(swap_reg, lock_reg, mark_word_offset); + __ cmpxchg(Address(obj_reg, 0), swap_reg, lock_reg, AT, true, false, lock_done); + // Test if the oopMark is an obvious stack pointer, i.e., + // 1) (mark & 3) == 0, and + // 2) sp <= mark < mark + os::pagesize() + // These 3 tests can be done by evaluating the following + // expression: ((mark - sp) & (3 - os::vm_page_size())), + // assuming both stack pointer and pagesize have their + // least significant 2 bits clear. + // NOTE: the oopMark is in swap_reg %T8 as the result of cmpxchg + + __ dsubu(swap_reg, swap_reg, SP); + __ move(AT, 3 - os::vm_page_size()); + __ andr(swap_reg , swap_reg, AT); + // Save the test result, for recursive case, the result is zero + __ sd(swap_reg, lock_reg, mark_word_offset); + __ bne(swap_reg, R0, slow_path_lock); + __ delayed()->nop(); + // Slow path will re-enter here + __ bind(lock_done); + + if (UseBiasedLocking) { + // Re-fetch oop_handle_reg as we trashed it above + __ move(A1, oop_handle_reg); + } + } + + + // Finally just about ready to make the JNI call + + + // get JNIEnv* which is first argument to native + if (!is_critical_native) { + __ addiu(A0, thread, in_bytes(JavaThread::jni_environment_offset())); + } + + // Example: Java_java_lang_ref_Finalizer_invokeFinalizeMethod(JNIEnv *env, jclass clazz, jobject ob) + // Load the second arguments into A1 + //__ ld(A1, SP , wordSize ); // klass + + // Now set thread in native + __ addiu(AT, R0, _thread_in_native); + if(os::is_MP()) { + __ sync(); // store release + } + __ sw(AT, thread, in_bytes(JavaThread::thread_state_offset())); + // do the call + __ call(native_func, relocInfo::runtime_call_type); + __ delayed()->nop(); + // WARNING - on Windows Java Natives use pascal calling convention and pop the + // arguments off of the stack. 
We could just re-adjust the stack pointer here + // and continue to do SP relative addressing but we instead switch to FP + // relative addressing. + + // Unpack native results. + switch (ret_type) { + case T_BOOLEAN: __ c2bool(V0); break; + case T_CHAR : __ andi(V0, V0, 0xFFFF); break; + case T_BYTE : __ sign_extend_byte (V0); break; + case T_SHORT : __ sign_extend_short(V0); break; + case T_INT : // nothing to do break; + case T_DOUBLE : + case T_FLOAT : + // Result is in st0 we'll save as needed + break; + case T_ARRAY: // Really a handle + case T_OBJECT: // Really a handle + break; // can't de-handlize until after safepoint check + case T_VOID: break; + case T_LONG: break; + default : ShouldNotReachHere(); + } + // Switch thread to "native transition" state before reading the synchronization state. + // This additional state is necessary because reading and testing the synchronization + // state is not atomic w.r.t. GC, as this scenario demonstrates: + // Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted. + // VM thread changes sync state to synchronizing and suspends threads for GC. + // Thread A is resumed to finish this native method, but doesn't block here since it + // didn't see any synchronization is progress, and escapes. + __ addiu(AT, R0, _thread_in_native_trans); + if(os::is_MP()) { + __ sync(); // store release + } + __ sw(AT, thread, in_bytes(JavaThread::thread_state_offset())); + + if(os::is_MP()) { + if (UseMembar) { + // Force this write out before the read below + __ sync(); + } else { + // Write serialization page so VM thread can do a pseudo remote membar. + // We use the current thread pointer to calculate a thread specific + // offset to write to within the page. This minimizes bus traffic + // due to cache line collision. + __ serialize_memory(thread, A0); + } + } + + Label after_transition; + + // check for safepoint operation in progress and/or pending suspend requests + { + Label Continue; + Label slow_path; + + __ safepoint_poll_acquire(slow_path, thread); + __ lw(AT, thread, in_bytes(JavaThread::suspend_flags_offset())); + __ beq(AT, R0, Continue); + __ delayed()->nop(); + __ bind(slow_path); + + // Don't use call_VM as it will see a possible pending exception and forward it + // and never return here preventing us from clearing _last_native_pc down below. + // + save_native_result(masm, ret_type, stack_slots); + __ move(A0, thread); + __ addiu(SP, SP, -wordSize); + __ push(S2); + __ move(AT, -(StackAlignmentInBytes)); + __ move(S2, SP); // use S2 as a sender SP holder + __ andr(SP, SP, AT); // align stack as required by ABI + if (!is_critical_native) { + __ call(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans), relocInfo::runtime_call_type); + __ delayed()->nop(); + } else { + __ call(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans_and_transition), relocInfo::runtime_call_type); + __ delayed()->nop(); + } + __ move(SP, S2); // use S2 as a sender SP holder + __ pop(S2); + __ addiu(SP, SP, wordSize); + //add for compressedoops + __ reinit_heapbase(); + // Restore any method result value + restore_native_result(masm, ret_type, stack_slots); + + if (is_critical_native) { + // The call above performed the transition to thread_in_Java so + // skip the transition logic below. 
+ __ beq(R0, R0, after_transition); + __ delayed()->nop(); + } + + __ bind(Continue); + } + + // change thread state + __ addiu(AT, R0, _thread_in_Java); + if(os::is_MP()) { + __ sync(); // store release + } + __ sw(AT, thread, in_bytes(JavaThread::thread_state_offset())); + __ bind(after_transition); + Label reguard; + Label reguard_done; + __ lw(AT, thread, in_bytes(JavaThread::stack_guard_state_offset())); + __ addiu(AT, AT, -JavaThread::stack_guard_yellow_reserved_disabled); + __ beq(AT, R0, reguard); + __ delayed()->nop(); + // slow path reguard re-enters here + __ bind(reguard_done); + + // Handle possible exception (will unlock if necessary) + + // native result if any is live + + // Unlock + Label slow_path_unlock; + Label unlock_done; + if (method->is_synchronized()) { + + Label done; + + // Get locked oop from the handle we passed to jni + __ ld( obj_reg, oop_handle_reg, 0); + if (UseBiasedLocking) { + __ biased_locking_exit(obj_reg, T8, done); + + } + + // Simple recursive lock? + + __ ld(AT, FP, lock_slot_fp_offset); + __ beq(AT, R0, done); + __ delayed()->nop(); + // Must save FSF if if it is live now because cmpxchg must use it + if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) { + save_native_result(masm, ret_type, stack_slots); + } + + // get old displaced header + __ ld (T8, FP, lock_slot_fp_offset); + // get address of the stack lock + __ addiu(c_rarg0, FP, lock_slot_fp_offset); + // Atomic swap old header if oop still contains the stack lock + __ cmpxchg(Address(obj_reg, 0), c_rarg0, T8, AT, false, false, unlock_done, &slow_path_unlock); + + // slow path re-enters here + __ bind(unlock_done); + if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) { + restore_native_result(masm, ret_type, stack_slots); + } + + __ bind(done); + + } + { + SkipIfEqual skip_if(masm, &DTraceMethodProbes, 0); + // Tell dtrace about this method exit + save_native_result(masm, ret_type, stack_slots); + int metadata_index = __ oop_recorder()->find_index( (method())); + RelocationHolder rspec = metadata_Relocation::spec(metadata_index); + __ relocate(rspec); + __ patchable_set48(AT, (long)(method())); + + __ call_VM_leaf( + CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), + thread, AT); + restore_native_result(masm, ret_type, stack_slots); + } + + // We can finally stop using that last_Java_frame we setup ages ago + + __ reset_last_Java_frame(false); + + // Unpack oop result, e.g. JNIHandles::resolve value. + if (ret_type == T_OBJECT || ret_type == T_ARRAY) { + __ resolve_jobject(V0, thread, T9); + } + + if (CheckJNICalls) { + // clear_pending_jni_exception_check + __ sd(R0, thread, in_bytes(JavaThread::pending_jni_exception_check_fn_offset())); + } + + if (!is_critical_native) { + // reset handle block + __ ld(AT, thread, in_bytes(JavaThread::active_handles_offset())); + __ sw(R0, AT, JNIHandleBlock::top_offset_in_bytes()); + } + + if (!is_critical_native) { + // Any exception pending? + __ ld(AT, thread, in_bytes(Thread::pending_exception_offset())); + __ bne(AT, R0, exception_pending); + __ delayed()->nop(); + } + // no exception, we're almost done + + // check that only result value is on FPU stack + __ verify_FPU(ret_type == T_FLOAT || ret_type == T_DOUBLE ? 
1 : 0, "native_wrapper normal exit"); + + // Return +#ifndef OPT_THREAD + __ get_thread(TREG); +#endif + //__ ld_ptr(SP, TREG, in_bytes(JavaThread::last_Java_sp_offset())); + __ leave(); + + __ jr(RA); + __ delayed()->nop(); + // Unexpected paths are out of line and go here + // Slow path locking & unlocking + if (method->is_synchronized()) { + + // BEGIN Slow path lock + __ bind(slow_path_lock); + + // protect the args we've loaded + save_args(masm, total_c_args, c_arg, out_regs); + + // has last_Java_frame setup. No exceptions so do vanilla call not call_VM + // args are (oop obj, BasicLock* lock, JavaThread* thread) + + __ move(A0, obj_reg); + __ move(A1, lock_reg); + __ move(A2, thread); + __ addiu(SP, SP, - 3*wordSize); + + __ move(AT, -(StackAlignmentInBytes)); + __ move(S2, SP); // use S2 as a sender SP holder + __ andr(SP, SP, AT); // align stack as required by ABI + + __ call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C), relocInfo::runtime_call_type); + __ delayed()->nop(); + __ move(SP, S2); + __ addiu(SP, SP, 3*wordSize); + + restore_args(masm, total_c_args, c_arg, out_regs); + +#ifdef ASSERT + { Label L; + __ ld(AT, thread, in_bytes(Thread::pending_exception_offset())); + __ beq(AT, R0, L); + __ delayed()->nop(); + __ stop("no pending exception allowed on exit from monitorenter"); + __ bind(L); + } +#endif + __ b(lock_done); + __ delayed()->nop(); + // END Slow path lock + + // BEGIN Slow path unlock + __ bind(slow_path_unlock); + + // Slow path unlock + + if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) { + save_native_result(masm, ret_type, stack_slots); + } + // Save pending exception around call to VM (which contains an EXCEPTION_MARK) + + __ ld(AT, thread, in_bytes(Thread::pending_exception_offset())); + __ push(AT); + __ sd(R0, thread, in_bytes(Thread::pending_exception_offset())); + + __ move(AT, -(StackAlignmentInBytes)); + __ move(S2, SP); // use S2 as a sender SP holder + __ andr(SP, SP, AT); // align stack as required by ABI + + // should be a peal + // +wordSize because of the push above + __ addiu(A1, FP, lock_slot_fp_offset); + + __ move(A0, obj_reg); + __ move(A2, thread); + __ addiu(SP, SP, -2*wordSize); + __ call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C), + relocInfo::runtime_call_type); + __ delayed()->nop(); + __ addiu(SP, SP, 2*wordSize); + __ move(SP, S2); + //add for compressedoops + __ reinit_heapbase(); +#ifdef ASSERT + { + Label L; + __ ld( AT, thread, in_bytes(Thread::pending_exception_offset())); + __ beq(AT, R0, L); + __ delayed()->nop(); + __ stop("no pending exception allowed on exit complete_monitor_unlocking_C"); + __ bind(L); + } +#endif /* ASSERT */ + + __ pop(AT); + __ sd(AT, thread, in_bytes(Thread::pending_exception_offset())); + if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) { + restore_native_result(masm, ret_type, stack_slots); + } + __ b(unlock_done); + __ delayed()->nop(); + // END Slow path unlock + + } + + // SLOW PATH Reguard the stack if needed + + __ bind(reguard); + save_native_result(masm, ret_type, stack_slots); + __ call(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages), + relocInfo::runtime_call_type); + __ delayed()->nop(); + //add for compressedoops + __ reinit_heapbase(); + restore_native_result(masm, ret_type, stack_slots); + __ b(reguard_done); + __ delayed()->nop(); + + // BEGIN EXCEPTION PROCESSING + if (!is_critical_native) { + // Forward the exception + __ bind(exception_pending); + + // remove possible return value from FPU register stack + __ 
empty_FPU_stack(); + + // pop our frame + //forward_exception_entry need return address on stack + __ move(SP, FP); + __ pop(FP); + + // and forward the exception + __ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); + __ delayed()->nop(); + } + __ flush(); + + nmethod *nm = nmethod::new_native_nmethod(method, + compile_id, + masm->code(), + vep_offset, + frame_complete, + stack_slots / VMRegImpl::slots_per_word, + (is_static ? in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)), + in_ByteSize(lock_slot_offset*VMRegImpl::stack_slot_size), + oop_maps); + + if (is_critical_native) { + nm->set_lazy_critical_native(true); + } + + return nm; + +} + +#ifdef HAVE_DTRACE_H +// --------------------------------------------------------------------------- +// Generate a dtrace nmethod for a given signature. The method takes arguments +// in the Java compiled code convention, marshals them to the native +// abi and then leaves nops at the position you would expect to call a native +// function. When the probe is enabled the nops are replaced with a trap +// instruction that dtrace inserts and the trace will cause a notification +// to dtrace. +// +// The probes are only able to take primitive types and java/lang/String as +// arguments. No other java types are allowed. Strings are converted to utf8 +// strings so that from dtrace point of view java strings are converted to C +// strings. There is an arbitrary fixed limit on the total space that a method +// can use for converting the strings. (256 chars per string in the signature). +// So any java string larger then this is truncated. + +static int fp_offset[ConcreteRegisterImpl::number_of_registers] = { 0 }; +static bool offsets_initialized = false; + +static VMRegPair reg64_to_VMRegPair(Register r) { + VMRegPair ret; + if (wordSize == 8) { + ret.set2(r->as_VMReg()); + } else { + ret.set_pair(r->successor()->as_VMReg(), r->as_VMReg()); + } + return ret; +} + + +nmethod *SharedRuntime::generate_dtrace_nmethod(MacroAssembler *masm, + methodHandle method) { + + + // generate_dtrace_nmethod is guarded by a mutex so we are sure to + // be single threaded in this method. + assert(AdapterHandlerLibrary_lock->owned_by_self(), "must be"); + + // Fill in the signature array, for the calling-convention call. + int total_args_passed = method->size_of_parameters(); + + BasicType* in_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_args_passed); + VMRegPair *in_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_args_passed); + + // The signature we are going to use for the trap that dtrace will see + // java/lang/String is converted. We drop "this" and any other object + // is converted to NULL. (A one-slot java/lang/Long object reference + // is converted to a two-slot long, which is why we double the allocation). 
+ BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_args_passed * 2); + VMRegPair* out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_args_passed * 2); + + int i=0; + int total_strings = 0; + int first_arg_to_pass = 0; + int total_c_args = 0; + + // Skip the receiver as dtrace doesn't want to see it + if( !method->is_static() ) { + in_sig_bt[i++] = T_OBJECT; + first_arg_to_pass = 1; + } + + SignatureStream ss(method->signature()); + for ( ; !ss.at_return_type(); ss.next()) { + BasicType bt = ss.type(); + in_sig_bt[i++] = bt; // Collect remaining bits of signature + out_sig_bt[total_c_args++] = bt; + if( bt == T_OBJECT) { + symbolOop s = ss.as_symbol_or_null(); + if (s == vmSymbols::java_lang_String()) { + total_strings++; + out_sig_bt[total_c_args-1] = T_ADDRESS; + } else if (s == vmSymbols::java_lang_Boolean() || + s == vmSymbols::java_lang_Byte()) { + out_sig_bt[total_c_args-1] = T_BYTE; + } else if (s == vmSymbols::java_lang_Character() || + s == vmSymbols::java_lang_Short()) { + out_sig_bt[total_c_args-1] = T_SHORT; + } else if (s == vmSymbols::java_lang_Integer() || + s == vmSymbols::java_lang_Float()) { + out_sig_bt[total_c_args-1] = T_INT; + } else if (s == vmSymbols::java_lang_Long() || + s == vmSymbols::java_lang_Double()) { + out_sig_bt[total_c_args-1] = T_LONG; + out_sig_bt[total_c_args++] = T_VOID; + } + } else if ( bt == T_LONG || bt == T_DOUBLE ) { + in_sig_bt[i++] = T_VOID; // Longs & doubles take 2 Java slots + // We convert double to long + out_sig_bt[total_c_args-1] = T_LONG; + out_sig_bt[total_c_args++] = T_VOID; + } else if ( bt == T_FLOAT) { + // We convert float to int + out_sig_bt[total_c_args-1] = T_INT; + } + } + + assert(i==total_args_passed, "validly parsed signature"); + + // Now get the compiled-Java layout as input arguments + int comp_args_on_stack; + comp_args_on_stack = SharedRuntime::java_calling_convention( + in_sig_bt, in_regs, total_args_passed, false); + + // We have received a description of where all the java arg are located + // on entry to the wrapper. We need to convert these args to where + // the a native (non-jni) function would expect them. To figure out + // where they go we convert the java signature to a C signature and remove + // T_VOID for any long/double we might have received. + + + // Now figure out where the args must be stored and how much stack space + // they require (neglecting out_preserve_stack_slots but space for storing + // the 1st six register arguments). It's weird see int_stk_helper. + + int out_arg_slots; + out_arg_slots = c_calling_convention(out_sig_bt, out_regs, NULL, total_c_args); + + // Calculate the total number of stack slots we will need. + + // First count the abi requirement plus all of the outgoing args + int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots; + + // Plus a temp for possible converion of float/double/long register args + + int conversion_temp = stack_slots; + stack_slots += 2; + + + // Now space for the string(s) we must convert + + int string_locs = stack_slots; + stack_slots += total_strings * + (max_dtrace_string_size / VMRegImpl::stack_slot_size); + + // Ok The space we have allocated will look like: + // + // + // FP-> | | + // |---------------------| + // | string[n] | + // |---------------------| <- string_locs[n] + // | string[n-1] | + // |---------------------| <- string_locs[n-1] + // | ... | + // | ... 
| + // |---------------------| <- string_locs[1] + // | string[0] | + // |---------------------| <- string_locs[0] + // | temp | + // |---------------------| <- conversion_temp + // | outbound memory | + // | based arguments | + // | | + // |---------------------| + // | | + // SP-> | out_preserved_slots | + // + // + + // Now compute actual number of stack words we need rounding to make + // stack properly aligned. + stack_slots = round_to(stack_slots, 4 * VMRegImpl::slots_per_word); + + int stack_size = stack_slots * VMRegImpl::stack_slot_size; + + intptr_t start = (intptr_t)__ pc(); + + // First thing make an ic check to see if we should even be here + + { + Label L; + const Register temp_reg = G3_scratch; + Address ic_miss(temp_reg, SharedRuntime::get_ic_miss_stub()); + __ verify_oop(O0); + __ ld_ptr(O0, oopDesc::klass_offset_in_bytes(), temp_reg); + __ cmp(temp_reg, G5_inline_cache_reg); + __ brx(Assembler::equal, true, Assembler::pt, L); + __ delayed()->nop(); + + __ jump_to(ic_miss, 0); + __ delayed()->nop(); + __ align(CodeEntryAlignment); + __ bind(L); + } + + int vep_offset = ((intptr_t)__ pc()) - start; + + // Make enough room for patch_verified_entry + __ nop(); + __ nop(); + + // Generate stack overflow check before creating frame + __ generate_stack_overflow_check(stack_size); + + // Generate a new frame for the wrapper. + __ save(SP, -stack_size, SP); + + // Frame is now completed as far a size and linkage. + + int frame_complete = ((intptr_t)__ pc()) - start; + +#ifdef ASSERT + bool reg_destroyed[RegisterImpl::number_of_registers]; + bool freg_destroyed[FloatRegisterImpl::number_of_registers]; + for ( int r = 0 ; r < RegisterImpl::number_of_registers ; r++ ) { + reg_destroyed[r] = false; + } + for ( int f = 0 ; f < FloatRegisterImpl::number_of_registers ; f++ ) { + freg_destroyed[f] = false; + } + +#endif /* ASSERT */ + + VMRegPair zero; + const Register g0 = G0; // without this we get a compiler warning (why??) 
+ zero.set2(g0->as_VMReg()); + + int c_arg, j_arg; + + Register conversion_off = noreg; + + for (j_arg = first_arg_to_pass, c_arg = 0 ; + j_arg < total_args_passed ; j_arg++, c_arg++ ) { + + VMRegPair src = in_regs[j_arg]; + VMRegPair dst = out_regs[c_arg]; + +#ifdef ASSERT + if (src.first()->is_Register()) { + assert(!reg_destroyed[src.first()->as_Register()->encoding()], "ack!"); + } else if (src.first()->is_FloatRegister()) { + assert(!freg_destroyed[src.first()->as_FloatRegister()->encoding( + FloatRegisterImpl::S)], "ack!"); + } + if (dst.first()->is_Register()) { + reg_destroyed[dst.first()->as_Register()->encoding()] = true; + } else if (dst.first()->is_FloatRegister()) { + freg_destroyed[dst.first()->as_FloatRegister()->encoding( + FloatRegisterImpl::S)] = true; + } +#endif /* ASSERT */ + + switch (in_sig_bt[j_arg]) { + case T_ARRAY: + case T_OBJECT: + { + if (out_sig_bt[c_arg] == T_BYTE || out_sig_bt[c_arg] == T_SHORT || + out_sig_bt[c_arg] == T_INT || out_sig_bt[c_arg] == T_LONG) { + // need to unbox a one-slot value + Register in_reg = L0; + Register tmp = L2; + if ( src.first()->is_reg() ) { + in_reg = src.first()->as_Register(); + } else { + assert(Assembler::is_simm13(reg2offset(src.first()) + STACK_BIAS), + "must be"); + __ ld_ptr(FP, reg2offset(src.first()) + STACK_BIAS, in_reg); + } + // If the final destination is an acceptable register + if ( dst.first()->is_reg() ) { + if ( dst.is_single_phys_reg() || out_sig_bt[c_arg] != T_LONG ) { + tmp = dst.first()->as_Register(); + } + } + + Label skipUnbox; + if ( wordSize == 4 && out_sig_bt[c_arg] == T_LONG ) { + __ mov(G0, tmp->successor()); + } + __ br_null(in_reg, true, Assembler::pn, skipUnbox); + __ delayed()->mov(G0, tmp); + + BasicType bt = out_sig_bt[c_arg]; + int box_offset = java_lang_boxing_object::value_offset_in_bytes(bt); + switch (bt) { + case T_BYTE: + __ ldub(in_reg, box_offset, tmp); break; + case T_SHORT: + __ lduh(in_reg, box_offset, tmp); break; + case T_INT: + __ ld(in_reg, box_offset, tmp); break; + case T_LONG: + __ ld_long(in_reg, box_offset, tmp); break; + default: ShouldNotReachHere(); + } + + __ bind(skipUnbox); + // If tmp wasn't final destination copy to final destination + if (tmp == L2) { + VMRegPair tmp_as_VM = reg64_to_VMRegPair(L2); + if (out_sig_bt[c_arg] == T_LONG) { + long_move(masm, tmp_as_VM, dst); + } else { + move32_64(masm, tmp_as_VM, out_regs[c_arg]); + } + } + if (out_sig_bt[c_arg] == T_LONG) { + assert(out_sig_bt[c_arg+1] == T_VOID, "must be"); + ++c_arg; // move over the T_VOID to keep the loop indices in sync + } + } else if (out_sig_bt[c_arg] == T_ADDRESS) { + Register s = + src.first()->is_reg() ? src.first()->as_Register() : L2; + Register d = + dst.first()->is_reg() ? dst.first()->as_Register() : L2; + + // We store the oop now so that the conversion pass can reach + // while in the inner frame. This will be the only store if + // the oop is NULL. 
+ if (s != L2) { + // src is register + if (d != L2) { + // dst is register + __ mov(s, d); + } else { + assert(Assembler::is_simm13(reg2offset(dst.first()) + + STACK_BIAS), "must be"); + __ st_ptr(s, SP, reg2offset(dst.first()) + STACK_BIAS); + } + } else { + // src not a register + assert(Assembler::is_simm13(reg2offset(src.first()) + + STACK_BIAS), "must be"); + __ ld_ptr(FP, reg2offset(src.first()) + STACK_BIAS, d); + if (d == L2) { + assert(Assembler::is_simm13(reg2offset(dst.first()) + + STACK_BIAS), "must be"); + __ st_ptr(d, SP, reg2offset(dst.first()) + STACK_BIAS); + } + } + } else if (out_sig_bt[c_arg] != T_VOID) { + // Convert the arg to NULL + if (dst.first()->is_reg()) { + __ mov(G0, dst.first()->as_Register()); + } else { + assert(Assembler::is_simm13(reg2offset(dst.first()) + + STACK_BIAS), "must be"); + __ st_ptr(G0, SP, reg2offset(dst.first()) + STACK_BIAS); + } + } + } + break; + case T_VOID: + break; + + case T_FLOAT: + if (src.first()->is_stack()) { + // Stack to stack/reg is simple + move32_64(masm, src, dst); + } else { + if (dst.first()->is_reg()) { + // freg -> reg + int off = + STACK_BIAS + conversion_temp * VMRegImpl::stack_slot_size; + Register d = dst.first()->as_Register(); + if (Assembler::is_simm13(off)) { + __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(), + SP, off); + __ ld(SP, off, d); + } else { + if (conversion_off == noreg) { + __ set(off, L6); + conversion_off = L6; + } + __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(), + SP, conversion_off); + __ ld(SP, conversion_off , d); + } + } else { + // freg -> mem + int off = STACK_BIAS + reg2offset(dst.first()); + if (Assembler::is_simm13(off)) { + __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(), + SP, off); + } else { + if (conversion_off == noreg) { + __ set(off, L6); + conversion_off = L6; + } + __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(), + SP, conversion_off); + } + } + } + break; + + case T_DOUBLE: + assert( j_arg + 1 < total_args_passed && + in_sig_bt[j_arg + 1] == T_VOID && + out_sig_bt[c_arg+1] == T_VOID, "bad arg list"); + if (src.first()->is_stack()) { + // Stack to stack/reg is simple + long_move(masm, src, dst); + } else { + Register d = dst.first()->is_reg() ? dst.first()->as_Register() : L2; + + // Destination could be an odd reg on 32bit in which case + // we can't load direct to the destination. 
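+          // (On 32-bit SPARC a 64-bit value kept in integer registers must
+          //  start at an even-numbered register of the pair, so an odd
+          //  destination register appears to be staged through L2 and moved
+          //  into place with long_move() afterwards.)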
+ + if (!d->is_even() && wordSize == 4) { + d = L2; + } + int off = STACK_BIAS + conversion_temp * VMRegImpl::stack_slot_size; + if (Assembler::is_simm13(off)) { + __ stf(FloatRegisterImpl::D, src.first()->as_FloatRegister(), + SP, off); + __ ld_long(SP, off, d); + } else { + if (conversion_off == noreg) { + __ set(off, L6); + conversion_off = L6; + } + __ stf(FloatRegisterImpl::D, src.first()->as_FloatRegister(), + SP, conversion_off); + __ ld_long(SP, conversion_off, d); + } + if (d == L2) { + long_move(masm, reg64_to_VMRegPair(L2), dst); + } + } + break; + + case T_LONG : + // 32bit can't do a split move of something like g1 -> O0, O1 + // so use a memory temp + if (src.is_single_phys_reg() && wordSize == 4) { + Register tmp = L2; + if (dst.first()->is_reg() && + (wordSize == 8 || dst.first()->as_Register()->is_even())) { + tmp = dst.first()->as_Register(); + } + + int off = STACK_BIAS + conversion_temp * VMRegImpl::stack_slot_size; + if (Assembler::is_simm13(off)) { + __ stx(src.first()->as_Register(), SP, off); + __ ld_long(SP, off, tmp); + } else { + if (conversion_off == noreg) { + __ set(off, L6); + conversion_off = L6; + } + __ stx(src.first()->as_Register(), SP, conversion_off); + __ ld_long(SP, conversion_off, tmp); + } + + if (tmp == L2) { + long_move(masm, reg64_to_VMRegPair(L2), dst); + } + } else { + long_move(masm, src, dst); + } + break; + + case T_ADDRESS: assert(false, "found T_ADDRESS in java args"); + + default: + move32_64(masm, src, dst); + } + } + + + // If we have any strings we must store any register based arg to the stack + // This includes any still live xmm registers too. + + if (total_strings > 0 ) { + + // protect all the arg registers + __ save_frame(0); + __ mov(G2_thread, L7_thread_cache); + const Register L2_string_off = L2; + + // Get first string offset + __ set(string_locs * VMRegImpl::stack_slot_size, L2_string_off); + + for (c_arg = 0 ; c_arg < total_c_args ; c_arg++ ) { + if (out_sig_bt[c_arg] == T_ADDRESS) { + + VMRegPair dst = out_regs[c_arg]; + const Register d = dst.first()->is_reg() ? + dst.first()->as_Register()->after_save() : noreg; + + // It's a string the oop and it was already copied to the out arg + // position + if (d != noreg) { + __ mov(d, O0); + } else { + assert(Assembler::is_simm13(reg2offset(dst.first()) + STACK_BIAS), + "must be"); + __ ld_ptr(FP, reg2offset(dst.first()) + STACK_BIAS, O0); + } + Label skip; + + __ br_null(O0, false, Assembler::pn, skip); + __ delayed()->addu(FP, L2_string_off, O1); + + if (d != noreg) { + __ mov(O1, d); + } else { + assert(Assembler::is_simm13(reg2offset(dst.first()) + STACK_BIAS), + "must be"); + __ st_ptr(O1, FP, reg2offset(dst.first()) + STACK_BIAS); + } + + __ call(CAST_FROM_FN_PTR(address, SharedRuntime::get_utf), + relocInfo::runtime_call_type); + __ delayed()->addu(L2_string_off, max_dtrace_string_size, L2_string_off); + + __ bind(skip); + + } + + } + __ mov(L7_thread_cache, G2_thread); + __ restore(); + + } + + + // Ok now we are done. 
Need to place the nop that dtrace wants in order to + // patch in the trap + + int patch_offset = ((intptr_t)__ pc()) - start; + + __ nop(); + + + // Return + + __ ret(); + __ delayed()->restore(); + + __ flush(); + + nmethod *nm = nmethod::new_dtrace_nmethod( + method, masm->code(), vep_offset, patch_offset, frame_complete, + stack_slots / VMRegImpl::slots_per_word); + return nm; + +} + +#endif // HAVE_DTRACE_H + +// this function returns the adjust size (in number of words) to a c2i adapter +// activation for use during deoptimization +int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals) { + return (callee_locals - callee_parameters) * Interpreter::stackElementWords; +} + +// "Top of Stack" slots that may be unused by the calling convention but must +// otherwise be preserved. +// On Intel these are not necessary and the value can be zero. +// On Sparc this describes the words reserved for storing a register window +// when an interrupt occurs. +uint SharedRuntime::out_preserve_stack_slots() { + return 0; +} + +//------------------------------generate_deopt_blob---------------------------- +// Ought to generate an ideal graph & compile, but here's some SPARC ASM +// instead. +void SharedRuntime::generate_deopt_blob() { + // allocate space for the code + ResourceMark rm; + // setup code generation tools + //CodeBuffer buffer ("deopt_blob", 4000, 2048); + CodeBuffer buffer ("deopt_blob", 8000, 2048); + MacroAssembler* masm = new MacroAssembler( & buffer); + int frame_size_in_words; + OopMap* map = NULL; + // Account for the extra args we place on the stack + // by the time we call fetch_unroll_info + const int additional_words = 2; // deopt kind, thread + + OopMapSet *oop_maps = new OopMapSet(); + + address start = __ pc(); + Label cont; + // we use S3 for DeOpt reason register + Register reason = S3; + // use S6 for thread register + Register thread = TREG; + // use S7 for fetch_unroll_info returned UnrollBlock + Register unroll = S7; + // Prolog for non exception case! + // Correct the return address we were given. + //FIXME, return address is on the tos or Ra? + __ addiu(RA, RA, - (NativeCall::return_address_offset_long)); + // Save everything in sight. + map = RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words); + // Normal deoptimization + __ move(reason, Deoptimization::Unpack_deopt); + __ b(cont); + __ delayed()->nop(); + + int reexecute_offset = __ pc() - start; + + // Reexecute case + // return address is the pc describes what bci to do re-execute at + + // No need to update map as each call to save_live_registers will produce identical oopmap + (void) RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words); + __ move(reason, Deoptimization::Unpack_reexecute); + __ b(cont); + __ delayed()->nop(); + + int exception_offset = __ pc() - start; + // Prolog for exception case + + // all registers are dead at this entry point, except for V0 and + // V1 which contain the exception oop and exception pc + // respectively. Set them in TLS and fall thru to the + // unpack_with_exception_in_tls entry point. 
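+  // The two stores below place the exception oop (V0) and throwing pc (V1)
+  // into JavaThread; the unpack_with_exception_in_tls entry recorded right
+  // after them assumes those fields are already filled in, so the plain
+  // exception entry simply falls through.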
+ +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + __ st_ptr(V1, thread, in_bytes(JavaThread::exception_pc_offset())); + __ st_ptr(V0, thread, in_bytes(JavaThread::exception_oop_offset())); + int exception_in_tls_offset = __ pc() - start; + // new implementation because exception oop is now passed in JavaThread + + // Prolog for exception case + // All registers must be preserved because they might be used by LinearScan + // Exceptiop oop and throwing PC are passed in JavaThread + // tos: stack at point of call to method that threw the exception (i.e. only + // args are on the stack, no return address) + + // Return address will be patched later with the throwing pc. The correct value is not + // available now because loading it from memory would destroy registers. + // Save everything in sight. + // No need to update map as each call to save_live_registers will produce identical oopmap + __ addiu(RA, RA, - (NativeCall::return_address_offset_long)); + (void) RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words); + + // Now it is safe to overwrite any register + // store the correct deoptimization type + __ move(reason, Deoptimization::Unpack_exception); + // load throwing pc from JavaThread and patch it as the return address + // of the current frame. Then clear the field in JavaThread +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + __ ld_ptr(V1, thread, in_bytes(JavaThread::exception_pc_offset())); + __ st_ptr(V1, SP, RegisterSaver::raOffset() * wordSize); //save ra + __ st_ptr(R0, thread, in_bytes(JavaThread::exception_pc_offset())); + + +#ifdef ASSERT + // verify that there is really an exception oop in JavaThread + __ ld_ptr(AT, thread, in_bytes(JavaThread::exception_oop_offset())); + __ verify_oop(AT); + // verify that there is no pending exception + Label no_pending_exception; + __ ld_ptr(AT, thread, in_bytes(Thread::pending_exception_offset())); + __ beq(AT, R0, no_pending_exception); + __ delayed()->nop(); + __ stop("must not have pending exception here"); + __ bind(no_pending_exception); +#endif + __ bind(cont); + // Compiled code leaves the floating point stack dirty, empty it. + __ empty_FPU_stack(); + + + // Call C code. Need thread and this frame, but NOT official VM entry + // crud. We cannot block on this call, no GC can happen. +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + + __ move(A0, thread); + __ move(A1, reason); // exec_mode + __ addiu(SP, SP, -additional_words * wordSize); + + __ set_last_Java_frame(NOREG, NOREG, NULL); + + // Call fetch_unroll_info(). Need thread and this frame, but NOT official VM entry - cannot block on + // this call, no GC can happen. Call should capture return values. 
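+  // set_last_Java_frame() above was given a NULL pc, so the anchor pc is
+  // filled in by hand: patchable_set48 materializes an address just past the
+  // upcoming call (the fixed byte offset appears to cover the set48 sequence,
+  // the sd and the call with its delay slot) and stores it in the frame
+  // anchor, keeping this frame walkable while fetch_unroll_info() runs.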
+ + __ relocate(relocInfo::internal_pc_type); + { + intptr_t save_pc = (intptr_t)__ pc() + NativeMovConstReg::instruction_size + 28; + __ patchable_set48(AT, save_pc); + } + __ sd(AT, thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset())); + + __ call((address)Deoptimization::fetch_unroll_info); + //__ call(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info), relocInfo::runtime_call_type); + __ delayed()->nop(); + oop_maps->add_gc_map(__ pc() - start, map); + __ addiu(SP, SP, additional_words * wordSize); +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + __ reset_last_Java_frame(false); + + // Load UnrollBlock into S7 + __ move(unroll, V0); + + + // Move the unpack kind to a safe place in the UnrollBlock because + // we are very short of registers + + Address unpack_kind(unroll, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()); + __ sw(reason, unpack_kind); + // save the unpack_kind value + // Retrieve the possible live values (return values) + // All callee save registers representing jvm state + // are now in the vframeArray. + + Label noException; + __ move(AT, Deoptimization::Unpack_exception); + __ bne(AT, reason, noException);// Was exception pending? + __ delayed()->nop(); + __ ld_ptr(V0, thread, in_bytes(JavaThread::exception_oop_offset())); + __ ld_ptr(V1, thread, in_bytes(JavaThread::exception_pc_offset())); + __ st_ptr(R0, thread, in_bytes(JavaThread::exception_pc_offset())); + __ st_ptr(R0, thread, in_bytes(JavaThread::exception_oop_offset())); + + __ verify_oop(V0); + + // Overwrite the result registers with the exception results. + __ st_ptr(V0, SP, RegisterSaver::v0Offset()*wordSize); + __ st_ptr(V1, SP, RegisterSaver::v1Offset()*wordSize); + + __ bind(noException); + + + // Stack is back to only having register save data on the stack. + // Now restore the result registers. Everything else is either dead or captured + // in the vframeArray. + + RegisterSaver::restore_result_registers(masm); + // All of the register save area has been popped of the stack. Only the + // return address remains. + // Pop all the frames we must move/replace. + // Frame picture (youngest to oldest) + // 1: self-frame (no frame link) + // 2: deopting frame (no frame link) + // 3: caller of deopting frame (could be compiled/interpreted). + // + // Note: by leaving the return address of self-frame on the stack + // and using the size of frame 2 to adjust the stack + // when we are done the return to frame 3 will still be on the stack. 
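+  // The code below first pops the deoptimized frame and then rebuilds, one by
+  // one, the skeletal interpreter frames described by the UnrollBlock.
+  // Roughly, in C-like pseudo code (illustration only; names refer to the
+  // registers chosen just below):
+  //
+  //   for (i = 0; i < count; i++) {
+  //     push(pcs[i]); push(fp);          // push2(AT, FP)
+  //     fp  = sp;                        // move(FP, SP)
+  //     sp -= sizes[i] - 2 * wordSize;   // pc and fp were pushed by hand
+  //     // mark the skeletal frame walkable (last_sp / sender_sp slots)
+  //     sender_sp = sp;                  // becomes the next frame's sender sp
+  //   }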
+ + // register for the sender's sp + Register sender_sp = Rsender; + // register for frame pcs + Register pcs = T0; + // register for frame sizes + Register sizes = T1; + // register for frame count + Register count = T3; + + // Pop deoptimized frame + __ lw(AT, unroll, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes()); + __ addu(SP, SP, AT); + // sp should be pointing at the return address to the caller (3) + + // Load array of frame pcs into pcs + __ ld_ptr(pcs, unroll, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()); + __ addiu(SP, SP, wordSize); // trash the old pc + // Load array of frame sizes into T6 + __ ld_ptr(sizes, unroll, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes()); + + + + // Load count of frams into T3 + __ lw(count, unroll, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes()); + // Pick up the initial fp we should save + __ ld(FP, unroll, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()); + // Now adjust the caller's stack to make up for the extra locals + // but record the original sp so that we can save it in the skeletal interpreter + // frame and the stack walking of interpreter_sender will get the unextended sp + // value and not the "real" sp value. + __ move(sender_sp, SP); + __ lw(AT, unroll, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes()); + __ subu(SP, SP, AT); + + // Push interpreter frames in a loop + // + //Loop: + // 0x000000555bd82d18: lw t2, 0x0(t1) ; lw sizes[i] <--- error lw->ld + // 0x000000555bd82d1c: ld at, 0x0(t0) ; ld pcs[i] + // 0x000000555bd82d20: daddiu t2, t2, 0xfffffff0 ; t2 -= 16 + // 0x000000555bd82d24: daddiu sp, sp, 0xfffffff0 + // 0x000000555bd82d28: sd fp, 0x0(sp) ; push fp + // 0x000000555bd82d2c: sd at, 0x8(sp) ; push at + // 0x000000555bd82d30: daddu fp, sp, zero ; fp <- sp + // 0x000000555bd82d34: dsubu sp, sp, t2 ; sp -= t2 + // 0x000000555bd82d38: sd zero, 0xfffffff0(fp) ; __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); + // 0x000000555bd82d3c: sd s4, 0xfffffff8(fp) ; __ sd(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize); + // 0x000000555bd82d40: daddu s4, sp, zero ; move(sender_sp, SP); + // 0x000000555bd82d44: daddiu t3, t3, 0xffffffff ; count -- + // 0x000000555bd82d48: daddiu t1, t1, 0x4 ; sizes += 4 + // 0x000000555bd82d4c: bne t3, zero, 0x000000555bd82d18 + // 0x000000555bd82d50: daddiu t0, t0, 0x4 ; <--- error t0 += 8 + // + // pcs[0] = frame_pcs[0] = deopt_sender.raw_pc(); regex.split + Label loop; + __ bind(loop); + __ ld(T2, sizes, 0); // Load frame size + __ ld_ptr(AT, pcs, 0); // save return address + __ addiu(T2, T2, -2*wordSize); // we'll push pc and fp, by hand + __ push2(AT, FP); + __ move(FP, SP); + __ subu(SP, SP, T2); // Prolog! 
+ // This value is corrected by layout_activation_impl + __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); + __ sd(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize);// Make it walkable + __ move(sender_sp, SP); // pass to next frame + __ addiu(count, count, -1); // decrement counter + __ addiu(sizes, sizes, wordSize); // Bump array pointer (sizes) + __ bne(count, R0, loop); + __ delayed()->addiu(pcs, pcs, wordSize); // Bump array pointer (pcs) + __ ld(AT, pcs, 0); // frame_pcs[number_of_frames] = Interpreter::deopt_entry(vtos, 0); + // Re-push self-frame + __ push2(AT, FP); + __ move(FP, SP); + __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); + __ sd(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize); + __ addiu(SP, SP, -(frame_size_in_words - 2 - additional_words) * wordSize); + + // Restore frame locals after moving the frame + __ sd(V0, SP, RegisterSaver::v0Offset() * wordSize); + __ sd(V1, SP, RegisterSaver::v1Offset() * wordSize); + __ sdc1(F0, SP, RegisterSaver::fpResultOffset()* wordSize);// Pop float stack and store in local + __ sdc1(F1, SP, (RegisterSaver::fpResultOffset() + 1) * wordSize); + + + // Call unpack_frames(). Need thread and this frame, but NOT official VM entry - cannot block on + // this call, no GC can happen. + __ move(A1, reason); // exec_mode +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + __ move(A0, thread); // thread + __ addiu(SP, SP, (-additional_words) *wordSize); + + // set last_Java_sp, last_Java_fp + __ set_last_Java_frame(NOREG, FP, NULL); + + __ move(AT, -(StackAlignmentInBytes)); + __ andr(SP, SP, AT); // Fix stack alignment as required by ABI + + __ relocate(relocInfo::internal_pc_type); + { + intptr_t save_pc = (intptr_t)__ pc() + NativeMovConstReg::instruction_size + 28; + __ patchable_set48(AT, save_pc); + } + __ sd(AT, thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset())); + + __ call(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames), relocInfo::runtime_call_type); + __ delayed()->nop(); + // Revert SP alignment after call since we're going to do some SP relative addressing below + __ ld(SP, thread, in_bytes(JavaThread::last_Java_sp_offset())); + // Set an oopmap for the call site + oop_maps->add_gc_map(__ offset(), new OopMap( frame_size_in_words , 0)); + + __ push(V0); + +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + __ reset_last_Java_frame(true); + + // Collect return values + __ ld(V0, SP, (RegisterSaver::v0Offset() + additional_words + 1) * wordSize); + __ ld(V1, SP, (RegisterSaver::v1Offset() + additional_words + 1) * wordSize); + __ ldc1(F0, SP, (RegisterSaver::fpResultOffset() + additional_words + 1) * wordSize);// Pop float stack and store in local + __ ldc1(F1, SP, (RegisterSaver::fpResultOffset() + additional_words + 2) * wordSize); + //FIXME, + // Clear floating point stack before returning to interpreter + __ empty_FPU_stack(); + //FIXME, we should consider about float and double + // Push a float or double return value if necessary. 
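+  // (The floating-point result is already back in F0/F1 from the ldc1 loads
+  //  above, so nothing further appears to be needed here.)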
+ __ leave(); + + // Jump to interpreter + __ jr(RA); + __ delayed()->nop(); + + masm->flush(); + _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, frame_size_in_words); + _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset); +} + +#ifdef COMPILER2 + +//------------------------------generate_uncommon_trap_blob-------------------- +// Ought to generate an ideal graph & compile, but here's some SPARC ASM +// instead. +void SharedRuntime::generate_uncommon_trap_blob() { + // allocate space for the code + ResourceMark rm; + // setup code generation tools + CodeBuffer buffer ("uncommon_trap_blob", 512*80 , 512*40 ); + MacroAssembler* masm = new MacroAssembler(&buffer); + + enum frame_layout { + fp_off, fp_off2, + return_off, return_off2, + framesize + }; + assert(framesize % 4 == 0, "sp not 16-byte aligned"); + + address start = __ pc(); + + // Push self-frame. + __ daddiu(SP, SP, -framesize * BytesPerInt); + + __ sd(RA, SP, return_off * BytesPerInt); + __ sd(FP, SP, fp_off * BytesPerInt); + + __ daddiu(FP, SP, fp_off * BytesPerInt); + + // Clear the floating point exception stack + __ empty_FPU_stack(); + + Register thread = TREG; + +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + // set last_Java_sp + __ set_last_Java_frame(NOREG, FP, NULL); + __ relocate(relocInfo::internal_pc_type); + { + long save_pc = (long)__ pc() + 56; + __ patchable_set48(AT, (long)save_pc); + __ sd(AT, thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset())); + } + // Call C code. Need thread but NOT official VM entry + // crud. We cannot block on this call, no GC can happen. Call should + // capture callee-saved registers as well as return values. + __ move(A0, thread); + // argument already in T0 + __ move(A1, T0); + __ addiu(A2, R0, Deoptimization::Unpack_uncommon_trap); + __ patchable_call((address)Deoptimization::uncommon_trap); + + // Set an oopmap for the call site + OopMapSet *oop_maps = new OopMapSet(); + OopMap* map = new OopMap( framesize, 0 ); + + //oop_maps->add_gc_map( __ offset(), true, map); + oop_maps->add_gc_map( __ offset(), map); + +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + __ reset_last_Java_frame(false); + + // Load UnrollBlock into S7 + Register unroll = S7; + __ move(unroll, V0); + +#ifdef ASSERT + { Label L; + __ ld_ptr(AT, unroll, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()); + __ li(T9, Deoptimization::Unpack_uncommon_trap); + __ beq(AT, T9, L); + __ delayed()->nop(); + __ stop("SharedRuntime::generate_deopt_blob: expected Unpack_uncommon_trap"); + __ bind(L); + } +#endif + + // Pop all the frames we must move/replace. + // + // Frame picture (youngest to oldest) + // 1: self-frame (no frame link) + // 2: deopting frame (no frame link) + // 3: possible-i2c-adapter-frame + // 4: caller of deopting frame (could be compiled/interpreted. 
If interpreted we will create an + // and c2i here) + + __ daddiu(SP, SP, framesize * BytesPerInt); + + // Pop deoptimized frame + __ lw(AT, unroll, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes()); + __ daddu(SP, SP, AT); + + // register for frame pcs + Register pcs = T8; + // register for frame sizes + Register sizes = T9; + // register for frame count + Register count = T3; + // register for the sender's sp + Register sender_sp = T1; + + // sp should be pointing at the return address to the caller (4) + // Load array of frame pcs + __ ld(pcs, unroll, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()); + + // Load array of frame sizes + __ ld(sizes, unroll, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes()); + __ lwu(count, unroll, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes()); + + // Pick up the initial fp we should save + __ ld(FP, unroll, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()); + // Now adjust the caller's stack to make up for the extra locals + // but record the original sp so that we can save it in the skeletal interpreter + // frame and the stack walking of interpreter_sender will get the unextended sp + // value and not the "real" sp value. + + __ move(sender_sp, SP); + __ lw(AT, unroll, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes()); + __ dsubu(SP, SP, AT); + // Push interpreter frames in a loop + Label loop; + __ bind(loop); + __ ld(T2, sizes, 0); // Load frame size + __ ld(AT, pcs, 0); // save return address + __ daddiu(T2, T2, -2*wordSize); // we'll push pc and fp, by hand + __ push2(AT, FP); + __ move(FP, SP); + __ dsubu(SP, SP, T2); // Prolog! + // This value is corrected by layout_activation_impl + __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); + __ sd(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize);// Make it walkable + __ move(sender_sp, SP); // pass to next frame + __ daddiu(count, count, -1); // decrement counter + __ daddiu(sizes, sizes, wordSize); // Bump array pointer (sizes) + __ addiu(pcs, pcs, wordSize); // Bump array pointer (pcs) + __ bne(count, R0, loop); + __ delayed()->nop(); // Bump array pointer (pcs) + + __ ld(RA, pcs, 0); + + // Re-push self-frame + // save old & set new FP + // save final return address + __ enter(); + + // Use FP because the frames look interpreted now + // Save "the_pc" since it cannot easily be retrieved using the last_java_SP after we aligned SP. + // Don't need the precise return PC here, just precise enough to point into this code blob. + address the_pc = __ pc(); + __ set_last_Java_frame(NOREG, FP, the_pc); + + __ move(AT, -(StackAlignmentInBytes)); + __ andr(SP, SP, AT); // Fix stack alignment as required by ABI + + // Call C code. Need thread but NOT official VM entry + // crud. We cannot block on this call, no GC can happen. Call should + // restore return values to their stack-slots with the new SP. + __ move(A0, thread); + __ addiu(A1, R0, Deoptimization::Unpack_uncommon_trap); + __ patchable_call((address)Deoptimization::unpack_frames); + // Set an oopmap for the call site + oop_maps->add_gc_map( __ offset(), new OopMap( framesize, 0 ) ); + + __ reset_last_Java_frame(true); + + // Pop self-frame. + __ leave(); // Epilog! 
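+  // (UncommonTrapBlob::create() a few lines below is passed framesize / 2:
+  //  framesize counts 4-byte BytesPerInt slots, while the blob expects its
+  //  frame size in 8-byte words.)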
+ + // Jump to interpreter + __ jr(RA); + __ delayed()->nop(); + // ------------- + // make sure all code is generated + masm->flush(); + + _uncommon_trap_blob = UncommonTrapBlob::create(&buffer, oop_maps, framesize / 2); +} + +#endif // COMPILER2 + +//------------------------------generate_handler_blob------------------- +// +// Generate a special Compile2Runtime blob that saves all registers, and sets +// up an OopMap and calls safepoint code to stop the compiled code for +// a safepoint. +// +// This blob is jumped to (via a breakpoint and the signal handler) from a +// safepoint in compiled code. + +SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int pool_type) { + + // Account for thread arg in our frame + const int additional_words = 0; + int frame_size_in_words; + + assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before"); + + ResourceMark rm; + OopMapSet *oop_maps = new OopMapSet(); + OopMap* map; + + // allocate space for the code + // setup code generation tools + CodeBuffer buffer ("handler_blob", 2048, 512); + MacroAssembler* masm = new MacroAssembler( &buffer); + + const Register thread = TREG; + address start = __ pc(); + address call_pc = NULL; + bool cause_return = (pool_type == POLL_AT_RETURN); + bool save_vectors = (pool_type == POLL_AT_VECTOR_LOOP); + +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + + map = RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words, save_vectors); + +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + + // The following is basically a call_VM. However, we need the precise + // address of the call in order to generate an oopmap. Hence, we do all the + // work outselvs. + + __ set_last_Java_frame(NOREG, NOREG, NULL); + + if (!cause_return) { + // overwrite the return address pushed by save_live_registers + // Additionally, TSR is a callee-saved register so we can look at + // it later to determine if someone changed the return address for + // us! + __ ld_ptr(TSR, thread, in_bytes(JavaThread::saved_exception_pc_offset())); + __ st_ptr(TSR, SP, RegisterSaver::raOffset() * wordSize); + } + + // Do the call + __ move(A0, thread); + __ call(call_ptr); + __ delayed()->nop(); + + // Set an oopmap for the call site. This oopmap will map all + // oop-registers and debug-info registers as callee-saved. This + // will allow deoptimization at this safepoint to find all possible + // debug-info recordings, as well as let GC find all oops. + oop_maps->add_gc_map(__ offset(), map); + + Label noException; + + // Clear last_Java_sp again + __ reset_last_Java_frame(false); + + __ ld_ptr(AT, thread, in_bytes(Thread::pending_exception_offset())); + __ beq(AT, R0, noException); + __ delayed()->nop(); + + // Exception pending + + RegisterSaver::restore_live_registers(masm, save_vectors); + //forward_exception_entry need return address on the stack + __ push(RA); + __ patchable_jump((address)StubRoutines::forward_exception_entry()); + + // No exception case + __ bind(noException); + + Label no_adjust, bail; + if (SafepointMechanism::uses_thread_local_poll() && !cause_return) { + // If our stashed return pc was modified by the runtime we avoid touching it + __ ld_ptr(AT, SP, RegisterSaver::raOffset() * wordSize); + __ bne(AT, TSR, no_adjust); + __ delayed()->nop(); + +#ifdef ASSERT + // Verify the correct encoding of the poll we're about to skip. 
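+    // (The lwu/dsrl/andi/xori sequence below keeps only the opcode and rt
+    //  fields of the instruction at the stashed return pc and compares them
+    //  against the encoding of "lw AT, offset(base)", which appears to be the
+    //  form a safepoint poll takes on this port.)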
+ // See NativeInstruction::is_safepoint_poll() + __ lwu(AT, TSR, 0); + __ dsrl(AT, AT, 16); + __ andi(AT, AT, 0xfc1f); + __ xori(AT, AT, 0x8c01); + __ bne(AT, R0, bail); + __ delayed()->nop(); +#endif + // Adjust return pc forward to step over the safepoint poll instruction + __ addiu(RA, TSR, 4); // NativeInstruction::instruction_size=4 + __ st_ptr(RA, SP, RegisterSaver::raOffset() * wordSize); + } + + __ bind(no_adjust); + // Normal exit, register restoring and exit + RegisterSaver::restore_live_registers(masm, save_vectors); + __ jr(RA); + __ delayed()->nop(); + +#ifdef ASSERT + __ bind(bail); + __ stop("Attempting to adjust pc to skip safepoint poll but the return point is not what we expected"); +#endif + + // Make sure all code is generated + masm->flush(); + + // Fill-out other meta info + return SafepointBlob::create(&buffer, oop_maps, frame_size_in_words); +} + +// +// generate_resolve_blob - call resolution (static/virtual/opt-virtual/ic-miss +// +// Generate a stub that calls into vm to find out the proper destination +// of a java call. All the argument registers are live at this point +// but since this is generic code we don't know what they are and the caller +// must do any gc of the args. +// +RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const char* name) { + assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before"); + + // allocate space for the code + ResourceMark rm; + + //CodeBuffer buffer(name, 1000, 512); + CodeBuffer buffer(name, 2000, 2048); + MacroAssembler* masm = new MacroAssembler(&buffer); + + int frame_size_words; + //we put the thread in A0 + + OopMapSet *oop_maps = new OopMapSet(); + OopMap* map = NULL; + + int start = __ offset(); + map = RegisterSaver::save_live_registers(masm, 0, &frame_size_words); + + + int frame_complete = __ offset(); + +#ifndef OPT_THREAD + const Register thread = T8; + __ get_thread(thread); +#else + const Register thread = TREG; +#endif + + __ move(A0, thread); + __ set_last_Java_frame(noreg, FP, NULL); + //align the stack before invoke native + __ move(AT, -(StackAlignmentInBytes)); + __ andr(SP, SP, AT); + __ relocate(relocInfo::internal_pc_type); + { + intptr_t save_pc = (intptr_t)__ pc() + NativeMovConstReg::instruction_size + 24 + 1 * BytesPerInstWord; + __ patchable_set48(AT, save_pc); + } + __ sd(AT, thread, in_bytes(JavaThread::last_Java_pc_offset())); + + __ call(destination); + __ delayed()->nop(); + + // Set an oopmap for the call site. + // We need this not only for callee-saved registers, but also for volatile + // registers that the compiler might be keeping live across a safepoint. + oop_maps->add_gc_map( __ offset() - start, map); + // V0 contains the address we are going to jump to assuming no exception got installed +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + __ ld_ptr(SP, thread, in_bytes(JavaThread::last_Java_sp_offset())); + // clear last_Java_sp + __ reset_last_Java_frame(true); + // check for pending exceptions + Label pending; + __ ld_ptr(AT, thread, in_bytes(Thread::pending_exception_offset())); + __ bne(AT, R0, pending); + __ delayed()->nop(); + // get the returned Method* + //FIXME, do mips need this ? + __ get_vm_result_2(Rmethod, thread); // Refer to OpenJDK8 + __ st_ptr(Rmethod, SP, RegisterSaver::methodOffset() * wordSize); + __ st_ptr(V0, SP, RegisterSaver::v0Offset() * wordSize); + RegisterSaver::restore_live_registers(masm); + + // We are back the the original state on entry and ready to go the callee method. 
+ __ jr(V0); + __ delayed()->nop(); + // Pending exception after the safepoint + + __ bind(pending); + + RegisterSaver::restore_live_registers(masm); + + // exception pending => remove activation and forward to exception handler + //forward_exception_entry need return address on the stack + __ push(RA); +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + __ st_ptr(R0, thread, in_bytes(JavaThread::vm_result_offset())); + __ ld_ptr(V0, thread, in_bytes(Thread::pending_exception_offset())); + __ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); + __ delayed()->nop(); + // + // make sure all code is generated + masm->flush(); + + RuntimeStub* tmp= RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_words, oop_maps, true); + return tmp; +} + +extern "C" int SpinPause() {return 0;} + + +//------------------------------Montgomery multiplication------------------------ +// + +// Subtract 0:b from carry:a. Return carry. +static unsigned long +sub(unsigned long a[], unsigned long b[], unsigned long carry, long len) { + long borrow = 0, t = 0; + unsigned long tmp0, tmp1; + __asm__ __volatile__ ( + "0: \n" + "ld %[tmp0], 0(%[a]) \n" + "ld %[tmp1], 0(%[b]) \n" + "sltu %[t], %[tmp0], %[borrow] \n" + "dsubu %[tmp0], %[tmp0], %[borrow] \n" + "sltu %[borrow], %[tmp0], %[tmp1] \n" + "or %[borrow], %[borrow], %[t] \n" + "dsubu %[tmp0], %[tmp0], %[tmp1] \n" + "sd %[tmp0], 0(%[a]) \n" + "daddiu %[a], %[a], 8 \n" + "daddiu %[b], %[b], 8 \n" + "daddiu %[len], %[len], -1 \n" + "bgtz %[len], 0b \n" + "dsubu %[tmp0], %[carry], %[borrow] \n" + : [len]"+r"(len), [tmp0]"=&r"(tmp0), [tmp1]"=&r"(tmp1), [borrow]"+r"(borrow), [a]"+r"(a), [b]"+r"(b), [t]"+r"(t) + : [carry]"r"(carry) + : "memory" + ); + return tmp0; +} + +// Multiply (unsigned) Long A by Long B, accumulating the double- +// length result into the accumulator formed of t0, t1, and t2. +inline void MACC(unsigned long A, unsigned long B, unsigned long &t0, unsigned long &t1, unsigned long &t2) { + unsigned long hi, lo, carry = 0, t = 0; + __asm__ __volatile__( + "dmultu %[A], %[B] \n" + "mfhi %[hi] \n" + "mflo %[lo] \n" + "daddu %[t0], %[t0], %[lo] \n" + "sltu %[carry], %[t0], %[lo] \n" + "daddu %[t1], %[t1], %[carry] \n" + "sltu %[t], %[t1], %[carry] \n" + "daddu %[t1], %[t1], %[hi] \n" + "sltu %[carry], %[t1], %[hi] \n" + "or %[carry], %[carry], %[t] \n" + "daddu %[t2], %[t2], %[carry] \n" + : [hi]"=&r"(hi), [lo]"=&r"(lo), [t0]"+r"(t0), [t1]"+r"(t1), [t2]"+r"(t2), [carry]"+r"(carry), [t]"+r"(t) + : [A]"r"(A), [B]"r"(B) + : + ); +} + +// As above, but add twice the double-length result into the +// accumulator. 
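+// t2:t1:t0 form a 192-bit accumulator with t0 least significant.  A portable
+// sketch of what MACC computes, assuming a compiler with __int128 support
+// (illustration only -- the build uses the inline-asm versions above):
+//
+//   unsigned __int128 p  = (unsigned __int128)A * B;
+//   unsigned __int128 lo = (unsigned __int128)t0 + (unsigned long)p;
+//   unsigned __int128 hi = (unsigned __int128)t1 + (unsigned long)(p >> 64)
+//                          + (unsigned long)(lo >> 64);
+//   t0  = (unsigned long)lo;
+//   t1  = (unsigned long)hi;
+//   t2 += (unsigned long)(hi >> 64);
+//
+// MACC2, defined next, simply adds the product in twice.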
+inline void MACC2(unsigned long A, unsigned long B, unsigned long &t0, unsigned long &t1, unsigned long &t2) { + unsigned long hi, lo, carry = 0, t = 0; + __asm__ __volatile__( + "dmultu %[A], %[B] \n" + "mfhi %[hi] \n" + "mflo %[lo] \n" + "daddu %[t0], %[t0], %[lo] \n" + "sltu %[carry], %[t0], %[lo] \n" + "daddu %[t1], %[t1], %[carry] \n" + "sltu %[t], %[t1], %[carry] \n" + "daddu %[t1], %[t1], %[hi] \n" + "sltu %[carry], %[t1], %[hi] \n" + "or %[carry], %[carry], %[t] \n" + "daddu %[t2], %[t2], %[carry] \n" + "daddu %[t0], %[t0], %[lo] \n" + "sltu %[carry], %[t0], %[lo] \n" + "daddu %[t1], %[t1], %[carry] \n" + "sltu %[t], %[t1], %[carry] \n" + "daddu %[t1], %[t1], %[hi] \n" + "sltu %[carry], %[t1], %[hi] \n" + "or %[carry], %[carry], %[t] \n" + "daddu %[t2], %[t2], %[carry] \n" + : [hi]"=&r"(hi), [lo]"=&r"(lo), [t0]"+r"(t0), [t1]"+r"(t1), [t2]"+r"(t2), [carry]"+r"(carry), [t]"+r"(t) + : [A]"r"(A), [B]"r"(B) + : + ); +} + +// Fast Montgomery multiplication. The derivation of the algorithm is +// in A Cryptographic Library for the Motorola DSP56000, +// Dusse and Kaliski, Proc. EUROCRYPT 90, pp. 230-237. + +static void __attribute__((noinline)) +montgomery_multiply(unsigned long a[], unsigned long b[], unsigned long n[], + unsigned long m[], unsigned long inv, int len) { + unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator + int i; + + assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply"); + + for (i = 0; i < len; i++) { + int j; + for (j = 0; j < i; j++) { + MACC(a[j], b[i-j], t0, t1, t2); + MACC(m[j], n[i-j], t0, t1, t2); + } + MACC(a[i], b[0], t0, t1, t2); + m[i] = t0 * inv; + MACC(m[i], n[0], t0, t1, t2); + + assert(t0 == 0, "broken Montgomery multiply"); + + t0 = t1; t1 = t2; t2 = 0; + } + + for (i = len; i < 2*len; i++) { + int j; + for (j = i-len+1; j < len; j++) { + MACC(a[j], b[i-j], t0, t1, t2); + MACC(m[j], n[i-j], t0, t1, t2); + } + m[i-len] = t0; + t0 = t1; t1 = t2; t2 = 0; + } + + while (t0) + t0 = sub(m, n, t0, len); +} + +// Fast Montgomery squaring. This uses asymptotically 25% fewer +// multiplies so it should be up to 25% faster than Montgomery +// multiplication. However, its loop control is more complex and it +// may actually run slower on some machines. + +static void __attribute__((noinline)) +montgomery_square(unsigned long a[], unsigned long n[], + unsigned long m[], unsigned long inv, int len) { + unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator + int i; + + assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply"); + + for (i = 0; i < len; i++) { + int j; + int end = (i+1)/2; + for (j = 0; j < end; j++) { + MACC2(a[j], a[i-j], t0, t1, t2); + MACC(m[j], n[i-j], t0, t1, t2); + } + if ((i & 1) == 0) { + MACC(a[j], a[j], t0, t1, t2); + } + for (; j < i; j++) { + MACC(m[j], n[i-j], t0, t1, t2); + } + m[i] = t0 * inv; + MACC(m[i], n[0], t0, t1, t2); + + assert(t0 == 0, "broken Montgomery square"); + + t0 = t1; t1 = t2; t2 = 0; + } + + for (i = len; i < 2*len; i++) { + int start = i-len+1; + int end = start + (len - start)/2; + int j; + for (j = start; j < end; j++) { + MACC2(a[j], a[i-j], t0, t1, t2); + MACC(m[j], n[i-j], t0, t1, t2); + } + if ((i & 1) == 0) { + MACC(a[j], a[j], t0, t1, t2); + } + for (; j < len; j++) { + MACC(m[j], n[i-j], t0, t1, t2); + } + m[i-len] = t0; + t0 = t1; t1 = t2; t2 = 0; + } + + while (t0) + t0 = sub(m, n, t0, len); +} + +// Swap words in a longword. 
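+// e.g. swap(0x1111111122222222UL) == 0x2222222211111111UL.  Together with
+// reverse_words() below, this converts between the 32-bit-word representation
+// handed in by the Java side and the little-endian unsigned long layout the
+// Montgomery routines above operate on.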
+static unsigned long swap(unsigned long x) { + return (x << 32) | (x >> 32); +} + +// Copy len longwords from s to d, word-swapping as we go. The +// destination array is reversed. +static void reverse_words(unsigned long *s, unsigned long *d, int len) { + d += len; + while(len-- > 0) { + d--; + *d = swap(*s); + s++; + } +} + +// The threshold at which squaring is advantageous was determined +// experimentally on an i7-3930K (Ivy Bridge) CPU @ 3.5GHz. +// Doesn't seem to be relevant for MIPS64 so we use the same value. +#define MONTGOMERY_SQUARING_THRESHOLD 64 + +void SharedRuntime::montgomery_multiply(jint *a_ints, jint *b_ints, jint *n_ints, + jint len, jlong inv, + jint *m_ints) { + assert(len % 2 == 0, "array length in montgomery_multiply must be even"); + int longwords = len/2; + + // Make very sure we don't use so much space that the stack might + // overflow. 512 jints corresponds to an 16384-bit integer and + // will use here a total of 8k bytes of stack space. + int total_allocation = longwords * sizeof (unsigned long) * 4; + guarantee(total_allocation <= 8192, "must be"); + unsigned long *scratch = (unsigned long *)alloca(total_allocation); + + // Local scratch arrays + unsigned long + *a = scratch + 0 * longwords, + *b = scratch + 1 * longwords, + *n = scratch + 2 * longwords, + *m = scratch + 3 * longwords; + + reverse_words((unsigned long *)a_ints, a, longwords); + reverse_words((unsigned long *)b_ints, b, longwords); + reverse_words((unsigned long *)n_ints, n, longwords); + + ::montgomery_multiply(a, b, n, m, (unsigned long)inv, longwords); + + reverse_words(m, (unsigned long *)m_ints, longwords); +} + +void SharedRuntime::montgomery_square(jint *a_ints, jint *n_ints, + jint len, jlong inv, + jint *m_ints) { + assert(len % 2 == 0, "array length in montgomery_square must be even"); + int longwords = len/2; + + // Make very sure we don't use so much space that the stack might + // overflow. 512 jints corresponds to an 16384-bit integer and + // will use here a total of 6k bytes of stack space. + int total_allocation = longwords * sizeof (unsigned long) * 3; + guarantee(total_allocation <= 8192, "must be"); + unsigned long *scratch = (unsigned long *)alloca(total_allocation); + + // Local scratch arrays + unsigned long + *a = scratch + 0 * longwords, + *n = scratch + 1 * longwords, + *m = scratch + 2 * longwords; + + reverse_words((unsigned long *)a_ints, a, longwords); + reverse_words((unsigned long *)n_ints, n, longwords); + + if (len >= MONTGOMERY_SQUARING_THRESHOLD) { + ::montgomery_square(a, n, m, (unsigned long)inv, longwords); + } else { + ::montgomery_multiply(a, a, n, m, (unsigned long)inv, longwords); + } + + reverse_words(m, (unsigned long *)m_ints, longwords); +} diff --git a/src/hotspot/cpu/mips/stubGenerator_mips_64.cpp b/src/hotspot/cpu/mips/stubGenerator_mips_64.cpp new file mode 100644 index 00000000000..9fe2bc83771 --- /dev/null +++ b/src/hotspot/cpu/mips/stubGenerator_mips_64.cpp @@ -0,0 +1,2162 @@ +/* + * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "gc/shared/barrierSet.hpp" +#include "gc/shared/barrierSetAssembler.hpp" +#include "interpreter/interpreter.hpp" +#include "nativeInst_mips.hpp" +#include "oops/instanceOop.hpp" +#include "oops/method.hpp" +#include "oops/objArrayKlass.hpp" +#include "oops/oop.inline.hpp" +#include "prims/methodHandles.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/handles.inline.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubCodeGenerator.hpp" +#include "runtime/stubRoutines.hpp" +#include "runtime/thread.inline.hpp" +#ifdef COMPILER2 +#include "opto/runtime.hpp" +#endif + +// Declaration and definition of StubGenerator (no .hpp file). +// For a more detailed description of the stub routine structure +// see the comment in stubRoutines.hpp + +#define __ _masm-> + +#define T0 RT0 +#define T1 RT1 +#define T2 RT2 +#define T3 RT3 +#define T8 RT8 +#define T9 RT9 + +#define TIMES_OOP (UseCompressedOops ? Address::times_4 : Address::times_8) +//#define a__ ((Assembler*)_masm)-> + +//#ifdef PRODUCT +//#define BLOCK_COMMENT(str) /* nothing */ +//#else +//#define BLOCK_COMMENT(str) __ block_comment(str) +//#endif + +//#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") +const int MXCSR_MASK = 0xFFC0; // Mask out any pending exceptions + +// Stub Code definitions + +class StubGenerator: public StubCodeGenerator { + private: + + // ABI mips n64 + // This fig is not MIPS ABI. It is call Java from C ABI. + // Call stubs are used to call Java from C + // + // [ return_from_Java ] + // [ argument word n-1 ] <--- sp + // ... + // [ argument word 0 ] + // ... + // -8 [ S6 ] + // -7 [ S5 ] + // -6 [ S4 ] + // -5 [ S3 ] + // -4 [ S1 ] + // -3 [ TSR(S2) ] + // -2 [ LVP(S7) ] + // -1 [ BCP(S1) ] + // 0 [ saved fp ] <--- fp_after_call + // 1 [ return address ] + // 2 [ ptr. to call wrapper ] <--- a0 (old sp -->)fp + // 3 [ result ] <--- a1 + // 4 [ result_type ] <--- a2 + // 5 [ method ] <--- a3 + // 6 [ entry_point ] <--- a4 + // 7 [ parameters ] <--- a5 + // 8 [ parameter_size ] <--- a6 + // 9 [ thread ] <--- a7 + + // + // n64 does not save paras in sp. + // + // [ return_from_Java ] + // [ argument word n-1 ] <--- sp + // ... + // [ argument word 0 ] + // ... + //-13 [ thread ] + //-12 [ result_type ] <--- a2 + //-11 [ result ] <--- a1 + //-10 [ ] + // -9 [ ptr. to call wrapper ] <--- a0 + // -8 [ S6 ] + // -7 [ S5 ] + // -6 [ S4 ] + // -5 [ S3 ] + // -4 [ S1 ] + // -3 [ TSR(S2) ] + // -2 [ LVP(S7) ] + // -1 [ BCP(S1) ] + // 0 [ saved fp ] <--- fp_after_call + // 1 [ return address ] + // 2 [ ] <--- old sp + // + // Find a right place in the call_stub for GP. + // GP will point to the starting point of Interpreter::dispatch_table(itos). 
+ // It should be saved/restored before/after Java calls. + // + enum call_stub_layout { + RA_off = 1, + FP_off = 0, + BCP_off = -1, + LVP_off = -2, + TSR_off = -3, + S1_off = -4, + S3_off = -5, + S4_off = -6, + S5_off = -7, + S6_off = -8, + call_wrapper_off = -9, + result_off = -11, + result_type_off = -12, + thread_off = -13, + total_off = thread_off - 1, + GP_off = -14, + }; + + address generate_call_stub(address& return_address) { + + StubCodeMark mark(this, "StubRoutines", "call_stub"); + address start = __ pc(); + + // same as in generate_catch_exception()! + + // stub code + // save ra and fp + __ enter(); + // I think 14 is the max gap between argument and callee saved register + assert((int)frame::entry_frame_call_wrapper_offset == (int)call_wrapper_off, "adjust this code"); + __ daddiu(SP, SP, total_off * wordSize); + __ sd(BCP, FP, BCP_off * wordSize); + __ sd(LVP, FP, LVP_off * wordSize); + __ sd(TSR, FP, TSR_off * wordSize); + __ sd(S1, FP, S1_off * wordSize); + __ sd(S3, FP, S3_off * wordSize); + __ sd(S4, FP, S4_off * wordSize); + __ sd(S5, FP, S5_off * wordSize); + __ sd(S6, FP, S6_off * wordSize); + __ sd(A0, FP, call_wrapper_off * wordSize); + __ sd(A1, FP, result_off * wordSize); + __ sd(A2, FP, result_type_off * wordSize); + __ sd(A7, FP, thread_off * wordSize); + __ sd(GP, FP, GP_off * wordSize); + + __ set64(GP, (long)Interpreter::dispatch_table(itos)); + +#ifdef OPT_THREAD + __ move(TREG, A7); +#endif + //add for compressedoops + __ reinit_heapbase(); + +#ifdef ASSERT + // make sure we have no pending exceptions + { + Label L; + __ ld(AT, A7, in_bytes(Thread::pending_exception_offset())); + __ beq(AT, R0, L); + __ delayed()->nop(); + /* FIXME: I do not know how to realize stop in mips arch, do it in the future */ + __ stop("StubRoutines::call_stub: entered with pending exception"); + __ bind(L); + } +#endif + + // pass parameters if any + // A5: parameter + // A6: parameter_size + // T0: parameter_size_tmp(--) + // T2: offset(++) + // T3: tmp + Label parameters_done; + // judge if the parameter_size equals 0 + __ beq(A6, R0, parameters_done); + __ delayed()->nop(); + __ dsll(AT, A6, Interpreter::logStackElementSize); + __ dsubu(SP, SP, AT); + __ move(AT, -StackAlignmentInBytes); + __ andr(SP, SP , AT); + // Copy Java parameters in reverse order (receiver last) + // Note that the argument order is inverted in the process + Label loop; + __ move(T0, A6); + __ move(T2, R0); + __ bind(loop); + + // get parameter + __ dsll(T3, T0, LogBytesPerWord); + __ daddu(T3, T3, A5); + __ ld(AT, T3, -wordSize); + __ dsll(T3, T2, LogBytesPerWord); + __ daddu(T3, T3, SP); + __ sd(AT, T3, Interpreter::expr_offset_in_bytes(0)); + __ daddiu(T2, T2, 1); + __ daddiu(T0, T0, -1); + __ bne(T0, R0, loop); + __ delayed()->nop(); + // advance to next parameter + + // call Java function + __ bind(parameters_done); + + // receiver in V0, methodOop in Rmethod + + __ move(Rmethod, A3); + __ move(Rsender, SP); //set sender sp + __ jalr(A4); + __ delayed()->nop(); + return_address = __ pc(); + + Label common_return; + __ bind(common_return); + + // store result depending on type + // (everything that is not T_LONG, T_FLOAT or T_DOUBLE is treated as T_INT) + __ ld(T0, FP, result_off * wordSize); // result --> T0 + Label is_long, is_float, is_double, exit; + __ ld(T2, FP, result_type_off * wordSize); // result_type --> T2 + __ daddiu(T3, T2, (-1) * T_LONG); + __ beq(T3, R0, is_long); + __ delayed()->daddiu(T3, T2, (-1) * T_FLOAT); + __ beq(T3, R0, is_float); + __ delayed()->daddiu(T3, T2, (-1) * 
T_DOUBLE); + __ beq(T3, R0, is_double); + __ delayed()->nop(); + + // handle T_INT case + __ sd(V0, T0, 0 * wordSize); + __ bind(exit); + + // restore + __ ld(BCP, FP, BCP_off * wordSize); + __ ld(LVP, FP, LVP_off * wordSize); + __ ld(GP, FP, GP_off * wordSize); + __ ld(TSR, FP, TSR_off * wordSize); + + __ ld(S1, FP, S1_off * wordSize); + __ ld(S3, FP, S3_off * wordSize); + __ ld(S4, FP, S4_off * wordSize); + __ ld(S5, FP, S5_off * wordSize); + __ ld(S6, FP, S6_off * wordSize); + + __ leave(); + + // return + __ jr(RA); + __ delayed()->nop(); + + // handle return types different from T_INT + __ bind(is_long); + __ sd(V0, T0, 0 * wordSize); + __ b(exit); + __ delayed()->nop(); + + __ bind(is_float); + __ swc1(F0, T0, 0 * wordSize); + __ b(exit); + __ delayed()->nop(); + + __ bind(is_double); + __ sdc1(F0, T0, 0 * wordSize); + __ b(exit); + __ delayed()->nop(); + //FIXME, 1.6 mips version add operation of fpu here + StubRoutines::gs2::set_call_stub_compiled_return(__ pc()); + __ b(common_return); + __ delayed()->nop(); + return start; + } + + // Return point for a Java call if there's an exception thrown in + // Java code. The exception is caught and transformed into a + // pending exception stored in JavaThread that can be tested from + // within the VM. + // + // Note: Usually the parameters are removed by the callee. In case + // of an exception crossing an activation frame boundary, that is + // not the case if the callee is compiled code => need to setup the + // sp. + // + // V0: exception oop + + address generate_catch_exception() { + StubCodeMark mark(this, "StubRoutines", "catch_exception"); + address start = __ pc(); + + Register thread = TREG; + + // get thread directly +#ifndef OPT_THREAD + __ ld(thread, FP, thread_off * wordSize); +#endif + +#ifdef ASSERT + // verify that threads correspond + { Label L; + __ get_thread(T8); + __ beq(T8, thread, L); + __ delayed()->nop(); + __ stop("StubRoutines::catch_exception: threads must correspond"); + __ bind(L); + } +#endif + // set pending exception + __ verify_oop(V0); + __ sd(V0, thread, in_bytes(Thread::pending_exception_offset())); + __ li(AT, (long)__FILE__); + __ sd(AT, thread, in_bytes(Thread::exception_file_offset ())); + __ li(AT, (long)__LINE__); + __ sd(AT, thread, in_bytes(Thread::exception_line_offset ())); + + // complete return to VM + assert(StubRoutines::_call_stub_return_address != NULL, "_call_stub_return_address must have been generated before"); + __ jmp(StubRoutines::_call_stub_return_address, relocInfo::none); + __ delayed()->nop(); + + return start; + } + + // Continuation point for runtime calls returning with a pending + // exception. The pending exception check happened in the runtime + // or native call stub. The pending exception in Thread is + // converted into a Java-level exception. + // + // Contract with Java-level exception handlers: + // V0: exception + // V1: throwing pc + // + // NOTE: At entry of this stub, exception-pc must be on stack !! + + address generate_forward_exception() { + StubCodeMark mark(this, "StubRoutines", "forward exception"); + //Register thread = TREG; + Register thread = TREG; + address start = __ pc(); + + // Upon entry, the sp points to the return address returning into + // Java (interpreted or compiled) code; i.e., the return address + // throwing pc. + // + // Arguments pushed before the runtime call are still on the stack + // but the exception handler will reset the stack pointer -> + // ignore them. A potential result in registers can be ignored as + // well. 
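+    // The stub therefore (1) asks the VM for the handler that corresponds to
+    // the throwing pc found on the stack, (2) moves the pending exception out
+    // of the thread into V0 and clears the field, and (3) jumps to the handler
+    // with the throwing pc in V1.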
+ +#ifndef OPT_THREAD + __ get_thread(thread); +#endif +#ifdef ASSERT + // make sure this code is only executed if there is a pending exception + { + Label L; + __ ld(AT, thread, in_bytes(Thread::pending_exception_offset())); + __ bne(AT, R0, L); + __ delayed()->nop(); + __ stop("StubRoutines::forward exception: no pending exception (1)"); + __ bind(L); + } +#endif + + // compute exception handler into T9 + __ ld(A1, SP, 0); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), thread, A1); + __ move(T9, V0); + __ pop(V1); + +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + __ ld(V0, thread, in_bytes(Thread::pending_exception_offset())); + __ sd(R0, thread, in_bytes(Thread::pending_exception_offset())); + +#ifdef ASSERT + // make sure exception is set + { + Label L; + __ bne(V0, R0, L); + __ delayed()->nop(); + __ stop("StubRoutines::forward exception: no pending exception (2)"); + __ bind(L); + } +#endif + + // continue at exception handler (return address removed) + // V0: exception + // T9: exception handler + // V1: throwing pc + __ verify_oop(V0); + __ jr(T9); + __ delayed()->nop(); + + return start; + } + + // Non-destructive plausibility checks for oops + // + address generate_verify_oop() { + StubCodeMark mark(this, "StubRoutines", "verify_oop"); + address start = __ pc(); + __ reinit_heapbase(); + __ verify_oop_subroutine(); + address end = __ pc(); + return start; + } + + // + // Generate overlap test for array copy stubs + // + // Input: + // A0 - array1 + // A1 - array2 + // A2 - element count + // + + // use T9 as temp + void array_overlap_test(address no_overlap_target, int log2_elem_size) { + int elem_size = 1 << log2_elem_size; + Address::ScaleFactor sf = Address::times_1; + + switch (log2_elem_size) { + case 0: sf = Address::times_1; break; + case 1: sf = Address::times_2; break; + case 2: sf = Address::times_4; break; + case 3: sf = Address::times_8; break; + } + + __ dsll(AT, A2, sf); + __ daddu(AT, AT, A0); + __ daddiu(T9, AT, -elem_size); + __ dsubu(AT, A1, A0); + __ blez(AT, no_overlap_target); + __ delayed()->nop(); + __ dsubu(AT, A1, T9); + __ bgtz(AT, no_overlap_target); + __ delayed()->nop(); + + // If A0 = 0xf... and A1 = 0x0..., than goto no_overlap_target + Label L; + __ bgez(A0, L); + __ delayed()->nop(); + __ bgtz(A1, no_overlap_target); + __ delayed()->nop(); + __ bind(L); + + } + + // + // Generate stub for array fill. If "aligned" is true, the + // "to" address is assumed to be heapword aligned. 
+ // + // Arguments for generated stub: + // to: c_rarg0 + // value: c_rarg1 + // count: c_rarg2 treated as signed + // + address generate_fill(BasicType t, bool aligned, const char *name) { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", name); + address start = __ pc(); + + const Register to = A0; // source array address + const Register value = A1; // value + const Register count = A2; // elements count + + const Register cnt_words = T8; // temp register + + __ enter(); + + Label L_fill_elements, L_exit1; + + int shift = -1; + switch (t) { + case T_BYTE: + shift = 0; + __ slti(AT, count, 8 >> shift); // Short arrays (< 8 bytes) fill by element + __ dins(value, value, 8, 8); // 8 bit -> 16 bit + __ dins(value, value, 16, 16); // 16 bit -> 32 bit + __ bne(AT, R0, L_fill_elements); + __ delayed()->nop(); + break; + case T_SHORT: + shift = 1; + __ slti(AT, count, 8 >> shift); // Short arrays (< 8 bytes) fill by element + __ dins(value, value, 16, 16); // 16 bit -> 32 bit + __ bne(AT, R0, L_fill_elements); + __ delayed()->nop(); + break; + case T_INT: + shift = 2; + __ slti(AT, count, 8 >> shift); // Short arrays (< 8 bytes) fill by element + __ bne(AT, R0, L_fill_elements); + __ delayed()->nop(); + break; + default: ShouldNotReachHere(); + } + + // Align source address at 8 bytes address boundary. + Label L_skip_align1, L_skip_align2, L_skip_align4; + if (!aligned) { + switch (t) { + case T_BYTE: + // One byte misalignment happens only for byte arrays. + __ andi(AT, to, 1); + __ beq(AT, R0, L_skip_align1); + __ delayed()->nop(); + __ sb(value, to, 0); + __ daddiu(to, to, 1); + __ addiu32(count, count, -1); + __ bind(L_skip_align1); + // Fallthrough + case T_SHORT: + // Two bytes misalignment happens only for byte and short (char) arrays. + __ andi(AT, to, 1 << 1); + __ beq(AT, R0, L_skip_align2); + __ delayed()->nop(); + __ sh(value, to, 0); + __ daddiu(to, to, 2); + __ addiu32(count, count, -(2 >> shift)); + __ bind(L_skip_align2); + // Fallthrough + case T_INT: + // Align to 8 bytes, we know we are 4 byte aligned to start. + __ andi(AT, to, 1 << 2); + __ beq(AT, R0, L_skip_align4); + __ delayed()->nop(); + __ sw(value, to, 0); + __ daddiu(to, to, 4); + __ addiu32(count, count, -(4 >> shift)); + __ bind(L_skip_align4); + break; + default: ShouldNotReachHere(); + } + } + + // + // Fill large chunks + // + __ srl(cnt_words, count, 3 - shift); // number of words + __ dinsu(value, value, 32, 32); // 32 bit -> 64 bit + __ sll(AT, cnt_words, 3 - shift); + __ subu32(count, count, AT); + + Label L_loop_begin, L_loop_not_64bytes_fill, L_loop_end; + __ addiu32(AT, cnt_words, -8); + __ bltz(AT, L_loop_not_64bytes_fill); + __ delayed()->nop(); + __ bind(L_loop_begin); + __ sd(value, to, 0); + __ sd(value, to, 8); + __ sd(value, to, 16); + __ sd(value, to, 24); + __ sd(value, to, 32); + __ sd(value, to, 40); + __ sd(value, to, 48); + __ sd(value, to, 56); + __ daddiu(to, to, 64); + __ addiu32(cnt_words, cnt_words, -8); + __ addiu32(AT, cnt_words, -8); + __ bgez(AT, L_loop_begin); + __ delayed()->nop(); + + __ bind(L_loop_not_64bytes_fill); + __ beq(cnt_words, R0, L_loop_end); + __ delayed()->nop(); + __ sd(value, to, 0); + __ daddiu(to, to, 8); + __ addiu32(cnt_words, cnt_words, -1); + __ b(L_loop_not_64bytes_fill); + __ delayed()->nop(); + __ bind(L_loop_end); + + // Remaining count is less than 8 bytes. Fill it by a single store. + // Note that the total length is no less than 8 bytes. 
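+    // (For the byte/short case below, the single sd is positioned so that it
+    //  ends exactly at the end of the fill region: 'to' is advanced past the
+    //  remaining elements and the store writes the 8 bytes at to - 8,
+    //  overlapping bytes that were already filled.  This is safe precisely
+    //  because the total length is at least 8 bytes.)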
+ if (t == T_BYTE || t == T_SHORT) { + Label L_exit1; + __ beq(count, R0, L_exit1); + __ delayed()->nop(); + __ sll(AT, count, shift); + __ daddu(to, to, AT); // points to the end + __ sd(value, to, -8); // overwrite some elements + __ bind(L_exit1); + __ leave(); + __ jr(RA); + __ delayed()->nop(); + } + + // Handle copies less than 8 bytes. + Label L_fill_2, L_fill_4, L_exit2; + __ bind(L_fill_elements); + switch (t) { + case T_BYTE: + __ andi(AT, count, 1); + __ beq(AT, R0, L_fill_2); + __ delayed()->nop(); + __ sb(value, to, 0); + __ daddiu(to, to, 1); + __ bind(L_fill_2); + __ andi(AT, count, 1 << 1); + __ beq(AT, R0, L_fill_4); + __ delayed()->nop(); + __ sh(value, to, 0); + __ daddiu(to, to, 2); + __ bind(L_fill_4); + __ andi(AT, count, 1 << 2); + __ beq(AT, R0, L_exit2); + __ delayed()->nop(); + __ sw(value, to, 0); + break; + case T_SHORT: + __ andi(AT, count, 1); + __ beq(AT, R0, L_fill_4); + __ delayed()->nop(); + __ sh(value, to, 0); + __ daddiu(to, to, 2); + __ bind(L_fill_4); + __ andi(AT, count, 1 << 1); + __ beq(AT, R0, L_exit2); + __ delayed()->nop(); + __ sw(value, to, 0); + break; + case T_INT: + __ beq(count, R0, L_exit2); + __ delayed()->nop(); + __ sw(value, to, 0); + break; + default: ShouldNotReachHere(); + } + __ bind(L_exit2); + __ leave(); + __ jr(RA); + __ delayed()->nop(); + return start; + } + + // Arguments: + // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary + // ignored + // name - stub name string + // + // Inputs: + // c_rarg0 - source array address + // c_rarg1 - destination array address + // c_rarg2 - element count, treated as ssize_t, can be zero + // + // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries, + // we let the hardware handle it. The one to eight bytes within words, + // dwords or qwords that span cache line boundaries will still be loaded + // and stored atomically. + // + // Side Effects: + // disjoint_byte_copy_entry is set to the no-overlap entry point + // used by generate_conjoint_byte_copy(). + // + address generate_disjoint_byte_copy(bool aligned, const char * name) { + StubCodeMark mark(this, "StubRoutines", name); + __ align(CodeEntryAlignment); + + + Register tmp1 = T0; + Register tmp2 = T1; + Register tmp3 = T3; + + address start = __ pc(); + + __ push(tmp1); + __ push(tmp2); + __ push(tmp3); + __ move(tmp1, A0); + __ move(tmp2, A1); + __ move(tmp3, A2); + + + Label l_1, l_2, l_3, l_4, l_5, l_6, l_7, l_8, l_9, l_10, l_11; + Label l_debug; + + __ daddiu(AT, tmp3, -9); //why the number is 9 ? + __ blez(AT, l_9); + __ delayed()->nop(); + + if (!aligned) { + __ xorr(AT, tmp1, tmp2); + __ andi(AT, AT, 1); + __ bne(AT, R0, l_9); // if arrays don't have the same alignment mod 2, do 1 element copy + __ delayed()->nop(); + + __ andi(AT, tmp1, 1); + __ beq(AT, R0, l_10); //copy 1 enlement if necessary to aligh to 2 bytes + __ delayed()->nop(); + + __ lb(AT, tmp1, 0); + __ daddiu(tmp1, tmp1, 1); + __ sb(AT, tmp2, 0); + __ daddiu(tmp2, tmp2, 1); + __ daddiu(tmp3, tmp3, -1); + __ bind(l_10); + + __ xorr(AT, tmp1, tmp2); + __ andi(AT, AT, 3); + __ bne(AT, R0, l_1); // if arrays don't have the same alignment mod 4, do 2 elements copy + __ delayed()->nop(); + + // At this point it is guaranteed that both, from and to have the same alignment mod 4. + + // Copy 2 elements if necessary to align to 4 bytes. 
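+ // (That is: if 'from' is not yet 4-byte aligned, move one 16-bit chunk so it becomes
+ // aligned; the mod-4 check above guarantees 'to' reaches 4-byte alignment at the same
+ // time, so the wider copies below stay aligned on both sides.)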
+ __ andi(AT, tmp1, 3); + __ beq(AT, R0, l_2); + __ delayed()->nop(); + + __ lhu(AT, tmp1, 0); + __ daddiu(tmp1, tmp1, 2); + __ sh(AT, tmp2, 0); + __ daddiu(tmp2, tmp2, 2); + __ daddiu(tmp3, tmp3, -2); + __ bind(l_2); + + // At this point the positions of both, from and to, are at least 4 byte aligned. + + // Copy 4 elements at a time. + // Align to 8 bytes, but only if both, from and to, have same alignment mod 8. + __ xorr(AT, tmp1, tmp2); + __ andi(AT, AT, 7); + __ bne(AT, R0, l_6); // not same alignment mod 8 -> copy 2, either from or to will be unaligned + __ delayed()->nop(); + + // Copy a 4 elements if necessary to align to 8 bytes. + __ andi(AT, tmp1, 7); + __ beq(AT, R0, l_7); + __ delayed()->nop(); + + __ lw(AT, tmp1, 0); + __ daddiu(tmp3, tmp3, -4); + __ sw(AT, tmp2, 0); + { // FasterArrayCopy + __ daddiu(tmp1, tmp1, 4); + __ daddiu(tmp2, tmp2, 4); + } + } + + __ bind(l_7); + + // Copy 4 elements at a time; either the loads or the stores can + // be unaligned if aligned == false. + + { // FasterArrayCopy + __ daddiu(AT, tmp3, -7); + __ blez(AT, l_6); // copy 4 at a time if less than 4 elements remain + __ delayed()->nop(); + + __ bind(l_8); + // For Loongson, there is 128-bit memory access. TODO + __ ld(AT, tmp1, 0); + __ sd(AT, tmp2, 0); + __ daddiu(tmp1, tmp1, 8); + __ daddiu(tmp2, tmp2, 8); + __ daddiu(tmp3, tmp3, -8); + __ daddiu(AT, tmp3, -8); + __ bgez(AT, l_8); + __ delayed()->nop(); + } + __ bind(l_6); + + // copy 4 bytes at a time + { // FasterArrayCopy + __ daddiu(AT, tmp3, -3); + __ blez(AT, l_1); + __ delayed()->nop(); + + __ bind(l_3); + __ lw(AT, tmp1, 0); + __ sw(AT, tmp2, 0); + __ daddiu(tmp1, tmp1, 4); + __ daddiu(tmp2, tmp2, 4); + __ daddiu(tmp3, tmp3, -4); + __ daddiu(AT, tmp3, -4); + __ bgez(AT, l_3); + __ delayed()->nop(); + + } + + // do 2 bytes copy + __ bind(l_1); + { + __ daddiu(AT, tmp3, -1); + __ blez(AT, l_9); + __ delayed()->nop(); + + __ bind(l_5); + __ lhu(AT, tmp1, 0); + __ daddiu(tmp3, tmp3, -2); + __ sh(AT, tmp2, 0); + __ daddiu(tmp1, tmp1, 2); + __ daddiu(tmp2, tmp2, 2); + __ daddiu(AT, tmp3, -2); + __ bgez(AT, l_5); + __ delayed()->nop(); + } + + //do 1 element copy--byte + __ bind(l_9); + __ beq(R0, tmp3, l_4); + __ delayed()->nop(); + + { + __ bind(l_11); + __ lb(AT, tmp1, 0); + __ daddiu(tmp3, tmp3, -1); + __ sb(AT, tmp2, 0); + __ daddiu(tmp1, tmp1, 1); + __ daddiu(tmp2, tmp2, 1); + __ daddiu(AT, tmp3, -1); + __ bgez(AT, l_11); + __ delayed()->nop(); + } + + __ bind(l_4); + __ pop(tmp3); + __ pop(tmp2); + __ pop(tmp1); + + __ jr(RA); + __ delayed()->nop(); + + return start; + } + + // Arguments: + // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary + // ignored + // name - stub name string + // + // Inputs: + // A0 - source array address + // A1 - destination array address + // A2 - element count, treated as ssize_t, can be zero + // + // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries, + // we let the hardware handle it. The one to eight bytes within words, + // dwords or qwords that span cache line boundaries will still be loaded + // and stored atomically. + // + address generate_conjoint_byte_copy(bool aligned, const char *name) { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", name); + address start = __ pc(); + + Label l_copy_4_bytes_loop, l_copy_suffix, l_copy_suffix_loop, l_exit; + Label l_copy_byte, l_from_unaligned, l_unaligned, l_4_bytes_aligned; + + address nooverlap_target = aligned ? 
+ StubRoutines::arrayof_jbyte_disjoint_arraycopy() : + StubRoutines::jbyte_disjoint_arraycopy(); + + array_overlap_test(nooverlap_target, 0); + + const Register from = A0; // source array address + const Register to = A1; // destination array address + const Register count = A2; // elements count + const Register end_from = T3; // source array end address + const Register end_to = T0; // destination array end address + const Register end_count = T1; // destination array end address + + __ push(end_from); + __ push(end_to); + __ push(end_count); + __ push(T8); + + // copy from high to low + __ move(end_count, count); + __ daddu(end_from, from, end_count); + __ daddu(end_to, to, end_count); + + // If end_from and end_to has differante alignment, unaligned copy is performed. + __ andi(AT, end_from, 3); + __ andi(T8, end_to, 3); + __ bne(AT, T8, l_copy_byte); + __ delayed()->nop(); + + // First deal with the unaligned data at the top. + __ bind(l_unaligned); + __ beq(end_count, R0, l_exit); + __ delayed()->nop(); + + __ andi(AT, end_from, 3); + __ bne(AT, R0, l_from_unaligned); + __ delayed()->nop(); + + __ andi(AT, end_to, 3); + __ beq(AT, R0, l_4_bytes_aligned); + __ delayed()->nop(); + + __ bind(l_from_unaligned); + __ lb(AT, end_from, -1); + __ sb(AT, end_to, -1); + __ daddiu(end_from, end_from, -1); + __ daddiu(end_to, end_to, -1); + __ daddiu(end_count, end_count, -1); + __ b(l_unaligned); + __ delayed()->nop(); + + // now end_to, end_from point to 4-byte aligned high-ends + // end_count contains byte count that is not copied. + // copy 4 bytes at a time + __ bind(l_4_bytes_aligned); + + __ move(T8, end_count); + __ daddiu(AT, end_count, -3); + __ blez(AT, l_copy_suffix); + __ delayed()->nop(); + + //__ andi(T8, T8, 3); + __ lea(end_from, Address(end_from, -4)); + __ lea(end_to, Address(end_to, -4)); + + __ dsrl(end_count, end_count, 2); + __ align(16); + __ bind(l_copy_4_bytes_loop); //l_copy_4_bytes + __ lw(AT, end_from, 0); + __ sw(AT, end_to, 0); + __ addiu(end_from, end_from, -4); + __ addiu(end_to, end_to, -4); + __ addiu(end_count, end_count, -1); + __ bne(end_count, R0, l_copy_4_bytes_loop); + __ delayed()->nop(); + + __ b(l_copy_suffix); + __ delayed()->nop(); + // copy dwords aligned or not with repeat move + // l_copy_suffix + // copy suffix (0-3 bytes) + __ bind(l_copy_suffix); + __ andi(T8, T8, 3); + __ beq(T8, R0, l_exit); + __ delayed()->nop(); + __ addiu(end_from, end_from, 3); + __ addiu(end_to, end_to, 3); + __ bind(l_copy_suffix_loop); + __ lb(AT, end_from, 0); + __ sb(AT, end_to, 0); + __ addiu(end_from, end_from, -1); + __ addiu(end_to, end_to, -1); + __ addiu(T8, T8, -1); + __ bne(T8, R0, l_copy_suffix_loop); + __ delayed()->nop(); + + __ bind(l_copy_byte); + __ beq(end_count, R0, l_exit); + __ delayed()->nop(); + __ lb(AT, end_from, -1); + __ sb(AT, end_to, -1); + __ daddiu(end_from, end_from, -1); + __ daddiu(end_to, end_to, -1); + __ daddiu(end_count, end_count, -1); + __ b(l_copy_byte); + __ delayed()->nop(); + + __ bind(l_exit); + __ pop(T8); + __ pop(end_count); + __ pop(end_to); + __ pop(end_from); + __ jr(RA); + __ delayed()->nop(); + return start; + } + + // Generate stub for disjoint short copy. If "aligned" is true, the + // "from" and "to" addresses are assumed to be heapword aligned. + // + // Arguments for generated stub: + // from: A0 + // to: A1 + // elm.count: A2 treated as signed + // one element: 2 bytes + // + // Strategy for aligned==true: + // + // If length <= 9: + // 1. copy 1 elements at a time (l_5) + // + // If length > 9: + // 1. 
copy 4 elements at a time until less than 4 elements are left (l_7) + // 2. copy 2 elements at a time until less than 2 elements are left (l_6) + // 3. copy last element if one was left in step 2. (l_1) + // + // + // Strategy for aligned==false: + // + // If length <= 9: same as aligned==true case + // + // If length > 9: + // 1. continue with step 7. if the alignment of from and to mod 4 + // is different. + // 2. align from and to to 4 bytes by copying 1 element if necessary + // 3. at l_2 from and to are 4 byte aligned; continue with + // 6. if they cannot be aligned to 8 bytes because they have + // got different alignment mod 8. + // 4. at this point we know that both, from and to, have the same + // alignment mod 8, now copy one element if necessary to get + // 8 byte alignment of from and to. + // 5. copy 4 elements at a time until less than 4 elements are + // left; depending on step 3. all load/stores are aligned. + // 6. copy 2 elements at a time until less than 2 elements are + // left. (l_6) + // 7. copy 1 element at a time. (l_5) + // 8. copy last element if one was left in step 6. (l_1) + + address generate_disjoint_short_copy(bool aligned, const char * name) { + StubCodeMark mark(this, "StubRoutines", name); + __ align(CodeEntryAlignment); + + Register tmp1 = T0; + Register tmp2 = T1; + Register tmp3 = T3; + Register tmp4 = T8; + Register tmp5 = T9; + Register tmp6 = T2; + + address start = __ pc(); + + __ push(tmp1); + __ push(tmp2); + __ push(tmp3); + __ move(tmp1, A0); + __ move(tmp2, A1); + __ move(tmp3, A2); + + Label l_1, l_2, l_3, l_4, l_5, l_6, l_7, l_8, l_9, l_10, l_11, l_12, l_13, l_14; + Label l_debug; + // don't try anything fancy if arrays don't have many elements + __ daddiu(AT, tmp3, -23); + __ blez(AT, l_14); + __ delayed()->nop(); + // move push here + __ push(tmp4); + __ push(tmp5); + __ push(tmp6); + + if (!aligned) { + __ xorr(AT, A0, A1); + __ andi(AT, AT, 1); + __ bne(AT, R0, l_debug); // if arrays don't have the same alignment mod 2, can this happen? + __ delayed()->nop(); + + __ xorr(AT, A0, A1); + __ andi(AT, AT, 3); + __ bne(AT, R0, l_1); // if arrays don't have the same alignment mod 4, do 1 element copy + __ delayed()->nop(); + + // At this point it is guaranteed that both, from and to have the same alignment mod 4. + + // Copy 1 element if necessary to align to 4 bytes. + __ andi(AT, A0, 3); + __ beq(AT, R0, l_2); + __ delayed()->nop(); + + __ lhu(AT, tmp1, 0); + __ daddiu(tmp1, tmp1, 2); + __ sh(AT, tmp2, 0); + __ daddiu(tmp2, tmp2, 2); + __ daddiu(tmp3, tmp3, -1); + __ bind(l_2); + + // At this point the positions of both, from and to, are at least 4 byte aligned. + + // Copy 4 elements at a time. + // Align to 8 bytes, but only if both, from and to, have same alignment mod 8. + __ xorr(AT, tmp1, tmp2); + __ andi(AT, AT, 7); + __ bne(AT, R0, l_6); // not same alignment mod 8 -> copy 2, either from or to will be unaligned + __ delayed()->nop(); + + // Copy a 2-element word if necessary to align to 8 bytes. + __ andi(AT, tmp1, 7); + __ beq(AT, R0, l_7); + __ delayed()->nop(); + + __ lw(AT, tmp1, 0); + __ daddiu(tmp3, tmp3, -2); + __ sw(AT, tmp2, 0); + __ daddiu(tmp1, tmp1, 4); + __ daddiu(tmp2, tmp2, 4); + }// end of if (!aligned) + + __ bind(l_7); + // At this time the position of both, from and to, are at least 8 byte aligned. + // Copy 8 elemnets at a time. + // Align to 16 bytes, but only if both from and to have same alignment mod 8. 
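+ // The co-alignment tests below use the same XOR idiom seen throughout this file:
+ // if ((from ^ to) & 15) is nonzero, the two pointers can never be brought to a common
+ // 16-byte alignment by advancing both, so the 16-byte path is skipped. Roughly
+ // (illustrative): if (((from ^ to) & 15) == 0) { /* can co-align both to 16 bytes */ }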
+ __ xorr(AT, tmp1, tmp2); + __ andi(AT, AT, 15); + __ bne(AT, R0, l_9); + __ delayed()->nop(); + + // Copy 4-element word if necessary to align to 16 bytes, + __ andi(AT, tmp1, 15); + __ beq(AT, R0, l_10); + __ delayed()->nop(); + + __ ld(AT, tmp1, 0); + __ daddiu(tmp3, tmp3, -4); + __ sd(AT, tmp2, 0); + __ daddiu(tmp1, tmp1, 8); + __ daddiu(tmp2, tmp2, 8); + + __ bind(l_10); + + // Copy 8 elements at a time; either the loads or the stores can + // be unalligned if aligned == false + + { // FasterArrayCopy + __ bind(l_11); + // For loongson the 128-bit memory access instruction is gslq/gssq + if (UseLEXT1) { + __ gslq(AT, tmp4, tmp1, 0); + __ gslq(tmp5, tmp6, tmp1, 16); + __ daddiu(tmp1, tmp1, 32); + __ daddiu(tmp2, tmp2, 32); + __ gssq(AT, tmp4, tmp2, -32); + __ gssq(tmp5, tmp6, tmp2, -16); + } else { + __ ld(AT, tmp1, 0); + __ ld(tmp4, tmp1, 8); + __ ld(tmp5, tmp1, 16); + __ ld(tmp6, tmp1, 24); + __ daddiu(tmp1, tmp1, 32); + __ sd(AT, tmp2, 0); + __ sd(tmp4, tmp2, 8); + __ sd(tmp5, tmp2, 16); + __ sd(tmp6, tmp2, 24); + __ daddiu(tmp2, tmp2, 32); + } + __ daddiu(tmp3, tmp3, -16); + __ daddiu(AT, tmp3, -16); + __ bgez(AT, l_11); + __ delayed()->nop(); + } + __ bind(l_9); + + // Copy 4 elements at a time; either the loads or the stores can + // be unaligned if aligned == false. + { // FasterArrayCopy + __ daddiu(AT, tmp3, -15);// loop unrolling 4 times, so if the elements should not be less than 16 + __ blez(AT, l_4); // copy 2 at a time if less than 16 elements remain + __ delayed()->nop(); + + __ bind(l_8); + __ ld(AT, tmp1, 0); + __ ld(tmp4, tmp1, 8); + __ ld(tmp5, tmp1, 16); + __ ld(tmp6, tmp1, 24); + __ sd(AT, tmp2, 0); + __ sd(tmp4, tmp2, 8); + __ sd(tmp5, tmp2,16); + __ daddiu(tmp1, tmp1, 32); + __ daddiu(tmp2, tmp2, 32); + __ daddiu(tmp3, tmp3, -16); + __ daddiu(AT, tmp3, -16); + __ bgez(AT, l_8); + __ delayed()->sd(tmp6, tmp2, -8); + } + __ bind(l_6); + + // copy 2 element at a time + { // FasterArrayCopy + __ daddiu(AT, tmp3, -7); + __ blez(AT, l_4); + __ delayed()->nop(); + + __ bind(l_3); + __ lw(AT, tmp1, 0); + __ lw(tmp4, tmp1, 4); + __ lw(tmp5, tmp1, 8); + __ lw(tmp6, tmp1, 12); + __ sw(AT, tmp2, 0); + __ sw(tmp4, tmp2, 4); + __ sw(tmp5, tmp2, 8); + __ daddiu(tmp1, tmp1, 16); + __ daddiu(tmp2, tmp2, 16); + __ daddiu(tmp3, tmp3, -8); + __ daddiu(AT, tmp3, -8); + __ bgez(AT, l_3); + __ delayed()->sw(tmp6, tmp2, -4); + } + + __ bind(l_1); + // do single element copy (8 bit), can this happen? 
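+ // Note: despite the wording above, the block below copies four 16-bit elements per
+ // iteration (four lhu/sh pairs, with the last store in the branch delay slot) while
+ // at least four elements remain; the true single-element tail is handled at l_12.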
+ { // FasterArrayCopy + __ daddiu(AT, tmp3, -3); + __ blez(AT, l_4); + __ delayed()->nop(); + + __ bind(l_5); + __ lhu(AT, tmp1, 0); + __ lhu(tmp4, tmp1, 2); + __ lhu(tmp5, tmp1, 4); + __ lhu(tmp6, tmp1, 6); + __ sh(AT, tmp2, 0); + __ sh(tmp4, tmp2, 2); + __ sh(tmp5, tmp2, 4); + __ daddiu(tmp1, tmp1, 8); + __ daddiu(tmp2, tmp2, 8); + __ daddiu(tmp3, tmp3, -4); + __ daddiu(AT, tmp3, -4); + __ bgez(AT, l_5); + __ delayed()->sh(tmp6, tmp2, -2); + } + // single element + __ bind(l_4); + + __ pop(tmp6); + __ pop(tmp5); + __ pop(tmp4); + + __ bind(l_14); + { // FasterArrayCopy + __ beq(R0, tmp3, l_13); + __ delayed()->nop(); + + __ bind(l_12); + __ lhu(AT, tmp1, 0); + __ sh(AT, tmp2, 0); + __ daddiu(tmp1, tmp1, 2); + __ daddiu(tmp2, tmp2, 2); + __ daddiu(tmp3, tmp3, -1); + __ daddiu(AT, tmp3, -1); + __ bgez(AT, l_12); + __ delayed()->nop(); + } + + __ bind(l_13); + __ pop(tmp3); + __ pop(tmp2); + __ pop(tmp1); + + __ jr(RA); + __ delayed()->nop(); + + __ bind(l_debug); + __ stop("generate_disjoint_short_copy should not reach here"); + return start; + } + + // Arguments: + // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary + // ignored + // name - stub name string + // + // Inputs: + // c_rarg0 - source array address + // c_rarg1 - destination array address + // c_rarg2 - element count, treated as ssize_t, can be zero + // + // If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we + // let the hardware handle it. The two or four words within dwords + // or qwords that span cache line boundaries will still be loaded + // and stored atomically. + // + address generate_conjoint_short_copy(bool aligned, const char *name) { + StubCodeMark mark(this, "StubRoutines", name); + __ align(CodeEntryAlignment); + address start = __ pc(); + + Label l_exit, l_copy_short, l_from_unaligned, l_unaligned, l_4_bytes_aligned; + + address nooverlap_target = aligned ? + StubRoutines::arrayof_jshort_disjoint_arraycopy() : + StubRoutines::jshort_disjoint_arraycopy(); + + array_overlap_test(nooverlap_target, 1); + + const Register from = A0; // source array address + const Register to = A1; // destination array address + const Register count = A2; // elements count + const Register end_from = T3; // source array end address + const Register end_to = T0; // destination array end address + const Register end_count = T1; // destination array end address + + __ push(end_from); + __ push(end_to); + __ push(end_count); + __ push(T8); + + // copy from high to low + __ move(end_count, count); + __ sll(AT, end_count, Address::times_2); + __ daddu(end_from, from, AT); + __ daddu(end_to, to, AT); + + // If end_from and end_to has differante alignment, unaligned copy is performed. + __ andi(AT, end_from, 3); + __ andi(T8, end_to, 3); + __ bne(AT, T8, l_copy_short); + __ delayed()->nop(); + + // First deal with the unaligned data at the top. + __ bind(l_unaligned); + __ beq(end_count, R0, l_exit); + __ delayed()->nop(); + + __ andi(AT, end_from, 3); + __ bne(AT, R0, l_from_unaligned); + __ delayed()->nop(); + + __ andi(AT, end_to, 3); + __ beq(AT, R0, l_4_bytes_aligned); + __ delayed()->nop(); + + // Copy 1 element if necessary to align to 4 bytes. + __ bind(l_from_unaligned); + __ lhu(AT, end_from, -2); + __ sh(AT, end_to, -2); + __ daddiu(end_from, end_from, -2); + __ daddiu(end_to, end_to, -2); + __ daddiu(end_count, end_count, -1); + __ b(l_unaligned); + __ delayed()->nop(); + + // now end_to, end_from point to 4-byte aligned high-ends + // end_count contains byte count that is not copied. 
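+ // (In this short variant end_count appears to be measured in 2-byte elements rather
+ // than bytes: the 4-byte loop below subtracts 2 per iteration and the final loop
+ // subtracts 1 per element.)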
+ // copy 4 bytes at a time + __ bind(l_4_bytes_aligned); + + __ daddiu(AT, end_count, -1); + __ blez(AT, l_copy_short); + __ delayed()->nop(); + + __ lw(AT, end_from, -4); + __ sw(AT, end_to, -4); + __ addiu(end_from, end_from, -4); + __ addiu(end_to, end_to, -4); + __ addiu(end_count, end_count, -2); + __ b(l_4_bytes_aligned); + __ delayed()->nop(); + + // copy 1 element at a time + __ bind(l_copy_short); + __ beq(end_count, R0, l_exit); + __ delayed()->nop(); + __ lhu(AT, end_from, -2); + __ sh(AT, end_to, -2); + __ daddiu(end_from, end_from, -2); + __ daddiu(end_to, end_to, -2); + __ daddiu(end_count, end_count, -1); + __ b(l_copy_short); + __ delayed()->nop(); + + __ bind(l_exit); + __ pop(T8); + __ pop(end_count); + __ pop(end_to); + __ pop(end_from); + __ jr(RA); + __ delayed()->nop(); + return start; + } + + // Arguments: + // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary + // ignored + // is_oop - true => oop array, so generate store check code + // name - stub name string + // + // Inputs: + // c_rarg0 - source array address + // c_rarg1 - destination array address + // c_rarg2 - element count, treated as ssize_t, can be zero + // + // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let + // the hardware handle it. The two dwords within qwords that span + // cache line boundaries will still be loaded and stored atomicly. + // + // Side Effects: + // disjoint_int_copy_entry is set to the no-overlap entry point + // used by generate_conjoint_int_oop_copy(). + // + address generate_disjoint_int_oop_copy(bool aligned, bool is_oop, const char *name, bool dest_uninitialized = false) { + Label l_3, l_4, l_5, l_6, l_7; + StubCodeMark mark(this, "StubRoutines", name); + + __ align(CodeEntryAlignment); + address start = __ pc(); + __ push(T3); + __ push(T0); + __ push(T1); + __ push(T8); + __ push(T9); + __ move(T1, A2); + __ move(T3, A0); + __ move(T0, A1); + + DecoratorSet decorators = IN_HEAP | IS_ARRAY | ARRAYCOPY_DISJOINT; + if (dest_uninitialized) { + decorators |= IS_DEST_UNINITIALIZED; + } + if (aligned) { + decorators |= ARRAYCOPY_ALIGNED; + } + + BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); + bs->arraycopy_prologue(_masm, decorators, is_oop, A1, A2); + + if(!aligned) { + __ xorr(AT, T3, T0); + __ andi(AT, AT, 7); + __ bne(AT, R0, l_5); // not same alignment mod 8 -> copy 1 element each time + __ delayed()->nop(); + + __ andi(AT, T3, 7); + __ beq(AT, R0, l_6); //copy 2 elements each time + __ delayed()->nop(); + + __ lw(AT, T3, 0); + __ daddiu(T1, T1, -1); + __ sw(AT, T0, 0); + __ daddiu(T3, T3, 4); + __ daddiu(T0, T0, 4); + } + + { + __ bind(l_6); + __ daddiu(AT, T1, -1); + __ blez(AT, l_5); + __ delayed()->nop(); + + __ bind(l_7); + __ ld(AT, T3, 0); + __ sd(AT, T0, 0); + __ daddiu(T3, T3, 8); + __ daddiu(T0, T0, 8); + __ daddiu(T1, T1, -2); + __ daddiu(AT, T1, -2); + __ bgez(AT, l_7); + __ delayed()->nop(); + } + + __ bind(l_5); + __ beq(T1, R0, l_4); + __ delayed()->nop(); + + __ align(16); + __ bind(l_3); + __ lw(AT, T3, 0); + __ sw(AT, T0, 0); + __ addiu(T3, T3, 4); + __ addiu(T0, T0, 4); + __ addiu(T1, T1, -1); + __ bne(T1, R0, l_3); + __ delayed()->nop(); + + // exit + __ bind(l_4); + bs->arraycopy_epilogue(_masm, decorators, is_oop, A1, A2, T1); + __ pop(T9); + __ pop(T8); + __ pop(T1); + __ pop(T0); + __ pop(T3); + __ jr(RA); + __ delayed()->nop(); + + return start; + } + + // Arguments: + // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary + // ignored + // is_oop - true 
=> oop array, so generate store check code + // name - stub name string + // + // Inputs: + // c_rarg0 - source array address + // c_rarg1 - destination array address + // c_rarg2 - element count, treated as ssize_t, can be zero + // + // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let + // the hardware handle it. The two dwords within qwords that span + // cache line boundaries will still be loaded and stored atomicly. + // + address generate_conjoint_int_oop_copy(bool aligned, bool is_oop, const char *name, bool dest_uninitialized = false) { + Label l_2, l_4; + StubCodeMark mark(this, "StubRoutines", name); + __ align(CodeEntryAlignment); + address start = __ pc(); + address nooverlap_target; + + if (is_oop) { + nooverlap_target = aligned ? + StubRoutines::arrayof_oop_disjoint_arraycopy() : + StubRoutines::oop_disjoint_arraycopy(); + } else { + nooverlap_target = aligned ? + StubRoutines::arrayof_jint_disjoint_arraycopy() : + StubRoutines::jint_disjoint_arraycopy(); + } + + array_overlap_test(nooverlap_target, 2); + + DecoratorSet decorators = IN_HEAP | IS_ARRAY; + if (dest_uninitialized) { + decorators |= IS_DEST_UNINITIALIZED; + } + if (aligned) { + decorators |= ARRAYCOPY_ALIGNED; + } + + BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); + // no registers are destroyed by this call + bs->arraycopy_prologue(_masm, decorators, is_oop, A1, A2); + + __ push(T3); + __ push(T0); + __ push(T1); + __ push(T8); + __ push(T9); + + __ move(T1, A2); + __ move(T3, A0); + __ move(T0, A1); + + // T3: source array address + // T0: destination array address + // T1: element count + + __ sll(AT, T1, Address::times_4); + __ addu(AT, T3, AT); + __ daddiu(T3, AT, -4); + __ sll(AT, T1, Address::times_4); + __ addu(AT, T0, AT); + __ daddiu(T0, AT, -4); + + __ beq(T1, R0, l_4); + __ delayed()->nop(); + + __ align(16); + __ bind(l_2); + __ lw(AT, T3, 0); + __ sw(AT, T0, 0); + __ addiu(T3, T3, -4); + __ addiu(T0, T0, -4); + __ addiu(T1, T1, -1); + __ bne(T1, R0, l_2); + __ delayed()->nop(); + + __ bind(l_4); + bs->arraycopy_epilogue(_masm, decorators, is_oop, A1, A2, T1); + __ pop(T9); + __ pop(T8); + __ pop(T1); + __ pop(T0); + __ pop(T3); + __ jr(RA); + __ delayed()->nop(); + + return start; + } + + // Arguments: + // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary + // ignored + // is_oop - true => oop array, so generate store check code + // name - stub name string + // + // Inputs: + // c_rarg0 - source array address + // c_rarg1 - destination array address + // c_rarg2 - element count, treated as ssize_t, can be zero + // + // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let + // the hardware handle it. The two dwords within qwords that span + // cache line boundaries will still be loaded and stored atomicly. + // + // Side Effects: + // disjoint_int_copy_entry is set to the no-overlap entry point + // used by generate_conjoint_int_oop_copy(). 
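+ // (The Side Effects note above seems to be carried over from the int/oop variant; the
+ // stub below copies 8-byte elements and, for oop arrays, is installed as the
+ // no-overlap target consulted by generate_conjoint_long_oop_copy(). The GC-specific
+ // pre/post work comes from BarrierSetAssembler::arraycopy_prologue()/epilogue().)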
+ // + address generate_disjoint_long_oop_copy(bool aligned, bool is_oop, const char *name, bool dest_uninitialized = false) { + Label l_3, l_4; + StubCodeMark mark(this, "StubRoutines", name); + __ align(CodeEntryAlignment); + address start = __ pc(); + + DecoratorSet decorators = IN_HEAP | IS_ARRAY | ARRAYCOPY_DISJOINT; + if (dest_uninitialized) { + decorators |= IS_DEST_UNINITIALIZED; + } + if (aligned) { + decorators |= ARRAYCOPY_ALIGNED; + } + + BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); + bs->arraycopy_prologue(_masm, decorators, is_oop, A1, A2); + + __ push(T3); + __ push(T0); + __ push(T1); + __ push(T8); + __ push(T9); + + __ move(T1, A2); + __ move(T3, A0); + __ move(T0, A1); + + // T3: source array address + // T0: destination array address + // T1: element count + + __ beq(T1, R0, l_4); + __ delayed()->nop(); + + __ align(16); + __ bind(l_3); + __ ld(AT, T3, 0); + __ sd(AT, T0, 0); + __ addiu(T3, T3, 8); + __ addiu(T0, T0, 8); + __ addiu(T1, T1, -1); + __ bne(T1, R0, l_3); + __ delayed()->nop(); + + // exit + __ bind(l_4); + bs->arraycopy_epilogue(_masm, decorators, is_oop, A1, A2, T1); + __ pop(T9); + __ pop(T8); + __ pop(T1); + __ pop(T0); + __ pop(T3); + __ jr(RA); + __ delayed()->nop(); + return start; + } + + // Arguments: + // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary + // ignored + // is_oop - true => oop array, so generate store check code + // name - stub name string + // + // Inputs: + // c_rarg0 - source array address + // c_rarg1 - destination array address + // c_rarg2 - element count, treated as ssize_t, can be zero + // + // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let + // the hardware handle it. The two dwords within qwords that span + // cache line boundaries will still be loaded and stored atomicly. + // + address generate_conjoint_long_oop_copy(bool aligned, bool is_oop, const char *name, bool dest_uninitialized = false) { + Label l_2, l_4; + StubCodeMark mark(this, "StubRoutines", name); + __ align(CodeEntryAlignment); + address start = __ pc(); + address nooverlap_target; + + if (is_oop) { + nooverlap_target = aligned ? + StubRoutines::arrayof_oop_disjoint_arraycopy() : + StubRoutines::oop_disjoint_arraycopy(); + } else { + nooverlap_target = aligned ? 
+ StubRoutines::arrayof_jlong_disjoint_arraycopy() : + StubRoutines::jlong_disjoint_arraycopy(); + } + + array_overlap_test(nooverlap_target, 3); + + DecoratorSet decorators = IN_HEAP | IS_ARRAY | ARRAYCOPY_DISJOINT; + if (dest_uninitialized) { + decorators |= IS_DEST_UNINITIALIZED; + } + if (aligned) { + decorators |= ARRAYCOPY_ALIGNED; + } + + BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); + bs->arraycopy_prologue(_masm, decorators, is_oop, A1, A2); + + __ push(T3); + __ push(T0); + __ push(T1); + __ push(T8); + __ push(T9); + + __ move(T1, A2); + __ move(T3, A0); + __ move(T0, A1); + + __ sll(AT, T1, Address::times_8); + __ addu(AT, T3, AT); + __ daddiu(T3, AT, -8); + __ sll(AT, T1, Address::times_8); + __ addu(AT, T0, AT); + __ daddiu(T0, AT, -8); + + __ beq(T1, R0, l_4); + __ delayed()->nop(); + + __ align(16); + __ bind(l_2); + __ ld(AT, T3, 0); + __ sd(AT, T0, 0); + __ addiu(T3, T3, -8); + __ addiu(T0, T0, -8); + __ addiu(T1, T1, -1); + __ bne(T1, R0, l_2); + __ delayed()->nop(); + + // exit + __ bind(l_4); + bs->arraycopy_epilogue(_masm, decorators, is_oop, A1, A2, T1); + __ pop(T9); + __ pop(T8); + __ pop(T1); + __ pop(T0); + __ pop(T3); + __ jr(RA); + __ delayed()->nop(); + return start; + } + + //FIXME + address generate_disjoint_long_copy(bool aligned, const char *name) { + Label l_1, l_2; + StubCodeMark mark(this, "StubRoutines", name); + __ align(CodeEntryAlignment); + address start = __ pc(); + + __ move(T1, A2); + __ move(T3, A0); + __ move(T0, A1); + __ push(T3); + __ push(T0); + __ push(T1); + __ b(l_2); + __ delayed()->nop(); + __ align(16); + __ bind(l_1); + __ ld(AT, T3, 0); + __ sd (AT, T0, 0); + __ addiu(T3, T3, 8); + __ addiu(T0, T0, 8); + __ bind(l_2); + __ addiu(T1, T1, -1); + __ bgez(T1, l_1); + __ delayed()->nop(); + __ pop(T1); + __ pop(T0); + __ pop(T3); + __ jr(RA); + __ delayed()->nop(); + return start; + } + + + address generate_conjoint_long_copy(bool aligned, const char *name) { + Label l_1, l_2; + StubCodeMark mark(this, "StubRoutines", name); + __ align(CodeEntryAlignment); + address start = __ pc(); + address nooverlap_target = aligned ? 
+ StubRoutines::arrayof_jlong_disjoint_arraycopy() : + StubRoutines::jlong_disjoint_arraycopy(); + array_overlap_test(nooverlap_target, 3); + + __ push(T3); + __ push(T0); + __ push(T1); + + __ move(T1, A2); + __ move(T3, A0); + __ move(T0, A1); + __ sll(AT, T1, Address::times_8); + __ addu(AT, T3, AT); + __ daddiu(T3, AT, -8); + __ sll(AT, T1, Address::times_8); + __ addu(AT, T0, AT); + __ daddiu(T0, AT, -8); + + __ b(l_2); + __ delayed()->nop(); + __ align(16); + __ bind(l_1); + __ ld(AT, T3, 0); + __ sd (AT, T0, 0); + __ addiu(T3, T3, -8); + __ addiu(T0, T0,-8); + __ bind(l_2); + __ addiu(T1, T1, -1); + __ bgez(T1, l_1); + __ delayed()->nop(); + __ pop(T1); + __ pop(T0); + __ pop(T3); + __ jr(RA); + __ delayed()->nop(); + return start; + } + + void generate_arraycopy_stubs() { + if (UseCompressedOops) { + StubRoutines::_oop_disjoint_arraycopy = generate_disjoint_int_oop_copy(false, true, + "oop_disjoint_arraycopy"); + StubRoutines::_oop_arraycopy = generate_conjoint_int_oop_copy(false, true, + "oop_arraycopy"); + StubRoutines::_oop_disjoint_arraycopy_uninit = generate_disjoint_int_oop_copy(false, true, + "oop_disjoint_arraycopy_uninit", true); + StubRoutines::_oop_arraycopy_uninit = generate_conjoint_int_oop_copy(false, true, + "oop_arraycopy_uninit", true); + } else { + StubRoutines::_oop_disjoint_arraycopy = generate_disjoint_long_oop_copy(false, true, + "oop_disjoint_arraycopy"); + StubRoutines::_oop_arraycopy = generate_conjoint_long_oop_copy(false, true, + "oop_arraycopy"); + StubRoutines::_oop_disjoint_arraycopy_uninit = generate_disjoint_long_oop_copy(false, true, + "oop_disjoint_arraycopy_uninit", true); + StubRoutines::_oop_arraycopy_uninit = generate_conjoint_long_oop_copy(false, true, + "oop_arraycopy_uninit", true); + } + + StubRoutines::_jbyte_disjoint_arraycopy = generate_disjoint_byte_copy(false, "jbyte_disjoint_arraycopy"); + StubRoutines::_jshort_disjoint_arraycopy = generate_disjoint_short_copy(false, "jshort_disjoint_arraycopy"); + StubRoutines::_jint_disjoint_arraycopy = generate_disjoint_int_oop_copy(false, false, "jint_disjoint_arraycopy"); + StubRoutines::_jlong_disjoint_arraycopy = generate_disjoint_long_copy(false, "jlong_disjoint_arraycopy"); + + StubRoutines::_jbyte_arraycopy = generate_conjoint_byte_copy(false, "jbyte_arraycopy"); + StubRoutines::_jshort_arraycopy = generate_conjoint_short_copy(false, "jshort_arraycopy"); + StubRoutines::_jint_arraycopy = generate_conjoint_int_oop_copy(false, false, "jint_arraycopy"); + StubRoutines::_jlong_arraycopy = generate_conjoint_long_copy(false, "jlong_arraycopy"); + + // We don't generate specialized code for HeapWord-aligned source + // arrays, so just use the code we've already generated + StubRoutines::_arrayof_jbyte_disjoint_arraycopy = StubRoutines::_jbyte_disjoint_arraycopy; + StubRoutines::_arrayof_jbyte_arraycopy = StubRoutines::_jbyte_arraycopy; + + StubRoutines::_arrayof_jshort_disjoint_arraycopy = StubRoutines::_jshort_disjoint_arraycopy; + StubRoutines::_arrayof_jshort_arraycopy = StubRoutines::_jshort_arraycopy; + + StubRoutines::_arrayof_jint_disjoint_arraycopy = StubRoutines::_jint_disjoint_arraycopy; + StubRoutines::_arrayof_jint_arraycopy = StubRoutines::_jint_arraycopy; + + StubRoutines::_arrayof_jlong_disjoint_arraycopy = StubRoutines::_jlong_disjoint_arraycopy; + StubRoutines::_arrayof_jlong_arraycopy = StubRoutines::_jlong_arraycopy; + + StubRoutines::_arrayof_oop_disjoint_arraycopy = StubRoutines::_oop_disjoint_arraycopy; + StubRoutines::_arrayof_oop_arraycopy = StubRoutines::_oop_arraycopy; + + 
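+ // The *_uninit entry points are chosen when the destination array is known to be
+ // freshly allocated; the IS_DEST_UNINITIALIZED decorator lets the barrier-set
+ // assembler skip pre-barrier work that would otherwise examine the previous
+ // destination contents (relevant for collectors such as G1).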
StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit = StubRoutines::_oop_disjoint_arraycopy_uninit; + StubRoutines::_arrayof_oop_arraycopy_uninit = StubRoutines::_oop_arraycopy_uninit; + + StubRoutines::_jbyte_fill = generate_fill(T_BYTE, false, "jbyte_fill"); + StubRoutines::_jshort_fill = generate_fill(T_SHORT, false, "jshort_fill"); + StubRoutines::_jint_fill = generate_fill(T_INT, false, "jint_fill"); + StubRoutines::_arrayof_jbyte_fill = generate_fill(T_BYTE, true, "arrayof_jbyte_fill"); + StubRoutines::_arrayof_jshort_fill = generate_fill(T_SHORT, true, "arrayof_jshort_fill"); + StubRoutines::_arrayof_jint_fill = generate_fill(T_INT, true, "arrayof_jint_fill"); + } + + // add a function to implement SafeFetch32 and SafeFetchN + void generate_safefetch(const char* name, int size, address* entry, + address* fault_pc, address* continuation_pc) { + // safefetch signatures: + // int SafeFetch32(int* adr, int errValue); + // intptr_t SafeFetchN (intptr_t* adr, intptr_t errValue); + // + // arguments: + // A0 = adr + // A1 = errValue + // + // result: + // PPC_RET = *adr or errValue + + StubCodeMark mark(this, "StubRoutines", name); + + // Entry point, pc or function descriptor. + *entry = __ pc(); + + // Load *adr into A1, may fault. + *fault_pc = __ pc(); + switch (size) { + case 4: + // int32_t + __ lw(A1, A0, 0); + break; + case 8: + // int64_t + __ ld(A1, A0, 0); + break; + default: + ShouldNotReachHere(); + } + + // return errValue or *adr + *continuation_pc = __ pc(); + __ addu(V0,A1,R0); + __ jr(RA); + __ delayed()->nop(); + } + + +#undef __ +#define __ masm-> + + // Continuation point for throwing of implicit exceptions that are + // not handled in the current activation. Fabricates an exception + // oop and initiates normal exception dispatching in this + // frame. Since we need to preserve callee-saved values (currently + // only for C2, but done for C1 as well) we need a callee-saved oop + // map and therefore have to make these stubs into RuntimeStubs + // rather than BufferBlobs. If the compiler needs all registers to + // be preserved between the fault point and the exception handler + // then it must assume responsibility for that in + // AbstractCompiler::continuation_for_implicit_null_exception or + // continuation_for_implicit_division_by_zero_exception. All other + // implicit exceptions (e.g., NullPointerException or + // AbstractMethodError on entry) are either at call sites or + // otherwise assume that stack unwinding will be initiated, so + // caller saved registers were assumed volatile in the compiler. + address generate_throw_exception(const char* name, + address runtime_entry, + bool restore_saved_exception_pc) { + // Information about frame layout at time of blocking runtime call. + // Note that we only have to preserve callee-saved registers since + // the compilers are responsible for supplying a continuation point + // if they expect all registers to be preserved. 
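+ // The enum below names the stack slots of the frame this stub builds: the outgoing
+ // thread argument occupies the lowest slot, followed by the saved callee-saved
+ // registers S7..S0, then the FP and return address saved by enter(); 'framesize'
+ // is the resulting slot count handed to the OopMap and RuntimeStub constructors.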
+ enum layout { + thread_off, // last_java_sp + S7_off, // callee saved register sp + 1 + S6_off, // callee saved register sp + 2 + S5_off, // callee saved register sp + 3 + S4_off, // callee saved register sp + 4 + S3_off, // callee saved register sp + 5 + S2_off, // callee saved register sp + 6 + S1_off, // callee saved register sp + 7 + S0_off, // callee saved register sp + 8 + FP_off, + ret_address, + framesize + }; + + int insts_size = 2048; + int locs_size = 32; + + // CodeBuffer* code = new CodeBuffer(insts_size, locs_size, 0, 0, 0, false, + // NULL, NULL, NULL, false, NULL, name, false); + CodeBuffer code (name , insts_size, locs_size); + OopMapSet* oop_maps = new OopMapSet(); + MacroAssembler* masm = new MacroAssembler(&code); + + address start = __ pc(); + + // This is an inlined and slightly modified version of call_VM + // which has the ability to fetch the return PC out of + // thread-local storage and also sets up last_Java_sp slightly + // differently than the real call_VM +#ifndef OPT_THREAD + Register java_thread = TREG; + __ get_thread(java_thread); +#else + Register java_thread = TREG; +#endif + if (restore_saved_exception_pc) { + __ ld(RA, java_thread, in_bytes(JavaThread::saved_exception_pc_offset())); + } + + __ enter(); // required for proper stackwalking of RuntimeStub frame + + __ addiu(SP, SP, (-1) * (framesize-2) * wordSize); // prolog + __ sd(S0, SP, S0_off * wordSize); + __ sd(S1, SP, S1_off * wordSize); + __ sd(S2, SP, S2_off * wordSize); + __ sd(S3, SP, S3_off * wordSize); + __ sd(S4, SP, S4_off * wordSize); + __ sd(S5, SP, S5_off * wordSize); + __ sd(S6, SP, S6_off * wordSize); + __ sd(S7, SP, S7_off * wordSize); + + int frame_complete = __ pc() - start; + // push java thread (becomes first argument of C function) + __ sd(java_thread, SP, thread_off * wordSize); + if (java_thread != A0) + __ move(A0, java_thread); + + // Set up last_Java_sp and last_Java_fp + __ set_last_Java_frame(java_thread, SP, FP, NULL); + // Align stack + __ set64(AT, -(StackAlignmentInBytes)); + __ andr(SP, SP, AT); + + __ relocate(relocInfo::internal_pc_type); + { + intptr_t save_pc = (intptr_t)__ pc() + NativeMovConstReg::instruction_size + 28; + __ patchable_set48(AT, save_pc); + } + __ sd(AT, java_thread, in_bytes(JavaThread::last_Java_pc_offset())); + + // Call runtime + __ call(runtime_entry); + __ delayed()->nop(); + // Generate oop map + OopMap* map = new OopMap(framesize, 0); + oop_maps->add_gc_map(__ offset(), map); + + // restore the thread (cannot use the pushed argument since arguments + // may be overwritten by C code generated by an optimizing compiler); + // however can use the register value directly if it is callee saved. +#ifndef OPT_THREAD + __ get_thread(java_thread); +#endif + + __ ld(SP, java_thread, in_bytes(JavaThread::last_Java_sp_offset())); + __ reset_last_Java_frame(java_thread, true); + + // Restore callee save registers. 
This must be done after resetting the Java frame + __ ld(S0, SP, S0_off * wordSize); + __ ld(S1, SP, S1_off * wordSize); + __ ld(S2, SP, S2_off * wordSize); + __ ld(S3, SP, S3_off * wordSize); + __ ld(S4, SP, S4_off * wordSize); + __ ld(S5, SP, S5_off * wordSize); + __ ld(S6, SP, S6_off * wordSize); + __ ld(S7, SP, S7_off * wordSize); + + // discard arguments + __ move(SP, FP); // epilog + __ pop(FP); + + // check for pending exceptions +#ifdef ASSERT + Label L; + __ ld(AT, java_thread, in_bytes(Thread::pending_exception_offset())); + __ bne(AT, R0, L); + __ delayed()->nop(); + __ should_not_reach_here(); + __ bind(L); +#endif //ASSERT + __ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); + __ delayed()->nop(); + RuntimeStub* stub = RuntimeStub::new_runtime_stub(name, + &code, + frame_complete, + framesize, + oop_maps, false); + return stub->entry_point(); + } + + // Initialization + void generate_initial() { + // Generates all stubs and initializes the entry points + + //------------------------------------------------------------- + //----------------------------------------------------------- + // entry points that exist in all platforms + // Note: This is code that could be shared among different platforms - however the benefit seems to be smaller + // than the disadvantage of having a much more complicated generator structure. + // See also comment in stubRoutines.hpp. + StubRoutines::_forward_exception_entry = generate_forward_exception(); + StubRoutines::_call_stub_entry = generate_call_stub(StubRoutines::_call_stub_return_address); + // is referenced by megamorphic call + StubRoutines::_catch_exception_entry = generate_catch_exception(); + + StubRoutines::_throw_StackOverflowError_entry = + generate_throw_exception("StackOverflowError throw_exception", + CAST_FROM_FN_PTR(address, SharedRuntime::throw_StackOverflowError), + false); + StubRoutines::_throw_delayed_StackOverflowError_entry = + generate_throw_exception("delayed StackOverflowError throw_exception", + CAST_FROM_FN_PTR(address, SharedRuntime::throw_delayed_StackOverflowError), + false); + } + + void generate_all() { + // Generates all stubs and initializes the entry points + + // These entry points require SharedInfo::stack0 to be set up in + // non-core builds and need to be relocatable, so they each + // fabricate a RuntimeStub internally. + StubRoutines::_throw_AbstractMethodError_entry = generate_throw_exception("AbstractMethodError throw_exception", + CAST_FROM_FN_PTR(address, SharedRuntime::throw_AbstractMethodError), false); + + StubRoutines::_throw_IncompatibleClassChangeError_entry = generate_throw_exception("IncompatibleClassChangeError throw_exception", + CAST_FROM_FN_PTR(address, SharedRuntime:: throw_IncompatibleClassChangeError), false); + + StubRoutines::_throw_NullPointerException_at_call_entry = generate_throw_exception("NullPointerException at call throw_exception", + CAST_FROM_FN_PTR(address, SharedRuntime::throw_NullPointerException_at_call), false); + + // entry points that are platform specific + + // support for verify_oop (must happen after universe_init) + StubRoutines::_verify_oop_subroutine_entry = generate_verify_oop(); +#ifndef CORE + // arraycopy stubs used by compilers + generate_arraycopy_stubs(); +#endif + + // Safefetch stubs. 
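+ // SafeFetch32/SafeFetchN let VM code (e.g. error reporting) read from addresses that
+ // may be unmapped: if the load at *fault_pc faults, the signal handler resumes the
+ // stub at *continuation_pc, which returns errValue instead of crashing.
+ // Typical use (illustrative): int v = SafeFetch32((int*)addr, -1);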
+ generate_safefetch("SafeFetch32", sizeof(int), &StubRoutines::_safefetch32_entry, + &StubRoutines::_safefetch32_fault_pc, + &StubRoutines::_safefetch32_continuation_pc); + generate_safefetch("SafeFetchN", sizeof(intptr_t), &StubRoutines::_safefetchN_entry, + &StubRoutines::_safefetchN_fault_pc, + &StubRoutines::_safefetchN_continuation_pc); + +#ifdef COMPILER2 + if (UseMontgomeryMultiplyIntrinsic) { + StubRoutines::_montgomeryMultiply + = CAST_FROM_FN_PTR(address, SharedRuntime::montgomery_multiply); + } + if (UseMontgomerySquareIntrinsic) { + StubRoutines::_montgomerySquare + = CAST_FROM_FN_PTR(address, SharedRuntime::montgomery_square); + } +#endif + } + + public: + StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) { + if (all) { + generate_all(); + } else { + generate_initial(); + } + } +}; // end class declaration + +void StubGenerator_generate(CodeBuffer* code, bool all) { + StubGenerator g(code, all); +} diff --git a/src/hotspot/cpu/mips/stubRoutines_mips.hpp b/src/hotspot/cpu/mips/stubRoutines_mips.hpp new file mode 100644 index 00000000000..920c08844e1 --- /dev/null +++ b/src/hotspot/cpu/mips/stubRoutines_mips.hpp @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_MIPS_VM_STUBROUTINES_MIPS_64_HPP +#define CPU_MIPS_VM_STUBROUTINES_MIPS_64_HPP + +// This file holds the platform specific parts of the StubRoutines +// definition. See stubRoutines.hpp for a description on how to +// extend it. + +static bool returns_to_call_stub(address return_pc){ + return return_pc == _call_stub_return_address||return_pc == gs2::get_call_stub_compiled_return(); +} + +enum platform_dependent_constants { + code_size1 = 20000, // simply increase if too small (assembler will crash if too small) + code_size2 = 40000 // simply increase if too small (assembler will crash if too small) +}; + +class gs2 { + friend class StubGenerator; + friend class VMStructs; + private: + // If we call compiled code directly from the call stub we will + // need to adjust the return back to the call stub to a specialized + // piece of code that can handle compiled results and cleaning the fpu + // stack. The variable holds that location. 
+ static address _call_stub_compiled_return; + +public: + // Call back points for traps in compiled code + static address get_call_stub_compiled_return() { return _call_stub_compiled_return; } + static void set_call_stub_compiled_return(address ret){ _call_stub_compiled_return = ret; } + +}; + +#endif // CPU_MIPS_VM_STUBROUTINES_MIPS_64_HPP diff --git a/src/hotspot/cpu/mips/stubRoutines_mips_64.cpp b/src/hotspot/cpu/mips/stubRoutines_mips_64.cpp new file mode 100644 index 00000000000..358d580d527 --- /dev/null +++ b/src/hotspot/cpu/mips/stubRoutines_mips_64.cpp @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "runtime/deoptimization.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/stubRoutines.hpp" +#include "runtime/thread.inline.hpp" + +// a description of how to extend it, see the stubRoutines.hpp file. + +//find the last fp value +address StubRoutines::gs2::_call_stub_compiled_return = NULL; diff --git a/src/hotspot/cpu/mips/templateInterpreterGenerator_mips.cpp b/src/hotspot/cpu/mips/templateInterpreterGenerator_mips.cpp new file mode 100644 index 00000000000..19e2f29c592 --- /dev/null +++ b/src/hotspot/cpu/mips/templateInterpreterGenerator_mips.cpp @@ -0,0 +1,2149 @@ +/* + * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "interpreter/bytecodeHistogram.hpp" +#include "interpreter/interp_masm.hpp" +#include "interpreter/interpreter.hpp" +#include "interpreter/interpreterRuntime.hpp" +#include "interpreter/templateInterpreterGenerator.hpp" +#include "interpreter/templateTable.hpp" +#include "oops/arrayOop.hpp" +#include "oops/methodData.hpp" +#include "oops/method.hpp" +#include "oops/oop.inline.hpp" +#include "prims/jvmtiExport.hpp" +#include "prims/jvmtiThreadState.hpp" +#include "runtime/arguments.hpp" +#include "runtime/deoptimization.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#include "runtime/synchronizer.hpp" +#include "runtime/timer.hpp" +#include "runtime/vframeArray.hpp" +#include "utilities/debug.hpp" + +#define __ _masm-> + +#define T0 RT0 +#define T1 RT1 +#define T2 RT2 +#define T3 RT3 +#define T8 RT8 +#define T9 RT9 + +int TemplateInterpreter::InterpreterCodeSize = 500 * K; + +#ifdef PRODUCT +#define BLOCK_COMMENT(str) /* nothing */ +#else +#define BLOCK_COMMENT(str) __ block_comment(str) +#endif + +address TemplateInterpreterGenerator::generate_slow_signature_handler() { + address entry = __ pc(); + + // Rmethod: method + // LVP: pointer to locals + // A3: first stack arg + __ move(A3, SP); + __ daddiu(SP, SP, -10 * wordSize); + __ sd(RA, SP, 0); + __ call_VM(noreg, + CAST_FROM_FN_PTR(address, + InterpreterRuntime::slow_signature_handler), + Rmethod, LVP, A3); + + // V0: result handler + + // Stack layout: + // ... + // 10 stack arg0 <--- old sp + // 9 float/double identifiers + // 8 register arg7 + // ... + // 2 register arg1 + // 1 aligned slot + // SP: 0 return address + + // Do FP first so we can use T3 as temp + __ ld(T3, Address(SP, 9 * wordSize)); // float/double identifiers + + // A0 is for env. + // If the mothed is not static, A1 will be corrected in generate_native_entry. + for ( int i = 1; i < Argument::n_register_parameters; i++ ) { + Register reg = as_Register(i + A0->encoding()); + FloatRegister floatreg = as_FloatRegister(i + F12->encoding()); + Label isfloatordouble, isdouble, next; + + __ andi(AT, T3, 1 << (i*2)); // Float or Double? + __ bne(AT, R0, isfloatordouble); + __ delayed()->nop(); + + // Do Int register here + __ ld(reg, SP, (1 + i) * wordSize); + __ b (next); + __ delayed()->nop(); + + __ bind(isfloatordouble); + __ andi(AT, T3, 1 << ((i*2)+1)); // Double? 
+ __ bne(AT, R0, isdouble); + __ delayed()->nop(); + + // Do Float Here + __ lwc1(floatreg, SP, (1 + i) * wordSize); + __ b(next); + __ delayed()->nop(); + + // Do Double here + __ bind(isdouble); + __ ldc1(floatreg, SP, (1 + i) * wordSize); + + __ bind(next); + } + + __ ld(RA, SP, 0); + __ daddiu(SP, SP, 10 * wordSize); + __ jr(RA); + __ delayed()->nop(); + return entry; +} + +/** + * Method entry for static native methods: + * int java.util.zip.CRC32.update(int crc, int b) + */ +address TemplateInterpreterGenerator::generate_CRC32_update_entry() { + if (UseCRC32Intrinsics) { + address entry = __ pc(); + Unimplemented(); + return entry; + } + return NULL; +} + +/** + * Method entry for static native methods: + * int java.util.zip.CRC32.updateBytes(int crc, byte[] b, int off, int len) + * int java.util.zip.CRC32.updateByteBuffer(int crc, long buf, int off, int len) + */ +address TemplateInterpreterGenerator::generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind) { + if (UseCRC32Intrinsics) { + address entry = __ pc(); + Unimplemented(); + return entry; + } + return NULL; +} + +/** +* Method entry for static (non-native) methods: +* int java.util.zip.CRC32C.updateBytes(int crc, byte[] b, int off, int end) +* int java.util.zip.CRC32C.updateDirectByteBuffer(int crc, long address, int off, int end) +*/ +address TemplateInterpreterGenerator::generate_CRC32C_updateBytes_entry(AbstractInterpreter::MethodKind kind) { + if (UseCRC32CIntrinsics) { + address entry = __ pc(); + Unimplemented(); + return entry; + } + return NULL; +} + +// +// Various method entries +// + +address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::MethodKind kind) { + if (!InlineIntrinsics) return NULL; // Generate a vanilla entry + + // These don't need a safepoint check because they aren't virtually + // callable. We won't enter these intrinsics from compiled code. + // If in the future we added an intrinsic which was virtually callable + // we'd have to worry about how to safepoint so that this code is used. 
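+ // For the transcendental entries below, RA and the pre-call SP are parked in F24/F25
+ // across the C call (presumably relying on these FP registers being callee-saved in
+ // the n64 ABI), and SP is aligned down to StackAlignmentInBytes before the call.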
+ + // mathematical functions inlined by compiler + // (interpreter must provide identical implementation + // in order to avoid monotonicity bugs when switching + // from interpreter to compiler in the middle of some + // computation) + // + // stack: + // [ arg ] <-- sp + // [ arg ] + // retaddr in ra + + address entry_point = NULL; + switch (kind) { + case Interpreter::java_lang_math_abs: + entry_point = __ pc(); + __ ldc1(F12, SP, 0); + __ abs_d(F0, F12); + __ move(SP, Rsender); + break; + case Interpreter::java_lang_math_sqrt: + entry_point = __ pc(); + __ ldc1(F12, SP, 0); + __ sqrt_d(F0, F12); + __ move(SP, Rsender); + break; + case Interpreter::java_lang_math_sin : + case Interpreter::java_lang_math_cos : + case Interpreter::java_lang_math_tan : + case Interpreter::java_lang_math_log : + case Interpreter::java_lang_math_log10 : + case Interpreter::java_lang_math_exp : + entry_point = __ pc(); + __ ldc1(F12, SP, 0); + __ move(SP, Rsender); + __ dmtc1(RA, F24); + __ dmtc1(SP, F25); + __ dins(SP, R0, 0, exact_log2(StackAlignmentInBytes)); + generate_transcendental_entry(kind, 1); + __ dmfc1(SP, F25); + __ dmfc1(RA, F24); + break; + case Interpreter::java_lang_math_pow : + entry_point = __ pc(); + __ ldc1(F12, SP, 2 * Interpreter::stackElementSize); + __ ldc1(F13, SP, 0); + __ move(SP, Rsender); + __ dmtc1(RA, F24); + __ dmtc1(SP, F25); + __ dins(SP, R0, 0, exact_log2(StackAlignmentInBytes)); + generate_transcendental_entry(kind, 2); + __ dmfc1(SP, F25); + __ dmfc1(RA, F24); + break; + case Interpreter::java_lang_math_fmaD : + if (UseFMA) { + entry_point = __ pc(); + __ ldc1(F12, SP, 4 * Interpreter::stackElementSize); + __ ldc1(F13, SP, 2 * Interpreter::stackElementSize); + __ ldc1(F14, SP, 0); + __ madd_d(F0, F14, F13, F12); + __ move(SP, Rsender); + } + break; + case Interpreter::java_lang_math_fmaF : + if (UseFMA) { + entry_point = __ pc(); + __ lwc1(F12, SP, 2 * Interpreter::stackElementSize); + __ lwc1(F13, SP, Interpreter::stackElementSize); + __ lwc1(F14, SP, 0); + __ madd_s(F0, F14, F13, F12); + __ move(SP, Rsender); + } + break; + default: + ; + } + if (entry_point) { + __ jr(RA); + __ delayed()->nop(); + } + + return entry_point; +} + + // double trigonometrics and transcendentals + // static jdouble dsin(jdouble x); + // static jdouble dcos(jdouble x); + // static jdouble dtan(jdouble x); + // static jdouble dlog(jdouble x); + // static jdouble dlog10(jdouble x); + // static jdouble dexp(jdouble x); + // static jdouble dpow(jdouble x, jdouble y); + +void TemplateInterpreterGenerator::generate_transcendental_entry(AbstractInterpreter::MethodKind kind, int fpargs) { + address fn; + switch (kind) { + case Interpreter::java_lang_math_sin : + if (StubRoutines::dsin() == NULL) { + fn = CAST_FROM_FN_PTR(address, SharedRuntime::dsin); + } else { + fn = CAST_FROM_FN_PTR(address, StubRoutines::dsin()); + } + break; + case Interpreter::java_lang_math_cos : + if (StubRoutines::dcos() == NULL) { + fn = CAST_FROM_FN_PTR(address, SharedRuntime::dcos); + } else { + fn = CAST_FROM_FN_PTR(address, StubRoutines::dcos()); + } + break; + case Interpreter::java_lang_math_tan : + if (StubRoutines::dtan() == NULL) { + fn = CAST_FROM_FN_PTR(address, SharedRuntime::dtan); + } else { + fn = CAST_FROM_FN_PTR(address, StubRoutines::dtan()); + } + break; + case Interpreter::java_lang_math_log : + if (StubRoutines::dlog() == NULL) { + fn = CAST_FROM_FN_PTR(address, SharedRuntime::dlog); + } else { + fn = CAST_FROM_FN_PTR(address, StubRoutines::dlog()); + } + break; + case 
Interpreter::java_lang_math_log10 : + if (StubRoutines::dlog10() == NULL) { + fn = CAST_FROM_FN_PTR(address, SharedRuntime::dlog10); + } else { + fn = CAST_FROM_FN_PTR(address, StubRoutines::dlog10()); + } + break; + case Interpreter::java_lang_math_exp : + if (StubRoutines::dexp() == NULL) { + fn = CAST_FROM_FN_PTR(address, SharedRuntime::dexp); + } else { + fn = CAST_FROM_FN_PTR(address, StubRoutines::dexp()); + } + break; + case Interpreter::java_lang_math_pow : + if (StubRoutines::dpow() == NULL) { + fn = CAST_FROM_FN_PTR(address, SharedRuntime::dpow); + } else { + fn = CAST_FROM_FN_PTR(address, StubRoutines::dpow()); + } + break; + default: + ShouldNotReachHere(); + fn = NULL; // unreachable + } + __ li(T9, fn); + __ jalr(T9); + __ delayed()->nop(); +} + +// Abstract method entry +// Attempt to execute abstract method. Throw exception +address TemplateInterpreterGenerator::generate_abstract_entry(void) { + + // Rmethod: methodOop + // V0: receiver (unused) + // Rsender : sender 's sp + address entry_point = __ pc(); + + // abstract method entry + // throw exception + // adjust stack to what a normal return would do + __ empty_expression_stack(); + __ restore_bcp(); + __ restore_locals(); + __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_AbstractMethodErrorWithMethod), Rmethod); + // the call_VM checks for exception, so we should never return here. + __ should_not_reach_here(); + + return entry_point; +} + + +const int method_offset = frame::interpreter_frame_method_offset * wordSize; +const int bci_offset = frame::interpreter_frame_bcp_offset * wordSize; +const int locals_offset = frame::interpreter_frame_locals_offset * wordSize; + +//----------------------------------------------------------------------------- + +address TemplateInterpreterGenerator::generate_StackOverflowError_handler() { + address entry = __ pc(); + +#ifdef ASSERT + { + Label L; + __ addiu(T1, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); + __ subu(T1, T1, SP); // T1 = maximal sp for current fp + __ bgez(T1, L); // check if frame is complete + __ delayed()->nop(); + __ stop("interpreter frame not set up"); + __ bind(L); + } +#endif // ASSERT + // Restore bcp under the assumption that the current frame is still + // interpreted + // FIXME: please change the func restore_bcp + // S0 is the conventional register for bcp + __ restore_bcp(); + + // expression stack must be empty before entering the VM if an + // exception happened + __ empty_expression_stack(); + // throw exception + // FIXME: why do not pass parameter thread ? + __ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_StackOverflowError)); + return entry; +} + +address TemplateInterpreterGenerator::generate_ArrayIndexOutOfBounds_handler() { + address entry = __ pc(); + // expression stack must be empty before entering the VM if an + // exception happened + __ empty_expression_stack(); + // ??? 
convention: expect array in register A1 + __ call_VM(noreg, CAST_FROM_FN_PTR(address, + InterpreterRuntime::throw_ArrayIndexOutOfBoundsException), A1, A2); + return entry; +} + +address TemplateInterpreterGenerator::generate_ClassCastException_handler() { + address entry = __ pc(); + + // expression stack must be empty before entering the VM if an + // exception happened + __ empty_expression_stack(); + __ empty_FPU_stack(); + __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_ClassCastException), FSR); + return entry; +} + +address TemplateInterpreterGenerator::generate_exception_handler_common( + const char* name, const char* message, bool pass_oop) { + assert(!pass_oop || message == NULL, "either oop or message but not both"); + address entry = __ pc(); + + // expression stack must be empty before entering the VM if an exception happened + __ empty_expression_stack(); + // setup parameters + __ li(A1, (long)name); + if (pass_oop) { + __ call_VM(V0, + CAST_FROM_FN_PTR(address, InterpreterRuntime::create_klass_exception), A1, FSR); + } else { + __ li(A2, (long)message); + __ call_VM(V0, + CAST_FROM_FN_PTR(address, InterpreterRuntime::create_exception), A1, A2); + } + // throw exception + __ jmp(Interpreter::throw_exception_entry(), relocInfo::none); + __ delayed()->nop(); + return entry; +} + +address TemplateInterpreterGenerator::generate_return_entry_for(TosState state, int step, size_t index_size) { + + address entry = __ pc(); + + // Restore stack bottom in case i2c adjusted stack + __ ld(SP, Address(FP, frame::interpreter_frame_last_sp_offset * wordSize)); + // and NULL it as marker that sp is now tos until next java call + __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); + + __ restore_bcp(); + __ restore_locals(); + + // mdp: T8 + // ret: FSR + // tmp: T9 + if (state == atos) { + Register mdp = T8; + Register tmp = T9; + __ profile_return_type(mdp, FSR, tmp); + } + + + const Register cache = T9; + const Register index = T3; + __ get_cache_and_index_at_bcp(cache, index, 1, index_size); + + const Register flags = cache; + __ dsll(AT, index, Address::times_ptr); + __ daddu(AT, cache, AT); + __ lw(flags, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset())); + __ andi(flags, flags, ConstantPoolCacheEntry::parameter_size_mask); + __ dsll(AT, flags, Interpreter::logStackElementSize); + __ daddu(SP, SP, AT); + + Register java_thread; +#ifndef OPT_THREAD + java_thread = T9; + __ get_thread(java_thread); +#else + java_thread = TREG; +#endif + + __ check_and_handle_popframe(java_thread); + __ check_and_handle_earlyret(java_thread); + + __ dispatch_next(state, step); + + return entry; +} + + +address TemplateInterpreterGenerator::generate_deopt_entry_for(TosState state, + int step, + address continuation) { + address entry = __ pc(); + // NULL last_sp until next java call + __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); + __ restore_bcp(); + __ restore_locals(); + // handle exceptions + { + Label L; + const Register thread = TREG; +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + __ ld(AT, thread, in_bytes(Thread::pending_exception_offset())); + __ beq(AT, R0, L); + __ delayed()->nop(); + __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_pending_exception)); + __ should_not_reach_here(); + __ bind(L); + } + if (continuation == NULL) { + __ dispatch_next(state, step); + } else { + __ jump_to_entry(continuation); + __ delayed()->nop(); + } + return entry; +} + +int 
AbstractInterpreter::BasicType_as_index(BasicType type) { + int i = 0; + switch (type) { + case T_BOOLEAN: i = 0; break; + case T_CHAR : i = 1; break; + case T_BYTE : i = 2; break; + case T_SHORT : i = 3; break; + case T_INT : // fall through + case T_LONG : // fall through + case T_VOID : i = 4; break; + case T_FLOAT : i = 5; break; + case T_DOUBLE : i = 6; break; + case T_OBJECT : // fall through + case T_ARRAY : i = 7; break; + default : ShouldNotReachHere(); + } + assert(0 <= i && i < AbstractInterpreter::number_of_result_handlers, + "index out of bounds"); + return i; +} + + +address TemplateInterpreterGenerator::generate_result_handler_for( + BasicType type) { + address entry = __ pc(); + switch (type) { + case T_BOOLEAN: __ c2bool(V0); break; + case T_CHAR : __ andi(V0, V0, 0xFFFF); break; + case T_BYTE : __ sign_extend_byte (V0); break; + case T_SHORT : __ sign_extend_short(V0); break; + case T_INT : /* nothing to do */ break; + case T_FLOAT : /* nothing to do */ break; + case T_DOUBLE : /* nothing to do */ break; + case T_OBJECT : + { + __ ld(V0, FP, frame::interpreter_frame_oop_temp_offset * wordSize); + __ verify_oop(V0); // and verify it + } + break; + default : ShouldNotReachHere(); + } + __ jr(RA); // return from result handler + __ delayed()->nop(); + return entry; +} + +address TemplateInterpreterGenerator::generate_safept_entry_for( + TosState state, + address runtime_entry) { + address entry = __ pc(); + __ push(state); + __ call_VM(noreg, runtime_entry); + __ dispatch_via(vtos, Interpreter::_normal_table.table_for(vtos)); + return entry; +} + + + +// Helpers for commoning out cases in the various type of method entries. +// + + +// increment invocation count & check for overflow +// +// Note: checking for negative value instead of overflow +// so we have a 'sticky' overflow test +// +// prerequisites : method in T0, invocation counter in T3 +void TemplateInterpreterGenerator::generate_counter_incr( + Label* overflow, + Label* profile_method, + Label* profile_method_continue) { + Label done; + const Address invocation_counter(FSR, in_bytes(MethodCounters::invocation_counter_offset()) + + in_bytes(InvocationCounter::counter_offset())); + const Address backedge_counter (FSR, in_bytes(MethodCounters::backedge_counter_offset()) + + in_bytes(InvocationCounter::counter_offset())); + + __ get_method_counters(Rmethod, FSR, done); + + if (ProfileInterpreter) { // %%% Merge this into methodDataOop + __ lw(T9, FSR, in_bytes(MethodCounters::interpreter_invocation_counter_offset())); + __ incrementl(T9, 1); + __ sw(T9, FSR, in_bytes(MethodCounters::interpreter_invocation_counter_offset())); + } + // Update standard invocation counters + __ lw(T3, invocation_counter); + __ increment(T3, InvocationCounter::count_increment); + __ sw(T3, invocation_counter); // save invocation count + + __ lw(FSR, backedge_counter); // load backedge counter + __ li(AT, InvocationCounter::count_mask_value); // mask out the status bits + __ andr(FSR, FSR, AT); + + __ daddu(T3, T3, FSR); // add both counters + + if (ProfileInterpreter && profile_method != NULL) { + // Test to see if we should create a method data oop + if (Assembler::is_simm16(InvocationCounter::InterpreterProfileLimit)) { + __ slti(AT, T3, InvocationCounter::InterpreterProfileLimit); + } else { + __ li(AT, (long)&InvocationCounter::InterpreterProfileLimit); + __ lw(AT, AT, 0); + __ slt(AT, T3, AT); + } + + __ bne_far(AT, R0, *profile_method_continue); + __ delayed()->nop(); + + // if no method data exists, go to profile_method + __ 
test_method_data_pointer(FSR, *profile_method); + } + + if (Assembler::is_simm16(CompileThreshold)) { + __ srl(AT, T3, InvocationCounter::count_shift); + __ slti(AT, AT, CompileThreshold); + } else { + __ li(AT, (long)&InvocationCounter::InterpreterInvocationLimit); + __ lw(AT, AT, 0); + __ slt(AT, T3, AT); + } + + __ beq_far(AT, R0, *overflow); + __ delayed()->nop(); + __ bind(done); +} + +void TemplateInterpreterGenerator::generate_counter_overflow(Label& do_continue) { + + // Asm interpreter on entry + // S7 - locals + // S0 - bcp + // Rmethod - method + // FP - interpreter frame + + // On return (i.e. jump to entry_point) + // Rmethod - method + // RA - return address of interpreter caller + // tos - the last parameter to Java method + // SP - sender_sp + + // the bcp is valid if and only if it's not null + __ call_VM(NOREG, CAST_FROM_FN_PTR(address, + InterpreterRuntime::frequency_counter_overflow), R0); + __ ld(Rmethod, FP, method_offset); + // Preserve invariant that S0/S7 contain bcp/locals of sender frame + __ b_far(do_continue); + __ delayed()->nop(); +} + +// See if we've got enough room on the stack for locals plus overhead. +// The expression stack grows down incrementally, so the normal guard +// page mechanism will work for that. +// +// NOTE: Since the additional locals are also always pushed (wasn't +// obvious in generate_method_entry) so the guard should work for them +// too. +// +// Args: +// T2: number of additional locals this frame needs (what we must check) +// T0: Method* +// +void TemplateInterpreterGenerator::generate_stack_overflow_check(void) { + // see if we've got enough room on the stack for locals plus overhead. + // the expression stack grows down incrementally, so the normal guard + // page mechanism will work for that. + // + // Registers live on entry: + // + // T0: Method* + // T2: number of additional locals this frame needs (what we must check) + + // NOTE: since the additional locals are also always pushed (wasn't obvious in + // generate_method_entry) so the guard should work for them too. + // + + const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; + + // total overhead size: entry_size + (saved fp thru expr stack bottom). + // be sure to change this if you add/subtract anything to/from the overhead area + const int overhead_size = -(frame::interpreter_frame_initial_sp_offset*wordSize) + + entry_size; + + const int page_size = os::vm_page_size(); + + Label after_frame_check; + + // see if the frame is greater than one page in size. If so, + // then we need to verify there is enough stack space remaining + // for the additional locals. 
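+  // Fast path: T2 holds the number of additional locals. If they fit into
+  // (page_size - overhead_size) / Interpreter::stackElementSize slots, the
+  // frame is at most one page and the guard page alone is sufficient, so the
+  // explicit check below is skipped.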
+ __ move(AT, (page_size - overhead_size) / Interpreter::stackElementSize); + __ slt(AT, AT, T2); + __ beq(AT, R0, after_frame_check); + __ delayed()->nop(); + + // compute sp as if this were going to be the last frame on + // the stack before the red zone +#ifndef OPT_THREAD + Register thread = T1; + __ get_thread(thread); +#else + Register thread = TREG; +#endif + + // locals + overhead, in bytes + __ dsll(T3, T2, Interpreter::logStackElementSize); + __ daddiu(T3, T3, overhead_size); // locals * 4 + overhead_size --> T3 + +#ifdef ASSERT + Label stack_base_okay, stack_size_okay; + // verify that thread stack base is non-zero + __ ld(AT, thread, in_bytes(Thread::stack_base_offset())); + __ bne(AT, R0, stack_base_okay); + __ delayed()->nop(); + __ stop("stack base is zero"); + __ bind(stack_base_okay); + // verify that thread stack size is non-zero + __ ld(AT, thread, in_bytes(Thread::stack_size_offset())); + __ bne(AT, R0, stack_size_okay); + __ delayed()->nop(); + __ stop("stack size is zero"); + __ bind(stack_size_okay); +#endif + + // Add stack base to locals and subtract stack size + __ ld(AT, thread, in_bytes(Thread::stack_base_offset())); // stack_base --> AT + __ daddu(T3, T3, AT); // locals * 4 + overhead_size + stack_base--> T3 + __ ld(AT, thread, in_bytes(Thread::stack_size_offset())); // stack_size --> AT + __ dsubu(T3, T3, AT); // locals * 4 + overhead_size + stack_base - stack_size --> T3 + + // Use the bigger size for banging. + const int max_bang_size = (int)MAX2(JavaThread::stack_shadow_zone_size(), JavaThread::stack_guard_zone_size()); + + // add in the redzone and yellow size + __ move(AT, max_bang_size); + __ addu(T3, T3, AT); + + // check against the current stack bottom + __ slt(AT, T3, SP); + __ bne(AT, R0, after_frame_check); + __ delayed()->nop(); + + // Note: the restored frame is not necessarily interpreted. + // Use the shared runtime version of the StackOverflowError. + __ move(SP, Rsender); + assert(StubRoutines::throw_StackOverflowError_entry() != NULL, "stub not yet generated"); + __ jmp(StubRoutines::throw_StackOverflowError_entry(), relocInfo::runtime_call_type); + __ delayed()->nop(); + + // all done with frame size check + __ bind(after_frame_check); +} + +// Allocate monitor and lock method (asm interpreter) +// Rmethod - Method* +void TemplateInterpreterGenerator::lock_method(void) { + // synchronize method + const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; + +#ifdef ASSERT + { Label L; + __ lw(T0, Rmethod, in_bytes(Method::access_flags_offset())); + __ andi(T0, T0, JVM_ACC_SYNCHRONIZED); + __ bne(T0, R0, L); + __ delayed()->nop(); + __ stop("method doesn't need synchronization"); + __ bind(L); + } +#endif // ASSERT + // get synchronization object + { + Label done; + __ lw(T0, Rmethod, in_bytes(Method::access_flags_offset())); + __ andi(T2, T0, JVM_ACC_STATIC); + __ ld(T0, LVP, Interpreter::local_offset_in_bytes(0)); + __ beq(T2, R0, done); + __ delayed()->nop(); + __ load_mirror(T0, Rmethod, T9); + __ bind(done); + } + // add space for monitor & lock + __ daddiu(SP, SP, (-1) * entry_size); // add space for a monitor entry + __ sd(SP, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); + // set new monitor block top + __ sd(T0, SP, BasicObjectLock::obj_offset_in_bytes()); // store object + // FIXME: I do not know what lock_object will do and what it will need + __ move(c_rarg0, SP); // object address + __ lock_object(c_rarg0); +} + +// Generate a fixed interpreter frame. 
This is identical setup for +// interpreted methods and for native methods hence the shared code. +void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call) { + + // [ local var m-1 ] <--- sp + // ... + // [ local var 0 ] + // [ argumnet word n-1 ] <--- T0(sender's sp) + // ... + // [ argument word 0 ] <--- S7 + + // initialize fixed part of activation frame + // sender's sp in Rsender + int i = 0; + int frame_size = 10; +#ifndef CORE + ++frame_size; +#endif + __ daddiu(SP, SP, (-frame_size) * wordSize); + __ sd(RA, SP, (frame_size - 1) * wordSize); // save return address + __ sd(FP, SP, (frame_size - 2) * wordSize); // save sender's fp + __ daddiu(FP, SP, (frame_size - 2) * wordSize); + __ sd(Rsender, FP, (-++i) * wordSize); // save sender's sp + __ sd(R0, FP,(-++i) * wordSize); //save last_sp as null + __ sd(LVP, FP, (-++i) * wordSize); // save locals offset + __ ld(BCP, Rmethod, in_bytes(Method::const_offset())); // get constMethodOop + __ daddiu(BCP, BCP, in_bytes(ConstMethod::codes_offset())); // get codebase + __ sd(Rmethod, FP, (-++i) * wordSize); // save Method* + // Get mirror and store it in the frame as GC root for this Method* + __ load_mirror(T2, Rmethod, T9); + __ sd(T2, FP, (-++i) * wordSize); // Mirror +#ifndef CORE + if (ProfileInterpreter) { + Label method_data_continue; + __ ld(AT, Rmethod, in_bytes(Method::method_data_offset())); + __ beq(AT, R0, method_data_continue); + __ delayed()->nop(); + __ daddiu(AT, AT, in_bytes(MethodData::data_offset())); + __ bind(method_data_continue); + __ sd(AT, FP, (-++i) * wordSize); + } else { + __ sd(R0, FP, (-++i) * wordSize); + } +#endif // !CORE + + __ ld(T2, Rmethod, in_bytes(Method::const_offset())); + __ ld(T2, T2, in_bytes(ConstMethod::constants_offset())); + __ ld(T2, T2, ConstantPool::cache_offset_in_bytes()); + __ sd(T2, FP, (-++i) * wordSize); // set constant pool cache + if (native_call) { + __ sd(R0, FP, (-++i) * wordSize); // no bcp + } else { + __ sd(BCP, FP, (-++i) * wordSize); // set bcp + } + __ sd(SP, FP, (-++i) * wordSize); // reserve word for pointer to expression stack bottom + assert(i + 2 == frame_size, "i + 2 should be equal to frame_size"); +} + +// End of helpers + +// Various method entries +//------------------------------------------------------------------------------------------------------------------------ +// +// + +// Method entry for java.lang.ref.Reference.get. +address TemplateInterpreterGenerator::generate_Reference_get_entry(void) { + address entry = __ pc(); + Label slow_path; + __ b(slow_path); + __ delayed()->nop(); + + // generate a vanilla interpreter entry as the slow path + __ bind(slow_path); + __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::zerolocals)); + __ delayed()->nop(); + return entry; +} + +// Interpreter stub for calling a native method. (asm interpreter) +// This sets up a somewhat different looking stack for calling the +// native method than the typical interpreter frame setup. 
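+// In outline (see the code below): the parameter count is read from the
+// ConstMethod, a fixed frame is built with bcp == 0, the signature handler
+// copies the Java arguments into C ABI locations, JNIEnv (and the mirror
+// handle for static methods) are passed as the leading arguments, the thread
+// state is switched to _thread_in_native around the call, and on return the
+// result is saved while the safepoint and pending-exception checks run before
+// the activation is removed.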
+address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) { + // determine code generation flags + bool inc_counter = UseCompiler || CountCompiledCalls || LogTouchedMethods; + // Rsender: sender's sp + // Rmethod: Method* + address entry_point = __ pc(); + +#ifndef CORE + const Address invocation_counter(Rmethod,in_bytes(MethodCounters::invocation_counter_offset() + + InvocationCounter::counter_offset())); +#endif + + // get parameter size (always needed) + // the size in the java stack + __ ld(V0, Rmethod, in_bytes(Method::const_offset())); + __ lhu(V0, V0, in_bytes(ConstMethod::size_of_parameters_offset())); + + // native calls don't need the stack size check since they have no expression stack + // and the arguments are already on the stack and we only add a handful of words + // to the stack + + // Rmethod: Method* + // V0: size of parameters + // Layout of frame at this point + // + // [ argument word n-1 ] <--- sp + // ... + // [ argument word 0 ] + + // for natives the size of locals is zero + + // compute beginning of parameters (S7) + __ dsll(LVP, V0, Address::times_8); + __ daddiu(LVP, LVP, (-1) * wordSize); + __ daddu(LVP, LVP, SP); + + + // add 2 zero-initialized slots for native calls + // 1 slot for native oop temp offset (setup via runtime) + // 1 slot for static native result handler3 (setup via runtime) + __ push2(R0, R0); + + // Layout of frame at this point + // [ method holder mirror ] <--- sp + // [ result type info ] + // [ argument word n-1 ] <--- T0 + // ... + // [ argument word 0 ] <--- LVP + + +#ifndef CORE + if (inc_counter) __ lw(T3, invocation_counter); // (pre-)fetch invocation count +#endif + + // initialize fixed part of activation frame + generate_fixed_frame(true); + // after this function, the layout of frame is as following + // + // [ monitor block top ] <--- sp ( the top monitor entry ) + // [ byte code pointer (0) ] (if native, bcp = 0) + // [ constant pool cache ] + // [ Mirror ] + // [ Method* ] + // [ locals offset ] + // [ sender's sp ] + // [ sender's fp ] + // [ return address ] <--- fp + // [ method holder mirror ] + // [ result type info ] + // [ argumnet word n-1 ] <--- sender's sp + // ... + // [ argument word 0 ] <--- S7 + + + // make sure method is native & not abstract +#ifdef ASSERT + __ lw(T0, Rmethod, in_bytes(Method::access_flags_offset())); + { + Label L; + __ andi(AT, T0, JVM_ACC_NATIVE); + __ bne(AT, R0, L); + __ delayed()->nop(); + __ stop("tried to execute native method as non-native"); + __ bind(L); + } + { + Label L; + __ andi(AT, T0, JVM_ACC_ABSTRACT); + __ beq(AT, R0, L); + __ delayed()->nop(); + __ stop("tried to execute abstract method in interpreter"); + __ bind(L); + } +#endif + + // Since at this point in the method invocation the exception handler + // would try to exit the monitor of synchronized methods which hasn't + // been entered yet, we set the thread local variable + // _do_not_unlock_if_synchronized to true. The remove_activation will + // check this flag. 
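+  // Note on the thread register used below: with OPT_THREAD the JavaThread*
+  // is already live in TREG, otherwise it must be (re)loaded with get_thread()
+  // after any call that may clobber it (this pattern repeats throughout this
+  // file).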
+ Register thread = TREG; +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + __ move(AT, (int)true); + __ sb(AT, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); + +#ifndef CORE + // increment invocation count & check for overflow + Label invocation_counter_overflow; + if (inc_counter) { + generate_counter_incr(&invocation_counter_overflow, NULL, NULL); + } + + Label continue_after_compile; + __ bind(continue_after_compile); +#endif // CORE + + bang_stack_shadow_pages(true); + + // reset the _do_not_unlock_if_synchronized flag +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + __ sb(R0, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); + + // check for synchronized methods + // Must happen AFTER invocation_counter check and stack overflow check, + // so method is not locked if overflows. + if (synchronized) { + lock_method(); + } else { + // no synchronization necessary +#ifdef ASSERT + { + Label L; + __ lw(T0, Rmethod, in_bytes(Method::access_flags_offset())); + __ andi(AT, T0, JVM_ACC_SYNCHRONIZED); + __ beq(AT, R0, L); + __ delayed()->nop(); + __ stop("method needs synchronization"); + __ bind(L); + } +#endif + } + + // after method_lock, the layout of frame is as following + // + // [ monitor entry ] <--- sp + // ... + // [ monitor entry ] + // [ monitor block top ] ( the top monitor entry ) + // [ byte code pointer (0) ] (if native, bcp = 0) + // [ constant pool cache ] + // [ Mirror ] + // [ Method* ] + // [ locals offset ] + // [ sender's sp ] + // [ sender's fp ] + // [ return address ] <--- fp + // [ method holder mirror ] + // [ result type info ] + // [ argumnet word n-1 ] <--- ( sender's sp ) + // ... + // [ argument word 0 ] <--- S7 + + // start execution +#ifdef ASSERT + { + Label L; + __ ld(AT, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); + __ beq(AT, SP, L); + __ delayed()->nop(); + __ stop("broken stack frame setup in interpreter in asm"); + __ bind(L); + } +#endif + + // jvmti/jvmpi support + __ notify_method_entry(); + + // work registers + const Register method = Rmethod; + const Register t = T8; + + __ get_method(method); + { + Label L, Lstatic; + __ ld(t,method,in_bytes(Method::const_offset())); + __ lhu(t, t, in_bytes(ConstMethod::size_of_parameters_offset())); + // MIPS n64 ABI: caller does not reserve space for the register auguments. + // A0 and A1(if needed) + __ lw(AT, Rmethod, in_bytes(Method::access_flags_offset())); + __ andi(AT, AT, JVM_ACC_STATIC); + __ beq(AT, R0, Lstatic); + __ delayed()->nop(); + __ daddiu(t, t, 1); + __ bind(Lstatic); + __ daddiu(t, t, -7); + __ blez(t, L); + __ delayed()->nop(); + __ dsll(t, t, Address::times_8); + __ dsubu(SP, SP, t); + __ bind(L); + } + __ move(AT, -(StackAlignmentInBytes)); + __ andr(SP, SP, AT); + __ move(AT, SP); + // [ ] <--- sp + // ... (size of parameters - 8 ) + // [ monitor entry ] + // ... + // [ monitor entry ] + // [ monitor block top ] ( the top monitor entry ) + // [ byte code pointer (0) ] (if native, bcp = 0) + // [ constant pool cache ] + // [ Mirror ] + // [ Method* ] + // [ locals offset ] + // [ sender's sp ] + // [ sender's fp ] + // [ return address ] <--- fp + // [ method holder mirror ] + // [ result type info ] + // [ argumnet word n-1 ] <--- ( sender's sp ) + // ... 
+ // [ argument word 0 ] <--- LVP + + // get signature handler + { + Label L; + __ ld(T9, method, in_bytes(Method::signature_handler_offset())); + __ bne(T9, R0, L); + __ delayed()->nop(); + __ call_VM(NOREG, CAST_FROM_FN_PTR(address, + InterpreterRuntime::prepare_native_call), method); + __ get_method(method); + __ ld(T9, method, in_bytes(Method::signature_handler_offset())); + __ bind(L); + } + + // call signature handler + // FIXME: when change codes in InterpreterRuntime, note this point + // from: begin of parameters + assert(InterpreterRuntime::SignatureHandlerGenerator::from() == LVP, "adjust this code"); + // to: current sp + assert(InterpreterRuntime::SignatureHandlerGenerator::to () == SP, "adjust this code"); + // temp: T3 + assert(InterpreterRuntime::SignatureHandlerGenerator::temp() == t , "adjust this code"); + + __ jalr(T9); + __ delayed()->nop(); + __ get_method(method); + + // + // if native function is static, and its second parameter has type length of double word, + // and first parameter has type length of word, we have to reserve one word + // for the first parameter, according to mips o32 abi. + // if native function is not static, and its third parameter has type length of double word, + // and second parameter has type length of word, we have to reserve one word for the second + // parameter. + // + + + // result handler is in V0 + // set result handler + __ sd(V0, FP, (frame::interpreter_frame_result_handler_offset)*wordSize); + +#define FIRSTPARA_SHIFT_COUNT 5 +#define SECONDPARA_SHIFT_COUNT 9 +#define THIRDPARA_SHIFT_COUNT 13 +#define PARA_MASK 0xf + + // pass mirror handle if static call + { + Label L; + __ lw(t, method, in_bytes(Method::access_flags_offset())); + __ andi(AT, t, JVM_ACC_STATIC); + __ beq(AT, R0, L); + __ delayed()->nop(); + + // get mirror + __ load_mirror(t, method, T9); + // copy mirror into activation frame + __ sd(t, FP, frame::interpreter_frame_oop_temp_offset * wordSize); + // pass handle to mirror + __ daddiu(t, FP, frame::interpreter_frame_oop_temp_offset * wordSize); + __ move(A1, t); + __ bind(L); + } + + // [ mthd holder mirror ptr ] <--- sp --------------------| (only for static method) + // [ ] | + // ... size of parameters(or +1) | + // [ monitor entry ] | + // ... | + // [ monitor entry ] | + // [ monitor block top ] ( the top monitor entry ) | + // [ byte code pointer (0) ] (if native, bcp = 0) | + // [ constant pool cache ] | + // [ Mirror ] | + // [ Method* ] | + // [ locals offset ] | + // [ sender's sp ] | + // [ sender's fp ] | + // [ return address ] <--- fp | + // [ method holder mirror ] <----------------------------| + // [ result type info ] + // [ argumnet word n-1 ] <--- ( sender's sp ) + // ... + // [ argument word 0 ] <--- S7 + + // get native function entry point + { Label L; + __ ld(T9, method, in_bytes(Method::native_function_offset())); + __ li(V1, SharedRuntime::native_method_throw_unsatisfied_link_error_entry()); + __ bne(V1, T9, L); + __ delayed()->nop(); + __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::prepare_native_call), method); + __ get_method(method); + __ ld(T9, method, in_bytes(Method::native_function_offset())); + __ bind(L); + } + + // pass JNIEnv + // native function in T9 +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + __ daddiu(t, thread, in_bytes(JavaThread::jni_environment_offset())); + __ move(A0, t); + // [ jni environment ] <--- sp + // [ mthd holder mirror ptr ] ---------------------------->| (only for static method) + // [ ] | + // ... 
size of parameters           |
+  // [ monitor entry            ]                          |
+  //   ...                                                 |
+  // [ monitor entry            ]                          |
+  // [ monitor block top        ] ( the top monitor entry )|
+  // [ byte code pointer (0)    ] (if native, bcp = 0)     |
+  // [ constant pool cache      ]                          |
+  // [ Mirror                   ]                          |
+  // [ Method*                  ]                          |
+  // [ locals offset            ]                          |
+  // [ sender's sp              ]                          |
+  // [ sender's fp              ]                          |
+  // [ return address           ] <--- fp                  |
+  // [ method holder mirror     ] <------------------------|
+  // [ result type info         ]
+  // [ argument word n-1        ] <--- ( sender's sp )
+  //   ...
+  // [ argument word 0          ] <--- S7
+
+  // set_last_Java_frame_before_call
+  __ sd(FP, thread, in_bytes(JavaThread::last_Java_fp_offset()));
+  // Change state to native (we save the return address in the thread, since it might not
+  // be pushed on the stack when we do a stack traversal). It is enough that the pc()
+  // points into the right code segment. It does not have to be the correct return pc.
+  __ li(t, __ pc());
+  __ sd(t, thread, in_bytes(JavaThread::last_Java_pc_offset()));
+  __ sd(SP, thread, in_bytes(JavaThread::last_Java_sp_offset()));
+
+  // change thread state
+#ifdef ASSERT
+  {
+    Label L;
+    __ lw(t, thread, in_bytes(JavaThread::thread_state_offset()));
+    __ daddiu(t, t, (-1) * _thread_in_Java);
+    __ beq(t, R0, L);
+    __ delayed()->nop();
+    __ stop("Wrong thread state in native stub");
+    __ bind(L);
+  }
+#endif
+
+  __ move(t, _thread_in_native);
+  if (os::is_MP()) {
+    __ sync(); // store release
+  }
+  __ sw(t, thread, in_bytes(JavaThread::thread_state_offset()));
+
+  // call native method
+  __ jalr(T9);
+  __ delayed()->nop();
+  // result potentially in V0 or F0
+
+
+  // via _last_native_pc and not via _last_java_sp
+  // NOTE: the order of these push(es) is known to frame::interpreter_frame_result.
+  // If the order changes or anything else is added to the stack the code in
+  // interpreter_frame_result will have to be changed.
+  //FIXME, should modify here
+  // save return value to keep the value from being destroyed by other calls
+  __ push(dtos);
+  __ push(ltos);
+
+  // change thread state
+#ifndef OPT_THREAD
+  __ get_thread(thread);
+#endif
+  __ move(t, _thread_in_native_trans);
+  if (os::is_MP()) {
+    __ sync(); // store release
+  }
+  __ sw(t, thread, in_bytes(JavaThread::thread_state_offset()));
+
+  if (os::is_MP()) {
+    if (UseMembar) {
+      // Force this write out before the read below
+      __ sync();
+    } else {
+      // Write serialization page so VM thread can do a pseudo remote membar.
+      // We use the current thread pointer to calculate a thread specific
+      // offset to write to within the page. This minimizes bus traffic
+      // due to cache line collision.
+      __ serialize_memory(thread, A0);
+    }
+  }
+
+  // check for safepoint operation in progress and/or pending suspend requests
+  { Label Continue;
+
+    // Don't use call_VM as it will see a possible pending exception and forward it
+    // and never return here, preventing us from clearing _last_native_pc down below.
+    // Also can't use call_VM_leaf either, as it will check to see if BCP & LVP are
+    // preserved and correspond to the bcp/locals pointers. So we do a runtime call
+    // by hand.
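+    // At this point the thread state is _thread_in_native_trans: the safepoint
+    // poll and the suspend flags are checked, and only if neither is set do we
+    // proceed to _thread_in_Java; otherwise
+    // check_special_condition_for_native_trans() is called first.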
+ // + Label slow_path; + + __ safepoint_poll_acquire(slow_path, thread); + __ lw(AT, thread, in_bytes(JavaThread::suspend_flags_offset())); + __ beq(AT, R0, Continue); + __ delayed()->nop(); + __ bind(slow_path); + __ move(A0, thread); + __ call(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans), + relocInfo::runtime_call_type); + __ delayed()->nop(); + +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + //add for compressedoops + __ reinit_heapbase(); + __ bind(Continue); + } + + // change thread state + __ move(t, _thread_in_Java); + if(os::is_MP()) { + __ sync(); // store release + } + __ sw(t, thread, in_bytes(JavaThread::thread_state_offset())); + __ reset_last_Java_frame(thread, true); + + if (CheckJNICalls) { + // clear_pending_jni_exception_check + __ sd(R0, thread, in_bytes(JavaThread::pending_jni_exception_check_fn_offset())); + } + + // reset handle block + __ ld(t, thread, in_bytes(JavaThread::active_handles_offset())); + __ sw(R0, t, JNIHandleBlock::top_offset_in_bytes()); + + // If result was an oop then unbox and save it in the frame + { + Label no_oop; + //FIXME, addi only support 16-bit imeditate + __ ld(AT, FP, frame::interpreter_frame_result_handler_offset*wordSize); + __ li(T0, AbstractInterpreter::result_handler(T_OBJECT)); + __ bne(AT, T0, no_oop); + __ delayed()->nop(); + __ pop(ltos); + // Unbox oop result, e.g. JNIHandles::resolve value. + __ resolve_jobject(V0, thread, T9); + __ sd(V0, FP, (frame::interpreter_frame_oop_temp_offset)*wordSize); + // keep stack depth as expected by pushing oop which will eventually be discarded + __ push(ltos); + __ bind(no_oop); + } + { + Label no_reguard; + __ lw(t, thread, in_bytes(JavaThread::stack_guard_state_offset())); + __ move(AT, (u1)JavaThread::stack_guard_yellow_reserved_disabled); + __ bne(t, AT, no_reguard); + __ delayed()->nop(); + __ pushad(); + __ move(S5_heapbase, SP); + __ move(AT, -StackAlignmentInBytes); + __ andr(SP, SP, AT); + __ call(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages), relocInfo::runtime_call_type); + __ delayed()->nop(); + __ move(SP, S5_heapbase); + __ popad(); + //add for compressedoops + __ reinit_heapbase(); + __ bind(no_reguard); + } + // restore BCP to have legal interpreter frame, + // i.e., bci == 0 <=> BCP == code_base() + // Can't call_VM until bcp is within reasonable. + __ get_method(method); // method is junk from thread_in_native to now. + __ ld(BCP, method, in_bytes(Method::const_offset())); + __ lea(BCP, Address(BCP, in_bytes(ConstMethod::codes_offset()))); + // handle exceptions (exception handling will handle unlocking!) + { + Label L; + __ ld(t, thread, in_bytes(Thread::pending_exception_offset())); + __ beq(t, R0, L); + __ delayed()->nop(); + // Note: At some point we may want to unify this with the code used in + // call_VM_base(); + // i.e., we should use the StubRoutines::forward_exception code. For now this + // doesn't work here because the sp is not correctly set at this point. + __ MacroAssembler::call_VM(noreg, + CAST_FROM_FN_PTR(address, + InterpreterRuntime::throw_pending_exception)); + __ should_not_reach_here(); + __ bind(L); + } + + // do unlocking if necessary + { + Label L; + __ lw(t, method, in_bytes(Method::access_flags_offset())); + __ andi(t, t, JVM_ACC_SYNCHRONIZED); + __ beq(t, R0, L); + // the code below should be shared with interpreter macro assembler implementation + { + Label unlock; + // BasicObjectLock will be first in list, + // since this is a synchronized method. 
However, need + // to check that the object has not been unlocked by + // an explicit monitorexit bytecode. + __ delayed()->daddiu(c_rarg0, FP, frame::interpreter_frame_initial_sp_offset * wordSize - (int)sizeof(BasicObjectLock)); + // address of first monitor + + __ ld(t, c_rarg0, BasicObjectLock::obj_offset_in_bytes()); + __ bne(t, R0, unlock); + __ delayed()->nop(); + + // Entry already unlocked, need to throw exception + __ MacroAssembler::call_VM(NOREG, CAST_FROM_FN_PTR(address, + InterpreterRuntime::throw_illegal_monitor_state_exception)); + __ should_not_reach_here(); + + __ bind(unlock); + __ unlock_object(c_rarg0); + } + __ bind(L); + } + + // jvmti/jvmpi support + // Note: This must happen _after_ handling/throwing any exceptions since + // the exception handler code notifies the runtime of method exits + // too. If this happens before, method entry/exit notifications are + // not properly paired (was bug - gri 11/22/99). + __ notify_method_exit(vtos, InterpreterMacroAssembler::NotifyJVMTI); + + // restore potential result in V0, + // call result handler to restore potential result in ST0 & handle result + + __ pop(ltos); + __ pop(dtos); + + __ ld(t, FP, (frame::interpreter_frame_result_handler_offset) * wordSize); + __ jalr(t); + __ delayed()->nop(); + + + // remove activation + __ ld(SP, FP, frame::interpreter_frame_sender_sp_offset * wordSize); // get sender sp + __ ld(RA, FP, frame::interpreter_frame_return_addr_offset * wordSize); // get return address + __ ld(FP, FP, frame::interpreter_frame_sender_fp_offset * wordSize); // restore sender's fp + __ jr(RA); + __ delayed()->nop(); + +#ifndef CORE + if (inc_counter) { + // Handle overflow of counter and compile method + __ bind(invocation_counter_overflow); + generate_counter_overflow(continue_after_compile); + // entry_point is the beginning of this + // function and checks again for compiled code + } +#endif + return entry_point; +} + +void TemplateInterpreterGenerator::bang_stack_shadow_pages(bool native_call) { + // Quick & dirty stack overflow checking: bang the stack & handle trap. + // Note that we do the banging after the frame is setup, since the exception + // handling code expects to find a valid interpreter frame on the stack. + // Doing the banging earlier fails if the caller frame is not an interpreter + // frame. + // (Also, the exception throwing code expects to unlock any synchronized + // method receiever, so do the banging after locking the receiver.) + + // Bang each page in the shadow zone. We can't assume it's been done for + // an interpreter frame with greater than a page of locals, so each page + // needs to be checked. Only true for non-native. + if (UseStackBanging) { + const int page_size = os::vm_page_size(); + const int n_shadow_pages = ((int)JavaThread::stack_shadow_zone_size()) / page_size; + const int start_page = native_call ? n_shadow_pages : 1; + BLOCK_COMMENT("bang_stack_shadow_pages:"); + for (int pages = start_page; pages <= n_shadow_pages; pages++) { + __ bang_stack_with_offset(pages*page_size); + } + } +} + +// +// Generic interpreted method entry to (asm) interpreter +// +// Layout of frame just at the entry +// +// [ argument word n-1 ] <--- sp +// ... +// [ argument word 0 ] +// assume Method* in Rmethod before call this method. 
+// prerequisites to the generated stub : the callee Method* in Rmethod +// note you must save the caller bcp before call the generated stub +// +address TemplateInterpreterGenerator::generate_normal_entry(bool synchronized) { + // determine code generation flags + bool inc_counter = UseCompiler || CountCompiledCalls || LogTouchedMethods; + + // Rmethod: Method* + // Rsender: sender 's sp + address entry_point = __ pc(); + + const Address invocation_counter(Rmethod, + in_bytes(MethodCounters::invocation_counter_offset() + InvocationCounter::counter_offset())); + + // get parameter size (always needed) + __ ld(T3, Rmethod, in_bytes(Method::const_offset())); //T3 --> Rmethod._constMethod + __ lhu(V0, T3, in_bytes(ConstMethod::size_of_parameters_offset())); + + // Rmethod: Method* + // V0: size of parameters + // Rsender: sender 's sp ,could be different frome sp+ wordSize if we call via c2i + // get size of locals in words to T2 + __ lhu(T2, T3, in_bytes(ConstMethod::size_of_locals_offset())); + // T2 = no. of additional locals, locals include parameters + __ dsubu(T2, T2, V0); + + // see if we've got enough room on the stack for locals plus overhead. + // Layout of frame at this point + // + // [ argument word n-1 ] <--- sp + // ... + // [ argument word 0 ] + generate_stack_overflow_check(); + // after this function, the layout of frame does not change + + // compute beginning of parameters (LVP) + __ dsll(LVP, V0, LogBytesPerWord); + __ daddiu(LVP, LVP, (-1) * wordSize); + __ daddu(LVP, LVP, SP); + + // T2 - # of additional locals + // allocate space for locals + // explicitly initialize locals + { + Label exit, loop; + __ beq(T2, R0, exit); + __ delayed()->nop(); + + __ bind(loop); + __ daddiu(SP, SP, (-1) * wordSize); + __ daddiu(T2, T2, -1); // until everything initialized + __ bne(T2, R0, loop); + __ delayed()->sd(R0, SP, 0); // initialize local variables + + __ bind(exit); + } + + // + // [ local var m-1 ] <--- sp + // ... + // [ local var 0 ] + // [ argument word n-1 ] <--- T0? + // ... + // [ argument word 0 ] <--- LVP + + // initialize fixed part of activation frame + + generate_fixed_frame(false); + + + // after this function, the layout of frame is as following + // + // [ monitor block top ] <--- sp ( the top monitor entry ) + // [ byte code pointer ] (if native, bcp = 0) + // [ constant pool cache ] + // [ Method* ] + // [ locals offset ] + // [ sender's sp ] + // [ sender's fp ] <--- fp + // [ return address ] + // [ local var m-1 ] + // ... + // [ local var 0 ] + // [ argumnet word n-1 ] <--- ( sender's sp ) + // ... + // [ argument word 0 ] <--- LVP + + + // make sure method is not native & not abstract +#ifdef ASSERT + __ ld(AT, Rmethod, in_bytes(Method::access_flags_offset())); + { + Label L; + __ andi(T2, AT, JVM_ACC_NATIVE); + __ beq(T2, R0, L); + __ delayed()->nop(); + __ stop("tried to execute native method as non-native"); + __ bind(L); + } + { + Label L; + __ andi(T2, AT, JVM_ACC_ABSTRACT); + __ beq(T2, R0, L); + __ delayed()->nop(); + __ stop("tried to execute abstract method in interpreter"); + __ bind(L); + } +#endif + + // Since at this point in the method invocation the exception handler + // would try to exit the monitor of synchronized methods which hasn't + // been entered yet, we set the thread local variable + // _do_not_unlock_if_synchronized to true. The remove_activation will + // check this flag. 
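+  // The flag is cleared again further down, once the shadow pages have been
+  // banged and before the method is (possibly) locked, so a regular
+  // remove_activation() will unlock as usual after that point.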
+ +#ifndef OPT_THREAD + Register thread = T8; + __ get_thread(thread); +#else + Register thread = TREG; +#endif + __ move(AT, (int)true); + __ sb(AT, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); + +#ifndef CORE + + // mdp : T8 + // tmp1: T9 + // tmp2: T2 + __ profile_parameters_type(T8, T9, T2); + + // increment invocation count & check for overflow + Label invocation_counter_overflow; + Label profile_method; + Label profile_method_continue; + if (inc_counter) { + generate_counter_incr(&invocation_counter_overflow, + &profile_method, + &profile_method_continue); + if (ProfileInterpreter) { + __ bind(profile_method_continue); + } + } + + Label continue_after_compile; + __ bind(continue_after_compile); + +#endif // CORE + + bang_stack_shadow_pages(false); + + // reset the _do_not_unlock_if_synchronized flag +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + __ sb(R0, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); + + // check for synchronized methods + // Must happen AFTER invocation_counter check and stack overflow check, + // so method is not locked if overflows. + // + if (synchronized) { + // Allocate monitor and lock method + lock_method(); + } else { + // no synchronization necessary +#ifdef ASSERT + { Label L; + __ lw(AT, Rmethod, in_bytes(Method::access_flags_offset())); + __ andi(T2, AT, JVM_ACC_SYNCHRONIZED); + __ beq(T2, R0, L); + __ delayed()->nop(); + __ stop("method needs synchronization"); + __ bind(L); + } +#endif + } + + // layout of frame after lock_method + // [ monitor entry ] <--- sp + // ... + // [ monitor entry ] + // [ monitor block top ] ( the top monitor entry ) + // [ byte code pointer ] (if native, bcp = 0) + // [ constant pool cache ] + // [ Method* ] + // [ locals offset ] + // [ sender's sp ] + // [ sender's fp ] + // [ return address ] <--- fp + // [ local var m-1 ] + // ... + // [ local var 0 ] + // [ argumnet word n-1 ] <--- ( sender's sp ) + // ... + // [ argument word 0 ] <--- LVP + + + // start execution +#ifdef ASSERT + { + Label L; + __ ld(AT, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); + __ beq(AT, SP, L); + __ delayed()->nop(); + __ stop("broken stack frame setup in interpreter in native"); + __ bind(L); + } +#endif + + // jvmti/jvmpi support + __ notify_method_entry(); + + __ dispatch_next(vtos); + + // invocation counter overflow + if (inc_counter) { + if (ProfileInterpreter) { + // We have decided to profile this method in the interpreter + __ bind(profile_method); + __ call_VM(noreg, CAST_FROM_FN_PTR(address, + InterpreterRuntime::profile_method)); + __ set_method_data_pointer_for_bcp(); + __ get_method(Rmethod); + __ b(profile_method_continue); + __ delayed()->nop(); + } + // Handle overflow of counter and compile method + __ bind(invocation_counter_overflow); + generate_counter_overflow(continue_after_compile); + } + + return entry_point; +} + +//----------------------------------------------------------------------------- +// Exceptions + +void TemplateInterpreterGenerator::generate_throw_exception() { + // Entry point in previous activation (i.e., if the caller was + // interpreted) + Interpreter::_rethrow_exception_entry = __ pc(); + // Restore sp to interpreter_frame_last_sp even though we are going + // to empty the expression stack for the exception processing. 
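+  // (Storing R0 NULLs interpreter_frame_last_sp, the same "sp is now tos until
+  // the next Java call" marker used by generate_return_entry_for() above.)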
+ __ sd(R0,FP, frame::interpreter_frame_last_sp_offset * wordSize); + + // V0: exception + // V1: return address/pc that threw exception + __ restore_bcp(); // BCP points to call/send + __ restore_locals(); + + //add for compressedoops + __ reinit_heapbase(); + // Entry point for exceptions thrown within interpreter code + Interpreter::_throw_exception_entry = __ pc(); + // expression stack is undefined here + // V0: exception + // BCP: exception bcp + __ verify_oop(V0); + + // expression stack must be empty before entering the VM in case of an exception + __ empty_expression_stack(); + // find exception handler address and preserve exception oop + __ move(A1, V0); + __ call_VM(V1, CAST_FROM_FN_PTR(address, InterpreterRuntime::exception_handler_for_exception), A1); + // V0: exception handler entry point + // V1: preserved exception oop + // S0: bcp for exception handler + __ push(V1); // push exception which is now the only value on the stack + __ jr(V0); // jump to exception handler (may be _remove_activation_entry!) + __ delayed()->nop(); + + // If the exception is not handled in the current frame the frame is removed and + // the exception is rethrown (i.e. exception continuation is _rethrow_exception). + // + // Note: At this point the bci is still the bxi for the instruction which caused + // the exception and the expression stack is empty. Thus, for any VM calls + // at this point, GC will find a legal oop map (with empty expression stack). + + // In current activation + // V0: exception + // BCP: exception bcp + + // + // JVMTI PopFrame support + // + + Interpreter::_remove_activation_preserving_args_entry = __ pc(); + __ empty_expression_stack(); + // Set the popframe_processing bit in pending_popframe_condition indicating that we are + // currently handling popframe, so that call_VMs that may happen later do not trigger new + // popframe handling cycles. +#ifndef OPT_THREAD + Register thread = T2; + __ get_thread(T2); +#else + Register thread = TREG; +#endif + __ lw(T3, thread, in_bytes(JavaThread::popframe_condition_offset())); + __ ori(T3, T3, JavaThread::popframe_processing_bit); + __ sw(T3, thread, in_bytes(JavaThread::popframe_condition_offset())); + +#ifndef CORE + { + // Check to see whether we are returning to a deoptimized frame. + // (The PopFrame call ensures that the caller of the popped frame is + // either interpreted or compiled and deoptimizes it if compiled.) + // In this case, we can't call dispatch_next() after the frame is + // popped, but instead must save the incoming arguments and restore + // them after deoptimization has occurred. + // + // Note that we don't compare the return PC against the + // deoptimization blob's unpack entry because of the presence of + // adapter frames in C2. 
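+    // Instead, InterpreterRuntime::interpreter_contains(return_pc) is used
+    // below: a non-zero result means the caller is still interpreted and
+    // nothing special is needed; a zero result means the caller is compiled
+    // (and will be deoptimized), so the outgoing arguments are saved via
+    // Deoptimization::popframe_preserve_args before removing the activation.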
+ Label caller_not_deoptimized; + __ ld(A0, FP, frame::return_addr_offset * wordSize); + __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::interpreter_contains), A0); + __ bne(V0, R0, caller_not_deoptimized); + __ delayed()->nop(); + + // Compute size of arguments for saving when returning to deoptimized caller + __ get_method(A1); + __ verify_oop(A1); + __ ld( A1, A1, in_bytes(Method::const_offset())); + __ lhu(A1, A1, in_bytes(ConstMethod::size_of_parameters_offset())); + __ shl(A1, Interpreter::logStackElementSize); + __ restore_locals(); + __ dsubu(A2, LVP, A1); + __ daddiu(A2, A2, wordSize); + // Save these arguments +#ifndef OPT_THREAD + __ get_thread(A0); +#else + __ move(A0, TREG); +#endif + __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::popframe_preserve_args), A0, A1, A2); + + __ remove_activation(vtos, T9, false, false, false); + + // Inform deoptimization that it is responsible for restoring these arguments +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + __ move(AT, JavaThread::popframe_force_deopt_reexecution_bit); + __ sw(AT, thread, in_bytes(JavaThread::popframe_condition_offset())); + // Continue in deoptimization handler + __ jr(T9); + __ delayed()->nop(); + + __ bind(caller_not_deoptimized); + } +#endif /* !CORE */ + + __ remove_activation(vtos, T3, + /* throw_monitor_exception */ false, + /* install_monitor_exception */ false, + /* notify_jvmdi */ false); + + // Clear the popframe condition flag + // Finish with popframe handling + // A previous I2C followed by a deoptimization might have moved the + // outgoing arguments further up the stack. PopFrame expects the + // mutations to those outgoing arguments to be preserved and other + // constraints basically require this frame to look exactly as + // though it had previously invoked an interpreted activation with + // no space between the top of the expression stack (current + // last_sp) and the top of stack. Rather than force deopt to + // maintain this kind of invariant all the time we call a small + // fixup routine to move the mutated arguments onto the top of our + // expression stack if necessary. + __ move(T8, SP); + __ ld(A2, FP, frame::interpreter_frame_last_sp_offset * wordSize); +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + // PC must point into interpreter here + __ set_last_Java_frame(thread, noreg, FP, __ pc()); + __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::popframe_move_outgoing_args), thread, T8, A2); + __ reset_last_Java_frame(thread, true); + // Restore the last_sp and null it out + __ ld(SP, FP, frame::interpreter_frame_last_sp_offset * wordSize); + __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); + + + + __ move(AT, JavaThread::popframe_inactive); + __ sw(AT, thread, in_bytes(JavaThread::popframe_condition_offset())); + + // Finish with popframe handling + __ restore_bcp(); + __ restore_locals(); +#ifndef CORE + // The method data pointer was incremented already during + // call profiling. We have to restore the mdp for the current bcp. 
+ if (ProfileInterpreter) { + __ set_method_data_pointer_for_bcp(); + } +#endif // !CORE + // Clear the popframe condition flag +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + __ move(AT, JavaThread::popframe_inactive); + __ sw(AT, thread, in_bytes(JavaThread::popframe_condition_offset())); + +#if INCLUDE_JVMTI + { + Label L_done; + + __ lbu(AT, BCP, 0); + __ daddiu(AT, AT, -1 * Bytecodes::_invokestatic); + __ bne(AT, R0, L_done); + __ delayed()->nop(); + + // The member name argument must be restored if _invokestatic is re-executed after a PopFrame call. + // Detect such a case in the InterpreterRuntime function and return the member name argument, or NULL. + + __ get_method(T9); + __ ld(T8, LVP, 0); + __ call_VM(T8, CAST_FROM_FN_PTR(address, InterpreterRuntime::member_name_arg_or_null), T8, T9, BCP); + + __ beq(T8, R0, L_done); + __ delayed()->nop(); + + __ sd(T8, SP, 0); + __ bind(L_done); + } +#endif // INCLUDE_JVMTI + + __ dispatch_next(vtos); + // end of PopFrame support + + Interpreter::_remove_activation_entry = __ pc(); + + // preserve exception over this code sequence + __ pop(T0); +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + __ sd(T0, thread, in_bytes(JavaThread::vm_result_offset())); + // remove the activation (without doing throws on illegalMonitorExceptions) + __ remove_activation(vtos, T3, false, true, false); + // restore exception + __ get_vm_result(T0, thread); + __ verify_oop(T0); + + // In between activations - previous activation type unknown yet + // compute continuation point - the continuation point expects + // the following registers set up: + // + // T0: exception + // T1: return address/pc that threw exception + // SP: expression stack of caller + // FP: fp of caller + __ push2(T0, T3); // save exception and return address + __ move(A1, T3); + __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), thread, A1); + __ move(T9, V0); // save exception handler + __ pop2(V0, V1); // restore return address and exception + + // Note that an "issuing PC" is actually the next PC after the call + __ jr(T9); // jump to exception handler of caller + __ delayed()->nop(); +} + + +// +// JVMTI ForceEarlyReturn support +// +address TemplateInterpreterGenerator::generate_earlyret_entry_for(TosState state) { + address entry = __ pc(); + __ restore_bcp(); + __ restore_locals(); + __ empty_expression_stack(); + __ empty_FPU_stack(); + __ load_earlyret_value(state); + +#ifndef OPT_THREAD + __ get_thread(TREG); +#endif + __ ld_ptr(T9, TREG, in_bytes(JavaThread::jvmti_thread_state_offset())); + const Address cond_addr(T9, in_bytes(JvmtiThreadState::earlyret_state_offset())); + // Clear the earlyret state + __ move(AT, JvmtiThreadState::earlyret_inactive); + __ sw(AT, cond_addr); + __ sync(); + + + __ remove_activation(state, T0, + false, /* throw_monitor_exception */ + false, /* install_monitor_exception */ + true); /* notify_jvmdi */ + __ sync(); + __ jr(T0); + __ delayed()->nop(); + return entry; +} // end of ForceEarlyReturn support + + +//----------------------------------------------------------------------------- +// Helper for vtos entry point generation + +void TemplateInterpreterGenerator::set_vtos_entry_points(Template* t, + address& bep, + address& cep, + address& sep, + address& aep, + address& iep, + address& lep, + address& fep, + address& dep, + address& vep) { + assert(t->is_valid() && t->tos_in() == vtos, "illegal template"); + Label L; + fep = __ pc(); __ push(ftos); __ b(L); __ delayed()->nop(); + dep = 
__ pc(); __ push(dtos); __ b(L); __ delayed()->nop();
+  lep = __ pc(); __ push(ltos); __ b(L); __ delayed()->nop();
+  aep = __ pc(); __ push(atos); __ b(L); __ delayed()->nop();
+  bep = cep = sep =
+  iep = __ pc(); __ push(itos);
+  vep = __ pc();
+  __ bind(L);
+  generate_and_dispatch(t);
+}
+
+
+/*
+//-----------------------------------------------------------------------------
+// Generation of individual instructions
+
+// helpers for generate_and_dispatch
+
+
+InterpreterGenerator::InterpreterGenerator(StubQueue* code)
+ : TemplateInterpreterGenerator(code) {
+   generate_all(); // down here so it can be "virtual"
+}
+*/
+
+//-----------------------------------------------------------------------------
+
+// Non-product code
+#ifndef PRODUCT
+address TemplateInterpreterGenerator::generate_trace_code(TosState state) {
+  address entry = __ pc();
+
+  // prepare expression stack
+  __ push(state);       // save tosca
+
+  // tos & tos2
+  // trace_bytecode actually needs 4 arguments; the last two are tos & tos2.
+  // That works fine for x86, but with the MIPS o32 calling convention A2-A3
+  // would be stored to the stack positions assumed to hold tos & tos2, so an
+  // error occurs whenever the expression stack holds no more than 2 entries.
+  // Therefore tos & tos2 are loaded explicitly here and passed in registers.
+  __ ld(A2, SP, 0);
+  __ ld(A3, SP, 1 * wordSize);
+
+  // pass arguments & call tracer
+  __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::trace_bytecode), RA, A2, A3);
+  __ move(RA, V0);    // make sure return address is not destroyed by pop(state)
+
+  // restore expression stack
+  __ pop(state);        // restore tosca
+
+  // return
+  __ jr(RA);
+  __ delayed()->nop();
+
+  return entry;
+}
+
+void TemplateInterpreterGenerator::count_bytecode() {
+  __ li(T8, (long)&BytecodeCounter::_counter_value);
+  __ lw(AT, T8, 0);
+  __ daddiu(AT, AT, 1);
+  __ sw(AT, T8, 0);
+}
+
+void TemplateInterpreterGenerator::histogram_bytecode(Template* t) {
+  __ li(T8, (long)&BytecodeHistogram::_counters[t->bytecode()]);
+  __ lw(AT, T8, 0);
+  __ daddiu(AT, AT, 1);
+  __ sw(AT, T8, 0);
+}
+
+void TemplateInterpreterGenerator::histogram_bytecode_pair(Template* t) {
+  __ li(T8, (long)&BytecodePairHistogram::_index);
+  __ lw(T9, T8, 0);
+  __ dsrl(T9, T9, BytecodePairHistogram::log2_number_of_codes);
+  __ li(T8, ((long)t->bytecode()) << BytecodePairHistogram::log2_number_of_codes);
+  __ orr(T9, T9, T8);
+  __ li(T8, (long)&BytecodePairHistogram::_index);
+  __ sw(T9, T8, 0);
+  __ dsll(T9, T9, 2);
+  __ li(T8, (long)BytecodePairHistogram::_counters);
+  __ daddu(T8, T8, T9);
+  __ lw(AT, T8, 0);
+  __ daddiu(AT, AT, 1);
+  __ sw(AT, T8, 0);
+}
+
+
+void TemplateInterpreterGenerator::trace_bytecode(Template* t) {
+  // Call a little run-time stub to avoid blow-up for each bytecode.
+  // The run-time stub saves the right registers, depending on
+  // the tosca in-state for the given template.
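+  // The per-tos trace stubs are the ones produced by generate_trace_code(state)
+  // above; the one matching this template's tos_in() is looked up and called,
+  // and reinit_heapbase() is done afterwards because the VM call may clobber
+  // the compressed-oops heap-base register.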
+ + address entry = Interpreter::trace_code(t->tos_in()); + assert(entry != NULL, "entry must have been generated"); + __ call(entry, relocInfo::none); + __ delayed()->nop(); + //add for compressedoops + __ reinit_heapbase(); +} + + +void TemplateInterpreterGenerator::stop_interpreter_at() { + Label L; + __ li(T8, long(&BytecodeCounter::_counter_value)); + __ lw(T8, T8, 0); + __ move(AT, StopInterpreterAt); + __ bne(T8, AT, L); + __ delayed()->nop(); + __ brk(5); + __ delayed()->nop(); + __ bind(L); +} +#endif // !PRODUCT diff --git a/src/hotspot/cpu/mips/templateTable_mips.hpp b/src/hotspot/cpu/mips/templateTable_mips.hpp new file mode 100644 index 00000000000..46a88aba261 --- /dev/null +++ b/src/hotspot/cpu/mips/templateTable_mips.hpp @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_MIPS_VM_TEMPLATETABLE_MIPS_64_HPP +#define CPU_MIPS_VM_TEMPLATETABLE_MIPS_64_HPP + + static void prepare_invoke(int byte_no, + Register method, // linked method (or i-klass) + Register index = noreg, // itable index, MethodType, etc. + Register recv = noreg, // if caller wants to see it + Register flags = noreg // if caller wants to test it + ); + static void invokevirtual_helper(Register index, Register recv, + Register flags); + static void volatile_barrier(); + + // Helpers + static void index_check(Register array, Register index); + static void index_check_without_pop(Register array, Register index); + +#endif // CPU_MIPS_VM_TEMPLATETABLE_MIPS_64_HPP diff --git a/src/hotspot/cpu/mips/templateTable_mips_64.cpp b/src/hotspot/cpu/mips/templateTable_mips_64.cpp new file mode 100644 index 00000000000..52654838309 --- /dev/null +++ b/src/hotspot/cpu/mips/templateTable_mips_64.cpp @@ -0,0 +1,4688 @@ +/* + * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "interpreter/interpreter.hpp" +#include "interpreter/interpreterRuntime.hpp" +#include "interpreter/interp_masm.hpp" +#include "interpreter/templateTable.hpp" +#include "memory/universe.hpp" +#include "oops/methodData.hpp" +#include "oops/objArrayKlass.hpp" +#include "oops/oop.inline.hpp" +#include "prims/methodHandles.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#include "runtime/synchronizer.hpp" +#include "utilities/macros.hpp" + + +#ifndef CC_INTERP + +#define __ _masm-> + +#define T0 RT0 +#define T1 RT1 +#define T2 RT2 +#define T3 RT3 +#define T8 RT8 +#define T9 RT9 + +// Platform-dependent initialization + +void TemplateTable::pd_initialize() { + // No mips specific initialization +} + +// Address computation: local variables + +static inline Address iaddress(int n) { + return Address(LVP, Interpreter::local_offset_in_bytes(n)); +} + +static inline Address laddress(int n) { + return iaddress(n + 1); +} + +static inline Address faddress(int n) { + return iaddress(n); +} + +static inline Address daddress(int n) { + return laddress(n); +} + +static inline Address aaddress(int n) { + return iaddress(n); +} +static inline Address haddress(int n) { return iaddress(n + 0); } + + +static inline Address at_sp() { return Address(SP, 0); } +static inline Address at_sp_p1() { return Address(SP, 1 * wordSize); } +static inline Address at_sp_p2() { return Address(SP, 2 * wordSize); } + +// At top of Java expression stack which may be different than sp(). It +// isn't for category 1 objects. +static inline Address at_tos () { + Address tos = Address(SP, Interpreter::expr_offset_in_bytes(0)); + return tos; +} + +static inline Address at_tos_p1() { + return Address(SP, Interpreter::expr_offset_in_bytes(1)); +} + +static inline Address at_tos_p2() { + return Address(SP, Interpreter::expr_offset_in_bytes(2)); +} + +static inline Address at_tos_p3() { + return Address(SP, Interpreter::expr_offset_in_bytes(3)); +} + +// we use S0 as bcp, be sure you have bcp in S0 before you call any of the Template generator +Address TemplateTable::at_bcp(int offset) { + assert(_desc->uses_bcp(), "inconsistent uses_bcp information"); + return Address(BCP, offset); +} + +// Miscelaneous helper routines +// Store an oop (or NULL) at the address described by obj. 
+// If val == noreg this means store a NULL + +static void do_oop_store(InterpreterMacroAssembler* _masm, + Address dst, + Register val, + DecoratorSet decorators = 0) { + assert(val == noreg || val == V0, "parameter is just for looks"); + __ store_heap_oop(dst, val, T9, T1, decorators); +} + +static void do_oop_load(InterpreterMacroAssembler* _masm, + Address src, + Register dst, + DecoratorSet decorators = 0) { + __ load_heap_oop(dst, src, T9, T1, decorators); +} + +// bytecode folding +void TemplateTable::patch_bytecode(Bytecodes::Code bc, Register bc_reg, + Register tmp_reg, bool load_bc_into_bc_reg/*=true*/, + int byte_no) { + if (!RewriteBytecodes) return; + Label L_patch_done; + + switch (bc) { + case Bytecodes::_fast_aputfield: + case Bytecodes::_fast_bputfield: + case Bytecodes::_fast_zputfield: + case Bytecodes::_fast_cputfield: + case Bytecodes::_fast_dputfield: + case Bytecodes::_fast_fputfield: + case Bytecodes::_fast_iputfield: + case Bytecodes::_fast_lputfield: + case Bytecodes::_fast_sputfield: + { + // We skip bytecode quickening for putfield instructions when + // the put_code written to the constant pool cache is zero. + // This is required so that every execution of this instruction + // calls out to InterpreterRuntime::resolve_get_put to do + // additional, required work. + assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range"); + assert(load_bc_into_bc_reg, "we use bc_reg as temp"); + __ get_cache_and_index_and_bytecode_at_bcp(tmp_reg, bc_reg, tmp_reg, byte_no, 1); + __ daddiu(bc_reg, R0, bc); + __ beq(tmp_reg, R0, L_patch_done); + __ delayed()->nop(); + } + break; + default: + assert(byte_no == -1, "sanity"); + // the pair bytecodes have already done the load. + if (load_bc_into_bc_reg) { + __ move(bc_reg, bc); + } + } + + if (JvmtiExport::can_post_breakpoint()) { + Label L_fast_patch; + // if a breakpoint is present we can't rewrite the stream directly + __ lbu(tmp_reg, at_bcp(0)); + __ move(AT, Bytecodes::_breakpoint); + __ bne(tmp_reg, AT, L_fast_patch); + __ delayed()->nop(); + + __ get_method(tmp_reg); + // Let breakpoint table handling rewrite to quicker bytecode + __ call_VM(NOREG, CAST_FROM_FN_PTR(address, + InterpreterRuntime::set_original_bytecode_at), tmp_reg, BCP, bc_reg); + + __ b(L_patch_done); + __ delayed()->nop(); + __ bind(L_fast_patch); + } + +#ifdef ASSERT + Label L_okay; + __ lbu(tmp_reg, at_bcp(0)); + __ move(AT, (int)Bytecodes::java_code(bc)); + __ beq(tmp_reg, AT, L_okay); + __ delayed()->nop(); + __ beq(tmp_reg, bc_reg, L_patch_done); + __ delayed()->nop(); + __ stop("patching the wrong bytecode"); + __ bind(L_okay); +#endif + + // patch bytecode + __ sb(bc_reg, at_bcp(0)); + __ bind(L_patch_done); +} + + +// Individual instructions + +void TemplateTable::nop() { + transition(vtos, vtos); + // nothing to do +} + +void TemplateTable::shouldnotreachhere() { + transition(vtos, vtos); + __ stop("shouldnotreachhere bytecode"); +} + +void TemplateTable::aconst_null() { + transition(vtos, atos); + __ move(FSR, R0); +} + +void TemplateTable::iconst(int value) { + transition(vtos, itos); + if (value == 0) { + __ move(FSR, R0); + } else { + __ move(FSR, value); + } +} + +void TemplateTable::lconst(int value) { + transition(vtos, ltos); + if (value == 0) { + __ move(FSR, R0); + } else { + __ move(FSR, value); + } +} + +void TemplateTable::fconst(int value) { + transition(vtos, ftos); + switch( value ) { + case 0: __ mtc1(R0, FSF); return; + case 1: __ addiu(AT, R0, 1); break; + case 2: __ addiu(AT, R0, 2); break; + default: 
ShouldNotReachHere(); + } + __ mtc1(AT, FSF); + __ cvt_s_w(FSF, FSF); +} + +void TemplateTable::dconst(int value) { + transition(vtos, dtos); + switch( value ) { + case 0: __ dmtc1(R0, FSF); + return; + case 1: __ daddiu(AT, R0, 1); + __ dmtc1(AT, FSF); + __ cvt_d_w(FSF, FSF); + break; + default: ShouldNotReachHere(); + } +} + +void TemplateTable::bipush() { + transition(vtos, itos); + __ lb(FSR, at_bcp(1)); +} + +void TemplateTable::sipush() { + transition(vtos, itos); + __ lb(FSR, BCP, 1); + __ lbu(AT, BCP, 2); + __ dsll(FSR, FSR, 8); + __ orr(FSR, FSR, AT); +} + +// T1 : tags +// T2 : index +// T3 : cpool +// T8 : tag +void TemplateTable::ldc(bool wide) { + transition(vtos, vtos); + Label call_ldc, notFloat, notClass, notInt, Done; + // get index in cpool + if (wide) { + __ get_unsigned_2_byte_index_at_bcp(T2, 1); + } else { + __ lbu(T2, at_bcp(1)); + } + + __ get_cpool_and_tags(T3, T1); + + const int base_offset = ConstantPool::header_size() * wordSize; + const int tags_offset = Array::base_offset_in_bytes(); + + // get type + if (UseLEXT1 && Assembler::is_simm(sizeof(tags_offset), 8)) { + __ gslbx(T1, T1, T2, tags_offset); + } else { + __ daddu(AT, T1, T2); + __ lb(T1, AT, tags_offset); + } + if(os::is_MP()) { + __ sync(); // load acquire + } + //now T1 is the tag + + // unresolved class - get the resolved class + __ daddiu(AT, T1, - JVM_CONSTANT_UnresolvedClass); + __ beq(AT, R0, call_ldc); + __ delayed()->nop(); + + // unresolved class in error (resolution failed) - call into runtime + // so that the same error from first resolution attempt is thrown. + __ daddiu(AT, T1, -JVM_CONSTANT_UnresolvedClassInError); + __ beq(AT, R0, call_ldc); + __ delayed()->nop(); + + // resolved class - need to call vm to get java mirror of the class + __ daddiu(AT, T1, - JVM_CONSTANT_Class); + __ bne(AT, R0, notClass); + __ delayed()->dsll(T2, T2, Address::times_8); + + __ bind(call_ldc); + __ move(A1, wide); + call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::ldc), A1); + //__ push(atos); + __ daddiu(SP, SP, - Interpreter::stackElementSize); + __ b(Done); + __ delayed()->sd(FSR, SP, 0); // added for performance issue + + __ bind(notClass); + __ daddiu(AT, T1, -JVM_CONSTANT_Float); + __ bne(AT, R0, notFloat); + __ delayed()->nop(); + // ftos + if (UseLEXT1 && Assembler::is_simm(sizeof(base_offset), 8)) { + __ gslwxc1(FSF, T3, T2, base_offset); + } else { + __ daddu(AT, T3, T2); + __ lwc1(FSF, AT, base_offset); + } + //__ push_f(); + __ daddiu(SP, SP, - Interpreter::stackElementSize); + __ b(Done); + __ delayed()->swc1(FSF, SP, 0); + + __ bind(notFloat); + __ daddiu(AT, T1, -JVM_CONSTANT_Integer); + __ bne(AT, R0, notInt); + __ delayed()->nop(); + // itos + if (UseLEXT1 && Assembler::is_simm(sizeof(base_offset), 8)) { + __ gslwx(FSR, T3, T2, base_offset); + } else { + __ daddu(T0, T3, T2); + __ lw(FSR, T0, base_offset); + } + __ push(itos); + __ b(Done); + __ delayed()->nop(); + + // assume the tag is for condy; if not, the VM runtime will tell us + __ bind(notInt); + condy_helper(Done); + + __ bind(Done); +} + +void TemplateTable::condy_helper(Label& Done) { + const Register obj = FSR; + const Register off = SSR; + const Register flags = T3; + const Register rarg = A1; + __ move(rarg, (int)bytecode()); + __ call_VM(obj, CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_ldc), rarg); + __ get_vm_result_2(flags, TREG); + // VMr = obj = base address to find primitive value to push + // VMr2 = flags = (tos, off) using format of CPCE::_flags + __ andi(off, flags, 
ConstantPoolCacheEntry::field_index_mask); + __ daddu(obj, off, obj); + const Address field(obj, 0 * wordSize); + + // What sort of thing are we loading? + __ dsrl(flags, flags, ConstantPoolCacheEntry::tos_state_shift); + ConstantPoolCacheEntry::verify_tos_state_shift(); + + switch (bytecode()) { + case Bytecodes::_ldc: + case Bytecodes::_ldc_w: + { + // tos in (itos, ftos, stos, btos, ctos, ztos) + Label notInt, notFloat, notShort, notByte, notChar, notBool; + __ daddiu(AT, flags, -itos); + __ bne(AT, R0, notInt); + __ delayed()->nop(); + // itos + __ ld(obj, field); + __ push(itos); + __ b(Done); + __ delayed()->nop(); + + __ bind(notInt); + __ daddiu(AT, flags, -ftos); + __ bne(AT, R0, notFloat); + __ delayed()->nop(); + // ftos + __ lwc1(FSF, field); + __ push(ftos); + __ b(Done); + __ delayed()->nop(); + + __ bind(notFloat); + __ daddiu(AT, flags, -stos); + __ bne(AT, R0, notShort); + __ delayed()->nop(); + // stos + __ lh(obj, field); + __ push(stos); + __ b(Done); + __ delayed()->nop(); + + __ bind(notShort); + __ daddiu(AT, flags, -btos); + __ bne(AT, R0, notByte); + __ delayed()->nop(); + // btos + __ lb(obj, field); + __ push(btos); + __ b(Done); + __ delayed()->nop(); + + __ bind(notByte); + __ daddiu(AT, flags, -ctos); + __ bne(AT, R0, notChar); + __ delayed()->nop(); + // ctos + __ lhu(obj, field); + __ push(ctos); + __ b(Done); + __ delayed()->nop(); + + __ bind(notChar); + __ daddiu(AT, flags, -ztos); + __ bne(AT, R0, notBool); + __ delayed()->nop(); + // ztos + __ lbu(obj, field); + __ push(ztos); + __ b(Done); + __ delayed()->nop(); + + __ bind(notBool); + break; + } + + case Bytecodes::_ldc2_w: + { + Label notLong, notDouble; + __ daddiu(AT, flags, -ltos); + __ bne(AT, R0, notLong); + __ delayed()->nop(); + // ltos + __ ld(obj, field); + __ push(ltos); + __ b(Done); + __ delayed()->nop(); + + __ bind(notLong); + __ daddiu(AT, flags, -dtos); + __ bne(AT, R0, notDouble); + __ delayed()->nop(); + // dtos + __ ldc1(FSF, field); + __ push(dtos); + __ b(Done); + __ delayed()->nop(); + + __ bind(notDouble); + break; + } + + default: + ShouldNotReachHere(); + } + + __ stop("bad ldc/condy"); +} + +// Fast path for caching oop constants. +void TemplateTable::fast_aldc(bool wide) { + transition(vtos, atos); + + Register result = FSR; + Register tmp = SSR; + Register rarg = A1; + int index_size = wide ? sizeof(u2) : sizeof(u1); + + Label resolved; + + // We are resolved if the resolved reference cache entry contains a + // non-null object (String, MethodType, etc.) + assert_different_registers(result, tmp); + __ get_cache_index_at_bcp(tmp, 1, index_size); + __ load_resolved_reference_at_index(result, tmp, T9); + __ bne(result, R0, resolved); + __ delayed()->nop(); + + address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_ldc); + // first time invocation - must resolve first + int i = (int)bytecode(); + __ move(rarg, i); + __ call_VM(result, entry, rarg); + + __ bind(resolved); + + { // Check for the null sentinel. + // If we just called the VM, it already did the mapping for us, + // but it's harmless to retry. 
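+ // A constant that resolves to null is represented in the resolved references
+ // array by a sentinel object rather than by NULL itself; compare against the
+ // sentinel and convert it back to a real NULL reference before pushing.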
+ Label notNull; + __ set64(rarg, (long)Universe::the_null_sentinel_addr()); + __ ld_ptr(tmp, Address(rarg)); + __ bne(tmp, result, notNull); + __ delayed()->nop(); + __ xorr(result, result, result); // NULL object reference + __ bind(notNull); + } + + if (VerifyOops) { + __ verify_oop(result); + } +} + + +// used register: T2, T3, T1 +// T2 : index +// T3 : cpool +// T1 : tag +void TemplateTable::ldc2_w() { + transition(vtos, vtos); + Label notDouble, notLong, Done; + + // get index in cpool + __ get_unsigned_2_byte_index_at_bcp(T2, 1); + + __ get_cpool_and_tags(T3, T1); + + const int base_offset = ConstantPool::header_size() * wordSize; + const int tags_offset = Array::base_offset_in_bytes(); + + // get type in T1 + if (UseLEXT1 && Assembler::is_simm(tags_offset, 8)) { + __ gslbx(T1, T1, T2, tags_offset); + } else { + __ daddu(AT, T1, T2); + __ lb(T1, AT, tags_offset); + } + + __ daddiu(AT, T1, -JVM_CONSTANT_Double); + __ bne(AT, R0, notDouble); + __ delayed()->nop(); + + // dtos + __ dsll(T2, T2, Address::times_8); + if (UseLEXT1 && Assembler::is_simm(base_offset, 8)) { + __ gsldxc1(FSF, T3, T2, base_offset); + } else { + __ daddu(AT, T3, T2); + __ ldc1(FSF, AT, base_offset); + } + __ push(dtos); + __ b(Done); + __ delayed()->nop(); + + __ bind(notDouble); + __ daddiu(AT, T1, -JVM_CONSTANT_Long); + __ bne(AT, R0, notLong); + __ delayed()->nop(); + + // ltos + __ dsll(T2, T2, Address::times_8); + if (UseLEXT1 && Assembler::is_simm(base_offset, 8)) { + __ gsldx(FSR, T3, T2, base_offset); + } else { + __ daddu(AT, T3, T2); + __ ld(FSR, AT, base_offset); + } + __ push(ltos); + __ b(Done); + __ delayed()->nop(); + + __ bind(notLong); + condy_helper(Done); + + __ bind(Done); +} + +// we compute the actual local variable address here +// the x86 dont do so for it has scaled index memory access model, we dont have, so do here +void TemplateTable::locals_index(Register reg, int offset) { + __ lbu(reg, at_bcp(offset)); + __ dsll(reg, reg, Address::times_8); + __ dsubu(reg, LVP, reg); +} + +void TemplateTable::iload() { + iload_internal(); +} + +void TemplateTable::nofast_iload() { + iload_internal(may_not_rewrite); +} + +// this method will do bytecode folding of the two form: +// iload iload iload caload +// used register : T2, T3 +// T2 : bytecode +// T3 : folded code +void TemplateTable::iload_internal(RewriteControl rc) { + transition(vtos, itos); + if (RewriteFrequentPairs && rc == may_rewrite) { + Label rewrite, done; + // get the next bytecode in T2 + __ lbu(T2, at_bcp(Bytecodes::length_for(Bytecodes::_iload))); + // if _iload, wait to rewrite to iload2. We only want to rewrite the + // last two iloads in a pair. Comparing against fast_iload means that + // the next bytecode is neither an iload or a caload, and therefore + // an iload pair. + __ move(AT, Bytecodes::_iload); + __ beq(AT, T2, done); + __ delayed()->nop(); + + __ move(T3, Bytecodes::_fast_iload2); + __ move(AT, Bytecodes::_fast_iload); + __ beq(AT, T2, rewrite); + __ delayed()->nop(); + + // if _caload, rewrite to fast_icaload + __ move(T3, Bytecodes::_fast_icaload); + __ move(AT, Bytecodes::_caload); + __ beq(AT, T2, rewrite); + __ delayed()->nop(); + + // rewrite so iload doesn't check again. 
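+ // The next bytecode is neither an iload form nor a caload, so rewrite this
+ // lone iload to _fast_iload and skip the pair check on future executions.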
+ __ move(T3, Bytecodes::_fast_iload); + + // rewrite + // T3 : fast bytecode + __ bind(rewrite); + patch_bytecode(Bytecodes::_iload, T3, T2, false); + __ bind(done); + } + + // Get the local value into tos + locals_index(T2); + __ lw(FSR, T2, 0); +} + +// used register T2 +// T2 : index +void TemplateTable::fast_iload2() { + transition(vtos, itos); + locals_index(T2); + __ lw(FSR, T2, 0); + __ push(itos); + locals_index(T2, 3); + __ lw(FSR, T2, 0); +} + +// used register T2 +// T2 : index +void TemplateTable::fast_iload() { + transition(vtos, itos); + locals_index(T2); + __ lw(FSR, T2, 0); +} + +// used register T2 +// T2 : index +void TemplateTable::lload() { + transition(vtos, ltos); + locals_index(T2); + __ ld(FSR, T2, -wordSize); +} + +// used register T2 +// T2 : index +void TemplateTable::fload() { + transition(vtos, ftos); + locals_index(T2); + __ lwc1(FSF, T2, 0); +} + +// used register T2 +// T2 : index +void TemplateTable::dload() { + transition(vtos, dtos); + locals_index(T2); + __ ldc1(FSF, T2, -wordSize); +} + +// used register T2 +// T2 : index +void TemplateTable::aload() { + transition(vtos, atos); + locals_index(T2); + __ ld(FSR, T2, 0); +} + +void TemplateTable::locals_index_wide(Register reg) { + __ get_unsigned_2_byte_index_at_bcp(reg, 2); + __ dsll(reg, reg, Address::times_8); + __ dsubu(reg, LVP, reg); +} + +// used register T2 +// T2 : index +void TemplateTable::wide_iload() { + transition(vtos, itos); + locals_index_wide(T2); + __ ld(FSR, T2, 0); +} + +// used register T2 +// T2 : index +void TemplateTable::wide_lload() { + transition(vtos, ltos); + locals_index_wide(T2); + __ ld(FSR, T2, -wordSize); +} + +// used register T2 +// T2 : index +void TemplateTable::wide_fload() { + transition(vtos, ftos); + locals_index_wide(T2); + __ lwc1(FSF, T2, 0); +} + +// used register T2 +// T2 : index +void TemplateTable::wide_dload() { + transition(vtos, dtos); + locals_index_wide(T2); + __ ldc1(FSF, T2, -wordSize); +} + +// used register T2 +// T2 : index +void TemplateTable::wide_aload() { + transition(vtos, atos); + locals_index_wide(T2); + __ ld(FSR, T2, 0); +} + +// we use A2 as the regiser for index, BE CAREFUL! 
+// we dont use our tge 29 now, for later optimization +void TemplateTable::index_check(Register array, Register index) { + // Pop ptr into array + __ pop_ptr(array); + index_check_without_pop(array, index); +} + +void TemplateTable::index_check_without_pop(Register array, Register index) { + // destroys A2 + // check array + __ null_check(array, arrayOopDesc::length_offset_in_bytes()); + + // sign extend since tos (index) might contain garbage in upper bits + __ sll(index, index, 0); + + // check index + Label ok; + __ lw(AT, array, arrayOopDesc::length_offset_in_bytes()); +#ifndef OPT_RANGECHECK + __ sltu(AT, index, AT); + __ bne(AT, R0, ok); + __ delayed()->nop(); + + //throw_ArrayIndexOutOfBoundsException assume abberrant index in A2 + if (A1 != array) __ move(A1, array); + if (A2 != index) __ move(A2, index); + __ jmp(Interpreter::_throw_ArrayIndexOutOfBoundsException_entry); + __ delayed()->nop(); + __ bind(ok); +#else + __ lw(AT, array, arrayOopDesc::length_offset_in_bytes()); + __ move(A2, index); + __ tgeu(A2, AT, 29); +#endif +} + +void TemplateTable::iaload() { + transition(itos, itos); + if(UseBoundCheckInstruction) { + __ pop(SSR); //SSR:array FSR: index + __ dsll(FSR, FSR, 2); + __ daddu(FSR, SSR, FSR); + __ addiu(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_INT)); + + __ lw(AT, SSR, arrayOopDesc::length_offset_in_bytes()); //bound + __ dsll(AT, AT, 2); + __ daddu(AT, SSR, AT); + __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_INT)); + + __ warn("iaload Unimplemented yet"); + __ gslwle(FSR, FSR, AT); + } else { + index_check(SSR, FSR); + __ dsll(FSR, FSR, 2); + __ daddu(FSR, SSR, FSR); + __ access_load_at(T_INT, IN_HEAP | IS_ARRAY, FSR, Address(FSR, arrayOopDesc::base_offset_in_bytes(T_INT)), noreg, noreg); + } +} + +void TemplateTable::laload() { + transition(itos, ltos); + if(UseBoundCheckInstruction) { + __ pop(SSR); //SSR:array FSR: index + __ dsll(FSR, FSR, Address::times_8); + __ daddu(FSR, SSR, FSR); + __ addiu(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_LONG) + 0 * wordSize); + + __ lw(AT, SSR, arrayOopDesc::length_offset_in_bytes()); //bound + __ dsll(AT, AT, Address::times_8); + __ daddu(AT, SSR, AT); + __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_LONG) + 0 * wordSize); + + __ warn("laload Unimplemented yet"); + __ gsldle(FSR, FSR, AT); + } else { + index_check(SSR, FSR); + __ dsll(AT, FSR, Address::times_8); + __ daddu(T9, SSR, AT); + __ access_load_at(T_LONG, IN_HEAP | IS_ARRAY, FSR, Address(T9, arrayOopDesc::base_offset_in_bytes(T_LONG)), noreg, noreg); + } +} + +void TemplateTable::faload() { + transition(itos, ftos); + if(UseBoundCheckInstruction) { + __ pop(SSR); //SSR:array FSR: index + __ shl(FSR, 2); + __ daddu(FSR, SSR, FSR); + __ addiu(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_FLOAT)); + + __ lw(AT, SSR, arrayOopDesc::length_offset_in_bytes()); //bound + __ shl(AT, 2); + __ daddu(AT, SSR, AT); + __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_FLOAT)); + + __ warn("faload Unimplemented yet"); + __ gslwlec1(FSF, FSR, AT); + } else { + index_check(SSR, FSR); + __ shl(FSR, 2); + __ daddu(FSR, SSR, FSR); + __ access_load_at(T_FLOAT, IN_HEAP | IS_ARRAY, noreg, Address(FSR, arrayOopDesc::base_offset_in_bytes(T_FLOAT)), noreg, noreg); + } +} + +void TemplateTable::daload() { + transition(itos, dtos); + if(UseBoundCheckInstruction) { + __ pop(SSR); //SSR:array FSR: index + __ dsll(FSR, FSR, 3); + __ daddu(FSR, SSR, FSR); + __ addiu(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) + 0 * wordSize); + + __ lw(AT, SSR, 
arrayOopDesc::length_offset_in_bytes()); //bound + __ dsll(AT, AT, 3); + __ daddu(AT, SSR, AT); + __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) + 0 * wordSize); + + __ warn("daload Unimplemented yet"); + __ gsldlec1(FSF, FSR, AT); + } else { + index_check(SSR, FSR); + __ dsll(AT, FSR, 3); + __ daddu(T9, SSR, AT); + __ access_load_at(T_DOUBLE, IN_HEAP | IS_ARRAY, noreg, Address(T9, arrayOopDesc::base_offset_in_bytes(T_DOUBLE)), noreg, noreg); + } +} + +void TemplateTable::aaload() { + transition(itos, atos); + index_check(SSR, FSR); + __ dsll(FSR, FSR, UseCompressedOops ? Address::times_4 : Address::times_8); + __ daddu(FSR, SSR, FSR); + //add for compressedoops + do_oop_load(_masm, + Address(FSR, arrayOopDesc::base_offset_in_bytes(T_OBJECT)), + FSR, + IS_ARRAY); +} + +void TemplateTable::baload() { + transition(itos, itos); + if(UseBoundCheckInstruction) { + __ pop(SSR); //SSR:array FSR:index + __ daddu(FSR, SSR, FSR); + __ addiu(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_BYTE)); //base + + __ lw(AT, SSR, arrayOopDesc::length_offset_in_bytes()); + __ daddu(AT, SSR, AT); + __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_BYTE)); //bound + + __ warn("baload Unimplemented yet"); + __ gslble(FSR, FSR, AT); + } else { + index_check(SSR, FSR); + __ daddu(FSR, SSR, FSR); + __ access_load_at(T_BYTE, IN_HEAP | IS_ARRAY, FSR, Address(FSR, arrayOopDesc::base_offset_in_bytes(T_BYTE)), noreg, noreg); + } +} + +void TemplateTable::caload() { + transition(itos, itos); + index_check(SSR, FSR); + __ dsll(FSR, FSR, Address::times_2); + __ daddu(FSR, SSR, FSR); + __ access_load_at(T_CHAR, IN_HEAP | IS_ARRAY, FSR, Address(FSR, arrayOopDesc::base_offset_in_bytes(T_CHAR)), noreg, noreg); +} + +// iload followed by caload frequent pair +// used register : T2 +// T2 : index +void TemplateTable::fast_icaload() { + transition(vtos, itos); + // load index out of locals + locals_index(T2); + __ lw(FSR, T2, 0); + index_check(SSR, FSR); + __ dsll(FSR, FSR, 1); + __ daddu(FSR, SSR, FSR); + __ access_load_at(T_CHAR, IN_HEAP | IS_ARRAY, FSR, Address(FSR, arrayOopDesc::base_offset_in_bytes(T_CHAR)), noreg, noreg); +} + +void TemplateTable::saload() { + transition(itos, itos); + if(UseBoundCheckInstruction) { + __ pop(SSR); //SSR:array FSR: index + __ dsll(FSR, FSR, Address::times_2); + __ daddu(FSR, SSR, FSR); + __ addiu(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_SHORT)); + + __ lw(AT, SSR, arrayOopDesc::length_offset_in_bytes()); //bound + __ dsll(AT, AT, Address::times_2); + __ daddu(AT, SSR, AT); + __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_SHORT)); + + __ warn("saload Unimplemented yet"); + __ gslhle(FSR, FSR, AT); + } else { + index_check(SSR, FSR); + __ dsll(FSR, FSR, Address::times_2); + __ daddu(FSR, SSR, FSR); + __ access_load_at(T_SHORT, IN_HEAP | IS_ARRAY, FSR, Address(FSR, arrayOopDesc::base_offset_in_bytes(T_SHORT)), noreg, noreg); + } +} + +void TemplateTable::iload(int n) { + transition(vtos, itos); + __ lw(FSR, iaddress(n)); +} + +void TemplateTable::lload(int n) { + transition(vtos, ltos); + __ ld(FSR, laddress(n)); +} + +void TemplateTable::fload(int n) { + transition(vtos, ftos); + __ lwc1(FSF, faddress(n)); +} + +void TemplateTable::dload(int n) { + transition(vtos, dtos); + __ ldc1(FSF, laddress(n)); +} + +void TemplateTable::aload(int n) { + transition(vtos, atos); + __ ld(FSR, aaddress(n)); +} + +void TemplateTable::aload_0() { + aload_0_internal(); +} + +void TemplateTable::nofast_aload_0() { + aload_0_internal(may_not_rewrite); +} + +// used register : T2, T3 
+// T2 : bytecode +// T3 : folded code +void TemplateTable::aload_0_internal(RewriteControl rc) { + transition(vtos, atos); + // According to bytecode histograms, the pairs: + // + // _aload_0, _fast_igetfield + // _aload_0, _fast_agetfield + // _aload_0, _fast_fgetfield + // + // occur frequently. If RewriteFrequentPairs is set, the (slow) + // _aload_0 bytecode checks if the next bytecode is either + // _fast_igetfield, _fast_agetfield or _fast_fgetfield and then + // rewrites the current bytecode into a pair bytecode; otherwise it + // rewrites the current bytecode into _fast_aload_0 that doesn't do + // the pair check anymore. + // + // Note: If the next bytecode is _getfield, the rewrite must be + // delayed, otherwise we may miss an opportunity for a pair. + // + // Also rewrite frequent pairs + // aload_0, aload_1 + // aload_0, iload_1 + // These bytecodes with a small amount of code are most profitable + // to rewrite + if (RewriteFrequentPairs && rc == may_rewrite) { + Label rewrite, done; + // get the next bytecode in T2 + __ lbu(T2, at_bcp(Bytecodes::length_for(Bytecodes::_aload_0))); + + // do actual aload_0 + aload(0); + + // if _getfield then wait with rewrite + __ move(AT, Bytecodes::_getfield); + __ beq(AT, T2, done); + __ delayed()->nop(); + + // if _igetfield then reqrite to _fast_iaccess_0 + assert(Bytecodes::java_code(Bytecodes::_fast_iaccess_0) == + Bytecodes::_aload_0, + "fix bytecode definition"); + __ move(T3, Bytecodes::_fast_iaccess_0); + __ move(AT, Bytecodes::_fast_igetfield); + __ beq(AT, T2, rewrite); + __ delayed()->nop(); + + // if _agetfield then reqrite to _fast_aaccess_0 + assert(Bytecodes::java_code(Bytecodes::_fast_aaccess_0) == + Bytecodes::_aload_0, + "fix bytecode definition"); + __ move(T3, Bytecodes::_fast_aaccess_0); + __ move(AT, Bytecodes::_fast_agetfield); + __ beq(AT, T2, rewrite); + __ delayed()->nop(); + + // if _fgetfield then reqrite to _fast_faccess_0 + assert(Bytecodes::java_code(Bytecodes::_fast_faccess_0) == + Bytecodes::_aload_0, + "fix bytecode definition"); + __ move(T3, Bytecodes::_fast_faccess_0); + __ move(AT, Bytecodes::_fast_fgetfield); + __ beq(AT, T2, rewrite); + __ delayed()->nop(); + + // else rewrite to _fast_aload0 + assert(Bytecodes::java_code(Bytecodes::_fast_aload_0) == + Bytecodes::_aload_0, + "fix bytecode definition"); + __ move(T3, Bytecodes::_fast_aload_0); + + // rewrite + __ bind(rewrite); + patch_bytecode(Bytecodes::_aload_0, T3, T2, false); + + __ bind(done); + } else { + aload(0); + } +} + +void TemplateTable::istore() { + transition(itos, vtos); + locals_index(T2); + __ sw(FSR, T2, 0); +} + +void TemplateTable::lstore() { + transition(ltos, vtos); + locals_index(T2); + __ sd(FSR, T2, -wordSize); +} + +void TemplateTable::fstore() { + transition(ftos, vtos); + locals_index(T2); + __ swc1(FSF, T2, 0); +} + +void TemplateTable::dstore() { + transition(dtos, vtos); + locals_index(T2); + __ sdc1(FSF, T2, -wordSize); +} + +void TemplateTable::astore() { + transition(vtos, vtos); + __ pop_ptr(FSR); + locals_index(T2); + __ sd(FSR, T2, 0); +} + +void TemplateTable::wide_istore() { + transition(vtos, vtos); + __ pop_i(FSR); + locals_index_wide(T2); + __ sd(FSR, T2, 0); +} + +void TemplateTable::wide_lstore() { + transition(vtos, vtos); + __ pop_l(FSR); + locals_index_wide(T2); + __ sd(FSR, T2, -wordSize); +} + +void TemplateTable::wide_fstore() { + wide_istore(); +} + +void TemplateTable::wide_dstore() { + wide_lstore(); +} + +void TemplateTable::wide_astore() { + transition(vtos, vtos); + __ pop_ptr(FSR); + 
locals_index_wide(T2); + __ sd(FSR, T2, 0); +} + +// used register : T2 +void TemplateTable::iastore() { + transition(itos, vtos); + __ pop_i(SSR); // T2: array SSR: index + if(UseBoundCheckInstruction) { + __ pop_ptr(T2); + __ dsll(SSR, SSR, Address::times_4); + __ daddu(SSR, T2, SSR); + __ addiu(SSR, SSR, arrayOopDesc::base_offset_in_bytes(T_INT)); // base + + __ lw(AT, T2, arrayOopDesc::length_offset_in_bytes()); + __ dsll(AT, AT, Address::times_4); + __ daddu(AT, T2, AT); + __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_INT)); //bound + + __ warn("iastore Unimplemented yet"); + __ gsswle(FSR, SSR, AT); + } else { + index_check(T2, SSR); // prefer index in SSR + __ dsll(SSR, SSR, Address::times_4); + __ daddu(T2, T2, SSR); + __ access_store_at(T_INT, IN_HEAP | IS_ARRAY, Address(T2, arrayOopDesc::base_offset_in_bytes(T_INT)), FSR, noreg, noreg); + } +} + + + +// used register T2, T3 +void TemplateTable::lastore() { + transition(ltos, vtos); + __ pop_i (T2); + if(UseBoundCheckInstruction) { + __ pop_ptr(T3); + __ dsll(T2, T2, Address::times_8); + __ daddu(T2, T3, T2); + __ addiu(T2, T2, arrayOopDesc::base_offset_in_bytes(T_LONG) + 0 * wordSize); // base + + __ lw(AT, T3, arrayOopDesc::length_offset_in_bytes()); + __ dsll(AT, AT, Address::times_8); + __ daddu(AT, T3, AT); + __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_LONG) + 0 * wordSize); //bound + + __ warn("lastore Unimplemented yet"); + __ gssdle(FSR, T2, AT); + } else { + index_check(T3, T2); + __ dsll(T2, T2, Address::times_8); + __ daddu(T3, T3, T2); + __ access_store_at(T_LONG, IN_HEAP | IS_ARRAY, Address(T3, arrayOopDesc::base_offset_in_bytes(T_LONG)), FSR, noreg, noreg); + } +} + +// used register T2 +void TemplateTable::fastore() { + transition(ftos, vtos); + __ pop_i(SSR); + if(UseBoundCheckInstruction) { + __ pop_ptr(T2); + __ dsll(SSR, SSR, Address::times_4); + __ daddu(SSR, T2, SSR); + __ addiu(SSR, SSR, arrayOopDesc::base_offset_in_bytes(T_FLOAT)); // base + + __ lw(AT, T2, arrayOopDesc::length_offset_in_bytes()); + __ dsll(AT, AT, Address::times_4); + __ daddu(AT, T2, AT); + __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_FLOAT)); //bound + + __ warn("fastore Unimplemented yet"); + __ gsswlec1(FSF, SSR, AT); + } else { + index_check(T2, SSR); + __ dsll(SSR, SSR, Address::times_4); + __ daddu(T2, T2, SSR); + __ access_store_at(T_FLOAT, IN_HEAP | IS_ARRAY, Address(T2, arrayOopDesc::base_offset_in_bytes(T_FLOAT)), noreg, noreg, noreg); + } +} + +// used register T2, T3 +void TemplateTable::dastore() { + transition(dtos, vtos); + __ pop_i (T2); + if(UseBoundCheckInstruction) { + __ pop_ptr(T3); + __ dsll(T2, T2, Address::times_8); + __ daddu(T2, T3, T2); + __ addiu(T2, T2, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) + 0 * wordSize); // base + + __ lw(AT, T3, arrayOopDesc::length_offset_in_bytes()); + __ dsll(AT, AT, Address::times_8); + __ daddu(AT, T3, AT); + __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) + 0 * wordSize); //bound + + __ warn("dastore Unimplemented yet"); + __ gssdlec1(FSF, T2, AT); + } else { + index_check(T3, T2); + __ dsll(T2, T2, Address::times_8); + __ daddu(T3, T3, T2); + __ access_store_at(T_DOUBLE, IN_HEAP | IS_ARRAY, Address(T3, arrayOopDesc::base_offset_in_bytes(T_DOUBLE)), noreg, noreg, noreg); + } +} + +// used register : T2, T3, T8 +// T2 : array +// T3 : subklass +// T8 : supklass +void TemplateTable::aastore() { + Label is_null, ok_is_subtype, done; + transition(vtos, vtos); + // stack: ..., array, index, value + __ ld(FSR, at_tos()); // Value + __ lw(SSR, 
at_tos_p1()); // Index + __ ld(T2, at_tos_p2()); // Array + + // index_check(T2, SSR); + index_check_without_pop(T2, SSR); + // do array store check - check for NULL value first + __ beq(FSR, R0, is_null); + __ delayed()->nop(); + + // Move subklass into T3 + //add for compressedoops + __ load_klass(T3, FSR); + // Move superklass into T8 + //add for compressedoops + __ load_klass(T8, T2); + __ ld(T8, Address(T8, ObjArrayKlass::element_klass_offset())); + // Compress array+index*4+12 into a single register. T2 + __ dsll(AT, SSR, UseCompressedOops? Address::times_4 : Address::times_8); + __ daddu(T2, T2, AT); + __ daddiu(T2, T2, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); + + // Generate subtype check. + // Superklass in T8. Subklass in T3. + __ gen_subtype_check(T8, T3, ok_is_subtype); + // Come here on failure + // object is at FSR + __ jmp(Interpreter::_throw_ArrayStoreException_entry); + __ delayed()->nop(); + // Come here on success + __ bind(ok_is_subtype); + do_oop_store(_masm, Address(T2, 0), FSR, IS_ARRAY); + __ b(done); + __ delayed()->nop(); + + // Have a NULL in FSR, T2=array, SSR=index. Store NULL at ary[idx] + __ bind(is_null); + __ profile_null_seen(T9); + __ dsll(AT, SSR, UseCompressedOops? Address::times_4 : Address::times_8); + __ daddu(T2, T2, AT); + do_oop_store(_masm, Address(T2, arrayOopDesc::base_offset_in_bytes(T_OBJECT)), noreg, IS_ARRAY); + + __ bind(done); + __ daddiu(SP, SP, 3 * Interpreter::stackElementSize); +} + +void TemplateTable::bastore() { + transition(itos, vtos); + __ pop_i(SSR); + if(UseBoundCheckInstruction) { + guarantee(false, "unimplemented yet!"); + __ pop_ptr(T2); + __ daddu(SSR, T2, SSR); + __ addiu(SSR, SSR, arrayOopDesc::base_offset_in_bytes(T_BYTE)); // base + + __ lw(AT, T2, arrayOopDesc::length_offset_in_bytes()); + __ daddu(AT, T2, AT); + __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_BYTE)); //bound + + __ warn("bastore Unimplemented yet"); + __ gssble(FSR, SSR, AT); + } else { + index_check(T2, SSR); + + // Need to check whether array is boolean or byte + // since both types share the bastore bytecode. 
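+ // T_BOOLEAN arrays may only hold 0 or 1. The boolean diffbit in the layout
+ // helper distinguishes boolean arrays from byte arrays; when it is set, the
+ // value is masked with 0x1 below before being stored.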
+ __ load_klass(T9, T2); + __ lw(T9, T9, in_bytes(Klass::layout_helper_offset())); + + int diffbit = Klass::layout_helper_boolean_diffbit(); + __ move(AT, diffbit); + + Label L_skip; + __ andr(AT, T9, AT); + __ beq(AT, R0, L_skip); + __ delayed()->nop(); + __ andi(FSR, FSR, 0x1); + __ bind(L_skip); + + __ daddu(SSR, T2, SSR); + __ access_store_at(T_BYTE, IN_HEAP | IS_ARRAY, Address(SSR, arrayOopDesc::base_offset_in_bytes(T_BYTE)), FSR, noreg, noreg); + } +} + +void TemplateTable::castore() { + transition(itos, vtos); + __ pop_i(SSR); + if(UseBoundCheckInstruction) { + __ pop_ptr(T2); + __ dsll(SSR, SSR, Address::times_2); + __ daddu(SSR, T2, SSR); + __ addiu(SSR, SSR, arrayOopDesc::base_offset_in_bytes(T_CHAR)); // base + + __ lw(AT, T2, arrayOopDesc::length_offset_in_bytes()); + __ dsll(AT, AT, Address::times_2); + __ daddu(AT, T2, AT); + __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_CHAR)); //bound + + __ warn("castore Unimplemented yet"); + __ gsshle(FSR, SSR, AT); + } else { + index_check(T2, SSR); + __ dsll(SSR, SSR, Address::times_2); + __ daddu(SSR, T2, SSR); + __ access_store_at(T_CHAR, IN_HEAP | IS_ARRAY, Address(SSR, arrayOopDesc::base_offset_in_bytes(T_CHAR)), FSR, noreg, noreg); + } +} + +void TemplateTable::sastore() { + castore(); +} + +void TemplateTable::istore(int n) { + transition(itos, vtos); + __ sw(FSR, iaddress(n)); +} + +void TemplateTable::lstore(int n) { + transition(ltos, vtos); + __ sd(FSR, laddress(n)); +} + +void TemplateTable::fstore(int n) { + transition(ftos, vtos); + __ swc1(FSF, faddress(n)); +} + +void TemplateTable::dstore(int n) { + transition(dtos, vtos); + __ sdc1(FSF, laddress(n)); +} + +void TemplateTable::astore(int n) { + transition(vtos, vtos); + __ pop_ptr(FSR); + __ sd(FSR, aaddress(n)); +} + +void TemplateTable::pop() { + transition(vtos, vtos); + __ daddiu(SP, SP, Interpreter::stackElementSize); +} + +void TemplateTable::pop2() { + transition(vtos, vtos); + __ daddiu(SP, SP, 2 * Interpreter::stackElementSize); +} + +void TemplateTable::dup() { + transition(vtos, vtos); + // stack: ..., a + __ load_ptr(0, FSR); + __ push_ptr(FSR); + // stack: ..., a, a +} + +// blows FSR +void TemplateTable::dup_x1() { + transition(vtos, vtos); + // stack: ..., a, b + __ load_ptr(0, FSR); // load b + __ load_ptr(1, A5); // load a + __ store_ptr(1, FSR); // store b + __ store_ptr(0, A5); // store a + __ push_ptr(FSR); // push b + // stack: ..., b, a, b +} + +// blows FSR +void TemplateTable::dup_x2() { + transition(vtos, vtos); + // stack: ..., a, b, c + __ load_ptr(0, FSR); // load c + __ load_ptr(2, A5); // load a + __ store_ptr(2, FSR); // store c in a + __ push_ptr(FSR); // push c + // stack: ..., c, b, c, c + __ load_ptr(2, FSR); // load b + __ store_ptr(2, A5); // store a in b + // stack: ..., c, a, c, c + __ store_ptr(1, FSR); // store b in c + // stack: ..., c, a, b, c +} + +// blows FSR +void TemplateTable::dup2() { + transition(vtos, vtos); + // stack: ..., a, b + __ load_ptr(1, FSR); // load a + __ push_ptr(FSR); // push a + __ load_ptr(1, FSR); // load b + __ push_ptr(FSR); // push b + // stack: ..., a, b, a, b +} + +// blows FSR +void TemplateTable::dup2_x1() { + transition(vtos, vtos); + // stack: ..., a, b, c + __ load_ptr(0, T2); // load c + __ load_ptr(1, FSR); // load b + __ push_ptr(FSR); // push b + __ push_ptr(T2); // push c + // stack: ..., a, b, c, b, c + __ store_ptr(3, T2); // store c in b + // stack: ..., a, c, c, b, c + __ load_ptr(4, T2); // load a + __ store_ptr(2, T2); // store a in 2nd c + // stack: ..., a, c, a, b, c + __ 
store_ptr(4, FSR); // store b in a + // stack: ..., b, c, a, b, c + + // stack: ..., b, c, a, b, c +} + +// blows FSR, SSR +void TemplateTable::dup2_x2() { + transition(vtos, vtos); + // stack: ..., a, b, c, d + // stack: ..., a, b, c, d + __ load_ptr(0, T2); // load d + __ load_ptr(1, FSR); // load c + __ push_ptr(FSR); // push c + __ push_ptr(T2); // push d + // stack: ..., a, b, c, d, c, d + __ load_ptr(4, FSR); // load b + __ store_ptr(2, FSR); // store b in d + __ store_ptr(4, T2); // store d in b + // stack: ..., a, d, c, b, c, d + __ load_ptr(5, T2); // load a + __ load_ptr(3, FSR); // load c + __ store_ptr(3, T2); // store a in c + __ store_ptr(5, FSR); // store c in a + // stack: ..., c, d, a, b, c, d + + // stack: ..., c, d, a, b, c, d +} + +// blows FSR +void TemplateTable::swap() { + transition(vtos, vtos); + // stack: ..., a, b + + __ load_ptr(1, A5); // load a + __ load_ptr(0, FSR); // load b + __ store_ptr(0, A5); // store a in b + __ store_ptr(1, FSR); // store b in a + + // stack: ..., b, a +} + +void TemplateTable::iop2(Operation op) { + transition(itos, itos); + + __ pop_i(SSR); + switch (op) { + case add : __ addu32(FSR, SSR, FSR); break; + case sub : __ subu32(FSR, SSR, FSR); break; + case mul : __ mul(FSR, SSR, FSR); break; + case _and : __ andr(FSR, SSR, FSR); break; + case _or : __ orr(FSR, SSR, FSR); break; + case _xor : __ xorr(FSR, SSR, FSR); break; + case shl : __ sllv(FSR, SSR, FSR); break; + case shr : __ srav(FSR, SSR, FSR); break; + case ushr : __ srlv(FSR, SSR, FSR); break; + default : ShouldNotReachHere(); + } +} + +// the result stored in FSR, SSR, +// used registers : T2, T3 +void TemplateTable::lop2(Operation op) { + transition(ltos, ltos); + __ pop_l(T2); + + switch (op) { + case add : __ daddu(FSR, T2, FSR); break; + case sub : __ dsubu(FSR, T2, FSR); break; + case _and: __ andr(FSR, T2, FSR); break; + case _or : __ orr(FSR, T2, FSR); break; + case _xor: __ xorr(FSR, T2, FSR); break; + default : ShouldNotReachHere(); + } +} + +// java require this bytecode could handle 0x80000000/-1, dont cause a overflow exception, +// the result is 0x80000000 +// the godson2 cpu do the same, so we need not handle this specially like x86 +void TemplateTable::idiv() { + transition(itos, itos); + Label not_zero; + + __ bne(FSR, R0, not_zero); + __ delayed()->nop(); + __ jmp(Interpreter::_throw_ArithmeticException_entry); + __ delayed()->nop(); + __ bind(not_zero); + + __ pop_i(SSR); + if (UseLEXT1) { + __ gsdiv(FSR, SSR, FSR); + } else { + __ div(SSR, FSR); + __ mflo(FSR); + } +} + +void TemplateTable::irem() { + transition(itos, itos); + Label not_zero; + __ pop_i(SSR); + __ div(SSR, FSR); + + __ bne(FSR, R0, not_zero); + __ delayed()->nop(); + //__ brk(7); + __ jmp(Interpreter::_throw_ArithmeticException_entry); + __ delayed()->nop(); + + __ bind(not_zero); + __ mfhi(FSR); +} + +void TemplateTable::lmul() { + transition(ltos, ltos); + __ pop_l(T2); + if (UseLEXT1) { + __ gsdmult(FSR, T2, FSR); + } else { + __ dmult(T2, FSR); + __ mflo(FSR); + } +} + +// NOTE: i DONT use the Interpreter::_throw_ArithmeticException_entry +void TemplateTable::ldiv() { + transition(ltos, ltos); + Label normal; + + __ bne(FSR, R0, normal); + __ delayed()->nop(); + + //__ brk(7); //generate FPE + __ jmp(Interpreter::_throw_ArithmeticException_entry); + __ delayed()->nop(); + + __ bind(normal); + __ pop_l(A2); + if (UseLEXT1) { + __ gsddiv(FSR, A2, FSR); + } else { + __ ddiv(A2, FSR); + __ mflo(FSR); + } +} + +// NOTE: i DONT use the Interpreter::_throw_ArithmeticException_entry +void 
TemplateTable::lrem() { + transition(ltos, ltos); + Label normal; + + __ bne(FSR, R0, normal); + __ delayed()->nop(); + + __ jmp(Interpreter::_throw_ArithmeticException_entry); + __ delayed()->nop(); + + __ bind(normal); + __ pop_l (A2); + + if (UseLEXT1) { + __ gsdmod(FSR, A2, FSR); + } else { + __ ddiv(A2, FSR); + __ mfhi(FSR); + } +} + +// result in FSR +// used registers : T0 +void TemplateTable::lshl() { + transition(itos, ltos); + __ pop_l(T0); + __ dsllv(FSR, T0, FSR); +} + +// used registers : T0 +void TemplateTable::lshr() { + transition(itos, ltos); + __ pop_l(T0); + __ dsrav(FSR, T0, FSR); +} + +// used registers : T0 +void TemplateTable::lushr() { + transition(itos, ltos); + __ pop_l(T0); + __ dsrlv(FSR, T0, FSR); +} + +// result in FSF +void TemplateTable::fop2(Operation op) { + transition(ftos, ftos); + switch (op) { + case add: + __ lwc1(FTF, at_sp()); + __ add_s(FSF, FTF, FSF); + break; + case sub: + __ lwc1(FTF, at_sp()); + __ sub_s(FSF, FTF, FSF); + break; + case mul: + __ lwc1(FTF, at_sp()); + __ mul_s(FSF, FTF, FSF); + break; + case div: + __ lwc1(FTF, at_sp()); + __ div_s(FSF, FTF, FSF); + break; + case rem: + __ mov_s(F13, FSF); + __ lwc1(F12, at_sp()); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::frem), 2); + break; + default : ShouldNotReachHere(); + } + + __ daddiu(SP, SP, 1 * wordSize); +} + +// result in SSF||FSF +// i dont handle the strict flags +void TemplateTable::dop2(Operation op) { + transition(dtos, dtos); + switch (op) { + case add: + __ ldc1(FTF, at_sp()); + __ add_d(FSF, FTF, FSF); + break; + case sub: + __ ldc1(FTF, at_sp()); + __ sub_d(FSF, FTF, FSF); + break; + case mul: + __ ldc1(FTF, at_sp()); + __ mul_d(FSF, FTF, FSF); + break; + case div: + __ ldc1(FTF, at_sp()); + __ div_d(FSF, FTF, FSF); + break; + case rem: + __ mov_d(F13, FSF); + __ ldc1(F12, at_sp()); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::drem), 2); + break; + default : ShouldNotReachHere(); + } + + __ daddiu(SP, SP, 2 * wordSize); +} + +void TemplateTable::ineg() { + transition(itos, itos); + __ subu32(FSR, R0, FSR); +} + +void TemplateTable::lneg() { + transition(ltos, ltos); + __ dsubu(FSR, R0, FSR); +} + +void TemplateTable::fneg() { + transition(ftos, ftos); + __ neg_s(FSF, FSF); +} + +void TemplateTable::dneg() { + transition(dtos, dtos); + __ neg_d(FSF, FSF); +} + +// used registers : T2 +void TemplateTable::iinc() { + transition(vtos, vtos); + locals_index(T2); + __ lw(FSR, T2, 0); + __ lb(AT, at_bcp(2)); // get constant + __ daddu(FSR, FSR, AT); + __ sw(FSR, T2, 0); +} + +// used register : T2 +void TemplateTable::wide_iinc() { + transition(vtos, vtos); + locals_index_wide(T2); + __ get_2_byte_integer_at_bcp(FSR, AT, 4); + __ hswap(FSR); + __ lw(AT, T2, 0); + __ daddu(FSR, AT, FSR); + __ sw(FSR, T2, 0); +} + +void TemplateTable::convert() { + // Checking +#ifdef ASSERT + { + TosState tos_in = ilgl; + TosState tos_out = ilgl; + switch (bytecode()) { + case Bytecodes::_i2l: // fall through + case Bytecodes::_i2f: // fall through + case Bytecodes::_i2d: // fall through + case Bytecodes::_i2b: // fall through + case Bytecodes::_i2c: // fall through + case Bytecodes::_i2s: tos_in = itos; break; + case Bytecodes::_l2i: // fall through + case Bytecodes::_l2f: // fall through + case Bytecodes::_l2d: tos_in = ltos; break; + case Bytecodes::_f2i: // fall through + case Bytecodes::_f2l: // fall through + case Bytecodes::_f2d: tos_in = ftos; break; + case Bytecodes::_d2i: // fall through + case Bytecodes::_d2l: // fall through + case Bytecodes::_d2f: tos_in = 
dtos; break; + default : ShouldNotReachHere(); + } + switch (bytecode()) { + case Bytecodes::_l2i: // fall through + case Bytecodes::_f2i: // fall through + case Bytecodes::_d2i: // fall through + case Bytecodes::_i2b: // fall through + case Bytecodes::_i2c: // fall through + case Bytecodes::_i2s: tos_out = itos; break; + case Bytecodes::_i2l: // fall through + case Bytecodes::_f2l: // fall through + case Bytecodes::_d2l: tos_out = ltos; break; + case Bytecodes::_i2f: // fall through + case Bytecodes::_l2f: // fall through + case Bytecodes::_d2f: tos_out = ftos; break; + case Bytecodes::_i2d: // fall through + case Bytecodes::_l2d: // fall through + case Bytecodes::_f2d: tos_out = dtos; break; + default : ShouldNotReachHere(); + } + transition(tos_in, tos_out); + } +#endif // ASSERT + + // Conversion + switch (bytecode()) { + case Bytecodes::_i2l: + __ sll(FSR, FSR, 0); + break; + case Bytecodes::_i2f: + __ mtc1(FSR, FSF); + __ cvt_s_w(FSF, FSF); + break; + case Bytecodes::_i2d: + __ mtc1(FSR, FSF); + __ cvt_d_w(FSF, FSF); + break; + case Bytecodes::_i2b: + __ seb(FSR, FSR); + break; + case Bytecodes::_i2c: + __ andi(FSR, FSR, 0xFFFF); // truncate upper 56 bits + break; + case Bytecodes::_i2s: + __ seh(FSR, FSR); + break; + case Bytecodes::_l2i: + __ sll(FSR, FSR, 0); + break; + case Bytecodes::_l2f: + __ dmtc1(FSR, FSF); + __ cvt_s_l(FSF, FSF); + break; + case Bytecodes::_l2d: + __ dmtc1(FSR, FSF); + __ cvt_d_l(FSF, FSF); + break; + case Bytecodes::_f2i: + { + Label L; + + __ trunc_w_s(F12, FSF); + __ move(AT, 0x7fffffff); + __ mfc1(FSR, F12); + __ c_un_s(FSF, FSF); //NaN? + __ movt(FSR, R0); + + __ bne(AT, FSR, L); + __ delayed()->lui(T9, 0x8000); + + __ mfc1(AT, FSF); + __ andr(AT, AT, T9); + + __ movn(FSR, T9, AT); + + __ bind(L); + } + break; + case Bytecodes::_f2l: + { + Label L; + + __ trunc_l_s(F12, FSF); + __ daddiu(AT, R0, -1); + __ dsrl(AT, AT, 1); + __ dmfc1(FSR, F12); + __ c_un_s(FSF, FSF); //NaN? + __ movt(FSR, R0); + + __ bne(AT, FSR, L); + __ delayed()->lui(T9, 0x8000); + + __ mfc1(AT, FSF); + __ andr(AT, AT, T9); + + __ dsll32(T9, T9, 0); + __ movn(FSR, T9, AT); + + __ bind(L); + } + break; + case Bytecodes::_f2d: + __ cvt_d_s(FSF, FSF); + break; + case Bytecodes::_d2i: + { + Label L; + + __ trunc_w_d(F12, FSF); + __ move(AT, 0x7fffffff); + __ mfc1(FSR, F12); + + __ bne(FSR, AT, L); + __ delayed()->mtc1(R0, F12); + + __ cvt_d_w(F12, F12); + __ c_ult_d(FSF, F12); + __ bc1f(L); + __ delayed()->addiu(T9, R0, -1); + + __ c_un_d(FSF, FSF); //NaN? + __ subu32(FSR, T9, AT); + __ movt(FSR, R0); + + __ bind(L); + } + break; + case Bytecodes::_d2l: + { + Label L; + + __ trunc_l_d(F12, FSF); + __ daddiu(AT, R0, -1); + __ dsrl(AT, AT, 1); + __ dmfc1(FSR, F12); + + __ bne(FSR, AT, L); + __ delayed()->mtc1(R0, F12); + + __ cvt_d_w(F12, F12); + __ c_ult_d(FSF, F12); + __ bc1f(L); + __ delayed()->daddiu(T9, R0, -1); + + __ c_un_d(FSF, FSF); //NaN? 
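+ // A NaN input must convert to 0: if the c_un_d above set the FP condition
+ // flag, the movt below zeroes the result; otherwise the subtraction yields
+ // min_jlong for a negative out-of-range input.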
+ __ subu(FSR, T9, AT); + __ movt(FSR, R0); + + __ bind(L); + } + break; + case Bytecodes::_d2f: + __ cvt_s_d(FSF, FSF); + break; + default : + ShouldNotReachHere(); + } +} + +void TemplateTable::lcmp() { + transition(ltos, itos); + + __ pop(T0); + __ pop(R0); + + __ slt(AT, T0, FSR); + __ slt(FSR, FSR, T0); + __ subu(FSR, FSR, AT); +} + +void TemplateTable::float_cmp(bool is_float, int unordered_result) { + __ ori(FSR, R0, 1); + __ ori(AT, R0, 1); + + if (is_float) { + __ lwc1(FTF, at_sp()); + __ daddiu(SP, SP, 1 * wordSize); + if (unordered_result < 0) { + __ c_olt_s(FSF, FTF); + __ movf(FSR, R0); + __ c_ult_s(FTF, FSF); + } else { + __ c_ult_s(FSF, FTF); + __ movf(FSR, R0); + __ c_olt_s(FTF, FSF); + } + } else { + __ ldc1(FTF, at_sp()); + __ daddiu(SP, SP, 2 * wordSize); + if (unordered_result < 0) { + __ c_olt_d(FSF, FTF); + __ movf(FSR, R0); + __ c_ult_d(FTF, FSF); + } else { + __ c_ult_d(FSF, FTF); + __ movf(FSR, R0); + __ c_olt_d(FTF, FSF); + } + } + + __ movf(AT, R0); + __ subu(FSR, FSR, AT); +} + + +// used registers : T3, A7, Rnext +// FSR : return bci, this is defined by the vm specification +// T2 : MDO taken count +// T3 : method +// A7 : offset +// Rnext : next bytecode, this is required by dispatch_base +void TemplateTable::branch(bool is_jsr, bool is_wide) { + __ get_method(T3); + __ profile_taken_branch(A7, T2); // only C2 meaningful + + const ByteSize be_offset = MethodCounters::backedge_counter_offset() + + InvocationCounter::counter_offset(); + const ByteSize inv_offset = MethodCounters::invocation_counter_offset() + + InvocationCounter::counter_offset(); + + // Load up T4 with the branch displacement + if (!is_wide) { + __ lb(A7, BCP, 1); + __ lbu(AT, BCP, 2); + __ dsll(A7, A7, 8); + __ orr(A7, A7, AT); + } else { + __ get_4_byte_integer_at_bcp(A7, AT, 1); + __ swap(A7); + } + + // Handle all the JSR stuff here, then exit. + // It's much shorter and cleaner than intermingling with the non-JSR + // normal-branch stuff occuring below. 
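+ // At this point A7 holds the signed branch displacement in bytes, decoded
+ // from the big-endian 2-byte (or 4-byte for wide) operand following the opcode.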
+ if (is_jsr) { + // Pre-load the next target bytecode into Rnext + __ daddu(AT, BCP, A7); + __ lbu(Rnext, AT, 0); + + // compute return address as bci in FSR + __ daddiu(FSR, BCP, (is_wide?5:3) - in_bytes(ConstMethod::codes_offset())); + __ ld(AT, T3, in_bytes(Method::const_offset())); + __ dsubu(FSR, FSR, AT); + // Adjust the bcp in BCP by the displacement in A7 + __ daddu(BCP, BCP, A7); + // jsr returns atos that is not an oop + // Push return address + __ push_i(FSR); + // jsr returns vtos + __ dispatch_only_noverify(vtos); + + return; + } + + // Normal (non-jsr) branch handling + + // Adjust the bcp in S0 by the displacement in T4 + __ daddu(BCP, BCP, A7); + + assert(UseLoopCounter || !UseOnStackReplacement, "on-stack-replacement requires loop counters"); + Label backedge_counter_overflow; + Label profile_method; + Label dispatch; + if (UseLoopCounter) { + // increment backedge counter for backward branches + // T3: method + // T4: target offset + // BCP: target bcp + // LVP: locals pointer + __ bgtz(A7, dispatch); // check if forward or backward branch + __ delayed()->nop(); + + // check if MethodCounters exists + Label has_counters; + __ ld(AT, T3, in_bytes(Method::method_counters_offset())); // use AT as MDO, TEMP + __ bne(AT, R0, has_counters); + __ delayed()->nop(); + __ push(T3); + __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::build_method_counters), + T3); + __ pop(T3); + __ ld(AT, T3, in_bytes(Method::method_counters_offset())); // use AT as MDO, TEMP + __ beq(AT, R0, dispatch); + __ delayed()->nop(); + __ bind(has_counters); + + if (TieredCompilation) { + Label no_mdo; + int increment = InvocationCounter::count_increment; + int mask = ((1 << Tier0BackedgeNotifyFreqLog) - 1) << InvocationCounter::count_shift; + if (ProfileInterpreter) { + // Are we profiling? 
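+ // If a MethodData* exists, bump its backedge counter so the profile keeps the
+ // back-branch information; otherwise fall through and use the MethodCounters*
+ // backedge counter. Overflow of either counter branches to
+ // backedge_counter_overflow for a possible OSR compilation.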
+ __ ld(T0, Address(T3, in_bytes(Method::method_data_offset()))); + __ beq(T0, R0, no_mdo); + __ delayed()->nop(); + // Increment the MDO backedge counter + const Address mdo_backedge_counter(T0, in_bytes(MethodData::backedge_counter_offset()) + + in_bytes(InvocationCounter::counter_offset())); + __ increment_mask_and_jump(mdo_backedge_counter, increment, mask, + T1, false, Assembler::zero, &backedge_counter_overflow); + __ beq(R0, R0, dispatch); + __ delayed()->nop(); + } + __ bind(no_mdo); + // Increment backedge counter in MethodCounters* + __ ld(T0, Address(T3, Method::method_counters_offset())); + __ increment_mask_and_jump(Address(T0, be_offset), increment, mask, + T1, false, Assembler::zero, &backedge_counter_overflow); + if (!UseOnStackReplacement) { + __ bind(backedge_counter_overflow); + } + } else { + // increment back edge counter + __ ld(T1, T3, in_bytes(Method::method_counters_offset())); + __ lw(T0, T1, in_bytes(be_offset)); + __ increment(T0, InvocationCounter::count_increment); + __ sw(T0, T1, in_bytes(be_offset)); + + // load invocation counter + __ lw(T1, T1, in_bytes(inv_offset)); + // buffer bit added, mask no needed + + // dadd backedge counter & invocation counter + __ daddu(T1, T1, T0); + + if (ProfileInterpreter) { + // Test to see if we should create a method data oop + // T1 : backedge counter & invocation counter + if (Assembler::is_simm16(InvocationCounter::InterpreterProfileLimit)) { + __ slti(AT, T1, InvocationCounter::InterpreterProfileLimit); + } else { + __ li(AT, (long)&InvocationCounter::InterpreterProfileLimit); + __ lw(AT, AT, 0); + __ slt(AT, T1, AT); + } + + __ bne(AT, R0, dispatch); + __ delayed()->nop(); + + // if no method data exists, go to profile method + __ test_method_data_pointer(T1, profile_method); + + if (UseOnStackReplacement) { + if (Assembler::is_simm16(InvocationCounter::InterpreterBackwardBranchLimit)) { + __ slti(AT, T2, InvocationCounter::InterpreterBackwardBranchLimit); + } else { + __ li(AT, (long)&InvocationCounter::InterpreterBackwardBranchLimit); + __ lw(AT, AT, 0); + __ slt(AT, T2, AT); + } + + __ bne(AT, R0, dispatch); + __ delayed()->nop(); + + // When ProfileInterpreter is on, the backedge_count comes + // from the methodDataOop, which value does not get reset on + // the call to frequency_counter_overflow(). + // To avoid excessive calls to the overflow routine while + // the method is being compiled, dadd a second test to make + // sure the overflow function is called only once every + // overflow_frequency. + const int overflow_frequency = 1024; + __ andi(AT, T2, overflow_frequency-1); + __ beq(AT, R0, backedge_counter_overflow); + __ delayed()->nop(); + } + } else { + if (UseOnStackReplacement) { + // check for overflow against AT, which is the sum of the counters + __ li(AT, (long)&InvocationCounter::InterpreterBackwardBranchLimit); + __ lw(AT, AT, 0); + __ slt(AT, T1, AT); + __ beq(AT, R0, backedge_counter_overflow); + __ delayed()->nop(); + } + } + } + __ bind(dispatch); + } + + // Pre-load the next target bytecode into Rnext + __ lbu(Rnext, BCP, 0); + + // continue with the bytecode @ target + // FSR: return bci for jsr's, unused otherwise + // Rnext: target bytecode + // BCP: target bcp + __ dispatch_only(vtos, true); + + if (UseLoopCounter) { + if (ProfileInterpreter) { + // Out-of-line code to allocate method data oop. 
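+ // Reached when the combined invocation and backedge counters cross
+ // InterpreterProfileLimit but no MDO has been allocated yet: let the runtime
+ // create one, re-derive the method data pointer for the current bcp, and
+ // resume at dispatch.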
+ __ bind(profile_method); + __ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::profile_method)); + __ set_method_data_pointer_for_bcp(); + __ b(dispatch); + __ delayed()->nop(); + } + + if (UseOnStackReplacement) { + // invocation counter overflow + __ bind(backedge_counter_overflow); + __ subu(A7, BCP, A7); // branch bcp + call_VM(NOREG, CAST_FROM_FN_PTR(address, + InterpreterRuntime::frequency_counter_overflow), A7); + + // V0: osr nmethod (osr ok) or NULL (osr not possible) + // V1: osr adapter frame return address + // LVP: locals pointer + // BCP: bcp + __ beq(V0, R0, dispatch); + __ delayed()->nop(); + // nmethod may have been invalidated (VM may block upon call_VM return) + __ lb(T3, V0, nmethod::state_offset()); + __ move(AT, nmethod::in_use); + __ bne(AT, T3, dispatch); + __ delayed()->nop(); + + // We have the address of an on stack replacement routine in rax. + // In preparation of invoking it, first we must migrate the locals + // and monitors from off the interpreter frame on the stack. + // Ensure to save the osr nmethod over the migration call, + // it will be preserved in Rnext. + __ move(Rnext, V0); + const Register thread = TREG; +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + call_VM(noreg, CAST_FROM_FN_PTR(address, SharedRuntime::OSR_migration_begin)); + + // V0 is OSR buffer, move it to expected parameter location + // refer to osrBufferPointer in c1_LIRAssembler_mips.cpp + __ move(T0, V0); + + // pop the interpreter frame + __ ld(A7, Address(FP, frame::interpreter_frame_sender_sp_offset * wordSize)); + //FIXME, shall we keep the return address on the stack? + __ leave(); // remove frame anchor + __ move(LVP, RA); + __ move(SP, A7); + + __ move(AT, -(StackAlignmentInBytes)); + __ andr(SP , SP , AT); + + // push the (possibly adjusted) return address + //refer to osr_entry in c1_LIRAssembler_mips.cpp + __ ld(AT, Rnext, nmethod::osr_entry_point_offset()); + __ jr(AT); + __ delayed()->nop(); + } + } +} + + +void TemplateTable::if_0cmp(Condition cc) { + transition(itos, vtos); + // assume branch is more often taken than not (loops use backward branches) + Label not_taken; + switch(cc) { + case not_equal: + __ beq(FSR, R0, not_taken); + break; + case equal: + __ bne(FSR, R0, not_taken); + break; + case less: + __ bgez(FSR, not_taken); + break; + case less_equal: + __ bgtz(FSR, not_taken); + break; + case greater: + __ blez(FSR, not_taken); + break; + case greater_equal: + __ bltz(FSR, not_taken); + break; + } + __ delayed()->nop(); + + branch(false, false); + + __ bind(not_taken); + __ profile_not_taken_branch(FSR); +} + +void TemplateTable::if_icmp(Condition cc) { + transition(itos, vtos); + // assume branch is more often taken than not (loops use backward branches) + Label not_taken; + + __ pop_i(SSR); + switch(cc) { + case not_equal: + __ beq(SSR, FSR, not_taken); + break; + case equal: + __ bne(SSR, FSR, not_taken); + break; + case less: + __ slt(AT, SSR, FSR); + __ beq(AT, R0, not_taken); + break; + case less_equal: + __ slt(AT, FSR, SSR); + __ bne(AT, R0, not_taken); + break; + case greater: + __ slt(AT, FSR, SSR); + __ beq(AT, R0, not_taken); + break; + case greater_equal: + __ slt(AT, SSR, FSR); + __ bne(AT, R0, not_taken); + break; + } + __ delayed()->nop(); + + branch(false, false); + __ bind(not_taken); + __ profile_not_taken_branch(FSR); +} + +void TemplateTable::if_nullcmp(Condition cc) { + transition(atos, vtos); + // assume branch is more often taken than not (loops use backward branches) + Label not_taken; + switch(cc) { + case 
not_equal: + __ beq(FSR, R0, not_taken); + break; + case equal: + __ bne(FSR, R0, not_taken); + break; + default: + ShouldNotReachHere(); + } + __ delayed()->nop(); + + branch(false, false); + __ bind(not_taken); + __ profile_not_taken_branch(FSR); +} + + +void TemplateTable::if_acmp(Condition cc) { + transition(atos, vtos); + // assume branch is more often taken than not (loops use backward branches) + Label not_taken; + // __ lw(SSR, SP, 0); + __ pop_ptr(SSR); + switch(cc) { + case not_equal: + __ beq(SSR, FSR, not_taken); + break; + case equal: + __ bne(SSR, FSR, not_taken); + break; + default: + ShouldNotReachHere(); + } + __ delayed()->nop(); + + branch(false, false); + + __ bind(not_taken); + __ profile_not_taken_branch(FSR); +} + +// used registers : T1, T2, T3 +// T1 : method +// T2 : returb bci +void TemplateTable::ret() { + transition(vtos, vtos); + + locals_index(T2); + __ ld(T2, T2, 0); + __ profile_ret(T2, T3); + + __ get_method(T1); + __ ld(BCP, T1, in_bytes(Method::const_offset())); + __ daddu(BCP, BCP, T2); + __ daddiu(BCP, BCP, in_bytes(ConstMethod::codes_offset())); + + __ dispatch_next(vtos, 0, true); +} + +// used registers : T1, T2, T3 +// T1 : method +// T2 : returb bci +void TemplateTable::wide_ret() { + transition(vtos, vtos); + + locals_index_wide(T2); + __ ld(T2, T2, 0); // get return bci, compute return bcp + __ profile_ret(T2, T3); + + __ get_method(T1); + __ ld(BCP, T1, in_bytes(Method::const_offset())); + __ daddu(BCP, BCP, T2); + __ daddiu(BCP, BCP, in_bytes(ConstMethod::codes_offset())); + + __ dispatch_next(vtos, 0, true); +} + +// used register T2, T3, A7, Rnext +// T2 : bytecode pointer +// T3 : low +// A7 : high +// Rnext : dest bytecode, required by dispatch_base +void TemplateTable::tableswitch() { + Label default_case, continue_execution; + transition(itos, vtos); + + // align BCP + __ daddiu(T2, BCP, BytesPerInt); + __ li(AT, -BytesPerInt); + __ andr(T2, T2, AT); + + // load lo & hi + __ lw(T3, T2, 1 * BytesPerInt); + __ swap(T3); + __ lw(A7, T2, 2 * BytesPerInt); + __ swap(A7); + + // check against lo & hi + __ slt(AT, FSR, T3); + __ bne(AT, R0, default_case); + __ delayed()->nop(); + + __ slt(AT, A7, FSR); + __ bne(AT, R0, default_case); + __ delayed()->nop(); + + // lookup dispatch offset, in A7 big endian + __ dsubu(FSR, FSR, T3); + __ dsll(AT, FSR, Address::times_4); + __ daddu(AT, T2, AT); + __ lw(A7, AT, 3 * BytesPerInt); + __ profile_switch_case(FSR, T9, T3); + + __ bind(continue_execution); + __ swap(A7); + __ daddu(BCP, BCP, A7); + __ lbu(Rnext, BCP, 0); + __ dispatch_only(vtos, true); + + // handle default + __ bind(default_case); + __ profile_switch_default(FSR); + __ lw(A7, T2, 0); + __ b(continue_execution); + __ delayed()->nop(); +} + +void TemplateTable::lookupswitch() { + transition(itos, itos); + __ stop("lookupswitch bytecode should have been rewritten"); +} + +// used registers : T2, T3, A7, Rnext +// T2 : bytecode pointer +// T3 : pair index +// A7 : offset +// Rnext : dest bytecode +// the data after the opcode is the same as lookupswitch +// see Rewriter::rewrite_method for more information +void TemplateTable::fast_linearswitch() { + transition(itos, vtos); + Label loop_entry, loop, found, continue_execution; + + // swap FSR so we can avoid swapping the table entries + __ swap(FSR); + + // align BCP + __ daddiu(T2, BCP, BytesPerInt); + __ li(AT, -BytesPerInt); + __ andr(T2, T2, AT); + + // set counter + __ lw(T3, T2, BytesPerInt); + __ swap(T3); + __ b(loop_entry); + __ delayed()->nop(); + + // table search + __ bind(loop); + 
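+  // Each lookupswitch pair is two 4-byte big-endian words (match, offset)
+  // starting at T2 + 2*BytesPerInt; T3 walks the pairs from the last one
+  // down to the first, with the decrement done in the branch delay slot.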
// get the entry value + __ dsll(AT, T3, Address::times_8); + __ daddu(AT, T2, AT); + __ lw(AT, AT, 2 * BytesPerInt); + + // found? + __ beq(FSR, AT, found); + __ delayed()->nop(); + + __ bind(loop_entry); + __ bgtz(T3, loop); + __ delayed()->daddiu(T3, T3, -1); + + // default case + __ profile_switch_default(FSR); + __ lw(A7, T2, 0); + __ b(continue_execution); + __ delayed()->nop(); + + // entry found -> get offset + __ bind(found); + __ dsll(AT, T3, Address::times_8); + __ daddu(AT, T2, AT); + __ lw(A7, AT, 3 * BytesPerInt); + __ profile_switch_case(T3, FSR, T2); + + // continue execution + __ bind(continue_execution); + __ swap(A7); + __ daddu(BCP, BCP, A7); + __ lbu(Rnext, BCP, 0); + __ dispatch_only(vtos, true); +} + +// used registers : T0, T1, T2, T3, A7, Rnext +// T2 : pairs address(array) +// Rnext : dest bytecode +// the data after the opcode is the same as lookupswitch +// see Rewriter::rewrite_method for more information +void TemplateTable::fast_binaryswitch() { + transition(itos, vtos); + // Implementation using the following core algorithm: + // + // int binary_search(int key, LookupswitchPair* array, int n) { + // // Binary search according to "Methodik des Programmierens" by + // // Edsger W. Dijkstra and W.H.J. Feijen, Addison Wesley Germany 1985. + // int i = 0; + // int j = n; + // while (i+1 < j) { + // // invariant P: 0 <= i < j <= n and (a[i] <= key < a[j] or Q) + // // with Q: for all i: 0 <= i < n: key < a[i] + // // where a stands for the array and assuming that the (inexisting) + // // element a[n] is infinitely big. + // int h = (i + j) >> 1; + // // i < h < j + // if (key < array[h].fast_match()) { + // j = h; + // } else { + // i = h; + // } + // } + // // R: a[i] <= key < a[i+1] or Q + // // (i.e., if key is within array, i is the correct index) + // return i; + // } + + // register allocation + const Register array = T2; + const Register i = T3, j = A7; + const Register h = T1; + const Register temp = T0; + const Register key = FSR; + + // setup array + __ daddiu(array, BCP, 3*BytesPerInt); + __ li(AT, -BytesPerInt); + __ andr(array, array, AT); + + // initialize i & j + __ move(i, R0); + __ lw(j, array, - 1 * BytesPerInt); + // Convert j into native byteordering + __ swap(j); + + // and start + Label entry; + __ b(entry); + __ delayed()->nop(); + + // binary search loop + { + Label loop; + __ bind(loop); + // int h = (i + j) >> 1; + __ daddu(h, i, j); + __ dsrl(h, h, 1); + // if (key < array[h].fast_match()) { + // j = h; + // } else { + // i = h; + // } + // Convert array[h].match to native byte-ordering before compare + __ dsll(AT, h, Address::times_8); + __ daddu(AT, array, AT); + __ lw(temp, AT, 0 * BytesPerInt); + __ swap(temp); + + __ slt(AT, key, temp); + __ movz(i, h, AT); + __ movn(j, h, AT); + + // while (i+1 < j) + __ bind(entry); + __ daddiu(h, i, 1); + __ slt(AT, h, j); + __ bne(AT, R0, loop); + __ delayed()->nop(); + } + + // end of binary search, result index is i (must check again!) 
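+  // The loop only guarantees array[i].match <= key < array[i+1].match (or
+  // that the key is below every entry), so the candidate at index i still has
+  // to be compared against the key; a mismatch falls through to the default
+  // offset.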
+ Label default_case; + // Convert array[i].match to native byte-ordering before compare + __ dsll(AT, i, Address::times_8); + __ daddu(AT, array, AT); + __ lw(temp, AT, 0 * BytesPerInt); + __ swap(temp); + __ bne(key, temp, default_case); + __ delayed()->nop(); + + // entry found -> j = offset + __ dsll(AT, i, Address::times_8); + __ daddu(AT, array, AT); + __ lw(j, AT, 1 * BytesPerInt); + __ profile_switch_case(i, key, array); + __ swap(j); + + __ daddu(BCP, BCP, j); + __ lbu(Rnext, BCP, 0); + __ dispatch_only(vtos, true); + + // default case -> j = default offset + __ bind(default_case); + __ profile_switch_default(i); + __ lw(j, array, - 2 * BytesPerInt); + __ swap(j); + __ daddu(BCP, BCP, j); + __ lbu(Rnext, BCP, 0); + __ dispatch_only(vtos, true); +} + +void TemplateTable::_return(TosState state) { + transition(state, state); + assert(_desc->calls_vm(), + "inconsistent calls_vm information"); // call in remove_activation + + if (_desc->bytecode() == Bytecodes::_return_register_finalizer) { + assert(state == vtos, "only valid state"); + __ ld(T1, aaddress(0)); + __ load_klass(LVP, T1); + __ lw(LVP, LVP, in_bytes(Klass::access_flags_offset())); + __ move(AT, JVM_ACC_HAS_FINALIZER); + __ andr(AT, AT, LVP); + Label skip_register_finalizer; + __ beq(AT, R0, skip_register_finalizer); + __ delayed()->nop(); + __ call_VM(noreg, CAST_FROM_FN_PTR(address, + InterpreterRuntime::register_finalizer), T1); + __ bind(skip_register_finalizer); + } + + Register thread = TREG; +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + + if (SafepointMechanism::uses_thread_local_poll() && _desc->bytecode() != Bytecodes::_return_register_finalizer) { + Label no_safepoint; + NOT_PRODUCT(__ block_comment("Thread-local Safepoint poll")); + __ lb(AT, thread, in_bytes(Thread::polling_page_offset())); + __ andi(AT, AT, SafepointMechanism::poll_bit()); + __ beq(AT, R0, no_safepoint); + __ delayed()->nop(); + __ push(state); + __ call_VM(noreg, CAST_FROM_FN_PTR(address, + InterpreterRuntime::at_safepoint)); + __ pop(state); + __ bind(no_safepoint); + } + + // Narrow result if state is itos but result type is smaller. + // Need to narrow in the return bytecode rather than in generate_return_entry + // since compiled code callers expect the result to already be narrowed. + if (state == itos) { + __ narrow(FSR); + } + + __ remove_activation(state, T9); + __ sync(); + + __ jr(T9); + __ delayed()->nop(); +} + +// ---------------------------------------------------------------------------- +// Volatile variables demand their effects be made known to all CPU's +// in order. Store buffers on most chips allow reads & writes to +// reorder; the JMM's ReadAfterWrite.java test fails in -Xint mode +// without some kind of memory barrier (i.e., it's not sufficient that +// the interpreter does not reorder volatile references, the hardware +// also must not reorder them). +// +// According to the new Java Memory Model (JMM): +// (1) All volatiles are serialized wrt to each other. ALSO reads & +// writes act as aquire & release, so: +// (2) A read cannot let unrelated NON-volatile memory refs that +// happen after the read float up to before the read. It's OK for +// non-volatile memory refs that happen before the volatile read to +// float down below it. +// (3) Similar a volatile write cannot let unrelated NON-volatile +// memory refs that happen BEFORE the write float down to after the +// write. It's OK for non-volatile memory refs that happen after the +// volatile write to float up before it. 
+// +// We only put in barriers around volatile refs (they are expensive), +// not _between_ memory refs (that would require us to track the +// flavor of the previous memory refs). Requirements (2) and (3) +// require some barriers before volatile stores and after volatile +// loads. These nearly cover requirement (1) but miss the +// volatile-store-volatile-load case. This final case is placed after +// volatile-stores although it could just as well go before +// volatile-loads. +void TemplateTable::volatile_barrier() { + if(os::is_MP()) __ sync(); +} + +// we dont shift left 2 bits in get_cache_and_index_at_bcp +// for we always need shift the index we use it. the ConstantPoolCacheEntry +// is 16-byte long, index is the index in +// ConstantPoolCache, so cache + base_offset() + index * 16 is +// the corresponding ConstantPoolCacheEntry +// used registers : T2 +// NOTE : the returned index need also shift left 4 to get the address! +void TemplateTable::resolve_cache_and_index(int byte_no, + Register Rcache, + Register index, + size_t index_size) { + assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range"); + const Register temp = A1; + assert_different_registers(Rcache, index); + + Label resolved; + + Bytecodes::Code code = bytecode(); + switch (code) { + case Bytecodes::_nofast_getfield: code = Bytecodes::_getfield; break; + case Bytecodes::_nofast_putfield: code = Bytecodes::_putfield; break; + default: break; + } + + __ get_cache_and_index_and_bytecode_at_bcp(Rcache, index, temp, byte_no, 1, index_size); + // is resolved? + int i = (int)code; + __ addiu(temp, temp, -i); + __ beq(temp, R0, resolved); + __ delayed()->nop(); + + // resolve first time through + address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_from_cache); + + __ move(temp, i); + __ call_VM(NOREG, entry, temp); + + // Update registers with resolved info + __ get_cache_and_index_at_bcp(Rcache, index, 1, index_size); + __ bind(resolved); +} + +// The Rcache and index registers must be set before call +void TemplateTable::load_field_cp_cache_entry(Register obj, + Register cache, + Register index, + Register off, + Register flags, + bool is_static = false) { + assert_different_registers(cache, index, flags, off); + + ByteSize cp_base_offset = ConstantPoolCache::base_offset(); + // Field offset + __ dsll(AT, index, Address::times_ptr); + __ daddu(AT, cache, AT); + __ ld(off, AT, in_bytes(cp_base_offset + ConstantPoolCacheEntry::f2_offset())); + // Flags + __ ld(flags, AT, in_bytes(cp_base_offset + ConstantPoolCacheEntry::flags_offset())); + + // klass overwrite register + if (is_static) { + __ ld(obj, AT, in_bytes(cp_base_offset + ConstantPoolCacheEntry::f1_offset())); + const int mirror_offset = in_bytes(Klass::java_mirror_offset()); + __ ld(obj, Address(obj, mirror_offset)); + + __ resolve_oop_handle(obj, T9); + } +} + +// get the method, itable_index and flags of the current invoke +void TemplateTable::load_invoke_cp_cache_entry(int byte_no, + Register method, + Register itable_index, + Register flags, + bool is_invokevirtual, + bool is_invokevfinal, /*unused*/ + bool is_invokedynamic) { + // setup registers + const Register cache = T3; + const Register index = T1; + assert_different_registers(method, flags); + assert_different_registers(method, cache, index); + assert_different_registers(itable_index, flags); + assert_different_registers(itable_index, cache, index); + assert(is_invokevirtual == (byte_no == f2_byte), "is invokevirtual flag redundant"); + // determine constant pool cache 
field offsets + const int method_offset = in_bytes( + ConstantPoolCache::base_offset() + + ((byte_no == f2_byte) + ? ConstantPoolCacheEntry::f2_offset() + : ConstantPoolCacheEntry::f1_offset())); + const int flags_offset = in_bytes(ConstantPoolCache::base_offset() + + ConstantPoolCacheEntry::flags_offset()); + // access constant pool cache fields + const int index_offset = in_bytes(ConstantPoolCache::base_offset() + + ConstantPoolCacheEntry::f2_offset()); + + size_t index_size = (is_invokedynamic ? sizeof(u4): sizeof(u2)); + resolve_cache_and_index(byte_no, cache, index, index_size); + + //assert(wordSize == 8, "adjust code below"); + // note we shift 4 not 2, for we get is the true inde + // of ConstantPoolCacheEntry, not the shifted 2-bit index as x86 version + __ dsll(AT, index, Address::times_ptr); + __ daddu(AT, cache, AT); + __ ld(method, AT, method_offset); + + if (itable_index != NOREG) { + __ ld(itable_index, AT, index_offset); + } + __ ld(flags, AT, flags_offset); +} + +// The registers cache and index expected to be set before call. +// Correct values of the cache and index registers are preserved. +void TemplateTable::jvmti_post_field_access(Register cache, Register index, + bool is_static, bool has_tos) { + // do the JVMTI work here to avoid disturbing the register state below + // We use c_rarg registers here because we want to use the register used in + // the call to the VM + if (JvmtiExport::can_post_field_access()) { + // Check to see if a field access watch has been set before we + // take the time to call into the VM. + Label L1; + // kill FSR + Register tmp1 = T2; + Register tmp2 = T1; + Register tmp3 = T3; + assert_different_registers(cache, index, AT); + __ li(AT, (intptr_t)JvmtiExport::get_field_access_count_addr()); + __ lw(AT, AT, 0); + __ beq(AT, R0, L1); + __ delayed()->nop(); + + __ get_cache_and_index_at_bcp(tmp2, tmp3, 1); + + // cache entry pointer + __ daddiu(tmp2, tmp2, in_bytes(ConstantPoolCache::base_offset())); + __ shl(tmp3, LogBytesPerWord); + __ daddu(tmp2, tmp2, tmp3); + if (is_static) { + __ move(tmp1, R0); + } else { + __ ld(tmp1, SP, 0); + __ verify_oop(tmp1); + } + // tmp1: object pointer or NULL + // tmp2: cache entry pointer + // tmp3: jvalue object on the stack + __ call_VM(NOREG, CAST_FROM_FN_PTR(address, + InterpreterRuntime::post_field_access), + tmp1, tmp2, tmp3); + __ get_cache_and_index_at_bcp(cache, index, 1); + __ bind(L1); + } +} + +void TemplateTable::pop_and_check_object(Register r) { + __ pop_ptr(r); + __ null_check(r); // for field access must check obj. 
+ __ verify_oop(r); +} + +// used registers : T1, T2, T3, T1 +// T1 : flags +// T2 : off +// T3 : obj +// T1 : field address +// The flags 31, 30, 29, 28 together build a 4 bit number 0 to 8 with the +// following mapping to the TosState states: +// btos: 0 +// ctos: 1 +// stos: 2 +// itos: 3 +// ltos: 4 +// ftos: 5 +// dtos: 6 +// atos: 7 +// vtos: 8 +// see ConstantPoolCacheEntry::set_field for more info +void TemplateTable::getfield_or_static(int byte_no, bool is_static, RewriteControl rc) { + transition(vtos, vtos); + + const Register cache = T3; + const Register index = T0; + + const Register obj = T3; + const Register off = T2; + const Register flags = T1; + + const Register scratch = T8; + + resolve_cache_and_index(byte_no, cache, index, sizeof(u2)); + jvmti_post_field_access(cache, index, is_static, false); + load_field_cp_cache_entry(obj, cache, index, off, flags, is_static); + + { + __ move(scratch, 1 << ConstantPoolCacheEntry::is_volatile_shift); + __ andr(scratch, scratch, flags); + + Label notVolatile; + __ beq(scratch, R0, notVolatile); + __ delayed()->nop(); + volatile_barrier(); + __ bind(notVolatile); + } + + if (!is_static) pop_and_check_object(obj); + __ daddu(index, obj, off); + + const Address field(index, 0); + + Label Done, notByte, notBool, notInt, notShort, notChar, + notLong, notFloat, notObj, notDouble; + + assert(btos == 0, "change code, btos != 0"); + __ dsrl(flags, flags, ConstantPoolCacheEntry::tos_state_shift); + __ andi(flags, flags, ConstantPoolCacheEntry::tos_state_mask); + __ bne(flags, R0, notByte); + __ delayed()->nop(); + + // btos + __ access_load_at(T_BYTE, IN_HEAP, FSR, field, noreg, noreg); + __ push(btos); + + // Rewrite bytecode to be faster + if (!is_static && rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_bgetfield, T3, T2); + } + __ b(Done); + __ delayed()->nop(); + + + __ bind(notByte); + __ move(AT, ztos); + __ bne(flags, AT, notBool); + __ delayed()->nop(); + + // ztos + __ access_load_at(T_BOOLEAN, IN_HEAP, FSR, field, noreg, noreg); + __ push(ztos); + + // Rewrite bytecode to be faster + if (!is_static && rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_bgetfield, T3, T2); + } + __ b(Done); + __ delayed()->nop(); + + + __ bind(notBool); + __ move(AT, itos); + __ bne(flags, AT, notInt); + __ delayed()->nop(); + + // itos + __ access_load_at(T_INT, IN_HEAP, FSR, field, noreg, noreg); + __ push(itos); + + // Rewrite bytecode to be faster + if (!is_static && rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_igetfield, T3, T2); + } + __ b(Done); + __ delayed()->nop(); + + __ bind(notInt); + __ move(AT, atos); + __ bne(flags, AT, notObj); + __ delayed()->nop(); + + // atos + //add for compressedoops + do_oop_load(_masm, Address(index, 0), FSR, IN_HEAP); + __ push(atos); + + if (!is_static && rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_agetfield, T3, T2); + } + __ b(Done); + __ delayed()->nop(); + + __ bind(notObj); + __ move(AT, ctos); + __ bne(flags, AT, notChar); + __ delayed()->nop(); + + // ctos + __ access_load_at(T_CHAR, IN_HEAP, FSR, field, noreg, noreg); + __ push(ctos); + + if (!is_static && rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_cgetfield, T3, T2); + } + __ b(Done); + __ delayed()->nop(); + + __ bind(notChar); + __ move(AT, stos); + __ bne(flags, AT, notShort); + __ delayed()->nop(); + + // stos + __ access_load_at(T_SHORT, IN_HEAP, FSR, field, noreg, noreg); + __ push(stos); + + if (!is_static && rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_sgetfield, T3, T2); + } + __ b(Done); 
+ __ delayed()->nop(); + + __ bind(notShort); + __ move(AT, ltos); + __ bne(flags, AT, notLong); + __ delayed()->nop(); + + // FIXME : the load/store should be atomic, we have no simple method to do this in mips32 + // ltos + __ access_load_at(T_LONG, IN_HEAP | MO_RELAXED, FSR, field, noreg, noreg); + __ push(ltos); + + // Don't rewrite to _fast_lgetfield for potential volatile case. + __ b(Done); + __ delayed()->nop(); + + __ bind(notLong); + __ move(AT, ftos); + __ bne(flags, AT, notFloat); + __ delayed()->nop(); + + // ftos + __ access_load_at(T_FLOAT, IN_HEAP, noreg /* ftos */, field, noreg, noreg); + __ push(ftos); + + if (!is_static && rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_fgetfield, T3, T2); + } + __ b(Done); + __ delayed()->nop(); + + __ bind(notFloat); + __ move(AT, dtos); +#ifdef ASSERT + __ bne(flags, AT, notDouble); + __ delayed()->nop(); +#endif + + // dtos + __ access_load_at(T_DOUBLE, IN_HEAP, noreg /* dtos */, field, noreg, noreg); + __ push(dtos); + + if (!is_static && rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_dgetfield, T3, T2); + } + +#ifdef ASSERT + __ b(Done); + __ delayed()->nop(); + __ bind(notDouble); + __ stop("Bad state"); +#endif + + __ bind(Done); + + { + Label notVolatile; + __ beq(scratch, R0, notVolatile); + __ delayed()->nop(); + volatile_barrier(); + __ bind(notVolatile); + } +} + + +void TemplateTable::getfield(int byte_no) { + getfield_or_static(byte_no, false); +} + +void TemplateTable::nofast_getfield(int byte_no) { + getfield_or_static(byte_no, false, may_not_rewrite); +} + +void TemplateTable::getstatic(int byte_no) { + getfield_or_static(byte_no, true); +} + +// The registers cache and index expected to be set before call. +// The function may destroy various registers, just not the cache and index registers. +void TemplateTable::jvmti_post_field_mod(Register cache, Register index, bool is_static) { + transition(vtos, vtos); + + ByteSize cp_base_offset = ConstantPoolCache::base_offset(); + + if (JvmtiExport::can_post_field_modification()) { + // Check to see if a field modification watch has been set before + // we take the time to call into the VM. + Label L1; + //kill AT, T1, T2, T3, T9 + Register tmp1 = T2; + Register tmp2 = T1; + Register tmp3 = T3; + Register tmp4 = T9; + assert_different_registers(cache, index, tmp4); + + __ li(AT, JvmtiExport::get_field_modification_count_addr()); + __ lw(AT, AT, 0); + __ beq(AT, R0, L1); + __ delayed()->nop(); + + __ get_cache_and_index_at_bcp(tmp2, tmp4, 1); + + if (is_static) { + __ move(tmp1, R0); + } else { + // Life is harder. The stack holds the value on top, followed by + // the object. We don't know the size of the value, though; it + // could be one or two words depending on its type. As a result, + // we must find the type to determine where the object is. 
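+      // Peek at the tos_state recorded in the cp cache flags: long and double
+      // values occupy two expression-stack slots, everything else one, so the
+      // slot holding the object reference is either one or two slots down.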
+ Label two_word, valsize_known; + __ dsll(AT, tmp4, Address::times_8); + __ daddu(AT, tmp2, AT); + __ ld(tmp3, AT, in_bytes(cp_base_offset + + ConstantPoolCacheEntry::flags_offset())); + __ shr(tmp3, ConstantPoolCacheEntry::tos_state_shift); + + ConstantPoolCacheEntry::verify_tos_state_shift(); + __ move(tmp1, SP); + __ move(AT, ltos); + __ beq(tmp3, AT, two_word); + __ delayed()->nop(); + __ move(AT, dtos); + __ beq(tmp3, AT, two_word); + __ delayed()->nop(); + __ b(valsize_known); + __ delayed()->daddiu(tmp1, tmp1, Interpreter::expr_offset_in_bytes(1) ); + + __ bind(two_word); + __ daddiu(tmp1, tmp1, Interpreter::expr_offset_in_bytes(2)); + + __ bind(valsize_known); + // setup object pointer + __ ld(tmp1, tmp1, 0*wordSize); + } + // cache entry pointer + __ daddiu(tmp2, tmp2, in_bytes(cp_base_offset)); + __ shl(tmp4, LogBytesPerWord); + __ daddu(tmp2, tmp2, tmp4); + // object (tos) + __ move(tmp3, SP); + // tmp1: object pointer set up above (NULL if static) + // tmp2: cache entry pointer + // tmp3: jvalue object on the stack + __ call_VM(NOREG, + CAST_FROM_FN_PTR(address, + InterpreterRuntime::post_field_modification), + tmp1, tmp2, tmp3); + __ get_cache_and_index_at_bcp(cache, index, 1); + __ bind(L1); + } +} + +// used registers : T0, T1, T2, T3, T8 +// T1 : flags +// T2 : off +// T3 : obj +// T8 : volatile bit +// see ConstantPoolCacheEntry::set_field for more info +void TemplateTable::putfield_or_static(int byte_no, bool is_static, RewriteControl rc) { + transition(vtos, vtos); + + const Register cache = T3; + const Register index = T0; + const Register obj = T3; + const Register off = T2; + const Register flags = T1; + const Register bc = T3; + + const Register scratch = T8; + + resolve_cache_and_index(byte_no, cache, index, sizeof(u2)); + jvmti_post_field_mod(cache, index, is_static); + load_field_cp_cache_entry(obj, cache, index, off, flags, is_static); + + Label Done; + { + __ move(scratch, 1 << ConstantPoolCacheEntry::is_volatile_shift); + __ andr(scratch, scratch, flags); + + Label notVolatile; + __ beq(scratch, R0, notVolatile); + __ delayed()->nop(); + volatile_barrier(); + __ bind(notVolatile); + } + + + Label notByte, notBool, notInt, notShort, notChar, notLong, notFloat, notObj, notDouble; + + assert(btos == 0, "change code, btos != 0"); + + // btos + __ dsrl(flags, flags, ConstantPoolCacheEntry::tos_state_shift); + __ andi(flags, flags, ConstantPoolCacheEntry::tos_state_mask); + __ bne(flags, R0, notByte); + __ delayed()->nop(); + + __ pop(btos); + if (!is_static) { + pop_and_check_object(obj); + } + __ daddu(T9, obj, off); + __ access_store_at(T_BYTE, IN_HEAP, Address(T9), FSR, noreg, noreg); + + if (!is_static && rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_bputfield, bc, off, true, byte_no); + } + __ b(Done); + __ delayed()->nop(); + + // ztos + __ bind(notByte); + __ move(AT, ztos); + __ bne(flags, AT, notBool); + __ delayed()->nop(); + + __ pop(ztos); + if (!is_static) { + pop_and_check_object(obj); + } + __ daddu(T9, obj, off); + __ andi(FSR, FSR, 0x1); + __ access_store_at(T_BOOLEAN, IN_HEAP, Address(T9), FSR, noreg, noreg); + + if (!is_static && rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_zputfield, bc, off, true, byte_no); + } + __ b(Done); + __ delayed()->nop(); + + // itos + __ bind(notBool); + __ move(AT, itos); + __ bne(flags, AT, notInt); + __ delayed()->nop(); + + __ pop(itos); + if (!is_static) { + pop_and_check_object(obj); + } + __ daddu(T9, obj, off); + __ access_store_at(T_INT, IN_HEAP, Address(T9), FSR, noreg, noreg); + + if 
(!is_static && rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_iputfield, bc, off, true, byte_no); + } + __ b(Done); + __ delayed()->nop(); + + // atos + __ bind(notInt); + __ move(AT, atos); + __ bne(flags, AT, notObj); + __ delayed()->nop(); + + __ pop(atos); + if (!is_static) { + pop_and_check_object(obj); + } + + do_oop_store(_masm, Address(obj, off, Address::times_1, 0), FSR); + + if (!is_static && rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_aputfield, bc, off, true, byte_no); + } + __ b(Done); + __ delayed()->nop(); + + // ctos + __ bind(notObj); + __ move(AT, ctos); + __ bne(flags, AT, notChar); + __ delayed()->nop(); + + __ pop(ctos); + if (!is_static) { + pop_and_check_object(obj); + } + __ daddu(T9, obj, off); + __ access_store_at(T_CHAR, IN_HEAP, Address(T9), FSR, noreg, noreg); + if (!is_static && rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_cputfield, bc, off, true, byte_no); + } + __ b(Done); + __ delayed()->nop(); + + // stos + __ bind(notChar); + __ move(AT, stos); + __ bne(flags, AT, notShort); + __ delayed()->nop(); + + __ pop(stos); + if (!is_static) { + pop_and_check_object(obj); + } + __ daddu(T9, obj, off); + __ access_store_at(T_SHORT, IN_HEAP, Address(T9), FSR, noreg, noreg); + if (!is_static && rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_sputfield, bc, off, true, byte_no); + } + __ b(Done); + __ delayed()->nop(); + + // ltos + __ bind(notShort); + __ move(AT, ltos); + __ bne(flags, AT, notLong); + __ delayed()->nop(); + + __ pop(ltos); + if (!is_static) { + pop_and_check_object(obj); + } + __ daddu(T9, obj, off); + __ access_store_at(T_LONG, IN_HEAP, Address(T9), FSR, noreg, noreg); + if (!is_static && rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_lputfield, bc, off, true, byte_no); + } + __ b(Done); + __ delayed()->nop(); + + // ftos + __ bind(notLong); + __ move(AT, ftos); + __ bne(flags, AT, notFloat); + __ delayed()->nop(); + + __ pop(ftos); + if (!is_static) { + pop_and_check_object(obj); + } + __ daddu(T9, obj, off); + __ access_store_at(T_FLOAT, IN_HEAP, Address(T9), noreg, noreg, noreg); + if (!is_static && rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_fputfield, bc, off, true, byte_no); + } + __ b(Done); + __ delayed()->nop(); + + + // dtos + __ bind(notFloat); + __ move(AT, dtos); +#ifdef ASSERT + __ bne(flags, AT, notDouble); + __ delayed()->nop(); +#endif + + __ pop(dtos); + if (!is_static) { + pop_and_check_object(obj); + } + __ daddu(T9, obj, off); + __ access_store_at(T_DOUBLE, IN_HEAP, Address(T9), noreg, noreg, noreg); + if (!is_static && rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_dputfield, bc, off, true, byte_no); + } + +#ifdef ASSERT + __ b(Done); + __ delayed()->nop(); + + __ bind(notDouble); + __ stop("Bad state"); +#endif + + __ bind(Done); + + { + Label notVolatile; + __ beq(scratch, R0, notVolatile); + __ delayed()->nop(); + volatile_barrier(); + __ bind(notVolatile); + } +} + +void TemplateTable::putfield(int byte_no) { + putfield_or_static(byte_no, false); +} + +void TemplateTable::nofast_putfield(int byte_no) { + putfield_or_static(byte_no, false, may_not_rewrite); +} + +void TemplateTable::putstatic(int byte_no) { + putfield_or_static(byte_no, true); +} + +// used registers : T1, T2, T3 +// T1 : cp_entry +// T2 : obj +// T3 : value pointer +void TemplateTable::jvmti_post_fast_field_mod() { + if (JvmtiExport::can_post_field_modification()) { + // Check to see if a field modification watch has been set before + // we take the time to call into the VM. 
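+    // If a watch is set, the to-be-stored value is spilled back onto the
+    // expression stack as a jvalue, InterpreterRuntime::post_field_modification
+    // is called with (object, cache entry, jvalue), and the tos value is then
+    // reloaded so the fast store code that calls this helper sees an
+    // unchanged stack.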
+ Label L2; + //kill AT, T1, T2, T3, T9 + Register tmp1 = T2; + Register tmp2 = T1; + Register tmp3 = T3; + Register tmp4 = T9; + __ li(AT, JvmtiExport::get_field_modification_count_addr()); + __ lw(tmp3, AT, 0); + __ beq(tmp3, R0, L2); + __ delayed()->nop(); + __ pop_ptr(tmp1); + __ verify_oop(tmp1); + __ push_ptr(tmp1); + switch (bytecode()) { // load values into the jvalue object + case Bytecodes::_fast_aputfield: __ push_ptr(FSR); break; + case Bytecodes::_fast_bputfield: // fall through + case Bytecodes::_fast_zputfield: // fall through + case Bytecodes::_fast_sputfield: // fall through + case Bytecodes::_fast_cputfield: // fall through + case Bytecodes::_fast_iputfield: __ push_i(FSR); break; + case Bytecodes::_fast_dputfield: __ push_d(FSF); break; + case Bytecodes::_fast_fputfield: __ push_f(); break; + case Bytecodes::_fast_lputfield: __ push_l(FSR); break; + default: ShouldNotReachHere(); + } + __ move(tmp3, SP); + // access constant pool cache entry + __ get_cache_entry_pointer_at_bcp(tmp2, FSR, 1); + __ verify_oop(tmp1); + // tmp1: object pointer copied above + // tmp2: cache entry pointer + // tmp3: jvalue object on the stack + __ call_VM(NOREG, + CAST_FROM_FN_PTR(address, + InterpreterRuntime::post_field_modification), + tmp1, tmp2, tmp3); + + switch (bytecode()) { // restore tos values + case Bytecodes::_fast_aputfield: __ pop_ptr(FSR); break; + case Bytecodes::_fast_bputfield: // fall through + case Bytecodes::_fast_zputfield: // fall through + case Bytecodes::_fast_sputfield: // fall through + case Bytecodes::_fast_cputfield: // fall through + case Bytecodes::_fast_iputfield: __ pop_i(FSR); break; + case Bytecodes::_fast_dputfield: __ pop_d(); break; + case Bytecodes::_fast_fputfield: __ pop_f(); break; + case Bytecodes::_fast_lputfield: __ pop_l(FSR); break; + default: break; + } + __ bind(L2); + } +} + +// used registers : T2, T3, T1 +// T2 : index & off & field address +// T3 : cache & obj +// T1 : flags +void TemplateTable::fast_storefield(TosState state) { + transition(state, vtos); + + const Register scratch = T8; + + ByteSize base = ConstantPoolCache::base_offset(); + + jvmti_post_fast_field_mod(); + + // access constant pool cache + __ get_cache_and_index_at_bcp(T3, T2, 1); + + // Must prevent reordering of the following cp cache loads with bytecode load + __ sync(); + + // test for volatile with T1 + __ dsll(AT, T2, Address::times_8); + __ daddu(AT, T3, AT); + __ ld(T1, AT, in_bytes(base + ConstantPoolCacheEntry::flags_offset())); + + // replace index with field offset from cache entry + __ ld(T2, AT, in_bytes(base + ConstantPoolCacheEntry::f2_offset())); + + Label Done; + { + __ move(scratch, 1 << ConstantPoolCacheEntry::is_volatile_shift); + __ andr(scratch, scratch, T1); + + Label notVolatile; + __ beq(scratch, R0, notVolatile); + __ delayed()->nop(); + volatile_barrier(); + __ bind(notVolatile); + } + + // Get object from stack + pop_and_check_object(T3); + + if (bytecode() != Bytecodes::_fast_aputfield) { + // field address + __ daddu(T2, T3, T2); + } + + // access field + switch (bytecode()) { + case Bytecodes::_fast_zputfield: + __ andi(FSR, FSR, 0x1); // boolean is true if LSB is 1 + __ access_store_at(T_BOOLEAN, IN_HEAP, Address(T2), FSR, noreg, noreg); + break; + case Bytecodes::_fast_bputfield: + __ access_store_at(T_BYTE, IN_HEAP, Address(T2), FSR, noreg, noreg); + break; + case Bytecodes::_fast_sputfield: + __ access_store_at(T_SHORT, IN_HEAP, Address(T2), FSR, noreg, noreg); + break; + case Bytecodes::_fast_cputfield: + __ access_store_at(T_CHAR, 
IN_HEAP, Address(T2), FSR, noreg, noreg); + break; + case Bytecodes::_fast_iputfield: + __ access_store_at(T_INT, IN_HEAP, Address(T2), FSR, noreg, noreg); + break; + case Bytecodes::_fast_lputfield: + __ access_store_at(T_LONG, IN_HEAP, Address(T2), FSR, noreg, noreg); + break; + case Bytecodes::_fast_fputfield: + __ access_store_at(T_FLOAT, IN_HEAP, Address(T2), noreg, noreg, noreg); + break; + case Bytecodes::_fast_dputfield: + __ access_store_at(T_DOUBLE, IN_HEAP, Address(T2), noreg, noreg, noreg); + break; + case Bytecodes::_fast_aputfield: + do_oop_store(_masm, Address(T3, T2, Address::times_1, 0), FSR); + break; + default: + ShouldNotReachHere(); + } + + { + Label notVolatile; + __ beq(scratch, R0, notVolatile); + __ delayed()->nop(); + volatile_barrier(); + __ bind(notVolatile); + } +} + +// used registers : T2, T3, T1 +// T3 : cp_entry & cache +// T2 : index & offset +void TemplateTable::fast_accessfield(TosState state) { + transition(atos, state); + + const Register scratch = T8; + + // do the JVMTI work here to avoid disturbing the register state below + if (JvmtiExport::can_post_field_access()) { + // Check to see if a field access watch has been set before we take + // the time to call into the VM. + Label L1; + __ li(AT, (intptr_t)JvmtiExport::get_field_access_count_addr()); + __ lw(T3, AT, 0); + __ beq(T3, R0, L1); + __ delayed()->nop(); + // access constant pool cache entry + __ get_cache_entry_pointer_at_bcp(T3, T1, 1); + __ move(TSR, FSR); + __ verify_oop(FSR); + // FSR: object pointer copied above + // T3: cache entry pointer + __ call_VM(NOREG, + CAST_FROM_FN_PTR(address, InterpreterRuntime::post_field_access), + FSR, T3); + __ move(FSR, TSR); + __ bind(L1); + } + + // access constant pool cache + __ get_cache_and_index_at_bcp(T3, T2, 1); + + // Must prevent reordering of the following cp cache loads with bytecode load + __ sync(); + + // replace index with field offset from cache entry + __ dsll(AT, T2, Address::times_8); + __ daddu(AT, T3, AT); + __ ld(T2, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::f2_offset())); + + { + __ ld(AT, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset())); + __ move(scratch, 1 << ConstantPoolCacheEntry::is_volatile_shift); + __ andr(scratch, scratch, AT); + + Label notVolatile; + __ beq(scratch, R0, notVolatile); + __ delayed()->nop(); + volatile_barrier(); + __ bind(notVolatile); + } + + // FSR: object + __ verify_oop(FSR); + __ null_check(FSR); + // field addresses + __ daddu(FSR, FSR, T2); + + // access field + switch (bytecode()) { + case Bytecodes::_fast_bgetfield: + __ access_load_at(T_BYTE, IN_HEAP, FSR, Address(FSR), noreg, noreg); + break; + case Bytecodes::_fast_sgetfield: + __ access_load_at(T_SHORT, IN_HEAP, FSR, Address(FSR), noreg, noreg); + break; + case Bytecodes::_fast_cgetfield: + __ access_load_at(T_CHAR, IN_HEAP, FSR, Address(FSR), noreg, noreg); + break; + case Bytecodes::_fast_igetfield: + __ access_load_at(T_INT, IN_HEAP, FSR, Address(FSR), noreg, noreg); + break; + case Bytecodes::_fast_lgetfield: + __ stop("should not be rewritten"); + break; + case Bytecodes::_fast_fgetfield: + __ access_load_at(T_FLOAT, IN_HEAP, noreg, Address(FSR), noreg, noreg); + break; + case Bytecodes::_fast_dgetfield: + __ access_load_at(T_DOUBLE, IN_HEAP, noreg, Address(FSR), noreg, noreg); + break; + case Bytecodes::_fast_agetfield: + //add for compressedoops + do_oop_load(_masm, Address(FSR, 0), FSR, IN_HEAP); + __ verify_oop(FSR); + break; + default: + ShouldNotReachHere(); 
+ } + + { + Label notVolatile; + __ beq(scratch, R0, notVolatile); + __ delayed()->nop(); + volatile_barrier(); + __ bind(notVolatile); + } +} + +// generator for _fast_iaccess_0, _fast_aaccess_0, _fast_faccess_0 +// used registers : T1, T2, T3, T1 +// T1 : obj & field address +// T2 : off +// T3 : cache +// T1 : index +void TemplateTable::fast_xaccess(TosState state) { + transition(vtos, state); + + const Register scratch = T8; + + // get receiver + __ ld(T1, aaddress(0)); + // access constant pool cache + __ get_cache_and_index_at_bcp(T3, T2, 2); + __ dsll(AT, T2, Address::times_8); + __ daddu(AT, T3, AT); + __ ld(T2, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::f2_offset())); + + { + __ ld(AT, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset())); + __ move(scratch, 1 << ConstantPoolCacheEntry::is_volatile_shift); + __ andr(scratch, scratch, AT); + + Label notVolatile; + __ beq(scratch, R0, notVolatile); + __ delayed()->nop(); + volatile_barrier(); + __ bind(notVolatile); + } + + // make sure exception is reported in correct bcp range (getfield is + // next instruction) + __ daddiu(BCP, BCP, 1); + __ null_check(T1); + __ daddu(T1, T1, T2); + + if (state == itos) { + __ access_load_at(T_INT, IN_HEAP, FSR, Address(T1), noreg, noreg); + } else if (state == atos) { + do_oop_load(_masm, Address(T1, 0), FSR, IN_HEAP); + __ verify_oop(FSR); + } else if (state == ftos) { + __ access_load_at(T_FLOAT, IN_HEAP, noreg, Address(T1), noreg, noreg); + } else { + ShouldNotReachHere(); + } + __ daddiu(BCP, BCP, -1); + + { + Label notVolatile; + __ beq(scratch, R0, notVolatile); + __ delayed()->nop(); + volatile_barrier(); + __ bind(notVolatile); + } +} + + + +//----------------------------------------------------------------------------- +// Calls + +void TemplateTable::count_calls(Register method, Register temp) { + // implemented elsewhere + ShouldNotReachHere(); +} + +// method, index, recv, flags: T1, T2, T3, T1 +// byte_no = 2 for _invokevirtual, 1 else +// T0 : return address +// get the method & index of the invoke, and push the return address of +// the invoke(first word in the frame) +// this address is where the return code jmp to. +// NOTE : this method will set T3&T1 as recv&flags +void TemplateTable::prepare_invoke(int byte_no, + Register method, // linked method (or i-klass) + Register index, // itable index, MethodType, etc. 
+ Register recv, // if caller wants to see it + Register flags // if caller wants to test it + ) { + // determine flags + const Bytecodes::Code code = bytecode(); + const bool is_invokeinterface = code == Bytecodes::_invokeinterface; + const bool is_invokedynamic = code == Bytecodes::_invokedynamic; + const bool is_invokehandle = code == Bytecodes::_invokehandle; + const bool is_invokevirtual = code == Bytecodes::_invokevirtual; + const bool is_invokespecial = code == Bytecodes::_invokespecial; + const bool load_receiver = (recv != noreg); + const bool save_flags = (flags != noreg); + assert(load_receiver == (code != Bytecodes::_invokestatic && code != Bytecodes::_invokedynamic),""); + assert(save_flags == (is_invokeinterface || is_invokevirtual), "need flags for vfinal"); + assert(flags == noreg || flags == T1, "error flags reg."); + assert(recv == noreg || recv == T3, "error recv reg."); + + // setup registers & access constant pool cache + if(recv == noreg) recv = T3; + if(flags == noreg) flags = T1; + assert_different_registers(method, index, recv, flags); + + // save 'interpreter return address' + __ save_bcp(); + + load_invoke_cp_cache_entry(byte_no, method, index, flags, is_invokevirtual, false, is_invokedynamic); + + if (is_invokedynamic || is_invokehandle) { + Label L_no_push; + __ move(AT, (1 << ConstantPoolCacheEntry::has_appendix_shift)); + __ andr(AT, AT, flags); + __ beq(AT, R0, L_no_push); + __ delayed()->nop(); + // Push the appendix as a trailing parameter. + // This must be done before we get the receiver, + // since the parameter_size includes it. + Register tmp = SSR; + __ push(tmp); + __ move(tmp, index); + assert(ConstantPoolCacheEntry::_indy_resolved_references_appendix_offset == 0, "appendix expected at index+0"); + __ load_resolved_reference_at_index(index, tmp, recv); + __ pop(tmp); + __ push(index); // push appendix (MethodType, CallSite, etc.) + __ bind(L_no_push); + } + + // load receiver if needed (after appendix is pushed so parameter size is correct) + // Note: no return address pushed yet + if (load_receiver) { + __ move(AT, ConstantPoolCacheEntry::parameter_size_mask); + __ andr(recv, flags, AT); + // Since we won't push RA on stack, no_return_pc_pushed_yet should be 0. 
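+    // recv currently holds the parameter count masked out of the flags; the
+    // receiver is the first (and therefore deepest) argument, so the -1
+    // correction should make argument_address resolve to a slot
+    // (parameter_size - 1) stack elements up from the expression SP.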
+ const int no_return_pc_pushed_yet = 0; // argument slot correction before we push return address + const int receiver_is_at_end = -1; // back off one slot to get receiver + Address recv_addr = __ argument_address(recv, no_return_pc_pushed_yet + receiver_is_at_end); + __ ld(recv, recv_addr); + __ verify_oop(recv); + } + if(save_flags) { + __ move(BCP, flags); + } + + // compute return type + __ dsrl(flags, flags, ConstantPoolCacheEntry::tos_state_shift); + __ andi(flags, flags, 0xf); + + // Make sure we don't need to mask flags for tos_state_shift after the above shift + ConstantPoolCacheEntry::verify_tos_state_shift(); + // load return address + { + const address table = (address) Interpreter::invoke_return_entry_table_for(code); + __ li(AT, (long)table); + __ dsll(flags, flags, LogBytesPerWord); + __ daddu(AT, AT, flags); + __ ld(RA, AT, 0); + } + + if (save_flags) { + __ move(flags, BCP); + __ restore_bcp(); + } +} + +// used registers : T0, T3, T1, T2 +// T3 : recv, this two register using convention is by prepare_invoke +// T1 : flags, klass +// Rmethod : method, index must be Rmethod +void TemplateTable::invokevirtual_helper(Register index, + Register recv, + Register flags) { + + assert_different_registers(index, recv, flags, T2); + + // Test for an invoke of a final method + Label notFinal; + __ move(AT, (1 << ConstantPoolCacheEntry::is_vfinal_shift)); + __ andr(AT, flags, AT); + __ beq(AT, R0, notFinal); + __ delayed()->nop(); + + Register method = index; // method must be Rmethod + assert(method == Rmethod, "methodOop must be Rmethod for interpreter calling convention"); + + // do the call - the index is actually the method to call + // the index is indeed methodOop, for this is vfinal, + // see ConstantPoolCacheEntry::set_method for more info + + + // It's final, need a null check here! 
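+  // For a vfinal entry, f2 already holds the Method* itself (no vtable walk),
+  // so after the receiver null check the call is made directly; only the
+  // not-final case below dispatches through the receiver's vtable.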
+ __ null_check(recv); + + // profile this call + __ profile_final_call(T2); + + // T2: tmp, used for mdp + // method: callee + // T9: tmp + // is_virtual: true + __ profile_arguments_type(T2, method, T9, true); + + __ jump_from_interpreted(method, T2); + + __ bind(notFinal); + + // get receiver klass + __ null_check(recv, oopDesc::klass_offset_in_bytes()); + __ load_klass(T2, recv); + + // profile this call + __ profile_virtual_call(T2, T0, T1); + + // get target methodOop & entry point + const int base = in_bytes(Klass::vtable_start_offset()); + assert(vtableEntry::size() * wordSize == wordSize, "adjust the scaling in the code below"); + __ dsll(AT, index, Address::times_ptr); + // T2: receiver + __ daddu(AT, T2, AT); + //this is a ualign read + __ ld(method, AT, base + vtableEntry::method_offset_in_bytes()); + __ profile_arguments_type(T2, method, T9, true); + __ jump_from_interpreted(method, T2); + +} + +void TemplateTable::invokevirtual(int byte_no) { + transition(vtos, vtos); + assert(byte_no == f2_byte, "use this argument"); + prepare_invoke(byte_no, Rmethod, NOREG, T3, T1); + // now recv & flags in T3, T1 + invokevirtual_helper(Rmethod, T3, T1); +} + +// T9 : entry +// Rmethod : method +void TemplateTable::invokespecial(int byte_no) { + transition(vtos, vtos); + assert(byte_no == f1_byte, "use this argument"); + prepare_invoke(byte_no, Rmethod, NOREG, T3); + // now recv & flags in T3, T1 + __ verify_oop(T3); + __ null_check(T3); + __ profile_call(T9); + + // T8: tmp, used for mdp + // Rmethod: callee + // T9: tmp + // is_virtual: false + __ profile_arguments_type(T8, Rmethod, T9, false); + + __ jump_from_interpreted(Rmethod, T9); + __ move(T0, T3); +} + +void TemplateTable::invokestatic(int byte_no) { + transition(vtos, vtos); + assert(byte_no == f1_byte, "use this argument"); + prepare_invoke(byte_no, Rmethod, NOREG); + + __ profile_call(T9); + + // T8: tmp, used for mdp + // Rmethod: callee + // T9: tmp + // is_virtual: false + __ profile_arguments_type(T8, Rmethod, T9, false); + + __ jump_from_interpreted(Rmethod, T9); +} + +// i have no idea what to do here, now. for future change. FIXME. +void TemplateTable::fast_invokevfinal(int byte_no) { + transition(vtos, vtos); + assert(byte_no == f2_byte, "use this argument"); + __ stop("fast_invokevfinal not used on mips64"); +} + +// used registers : T0, T1, T2, T3, T1, A7 +// T0 : itable, vtable, entry +// T1 : interface +// T3 : receiver +// T1 : flags, klass +// Rmethod : index, method, this is required by interpreter_entry +void TemplateTable::invokeinterface(int byte_no) { + transition(vtos, vtos); + //this method will use T1-T4 and T0 + assert(byte_no == f1_byte, "use this argument"); + prepare_invoke(byte_no, T2, Rmethod, T3, T1); + // T2: reference klass (from f1) if interface method + // Rmethod: method (from f2) + // T3: receiver + // T1: flags + + // First check for Object case, then private interface method, + // then regular interface method. + + // Special case of invokeinterface called for virtual method of + // java.lang.Object. See cpCache.cpp for details. 
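+  // is_forced_virtual marks java.lang.Object methods invoked via
+  // invokeinterface; they are re-routed through the ordinary virtual dispatch
+  // in invokevirtual_helper and never reach the itable lookup below.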
+ Label notObjectMethod; + __ move(AT, (1 << ConstantPoolCacheEntry::is_forced_virtual_shift)); + __ andr(AT, T1, AT); + __ beq(AT, R0, notObjectMethod); + __ delayed()->nop(); + + invokevirtual_helper(Rmethod, T3, T1); + // no return from above + __ bind(notObjectMethod); + + Label no_such_interface; // for receiver subtype check + Register recvKlass; // used for exception processing + + // Check for private method invocation - indicated by vfinal + Label notVFinal; + __ move(AT, (1 << ConstantPoolCacheEntry::is_vfinal_shift)); + __ andr(AT, T1, AT); + __ beq(AT, R0, notVFinal); + __ delayed()->nop(); + + // Get receiver klass into FSR - also a null check + __ null_check(T3, oopDesc::klass_offset_in_bytes()); + __ load_klass(FSR, T3); + + Label subtype; + __ check_klass_subtype(FSR, T2, T0, subtype); + // If we get here the typecheck failed + recvKlass = T1; + __ move(recvKlass, FSR); + __ b(no_such_interface); + __ delayed()->nop(); + + __ bind(subtype); + + // do the call - rbx is actually the method to call + + __ profile_final_call(T1); + __ profile_arguments_type(T1, Rmethod, T0, true); + + __ jump_from_interpreted(Rmethod, T1); + // no return from above + __ bind(notVFinal); + + // Get receiver klass into T1 - also a null check + __ restore_locals(); + __ null_check(T3, oopDesc::klass_offset_in_bytes()); + __ load_klass(T1, T3); + + Label no_such_method; + + // Preserve method for throw_AbstractMethodErrorVerbose. + __ move(T3, Rmethod); + // Receiver subtype check against REFC. + // Superklass in T2. Subklass in T1. + __ lookup_interface_method(// inputs: rec. class, interface, itable index + T1, T2, noreg, + // outputs: scan temp. reg, scan temp. reg + T0, FSR, + no_such_interface, + /*return_method=*/false); + + + // profile this call + __ restore_bcp(); + __ profile_virtual_call(T1, T0, FSR); + + // Get declaring interface class from method, and itable index + __ ld_ptr(T2, Rmethod, in_bytes(Method::const_offset())); + __ ld_ptr(T2, T2, in_bytes(ConstMethod::constants_offset())); + __ ld_ptr(T2, T2, ConstantPool::pool_holder_offset_in_bytes()); + __ lw(Rmethod, Rmethod, in_bytes(Method::itable_index_offset())); + __ addiu(Rmethod, Rmethod, (-1) * Method::itable_index_max); + __ subu32(Rmethod, R0, Rmethod); + + // Preserve recvKlass for throw_AbstractMethodErrorVerbose. + __ move(FSR, T1); + __ lookup_interface_method(// inputs: rec. class, interface, itable index + FSR, T2, Rmethod, + // outputs: method, scan temp. reg + Rmethod, T0, + no_such_interface); + + // Rmethod: Method* to call + // T3: receiver + // Check for abstract method error + // Note: This should be done more efficiently via a throw_abstract_method_error + // interpreter entry point and a conditional jump to it in case of a null + // method. + __ beq(Rmethod, R0, no_such_method); + __ delayed()->nop(); + + __ profile_called_method(Rmethod, T0, T1); + __ profile_arguments_type(T1, Rmethod, T0, true); + + // do the call + // T3: receiver + // Rmethod: Method* + __ jump_from_interpreted(Rmethod, T1); + __ should_not_reach_here(); + + // exception handling code follows... + // note: must restore interpreter registers to canonical + // state for exception handling to work correctly! + + __ bind(no_such_method); + // throw exception + __ pop(Rmethod); // pop return address (pushed by prepare_invoke) + __ restore_bcp(); + __ restore_locals(); + // Pass arguments for generating a verbose error message. 
+ recvKlass = A1; + Register method = A2; + if (recvKlass != T1) { __ move(recvKlass, T1); } + if (method != T3) { __ move(method, T3); } + __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_AbstractMethodErrorVerbose), recvKlass, method); + // the call_VM checks for exception, so we should never return here. + __ should_not_reach_here(); + + __ bind(no_such_interface); + // throw exception + __ pop(Rmethod); // pop return address (pushed by prepare_invoke) + __ restore_bcp(); + __ restore_locals(); + // Pass arguments for generating a verbose error message. + if (recvKlass != T1) { __ move(recvKlass, T1); } + __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_IncompatibleClassChangeErrorVerbose), recvKlass, T2); + // the call_VM checks for exception, so we should never return here. + __ should_not_reach_here(); +} + + +void TemplateTable::invokehandle(int byte_no) { + transition(vtos, vtos); + assert(byte_no == f1_byte, "use this argument"); + const Register T2_method = Rmethod; + const Register FSR_mtype = FSR; + const Register T3_recv = T3; + + prepare_invoke(byte_no, T2_method, FSR_mtype, T3_recv); + //??__ verify_method_ptr(T2_method); + __ verify_oop(T3_recv); + __ null_check(T3_recv); + + // T9: MethodType object (from cpool->resolved_references[f1], if necessary) + // T2_method: MH.invokeExact_MT method (from f2) + + // Note: T9 is already pushed (if necessary) by prepare_invoke + + // FIXME: profile the LambdaForm also + __ profile_final_call(T9); + + // T8: tmp, used for mdp + // T2_method: callee + // T9: tmp + // is_virtual: true + __ profile_arguments_type(T8, T2_method, T9, true); + + __ jump_from_interpreted(T2_method, T9); +} + + void TemplateTable::invokedynamic(int byte_no) { + transition(vtos, vtos); + assert(byte_no == f1_byte, "use this argument"); + + //const Register Rmethod = T2; + const Register T2_callsite = T2; + + prepare_invoke(byte_no, Rmethod, T2_callsite); + + // T2: CallSite object (from cpool->resolved_references[f1]) + // Rmethod: MH.linkToCallSite method (from f2) + + // Note: T2_callsite is already pushed by prepare_invoke + // %%% should make a type profile for any invokedynamic that takes a ref argument + // profile this call + __ profile_call(T9); + + // T8: tmp, used for mdp + // Rmethod: callee + // T9: tmp + // is_virtual: false + __ profile_arguments_type(T8, Rmethod, T9, false); + + __ verify_oop(T2_callsite); + + __ jump_from_interpreted(Rmethod, T9); + } + +//----------------------------------------------------------------------------- +// Allocation +// T1 : tags & buffer end & thread +// T2 : object end +// T3 : klass +// T1 : object size +// A1 : cpool +// A2 : cp index +// return object in FSR +void TemplateTable::_new() { + transition(vtos, atos); + __ get_unsigned_2_byte_index_at_bcp(A2, 1); + + Label slow_case; + Label done; + Label initialize_header; + Label initialize_object; // including clearing the fields + Label allocate_shared; + + __ get_cpool_and_tags(A1, T1); + + // make sure the class we're about to instantiate has been resolved. 
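+  // A resolved entry carries the JVM_CONSTANT_Class tag; any other tag value
+  // (e.g. a still-unresolved class) sends us to slow_case, which resolves the
+  // class via InterpreterRuntime::_new and may safepoint.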
+ // Note: slow_case does a pop of stack, which is why we loaded class/pushed above + const int tags_offset = Array::base_offset_in_bytes(); + if (UseLEXT1 && Assembler::is_simm(tags_offset, 8)) { + __ gslbx(AT, T1, A2, tags_offset); + } else { + __ daddu(T1, T1, A2); + __ lb(AT, T1, tags_offset); + } + if(os::is_MP()) { + __ sync(); // load acquire + } + __ daddiu(AT, AT, - (int)JVM_CONSTANT_Class); + __ bne(AT, R0, slow_case); + __ delayed()->nop(); + + // get InstanceKlass + __ load_resolved_klass_at_index(A1, A2, T3); + + // make sure klass is initialized & doesn't have finalizer + // make sure klass is fully initialized + __ lhu(T1, T3, in_bytes(InstanceKlass::init_state_offset())); + __ daddiu(AT, T1, - (int)InstanceKlass::fully_initialized); + __ bne(AT, R0, slow_case); + __ delayed()->nop(); + + // has_finalizer + __ lw(T0, T3, in_bytes(Klass::layout_helper_offset()) ); + __ andi(AT, T0, Klass::_lh_instance_slow_path_bit); + __ bne(AT, R0, slow_case); + __ delayed()->nop(); + + // Allocate the instance + // 1) Try to allocate in the TLAB + // 2) if fail and the object is large allocate in the shared Eden + // 3) if the above fails (or is not applicable), go to a slow case + // (creates a new TLAB, etc.) + + const bool allow_shared_alloc = + Universe::heap()->supports_inline_contig_alloc(); + +#ifndef OPT_THREAD + const Register thread = T8; + if (UseTLAB || allow_shared_alloc) { + __ get_thread(thread); + } +#else + const Register thread = TREG; +#endif + + if (UseTLAB) { + // get tlab_top + __ ld(FSR, thread, in_bytes(JavaThread::tlab_top_offset())); + // get tlab_end + __ ld(AT, thread, in_bytes(JavaThread::tlab_end_offset())); + __ daddu(T2, FSR, T0); + __ slt(AT, AT, T2); + __ bne(AT, R0, allow_shared_alloc ? allocate_shared : slow_case); + __ delayed()->nop(); + __ sd(T2, thread, in_bytes(JavaThread::tlab_top_offset())); + + if (ZeroTLAB) { + // the fields have been already cleared + __ beq(R0, R0, initialize_header); + } else { + // initialize both the header and fields + __ beq(R0, R0, initialize_object); + } + __ delayed()->nop(); + } + + // Allocation in the shared Eden , if allowed + // T0 : instance size in words + if(allow_shared_alloc){ + __ bind(allocate_shared); + + Label done, retry; + Address heap_top(T1); + __ set64(T1, (long)Universe::heap()->top_addr()); + __ ld(FSR, heap_top); + + __ bind(retry); + __ set64(AT, (long)Universe::heap()->end_addr()); + __ ld(AT, AT, 0); + __ daddu(T2, FSR, T0); + __ slt(AT, AT, T2); + __ bne(AT, R0, slow_case); + __ delayed()->nop(); + + // Compare FSR with the top addr, and if still equal, store the new + // top addr in T2 at the address of the top addr pointer. Sets AT if was + // equal, and clears it otherwise. Use lock prefix for atomicity on MPs. + // + // FSR: object begin + // T2: object end + // T0: instance size in words + + // if someone beat us on the allocation, try again, otherwise continue + __ cmpxchg(heap_top, FSR, T2, AT, true, true, done, &retry); + + __ bind(done); + + __ incr_allocated_bytes(thread, T0, 0); + } + + if (UseTLAB || Universe::heap()->supports_inline_contig_alloc()) { + // The object is initialized before the header. If the object size is + // zero, go directly to the header initialization. 
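+    // At this point T0 is the instance size and FSR the newly allocated
+    // chunk; everything past the header is zeroed one word per iteration,
+    // walking from the end of the object back toward the header (the header
+    // itself is written separately at initialize_header).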
+ __ bind(initialize_object); + __ set64(AT, - sizeof(oopDesc)); + __ daddu(T0, T0, AT); + __ beq(T0, R0, initialize_header); + __ delayed()->nop(); + + // initialize remaining object fields: T0 is a multiple of 2 + { + Label loop; + __ daddu(T1, FSR, T0); + __ daddiu(T1, T1, -oopSize); + + __ bind(loop); + __ sd(R0, T1, sizeof(oopDesc) + 0 * oopSize); + __ bne(T1, FSR, loop); //dont clear header + __ delayed()->daddiu(T1, T1, -oopSize); + } + + //klass in T3, + // initialize object header only. + __ bind(initialize_header); + if (UseBiasedLocking) { + __ ld(AT, T3, in_bytes(Klass::prototype_header_offset())); + __ sd(AT, FSR, oopDesc::mark_offset_in_bytes ()); + } else { + __ set64(AT, (long)markOopDesc::prototype()); + __ sd(AT, FSR, oopDesc::mark_offset_in_bytes()); + } + + __ store_klass_gap(FSR, R0); + __ store_klass(FSR, T3); + + { + SkipIfEqual skip_if(_masm, &DTraceAllocProbes, 0); + // Trigger dtrace event for fastpath + __ push(atos); + __ call_VM_leaf( + CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_object_alloc), FSR); + __ pop(atos); + + } + __ b(done); + __ delayed()->nop(); + } + + // slow case + __ bind(slow_case); + __ get_constant_pool(A1); + __ get_unsigned_2_byte_index_at_bcp(A2, 1); + call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::_new), A1, A2); + + // continue + __ bind(done); + __ sync(); +} + +void TemplateTable::newarray() { + transition(itos, atos); + __ lbu(A1, at_bcp(1)); + //type, count + call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::newarray), A1, FSR); + __ sync(); +} + +void TemplateTable::anewarray() { + transition(itos, atos); + __ get_2_byte_integer_at_bcp(A2, AT, 1); + __ huswap(A2); + __ get_constant_pool(A1); + // cp, index, count + call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::anewarray), A1, A2, FSR); + __ sync(); +} + +void TemplateTable::arraylength() { + transition(atos, itos); + __ null_check(FSR, arrayOopDesc::length_offset_in_bytes()); + __ lw(FSR, FSR, arrayOopDesc::length_offset_in_bytes()); +} + +// when invoke gen_subtype_check, super in T3, sub in T2, object in FSR(it's always) +// T2 : sub klass +// T3 : cpool +// T3 : super klass +void TemplateTable::checkcast() { + transition(atos, atos); + Label done, is_null, ok_is_subtype, quicked, resolved; + __ beq(FSR, R0, is_null); + __ delayed()->nop(); + + // Get cpool & tags index + __ get_cpool_and_tags(T3, T1); + __ get_2_byte_integer_at_bcp(T2, AT, 1); + __ huswap(T2); + + // See if bytecode has already been quicked + __ daddu(AT, T1, T2); + __ lb(AT, AT, Array::base_offset_in_bytes()); + if(os::is_MP()) { + __ sync(); // load acquire + } + __ daddiu(AT, AT, - (int)JVM_CONSTANT_Class); + __ beq(AT, R0, quicked); + __ delayed()->nop(); + + // In InterpreterRuntime::quicken_io_cc, lots of new classes may be loaded. + // Then, GC will move the object in V0 to another places in heap. + // Therefore, We should never save such an object in register. + // Instead, we should save it in the stack. It can be modified automatically by the GC thread. + // After GC, the object address in FSR is changed to a new place. 
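+  // Hence the receiver is spilled to the expression stack (push(atos)) across
+  // the call to quicken_io_cc and reloaded afterwards with pop_ptr(FSR), while
+  // the resolved klass is fetched from the thread via get_vm_result_2.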
+ // + __ push(atos); + const Register thread = TREG; +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc)); + __ get_vm_result_2(T3, thread); + __ pop_ptr(FSR); + __ b(resolved); + __ delayed()->nop(); + + // klass already in cp, get superklass in T3 + __ bind(quicked); + __ load_resolved_klass_at_index(T3, T2, T3); + + __ bind(resolved); + + // get subklass in T2 + //add for compressedoops + __ load_klass(T2, FSR); + // Superklass in T3. Subklass in T2. + __ gen_subtype_check(T3, T2, ok_is_subtype); + + // Come here on failure + // object is at FSR + __ jmp(Interpreter::_throw_ClassCastException_entry); + __ delayed()->nop(); + + // Come here on success + __ bind(ok_is_subtype); + + // Collect counts on whether this check-cast sees NULLs a lot or not. + if (ProfileInterpreter) { + __ b(done); + __ delayed()->nop(); + __ bind(is_null); + __ profile_null_seen(T3); + } else { + __ bind(is_null); + } + __ bind(done); +} + +// i use T3 as cpool, T1 as tags, T2 as index +// object always in FSR, superklass in T3, subklass in T2 +void TemplateTable::instanceof() { + transition(atos, itos); + Label done, is_null, ok_is_subtype, quicked, resolved; + + __ beq(FSR, R0, is_null); + __ delayed()->nop(); + + // Get cpool & tags index + __ get_cpool_and_tags(T3, T1); + // get index + __ get_2_byte_integer_at_bcp(T2, AT, 1); + __ huswap(T2); + + // See if bytecode has already been quicked + // quicked + __ daddu(AT, T1, T2); + __ lb(AT, AT, Array::base_offset_in_bytes()); + if(os::is_MP()) { + __ sync(); // load acquire + } + __ daddiu(AT, AT, - (int)JVM_CONSTANT_Class); + __ beq(AT, R0, quicked); + __ delayed()->nop(); + + __ push(atos); + const Register thread = TREG; +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc)); + __ get_vm_result_2(T3, thread); + __ pop_ptr(FSR); + __ b(resolved); + __ delayed()->nop(); + + // get superklass in T3, subklass in T2 + __ bind(quicked); + __ load_resolved_klass_at_index(T3, T2, T3); + + __ bind(resolved); + // get subklass in T2 + //add for compressedoops + __ load_klass(T2, FSR); + + // Superklass in T3. Subklass in T2. + __ gen_subtype_check(T3, T2, ok_is_subtype); + // Come here on failure + __ b(done); + __ delayed(); __ move(FSR, R0); + + // Come here on success + __ bind(ok_is_subtype); + __ move(FSR, 1); + + // Collect counts on whether this test sees NULLs a lot or not. + if (ProfileInterpreter) { + __ beq(R0, R0, done); + __ delayed()->nop(); + __ bind(is_null); + __ profile_null_seen(T3); + } else { + __ bind(is_null); // same as 'done' + } + __ bind(done); + // FSR = 0: obj == NULL or obj is not an instanceof the specified klass + // FSR = 1: obj != NULL and obj is an instanceof the specified klass +} + +//-------------------------------------------------------- +//-------------------------------------------- +// Breakpoints +void TemplateTable::_breakpoint() { + // Note: We get here even if we are single stepping.. + // jbug inists on setting breakpoints at every bytecode + // even if we are in single step mode. 
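+  // The code below first fetches the original (unpatched) bytecode into Rnext,
+  // then posts the breakpoint event to the runtime, and finally re-dispatches
+  // the saved bytecode via dispatch_only_normal.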
+ + transition(vtos, vtos); + + // get the unpatched byte code + __ get_method(A1); + __ call_VM(NOREG, + CAST_FROM_FN_PTR(address, + InterpreterRuntime::get_original_bytecode_at), + A1, BCP); + __ move(Rnext, V0); // Rnext will be used in dispatch_only_normal + + // post the breakpoint event + __ get_method(A1); + __ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::_breakpoint), A1, BCP); + + // complete the execution of original bytecode + __ dispatch_only_normal(vtos); +} + +//----------------------------------------------------------------------------- +// Exceptions + +void TemplateTable::athrow() { + transition(atos, vtos); + __ null_check(FSR); + __ jmp(Interpreter::throw_exception_entry()); + __ delayed()->nop(); +} + +//----------------------------------------------------------------------------- +// Synchronization +// +// Note: monitorenter & exit are symmetric routines; which is reflected +// in the assembly code structure as well +// +// Stack layout: +// +// [expressions ] <--- SP = expression stack top +// .. +// [expressions ] +// [monitor entry] <--- monitor block top = expression stack bot +// .. +// [monitor entry] +// [frame data ] <--- monitor block bot +// ... +// [return addr ] <--- FP + +// we use T2 as monitor entry pointer, T3 as monitor top pointer, c_rarg0 as free slot pointer +// object always in FSR +void TemplateTable::monitorenter() { + transition(atos, vtos); + + // check for NULL object + __ null_check(FSR); + + const Address monitor_block_top(FP, frame::interpreter_frame_monitor_block_top_offset + * wordSize); + const int entry_size = (frame::interpreter_frame_monitor_size()* wordSize); + Label allocated; + + // initialize entry pointer + __ move(c_rarg0, R0); + + // find a free slot in the monitor block (result in c_rarg0) + { + Label entry, loop, exit; + __ ld(T2, monitor_block_top); + __ b(entry); + __ delayed()->daddiu(T3, FP, frame::interpreter_frame_initial_sp_offset * wordSize); + + // free slot? + __ bind(loop); + __ ld(AT, T2, BasicObjectLock::obj_offset_in_bytes()); + __ movz(c_rarg0, T2, AT); + + __ beq(FSR, AT, exit); + __ delayed()->nop(); + __ daddiu(T2, T2, entry_size); + + __ bind(entry); + __ bne(T3, T2, loop); + __ delayed()->nop(); + __ bind(exit); + } + + __ bne(c_rarg0, R0, allocated); + __ delayed()->nop(); + + // allocate one if there's no free slot + { + Label entry, loop; + // 1. compute new pointers // SP: old expression stack top + __ ld(c_rarg0, monitor_block_top); + __ daddiu(SP, SP, - entry_size); + __ daddiu(c_rarg0, c_rarg0, - entry_size); + __ sd(c_rarg0, monitor_block_top); + __ b(entry); + __ delayed(); __ move(T3, SP); + + // 2. move expression stack contents + __ bind(loop); + __ ld(AT, T3, entry_size); + __ sd(AT, T3, 0); + __ daddiu(T3, T3, wordSize); + __ bind(entry); + __ bne(T3, c_rarg0, loop); + __ delayed()->nop(); + } + + __ bind(allocated); + // Increment bcp to point to the next bytecode, + // so exception handling for async. exceptions work correctly. + // The object has already been poped from the stack, so the + // expression stack looks correct. + __ daddiu(BCP, BCP, 1); + __ sd(FSR, c_rarg0, BasicObjectLock::obj_offset_in_bytes()); + __ lock_object(c_rarg0); + // check to make sure this monitor doesn't cause stack overflow after locking + __ save_bcp(); // in case of exception + __ generate_stack_overflow_check(0); + // The bcp has already been incremented. Just need to dispatch to next instruction. 
+ + __ dispatch_next(vtos); +} + +// T2 : top +// c_rarg0 : entry +void TemplateTable::monitorexit() { + transition(atos, vtos); + + __ null_check(FSR); + + const int entry_size =(frame::interpreter_frame_monitor_size()* wordSize); + Label found; + + // find matching slot + { + Label entry, loop; + __ ld(c_rarg0, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); + __ b(entry); + __ delayed()->daddiu(T2, FP, frame::interpreter_frame_initial_sp_offset * wordSize); + + __ bind(loop); + __ ld(AT, c_rarg0, BasicObjectLock::obj_offset_in_bytes()); + __ beq(FSR, AT, found); + __ delayed()->nop(); + __ daddiu(c_rarg0, c_rarg0, entry_size); + __ bind(entry); + __ bne(T2, c_rarg0, loop); + __ delayed()->nop(); + } + + // error handling. Unlocking was not block-structured + Label end; + __ call_VM(NOREG, CAST_FROM_FN_PTR(address, + InterpreterRuntime::throw_illegal_monitor_state_exception)); + __ should_not_reach_here(); + + // call run-time routine + // c_rarg0: points to monitor entry + __ bind(found); + __ move(TSR, FSR); + __ unlock_object(c_rarg0); + __ move(FSR, TSR); + __ bind(end); +} + + +// Wide instructions +void TemplateTable::wide() { + transition(vtos, vtos); + __ lbu(Rnext, at_bcp(1)); + __ dsll(T9, Rnext, Address::times_8); + __ li(AT, (long)Interpreter::_wentry_point); + __ daddu(AT, T9, AT); + __ ld(T9, AT, 0); + __ jr(T9); + __ delayed()->nop(); +} + + +void TemplateTable::multianewarray() { + transition(vtos, atos); + // last dim is on top of stack; we want address of first one: + // first_addr = last_addr + (ndims - 1) * wordSize + __ lbu(A1, at_bcp(3)); // dimension + __ daddiu(A1, A1, -1); + __ dsll(A1, A1, Address::times_8); + __ daddu(A1, SP, A1); // now A1 pointer to the count array on the stack + call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::multianewarray), A1); + __ lbu(AT, at_bcp(3)); + __ dsll(AT, AT, Address::times_8); + __ daddu(SP, SP, AT); + __ sync(); +} +#endif // !CC_INTERP diff --git a/src/hotspot/cpu/mips/vmStructs_mips.hpp b/src/hotspot/cpu/mips/vmStructs_mips.hpp new file mode 100644 index 00000000000..6939914356d --- /dev/null +++ b/src/hotspot/cpu/mips/vmStructs_mips.hpp @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2001, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_MIPS_VM_VMSTRUCTS_MIPS_HPP +#define CPU_MIPS_VM_VMSTRUCTS_MIPS_HPP + +// These are the CPU-specific fields, types and integer +// constants required by the Serviceability Agent. 
This file is +// referenced by vmStructs.cpp. + +#define VM_STRUCTS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \ + \ + /******************************/ \ + /* JavaCallWrapper */ \ + /******************************/ \ + /******************************/ \ + /* JavaFrameAnchor */ \ + /******************************/ \ + volatile_nonstatic_field(JavaFrameAnchor, _last_Java_fp, intptr_t*) \ + \ + + /* NOTE that we do not use the last_entry() macro here; it is used */ + /* in vmStructs__.hpp's VM_STRUCTS_OS_CPU macro (and must */ + /* be present there) */ + + +#define VM_TYPES_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) \ + + /* NOTE that we do not use the last_entry() macro here; it is used */ + /* in vmStructs__.hpp's VM_TYPES_OS_CPU macro (and must */ + /* be present there) */ + + +#define VM_INT_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) \ + + /* NOTE that we do not use the last_entry() macro here; it is used */ + /* in vmStructs__.hpp's VM_INT_CONSTANTS_OS_CPU macro (and must */ + /* be present there) */ + +#define VM_LONG_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) \ + + /* NOTE that we do not use the last_entry() macro here; it is used */ + /* in vmStructs__.hpp's VM_LONG_CONSTANTS_OS_CPU macro (and must */ + /* be present there) */ + +#endif // CPU_MIPS_VM_VMSTRUCTS_MIPS_HPP diff --git a/src/hotspot/cpu/mips/vm_version_ext_mips.cpp b/src/hotspot/cpu/mips/vm_version_ext_mips.cpp new file mode 100644 index 00000000000..ac2a43edcef --- /dev/null +++ b/src/hotspot/cpu/mips/vm_version_ext_mips.cpp @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2013, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2018, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "memory/allocation.inline.hpp" +#include "runtime/os.inline.hpp" +#include "vm_version_ext_mips.hpp" + +// VM_Version_Ext statics +int VM_Version_Ext::_no_of_threads = 0; +int VM_Version_Ext::_no_of_cores = 0; +int VM_Version_Ext::_no_of_sockets = 0; +bool VM_Version_Ext::_initialized = false; +char VM_Version_Ext::_cpu_name[CPU_TYPE_DESC_BUF_SIZE] = {0}; +char VM_Version_Ext::_cpu_desc[CPU_DETAILED_DESC_BUF_SIZE] = {0}; + +void VM_Version_Ext::initialize_cpu_information(void) { + // do nothing if cpu info has been initialized + if (_initialized) { + return; + } + + _no_of_cores = os::processor_count(); + _no_of_threads = _no_of_cores; + _no_of_sockets = _no_of_cores; + if (is_loongson()) { + snprintf(_cpu_name, CPU_TYPE_DESC_BUF_SIZE - 1, "Loongson MIPS"); + snprintf(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE, "Loongson MIPS %s", cpu_features()); + } else { + snprintf(_cpu_name, CPU_TYPE_DESC_BUF_SIZE - 1, "MIPS"); + snprintf(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE, "MIPS %s", cpu_features()); + } + _initialized = true; +} + +int VM_Version_Ext::number_of_threads(void) { + initialize_cpu_information(); + return _no_of_threads; +} + +int VM_Version_Ext::number_of_cores(void) { + initialize_cpu_information(); + return _no_of_cores; +} + +int VM_Version_Ext::number_of_sockets(void) { + initialize_cpu_information(); + return _no_of_sockets; +} + +const char* VM_Version_Ext::cpu_name(void) { + initialize_cpu_information(); + char* tmp = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_TYPE_DESC_BUF_SIZE, mtTracing); + if (NULL == tmp) { + return NULL; + } + strncpy(tmp, _cpu_name, CPU_TYPE_DESC_BUF_SIZE); + return tmp; +} + +const char* VM_Version_Ext::cpu_description(void) { + initialize_cpu_information(); + char* tmp = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_DETAILED_DESC_BUF_SIZE, mtTracing); + if (NULL == tmp) { + return NULL; + } + strncpy(tmp, _cpu_desc, CPU_DETAILED_DESC_BUF_SIZE); + return tmp; +} diff --git a/src/hotspot/cpu/mips/vm_version_ext_mips.hpp b/src/hotspot/cpu/mips/vm_version_ext_mips.hpp new file mode 100644 index 00000000000..ffdcff06777 --- /dev/null +++ b/src/hotspot/cpu/mips/vm_version_ext_mips.hpp @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2016, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2018, 2019, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef CPU_MIPS_VM_VM_VERSION_EXT_MIPS_HPP +#define CPU_MIPS_VM_VM_VERSION_EXT_MIPS_HPP + +#include "runtime/vm_version.hpp" +#include "utilities/macros.hpp" + +class VM_Version_Ext : public VM_Version { + private: + static const size_t CPU_TYPE_DESC_BUF_SIZE = 256; + static const size_t CPU_DETAILED_DESC_BUF_SIZE = 4096; + + static int _no_of_threads; + static int _no_of_cores; + static int _no_of_sockets; + static bool _initialized; + static char _cpu_name[CPU_TYPE_DESC_BUF_SIZE]; + static char _cpu_desc[CPU_DETAILED_DESC_BUF_SIZE]; + + public: + static int number_of_threads(void); + static int number_of_cores(void); + static int number_of_sockets(void); + + static const char* cpu_name(void); + static const char* cpu_description(void); + static void initialize_cpu_information(void); +}; + +#endif // CPU_MIPS_VM_VM_VERSION_EXT_MIPS_HPP diff --git a/src/hotspot/cpu/mips/vm_version_mips.cpp b/src/hotspot/cpu/mips/vm_version_mips.cpp new file mode 100644 index 00000000000..2e7b61390e4 --- /dev/null +++ b/src/hotspot/cpu/mips/vm_version_mips.cpp @@ -0,0 +1,516 @@ +/* + * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "memory/resourceArea.hpp" +#include "runtime/java.hpp" +#include "runtime/stubCodeGenerator.hpp" +#include "runtime/vm_version.hpp" +#ifdef TARGET_OS_FAMILY_linux +# include "os_linux.inline.hpp" +#endif + +int VM_Version::_cpuFeatures; +const char* VM_Version::_features_str = ""; +VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, }; +volatile bool VM_Version::_is_determine_cpucfg_supported_running = false; +bool VM_Version::_is_cpucfg_instruction_supported = true; +bool VM_Version::_cpu_info_is_initialized = false; + +static BufferBlob* stub_blob; +static const int stub_size = 600; + +extern "C" { + typedef void (*get_cpu_info_stub_t)(void*); +} +static get_cpu_info_stub_t get_cpu_info_stub = NULL; + + +class VM_Version_StubGenerator: public StubCodeGenerator { + public: + + VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {} + + address generate_get_cpu_info() { + assert(!VM_Version::cpu_info_is_initialized(), "VM_Version should not be initialized"); + StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub"); +# define __ _masm-> + + address start = __ pc(); + + __ enter(); + __ push(AT); + __ push(V0); + + __ li(AT, (long)0); + __ cpucfg(V0, AT); + __ lw(AT, A0, in_bytes(VM_Version::Loongson_Cpucfg_id0_offset())); + __ sw(V0, A0, in_bytes(VM_Version::Loongson_Cpucfg_id0_offset())); + + __ li(AT, 1); + __ cpucfg(V0, AT); + __ lw(AT, A0, in_bytes(VM_Version::Loongson_Cpucfg_id1_offset())); + __ sw(V0, A0, in_bytes(VM_Version::Loongson_Cpucfg_id1_offset())); + + __ li(AT, 2); + __ cpucfg(V0, AT); + __ lw(AT, A0, in_bytes(VM_Version::Loongson_Cpucfg_id2_offset())); + __ sw(V0, A0, in_bytes(VM_Version::Loongson_Cpucfg_id2_offset())); + + __ pop(V0); + __ pop(AT); + __ leave(); + __ jr(RA); + __ delayed()->nop(); +# undef __ + + return start; + }; +}; + +uint32_t VM_Version::get_feature_flags_by_cpucfg() { + uint32_t result = 0; + if (_cpuid_info.cpucfg_info_id1.bits.MMI != 0) + result |= CPU_MMI; + if (_cpuid_info.cpucfg_info_id1.bits.MSA1 != 0) + result |= CPU_MSA1_0; + if (_cpuid_info.cpucfg_info_id1.bits.MSA2 != 0) + result |= CPU_MSA2_0; + if (_cpuid_info.cpucfg_info_id1.bits.CGP != 0) + result |= CPU_CGP; + if (_cpuid_info.cpucfg_info_id1.bits.LSX1 != 0) + result |= CPU_LSX1; + if (_cpuid_info.cpucfg_info_id1.bits.LSX2 != 0) + result |= CPU_LSX2; + if (_cpuid_info.cpucfg_info_id1.bits.LASX != 0) + result |= CPU_LASX; + if (_cpuid_info.cpucfg_info_id1.bits.LLSYNC != 0) + result |= CPU_LLSYNC; + if (_cpuid_info.cpucfg_info_id1.bits.TGTSYNC != 0) + result |= CPU_TGTSYNC; + if (_cpuid_info.cpucfg_info_id1.bits.MUALP != 0) + result |= CPU_MUALP; + if (_cpuid_info.cpucfg_info_id2.bits.LEXT1 != 0) + result |= CPU_LEXT1; + if (_cpuid_info.cpucfg_info_id2.bits.LEXT2 != 0) + result |= CPU_LEXT2; + if (_cpuid_info.cpucfg_info_id2.bits.LEXT3 != 0) + result |= CPU_LEXT3; + if (_cpuid_info.cpucfg_info_id2.bits.LAMO != 0) + result |= CPU_LAMO; + if (_cpuid_info.cpucfg_info_id2.bits.LPIXU != 0) + result |= CPU_LPIXU; + + result |= CPU_ULSYNC; + + return result; +} + +void read_cpu_info(const char *path, char *result) { + FILE *ptr; + char buf[1024]; + int i = 0; + if((ptr=fopen(path, "r")) != NULL) { + while(fgets(buf, 1024, ptr)!=NULL) { + strcat(result,buf); + i++; + if (i == 10) break; + } + fclose(ptr); + } else { + warning("Can't detect CPU info - cannot open %s", path); + } +} + +void strlwr(char *str) { + for (; *str!='\0'; str++) + *str = 
tolower(*str); +} + +int VM_Version::get_feature_flags_by_cpuinfo(int features) { + assert(!cpu_info_is_initialized(), "VM_Version should not be initialized"); + + char res[10240]; + int i; + memset(res, '\0', 10240 * sizeof(char)); + read_cpu_info("/proc/cpuinfo", res); + // res is converted to lower case + strlwr(res); + + if (strstr(res, "loongson")) { + // Loongson CPU + features |= CPU_LOONGSON; + + const struct Loongson_Cpuinfo loongson_cpuinfo[] = { + {L_3A1000, "3a1000"}, + {L_3B1500, "3b1500"}, + {L_3A2000, "3a2000"}, + {L_3B2000, "3b2000"}, + {L_3A3000, "3a3000"}, + {L_3B3000, "3b3000"}, + {L_2K1000, "2k1000"}, + {L_UNKNOWN, "unknown"} + }; + + // Loongson Family + int detected = 0; + for (i = 0; i <= L_UNKNOWN; i++) { + switch (i) { + // 3A1000 and 3B1500 may use an old kernel and further comparsion is needed + // test PRID REV in /proc/cpuinfo + // 3A1000: V0.5, model name: ICT Loongson-3A V0.5 FPU V0.1 + // 3B1500: V0.7, model name: ICT Loongson-3B V0.7 FPU V0.1 + case L_3A1000: + if (strstr(res, loongson_cpuinfo[i].match_str) || strstr(res, "loongson-3a v0.5")) { + features |= CPU_LOONGSON_GS464; + detected++; + //tty->print_cr("3A1000 platform"); + } + break; + case L_3B1500: + if (strstr(res, loongson_cpuinfo[i].match_str) || strstr(res, "loongson-3b v0.7")) { + features |= CPU_LOONGSON_GS464; + detected++; + //tty->print_cr("3B1500 platform"); + } + break; + case L_3A2000: + case L_3B2000: + case L_3A3000: + case L_3B3000: + if (strstr(res, loongson_cpuinfo[i].match_str)) { + features |= CPU_LOONGSON_GS464E; + detected++; + //tty->print_cr("3A2000/3A3000/3B2000/3B3000 platform"); + } + break; + case L_2K1000: + if (strstr(res, loongson_cpuinfo[i].match_str)) { + features |= CPU_LOONGSON_GS264; + detected++; + //tty->print_cr("2K1000 platform"); + } + break; + case L_UNKNOWN: + if (detected == 0) { + detected++; + //tty->print_cr("unknown Loongson platform"); + } + break; + default: + ShouldNotReachHere(); + } + } + assert (detected == 1, "one and only one of LOONGSON_CPU_FAMILY should be detected"); + } else { // not Loongson + // Not Loongson CPU + //tty->print_cr("MIPS platform"); + } + + if (features & CPU_LOONGSON_GS264) { + features |= CPU_LEXT1; + features |= CPU_LEXT2; + features |= CPU_TGTSYNC; + features |= CPU_ULSYNC; + features |= CPU_MSA1_0; + features |= CPU_LSX1; + } else if (features & CPU_LOONGSON_GS464) { + features |= CPU_LEXT1; + features |= CPU_LLSYNC; + features |= CPU_TGTSYNC; + } else if (features & CPU_LOONGSON_GS464E) { + features |= CPU_LEXT1; + features |= CPU_LEXT2; + features |= CPU_LEXT3; + features |= CPU_TGTSYNC; + features |= CPU_ULSYNC; + } else if (features & CPU_LOONGSON) { + // unknow loongson + features |= CPU_LLSYNC; + features |= CPU_TGTSYNC; + features |= CPU_ULSYNC; + } + VM_Version::_cpu_info_is_initialized = true; + + return features; +} + +void VM_Version::get_processor_features() { + + clean_cpuFeatures(); + + // test if cpucfg instruction is supported + VM_Version::_is_determine_cpucfg_supported_running = true; + __asm__ __volatile__( + ".insn \n\t" + ".word (0xc8080118)\n\t" // cpucfg zero, zero + : + : + : + ); + VM_Version::_is_determine_cpucfg_supported_running = false; + + if (supports_cpucfg()) { + get_cpu_info_stub(&_cpuid_info); + _cpuFeatures = get_feature_flags_by_cpucfg(); + // Only Loongson CPUs support cpucfg + _cpuFeatures |= CPU_LOONGSON; + } else { + _cpuFeatures = get_feature_flags_by_cpuinfo(0); + } + + _supports_cx8 = true; + + if (UseG1GC && FLAG_IS_DEFAULT(MaxGCPauseMillis)) { + FLAG_SET_CMDLINE(uintx, 
MaxGCPauseMillis, 650); + } + +#ifdef COMPILER2 + if (MaxVectorSize > 0) { + if (!is_power_of_2(MaxVectorSize)) { + warning("MaxVectorSize must be a power of 2"); + MaxVectorSize = 8; + } + if (MaxVectorSize > 0 && supports_ps()) { + MaxVectorSize = 8; + } else { + MaxVectorSize = 0; + } + } + // + // Vector optimization of MIPS works in most cases, but cannot pass hotspot/test/compiler/6340864/TestFloatVect.java. + // Vector optimization was closed by default. + // The reasons: + // 1. The kernel does not have emulation of PS instructions yet, so the emulation of PS instructions must be done in JVM, see JVM_handle_linux_signal. + // 2. It seems the gcc4.4.7 had some bug related to ucontext_t, which is used in signal handler to emulate PS instructions. + // + if (FLAG_IS_DEFAULT(MaxVectorSize)) { + MaxVectorSize = 0; + } + +#endif + + if (needs_llsync() && needs_tgtsync() && !needs_ulsync()) { + if (FLAG_IS_DEFAULT(UseSyncLevel)) { + FLAG_SET_DEFAULT(UseSyncLevel, 1000); + } + } else if (!needs_llsync() && needs_tgtsync() && needs_ulsync()) { + if (FLAG_IS_DEFAULT(UseSyncLevel)) { + FLAG_SET_DEFAULT(UseSyncLevel, 2000); + } + } else if (!needs_llsync() && !needs_tgtsync() && needs_ulsync()) { + if (FLAG_IS_DEFAULT(UseSyncLevel)) { + FLAG_SET_DEFAULT(UseSyncLevel, 3000); + } + } else if (needs_llsync() && !needs_tgtsync() && needs_ulsync()) { + if (FLAG_IS_DEFAULT(UseSyncLevel)) { + FLAG_SET_DEFAULT(UseSyncLevel, 4000); + } + } else if (needs_llsync() && needs_tgtsync() && needs_ulsync()) { + if (FLAG_IS_DEFAULT(UseSyncLevel)) { + FLAG_SET_DEFAULT(UseSyncLevel, 10000); + } + } else { + assert(false, "Should Not Reach Here, what is the cpu type?"); + if (FLAG_IS_DEFAULT(UseSyncLevel)) { + FLAG_SET_DEFAULT(UseSyncLevel, 10000); + } + } + + if (supports_lext1()) { + if (FLAG_IS_DEFAULT(UseLEXT1)) { + FLAG_SET_DEFAULT(UseLEXT1, true); + } + } else if (UseLEXT1) { + warning("LEXT1 instructions are not available on this CPU"); + FLAG_SET_DEFAULT(UseLEXT1, false); + } + + if (supports_lext2()) { + if (FLAG_IS_DEFAULT(UseLEXT2)) { + FLAG_SET_DEFAULT(UseLEXT2, true); + } + } else if (UseLEXT2) { + warning("LEXT2 instructions are not available on this CPU"); + FLAG_SET_DEFAULT(UseLEXT2, false); + } + + if (supports_lext3()) { + if (FLAG_IS_DEFAULT(UseLEXT3)) { + FLAG_SET_DEFAULT(UseLEXT3, true); + } + } else if (UseLEXT3) { + warning("LEXT3 instructions are not available on this CPU"); + FLAG_SET_DEFAULT(UseLEXT3, false); + } + + if (UseLEXT2) { + if (FLAG_IS_DEFAULT(UseCountTrailingZerosInstructionMIPS64)) { + FLAG_SET_DEFAULT(UseCountTrailingZerosInstructionMIPS64, 1); + } + } else if (UseCountTrailingZerosInstructionMIPS64) { + if (!FLAG_IS_DEFAULT(UseCountTrailingZerosInstructionMIPS64)) + warning("ctz/dctz instructions are not available on this CPU"); + FLAG_SET_DEFAULT(UseCountTrailingZerosInstructionMIPS64, 0); + } + + if (TieredCompilation) { + if (!FLAG_IS_DEFAULT(TieredCompilation)) + warning("TieredCompilation not supported"); + FLAG_SET_DEFAULT(TieredCompilation, false); + } + + char buf[256]; + bool is_unknown_loongson_cpu = is_loongson() && !is_gs464() && !is_gs464e() && !is_gs264() && !supports_cpucfg(); + + // A note on the _features_string format: + // There are jtreg tests checking the _features_string for various properties. + // For some strange reason, these tests require the string to contain + // only _lowercase_ characters. Keep that in mind when being surprised + // about the unusual notation of features - and when adding new ones. + // Features may have one comma at the end. 
+ // Furthermore, use one, and only one, separator space between features. + // Multiple spaces are considered separate tokens, messing up everything. + jio_snprintf(buf, sizeof(buf), "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s, usesynclevel:%d", + (is_loongson() ? "mips-compatible loongson cpu" : "mips cpu"), + (is_gs464() ? ", gs464 (3a1000/3b1500)" : ""), + (is_gs464e() ? ", gs464e (3a2000/3a3000/3b2000/3b3000)" : ""), + (is_gs264() ? ", gs264 (2k1000)" : ""), + (is_unknown_loongson_cpu ? ", unknown loongson cpu" : ""), + (supports_dsp() ? ", dsp" : ""), + (supports_ps() ? ", ps" : ""), + (supports_3d() ? ", 3d" : ""), + (supports_mmi() ? ", mmi" : ""), + (supports_msa1_0() ? ", msa1_0" : ""), + (supports_msa2_0() ? ", msa2_0" : ""), + (supports_lsx1() ? ", lsx1" : ""), + (supports_lsx2() ? ", lsx2" : ""), + (supports_lasx() ? ", lasx" : ""), + (supports_lext1() ? ", lext1" : ""), + (supports_lext2() ? ", lext2" : ""), + (supports_lext3() ? ", lext3" : ""), + (supports_cgp() ? ", aes, crc, sha1, sha256, sha512" : ""), + (supports_lamo() ? ", lamo" : ""), + (supports_lpixu() ? ", lpixu" : ""), + (needs_llsync() ? ", llsync" : ""), + (needs_tgtsync() ? ", tgtsync": ""), + (needs_ulsync() ? ", ulsync": ""), + (supports_mualp() ? ", mualp" : ""), + UseSyncLevel); + _features_str = strdup(buf); + + if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) { + FLAG_SET_DEFAULT(AllocatePrefetchStyle, 1); + } + + if (FLAG_IS_DEFAULT(AllocatePrefetchLines)) { + FLAG_SET_DEFAULT(AllocatePrefetchLines, 1); + } + + if (FLAG_IS_DEFAULT(AllocatePrefetchStepSize)) { + FLAG_SET_DEFAULT(AllocatePrefetchStepSize, 64); + } + + if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) { + FLAG_SET_DEFAULT(AllocatePrefetchDistance, 64); + } + + if (FLAG_IS_DEFAULT(AllocateInstancePrefetchLines)) { + FLAG_SET_DEFAULT(AllocateInstancePrefetchLines, 1); + } + + if (UseSHA) { + warning("SHA instructions are not available on this CPU"); + FLAG_SET_DEFAULT(UseSHA, false); + } + + if (UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics) { + warning("SHA intrinsics are not available on this CPU"); + FLAG_SET_DEFAULT(UseSHA1Intrinsics, false); + FLAG_SET_DEFAULT(UseSHA256Intrinsics, false); + FLAG_SET_DEFAULT(UseSHA512Intrinsics, false); + } + + if (UseAES) { + if (!FLAG_IS_DEFAULT(UseAES)) { + warning("AES instructions are not available on this CPU"); + FLAG_SET_DEFAULT(UseAES, false); + } + } + + if (UseCRC32Intrinsics) { + if (!FLAG_IS_DEFAULT(UseCRC32Intrinsics)) { + warning("CRC32Intrinsics instructions are not available on this CPU"); + FLAG_SET_DEFAULT(UseCRC32Intrinsics, false); + } + } + + if (UseCRC32CIntrinsics) { + if (!FLAG_IS_DEFAULT(UseCRC32Intrinsics)) { + warning("CRC32CIntrinsics instructions are not available on this CPU"); + FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false); + } + } + + if (UseAESIntrinsics) { + if (!FLAG_IS_DEFAULT(UseAESIntrinsics)) { + warning("AES intrinsics are not available on this CPU"); + FLAG_SET_DEFAULT(UseAESIntrinsics, false); + } + } + +#ifdef COMPILER2 + if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) { + UseMontgomeryMultiplyIntrinsic = true; + } + if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) { + UseMontgomerySquareIntrinsic = true; + } +#endif + + if (FLAG_IS_DEFAULT(UseFMA)) { + FLAG_SET_DEFAULT(UseFMA, true); + } + + UNSUPPORTED_OPTION(CriticalJNINatives); +} + +void VM_Version::initialize() { + ResourceMark rm; + // Making this stub must be FIRST use of assembler + + stub_blob = BufferBlob::create("get_cpu_info_stub", stub_size); + if (stub_blob == NULL) { + 
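+    // Without this stub the cpucfg-based feature query cannot be run, so give
+    // up on VM initialization right away.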
vm_exit_during_initialization("Unable to allocate get_cpu_info_stub"); + } + CodeBuffer c(stub_blob); + VM_Version_StubGenerator g(&c); + get_cpu_info_stub = CAST_TO_FN_PTR(get_cpu_info_stub_t, + g.generate_get_cpu_info()); + + get_processor_features(); +} diff --git a/src/hotspot/cpu/mips/vm_version_mips.hpp b/src/hotspot/cpu/mips/vm_version_mips.hpp new file mode 100644 index 00000000000..733a0af295a --- /dev/null +++ b/src/hotspot/cpu/mips/vm_version_mips.hpp @@ -0,0 +1,221 @@ +/* + * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_MIPS_VM_VM_VERSION_MIPS_HPP +#define CPU_MIPS_VM_VM_VERSION_MIPS_HPP + +#include "runtime/abstract_vm_version.hpp" +#include "runtime/globals_extension.hpp" +#include "utilities/sizes.hpp" + +class VM_Version: public Abstract_VM_Version { +public: + + union Loongson_Cpucfg_Id1 { + uint32_t value; + struct { + uint32_t FP_CFG : 1, + FPREV : 3, + MMI : 1, + MSA1 : 1, + MSA2 : 1, + CGP : 1, + WRP : 1, + LSX1 : 1, + LSX2 : 1, + LASX : 1, + R6FXP : 1, + R6CRCP : 1, + R6FPP : 1, + CNT64 : 1, + LSLDR0 : 1, + LSPREF : 1, + LSPREFX : 1, + LSSYNCI : 1, + LSUCA : 1, + LLSYNC : 1, + TGTSYNC : 1, + LLEXC : 1, + SCRAND : 1, + MUALP : 1, + KMUALEn : 1, + ITLBT : 1, + LSUPERF : 1, + SFBP : 1, + CDMAP : 1, + : 1; + } bits; + }; + + union Loongson_Cpucfg_Id2 { + uint32_t value; + struct { + uint32_t LEXT1 : 1, + LEXT2 : 1, + LEXT3 : 1, + LSPW : 1, + LBT1 : 1, + LBT2 : 1, + LBT3 : 1, + LBTMMU : 1, + LPMP : 1, + LPMRev : 3, + LAMO : 1, + LPIXU : 1, + LPIXNU : 1, + LVZP : 1, + LVZRev : 3, + LGFTP : 1, + LGFTRev : 3, + LLFTP : 1, + LLFTRev : 3, + LCSRP : 1, + DISBLKLY : 1, + : 3; + } bits; + }; + +protected: + + enum { + CPU_LOONGSON = (1 << 1), + CPU_LOONGSON_GS464 = (1 << 2), + CPU_LOONGSON_GS464E = (1 << 3), + CPU_LOONGSON_GS264 = (1 << 4), + CPU_MMI = (1 << 11), + CPU_MSA1_0 = (1 << 12), + CPU_MSA2_0 = (1 << 13), + CPU_CGP = (1 << 14), + CPU_LSX1 = (1 << 15), + CPU_LSX2 = (1 << 16), + CPU_LASX = (1 << 17), + CPU_LEXT1 = (1 << 18), + CPU_LEXT2 = (1 << 19), + CPU_LEXT3 = (1 << 20), + CPU_LAMO = (1 << 21), + CPU_LPIXU = (1 << 22), + CPU_LLSYNC = (1 << 23), + CPU_TGTSYNC = (1 << 24), + CPU_ULSYNC = (1 << 25), + CPU_MUALP = (1 << 26), + + //////////////////////add some other feature here////////////////// + } cpuFeatureFlags; + + enum Loongson_Family { + L_3A1000 = 0, + L_3B1500 = 1, + L_3A2000 = 2, + L_3B2000 = 3, + L_3A3000 = 4, + L_3B3000 = 5, + L_2K1000 = 6, + L_UNKNOWN = 7 + }; + + 
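+  // Pairs a Loongson_Family id with the /proc/cpuinfo substring that
+  // get_feature_flags_by_cpuinfo() uses to recognize that family.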
struct Loongson_Cpuinfo { + Loongson_Family id; + const char* const match_str; + }; + + static int _cpuFeatures; + static const char* _features_str; + static volatile bool _is_determine_cpucfg_supported_running; + static bool _is_cpucfg_instruction_supported; + static bool _cpu_info_is_initialized; + + struct CpuidInfo { + uint32_t cpucfg_info_id0; + Loongson_Cpucfg_Id1 cpucfg_info_id1; + Loongson_Cpucfg_Id2 cpucfg_info_id2; + uint32_t cpucfg_info_id3; + uint32_t cpucfg_info_id4; + uint32_t cpucfg_info_id5; + uint32_t cpucfg_info_id6; + uint32_t cpucfg_info_id8; + }; + + // The actual cpuid info block + static CpuidInfo _cpuid_info; + + static uint32_t get_feature_flags_by_cpucfg(); + static int get_feature_flags_by_cpuinfo(int features); + static void get_processor_features(); + +public: + // Offsets for cpuid asm stub + static ByteSize Loongson_Cpucfg_id0_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id0); } + static ByteSize Loongson_Cpucfg_id1_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id1); } + static ByteSize Loongson_Cpucfg_id2_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id2); } + static ByteSize Loongson_Cpucfg_id3_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id3); } + static ByteSize Loongson_Cpucfg_id4_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id4); } + static ByteSize Loongson_Cpucfg_id5_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id5); } + static ByteSize Loongson_Cpucfg_id6_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id6); } + static ByteSize Loongson_Cpucfg_id8_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id8); } + + static bool is_determine_features_test_running() { return _is_determine_cpucfg_supported_running; } + + static void clean_cpuFeatures() { _cpuFeatures = 0; } + + // Initialization + static void initialize(); + + static bool cpu_info_is_initialized() { return _cpu_info_is_initialized; } + + static bool supports_cpucfg() { return _is_cpucfg_instruction_supported; } + static bool set_supports_cpucfg(bool value) { return _is_cpucfg_instruction_supported = value; } + + static bool is_loongson() { return _cpuFeatures & CPU_LOONGSON; } + static bool is_gs264() { return _cpuFeatures & CPU_LOONGSON_GS264; } + static bool is_gs464() { return _cpuFeatures & CPU_LOONGSON_GS464; } + static bool is_gs464e() { return _cpuFeatures & CPU_LOONGSON_GS464E; } + static bool supports_dsp() { return 0; /*not supported yet*/} + static bool supports_ps() { return 0; /*not supported yet*/} + static bool supports_3d() { return 0; /*not supported yet*/} + static bool supports_msa1_0() { return _cpuFeatures & CPU_MSA1_0; } + static bool supports_msa2_0() { return _cpuFeatures & CPU_MSA2_0; } + static bool supports_cgp() { return _cpuFeatures & CPU_CGP; } + static bool supports_mmi() { return _cpuFeatures & CPU_MMI; } + static bool supports_lsx1() { return _cpuFeatures & CPU_LSX1; } + static bool supports_lsx2() { return _cpuFeatures & CPU_LSX2; } + static bool supports_lasx() { return _cpuFeatures & CPU_LASX; } + static bool supports_lext1() { return _cpuFeatures & CPU_LEXT1; } + static bool supports_lext2() { return _cpuFeatures & CPU_LEXT2; } + static bool supports_lext3() { return _cpuFeatures & CPU_LEXT3; } + static bool supports_lamo() { return _cpuFeatures & CPU_LAMO; } + static bool supports_lpixu() { return _cpuFeatures & CPU_LPIXU; } + static bool needs_llsync() { return _cpuFeatures & CPU_LLSYNC; } + static bool needs_tgtsync() { return _cpuFeatures & CPU_TGTSYNC; } + static bool 
needs_ulsync() { return _cpuFeatures & CPU_ULSYNC; } + static bool supports_mualp() { return _cpuFeatures & CPU_MUALP; } + + //mips has no such instructions, use ll/sc instead + static bool supports_compare_and_exchange() { return false; } + + static const char* cpu_features() { return _features_str; } + +}; + +#endif // CPU_MIPS_VM_VM_VERSION_MIPS_HPP diff --git a/src/hotspot/cpu/mips/vmreg_mips.cpp b/src/hotspot/cpu/mips/vmreg_mips.cpp new file mode 100644 index 00000000000..86bd74d4305 --- /dev/null +++ b/src/hotspot/cpu/mips/vmreg_mips.cpp @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2006, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/assembler.hpp" +#include "code/vmreg.hpp" + + + +void VMRegImpl::set_regName() { + Register reg = ::as_Register(0); + int i; + for (i = 0; i < ConcreteRegisterImpl::max_gpr ; ) { + regName[i++] = reg->name(); + regName[i++] = reg->name(); + reg = reg->successor(); + } + + FloatRegister freg = ::as_FloatRegister(0); + for ( ; i < ConcreteRegisterImpl::max_fpr ; ) { + regName[i++] = freg->name(); + regName[i++] = freg->name(); + freg = freg->successor(); + } + + for ( ; i < ConcreteRegisterImpl::number_of_registers ; i ++ ) { + regName[i] = "NON-GPR-FPR"; + } +} diff --git a/src/hotspot/cpu/mips/vmreg_mips.hpp b/src/hotspot/cpu/mips/vmreg_mips.hpp new file mode 100644 index 00000000000..8ccc8c513c8 --- /dev/null +++ b/src/hotspot/cpu/mips/vmreg_mips.hpp @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_MIPS_VM_VMREG_MIPS_HPP +#define CPU_MIPS_VM_VMREG_MIPS_HPP + +inline Register as_Register() { + assert( is_Register(), "must be"); + return ::as_Register(value() >> 1); +} + +inline FloatRegister as_FloatRegister() { + assert( is_FloatRegister(), "must be" ); + assert( is_even(value()), "must be" ); + return ::as_FloatRegister((value() - ConcreteRegisterImpl::max_gpr) >> 1); +} + +inline bool is_Register() { + return (unsigned int) value() < (unsigned int) ConcreteRegisterImpl::max_gpr; +} + +inline bool is_FloatRegister() { + return value() >= ConcreteRegisterImpl::max_gpr && value() < ConcreteRegisterImpl::max_fpr; +} + +inline bool is_concrete() { + assert(is_reg(), "must be"); + if(is_Register()) return true; + if(is_FloatRegister()) return true; + assert(false, "what register?"); + return false; +} + +#endif // CPU_MIPS_VM_VMREG_MIPS_HPP diff --git a/src/hotspot/cpu/mips/vmreg_mips.inline.hpp b/src/hotspot/cpu/mips/vmreg_mips.inline.hpp new file mode 100644 index 00000000000..12ad7361aa5 --- /dev/null +++ b/src/hotspot/cpu/mips/vmreg_mips.inline.hpp @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2006, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_MIPS_VM_VMREG_MIPS_INLINE_HPP +#define CPU_MIPS_VM_VMREG_MIPS_INLINE_HPP + +inline VMReg RegisterImpl::as_VMReg() { + if( this==noreg ) return VMRegImpl::Bad(); + return VMRegImpl::as_VMReg(encoding() << 1 ); +} + +inline VMReg FloatRegisterImpl::as_VMReg() { + return VMRegImpl::as_VMReg((encoding() << 1) + ConcreteRegisterImpl::max_gpr); +} + +#endif // CPU_MIPS_VM_VMREG_MIPS_INLINE_HPP diff --git a/src/hotspot/cpu/mips/vtableStubs_mips_64.cpp b/src/hotspot/cpu/mips/vtableStubs_mips_64.cpp new file mode 100644 index 00000000000..75c23e80887 --- /dev/null +++ b/src/hotspot/cpu/mips/vtableStubs_mips_64.cpp @@ -0,0 +1,340 @@ +/* + * Copyright (c) 2003, 2014, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "code/vtableStubs.hpp" +#include "interp_masm_mips.hpp" +#include "memory/resourceArea.hpp" +#include "oops/compiledICHolder.hpp" +#include "oops/klass.inline.hpp" +#include "oops/klassVtable.hpp" +#include "runtime/sharedRuntime.hpp" +#include "vmreg_mips.inline.hpp" +#ifdef COMPILER2 +#include "opto/runtime.hpp" +#endif + + +// machine-dependent part of VtableStubs: create VtableStub of correct size and +// initialize its code + +#define __ masm-> + +#define T0 RT0 +#define T1 RT1 +#define T2 RT2 +#define T3 RT3 +#define T8 RT8 +#define T9 RT9 + +#ifndef PRODUCT +extern "C" void bad_compiled_vtable_index(JavaThread* thread, oop receiver, int index); +#endif + +// used by compiler only; reciever in T0. +// used registers : +// Rmethod : receiver klass & method +// NOTE: If this code is used by the C1, the receiver_location is always 0. +// when reach here, receiver in T0, klass in T8 +VtableStub* VtableStubs::create_vtable_stub(int vtable_index) { + // Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing. + const int stub_code_length = code_size_limit(true); + VtableStub* s = new(stub_code_length) VtableStub(true, vtable_index); + // Can be NULL if there is no free space in the code cache. + if (s == NULL) { + return NULL; + } + + // Count unused bytes in instruction sequences of variable size. + // We add them to the computed buffer size in order to avoid + // overflow in subsequently generated stubs. + address start_pc; + int slop_bytes = 0; + int slop_delta = 0; + int load_const_maxLen = 6*BytesPerInstWord; // load_const generates 6 instructions. Assume that as max size for li + // No variance was detected in vtable stub sizes. Setting index_dependent_slop == 0 will unveil any deviation from this observation. 
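+  // Throughout this stub, slop_delta records how much shorter an emitted
+  // variable-length sequence turned out to be than its worst-case estimate;
+  // the running total in slop_bytes is passed to bookkeeping() at the end.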
+ const int index_dependent_slop = 0; + + ResourceMark rm; + CodeBuffer cb(s->entry_point(), stub_code_length); + MacroAssembler* masm = new MacroAssembler(&cb); + Register t1 = T8, t2 = Rmethod; +#if (!defined(PRODUCT) && defined(COMPILER2)) + if (CountCompiledCalls) { + start_pc = __ pc(); + __ li(AT, SharedRuntime::nof_megamorphic_calls_addr()); + slop_delta = load_const_maxLen - (__ pc() - start_pc); + slop_bytes += slop_delta; + assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta); + __ lw(t1, AT , 0); + __ addiu(t1, t1, 1); + __ sw(t1, AT,0); + } +#endif + + // get receiver (need to skip return address on top of stack) + //assert(receiver_location == T0->as_VMReg(), "receiver expected in T0"); + + // get receiver klass + address npe_addr = __ pc(); + //add for compressedoops + __ load_klass(t1, T0); + +#ifndef PRODUCT + if (DebugVtables) { + Label L; + // check offset vs vtable length + __ lw(t2, t1, in_bytes(Klass::vtable_length_offset())); + assert(Assembler::is_simm16(vtable_index*vtableEntry::size()), "change this code"); + __ move(AT, vtable_index*vtableEntry::size()); + __ slt(AT, AT, t2); + __ bne(AT, R0, L); + __ delayed()->nop(); + __ move(A2, vtable_index); + __ move(A1, A0); + + // VTABLE TODO: find upper bound for call_VM length. + start_pc = __ pc(); + __ call_VM(noreg, CAST_FROM_FN_PTR(address, bad_compiled_vtable_index), A1, A2); + const ptrdiff_t estimate = 512; + const ptrdiff_t codesize = __ pc() - start_pc; + slop_delta = estimate - codesize; // call_VM varies in length, depending on data + assert(slop_delta >= 0, "vtable #%d: Code size estimate (%d) for DebugVtables too small, required: %d", vtable_index, (int)estimate, (int)codesize); + __ bind(L); + } +#endif // PRODUCT + const Register method = Rmethod; + + // load methodOop and target address + start_pc = __ pc(); + // lookup_virtual_method generates 18 instructions (worst case) + __ lookup_virtual_method(t1, vtable_index, method); + slop_delta = 18*BytesPerInstWord - (int)(__ pc() - start_pc); + slop_bytes += slop_delta; + assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta); + +#ifndef PRODUCT + if (DebugVtables) { + Label L; + __ beq(method, R0, L); + __ delayed()->nop(); + __ ld(AT, method,in_bytes(Method::from_compiled_offset())); + __ bne(AT, R0, L); + __ delayed()->nop(); + __ stop("Vtable entry is NULL"); + __ bind(L); + } +#endif // PRODUCT + + // T8: receiver klass + // T0: receiver + // Rmethod: methodOop + // T9: entry + address ame_addr = __ pc(); + __ ld_ptr(T9, method,in_bytes(Method::from_compiled_offset())); + __ jr(T9); + __ delayed()->nop(); + masm->flush(); + slop_bytes += index_dependent_slop; // add'l slop for size variance due to large itable offsets + bookkeeping(masm, tty, s, npe_addr, ame_addr, true, vtable_index, slop_bytes, index_dependent_slop); + + return s; +} + + +// used registers : +// T1 T2 +// when reach here, the receiver in T0, klass in T1 +VtableStub* VtableStubs::create_itable_stub(int itable_index) { + // Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing. + const int stub_code_length = code_size_limit(false); + VtableStub* s = new(stub_code_length) VtableStub(false, itable_index); + // Can be NULL if there is no free space in the code cache. + if (s == NULL) { + return NULL; + } + // Count unused bytes in instruction sequences of variable size. 
+ // We add them to the computed buffer size in order to avoid + // overflow in subsequently generated stubs. + address start_pc; + int slop_bytes = 0; + int slop_delta = 0; + int load_const_maxLen = 6*BytesPerInstWord; // load_const generates 6 instructions. Assume that as max size for li + + ResourceMark rm; + CodeBuffer cb(s->entry_point(), stub_code_length); + MacroAssembler *masm = new MacroAssembler(&cb); + + // we T8,T9 as temparary register, they are free from register allocator + Register t1 = T8, t2 = T2; + // Entry arguments: + // T1: Interface + // T0: Receiver + +#if (!defined(PRODUCT) && defined(COMPILER2)) + if (CountCompiledCalls) { + start_pc = __ pc(); + __ li(AT, SharedRuntime::nof_megamorphic_calls_addr()); + slop_delta = load_const_maxLen - (__ pc() - start_pc); + slop_bytes += slop_delta; + assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta); + __ lw(T8, AT, 0); + __ addiu(T8, T8,1); + __ sw(T8, AT, 0); + } +#endif // PRODUCT + + const Register holder_klass_reg = T1; // declaring interface klass (DECC) + const Register resolved_klass_reg = Rmethod; // resolved interface klass (REFC) + + const Register icholder_reg = T1; + __ ld_ptr(resolved_klass_reg, icholder_reg, CompiledICHolder::holder_klass_offset()); + __ ld_ptr(holder_klass_reg, icholder_reg, CompiledICHolder::holder_metadata_offset()); + + Label L_no_such_interface; + + // get receiver klass (also an implicit null-check) + address npe_addr = __ pc(); + __ load_klass(t1, T0); + { + // x86 use lookup_interface_method, but lookup_interface_method does not work on MIPS. + // No dynamic code size variance here, so slop_bytes is not needed. + const int base = in_bytes(Klass::vtable_start_offset()); + assert(vtableEntry::size() * wordSize == 8, "adjust the scaling in the code below"); + assert(Assembler::is_simm16(base), "change this code"); + __ daddiu(t2, t1, base); + __ lw(AT, t1, in_bytes(Klass::vtable_length_offset())); + __ dsll(AT, AT, Address::times_8); + __ daddu(t2, t2, AT); + if (HeapWordsPerLong > 1) { + __ round_to(t2, BytesPerLong); + } + + Label hit, entry; + __ bind(entry); + + // Check that the entry is non-null. A null entry means that + // the receiver class doesn't implement the interface, and wasn't the + // same as when the caller was compiled. + __ ld_ptr(AT, t2, itableOffsetEntry::interface_offset_in_bytes()); + __ beq(AT, R0, L_no_such_interface); + __ delayed()->nop(); + + __ bne(AT, resolved_klass_reg, entry); + __ delayed()->addiu(t2, t2, itableOffsetEntry::size() * wordSize); + + } + + // add for compressedoops + __ load_klass(t1, T0); + // compute itable entry offset (in words) + const int base = in_bytes(Klass::vtable_start_offset()); + __ daddiu(t2, t1, base); + __ lw(AT, t1, in_bytes(Klass::vtable_length_offset())); + __ dsll(AT, AT, Address::times_8); + __ daddu(t2, t2, AT); + if (HeapWordsPerLong > 1) { + __ round_to(t2, BytesPerLong); + } + + Label hit, entry; + __ bind(entry); + + // Check that the entry is non-null. A null entry means that + // the receiver class doesn't implement the interface, and wasn't the + // same as when the caller was compiled. 
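+    // This is the second scan of the itable: entries are now matched against
+    // holder_klass_reg (the declaring interface) rather than resolved_klass_reg,
+    // and on a hit the method-table offset is loaded into t2 just after the loop.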
+ __ ld_ptr(AT, t2, itableOffsetEntry::interface_offset_in_bytes()); + __ beq(AT, R0, L_no_such_interface); + __ delayed()->nop(); + + __ bne(AT, holder_klass_reg, entry); + __ delayed()->addiu(t2, t2, itableOffsetEntry::size() * wordSize); + + // We found a hit, move offset into T9 + __ ld_ptr(t2, t2, itableOffsetEntry::offset_offset_in_bytes() - itableOffsetEntry::size() * wordSize); + + // Compute itableMethodEntry. + const int method_offset = (itableMethodEntry::size() * wordSize * itable_index) + + itableMethodEntry::method_offset_in_bytes(); + + // Get methodOop and entrypoint for compiler + const Register method = Rmethod; + __ dsll(AT, t2, Address::times_1); + __ addu(AT, AT, t1); + start_pc = __ pc(); + __ set64(t1, method_offset); + slop_delta = load_const_maxLen - (__ pc() - start_pc); + slop_bytes += slop_delta; + assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta); + __ addu(AT, AT, t1); + __ ld_ptr(method, AT, 0); + +#ifdef ASSERT + if (DebugVtables) { + Label L1; + __ beq(method, R0, L1); + __ delayed()->nop(); + __ ld(AT, method, in_bytes(Method::from_compiled_offset())); + __ bne(AT, R0, L1); + __ delayed()->nop(); + __ stop("methodOop is null"); + __ bind(L1); + } +#endif // ASSERT + + // Rmethod: methodOop + // T0: receiver + // T9: entry point + address ame_addr = __ pc(); + __ ld_ptr(T9, method, in_bytes(Method::from_compiled_offset())); + __ jr(T9); + __ delayed()->nop(); + + __ bind(L_no_such_interface); + // Handle IncompatibleClassChangeError in itable stubs. + // More detailed error message. + // We force resolving of the call site by jumping to the "handle + // wrong method" stub, and so let the interpreter runtime do all the + // dirty work. + start_pc = __ pc(); + __ set64(T9, (long)SharedRuntime::get_handle_wrong_method_stub()); + slop_delta = load_const_maxLen - (__ pc() - start_pc); + slop_bytes += slop_delta; + assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta); + __ jr(T9); + __ delayed()->nop(); + + masm->flush(); + bookkeeping(masm, tty, s, npe_addr, ame_addr, false, itable_index, slop_bytes, 0); + + return s; +} + +// NOTE: whenever you change the code above, don't forget to change the const here +int VtableStub::pd_code_alignment() { + const unsigned int icache_line_size = wordSize; + return icache_line_size; +} diff --git a/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp b/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp index 847f7d61d2f..f5709460901 100644 --- a/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp +++ b/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp @@ -488,6 +488,9 @@ void LIR_Assembler::emit_opBranch(LIR_OpBranch* op) { } } +void LIR_Assembler::emit_opCmpBranch(LIR_OpCmpBranch* op) { + ShouldNotReachHere(); +} void LIR_Assembler::emit_opConvert(LIR_OpConvert* op) { Bytecodes::Code code = op->bytecode(); @@ -1608,6 +1611,10 @@ void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, L __ bind(skip); } +void LIR_Assembler::cmp_cmove(LIR_Condition condition, LIR_Opr left, LIR_Opr right, LIR_Opr src1, LIR_Opr src2, LIR_Opr result, BasicType type) { + ShouldNotReachHere(); +} + void LIR_Assembler::arith_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest, CodeEmitInfo* info, bool pop_fpu_stack) { diff --git a/src/hotspot/cpu/ppc/c1_LIRGenerator_ppc.cpp b/src/hotspot/cpu/ppc/c1_LIRGenerator_ppc.cpp index d34ea45c0bd..f6b6dbdee39 100644 --- a/src/hotspot/cpu/ppc/c1_LIRGenerator_ppc.cpp +++ b/src/hotspot/cpu/ppc/c1_LIRGenerator_ppc.cpp @@
-273,21 +273,29 @@ void LIRGenerator::increment_counter(LIR_Address* addr, int step) { __ move(temp, addr); } - -void LIRGenerator::cmp_mem_int(LIR_Condition condition, LIR_Opr base, int disp, int c, CodeEmitInfo* info) { +template <typename T> +void LIRGenerator::cmp_mem_int_branch(LIR_Condition condition, LIR_Opr base, int disp, int c, T tgt, CodeEmitInfo* info) { LIR_Opr tmp = FrameMap::R0_opr; __ load(new LIR_Address(base, disp, T_INT), tmp, info); - __ cmp(condition, tmp, c); + __ cmp_branch(condition, tmp, c, T_INT, tgt); } +// Explicit instantiation for all supported types. +template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, Label*, CodeEmitInfo*); +template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, BlockBegin*, CodeEmitInfo*); +template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, CodeStub*, CodeEmitInfo*); -void LIRGenerator::cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, - int disp, BasicType type, CodeEmitInfo* info) { +template <typename T> +void LIRGenerator::cmp_reg_mem_branch(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, T tgt, CodeEmitInfo* info) { LIR_Opr tmp = FrameMap::R0_opr; __ load(new LIR_Address(base, disp, type), tmp, info); - __ cmp(condition, reg, tmp); + __ cmp_branch(condition, reg, tmp, type, tgt); } +// Explicit instantiation for all supported types. +template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, Label*, CodeEmitInfo*); +template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, BlockBegin*, CodeEmitInfo*); +template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, CodeStub*, CodeEmitInfo*); bool LIRGenerator::strength_reduce_multiply(LIR_Opr left, jint c, LIR_Opr result, LIR_Opr tmp) { assert(left != result, "should be different registers"); diff --git a/src/hotspot/cpu/ppc/c1_LIR_ppc.cpp b/src/hotspot/cpu/ppc/c1_LIR_ppc.cpp index ef9b0833d38..c6b25bf10ed 100644 --- a/src/hotspot/cpu/ppc/c1_LIR_ppc.cpp +++ b/src/hotspot/cpu/ppc/c1_LIR_ppc.cpp @@ -62,3 +62,24 @@ void LIR_Address::verify() const { #endif } #endif // PRODUCT + +template <typename T> +void LIR_List::cmp_branch(LIR_Condition condition, LIR_Opr left, LIR_Opr right, BasicType type, T tgt, CodeEmitInfo* info) { + cmp(condition, left, right, info); + branch(condition, type, tgt); +} + +// Explicit instantiation for all supported types.
+template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, Label*, CodeEmitInfo*); +template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, BlockBegin*, CodeEmitInfo*); +template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, CodeStub*, CodeEmitInfo*); + +void LIR_List::cmp_branch(LIR_Condition condition, LIR_Opr left, LIR_Opr right, BasicType type, BlockBegin* block, BlockBegin* unordered) { + cmp(condition, left, right); + branch(condition, type, block, unordered); +} + +void LIR_List::cmp_cmove(LIR_Condition condition, LIR_Opr left, LIR_Opr right, LIR_Opr src1, LIR_Opr src2, LIR_Opr dst, BasicType type) { + cmp(condition, left, right); + cmove(condition, src1, src2, dst, type); +} diff --git a/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp b/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp index 897be2209e2..0c27cc20f36 100644 --- a/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp +++ b/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp @@ -379,6 +379,9 @@ void LIR_Assembler::emit_opBranch(LIR_OpBranch* op) { } } +void LIR_Assembler::emit_opCmpBranch(LIR_OpCmpBranch* op) { + ShouldNotReachHere(); +} void LIR_Assembler::emit_opConvert(LIR_OpConvert* op) { LIR_Opr src = op->in_opr(); @@ -1503,6 +1506,10 @@ void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, L } } +void LIR_Assembler::cmp_cmove(LIR_Condition condition, LIR_Opr left, LIR_Opr right, LIR_Opr src1, LIR_Opr src2, LIR_Opr result, BasicType type) { + ShouldNotReachHere(); +} + void LIR_Assembler::arith_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest, CodeEmitInfo* info, bool pop_fpu_stack) { assert(info == NULL, "should never be used, idiv/irem and ldiv/lrem not handled by this method"); diff --git a/src/hotspot/cpu/s390/c1_LIRGenerator_s390.cpp b/src/hotspot/cpu/s390/c1_LIRGenerator_s390.cpp index ae297ac6357..c786803e0fa 100644 --- a/src/hotspot/cpu/s390/c1_LIRGenerator_s390.cpp +++ b/src/hotspot/cpu/s390/c1_LIRGenerator_s390.cpp @@ -213,16 +213,29 @@ void LIRGenerator::increment_counter(LIR_Address* addr, int step) { __ add((LIR_Opr)addr, LIR_OprFact::intConst(step), (LIR_Opr)addr); } -void LIRGenerator::cmp_mem_int(LIR_Condition condition, LIR_Opr base, int disp, int c, CodeEmitInfo* info) { +template <typename T> +void LIRGenerator::cmp_mem_int_branch(LIR_Condition condition, LIR_Opr base, int disp, int c, T tgt, CodeEmitInfo* info) { LIR_Opr scratch = FrameMap::Z_R1_opr; __ load(new LIR_Address(base, disp, T_INT), scratch, info); - __ cmp(condition, scratch, c); + __ cmp_branch(condition, scratch, c, T_INT, tgt); } -void LIRGenerator::cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, CodeEmitInfo* info) { +// Explicit instantiation for all supported types. +template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, Label*, CodeEmitInfo*); +template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, BlockBegin*, CodeEmitInfo*); +template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, CodeStub*, CodeEmitInfo*); + +template <typename T> +void LIRGenerator::cmp_reg_mem_branch(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, T tgt, CodeEmitInfo* info) { __ cmp_reg_mem(condition, reg, new LIR_Address(base, disp, type), info); + __ branch(condition, type, tgt); } +// Explicit instantiation for all supported types.
+template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, Label*, CodeEmitInfo*); +template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, BlockBegin*, CodeEmitInfo*); +template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, CodeStub*, CodeEmitInfo*); + bool LIRGenerator::strength_reduce_multiply(LIR_Opr left, jint c, LIR_Opr result, LIR_Opr tmp) { if (tmp->is_valid()) { if (is_power_of_2(c + 1)) { diff --git a/src/hotspot/cpu/s390/c1_LIR_s390.cpp b/src/hotspot/cpu/s390/c1_LIR_s390.cpp index 9507ca08561..2116e9af2ba 100644 --- a/src/hotspot/cpu/s390/c1_LIR_s390.cpp +++ b/src/hotspot/cpu/s390/c1_LIR_s390.cpp @@ -56,3 +56,23 @@ void LIR_Address::verify() const { } #endif // PRODUCT +template <typename T> +void LIR_List::cmp_branch(LIR_Condition condition, LIR_Opr left, LIR_Opr right, BasicType type, T tgt, CodeEmitInfo* info) { + cmp(condition, left, right, info); + branch(condition, type, tgt); +} + +// Explicit instantiation for all supported types. +template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, Label*, CodeEmitInfo*); +template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, BlockBegin*, CodeEmitInfo*); +template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, CodeStub*, CodeEmitInfo*); + +void LIR_List::cmp_branch(LIR_Condition condition, LIR_Opr left, LIR_Opr right, BasicType type, BlockBegin* block, BlockBegin* unordered) { + cmp(condition, left, right); + branch(condition, type, block, unordered); +} + +void LIR_List::cmp_cmove(LIR_Condition condition, LIR_Opr left, LIR_Opr right, LIR_Opr src1, LIR_Opr src2, LIR_Opr dst, BasicType type) { + cmp(condition, left, right); + cmove(condition, src1, src2, dst, type); +} diff --git a/src/hotspot/cpu/sparc/c1_LIRAssembler_sparc.cpp b/src/hotspot/cpu/sparc/c1_LIRAssembler_sparc.cpp index e503159eb71..2e5609fec8c 100644 --- a/src/hotspot/cpu/sparc/c1_LIRAssembler_sparc.cpp +++ b/src/hotspot/cpu/sparc/c1_LIRAssembler_sparc.cpp @@ -599,6 +599,9 @@ void LIR_Assembler::emit_opBranch(LIR_OpBranch* op) { // The peephole pass fills the delay slot } +void LIR_Assembler::emit_opCmpBranch(LIR_OpCmpBranch* op) { + ShouldNotReachHere(); +} void LIR_Assembler::emit_opConvert(LIR_OpConvert* op) { Bytecodes::Code code = op->bytecode(); @@ -1638,6 +1641,9 @@ void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, L __ bind(skip); } +void LIR_Assembler::cmp_cmove(LIR_Condition condition, LIR_Opr left, LIR_Opr right, LIR_Opr src1, LIR_Opr src2, LIR_Opr result, BasicType type) { + ShouldNotReachHere(); +} void LIR_Assembler::arith_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest, CodeEmitInfo* info, bool pop_fpu_stack) { assert(info == NULL, "unused on this code path"); diff --git a/src/hotspot/cpu/sparc/c1_LIRGenerator_sparc.cpp b/src/hotspot/cpu/sparc/c1_LIRGenerator_sparc.cpp index a09a159722d..a02ffafc771 100644 --- a/src/hotspot/cpu/sparc/c1_LIRGenerator_sparc.cpp +++ b/src/hotspot/cpu/sparc/c1_LIRGenerator_sparc.cpp @@ -267,19 +267,29 @@ void LIRGenerator::increment_counter(LIR_Address* addr, int step) { __ move(temp, addr); } -void LIRGenerator::cmp_mem_int(LIR_Condition condition, LIR_Opr base, int disp, int c, CodeEmitInfo* info) { +template <typename T> +void LIRGenerator::cmp_mem_int_branch(LIR_Condition condition, LIR_Opr base, int disp, int c, T tgt, CodeEmitInfo* info) { LIR_Opr o7opr = FrameMap::O7_opr; __ load(new LIR_Address(base,
disp, T_INT), o7opr, info); - __ cmp(condition, o7opr, c); + __ cmp_branch(condition, o7opr, c, T_INT, tgt); } +// Explicit instantiation for all supported types. +template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, Label*, CodeEmitInfo*); +template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, BlockBegin*, CodeEmitInfo*); +template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, CodeStub*, CodeEmitInfo*); -void LIRGenerator::cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, CodeEmitInfo* info) { +template <typename T> +void LIRGenerator::cmp_reg_mem_branch(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, T tgt, CodeEmitInfo* info) { LIR_Opr o7opr = FrameMap::O7_opr; __ load(new LIR_Address(base, disp, type), o7opr, info); - __ cmp(condition, reg, o7opr); + __ cmp_branch(condition, reg, o7opr, type, tgt); } +// Explicit instantiation for all supported types. +template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, Label*, CodeEmitInfo*); +template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, BlockBegin*, CodeEmitInfo*); +template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, CodeStub*, CodeEmitInfo*); bool LIRGenerator::strength_reduce_multiply(LIR_Opr left, int c, LIR_Opr result, LIR_Opr tmp) { assert(left != result, "should be different registers"); diff --git a/src/hotspot/cpu/sparc/c1_LIR_sparc.cpp b/src/hotspot/cpu/sparc/c1_LIR_sparc.cpp index c21d2c1d9ad..9cebb387e23 100644 --- a/src/hotspot/cpu/sparc/c1_LIR_sparc.cpp +++ b/src/hotspot/cpu/sparc/c1_LIR_sparc.cpp @@ -54,3 +54,24 @@ void LIR_Address::verify() const { "wrong type for addresses"); } #endif // PRODUCT + +template <typename T> +void LIR_List::cmp_branch(LIR_Condition condition, LIR_Opr left, LIR_Opr right, BasicType type, T tgt, CodeEmitInfo* info) { + cmp(condition, left, right, info); + branch(condition, type, tgt); +} + +// Explicit instantiation for all supported types.
+template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, Label*, CodeEmitInfo*); +template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, BlockBegin*, CodeEmitInfo*); +template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, CodeStub*, CodeEmitInfo*); + +void LIR_List::cmp_branch(LIR_Condition condition, LIR_Opr left, LIR_Opr right, BasicType type, BlockBegin* block, BlockBegin* unordered) { + cmp(condition, left, right); + branch(condition, type, block, unordered); +} + +void LIR_List::cmp_cmove(LIR_Condition condition, LIR_Opr left, LIR_Opr right, LIR_Opr src1, LIR_Opr src2, LIR_Opr dst, BasicType type) { + cmp(condition, left, right); + cmove(condition, src1, src2, dst, type); +} diff --git a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp index cee3140f4f7..7b76eb0b9ef 100644 --- a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp @@ -1442,6 +1442,10 @@ void LIR_Assembler::emit_opBranch(LIR_OpBranch* op) { } } +void LIR_Assembler::emit_opCmpBranch(LIR_OpCmpBranch* op) { + ShouldNotReachHere(); +} + void LIR_Assembler::emit_opConvert(LIR_OpConvert* op) { LIR_Opr src = op->in_opr(); LIR_Opr dest = op->result_opr(); @@ -2030,6 +2034,9 @@ void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, L } } +void LIR_Assembler::cmp_cmove(LIR_Condition condition, LIR_Opr left, LIR_Opr right, LIR_Opr src1, LIR_Opr src2, LIR_Opr result, BasicType type) { + ShouldNotReachHere(); +} void LIR_Assembler::arith_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest, CodeEmitInfo* info, bool pop_fpu_stack) { assert(info == NULL, "should never be used, idiv/irem and ldiv/lrem not handled by this method"); diff --git a/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp b/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp index 905708a9fa4..1c6774e1d6a 100644 --- a/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp +++ b/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp @@ -255,15 +255,27 @@ void LIRGenerator::increment_counter(LIR_Address* addr, int step) { __ add((LIR_Opr)addr, LIR_OprFact::intConst(step), (LIR_Opr)addr); } -void LIRGenerator::cmp_mem_int(LIR_Condition condition, LIR_Opr base, int disp, int c, CodeEmitInfo* info) { +template <typename T> +void LIRGenerator::cmp_mem_int_branch(LIR_Condition condition, LIR_Opr base, int disp, int c, T tgt, CodeEmitInfo* info) { __ cmp_mem_int(condition, base, disp, c, info); + __ branch(condition, T_INT, tgt); } +// Explicit instantiation for all supported types. +template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, Label*, CodeEmitInfo*); +template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, BlockBegin*, CodeEmitInfo*); +template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, CodeStub*, CodeEmitInfo*); -void LIRGenerator::cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, CodeEmitInfo* info) { +template <typename T> +void LIRGenerator::cmp_reg_mem_branch(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, T tgt, CodeEmitInfo* info) { __ cmp_reg_mem(condition, reg, new LIR_Address(base, disp, type), info); + __ branch(condition, type, tgt); } +// Explicit instantiation for all supported types.
+template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, Label*, CodeEmitInfo*); +template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, BlockBegin*, CodeEmitInfo*); +template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, CodeStub*, CodeEmitInfo*); bool LIRGenerator::strength_reduce_multiply(LIR_Opr left, jint c, LIR_Opr result, LIR_Opr tmp) { if (tmp->is_valid() && c > 0 && c < max_jint) { diff --git a/src/hotspot/cpu/x86/c1_LIR_x86.cpp b/src/hotspot/cpu/x86/c1_LIR_x86.cpp index 92277ee0631..20e283e3022 100644 --- a/src/hotspot/cpu/x86/c1_LIR_x86.cpp +++ b/src/hotspot/cpu/x86/c1_LIR_x86.cpp @@ -72,3 +72,24 @@ void LIR_Address::verify() const { #endif } #endif // PRODUCT + +template <typename T> +void LIR_List::cmp_branch(LIR_Condition condition, LIR_Opr left, LIR_Opr right, BasicType type, T tgt, CodeEmitInfo* info) { + cmp(condition, left, right, info); + branch(condition, type, tgt); +} + +// Explicit instantiation for all supported types. +template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, Label*, CodeEmitInfo*); +template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, BlockBegin*, CodeEmitInfo*); +template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, CodeStub*, CodeEmitInfo*); + +void LIR_List::cmp_branch(LIR_Condition condition, LIR_Opr left, LIR_Opr right, BasicType type, BlockBegin* block, BlockBegin* unordered) { + cmp(condition, left, right); + branch(condition, type, block, unordered); +} + +void LIR_List::cmp_cmove(LIR_Condition condition, LIR_Opr left, LIR_Opr right, LIR_Opr src1, LIR_Opr src2, LIR_Opr dst, BasicType type) { + cmp(condition, left, right); + cmove(condition, src1, src2, dst, type); +} diff --git a/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.cpp b/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.cpp index 95d7e515013..8d7b623ee79 100644 --- a/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.cpp @@ -263,7 +263,8 @@ void ZBarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, #define __ ce->masm()-> void ZBarrierSetAssembler::generate_c1_load_barrier_test(LIR_Assembler* ce, - LIR_Opr ref) const { + LIR_Opr ref, + LIR_Opr res) const { __ testptr(ref->as_register(), address_bad_mask_from_thread(r15_thread)); } diff --git a/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.hpp b/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.hpp index 3687754e71a..791e4ed43fd 100644 --- a/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.hpp +++ b/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.hpp @@ -77,7 +77,8 @@ class ZBarrierSetAssembler : public ZBarrierSetAssemblerBase { #ifdef COMPILER1 void generate_c1_load_barrier_test(LIR_Assembler* ce, - LIR_Opr ref) const; + LIR_Opr ref, + LIR_Opr res) const; void generate_c1_load_barrier_stub(LIR_Assembler* ce, ZLoadBarrierStubC1* stub) const; diff --git a/src/hotspot/os/linux/os_linux.cpp b/src/hotspot/os/linux/os_linux.cpp index 8519ac6879b..b8d5f4eef6b 100644 --- a/src/hotspot/os/linux/os_linux.cpp +++ b/src/hotspot/os/linux/os_linux.cpp @@ -23,6 +23,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2021. These + * modifications are Copyright (c) 2021 Loongson Technology, and are made + * available on the same license terms set forth above.
+ */ + + // no precompiled headers #include "jvm.h" #include "classfile/classLoader.hpp" @@ -4076,6 +4082,8 @@ size_t os::Linux::find_large_page_size() { IA64_ONLY(256 * M) PPC_ONLY(4 * M) S390_ONLY(1 * M) + MIPS64_ONLY(4 * M) + LOONGARCH64_ONLY(4 * M); // On MIPS, _large_page_size is set to 4*M. // TODO: LA SPARC_ONLY(4 * M); #endif // ZERO diff --git a/src/hotspot/os_cpu/linux_loongarch/assembler_linux_loongarch.cpp b/src/hotspot/os_cpu/linux_loongarch/assembler_linux_loongarch.cpp new file mode 100644 index 00000000000..30719a0340b --- /dev/null +++ b/src/hotspot/os_cpu/linux_loongarch/assembler_linux_loongarch.cpp @@ -0,0 +1,24 @@ +/* + * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ diff --git a/src/hotspot/os_cpu/linux_loongarch/atomic_linux_loongarch.hpp b/src/hotspot/os_cpu/linux_loongarch/atomic_linux_loongarch.hpp new file mode 100644 index 00000000000..8403e7838ab --- /dev/null +++ b/src/hotspot/os_cpu/linux_loongarch/atomic_linux_loongarch.hpp @@ -0,0 +1,160 @@ +/* + * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2023, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions.
+ * + */ + +#ifndef OS_CPU_LINUX_LOONGARCH_ATOMIC_LINUX_LOONGARCH_HPP +#define OS_CPU_LINUX_LOONGARCH_ATOMIC_LINUX_LOONGARCH_HPP + +#include "runtime/vm_version.hpp" + +// Implementation of class atomic + +template<size_t byte_size> +struct Atomic::PlatformAdd + : Atomic::AddAndFetch<Atomic::PlatformAdd<byte_size> > +{ + template<typename I, typename D> + D add_and_fetch(I add_value, D volatile* dest, atomic_memory_order order) const { + //Unimplemented(); + return __sync_add_and_fetch(dest, add_value); + } +}; + +template<> +template<typename T> +inline T Atomic::PlatformXchg<4>::operator()(T exchange_value, + T volatile* dest, + atomic_memory_order order) const { + T __ret, __tmp; + + STATIC_ASSERT(4 == sizeof(T)); + __asm__ __volatile__ ( + "1: ll.w %[__ret], %[__dest] \n\t" + " move %[__tmp], %[__val] \n\t" + " sc.w %[__tmp], %[__dest] \n\t" + " beqz %[__tmp], 1b \n\t" + + : [__ret] "=&r" (__ret), [__tmp] "=&r" (__tmp) + : [__dest] "ZC" (*(volatile jint*)dest), [__val] "r" (exchange_value) + : "memory" + ); + + return __ret; +} + +template<> +template<typename T> +inline T Atomic::PlatformXchg<8>::operator()(T exchange_value, + T volatile* dest, + atomic_memory_order order) const { + STATIC_ASSERT(8 == sizeof(T)); + T __ret; + jlong __tmp; + __asm__ __volatile__ ( + "1: ll.d %[__ret], %[__dest] \n\t" + " move %[__tmp], %[__val] \n\t" + " sc.d %[__tmp], %[__dest] \n\t" + " beqz %[__tmp], 1b \n\t" + + : [__ret] "=&r" (__ret), [__tmp] "=&r" (__tmp) + : [__dest] "ZC" (*(volatile intptr_t*)dest), [__val] "r" (exchange_value) + : "memory" + ); + + return __ret; +} + +#if 0 +template<> +template<typename T> +inline T Atomic::PlatformCmpxchg<1>::operator()(T exchange_value, + T volatile* dest, + T compare_value, + atomic_memory_order order) const { + STATIC_ASSERT(1 == sizeof(T)); +} + +#else +// No direct support for cmpxchg of bytes; emulate using int. +template<> +struct Atomic::PlatformCmpxchg<1> : Atomic::CmpxchgByteUsingInt {}; +#endif + +template<> +template<typename T> +inline T Atomic::PlatformCmpxchg<4>::operator()(T exchange_value, + T volatile* dest, + T compare_value, + atomic_memory_order order) const { + STATIC_ASSERT(4 == sizeof(T)); + T __prev; + jint __cmp; + + __asm__ __volatile__ ( + "1: ll.w %[__prev], %[__dest] \n\t" + " bne %[__prev], %[__old], 2f \n\t" + " move %[__cmp], $r0 \n\t" + " move %[__cmp], %[__new] \n\t" + " sc.w %[__cmp], %[__dest] \n\t" + " beqz %[__cmp], 1b \n\t" + "2: \n\t" + " dbar 0x700 \n\t" + + : [__prev] "=&r" (__prev), [__cmp] "=&r" (__cmp) + : [__dest] "ZC" (*(volatile jint*)dest), [__old] "r" (compare_value), [__new] "r" (exchange_value) + : "memory" + ); + + return __prev; +} + +template<> +template<typename T> +inline T Atomic::PlatformCmpxchg<8>::operator()(T exchange_value, + T volatile* dest, + T compare_value, + atomic_memory_order order) const { + STATIC_ASSERT(8 == sizeof(T)); + T __prev; + jlong __cmp; + + __asm__ __volatile__ ( + "1: ll.d %[__prev], %[__dest] \n\t" + " bne %[__prev], %[__old], 2f \n\t" + " move %[__cmp], $r0 \n\t" + " move %[__cmp], %[__new] \n\t" + " sc.d %[__cmp], %[__dest] \n\t" + " beqz %[__cmp], 1b \n\t" + "2: \n\t" + " dbar 0x700 \n\t" + + : [__prev] "=&r" (__prev), [__cmp] "=&r" (__cmp) + : [__dest] "ZC" (*(volatile jlong*)dest), [__old] "r" (compare_value), [__new] "r" (exchange_value) + : "memory" + ); + return __prev; +} + + +#endif // OS_CPU_LINUX_LOONGARCH_ATOMIC_LINUX_LOONGARCH_HPP diff --git a/src/hotspot/os_cpu/linux_loongarch/bytes_linux_loongarch.inline.hpp b/src/hotspot/os_cpu/linux_loongarch/bytes_linux_loongarch.inline.hpp new file mode 100644 index 00000000000..c9f675baca4 --- /dev/null +++
b/src/hotspot/os_cpu/linux_loongarch/bytes_linux_loongarch.inline.hpp @@ -0,0 +1,37 @@ +/* + * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef OS_CPU_LINUX_LOONGARCH_BYTES_LINUX_LOONGARCH_INLINE_HPP +#define OS_CPU_LINUX_LOONGARCH_BYTES_LINUX_LOONGARCH_INLINE_HPP + +#include <byteswap.h> + +// Efficient swapping of data bytes from Java byte +// ordering to native byte ordering and vice versa. +inline u2 Bytes::swap_u2(u2 x) { return bswap_16(x); } +inline u4 Bytes::swap_u4(u4 x) { return bswap_32(x); } +inline u8 Bytes::swap_u8(u8 x) { return bswap_64(x); } + +#endif // OS_CPU_LINUX_LOONGARCH_BYTES_LINUX_LOONGARCH_INLINE_HPP diff --git a/src/hotspot/os_cpu/linux_loongarch/copy_linux_loongarch.inline.hpp b/src/hotspot/os_cpu/linux_loongarch/copy_linux_loongarch.inline.hpp new file mode 100644 index 00000000000..826c1fe39ac --- /dev/null +++ b/src/hotspot/os_cpu/linux_loongarch/copy_linux_loongarch.inline.hpp @@ -0,0 +1,125 @@ +/* + * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions.
+ * + */ + +#ifndef OS_CPU_LINUX_LOONGARCH_COPY_LINUX_LOONGARCH_INLINE_HPP +#define OS_CPU_LINUX_LOONGARCH_COPY_LINUX_LOONGARCH_INLINE_HPP + +static void pd_conjoint_words(const HeapWord* from, HeapWord* to, size_t count) { + (void)memmove(to, from, count * HeapWordSize); +} + +static void pd_disjoint_words(const HeapWord* from, HeapWord* to, size_t count) { + switch (count) { + case 8: to[7] = from[7]; + case 7: to[6] = from[6]; + case 6: to[5] = from[5]; + case 5: to[4] = from[4]; + case 4: to[3] = from[3]; + case 3: to[2] = from[2]; + case 2: to[1] = from[1]; + case 1: to[0] = from[0]; + case 0: break; + default: + (void)memcpy(to, from, count * HeapWordSize); + break; + } +} + +static void pd_disjoint_words_atomic(const HeapWord* from, HeapWord* to, size_t count) { + switch (count) { + case 8: to[7] = from[7]; + case 7: to[6] = from[6]; + case 6: to[5] = from[5]; + case 5: to[4] = from[4]; + case 4: to[3] = from[3]; + case 3: to[2] = from[2]; + case 2: to[1] = from[1]; + case 1: to[0] = from[0]; + case 0: break; + default: + while (count-- > 0) { + *to++ = *from++; + } + break; + } +} + +static void pd_aligned_conjoint_words(const HeapWord* from, HeapWord* to, size_t count) { + pd_conjoint_words(from, to, count); +} + +static void pd_aligned_disjoint_words(const HeapWord* from, HeapWord* to, size_t count) { + pd_disjoint_words(from, to, count); +} + +static void pd_conjoint_bytes(const void* from, void* to, size_t count) { + (void)memmove(to, from, count); +} + +static void pd_conjoint_bytes_atomic(const void* from, void* to, size_t count) { + pd_conjoint_bytes(from, to, count); +} + +static void pd_conjoint_jshorts_atomic(const jshort* from, jshort* to, size_t count) { + copy_conjoint_atomic(from, to, count); +} + +static void pd_conjoint_jints_atomic(const jint* from, jint* to, size_t count) { + copy_conjoint_atomic(from, to, count); +} + +static void pd_conjoint_jlongs_atomic(const jlong* from, jlong* to, size_t count) { + copy_conjoint_atomic(from, to, count); +} + +static void pd_conjoint_oops_atomic(const oop* from, oop* to, size_t count) { + //assert(!UseCompressedOops, "foo!"); + assert(HeapWordSize == BytesPerOop, "heapwords and oops must be the same size"); + copy_conjoint_atomic(from, to, count); +} + +static void pd_arrayof_conjoint_bytes(const HeapWord* from, HeapWord* to, size_t count) { + pd_conjoint_bytes_atomic(from, to, count); +} + +static void pd_arrayof_conjoint_jshorts(const HeapWord* from, HeapWord* to, size_t count) { + pd_conjoint_jshorts_atomic((jshort*)from, (jshort*)to, count); +} + +static void pd_arrayof_conjoint_jints(const HeapWord* from, HeapWord* to, size_t count) { + pd_conjoint_jints_atomic((jint*)from, (jint*)to, count); +} + +static void pd_arrayof_conjoint_jlongs(const HeapWord* from, HeapWord* to, size_t count) { + pd_conjoint_jlongs_atomic((jlong*)from, (jlong*)to, count); +} + +static void pd_arrayof_conjoint_oops(const HeapWord* from, HeapWord* to, size_t count) { + //assert(!UseCompressedOops, "foo!"); + assert(BytesPerLong == BytesPerOop, "jlongs and oops must be the same size"); + pd_conjoint_oops_atomic((oop*)from, (oop*)to, count); +} + +#endif // OS_CPU_LINUX_LOONGARCH_COPY_LINUX_LOONGARCH_INLINE_HPP diff --git a/src/hotspot/os_cpu/linux_loongarch/globals_linux_loongarch.hpp b/src/hotspot/os_cpu/linux_loongarch/globals_linux_loongarch.hpp new file mode 100644 index 00000000000..0b5247aa0b6 --- /dev/null +++ b/src/hotspot/os_cpu/linux_loongarch/globals_linux_loongarch.hpp @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2000, 2013, Oracle and/or 
its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef OS_CPU_LINUX_LOONGARCH_GLOBALS_LINUX_LOONGARCH_HPP +#define OS_CPU_LINUX_LOONGARCH_GLOBALS_LINUX_LOONGARCH_HPP + +// Sets the default values for platform dependent flags used by the runtime system. +// (see globals.hpp) + +define_pd_global(bool, DontYieldALot, false); +define_pd_global(intx, ThreadStackSize, 2048); // 0 => use system default +define_pd_global(intx, VMThreadStackSize, 2048); + +define_pd_global(intx, CompilerThreadStackSize, 2048); + +define_pd_global(uintx,JVMInvokeMethodSlack, 8192); + +// Used on 64 bit platforms for UseCompressedOops base address +define_pd_global(uintx,HeapBaseMinAddress, 2*G); + +#endif // OS_CPU_LINUX_LOONGARCH_GLOBALS_LINUX_LOONGARCH_HPP diff --git a/src/hotspot/os_cpu/linux_loongarch/linux_loongarch.s b/src/hotspot/os_cpu/linux_loongarch/linux_loongarch.s new file mode 100644 index 00000000000..ebd73af0c53 --- /dev/null +++ b/src/hotspot/os_cpu/linux_loongarch/linux_loongarch.s @@ -0,0 +1,25 @@ +# +# Copyright (c) 2004, 2013, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. +# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +# +# This code is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License version 2 only, as +# published by the Free Software Foundation. +# +# This code is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +# version 2 for more details (a copy is included in the LICENSE file that +# accompanied this code). +# +# You should have received a copy of the GNU General Public License version +# 2 along with this work; if not, write to the Free Software Foundation, +# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +# +# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +# or visit www.oracle.com if you need additional information or have any +# questions. 
+# + + diff --git a/src/hotspot/os_cpu/linux_loongarch/orderAccess_linux_loongarch.hpp b/src/hotspot/os_cpu/linux_loongarch/orderAccess_linux_loongarch.hpp new file mode 100644 index 00000000000..5429a1055ab --- /dev/null +++ b/src/hotspot/os_cpu/linux_loongarch/orderAccess_linux_loongarch.hpp @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef OS_CPU_LINUX_LOONGARCH_ORDERACCESS_LINUX_LOONGARCH_HPP +#define OS_CPU_LINUX_LOONGARCH_ORDERACCESS_LINUX_LOONGARCH_HPP + +#include "runtime/os.hpp" + +// Included in orderAccess.hpp header file. + +// Implementation of class OrderAccess. +#define inlasm_sync(v) if (os::is_ActiveCoresMP()) \ + __asm__ __volatile__ ("nop" : : : "memory"); \ + else \ + __asm__ __volatile__ ("dbar %0" : :"K"(v) : "memory"); + +inline void OrderAccess::loadload() { inlasm_sync(0x15); } +inline void OrderAccess::storestore() { inlasm_sync(0x1a); } +inline void OrderAccess::loadstore() { inlasm_sync(0x16); } +inline void OrderAccess::storeload() { inlasm_sync(0x19); } + +inline void OrderAccess::acquire() { inlasm_sync(0x14); } +inline void OrderAccess::release() { inlasm_sync(0x12); } +inline void OrderAccess::fence() { inlasm_sync(0x10); } + + +#undef inlasm_sync + +#endif // OS_CPU_LINUX_LOONGARCH_ORDERACCESS_LINUX_LOONGARCH_HPP diff --git a/src/hotspot/os_cpu/linux_loongarch/os_linux_loongarch.cpp b/src/hotspot/os_cpu/linux_loongarch/os_linux_loongarch.cpp new file mode 100644 index 00000000000..cf5fff0d043 --- /dev/null +++ b/src/hotspot/os_cpu/linux_loongarch/os_linux_loongarch.cpp @@ -0,0 +1,710 @@ +/* + * Copyright (c) 1999, 2014, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +// no precompiled headers +#include "asm/macroAssembler.hpp" +#include "classfile/classLoader.hpp" +#include "classfile/systemDictionary.hpp" +#include "classfile/vmSymbols.hpp" +#include "code/icBuffer.hpp" +#include "code/vtableStubs.hpp" +#include "interpreter/interpreter.hpp" +#include "memory/allocation.inline.hpp" +#include "os_share_linux.hpp" +#include "prims/jniFastGetField.hpp" +#include "prims/jvm_misc.hpp" +#include "runtime/arguments.hpp" +#include "runtime/extendedPC.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/interfaceSupport.inline.hpp" +#include "runtime/java.hpp" +#include "runtime/javaCalls.hpp" +#include "runtime/mutexLocker.hpp" +#include "runtime/osThread.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#include "runtime/thread.inline.hpp" +#include "runtime/timer.hpp" +#include "utilities/events.hpp" +#include "utilities/vmError.hpp" +#include "compiler/disassembler.hpp" + +// put OS-includes here +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include + +#define REG_SP 3 +#define REG_FP 22 + +NOINLINE address os::current_stack_pointer() { + register void *sp __asm__ ("$r3"); + return (address) sp; +} + +char* os::non_memory_address_word() { + // Must never look like an address returned by reserve_memory, + // even in its subfields (as defined by the CPU immediate fields, + // if the CPU splits constants across multiple instructions). + + return (char*) -1; +} + +address os::Linux::ucontext_get_pc(const ucontext_t * uc) { + return (address)uc->uc_mcontext.__pc; +} + +void os::Linux::ucontext_set_pc(ucontext_t * uc, address pc) { + uc->uc_mcontext.__pc = (intptr_t)pc; +} + +intptr_t* os::Linux::ucontext_get_sp(const ucontext_t * uc) { + return (intptr_t*)uc->uc_mcontext.__gregs[REG_SP]; +} + +intptr_t* os::Linux::ucontext_get_fp(const ucontext_t * uc) { + return (intptr_t*)uc->uc_mcontext.__gregs[REG_FP]; +} + +// For Forte Analyzer AsyncGetCallTrace profiling support - thread +// is currently interrupted by SIGPROF. +// os::Solaris::fetch_frame_from_ucontext() tries to skip nested signal +// frames. Currently we don't do that on Linux, so it's the same as +// os::fetch_frame_from_context(). 
+ExtendedPC os::Linux::fetch_frame_from_ucontext(Thread* thread, + const ucontext_t* uc, intptr_t** ret_sp, intptr_t** ret_fp) { + + assert(thread != NULL, "just checking"); + assert(ret_sp != NULL, "just checking"); + assert(ret_fp != NULL, "just checking"); + + return os::fetch_frame_from_context(uc, ret_sp, ret_fp); +} + +ExtendedPC os::fetch_frame_from_context(const void* ucVoid, + intptr_t** ret_sp, intptr_t** ret_fp) { + + ExtendedPC epc; + ucontext_t* uc = (ucontext_t*)ucVoid; + + if (uc != NULL) { + epc = ExtendedPC(os::Linux::ucontext_get_pc(uc)); + if (ret_sp) *ret_sp = os::Linux::ucontext_get_sp(uc); + if (ret_fp) *ret_fp = os::Linux::ucontext_get_fp(uc); + } else { + // construct empty ExtendedPC for return value checking + epc = ExtendedPC(NULL); + if (ret_sp) *ret_sp = (intptr_t *)NULL; + if (ret_fp) *ret_fp = (intptr_t *)NULL; + } + + return epc; +} + +frame os::fetch_frame_from_context(const void* ucVoid) { + intptr_t* sp; + intptr_t* fp; + ExtendedPC epc = fetch_frame_from_context(ucVoid, &sp, &fp); + return frame(sp, fp, epc.pc()); +} + +bool os::Linux::get_frame_at_stack_banging_point(JavaThread* thread, ucontext_t* uc, frame* fr) { + address pc = (address) os::Linux::ucontext_get_pc(uc); + if (Interpreter::contains(pc)) { + // interpreter performs stack banging after the fixed frame header has + // been generated while the compilers perform it before. To maintain + // semantic consistency between interpreted and compiled frames, the + // method returns the Java sender of the current frame. + *fr = os::fetch_frame_from_context(uc); + if (!fr->is_first_java_frame()) { + assert(fr->safe_for_sender(thread), "Safety check"); + *fr = fr->java_sender(); + } + } else { + // more complex code with compiled code + assert(!Interpreter::contains(pc), "Interpreted methods should have been handled above"); + CodeBlob* cb = CodeCache::find_blob(pc); + if (cb == NULL || !cb->is_nmethod() || cb->is_frame_complete_at(pc)) { + // Not sure where the pc points to, fallback to default + // stack overflow handling + return false; + } else { + // In compiled code, the stack banging is performed before LR + // has been saved in the frame. RA is live, and SP and FP + // belong to the caller. + intptr_t* fp = os::Linux::ucontext_get_fp(uc); + intptr_t* sp = os::Linux::ucontext_get_sp(uc); + address pc = (address)(uc->uc_mcontext.__gregs[1]); + *fr = frame(sp, fp, pc); + if (!fr->is_java_frame()) { + assert(fr->safe_for_sender(thread), "Safety check"); + assert(!fr->is_first_frame(), "Safety check"); + *fr = fr->java_sender(); + } + } + } + assert(fr->is_java_frame(), "Safety check"); + return true; +} + +// By default, gcc always save frame pointer on stack. 
It may get +// turned off by -fomit-frame-pointer, +frame os::get_sender_for_C_frame(frame* fr) { + return frame(fr->sender_sp(), fr->link(), fr->sender_pc()); +} + +frame os::current_frame() { + intptr_t *fp = ((intptr_t **)__builtin_frame_address(0))[frame::native_frame_link_offset]; + frame myframe((intptr_t*)os::current_stack_pointer(), + (intptr_t*)fp, + CAST_FROM_FN_PTR(address, os::current_frame)); + if (os::is_first_C_frame(&myframe)) { + // stack is not walkable + return frame(); + } else { + return os::get_sender_for_C_frame(&myframe); + } +} + +extern "C" int +JVM_handle_linux_signal(int sig, + siginfo_t* info, + void* ucVoid, + int abort_if_unrecognized) { +#ifdef PRINT_SIGNAL_HANDLE + tty->print_cr("Signal: signo=%d, sicode=%d, sierrno=%d, siaddr=%lx", + info->si_signo, + info->si_code, + info->si_errno, + info->si_addr); +#endif + + ucontext_t* uc = (ucontext_t*) ucVoid; + + Thread* t = Thread::current_or_null_safe(); + + SignalHandlerMark shm(t); + + // Note: it's not uncommon that JNI code uses signal/sigset to install + // then restore certain signal handler (e.g. to temporarily block SIGPIPE, + // or have a SIGILL handler when detecting CPU type). When that happens, + // JVM_handle_linux_signal() might be invoked with junk info/ucVoid. To + // avoid unnecessary crash when libjsig is not preloaded, try handle signals + // that do not require siginfo/ucontext first. + + if (sig == SIGPIPE/* || sig == SIGXFSZ*/) { + // allow chained handler to go first + if (os::Linux::chained_handler(sig, info, ucVoid)) { + return true; + } else { + if (PrintMiscellaneous && (WizardMode || Verbose)) { + warning("Ignoring SIGPIPE - see bug 4229104"); + } + return true; + } + } + +#ifdef CAN_SHOW_REGISTERS_ON_ASSERT + if ((sig == SIGSEGV || sig == SIGBUS) && info != NULL && info->si_addr == g_assert_poison) { + handle_assert_poison_fault(ucVoid, info->si_addr); + return 1; + } +#endif + + JavaThread* thread = NULL; + VMThread* vmthread = NULL; + if (os::Linux::signal_handlers_are_installed) { + if (t != NULL ){ + if(t->is_Java_thread()) { +#ifdef PRINT_SIGNAL_HANDLE + tty->print_cr("this thread is a java thread"); +#endif + thread = (JavaThread*)t; + } + else if(t->is_VM_thread()){ +#ifdef PRINT_SIGNAL_HANDLE + tty->print_cr("this thread is a VM thread\n"); +#endif + vmthread = (VMThread *)t; + } + } + } + + // Handle SafeFetch faults: + if (uc != NULL) { + address const pc = (address) os::Linux::ucontext_get_pc(uc); + if (pc && StubRoutines::is_safefetch_fault(pc)) { + os::Linux::ucontext_set_pc(uc, StubRoutines::continuation_for_safefetch_fault(pc)); + return 1; + } + } + + // decide if this trap can be handled by a stub + address stub = NULL; + address pc = NULL; + + pc = (address) os::Linux::ucontext_get_pc(uc); +#ifdef PRINT_SIGNAL_HANDLE + tty->print_cr("pc=%lx", pc); + os::print_context(tty, uc); +#endif + //%note os_trap_1 + if (info != NULL && uc != NULL && thread != NULL) { + pc = (address) os::Linux::ucontext_get_pc(uc); + + // Handle ALL stack overflow variations here + if (sig == SIGSEGV) { + address addr = (address) info->si_addr; +#ifdef PRINT_SIGNAL_HANDLE + tty->print("handle all stack overflow variations: "); + /*tty->print("addr = %lx, stack base = %lx, stack top = %lx\n", + addr, + thread->stack_base(), + thread->stack_base() - thread->stack_size()); + */ +#endif + + // check if fault address is within thread stack + if (thread->on_local_stack(addr)) { + // stack overflow +#ifdef PRINT_SIGNAL_HANDLE + tty->print("stack exception check \n"); +#endif + if 
(thread->in_stack_yellow_reserved_zone(addr)) { +#ifdef PRINT_SIGNAL_HANDLE + tty->print("exception addr is in yellow zone\n"); +#endif + if (thread->thread_state() == _thread_in_Java) { + if (thread->in_stack_reserved_zone(addr)) { + frame fr; + if (os::Linux::get_frame_at_stack_banging_point(thread, uc, &fr)) { + assert(fr.is_java_frame(), "Must be a Java frame"); + frame activation = + SharedRuntime::look_for_reserved_stack_annotated_method(thread, fr); + if (activation.sp() != NULL) { + thread->disable_stack_reserved_zone(); + if (activation.is_interpreted_frame()) { + thread->set_reserved_stack_activation((address)( + activation.fp() + frame::interpreter_frame_initial_sp_offset)); + } else { + thread->set_reserved_stack_activation((address)activation.unextended_sp()); + } + return 1; + } + } + } + // Throw a stack overflow exception. Guard pages will be reenabled + // while unwinding the stack. +#ifdef PRINT_SIGNAL_HANDLE + tty->print("this thread is in java\n"); +#endif + thread->disable_stack_yellow_reserved_zone(); + stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::STACK_OVERFLOW); + } else { + // Thread was in the vm or native code. Return and try to finish. +#ifdef PRINT_SIGNAL_HANDLE + tty->print("this thread is in vm or native codes and return\n"); +#endif + thread->disable_stack_yellow_reserved_zone(); + return 1; + } + } else if (thread->in_stack_red_zone(addr)) { + // Fatal red zone violation. Disable the guard pages and fall through + // to handle_unexpected_exception way down below. +#ifdef PRINT_SIGNAL_HANDLE + tty->print("exception addr is in red zone\n"); +#endif + thread->disable_stack_red_zone(); + tty->print_raw_cr("An irrecoverable stack overflow has occurred."); + + // This is a likely cause, but hard to verify. Let's just print + // it as a hint. + tty->print_raw_cr("Please check if any of your loaded .so files has " + "enabled executable stack (see man page execstack(8))"); + } else { + // Accessing stack address below sp may cause SEGV if current + // thread has MAP_GROWSDOWN stack. This should only happen when + // current thread was created by user code with MAP_GROWSDOWN flag + // and then attached to VM. See notes in os_linux.cpp. +#ifdef PRINT_SIGNAL_HANDLE + tty->print("exception addr is neither in yellow zone nor in the red one\n"); +#endif + if (thread->osthread()->expanding_stack() == 0) { + thread->osthread()->set_expanding_stack(); + if (os::Linux::manually_expand_stack(thread, addr)) { + thread->osthread()->clear_expanding_stack(); + return 1; + } + thread->osthread()->clear_expanding_stack(); + } else { + fatal("recursive segv. expanding stack."); + } + } + } + } // sig == SIGSEGV + + if (thread->thread_state() == _thread_in_Java) { + // Java thread running in Java code => find exception handler if any + // a fault inside compiled code, the interpreter, or a stub +#ifdef PRINT_SIGNAL_HANDLE + tty->print("java thread running in java code\n"); +#endif + + // Handle signal from NativeJump::patch_verified_entry(). 
+ if (sig == SIGILL && nativeInstruction_at(pc)->is_sigill_zombie_not_entrant()) { +#ifdef PRINT_SIGNAL_HANDLE + tty->print_cr("verified entry = %lx, sig=%d", nativeInstruction_at(pc), sig); +#endif + stub = SharedRuntime::get_handle_wrong_method_stub(); + } else if (sig == SIGSEGV && os::is_poll_address((address)info->si_addr)) { +#ifdef PRINT_SIGNAL_HANDLE + tty->print_cr("polling address = %lx, sig=%d", os::get_polling_page(), sig); +#endif + stub = SharedRuntime::get_poll_stub(pc); + } else if (sig == SIGBUS /* && info->si_code == BUS_OBJERR */) { + // BugId 4454115: A read from a MappedByteBuffer can fault + // here if the underlying file has been truncated. + // Do not crash the VM in such a case. + CodeBlob* cb = CodeCache::find_blob_unsafe(pc); + CompiledMethod* nm = (cb != NULL) ? cb->as_compiled_method_or_null() : NULL; +#ifdef PRINT_SIGNAL_HANDLE + tty->print("cb = %lx, nm = %lx\n", cb, nm); +#endif + if (nm != NULL && nm->has_unsafe_access()) { + address next_pc = (address)((unsigned long)pc + sizeof(unsigned int)); + stub = SharedRuntime::handle_unsafe_access(thread, next_pc); + } + } else if (sig == SIGFPE /* && info->si_code == FPE_INTDIV */) { + // HACK: si_code does not work on linux 2.2.12-20!!! + int op = pc[0] & 0x3f; + int op1 = pc[3] & 0x3f; + //FIXME, Must port to LA code!! + switch (op) { + case 0x1e: //ddiv + case 0x1f: //ddivu + case 0x1a: //div + case 0x1b: //divu + case 0x34: //trap + // In LA, div_by_zero exception can only be triggered by explicit 'trap'. + stub = SharedRuntime::continuation_for_implicit_exception(thread, + pc, + SharedRuntime::IMPLICIT_DIVIDE_BY_ZERO); + break; + default: + // TODO: handle more cases if we are using other x86 instructions + // that can generate SIGFPE signal on linux. + tty->print_cr("unknown opcode 0x%X -0x%X with SIGFPE.", op, op1); + //fatal("please update this code."); + } + } else if (sig == SIGSEGV && + !MacroAssembler::needs_explicit_null_check((intptr_t)info->si_addr)) { +#ifdef PRINT_SIGNAL_HANDLE + tty->print("continuation for implicit exception\n"); +#endif + // Determination of interpreter/vtable stub/compiled code null exception + stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::IMPLICIT_NULL); +#ifdef PRINT_SIGNAL_HANDLE + tty->print_cr("continuation_for_implicit_exception stub: %lx", stub); +#endif + } + } else if (thread->thread_state() == _thread_in_vm && + sig == SIGBUS && /* info->si_code == BUS_OBJERR && */ + thread->doing_unsafe_access()) { +#ifdef PRINT_SIGNAL_HANDLE + tty->print_cr("SIGBUS in vm thread \n"); +#endif + address next_pc = (address)((unsigned long)pc + sizeof(unsigned int)); + stub = SharedRuntime::handle_unsafe_access(thread, next_pc); + } + + // jni_fast_GetField can trap at certain pc's if a GC kicks in + // and the heap gets shrunk before the field access. + if ((sig == SIGSEGV) || (sig == SIGBUS)) { +#ifdef PRINT_SIGNAL_HANDLE + tty->print("jni fast get trap: "); +#endif + address addr = JNI_FastGetField::find_slowcase_pc(pc); + if (addr != (address)-1) { + stub = addr; + } +#ifdef PRINT_SIGNAL_HANDLE + tty->print_cr("addr = %d, stub = %lx", addr, stub); +#endif + } + + // Check to see if we caught the safepoint code in the + // process of write protecting the memory serialization page. + // It write enables the page immediately after protecting it + // so we can just return to retry the write. 
+    if ((sig == SIGSEGV) &&
+        os::is_memory_serialize_page(thread, (address) info->si_addr)) {
+#ifdef PRINT_SIGNAL_HANDLE
+      tty->print("write protecting the memory serialization page\n");
+#endif
+      // Block current thread until the memory serialize page permission is restored.
+      os::block_on_serialize_page_trap();
+      return true;
+    }
+  }
+
+  if (stub != NULL) {
+#ifdef PRINT_SIGNAL_HANDLE
+    tty->print_cr("resolved stub=%lx\n",stub);
+#endif
+    // save all thread context in case we need to restore it
+    if (thread != NULL) thread->set_saved_exception_pc(pc);
+
+    os::Linux::ucontext_set_pc(uc, stub);
+    return true;
+  }
+
+  // signal-chaining
+  if (os::Linux::chained_handler(sig, info, ucVoid)) {
+#ifdef PRINT_SIGNAL_HANDLE
+    tty->print_cr("signal chaining\n");
+#endif
+    return true;
+  }
+
+  if (!abort_if_unrecognized) {
+#ifdef PRINT_SIGNAL_HANDLE
+    tty->print_cr("abort because of unrecognized\n");
+#endif
+    // caller wants another chance, so give it to him
+    return false;
+  }
+
+  if (pc == NULL && uc != NULL) {
+    pc = os::Linux::ucontext_get_pc(uc);
+  }
+
+  // unmask current signal
+  sigset_t newset;
+  sigemptyset(&newset);
+  sigaddset(&newset, sig);
+  sigprocmask(SIG_UNBLOCK, &newset, NULL);
+#ifdef PRINT_SIGNAL_HANDLE
+  tty->print_cr("VMError in signal handler\n");
+#endif
+  VMError::report_and_die(t, sig, pc, info, ucVoid);
+
+  ShouldNotReachHere();
+  return true; // Mute compiler
+}
+
+void os::Linux::init_thread_fpu_state(void) {
+}
+
+int os::Linux::get_fpu_control_word(void) {
+  return 0; // mute compiler
+}
+
+void os::Linux::set_fpu_control_word(int fpu_control) {
+}
+
+bool os::is_allocatable(size_t bytes) {
+
+  if (bytes < 2 * G) {
+    return true;
+  }
+
+  char* addr = reserve_memory(bytes, NULL);
+
+  if (addr != NULL) {
+    release_memory(addr, bytes);
+  }
+
+  return addr != NULL;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// thread stack
+
+// Minimum usable stack sizes required to get to user code. Space for
+// HotSpot guard pages is added later.
+size_t os::Posix::_compiler_thread_min_stack_allowed = 48 * K;
+size_t os::Posix::_java_thread_min_stack_allowed = 40 * K;
+size_t os::Posix::_vm_internal_thread_min_stack_allowed = 64 * K;
+
+// Return default stack size for thr_type
+size_t os::Posix::default_stack_size(os::ThreadType thr_type) {
+  // Default stack size (compiler thread needs larger stack)
+  size_t s = (thr_type == os::compiler_thread ?
2 * M : 512 * K); + return s; +} + +///////////////////////////////////////////////////////////////////////////// +// helper functions for fatal error handler +void os::print_register_info(outputStream *st, const void *context) { + if (context == NULL) return; + + ucontext_t *uc = (ucontext_t*)context; + + st->print_cr("Register to memory mapping:"); + st->cr(); + // this is horrendously verbose but the layout of the registers in the + // // context does not match how we defined our abstract Register set, so + // // we can't just iterate through the gregs area + // + // // this is only for the "general purpose" registers + st->print("ZERO=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[0]); + st->print("RA=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[1]); + st->print("TP=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[2]); + st->print("SP=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[3]); + st->cr(); + st->print("A0=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[4]); + st->print("A1=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[5]); + st->print("A2=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[6]); + st->print("A3=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[7]); + st->cr(); + st->print("A4=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[8]); + st->print("A5=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[9]); + st->print("A6=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[10]); + st->print("A7=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[11]); + st->cr(); + st->print("T0=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[12]); + st->print("T1=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[13]); + st->print("T2=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[14]); + st->print("T3=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[15]); + st->cr(); + st->print("T4=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[16]); + st->print("T5=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[17]); + st->print("T6=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[18]); + st->print("T7=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[19]); + st->cr(); + st->print("T8=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[20]); + st->print("RX=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[21]); + st->print("FP=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[22]); + st->print("S0=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[23]); + st->cr(); + st->print("S1=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[24]); + st->print("S2=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[25]); + st->print("S3=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[26]); + st->print("S4=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[27]); + st->cr(); + st->print("S5=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[28]); + st->print("S6=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[29]); + st->print("S7=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[30]); + st->print("S8=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[31]); + st->cr(); + +} + +void os::print_context(outputStream *st, const void *context) { + if (context == NULL) return; + + const ucontext_t *uc = (const ucontext_t*)context; + st->print_cr("Registers:"); + st->print( "ZERO=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[0]); + st->print(", 
RA=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[1]); + st->print(", TP=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[2]); + st->print(", SP=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[3]); + st->cr(); + st->print( "A0=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[4]); + st->print(", A1=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[5]); + st->print(", A2=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[6]); + st->print(", A3=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[7]); + st->cr(); + st->print( "A4=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[8]); + st->print(", A5=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[9]); + st->print(", A6=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[10]); + st->print(", A7=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[11]); + st->cr(); + st->print( "T0=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[12]); + st->print(", T1=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[13]); + st->print(", T2=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[14]); + st->print(", T3=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[15]); + st->cr(); + st->print( "T4=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[16]); + st->print(", T5=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[17]); + st->print(", T6=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[18]); + st->print(", T7=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[19]); + st->cr(); + st->print( "T8=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[20]); + st->print(", RX=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[21]); + st->print(", FP=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[22]); + st->print(", S0=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[23]); + st->cr(); + st->print( "S1=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[24]); + st->print(", S2=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[25]); + st->print(", S3=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[26]); + st->print(", S4=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[27]); + st->cr(); + st->print( "S5=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[28]); + st->print(", S6=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[29]); + st->print(", S7=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[30]); + st->print(", S8=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[31]); + st->cr(); + st->cr(); + + intptr_t *sp = (intptr_t *)os::Linux::ucontext_get_sp(uc); + st->print_cr("Top of Stack: (sp=" PTR_FORMAT ")", p2i(sp)); + print_hex_dump(st, (address)(sp - 32), (address)(sp + 32), sizeof(intptr_t)); + st->cr(); + + // Note: it may be unsafe to inspect memory near pc. For example, pc may + // point to garbage if entry point in an nmethod is corrupted. Leave + // this at the end, and hope for the best. + address pc = os::Linux::ucontext_get_pc(uc); + st->print_cr("Instructions: (pc=" PTR_FORMAT ")", p2i(pc)); + print_hex_dump(st, pc - 64, pc + 64, sizeof(char)); + Disassembler::decode(pc - 80, pc + 80, st); +} + +void os::setup_fpu() { + // no use for LA +} + +#ifndef PRODUCT +void os::verify_stack_alignment() { + assert(((intptr_t)os::current_stack_pointer() & (StackAlignmentInBytes-1)) == 0, "incorrect stack alignment"); +} +#endif + +int os::extra_bang_size_in_bytes() { + // LA does not require the additional stack bang. 
+ return 0; +} + +bool os::is_ActiveCoresMP() { + return UseActiveCoresMP && _initial_active_processor_count == 1; +} diff --git a/src/hotspot/os_cpu/linux_loongarch/os_linux_loongarch.hpp b/src/hotspot/os_cpu/linux_loongarch/os_linux_loongarch.hpp new file mode 100644 index 00000000000..fa02f8ba2f9 --- /dev/null +++ b/src/hotspot/os_cpu/linux_loongarch/os_linux_loongarch.hpp @@ -0,0 +1,38 @@ +/* + * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef OS_CPU_LINUX_LOONGARCH_OS_LINUX_LOONGARCH_HPP +#define OS_CPU_LINUX_LOONGARCH_OS_LINUX_LOONGARCH_HPP + + static void setup_fpu(); + static bool is_allocatable(size_t bytes); + + // Used to register dynamic code cache area with the OS + // Note: Currently only used in 64 bit Windows implementations + static bool register_code_area(char *low, char *high) { return true; } + + static bool is_ActiveCoresMP(); + +#endif // OS_CPU_LINUX_LOONGARCH_OS_LINUX_LOONGARCH_HPP diff --git a/src/hotspot/os_cpu/linux_loongarch/prefetch_linux_loongarch.inline.hpp b/src/hotspot/os_cpu/linux_loongarch/prefetch_linux_loongarch.inline.hpp new file mode 100644 index 00000000000..cf3a596387c --- /dev/null +++ b/src/hotspot/os_cpu/linux_loongarch/prefetch_linux_loongarch.inline.hpp @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef OS_CPU_LINUX_LOONGARCH_PREFETCH_LINUX_LOONGARCH_INLINE_HPP +#define OS_CPU_LINUX_LOONGARCH_PREFETCH_LINUX_LOONGARCH_INLINE_HPP + + +inline void Prefetch::read (void *loc, intx interval) { +// According to previous and present SPECjbb2015 score, +// comment prefetch is better than if (interval >= 0) prefetch branch. +// So choose comment prefetch as the base line. +#if 0 + __asm__ __volatile__ ( + " preld 0, %[__loc] \n" + : + : [__loc] "m"( *((address)loc + interval) ) + : "memory" + ); +#endif +} + +inline void Prefetch::write(void *loc, intx interval) { +// Ditto +#if 0 + __asm__ __volatile__ ( + " preld 8, %[__loc] \n" + : + : [__loc] "m"( *((address)loc + interval) ) + : "memory" + ); +#endif +} + +#endif // OS_CPU_LINUX_LOONGARCH_PREFETCH_LINUX_LOONGARCH_INLINE_HPP diff --git a/src/hotspot/os_cpu/linux_loongarch/thread_linux_loongarch.cpp b/src/hotspot/os_cpu/linux_loongarch/thread_linux_loongarch.cpp new file mode 100644 index 00000000000..a1a9f181bdc --- /dev/null +++ b/src/hotspot/os_cpu/linux_loongarch/thread_linux_loongarch.cpp @@ -0,0 +1,116 @@ +/* + * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "memory/metaspaceShared.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/thread.inline.hpp" +#include "runtime/sharedRuntime.hpp" + +void JavaThread::pd_initialize() +{ + _anchor.clear(); +} + +frame JavaThread::pd_last_frame() { + assert(has_last_Java_frame(), "must have last_Java_sp() when suspended"); + if (_anchor.last_Java_pc() != NULL) { + return frame(_anchor.last_Java_sp(), _anchor.last_Java_fp(), _anchor.last_Java_pc()); + } else { + // This will pick up pc from sp + return frame(_anchor.last_Java_sp(), _anchor.last_Java_fp()); + } +} + +// For Forte Analyzer AsyncGetCallTrace profiling support - thread is +// currently interrupted by SIGPROF +bool JavaThread::pd_get_top_frame_for_signal_handler(frame* fr_addr, + void* ucontext, bool isInJava) { + + assert(Thread::current() == this, "caller must be current thread"); + return pd_get_top_frame(fr_addr, ucontext, isInJava); +} + + +bool JavaThread::pd_get_top_frame_for_profiling(frame* fr_addr, void* ucontext, bool isInJava) { + return pd_get_top_frame(fr_addr, ucontext, isInJava); +} + +bool JavaThread::pd_get_top_frame(frame* fr_addr, void* ucontext, bool isInJava) { + assert(this->is_Java_thread(), "must be JavaThread"); + JavaThread* jt = (JavaThread *)this; + + // If we have a last_Java_frame, then we should use it even if + // isInJava == true. It should be more reliable than ucontext info. + if (jt->has_last_Java_frame() && jt->frame_anchor()->walkable()) { + *fr_addr = jt->pd_last_frame(); + return true; + } + + // At this point, we don't have a last_Java_frame, so + // we try to glean some information out of the ucontext + // if we were running Java code when SIGPROF came in. + if (isInJava) { + ucontext_t* uc = (ucontext_t*) ucontext; + + intptr_t* ret_fp; + intptr_t* ret_sp; + ExtendedPC addr = os::Linux::fetch_frame_from_ucontext(this, uc, + &ret_sp, &ret_fp); + if (addr.pc() == NULL || ret_sp == NULL ) { + // ucontext wasn't useful + return false; + } + + if (MetaspaceShared::is_in_trampoline_frame(addr.pc())) { + // In the middle of a trampoline call. Bail out for safety. + // This happens rarely so shouldn't affect profiling. + return false; + } + + frame ret_frame(ret_sp, ret_fp, addr.pc()); + if (!ret_frame.safe_for_sender(jt)) { +#ifdef COMPILER2 + // C2 and JVMCI use ebp as a general register see if NULL fp helps + frame ret_frame2(ret_sp, NULL, addr.pc()); + if (!ret_frame2.safe_for_sender(jt)) { + // nothing else to try if the frame isn't good + return false; + } + ret_frame = ret_frame2; +#else + // nothing else to try if the frame isn't good + return false; +#endif // COMPILER2_OR_JVMCI + } + *fr_addr = ret_frame; + return true; + } + + // nothing else to try + return false; +} + +void JavaThread::cache_global_variables() { } diff --git a/src/hotspot/os_cpu/linux_loongarch/thread_linux_loongarch.hpp b/src/hotspot/os_cpu/linux_loongarch/thread_linux_loongarch.hpp new file mode 100644 index 00000000000..a3ac28ebd3c --- /dev/null +++ b/src/hotspot/os_cpu/linux_loongarch/thread_linux_loongarch.hpp @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef OS_CPU_LINUX_LOONGARCH_VM_THREAD_LINUX_LOONGARCH_HPP +#define OS_CPU_LINUX_LOONGARCH_VM_THREAD_LINUX_LOONGARCH_HPP + + private: + void pd_initialize(); + + frame pd_last_frame(); + + public: + // Mutators are highly dangerous.... + intptr_t* last_Java_fp() { return _anchor.last_Java_fp(); } + void set_last_Java_fp(intptr_t* fp) { _anchor.set_last_Java_fp(fp); } + + void set_base_of_stack_pointer(intptr_t* base_sp) { + } + + static ByteSize last_Java_fp_offset() { + return byte_offset_of(JavaThread, _anchor) + JavaFrameAnchor::last_Java_fp_offset(); + } + + intptr_t* base_of_stack_pointer() { + return NULL; + } + void record_base_of_stack_pointer() { + } + + bool pd_get_top_frame_for_signal_handler(frame* fr_addr, void* ucontext, + bool isInJava); + + bool pd_get_top_frame_for_profiling(frame* fr_addr, void* ucontext, bool isInJava); +private: + bool pd_get_top_frame(frame* fr_addr, void* ucontext, bool isInJava); +public: + + // These routines are only used on cpu architectures that + // have separate register stacks (Itanium). + static bool register_stack_overflow() { return false; } + static void enable_register_stack_guard() {} + static void disable_register_stack_guard() {} + +#endif // OS_CPU_LINUX_LOONGARCH_VM_THREAD_LINUX_LOONGARCH_HPP diff --git a/src/hotspot/os_cpu/linux_loongarch/vmStructs_linux_loongarch.hpp b/src/hotspot/os_cpu/linux_loongarch/vmStructs_linux_loongarch.hpp new file mode 100644 index 00000000000..a39cb79bb1e --- /dev/null +++ b/src/hotspot/os_cpu/linux_loongarch/vmStructs_linux_loongarch.hpp @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef OS_CPU_LINUX_LOONGARCH_VMSTRUCTS_LINUX_LOONGARCH_HPP +#define OS_CPU_LINUX_LOONGARCH_VMSTRUCTS_LINUX_LOONGARCH_HPP + +// These are the OS and CPU-specific fields, types and integer +// constants required by the Serviceability Agent. This file is +// referenced by vmStructs.cpp. + +#define VM_STRUCTS_OS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \ + \ + /******************************/ \ + /* Threads (NOTE: incomplete) */ \ + /******************************/ \ + nonstatic_field(OSThread, _thread_id, pid_t) \ + nonstatic_field(OSThread, _pthread_id, pthread_t) + + +#define VM_TYPES_OS_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) \ + \ + /**********************/ \ + /* Posix Thread IDs */ \ + /**********************/ \ + \ + declare_integer_type(pid_t) \ + declare_unsigned_integer_type(pthread_t) + +#define VM_INT_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) + +#define VM_LONG_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) + +#endif // OS_CPU_LINUX_LOONGARCH_VMSTRUCTS_LINUX_LOONGARCH_HPP diff --git a/src/hotspot/os_cpu/linux_loongarch/vm_version_linux_loongarch.cpp b/src/hotspot/os_cpu/linux_loongarch/vm_version_linux_loongarch.cpp new file mode 100644 index 00000000000..edc148ef915 --- /dev/null +++ b/src/hotspot/os_cpu/linux_loongarch/vm_version_linux_loongarch.cpp @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2006, 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2023, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "runtime/os.hpp" +#include "runtime/vm_version.hpp" + +#include +#include + +#ifndef HWCAP_LOONGARCH_LAM +#define HWCAP_LOONGARCH_LAM (1 << 1) +#endif + +#ifndef HWCAP_LOONGARCH_UAL +#define HWCAP_LOONGARCH_UAL (1 << 2) +#endif + +#ifndef HWCAP_LOONGARCH_LSX +#define HWCAP_LOONGARCH_LSX (1 << 4) +#endif + +#ifndef HWCAP_LOONGARCH_LASX +#define HWCAP_LOONGARCH_LASX (1 << 5) +#endif + +#ifndef HWCAP_LOONGARCH_COMPLEX +#define HWCAP_LOONGARCH_COMPLEX (1 << 7) +#endif + +#ifndef HWCAP_LOONGARCH_CRYPTO +#define HWCAP_LOONGARCH_CRYPTO (1 << 8) +#endif + +#ifndef HWCAP_LOONGARCH_LBT_X86 +#define HWCAP_LOONGARCH_LBT_X86 (1 << 10) +#endif + +#ifndef HWCAP_LOONGARCH_LBT_ARM +#define HWCAP_LOONGARCH_LBT_ARM (1 << 11) +#endif + +#ifndef HWCAP_LOONGARCH_LBT_MIPS +#define HWCAP_LOONGARCH_LBT_MIPS (1 << 12) +#endif + +void VM_Version::get_os_cpu_info() { + + uint64_t auxv = getauxval(AT_HWCAP); + + STATIC_ASSERT(CPU_LAM == HWCAP_LOONGARCH_LAM); + STATIC_ASSERT(CPU_UAL == HWCAP_LOONGARCH_UAL); + STATIC_ASSERT(CPU_LSX == HWCAP_LOONGARCH_LSX); + STATIC_ASSERT(CPU_LASX == HWCAP_LOONGARCH_LASX); + STATIC_ASSERT(CPU_COMPLEX == HWCAP_LOONGARCH_COMPLEX); + STATIC_ASSERT(CPU_CRYPTO == HWCAP_LOONGARCH_CRYPTO); + STATIC_ASSERT(CPU_LBT_X86 == HWCAP_LOONGARCH_LBT_X86); + STATIC_ASSERT(CPU_LBT_ARM == HWCAP_LOONGARCH_LBT_ARM); + STATIC_ASSERT(CPU_LBT_MIPS == HWCAP_LOONGARCH_LBT_MIPS); + + _features = auxv & ( + HWCAP_LOONGARCH_LAM | + HWCAP_LOONGARCH_UAL | + HWCAP_LOONGARCH_LSX | + HWCAP_LOONGARCH_LASX | + HWCAP_LOONGARCH_COMPLEX | + HWCAP_LOONGARCH_CRYPTO | + HWCAP_LOONGARCH_LBT_X86 | + HWCAP_LOONGARCH_LBT_ARM | + HWCAP_LOONGARCH_LBT_MIPS); +} diff --git a/src/hotspot/os_cpu/linux_mips/assembler_linux_mips.cpp b/src/hotspot/os_cpu/linux_mips/assembler_linux_mips.cpp new file mode 100644 index 00000000000..30719a0340b --- /dev/null +++ b/src/hotspot/os_cpu/linux_mips/assembler_linux_mips.cpp @@ -0,0 +1,24 @@ +/* + * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ diff --git a/src/hotspot/os_cpu/linux_mips/atomic_linux_mips.hpp b/src/hotspot/os_cpu/linux_mips/atomic_linux_mips.hpp new file mode 100644 index 00000000000..cd7cecad63a --- /dev/null +++ b/src/hotspot/os_cpu/linux_mips/atomic_linux_mips.hpp @@ -0,0 +1,191 @@ +/* + * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. 
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef OS_CPU_LINUX_MIPS_VM_ATOMIC_LINUX_MIPS_HPP
+#define OS_CPU_LINUX_MIPS_VM_ATOMIC_LINUX_MIPS_HPP
+
+#include "runtime/vm_version.hpp"
+
+// Implementation of class atomic
+
+template<size_t byte_size>
+struct Atomic::PlatformAdd
+  : Atomic::AddAndFetch<Atomic::PlatformAdd<byte_size> >
+{
+  template<typename I, typename D>
+  D add_and_fetch(I add_value, D volatile* dest, atomic_memory_order order) const {
+    //Unimplemented();
+    return __sync_add_and_fetch(dest, add_value);
+  }
+};
+
+template<>
+template<typename T>
+inline T Atomic::PlatformXchg<4>::operator()(T exchange_value,
+                                             T volatile* dest,
+                                             atomic_memory_order order) const {
+  T __ret, __tmp;
+
+  STATIC_ASSERT(4 == sizeof(T));
+  __asm__ __volatile__ (
+    " .set push\n\t"
+    " .set mips64\n\t"
+    " .set noreorder\n\t"
+
+    "1: sync\n\t"
+    " ll %[__ret], %[__dest] \n\t"
+    " move %[__tmp], %[__val] \n\t"
+    " sc %[__tmp], %[__dest] \n\t"
+    " beqz %[__tmp], 1b \n\t"
+    " nop \n\t"
+
+    " .set pop\n\t"
+
+    : [__ret] "=&r" (__ret), [__tmp] "=&r" (__tmp)
+    : [__dest] "m" (*(volatile jint*)dest), [__val] "r" (exchange_value)
+    : "memory"
+  );
+
+  return __ret;
+}
+
+template<>
+template<typename T>
+inline T Atomic::PlatformXchg<8>::operator()(T exchange_value,
+                                             T volatile* dest,
+                                             atomic_memory_order order) const {
+  STATIC_ASSERT(8 == sizeof(T));
+  T __ret;
+  jlong __tmp;
+  __asm__ __volatile__ (
+    " .set push\n\t"
+    " .set mips64\n\t"
+    " .set noreorder\n\t"
+
+    "1: sync\n\t"
+    " lld %[__ret], %[__dest] \n\t"
+    " move %[__tmp], %[__val] \n\t"
+    " scd %[__tmp], %[__dest] \n\t"
+    " beqz %[__tmp], 1b \n\t"
+    " nop \n\t"
+
+    " .set pop\n\t"
+
+    : [__ret] "=&r" (__ret), [__tmp] "=&r" (__tmp)
+    : [__dest] "m" (*(volatile intptr_t*)dest), [__val] "r" (exchange_value)
+    : "memory"
+  );
+  return __ret;
+}
+
+#if 0
+template<>
+template<typename T>
+inline T Atomic::PlatformCmpxchg<1>::operator()(T exchange_value,
+                                                T volatile* dest,
+                                                T compare_value,
+                                                atomic_memory_order order) const {
+  STATIC_ASSERT(1 == sizeof(T));
+}
+
+#else
+// No direct support for cmpxchg of bytes; emulate using int.
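+// CmpxchgByteUsingInt (shared code in atomic.hpp) performs the 1-byte compare-and-swap
+// as a 4-byte cmpxchg on the aligned word containing the byte, retrying when the
+// operation fails only because neighbouring bytes changed concurrently.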
+template<>
+struct Atomic::PlatformCmpxchg<1> : Atomic::CmpxchgByteUsingInt {};
+#endif
+
+template<>
+template<typename T>
+inline T Atomic::PlatformCmpxchg<4>::operator()(T exchange_value,
+                                                T volatile* dest,
+                                                T compare_value,
+                                                atomic_memory_order order) const {
+  STATIC_ASSERT(4 == sizeof(T));
+  T __prev;
+  jint __cmp;
+
+  __asm__ __volatile__ (
+    " .set push\n\t"
+    " .set mips64\n\t"
+    " .set noreorder\n\t"
+
+    "1:sync \n\t"
+    " ll %[__prev], %[__dest] \n\t"
+    " bne %[__prev], %[__old], 2f \n\t"
+    " move %[__cmp], $0 \n\t"
+    " move %[__cmp], %[__new] \n\t"
+    " sc %[__cmp], %[__dest] \n\t"
+    " beqz %[__cmp], 1b \n\t"
+    " nop \n\t"
+    "2: \n\t"
+    " sync \n\t"
+
+    " .set pop\n\t"
+
+    : [__prev] "=&r" (__prev), [__cmp] "=&r" (__cmp)
+    : [__dest] "m" (*(volatile jint*)dest), [__old] "r" (compare_value), [__new] "r" (exchange_value)
+    : "memory"
+  );
+
+  return __prev;
+}
+
+template<>
+template<typename T>
+inline T Atomic::PlatformCmpxchg<8>::operator()(T exchange_value,
+                                                T volatile* dest,
+                                                T compare_value,
+                                                atomic_memory_order order) const {
+  STATIC_ASSERT(8 == sizeof(T));
+  T __prev;
+  jlong __cmp;
+
+  __asm__ __volatile__ (
+    " .set push\n\t"
+    " .set mips64\n\t"
+    " .set noreorder\n\t"
+
+    "1:sync \n\t"
+    " lld %[__prev], %[__dest] \n\t"
+    " bne %[__prev], %[__old], 2f \n\t"
+    " move %[__cmp], $0 \n\t"
+    " move %[__cmp], %[__new] \n\t"
+    " scd %[__cmp], %[__dest] \n\t"
+    " beqz %[__cmp], 1b \n\t"
+    " nop \n\t"
+    "2: \n\t"
+    " sync \n\t"
+
+    " .set pop\n\t"
+
+    : [__prev] "=&r" (__prev), [__cmp] "=&r" (__cmp)
+    : [__dest] "m" (*(volatile jlong*)dest), [__old] "r" (compare_value), [__new] "r" (exchange_value)
+    : "memory"
+  );
+  return __prev;
+}
+
+
+#endif // OS_CPU_LINUX_MIPS_VM_ATOMIC_LINUX_MIPS_HPP
diff --git a/src/hotspot/os_cpu/linux_mips/bytes_linux_mips.inline.hpp b/src/hotspot/os_cpu/linux_mips/bytes_linux_mips.inline.hpp
new file mode 100644
index 00000000000..5b5cd10aa55
--- /dev/null
+++ b/src/hotspot/os_cpu/linux_mips/bytes_linux_mips.inline.hpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef OS_CPU_LINUX_MIPS_VM_BYTES_LINUX_MIPS_INLINE_HPP
+#define OS_CPU_LINUX_MIPS_VM_BYTES_LINUX_MIPS_INLINE_HPP
+
+#include <byteswap.h>
+
+// Efficient swapping of data bytes from Java byte
+// ordering to native byte ordering and vice versa.
+inline u2 Bytes::swap_u2(u2 x) { return bswap_16(x); } +inline u4 Bytes::swap_u4(u4 x) { return bswap_32(x); } +inline u8 Bytes::swap_u8(u8 x) { return bswap_64(x); } + +#endif // OS_CPU_LINUX_MIPS_VM_BYTES_LINUX_MIPS_INLINE_HPP diff --git a/src/hotspot/os_cpu/linux_mips/copy_linux_mips.inline.hpp b/src/hotspot/os_cpu/linux_mips/copy_linux_mips.inline.hpp new file mode 100644 index 00000000000..3fd6ef7b36c --- /dev/null +++ b/src/hotspot/os_cpu/linux_mips/copy_linux_mips.inline.hpp @@ -0,0 +1,125 @@ +/* + * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef OS_CPU_LINUX_MIPS_VM_COPY_LINUX_MIPS_INLINE_HPP +#define OS_CPU_LINUX_MIPS_VM_COPY_LINUX_MIPS_INLINE_HPP + +static void pd_conjoint_words(const HeapWord* from, HeapWord* to, size_t count) { + (void)memmove(to, from, count * HeapWordSize); +} + +static void pd_disjoint_words(const HeapWord* from, HeapWord* to, size_t count) { + switch (count) { + case 8: to[7] = from[7]; + case 7: to[6] = from[6]; + case 6: to[5] = from[5]; + case 5: to[4] = from[4]; + case 4: to[3] = from[3]; + case 3: to[2] = from[2]; + case 2: to[1] = from[1]; + case 1: to[0] = from[0]; + case 0: break; + default: + (void)memcpy(to, from, count * HeapWordSize); + break; + } +} + +static void pd_disjoint_words_atomic(const HeapWord* from, HeapWord* to, size_t count) { + switch (count) { + case 8: to[7] = from[7]; + case 7: to[6] = from[6]; + case 6: to[5] = from[5]; + case 5: to[4] = from[4]; + case 4: to[3] = from[3]; + case 3: to[2] = from[2]; + case 2: to[1] = from[1]; + case 1: to[0] = from[0]; + case 0: break; + default: + while (count-- > 0) { + *to++ = *from++; + } + break; + } +} + +static void pd_aligned_conjoint_words(const HeapWord* from, HeapWord* to, size_t count) { + pd_conjoint_words(from, to, count); +} + +static void pd_aligned_disjoint_words(const HeapWord* from, HeapWord* to, size_t count) { + pd_disjoint_words(from, to, count); +} + +static void pd_conjoint_bytes(const void* from, void* to, size_t count) { + (void)memmove(to, from, count); +} + +static void pd_conjoint_bytes_atomic(const void* from, void* to, size_t count) { + pd_conjoint_bytes(from, to, count); +} + +static void pd_conjoint_jshorts_atomic(const jshort* from, jshort* to, size_t count) { + copy_conjoint_atomic(from, to, count); +} + +static void pd_conjoint_jints_atomic(const jint* from, jint* to, size_t count) { + copy_conjoint_atomic(from, to, count); +} + +static void 
pd_conjoint_jlongs_atomic(const jlong* from, jlong* to, size_t count) { + copy_conjoint_atomic(from, to, count); +} + +static void pd_conjoint_oops_atomic(const oop* from, oop* to, size_t count) { + //assert(!UseCompressedOops, "foo!"); + assert(HeapWordSize == BytesPerOop, "heapwords and oops must be the same size"); + copy_conjoint_atomic(from, to, count); +} + +static void pd_arrayof_conjoint_bytes(const HeapWord* from, HeapWord* to, size_t count) { + pd_conjoint_bytes_atomic(from, to, count); +} + +static void pd_arrayof_conjoint_jshorts(const HeapWord* from, HeapWord* to, size_t count) { + pd_conjoint_jshorts_atomic((jshort*)from, (jshort*)to, count); +} + +static void pd_arrayof_conjoint_jints(const HeapWord* from, HeapWord* to, size_t count) { + pd_conjoint_jints_atomic((jint*)from, (jint*)to, count); +} + +static void pd_arrayof_conjoint_jlongs(const HeapWord* from, HeapWord* to, size_t count) { + pd_conjoint_jlongs_atomic((jlong*)from, (jlong*)to, count); +} + +static void pd_arrayof_conjoint_oops(const HeapWord* from, HeapWord* to, size_t count) { + //assert(!UseCompressedOops, "foo!"); + assert(BytesPerLong == BytesPerOop, "jlongs and oops must be the same size"); + pd_conjoint_oops_atomic((oop*)from, (oop*)to, count); +} + +#endif // OS_CPU_LINUX_MIPS_VM_COPY_LINUX_MIPS_INLINE_HPP diff --git a/src/hotspot/os_cpu/linux_mips/globals_linux_mips.hpp b/src/hotspot/os_cpu/linux_mips/globals_linux_mips.hpp new file mode 100644 index 00000000000..f1599ac5f17 --- /dev/null +++ b/src/hotspot/os_cpu/linux_mips/globals_linux_mips.hpp @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef OS_CPU_LINUX_MIPS_VM_GLOBALS_LINUX_MIPS_HPP +#define OS_CPU_LINUX_MIPS_VM_GLOBALS_LINUX_MIPS_HPP + +// Sets the default values for platform dependent flags used by the runtime system. +// (see globals.hpp) + +define_pd_global(bool, DontYieldALot, false); +#ifdef MIPS64 +define_pd_global(intx, ThreadStackSize, 1024); // 0 => use system default +define_pd_global(intx, VMThreadStackSize, 1024); +#else +// ThreadStackSize 320 allows a couple of test cases to run while +// keeping the number of threads that can be created high. System +// default ThreadStackSize appears to be 512 which is too big. 
+define_pd_global(intx, ThreadStackSize, 320); +define_pd_global(intx, VMThreadStackSize, 512); +#endif // MIPS64 + +define_pd_global(intx, CompilerThreadStackSize, 0); + +define_pd_global(uintx,JVMInvokeMethodSlack, 8192); + +// Used on 64 bit platforms for UseCompressedOops base address +define_pd_global(uintx,HeapBaseMinAddress, 2*G); + +#endif // OS_CPU_LINUX_MIPS_VM_GLOBALS_LINUX_MIPS_HPP diff --git a/src/hotspot/os_cpu/linux_mips/linux_mips.s b/src/hotspot/os_cpu/linux_mips/linux_mips.s new file mode 100644 index 00000000000..36c8d810c3c --- /dev/null +++ b/src/hotspot/os_cpu/linux_mips/linux_mips.s @@ -0,0 +1,25 @@ +# +# Copyright (c) 2004, 2013, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. +# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +# +# This code is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License version 2 only, as +# published by the Free Software Foundation. +# +# This code is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +# version 2 for more details (a copy is included in the LICENSE file that +# accompanied this code). +# +# You should have received a copy of the GNU General Public License version +# 2 along with this work; if not, write to the Free Software Foundation, +# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +# +# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +# or visit www.oracle.com if you need additional information or have any +# questions. +# + + diff --git a/src/hotspot/os_cpu/linux_mips/orderAccess_linux_mips.hpp b/src/hotspot/os_cpu/linux_mips/orderAccess_linux_mips.hpp new file mode 100644 index 00000000000..bf9d6797306 --- /dev/null +++ b/src/hotspot/os_cpu/linux_mips/orderAccess_linux_mips.hpp @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef OS_CPU_LINUX_MIPS_VM_ORDERACCESS_LINUX_MIPS_HPP +#define OS_CPU_LINUX_MIPS_VM_ORDERACCESS_LINUX_MIPS_HPP + +#include "runtime/os.hpp" + +// Included in orderAccess.hpp header file. + +// Implementation of class OrderAccess. 
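+// inlasm_sync() is the only ordering primitive used below: when the VM is effectively
+// running on a single active core (os::is_ActiveCoresMP()), a 'nop' is sufficient,
+// otherwise a full MIPS 'sync' barrier is emitted for every ordering operation.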
+#define inlasm_sync() if (os::is_ActiveCoresMP()) \ + __asm__ __volatile__ ("nop" : : : "memory"); \ + else \ + __asm__ __volatile__ ("sync" : : : "memory"); + +inline void OrderAccess::loadload() { inlasm_sync(); } +inline void OrderAccess::storestore() { inlasm_sync(); } +inline void OrderAccess::loadstore() { inlasm_sync(); } +inline void OrderAccess::storeload() { inlasm_sync(); } + +inline void OrderAccess::acquire() { inlasm_sync(); } +inline void OrderAccess::release() { inlasm_sync(); } +inline void OrderAccess::fence() { inlasm_sync(); } + + +#undef inlasm_sync + +#endif // OS_CPU_LINUX_MIPS_VM_ORDERACCESS_LINUX_MIPS_HPP diff --git a/src/hotspot/os_cpu/linux_mips/os_linux_mips.cpp b/src/hotspot/os_cpu/linux_mips/os_linux_mips.cpp new file mode 100644 index 00000000000..d035d8edbb8 --- /dev/null +++ b/src/hotspot/os_cpu/linux_mips/os_linux_mips.cpp @@ -0,0 +1,1020 @@ +/* + * Copyright (c) 1999, 2014, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2023, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +// no precompiled headers +#include "asm/macroAssembler.hpp" +#include "classfile/classLoader.hpp" +#include "classfile/systemDictionary.hpp" +#include "classfile/vmSymbols.hpp" +#include "code/icBuffer.hpp" +#include "code/vtableStubs.hpp" +#include "interpreter/interpreter.hpp" +#include "memory/allocation.inline.hpp" +#include "os_share_linux.hpp" +#include "prims/jniFastGetField.hpp" +#include "prims/jvm_misc.hpp" +#include "runtime/arguments.hpp" +#include "runtime/extendedPC.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/interfaceSupport.inline.hpp" +#include "runtime/java.hpp" +#include "runtime/javaCalls.hpp" +#include "runtime/mutexLocker.hpp" +#include "runtime/osThread.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#include "runtime/thread.inline.hpp" +#include "runtime/timer.hpp" +#include "utilities/events.hpp" +#include "utilities/vmError.hpp" +#include "compiler/disassembler.hpp" + +// put OS-includes here +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include + +#define REG_SP 29 +#define REG_FP 30 + +address os::current_stack_pointer() { + register void *sp __asm__ ("$29"); + return (address) sp; +} + +char* os::non_memory_address_word() { + // Must never look like an address returned by reserve_memory, + // even in its subfields (as defined by the CPU immediate fields, + // if the CPU splits constants across multiple instructions). + + return (char*) -1; +} + +address os::Linux::ucontext_get_pc(const ucontext_t * uc) { + return (address)uc->uc_mcontext.pc; +} + +void os::Linux::ucontext_set_pc(ucontext_t * uc, address pc) { + uc->uc_mcontext.pc = (intptr_t)pc; +} + +intptr_t* os::Linux::ucontext_get_sp(const ucontext_t * uc) { + return (intptr_t*)uc->uc_mcontext.gregs[REG_SP]; +} + +intptr_t* os::Linux::ucontext_get_fp(const ucontext_t * uc) { + return (intptr_t*)uc->uc_mcontext.gregs[REG_FP]; +} + +// For Forte Analyzer AsyncGetCallTrace profiling support - thread +// is currently interrupted by SIGPROF. +// os::Solaris::fetch_frame_from_ucontext() tries to skip nested signal +// frames. Currently we don't do that on Linux, so it's the same as +// os::fetch_frame_from_context(). 
+ExtendedPC os::Linux::fetch_frame_from_ucontext(Thread* thread, + const ucontext_t* uc, intptr_t** ret_sp, intptr_t** ret_fp) { + + assert(thread != NULL, "just checking"); + assert(ret_sp != NULL, "just checking"); + assert(ret_fp != NULL, "just checking"); + + return os::fetch_frame_from_context(uc, ret_sp, ret_fp); +} + +ExtendedPC os::fetch_frame_from_context(const void* ucVoid, + intptr_t** ret_sp, intptr_t** ret_fp) { + + ExtendedPC epc; + ucontext_t* uc = (ucontext_t*)ucVoid; + + if (uc != NULL) { + epc = ExtendedPC(os::Linux::ucontext_get_pc(uc)); + if (ret_sp) *ret_sp = os::Linux::ucontext_get_sp(uc); + if (ret_fp) *ret_fp = os::Linux::ucontext_get_fp(uc); + } else { + // construct empty ExtendedPC for return value checking + epc = ExtendedPC(NULL); + if (ret_sp) *ret_sp = (intptr_t *)NULL; + if (ret_fp) *ret_fp = (intptr_t *)NULL; + } + + return epc; +} + +frame os::fetch_frame_from_context(const void* ucVoid) { + intptr_t* sp; + intptr_t* fp; + ExtendedPC epc = fetch_frame_from_context(ucVoid, &sp, &fp); + return frame(sp, fp, epc.pc()); +} + +bool os::Linux::get_frame_at_stack_banging_point(JavaThread* thread, ucontext_t* uc, frame* fr) { + address pc = (address) os::Linux::ucontext_get_pc(uc); + if (Interpreter::contains(pc)) { + // interpreter performs stack banging after the fixed frame header has + // been generated while the compilers perform it before. To maintain + // semantic consistency between interpreted and compiled frames, the + // method returns the Java sender of the current frame. + *fr = os::fetch_frame_from_context(uc); + if (!fr->is_first_java_frame()) { + assert(fr->safe_for_sender(thread), "Safety check"); + *fr = fr->java_sender(); + } + } else { + // more complex code with compiled code + assert(!Interpreter::contains(pc), "Interpreted methods should have been handled above"); + CodeBlob* cb = CodeCache::find_blob(pc); + if (cb == NULL || !cb->is_nmethod() || cb->is_frame_complete_at(pc)) { + // Not sure where the pc points to, fallback to default + // stack overflow handling + return false; + } else { + // In compiled code, the stack banging is performed before LR + // has been saved in the frame. RA is live, and SP and FP + // belong to the caller. + intptr_t* fp = os::Linux::ucontext_get_fp(uc); + intptr_t* sp = os::Linux::ucontext_get_sp(uc); + address pc = (address)(uc->uc_mcontext.gregs[31]); + *fr = frame(sp, fp, pc); + if (!fr->is_java_frame()) { + assert(fr->safe_for_sender(thread), "Safety check"); + assert(!fr->is_first_frame(), "Safety check"); + *fr = fr->java_sender(); + } + } + } + assert(fr->is_java_frame(), "Safety check"); + return true; +} + +// By default, gcc always save frame pointer (%ebp/%rbp) on stack. It may get +// turned off by -fomit-frame-pointer, +frame os::get_sender_for_C_frame(frame* fr) { + return frame(fr->sender_sp(), fr->link(), fr->sender_pc()); +} + +//intptr_t* _get_previous_fp() { +intptr_t* __attribute__((noinline)) os::get_previous_fp() { + int *pc; + intptr_t sp; + int *pc_limit = (int*)(void*)&os::get_previous_fp; + int insn; + + { + l_pc:; + pc = (int*)&&l_pc; + __asm__ __volatile__ ("move %0, $sp" : "=r" (sp)); + } + + do { + insn = *pc; + switch(bitfield(insn, 16, 16)) { + case 0x27bd: /* addiu $sp,$sp,-i */ + case 0x67bd: /* daddiu $sp,$sp,-i */ + assert ((short)bitfield(insn, 0, 16)<0, "bad frame"); + sp -= (short)bitfield(insn, 0, 16); + return (intptr_t*)sp; + } + --pc; + } while (pc>=pc_limit); // The initial value of pc may be equal to pc_limit, because of GCC optimization. 
+ + ShouldNotReachHere(); + return NULL; // mute compiler +} + + +frame os::current_frame() { + intptr_t* fp = (intptr_t*)get_previous_fp(); + frame myframe((intptr_t*)os::current_stack_pointer(), + (intptr_t*)fp, + CAST_FROM_FN_PTR(address, os::current_frame)); + if (os::is_first_C_frame(&myframe)) { + // stack is not walkable + return frame(); + } else { + return os::get_sender_for_C_frame(&myframe); + } +} + +//x86 add 2 new assemble function here! +extern "C" int +JVM_handle_linux_signal(int sig, + siginfo_t* info, + void* ucVoid, + int abort_if_unrecognized) { +#ifdef PRINT_SIGNAL_HANDLE + tty->print_cr("Signal: signo=%d, sicode=%d, sierrno=%d, siaddr=%lx", + info->si_signo, + info->si_code, + info->si_errno, + info->si_addr); +#endif + + ucontext_t* uc = (ucontext_t*) ucVoid; + + Thread* t = Thread::current_or_null_safe(); + + SignalHandlerMark shm(t); + + // Note: it's not uncommon that JNI code uses signal/sigset to install + // then restore certain signal handler (e.g. to temporarily block SIGPIPE, + // or have a SIGILL handler when detecting CPU type). When that happens, + // JVM_handle_linux_signal() might be invoked with junk info/ucVoid. To + // avoid unnecessary crash when libjsig is not preloaded, try handle signals + // that do not require siginfo/ucontext first. + + if (sig == SIGPIPE/* || sig == SIGXFSZ*/) { + // allow chained handler to go first + if (os::Linux::chained_handler(sig, info, ucVoid)) { + return true; + } else { + if (PrintMiscellaneous && (WizardMode || Verbose)) { + warning("Ignoring SIGPIPE - see bug 4229104"); + } + return true; + } + } + +#ifdef CAN_SHOW_REGISTERS_ON_ASSERT + if ((sig == SIGSEGV || sig == SIGBUS) && info != NULL && info->si_addr == g_assert_poison) { + handle_assert_poison_fault(ucVoid, info->si_addr); + return 1; + } +#endif + + JavaThread* thread = NULL; + VMThread* vmthread = NULL; + if (os::Linux::signal_handlers_are_installed) { + if (t != NULL ){ + if(t->is_Java_thread()) { +#ifdef PRINT_SIGNAL_HANDLE + tty->print_cr("this thread is a java thread"); +#endif + thread = (JavaThread*)t; + } + else if(t->is_VM_thread()){ +#ifdef PRINT_SIGNAL_HANDLE + tty->print_cr("this thread is a VM thread\n"); +#endif + vmthread = (VMThread *)t; + } + } + } + + // Handle SafeFetch faults: + if (uc != NULL) { + address const pc = (address) os::Linux::ucontext_get_pc(uc); + if (pc && StubRoutines::is_safefetch_fault(pc)) { + os::Linux::ucontext_set_pc(uc, StubRoutines::continuation_for_safefetch_fault(pc)); + return 1; + } + } + + // decide if this trap can be handled by a stub + address stub = NULL; + address pc = NULL; + + pc = (address) os::Linux::ucontext_get_pc(uc); +#ifdef PRINT_SIGNAL_HANDLE + tty->print_cr("pc=%lx", pc); + os::print_context(tty, uc); +#endif + //%note os_trap_1 + if (info != NULL && uc != NULL && thread != NULL) { + pc = (address) os::Linux::ucontext_get_pc(uc); + + // Handle ALL stack overflow variations here + if (sig == SIGSEGV) { + address addr = (address) info->si_addr; +#ifdef PRINT_SIGNAL_HANDLE + tty->print("handle all stack overflow variations: "); + /*tty->print("addr = %lx, stack base = %lx, stack top = %lx\n", + addr, + thread->stack_base(), + thread->stack_base() - thread->stack_size()); + */ +#endif + + // check if fault address is within thread stack + if (thread->on_local_stack(addr)) { + // stack overflow +#ifdef PRINT_SIGNAL_HANDLE + tty->print("stack exception check \n"); +#endif + if (thread->in_stack_yellow_reserved_zone(addr)) { +#ifdef PRINT_SIGNAL_HANDLE + tty->print("exception addr is in yellow 
zone\n"); +#endif + if (thread->thread_state() == _thread_in_Java) { + if (thread->in_stack_reserved_zone(addr)) { + frame fr; + if (os::Linux::get_frame_at_stack_banging_point(thread, uc, &fr)) { + assert(fr.is_java_frame(), "Must be a Java frame"); + frame activation = + SharedRuntime::look_for_reserved_stack_annotated_method(thread, fr); + if (activation.sp() != NULL) { + thread->disable_stack_reserved_zone(); + if (activation.is_interpreted_frame()) { + thread->set_reserved_stack_activation((address)( + activation.fp() + frame::interpreter_frame_initial_sp_offset)); + } else { + thread->set_reserved_stack_activation((address)activation.unextended_sp()); + } + return 1; + } + } + } + // Throw a stack overflow exception. Guard pages will be reenabled + // while unwinding the stack. +#ifdef PRINT_SIGNAL_HANDLE + tty->print("this thread is in java\n"); +#endif + thread->disable_stack_yellow_reserved_zone(); + stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::STACK_OVERFLOW); + } else { + // Thread was in the vm or native code. Return and try to finish. +#ifdef PRINT_SIGNAL_HANDLE + tty->print("this thread is in vm or native codes and return\n"); +#endif + thread->disable_stack_yellow_reserved_zone(); + return 1; + } + } else if (thread->in_stack_red_zone(addr)) { + // Fatal red zone violation. Disable the guard pages and fall through + // to handle_unexpected_exception way down below. +#ifdef PRINT_SIGNAL_HANDLE + tty->print("exception addr is in red zone\n"); +#endif + thread->disable_stack_red_zone(); + tty->print_raw_cr("An irrecoverable stack overflow has occurred."); + + // This is a likely cause, but hard to verify. Let's just print + // it as a hint. + tty->print_raw_cr("Please check if any of your loaded .so files has " + "enabled executable stack (see man page execstack(8))"); + } else { + // Accessing stack address below sp may cause SEGV if current + // thread has MAP_GROWSDOWN stack. This should only happen when + // current thread was created by user code with MAP_GROWSDOWN flag + // and then attached to VM. See notes in os_linux.cpp. +#ifdef PRINT_SIGNAL_HANDLE + tty->print("exception addr is neither in yellow zone nor in the red one\n"); +#endif + if (thread->osthread()->expanding_stack() == 0) { + thread->osthread()->set_expanding_stack(); + if (os::Linux::manually_expand_stack(thread, addr)) { + thread->osthread()->clear_expanding_stack(); + return 1; + } + thread->osthread()->clear_expanding_stack(); + } else { + fatal("recursive segv. expanding stack."); + } + } + } //addr < + } //sig == SIGSEGV + + if (thread->thread_state() == _thread_in_Java) { + // Java thread running in Java code => find exception handler if any + // a fault inside compiled code, the interpreter, or a stub +#ifdef PRINT_SIGNAL_HANDLE + tty->print("java thread running in java code\n"); +#endif + + // Handle signal from NativeJump::patch_verified_entry(). 
+ if (sig == SIGILL && nativeInstruction_at(pc)->is_sigill_zombie_not_entrant()) { +#ifdef PRINT_SIGNAL_HANDLE + tty->print_cr("verified entry = %lx, sig=%d", nativeInstruction_at(pc), sig); +#endif + stub = SharedRuntime::get_handle_wrong_method_stub(); + } else if (sig == SIGSEGV && os::is_poll_address((address)info->si_addr)) { +#ifdef PRINT_SIGNAL_HANDLE + tty->print_cr("polling address = %lx, sig=%d", os::get_polling_page(), sig); +#endif + stub = SharedRuntime::get_poll_stub(pc); + } else if (sig == SIGBUS /* && info->si_code == BUS_OBJERR */) { + // BugId 4454115: A read from a MappedByteBuffer can fault + // here if the underlying file has been truncated. + // Do not crash the VM in such a case. + CodeBlob* cb = CodeCache::find_blob_unsafe(pc); + CompiledMethod* nm = (cb != NULL) ? cb->as_compiled_method_or_null() : NULL; +#ifdef PRINT_SIGNAL_HANDLE + tty->print("cb = %lx, nm = %lx\n", cb, nm); +#endif + if (nm != NULL && nm->has_unsafe_access()) { + address next_pc = (address)((unsigned long)pc + sizeof(unsigned int)); + stub = SharedRuntime::handle_unsafe_access(thread, next_pc); + } + } else if (sig == SIGFPE /* && info->si_code == FPE_INTDIV */) { + // HACK: si_code does not work on linux 2.2.12-20!!! + int op = pc[0] & 0x3f; + int op1 = pc[3] & 0x3f; + //FIXME, Must port to mips code!! + switch (op) { + case 0x1e: //ddiv + case 0x1f: //ddivu + case 0x1a: //div + case 0x1b: //divu + case 0x34: //trap + /* In MIPS, div_by_zero exception can only be triggered by explicit 'trap'. + * Ref: [c1_LIRAssembler_mips.cpp] arithmetic_idiv() + */ + stub = SharedRuntime::continuation_for_implicit_exception(thread, + pc, + SharedRuntime::IMPLICIT_DIVIDE_BY_ZERO); + break; + default: + // TODO: handle more cases if we are using other x86 instructions + // that can generate SIGFPE signal on linux. + tty->print_cr("unknown opcode 0x%X -0x%X with SIGFPE.", op, op1); + //fatal("please update this code."); + } + } else if (sig == SIGSEGV && + !MacroAssembler::needs_explicit_null_check((intptr_t)info->si_addr)) { +#ifdef PRINT_SIGNAL_HANDLE + tty->print("continuation for implicit exception\n"); +#endif + // Determination of interpreter/vtable stub/compiled code null exception + stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::IMPLICIT_NULL); +#ifdef PRINT_SIGNAL_HANDLE + tty->print_cr("continuation_for_implicit_exception stub: %lx", stub); +#endif + } else if (/*thread->thread_state() == _thread_in_Java && */sig == SIGILL) { + //Since kernel does not have emulation of PS instructions yet, the emulation must be handled here. + //The method is to trigger kernel emulation of float emulation. 
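+      // The emulation below decodes a MIPS paired-single (PS) operation:
+      // a PS operand packs two binary32 floats into one 64-bit FPR.
+      // cvt.s.pl/cvt.s.pu extract the lower/upper single, the scalar
+      // add.s/sub.s/mul.s is applied to each half, pll.ps re-packs the
+      // pair, the result is stored back into the saved FP context, and
+      // pc is advanced past the faulting instruction (stub = pc + 4).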
+ int inst = *(int*)pc; + int ops = (inst >> 26) & 0x3f; + int ops_fmt = (inst >> 21) & 0x1f; + int op = inst & 0x3f; + if (ops == Assembler::cop1_op && ops_fmt == Assembler::ps_fmt) { + int ft, fs, fd; + ft = (inst >> 16) & 0x1f; + fs = (inst >> 11) & 0x1f; + fd = (inst >> 6) & 0x1f; + float ft_upper, ft_lower, fs_upper, fs_lower, fd_upper, fd_lower; + double ft_value, fs_value, fd_value; + ft_value = uc->uc_mcontext.fpregs.fp_r.fp_dregs[ft]; + fs_value = uc->uc_mcontext.fpregs.fp_r.fp_dregs[fs]; + __asm__ __volatile__ ( + "cvt.s.pl %0, %4\n\t" + "cvt.s.pu %1, %4\n\t" + "cvt.s.pl %2, %5\n\t" + "cvt.s.pu %3, %5\n\t" + : "=f" (fs_lower), "=f" (fs_upper), "=f" (ft_lower), "=f" (ft_upper) + : "f" (fs_value), "f" (ft_value) + ); + + switch (op) { + case Assembler::fadd_op: + __asm__ __volatile__ ( + "add.s %1, %3, %5\n\t" + "add.s %2, %4, %6\n\t" + "pll.ps %0, %1, %2\n\t" + : "=f" (fd_value), "=f" (fd_upper), "=f" (fd_lower) + : "f" (fs_upper), "f" (fs_lower), "f" (ft_upper), "f" (ft_lower) + ); + uc->uc_mcontext.fpregs.fp_r.fp_dregs[fd] = fd_value; + stub = pc + 4; + break; + case Assembler::fsub_op: + //fd = fs - ft + __asm__ __volatile__ ( + "sub.s %1, %3, %5\n\t" + "sub.s %2, %4, %6\n\t" + "pll.ps %0, %1, %2\n\t" + : "=f" (fd_value), "=f" (fd_upper), "=f" (fd_lower) + : "f" (fs_upper), "f" (fs_lower), "f" (ft_upper), "f" (ft_lower) + ); + uc->uc_mcontext.fpregs.fp_r.fp_dregs[fd] = fd_value; + stub = pc + 4; + break; + case Assembler::fmul_op: + __asm__ __volatile__ ( + "mul.s %1, %3, %5\n\t" + "mul.s %2, %4, %6\n\t" + "pll.ps %0, %1, %2\n\t" + : "=f" (fd_value), "=f" (fd_upper), "=f" (fd_lower) + : "f" (fs_upper), "f" (fs_lower), "f" (ft_upper), "f" (ft_lower) + ); + uc->uc_mcontext.fpregs.fp_r.fp_dregs[fd] = fd_value; + stub = pc + 4; + break; + default: + tty->print_cr("unknown cop1 opcode 0x%x with SIGILL.", op); + } + } else if (ops == Assembler::cop1x_op /*&& op == Assembler::nmadd_ps_op*/) { + // madd.ps is not used, the code below were not tested + int fr, ft, fs, fd; + float fr_upper, fr_lower, fs_upper, fs_lower, ft_upper, ft_lower, fd_upper, fd_lower; + double fr_value, ft_value, fs_value, fd_value; + switch (op) { + case Assembler::madd_ps_op: + // fd = (fs * ft) + fr + fr = (inst >> 21) & 0x1f; + ft = (inst >> 16) & 0x1f; + fs = (inst >> 11) & 0x1f; + fd = (inst >> 6) & 0x1f; + fr_value = uc->uc_mcontext.fpregs.fp_r.fp_dregs[fr]; + ft_value = uc->uc_mcontext.fpregs.fp_r.fp_dregs[ft]; + fs_value = uc->uc_mcontext.fpregs.fp_r.fp_dregs[fs]; + __asm__ __volatile__ ( + "cvt.s.pu %3, %9\n\t" + "cvt.s.pl %4, %9\n\t" + "cvt.s.pu %5, %10\n\t" + "cvt.s.pl %6, %10\n\t" + "cvt.s.pu %7, %11\n\t" + "cvt.s.pl %8, %11\n\t" + "madd.s %1, %3, %5, %7\n\t" + "madd.s %2, %4, %6, %8\n\t" + "pll.ps %0, %1, %2\n\t" + : "=f" (fd_value), "=f" (fd_upper), "=f" (fd_lower), "=f" (fr_upper), "=f" (fr_lower), "=f" (fs_upper), "=f" (fs_lower), "=f" (ft_upper), "=f" (ft_lower) + : "f" (fr_value)/*9*/, "f" (fs_value)/*10*/, "f" (ft_value)/*11*/ + ); + uc->uc_mcontext.fpregs.fp_r.fp_dregs[fd] = fd_value; + stub = pc + 4; + break; + default: + tty->print_cr("unknown cop1x opcode 0x%x with SIGILL.", op); + } + } + } //SIGILL + } else if (sig == SIGILL && VM_Version::is_determine_features_test_running()) { + // thread->thread_state() != _thread_in_Java + // SIGILL must be caused by VM_Version::determine_features(). + VM_Version::set_supports_cpucfg(false); + stub = pc + 4; // continue with next instruction. 
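+  // (determine_features() presumably probes for the cpucfg instruction by
+  //  executing it; on cores that lack it the probe raises SIGILL, which is
+  //  recorded here and execution resumes after the probe.)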
+ } else if (thread->thread_state() == _thread_in_vm && + sig == SIGBUS && /* info->si_code == BUS_OBJERR && */ + thread->doing_unsafe_access()) { +#ifdef PRINT_SIGNAL_HANDLE + tty->print_cr("SIGBUS in vm thread \n"); +#endif + address next_pc = (address)((unsigned long)pc + sizeof(unsigned int)); + stub = SharedRuntime::handle_unsafe_access(thread, next_pc); + } + + // jni_fast_GetField can trap at certain pc's if a GC kicks in + // and the heap gets shrunk before the field access. + if ((sig == SIGSEGV) || (sig == SIGBUS)) { +#ifdef PRINT_SIGNAL_HANDLE + tty->print("jni fast get trap: "); +#endif + address addr = JNI_FastGetField::find_slowcase_pc(pc); + if (addr != (address)-1) { + stub = addr; + } +#ifdef PRINT_SIGNAL_HANDLE + tty->print_cr("addr = %d, stub = %lx", addr, stub); +#endif + } + + // Check to see if we caught the safepoint code in the + // process of write protecting the memory serialization page. + // It write enables the page immediately after protecting it + // so we can just return to retry the write. + if ((sig == SIGSEGV) && + os::is_memory_serialize_page(thread, (address) info->si_addr)) { +#ifdef PRINT_SIGNAL_HANDLE + tty->print("write protecting the memory serialiazation page\n"); +#endif + // Block current thread until the memory serialize page permission restored. + os::block_on_serialize_page_trap(); + return true; + } + } + + // Execution protection violation + // + // This should be kept as the last step in the triage. We don't + // have a dedicated trap number for a no-execute fault, so be + // conservative and allow other handlers the first shot. + // + // Note: We don't test that info->si_code == SEGV_ACCERR here. + // this si_code is so generic that it is almost meaningless; and + // the si_code for this condition may change in the future. + // Furthermore, a false-positive should be harmless. + if (UnguardOnExecutionViolation > 0 && + //(sig == SIGSEGV || sig == SIGBUS) && + //uc->uc_mcontext.gregs[REG_TRAPNO] == trap_page_fault) { + (sig == SIGSEGV || sig == SIGBUS +#ifdef OPT_RANGECHECK + || sig == SIGSYS +#endif + ) && + //(uc->uc_mcontext.cause == 2 || uc->uc_mcontext.cause == 3)) { + (uc->uc_mcontext.hi1 == 2 || uc->uc_mcontext.hi1 == 3)) { +#ifdef PRINT_SIGNAL_HANDLE + tty->print_cr("execution protection violation\n"); +#endif + + int page_size = os::vm_page_size(); + address addr = (address) info->si_addr; + address pc = os::Linux::ucontext_get_pc(uc); + // Make sure the pc and the faulting address are sane. + // + // If an instruction spans a page boundary, and the page containing + // the beginning of the instruction is executable but the following + // page is not, the pc and the faulting address might be slightly + // different - we still want to unguard the 2nd page in this case. + // + // 15 bytes seems to be a (very) safe value for max instruction size. 
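+  // (the 15-byte bound appears to be carried over from the x86 version of
+  //  this handler; MIPS/LoongArch instructions are a fixed 4 bytes, so the
+  //  value is conservative here.)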
+ bool pc_is_near_addr = + (pointer_delta((void*) addr, (void*) pc, sizeof(char)) < 15); +Untested("Unimplemented yet"); + bool instr_spans_page_boundary = +/* + (align_size_down((intptr_t) pc ^ (intptr_t) addr, + (intptr_t) page_size) > 0); +*/ + (align_down((intptr_t) pc ^ (intptr_t) addr, + (intptr_t) page_size) > 0); + + if (pc == addr || (pc_is_near_addr && instr_spans_page_boundary)) { + static volatile address last_addr = + (address) os::non_memory_address_word(); + + // In conservative mode, don't unguard unless the address is in the VM + if (addr != last_addr && + (UnguardOnExecutionViolation > 1 || os::address_is_in_vm(addr))) { + + // Set memory to RWX and retry +Untested("Unimplemented yet"); +/* + address page_start = + (address) align_size_down((intptr_t) addr, (intptr_t) page_size); +*/ + address page_start = align_down(addr, page_size); + bool res = os::protect_memory((char*) page_start, page_size, + os::MEM_PROT_RWX); + + if (PrintMiscellaneous && Verbose) { + char buf[256]; + jio_snprintf(buf, sizeof(buf), "Execution protection violation " + "at " INTPTR_FORMAT + ", unguarding " INTPTR_FORMAT ": %s, errno=%d", addr, + page_start, (res ? "success" : "failed"), errno); + tty->print_raw_cr(buf); + } + stub = pc; + + // Set last_addr so if we fault again at the same address, we don't end + // up in an endless loop. + // + // There are two potential complications here. Two threads trapping at + // the same address at the same time could cause one of the threads to + // think it already unguarded, and abort the VM. Likely very rare. + // + // The other race involves two threads alternately trapping at + // different addresses and failing to unguard the page, resulting in + // an endless loop. This condition is probably even more unlikely than + // the first. + // + // Although both cases could be avoided by using locks or thread local + // last_addr, these solutions are unnecessary complication: this + // handler is a best-effort safety net, not a complete solution. It is + // disabled by default and should only be used as a workaround in case + // we missed any no-execute-unsafe VM code. + + last_addr = addr; + } + } + } + + if (stub != NULL) { +#ifdef PRINT_SIGNAL_HANDLE + tty->print_cr("resolved stub=%lx\n",stub); +#endif + // save all thread context in case we need to restore it + if (thread != NULL) thread->set_saved_exception_pc(pc); + + os::Linux::ucontext_set_pc(uc, stub); + return true; + } + + // signal-chaining + if (os::Linux::chained_handler(sig, info, ucVoid)) { +#ifdef PRINT_SIGNAL_HANDLE + tty->print_cr("signal chaining\n"); +#endif + return true; + } + + if (!abort_if_unrecognized) { +#ifdef PRINT_SIGNAL_HANDLE + tty->print_cr("abort becauce of unrecognized\n"); +#endif + // caller wants another chance, so give it to him + return false; + } + + if (pc == NULL && uc != NULL) { + pc = os::Linux::ucontext_get_pc(uc); + } + + // unmask current signal + sigset_t newset; + sigemptyset(&newset); + sigaddset(&newset, sig); + sigprocmask(SIG_UNBLOCK, &newset, NULL); +#ifdef PRINT_SIGNAL_HANDLE + tty->print_cr("VMError in signal handler\n"); +#endif + VMError::report_and_die(t, sig, pc, info, ucVoid); + + ShouldNotReachHere(); + return true; // Mute compiler +} + +// FCSR:...|24| 23 |22|21|... +// ...|FS|FCC0|FO|FN|... +void os::Linux::init_thread_fpu_state(void) { + if (SetFSFOFN == 999) + return; + int fs = (SetFSFOFN / 100)? 1:0; + int fo = ((SetFSFOFN % 100) / 10)? 1:0; + int fn = (SetFSFOFN % 10)? 
1:0; + int mask = fs << 24 | fo << 22 | fn << 21; + + int fcsr = get_fpu_control_word(); + fcsr = fcsr | mask; + set_fpu_control_word(fcsr); + /* + if (fcsr != get_fpu_control_word()) + tty->print_cr(" fail to set to %lx, get_fpu_control_word:%lx", fcsr, get_fpu_control_word()); + */ +} + +int os::Linux::get_fpu_control_word(void) { + int fcsr; + __asm__ __volatile__ ( + ".set noat;" + "daddiu %0, $0, 0;" + "cfc1 %0, $31;" + : "=r" (fcsr) + ); + return fcsr; +} + +void os::Linux::set_fpu_control_word(int fpu_control) { + __asm__ __volatile__ ( + ".set noat;" + "ctc1 %0, $31;" + : + : "r" (fpu_control) + ); +} + +bool os::is_allocatable(size_t bytes) { + + if (bytes < 2 * G) { + return true; + } + + char* addr = reserve_memory(bytes, NULL); + + if (addr != NULL) { + release_memory(addr, bytes); + } + + return addr != NULL; +} + +//////////////////////////////////////////////////////////////////////////////// +// thread stack + +//size_t os::Linux::min_stack_allowed = 96 * K; +size_t os::Posix::_compiler_thread_min_stack_allowed = 48 * K; +size_t os::Posix::_java_thread_min_stack_allowed = 40 * K; +size_t os::Posix::_vm_internal_thread_min_stack_allowed = 64 * K; + + +/* +// Test if pthread library can support variable thread stack size. LinuxThreads +// in fixed stack mode allocates 2M fixed slot for each thread. LinuxThreads +// in floating stack mode and NPTL support variable stack size. +bool os::Linux::supports_variable_stack_size() { + if (os::Linux::is_NPTL()) { + // NPTL, yes + return true; + + } else { + // Note: We can't control default stack size when creating a thread. + // If we use non-default stack size (pthread_attr_setstacksize), both + // floating stack and non-floating stack LinuxThreads will return the + // same value. This makes it impossible to implement this function by + // detecting thread stack size directly. + // + // An alternative approach is to check %gs. Fixed-stack LinuxThreads + // do not use %gs, so its value is 0. Floating-stack LinuxThreads use + // %gs (either as LDT selector or GDT selector, depending on kernel) + // to access thread specific data. + // + // Note that %gs is a reserved glibc register since early 2001, so + // applications are not allowed to change its value (Ulrich Drepper from + // Redhat confirmed that all known offenders have been modified to use + // either %fs or TSD). In the worst case scenario, when VM is embedded in + // a native application that plays with %gs, we might see non-zero %gs + // even LinuxThreads is running in fixed stack mode. As the result, we'll + // return true and skip _thread_safety_check(), so we may not be able to + // detect stack-heap collisions. But otherwise it's harmless. + // + return false; + } +} +*/ + +// Return default stack size for thr_type +size_t os::Posix::default_stack_size(os::ThreadType thr_type) { + // Default stack size (compiler thread needs larger stack) + size_t s = (thr_type == os::compiler_thread ? 
2 * M : 512 * K); + return s; +} + +///////////////////////////////////////////////////////////////////////////// +// helper functions for fatal error handler +void os::print_register_info(outputStream *st, const void *context) { + if (context == NULL) return; + + ucontext_t *uc = (ucontext_t*)context; + + st->print_cr("Register to memory mapping:"); + st->cr(); + // this is horrendously verbose but the layout of the registers in the + // // context does not match how we defined our abstract Register set, so + // // we can't just iterate through the gregs area + // + // // this is only for the "general purpose" registers + st->print("R0=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[0]); + st->print("AT=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[1]); + st->print("V0=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[2]); + st->print("V1=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[3]); + st->cr(); + st->print("A0=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[4]); + st->print("A1=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[5]); + st->print("A2=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[6]); + st->print("A3=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[7]); + st->cr(); + st->print("A4=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[8]); + st->print("A5=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[9]); + st->print("A6=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[10]); + st->print("A7=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[11]); + st->cr(); + st->print("T0=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[12]); + st->print("T1=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[13]); + st->print("T2=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[14]); + st->print("T3=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[15]); + st->cr(); + st->print("S0=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[16]); + st->print("S1=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[17]); + st->print("S2=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[18]); + st->print("S3=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[19]); + st->cr(); + st->print("S4=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[20]); + st->print("S5=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[21]); + st->print("S6=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[22]); + st->print("S7=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[23]); + st->cr(); + st->print("T8=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[24]); + st->print("T9=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[25]); + st->print("K0=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[26]); + st->print("K1=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[27]); + st->cr(); + st->print("GP=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[28]); + st->print("SP=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[29]); + st->print("FP=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[30]); + st->print("RA=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[31]); + st->cr(); + +} + +void os::print_context(outputStream *st, const void *context) { + if (context == NULL) return; + + const ucontext_t *uc = (const ucontext_t*)context; + st->print_cr("Registers:"); + st->print( "R0=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[0]); + st->print(", AT=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[1]); + 
st->print(", V0=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[2]); + st->print(", V1=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[3]); + st->cr(); + st->print( "A0=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[4]); + st->print(", A1=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[5]); + st->print(", A2=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[6]); + st->print(", A3=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[7]); + st->cr(); + st->print( "A4=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[8]); + st->print(", A5=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[9]); + st->print(", A6=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[10]); + st->print(", A7=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[11]); + st->cr(); + st->print( "T0=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[12]); + st->print(", T1=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[13]); + st->print(", T2=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[14]); + st->print(", T3=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[15]); + st->cr(); + st->print( "S0=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[16]); + st->print(", S1=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[17]); + st->print(", S2=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[18]); + st->print(", S3=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[19]); + st->cr(); + st->print( "S4=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[20]); + st->print(", S5=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[21]); + st->print(", S6=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[22]); + st->print(", S7=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[23]); + st->cr(); + st->print( "T8=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[24]); + st->print(", T9=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[25]); + st->print(", K0=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[26]); + st->print(", K1=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[27]); + st->cr(); + st->print( "GP=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[28]); + st->print(", SP=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[29]); + st->print(", FP=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[30]); + st->print(", RA=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[31]); + st->cr(); + st->cr(); + + intptr_t *sp = (intptr_t *)os::Linux::ucontext_get_sp(uc); + st->print_cr("Top of Stack: (sp=" PTR_FORMAT ")", p2i(sp)); + print_hex_dump(st, (address)(sp - 32), (address)(sp + 32), sizeof(intptr_t)); + st->cr(); + + // Note: it may be unsafe to inspect memory near pc. For example, pc may + // point to garbage if entry point in an nmethod is corrupted. Leave + // this at the end, and hope for the best. + address pc = os::Linux::ucontext_get_pc(uc); + st->print_cr("Instructions: (pc=" PTR_FORMAT ")", p2i(pc)); + print_hex_dump(st, pc - 64, pc + 64, sizeof(char)); + Disassembler::decode(pc - 80, pc + 80, st); +} + +void os::setup_fpu() { + /* + //no use for MIPS + int fcsr; + address fpu_cntrl = StubRoutines::addr_fpu_cntrl_wrd_std(); + __asm__ __volatile__ ( + ".set noat;" + "cfc1 %0, $31;" + "sw %0, 0(%1);" + : "=r" (fcsr) + : "r" (fpu_cntrl) + : "memory" + ); + printf("fpu_cntrl: %lx\n", fpu_cntrl); + */ +} + +#ifndef PRODUCT +void os::verify_stack_alignment() { + assert(((intptr_t)os::current_stack_pointer() & (StackAlignmentInBytes-1)) == 0, "incorrect stack alignment"); +} +#endif + +int os::extra_bang_size_in_bytes() { + // MIPS does not require the additional stack bang. 
+ return 0; +} + +bool os::is_ActiveCoresMP() { + return UseActiveCoresMP && _initial_active_processor_count == 1; +} diff --git a/src/hotspot/os_cpu/linux_mips/os_linux_mips.hpp b/src/hotspot/os_cpu/linux_mips/os_linux_mips.hpp new file mode 100644 index 00000000000..c07d08156f2 --- /dev/null +++ b/src/hotspot/os_cpu/linux_mips/os_linux_mips.hpp @@ -0,0 +1,39 @@ +/* + * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef OS_CPU_LINUX_MIPS_VM_OS_LINUX_MIPS_HPP +#define OS_CPU_LINUX_MIPS_VM_OS_LINUX_MIPS_HPP + + static void setup_fpu(); + static bool is_allocatable(size_t bytes); + static intptr_t *get_previous_fp(); + + // Used to register dynamic code cache area with the OS + // Note: Currently only used in 64 bit Windows implementations + static bool register_code_area(char *low, char *high) { return true; } + + static bool is_ActiveCoresMP(); + +#endif // OS_CPU_LINUX_MIPS_VM_OS_LINUX_MIPS_HPP diff --git a/src/hotspot/os_cpu/linux_mips/prefetch_linux_mips.inline.hpp b/src/hotspot/os_cpu/linux_mips/prefetch_linux_mips.inline.hpp new file mode 100644 index 00000000000..93490345f0b --- /dev/null +++ b/src/hotspot/os_cpu/linux_mips/prefetch_linux_mips.inline.hpp @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef OS_CPU_LINUX_MIPS_VM_PREFETCH_LINUX_MIPS_INLINE_HPP +#define OS_CPU_LINUX_MIPS_VM_PREFETCH_LINUX_MIPS_INLINE_HPP + + +inline void Prefetch::read (void *loc, intx interval) { + // 'pref' is implemented as NOP in Loongson 3A + __asm__ __volatile__ ( + " .set push\n" + " .set mips32\n" + " .set noreorder\n" + " pref 0, 0(%[__loc]) \n" + " .set pop\n" + : [__loc] "=&r"(loc) + : + : "memory" + ); +} + +inline void Prefetch::write(void *loc, intx interval) { + __asm__ __volatile__ ( + " .set push\n" + " .set mips32\n" + " .set noreorder\n" + " pref 1, 0(%[__loc]) \n" + " .set pop\n" + : [__loc] "=&r"(loc) + : + : "memory" + ); + +} + +#endif // OS_CPU_LINUX_MIPS_VM_PREFETCH_LINUX_MIPS_INLINE_HPP diff --git a/src/hotspot/os_cpu/linux_mips/thread_linux_mips.cpp b/src/hotspot/os_cpu/linux_mips/thread_linux_mips.cpp new file mode 100644 index 00000000000..dbe8efe1641 --- /dev/null +++ b/src/hotspot/os_cpu/linux_mips/thread_linux_mips.cpp @@ -0,0 +1,117 @@ +/* + * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "compiler/compileBroker.hpp" +#include "memory/metaspaceShared.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/thread.inline.hpp" +#include "runtime/sharedRuntime.hpp" + +void JavaThread::pd_initialize() +{ + _anchor.clear(); +} + +frame JavaThread::pd_last_frame() { + assert(has_last_Java_frame(), "must have last_Java_sp() when suspended"); + if (_anchor.last_Java_pc() != NULL) { + return frame(_anchor.last_Java_sp(), _anchor.last_Java_fp(), _anchor.last_Java_pc()); + } else { + // This will pick up pc from sp + return frame(_anchor.last_Java_sp(), _anchor.last_Java_fp()); + } +} + +// For Forte Analyzer AsyncGetCallTrace profiling support - thread is +// currently interrupted by SIGPROF +bool JavaThread::pd_get_top_frame_for_signal_handler(frame* fr_addr, + void* ucontext, bool isInJava) { + + assert(Thread::current() == this, "caller must be current thread"); + return pd_get_top_frame(fr_addr, ucontext, isInJava); +} + + +bool JavaThread::pd_get_top_frame_for_profiling(frame* fr_addr, void* ucontext, bool isInJava) { + return pd_get_top_frame(fr_addr, ucontext, isInJava); +} + +bool JavaThread::pd_get_top_frame(frame* fr_addr, void* ucontext, bool isInJava) { + assert(this->is_Java_thread(), "must be JavaThread"); + JavaThread* jt = (JavaThread *)this; + + // If we have a last_Java_frame, then we should use it even if + // isInJava == true. 
It should be more reliable than ucontext info. + if (jt->has_last_Java_frame() && jt->frame_anchor()->walkable()) { + *fr_addr = jt->pd_last_frame(); + return true; + } + + // At this point, we don't have a last_Java_frame, so + // we try to glean some information out of the ucontext + // if we were running Java code when SIGPROF came in. + if (isInJava) { + ucontext_t* uc = (ucontext_t*) ucontext; + + intptr_t* ret_fp; + intptr_t* ret_sp; + ExtendedPC addr = os::Linux::fetch_frame_from_ucontext(this, uc, + &ret_sp, &ret_fp); + if (addr.pc() == NULL || ret_sp == NULL ) { + // ucontext wasn't useful + return false; + } + + if (MetaspaceShared::is_in_trampoline_frame(addr.pc())) { + // In the middle of a trampoline call. Bail out for safety. + // This happens rarely so shouldn't affect profiling. + return false; + } + + frame ret_frame(ret_sp, ret_fp, addr.pc()); + if (!ret_frame.safe_for_sender(jt)) { +#ifdef COMPILER2 + // C2 and JVMCI use ebp as a general register see if NULL fp helps + frame ret_frame2(ret_sp, NULL, addr.pc()); + if (!ret_frame2.safe_for_sender(jt)) { + // nothing else to try if the frame isn't good + return false; + } + ret_frame = ret_frame2; +#else + // nothing else to try if the frame isn't good + return false; +#endif // COMPILER2_OR_JVMCI + } + *fr_addr = ret_frame; + return true; + } + + // nothing else to try + return false; +} + +void JavaThread::cache_global_variables() { } diff --git a/src/hotspot/os_cpu/linux_mips/thread_linux_mips.hpp b/src/hotspot/os_cpu/linux_mips/thread_linux_mips.hpp new file mode 100644 index 00000000000..8b8dbe219c7 --- /dev/null +++ b/src/hotspot/os_cpu/linux_mips/thread_linux_mips.hpp @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef OS_CPU_LINUX_MIPS_VM_THREAD_LINUX_MIPS_HPP +#define OS_CPU_LINUX_MIPS_VM_THREAD_LINUX_MIPS_HPP + + private: + void pd_initialize(); + + frame pd_last_frame(); + + public: + // Mutators are highly dangerous.... 
+ intptr_t* last_Java_fp() { return _anchor.last_Java_fp(); } + void set_last_Java_fp(intptr_t* fp) { _anchor.set_last_Java_fp(fp); } + + void set_base_of_stack_pointer(intptr_t* base_sp) { + } + + static ByteSize last_Java_fp_offset() { + return byte_offset_of(JavaThread, _anchor) + JavaFrameAnchor::last_Java_fp_offset(); + } + + intptr_t* base_of_stack_pointer() { + return NULL; + } + void record_base_of_stack_pointer() { + } + + bool pd_get_top_frame_for_signal_handler(frame* fr_addr, void* ucontext, + bool isInJava); + + bool pd_get_top_frame_for_profiling(frame* fr_addr, void* ucontext, bool isInJava); +private: + bool pd_get_top_frame(frame* fr_addr, void* ucontext, bool isInJava); +public: + + // These routines are only used on cpu architectures that + // have separate register stacks (Itanium). + static bool register_stack_overflow() { return false; } + static void enable_register_stack_guard() {} + static void disable_register_stack_guard() {} + +#endif // OS_CPU_LINUX_MIPS_VM_THREAD_LINUX_MIPS_HPP diff --git a/src/hotspot/os_cpu/linux_mips/vmStructs_linux_mips.hpp b/src/hotspot/os_cpu/linux_mips/vmStructs_linux_mips.hpp new file mode 100644 index 00000000000..b7454bf045a --- /dev/null +++ b/src/hotspot/os_cpu/linux_mips/vmStructs_linux_mips.hpp @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef OS_CPU_LINUX_MIPS_VM_VMSTRUCTS_LINUX_MIPS_HPP +#define OS_CPU_LINUX_MIPS_VM_VMSTRUCTS_LINUX_MIPS_HPP + +// These are the OS and CPU-specific fields, types and integer +// constants required by the Serviceability Agent. This file is +// referenced by vmStructs.cpp. 
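+// For this port only the OSThread identifiers (_thread_id, _pthread_id) and
+// the corresponding pid_t/pthread_t types are exposed; the integer and long
+// constant macros below are left empty.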
+ +#define VM_STRUCTS_OS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \ + \ + /******************************/ \ + /* Threads (NOTE: incomplete) */ \ + /******************************/ \ + nonstatic_field(OSThread, _thread_id, pid_t) \ + nonstatic_field(OSThread, _pthread_id, pthread_t) + + +#define VM_TYPES_OS_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) \ + \ + /**********************/ \ + /* Posix Thread IDs */ \ + /**********************/ \ + \ + declare_integer_type(pid_t) \ + declare_unsigned_integer_type(pthread_t) + +#define VM_INT_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) + +#define VM_LONG_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) + +#endif // OS_CPU_LINUX_MIPS_VM_VMSTRUCTS_LINUX_MIPS_HPP diff --git a/src/hotspot/os_cpu/linux_mips/vm_version_linux_mips.cpp b/src/hotspot/os_cpu/linux_mips/vm_version_linux_mips.cpp new file mode 100644 index 00000000000..93e4bea04c6 --- /dev/null +++ b/src/hotspot/os_cpu/linux_mips/vm_version_linux_mips.cpp @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2019, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "runtime/os.hpp" +#include "runtime/vm_version.hpp" diff --git a/src/hotspot/os_cpu/linux_x86/gc/z/zGlobals_linux_x86.hpp b/src/hotspot/os_cpu/linux_x86/gc/z/zGlobals_linux_x86.hpp index 2b0fa83c1ad..270e0bc180a 100644 --- a/src/hotspot/os_cpu/linux_x86/gc/z/zGlobals_linux_x86.hpp +++ b/src/hotspot/os_cpu/linux_x86/gc/z/zGlobals_linux_x86.hpp @@ -85,4 +85,6 @@ const uintptr_t ZPlatformAddressSpaceSize = ((uintptr_t)1 << ZPlatformAddres const size_t ZPlatformCacheLineSize = 64; +const bool ZPlatformLoadBarrierTestResultInRegister = false; + #endif // OS_CPU_LINUX_X86_ZGLOBALS_LINUX_X86_HPP diff --git a/src/hotspot/share/asm/codeBuffer.cpp b/src/hotspot/share/asm/codeBuffer.cpp index 4912f880564..a420f7807b5 100644 --- a/src/hotspot/share/asm/codeBuffer.cpp +++ b/src/hotspot/share/asm/codeBuffer.cpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2023. These + * modifications are Copyright (c) 2018, 2023, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #include "precompiled.hpp" #include "asm/codeBuffer.hpp" #include "compiler/disassembler.hpp" @@ -351,6 +357,7 @@ void CodeSection::relocate(address at, RelocationHolder const& spec, int format) assert(rtype == relocInfo::none || rtype == relocInfo::runtime_call_type || rtype == relocInfo::internal_word_type|| + NOT_ZERO(MIPS64_ONLY(rtype == relocInfo::internal_pc_type ||)) rtype == relocInfo::section_word_type || rtype == relocInfo::external_word_type, "code needs relocation information"); diff --git a/src/hotspot/share/c1/c1_Compiler.cpp b/src/hotspot/share/c1/c1_Compiler.cpp index aff12954b38..caa93fc804b 100644 --- a/src/hotspot/share/c1/c1_Compiler.cpp +++ b/src/hotspot/share/c1/c1_Compiler.cpp @@ -44,6 +44,12 @@ #include "utilities/bitMap.inline.hpp" #include "utilities/macros.hpp" +/* + * This file has been modified by Loongson Technology in 2022, These + * modifications are Copyright (c) 2022, Loongson Technology, and are made + * available on the same license terms set forth above. 
+ */ + Compiler::Compiler() : AbstractCompiler(compiler_c1) { } @@ -211,7 +217,7 @@ bool Compiler::is_intrinsic_supported(const methodHandle& method) { case vmIntrinsics::_updateCRC32: case vmIntrinsics::_updateBytesCRC32: case vmIntrinsics::_updateByteBufferCRC32: -#if defined(SPARC) || defined(S390) || defined(PPC64) || defined(AARCH64) +#if defined(SPARC) || defined(S390) || defined(PPC64) || defined(AARCH64) || defined(LOONGARCH64) case vmIntrinsics::_updateBytesCRC32C: case vmIntrinsics::_updateDirectByteBufferCRC32C: #endif diff --git a/src/hotspot/share/c1/c1_LIR.cpp b/src/hotspot/share/c1/c1_LIR.cpp index e30d39f73d1..7461b7449a5 100644 --- a/src/hotspot/share/c1/c1_LIR.cpp +++ b/src/hotspot/share/c1/c1_LIR.cpp @@ -250,6 +250,18 @@ void LIR_Op2::verify() const { #endif } +void LIR_Op4::verify() const { +#ifdef ASSERT + switch (code()) { + case lir_cmp_cmove: + break; + + default: + assert(!result_opr()->is_register() || !result_opr()->is_oop_register(), + "can't produce oops from arith"); + } +#endif +} LIR_OpBranch::LIR_OpBranch(LIR_Condition cond, BasicType type, BlockBegin* block) : LIR_Op(lir_branch, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL) @@ -308,6 +320,56 @@ void LIR_OpBranch::negate_cond() { } +LIR_OpCmpBranch::LIR_OpCmpBranch(LIR_Condition cond, LIR_Opr left, LIR_Opr right, CodeStub* stub, CodeEmitInfo* info) + : LIR_Op2(lir_cmp_branch, cond, left, right, info) + , _label(stub->entry()) + , _block(NULL) + , _ublock(NULL) + , _stub(stub) { +} + +LIR_OpCmpBranch::LIR_OpCmpBranch(LIR_Condition cond, LIR_Opr left, LIR_Opr right, BlockBegin* block, CodeEmitInfo* info) + : LIR_Op2(lir_cmp_branch, cond, left, right, info) + , _label(block->label()) + , _block(block) + , _ublock(NULL) + , _stub(NULL) { +} + +LIR_OpCmpBranch::LIR_OpCmpBranch(LIR_Condition cond, LIR_Opr left, LIR_Opr right, BlockBegin* block, BlockBegin* ublock, CodeEmitInfo* info) + : LIR_Op2(lir_cmp_float_branch, cond, left, right, info) + , _label(block->label()) + , _block(block) + , _ublock(ublock) + , _stub(NULL) { +} + +void LIR_OpCmpBranch::change_block(BlockBegin* b) { + assert(_block != NULL, "must have old block"); + assert(_block->label() == label(), "must be equal"); + + _block = b; + _label = b->label(); +} + +void LIR_OpCmpBranch::change_ublock(BlockBegin* b) { + assert(_ublock != NULL, "must have old block"); + + _ublock = b; +} + +void LIR_OpCmpBranch::negate_cond() { + switch (condition()) { + case lir_cond_equal: set_condition(lir_cond_notEqual); break; + case lir_cond_notEqual: set_condition(lir_cond_equal); break; + case lir_cond_less: set_condition(lir_cond_greaterEqual); break; + case lir_cond_lessEqual: set_condition(lir_cond_greater); break; + case lir_cond_greaterEqual: set_condition(lir_cond_less); break; + case lir_cond_greater: set_condition(lir_cond_lessEqual); break; + default: ShouldNotReachHere(); + } +} + LIR_OpTypeCheck::LIR_OpTypeCheck(LIR_Code code, LIR_Opr result, LIR_Opr object, ciKlass* klass, LIR_Opr tmp1, LIR_Opr tmp2, LIR_Opr tmp3, bool fast_check, CodeEmitInfo* info_for_exception, CodeEmitInfo* info_for_patch, @@ -509,10 +571,7 @@ void LIR_OpVisitState::visit(LIR_Op* op) { assert(opConvert->_info == NULL, "must be"); if (opConvert->_opr->is_valid()) do_input(opConvert->_opr); if (opConvert->_result->is_valid()) do_output(opConvert->_result); -#ifdef PPC32 - if (opConvert->_tmp1->is_valid()) do_temp(opConvert->_tmp1); - if (opConvert->_tmp2->is_valid()) do_temp(opConvert->_tmp2); -#endif + if (opConvert->_tmp->is_valid()) do_temp(opConvert->_tmp); 
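+      // (the former PPC32-only tmp1/tmp2 pair is replaced by a single
+      //  generic _tmp operand, letting a port pass one scratch register
+      //  for the conversion through LIR_List::convert().)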
do_stub(opConvert->_stub); break; @@ -611,6 +670,25 @@ void LIR_OpVisitState::visit(LIR_Op* op) { break; } +// LIR_OpCmpBranch; + case lir_cmp_branch: // may have info, input and result register always invalid + case lir_cmp_float_branch: // may have info, input and result register always invalid + { + assert(op->as_OpCmpBranch() != NULL, "must be"); + LIR_OpCmpBranch* opCmpBranch = (LIR_OpCmpBranch*)op; + assert(opCmpBranch->_tmp2->is_illegal() && opCmpBranch->_tmp3->is_illegal() && + opCmpBranch->_tmp4->is_illegal() && opCmpBranch->_tmp5->is_illegal(), "not used"); + + if (opCmpBranch->_info) do_info(opCmpBranch->_info); + if (opCmpBranch->_opr1->is_valid()) do_input(opCmpBranch->_opr1); + if (opCmpBranch->_opr2->is_valid()) do_input(opCmpBranch->_opr2); + if (opCmpBranch->_tmp1->is_valid()) do_temp(opCmpBranch->_tmp1); + if (opCmpBranch->_stub != NULL) opCmpBranch->stub()->visit(this); + assert(opCmpBranch->_result->is_illegal(), "not used"); + + break; + } + // special handling for cmove: right input operand must not be equal // to the result operand, otherwise the backend fails case lir_cmove: @@ -711,6 +789,29 @@ void LIR_OpVisitState::visit(LIR_Op* op) { break; } +// LIR_Op4 + // special handling for cmp cmove: src2(opr4) operand must not be equal + // to the result operand, otherwise the backend fails + case lir_cmp_cmove: + { + assert(op->as_Op4() != NULL, "must be"); + LIR_Op4* op4 = (LIR_Op4*)op; + + assert(op4->_info == NULL, "not used"); + assert(op4->_opr1->is_valid() && op4->_opr2->is_valid() && + op4->_opr3->is_valid() && op4->_opr4->is_valid() && + op4->_result->is_valid(), "used"); + + do_input(op4->_opr1); + do_input(op4->_opr2); + do_input(op4->_opr3); + do_input(op4->_opr4); + do_temp(op4->_opr4); + do_output(op4->_result); + + break; + } + // LIR_OpJavaCall case lir_static_call: case lir_optvirtual_call: @@ -1028,6 +1129,13 @@ void LIR_Op2::emit_code(LIR_Assembler* masm) { masm->emit_op2(this); } +void LIR_OpCmpBranch::emit_code(LIR_Assembler* masm) { + masm->emit_opCmpBranch(this); + if (stub()) { + masm->append_code_stub(stub()); + } +} + void LIR_OpAllocArray::emit_code(LIR_Assembler* masm) { masm->emit_alloc_array(this); masm->append_code_stub(stub()); @@ -1048,6 +1156,10 @@ void LIR_Op3::emit_code(LIR_Assembler* masm) { masm->emit_op3(this); } +void LIR_Op4::emit_code(LIR_Assembler* masm) { + masm->emit_op4(this); +} + void LIR_OpLock::emit_code(LIR_Assembler* masm) { masm->emit_lock(this); if (stub()) { @@ -1424,8 +1536,7 @@ void LIR_List::null_check(LIR_Opr opr, CodeEmitInfo* info, bool deoptimize_on_nu if (deoptimize_on_null) { // Emit an explicit null check and deoptimize if opr is null CodeStub* deopt = new DeoptimizeStub(info, Deoptimization::Reason_null_check, Deoptimization::Action_none); - cmp(lir_cond_equal, opr, LIR_OprFact::oopConst(NULL)); - branch(lir_cond_equal, T_OBJECT, deopt); + cmp_branch(lir_cond_equal, opr, LIR_OprFact::oopConst(NULL), T_OBJECT, deopt); } else { // Emit an implicit null check append(new LIR_Op1(lir_null_check, opr, info)); @@ -1680,6 +1791,8 @@ const char * LIR_Op::name() const { case lir_cmp_l2i: s = "cmp_l2i"; break; case lir_ucmp_fd2i: s = "ucomp_fd2i"; break; case lir_cmp_fd2i: s = "comp_fd2i"; break; + case lir_cmp_branch: s = "cmp_branch"; break; + case lir_cmp_float_branch: s = "cmp_fbranch"; break; case lir_cmove: s = "cmove"; break; case lir_add: s = "add"; break; case lir_sub: s = "sub"; break; @@ -1705,6 +1818,8 @@ const char * LIR_Op::name() const { case lir_irem: s = "irem"; break; case lir_fmad: s = "fmad"; 
break; case lir_fmaf: s = "fmaf"; break; + // LIR_Op4 + case lir_cmp_cmove: s = "cmp_cmove"; break; // LIR_OpJavaCall case lir_static_call: s = "static"; break; case lir_optvirtual_call: s = "optvirtual"; break; @@ -1856,6 +1971,26 @@ void LIR_OpBranch::print_instr(outputStream* out) const { } } +// LIR_OpCmpBranch +void LIR_OpCmpBranch::print_instr(outputStream* out) const { + print_condition(out, condition()); out->print(" "); + in_opr1()->print(out); out->print(" "); + in_opr2()->print(out); out->print(" "); + if (block() != NULL) { + out->print("[B%d] ", block()->block_id()); + } else if (stub() != NULL) { + out->print("["); + stub()->print_name(out); + out->print(": " INTPTR_FORMAT "]", p2i(stub())); + if (stub()->info() != NULL) out->print(" [bci:%d]", stub()->info()->stack()->bci()); + } else { + out->print("[label:" INTPTR_FORMAT "] ", p2i(label())); + } + if (ublock() != NULL) { + out->print("unordered: [B%d] ", ublock()->block_id()); + } +} + void LIR_Op::print_condition(outputStream* out, LIR_Condition cond) { switch(cond) { case lir_cond_equal: out->print("[EQ]"); break; @@ -1876,12 +2011,9 @@ void LIR_OpConvert::print_instr(outputStream* out) const { print_bytecode(out, bytecode()); in_opr()->print(out); out->print(" "); result_opr()->print(out); out->print(" "); -#ifdef PPC32 - if(tmp1()->is_valid()) { - tmp1()->print(out); out->print(" "); - tmp2()->print(out); out->print(" "); + if(tmp()->is_valid()) { + tmp()->print(out); out->print(" "); } -#endif } void LIR_OpConvert::print_bytecode(outputStream* out, Bytecodes::Code code) { @@ -1979,6 +2111,19 @@ void LIR_Op3::print_instr(outputStream* out) const { } +// LIR_Op4 +void LIR_Op4::print_instr(outputStream* out) const { + if (code() == lir_cmp_cmove) { + print_condition(out, condition()); out->print(" "); + } + in_opr1()->print(out); out->print(" "); + in_opr2()->print(out); out->print(" "); + in_opr3()->print(out); out->print(" "); + in_opr4()->print(out); out->print(" "); + result_opr()->print(out); +} + + void LIR_OpLock::print_instr(outputStream* out) const { hdr_opr()->print(out); out->print(" "); obj_opr()->print(out); out->print(" "); diff --git a/src/hotspot/share/c1/c1_LIR.hpp b/src/hotspot/share/c1/c1_LIR.hpp index 3234ca018b7..1f46e44c777 100644 --- a/src/hotspot/share/c1/c1_LIR.hpp +++ b/src/hotspot/share/c1/c1_LIR.hpp @@ -864,9 +864,11 @@ class LIR_OpConvert; class LIR_OpAllocObj; class LIR_OpRoundFP; class LIR_Op2; +class LIR_OpCmpBranch; class LIR_OpDelay; class LIR_Op3; class LIR_OpAllocArray; +class LIR_Op4; class LIR_OpCall; class LIR_OpJavaCall; class LIR_OpRTCall; @@ -933,6 +935,8 @@ enum LIR_Code { , lir_cmp_l2i , lir_ucmp_fd2i , lir_cmp_fd2i + , lir_cmp_branch + , lir_cmp_float_branch , lir_cmove , lir_add , lir_sub @@ -964,6 +968,9 @@ enum LIR_Code { , lir_fmad , lir_fmaf , end_op3 + , begin_op4 + , lir_cmp_cmove + , end_op4 , begin_opJavaCall , lir_static_call , lir_optvirtual_call @@ -1128,12 +1135,14 @@ class LIR_Op: public CompilationResourceObj { virtual LIR_OpAllocObj* as_OpAllocObj() { return NULL; } virtual LIR_OpRoundFP* as_OpRoundFP() { return NULL; } virtual LIR_OpBranch* as_OpBranch() { return NULL; } + virtual LIR_OpCmpBranch* as_OpCmpBranch() { return NULL; } virtual LIR_OpRTCall* as_OpRTCall() { return NULL; } virtual LIR_OpConvert* as_OpConvert() { return NULL; } virtual LIR_Op0* as_Op0() { return NULL; } virtual LIR_Op1* as_Op1() { return NULL; } virtual LIR_Op2* as_Op2() { return NULL; } virtual LIR_Op3* as_Op3() { return NULL; } + virtual LIR_Op4* as_Op4() { return NULL; } virtual 
LIR_OpArrayCopy* as_OpArrayCopy() { return NULL; } virtual LIR_OpUpdateCRC32* as_OpUpdateCRC32() { return NULL; } virtual LIR_OpTypeCheck* as_OpTypeCheck() { return NULL; } @@ -1463,15 +1472,18 @@ class LIR_OpConvert: public LIR_Op1 { private: Bytecodes::Code _bytecode; ConversionStub* _stub; + LIR_Opr _tmp; public: - LIR_OpConvert(Bytecodes::Code code, LIR_Opr opr, LIR_Opr result, ConversionStub* stub) + LIR_OpConvert(Bytecodes::Code code, LIR_Opr opr, LIR_Opr result, ConversionStub* stub, LIR_Opr tmp) : LIR_Op1(lir_convert, opr, result) , _stub(stub) - , _bytecode(code) {} + , _bytecode(code) + , _tmp(tmp) {} Bytecodes::Code bytecode() const { return _bytecode; } ConversionStub* stub() const { return _stub; } + LIR_Opr tmp() const { return _tmp; } virtual void emit_code(LIR_Assembler* masm); virtual LIR_OpConvert* as_OpConvert() { return this; } @@ -1626,7 +1638,7 @@ class LIR_Op2: public LIR_Op { , _tmp3(LIR_OprFact::illegalOpr) , _tmp4(LIR_OprFact::illegalOpr) , _tmp5(LIR_OprFact::illegalOpr) { - assert(code == lir_cmp || code == lir_assert, "code check"); + assert(code == lir_cmp || code == lir_cmp_branch || code == lir_cmp_float_branch || code == lir_assert, "code check"); } LIR_Op2(LIR_Code code, LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type) @@ -1658,7 +1670,7 @@ class LIR_Op2: public LIR_Op { , _tmp3(LIR_OprFact::illegalOpr) , _tmp4(LIR_OprFact::illegalOpr) , _tmp5(LIR_OprFact::illegalOpr) { - assert(code != lir_cmp && is_in_range(code, begin_op2, end_op2), "code check"); + assert((code != lir_cmp && code != lir_cmp_branch && code != lir_cmp_float_branch) && is_in_range(code, begin_op2, end_op2), "code check"); } LIR_Op2(LIR_Code code, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, LIR_Opr tmp1, LIR_Opr tmp2 = LIR_OprFact::illegalOpr, @@ -1674,7 +1686,7 @@ class LIR_Op2: public LIR_Op { , _tmp3(tmp3) , _tmp4(tmp4) , _tmp5(tmp5) { - assert(code != lir_cmp && is_in_range(code, begin_op2, end_op2), "code check"); + assert((code != lir_cmp && code != lir_cmp_branch && code != lir_cmp_float_branch) && is_in_range(code, begin_op2, end_op2), "code check"); } LIR_Opr in_opr1() const { return _opr1; } @@ -1686,10 +1698,12 @@ class LIR_Op2: public LIR_Op { LIR_Opr tmp4_opr() const { return _tmp4; } LIR_Opr tmp5_opr() const { return _tmp5; } LIR_Condition condition() const { - assert(code() == lir_cmp || code() == lir_cmove || code() == lir_assert, "only valid for cmp and cmove and assert"); return _condition; + assert(code() == lir_cmp || code() == lir_cmp_branch || code() == lir_cmp_float_branch || code() == lir_cmove || code() == lir_assert, "only valid for cmp and cmove and assert"); + return _condition; } void set_condition(LIR_Condition condition) { - assert(code() == lir_cmp || code() == lir_cmove, "only valid for cmp and cmove"); _condition = condition; + assert(code() == lir_cmp || code() == lir_cmp_branch || code() == lir_cmp_float_branch || code() == lir_cmove, "only valid for cmp and cmove"); + _condition = condition; } void set_fpu_stack_size(int size) { _fpu_stack_size = size; } @@ -1703,6 +1717,43 @@ class LIR_Op2: public LIR_Op { virtual void print_instr(outputStream* out) const PRODUCT_RETURN; }; +class LIR_OpCmpBranch: public LIR_Op2 { + friend class LIR_OpVisitState; + + private: + Label* _label; + BlockBegin* _block; // if this is a branch to a block, this is the block + BlockBegin* _ublock; // if this is a float-branch, this is the unorderd block + CodeStub* _stub; // if this is a branch to a stub, this is the stub + + public: + 
LIR_OpCmpBranch(LIR_Condition cond, LIR_Opr left, LIR_Opr right, Label* lbl, CodeEmitInfo* info = NULL) + : LIR_Op2(lir_cmp_branch, cond, left, right, info) + , _label(lbl) + , _block(NULL) + , _ublock(NULL) + , _stub(NULL) { } + + LIR_OpCmpBranch(LIR_Condition cond, LIR_Opr left, LIR_Opr right, CodeStub* stub, CodeEmitInfo* info = NULL); + LIR_OpCmpBranch(LIR_Condition cond, LIR_Opr left, LIR_Opr right, BlockBegin* block, CodeEmitInfo* info = NULL); + + // for unordered comparisons + LIR_OpCmpBranch(LIR_Condition cond, LIR_Opr left, LIR_Opr right, BlockBegin* block, BlockBegin* ublock, CodeEmitInfo* info = NULL); + + Label* label() const { return _label; } + BlockBegin* block() const { return _block; } + BlockBegin* ublock() const { return _ublock; } + CodeStub* stub() const { return _stub; } + + void change_block(BlockBegin* b); + void change_ublock(BlockBegin* b); + void negate_cond(); + + virtual void emit_code(LIR_Assembler* masm); + virtual LIR_OpCmpBranch* as_OpCmpBranch() { return this; } + virtual void print_instr(outputStream* out) const PRODUCT_RETURN; +}; + class LIR_OpAllocArray : public LIR_Op { friend class LIR_OpVisitState; @@ -1767,6 +1818,48 @@ class LIR_Op3: public LIR_Op { }; +class LIR_Op4: public LIR_Op { + friend class LIR_OpVisitState; + + private: + LIR_Opr _opr1; + LIR_Opr _opr2; + LIR_Opr _opr3; + LIR_Opr _opr4; + BasicType _type; + LIR_Condition _condition; + + void verify() const; + + public: + LIR_Op4(LIR_Code code, LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr opr3, LIR_Opr opr4, LIR_Opr result, BasicType type) + : LIR_Op(code, result, NULL) + , _opr1(opr1) + , _opr2(opr2) + , _opr3(opr3) + , _opr4(opr4) + , _type(type) + , _condition(condition) { + assert(is_in_range(code, begin_op4, end_op4), "code check"); + assert(type != T_ILLEGAL, "cmove should have type"); + } + LIR_Opr in_opr1() const { return _opr1; } + LIR_Opr in_opr2() const { return _opr2; } + LIR_Opr in_opr3() const { return _opr3; } + LIR_Opr in_opr4() const { return _opr4; } + BasicType type() const { return _type; } + LIR_Condition condition() const { + assert(code() == lir_cmp_cmove, "only valid for cmp cmove"); return _condition; + } + void set_condition(LIR_Condition condition) { + assert(code() == lir_cmp_cmove, "only valid for cmp cmove"); _condition = condition; + } + + virtual void emit_code(LIR_Assembler* masm); + virtual LIR_Op4* as_Op4() { return this; } + virtual void print_instr(outputStream* out) const PRODUCT_RETURN; +}; + //-------------------------------- class LabelObj: public CompilationResourceObj { private: @@ -2115,7 +2208,9 @@ class LIR_List: public CompilationResourceObj { void safepoint(LIR_Opr tmp, CodeEmitInfo* info) { append(new LIR_Op1(lir_safepoint, tmp, info)); } - void convert(Bytecodes::Code code, LIR_Opr left, LIR_Opr dst, ConversionStub* stub = NULL/*, bool is_32bit = false*/) { append(new LIR_OpConvert(code, left, dst, stub)); } + void convert(Bytecodes::Code code, LIR_Opr left, LIR_Opr dst, ConversionStub* stub = NULL, LIR_Opr tmp = LIR_OprFact::illegalOpr) { + append(new LIR_OpConvert(code, left, dst, stub, tmp)); + } void logical_and (LIR_Opr left, LIR_Opr right, LIR_Opr dst) { append(new LIR_Op2(lir_logic_and, left, right, dst)); } void logical_or (LIR_Opr left, LIR_Opr right, LIR_Opr dst) { append(new LIR_Op2(lir_logic_or, left, right, dst)); } @@ -2146,6 +2241,15 @@ class LIR_List: public CompilationResourceObj { cmp(condition, left, LIR_OprFact::intConst(right), info); } + // machine dependent + template + void cmp_branch(LIR_Condition 
condition, LIR_Opr left, LIR_Opr right, BasicType type, T tgt, CodeEmitInfo* info = NULL); + template + void cmp_branch(LIR_Condition condition, LIR_Opr left, int right, BasicType type, T tgt, CodeEmitInfo* info = NULL) { + cmp_branch(condition, left, LIR_OprFact::intConst(right), type, tgt, info); + } + void cmp_branch(LIR_Condition condition, LIR_Opr left, LIR_Opr right, BasicType type, BlockBegin* block, BlockBegin* unordered); + void cmp_mem_int(LIR_Condition condition, LIR_Opr base, int disp, int c, CodeEmitInfo* info); void cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Address* addr, CodeEmitInfo* info); @@ -2153,6 +2257,9 @@ class LIR_List: public CompilationResourceObj { append(new LIR_Op2(lir_cmove, condition, src1, src2, dst, type)); } + // machine dependent + void cmp_cmove(LIR_Condition condition, LIR_Opr left, LIR_Opr right, LIR_Opr src1, LIR_Opr src2, LIR_Opr dst, BasicType type); + void cas_long(LIR_Opr addr, LIR_Opr cmp_value, LIR_Opr new_value, LIR_Opr t1, LIR_Opr t2, LIR_Opr result = LIR_OprFact::illegalOpr); void cas_obj(LIR_Opr addr, LIR_Opr cmp_value, LIR_Opr new_value, diff --git a/src/hotspot/share/c1/c1_LIRAssembler.cpp b/src/hotspot/share/c1/c1_LIRAssembler.cpp index 160483d5f74..bec297ebd2f 100644 --- a/src/hotspot/share/c1/c1_LIRAssembler.cpp +++ b/src/hotspot/share/c1/c1_LIRAssembler.cpp @@ -777,6 +777,18 @@ void LIR_Assembler::emit_op2(LIR_Op2* op) { } +void LIR_Assembler::emit_op4(LIR_Op4* op) { + switch (op->code()) { + case lir_cmp_cmove: + cmp_cmove(op->condition(), op->in_opr1(), op->in_opr2(), op->in_opr3(), op->in_opr4(), op->result_opr(), op->type()); + break; + + default: + Unimplemented(); + break; + } +} + void LIR_Assembler::build_frame() { _masm->build_frame(initial_frame_size_in_bytes(), bang_size_in_bytes()); } diff --git a/src/hotspot/share/c1/c1_LIRAssembler.hpp b/src/hotspot/share/c1/c1_LIRAssembler.hpp index 44a5bcbe542..114b155f926 100644 --- a/src/hotspot/share/c1/c1_LIRAssembler.hpp +++ b/src/hotspot/share/c1/c1_LIRAssembler.hpp @@ -190,7 +190,9 @@ class LIR_Assembler: public CompilationResourceObj { void emit_op1(LIR_Op1* op); void emit_op2(LIR_Op2* op); void emit_op3(LIR_Op3* op); + void emit_op4(LIR_Op4* op); void emit_opBranch(LIR_OpBranch* op); + void emit_opCmpBranch(LIR_OpCmpBranch* op); void emit_opLabel(LIR_OpLabel* op); void emit_arraycopy(LIR_OpArrayCopy* op); void emit_updatecrc32(LIR_OpUpdateCRC32* op); @@ -223,6 +225,7 @@ class LIR_Assembler: public CompilationResourceObj { void comp_mem_op(LIR_Opr src, LIR_Opr result, BasicType type, CodeEmitInfo* info); // info set for null exceptions void comp_fl2i(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr result, LIR_Op2* op); void cmove(LIR_Condition code, LIR_Opr left, LIR_Opr right, LIR_Opr result, BasicType type); + void cmp_cmove(LIR_Condition code, LIR_Opr left, LIR_Opr right, LIR_Opr src1, LIR_Opr src2, LIR_Opr result, BasicType type); void call( LIR_OpJavaCall* op, relocInfo::relocType rtype); void ic_call( LIR_OpJavaCall* op); diff --git a/src/hotspot/share/c1/c1_LIRGenerator.cpp b/src/hotspot/share/c1/c1_LIRGenerator.cpp index 88f6d30697d..1d5a6668eae 100644 --- a/src/hotspot/share/c1/c1_LIRGenerator.cpp +++ b/src/hotspot/share/c1/c1_LIRGenerator.cpp @@ -480,13 +480,11 @@ void LIRGenerator::array_range_check(LIR_Opr array, LIR_Opr index, CodeEmitInfo* null_check_info, CodeEmitInfo* range_check_info) { CodeStub* stub = new RangeCheckStub(range_check_info, index, array); if (index->is_constant()) { - cmp_mem_int(lir_cond_belowEqual, array, 
arrayOopDesc::length_offset_in_bytes(), - index->as_jint(), null_check_info); - __ branch(lir_cond_belowEqual, T_INT, stub); // forward branch + cmp_mem_int_branch(lir_cond_belowEqual, array, arrayOopDesc::length_offset_in_bytes(), + index->as_jint(), stub, null_check_info); // forward branch } else { - cmp_reg_mem(lir_cond_aboveEqual, index, array, - arrayOopDesc::length_offset_in_bytes(), T_INT, null_check_info); - __ branch(lir_cond_aboveEqual, T_INT, stub); // forward branch + cmp_reg_mem_branch(lir_cond_aboveEqual, index, array, arrayOopDesc::length_offset_in_bytes(), + T_INT, stub, null_check_info); // forward branch } } @@ -494,12 +492,11 @@ void LIRGenerator::array_range_check(LIR_Opr array, LIR_Opr index, void LIRGenerator::nio_range_check(LIR_Opr buffer, LIR_Opr index, LIR_Opr result, CodeEmitInfo* info) { CodeStub* stub = new RangeCheckStub(info, index); if (index->is_constant()) { - cmp_mem_int(lir_cond_belowEqual, buffer, java_nio_Buffer::limit_offset(), index->as_jint(), info); - __ branch(lir_cond_belowEqual, T_INT, stub); // forward branch + cmp_mem_int_branch(lir_cond_belowEqual, buffer, java_nio_Buffer::limit_offset(), + index->as_jint(), stub, info); // forward branch } else { - cmp_reg_mem(lir_cond_aboveEqual, index, buffer, - java_nio_Buffer::limit_offset(), T_INT, info); - __ branch(lir_cond_aboveEqual, T_INT, stub); // forward branch + cmp_reg_mem_branch(lir_cond_aboveEqual, index, buffer, java_nio_Buffer::limit_offset(), + T_INT, stub, info); // forward branch } __ move(index, result); } @@ -935,7 +932,7 @@ LIR_Opr LIRGenerator::force_to_spill(LIR_Opr value, BasicType t) { return tmp; } -void LIRGenerator::profile_branch(If* if_instr, If::Condition cond) { +void LIRGenerator::profile_branch(If* if_instr, If::Condition cond, LIR_Opr left, LIR_Opr right) { if (if_instr->should_profile()) { ciMethod* method = if_instr->profiled_method(); assert(method != NULL, "method should be set if branch is profiled"); @@ -956,10 +953,17 @@ void LIRGenerator::profile_branch(If* if_instr, If::Condition cond) { __ metadata2reg(md->constant_encoding(), md_reg); LIR_Opr data_offset_reg = new_pointer_register(); - __ cmove(lir_cond(cond), - LIR_OprFact::intptrConst(taken_count_offset), - LIR_OprFact::intptrConst(not_taken_count_offset), - data_offset_reg, as_BasicType(if_instr->x()->type())); + if (left == LIR_OprFact::illegalOpr && right == LIR_OprFact::illegalOpr) { + __ cmove(lir_cond(cond), + LIR_OprFact::intptrConst(taken_count_offset), + LIR_OprFact::intptrConst(not_taken_count_offset), + data_offset_reg, as_BasicType(if_instr->x()->type())); + } else { + __ cmp_cmove(lir_cond(cond), left, right, + LIR_OprFact::intptrConst(taken_count_offset), + LIR_OprFact::intptrConst(not_taken_count_offset), + data_offset_reg, as_BasicType(if_instr->x()->type())); + } // MDO cells are intptr_t, so the data_reg width is arch-dependent. 
LIR_Opr data_reg = new_pointer_register(); @@ -1316,8 +1320,8 @@ void LIRGenerator::do_isPrimitive(Intrinsic* x) { } __ move(new LIR_Address(rcvr.result(), java_lang_Class::klass_offset_in_bytes(), T_ADDRESS), temp, info); - __ cmp(lir_cond_notEqual, temp, LIR_OprFact::metadataConst(0)); - __ cmove(lir_cond_notEqual, LIR_OprFact::intConst(0), LIR_OprFact::intConst(1), result, T_BOOLEAN); + __ cmp_cmove(lir_cond_notEqual, temp, LIR_OprFact::metadataConst(0), + LIR_OprFact::intConst(0), LIR_OprFact::intConst(1), result, T_BOOLEAN); } @@ -1599,8 +1603,8 @@ void LIRGenerator::do_StoreIndexed(StoreIndexed* x) { if (GenerateRangeChecks && needs_range_check) { if (use_length) { - __ cmp(lir_cond_belowEqual, length.result(), index.result()); - __ branch(lir_cond_belowEqual, T_INT, new RangeCheckStub(range_check_info, index.result(), array.result())); + CodeStub* stub = new RangeCheckStub(range_check_info, index.result(), array.result()); + __ cmp_branch(lir_cond_belowEqual, length.result(), index.result(), T_INT, stub); } else { array_range_check(array.result(), index.result(), null_check_info, range_check_info); // range_check also does the null check @@ -1778,12 +1782,9 @@ void LIRGenerator::do_NIOCheckIndex(Intrinsic* x) { CodeEmitInfo* info = state_for(x); CodeStub* stub = new RangeCheckStub(info, index.result()); if (index.result()->is_constant()) { - cmp_mem_int(lir_cond_belowEqual, buf.result(), java_nio_Buffer::limit_offset(), index.result()->as_jint(), info); - __ branch(lir_cond_belowEqual, T_INT, stub); + cmp_mem_int_branch(lir_cond_belowEqual, buf.result(), java_nio_Buffer::limit_offset(), index.result()->as_jint(), stub, info); } else { - cmp_reg_mem(lir_cond_aboveEqual, index.result(), buf.result(), - java_nio_Buffer::limit_offset(), T_INT, info); - __ branch(lir_cond_aboveEqual, T_INT, stub); + cmp_reg_mem_branch(lir_cond_aboveEqual, index.result(), buf.result(), java_nio_Buffer::limit_offset(), T_INT, stub, info); } __ move(index.result(), result); } else { @@ -1861,8 +1862,8 @@ void LIRGenerator::do_LoadIndexed(LoadIndexed* x) { } else if (use_length) { // TODO: use a (modified) version of array_range_check that does not require a // constant length to be loaded to a register - __ cmp(lir_cond_belowEqual, length.result(), index.result()); - __ branch(lir_cond_belowEqual, T_INT, new RangeCheckStub(range_check_info, index.result(), array.result())); + CodeStub* stub = new RangeCheckStub(range_check_info, index.result(), array.result()); + __ cmp_branch(lir_cond_belowEqual, length.result(), index.result(), T_INT, stub); } else { array_range_check(array.result(), index.result(), null_check_info, range_check_info); // The range check performs the null check, so clear it out for the load @@ -2235,19 +2236,14 @@ void LIRGenerator::do_SwitchRanges(SwitchRangeArray* x, LIR_Opr value, BlockBegi int high_key = one_range->high_key(); BlockBegin* dest = one_range->sux(); if (low_key == high_key) { - __ cmp(lir_cond_equal, value, low_key); - __ branch(lir_cond_equal, T_INT, dest); + __ cmp_branch(lir_cond_equal, value, low_key, T_INT, dest); } else if (high_key - low_key == 1) { - __ cmp(lir_cond_equal, value, low_key); - __ branch(lir_cond_equal, T_INT, dest); - __ cmp(lir_cond_equal, value, high_key); - __ branch(lir_cond_equal, T_INT, dest); + __ cmp_branch(lir_cond_equal, value, low_key, T_INT, dest); + __ cmp_branch(lir_cond_equal, value, high_key, T_INT, dest); } else { LabelObj* L = new LabelObj(); - __ cmp(lir_cond_less, value, low_key); - __ branch(lir_cond_less, T_INT, L->label()); - 
__ cmp(lir_cond_lessEqual, value, high_key); - __ branch(lir_cond_lessEqual, T_INT, dest); + __ cmp_branch(lir_cond_less, value, low_key, T_INT, L->label()); + __ cmp_branch(lir_cond_lessEqual, value, high_key, T_INT, dest); __ branch_destination(L->label()); } } @@ -2347,12 +2343,11 @@ void LIRGenerator::do_TableSwitch(TableSwitch* x) { __ move(LIR_OprFact::intptrConst(default_count_offset), data_offset_reg); for (int i = 0; i < len; i++) { int count_offset = md->byte_offset_of_slot(data, MultiBranchData::case_count_offset(i)); - __ cmp(lir_cond_equal, value, i + lo_key); __ move(data_offset_reg, tmp_reg); - __ cmove(lir_cond_equal, - LIR_OprFact::intptrConst(count_offset), - tmp_reg, - data_offset_reg, T_INT); + __ cmp_cmove(lir_cond_equal, value, LIR_OprFact::intConst(i + lo_key), + LIR_OprFact::intptrConst(count_offset), + tmp_reg, + data_offset_reg, T_INT); } LIR_Opr data_reg = new_pointer_register(); @@ -2366,8 +2361,7 @@ void LIRGenerator::do_TableSwitch(TableSwitch* x) { do_SwitchRanges(create_lookup_ranges(x), value, x->default_sux()); } else { for (int i = 0; i < len; i++) { - __ cmp(lir_cond_equal, value, i + lo_key); - __ branch(lir_cond_equal, T_INT, x->sux_at(i)); + __ cmp_branch(lir_cond_equal, value, i + lo_key, T_INT, x->sux_at(i)); } __ jump(x->default_sux()); } @@ -2405,12 +2399,11 @@ void LIRGenerator::do_LookupSwitch(LookupSwitch* x) { __ move(LIR_OprFact::intptrConst(default_count_offset), data_offset_reg); for (int i = 0; i < len; i++) { int count_offset = md->byte_offset_of_slot(data, MultiBranchData::case_count_offset(i)); - __ cmp(lir_cond_equal, value, x->key_at(i)); __ move(data_offset_reg, tmp_reg); - __ cmove(lir_cond_equal, - LIR_OprFact::intptrConst(count_offset), - tmp_reg, - data_offset_reg, T_INT); + __ cmp_cmove(lir_cond_equal, value, LIR_OprFact::intConst(x->key_at(i)), + LIR_OprFact::intptrConst(count_offset), + tmp_reg, + data_offset_reg, T_INT); } LIR_Opr data_reg = new_pointer_register(); @@ -2425,8 +2418,7 @@ void LIRGenerator::do_LookupSwitch(LookupSwitch* x) { } else { int len = x->length(); for (int i = 0; i < len; i++) { - __ cmp(lir_cond_equal, value, x->key_at(i)); - __ branch(lir_cond_equal, T_INT, x->sux_at(i)); + __ cmp_branch(lir_cond_equal, value, x->key_at(i), T_INT, x->sux_at(i)); } __ jump(x->default_sux()); } @@ -2936,8 +2928,8 @@ void LIRGenerator::do_IfOp(IfOp* x) { f_val.dont_load_item(); LIR_Opr reg = rlock_result(x); - __ cmp(lir_cond(x->cond()), left.result(), right.result()); - __ cmove(lir_cond(x->cond()), t_val.result(), f_val.result(), reg, as_BasicType(x->x()->type())); + __ cmp_cmove(lir_cond(x->cond()), left.result(), right.result(), + t_val.result(), f_val.result(), reg, as_BasicType(x->x()->type())); } #ifdef JFR_HAVE_INTRINSICS @@ -2981,8 +2973,7 @@ void LIRGenerator::do_getEventWriter(Intrinsic* x) { __ move(LIR_OprFact::oopConst(NULL), result); LIR_Opr jobj = new_register(T_METADATA); __ move_wide(jobj_addr, jobj); - __ cmp(lir_cond_equal, jobj, LIR_OprFact::metadataConst(0)); - __ branch(lir_cond_equal, T_OBJECT, L_end->label()); + __ cmp_branch(lir_cond_equal, jobj, LIR_OprFact::metadataConst(0), T_OBJECT, L_end->label()); access_load(IN_NATIVE, T_OBJECT, LIR_OprFact::address(new LIR_Address(jobj, T_OBJECT)), result); @@ -3287,21 +3278,24 @@ void LIRGenerator::do_ProfileInvoke(ProfileInvoke* x) { void LIRGenerator::increment_backedge_counter_conditionally(LIR_Condition cond, LIR_Opr left, LIR_Opr right, CodeEmitInfo* info, int left_bci, int right_bci, int bci) { if (compilation()->count_backedges()) { + LIR_Opr 
step = new_register(T_INT); + LIR_Opr plus_one = LIR_OprFact::intConst(InvocationCounter::count_increment); + LIR_Opr zero = LIR_OprFact::intConst(0); #if defined(X86) && !defined(_LP64) // BEWARE! On 32-bit x86 cmp clobbers its left argument so we need a temp copy. LIR_Opr left_copy = new_register(left->type()); __ move(left, left_copy); __ cmp(cond, left_copy, right); -#else - __ cmp(cond, left, right); -#endif - LIR_Opr step = new_register(T_INT); - LIR_Opr plus_one = LIR_OprFact::intConst(InvocationCounter::count_increment); - LIR_Opr zero = LIR_OprFact::intConst(0); __ cmove(cond, (left_bci < bci) ? plus_one : zero, (right_bci < bci) ? plus_one : zero, step, left->type()); +#else + __ cmp_cmove(cond, left, right, + (left_bci < bci) ? plus_one : zero, + (right_bci < bci) ? plus_one : zero, + step, left->type()); +#endif increment_backedge_counter(info, step, bci); } } @@ -3340,8 +3334,7 @@ void LIRGenerator::decrement_age(CodeEmitInfo* info) { // DeoptimizeStub will reexecute from the current state in code info. CodeStub* deopt = new DeoptimizeStub(info, Deoptimization::Reason_tenured, Deoptimization::Action_make_not_entrant); - __ cmp(lir_cond_lessEqual, result, LIR_OprFact::intConst(0)); - __ branch(lir_cond_lessEqual, T_INT, deopt); + __ cmp_branch(lir_cond_lessEqual, result, LIR_OprFact::intConst(0), T_INT, deopt); } } @@ -3387,8 +3380,7 @@ void LIRGenerator::increment_event_counter_impl(CodeEmitInfo* info, int freq = frequency << InvocationCounter::count_shift; if (freq == 0) { if (!step->is_constant()) { - __ cmp(lir_cond_notEqual, step, LIR_OprFact::intConst(0)); - __ branch(lir_cond_notEqual, T_ILLEGAL, overflow); + __ cmp_branch(lir_cond_notEqual, step, LIR_OprFact::intConst(0), T_ILLEGAL, overflow); } else { __ branch(lir_cond_always, T_ILLEGAL, overflow); } @@ -3396,12 +3388,11 @@ void LIRGenerator::increment_event_counter_impl(CodeEmitInfo* info, LIR_Opr mask = load_immediate(freq, T_INT); if (!step->is_constant()) { // If step is 0, make sure the overflow check below always fails - __ cmp(lir_cond_notEqual, step, LIR_OprFact::intConst(0)); - __ cmove(lir_cond_notEqual, result, LIR_OprFact::intConst(InvocationCounter::count_increment), result, T_INT); + __ cmp_cmove(lir_cond_notEqual, step, LIR_OprFact::intConst(0), + result, LIR_OprFact::intConst(InvocationCounter::count_increment), result, T_INT); } __ logical_and(result, mask, result); - __ cmp(lir_cond_equal, result, LIR_OprFact::intConst(0)); - __ branch(lir_cond_equal, T_INT, overflow); + __ cmp_branch(lir_cond_equal, result, LIR_OprFact::intConst(0), T_INT, overflow); } __ branch_destination(overflow->continuation()); } @@ -3514,8 +3505,7 @@ void LIRGenerator::do_RangeCheckPredicate(RangeCheckPredicate *x) { CodeEmitInfo *info = state_for(x, x->state()); CodeStub* stub = new PredicateFailedStub(info); - __ cmp(lir_cond(cond), left, right); - __ branch(lir_cond(cond), right->type(), stub); + __ cmp_branch(lir_cond(cond), left, right, right->type(), stub); } } @@ -3662,8 +3652,8 @@ LIR_Opr LIRGenerator::mask_boolean(LIR_Opr array, LIR_Opr value, CodeEmitInfo*& __ move(new LIR_Address(klass, in_bytes(Klass::layout_helper_offset()), T_INT), layout); int diffbit = Klass::layout_helper_boolean_diffbit(); __ logical_and(layout, LIR_OprFact::intConst(diffbit), layout); - __ cmp(lir_cond_notEqual, layout, LIR_OprFact::intConst(0)); - __ cmove(lir_cond_notEqual, value_fixed, value, value_fixed, T_BYTE); + __ cmp_cmove(lir_cond_notEqual, layout, LIR_OprFact::intConst(0), + value_fixed, value, value_fixed, T_BYTE); value = 
value_fixed; return value; } diff --git a/src/hotspot/share/c1/c1_LIRGenerator.hpp b/src/hotspot/share/c1/c1_LIRGenerator.hpp index 3ad325d759c..f377b278594 100644 --- a/src/hotspot/share/c1/c1_LIRGenerator.hpp +++ b/src/hotspot/share/c1/c1_LIRGenerator.hpp @@ -363,8 +363,10 @@ class LIRGenerator: public InstructionVisitor, public BlockClosure { void new_instance (LIR_Opr dst, ciInstanceKlass* klass, bool is_unresolved, LIR_Opr scratch1, LIR_Opr scratch2, LIR_Opr scratch3, LIR_Opr scratch4, LIR_Opr klass_reg, CodeEmitInfo* info); // machine dependent - void cmp_mem_int(LIR_Condition condition, LIR_Opr base, int disp, int c, CodeEmitInfo* info); - void cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, CodeEmitInfo* info); + template <typename T> + void cmp_mem_int_branch(LIR_Condition condition, LIR_Opr base, int disp, int c, T tgt, CodeEmitInfo* info); + template <typename T> + void cmp_reg_mem_branch(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, T tgt, CodeEmitInfo* info); void arraycopy_helper(Intrinsic* x, int* flags, ciArrayKlass** expected_type); @@ -391,7 +393,7 @@ class LIRGenerator: public InstructionVisitor, public BlockClosure { LIR_Opr safepoint_poll_register(); - void profile_branch(If* if_instr, If::Condition cond); + void profile_branch(If* if_instr, If::Condition cond, LIR_Opr left = LIR_OprFact::illegalOpr, LIR_Opr right = LIR_OprFact::illegalOpr); void increment_event_counter_impl(CodeEmitInfo* info, ciMethod *method, LIR_Opr step, int frequency, int bci, bool backedge, bool notify); diff --git a/src/hotspot/share/c1/c1_LinearScan.cpp b/src/hotspot/share/c1/c1_LinearScan.cpp index acc969ac9cf..16379656139 100644 --- a/src/hotspot/share/c1/c1_LinearScan.cpp +++ b/src/hotspot/share/c1/c1_LinearScan.cpp @@ -35,6 +35,12 @@ #include "runtime/timerTrace.hpp" #include "utilities/bitMap.inline.hpp" +/* + * This file has been modified by Loongson Technology in 2022, These + * modifications are Copyright (c) 2022, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #ifndef PRODUCT static LinearScanStatistic _stat_before_alloc; @@ -1258,6 +1264,23 @@ void LinearScan::add_register_hints(LIR_Op* op) { } break; } + case lir_cmp_cmove: { + assert(op->as_Op4() != NULL, "lir_cmp_cmove must be LIR_Op4"); + LIR_Op4* cmove = (LIR_Op4*)op; + + LIR_Opr move_from = cmove->in_opr3(); + LIR_Opr move_to = cmove->result_opr(); + + if (move_to->is_register() && move_from->is_register()) { + Interval* from = interval_at(reg_num(move_from)); + Interval* to = interval_at(reg_num(move_to)); + if (from != NULL && to != NULL) { + to->set_register_hint(from); + TRACE_LINEAR_SCAN(4, tty->print_cr("operation at op_id %d: added hint from interval %d to %d", cmove->id(), from->reg_num(), to->reg_num())); + } + } + break; + } default: break; } @@ -3350,7 +3373,9 @@ void LinearScan::verify_no_oops_in_fixed_intervals() { check_live = (move->patch_code() == lir_patch_none); } LIR_OpBranch* branch = op->as_OpBranch(); - if (branch != NULL && branch->stub() != NULL && branch->stub()->is_exception_throw_stub()) { + LIR_OpCmpBranch* cmp_branch = op->as_OpCmpBranch(); + if ((branch != NULL && branch->stub() != NULL && branch->stub()->is_exception_throw_stub()) || + (cmp_branch != NULL && cmp_branch->stub() != NULL && cmp_branch->stub()->is_exception_throw_stub())) { // Don't bother checking the stub in this case since the // exception stub will never return to normal control flow.
check_live = false; @@ -6200,6 +6225,16 @@ void ControlFlowOptimizer::substitute_branch_target(BlockBegin* block, BlockBegi assert(op->as_OpBranch() != NULL, "branch must be of type LIR_OpBranch"); LIR_OpBranch* branch = (LIR_OpBranch*)op; + if (branch->block() == target_from) { + branch->change_block(target_to); + } + if (branch->ublock() == target_from) { + branch->change_ublock(target_to); + } + } else if (op->code() == lir_cmp_branch || op->code() == lir_cmp_float_branch) { + assert(op->as_OpCmpBranch() != NULL, "branch must be of type LIR_OpCmpBranch"); + LIR_OpCmpBranch* branch = (LIR_OpCmpBranch*)op; + if (branch->block() == target_from) { branch->change_block(target_to); } @@ -6328,6 +6363,20 @@ void ControlFlowOptimizer::delete_unnecessary_jumps(BlockList* code) { } } } + } else if (prev_op->code() == lir_cmp_branch || prev_op->code() == lir_cmp_float_branch) { + assert(prev_op->as_OpCmpBranch() != NULL, "branch must be of type LIR_OpCmpBranch"); + LIR_OpCmpBranch* prev_branch = (LIR_OpCmpBranch*)prev_op; + + if (prev_branch->stub() == NULL) { + if (prev_branch->block() == code->at(i + 1) && prev_branch->info() == NULL) { + TRACE_LINEAR_SCAN(3, tty->print_cr("Negating conditional branch and deleting unconditional branch at end of block B%d", block->block_id())); + + // eliminate a conditional branch to the immediate successor + prev_branch->change_block(last_branch->block()); + prev_branch->negate_cond(); + instructions->trunc_to(instructions->length() - 1); + } + } } } } @@ -6403,6 +6452,13 @@ void ControlFlowOptimizer::verify(BlockList* code) { assert(op_branch->block() == NULL || code->find(op_branch->block()) != -1, "branch target not valid"); assert(op_branch->ublock() == NULL || code->find(op_branch->ublock()) != -1, "branch target not valid"); } + + LIR_OpCmpBranch* op_cmp_branch = instructions->at(j)->as_OpCmpBranch(); + + if (op_cmp_branch != NULL) { + assert(op_cmp_branch->block() == NULL || code->find(op_cmp_branch->block()) != -1, "branch target not valid"); + assert(op_cmp_branch->ublock() == NULL || code->find(op_cmp_branch->ublock()) != -1, "branch target not valid"); + } } for (j = 0; j < block->number_of_sux() - 1; j++) { @@ -6647,6 +6703,24 @@ void LinearScanStatistic::collect(LinearScan* allocator) { break; } + case lir_cmp_branch: + case lir_cmp_float_branch: { + LIR_OpCmpBranch* branch = op->as_OpCmpBranch(); + if (branch->block() == NULL) { + inc_counter(counter_stub_branch); + } else { + inc_counter(counter_cond_branch); + } + inc_counter(counter_cmp); + break; + } + + case lir_cmp_cmove: { + inc_counter(counter_misc_inst); + inc_counter(counter_cmp); + break; + } + case lir_neg: case lir_add: case lir_sub: diff --git a/src/hotspot/share/code/nmethod.cpp b/src/hotspot/share/code/nmethod.cpp index 747971af416..093831ac09a 100644 --- a/src/hotspot/share/code/nmethod.cpp +++ b/src/hotspot/share/code/nmethod.cpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2021. These + * modifications are Copyright (c) 2018, 2021, Loongson Technology, and are made + * available on the same license terms set forth above. 
+ */ + #include "precompiled.hpp" #include "jvm.h" #include "code/codeCache.hpp" @@ -2155,7 +2161,8 @@ void nmethod::verify_scopes() { //verify_interrupt_point(iter.addr()); break; case relocInfo::runtime_call_type: - case relocInfo::runtime_call_w_cp_type: { + NOT_MIPS64(case relocInfo::runtime_call_w_cp_type:) + { address destination = iter.reloc()->value(); // Right now there is no way to find out which entries support // an interrupt point. It would be nice if we had this @@ -2392,7 +2399,8 @@ const char* nmethod::reloc_string_for(u_char* begin, u_char* end) { return st.as_string(); } case relocInfo::runtime_call_type: - case relocInfo::runtime_call_w_cp_type: { + NOT_MIPS64(case relocInfo::runtime_call_w_cp_type:) + { stringStream st; st.print("runtime_call"); CallRelocation* r = (CallRelocation*)iter.reloc(); diff --git a/src/hotspot/share/code/relocInfo.cpp b/src/hotspot/share/code/relocInfo.cpp index a20de8dde64..c6f49cf7d60 100644 --- a/src/hotspot/share/code/relocInfo.cpp +++ b/src/hotspot/share/code/relocInfo.cpp @@ -433,6 +433,7 @@ void virtual_call_Relocation::unpack_data() { _cached_value = x0==0? NULL: address_from_scaled_offset(x0, point); } +#ifndef MIPS64 void runtime_call_w_cp_Relocation::pack_data_to(CodeSection * dest) { short* p = pack_1_int_to((short *)dest->locs_end(), (jint)(_offset >> 2)); dest->set_locs_end((relocInfo*) p); @@ -441,6 +442,7 @@ void runtime_call_w_cp_Relocation::pack_data_to(CodeSection * dest) { void runtime_call_w_cp_Relocation::unpack_data() { _offset = unpack_1_int() << 2; } +#endif void static_stub_Relocation::pack_data_to(CodeSection* dest) { short* p = (short*) dest->locs_end(); @@ -910,7 +912,7 @@ void RelocIterator::print_current() { break; } case relocInfo::runtime_call_type: - case relocInfo::runtime_call_w_cp_type: + NOT_MIPS64(case relocInfo::runtime_call_w_cp_type:) { CallRelocation* r = (CallRelocation*) reloc(); tty->print(" | [destination=" INTPTR_FORMAT "]", p2i(r->destination())); diff --git a/src/hotspot/share/code/relocInfo.hpp b/src/hotspot/share/code/relocInfo.hpp index 57931a1a6a7..fb56fd3ab18 100644 --- a/src/hotspot/share/code/relocInfo.hpp +++ b/src/hotspot/share/code/relocInfo.hpp @@ -269,7 +269,11 @@ class relocInfo { poll_return_type = 11, // polling instruction for safepoints at return metadata_type = 12, // metadata that used to be oops trampoline_stub_type = 13, // stub-entry for trampoline +#ifndef MIPS64 runtime_call_w_cp_type = 14, // Runtime call which may load its target from the constant pool +#else + internal_pc_type = 14, // tag for internal data +#endif data_prefix_tag = 15, // tag for a prefix (carries data arguments) type_mask = 15 // A mask which selects only the above values }; @@ -304,13 +308,13 @@ class relocInfo { visitor(static_call) \ visitor(static_stub) \ visitor(runtime_call) \ - visitor(runtime_call_w_cp) \ + NOT_MIPS64(visitor(runtime_call_w_cp)) \ visitor(external_word) \ visitor(internal_word) \ visitor(poll) \ visitor(poll_return) \ - visitor(section_word) \ visitor(trampoline_stub) \ + NOT_MIPS64(visitor(section_word))MIPS64_ONLY(ZERO_ONLY(visitor(section_word))NOT_ZERO(visitor(internal_pc))) public: @@ -1174,6 +1178,15 @@ class runtime_call_Relocation : public CallRelocation { }; +#ifdef MIPS64 +// to handle the set_last_java_frame pc +class internal_pc_Relocation : public Relocation { + relocInfo::relocType type() { return relocInfo::internal_pc_type; } + public: + address pc() { return pd_get_address_from_code(); } + void fix_relocation_after_move(const CodeBuffer* src, CodeBuffer* 
dest); +}; +#else class runtime_call_w_cp_Relocation : public CallRelocation { relocInfo::relocType type() { return relocInfo::runtime_call_w_cp_type; } @@ -1202,6 +1215,7 @@ class runtime_call_w_cp_Relocation : public CallRelocation { void pack_data_to(CodeSection * dest); void unpack_data(); }; +#endif // Trampoline Relocations. // A trampoline allows to encode a small branch in the code, even if there diff --git a/src/hotspot/share/code/vtableStubs.cpp b/src/hotspot/share/code/vtableStubs.cpp index 3c986f40ff7..23d07f0505c 100644 --- a/src/hotspot/share/code/vtableStubs.cpp +++ b/src/hotspot/share/code/vtableStubs.cpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2021, These + * modifications are Copyright (c) 2019, 2021, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #include "precompiled.hpp" #include "code/vtableStubs.hpp" #include "compiler/compileBroker.hpp" @@ -98,7 +104,11 @@ int VtableStubs::_itab_stub_size = 0; #if defined(PRODUCT) // These values are good for the PRODUCT case (no tracing). +#if defined MIPS64 || defined LOONGARCH64 + static const int first_vtableStub_size = 128; +#else static const int first_vtableStub_size = 64; +#endif static const int first_itableStub_size = 256; #else // These values are good for the non-PRODUCT case (when tracing can be switched on). @@ -109,6 +119,7 @@ int VtableStubs::_itab_stub_size = 0; // vtable itable // aarch64: 460 324 // arm: ? ? + // mips64: 728 328 // ppc (linux, BE): 404 288 // ppc (linux, LE): 356 276 // ppc (AIX): 416 296 diff --git a/src/hotspot/share/gc/g1/c1/g1BarrierSetC1.cpp b/src/hotspot/share/gc/g1/c1/g1BarrierSetC1.cpp index 4289e5e5c4b..9502463bd5a 100644 --- a/src/hotspot/share/gc/g1/c1/g1BarrierSetC1.cpp +++ b/src/hotspot/share/gc/g1/c1/g1BarrierSetC1.cpp @@ -74,7 +74,6 @@ void G1BarrierSetC1::pre_barrier(LIRAccess& access, LIR_Opr addr_opr, // Read the marking-in-progress flag. LIR_Opr flag_val = gen->new_register(T_INT); __ load(mark_active_flag_addr, flag_val); - __ cmp(lir_cond_notEqual, flag_val, LIR_OprFact::intConst(0)); LIR_PatchCode pre_val_patch_code = lir_patch_none; @@ -103,7 +102,7 @@ void G1BarrierSetC1::pre_barrier(LIRAccess& access, LIR_Opr addr_opr, slow = new G1PreBarrierStub(pre_val); } - __ branch(lir_cond_notEqual, T_INT, slow); + __ cmp_branch(lir_cond_notEqual, flag_val, LIR_OprFact::intConst(0), T_INT, slow); __ branch_destination(slow->continuation()); } @@ -168,10 +167,9 @@ void G1BarrierSetC1::post_barrier(LIRAccess& access, LIR_OprDesc* addr, LIR_OprD } assert(new_val->is_register(), "must be a register at this point"); - __ cmp(lir_cond_notEqual, xor_shift_res, LIR_OprFact::intptrConst(NULL_WORD)); - CodeStub* slow = new G1PostBarrierStub(addr, new_val); - __ branch(lir_cond_notEqual, LP64_ONLY(T_LONG) NOT_LP64(T_INT), slow); + __ cmp_branch(lir_cond_notEqual, xor_shift_res, LIR_OprFact::intptrConst(NULL_WORD), + LP64_ONLY(T_LONG) NOT_LP64(T_INT), slow); __ branch_destination(slow->continuation()); } diff --git a/src/hotspot/share/gc/g1/g1FullGCMarker.inline.hpp b/src/hotspot/share/gc/g1/g1FullGCMarker.inline.hpp index 98a2fe7f1c3..b43a441066c 100644 --- a/src/hotspot/share/gc/g1/g1FullGCMarker.inline.hpp +++ b/src/hotspot/share/gc/g1/g1FullGCMarker.inline.hpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2022. These + * modifications are Copyright (c) 2022, Loongson Technology, and are made + * available on the same license terms set forth above. 
+ */ + #ifndef SHARE_VM_GC_G1_G1MARKSTACK_INLINE_HPP #define SHARE_VM_GC_G1_G1MARKSTACK_INLINE_HPP @@ -71,6 +77,7 @@ template inline void G1FullGCMarker::mark_and_push(T* p) { _oop_stack.push(obj); assert(_bitmap->is_marked(obj), "Must be marked now - map self"); } else { + DEBUG_ONLY(OrderAccess::loadload()); assert(_bitmap->is_marked(obj) || G1ArchiveAllocator::is_closed_archive_object(obj), "Must be marked by other or closed archive object"); } diff --git a/src/hotspot/share/gc/parallel/psPromotionManager.inline.hpp b/src/hotspot/share/gc/parallel/psPromotionManager.inline.hpp index 1ef900783d4..b30456429d8 100644 --- a/src/hotspot/share/gc/parallel/psPromotionManager.inline.hpp +++ b/src/hotspot/share/gc/parallel/psPromotionManager.inline.hpp @@ -51,8 +51,9 @@ template inline void PSPromotionManager::claim_or_forward_internal_depth(T* p) { if (p != NULL) { // XXX: error if p != NULL here oop o = RawAccess::oop_load(p); - if (o->is_forwarded()) { - o = o->forwardee(); + markOop m = o->mark_raw(); + if (m->is_marked()) { + o = (oop) m->decode_pointer(); // Card mark if (PSScavenge::is_obj_in_young(o)) { PSScavenge::card_table()->inline_write_ref_field_gc(p, o); @@ -282,13 +283,17 @@ inline void PSPromotionManager::copy_and_push_safe_barrier(T* p) { assert(should_scavenge(p, true), "revisiting object?"); oop o = RawAccess::oop_load(p); - oop new_obj = o->is_forwarded() - ? o->forwardee() - : copy_to_survivor_space(o); + oop new_obj; + markOop m = o->mark_raw(); + if (m->is_marked()) { + new_obj = (oop) m->decode_pointer(); + } else { + new_obj = copy_to_survivor_space(o); + } // This code must come after the CAS test, or it will print incorrect // information. - if (log_develop_is_enabled(Trace, gc, scavenge) && o->is_forwarded()) { + if (log_develop_is_enabled(Trace, gc, scavenge) && m->is_marked()) { log_develop_trace(gc, scavenge)("{%s %s " PTR_FORMAT " -> " PTR_FORMAT " (%d)}", "forwarding", new_obj->klass()->internal_name(), p2i((void *)o), p2i((void *)new_obj), new_obj->size()); diff --git a/src/hotspot/share/gc/parallel/psScavenge.inline.hpp b/src/hotspot/share/gc/parallel/psScavenge.inline.hpp index 0c58fd4b3f7..415990ff5fb 100644 --- a/src/hotspot/share/gc/parallel/psScavenge.inline.hpp +++ b/src/hotspot/share/gc/parallel/psScavenge.inline.hpp @@ -104,8 +104,9 @@ class PSScavengeFromCLDClosure: public OopClosure { oop o = *p; oop new_obj; - if (o->is_forwarded()) { - new_obj = o->forwardee(); + markOop m = o->mark_raw(); + if (m->is_marked()) { + new_obj = (oop) m->decode_pointer(); } else { new_obj = _pm->copy_to_survivor_space(o); } diff --git a/src/hotspot/share/gc/shared/c1/barrierSetC1.cpp b/src/hotspot/share/gc/shared/c1/barrierSetC1.cpp index 5241322a911..0ddabb4dae9 100644 --- a/src/hotspot/share/gc/shared/c1/barrierSetC1.cpp +++ b/src/hotspot/share/gc/shared/c1/barrierSetC1.cpp @@ -192,8 +192,7 @@ void BarrierSetC1::load_at_resolved(LIRAccess& access, LIR_Opr result) { /* Normalize boolean value returned by unsafe operation, i.e., value != 0 ? value = true : value false. 
*/ if (mask_boolean) { LabelObj* equalZeroLabel = new LabelObj(); - __ cmp(lir_cond_equal, result, 0); - __ branch(lir_cond_equal, T_BOOLEAN, equalZeroLabel->label()); + __ cmp_branch(lir_cond_equal, result, 0, T_BOOLEAN, equalZeroLabel->label()); __ move(LIR_OprFact::intConst(1), result); __ branch_destination(equalZeroLabel->label()); } @@ -320,14 +319,12 @@ void BarrierSetC1::generate_referent_check(LIRAccess& access, LabelObj* cont) { referent_off = gen->new_register(T_LONG); __ move(LIR_OprFact::longConst(java_lang_ref_Reference::referent_offset), referent_off); } - __ cmp(lir_cond_notEqual, offset, referent_off); - __ branch(lir_cond_notEqual, offset->type(), cont->label()); + __ cmp_branch(lir_cond_notEqual, offset, referent_off, offset->type(), cont->label()); } if (gen_source_check) { // offset is a const and equals referent offset // if (source == null) -> continue - __ cmp(lir_cond_equal, base_reg, LIR_OprFact::oopConst(NULL)); - __ branch(lir_cond_equal, T_OBJECT, cont->label()); + __ cmp_branch(lir_cond_equal, base_reg, LIR_OprFact::oopConst(NULL), T_OBJECT, cont->label()); } LIR_Opr src_klass = gen->new_register(T_METADATA); if (gen_type_check) { @@ -337,8 +334,7 @@ void BarrierSetC1::generate_referent_check(LIRAccess& access, LabelObj* cont) { LIR_Address* reference_type_addr = new LIR_Address(src_klass, in_bytes(InstanceKlass::reference_type_offset()), T_BYTE); LIR_Opr reference_type = gen->new_register(T_INT); __ move(reference_type_addr, reference_type); - __ cmp(lir_cond_equal, reference_type, LIR_OprFact::intConst(REF_NONE)); - __ branch(lir_cond_equal, T_INT, cont->label()); + __ cmp_branch(lir_cond_equal, reference_type, LIR_OprFact::intConst(REF_NONE), T_INT, cont->label()); } } } diff --git a/src/hotspot/share/gc/shared/c1/cardTableBarrierSetC1.cpp b/src/hotspot/share/gc/shared/c1/cardTableBarrierSetC1.cpp index 84815adea80..57e29f12955 100644 --- a/src/hotspot/share/gc/shared/c1/cardTableBarrierSetC1.cpp +++ b/src/hotspot/share/gc/shared/c1/cardTableBarrierSetC1.cpp @@ -89,8 +89,7 @@ void CardTableBarrierSetC1::post_barrier(LIRAccess& access, LIR_OprDesc* addr, L __ move(card_addr, cur_value); LabelObj* L_already_dirty = new LabelObj(); - __ cmp(lir_cond_equal, cur_value, dirty); - __ branch(lir_cond_equal, T_BYTE, L_already_dirty->label()); + __ cmp_branch(lir_cond_equal, cur_value, dirty, T_BYTE, L_already_dirty->label()); __ move(dirty, card_addr); __ branch_destination(L_already_dirty->label()); } else { diff --git a/src/hotspot/share/gc/shenandoah/c1/shenandoahBarrierSetC1.cpp b/src/hotspot/share/gc/shenandoah/c1/shenandoahBarrierSetC1.cpp index f51d186484c..506f0301fe2 100644 --- a/src/hotspot/share/gc/shenandoah/c1/shenandoahBarrierSetC1.cpp +++ b/src/hotspot/share/gc/shenandoah/c1/shenandoahBarrierSetC1.cpp @@ -73,7 +73,6 @@ void ShenandoahBarrierSetC1::pre_barrier(LIRGenerator* gen, CodeEmitInfo* info, // Read the marking-in-progress flag. 
LIR_Opr flag_val = gen->new_register(T_INT); __ load(mark_active_flag_addr, flag_val); - __ cmp(lir_cond_notEqual, flag_val, LIR_OprFact::intConst(0)); LIR_PatchCode pre_val_patch_code = lir_patch_none; @@ -101,7 +100,7 @@ void ShenandoahBarrierSetC1::pre_barrier(LIRGenerator* gen, CodeEmitInfo* info, slow = new ShenandoahPreBarrierStub(pre_val); } - __ branch(lir_cond_notEqual, T_INT, slow); + __ cmp_branch(lir_cond_notEqual, flag_val, LIR_OprFact::intConst(0), T_INT, slow); __ branch_destination(slow->continuation()); } @@ -144,10 +143,9 @@ LIR_Opr ShenandoahBarrierSetC1::load_reference_barrier_impl(LIRGenerator* gen, L __ logical_and(flag_val, mask_reg, masked_flag); flag_val = masked_flag; } - __ cmp(lir_cond_notEqual, flag_val, LIR_OprFact::intConst(0)); CodeStub* slow = new ShenandoahLoadReferenceBarrierStub(obj, addr, result, tmp1, tmp2); - __ branch(lir_cond_notEqual, T_INT, slow); + __ cmp_branch(lir_cond_notEqual, flag_val, LIR_OprFact::intConst(0), T_INT, slow); __ branch_destination(slow->continuation()); return result; diff --git a/src/hotspot/share/gc/z/c1/zBarrierSetC1.cpp b/src/hotspot/share/gc/z/c1/zBarrierSetC1.cpp index 9f8ce742433..3c1862d826d 100644 --- a/src/hotspot/share/gc/z/c1/zBarrierSetC1.cpp +++ b/src/hotspot/share/gc/z/c1/zBarrierSetC1.cpp @@ -105,15 +105,20 @@ class LIR_OpZLoadBarrierTest : public LIR_Op { virtual void visit(LIR_OpVisitState* state) { state->do_input(_opr); + if (_result->is_valid()) { + state->do_temp(_opr); + state->do_output(_result); + } } virtual void emit_code(LIR_Assembler* ce) { - ZBarrierSet::assembler()->generate_c1_load_barrier_test(ce, _opr); + ZBarrierSet::assembler()->generate_c1_load_barrier_test(ce, _opr, result_opr()); } virtual void print_instr(outputStream* out) const { _opr->print(out); out->print(" "); + result_opr()->print(out); } #ifndef PRODUCT @@ -149,13 +154,21 @@ address ZBarrierSetC1::load_barrier_on_oop_field_preloaded_runtime_stub(Decorato #endif void ZBarrierSetC1::load_barrier(LIRAccess& access, LIR_Opr result) const { + LIR_Op* op = new LIR_OpZLoadBarrierTest(result); + // Fast path - __ append(new LIR_OpZLoadBarrierTest(result)); + __ append(op); // Slow path const address runtime_stub = load_barrier_on_oop_field_preloaded_runtime_stub(access.decorators()); CodeStub* const stub = new ZLoadBarrierStubC1(access, result, runtime_stub); - __ branch(lir_cond_notEqual, T_ADDRESS, stub); + if (ZPlatformLoadBarrierTestResultInRegister) { + LIR_Opr res = access.gen()->new_register(result->type()); + op->set_result_opr(res); + __ cmp_branch(lir_cond_notEqual, res, LIR_OprFact::intptrConst(NULL_WORD), T_ADDRESS, stub); + } else { + __ branch(lir_cond_notEqual, T_ADDRESS, stub); + } __ branch_destination(stub->continuation()); } diff --git a/src/hotspot/share/interpreter/interpreterRuntime.cpp b/src/hotspot/share/interpreter/interpreterRuntime.cpp index 6483159136a..f40e304f9aa 100644 --- a/src/hotspot/share/interpreter/interpreterRuntime.cpp +++ b/src/hotspot/share/interpreter/interpreterRuntime.cpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2021, These + * modifications are Copyright (c) 2018, 2021, Loongson Technology, and are made + * available on the same license terms set forth above. 
+ */ + #include "precompiled.hpp" #include "classfile/javaClasses.inline.hpp" #include "classfile/systemDictionary.hpp" @@ -1497,7 +1503,7 @@ IRT_ENTRY(void, InterpreterRuntime::prepare_native_call(JavaThread* thread, Meth // preparing the same method will be sure to see non-null entry & mirror. IRT_END -#if defined(IA32) || defined(AMD64) || defined(ARM) +#if defined(IA32) || defined(AMD64) || defined(ARM) || defined(MIPS64) || defined(LOONGARCH64) IRT_LEAF(void, InterpreterRuntime::popframe_move_outgoing_args(JavaThread* thread, void* src_address, void* dest_address)) if (src_address == dest_address) { return; diff --git a/src/hotspot/share/interpreter/interpreterRuntime.hpp b/src/hotspot/share/interpreter/interpreterRuntime.hpp index 87e84c893f2..3043fa634b0 100644 --- a/src/hotspot/share/interpreter/interpreterRuntime.hpp +++ b/src/hotspot/share/interpreter/interpreterRuntime.hpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2021, These + * modifications are Copyright (c) 2018, 2021, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #ifndef SHARE_VM_INTERPRETER_INTERPRETERRUNTIME_HPP #define SHARE_VM_INTERPRETER_INTERPRETERRUNTIME_HPP @@ -146,7 +152,7 @@ class InterpreterRuntime: AllStatic { Method* method, intptr_t* from, intptr_t* to); -#if defined(IA32) || defined(AMD64) || defined(ARM) +#if defined(IA32) || defined(AMD64) || defined(ARM) || defined(MIPS64) || defined(LOONGARCH64) // Popframe support (only needed on x86, AMD64 and ARM) static void popframe_move_outgoing_args(JavaThread* thread, void* src_address, void* dest_address); #endif diff --git a/src/hotspot/share/interpreter/templateInterpreterGenerator.hpp b/src/hotspot/share/interpreter/templateInterpreterGenerator.hpp index 965f6b0d102..07942993cdb 100644 --- a/src/hotspot/share/interpreter/templateInterpreterGenerator.hpp +++ b/src/hotspot/share/interpreter/templateInterpreterGenerator.hpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2021. These + * modifications are Copyright (c) 2021, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #ifndef SHARE_VM_INTERPRETER_TEMPLATEINTERPRETERGENERATOR_HPP #define SHARE_VM_INTERPRETER_TEMPLATEINTERPRETERGENERATOR_HPP @@ -114,9 +120,9 @@ class TemplateInterpreterGenerator: public AbstractInterpreterGenerator { void restore_native_result(void); #endif // SPARC -#ifdef AARCH64 +#if defined(AARCH64) || defined(MIPS64) || defined(LOONGARCH64) void generate_transcendental_entry(AbstractInterpreter::MethodKind kind, int fpargs); -#endif // AARCH64 +#endif // AARCH64 || MIPS64 || LOONGARCH64 #ifdef PPC void lock_method(Register Rflags, Register Rscratch1, Register Rscratch2, bool flags_preloaded=false); diff --git a/src/hotspot/share/jfr/utilities/jfrBigEndian.hpp b/src/hotspot/share/jfr/utilities/jfrBigEndian.hpp index e01a242a57e..0661f3b9d1d 100644 --- a/src/hotspot/share/jfr/utilities/jfrBigEndian.hpp +++ b/src/hotspot/share/jfr/utilities/jfrBigEndian.hpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2021, These + * modifications are Copyright (c) 2019, 2021, Loongson Technology, and are made + * available on the same license terms set forth above. 
+ */ + #ifndef SHARE_VM_JFR_UTILITIES_JFRBIGENDIAN_HPP #define SHARE_VM_JFR_UTILITIES_JFRBIGENDIAN_HPP @@ -102,7 +108,7 @@ inline T JfrBigEndian::read_unaligned(const address location) { inline bool JfrBigEndian::platform_supports_unaligned_reads(void) { #if defined(IA32) || defined(AMD64) || defined(PPC) || defined(S390) return true; -#elif defined(SPARC) || defined(ARM) || defined(AARCH64) +#elif defined(SPARC) || defined(ARM) || defined(AARCH64) || defined(MIPS) || defined(LOONGARCH) return false; #else #warning "Unconfigured platform" diff --git a/src/hotspot/share/jvmci/vmStructs_jvmci.cpp b/src/hotspot/share/jvmci/vmStructs_jvmci.cpp index 89270633304..b5bb5c2887a 100644 --- a/src/hotspot/share/jvmci/vmStructs_jvmci.cpp +++ b/src/hotspot/share/jvmci/vmStructs_jvmci.cpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2022, These + * modifications are Copyright (c) 2022, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #include "precompiled.hpp" #include "code/codeBlob.hpp" #include "compiler/abstractCompiler.hpp" @@ -715,6 +721,35 @@ #endif +#ifdef LOONGARCH64 + +#define VM_STRUCTS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \ + volatile_nonstatic_field(JavaFrameAnchor, _last_Java_fp, intptr_t*) + +#define VM_INT_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) \ + declare_constant(VM_Version::CPU_LA32) \ + declare_constant(VM_Version::CPU_LA64) \ + declare_constant(VM_Version::CPU_LLEXC) \ + declare_constant(VM_Version::CPU_SCDLY) \ + declare_constant(VM_Version::CPU_LLDBAR) \ + declare_constant(VM_Version::CPU_LBT_X86) \ + declare_constant(VM_Version::CPU_LBT_ARM) \ + declare_constant(VM_Version::CPU_LBT_MIPS) \ + declare_constant(VM_Version::CPU_CCDMA) \ + declare_constant(VM_Version::CPU_COMPLEX) \ + declare_constant(VM_Version::CPU_FP) \ + declare_constant(VM_Version::CPU_CRYPTO) \ + declare_constant(VM_Version::CPU_LSX) \ + declare_constant(VM_Version::CPU_LASX) \ + declare_constant(VM_Version::CPU_LAM) \ + declare_constant(VM_Version::CPU_LLSYNC) \ + declare_constant(VM_Version::CPU_TGTSYNC) \ + declare_constant(VM_Version::CPU_ULSYNC) \ + declare_constant(VM_Version::CPU_UAL) + +#endif + + #ifdef X86 #define VM_STRUCTS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \ diff --git a/src/hotspot/share/memory/metaspace.cpp b/src/hotspot/share/memory/metaspace.cpp index c3a884fafe7..103789d9b16 100644 --- a/src/hotspot/share/memory/metaspace.cpp +++ b/src/hotspot/share/memory/metaspace.cpp @@ -1083,12 +1083,12 @@ void Metaspace::allocate_metaspace_compressed_klass_ptrs(char* requested_addr, a // Don't use large pages for the class space. 
bool large_pages = false; -#if !(defined(AARCH64) || defined(PPC64)) +#if !(defined(AARCH64) || defined(PPC64) || defined(MIPS64) || defined(LOONGARCH64)) ReservedSpace metaspace_rs = ReservedSpace(compressed_class_space_size(), _reserve_alignment, large_pages, requested_addr); -#else // AARCH64 || PPC64 +#else // AARCH64 || PPC64 || MIPS64 || LOONGARCH64 ReservedSpace metaspace_rs; @@ -1114,7 +1114,8 @@ void Metaspace::allocate_metaspace_compressed_klass_ptrs(char* requested_addr, a // below 32g to get a zerobased CCS. For simplicity we reuse the search // strategy for AARCH64. - size_t increment = AARCH64_ONLY(4*)G; + // MIPS: Cannot mmap for 1G space at 4G position, and prepare for future optimization. + size_t increment = AARCH64_ONLY(4*)MIPS64_ONLY(4*)LOONGARCH64_ONLY(4*)G; for (char *a = align_up(requested_addr, increment); a < (char*)(1024*G); a += increment) { @@ -1145,7 +1146,7 @@ void Metaspace::allocate_metaspace_compressed_klass_ptrs(char* requested_addr, a } } -#endif // AARCH64 || PPC64 +#endif // AARCH64 || PPC64 || MIPS64 || LOONGARCH64 if (!metaspace_rs.is_reserved()) { #if INCLUDE_CDS diff --git a/src/hotspot/share/oops/oop.inline.hpp b/src/hotspot/share/oops/oop.inline.hpp index 6c631f54584..98651067205 100644 --- a/src/hotspot/share/oops/oop.inline.hpp +++ b/src/hotspot/share/oops/oop.inline.hpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2022. These + * modifications are Copyright (c) 2022, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #ifndef SHARE_VM_OOPS_OOP_INLINE_HPP #define SHARE_VM_OOPS_OOP_INLINE_HPP @@ -389,7 +395,7 @@ oop oopDesc::forward_to_atomic(oop p, atomic_memory_order order) { // forwarding pointer. oldMark = curMark; } - return forwardee(); + return (oop)oldMark->decode_pointer(); } // Note that the forwardee is not the same thing as the displaced_mark. diff --git a/src/hotspot/share/opto/compile.hpp b/src/hotspot/share/opto/compile.hpp index 569fbc6d69b..c1f1b82ffad 100644 --- a/src/hotspot/share/opto/compile.hpp +++ b/src/hotspot/share/opto/compile.hpp @@ -1204,7 +1204,7 @@ class Compile : public Phase { bool in_scratch_emit_size() const { return _in_scratch_emit_size; } enum ScratchBufferBlob { -#if defined(PPC64) +#if defined(PPC64) || defined(MIPS64) || defined(LOONGARCH64) MAX_inst_size = 2048, #else MAX_inst_size = 1024, diff --git a/src/hotspot/share/opto/output.cpp b/src/hotspot/share/opto/output.cpp index b6540e06a3a..52d1fc9fb9f 100644 --- a/src/hotspot/share/opto/output.cpp +++ b/src/hotspot/share/opto/output.cpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2021. These + * modifications are Copyright (c) 2019, 2021, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #include "precompiled.hpp" #include "asm/assembler.inline.hpp" #include "asm/macroAssembler.inline.hpp" @@ -731,6 +737,27 @@ void Compile::Process_OopMap_Node(MachNode *mach, int current_offset) { // Add the safepoint in the DebugInfoRecorder if( !mach->is_MachCall() ) { mcall = NULL; +#if defined(MIPS) || defined(LOONGARCH) + // safepoint_pc_offset should point to tha last instruction in safePoint. + // In X86 and sparc, their safePoints only contain one instruction. + // However, we should add current_offset with the size of safePoint in MIPS. 
+ // 0x2d6ff22c: lw s2, 0x14(s2) + // last_pd->pc_offset()=308, pc_offset=304, bci=64 + // last_pd->pc_offset()=312, pc_offset=312, bci=64 + // src/hotspot/share/code/debugInfoRec.cpp:295, assert(last_pd->pc_offset() == pc_offset, "must be last pc") + // + // ;; Safepoint: + // ---> pc_offset=304 + // 0x2d6ff230: lui at, 0x2b7a ; OopMap{s2=Oop s5=Oop t4=Oop off=308} + // ;*goto + // ; - java.util.Hashtable::get@64 (line 353) + // ---> last_pd(308) + // 0x2d6ff234: lw at, 0xffffc100(at) ;*goto + // ; - java.util.Hashtable::get@64 (line 353) + // ; {poll} + // 0x2d6ff238: addiu s0, zero, 0x0 + safepoint_pc_offset += sfn->size(_regalloc) - 4; +#endif debug_info()->add_safepoint(safepoint_pc_offset, sfn->_oop_map); } else { mcall = mach->as_MachCall(); @@ -1393,6 +1420,22 @@ void Compile::fill_buffer(CodeBuffer* cb, uint* blk_starts) { DEBUG_ONLY(uint instr_offset = cb->insts_size()); n->emit(*cb, _regalloc); current_offset = cb->insts_size(); +#if defined(MIPS) || defined(LOONGARCH) + if (!n->is_Proj() && (cb->insts()->end() != badAddress)) { + // For MIPS, the first instruction of the previous node (usually an instruction sequence) sometimes + // is not the instruction which accesses memory, so an adjustment is needed. previous_offset points to the + // instruction which accesses memory. Instruction size is 4. cb->insts_size() and + // cb->insts()->end() give the location of the current instruction. + int adjust = 4; + NativeInstruction* inst = (NativeInstruction*) (cb->insts()->end() - 4); + if (inst->is_sync()) { + // a sync may be the last instruction, see store_B_immI_enc_sync + adjust += 4; + inst = (NativeInstruction*) (cb->insts()->end() - 8); + } + previous_offset = current_offset - adjust; + } +#endif // Above we only verified that there is enough space in the instruction section. // However, the instruction may emit stubs that cause code buffer expansion. diff --git a/src/hotspot/share/opto/type.cpp b/src/hotspot/share/opto/type.cpp index 7d767c47c97..23ec34e5e24 100644 --- a/src/hotspot/share/opto/type.cpp +++ b/src/hotspot/share/opto/type.cpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2022, These + * modifications are Copyright (c) 2022, Loongson Technology, and are made + * available on the same license terms set forth above.
+ */ + #include "precompiled.hpp" #include "ci/ciMethodData.hpp" #include "ci/ciTypeFlow.hpp" @@ -78,6 +84,12 @@ const Type::TypeInfo Type::_type_info[Type::lastype] = { { Bad, T_ILLEGAL, "vectorx:", false, 0, relocInfo::none }, // VectorX { Bad, T_ILLEGAL, "vectory:", false, 0, relocInfo::none }, // VectorY { Bad, T_ILLEGAL, "vectorz:", false, 0, relocInfo::none }, // VectorZ +#elif defined(LOONGARCH64) + { Bad, T_ILLEGAL, "vectors:", false, 0, relocInfo::none }, // VectorS + { Bad, T_ILLEGAL, "vectord:", false, 0, relocInfo::none }, // VectorD + { Bad, T_ILLEGAL, "vectorx:", false, Op_VecX, relocInfo::none }, // VectorX + { Bad, T_ILLEGAL, "vectory:", false, Op_VecY, relocInfo::none }, // VectorY + { Bad, T_ILLEGAL, "vectorz:", false, 0, relocInfo::none }, // VectorZ +#else // all other { Bad, T_ILLEGAL, "vectors:", false, Op_VecS, relocInfo::none }, // VectorS { Bad, T_ILLEGAL, "vectord:", false, Op_VecD, relocInfo::none }, // VectorD diff --git a/src/hotspot/share/runtime/java.cpp b/src/hotspot/share/runtime/java.cpp index 84123b29ecd..77fbacf2d8c 100644 --- a/src/hotspot/share/runtime/java.cpp +++ b/src/hotspot/share/runtime/java.cpp @@ -68,6 +68,7 @@ #include "runtime/thread.inline.hpp" #include "runtime/timer.hpp" #include "runtime/vmOperations.hpp" +#include "runtime/vmThread.hpp" #include "services/memTracker.hpp" #include "utilities/dtrace.hpp" #include "utilities/globalDefinitions.hpp" diff --git a/src/hotspot/share/runtime/objectMonitor.cpp b/src/hotspot/share/runtime/objectMonitor.cpp index ce23aafa8f3..d3dfb74d5b7 100644 --- a/src/hotspot/share/runtime/objectMonitor.cpp +++ b/src/hotspot/share/runtime/objectMonitor.cpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2023, These + * modifications are Copyright (c) 2023, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #include "precompiled.hpp" #include "classfile/vmSymbols.hpp" #include "jfr/jfrEvents.hpp" @@ -308,6 +314,9 @@ void ObjectMonitor::enter(TRAPS) { } assert(_owner != Self, "invariant"); + // The load of _succ for the assertion below can be reordered before the preceding if (_succ == current) _succ = nullptr; + // the expected order is to clear _succ first and only then check the assertion, hence the load-load barrier in debug builds. + DEBUG_ONLY(LOONGARCH64_ONLY(__asm__ __volatile__ ("dbar 0x700\n");)MIPS64_ONLY(OrderAccess::loadload();)) assert(_succ != Self, "invariant"); assert(Self->is_Java_thread(), "invariant"); JavaThread * jt = (JavaThread *) Self; @@ -469,6 +478,7 @@ void ObjectMonitor::EnterI(TRAPS) { } // The Spin failed -- Enqueue and park the thread ... + DEBUG_ONLY(LOONGARCH64_ONLY(__asm__ __volatile__ ("dbar 0x700\n");)MIPS64_ONLY(OrderAccess::loadload();)) assert(_succ != Self, "invariant"); assert(_owner != Self, "invariant"); assert(_Responsible != Self, "invariant"); diff --git a/src/hotspot/share/runtime/os.cpp b/src/hotspot/share/runtime/os.cpp index e0f4a2af1f7..09cc4b1ba5d 100644 --- a/src/hotspot/share/runtime/os.cpp +++ b/src/hotspot/share/runtime/os.cpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2022, These + * modifications are Copyright (c) 2019, 2022, Loongson Technology, and are made + * available on the same license terms set forth above.
+ */ + #include "precompiled.hpp" #include "jvm.h" #include "classfile/classLoader.hpp" @@ -1242,7 +1248,8 @@ bool os::is_first_C_frame(frame* fr) { if ((uintptr_t)fr->sender_sp() == (uintptr_t)-1 || is_pointer_bad(fr->sender_sp())) return true; uintptr_t old_fp = (uintptr_t)fr->link_or_null(); - if (old_fp == 0 || old_fp == (uintptr_t)-1 || old_fp == ufp || + // The check for old_fp and ufp is harmful on LoongArch and MIPS due to their special ABIs. + if (old_fp == 0 || old_fp == (uintptr_t)-1 NOT_LOONGARCH64_AND_MIPS64(|| old_fp == ufp) || is_pointer_bad(fr->link_or_null())) return true; // stack grows downwards; if old_fp is below current fp or if the stack diff --git a/src/hotspot/share/runtime/sharedRuntimeTrig.cpp b/src/hotspot/share/runtime/sharedRuntimeTrig.cpp index e086f794cd1..f4801957755 100644 --- a/src/hotspot/share/runtime/sharedRuntimeTrig.cpp +++ b/src/hotspot/share/runtime/sharedRuntimeTrig.cpp @@ -22,6 +22,13 @@ * */ +/* + * This file has been modified by Loongson Technology in 2021, These + * modifications are Copyright (c) 2015, 2021, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + + #include "precompiled.hpp" #include "jni.h" #include "runtime/interfaceSupport.inline.hpp" @@ -512,6 +519,14 @@ static int __ieee754_rem_pio2(double x, double *y) { * sin(x) = x + (S1*x + (x *(r-y/2)+y)) */ +#if defined(MIPS)|| defined(LOONGARCH) +#undef S1 +#undef S2 +#undef S3 +#undef S4 +#undef S5 +#undef S6 +#endif static const double S1 = -1.66666666666666324348e-01, /* 0xBFC55555, 0x55555549 */ S2 = 8.33333333332248946124e-03, /* 0x3F811111, 0x1110F8A6 */ diff --git a/src/hotspot/share/utilities/globalDefinitions.hpp b/src/hotspot/share/utilities/globalDefinitions.hpp index c758fc57432..a8c4638f6a2 100644 --- a/src/hotspot/share/utilities/globalDefinitions.hpp +++ b/src/hotspot/share/utilities/globalDefinitions.hpp @@ -1161,6 +1161,15 @@ inline int exact_log2_long(jlong x) { return log2_long(x); } +#if defined(MIPS64) || defined(LOONGARCH64) +// returns integer round-up to the nearest multiple of s (s must be a power of two) +inline intptr_t round_to(intptr_t x, uintx s) { + assert(is_power_of_2(s), "s must be a power of 2: " JLONG_FORMAT, x); + const uintx m = s - 1; + return mask_bits(x + m, ~m); +} +#endif + inline bool is_odd (intx x) { return x & 1; } inline bool is_even(intx x) { return !is_odd(x); } diff --git a/src/hotspot/share/utilities/macros.hpp b/src/hotspot/share/utilities/macros.hpp index 6605ab367c7..5a2be6ef156 100644 --- a/src/hotspot/share/utilities/macros.hpp +++ b/src/hotspot/share/utilities/macros.hpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2021. These + * modifications are Copyright (c) 2018, 2021, Loongson Technology, and are made + * available on the same license terms set forth above. 
+ */ + #ifndef SHARE_VM_UTILITIES_MACROS_HPP #define SHARE_VM_UTILITIES_MACROS_HPP @@ -535,6 +541,38 @@ #define NOT_SPARC(code) code #endif +#ifdef MIPS64 +#ifndef MIPS +#define MIPS +#endif +#define MIPS64_ONLY(code) code +#define NOT_MIPS64(code) +#else +#undef MIPS +#define MIPS64_ONLY(code) +#define NOT_MIPS64(code) code +#endif + +#ifdef LOONGARCH64 +#ifndef LOONGARCH +#define LOONGARCH +#endif +#define LOONGARCH64_ONLY(code) code +#define NOT_LOONGARCH64(code) +#else +#undef LOONGARCH +#define LOONGARCH64_ONLY(code) +#define NOT_LOONGARCH64(code) code +#endif + +#if defined(MIPS64) || defined(LOONGARCH64) +#define LOONGARCH64_AND_MIPS64_ONLY(code) code +#define NOT_LOONGARCH64_AND_MIPS64(code) +#else +#define LOONGARCH64_AND_MIPS64_ONLY(code) +#define NOT_LOONGARCH64_AND_MIPS64(code) code +#endif + #if defined(PPC32) || defined(PPC64) #ifndef PPC #define PPC @@ -627,16 +665,34 @@ // OS_CPU_HEADER(vmStructs) --> vmStructs_linux_sparc.hpp // // basename.hpp / basename.inline.hpp +#if defined(MIPS) && !defined(ZERO) +#define CPU_HEADER_H(basename) XSTR(basename ## _mips.h) +#define CPU_HEADER(basename) XSTR(basename ## _mips.hpp) +#define CPU_HEADER_INLINE(basename) XSTR(basename ## _mips.inline.hpp) +#elif defined(LOONGARCH) && !defined(ZERO) +#define CPU_HEADER_H(basename) XSTR(basename ## _loongarch.h) +#define CPU_HEADER(basename) XSTR(basename ## _loongarch.hpp) +#define CPU_HEADER_INLINE(basename) XSTR(basename ## _loongarch.inline.hpp) +#else #define CPU_HEADER_H(basename) XSTR(CPU_HEADER_STEM(basename).h) #define CPU_HEADER(basename) XSTR(CPU_HEADER_STEM(basename).hpp) #define CPU_HEADER_INLINE(basename) XSTR(CPU_HEADER_STEM(basename).inline.hpp) +#endif // basename.hpp / basename.inline.hpp #define OS_HEADER_H(basename) XSTR(OS_HEADER_STEM(basename).h) #define OS_HEADER(basename) XSTR(OS_HEADER_STEM(basename).hpp) #define OS_HEADER_INLINE(basename) XSTR(OS_HEADER_STEM(basename).inline.hpp) // basename.hpp / basename.inline.hpp +#if defined(MIPS) && !defined(ZERO) +#define OS_CPU_HEADER(basename) XSTR(basename ## _linux_mips.hpp) +#define OS_CPU_HEADER_INLINE(basename) XSTR(basename ## _linux_mips.inline.hpp) +#elif defined(LOONGARCH) && !defined(ZERO) +#define OS_CPU_HEADER(basename) XSTR(basename ## _linux_loongarch.hpp) +#define OS_CPU_HEADER_INLINE(basename) XSTR(basename ## _linux_loongarch.inline.hpp) +#else #define OS_CPU_HEADER(basename) XSTR(OS_CPU_HEADER_STEM(basename).hpp) #define OS_CPU_HEADER_INLINE(basename) XSTR(OS_CPU_HEADER_STEM(basename).inline.hpp) +#endif // basename.hpp / basename.inline.hpp #define COMPILER_HEADER(basename) XSTR(COMPILER_HEADER_STEM(basename).hpp) #define COMPILER_HEADER_INLINE(basename) XSTR(COMPILER_HEADER_STEM(basename).inline.hpp) diff --git a/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c b/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c index 0d834302c57..6afafea095f 100644 --- a/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c +++ b/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c @@ -22,6 +22,13 @@ * */ +/* + * This file has been modified by Loongson Technology in 2022. These + * modifications are Copyright (c) 2021, 2022, Loongson Technology, and are made + * available on the same license terms set forth above. 
+ * + */ + #include #include "libproc.h" #include "proc_service.h" @@ -54,10 +61,18 @@ #include "sun_jvm_hotspot_debugger_ppc64_PPC64ThreadContext.h" #endif +#if defined(mips64) || defined(mips64el) +#include "sun_jvm_hotspot_debugger_mips64_MIPS64ThreadContext.h" +#endif + #ifdef aarch64 #include "sun_jvm_hotspot_debugger_aarch64_AARCH64ThreadContext.h" #endif +#ifdef loongarch64 +#include "sun_jvm_hotspot_debugger_loongarch64_LOONGARCH64ThreadContext.h" +#endif + static jfieldID p_ps_prochandle_ID = 0; static jfieldID threadList_ID = 0; static jfieldID loadObjectList_ID = 0; @@ -397,7 +412,7 @@ JNIEXPORT jbyteArray JNICALL Java_sun_jvm_hotspot_debugger_linux_LinuxDebuggerLo return (err == PS_OK)? array : 0; } -#if defined(i386) || defined(amd64) || defined(sparc) || defined(sparcv9) | defined(ppc64) || defined(ppc64le) || defined(aarch64) +#if defined(i386) || defined(amd64) || defined(sparc) || defined(sparcv9) | defined(ppc64) || defined(ppc64le) || defined(aarch64) || defined(loongarch64) JNIEXPORT jlongArray JNICALL Java_sun_jvm_hotspot_debugger_linux_LinuxDebuggerLocal_getThreadIntegerRegisterSet0 (JNIEnv *env, jobject this_obj, jint lwp_id) { @@ -425,8 +440,14 @@ JNIEXPORT jlongArray JNICALL Java_sun_jvm_hotspot_debugger_linux_LinuxDebuggerLo #if defined(sparc) || defined(sparcv9) #define NPRGREG sun_jvm_hotspot_debugger_sparc_SPARCThreadContext_NPRGREG #endif +#ifdef loongarch64 +#define NPRGREG sun_jvm_hotspot_debugger_loongarch64_LOONGARCH64ThreadContext_NPRGREG +#endif #if defined(ppc64) || defined(ppc64le) #define NPRGREG sun_jvm_hotspot_debugger_ppc64_PPC64ThreadContext_NPRGREG +#endif +#if defined(mips64) || defined(mips64el) +#define NPRGREG sun_jvm_hotspot_debugger_mips64_MIPS64ThreadContext_NPRGREG #endif @@ -534,6 +555,18 @@ JNIEXPORT jlongArray JNICALL Java_sun_jvm_hotspot_debugger_linux_LinuxDebuggerLo } #endif /* aarch64 */ +#if defined(loongarch64) + +#define REG_INDEX(reg) sun_jvm_hotspot_debugger_loongarch64_LOONGARCH64ThreadContext_##reg + + { + int i; + for (i = 0; i < 31; i++) + regs[i] = gregs.regs[i]; + regs[REG_INDEX(PC)] = gregs.csr_era; + } +#endif /* loongarch64 */ + #if defined(ppc64) || defined(ppc64le) #define REG_INDEX(reg) sun_jvm_hotspot_debugger_ppc64_PPC64ThreadContext_##reg @@ -574,6 +607,45 @@ JNIEXPORT jlongArray JNICALL Java_sun_jvm_hotspot_debugger_linux_LinuxDebuggerLo #endif +#if defined(mips64) || defined(mips64el) + +#define REG_INDEX(reg) sun_jvm_hotspot_debugger_mips64_MIPS64ThreadContext_##reg + + regs[REG_INDEX(ZERO)] = gregs.regs[0]; + regs[REG_INDEX(AT)] = gregs.regs[1]; + regs[REG_INDEX(V0)] = gregs.regs[2]; + regs[REG_INDEX(V1)] = gregs.regs[3]; + regs[REG_INDEX(A0)] = gregs.regs[4]; + regs[REG_INDEX(A1)] = gregs.regs[5]; + regs[REG_INDEX(A2)] = gregs.regs[6]; + regs[REG_INDEX(A3)] = gregs.regs[7]; + regs[REG_INDEX(T0)] = gregs.regs[8]; + regs[REG_INDEX(T1)] = gregs.regs[9]; + regs[REG_INDEX(T2)] = gregs.regs[10]; + regs[REG_INDEX(T3)] = gregs.regs[11]; + regs[REG_INDEX(T4)] = gregs.regs[12]; + regs[REG_INDEX(T5)] = gregs.regs[13]; + regs[REG_INDEX(T6)] = gregs.regs[14]; + regs[REG_INDEX(T7)] = gregs.regs[15]; + regs[REG_INDEX(S0)] = gregs.regs[16]; + regs[REG_INDEX(S1)] = gregs.regs[17]; + regs[REG_INDEX(S2)] = gregs.regs[18]; + regs[REG_INDEX(S3)] = gregs.regs[19]; + regs[REG_INDEX(S4)] = gregs.regs[20]; + regs[REG_INDEX(S5)] = gregs.regs[21]; + regs[REG_INDEX(S6)] = gregs.regs[22]; + regs[REG_INDEX(S7)] = gregs.regs[23]; + regs[REG_INDEX(T8)] = gregs.regs[24]; + regs[REG_INDEX(T9)] = gregs.regs[25]; + regs[REG_INDEX(K0)] = 
gregs.regs[26]; + regs[REG_INDEX(K1)] = gregs.regs[27]; + regs[REG_INDEX(GP)] = gregs.regs[28]; + regs[REG_INDEX(SP)] = gregs.regs[29]; + regs[REG_INDEX(FP)] = gregs.regs[30]; + regs[REG_INDEX(S8)] = gregs.regs[30]; + regs[REG_INDEX(RA)] = gregs.regs[31]; +#endif /* mips */ + (*env)->ReleaseLongArrayElements(env, array, regs, JNI_COMMIT); return array; } diff --git a/src/jdk.hotspot.agent/linux/native/libsaproc/libproc.h b/src/jdk.hotspot.agent/linux/native/libsaproc/libproc.h index 8318e8e0213..07064e76ee1 100644 --- a/src/jdk.hotspot.agent/linux/native/libsaproc/libproc.h +++ b/src/jdk.hotspot.agent/linux/native/libsaproc/libproc.h @@ -22,6 +22,13 @@ * */ +/* + * This file has been modified by Loongson Technology in 2022. These + * modifications are Copyright (c) 2022, Loongson Technology, and are made + * available on the same license terms set forth above. + * + */ + #ifndef _LIBPROC_H_ #define _LIBPROC_H_ @@ -37,13 +44,17 @@ #include #define user_regs_struct pt_regs #endif -#if defined(aarch64) || defined(arm64) +#if defined(aarch64) || defined(arm64) || defined(loongarch64) #include #define user_regs_struct user_pt_regs #elif defined(arm) #include #define user_regs_struct pt_regs #endif +#if defined(mips) || defined(mipsel) || defined(mips64) || defined(mips64el) +#include +#define user_regs_struct pt_regs +#endif // This C bool type must be int for compatibility with Linux calls and // it would be a mistake to equivalence it to C++ bool on many platforms diff --git a/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c b/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c index de5254d859e..eefe55959c0 100644 --- a/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c +++ b/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2022, These + * modifications are Copyright (c) 2022, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #include #include #include @@ -142,7 +148,7 @@ static bool process_get_lwp_regs(struct ps_prochandle* ph, pid_t pid, struct use #define PTRACE_GETREGS_REQ PT_GETREGS #endif -#ifdef PTRACE_GETREGS_REQ +#if defined(PTRACE_GETREGS_REQ) && !defined(loongarch64) if (ptrace_getregs(PTRACE_GETREGS_REQ, pid, user, NULL) < 0) { print_debug("ptrace(PTRACE_GETREGS, ...) failed for lwp %d\n", pid); return false; diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/HotSpotAgent.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/HotSpotAgent.java index 0f5f0119c73..1b2f11a0658 100644 --- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/HotSpotAgent.java +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/HotSpotAgent.java @@ -23,6 +23,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2021. These + * modifications are Copyright (c) 2018, 2021, Loongson Technology, and are made + * available on the same license terms set forth above. 
+ * + */ package sun.jvm.hotspot; import java.rmi.RemoteException; @@ -39,6 +45,8 @@ import sun.jvm.hotspot.debugger.MachineDescriptionIntelX86; import sun.jvm.hotspot.debugger.MachineDescriptionSPARC32Bit; import sun.jvm.hotspot.debugger.MachineDescriptionSPARC64Bit; +import sun.jvm.hotspot.debugger.MachineDescriptionMIPS64; +import sun.jvm.hotspot.debugger.MachineDescriptionLOONGARCH64; import sun.jvm.hotspot.debugger.NoSuchSymbolException; import sun.jvm.hotspot.debugger.bsd.BsdDebuggerLocal; import sun.jvm.hotspot.debugger.linux.LinuxDebuggerLocal; @@ -598,6 +606,10 @@ private void setupDebuggerLinux() { } else { machDesc = new MachineDescriptionSPARC32Bit(); } + } else if (cpu.equals("mips64")) { + machDesc = new MachineDescriptionMIPS64(); + } else if (cpu.equals("loongarch64")) { + machDesc = new MachineDescriptionLOONGARCH64(); } else { try { machDesc = (MachineDescription) diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionLOONGARCH64.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionLOONGARCH64.java new file mode 100644 index 00000000000..99cea8c7f14 --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionLOONGARCH64.java @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2000, 2008, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2018, 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.debugger; + +public class MachineDescriptionLOONGARCH64 extends MachineDescriptionTwosComplement implements MachineDescription { + public long getAddressSize() { + return 8; + } + + + public boolean isBigEndian() { + return false; + } + + public boolean isLP64() { + return true; + } +} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionMIPS64.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionMIPS64.java new file mode 100644 index 00000000000..1b49efd2017 --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionMIPS64.java @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2000, 2008, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2018, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.debugger; + +public class MachineDescriptionMIPS64 extends MachineDescriptionTwosComplement implements MachineDescription { + public long getAddressSize() { + return 8; + } + + + public boolean isBigEndian() { + return "big".equals(System.getProperty("sun.cpu.endian")); + } + + public boolean isLP64() { + return true; + } +} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java index 5e5a6bb7141..7d7f6424e66 100644 --- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java @@ -23,6 +23,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2022, These + * modifications are Copyright (c) 2019, 2022, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + package sun.jvm.hotspot.debugger.linux; import java.io.*; @@ -34,12 +40,16 @@ import sun.jvm.hotspot.debugger.amd64.*; import sun.jvm.hotspot.debugger.aarch64.*; import sun.jvm.hotspot.debugger.sparc.*; +import sun.jvm.hotspot.debugger.mips64.*; +import sun.jvm.hotspot.debugger.loongarch64.*; import sun.jvm.hotspot.debugger.ppc64.*; import sun.jvm.hotspot.debugger.linux.x86.*; import sun.jvm.hotspot.debugger.linux.amd64.*; import sun.jvm.hotspot.debugger.linux.sparc.*; import sun.jvm.hotspot.debugger.linux.ppc64.*; import sun.jvm.hotspot.debugger.linux.aarch64.*; +import sun.jvm.hotspot.debugger.linux.mips64.*; +import sun.jvm.hotspot.debugger.linux.loongarch64.*; import sun.jvm.hotspot.utilities.*; class LinuxCDebugger implements CDebugger { @@ -102,7 +112,21 @@ public CFrame topFrameForThread(ThreadProxy thread) throws DebuggerException { Address pc = context.getRegisterAsAddress(SPARCThreadContext.R_O7); if (pc == null) return null; return new LinuxSPARCCFrame(dbg, sp, pc, LinuxDebuggerLocal.getAddressSize()); - } else if (cpu.equals("ppc64")) { + } else if (cpu.equals("mips64")) { + MIPS64ThreadContext context = (MIPS64ThreadContext) thread.getContext(); + Address sp = context.getRegisterAsAddress(MIPS64ThreadContext.SP); + if (sp == null) return null; + Address pc = context.getRegisterAsAddress(MIPS64ThreadContext.PC); + if (pc == null) return null; + return new LinuxMIPS64CFrame(dbg, sp, pc); + } else if (cpu.equals("loongarch64")) { + LOONGARCH64ThreadContext context = (LOONGARCH64ThreadContext) thread.getContext(); + Address fp = context.getRegisterAsAddress(LOONGARCH64ThreadContext.FP); + if (fp == null) return null; + Address pc = context.getRegisterAsAddress(LOONGARCH64ThreadContext.PC); + if (pc == null) return null; + return new LinuxLOONGARCH64CFrame(dbg, fp, pc); 
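+ // Note: unlike the mips64 branch above, which seeds the top native frame with SP, the + // loongarch64 branch seeds it with FP: LinuxLOONGARCH64CFrame.sender() (added later in this + // patch) unwinds by reading the saved FP/PC pair at fp - 2 * ADDRESS_SIZE and fp - 1 * ADDRESS_SIZE, + // so FP is the anchor the stack walker needs here.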
+ } else if (cpu.equals("ppc64")) { PPC64ThreadContext context = (PPC64ThreadContext) thread.getContext(); Address sp = context.getRegisterAsAddress(PPC64ThreadContext.SP); if (sp == null) return null; diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxThreadContextFactory.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxThreadContextFactory.java index 4b786eecc95..4ead33827cc 100644 --- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxThreadContextFactory.java +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxThreadContextFactory.java @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2021, These + * modifications are Copyright (c) 2019, 2021, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + package sun.jvm.hotspot.debugger.linux; import java.lang.reflect.*; @@ -30,6 +36,8 @@ import sun.jvm.hotspot.debugger.linux.x86.*; import sun.jvm.hotspot.debugger.linux.ppc64.*; import sun.jvm.hotspot.debugger.linux.sparc.*; +import sun.jvm.hotspot.debugger.linux.mips64.*; +import sun.jvm.hotspot.debugger.linux.loongarch64.*; class LinuxThreadContextFactory { static ThreadContext createThreadContext(LinuxDebugger dbg) { @@ -40,7 +48,11 @@ static ThreadContext createThreadContext(LinuxDebugger dbg) { return new LinuxAMD64ThreadContext(dbg); } else if (cpu.equals("sparc")) { return new LinuxSPARCThreadContext(dbg); - } else if (cpu.equals("ppc64")) { + } else if (cpu.equals("mips64")) { + return new LinuxMIPS64ThreadContext(dbg); + } else if (cpu.equals("loongarch64")) { + return new LinuxLOONGARCH64ThreadContext(dbg); + } else if (cpu.equals("ppc64")) { return new LinuxPPC64ThreadContext(dbg); } else { try { diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/loongarch64/LinuxLOONGARCH64CFrame.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/loongarch64/LinuxLOONGARCH64CFrame.java new file mode 100644 index 00000000000..0e6caee5a49 --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/loongarch64/LinuxLOONGARCH64CFrame.java @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +package sun.jvm.hotspot.debugger.linux.loongarch64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.linux.*; +import sun.jvm.hotspot.debugger.cdbg.*; +import sun.jvm.hotspot.debugger.cdbg.basic.*; +import sun.jvm.hotspot.debugger.loongarch64.*; + +final public class LinuxLOONGARCH64CFrame extends BasicCFrame { + // package/class internals only + public LinuxLOONGARCH64CFrame(LinuxDebugger dbg, Address fp, Address pc) { + super(dbg.getCDebugger()); + this.fp = fp; + this.pc = pc; + this.dbg = dbg; + } + + // override base class impl to avoid ELF parsing + public ClosestSymbol closestSymbolToPC() { + // try native lookup in debugger. + return dbg.lookup(dbg.getAddressValue(pc())); + } + + public Address pc() { + return pc; + } + + public Address localVariableBase() { + return fp; + } + + public CFrame sender(ThreadProxy thread) { + LOONGARCH64ThreadContext context = (LOONGARCH64ThreadContext) thread.getContext(); + Address sp = context.getRegisterAsAddress(LOONGARCH64ThreadContext.SP); + Address nextFP; + Address nextPC; + + if ((fp == null) || fp.lessThan(sp)) { + return null; + } + + try { + nextFP = fp.getAddressAt(-2 * ADDRESS_SIZE); + } catch (Exception e) { + return null; + } + if (nextFP == null) { + return null; + } + + try { + nextPC = fp.getAddressAt(-1 * ADDRESS_SIZE); + } catch (Exception e) { + return null; + } + if (nextPC == null) { + return null; + } + + return new LinuxLOONGARCH64CFrame(dbg, nextFP, nextPC); + } + + private static final int ADDRESS_SIZE = 8; + private Address pc; + private Address fp; + private LinuxDebugger dbg; +} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/loongarch64/LinuxLOONGARCH64ThreadContext.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/loongarch64/LinuxLOONGARCH64ThreadContext.java new file mode 100644 index 00000000000..604642598e0 --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/loongarch64/LinuxLOONGARCH64ThreadContext.java @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +package sun.jvm.hotspot.debugger.linux.loongarch64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.loongarch64.*; +import sun.jvm.hotspot.debugger.linux.*; + +public class LinuxLOONGARCH64ThreadContext extends LOONGARCH64ThreadContext { + private LinuxDebugger debugger; + + public LinuxLOONGARCH64ThreadContext(LinuxDebugger debugger) { + super(); + this.debugger = debugger; + } + + public void setRegisterAsAddress(int index, Address value) { + setRegister(index, debugger.getAddressValue(value)); + } + + public Address getRegisterAsAddress(int index) { + return debugger.newAddress(getRegister(index)); + } +} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/mips64/LinuxMIPS64CFrame.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/mips64/LinuxMIPS64CFrame.java new file mode 100644 index 00000000000..2e3eb564da2 --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/mips64/LinuxMIPS64CFrame.java @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.debugger.linux.mips64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.linux.*; +import sun.jvm.hotspot.debugger.cdbg.*; +import sun.jvm.hotspot.debugger.cdbg.basic.*; +import sun.jvm.hotspot.debugger.mips64.*; + +final public class LinuxMIPS64CFrame extends BasicCFrame { + // package/class internals only + public LinuxMIPS64CFrame(LinuxDebugger dbg, Address ebp, Address pc) { + super(dbg.getCDebugger()); + this.ebp = ebp; + this.pc = pc; + this.dbg = dbg; + } + + // override base class impl to avoid ELF parsing + public ClosestSymbol closestSymbolToPC() { + // try native lookup in debugger. 
+ return dbg.lookup(dbg.getAddressValue(pc())); + } + + public Address pc() { + return pc; + } + + public Address localVariableBase() { + return ebp; + } + + public CFrame sender(ThreadProxy thread) { + MIPS64ThreadContext context = (MIPS64ThreadContext) thread.getContext(); + Address esp = context.getRegisterAsAddress(MIPS64ThreadContext.SP); + + if ( (ebp == null) || ebp.lessThan(esp) ) { + return null; + } + + Address nextEBP = ebp.getAddressAt( 0 * ADDRESS_SIZE); + if (nextEBP == null) { + return null; + } + Address nextPC = ebp.getAddressAt( 1 * ADDRESS_SIZE); + if (nextPC == null) { + return null; + } + return new LinuxMIPS64CFrame(dbg, nextEBP, nextPC); + } + + private static final int ADDRESS_SIZE = 4; + private Address pc; + private Address ebp; + private LinuxDebugger dbg; +} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/mips64/LinuxMIPS64ThreadContext.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/mips64/LinuxMIPS64ThreadContext.java new file mode 100644 index 00000000000..98e0f3f0bcf --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/mips64/LinuxMIPS64ThreadContext.java @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.debugger.linux.mips64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.mips64.*; +import sun.jvm.hotspot.debugger.linux.*; + +public class LinuxMIPS64ThreadContext extends MIPS64ThreadContext { + private LinuxDebugger debugger; + + public LinuxMIPS64ThreadContext(LinuxDebugger debugger) { + super(); + this.debugger = debugger; + } + + public void setRegisterAsAddress(int index, Address value) { + setRegister(index, debugger.getAddressValue(value)); + } + + public Address getRegisterAsAddress(int index) { + return debugger.newAddress(getRegister(index)); + } +} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/loongarch64/LOONGARCH64ThreadContext.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/loongarch64/LOONGARCH64ThreadContext.java new file mode 100644 index 00000000000..1de3cb1a472 --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/loongarch64/LOONGARCH64ThreadContext.java @@ -0,0 +1,128 @@ +/* + * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. 
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.debugger.loongarch64; + +import java.lang.annotation.Native; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.cdbg.*; + +/** Specifies the thread context on loongarch64 platforms; only a sub-portion + of the context is guaranteed to be present on all operating + systems. */ + +public abstract class LOONGARCH64ThreadContext implements ThreadContext { + + // NOTE: the indices for the various registers must be maintained as + // listed across various operating systems. However, only a small + // subset of the registers' values are guaranteed to be present (and + // must be present for the SA's stack walking to work); on LoongArch64 + // these are SP, FP and PC. + + // One instance of the Native annotation is enough to trigger header generation + // for this file. 
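+ // The indices below follow the LoongArch64 integer register file in ABI order (zero, ra, tp, sp, + // a0-a7, t0-t8, the reserved r21 register named RX here, fp, s0-s8), followed by a single PC slot, + // giving NPRGREG == 33. The native side (LinuxDebuggerLocal.c in this patch) copies gregs.regs[] + // into slots 0..30 and stores csr_era into the PC slot, so this ordering must not change.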
+ @Native + public static final int ZERO = 0; + public static final int RA = 1; + public static final int TP = 2; + public static final int SP = 3; + public static final int A0 = 4; + public static final int A1 = 5; + public static final int A2 = 6; + public static final int A3 = 7; + public static final int A4 = 8; + public static final int A5 = 9; + public static final int A6 = 10; + public static final int A7 = 11; + public static final int T0 = 12; + public static final int T1 = 13; + public static final int T2 = 14; + public static final int T3 = 15; + public static final int T4 = 16; + public static final int T5 = 17; + public static final int T6 = 18; + public static final int T7 = 19; + public static final int T8 = 20; + public static final int RX = 21; + public static final int FP = 22; + public static final int S0 = 23; + public static final int S1 = 24; + public static final int S2 = 25; + public static final int S3 = 26; + public static final int S4 = 27; + public static final int S5 = 28; + public static final int S6 = 29; + public static final int S7 = 30; + public static final int S8 = 31; + public static final int PC = 32; + public static final int NPRGREG = 33; + + private static final String[] regNames = { + "ZERO", "RA", "TP", "SP", + "A0", "A1", "A2", "A3", + "A4", "A5", "A6", "A7", + "T0", "T1", "T2", "T3", + "T4", "T5", "T6", "T7", + "T8", "RX", "FP", "S0", + "S1", "S2", "S3", "S4", + "S5", "S6", "S7", "S8", + "PC" + }; + + private long[] data; + + public LOONGARCH64ThreadContext() { + data = new long[NPRGREG]; + } + + public int getNumRegisters() { + return NPRGREG; + } + + public String getRegisterName(int index) { + return regNames[index]; + } + + public void setRegister(int index, long value) { + data[index] = value; + } + + public long getRegister(int index) { + return data[index]; + } + + public CFrame getTopFrame(Debugger dbg) { + return null; + } + + /** This can't be implemented in this class since we would have to + tie the implementation to, for example, the debugging system */ + public abstract void setRegisterAsAddress(int index, Address value); + + /** This can't be implemented in this class since we would have to + tie the implementation to, for example, the debugging system */ + public abstract Address getRegisterAsAddress(int index); +} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/mips64/MIPS64ThreadContext.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/mips64/MIPS64ThreadContext.java new file mode 100644 index 00000000000..d3479a65ea0 --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/mips64/MIPS64ThreadContext.java @@ -0,0 +1,128 @@ +/* + * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.debugger.mips64; + +import java.lang.annotation.Native; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.cdbg.*; + +/** Specifies the thread context on mips64 platforms; only a sub-portion + of the context is guaranteed to be present on all operating + systems. */ + +public abstract class MIPS64ThreadContext implements ThreadContext { + + // NOTE: the indices for the various registers must be maintained as + // listed across various operating systems. However, only a small + // subset of the registers' values are guaranteed to be present (and + // must be present for the SA's stack walking to work): EAX, EBX, + // ECX, EDX, ESI, EDI, EBP, ESP, and EIP. + + // One instance of the Native annotation is enough to trigger header generation + // for this file. + @Native + public static final int ZERO = 0; + public static final int AT = 1; + public static final int V0 = 2; + public static final int V1 = 3; + public static final int A0 = 4; + public static final int A1 = 5; + public static final int A2 = 6; + public static final int A3 = 7; + public static final int T0 = 8; + public static final int T1 = 9; + public static final int T2 = 10; + public static final int T3 = 11; + public static final int T4 = 12; + public static final int T5 = 13; + public static final int T6 = 14; + public static final int T7 = 15; + public static final int S0 = 16; + public static final int S1 = 17; + public static final int S2 = 18; + public static final int S3 = 19; + public static final int S4 = 20; + public static final int S5 = 21; + public static final int S6 = 22; + public static final int S7 = 23; + public static final int T8 = 24; + public static final int T9 = 25; + public static final int K0 = 26; + public static final int K1 = 27; + public static final int GP = 28; + public static final int SP = 29; + public static final int FP = 30; + public static final int RA = 31; + public static final int PC = 32; + public static final int NPRGREG = 33; + + private static final String[] regNames = { + "ZERO", "AT", "V0", "V1", + "A0", "A1", "A2", "A3", + "T0", "T1", "T2", "T3", + "T4", "T5", "T6", "T7", + "S0", "S1", "S2", "S3", + "S4", "S5", "S6", "S7", + "T8", "T9", "K0", "K1", + "GP", "SP", "FP", "RA", + "PC" + }; + + private long[] data; + + public MIPS64ThreadContext() { + data = new long[NPRGREG]; + } + + public int getNumRegisters() { + return NPRGREG; + } + + public String getRegisterName(int index) { + return regNames[index]; + } + + public void setRegister(int index, long value) { + data[index] = value; + } + + public long getRegister(int index) { + return data[index]; + } + + public CFrame getTopFrame(Debugger dbg) { + return null; + } + + /** This can't be implemented in this class since we would have to + tie the implementation to, for example, the debugging system */ + public abstract void setRegisterAsAddress(int index, Address value); + + /** This can't be implemented in this class since we would have to + tie the implementation to, for example, the debugging system */ + public abstract Address getRegisterAsAddress(int index); +} diff --git 
a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/posix/elf/ELFHeader.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/posix/elf/ELFHeader.java index 7113a3a497b..de47531db7c 100644 --- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/posix/elf/ELFHeader.java +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/posix/elf/ELFHeader.java @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2021, These + * modifications are Copyright (c) 2019, 2021, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + package sun.jvm.hotspot.debugger.posix.elf; import java.io.FileInputStream; @@ -63,6 +69,8 @@ public interface ELFHeader { public static final int ARCH_i860 = 7; /** MIPS architecture type. */ public static final int ARCH_MIPS = 8; + /** LOONGARCH architecture type. */ + public static final int ARCH_LOONGARCH = 9; /** Returns a file type which is defined by the file type constants. */ public short getFileType(); diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/ProcDebuggerLocal.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/ProcDebuggerLocal.java index 74e957d94b8..46ece3611fb 100644 --- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/ProcDebuggerLocal.java +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/ProcDebuggerLocal.java @@ -32,11 +32,13 @@ import sun.jvm.hotspot.debugger.cdbg.*; import sun.jvm.hotspot.debugger.proc.amd64.*; import sun.jvm.hotspot.debugger.proc.aarch64.*; +import sun.jvm.hotspot.debugger.proc.mips64.*; import sun.jvm.hotspot.debugger.proc.sparc.*; import sun.jvm.hotspot.debugger.proc.ppc64.*; import sun.jvm.hotspot.debugger.proc.x86.*; import sun.jvm.hotspot.debugger.ppc64.*; import sun.jvm.hotspot.debugger.amd64.*; +import sun.jvm.hotspot.debugger.mips64.*; import sun.jvm.hotspot.debugger.aarch64.*; import sun.jvm.hotspot.debugger.sparc.*; import sun.jvm.hotspot.debugger.x86.*; @@ -90,6 +92,10 @@ public ProcDebuggerLocal(MachineDescription machDesc, boolean useCache) { threadFactory = new ProcAMD64ThreadFactory(this); pcRegIndex = AMD64ThreadContext.RIP; fpRegIndex = AMD64ThreadContext.RBP; + } else if (cpu.equals("mips64") || cpu.equals("mips64el")) { + threadFactory = new ProcMIPS64ThreadFactory(this); + pcRegIndex = MIPS64ThreadContext.PC; + fpRegIndex = MIPS64ThreadContext.FP; } else if (cpu.equals("aarch64")) { threadFactory = new ProcAARCH64ThreadFactory(this); pcRegIndex = AARCH64ThreadContext.PC; diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/loongarch64/ProcLOONGARCH64Thread.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/loongarch64/ProcLOONGARCH64Thread.java new file mode 100644 index 00000000000..1f60fa6cfb2 --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/loongarch64/ProcLOONGARCH64Thread.java @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2002, 2003, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.debugger.proc.loongarch64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.loongarch64.*; +import sun.jvm.hotspot.debugger.proc.*; +import sun.jvm.hotspot.utilities.*; + +public class ProcLOONGARCH64Thread implements ThreadProxy { + private ProcDebugger debugger; + private int id; + + public ProcLOONGARCH64Thread(ProcDebugger debugger, Address addr) { + this.debugger = debugger; + + // FIXME: the size here should be configurable. However, making it + // so would produce a dependency on the "types" package from the + // debugger package, which is not desired. + this.id = (int) addr.getCIntegerAt(0, 4, true); + } + + public ProcLOONGARCH64Thread(ProcDebugger debugger, long id) { + this.debugger = debugger; + this.id = (int) id; + } + + public ThreadContext getContext() throws IllegalThreadStateException { + ProcLOONGARCH64ThreadContext context = new ProcLOONGARCH64ThreadContext(debugger); + long[] regs = debugger.getThreadIntegerRegisterSet(id); + /* + _NGREG in reg.h is defined to be 19. Because we have included + debug registers LOONGARCH64ThreadContext.NPRGREG is 25. + */ + + if (Assert.ASSERTS_ENABLED) { + Assert.that(regs.length <= LOONGARCH64ThreadContext.NPRGREG, "size of register set is greater than " + LOONGARCH64ThreadContext.NPRGREG); + } + for (int i = 0; i < regs.length; i++) { + context.setRegister(i, regs[i]); + } + return context; + } + + public boolean canSetContext() throws DebuggerException { + return false; + } + + public void setContext(ThreadContext context) + throws IllegalThreadStateException, DebuggerException { + throw new DebuggerException("Unimplemented"); + } + + public String toString() { + return "t@" + id; + } + + public boolean equals(Object obj) { + if ((obj == null) || !(obj instanceof ProcLOONGARCH64Thread)) { + return false; + } + + return (((ProcLOONGARCH64Thread) obj).id == id); + } + + public int hashCode() { + return id; + } +} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/loongarch64/ProcLOONGARCH64ThreadContext.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/loongarch64/ProcLOONGARCH64ThreadContext.java new file mode 100644 index 00000000000..ef5597ac4e9 --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/loongarch64/ProcLOONGARCH64ThreadContext.java @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2002, 2003, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.debugger.proc.loongarch64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.loongarch64.*; +import sun.jvm.hotspot.debugger.proc.*; + +public class ProcLOONGARCH64ThreadContext extends LOONGARCH64ThreadContext { + private ProcDebugger debugger; + + public ProcLOONGARCH64ThreadContext(ProcDebugger debugger) { + super(); + this.debugger = debugger; + } + + public void setRegisterAsAddress(int index, Address value) { + setRegister(index, debugger.getAddressValue(value)); + } + + public Address getRegisterAsAddress(int index) { + return debugger.newAddress(getRegister(index)); + } +} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/loongarch64/ProcLOONGARCH64ThreadFactory.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/loongarch64/ProcLOONGARCH64ThreadFactory.java new file mode 100644 index 00000000000..abad1bb38b7 --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/loongarch64/ProcLOONGARCH64ThreadFactory.java @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +package sun.jvm.hotspot.debugger.proc.loongarch64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.proc.*; + +public class ProcLOONGARCH64ThreadFactory implements ProcThreadFactory { + private ProcDebugger debugger; + + public ProcLOONGARCH64ThreadFactory(ProcDebugger debugger) { + this.debugger = debugger; + } + + public ThreadProxy createThreadWrapper(Address threadIdentifierAddr) { + return new ProcLOONGARCH64Thread(debugger, threadIdentifierAddr); + } + + public ThreadProxy createThreadWrapper(long id) { + return new ProcLOONGARCH64Thread(debugger, id); + } +} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/mips64/ProcMIPS64Thread.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/mips64/ProcMIPS64Thread.java new file mode 100644 index 00000000000..5c1e0be8932 --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/mips64/ProcMIPS64Thread.java @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2002, 2003, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.debugger.proc.mips64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.mips64.*; +import sun.jvm.hotspot.debugger.proc.*; +import sun.jvm.hotspot.utilities.*; + +public class ProcMIPS64Thread implements ThreadProxy { + private ProcDebugger debugger; + private int id; + + public ProcMIPS64Thread(ProcDebugger debugger, Address addr) { + this.debugger = debugger; + + // FIXME: the size here should be configurable. However, making it + // so would produce a dependency on the "types" package from the + // debugger package, which is not desired. + this.id = (int) addr.getCIntegerAt(0, 4, true); + } + + public ProcMIPS64Thread(ProcDebugger debugger, long id) { + this.debugger = debugger; + this.id = (int) id; + } + + public ThreadContext getContext() throws IllegalThreadStateException { + ProcMIPS64ThreadContext context = new ProcMIPS64ThreadContext(debugger); + long[] regs = debugger.getThreadIntegerRegisterSet(id); + /* + _NGREG in reg.h is defined to be 19. Because we have included + debug registers MIPS64ThreadContext.NPRGREG is 25. 
+ */ + + if (Assert.ASSERTS_ENABLED) { + Assert.that(regs.length <= MIPS64ThreadContext.NPRGREG, "size of register set is greater than " + MIPS64ThreadContext.NPRGREG); + } + for (int i = 0; i < regs.length; i++) { + context.setRegister(i, regs[i]); + } + return context; + } + + public boolean canSetContext() throws DebuggerException { + return false; + } + + public void setContext(ThreadContext context) + throws IllegalThreadStateException, DebuggerException { + throw new DebuggerException("Unimplemented"); + } + + public String toString() { + return "t@" + id; + } + + public boolean equals(Object obj) { + if ((obj == null) || !(obj instanceof ProcMIPS64Thread)) { + return false; + } + + return (((ProcMIPS64Thread) obj).id == id); + } + + public int hashCode() { + return id; + } +} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/mips64/ProcMIPS64ThreadContext.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/mips64/ProcMIPS64ThreadContext.java new file mode 100644 index 00000000000..d44223d768a --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/mips64/ProcMIPS64ThreadContext.java @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2002, 2003, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.debugger.proc.mips64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.mips64.*; +import sun.jvm.hotspot.debugger.proc.*; + +public class ProcMIPS64ThreadContext extends MIPS64ThreadContext { + private ProcDebugger debugger; + + public ProcMIPS64ThreadContext(ProcDebugger debugger) { + super(); + this.debugger = debugger; + } + + public void setRegisterAsAddress(int index, Address value) { + setRegister(index, debugger.getAddressValue(value)); + } + + public Address getRegisterAsAddress(int index) { + return debugger.newAddress(getRegister(index)); + } +} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/mips64/ProcMIPS64ThreadFactory.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/mips64/ProcMIPS64ThreadFactory.java new file mode 100644 index 00000000000..bad478fc5ca --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/mips64/ProcMIPS64ThreadFactory.java @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. 
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.debugger.proc.mips64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.proc.*; + +public class ProcMIPS64ThreadFactory implements ProcThreadFactory { + private ProcDebugger debugger; + + public ProcMIPS64ThreadFactory(ProcDebugger debugger) { + this.debugger = debugger; + } + + public ThreadProxy createThreadWrapper(Address threadIdentifierAddr) { + return new ProcMIPS64Thread(debugger, threadIdentifierAddr); + } + + public ThreadProxy createThreadWrapper(long id) { + return new ProcMIPS64Thread(debugger, id); + } +} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/RemoteDebuggerClient.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/RemoteDebuggerClient.java index b6253f6d63d..5eecb08a10b 100644 --- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/RemoteDebuggerClient.java +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/RemoteDebuggerClient.java @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2021, These + * modifications are Copyright (c) 2019, 2021, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + package sun.jvm.hotspot.debugger.remote; import java.rmi.*; @@ -34,6 +40,8 @@ import sun.jvm.hotspot.debugger.remote.x86.*; import sun.jvm.hotspot.debugger.remote.amd64.*; import sun.jvm.hotspot.debugger.remote.ppc64.*; +import sun.jvm.hotspot.debugger.remote.mips64.*; +import sun.jvm.hotspot.debugger.remote.loongarch64.*; /** An implementation of Debugger which wraps a RemoteDebugger, providing remote debugging via RMI. @@ -76,6 +84,16 @@ public RemoteDebuggerClient(RemoteDebugger remoteDebugger) throws DebuggerExcept cachePageSize = 4096; cacheNumPages = parseCacheNumPagesProperty(cacheSize / cachePageSize); unalignedAccessesOkay = true; + } else if (cpu.equals("mips64") || cpu.equals("mips64el")) { + threadFactory = new RemoteMIPS64ThreadFactory(this); + cachePageSize = 4096; + cacheNumPages = parseCacheNumPagesProperty(cacheSize / cachePageSize); + unalignedAccessesOkay = true; + } else if (cpu.equals("loongarch64")) { + threadFactory = new RemoteLOONGARCH64ThreadFactory(this); + cachePageSize = 4096; + cacheNumPages = parseCacheNumPagesProperty(cacheSize / cachePageSize); + unalignedAccessesOkay = true; } else { try { Class tf = Class.forName("sun.jvm.hotspot.debugger.remote." 
+ diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/loongarch64/RemoteLOONGARCH64Thread.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/loongarch64/RemoteLOONGARCH64Thread.java new file mode 100644 index 00000000000..242dd279e1a --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/loongarch64/RemoteLOONGARCH64Thread.java @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2018, 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.debugger.remote.loongarch64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.loongarch64.*; +import sun.jvm.hotspot.debugger.remote.*; +import sun.jvm.hotspot.utilities.*; + +public class RemoteLOONGARCH64Thread extends RemoteThread { + public RemoteLOONGARCH64Thread(RemoteDebuggerClient debugger, Address addr) { + super(debugger, addr); + } + + public RemoteLOONGARCH64Thread(RemoteDebuggerClient debugger, long id) { + super(debugger, id); + } + + public ThreadContext getContext() throws IllegalThreadStateException { + RemoteLOONGARCH64ThreadContext context = new RemoteLOONGARCH64ThreadContext(debugger); + long[] regs = (addr != null)? debugger.getThreadIntegerRegisterSet(addr) : + debugger.getThreadIntegerRegisterSet(id); + if (Assert.ASSERTS_ENABLED) { + Assert.that(regs.length == LOONGARCH64ThreadContext.NPRGREG, "size of register set must match"); + } + for (int i = 0; i < regs.length; i++) { + context.setRegister(i, regs[i]); + } + return context; + } +} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/loongarch64/RemoteLOONGARCH64ThreadContext.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/loongarch64/RemoteLOONGARCH64ThreadContext.java new file mode 100644 index 00000000000..634d5ad049f --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/loongarch64/RemoteLOONGARCH64ThreadContext.java @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2018, 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.debugger.remote.loongarch64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.loongarch64.*; +import sun.jvm.hotspot.debugger.remote.*; + +public class RemoteLOONGARCH64ThreadContext extends LOONGARCH64ThreadContext { + private RemoteDebuggerClient debugger; + + public RemoteLOONGARCH64ThreadContext(RemoteDebuggerClient debugger) { + super(); + this.debugger = debugger; + } + + /** This can't be implemented in this class since we would have to + tie the implementation to, for example, the debugging system */ + public void setRegisterAsAddress(int index, Address value) { + setRegister(index, debugger.getAddressValue(value)); + } + + /** This can't be implemented in this class since we would have to + tie the implementation to, for example, the debugging system */ + public Address getRegisterAsAddress(int index) { + return debugger.newAddress(getRegister(index)); + } +} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/loongarch64/RemoteLOONGARCH64ThreadFactory.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/loongarch64/RemoteLOONGARCH64ThreadFactory.java new file mode 100644 index 00000000000..4fb9cc7c069 --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/loongarch64/RemoteLOONGARCH64ThreadFactory.java @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2018, 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +package sun.jvm.hotspot.debugger.remote.loongarch64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.remote.*; + +public class RemoteLOONGARCH64ThreadFactory implements RemoteThreadFactory { + private RemoteDebuggerClient debugger; + + public RemoteLOONGARCH64ThreadFactory(RemoteDebuggerClient debugger) { + this.debugger = debugger; + } + + public ThreadProxy createThreadWrapper(Address threadIdentifierAddr) { + return new RemoteLOONGARCH64Thread(debugger, threadIdentifierAddr); + } + + public ThreadProxy createThreadWrapper(long id) { + return new RemoteLOONGARCH64Thread(debugger, id); + } +} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/mips64/RemoteMIPS64Thread.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/mips64/RemoteMIPS64Thread.java new file mode 100644 index 00000000000..c2f7d841f20 --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/mips64/RemoteMIPS64Thread.java @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2018, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.debugger.remote.mips64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.mips64.*; +import sun.jvm.hotspot.debugger.remote.*; +import sun.jvm.hotspot.utilities.*; + +public class RemoteMIPS64Thread extends RemoteThread { + public RemoteMIPS64Thread(RemoteDebuggerClient debugger, Address addr) { + super(debugger, addr); + } + + public RemoteMIPS64Thread(RemoteDebuggerClient debugger, long id) { + super(debugger, id); + } + + public ThreadContext getContext() throws IllegalThreadStateException { + RemoteMIPS64ThreadContext context = new RemoteMIPS64ThreadContext(debugger); + long[] regs = (addr != null)? 
debugger.getThreadIntegerRegisterSet(addr) : + debugger.getThreadIntegerRegisterSet(id); + if (Assert.ASSERTS_ENABLED) { + Assert.that(regs.length == MIPS64ThreadContext.NPRGREG, "size of register set must match"); + } + for (int i = 0; i < regs.length; i++) { + context.setRegister(i, regs[i]); + } + return context; + } +} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/mips64/RemoteMIPS64ThreadContext.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/mips64/RemoteMIPS64ThreadContext.java new file mode 100644 index 00000000000..23646905d74 --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/mips64/RemoteMIPS64ThreadContext.java @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2018, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.debugger.remote.mips64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.mips64.*; +import sun.jvm.hotspot.debugger.remote.*; + +public class RemoteMIPS64ThreadContext extends MIPS64ThreadContext { + private RemoteDebuggerClient debugger; + + public RemoteMIPS64ThreadContext(RemoteDebuggerClient debugger) { + super(); + this.debugger = debugger; + } + + /** This can't be implemented in this class since we would have to + tie the implementation to, for example, the debugging system */ + public void setRegisterAsAddress(int index, Address value) { + setRegister(index, debugger.getAddressValue(value)); + } + + /** This can't be implemented in this class since we would have to + tie the implementation to, for example, the debugging system */ + public Address getRegisterAsAddress(int index) { + return debugger.newAddress(getRegister(index)); + } +} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/mips64/RemoteMIPS64ThreadFactory.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/mips64/RemoteMIPS64ThreadFactory.java new file mode 100644 index 00000000000..b39b0144901 --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/mips64/RemoteMIPS64ThreadFactory.java @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2018, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.debugger.remote.mips64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.remote.*; + +public class RemoteMIPS64ThreadFactory implements RemoteThreadFactory { + private RemoteDebuggerClient debugger; + + public RemoteMIPS64ThreadFactory(RemoteDebuggerClient debugger) { + this.debugger = debugger; + } + + public ThreadProxy createThreadWrapper(Address threadIdentifierAddr) { + return new RemoteMIPS64Thread(debugger, threadIdentifierAddr); + } + + public ThreadProxy createThreadWrapper(long id) { + return new RemoteMIPS64Thread(debugger, id); + } +} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/Threads.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/Threads.java index 190062785a7..04681fa0e78 100644 --- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/Threads.java +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/Threads.java @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2021, These + * modifications are Copyright (c) 2019, 2021, Loongson Technology, and are made + * available on the same license terms set forth above. 
+ */ + package sun.jvm.hotspot.runtime; import java.util.*; @@ -39,6 +45,8 @@ import sun.jvm.hotspot.runtime.linux_amd64.LinuxAMD64JavaThreadPDAccess; import sun.jvm.hotspot.runtime.linux_aarch64.LinuxAARCH64JavaThreadPDAccess; import sun.jvm.hotspot.runtime.linux_ppc64.LinuxPPC64JavaThreadPDAccess; +import sun.jvm.hotspot.runtime.linux_mips64.LinuxMIPS64JavaThreadPDAccess; +import sun.jvm.hotspot.runtime.linux_loongarch64.LinuxLOONGARCH64JavaThreadPDAccess; import sun.jvm.hotspot.runtime.linux_sparc.LinuxSPARCJavaThreadPDAccess; import sun.jvm.hotspot.runtime.bsd_x86.BsdX86JavaThreadPDAccess; import sun.jvm.hotspot.runtime.bsd_amd64.BsdAMD64JavaThreadPDAccess; @@ -99,6 +107,10 @@ private static synchronized void initialize(TypeDataBase db) { access = new LinuxPPC64JavaThreadPDAccess(); } else if (cpu.equals("aarch64")) { access = new LinuxAARCH64JavaThreadPDAccess(); + } else if (cpu.equals("mips64")) { + access = new LinuxMIPS64JavaThreadPDAccess(); + } else if (cpu.equals("loongarch64")) { + access = new LinuxLOONGARCH64JavaThreadPDAccess(); } else { try { access = (JavaThreadPDAccess) diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_loongarch64/LinuxLOONGARCH64JavaThreadPDAccess.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_loongarch64/LinuxLOONGARCH64JavaThreadPDAccess.java new file mode 100644 index 00000000000..ee1003e352a --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_loongarch64/LinuxLOONGARCH64JavaThreadPDAccess.java @@ -0,0 +1,133 @@ +/* + * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2018, 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +package sun.jvm.hotspot.runtime.linux_loongarch64; + +import java.io.*; +import java.util.*; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.loongarch64.*; +import sun.jvm.hotspot.runtime.*; +import sun.jvm.hotspot.runtime.loongarch64.*; +import sun.jvm.hotspot.types.*; +import sun.jvm.hotspot.utilities.*; + +public class LinuxLOONGARCH64JavaThreadPDAccess implements JavaThreadPDAccess { + private static AddressField lastJavaFPField; + private static AddressField osThreadField; + + // Field from OSThread + private static CIntegerField osThreadThreadIDField; + + // This is currently unneeded but is being kept in case we change + // the currentFrameGuess algorithm + private static final long GUESS_SCAN_RANGE = 128 * 1024; + + static { + VM.registerVMInitializedObserver(new Observer() { + public void update(Observable o, Object data) { + initialize(VM.getVM().getTypeDataBase()); + } + }); + } + + private static synchronized void initialize(TypeDataBase db) { + Type type = db.lookupType("JavaThread"); + osThreadField = type.getAddressField("_osthread"); + + Type anchorType = db.lookupType("JavaFrameAnchor"); + lastJavaFPField = anchorType.getAddressField("_last_Java_fp"); + + Type osThreadType = db.lookupType("OSThread"); + osThreadThreadIDField = osThreadType.getCIntegerField("_thread_id"); + } + + public Address getLastJavaFP(Address addr) { + return lastJavaFPField.getValue(addr.addOffsetTo(sun.jvm.hotspot.runtime.JavaThread.getAnchorField().getOffset())); + } + + public Address getLastJavaPC(Address addr) { + return null; + } + + public Address getBaseOfStackPointer(Address addr) { + return null; + } + + public Frame getLastFramePD(JavaThread thread, Address addr) { + Address fp = thread.getLastJavaFP(); + if (fp == null) { + return null; // no information + } + return new LOONGARCH64Frame(thread.getLastJavaSP(), fp); + } + + public RegisterMap newRegisterMap(JavaThread thread, boolean updateMap) { + return new LOONGARCH64RegisterMap(thread, updateMap); + } + + public Frame getCurrentFrameGuess(JavaThread thread, Address addr) { + ThreadProxy t = getThreadProxy(addr); + LOONGARCH64ThreadContext context = (LOONGARCH64ThreadContext) t.getContext(); + LOONGARCH64CurrentFrameGuess guesser = new LOONGARCH64CurrentFrameGuess(context, thread); + if (!guesser.run(GUESS_SCAN_RANGE)) { + return null; + } + if (guesser.getPC() == null) { + return new LOONGARCH64Frame(guesser.getSP(), guesser.getFP()); + } else { + return new LOONGARCH64Frame(guesser.getSP(), guesser.getFP(), guesser.getPC()); + } + } + + public void printThreadIDOn(Address addr, PrintStream tty) { + tty.print(getThreadProxy(addr)); + } + + public void printInfoOn(Address threadAddr, PrintStream tty) { + tty.print("Thread id: "); + printThreadIDOn(threadAddr, tty); + // tty.println("\nPostJavaState: " + getPostJavaState(threadAddr)); + } + + public Address getLastSP(Address addr) { + ThreadProxy t = getThreadProxy(addr); + LOONGARCH64ThreadContext context = (LOONGARCH64ThreadContext) t.getContext(); + return context.getRegisterAsAddress(LOONGARCH64ThreadContext.SP); + } + + public ThreadProxy getThreadProxy(Address addr) { + // Addr is the address of the JavaThread. 
+ // Fetch the OSThread (for now and for simplicity, not making a + // separate "OSThread" class in this package) + Address osThreadAddr = osThreadField.getValue(addr); + // Get the address of the _thread_id from the OSThread + Address threadIdAddr = osThreadAddr.addOffsetTo(osThreadThreadIDField.getOffset()); + + JVMDebugger debugger = VM.getVM().getDebugger(); + return debugger.getThreadForIdentifierAddress(threadIdAddr); + } +} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_mips64/LinuxMIPS64JavaThreadPDAccess.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_mips64/LinuxMIPS64JavaThreadPDAccess.java new file mode 100644 index 00000000000..181f431b64b --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_mips64/LinuxMIPS64JavaThreadPDAccess.java @@ -0,0 +1,133 @@ +/* + * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2018, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +package sun.jvm.hotspot.runtime.linux_mips64; + +import java.io.*; +import java.util.*; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.mips64.*; +import sun.jvm.hotspot.runtime.*; +import sun.jvm.hotspot.runtime.mips64.*; +import sun.jvm.hotspot.types.*; +import sun.jvm.hotspot.utilities.*; + +public class LinuxMIPS64JavaThreadPDAccess implements JavaThreadPDAccess { + private static AddressField osThreadField; + + // Field from OSThread + private static CIntegerField osThreadThreadIDField; + + // This is currently unneeded but is being kept in case we change + // the currentFrameGuess algorithm + private static final long GUESS_SCAN_RANGE = 128 * 1024; + + static { + VM.registerVMInitializedObserver(new Observer() { + public void update(Observable o, Object data) { + initialize(VM.getVM().getTypeDataBase()); + } + }); + } + + private static synchronized void initialize(TypeDataBase db) { + Type type = db.lookupType("JavaThread"); + osThreadField = type.getAddressField("_osthread"); + + Type osThreadType = db.lookupType("OSThread"); + osThreadThreadIDField = osThreadType.getCIntegerField("_thread_id"); + } + + public Address getLastJavaFP(Address addr) { + return null; + } + + public Address getLastJavaPC(Address addr) { + return null; + } + + public Address getBaseOfStackPointer(Address addr) { + return null; + } + + public Frame getLastFramePD(JavaThread thread, Address addr) { + Address fp = thread.getLastJavaFP(); + if (fp == null) { + return null; // no information + } + return new MIPS64Frame(thread.getLastJavaSP(), fp); + } + + public RegisterMap newRegisterMap(JavaThread thread, boolean updateMap) { + return new MIPS64RegisterMap(thread, updateMap); + } + + public Frame getCurrentFrameGuess(JavaThread thread, Address addr) { + ThreadProxy t = getThreadProxy(addr); + MIPS64ThreadContext context = (MIPS64ThreadContext) t.getContext(); + MIPS64CurrentFrameGuess guesser = new MIPS64CurrentFrameGuess(context, thread); + if (!guesser.run(GUESS_SCAN_RANGE)) { + return null; + } + if (guesser.getPC() == null) { + return new MIPS64Frame(guesser.getSP(), guesser.getFP()); + } else { + return new MIPS64Frame(guesser.getSP(), guesser.getFP(), guesser.getPC()); + } + } + + public void printThreadIDOn(Address addr, PrintStream tty) { + tty.print(getThreadProxy(addr)); + } + + public void printInfoOn(Address threadAddr, PrintStream tty) { + tty.print("Thread id: "); + printThreadIDOn(threadAddr, tty); + // tty.println("\nPostJavaState: " + getPostJavaState(threadAddr)); + } + + public Address getLastSP(Address addr) { + ThreadProxy t = getThreadProxy(addr); + MIPS64ThreadContext context = (MIPS64ThreadContext) t.getContext(); + return context.getRegisterAsAddress(MIPS64ThreadContext.SP); + } + + public Address getLastFP(Address addr) { + return getLastSP(addr).getAddressAt(0); + } + + public ThreadProxy getThreadProxy(Address addr) { + // Addr is the address of the JavaThread. 
+ // Fetch the OSThread (for now and for simplicity, not making a + // separate "OSThread" class in this package) + Address osThreadAddr = osThreadField.getValue(addr); + // Get the address of the _thread_id from the OSThread + Address threadIdAddr = osThreadAddr.addOffsetTo(osThreadThreadIDField.getOffset()); + + JVMDebugger debugger = VM.getVM().getDebugger(); + return debugger.getThreadForIdentifierAddress(threadIdAddr); + } +} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64CurrentFrameGuess.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64CurrentFrameGuess.java new file mode 100644 index 00000000000..824270e1329 --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64CurrentFrameGuess.java @@ -0,0 +1,250 @@ +/* + * Copyright (c) 2001, 2006, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2018, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.runtime.loongarch64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.loongarch64.*; +import sun.jvm.hotspot.code.*; +import sun.jvm.hotspot.interpreter.*; +import sun.jvm.hotspot.runtime.*; + +/**

Should be able to be used on all loongarch64 platforms we support + (Win32, Solaris/loongarch64, and soon Linux) to implement JavaThread's + "currentFrameGuess()" functionality. Input is an LOONGARCH64ThreadContext; + output is SP, FP, and PC for an LOONGARCH64Frame. Instantiation of the + LOONGARCH64Frame is left to the caller, since we may need to subclass + LOONGARCH64Frame to support signal handler frames on Unix platforms. + + Algorithm is to walk up the stack within a given range (say, + 512K at most) looking for a plausible PC and SP for a Java frame, + also considering those coming in from the context. If we find a PC + that belongs to the VM (i.e., in generated code like the + interpreter or CodeCache) then we try to find an associated EBP. + We repeat this until we either find a complete frame or run out of + stack to look at.
*/ + +public class LOONGARCH64CurrentFrameGuess { + private LOONGARCH64ThreadContext context; + private JavaThread thread; + private Address spFound; + private Address fpFound; + private Address pcFound; + + private static final boolean DEBUG = System.getProperty("sun.jvm.hotspot.runtime.loongarch64.LOONGARCH64Frame.DEBUG") + != null; + + public LOONGARCH64CurrentFrameGuess(LOONGARCH64ThreadContext context, + JavaThread thread) { + this.context = context; + this.thread = thread; + } + + /** Returns false if not able to find a frame within a reasonable range. */ + public boolean run(long regionInBytesToSearch) { + Address sp = context.getRegisterAsAddress(LOONGARCH64ThreadContext.SP); + Address pc = context.getRegisterAsAddress(LOONGARCH64ThreadContext.PC); + Address fp = context.getRegisterAsAddress(LOONGARCH64ThreadContext.FP); + if (sp == null) { + // Bail out if no last java frame eithe + if (thread.getLastJavaSP() != null) { + setValues(thread.getLastJavaSP(), thread.getLastJavaFP(), null); + return true; + } + // Bail out + return false; + } + Address end = sp.addOffsetTo(regionInBytesToSearch); + VM vm = VM.getVM(); + + setValues(null, null, null); // Assume we're not going to find anything + + if (vm.isJavaPCDbg(pc)) { + if (vm.isClientCompiler()) { + // If the topmost frame is a Java frame, we are (pretty much) + // guaranteed to have a viable EBP. We should be more robust + // than this (we have the potential for losing entire threads' + // stack traces) but need to see how much work we really have + // to do here. Searching the stack for an (SP, FP) pair is + // hard since it's easy to misinterpret inter-frame stack + // pointers as base-of-frame pointers; we also don't know the + // sizes of C1 frames (not registered in the nmethod) so can't + // derive them from ESP. + + setValues(sp, fp, pc); + return true; + } else { + if (vm.getInterpreter().contains(pc)) { + if (DEBUG) { + System.out.println("CurrentFrameGuess: choosing interpreter frame: sp = " + + sp + ", fp = " + fp + ", pc = " + pc); + } + setValues(sp, fp, pc); + return true; + } + + // For the server compiler, EBP is not guaranteed to be valid + // for compiled code. In addition, an earlier attempt at a + // non-searching algorithm (see below) failed because the + // stack pointer from the thread context was pointing + // (considerably) beyond the ostensible end of the stack, into + // garbage; walking from the topmost frame back caused a crash. + // + // This algorithm takes the current PC as a given and tries to + // find the correct corresponding SP by walking up the stack + // and repeatedly performing stackwalks (very inefficient). + // + // FIXME: there is something wrong with stackwalking across + // adapter frames...this is likely to be the root cause of the + // failure with the simpler algorithm below. + + for (long offset = 0; + offset < regionInBytesToSearch; + offset += vm.getAddressSize()) { + try { + Address curSP = sp.addOffsetTo(offset); + Frame frame = new LOONGARCH64Frame(curSP, null, pc); + RegisterMap map = thread.newRegisterMap(false); + while (frame != null) { + if (frame.isEntryFrame() && frame.entryFrameIsFirst()) { + // We were able to traverse all the way to the + // bottommost Java frame. + // This sp looks good. Keep it. 
+ if (DEBUG) { + System.out.println("CurrentFrameGuess: Choosing sp = " + curSP + ", pc = " + pc); + } + setValues(curSP, null, pc); + return true; + } + frame = frame.sender(map); + } + } catch (Exception e) { + if (DEBUG) { + System.out.println("CurrentFrameGuess: Exception " + e + " at offset " + offset); + } + // Bad SP. Try another. + } + } + + // Were not able to find a plausible SP to go with this PC. + // Bail out. + return false; + + /* + // Original algorithm which does not work because SP was + // pointing beyond where it should have: + + // For the server compiler, EBP is not guaranteed to be valid + // for compiled code. We see whether the PC is in the + // interpreter and take care of that, otherwise we run code + // (unfortunately) duplicated from LOONGARCH64Frame.senderForCompiledFrame. + + CodeCache cc = vm.getCodeCache(); + if (cc.contains(pc)) { + CodeBlob cb = cc.findBlob(pc); + + // See if we can derive a frame pointer from SP and PC + // NOTE: This is the code duplicated from LOONGARCH64Frame + Address saved_fp = null; + int llink_offset = cb.getLinkOffset(); + if (llink_offset >= 0) { + // Restore base-pointer, since next frame might be an interpreter frame. + Address fp_addr = sp.addOffsetTo(VM.getVM().getAddressSize() * llink_offset); + saved_fp = fp_addr.getAddressAt(0); + } + + setValues(sp, saved_fp, pc); + return true; + } + */ + } + } else { + // If the current program counter was not known to us as a Java + // PC, we currently assume that we are in the run-time system + // and attempt to look to thread-local storage for saved ESP and + // EBP. Note that if these are null (because we were, in fact, + // in Java code, i.e., vtable stubs or similar, and the SA + // didn't have enough insight into the target VM to understand + // that) then we are going to lose the entire stack trace for + // the thread, which is sub-optimal. FIXME. + + if (DEBUG) { + System.out.println("CurrentFrameGuess: choosing last Java frame: sp = " + + thread.getLastJavaSP() + ", fp = " + thread.getLastJavaFP()); + } + if (thread.getLastJavaSP() == null) { + return false; // No known Java frames on stack + } + + // The runtime has a nasty habit of not saving fp in the frame + // anchor, leaving us to grovel about in the stack to find a + // plausible address. Fortunately, this only happens in + // compiled code; there we always have a valid PC, and we always + // push LR and FP onto the stack as a pair, with FP at the lower + // address. + pc = thread.getLastJavaPC(); + fp = thread.getLastJavaFP(); + sp = thread.getLastJavaSP(); + + if (fp == null) { + CodeCache cc = vm.getCodeCache(); + if (cc.contains(pc)) { + CodeBlob cb = cc.findBlob(pc); + if (DEBUG) { + System.out.println("FP is null. Found blob frame size " + cb.getFrameSize()); + } + // See if we can derive a frame pointer from SP and PC + long link_offset = cb.getFrameSize() - 2 * VM.getVM().getAddressSize(); + if (link_offset >= 0) { + fp = sp.addOffsetTo(link_offset); + } + } + } + + // We found a PC in the frame anchor. Check that it's plausible, and + // if it is, use it. 
+ if (vm.isJavaPCDbg(pc)) { + setValues(sp, fp, pc); + } else { + setValues(sp, fp, null); + } + + return true; + } + } + + public Address getSP() { return spFound; } + public Address getFP() { return fpFound; } + /** May be null if getting values from thread-local storage; take + care to call the correct LOONGARCH64Frame constructor to recover this if + necessary */ + public Address getPC() { return pcFound; } + + private void setValues(Address sp, Address fp, Address pc) { + spFound = sp; + fpFound = fp; + pcFound = pc; + } +} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64Frame.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64Frame.java new file mode 100644 index 00000000000..058afc94d08 --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64Frame.java @@ -0,0 +1,526 @@ +/* + * Copyright (c) 2001, 2015, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2018, 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.runtime.loongarch64; + +import java.util.*; +import sun.jvm.hotspot.code.*; +import sun.jvm.hotspot.compiler.*; +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.oops.*; +import sun.jvm.hotspot.runtime.*; +import sun.jvm.hotspot.types.*; +import sun.jvm.hotspot.utilities.*; + +/** Specialization of and implementation of abstract methods of the + Frame class for the loongarch64 family of CPUs. 
*/ + +public class LOONGARCH64Frame extends Frame { + private static final boolean DEBUG; + static { + DEBUG = System.getProperty("sun.jvm.hotspot.runtime.loongarch64.LOONGARCH64Frame.DEBUG") != null; + } + + // Java frames + private static final int JAVA_FRAME_LINK_OFFSET = 0; + private static final int JAVA_FRAME_RETURN_ADDR_OFFSET = 1; + private static final int JAVA_FRAME_SENDER_SP_OFFSET = 2; + + // Native frames + private static final int NATIVE_FRAME_LINK_OFFSET = -2; + private static final int NATIVE_FRAME_RETURN_ADDR_OFFSET = -1; + private static final int NATIVE_FRAME_SENDER_SP_OFFSET = 0; + + // Interpreter frames + private static final int INTERPRETER_FRAME_SENDER_SP_OFFSET = -1; + private static final int INTERPRETER_FRAME_LAST_SP_OFFSET = INTERPRETER_FRAME_SENDER_SP_OFFSET - 1; + private static final int INTERPRETER_FRAME_LOCALS_OFFSET = INTERPRETER_FRAME_LAST_SP_OFFSET - 1; + private static final int INTERPRETER_FRAME_METHOD_OFFSET = INTERPRETER_FRAME_LOCALS_OFFSET - 1; + private static final int INTERPRETER_FRAME_MIRROR_OFFSET = INTERPRETER_FRAME_METHOD_OFFSET - 1; + private static final int INTERPRETER_FRAME_MDX_OFFSET = INTERPRETER_FRAME_MIRROR_OFFSET - 1; + private static final int INTERPRETER_FRAME_CACHE_OFFSET = INTERPRETER_FRAME_MDX_OFFSET - 1; + private static final int INTERPRETER_FRAME_BCX_OFFSET = INTERPRETER_FRAME_CACHE_OFFSET - 1; + private static final int INTERPRETER_FRAME_INITIAL_SP_OFFSET = INTERPRETER_FRAME_BCX_OFFSET - 1; + private static final int INTERPRETER_FRAME_MONITOR_BLOCK_TOP_OFFSET = INTERPRETER_FRAME_INITIAL_SP_OFFSET; + private static final int INTERPRETER_FRAME_MONITOR_BLOCK_BOTTOM_OFFSET = INTERPRETER_FRAME_INITIAL_SP_OFFSET; + + // Entry frames + private static final int ENTRY_FRAME_CALL_WRAPPER_OFFSET = -9; + + private static VMReg fp = new VMReg(22 << 1); + + // an additional field beyond sp and pc: + Address raw_fp; // frame pointer + private Address raw_unextendedSP; + + private LOONGARCH64Frame() { + } + + private void adjustForDeopt() { + if ( pc != null) { + // Look for a deopt pc and if it is deopted convert to original pc + CodeBlob cb = VM.getVM().getCodeCache().findBlob(pc); + if (cb != null && cb.isJavaMethod()) { + NMethod nm = (NMethod) cb; + if (pc.equals(nm.deoptHandlerBegin())) { + if (Assert.ASSERTS_ENABLED) { + Assert.that(this.getUnextendedSP() != null, "null SP in Java frame"); + } + // adjust pc if frame is deoptimized. 
+ pc = this.getUnextendedSP().getAddressAt(nm.origPCOffset()); + deoptimized = true; + } + } + } + } + + public LOONGARCH64Frame(Address raw_sp, Address raw_fp, Address pc) { + this.raw_sp = raw_sp; + this.raw_unextendedSP = raw_sp; + this.raw_fp = raw_fp; + this.pc = pc; + adjustUnextendedSP(); + + // Frame must be fully constructed before this call + adjustForDeopt(); + + if (DEBUG) { + System.out.println("LOONGARCH64Frame(sp, fp, pc): " + this); + dumpStack(); + } + } + + public LOONGARCH64Frame(Address raw_sp, Address raw_fp) { + this.raw_sp = raw_sp; + this.raw_unextendedSP = raw_sp; + this.raw_fp = raw_fp; + this.pc = raw_fp.getAddressAt(1 * VM.getVM().getAddressSize()); + adjustUnextendedSP(); + + // Frame must be fully constructed before this call + adjustForDeopt(); + + if (DEBUG) { + System.out.println("LOONGARCH64Frame(sp, fp): " + this); + dumpStack(); + } + } + + public LOONGARCH64Frame(Address raw_sp, Address raw_unextendedSp, Address raw_fp, Address pc) { + this.raw_sp = raw_sp; + this.raw_unextendedSP = raw_unextendedSp; + this.raw_fp = raw_fp; + this.pc = pc; + adjustUnextendedSP(); + + // Frame must be fully constructed before this call + adjustForDeopt(); + + if (DEBUG) { + System.out.println("LOONGARCH64Frame(sp, unextendedSP, fp, pc): " + this); + dumpStack(); + } + + } + + public Object clone() { + LOONGARCH64Frame frame = new LOONGARCH64Frame(); + frame.raw_sp = raw_sp; + frame.raw_unextendedSP = raw_unextendedSP; + frame.raw_fp = raw_fp; + frame.pc = pc; + frame.deoptimized = deoptimized; + return frame; + } + + public boolean equals(Object arg) { + if (arg == null) { + return false; + } + + if (!(arg instanceof LOONGARCH64Frame)) { + return false; + } + + LOONGARCH64Frame other = (LOONGARCH64Frame) arg; + + return (AddressOps.equal(getSP(), other.getSP()) && + AddressOps.equal(getUnextendedSP(), other.getUnextendedSP()) && + AddressOps.equal(getFP(), other.getFP()) && + AddressOps.equal(getPC(), other.getPC())); + } + + public int hashCode() { + if (raw_sp == null) { + return 0; + } + + return raw_sp.hashCode(); + } + + public String toString() { + return "sp: " + (getSP() == null? "null" : getSP().toString()) + + ", unextendedSP: " + (getUnextendedSP() == null? "null" : getUnextendedSP().toString()) + + ", fp: " + (getFP() == null? "null" : getFP().toString()) + + ", pc: " + (pc == null? "null" : pc.toString()); + } + + // accessors for the instance variables + public Address getFP() { return raw_fp; } + public Address getSP() { return raw_sp; } + public Address getID() { return raw_sp; } + + // FIXME: not implemented yet (should be done for Solaris/LOONGARCH) + public boolean isSignalHandlerFrameDbg() { return false; } + public int getSignalNumberDbg() { return 0; } + public String getSignalNameDbg() { return null; } + + public boolean isInterpretedFrameValid() { + if (Assert.ASSERTS_ENABLED) { + Assert.that(isInterpretedFrame(), "Not an interpreted frame"); + } + + // These are reasonable sanity checks + if (getFP() == null || getFP().andWithMask(0x3) != null) { + return false; + } + + if (getSP() == null || getSP().andWithMask(0x3) != null) { + return false; + } + + if (getFP().addOffsetTo(INTERPRETER_FRAME_INITIAL_SP_OFFSET * VM.getVM().getAddressSize()).lessThan(getSP())) { + return false; + } + + // These are hacks to keep us out of trouble. 
+ // The problem with these is that they mask other problems + if (getFP().lessThanOrEqual(getSP())) { + // this attempts to deal with unsigned comparison above + return false; + } + + if (getFP().minus(getSP()) > 4096 * VM.getVM().getAddressSize()) { + // stack frames shouldn't be large. + return false; + } + + return true; + } + + // FIXME: not applicable in current system + // void patch_pc(Thread* thread, address pc); + + public Frame sender(RegisterMap regMap, CodeBlob cb) { + LOONGARCH64RegisterMap map = (LOONGARCH64RegisterMap) regMap; + + if (Assert.ASSERTS_ENABLED) { + Assert.that(map != null, "map must be set"); + } + + // Default is we done have to follow them. The sender_for_xxx will + // update it accordingly + map.setIncludeArgumentOops(false); + + if (isEntryFrame()) return senderForEntryFrame(map); + if (isInterpretedFrame()) return senderForInterpreterFrame(map); + + if(cb == null) { + cb = VM.getVM().getCodeCache().findBlob(getPC()); + } else { + if (Assert.ASSERTS_ENABLED) { + Assert.that(cb.equals(VM.getVM().getCodeCache().findBlob(getPC())), "Must be the same"); + } + } + + if (cb != null) { + return senderForCompiledFrame(map, cb); + } + + // Must be native-compiled frame, i.e. the marshaling code for native + // methods that exists in the core system. + return new LOONGARCH64Frame(getSenderSP(), getLink(), getSenderPC()); + } + + private Frame senderForEntryFrame(LOONGARCH64RegisterMap map) { + if (DEBUG) { + System.out.println("senderForEntryFrame"); + } + if (Assert.ASSERTS_ENABLED) { + Assert.that(map != null, "map must be set"); + } + // Java frame called from C; skip all C frames and return top C + // frame of that chunk as the sender + LOONGARCH64JavaCallWrapper jcw = (LOONGARCH64JavaCallWrapper) getEntryFrameCallWrapper(); + if (Assert.ASSERTS_ENABLED) { + Assert.that(!entryFrameIsFirst(), "next Java fp must be non zero"); + Assert.that(jcw.getLastJavaSP().greaterThan(getSP()), "must be above this frame on stack"); + } + LOONGARCH64Frame fr; + if (jcw.getLastJavaPC() != null) { + fr = new LOONGARCH64Frame(jcw.getLastJavaSP(), jcw.getLastJavaFP(), jcw.getLastJavaPC()); + } else { + fr = new LOONGARCH64Frame(jcw.getLastJavaSP(), jcw.getLastJavaFP()); + } + map.clear(); + if (Assert.ASSERTS_ENABLED) { + Assert.that(map.getIncludeArgumentOops(), "should be set by clear"); + } + return fr; + } + + //------------------------------------------------------------------------------ + // frame::adjust_unextended_sp + private void adjustUnextendedSP() { + // On loongarch, sites calling method handle intrinsics and lambda forms are treated + // as any other call site. Therefore, no special action is needed when we are + // returning to any of these call sites. + + CodeBlob cb = cb(); + NMethod senderNm = (cb == null) ? null : cb.asNMethodOrNull(); + if (senderNm != null) { + // If the sender PC is a deoptimization point, get the original PC. + if (senderNm.isDeoptEntry(getPC()) || + senderNm.isDeoptMhEntry(getPC())) { + // DEBUG_ONLY(verifyDeoptriginalPc(senderNm, raw_unextendedSp)); + } + } + } + + private Frame senderForInterpreterFrame(LOONGARCH64RegisterMap map) { + if (DEBUG) { + System.out.println("senderForInterpreterFrame"); + } + Address unextendedSP = addressOfStackSlot(INTERPRETER_FRAME_SENDER_SP_OFFSET).getAddressAt(0); + Address sp = getSenderSP(); + // We do not need to update the callee-save register mapping because above + // us is either another interpreter frame or a converter-frame, but never + // directly a compiled frame. + // 11/24/04 SFG. 
With the removal of adapter frames this is no longer true. + // However c2 no longer uses callee save register for java calls so there + // are no callee register to find. + + if (map.getUpdateMap()) + updateMapWithSavedLink(map, addressOfStackSlot(JAVA_FRAME_LINK_OFFSET)); + + return new LOONGARCH64Frame(sp, unextendedSP, getLink(), getSenderPC()); + } + + private void updateMapWithSavedLink(RegisterMap map, Address savedFPAddr) { + map.setLocation(fp, savedFPAddr); + } + + private Frame senderForCompiledFrame(LOONGARCH64RegisterMap map, CodeBlob cb) { + if (DEBUG) { + System.out.println("senderForCompiledFrame"); + } + + // + // NOTE: some of this code is (unfortunately) duplicated in LOONGARCH64CurrentFrameGuess + // + + if (Assert.ASSERTS_ENABLED) { + Assert.that(map != null, "map must be set"); + } + + // frame owned by optimizing compiler + if (Assert.ASSERTS_ENABLED) { + Assert.that(cb.getFrameSize() >= 0, "must have non-zero frame size"); + } + Address senderSP = getUnextendedSP().addOffsetTo(cb.getFrameSize()); + + // On Intel the return_address is always the word on the stack + Address senderPC = senderSP.getAddressAt(-1 * VM.getVM().getAddressSize()); + + // This is the saved value of EBP which may or may not really be an FP. + // It is only an FP if the sender is an interpreter frame (or C1?). + Address savedFPAddr = senderSP.addOffsetTo(- JAVA_FRAME_SENDER_SP_OFFSET * VM.getVM().getAddressSize()); + + if (map.getUpdateMap()) { + // Tell GC to use argument oopmaps for some runtime stubs that need it. + // For C1, the runtime stub might not have oop maps, so set this flag + // outside of update_register_map. + map.setIncludeArgumentOops(cb.callerMustGCArguments()); + + if (cb.getOopMaps() != null) { + ImmutableOopMapSet.updateRegisterMap(this, cb, map, true); + } + + // Since the prolog does the save and restore of EBP there is no oopmap + // for it so we must fill in its location as if there was an oopmap entry + // since if our caller was compiled code there could be live jvm state in it. + updateMapWithSavedLink(map, savedFPAddr); + } + + return new LOONGARCH64Frame(senderSP, savedFPAddr.getAddressAt(0), senderPC); + } + + protected boolean hasSenderPD() { + // FIXME + // Check for null ebp? Need to do some tests. + return true; + } + + public long frameSize() { + return (getSenderSP().minus(getSP()) / VM.getVM().getAddressSize()); + } + + public Address getLink() { + if (isJavaFrame()) + return addressOfStackSlot(JAVA_FRAME_LINK_OFFSET).getAddressAt(0); + return addressOfStackSlot(NATIVE_FRAME_LINK_OFFSET).getAddressAt(0); + } + + public Address getUnextendedSP() { return raw_unextendedSP; } + + // Return address: + public Address getSenderPCAddr() { + if (isJavaFrame()) + return addressOfStackSlot(JAVA_FRAME_RETURN_ADDR_OFFSET); + return addressOfStackSlot(NATIVE_FRAME_RETURN_ADDR_OFFSET); + } + + public Address getSenderPC() { return getSenderPCAddr().getAddressAt(0); } + + public Address getSenderSP() { + if (isJavaFrame()) + return addressOfStackSlot(JAVA_FRAME_SENDER_SP_OFFSET); + return addressOfStackSlot(NATIVE_FRAME_SENDER_SP_OFFSET); + } + + public Address addressOfInterpreterFrameLocals() { + return addressOfStackSlot(INTERPRETER_FRAME_LOCALS_OFFSET); + } + + private Address addressOfInterpreterFrameBCX() { + return addressOfStackSlot(INTERPRETER_FRAME_BCX_OFFSET); + } + + public int getInterpreterFrameBCI() { + // FIXME: this is not atomic with respect to GC and is unsuitable + // for use in a non-debugging, or reflective, system. 
Need to + // figure out how to express this. + Address bcp = addressOfInterpreterFrameBCX().getAddressAt(0); + Address methodHandle = addressOfInterpreterFrameMethod().getAddressAt(0); + Method method = (Method)Metadata.instantiateWrapperFor(methodHandle); + return bcpToBci(bcp, method); + } + + public Address addressOfInterpreterFrameMDX() { + return addressOfStackSlot(INTERPRETER_FRAME_MDX_OFFSET); + } + + // FIXME + //inline int frame::interpreter_frame_monitor_size() { + // return BasicObjectLock::size(); + //} + + // expression stack + // (the max_stack arguments are used by the GC; see class FrameClosure) + + public Address addressOfInterpreterFrameExpressionStack() { + Address monitorEnd = interpreterFrameMonitorEnd().address(); + return monitorEnd.addOffsetTo(-1 * VM.getVM().getAddressSize()); + } + + public int getInterpreterFrameExpressionStackDirection() { return -1; } + + // top of expression stack + public Address addressOfInterpreterFrameTOS() { + return getSP(); + } + + /** Expression stack from top down */ + public Address addressOfInterpreterFrameTOSAt(int slot) { + return addressOfInterpreterFrameTOS().addOffsetTo(slot * VM.getVM().getAddressSize()); + } + + public Address getInterpreterFrameSenderSP() { + if (Assert.ASSERTS_ENABLED) { + Assert.that(isInterpretedFrame(), "interpreted frame expected"); + } + return addressOfStackSlot(INTERPRETER_FRAME_SENDER_SP_OFFSET).getAddressAt(0); + } + + // Monitors + public BasicObjectLock interpreterFrameMonitorBegin() { + return new BasicObjectLock(addressOfStackSlot(INTERPRETER_FRAME_MONITOR_BLOCK_BOTTOM_OFFSET)); + } + + public BasicObjectLock interpreterFrameMonitorEnd() { + Address result = addressOfStackSlot(INTERPRETER_FRAME_MONITOR_BLOCK_TOP_OFFSET).getAddressAt(0); + if (Assert.ASSERTS_ENABLED) { + // make sure the pointer points inside the frame + Assert.that(AddressOps.gt(getFP(), result), "result must < than frame pointer"); + Assert.that(AddressOps.lte(getSP(), result), "result must >= than stack pointer"); + } + return new BasicObjectLock(result); + } + + public int interpreterFrameMonitorSize() { + return BasicObjectLock.size(); + } + + // Method + public Address addressOfInterpreterFrameMethod() { + return addressOfStackSlot(INTERPRETER_FRAME_METHOD_OFFSET); + } + + // Constant pool cache + public Address addressOfInterpreterFrameCPCache() { + return addressOfStackSlot(INTERPRETER_FRAME_CACHE_OFFSET); + } + + // Entry frames + public JavaCallWrapper getEntryFrameCallWrapper() { + return new LOONGARCH64JavaCallWrapper(addressOfStackSlot(ENTRY_FRAME_CALL_WRAPPER_OFFSET).getAddressAt(0)); + } + + protected Address addressOfSavedOopResult() { + // offset is 2 for compiler2 and 3 for compiler1 + return getSP().addOffsetTo((VM.getVM().isClientCompiler() ? 
2 : 3) * + VM.getVM().getAddressSize()); + } + + protected Address addressOfSavedReceiver() { + return getSP().addOffsetTo(-4 * VM.getVM().getAddressSize()); + } + + private void dumpStack() { + if (getFP() != null) { + for (Address addr = getSP().addOffsetTo(-5 * VM.getVM().getAddressSize()); + AddressOps.lte(addr, getFP().addOffsetTo(5 * VM.getVM().getAddressSize())); + addr = addr.addOffsetTo(VM.getVM().getAddressSize())) { + System.out.println(addr + ": " + addr.getAddressAt(0)); + } + } else { + for (Address addr = getSP().addOffsetTo(-5 * VM.getVM().getAddressSize()); + AddressOps.lte(addr, getSP().addOffsetTo(20 * VM.getVM().getAddressSize())); + addr = addr.addOffsetTo(VM.getVM().getAddressSize())) { + System.out.println(addr + ": " + addr.getAddressAt(0)); + } + } + } +} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64JavaCallWrapper.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64JavaCallWrapper.java new file mode 100644 index 00000000000..0625e10a411 --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64JavaCallWrapper.java @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2001, 2002, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2018, 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +package sun.jvm.hotspot.runtime.loongarch64; + +import java.util.*; +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.types.*; +import sun.jvm.hotspot.runtime.*; + +public class LOONGARCH64JavaCallWrapper extends JavaCallWrapper { + private static AddressField lastJavaFPField; + + static { + VM.registerVMInitializedObserver(new Observer() { + public void update(Observable o, Object data) { + initialize(VM.getVM().getTypeDataBase()); + } + }); + } + + private static synchronized void initialize(TypeDataBase db) { + Type type = db.lookupType("JavaFrameAnchor"); + + lastJavaFPField = type.getAddressField("_last_Java_fp"); + } + + public LOONGARCH64JavaCallWrapper(Address addr) { + super(addr); + } + + public Address getLastJavaFP() { + return lastJavaFPField.getValue(addr.addOffsetTo(anchorField.getOffset())); + } +} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64RegisterMap.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64RegisterMap.java new file mode 100644 index 00000000000..2cf904d3885 --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64RegisterMap.java @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2018, 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +package sun.jvm.hotspot.runtime.loongarch64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.runtime.*; + +public class LOONGARCH64RegisterMap extends RegisterMap { + + /** This is the only public constructor */ + public LOONGARCH64RegisterMap(JavaThread thread, boolean updateMap) { + super(thread, updateMap); + } + + protected LOONGARCH64RegisterMap(RegisterMap map) { + super(map); + } + + public Object clone() { + LOONGARCH64RegisterMap retval = new LOONGARCH64RegisterMap(this); + return retval; + } + + // no PD state to clear or copy: + protected void clearPD() {} + protected void initializePD() {} + protected void initializeFromPD(RegisterMap map) {} + protected Address getLocationPD(VMReg reg) { return null; } +} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64CurrentFrameGuess.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64CurrentFrameGuess.java new file mode 100644 index 00000000000..c11458abe2c --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64CurrentFrameGuess.java @@ -0,0 +1,217 @@ +/* + * Copyright (c) 2001, 2006, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2018, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.runtime.mips64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.mips64.*; +import sun.jvm.hotspot.code.*; +import sun.jvm.hotspot.interpreter.*; +import sun.jvm.hotspot.runtime.*; + +/**

Should be able to be used on all mips64 platforms we support + (currently Linux/mips64) to implement JavaThread's + "currentFrameGuess()" functionality. Input is a MIPS64ThreadContext; + output is SP, FP, and PC for a MIPS64Frame. Instantiation of the + MIPS64Frame is left to the caller, since we may need to subclass + MIPS64Frame to support signal handler frames on Unix platforms.

+ +

The algorithm is to walk up the stack within a given range (say, + 512K at most) looking for a plausible PC and SP for a Java frame, + also considering those coming in from the context. If we find a PC + that belongs to the VM (i.e., in generated code like the + interpreter or CodeCache) then we try to find an associated FP + (frame pointer). We repeat this until we either find a complete + frame or run out of stack to look at.

*/ + +public class MIPS64CurrentFrameGuess { + private MIPS64ThreadContext context; + private JavaThread thread; + private Address spFound; + private Address fpFound; + private Address pcFound; + + private static final boolean DEBUG = System.getProperty("sun.jvm.hotspot.runtime.mips64.MIPS64Frame.DEBUG") + != null; + + public MIPS64CurrentFrameGuess(MIPS64ThreadContext context, + JavaThread thread) { + this.context = context; + this.thread = thread; + } + + /** Returns false if not able to find a frame within a reasonable range. */ + public boolean run(long regionInBytesToSearch) { + Address sp = context.getRegisterAsAddress(MIPS64ThreadContext.SP); + Address pc = context.getRegisterAsAddress(MIPS64ThreadContext.PC); + Address fp = context.getRegisterAsAddress(MIPS64ThreadContext.FP); + if (sp == null) { + // Bail out if no last java frame eithe + if (thread.getLastJavaSP() != null) { + setValues(thread.getLastJavaSP(), thread.getLastJavaFP(), null); + return true; + } + // Bail out + return false; + } + Address end = sp.addOffsetTo(regionInBytesToSearch); + VM vm = VM.getVM(); + + setValues(null, null, null); // Assume we're not going to find anything + + if (vm.isJavaPCDbg(pc)) { + if (vm.isClientCompiler()) { + // If the topmost frame is a Java frame, we are (pretty much) + // guaranteed to have a viable EBP. We should be more robust + // than this (we have the potential for losing entire threads' + // stack traces) but need to see how much work we really have + // to do here. Searching the stack for an (SP, FP) pair is + // hard since it's easy to misinterpret inter-frame stack + // pointers as base-of-frame pointers; we also don't know the + // sizes of C1 frames (not registered in the nmethod) so can't + // derive them from ESP. + + setValues(sp, fp, pc); + return true; + } else { + if (vm.getInterpreter().contains(pc)) { + if (DEBUG) { + System.out.println("CurrentFrameGuess: choosing interpreter frame: sp = " + + sp + ", fp = " + fp + ", pc = " + pc); + } + setValues(sp, fp, pc); + return true; + } + + // For the server compiler, EBP is not guaranteed to be valid + // for compiled code. In addition, an earlier attempt at a + // non-searching algorithm (see below) failed because the + // stack pointer from the thread context was pointing + // (considerably) beyond the ostensible end of the stack, into + // garbage; walking from the topmost frame back caused a crash. + // + // This algorithm takes the current PC as a given and tries to + // find the correct corresponding SP by walking up the stack + // and repeatedly performing stackwalks (very inefficient). + // + // FIXME: there is something wrong with stackwalking across + // adapter frames...this is likely to be the root cause of the + // failure with the simpler algorithm below. + + for (long offset = 0; + offset < regionInBytesToSearch; + offset += vm.getAddressSize()) { + try { + Address curSP = sp.addOffsetTo(offset); + Frame frame = new MIPS64Frame(curSP, null, pc); + RegisterMap map = thread.newRegisterMap(false); + while (frame != null) { + if (frame.isEntryFrame() && frame.entryFrameIsFirst()) { + // We were able to traverse all the way to the + // bottommost Java frame. + // This sp looks good. Keep it. 
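+          // Reaching the bottommost entry frame means this candidate SP yielded
+          // a sender chain that walks all the way down to the first VM-to-Java
+          // call, which is strong evidence the guessed (SP, PC) pair is consistent.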
+ if (DEBUG) { + System.out.println("CurrentFrameGuess: Choosing sp = " + curSP + ", pc = " + pc); + } + setValues(curSP, null, pc); + return true; + } + frame = frame.sender(map); + } + } catch (Exception e) { + if (DEBUG) { + System.out.println("CurrentFrameGuess: Exception " + e + " at offset " + offset); + } + // Bad SP. Try another. + } + } + + // Were not able to find a plausible SP to go with this PC. + // Bail out. + return false; + + /* + // Original algorithm which does not work because SP was + // pointing beyond where it should have: + + // For the server compiler, EBP is not guaranteed to be valid + // for compiled code. We see whether the PC is in the + // interpreter and take care of that, otherwise we run code + // (unfortunately) duplicated from MIPS64Frame.senderForCompiledFrame. + + CodeCache cc = vm.getCodeCache(); + if (cc.contains(pc)) { + CodeBlob cb = cc.findBlob(pc); + + // See if we can derive a frame pointer from SP and PC + // NOTE: This is the code duplicated from MIPS64Frame + Address saved_fp = null; + int llink_offset = cb.getLinkOffset(); + if (llink_offset >= 0) { + // Restore base-pointer, since next frame might be an interpreter frame. + Address fp_addr = sp.addOffsetTo(VM.getVM().getAddressSize() * llink_offset); + saved_fp = fp_addr.getAddressAt(0); + } + + setValues(sp, saved_fp, pc); + return true; + } + */ + } + } else { + // If the current program counter was not known to us as a Java + // PC, we currently assume that we are in the run-time system + // and attempt to look to thread-local storage for saved ESP and + // EBP. Note that if these are null (because we were, in fact, + // in Java code, i.e., vtable stubs or similar, and the SA + // didn't have enough insight into the target VM to understand + // that) then we are going to lose the entire stack trace for + // the thread, which is sub-optimal. FIXME. + + if (DEBUG) { + System.out.println("CurrentFrameGuess: choosing last Java frame: sp = " + + thread.getLastJavaSP() + ", fp = " + thread.getLastJavaFP()); + } + if (thread.getLastJavaSP() == null) { + return false; // No known Java frames on stack + } + setValues(thread.getLastJavaSP(), thread.getLastJavaFP(), null); + return true; + } + } + + public Address getSP() { return spFound; } + public Address getFP() { return fpFound; } + /** May be null if getting values from thread-local storage; take + care to call the correct MIPS64Frame constructor to recover this if + necessary */ + public Address getPC() { return pcFound; } + + private void setValues(Address sp, Address fp, Address pc) { + spFound = sp; + fpFound = fp; + pcFound = pc; + } +} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64Frame.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64Frame.java new file mode 100644 index 00000000000..65d88016ea7 --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64Frame.java @@ -0,0 +1,537 @@ +/* + * Copyright (c) 2001, 2015, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2018, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.runtime.mips64; + +import java.util.*; +import sun.jvm.hotspot.code.*; +import sun.jvm.hotspot.compiler.*; +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.oops.*; +import sun.jvm.hotspot.runtime.*; +import sun.jvm.hotspot.types.*; +import sun.jvm.hotspot.utilities.*; + +/** Specialization of and implementation of abstract methods of the + Frame class for the mips64 family of CPUs. */ + +public class MIPS64Frame extends Frame { + private static final boolean DEBUG; + static { + DEBUG = System.getProperty("sun.jvm.hotspot.runtime.mips64.MIPS64Frame.DEBUG") != null; + } + + // All frames + private static final int LINK_OFFSET = 0; + private static final int RETURN_ADDR_OFFSET = 1; + private static final int SENDER_SP_OFFSET = 2; + + // Interpreter frames + private static final int INTERPRETER_FRAME_SENDER_SP_OFFSET = -1; + private static final int INTERPRETER_FRAME_LAST_SP_OFFSET = INTERPRETER_FRAME_SENDER_SP_OFFSET - 1; + private static final int INTERPRETER_FRAME_METHOD_OFFSET = INTERPRETER_FRAME_LAST_SP_OFFSET - 1; + private static int INTERPRETER_FRAME_MIRROR_OFFSET; + private static int INTERPRETER_FRAME_MDX_OFFSET; // Non-core builds only + private static int INTERPRETER_FRAME_CACHE_OFFSET; + private static int INTERPRETER_FRAME_LOCALS_OFFSET; + private static int INTERPRETER_FRAME_BCX_OFFSET; + private static int INTERPRETER_FRAME_INITIAL_SP_OFFSET; + private static int INTERPRETER_FRAME_MONITOR_BLOCK_TOP_OFFSET; + private static int INTERPRETER_FRAME_MONITOR_BLOCK_BOTTOM_OFFSET; + + // Entry frames + private static int ENTRY_FRAME_CALL_WRAPPER_OFFSET; + + private static VMReg rbp; + + static { + VM.registerVMInitializedObserver(new Observer() { + public void update(Observable o, Object data) { + initialize(VM.getVM().getTypeDataBase()); + } + }); + } + + private static synchronized void initialize(TypeDataBase db) { + INTERPRETER_FRAME_MIRROR_OFFSET = INTERPRETER_FRAME_METHOD_OFFSET - 1; + INTERPRETER_FRAME_MDX_OFFSET = INTERPRETER_FRAME_MIRROR_OFFSET - 1; + INTERPRETER_FRAME_CACHE_OFFSET = INTERPRETER_FRAME_MDX_OFFSET - 1; + INTERPRETER_FRAME_LOCALS_OFFSET = INTERPRETER_FRAME_CACHE_OFFSET - 1; + INTERPRETER_FRAME_BCX_OFFSET = INTERPRETER_FRAME_LOCALS_OFFSET - 1; + INTERPRETER_FRAME_INITIAL_SP_OFFSET = INTERPRETER_FRAME_BCX_OFFSET - 1; + INTERPRETER_FRAME_MONITOR_BLOCK_TOP_OFFSET = INTERPRETER_FRAME_INITIAL_SP_OFFSET; + INTERPRETER_FRAME_MONITOR_BLOCK_BOTTOM_OFFSET = INTERPRETER_FRAME_INITIAL_SP_OFFSET; + + ENTRY_FRAME_CALL_WRAPPER_OFFSET = db.lookupIntConstant("frame::entry_frame_call_wrapper_offset"); + if (VM.getVM().getAddressSize() == 4) { + rbp = new VMReg(5); + } else { + rbp = new VMReg(5 << 1); + } + } + + + // an additional field beyond sp and pc: + Address raw_fp; // frame pointer + private Address raw_unextendedSP; 
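+  // raw_unextendedSP tracks the SP before any extension of the frame (it only
+  // differs from raw_sp when an adapter or the interpreter has grown the frame);
+  // senderForCompiledFrame() below adds the CodeBlob's frame size to it to
+  // locate the sender's SP.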
+ + private MIPS64Frame() { + } + + private void adjustForDeopt() { + if ( pc != null) { + // Look for a deopt pc and if it is deopted convert to original pc + CodeBlob cb = VM.getVM().getCodeCache().findBlob(pc); + if (cb != null && cb.isJavaMethod()) { + NMethod nm = (NMethod) cb; + if (pc.equals(nm.deoptHandlerBegin())) { + if (Assert.ASSERTS_ENABLED) { + Assert.that(this.getUnextendedSP() != null, "null SP in Java frame"); + } + // adjust pc if frame is deoptimized. + pc = this.getUnextendedSP().getAddressAt(nm.origPCOffset()); + deoptimized = true; + } + } + } + } + + public MIPS64Frame(Address raw_sp, Address raw_fp, Address pc) { + this.raw_sp = raw_sp; + this.raw_unextendedSP = raw_sp; + this.raw_fp = raw_fp; + this.pc = pc; + adjustUnextendedSP(); + + // Frame must be fully constructed before this call + adjustForDeopt(); + + if (DEBUG) { + System.out.println("MIPS64Frame(sp, fp, pc): " + this); + dumpStack(); + } + } + + public MIPS64Frame(Address raw_sp, Address raw_fp) { + this.raw_sp = raw_sp; + this.raw_unextendedSP = raw_sp; + this.raw_fp = raw_fp; + this.pc = raw_sp.getAddressAt(-1 * VM.getVM().getAddressSize()); + adjustUnextendedSP(); + + // Frame must be fully constructed before this call + adjustForDeopt(); + + if (DEBUG) { + System.out.println("MIPS64Frame(sp, fp): " + this); + dumpStack(); + } + } + + public MIPS64Frame(Address raw_sp, Address raw_unextendedSp, Address raw_fp, Address pc) { + this.raw_sp = raw_sp; + this.raw_unextendedSP = raw_unextendedSp; + this.raw_fp = raw_fp; + this.pc = pc; + adjustUnextendedSP(); + + // Frame must be fully constructed before this call + adjustForDeopt(); + + if (DEBUG) { + System.out.println("MIPS64Frame(sp, unextendedSP, fp, pc): " + this); + dumpStack(); + } + + } + + public Object clone() { + MIPS64Frame frame = new MIPS64Frame(); + frame.raw_sp = raw_sp; + frame.raw_unextendedSP = raw_unextendedSP; + frame.raw_fp = raw_fp; + frame.pc = pc; + frame.deoptimized = deoptimized; + return frame; + } + + public boolean equals(Object arg) { + if (arg == null) { + return false; + } + + if (!(arg instanceof MIPS64Frame)) { + return false; + } + + MIPS64Frame other = (MIPS64Frame) arg; + + return (AddressOps.equal(getSP(), other.getSP()) && + AddressOps.equal(getUnextendedSP(), other.getUnextendedSP()) && + AddressOps.equal(getFP(), other.getFP()) && + AddressOps.equal(getPC(), other.getPC())); + } + + public int hashCode() { + if (raw_sp == null) { + return 0; + } + + return raw_sp.hashCode(); + } + + public String toString() { + return "sp: " + (getSP() == null? "null" : getSP().toString()) + + ", unextendedSP: " + (getUnextendedSP() == null? "null" : getUnextendedSP().toString()) + + ", fp: " + (getFP() == null? "null" : getFP().toString()) + + ", pc: " + (pc == null? 
"null" : pc.toString()); + } + + // accessors for the instance variables + public Address getFP() { return raw_fp; } + public Address getSP() { return raw_sp; } + public Address getID() { return raw_sp; } + + // FIXME: not implemented yet (should be done for Solaris/MIPS) + public boolean isSignalHandlerFrameDbg() { return false; } + public int getSignalNumberDbg() { return 0; } + public String getSignalNameDbg() { return null; } + + public boolean isInterpretedFrameValid() { + if (Assert.ASSERTS_ENABLED) { + Assert.that(isInterpretedFrame(), "Not an interpreted frame"); + } + + // These are reasonable sanity checks + if (getFP() == null || getFP().andWithMask(0x3) != null) { + return false; + } + + if (getSP() == null || getSP().andWithMask(0x3) != null) { + return false; + } + + if (getFP().addOffsetTo(INTERPRETER_FRAME_INITIAL_SP_OFFSET * VM.getVM().getAddressSize()).lessThan(getSP())) { + return false; + } + + // These are hacks to keep us out of trouble. + // The problem with these is that they mask other problems + if (getFP().lessThanOrEqual(getSP())) { + // this attempts to deal with unsigned comparison above + return false; + } + + if (getFP().minus(getSP()) > 4096 * VM.getVM().getAddressSize()) { + // stack frames shouldn't be large. + return false; + } + + return true; + } + + // FIXME: not applicable in current system + // void patch_pc(Thread* thread, address pc); + + public Frame sender(RegisterMap regMap, CodeBlob cb) { + MIPS64RegisterMap map = (MIPS64RegisterMap) regMap; + + if (Assert.ASSERTS_ENABLED) { + Assert.that(map != null, "map must be set"); + } + + // Default is we done have to follow them. The sender_for_xxx will + // update it accordingly + map.setIncludeArgumentOops(false); + + if (isEntryFrame()) return senderForEntryFrame(map); + if (isInterpretedFrame()) return senderForInterpreterFrame(map); + + if(cb == null) { + cb = VM.getVM().getCodeCache().findBlob(getPC()); + } else { + if (Assert.ASSERTS_ENABLED) { + Assert.that(cb.equals(VM.getVM().getCodeCache().findBlob(getPC())), "Must be the same"); + } + } + + if (cb != null) { + return senderForCompiledFrame(map, cb); + } + + // Must be native-compiled frame, i.e. the marshaling code for native + // methods that exists in the core system. 
+ return new MIPS64Frame(getSenderSP(), getLink(), getSenderPC()); + } + + private Frame senderForEntryFrame(MIPS64RegisterMap map) { + if (DEBUG) { + System.out.println("senderForEntryFrame"); + } + if (Assert.ASSERTS_ENABLED) { + Assert.that(map != null, "map must be set"); + } + // Java frame called from C; skip all C frames and return top C + // frame of that chunk as the sender + MIPS64JavaCallWrapper jcw = (MIPS64JavaCallWrapper) getEntryFrameCallWrapper(); + if (Assert.ASSERTS_ENABLED) { + Assert.that(!entryFrameIsFirst(), "next Java fp must be non zero"); + Assert.that(jcw.getLastJavaSP().greaterThan(getSP()), "must be above this frame on stack"); + } + MIPS64Frame fr; + if (jcw.getLastJavaPC() != null) { + fr = new MIPS64Frame(jcw.getLastJavaSP(), jcw.getLastJavaFP(), jcw.getLastJavaPC()); + } else { + fr = new MIPS64Frame(jcw.getLastJavaSP(), jcw.getLastJavaFP()); + } + map.clear(); + if (Assert.ASSERTS_ENABLED) { + Assert.that(map.getIncludeArgumentOops(), "should be set by clear"); + } + return fr; + } + + //------------------------------------------------------------------------------ + // frame::adjust_unextended_sp + private void adjustUnextendedSP() { + // On mips, sites calling method handle intrinsics and lambda forms are treated + // as any other call site. Therefore, no special action is needed when we are + // returning to any of these call sites. + + CodeBlob cb = cb(); + NMethod senderNm = (cb == null) ? null : cb.asNMethodOrNull(); + if (senderNm != null) { + // If the sender PC is a deoptimization point, get the original PC. + if (senderNm.isDeoptEntry(getPC()) || + senderNm.isDeoptMhEntry(getPC())) { + // DEBUG_ONLY(verifyDeoptriginalPc(senderNm, raw_unextendedSp)); + } + } + } + + private Frame senderForInterpreterFrame(MIPS64RegisterMap map) { + if (DEBUG) { + System.out.println("senderForInterpreterFrame"); + } + Address unextendedSP = addressOfStackSlot(INTERPRETER_FRAME_SENDER_SP_OFFSET).getAddressAt(0); + Address sp = addressOfStackSlot(SENDER_SP_OFFSET); + // We do not need to update the callee-save register mapping because above + // us is either another interpreter frame or a converter-frame, but never + // directly a compiled frame. + // 11/24/04 SFG. With the removal of adapter frames this is no longer true. + // However c2 no longer uses callee save register for java calls so there + // are no callee register to find. + + if (map.getUpdateMap()) + updateMapWithSavedLink(map, addressOfStackSlot(LINK_OFFSET)); + + return new MIPS64Frame(sp, unextendedSP, getLink(), getSenderPC()); + } + + private void updateMapWithSavedLink(RegisterMap map, Address savedFPAddr) { + map.setLocation(rbp, savedFPAddr); + } + + private Frame senderForCompiledFrame(MIPS64RegisterMap map, CodeBlob cb) { + if (DEBUG) { + System.out.println("senderForCompiledFrame"); + } + + // + // NOTE: some of this code is (unfortunately) duplicated in MIPS64CurrentFrameGuess + // + + if (Assert.ASSERTS_ENABLED) { + Assert.that(map != null, "map must be set"); + } + + // frame owned by optimizing compiler + if (Assert.ASSERTS_ENABLED) { + Assert.that(cb.getFrameSize() >= 0, "must have non-zero frame size"); + } + Address senderSP = getUnextendedSP().addOffsetTo(cb.getFrameSize()); + + // On Intel the return_address is always the word on the stack + Address senderPC = senderSP.getAddressAt(-1 * VM.getVM().getAddressSize()); + + // This is the saved value of EBP which may or may not really be an FP. + // It is only an FP if the sender is an interpreter frame (or C1?). 
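+    // The saved FP slot sits SENDER_SP_OFFSET words below the sender SP,
+    // matching the LINK_OFFSET / RETURN_ADDR_OFFSET / SENDER_SP_OFFSET layout
+    // declared at the top of this class.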
+ Address savedFPAddr = senderSP.addOffsetTo(- SENDER_SP_OFFSET * VM.getVM().getAddressSize()); + + if (map.getUpdateMap()) { + // Tell GC to use argument oopmaps for some runtime stubs that need it. + // For C1, the runtime stub might not have oop maps, so set this flag + // outside of update_register_map. + map.setIncludeArgumentOops(cb.callerMustGCArguments()); + + if (cb.getOopMaps() != null) { + ImmutableOopMapSet.updateRegisterMap(this, cb, map, true); + } + + // Since the prolog does the save and restore of EBP there is no oopmap + // for it so we must fill in its location as if there was an oopmap entry + // since if our caller was compiled code there could be live jvm state in it. + updateMapWithSavedLink(map, savedFPAddr); + } + + return new MIPS64Frame(senderSP, savedFPAddr.getAddressAt(0), senderPC); + } + + protected boolean hasSenderPD() { + // FIXME + // Check for null ebp? Need to do some tests. + return true; + } + + public long frameSize() { + return (getSenderSP().minus(getSP()) / VM.getVM().getAddressSize()); + } + + public Address getLink() { + return addressOfStackSlot(LINK_OFFSET).getAddressAt(0); + } + + public Address getUnextendedSP() { return raw_unextendedSP; } + + // Return address: + public Address getSenderPCAddr() { return addressOfStackSlot(RETURN_ADDR_OFFSET); } + public Address getSenderPC() { return getSenderPCAddr().getAddressAt(0); } + + public Address getSenderSP() { return addressOfStackSlot(SENDER_SP_OFFSET); } + + public Address addressOfInterpreterFrameLocals() { + return addressOfStackSlot(INTERPRETER_FRAME_LOCALS_OFFSET); + } + + private Address addressOfInterpreterFrameBCX() { + return addressOfStackSlot(INTERPRETER_FRAME_BCX_OFFSET); + } + + public int getInterpreterFrameBCI() { + // FIXME: this is not atomic with respect to GC and is unsuitable + // for use in a non-debugging, or reflective, system. Need to + // figure out how to express this. 
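+    // Read the raw BCP and the Method* from their interpreter-frame slots,
+    // then convert the BCP into a bytecode index relative to that method.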
+ Address bcp = addressOfInterpreterFrameBCX().getAddressAt(0); + Address methodHandle = addressOfInterpreterFrameMethod().getAddressAt(0); + Method method = (Method)Metadata.instantiateWrapperFor(methodHandle); + return bcpToBci(bcp, method); + } + + public Address addressOfInterpreterFrameMDX() { + return addressOfStackSlot(INTERPRETER_FRAME_MDX_OFFSET); + } + + // FIXME + //inline int frame::interpreter_frame_monitor_size() { + // return BasicObjectLock::size(); + //} + + // expression stack + // (the max_stack arguments are used by the GC; see class FrameClosure) + + public Address addressOfInterpreterFrameExpressionStack() { + Address monitorEnd = interpreterFrameMonitorEnd().address(); + return monitorEnd.addOffsetTo(-1 * VM.getVM().getAddressSize()); + } + + public int getInterpreterFrameExpressionStackDirection() { return -1; } + + // top of expression stack + public Address addressOfInterpreterFrameTOS() { + return getSP(); + } + + /** Expression stack from top down */ + public Address addressOfInterpreterFrameTOSAt(int slot) { + return addressOfInterpreterFrameTOS().addOffsetTo(slot * VM.getVM().getAddressSize()); + } + + public Address getInterpreterFrameSenderSP() { + if (Assert.ASSERTS_ENABLED) { + Assert.that(isInterpretedFrame(), "interpreted frame expected"); + } + return addressOfStackSlot(INTERPRETER_FRAME_SENDER_SP_OFFSET).getAddressAt(0); + } + + // Monitors + public BasicObjectLock interpreterFrameMonitorBegin() { + return new BasicObjectLock(addressOfStackSlot(INTERPRETER_FRAME_MONITOR_BLOCK_BOTTOM_OFFSET)); + } + + public BasicObjectLock interpreterFrameMonitorEnd() { + Address result = addressOfStackSlot(INTERPRETER_FRAME_MONITOR_BLOCK_TOP_OFFSET).getAddressAt(0); + if (Assert.ASSERTS_ENABLED) { + // make sure the pointer points inside the frame + Assert.that(AddressOps.gt(getFP(), result), "result must < than frame pointer"); + Assert.that(AddressOps.lte(getSP(), result), "result must >= than stack pointer"); + } + return new BasicObjectLock(result); + } + + public int interpreterFrameMonitorSize() { + return BasicObjectLock.size(); + } + + // Method + public Address addressOfInterpreterFrameMethod() { + return addressOfStackSlot(INTERPRETER_FRAME_METHOD_OFFSET); + } + + // Constant pool cache + public Address addressOfInterpreterFrameCPCache() { + return addressOfStackSlot(INTERPRETER_FRAME_CACHE_OFFSET); + } + + // Entry frames + public JavaCallWrapper getEntryFrameCallWrapper() { + return new MIPS64JavaCallWrapper(addressOfStackSlot(ENTRY_FRAME_CALL_WRAPPER_OFFSET).getAddressAt(0)); + } + + protected Address addressOfSavedOopResult() { + // offset is 2 for compiler2 and 3 for compiler1 + return getSP().addOffsetTo((VM.getVM().isClientCompiler() ? 
2 : 3) * + VM.getVM().getAddressSize()); + } + + protected Address addressOfSavedReceiver() { + return getSP().addOffsetTo(-4 * VM.getVM().getAddressSize()); + } + + private void dumpStack() { + if (getFP() != null) { + for (Address addr = getSP().addOffsetTo(-5 * VM.getVM().getAddressSize()); + AddressOps.lte(addr, getFP().addOffsetTo(5 * VM.getVM().getAddressSize())); + addr = addr.addOffsetTo(VM.getVM().getAddressSize())) { + System.out.println(addr + ": " + addr.getAddressAt(0)); + } + } else { + for (Address addr = getSP().addOffsetTo(-5 * VM.getVM().getAddressSize()); + AddressOps.lte(addr, getSP().addOffsetTo(20 * VM.getVM().getAddressSize())); + addr = addr.addOffsetTo(VM.getVM().getAddressSize())) { + System.out.println(addr + ": " + addr.getAddressAt(0)); + } + } + } +} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64JavaCallWrapper.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64JavaCallWrapper.java new file mode 100644 index 00000000000..dfe3066af03 --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64JavaCallWrapper.java @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2001, 2002, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2018, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +package sun.jvm.hotspot.runtime.mips64; + +import java.util.*; +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.types.*; +import sun.jvm.hotspot.runtime.*; + +public class MIPS64JavaCallWrapper extends JavaCallWrapper { + private static AddressField lastJavaFPField; + + static { + VM.registerVMInitializedObserver(new Observer() { + public void update(Observable o, Object data) { + initialize(VM.getVM().getTypeDataBase()); + } + }); + } + + private static synchronized void initialize(TypeDataBase db) { + Type type = db.lookupType("JavaFrameAnchor"); + + lastJavaFPField = type.getAddressField("_last_Java_fp"); + } + + public MIPS64JavaCallWrapper(Address addr) { + super(addr); + } + + public Address getLastJavaFP() { + return lastJavaFPField.getValue(addr.addOffsetTo(anchorField.getOffset())); + } +} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64RegisterMap.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64RegisterMap.java new file mode 100644 index 00000000000..f2da760af4a --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64RegisterMap.java @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2018, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.runtime.mips64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.runtime.*; + +public class MIPS64RegisterMap extends RegisterMap { + + /** This is the only public constructor */ + public MIPS64RegisterMap(JavaThread thread, boolean updateMap) { + super(thread, updateMap); + } + + protected MIPS64RegisterMap(RegisterMap map) { + super(map); + } + + public Object clone() { + MIPS64RegisterMap retval = new MIPS64RegisterMap(this); + return retval; + } + + // no PD state to clear or copy: + protected void clearPD() {} + protected void initializePD() {} + protected void initializeFromPD(RegisterMap map) {} + protected Address getLocationPD(VMReg reg) { return null; } +} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java index 7d7a6107cab..06d79318d98 100644 --- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java @@ -22,6 +22,13 @@ * */ +/* + * This file has been modified by Loongson Technology in 2021. 
These + * modifications are Copyright (c) 2018, 2021, Loongson Technology, and are made + * available on the same license terms set forth above. + * + */ + package sun.jvm.hotspot.utilities; /** Provides canonicalized OS and CPU information for the rest of the @@ -54,7 +61,7 @@ public static String getOS() throws UnsupportedPlatformException { public static boolean knownCPU(String cpu) { final String[] KNOWN = - new String[] {"i386", "x86", "x86_64", "amd64", "sparc", "sparcv9", "ppc64", "ppc64le", "aarch64"}; + new String[] {"i386", "x86", "x86_64", "amd64", "sparc", "sparcv9", "ppc64", "ppc64le", "aarch64", "mips64", "mips64el", "loongarch64"}; for(String s : KNOWN) { if(s.equals(cpu)) @@ -101,6 +108,12 @@ public static String getCPU() throws UnsupportedPlatformException { if (cpu.equals("ppc64le")) return "ppc64"; + if (cpu.equals("mips64el")) + return "mips64"; + + if (cpu.equals("loongarch64")) + return "loongarch64"; + return cpu; } diff --git a/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.hotspot.loongarch64/src/jdk/vm/ci/hotspot/loongarch64/LoongArch64HotSpotJVMCIBackendFactory.java b/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.hotspot.loongarch64/src/jdk/vm/ci/hotspot/loongarch64/LoongArch64HotSpotJVMCIBackendFactory.java new file mode 100644 index 00000000000..0d3953ddfff --- /dev/null +++ b/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.hotspot.loongarch64/src/jdk/vm/ci/hotspot/loongarch64/LoongArch64HotSpotJVMCIBackendFactory.java @@ -0,0 +1,220 @@ +/* + * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ */ +package jdk.vm.ci.hotspot.loongarch64; + +import static java.util.Collections.emptyMap; +import static jdk.vm.ci.common.InitTimer.timer; + +import java.util.EnumSet; +import java.util.Map; + +import jdk.vm.ci.loongarch64.LoongArch64; +import jdk.vm.ci.loongarch64.LoongArch64.CPUFeature; +import jdk.vm.ci.code.Architecture; +import jdk.vm.ci.code.RegisterConfig; +import jdk.vm.ci.code.TargetDescription; +import jdk.vm.ci.code.stack.StackIntrospection; +import jdk.vm.ci.common.InitTimer; +import jdk.vm.ci.hotspot.HotSpotCodeCacheProvider; +import jdk.vm.ci.hotspot.HotSpotConstantReflectionProvider; +import jdk.vm.ci.hotspot.HotSpotJVMCIBackendFactory; +import jdk.vm.ci.hotspot.HotSpotJVMCIRuntime; +import jdk.vm.ci.hotspot.HotSpotMetaAccessProvider; +import jdk.vm.ci.hotspot.HotSpotStackIntrospection; +import jdk.vm.ci.meta.ConstantReflectionProvider; +import jdk.vm.ci.runtime.JVMCIBackend; + +public class LoongArch64HotSpotJVMCIBackendFactory implements HotSpotJVMCIBackendFactory { + + protected EnumSet computeFeatures(@SuppressWarnings("unused") LoongArch64HotSpotVMConfig config) { + // Configure the feature set using the HotSpot flag settings. + EnumSet features = EnumSet.noneOf(LoongArch64.CPUFeature.class); + + if ((config.vmVersionFeatures & config.loongarch64LA32) != 0) { + features.add(LoongArch64.CPUFeature.LA32); + } + + if ((config.vmVersionFeatures & config.loongarch64LA64) != 0) { + features.add(LoongArch64.CPUFeature.LA64); + } + + if ((config.vmVersionFeatures & config.loongarch64LLEXC) != 0) { + features.add(LoongArch64.CPUFeature.LLEXC); + } + + if ((config.vmVersionFeatures & config.loongarch64SCDLY) != 0) { + features.add(LoongArch64.CPUFeature.SCDLY); + } + + if ((config.vmVersionFeatures & config.loongarch64LLDBAR) != 0) { + features.add(LoongArch64.CPUFeature.LLDBAR); + } + + if ((config.vmVersionFeatures & config.loongarch64LBT_X86) != 0) { + features.add(LoongArch64.CPUFeature.LBT_X86); + } + + if ((config.vmVersionFeatures & config.loongarch64LBT_ARM) != 0) { + features.add(LoongArch64.CPUFeature.LBT_ARM); + } + + if ((config.vmVersionFeatures & config.loongarch64LBT_MIPS) != 0) { + features.add(LoongArch64.CPUFeature.LBT_MIPS); + } + + if ((config.vmVersionFeatures & config.loongarch64CCDMA) != 0) { + features.add(LoongArch64.CPUFeature.CCDMA); + } + + if ((config.vmVersionFeatures & config.loongarch64COMPLEX) != 0) { + features.add(LoongArch64.CPUFeature.COMPLEX); + } + + if ((config.vmVersionFeatures & config.loongarch64FP) != 0) { + features.add(LoongArch64.CPUFeature.FP); + } + + if ((config.vmVersionFeatures & config.loongarch64CRYPTO) != 0) { + features.add(LoongArch64.CPUFeature.CRYPTO); + } + + if ((config.vmVersionFeatures & config.loongarch64LSX) != 0) { + features.add(LoongArch64.CPUFeature.LSX); + } + + if ((config.vmVersionFeatures & config.loongarch64LASX) != 0) { + features.add(LoongArch64.CPUFeature.LASX); + } + + if ((config.vmVersionFeatures & config.loongarch64LAM) != 0) { + features.add(LoongArch64.CPUFeature.LAM); + } + + if ((config.vmVersionFeatures & config.loongarch64LLSYNC) != 0) { + features.add(LoongArch64.CPUFeature.LLSYNC); + } + + if ((config.vmVersionFeatures & config.loongarch64TGTSYNC) != 0) { + features.add(LoongArch64.CPUFeature.TGTSYNC); + } + + if ((config.vmVersionFeatures & config.loongarch64ULSYNC) != 0) { + features.add(LoongArch64.CPUFeature.ULSYNC); + } + + if ((config.vmVersionFeatures & config.loongarch64UAL) != 0) { + features.add(LoongArch64.CPUFeature.UAL); + } + + return features; + } + + protected EnumSet 
computeFlags(@SuppressWarnings("unused") LoongArch64HotSpotVMConfig config) { + EnumSet flags = EnumSet.noneOf(LoongArch64.Flag.class); + + if (config.useLSX) { + flags.add(LoongArch64.Flag.useLSX); + } + + if (config.useLASX) { + flags.add(LoongArch64.Flag.useLASX); + } + + return flags; + } + + protected TargetDescription createTarget(LoongArch64HotSpotVMConfig config) { + final int stackFrameAlignment = 16; + final int implicitNullCheckLimit = 4096; + final boolean inlineObjects = true; + Architecture arch = new LoongArch64(computeFeatures(config), computeFlags(config)); + return new TargetDescription(arch, true, stackFrameAlignment, implicitNullCheckLimit, inlineObjects); + } + + protected HotSpotConstantReflectionProvider createConstantReflection(HotSpotJVMCIRuntime runtime) { + return new HotSpotConstantReflectionProvider(runtime); + } + + protected RegisterConfig createRegisterConfig(LoongArch64HotSpotVMConfig config, TargetDescription target) { + return new LoongArch64HotSpotRegisterConfig(target, config.useCompressedOops); + } + + protected HotSpotCodeCacheProvider createCodeCache(HotSpotJVMCIRuntime runtime, TargetDescription target, RegisterConfig regConfig) { + return new HotSpotCodeCacheProvider(runtime, runtime.getConfig(), target, regConfig); + } + + protected HotSpotMetaAccessProvider createMetaAccess(HotSpotJVMCIRuntime runtime) { + return new HotSpotMetaAccessProvider(runtime); + } + + @Override + public String getArchitecture() { + return "loongarch64"; + } + + @Override + public String toString() { + return "JVMCIBackend:" + getArchitecture(); + } + + @Override + @SuppressWarnings("try") + public JVMCIBackend createJVMCIBackend(HotSpotJVMCIRuntime runtime, JVMCIBackend host) { + + assert host == null; + LoongArch64HotSpotVMConfig config = new LoongArch64HotSpotVMConfig(runtime.getConfigStore()); + TargetDescription target = createTarget(config); + + RegisterConfig regConfig; + HotSpotCodeCacheProvider codeCache; + ConstantReflectionProvider constantReflection; + HotSpotMetaAccessProvider metaAccess; + StackIntrospection stackIntrospection; + try (InitTimer t = timer("create providers")) { + try (InitTimer rt = timer("create MetaAccess provider")) { + metaAccess = createMetaAccess(runtime); + } + try (InitTimer rt = timer("create RegisterConfig")) { + regConfig = createRegisterConfig(config, target); + } + try (InitTimer rt = timer("create CodeCache provider")) { + codeCache = createCodeCache(runtime, target, regConfig); + } + try (InitTimer rt = timer("create ConstantReflection provider")) { + constantReflection = createConstantReflection(runtime); + } + try (InitTimer rt = timer("create StackIntrospection provider")) { + stackIntrospection = new HotSpotStackIntrospection(runtime); + } + } + try (InitTimer rt = timer("instantiate backend")) { + return createBackend(metaAccess, codeCache, constantReflection, stackIntrospection); + } + } + + protected JVMCIBackend createBackend(HotSpotMetaAccessProvider metaAccess, HotSpotCodeCacheProvider codeCache, ConstantReflectionProvider constantReflection, + StackIntrospection stackIntrospection) { + return new JVMCIBackend(metaAccess, codeCache, constantReflection, stackIntrospection); + } +} diff --git a/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.hotspot.loongarch64/src/jdk/vm/ci/hotspot/loongarch64/LoongArch64HotSpotRegisterConfig.java b/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.hotspot.loongarch64/src/jdk/vm/ci/hotspot/loongarch64/LoongArch64HotSpotRegisterConfig.java new file mode 100644 index 00000000000..2ee6a4b8472 
--- /dev/null +++ b/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.hotspot.loongarch64/src/jdk/vm/ci/hotspot/loongarch64/LoongArch64HotSpotRegisterConfig.java @@ -0,0 +1,297 @@ +/* + * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ +package jdk.vm.ci.hotspot.loongarch64; + +import static jdk.vm.ci.loongarch64.LoongArch64.ra; +import static jdk.vm.ci.loongarch64.LoongArch64.a0; +import static jdk.vm.ci.loongarch64.LoongArch64.a1; +import static jdk.vm.ci.loongarch64.LoongArch64.a2; +import static jdk.vm.ci.loongarch64.LoongArch64.a3; +import static jdk.vm.ci.loongarch64.LoongArch64.a4; +import static jdk.vm.ci.loongarch64.LoongArch64.a5; +import static jdk.vm.ci.loongarch64.LoongArch64.a6; +import static jdk.vm.ci.loongarch64.LoongArch64.a7; +import static jdk.vm.ci.loongarch64.LoongArch64.SCR1; +import static jdk.vm.ci.loongarch64.LoongArch64.SCR2; +import static jdk.vm.ci.loongarch64.LoongArch64.t0; +import static jdk.vm.ci.loongarch64.LoongArch64.v0; +import static jdk.vm.ci.loongarch64.LoongArch64.s5; +import static jdk.vm.ci.loongarch64.LoongArch64.s6; +import static jdk.vm.ci.loongarch64.LoongArch64.sp; +import static jdk.vm.ci.loongarch64.LoongArch64.fp; +import static jdk.vm.ci.loongarch64.LoongArch64.tp; +import static jdk.vm.ci.loongarch64.LoongArch64.rx; +import static jdk.vm.ci.loongarch64.LoongArch64.f0; +import static jdk.vm.ci.loongarch64.LoongArch64.f1; +import static jdk.vm.ci.loongarch64.LoongArch64.f2; +import static jdk.vm.ci.loongarch64.LoongArch64.f3; +import static jdk.vm.ci.loongarch64.LoongArch64.f4; +import static jdk.vm.ci.loongarch64.LoongArch64.f5; +import static jdk.vm.ci.loongarch64.LoongArch64.f6; +import static jdk.vm.ci.loongarch64.LoongArch64.f7; +import static jdk.vm.ci.loongarch64.LoongArch64.fv0; +import static jdk.vm.ci.loongarch64.LoongArch64.zero; + +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +import jdk.vm.ci.loongarch64.LoongArch64; +import jdk.vm.ci.code.Architecture; +import jdk.vm.ci.code.CallingConvention; +import jdk.vm.ci.code.CallingConvention.Type; +import jdk.vm.ci.code.Register; +import jdk.vm.ci.code.RegisterArray; +import jdk.vm.ci.code.RegisterAttributes; +import jdk.vm.ci.code.RegisterConfig; +import jdk.vm.ci.code.StackSlot; +import jdk.vm.ci.code.TargetDescription; +import jdk.vm.ci.code.ValueKindFactory; +import jdk.vm.ci.common.JVMCIError; +import jdk.vm.ci.hotspot.HotSpotCallingConventionType; +import 
jdk.vm.ci.meta.AllocatableValue; +import jdk.vm.ci.meta.JavaKind; +import jdk.vm.ci.meta.JavaType; +import jdk.vm.ci.meta.PlatformKind; +import jdk.vm.ci.meta.Value; +import jdk.vm.ci.meta.ValueKind; + +public class LoongArch64HotSpotRegisterConfig implements RegisterConfig { + + private final TargetDescription target; + + private final RegisterArray allocatable; + + /** + * The caller saved registers always include all parameter registers. + */ + private final RegisterArray callerSaved; + + private final boolean allAllocatableAreCallerSaved; + + private final RegisterAttributes[] attributesMap; + + @Override + public RegisterArray getAllocatableRegisters() { + return allocatable; + } + + @Override + public RegisterArray filterAllocatableRegisters(PlatformKind kind, RegisterArray registers) { + ArrayList list = new ArrayList<>(); + for (Register reg : registers) { + if (target.arch.canStoreValue(reg.getRegisterCategory(), kind)) { + list.add(reg); + } + } + + return new RegisterArray(list); + } + + @Override + public RegisterAttributes[] getAttributesMap() { + return attributesMap.clone(); + } + + private final RegisterArray javaGeneralParameterRegisters = new RegisterArray(t0, a0, a1, a2, a3, a4, a5, a6, a7); + private final RegisterArray nativeGeneralParameterRegisters = new RegisterArray(a0, a1, a2, a3, a4, a5, a6, a7); + private final RegisterArray floatParameterRegisters = new RegisterArray(f0, f1, f2, f3, f4, f5, f6, f7); + + public static final Register heapBaseRegister = s5; + public static final Register TREG = s6; + + private static final RegisterArray reservedRegisters = new RegisterArray(fp, ra, zero, sp, tp, rx, SCR1, SCR2, TREG); + + private static RegisterArray initAllocatable(Architecture arch, boolean reserveForHeapBase) { + RegisterArray allRegisters = arch.getAvailableValueRegisters(); + Register[] registers = new Register[allRegisters.size() - reservedRegisters.size() - (reserveForHeapBase ? 
1 : 0)]; + List reservedRegistersList = reservedRegisters.asList(); + + int idx = 0; + for (Register reg : allRegisters) { + if (reservedRegistersList.contains(reg)) { + // skip reserved registers + continue; + } + if (reserveForHeapBase && reg.equals(heapBaseRegister)) { + // skip heap base register + continue; + } + + registers[idx++] = reg; + } + + assert idx == registers.length; + return new RegisterArray(registers); + } + + public LoongArch64HotSpotRegisterConfig(TargetDescription target, boolean useCompressedOops) { + this(target, initAllocatable(target.arch, useCompressedOops)); + assert callerSaved.size() >= allocatable.size(); + } + + public LoongArch64HotSpotRegisterConfig(TargetDescription target, RegisterArray allocatable) { + this.target = target; + + this.allocatable = allocatable; + Set callerSaveSet = new HashSet<>(); + allocatable.addTo(callerSaveSet); + floatParameterRegisters.addTo(callerSaveSet); + javaGeneralParameterRegisters.addTo(callerSaveSet); + nativeGeneralParameterRegisters.addTo(callerSaveSet); + callerSaved = new RegisterArray(callerSaveSet); + + allAllocatableAreCallerSaved = true; + attributesMap = RegisterAttributes.createMap(this, LoongArch64.allRegisters); + } + + @Override + public RegisterArray getCallerSaveRegisters() { + return callerSaved; + } + + @Override + public RegisterArray getCalleeSaveRegisters() { + return null; + } + + @Override + public boolean areAllAllocatableRegistersCallerSaved() { + return allAllocatableAreCallerSaved; + } + + @Override + public CallingConvention getCallingConvention(Type type, JavaType returnType, JavaType[] parameterTypes, ValueKindFactory valueKindFactory) { + HotSpotCallingConventionType hotspotType = (HotSpotCallingConventionType) type; + if (type == HotSpotCallingConventionType.NativeCall) { + return callingConvention(nativeGeneralParameterRegisters, returnType, parameterTypes, hotspotType, valueKindFactory); + } + // On x64, parameter locations are the same whether viewed + // from the caller or callee perspective + return callingConvention(javaGeneralParameterRegisters, returnType, parameterTypes, hotspotType, valueKindFactory); + } + + @Override + public RegisterArray getCallingConventionRegisters(Type type, JavaKind kind) { + HotSpotCallingConventionType hotspotType = (HotSpotCallingConventionType) type; + switch (kind) { + case Boolean: + case Byte: + case Short: + case Char: + case Int: + case Long: + case Object: + return hotspotType == HotSpotCallingConventionType.NativeCall ? 
nativeGeneralParameterRegisters : javaGeneralParameterRegisters; + case Float: + case Double: + return floatParameterRegisters; + default: + throw JVMCIError.shouldNotReachHere(); + } + } + + private CallingConvention callingConvention(RegisterArray generalParameterRegisters, JavaType returnType, JavaType[] parameterTypes, HotSpotCallingConventionType type, + ValueKindFactory valueKindFactory) { + AllocatableValue[] locations = new AllocatableValue[parameterTypes.length]; + + int currentGeneral = 0; + int currentFloat = 0; + int currentStackOffset = 0; + + for (int i = 0; i < parameterTypes.length; i++) { + final JavaKind kind = parameterTypes[i].getJavaKind().getStackKind(); + + switch (kind) { + case Byte: + case Boolean: + case Short: + case Char: + case Int: + case Long: + case Object: + if (currentGeneral < generalParameterRegisters.size()) { + Register register = generalParameterRegisters.get(currentGeneral++); + locations[i] = register.asValue(valueKindFactory.getValueKind(kind)); + } + break; + case Float: + case Double: + if (currentFloat < floatParameterRegisters.size()) { + Register register = floatParameterRegisters.get(currentFloat++); + locations[i] = register.asValue(valueKindFactory.getValueKind(kind)); + } else if (currentGeneral < generalParameterRegisters.size()) { + Register register = generalParameterRegisters.get(currentGeneral++); + locations[i] = register.asValue(valueKindFactory.getValueKind(kind)); + } + break; + default: + throw JVMCIError.shouldNotReachHere(); + } + + if (locations[i] == null) { + ValueKind valueKind = valueKindFactory.getValueKind(kind); + locations[i] = StackSlot.get(valueKind, currentStackOffset, !type.out); + currentStackOffset += Math.max(valueKind.getPlatformKind().getSizeInBytes(), target.wordSize); + } + } + + JavaKind returnKind = returnType == null ? JavaKind.Void : returnType.getJavaKind(); + AllocatableValue returnLocation = returnKind == JavaKind.Void ? Value.ILLEGAL : getReturnRegister(returnKind).asValue(valueKindFactory.getValueKind(returnKind.getStackKind())); + return new CallingConvention(currentStackOffset, returnLocation, locations); + } + + @Override + public Register getReturnRegister(JavaKind kind) { + switch (kind) { + case Boolean: + case Byte: + case Char: + case Short: + case Int: + case Long: + case Object: + return v0; + case Float: + case Double: + return fv0; + case Void: + case Illegal: + return null; + default: + throw new UnsupportedOperationException("no return register for type " + kind); + } + } + + @Override + public Register getFrameRegister() { + return sp; + } + + @Override + public String toString() { + return String.format("Allocatable: " + getAllocatableRegisters() + "%n" + "CallerSave: " + getCallerSaveRegisters() + "%n"); + } +} diff --git a/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.hotspot.loongarch64/src/jdk/vm/ci/hotspot/loongarch64/LoongArch64HotSpotVMConfig.java b/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.hotspot.loongarch64/src/jdk/vm/ci/hotspot/loongarch64/LoongArch64HotSpotVMConfig.java new file mode 100644 index 00000000000..c8605976a0d --- /dev/null +++ b/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.hotspot.loongarch64/src/jdk/vm/ci/hotspot/loongarch64/LoongArch64HotSpotVMConfig.java @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2016, 2022, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ +package jdk.vm.ci.hotspot.loongarch64; + +import jdk.vm.ci.hotspot.HotSpotVMConfigAccess; +import jdk.vm.ci.hotspot.HotSpotVMConfigStore; +import jdk.vm.ci.services.Services; + +/** + * Used to access native configuration details. + * + * All non-static, public fields in this class are so that they can be compiled as constants. + */ +class LoongArch64HotSpotVMConfig extends HotSpotVMConfigAccess { + + LoongArch64HotSpotVMConfig(HotSpotVMConfigStore config) { + super(config); + } + + final boolean useCompressedOops = getFlag("UseCompressedOops", Boolean.class); + + // CPU Capabilities + + /* + * These flags are set based on the corresponding command line flags. + */ + final boolean useLSX = getFlag("UseLSX", Boolean.class); + final boolean useLASX = getFlag("UseLASX", Boolean.class); + + final long vmVersionFeatures = getFieldValue("Abstract_VM_Version::_features", Long.class, "uint64_t"); + + /* + * These flags are set if the corresponding support is in the hardware. 
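+   * Each CPU_* constant below is a bit mask within vmVersionFeatures;
+   * LoongArch64HotSpotJVMCIBackendFactory.computeFeatures() tests each one
+   * with a bitwise AND against the field read above.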
+ */ + // Checkstyle: stop + final long loongarch64LA32 = getConstant("VM_Version::CPU_LA32", Long.class); + final long loongarch64LA64 = getConstant("VM_Version::CPU_LA64", Long.class); + final long loongarch64LLEXC = getConstant("VM_Version::CPU_LLEXC", Long.class); + final long loongarch64SCDLY = getConstant("VM_Version::CPU_SCDLY", Long.class); + final long loongarch64LLDBAR = getConstant("VM_Version::CPU_LLDBAR", Long.class); + final long loongarch64LBT_X86 = getConstant("VM_Version::CPU_LBT_X86", Long.class); + final long loongarch64LBT_ARM = getConstant("VM_Version::CPU_LBT_ARM", Long.class); + final long loongarch64LBT_MIPS = getConstant("VM_Version::CPU_LBT_MIPS", Long.class); + final long loongarch64CCDMA = getConstant("VM_Version::CPU_CCDMA", Long.class); + final long loongarch64COMPLEX = getConstant("VM_Version::CPU_COMPLEX", Long.class); + final long loongarch64FP = getConstant("VM_Version::CPU_FP", Long.class); + final long loongarch64CRYPTO = getConstant("VM_Version::CPU_CRYPTO", Long.class); + final long loongarch64LSX = getConstant("VM_Version::CPU_LSX", Long.class); + final long loongarch64LASX = getConstant("VM_Version::CPU_LASX", Long.class); + final long loongarch64LAM = getConstant("VM_Version::CPU_LAM", Long.class); + final long loongarch64LLSYNC = getConstant("VM_Version::CPU_LLSYNC", Long.class); + final long loongarch64TGTSYNC = getConstant("VM_Version::CPU_TGTSYNC", Long.class); + final long loongarch64ULSYNC = getConstant("VM_Version::CPU_ULSYNC", Long.class); + final long loongarch64UAL = getConstant("VM_Version::CPU_UAL", Long.class); + // Checkstyle: resume +} diff --git a/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.hotspot.loongarch64/src/jdk/vm/ci/hotspot/loongarch64/package-info.java b/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.hotspot.loongarch64/src/jdk/vm/ci/hotspot/loongarch64/package-info.java new file mode 100644 index 00000000000..1048ea9d64b --- /dev/null +++ b/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.hotspot.loongarch64/src/jdk/vm/ci/hotspot/loongarch64/package-info.java @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2018, 2022, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +/** + * The LoongArch64 HotSpot specific portions of the JVMCI API. 
+ */ +package jdk.vm.ci.hotspot.loongarch64; diff --git a/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.loongarch64/src/jdk/vm/ci/loongarch64/LoongArch64.java b/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.loongarch64/src/jdk/vm/ci/loongarch64/LoongArch64.java new file mode 100644 index 00000000000..1bb12e7a5f7 --- /dev/null +++ b/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.loongarch64/src/jdk/vm/ci/loongarch64/LoongArch64.java @@ -0,0 +1,247 @@ +/* + * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ +package jdk.vm.ci.loongarch64; + +import java.nio.ByteOrder; +import java.util.EnumSet; + +import jdk.vm.ci.code.Architecture; +import jdk.vm.ci.code.Register; +import jdk.vm.ci.code.Register.RegisterCategory; +import jdk.vm.ci.code.RegisterArray; +import jdk.vm.ci.meta.JavaKind; +import jdk.vm.ci.meta.PlatformKind; + +/** + * Represents the LoongArch64 architecture. 
+ */ +public class LoongArch64 extends Architecture { + + public static final RegisterCategory CPU = new RegisterCategory("CPU"); + + // General purpose CPU registers + public static final Register zero = new Register(0, 0, "r0", CPU); + public static final Register ra = new Register(1, 1, "r1", CPU); + public static final Register tp = new Register(2, 2, "r2", CPU); + public static final Register sp = new Register(3, 3, "r3", CPU); + public static final Register a0 = new Register(4, 4, "r4", CPU); + public static final Register a1 = new Register(5, 5, "r5", CPU); + public static final Register a2 = new Register(6, 6, "r6", CPU); + public static final Register a3 = new Register(7, 7, "r7", CPU); + public static final Register a4 = new Register(8, 8, "r8", CPU); + public static final Register a5 = new Register(9, 9, "r9", CPU); + public static final Register a6 = new Register(10, 10, "r10", CPU); + public static final Register a7 = new Register(11, 11, "r11", CPU); + public static final Register t0 = new Register(12, 12, "r12", CPU); + public static final Register t1 = new Register(13, 13, "r13", CPU); + public static final Register t2 = new Register(14, 14, "r14", CPU); + public static final Register t3 = new Register(15, 15, "r15", CPU); + public static final Register t4 = new Register(16, 16, "r16", CPU); + public static final Register t5 = new Register(17, 17, "r17", CPU); + public static final Register t6 = new Register(18, 18, "r18", CPU); + public static final Register t7 = new Register(19, 19, "r19", CPU); + public static final Register t8 = new Register(20, 20, "r20", CPU); + public static final Register rx = new Register(21, 21, "r21", CPU); + public static final Register fp = new Register(22, 22, "r22", CPU); + public static final Register s0 = new Register(23, 23, "r23", CPU); + public static final Register s1 = new Register(24, 24, "r24", CPU); + public static final Register s2 = new Register(25, 25, "r25", CPU); + public static final Register s3 = new Register(26, 26, "r26", CPU); + public static final Register s4 = new Register(27, 27, "r27", CPU); + public static final Register s5 = new Register(28, 28, "r28", CPU); + public static final Register s6 = new Register(29, 29, "r29", CPU); + public static final Register s7 = new Register(30, 30, "r30", CPU); + public static final Register s8 = new Register(31, 31, "r31", CPU); + + public static final Register SCR1 = t7; + public static final Register SCR2 = t4; + public static final Register v0 = a0; + + // @formatter:off + public static final RegisterArray cpuRegisters = new RegisterArray( + zero, ra, tp, sp, a0, a1, a2, a3, + a4, a5, a6, a7, t0, t1, t2, t3, + t4, t5, t6, t7, t8, rx, fp, s0, + s1, s2, s3, s4, s5, s6, s7, s8 + ); + // @formatter:on + + public static final RegisterCategory SIMD = new RegisterCategory("SIMD"); + + // Simd registers + public static final Register f0 = new Register(32, 0, "f0", SIMD); + public static final Register f1 = new Register(33, 1, "f1", SIMD); + public static final Register f2 = new Register(34, 2, "f2", SIMD); + public static final Register f3 = new Register(35, 3, "f3", SIMD); + public static final Register f4 = new Register(36, 4, "f4", SIMD); + public static final Register f5 = new Register(37, 5, "f5", SIMD); + public static final Register f6 = new Register(38, 6, "f6", SIMD); + public static final Register f7 = new Register(39, 7, "f7", SIMD); + public static final Register f8 = new Register(40, 8, "f8", SIMD); + public static final Register f9 = new Register(41, 9, "f9", SIMD); + 
public static final Register f10 = new Register(42, 10, "f10", SIMD);
+    public static final Register f11 = new Register(43, 11, "f11", SIMD);
+    public static final Register f12 = new Register(44, 12, "f12", SIMD);
+    public static final Register f13 = new Register(45, 13, "f13", SIMD);
+    public static final Register f14 = new Register(46, 14, "f14", SIMD);
+    public static final Register f15 = new Register(47, 15, "f15", SIMD);
+    public static final Register f16 = new Register(48, 16, "f16", SIMD);
+    public static final Register f17 = new Register(49, 17, "f17", SIMD);
+    public static final Register f18 = new Register(50, 18, "f18", SIMD);
+    public static final Register f19 = new Register(51, 19, "f19", SIMD);
+    public static final Register f20 = new Register(52, 20, "f20", SIMD);
+    public static final Register f21 = new Register(53, 21, "f21", SIMD);
+    public static final Register f22 = new Register(54, 22, "f22", SIMD);
+    public static final Register f23 = new Register(55, 23, "f23", SIMD);
+    public static final Register f24 = new Register(56, 24, "f24", SIMD);
+    public static final Register f25 = new Register(57, 25, "f25", SIMD);
+    public static final Register f26 = new Register(58, 26, "f26", SIMD);
+    public static final Register f27 = new Register(59, 27, "f27", SIMD);
+    public static final Register f28 = new Register(60, 28, "f28", SIMD);
+    public static final Register f29 = new Register(61, 29, "f29", SIMD);
+    public static final Register f30 = new Register(62, 30, "f30", SIMD);
+    public static final Register f31 = new Register(63, 31, "f31", SIMD);
+
+    public static final Register fv0 = f0;
+
+    // @formatter:off
+    public static final RegisterArray simdRegisters = new RegisterArray(
+        f0, f1, f2, f3, f4, f5, f6, f7,
+        f8, f9, f10, f11, f12, f13, f14, f15,
+        f16, f17, f18, f19, f20, f21, f22, f23,
+        f24, f25, f26, f27, f28, f29, f30, f31
+    );
+    // @formatter:on
+
+    // @formatter:off
+    public static final RegisterArray allRegisters = new RegisterArray(
+        zero, ra, tp, sp, a0, a1, a2, a3,
+        a4, a5, a6, a7, t0, t1, t2, t3,
+        t4, t5, t6, t7, t8, rx, fp, s0,
+        s1, s2, s3, s4, s5, s6, s7, s8,
+
+        f0, f1, f2, f3, f4, f5, f6, f7,
+        f8, f9, f10, f11, f12, f13, f14, f15,
+        f16, f17, f18, f19, f20, f21, f22, f23,
+        f24, f25, f26, f27, f28, f29, f30, f31
+    );
+    // @formatter:on
+
+    /**
+     * Basic set of CPU features mirroring what is returned from the cpuid instruction. See:
+     * {@code VM_Version::cpuFeatureFlags}.
+     */
+    public enum CPUFeature {
+        LA32,
+        LA64,
+        LLEXC,
+        SCDLY,
+        LLDBAR,
+        LBT_X86,
+        LBT_ARM,
+        LBT_MIPS,
+        CCDMA,
+        COMPLEX,
+        FP,
+        CRYPTO,
+        LSX,
+        LASX,
+        LAM,
+        LLSYNC,
+        TGTSYNC,
+        ULSYNC,
+        UAL
+    }
+
+    private final EnumSet<CPUFeature> features;
+
+    /**
+     * Set of flags to control code emission.
+     */
+    public enum Flag {
+        useLSX,
+        useLASX
+    }
+
+    private final EnumSet<Flag> flags;
+
+    public LoongArch64(EnumSet<CPUFeature> features, EnumSet<Flag> flags) {
+        super("loongarch64", LoongArch64Kind.QWORD, ByteOrder.LITTLE_ENDIAN, true, allRegisters, 0, 0, 0);
+        this.features = features;
+        this.flags = flags;
+    }
+
+    public EnumSet<CPUFeature> getFeatures() {
+        return features;
+    }
+
+    public EnumSet<Flag> getFlags() {
+        return flags;
+    }
+
+    @Override
+    public PlatformKind getPlatformKind(JavaKind javaKind) {
+        switch (javaKind) {
+            case Boolean:
+            case Byte:
+                return LoongArch64Kind.BYTE;
+            case Short:
+            case Char:
+                return LoongArch64Kind.WORD;
+            case Int:
+                return LoongArch64Kind.DWORD;
+            case Long:
+            case Object:
+                return LoongArch64Kind.QWORD;
+            case Float:
+                return LoongArch64Kind.SINGLE;
+            case Double:
+                return LoongArch64Kind.DOUBLE;
+            default:
+                return null;
+        }
+    }
+
+    @Override
+    public boolean canStoreValue(RegisterCategory category, PlatformKind platformKind) {
+        LoongArch64Kind kind = (LoongArch64Kind) platformKind;
+        if (kind.isInteger()) {
+            return category.equals(CPU);
+        } else if (kind.isSIMD()) {
+            return category.equals(SIMD);
+        }
+        return false;
+    }
+
+    @Override
+    public LoongArch64Kind getLargestStorableKind(RegisterCategory category) {
+        if (category.equals(CPU)) {
+            return LoongArch64Kind.QWORD;
+        } else if (category.equals(SIMD)) {
+            return LoongArch64Kind.V256_QWORD;
+        } else {
+            return null;
+        }
+    }
+}
diff --git a/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.loongarch64/src/jdk/vm/ci/loongarch64/LoongArch64Kind.java b/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.loongarch64/src/jdk/vm/ci/loongarch64/LoongArch64Kind.java
new file mode 100644
index 00000000000..84b7f2027f1
--- /dev/null
+++ b/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.loongarch64/src/jdk/vm/ci/loongarch64/LoongArch64Kind.java
@@ -0,0 +1,163 @@
+/*
+ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2022, Loongson Technology. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+package jdk.vm.ci.loongarch64;
+
+import jdk.vm.ci.meta.PlatformKind;
+
+public enum LoongArch64Kind implements PlatformKind {
+
+    // scalar
+    BYTE(1),
+    WORD(2),
+    DWORD(4),
+    QWORD(8),
+    UBYTE(1),
+    UWORD(2),
+    UDWORD(4),
+    SINGLE(4),
+    DOUBLE(8),
+
+    // SIMD
+    V128_BYTE(16, BYTE),
+    V128_WORD(16, WORD),
+    V128_DWORD(16, DWORD),
+    V128_QWORD(16, QWORD),
+    V128_SINGLE(16, SINGLE),
+    V128_DOUBLE(16, DOUBLE),
+    V256_BYTE(32, BYTE),
+    V256_WORD(32, WORD),
+    V256_DWORD(32, DWORD),
+    V256_QWORD(32, QWORD),
+    V256_SINGLE(32, SINGLE),
+    V256_DOUBLE(32, DOUBLE);
+
+    private final int size;
+    private final int vectorLength;
+
+    private final LoongArch64Kind scalar;
+    private final EnumKey<LoongArch64Kind> key = new EnumKey<>(this);
+
+    LoongArch64Kind(int size) {
+        this.size = size;
+        this.scalar = this;
+        this.vectorLength = 1;
+    }
+
+    LoongArch64Kind(int size, LoongArch64Kind scalar) {
+        this.size = size;
+        this.scalar = scalar;
+
+        assert size % scalar.size == 0;
+        this.vectorLength = size / scalar.size;
+    }
+
+    public LoongArch64Kind getScalar() {
+        return scalar;
+    }
+
+    @Override
+    public int getSizeInBytes() {
+        return size;
+    }
+
+    @Override
+    public int getVectorLength() {
+        return vectorLength;
+    }
+
+    @Override
+    public Key getKey() {
+        return key;
+    }
+
+    public boolean isInteger() {
+        switch (this) {
+            case BYTE:
+            case WORD:
+            case DWORD:
+            case QWORD:
+            case UBYTE:
+            case UWORD:
+            case UDWORD:
+                return true;
+            default:
+                return false;
+        }
+    }
+
+    public boolean isSIMD() {
+        switch (this) {
+            case SINGLE:
+            case DOUBLE:
+            case V128_BYTE:
+            case V128_WORD:
+            case V128_DWORD:
+            case V128_QWORD:
+            case V128_SINGLE:
+            case V128_DOUBLE:
+            case V256_BYTE:
+            case V256_WORD:
+            case V256_DWORD:
+            case V256_QWORD:
+            case V256_SINGLE:
+            case V256_DOUBLE:
+                return true;
+            default:
+                return false;
+        }
+    }
+
+    @Override
+    public char getTypeChar() {
+        switch (this) {
+            case BYTE:
+                return 'b';
+            case WORD:
+                return 'w';
+            case DWORD:
+                return 'd';
+            case QWORD:
+                return 'q';
+            case SINGLE:
+                return 'S';
+            case DOUBLE:
+                return 'D';
+            case V128_BYTE:
+            case V128_WORD:
+            case V128_DWORD:
+            case V128_QWORD:
+            case V128_SINGLE:
+            case V128_DOUBLE:
+            case V256_BYTE:
+            case V256_WORD:
+            case V256_DWORD:
+            case V256_QWORD:
+            case V256_SINGLE:
+            case V256_DOUBLE:
+                return 'v';
+            default:
+                return '-';
+        }
+    }
+}
diff --git a/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.loongarch64/src/jdk/vm/ci/loongarch64/package-info.java b/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.loongarch64/src/jdk/vm/ci/loongarch64/package-info.java
new file mode 100644
index 00000000000..9d020833eaf
--- /dev/null
+++ b/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.loongarch64/src/jdk/vm/ci/loongarch64/package-info.java
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2018, 2022, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2022, Loongson Technology. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +/** + * The LoongArch64 platform independent portions of the JVMCI API. + */ +package jdk.vm.ci.loongarch64; diff --git a/src/jdk.internal.vm.ci/share/classes/module-info.java b/src/jdk.internal.vm.ci/share/classes/module-info.java index fed310d386e..661f106d307 100644 --- a/src/jdk.internal.vm.ci/share/classes/module-info.java +++ b/src/jdk.internal.vm.ci/share/classes/module-info.java @@ -23,6 +23,12 @@ * questions. */ +/* + * This file has been modified by Loongson Technology in 2022, These + * modifications are Copyright (c) 2022, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + module jdk.internal.vm.ci { exports jdk.vm.ci.services to jdk.internal.vm.compiler; exports jdk.vm.ci.runtime to @@ -37,6 +43,7 @@ provides jdk.vm.ci.hotspot.HotSpotJVMCIBackendFactory with jdk.vm.ci.hotspot.aarch64.AArch64HotSpotJVMCIBackendFactory, + jdk.vm.ci.hotspot.loongarch64.LoongArch64HotSpotJVMCIBackendFactory, jdk.vm.ci.hotspot.amd64.AMD64HotSpotJVMCIBackendFactory, jdk.vm.ci.hotspot.sparc.SPARCHotSpotJVMCIBackendFactory; } diff --git a/src/utils/hsdis/Makefile b/src/utils/hsdis/Makefile index 2514a895da6..08fbe3b953f 100644 --- a/src/utils/hsdis/Makefile +++ b/src/utils/hsdis/Makefile @@ -94,6 +94,9 @@ CC = gcc endif CFLAGS += -O DLDFLAGS += -shared +ifeq ($(ARCH), mips64) +DLDFLAGS += -Wl,-z,noexecstack +endif LDFLAGS += -ldl OUTFLAGS += -o $@ else diff --git a/test/hotspot/jtreg/compiler/cpuflags/TestAESIntrinsicsOnSupportedConfig.java b/test/hotspot/jtreg/compiler/cpuflags/TestAESIntrinsicsOnSupportedConfig.java index ac17e567b09..9b004a20336 100644 --- a/test/hotspot/jtreg/compiler/cpuflags/TestAESIntrinsicsOnSupportedConfig.java +++ b/test/hotspot/jtreg/compiler/cpuflags/TestAESIntrinsicsOnSupportedConfig.java @@ -21,12 +21,18 @@ * questions. */ +/* + * This file has been modified by Loongson Technology in 2021, These + * modifications are Copyright (c) 2021, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + /* * @test * @library /test/lib / * @modules java.base/jdk.internal.misc * java.management - * @requires vm.cpu.features ~= ".*aes.*" & !vm.graal.enabled + * @requires (vm.cpu.features ~= ".*aes.*" | os.arch == "loongarch64") & !vm.graal.enabled * @build sun.hotspot.WhiteBox * @run driver ClassFileInstaller sun.hotspot.WhiteBox * sun.hotspot.WhiteBox$WhiteBoxPermission diff --git a/test/hotspot/jtreg/compiler/cpuflags/TestAESIntrinsicsOnUnsupportedConfig.java b/test/hotspot/jtreg/compiler/cpuflags/TestAESIntrinsicsOnUnsupportedConfig.java index 60b2d033219..981a2399799 100644 --- a/test/hotspot/jtreg/compiler/cpuflags/TestAESIntrinsicsOnUnsupportedConfig.java +++ b/test/hotspot/jtreg/compiler/cpuflags/TestAESIntrinsicsOnUnsupportedConfig.java @@ -21,6 +21,12 @@ * questions. */ +/* + * This file has been modified by Loongson Technology in 2021, These + * modifications are Copyright (c) 2021, Loongson Technology, and are made + * available on the same license terms set forth above. 
+ */ + /* * @test * @library /test/lib / @@ -28,7 +34,7 @@ * java.management * * @build sun.hotspot.WhiteBox - * @requires !(vm.cpu.features ~= ".*aes.*") + * @requires !(vm.cpu.features ~= ".*aes.*" | os.arch == "loongarch64") * @requires vm.compiler1.enabled | !vm.graal.enabled * @run driver ClassFileInstaller sun.hotspot.WhiteBox * sun.hotspot.WhiteBox$WhiteBoxPermission diff --git a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForOtherCPU.java b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForOtherCPU.java index faa9fdbae67..a635f03d24b 100644 --- a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForOtherCPU.java +++ b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForOtherCPU.java @@ -21,6 +21,12 @@ * questions. */ +/* + * This file has been modified by Loongson Technology in 2021, These + * modifications are Copyright (c) 2021, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + package compiler.intrinsics.sha.cli.testcases; import compiler.intrinsics.sha.cli.SHAOptionsBase; @@ -32,19 +38,20 @@ /** * Generic test case for SHA-related options targeted to any CPU except - * AArch64, PPC, S390x, SPARC and X86. + * AArch64, PPC, S390x, SPARC, LoongArch64 and X86. */ public class GenericTestCaseForOtherCPU extends SHAOptionsBase.TestCase { public GenericTestCaseForOtherCPU(String optionName) { - // Execute the test case on any CPU except AArch64, PPC, S390x, SPARC and X86. + // Execute the test case on any CPU except AArch64, PPC, S390x, SPARC, LoongArch64 and X86. super(optionName, new NotPredicate( new OrPredicate(Platform::isAArch64, new OrPredicate(Platform::isS390x, new OrPredicate(Platform::isSparc, new OrPredicate(Platform::isPPC, + new OrPredicate(Platform::isLoongArch64, new OrPredicate(Platform::isX64, - Platform::isX86))))))); + Platform::isX86)))))))); } @Override diff --git a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/CodeInstallationTest.java b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/CodeInstallationTest.java index 62d0e99155b..c3fa3fb93ef 100644 --- a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/CodeInstallationTest.java +++ b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/CodeInstallationTest.java @@ -29,6 +29,7 @@ import jdk.vm.ci.code.TargetDescription; import jdk.vm.ci.code.test.amd64.AMD64TestAssembler; import jdk.vm.ci.code.test.sparc.SPARCTestAssembler; +import jdk.vm.ci.code.test.loongarch64.LoongArch64TestAssembler; import jdk.vm.ci.hotspot.HotSpotCompiledCode; import jdk.vm.ci.hotspot.HotSpotJVMCIRuntime; import jdk.vm.ci.hotspot.HotSpotResolvedJavaMethod; @@ -37,6 +38,7 @@ import jdk.vm.ci.runtime.JVMCI; import jdk.vm.ci.runtime.JVMCIBackend; import jdk.vm.ci.sparc.SPARC; +import jdk.vm.ci.loongarch64.LoongArch64; import org.junit.Assert; import java.lang.reflect.Method; @@ -72,6 +74,8 @@ private TestAssembler createAssembler() { return new AMD64TestAssembler(codeCache, config); } else if (arch instanceof SPARC) { return new SPARCTestAssembler(codeCache, config); + } else if (arch instanceof LoongArch64) { + return new LoongArch64TestAssembler(codeCache, config); } else { Assert.fail("unsupported architecture"); return null; diff --git a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/DataPatchTest.java 
b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/DataPatchTest.java index 8afc7d7b98e..520d7707a2f 100644 --- a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/DataPatchTest.java +++ b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/DataPatchTest.java @@ -23,7 +23,7 @@ /** * @test - * @requires vm.jvmci & (vm.simpleArch == "x64" | vm.simpleArch == "sparcv9") + * @requires vm.jvmci & (vm.simpleArch == "x64" | vm.simpleArch == "sparcv9" | vm.simpleArch == "loongarch64") * @library / * @modules jdk.internal.vm.ci/jdk.vm.ci.hotspot * jdk.internal.vm.ci/jdk.vm.ci.meta @@ -32,7 +32,8 @@ * jdk.internal.vm.ci/jdk.vm.ci.runtime * jdk.internal.vm.ci/jdk.vm.ci.amd64 * jdk.internal.vm.ci/jdk.vm.ci.sparc - * @compile CodeInstallationTest.java DebugInfoTest.java TestAssembler.java TestHotSpotVMConfig.java amd64/AMD64TestAssembler.java sparc/SPARCTestAssembler.java + * jdk.internal.vm.ci/jdk.vm.ci.loongarch64 + * @compile CodeInstallationTest.java DebugInfoTest.java TestAssembler.java TestHotSpotVMConfig.java amd64/AMD64TestAssembler.java sparc/SPARCTestAssembler.java loongarch64/LoongArch64TestAssembler.java * @run junit/othervm -XX:+UnlockExperimentalVMOptions -XX:+EnableJVMCI -Djvmci.Compiler=null jdk.vm.ci.code.test.DataPatchTest */ diff --git a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/InterpreterFrameSizeTest.java b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/InterpreterFrameSizeTest.java index 75d0748da52..a6826e2ffe5 100644 --- a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/InterpreterFrameSizeTest.java +++ b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/InterpreterFrameSizeTest.java @@ -23,7 +23,7 @@ /** * @test - * @requires vm.jvmci & (vm.simpleArch == "x64" | vm.simpleArch == "sparcv9") + * @requires vm.jvmci & (vm.simpleArch == "x64" | vm.simpleArch == "sparcv9" | vm.simpleArch == "loongarch64") * @modules jdk.internal.vm.ci/jdk.vm.ci.hotspot * jdk.internal.vm.ci/jdk.vm.ci.code * jdk.internal.vm.ci/jdk.vm.ci.code.site @@ -32,7 +32,8 @@ * jdk.internal.vm.ci/jdk.vm.ci.common * jdk.internal.vm.ci/jdk.vm.ci.amd64 * jdk.internal.vm.ci/jdk.vm.ci.sparc - * @compile CodeInstallationTest.java TestAssembler.java TestHotSpotVMConfig.java amd64/AMD64TestAssembler.java sparc/SPARCTestAssembler.java + * jdk.internal.vm.ci/jdk.vm.ci.loongarch64 + * @compile CodeInstallationTest.java TestAssembler.java TestHotSpotVMConfig.java amd64/AMD64TestAssembler.java sparc/SPARCTestAssembler.java loongarch64/LoongArch64TestAssembler.java * @run junit/othervm -XX:+UnlockExperimentalVMOptions -XX:+EnableJVMCI -Djvmci.Compiler=null jdk.vm.ci.code.test.InterpreterFrameSizeTest */ diff --git a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/MaxOopMapStackOffsetTest.java b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/MaxOopMapStackOffsetTest.java index a67fa2c1dfe..59cce6454da 100644 --- a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/MaxOopMapStackOffsetTest.java +++ b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/MaxOopMapStackOffsetTest.java @@ -23,7 +23,7 @@ /** * @test - * @requires vm.jvmci & (vm.simpleArch == "x64" | vm.simpleArch == "sparcv9") + * @requires vm.jvmci & (vm.simpleArch == "x64" | vm.simpleArch == "sparcv9" | vm.simpleArch == "loongarch64") * @library / * 
@modules jdk.internal.vm.ci/jdk.vm.ci.hotspot * jdk.internal.vm.ci/jdk.vm.ci.meta @@ -33,7 +33,8 @@ * jdk.internal.vm.ci/jdk.vm.ci.runtime * jdk.internal.vm.ci/jdk.vm.ci.amd64 * jdk.internal.vm.ci/jdk.vm.ci.sparc - * @compile CodeInstallationTest.java DebugInfoTest.java TestAssembler.java TestHotSpotVMConfig.java amd64/AMD64TestAssembler.java sparc/SPARCTestAssembler.java + * jdk.internal.vm.ci/jdk.vm.ci.loongarch64 + * @compile CodeInstallationTest.java DebugInfoTest.java TestAssembler.java TestHotSpotVMConfig.java amd64/AMD64TestAssembler.java sparc/SPARCTestAssembler.java loongarch64/LoongArch64TestAssembler.java * @run junit/othervm -XX:+UnlockExperimentalVMOptions -XX:+EnableJVMCI -Djvmci.Compiler=null jdk.vm.ci.code.test.MaxOopMapStackOffsetTest */ diff --git a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/NativeCallTest.java b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/NativeCallTest.java index d9e1f24c303..259218b305a 100644 --- a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/NativeCallTest.java +++ b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/NativeCallTest.java @@ -23,7 +23,7 @@ /** * @test - * @requires vm.jvmci & (vm.simpleArch == "x64" | vm.simpleArch == "sparcv9") + * @requires vm.jvmci & (vm.simpleArch == "x64" | vm.simpleArch == "sparcv9" | vm.simpleArch == "loongarch64") * @library /test/lib / * @modules jdk.internal.vm.ci/jdk.vm.ci.hotspot * jdk.internal.vm.ci/jdk.vm.ci.code @@ -33,7 +33,8 @@ * jdk.internal.vm.ci/jdk.vm.ci.common * jdk.internal.vm.ci/jdk.vm.ci.amd64 * jdk.internal.vm.ci/jdk.vm.ci.sparc - * @compile CodeInstallationTest.java TestHotSpotVMConfig.java NativeCallTest.java TestAssembler.java sparc/SPARCTestAssembler.java amd64/AMD64TestAssembler.java + * jdk.internal.vm.ci/jdk.vm.ci.loongarch64 + * @compile CodeInstallationTest.java TestHotSpotVMConfig.java NativeCallTest.java TestAssembler.java sparc/SPARCTestAssembler.java amd64/AMD64TestAssembler.java loongarch64/LoongArch64TestAssembler.java * @run junit/othervm/native -XX:+UnlockExperimentalVMOptions -XX:+EnableJVMCI -Xbootclasspath/a:. 
jdk.vm.ci.code.test.NativeCallTest */ package jdk.vm.ci.code.test; diff --git a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/SimpleCodeInstallationTest.java b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/SimpleCodeInstallationTest.java index 9b921140553..00d0f53cdb9 100644 --- a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/SimpleCodeInstallationTest.java +++ b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/SimpleCodeInstallationTest.java @@ -23,7 +23,7 @@ /** * @test - * @requires vm.jvmci & (vm.simpleArch == "x64" | vm.simpleArch == "sparcv9") + * @requires vm.jvmci & (vm.simpleArch == "x64" | vm.simpleArch == "sparcv9" | vm.simpleArch == "loongarch64") * @library / * @modules jdk.internal.vm.ci/jdk.vm.ci.hotspot * jdk.internal.vm.ci/jdk.vm.ci.meta @@ -32,7 +32,8 @@ * jdk.internal.vm.ci/jdk.vm.ci.runtime * jdk.internal.vm.ci/jdk.vm.ci.amd64 * jdk.internal.vm.ci/jdk.vm.ci.sparc - * @compile CodeInstallationTest.java DebugInfoTest.java TestAssembler.java TestHotSpotVMConfig.java amd64/AMD64TestAssembler.java sparc/SPARCTestAssembler.java + * jdk.internal.vm.ci/jdk.vm.ci.loongarch64 + * @compile CodeInstallationTest.java DebugInfoTest.java TestAssembler.java TestHotSpotVMConfig.java amd64/AMD64TestAssembler.java sparc/SPARCTestAssembler.java loongarch64/LoongArch64TestAssembler.java * @run junit/othervm -XX:+UnlockExperimentalVMOptions -XX:+EnableJVMCI -Djvmci.Compiler=null jdk.vm.ci.code.test.SimpleCodeInstallationTest */ diff --git a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/SimpleDebugInfoTest.java b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/SimpleDebugInfoTest.java index 5b2204868c4..ecfcb1cf019 100644 --- a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/SimpleDebugInfoTest.java +++ b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/SimpleDebugInfoTest.java @@ -23,7 +23,7 @@ /** * @test - * @requires vm.jvmci & (vm.simpleArch == "x64" | vm.simpleArch == "sparcv9") + * @requires vm.jvmci & (vm.simpleArch == "x64" | vm.simpleArch == "sparcv9" | vm.simpleArch == "loongarch64") * @library / * @modules jdk.internal.vm.ci/jdk.vm.ci.hotspot * jdk.internal.vm.ci/jdk.vm.ci.meta @@ -32,7 +32,8 @@ * jdk.internal.vm.ci/jdk.vm.ci.runtime * jdk.internal.vm.ci/jdk.vm.ci.amd64 * jdk.internal.vm.ci/jdk.vm.ci.sparc - * @compile CodeInstallationTest.java DebugInfoTest.java TestAssembler.java TestHotSpotVMConfig.java amd64/AMD64TestAssembler.java sparc/SPARCTestAssembler.java + * jdk.internal.vm.ci/jdk.vm.ci.loongarch64 + * @compile CodeInstallationTest.java DebugInfoTest.java TestAssembler.java TestHotSpotVMConfig.java amd64/AMD64TestAssembler.java sparc/SPARCTestAssembler.java loongarch64/LoongArch64TestAssembler.java * @run junit/othervm -XX:+UnlockExperimentalVMOptions -XX:+EnableJVMCI -Djvmci.Compiler=null jdk.vm.ci.code.test.SimpleDebugInfoTest */ diff --git a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/VirtualObjectDebugInfoTest.java b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/VirtualObjectDebugInfoTest.java index a10e90acdaf..5b1a58c74bb 100644 --- a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/VirtualObjectDebugInfoTest.java +++ b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/VirtualObjectDebugInfoTest.java 
@@ -23,7 +23,7 @@ /** * @test - * @requires vm.jvmci & (vm.simpleArch == "x64" | vm.simpleArch == "sparcv9") + * @requires vm.jvmci & (vm.simpleArch == "x64" | vm.simpleArch == "sparcv9" | vm.simpleArch == "loongarch64") * @library / * @modules jdk.internal.vm.ci/jdk.vm.ci.hotspot * jdk.internal.vm.ci/jdk.vm.ci.meta @@ -32,7 +32,8 @@ * jdk.internal.vm.ci/jdk.vm.ci.runtime * jdk.internal.vm.ci/jdk.vm.ci.amd64 * jdk.internal.vm.ci/jdk.vm.ci.sparc - * @compile CodeInstallationTest.java DebugInfoTest.java TestAssembler.java TestHotSpotVMConfig.java amd64/AMD64TestAssembler.java sparc/SPARCTestAssembler.java + * jdk.internal.vm.ci/jdk.vm.ci.loongarch64 + * @compile CodeInstallationTest.java DebugInfoTest.java TestAssembler.java TestHotSpotVMConfig.java amd64/AMD64TestAssembler.java sparc/SPARCTestAssembler.java loongarch64/LoongArch64TestAssembler.java * @run junit/othervm -XX:+UnlockExperimentalVMOptions -XX:+EnableJVMCI -Djvmci.Compiler=null jdk.vm.ci.code.test.VirtualObjectDebugInfoTest */ diff --git a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/loongarch64/LoongArch64TestAssembler.java b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/loongarch64/LoongArch64TestAssembler.java new file mode 100644 index 00000000000..4c76868453a --- /dev/null +++ b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/loongarch64/LoongArch64TestAssembler.java @@ -0,0 +1,568 @@ +/* + * Copyright (c) 2020, 2022, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ */ + +package jdk.vm.ci.code.test.loongarch64; + +import jdk.vm.ci.loongarch64.LoongArch64; +import jdk.vm.ci.loongarch64.LoongArch64Kind; +import jdk.vm.ci.code.CallingConvention; +import jdk.vm.ci.code.CodeCacheProvider; +import jdk.vm.ci.code.DebugInfo; +import jdk.vm.ci.code.Register; +import jdk.vm.ci.code.RegisterArray; +import jdk.vm.ci.code.RegisterValue; +import jdk.vm.ci.code.StackSlot; +import jdk.vm.ci.code.site.ConstantReference; +import jdk.vm.ci.code.site.DataSectionReference; +import jdk.vm.ci.code.test.TestAssembler; +import jdk.vm.ci.code.test.TestHotSpotVMConfig; +import jdk.vm.ci.hotspot.HotSpotCallingConventionType; +import jdk.vm.ci.hotspot.HotSpotConstant; +import jdk.vm.ci.hotspot.HotSpotForeignCallTarget; +import jdk.vm.ci.meta.AllocatableValue; +import jdk.vm.ci.meta.JavaKind; +import jdk.vm.ci.meta.VMConstant; + +public class LoongArch64TestAssembler extends TestAssembler { + + private static final Register scratchRegister = LoongArch64.SCR1; + private static final Register doubleScratch = LoongArch64.f23; + private static final RegisterArray nativeGeneralParameterRegisters = new RegisterArray(LoongArch64.a0, + LoongArch64.a1, LoongArch64.a2, + LoongArch64.a3, LoongArch64.a4, + LoongArch64.a5, LoongArch64.a6, + LoongArch64.a7); + private static final RegisterArray floatParameterRegisters = new RegisterArray(LoongArch64.f0, + LoongArch64.f1, LoongArch64.f2, + LoongArch64.f3, LoongArch64.f4, + LoongArch64.f5, LoongArch64.f6, + LoongArch64.f7); + private static int currentGeneral = 0; + private static int currentFloat = 0; + public LoongArch64TestAssembler(CodeCacheProvider codeCache, TestHotSpotVMConfig config) { + super(codeCache, config, + 16 /* initialFrameSize */, 16 /* stackAlignment */, + LoongArch64Kind.UDWORD /* narrowOopKind */, + /* registers */ + LoongArch64.a0, LoongArch64.a1, LoongArch64.a2, LoongArch64.a3, + LoongArch64.a4, LoongArch64.a5, LoongArch64.a6, LoongArch64.a7); + } + + private static int low(int x, int l) { + assert l < 32; + return (x >> 0) & ((1 << l)-1); + } + + private static int low16(int x) { + return low(x, 16); + } + + private void emitNop() { + code.emitInt(0x3400000); + } + + private void emitPcaddu12i(Register rj, int si20) { + // pcaddu12i + code.emitInt((0b0001110 << 25) + | (low(si20, 20) << 5) + | rj.encoding); + } + + private void emitAdd(Register rd, Register rj, Register rk) { + // add_d + code.emitInt((0b00000000000100001 << 15) + | (rk.encoding << 10) + | (rj.encoding << 5) + | rd.encoding); + } + + private void emitAdd(Register rd, Register rj, int si12) { + // addi_d + code.emitInt((0b0000001011 << 22) + | (low(si12, 12) << 10) + | (rj.encoding << 5) + | rd.encoding); + } + + private void emitSub(Register rd, Register rj, Register rk) { + // sub_d + code.emitInt((0b00000000000100011 << 15) + | (rk.encoding << 10) + | (rj.encoding << 5) + | rd.encoding); + } + + private void emitShiftLeft(Register rd, Register rj, int shift) { + // slli_d + code.emitInt((0b00000000010000 << 18) + | (low(( (0b01 << 6) | shift ), 8) << 10) + | (rj.encoding << 5) + | rd.encoding); + } + + private void emitLu12i_w(Register rj, int imm20) { + // lu12i_w + code.emitInt((0b0001010 << 25) + | (low(imm20, 20)<<5) + | rj.encoding); + } + + private void emitOri(Register rd, Register rj, int ui12) { + // ori + code.emitInt((0b0000001110 << 22) + | (low(ui12, 12) << 10) + | (rj.encoding << 5) + | rd.encoding); + } + + private void emitLu32i_d(Register rj, int imm20) { + // lu32i_d + code.emitInt((0b0001011 << 25) + | (low(imm20, 20)<<5) + | 
rj.encoding); + } + + private void emitLu52i_d(Register rd, Register rj, int imm12) { + // lu52i_d + code.emitInt((0b0000001100 << 22) + | (low(imm12, 12) << 10) + | (rj.encoding << 5) + | rd.encoding); + } + + private void emitLoadImmediate(Register rd, int imm32) { + emitLu12i_w(rd, (imm32 >> 12) & 0xfffff); + emitOri(rd, rd, imm32 & 0xfff); + } + + private void emitLi52(Register rj, long imm) { + emitLu12i_w(rj, (int) ((imm >> 12) & 0xfffff)); + emitOri(rj, rj, (int) (imm & 0xfff)); + emitLu32i_d(rj, (int) ((imm >> 32) & 0xfffff)); + } + + private void emitLi64(Register rj, long imm) { + emitLu12i_w(rj, (int) ((imm >> 12) & 0xfffff)); + emitOri(rj, rj, (int) (imm & 0xfff)); + emitLu32i_d(rj, (int) ((imm >> 32) & 0xfffff)); + emitLu52i_d(rj, rj, (int) ((imm >> 52) & 0xfff)); + } + + private void emitOr(Register rd, Register rj, Register rk) { + // orr + code.emitInt((0b00000000000101010 << 15) + | (rk.encoding << 10) + | (rj.encoding << 5) + | rd.encoding); + } + + private void emitMove(Register rd, Register rs) { + // move + emitOr(rd, rs, LoongArch64.zero); + } + + private void emitMovfr2gr(Register rd, LoongArch64Kind kind, Register rj) { + // movfr2gr_s/movfr2gr_d + int opc = 0; + switch (kind) { + case SINGLE: opc = 0b0000000100010100101101; break; + case DOUBLE: opc = 0b0000000100010100101110; break; + default: throw new IllegalArgumentException(); + } + code.emitInt((opc << 10) + | (rj.encoding << 5) + | rd.encoding); + } + + private void emitLoadRegister(Register rd, LoongArch64Kind kind, Register rj, int offset) { + // load + assert offset >= 0; + int opc = 0; + switch (kind) { + case BYTE: opc = 0b0010100000; break; + case WORD: opc = 0b0010100001; break; + case DWORD: opc = 0b0010100010; break; + case QWORD: opc = 0b0010100011; break; + case UDWORD: opc = 0b0010101010; break; + case SINGLE: opc = 0b0010101100; break; + case DOUBLE: opc = 0b0010101110; break; + default: throw new IllegalArgumentException(); + } + code.emitInt((opc << 22) + | (low(offset, 12) << 10) + | (rj.encoding << 5) + | rd.encoding); + } + + private void emitStoreRegister(Register rd, LoongArch64Kind kind, Register rj, int offset) { + // store + assert offset >= 0; + int opc = 0; + switch (kind) { + case BYTE: opc = 0b0010100100; break; + case WORD: opc = 0b0010100101; break; + case DWORD: opc = 0b0010100110; break; + case QWORD: opc = 0b0010100111; break; + case SINGLE: opc = 0b0010101101; break; + case DOUBLE: opc = 0b0010101111; break; + default: throw new IllegalArgumentException(); + } + code.emitInt((opc << 22) + | (low(offset, 12) << 10) + | (rj.encoding << 5) + | rd.encoding); + } + + private void emitJirl(Register rd, Register rj, int offs) { + // jirl + code.emitInt((0b010011 << 26) + | (low16(offs >> 2) << 10) + | (rj.encoding << 5) + | rd.encoding); + } + + @Override + public void emitGrowStack(int size) { + assert size % 16 == 0; + if (size > -4096 && size < 0) { + emitAdd(LoongArch64.sp, LoongArch64.sp, -size); + } else if (size == 0) { + // No-op + } else if (size < 4096) { + emitAdd(LoongArch64.sp, LoongArch64.sp, -size); + } else if (size < 65535) { + emitLoadImmediate(scratchRegister, size); + emitSub(LoongArch64.sp, LoongArch64.sp, scratchRegister); + } else { + throw new IllegalArgumentException(); + } + } + + @Override + public void emitPrologue() { + // Must be patchable by NativeJump::patch_verified_entry + emitNop(); + emitGrowStack(32); + emitStoreRegister(LoongArch64.ra, LoongArch64Kind.QWORD, LoongArch64.sp, 24); + emitStoreRegister(LoongArch64.fp, LoongArch64Kind.QWORD, 
LoongArch64.sp, 16); + emitGrowStack(-16); + emitMove(LoongArch64.fp, LoongArch64.sp); + setDeoptRescueSlot(newStackSlot(LoongArch64Kind.QWORD)); + } + + @Override + public void emitEpilogue() { + recordMark(config.MARKID_DEOPT_HANDLER_ENTRY); + recordCall(new HotSpotForeignCallTarget(config.handleDeoptStub), 4*4, true, null); + emitCall(0xdeaddeaddeadL); + } + + @Override + public void emitCallPrologue(CallingConvention cc, Object... prim) { + emitGrowStack(cc.getStackSize()); + frameSize += cc.getStackSize(); + AllocatableValue[] args = cc.getArguments(); + for (int i = 0; i < args.length; i++) { + emitLoad(args[i], prim[i]); + } + currentGeneral = 0; + currentFloat = 0; + } + + @Override + public void emitCallEpilogue(CallingConvention cc) { + emitGrowStack(-cc.getStackSize()); + frameSize -= cc.getStackSize(); + } + + @Override + public void emitCall(long addr) { + // long call (absolute) + // lu12i_w(T4, split_low20(value >> 12)); + // lu32i_d(T4, split_low20(value >> 32)); + // jirl(RA, T4, split_low12(value)); + emitLu12i_w(LoongArch64.t4, (int) ((addr >> 12) & 0xfffff)); + emitLu32i_d(LoongArch64.t4, (int) ((addr >> 32) & 0xfffff)); + emitJirl(LoongArch64.ra, LoongArch64.t4, (int) (addr & 0xfff)); + } + + @Override + public void emitLoad(AllocatableValue av, Object prim) { + if (av instanceof RegisterValue) { + Register reg = ((RegisterValue) av).getRegister(); + if (prim instanceof Float) { + if (currentFloat < floatParameterRegisters.size()) { + currentFloat++; + emitLoadFloat(reg, (Float) prim); + } else if (currentGeneral < nativeGeneralParameterRegisters.size()) { + currentGeneral++; + emitLoadFloat(doubleScratch, (Float) prim); + emitMovfr2gr(reg, LoongArch64Kind.SINGLE, doubleScratch); + } + } else if (prim instanceof Double) { + if (currentFloat < floatParameterRegisters.size()) { + currentFloat++; + emitLoadDouble(reg, (Double) prim); + } else if (currentGeneral < nativeGeneralParameterRegisters.size()) { + currentGeneral++; + emitLoadDouble(doubleScratch, (Double) prim); + emitMovfr2gr(reg, LoongArch64Kind.DOUBLE, doubleScratch); + } + } else if (prim instanceof Integer) { + emitLoadInt(reg, (Integer) prim); + } else if (prim instanceof Long) { + emitLoadLong(reg, (Long) prim); + } + } else if (av instanceof StackSlot) { + StackSlot slot = (StackSlot) av; + if (prim instanceof Float) { + emitFloatToStack(slot, emitLoadFloat(doubleScratch, (Float) prim)); + } else if (prim instanceof Double) { + emitDoubleToStack(slot, emitLoadDouble(doubleScratch, (Double) prim)); + } else if (prim instanceof Integer) { + emitIntToStack(slot, emitLoadInt(scratchRegister, (Integer) prim)); + } else if (prim instanceof Long) { + emitLongToStack(slot, emitLoadLong(scratchRegister, (Long) prim)); + } else { + assert false : "Unimplemented"; + } + } else { + throw new IllegalArgumentException("Unknown value " + av); + } + } + + @Override + public Register emitLoadPointer(HotSpotConstant c) { + recordDataPatchInCode(new ConstantReference((VMConstant) c)); + + Register ret = newRegister(); + // need to match patchable_li52 instruction sequence + // lu12i_ori_lu32i + emitLi52(ret, 0xdeaddead); + return ret; + } + + @Override + public Register emitLoadPointer(Register b, int offset) { + Register ret = newRegister(); + emitLoadRegister(ret, LoongArch64Kind.QWORD, b, offset); + return ret; + } + + @Override + public Register emitLoadNarrowPointer(DataSectionReference ref) { + recordDataPatchInCode(ref); + + Register ret = newRegister(); + emitPcaddu12i(ret, 0xdead >> 12); + emitAdd(ret, ret, 0xdead 
& 0xfff); + emitLoadRegister(ret, LoongArch64Kind.UDWORD, ret, 0); + return ret; + } + + @Override + public Register emitLoadPointer(DataSectionReference ref) { + recordDataPatchInCode(ref); + + Register ret = newRegister(); + emitPcaddu12i(ret, 0xdead >> 12); + emitAdd(ret, ret, 0xdead & 0xfff); + emitLoadRegister(ret, LoongArch64Kind.QWORD, ret, 0); + return ret; + } + + private Register emitLoadDouble(Register reg, double c) { + DataSectionReference ref = new DataSectionReference(); + ref.setOffset(data.position()); + data.emitDouble(c); + + recordDataPatchInCode(ref); + emitPcaddu12i(scratchRegister, 0xdead >> 12); + emitAdd(scratchRegister, scratchRegister, 0xdead & 0xfff); + emitLoadRegister(reg, LoongArch64Kind.DOUBLE, scratchRegister, 0); + return reg; + } + + private Register emitLoadFloat(Register reg, float c) { + DataSectionReference ref = new DataSectionReference(); + ref.setOffset(data.position()); + data.emitFloat(c); + + recordDataPatchInCode(ref); + emitPcaddu12i(scratchRegister, 0xdead >> 12); + emitAdd(scratchRegister, scratchRegister, 0xdead & 0xfff); + emitLoadRegister(reg, LoongArch64Kind.SINGLE, scratchRegister, 0); + return reg; + } + + @Override + public Register emitLoadFloat(float c) { + Register ret = LoongArch64.fv0; + return emitLoadFloat(ret, c); + } + + private Register emitLoadLong(Register reg, long c) { + emitLi64(reg, c); + return reg; + } + + @Override + public Register emitLoadLong(long c) { + Register ret = newRegister(); + return emitLoadLong(ret, c); + } + + private Register emitLoadInt(Register reg, int c) { + emitLoadImmediate(reg, c); + return reg; + } + + @Override + public Register emitLoadInt(int c) { + Register ret = newRegister(); + return emitLoadInt(ret, c); + } + + @Override + public Register emitIntArg0() { + return codeCache.getRegisterConfig() + .getCallingConventionRegisters(HotSpotCallingConventionType.JavaCall, JavaKind.Int) + .get(0); + } + + @Override + public Register emitIntArg1() { + return codeCache.getRegisterConfig() + .getCallingConventionRegisters(HotSpotCallingConventionType.JavaCall, JavaKind.Int) + .get(1); + } + + @Override + public Register emitIntAdd(Register a, Register b) { + emitAdd(a, a, b); + return a; + } + + @Override + public void emitTrap(DebugInfo info) { + // Dereference null pointer + emitMove(scratchRegister, LoongArch64.zero); + recordImplicitException(info); + emitLoadRegister(LoongArch64.zero, LoongArch64Kind.QWORD, scratchRegister, 0); + } + + @Override + public void emitIntRet(Register a) { + emitMove(LoongArch64.v0, a); + emitMove(LoongArch64.sp, LoongArch64.fp); + emitLoadRegister(LoongArch64.ra, LoongArch64Kind.QWORD, LoongArch64.sp, 8); + emitLoadRegister(LoongArch64.fp, LoongArch64Kind.QWORD, LoongArch64.sp, 0); + emitGrowStack(-16); + emitJirl(LoongArch64.zero, LoongArch64.ra, 0); + } + + @Override + public void emitFloatRet(Register a) { + assert a == LoongArch64.fv0 : "Unimplemented move " + a; + emitMove(LoongArch64.sp, LoongArch64.fp); + emitLoadRegister(LoongArch64.ra, LoongArch64Kind.QWORD, LoongArch64.sp, 8); + emitLoadRegister(LoongArch64.fp, LoongArch64Kind.QWORD, LoongArch64.sp, 0); + emitGrowStack(-16); + emitJirl(LoongArch64.zero, LoongArch64.ra, 0); + } + + @Override + public void emitPointerRet(Register a) { + emitIntRet(a); + } + + @Override + public StackSlot emitPointerToStack(Register a) { + return emitLongToStack(a); + } + + @Override + public StackSlot emitNarrowPointerToStack(Register a) { + return emitIntToStack(a); + } + + @Override + public Register 
emitUncompressPointer(Register compressed, long base, int shift) { + if (shift > 0) { + emitShiftLeft(compressed, compressed, shift); + } + + if (base != 0) { + emitLoadLong(scratchRegister, base); + emitAdd(compressed, compressed, scratchRegister); + } + + return compressed; + } + + private StackSlot emitDoubleToStack(StackSlot slot, Register a) { + emitStoreRegister(a, LoongArch64Kind.DOUBLE, LoongArch64.sp, slot.getOffset(frameSize)); + return slot; + } + + @Override + public StackSlot emitDoubleToStack(Register a) { + StackSlot ret = newStackSlot(LoongArch64Kind.DOUBLE); + return emitDoubleToStack(ret, a); + } + + private StackSlot emitFloatToStack(StackSlot slot, Register a) { + emitStoreRegister(a, LoongArch64Kind.SINGLE, LoongArch64.sp, slot.getOffset(frameSize)); + return slot; + } + + @Override + public StackSlot emitFloatToStack(Register a) { + StackSlot ret = newStackSlot(LoongArch64Kind.SINGLE); + return emitFloatToStack(ret, a); + } + + private StackSlot emitIntToStack(StackSlot slot, Register a) { + emitStoreRegister(a, LoongArch64Kind.DWORD, LoongArch64.sp, slot.getOffset(frameSize)); + return slot; + } + + @Override + public StackSlot emitIntToStack(Register a) { + StackSlot ret = newStackSlot(LoongArch64Kind.DWORD); + return emitIntToStack(ret, a); + } + + private StackSlot emitLongToStack(StackSlot slot, Register a) { + emitStoreRegister(a, LoongArch64Kind.QWORD, LoongArch64.sp, slot.getOffset(frameSize)); + return slot; + } + + @Override + public StackSlot emitLongToStack(Register a) { + StackSlot ret = newStackSlot(LoongArch64Kind.QWORD); + return emitLongToStack(ret, a); + } + +} diff --git a/test/hotspot/jtreg/compiler/runtime/criticalnatives/argumentcorruption/CheckLongArgs.java b/test/hotspot/jtreg/compiler/runtime/criticalnatives/argumentcorruption/CheckLongArgs.java index acb86812d25..664ea11d0dd 100644 --- a/test/hotspot/jtreg/compiler/runtime/criticalnatives/argumentcorruption/CheckLongArgs.java +++ b/test/hotspot/jtreg/compiler/runtime/criticalnatives/argumentcorruption/CheckLongArgs.java @@ -21,10 +21,17 @@ * questions. */ +/* + * This file has been modified by Loongson Technology in 2021, These + * modifications are Copyright (c) 2021, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + /* @test * @bug 8167409 * @requires (os.arch != "aarch64") & (os.arch != "arm") + * @requires (os.arch != "mips64el") & (os.arch != "loongarch64") * @run main/othervm/native -Xcomp -XX:+CriticalJNINatives compiler.runtime.criticalnatives.argumentcorruption.CheckLongArgs */ package compiler.runtime.criticalnatives.argumentcorruption; diff --git a/test/hotspot/jtreg/compiler/runtime/criticalnatives/lookup/LookUp.java b/test/hotspot/jtreg/compiler/runtime/criticalnatives/lookup/LookUp.java index eab36f93113..ee5ab2f6dd7 100644 --- a/test/hotspot/jtreg/compiler/runtime/criticalnatives/lookup/LookUp.java +++ b/test/hotspot/jtreg/compiler/runtime/criticalnatives/lookup/LookUp.java @@ -21,10 +21,17 @@ * questions. */ +/* + * This file has been modified by Loongson Technology in 2021, These + * modifications are Copyright (c) 2021, Loongson Technology, and are made + * available on the same license terms set forth above. 
+ */ + /* @test * @bug 8167408 * @requires (os.arch != "aarch64") & (os.arch != "arm") + * @requires (os.arch != "mips64el") & (os.arch != "loongarch64") * @run main/othervm/native -Xcomp -XX:+CriticalJNINatives compiler.runtime.criticalnatives.lookup.LookUp */ package compiler.runtime.criticalnatives.lookup; diff --git a/test/hotspot/jtreg/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java b/test/hotspot/jtreg/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java index 7774dabcb5f..c1cb6e00f36 100644 --- a/test/hotspot/jtreg/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java +++ b/test/hotspot/jtreg/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java @@ -21,6 +21,12 @@ * questions. */ +/* + * This file has been modified by Loongson Technology in 2021, These + * modifications are Copyright (c) 2021, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + package compiler.testlibrary.sha.predicate; import jdk.test.lib.Platform; @@ -63,10 +69,12 @@ public class IntrinsicPredicates { = new OrPredicate(new CPUSpecificPredicate("aarch64.*", new String[] { "sha1" }, null), new OrPredicate(new CPUSpecificPredicate("s390.*", new String[] { "sha1" }, null), new OrPredicate(new CPUSpecificPredicate("sparc.*", new String[] { "sha1" }, null), + // Basic instructions are used to implement SHA1 Intrinsics on LA, so "sha1" feature is not needed. + new OrPredicate(new CPUSpecificPredicate("loongarch64.*", null, null), // x86 variants new OrPredicate(new CPUSpecificPredicate("amd64.*", new String[] { "sha" }, null), new OrPredicate(new CPUSpecificPredicate("i386.*", new String[] { "sha" }, null), - new CPUSpecificPredicate("x86.*", new String[] { "sha" }, null)))))); + new CPUSpecificPredicate("x86.*", new String[] { "sha" }, null))))))); public static final BooleanSupplier SHA256_INSTRUCTION_AVAILABLE = new OrPredicate(new CPUSpecificPredicate("aarch64.*", new String[] { "sha256" }, null), @@ -74,12 +82,14 @@ public class IntrinsicPredicates { new OrPredicate(new CPUSpecificPredicate("sparc.*", new String[] { "sha256" }, null), new OrPredicate(new CPUSpecificPredicate("ppc64.*", new String[] { "sha" }, null), new OrPredicate(new CPUSpecificPredicate("ppc64le.*", new String[] { "sha" }, null), + // Basic instructions are used to implement SHA256 Intrinsics on LA, so "sha256" feature is not needed. + new OrPredicate(new CPUSpecificPredicate("loongarch64.*", null, null), // x86 variants new OrPredicate(new CPUSpecificPredicate("amd64.*", new String[] { "sha" }, null), new OrPredicate(new CPUSpecificPredicate("i386.*", new String[] { "sha" }, null), new OrPredicate(new CPUSpecificPredicate("x86.*", new String[] { "sha" }, null), new OrPredicate(new CPUSpecificPredicate("amd64.*", new String[] { "avx2", "bmi2" }, null), - new CPUSpecificPredicate("x86_64", new String[] { "avx2", "bmi2" }, null)))))))))); + new CPUSpecificPredicate("x86_64", new String[] { "avx2", "bmi2" }, null))))))))))); public static final BooleanSupplier SHA512_INSTRUCTION_AVAILABLE = new OrPredicate(new CPUSpecificPredicate("aarch64.*", new String[] { "sha512" }, null), diff --git a/test/hotspot/jtreg/runtime/ReservedStack/ReservedStackTest.java b/test/hotspot/jtreg/runtime/ReservedStack/ReservedStackTest.java index 127bb6abcd9..c9277604ae9 100644 --- a/test/hotspot/jtreg/runtime/ReservedStack/ReservedStackTest.java +++ b/test/hotspot/jtreg/runtime/ReservedStack/ReservedStackTest.java @@ -21,6 +21,12 @@ * questions. 
*/ +/* + * This file has been modified by Loongson Technology in 2021, These + * modifications are Copyright (c) 2021, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + /* * @test ReservedStackTest * @@ -239,7 +245,7 @@ private static boolean isAlwaysSupportedPlatform() { return Platform.isAix() || (Platform.isLinux() && (Platform.isPPC() || Platform.isS390x() || Platform.isX64() || - Platform.isX86())) || + Platform.isX86() || Platform.isMIPS() || Platform.isLoongArch64())) || Platform.isOSX() || Platform.isSolaris(); } diff --git a/test/hotspot/jtreg/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java b/test/hotspot/jtreg/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java index 77458554b76..05aee6b84cf 100644 --- a/test/hotspot/jtreg/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java +++ b/test/hotspot/jtreg/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java @@ -45,7 +45,7 @@ */ public class TestMutuallyExclusivePlatformPredicates { private static enum MethodGroup { - ARCH("isAArch64", "isARM", "isPPC", "isS390x", "isSparc", "isX64", "isX86"), + ARCH("isAArch64", "isARM", "isPPC", "isS390x", "isSparc", "isX64", "isX86", "isMIPS", "isLoongArch64"), BITNESS("is32bit", "is64bit"), OS("isAix", "isLinux", "isOSX", "isSolaris", "isWindows"), VM_TYPE("isClient", "isServer", "isGraal", "isMinimal", "isZero", "isEmbedded"), diff --git a/test/jdk/jdk/jfr/event/os/TestCPUInformation.java b/test/jdk/jdk/jfr/event/os/TestCPUInformation.java index 7990c49a1f6..025048c6b01 100644 --- a/test/jdk/jdk/jfr/event/os/TestCPUInformation.java +++ b/test/jdk/jdk/jfr/event/os/TestCPUInformation.java @@ -23,6 +23,12 @@ * questions. */ +/* + * This file has been modified by Loongson Technology in 2021, These + * modifications are Copyright (c) 2021, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + package jdk.jfr.event.os; import java.util.List; @@ -54,8 +60,8 @@ public static void main(String[] args) throws Throwable { Events.assertField(event, "hwThreads").atLeast(1); Events.assertField(event, "cores").atLeast(1); Events.assertField(event, "sockets").atLeast(1); - Events.assertField(event, "cpu").containsAny("Intel", "AMD", "Unknown x86", "sparc", "ARM", "PPC", "PowerPC", "AArch64", "s390"); - Events.assertField(event, "description").containsAny("Intel", "AMD", "Unknown x86", "SPARC", "ARM", "PPC", "PowerPC", "AArch64", "s390"); + Events.assertField(event, "cpu").containsAny("Intel", "AMD", "Unknown x86", "sparc", "ARM", "PPC", "PowerPC", "AArch64", "s390", "MIPS", "LoongArch"); + Events.assertField(event, "description").containsAny("Intel", "AMD", "Unknown x86", "SPARC", "ARM", "PPC", "PowerPC", "AArch64", "s390", "MIPS", "LoongArch"); } } } diff --git a/test/jdk/sun/security/pkcs11/PKCS11Test.java b/test/jdk/sun/security/pkcs11/PKCS11Test.java index b14daf6c6d7..da33514c754 100644 --- a/test/jdk/sun/security/pkcs11/PKCS11Test.java +++ b/test/jdk/sun/security/pkcs11/PKCS11Test.java @@ -21,6 +21,12 @@ * questions. */ +/* + * This file has been modified by Loongson Technology in 2022, These + * modifications are Copyright (c) 2021, 2022, Loongson Technology, and are made + * available on the same license terms set forth above. 
+ */
+
 // common infrastructure for SunPKCS11 tests
 
 import java.io.BufferedReader;
@@ -747,6 +753,9 @@ private static Map getOsMap() {
                 "/usr/lib64/" });
         osMap.put("Linux-ppc64-64", new String[] { "/usr/lib64/" });
         osMap.put("Linux-ppc64le-64", new String[] { "/usr/lib64/" });
+        osMap.put("Linux-mips64el-64", new String[]{"/usr/lib64/"});
+        osMap.put("Linux-loongarch64-64", new String[]{"/usr/lib/loongarch64-linux-gnu/",
+                "/usr/lib64/" });
         osMap.put("Linux-s390x-64", new String[] { "/usr/lib64/" });
         osMap.put("Windows-x86-32", new String[] {});
         osMap.put("Windows-amd64-64", new String[] {});
diff --git a/test/lib/jdk/test/lib/Platform.java b/test/lib/jdk/test/lib/Platform.java
index 5b3f1889cb7..aaf8867a7cd 100644
--- a/test/lib/jdk/test/lib/Platform.java
+++ b/test/lib/jdk/test/lib/Platform.java
@@ -21,6 +21,12 @@
  * questions.
  */
 
+/*
+ * This file has been modified by Loongson Technology in 2022, These
+ * modifications are Copyright (c) 2019, 2022, Loongson Technology, and are made
+ * available on the same license terms set forth above.
+ */
+
 package jdk.test.lib;
 
 import java.io.FileNotFoundException;
@@ -226,6 +232,14 @@ public static boolean isX86() {
         return isArch("(i386)|(x86(?!_64))");
     }
 
+    public static boolean isLoongArch64() {
+        return isArch("loongarch64");
+    }
+
+    public static boolean isMIPS() {
+        return isArch("mips.*");
+    }
+
     public static String getOsArch() {
         return osArch;
     }
diff --git a/test/micro/org/openjdk/bench/java/lang/RotateBenchmark.java b/test/micro/org/openjdk/bench/java/lang/RotateBenchmark.java
new file mode 100644
index 00000000000..81fd956a4e6
--- /dev/null
+++ b/test/micro/org/openjdk/bench/java/lang/RotateBenchmark.java
@@ -0,0 +1,87 @@
+//
+// Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+//
+// This code is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License version 2 only, as
+// published by the Free Software Foundation.
+//
+// This code is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+// version 2 for more details (a copy is included in the LICENSE file that
+// accompanied this code).
+//
+// You should have received a copy of the GNU General Public License version
+// 2 along with this work; if not, write to the Free Software Foundation,
+// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+//
+// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+// or visit www.oracle.com if you need additional information or have any
+// questions.
+//
+//
+package org.openjdk.bench.java.lang;
+
+import java.util.Random;
+import java.util.concurrent.TimeUnit;
+import org.openjdk.jmh.annotations.*;
+import org.openjdk.jmh.infra.Blackhole;
+
+@OutputTimeUnit(TimeUnit.MILLISECONDS)
+@State(Scope.Thread)
+@BenchmarkMode(Mode.Throughput)
+public class RotateBenchmark {
+
+  @Param({"1024"})
+  public int TESTSIZE;
+
+  @Param({"20"})
+  public int SHIFT;
+
+  public long [] larr;
+  public int [] iarr;
+
+  public long [] lres;
+  public int [] ires;
+
+
+  @Setup(Level.Trial)
+  public void BmSetup() {
+    Random r = new Random(1024);
+    larr = new long[TESTSIZE];
+    iarr = new int[TESTSIZE];
+    lres = new long[TESTSIZE];
+    ires = new int[TESTSIZE];
+
+    for (int i = 0; i < TESTSIZE; i++) {
+      larr[i] = r.nextLong();
+    }
+
+    for (int i = 0; i < TESTSIZE; i++) {
+      iarr[i] = r.nextInt();
+    }
+  }
+
+  @Benchmark
+  public void testRotateLeftI() {
+    for (int i = 0; i < TESTSIZE; i++)
+      ires[i] = Integer.rotateLeft(iarr[i], SHIFT);
+  }
+  @Benchmark
+  public void testRotateRightI() {
+    for (int i = 0; i < TESTSIZE; i++)
+      ires[i] = Integer.rotateRight(iarr[i], SHIFT);
+  }
+  @Benchmark
+  public void testRotateLeftL() {
+    for (int i = 0; i < TESTSIZE; i++)
+      lres[i] = Long.rotateLeft(larr[i], SHIFT);
+  }
+  @Benchmark
+  public void testRotateRightL() {
+    for (int i = 0; i < TESTSIZE; i++)
+      lres[i] = Long.rotateRight(larr[i], SHIFT);
+  }
+
+}
diff --git a/test/micro/org/openjdk/bench/vm/compiler/MacroLogicOpt.java b/test/micro/org/openjdk/bench/vm/compiler/MacroLogicOpt.java
new file mode 100644
index 00000000000..58400cadf68
--- /dev/null
+++ b/test/micro/org/openjdk/bench/vm/compiler/MacroLogicOpt.java
@@ -0,0 +1,125 @@
+/*
+ * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+package org.openjdk.bench.vm.compiler;
+
+import org.openjdk.jmh.annotations.*;
+import org.openjdk.jmh.infra.*;
+
+import java.util.concurrent.TimeUnit;
+import java.util.Random;
+
+@BenchmarkMode(Mode.Throughput)
+@OutputTimeUnit(TimeUnit.SECONDS)
+@State(Scope.Thread)
+public class MacroLogicOpt {
+  @Param({"64","128","256","512","1024","2048","4096"}) private int VECLEN;
+
+  private int [] ai = new int[VECLEN];
+  private int [] bi = new int[VECLEN];
+  private int [] ci = new int[VECLEN];
+  private int [] ri = new int[VECLEN];
+
+  private long [] al = new long[VECLEN];
+  private long [] bl = new long[VECLEN];
+  private long [] cl = new long[VECLEN];
+  private long [] dl = new long[VECLEN];
+  private long [] el = new long[VECLEN];
+  private long [] fl = new long[VECLEN];
+  private long [] rl = new long[VECLEN];
+
+  private Random r = new Random();
+
+  @Setup
+  public void init() {
+    ai = new int[VECLEN];
+    bi = new int[VECLEN];
+    ci = new int[VECLEN];
+    ri = new int[VECLEN];
+
+    al = new long[VECLEN];
+    bl = new long[VECLEN];
+    cl = new long[VECLEN];
+    dl = new long[VECLEN];
+    el = new long[VECLEN];
+    fl = new long[VECLEN];
+    rl = new long[VECLEN];
+    for (int i=0; i