diff --git a/src/hotspot/cpu/aarch64/aarch64.ad b/src/hotspot/cpu/aarch64/aarch64.ad index d75b3edb011..a29b1a48668 100644 --- a/src/hotspot/cpu/aarch64/aarch64.ad +++ b/src/hotspot/cpu/aarch64/aarch64.ad @@ -3657,36 +3657,40 @@ encode %{ __ tbnz(disp_hdr, exact_log2(markOopDesc::monitor_value), object_has_monitor); } - // Set tmp to be (markOop of object | UNLOCK_VALUE). - __ orr(tmp, disp_hdr, markOopDesc::unlocked_value); - - // Initialize the box. (Must happen before we update the object mark!) - __ str(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes())); - - // Compare object markOop with an unlocked value (tmp) and if - // equal exchange the stack address of our box with object markOop. - // On failure disp_hdr contains the possibly locked markOop. - __ cmpxchg(oop, tmp, box, Assembler::xword, /*acquire*/ true, - /*release*/ true, /*weak*/ false, disp_hdr); - __ br(Assembler::EQ, cont); - - assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0"); - - // If the compare-and-exchange succeeded, then we found an unlocked - // object, will have now locked it will continue at label cont + if (UseAltFastLocking) { + __ fast_lock(oop, disp_hdr, tmp, rscratch1, cont); + } else { + // Set tmp to be (markOop of object | UNLOCK_VALUE). + __ orr(tmp, disp_hdr, markOopDesc::unlocked_value); - __ bind(cas_failed); - // We did not see an unlocked object so try the fast recursive case. + // Initialize the box. (Must happen before we update the object mark!) + __ str(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes())); - // Check if the owner is self by comparing the value in the - // markOop of object (disp_hdr) with the stack pointer. - __ mov(rscratch1, sp); - __ sub(disp_hdr, disp_hdr, rscratch1); - __ mov(tmp, (address) (~(os::vm_page_size()-1) | (uintptr_t)markOopDesc::lock_mask_in_place)); - // If condition is true we are cont and hence we can store 0 as the - // displaced header in the box, which indicates that it is a recursive lock. - __ ands(tmp/*==0?*/, disp_hdr, tmp); // Sets flags for result - __ str(tmp/*==0, perhaps*/, Address(box, BasicLock::displaced_header_offset_in_bytes())); + // Compare object markOop with an unlocked value (tmp) and if + // equal exchange the stack address of our box with object markOop. + // On failure disp_hdr contains the possibly locked markOop. + __ cmpxchg(oop, tmp, box, Assembler::xword, /*acquire*/ true, + /*release*/ true, /*weak*/ false, disp_hdr); + __ br(Assembler::EQ, cont); + + assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0"); + + // If the compare-and-exchange succeeded, then we found an unlocked + // object, will have now locked it will continue at label cont + + __ bind(cas_failed); + // We did not see an unlocked object so try the fast recursive case. + + // Check if the owner is self by comparing the value in the + // markOop of object (disp_hdr) with the stack pointer. + __ mov(rscratch1, sp); + __ sub(disp_hdr, disp_hdr, rscratch1); + __ mov(tmp, (address) (~(os::vm_page_size()-1) | (uintptr_t)markOopDesc::lock_mask_in_place)); + // If condition is true we are cont and hence we can store 0 as the + // displaced header in the box, which indicates that it is a recursive lock. 
+ __ ands(tmp/*==0?*/, disp_hdr, tmp); // Sets flags for result + __ str(tmp/*==0, perhaps*/, Address(box, BasicLock::displaced_header_offset_in_bytes())); + } if ((EmitSync & 0x02) == 0) { __ b(cont); @@ -3708,12 +3712,14 @@ encode %{ __ ldr (rthread, Address(rthread, WispThread::thread_offset())); } - // Store a non-null value into the box to avoid looking like a re-entrant - // lock. The fast-path monitor unlock code checks for - // markOopDesc::monitor_value so use markOopDesc::unused_mark which has the - // relevant bit set, and also matches ObjectSynchronizer::slow_enter. - __ mov(tmp, (address)markOopDesc::unused_mark()); - __ str(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes())); + if (!UseAltFastLocking) { + // Store a non-null value into the box to avoid looking like a re-entrant + // lock. The fast-path monitor unlock code checks for + // markOopDesc::monitor_value so use markOopDesc::unused_mark which has the + // relevant bit set, and also matches ObjectSynchronizer::slow_enter. + __ mov(tmp, (address)markOopDesc::unused_mark()); + __ str(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes())); + } } __ bind(cont); @@ -3742,26 +3748,33 @@ encode %{ __ biased_locking_exit(oop, tmp, cont); } - // Find the lock address and load the displaced header from the stack. - __ ldr(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes())); + if (!UseAltFastLocking) { + // Find the lock address and load the displaced header from the stack. + __ ldr(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes())); - // If the displaced header is 0, we have a recursive unlock. - __ cmp(disp_hdr, zr); - __ br(Assembler::EQ, cont); + // If the displaced header is 0, we have a recursive unlock. + __ cmp(disp_hdr, zr); + __ br(Assembler::EQ, cont); + } // Handle existing monitor. if ((EmitSync & 0x02) == 0) { __ ldr(tmp, Address(oop, oopDesc::mark_offset_in_bytes())); - __ tbnz(disp_hdr, exact_log2(markOopDesc::monitor_value), object_has_monitor); + __ tbnz(tmp, exact_log2(markOopDesc::monitor_value), object_has_monitor); } - // Check if it is still a light weight lock, this is is true if we - // see the stack address of the basicLock in the markOop of the - // object. + if (UseAltFastLocking) { + __ fast_unlock(oop, tmp, box, disp_hdr, cont); + __ b(cont); + } else { + // Check if it is still a light weight lock, this is is true if we + // see the stack address of the basicLock in the markOop of the + // object. - __ cmpxchg(oop, box, disp_hdr, Assembler::xword, /*acquire*/ false, - /*release*/ true, /*weak*/ false, tmp); - __ b(cont); + __ cmpxchg(oop, box, disp_hdr, Assembler::xword, /*acquire*/ false, + /*release*/ true, /*weak*/ false, tmp); + __ b(cont); + } assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0"); @@ -3769,6 +3782,20 @@ encode %{ if ((EmitSync & 0x02) == 0) { __ bind(object_has_monitor); __ add(tmp, tmp, -markOopDesc::monitor_value); // monitor + + if (UseAltFastLocking) { + // If the owner is anonymous, we need to fix it -- in an outline stub. + Register tmp2 = disp_hdr; + __ ldr(tmp2, Address(tmp, ObjectMonitor::owner_offset_in_bytes())); + // We cannot use tbnz here, the target might be too far away and cannot + // be encoded. 
+ __ tst(tmp2, (uint64_t)ObjectMonitor::ANONYMOUS_OWNER); + C2HandleAnonOMOwnerStub* stub = new (Compile::current()->comp_arena()) C2HandleAnonOMOwnerStub(tmp, tmp2); + Compile::current()->add_stub(stub); + __ br(Assembler::NE, stub->entry()); + __ bind(stub->continuation()); + } + __ ldr(rscratch1, Address(tmp, ObjectMonitor::owner_offset_in_bytes())); __ ldr(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset_in_bytes())); if (UseWispMonitor) { diff --git a/src/hotspot/cpu/aarch64/c1_MacroAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/c1_MacroAssembler_aarch64.cpp index 218113b7811..0d4eada3909 100644 --- a/src/hotspot/cpu/aarch64/c1_MacroAssembler_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/c1_MacroAssembler_aarch64.cpp @@ -81,39 +81,43 @@ int C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr // Load object header ldr(hdr, Address(obj, hdr_offset)); - // and mark it as unlocked - orr(hdr, hdr, markOopDesc::unlocked_value); - // save unlocked object header into the displaced header location on the stack - str(hdr, Address(disp_hdr, 0)); - // test if object header is still the same (i.e. unlocked), and if so, store the - // displaced header address in the object header - if it is not the same, get the - // object header instead - lea(rscratch2, Address(obj, hdr_offset)); - cmpxchgptr(hdr, disp_hdr, rscratch2, rscratch1, done, /*fallthough*/NULL); - // if the object header was the same, we're done - // if the object header was not the same, it is now in the hdr register - // => test if it is a stack pointer into the same stack (recursive locking), i.e.: - // - // 1) (hdr & aligned_mask) == 0 - // 2) sp <= hdr - // 3) hdr <= sp + page_size - // - // these 3 tests can be done by evaluating the following expression: - // - // (hdr - sp) & (aligned_mask - page_size) - // - // assuming both the stack pointer and page_size have their least - // significant 2 bits cleared and page_size is a power of 2 - mov(rscratch1, sp); - sub(hdr, hdr, rscratch1); - ands(hdr, hdr, aligned_mask - os::vm_page_size()); - // for recursive locking, the result is zero => save it in the displaced header - // location (NULL in the displaced hdr location indicates recursive locking) - str(hdr, Address(disp_hdr, 0)); - // otherwise we don't care about the result and handle locking via runtime call - cbnz(hdr, slow_case); - // done - bind(done); + if (UseAltFastLocking) { + fast_lock(obj, hdr, rscratch1, rscratch2, slow_case); + } else { + // and mark it as unlocked + orr(hdr, hdr, markOopDesc::unlocked_value); + // save unlocked object header into the displaced header location on the stack + str(hdr, Address(disp_hdr, 0)); + // test if object header is still the same (i.e. 
unlocked), and if so, store the + // displaced header address in the object header - if it is not the same, get the + // object header instead + lea(rscratch2, Address(obj, hdr_offset)); + cmpxchgptr(hdr, disp_hdr, rscratch2, rscratch1, done, /*fallthough*/NULL); + // if the object header was the same, we're done + // if the object header was not the same, it is now in the hdr register + // => test if it is a stack pointer into the same stack (recursive locking), i.e.: + // + // 1) (hdr & aligned_mask) == 0 + // 2) sp <= hdr + // 3) hdr <= sp + page_size + // + // these 3 tests can be done by evaluating the following expression: + // + // (hdr - sp) & (aligned_mask - page_size) + // + // assuming both the stack pointer and page_size have their least + // significant 2 bits cleared and page_size is a power of 2 + mov(rscratch1, sp); + sub(hdr, hdr, rscratch1); + ands(hdr, hdr, aligned_mask - os::vm_page_size()); + // for recursive locking, the result is zero => save it in the displaced header + // location (NULL in the displaced hdr location indicates recursive locking) + str(hdr, Address(disp_hdr, 0)); + // otherwise we don't care about the result and handle locking via runtime call + cbnz(hdr, slow_case); + // done + bind(done); + } if (PrintBiasedLockingStatistics) { lea(rscratch2, ExternalAddress((address)BiasedLocking::fast_path_entry_count_addr())); addmw(Address(rscratch2, 0), 1, rscratch1); @@ -134,29 +138,41 @@ void C1_MacroAssembler::unlock_object(Register hdr, Register obj, Register disp_ biased_locking_exit(obj, hdr, done); } - // load displaced header - ldr(hdr, Address(disp_hdr, 0)); - // if the loaded hdr is NULL we had recursive locking - // if we had recursive locking, we are done - cbz(hdr, done); + if (!UseAltFastLocking) { + // load displaced header + ldr(hdr, Address(disp_hdr, 0)); + // if the loaded hdr is NULL we had recursive locking + // if we had recursive locking, we are done + cbz(hdr, done); + } + if (!UseBiasedLocking) { // load object ldr(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); } verify_oop(obj); - // test if object header is pointing to the displaced header, and if so, restore - // the displaced header in the object - if the object header is not pointing to - // the displaced header, get the object header instead - // if the object header was not pointing to the displaced header, - // we do unlocking via runtime call - if (hdr_offset) { - lea(rscratch1, Address(obj, hdr_offset)); - cmpxchgptr(disp_hdr, hdr, rscratch1, rscratch2, done, &slow_case); + if (UseAltFastLocking) { + ldr(hdr, Address(obj, oopDesc::mark_offset_in_bytes())); + // We cannot use tbnz here, the target might be too far away and cannot + // be encoded. 
+ tst(hdr, markOopDesc::monitor_value); + br(Assembler::NE, slow_case); + fast_unlock(obj, hdr, rscratch1, rscratch2, slow_case); } else { - cmpxchgptr(disp_hdr, hdr, obj, rscratch2, done, &slow_case); + // test if object header is pointing to the displaced header, and if so, restore + // the displaced header in the object - if the object header is not pointing to + // the displaced header, get the object header instead + // if the object header was not pointing to the displaced header, + // we do unlocking via runtime call + if (hdr_offset) { + lea(rscratch1, Address(obj, hdr_offset)); + cmpxchgptr(disp_hdr, hdr, rscratch1, rscratch2, done, &slow_case); + } else { + cmpxchgptr(disp_hdr, hdr, obj, rscratch2, done, &slow_case); + } + // done + bind(done); } - // done - bind(done); } diff --git a/src/hotspot/cpu/aarch64/c2_CodeStubs_aarch64.cpp b/src/hotspot/cpu/aarch64/c2_CodeStubs_aarch64.cpp new file mode 100644 index 00000000000..a033bfcac84 --- /dev/null +++ b/src/hotspot/cpu/aarch64/c2_CodeStubs_aarch64.cpp @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2020, 2022 Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "opto/c2_MacroAssembler.hpp" +#include "opto/c2_CodeStubs.hpp" +#include "runtime/objectMonitor.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" + +#define __ masm. + +int C2HandleAnonOMOwnerStub::max_size() const { + // Max size of stub has been determined by testing with 0, in which case + // C2CodeStubList::emit() will throw an assertion and report the actual size that + // is needed. + return 24; +} + +void C2HandleAnonOMOwnerStub::emit(C2_MacroAssembler& masm) { + __ bind(entry()); + Register mon = monitor(); + Register t = tmp(); + assert(t != noreg, "need tmp register"); + + // Fix owner to be the current thread. + __ str(rthread, Address(mon, ObjectMonitor::owner_offset_in_bytes())); + + // Pop owner object from lock-stack. 
+ __ ldrw(t, Address(rthread, JavaThread::lock_stack_top_offset())); + __ subw(t, t, oopSize); +#ifdef ASSERT + __ str(zr, Address(rthread, t)); +#endif + __ strw(t, Address(rthread, JavaThread::lock_stack_top_offset())); + + __ b(continuation()); +} + +#undef __ diff --git a/src/hotspot/cpu/aarch64/interp_masm_aarch64.cpp b/src/hotspot/cpu/aarch64/interp_masm_aarch64.cpp index 491873b745e..c4f2bba56dc 100644 --- a/src/hotspot/cpu/aarch64/interp_masm_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/interp_masm_aarch64.cpp @@ -732,64 +732,76 @@ void InterpreterMacroAssembler::lock_object(Register lock_reg) // Load object pointer into obj_reg %c_rarg3 ldr(obj_reg, Address(lock_reg, obj_offset)); - if (UseBiasedLocking) { - biased_locking_enter(lock_reg, obj_reg, swap_reg, tmp, false, done, &slow_case); - } - - // Load (object->mark() | 1) into swap_reg - ldr(rscratch1, Address(obj_reg, oopDesc::mark_offset_in_bytes())); - orr(swap_reg, rscratch1, 1); - - // Save (object->mark() | 1) into BasicLock's displaced header - str(swap_reg, Address(lock_reg, mark_offset)); - - assert(lock_offset == 0, - "displached header must be first word in BasicObjectLock"); - - Label fail; - if (PrintBiasedLockingStatistics) { - Label fast; - cmpxchg_obj_header(swap_reg, lock_reg, obj_reg, rscratch1, fast, &fail); - bind(fast); - atomic_incw(Address((address)BiasedLocking::fast_path_entry_count_addr()), - rscratch2, rscratch1, tmp); + if (UseAltFastLocking) { + ldr(tmp, Address(obj_reg, oopDesc::mark_offset_in_bytes())); + fast_lock(obj_reg, tmp, rscratch1, rscratch2, slow_case); b(done); - bind(fail); } else { - cmpxchg_obj_header(swap_reg, lock_reg, obj_reg, rscratch1, done, /*fallthrough*/NULL); - } + if (UseBiasedLocking) { + biased_locking_enter(lock_reg, obj_reg, swap_reg, tmp, false, done, &slow_case); + } - // Test if the oopMark is an obvious stack pointer, i.e., - // 1) (mark & 7) == 0, and - // 2) rsp <= mark < mark + os::pagesize() - // - // These 3 tests can be done by evaluating the following - // expression: ((mark - rsp) & (7 - os::vm_page_size())), - // assuming both stack pointer and pagesize have their - // least significant 3 bits clear. 
- // NOTE: the oopMark is in swap_reg %r0 as the result of cmpxchg - // NOTE2: aarch64 does not like to subtract sp from rn so take a - // copy - mov(rscratch1, sp); - sub(swap_reg, swap_reg, rscratch1); - ands(swap_reg, swap_reg, (uint64_t)(7 - os::vm_page_size())); - - // Save the test result, for recursive case, the result is zero - str(swap_reg, Address(lock_reg, mark_offset)); - - if (PrintBiasedLockingStatistics) { - br(Assembler::NE, slow_case); - atomic_incw(Address((address)BiasedLocking::fast_path_entry_count_addr()), - rscratch2, rscratch1, tmp); + // Load (object->mark() | 1) into swap_reg + ldr(rscratch1, Address(obj_reg, oopDesc::mark_offset_in_bytes())); + orr(swap_reg, rscratch1, 1); + + // Save (object->mark() | 1) into BasicLock's displaced header + str(swap_reg, Address(lock_reg, mark_offset)); + + assert(lock_offset == 0, + "displached header must be first word in BasicObjectLock"); + + Label fail; + if (PrintBiasedLockingStatistics) { + Label fast; + cmpxchg_obj_header(swap_reg, lock_reg, obj_reg, rscratch1, fast, &fail); + bind(fast); + atomic_incw(Address((address)BiasedLocking::fast_path_entry_count_addr()), + rscratch2, rscratch1, tmp); + b(done); + bind(fail); + } else { + cmpxchg_obj_header(swap_reg, lock_reg, obj_reg, rscratch1, done, /*fallthrough*/NULL); + } + + // Test if the oopMark is an obvious stack pointer, i.e., + // 1) (mark & 7) == 0, and + // 2) rsp <= mark < mark + os::pagesize() + // + // These 3 tests can be done by evaluating the following + // expression: ((mark - rsp) & (7 - os::vm_page_size())), + // assuming both stack pointer and pagesize have their + // least significant 3 bits clear. + // NOTE: the oopMark is in swap_reg %r0 as the result of cmpxchg + // NOTE2: aarch64 does not like to subtract sp from rn so take a + // copy + mov(rscratch1, sp); + sub(swap_reg, swap_reg, rscratch1); + ands(swap_reg, swap_reg, (uint64_t)(7 - os::vm_page_size())); + + // Save the test result, for recursive case, the result is zero + str(swap_reg, Address(lock_reg, mark_offset)); + + if (PrintBiasedLockingStatistics) { + br(Assembler::NE, slow_case); + atomic_incw(Address((address)BiasedLocking::fast_path_entry_count_addr()), + rscratch2, rscratch1, tmp); + } + br(Assembler::EQ, done); } - br(Assembler::EQ, done); bind(slow_case); // Call the runtime routine for slow case - call_VM(noreg, - CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), - lock_reg); + if (UseAltFastLocking) { + call_VM(noreg, + CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter_obj), + obj_reg); + } else { + call_VM(noreg, + CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), + lock_reg); + } bind(done); } @@ -824,9 +836,11 @@ void InterpreterMacroAssembler::unlock_object(Register lock_reg) save_bcp(); // Save in case of exception - // Convert from BasicObjectLock structure to object and BasicLock - // structure Store the BasicLock address into %r0 - lea(swap_reg, Address(lock_reg, BasicObjectLock::lock_offset_in_bytes())); + if (!UseAltFastLocking) { + // Convert from BasicObjectLock structure to object and BasicLock + // structure Store the BasicLock address into %r0 + lea(swap_reg, Address(lock_reg, BasicObjectLock::lock_offset_in_bytes())); + } // Load oop into obj_reg(%c_rarg3) ldr(obj_reg, Address(lock_reg, BasicObjectLock::obj_offset_in_bytes())); @@ -834,19 +848,41 @@ void InterpreterMacroAssembler::unlock_object(Register lock_reg) // Free entry str(zr, Address(lock_reg, BasicObjectLock::obj_offset_in_bytes())); - if (UseBiasedLocking) { - 
biased_locking_exit(obj_reg, header_reg, done); - } + if (UseAltFastLocking) { + Label slow_case; + + // Check for non-symmetric locking. This is allowed by the spec and the interpreter + // must handle it. + Register tmp = rscratch1; + // First check for lock-stack underflow. + ldrw(tmp, Address(rthread, JavaThread::lock_stack_top_offset())); + cmpw(tmp, (unsigned)LockStack::start_offset()); + br(Assembler::LE, slow_case); + // Then check if the top of the lock-stack matches the unlocked object. + subw(tmp, tmp, oopSize); + ldr(tmp, Address(rthread, tmp)); + cmpoop(tmp, obj_reg); + br(Assembler::NE, slow_case); + ldr(header_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); + tbnz(header_reg, exact_log2(markOopDesc::monitor_value), slow_case); + fast_unlock(obj_reg, header_reg, swap_reg, rscratch1, slow_case); + b(done); + bind(slow_case); + } else { + if (UseBiasedLocking) { + biased_locking_exit(obj_reg, header_reg, done); + } - // Load the old header from BasicLock structure - ldr(header_reg, Address(swap_reg, - BasicLock::displaced_header_offset_in_bytes())); + // Load the old header from BasicLock structure + ldr(header_reg, Address(swap_reg, + BasicLock::displaced_header_offset_in_bytes())); - // Test for recursion - cbz(header_reg, done); + // Test for recursion + cbz(header_reg, done); - // Atomic swap back the old header - cmpxchg_obj_header(swap_reg, header_reg, obj_reg, rscratch1, done, /*fallthrough*/NULL); + // Atomic swap back the old header + cmpxchg_obj_header(swap_reg, header_reg, obj_reg, rscratch1, done, /*fallthrough*/NULL); + } // Call the runtime routine for slow case. str(obj_reg, Address(lock_reg, BasicObjectLock::obj_offset_in_bytes())); // restore obj diff --git a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp index 4a1ab374cde..5803af77147 100644 --- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp @@ -5981,3 +5981,97 @@ void MacroAssembler::get_thread(Register dst) { pop(saved_regs, sp); } + +// Implements fast-locking. +// Branches to slow upon failure to lock the object, with ZF cleared. +// Falls through upon success with ZF set. +// +// - obj: the object to be locked +// - hdr: the header, already loaded from obj, will be destroyed +// - t1, t2: temporary registers, will be destroyed +void MacroAssembler::fast_lock(Register obj, Register hdr, Register t1, Register t2, Label& slow) { + assert(UseAltFastLocking, "sanity"); + assert_different_registers(obj, hdr, t1, t2); + + // Check if we would have space on lock-stack for the object. + ldrw(t1, Address(rthread, JavaThread::lock_stack_top_offset())); + cmpw(t1, (unsigned)LockStack::end_offset() - 1); + br(Assembler::GT, slow); + + // Load (object->mark() | 1) into hdr + orr(hdr, hdr, markOopDesc::unlocked_value); + // Clear lock-bits, into t2 + eor(t2, hdr, markOopDesc::unlocked_value); + // Try to swing header from unlocked to locked + cmpxchg(/*addr*/ obj, /*expected*/ hdr, /*new*/ t2, Assembler::xword, + /*acquire*/ true, /*release*/ true, /*weak*/ false, t1); + br(Assembler::NE, slow); + + // After successful lock, push object on lock-stack + ldrw(t1, Address(rthread, JavaThread::lock_stack_top_offset())); + str(obj, Address(rthread, t1)); + addw(t1, t1, oopSize); + strw(t1, Address(rthread, JavaThread::lock_stack_top_offset())); +} + +// Implements fast-unlocking. +// Branches to slow upon failure, with ZF cleared. +// Falls through upon success, with ZF set. 
+// +// - obj: the object to be unlocked +// - hdr: the (pre-loaded) header of the object +// - t1, t2: temporary registers +void MacroAssembler::fast_unlock(Register obj, Register hdr, Register t1, Register t2, Label& slow) { + assert(UseAltFastLocking, "sanity"); + assert_different_registers(obj, hdr, t1, t2); + +#ifdef ASSERT + { + // The following checks rely on the fact that LockStack is only ever modified by + // its owning thread, even if the lock got inflated concurrently; removal of LockStack + // entries after inflation will happen delayed in that case. + + // Check for lock-stack underflow. + Label stack_ok; + ldrw(t1, Address(rthread, JavaThread::lock_stack_top_offset())); + cmpw(t1, (unsigned)LockStack::start_offset()); + br(Assembler::GT, stack_ok); + STOP("Lock-stack underflow"); + bind(stack_ok); + } + { + // Check if the top of the lock-stack matches the unlocked object. + Label tos_ok; + subw(t1, t1, oopSize); + ldr(t1, Address(rthread, t1)); + cmpoop(t1, obj); + br(Assembler::EQ, tos_ok); + STOP("Top of lock-stack does not match the unlocked object"); + bind(tos_ok); + } + { + // Check that hdr is fast-locked. + Label hdr_ok; + tst(hdr, markOopDesc::lock_mask_in_place); + br(Assembler::EQ, hdr_ok); + STOP("Header is not fast-locked"); + bind(hdr_ok); + } +#endif + + // Load the new header (unlocked) into t1 + orr(t1, hdr, markOopDesc::unlocked_value); + + // Try to swing header from locked to unlocked + cmpxchg(obj, hdr, t1, Assembler::xword, + /*acquire*/ true, /*release*/ true, /*weak*/ false, t2); + br(Assembler::NE, slow); + + // After successful unlock, pop object from lock-stack + ldrw(t1, Address(rthread, JavaThread::lock_stack_top_offset())); + subw(t1, t1, oopSize); +#ifdef ASSERT + str(zr, Address(rthread, t1)); +#endif + strw(t1, Address(rthread, JavaThread::lock_stack_top_offset())); +} diff --git a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp index 668230340cd..2c8037678b3 100644 --- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp @@ -1040,6 +1040,9 @@ class MacroAssembler: public Assembler { enum operand_size size, bool acquire, bool release, bool weak, Register result); + + void fast_lock(Register obj, Register hdr, Register t1, Register t2, Label& slow); + void fast_unlock(Register obj, Register hdr, Register t1, Register t2, Label& slow); private: void compare_eq(Register rn, Register rm, enum operand_size size); diff --git a/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp b/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp index 517fdd37370..26a04cb4753 100644 --- a/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp @@ -1775,40 +1775,45 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, // Load the oop from the handle __ ldr(obj_reg, Address(oop_handle_reg, 0)); - if (UseBiasedLocking) { - __ biased_locking_enter(lock_reg, obj_reg, swap_reg, tmp, false, lock_done, &slow_path_lock); - } + if (UseAltFastLocking) { + __ ldr(swap_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); + __ fast_lock(obj_reg, swap_reg, tmp, rscratch1, slow_path_lock); + } else { + if (UseBiasedLocking) { + __ biased_locking_enter(lock_reg, obj_reg, swap_reg, tmp, false, lock_done, &slow_path_lock); + } - // Load (object->mark() | 1) into swap_reg %r0 - __ ldr(rscratch1, Address(obj_reg, oopDesc::mark_offset_in_bytes())); - __ orr(swap_reg, rscratch1, 1); + // Load 
(object->mark() | 1) into swap_reg %r0 + __ ldr(rscratch1, Address(obj_reg, oopDesc::mark_offset_in_bytes())); + __ orr(swap_reg, rscratch1, 1); - // Save (object->mark() | 1) into BasicLock's displaced header - __ str(swap_reg, Address(lock_reg, mark_word_offset)); + // Save (object->mark() | 1) into BasicLock's displaced header + __ str(swap_reg, Address(lock_reg, mark_word_offset)); - // src -> dest iff dest == r0 else r0 <- dest - { Label here; - __ cmpxchg_obj_header(r0, lock_reg, obj_reg, rscratch1, lock_done, /*fallthrough*/NULL); - } + // src -> dest iff dest == r0 else r0 <- dest + { Label here; + __ cmpxchg_obj_header(r0, lock_reg, obj_reg, rscratch1, lock_done, /*fallthrough*/NULL); + } - // Hmm should this move to the slow path code area??? + // Hmm should this move to the slow path code area??? - // Test if the oopMark is an obvious stack pointer, i.e., - // 1) (mark & 3) == 0, and - // 2) sp <= mark < mark + os::pagesize() - // These 3 tests can be done by evaluating the following - // expression: ((mark - sp) & (3 - os::vm_page_size())), - // assuming both stack pointer and pagesize have their - // least significant 2 bits clear. - // NOTE: the oopMark is in swap_reg %r0 as the result of cmpxchg + // Test if the oopMark is an obvious stack pointer, i.e., + // 1) (mark & 3) == 0, and + // 2) sp <= mark < mark + os::pagesize() + // These 3 tests can be done by evaluating the following + // expression: ((mark - sp) & (3 - os::vm_page_size())), + // assuming both stack pointer and pagesize have their + // least significant 2 bits clear. + // NOTE: the oopMark is in swap_reg %r0 as the result of cmpxchg - __ sub(swap_reg, sp, swap_reg); - __ neg(swap_reg, swap_reg); - __ ands(swap_reg, swap_reg, 3 - os::vm_page_size()); + __ sub(swap_reg, sp, swap_reg); + __ neg(swap_reg, swap_reg); + __ ands(swap_reg, swap_reg, 3 - os::vm_page_size()); - // Save the test result, for recursive case, the result is zero - __ str(swap_reg, Address(lock_reg, mark_word_offset)); - __ br(Assembler::NE, slow_path_lock); + // Save the test result, for recursive case, the result is zero + __ str(swap_reg, Address(lock_reg, mark_word_offset)); + __ br(Assembler::NE, slow_path_lock); + } // Slow path will re-enter here @@ -1933,10 +1938,12 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, __ biased_locking_exit(obj_reg, old_hdr, done); } - // Simple recursive lock? + if (!UseAltFastLocking) { + // Simple recursive lock? 
- __ ldr(rscratch1, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size)); - __ cbz(rscratch1, done); + __ ldr(rscratch1, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size)); + __ cbz(rscratch1, done); + } // Must save r0 if if it is live now because cmpxchg must use it if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) { @@ -1944,15 +1951,21 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, } - // get address of the stack lock - __ lea(r0, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size)); - // get old displaced header - __ ldr(old_hdr, Address(r0, 0)); - - // Atomic swap old header if oop still contains the stack lock - Label succeed; - __ cmpxchg_obj_header(r0, old_hdr, obj_reg, rscratch1, succeed, &slow_path_unlock); - __ bind(succeed); + if (UseAltFastLocking) { + __ ldr(old_hdr, Address(obj_reg, oopDesc::mark_offset_in_bytes())); + __ tbnz(old_hdr, exact_log2(markOopDesc::monitor_value), slow_path_unlock); + __ fast_unlock(obj_reg, old_hdr, swap_reg, rscratch1, slow_path_unlock); + } else { + // get address of the stack lock + __ lea(r0, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size)); + // get old displaced header + __ ldr(old_hdr, Address(r0, 0)); + + // Atomic swap old header if oop still contains the stack lock + Label succeed; + __ cmpxchg_obj_header(r0, old_hdr, obj_reg, rscratch1, succeed, &slow_path_unlock); + __ bind(succeed); + } // slow path re-enters here __ bind(unlock_done); diff --git a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp index 172930fb6c2..001f9418625 100644 --- a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp @@ -3496,7 +3496,7 @@ void LIR_Assembler::emit_lock(LIR_OpLock* op) { __ jmp(*op->stub()->entry()); } else if (op->code() == lir_lock) { Register scratch = noreg; - if (UseBiasedLocking) { + if (UseBiasedLocking || UseAltFastLocking) { scratch = op->scratch_opr()->as_register(); } assert(BasicLock::displaced_header_offset_in_bytes() == 0, "lock_reg must point to the displaced header"); diff --git a/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp b/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp index 3dc9860d268..6ac01c60328 100644 --- a/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp +++ b/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp @@ -312,6 +312,8 @@ void LIRGenerator::do_MonitorEnter(MonitorEnter* x) { LIR_Opr scratch = LIR_OprFact::illegalOpr; if (UseBiasedLocking) { scratch = new_register(T_INT); + } else if (UseAltFastLocking) { + scratch = new_register(T_ADDRESS); } CodeEmitInfo* info_for_exception = NULL; diff --git a/src/hotspot/cpu/x86/c1_MacroAssembler_x86.cpp b/src/hotspot/cpu/x86/c1_MacroAssembler_x86.cpp index 99857b604d0..61fe8f271d1 100644 --- a/src/hotspot/cpu/x86/c1_MacroAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/c1_MacroAssembler_x86.cpp @@ -48,6 +48,10 @@ int C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr Label done; int null_check_offset = -1; + if (UseAltFastLocking) { + assert_different_registers(hdr, obj, disp_hdr, scratch); + } + verify_oop(obj); // save object being locked into the BasicObjectLock @@ -62,45 +66,58 @@ int C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr null_check_offset = offset(); } - // Load object header - movptr(hdr, Address(obj, hdr_offset)); - // and mark it as unlocked - orptr(hdr, markOopDesc::unlocked_value); - // save unlocked object header into the displaced header location on the 
stack - movptr(Address(disp_hdr, 0), hdr); - // test if object header is still the same (i.e. unlocked), and if so, store the - // displaced header address in the object header - if it is not the same, get the - // object header instead - if (os::is_MP()) MacroAssembler::lock(); // must be immediately before cmpxchg! - cmpxchgptr(disp_hdr, Address(obj, hdr_offset)); - // if the object header was the same, we're done - if (PrintBiasedLockingStatistics) { - cond_inc32(Assembler::equal, - ExternalAddress((address)BiasedLocking::fast_path_entry_count_addr())); + if (UseAltFastLocking) { + assert(!UseBiasedLocking, "sanity"); +#ifdef _LP64 + const Register thread = r15_thread; +#else + const Register thread = disp_hdr; + get_thread(thread); +#endif + // Load object header + movptr(hdr, Address(obj, hdr_offset)); + fast_lock_impl(obj, hdr, thread, scratch, slow_case); + } else { + // Load object header + movptr(hdr, Address(obj, hdr_offset)); + // and mark it as unlocked + orptr(hdr, markOopDesc::unlocked_value); + // save unlocked object header into the displaced header location on the stack + movptr(Address(disp_hdr, 0), hdr); + // test if object header is still the same (i.e. unlocked), and if so, store the + // displaced header address in the object header - if it is not the same, get the + // object header instead + if (os::is_MP()) MacroAssembler::lock(); // must be immediately before cmpxchg! + cmpxchgptr(disp_hdr, Address(obj, hdr_offset)); + // if the object header was the same, we're done + if (PrintBiasedLockingStatistics) { + cond_inc32(Assembler::equal, + ExternalAddress((address)BiasedLocking::fast_path_entry_count_addr())); + } + jcc(Assembler::equal, done); + // if the object header was not the same, it is now in the hdr register + // => test if it is a stack pointer into the same stack (recursive locking), i.e.: + // + // 1) (hdr & aligned_mask) == 0 + // 2) rsp <= hdr + // 3) hdr <= rsp + page_size + // + // these 3 tests can be done by evaluating the following expression: + // + // (hdr - rsp) & (aligned_mask - page_size) + // + // assuming both the stack pointer and page_size have their least + // significant 2 bits cleared and page_size is a power of 2 + subptr(hdr, rsp); + andptr(hdr, aligned_mask - os::vm_page_size()); + // for recursive locking, the result is zero => save it in the displaced header + // location (NULL in the displaced hdr location indicates recursive locking) + movptr(Address(disp_hdr, 0), hdr); + // otherwise we don't care about the result and handle locking via runtime call + jcc(Assembler::notZero, slow_case); + // done + bind(done); } - jcc(Assembler::equal, done); - // if the object header was not the same, it is now in the hdr register - // => test if it is a stack pointer into the same stack (recursive locking), i.e.: - // - // 1) (hdr & aligned_mask) == 0 - // 2) rsp <= hdr - // 3) hdr <= rsp + page_size - // - // these 3 tests can be done by evaluating the following expression: - // - // (hdr - rsp) & (aligned_mask - page_size) - // - // assuming both the stack pointer and page_size have their least - // significant 2 bits cleared and page_size is a power of 2 - subptr(hdr, rsp); - andptr(hdr, aligned_mask - os::vm_page_size()); - // for recursive locking, the result is zero => save it in the displaced header - // location (NULL in the displaced hdr location indicates recursive locking) - movptr(Address(disp_hdr, 0), hdr); - // otherwise we don't care about the result and handle locking via runtime call - jcc(Assembler::notZero, slow_case); - // 
done - bind(done); return null_check_offset; } @@ -112,33 +129,43 @@ void C1_MacroAssembler::unlock_object(Register hdr, Register obj, Register disp_ assert(hdr != obj && hdr != disp_hdr && obj != disp_hdr, "registers must be different"); Label done; - if (UseBiasedLocking) { - // load object - movptr(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); - biased_locking_exit(obj, hdr, done); - } - // load displaced header - movptr(hdr, Address(disp_hdr, 0)); - // if the loaded hdr is NULL we had recursive locking - testptr(hdr, hdr); - // if we had recursive locking, we are done - jcc(Assembler::zero, done); - if (!UseBiasedLocking) { + if (UseAltFastLocking) { // load object movptr(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); + verify_oop(obj); + movptr(disp_hdr, Address(obj, hdr_offset)); + andptr(disp_hdr, ~(int32_t)markOopDesc::lock_mask_in_place); + fast_unlock_impl(obj, disp_hdr, hdr, slow_case); + } else { + if (UseBiasedLocking) { + // load object + movptr(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); + biased_locking_exit(obj, hdr, done); + } + + // load displaced header + movptr(hdr, Address(disp_hdr, 0)); + // if the loaded hdr is NULL we had recursive locking + testptr(hdr, hdr); + // if we had recursive locking, we are done + jcc(Assembler::zero, done); + if (!UseBiasedLocking) { + // load object + movptr(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); + } + verify_oop(obj); + // test if object header is pointing to the displaced header, and if so, restore + // the displaced header in the object - if the object header is not pointing to + // the displaced header, get the object header instead + if (os::is_MP()) MacroAssembler::lock(); // must be immediately before cmpxchg! + cmpxchgptr(hdr, Address(obj, hdr_offset)); + // if the object header was not pointing to the displaced header, + // we do unlocking via runtime call + jcc(Assembler::notEqual, slow_case); + // done + bind(done); } - verify_oop(obj); - // test if object header is pointing to the displaced header, and if so, restore - // the displaced header in the object - if the object header is not pointing to - // the displaced header, get the object header instead - if (os::is_MP()) MacroAssembler::lock(); // must be immediately before cmpxchg! - cmpxchgptr(hdr, Address(obj, hdr_offset)); - // if the object header was not pointing to the displaced header, - // we do unlocking via runtime call - jcc(Assembler::notEqual, slow_case); - // done - bind(done); } diff --git a/src/hotspot/cpu/x86/c2_CodeStubs_x86.cpp b/src/hotspot/cpu/x86/c2_CodeStubs_x86.cpp new file mode 100644 index 00000000000..5e36faec4cf --- /dev/null +++ b/src/hotspot/cpu/x86/c2_CodeStubs_x86.cpp @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2020, 2022 Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "opto/c2_MacroAssembler.hpp" +#include "opto/c2_CodeStubs.hpp" +#include "runtime/objectMonitor.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" + +#define __ masm. + +#ifdef _LP64 +int C2HandleAnonOMOwnerStub::max_size() const { + // Max size of stub has been determined by testing with 0, in which case + // C2CodeStubList::emit() will throw an assertion and report the actual size that + // is needed. + return DEBUG_ONLY(36) NOT_DEBUG(21); +} + +void C2HandleAnonOMOwnerStub::emit(C2_MacroAssembler& masm) { + __ bind(entry()); + Register mon = monitor(); + Register t = tmp(); + __ movptr(Address(mon, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), r15_thread); + __ subl(Address(r15_thread, JavaThread::lock_stack_top_offset()), oopSize); +#ifdef ASSERT + __ movl(t, Address(r15_thread, JavaThread::lock_stack_top_offset())); + __ movptr(Address(r15_thread, t), 0); +#endif + __ jmp(continuation()); +} +#endif + +#undef __ diff --git a/src/hotspot/cpu/x86/interp_masm_x86.cpp b/src/hotspot/cpu/x86/interp_masm_x86.cpp index 928118b911d..75242430c54 100644 --- a/src/hotspot/cpu/x86/interp_masm_x86.cpp +++ b/src/hotspot/cpu/x86/interp_masm_x86.cpp @@ -1193,55 +1193,74 @@ void InterpreterMacroAssembler::lock_object(Register lock_reg) { biased_locking_enter(lock_reg, obj_reg, swap_reg, tmp_reg, rklass_decode_tmp, false, done, &slow_case); } - // Load immediate 1 into swap_reg %rax - movl(swap_reg, (int32_t)1); + if (UseAltFastLocking) { +#ifdef _LP64 + const Register thread = r15_thread; +#else + const Register thread = lock_reg; + get_thread(thread); +#endif + // Load object header, prepare for CAS from unlocked to locked. 
+ movptr(swap_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); + fast_lock_impl(obj_reg, swap_reg, thread, tmp_reg, slow_case); + jmp(done); + } else { + // Load immediate 1 into swap_reg %rax + movl(swap_reg, (int32_t)1); - // Load (object->mark() | 1) into swap_reg %rax - orptr(swap_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); + // Load (object->mark() | 1) into swap_reg %rax + orptr(swap_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); - // Save (object->mark() | 1) into BasicLock's displaced header - movptr(Address(lock_reg, mark_offset), swap_reg); + // Save (object->mark() | 1) into BasicLock's displaced header + movptr(Address(lock_reg, mark_offset), swap_reg); - assert(lock_offset == 0, - "displaced header must be first word in BasicObjectLock"); + assert(lock_offset == 0, + "displaced header must be first word in BasicObjectLock"); - if (os::is_MP()) lock(); - cmpxchgptr(lock_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); - if (PrintBiasedLockingStatistics) { - cond_inc32(Assembler::zero, - ExternalAddress((address) BiasedLocking::fast_path_entry_count_addr())); - } - jcc(Assembler::zero, done); - - const int zero_bits = LP64_ONLY(7) NOT_LP64(3); - - // Test if the oopMark is an obvious stack pointer, i.e., - // 1) (mark & zero_bits) == 0, and - // 2) rsp <= mark < mark + os::pagesize() - // - // These 3 tests can be done by evaluating the following - // expression: ((mark - rsp) & (zero_bits - os::vm_page_size())), - // assuming both stack pointer and pagesize have their - // least significant bits clear. - // NOTE: the oopMark is in swap_reg %rax as the result of cmpxchg - subptr(swap_reg, rsp); - andptr(swap_reg, zero_bits - os::vm_page_size()); - - // Save the test result, for recursive case, the result is zero - movptr(Address(lock_reg, mark_offset), swap_reg); - - if (PrintBiasedLockingStatistics) { - cond_inc32(Assembler::zero, - ExternalAddress((address) BiasedLocking::fast_path_entry_count_addr())); + if (os::is_MP()) lock(); + cmpxchgptr(lock_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); + if (PrintBiasedLockingStatistics) { + cond_inc32(Assembler::zero, + ExternalAddress((address) BiasedLocking::fast_path_entry_count_addr())); + } + jcc(Assembler::zero, done); + + const int zero_bits = LP64_ONLY(7) NOT_LP64(3); + + // Test if the oopMark is an obvious stack pointer, i.e., + // 1) (mark & zero_bits) == 0, and + // 2) rsp <= mark < mark + os::pagesize() + // + // These 3 tests can be done by evaluating the following + // expression: ((mark - rsp) & (zero_bits - os::vm_page_size())), + // assuming both stack pointer and pagesize have their + // least significant bits clear. 
+ // NOTE: the oopMark is in swap_reg %rax as the result of cmpxchg + subptr(swap_reg, rsp); + andptr(swap_reg, zero_bits - os::vm_page_size()); + + // Save the test result, for recursive case, the result is zero + movptr(Address(lock_reg, mark_offset), swap_reg); + + if (PrintBiasedLockingStatistics) { + cond_inc32(Assembler::zero, + ExternalAddress((address) BiasedLocking::fast_path_entry_count_addr())); + } + jcc(Assembler::zero, done); } - jcc(Assembler::zero, done); bind(slow_case); // Call the runtime routine for slow case - call_VM(noreg, - CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), - lock_reg); + if (UseAltFastLocking) { + call_VM(noreg, + CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter_obj), + obj_reg); + } else { + call_VM(noreg, + CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), + lock_reg); + } bind(done); } @@ -1269,7 +1288,7 @@ void InterpreterMacroAssembler::unlock_object(Register lock_reg) { CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), lock_reg); } else { - Label done; + Label done, slow_case; const Register swap_reg = rax; // Must use rax for cmpxchg instruction const Register header_reg = LP64_ONLY(c_rarg2) NOT_LP64(rbx); // Will contain the old oopMark @@ -1277,9 +1296,11 @@ void InterpreterMacroAssembler::unlock_object(Register lock_reg) { save_bcp(); // Save in case of exception - // Convert from BasicObjectLock structure to object and BasicLock - // structure Store the BasicLock address into %rax - lea(swap_reg, Address(lock_reg, BasicObjectLock::lock_offset_in_bytes())); + if (!UseAltFastLocking) { + // Convert from BasicObjectLock structure to object and BasicLock + // structure Store the BasicLock address into %rax + lea(swap_reg, Address(lock_reg, BasicObjectLock::lock_offset_in_bytes())); + } // Load oop into obj_reg(%c_rarg3) movptr(obj_reg, Address(lock_reg, BasicObjectLock::obj_offset_in_bytes())); @@ -1287,26 +1308,46 @@ void InterpreterMacroAssembler::unlock_object(Register lock_reg) { // Free entry movptr(Address(lock_reg, BasicObjectLock::obj_offset_in_bytes()), (int32_t)NULL_WORD); - if (UseBiasedLocking) { - biased_locking_exit(obj_reg, header_reg, done); - } + if (UseAltFastLocking) { +#ifdef _LP64 + const Register thread = r15_thread; +#else + const Register thread = header_reg; + get_thread(thread); +#endif + // Handle unstructured locking. + Register tmp = swap_reg; + movl(tmp, Address(thread, JavaThread::lock_stack_top_offset())); + cmpptr(obj_reg, Address(thread, tmp, Address::times_1, -oopSize)); + jcc(Assembler::notEqual, slow_case); + // Try to swing header from locked to unlock. 
+ movptr(swap_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); + andptr(swap_reg, ~(int32_t)markOopDesc::lock_mask_in_place); + fast_unlock_impl(obj_reg, swap_reg, header_reg, slow_case); + jmp(done); + bind(slow_case); + } else { + if (UseBiasedLocking) { + biased_locking_exit(obj_reg, header_reg, done); + } - // Load the old header from BasicLock structure - movptr(header_reg, Address(swap_reg, - BasicLock::displaced_header_offset_in_bytes())); + // Load the old header from BasicLock structure + movptr(header_reg, Address(swap_reg, + BasicLock::displaced_header_offset_in_bytes())); - // Test for recursion - testptr(header_reg, header_reg); + // Test for recursion + testptr(header_reg, header_reg); - // zero for recursive case - jcc(Assembler::zero, done); + // zero for recursive case + jcc(Assembler::zero, done); - // Atomic swap back the old header - if (os::is_MP()) lock(); - cmpxchgptr(header_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); + // Atomic swap back the old header + if (os::is_MP()) lock(); + cmpxchgptr(header_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); - // zero for simple unlock of a stack-lock case - jcc(Assembler::zero, done); + // zero for simple unlock of a stack-lock case + jcc(Assembler::zero, done); + } // Call the runtime routine for slow case. movptr(Address(lock_reg, BasicObjectLock::obj_offset_in_bytes()), diff --git a/src/hotspot/cpu/x86/macroAssembler_x86.cpp b/src/hotspot/cpu/x86/macroAssembler_x86.cpp index 9d807192f96..6b1e6e64c68 100644 --- a/src/hotspot/cpu/x86/macroAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/macroAssembler_x86.cpp @@ -1706,7 +1706,7 @@ void MacroAssembler::rtm_inflated_locking(Register objReg, Register boxReg, Regi // rax,: tmp -- KILLED // scr: tmp -- KILLED void MacroAssembler::fast_lock(Register objReg, Register boxReg, Register tmpReg, - Register scrReg, Register cx1Reg, Register cx2Reg, + Register scrReg, Register cx1Reg, Register cx2Reg, Register thread, BiasedLockingCounters* counters, RTMLockingCounters* rtm_counters, RTMLockingCounters* stack_rtm_counters, @@ -1771,31 +1771,40 @@ void MacroAssembler::fast_lock(Register objReg, Register boxReg, Register tmpReg movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // [FETCH] testptr(tmpReg, markOopDesc::monitor_value); // inflated vs stack-locked|neutral|biased - jccb(Assembler::notZero, IsInflated); - - // Attempt stack-locking ... - orptr (tmpReg, markOopDesc::unlocked_value); - movptr(Address(boxReg, 0), tmpReg); // Anticipate successful CAS - if (os::is_MP()) { - lock(); - } - cmpxchgptr(boxReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Updates tmpReg - if (counters != NULL) { - cond_inc32(Assembler::equal, - ExternalAddress((address)counters->fast_path_entry_count_addr())); + if (UseAltFastLocking) { + jcc(Assembler::notZero, IsInflated); + } else { + jccb(Assembler::notZero, IsInflated); } - jcc(Assembler::equal, DONE_LABEL); // Success - - // Recursive locking. - // The object is stack-locked: markword contains stack pointer to BasicLock. - // Locked by current thread if difference with current SP is less than one page. - subptr(tmpReg, rsp); - // Next instruction set ZFlag == 1 (Success) if difference is less then one page. 
- andptr(tmpReg, (int32_t) (NOT_LP64(0xFFFFF003) LP64_ONLY(7 - os::vm_page_size())) ); - movptr(Address(boxReg, 0), tmpReg); - if (counters != NULL) { - cond_inc32(Assembler::equal, - ExternalAddress((address)counters->fast_path_entry_count_addr())); + + if (UseAltFastLocking) { + fast_lock_impl(objReg, tmpReg, thread, scrReg, DONE_LABEL); + xorl(tmpReg, tmpReg); // Set ZF=1 to indicate success + } else { + // Attempt stack-locking ... + orptr (tmpReg, markOopDesc::unlocked_value); + movptr(Address(boxReg, 0), tmpReg); // Anticipate successful CAS + if (os::is_MP()) { + lock(); + } + cmpxchgptr(boxReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Updates tmpReg + if (counters != NULL) { + cond_inc32(Assembler::equal, + ExternalAddress((address)counters->fast_path_entry_count_addr())); + } + jcc(Assembler::equal, DONE_LABEL); // Success + + // Recursive locking. + // The object is stack-locked: markword contains stack pointer to BasicLock. + // Locked by current thread if difference with current SP is less than one page. + subptr(tmpReg, rsp); + // Next instruction set ZFlag == 1 (Success) if difference is less then one page. + andptr(tmpReg, (int32_t) (NOT_LP64(0xFFFFF003) LP64_ONLY(7 - os::vm_page_size())) ); + movptr(Address(boxReg, 0), tmpReg); + if (counters != NULL) { + cond_inc32(Assembler::equal, + ExternalAddress((address)counters->fast_path_entry_count_addr())); + } } jmp(DONE_LABEL); @@ -2018,18 +2027,38 @@ void MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register tmpR } #endif - cmpptr(Address(boxReg, 0), (int32_t)NULL_WORD); // Examine the displaced header - jcc (Assembler::zero, DONE_LABEL); // 0 indicates recursive stack-lock + if (!UseAltFastLocking) { + cmpptr(Address(boxReg, 0), (int32_t)NULL_WORD); // Examine the displaced header + jcc (Assembler::zero, DONE_LABEL); // 0 indicates recursive stack-lock + } movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Examine the object's markword testptr(tmpReg, markOopDesc::monitor_value); // Inflated? // If UseWispMonitor is enable, insert more code, then the length in jccb isn't enough. - if (UseWispMonitor) { + if (UseWispMonitor || UseAltFastLocking) { jcc (Assembler::zero, Stacked); } else { jccb (Assembler::zero, Stacked); } // It's inflated. + if (UseAltFastLocking) { + // If the owner is ANONYMOUS, we need to fix it. + testb(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), (int32_t) ObjectMonitor::ANONYMOUS_OWNER); +#ifdef _LP64 + if (!Compile::current()->in_scratch_emit_size()) { + C2HandleAnonOMOwnerStub* stub = new (Compile::current()->comp_arena()) C2HandleAnonOMOwnerStub(tmpReg, boxReg); + Compile::current()->add_stub(stub); + jcc(Assembler::notEqual, stub->entry()); + bind(stub->continuation()); + } else +#endif + { + // We can't easily implement this optimization on 32 bit because we don't have a thread register. + // Call the slow-path instead. 
+ jcc(Assembler::notEqual, DONE_LABEL); + } + } + #if INCLUDE_RTM_OPT if (use_rtm) { Label L_regular_inflated_unlock; @@ -2038,7 +2067,11 @@ void MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register tmpR testptr(boxReg, boxReg); jccb(Assembler::notZero, L_regular_inflated_unlock); xend(); - jmpb(DONE_LABEL); + if (UseAltFastLocking) { + jmp(DONE_LABEL); + } else { + jmpb(DONE_LABEL); + } bind(L_regular_inflated_unlock); } #endif @@ -2211,12 +2244,20 @@ void MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register tmpR xorptr(boxReg, boxReg); } orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions))); - jccb (Assembler::notZero, DONE_LABEL); + if (UseAltFastLocking) { + jcc (Assembler::notZero, DONE_LABEL); + } else { + jccb (Assembler::notZero, DONE_LABEL); + } movptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq))); orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList))); jccb (Assembler::notZero, CheckSucc); movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), (int32_t)NULL_WORD); - jmpb (DONE_LABEL); + if (UseAltFastLocking) { + jmp (DONE_LABEL); + } else { + jmpb (DONE_LABEL); + } if ((EmitSync & 65536) == 0) { // Try to avoid passing control into the slow_path ... @@ -2280,17 +2321,31 @@ void MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register tmpR bind (LGoSlowPath); orl (boxReg, 1); // set ICC.ZF=0 to indicate failure - jmpb (DONE_LABEL); + if (UseAltFastLocking) { + jmp (DONE_LABEL); + } else { + jmpb (DONE_LABEL); + } bind (LSuccess); testl (boxReg, 0); // set ICC.ZF=1 to indicate success - jmpb (DONE_LABEL); + if (UseAltFastLocking) { + jmp (DONE_LABEL); + } else { + jmpb (DONE_LABEL); + } } bind (Stacked); - movptr(tmpReg, Address (boxReg, 0)); // re-fetch - if (os::is_MP()) { lock(); } - cmpxchgptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Uses RAX which is box + if (UseAltFastLocking) { + mov(boxReg, tmpReg); + fast_unlock_impl(objReg, boxReg, tmpReg, DONE_LABEL); + xorl(tmpReg, tmpReg); + } else { + movptr(tmpReg, Address (boxReg, 0)); // re-fetch + if (os::is_MP()) { lock(); } + cmpxchgptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Uses RAX which is box + } if (EmitSync & 65536) { bind (CheckSucc); @@ -10628,3 +10683,70 @@ void MacroAssembler::get_thread(Register thread) { } #endif + +// Implements fast-locking. +// Branches to slow upon failure to lock the object, with ZF cleared. +// Falls through upon success with unspecified ZF. +// +// obj: the object to be locked +// hdr: the (pre-loaded) header of the object, must be rax +// thread: the thread which attempts to lock obj +// tmp: a temporary register +void MacroAssembler::fast_lock_impl(Register obj, Register hdr, Register thread, Register tmp, Label& slow) { + assert(hdr == rax, "header must be in rax for cmpxchg"); + assert_different_registers(obj, hdr, thread, tmp); + + // First we need to check if the lock-stack has room for pushing the object reference. + // Note: we subtract 1 from the end-offset so that we can do a 'greater' comparison, instead + // of 'greaterEqual' below, which readily clears the ZF. This makes C2 code a little simpler and + // avoids one branch. + cmpl(Address(thread, JavaThread::lock_stack_top_offset()), LockStack::end_offset() - 1); + jcc(Assembler::greater, slow); + + // Now we attempt to take the fast-lock. + // Clear lock_mask bits (locked state). 
+ andptr(hdr, ~(int32_t)markOopDesc::lock_mask_in_place); + movptr(tmp, hdr); + // Set unlocked_value bit. + orptr(hdr, markOopDesc::unlocked_value); + lock(); + cmpxchgptr(tmp, Address(obj, oopDesc::mark_offset_in_bytes())); + jcc(Assembler::notEqual, slow); + + // If successful, push object to lock-stack. + movl(tmp, Address(thread, JavaThread::lock_stack_top_offset())); + movptr(Address(thread, tmp), obj); + incrementl(tmp, oopSize); + movl(Address(thread, JavaThread::lock_stack_top_offset()), tmp); +} + +// Implements fast-unlocking. +// Branches to slow upon failure, with ZF cleared. +// Falls through upon success, with unspecified ZF. +// +// obj: the object to be unlocked +// hdr: the (pre-loaded) header of the object, must be rax +// tmp: a temporary register +void MacroAssembler::fast_unlock_impl(Register obj, Register hdr, Register tmp, Label& slow) { + assert(hdr == rax, "header must be in rax for cmpxchg"); + assert_different_registers(obj, hdr, tmp); + + // Mark-word must be lock_mask now, try to swing it back to unlocked_value. + movptr(tmp, hdr); // The expected old value + orptr(tmp, markOopDesc::unlocked_value); + lock(); + cmpxchgptr(tmp, Address(obj, oopDesc::mark_offset_in_bytes())); + jcc(Assembler::notEqual, slow); + // Pop the lock object from the lock-stack. +#ifdef _LP64 + const Register thread = r15_thread; +#else + const Register thread = rax; + get_thread(thread); +#endif + subl(Address(thread, JavaThread::lock_stack_top_offset()), oopSize); +#ifdef ASSERT + movl(tmp, Address(thread, JavaThread::lock_stack_top_offset())); + movptr(Address(thread, tmp), 0); +#endif +} diff --git a/src/hotspot/cpu/x86/macroAssembler_x86.hpp b/src/hotspot/cpu/x86/macroAssembler_x86.hpp index 0965b913bd4..2c3129b504d 100644 --- a/src/hotspot/cpu/x86/macroAssembler_x86.hpp +++ b/src/hotspot/cpu/x86/macroAssembler_x86.hpp @@ -673,7 +673,7 @@ class MacroAssembler: public Assembler { // Code used by cmpFastLock and cmpFastUnlock mach instructions in .ad file. // See full desription in macroAssembler_x86.cpp. 
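+  // The 'thread' register added below lets the UseAltFastLocking path reach the
+  // lock-stack fields in JavaThread.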
void fast_lock(Register obj, Register box, Register tmp, - Register scr, Register cx1, Register cx2, + Register scr, Register cx1, Register cx2, Register thread, BiasedLockingCounters* counters, RTMLockingCounters* rtm_counters, RTMLockingCounters* stack_rtm_counters, @@ -1893,6 +1893,9 @@ class MacroAssembler: public Assembler { XMMRegister tmp1, Register tmp2); void vallones(XMMRegister dst, int vector_len); + + void fast_lock_impl(Register obj, Register hdr, Register thread, Register tmp, Label& slow); + void fast_unlock_impl(Register obj, Register hdr, Register tmp, Label& slow); }; /** diff --git a/src/hotspot/cpu/x86/sharedRuntime_x86_64.cpp b/src/hotspot/cpu/x86/sharedRuntime_x86_64.cpp index 9e9410c9dc4..a241dc9b80f 100644 --- a/src/hotspot/cpu/x86/sharedRuntime_x86_64.cpp +++ b/src/hotspot/cpu/x86/sharedRuntime_x86_64.cpp @@ -2444,44 +2444,50 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, // Load the oop from the handle __ movptr(obj_reg, Address(oop_handle_reg, 0)); - if (UseBiasedLocking) { - __ biased_locking_enter(lock_reg, obj_reg, swap_reg, rscratch1, rscratch2, false, lock_done, &slow_path_lock); - } + if (UseAltFastLocking) { + // Load object header + __ movptr(swap_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); + __ fast_lock_impl(obj_reg, swap_reg, r15_thread, rscratch1, slow_path_lock); + } else { + if (UseBiasedLocking) { + __ biased_locking_enter(lock_reg, obj_reg, swap_reg, rscratch1, rscratch2, false, lock_done, &slow_path_lock); + } - // Load immediate 1 into swap_reg %rax - __ movl(swap_reg, 1); + // Load immediate 1 into swap_reg %rax + __ movl(swap_reg, 1); - // Load (object->mark() | 1) into swap_reg %rax - __ orptr(swap_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); + // Load (object->mark() | 1) into swap_reg %rax + __ orptr(swap_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); - // Save (object->mark() | 1) into BasicLock's displaced header - __ movptr(Address(lock_reg, mark_word_offset), swap_reg); + // Save (object->mark() | 1) into BasicLock's displaced header + __ movptr(Address(lock_reg, mark_word_offset), swap_reg); - if (os::is_MP()) { - __ lock(); - } + if (os::is_MP()) { + __ lock(); + } - // src -> dest iff dest == rax else rax <- dest - __ cmpxchgptr(lock_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); - __ jcc(Assembler::equal, lock_done); + // src -> dest iff dest == rax else rax <- dest + __ cmpxchgptr(lock_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); + __ jcc(Assembler::equal, lock_done); - // Hmm should this move to the slow path code area??? + // Hmm should this move to the slow path code area??? - // Test if the oopMark is an obvious stack pointer, i.e., - // 1) (mark & 3) == 0, and - // 2) rsp <= mark < mark + os::pagesize() - // These 3 tests can be done by evaluating the following - // expression: ((mark - rsp) & (3 - os::vm_page_size())), - // assuming both stack pointer and pagesize have their - // least significant 2 bits clear. - // NOTE: the oopMark is in swap_reg %rax as the result of cmpxchg + // Test if the oopMark is an obvious stack pointer, i.e., + // 1) (mark & 3) == 0, and + // 2) rsp <= mark < mark + os::pagesize() + // These 3 tests can be done by evaluating the following + // expression: ((mark - rsp) & (3 - os::vm_page_size())), + // assuming both stack pointer and pagesize have their + // least significant 2 bits clear. 
+ // NOTE: the oopMark is in swap_reg %rax as the result of cmpxchg - __ subptr(swap_reg, rsp); - __ andptr(swap_reg, 3 - os::vm_page_size()); + __ subptr(swap_reg, rsp); + __ andptr(swap_reg, 3 - os::vm_page_size()); - // Save the test result, for recursive case, the result is zero - __ movptr(Address(lock_reg, mark_word_offset), swap_reg); - __ jcc(Assembler::notEqual, slow_path_lock); + // Save the test result, for recursive case, the result is zero + __ movptr(Address(lock_reg, mark_word_offset), swap_reg); + __ jcc(Assembler::notEqual, slow_path_lock); + } // Slow path will re-enter here @@ -2643,28 +2649,35 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, __ biased_locking_exit(obj_reg, old_hdr, done); } - // Simple recursive lock? + if (!UseAltFastLocking) { + // Simple recursive lock? - __ cmpptr(Address(rsp, lock_slot_offset * VMRegImpl::stack_slot_size), (int32_t)NULL_WORD); - __ jcc(Assembler::equal, done); + __ cmpptr(Address(rsp, lock_slot_offset * VMRegImpl::stack_slot_size), (int32_t)NULL_WORD); + __ jcc(Assembler::equal, done); + } // Must save rax if if it is live now because cmpxchg must use it if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) { save_native_result(masm, ret_type, stack_slots); } - - // get address of the stack lock - __ lea(rax, Address(rsp, lock_slot_offset * VMRegImpl::stack_slot_size)); - // get old displaced header - __ movptr(old_hdr, Address(rax, 0)); - - // Atomic swap old header if oop still contains the stack lock - if (os::is_MP()) { - __ lock(); + if (UseAltFastLocking) { + __ movptr(swap_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); + __ andptr(swap_reg, ~(int32_t)markOopDesc::lock_mask_in_place); + __ fast_unlock_impl(obj_reg, swap_reg, lock_reg, slow_path_unlock); + } else { + // get address of the stack lock + __ lea(rax, Address(rsp, lock_slot_offset * VMRegImpl::stack_slot_size)); + // get old displaced header + __ movptr(old_hdr, Address(rax, 0)); + + // Atomic swap old header if oop still contains the stack lock + if (os::is_MP()) { + __ lock(); + } + __ cmpxchgptr(old_hdr, Address(obj_reg, oopDesc::mark_offset_in_bytes())); + __ jcc(Assembler::notEqual, slow_path_unlock); } - __ cmpxchgptr(old_hdr, Address(obj_reg, oopDesc::mark_offset_in_bytes())); - __ jcc(Assembler::notEqual, slow_path_unlock); // slow path re-enters here __ bind(unlock_done); diff --git a/src/hotspot/cpu/x86/x86_64.ad b/src/hotspot/cpu/x86/x86_64.ad index 6f665642be2..e6a6398c0fc 100644 --- a/src/hotspot/cpu/x86/x86_64.ad +++ b/src/hotspot/cpu/x86/x86_64.ad @@ -12598,7 +12598,7 @@ instruct cmpFastLockRTM(rFlagsReg cr, rRegP object, rbx_RegP box, rax_RegI tmp, format %{ "fastlock $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %} ins_encode %{ __ fast_lock($object$$Register, $box$$Register, $tmp$$Register, - $scr$$Register, $cx1$$Register, $cx2$$Register, + $scr$$Register, $cx1$$Register, $cx2$$Register, r15_thread, _counters, _rtm_counters, _stack_rtm_counters, ((Method*)(ra_->C->method()->constant_encoding()))->method_data(), true, ra_->C->profile_rtm()); @@ -12614,7 +12614,7 @@ instruct cmpFastLock(rFlagsReg cr, rRegP object, rbx_RegP box, rax_RegI tmp, rRe format %{ "fastlock $object,$box\t! 
kills $box,$tmp,$scr" %} ins_encode %{ __ fast_lock($object$$Register, $box$$Register, $tmp$$Register, - $scr$$Register, $cx1$$Register, noreg, _counters, NULL, NULL, NULL, false, false); + $scr$$Register, $cx1$$Register, noreg, r15_thread, _counters, NULL, NULL, NULL, false, false); %} ins_pipe(pipe_slow); %} diff --git a/src/hotspot/share/c1/c1_Runtime1.cpp b/src/hotspot/share/c1/c1_Runtime1.cpp index 99d0f9b5fff..144cbe3381b 100644 --- a/src/hotspot/share/c1/c1_Runtime1.cpp +++ b/src/hotspot/share/c1/c1_Runtime1.cpp @@ -702,13 +702,13 @@ JRT_BLOCK_ENTRY(void, Runtime1::monitorenter(JavaThread* thread, oopDesc* obj, B NOT_PRODUCT(_monitorenter_slowcase_cnt++;) if (!UseBiasedLocking) { if (UseFastLocking) { - assert(obj == lock->obj(), "must match"); + assert(UseAltFastLocking || obj == lock->obj(), "must match"); } else { lock->set_obj(obj); } } WispPostStealHandleUpdateMark w(thread, __hm); - SharedRuntime::monitor_enter_helper(obj, lock->lock(), thread, UseFastLocking); + SharedRuntime::monitor_enter_helper(obj, UseAltFastLocking ? NULL : lock->lock(), thread, UseFastLocking); JRT_END diff --git a/src/hotspot/share/interpreter/interpreterRuntime.cpp b/src/hotspot/share/interpreter/interpreterRuntime.cpp index c901151d554..6df6ad24425 100644 --- a/src/hotspot/share/interpreter/interpreterRuntime.cpp +++ b/src/hotspot/share/interpreter/interpreterRuntime.cpp @@ -757,6 +757,7 @@ address monitorenter_address_interp = (address)InterpreterRuntime::monitorenter; //%note monitor_1 IRT_ENTRY_NO_ASYNC(void, InterpreterRuntime::monitorenter(JavaThread* thread, BasicObjectLock* elem)) + assert(!UseAltFastLocking, "Should call monitorenter_obj() when using the alternative fast locking"); #ifdef ASSERT thread->last_frame().interpreter_frame_verify_monitor(elem); #endif @@ -786,6 +787,22 @@ IRT_ENTRY_NO_ASYNC(void, InterpreterRuntime::monitorenter(JavaThread* thread, Ba #endif IRT_END +// NOTE: We provide a separate implementation for the new lightweight locking to workaround a limitation +// of registers in x86_32. This entry point accepts an oop instead of a BasicObjectLock*. +// The problem is that we would need to preserve the register that holds the BasicObjectLock, +// but we are using that register to hold the thread. We don't have enough registers to +// also keep the BasicObjectLock, but we don't really need it anyway, we only need +// the object. See also InterpreterMacroAssembler::lock_object(). +// As soon as legacy stack-locking goes away we could remove the other monitorenter() entry +// point, and only use oop-accepting entries (same for monitorexit() below). 
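+// This entry does no lock-stack bookkeeping of its own: ObjectSynchronizer::slow_enter()
+// either pushes the object onto the caller's lock-stack or inflates and enters the monitor.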
+JRT_ENTRY_NO_ASYNC(void, InterpreterRuntime::monitorenter_obj(JavaThread* thread, oopDesc* obj)) + assert(UseAltFastLocking, "Should call monitorenter() when not using the new lightweight locking"); + Handle h_obj(thread, cast_to_oop(obj)); + assert(Universe::heap()->is_in_or_null(h_obj()), + "must be null or an object"); + ObjectSynchronizer::slow_enter(h_obj, NULL, thread); + return; +JRT_END //%note monitor_1 IRT_ENTRY_NO_ASYNC(void, InterpreterRuntime::monitorexit(JavaThread* thread, BasicObjectLock* elem)) diff --git a/src/hotspot/share/interpreter/interpreterRuntime.hpp b/src/hotspot/share/interpreter/interpreterRuntime.hpp index 87e84c893f2..6fef8c21d72 100644 --- a/src/hotspot/share/interpreter/interpreterRuntime.hpp +++ b/src/hotspot/share/interpreter/interpreterRuntime.hpp @@ -117,6 +117,7 @@ class InterpreterRuntime: AllStatic { public: // Synchronization static void monitorenter(JavaThread* thread, BasicObjectLock* elem); + static void monitorenter_obj(JavaThread* current, oopDesc* obj); static void monitorexit (JavaThread* thread, BasicObjectLock* elem); static void throw_illegal_monitor_state_exception(JavaThread* thread); diff --git a/src/hotspot/share/oops/markOop.hpp b/src/hotspot/share/oops/markOop.hpp index 649217c8808..87ddc6af2cc 100644 --- a/src/hotspot/share/oops/markOop.hpp +++ b/src/hotspot/share/oops/markOop.hpp @@ -264,12 +264,22 @@ class markOopDesc: public oopDesc { return markOop(value() | unlocked_value); } bool has_locker() const { + assert(!UseAltFastLocking, "should only be called with legacy stack locking"); return ((value() & lock_mask_in_place) == locked_value); } BasicLock* locker() const { assert(has_locker(), "check"); return (BasicLock*) value(); } + + bool is_fast_locked() const { + assert(UseAltFastLocking, "should only be called with new lightweight locking"); + return (value() & lock_mask_in_place) == locked_value; + } + markOop set_fast_locked() const { + return markOop(value() & ~lock_mask_in_place); + } + bool has_monitor() const { return ((value() & monitor_value) != 0); } @@ -279,7 +289,11 @@ class markOopDesc: public oopDesc { return (ObjectMonitor*) (value() ^ monitor_value); } bool has_displaced_mark_helper() const { - return ((value() & unlocked_value) == 0); + if (UseAltFastLocking) { + return (value() & lock_mask_in_place) == monitor_value; + } else { + return ((value() & unlocked_value) == 0); + } } markOop displaced_mark_helper() const { assert(has_displaced_mark_helper(), "check"); diff --git a/src/hotspot/share/oops/oop.cpp b/src/hotspot/share/oops/oop.cpp index 0470cbbde1a..fcf1be508cd 100644 --- a/src/hotspot/share/oops/oop.cpp +++ b/src/hotspot/share/oops/oop.cpp @@ -124,7 +124,7 @@ bool oopDesc::is_oop(oop obj, bool ignore_mark_word) { } // Header verification: the mark is typically non-NULL. If we're - // at a safepoint, it must not be null. + // at a safepoint, it must not be null, except when using the new lightweight locking. // Outside of a safepoint, the header could be changing (for example, // another thread could be inflating a lock on this object). 
if (ignore_mark_word) { @@ -133,7 +133,7 @@ bool oopDesc::is_oop(oop obj, bool ignore_mark_word) { if (obj->mark_raw() != NULL) { return true; } - return !SafepointSynchronize::is_at_safepoint(); + return !SafepointSynchronize::is_at_safepoint() || UseAltFastLocking; } // used only for asserts and guarantees diff --git a/src/hotspot/share/opto/c2_CodeStubs.cpp b/src/hotspot/share/opto/c2_CodeStubs.cpp new file mode 100644 index 00000000000..95441597773 --- /dev/null +++ b/src/hotspot/share/opto/c2_CodeStubs.cpp @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/codeBuffer.hpp" +#include "code/codeBlob.hpp" +#include "opto/c2_CodeStubs.hpp" +#include "opto/c2_MacroAssembler.hpp" +#include "opto/compile.hpp" +#include "opto/output.hpp" + +C2CodeStubList::C2CodeStubList() { + Arena* arena = Compile::current()->comp_arena(); + _stubs = new(arena) GrowableArray(arena, 8, 0, NULL); +} + +void C2CodeStubList::emit(CodeBuffer& cb) { + C2_MacroAssembler masm(&cb); + for (int i = _stubs->length() - 1; i >= 0; i--) { + C2CodeStub* stub = _stubs->at(i); + int max_size = stub->max_size(); + // Make sure there is enough space in the code buffer + if (cb.insts()->maybe_expand_to_ensure_remaining(max_size) && cb.blob() == NULL) { + ciEnv::current()->record_failure("CodeCache is full"); + return; + } + + DEBUG_ONLY(int size_before = cb.insts_size();) + + stub->emit(masm); + + DEBUG_ONLY(int actual_size = cb.insts_size() - size_before;) + assert(max_size >= actual_size, "Expected stub size (%d) must be larger than or equal to actual stub size (%d)", max_size, actual_size); + } + _stubs->clear(); +} diff --git a/src/hotspot/share/opto/c2_CodeStubs.hpp b/src/hotspot/share/opto/c2_CodeStubs.hpp new file mode 100644 index 00000000000..a1658c73405 --- /dev/null +++ b/src/hotspot/share/opto/c2_CodeStubs.hpp @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "asm/assembler.hpp" +#include "asm/codeBuffer.hpp" +#include "memory/allocation.hpp" +#include "opto/c2_MacroAssembler.hpp" +#include "utilities/growableArray.hpp" + +#ifndef SHARE_OPTO_C2_CODESTUBS_HPP +#define SHARE_OPTO_C2_CODESTUBS_HPP + +class C2CodeStub : public ResourceObj { +private: + Label _entry; + Label _continuation; + +protected: + C2CodeStub() : + _entry(), + _continuation() {} + ~C2CodeStub() {} + +public: + Label& entry() { return _entry; } + Label& continuation() { return _continuation; } + + virtual void emit(C2_MacroAssembler& masm) = 0; + virtual int max_size() const = 0; +}; + +class C2CodeStubList { +private: + GrowableArray* _stubs; + +public: + C2CodeStubList(); + ~C2CodeStubList() {} + void add_stub(C2CodeStub* stub) { _stubs->append(stub); } + void emit(CodeBuffer& cb); +}; + +#ifdef _LP64 +class C2HandleAnonOMOwnerStub : public C2CodeStub { +private: + Register _monitor; + Register _tmp; +public: + C2HandleAnonOMOwnerStub(Register monitor, Register tmp = noreg) : C2CodeStub(), + _monitor(monitor), _tmp(tmp) {} + Register monitor() { return _monitor; } + Register tmp() { return _tmp; } + int max_size() const; + void emit(C2_MacroAssembler& masm); +}; +#endif + +#endif // SHARE_OPTO_C2_CODESTUBS_HPP diff --git a/src/hotspot/share/opto/compile.cpp b/src/hotspot/share/opto/compile.cpp index 928596e0f07..df3c9e82f8a 100644 --- a/src/hotspot/share/opto/compile.cpp +++ b/src/hotspot/share/opto/compile.cpp @@ -1210,6 +1210,7 @@ void Compile::Init(int aliaslevel) { _type_verify_symmetry = true; _exception_backedge = false; #endif + _stub_list = new(comp_arena()) C2CodeStubList(); } //---------------------------init_start---------------------------------------- diff --git a/src/hotspot/share/opto/compile.hpp b/src/hotspot/share/opto/compile.hpp index 3c35b77af12..f08bd1bb79a 100644 --- a/src/hotspot/share/opto/compile.hpp +++ b/src/hotspot/share/opto/compile.hpp @@ -35,6 +35,7 @@ #include "libadt/vectset.hpp" #include "jfr/jfrEvents.hpp" #include "memory/resourceArea.hpp" +#include "opto/c2_CodeStubs.hpp" #include "oops/methodData.hpp" #include "opto/idealGraphPrinter.hpp" #include "opto/phasetype.hpp" @@ -610,6 +611,7 @@ class Compile : public Phase { int _first_block_size; // Size of unvalidated entry point code / OSR poison code ExceptionHandlerTable _handler_table; // Table of native-code exception handlers ImplicitExceptionTable _inc_table; // Table of implicit null checks in native code + C2CodeStubList* _stub_list; // List of code stubs OopMapSet* _oop_map_set; // Table of oop maps (one for each safepoint location) static int _CompiledZap_count; // counter compared against CompileZap[First/Last] BufferBlob* _scratch_buffer_blob; // For temporary code buffers. 
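For illustration only: a hypothetical stub (MyExampleStub is not part of this change) showing how a
backend would use the C2CodeStub API introduced above, assuming each subclass binds entry() itself
and jumps back to continuation() when it is done.

    // Hypothetical example, not in this patch.
    class MyExampleStub : public C2CodeStub {
    private:
      Register _obj;
    public:
      MyExampleStub(Register obj) : C2CodeStub(), _obj(obj) {}
      int max_size() const { return 16; }   // conservative bound, checked in C2CodeStubList::emit()
      void emit(C2_MacroAssembler& masm) {
        masm.bind(entry());                 // reached via a branch from the main instruction stream
        masm.xorl(_obj, _obj);              // placeholder for the out-of-line slow-path work
        masm.jmp(continuation());           // resume right after the inline fast path
      }
    };

    // In a Mach node encoding (outside a scratch-size emission pass):
    //   MyExampleStub* stub = new (Compile::current()->comp_arena()) MyExampleStub(reg);
    //   Compile::current()->add_stub(stub);
    //   __ jcc(Assembler::notEqual, stub->entry());
    //   __ bind(stub->continuation());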
@@ -965,6 +967,9 @@ class Compile : public Phase { // Constant table ConstantTable& constant_table() { return _constant_table; } + // Code stubs list + void add_stub(C2CodeStub* stub) { _stub_list->add_stub(stub); } + MachConstantBaseNode* mach_constant_base_node(); bool has_mach_constant_base_node() const { return _mach_constant_base_node != NULL; } // Generated by adlc, true if CallNode requires MachConstantBase. diff --git a/src/hotspot/share/opto/output.cpp b/src/hotspot/share/opto/output.cpp index 5a8197feb82..a56fabb66d6 100644 --- a/src/hotspot/share/opto/output.cpp +++ b/src/hotspot/share/opto/output.cpp @@ -1542,6 +1542,10 @@ void Compile::fill_buffer(CodeBuffer* cb, uint* blk_starts) { #endif if (failing()) return; + // Fill in stubs. + _stub_list->emit(*cb); + if (C->failing()) return; + #ifndef PRODUCT // Information on the size of the method, without the extraneous code Scheduling::increment_method_size(cb->insts_size()); diff --git a/src/hotspot/share/prims/jvmtiEnvBase.cpp b/src/hotspot/share/prims/jvmtiEnvBase.cpp index 093c790425c..4dfa96f1eac 100644 --- a/src/hotspot/share/prims/jvmtiEnvBase.cpp +++ b/src/hotspot/share/prims/jvmtiEnvBase.cpp @@ -953,7 +953,7 @@ JvmtiEnvBase::get_object_monitor_usage(JavaThread* calling_thread, jobject objec uint32_t debug_bits = 0; // first derive the object's owner and entry_count (if any) - { + if (!UseAltFastLocking) { // Revoke any biases before querying the mark word if (at_safepoint) { BiasedLocking::revoke_at_safepoint(hobj); @@ -1022,9 +1022,25 @@ JvmtiEnvBase::get_object_monitor_usage(JavaThread* calling_thread, jobject objec ret.entry_count = count_locked_objects(owning_thread, hobj); } // implied else: entry_count == 0 + } else { + ThreadsListHandle tlh; + owning_thread = ObjectSynchronizer::get_lock_owner(tlh.list(), hobj); + if (owning_thread != NULL) { // monitor is owned + Handle th(current_thread, owning_thread->threadObj()); + ret.owner = (jthread)jni_reference(calling_thread, th); + ret.entry_count = count_locked_objects(owning_thread, hobj); + } } jint nWant = 0, nWait = 0; + if (UseAltFastLocking) { + markOop mark = hobj->mark(); + if (mark->has_monitor()) { + mon = mark->monitor(); + assert(mon != NULL, "must have monitor"); + } + } + if (mon != NULL) { // this object has a heavyweight monitor nWant = mon->contentions(); // # of threads contending for monitor diff --git a/src/hotspot/share/runtime/arguments.cpp b/src/hotspot/share/runtime/arguments.cpp index 2843cf88052..25c89687933 100644 --- a/src/hotspot/share/runtime/arguments.cpp +++ b/src/hotspot/share/runtime/arguments.cpp @@ -2093,6 +2093,28 @@ bool Arguments::check_vm_args_consistency() { log_warning(arguments) ("NUMA support for Heap depends on the file system when AllocateHeapAt option is used.\n"); } } + + if (UseAltFastLocking) { +#if !defined(_LP64) || !(defined(X86) || defined(AARCH64)) + jio_fprintf(defaultStream::error_stream(), "Platform do not support UseAltFastLocking.\n"); + status = false; +#else + if (UseBiasedLocking) { + FLAG_SET_DEFAULT(UseBiasedLocking, false); + } + + if (UseHeavyMonitors) { + FLAG_SET_DEFAULT(UseHeavyMonitors, false); + } +#endif + +#if INCLUDE_JVMCI + if (EnableJVMCI || UseAOT) { + FLAG_SET_DEFAULT(UseAltFastLocking, false); + } +#endif + } + return status; } diff --git a/src/hotspot/share/runtime/globals.hpp b/src/hotspot/share/runtime/globals.hpp index db2d48b6fe6..4ccdacde6cb 100644 --- a/src/hotspot/share/runtime/globals.hpp +++ b/src/hotspot/share/runtime/globals.hpp @@ -2689,6 +2689,10 @@ 
define_pd_global(uint64_t,MaxRAM, 1ULL*G); \ experimental(bool, UseFastUnorderedTimeStamps, false, \ "Use platform unstable time where supported for timestamps only") \ + \ + diagnostic(bool, UseAltFastLocking, false, \ + "Alternative lightweight locking") \ + \ #define VM_FLAGS(develop, \ develop_pd, \ diff --git a/src/hotspot/share/runtime/lockStack.cpp b/src/hotspot/share/runtime/lockStack.cpp new file mode 100644 index 00000000000..a4ec2f3994f --- /dev/null +++ b/src/hotspot/share/runtime/lockStack.cpp @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2022, Red Hat, Inc. All rights reserved. + * Copyright Amazon.com Inc. or its affiliates. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "memory/allocation.hpp" +#include "runtime/lockStack.inline.hpp" +#include "runtime/safepoint.hpp" +#include "runtime/thread.hpp" +#include "utilities/copy.hpp" +#include "utilities/ostream.hpp" + +const int LockStack::lock_stack_offset = in_bytes(JavaThread::lock_stack_offset()); +const int LockStack::lock_stack_top_offset = in_bytes(JavaThread::lock_stack_top_offset()); +const int LockStack::lock_stack_base_offset = in_bytes(JavaThread::lock_stack_base_offset()); + +LockStack::LockStack(JavaThread* jt) : + _top(lock_stack_base_offset), _base() { +#ifdef ASSERT + for (int i = 0; i < CAPACITY; i++) { + _base[i] = NULL; + } +#endif +} + +uint32_t LockStack::start_offset() { + int offset = lock_stack_base_offset; + assert(offset > 0, "must be positive offset"); + return static_cast(offset); +} + +uint32_t LockStack::end_offset() { + int offset = lock_stack_base_offset + CAPACITY * oopSize; + assert(offset > 0, "must be positive offset"); + return static_cast(offset); +} + +#ifndef PRODUCT +void LockStack::verify(const char* msg) const { + assert(UseAltFastLocking, "never use lock-stack when light weight locking is disabled"); + assert((_top <= end_offset()), "lockstack overflow: _top %d end_offset %d", _top, end_offset()); + assert((_top >= start_offset()), "lockstack underflow: _top %d end_offset %d", _top, start_offset()); + if (SafepointSynchronize::is_at_safepoint() || (Thread::current()->is_Java_thread() && is_owning_thread())) { + int top = to_index(_top); + for (int i = 0; i < top; i++) { + assert(_base[i] != NULL, "no zapped before top"); + for (int j = i + 1; j < top; j++) { + assert(_base[i] != _base[j], "entries must be unique: %s", msg); + } + } + for (int i = top; i < CAPACITY; i++) { + assert(_base[i] == NULL, "only zapped entries after top: i: %d, top: %d, entry: " PTR_FORMAT, i, top, p2i(_base[i])); + } + } +} 
+#endif diff --git a/src/hotspot/share/runtime/lockStack.hpp b/src/hotspot/share/runtime/lockStack.hpp new file mode 100644 index 00000000000..ce6a96bcfe6 --- /dev/null +++ b/src/hotspot/share/runtime/lockStack.hpp @@ -0,0 +1,96 @@ +/* + * Copyright (c) 2022, Red Hat, Inc. All rights reserved. + * Copyright Amazon.com Inc. or its affiliates. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef SHARE_RUNTIME_LOCKSTACK_HPP +#define SHARE_RUNTIME_LOCKSTACK_HPP + +#include "oops/oopsHierarchy.hpp" +#include "utilities/globalDefinitions.hpp" +#include "utilities/sizes.hpp" + +class Thread; +class OopClosure; + +class LockStack { + friend class VMStructs; +private: + static const int CAPACITY = 8; + + // TODO: It would be very useful if JavaThread::lock_stack_offset() and friends were constexpr, + // but this is currently not the case because we're using offset_of() which is non-constexpr, + // GCC would warn about non-standard-layout types if we were using offsetof() (which *is* constexpr). + static const int lock_stack_offset; + static const int lock_stack_top_offset; + static const int lock_stack_base_offset; + + // The offset of the next element, in bytes, relative to the JavaThread structure. + // We do this instead of a simple index into the array because this allows for + // efficient addressing in generated code. + uint32_t _top; + oop _base[CAPACITY]; + + // Get the owning thread of this lock-stack. + inline JavaThread* get_thread() const; + + // Tests if the calling thread is the thread that owns this lock-stack. + bool is_owning_thread() const; + + // Verifies consistency of the lock-stack. + void verify(const char* msg) const PRODUCT_RETURN; + + // Given an offset (in bytes) calculate the index into the lock-stack. + static inline int to_index(uint32_t offset); + +public: + static ByteSize top_offset() { return byte_offset_of(LockStack, _top); } + static ByteSize base_offset() { return byte_offset_of(LockStack, _base); } + + LockStack(JavaThread* jt); + + // The boundary indicies of the lock-stack. + static uint32_t start_offset(); + static uint32_t end_offset(); + + // Return true if we have room to push onto this lock-stack, false otherwise. + inline bool can_push() const; + + // Pushes an oop on this lock-stack. + inline void push(oop o); + + // Pops an oop from this lock-stack. + inline oop pop(); + + // Removes an oop from an arbitrary location of this lock-stack. + inline void remove(oop o); + + // Tests whether the oop is on this lock-stack. 
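+  // (A simple linear scan; the lock-stack holds at most CAPACITY entries.)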
+ inline bool contains(oop o) const; + + // GC support + inline void oops_do(OopClosure* cl); + +}; + +#endif // SHARE_RUNTIME_LOCKSTACK_HPP diff --git a/src/hotspot/share/runtime/lockStack.inline.hpp b/src/hotspot/share/runtime/lockStack.inline.hpp new file mode 100644 index 00000000000..b0df5b56849 --- /dev/null +++ b/src/hotspot/share/runtime/lockStack.inline.hpp @@ -0,0 +1,121 @@ +/* + * Copyright (c) 2022, Red Hat, Inc. All rights reserved. + * Copyright Amazon.com Inc. or its affiliates. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef SHARE_RUNTIME_LOCKSTACK_INLINE_HPP +#define SHARE_RUNTIME_LOCKSTACK_INLINE_HPP + +#include "memory/iterator.hpp" +#include "runtime/lockStack.hpp" +#include "runtime/safepoint.hpp" +#include "runtime/thread.hpp" + +inline int LockStack::to_index(uint32_t offset) { + return (offset - lock_stack_base_offset) / oopSize; +} + +JavaThread* LockStack::get_thread() const { + char* addr = reinterpret_cast(const_cast(this)); + return reinterpret_cast(addr - lock_stack_offset); +} + +inline bool LockStack::can_push() const { + return to_index(_top) < CAPACITY; +} + +inline bool LockStack::is_owning_thread() const { + JavaThread* thread = JavaThread::current(); + bool is_owning = &thread->lock_stack() == this; + assert(is_owning == (get_thread() == thread), "is_owning sanity"); + return is_owning; +} + +inline void LockStack::push(oop o) { + verify("pre-push"); + assert(oopDesc::is_oop(o), "must be"); + assert(!contains(o), "entries must be unique"); + assert(can_push(), "must have room"); + assert(_base[to_index(_top)] == NULL, "expect zapped entry"); + _base[to_index(_top)] = o; + _top += oopSize; + verify("post-push"); +} + +inline oop LockStack::pop() { + verify("pre-pop"); + assert(to_index(_top) > 0, "underflow, probably unbalanced push/pop"); + _top -= oopSize; + oop o = _base[to_index(_top)]; +#ifdef ASSERT + _base[to_index(_top)] = NULL; +#endif + assert(!contains(o), "entries must be unique: " PTR_FORMAT, p2i(o)); + verify("post-pop"); + return o; +} + +inline void LockStack::remove(oop o) { + verify("pre-remove"); + assert(contains(o), "entry must be present: " PTR_FORMAT, p2i(o)); + int end = to_index(_top); + for (int i = 0; i < end; i++) { + if (_base[i] == o) { + int last = end - 1; + for (; i < last; i++) { + _base[i] = _base[i + 1]; + } + _top -= oopSize; +#ifdef ASSERT + _base[to_index(_top)] = NULL; +#endif + break; + } + } + assert(!contains(o), "entries must be unique: " PTR_FORMAT, p2i(o)); + verify("post-remove"); +} + +inline bool LockStack::contains(oop o) const { + 
verify("pre-contains"); + int end = to_index(_top); + for (int i = end - 1; i >= 0; i--) { + if (_base[i] == o) { + verify("post-contains"); + return true; + } + } + verify("post-contains"); + return false; +} + +inline void LockStack::oops_do(OopClosure* cl) { + verify("pre-oops-do"); + int end = to_index(_top); + for (int i = 0; i < end; i++) { + cl->do_oop(&_base[i]); + } + verify("post-oops-do"); +} + +#endif // SHARE_RUNTIME_LOCKSTACK_INLINE_HPP diff --git a/src/hotspot/share/runtime/objectMonitor.cpp b/src/hotspot/share/runtime/objectMonitor.cpp index b37ddcc1a6f..ef610b703c9 100644 --- a/src/hotspot/share/runtime/objectMonitor.cpp +++ b/src/hotspot/share/runtime/objectMonitor.cpp @@ -285,7 +285,7 @@ void ObjectMonitor::enter(TRAPS) { return; } - if (Self->is_lock_owned ((address)cur)) { + if (!UseAltFastLocking && Self->is_lock_owned ((address)cur)) { assert(_recursions == 0, "internal state error"); _recursions = 1; // Commute owner from a thread-specific on-stack BasicLockObject address to @@ -954,7 +954,7 @@ void ObjectMonitor::exit(bool not_suspended, TRAPS) { } Thread * const Self = THREAD; if (THREAD != _owner) { - if (THREAD->is_lock_owned((address) _owner)) { + if (!UseAltFastLocking && THREAD->is_lock_owned((address) _owner)) { // Transmute _owner from a BasicLock pointer to a Thread address. // We don't need to hold _mutex for this transition. // Non-null to Non-null is safe as long as all readers can @@ -1401,7 +1401,7 @@ intptr_t ObjectMonitor::complete_exit(TRAPS) { DeferredInitialize(); if (THREAD != _owner) { - if (THREAD->is_lock_owned ((address)_owner)) { + if (!UseAltFastLocking && THREAD->is_lock_owned ((address)_owner)) { assert(_recursions == 0, "internal state error"); _owner = THREAD; // Convert from basiclock addr to Thread addr _recursions = 0; @@ -1440,7 +1440,7 @@ void ObjectMonitor::reenter(intptr_t recursions, TRAPS) { #define CHECK_OWNER() \ do { \ if (THREAD != _owner) { \ - if (THREAD->is_lock_owned((address) _owner)) { \ + if (!UseAltFastLocking && THREAD->is_lock_owned((address) _owner)) { \ _owner = THREAD; /* Convert from basiclock addr to Thread addr */ \ _recursions = 0; \ } else { \ @@ -1455,7 +1455,7 @@ void ObjectMonitor::reenter(intptr_t recursions, TRAPS) { void ObjectMonitor::check_slow(TRAPS) { TEVENT(check_slow - throw IMSX); - assert(THREAD != _owner && !THREAD->is_lock_owned((address) _owner), "must not be owner"); + assert(THREAD != _owner && !(!UseAltFastLocking && THREAD->is_lock_owned((address) _owner)), "must not be owner"); THROW_MSG(vmSymbols::java_lang_IllegalMonitorStateException(), "current thread not owner"); } diff --git a/src/hotspot/share/runtime/objectMonitor.hpp b/src/hotspot/share/runtime/objectMonitor.hpp index 30595b43ab8..84298f41a86 100644 --- a/src/hotspot/share/runtime/objectMonitor.hpp +++ b/src/hotspot/share/runtime/objectMonitor.hpp @@ -153,6 +153,11 @@ class ObjectMonitor { DEFINE_PAD_MINUS_SIZE(0, DEFAULT_CACHE_LINE_SIZE, sizeof(volatile markOop) + sizeof(void * volatile) + sizeof(ObjectMonitor *)); + public: + // NOTE: Typed as uintptr_t so that we can pick it up in SA, via vmStructs. 
+ static const uintptr_t ANONYMOUS_OWNER = 1; + private: + static void* anon_owner_ptr() { return reinterpret_cast(ANONYMOUS_OWNER); } protected: // protected for JvmtiRawMonitor void * volatile _owner; // pointer to owning thread OR BasicLock volatile jlong _previous_owner_tid; // thread id of the previous owner of the monitor @@ -253,6 +258,20 @@ class ObjectMonitor { void* owner() const; void set_owner(void* owner); + void set_owner_anonymous() { + assert(_owner == NULL, "sanity"); + _owner = anon_owner_ptr(); + } + + bool is_owner_anonymous() const { + return _owner == anon_owner_ptr(); + } + + void set_owner_from_anonymous(Thread* owner) { + assert(_owner == anon_owner_ptr(), "sanity"); + _owner = owner; + } + jint waiters() const; jint count() const; diff --git a/src/hotspot/share/runtime/objectMonitor.inline.hpp b/src/hotspot/share/runtime/objectMonitor.inline.hpp index f4a5fbe619f..2b1ef100e59 100644 --- a/src/hotspot/share/runtime/objectMonitor.inline.hpp +++ b/src/hotspot/share/runtime/objectMonitor.inline.hpp @@ -25,12 +25,24 @@ #ifndef SHARE_VM_RUNTIME_OBJECTMONITOR_INLINE_HPP #define SHARE_VM_RUNTIME_OBJECTMONITOR_INLINE_HPP +#include "runtime/lockStack.inline.hpp" + inline intptr_t ObjectMonitor::is_entered(TRAPS) const { if (UseWispMonitor) { THREAD = WispThread::current(THREAD); } - if (THREAD == _owner || THREAD->is_lock_owned((address) _owner)) { - return 1; + if (UseAltFastLocking) { + if (is_owner_anonymous()) { + assert(THREAD->is_Java_thread(), "sanity"); + JavaThread* jt = (JavaThread*)THREAD; + return jt->lock_stack().contains((oop)object()) ? 1 : 0; + } else { + return THREAD == _owner ? 1 : 0; + } + } else { + if (THREAD == _owner || THREAD->is_lock_owned((address) _owner)) { + return 1; + } } return 0; } @@ -90,7 +102,7 @@ inline bool ObjectMonitor::check(TRAPS) { THREAD = WispThread::current(THREAD); } if (THREAD != _owner) { - if (THREAD->is_lock_owned((address) _owner)) { + if (!UseAltFastLocking && THREAD->is_lock_owned((address) _owner)) { _owner = THREAD; // regain ownership of inflated monitor assert (_recursions == 0, "invariant") ; } else { diff --git a/src/hotspot/share/runtime/synchronizer.cpp b/src/hotspot/share/runtime/synchronizer.cpp index 02a288ba1b7..f0810fdb8b5 100644 --- a/src/hotspot/share/runtime/synchronizer.cpp +++ b/src/hotspot/share/runtime/synchronizer.cpp @@ -37,6 +37,7 @@ #include "runtime/coroutine.hpp" #include "runtime/handles.inline.hpp" #include "runtime/interfaceSupport.inline.hpp" +#include "runtime/lockStack.inline.hpp" #include "runtime/mutexLocker.hpp" #include "runtime/objectMonitor.hpp" #include "runtime/objectMonitor.inline.hpp" @@ -164,10 +165,18 @@ bool ObjectSynchronizer::quick_notify(oopDesc * obj, Thread * self, bool all) { if (obj == NULL) return false; // slow-path for invalid obj const markOop mark = obj->mark(); - if (mark->has_locker() && self->is_lock_owned((address)mark->locker())) { - // Degenerate notify - // stack-locked by caller so by definition the implied waitset is empty. - return true; + if (UseAltFastLocking) { + if (mark->is_fast_locked() && reinterpret_cast(self)->lock_stack().contains(cast_to_oop(obj))) { + // Degenerate notify + // fast-locked by caller so by definition the implied waitset is empty. + return true; + } + } else { + if (mark->has_locker() && self->is_lock_owned((address)mark->locker())) { + // Degenerate notify + // stack-locked by caller so by definition the implied waitset is empty. 
+ return true; + } } if (mark->has_monitor()) { @@ -233,17 +242,19 @@ bool ObjectSynchronizer::quick_enter(oop obj, Thread * Self, return true; } - // This Java Monitor is inflated so obj's header will never be - // displaced to this thread's BasicLock. Make the displaced header - // non-NULL so this BasicLock is not seen as recursive nor as - // being locked. We do this unconditionally so that this thread's - // BasicLock cannot be mis-interpreted by any stack walkers. For - // performance reasons, stack walkers generally first check for - // Biased Locking in the object's header, the second check is for - // stack-locking in the object's header, the third check is for - // recursive stack-locking in the displaced header in the BasicLock, - // and last are the inflated Java Monitor (ObjectMonitor) checks. - lock->set_displaced_header(markOopDesc::unused_mark()); + if (!UseAltFastLocking) { + // This Java Monitor is inflated so obj's header will never be + // displaced to this thread's BasicLock. Make the displaced header + // non-NULL so this BasicLock is not seen as recursive nor as + // being locked. We do this unconditionally so that this thread's + // BasicLock cannot be mis-interpreted by any stack walkers. For + // performance reasons, stack walkers generally first check for + // Biased Locking in the object's header, the second check is for + // stack-locking in the object's header, the third check is for + // recursive stack-locking in the displaced header in the BasicLock, + // and last are the inflated Java Monitor (ObjectMonitor) checks. + lock->set_displaced_header(markOopDesc::unused_mark()); + } if (owner == NULL && Atomic::replace_if_null(Self, &(m->_owner))) { assert(m->_recursions == 0, "invariant"); @@ -293,50 +304,88 @@ void ObjectSynchronizer::fast_exit(oop object, BasicLock* lock, TRAPS) { assert(mark == markOopDesc::INFLATING() || !mark->has_bias_pattern(), "should not see bias pattern here"); - markOop dhw = lock->displaced_header(); - if (dhw == NULL) { - // If the displaced header is NULL, then this exit matches up with - // a recursive enter. No real work to do here except for diagnostics. -#ifndef PRODUCT - if (mark != markOopDesc::INFLATING()) { - // Only do diagnostics if we are not racing an inflation. Simply - // exiting a recursive enter of a Java Monitor that is being - // inflated is safe; see the has_monitor() comment below. - assert(!mark->is_neutral(), "invariant"); - assert(!mark->has_locker() || - THREAD->is_lock_owned((address)mark->locker()), "invariant"); - if (mark->has_monitor()) { - // The BasicLock's displaced_header is marked as a recursive - // enter and we have an inflated Java Monitor (ObjectMonitor). - // This is a special case where the Java Monitor was inflated - // after this thread entered the stack-lock recursively. When a - // Java Monitor is inflated, we cannot safely walk the Java - // Monitor owner's stack and update the BasicLocks because a - // Java Monitor can be asynchronously inflated by a thread that - // does not own the Java Monitor. - ObjectMonitor * m = mark->monitor(); - assert(((oop)(m->object()))->mark() == mark, "invariant"); - assert(m->is_entered(THREAD), "invariant"); + if (UseAltFastLocking) { + // Fast-locking does not use the 'lock' argument. + if (mark->is_fast_locked()) { + markOop unlocked_mark = mark->set_unlocked(); + markOop old_mark = object->cas_set_mark(unlocked_mark, mark); + if (old_mark != mark) { + // Another thread won the CAS, it must have inflated the monitor. 
+ // It can only have installed an anonymously locked monitor at this point. + // Fetch that monitor, set owner correctly to this thread, and + // exit it (allowing waiting threads to enter). + assert(old_mark->has_monitor(), "must have monitor"); + ObjectMonitor* monitor = old_mark->monitor(); + assert(monitor->is_owner_anonymous(), "must be anonymous owner"); + monitor->set_owner_from_anonymous(THREAD); + monitor->exit(true, THREAD); } + assert(THREAD->is_Java_thread(), "sanity"); + JavaThread* jt = (JavaThread*)THREAD; + LockStack& lock_stack = jt->lock_stack(); + lock_stack.remove(object); + return; } + } else { + markOop dhw = lock->displaced_header(); + if (dhw == NULL) { + // If the displaced header is NULL, then this exit matches up with + // a recursive enter. No real work to do here except for diagnostics. +#ifndef PRODUCT + if (mark != markOopDesc::INFLATING()) { + // Only do diagnostics if we are not racing an inflation. Simply + // exiting a recursive enter of a Java Monitor that is being + // inflated is safe; see the has_monitor() comment below. + assert(!mark->is_neutral(), "invariant"); + assert(!mark->has_locker() || + THREAD->is_lock_owned((address)mark->locker()), "invariant"); + if (mark->has_monitor()) { + // The BasicLock's displaced_header is marked as a recursive + // enter and we have an inflated Java Monitor (ObjectMonitor). + // This is a special case where the Java Monitor was inflated + // after this thread entered the stack-lock recursively. When a + // Java Monitor is inflated, we cannot safely walk the Java + // Monitor owner's stack and update the BasicLocks because a + // Java Monitor can be asynchronously inflated by a thread that + // does not own the Java Monitor. + ObjectMonitor * m = mark->monitor(); + assert(((oop)(m->object()))->mark() == mark, "invariant"); + assert(m->is_entered(THREAD), "invariant"); + } + } #endif - return; - } - - if (mark == (markOop) lock) { - // If the object is stack-locked by the current thread, try to - // swing the displaced header from the BasicLock back to the mark. - assert(dhw->is_neutral(), "invariant"); - if (object->cas_set_mark(dhw, mark) == mark) { - TEVENT(fast_exit: release stack-lock); return; } + + if (mark == (markOop) lock) { + // If the object is stack-locked by the current thread, try to + // swing the displaced header from the BasicLock back to the mark. + assert(dhw->is_neutral(), "invariant"); + if (object->cas_set_mark(dhw, mark) == mark) { + TEVENT(fast_exit: release stack-lock); + return; + } + } } // We have to take the slow-path of possible inflation and then exit. - ObjectSynchronizer::inflate(THREAD, - object, - inflate_cause_vm_internal)->exit(true, THREAD); + if (UseAltFastLocking) { + ObjectMonitor* monitor = inflate(THREAD, object, inflate_cause_vm_internal); + if (monitor->is_owner_anonymous()) { + // It must be us. Pop lock object from lock stack. 
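+      // (We are exiting a lock we hold, so an anonymous owner can only stand in for our own
+      // fast-lock that some other thread inflated.)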
+ assert(THREAD->is_Java_thread(), "sanity"); + JavaThread* jt = (JavaThread*)THREAD; + LockStack& lock_stack = jt->lock_stack(); + oop popped = lock_stack.pop(); + assert(popped == object, "must be owned by this thread"); + monitor->set_owner_from_anonymous(THREAD); + } + monitor->exit(true, THREAD); + } else { + ObjectSynchronizer::inflate(THREAD, + object, + inflate_cause_vm_internal)->exit(true, THREAD); + } } void ObjectSynchronizer::fast_exit(Handle object, BasicLock* lock, TRAPS) { @@ -346,50 +395,88 @@ void ObjectSynchronizer::fast_exit(Handle object, BasicLock* lock, TRAPS) { assert(mark == markOopDesc::INFLATING() || !mark->has_bias_pattern(), "should not see bias pattern here"); - markOop dhw = lock->displaced_header(); - if (dhw == NULL) { - // If the displaced header is NULL, then this exit matches up with - // a recursive enter. No real work to do here except for diagnostics. -#ifndef PRODUCT - if (mark != markOopDesc::INFLATING()) { - // Only do diagnostics if we are not racing an inflation. Simply - // exiting a recursive enter of a Java Monitor that is being - // inflated is safe; see the has_monitor() comment below. - assert(!mark->is_neutral(), "invariant"); - assert(!mark->has_locker() || - THREAD->is_lock_owned((address)mark->locker()), "invariant"); - if (mark->has_monitor()) { - // The BasicLock's displaced_header is marked as a recursive - // enter and we have an inflated Java Monitor (ObjectMonitor). - // This is a special case where the Java Monitor was inflated - // after this thread entered the stack-lock recursively. When a - // Java Monitor is inflated, we cannot safely walk the Java - // Monitor owner's stack and update the BasicLocks because a - // Java Monitor can be asynchronously inflated by a thread that - // does not own the Java Monitor. - ObjectMonitor * m = mark->monitor(); - assert(((oop)(m->object()))->mark() == mark, "invariant"); - assert(m->is_entered(THREAD), "invariant"); + if (UseAltFastLocking) { + // Fast-locking does not use the 'lock' argument. + if (mark->is_fast_locked()) { + markOop unlocked_mark = mark->set_unlocked(); + markOop old_mark = object->cas_set_mark(unlocked_mark, mark); + if (old_mark != mark) { + // Another thread won the CAS, it must have inflated the monitor. + // It can only have installed an anonymously locked monitor at this point. + // Fetch that monitor, set owner correctly to this thread, and + // exit it (allowing waiting threads to enter). + assert(old_mark->has_monitor(), "must have monitor"); + ObjectMonitor* monitor = old_mark->monitor(); + assert(monitor->is_owner_anonymous(), "must be anonymous owner"); + monitor->set_owner_from_anonymous(THREAD); + monitor->exit(true, THREAD); } + assert(THREAD->is_Java_thread(), "sanity"); + JavaThread* jt = (JavaThread*)THREAD; + LockStack& lock_stack = jt->lock_stack(); + lock_stack.remove(object()); + return; } + } else { + markOop dhw = lock->displaced_header(); + if (dhw == NULL) { + // If the displaced header is NULL, then this exit matches up with + // a recursive enter. No real work to do here except for diagnostics. +#ifndef PRODUCT + if (mark != markOopDesc::INFLATING()) { + // Only do diagnostics if we are not racing an inflation. Simply + // exiting a recursive enter of a Java Monitor that is being + // inflated is safe; see the has_monitor() comment below. 
+ assert(!mark->is_neutral(), "invariant"); + assert(!mark->has_locker() || + THREAD->is_lock_owned((address)mark->locker()), "invariant"); + if (mark->has_monitor()) { + // The BasicLock's displaced_header is marked as a recursive + // enter and we have an inflated Java Monitor (ObjectMonitor). + // This is a special case where the Java Monitor was inflated + // after this thread entered the stack-lock recursively. When a + // Java Monitor is inflated, we cannot safely walk the Java + // Monitor owner's stack and update the BasicLocks because a + // Java Monitor can be asynchronously inflated by a thread that + // does not own the Java Monitor. + ObjectMonitor * m = mark->monitor(); + assert(((oop)(m->object()))->mark() == mark, "invariant"); + assert(m->is_entered(THREAD), "invariant"); + } + } #endif - return; - } - - if (mark == (markOop) lock) { - // If the object is stack-locked by the current thread, try to - // swing the displaced header from the BasicLock back to the mark. - assert(dhw->is_neutral(), "invariant"); - if (object->cas_set_mark(dhw, mark) == mark) { - TEVENT(fast_exit: release stack-lock); return; } + + if (mark == (markOop) lock) { + // If the object is stack-locked by the current thread, try to + // swing the displaced header from the BasicLock back to the mark. + assert(dhw->is_neutral(), "invariant"); + if (object->cas_set_mark(dhw, mark) == mark) { + TEVENT(fast_exit: release stack-lock); + return; + } + } } // We have to take the slow-path of possible inflation and then exit. - ObjectSynchronizer::inflate(THREAD, - object(), - inflate_cause_vm_internal)->exit(true, THREAD); + if (UseAltFastLocking) { + ObjectMonitor* monitor = inflate(THREAD, object(), inflate_cause_vm_internal); + if (monitor->is_owner_anonymous()) { + // It must be us. Pop lock object from lock stack. + assert(THREAD->is_Java_thread(), "sanity"); + JavaThread* jt = (JavaThread*)THREAD; + LockStack& lock_stack = jt->lock_stack(); + oop popped = lock_stack.pop(); + assert(popped == object(), "must be owned by this thread"); + monitor->set_owner_from_anonymous(THREAD); + } + monitor->exit(true, THREAD); + } else { + ObjectSynchronizer::inflate(THREAD, + object(), + inflate_cause_vm_internal)->exit(true, THREAD); + } } // ----------------------------------------------------------------------------- // Interpreter/Compiler Slow Case @@ -397,31 +484,52 @@ void ObjectSynchronizer::fast_exit(Handle object, BasicLock* lock, TRAPS) { // We don't need to use fast path here, because it must have been // failed in the interpreter/compiler code. void ObjectSynchronizer::slow_enter(Handle obj, BasicLock* lock, TRAPS) { - markOop mark = obj->mark(); - assert(!mark->has_bias_pattern(), "should not see bias pattern here"); - - if (mark->is_neutral()) { - // Anticipate successful CAS -- the ST of the displaced mark must - // be visible <= the ST performed by the CAS. - lock->set_displaced_header(mark); - if (mark == obj()->cas_set_mark((markOop) lock, mark)) { - TEVENT(slow_enter: release stacklock); + if (UseAltFastLocking) { + // Fast-locking does not use the 'lock' argument. + assert(THREAD->is_Java_thread(), "sanity"); + JavaThread* jt = (JavaThread*)THREAD; + LockStack& lock_stack = jt->lock_stack(); + if (lock_stack.can_push()) { + markOop mark = obj()->mark(); + if (mark->is_neutral()) { + assert(!lock_stack.contains(obj()), "thread must not already hold the lock"); + // Try to swing into 'fast-locked' state. 
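+        // (set_fast_locked() clears the lock bits; under UseAltFastLocking a 00 lock pattern
+        // means fast-locked rather than stack-locked.)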
+ markOop locked_mark = mark->set_fast_locked(); + markOop old_mark = obj()->cas_set_mark(locked_mark, mark); + if (old_mark == mark) { + // Successfully fast-locked, push object to lock-stack and return. + lock_stack.push(obj()); + return; + } + } + } + } else { + markOop mark = obj->mark(); + assert(!mark->has_bias_pattern(), "should not see bias pattern here"); + + if (mark->is_neutral()) { + // Anticipate successful CAS -- the ST of the displaced mark must + // be visible <= the ST performed by the CAS. + lock->set_displaced_header(mark); + if (mark == obj()->cas_set_mark((markOop) lock, mark)) { + TEVENT(slow_enter: release stacklock); + return; + } + // Fall through to inflate() ... + } else if (mark->has_locker() && + THREAD->is_lock_owned((address)mark->locker())) { + assert(lock != mark->locker(), "must not re-lock the same lock"); + assert(lock != (BasicLock*)obj->mark(), "don't relock with same BasicLock"); + lock->set_displaced_header(NULL); return; } - // Fall through to inflate() ... - } else if (mark->has_locker() && - THREAD->is_lock_owned((address)mark->locker())) { - assert(lock != mark->locker(), "must not re-lock the same lock"); - assert(lock != (BasicLock*)obj->mark(), "don't relock with same BasicLock"); - lock->set_displaced_header(NULL); - return; - } - - // The object header will never be displaced to this lock, - // so it does not matter what the value is, except that it - // must be non-zero to avoid looking like a re-entrant lock, - // and must not look locked either. - lock->set_displaced_header(markOopDesc::unused_mark()); + + // The object header will never be displaced to this lock, + // so it does not matter what the value is, except that it + // must be non-zero to avoid looking like a re-entrant lock, + // and must not look locked either. + lock->set_displaced_header(markOopDesc::unused_mark()); + } ObjectSynchronizer::inflate(THREAD, obj(), inflate_cause_monitor_enter)->enter(THREAD); @@ -585,8 +693,17 @@ void ObjectSynchronizer::notify(Handle obj, TRAPS) { assert(!obj->mark()->has_bias_pattern(), "biases should be revoked by now"); } markOop mark = obj->mark(); - if (mark->has_locker() && THREAD->is_lock_owned((address)mark->locker())) { - return; + if (UseAltFastLocking) { + assert(THREAD->is_Java_thread(), "sanity"); + JavaThread* jt = (JavaThread*)THREAD; + if ((mark->is_fast_locked() && jt->lock_stack().contains(obj()))) { + // Not inflated so there can't be any waiters to notify. + return; + } + } else { + if (mark->has_locker() && THREAD->is_lock_owned((address)mark->locker())) { + return; + } } ObjectSynchronizer::inflate(THREAD, obj(), @@ -601,8 +718,17 @@ void ObjectSynchronizer::notifyall(Handle obj, TRAPS) { } markOop mark = obj->mark(); - if (mark->has_locker() && THREAD->is_lock_owned((address)mark->locker())) { - return; + if (UseAltFastLocking) { + assert(THREAD->is_Java_thread(), "sanity"); + JavaThread* jt = (JavaThread*)THREAD; + if ((mark->is_fast_locked() && jt->lock_stack().contains(obj()))) { + // Not inflated so there can't be any waiters to notify. 
+      return;
+    }
+  } else {
+    if (mark->has_locker() && THREAD->is_lock_owned((address)mark->locker())) {
+      return;
+    }
   }
 
   ObjectSynchronizer::inflate(THREAD,
                               obj(),
@@ -648,7 +774,7 @@ static volatile int ForceMonitorScavenge = 0; // Scavenge required and pending
 
 static markOop ReadStableMark(oop obj) {
   markOop mark = obj->mark();
-  if (!mark->is_being_inflated()) {
+  if (!mark->is_being_inflated() || UseAltFastLocking) {
     return mark;       // normal fast-path return
   }
 
@@ -772,6 +898,13 @@ static inline intptr_t get_next_hash(Thread * Self, oop obj) {
   return value;
 }
 
+// Can be called from non JavaThreads (e.g., VMThread) for FastHashCode
+// calculations as part of JVM/TI tagging.
+static bool is_lock_owned(Thread* thread, oop obj) {
+  assert(UseAltFastLocking, "only call this with new lightweight locking enabled");
+  return thread->is_Java_thread() ? reinterpret_cast<JavaThread*>(thread)->lock_stack().contains(obj) : false;
+}
+
 intptr_t ObjectSynchronizer::FastHashCode(Thread * Self, oop obj) {
   if (UseBiasedLocking) {
     // NOTE: many places throughout the JVM do not expect a safepoint
@@ -835,7 +968,14 @@ intptr_t ObjectSynchronizer::FastHashCode(Thread * Self, oop obj) {
       return hash;
     }
     // Skip to the following code to reduce code size
-  } else if (Self->is_lock_owned((address)mark->locker())) {
+  } else if (UseAltFastLocking && mark->is_fast_locked() && is_lock_owned(Self, obj)) {
+    // This is a fast lock owned by the calling thread so use the
+    // markOop from the object.
+    hash = mark->hash();
+    if (hash != 0) {                  // if it has a hash, just return it
+      return hash;
+    }
+  } else if (!UseAltFastLocking && Self->is_lock_owned((address)mark->locker())) {
     temp = mark->displaced_mark_helper(); // this is a lightweight monitor owned
     assert(temp->is_neutral(), "invariant");
     hash = temp->hash();              // by current thread, check if the displaced
@@ -899,9 +1039,15 @@ bool ObjectSynchronizer::current_thread_holds_lock(JavaThread* thread,
   markOop mark = ReadStableMark(obj);
 
   // Uncontended case, header points to stack
-  if (mark->has_locker()) {
+  if (!UseAltFastLocking && mark->has_locker()) {
     return thread->is_lock_owned((address)mark->locker());
   }
+
+  if (UseAltFastLocking && mark->is_fast_locked()) {
+    // fast-locking case, see if lock is in current's lock stack
+    return thread->lock_stack().contains(h_obj());
+  }
+
   // Contended case, header points to ObjectMonitor (tagged pointer)
   if (mark->has_monitor()) {
     ObjectMonitor* monitor = mark->monitor();
@@ -938,11 +1084,15 @@ ObjectSynchronizer::LockOwnership ObjectSynchronizer::query_lock_ownership
   markOop mark = ReadStableMark(obj);
 
   // CASE: stack-locked.  Mark points to a BasicLock on the owner's stack.
-  if (mark->has_locker()) {
+  if (!UseAltFastLocking && mark->has_locker()) {
     return self->is_lock_owned((address)mark->locker()) ?
       owner_self : owner_other;
   }
 
+  if (UseAltFastLocking && mark->is_fast_locked()) {
+    return is_lock_owned(self, obj) ? owner_self : owner_other;
+  }
+
   // CASE: inflated. Mark (tagged pointer) points to an objectMonitor.
   // The Object:ObjectMonitor relationship is stable as long as we're
   // not at a safepoint.
@@ -975,15 +1125,24 @@ JavaThread* ObjectSynchronizer::get_lock_owner(ThreadsList * t_list, Handle h_ob
   markOop mark = ReadStableMark(obj);
 
   // Uncontended case, header points to stack
-  if (mark->has_locker()) {
+  if (!UseAltFastLocking && mark->has_locker()) {
     owner = (address) mark->locker();
   }
+  if (UseAltFastLocking && mark->is_fast_locked()) {
+    // fast-locked so get owner from the object.
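One consequence the FastHashCode hunk above encodes: a fast-locked object never has a displaced header, so an existing identity hash is read straight from the object's own mark word, while the legacy stack-locked case has to look at the mark that was displaced into the owner's BasicLock. A toy illustration of that difference, with an assumed hash layout and invented names (ToyMark, ToyBasicLock):

#include <cstdint>

static const unsigned  kHashShift = 8;                     // assumed layout
static const uintptr_t kHashMask  = 0x7FFFFFFFFFFFFFULL;   // assumed layout

struct ToyMark {
  uintptr_t bits;
  uintptr_t hash() const { return (bits >> kHashShift) & kHashMask; }
};

struct ToyBasicLock {
  ToyMark displaced_header;   // saved mark used by legacy stack-locking
};

// use_alt_fast_locking mirrors the UseAltFastLocking checks added above.
uintptr_t existing_hash(bool use_alt_fast_locking,
                        const ToyMark& mark,
                        const ToyBasicLock* owner_basic_lock) {
  if (use_alt_fast_locking) {
    return mark.hash();                              // mark stays with the object
  }
  return owner_basic_lock->displaced_header.hash();  // hash travelled with the displaced mark
}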
+    // owning_thread_from_object() may also return null here:
+    return Threads::owning_thread_from_object(t_list, h_obj());
+  }
+
   // Contended case, header points to ObjectMonitor (tagged pointer)
   if (mark->has_monitor()) {
     ObjectMonitor* monitor = mark->monitor();
     assert(monitor != NULL, "monitor should be non-null");
     owner = (address) monitor->owner();
+    if (UseAltFastLocking) {
+      return Threads::owning_thread_from_monitor(t_list, monitor);
+    }
   }
 
   if (owner != NULL) {
@@ -1467,8 +1626,14 @@ ObjectMonitor* ObjectSynchronizer::inflate(Thread * Self,
     assert(!mark->has_bias_pattern(), "invariant");
 
     // The mark can be in one of the following states:
-    // *  Inflated     - just return
-    // *  Stack-locked - coerce it to inflated
+    // *  inflated     - Just return if using stack-locking.
+    //                   If using fast-locking and the ObjectMonitor owner
+    //                   is anonymous and the current thread owns the
+    //                   object lock, then we make the current thread the
+    //                   ObjectMonitor owner and remove the lock from the
+    //                   current thread's lock stack.
+    // *  fast-locked  - Coerce it to inflated from fast-locked.
+    // *  stack-locked - Coerce it to inflated from stack-locked.
     // *  INFLATING    - busy wait for conversion to complete
     // *  Neutral      - aggressively inflate the object.
     // *  BIASED       - Illegal.  We should never see this
@@ -1479,6 +1644,11 @@ ObjectMonitor* ObjectSynchronizer::inflate(Thread * Self,
       assert(inf->header()->is_neutral(), "invariant");
       assert(inf->object() == object, "invariant");
       assert(ObjectSynchronizer::verify_objmon_isinpool(inf), "monitor is invalid");
+      if (UseAltFastLocking && inf->is_owner_anonymous() && is_lock_owned(Self, object)) {
+        inf->set_owner_from_anonymous(Self);
+        assert(Self->is_Java_thread(), "must be Java thread");
+        reinterpret_cast<JavaThread*>(Self)->lock_stack().remove(object);
+      }
       return inf;
     }
 
@@ -1488,12 +1658,63 @@ ObjectMonitor* ObjectSynchronizer::inflate(Thread * Self,
     // The INFLATING value is transient.
     // Currently, we spin/yield/park and poll the markword, waiting for inflation to finish.
     // We could always eliminate polling by parking the thread on some auxiliary list.
-    if (mark == markOopDesc::INFLATING()) {
+    if (!UseAltFastLocking && mark == markOopDesc::INFLATING()) {
       TEVENT(Inflate: spin while INFLATING);
       ReadStableMark(object);
       continue;
     }
 
+    // CASE: fast-locked
+    // Could be fast-locked either by current or by some other thread.
+    //
+    // Note that we allocate the ObjectMonitor speculatively, _before_
+    // attempting to set the object's mark to the new ObjectMonitor. If
+    // this thread owns the monitor, then we set the ObjectMonitor's
+    // owner to this thread. Otherwise, we set the ObjectMonitor's owner
+    // to anonymous. If we lose the race to set the object's mark to the
+    // new ObjectMonitor, then we just delete it and loop around again.
+    //
+    if (UseAltFastLocking && mark->is_fast_locked()) {
+      ObjectMonitor* monitor = omAlloc(Self);
+      monitor->Recycle();
+      monitor->_Responsible = NULL;
+      monitor->_recursions = 0;
+      monitor->_SpinDuration = ObjectMonitor::Knob_SpinLimit;   // Consider: maintain by type/class
+
+      monitor->set_header(mark->set_unlocked());
+      bool own = is_lock_owned(Self, object);
+      if (own) {
+        // Owned by us.
+        monitor->set_owner(Self);
+      } else {
+        // Owned by somebody else.
+        monitor->set_owner_anonymous();
+      }
+      monitor->set_object(object);
+      markOop monitor_mark = markOopDesc::encode(monitor);
+      markOop old_mark = object->cas_set_mark(monitor_mark, mark);
+      if (old_mark == mark) {
+        // Success! Return inflated monitor.
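The fast-locked inflation case above follows a speculate-then-publish pattern: build the monitor first, pick either a real or an "anonymous" owner, publish it with one CAS on the mark, and throw it away on interference. The stand-alone model below captures that shape; InflatableObject, ToyMonitor and the tag values are illustrative assumptions standing in for omAlloc()/omRelease() and markOopDesc.

#include <atomic>
#include <cstdint>

struct ToyMonitor {
  void*     owner;    // owning thread, or kAnonymousOwner
  uintptr_t header;   // saved, unlocked mark word
  void*     object;
};

static void* const kAnonymousOwner = reinterpret_cast<void*>(uintptr_t(1));

struct InflatableObject {
  std::atomic<uintptr_t> header;   // assumed low bits: 00 fast-locked, 01 unlocked, 10 monitor
};

ToyMonitor* inflate_fast_locked(InflatableObject* obj, void* self, bool self_owns_it) {
  for (;;) {
    uintptr_t mark = obj->header.load(std::memory_order_acquire);
    if ((mark & 0x3) == 0x2) {                             // somebody else inflated it already
      return reinterpret_cast<ToyMonitor*>(mark & ~uintptr_t(0x3));
    }
    ToyMonitor* m = new ToyMonitor();                      // speculative allocation
    m->owner  = self_owns_it ? self : kAnonymousOwner;
    m->header = (mark & ~uintptr_t(0x3)) | 0x1;            // remember an unlocked mark
    m->object = obj;
    uintptr_t encoded = reinterpret_cast<uintptr_t>(m) | 0x2;
    if (obj->header.compare_exchange_strong(mark, encoded)) {
      return m;                                            // published, we won the race
    }
    delete m;                                              // interference -- just retry
  }
}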
+        if (own) {
+          assert(Self->is_Java_thread(), "must be Java thread");
+          reinterpret_cast<JavaThread*>(Self)->lock_stack().remove(object);
+        }
+        // Hopefully the performance counters are allocated on distinct
+        // cache lines to avoid false sharing on MP systems ...
+        OM_PERFDATA_OP(Inflations, inc());
+        if (event.should_commit()) {
+          post_monitor_inflate_event(&event, object, cause);
+        }
+        return monitor;
+      } else {
+        monitor->set_object(NULL);
+        monitor->set_owner(NULL);
+        omRelease(Self, monitor, true);
+        continue;  // Interference -- just retry
+      }
+    }
+
     // CASE: stack-locked
     // Could be stack-locked either by this thread or by some other thread.
     //
@@ -1513,7 +1734,7 @@ ObjectMonitor* ObjectSynchronizer::inflate(Thread * Self,
     // before or after the CAS(INFLATING) operation.
     // See the comments in omAlloc().
 
-    if (mark->has_locker()) {
+    if (!UseAltFastLocking && mark->has_locker()) {
       ObjectMonitor * m = omAlloc(Self);
       // Optimistically prepare the objectmonitor - anticipate successful CAS
       // We do this before the CAS in order to minimize the length of time
diff --git a/src/hotspot/share/runtime/thread.cpp b/src/hotspot/share/runtime/thread.cpp
index 7c541539b08..784b8783c52 100644
--- a/src/hotspot/share/runtime/thread.cpp
+++ b/src/hotspot/share/runtime/thread.cpp
@@ -81,9 +81,10 @@
 #include "runtime/javaCalls.hpp"
 #include "runtime/jniHandles.inline.hpp"
 #include "runtime/jniPeriodicChecker.hpp"
+#include "runtime/lockStack.inline.hpp"
 #include "runtime/memprofiler.hpp"
 #include "runtime/mutexLocker.hpp"
-#include "runtime/objectMonitor.hpp"
+#include "runtime/objectMonitor.inline.hpp"
 #include "runtime/orderAccess.hpp"
 #include "runtime/osThread.hpp"
 #include "runtime/prefetch.inline.hpp"
@@ -1062,6 +1063,7 @@ bool Thread::is_in_usable_stack(address adr) const {
 // should be revisited, and they should be removed if possible.
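Note that lock_stack().remove(object) in the success path above is not a pop: when a nested lock is inflated, the object is not necessarily the most recent entry, so it is deleted in place and the later entries slide down. A possible shape of that operation, continuing the illustrative lock-stack layout from the earlier sketch (an assumption, not the real lockStack.inline.hpp):

// Remove obj wherever it sits in the window [0, top) and compact the array.
void toy_lock_stack_remove(void** base, int& top, void* obj) {
  for (int i = 0; i < top; i++) {
    if (base[i] == obj) {
      for (int j = i; j < top - 1; j++) {
        base[j] = base[j + 1];   // slide later entries down to fill the hole
      }
      top--;
      return;
    }
  }
}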
 bool Thread::is_lock_owned(address adr) const {
+  assert(!UseAltFastLocking, "sanity");
   return on_local_stack(adr);
 }
 
@@ -1715,7 +1717,7 @@ void JavaThread::initialize() {
 }
 
 JavaThread::JavaThread(bool is_attaching_via_jni) :
-                       Thread() {
+                       Thread(), _lock_stack(this) {
   initialize();
   if (is_attaching_via_jni) {
     _jni_attach_state = _attaching_via_jni;
@@ -1777,7 +1779,7 @@ static void compiler_thread_entry(JavaThread* thread, TRAPS);
 static void sweeper_thread_entry(JavaThread* thread, TRAPS);
 
 JavaThread::JavaThread(ThreadFunction entry_point, size_t stack_sz) :
-                       Thread() {
+                       Thread(), _lock_stack(this) {
   initialize();
   _jni_attach_state = _not_attaching_via_jni;
   set_entry_point(entry_point);
@@ -2220,6 +2222,7 @@ void JavaThread::clear_aync_thread_death_exception() {
 }
 
 bool JavaThread::is_lock_owned(address adr) const {
+  assert(!UseAltFastLocking, "sanity");
   if (Thread::is_lock_owned(adr)) return true;
 
   for (MonitorChunk* chunk = monitor_chunks(); chunk != NULL; chunk = chunk->next()) {
@@ -3045,6 +3048,10 @@ void JavaThread::oops_do(OopClosure* f, CodeBlobClosure* cf) {
   if (jvmti_thread_state() != NULL) {
     jvmti_thread_state()->oops_do(f, cf);
   }
+
+  if (UseAltFastLocking) {
+    lock_stack().oops_do(f);
+  }
 }
 
 void JavaThread::nmethods_do(CodeBlobClosure* cf) {
@@ -4803,6 +4810,7 @@ GrowableArray<JavaThread*>* Threads::get_pending_threads(ThreadsList * t_list,
 JavaThread *Threads::owning_thread_from_monitor_owner(ThreadsList * t_list,
                                                       address owner) {
+  assert(!UseAltFastLocking, "sanity");
   // NULL owner means not locked so we can skip the search
   if (owner == NULL) return NULL;
 
@@ -4855,6 +4863,31 @@ JavaThread *Threads::owning_thread_from_monitor_owner(ThreadsList * t_list,
   return the_owner;
 }
 
+JavaThread* Threads::owning_thread_from_object(ThreadsList * t_list, oop obj) {
+  assert(UseAltFastLocking, "Only with new lightweight locking");
+  DO_JAVA_THREADS(t_list, q) {
+    if (q->lock_stack().contains(obj)) {
+      return q;
+    }
+  }
+  return NULL;
+}
+
+JavaThread* Threads::owning_thread_from_monitor(ThreadsList* t_list, ObjectMonitor* monitor) {
+  if (UseAltFastLocking) {
+    if (monitor->is_owner_anonymous()) {
+      return owning_thread_from_object(t_list, (oop)monitor->object());
+    } else {
+      Thread* owner = reinterpret_cast<Thread*>(monitor->owner());
+      assert(owner == NULL || owner->is_Java_thread(), "only JavaThreads own monitors");
+      return reinterpret_cast<JavaThread*>(owner);
+    }
+  } else {
+    address owner = (address)monitor->owner();
+    return owning_thread_from_monitor_owner(t_list, owner);
+  }
+}
+
 // Threads::print_on() is called at safepoint by VM_PrintThreads operation.
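owning_thread_from_monitor() above handles the one case the legacy lookup never saw: an inflated monitor whose owner field is still anonymous because the object was fast-locked before inflation. The true owner is then whichever JavaThread has the object on its lock stack. A simplified model of that lookup, with plain vectors standing in for DO_JAVA_THREADS and LockStack:

#include <cstdint>
#include <vector>

struct ToyThread {
  std::vector<void*> lock_stack;                     // stand-in for LockStack
  bool owns(void* obj) const {
    for (void* o : lock_stack) {
      if (o == obj) return true;
    }
    return false;
  }
};

static ToyThread* const kAnonymousThread = reinterpret_cast<ToyThread*>(uintptr_t(1));

struct ToyInflatedMonitor {
  ToyThread* owner;                                  // a thread, the sentinel, or nullptr
  void*      object;
  bool is_owner_anonymous() const { return owner == kAnonymousThread; }
};

ToyThread* owning_thread(const std::vector<ToyThread*>& threads, ToyInflatedMonitor* m) {
  if (m->is_owner_anonymous()) {
    for (ToyThread* t : threads) {
      if (t->owns(m->object)) return t;              // found it on a lock stack
    }
    return nullptr;                                  // not expected at a safepoint
  }
  return m->owner;                                   // ordinary inflated monitor
}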
void Threads::print_on(outputStream* st, bool print_stacks, bool internal_format, bool print_concurrent_locks, diff --git a/src/hotspot/share/runtime/thread.hpp b/src/hotspot/share/runtime/thread.hpp index 5a7b12aefc9..c3332d2d276 100644 --- a/src/hotspot/share/runtime/thread.hpp +++ b/src/hotspot/share/runtime/thread.hpp @@ -37,6 +37,7 @@ #include "runtime/handshake.hpp" #include "runtime/javaFrameAnchor.hpp" #include "runtime/jniHandles.hpp" +#include "runtime/lockStack.hpp" #include "runtime/mutexLocker.hpp" #include "runtime/os.hpp" #include "runtime/osThread.hpp" @@ -2107,6 +2108,19 @@ class JavaThread: public Thread { // AsyncGetCallTrace support inline bool in_asgct(void) {return _in_asgct;} inline void set_in_asgct(bool value) {_in_asgct = value;} + +private: + LockStack _lock_stack; + +public: + LockStack& lock_stack() { return _lock_stack; } + + static ByteSize lock_stack_offset() { return byte_offset_of(JavaThread, _lock_stack); } + // Those offsets are used in code generators to access the LockStack that is embedded in this + // JavaThread structure. Those accesses are relative to the current thread, which + // is typically in a dedicated register. + static ByteSize lock_stack_top_offset() { return lock_stack_offset() + LockStack::top_offset(); } + static ByteSize lock_stack_base_offset() { return lock_stack_offset() + LockStack::base_offset(); } }; // Inline implementation of JavaThread::current @@ -2317,6 +2331,9 @@ class Threads: AllStatic { static JavaThread *owning_thread_from_monitor_owner(ThreadsList * t_list, address owner); + static JavaThread* owning_thread_from_object(ThreadsList* t_list, oop obj); + static JavaThread* owning_thread_from_monitor(ThreadsList* t_list, ObjectMonitor* owner); + // Number of threads on the active threads list static int number_of_threads() { return _number_of_threads; } // Number of non-daemon threads on the active threads list diff --git a/src/hotspot/share/runtime/vmStructs.cpp b/src/hotspot/share/runtime/vmStructs.cpp index 084da0b0579..48166a9b744 100644 --- a/src/hotspot/share/runtime/vmStructs.cpp +++ b/src/hotspot/share/runtime/vmStructs.cpp @@ -786,6 +786,9 @@ typedef PaddedEnd PaddedObjectMonitor; nonstatic_field(JavaThread, _vm_result, oop) \ nonstatic_field(JavaThread, _vm_result_2, Metadata*) \ nonstatic_field(JavaThread, _pending_async_exception, oop) \ + nonstatic_field(JavaThread, _lock_stack, LockStack) \ + nonstatic_field(LockStack, _top, uint32_t) \ + nonstatic_field(LockStack, _base[0], oop) \ volatile_nonstatic_field(JavaThread, _exception_oop, oop) \ volatile_nonstatic_field(JavaThread, _exception_pc, address) \ volatile_nonstatic_field(JavaThread, _is_method_handle_return, int) \ @@ -1399,6 +1402,7 @@ typedef PaddedEnd PaddedObjectMonitor; declare_type(AttachListenerThread, JavaThread) \ declare_toplevel_type(OSThread) \ declare_toplevel_type(JavaFrameAnchor) \ + declare_toplevel_type(LockStack) \ \ /***************/ \ /* Interpreter */ \ @@ -2719,8 +2723,10 @@ typedef PaddedEnd PaddedObjectMonitor; \ /* InvocationCounter constants */ \ declare_constant(InvocationCounter::count_increment) \ - declare_constant(InvocationCounter::count_shift) - + declare_constant(InvocationCounter::count_shift) \ + \ + /* ObjectMonitor constants */ \ + declare_constant(ObjectMonitor::ANONYMOUS_OWNER) \ //-------------------------------------------------------------------------------- // diff --git a/src/hotspot/share/services/threadService.cpp b/src/hotspot/share/services/threadService.cpp index c87e7ff4883..fd5f3142912 100644 --- 
a/src/hotspot/share/services/threadService.cpp +++ b/src/hotspot/share/services/threadService.cpp @@ -425,8 +425,12 @@ DeadlockCycle* ThreadService::find_deadlocks_at_safepoint(ThreadsList * t_list, if (waitingToLockMonitor != NULL) { address currentOwner = (address)waitingToLockMonitor->owner(); if (currentOwner != NULL) { - currentThread = Threads::owning_thread_from_monitor_owner(t_list, - currentOwner); + if (UseAltFastLocking) { + currentThread = Threads::owning_thread_from_monitor(t_list, waitingToLockMonitor); + } else { + currentThread = Threads::owning_thread_from_monitor_owner(t_list, + currentOwner); + } if (currentThread == NULL) { // This function is called at a safepoint so the JavaThread // that owns waitingToLockMonitor should be findable, but @@ -1042,8 +1046,12 @@ void DeadlockCycle::print_on_with(ThreadsList * t_list, outputStream* st) const // No Java object associated - a JVMTI raw monitor owner_desc = " (JVMTI raw monitor),\n which is held by"; } - currentThread = Threads::owning_thread_from_monitor_owner(t_list, - (address)waitingToLockMonitor->owner()); + if (UseAltFastLocking) { + currentThread = Threads::owning_thread_from_monitor(t_list, waitingToLockMonitor); + } else { + currentThread = Threads::owning_thread_from_monitor_owner(t_list, + (address)waitingToLockMonitor->owner()); + } if (currentThread == NULL) { // The deadlock was detected at a safepoint so the JavaThread // that owns waitingToLockMonitor should be findable, but diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/JavaThread.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/JavaThread.java index 0ecc5ee72ac..0515b76bb89 100644 --- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/JavaThread.java +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/JavaThread.java @@ -43,6 +43,8 @@ public class JavaThread extends Thread { private static AddressField nextField; private static sun.jvm.hotspot.types.OopField threadObjField; + private static long lockStackTopOffset; + private static long lockStackBaseOffset; private static AddressField anchorField; private static AddressField lastJavaSPField; private static AddressField lastJavaPCField; @@ -51,6 +53,7 @@ public class JavaThread extends Thread { private static AddressField stackBaseField; private static CIntegerField stackSizeField; private static CIntegerField terminatedField; + private static long oopPtrSize; private static JavaThreadPDAccess access; @@ -83,6 +86,7 @@ public void update(Observable o, Object data) { private static synchronized void initialize(TypeDataBase db) { Type type = db.lookupType("JavaThread"); Type anchorType = db.lookupType("JavaFrameAnchor"); + Type typeLockStack = db.lookupType("LockStack"); nextField = type.getAddressField("_next"); threadObjField = type.getOopField("_threadObj"); @@ -94,6 +98,9 @@ private static synchronized void initialize(TypeDataBase db) { stackBaseField = type.getAddressField("_stack_base"); stackSizeField = type.getCIntegerField("_stack_size"); terminatedField = type.getCIntegerField("_terminated"); + lockStackTopOffset = type.getField("_lock_stack").getOffset() + typeLockStack.getField("_top").getOffset(); + lockStackBaseOffset = type.getField("_lock_stack").getOffset() + typeLockStack.getField("_base[0]").getOffset(); + oopPtrSize = VM.getVM().getAddressSize(); UNINITIALIZED = db.lookupIntConstant("_thread_uninitialized").intValue(); NEW = db.lookupIntConstant("_thread_new").intValue(); @@ -398,6 +405,23 @@ public boolean 
isInStack(Address a) { return stackBase.greaterThan(a) && sp.lessThanOrEqual(a); } + public boolean isLockOwned(OopHandle obj) { + long current = lockStackBaseOffset; + long end = addr.getJIntAt(lockStackTopOffset); + if (Assert.ASSERTS_ENABLED) { + Assert.that(current <= end, "current stack offset must be above base offset"); + } + + while (current < end) { + Address oop = addr.getAddressAt(current); + if (oop.equals(obj)) { + return true; + } + current += oopPtrSize; + } + return false; + } + public boolean isLockOwned(Address a) { Address stackBase = getStackBase(); Address stackLimit = stackBase.addOffsetTo(-getStackSize()); diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/JavaVFrame.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/JavaVFrame.java index 5486d6c2bc3..6779034c685 100644 --- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/JavaVFrame.java +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/JavaVFrame.java @@ -77,6 +77,10 @@ private String identifyLockState(MonitorInfo monitor, String waitingState) { if (mark.hasMonitor() && ( // we have marked ourself as pending on this monitor mark.monitor().equals(thread.getCurrentPendingMonitor()) || + // Owned anonymously means that we are not the owner of + // the monitor and must be waiting for the owner to + // exit it. + mark.monitor().isOwnedAnonymous() || // we are not the owner of this monitor !mark.monitor().isEntered(thread) )) { @@ -146,7 +150,7 @@ public void printLockInfo(PrintStream tty, int frameCount) { // an inflated monitor that is first on the monitor list in // the first frame can block us on a monitor enter. lockState = identifyLockState(monitor, "waiting to lock"); - } else if (frameCount != 0) { + } else if (frameCount != 0 && !VM.getVM().getCommandLineBooleanFlag("UseAltFastLocking")) { // JDK-8214499 // This is not the first frame so we either own this monitor // or we owned the monitor before and called wait(). 
Because // wait() could have been called on any monitor in a lower diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/ObjectMonitor.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/ObjectMonitor.java index 146b8084659..8979a97c018 100644 --- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/ObjectMonitor.java +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/ObjectMonitor.java @@ -53,6 +53,8 @@ private static synchronized void initialize(TypeDataBase db) throws WrongTypeExc countField = type.getJIntField("_count"); waitersField = type.getJIntField("_waiters"); recursionsField = type.getCIntegerField("_recursions"); + + ANONYMOUS_OWNER = db.lookupLongConstant("ObjectMonitor::ANONYMOUS_OWNER").longValue(); } public ObjectMonitor(Address addr) { @@ -77,6 +79,13 @@ public boolean isEntered(sun.jvm.hotspot.runtime.Thread current) { return false; } + public boolean isOwnedAnonymous() { + if (VM.getVM().getCommandLineBooleanFlag("UseAltFastLocking") && owner() != null) { + return addr.getAddressAt(ownerFieldOffset).asLongValue() == ANONYMOUS_OWNER; + } + return false; + } + public Address owner() { return addr.getAddressAt(ownerFieldOffset); } // FIXME // void set_owner(void* owner); @@ -117,5 +126,7 @@ public int contentions() { private static JIntField countField; private static JIntField waitersField; private static CIntegerField recursionsField; + private static long ANONYMOUS_OWNER; + // FIXME: expose platform-dependent stuff } diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/Threads.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/Threads.java index 817d57ab2b1..af711671f85 100644 --- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/Threads.java +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/Threads.java @@ -192,6 +192,7 @@ public void oopsDo(AddressVisitor oopVisitor) { // refer to Threads::owning_thread_from_monitor_owner public JavaThread owningThreadFromMonitor(Address o) { + assert(!VM.getVM().getCommandLineBooleanFlag("UseAltFastLocking")); if (o == null) return null; for (JavaThread thread = first(); thread != null; thread = thread.next()) { if (o.equals(thread.threadObjectAddress())) { @@ -207,7 +208,23 @@ public JavaThread owningThreadFromMonitor(Address o) { } public JavaThread owningThreadFromMonitor(ObjectMonitor monitor) { - return owningThreadFromMonitor(monitor.owner()); + if (VM.getVM().getCommandLineBooleanFlag("UseAltFastLocking")) { + if (monitor.isOwnedAnonymous()) { + OopHandle object = monitor.object(); + for (JavaThread thread = first(); thread != null; thread = thread.next()) { + if (thread.isLockOwned(object)) { + return thread; + } + } + throw new InternalError("We should have found a thread that owns the anonymous lock"); + } + // Owner can only be threads at this point. + Address o = monitor.owner(); + if (o == null) return null; + return new JavaThread(o); + } else { + return owningThreadFromMonitor(monitor.owner()); + } } // refer to Threads::get_pending_threads
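Taken together, the serviceability changes above reduce to one rule for external tooling: read the monitor's raw owner word and compare it against the exported ObjectMonitor::ANONYMOUS_OWNER constant before trusting it as a thread pointer; only an anonymous owner forces the per-thread lock-stack scan. A minimal sketch of that classification, with the constant's value assumed purely for illustration:

#include <cstdint>

static const uintptr_t kAnonymousOwnerWord = 1;   // assumed value of ANONYMOUS_OWNER

enum class OwnerKind { None, AnonymousFastLock, Thread };

OwnerKind classify_owner_word(uintptr_t owner_word) {
  if (owner_word == 0)                   return OwnerKind::None;
  if (owner_word == kAnonymousOwnerWord) return OwnerKind::AnonymousFastLock;
  return OwnerKind::Thread;              // safe to resolve as a JavaThread
}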