Skip to content

Commit

Permalink
[Backport] 8291555: Implement alternative fast-locking scheme
Browse files Browse the repository at this point in the history
Summary: Implement new fast-locking with option UseAltFastLocking, also according to JDK-8308107

Test Plan: CICD

Reviewed-by: kuaiwei, yulei, ddh

Issue: #679
  • Loading branch information
mmyxym committed Oct 9, 2023
1 parent ea25b10 commit f3ff21f
Show file tree
Hide file tree
Showing 44 changed files with 1,948 additions and 548 deletions.
119 changes: 73 additions & 46 deletions src/hotspot/cpu/aarch64/aarch64.ad
Original file line number Diff line number Diff line change
Expand Up @@ -3657,36 +3657,40 @@ encode %{
__ tbnz(disp_hdr, exact_log2(markOopDesc::monitor_value), object_has_monitor);
}

// Set tmp to be (markOop of object | UNLOCK_VALUE).
__ orr(tmp, disp_hdr, markOopDesc::unlocked_value);

// Initialize the box. (Must happen before we update the object mark!)
__ str(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes()));

// Compare object markOop with an unlocked value (tmp) and if
// equal exchange the stack address of our box with object markOop.
// On failure disp_hdr contains the possibly locked markOop.
__ cmpxchg(oop, tmp, box, Assembler::xword, /*acquire*/ true,
/*release*/ true, /*weak*/ false, disp_hdr);
__ br(Assembler::EQ, cont);

assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");

// If the compare-and-exchange succeeded, then we found an unlocked
// object, will have now locked it will continue at label cont
if (UseAltFastLocking) {
__ fast_lock(oop, disp_hdr, tmp, rscratch1, cont);
} else {
// Set tmp to be (markOop of object | UNLOCK_VALUE).
__ orr(tmp, disp_hdr, markOopDesc::unlocked_value);

__ bind(cas_failed);
// We did not see an unlocked object so try the fast recursive case.
// Initialize the box. (Must happen before we update the object mark!)
__ str(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes()));

// Check if the owner is self by comparing the value in the
// markOop of object (disp_hdr) with the stack pointer.
__ mov(rscratch1, sp);
__ sub(disp_hdr, disp_hdr, rscratch1);
__ mov(tmp, (address) (~(os::vm_page_size()-1) | (uintptr_t)markOopDesc::lock_mask_in_place));
// If condition is true we are cont and hence we can store 0 as the
// displaced header in the box, which indicates that it is a recursive lock.
__ ands(tmp/*==0?*/, disp_hdr, tmp); // Sets flags for result
__ str(tmp/*==0, perhaps*/, Address(box, BasicLock::displaced_header_offset_in_bytes()));
// Compare object markOop with an unlocked value (tmp) and if
// equal exchange the stack address of our box with object markOop.
// On failure disp_hdr contains the possibly locked markOop.
__ cmpxchg(oop, tmp, box, Assembler::xword, /*acquire*/ true,
/*release*/ true, /*weak*/ false, disp_hdr);
__ br(Assembler::EQ, cont);

assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");

// If the compare-and-exchange succeeded, then we found an unlocked
// object, have now locked it, and will continue at label cont.

__ bind(cas_failed);
// We did not see an unlocked object so try the fast recursive case.

// Check if the owner is self by comparing the value in the
// markOop of object (disp_hdr) with the stack pointer.
__ mov(rscratch1, sp);
__ sub(disp_hdr, disp_hdr, rscratch1);
__ mov(tmp, (address) (~(os::vm_page_size()-1) | (uintptr_t)markOopDesc::lock_mask_in_place));
// If condition is true we are cont and hence we can store 0 as the
// displaced header in the box, which indicates that it is a recursive lock.
__ ands(tmp/*==0?*/, disp_hdr, tmp); // Sets flags for result
__ str(tmp/*==0, perhaps*/, Address(box, BasicLock::displaced_header_offset_in_bytes()));
}

if ((EmitSync & 0x02) == 0) {
__ b(cont);
Expand All @@ -3708,12 +3712,14 @@ encode %{
__ ldr (rthread, Address(rthread, WispThread::thread_offset()));
}

// Store a non-null value into the box to avoid looking like a re-entrant
// lock. The fast-path monitor unlock code checks for
// markOopDesc::monitor_value so use markOopDesc::unused_mark which has the
// relevant bit set, and also matches ObjectSynchronizer::slow_enter.
__ mov(tmp, (address)markOopDesc::unused_mark());
__ str(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes()));
if (!UseAltFastLocking) {
// Store a non-null value into the box to avoid looking like a re-entrant
// lock. The fast-path monitor unlock code checks for
// markOopDesc::monitor_value so use markOopDesc::unused_mark which has the
// relevant bit set, and also matches ObjectSynchronizer::slow_enter.
__ mov(tmp, (address)markOopDesc::unused_mark());
__ str(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes()));
}
}

__ bind(cont);
Expand Down Expand Up @@ -3742,33 +3748,54 @@ encode %{
__ biased_locking_exit(oop, tmp, cont);
}

// Find the lock address and load the displaced header from the stack.
__ ldr(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes()));
if (!UseAltFastLocking) {
// Find the lock address and load the displaced header from the stack.
__ ldr(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes()));

// If the displaced header is 0, we have a recursive unlock.
__ cmp(disp_hdr, zr);
__ br(Assembler::EQ, cont);
// If the displaced header is 0, we have a recursive unlock.
__ cmp(disp_hdr, zr);
__ br(Assembler::EQ, cont);
}

// Handle existing monitor.
if ((EmitSync & 0x02) == 0) {
__ ldr(tmp, Address(oop, oopDesc::mark_offset_in_bytes()));
__ tbnz(disp_hdr, exact_log2(markOopDesc::monitor_value), object_has_monitor);
__ tbnz(tmp, exact_log2(markOopDesc::monitor_value), object_has_monitor);
}

// Check if it is still a light weight lock, this is is true if we
// see the stack address of the basicLock in the markOop of the
// object.
if (UseAltFastLocking) {
__ fast_unlock(oop, tmp, box, disp_hdr, cont);
__ b(cont);
} else {
// Check if it is still a light weight lock, this is true if we
// see the stack address of the basicLock in the markOop of the
// object.

__ cmpxchg(oop, box, disp_hdr, Assembler::xword, /*acquire*/ false,
/*release*/ true, /*weak*/ false, tmp);
__ b(cont);
__ cmpxchg(oop, box, disp_hdr, Assembler::xword, /*acquire*/ false,
/*release*/ true, /*weak*/ false, tmp);
__ b(cont);
}

assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");

// Handle existing monitor.
if ((EmitSync & 0x02) == 0) {
__ bind(object_has_monitor);
__ add(tmp, tmp, -markOopDesc::monitor_value); // monitor

if (UseAltFastLocking) {
// If the owner is anonymous, we need to fix it -- in an outline stub.
Register tmp2 = disp_hdr;
__ ldr(tmp2, Address(tmp, ObjectMonitor::owner_offset_in_bytes()));
// We cannot use tbnz here, the target might be too far away and cannot
// be encoded.
__ tst(tmp2, (uint64_t)ObjectMonitor::ANONYMOUS_OWNER);
C2HandleAnonOMOwnerStub* stub = new (Compile::current()->comp_arena()) C2HandleAnonOMOwnerStub(tmp, tmp2);
Compile::current()->add_stub(stub);
__ br(Assembler::NE, stub->entry());
__ bind(stub->continuation());
}

__ ldr(rscratch1, Address(tmp, ObjectMonitor::owner_offset_in_bytes()));
__ ldr(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset_in_bytes()));
if (UseWispMonitor) {
Expand Down
114 changes: 65 additions & 49 deletions src/hotspot/cpu/aarch64/c1_MacroAssembler_aarch64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -81,39 +81,43 @@ int C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr

// Load object header
ldr(hdr, Address(obj, hdr_offset));
// and mark it as unlocked
orr(hdr, hdr, markOopDesc::unlocked_value);
// save unlocked object header into the displaced header location on the stack
str(hdr, Address(disp_hdr, 0));
// test if object header is still the same (i.e. unlocked), and if so, store the
// displaced header address in the object header - if it is not the same, get the
// object header instead
lea(rscratch2, Address(obj, hdr_offset));
cmpxchgptr(hdr, disp_hdr, rscratch2, rscratch1, done, /*fallthough*/NULL);
// if the object header was the same, we're done
// if the object header was not the same, it is now in the hdr register
// => test if it is a stack pointer into the same stack (recursive locking), i.e.:
//
// 1) (hdr & aligned_mask) == 0
// 2) sp <= hdr
// 3) hdr <= sp + page_size
//
// these 3 tests can be done by evaluating the following expression:
//
// (hdr - sp) & (aligned_mask - page_size)
//
// assuming both the stack pointer and page_size have their least
// significant 2 bits cleared and page_size is a power of 2
mov(rscratch1, sp);
sub(hdr, hdr, rscratch1);
ands(hdr, hdr, aligned_mask - os::vm_page_size());
// for recursive locking, the result is zero => save it in the displaced header
// location (NULL in the displaced hdr location indicates recursive locking)
str(hdr, Address(disp_hdr, 0));
// otherwise we don't care about the result and handle locking via runtime call
cbnz(hdr, slow_case);
// done
bind(done);
if (UseAltFastLocking) {
fast_lock(obj, hdr, rscratch1, rscratch2, slow_case);
} else {
// and mark it as unlocked
orr(hdr, hdr, markOopDesc::unlocked_value);
// save unlocked object header into the displaced header location on the stack
str(hdr, Address(disp_hdr, 0));
// test if object header is still the same (i.e. unlocked), and if so, store the
// displaced header address in the object header - if it is not the same, get the
// object header instead
lea(rscratch2, Address(obj, hdr_offset));
cmpxchgptr(hdr, disp_hdr, rscratch2, rscratch1, done, /*fallthough*/NULL);
// if the object header was the same, we're done
// if the object header was not the same, it is now in the hdr register
// => test if it is a stack pointer into the same stack (recursive locking), i.e.:
//
// 1) (hdr & aligned_mask) == 0
// 2) sp <= hdr
// 3) hdr <= sp + page_size
//
// these 3 tests can be done by evaluating the following expression:
//
// (hdr - sp) & (aligned_mask - page_size)
//
// assuming both the stack pointer and page_size have their least
// significant 2 bits cleared and page_size is a power of 2
mov(rscratch1, sp);
sub(hdr, hdr, rscratch1);
ands(hdr, hdr, aligned_mask - os::vm_page_size());
// for recursive locking, the result is zero => save it in the displaced header
// location (NULL in the displaced hdr location indicates recursive locking)
str(hdr, Address(disp_hdr, 0));
// otherwise we don't care about the result and handle locking via runtime call
cbnz(hdr, slow_case);
// done
bind(done);
}
if (PrintBiasedLockingStatistics) {
lea(rscratch2, ExternalAddress((address)BiasedLocking::fast_path_entry_count_addr()));
addmw(Address(rscratch2, 0), 1, rscratch1);
Expand All @@ -134,29 +138,41 @@ void C1_MacroAssembler::unlock_object(Register hdr, Register obj, Register disp_
biased_locking_exit(obj, hdr, done);
}

// load displaced header
ldr(hdr, Address(disp_hdr, 0));
// if the loaded hdr is NULL we had recursive locking
// if we had recursive locking, we are done
cbz(hdr, done);
if (!UseAltFastLocking) {
// load displaced header
ldr(hdr, Address(disp_hdr, 0));
// if the loaded hdr is NULL we had recursive locking
// if we had recursive locking, we are done
cbz(hdr, done);
}

if (!UseBiasedLocking) {
// load object
ldr(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes()));
}
verify_oop(obj);
// test if object header is pointing to the displaced header, and if so, restore
// the displaced header in the object - if the object header is not pointing to
// the displaced header, get the object header instead
// if the object header was not pointing to the displaced header,
// we do unlocking via runtime call
if (hdr_offset) {
lea(rscratch1, Address(obj, hdr_offset));
cmpxchgptr(disp_hdr, hdr, rscratch1, rscratch2, done, &slow_case);
if (UseAltFastLocking) {
ldr(hdr, Address(obj, oopDesc::mark_offset_in_bytes()));
// We cannot use tbnz here, the target might be too far away and cannot
// be encoded.
tst(hdr, markOopDesc::monitor_value);
br(Assembler::NE, slow_case);
fast_unlock(obj, hdr, rscratch1, rscratch2, slow_case);
} else {
cmpxchgptr(disp_hdr, hdr, obj, rscratch2, done, &slow_case);
// test if object header is pointing to the displaced header, and if so, restore
// the displaced header in the object - if the object header is not pointing to
// the displaced header, get the object header instead
// if the object header was not pointing to the displaced header,
// we do unlocking via runtime call
if (hdr_offset) {
lea(rscratch1, Address(obj, hdr_offset));
cmpxchgptr(disp_hdr, hdr, rscratch1, rscratch2, done, &slow_case);
} else {
cmpxchgptr(disp_hdr, hdr, obj, rscratch2, done, &slow_case);
}
// done
bind(done);
}
// done
bind(done);
}


Expand Down
61 changes: 61 additions & 0 deletions src/hotspot/cpu/aarch64/c2_CodeStubs_aarch64.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
/*
* Copyright (c) 2020, 2022 Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/

#include "precompiled.hpp"
#include "opto/c2_MacroAssembler.hpp"
#include "opto/c2_CodeStubs.hpp"
#include "runtime/objectMonitor.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/stubRoutines.hpp"

#define __ masm.

int C2HandleAnonOMOwnerStub::max_size() const {
  // Upper bound on the size of the code emitted for this stub. The figure was
  // obtained empirically: making this function return 0 causes
  // C2CodeStubList::emit() to fail an assertion that reports the size that is
  // actually required.
  const int stub_size_limit = 24;
  return stub_size_limit;
}

void C2HandleAnonOMOwnerStub::emit(C2_MacroAssembler& masm) {
  // Out-of-line stub: stores the current thread as the monitor's owner, drops
  // the topmost lock-stack entry, then branches back to the continuation.
  __ bind(entry());

  Register monitor_reg = monitor();
  Register t = tmp();
  assert(t != noreg, "need tmp register");

  // Thread-relative location holding the lock-stack top offset; it is read
  // and then written back below.
  Address lock_stack_top(rthread, JavaThread::lock_stack_top_offset());

  // Overwrite the monitor's owner field with the current thread.
  __ str(rthread, Address(monitor_reg, ObjectMonitor::owner_offset_in_bytes()));

  // Pop one entry from the lock-stack: top -= oopSize.
  __ ldrw(t, lock_stack_top);
  __ subw(t, t, oopSize);
#ifdef ASSERT
  // ASSERT builds additionally zero the slot that was just popped.
  __ str(zr, Address(rthread, t));
#endif
  __ strw(t, lock_stack_top);

  // Resume the main code path.
  __ b(continuation());
}

#undef __
Loading

0 comments on commit f3ff21f

Please sign in to comment.