Skip to content

Commit

Permalink
[Backport] 8280872: Reorder code cache segments to improve code density
Browse files Browse the repository at this point in the history
Summary: Backport 8280872 to improve code cache density

Test Plan: test/hotspot/jtreg/compiler/c2/aarch64/TestFarJump.java

Reviewed-by: kuaiwei, wangzhuo

Issue: dragonwell11#664
  • Loading branch information
Boris Ulasevich authored and lusou-zhangquan committed Aug 31, 2023
1 parent 0d6e20b commit 4dce927
Show file tree
Hide file tree
Showing 7 changed files with 203 additions and 19 deletions.
6 changes: 3 additions & 3 deletions src/hotspot/cpu/aarch64/aarch64.ad
Original file line number Diff line number Diff line change
Expand Up @@ -1182,12 +1182,12 @@ class HandlerImpl {
static int emit_deopt_handler(CodeBuffer& cbuf);

static uint size_exception_handler() {
return MacroAssembler::far_branch_size();
return MacroAssembler::far_codestub_branch_size();
}

static uint size_deopt_handler() {
// count one adr and one far branch instruction
return 4 * NativeInstruction::instruction_size;
return NativeInstruction::instruction_size + MacroAssembler::far_codestub_branch_size();
}
};

Expand Down Expand Up @@ -2184,7 +2184,7 @@ int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf)
__ adr(lr, __ pc());
__ far_jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));

assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow");
assert(__ offset() - offset == (int) size_deopt_handler(), "overflow");
__ end_a_stub();
return offset;
}
Expand Down
10 changes: 8 additions & 2 deletions src/hotspot/cpu/aarch64/icBuffer_aarch64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,9 +52,15 @@ void InlineCacheBuffer::assemble_ic_buffer_code(address code_begin, void* cached
address start = __ pc();
Label l;
__ ldr(rscratch2, l);
__ far_jump(ExternalAddress(entry_point));
__ align(wordSize);
int jump_code_size = __ far_jump(ExternalAddress(entry_point));
// IC stub code size is not expected to vary depending on target address.
// We use NOPs to make the [ldr + far_jump + nops + int64] stub size equal to ic_stub_code_size.
for (int size = NativeInstruction::instruction_size + jump_code_size + 8;
size < ic_stub_code_size(); size += NativeInstruction::instruction_size) {
__ nop();
}
__ bind(l);
assert((uintptr_t)__ pc() % wordSize == 0, "");
__ emit_int64((int64_t)cached_value);
// Only need to invalidate the 1st two instructions - not the whole ic stub
ICache::invalidate_range(code_begin, InlineCacheBuffer::ic_stub_code_size());
Expand Down
25 changes: 20 additions & 5 deletions src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -400,14 +400,27 @@ void MacroAssembler::set_last_Java_frame(Register last_java_sp,
}
}

// Tells far_call()/far_jump() whether the branch to 'addr' must use the
// far form instead of a single direct branch. The three cases, in the
// order they are evaluated:
//   1. far_branches() is false (codecache size <= 128M): never far.
//   2. codestub_branch_needs_far_jump() (codecache size > 240M): always far.
//   3. otherwise (codecache size 128M..240M): far unless the target lies
//      in the non-nmethod segment.
static inline bool target_needs_far_branch(address addr) {
  return MacroAssembler::far_branches() &&
         (MacroAssembler::codestub_branch_needs_far_jump() ||
          !CodeCache::is_non_nmethod(addr));
}

void MacroAssembler::far_call(Address entry, CodeBuffer *cbuf, Register tmp) {
assert(ReservedCodeCacheSize < 4*G, "branch out of range");
assert(CodeCache::find_blob(entry.target()) != NULL,
"destination of far call not found in code cache");
if (far_branches()) {
if (target_needs_far_branch(entry.target())) {
uint64_t offset;
// We can use ADRP here because we know that the total size of
// the code cache cannot exceed 2Gb.
// the code cache cannot exceed 2Gb (ADRP limit is 4GB).
adrp(tmp, entry, offset);
add(tmp, tmp, offset);
if (cbuf) cbuf->set_insts_mark();
Expand All @@ -418,14 +431,15 @@ void MacroAssembler::far_call(Address entry, CodeBuffer *cbuf, Register tmp) {
}
}

void MacroAssembler::far_jump(Address entry, CodeBuffer *cbuf, Register tmp) {
int MacroAssembler::far_jump(Address entry, CodeBuffer *cbuf, Register tmp) {
assert(ReservedCodeCacheSize < 4*G, "branch out of range");
assert(CodeCache::find_blob(entry.target()) != NULL,
"destination of far call not found in code cache");
if (far_branches()) {
address start = pc();
if (target_needs_far_branch(entry.target())) {
uint64_t offset;
// We can use ADRP here because we know that the total size of
// the code cache cannot exceed 2Gb.
// the code cache cannot exceed 2Gb (ADRP limit is 4GB).
adrp(tmp, entry, offset);
add(tmp, tmp, offset);
if (cbuf) cbuf->set_insts_mark();
Expand All @@ -434,6 +448,7 @@ void MacroAssembler::far_jump(Address entry, CodeBuffer *cbuf, Register tmp) {
if (cbuf) cbuf->set_insts_mark();
b(entry);
}
return pc() - start;
}

void MacroAssembler::reserved_stack_check() {
Expand Down
11 changes: 8 additions & 3 deletions src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1062,13 +1062,18 @@ class MacroAssembler: public Assembler {
return ReservedCodeCacheSize > branch_range || UseAOT;
}

// Returns true when the largest possible distance to the non-nmethod
// section exceeds branch_range, i.e. when a branch into that section
// requires a far jump.
static bool codestub_branch_needs_far_jump() {
  const size_t farthest = CodeCache::max_distance_to_non_nmethod();
  return farthest > branch_range;
}

// Jumps that can reach anywhere in the code cache.
// Trashes tmp.
void far_call(Address entry, CodeBuffer *cbuf = NULL, Register tmp = rscratch1);
void far_jump(Address entry, CodeBuffer *cbuf = NULL, Register tmp = rscratch1);
int far_jump(Address entry, CodeBuffer *cbuf = NULL, Register tmp = rscratch1);

static int far_branch_size() {
if (far_branches()) {
static int far_codestub_branch_size() {
if (codestub_branch_needs_far_jump()) {
return 3 * 4; // adrp, add, br
} else {
return 4;
Expand Down
30 changes: 24 additions & 6 deletions src/hotspot/share/code/codeCache.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -291,19 +291,20 @@ void CodeCache::initialize_heaps() {
const size_t alignment = MAX2(page_size(false, 8), (size_t) os::vm_allocation_granularity());
non_nmethod_size = align_up(non_nmethod_size, alignment);
profiled_size = align_down(profiled_size, alignment);
non_profiled_size = align_down(non_profiled_size, alignment);

// Reserve one continuous chunk of memory for CodeHeaps and split it into
// parts for the individual heaps. The memory layout looks like this:
// ---------- high -----------
// Non-profiled nmethods
// Profiled nmethods
// Non-nmethods
// Profiled nmethods
// ---------- low ------------
ReservedCodeSpace rs = reserve_heap_memory(cache_size);
ReservedSpace non_method_space = rs.first_part(non_nmethod_size);
ReservedSpace rest = rs.last_part(non_nmethod_size);
ReservedSpace profiled_space = rest.first_part(profiled_size);
ReservedSpace non_profiled_space = rest.last_part(profiled_size);
ReservedSpace profiled_space = rs.first_part(profiled_size);
ReservedSpace rest = rs.last_part(profiled_size);
ReservedSpace non_method_space = rest.first_part(non_nmethod_size);
ReservedSpace non_profiled_space = rest.last_part(non_nmethod_size);

// Non-nmethods (stubs, adapters, ...)
add_heap(non_method_space, "CodeHeap 'non-nmethods'", CodeBlobType::NonNMethod);
Expand Down Expand Up @@ -1043,6 +1044,24 @@ size_t CodeCache::max_capacity() {
return max_cap;
}

// Returns true if 'addr' lies inside the code heap that get_code_heap()
// reports for CodeBlobType::NonNMethod.
bool CodeCache::is_non_nmethod(address addr) {
  CodeHeap* non_nmethod_heap = get_code_heap(CodeBlobType::NonNMethod);
  const bool inside = non_nmethod_heap->contains(addr);
  return inside;
}

size_t CodeCache::max_distance_to_non_nmethod() {
if (!SegmentedCodeCache) {
return ReservedCodeCacheSize;
} else {
CodeHeap* blob = get_code_heap(CodeBlobType::NonNMethod);
// the max distance is minimized by placing the NonNMethod segment
// in between MethodProfiled and MethodNonProfiled segments
size_t dist1 = (size_t)blob->high() - (size_t)_low_bound;
size_t dist2 = (size_t)_high_bound - (size_t)blob->low();
return dist1 > dist2 ? dist1 : dist2;
}
}

/**
* Returns the reverse free ratio. E.g., if 25% (1/4) of the code heap
* is free, reverse_free_ratio() returns 4.
Expand All @@ -1052,7 +1071,6 @@ double CodeCache::reverse_free_ratio(int code_blob_type) {
if (heap == NULL) {
return 0;
}

double unallocated_capacity = MAX2((double)heap->unallocated_capacity(), 1.0); // Avoid division by 0;
double max_capacity = (double)heap->max_capacity();
double result = max_capacity / unallocated_capacity;
Expand Down
3 changes: 3 additions & 0 deletions src/hotspot/share/code/codeCache.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -232,6 +232,9 @@ class CodeCache : AllStatic {
static bool needs_cache_clean() { return _needs_cache_clean; }
static void set_needs_cache_clean(bool v) { _needs_cache_clean = v; }

static size_t max_distance_to_non_nmethod();
static bool is_non_nmethod(address addr);

static void clear_inline_caches(); // clear all inline caches
static void cleanup_inline_caches(); // clean unloaded/zombie nmethods from inline caches
static void do_unloading_nmethod_caches(bool class_unloading_occurred); // clean all nmethod caches for unloading, including inline caches
Expand Down
137 changes: 137 additions & 0 deletions test/hotspot/jtreg/compiler/c2/aarch64/TestFarJump.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
/*
* Copyright (c) 2022, BELLSOFT. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package compiler.c2.aarch64;

import jdk.test.lib.process.OutputAnalyzer;
import jdk.test.lib.process.ProcessTools;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.*;

/*
* @test
* @bug 8280872
* @summary Far call to runtime stub should be generated with single instruction for CodeHeap up to 250MB
* @library /test/lib /
*
* @requires vm.flagless
* @requires os.arch=="aarch64"
* @requires vm.debug == false
* @requires vm.compiler2.enabled
*
* @run driver compiler.c2.aarch64.TestFarJump
*/
// Driver test: forks a VM twice (256M and 200M reserved code cache) with
// -XX:+PrintAssembly and inspects the two lines that follow the
// "[Exception Handler]" marker to see whether the jump is encoded with ADRP.
public class TestFarJump {

    // Program argument handed to the forked VM. Without it the child sees
    // args.length == 0, exactly like the driver, and would take the forking
    // path of main() again instead of the "just exit" path.
    private static final String FORKED_VM_ARG = "forked";

    // A run of lower-case hex digits and blanks; one candidate instruction
    // word of a hex dump once the blanks are removed.
    private static final Pattern HEX_RUN = Pattern.compile("[0-9a-f ]*");

    // ADRP instruction encoding:
    //   |31 30 29 28|27 26 25 24|23 22 21 20|19 18 17 16|15 14 13 12|11 10 09 08|07 06 05 04|03 02 01 00|
    //   | 1|immlo| 1  0  0  0  0|                       immhi                             |      Rd      |
    // Only bit 31 and bits 28..24 are fixed, hence the 0b1001_1111 mask on the top byte.
    static boolean isADRP(int encoding) {
        final int mask = 0b1001_1111;
        final int val  = 0b1001_0000;
        return ((encoding >> 24) & mask) == val;
    }

    // Looking for adrp instruction in binary/text assembly output:
    //   0x0000ffff7ff1b7d0:   c8ff ffd0 | 0801 1091 | 0001 1fd6
    //   0x0000ffff6bf20ee0:   adrp    x8, 0x0000ffff6bef1000
    // Returns false for lines that do not contain an "address: " prefix.
    static boolean containsADRP(String input) {
        int index = input.indexOf(": ");
        if (index == -1) {
            return false;
        }
        String payload = input.substring(index + 1);
        if (payload.contains("adrp")) {
            return true;
        }
        Matcher matcher = HEX_RUN.matcher(payload);
        while (matcher.find()) {
            String word = matcher.group().replace(" ", "");
            if (word.length() != 8) {
                continue;
            }
            // The dump lists bytes in memory order; swap them to obtain the
            // instruction word before testing the opcode bits.
            int dump = (int) Long.parseLong(word, 16);
            int encoding = Integer.reverseBytes(dump);
            if (isADRP(encoding)) {
                return true;
            }
        }
        return false;
    }

    // Forks a VM that compiles main() and prints its assembly, then checks
    // the exception handler stub:
    //   bigCodeHeap == true  (256M): an ADRP is expected,
    //   bigCodeHeap == false (200M): no ADRP is expected.
    // Throws RuntimeException when the expectation is not met or the
    // exception handler cannot be found in the output.
    static void runVM(boolean bigCodeHeap) throws Exception {
        String className = TestFarJump.class.getName();
        String[] procArgs = {
            "-XX:-Inline",
            "-Xcomp",
            "-Xbatch",
            "-XX:+TieredCompilation",
            "-XX:+SegmentedCodeCache",
            "-XX:CompileOnly=" + className + "::main",
            "-XX:ReservedCodeCacheSize=" + (bigCodeHeap ? "256M" : "200M"),
            "-XX:+UnlockDiagnosticVMOptions",
            "-XX:+PrintAssembly",
            className,
            FORKED_VM_ARG};

        ProcessBuilder pb = ProcessTools.createJavaProcessBuilder(procArgs);
        OutputAnalyzer output = new OutputAnalyzer(pb.start());
        // The forked VM only prints "Ok"; a non-zero exit means it did not
        // run the way this test assumes.
        output.shouldHaveExitValue(0);
        List<String> lines = output.asLines();

        for (int i = 0; i < lines.size(); i++) {
            String line = lines.get(i);
            if (!line.contains("[Exception Handler]")) {
                continue;
            }
            // The two lines after the marker are inspected; fail with a clear
            // message if the output ends before them.
            if (i + 2 >= lines.size()) {
                throw new RuntimeException("Assembly output: exception Handler is truncated");
            }
            String next1 = lines.get(i + 1);
            String next2 = lines.get(i + 2);
            System.out.println(line);
            System.out.println(next1);
            System.out.println(next2);
            boolean containsADRP = containsADRP(next1) || containsADRP(next2);
            if (bigCodeHeap && !containsADRP) {
                throw new RuntimeException("ADRP instruction is expected on far jump");
            }
            if (!bigCodeHeap && containsADRP) {
                throw new RuntimeException("for CodeHeap < 250MB the far jump is expected to be encoded with a single branch instruction");
            }
            return;
        }
        throw new RuntimeException("Assembly output: exception Handler is not found");
    }

    public static void main(String[] args) throws Exception {
        if (args.length > 0) {
            // We are in a forked VM. Just exit
            System.out.println("Ok");
            return;
        }
        // Main VM: fork VM with options
        runVM(true);
        runVM(false);
    }
}

0 comments on commit 4dce927

Please sign in to comment.