diff --git a/make/autoconf/flags-cflags.m4 b/make/autoconf/flags-cflags.m4 index 8ea0b339c1c..cddce1369eb 100644 --- a/make/autoconf/flags-cflags.m4 +++ b/make/autoconf/flags-cflags.m4 @@ -789,7 +789,7 @@ AC_DEFUN([FLAGS_SETUP_CFLAGS_CPU_DEP], $1_CFLAGS_CPU_JDK="${$1_CFLAGS_CPU_JDK} -fno-omit-frame-pointer" fi - $1_CXXSTD_CXXFLAG="-std=gnu++98" + $1_CXXSTD_CXXFLAG="-std=gnu++14" FLAGS_CXX_COMPILER_CHECK_ARGUMENTS(ARGUMENT: [${$1_CXXSTD_CXXFLAG} -Werror], PREFIX: $3, IF_FALSE: [$1_CXXSTD_CXXFLAG=""]) $1_TOOLCHAIN_CFLAGS_JDK_CXXONLY="${$1_CXXSTD_CXXFLAG}" diff --git a/src/hotspot/cpu/aarch64/aarch64.ad b/src/hotspot/cpu/aarch64/aarch64.ad index e10741612bd..95731732dcd 100644 --- a/src/hotspot/cpu/aarch64/aarch64.ad +++ b/src/hotspot/cpu/aarch64/aarch64.ad @@ -1,6 +1,7 @@ // // Copyright (c) 2003, 2022, Oracle and/or its affiliates. All rights reserved. // Copyright (c) 2014, 2022, Red Hat, Inc. All rights reserved. +// Copyright (c) 2024 Alibaba Group Holding Limited. All Rights Reserved. // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
// // This code is free software; you can redistribute it and/or modify it @@ -15768,6 +15769,39 @@ instruct partialSubtypeCheck(iRegP_R4 sub, iRegP_R0 super, iRegP_R2 temp, iRegP_ ins_pipe(pipe_class_memory); %} +instruct partialSubtypeCheckConstSuper(iRegP_R4 sub, iRegP_R0 super_reg, immP super_con, vRegD_V0 vtemp, iRegP_R5 result, + iRegP_R1 tempR1, iRegP_R2 tempR2, iRegP_R3 tempR3, + rFlagsReg cr) +%{ + match(Set result (PartialSubtypeCheck sub (Binary super_reg super_con))); + predicate(UseSecondarySupersTable); + effect(KILL cr, TEMP tempR1, TEMP tempR2, TEMP tempR3, TEMP vtemp); + + ins_cost(700); // smaller than the next version + format %{ "partialSubtypeCheck $result, $sub, super" %} + + ins_encode %{ + bool success = false; + u1 super_klass_slot = ((Klass*)$super_con$$constant)->hash_slot(); + if (InlineSecondarySupersTest) { + success = __ lookup_secondary_supers_table($sub$$Register, $super_reg$$Register, + $tempR1$$Register, $tempR2$$Register, $tempR3$$Register, + $vtemp$$FloatRegister, + $result$$Register, + super_klass_slot); + } else { + address call = __ trampoline_call(RuntimeAddress(StubRoutines::lookup_secondary_supers_table_stub(super_klass_slot))); + success = (call != NULL); + } + if (!success) { + ciEnv::current()->record_failure("CodeCache is full"); + return; + } + %} + + ins_pipe(pipe_class_memory); +%} + instruct partialSubtypeCheckVsZero(iRegP_R4 sub, iRegP_R0 super, iRegP_R2 temp, iRegP_R5 result, immP0 zero, rFlagsReg cr) %{ match(Set cr (CmpP (PartialSubtypeCheck sub super) zero)); diff --git a/src/hotspot/cpu/aarch64/assembler_aarch64.hpp b/src/hotspot/cpu/aarch64/assembler_aarch64.hpp index cd7945049c7..e73c97e26d6 100644 --- a/src/hotspot/cpu/aarch64/assembler_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/assembler_aarch64.hpp @@ -1,6 +1,7 @@ /* * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, 2019, Red Hat Inc. All rights reserved. 
+ * Copyright (c) 2024, 2024, Alibaba Group Holding Limited. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -2289,7 +2290,7 @@ void mvnw(Register Rd, Register Rm, INSN(cmhi, 1, 0b001101, true); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D #undef INSN - + // Advanced SIMD across lanes #define INSN(NAME, opc, opc2, accepted) \ void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn) { \ guarantee(T != T1Q && T != T1D, "incorrect arrangement"); \ diff --git a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp index 99b6855acb1..0db4f04e0ac 100644 --- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp @@ -1,6 +1,7 @@ /* * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. + * Copyright (c) 2024 Alibaba Group Holding Limited. All Rights Reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -327,6 +328,16 @@ void MacroAssembler::safepoint_poll_acquire(Label& slow_path) { } } +void MacroAssembler::rt_call(address dest, Register tmp) { + CodeBlob *cb = CodeCache::find_blob(dest); + if (cb) { + far_call(RuntimeAddress(dest)); + } else { + lea(tmp, RuntimeAddress(dest)); + blr(tmp); + } +} + void MacroAssembler::reset_last_Java_frame(bool clear_fp) { // we must set sp to zero to clear frame str(zr, Address(rthread, JavaThread::last_Java_sp_offset())); @@ -1375,6 +1386,9 @@ void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass, Label* L_success, Label* L_failure, bool set_cond_codes) { + // NB! Callers may assume that, when temp2_reg is a valid register, + // this code sets it to a nonzero value. 
+ assert_different_registers(sub_klass, super_klass, temp_reg); if (temp2_reg != noreg) assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg, rscratch1); @@ -1454,6 +1468,240 @@ void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass, bind(L_fallthrough); } +// Ensure that the inline code and the stub are using the same registers. +#define LOOKUP_SECONDARY_SUPERS_TABLE_REGISTERS \ +do { \ + assert(r_super_klass == r0 && \ + r_array_base == r1 && \ + r_array_length == r2 && \ + (r_array_index == r3 || r_array_index == noreg) && \ + (r_sub_klass == r4 || r_sub_klass == noreg) && \ + (r_bitmap == rscratch2 || r_bitmap == noreg) && \ + (result == r5 || result == noreg), "registers must match aarch64.ad"); \ +} while(0) + +// Return true: we succeeded in generating this code +bool MacroAssembler::lookup_secondary_supers_table(Register r_sub_klass, + Register r_super_klass, + Register temp1, + Register temp2, + Register temp3, + FloatRegister vtemp, + Register result, + u1 super_klass_slot, + bool stub_is_near) { + assert_different_registers(r_sub_klass, temp1, temp2, temp3, result, rscratch1, rscratch2); + + Label L_fallthrough; + + BLOCK_COMMENT("lookup_secondary_supers_table {"); + + const Register + r_array_base = temp1, // r1 + r_array_length = temp2, // r2 + r_array_index = temp3, // r3 + r_bitmap = rscratch2; + + LOOKUP_SECONDARY_SUPERS_TABLE_REGISTERS; + + u1 bit = super_klass_slot; + + // Make sure that result is nonzero if the TBZ below misses. + mov(result, 1); + + // We're going to need the bitmap in a vector reg and in a core reg, + // so load both now. + ldr(r_bitmap, Address(r_sub_klass, Klass::bitmap_offset())); + if (bit != 0) { + ldrd(vtemp, Address(r_sub_klass, Klass::bitmap_offset())); + } + // First check the bitmap to see if super_klass might be present. If + // the bit is zero, we are certain that super_klass is not one of + // the secondary supers. 
+ tbz(r_bitmap, bit, L_fallthrough); + + // Get the first array index that can contain super_klass into r_array_index. + if (bit != 0) { + shld(vtemp, vtemp, Klass::SECONDARY_SUPERS_TABLE_MASK - bit); + cnt(vtemp, T8B, vtemp); + addv(vtemp, T8B, vtemp); + fmovd(r_array_index, vtemp); + } else { + mov(r_array_index, (u1)1); + } + // NB! r_array_index is off by 1. It is compensated by keeping r_array_base off by 1 word. + + // We will consult the secondary-super array. + ldr(r_array_base, Address(r_sub_klass, in_bytes(Klass::secondary_supers_offset()))); + + // The value i in r_array_index is >= 1, so even though r_array_base + // points to the length, we don't need to adjust it to point to the + // data. + assert(Array::base_offset_in_bytes() == wordSize, "Adjust this code"); + assert(Array::length_offset_in_bytes() == 0, "Adjust this code"); + + ldr(result, Address(r_array_base, r_array_index, Address::lsl(LogBytesPerWord))); + eor(result, result, r_super_klass); + cbz(result, L_fallthrough); // Found a match + + // Is there another entry to check? Consult the bitmap. + tbz(r_bitmap, (bit + 1) & Klass::SECONDARY_SUPERS_TABLE_MASK, L_fallthrough); + + // Linear probe. + if (bit != 0) { + ror(r_bitmap, r_bitmap, bit); + } + + // The slot we just inspected is at secondary_supers[r_array_index - 1]. + // The next slot to be inspected, by the stub we're about to call, + // is secondary_supers[r_array_index]. Bits 0 and 1 in the bitmap + // have been checked. 
+ Address stub = RuntimeAddress(StubRoutines::lookup_secondary_supers_table_slow_path_stub()); + if (stub_is_near) { + bl(stub); + } else { + address call = trampoline_call(stub); + if (call == nullptr) { + return false; // trampoline allocation failed + } + } + + BLOCK_COMMENT("} lookup_secondary_supers_table"); + + bind(L_fallthrough); + + if (VerifySecondarySupers) { + verify_secondary_supers_table(r_sub_klass, r_super_klass, // r4, r0 + temp1, temp2, result); // r1, r2, r5 + } + return true; +} + +// Called, via the slow-path stub, by code generated by +// lookup_secondary_supers_table above. This is called when there is a +// collision in the hashed lookup in the secondary supers array. +void MacroAssembler::lookup_secondary_supers_table_slow_path(Register r_super_klass, + Register r_array_base, + Register r_array_index, + Register r_bitmap, + Register temp1, + Register result) { + assert_different_registers(r_super_klass, r_array_base, r_array_index, r_bitmap, temp1, result, rscratch1); + + const Register + r_array_length = temp1, + r_sub_klass = noreg; // unused + + LOOKUP_SECONDARY_SUPERS_TABLE_REGISTERS; + + Label L_fallthrough, L_huge; + + // Load the array length. + ldrw(r_array_length, Address(r_array_base, Array::length_offset_in_bytes())); + // And adjust the array base to point to the data. + // NB! Effectively increments current slot index by 1. + assert(Array::base_offset_in_bytes() == wordSize, ""); + add(r_array_base, r_array_base, Array::base_offset_in_bytes()); + + // The bitmap is full to bursting. + // Implicit invariant: BITMAP_FULL implies (length > 0) + assert(Klass::SECONDARY_SUPERS_BITMAP_FULL == ~uintx(0), ""); + cmn(r_bitmap, (u1)1); + br(EQ, L_huge); + + // NB! Our caller has checked bits 0 and 1 in the bitmap. The + // current slot (at secondary_supers[r_array_index]) has not yet + // been inspected, and r_array_index may be out of bounds if we + // wrapped around the end of the array. 
+ + { // This is conventional linear probing, but instead of terminating + // when a null entry is found in the table, we maintain a bitmap + // in which a 0 indicates missing entries. + // The check above guarantees there are 0s in the bitmap, so the loop + // eventually terminates. + Label L_loop; + bind(L_loop); + + // Check for wraparound. + cmp(r_array_index, r_array_length); + csel(r_array_index, zr, r_array_index, GE); + + ldr(rscratch1, Address(r_array_base, r_array_index, Address::lsl(LogBytesPerWord))); + eor(result, rscratch1, r_super_klass); + cbz(result, L_fallthrough); + + tbz(r_bitmap, 2, L_fallthrough); // look-ahead check (Bit 2); result is non-zero + + ror(r_bitmap, r_bitmap, 1); + add(r_array_index, r_array_index, 1); + b(L_loop); + } + + { // Degenerate case: more than 64 secondary supers. + // FIXME: We could do something smarter here, maybe a vectorized + // comparison or a binary search, but is that worth any added + // complexity? + bind(L_huge); + cmp(sp, zr); // Clear Z flag; SP is never zero + repne_scan(r_array_base, r_super_klass, r_array_length, rscratch1); + cset(result, NE); // result == 0 iff we got a match. + } + + bind(L_fallthrough); +} + +// Make sure that the hashed lookup and a linear scan agree. +void MacroAssembler::verify_secondary_supers_table(Register r_sub_klass, + Register r_super_klass, + Register temp1, + Register temp2, + Register result) { + assert_different_registers(r_sub_klass, r_super_klass, temp1, temp2, result, rscratch1); + + const Register + r_array_base = temp1, + r_array_length = temp2, + r_array_index = noreg, // unused + r_bitmap = noreg; // unused + + LOOKUP_SECONDARY_SUPERS_TABLE_REGISTERS; + + BLOCK_COMMENT("verify_secondary_supers_table {"); + + // We will consult the secondary-super array. + ldr(r_array_base, Address(r_sub_klass, in_bytes(Klass::secondary_supers_offset()))); + + // Load the array length. 
+ ldrw(r_array_length, Address(r_array_base, Array::length_offset_in_bytes())); + // And adjust the array base to point to the data. + add(r_array_base, r_array_base, Array::base_offset_in_bytes()); + + cmp(sp, zr); // Clear Z flag; SP is never zero + // Scan R2 words at [R1] for an occurrence of R0. + // Set NZ/Z based on last compare. + repne_scan(/*addr*/r_array_base, /*value*/r_super_klass, /*count*/r_array_length, rscratch2); + // rscratch1 == 0 iff we got a match. + cset(rscratch1, NE); + + Label passed; + cmp(result, zr); + cset(result, NE); // normalize result to 0/1 for comparison + + cmp(rscratch1, result); + br(EQ, passed); + { + mov(r0, r_super_klass); // r0 <- r0 + mov(r1, r_sub_klass); // r1 <- r4 + mov(r2, /*expected*/rscratch1); // r2 <- r8 + mov(r3, result); // r3 <- r5 + mov(r4, (address)("mismatch")); // r4 <- const + rt_call(CAST_FROM_FN_PTR(address, Klass::on_secondary_supers_verification_failure), rscratch2); + should_not_reach_here(); + } + bind(passed); + + BLOCK_COMMENT("} verify_secondary_supers_table"); +} void MacroAssembler::verify_oop(Register reg, const char* s) { if (!VerifyOops) return; diff --git a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp index 384ad78b560..22abfb7f9ca 100644 --- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp @@ -1,6 +1,7 @@ /* * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, 2015, Red Hat Inc. All rights reserved. + * Copyright (c) 2024 Alibaba Group Holding Limited. All Rights Reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -101,6 +102,7 @@ class MacroAssembler: public Assembler { void safepoint_poll(Label& slow_path); void safepoint_poll_acquire(Label& slow_path); + void rt_call(address dest, Register tmp = rscratch1); // Biased locking support // lock_reg and obj_reg must be loaded up with the appropriate values. // swap_reg is killed. @@ -946,6 +948,31 @@ class MacroAssembler: public Assembler { Label* L_failure, bool set_cond_codes = false); + // As above, but with a constant super_klass. + // The result is in Register result, not the condition codes. + bool lookup_secondary_supers_table(Register r_sub_klass, + Register r_super_klass, + Register temp1, + Register temp2, + Register temp3, + FloatRegister vtemp, + Register result, + u1 super_klass_slot, + bool stub_is_near = false); + + void verify_secondary_supers_table(Register r_sub_klass, + Register r_super_klass, + Register temp1, + Register temp2, + Register result); + + void lookup_secondary_supers_table_slow_path(Register r_super_klass, + Register r_array_base, + Register r_array_index, + Register r_bitmap, + Register temp1, + Register result); + // Simplified, combined version, good for typical uses. // Falls through on failure. void check_klass_subtype(Register sub_klass, diff --git a/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp b/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp index 05dd928511c..bcf54c0537b 100644 --- a/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp @@ -1,6 +1,7 @@ /* * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, 2019, Red Hat Inc. All rights reserved. + * Copyright (c) 2024 Alibaba Group Holding Limited. All Rights Reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -5045,6 +5046,52 @@ class StubGenerator: public StubCodeGenerator { return start; } + + address generate_lookup_secondary_supers_table_stub(u1 super_klass_index) { + StubCodeMark mark(this, "StubRoutines", "lookup_secondary_supers_table"); + + address start = __ pc(); + const Register + r_super_klass = r0, + r_array_base = r1, + r_array_length = r2, + r_array_index = r3, + r_sub_klass = r4, + r_bitmap = rscratch2, + result = r5; + const FloatRegister + vtemp = v0; + + Label L_success; + __ enter(); + __ lookup_secondary_supers_table(r_sub_klass, r_super_klass, + r_array_base, r_array_length, r_array_index, + vtemp, result, super_klass_index, + /*stub_is_near*/true); + __ leave(); + __ ret(lr); + + return start; + } + + // Slow path implementation for UseSecondarySupersTable. + address generate_lookup_secondary_supers_table_slow_path_stub() { + StubCodeMark mark(this, "StubRoutines", "lookup_secondary_supers_table_slow_path"); + + address start = __ pc(); + const Register + r_super_klass = r0, // argument + r_array_base = r1, // argument + temp1 = r2, // temp + r_array_index = r3, // argument + r_bitmap = rscratch2, // argument + result = r5; // argument + + __ lookup_secondary_supers_table_slow_path(r_super_klass, r_array_base, r_array_index, r_bitmap, temp1, result); + __ ret(lr); + + return start; + } #ifdef LINUX @@ -6456,7 +6503,15 @@ class StubGenerator: public StubCodeGenerator { generate_atomic_entry_points(); #endif // LINUX - + if (UseSecondarySupersTable) { + StubRoutines::_lookup_secondary_supers_table_slow_path_stub = generate_lookup_secondary_supers_table_slow_path_stub(); + if (! 
InlineSecondarySupersTest) { + for (int slot = 0; slot < Klass::SECONDARY_SUPERS_TABLE_SIZE; slot++) { + StubRoutines::_lookup_secondary_supers_table_stubs[slot] + = generate_lookup_secondary_supers_table_stub(slot); + } + } + } StubRoutines::aarch64::set_completed(); } diff --git a/src/hotspot/cpu/aarch64/vm_version_aarch64.hpp b/src/hotspot/cpu/aarch64/vm_version_aarch64.hpp index 8c9676aed17..3000b6cbfb9 100644 --- a/src/hotspot/cpu/aarch64/vm_version_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/vm_version_aarch64.hpp @@ -115,6 +115,7 @@ class VM_Version : public Abstract_VM_Version { static int icache_line_size() { return _icache_line_size; } static int dcache_line_size() { return _dcache_line_size; } + constexpr static bool supports_secondary_supers_table() { return true; } #ifdef __APPLE__ // Is the CPU running emulated (for example macOS Rosetta running x86_64 code on M1 ARM (aarch64) diff --git a/src/hotspot/cpu/x86/assembler_x86.cpp b/src/hotspot/cpu/x86/assembler_x86.cpp index 5181c31b380..82826d52939 100644 --- a/src/hotspot/cpu/x86/assembler_x86.cpp +++ b/src/hotspot/cpu/x86/assembler_x86.cpp @@ -10550,6 +10550,97 @@ void Assembler::notq(Register dst) { emit_int8((unsigned char)(0xD0 | encode)); } +int8_t Assembler::get_prefixq(Address adr) { + int8_t prfx = get_prefixq(adr, rax); + assert(REX_W <= prfx && prfx <= REX_WXB, "must be"); + return prfx; +} + +int8_t Assembler::get_prefixq(Address adr, Register src) { + int8_t prfx = (int8_t)(REX_W + + ((int)adr.base_needs_rex()) + + ((int)adr.index_needs_rex() << 1) + + ((int)(src->encoding() >= 8) << 2)); +#ifdef ASSERT + if (src->encoding() < 8) { + if (adr.base_needs_rex()) { + if (adr.index_needs_rex()) { + assert(prfx == REX_WXB, "must be"); + } else { + assert(prfx == REX_WB, "must be"); + } + } else { + if (adr.index_needs_rex()) { + assert(prfx == REX_WX, "must be"); + } else { + assert(prfx == REX_W, "must be"); + } + } + } else { + if (adr.base_needs_rex()) { + if (adr.index_needs_rex()) { + 
assert(prfx == REX_WRXB, "must be"); + } else { + assert(prfx == REX_WRB, "must be"); + } + } else { + if (adr.index_needs_rex()) { + assert(prfx == REX_WRX, "must be"); + } else { + assert(prfx == REX_WR, "must be"); + } + } + } +#endif + return prfx; +} + +#ifdef _LP64 +void Assembler::salq(Address dst, int imm8) { + InstructionMark im(this); + assert(isShiftCount(imm8 >> 1), "illegal shift count"); + if (imm8 == 1) { + emit_int16(get_prefixq(dst), (unsigned char)0xD1); + emit_operand(as_Register(4), dst, 0); + } + else { + emit_int16(get_prefixq(dst), (unsigned char)0xC1); + emit_operand(as_Register(4), dst, 1); + emit_int8(imm8); + } +} + +void Assembler::salq(Address dst) { + InstructionMark im(this); + emit_int16(get_prefixq(dst), (unsigned char)0xD3); + emit_operand(as_Register(4), dst, 0); +} + +void Assembler::salq(Register dst, int imm8) { + assert(isShiftCount(imm8 >> 1), "illegal shift count"); + int encode = prefixq_and_encode(dst->encoding()); + if (imm8 == 1) { + emit_int16((unsigned char)0xD1, (0xE0 | encode)); + } else { + emit_int24((unsigned char)0xC1, (0xE0 | encode), imm8); + } +} + +void Assembler::salq(Register dst) { + int encode = prefixq_and_encode(dst->encoding()); + emit_int16((unsigned char)0xD3, (0xE0 | encode)); +} +#endif + +void Assembler::btq(Register src, int imm8) { + assert(isByte(imm8), "not a byte"); + InstructionMark im(this); + int encode = prefixq_and_encode(src->encoding()); + emit_int16(0x0f, 0xba); + emit_int8(0xe0|encode); + emit_int8(imm8); +} + void Assembler::orq(Address dst, int32_t imm32) { InstructionMark im(this); prefixq(dst); diff --git a/src/hotspot/cpu/x86/assembler_x86.hpp b/src/hotspot/cpu/x86/assembler_x86.hpp index 917e648be4d..4fb2ff47b5c 100644 --- a/src/hotspot/cpu/x86/assembler_x86.hpp +++ b/src/hotspot/cpu/x86/assembler_x86.hpp @@ -707,6 +707,12 @@ class Assembler : public AbstractAssembler { int prefix_and_encode(int dst_enc, bool dst_is_byte, int src_enc, bool src_is_byte); int 
prefixq_and_encode(int dst_enc, int src_enc); +// Some prefixq variants always emit exactly one prefix byte, so besides a + // prefix-emitting method we provide a method to get the prefix byte to emit, + // which can then be folded into a byte stream. + int8_t get_prefixq(Address adr); + int8_t get_prefixq(Address adr, Register reg); + void prefix(Register reg); void prefix(Register dst, Register src, Prefix p); void prefix(Register dst, Address adr, Prefix p); @@ -1674,6 +1680,7 @@ class Assembler : public AbstractAssembler { #ifdef _LP64 void notq(Register dst); + void btq(Register src, int imm8); #endif void orw(Register dst, Register src); @@ -1938,6 +1945,13 @@ class Assembler : public AbstractAssembler { void sarl(Register dst, int imm8); void sarl(Register dst); +#ifdef _LP64 + void salq(Register dst, int imm8); + void salq(Register dst); + void salq(Address dst, int imm8); + void salq(Address dst); +#endif + void sarq(Register dst, int imm8); void sarq(Register dst); diff --git a/src/hotspot/cpu/x86/macroAssembler_x86.cpp b/src/hotspot/cpu/x86/macroAssembler_x86.cpp index 721134f84e8..d1246d3c6b6 100644 --- a/src/hotspot/cpu/x86/macroAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/macroAssembler_x86.cpp @@ -5055,6 +5055,339 @@ void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass, bind(L_fallthrough); } +#ifdef _LP64 + +// population_count variant for running without the POPCNT +// instruction, which was introduced with SSE4.2 in 2008. 
+void MacroAssembler::population_count(Register dst, Register src, + Register scratch1, Register scratch2) { + assert_different_registers(src, scratch1, scratch2); + if (UsePopCountInstruction) { + Assembler::popcntq(dst, src); + } else { + assert_different_registers(src, scratch1, scratch2); + assert_different_registers(dst, scratch1, scratch2); + Label loop, done; + + mov(scratch1, src); + // dst = 0; + // while(scratch1 != 0) { + // dst++; + // scratch1 &= (scratch1 - 1); + // } + xorl(dst, dst); + testq(scratch1, scratch1); + jccb(Assembler::equal, done); + { + bind(loop); + incq(dst); + movq(scratch2, scratch1); + decq(scratch2); + andq(scratch1, scratch2); + jccb(Assembler::notEqual, loop); + } + bind(done); + } +} + +// Ensure that the inline code and the stub are using the same registers. +#define LOOKUP_SECONDARY_SUPERS_TABLE_REGISTERS \ +do { \ + assert(r_super_klass == rax, "mismatch"); \ + assert(r_array_base == rbx, "mismatch"); \ + assert(r_array_length == rcx, "mismatch"); \ + assert(r_array_index == rdx, "mismatch"); \ + assert(r_sub_klass == rsi || r_sub_klass == noreg, "mismatch"); \ + assert(r_bitmap == r11 || r_bitmap == noreg, "mismatch"); \ + assert(result == rdi || result == noreg, "mismatch"); \ +} while(0) + +void MacroAssembler::lookup_secondary_supers_table(Register r_sub_klass, + Register r_super_klass, + Register temp1, + Register temp2, + Register temp3, + Register temp4, + Register result, + u1 super_klass_slot) { + assert_different_registers(r_sub_klass, r_super_klass, temp1, temp2, temp3, temp4, result); + + Label L_fallthrough, L_success, L_failure; + + BLOCK_COMMENT("lookup_secondary_supers_table {"); + + const Register + r_array_index = temp1, + r_array_length = temp2, + r_array_base = temp3, + r_bitmap = temp4; + + LOOKUP_SECONDARY_SUPERS_TABLE_REGISTERS; + + xorq(result, result); // = 0 + + movq(r_bitmap, Address(r_sub_klass, Klass::bitmap_offset())); + movq(r_array_index, r_bitmap); + + // First check the bitmap to see if 
super_klass might be present. If + // the bit is zero, we are certain that super_klass is not one of + // the secondary supers. + u1 bit = super_klass_slot; + { + // NB: If the count in a x86 shift instruction is 0, the flags are + // not affected, so we do a testq instead. + int shift_count = Klass::SECONDARY_SUPERS_TABLE_MASK - bit; + if (shift_count != 0) { + salq(r_array_index, shift_count); + } else { + testq(r_array_index, r_array_index); + } + } + // We test the MSB of r_array_index, i.e. its sign bit + jcc(Assembler::positive, L_failure); + + // Get the first array index that can contain super_klass into r_array_index. + if (bit != 0) { + population_count(r_array_index, r_array_index, temp2, temp3); + } else { + movl(r_array_index, 1); + } + // NB! r_array_index is off by 1. It is compensated by keeping r_array_base off by 1 word. + + // We will consult the secondary-super array. + movptr(r_array_base, Address(r_sub_klass, in_bytes(Klass::secondary_supers_offset()))); + + // We're asserting that the first word in an Array is the + // length, and the second word is the first word of the data. If + // that ever changes, r_array_base will have to be adjusted here. + assert(Array::base_offset_in_bytes() == wordSize, "Adjust this code"); + assert(Array::length_offset_in_bytes() == 0, "Adjust this code"); + + cmpq(r_super_klass, Address(r_array_base, r_array_index, Address::times_8)); + jccb(Assembler::equal, L_success); + + // Is there another entry to check? Consult the bitmap. + btq(r_bitmap, (bit + 1) & Klass::SECONDARY_SUPERS_TABLE_MASK); + jccb(Assembler::carryClear, L_failure); + + // Linear probe. Rotate the bitmap so that the next bit to test is + // in Bit 1. + if (bit != 0) { + rorq(r_bitmap, bit); + } + + // Calls into the stub generated by lookup_secondary_supers_table_slow_path. + // Arguments: r_super_klass, r_array_base, r_array_index, r_bitmap. + // Kills: r_array_length. + // Returns: result. 
+ call(RuntimeAddress(StubRoutines::lookup_secondary_supers_table_slow_path_stub())); + // Result (0/1) is in rdi + jmpb(L_fallthrough); + + bind(L_failure); + incq(result); // 0 => 1 + + bind(L_success); + // result = 0; + + bind(L_fallthrough); + BLOCK_COMMENT("} lookup_secondary_supers_table"); + + if (VerifySecondarySupers) { + verify_secondary_supers_table(r_sub_klass, r_super_klass, result, + temp1, temp2, temp3); + } +} + +void MacroAssembler::repne_scanq(Register addr, Register value, Register count, Register limit, + Label* L_success, Label* L_failure) { + Label L_loop, L_fallthrough; + { + int label_nulls = 0; + if (L_success == nullptr) { L_success = &L_fallthrough; label_nulls++; } + if (L_failure == nullptr) { L_failure = &L_fallthrough; label_nulls++; } + assert(label_nulls <= 1, "at most one null in the batch"); + } + bind(L_loop); + cmpq(value, Address(addr, count, Address::times_8)); + jcc(Assembler::equal, *L_success); + addl(count, 1); + cmpl(count, limit); + jcc(Assembler::less, L_loop); + + if (&L_fallthrough != L_failure) { + jmp(*L_failure); + } + bind(L_fallthrough); +} + +// Called, via the slow-path stub, by code generated by +// lookup_secondary_supers_table above. This is called when there is a +// collision in the hashed lookup in the secondary supers array. 
+void MacroAssembler::lookup_secondary_supers_table_slow_path(Register r_super_klass, + Register r_array_base, + Register r_array_index, + Register r_bitmap, + Register temp1, + Register temp2, + Label* L_success, + Label* L_failure) { + assert_different_registers(r_super_klass, r_array_base, r_array_index, r_bitmap, temp1, temp2); + + const Register + r_array_length = temp1, + r_sub_klass = noreg, + result = noreg; + + LOOKUP_SECONDARY_SUPERS_TABLE_REGISTERS; + + Label L_fallthrough; + int label_nulls = 0; + if (L_success == nullptr) { L_success = &L_fallthrough; label_nulls++; } + if (L_failure == nullptr) { L_failure = &L_fallthrough; label_nulls++; } + assert(label_nulls <= 1, "at most one null in the batch"); + + // Load the array length. + movl(r_array_length, Address(r_array_base, Array::length_offset_in_bytes())); + // And adjust the array base to point to the data. + // NB! Effectively increments current slot index by 1. + assert(Array::base_offset_in_bytes() == wordSize, ""); + addptr(r_array_base, Array::base_offset_in_bytes()); + + // Linear probe + Label L_huge; + + // The bitmap is full to bursting. + // Implicit invariant: BITMAP_FULL implies (length > 0) + assert(Klass::SECONDARY_SUPERS_BITMAP_FULL == ~uintx(0), ""); + cmpq(r_bitmap, (int32_t)-1); // sign-extends immediate to 64-bit value + jcc(Assembler::equal, L_huge); + + // NB! Our caller has checked bits 0 and 1 in the bitmap. The + // current slot (at secondary_supers[r_array_index]) has not yet + // been inspected, and r_array_index may be out of bounds if we + // wrapped around the end of the array. + + { // This is conventional linear probing, but instead of terminating + // when a null entry is found in the table, we maintain a bitmap + // in which a 0 indicates missing entries. + // The check above guarantees there are 0s in the bitmap, so the loop + // eventually terminates. + + xorl(temp2, temp2); // = 0; + + Label L_again; + bind(L_again); + + // Check for array wraparound. 
+ cmpl(r_array_index, r_array_length); + cmovl(Assembler::greaterEqual, r_array_index, temp2); + + cmpq(r_super_klass, Address(r_array_base, r_array_index, Address::times_8)); + jcc(Assembler::equal, *L_success); + + // If the next bit in bitmap is zero, we're done. + btq(r_bitmap, 2); // look-ahead check (Bit 2); Bits 0 and 1 are tested by now + jcc(Assembler::carryClear, *L_failure); + + rorq(r_bitmap, 1); // Bits 1/2 => 0/1 + addl(r_array_index, 1); + + jmp(L_again); + } + + { // Degenerate case: more than 64 secondary supers. + // FIXME: We could do something smarter here, maybe a vectorized + // comparison or a binary search, but is that worth any added + // complexity? + bind(L_huge); + xorl(r_array_index, r_array_index); // = 0 + repne_scanq(r_array_base, r_super_klass, r_array_index, r_array_length, + L_success, + (&L_fallthrough != L_failure ? L_failure : nullptr)); + + bind(L_fallthrough); + } +} + +struct VerifyHelperArguments { + Klass* _super; + Klass* _sub; + intptr_t _linear_result; + intptr_t _table_result; +}; + +static void verify_secondary_supers_table_helper(const char* msg, VerifyHelperArguments* args) { + Klass::on_secondary_supers_verification_failure(args->_super, + args->_sub, + args->_linear_result, + args->_table_result, + msg); +} + +// Make sure that the hashed lookup and a linear scan agree. 
+void MacroAssembler::verify_secondary_supers_table(Register r_sub_klass, + Register r_super_klass, + Register result, + Register temp1, + Register temp2, + Register temp3) { + const Register + r_array_index = temp1, + r_array_length = temp2, + r_array_base = temp3, + r_bitmap = noreg; + + LOOKUP_SECONDARY_SUPERS_TABLE_REGISTERS; + + BLOCK_COMMENT("verify_secondary_supers_table {"); + + Label L_success, L_failure, L_check, L_done; + + movptr(r_array_base, Address(r_sub_klass, in_bytes(Klass::secondary_supers_offset()))); + movl(r_array_length, Address(r_array_base, Array::length_offset_in_bytes())); + // And adjust the array base to point to the data. + addptr(r_array_base, Array::base_offset_in_bytes()); + + testl(r_array_length, r_array_length); // array_length == 0? + jcc(Assembler::zero, L_failure); + + movl(r_array_index, 0); + repne_scanq(r_array_base, r_super_klass, r_array_index, r_array_length, &L_success); + // fall through to L_failure + + const Register linear_result = r_array_index; // reuse temp1 + + bind(L_failure); // not present + movl(linear_result, 1); + jmp(L_check); + + bind(L_success); // present + movl(linear_result, 0); + + bind(L_check); + cmpl(linear_result, result); + jcc(Assembler::equal, L_done); + + { // To avoid calling convention issues, build a record on the stack + // and pass the pointer to that instead. 
+ push(result); + push(linear_result); + push(r_sub_klass); + push(r_super_klass); + movptr(c_rarg1, rsp); + movptr(c_rarg0, (intptr_t) "mismatch"); + call(RuntimeAddress(CAST_FROM_FN_PTR(address, verify_secondary_supers_table_helper))); + should_not_reach_here(); + } + bind(L_done); + + BLOCK_COMMENT("} verify_secondary_supers_table"); +} + +#undef LOOKUP_SECONDARY_SUPERS_TABLE_REGISTERS + +#endif // LP64 void MacroAssembler::cmov32(Condition cc, Register dst, Address src) { if (VM_Version::supports_cmov()) { diff --git a/src/hotspot/cpu/x86/macroAssembler_x86.hpp b/src/hotspot/cpu/x86/macroAssembler_x86.hpp index d5c074bd047..515ed093286 100644 --- a/src/hotspot/cpu/x86/macroAssembler_x86.hpp +++ b/src/hotspot/cpu/x86/macroAssembler_x86.hpp @@ -538,6 +538,8 @@ class MacroAssembler: public Assembler { ); void zero_memory(Register address, Register length_in_bytes, int offset_in_bytes, Register temp); + void population_count(Register dst, Register src, Register scratch1, Register scratch2); + // interface method calling void lookup_interface_method(Register recv_klass, Register intf_klass, @@ -579,6 +581,45 @@ class MacroAssembler: public Assembler { Label* L_failure, bool set_cond_codes = false); + void hashed_check_klass_subtype_slow_path(Register sub_klass, + Register super_klass, + Register temp_reg, + Register temp2_reg, + Label* L_success, + Label* L_failure, + bool set_cond_codes = false); + + // As above, but with a constant super_klass. + // The result is in Register result, not the condition codes. 
+ void lookup_secondary_supers_table(Register sub_klass, + Register super_klass, + Register temp1, + Register temp2, + Register temp3, + Register temp4, + Register result, + u1 super_klass_slot); + + void lookup_secondary_supers_table_slow_path(Register r_super_klass, + Register r_array_base, + Register r_array_index, + Register r_bitmap, + Register temp1, + Register temp2, + Label* L_success, + Label* L_failure = NULL); + + void verify_secondary_supers_table(Register r_sub_klass, + Register r_super_klass, + Register expected, + Register temp1, + Register temp2, + Register temp3); + + void repne_scanq(Register addr, Register value, Register count, Register limit, + Label* L_success, + Label* L_failure = NULL); + // Simplified, combined version, good for typical uses. // Falls through on failure. void check_klass_subtype(Register sub_klass, diff --git a/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp b/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp index 5ee6c9482f6..b847de8675a 100644 --- a/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp +++ b/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp @@ -5898,6 +5898,54 @@ address generate_avx_ghash_processBlocks() { } +address generate_lookup_secondary_supers_table_stub(u1 super_klass_index) { + StubCodeMark mark(this, "StubRoutines", "lookup_secondary_supers_table"); + + address start = __ pc(); + + const Register + r_super_klass = rax, + r_sub_klass = rsi, + result = rdi; + + __ lookup_secondary_supers_table(r_sub_klass, r_super_klass, + rdx, rcx, rbx, r11, // temps + result, + super_klass_index); + __ ret(0); + + return start; + } + + // Slow path implementation for UseSecondarySupersTable. 
+ address generate_lookup_secondary_supers_table_slow_path_stub() { + StubCodeMark mark(this, "StubRoutines", "lookup_secondary_supers_table"); + + address start = __ pc(); + + const Register + r_super_klass = rax, + r_array_base = rbx, + r_array_index = rdx, + r_sub_klass = rsi, + r_bitmap = r11, + result = rdi; + + Label L_success; + __ lookup_secondary_supers_table_slow_path(r_super_klass, r_array_base, r_array_index, r_bitmap, + rcx, rdi, // temps + &L_success); + // bind(L_failure); + __ movl(result, 1); + __ ret(0); + + __ bind(L_success); + __ movl(result, 0); + __ ret(0); + + return start; + } + #undef __ #define __ masm-> @@ -6292,6 +6340,14 @@ address generate_avx_ghash_processBlocks() { StubRoutines::_montgomeryMultiply = CAST_FROM_FN_PTR(address, SharedRuntime::montgomery_multiply); } + if (UseSecondarySupersTable) { + StubRoutines::_lookup_secondary_supers_table_slow_path_stub = generate_lookup_secondary_supers_table_slow_path_stub(); + if (!InlineSecondarySupersTest) { + for (int slot = 0; slot < Klass::SECONDARY_SUPERS_TABLE_SIZE; slot++) { + StubRoutines::_lookup_secondary_supers_table_stubs[slot] = generate_lookup_secondary_supers_table_stub(slot); + } + } + } if (UseMontgomerySquareIntrinsic) { StubRoutines::_montgomerySquare = CAST_FROM_FN_PTR(address, SharedRuntime::montgomery_square); diff --git a/src/hotspot/cpu/x86/vm_version_x86.hpp b/src/hotspot/cpu/x86/vm_version_x86.hpp index a6df96f7511..0a6e124ff5f 100644 --- a/src/hotspot/cpu/x86/vm_version_x86.hpp +++ b/src/hotspot/cpu/x86/vm_version_x86.hpp @@ -949,6 +949,11 @@ class VM_Version : public Abstract_VM_Version { // the intrinsic for java.lang.Thread.onSpinWait() static bool supports_on_spin_wait() { return supports_sse2(); } + // x86_64 supports secondary supers table + constexpr static bool supports_secondary_supers_table() { + return LP64_ONLY(true) NOT_LP64(false); // not implemented on x86_32 + } + #ifdef __APPLE__ // Is the CPU running emulated (for example macOS Rosetta running 
x86_64 code on M1 ARM (aarch64) static bool is_cpu_emulated(); diff --git a/src/hotspot/cpu/x86/x86_64.ad b/src/hotspot/cpu/x86/x86_64.ad index 0f153df15fc..a9ae1dc1627 100644 --- a/src/hotspot/cpu/x86/x86_64.ad +++ b/src/hotspot/cpu/x86/x86_64.ad @@ -432,6 +432,9 @@ reg_class long_rcx_reg(RCX, RCX_H); // Singleton class for RDX long register reg_class long_rdx_reg(RDX, RDX_H); +// Singleton class for R11 long register +reg_class long_r11_reg(R11, R11_H); + // Class for all int registers (excluding RSP) reg_class int_reg_with_rbp(RAX, RDX, @@ -3770,6 +3773,16 @@ operand rdx_RegL() interface(REG_INTER); %} +operand r11_RegL() +%{ + constraint(ALLOC_IN_RC(long_r11_reg)); + match(RegL); + match(rRegL); + + format %{ %} + interface(REG_INTER); +%} + // Flags register, used as output of compare instructions operand rFlagsReg() %{ @@ -12426,6 +12439,31 @@ instruct partialSubtypeCheck(rdi_RegP result, ins_pipe(pipe_slow); %} +instruct partialSubtypeCheckConstSuper(rsi_RegP sub, rax_RegP super_reg, immP super_con, rdi_RegP result, + rdx_RegL temp1, rcx_RegL temp2, rbx_RegP temp3, r11_RegL temp4, + rFlagsReg cr) +%{ + match(Set result (PartialSubtypeCheck sub (Binary super_reg super_con))); + predicate(UseSecondarySupersTable); + effect(KILL cr, TEMP temp1, TEMP temp2, TEMP temp3, TEMP temp4); + + ins_cost(700); // smaller than the next version + format %{ "partialSubtypeCheck $result, $sub, super" %} + + ins_encode %{ + u1 super_klass_slot = ((Klass*)$super_con$$constant)->hash_slot(); + if (InlineSecondarySupersTest) { + __ lookup_secondary_supers_table($sub$$Register, $super_reg$$Register, $temp1$$Register, $temp2$$Register, + $temp3$$Register, $temp4$$Register, $result$$Register, + super_klass_slot); + } else { + __ call(RuntimeAddress(StubRoutines::lookup_secondary_supers_table_stub(super_klass_slot))); + } + %} + + ins_pipe(pipe_slow); +%} + instruct partialSubtypeCheck_vs_Zero(rFlagsReg cr, rsi_RegP sub, rax_RegP super, rcx_RegI rcx, immP0 zero, diff --git 
a/src/hotspot/share/classfile/classLoader.cpp b/src/hotspot/share/classfile/classLoader.cpp index 820bdb5c006..c75e140e50b 100644 --- a/src/hotspot/share/classfile/classLoader.cpp +++ b/src/hotspot/share/classfile/classLoader.cpp @@ -135,6 +135,7 @@ PerfCounter* ClassLoader::_sync_JVMDefineClassLockFreeCounter = NULL; PerfCounter* ClassLoader::_sync_JNIDefineClassLockFreeCounter = NULL; PerfCounter* ClassLoader::_unsafe_defineClassCallCounter = NULL; PerfCounter* ClassLoader::_load_instance_class_failCounter = NULL; +PerfCounter* ClassLoader::_perf_secondary_hash_time = NULL; GrowableArray* ClassLoader::_patch_mod_entries = NULL; GrowableArray* ClassLoader::_exploded_entries = NULL; @@ -1697,7 +1698,8 @@ void ClassLoader::initialize() { NEWPERFEVENTCOUNTER(_unsafe_defineClassCallCounter, SUN_CLS, "unsafeDefineClassCalls"); - + NEWPERFTICKCOUNTER(_perf_secondary_hash_time, SUN_CLS, + "secondarySuperHashTime"); NEWPERFEVENTCOUNTER(_load_instance_class_failCounter, SUN_CLS, "loadInstanceClassFailRate"); } diff --git a/src/hotspot/share/classfile/classLoader.hpp b/src/hotspot/share/classfile/classLoader.hpp index c4add8e383e..cbd87371502 100644 --- a/src/hotspot/share/classfile/classLoader.hpp +++ b/src/hotspot/share/classfile/classLoader.hpp @@ -212,6 +212,8 @@ class ClassLoader: AllStatic { static PerfCounter* _unsafe_defineClassCallCounter; static PerfCounter* _load_instance_class_failCounter; + // Count the time taken to hash the secondary superclass arrays. + static PerfCounter* _perf_secondary_hash_time; // The boot class path consists of 3 ordered pieces: // 1. 
the module/path pairs specified to --patch-module @@ -319,6 +321,7 @@ class ClassLoader: AllStatic { static PerfCounter* perf_class_parse_selftime() { return _perf_class_parse_selftime; } static PerfCounter* perf_sys_class_lookup_time() { return _perf_sys_class_lookup_time; } static PerfCounter* perf_shared_classload_time() { return _perf_shared_classload_time; } + static PerfCounter* perf_secondary_hash_time() { return _perf_secondary_hash_time; } static PerfCounter* perf_sys_classload_time() { return _perf_sys_classload_time; } static PerfCounter* perf_app_classload_time() { return _perf_app_classload_time; } static PerfCounter* perf_app_classload_selftime() { return _perf_app_classload_selftime; } diff --git a/src/hotspot/share/memory/universe.cpp b/src/hotspot/share/memory/universe.cpp index dc94d4b33e4..f4ea9258462 100644 --- a/src/hotspot/share/memory/universe.cpp +++ b/src/hotspot/share/memory/universe.cpp @@ -139,6 +139,9 @@ Array* Universe::_the_empty_short_array = NULL; Array* Universe::_the_empty_klass_array = NULL; Array* Universe::_the_empty_method_array = NULL; +uintx Universe::_the_array_interfaces_bitmap = 0; +uintx Universe::_the_empty_klass_bitmap = 0; + // These variables are guarded by FullGCALot_lock. 
debug_only(objArrayOop Universe::_fullgc_alot_dummy_array = NULL;) debug_only(int Universe::_fullgc_alot_dummy_next = 0;) @@ -389,6 +392,11 @@ void Universe::genesis(TRAPS) { _the_array_interfaces_array->at_put(1, SystemDictionary::Serializable_klass()); } + if (UseSecondarySupersTable) { + Universe::_the_array_interfaces_bitmap = Klass::compute_secondary_supers_bitmap(_the_array_interfaces_array); + Universe::_the_empty_klass_bitmap = Klass::compute_secondary_supers_bitmap(_the_empty_klass_array); + } + initialize_basic_type_klass(boolArrayKlassObj(), CHECK); initialize_basic_type_klass(charArrayKlassObj(), CHECK); initialize_basic_type_klass(singleArrayKlassObj(), CHECK); diff --git a/src/hotspot/share/memory/universe.hpp b/src/hotspot/share/memory/universe.hpp index 21cac8c91ca..9bf01b9d999 100644 --- a/src/hotspot/share/memory/universe.hpp +++ b/src/hotspot/share/memory/universe.hpp @@ -170,6 +170,9 @@ class Universe: AllStatic { static Array* _the_array_interfaces_array; + static uintx _the_array_interfaces_bitmap; + static uintx _the_empty_klass_bitmap; + // array of preallocated error objects with backtrace static objArrayOop _preallocated_out_of_memory_error_array; @@ -326,6 +329,7 @@ class Universe: AllStatic { static objArrayOop the_empty_class_klass_array () { return _the_empty_class_klass_array; } static Array* the_array_interfaces_array() { return _the_array_interfaces_array; } + static uintx the_array_interfaces_bitmap() { return _the_array_interfaces_bitmap; } static oop the_null_string() { return _the_null_string; } static oop the_min_jint_string() { return _the_min_jint_string; } @@ -364,6 +368,8 @@ class Universe: AllStatic { static Array* the_empty_short_array() { return _the_empty_short_array; } static Array* the_empty_method_array() { return _the_empty_method_array; } static Array* the_empty_klass_array() { return _the_empty_klass_array; } + static uintx the_empty_klass_bitmap() { return _the_empty_klass_bitmap; } + // OutOfMemoryError support. 
Returns an error with the required message. The returned error // may or may not have a backtrace. If error has a backtrace then the stack trace is already diff --git a/src/hotspot/share/metaprogramming/enableIf.hpp b/src/hotspot/share/metaprogramming/enableIf.hpp index 06e57a795c3..5169a8222bd 100644 --- a/src/hotspot/share/metaprogramming/enableIf.hpp +++ b/src/hotspot/share/metaprogramming/enableIf.hpp @@ -26,6 +26,10 @@ #define SHARE_VM_METAPROGRAMMING_ENABLEIF_HPP #include "memory/allocation.hpp" +#include + +#define ENABLE_IF(...) \ + std::enable_if_t = 0 // This metaprogramming tool allows explicitly enabling and disabling overloads // of member functions depending on whether the condition B holds true. diff --git a/src/hotspot/share/oops/arrayKlass.cpp b/src/hotspot/share/oops/arrayKlass.cpp index bb2b6ee7af9..44696e3177a 100644 --- a/src/hotspot/share/oops/arrayKlass.cpp +++ b/src/hotspot/share/oops/arrayKlass.cpp @@ -119,7 +119,8 @@ GrowableArray* ArrayKlass::compute_secondary_supers(int num_extra_slots, assert(num_extra_slots == 0, "sanity of primitive array type"); assert(transitive_interfaces == NULL, "sanity"); // Must share this for correct bootstrapping! - set_secondary_supers(Universe::the_array_interfaces_array()); + set_secondary_supers(Universe::the_array_interfaces_array(), + Universe::the_array_interfaces_bitmap()); return NULL; } diff --git a/src/hotspot/share/oops/instanceKlass.cpp b/src/hotspot/share/oops/instanceKlass.cpp index dd8eaf3ed95..f50b3da563a 100644 --- a/src/hotspot/share/oops/instanceKlass.cpp +++ b/src/hotspot/share/oops/instanceKlass.cpp @@ -1166,26 +1166,36 @@ bool InstanceKlass::can_be_primary_super_slow() const { GrowableArray* InstanceKlass::compute_secondary_supers(int num_extra_slots, Array* transitive_interfaces) { // The secondaries are the implemented interfaces. 
- Array* interfaces = transitive_interfaces; + // We need the cast because Array<InstanceKlass*> is NOT a supertype of Array<Klass*>, + // (but it's safe to do here because we won't write into _secondary_supers from this point on). + Array* interfaces = (Array*)(address)transitive_interfaces; int num_secondaries = num_extra_slots + interfaces->length(); if (num_secondaries == 0) { // Must share this for correct bootstrapping! - set_secondary_supers(Universe::the_empty_klass_array()); + set_secondary_supers(Universe::the_empty_klass_array(), Universe::the_empty_klass_bitmap()); return NULL; } else if (num_extra_slots == 0) { // The secondary super list is exactly the same as the transitive interfaces. // Redefine classes has to be careful not to delete this! - set_secondary_supers(interfaces); - return NULL; - } else { - // Copy transitive interfaces to a temporary growable array to be constructed - // into the secondary super list with extra slots. - GrowableArray* secondaries = new GrowableArray(interfaces->length()); - for (int i = 0; i < interfaces->length(); i++) { - secondaries->push(interfaces->at(i)); - } - return secondaries; + if (!UseSecondarySupersTable) { + set_secondary_supers(interfaces); + return NULL; + } else if (num_extra_slots == 0 && interfaces->length() <= 1) { + // We will reuse the transitive interfaces list if we're certain + // it's in hash order. + uintx bitmap = compute_secondary_supers_bitmap(interfaces); + set_secondary_supers(interfaces, bitmap); + return NULL; + // ... fall through if that didn't work. + } } + // Copy transitive interfaces to a temporary growable array to be constructed + // into the secondary super list with extra slots. 
+ GrowableArray* secondaries = new GrowableArray(interfaces->length()); + for (int i = 0; i < interfaces->length(); i++) { + secondaries->push(interfaces->at(i)); + } + return secondaries; } bool InstanceKlass::compute_is_subtype_of(Klass* k) { @@ -3252,6 +3262,29 @@ void InstanceKlass::print_on(outputStream* st) const { } st->print(BULLET"local interfaces: "); local_interfaces()->print_value_on(st); st->cr(); st->print(BULLET"trans. interfaces: "); transitive_interfaces()->print_value_on(st); st->cr(); + + st->print(BULLET"secondary supers: "); secondary_supers()->print_value_on(st); st->cr(); + if (UseSecondarySupersTable) { + st->print(BULLET"hash_slot: %d", hash_slot()); st->cr(); + st->print(BULLET"bitmap: " UINTX_FORMAT_X_0, _bitmap); st->cr(); + } + if (secondary_supers() != nullptr) { + if (Verbose) { + bool is_hashed = UseSecondarySupersTable && (_bitmap != SECONDARY_SUPERS_BITMAP_FULL); + st->print_cr(BULLET"---- secondary supers (%d words):", _secondary_supers->length()); + for (int i = 0; i < _secondary_supers->length(); i++) { + ResourceMark rm; // for external_name() + Klass* secondary_super = _secondary_supers->at(i); + st->print(BULLET"%2d:", i); + if (is_hashed) { + int home_slot = compute_home_slot(secondary_super, _bitmap); + int distance = (i - home_slot) & SECONDARY_SUPERS_TABLE_MASK; + st->print(" dist:%02d:", distance); + } + st->print_cr(" %p %s", secondary_super, secondary_super->external_name()); + } + } + } st->print(BULLET"constants: "); constants()->print_value_on(st); st->cr(); if (class_loader_data() != NULL) { st->print(BULLET"class loader data: "); diff --git a/src/hotspot/share/oops/klass.cpp b/src/hotspot/share/oops/klass.cpp index 889f3430956..6b1042247d7 100644 --- a/src/hotspot/share/oops/klass.cpp +++ b/src/hotspot/share/oops/klass.cpp @@ -23,6 +23,7 @@ */ #include "precompiled.hpp" +#include "classfile/classLoader.hpp" #include "classfile/classLoaderData.inline.hpp" #include "classfile/dictionary.hpp" #include 
"classfile/javaClasses.inline.hpp" @@ -30,6 +31,7 @@ #include "classfile/vmSymbols.hpp" #include "gc/shared/collectedHeap.inline.hpp" #include "logging/log.hpp" +#include "logging/logMessage.hpp" #include "memory/heapInspection.hpp" #include "memory/metadataFactory.hpp" #include "memory/metaspaceClosure.hpp" @@ -44,8 +46,12 @@ #include "runtime/atomic.hpp" #include "runtime/handles.inline.hpp" #include "runtime/orderAccess.hpp" +#include "runtime/perfData.hpp" #include "utilities/macros.hpp" +#include "utilities/rotate_bits.hpp" +#include "utilities/population_count.hpp" #include "utilities/stack.inline.hpp" +#include "utilities/count_trailing_zeros.hpp" void Klass::set_java_mirror(Handle m) { assert(!m.is_null(), "New mirror should never be null."); @@ -77,9 +83,65 @@ void Klass::set_is_cloneable() { } } +uint8_t Klass::compute_hash_slot(Symbol* n) { + uint hash_code; + // Special cases for the two superclasses of all Array instances. + // Code elsewhere assumes, for all instances of ArrayKlass, that + // these two interfaces will be in this order. + + // We ensure there are some empty slots in the hash table between + // these two very common interfaces because if they were adjacent + // (e.g. Slots 0 and 1), then any other class which hashed to 0 or 1 + // would result in a probe length of 3. + if (n == vmSymbols::java_lang_Cloneable()) { + hash_code = 0; + } else if (n == vmSymbols::java_io_Serializable()) { + hash_code = SECONDARY_SUPERS_TABLE_SIZE / 2; + } else { + auto s = (const jbyte*) n->bytes(); + hash_code = java_lang_String::hash_code(s, n->utf8_length()); + // We use String::hash_code here (rather than e.g. + // Symbol::identity_hash()) in order to have a hash code that + // does not change from run to run. We want that because the + // hash value for a secondary superclass appears in generated + // code as a constant. + + // This constant is magic: see Knuth, "Fibonacci Hashing". 
+ constexpr uint multiplier + = 2654435769; // (uint)(((u8)1 << 32) / ((1 + sqrt(5)) / 2 )) + constexpr uint hash_shift = sizeof(hash_code) * 8 - 6; + // The leading bits of the least significant half of the product. + hash_code = (hash_code * multiplier) >> hash_shift; + + if (StressSecondarySupers) { + // Generate many hash collisions in order to stress-test the + // linear search fallback. + hash_code = hash_code % 3; + hash_code = hash_code * (SECONDARY_SUPERS_TABLE_SIZE / 3); + } + } + + return (hash_code & SECONDARY_SUPERS_TABLE_MASK); +} + void Klass::set_name(Symbol* n) { _name = n; - if (_name != NULL) _name->increment_refcount(); + if (_name != NULL) { + _name->increment_refcount(); + } + + if (UseSecondarySupersTable) { + elapsedTimer selftime; + selftime.start(); + + _hash_slot = compute_hash_slot(n); + assert(_hash_slot < SECONDARY_SUPERS_TABLE_SIZE, "required"); + + selftime.stop(); + if (UsePerfData) { + ClassLoader::perf_secondary_hash_time()->inc(selftime.ticks()); + } + } } bool Klass::is_subclass_of(const Klass* k) const { @@ -246,6 +308,174 @@ bool Klass::can_be_primary_super_slow() const { return true; } +void Klass::set_secondary_supers(Array* secondaries) { + assert(!UseSecondarySupersTable || secondaries == NULL, ""); + set_secondary_supers(secondaries, SECONDARY_SUPERS_BITMAP_EMPTY); +} + +void Klass::set_secondary_supers(Array* secondaries, uintx bitmap) { +#ifdef ASSERT + if (UseSecondarySupersTable && secondaries != NULL) { + uintx real_bitmap = compute_secondary_supers_bitmap(secondaries); + assert(bitmap == real_bitmap, "must be"); + } +#endif + _bitmap = bitmap; + _secondary_supers = secondaries; + + /* if (secondaries != NULL) { + LogMessage(class, load) msg; + LogStream log {LogLevel::Debug, msg}; + if (log.is_enabled()) { + ResourceMark rm; + log.print_cr("set_secondary_supers: hash_slot: %d; klass: %s", hash_slot(), external_name()); + print_secondary_supers_on(&log); + } + } */ +} + +// Hashed secondary superclasses +// +// We 
use a compressed 64-entry hash table with linear probing. We +// start by creating a hash table in the usual way, followed by a pass +// that removes all the null entries. To indicate which entries would +// have been null we use a bitmap that contains a 1 in each position +// where an entry is present, 0 otherwise. This bitmap also serves as +// a kind of Bloom filter, which in many cases allows us quickly to +// eliminate the possibility that something is a member of a set of +// secondaries. +uintx Klass::hash_secondary_supers(Array* secondaries, bool rewrite) { + const int length = secondaries->length(); + + if (length == 0) { + return SECONDARY_SUPERS_BITMAP_EMPTY; + } + + if (length == 1) { + int hash_slot = secondaries->at(0)->hash_slot(); + return uintx(1) << hash_slot; + } + + // For performance reasons we don't use a hashed table unless there + // are at least two empty slots in it. If there were only one empty + // slot it'd take a long time to create the table and the resulting + // search would be no faster than linear probing. + if (length > SECONDARY_SUPERS_TABLE_SIZE - 2) { + return SECONDARY_SUPERS_BITMAP_FULL; + } + + { + PerfTraceTime ptt(ClassLoader::perf_secondary_hash_time()); + + ResourceMark rm; + uintx bitmap = SECONDARY_SUPERS_BITMAP_EMPTY; + auto hashed_secondaries = new GrowableArray(SECONDARY_SUPERS_TABLE_SIZE, + SECONDARY_SUPERS_TABLE_SIZE, NULL); + + for (int j = 0; j < length; j++) { + Klass* k = secondaries->at(j); + hash_insert(k, hashed_secondaries, bitmap); + } + + // Pack the hashed secondaries array by copying it into the + // secondaries array, sans nulls, if modification is allowed. + // Otherwise, validate the order. 
+ int i = 0; + for (int slot = 0; slot < SECONDARY_SUPERS_TABLE_SIZE; slot++) { + bool has_element = ((bitmap >> slot) & 1) != 0; + assert(has_element == (hashed_secondaries->at(slot) != NULL), ""); + if (has_element) { + Klass* k = hashed_secondaries->at(slot); + if (rewrite) { + secondaries->at_put(i, k); + } else if (secondaries->at(i) != k) { + assert(false, "broken secondary supers hash table"); + return SECONDARY_SUPERS_BITMAP_FULL; + } + i++; + } + } + assert(i == secondaries->length(), "mismatch"); + + return bitmap; + } +} + +void Klass::hash_insert(Klass* klass, GrowableArray* secondaries, uintx& bitmap) { + assert(bitmap != SECONDARY_SUPERS_BITMAP_FULL, ""); + + int dist = 0; + for (int slot = klass->hash_slot(); true; slot = (slot + 1) & SECONDARY_SUPERS_TABLE_MASK) { + Klass* existing = secondaries->at(slot); + assert(((bitmap >> slot) & 1) == (existing != NULL), "mismatch"); + if (existing == NULL) { // no conflict + secondaries->at_put(slot, klass); + bitmap |= uintx(1) << slot; + assert(bitmap != SECONDARY_SUPERS_BITMAP_FULL, ""); + return; + } else { + // Use Robin Hood hashing to minimize the worst case search. + // Also, every permutation of the insertion sequence produces + // the same final Robin Hood hash table, provided that a + // consistent tie breaker is used. + int existing_dist = (slot - existing->hash_slot()) & SECONDARY_SUPERS_TABLE_MASK; + if (existing_dist < dist + // This tie breaker ensures that the hash order is maintained. 
+ || ((existing_dist == dist) + && (uintptr_t(existing) < uintptr_t(klass)))) { + Klass* tmp = secondaries->at(slot); + secondaries->at_put(slot, klass); + klass = tmp; + dist = existing_dist; + } + ++dist; + } + } +} + +Array* Klass::pack_secondary_supers(ClassLoaderData* loader_data, + GrowableArray* primaries, + GrowableArray* secondaries, + uintx& bitmap, TRAPS) { + int new_length = primaries->length() + secondaries->length(); + Array* secondary_supers = MetadataFactory::new_array(loader_data, new_length, CHECK_NULL); + + // Combine the two arrays into a metadata object to pack the array. + // The primaries are added in the reverse order, then the secondaries. + int fill_p = primaries->length(); + for (int j = 0; j < fill_p; j++) { + secondary_supers->at_put(j, primaries->pop()); // add primaries in reverse order. + } + for( int j = 0; j < secondaries->length(); j++ ) { + secondary_supers->at_put(j+fill_p, secondaries->at(j)); // add secondaries on the end. + } +#ifdef ASSERT + // We must not copy any null placeholders left over from bootstrap. + for (int j = 0; j < secondary_supers->length(); j++) { + assert(secondary_supers->at(j) != NULL, "correct bootstrapping order"); + } +#endif + + if (UseSecondarySupersTable) { + bitmap = hash_secondary_supers(secondary_supers, /*rewrite=*/true); // rewrites freshly allocated array + } else { + bitmap = SECONDARY_SUPERS_BITMAP_EMPTY; + } + return secondary_supers; +} + +uintx Klass::compute_secondary_supers_bitmap(Array* secondary_supers) { + return hash_secondary_supers(secondary_supers, /*rewrite=*/false); // no rewrites allowed +} + +uint8_t Klass::compute_home_slot(Klass* k, uintx bitmap) { + uint8_t hash = k->hash_slot(); + if (hash > 0) { + return population_count(bitmap << (SECONDARY_SUPERS_TABLE_SIZE - hash)); + } + return 0; +} + void Klass::initialize_supers(Klass* k, Array* transitive_interfaces, TRAPS) { if (FastSuperclassLimit == 0) { // None of the other machinery matters. 
@@ -341,26 +571,9 @@ void Klass::initialize_supers(Klass* k, Array* transitive_interfaces, TR primaries->push(p); } // Combine the two arrays into a metadata object to pack the array. - // The primaries are added in the reverse order, then the secondaries. - int new_length = primaries->length() + secondaries->length(); - Array* s2 = MetadataFactory::new_array( - class_loader_data(), new_length, CHECK); - int fill_p = primaries->length(); - for (int j = 0; j < fill_p; j++) { - s2->at_put(j, primaries->pop()); // add primaries in reverse order. - } - for( int j = 0; j < secondaries->length(); j++ ) { - s2->at_put(j+fill_p, secondaries->at(j)); // add secondaries on the end. - } - - #ifdef ASSERT - // We must not copy any NULL placeholders left over from bootstrap. - for (int j = 0; j < s2->length(); j++) { - assert(s2->at(j) != NULL, "correct bootstrapping order"); - } - #endif - - set_secondary_supers(s2); + uintx bitmap = 0; + Array* s2 = pack_secondary_supers(class_loader_data(), primaries, secondaries, bitmap, CHECK); + set_secondary_supers(s2, bitmap); } } @@ -368,7 +581,7 @@ GrowableArray* Klass::compute_secondary_supers(int num_extra_slots, Array* transitive_interfaces) { assert(num_extra_slots == 0, "override for complex klasses"); assert(transitive_interfaces == NULL, "sanity"); - set_secondary_supers(Universe::the_empty_klass_array()); + set_secondary_supers(Universe::the_empty_klass_array(), Universe::the_empty_klass_bitmap()); return NULL; } @@ -506,6 +719,11 @@ void Klass::remove_unshareable_info() { // Null out class_loader_data because we don't share that yet. set_class_loader_data(NULL); set_is_shared(); + + // FIXME: validation in Klass::hash_secondary_supers() may fail for shared klasses. + // Even though the bitmaps always match, the canonical order of elements in the table + // is not guaranteed to stay the same (see tie breaker during Robin Hood hashing in Klass::hash_insert). 
+ //assert(compute_secondary_supers_bitmap(secondary_supers()) == _bitmap, "broken table"); } void Klass::remove_java_mirror() { @@ -947,3 +1165,100 @@ const char* Klass::class_in_module_of_loader(bool use_are, bool include_parent_l return class_description; } + +class LookupStats : StackObj { + private: + uint _no_of_samples; + uint _worst; + uint _worst_count; + uint _average; + uint _best; + uint _best_count; + public: + LookupStats() : _no_of_samples(0), _worst(0), _worst_count(0), _average(0), _best(INT_MAX), _best_count(0) {} + + ~LookupStats() { + assert(_best <= _worst || _no_of_samples == 0, "sanity"); + } + + void sample(uint value) { + ++_no_of_samples; + _average += value; + + if (_worst < value) { + _worst = value; + _worst_count = 1; + } else if (_worst == value) { + ++_worst_count; + } + + if (_best > value) { + _best = value; + _best_count = 1; + } else if (_best == value) { + ++_best_count; + } + } + + void print_on(outputStream* st) const { + st->print("best: %2d (%4.1f%%)", _best, (100.0 * _best_count) / _no_of_samples); + if (_best_count < _no_of_samples) { + st->print("; average: %4.1f; worst: %2d (%4.1f%%)", + (1.0 * _average) / _no_of_samples, + _worst, (100.0 * _worst_count) / _no_of_samples); + } + } +}; + +static void print_positive_lookup_stats(Array* secondary_supers, uintx bitmap, outputStream* st) { + int num_of_supers = secondary_supers->length(); + + LookupStats s; + for (int i = 0; i < num_of_supers; i++) { + Klass* secondary_super = secondary_supers->at(i); + int home_slot = Klass::compute_home_slot(secondary_super, bitmap); + uint score = 1 + ((i - home_slot) & Klass::SECONDARY_SUPERS_TABLE_MASK); + s.sample(score); + } + st->print("positive_lookup: "); s.print_on(st); +} + +static uint compute_distance_to_nearest_zero(int slot, uintx bitmap) { + assert(~bitmap != 0, "no zeroes"); + uintx start = rotate_right(bitmap, slot); + return count_trailing_zeros(~start); +} + +static void print_negative_lookup_stats(uintx bitmap, 
outputStream* st) { + LookupStats s; + for (int slot = 0; slot < Klass::SECONDARY_SUPERS_TABLE_SIZE; slot++) { + uint score = compute_distance_to_nearest_zero(slot, bitmap); + s.sample(score); + } + st->print("negative_lookup: "); s.print_on(st); +} + +void Klass::print_secondary_supers_on(outputStream* st) const { + if (secondary_supers() != nullptr) { + if (UseSecondarySupersTable) { + st->print(" - "); st->print("%d elements;", _secondary_supers->length()); + st->print_cr(" bitmap: " UINTX_FORMAT_X_0 ";", _bitmap); + if (_bitmap != SECONDARY_SUPERS_BITMAP_EMPTY && + _bitmap != SECONDARY_SUPERS_BITMAP_FULL) { + st->print(" - "); print_positive_lookup_stats(secondary_supers(), _bitmap, st); st->cr(); + st->print(" - "); print_negative_lookup_stats(_bitmap, st); st->cr(); + } + } + } else { + st->print("null"); + } +} + +void Klass::on_secondary_supers_verification_failure(Klass* super, Klass* sub, bool linear_result, bool table_result, const char* msg) { + ResourceMark rm; + super->print(); + sub->print(); + fatal("%s: %s implements %s: is_subtype_of: %d; linear_search: %d; table_lookup: %d", + msg, sub->external_name(), super->external_name(), + sub->is_subtype_of(super), linear_result, table_result); +} diff --git a/src/hotspot/share/oops/klass.hpp b/src/hotspot/share/oops/klass.hpp index c67be65fdae..acc943cae12 100644 --- a/src/hotspot/share/oops/klass.hpp +++ b/src/hotspot/share/oops/klass.hpp @@ -151,6 +151,12 @@ class Klass : public Metadata { // Provide access the corresponding instance java.lang.ClassLoader. ClassLoaderData* _class_loader_data; + // Bitmap and hash code used by hashed secondary supers. + uintx _bitmap; + uint8_t _hash_slot; + + static uint8_t compute_hash_slot(Symbol* s); + jint _modifier_flags; // Processed access flags, for use by Class.getModifiers. AccessFlags _access_flags; // Access flags. The class/interface distinction is stored here. 
@@ -230,7 +236,10 @@ class Klass : public Metadata { void set_secondary_super_cache(Klass* k) { _secondary_super_cache = k; } Array<Klass*>* secondary_supers() const { return _secondary_supers; } - void set_secondary_supers(Array<Klass*>* k) { _secondary_supers = k; } + void set_secondary_supers(Array<Klass*>* k); + void set_secondary_supers(Array<Klass*>* k, uintx bitmap); + + uint8_t hash_slot() const { return _hash_slot; } // Return the element of the _super chain of the given depth. // If there is no such element, return either NULL or this. @@ -346,9 +355,26 @@ class Klass : public Metadata { protected: // internal accessors void set_subklass(Klass* s); void set_next_sibling(Klass* s); +private: + static void hash_insert(Klass* klass, GrowableArray<Klass*>* secondaries, uintx& bitmap); + static uintx hash_secondary_supers(Array<Klass*>* secondaries, bool rewrite); public: +// Secondary supers table support + static Array<Klass*>* pack_secondary_supers(ClassLoaderData* loader_data, + GrowableArray<Klass*>* primaries, + GrowableArray<Klass*>* secondaries, + uintx& bitmap, + TRAPS); + + static uintx compute_secondary_supers_bitmap(Array<Klass*>* secondary_supers); + static uint8_t compute_home_slot(Klass* k, uintx bitmap); + static constexpr int SECONDARY_SUPERS_TABLE_SIZE = sizeof(_bitmap) * 8; + static constexpr int SECONDARY_SUPERS_TABLE_MASK = SECONDARY_SUPERS_TABLE_SIZE - 1; + + static constexpr uintx SECONDARY_SUPERS_BITMAP_EMPTY = 0; + static constexpr uintx SECONDARY_SUPERS_BITMAP_FULL = ~(uintx)0; // Compiler support static ByteSize super_offset() { return in_ByteSize(offset_of(Klass, _super)); } static ByteSize super_check_offset_offset() { return in_ByteSize(offset_of(Klass, _super_check_offset)); } @@ -359,7 +385,7 @@ class Klass : public Metadata { static ByteSize modifier_flags_offset() { return in_ByteSize(offset_of(Klass, _modifier_flags)); } static ByteSize layout_helper_offset() { return in_ByteSize(offset_of(Klass, _layout_helper)); } static ByteSize access_flags_offset() { return in_ByteSize(offset_of(Klass, 
_access_flags)); } - + static ByteSize bitmap_offset() { return byte_offset_of(Klass, _bitmap); } // Unpacking layout_helper: static const int _lh_neutral_value = 0; // neutral non-array non-instance value static const int _lh_instance_slow_path_bit = 0x01; @@ -715,6 +741,8 @@ class Klass : public Metadata { virtual void oop_print_value_on(oop obj, outputStream* st); virtual void oop_print_on (oop obj, outputStream* st); + void print_secondary_supers_on(outputStream* st) const; + virtual const char* internal_name() const = 0; // Verification @@ -731,7 +759,8 @@ class Klass : public Metadata { // for error reporting static Klass* decode_klass_raw(narrowKlass narrow_klass); static bool is_valid(Klass* k); - + + static void on_secondary_supers_verification_failure(Klass* super, Klass* sub, bool linear_result, bool table_result, const char* msg); static bool is_null(narrowKlass obj); static bool is_null(Klass* obj); diff --git a/src/hotspot/share/oops/objArrayKlass.cpp b/src/hotspot/share/oops/objArrayKlass.cpp index a047a0848f1..51cff1c3b42 100644 --- a/src/hotspot/share/oops/objArrayKlass.cpp +++ b/src/hotspot/share/oops/objArrayKlass.cpp @@ -390,7 +390,8 @@ GrowableArray* ObjArrayKlass::compute_secondary_supers(int num_extra_slo int num_secondaries = num_extra_slots + 2 + num_elem_supers; if (num_secondaries == 2) { // Must share this for correct bootstrapping! 
- set_secondary_supers(Universe::the_array_interfaces_array()); + set_secondary_supers(Universe::the_array_interfaces_array(), + Universe::the_array_interfaces_bitmap()); return NULL; } else { GrowableArray<Klass*>* secondaries = new GrowableArray<Klass*>(num_elem_supers+2); diff --git a/src/hotspot/share/opto/c2_globals.hpp b/src/hotspot/share/opto/c2_globals.hpp index fd2ffb932b4..ea5cd8299cd 100644 --- a/src/hotspot/share/opto/c2_globals.hpp +++ b/src/hotspot/share/opto/c2_globals.hpp @@ -774,6 +774,9 @@ \ product(bool, UseProfiledLoopPredicate, true, \ "move predicates out of loops based on profiling data") \ + \ + diagnostic(bool, InlineSecondarySupersTest, true, \ + "Inline the secondary supers hash lookup.") \ C2_FLAGS(DECLARE_DEVELOPER_FLAG, \ DECLARE_PD_DEVELOPER_FLAG, \ diff --git a/src/hotspot/share/opto/matcher.cpp b/src/hotspot/share/opto/matcher.cpp index cb9f16b82c9..8e2471ad18e 100644 --- a/src/hotspot/share/opto/matcher.cpp +++ b/src/hotspot/share/opto/matcher.cpp @@ -2452,6 +2452,14 @@ void Matcher::find_shared( Node *n ) { n->del_req(3); break; } + case Op_PartialSubtypeCheck: { + if (UseSecondarySupersTable && n->in(2)->is_Con()) { + // PartialSubtypeCheck uses both constant and register operands for superclass input. + n->set_req(2, new BinaryNode(n->in(2), n->in(2))); + break; + } + break; + } default: break; } diff --git a/src/hotspot/share/opto/memnode.cpp b/src/hotspot/share/opto/memnode.cpp index 3f5c7afdcc2..c33c488588a 100644 --- a/src/hotspot/share/opto/memnode.cpp +++ b/src/hotspot/share/opto/memnode.cpp @@ -2313,7 +2313,11 @@ const Type* LoadNode::klass_value_common(PhaseGVN* phase) const { return sup ? TypeKlassPtr::make(sup) : TypePtr::NULL_PTR; } } - + if (tkls != NULL && !UseSecondarySupersCache + && tkls->offset() == in_bytes(Klass::secondary_super_cache_offset())) { + // Treat Klass::_secondary_super_cache as a constant when the cache is disabled. 
+ return TypePtr::NULL_PTR; + } // Bailout case return LoadNode::Value(phase); } diff --git a/src/hotspot/share/runtime/abstract_vm_version.hpp b/src/hotspot/share/runtime/abstract_vm_version.hpp index 5a060365084..2e3c3fd329f 100644 --- a/src/hotspot/share/runtime/abstract_vm_version.hpp +++ b/src/hotspot/share/runtime/abstract_vm_version.hpp @@ -196,6 +196,9 @@ class Abstract_VM_Version: AllStatic { static bool supports_fast_class_init_checks() { return false; } static bool print_matching_lines_from_file(const char* filename, outputStream* st, const char* keywords_to_match[]); + + // Does platform support secondary supers table lookup? + constexpr static bool supports_secondary_supers_table() { return false; } }; #endif // SHARE_RUNTIME_ABSTRACT_VM_VERSION_HPP diff --git a/src/hotspot/share/runtime/arguments.cpp b/src/hotspot/share/runtime/arguments.cpp index d4c651c265c..1dd0ac6c172 100644 --- a/src/hotspot/share/runtime/arguments.cpp +++ b/src/hotspot/share/runtime/arguments.cpp @@ -4237,6 +4237,17 @@ jint Arguments::apply_ergo() { return code; } + if (FLAG_IS_DEFAULT(UseSecondarySupersTable)) { + FLAG_SET_DEFAULT(UseSecondarySupersTable, VM_Version::supports_secondary_supers_table()); + } else if (UseSecondarySupersTable && !VM_Version::supports_secondary_supers_table()) { + warning("UseSecondarySupersTable is not supported"); + FLAG_SET_DEFAULT(UseSecondarySupersTable, false); + } + if (!UseSecondarySupersTable) { + FLAG_SET_DEFAULT(StressSecondarySupers, false); + FLAG_SET_DEFAULT(VerifySecondarySupers, false); + } + // Turn off biased locking for locking debug mode flags, // which are subtly different from each other but neither works with // biased locking diff --git a/src/hotspot/share/runtime/globals.hpp b/src/hotspot/share/runtime/globals.hpp index db06b9a263a..5701bedc884 100644 --- a/src/hotspot/share/runtime/globals.hpp +++ b/src/hotspot/share/runtime/globals.hpp @@ -2735,6 +2735,17 @@ define_pd_global(uint64_t,MaxRAM, 1ULL*G); \ product(bool, 
CRTrace, true, "Minimal C/R tracing") \ \ + diagnostic(bool, UseSecondarySupersCache, true, \ + "Use secondary supers cache during subtype checks.") \ + \ + diagnostic(bool, UseSecondarySupersTable, false, \ + "Use hash table to lookup secondary supers.") \ + \ + diagnostic(bool, VerifySecondarySupers, false, \ + "Check that linear and hashed secondary lookups return the same result.") \ + \ + diagnostic(bool, StressSecondarySupers, false, \ + "Use a terrible hash function in order to generate many collisions.") \ #define VM_FLAGS(develop, \ develop_pd, \ diff --git a/src/hotspot/share/runtime/stubRoutines.cpp b/src/hotspot/share/runtime/stubRoutines.cpp index afefcf666d4..73d3d69376e 100644 --- a/src/hotspot/share/runtime/stubRoutines.cpp +++ b/src/hotspot/share/runtime/stubRoutines.cpp @@ -26,6 +26,7 @@ #include "asm/codeBuffer.hpp" #include "memory/resourceArea.hpp" #include "oops/access.inline.hpp" +#include "oops/klass.hpp" #include "oops/oop.inline.hpp" #include "runtime/interfaceSupport.inline.hpp" #include "runtime/timerTrace.hpp" @@ -176,7 +177,8 @@ address StubRoutines::_safefetch32_continuation_pc = NULL; address StubRoutines::_safefetchN_entry = NULL; address StubRoutines::_safefetchN_fault_pc = NULL; address StubRoutines::_safefetchN_continuation_pc = NULL; - +address StubRoutines::_lookup_secondary_supers_table_slow_path_stub = NULL; +address StubRoutines::_lookup_secondary_supers_table_stubs[Klass::SECONDARY_SUPERS_TABLE_SIZE] = { NULL }; // Initialization // // Note: to break cycle with universe initialization, stubs are generated in two phases. 
diff --git a/src/hotspot/share/runtime/stubRoutines.hpp b/src/hotspot/share/runtime/stubRoutines.hpp index 30f38045987..bf3a5d38289 100644 --- a/src/hotspot/share/runtime/stubRoutines.hpp +++ b/src/hotspot/share/runtime/stubRoutines.hpp @@ -228,7 +228,8 @@ class StubRoutines: AllStatic { static address _safefetchN_entry; static address _safefetchN_fault_pc; static address _safefetchN_continuation_pc; - + static address _lookup_secondary_supers_table_stubs[]; + static address _lookup_secondary_supers_table_slow_path_stub; public: // Initialization/Testing static void initialize1(); // must happen before universe::genesis @@ -385,6 +386,17 @@ class StubRoutines: AllStatic { static address dlibm_sin_cos_huge() { return _dlibm_sin_cos_huge; } static address dlibm_tan_cot_huge() { return _dlibm_tan_cot_huge; } static address dtan() { return _dtan; } + + static address lookup_secondary_supers_table_stub(u1 slot) { + assert(slot < Klass::SECONDARY_SUPERS_TABLE_SIZE, "out of bounds"); + assert(_lookup_secondary_supers_table_stubs[slot] != nullptr, "not implemented"); + return _lookup_secondary_supers_table_stubs[slot]; + } + + static address lookup_secondary_supers_table_slow_path_stub() { + assert(_lookup_secondary_supers_table_slow_path_stub != nullptr, "not implemented"); + return _lookup_secondary_supers_table_slow_path_stub; + } static address select_fill_function(BasicType t, bool aligned, const char* &name); diff --git a/src/hotspot/share/utilities/globalDefinitions.hpp b/src/hotspot/share/utilities/globalDefinitions.hpp index 65770cb2ee9..c15677545ec 100644 --- a/src/hotspot/share/utilities/globalDefinitions.hpp +++ b/src/hotspot/share/utilities/globalDefinitions.hpp @@ -136,6 +136,11 @@ #define INTX_FORMAT "%" PRIdPTR #define UINTX_FORMAT "%" PRIuPTR #define INTX_FORMAT_W(width) "%" #width PRIdPTR +#ifdef _LP64 +#define UINTX_FORMAT_X_0 "0x%016" PRIxPTR +#else +#define UINTX_FORMAT_X_0 "0x%08" PRIxPTR +#endif #define UINTX_FORMAT_W(width) "%" #width PRIuPTR 
//---------------------------------------------------------------------------------------------------- diff --git a/src/hotspot/share/utilities/population_count.hpp b/src/hotspot/share/utilities/population_count.hpp new file mode 100644 index 00000000000..68b65a896b8 --- /dev/null +++ b/src/hotspot/share/utilities/population_count.hpp @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2019, 2023, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef SHARE_UTILITIES_POPULATION_COUNT_HPP +#define SHARE_UTILITIES_POPULATION_COUNT_HPP + +#include "metaprogramming/enableIf.hpp" +#include "utilities/debug.hpp" +#include "utilities/globalDefinitions.hpp" + +#include <type_traits> + +// Returns the population count of x, i.e., the number of bits set in x. +// +// Adapted from Hacker's Delight, 2nd Edition, Figure 5-2 and the text that +// follows. +// +// Ideally this should be dispatched per platform to use optimized +// instructions when available, such as POPCNT on modern x86/AMD. 
Our builds +// still target and support older architectures that might lack support for +// these. For example, with current build configurations, __builtin_popcount(x) +// generate a call to a similar but slower 64-bit version when calling with +// a 32-bit integer type. +template <typename T> +constexpr unsigned population_count(T x) { + STATIC_ASSERT(BitsPerWord <= 128); + STATIC_ASSERT(BitsPerByte == 8); + STATIC_ASSERT(std::is_integral<T>::value); + STATIC_ASSERT(!std::is_signed<T>::value); + // We need to take care with implicit integer promotion when dealing with + // integers < 32-bit. We chose to do this by explicitly widening constants + // to unsigned + using P = std::conditional_t<(sizeof(T) < sizeof(unsigned)), unsigned, T>; + const T all = ~T(0); // 0xFF..FF + const P fives = all/3; // 0x55..55 + const P threes = (all/15) * 3; // 0x33..33 + const P z_ones = all/255; // 0x0101..01 + const P z_effs = z_ones * 15; // 0x0F0F..0F + P r = x; + r -= ((r >> 1) & fives); + r = (r & threes) + ((r >> 2) & threes); + r = ((r + (r >> 4)) & z_effs) * z_ones; + // The preceding multiply by z_ones is the only place where the intermediate + // calculations can exceed the range of T. We need to discard any such excess + // before the right-shift, hence the conversion back to T. + return checked_cast<unsigned>(static_cast<T>(r) >> (((sizeof(T) - 1) * BitsPerByte))); +} + +#endif // SHARE_UTILITIES_POPULATION_COUNT_HPP \ No newline at end of file diff --git a/src/hotspot/share/utilities/rotate_bits.hpp b/src/hotspot/share/utilities/rotate_bits.hpp new file mode 100644 index 00000000000..6077ee8bc18 --- /dev/null +++ b/src/hotspot/share/utilities/rotate_bits.hpp @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef SHARE_UTILITIES_ROTATE_BITS_HPP +#define SHARE_UTILITIES_ROTATE_BITS_HPP + +#include "utilities/globalDefinitions.hpp" + + +inline uint32_t rotate_right_32(uint32_t x, int distance) { + distance = distance & 0x1F; + if (distance > 0) { + return (x >> distance) | (x << (32 - distance)); + } else { + return x; + } +} + +inline uint64_t rotate_right_64(uint64_t x, int distance) { + distance = distance & 0x3F; + if (distance > 0) { + return (x >> distance) | (x << (64 - distance)); + } else { + return x; + } +} + +template<typename T, + ENABLE_IF(std::is_integral<T>::value), +ENABLE_IF(sizeof(T) <= sizeof(uint64_t))> +inline T rotate_right(T x, int dist) { + return (sizeof(x) <= sizeof(uint32_t)) ? 
+ rotate_right_32(static_cast<uint32_t>(x), dist) : + rotate_right_64(static_cast<uint64_t>(x), dist); +} + +#endif // SHARE_UTILITIES_ROTATE_BITS_HPP \ No newline at end of file diff --git a/src/jdk.pack/share/native/common-unpack/constants.h b/src/jdk.pack/share/native/common-unpack/constants.h index 8d8376dd23a..11f4570a37f 100644 --- a/src/jdk.pack/share/native/common-unpack/constants.h +++ b/src/jdk.pack/share/native/common-unpack/constants.h @@ -203,7 +203,7 @@ enum { AO_HAVE_FIELD_FLAGS_HI = 1<<10, AO_HAVE_METHOD_FLAGS_HI = 1<<11, AO_HAVE_CODE_FLAGS_HI = 1<<12, - AO_UNUSED_MBZ = (-1)<<13, // options bits reserved for future use. + AO_UNUSED_MBZ = ~((1<<13)-1), // options bits reserved for future use. #define ARCHIVE_BIT_DO(F) \ F(AO_HAVE_SPECIAL_FORMATS) \ diff --git a/test/micro/org/openjdk/bench/vm/compiler/SecondarySuperCacheHits.java b/test/micro/org/openjdk/bench/vm/compiler/SecondarySuperCacheHits.java new file mode 100644 index 00000000000..aaaf0edb258 --- /dev/null +++ b/test/micro/org/openjdk/bench/vm/compiler/SecondarySuperCacheHits.java @@ -0,0 +1,108 @@ +/* + * Copyright Amazon.com Inc. or its affiliates. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +package org.openjdk.bench.vm.compiler; + +import org.openjdk.jmh.annotations.*; +import org.openjdk.jmh.infra.Blackhole; + +import java.util.concurrent.TimeUnit; + +@Warmup(iterations = 5, time = 300, timeUnit = TimeUnit.MILLISECONDS) +@Measurement(iterations = 5, time = 300, timeUnit = TimeUnit.MILLISECONDS) +@Fork(value = 3, jvmArgsAppend = {"-XX:+TieredCompilation", "-XX:TieredStopAtLevel=1"}) +@BenchmarkMode(Mode.AverageTime) +@OutputTimeUnit(TimeUnit.NANOSECONDS) +@Threads(1) +@State(Scope.Benchmark) +public class SecondarySuperCacheHits { + + // This test targets C1 specifically, to enter the interesting code path + // without heavily optimizing compiler like C2 optimizing based on profiles, + // or folding the instanceof checks. + + // The test verifies what happens on a happy path, when we can actually cache + // the last super and use it effectively. 
+ + interface I01 {} + interface I02 {} + interface I03 {} + interface I04 {} + interface I05 {} + interface I06 {} + interface I07 {} + interface I08 {} + interface I09 {} + interface I10 {} + interface I11 {} + interface I12 {} + interface I13 {} + interface I14 {} + interface I15 {} + interface I16 {} + interface I17 {} + interface I18 {} + interface I19 {} + interface I20 {} + + class B {} + class C1 extends B implements I01, I02, I03, I04, I05, I06, I07, I08, I09, I10, I11, I12, I13, I14, I15, I16, I17, I18, I19, I20 {} + + volatile B o; + + @Setup + public void setup() { + o = new C1(); + } + + static final int ITERS = 10000; + + @Benchmark + @OperationsPerInvocation(20*ITERS) + public void test(Blackhole bh) { + for (int c = 0; c < ITERS; c++) bh.consume(o instanceof I01); + for (int c = 0; c < ITERS; c++) bh.consume(o instanceof I02); + for (int c = 0; c < ITERS; c++) bh.consume(o instanceof I03); + for (int c = 0; c < ITERS; c++) bh.consume(o instanceof I04); + for (int c = 0; c < ITERS; c++) bh.consume(o instanceof I05); + + for (int c = 0; c < ITERS; c++) bh.consume(o instanceof I06); + for (int c = 0; c < ITERS; c++) bh.consume(o instanceof I07); + for (int c = 0; c < ITERS; c++) bh.consume(o instanceof I08); + for (int c = 0; c < ITERS; c++) bh.consume(o instanceof I09); + for (int c = 0; c < ITERS; c++) bh.consume(o instanceof I10); + + for (int c = 0; c < ITERS; c++) bh.consume(o instanceof I11); + for (int c = 0; c < ITERS; c++) bh.consume(o instanceof I12); + for (int c = 0; c < ITERS; c++) bh.consume(o instanceof I13); + for (int c = 0; c < ITERS; c++) bh.consume(o instanceof I14); + for (int c = 0; c < ITERS; c++) bh.consume(o instanceof I15); + + for (int c = 0; c < ITERS; c++) bh.consume(o instanceof I16); + for (int c = 0; c < ITERS; c++) bh.consume(o instanceof I17); + for (int c = 0; c < ITERS; c++) bh.consume(o instanceof I18); + for (int c = 0; c < ITERS; c++) bh.consume(o instanceof I19); + for (int c = 0; c < ITERS; c++) bh.consume(o 
instanceof I20); + } + +} diff --git a/test/micro/org/openjdk/bench/vm/compiler/SecondarySuperCacheInterContention.java b/test/micro/org/openjdk/bench/vm/compiler/SecondarySuperCacheInterContention.java new file mode 100644 index 00000000000..3cafa582c09 --- /dev/null +++ b/test/micro/org/openjdk/bench/vm/compiler/SecondarySuperCacheInterContention.java @@ -0,0 +1,81 @@ +/* + * Copyright Amazon.com Inc. or its affiliates. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ */ + +package org.openjdk.bench.vm.compiler; + +import org.openjdk.jmh.annotations.*; +import org.openjdk.jmh.infra.Blackhole; + +import java.util.concurrent.TimeUnit; + +@Warmup(iterations = 5, time = 300, timeUnit = TimeUnit.MILLISECONDS) +@Measurement(iterations = 5, time = 300, timeUnit = TimeUnit.MILLISECONDS) +@Fork(value = 3, jvmArgsAppend = {"-XX:+TieredCompilation", "-XX:TieredStopAtLevel=1"}) +@BenchmarkMode(Mode.AverageTime) +@OutputTimeUnit(TimeUnit.NANOSECONDS) +@Threads(Threads.MAX) +@State(Scope.Benchmark) +public class SecondarySuperCacheInterContention { + + // This test targets C1 specifically, to enter the interesting code path + // without heavily optimizing compiler like C2 optimizing based on profiles, + // or folding the instanceof checks. + + // The test verifies what happens on unhappy path, when we contend a lot over + // the secondary super cache, where different threads want to update the cache + // with different value. In this test, every thread comes with its own stable + // cached value. Meaning, this tests the INTER-thread contention. 
+ + interface IA {} + interface IB {} + class B {} + class C1 extends B implements IA, IB {} + class C2 extends B implements IA, IB {} + + volatile B o1, o2; + + @Setup + public void setup() { + o1 = new C1(); + o2 = new C2(); + } + + @Benchmark + @OperationsPerInvocation(2) + @Group("test") + @GroupThreads(1) + public void t1(Blackhole bh) { + bh.consume(o1 instanceof IA); + bh.consume(o2 instanceof IA); + } + + @Benchmark + @OperationsPerInvocation(2) + @Group("test") + @GroupThreads(1) + public void t2(Blackhole bh) { + bh.consume(o1 instanceof IB); + bh.consume(o2 instanceof IB); + } + +} diff --git a/test/micro/org/openjdk/bench/vm/compiler/SecondarySuperCacheIntraContention.java b/test/micro/org/openjdk/bench/vm/compiler/SecondarySuperCacheIntraContention.java new file mode 100644 index 00000000000..b97d49e2e60 --- /dev/null +++ b/test/micro/org/openjdk/bench/vm/compiler/SecondarySuperCacheIntraContention.java @@ -0,0 +1,72 @@ +/* + * Copyright Amazon.com Inc. or its affiliates. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +package org.openjdk.bench.vm.compiler; + +import org.openjdk.jmh.annotations.*; +import org.openjdk.jmh.infra.Blackhole; + +import java.util.concurrent.TimeUnit; + +@Warmup(iterations = 5, time = 300, timeUnit = TimeUnit.MILLISECONDS) +@Measurement(iterations = 5, time = 300, timeUnit = TimeUnit.MILLISECONDS) +@Fork(value = 3, jvmArgsAppend = {"-XX:+TieredCompilation", "-XX:TieredStopAtLevel=1"}) +@BenchmarkMode(Mode.AverageTime) +@OutputTimeUnit(TimeUnit.NANOSECONDS) +@Threads(Threads.MAX) +@State(Scope.Benchmark) +public class SecondarySuperCacheIntraContention { + + // This test targets C1 specifically, to enter the interesting code path + // without heavily optimizing compiler like C2 optimizing based on profiles, + // or folding the instanceof checks. + + // The test verifies what happens on unhappy path, when we contend a lot over + // the secondary super cache, where different threads want to update the cache + // with different value. In this test, every thread comes with its own contending + // value. Meaning, this tests the INTRA-thread contention. 
+ + interface IA {} + interface IB {} + class B {} + class C1 extends B implements IA, IB {} + class C2 extends B implements IA, IB {} + + volatile B o1, o2; + + @Setup + public void setup() { + o1 = new C1(); + o2 = new C2(); + } + + @Benchmark + @OperationsPerInvocation(4) + public void test(Blackhole bh) { + bh.consume(o1 instanceof IA); + bh.consume(o2 instanceof IA); + bh.consume(o1 instanceof IB); + bh.consume(o2 instanceof IB); + } + +} diff --git a/test/micro/org/openjdk/bench/vm/lang/SecondarySupersLookup.java b/test/micro/org/openjdk/bench/vm/lang/SecondarySupersLookup.java new file mode 100644 index 00000000000..54e5b081672 --- /dev/null +++ b/test/micro/org/openjdk/bench/vm/lang/SecondarySupersLookup.java @@ -0,0 +1,310 @@ +/* + * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ */
+package org.openjdk.bench.vm.lang;
+
+import org.openjdk.jmh.annotations.*;
+import java.util.concurrent.TimeUnit;
+import java.util.Random;
+
+/**
+ * Measures {@link Class#isInstance} checks against interface types.
+ *
+ * <p>The interfaces {@code I01} .. {@code I64} form one inheritance chain
+ * ({@code I02 extends I01}, {@code I03 extends I02}, ...), so an anonymous
+ * implementation of {@code INN} is an instance of NN interfaces. {@code J}
+ * is unrelated to that chain and is the target of the always-false checks.
+ *
+ * <p>{@code testPositiveNN} checks {@code objNN} against its own interface
+ * {@code INN}; {@code testNegativeNN} checks {@code objNN} against {@code J}.
+ * Every benchmark goes through {@link #test}, which throws when the observed
+ * answer differs from the expected one, so the outcome of each check feeds a
+ * branch instead of being dropped.
+ */
+@BenchmarkMode(Mode.AverageTime)
+@OutputTimeUnit(TimeUnit.NANOSECONDS)
+@State(Scope.Thread)
+@Warmup(iterations = 1, time = 1)
+@Measurement(iterations = 3, time = 1)
+@Fork(value = 5)
+public class SecondarySupersLookup {
+    // Unrelated interface: none of the benchmark objects implements it.
+    interface J {}
+
+    // Linear chain: INN extends I(NN-1), down to I01.
+    interface I01 {}
+    interface I02 extends I01 {}
+    interface I03 extends I02 {}
+    interface I04 extends I03 {}
+    interface I05 extends I04 {}
+    interface I06 extends I05 {}
+    interface I07 extends I06 {}
+    interface I08 extends I07 {}
+    interface I09 extends I08 {}
+    interface I10 extends I09 {}
+    interface I11 extends I10 {}
+    interface I12 extends I11 {}
+    interface I13 extends I12 {}
+    interface I14 extends I13 {}
+    interface I15 extends I14 {}
+    interface I16 extends I15 {}
+    interface I17 extends I16 {}
+    interface I18 extends I17 {}
+    interface I19 extends I18 {}
+    interface I20 extends I19 {}
+    interface I21 extends I20 {}
+    interface I22 extends I21 {}
+    interface I23 extends I22 {}
+    interface I24 extends I23 {}
+    interface I25 extends I24 {}
+    interface I26 extends I25 {}
+    interface I27 extends I26 {}
+    interface I28 extends I27 {}
+    interface I29 extends I28 {}
+    interface I30 extends I29 {}
+    interface I31 extends I30 {}
+    interface I32 extends I31 {}
+    interface I33 extends I32 {}
+    interface I34 extends I33 {}
+    interface I35 extends I34 {}
+    interface I36 extends I35 {}
+    interface I37 extends I36 {}
+    interface I38 extends I37 {}
+    interface I39 extends I38 {}
+    interface I40 extends I39 {}
+    interface I41 extends I40 {}
+    interface I42 extends I41 {}
+    interface I43 extends I42 {}
+    interface I44 extends I43 {}
+    interface I45 extends I44 {}
+    interface I46 extends I45 {}
+    interface I47 extends I46 {}
+    interface I48 extends I47 {}
+    interface I49 extends I48 {}
+    interface I50 extends I49 {}
+    interface I51 extends I50 {}
+    interface I52 extends I51 {}
+    interface I53 extends I52 {}
+    interface I54 extends I53 {}
+    interface I55 extends I54 {}
+    interface I56 extends I55 {}
+    interface I57 extends I56 {}
+    interface I58 extends I57 {}
+    interface I59 extends I58 {}
+    interface I60 extends I59 {}
+    interface I61 extends I60 {}
+    interface I62 extends I61 {}
+    interface I63 extends I62 {}
+    interface I64 extends I63 {}
+
+    // objNN is an anonymous implementation of INN; obj00 implements nothing.
+    final Object obj00 = new Object();
+    final Object obj01 = new I01() {};
+    final Object obj02 = new I02() {};
+    final Object obj03 = new I03() {};
+    final Object obj04 = new I04() {};
+    final Object obj05 = new I05() {};
+    final Object obj06 = new I06() {};
+    final Object obj07 = new I07() {};
+    final Object obj08 = new I08() {};
+    final Object obj09 = new I09() {};
+    final Object obj10 = new I10() {};
+    final Object obj16 = new I16() {};
+    final Object obj20 = new I20() {};
+    final Object obj30 = new I30() {};
+    final Object obj32 = new I32() {};
+    final Object obj40 = new I40() {};
+    final Object obj50 = new I50() {};
+    final Object obj55 = new I55() {};
+    final Object obj56 = new I56() {};
+    final Object obj57 = new I57() {};
+    final Object obj58 = new I58() {};
+    final Object obj59 = new I59() {};
+    final Object obj60 = new I60() {};
+    final Object obj61 = new I61() {};
+    final Object obj62 = new I62() {};
+    final Object obj63 = new I63() {};
+    final Object obj64 = new I64() {};
+
+    /**
+     * Maps an arbitrary int onto one of the ten interfaces {@code I01} ..
+     * {@code I10}.
+     *
+     * @param idx any int; only its remainder modulo 10 (sign dropped) matters
+     * @return {@code I01.class} for remainder 0 up to {@code I10.class} for 9
+     */
+    static Class<?> getSuper(int idx) {
+        // Reduce first, then drop the sign: Math.abs(Integer.MIN_VALUE) is
+        // still negative, so Math.abs(idx) % 10 would give -8 for that input.
+        // For every other idx both forms produce the same value.
+        int i = Math.abs(idx % 10);
+        switch (i) {
+            case 0: return I01.class;
+            case 1: return I02.class;
+            case 2: return I03.class;
+            case 3: return I04.class;
+            case 4: return I05.class;
+            case 5: return I06.class;
+            case 6: return I07.class;
+            case 7: return I08.class;
+            case 8: return I09.class;
+            case 9: return I10.class;
+        }
+        throw new InternalError("" + i);
+    }
+
+    /**
+     * JMH setup hook: drives {@link #test} 20,000 times for each of
+     * {@code obj01} .. {@code obj09}, cycling the tested interface through
+     * {@code I01} .. {@code I10}. The expected value is computed with the
+     * same {@code isInstance} call, so these invocations never throw.
+     * NOTE(review): presumably this is meant to expose the shared helper to
+     * several receiver and interface types before measurement; the source
+     * does not state the intent.
+     */
+    @Setup
+    public void warmup() {
+        for (int i = 0; i < 20_000; i++) {
+            Class<?> s = getSuper(i);
+            test(obj01, s, s.isInstance(obj01));
+            test(obj02, s, s.isInstance(obj02));
+            test(obj03, s, s.isInstance(obj03));
+            test(obj04, s, s.isInstance(obj04));
+            test(obj05, s, s.isInstance(obj05));
+            test(obj06, s, s.isInstance(obj06));
+            test(obj07, s, s.isInstance(obj07));
+            test(obj08, s, s.isInstance(obj08));
+            test(obj09, s, s.isInstance(obj09));
+        }
+    }
+
+    /**
+     * Performs the measured check.
+     *
+     * @param obj      object to test
+     * @param cls      type to test against
+     * @param expected the answer {@code cls.isInstance(obj)} must give
+     * @throws InternalError if the observed answer differs from {@code expected}
+     */
+    private static void test(Object obj, Class<?> cls, boolean expected) {
+        if (cls.isInstance(obj) != expected) {
+            throw new InternalError(obj.getClass() + " " + cls + " " + expected);
+        }
+    }
+
+    // Positive checks: objNN against its own (deepest) interface INN.
+    @Benchmark public void testPositive01() {
+        test(obj01, I01.class, true);
+    }
+    @Benchmark public void testPositive02() {
+        test(obj02, I02.class, true);
+    }
+    @Benchmark public void testPositive03() {
+        test(obj03, I03.class, true);
+    }
+    @Benchmark public void testPositive04() {
+        test(obj04, I04.class, true);
+    }
+    @Benchmark public void testPositive05() {
+        test(obj05, I05.class, true);
+    }
+    @Benchmark public void testPositive06() {
+        test(obj06, I06.class, true);
+    }
+    @Benchmark public void testPositive07() {
+        test(obj07, I07.class, true);
+    }
+    @Benchmark public void testPositive08() {
+        test(obj08, I08.class, true);
+    }
+    @Benchmark public void testPositive09() {
+        test(obj09, I09.class, true);
+    }
+    @Benchmark public void testPositive10() {
+        test(obj10, I10.class, true);
+    }
+    @Benchmark public void testPositive16() {
+        test(obj16, I16.class, true);
+    }
+    @Benchmark public void testPositive20() {
+        test(obj20, I20.class, true);
+    }
+    @Benchmark public void testPositive30() {
+        test(obj30, I30.class, true);
+    }
+    @Benchmark public void testPositive32() {
+        test(obj32, I32.class, true);
+    }
+    @Benchmark public void testPositive40() {
+        test(obj40, I40.class, true);
+    }
+    @Benchmark public void testPositive50() {
+        test(obj50, I50.class, true);
+    }
+    @Benchmark public void testPositive60() {
+        test(obj60, I60.class, true);
+    }
+    @Benchmark public void testPositive63() {
+        test(obj63, I63.class, true);
+    }
+    @Benchmark public void testPositive64() {
+        test(obj64, I64.class, true);
+    }
+
+    // Negative checks: objNN against J, which no benchmark object implements.
+    @Benchmark public void testNegative00() {
+        test(obj00, J.class, false);
+    }
+    @Benchmark public void testNegative01() {
+        test(obj01, J.class, false);
+    }
+    @Benchmark public void testNegative02() {
+        test(obj02, J.class, false);
+    }
+    @Benchmark public void testNegative03() {
+        test(obj03, J.class, false);
+    }
+    @Benchmark public void testNegative04() {
+        test(obj04, J.class, false);
+    }
+    @Benchmark public void testNegative05() {
+        test(obj05, J.class, false);
+    }
+    @Benchmark public void testNegative06() {
+        test(obj06, J.class, false);
+    }
+    @Benchmark public void testNegative07() {
+        test(obj07, J.class, false);
+    }
+    @Benchmark public void testNegative08() {
+        test(obj08, J.class, false);
+    }
+    @Benchmark public void testNegative09() {
+        test(obj09, J.class, false);
+    }
+    @Benchmark public void testNegative10() {
+        test(obj10, J.class, false);
+    }
+    @Benchmark public void testNegative16() {
+        test(obj16, J.class, false);
+    }
+    @Benchmark public void testNegative20() {
+        test(obj20, J.class, false);
+    }
+    @Benchmark public void testNegative30() {
+        test(obj30, J.class, false);
+    }
+    @Benchmark public void testNegative32() {
+        test(obj32, J.class, false);
+    }
+    @Benchmark public void testNegative40() {
+        test(obj40, J.class, false);
+    }
+    @Benchmark public void testNegative50() {
+        test(obj50, J.class, false);
+    }
+    @Benchmark public void testNegative55() {
+        test(obj55, J.class, false);
+    }
+    @Benchmark public void testNegative56() {
+        test(obj56, J.class, false);
+    }
+    @Benchmark public void testNegative57() {
+        test(obj57, J.class, false);
+    }
+    @Benchmark public void testNegative58() {
+        test(obj58, J.class, false);
+    }
+    @Benchmark public void testNegative59() {
+        test(obj59, J.class, false);
+    }
+    @Benchmark public void testNegative60() {
+        test(obj60, J.class, false);
+    }
+    @Benchmark public void testNegative61() {
+        test(obj61, J.class, false);
+    }
+    @Benchmark public void testNegative62() {
+        test(obj62, J.class, false);
+    }
+    @Benchmark public void testNegative63() {
+        test(obj63, J.class, false);
+    }
+
+    @Benchmark public void testNegative64() {
+        test(obj64, J.class, false);
+    }
+}
diff --git a/test/micro/org/openjdk/bench/vm/lang/TypePollution.java b/test/micro/org/openjdk/bench/vm/lang/TypePollution.java
new file mode 100644
index 00000000000..34ea25d8ee5
--- /dev/null
+++ b/test/micro/org/openjdk/bench/vm/lang/TypePollution.java
@@ -0,0 +1,214 @@
+/*
+ * Copyright (c) 2024, Red Hat, Inc.. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+package org.openjdk.bench.vm.lang;
+
+import org.openjdk.jmh.annotations.*;
+
+import java.io.Serializable;
+import java.lang.reflect.*;
+import java.util.*;
+import java.util.concurrent.*;
+import java.util.concurrent.TimeUnit;
+import java.util.function.*;
+
+/*
+ * A test to demonstrate type pollution.
+ */
+/**
+ * {@link #setup} builds 1000 {@link Proxy} instances, each implementing the
+ * distinct interfaces obtained from six pseudo-random draws (fixed seed 0)
+ * out of {@code I01} .. {@code I20}. The benchmarks walk that array with an
+ * xorshift sequence and run chains of {@code instanceof} tests on the chosen
+ * objects.
+ *
+ * <p>Each workload exists in four variants that differ only in the forked
+ * JVM flags: {@code UseSecondarySupersTable} on/off ("Table"/"Linear")
+ * combined with {@code UseSecondarySupersCache} on/off ("SCC"/"NoSCC").
+ */
+@BenchmarkMode(Mode.AverageTime)
+@OutputTimeUnit(TimeUnit.NANOSECONDS)
+@State(Scope.Thread)
+@Warmup(iterations = 4, time = 2)
+@Measurement(iterations = 4, time = 2)
+@Fork(value = 3)
+public class TypePollution {
+
+    /** Handler for the generated proxies; every invocation returns null. */
+    static class DynamicInvocationHandler implements InvocationHandler {
+
+        @Override
+        public Object invoke(Object proxy, Method method, Object[] args) {
+            return null;
+        }
+    }
+
+    interface I01 {}
+    interface I02 {}
+    interface I03 {}
+    interface I04 {}
+    interface I05 {}
+    interface I06 {}
+    interface I07 {}
+    interface I08 {}
+    interface I09 {}
+    interface I10 {}
+    interface I11 {}
+    interface I12 {}
+    interface I13 {}
+    interface I14 {}
+    interface I15 {}
+    interface I16 {}
+    interface I17 {}
+    interface I18 {}
+    interface I19 {}
+    interface I20 {}
+
+    // Pool of interfaces the proxies draw from.
+    static Class<?>[] classes;
+
+    static {
+        classes = new Class<?>[] { I01.class, I02.class, I03.class, I04.class, I05.class,
+                                   I06.class, I07.class, I08.class, I09.class, I10.class,
+                                   I11.class, I12.class, I13.class, I14.class, I15.class,
+                                   I16.class, I17.class, I18.class, I19.class, I20.class };
+    }
+
+    private static final int NOOFOBJECTS = 100;
+
+    public Object[] objectArray;
+
+    public Random rand = new Random(0);
+
+    /**
+     * Fills {@link #objectArray} with 1000 proxies. For each one, six
+     * interfaces are drawn from {@link #classes}; duplicates collapse in the
+     * set, so a proxy implements between one and six interfaces.
+     */
+    @Setup(Level.Trial)
+    public void setup() {
+        objectArray = new Object[1000];
+        var loader = getClass().getClassLoader();
+        for (int i = 0; i < objectArray.length; i++) {
+            Set<Class<?>> aSet = new HashSet<>();
+            for (int j = 0; j < 6; j++) {
+                aSet.add(classes[rand.nextInt(classes.length)]);
+            }
+            Class<?>[] interfaceArray = aSet.toArray(new Class<?>[0]);
+            objectArray[i] = Proxy.newProxyInstance(loader, interfaceArray, new DynamicInvocationHandler());
+        }
+    }
+
+    // State of the xorshift generator that selects objects from objectArray.
+    int probe = 99;
+
+    @Benchmark
+    @Fork(jvmArgs={"-XX:+UnlockDiagnosticVMOptions", "-XX:-UseSecondarySupersTable", "-XX:-UseSecondarySupersCache"})
+    @OutputTimeUnit(TimeUnit.MILLISECONDS)
+    public long parallelInstanceOfInterfaceSwitchLinearNoSCC() {
+        return parallelInstanceOfInterfaceSwitch();
+    }
+
+    @Benchmark
+    @Fork(jvmArgs={"-XX:+UnlockDiagnosticVMOptions", "-XX:-UseSecondarySupersTable", "-XX:+UseSecondarySupersCache"})
+    @OutputTimeUnit(TimeUnit.MILLISECONDS)
+    public long parallelInstanceOfInterfaceSwitchLinearSCC() {
+        return parallelInstanceOfInterfaceSwitch();
+    }
+
+    @Benchmark
+    @Fork(jvmArgs={"-XX:+UnlockDiagnosticVMOptions", "-XX:+UseSecondarySupersTable", "-XX:-UseSecondarySupersCache"})
+    @OutputTimeUnit(TimeUnit.MILLISECONDS)
+    public long parallelInstanceOfInterfaceSwitchTableNoSCC() {
+        return parallelInstanceOfInterfaceSwitch();
+    }
+
+    @Benchmark
+    @Fork(jvmArgs={"-XX:+UnlockDiagnosticVMOptions", "-XX:+UseSecondarySupersTable", "-XX:+UseSecondarySupersCache"})
+    @OutputTimeUnit(TimeUnit.MILLISECONDS)
+    public long parallelInstanceOfInterfaceSwitchTableSCC() {
+        return parallelInstanceOfInterfaceSwitch();
+    }
+
+    /**
+     * Runs 10000 rounds of {@link #instanceOfInterfaceSwitch} twice: once via
+     * {@link CompletableFuture#supplyAsync} and once on the calling thread,
+     * then adds the two sums. Both executions use this instance, so they read
+     * and write {@link #probe} without synchronization.
+     *
+     * @return the sum of both partial sums
+     * @throws RuntimeException wrapping any exception raised while running or
+     *         waiting for either task
+     */
+    long parallelInstanceOfInterfaceSwitch() {
+        Supplier<Long> s = () -> {
+            long sum = 0;
+            for (int i = 0; i < 10000; i++) {
+                sum += instanceOfInterfaceSwitch();
+            }
+            return sum;
+        };
+        try {
+            CompletableFuture<Long> future = CompletableFuture.supplyAsync(s);
+            return s.get() + future.get();
+        } catch (Exception e) {
+            throw new RuntimeException(e);
+        }
+    }
+
+    @Benchmark
+    @Fork(jvmArgs={"-XX:+UnlockDiagnosticVMOptions", "-XX:-UseSecondarySupersTable", "-XX:-UseSecondarySupersCache"})
+    public int instanceOfInterfaceSwitchLinearNoSCC() {
+        return instanceOfInterfaceSwitch();
+    }
+
+    @Benchmark
+    @Fork(jvmArgs={"-XX:+UnlockDiagnosticVMOptions", "-XX:-UseSecondarySupersTable", "-XX:+UseSecondarySupersCache"})
+    public int instanceOfInterfaceSwitchLinearSCC() {
+        return instanceOfInterfaceSwitch();
+    }
+
+    @Benchmark
+    @Fork(jvmArgs={"-XX:+UnlockDiagnosticVMOptions", "-XX:+UseSecondarySupersTable", "-XX:-UseSecondarySupersCache"})
+    public int instanceOfInterfaceSwitchTableNoSCC() {
+        return instanceOfInterfaceSwitch();
+    }
+
+    @Benchmark
+    @Fork(jvmArgs={"-XX:+UnlockDiagnosticVMOptions", "-XX:+UseSecondarySupersTable", "-XX:+UseSecondarySupersCache"})
+    public int instanceOfInterfaceSwitchTableSCC() {
+        return instanceOfInterfaceSwitch();
+    }
+
+    /**
+     * One unit of work: 100 iterations, each testing two pseudo-randomly
+     * chosen objects.
+     *
+     * <p>The first object is tested against {@code I01} .. {@code I08} in
+     * order and contributes the number (1..8) of the first interface it
+     * implements, or 10 if it implements none of them. The second object is
+     * tested against {@code I18} down to {@code I11} and contributes 8..1 for
+     * the first match, or 0 if there is none.
+     *
+     * @return the accumulated contributions
+     */
+    int instanceOfInterfaceSwitch() {
+        int dummy = 0;
+        List<Function<Object, Integer>> list1 = List.of(
+            o -> (o instanceof I01) ? 1 : -1,
+            o -> (o instanceof I02) ? 2 : -1,
+            o -> (o instanceof I03) ? 3 : -1,
+            o -> (o instanceof I04) ? 4 : -1,
+            o -> (o instanceof I05) ? 5 : -1,
+            o -> (o instanceof I06) ? 6 : -1,
+            o -> (o instanceof I07) ? 7 : -1,
+            o -> (o instanceof I08) ? 8 : -1
+        );
+
+        List<Function<Object, Integer>> list2 = List.of(
+            o -> (o instanceof I18) ? 8 : -1,
+            o -> (o instanceof I17) ? 7 : -1,
+            o -> (o instanceof I16) ? 6 : -1,
+            o -> (o instanceof I15) ? 5 : -1,
+            o -> (o instanceof I14) ? 4 : -1,
+            o -> (o instanceof I13) ? 3 : -1,
+            o -> (o instanceof I12) ? 2 : -1,
+            o -> (o instanceof I11) ? 1 : -1
+        );
+        for (int i = 0; i < 100; i++) {
+            probe ^= probe << 13;   // xorshift
+            probe ^= probe >>> 17;
+            probe ^= probe << 5;
+
+            // Clear the sign bit so the remainder is a valid array index.
+            Object obj = objectArray[(probe & Integer.MAX_VALUE) % objectArray.length];
+            dummy += list1.stream().mapToInt(element -> element.apply(obj)).filter(ret -> ret != -1).findFirst().orElse(10);
+            probe ^= probe << 13;   // xorshift
+            probe ^= probe >>> 17;
+            probe ^= probe << 5;
+
+            Object obj2 = objectArray[(probe & Integer.MAX_VALUE) % objectArray.length];
+            dummy += list2.stream().mapToInt(element -> element.apply(obj2)).filter(ret -> ret != -1).findFirst().orElse(0);
+        }
+        return dummy;
+    }
+}