Skip to content

Commit

Permalink
[Backport] 8261553: Efficient mask generation using BMI2 BZHI instruc…
Browse files Browse the repository at this point in the history
…tion

Summary: [Backport] 8261553: Efficient mask generation using BMI2 BZHI instruction

Test Plan: ci jtreg

Reviewed-by: JoshuaZhuwj

Issue: dragonwell-project#690
  • Loading branch information
JinZhonghui committed Oct 30, 2023
1 parent 6a0328a commit d781571
Show file tree
Hide file tree
Showing 4 changed files with 13 additions and 14 deletions.
7 changes: 7 additions & 0 deletions src/hotspot/cpu/x86/assembler_x86.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9454,6 +9454,13 @@ void Assembler::evpblendmq (XMMRegister dst, KRegister mask, XMMRegister nds, XM
emit_int16(0x64, (0xC0 | encode));
}

// Emits the BMI2 BZHI instruction in its VEX.W=1 form: a VEX prefix in the
// 0F 38 opcode map with no SIMD prefix, followed by opcode byte 0xF5 and a
// second byte built from 0xC0 OR'ed with the value returned by
// vex_prefix_and_encode().
//
//   dst  - register whose encoding is passed first to vex_prefix_and_encode()
//   src1 - register whose encoding is passed third to vex_prefix_and_encode()
//   src2 - register whose encoding is passed second to vex_prefix_and_encode()
//
// NOTE(review): per the BZHI definition, src2 presumably supplies the bit
// index from which the upper bits of src1 are cleared -- confirm against the
// Intel SDM.
void Assembler::bzhiq(Register dst, Register src1, Register src2) {
  assert(VM_Version::supports_bmi2(), "bit manipulation instructions not supported");

  InstructionAttr attrs(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);

  // Gather the three register encodings up front so the prefix call reads as
  // (first, second, third) operand slots.
  const int dst_enc  = dst->encoding();
  const int src2_enc = src2->encoding();
  const int src1_enc = src1->encoding();

  const int reg_bits = vex_prefix_and_encode(dst_enc, src2_enc, src1_enc, VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attrs);

  // 0xC0 sets the two top bits of the byte that follows the opcode.
  const int modrm = 0xC0 | reg_bits;
  emit_int16((unsigned char)0xF5, modrm);
}

void Assembler::shlxl(Register dst, Register src1, Register src2) {
assert(VM_Version::supports_bmi2(), "");
InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ true);
Expand Down
2 changes: 2 additions & 0 deletions src/hotspot/cpu/x86/assembler_x86.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -2111,6 +2111,8 @@ class Assembler : public AbstractAssembler {
void shlxl(Register dst, Register src1, Register src2);
void shlxq(Register dst, Register src1, Register src2);

// BMI2 BZHI (emitted with VEX.W set); the definition asserts VM_Version::supports_bmi2().
void bzhiq(Register dst, Register src1, Register src2);

//====================VECTOR ARITHMETIC=====================================
void evpmovd2m(KRegister kdst, XMMRegister src, int vector_len);
void evpmovq2m(KRegister kdst, XMMRegister src, int vector_len);
Expand Down
16 changes: 3 additions & 13 deletions src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -989,19 +989,9 @@ void C2_MacroAssembler::reduce8L(int opcode, Register dst, Register src1, XMMReg
}

// Emits code that builds a bit mask in `dst` from the count held in `len`.
//
//   dst  - receives the mask; it is first loaded with all ones (-1L)
//   len  - register handed to bzhiq() as its final (index) operand
//   temp - not referenced by the active code below; only the commented-out
//          shrxq variant used it
//
// NOTE(review): BZHI presumably clears every bit of `dst` at position `len`
// and above, leaving the low `len` bits set -- confirm against the Intel SDM.
void C2_MacroAssembler::genmask(Register dst, Register len, Register temp) {
// Earlier shift-based variant, retained as a comment: it selected between a
// shlxq/decq sequence and a shrxq sequence based on ArrayCopyPartialInlineSize.
/* Not introduce full 8252848, will be changed in JDK-8261553 and JDK-8262355
if (ArrayCopyPartialInlineSize <= 32) {
mov64(dst, 1);
shlxq(dst, dst, len);
decq(dst);
} else {
mov64(dst, -1);
movq(temp, len);
negptr(temp);
addptr(temp, 64);
shrxq(dst, dst, temp);
}
*/
// The assert below is left disabled; its note says JDK-8252848 was not fully merged.
// assert(ArrayCopyPartialInlineSize <= 64,""); Not full 8252848 merged
// Start from an all-ones value, then let bzhiq trim it in place using `len`.
mov64(dst, -1L);
bzhiq(dst, dst, len);
}
#endif // _LP64

Expand Down
2 changes: 1 addition & 1 deletion src/hotspot/cpu/x86/x86.ad
Original file line number Diff line number Diff line change
Expand Up @@ -1627,7 +1627,7 @@ const bool Matcher::match_rule_supported(int opcode) {
case Op_VectorMaskGen:
case Op_LoadVectorMasked:
case Op_StoreVectorMasked:
if (UseAVX < 3) {
if (UseAVX < 3 || !VM_Version::supports_bmi2()) {
return false;
}
break;
Expand Down

0 comments on commit d781571

Please sign in to comment.