Skip to content

Commit

Permalink
[Backport] 8266317: Vector API enhancements
Browse files Browse the repository at this point in the history
Summary: [Backport] 8266317: Vector API enhancements

Test Plan: ci jtreg

Reviewed-by: JoshuaZhuwj

Issue: #617
  • Loading branch information
JinZhonghui committed Nov 13, 2023
1 parent 6262964 commit 1dc8b78
Show file tree
Hide file tree
Showing 117 changed files with 9,739 additions and 858 deletions.
52 changes: 24 additions & 28 deletions src/hotspot/cpu/x86/assembler_x86.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4011,18 +4011,6 @@ void Assembler::evpcmpuw(KRegister kdst, XMMRegister nds, XMMRegister src, Compa
emit_int8(vcc);
}

// Opmask-predicated, register-register form of the word compare emitted with opcode 0x3E.
// Emits an EVEX-encoded instruction (0x66 prefix, 0F 3A opcode map, W bit set) followed by
// the register-direct ModRM byte and the comparison predicate as the trailing immediate.
//   kdst - opmask register that receives the comparison result
//   mask - opmask register installed as the embedded opmask specifier
//   nds  - first vector operand; src - second vector operand
//   vcc  - comparison predicate, emitted verbatim as the last byte
// NOTE(review): the call sites visible in char_array_compress use
// evpcmpw(..., /*signed*/ false, ...) instead of this overload.
void Assembler::evpcmpuw(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src, ComparisonPredicate vcc, int vector_len) {
assert(VM_Version::supports_avx512vlbw(), "");
InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
// NOTE(review): presumably selects merge-masking rather than zero-masking - confirm against InstructionAttr.
attributes.reset_is_clear_context();
attributes.set_embedded_opmask_register_specifier(mask);
attributes.set_is_evex_instruction();
int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
// Opcode byte, then ModRM with mod=11 (register operand), then the predicate immediate.
emit_int8(0x3E);
emit_int8((unsigned char)(0xC0 | encode));
emit_int8(vcc);
}

void Assembler::evpcmpuw(KRegister kdst, XMMRegister nds, Address src, ComparisonPredicate vcc, int vector_len) {
assert(VM_Version::supports_avx512vlbw(), "");
InstructionMark im(this);
Expand Down Expand Up @@ -9256,7 +9244,7 @@ void Assembler::vpcmpgtq(XMMRegister dst, XMMRegister nds, XMMRegister src, int
}

void Assembler::evpcmpd(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
int comparison, int vector_len) {
int comparison, bool is_signed, int vector_len) {
assert(VM_Version::supports_evex(), "");
assert(comparison >= Assembler::eq && comparison <= Assembler::_true, "");
// Encoding: EVEX.NDS.XXX.66.0F3A.W0 1F /r ib (signed) or 1E /r ib (unsigned, when is_signed is false)
Expand All @@ -9265,11 +9253,12 @@ void Assembler::evpcmpd(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegi
attributes.set_embedded_opmask_register_specifier(mask);
attributes.reset_is_clear_context();
int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
emit_int24(0x1F, (0xC0 | encode), comparison);
int opcode = is_signed ? 0x1F : 0x1E;
emit_int24(opcode, (0xC0 | encode), comparison);
}

void Assembler::evpcmpd(KRegister kdst, KRegister mask, XMMRegister nds, Address src,
int comparison, int vector_len) {
int comparison, bool is_signed, int vector_len) {
assert(VM_Version::supports_evex(), "");
assert(comparison >= Assembler::eq && comparison <= Assembler::_true, "");
// Encoding: EVEX.NDS.XXX.66.0F3A.W0 1F /r ib (signed) or 1E /r ib (unsigned, when is_signed is false)
Expand All @@ -9281,13 +9270,14 @@ void Assembler::evpcmpd(KRegister kdst, KRegister mask, XMMRegister nds, Address
attributes.reset_is_clear_context();
int dst_enc = kdst->encoding();
vex_prefix(src, nds->encoding(), dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
emit_int8((unsigned char)0x1F);
int opcode = is_signed ? 0x1F : 0x1E;
emit_int8((unsigned char)opcode);
emit_operand(as_Register(dst_enc), src);
emit_int8((unsigned char)comparison);
}

void Assembler::evpcmpq(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
int comparison, int vector_len) {
int comparison, bool is_signed, int vector_len) {
assert(VM_Version::supports_evex(), "");
assert(comparison >= Assembler::eq && comparison <= Assembler::_true, "");
// Encoding: EVEX.NDS.XXX.66.0F3A.W1 1F /r ib (signed) or 1E /r ib (unsigned, when is_signed is false)
Expand All @@ -9296,11 +9286,12 @@ void Assembler::evpcmpq(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegi
attributes.set_embedded_opmask_register_specifier(mask);
attributes.reset_is_clear_context();
int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
emit_int24(0x1F, (0xC0 | encode), comparison);
int opcode = is_signed ? 0x1F : 0x1E;
emit_int24(opcode, (0xC0 | encode), comparison);
}

void Assembler::evpcmpq(KRegister kdst, KRegister mask, XMMRegister nds, Address src,
int comparison, int vector_len) {
int comparison, bool is_signed, int vector_len) {
assert(VM_Version::supports_evex(), "");
assert(comparison >= Assembler::eq && comparison <= Assembler::_true, "");
// Encoding: EVEX.NDS.XXX.66.0F3A.W1 1F /r ib (signed) or 1E /r ib (unsigned, when is_signed is false)
Expand All @@ -9312,13 +9303,14 @@ void Assembler::evpcmpq(KRegister kdst, KRegister mask, XMMRegister nds, Address
attributes.reset_is_clear_context();
int dst_enc = kdst->encoding();
vex_prefix(src, nds->encoding(), dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
emit_int8((unsigned char)0x1F);
int opcode = is_signed ? 0x1F : 0x1E;
emit_int8((unsigned char)opcode);
emit_operand(as_Register(dst_enc), src);
emit_int8((unsigned char)comparison);
}

void Assembler::evpcmpb(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
int comparison, int vector_len) {
int comparison, bool is_signed, int vector_len) {
assert(VM_Version::supports_evex(), "");
assert(VM_Version::supports_avx512bw(), "");
assert(comparison >= Assembler::eq && comparison <= Assembler::_true, "");
Expand All @@ -9328,11 +9320,12 @@ void Assembler::evpcmpb(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegi
attributes.set_embedded_opmask_register_specifier(mask);
attributes.reset_is_clear_context();
int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
emit_int24(0x3F, (0xC0 | encode), comparison);
int opcode = is_signed ? 0x3F : 0x3E;
emit_int24(opcode, (0xC0 | encode), comparison);
}

void Assembler::evpcmpb(KRegister kdst, KRegister mask, XMMRegister nds, Address src,
int comparison, int vector_len) {
int comparison, bool is_signed, int vector_len) {
assert(VM_Version::supports_evex(), "");
assert(VM_Version::supports_avx512bw(), "");
assert(comparison >= Assembler::eq && comparison <= Assembler::_true, "");
Expand All @@ -9345,13 +9338,14 @@ void Assembler::evpcmpb(KRegister kdst, KRegister mask, XMMRegister nds, Address
attributes.reset_is_clear_context();
int dst_enc = kdst->encoding();
vex_prefix(src, nds->encoding(), dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
emit_int8((unsigned char)0x3F);
int opcode = is_signed ? 0x3F : 0x3E;
emit_int8((unsigned char)opcode);
emit_operand(as_Register(dst_enc), src);
emit_int8((unsigned char)comparison);
}

void Assembler::evpcmpw(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
int comparison, int vector_len) {
int comparison, bool is_signed, int vector_len) {
assert(VM_Version::supports_evex(), "");
assert(VM_Version::supports_avx512bw(), "");
assert(comparison >= Assembler::eq && comparison <= Assembler::_true, "");
Expand All @@ -9361,11 +9355,12 @@ void Assembler::evpcmpw(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegi
attributes.set_embedded_opmask_register_specifier(mask);
attributes.reset_is_clear_context();
int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
emit_int24(0x3F, (0xC0 | encode), comparison);
int opcode = is_signed ? 0x3F : 0x3E;
emit_int24(opcode, (0xC0 | encode), comparison);
}

void Assembler::evpcmpw(KRegister kdst, KRegister mask, XMMRegister nds, Address src,
int comparison, int vector_len) {
int comparison, bool is_signed, int vector_len) {
assert(VM_Version::supports_evex(), "");
assert(VM_Version::supports_avx512bw(), "");
assert(comparison >= Assembler::eq && comparison <= Assembler::_true, "");
Expand All @@ -9378,7 +9373,8 @@ void Assembler::evpcmpw(KRegister kdst, KRegister mask, XMMRegister nds, Address
attributes.reset_is_clear_context();
int dst_enc = kdst->encoding();
vex_prefix(src, nds->encoding(), dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
emit_int8((unsigned char)0x3F);
int opcode = is_signed ? 0x3F : 0x3E;
emit_int8((unsigned char)opcode);
emit_operand(as_Register(dst_enc), src);
emit_int8((unsigned char)comparison);
}
Expand Down
17 changes: 8 additions & 9 deletions src/hotspot/cpu/x86/assembler_x86.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1742,7 +1742,6 @@ class Assembler : public AbstractAssembler {
void evpcmpgtb(KRegister kdst, KRegister mask, XMMRegister nds, Address src, int vector_len);

void evpcmpuw(KRegister kdst, XMMRegister nds, XMMRegister src, ComparisonPredicate vcc, int vector_len);
void evpcmpuw(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src, ComparisonPredicate of, int vector_len);
void evpcmpuw(KRegister kdst, XMMRegister nds, Address src, ComparisonPredicate vcc, int vector_len);

void pcmpeqw(XMMRegister dst, XMMRegister src);
Expand Down Expand Up @@ -2452,27 +2451,27 @@ class Assembler : public AbstractAssembler {
// Vector integer compares
void vpcmpgtd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void evpcmpd(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
int comparison, int vector_len);
int comparison, bool is_signed, int vector_len);
void evpcmpd(KRegister kdst, KRegister mask, XMMRegister nds, Address src,
int comparison, int vector_len);
int comparison, bool is_signed, int vector_len);

// Vector long compares
void evpcmpq(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
int comparison, int vector_len);
int comparison, bool is_signed, int vector_len);
void evpcmpq(KRegister kdst, KRegister mask, XMMRegister nds, Address src,
int comparison, int vector_len);
int comparison, bool is_signed, int vector_len);

// Vector byte compares
void evpcmpb(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
int comparison, int vector_len);
int comparison, bool is_signed, int vector_len);
void evpcmpb(KRegister kdst, KRegister mask, XMMRegister nds, Address src,
int comparison, int vector_len);
int comparison, bool is_signed, int vector_len);

// Vector short compares
void evpcmpw(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
int comparison, int vector_len);
int comparison, bool is_signed, int vector_len);
void evpcmpw(KRegister kdst, KRegister mask, XMMRegister nds, Address src,
int comparison, int vector_len);
int comparison, bool is_signed, int vector_len);

void evpmovb2m(KRegister dst, XMMRegister src, int vector_len);

Expand Down
86 changes: 82 additions & 4 deletions src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1209,25 +1209,103 @@ void C2_MacroAssembler::get_elem(BasicType typ, XMMRegister dst, XMMRegister src
// Dispatches a masked compare of src1 against the memory literal adr to the
// element-size-specific helper chosen by typ:
//   T_BYTE -> evpcmpb, T_SHORT -> evpcmpw, T_INT/T_FLOAT -> evpcmpd, T_LONG/T_DOUBLE -> evpcmpq.
// kdmask receives the result, ksmask is passed through as the predicate mask, and
// scratch is handed to the helper for use when adr is not directly addressable.
// The calls that carry the is_signed argument always pass true (signed compare).
// Any other BasicType trips the assert.
void C2_MacroAssembler::evpcmp(BasicType typ, KRegister kdmask, KRegister ksmask, XMMRegister src1, AddressLiteral adr, int comparison, int vector_len, Register scratch) {
switch(typ) {
case T_BYTE:
evpcmpb(kdmask, ksmask, src1, adr, comparison, vector_len, scratch);
evpcmpb(kdmask, ksmask, src1, adr, comparison, /*signed*/ true, vector_len, scratch);
break;
case T_SHORT:
evpcmpw(kdmask, ksmask, src1, adr, comparison, vector_len, scratch);
evpcmpw(kdmask, ksmask, src1, adr, comparison, /*signed*/ true, vector_len, scratch);
break;
case T_INT:
case T_FLOAT:
// Float shares the 32-bit integer compare.
evpcmpd(kdmask, ksmask, src1, adr, comparison, vector_len, scratch);
evpcmpd(kdmask, ksmask, src1, adr, comparison, /*signed*/ true, vector_len, scratch);
break;
case T_LONG:
case T_DOUBLE:
// Double shares the 64-bit integer compare.
evpcmpq(kdmask, ksmask, src1, adr, comparison, vector_len, scratch);
evpcmpq(kdmask, ksmask, src1, adr, comparison, /*signed*/ true, vector_len, scratch);
break;
default:
assert(false,"Should not reach here.");
break;
}
}

// Unsigned element-wise compare of src1 and src2, built from a widening step plus
// vpcmpCCW at twice the element size.
// Each input is zero-extended into vtmp1/vtmp2 (both clobbered), the widened lanes are
// compared with the requested predicate, and the result is narrowed back into dst.
// All instructions are issued at the encoding for vlen_in_bytes*2 so the widened
// elements fit. Only T_BYTE, T_SHORT and T_INT are handled; any other type trips the assert.
// scratch is only forwarded to vpcmpCCW.
// NOTE(review): vtmp1 is written before src2 is read, so vtmp1 presumably must not
// alias src2 - confirm against the callers' register constraints.
void C2_MacroAssembler::vpcmpu(BasicType typ, XMMRegister dst, XMMRegister src1, XMMRegister src2, ComparisonPredicate comparison,
int vlen_in_bytes, XMMRegister vtmp1, XMMRegister vtmp2, Register scratch) {
// Work at double the input width: every element is widened to the next larger size.
int vlen_enc = vector_length_encoding(vlen_in_bytes*2);
switch (typ) {
case T_BYTE:
// byte -> word zero-extension, compare as words, then pack words back to bytes.
vpmovzxbw(vtmp1, src1, vlen_enc);
vpmovzxbw(vtmp2, src2, vlen_enc);
vpcmpCCW(dst, vtmp1, vtmp2, comparison, Assembler::W, vlen_enc, scratch);
// NOTE(review): signed-saturating pack; presumably safe because compare lanes are
// all-ones or all-zeros - confirm vpcmpCCW's result format.
vpacksswb(dst, dst, dst, vlen_enc);
break;
case T_SHORT:
// word -> dword zero-extension, compare as dwords, then pack dwords back to words.
vpmovzxwd(vtmp1, src1, vlen_enc);
vpmovzxwd(vtmp2, src2, vlen_enc);
vpcmpCCW(dst, vtmp1, vtmp2, comparison, Assembler::D, vlen_enc, scratch);
vpackssdw(dst, dst, dst, vlen_enc);
break;
case T_INT:
// dword -> qword zero-extension, compare as qwords.
vpmovzxdq(vtmp1, src1, vlen_enc);
vpmovzxdq(vtmp2, src2, vlen_enc);
vpcmpCCW(dst, vtmp1, vtmp2, comparison, Assembler::Q, vlen_enc, scratch);
// imm8 = 8 moves dwords 0 and 2 of each 128-bit lane into the two low dword slots,
// i.e. keeps the low half of each qword result.
vpermilps(dst, dst, 8, vlen_enc);
break;
default:
assert(false, "Should not reach here");
}
if (vlen_in_bytes == 16) {
// 16-byte inputs were processed at 32 bytes, and the narrowing above works per
// 128-bit lane; imm8 = 0x8 gathers qwords 0 and 2 into the low 128 bits of dst.
vpermpd(dst, dst, 0x8, vlen_enc);
}
}

// Unsigned element-wise compare that processes the sources in two halves.
// The low 128 bits of src1/src2 are zero-extended and compared into dst; the upper
// 128 bits (vextracti128 ..., 1) are then zero-extended and compared into vtmp3; finally
// both partial results are narrowed and merged into dst in element order.
// vtmp1, vtmp2 and vtmp3 are clobbered; scratch is only forwarded to vpcmpCCW.
// Only T_BYTE, T_SHORT and T_INT are handled; any other type trips the assert.
// NOTE(review): dst, vtmp1 and vtmp2 are written before the upper halves of src1/src2
// are read, so they presumably must not alias the sources - confirm against callers.
// NOTE(review): the name and the vextracti128 usage suggest 32-byte inputs; vlen_in_bytes
// is used unscaled for the encoding - confirm callers always pass 32.
void C2_MacroAssembler::vpcmpu32(BasicType typ, XMMRegister dst, XMMRegister src1, XMMRegister src2, ComparisonPredicate comparison, int vlen_in_bytes,
XMMRegister vtmp1, XMMRegister vtmp2, XMMRegister vtmp3, Register scratch) {
int vlen_enc = vector_length_encoding(vlen_in_bytes);
switch (typ) {
case T_BYTE:
// Low half: byte -> word, compare into dst.
vpmovzxbw(vtmp1, src1, vlen_enc);
vpmovzxbw(vtmp2, src2, vlen_enc);
vpcmpCCW(dst, vtmp1, vtmp2, comparison, Assembler::W, vlen_enc, scratch);
// High half: extract upper 128 bits, byte -> word, compare into vtmp3.
vextracti128(vtmp1, src1, 1);
vextracti128(vtmp2, src2, 1);
vpmovzxbw(vtmp1, vtmp1, vlen_enc);
vpmovzxbw(vtmp2, vtmp2, vlen_enc);
vpcmpCCW(vtmp3, vtmp1, vtmp2, comparison, Assembler::W, vlen_enc, scratch);
// The pack works per 128-bit lane, leaving qwords ordered dst.lo, vtmp3.lo, dst.hi, vtmp3.hi;
// 0xd8 reorders qwords as 0,2,1,3 to restore element order.
vpacksswb(dst, dst, vtmp3, vlen_enc);
vpermpd(dst, dst, 0xd8, vlen_enc);
break;
case T_SHORT:
// Same scheme as T_BYTE, one element size up (word -> dword, pack dword -> word).
vpmovzxwd(vtmp1, src1, vlen_enc);
vpmovzxwd(vtmp2, src2, vlen_enc);
vpcmpCCW(dst, vtmp1, vtmp2, comparison, Assembler::D, vlen_enc, scratch);
vextracti128(vtmp1, src1, 1);
vextracti128(vtmp2, src2, 1);
vpmovzxwd(vtmp1, vtmp1, vlen_enc);
vpmovzxwd(vtmp2, vtmp2, vlen_enc);
vpcmpCCW(vtmp3, vtmp1, vtmp2, comparison, Assembler::D, vlen_enc, scratch);
vpackssdw(dst, dst, vtmp3, vlen_enc);
vpermpd(dst, dst, 0xd8, vlen_enc);
break;
case T_INT:
// Low half: dword -> qword, compare as qwords.
vpmovzxdq(vtmp1, src1, vlen_enc);
vpmovzxdq(vtmp2, src2, vlen_enc);
vpcmpCCW(dst, vtmp1, vtmp2, comparison, Assembler::Q, vlen_enc, scratch);
// Keep the low dword of each qword result (dwords 0 and 2 per lane), then gather
// qwords 0 and 2 so the four narrowed results sit in the low 128 bits of dst.
vpshufd(dst, dst, 8, vlen_enc);
vpermq(dst, dst, 8, vlen_enc);
// High half: same widening compare into vtmp3.
vextracti128(vtmp1, src1, 1);
vextracti128(vtmp2, src2, 1);
vpmovzxdq(vtmp1, vtmp1, vlen_enc);
vpmovzxdq(vtmp2, vtmp2, vlen_enc);
vpcmpCCW(vtmp3, vtmp1, vtmp2, comparison, Assembler::Q, vlen_enc, scratch);
// Narrow as above, but 0x80 places qwords 0 and 2 in the upper 128 bits of vtmp3.
vpshufd(vtmp3, vtmp3, 8, vlen_enc);
vpermq(vtmp3, vtmp3, 0x80, vlen_enc);
// Mask 0xf0: dwords 4-7 come from vtmp3 (high-half results), dwords 0-3 stay from dst.
vpblendd(dst, dst, vtmp3, 0xf0, vlen_enc);
break;
default:
assert(false, "Should not reach here");
}
}

void C2_MacroAssembler::evpblend(BasicType typ, XMMRegister dst, KRegister kmask, XMMRegister src1, XMMRegister src2, bool merge, int vector_len) {
switch(typ) {
case T_BYTE:
Expand Down
6 changes: 6 additions & 0 deletions src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,12 @@
void load_vector_mask(XMMRegister dst, XMMRegister src, int vlen_in_bytes, BasicType elem_bt);
void load_iota_indices(XMMRegister dst, Register scratch, int vlen_in_bytes);

// vector compare
void vpcmpu(BasicType typ, XMMRegister dst, XMMRegister src1, XMMRegister src2, ComparisonPredicate comparison, int vlen_in_bytes,
XMMRegister vtmp1, XMMRegister vtmp2, Register scratch);
void vpcmpu32(BasicType typ, XMMRegister dst, XMMRegister src1, XMMRegister src2, ComparisonPredicate comparison, int vlen_in_bytes,
XMMRegister vtmp1, XMMRegister vtmp2, XMMRegister vtmp3, Register scratch);

// Reductions for vectors of bytes, shorts, ints, longs, floats, and doubles.

// dst = src1 reduce(op, src2) using vtmp as temps
Expand Down
28 changes: 14 additions & 14 deletions src/hotspot/cpu/x86/macroAssembler_x86.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4245,42 +4245,42 @@ void MacroAssembler::evpcmpeqd(KRegister kdst, KRegister mask, XMMRegister nds,
}

// AddressLiteral form of the masked dword compare.
// If src is reachable it is converted with as_Address and used directly as the memory
// operand; otherwise its address is first loaded into scratch_reg (clobbered) and the
// compare reads from [scratch_reg + 0]. comparison, is_signed and vector_len are passed
// through unchanged to Assembler::evpcmpd.
void MacroAssembler::evpcmpd(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src,
int comparison, int vector_len, Register scratch_reg) {
int comparison, bool is_signed, int vector_len, Register scratch_reg) {
if (reachable(src)) {
Assembler::evpcmpd(kdst, mask, nds, as_Address(src), comparison, vector_len);
Assembler::evpcmpd(kdst, mask, nds, as_Address(src), comparison, is_signed, vector_len);
} else {
lea(scratch_reg, src);
Assembler::evpcmpd(kdst, mask, nds, Address(scratch_reg, 0), comparison, vector_len);
Assembler::evpcmpd(kdst, mask, nds, Address(scratch_reg, 0), comparison, is_signed, vector_len);
}
}

// AddressLiteral form of the masked qword compare.
// If src is reachable it is converted with as_Address and used directly as the memory
// operand; otherwise its address is first loaded into scratch_reg (clobbered) and the
// compare reads from [scratch_reg + 0]. comparison, is_signed and vector_len are passed
// through unchanged to Assembler::evpcmpq.
void MacroAssembler::evpcmpq(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src,
int comparison, int vector_len, Register scratch_reg) {
int comparison, bool is_signed, int vector_len, Register scratch_reg) {
if (reachable(src)) {
Assembler::evpcmpq(kdst, mask, nds, as_Address(src), comparison, vector_len);
Assembler::evpcmpq(kdst, mask, nds, as_Address(src), comparison, is_signed, vector_len);
} else {
lea(scratch_reg, src);
Assembler::evpcmpq(kdst, mask, nds, Address(scratch_reg, 0), comparison, vector_len);
Assembler::evpcmpq(kdst, mask, nds, Address(scratch_reg, 0), comparison, is_signed, vector_len);
}
}

// AddressLiteral form of the masked byte compare.
// If src is reachable it is converted with as_Address and used directly as the memory
// operand; otherwise its address is first loaded into scratch_reg (clobbered) and the
// compare reads from [scratch_reg + 0]. comparison, is_signed and vector_len are passed
// through unchanged to Assembler::evpcmpb.
void MacroAssembler::evpcmpb(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src,
int comparison, int vector_len, Register scratch_reg) {
int comparison, bool is_signed, int vector_len, Register scratch_reg) {
if (reachable(src)) {
Assembler::evpcmpb(kdst, mask, nds, as_Address(src), comparison, vector_len);
Assembler::evpcmpb(kdst, mask, nds, as_Address(src), comparison, is_signed, vector_len);
} else {
lea(scratch_reg, src);
Assembler::evpcmpb(kdst, mask, nds, Address(scratch_reg, 0), comparison, vector_len);
Assembler::evpcmpb(kdst, mask, nds, Address(scratch_reg, 0), comparison, is_signed, vector_len);
}
}

// AddressLiteral form of the masked word compare.
// If src is reachable it is converted with as_Address and used directly as the memory
// operand; otherwise its address is first loaded into scratch_reg (clobbered) and the
// compare reads from [scratch_reg + 0]. comparison, is_signed and vector_len are passed
// through unchanged to Assembler::evpcmpw.
void MacroAssembler::evpcmpw(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src,
int comparison, int vector_len, Register scratch_reg) {
int comparison, bool is_signed, int vector_len, Register scratch_reg) {
if (reachable(src)) {
Assembler::evpcmpw(kdst, mask, nds, as_Address(src), comparison, vector_len);
Assembler::evpcmpw(kdst, mask, nds, as_Address(src), comparison, is_signed, vector_len);
} else {
lea(scratch_reg, src);
Assembler::evpcmpw(kdst, mask, nds, Address(scratch_reg, 0), comparison, vector_len);
Assembler::evpcmpw(kdst, mask, nds, Address(scratch_reg, 0), comparison, is_signed, vector_len);
}
}

Expand Down Expand Up @@ -10336,7 +10336,7 @@ void MacroAssembler::char_array_compress(Register src, Register dst, Register le
kmovdl(mask2, result);

evmovdquw(tmp1Reg, mask2, Address(src, 0), /*merge*/ false, Assembler::AVX_512bit);
evpcmpuw(mask1, mask2, tmp1Reg, tmp2Reg, Assembler::le, Assembler::AVX_512bit);
evpcmpw(mask1, mask2, tmp1Reg, tmp2Reg, Assembler::le, /*signed*/ false, Assembler::AVX_512bit);
ktestd(mask1, mask2);
jcc(Assembler::carryClear, return_zero);

Expand Down Expand Up @@ -10386,7 +10386,7 @@ void MacroAssembler::char_array_compress(Register src, Register dst, Register le
kmovdl(mask2, result);

evmovdquw(tmp1Reg, mask2, Address(src, 0), /*merge*/ false, Assembler::AVX_512bit);
evpcmpuw(mask1, mask2, tmp1Reg, tmp2Reg, Assembler::le, Assembler::AVX_512bit);
evpcmpw(mask1, mask2, tmp1Reg, tmp2Reg, Assembler::le, /*signed*/ false, Assembler::AVX_512bit);
ktestd(mask1, mask2);
jcc(Assembler::carryClear, return_zero);

Expand Down
Loading

0 comments on commit 1dc8b78

Please sign in to comment.