Skip to content

Commit

Permalink
[Backport] 8256973: Intrinsic creation for VectorMask query (lastTrue…
Browse files Browse the repository at this point in the history
…,firstTrue,trueCount) APIs

Summary: [Backport] 8256973: Intrinsic creation for VectorMask query (lastTrue,firstTrue,trueCount) APIs

Test Plan: ci jtreg

Reviewed-by: JoshuaZhuwj

Issue: #615
  • Loading branch information
JinZhonghui authored and JoshuaZhuwj committed Nov 13, 2023
1 parent fa355c6 commit 40d8643
Show file tree
Hide file tree
Showing 81 changed files with 1,821 additions and 32 deletions.
43 changes: 43 additions & 0 deletions mask.incr
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
diff --git a/src/hotspot/share/opto/vectorIntrinsics.cpp b/src/hotspot/share/opto/vectorIntrinsics.cpp
index 3b8261d91d0..ad4bfd57f53 100644
--- a/src/hotspot/share/opto/vectorIntrinsics.cpp
+++ b/src/hotspot/share/opto/vectorIntrinsics.cpp
@@ -429,8 +429,12 @@ bool LibraryCallKit::inline_vector_mask_operation() {
ciType* elem_type = elem_klass->const_oop()->as_instance()->java_mirror_type();
BasicType elem_bt = elem_type->basic_type();

- if (num_elem <= 2) {
- return false;
+ if (!arch_supports_vector(Op_LoadVector, num_elem, T_BOOLEAN, VecMaskNotUsed)) {
+ if (C->print_intrinsics()) {
+ tty->print_cr(" ** not supported: arity=1 op=cast#%d/3 vlen2=%d etype2=%s",
+ Op_LoadVector, num_elem, type2name(T_BOOLEAN));
+ }
+ return false; // not supported
}

int mopc = VectorSupport::vop2ideal(oper->get_con(), elem_bt);
@@ -439,7 +443,7 @@ bool LibraryCallKit::inline_vector_mask_operation() {
tty->print_cr(" ** not supported: arity=1 op=cast#%d/3 vlen2=%d etype2=%s",
mopc, num_elem, type2name(elem_bt));
}
- return false;
+ return false; // not supported
}

const Type* elem_ty = Type::get_const_basic_type(elem_bt);
diff --git a/test/micro/org/openjdk/bench/jdk/incubator/vector/MaskQueryOperationsBenchmark.java b/test/micro/org/openjdk/bench/jdk/incubator/vector/MaskQueryOperationsBenchmark.java
index 50add676c62..6f9df1800f6 100644
--- a/test/micro/org/openjdk/bench/jdk/incubator/vector/MaskQueryOperationsBenchmark.java
+++ b/test/micro/org/openjdk/bench/jdk/incubator/vector/MaskQueryOperationsBenchmark.java
@@ -84,8 +84,8 @@ public class MaskQueryOperationsBenchmark {

@Setup(Level.Trial)
public void BmSetup() {
- bspecies = VectorSpecies.of(int.class, VectorShape.forBitSize(bits));
- sspecies = VectorSpecies.of(int.class, VectorShape.forBitSize(bits));
+ bspecies = VectorSpecies.of(byte.class, VectorShape.forBitSize(bits));
+ sspecies = VectorSpecies.of(short.class, VectorShape.forBitSize(bits));
ispecies = VectorSpecies.of(int.class, VectorShape.forBitSize(bits));
lspecies = VectorSpecies.of(long.class, VectorShape.forBitSize(bits));

8 changes: 8 additions & 0 deletions src/hotspot/cpu/x86/assembler_x86.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9493,6 +9493,14 @@ void Assembler::shlxq(Register dst, Register src1, Register src2) {
emit_int8((unsigned char)(0xC0 | encode));
}

// Emits the EVEX-encoded VPMOVB2M instruction (F3 0F38 29 /r, W0): the opmask
// register `dst` is written from the byte elements of vector register `src`.
// `vector_len` selects the vector length encoding. Requires AVX-512 VL+BW,
// which is asserted below.
void Assembler::evpmovb2m(KRegister dst, XMMRegister src, int vector_len) {
  assert(VM_Version::supports_avx512vlbw(), "");
  // No embedded opmask is applied to this instruction; the vector length is honoured.
  InstructionAttr attrs(vector_len,
                        /* vex_w */ false,
                        /* legacy_mode */ false,
                        /* no_mask_reg */ true,
                        /* uses_vl */ true);
  attrs.set_is_evex_instruction();
  // Register-to-register form: prefix first, then opcode byte and ModRM byte.
  int reg_bits = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(),
                                       VEX_SIMD_F3, VEX_OPCODE_0F_38, &attrs);
  emit_int16(0x29, (0xC0 | reg_bits));
}

#ifndef _LP64

void Assembler::incl(Register dst) {
Expand Down
2 changes: 2 additions & 0 deletions src/hotspot/cpu/x86/assembler_x86.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -2474,6 +2474,8 @@ class Assembler : public AbstractAssembler {
void evpcmpw(KRegister kdst, KRegister mask, XMMRegister nds, Address src,
int comparison, int vector_len);

void evpmovb2m(KRegister dst, XMMRegister src, int vector_len);

// Vector blends
void blendvps(XMMRegister dst, XMMRegister src);
void blendvpd(XMMRegister dst, XMMRegister src);
Expand Down
51 changes: 51 additions & 0 deletions src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1305,5 +1305,56 @@ void C2_MacroAssembler::vectortest(int bt, int vlen, XMMRegister src1, XMMRegist
}
}

#ifdef _LP64
// Emits the code for a VectorMask query using an AVX-512 opmask register.
//   opc     - ideal opcode selecting the query: Op_VectorMaskTrueCount,
//             Op_VectorMaskLastTrue or Op_VectorMaskFirstTrue
//   dst     - general register receiving the result
//   mask    - vector register holding the mask, one byte per lane
//             NOTE(review): presumably 0/1 boolean bytes (VectorStoreMask output) - confirm
//   xtmp    - vector scratch register, overwritten
//   tmp     - general scratch register, overwritten
//   ktmp    - opmask scratch register, overwritten
//   masklen - value left in dst by Op_VectorMaskFirstTrue when no bit is set
//   vec_enc - vector length encoding passed to the vector instructions
// Requires AVX-512 VL+BW (asserted).
void C2_MacroAssembler::vector_mask_operation(int opc, Register dst, XMMRegister mask, XMMRegister xtmp,
Register tmp, KRegister ktmp, int masklen, int vec_enc) {
assert(VM_Version::supports_avx512vlbw(), "");
// xtmp = 0
vpxor(xtmp, xtmp, xtmp, vec_enc);
// xtmp = 0 - mask, byte-wise.
// NOTE(review): presumably turns a "true" byte (1) into 0xFF so its top bit is set - confirm
vpsubb(xtmp, xtmp, mask, vec_enc);
// Collect the top bit of every byte of xtmp into ktmp ...
evpmovb2m(ktmp, xtmp, vec_enc);
// ... and copy those bits into the general register tmp.
kmovql(tmp, ktmp);
switch(opc) {
case Op_VectorMaskTrueCount:
// Result is the number of set bits.
popcntq(dst, tmp);
break;
case Op_VectorMaskLastTrue:
// Default result -1; replaced by the index of the highest set bit
// only when the bit scan reports a non-zero input.
mov64(dst, -1);
bsrq(tmp, tmp);
cmov(Assembler::notZero, dst, tmp);
break;
case Op_VectorMaskFirstTrue:
// Default result masklen; replaced by the index of the lowest set bit
// only when the bit scan reports a non-zero input.
mov64(dst, masklen);
bsfq(tmp, tmp);
cmov(Assembler::notZero, dst, tmp);
break;
default: assert(false, "Unhandled mask operation");
}
}

// Emits the code for a VectorMask query without using an opmask register
// (the byte sign bits are gathered with vpmovmskb instead).
//   opc     - ideal opcode selecting the query: Op_VectorMaskTrueCount,
//             Op_VectorMaskLastTrue or Op_VectorMaskFirstTrue
//   dst     - general register receiving the result
//   mask    - vector register holding the mask, one byte per lane
//             NOTE(review): presumably 0/1 boolean bytes (VectorStoreMask output) - confirm
//   xtmp    - vector scratch register, overwritten
//   xtmp1   - not referenced anywhere in this body
//   tmp     - general scratch register, overwritten
//   masklen - value left in dst by Op_VectorMaskFirstTrue when no bit is set
//   vec_enc - vector length encoding passed to vpxor/vpsubb
// Requires AVX (asserted).
void C2_MacroAssembler::vector_mask_operation(int opc, Register dst, XMMRegister mask, XMMRegister xtmp,
XMMRegister xtmp1, Register tmp, int masklen, int vec_enc) {
assert(VM_Version::supports_avx(), "");
// xtmp = 0
vpxor(xtmp, xtmp, xtmp, vec_enc);
// xtmp = 0 - mask, byte-wise.
// NOTE(review): presumably turns a "true" byte (1) into 0xFF so its top bit is set - confirm
vpsubb(xtmp, xtmp, mask, vec_enc);
// Gather the top bit of every byte of xtmp into tmp.
// NOTE(review): vec_enc is not passed here, unlike the two instructions above - confirm
// which vector length this vpmovmskb overload encodes.
vpmovmskb(tmp, xtmp);
switch(opc) {
case Op_VectorMaskTrueCount:
// Result is the number of set bits.
popcntq(dst, tmp);
break;
case Op_VectorMaskLastTrue:
// Default result -1; replaced by the index of the highest set bit
// only when the bit scan reports a non-zero input.
mov64(dst, -1);
bsrq(tmp, tmp);
cmov(Assembler::notZero, dst, tmp);
break;
case Op_VectorMaskFirstTrue:
// Default result masklen; replaced by the index of the lowest set bit
// only when the bit scan reports a non-zero input.
mov64(dst, masklen);
bsfq(tmp, tmp);
cmov(Assembler::notZero, dst, tmp);
break;
default: assert(false, "Unhandled mask operation");
}
}
#endif

#endif // COMPILER2

10 changes: 10 additions & 0 deletions src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -156,4 +156,14 @@
// Base reduction instruction
void reduce_operation_128(BasicType typ, int opcode, XMMRegister dst, XMMRegister src);
void reduce_operation_256(BasicType typ, int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2);

public:
#ifdef _LP64
void vector_mask_operation(int opc, Register dst, XMMRegister mask, XMMRegister xtmp, Register tmp,
KRegister ktmp, int masklen, int vec_enc);

void vector_mask_operation(int opc, Register dst, XMMRegister mask, XMMRegister xtmp, XMMRegister xtmp1,
Register tmp, int masklen, int vec_enc);
#endif

#endif // CPU_X86_C2_MACROASSEMBLER_X86_HPP
69 changes: 69 additions & 0 deletions src/hotspot/cpu/x86/x86.ad
Original file line number Diff line number Diff line change
Expand Up @@ -1681,6 +1681,13 @@ const bool Matcher::match_rule_supported(int opcode) {
return false;
}
break;
case Op_VectorMaskFirstTrue:
case Op_VectorMaskLastTrue:
case Op_VectorMaskTrueCount:
if (!is_LP64 || UseAVX < 1) {
return false;
}
break;
case Op_SqrtF:
if (UseSSE < 1) {
return false;
Expand Down Expand Up @@ -8064,4 +8071,66 @@ instruct vmasked_store64(memory mem, vec src, kReg mask) %{
%}
ins_pipe( pipe_slow );
%}

// Match rule for VectorMaskTrueCount, selected when VM_Version::supports_avx512vlbw()
// is true. dst is declared TEMP_DEF; tmp, ktmp and xtmp are declared as temporaries.
// The encoding forwards to the KRegister overload of
// C2_MacroAssembler::vector_mask_operation with this node's ideal opcode.
instruct vmask_truecount_evex(rRegI dst, vec mask, rRegL tmp, kReg ktmp, vec xtmp) %{
predicate(VM_Version::supports_avx512vlbw());
match(Set dst (VectorMaskTrueCount mask));
effect(TEMP_DEF dst, TEMP tmp, TEMP ktmp, TEMP xtmp);
format %{ "vector_truecount_evex $mask \t! vector mask true count" %}
ins_encode %{
// Opcode, vector length encoding and lane count are all taken from this node / its mask input.
int opcode = this->ideal_Opcode();
int vlen_enc = vector_length_encoding(this, $mask);
int mask_len = vector_length(this, $mask);
__ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
$tmp$$Register, $ktmp$$KRegister, mask_len, vlen_enc);
%}
ins_pipe( pipe_slow );
%}

// Match rule shared by VectorMaskFirstTrue and VectorMaskLastTrue, selected when
// VM_Version::supports_avx512vlbw() is true. dst is declared TEMP_DEF; tmp, ktmp and
// xtmp are declared as temporaries; the flags register cr is declared killed.
// The encoding forwards to the KRegister overload of
// C2_MacroAssembler::vector_mask_operation, which distinguishes first/last by the
// ideal opcode passed in.
instruct vmask_first_or_last_true_evex(rRegI dst, vec mask, rRegL tmp, kReg ktmp, vec xtmp, rFlagsReg cr) %{
predicate(VM_Version::supports_avx512vlbw());
match(Set dst (VectorMaskFirstTrue mask));
match(Set dst (VectorMaskLastTrue mask));
effect(TEMP_DEF dst, TEMP tmp, TEMP ktmp, TEMP xtmp, KILL cr);
format %{ "vector_mask_first_or_last_true_evex $mask \t! vector first/last true location" %}
ins_encode %{
// Opcode, vector length encoding and lane count are all taken from this node / its mask input.
int opcode = this->ideal_Opcode();
int vlen_enc = vector_length_encoding(this, $mask);
int mask_len = vector_length(this, $mask);
__ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
$tmp$$Register, $ktmp$$KRegister, mask_len, vlen_enc);
%}
ins_pipe( pipe_slow );
%}

// Match rule for VectorMaskTrueCount, selected when VM_Version::supports_avx512vlbw()
// is false. dst is declared TEMP_DEF; tmp, xtmp and xtmp1 are declared as temporaries.
// The encoding forwards to the overload of C2_MacroAssembler::vector_mask_operation
// that takes two vector temporaries and no opmask register.
instruct vmask_truecount_avx(rRegI dst, vec mask, rRegL tmp, vec xtmp, vec xtmp1) %{
predicate(!VM_Version::supports_avx512vlbw());
match(Set dst (VectorMaskTrueCount mask));
effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, TEMP xtmp1);
format %{ "vector_truecount_avx $mask \t! vector mask true count" %}
ins_encode %{
// Opcode, vector length encoding and lane count are all taken from this node / its mask input.
int opcode = this->ideal_Opcode();
int vlen_enc = vector_length_encoding(this, $mask);
int mask_len = vector_length(this, $mask);
__ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
$xtmp1$$XMMRegister, $tmp$$Register, mask_len, vlen_enc);
%}
ins_pipe( pipe_slow );
%}

// Match rule shared by VectorMaskFirstTrue and VectorMaskLastTrue, selected when
// VM_Version::supports_avx512vlbw() is false. dst is declared TEMP_DEF; tmp, xtmp and
// xtmp1 are declared as temporaries; the flags register cr is declared killed.
// The encoding forwards to the overload of C2_MacroAssembler::vector_mask_operation
// that takes two vector temporaries and no opmask register; first/last is
// distinguished by the ideal opcode passed in.
instruct vmask_first_or_last_true_avx(rRegI dst, vec mask, rRegL tmp, vec xtmp, vec xtmp1, rFlagsReg cr) %{
predicate(!VM_Version::supports_avx512vlbw());
match(Set dst (VectorMaskFirstTrue mask));
match(Set dst (VectorMaskLastTrue mask));
effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, TEMP xtmp1, KILL cr);
format %{ "vector_mask_first_or_last_true_avx $mask \t! vector first/last true location" %}
ins_encode %{
// Opcode, vector length encoding and lane count are all taken from this node / its mask input.
int opcode = this->ideal_Opcode();
int vlen_enc = vector_length_encoding(this, $mask);
int mask_len = vector_length(this, $mask);
__ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
$xtmp1$$XMMRegister, $tmp$$Register, mask_len, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
#endif // _LP64
7 changes: 6 additions & 1 deletion src/hotspot/share/classfile/vmSymbols.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1600,6 +1600,11 @@
do_alias(vector_rebox_sig, object_object_signature) \
do_name(vector_rebox_name, "maybeRebox") \
\
do_intrinsic(_VectorMaskOp, jdk_internal_vm_vector_VectorSupport, vector_mask_oper_name, vector_mask_oper_sig, F_S) \
do_signature(vector_mask_oper_sig, "(ILjava/lang/Class;Ljava/lang/Class;ILjava/lang/Object;" \
"Ljdk/internal/vm/vector/VectorSupport$VectorMaskOp;)I") \
do_name(vector_mask_oper_name, "maskReductionCoerced") \
\
\
/* (2) Bytecode intrinsics */ \
\
Expand Down Expand Up @@ -1795,7 +1800,7 @@ class vmIntrinsics: AllStatic {
#undef VM_INTRINSIC_ENUM

ID_LIMIT,
LAST_COMPILER_INLINE = _VectorScatterOp,
LAST_COMPILER_INLINE = _VectorMaskOp,
FIRST_MH_SIG_POLY = _invokeGeneric,
FIRST_MH_STATIC = _linkToVirtual,
LAST_MH_SIG_POLY = _linkToInterface,
Expand Down
1 change: 1 addition & 0 deletions src/hotspot/share/opto/c2compiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -679,6 +679,7 @@ bool C2Compiler::is_intrinsic_supported(const methodHandle& method, bool is_virt
case vmIntrinsics::_VectorConvert:
case vmIntrinsics::_VectorInsert:
case vmIntrinsics::_VectorExtract:
case vmIntrinsics::_VectorMaskOp:
return EnableVectorSupport;

default:
Expand Down
4 changes: 4 additions & 0 deletions src/hotspot/share/opto/classes.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -397,6 +397,10 @@ macro(StoreVectorScatter)
macro(LoadVectorMasked)
macro(StoreVectorMasked)
macro(VectorMaskGen)
macro(VectorMaskOp)
macro(VectorMaskTrueCount)
macro(VectorMaskFirstTrue)
macro(VectorMaskLastTrue)
macro(Pack)
macro(PackB)
macro(PackS)
Expand Down
2 changes: 2 additions & 0 deletions src/hotspot/share/opto/library_call.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -627,6 +627,8 @@ bool LibraryCallKit::try_to_inline(int predicate) {
return inline_vector_broadcast_coerced();
case vmIntrinsics::_VectorShuffleIota:
return inline_vector_shuffle_iota();
case vmIntrinsics::_VectorMaskOp:
return inline_vector_mask_operation();
case vmIntrinsics::_VectorShuffleToVector:
return inline_vector_shuffle_to_vector();
case vmIntrinsics::_VectorLoadOp:
Expand Down
1 change: 1 addition & 0 deletions src/hotspot/share/opto/library_call.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -310,6 +310,7 @@ class LibraryCallKit : public GraphKit {
bool inline_vector_broadcast_coerced();
bool inline_vector_shuffle_to_vector();
bool inline_vector_shuffle_iota();
bool inline_vector_mask_operation();
bool inline_vector_mem_operation(bool is_store);
bool inline_vector_gather_scatter(bool is_scatter);
bool inline_vector_reduction();
Expand Down
54 changes: 54 additions & 0 deletions src/hotspot/share/opto/vectorIntrinsics.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -408,6 +408,60 @@ bool LibraryCallKit::inline_vector_shuffle_iota() {
return true;
}

// <E, M>
// int maskReductionCoerced(int oper, Class<? extends M> maskClass, Class<?> elemClass,
//                          int length, M m, VectorMaskOp<M> defaultImpl)
//
// Builds the graph for a VectorMask query: the mask argument is unboxed, passed
// through a VectorStoreMask node, and fed to the VectorMask* node selected by
// VectorSupport::vop2ideal(oper, element type). On success the query node becomes
// the call's result and true is returned. false is returned, with no result set,
// when an argument is unusable (dead, non-constant, uninitialized class, failed
// unboxing) or when arch_supports_vector() rejects the required operations.
bool LibraryCallKit::inline_vector_mask_operation() {
  const TypeInt*     oper       = gvn().type(argument(0))->isa_int();
  const TypeInstPtr* mask_klass = gvn().type(argument(1))->isa_instptr();
  const TypeInstPtr* elem_klass = gvn().type(argument(2))->isa_instptr();
  const TypeInt*     vlen       = gvn().type(argument(3))->isa_int();
  Node*              mask       = argument(4);

  // oper is dereferenced below just like the other arguments, so it is checked here too.
  if (oper == NULL || mask_klass == NULL || elem_klass == NULL || mask->is_top() || vlen == NULL) {
    return false; // dead code
  }

  // get_con() and const_oop()->... below are only meaningful for compile-time constants.
  if (!oper->is_con() || !vlen->is_con() ||
      mask_klass->const_oop() == NULL || elem_klass->const_oop() == NULL) {
    if (C->print_intrinsics()) {
      tty->print_cr(" ** missing constant: oper, mask class, element class or vector length");
    }
    return false; // not enough info for intrinsification
  }

  if (!is_klass_initialized(mask_klass)) {
    if (C->print_intrinsics()) {
      tty->print_cr(" ** klass argument not initialized");
    }
    return false;
  }

  int num_elem = vlen->get_con();
  ciType* elem_type = elem_klass->const_oop()->as_instance()->java_mirror_type();
  BasicType elem_bt = elem_type->basic_type();

  // The mask is handled as a vector of booleans, so loading such a vector must be supported.
  if (!arch_supports_vector(Op_LoadVector, num_elem, T_BOOLEAN, VecMaskNotUsed)) {
    if (C->print_intrinsics()) {
      tty->print_cr(" ** not supported: arity=1 op=cast#%d/3 vlen2=%d etype2=%s",
                    Op_LoadVector, num_elem, type2name(T_BOOLEAN));
    }
    return false; // not supported
  }

  // Translate the Java-level operation id into the ideal opcode of the query node.
  int mopc = VectorSupport::vop2ideal(oper->get_con(), elem_bt);
  if (!arch_supports_vector(mopc, num_elem, elem_bt, VecMaskNotUsed)) {
    if (C->print_intrinsics()) {
      tty->print_cr(" ** not supported: arity=1 op=cast#%d/3 vlen2=%d etype2=%s",
                    mopc, num_elem, type2name(elem_bt));
    }
    return false; // not supported
  }

  ciKlass* mbox_klass = mask_klass->const_oop()->as_instance()->java_lang_Class_klass();
  const TypeInstPtr* mask_box_type = TypeInstPtr::make_exact(TypePtr::NotNull, mbox_klass);
  Node* mask_vec = unbox_vector(mask, mask_box_type, elem_bt, num_elem, true);
  if (mask_vec == NULL) {
    // Do not hand a NULL input to VectorStoreMaskNode::make.
    if (C->print_intrinsics()) {
      tty->print_cr(" ** unbox failed mask");
    }
    return false; // operand unboxing failed
  }

  Node* store_mask = gvn().transform(VectorStoreMaskNode::make(gvn(), mask_vec, elem_bt, num_elem));
  Node* maskoper = gvn().transform(VectorMaskOpNode::make(store_mask, TypeInt::INT, mopc));
  set_result(maskoper);

  // Record the widest vector (in bytes) used by this compilation.
  C->set_max_vector_size(MAX2(C->max_vector_size(), (uint)(num_elem * type2aelembytes(elem_bt))));
  return true;
}

// <VM ,Sh extends VectorShuffle<E>, E>
// VM shuffleToVector(Class<VM> VecClass, Class<?>E , Class<?> ShuffleClass, Sh s, int length,
// ShuffleToVectorOperation<VM,Sh,E> defaultImpl)
Expand Down
15 changes: 15 additions & 0 deletions src/hotspot/share/opto/vectornode.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1144,6 +1144,21 @@ const TypeFunc* VectorBoxNode::vec_box_type(const TypeInstPtr* box_type) {
return TypeFunc::make(domain, range);
}

// Factory for the mask query nodes: creates the node class that corresponds to
// the ideal opcode `mopc`, with `mask` as input and `ty` as result type.
// An unrecognised opcode trips the assert; NULL is returned in that case.
Node* VectorMaskOpNode::make(Node* mask, const Type* ty, int mopc) {
  Node* query = NULL;
  if (mopc == Op_VectorMaskTrueCount) {
    query = new VectorMaskTrueCountNode(mask, ty);
  } else if (mopc == Op_VectorMaskLastTrue) {
    query = new VectorMaskLastTrueNode(mask, ty);
  } else if (mopc == Op_VectorMaskFirstTrue) {
    query = new VectorMaskFirstTrueNode(mask, ty);
  } else {
    assert(false, "Unhandled operation");
  }
  return query;
}


#ifndef PRODUCT
void VectorBoxAllocateNode::dump_spec(outputStream *st) const {
CallStaticJavaNode::dump_spec(st);
Expand Down
Loading

0 comments on commit 40d8643

Please sign in to comment.