diff --git a/src/hotspot/cpu/x86/assembler_x86.cpp b/src/hotspot/cpu/x86/assembler_x86.cpp index 7005c4088b9..c8d7ee20e3a 100644 --- a/src/hotspot/cpu/x86/assembler_x86.cpp +++ b/src/hotspot/cpu/x86/assembler_x86.cpp @@ -2815,6 +2815,22 @@ void Assembler::evmovdqub(XMMRegister dst, KRegister mask, Address src, bool mer emit_operand(dst, src); } +void Assembler::evmovdqub(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len) { + assert(VM_Version::supports_avx512vlbw(), ""); + assert(src != xnoreg, "sanity"); + InstructionMark im(this); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit); + attributes.set_embedded_opmask_register_specifier(mask); + attributes.set_is_evex_instruction(); + if (merge) { + attributes.reset_is_clear_context(); + } + vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes); + emit_int8(0x7F); + emit_operand(src, dst); +} + void Assembler::evmovdquw(XMMRegister dst, Address src, bool merge, int vector_len) { assert(VM_Version::supports_evex(), ""); InstructionMark im(this); diff --git a/src/hotspot/cpu/x86/assembler_x86.hpp b/src/hotspot/cpu/x86/assembler_x86.hpp index 0f4f36c76de..612ebc4cc24 100644 --- a/src/hotspot/cpu/x86/assembler_x86.hpp +++ b/src/hotspot/cpu/x86/assembler_x86.hpp @@ -1542,6 +1542,7 @@ class Assembler : public AbstractAssembler { void evmovdqub(XMMRegister dst, Address src, bool merge, int vector_len); void evmovdqub(XMMRegister dst, XMMRegister src, bool merge, int vector_len); void evmovdqub(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len); + void evmovdqub(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len); void evmovdquw(Address dst, XMMRegister src, bool merge, int vector_len); void evmovdquw(Address dst, KRegister mask, XMMRegister src, bool merge, 
int vector_len); void evmovdquw(XMMRegister dst, Address src, bool merge, int vector_len); diff --git a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp index 89cdbee5604..d2134a270d3 100644 --- a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp @@ -987,6 +987,22 @@ void C2_MacroAssembler::reduce8L(int opcode, Register dst, Register src1, XMMReg reduce_operation_256(T_LONG, opcode, vtmp2, vtmp2, src2); reduce4L(opcode, dst, src1, vtmp2, vtmp1, vtmp2); } + +void C2_MacroAssembler::genmask(Register dst, Register len, Register temp) { + /* Not introduce full 8252848, will be changed in JDK-8261553 and JDK-8262355 + if (ArrayCopyPartialInlineSize <= 32) { + mov64(dst, 1); + shlxq(dst, dst, len); + decq(dst); + } else { + mov64(dst, -1); + movq(temp, len); + negptr(temp); + addptr(temp, 64); + shrxq(dst, dst, temp); + } + */ +} #endif // _LP64 void C2_MacroAssembler::reduce2F(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp) { @@ -1033,6 +1049,15 @@ void C2_MacroAssembler::reduce8D(int opcode, XMMRegister dst, XMMRegister src, X reduce4D(opcode, dst, vtmp1, vtmp1, vtmp2); } +void C2_MacroAssembler::evmovdqu(BasicType type, KRegister kmask, XMMRegister dst, Address src, int vector_len) { + MacroAssembler::evmovdqu(type, kmask, dst, src, vector_len); +} + +void C2_MacroAssembler::evmovdqu(BasicType type, KRegister kmask, Address dst, XMMRegister src, int vector_len) { + MacroAssembler::evmovdqu(type, kmask, dst, src, vector_len); +} + + void C2_MacroAssembler::reduceFloatMinMax(int opcode, int vlen, bool is_dst_valid, XMMRegister dst, XMMRegister src, XMMRegister tmp, XMMRegister atmp, XMMRegister btmp, diff --git a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp index 5e62c3705d3..33db61e9bd8 100644 --- a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp +++ b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp @@ -67,6 +67,9 @@ 
void evgather(BasicType typ, XMMRegister dst, KRegister mask, Register base, XMMRegister idx, int vector_len); void evscatter(BasicType typ, Register base, XMMRegister idx, KRegister mask, XMMRegister src, int vector_len); + void evmovdqu(BasicType type, KRegister kmask, XMMRegister dst, Address src, int vector_len); + void evmovdqu(BasicType type, KRegister kmask, Address dst, XMMRegister src, int vector_len); + // extract void extract(BasicType typ, Register dst, XMMRegister src, int idx); XMMRegister get_lane(BasicType typ, XMMRegister dst, XMMRegister src, int elemindex); @@ -90,6 +93,7 @@ void reduceI(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); #ifdef _LP64 void reduceL(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); + void genmask(Register dst, Register len, Register temp); #endif // _LP64 // dst = reduce(op, src2) using vtmp as temps diff --git a/src/hotspot/cpu/x86/macroAssembler_x86.cpp b/src/hotspot/cpu/x86/macroAssembler_x86.cpp index f411993b773..0004ce8ba49 100644 --- a/src/hotspot/cpu/x86/macroAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/macroAssembler_x86.cpp @@ -10578,6 +10578,55 @@ void MacroAssembler::byte_array_inflate(Register src, Register dst, Register len bind(done); } + +void MacroAssembler::evmovdqu(BasicType type, KRegister kmask, XMMRegister dst, Address src, int vector_len) { + switch(type) { + case T_BYTE: + case T_BOOLEAN: + evmovdqub(dst, kmask, src, false, vector_len); + break; + case T_CHAR: + case T_SHORT: + evmovdquw(dst, kmask, src, false, vector_len); + break; + case T_INT: + case T_FLOAT: + evmovdqul(dst, kmask, src, false, vector_len); + break; + case T_LONG: + case T_DOUBLE: + evmovdquq(dst, kmask, src, false, vector_len); + break; + default: + fatal("Unexpected type argument %s", type2name(type)); + break; + } +} + +void MacroAssembler::evmovdqu(BasicType type, KRegister kmask, Address dst, XMMRegister 
src, int vector_len) { + switch(type) { + case T_BYTE: + case T_BOOLEAN: + evmovdqub(dst, kmask, src, true, vector_len); + break; + case T_CHAR: + case T_SHORT: + evmovdquw(dst, kmask, src, true, vector_len); + break; + case T_INT: + case T_FLOAT: + evmovdqul(dst, kmask, src, true, vector_len); + break; + case T_LONG: + case T_DOUBLE: + evmovdquq(dst, kmask, src, true, vector_len); + break; + default: + fatal("Unexpected type argument %s", type2name(type)); + break; + } +} + Assembler::Condition MacroAssembler::negate_condition(Assembler::Condition cond) { switch (cond) { // Note some conditions are synonyms for others diff --git a/src/hotspot/cpu/x86/macroAssembler_x86.hpp b/src/hotspot/cpu/x86/macroAssembler_x86.hpp index 461304c1d33..b1b856d0a5f 100644 --- a/src/hotspot/cpu/x86/macroAssembler_x86.hpp +++ b/src/hotspot/cpu/x86/macroAssembler_x86.hpp @@ -1129,10 +1129,14 @@ class MacroAssembler: public Assembler { void vmovdqu(XMMRegister dst, AddressLiteral src, Register scratch_reg = rscratch1); // AVX512 Unaligned + void evmovdqu(BasicType type, KRegister kmask, Address dst, XMMRegister src, int vector_len); + void evmovdqu(BasicType type, KRegister kmask, XMMRegister dst, Address src, int vector_len); + void evmovdqub(Address dst, XMMRegister src, bool merge, int vector_len) { Assembler::evmovdqub(dst, src, merge, vector_len); } void evmovdqub(XMMRegister dst, Address src, bool merge, int vector_len) { Assembler::evmovdqub(dst, src, merge, vector_len); } void evmovdqub(XMMRegister dst, XMMRegister src, bool merge, int vector_len) { Assembler::evmovdqub(dst, src, merge, vector_len); } void evmovdqub(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len) { Assembler::evmovdqub(dst, mask, src, merge, vector_len); } + void evmovdqub(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len) { Assembler::evmovdqub(dst, mask, src, merge, vector_len); } void evmovdqub(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge, 
int vector_len, Register scratch_reg); void evmovdquw(Address dst, XMMRegister src, bool merge, int vector_len) { Assembler::evmovdquw(dst, src, merge, vector_len); } diff --git a/src/hotspot/cpu/x86/x86.ad b/src/hotspot/cpu/x86/x86.ad index 7dd6dd4bde2..499493e6aa2 100644 --- a/src/hotspot/cpu/x86/x86.ad +++ b/src/hotspot/cpu/x86/x86.ad @@ -1624,6 +1624,13 @@ const bool Matcher::match_rule_supported(int opcode) { return false; } break; + case Op_VectorMaskGen: + case Op_LoadVectorMasked: + case Op_StoreVectorMasked: + if (UseAVX < 3) { + return false; + } + break; case Op_SqrtF: if (UseSSE < 1) { return false; @@ -1692,6 +1699,16 @@ const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType return false; } break; + case Op_VectorMaskGen: + case Op_LoadVectorMasked: + case Op_StoreVectorMasked: + if (!VM_Version::supports_avx512bw()) { + return false; + } + if ((size_in_bits != 512) && !VM_Version::supports_avx512vl()) { + return false; + } + break; case Op_CMoveVD: if (vlen != 4) { return false; // implementation limitation (only vcmov4D_reg is present) @@ -7887,3 +7904,51 @@ instruct vpternlog_mem(vec dst, vec src2, memory src3, immU8 func) %{ %} ins_pipe( pipe_slow ); %} + +#ifdef _LP64 +// ---------------------------------- Masked Block Copy ------------------------------------ + +instruct vmasked_load64(vec dst, memory mem, rRegL mask) %{ + match(Set dst (LoadVectorMasked mem mask)); + format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %} + ins_encode %{ + BasicType elmType = this->bottom_type()->is_vect()->element_basic_type(); + int vector_len = vector_length_encoding(this); + __ kmovql(k2, $mask$$Register); + __ evmovdqu(elmType, k2, $dst$$XMMRegister, $mem$$Address, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vmask_gen(rRegL dst, rRegL len, rRegL tempLen) %{ + match(Set dst (VectorMaskGen len)); + effect(TEMP_DEF dst, TEMP tempLen); + format %{ "vector_mask_gen $len \t! 
vector mask generator" %} + ins_encode %{ + __ genmask($dst$$Register, $len$$Register, $tempLen$$Register); + %} + ins_pipe( pipe_slow ); +%} + +instruct vmask_gen_imm(rRegL dst, immL len) %{ + match(Set dst (VectorMaskGen len)); + format %{ "vector_mask_gen $len \t! vector mask generator" %} + ins_encode %{ + __ mov64($dst$$Register, (0xFFFFFFFFFFFFFFFFUL >> (64 -$len$$constant))); + %} + ins_pipe( pipe_slow ); +%} + +instruct vmasked_store64(memory mem, vec src, rRegL mask) %{ + match(Set mem (StoreVectorMasked mem (Binary src mask))); + format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %} + ins_encode %{ + const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src))); + BasicType elmType = src_node->bottom_type()->is_vect()->element_basic_type(); + int vector_len = vector_length_encoding(src_node); + __ kmovql(k2, $mask$$Register); + __ evmovdqu(elmType, k2, $mem$$Address, $src$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} +#endif // _LP64 diff --git a/src/hotspot/share/adlc/forms.cpp b/src/hotspot/share/adlc/forms.cpp index 7037c54cc7e..f8038dc4c32 100644 --- a/src/hotspot/share/adlc/forms.cpp +++ b/src/hotspot/share/adlc/forms.cpp @@ -267,6 +267,7 @@ Form::DataType Form::is_load_from_memory(const char *opType) const { if( strcmp(opType,"LoadS")==0 ) return Form::idealS; if( strcmp(opType,"LoadVector")==0 ) return Form::idealV; if( strcmp(opType,"LoadVectorGather")==0 ) return Form::idealV; + if( strcmp(opType,"LoadVectorMasked")==0 ) return Form::idealV; assert( strcmp(opType,"Load") != 0, "Must type Loads" ); return Form::none; } @@ -284,6 +285,7 @@ Form::DataType Form::is_store_to_memory(const char *opType) const { if( strcmp(opType,"StoreNKlass")==0) return Form::idealNKlass; if( strcmp(opType,"StoreVector")==0 ) return Form::idealV; if( strcmp(opType,"StoreVectorScatter")==0 ) return Form::idealV; + if( strcmp(opType,"StoreVectorMasked")==0 ) return Form::idealV; assert( strcmp(opType,"Store") != 0, 
"Must type Stores" ); return Form::none; } diff --git a/src/hotspot/share/adlc/formssel.cpp b/src/hotspot/share/adlc/formssel.cpp index 3c0a9267e51..6e387556b42 100644 --- a/src/hotspot/share/adlc/formssel.cpp +++ b/src/hotspot/share/adlc/formssel.cpp @@ -779,8 +779,9 @@ bool InstructForm::captures_bottom_type(FormDict &globals) const { !strcmp(_matrule->_rChild->_opType,"ShenandoahCompareAndExchangeN") || #endif !strcmp(_matrule->_rChild->_opType,"StrInflatedCopy") || + !strcmp(_matrule->_rChild->_opType,"VectorMaskGen")|| !strcmp(_matrule->_rChild->_opType,"CompareAndExchangeP") || - !strcmp(_matrule->_rChild->_opType,"CompareAndExchangeN"))) return true; + !strcmp(_matrule->_rChild->_opType,"CompareAndExchangeN"))) return true; else if ( is_ideal_load() == Form::idealP ) return true; else if ( is_ideal_store() != Form::none ) return true; @@ -3511,7 +3512,7 @@ int MatchNode::needs_ideal_memory_edge(FormDict &globals) const { "StoreB","StoreC","Store" ,"StoreFP", "LoadI", "LoadL", "LoadP" ,"LoadN", "LoadD" ,"LoadF" , "LoadB" , "LoadUB", "LoadUS" ,"LoadS" ,"Load" , - "StoreVector", "LoadVector", "LoadVectorGather", "StoreVectorScatter", + "StoreVector", "LoadVector", "LoadVectorGather", "StoreVectorScatter", "LoadVectorMasked", "StoreVectorMasked", "LoadRange", "LoadKlass", "LoadNKlass", "LoadL_unaligned", "LoadD_unaligned", "LoadPLocked", "StorePConditional", "StoreIConditional", "StoreLConditional", @@ -4195,7 +4196,7 @@ bool MatchRule::is_vector() const { "VectorRearrange","VectorLoadShuffle", "VectorLoadConst", "VectorCastB2X", "VectorCastS2X", "VectorCastI2X", "VectorCastL2X", "VectorCastF2X", "VectorCastD2X", - "VectorMaskWrapper", "VectorMaskCmp", "VectorReinterpret", + "VectorMaskWrapper", "VectorMaskCmp", "VectorReinterpret","LoadVectorMasked","StoreVectorMasked", "FmaVD", "FmaVF","PopCountVI", // Next are not supported currently. 
"PackB","PackS","PackI","PackL","PackF","PackD","Pack2L","Pack2D", diff --git a/src/hotspot/share/opto/classes.hpp b/src/hotspot/share/opto/classes.hpp index c95115f25ac..37aecbf0899 100644 --- a/src/hotspot/share/opto/classes.hpp +++ b/src/hotspot/share/opto/classes.hpp @@ -394,6 +394,9 @@ macro(LoadVector) macro(LoadVectorGather) macro(StoreVector) macro(StoreVectorScatter) +macro(LoadVectorMasked) +macro(StoreVectorMasked) +macro(VectorMaskGen) macro(Pack) macro(PackB) macro(PackS) diff --git a/src/hotspot/share/opto/compile.cpp b/src/hotspot/share/opto/compile.cpp index 928596e0f07..fcbaf88017c 100644 --- a/src/hotspot/share/opto/compile.cpp +++ b/src/hotspot/share/opto/compile.cpp @@ -3743,6 +3743,9 @@ void Compile::final_graph_reshaping_impl( Node *n, Final_Reshape_Counts &frc) { case Op_StoreVector: case Op_LoadVectorGather: case Op_StoreVectorScatter: + case Op_VectorMaskGen: + case Op_LoadVectorMasked: + case Op_StoreVectorMasked: break; case Op_AddReductionVI: diff --git a/src/hotspot/share/opto/lcm.cpp b/src/hotspot/share/opto/lcm.cpp index 6a6105faf53..86845270e60 100644 --- a/src/hotspot/share/opto/lcm.cpp +++ b/src/hotspot/share/opto/lcm.cpp @@ -718,6 +718,7 @@ void PhaseCFG::adjust_register_pressure(Node* n, Block* block, intptr_t* recalc_ case Op_StoreN: case Op_StoreVector: case Op_StoreVectorScatter: + case Op_StoreVectorMasked: case Op_StoreNKlass: for (uint k = 1; k < m->req(); k++) { Node *in = m->in(k); diff --git a/src/hotspot/share/opto/matcher.cpp b/src/hotspot/share/opto/matcher.cpp index cb9f16b82c9..85d292924d4 100644 --- a/src/hotspot/share/opto/matcher.cpp +++ b/src/hotspot/share/opto/matcher.cpp @@ -2209,6 +2209,7 @@ void Matcher::find_shared( Node *n ) { case Op_FmaVD: case Op_FmaVF: case Op_MacroLogicV: + case Op_LoadVectorMasked: case Op_ThreadRefetch: // This must be added, otherwise we couldn't match the ThreadRefetchNode. 
set_shared(n); // Force result into register (it will be anyways) break; @@ -2379,6 +2380,12 @@ void Matcher::find_shared( Node *n ) { n->del_req(3); break; } + case Op_StoreVectorMasked: { + Node* pair = new BinaryNode(n->in(3), n->in(4)); + n->set_req(3, pair); + n->del_req(4); + break; + } case Op_LoopLimit: { Node *pair1 = new BinaryNode(n->in(1),n->in(2)); n->set_req(1,pair1); diff --git a/src/hotspot/share/opto/node.hpp b/src/hotspot/share/opto/node.hpp index 63b707bca3c..8a37c58b816 100644 --- a/src/hotspot/share/opto/node.hpp +++ b/src/hotspot/share/opto/node.hpp @@ -157,6 +157,8 @@ class TypeNode; class UnlockNode; class VectorNode; class LoadVectorNode; +class LoadVectorMaskedNode; +class StoreVectorMaskedNode; class LoadVectorGatherNode; class StoreVectorNode; class StoreVectorScatterNode; @@ -699,13 +701,15 @@ class Node { DEFINE_CLASS_ID(Parm, Proj, 4) DEFINE_CLASS_ID(MachProj, Proj, 5) - DEFINE_CLASS_ID(Mem, Node, 4) - DEFINE_CLASS_ID(Load, Mem, 0) + DEFINE_CLASS_ID(Mem, Node, 4) + DEFINE_CLASS_ID(Load, Mem, 0) DEFINE_CLASS_ID(LoadVector, Load, 0) DEFINE_CLASS_ID(LoadVectorGather, LoadVector, 0) + DEFINE_CLASS_ID(LoadVectorMasked, LoadVector, 1) DEFINE_CLASS_ID(Store, Mem, 1) DEFINE_CLASS_ID(StoreVector, Store, 0) DEFINE_CLASS_ID(StoreVectorScatter, StoreVector, 0) + DEFINE_CLASS_ID(StoreVectorMasked, StoreVector, 1) DEFINE_CLASS_ID(LoadStore, Mem, 2) DEFINE_CLASS_ID(LoadStoreConditional, LoadStore, 0) DEFINE_CLASS_ID(CompareAndSwap, LoadStoreConditional, 0) diff --git a/src/hotspot/share/opto/vectornode.cpp b/src/hotspot/share/opto/vectornode.cpp index 723b35c78d4..c9fbe40401c 100644 --- a/src/hotspot/share/opto/vectornode.cpp +++ b/src/hotspot/share/opto/vectornode.cpp @@ -648,6 +648,41 @@ StoreVectorNode* StoreVectorNode::make(int opc, Node* ctl, Node* mem, return new StoreVectorNode(ctl, mem, adr, atyp, val); } +Node* LoadVectorMaskedNode::Ideal(PhaseGVN* phase, bool can_reshape) { + Node* mask_len = in(3)->in(1); + const TypeLong* ty = 
phase->type(mask_len)->isa_long(); + if (ty && ty->is_con()) { + BasicType mask_bt = ((VectorMaskGenNode*)in(3))->get_elem_type()->array_element_basic_type(); + uint load_sz = type2aelembytes(mask_bt) * ty->get_con(); + if ( load_sz == 32 || load_sz == 64) { + assert(load_sz == 32 || MaxVectorSize > 32, "Unexpected load size"); + Node* ctr = in(MemNode::Control); + Node* mem = in(MemNode::Memory); + Node* adr = in(MemNode::Address); + return phase->transform(new LoadVectorNode(ctr, mem, adr, adr_type(), vect_type())); + } + } + return NULL; +} + +Node* StoreVectorMaskedNode::Ideal(PhaseGVN* phase, bool can_reshape) { + Node* mask_len = in(4)->in(1); + const TypeLong* ty = phase->type(mask_len)->isa_long(); + if (ty && ty->is_con()) { + BasicType mask_bt = ((VectorMaskGenNode*)in(4))->get_elem_type()->array_element_basic_type(); + uint load_sz = type2aelembytes(mask_bt) * ty->get_con(); + if ( load_sz == 32 || load_sz == 64) { + assert(load_sz == 32 || MaxVectorSize > 32, "Unexpected store size"); + Node* ctr = in(MemNode::Control); + Node* mem = in(MemNode::Memory); + Node* adr = in(MemNode::Address); + Node* val = in(MemNode::ValueIn); + return phase->transform(new StoreVectorNode(ctr, mem, adr, adr_type(), val)); + } + } + return NULL; +} + int ExtractNode::opcode(BasicType bt) { switch (bt) { case T_BOOLEAN: return Op_ExtractUB; diff --git a/src/hotspot/share/opto/vectornode.hpp b/src/hotspot/share/opto/vectornode.hpp index eae8d2a9fa0..4c0b9e152db 100644 --- a/src/hotspot/share/opto/vectornode.hpp +++ b/src/hotspot/share/opto/vectornode.hpp @@ -775,6 +775,56 @@ class StoreVectorNode : public StoreNode { idx == MemNode::ValueIn + 1; } }; +class StoreVectorMaskedNode : public StoreVectorNode { + public: + StoreVectorMaskedNode(Node* c, Node* mem, Node* dst, Node* src, const TypePtr* at, Node* mask) + : StoreVectorNode(c, mem, dst, at, src) { + assert(mask->bottom_type()->is_long(), "sanity"); + init_class_id(Class_StoreVector); + set_mismatched_access(); + 
add_req(mask); + } + + virtual int Opcode() const; + + virtual uint match_edge(uint idx) const { + return idx > 1; + } + Node* Ideal(PhaseGVN* phase, bool can_reshape); +}; + +class LoadVectorMaskedNode : public LoadVectorNode { + public: + LoadVectorMaskedNode(Node* c, Node* mem, Node* src, const TypePtr* at, const TypeVect* vt, Node* mask) + : LoadVectorNode(c, mem, src, at, vt) { + assert(mask->bottom_type()->is_long(), "sanity"); + init_class_id(Class_LoadVector); + set_mismatched_access(); + add_req(mask); + } + + virtual int Opcode() const; + + virtual uint match_edge(uint idx) const { + return idx > 1; + } + Node* Ideal(PhaseGVN* phase, bool can_reshape); +}; + +class VectorMaskGenNode : public TypeNode { + public: + VectorMaskGenNode(Node* length, const Type* ty, const Type* ety): TypeNode(ty, 2), _elemType(ety) { + init_req(1, length); + } + + virtual int Opcode() const; + const Type* get_elem_type() { return _elemType;} + virtual uint size_of() const { return sizeof(VectorMaskGenNode); } + + private: + const Type* _elemType; +}; + //=========================Promote_Scalar_to_Vector============================ //------------------------------ReplicateBNode---------------------------------