Support automatic initialization of the K register mask based on JVM options

Summary: Introduce "reg_mask_init" on the x86 platform.
JinZhonghui · Nov 2, 2023 · c4d5591 · c4d5591
1 parent db0dd20
commit c4d5591
Show file tree

Hide file tree

Showing 5 changed files with 30 additions and 7 deletions.
diff --git a/src/hotspot/cpu/x86/c2_init_x86.cpp b/src/hotspot/cpu/x86/c2_init_x86.cpp
@@ -29,6 +29,8 @@
 
 // processor dependent initialization for i486
 
+extern void reg_mask_init();
+
 void Compile::pd_compiler2_init() {
   guarantee(CodeEntryAlignment >= InteriorEntryAlignment, "" );
   // QQQ presumably all 64bit cpu's support this. Seems like the ifdef could
@@ -58,4 +60,5 @@ void Compile::pd_compiler2_init() {
       OptoReg::invalidate(i);
     }
   }
+  reg_mask_init();
 }
diff --git a/src/hotspot/cpu/x86/macroAssembler_x86.hpp b/src/hotspot/cpu/x86/macroAssembler_x86.hpp
@@ -40,6 +40,7 @@ class MacroAssembler: public Assembler {
 
  public:
 
+  // special instructions for EVEX
   void setvectmask(Register dst, Register src, KRegister mask);
   void restorevectmask(KRegister mask);
 
@@ -160,12 +161,6 @@ class MacroAssembler: public Assembler {
   void incrementq(Register reg, int value = 1);
   void incrementq(Address dst, int value = 1);
 
-#ifdef COMPILER2
-  // special instructions for EVEX
-  void setvectmask(Register dst, Register src);
-  void restorevectmask();
-#endif
-
   // Support optimal SSE move instructions.
   void movflt(XMMRegister dst, XMMRegister src) {
     if (dst-> encoding() == src->encoding()) return;

diff --git a/src/hotspot/cpu/x86/x86.ad b/src/hotspot/cpu/x86/x86.ad
@@ -695,7 +695,8 @@ alloc_class chunk2(K7, K7_H,
                    K2, K2_H,
                    K1, K1_H);
 
-reg_class  vectmask_reg(K2, K2_H,
+reg_class  vectmask_reg(K1, K1_H,
+                        K2, K2_H,
                         K3, K3_H,
                         K4, K4_H,
                         K5, K5_H,

diff --git a/src/hotspot/cpu/x86/x86_32.ad b/src/hotspot/cpu/x86/x86_32.ad
@@ -263,6 +263,18 @@ source %{
 // instructions, to allow sign-masking or sign-bit flipping.  They allow
 // fast versions of NegF/NegD and AbsF/AbsD.
 
+void reg_mask_init() {  // invoked as the last step of Compile::pd_compiler2_init()
+  if (Matcher::has_predicated_vectors()) {
+    // Post-loop multi-versioning expects the mask to be present in the K1 register. Until
+    // that is fixed, the register allocator must not allocate K1; removing it from
+    // _VECTMASK_REG_mask prevents accidental corruption of the value held in K1.
+    if (PostLoopMultiversioning) {
+      const_cast<RegMask*>(&_VECTMASK_REG_mask)->Remove(OptoReg::as_OptoReg(k1->as_VMReg()));  // K1 itself
+      const_cast<RegMask*>(&_VECTMASK_REG_mask)->Remove(OptoReg::as_OptoReg(k1->as_VMReg()->next()));  // slot after K1 (presumably K1_H -- confirm)
+    }
+  }
+}
+
 // Note: 'double' and 'long long' have 32-bits alignment on x86.
 static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
   // Use the expression (adr)&(~0xF) to provide 128-bits aligned address

diff --git a/src/hotspot/cpu/x86/x86_64.ad b/src/hotspot/cpu/x86/x86_64.ad
@@ -550,6 +550,18 @@ source %{
 
 #define __ _masm.
 
+void reg_mask_init() {  // invoked as the last step of Compile::pd_compiler2_init()
+  if (Matcher::has_predicated_vectors()) {
+    // Post-loop multi-versioning expects the mask to be present in the K1 register. Until
+    // that is fixed, the register allocator must not allocate K1; removing it from
+    // _VECTMASK_REG_mask prevents accidental corruption of the value held in K1.
+    if (PostLoopMultiversioning) {
+      const_cast<RegMask*>(&_VECTMASK_REG_mask)->Remove(OptoReg::as_OptoReg(k1->as_VMReg()));  // K1 itself
+      const_cast<RegMask*>(&_VECTMASK_REG_mask)->Remove(OptoReg::as_OptoReg(k1->as_VMReg()->next()));  // slot after K1 (presumably K1_H -- confirm)
+    }
+  }
+}
+
 // Tells whether a vzeroupper instruction should be generated for compilation C.
 // Returns true only when VM_Version::supports_vzeroupper() holds and, in addition,
 // either C->max_vector_size() exceeds 16 or C->clear_upper_avx() is true.
 // NOTE(review): 16 is presumably a size in bytes (one 128-bit XMM register) -- confirm.
 static bool generate_vzeroupper(Compile* C) {
   return (VM_Version::supports_vzeroupper() && (C->max_vector_size() > 16 || C->clear_upper_avx() == true)) ? true: false;  // Generate vzeroupper
 }