From ab31d6e3b75d3b28dd62097d3d06e731ef8e2c78 Mon Sep 17 00:00:00 2001 From: Joshua Zhu Date: Thu, 2 Nov 2023 11:48:09 +0800 Subject: [PATCH] Support the automatic init of K register mask by JVM options Summary: Introduce "reg_mask_init" in x86 platform. --- src/hotspot/cpu/x86/c2_init_x86.cpp | 3 +++ src/hotspot/cpu/x86/macroAssembler_x86.hpp | 7 +------ src/hotspot/cpu/x86/x86.ad | 3 ++- src/hotspot/cpu/x86/x86_32.ad | 12 ++++++++++++ src/hotspot/cpu/x86/x86_64.ad | 12 ++++++++++++ 5 files changed, 30 insertions(+), 7 deletions(-) diff --git a/src/hotspot/cpu/x86/c2_init_x86.cpp b/src/hotspot/cpu/x86/c2_init_x86.cpp index 522af0038ae..81f1e4e50c3 100644 --- a/src/hotspot/cpu/x86/c2_init_x86.cpp +++ b/src/hotspot/cpu/x86/c2_init_x86.cpp @@ -29,6 +29,8 @@ // processor dependent initialization for i486 +extern void reg_mask_init(); + void Compile::pd_compiler2_init() { guarantee(CodeEntryAlignment >= InteriorEntryAlignment, "" ); // QQQ presumably all 64bit cpu's support this. Seems like the ifdef could @@ -58,4 +60,5 @@ void Compile::pd_compiler2_init() { OptoReg::invalidate(i); } } + reg_mask_init(); } diff --git a/src/hotspot/cpu/x86/macroAssembler_x86.hpp b/src/hotspot/cpu/x86/macroAssembler_x86.hpp index 067f815a77f..b0ae16f4cec 100644 --- a/src/hotspot/cpu/x86/macroAssembler_x86.hpp +++ b/src/hotspot/cpu/x86/macroAssembler_x86.hpp @@ -40,6 +40,7 @@ class MacroAssembler: public Assembler { public: + // special instructions for EVEX void setvectmask(Register dst, Register src, KRegister mask); void restorevectmask(KRegister mask); @@ -160,12 +161,6 @@ class MacroAssembler: public Assembler { void incrementq(Register reg, int value = 1); void incrementq(Address dst, int value = 1); -#ifdef COMPILER2 - // special instructions for EVEX - void setvectmask(Register dst, Register src); - void restorevectmask(); -#endif - // Support optimal SSE move instructions. 
void movflt(XMMRegister dst, XMMRegister src) { if (dst-> encoding() == src->encoding()) return; diff --git a/src/hotspot/cpu/x86/x86.ad b/src/hotspot/cpu/x86/x86.ad index 0c8b9ba503c..3327b8497d5 100644 --- a/src/hotspot/cpu/x86/x86.ad +++ b/src/hotspot/cpu/x86/x86.ad @@ -695,7 +695,8 @@ alloc_class chunk2(K7, K7_H, K2, K2_H, K1, K1_H); -reg_class vectmask_reg(K2, K2_H, +reg_class vectmask_reg(K1, K1_H, + K2, K2_H, K3, K3_H, K4, K4_H, K5, K5_H, diff --git a/src/hotspot/cpu/x86/x86_32.ad b/src/hotspot/cpu/x86/x86_32.ad index bdc5ea35a32..1e7eb575f09 100644 --- a/src/hotspot/cpu/x86/x86_32.ad +++ b/src/hotspot/cpu/x86/x86_32.ad @@ -263,6 +263,18 @@ source %{ // instructions, to allow sign-masking or sign-bit flipping. They allow // fast versions of NegF/NegD and AbsF/AbsD. +void reg_mask_init() { + if (Matcher::has_predicated_vectors()) { + // Post-loop multi-versioning expects mask to be present in K1 register, till the time + // it is fixed, RA should not be allocating K1 register, this shall prevent any accidental + // corruption of value held in K1 register. + if (PostLoopMultiversioning) { + const_cast<RegMask*>(&_VECTMASK_REG_mask)->Remove(OptoReg::as_OptoReg(k1->as_VMReg())); + const_cast<RegMask*>(&_VECTMASK_REG_mask)->Remove(OptoReg::as_OptoReg(k1->as_VMReg()->next())); + } + } +} + // Note: 'double' and 'long long' have 32-bits alignment on x86. static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) { // Use the expression (adr)&(~0xF) to provide 128-bits aligned address diff --git a/src/hotspot/cpu/x86/x86_64.ad b/src/hotspot/cpu/x86/x86_64.ad index d5477fea012..6e08e476a56 100644 --- a/src/hotspot/cpu/x86/x86_64.ad +++ b/src/hotspot/cpu/x86/x86_64.ad @@ -550,6 +550,18 @@ source %{ #define __ _masm. 
+void reg_mask_init() { + if (Matcher::has_predicated_vectors()) { + // Post-loop multi-versioning expects mask to be present in K1 register, till the time + // it is fixed, RA should not be allocating K1 register, this shall prevent any accidental + // corruption of value held in K1 register. + if (PostLoopMultiversioning) { + const_cast<RegMask*>(&_VECTMASK_REG_mask)->Remove(OptoReg::as_OptoReg(k1->as_VMReg())); + const_cast<RegMask*>(&_VECTMASK_REG_mask)->Remove(OptoReg::as_OptoReg(k1->as_VMReg()->next())); + } + } +} + static bool generate_vzeroupper(Compile* C) { return (VM_Version::supports_vzeroupper() && (C->max_vector_size() > 16 || C->clear_upper_avx() == true)) ? true: false; // Generate vzeroupper }