From ab31d6e3b75d3b28dd62097d3d06e731ef8e2c78 Mon Sep 17 00:00:00 2001 From: Joshua Zhu Date: Thu, 2 Nov 2023 11:48:09 +0800 Subject: [PATCH] Support the automatic init of K register mask by JVM options Summary: Introduce "reg_mask_init" in x86 platform. --- src/hotspot/cpu/x86/c2_init_x86.cpp | 3 +++ src/hotspot/cpu/x86/macroAssembler_x86.hpp | 7 +------ src/hotspot/cpu/x86/x86.ad | 3 ++- src/hotspot/cpu/x86/x86_32.ad | 12 ++++++++++++ src/hotspot/cpu/x86/x86_64.ad | 12 ++++++++++++ 5 files changed, 30 insertions(+), 7 deletions(-) diff --git a/src/hotspot/cpu/x86/c2_init_x86.cpp b/src/hotspot/cpu/x86/c2_init_x86.cpp index 522af0038ae..81f1e4e50c3 100644 --- a/src/hotspot/cpu/x86/c2_init_x86.cpp +++ b/src/hotspot/cpu/x86/c2_init_x86.cpp @@ -29,6 +29,8 @@ // processor dependent initialization for i486 +extern void reg_mask_init(); + void Compile::pd_compiler2_init() { guarantee(CodeEntryAlignment >= InteriorEntryAlignment, "" ); // QQQ presumably all 64bit cpu's support this. Seems like the ifdef could @@ -58,4 +60,5 @@ void Compile::pd_compiler2_init() { OptoReg::invalidate(i); } } + reg_mask_init(); } diff --git a/src/hotspot/cpu/x86/macroAssembler_x86.hpp b/src/hotspot/cpu/x86/macroAssembler_x86.hpp index 067f815a77f..b0ae16f4cec 100644 --- a/src/hotspot/cpu/x86/macroAssembler_x86.hpp +++ b/src/hotspot/cpu/x86/macroAssembler_x86.hpp @@ -40,6 +40,7 @@ class MacroAssembler: public Assembler { public: + // special instructions for EVEX void setvectmask(Register dst, Register src, KRegister mask); void restorevectmask(KRegister mask); @@ -160,12 +161,6 @@ class MacroAssembler: public Assembler { void incrementq(Register reg, int value = 1); void incrementq(Address dst, int value = 1); -#ifdef COMPILER2 - // special instructions for EVEX - void setvectmask(Register dst, Register src); - void restorevectmask(); -#endif - // Support optimal SSE move instructions. 
void movflt(XMMRegister dst, XMMRegister src) { if (dst-> encoding() == src->encoding()) return; diff --git a/src/hotspot/cpu/x86/x86.ad b/src/hotspot/cpu/x86/x86.ad index 0c8b9ba503c..3327b8497d5 100644 --- a/src/hotspot/cpu/x86/x86.ad +++ b/src/hotspot/cpu/x86/x86.ad @@ -695,7 +695,8 @@ alloc_class chunk2(K7, K7_H, K2, K2_H, K1, K1_H); -reg_class vectmask_reg(K2, K2_H, +reg_class vectmask_reg(K1, K1_H, + K2, K2_H, K3, K3_H, K4, K4_H, K5, K5_H, diff --git a/src/hotspot/cpu/x86/x86_32.ad b/src/hotspot/cpu/x86/x86_32.ad index bdc5ea35a32..1e7eb575f09 100644 --- a/src/hotspot/cpu/x86/x86_32.ad +++ b/src/hotspot/cpu/x86/x86_32.ad @@ -263,6 +263,18 @@ source %{ // instructions, to allow sign-masking or sign-bit flipping. They allow // fast versions of NegF/NegD and AbsF/AbsD. +void reg_mask_init() { + if (Matcher::has_predicated_vectors()) { + // Post-loop multi-versioning expects mask to be present in K1 register, till the time + // it is fixed, RA should not be allocating K1 register, this shall prevent any accidental + // corruption of value held in K1 register. + if (PostLoopMultiversioning) { + const_cast<RegMask*>(&_VECTMASK_REG_mask)->Remove(OptoReg::as_OptoReg(k1->as_VMReg())); + const_cast<RegMask*>(&_VECTMASK_REG_mask)->Remove(OptoReg::as_OptoReg(k1->as_VMReg()->next())); + } + } +} + // Note: 'double' and 'long long' have 32-bits alignment on x86. static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) { // Use the expression (adr)&(~0xF) to provide 128-bits aligned address diff --git a/src/hotspot/cpu/x86/x86_64.ad b/src/hotspot/cpu/x86/x86_64.ad index d5477fea012..6e08e476a56 100644 --- a/src/hotspot/cpu/x86/x86_64.ad +++ b/src/hotspot/cpu/x86/x86_64.ad @@ -550,6 +550,18 @@ source %{ #define __ _masm. 
+void reg_mask_init() { + if (Matcher::has_predicated_vectors()) { + // Post-loop multi-versioning expects mask to be present in K1 register, till the time + // it is fixed, RA should not be allocating K1 register, this shall prevent any accidental + // corruption of value held in K1 register. + if (PostLoopMultiversioning) { + const_cast<RegMask*>(&_VECTMASK_REG_mask)->Remove(OptoReg::as_OptoReg(k1->as_VMReg())); + const_cast<RegMask*>(&_VECTMASK_REG_mask)->Remove(OptoReg::as_OptoReg(k1->as_VMReg()->next())); + } + } +} + static bool generate_vzeroupper(Compile* C) { return (VM_Version::supports_vzeroupper() && (C->max_vector_size() > 16 || C->clear_upper_avx() == true)) ? true: false; // Generate vzeroupper }