diff --git a/hardware/include/ara_pkg.sv b/hardware/include/ara_pkg.sv index d071463f4..001545d10 100644 --- a/hardware/include/ara_pkg.sv +++ b/hardware/include/ara_pkg.sv @@ -151,6 +151,8 @@ package ara_pkg; VCPOP, VFIRST, // Mask operations VMANDNOT, VMAND, VMOR, VMXOR, VMORNOT, VMNAND, VMNOR, VMXNOR, + // Complex permutations + VRGATHER, VRGATHEREI16, VCOMPRESS, // Scalar moves from VRF VMVXS, VFMVFS, // Slide instructions diff --git a/hardware/src/ara_dispatcher.sv b/hardware/src/ara_dispatcher.sv index fe883e534..968fe6edc 100644 --- a/hardware/src/ara_dispatcher.sv +++ b/hardware/src/ara_dispatcher.sv @@ -623,6 +623,22 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( 6'b001001: ara_req.op = ara_pkg::VAND; 6'b001010: ara_req.op = ara_pkg::VOR; 6'b001011: ara_req.op = ara_pkg::VXOR; + 6'b001100: begin + ara_req.op = ara_pkg::VRGATHER; + ara_req.eew_vs1 = eew_q[ara_req.vs1]; + // The MASKU will ask for elements from vs2 through the MaskB opqueue + // and deshuffle them with eew_vd_op encoding + ara_req.eew_vd_op = eew_q[ara_req.vs2]; + end + 6'b001110: begin // VRGATHEREI16 + ara_req.op = ara_pkg::VRGATHEREI16; + // This allows the MASKU to deshuffle vs1 correctly + // Then, the MASKU will treat vs1 with eew == EW16 + ara_req.eew_vs1 = eew_q[ara_req.vs1]; + // The MASKU will ask for elements from vs2 through the MaskB opqueue + // and deshuffle them with eew_vd_op encoding + ara_req.eew_vd_op = eew_q[ara_req.vs2]; + end 6'b010000: begin ara_req.op = ara_pkg::VADC; @@ -862,6 +878,12 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( 6'b001001: ara_req.op = ara_pkg::VAND; 6'b001010: ara_req.op = ara_pkg::VOR; 6'b001011: ara_req.op = ara_pkg::VXOR; + 6'b001100: begin + ara_req.op = ara_pkg::VRGATHER; + // The MASKU will ask for elements from vs2 through the MaskB opqueue + // and deshuffle them with eew_vd_op encoding + ara_req.eew_vd_op = eew_q[ara_req.vs2]; + end 6'b001110: begin ara_req.op = ara_pkg::VSLIDEUP; ara_req.stride = 
acc_req_i.rs1; @@ -1100,6 +1122,12 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( 6'b001001: ara_req.op = ara_pkg::VAND; 6'b001010: ara_req.op = ara_pkg::VOR; 6'b001011: ara_req.op = ara_pkg::VXOR; + 6'b001100: begin + ara_req.op = ara_pkg::VRGATHER; + // The MASKU will ask for elements from vs2 through the MaskB opqueue + // and deshuffle them with eew_vd_op encoding + ara_req.eew_vd_op = eew_q[ara_req.vs2]; + end 6'b001110: begin ara_req.op = ara_pkg::VSLIDEUP; ara_req.stride = {{ELEN{insn.varith_type.rs1[19]}}, insn.varith_type.rs1}; @@ -1460,6 +1488,11 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( 6'b001001: ara_req.op = ara_pkg::VAADD; 6'b001010: ara_req.op = ara_pkg::VASUBU; 6'b001011: ara_req.op = ara_pkg::VASUB; + 6'b010111: begin + ara_req.op = ara_pkg::VCOMPRESS; + // vcompress is only defined unmasked (vm == 1); the masked encoding (vm == 0) is reserved + if (!insn.varith_type.vm) illegal_insn = 1'b1; + end 6'b011000: begin ara_req.op = ara_pkg::VMANDNOT; // The source operands should have the same byte encoding diff --git a/hardware/src/masku/masku.sv b/hardware/src/masku/masku.sv index ce61ab46f..cdd2efb6d 100644 --- a/hardware/src/masku/masku.sv +++ b/hardware/src/masku/masku.sv @@ -221,6 +221,10 @@ module masku import ara_pkg::*; import rvv_pkg::*; #( logic [NrLanes*DataWidth-1:0] alu_result_vm, alu_result_vm_m, alu_result_vm_shuf; logic found_one, found_one_d, found_one_q; + // VRGATHER/VCOMPRESS signals + // Current vrgather index + logic [15:0] vrgat_idx; + // How many elements we are processing per cycle logic [idx_width(NrLanes*DataWidth):0] delta_elm_d, delta_elm_q; @@ -685,12 +689,6 @@ module masku import ara_pkg::*; import rvv_pkg::*; #( logic [NrLanes*DataWidth-1:0] vrgat_res; // Buffer for the current element logic [DataWidth-1:0] vrgat_buf; - // EEW of the current element - vsew_e vrgat_eew; - // Max 64Ki elements to count - logic [15:0] vrgat_cnt; - // Current index - logic [15:0] vrgat_idx; // Extract the correct elements 
vrgat_res = '1; // Default assignment @@ -986,6 +984,12 @@ module masku import ara_pkg::*; import rvv_pkg::*; #( // VCOMPRESS + + // Stream the address requests into the lanes in parallel + + // Save the indices into the MASKU ALU vrgather/vcompress queue for later use + + /////////////////////// // MASKU ALU Control // ///////////////////////