Skip to content

Commit

Permalink
hw: Clean up shuffle unit
Browse files Browse the repository at this point in the history
  • Loading branch information
fischeti committed Jul 25, 2024
1 parent bdda4cf commit 3470cd1
Show file tree
Hide file tree
Showing 5 changed files with 205 additions and 104 deletions.
1 change: 1 addition & 0 deletions Bender.yml
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,7 @@ sources:
- hw/snitch_cluster/src/snitch_cluster_peripheral/snitch_cluster_peripheral_reg_top.sv
- hw/snitch_cluster/src/snitch_cluster_peripheral/snitch_cluster_peripheral.sv
- hw/snitch_cluster/src/snitch_fpu.sv
- hw/snitch_cluster/src/snitch_shuffle_unit.sv
- hw/snitch_cluster/src/snitch_sequencer.sv
- hw/snitch_cluster/src/snitch_tcdm_interconnect.sv
# Level 1
Expand Down
8 changes: 5 additions & 3 deletions hw/snitch/src/riscv_instr.sv
Original file line number Diff line number Diff line change
Expand Up @@ -327,7 +327,6 @@ package riscv_instr;
localparam logic [31:0] DMSTAT = 32'b0000101?????00000000?????0101011;
localparam logic [31:0] DMSTR = 32'b0000110??????????000000000101011;
localparam logic [31:0] DMREP = 32'b000011100000?????000000000101011;
localparam logic [31:0] DMMCAST = 32'b000100000000?????000000000101011;
localparam logic [31:0] FREP_O = 32'b????????????????????????10001011;
localparam logic [31:0] FREP_I = 32'b????????????????????????00001011;
localparam logic [31:0] IREP = 32'b?????????????????????????0111111;
Expand Down Expand Up @@ -841,8 +840,11 @@ package riscv_instr;
localparam logic [31:0] VL4R_V = 32'b000011101000?????110?????0000111;
localparam logic [31:0] VL8R_V = 32'b000111101000?????111?????0000111;
localparam logic [31:0] VFSHUFFLE_S = 32'b1011111??????????000?????0110011;
localparam logic [31:0] VFSHUFFLE_H = 32'b1011111??????????010?????0110011;
localparam logic [31:0] VFSHUFFLE_B = 32'b1011111??????????011?????0110011;
localparam logic [31:0] VFSHUFFLE_H = 32'b1011111??????????001?????0110011;
localparam logic [31:0] VFSHUFFLE_B = 32'b1011111??????????010?????0110011;
localparam logic [31:0] VFSHUFFLE2_S = 32'b1011111??????????100?????0110011;
localparam logic [31:0] VFSHUFFLE2_H = 32'b1011111??????????101?????0110011;
localparam logic [31:0] VFSHUFFLE2_B = 32'b1011111??????????110?????0110011;
localparam logic [31:0] IMV_X_W = 32'b111000000000?????000?????1011011;
localparam logic [31:0] IMV_W_X = 32'b111100000000?????000?????1011011;
localparam logic [31:0] IADDI = 32'b?????????????????000?????1111011;
Expand Down
39 changes: 27 additions & 12 deletions hw/snitch/src/snitch.sv
Original file line number Diff line number Diff line change
Expand Up @@ -1601,18 +1601,6 @@ module snitch import snitch_pkg::*; import riscv_instr::*; #(
illegal_inst = 1'b1;
end
end
VFSHUFFLE_S,
VFSHUFFLE_H,
VFSHUFFLE_B: begin
if (FP_EN && XFVEC && RVF) begin
opb_select = Reg;
write_rd = 1'b0;
acc_qvalid_o = valid_instr;
shuffle = 1'b1;
end else begin
illegal_inst = 1'b1;
end
end
VFCVT_S_B,
VFCVTU_S_B: begin
if (FP_EN && XFVEC && RVF && FLEN >= 16) begin
Expand Down Expand Up @@ -1721,6 +1709,33 @@ module snitch import snitch_pkg::*; import riscv_instr::*; #(
illegal_inst = 1'b1;
end
end
// Shuffle instructions (VFSHUFFLE* and VFSHUFFLE2*).
// Each group below behaves the same way: when the configuration check passes,
// the integer register file is not written (write_rd = 0) and acc_qvalid_o is
// raised for a valid instruction; otherwise the instruction is flagged illegal.
// Only the minimum FLEN differs between the _S, _H and _B groups.
// _S variants: require FLEN >= 64.
VFSHUFFLE_S,
VFSHUFFLE2_S: begin
if (FP_EN && XFVEC && FLEN >= 64) begin
write_rd = 1'b0;
acc_qvalid_o = valid_instr;
end else begin
illegal_inst = 1'b1;
end
end
// _H variants: require FLEN >= 32.
VFSHUFFLE_H,
VFSHUFFLE2_H: begin
if (FP_EN && XFVEC && FLEN >= 32) begin
write_rd = 1'b0;
acc_qvalid_o = valid_instr;
end else begin
illegal_inst = 1'b1;
end
end
// _B variants: require FLEN >= 16.
VFSHUFFLE_B,
VFSHUFFLE2_B: begin
if (FP_EN && XFVEC && FLEN >= 16) begin
write_rd = 1'b0;
acc_qvalid_o = valid_instr;
end else begin
illegal_inst = 1'b1;
end
end
// Offload FP-Int Instructions - fire and forget
// Double Precision Floating-Point
FLE_D,
Expand Down
149 changes: 60 additions & 89 deletions hw/snitch_cluster/src/snitch_fp_ss.sv
Original file line number Diff line number Diff line change
Expand Up @@ -128,8 +128,9 @@ module snitch_fp_ss import snitch_pkg::*; #(
// Shuffle Unit
logic use_shfl;
logic shfl_in_valid, shfl_in_ready;
logic shfl_valid;
logic shfl_in_ssr;
logic shfl_out_valid, shfl_out_ready;
logic [FLEN-1:0] shfl_result;
tag_t shfl_tag_in, shfl_tag_out;

// FPU Controller
logic fpu_out_valid, fpu_out_ready;
Expand Down Expand Up @@ -246,7 +247,7 @@ module snitch_fp_ss import snitch_pkg::*; #(
// 2. The LSU request can be handled
// 3. The regfile operand is ready
// 4. The Shuffle Unit and all operands are ready
assign shfl_in_valid = use_shfl & acc_req_valid_q & &(op_ready) & dst_ready;
// A shuffle may only be issued for a pending request: use_shfl is produced by
// combinational decode, so it must be qualified with acc_req_valid_q exactly
// like fpu_in_valid. Otherwise the shuffle unit could see a valid input (and
// later trigger a register-file/SSR write) while no request is present.
assign shfl_in_valid = use_shfl & acc_req_valid_q & (&op_ready) & dst_ready;
assign fpu_in_valid = use_fpu & acc_req_valid_q & (&op_ready) & dst_ready;
// FPU ready
assign acc_req_ready_q = dst_ready & ((fpu_in_ready & fpu_in_valid)
Expand All @@ -259,7 +260,7 @@ module snitch_fp_ss import snitch_pkg::*; #(
| (shfl_in_ready & shfl_in_valid));

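// The Shuffle Unit result can be written back only when the write port is not
// claimed by another source: a request whose result comes from the accelerator bus,
// an FPU result not destined for the accelerator bus, or an LSU response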
assign shfl_in_ready = (!(acc_req_valid_q && result_select == ResAccBus)
assign shfl_out_ready = (!(acc_req_valid_q && result_select == ResAccBus)
& !(fpu_out_valid && !fpu_tag_out.acc) & !lsu_pvalid);

// either the FPU or the regfile produced a result
Expand Down Expand Up @@ -329,13 +330,13 @@ module snitch_fp_ss import snitch_pkg::*; #(
fpu_tag_in.rd = rd;
fpu_tag_in.acc = 1'b0; // RD is on accelerator bus
fpu_tag_in.ssr = ssr_active_q & is_rd_ssr;
shfl_tag_in = fpu_tag_in;

is_store = 1'b0;
is_load = 1'b0;
ls_size = Word;

use_shfl = 1'b0;
shfl_in_ssr = ssr_active_q & is_rd_ssr;

// Destination register is in FPR
rd_is_fp = 1'b1;
Expand Down Expand Up @@ -548,15 +549,24 @@ module snitch_fp_ss import snitch_pkg::*; #(
end
riscv_instr::VFSHUFFLE_S: begin
op_select[0] = RegA;
op_select[1] = AccBus;
op_select[1] = RegB;
src_fmt = fpnew_pkg::FP32;
dst_fmt = fpnew_pkg::FP32;
vectorial_op = 1'b1;
use_fpu = 1'b0;
use_shfl = 1'b1;
end
riscv_instr::VFSHUFFLE2_S: begin
op_select[0] = RegA;
op_select[1] = RegB;
op_select[2] = RegDest;
src_fmt = fpnew_pkg::FP32;
dst_fmt = fpnew_pkg::FP32;
vectorial_op = 1'b1;
set_dyn_rm = 1'b1; // fix round mode for vectors and fp16alt
use_fpu = 1'b0;
use_shfl = 1'b1;
end
op_mode = 1'b1;
end
// Double Precision
riscv_instr::FADD_D: begin
fpu_op = fpnew_pkg::ADD;
Expand Down Expand Up @@ -1127,14 +1137,23 @@ module snitch_fp_ss import snitch_pkg::*; #(
end
riscv_instr::VFSHUFFLE_H: begin
op_select[0] = RegA;
op_select[1] = AccBus;
op_select[1] = RegB;
src_fmt = fpnew_pkg::FP16;
dst_fmt = fpnew_pkg::FP16;
vectorial_op = 1'b1;
use_fpu = 1'b0;
use_shfl = 1'b1;
end
riscv_instr::VFSHUFFLE2_H: begin
op_select[0] = RegA;
op_select[1] = RegB;
op_select[2] = RegDest;
src_fmt = fpnew_pkg::FP16;
dst_fmt = fpnew_pkg::FP16;
vectorial_op = 1'b1;
set_dyn_rm = 1'b1; // fix round mode for vectors and fp16alt
use_fpu = 1'b0;
use_shfl = 1'b1;
op_mode = 1'b1;
end
// [Alternate] Quarter Precision
riscv_instr::FADD_B: begin
Expand Down Expand Up @@ -1658,14 +1677,23 @@ module snitch_fp_ss import snitch_pkg::*; #(
end
riscv_instr::VFSHUFFLE_B: begin
op_select[0] = RegA;
op_select[1] = AccBus;
op_select[1] = RegB;
src_fmt = fpnew_pkg::FP8;
dst_fmt = fpnew_pkg::FP8;
vectorial_op = 1'b1;
use_fpu = 1'b0;
use_shfl = 1'b1;
end
riscv_instr::VFSHUFFLE2_B: begin
op_select[0] = RegA;
op_select[1] = RegB;
op_select[2] = RegDest;
src_fmt = fpnew_pkg::FP8;
dst_fmt = fpnew_pkg::FP8;
vectorial_op = 1'b1;
set_dyn_rm = 1'b1; // fix round mode for vectors and fp16alt
use_fpu = 1'b0;
use_shfl = 1'b1;
op_mode = 1'b1;
end
// -------------------
// From float to int
Expand Down Expand Up @@ -2483,81 +2511,24 @@ module snitch_fp_ss import snitch_pkg::*; #(
// Shuffle Unit
// ----------------------

logic [FLEN-1:0] shfl_result;
logic [7:0] vec_mask;
logic [7:0][2:0] element_mask;
logic [31:0] num_elements;

logic [(FLEN/32)-1:0][31:0] rA_32, rD_32, rA_op_32, rD_op_32;
logic [(FLEN/16)-1:0][15:0] rA_16, rD_16, rA_op_16, rD_op_16;
logic [(FLEN/8)-1:0][7:0] rA_8, rD_8, rA_op_8, rD_op_8;

always_comb begin
shfl_valid = 1'b0;
shfl_result = '0;
vec_mask = '0;
element_mask = '0;
rA_32 = '0;
rD_32 = '0;
rA_16 = '0;
rD_16 = '0;
rA_8 = '0;
rD_8 = '0;

if (shfl_in_valid & shfl_in_ready) begin

for (int i = 0; i < 8; i++) begin
vec_mask[i] = op[1][(i*4)+3];
element_mask[i] = op[1][(i*4) +: 3];
end

unique case (src_fmt)
fpnew_pkg::FP32: begin
num_elements = FLEN/32;

rA_op_32 = op[0];
rD_op_32 = op[2];

for (int i = 0; i < (num_elements); i++) begin

rA_32[i] = rA_op_32[element_mask[i]];
rD_32[i] = rD_op_32[element_mask[i]];

shfl_result[(i*32) +: 32] = vec_mask[i] ? rA_32[i] : rD_32[i];
end
end
fpnew_pkg::FP16: begin
num_elements = FLEN/16;

rA_op_16 = op[0];
rD_op_16 = op[2];

for (int i = 0; i < (num_elements); i++) begin
rA_16[i] = rA_op_16[element_mask[i]];
rD_16[i] = rD_op_16[element_mask[i]];

shfl_result[(i*16) +: 16] = vec_mask[i] ? rA_16[i] : rD_16[i];
end
end
fpnew_pkg::FP8: begin
num_elements = FLEN/8;

rA_op_8 = op[0];
rD_op_8 = op[2];

for (int i = 0; i < (num_elements); i++) begin

rA_8[i] = rA_op_8[element_mask[i]];
rD_8[i] = rD_op_8[element_mask[i]];

shfl_result[(i*8) +: 8] = vec_mask[i] ? rA_8[i] : rD_8[i];
end
end
endcase

shfl_valid = 1'b1;
end
end
// Shuffle unit instance. It takes the selected operands and a tag through a
// valid/ready input handshake and returns the shuffled result and the tag
// through a valid/ready output handshake.
snitch_shuffle_unit #(
.XFVEC(XFVEC),
.FLEN (FLEN)
) i_snitch_shuffle_unit (
.clk_i,
// This module's reset is rst_i; it is inverted for the unit's rst_ni port.
.rst_ni ( ~rst_i ),
.operands_i (op),
// op_mode is set to 1 by the VFSHUFFLE2_* decode entries.
.op_mod_i (op_mode),
.src_fmt_i (src_fmt),
.dst_fmt_i (dst_fmt),
// The tag travels with the operation; the write-back logic reads its ssr
// field (shfl_tag_out.ssr) on the output side.
.tag_i (shfl_tag_in),
.in_valid_i (shfl_in_valid),
.in_ready_o (shfl_in_ready),
.result_o (shfl_result),
.tag_o (shfl_tag_out),
// shfl_out_valid is what triggers the register-file / SSR write-back.
.out_valid_o(shfl_out_valid),
.out_ready_i(shfl_out_ready)
);

// ----------------------
// Operand Select
Expand Down Expand Up @@ -2718,9 +2689,9 @@ module snitch_fp_ss import snitch_pkg::*; #(
fpr_waddr = lsu_rd;
fpr_wvalid = 1'b1;
fpr_wready = 1'b0;
end else if (shfl_valid) begin
end else if (shfl_out_valid) begin
fpr_we = 1'b1;
if (shfl_in_ssr) begin
if (shfl_tag_out.ssr) begin
ssr_wvalid_o = 1'b1;
// stall write-back to SSR
if (!ssr_wready_i) begin
Expand Down
Loading

0 comments on commit 3470cd1

Please sign in to comment.