Skip to content

Commit

Permalink
[hardware] WIP: vrgather / vcompress debug
Browse files Browse the repository at this point in the history
  • Loading branch information
mp-17 committed Nov 25, 2024
1 parent fd96b2d commit ef73126
Show file tree
Hide file tree
Showing 8 changed files with 95 additions and 43 deletions.
4 changes: 4 additions & 0 deletions hardware/include/ara_pkg.sv
Original file line number Diff line number Diff line change
Expand Up @@ -967,6 +967,10 @@ package ara_pkg;
// VRGATHER / VCOMPRESS //
//////////////////////////

// Number of extra entries added to the MaskB operand queue (cmd and data buffers)
// to buffer more elements for VRGATHER/VCOMPRESS. Must be a power of 2.
localparam VrgatherOpQueueBufDepth = 2;

// Indices are 16-bit at most because of RISC-V V VLEN limitation at 64Kibit
typedef logic [$clog2(rvv_pkg::RISCV_MAX_VLEN)-1:0] max_vlen_t;

Expand Down
6 changes: 6 additions & 0 deletions hardware/src/lane/lane.sv
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,8 @@ module lane import ara_pkg::*; import rvv_pkg::*; #(
logic [NrVInsn-1:0] alu_vinsn_done;
logic mfpu_ready;
logic [NrVInsn-1:0] mfpu_vinsn_done;
// Interface with the MaskB operand queue (VRGATHER/VCOMPRESS)
logic mask_b_cmd_pop;

// Additional signals to please Verilator's hierarchical verilation
pe_req_t pe_req;
Expand Down Expand Up @@ -246,6 +248,8 @@ module lane import ara_pkg::*; import rvv_pkg::*; #(
.operand_request_ready_i(operand_request_ready),
.alu_vinsn_done_o (alu_vinsn_done_o ),
.mfpu_vinsn_done_o (mfpu_vinsn_done_o ),
// Interface with the Operand Queue
.mask_b_cmd_pop_i (mask_b_cmd_pop ),
// Interface with the VFUs
.vfu_operation_o (vfu_operation ),
.vfu_operation_valid_o (vfu_operation_valid ),
Expand Down Expand Up @@ -437,6 +441,8 @@ module lane import ara_pkg::*; import rvv_pkg::*; #(
.operand_queue_ready_o (operand_queue_ready ),
.operand_queue_cmd_i (operand_queue_cmd ),
.operand_queue_cmd_valid_i (operand_queue_cmd_valid ),
// Interface with the Lane Sequencer
.mask_b_cmd_pop_o (mask_b_cmd_pop ),
// Interface with the VFUs
// ALU
.alu_operand_o (alu_operand ),
Expand Down
29 changes: 26 additions & 3 deletions hardware/src/lane/lane_sequencer.sv
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@ module lane_sequencer import ara_pkg::*; import rvv_pkg::*; import cf_math_pkg::
input logic [NrOperandQueues-1:0] operand_request_ready_i,
output logic alu_vinsn_done_o,
output logic mfpu_vinsn_done_o,
// Interface with the Operand Queue (MaskB - for VRGATHER)
input logic mask_b_cmd_pop_i,
// Interface with the lane's VFUs
output vfu_operation_t vfu_operation_o,
output logic vfu_operation_valid_o,
Expand Down Expand Up @@ -167,6 +169,8 @@ module lane_sequencer import ara_pkg::*; import rvv_pkg::*; import cf_math_pkg::
vrgat_req_t masku_vrgat_req_q;
logic masku_vrgat_req_ready_d, masku_vrgat_req_valid_q;

logic [idx_width(VrgatherOpQueueBufDepth)-1:0] vrgat_cmd_req_cnt_d, vrgat_cmd_req_cnt_q;

spill_register #(
.T ( vrgat_req_t )
) i_spill_register_vrgat_req (
Expand All @@ -183,6 +187,10 @@ module lane_sequencer import ara_pkg::*; import rvv_pkg::*; import cf_math_pkg::
always_comb begin
masku_vrgat_req_ready_d = 1'b0;

vrgat_state_d = vrgat_state_q;

vrgat_cmd_req_cnt_d = vrgat_cmd_req_cnt_q;

// If MASKU request arrives, wait until the MaskB requester is free
// Also, lock the MaskB opqueue
unique case (vrgat_state_q)
Expand All @@ -192,8 +200,17 @@ module lane_sequencer import ara_pkg::*; import rvv_pkg::*; import cf_math_pkg::
end
end
REQUESTING: begin
// Pop if the operand requester is ready to accept a request
masku_vrgat_req_ready_d = masku_vrgat_req_valid_q && !(operand_request_valid_o[MaskB]);
// Pop if the operand requester and the queue are ready to accept requests
masku_vrgat_req_ready_d = masku_vrgat_req_valid_q & !(operand_request_valid_o[MaskB])
& (vrgat_cmd_req_cnt_q != (VrgatherOpQueueBufDepth-1));

// Track the MaskB commands in flight:
//   +1 when a request is handed to the operand requester (handshake),
//   -1 when the MaskB opqueue reports that it popped a cmd.
// Both events can happen in the same cycle. They then cancel out and the
// counter must hold its value, instead of letting the decrement silently
// override the increment.
unique case ({masku_vrgat_req_ready_d, mask_b_cmd_pop_i})
  2'b10  : vrgat_cmd_req_cnt_d = vrgat_cmd_req_cnt_q + 1;
  2'b01  : vrgat_cmd_req_cnt_d = vrgat_cmd_req_cnt_q - 1;
  default: vrgat_cmd_req_cnt_d = vrgat_cmd_req_cnt_q;
endcase

// If the MASKU is over with VRGATHER/VCOMPRESS, return to idle
if (masku_vrgat_req_ready_d && masku_vrgat_req_q.is_last_req) begin
vrgat_state_d = IDLE;
Expand Down Expand Up @@ -874,7 +891,7 @@ module lane_sequencer import ara_pkg::*; import rvv_pkg::*; import cf_math_pkg::
hazard : '0,
default : '0
};
operand_request_push[MaskB] = masku_vrgat_req_valid_q && !(operand_request_valid_o[MaskB]);
operand_request_push[MaskB] = masku_vrgat_req_ready_d;
end
end: sequencer

Expand All @@ -888,6 +905,9 @@ module lane_sequencer import ara_pkg::*; import rvv_pkg::*; import cf_math_pkg::

alu_vinsn_done_o <= 1'b0;
mfpu_vinsn_done_o <= 1'b0;

vrgat_state_q <= IDLE;
vrgat_cmd_req_cnt_q <= '0;
end else begin
vinsn_done_q <= vinsn_done_d;
vinsn_running_q <= vinsn_running_d;
Expand All @@ -897,6 +917,9 @@ module lane_sequencer import ara_pkg::*; import rvv_pkg::*; import cf_math_pkg::

alu_vinsn_done_o <= alu_vinsn_done_d;
mfpu_vinsn_done_o <= mfpu_vinsn_done_d;

vrgat_state_q <= vrgat_state_d;
vrgat_cmd_req_cnt_q <= vrgat_cmd_req_cnt_d;
end
end

Expand Down
14 changes: 13 additions & 1 deletion hardware/src/lane/operand_queue.sv
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ module operand_queue import ara_pkg::*; import rvv_pkg::*; import cf_math_pkg::i
parameter int unsigned NrSlaves = 1,
parameter int unsigned NrLanes = 0,
parameter int unsigned VLEN = 0,
parameter bit IsVrgatherOpqueue = 0,
// Support for floating-point data types
parameter fpu_support_e FPUSupport = FPUSupportHalfSingleDouble,
// Supported conversions
Expand All @@ -37,6 +38,8 @@ module operand_queue import ara_pkg::*; import rvv_pkg::*; import cf_math_pkg::i
// Interface with the Operand Requester
input operand_queue_cmd_t operand_queue_cmd_i,
input logic operand_queue_cmd_valid_i,
// Interface with the Lane Sequencer
output logic mask_b_cmd_pop_o,
// Interface with the Vector Register File
input elen_t operand_i,
input logic operand_valid_i,
Expand All @@ -54,7 +57,7 @@ module operand_queue import ara_pkg::*; import rvv_pkg::*; import cf_math_pkg::i
//////////////////////

operand_queue_cmd_t cmd;
logic cmd_pop;
logic cmd_pop, cmd_pop_q;

fifo_v3 #(
.DEPTH(CmdBufDepth ),
Expand All @@ -73,6 +76,13 @@ module operand_queue import ara_pkg::*; import rvv_pkg::*; import cf_math_pkg::i
.usage_o (/* Unused */ )
);

// Only the operand queue instantiated with IsVrgatherOpqueue set reports the
// registered pop of its cmd buffer; every other instance ties the output low.
assign mask_b_cmd_pop_o = IsVrgatherOpqueue ? cmd_pop_q : 1'b0;

//////////////
// Buffer //
//////////////
Expand Down Expand Up @@ -123,8 +133,10 @@ module operand_queue import ara_pkg::*; import rvv_pkg::*; import cf_math_pkg::i
always_ff @(posedge clk_i or negedge rst_ni) begin: p_ibuf_usage_ff
if (!rst_ni) begin
ibuf_usage_q <= '0;
cmd_pop_q <= 1'b0;
end else begin
ibuf_usage_q <= ibuf_usage_d;
cmd_pop_q <= cmd_pop;
end
end

Expand Down
33 changes: 24 additions & 9 deletions hardware/src/lane/operand_queues_stage.sv
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ module operand_queues_stage import ara_pkg::*; import rvv_pkg::*; import cf_math
output logic [NrOperandQueues-1:0] operand_queue_ready_o,
input operand_queue_cmd_t [NrOperandQueues-1:0] operand_queue_cmd_i,
input logic [NrOperandQueues-1:0] operand_queue_cmd_valid_i,
// Interface with the Lane Sequencer
output logic mask_b_cmd_pop_o,
// Interface with the VFUs
// ALU
output elen_t [1:0] alu_operand_o,
Expand Down Expand Up @@ -73,6 +75,7 @@ module operand_queues_stage import ara_pkg::*; import rvv_pkg::*; import cf_math
.lane_id_i (lane_id_i ),
.operand_queue_cmd_i (operand_queue_cmd_i[AluA] ),
.operand_queue_cmd_valid_i(operand_queue_cmd_valid_i[AluA]),
.mask_b_cmd_pop_o (/* Unused */ ),
.operand_i (operand_i[AluA] ),
.operand_valid_i (operand_valid_i[AluA] ),
.operand_issued_i (operand_issued_i[AluA] ),
Expand Down Expand Up @@ -102,6 +105,7 @@ module operand_queues_stage import ara_pkg::*; import rvv_pkg::*; import cf_math
.lane_id_i (lane_id_i ),
.operand_queue_cmd_i (operand_queue_cmd_i[AluB] ),
.operand_queue_cmd_valid_i(operand_queue_cmd_valid_i[AluB]),
.mask_b_cmd_pop_o (/* Unused */ ),
.operand_i (operand_i[AluB] ),
.operand_valid_i (operand_valid_i[AluB] ),
.operand_issued_i (operand_issued_i[AluB] ),
Expand Down Expand Up @@ -133,6 +137,7 @@ module operand_queues_stage import ara_pkg::*; import rvv_pkg::*; import cf_math
.lane_id_i (lane_id_i ),
.operand_queue_cmd_i (operand_queue_cmd_i[MulFPUA] ),
.operand_queue_cmd_valid_i(operand_queue_cmd_valid_i[MulFPUA]),
.mask_b_cmd_pop_o (/* Unused */ ),
.operand_i (operand_i[MulFPUA] ),
.operand_valid_i (operand_valid_i[MulFPUA] ),
.operand_issued_i (operand_issued_i[MulFPUA] ),
Expand Down Expand Up @@ -160,6 +165,7 @@ module operand_queues_stage import ara_pkg::*; import rvv_pkg::*; import cf_math
.lane_id_i (lane_id_i ),
.operand_queue_cmd_i (operand_queue_cmd_i[MulFPUB] ),
.operand_queue_cmd_valid_i(operand_queue_cmd_valid_i[MulFPUB]),
.mask_b_cmd_pop_o (/* Unused */ ),
.operand_i (operand_i[MulFPUB] ),
.operand_valid_i (operand_valid_i[MulFPUB] ),
.operand_issued_i (operand_issued_i[MulFPUB] ),
Expand Down Expand Up @@ -187,6 +193,7 @@ module operand_queues_stage import ara_pkg::*; import rvv_pkg::*; import cf_math
.lane_id_i (lane_id_i ),
.operand_queue_cmd_i (operand_queue_cmd_i[MulFPUC] ),
.operand_queue_cmd_valid_i(operand_queue_cmd_valid_i[MulFPUC]),
.mask_b_cmd_pop_o (/* Unused */ ),
.operand_i (operand_i[MulFPUC] ),
.operand_valid_i (operand_valid_i[MulFPUC] ),
.operand_issued_i (operand_issued_i[MulFPUC] ),
Expand Down Expand Up @@ -215,6 +222,7 @@ module operand_queues_stage import ara_pkg::*; import rvv_pkg::*; import cf_math
.lane_id_i (lane_id_i ),
.operand_queue_cmd_i (operand_queue_cmd_i[StA] ),
.operand_queue_cmd_valid_i(operand_queue_cmd_valid_i[StA]),
.mask_b_cmd_pop_o (/* Unused */ ),
.operand_i (operand_i[StA] ),
.operand_valid_i (operand_valid_i[StA] ),
.operand_issued_i (operand_issued_i[StA] ),
Expand Down Expand Up @@ -259,6 +267,7 @@ module operand_queues_stage import ara_pkg::*; import rvv_pkg::*; import cf_math
.lane_id_i (lane_id_i ),
.operand_queue_cmd_i (operand_queue_cmd_i[SlideAddrGenA] ),
.operand_queue_cmd_valid_i(operand_queue_cmd_valid_i[SlideAddrGenA] ),
.mask_b_cmd_pop_o (/* Unused */ ),
.operand_i (operand_i[SlideAddrGenA] ),
.operand_valid_i (operand_valid_i[SlideAddrGenA] ),
.operand_issued_i (operand_issued_i[SlideAddrGenA] ),
Expand All @@ -274,22 +283,24 @@ module operand_queues_stage import ara_pkg::*; import rvv_pkg::*; import cf_math
/////////////////

operand_queue #(
.CmdBufDepth (MaskuInsnQueueDepth ),
.DataBufDepth (1 ),
.FPUSupport (FPUSupportNone ),
.SupportIntExt2 (1'b1 ),
.SupportIntExt4 (1'b1 ),
.SupportIntExt8 (1'b1 ),
.NrLanes (NrLanes ),
.VLEN (VLEN ),
.operand_queue_cmd_t(operand_queue_cmd_t )
.CmdBufDepth (MaskuInsnQueueDepth + VrgatherOpQueueBufDepth ),
.DataBufDepth (MaskuInsnQueueDepth + VrgatherOpQueueBufDepth ),
.IsVrgatherOpqueue (1'b1 ),
.FPUSupport (FPUSupportNone ),
.SupportIntExt2 (1'b1 ),
.SupportIntExt4 (1'b1 ),
.SupportIntExt8 (1'b1 ),
.NrLanes (NrLanes ),
.VLEN (VLEN ),
.operand_queue_cmd_t(operand_queue_cmd_t )
) i_operand_queue_mask_b (
.clk_i (clk_i ),
.rst_ni (rst_ni ),
.flush_i (1'b0 ),
.lane_id_i (lane_id_i ),
.operand_queue_cmd_i (operand_queue_cmd_i[MaskB] ),
.operand_queue_cmd_valid_i(operand_queue_cmd_valid_i[MaskB]),
.mask_b_cmd_pop_o (mask_b_cmd_pop_o ),
.operand_i (operand_i[MaskB] ),
.operand_valid_i (operand_valid_i[MaskB] ),
.operand_issued_i (operand_issued_i[MaskB] ),
Expand All @@ -314,6 +325,7 @@ module operand_queues_stage import ara_pkg::*; import rvv_pkg::*; import cf_math
.lane_id_i (lane_id_i ),
.operand_queue_cmd_i (operand_queue_cmd_i[MaskM] ),
.operand_queue_cmd_valid_i(operand_queue_cmd_valid_i[MaskM]),
.mask_b_cmd_pop_o (/* Unused */ ),
.operand_i (operand_i[MaskM] ),
.operand_valid_i (operand_valid_i[MaskM] ),
.operand_issued_i (operand_issued_i[MaskM] ),
Expand All @@ -324,4 +336,7 @@ module operand_queues_stage import ara_pkg::*; import rvv_pkg::*; import cf_math
.operand_ready_i (mask_operand_ready_i[0] )
);

// Checks
// Elaboration-time check that VrgatherOpQueueBufDepth is a power of 2.
// A power of two has exactly one bit set, so (n & (n - 1)) must be zero; a plain
// "% 2" test would also let through even non-powers such as 6 or 12.
// Values below 2 (including 1) are still rejected, as they were before.
if ((VrgatherOpQueueBufDepth < 2) ||
    ((VrgatherOpQueueBufDepth & (VrgatherOpQueueBufDepth - 1)) != 0))
  $fatal(1, "Parameter VrgatherOpQueueBufDepth must be power of 2.");

endmodule : operand_queues_stage
2 changes: 2 additions & 0 deletions hardware/src/lane/operand_requester.sv
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@ module operand_requester import ara_pkg::*; import rvv_pkg::*; #(
output logic [NrOperandQueues-1:0] operand_issued_o,
output operand_queue_cmd_t [NrOperandQueues-1:0] operand_queue_cmd_o,
output logic [NrOperandQueues-1:0] operand_queue_cmd_valid_o,
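// VRGATHER/VCOMPRESS support
// NOTE(review): this port appears to be added without a driver in this file and
// without a connection in lane.sv in this commit — confirm it is driven/connected.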
output logic masku_b_operand_queue_ready_o,
// Interface with the VFUs
// ALU
input logic alu_result_req_i,
Expand Down
4 changes: 2 additions & 2 deletions hardware/src/lane/valu.sv
Original file line number Diff line number Diff line change
Expand Up @@ -786,13 +786,13 @@ module valu import ara_pkg::*; import rvv_pkg::*; import cf_math_pkg::idx_width;
//////////////////////////////

if (!vinsn_queue_full && vfu_operation_valid_i &&
(vfu_operation_i.vfu == VFU_Alu || vfu_operation_i.op inside {[VMSEQ:VMXNOR]})) begin
(vfu_operation_i.vfu == VFU_Alu || vfu_operation_i.op inside {[VMSEQ:VCOMPRESS]})) begin
vinsn_queue_d.vinsn[vinsn_queue_q.accept_pnt] = vfu_operation_i;
// Do not wait for masks if, during a reduction, this lane is just a pass-through
// The only valid instructions here with vl == '0 are reductions
// Instructions that execute in the mask unit will process the mask there directly
// VMADC/VMSBC requires mask bits in the ALU
vinsn_queue_d.vinsn[vinsn_queue_q.accept_pnt].vm = vfu_operation_i.op inside {[VMSEQ:VMXNOR]} && !(vfu_operation_i.op inside {[VMADC:VMSBC]})
vinsn_queue_d.vinsn[vinsn_queue_q.accept_pnt].vm = vfu_operation_i.op inside {[VMSEQ:VCOMPRESS]} && !(vfu_operation_i.op inside {[VMADC:VMSBC]})
? 1'b1
: vfu_operation_i.vm | (vfu_operation_i.vl == '0);

Expand Down
Loading

0 comments on commit ef73126

Please sign in to comment.