Skip to content

Commit

Permalink
[hardware] Debugging VIOTA (two commits ago, it worked until masked test)
Browse files Browse the repository at this point in the history
  • Loading branch information
mp-17 committed Nov 14, 2024
1 parent 73cd355 commit c54eba6
Show file tree
Hide file tree
Showing 4 changed files with 92 additions and 133 deletions.
28 changes: 21 additions & 7 deletions hardware/src/ara_dispatcher.sv
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(

`FF(csr_vstart_q, csr_vstart_d, '0)
`FF(csr_vl_q, csr_vl_d, '0)
`FF(csr_vtype_q, csr_vtype_d, '{vill: 1'b1, default: '0})
`FF(csr_vtype_q, csr_vtype_d, '{vill: 1'b1, vsew: EW8, vlmul: LMUL_1, default: '0})
`FF(csr_vxsat_q, csr_vxsat_d, '0)
`FF(csr_vxrm_q, csr_vxrm_d, '0)
// Converts between the internal representation of `vtype_t` and the full XLEN-bit CSR.
Expand Down Expand Up @@ -505,7 +505,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
(csr_vtype_d.vlmul == LMUL_RSVD) || // reserved value
// LMUL >= SEW/ELEN
(signed'($clog2(ELENB)) + signed'(csr_vtype_d.vlmul) < signed'(csr_vtype_d.vsew))) begin
csr_vtype_d = '{vill: 1'b1, default: '0};
csr_vtype_d = '{vill: 1'b1, vsew: EW8, vlmul: LMUL_1, default: '0};
csr_vl_d = '0;
end

Expand Down Expand Up @@ -1279,12 +1279,26 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
ara_req_d.use_vs1 = 1'b0;
ara_req_d.use_vd_op = 1'b1;
ara_req_d.eew_vs2 = eew_q[ara_req_d.vs2]; // Force reshuffle
ara_req_d.eew_vd_op = eew_q[ara_req_d.vd]; // Force reshuffle
ara_req_d.vtype.vsew = eew_q[ara_req_d.vd];
ara_req_d.eew_vd_op = eew_q[ara_req_d.vd];
case (insn.varith_type.rs1)
5'b00001: ara_req_d.op = ara_pkg::VMSBF;
5'b00010: ara_req_d.op = ara_pkg::VMSOF;
5'b00011: ara_req_d.op = ara_pkg::VMSIF;
5'b00001: begin
ara_req_d.op = ara_pkg::VMSBF;
// This is a mask-to-mask operation, vsew does not have any meaning
// So, avoid reshuffling
ara_req_d.vtype.vsew = eew_q[ara_req_d.vd];
end
5'b00010: begin
ara_req_d.op = ara_pkg::VMSOF;
// This is a mask-to-mask operation, vsew does not have any meaning
// So, avoid reshuffling
ara_req_d.vtype.vsew = eew_q[ara_req_d.vd];
end
5'b00011: begin
ara_req_d.op = ara_pkg::VMSIF;
// This is a mask-to-mask operation, vsew does not have any meaning
// So, avoid reshuffling
ara_req_d.vtype.vsew = eew_q[ara_req_d.vd];
end
5'b10000: ara_req_d.op = ara_pkg::VIOTA;
5'b10001: ara_req_d.op = ara_pkg::VID;
endcase
Expand Down
10 changes: 5 additions & 5 deletions hardware/src/lane/lane_sequencer.sv
Original file line number Diff line number Diff line change
Expand Up @@ -265,7 +265,7 @@ module lane_sequencer import ara_pkg::*; import rvv_pkg::*; import cf_math_pkg::
vfu_operation_d.vl = pe_req.vl / NrLanes;
// If lane_id_i < vl % NrLanes, this lane has to execute one extra micro-operation.
// Also, if the ALU/VMFPU should pre-process data for the MASKU, force a balanced payload
if (lane_id_i < pe_req.vl[idx_width(NrLanes)-1:0] || pe_req.op inside {[VMFEQ:VMXNOR]})
if (lane_id_i < pe_req.vl[idx_width(NrLanes)-1:0] || (|pe_req.vl[idx_width(NrLanes)-1:0] && pe_req.op inside {[VMFEQ:VMXNOR]}))
vfu_operation_d.vl += 1;

// Calculate the start element for Lane[i]. This will be forwarded to both opqueues
Expand Down Expand Up @@ -757,17 +757,17 @@ module lane_sequencer import ara_pkg::*; import rvv_pkg::*; import cf_math_pkg::
// Request a balanced load from every lane despite it being active or not.
// Since this request goes outside of the lane, we might need to request an
// extra operand regardless of whether it is valid in this lane or not.
if ((operand_request[MaskM].vl * NrLanes) != pe_req.vl)
operand_request[MaskM].vl += 1;
if ((operand_request[MaskB].vl * NrLanes) != pe_req.vl)
operand_request[MaskB].vl += 1;
end else begin // Mask logical, VMSBF, VMSIF, VMSOF, VCPOP, VFIRST
// Mask layout
operand_request[MaskB].eew = EW64;
operand_request[MaskB].vl = (pe_req.vl / NrLanes / ELEN);
// Request a balanced load from every lane despite it being active or not.
// Since this request goes outside of the lane, we might need to request an
// extra operand regardless of whether it is valid in this lane or not.
if ((operand_request[MaskM].vl * NrLanes * ELEN) != pe_req.vl)
operand_request[MaskM].vl += 1;
if ((operand_request[MaskB].vl * NrLanes * ELEN) != pe_req.vl)
operand_request[MaskB].vl += 1;
end
operand_request_push[MaskB] = pe_req.use_vd_op;

Expand Down
95 changes: 33 additions & 62 deletions hardware/src/lane/valu.sv
Original file line number Diff line number Diff line change
Expand Up @@ -398,6 +398,11 @@ module valu import ara_pkg::*; import rvv_pkg::*; import cf_math_pkg::idx_width;
// Remaining elements of the current instruction in the commit phase
vlen_t commit_cnt_d, commit_cnt_q;

// How many elements are issued/committed
logic [3:0] element_cnt_buf_issue, element_cnt_buf_commit;
logic [6:0] element_cnt_issue;
logic [6:0] element_cnt_commit;

always_comb begin: p_valu
// Maintain state
vinsn_queue_d = vinsn_queue_q;
Expand Down Expand Up @@ -439,6 +444,13 @@ module valu import ara_pkg::*; import rvv_pkg::*; import cf_math_pkg::idx_width;
// Don't prevent commit by default
prevent_commit = 1'b0;

// How many elements are we processing this cycle?
// The per-word element count is 2^(EW64 - vsew): the difference is only the
// exponent and must be converted into a count with a shift. Using the bare
// difference would yield 0 elements for vsew == EW64, so subtracting it from
// the remaining-element counters would never make progress.
// Operations in [VMSBF:VMXNOR] count ELEN elements per word instead.
element_cnt_buf_issue = 1 << (unsigned'(EW64) - unsigned'(vinsn_issue_q.vtype.vsew));
element_cnt_issue = vinsn_issue_q.op inside {[VMSBF:VMXNOR]} ? ELEN : {2'b0, element_cnt_buf_issue};

element_cnt_buf_commit = 1 << (unsigned'(EW64) - unsigned'(vinsn_commit.vtype.vsew));
element_cnt_commit = vinsn_commit.op inside {[VMSBF:VMXNOR]} ? ELEN : {2'b0, element_cnt_buf_commit};

////////////////////////////////////////
// Write data into the result queue //
////////////////////////////////////////
Expand All @@ -453,7 +465,7 @@ module valu import ara_pkg::*; import rvv_pkg::*; import cf_math_pkg::idx_width;
(alu_operand_valid_i[0] || !vinsn_issue_q.use_vs1) &&
(mask_valid_i || vinsn_issue_q.vm)) begin
// How many elements are we committing with this word?
automatic logic [3:0] element_cnt = (1 << (unsigned'(EW64) - unsigned'(vinsn_issue_q.vtype.vsew)));
automatic logic [6:0] element_cnt = element_cnt_issue;

if (element_cnt > issue_cnt_q)
element_cnt = issue_cnt_q;
Expand Down Expand Up @@ -527,16 +539,8 @@ module valu import ara_pkg::*; import rvv_pkg::*; import cf_math_pkg::idx_width;
vinsn_queue_d.issue_pnt = vinsn_queue_q.issue_pnt + 1;

// Assign vector length for next instruction in the instruction queue
if (vinsn_queue_d.issue_cnt != 0) begin
if (!(vinsn_queue_q.vinsn[vinsn_queue_d.issue_pnt].op inside {[VMANDNOT:VMXNOR]}))
issue_cnt_d = vinsn_queue_q.vinsn[vinsn_queue_d.issue_pnt].vl;
else begin
$warning("vstart was never tested for op inside {[VMANDNOT:VMXNOR]}");
issue_cnt_d = (vinsn_queue_q.vinsn[vinsn_queue_d.issue_pnt].vl / 8) >>
vinsn_queue_q.vinsn[vinsn_queue_d.issue_pnt].vtype.vsew;
issue_cnt_d += |vinsn_queue_q.vinsn[vinsn_queue_d.issue_pnt].vl[2:0];
end
end
if (vinsn_queue_d.issue_cnt != 0)
issue_cnt_d = vinsn_queue_q.vinsn[vinsn_queue_d.issue_pnt].vl;
end
end
end
Expand All @@ -553,7 +557,8 @@ module valu import ara_pkg::*; import rvv_pkg::*; import cf_math_pkg::idx_width;
(alu_operand_valid_i[0] || !vinsn_issue_q.use_vs1 || !first_op_q) &&
(mask_valid_i || vinsn_issue_q.vm)) begin
// How many elements are we committing with this word?
automatic logic [3:0] element_cnt = (1 << (unsigned'(EW64) - unsigned'(vinsn_issue_q.vtype.vsew)));
automatic logic [6:0] element_cnt = element_cnt_issue;

if (element_cnt > issue_cnt_q)
element_cnt = issue_cnt_q;

Expand Down Expand Up @@ -659,16 +664,8 @@ module valu import ara_pkg::*; import rvv_pkg::*; import cf_math_pkg::idx_width;
vinsn_queue_d.issue_pnt = vinsn_queue_q.issue_pnt + 1;

// Assign vector length for next instruction in the instruction queue
if (vinsn_queue_d.issue_cnt != 0) begin
if (!(vinsn_queue_q.vinsn[vinsn_queue_d.issue_pnt].op inside {[VMANDNOT:VMXNOR]}))
issue_cnt_d = vinsn_queue_q.vinsn[vinsn_queue_d.issue_pnt].vl;
else begin
$warning("vstart was never tested for op inside {[VMANDNOT:VMXNOR]}");
issue_cnt_d = (vinsn_queue_q.vinsn[vinsn_queue_d.issue_pnt].vl / 8) >>
vinsn_queue_q.vinsn[vinsn_queue_d.issue_pnt].vtype.vsew;
issue_cnt_d += |vinsn_queue_q.vinsn[vinsn_queue_d.issue_pnt].vl[2:0];
end
end
if (vinsn_queue_d.issue_cnt != 0)
issue_cnt_d = vinsn_queue_q.vinsn[vinsn_queue_d.issue_pnt].vl;

// Give the done to the main sequencer
commit_cnt_d = '0;
Expand Down Expand Up @@ -696,16 +693,8 @@ module valu import ara_pkg::*; import rvv_pkg::*; import cf_math_pkg::idx_width;
vinsn_queue_d.issue_pnt = vinsn_queue_q.issue_pnt + 1;

// Assign vector length for next instruction in the instruction queue
if (vinsn_queue_d.issue_cnt != 0) begin
if (!(vinsn_queue_q.vinsn[vinsn_queue_d.issue_pnt].op inside {[VMANDNOT:VMXNOR]}))
issue_cnt_d = vinsn_queue_q.vinsn[vinsn_queue_d.issue_pnt].vl;
else begin
$warning("vstart was never tested for op inside {[VMANDNOT:VMXNOR]}");
issue_cnt_d = (vinsn_queue_q.vinsn[vinsn_queue_d.issue_pnt].vl / 8) >>
vinsn_queue_q.vinsn[vinsn_queue_d.issue_pnt].vtype.vsew;
issue_cnt_d += |vinsn_queue_q.vinsn[vinsn_queue_d.issue_pnt].vl[2:0];
end
end
if (vinsn_queue_d.issue_cnt != 0)
issue_cnt_d = vinsn_queue_q.vinsn[vinsn_queue_d.issue_pnt].vl;

// Commit and give the done to the main sequencer
commit_cnt_d = '0;
Expand Down Expand Up @@ -757,9 +746,11 @@ module valu import ara_pkg::*; import rvv_pkg::*; import cf_math_pkg::idx_width;

// Decrement the counter of remaining vector elements waiting to be written
// Don't do it in case of a reduction
if (!is_reduction(vinsn_commit.op))
commit_cnt_d = commit_cnt_q - (1 << (unsigned'(EW64) - vinsn_commit.vtype.vsew));
if (commit_cnt_q < (1 << (unsigned'(EW64) - vinsn_commit.vtype.vsew))) commit_cnt_d = '0;
if (!is_reduction(vinsn_commit.op)) begin
automatic logic [6:0] element_cnt = element_cnt_commit;
commit_cnt_d = commit_cnt_q - element_cnt;
if (commit_cnt_q < element_cnt) commit_cnt_d = '0;
end
end

// Finished committing the results of a vector instruction
Expand All @@ -773,18 +764,8 @@ module valu import ara_pkg::*; import rvv_pkg::*; import cf_math_pkg::idx_width;
else vinsn_queue_d.commit_pnt += 1;

// Update the commit counter for the next instruction
if (vinsn_queue_d.commit_cnt != '0) begin
if (!(vinsn_queue_q.vinsn[vinsn_queue_d.commit_pnt].op inside {[VMANDNOT:VMXNOR]}))
commit_cnt_d = vinsn_queue_q.vinsn[vinsn_queue_d.commit_pnt].vl;
else begin
// We are asking for bits, and we want at least one chunk of bits if
// vl > 0. Therefore, commit_cnt = ceil((vl / 8) >> sew)
$warning("vstart was never tested for op inside {[VMANDNOT:VMXNOR]}");
commit_cnt_d = (vinsn_queue_q.vinsn[vinsn_queue_d.commit_pnt].vl / 8) >>
vinsn_queue_q.vinsn[vinsn_queue_d.commit_pnt].vtype.vsew;
commit_cnt_d += |vinsn_queue_q.vinsn[vinsn_queue_d.commit_pnt].vl[2:0];
end
end
if (vinsn_queue_d.commit_cnt != '0)
commit_cnt_d = vinsn_queue_q.vinsn[vinsn_queue_d.commit_pnt].vl;

// Initialize counters and alu state if needed by the next instruction
// After a reduction, the next instructions starts after the reduction commits
Expand All @@ -809,7 +790,10 @@ module valu import ara_pkg::*; import rvv_pkg::*; import cf_math_pkg::idx_width;
vinsn_queue_d.vinsn[vinsn_queue_q.accept_pnt] = vfu_operation_i;
// Do not wait for masks if, during a reduction, this lane is just a pass-through
// The only valid instructions here with vl == '0 are reductions
vinsn_queue_d.vinsn[vinsn_queue_q.accept_pnt].vm = vfu_operation_i.vm | (vfu_operation_i.vl == '0);
// Instructions that execute in the mask unit will process the mask there directly
vinsn_queue_d.vinsn[vinsn_queue_q.accept_pnt].vm = vfu_operation_i.op inside {[VMSEQ:VMXNOR]}
? 1'b1
: vfu_operation_i.vm | (vfu_operation_i.vl == '0);

// Initialize counters and alu state if the instruction queue was empty
// and the lane is not reducing
Expand All @@ -825,22 +809,9 @@ module valu import ara_pkg::*; import rvv_pkg::*; import cf_math_pkg::idx_width;
sldu_transactions_cnt_d = $clog2(NrLanes) + 1;

issue_cnt_d = vfu_operation_i.vl;
if (!(vfu_operation_i.op inside {[VMANDNOT:VMXNOR]}))
issue_cnt_d = vfu_operation_i.vl;
else begin
issue_cnt_d = (vfu_operation_i.vl / 8) >>
vfu_operation_i.vtype.vsew;
issue_cnt_d += |vfu_operation_i.vl[2:0];
end
end
if (vinsn_queue_d.commit_cnt == '0)
if (!(vfu_operation_i.op inside {[VMANDNOT:VMXNOR]}))
commit_cnt_d = vfu_operation_i.vl;
else begin
// Operations between mask vectors operate on bits
commit_cnt_d = (vfu_operation_i.vl / 8) >> vfu_operation_i.vtype.vsew;
commit_cnt_d += |vfu_operation_i.vl[2:0];
end
commit_cnt_d = vfu_operation_i.vl;

// Bump pointers and counters of the vector instruction queue
vinsn_queue_d.accept_pnt += 1;
Expand Down
Loading

0 comments on commit c54eba6

Please sign in to comment.