Skip to content

Commit

Permalink
[hardware] Fixes for segment hardware
Browse files Browse the repository at this point in the history
  • Loading branch information
mp-17 committed Oct 26, 2024
1 parent 95fbc1e commit 83ad4a3
Show file tree
Hide file tree
Showing 2 changed files with 159 additions and 143 deletions.
4 changes: 3 additions & 1 deletion hardware/src/ara_dispatcher.sv
Original file line number Diff line number Diff line change
Expand Up @@ -260,7 +260,9 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
// The handshake signals are simply passed through if the instruction is not a segment instruction
ara_resp_t ara_resp;
segment_sequencer #(
.SegSupport(SegSupport)
.SegSupport(SegSupport),
.ara_req_t (ara_req_t ),
.ara_resp_t(ara_resp_t)
) i_segment_sequencer (
.clk_i(clk_i),
.rst_ni(rst_ni),
Expand Down
298 changes: 156 additions & 142 deletions hardware/src/segment_sequencer.sv
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,9 @@
// it has a low impact on the physical implementation.

module segment_sequencer import ara_pkg::*; import rvv_pkg::*; #(
parameter bit SegSupport = 1'b0
parameter bit SegSupport = 1'b0,
parameter type ara_req_t = logic,
parameter type ara_resp_t = logic
) (
// Clock and reset
input logic clk_i,
Expand All @@ -35,154 +37,166 @@ module segment_sequencer import ara_pkg::*; import rvv_pkg::*; #(

import cf_math_pkg::idx_width;

// Registered copy of the backend response (payload + valid). It is sampled when
// the last micro operation of a sequence is answered and presented on the
// output once the backend is idle.
logic ara_resp_valid_d, ara_resp_valid_q;
ara_resp_t ara_resp_d, ara_resp_q;
// Registered copy of is_vload_i; selects between load_complete_o and
// store_complete_o at the end of the sequence.
logic is_vload_d, is_vload_q;
// vstart counter value plus one, declared one bit wider than the vstart field
logic [$bits(ara_req_i.vstart):0] next_vstart_cnt;

// Sequencer FSM states
typedef enum logic [1:0] {
// No micro-operation sequence in progress
IDLE,
// Micro operations with vl = 1 are being issued
SEGMENT_MICRO_OPS,
// Last micro operation answered; waiting for ara_idle_i before completing
SEGMENT_MICRO_OPS_END
} state_e;
state_e state_d, state_q;

// Field counter: position of the current field inside the segment being processed
logic                           new_seg_mem_op;
logic                           segment_cnt_en;
logic                           segment_cnt_clear;
logic [$bits(ara_req_i.nf)-1:0] segment_cnt_q;

// Restart the field count when a new segment operation is set up, or when the
// counter is enabled while its value already equals ara_req_i.nf
assign segment_cnt_clear = (segment_cnt_en & (segment_cnt_q == ara_req_i.nf)) | new_seg_mem_op;

counter #(
  .WIDTH          ($bits(ara_req_i.nf)),
  .STICKY_OVERFLOW(1'b0               )
) i_segment_cnt (
  .clk_i     (clk_i            ),
  .rst_ni    (rst_ni           ),
  .clear_i   (segment_cnt_clear),
  .en_i      (segment_cnt_en   ),
  // Up-counting only, never loaded
  .load_i    (1'b0             ),
  .down_i    (1'b0             ),
  .d_i       ('0               ),
  .q_o       (segment_cnt_q    ),
  .overflow_o( /* Unused */    )
);

// Segment counter: index of the segment currently being processed. It is loaded
// with the vstart of the incoming request when a new segment operation is set up
// and advances by one every time the field counter is enabled while equal to nf.
logic                               vstart_cnt_en;
logic [$bits(ara_req_i.vstart)-1:0] vstart_cnt_q;

counter #(
  .WIDTH          ($bits(ara_req_i.vstart)),
  .STICKY_OVERFLOW(1'b0                   )
) i_vstart_cnt (
  .clk_i     (clk_i           ),
  .rst_ni    (rst_ni          ),
  // This counter is never cleared (it is re-loaded at the start of every
  // sequence). Tie the input off explicitly rather than leaving it floating.
  .clear_i   (1'b0            ),
  .en_i      (vstart_cnt_en   ),
  .load_i    (new_seg_mem_op  ),
  .down_i    (1'b0            ),
  .d_i       (ara_req_i.vstart),
  .q_o       (vstart_cnt_q    ),
  .overflow_o( /* Unused */   )
);
// Change destination vector index when all the fields of the segment have been processed
assign vstart_cnt_en = segment_cnt_en & (segment_cnt_q == ara_req_i.nf);

// Next vstart count (next_vstart_cnt is one bit wider than vstart_cnt_q, and is
// the value compared against ara_req_i.vl to detect the end of the sequence)
assign next_vstart_cnt = vstart_cnt_q + 1;

// Signal if the micro op seq is on (any state other than IDLE)
assign segment_micro_op_on_o = state_q != IDLE;

always_comb begin
state_d = state_q;

// Pass through
ara_req_o = ara_req_i;
ara_resp_o = ara_resp_i;
ara_resp_valid_o = ara_resp_valid_i;
// Block load/store_complete
load_complete_o = 1'b0;
store_complete_o = 1'b0;

ara_resp_d = ara_resp_q;
ara_resp_valid_d = ara_resp_valid_q;
is_vload_d = is_vload_q;

// Don't count up by default
new_seg_mem_op = 1'b0;
segment_cnt_en = 1'b0;

// Low-perf Moore's FSM
unique case (state_q)
IDLE: begin
// Send a first micro operation upon valid segment mem op
if (is_segment_mem_op_i && !illegal_insn_i) begin
// If we are here, the backend is able to accept the request
// Set-up sequencing
new_seg_mem_op = 1'b1;
// Set up the first micro operation
ara_req_o.vl = 1;
// Start sequencing
state_d = SEGMENT_MICRO_OPS;
if (SegSupport == SegSupportEnable) begin : gen_segment_support

// Registered copy of the backend response (payload + valid). It is sampled when
// the last micro operation of a sequence is answered and presented on the
// output once the backend is idle.
logic ara_resp_valid_d, ara_resp_valid_q;
ara_resp_t ara_resp_d, ara_resp_q;
// Registered copy of is_vload_i; selects between load_complete_o and
// store_complete_o at the end of the sequence.
logic is_vload_d, is_vload_q;
// vstart counter value plus one, declared one bit wider than the vstart field
logic [$bits(ara_req_i.vstart):0] next_vstart_cnt;

// Sequencer FSM states
typedef enum logic [1:0] {
// No micro-operation sequence in progress
IDLE,
// Micro operations with vl = 1 are being issued
SEGMENT_MICRO_OPS,
// Last micro operation answered; waiting for ara_idle_i before completing
SEGMENT_MICRO_OPS_END
} state_e;
state_e state_d, state_q;

// Field counter: position of the current field inside the segment being processed
logic                           new_seg_mem_op;
logic                           segment_cnt_en;
logic                           segment_cnt_clear;
logic [$bits(ara_req_i.nf)-1:0] segment_cnt_q;

// Restart the field count when a new segment operation is set up, or when the
// counter is enabled while its value already equals ara_req_i.nf
assign segment_cnt_clear = (segment_cnt_en & (segment_cnt_q == ara_req_i.nf)) | new_seg_mem_op;

counter #(
  .WIDTH          ($bits(ara_req_i.nf)),
  .STICKY_OVERFLOW(1'b0               )
) i_segment_cnt (
  .clk_i     (clk_i            ),
  .rst_ni    (rst_ni           ),
  .clear_i   (segment_cnt_clear),
  .en_i      (segment_cnt_en   ),
  // Up-counting only, never loaded
  .load_i    (1'b0             ),
  .down_i    (1'b0             ),
  .d_i       ('0               ),
  .q_o       (segment_cnt_q    ),
  .overflow_o( /* Unused */    )
);

// Segment counter: index of the segment currently being processed. It is loaded
// with the vstart of the incoming request when a new segment operation is set up
// and advances by one every time the field counter is enabled while equal to nf.
logic                               vstart_cnt_en;
logic [$bits(ara_req_i.vstart)-1:0] vstart_cnt_q;

counter #(
  .WIDTH          ($bits(ara_req_i.vstart)),
  .STICKY_OVERFLOW(1'b0                   )
) i_vstart_cnt (
  .clk_i     (clk_i           ),
  .rst_ni    (rst_ni          ),
  // This counter is never cleared (it is re-loaded at the start of every
  // sequence). Tie the input off explicitly rather than leaving it floating.
  .clear_i   (1'b0            ),
  .en_i      (vstart_cnt_en   ),
  .load_i    (new_seg_mem_op  ),
  .down_i    (1'b0            ),
  .d_i       (ara_req_i.vstart),
  .q_o       (vstart_cnt_q    ),
  .overflow_o( /* Unused */   )
);
// Change destination vector index when all the fields of the segment have been processed
assign vstart_cnt_en = segment_cnt_en & (segment_cnt_q == ara_req_i.nf);

// Next vstart count (next_vstart_cnt is one bit wider than vstart_cnt_q, and is
// the value compared against ara_req_i.vl to detect the end of the sequence)
assign next_vstart_cnt = vstart_cnt_q + 1;

// Signal if the micro op seq is on (any state other than IDLE)
assign segment_micro_op_on_o = state_q != IDLE;

always_comb begin
state_d = state_q;

// Pass through
ara_req_o = ara_req_i;
ara_resp_o = ara_resp_i;
ara_resp_valid_o = ara_resp_valid_i;
// Block load/store_complete
load_complete_o = 1'b0;
store_complete_o = 1'b0;

ara_resp_d = ara_resp_q;
ara_resp_valid_d = ara_resp_valid_q;
is_vload_d = is_vload_q;

// Don't count up by default
new_seg_mem_op = 1'b0;
segment_cnt_en = 1'b0;

// Low-perf Moore's FSM
unique case (state_q)
// Idle state: a micro-operation sequence is started only when a segment memory
// operation that is not flagged illegal is presented.
IDLE: begin
// Send a first micro operation upon valid segment mem op
if (is_segment_mem_op_i && !illegal_insn_i) begin
// If we are here, the backend is able to accept the request
// Set-up sequencing: new_seg_mem_op clears the field counter and loads the
// vstart counter with ara_req_i.vstart
new_seg_mem_op = 1'b1;
// Set up the first micro operation (a single element)
ara_req_o.vl = 1;
// Start sequencing
state_d = SEGMENT_MICRO_OPS;
end
end
end
SEGMENT_MICRO_OPS: begin
// Manipulate the memory micro request in advance
ara_req_o.vl = 1;
ara_req_o.vstart = vstart_cnt_q;
ara_req_o.vs1 = ara_req_i.vs1 + segment_cnt_q;
ara_req_o.vd = ara_req_i.vd + segment_cnt_q;
ara_resp_valid_o = 1'b0;

// Wait for an answer from Ara's backend
if (ara_resp_valid_i) begin
// Pass to the next field if the previous micro op finished
segment_cnt_en = 1'b1;
// If exception, stop the execution
if (ara_resp_i.error) begin
ara_resp_valid_o = ara_resp_valid_i;
// If no exception, continue with the micro ops
end else begin
// If over - stop in the next cycle
if (segment_cnt_clear && (next_vstart_cnt == ara_req_i.vl)) begin
// Sample the last answer
ara_resp_d = ara_resp_i;
ara_resp_valid_d = ara_resp_valid_i;
is_vload_d = is_vload_i;
state_d = SEGMENT_MICRO_OPS_END;
// Micro-operation state: every request forwarded from here has vl = 1 and
// vstart = vstart_cnt_q, with the vs1/vd register indices offset by the current
// field index segment_cnt_q.
SEGMENT_MICRO_OPS: begin
// Manipulate the memory micro request in advance
ara_req_o.vl = 1;
ara_req_o.vstart = vstart_cnt_q;
ara_req_o.vs1 = ara_req_i.vs1 + segment_cnt_q;
ara_req_o.vd = ara_req_i.vd + segment_cnt_q;
// By default, do not forward the responses of intermediate micro operations
ara_resp_valid_o = 1'b0;

// Wait for an answer from Ara's backend
if (ara_resp_valid_i) begin
// Pass to the next field if the previous micro op finished
segment_cnt_en = 1'b1;
// If exception, stop the execution
// NOTE(review): this branch forwards the response valid but does not assign
// state_d, so the FSM remains in SEGMENT_MICRO_OPS after an exception.
// Confirm that the sequence is terminated as the comment above intends.
if (ara_resp_i.exception.valid) begin
ara_resp_valid_o = ara_resp_valid_i;
// If no exception, continue with the micro ops
end else begin
// If over - stop in the next cycle
// (the field counter is wrapping and the next segment index equals vl)
if (segment_cnt_clear && (next_vstart_cnt == ara_req_i.vl)) begin
// Sample the last answer
ara_resp_d = ara_resp_i;
ara_resp_valid_d = ara_resp_valid_i;
is_vload_d = is_vload_i;
state_d = SEGMENT_MICRO_OPS_END;
end
end
end
end
end
SEGMENT_MICRO_OPS_END: begin
ara_resp_valid_o = 1'b0;
// Wait for idle to give the final load/store_complete
if (ara_idle_i) begin
ara_resp_o = ara_resp_q;
ara_resp_valid_o = ara_resp_valid_q;
load_complete_o = is_vload_q;
store_complete_o = ~is_vload_q;
state_d = IDLE;
// Final state: the response sampled from the last micro operation is held back
// until ara_idle_i is asserted, then presented together with the load or store
// completion before returning to IDLE.
SEGMENT_MICRO_OPS_END: begin
ara_resp_valid_o = 1'b0;
// Wait for idle to give the final load/store_complete
if (ara_idle_i) begin
// Present the registered response rather than the live backend response
ara_resp_o = ara_resp_q;
ara_resp_valid_o = ara_resp_valid_q;
// is_vload_q selects which of the two completion signals is raised
load_complete_o = is_vload_q;
store_complete_o = ~is_vload_q;
state_d = IDLE;
end
end
end
default:;
endcase
end
default:;
endcase
end

always_ff @(posedge clk_i or negedge rst_ni) begin
if (!rst_ni) begin
state_q <= IDLE;
is_vload_q <= 1'b0;
ara_resp_q <= '0;
ara_resp_valid_q <= '0;
end else begin
state_q <= state_d;
is_vload_q <= is_vload_d;
ara_resp_q <= ara_resp_d;
ara_resp_valid_q <= ara_resp_valid_d;
// Sequencer registers: FSM state, load/store flag and sampled backend response.
// Reset is asynchronous and active-low (rst_ni).
always_ff @(posedge clk_i or negedge rst_ni) begin
if (!rst_ni) begin
// Reset values: FSM in IDLE, no response stored
state_q <= IDLE;
is_vload_q <= 1'b0;
ara_resp_q <= '0;
ara_resp_valid_q <= '0;
end else begin
// Take over the next-state values computed combinationally
state_q <= state_d;
is_vload_q <= is_vload_d;
ara_resp_q <= ara_resp_d;
ara_resp_valid_q <= ara_resp_valid_d;
end
end
end else begin : gen_no_segment_support
// With segment support disabled the sequencer is transparent:
// the request and response channels are wired straight through...
assign ara_req_o        = ara_req_i;
assign ara_resp_o       = ara_resp_i;
assign ara_resp_valid_o = ara_resp_valid_i;
// ...and so are the load/store completion signals
assign load_complete_o  = load_complete_i;
assign store_complete_o = store_complete_i;
// A micro-operation sequence is never in progress
assign segment_micro_op_on_o = 1'b0;
end

endmodule

0 comments on commit 83ad4a3

Please sign in to comment.