From 83ad4a37f4cc37ed8cc1d37f6b28bc9660d1cf30 Mon Sep 17 00:00:00 2001
From: Matteo Perotti
Date: Sat, 26 Oct 2024 12:04:47 +0200
Subject: [PATCH] [hardware] Fixes for segment hardware

* ara_dispatcher: pass the `ara_req_t` and `ara_resp_t` types down to the
  `segment_sequencer` instance.
* segment_sequencer: accept `ara_req_t` and `ara_resp_t` as type parameters.
* segment_sequencer: generate the micro-op FSM and its two counters only when
  `SegSupport == SegSupportEnable`. Otherwise the request, the response and
  the load/store_complete signals are passed straight through and
  `segment_micro_op_on_o` is held low.
* segment_sequencer: detect a failing micro-op through
  `ara_resp_i.exception.valid` instead of `ara_resp_i.error`.
* segment_sequencer: tie the unused `clear_i` input of `i_vstart_cnt` to 1'b0
  instead of leaving the input port unconnected.

---
 hardware/src/ara_dispatcher.sv    |   4 +-
 hardware/src/segment_sequencer.sv | 298 ++++++++++++++++--------------
 2 files changed, 159 insertions(+), 143 deletions(-)

diff --git a/hardware/src/ara_dispatcher.sv b/hardware/src/ara_dispatcher.sv
index b6768aeab..d2f3aa6e0 100644
--- a/hardware/src/ara_dispatcher.sv
+++ b/hardware/src/ara_dispatcher.sv
@@ -260,7 +260,9 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
   // The handshake signals are just passed-through if the insn is non-segment
   ara_resp_t ara_resp;
   segment_sequencer #(
-    .SegSupport(SegSupport)
+    .SegSupport(SegSupport),
+    .ara_req_t (ara_req_t ),
+    .ara_resp_t(ara_resp_t)
   ) i_segment_sequencer (
     .clk_i(clk_i),
     .rst_ni(rst_ni),
diff --git a/hardware/src/segment_sequencer.sv b/hardware/src/segment_sequencer.sv
index 6551d624e..50d4c7e8a 100644
--- a/hardware/src/segment_sequencer.sv
+++ b/hardware/src/segment_sequencer.sv
@@ -8,7 +8,9 @@
 // it has low-impact on the physical implementation.
 
 module segment_sequencer import ara_pkg::*; import rvv_pkg::*; #(
-    parameter bit SegSupport = 1'b0
+    parameter bit SegSupport = 1'b0,
+    parameter type ara_req_t = logic,
+    parameter type ara_resp_t = logic
   ) (
     // Clock and reset
     input logic clk_i,
@@ -35,154 +37,166 @@ module segment_sequencer import ara_pkg::*; import rvv_pkg::*; #(
 
   import cf_math_pkg::idx_width;
 
-  logic ara_resp_valid_d, ara_resp_valid_q;
-  ara_resp_t ara_resp_d, ara_resp_q;
-  logic is_vload_d, is_vload_q;
-  logic [$bits(ara_req_i.vstart):0] next_vstart_cnt;
-
-  typedef enum logic [1:0] {
-    IDLE,
-    SEGMENT_MICRO_OPS,
-    SEGMENT_MICRO_OPS_END
-  } state_e;
-  state_e state_d, state_q;
-
-  // Track the elements within each segment
-  logic new_seg_mem_op;
-  logic segment_cnt_en, segment_cnt_clear;
-  logic [$bits(ara_req_i.nf)-1:0] segment_cnt_q;
-
-  counter #(
-    .WIDTH($bits(ara_req_i.nf)),
-    .STICKY_OVERFLOW(1'b0)
-  ) i_segment_cnt (
-    .clk_i,
-    .rst_ni,
-    .clear_i(segment_cnt_clear),
-    .en_i(segment_cnt_en),
-    .load_i(1'b0),
-    .down_i(1'b0),
-    .d_i('0),
-    .q_o(segment_cnt_q),
-    .overflow_o( /* Unused */ )
-  );
-  assign segment_cnt_clear = new_seg_mem_op | (segment_cnt_en & (segment_cnt_q == ara_req_i.nf));
-
-  // Track the number of segments
-  logic vstart_cnt_en;
-  logic [$bits(ara_req_i.vstart)-1:0] vstart_cnt_q;
-
-  counter #(
-    .WIDTH($bits(ara_req_i.vstart)),
-    .STICKY_OVERFLOW(1'b0)
-  ) i_vstart_cnt (
-    .clk_i,
-    .rst_ni,
-    .clear_i( /* Unused */ ),
-    .en_i(vstart_cnt_en),
-    .load_i(new_seg_mem_op),
-    .down_i(1'b0),
-    .d_i(ara_req_i.vstart),
-    .q_o(vstart_cnt_q),
-    .overflow_o( /* Unused */ )
-  );
-  // Change destination vector index when all the fields of the segment have been processed
-  assign vstart_cnt_en = segment_cnt_en & (segment_cnt_q == ara_req_i.nf);
-
-  // Next vstart count
-  assign next_vstart_cnt = vstart_cnt_q + 1;
-
-  // Signal if the micro op seq is on
-  assign segment_micro_op_on_o = state_q != IDLE;
-
-  always_comb begin
-    state_d = state_q;
-
-    // Pass through
-    ara_req_o = ara_req_i;
-    ara_resp_o = ara_resp_i;
-    ara_resp_valid_o = ara_resp_valid_i;
-    // Block load/store_complete
-    load_complete_o = 1'b0;
-    store_complete_o = 1'b0;
-
-    ara_resp_d = ara_resp_q;
-    ara_resp_valid_d = ara_resp_valid_q;
-    is_vload_d = is_vload_q;
-
-    // Don't count up by default
-    new_seg_mem_op = 1'b0;
-    segment_cnt_en = 1'b0;
-
-    // Low-perf Moore's FSM
-    unique case (state_q)
-      IDLE: begin
-        // Send a first micro operation upon valid segment mem op
-        if (is_segment_mem_op_i && !illegal_insn_i) begin
-          // If we are here, the backend is able to accept the request
-          // Set-up sequencing
-          new_seg_mem_op = 1'b1;
-          // Set up the first micro operation
-          ara_req_o.vl = 1;
-          // Start sequencing
-          state_d = SEGMENT_MICRO_OPS;
+  if (SegSupport == SegSupportEnable) begin : gen_segment_support
+
+    logic ara_resp_valid_d, ara_resp_valid_q;
+    ara_resp_t ara_resp_d, ara_resp_q;
+    logic is_vload_d, is_vload_q;
+    logic [$bits(ara_req_i.vstart):0] next_vstart_cnt;
+
+    typedef enum logic [1:0] {
+      IDLE,
+      SEGMENT_MICRO_OPS,
+      SEGMENT_MICRO_OPS_END
+    } state_e;
+    state_e state_d, state_q;
+
+    // Track the elements within each segment
+    logic new_seg_mem_op;
+    logic segment_cnt_en, segment_cnt_clear;
+    logic [$bits(ara_req_i.nf)-1:0] segment_cnt_q;
+
+    counter #(
+      .WIDTH($bits(ara_req_i.nf)),
+      .STICKY_OVERFLOW(1'b0)
+    ) i_segment_cnt (
+      .clk_i,
+      .rst_ni,
+      .clear_i(segment_cnt_clear),
+      .en_i(segment_cnt_en),
+      .load_i(1'b0),
+      .down_i(1'b0),
+      .d_i('0),
+      .q_o(segment_cnt_q),
+      .overflow_o( /* Unused */ )
+    );
+    assign segment_cnt_clear = new_seg_mem_op | (segment_cnt_en & (segment_cnt_q == ara_req_i.nf));
+
+    // Track the number of segments
+    logic vstart_cnt_en;
+    logic [$bits(ara_req_i.vstart)-1:0] vstart_cnt_q;
+
+    counter #(
+      .WIDTH($bits(ara_req_i.vstart)),
+      .STICKY_OVERFLOW(1'b0)
+    ) i_vstart_cnt (
+      .clk_i,
+      .rst_ni,
+      .clear_i(1'b0), // Unused: tied low so that the input port is not left floating
+      .en_i(vstart_cnt_en),
+      .load_i(new_seg_mem_op),
+      .down_i(1'b0),
+      .d_i(ara_req_i.vstart),
+      .q_o(vstart_cnt_q),
+      .overflow_o( /* Unused */ )
+    );
+    // Change destination vector index when all the fields of the segment have been processed
+    assign vstart_cnt_en = segment_cnt_en & (segment_cnt_q == ara_req_i.nf);
+
+    // Next vstart count
+    assign next_vstart_cnt = vstart_cnt_q + 1;
+
+    // Signal if the micro op seq is on
+    assign segment_micro_op_on_o = state_q != IDLE;
+
+    always_comb begin
+      state_d = state_q;
+
+      // Pass through
+      ara_req_o = ara_req_i;
+      ara_resp_o = ara_resp_i;
+      ara_resp_valid_o = ara_resp_valid_i;
+      // Block load/store_complete
+      load_complete_o = 1'b0;
+      store_complete_o = 1'b0;
+
+      ara_resp_d = ara_resp_q;
+      ara_resp_valid_d = ara_resp_valid_q;
+      is_vload_d = is_vload_q;
+
+      // Don't count up by default
+      new_seg_mem_op = 1'b0;
+      segment_cnt_en = 1'b0;
+
+      // Low-perf Moore's FSM
+      unique case (state_q)
+        IDLE: begin
+          // Send a first micro operation upon valid segment mem op
+          if (is_segment_mem_op_i && !illegal_insn_i) begin
+            // If we are here, the backend is able to accept the request
+            // Set-up sequencing
+            new_seg_mem_op = 1'b1;
+            // Set up the first micro operation
+            ara_req_o.vl = 1;
+            // Start sequencing
+            state_d = SEGMENT_MICRO_OPS;
+          end
         end
-      end
-      SEGMENT_MICRO_OPS: begin
-        // Manipulate the memory micro request in advance
-        ara_req_o.vl = 1;
-        ara_req_o.vstart = vstart_cnt_q;
-        ara_req_o.vs1 = ara_req_i.vs1 + segment_cnt_q;
-        ara_req_o.vd = ara_req_i.vd + segment_cnt_q;
-        ara_resp_valid_o = 1'b0;
-
-        // Wait for an answer from Ara's backend
-        if (ara_resp_valid_i) begin
-          // Pass to the next field if the previous micro op finished
-          segment_cnt_en = 1'b1;
-          // If exception, stop the execution
-          if (ara_resp_i.error) begin
-            ara_resp_valid_o = ara_resp_valid_i;
-          // If no exception, continue with the micro ops
-          end else begin
-            // If over - stop in the next cycle
-            if (segment_cnt_clear && (next_vstart_cnt == ara_req_i.vl)) begin
-              // Sample the last answer
-              ara_resp_d = ara_resp_i;
-              ara_resp_valid_d = ara_resp_valid_i;
-              is_vload_d = is_vload_i;
-              state_d = SEGMENT_MICRO_OPS_END;
+        SEGMENT_MICRO_OPS: begin
+          // Manipulate the memory micro request in advance
+          ara_req_o.vl = 1;
+          ara_req_o.vstart = vstart_cnt_q;
+          ara_req_o.vs1 = ara_req_i.vs1 + segment_cnt_q;
+          ara_req_o.vd = ara_req_i.vd + segment_cnt_q;
+          ara_resp_valid_o = 1'b0;
+
+          // Wait for an answer from Ara's backend
+          if (ara_resp_valid_i) begin
+            // Pass to the next field if the previous micro op finished
+            segment_cnt_en = 1'b1;
+            // If exception, stop the execution
+            if (ara_resp_i.exception.valid) begin
+              ara_resp_valid_o = ara_resp_valid_i;
+            // If no exception, continue with the micro ops
+            end else begin
+              // If over - stop in the next cycle
+              if (segment_cnt_clear && (next_vstart_cnt == ara_req_i.vl)) begin
+                // Sample the last answer
+                ara_resp_d = ara_resp_i;
+                ara_resp_valid_d = ara_resp_valid_i;
+                is_vload_d = is_vload_i;
+                state_d = SEGMENT_MICRO_OPS_END;
+              end
             end
           end
         end
-      end
-      SEGMENT_MICRO_OPS_END: begin
-        ara_resp_valid_o = 1'b0;
-        // Wait for idle to give the final load/store_complete
-        if (ara_idle_i) begin
-          ara_resp_o = ara_resp_q;
-          ara_resp_valid_o = ara_resp_valid_q;
-          load_complete_o = is_vload_q;
-          store_complete_o = ~is_vload_q;
-          state_d = IDLE;
+        SEGMENT_MICRO_OPS_END: begin
+          ara_resp_valid_o = 1'b0;
+          // Wait for idle to give the final load/store_complete
+          if (ara_idle_i) begin
+            ara_resp_o = ara_resp_q;
+            ara_resp_valid_o = ara_resp_valid_q;
+            load_complete_o = is_vload_q;
+            store_complete_o = ~is_vload_q;
+            state_d = IDLE;
+          end
         end
-      end
-      default:;
-    endcase
-  end
+        default:;
+      endcase
+    end
 
-  always_ff @(posedge clk_i or negedge rst_ni) begin
-    if (!rst_ni) begin
-      state_q <= IDLE;
-      is_vload_q <= 1'b0;
-      ara_resp_q <= '0;
-      ara_resp_valid_q <= '0;
-    end else begin
-      state_q <= state_d;
-      is_vload_q <= is_vload_d;
-      ara_resp_q <= ara_resp_d;
-      ara_resp_valid_q <= ara_resp_valid_d;
+    always_ff @(posedge clk_i or negedge rst_ni) begin
+      if (!rst_ni) begin
+        state_q <= IDLE;
+        is_vload_q <= 1'b0;
+        ara_resp_q <= '0;
+        ara_resp_valid_q <= '0;
+      end else begin
+        state_q <= state_d;
+        is_vload_q <= is_vload_d;
+        ara_resp_q <= ara_resp_d;
+        ara_resp_valid_q <= ara_resp_valid_d;
+      end
     end
+  end else begin : gen_no_segment_support
+    // No segment micro-ops here
+    assign segment_micro_op_on_o = 1'b0;
+    // Pass through if segment support is disabled
+    assign load_complete_o = load_complete_i;
+    assign store_complete_o = store_complete_i;
+    assign ara_req_o = ara_req_i;
+    assign ara_resp_o = ara_resp_i;
+    assign ara_resp_valid_o = ara_resp_valid_i;
   end
 
 endmodule