From c8ae9abc14dfc9a3bf0cd90edcbbfb34259773db Mon Sep 17 00:00:00 2001 From: Matteo Perotti Date: Mon, 2 Dec 2024 16:12:27 +0100 Subject: [PATCH] [hardware] Add support for fault-only-first loads --- hardware/include/ara/ara_typedef.svh | 2 ++ hardware/src/ara.sv | 11 ++++++++++- hardware/src/ara_dispatcher.sv | 5 +++-- hardware/src/ara_sequencer.sv | 4 +++- hardware/src/vlsu/addrgen.sv | 17 ++++++++++++++++- hardware/src/vlsu/vlsu.sv | 2 ++ 6 files changed, 36 insertions(+), 5 deletions(-) diff --git a/hardware/include/ara/ara_typedef.svh b/hardware/include/ara/ara_typedef.svh index 47ea302c5..5ca7db2ee 100644 --- a/hardware/include/ara/ara_typedef.svh +++ b/hardware/include/ara/ara_typedef.svh @@ -73,6 +73,8 @@ typedef struct packed { vlen_t vstart; rvv_pkg::vtype_t vtype; + logic fault_only_first; + // Hazards logic [NrVInsn-1:0] hazard_vs1; logic [NrVInsn-1:0] hazard_vs2; diff --git a/hardware/src/ara.sv b/hardware/src/ara.sv index abdec2c35..1a89f4a0d 100644 --- a/hardware/src/ara.sv +++ b/hardware/src/ara.sv @@ -129,6 +129,9 @@ module ara import ara_pkg::*; #( // Number of segments in segment mem op logic [2:0] nf; + // Is this a fault-only-first load? + logic fault_only_first; + // Rounding-Mode for FP operations fpnew_pkg::roundmode_e fp_rm; // Widen FP immediate (re-encoding) @@ -152,6 +155,9 @@ module ara import ara_pkg::*; #( // Instruction triggered an exception ariane_pkg::exception_t exception; + // Fault-only-first exception on element whose idx > 0 + logic fof_exception; + // New value for vstart vlen_t exception_vstart; } ara_resp_t; @@ -223,6 +229,7 @@ module ara import ara_pkg::*; #( logic addrgen_ack; ariane_pkg::exception_t addrgen_exception; vlen_t addrgen_exception_vstart; + logic addrgen_fof_exception; logic [NrLanes-1:0] alu_vinsn_done; logic [NrLanes-1:0] mfpu_vinsn_done; // Interface with the operand requesters @@ -277,7 +284,8 @@ module ara import ara_pkg::*; #( // Interface with the address generator .addrgen_ack_i (addrgen_ack ), .addrgen_exception_i (addrgen_exception ), - .addrgen_exception_vstart_i(addrgen_exception_vstart ) + .addrgen_exception_vstart_i(addrgen_exception_vstart), + .addrgen_fof_exception_i(addrgen_fof_exception) ); // Scalar move support @@ -482,6 +490,7 @@ module ara import ara_pkg::*; #( .addrgen_ack_o (addrgen_ack ), .addrgen_exception_o (addrgen_exception ), .addrgen_exception_vstart_o (addrgen_exception_vstart ), + .addrgen_fof_exception_o (addrgen_fof_exception ), // Interface with the Mask unit .mask_i (mask ), .mask_valid_i (mask_valid ), diff --git a/hardware/src/ara_dispatcher.sv b/hardware/src/ara_dispatcher.sv index b7abef276..519866c6b 100644 --- a/hardware/src/ara_dispatcher.sv +++ b/hardware/src/ara_dispatcher.sv @@ -2628,8 +2628,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( ara_req.vtype.vsew = EW8; end 5'b10000: begin // Unit-strided, fault-only first - // TODO: Not implemented - illegal_insn_load = 1'b1; + ara_req.fault_only_first = 1'b1; end default: begin // Reserved illegal_insn_load = 1'b1; @@ -2768,6 +2767,8 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( // In case of exception, modify vstart if ( ara_resp.exception.valid ) begin csr_vstart_d = ara_resp.exception_vstart; + end else if (ara_resp.fof_exception) begin + csr_vl_d = ara_resp.exception_vstart; end end end diff --git a/hardware/src/ara_sequencer.sv b/hardware/src/ara_sequencer.sv index 3b01add88..f68afa6a5 100644 --- a/hardware/src/ara_sequencer.sv +++ b/hardware/src/ara_sequencer.sv @@ -47,7 +47,8 @@ module ara_sequencer import ara_pkg::*; import rvv_pkg::*; import cf_math_pkg::i // Interface with the Address Generation input logic addrgen_ack_i, input ariane_pkg::exception_t addrgen_exception_i, - input vlen_t addrgen_exception_vstart_i + input vlen_t addrgen_exception_vstart_i, + input logic addrgen_fof_exception_i ); /////////////////////////////////// @@ -525,6 +526,7 @@ module ara_sequencer import ara_pkg::*; import rvv_pkg::*; import cf_math_pkg::i ara_resp_valid_o = 1'b1; ara_resp_o.exception = addrgen_exception_i; ara_resp_o.exception_vstart = addrgen_exception_vstart_i; + ara_resp_o.fof_exception = addrgen_fof_exception_i; end // Wait for the scalar result diff --git a/hardware/src/vlsu/addrgen.sv b/hardware/src/vlsu/addrgen.sv index 44b4d3c22..db8f626c6 100644 --- a/hardware/src/vlsu/addrgen.sv +++ b/hardware/src/vlsu/addrgen.sv @@ -55,6 +55,7 @@ module addrgen import ara_pkg::*; import rvv_pkg::*; #( output ariane_pkg::exception_t addrgen_exception_o, output logic addrgen_ack_o, output vlen_t addrgen_exception_vstart_o, + output logic addrgen_fof_exception_o, // fault-only-first output logic addrgen_illegal_load_o, output logic addrgen_illegal_store_o, // Interface with the load/store units @@ -108,6 +109,7 @@ module addrgen import ara_pkg::*; import rvv_pkg::*; #( logic [1:0] vew; // Support only up to 64-bit logic is_load; logic is_burst; // Unit-strided instructions can be converted into AXI INCR bursts + logic fault_only_first; // Fault-only-first instruction vlen_t vstart; } addrgen_req_t; addrgen_req_t addrgen_req; @@ -327,6 +329,7 @@ module addrgen import ara_pkg::*; import rvv_pkg::*; #( is_load : is_load(pe_req_q.op), // Unit-strided loads/stores trigger incremental AXI bursts. is_burst: (pe_req_q.op inside {VLE, VSE}), + fault_only_first: pe_req_q.fault_only_first, vstart : pe_req_q.vstart }; @@ -375,6 +378,7 @@ module addrgen import ara_pkg::*; import rvv_pkg::*; #( is_load : is_load(pe_req_q.op), // Unit-strided loads/stores trigger incremental AXI bursts. is_burst: 1'b0, + fault_only_first: 1'b0, vstart : pe_req_q.vstart }; addrgen_req_valid = 1'b1; @@ -658,6 +662,9 @@ module addrgen import ara_pkg::*; import rvv_pkg::*; #( mmu_vaddr_o = '0; mmu_is_store_o = 1'b0; + // No fault-only-first exception idx != 0 by default + addrgen_fof_exception_o = 1'b0; + // For addrgen FSM last_translation_completed = 1'b0; @@ -1002,7 +1009,15 @@ module addrgen import ara_pkg::*; import rvv_pkg::*; #( is_load : axi_addrgen_q.is_load, is_exception : 1'b1 }; - axi_addrgen_queue_push = 1'b1; + // Don't take trap if fault-only-first and exception is on element whose idx > 0 + axi_addrgen_queue_push = ~(axi_addrgen_q.fault_only_first + & (pe_req_q.vl != (axi_addrgen_q.len >> axi_addrgen_q.vew))); + + // If fault-only-first and the idx > 0, this exception is special and does not trap + // Inform the dispatcher to effectively modify vl and not vstart + if (pe_req_q.vl != (axi_addrgen_q.len >> axi_addrgen_q.vew)) begin + addrgen_fof_exception_o = axi_addrgen_q.fault_only_first; + end // Set vstart: vl minus how many elements we have left // NOTE: this added complexity only comes from the fact that the beat counting diff --git a/hardware/src/vlsu/vlsu.sv b/hardware/src/vlsu/vlsu.sv index 3fc87b8dc..30fb4dd7a 100644 --- a/hardware/src/vlsu/vlsu.sv +++ b/hardware/src/vlsu/vlsu.sv @@ -48,6 +48,7 @@ module vlsu import ara_pkg::*; import rvv_pkg::*; #( output logic addrgen_ack_o, output ariane_pkg::exception_t addrgen_exception_o, output vlen_t addrgen_exception_vstart_o, + output logic addrgen_fof_exception_o, // Interface with the lanes // Store unit operands input elen_t [NrLanes-1:0] stu_operand_i, @@ -167,6 +168,7 @@ module vlsu import ara_pkg::*; import rvv_pkg::*; #( .addrgen_ack_o (addrgen_ack_o ), .addrgen_exception_o ( addrgen_exception_o ), .addrgen_exception_vstart_o ( addrgen_exception_vstart_o), + .addrgen_fof_exception_o ( addrgen_fof_exception_o ), .addrgen_illegal_load_o (addrgen_illegal_load ), .addrgen_illegal_store_o (addrgen_illegal_store ), // Interface with the lanes