From 958c844bafa039f2c34d18094ab24479d12aedda Mon Sep 17 00:00:00 2001 From: Matteo Perotti Date: Mon, 2 Dec 2024 16:11:10 +0100 Subject: [PATCH 1/4] [apps] Add fault-only-first load tests --- apps/riscv-tests/isa/rv64uv/Makefrag | 3 +- apps/riscv-tests/isa/rv64uv/vlff.c | 44 ++++++++++++++-------------- 2 files changed, 24 insertions(+), 23 deletions(-) diff --git a/apps/riscv-tests/isa/rv64uv/Makefrag b/apps/riscv-tests/isa/rv64uv/Makefrag index ea436ef4f..d8a32b605 100644 --- a/apps/riscv-tests/isa/rv64uv/Makefrag +++ b/apps/riscv-tests/isa/rv64uv/Makefrag @@ -143,6 +143,7 @@ rv64uv_sc_tests = vaadd \ vslide1down \ vfslide1down \ vl \ + vlff \ vlseg \ vlsseg \ vluxseg \ @@ -180,7 +181,7 @@ rv64uv_sc_tests = vaadd \ vfrec7 \ vfrsqrt7 -#rv64uv_sc_tests = vcompress vlff vrgather +#rv64uv_sc_tests = vcompress vrgather rv64uv_p_tests = $(addprefix rv64uv-p-, $(rv64uv_sc_tests)) diff --git a/apps/riscv-tests/isa/rv64uv/vlff.c b/apps/riscv-tests/isa/rv64uv/vlff.c index b837f525d..886e36bc0 100644 --- a/apps/riscv-tests/isa/rv64uv/vlff.c +++ b/apps/riscv-tests/isa/rv64uv/vlff.c @@ -10,58 +10,58 @@ void TEST_CASE1(void) { VSET(4, e8, m1); volatile int8_t INP[] = {0xff, 0x00, 0x0f, 0xf0}; - __asm__ volatile("vle8ff.v v1, (%0)" ::"r"(INP)); - VEC_CMP_8(1, v1, 0xff, 0x00, 0x0f, 0xf0); + asm volatile("vle8ff.v v1, (%0)" ::"r"(INP)); + VCMP_U8(1, v1, 0xff, 0x00, 0x0f, 0xf0); } void TEST_CASE2(void) { VSET(4, e8, m1); volatile int8_t INP[] = {0xff, 0x00, 0x0f, 0xf0}; VLOAD_8(v0, 0x5, 0x0, 0x0, 0x0); - CLEAR(v1); - __asm__ volatile("vle8ff.v v1, (%0), v0.t" ::"r"(INP)); - VEC_CMP_8(2, v1, 0xff, 0x00, 0x0f, 0x00); + VCLEAR(v1); + asm volatile("vle8ff.v v1, (%0), v0.t" ::"r"(INP)); + VCMP_U8(2, v1, 0xff, 0x00, 0x0f, 0x00); } void TEST_CASE3(void) { VSET(3, e16, m1); volatile int16_t INP[] = {0xffff, 0x0000, 0x0f0f, 0xf0f0}; - __asm__ volatile("vle16ff.v v1, (%0)" ::"r"(INP)); - VEC_CMP_16(3, v1, 0xffff, 0x0000, 0x0f0f); + asm volatile("vle16ff.v v1, (%0)" ::"r"(INP)); + 
VCMP_U16(3, v1, 0xffff, 0x0000, 0x0f0f); } void TEST_CASE4(void) { VSET(3, e16, m1); volatile int16_t INP[] = {0xffff, 0x0001, 0x0f0f, 0xf0f0}; VLOAD_16(v0, 0x5, 0x0, 0x0, 0x0); - CLEAR(v1); - __asm__ volatile("vle16ff.v v1, (%0), v0.t" ::"r"(INP)); - VEC_CMP_16(4, v1, 0xffff, 0x0000, 0x0f0f); + VCLEAR(v1); + asm volatile("vle16ff.v v1, (%0), v0.t" ::"r"(INP)); + VCMP_U16(4, v1, 0xffff, 0x0000, 0x0f0f); } void TEST_CASE5(void) { VSET(4, e32, m1); volatile int32_t INP[] = {0xffffffff, 0x00000000, 0x0f0f0f0f, 0xf0f0f0f0}; - __asm__ volatile("vle32ff.v v1, (%0)" ::"r"(INP)); - VEC_CMP_32(5, v1, 0xffffffff, 0x00000000, 0x0f0f0f0f, 0xf0f0f0f0); + asm volatile("vle32ff.v v1, (%0)" ::"r"(INP)); + VCMP_U32(5, v1, 0xffffffff, 0x00000000, 0x0f0f0f0f, 0xf0f0f0f0); } void TEST_CASE6(void) { VSET(4, e32, m1); volatile int32_t INP[] = {0xffffffff, 0x80000000, 0x0f0f0f0f, 0xf0f0f0f0}; VLOAD_32(v0, 0x5, 0x0, 0x0, 0x0); - CLEAR(v1); - __asm__ volatile(" vle32ff.v v1, (%0), v0.t \n" ::"r"(INP)); - VEC_CMP_32(6, v1, 0xffffffff, 0x0, 0x0f0f0f0f, 0x0); + VCLEAR(v1); + asm volatile(" vle32ff.v v1, (%0), v0.t \n" ::"r"(INP)); + VCMP_U32(6, v1, 0xffffffff, 0x0, 0x0f0f0f0f, 0x0); } void TEST_CASE7(void) { VSET(4, e64, m1); volatile int64_t INP[] = {0xdeadbeefffffffff, 0xdeadbeef00000000, 0xdeadbeef0f0f0f0f, 0xdeadbeeff0f0f0f0}; - __asm__ volatile("vle64ff.v v1,(%0)" ::"r"(INP)); - VEC_CMP_64(7, v1, 0xdeadbeefffffffff, 0xdeadbeef00000000, 0xdeadbeef0f0f0f0f, - 0xdeadbeeff0f0f0f0); + asm volatile("vle64ff.v v1,(%0)" ::"r"(INP)); + VCMP_U64(7, v1, 0xdeadbeefffffffff, 0xdeadbeef00000000, 0xdeadbeef0f0f0f0f, + 0xdeadbeeff0f0f0f0); } void TEST_CASE8(void) { @@ -69,10 +69,10 @@ void TEST_CASE8(void) { volatile int64_t INP[] = {0xdeadbeefffffffff, 0xdeadbeef00000000, 0xdeadbeef0f0f0f0f, 0xdeadbeeff0f0f0f0}; VLOAD_64(v0, 0x5, 0x0, 0x0, 0x0); - CLEAR(v1); - __asm__ volatile("vle64ff.v v1,(%0), v0.t" ::"r"(INP)); - VEC_CMP_64(8, v1, 0xdeadbeefffffffff, 0x0000000000000000, 0xdeadbeef0f0f0f0f, - 
0x0000000000000000); + VCLEAR(v1); + asm volatile("vle64ff.v v1,(%0), v0.t" ::"r"(INP)); + VCMP_U64(8, v1, 0xdeadbeefffffffff, 0x0000000000000000, 0xdeadbeef0f0f0f0f, + 0x0000000000000000); } int main(void) { From f9e825577f7147ec4870f2dda2ea790f169630df Mon Sep 17 00:00:00 2001 From: Matteo Perotti Date: Mon, 2 Dec 2024 16:12:27 +0100 Subject: [PATCH 2/4] [hardware] Add support for fault-only-first loads --- hardware/include/ara/ara_typedef.svh | 2 ++ hardware/src/ara.sv | 11 ++++++++++- hardware/src/ara_dispatcher.sv | 13 +++++++++---- hardware/src/ara_sequencer.sv | 4 +++- hardware/src/vlsu/addrgen.sv | 21 +++++++++++++++++---- hardware/src/vlsu/vlsu.sv | 2 ++ 6 files changed, 43 insertions(+), 10 deletions(-) diff --git a/hardware/include/ara/ara_typedef.svh b/hardware/include/ara/ara_typedef.svh index 47ea302c5..5ca7db2ee 100644 --- a/hardware/include/ara/ara_typedef.svh +++ b/hardware/include/ara/ara_typedef.svh @@ -73,6 +73,8 @@ typedef struct packed { vlen_t vstart; rvv_pkg::vtype_t vtype; + logic fault_only_first; + // Hazards logic [NrVInsn-1:0] hazard_vs1; logic [NrVInsn-1:0] hazard_vs2; diff --git a/hardware/src/ara.sv b/hardware/src/ara.sv index abdec2c35..1a89f4a0d 100644 --- a/hardware/src/ara.sv +++ b/hardware/src/ara.sv @@ -129,6 +129,9 @@ module ara import ara_pkg::*; #( // Number of segments in segment mem op logic [2:0] nf; + // Is this a fault-only-first load? 
+ logic fault_only_first; + // Rounding-Mode for FP operations fpnew_pkg::roundmode_e fp_rm; // Widen FP immediate (re-encoding) @@ -152,6 +155,9 @@ module ara import ara_pkg::*; #( // Instruction triggered an exception ariane_pkg::exception_t exception; + // Fault-only-first exception on element whose idx > 0 + logic fof_exception; + // New value for vstart vlen_t exception_vstart; } ara_resp_t; @@ -223,6 +229,7 @@ module ara import ara_pkg::*; #( logic addrgen_ack; ariane_pkg::exception_t addrgen_exception; vlen_t addrgen_exception_vstart; + logic addrgen_fof_exception; logic [NrLanes-1:0] alu_vinsn_done; logic [NrLanes-1:0] mfpu_vinsn_done; // Interface with the operand requesters @@ -277,7 +284,8 @@ module ara import ara_pkg::*; #( // Interface with the address generator .addrgen_ack_i (addrgen_ack ), .addrgen_exception_i (addrgen_exception ), - .addrgen_exception_vstart_i(addrgen_exception_vstart ) + .addrgen_exception_vstart_i(addrgen_exception_vstart), + .addrgen_fof_exception_i(addrgen_fof_exception) ); // Scalar move support @@ -482,6 +490,7 @@ module ara import ara_pkg::*; #( .addrgen_ack_o (addrgen_ack ), .addrgen_exception_o (addrgen_exception ), .addrgen_exception_vstart_o (addrgen_exception_vstart ), + .addrgen_fof_exception_o (addrgen_fof_exception ), // Interface with the Mask unit .mask_i (mask ), .mask_valid_i (mask_valid ), diff --git a/hardware/src/ara_dispatcher.sv b/hardware/src/ara_dispatcher.sv index b7abef276..022a35a89 100644 --- a/hardware/src/ara_dispatcher.sv +++ b/hardware/src/ara_dispatcher.sv @@ -2628,8 +2628,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( ara_req.vtype.vsew = EW8; end 5'b10000: begin // Unit-strided, fault-only first - // TODO: Not implemented - illegal_insn_load = 1'b1; + ara_req.fault_only_first = 1'b1; end default: begin // Reserved illegal_insn_load = 1'b1; @@ -2765,8 +2764,14 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( acc_resp_o.resp_valid = 1'b1; acc_resp_o.exception 
= ara_resp.exception; ara_req_valid = 1'b0; - // In case of exception, modify vstart - if ( ara_resp.exception.valid ) begin + // In case of exception, modify vstart or vl, depending on whether + // the instruction was a fault-only-first load + if (ara_resp.fof_exception) begin + csr_vl_d = ara_resp.exception_vstart; + // Mask the exception if a fault-only-first load faulted on an + // element with idx > 0 + acc_resp_o.exception.valid = 1'b0; + end else if (ara_resp.exception.valid) begin csr_vstart_d = ara_resp.exception_vstart; end end diff --git a/hardware/src/ara_sequencer.sv b/hardware/src/ara_sequencer.sv index 3b01add88..f68afa6a5 100644 --- a/hardware/src/ara_sequencer.sv +++ b/hardware/src/ara_sequencer.sv @@ -47,7 +47,8 @@ module ara_sequencer import ara_pkg::*; import rvv_pkg::*; import cf_math_pkg::i // Interface with the Address Generation input logic addrgen_ack_i, input ariane_pkg::exception_t addrgen_exception_i, - input vlen_t addrgen_exception_vstart_i + input vlen_t addrgen_exception_vstart_i, + input logic addrgen_fof_exception_i ); /////////////////////////////////// @@ -525,6 +526,7 @@ module ara_sequencer import ara_pkg::*; import rvv_pkg::*; import cf_math_pkg::i ara_resp_valid_o = 1'b1; ara_resp_o.exception = addrgen_exception_i; ara_resp_o.exception_vstart = addrgen_exception_vstart_i; + ara_resp_o.fof_exception = addrgen_fof_exception_i; end // Wait for the scalar result diff --git a/hardware/src/vlsu/addrgen.sv b/hardware/src/vlsu/addrgen.sv index 44b4d3c22..ed3bc59bc 100644 --- a/hardware/src/vlsu/addrgen.sv +++ b/hardware/src/vlsu/addrgen.sv @@ -55,6 +55,7 @@ module addrgen import ara_pkg::*; import rvv_pkg::*; #( output ariane_pkg::exception_t addrgen_exception_o, output logic addrgen_ack_o, output vlen_t addrgen_exception_vstart_o, + output logic addrgen_fof_exception_o, // fault-only-first output logic addrgen_illegal_load_o, output logic addrgen_illegal_store_o, // Interface with the load/store units @@ -108,6 +109,7 @@ module addrgen import ara_pkg::*; import 
rvv_pkg::*; #( logic [1:0] vew; // Support only up to 64-bit logic is_load; logic is_burst; // Unit-strided instructions can be converted into AXI INCR bursts + logic fault_only_first; // Fault-only-first instruction vlen_t vstart; } addrgen_req_t; addrgen_req_t addrgen_req; @@ -226,9 +228,7 @@ module addrgen import ara_pkg::*; import rvv_pkg::*; #( // Nothing to acknowledge addrgen_ack_o = 1'b0; - addrgen_exception_o.valid = 1'b0; - addrgen_exception_o.tval = '0; - addrgen_exception_o.cause = '0; + addrgen_exception_o = '0; addrgen_illegal_load_o = 1'b0; addrgen_illegal_store_o = 1'b0; @@ -327,6 +327,7 @@ module addrgen import ara_pkg::*; import rvv_pkg::*; #( is_load : is_load(pe_req_q.op), // Unit-strided loads/stores trigger incremental AXI bursts. is_burst: (pe_req_q.op inside {VLE, VSE}), + fault_only_first: pe_req_q.fault_only_first, vstart : pe_req_q.vstart }; @@ -375,6 +376,7 @@ module addrgen import ara_pkg::*; import rvv_pkg::*; #( is_load : is_load(pe_req_q.op), // Unit-strided loads/stores trigger incremental AXI bursts. 
is_burst: 1'b0, + fault_only_first: 1'b0, vstart : pe_req_q.vstart }; addrgen_req_valid = 1'b1; @@ -658,6 +660,9 @@ module addrgen import ara_pkg::*; import rvv_pkg::*; #( mmu_vaddr_o = '0; mmu_is_store_o = 1'b0; + // By default, no fault-only-first exception on an element with idx != 0 + addrgen_fof_exception_o = 1'b0; + // For addrgen FSM last_translation_completed = 1'b0; @@ -1002,7 +1007,15 @@ module addrgen import ara_pkg::*; import rvv_pkg::*; #( is_load : axi_addrgen_q.is_load, is_exception : 1'b1 }; - axi_addrgen_queue_push = 1'b1; + // Don't take the trap if this is a fault-only-first load and the exception is on an element whose idx > 0 + axi_addrgen_queue_push = ~(axi_addrgen_q.fault_only_first + & (pe_req_q.vl != (axi_addrgen_q.len >> axi_addrgen_q.vew))); + + // If this is a fault-only-first load and idx > 0, the exception is special and does not trap. + // Inform the dispatcher so that it modifies vl instead of vstart + if (pe_req_q.vl != (axi_addrgen_q.len >> axi_addrgen_q.vew)) begin + addrgen_fof_exception_o = axi_addrgen_q.fault_only_first; + end // Set vstart: vl minus how many elements we have left // NOTE: this added complexity only comes from the fact that the beat counting diff --git a/hardware/src/vlsu/vlsu.sv b/hardware/src/vlsu/vlsu.sv index 3fc87b8dc..30fb4dd7a 100644 --- a/hardware/src/vlsu/vlsu.sv +++ b/hardware/src/vlsu/vlsu.sv @@ -48,6 +48,7 @@ module vlsu import ara_pkg::*; import rvv_pkg::*; #( output logic addrgen_ack_o, output ariane_pkg::exception_t addrgen_exception_o, output vlen_t addrgen_exception_vstart_o, + output logic addrgen_fof_exception_o, // Interface with the lanes // Store unit operands input elen_t [NrLanes-1:0] stu_operand_i, @@ -167,6 +168,7 @@ module vlsu import ara_pkg::*; import rvv_pkg::*; #( .addrgen_ack_o (addrgen_ack_o ), .addrgen_exception_o ( addrgen_exception_o ), .addrgen_exception_vstart_o ( addrgen_exception_vstart_o), + .addrgen_fof_exception_o ( addrgen_fof_exception_o ), .addrgen_illegal_load_o (addrgen_illegal_load ), .addrgen_illegal_store_o 
(addrgen_illegal_store ), // Interface with the lanes From 2ba3657bd8c961bc24d543a3410b29a5861f73a8 Mon Sep 17 00:00:00 2001 From: Matteo Perotti Date: Mon, 2 Dec 2024 16:09:11 +0100 Subject: [PATCH 3/4] [FUNCTIONALITIES] Update Functionalities --- FUNCTIONALITIES.md | 1 + 1 file changed, 1 insertion(+) diff --git a/FUNCTIONALITIES.md b/FUNCTIONALITIES.md index 67c08d9b1..6b0347677 100644 --- a/FUNCTIONALITIES.md +++ b/FUNCTIONALITIES.md @@ -15,6 +15,7 @@ This file specifies the functionalities of the RISC-V Vector Specification suppo - Vector strided stores: `vsse` - Vector indexed loads: `vluxei`, `vloxei` - Vector indexed stores: `vsuxei`, `vsoxei` +- Vector unit-strided fault-only-first loads: `vleff.v` - Vector unit-strided segment loads: `vlsege.v` - Vector unit-strided segment stores: `vssege.v` - Vector non-unit-strided segment loads: `vlssege.v` From fcaa1be06eb2ac19edf16e3608823b38dd8dc48a Mon Sep 17 00:00:00 2001 From: Matteo Perotti Date: Mon, 2 Dec 2024 16:09:19 +0100 Subject: [PATCH 4/4] [CHANGELOG] Update Changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4bace86a6..919277f9c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -38,6 +38,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. - Add Ara VLSU support for MMU exceptions - Add multi-precision conv3d - Add support for unit-stride, non-unit-stride, indexed segment memory instructions + - Add support for fault-only-first loads ### Changed