Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[hardware] Add fault-only-first loads #380

Merged
merged 4 commits into from
Dec 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
- Add Ara VLSU support for MMU exceptions
- Add multi-precision conv3d
- Add support for unit-stride, non-unit-stride, indexed segment memory instructions
- Add support for fault-only-first loads

### Changed

Expand Down
1 change: 1 addition & 0 deletions FUNCTIONALITIES.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ This file specifies the functionalities of the RISC-V Vector Specification suppo
- Vector strided stores: `vsse<eew>`
- Vector indexed loads: `vluxei<eew>`, `vloxei<eew>`
- Vector indexed stores: `vsuxei<eew>`, `vsoxei<eew>`
- Vector unit-strided fault-only-first loads: `vle<eew>ff.v`
- Vector unit-strided segment loads: `vlseg<nf>e<eew>.v`
- Vector unit-strided segment stores: `vsseg<nf>e<eew>.v`
- Vector non-unit-strided segment loads: `vlsseg<nf>e<eew>.v`
Expand Down
3 changes: 2 additions & 1 deletion apps/riscv-tests/isa/rv64uv/Makefrag
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,7 @@ rv64uv_sc_tests = vaadd \
vslide1down \
vfslide1down \
vl \
vlff \
vlseg \
vlsseg \
vluxseg \
Expand Down Expand Up @@ -180,7 +181,7 @@ rv64uv_sc_tests = vaadd \
vfrec7 \
vfrsqrt7

#rv64uv_sc_tests = vcompress vlff vrgather
#rv64uv_sc_tests = vcompress vrgather

rv64uv_p_tests = $(addprefix rv64uv-p-, $(rv64uv_sc_tests))

Expand Down
44 changes: 22 additions & 22 deletions apps/riscv-tests/isa/rv64uv/vlff.c
Original file line number Diff line number Diff line change
Expand Up @@ -10,69 +10,69 @@
// Unmasked 8-bit fault-only-first load: executes `vle8ff.v` from INP into v1
// and compares v1 against the same four byte values that INP holds.
// NOTE(review): this span is a rendered diff, so both the pre-change lines
// (`__asm__` / VEC_CMP_8) and the post-change lines (`asm` / VCMP_U8) appear.
// NOTE(review): VSET / VCMP_U8 / VEC_CMP_8 are harness macros not visible
// here; presumably VSET configures vl and vtype and the compare macros check
// a vector register against the listed values — confirm against the harness.
// NOTE(review): the load reads from a local array, so this case does not
// appear to provoke a fault; the vl-trimming path looks unexercised here.
void TEST_CASE1(void) {
VSET(4, e8, m1);
volatile int8_t INP[] = {0xff, 0x00, 0x0f, 0xf0};
__asm__ volatile("vle8ff.v v1, (%0)" ::"r"(INP));
VEC_CMP_8(1, v1, 0xff, 0x00, 0x0f, 0xf0);
asm volatile("vle8ff.v v1, (%0)" ::"r"(INP));
VCMP_U8(1, v1, 0xff, 0x00, 0x0f, 0xf0);
}

// Masked 8-bit fault-only-first load: executes `vle8ff.v ... v0.t` from INP
// into v1 after loading v0 and clearing v1.
// The expected values match INP at indices 0 and 2 and are zero at indices
// 1 and 3, which is consistent with 0x5 (0b0101) acting as the mask.
// NOTE(review): this span is a rendered diff, so both the pre-change lines
// (CLEAR / `__asm__` / VEC_CMP_8) and the post-change lines
// (VCLEAR / `asm` / VCMP_U8) appear.
// NOTE(review): VLOAD_8 and VCLEAR are harness macros not visible here;
// presumably they write v0 and zero v1 — confirm against the harness.
void TEST_CASE2(void) {
VSET(4, e8, m1);
volatile int8_t INP[] = {0xff, 0x00, 0x0f, 0xf0};
VLOAD_8(v0, 0x5, 0x0, 0x0, 0x0);
CLEAR(v1);
__asm__ volatile("vle8ff.v v1, (%0), v0.t" ::"r"(INP));
VEC_CMP_8(2, v1, 0xff, 0x00, 0x0f, 0x00);
VCLEAR(v1);
asm volatile("vle8ff.v v1, (%0), v0.t" ::"r"(INP));
VCMP_U8(2, v1, 0xff, 0x00, 0x0f, 0x00);
}

// Unmasked 16-bit fault-only-first load: executes `vle16ff.v` from INP into
// v1. VSET is called with 3 while INP holds four values, and only the first
// three INP values are listed in the comparison.
// NOTE(review): this span is a rendered diff, so both the pre-change lines
// (`__asm__` / VEC_CMP_16) and the post-change lines (`asm` / VCMP_U16)
// appear.
// NOTE(review): presumably the first VSET argument is the vector length —
// confirm against the harness macro definition.
void TEST_CASE3(void) {
VSET(3, e16, m1);
volatile int16_t INP[] = {0xffff, 0x0000, 0x0f0f, 0xf0f0};
__asm__ volatile("vle16ff.v v1, (%0)" ::"r"(INP));
VEC_CMP_16(3, v1, 0xffff, 0x0000, 0x0f0f);
asm volatile("vle16ff.v v1, (%0)" ::"r"(INP));
VCMP_U16(3, v1, 0xffff, 0x0000, 0x0f0f);
}

// Masked 16-bit fault-only-first load: executes `vle16ff.v ... v0.t` from
// INP into v1 after loading v0 and clearing v1.
// INP[1] is 0x0001 but the expected value at index 1 is 0x0000, which is
// consistent with 0x5 (0b0101) masking off element 1 of a cleared v1.
// NOTE(review): this span is a rendered diff, so both the pre-change lines
// (CLEAR / `__asm__` / VEC_CMP_16) and the post-change lines
// (VCLEAR / `asm` / VCMP_U16) appear.
// NOTE(review): VLOAD_16 and VCLEAR are harness macros not visible here;
// presumably they write v0 and zero v1 — confirm against the harness.
void TEST_CASE4(void) {
VSET(3, e16, m1);
volatile int16_t INP[] = {0xffff, 0x0001, 0x0f0f, 0xf0f0};
VLOAD_16(v0, 0x5, 0x0, 0x0, 0x0);
CLEAR(v1);
__asm__ volatile("vle16ff.v v1, (%0), v0.t" ::"r"(INP));
VEC_CMP_16(4, v1, 0xffff, 0x0000, 0x0f0f);
VCLEAR(v1);
asm volatile("vle16ff.v v1, (%0), v0.t" ::"r"(INP));
VCMP_U16(4, v1, 0xffff, 0x0000, 0x0f0f);
}

// Unmasked 32-bit fault-only-first load: executes `vle32ff.v` from INP into
// v1 and compares v1 against the same four word values that INP holds.
// NOTE(review): this span is a rendered diff, so both the pre-change lines
// (`__asm__` / VEC_CMP_32) and the post-change lines (`asm` / VCMP_U32)
// appear.
void TEST_CASE5(void) {
VSET(4, e32, m1);
volatile int32_t INP[] = {0xffffffff, 0x00000000, 0x0f0f0f0f, 0xf0f0f0f0};
__asm__ volatile("vle32ff.v v1, (%0)" ::"r"(INP));
VEC_CMP_32(5, v1, 0xffffffff, 0x00000000, 0x0f0f0f0f, 0xf0f0f0f0);
asm volatile("vle32ff.v v1, (%0)" ::"r"(INP));
VCMP_U32(5, v1, 0xffffffff, 0x00000000, 0x0f0f0f0f, 0xf0f0f0f0);
}

// Masked 32-bit fault-only-first load: executes `vle32ff.v ... v0.t` from
// INP into v1 after loading v0 and clearing v1.
// INP[1] is 0x80000000 and INP[3] is 0xf0f0f0f0, but the expected values at
// indices 1 and 3 are 0x0, which is consistent with 0x5 (0b0101) masking off
// those elements of a cleared v1.
// NOTE(review): this span is a rendered diff, so both the pre-change lines
// (CLEAR / `__asm__` / VEC_CMP_32) and the post-change lines
// (VCLEAR / `asm` / VCMP_U32) appear.
// NOTE(review): VLOAD_32 and VCLEAR are harness macros not visible here;
// presumably they write v0 and zero v1 — confirm against the harness.
void TEST_CASE6(void) {
VSET(4, e32, m1);
volatile int32_t INP[] = {0xffffffff, 0x80000000, 0x0f0f0f0f, 0xf0f0f0f0};
VLOAD_32(v0, 0x5, 0x0, 0x0, 0x0);
CLEAR(v1);
__asm__ volatile(" vle32ff.v v1, (%0), v0.t \n" ::"r"(INP));
VEC_CMP_32(6, v1, 0xffffffff, 0x0, 0x0f0f0f0f, 0x0);
VCLEAR(v1);
asm volatile(" vle32ff.v v1, (%0), v0.t \n" ::"r"(INP));
VCMP_U32(6, v1, 0xffffffff, 0x0, 0x0f0f0f0f, 0x0);
}

// Unmasked 64-bit fault-only-first load: executes `vle64ff.v` from INP into
// v1 and compares v1 against the same four doubleword values that INP holds.
// NOTE(review): this span is a rendered diff, so both the pre-change lines
// (`__asm__` / VEC_CMP_64) and the post-change lines (`asm` / VCMP_U64)
// appear.
void TEST_CASE7(void) {
VSET(4, e64, m1);
volatile int64_t INP[] = {0xdeadbeefffffffff, 0xdeadbeef00000000,
0xdeadbeef0f0f0f0f, 0xdeadbeeff0f0f0f0};
__asm__ volatile("vle64ff.v v1,(%0)" ::"r"(INP));
VEC_CMP_64(7, v1, 0xdeadbeefffffffff, 0xdeadbeef00000000, 0xdeadbeef0f0f0f0f,
0xdeadbeeff0f0f0f0);
asm volatile("vle64ff.v v1,(%0)" ::"r"(INP));
VCMP_U64(7, v1, 0xdeadbeefffffffff, 0xdeadbeef00000000, 0xdeadbeef0f0f0f0f,
0xdeadbeeff0f0f0f0);
}

// Masked 64-bit fault-only-first load: executes `vle64ff.v ... v0.t` from
// INP into v1 after loading v0 and clearing v1.
// The expected values match INP at indices 0 and 2 and are zero at indices
// 1 and 3, which is consistent with 0x5 (0b0101) acting as the mask.
// NOTE(review): this span is a rendered diff, so both the pre-change lines
// (CLEAR / `__asm__` / VEC_CMP_64) and the post-change lines
// (VCLEAR / `asm` / VCMP_U64) appear.
// NOTE(review): VLOAD_64 and VCLEAR are harness macros not visible here;
// presumably they write v0 and zero v1 — confirm against the harness.
void TEST_CASE8(void) {
VSET(4, e64, m1);
volatile int64_t INP[] = {0xdeadbeefffffffff, 0xdeadbeef00000000,
0xdeadbeef0f0f0f0f, 0xdeadbeeff0f0f0f0};
VLOAD_64(v0, 0x5, 0x0, 0x0, 0x0);
CLEAR(v1);
__asm__ volatile("vle64ff.v v1,(%0), v0.t" ::"r"(INP));
VEC_CMP_64(8, v1, 0xdeadbeefffffffff, 0x0000000000000000, 0xdeadbeef0f0f0f0f,
0x0000000000000000);
VCLEAR(v1);
asm volatile("vle64ff.v v1,(%0), v0.t" ::"r"(INP));
VCMP_U64(8, v1, 0xdeadbeefffffffff, 0x0000000000000000, 0xdeadbeef0f0f0f0f,
0x0000000000000000);
}

int main(void) {
Expand Down
2 changes: 2 additions & 0 deletions hardware/include/ara/ara_typedef.svh
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,8 @@ typedef struct packed {
vlen_t vstart;
rvv_pkg::vtype_t vtype;

logic fault_only_first;

// Hazards
logic [NrVInsn-1:0] hazard_vs1;
logic [NrVInsn-1:0] hazard_vs2;
Expand Down
11 changes: 10 additions & 1 deletion hardware/src/ara.sv
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,9 @@ module ara import ara_pkg::*; #(
// Number of segments in segment mem op
logic [2:0] nf;

// Is this a fault-only-first load?
logic fault_only_first;

// Rounding-Mode for FP operations
fpnew_pkg::roundmode_e fp_rm;
// Widen FP immediate (re-encoding)
Expand All @@ -152,6 +155,9 @@ module ara import ara_pkg::*; #(
// Instruction triggered an exception
ariane_pkg::exception_t exception;

// Fault-only-first exception on element whose idx > 0
logic fof_exception;

// New value for vstart
vlen_t exception_vstart;
} ara_resp_t;
Expand Down Expand Up @@ -223,6 +229,7 @@ module ara import ara_pkg::*; #(
logic addrgen_ack;
ariane_pkg::exception_t addrgen_exception;
vlen_t addrgen_exception_vstart;
logic addrgen_fof_exception;
logic [NrLanes-1:0] alu_vinsn_done;
logic [NrLanes-1:0] mfpu_vinsn_done;
// Interface with the operand requesters
Expand Down Expand Up @@ -277,7 +284,8 @@ module ara import ara_pkg::*; #(
// Interface with the address generator
.addrgen_ack_i (addrgen_ack ),
.addrgen_exception_i (addrgen_exception ),
.addrgen_exception_vstart_i(addrgen_exception_vstart )
.addrgen_exception_vstart_i(addrgen_exception_vstart),
.addrgen_fof_exception_i(addrgen_fof_exception)
);

// Scalar move support
Expand Down Expand Up @@ -482,6 +490,7 @@ module ara import ara_pkg::*; #(
.addrgen_ack_o (addrgen_ack ),
.addrgen_exception_o (addrgen_exception ),
.addrgen_exception_vstart_o (addrgen_exception_vstart ),
.addrgen_fof_exception_o (addrgen_fof_exception ),
// Interface with the Mask unit
.mask_i (mask ),
.mask_valid_i (mask_valid ),
Expand Down
13 changes: 9 additions & 4 deletions hardware/src/ara_dispatcher.sv
Original file line number Diff line number Diff line change
Expand Up @@ -2628,8 +2628,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
ara_req.vtype.vsew = EW8;
end
5'b10000: begin // Unit-strided, fault-only first
// TODO: Not implemented
illegal_insn_load = 1'b1;
ara_req.fault_only_first = 1'b1;
end
default: begin // Reserved
illegal_insn_load = 1'b1;
Expand Down Expand Up @@ -2765,8 +2764,14 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
acc_resp_o.resp_valid = 1'b1;
acc_resp_o.exception = ara_resp.exception;
ara_req_valid = 1'b0;
// In case of exception, modify vstart
if ( ara_resp.exception.valid ) begin
// In case of exception, modify vstart or vl, depending on whether the insn
// was a fault-only-first load
if (ara_resp.fof_exception) begin
csr_vl_d = ara_resp.exception_vstart;
// Mask exception if we had a fault-only-first with exception on
// idx > 0
acc_resp_o.exception.valid = 1'b0;
end else if (ara_resp.exception.valid) begin
csr_vstart_d = ara_resp.exception_vstart;
end
end
Expand Down
4 changes: 3 additions & 1 deletion hardware/src/ara_sequencer.sv
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,8 @@ module ara_sequencer import ara_pkg::*; import rvv_pkg::*; import cf_math_pkg::i
// Interface with the Address Generation
input logic addrgen_ack_i,
input ariane_pkg::exception_t addrgen_exception_i,
input vlen_t addrgen_exception_vstart_i
input vlen_t addrgen_exception_vstart_i,
input logic addrgen_fof_exception_i
);

///////////////////////////////////
Expand Down Expand Up @@ -525,6 +526,7 @@ module ara_sequencer import ara_pkg::*; import rvv_pkg::*; import cf_math_pkg::i
ara_resp_valid_o = 1'b1;
ara_resp_o.exception = addrgen_exception_i;
ara_resp_o.exception_vstart = addrgen_exception_vstart_i;
ara_resp_o.fof_exception = addrgen_fof_exception_i;
end

// Wait for the scalar result
Expand Down
21 changes: 17 additions & 4 deletions hardware/src/vlsu/addrgen.sv
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ module addrgen import ara_pkg::*; import rvv_pkg::*; #(
output ariane_pkg::exception_t addrgen_exception_o,
output logic addrgen_ack_o,
output vlen_t addrgen_exception_vstart_o,
output logic addrgen_fof_exception_o, // fault-only-first
output logic addrgen_illegal_load_o,
output logic addrgen_illegal_store_o,
// Interface with the load/store units
Expand Down Expand Up @@ -108,6 +109,7 @@ module addrgen import ara_pkg::*; import rvv_pkg::*; #(
logic [1:0] vew; // Support only up to 64-bit
logic is_load;
logic is_burst; // Unit-strided instructions can be converted into AXI INCR bursts
logic fault_only_first; // Fault-only-first instruction
vlen_t vstart;
} addrgen_req_t;
addrgen_req_t addrgen_req;
Expand Down Expand Up @@ -226,9 +228,7 @@ module addrgen import ara_pkg::*; import rvv_pkg::*; #(

// Nothing to acknowledge
addrgen_ack_o = 1'b0;
addrgen_exception_o.valid = 1'b0;
addrgen_exception_o.tval = '0;
addrgen_exception_o.cause = '0;
addrgen_exception_o = '0;
addrgen_illegal_load_o = 1'b0;
addrgen_illegal_store_o = 1'b0;

Expand Down Expand Up @@ -327,6 +327,7 @@ module addrgen import ara_pkg::*; import rvv_pkg::*; #(
is_load : is_load(pe_req_q.op),
// Unit-strided loads/stores trigger incremental AXI bursts.
is_burst: (pe_req_q.op inside {VLE, VSE}),
fault_only_first: pe_req_q.fault_only_first,
vstart : pe_req_q.vstart
};

Expand Down Expand Up @@ -375,6 +376,7 @@ module addrgen import ara_pkg::*; import rvv_pkg::*; #(
is_load : is_load(pe_req_q.op),
// Unit-strided loads/stores trigger incremental AXI bursts.
is_burst: 1'b0,
fault_only_first: 1'b0,
vstart : pe_req_q.vstart
};
addrgen_req_valid = 1'b1;
Expand Down Expand Up @@ -658,6 +660,9 @@ module addrgen import ara_pkg::*; import rvv_pkg::*; #(
mmu_vaddr_o = '0;
mmu_is_store_o = 1'b0;

// By default, no fault-only-first exception on an element with idx > 0
addrgen_fof_exception_o = 1'b0;

// For addrgen FSM
last_translation_completed = 1'b0;

Expand Down Expand Up @@ -1002,7 +1007,15 @@ module addrgen import ara_pkg::*; import rvv_pkg::*; #(
is_load : axi_addrgen_q.is_load,
is_exception : 1'b1
};
axi_addrgen_queue_push = 1'b1;
// Don't take trap if fault-only-first and exception is on element whose idx > 0
axi_addrgen_queue_push = ~(axi_addrgen_q.fault_only_first
& (pe_req_q.vl != (axi_addrgen_q.len >> axi_addrgen_q.vew)));

// If fault-only-first and the idx > 0, this exception is special and does not trap
// Inform the dispatcher to effectively modify vl and not vstart
if (pe_req_q.vl != (axi_addrgen_q.len >> axi_addrgen_q.vew)) begin
addrgen_fof_exception_o = axi_addrgen_q.fault_only_first;
end

// Set vstart: vl minus how many elements we have left
// NOTE: this added complexity only comes from the fact that the beat counting
Expand Down
2 changes: 2 additions & 0 deletions hardware/src/vlsu/vlsu.sv
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ module vlsu import ara_pkg::*; import rvv_pkg::*; #(
output logic addrgen_ack_o,
output ariane_pkg::exception_t addrgen_exception_o,
output vlen_t addrgen_exception_vstart_o,
output logic addrgen_fof_exception_o,
// Interface with the lanes
// Store unit operands
input elen_t [NrLanes-1:0] stu_operand_i,
Expand Down Expand Up @@ -167,6 +168,7 @@ module vlsu import ara_pkg::*; import rvv_pkg::*; #(
.addrgen_ack_o (addrgen_ack_o ),
.addrgen_exception_o ( addrgen_exception_o ),
.addrgen_exception_vstart_o ( addrgen_exception_vstart_o),
.addrgen_fof_exception_o ( addrgen_fof_exception_o ),
.addrgen_illegal_load_o (addrgen_illegal_load ),
.addrgen_illegal_store_o (addrgen_illegal_store ),
// Interface with the lanes
Expand Down
Loading