Skip to content

Commit

Permalink
hw: CSR-based hardware cluster barrier
Browse files Browse the repository at this point in the history
  • Loading branch information
colluca committed Oct 3, 2023
1 parent 4af25a6 commit 78c36f2
Show file tree
Hide file tree
Showing 9 changed files with 66 additions and 100 deletions.
1 change: 1 addition & 0 deletions hw/snitch/src/riscv_instr.sv
Original file line number Diff line number Diff line change
Expand Up @@ -1133,6 +1133,7 @@ package riscv_instr;
localparam logic [11:0] CSR_MHARTID = 12'hf14;
localparam logic [11:0] CSR_SSR = 12'h7c0;
localparam logic [11:0] CSR_FPMODE = 12'h7c1;
localparam logic [11:0] CSR_BARRIER = 12'h7c2;
localparam logic [11:0] CSR_HTIMEDELTAH = 12'h615;
localparam logic [11:0] CSR_CYCLEH = 12'hc80;
localparam logic [11:0] CSR_TIMEH = 12'hc81;
Expand Down
22 changes: 19 additions & 3 deletions hw/snitch/src/snitch.sv
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,10 @@ module snitch import snitch_pkg::*; import riscv_instr::*; #(
output fpnew_pkg::fmt_mode_t fpu_fmt_mode_o,
input fpnew_pkg::status_t fpu_status_i,
// Core events for performance counters
output snitch_pkg::core_events_t core_events_o
output snitch_pkg::core_events_t core_events_o,
// Cluster HW barrier
output logic barrier_o,
input logic barrier_i
);
// Debug module's base address
localparam logic [31:0] DmBaseAddress = 0;
Expand Down Expand Up @@ -232,6 +235,7 @@ module snitch import snitch_pkg::*; import riscv_instr::*; #(
// -----
logic [31:0] csr_rvalue;
logic csr_en;
logic csr_stall_d, csr_stall_q;

localparam logic M = 0;
localparam logic S = 1;
Expand Down Expand Up @@ -293,6 +297,8 @@ module snitch import snitch_pkg::*; import riscv_instr::*; #(
`FFNR(dscratch_q, dscratch_d, clk_i)
`FFAR(debug_q, debug_d, '0, clk_i, rst_i) // Debug mode

`FFAR(csr_stall_q, csr_stall_d, '0, clk_i, rst_i)

typedef struct packed {
fpnew_pkg::fmt_mode_t fmode;
fpnew_pkg::roundmode_e frm;
Expand Down Expand Up @@ -398,7 +404,7 @@ module snitch import snitch_pkg::*; import riscv_instr::*; #(
| (~{(PPNSize){trans_active}} & {{{AddrWidth-32}{1'b0}}, pc_q[31:PageShift]});
assign inst_addr_o[PageShift-1:0] = pc_q[PageShift-1:0];
assign inst_cacheable_o = snitch_pma_pkg::is_inside_cacheable_regions(SnitchPMACfg, inst_addr_o);
assign inst_valid_o = ~wfi_q;
assign inst_valid_o = ~wfi_q && ~csr_stall_q;

// --------------------
// Control
Expand Down Expand Up @@ -454,7 +460,7 @@ module snitch import snitch_pkg::*; import riscv_instr::*; #(
pc_d = pc_q;
npc = pc_q; // the next PC if we wouldn't be in debug mode
// if we got a valid instruction word increment the PC unless we are waiting for an event
if (!stall && !wfi_q) begin
if (!stall && !wfi_q && !csr_stall_q) begin
casez (next_pc)
Consec: npc = consec_pc;
Alu: npc = alu_result & {{31{1'b1}}, ~zero_lsb};
Expand Down Expand Up @@ -2273,6 +2279,11 @@ module snitch import snitch_pkg::*; import riscv_instr::*; #(
dpc_d = dpc_q;
dscratch_d = dscratch_q;

csr_stall_d = csr_stall_q;

if (barrier_i) csr_stall_d = 1'b0;
barrier_o = 1'b0;

// DPC and DCSR update logic
if (!debug_q) begin
if (valid_instr && inst_data_i == EBREAK) begin
Expand Down Expand Up @@ -2468,6 +2479,11 @@ module snitch import snitch_pkg::*; import riscv_instr::*; #(
if (!exception) fcsr_d = fcsr_t'(alu_result[9:0]);
end else illegal_csr = 1'b1;
end
// HW cluster barrier
CSR_BARRIER: begin
barrier_o = 1'b1;
csr_stall_d = 1'b1;
end
default: csr_rvalue = '0;
endcase
end else illegal_csr = 1'b1;
Expand Down
85 changes: 21 additions & 64 deletions hw/snitch_cluster/src/snitch_barrier.sv
Original file line number Diff line number Diff line change
@@ -1,80 +1,37 @@
// Copyright 2020 ETH Zurich and University of Bologna.
// Copyright 2023 ETH Zurich and University of Bologna.
// Solderpad Hardware License, Version 0.51, see LICENSE for details.
// SPDX-License-Identifier: SHL-0.51

// Author: Florian Zaruba <[email protected]>
// Author: Fabian Schuiki <[email protected]>
//
// Author: Luca Colagrande <[email protected]>

`include "common_cells/registers.svh"

/// Hardware barrier to synchronize all cores in a cluster.
module snitch_barrier
import snitch_pkg::*;
import snitch_cluster_peripheral_reg_pkg::*;
#(
parameter int unsigned AddrWidth = 0,
parameter int NrPorts = 0,
parameter type dreq_t = logic,
parameter type drsp_t = logic,
/// Derived parameter *Do not override*
parameter type addr_t = logic [AddrWidth-1:0]
module snitch_barrier #(
parameter int NrCores = 0
) (
input logic clk_i,
input logic rst_ni,
input dreq_t [NrPorts-1:0] in_req_i,
output drsp_t [NrPorts-1:0] in_rsp_o,

output dreq_t [NrPorts-1:0] out_req_o,
input drsp_t [NrPorts-1:0] out_rsp_i,

input addr_t cluster_periph_start_address_i
input logic clk_i,
input logic rst_ni,
input logic [NrCores-1:0] barrier_i,
output logic barrier_o
);

typedef enum logic [1:0] {
Idle,
Wait,
Take
} barrier_state_e;
barrier_state_e [NrPorts-1:0] state_d, state_q;
logic [NrPorts-1:0] is_barrier;
logic take_barrier;
logic [NrCores-1:0] arrival_d, arrival_q;

generate
for (genvar i = 0; i < NrCores; i++) begin : gen_arrival_bit

assign take_barrier = &is_barrier;
`FF(arrival_q[i], arrival_d[i], 1'b0, clk_i, rst_ni)

always_comb begin
state_d = state_q;
is_barrier = '0;
out_req_o = in_req_i;
in_rsp_o = out_rsp_i;
always_comb begin
if (barrier_o) arrival_d[i] = 1'b0;
else if (barrier_i[i]) arrival_d[i] = 1'b1;
else arrival_d[i] = arrival_q[i];
end

for (int i = 0; i < NrPorts; i++) begin
case (state_q[i])
Idle: begin
if (in_req_i[i].q_valid &&
(in_req_i[i].q.addr ==
cluster_periph_start_address_i +
SNITCH_CLUSTER_PERIPHERAL_HW_BARRIER_OFFSET)) begin
state_d[i] = Wait;
out_req_o[i].q_valid = 0;
in_rsp_o[i].q_ready = 0;
end
end
Wait: begin
is_barrier[i] = 1;
out_req_o[i].q_valid = 0;
in_rsp_o[i].q_ready = 0;
if (take_barrier) state_d[i] = Take;
end
Take: begin
if (out_req_o[i].q_valid && in_rsp_o[i].q_ready) state_d[i] = Idle;
end
default: state_d[i] = Idle;
endcase
end
end
endgenerate

for (genvar i = 0; i < NrPorts; i++) begin : gen_ff
`FFARN(state_q[i], state_d[i], Idle, clk_i, rst_ni)
end
assign barrier_o = &arrival_q;

endmodule
9 changes: 7 additions & 2 deletions hw/snitch_cluster/src/snitch_cc.sv
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,10 @@ module snitch_cc #(
output dma_events_t axi_dma_events_o,
// Core event strobes
output snitch_pkg::core_events_t core_events_o,
input addr_t tcdm_addr_base_i
input addr_t tcdm_addr_base_i,
// Cluster HW barrier
output logic barrier_o,
input logic barrier_i
);

// FMA architecture is "merged" -> mulexp and macexp instructions are supported
Expand Down Expand Up @@ -252,7 +255,9 @@ module snitch_cc #(
.fpu_rnd_mode_o ( fpu_rnd_mode ),
.fpu_fmt_mode_o ( fpu_fmt_mode ),
.fpu_status_i ( fpu_status ),
.core_events_o ( snitch_events)
.core_events_o ( snitch_events),
.barrier_o ( barrier_o ),
.barrier_i ( barrier_i )
);

reqrsp_iso #(
Expand Down
27 changes: 13 additions & 14 deletions hw/snitch_cluster/src/snitch_cluster.sv
Original file line number Diff line number Diff line change
Expand Up @@ -490,8 +490,8 @@ module snitch_cluster
snitch_icache_pkg::icache_events_t [NrCores-1:0] icache_events;

// 4. Memory Subsystem (Core side).
reqrsp_req_t [NrCores-1:0] core_req, filtered_core_req;
reqrsp_rsp_t [NrCores-1:0] core_rsp, filtered_core_rsp;
reqrsp_req_t [NrCores-1:0] core_req;
reqrsp_rsp_t [NrCores-1:0] core_rsp;
reqrsp_req_t [NrHives-1:0] ptw_req;
reqrsp_rsp_t [NrHives-1:0] ptw_rsp;

Expand All @@ -502,6 +502,8 @@ module snitch_cluster
// 5. Misc. Wires.
logic icache_prefetch_enable;
logic [NrCores-1:0] cl_interrupt;
logic [NrCores-1:0] barrier_in;
logic barrier_out;

// -------------
// DMA Subsystem
Expand Down Expand Up @@ -889,7 +891,9 @@ module snitch_cluster
.axi_dma_perf_o (),
.axi_dma_events_o (dma_core_events),
.core_events_o (core_events[i]),
.tcdm_addr_base_i (tcdm_start_address)
.tcdm_addr_base_i (tcdm_start_address),
.barrier_o (barrier_in[i]),
.barrier_i (barrier_out)
);
for (genvar j = 0; j < TcdmPorts; j++) begin : gen_tcdm_user
always_comb begin
Expand Down Expand Up @@ -1000,19 +1004,14 @@ module snitch_cluster
// --------
// Coes SoC
// --------

snitch_barrier #(
.AddrWidth (PhysicalAddrWidth),
.NrPorts (NrCores),
.dreq_t (reqrsp_req_t),
.drsp_t (reqrsp_rsp_t)
.NrCores(NrCores)
) i_snitch_barrier (
.clk_i,
.rst_ni,
.in_req_i (core_req),
.in_rsp_o (core_rsp),
.out_req_o (filtered_core_req),
.out_rsp_i (filtered_core_rsp),
.cluster_periph_start_address_i (cluster_periph_start_address)
.barrier_i(barrier_in),
.barrier_o(barrier_out)
);

reqrsp_req_t core_to_axi_req;
Expand All @@ -1032,8 +1031,8 @@ module snitch_cluster
) i_reqrsp_mux_core (
.clk_i,
.rst_ni,
.slv_req_i (filtered_core_req),
.slv_rsp_o (filtered_core_rsp),
.slv_req_i (core_req),
.slv_rsp_o (core_rsp),
.mst_req_o (core_to_axi_req),
.mst_rsp_i (core_to_axi_rsp),
.idx_o (/*unused*/)
Expand Down
2 changes: 1 addition & 1 deletion sw/deps/riscv-opcodes
Submodule riscv-opcodes updated 1 files
+1 −0 parse_opcodes
11 changes: 2 additions & 9 deletions sw/snRuntime/src/start.c
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,6 @@ static inline uint32_t snrt_cls_base_addr() {
return l1_end_addr - cdata_size - cbss_size;
}

static inline void snrt_crt0_cluster_hw_barrier() {
uint32_t register r;
uint32_t hw_barrier =
SNRT_CLUSTER_HW_BARRIER_ADDR + snrt_cluster_idx() * SNRT_CLUSTER_OFFSET;
asm volatile("lw %0, 0(%1)" : "=r"(r) : "r"(hw_barrier) : "memory");
}

static inline void snrt_init_tls() {
extern volatile uint32_t __tdata_start, __tdata_end;
extern volatile uint32_t __tbss_start, __tbss_end;
Expand Down Expand Up @@ -130,7 +123,7 @@ void snrt_main() {
#endif

#ifdef SNRT_CRT0_PRE_BARRIER
snrt_crt0_cluster_hw_barrier();
snrt_cluster_hw_barrier();
#endif

#ifdef SNRT_CRT0_CALLBACK5
Expand All @@ -147,7 +140,7 @@ void snrt_main() {
#endif

#ifdef SNRT_CRT0_POST_BARRIER
snrt_crt0_cluster_hw_barrier();
snrt_cluster_hw_barrier();
#endif

#ifdef SNRT_CRT0_CALLBACK7
Expand Down
7 changes: 1 addition & 6 deletions sw/snRuntime/src/sync.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,12 +56,7 @@ inline void snrt_mutex_release(volatile uint32_t *pmtx) {

/// Synchronize cores in a cluster with a hardware barrier
inline void snrt_cluster_hw_barrier() {
uint32_t register r;

asm volatile("lw %0, 0(%1)"
: "=r"(r)
: "r"((uint32_t)snrt_cluster_hw_barrier_addr())
: "memory");
asm volatile("csrr x0, 0x7C2" ::: "memory");
}

/// Synchronize clusters globally with a global software barrier
Expand Down
2 changes: 1 addition & 1 deletion util/generate-opcodes.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
# Solderpad Hardware License, Version 0.51, see LICENSE for details.
# SPDX-License-Identifier: SHL-0.51

# Generate the opcodes for the Snith system.
# Generate the opcodes for the Snitch system.
set -e
ROOT=$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)

Expand Down

0 comments on commit 78c36f2

Please sign in to comment.