diff --git a/hw/occamy/occamy_quadrant_s1.sv.tpl b/hw/occamy/occamy_quadrant_s1.sv.tpl index df21e6ccb..a8511d90f 100644 --- a/hw/occamy/occamy_quadrant_s1.sv.tpl +++ b/hw/occamy/occamy_quadrant_s1.sv.tpl @@ -66,7 +66,9 @@ module ${name}_quadrant_s1 `AXI_TLB_TYPEDEF_ALL(tlb, logic [AddrWidth-12-1:0], logic [AddrWidth-12-1:0]) // Signals from Controller - logic clk_quadrant, rst_quadrant_n; + logic clk_quadrant_uncore, rst_quadrant_n; + logic [${nr_clusters-1}:0] clk_quadrant_cluster; + logic [3:0] isolate, isolated; logic ro_enable, ro_flush_valid, ro_flush_ready; logic [${ro_cache_regions-1}:0][${soc_wide_xbar.in_quadrant_0.aw-1}:0] ro_start_addr, ro_end_addr; @@ -93,7 +95,7 @@ module ${name}_quadrant_s1 .declare(context) narrow_cluster_in_ctrl \ .cut(context, cuts_narrx_with_ctrl) \ - .isolate(context, "isolate[0]", "narrow_cluster_in_isolate", isolated="isolated[0]", terminated=True, to_clk="clk_quadrant", to_rst="rst_quadrant_n", num_pending=narrow_trans) \ + .isolate(context, "isolate[0]", "narrow_cluster_in_isolate", isolated="isolated[0]", terminated=True, to_clk="clk_quadrant_uncore", to_rst="rst_quadrant_n", num_pending=narrow_trans) \ .change_iw(context, narrow_xbar_quadrant_s1.in_top.iw, "narrow_cluster_in_iwc", to=narrow_xbar_quadrant_s1.in_top) %> @@ -172,7 +174,7 @@ module ${name}_quadrant_s1 .copy(name="wide_cluster_in_iwc") \ .declare(context) \ .cut(context, cuts_wideiwc_with_wideout) \ - .isolate(context, "isolate[2]", "wide_cluster_in_isolate", isolated="isolated[2]", terminated=True, atop_support=False, to_clk="clk_quadrant", to_rst="rst_quadrant_n", num_pending=wide_trans) \ + .isolate(context, "isolate[2]", "wide_cluster_in_isolate", isolated="isolated[2]", terminated=True, atop_support=False, to_clk="clk_quadrant_uncore", to_rst="rst_quadrant_n", num_pending=wide_trans) \ .cut(context, cuts_wideisolate_with_wideiwc_in) \ .change_iw(context, wide_xbar_quadrant_s1.in_top.iw, "wide_cluster_in_iwc", to=wide_xbar_quadrant_s1.in_top) %> @@ -190,7 +192,8 @@ module ${name}_quadrant_s1 .rst_ni, .test_mode_i, .chip_id_i, - .clk_quadrant_o (clk_quadrant), + .clk_quadrant_uncore_o (clk_quadrant_uncore), + .clk_quadrant_cluster_o (clk_quadrant_cluster), .rst_quadrant_no (rst_quadrant_n), .isolate_o (isolate), .isolated_i (isolated), @@ -235,7 +238,7 @@ module ${name}_quadrant_s1 assign hart_base_id_${i} = HartIdOffset + NrCoresClusterOffset[${i}]; ${cluster_name}_wrapper i_${name}_cluster_${i} ( - .clk_i (clk_quadrant), + .clk_i (clk_quadrant_cluster[${i}]), .rst_ni (rst_quadrant_n), .meip_i (meip_i[NrCoresClusterOffset[${i}]+:NrCoresCluster[${i}]]), .mtip_i (mtip_i[NrCoresClusterOffset[${i}]+:NrCoresCluster[${i}]]), diff --git a/hw/occamy/occamy_quadrant_s1_ctrl.sv.tpl b/hw/occamy/occamy_quadrant_s1_ctrl.sv.tpl index a38f58d97..d8409aee6 100644 --- a/hw/occamy/occamy_quadrant_s1_ctrl.sv.tpl +++ b/hw/occamy/occamy_quadrant_s1_ctrl.sv.tpl @@ -22,7 +22,8 @@ module ${name}_quadrant_s1_ctrl input chip_id_t chip_id_i, // Quadrant clock and reset - output logic clk_quadrant_o, + output logic [${num_clusters-1}:0] clk_quadrant_cluster_o, + output logic clk_quadrant_uncore_o, output logic rst_quadrant_no, // Quadrant control signals @@ -157,12 +158,22 @@ module ${name}_quadrant_s1_ctrl % endif // Quadrant clock gate controlled by register - tc_clk_gating i_tc_clk_gating_quadrant ( + % for cluster_idx in range(num_clusters): + tc_clk_gating i_tc_clk_gating_quadrant_cluster_${cluster_idx} ( .clk_i, - .en_i (reg2hw.clk_ena.q), + .en_i (reg2hw.clk_ena.ena_cluster_${cluster_idx}.q), .test_en_i (test_mode_i), - .clk_o (clk_quadrant_o) + .clk_o (clk_quadrant_cluster_o[${cluster_idx}]) ); + % endfor + + tc_clk_gating i_tc_clk_gating_quadrant_cluster_uncore ( + .clk_i, + .en_i (reg2hw.clk_ena.ena_quad_uncore.q), + .test_en_i (test_mode_i), + .clk_o (clk_quadrant_uncore_o) + ); + // Reset directly from register (i.e. (de)assertion inherently synchronized) // Multiplex with glitchless multiplexor, top reset for testing purposes diff --git a/hw/occamy/quadrant_s1_ctrl/occamy_quadrant_s1_reg.hjson.tpl b/hw/occamy/quadrant_s1_ctrl/occamy_quadrant_s1_reg.hjson.tpl index 8029cbecc..ba6695ead 100644 --- a/hw/occamy/quadrant_s1_ctrl/occamy_quadrant_s1_reg.hjson.tpl +++ b/hw/occamy/quadrant_s1_ctrl/occamy_quadrant_s1_reg.hjson.tpl @@ -17,7 +17,10 @@ hwaccess: "hro", // Clock disabled (i.e. gated) by default fields: [ - {bits: "0:0", name: "clk_ena", resval: 0, desc: "Clock gate enable"} +% for cluster_idx in range(num_clusters): + {bits: "${cluster_idx}:${cluster_idx}", name: "ena_cluster_${cluster_idx}", resval: 0, desc: "Clock gate enable for cluster ${cluster_idx}"}, +% endfor + {bits: "${num_clusters}:${num_clusters}", name: "ena_quad_uncore", resval: 0, desc: "Clock gate enable for cluster un-core"} ], }, { name: "RESET_N", diff --git a/target/sim/sw/host/apps/offload/src/offload.c b/target/sim/sw/host/apps/offload/src/offload.c index 20a0cf2ae..7f8b9c6d9 100644 --- a/target/sim/sw/host/apps/offload/src/offload.c +++ b/target/sim/sw/host/apps/offload/src/offload.c @@ -14,7 +14,7 @@ int main() { print_str(address_prefix, "[Occamy] Current Chip ID is: "); print_u8(address_prefix, chip_id); print_str(address_prefix, "\r\n"); - reset_and_ungate_quadrants(chip_id); + reset_and_ungate_quadrants_all(chip_id); print_str(address_prefix, "[Occamy] Snitch ungated. \r\n"); deisolate_all(chip_id); print_str(address_prefix, "[Occamy] Snitch deisolated. \r\n"); diff --git a/target/sim/sw/host/runtime/host.c b/target/sim/sw/host/runtime/host.c index 47327bea9..46c228a82 100644 --- a/target/sim/sw/host/runtime/host.c +++ b/target/sim/sw/host/runtime/host.c @@ -303,13 +303,17 @@ static inline volatile uint32_t* get_shared_lock() { //=============================================================== static inline void set_clk_ena_quad(uint8_t chip_id, uint32_t quad_idx, - uint32_t value) { + uint32_t value, uint32_t cluster_clk_enable_mask) { uint32_t* clk_ena_ptr = (uint32_t*)((uintptr_t)quad_cfg_clk_ena_ptr(quad_idx) | (uintptr_t)get_chip_baseaddress(chip_id)); - *clk_ena_ptr = value & 0x1; + *clk_ena_ptr = value & cluster_clk_enable_mask; } +// static inline void set_clk_ena_quad(uint32_t quad_idx, uint32_t value) { +// *quad_cfg_clk_ena_ptr(quad_idx) = value & 0x1; +// } + static inline void set_reset_n_quad(uint8_t chip_id, uint32_t quad_idx, uint32_t value) { uint32_t* reset_n_ptr = @@ -319,18 +323,21 @@ static inline void set_reset_n_quad(uint8_t chip_id, uint32_t quad_idx, } static inline void reset_and_ungate_quad(uint8_t chip_id, - uint32_t quadrant_idx) { + uint32_t quadrant_idx, uint32_t cluster_clk_enable_mask) { set_reset_n_quad(chip_id, quadrant_idx, 0); - set_clk_ena_quad(chip_id, quadrant_idx, 0); + set_clk_ena_quad(chip_id, quadrant_idx, 0, cluster_clk_enable_mask); __asm__ __volatile__("fence" ::: "memory"); - set_reset_n_quad(chip_id, quadrant_idx, 1); - set_clk_ena_quad(chip_id, quadrant_idx, 1); + set_reset_n_quad(chip_id, quadrant_idx, 0xFFFFFFFF); + set_clk_ena_quad(chip_id, quadrant_idx, 0xFFFFFFFF, cluster_clk_enable_mask); } -static inline void reset_and_ungate_quadrants(uint8_t chip_id) { - for (int i = 0; i < N_QUADS; i++) reset_and_ungate_quad(chip_id, i); +static inline void reset_and_ungate_quadrants(uint8_t chip_id, uint32_t cluster_clk_enable_mask) { + for (int i = 0; i < N_QUADS; i++) reset_and_ungate_quad(chip_id, i, cluster_clk_enable_mask); } +static inline void reset_and_ungate_quadrants_all(uint8_t chip_id) { + for (int i = 0; i < N_QUADS; i++) reset_and_ungate_quad(chip_id, i, 0xFFFF); +} //=============================================================== // Interrupts //=============================================================== diff --git a/util/occamygen/occamy.py b/util/occamygen/occamy.py index 593b37565..bd2c5de17 100644 --- a/util/occamygen/occamy.py +++ b/util/occamygen/occamy.py @@ -438,6 +438,7 @@ def get_soc_kwargs(occamy_cfg, cluster_generators, soc_narrow_xbar, soc_wide_xba def get_quadrant_ctrl_kwargs(occamy_cfg, soc_wide_xbar, soc_narrow_xbar, quadrant_s1_ctrl_xbars, quadrant_s1_ctrl_mux, name): + num_clusters = len(occamy_cfg["clusters"]) ro_cache_cfg = occamy_cfg["s1_quadrant"].get("ro_cache_cfg", {}) ro_cache_regions = ro_cache_cfg.get("address_regions", 1) narrow_tlb_cfg = occamy_cfg["s1_quadrant"].get("narrow_tlb_cfg", {}) @@ -448,6 +449,7 @@ def get_quadrant_ctrl_kwargs(occamy_cfg, soc_wide_xbar, soc_narrow_xbar, quadran quadrant_ctrl_kwargs = { "name": name, "occamy_cfg": occamy_cfg, + "num_clusters": num_clusters, "ro_cache_cfg": ro_cache_cfg, "ro_cache_regions": ro_cache_regions, "narrow_tlb_cfg": narrow_tlb_cfg, diff --git a/util/occamygen/occamygen.py b/util/occamygen/occamygen.py index 38b8bed38..4d5238054 100755 --- a/util/occamygen/occamygen.py +++ b/util/occamygen/occamygen.py @@ -444,7 +444,7 @@ def main(): occamy_cfg["s1_quadrant"]["wide_xbar_slv_id_width"], chipidw=occamy_cfg["hemaia_multichip"]["chip_id_width"], name="wide_xbar_quadrant_s1", - clk="clk_quadrant", + clk="clk_quadrant_uncore", rst="rst_quadrant_n", max_slv_trans=occamy_cfg["s1_quadrant"]["wide_xbar"]["max_slv_trans"], max_mst_trans=occamy_cfg["s1_quadrant"]["wide_xbar"]["max_mst_trans"], @@ -461,7 +461,7 @@ def main(): occamy_cfg["s1_quadrant"]["narrow_xbar_user_width"], chipidw=occamy_cfg["hemaia_multichip"]["chip_id_width"], name="narrow_xbar_quadrant_s1", - clk="clk_quadrant", + clk="clk_quadrant_uncore", rst="rst_quadrant_n", max_slv_trans=occamy_cfg["s1_quadrant"]["narrow_xbar"] ["max_slv_trans"],