diff --git a/sw/apps/transformer/src/transformer.h b/sw/apps/transformer/src/transformer.h index 8b5bb7bf1..a19a56a96 100644 --- a/sw/apps/transformer/src/transformer.h +++ b/sw/apps/transformer/src/transformer.h @@ -942,17 +942,23 @@ static inline void transformer_layer_fp64(transformer_layer_fp64_t *const l) { // now we will add the partial results together // in a logarithmic reduction fashion - uint32_t cluster_offset = 0x40000; + uint32_t cl_offset = 0x40000; // num_levels: number of levels in the reduction tree int num_levels = (int)log2(num_cores); for (int level = 0; level < num_levels; level++) { + // determine whether the current core is an active core + uint32_t is_active = (compute_id % (1 << level)) == 0; + dump_id(level); // CSR 5 + dump_idx(is_active); // CSR 6 + // every core adds its partial result to the result of the core // that is 2^level away from it, except for the last core after // the first iteration uint32_t src_core_id = compute_id; uint32_t dst_core_id = compute_id + (1 << level); - dump_idx(src_core_id); - dump_idx(dst_core_id); + // dump_idx(src_core_id); + // dump_idx(dst_core_id); + } }