Skip to content

Commit

Permalink
transformer: debug multi cluster log reduction
Browse files Browse the repository at this point in the history
  • Loading branch information
Viviane Potocnik committed Oct 22, 2023
1 parent f360af0 commit ec99160
Showing 1 changed file with 48 additions and 14 deletions.
62 changes: 48 additions & 14 deletions sw/apps/transformer/src/transformer.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
#define LAYERNORM 0 // enable this define to compute the layernorm
#define LINEAR_1 0 // enable this define to compute the linear layer 1
#define FLASH_ATTENTION 0 // enable this define to compute the flash attention
#define CONCAT 1 // enable this define to compute the head concatenation
#define CONCAT 0 // enable this define to compute the head concatenation

/**
* @struct transformer_layer_fp64_struct
Expand Down Expand Up @@ -240,6 +240,7 @@ dump_uint(ct, 11);
*/
static inline void transformer_layer_fp64(transformer_layer_fp64_t *const l) {
uint32_t compute_id = snrt_global_core_idx();
uint32_t cluster_id = snrt_cluster_idx();
uint32_t num_cores = snrt_cluster_compute_core_num();
uint32_t num_clusters = snrt_cluster_num();

Expand All @@ -253,6 +254,36 @@ static inline void transformer_layer_fp64(transformer_layer_fp64_t *const l) {
// dump_id(num_cores);
dump_id(num_clusters);

// TODO: the code below is for debugging only at the moment
// now we will add the partial results together
// in a logarithmic reduction fashion
uint32_t cl_offset = 0x40000;
uint32_t is_active = 0;
uint32_t is_sender = 0;
// num_levels: number of levels in the reduction tree
int num_levels = ceil(log2(num_clusters));
// stride: distance between two clusters in the reduction tree
for (int level = 0; level < num_levels; level++) {
// determine whether the current cluster is an active cluster
is_active = (cluster_id % (1 << level)) == 0;
if (is_active == 1) {
// the first cluster is always a receiver
// the last cluster is a sender only in the second-to-last level
if (cluster_id == 0) {
is_sender = 0;
} else if ((cluster_id == num_clusters - 1) && (level == num_levels - 1)) {
is_sender = 1;
} else {
is_sender = (cluster_id % (1 << (level + 1))) == 0;
}

dump_id(level);
dump_ct(is_active);
dump_idx(is_sender);
}

}

/////////////////////////////////////////////////////////////////////
////////////// MULTI-HEAD SELF-ATTENTION BLOCK /////////////////
///////////////////////////////////////////////////////////////////
Expand Down Expand Up @@ -943,21 +974,24 @@ static inline void transformer_layer_fp64(transformer_layer_fp64_t *const l) {
// now we will add the partial results together
// in a logarithmic reduction fashion
uint32_t cl_offset = 0x40000;
uint32_t is_active = 0;
uint32_t is_sender = 0;
// num_levels: number of levels in the reduction tree
int num_levels = (int)log2(num_cores);
int num_levels = (int)log2(num_clusters);
// stride: distance between two clusters in the reduction tree
for (int level = 0; level < num_levels; level++) {
// determine whether the current core is an active core
uint32_t is_active = (compute_id % (1 << level)) == 0;
dump_id(level); // CSR 5
dump_idx(is_active); // CSR 6

// every core adds its partial result to the result of the core
// that is 2^level away from it, except for the last core after
// the first iteration
uint32_t src_core_id = compute_id;
uint32_t dst_core_id = compute_id + (1 << level);
// dump_idx(src_core_id);
// dump_idx(dst_core_id);
// determine whether the current cluster is an active cluster
is_active = (cluster_id % (1 << level)) == 0;
if (is_active == 1) {
if (cluster_id == 0) {
is_sender = 0;
} else {
is_sender = (cluster_id % (1 << (level + 1))) == 0;
}

// the last cluster
}

}

}
Expand Down

0 comments on commit ec99160

Please sign in to comment.