Skip to content

Commit

Permalink
Extend layout utils to accept HW config as input
Browse files: browse the repository at this point in the history
  • Loading branch information
colluca committed Oct 28, 2023
1 parent 13acfd9 commit 0875bd0
Show file tree
Hide file tree
Showing 4 changed files with 24 additions and 11 deletions.
6 changes: 3 additions & 3 deletions sw/dnn/layernorm/layout.csv
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
, setup, dma in, compute tile, dma out, dma in, compute tile, dma out
"range(0,8)", 1, , 3, , , 5,
8 , 1, 2, , 4, 5, , 7
, setup, dma in, compute tile, dma out, dma in, compute tile, dma out
"[i*9+j+cfg['cluster']['cluster_base_hartid'] for i in range(cfg['s1_quadrant']['nr_clusters']) for j in range(8)]", 1, , 3, , , 5,
"[i*9+8+cfg['cluster']['cluster_base_hartid'] for i in range(cfg['s1_quadrant']['nr_clusters'])]" , 1, 2, , 4, 5, , 7
9 changes: 7 additions & 2 deletions sw/dnn/layernorm/src/layernorm.h
Original file line number Diff line number Diff line change
Expand Up @@ -110,12 +110,15 @@ static inline void layernorm_fp32(float *input, float *output,
// layernorm_fp32(input, input, ldI, 0, 1, seq_len, embeddings, eps);
// }

// Tiles the seq_len axis
// Tiles the seq_len axis (assumes seq_len is an integer multiple of n_tiles)
// Distributes tiles to clusters (assumes n_tiles is an integer multiple of
// the number of clusters)
static inline void layernorm_layer(layernorm_layer_t l) {
snrt_mcycle();

// Compute the tiling parameters
uint32_t n_tiles = l.n_tiles;
uint32_t n_tiles_per_cluster = l.n_tiles / snrt_cluster_num();
uint32_t tile_seq_len = l.seq_len / n_tiles;
uint32_t tile_size = l.batch_size * tile_seq_len * l.embeddings;
uint32_t tile_offset = tile_seq_len * l.embeddings;
Expand All @@ -130,7 +133,9 @@ static inline void layernorm_layer(layernorm_layer_t l) {

// Iterate tiles
snrt_mcycle();
for (int tile_idx = 0; tile_idx < n_tiles; tile_idx++) {
for (uint32_t cluster_tile_idx = 0; cluster_tile_idx < n_tiles_per_cluster; cluster_tile_idx++) {
// Calculate absolute tile index
uint32_t tile_idx = snrt_cluster_idx() * n_tiles_per_cluster + cluster_tile_idx;
// Copy input tile
if (snrt_is_dm_core()) {
float *remote_itile = remote_ifmap + tile_idx * tile_offset;
Expand Down
3 changes: 2 additions & 1 deletion target/common/common.mk
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,8 @@ VLT_FLAGS += --unroll-count 1024
VLT_CFLAGS += -std=c++14 -pthread
VLT_CFLAGS +=-I ${VLT_BUILDDIR} -I $(VLT_ROOT)/include -I $(VLT_ROOT)/include/vltstd -I $(VLT_FESVR)/include -I $(TB_DIR) -I ${MKFILE_DIR}/test

ANNOTATE_FLAGS ?= -q --keep-time
ANNOTATE_FLAGS ?= -q --keep-time
LAYOUT_EVENTS_FLAGS ?= --cfg=$(CFG)

# We need a recent LLVM installation (>11) to compile Verilator.
# We also need to link the binaries with LLVM's libc++.
Expand Down
17 changes: 12 additions & 5 deletions util/trace/layout_events.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
import csv
import pandas as pd
from math import isnan
import hjson


def main():
Expand All @@ -55,10 +56,9 @@ def main():
metavar='<layout>',
help='Layout CSV file')
parser.add_argument(
'--num-clusters',
type=int,
default=1,
help='Number of clusters')
'--cfg',
type=str,
help='System configuration .hjson file')
parser.add_argument(
'-o',
'--output',
Expand All @@ -71,6 +71,11 @@ def main():
# Read input CSV
df = pd.read_csv(args.csv)

# Read system configuration .hjson file
cfg = None
with open(args.cfg) as cfg_file:
cfg = hjson.load(cfg_file)

# Output CSV data
data = []
columns = []
Expand All @@ -92,7 +97,9 @@ def main():
# which generates a list of hart IDs
expr = row[0]
code = compile(expr, "<string>", "eval")
tids = eval(code, {}, {'num_clusters': args.num_clusters})
# Symbols must be added to globals to be used in list comprehensions
# see https://bugs.python.org/issue36300
tids = eval(code, {'cfg': cfg}, {'cfg': cfg})
if type(tids) == int:
tids = [tids]

Expand Down

0 comments on commit 0875bd0

Please sign in to comment.