diff --git a/.github/workflows/build-run-kernel-snake.yml b/.github/workflows/build-run-kernel-snake.yml index d3af9d1f..7b509cc6 100644 --- a/.github/workflows/build-run-kernel-snake.yml +++ b/.github/workflows/build-run-kernel-snake.yml @@ -20,4 +20,4 @@ jobs: working-directory: kernels/${{ matrix.kernel }} strategy: matrix: - kernel: [alloc, simple_copy, transform_copy, gemm, rescale, gemmini, streamer_alu, streamer_matmul] + kernel: [alloc, simple_copy, transform_copy, gemm, rescale, gemmini, streamer_alu, streamer_matmul, tiled_add] diff --git a/.github/workflows/build-run-kernel.yml b/.github/workflows/build-run-kernel.yml deleted file mode 100644 index 3cb113ef..00000000 --- a/.github/workflows/build-run-kernel.yml +++ /dev/null @@ -1,23 +0,0 @@ -name: Build and run kernels - -on: - push: - branches: - - main - pull_request: - -jobs: - build-and-run-kernels: - runs-on: ubuntu-24.04 - steps: - - uses: actions/checkout@v3 - - uses: prefix-dev/setup-pixi@v0.8.1 - with: - cache: true - cache-write: ${{ github.event_name == 'push' && github.ref_name == 'main' }} - - name: Build and run kernels - run: pixi run make allrun - working-directory: kernels/${{ matrix.kernel }} - strategy: - matrix: - kernel: [tiled_add] diff --git a/kernels/gemm/gendata.py b/kernels/gemm/gendata.py index aa530748..063c2bb0 100755 --- a/kernels/gemm/gendata.py +++ b/kernels/gemm/gendata.py @@ -39,5 +39,5 @@ def create_data_files(): "D_golden": D_golden, } - create_header("data.h", sizes, variables) - create_data("data.c", variables) + create_header("data", sizes, variables) + create_data("data", variables) diff --git a/kernels/rescale/gendata.py b/kernels/rescale/gendata.py index ee605de3..04ca03a5 100644 --- a/kernels/rescale/gendata.py +++ b/kernels/rescale/gendata.py @@ -69,5 +69,5 @@ def create_data_files(): sizes = {"MODE": 0, "DATA_LEN": data_len} variables = {"A": A, "O": O, "G": G} - create_header("data.h", sizes, variables) - create_data("data.c", variables) + create_header("data", sizes, variables) + create_data("data", variables) diff --git a/kernels/simple_copy/gendata.py b/kernels/simple_copy/gendata.py index c97ecd6f..b70f6bcd 100644 --- a/kernels/simple_copy/gendata.py +++ b/kernels/simple_copy/gendata.py @@ -8,5 +8,5 @@ def create_data_files(): A = np.linspace(1, array_size, array_size, dtype=np.int32) sizes = {"N": array_size} variables = {"A": A} - create_header("data.h", sizes, variables) - create_data("data.c", variables) + create_header("data", sizes, variables) + create_data("data", variables) diff --git a/kernels/streamer_alu/gendata.py b/kernels/streamer_alu/gendata.py index 590113c5..e83b8c4a 100644 --- a/kernels/streamer_alu/gendata.py +++ b/kernels/streamer_alu/gendata.py @@ -29,5 +29,5 @@ def create_data_files(): sizes = {"MODE": 0, "DATA_LEN": array_size, "LOOP_ITER": loop_iter} variables = {"A": A, "B": B, "O": O, "G": G} - create_header("data.h", sizes, variables) - create_data("data.c", variables) + create_header("data", sizes, variables) + create_data("data", variables) diff --git a/kernels/tiled_add/Makefile b/kernels/tiled_add/Makefile deleted file mode 100644 index 0d471d29..00000000 --- a/kernels/tiled_add/Makefile +++ /dev/null @@ -1,61 +0,0 @@ -# Courtesy of Federico Ficarelli - -.DEFAULT_GOAL := all - -include ../../runtime/snax-alu.rules -include ../../runtime/Makefile.rules - -TESTS = -TESTS += untiled.acc_dialect.x -TESTS += tiled.acc_dialect.x -TESTS += tiled_pipelined.acc_dialect.x - -CFLAGS += -std=gnu11 -CFLAGS += -Wall -Wextra -g - -MLIRPREPROCFLAGS += --allow-unregistered-dialect -MLIRPREPROC2FLAGS += --allow-unregistered-dialect -MLIRPREPROC3FLAGS += --allow-unregistered-dialect - -ifdef NO_CHECK - CFLAGS += -DNO_CHECK -endif - - -ifdef ACCFGOPT - ACCFGOPTFLAGS="mlir-opt{executable=mlir-opt generic=true arguments=-cse,-canonicalize,-allow-unregistered-dialect,-mlir-print-op-generic}",accfg-dedup,accfg-config-overlap, -endif - -# Override snax-opt rules to avoid linalg-to-library-call pass -SNAXOPTACCFLAGS = -p insert-accfg-op{accelerator=snax_alu},convert-linalg-to-accfg,${ACCFGOPTFLAGS}convert-accfg-to-csr,snax-copy-to-dma,memref-to-snax,snax-to-func,clear-memory-space,snax-lower-mcycle - -# Set default -ARRAY_SIZE ?= 2048 -TILE_SIZE ?= 128 - -%.preprocfinal.sized.mlir: %.preprocfinal.mlir - cat $< | sed -E 's/128/${ARRAY_SIZE}/gm;t' | sed -E 's/%tile_size = arith.constant 16 : index/%tile_size = arith.constant ${TILE_SIZE} : index/gm;t' > $@ - -%.acc_dialect.snax-opt.mlir: %.preprocfinal.sized.mlir - $(SNAXOPT) $(SNAXOPTACCFLAGS) -o $@ $< - -data.c data.h: - $(PYTHON) gendata.py --array_size=${ARRAY_SIZE} - -%.x: %.o main.o data.o - $(LD) $(LDFLAGS) $^ -o $@ - -sim_%: % - rm -fr ./logs/ - $(VLTSIM) $< - -RUN = $(addprefix run_, $(TESTS)) -$(RUN): run_%: sim_% - mv logs $(subst sim_,,$<).logs - -all: $(TESTS) - -allrun: $(RUN) - -clean: - rm -fr *.ll12 *.x *.o *.logs/ logs/ data.h data.c diff --git a/kernels/tiled_add/Snakefile b/kernels/tiled_add/Snakefile new file mode 100644 index 00000000..50bd0516 --- /dev/null +++ b/kernels/tiled_add/Snakefile @@ -0,0 +1,148 @@ +from util.snake.configs import get_snax_alu_config + +config = get_snax_alu_config() + +# Untiled example needs preprocessing: +for entry in config["mlirpreprocflags"]: + entry.append("--allow-unregistered-dialect") + + +def get_snax_opt_flags(options): + flags = [] + match options: + case "deduponly": + flags = ["accfg-dedup"] + case "overlaponly": + flags = ["accfg-config-overlap"] + case "accfgboth": + flags = ["accfg-dedup", "accfg-config-overlap"] + return ",".join( + [ + "insert-accfg-op{accelerator=snax_alu}", + "convert-linalg-to-accfg", + "mlir-opt{" + + "\ ".join( + [ + "executable=mlir-opt", + "generic=true", + "arguments='" + + ",".join( + [ + "-cse", + "-canonicalize", + "-allow-unregistered-dialect", + "-mlir-print-op-generic", + ] + ), + ] + ) + + "'}", + *flags, + "convert-accfg-to-csr", + "snax-copy-to-dma", + "memref-to-snax", + "snax-to-func", + "clear-memory-space", + "snax-lower-mcycle", + ] + ) + + +module default_rules: + snakefile: + "../../util/snake/default_rules.smk" + config: + config + + +use rule * from default_rules as default_* + + +rule size_mlir: + input: + "{file}.preprocfinal.mlir", + output: + temp("{file}_{array_size}_{tile_size}.preprocfinal.sized.mlir"), + params: + sed_script_array_size=lambda wildcards: f"'s/128/{wildcards.array_size}/gm;t'", + sed_script_tile_size=lambda wildcards: f"'s/%tile_size = arith.constant 16 : index/%tile_size = arith.constant {wildcards.tile_size} : index/gm;t'", + shell: + "cat {input} |" + "sed -E {params.sed_script_array_size} |" + "sed -E {params.sed_script_tile_size}" + "> {output} " + + +rule snax_compile_mlir: + input: + "{file}.preprocfinal.sized.mlir", + output: + temp("{file}_{compiler_opt}.snax-opt.mlir"), + wildcard_constraints: + compiler_opt="noaccfgopt|deduponly|overlaponly|accfgboth", + params: + # hardcoded to no accfg for now + snax_opt_flags=lambda wildcards: get_snax_opt_flags(wildcards.compiler_opt), + shell: + "{config[snax-opt]} -p {params.snax_opt_flags} -o {output} {input}" + + +rule compile_main: + input: + "main_{array_size}.rendered.c", + "data_{array_size}.o", + output: + temp("main_{array_size}.o"), + shell: + "{config[cc]} {config[cflags]} -c {input} -o {output}" + + +rule compile_snax_binary: + input: + "{file}_{array_size}_{tile_size}_{compiler_opt}.o", + "main_{array_size}.o", + "data_{array_size}.o", + output: + "{file}_{array_size}_{tile_size}_{compiler_opt}.x", + shell: + "{config[ld]} {config[ldflags]} {input} -o {output}" + + +rule render_main: + input: + "main.c", + output: + temp("main_{array_size}.rendered.c"), + shell: + 'echo "#include \\"data_{wildcards.array_size}.h\\"" | cat - {input} > {output}' + + +ARRAY_SIZES = ["256"] +TILE_SIZES = ["16"] +FILES = ["tiled", "tiled_pipelined", "untiled"] +COMPILER_OPTS = ["accfgboth", "noaccfgopt"] + + +rule all: + input: + expand( + "{file}_{array_size}_{tile_size}_{compiler_opt}.x", + file=FILES, + array_size=ARRAY_SIZES, + tile_size=TILE_SIZES, + compiler_opt=COMPILER_OPTS, + ), + run: + for item in input: + shell("{config[vltsim]} {item}") + + +from gendata import generate_data + + +rule generate_data: + output: + "data_{array_size}.c", + "data_{array_size}.h", + run: + generate_data(int(wildcards.array_size)) diff --git a/kernels/tiled_add/genbenchmark.py b/kernels/tiled_add/genbenchmark.py deleted file mode 100644 index 0bcef421..00000000 --- a/kernels/tiled_add/genbenchmark.py +++ /dev/null @@ -1,53 +0,0 @@ -import pathlib - -from util.snax_benchmark import SNAXBenchmark - -if __name__ == "__main__": - - def run_all(binary: str, folder: str): - binary = "tiled.acc_dialect.x" - folder_no_opt = folder + "_no_opt" - SIZES = ("ARRAY_SIZE=256", "TILE_SIZE=16", "NO_CHECK=1") - - ## not optimised - bm = SNAXBenchmark( - kernel="tiled_add", - binary=binary, - src_dir=str(pathlib.Path.cwd()), - export_dir=str(pathlib.Path.cwd()), - output_dir=str(pathlib.Path.cwd()), - ) - bm.clean() - bm.build(build_opts=[*SIZES]) - bm.run() - bm.trace() - bm.process_traces(folder_no_opt) - bm.copy_binary(folder_no_opt) - bm.copy_logs(folder_no_opt) - - ## optimised - folder_opt = folder + "_opt" - bm = SNAXBenchmark( - kernel="tiled_add", - binary=binary, - src_dir=str(pathlib.Path.cwd()), - export_dir=str(pathlib.Path.cwd()), - output_dir=str(pathlib.Path.cwd()), - ) - bm.clean() - bm.build(build_opts=[*SIZES, "ACCFGOPT=1"]) - bm.run() - - bm.trace() - bm.process_traces(folder_opt) - bm.copy_binary(folder_opt) - bm.copy_logs(folder_opt) - - binaries = { - "run0": "untiled.acc_dialect.x", - "run1": "tiled.acc_dialect.x", - "run2": "tiled_pipelined.acc_dialect.x", - } - - for folder, binary in binaries.items(): - run_all(binary, folder) diff --git a/kernels/tiled_add/gendata.py b/kernels/tiled_add/gendata.py index 9ab8525d..3c64e826 100755 --- a/kernels/tiled_add/gendata.py +++ b/kernels/tiled_add/gendata.py @@ -1,21 +1,11 @@ -import argparse - import numpy as np from util.gendata import create_data, create_header -if __name__ == "__main__": - parser = argparse.ArgumentParser( - description="Generate data for snax-alu operations." - ) - parser.add_argument( - "--array_size", type=int, default=1024, help="Size of the arrays to generate" - ) - args = parser.parse_args() +def generate_data(array_size): low_bound = -128 high_bound = 127 - array_size = args.array_size # snax-alu design-time spatial parallelism spatial_par = 4 @@ -37,5 +27,5 @@ sizes = {"MODE": 0, "DATA_LEN": array_size, "LOOP_ITER": loop_iter} variables = {"A": A, "B": B, "O": O, "G": G} - create_header("data.h", sizes, variables) - create_data("data.c", variables) + create_header(f"data_{array_size}", sizes, variables) + create_data(f"data_{array_size}", variables) diff --git a/kernels/tiled_add/main.c b/kernels/tiled_add/main.c index e8def9c2..4f363c1c 100644 --- a/kernels/tiled_add/main.c +++ b/kernels/tiled_add/main.c @@ -1,4 +1,3 @@ -#include "data.h" #include "memref.h" #include "snax_rt.h" #include "stdint.h" diff --git a/kernels/transform_copy/gendata.py b/kernels/transform_copy/gendata.py index e954e1b4..118bc1e6 100644 --- a/kernels/transform_copy/gendata.py +++ b/kernels/transform_copy/gendata.py @@ -13,5 +13,5 @@ def create_files(filename: str): B = B.flatten() sizes = {"N": array_size, "N_sqrt": sqrt(array_size)} variables = {"A": A, "B": B} - create_header(f"{filename}.h", sizes, variables) - create_data(f"{filename}.c", variables) + create_header(filename, sizes, variables) + create_data(filename, variables) diff --git a/tests/benchmark/test_snax_benchmark.py b/tests/benchmark/test_snax_benchmark.py deleted file mode 100644 index 374e4c68..00000000 --- a/tests/benchmark/test_snax_benchmark.py +++ /dev/null @@ -1,22 +0,0 @@ -import pathlib - -from util.snax_benchmark import SNAXBenchmark - - -def test_snax_benchmark_runner(): - folder = "test_run" - this_file = pathlib.Path(__file__) - bm = SNAXBenchmark( - kernel="tiled_add", - binary="untiled.acc_dialect.x", - src_dir=str(this_file.parent / ".." / ".." / "kernels" / "tiled_add" / ""), - export_dir=str(this_file.parent), - output_dir=str(this_file.parent), - ) - bm.clean() - bm.build(build_opts=["ARRAY_SIZE=128", "TILE_SIZE=16", "NO_CHECK=1"]) - bm.run() - bm.trace() - bm.process_traces(folder, accelerator="snax_alu") - bm.copy_binary(folder) - bm.copy_logs(folder) diff --git a/util/gendata.py b/util/gendata.py index b1c97c2f..1cb2ee5d 100644 --- a/util/gendata.py +++ b/util/gendata.py @@ -7,9 +7,10 @@ def create_header( file_name: str, sizes: dict[str, int], variables: dict[str, npt.NDArray] ) -> None: - if os.path.dirname(file_name): - os.makedirs(os.path.dirname(file_name), exist_ok=True) - with open(file_name, "w") as f: + header_file = f"{file_name}.h" + if os.path.dirname(header_file): + os.makedirs(os.path.dirname(header_file), exist_ok=True) + with open(header_file, "w") as f: includes = ["#include ", "#pragma once", ""] includes = "\n".join(includes) variables_string = [""] @@ -25,12 +26,13 @@ def create_header( def create_data(file_name: str, variables: dict[str, npt.NDArray]): - includes = ['#include "data.h"', "", ""] + includes = [f'#include "{os.path.basename(file_name)}.h"', "", ""] includes = "\n".join(includes) + c_file = f"{file_name}.c" variables = {i: np.reshape(j, j.size) for i, j in variables.items()} - if os.path.dirname(file_name): - os.makedirs(os.path.dirname(file_name), exist_ok=True) - with open(file_name, "w") as f: + if os.path.dirname(c_file): + os.makedirs(os.path.dirname(c_file), exist_ok=True) + with open(c_file, "w") as f: f.write(includes) for variable_name, variable_value in variables.items(): f.write(