Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Test codegen #35

Merged
merged 12 commits into from
Jan 5, 2024
16 changes: 11 additions & 5 deletions .github/workflows/avx2.yml
Original file line number Diff line number Diff line change
Expand Up @@ -36,14 +36,20 @@ jobs:
- name: Configure CMake
# Configure CMake in a 'build' subdirectory. `CMAKE_BUILD_TYPE` is only required if you are using a single-configuration generator such as make.
# See https://cmake.org/cmake/help/latest/variable/CMAKE_BUILD_TYPE.html?highlight=cmake_build_type
run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} --preset avx2
run: cmake -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} --preset avx2

- name: Build
# Build your program with the given configuration
run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}}
run: cmake --build ${{github.workspace}}/build/avx2 --config ${{env.BUILD_TYPE}}

- name: Test
working-directory: ${{github.workspace}}/build
- name: Test Codegen
working-directory: ${{github.workspace}}/build/avx2
# Execute tests defined by the CMake configuration.
# See https://cmake.org/cmake/help/latest/manual/ctest.1.html for more detail
run: ctest --test-dir ${{github.workspace}}/build -C ${{env.BUILD_TYPE}} -R correctness
run: ctest --test-dir ${{github.workspace}}/build/avx2 -C ${{env.BUILD_TYPE}} -R codegen

- name: Test Correctness
working-directory: ${{github.workspace}}/build/avx2
# Execute tests defined by the CMake configuration.
# See https://cmake.org/cmake/help/latest/manual/ctest.1.html for more detail
run: ctest --test-dir ${{github.workspace}}/build/avx2 -C ${{env.BUILD_TYPE}} -R correctness
3 changes: 3 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@ repos:
hooks:
- id: check-yaml
- id: end-of-file-fixer
exclude: ^test/codegen/expected/
- id: trailing-whitespace
exclude: ^test/codegen/expected/
- repo: https://github.com/psf/black
rev: 22.10.0
hooks:
Expand All @@ -15,3 +17,4 @@ repos:
hooks:
- id: clang-format
args: [--style=Google, -i]
exclude: ^test/codegen/expected/
4 changes: 4 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,10 @@ if (NOT HAVE_AVX2 AND NOT HAVE_NEON)
message(FATAL_ERROR "cannot build without one of AVX2 or neon")
endif()

if (NOT DEFINED TARGET_ARCH)
message(FATAL_ERROR "TARGET_ARCH is not defined by the preset configuration")
endif()

find_package(Exo REQUIRED)

add_subdirectory(src)
Expand Down
9 changes: 6 additions & 3 deletions CMakePresets.json
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@
"cacheVariables": {
"CMAKE_OSX_ARCHITECTURES": "arm64",
"CMAKE_BUILD_TYPE": "Release",
"BLA_VENDOR": "Apple"
"BLA_VENDOR": "Apple",
"TARGET_ARCH": "apple-silicon"
}
},
{
Expand All @@ -25,7 +26,8 @@
"generator": "Ninja",
"binaryDir": "${sourceDir}/build/linux-arm64",
"cacheVariables": {
"CMAKE_BUILD_TYPE": "Release"
"CMAKE_BUILD_TYPE": "Release",
"TARGET_ARCH": "linux-arm64"
}
},
{
Expand All @@ -37,7 +39,8 @@
"cacheVariables": {
"CMAKE_C_FLAGS": "-march=native -fno-tree-vectorize -fno-unroll-loops -O3 -ffast-math -std=c11",
"CMAKE_CXX_FLAGS": "-march=native -fno-tree-vectorize -fno-unroll-loops -O3 -ffast-math",
"CMAKE_BUILD_TYPE": "Release"
"CMAKE_BUILD_TYPE": "Release",
"TARGET_ARCH": "avx2"
}
}
]
Expand Down
2 changes: 1 addition & 1 deletion src/common/codegen_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def generate_stride_1_proc(template_proc, precision):


def export_exo_proc(globals, proc):
globals[proc.name()] = proc
globals[proc.name()] = simplify(proc)
globals.setdefault("__all__", []).append(proc.name())


Expand Down
4 changes: 2 additions & 2 deletions src/level1/nrm2.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ def schedule_nrm2_stride_1(VEC_W, memory, instructions, precision):

simple_stride_1 = replace_all(simple_stride_1, instructions)

return simple_stride_1
return simplify(simple_stride_1)


def schedule_nrm2_stride_1_interleaved(
Expand Down Expand Up @@ -152,7 +152,7 @@ def interleave_instructions(proc, iter):
simple_stride_1 = interleave_instructions(simple_stride_1, "im")
simple_stride_1 = interleave_instructions(simple_stride_1, "im")

return simple_stride_1
return simplify(simple_stride_1)


#################################################
Expand Down
2 changes: 1 addition & 1 deletion src/level1/sad.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,4 +76,4 @@ def sad(n: size, x: i8[n] @ DRAM, y: i8[n] @ DRAM, result: R @ DRAM):
# sad = replace_all(sad, [mm256_sad_epu8])
sad = bind_builtins_args(sad, sad.body(), "i8")

print(sad)
sad = simplify(sad)
4 changes: 2 additions & 2 deletions src/level2/ger.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ def specialize_ger(precision, alpha):
for arg in args:
specialized = set_precision(specialized, arg, precision)

return specialized
return simplify(specialized)


def schedule_ger_row_major_stride_1(
Expand All @@ -62,7 +62,7 @@ def schedule_ger_row_major_stride_1(

stride_1 = stage_mem(stride_1, stride_1.body(), "alpha", "alpha_")

return stride_1
return simplify(stride_1)


#################################################
Expand Down
4 changes: 2 additions & 2 deletions src/level2/syr.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ def specialize_syr(syr, precision):
for arg in args:
specialized = set_precision(specialized, arg, precision)

return specialized
return simplify(specialized)


def schedule_interleave_syr_row_major_stride_1(
Expand All @@ -68,7 +68,7 @@ def schedule_interleave_syr_row_major_stride_1(
stride_1 = apply_to_block(stride_1, stride_1.forward(j_loop).body(), hoist_stmt)
stride_1 = replace_all(stride_1, instructions)

return stride_1
return simplify(stride_1)


#################################################
Expand Down
4 changes: 2 additions & 2 deletions src/level2/syr2.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def specialize_syr2(syr2, precision):
for arg in args:
specialized = set_precision(specialized, arg, precision)

return specialized
return simplify(specialized)


def schedule_interleave_syr2_row_major_stride_1(
Expand All @@ -58,7 +58,7 @@ def schedule_interleave_syr2_row_major_stride_1(
stride_1 = stride_1.add_assertion("stride(x, 0) == 1")
stride_1 = stride_1.add_assertion("stride(y, 0) == 1")

return stride_1
return simplify(stride_1)


#################################################
Expand Down
20 changes: 14 additions & 6 deletions src/level3/syrk.py
Original file line number Diff line number Diff line change
Expand Up @@ -667,7 +667,7 @@ def exo_syrk_upper_alphazero_beta(
def generate_syrk_gepp_base(self, syrk_win: Procedure):
gepp_syrk_base = rename(syrk_win, "gepp_syrk_base")
gepp_syrk_base = gepp_syrk_base.partial_eval(K=self.microkernel.K_blk)
return gepp_syrk_base
return simplify(gepp_syrk_base)

def generate_syrk_gepp_lower_notranspose_noalpha(
self, syrk: Procedure, diag_handler: Procedure
Expand Down Expand Up @@ -911,6 +911,7 @@ def unsafe_microkernel_scheduled(
)

unsafe_microkernel_base.unsafe_assert_eq(unsafe_microkernel_scheduled)
unsafe_microkernel_scheduled = simplify(unsafe_microkernel_scheduled)

diag_syrk_scheduled = call_eqv(
diag_syrk_scheduled,
Expand All @@ -924,6 +925,7 @@ def unsafe_microkernel_scheduled(
# diag_syrk_scheduled = diag_syrk_scheduled.add_assertion("stride(C, 0)==32")
# diag_syrk_scheduled.unsafe_assert_eq(diag_syrk_base)

diag_syrk_scheduled = simplify(diag_syrk_scheduled)
gepp_syrk_scheduled = call_eqv(
gepp_syrk_scheduled, "diag_handler(_)", diag_syrk_scheduled
)
Expand Down Expand Up @@ -1103,7 +1105,7 @@ def unsafe_microkernel_scheduled(
gepp_syrk_scheduled, "k_microkernel(_)", k_microkernel_scheduled
)

return gepp_syrk_scheduled, gepp_syrk_base
return simplify(gepp_syrk_scheduled), simplify(gepp_syrk_base)

def schedule_syrk_lower_notranspose_noalpha(self, ssyrk_base: Procedure):
syrk = divide_loop(
Expand All @@ -1117,14 +1119,14 @@ def schedule_syrk_lower_notranspose_noalpha(self, ssyrk_base: Procedure):
syrk, "gepp_syrk_base(_)", self.gepp_syrk_scheduled_lower_notranspose
)
# print(syrk)
return syrk
return simplify(syrk)

def bind(self, proc, buffer, reg, machine):
proc = bind_expr(proc, buffer, reg)
proc = expand_dim(proc, reg, machine.vec_width, "ji")
proc = lift_alloc(proc, f"{reg} : _", n_lifts=2)
proc = fission(proc, proc.find(f"{reg} = _").after())
return proc
return simplify(proc)

def stage(self, proc, buffer, reg, machine):
proc = stage_mem(
Expand All @@ -1133,7 +1135,7 @@ def stage(self, proc, buffer, reg, machine):
proc = expand_dim(proc, reg, machine.vec_width, f"ji")
proc = lift_alloc(proc, f"{reg} : _", n_lifts=2)
proc = fission(proc, proc.find(f"{reg}[_] = _").after())
return proc
return simplify(proc)

def schedule_apply_scalar(
self,
Expand Down Expand Up @@ -1203,7 +1205,7 @@ def specialize_syrk(self, syrk: Procedure, precision: str, args=list[str]):
syrk = rename(syrk, "exo_" + prefix + name)
for arg in args:
syrk = set_precision(syrk, arg, precision)
return syrk
return simplify(syrk)


k_blk = 256
Expand All @@ -1216,6 +1218,9 @@ def specialize_syrk(self, syrk: Procedure, precision: str, args=list[str]):

ssyrk = SYRK(C.Machine, "f32", k_blk, m_blk, m_blk_small, m_reg, n_reg, e_reg)

for i in range(13):
ssyrk.entry_points[i] = simplify(ssyrk.entry_points[i])

exo_ssyrk_lower_notranspose_noalpha_nobeta = ssyrk.entry_points[0]
exo_ssyrk_lower_notranspose_alpha_nobeta = ssyrk.entry_points[1]
exo_ssyrk_lower_notranspose_alpha_beta = ssyrk.entry_points[2]
Expand All @@ -1235,6 +1240,9 @@ def specialize_syrk(self, syrk: Procedure, precision: str, args=list[str]):
dsyrk = SYRK(C.Machine, "f64", k_blk, m_blk, m_blk_small, m_reg, n_reg // 2, e_reg)
C.Machine.vec_width *= 2

for i in range(13):
dsyrk.entry_points[i] = simplify(dsyrk.entry_points[i])

exo_dsyrk_lower_notranspose_noalpha_nobeta = dsyrk.entry_points[0]
exo_dsyrk_lower_notranspose_alpha_nobeta = dsyrk.entry_points[1]
exo_dsyrk_lower_notranspose_alpha_beta = dsyrk.entry_points[2]
Expand Down
6 changes: 5 additions & 1 deletion test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ find_package(Python 3 REQUIRED Interpreter)
# Generic test-case function
###############################################################################

function(add_exo_blas_test kernel precision)
function(add_exo_blas_test level kernel precision)
set(precision_kernel "${precision}${kernel}")

set(bla_vendor_benchmark_results_dir "${PROJECT_SOURCE_DIR}/benchmark_results/${BLA_VENDOR}")
Expand Down Expand Up @@ -80,6 +80,10 @@ function(add_exo_blas_test kernel precision)
ENVIRONMENT "OPENBLAS_NUM_THREADS=1;MKL_NUM_THREADS=1;VECLIB_MAXIMUM_THREADS=1"
)

# Add codegen test
add_test(NAME ${precision_kernel}_codegen
COMMAND Python::Interpreter "${PROJECT_SOURCE_DIR}/test/codegen/check_diff.py" ${TARGET_ARCH} ${level} exo_${kernel})

add_test(
NAME ${precision_kernel}_graph
COMMAND Python::Interpreter "${PROJECT_SOURCE_DIR}/test/graph.py" ${precision_kernel} ${PROJECT_SOURCE_DIR}/benchmark_results ${PROJECT_SOURCE_DIR}/graphs
Expand Down
39 changes: 39 additions & 0 deletions test/codegen/check_diff.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
import difflib
import sys
import os
from pathlib import Path


def check_diff(file1, file2):
    """Compare two text files and abort with a unified diff if they differ.

    Args:
        file1: Path of the first file; used as the "from" side of the diff.
        file2: Path of the second file; used as the "to" side of the diff.

    Returns:
        None when both files contain exactly the same lines.

    Raises:
        SystemExit: When the files differ. The exit message names both files
            and embeds the unified diff.
        OSError: When either file cannot be opened (propagated from ``open``).
    """
    with open(file1, "r") as f1:
        f1_text = f1.readlines()

    with open(file2, "r") as f2:
        f2_text = f2.readlines()

    if f1_text == f2_text:
        return

    # readlines() keeps each line's trailing "\n", so let unified_diff use its
    # default "\n" terminator for the header lines too and concatenate the
    # pieces directly. Joining with "\n" instead would insert a blank line
    # after every content line of the report.
    rendered = []
    for line in difflib.unified_diff(
        f1_text, f2_text, fromfile=str(file1), tofile=str(file2)
    ):
        if line.endswith("\n"):
            rendered.append(line)
        else:
            # Only the last line of a file can lack a terminator; flag it so
            # a difference that is purely a missing final newline is visible.
            rendered.append(line + "\n\\ No newline at end of file\n")
    diff = "".join(rendered).rstrip("\n")

    # sys.exit is always available, unlike the site-provided exit() helper.
    sys.exit(f"Error: files {file1} and {file2} have the following diff:\n{diff}")


if __name__ == "__main__":
    # Command line: check_diff.py <target_arch> <level> <kernel>
    # Fail with a usage message instead of an IndexError traceback when an
    # argument is missing. Extra arguments are ignored.
    if len(sys.argv) < 4:
        sys.exit(f"usage: {sys.argv[0]} <target_arch> <level> <kernel>")

    # This script is three path components below the repository root.
    REPO_ROOT = Path(__file__).parent.parent.parent.resolve()

    target_arch, level, kernel = sys.argv[1:4]

    # Directory holding the generated sources for this kernel in the build tree.
    KERNEL_BUILD_DIR = (
        REPO_ROOT / "build" / target_arch / "src" / level / f"{kernel}.exo"
    )
    # Directory holding the checked-in reference output for this architecture.
    EXPECTED_DIR = REPO_ROOT / "test" / "codegen" / "expected" / target_arch

    # Compare the header first, then the C source; check_diff exits on the
    # first mismatch.
    for generated_name in (f"{kernel}.h", f"{kernel}.c"):
        check_diff(KERNEL_BUILD_DIR / generated_name, EXPECTED_DIR / generated_name)
Loading