verif: Add Verifier base class to fix NaN returns in DNN kernels (#96)

* util: Rename `elf.py` to `Elf.py` and add documentation
* util: Correct `data_utils` docstrings
* util: Add `Verifier` base class and check for `NaN` errors
* docs: Add documentation for `verif_utils`
* gelu: Switch from `sigmoid` to `tanh` approximation
* sw: Update `verify.py` scripts to use `Verifier` class
* sw: Move `datagen.py` and `verify.py` scripts to `scripts/`
* axpy: Update to use new `DataGen` class

---------

Co-authored-by: Luca Colagrande <[email protected]>
pulp-platform · Mar 7, 2024 · 11d0534 · 11d0534
1 parent 9b52124
commit 11d0534
Show file tree

Hide file tree

Showing 51 changed files with 877 additions and 900 deletions.
diff --git a/docs/rm/sim/Elf.md b/docs/rm/sim/Elf.md
@@ -0,0 +1 @@
+::: Elf
diff --git a/docs/rm/sim/verif_utils.md b/docs/rm/sim/verif_utils.md
@@ -0,0 +1 @@
+::: verif_utils
diff --git a/mkdocs.yml b/mkdocs.yml
@@ -55,9 +55,11 @@ nav:
       - Software:
           - Simulation Utilities:
               - data_utils: rm/sim/data_utils.md
+              - verif_utils: rm/sim/verif_utils.md
               - sim_utils: rm/sim/sim_utils.md
               - rm/sim/Simulation.md
               - rm/sim/Simulator.md
+              - rm/sim/Elf.md
           - Snitch Runtime:
               - Pages: runtime/Pages/index.md
               - Files: runtime/Files/index.md

diff --git a/sw/apps/atax/scripts/verify.py b/sw/apps/atax/scripts/verify.py
@@ -5,55 +5,33 @@
 #
 # Luca Colagrande <[email protected]>
 
+import numpy as np
 import sys
 from pathlib import Path
-import numpy as np
 from datagen import AtaxDataGen
 
-sys.path.append(str(Path(__file__).parent / "../../../util/sim/"))
-import verification  # noqa: E402
-from elf import Elf  # noqa: E402
-from data_utils import from_buffer  # noqa: E402
-
-
-ERR_THRESHOLD = 1e-10
+sys.path.append(str(Path(__file__).parent / '../../../util/sim/'))
+from verif_utils import Verifier  # noqa: E402
 
 
-def main():
-    # Run simulation and get outputs
-    args = verification.parse_args()
-    raw_results = verification.simulate(
-        sim_bin=args.sim_bin,
-        snitch_bin=args.snitch_bin,
-        symbols_bin=args.symbols_bin,
-        log=args.log,
-        output_uids=["y"],
-    )
+class AtaxVerifier(Verifier):
 
-    # Extract input operands from ELF file
-    if args.symbols_bin:
-        elf = Elf(args.symbols_bin)
-    else:
-        elf = Elf(args.snitch_bin)
-    A = elf.from_symbol('A', 'double')
-    x = elf.from_symbol('x', 'double')
-    M = elf.from_symbol('M', 'uint32_t')[0]
-    N = elf.from_symbol('N', 'uint32_t')[0]
-    A = np.reshape(A, (M, N))
+    OUTPUT_UIDS = ['y']
 
-    # Verify results
-    y_actual = from_buffer(raw_results['y'], 'double')
-    y_golden = AtaxDataGen().golden_model(A, x).flatten()
+    def get_actual_results(self):
+        return self.get_output_from_symbol('y', 'double')
 
-    relative_err = np.absolute((y_golden - y_actual) / y_golden)
-    fail = np.any(relative_err > ERR_THRESHOLD)
-    if (fail):
-        print('Simulation results are incorrect.')
-        verification.dump_results_to_csv([y_golden, y_actual, relative_err],
-                                         Path.cwd() / 'results.csv')
+    def get_expected_results(self):
+        A = self.get_input_from_symbol('A', 'double')
+        x = self.get_input_from_symbol('x', 'double')
+        M = self.get_input_from_symbol('M', 'uint32_t')[0]
+        N = self.get_input_from_symbol('N', 'uint32_t')[0]
+        A = np.reshape(A, (M, N))
+        return AtaxDataGen().golden_model(A, x).flatten()
 
-    return int(fail)
+    def check_results(self, *args):
+        return super().check_results(*args, rtol=1e-10)
 
 
 if __name__ == "__main__":
-    sys.exit(main())
+    sys.exit(AtaxVerifier().main())
diff --git a/sw/apps/correlation/scripts/verify.py b/sw/apps/correlation/scripts/verify.py
@@ -5,54 +5,32 @@
 #
 # Luca Colagrande <[email protected]>
 
+import numpy as np
 import sys
 from pathlib import Path
-import numpy as np
 from datagen import CorrelationDataGen
 
-sys.path.append(str(Path(__file__).parent / "../../../util/sim/"))
-import verification  # noqa: E402
-from elf import Elf  # noqa: E402
-from data_utils import from_buffer  # noqa: E402
-
-
-ERR_THRESHOLD = 1e-10
+sys.path.append(str(Path(__file__).parent / '../../../util/sim/'))
+from verif_utils import Verifier  # noqa: E402
 
 
-def main():
-    # Run simulation and get outputs
-    args = verification.parse_args()
-    raw_results = verification.simulate(
-        sim_bin=args.sim_bin,
-        snitch_bin=args.snitch_bin,
-        symbols_bin=args.symbols_bin,
-        log=args.log,
-        output_uids=['corr'],
-    )
+class CorrelationVerifier(Verifier):
 
-    # Extract input operands from ELF file
-    if args.symbols_bin:
-        elf = Elf(args.symbols_bin)
-    else:
-        elf = Elf(args.snitch_bin)
-    M = elf.from_symbol('M', 'uint32_t')[0]
-    N = elf.from_symbol('N', 'uint32_t')[0]
-    data = elf.from_symbol('data', 'double')
-    data = np.reshape(data, (N, M))
+    OUTPUT_UIDS = ['corr']
 
-    # Verify results
-    corr_actual = from_buffer(raw_results['corr'], 'double')
-    corr_golden = CorrelationDataGen().golden_model(data).flatten()
+    def get_actual_results(self):
+        return self.get_output_from_symbol('corr', 'double')
 
-    relative_err = np.absolute((corr_golden - corr_actual) / corr_golden)
-    fail = np.any(relative_err > ERR_THRESHOLD)
-    if (fail):
-        print('Simulation results are incorrect.')
-        verification.dump_results_to_csv([corr_golden, corr_actual, relative_err],
-                                         Path.cwd() / 'results.csv')
+    def get_expected_results(self):
+        M = self.get_input_from_symbol('M', 'uint32_t')[0]
+        N = self.get_input_from_symbol('N', 'uint32_t')[0]
+        data = self.get_input_from_symbol('data', 'double')
+        data = np.reshape(data, (N, M))
+        return CorrelationDataGen().golden_model(data).flatten()
 
-    return int(fail)
+    def check_results(self, *args):
+        return super().check_results(*args, rtol=1e-10)
 
 
 if __name__ == "__main__":
-    sys.exit(main())
+    sys.exit(CorrelationVerifier().main())
diff --git a/sw/apps/covariance/scripts/verify.py b/sw/apps/covariance/scripts/verify.py
@@ -5,54 +5,32 @@
 #
 # Luca Colagrande <[email protected]>
 
+import numpy as np
 import sys
 from pathlib import Path
-import numpy as np
 from datagen import CovarianceDataGen
 
-sys.path.append(str(Path(__file__).parent / "../../../util/sim/"))
-import verification  # noqa: E402
-from elf import Elf  # noqa: E402
-from data_utils import from_buffer  # noqa: E402
-
-
-ERR_THRESHOLD = 1e-10
+sys.path.append(str(Path(__file__).parent / '../../../util/sim/'))
+from verif_utils import Verifier  # noqa: E402
 
 
-def main():
-    # Run simulation and get outputs
-    args = verification.parse_args()
-    raw_results = verification.simulate(
-        sim_bin=args.sim_bin,
-        snitch_bin=args.snitch_bin,
-        symbols_bin=args.symbols_bin,
-        log=args.log,
-        output_uids=['cov'],
-    )
+class CovarianceVerifier(Verifier):
 
-    # Extract input operands from ELF file
-    if args.symbols_bin:
-        elf = Elf(args.symbols_bin)
-    else:
-        elf = Elf(args.snitch_bin)
-    M = elf.from_symbol('M', 'uint32_t')[0]
-    N = elf.from_symbol('N', 'uint32_t')[0]
-    data = elf.from_symbol('data', 'double')
-    data = np.reshape(data, (N, M))
+    OUTPUT_UIDS = ['cov']
 
-    # Verify results
-    cov_actual = from_buffer(raw_results['cov'], 'double')
-    cov_golden = CovarianceDataGen().golden_model(data).flatten()
+    def get_actual_results(self):
+        return self.get_output_from_symbol('cov', 'double')
 
-    relative_err = np.absolute((cov_golden - cov_actual) / cov_golden)
-    fail = np.any(relative_err > ERR_THRESHOLD)
-    if (fail):
-        print('Simulation results are incorrect.')
-        verification.dump_results_to_csv([cov_golden, cov_actual, relative_err],
-                                         Path.cwd() / 'results.csv')
+    def get_expected_results(self):
+        M = self.get_input_from_symbol('M', 'uint32_t')[0]
+        N = self.get_input_from_symbol('N', 'uint32_t')[0]
+        data = self.get_input_from_symbol('data', 'double')
+        data = np.reshape(data, (N, M))
+        return CovarianceDataGen().golden_model(data).flatten()
 
-    return int(fail)
+    def check_results(self, *args):
+        return super().check_results(*args, rtol=1e-10)
 
 
 if __name__ == "__main__":
-    sys.exit(main())
+    sys.exit(CovarianceVerifier().main())
diff --git a/sw/blas/axpy/Makefile b/sw/blas/axpy/Makefile
@@ -9,18 +9,21 @@ MK_DIR   := $(dir $(realpath $(lastword $(MAKEFILE_LIST))))
 DATA_DIR := $(realpath $(MK_DIR)/data)
 SRC_DIR  := $(realpath $(MK_DIR)/src)
 
-LENGTH  ?= 24
-SECTION ?=
+DATA_CFG ?= $(DATA_DIR)/params.json
+SECTION  ?=
 
 APP     ?= axpy
-SRCS    ?= $(SRC_DIR)/main.c
-INCDIRS += $(DATA_DIR) $(SRC_DIR)
+SRCS    ?= $(realpath $(SRC_DIR)/main.c)
+INCDIRS ?= $(dir $(DATA_H)) $(SRC_DIR)
 
-DATAGEN_PY = $(DATA_DIR)/datagen.py
-DATA_H     = $(DATA_DIR)/data.h
+DATAGEN_PY = $(MK_DIR)/scripts/datagen.py
+DATA_H    ?= $(DATA_DIR)/data.h
 
-$(DATA_H): $(DATAGEN_PY)
-	$< $(LENGTH) --section="$(SECTION)" > $@
+$(dir $(DATA_H)):
+	mkdir -p $@
+
+$(DATA_H): $(DATAGEN_PY) $(DATA_CFG) | $(dir $(DATA_H))
+	$< -c $(DATA_CFG) --section="$(SECTION)" > $@
 
 .PHONY: clean-data clean
 

diff --git a/sw/blas/axpy/data/datagen.py b/sw/blas/axpy/data/datagen.py
diff --git a/sw/blas/axpy/data/params.json b/sw/blas/axpy/data/params.json
@@ -0,0 +1,7 @@
+// Copyright 2023 ETH Zurich and University of Bologna.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+
+{
+    n: 24
+}
diff --git a/sw/blas/axpy/scripts/datagen.py b/sw/blas/axpy/scripts/datagen.py
@@ -0,0 +1,55 @@
+#!/usr/bin/env python3
+# Copyright 2023 ETH Zurich and University of Bologna.
+# Licensed under the Apache License, Version 2.0, see LICENSE for details.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Author: Luca Colagrande <[email protected]>
+
+import numpy as np
+import os
+import sys
+
+sys.path.append(os.path.join(os.path.dirname(__file__), "../../../../util/sim/"))
+from data_utils import format_scalar_definition, format_array_definition, \
+                       format_array_declaration, format_ifdef_wrapper, DataGen  # noqa: E402
+
+
+class AxpyDataGen(DataGen):
+
+    MIN = -1000
+    MAX = +1000
+    # AXI splits bursts crossing 4KB address boundaries. To minimize
+    # the occurrence of these splits the data should be aligned to 4KB
+    BURST_ALIGNMENT = 4096
+
+    def golden_model(self, a, x, y):
+        return a*x + y
+
+    def emit_header(self, **kwargs):
+        header = [super().emit_header()]
+
+        n = kwargs['n']
+        a = np.random.uniform(self.MIN, self.MAX, 1)
+        x = np.random.uniform(self.MIN, self.MAX, n)
+        y = np.random.uniform(self.MIN, self.MAX, n)
+        g = self.golden_model(a, x, y)
+
+        assert (n % 8) == 0, "n must be an integer multiple of the number of cores"
+
+        header += [format_scalar_definition('const uint32_t', 'n', n)]
+        header += [format_scalar_definition('const double', 'a', a[0])]
+        header += [format_array_definition('double', 'x', x, alignment=self.BURST_ALIGNMENT,
+                                           section=kwargs['section'])]
+        header += [format_array_definition('double', 'y', y, alignment=self.BURST_ALIGNMENT,
+                                           section=kwargs['section'])]
+        header += [format_array_declaration('double', 'z', [n], alignment=self.BURST_ALIGNMENT,
+                                            section=kwargs['section'])]
+        result_def = format_array_definition('double', 'g', g)
+        header += [format_ifdef_wrapper('BIST', result_def)]
+        header = '\n\n'.join(header)
+
+        return header
+
+
+if __name__ == '__main__':
+    sys.exit(AxpyDataGen().main())