From 72d7dc8aa2b24a298793ea7d1d3e2a972cb03750 Mon Sep 17 00:00:00 2001 From: Luca Colagrande Date: Fri, 23 Aug 2024 13:01:03 +0200 Subject: [PATCH] sw: Uniformize random data generation --- sw/apps/atax/scripts/datagen.py | 5 ++-- sw/apps/correlation/scripts/datagen.py | 3 +- sw/apps/covariance/scripts/datagen.py | 6 ++-- sw/apps/doitgen/scripts/datagen.py | 8 ++--- sw/blas/axpy/scripts/datagen.py | 12 ++++---- sw/blas/dot/scripts/datagen.py | 7 ++--- sw/blas/gemm/scripts/datagen.py | 16 +++++----- sw/blas/syrk/scripts/datagen.py | 12 ++++---- util/sim/data_utils.py | 41 ++++++++++++++++++++++++++ 9 files changed, 74 insertions(+), 36 deletions(-) diff --git a/sw/apps/atax/scripts/datagen.py b/sw/apps/atax/scripts/datagen.py index 51317c70ec..c73ae70a29 100755 --- a/sw/apps/atax/scripts/datagen.py +++ b/sw/apps/atax/scripts/datagen.py @@ -8,6 +8,7 @@ import numpy as np +import snitch.util.sim.data_utils as du from snitch.util.sim.data_utils import format_scalar_definition, format_array_definition, \ format_array_declaration, format_ifdef_wrapper, DataGen @@ -26,8 +27,8 @@ def emit_header(self, **kwargs): header = [super().emit_header()] M, N = kwargs['M'], kwargs['N'] - A = np.random.randint(-200, 100, size=(M, N))/100 - x = np.random.randint(-200, 100, size=(N, 1))/100 + A = du.generate_random_array((M, N)) + x = du.generate_random_array((N, 1)) y = self.golden_model(A, x) assert (M % 8) == 0, "M must be an integer multiple of the number of cores" diff --git a/sw/apps/correlation/scripts/datagen.py b/sw/apps/correlation/scripts/datagen.py index b2047d5eb5..7880c16934 100755 --- a/sw/apps/correlation/scripts/datagen.py +++ b/sw/apps/correlation/scripts/datagen.py @@ -8,6 +8,7 @@ import numpy as np +import snitch.util.sim.data_utils as du from snitch.util.sim.data_utils import format_scalar_definition, format_array_definition, \ format_array_declaration, format_ifdef_wrapper, DataGen @@ -26,7 +27,7 @@ def emit_header(self, **kwargs): header = [super().emit_header()] 
M, N = kwargs['M'], kwargs['N'] - data = np.random.randint(-200, 100, size=(N, M))/100 + data = du.generate_random_array((N, M)) corr = self.golden_model(data) data = data.flatten() diff --git a/sw/apps/covariance/scripts/datagen.py b/sw/apps/covariance/scripts/datagen.py index c3b7cd8b3a..07bb92d0a3 100755 --- a/sw/apps/covariance/scripts/datagen.py +++ b/sw/apps/covariance/scripts/datagen.py @@ -8,7 +8,7 @@ import numpy as np -from snitch.util.sim import data_utils +import snitch.util.sim.data_utils as du from snitch.util.sim.data_utils import format_array_definition, \ format_array_declaration, format_struct_definition, DataGen @@ -42,14 +42,14 @@ def validate(self, **kwargs): total_size = 2 * a_tile_size + b_tile_size if DOUBLE_BUFFER: total_size *= 2 - data_utils.validate_tcdm_footprint(total_size) + du.validate_tcdm_footprint(total_size) def emit_header(self, **kwargs): header = [super().emit_header()] self.validate(**kwargs) - data = np.random.randint(-200, 100, size=(kwargs['n'], kwargs['m'])) + data = du.generate_random_array((kwargs['n'], kwargs['m'])) cov = self.golden_model(data) data = data.transpose().flatten() diff --git a/sw/apps/doitgen/scripts/datagen.py b/sw/apps/doitgen/scripts/datagen.py index d0dddf6f5a..5f14ec86dd 100755 --- a/sw/apps/doitgen/scripts/datagen.py +++ b/sw/apps/doitgen/scripts/datagen.py @@ -7,7 +7,7 @@ import numpy as np -from snitch.util.sim import data_utils +import snitch.util.sim.data_utils as du from snitch.util.sim.data_utils import format_array_definition, format_struct_definition, DataGen np.random.seed(42) @@ -49,15 +49,15 @@ def validate(self, **kwargs): total_size = 2 * a_tile_size + x_size if DOUBLE_BUFFER: total_size *= 2 - data_utils.validate_tcdm_footprint(total_size) + du.validate_tcdm_footprint(total_size) def emit_header(self, **kwargs): header = [super().emit_header()] self.validate(**kwargs) - A = np.random.randint(-100, 100, size=(kwargs['r'], kwargs['q'], kwargs['s'])) - x = np.random.randint(-100, 100, 
size=(kwargs['s'], kwargs['s'])) + A = du.generate_random_array((kwargs['r'], kwargs['q'], kwargs['s'])) + x = du.generate_random_array((kwargs['s'], kwargs['s'])) _ = self.golden_model(A, x) diff --git a/sw/blas/axpy/scripts/datagen.py b/sw/blas/axpy/scripts/datagen.py index cf67956679..ec00a4c880 100755 --- a/sw/blas/axpy/scripts/datagen.py +++ b/sw/blas/axpy/scripts/datagen.py @@ -8,15 +8,13 @@ import numpy as np import sys -from snitch.util.sim import data_utils +import snitch.util.sim.data_utils as du from snitch.util.sim.data_utils import format_scalar_definition, format_array_definition, \ format_array_declaration, format_ifdef_wrapper, format_struct_definition, DataGen class AxpyDataGen(DataGen): - MIN = -1000 - MAX = +1000 # AXI splits bursts crossing 4KB address boundaries. To minimize # the occurrence of these splits the data should be aligned to 4KB BURST_ALIGNMENT = 4096 @@ -36,16 +34,16 @@ def validate_config(self, **kwargs): # Note: doesn't account for gaps created by data alignment vec_size = n_per_tile * 8 total_size = 2 * 3 * vec_size - data_utils.validate_tcdm_footprint(total_size) + du.validate_tcdm_footprint(total_size) def emit_header(self, **kwargs): header = [super().emit_header()] self.validate_config(**kwargs) - a = np.random.uniform(self.MIN, self.MAX, 1)[0] - x = np.random.uniform(self.MIN, self.MAX, kwargs['n']) - y = np.random.uniform(self.MIN, self.MAX, kwargs['n']) + a = du.generate_random_array(1)[0] + x = du.generate_random_array(kwargs['n']) + y = du.generate_random_array(kwargs['n']) g = self.golden_model(a, x, y) x_uid = 'x' diff --git a/sw/blas/dot/scripts/datagen.py b/sw/blas/dot/scripts/datagen.py index 01560c51ff..d11b53ff86 100755 --- a/sw/blas/dot/scripts/datagen.py +++ b/sw/blas/dot/scripts/datagen.py @@ -6,14 +6,13 @@ import numpy as np import sys +import snitch.util.sim.data_utils as du from snitch.util.sim.data_utils import format_scalar_definition, format_array_definition, \ format_scalar_declaration, 
format_ifdef_wrapper, DataGen class DotDataGen(DataGen): - MIN = -1000 - MAX = +1000 # AXI splits bursts crossing 4KB address boundaries. To minimize # the occurrence of these splits the data should be aligned to 4KB BURST_ALIGNMENT = 4096 @@ -25,8 +24,8 @@ def emit_header(self, **kwargs): header = [super().emit_header()] n = kwargs['n'] - x = np.random.uniform(self.MIN, self.MAX, n) - y = np.random.uniform(self.MIN, self.MAX, n) + x = du.generate_random_array(n) + y = du.generate_random_array(n) g = self.golden_model(x, y) assert (n % (8 * 4)) == 0, "n must be an integer multiple of the number of cores times " \ diff --git a/sw/blas/gemm/scripts/datagen.py b/sw/blas/gemm/scripts/datagen.py index da7f8ba578..6361756040 100755 --- a/sw/blas/gemm/scripts/datagen.py +++ b/sw/blas/gemm/scripts/datagen.py @@ -10,10 +10,9 @@ import numpy as np import re -import pyflexfloat as ff import sys -from snitch.util.sim import data_utils +import snitch.util.sim.data_utils as du from snitch.util.sim.data_utils import DataGen, format_array_declaration, \ format_struct_definition, format_array_definition, format_ifdef_wrapper @@ -56,14 +55,14 @@ def validate_config(self, gemm_fp, parallelize_m, # Calculate total TCDM occupation # Note: doesn't account for double buffering - prec = data_utils.size_from_precision_t(dtype) + prec = du.size_from_precision_t(dtype) a_size = frac_m * frac_k * prec b_size = frac_k * frac_n * prec c_size = frac_m * frac_n * prec total_size = a_size total_size += b_size total_size += c_size - data_utils.validate_tcdm_footprint(total_size) + du.validate_tcdm_footprint(total_size) assert (M % m_tiles) == 0, 'M is not an integer multiple of tile size' assert (N % n_tiles) == 0, 'N is not an integer multiple of tile size' @@ -99,12 +98,11 @@ def emit_header(self, **kwargs): prec, _ = self.infer_implementation(kwargs['gemm_fp']) - ff_desc = data_utils.ff_desc_from_precision_t(prec) - ctype = data_utils.ctype_from_precision_t(prec) + ctype = 
du.ctype_from_precision_t(prec) - a = ff.array(np.random.rand(M, K), ff_desc) - b = ff.array(np.random.rand(K, N), ff_desc) - c = ff.array(np.random.rand(M, N), ff_desc) + a = du.generate_random_array((M, K), prec) + b = du.generate_random_array((K, N), prec) + c = du.generate_random_array((M, N), prec) result = self.exact_golden_model(1, a, b, kwargs['beta'], c) # Store matrices in transposed form if requested diff --git a/sw/blas/syrk/scripts/datagen.py b/sw/blas/syrk/scripts/datagen.py index 9b4959fca4..ad15222f36 100755 --- a/sw/blas/syrk/scripts/datagen.py +++ b/sw/blas/syrk/scripts/datagen.py @@ -7,7 +7,7 @@ import numpy as np -from snitch.util.sim import data_utils +import snitch.util.sim.data_utils as du from snitch.util.sim.data_utils import format_array_definition, format_struct_definition, DataGen @@ -37,7 +37,7 @@ def validate(self, **kwargs): total_size = 2 * a_tile_size + c_tile_size if DOUBLE_BUFFER: total_size *= 2 - data_utils.validate_tcdm_footprint(total_size) + du.validate_tcdm_footprint(total_size) def emit_header(self, **kwargs): header = [super().emit_header()] @@ -47,14 +47,14 @@ def emit_header(self, **kwargs): if 'alpha' in kwargs: alpha = kwargs['alpha'] else: - alpha = np.random.randint(-200, 100)/100 + alpha = du.generate_random_array(1)[0] if 'beta' in kwargs: beta = kwargs['beta'] else: - beta = np.random.randint(-200, 100)/100 + beta = du.generate_random_array(1)[0] - A = np.random.randint(-200, 100, size=(kwargs['m'], kwargs['n']))/100 - C_in = np.random.randint(-200, 100, size=(kwargs['m'], kwargs['m']))/100 + A = du.generate_random_array((kwargs['m'], kwargs['n'])) + C_in = du.generate_random_array((kwargs['m'], kwargs['m'])) A = A.flatten() C_in = C_in.flatten() diff --git a/util/sim/data_utils.py b/util/sim/data_utils.py index e6f48acced..ccfa6a10b6 100644 --- a/util/sim/data_utils.py +++ b/util/sim/data_utils.py @@ -83,6 +83,24 @@ def torch_type_from_precision_t(prec): return 
precision_t_to_torch_type_map[_integer_precision_t(prec)]
 
 
+def numpy_type_from_precision_t(prec):
+    """Convert `precision_t` type to Numpy type.
+
+    Args:
+        prec: A value of type `precision_t`. Accepts both enum strings
+            (e.g. "FP64") and integer enumeration values (e.g. 8).
+    """
+    # Types which have a direct correspondence in Numpy
+    precision_t_to_numpy_type_map = {
+        8: np.float64,
+        4: np.float32,
+        2: np.float16
+    }
+    prec = _integer_precision_t(prec)
+    assert prec != 1, "No direct correspondence between FP8 and Numpy"
+    return precision_t_to_numpy_type_map[prec]
+
+
 # Returns the C type representing a floating-point value of the specified precision
 def ctype_from_precision_t(prec):
     """Convert `precision_t` type to a C type string.
@@ -100,6 +118,29 @@ def ctype_from_precision_t(prec):
     return precision_t_to_ctype_map[_integer_precision_t(prec)]
 
 
+def generate_random_array(size, prec='FP64'):
+    """Consistent random array generation for Snitch experiments.
+
+    Samples values between -1 and 1 from a uniform distribution and
+    of the exact specified type, e.g. actual 64-bit doubles.
+
+    This function ensures that e.g. power measurements are not skewed
+    by using integer values in the FPU.
+
+    Args:
+        size: Int or tuple of array dimensions.
+        prec: A value of type `precision_t`. Accepts both enum strings
+            (e.g. "FP64") and integer enumeration values (e.g. 8).
+    """
+    # Generate in 64b precision and then cast down
+    rand = np.random.default_rng().random(size=size, dtype=np.float64) * 2 - 1
+    # Generate FlexFloat array for 8b floats, casted from 16b Numpy array
+    if _integer_precision_t(prec) == 1:
+        return ff.array(rand.astype(np.float16), ff_desc_from_precision_t(prec))
+    else:
+        return rand.astype(numpy_type_from_precision_t(prec))
+
+
 def flatten(array):
     """Flatten various array types with a homogeneous API.