sw: Uniformize random data generation
colluca committed Aug 23, 2024
1 parent 2d024ee commit 72d7dc8
Showing 9 changed files with 74 additions and 36 deletions.
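
The change follows the same pattern in every datagen script: ad-hoc np.random calls are replaced by the shared du.generate_random_array helper added to util/sim/data_utils.py. A minimal before/after sketch of that pattern, with illustrative dimensions M and N (the real call sites follow in the diffs below):

import numpy as np
import snitch.util.sim.data_utils as du

M, N = 16, 8

# Before: integer samples scaled into a float range
A = np.random.randint(-200, 100, size=(M, N)) / 100

# After: uniform samples in [-1, 1) of the exact requested FP type (FP64 by default)
A = du.generate_random_array((M, N))
x = du.generate_random_array((N, 1))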
5 changes: 3 additions & 2 deletions sw/apps/atax/scripts/datagen.py
@@ -8,6 +8,7 @@

import numpy as np

import snitch.util.sim.data_utils as du
from snitch.util.sim.data_utils import format_scalar_definition, format_array_definition, \
format_array_declaration, format_ifdef_wrapper, DataGen

@@ -26,8 +27,8 @@ def emit_header(self, **kwargs):
header = [super().emit_header()]

M, N = kwargs['M'], kwargs['N']
A = np.random.randint(-200, 100, size=(M, N))/100
x = np.random.randint(-200, 100, size=(N, 1))/100
A = du.generate_random_array((M, N))
x = du.generate_random_array((N, 1))
y = self.golden_model(A, x)

assert (M % 8) == 0, "M must be an integer multiple of the number of cores"
3 changes: 2 additions & 1 deletion sw/apps/correlation/scripts/datagen.py
@@ -8,6 +8,7 @@

import numpy as np

import snitch.util.sim.data_utils as du
from snitch.util.sim.data_utils import format_scalar_definition, format_array_definition, \
format_array_declaration, format_ifdef_wrapper, DataGen

@@ -26,7 +27,7 @@ def emit_header(self, **kwargs):
header = [super().emit_header()]

M, N = kwargs['M'], kwargs['N']
data = np.random.randint(-200, 100, size=(N, M))/100
data = du.generate_random_array((N, M))
corr = self.golden_model(data)

data = data.flatten()
6 changes: 3 additions & 3 deletions sw/apps/covariance/scripts/datagen.py
@@ -8,7 +8,7 @@

import numpy as np

from snitch.util.sim import data_utils
import snitch.util.sim.data_utils as du
from snitch.util.sim.data_utils import format_array_definition, \
format_array_declaration, format_struct_definition, DataGen

@@ -42,14 +42,14 @@ def validate(self, **kwargs):
total_size = 2 * a_tile_size + b_tile_size
if DOUBLE_BUFFER:
total_size *= 2
data_utils.validate_tcdm_footprint(total_size)
du.validate_tcdm_footprint(total_size)

def emit_header(self, **kwargs):
header = [super().emit_header()]

self.validate(**kwargs)

data = np.random.randint(-200, 100, size=(kwargs['n'], kwargs['m']))
data = du.generate_random_array((kwargs['n'], kwargs['m']))
cov = self.golden_model(data)

data = data.transpose().flatten()
8 changes: 4 additions & 4 deletions sw/apps/doitgen/scripts/datagen.py
@@ -7,7 +7,7 @@

import numpy as np

from snitch.util.sim import data_utils
import snitch.util.sim.data_utils as du
from snitch.util.sim.data_utils import format_array_definition, format_struct_definition, DataGen

np.random.seed(42)
@@ -49,15 +49,15 @@ def validate(self, **kwargs):
total_size = 2 * a_tile_size + x_size
if DOUBLE_BUFFER:
total_size *= 2
data_utils.validate_tcdm_footprint(total_size)
du.validate_tcdm_footprint(total_size)

def emit_header(self, **kwargs):
header = [super().emit_header()]

self.validate(**kwargs)

A = np.random.randint(-100, 100, size=(kwargs['r'], kwargs['q'], kwargs['s']))
x = np.random.randint(-100, 100, size=(kwargs['s'], kwargs['s']))
A = du.generate_random_array((kwargs['r'], kwargs['q'], kwargs['s']))
x = du.generate_random_array((kwargs['s'], kwargs['s']))

_ = self.golden_model(A, x)

12 changes: 5 additions & 7 deletions sw/blas/axpy/scripts/datagen.py
@@ -8,15 +8,13 @@
import numpy as np
import sys

from snitch.util.sim import data_utils
import snitch.util.sim.data_utils as du
from snitch.util.sim.data_utils import format_scalar_definition, format_array_definition, \
format_array_declaration, format_ifdef_wrapper, format_struct_definition, DataGen


class AxpyDataGen(DataGen):

MIN = -1000
MAX = +1000
# AXI splits bursts crossing 4KB address boundaries. To minimize
# the occurrence of these splits the data should be aligned to 4KB
BURST_ALIGNMENT = 4096
@@ -36,16 +34,16 @@ def validate_config(self, **kwargs):
# Note: doesn't account for gaps created by data alignment
vec_size = n_per_tile * 8
total_size = 2 * 3 * vec_size
data_utils.validate_tcdm_footprint(total_size)
du.validate_tcdm_footprint(total_size)

def emit_header(self, **kwargs):
header = [super().emit_header()]

self.validate_config(**kwargs)

a = np.random.uniform(self.MIN, self.MAX, 1)[0]
x = np.random.uniform(self.MIN, self.MAX, kwargs['n'])
y = np.random.uniform(self.MIN, self.MAX, kwargs['n'])
a = du.generate_random_array(1)[0]
x = du.generate_random_array(kwargs['n'])
y = du.generate_random_array(kwargs['n'])
g = self.golden_model(a, x, y)

x_uid = 'x'
7 changes: 3 additions & 4 deletions sw/blas/dot/scripts/datagen.py
@@ -6,14 +6,13 @@
import numpy as np
import sys

import snitch.util.sim.data_utils as du
from snitch.util.sim.data_utils import format_scalar_definition, format_array_definition, \
format_scalar_declaration, format_ifdef_wrapper, DataGen


class DotDataGen(DataGen):

MIN = -1000
MAX = +1000
# AXI splits bursts crossing 4KB address boundaries. To minimize
# the occurrence of these splits the data should be aligned to 4KB
BURST_ALIGNMENT = 4096
@@ -25,8 +24,8 @@ def emit_header(self, **kwargs):
header = [super().emit_header()]

n = kwargs['n']
x = np.random.uniform(self.MIN, self.MAX, n)
y = np.random.uniform(self.MIN, self.MAX, n)
x = du.generate_random_array(n)
y = du.generate_random_array(n)
g = self.golden_model(x, y)

assert (n % (8 * 4)) == 0, "n must be an integer multiple of the number of cores times " \
16 changes: 7 additions & 9 deletions sw/blas/gemm/scripts/datagen.py
@@ -10,10 +10,9 @@

import numpy as np
import re
import pyflexfloat as ff
import sys

from snitch.util.sim import data_utils
import snitch.util.sim.data_utils as du
from snitch.util.sim.data_utils import DataGen, format_array_declaration, \
format_struct_definition, format_array_definition, format_ifdef_wrapper

@@ -56,14 +55,14 @@ def validate_config(self, gemm_fp, parallelize_m,

# Calculate total TCDM occupation
# Note: doesn't account for double buffering
prec = data_utils.size_from_precision_t(dtype)
prec = du.size_from_precision_t(dtype)
a_size = frac_m * frac_k * prec
b_size = frac_k * frac_n * prec
c_size = frac_m * frac_n * prec
total_size = a_size
total_size += b_size
total_size += c_size
data_utils.validate_tcdm_footprint(total_size)
du.validate_tcdm_footprint(total_size)

assert (M % m_tiles) == 0, 'M is not an integer multiple of tile size'
assert (N % n_tiles) == 0, 'N is not an integer multiple of tile size'
@@ -99,12 +98,11 @@ def emit_header(self, **kwargs):

prec, _ = self.infer_implementation(kwargs['gemm_fp'])

ff_desc = data_utils.ff_desc_from_precision_t(prec)
ctype = data_utils.ctype_from_precision_t(prec)
ctype = du.ctype_from_precision_t(prec)

a = ff.array(np.random.rand(M, K), ff_desc)
b = ff.array(np.random.rand(K, N), ff_desc)
c = ff.array(np.random.rand(M, N), ff_desc)
a = du.generate_random_array((M, K), prec)
b = du.generate_random_array((K, N), prec)
c = du.generate_random_array((M, N), prec)
result = self.exact_golden_model(1, a, b, kwargs['beta'], c)

# Store matrices in transposed form if requested
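
Since the requested precision is now forwarded to the helper, the explicit pyflexfloat wrapping of np.random.rand data disappears from the GEMM script. A hedged sketch of the three return types, assuming the precision strings follow the FP64/FP16/FP8 naming used elsewhere in data_utils.py (shapes are illustrative):

import snitch.util.sim.data_utils as du

a = du.generate_random_array((8, 8), 'FP64')  # plain numpy float64 array
b = du.generate_random_array((8, 8), 'FP16')  # numpy float16, cast down from float64
c = du.generate_random_array((8, 8), 'FP8')   # pyflexfloat array, built via a float16 cast

In all three cases the values come from the same uniform distribution in [-1, 1), so switching the kernel precision changes only the data type of the stimuli, not their character.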
12 changes: 6 additions & 6 deletions sw/blas/syrk/scripts/datagen.py
@@ -7,7 +7,7 @@

import numpy as np

from snitch.util.sim import data_utils
import snitch.util.sim.data_utils as du
from snitch.util.sim.data_utils import format_array_definition, format_struct_definition, DataGen


@@ -37,7 +37,7 @@ def validate(self, **kwargs):
total_size = 2 * a_tile_size + c_tile_size
if DOUBLE_BUFFER:
total_size *= 2
data_utils.validate_tcdm_footprint(total_size)
du.validate_tcdm_footprint(total_size)

def emit_header(self, **kwargs):
header = [super().emit_header()]
@@ -47,14 +47,14 @@ def emit_header(self, **kwargs):
if 'alpha' in kwargs:
alpha = kwargs['alpha']
else:
alpha = np.random.randint(-200, 100)/100
alpha = du.generate_random_array(1)[0]
if 'beta' in kwargs:
beta = kwargs['beta']
else:
beta = np.random.randint(-200, 100)/100
beta = du.generate_random_array(1)[0]

A = np.random.randint(-200, 100, size=(kwargs['m'], kwargs['n']))/100
C_in = np.random.randint(-200, 100, size=(kwargs['m'], kwargs['m']))/100
A = du.generate_random_array((kwargs['m'], kwargs['n']))
C_in = du.generate_random_array((kwargs['m'], kwargs['m']))

A = A.flatten()
C_in = C_in.flatten()
41 changes: 41 additions & 0 deletions util/sim/data_utils.py
@@ -83,6 +83,24 @@ def torch_type_from_precision_t(prec):
return precision_t_to_torch_type_map[_integer_precision_t(prec)]


def numpy_type_from_precision_t(prec):
"""Convert `precision_t` type to Numpy type.
Args:
prec: A value of type `precision_t`. Accepts both enum strings
(e.g. "FP64") and integer enumeration values (e.g. 8).
"""
# Types which have a direct correspondence in Numpy
precision_t_to_numpy_type_map = {
8: np.float64,
4: np.float32,
2: np.float16
}
prec = _integer_precision_t(prec)
assert prec != 1, "No direct correspondence between FP8 and Numpy"
return precision_t_to_numpy_type_map[prec]


# Returns the C type representing a floating-point value of the specified precision
def ctype_from_precision_t(prec):
"""Convert `precision_t` type to a C type string.
@@ -100,6 +118,29 @@
return precision_t_to_ctype_map[_integer_precision_t(prec)]


def generate_random_array(size, prec='FP64'):
"""Consistent random array generation for Snitch experiments.
Samples values between -1 and 1 from a uniform distribution,
in the exact specified type (e.g. actual 64-bit doubles).
This ensures that e.g. power measurements are not skewed
by integer values in the FPU.
Args:
size: Number of elements or tuple of array dimensions.
prec: A value of type `precision_t`. Accepts both enum strings
(e.g. "FP64") and integer enumeration values (e.g. 8).
"""
# Generate in 64b precision and then cast down
rand = np.random.default_rng().random(size=size, dtype=np.float64) * 2 - 1
# Generate a FlexFloat array for 8b floats, cast from a 16b Numpy array
if _integer_precision_t(prec) == 1:
return ff.array(rand.astype(np.float16), ff_desc_from_precision_t(prec))
else:
return rand.astype(numpy_type_from_precision_t(prec))


def flatten(array):
"""Flatten various array types with a homogeneous API.

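Finally, a small usage check for the new generate_random_array helper, assuming it behaves as documented in the hunk above (the shapes and the 'FP16' string are illustrative):

import numpy as np
import snitch.util.sim.data_utils as du

x = du.generate_random_array((4, 4))          # defaults to FP64
y = du.generate_random_array((4, 4), 'FP16')  # generated in FP64, then cast down

assert x.dtype == np.float64 and y.dtype == np.float16
assert np.all((x >= -1.0) & (x < 1.0))        # uniform samples in [-1, 1)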