From 5d6ba86da838c5d7aef9bd0ee1fd69885e249ab0 Mon Sep 17 00:00:00 2001
From: Aba <abarajithan-7@gmail.com>
Date: Sat, 18 Nov 2023 00:16:15 -0800
Subject: [PATCH] Integrate Hardware class into param_test.py

---
 deepsocflow/py/hardware.py |  82 +++++++--
 run/param_test.py          | 334 +++++++++++--------------------------
 run/work/config_hw.svh     |  43 +++--
 run/work/config_hw.tcl     |  28 ++--
 run/work/config_tb.svh     |   2 +-
 run/work/hardware.json     |  28 ++++
 run/work/sources.txt       |  42 ++---
 7 files changed, 251 insertions(+), 308 deletions(-)
 create mode 100644 run/work/hardware.json

diff --git a/deepsocflow/py/hardware.py b/deepsocflow/py/hardware.py
index d142f65..0f761d1 100644
--- a/deepsocflow/py/hardware.py
+++ b/deepsocflow/py/hardware.py
@@ -1,8 +1,10 @@
-# from deepsocflow import Hardware, Bundle, QInput, BundleModel, QConvCore, QDenseCore, QAdd, QPool, Softmax, QLeakyReLu
 import numpy as np
-from abc import ABC, abstractmethod
 import json
+import os
+import subprocess
+import glob
 from deepsocflow.py.utils import *
+import deepsocflow
 
 
 class Hardware:
@@ -19,11 +21,15 @@ def __init__(
             bits_bias: int = 16, 
             max_batch_size: int = 512, 
             max_channels_in: int = 512, 
-            max_channels_out: int = 512, 
             max_kernel_size: int = 13, 
             max_image_size: int = 32, 
-            weights_cache_kbytes: int =384, 
-            edge_cache_kbytes: int|None = None
+            ram_weights_depth: int = 512, 
+            ram_edges_depth: int|None = 288,
+            axi_width: int = 64,
+            target_cpu_int_bits: int = 32,
+            valid_prob: float = 0.01,
+            ready_prob: float = 0.1,
+            data_dir: str = 'vectors/'
             ):
         """
         Args:
@@ -35,11 +41,11 @@ def __init__(
             bits_bias (int, optional): _description_. Defaults to 16.
             max_batch_size (int, optional): _description_. Defaults to 512.
             max_channels_in (int, optional): _description_. Defaults to 512.
-            max_channels_out (int, optional): _description_. Defaults to 512.
             max_kernel_size (int, optional): _description_. Defaults to 13.
             max_image_size (int, optional): _description_. Defaults to 32.
-            weights_cache_kbytes (int, optional): _description_. Defaults to 384.
-            edge_cache_kbytes (int | None, optional): _description_. Defaults to None.
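+            ram_weights_depth (int, optional): Depth of the weights RAM (exported as RAM_WEIGHTS_DEPTH). Defaults to 512.
+            ram_edges_depth (int | None, optional): Depth of the RAM used for edge padding (exported as RAM_EDGES_DEPTH). Defaults to 288.
+            target_cpu_int_bits (int, optional): Size of an integer on the target CPU, in bits. Defaults to 32.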
         """
         
         self.params = locals()
@@ -57,17 +63,20 @@ def __init__(
         self.B_BITS = bits_bias
         self.XN_MAX = max_batch_size
         self.CI_MAX = max_channels_in
-        self.CO_MAX = max_channels_out
         self.KH_MAX, self.KW_MAX = tuple(max_kernel_size) if (type(max_kernel_size) in [tuple, list]) else (max_kernel_size, max_kernel_size)
         self.XH_MAX, self.XW_MAX = tuple(max_image_size ) if (type(max_image_size ) in [tuple, list]) else (max_image_size , max_image_size )
+        self.IN_BITS = self.OUT_BITS = axi_width
+        self.INT_BITS = target_cpu_int_bits
+        self.VALID_PROB = int(valid_prob * 1000)
+        self.READY_PROB = int(ready_prob * 1000)
 
-        self.RAM_WEIGHTS_DEPTH     = int((weights_cache_kbytes*1024)/(self.K_BITS*self.COLS*2))
+        self.RAM_WEIGHTS_DEPTH     = ram_weights_depth
         '''
         | Width of weights RAM   = K_BITS * COLS
         | Number of weights RAMs = 2
         '''
 
-        self.RAM_EDGES_DEPTH       = edge_cache_kbytes if edge_cache_kbytes is not None else int(self.CI_MAX * self.XW_MAX * np.ceil(self.XH_MAX/self.ROWS)-1)
+        self.RAM_EDGES_DEPTH       = ram_edges_depth
         '''
         | Depth of RAM needed for edge padding.
         |     if k == 1 -> 0
@@ -85,8 +94,11 @@ def __init__(
         self.BITS_XN_MAX           = clog2(self.XN_MAX)
         self.BITS_RAM_WEIGHTS_ADDR = clog2(self.RAM_WEIGHTS_DEPTH)
 
-        self.IN_BITS = self.OUT_BITS = 64
-
+        self.MODULE_DIR = os.path.dirname(deepsocflow.__file__)
+        self.TB_MODULE = "dnn_engine_tb"
+        self.WAVEFORM = "dnn_engine_tb_behav.wcfg"
+        self.SOURCES = glob.glob(f'{self.MODULE_DIR}/test/sv/*.sv') + glob.glob(f"{self.MODULE_DIR}/rtl/**/*.v", recursive=True) + glob.glob(f"{self.MODULE_DIR}/rtl/**/*.sv", recursive=True) + glob.glob(f"{os.getcwd()}/*.svh")
+        self.DATA_DIR = data_dir
 
     def export_json(self, path='./hardware.json'):
         '''
@@ -113,7 +125,13 @@ def export(self):
         Exports the hardware parameters to SystemVerilog and TCL scripts.
         '''
 
-        with open('rtl/include/config_hw.svh', 'w') as f:
+        with open('config_tb.svh', 'w') as f:
+            f.write(f'`define VALID_PROB {self.VALID_PROB} \n`define READY_PROB {self.READY_PROB}')
+
+        with open('sources.txt', 'w') as f:
+            f.write("\n".join([os.path.normpath(s) for s in self.SOURCES]))
+
+        with open('config_hw.svh', 'w') as f:
             f.write(f'''
 // Written from Hardware.export()
 
@@ -143,7 +161,7 @@ def export(self):
 ''')
 
 
-        with open('fpga/scripts/config_hw.tcl', 'w') as f:
+        with open('config_hw.tcl', 'w') as f:
             f.write(f'''
 # Written from Hardware.export()
                     
@@ -160,3 +178,37 @@ def export(self):
 set S_PIXELS_WIDTH_LF  {self.IN_BITS}
 set M_OUTPUT_WIDTH_LF  {self.OUT_BITS}
 ''')
+
+
+
+    def simulate(self, SIM='verilator', SIM_PATH=''):
+        '''Compiles the testbench with the chosen simulator, then runs it. Build artefacts are written to `build/`.
+        SIM: 'xsim' | 'icarus' | 'verilator'. SIM_PATH: prepended verbatim to the xsim/verilator executable names (unused for icarus).
+        Raises ValueError on an unknown SIM; asserts that every compile & simulation command exits with code 0.'''
+        if SIM not in ('xsim', 'icarus', 'verilator'):
+            raise ValueError(f"Unknown simulator {SIM!r}: expected 'xsim', 'icarus' or 'verilator'")
+        os.makedirs('build', exist_ok=True)
+        print("\n\nCOMPILING...\n\n")
+        if SIM == 'xsim':
+            assert subprocess.run(cwd="build", shell=True, args=fr'{SIM_PATH}xsc {self.MODULE_DIR}/c/example.c --gcc_compile_options -I../').returncode == 0
+            assert subprocess.run(cwd="build", shell=True, args=fr'{SIM_PATH}xvlog -sv -f ../sources.txt -i ../').returncode == 0
+            assert subprocess.run(cwd="build", shell=True, args=fr'{SIM_PATH}xelab {self.TB_MODULE} --snapshot {self.TB_MODULE} -log elaborate.log --debug typical -sv_lib dpi').returncode == 0
+        if SIM == 'icarus':
+            cmd = [ "iverilog", "-v", "-g2012", "-o", "build/a.out", "-I", "sv", "-s", self.TB_MODULE] + self.SOURCES
+            print(" ".join(cmd))
+            assert subprocess.run(cmd).returncode == 0
+        # Verilator: argv is a list (not str.split) so that a MODULE_DIR containing spaces stays a single argument
+        if SIM == "verilator":
+            cmd = [f'{SIM_PATH}verilator', '--binary', '-j', '0', '-Wno-fatal', '--trace', '--relative-includes', '--top', self.TB_MODULE, '-I../', '-F', '../sources.txt', '-CFLAGS', '-DVERILATOR', '-CFLAGS', '-I../', f'{self.MODULE_DIR}/c/example.c', '--Mdir', './']
+            print(" ".join(cmd))
+            assert subprocess.run(cmd, cwd='build').returncode == 0
+
+        print("\n\nSIMULATING...\n\n")
+        if SIM == 'xsim':
+            with open('build/xsim_cfg.tcl', 'w') as f:
+                f.write('''log_wave -recursive * \nrun all \nexit''')
+            assert subprocess.run(fr'{SIM_PATH}xsim {self.TB_MODULE} --tclbatch xsim_cfg.tcl', cwd="build", shell=True).returncode == 0
+        if SIM == 'icarus':
+            assert subprocess.run(["vvp", "build/a.out"]).returncode == 0
+        if SIM == 'verilator':
+            assert subprocess.run([f"./V{self.TB_MODULE}"], cwd="build").returncode == 0
\ No newline at end of file
diff --git a/run/param_test.py b/run/param_test.py
index a406138..345e771 100644
--- a/run/param_test.py
+++ b/run/param_test.py
@@ -1,185 +1,56 @@
 import numpy as np
 import os
-# import torch
-import tensorflow as tf
-from tensorflow.keras.layers import Input
-import subprocess
-import glob
-import os.path
-import pytest
-import itertools
-import pickle
-from copy import deepcopy
-from collections import namedtuple
-from dataclasses import dataclass
-import deepsocflow
-from deepsocflow import Bundle
 from qkeras import *
 from tensorflow.keras.layers import Input
-keras.utils.set_random_seed(0)
+import sys
+sys.path.append("../../")
+import deepsocflow
+from deepsocflow import Bundle, Hardware
 
-# Simulator: xsim on windows, verilator otherwise
-SIM = 'xsim' if os.name=='nt' else 'verilator' #'icarus'
-XIL_PATH = os.path.join("F:", "Xilinx", "Vivado", "2022.1", "bin")
 
-DATA_DIR   = 'vectors'
-os.makedirs(DATA_DIR, exist_ok=True)
-DATA_DIR_SIM = f'../{DATA_DIR}'
-MODULE_DIR = deepsocflow.__file__.replace('\\', '/').replace("/__init__.py", "")
+# Simulator: xsim on windows, verilator otherwise
+if os.name=='nt':
+    SIM = 'xsim'
+    SIM_PATH = "F:/Xilinx/Vivado/2022.1/bin/" #os.path.join("F:", "Xilinx", "Vivado", "2022.1", "bin")
+else:
+    SIM = 'verilator'
+    SIM_PATH = ''
 
-TB_MODULE = "dnn_engine_tb"
-WAVEFORM = "dnn_engine_tb_behav.wcfg"
 
-type_d = {
-    'np': {8: np.int8, 16: np.int16, 32: np.int32, 64: np.int64}
-}
+keras.utils.set_random_seed(0)
+type_d = { 'np': {8: np.int8, 16: np.int16, 32: np.int32, 64: np.int64} }
 
 '''
-Synthesis Parameters
+0. Specify Hardware
 '''
-
-def product_dict(**kwargs):
-    keys, vals = kwargs.keys(), kwargs.values()
-    for instance in itertools.product(*vals):
-        d = dict(zip(keys, instance))
-        yield namedtuple('Compile', d)(**d)
-
-
-def make_compile_params(c):
-
-    assert c.ROWS >= c.KW_MAX//2 # to capture the bottom pixels
-
-    def clog2(x):
-        return int(np.ceil(np.log2(x)))
-    
-    d = { 
-        'KH_MAX'                : c.KW_MAX, 
-        'L_MAX'                 : int(np.ceil(c.XH_MAX//c.ROWS)),
-    }
-    n = namedtuple('Compile', d)(**d)
-    c = namedtuple("Compile", c._fields + n._fields)(*(c + n))
-
-    d = { 
-        'CONFIG_BEATS'          : 0,
-        'X_PAD'                 : int(np.ceil(c.KH_MAX//2)),
-        'BITS_KW2'              : clog2((c.KW_MAX+1)/2),
-        'BITS_KH2'              : clog2((c.KH_MAX+1)/2),
-        'BITS_CIN_MAX'          : clog2(c.CI_MAX),
-        'BITS_COLS_MAX'         : clog2(c.XW_MAX),
-        'BITS_BLOCKS_MAX'       : clog2(c.L_MAX),
-        'BITS_XN_MAX'           : clog2(c.XN_MAX),
-        'BITS_RAM_WEIGHTS_ADDR' : clog2(c.RAM_WEIGHTS_DEPTH),
-         }
-    n = namedtuple('Compile', d)(**d)
-    c = namedtuple("Compile", c._fields + n._fields)(*(c + n))
-
-    print(f"\n\n---------- {SIM}:{c} ----------\n\n")
-    return c
-
-
-def compile(c):
-
-    with open(f'./config_hw.svh', 'w') as f:
-        f.write(f'''
-    // Written from param_tests.py
-
-    `define ROWS                {c.ROWS}                 \t// PE rows, constrained by resources
-    `define COLS                {c.COLS}                 \t// PE cols, constrained by resources
-    `define X_BITS              {c.X_BITS}               \t// Bits per word in input
-    `define K_BITS              {c.K_BITS}               \t// Bits per word in input
-    `define Y_BITS              {c.Y_BITS}               \t// Bits per word in output of conv
-
-    `define KH_MAX              {c.KH_MAX}               \t// max of kernel height, across layers
-    `define KW_MAX              {c.KW_MAX}               \t// max of kernel width, across layers
-    `define XH_MAX              {c.XH_MAX}               \t// max of input image height, across layers
-    `define XW_MAX              {c.XW_MAX}               \t// max of input image width, across layers
-    `define XN_MAX              {c.XN_MAX}               \t// max of input batch size, across layers
-    `define CI_MAX              {c.CI_MAX}               \t// max of input channels, across layers
-    `define CONFIG_BEATS        {c.CONFIG_BEATS}         \t// constant, for now
-    `define RAM_WEIGHTS_DEPTH   {c.RAM_WEIGHTS_DEPTH}    \t// CONFIG_BEATS + max(KW * CI), across layers
-    `define RAM_EDGES_DEPTH     {c.RAM_EDGES_DEPTH}      \t// max (KW * CI * XW), across layers when KW != 1
-
-    `define DELAY_ACC    1                               \t// constant, for now
-    `define DELAY_MUL    2                               \t// constant, for now 
-    `define DELAY_W_RAM  2                               \t// constant, for now 
-
-    `define S_WEIGHTS_WIDTH_LF  {c.IN_BITS}              \t// constant (64), for now
-    `define S_PIXELS_WIDTH_LF   {c.IN_BITS}              \t// constant (64), for now
-    `define M_OUTPUT_WIDTH_LF   {c.OUT_BITS}             \t// constant (64), for now
-    ''')
-        
-    with open(f'./config_hw.tcl', 'w') as f:
-        f.write(f'''
-    # Written from param_tests.py
-    set RAM_WEIGHTS_DEPTH {c.RAM_WEIGHTS_DEPTH}
-    set ROWS               {c.ROWS}
-    set COLS               {c.COLS}
-    set X_BITS             {c.X_BITS}
-    set K_BITS             {c.K_BITS}
-    set Y_BITS             {c.Y_BITS}
-    set DELAY_W_RAM        2
-    set RAM_EDGES_DEPTH    {c.RAM_EDGES_DEPTH}
-    set KH_MAX             {c.KH_MAX}
-    set S_WEIGHTS_WIDTH_LF  {c.IN_BITS}
-    set S_PIXELS_WIDTH_LF   {c.IN_BITS}
-    set M_OUTPUT_WIDTH_LF   {c.OUT_BITS}
-        ''')
-
-    os.makedirs('build', exist_ok=True)
-    with open('./config_tb.svh', 'w') as f:
-        f.write(f'`define VALID_PROB {c.VALID_PROB} \n`define READY_PROB {c.READY_PROB}')
-
-
-    SOURCES = glob.glob(f'{MODULE_DIR}/test/sv/*.sv') + glob.glob(f"{MODULE_DIR}/rtl/**/*.v", recursive=True) + glob.glob(f"{MODULE_DIR}/rtl/**/*.sv", recursive=True) + glob.glob(f"{os.getcwd()}/*.svh")
-    print(SOURCES)
-    with open('sources.txt', 'w') as f:
-        f.write("\n".join([os.path.normpath(s) for s in SOURCES]))
-
-    if SIM == 'xsim':
-        assert subprocess.run(cwd="build", shell=True, args=fr'{XIL_PATH}\xsc {MODULE_DIR}/c/example.c --gcc_compile_options -I../').returncode == 0
-        assert subprocess.run(cwd="build", shell=True, args=fr'{XIL_PATH}\xvlog -sv -f ../sources.txt -i ../').returncode == 0
-        assert subprocess.run(cwd="build", shell=True, args=fr'{XIL_PATH}\xelab {TB_MODULE} --snapshot {TB_MODULE} -log elaborate.log --debug typical -sv_lib dpi').returncode == 0
-
-    if SIM == 'icarus':
-        cmd = [ "iverilog", "-v", "-g2012", "-o", "build/a.out", "-I", "sv", "-s", TB_MODULE] + SOURCES
-        print(" ".join(cmd))
-        assert subprocess.run(cmd).returncode == 0
-
-    if SIM == "verilator":
-        cmd = f'verilator --binary -j 0 -Wno-fatal --trace --relative-includes --top {TB_MODULE} -I../ -F ../sources.txt -CFLAGS -DVERILATOR -CFLAGS -I../ {MODULE_DIR}/c/example.c --Mdir ./'
-        print(cmd)
-        assert subprocess.run(cmd.split(' '), cwd='build').returncode == 0
-
-    return c
-
-
-@pytest.mark.parametrize("COMPILE", list(product_dict(
-                                                X_BITS     = [4    ], 
-                                                K_BITS     = [4    ], 
-                                                B_BITS     = [16   ], 
-                                                Y_BITS     = [24   ], 
-                                                INT_BITS   = [32   ], # size of integer in target CPU
-                                                ROWS       = [8    ], 
-                                                COLS       = [24   ], 
-                                                KW_MAX     = [13   ], 
-                                                CI_MAX     = [2048 ], 
-                                                XW_MAX     = [512  ], 
-                                                XH_MAX     = [512  ], 
-                                                XN_MAX     = [64   ], 
-                                                IN_BITS    = [64   ], 
-                                                OUT_BITS   = [64   ],
-                                                RAM_WEIGHTS_DEPTH = [20  ],  # KH*CI + Config beats
-                                                RAM_EDGES_DEPTH   = [288 ], # max(CI * XW * (XH/ROWS-1))
-
-                                                VALID_PROB = [1],
-                                                READY_PROB = [100],
-                                            )))
-def test_dnn_engine(COMPILE):
-    c = make_compile_params(COMPILE)
-    assert c.X_BITS in [1,2,4,8] and c.K_BITS in [1,2,4,8], "X_BITS and K_BITS should be in [1,2,4,8]"
-    assert c.B_BITS in [8,16,32], "B_BITS should be in [8,16,32]"
-    xq, kq, bq = f'quantized_bits({c.X_BITS},0,False,True,1)', f'quantized_bits({c.K_BITS},0,False,True,1)', f'quantized_bits({c.B_BITS},0,False,True,1)'
-    inp        = {'bits':c.X_BITS, 'frac':c.X_BITS-1}
+hw = Hardware (
+        processing_elements  = (8,24),
+        frequency_mhz        = 250,
+        bits_input           = 4,
+        bits_weights         = 4,
+        bits_sum             = 24,
+        bits_bias            = 16,
+        max_batch_size       = 64, 
+        max_channels_in      = 2048,
+        max_kernel_size      = (13, 13),
+        max_image_size       = (512,512),
+        ram_weights_depth    = 20,
+        ram_edges_depth      = 288,
+        axi_width            = 64,
+        target_cpu_int_bits  = 32,
+        valid_prob           = 0.1,
+        ready_prob           = 0.1,
+        data_dir             = 'vectors',
+     )
+hw.export_json()
+hw = Hardware.from_json('hardware.json')
+hw.export() # Generates: config_tb.svh, sources.txt, config_hw.svh, config_hw.tcl
+
+
+def test_dnn_engine():
+
+    xq, kq, bq = f'quantized_bits({hw.X_BITS},0,False,True,1)', f'quantized_bits({hw.K_BITS},0,False,True,1)', f'quantized_bits({hw.B_BITS},0,False,True,1)'
+    inp        = {'bits':hw.X_BITS, 'frac':hw.X_BITS-1}
 
     '''
     Build Model
@@ -188,13 +59,13 @@ def test_dnn_engine(COMPILE):
     x = x_in = Input(input_shape[1:], name='input')
     x = QActivation(xq)(x)
 
-    x = x_skip1 = Bundle( core= {'type':'conv' , 'filters':8 , 'kernel_size':(11,11), 'strides':(2,1), 'padding':'same', 'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':True , 'act_str':f'quantized_relu({c.X_BITS},0,negative_slope=0)'    }, pool= {'type':'avg', 'size':(3,4), 'strides':(2,3), 'padding':'same', 'act_str':f'quantized_bits({c.X_BITS},0,False,False,1)'})(x)
-    x = x_skip2 = Bundle( core= {'type':'conv' , 'filters':8 , 'kernel_size':( 1, 1), 'strides':(1,1), 'padding':'same', 'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':True , 'act_str':f'quantized_bits({c.X_BITS},0,False,False,1)'       }, add = {'act_str':f'quantized_bits({c.X_BITS},0,False,True,1)'})(x, x_skip1)
-    x =           Bundle( core= {'type':'conv' , 'filters':8 , 'kernel_size':( 7, 7), 'strides':(1,1), 'padding':'same', 'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':False, 'act_str':f'quantized_bits({c.X_BITS},0,False,True,1)'        }, add = {'act_str':f'quantized_bits({c.X_BITS},0,False,True,1)'})(x, x_skip2)
-    x =           Bundle( core= {'type':'conv' , 'filters':8 , 'kernel_size':( 5, 5), 'strides':(1,1), 'padding':'same', 'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':True , 'act_str':f'quantized_relu({c.X_BITS},0,negative_slope=0.125)'}, add = {'act_str':f'quantized_bits({c.X_BITS},0,False,True,1)'})(x, x_skip1)
-    x =           Bundle( core= {'type':'conv' , 'filters':24, 'kernel_size':( 3, 3), 'strides':(1,1), 'padding':'same', 'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':True , 'act_str':f'quantized_relu({c.X_BITS},0,negative_slope=0)'    },)(x)
-    x =           Bundle( core= {'type':'conv' , 'filters':10, 'kernel_size':( 1, 1), 'strides':(1,1), 'padding':'same', 'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':True , 'act_str':f'quantized_relu({c.X_BITS},0,negative_slope=0.125)'}, flatten= True)(x)
-    x =           Bundle( core= {'type':'dense', 'units'  :10,                                                           'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':True , 'act_str':f'quantized_relu({c.X_BITS},0,negative_slope=0.125)'})(x)
+    x = x_skip1 = Bundle( core= {'type':'conv' , 'filters':8 , 'kernel_size':(11,11), 'strides':(2,1), 'padding':'same', 'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':True , 'act_str':f'quantized_relu({hw.X_BITS},0,negative_slope=0)'    }, pool= {'type':'avg', 'size':(3,4), 'strides':(2,3), 'padding':'same', 'act_str':f'quantized_bits({hw.X_BITS},0,False,False,1)'})(x)
+    x = x_skip2 = Bundle( core= {'type':'conv' , 'filters':8 , 'kernel_size':( 1, 1), 'strides':(1,1), 'padding':'same', 'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':True , 'act_str':f'quantized_bits({hw.X_BITS},0,False,False,1)'       }, add = {'act_str':f'quantized_bits({hw.X_BITS},0,False,True,1)'})(x, x_skip1)
+    x =           Bundle( core= {'type':'conv' , 'filters':8 , 'kernel_size':( 7, 7), 'strides':(1,1), 'padding':'same', 'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':False, 'act_str':f'quantized_bits({hw.X_BITS},0,False,True,1)'        }, add = {'act_str':f'quantized_bits({hw.X_BITS},0,False,True,1)'})(x, x_skip2)
+    x =           Bundle( core= {'type':'conv' , 'filters':8 , 'kernel_size':( 5, 5), 'strides':(1,1), 'padding':'same', 'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':True , 'act_str':f'quantized_relu({hw.X_BITS},0,negative_slope=0.125)'}, add = {'act_str':f'quantized_bits({hw.X_BITS},0,False,True,1)'})(x, x_skip1)
+    x =           Bundle( core= {'type':'conv' , 'filters':24, 'kernel_size':( 3, 3), 'strides':(1,1), 'padding':'same', 'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':True , 'act_str':f'quantized_relu({hw.X_BITS},0,negative_slope=0)'    },)(x)
+    x =           Bundle( core= {'type':'conv' , 'filters':10, 'kernel_size':( 1, 1), 'strides':(1,1), 'padding':'same', 'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':True , 'act_str':f'quantized_relu({hw.X_BITS},0,negative_slope=0.125)'}, flatten= True)(x)
+    x =           Bundle( core= {'type':'dense', 'units'  :10,                                                           'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':True , 'act_str':f'quantized_relu({hw.X_BITS},0,negative_slope=0.125)'})(x)
 
     model = Model(inputs=x_in, outputs=x)
 
@@ -209,8 +80,11 @@ def test_dnn_engine(COMPILE):
     inp['tensor'] = inp_act_model(x, training=False)
     inp['int'] = inp['tensor'].numpy() * 2**inp['frac']
 
-
-    for file in os.scandir(DATA_DIR):
+    '''
+    Clean the data directory
+    '''
+    os.makedirs(hw.DATA_DIR, exist_ok=True)
+    for file in os.scandir(hw.DATA_DIR):
         os.remove(file.path)
 
     bundles = model.layers[2:]
@@ -222,8 +96,8 @@ def test_dnn_engine(COMPILE):
     buffer_map = []
     for ib, b in enumerate(bundles):
         print(f'-----------------{b.idx}-----------------------')
-        b.process(inp if b.idx==0 else None, c)
-        b.export(c, False) #ib==len(bundles)-1
+        b.process(inp if b.idx==0 else None, hw)
+        b.export(hw, False) #ib==len(bundles)-1
         
         '''
         Buffer allocation for add bundle
@@ -265,10 +139,10 @@ def test_dnn_engine(COMPILE):
         ch.write(f"Bundle_t bundles [N_BUNDLES] = {{\n")
         
         for ib, b in enumerate(bundles):
-            w_bpt    = (c.K_BITS*b.we[-1][0].size + c.IN_BITS)//8
-            w_bpt_p0 = (c.K_BITS*b.we[0][0].size + c.IN_BITS )//8
-            x_bpt    = (c.X_BITS*b.xe[-1].size + c.IN_BITS   )//8 
-            x_bpt_p0 = (c.X_BITS*b.xe[0].size + c.IN_BITS    )//8
+            w_bpt    = (hw.K_BITS*b.we[-1][0].size + hw.IN_BITS)//8
+            w_bpt_p0 = (hw.K_BITS*b.we[0][0].size + hw.IN_BITS )//8
+            x_bpt    = (hw.X_BITS*b.xe[-1].size + hw.IN_BITS   )//8 
+            x_bpt_p0 = (hw.X_BITS*b.xe[0].size + hw.IN_BITS    )//8
             
             if ib == len(bundles)-1:
                 o_words_b = b.o_int.size
@@ -280,11 +154,11 @@ def test_dnn_engine(COMPILE):
                 o_wpt_p0  = b_next.xe[0].size
                 o_words_b = o_wpt_p0 + (b_next.r.CP-1)*o_wpt
 
-                o_bpt = (c.X_BITS*b_next.xe[-1].size + c.IN_BITS)//8
-                o_bpt_p0 = (c.X_BITS*b_next.xe[0].size + c.IN_BITS)//8
+                o_bpt = (hw.X_BITS*b_next.xe[-1].size + hw.IN_BITS)//8
+                o_bpt_p0 = (hw.X_BITS*b_next.xe[0].size + hw.IN_BITS)//8
                 o_bytes_b = o_bpt_p0 + (b_next.r.CP-1)*o_bpt
 
-            xp_words  = b.r.XN * b.r.XL * b.r.XW * (c.ROWS+c.X_PAD)
+            xp_words  = b.r.XN * b.r.XL * b.r.XW * (hw.ROWS+hw.X_PAD)
 
             w_bytes_b = (w_bpt_p0 + (b.r.CP-1)*w_bpt)*b.r.IT
             x_bytes_b = (x_bpt_p0 + (b.r.CP-1)*x_bpt)
@@ -302,7 +176,7 @@ def test_dnn_engine(COMPILE):
 
             y_coe = b.r.CO_PRL
             y_coe_tl = b.r.CO_PRL if (b.r.CO==b.r.IT*b.r.CO_PRL) else b.r.CO%b.r.IT
-            y_r_ll = c.ROWS if b.r.XH==b.r.XL*c.ROWS else  b.r.XH % c.ROWS
+            y_r_ll = hw.ROWS if b.r.XH==b.r.XL*hw.ROWS else  b.r.XH % hw.ROWS
 
             ca_nzero, ca_shift, ca_pl_scale = b.core['act']['non_zero'], b.core['act']['shift_bits'], b.core['act']['plog_slope']
 
@@ -338,15 +212,15 @@ def test_dnn_engine(COMPILE):
 
 
         ch.write(f"\n}};\n\n")
-        ch.write(f"#define X_BITS_L2   {int(np.log2(c.X_BITS))}\n")
-        ch.write(f"#define W_BITS_L2   {int(np.log2(c.K_BITS))}\n")
-        ch.write(f"#define X_PAD       {c.X_PAD}\n")
-        ch.write(f"#define KH_MAX      {c.KH_MAX}\n")
-        ch.write(f"#define PE_ROWS     {c.ROWS}\n")
-        ch.write(f"#define PE_COLS     {c.COLS}\n\n")
+        ch.write(f"#define X_BITS_L2   {int(np.log2(hw.X_BITS))}\n")
+        ch.write(f"#define W_BITS_L2   {int(np.log2(hw.K_BITS))}\n")
+        ch.write(f"#define X_PAD       {hw.X_PAD}\n")
+        ch.write(f"#define KH_MAX      {hw.KH_MAX}\n")
+        ch.write(f"#define PE_ROWS     {hw.ROWS}\n")
+        ch.write(f"#define PE_COLS     {hw.COLS}\n\n")
 
         ch.write(f"#define N_ADD_BUF   {len(buffer_map) if len(buffer_map) > 0 else ''}\n")
-        ch.write(f"#define WB_BYTES    {w_bytes + (b_words*c.B_BITS)//8}\n")
+        ch.write(f"#define WB_BYTES    {w_bytes + (b_words*hw.B_BITS)//8}\n")
         ch.write(f"#define W_BYTES     {w_bytes}\n")
         ch.write(f"#define X_BYTES     {x_bytes}\n")
         ch.write(f"#define O_WORDS     {o_words}\n")
@@ -354,11 +228,11 @@ def test_dnn_engine(COMPILE):
         ch.write(f"#define O_BYTES_MAX {o_bytes_max}\n")
         ch.write(f"#define X_BYTES_ALL {x_bytes_all}\n")
         ch.write(f"#define NHWC_WORDS  {nhwc_words_max}\n")
-        ch.write(f"#define B_TYPE      int{c.B_BITS}_t\n")
+        ch.write(f"#define B_TYPE      int{hw.B_BITS}_t\n")
         ch.write(f"#define B_WORDS     {b_words}\n")
-        ch.write(f'#define DATA_DIR   "{DATA_DIR_SIM}"\n\n')
+        ch.write(f'#define DATA_DIR   "../{hw.DATA_DIR}"\n\n')
 
-        mask_nums = [(2**c.X_BITS-1) << (p*c.X_BITS)  for p in range(8//c.X_BITS)]
+        mask_nums = [(2**hw.X_BITS-1) << (p*hw.X_BITS)  for p in range(8//hw.X_BITS)]
         mask_nums = ~np.array(mask_nums, dtype=np.uint8)
         ch.write(f"static const uint8_t X_POSITION_INVERTED_MASKS [] = {{ {', '.join([str(n) for n in mask_nums])} }};\n")
 
@@ -371,25 +245,25 @@ def test_dnn_engine(COMPILE):
     for ib, b in enumerate(bundles):
         x_bitstring_b = b''
         if b.b:
-            b_bitstring += b.be.astype(type_d['np'][c.B_BITS]).tobytes()
+            b_bitstring += b.be.astype(type_d['np'][hw.B_BITS]).tobytes()
         for ip in range(b.r.CP):
-            xe = Bundle.pack_words_into_bytes(arr=b.xe[ip].flatten(), bits=c.X_BITS)
+            xe = Bundle.pack_words_into_bytes(arr=b.xe[ip].flatten(), bits=hw.X_BITS)
             x_bitstring_b += b.r.x_header_be_p[ip!=0].tobytes() + xe.tobytes()
                 
             for it in range(b.r.IT):
-                we = Bundle.pack_words_into_bytes(arr=b.we[ip][it].flatten(), bits=c.K_BITS)
+                we = Bundle.pack_words_into_bytes(arr=b.we[ip][it].flatten(), bits=hw.K_BITS)
                 w_bitstring += b.r.w_header_be_p[ip!=0].tobytes() + we.tobytes()
         x_bitstring += x_bitstring_b
-        with open(f"{DATA_DIR}/{ib}_x_sim.bin", 'wb') as f: 
+        with open(f"{hw.DATA_DIR}/{ib}_x_sim.bin", 'wb') as f: 
             f.write(x_bitstring_b)
         if ib==0:
-            with open(f"{DATA_DIR}/x.bin", 'wb') as f: 
+            with open(f"{hw.DATA_DIR}/x.bin", 'wb') as f: 
                 f.write(x_bitstring_b)
 
-    with open(f"{DATA_DIR}/w.bin", 'wb') as f: 
+    with open(f"{hw.DATA_DIR}/w.bin", 'wb') as f: 
         f.write(w_bitstring + b_bitstring)
 
-    with open(f"{DATA_DIR}/x_all.bin", 'wb') as f: 
+    with open(f"{hw.DATA_DIR}/x_all.bin", 'wb') as f: 
         f.write(x_bitstring)
 
 
@@ -397,53 +271,43 @@ def test_dnn_engine(COMPILE):
     Write Text files of vectors
     '''
     for b in bundles:
-        np.savetxt(f"{DATA_DIR}/{b.idx}_y_nhwc_exp.txt", b.oe_exp_nhwc.flatten(), fmt='%d')
-        np.savetxt(f"{DATA_DIR}/{b.idx}_xe.txt", np.concatenate([a.flatten() for a in b.xe]), fmt='%d')
+        np.savetxt(f"{hw.DATA_DIR}/{b.idx}_y_nhwc_exp.txt", b.oe_exp_nhwc.flatten(), fmt='%d')
+        np.savetxt(f"{hw.DATA_DIR}/{b.idx}_xe.txt", np.concatenate([a.flatten() for a in b.xe]), fmt='%d')
         for ip in range(b.r.CP):
             CM_p = b.r.CM_0 if ip==0 else b.r.CM
             x_config = b.r.x_header_le_p[ip!=0][0]
-            x_config = format(x_config, f'#0{c.IN_BITS}b')
-            x_config_words = [int(x_config[i:i+c.X_BITS], 2) for i in range(0, len(x_config), c.X_BITS)]
+            x_config = format(x_config, f'#0{hw.IN_BITS}b')
+            x_config_words = [int(x_config[i:i+hw.X_BITS], 2) for i in range(0, len(x_config), hw.X_BITS)]
             x_config_words.reverse()
             x_config_words = np.array(x_config_words, dtype=np.int8)
 
             xp = b.xe[ip].flatten()
             xp = np.concatenate([x_config_words, xp], axis=0)
-            assert xp.shape == (c.IN_BITS/c.X_BITS +b.r.XN*b.r.XL*b.r.XW*CM_p*(c.ROWS+c.X_PAD),)
-            np.savetxt(f"{DATA_DIR}/{b.idx}_{ip}_x.txt", xp, fmt='%d')
+            assert xp.shape == (hw.IN_BITS/hw.X_BITS +b.r.XN*b.r.XL*b.r.XW*CM_p*(hw.ROWS+hw.X_PAD),)
+            np.savetxt(f"{hw.DATA_DIR}/{b.idx}_{ip}_x.txt", xp, fmt='%d')
 
 
             for it in range(b.r.IT):
                 
                 w_config = b.r.w_header_le_p[ip!=0][0]
-                w_config = format(w_config, f'#0{c.IN_BITS}b')
-                w_config_words = [int(w_config[i:i+c.K_BITS], 2) for i in range(0, len(w_config), c.K_BITS)]
+                w_config = format(w_config, f'#0{hw.IN_BITS}b')
+                w_config_words = [int(w_config[i:i+hw.K_BITS], 2) for i in range(0, len(w_config), hw.K_BITS)]
                 w_config_words.reverse()
                 w_config_words = np.array(w_config_words,dtype=np.int8)
 
                 wp = b.we[ip][it].flatten()            
                 wp = np.concatenate([w_config_words, wp], axis=0)
-                assert wp.shape == (c.IN_BITS/c.K_BITS + (CM_p*b.r.KH+c.CONFIG_BEATS)*c.COLS,)
-                np.savetxt(f"{DATA_DIR}/{b.idx}_{ip}_{it}_w.txt", wp, fmt='%d')
+                assert wp.shape == (hw.IN_BITS/hw.K_BITS + (CM_p*b.r.KH+hw.CONFIG_BEATS)*hw.COLS,)
+                np.savetxt(f"{hw.DATA_DIR}/{b.idx}_{ip}_{it}_w.txt", wp, fmt='%d')
 
-                np.savetxt(f"{DATA_DIR}/{b.idx}_{ip}_{it}_y_exp.txt", b.ye_exp_p[ip][it].flatten(), fmt='%d')
-    print(f'Weights, inputs, outputs saved to {DATA_DIR}/ib_ip_it_*.txt')
+                np.savetxt(f"{hw.DATA_DIR}/{b.idx}_{ip}_{it}_y_exp.txt", b.ye_exp_p[ip][it].flatten(), fmt='%d')
+    print(f'Weights, inputs, outputs saved to {hw.DATA_DIR}/ib_ip_it_*.txt')
 
 
     '''
     RUN SIMULATION
     '''
-    compile(c=c)
-    print("SIMULATING...")
-
-    if SIM == 'xsim':
-        with open('build/xsim_cfg.tcl', 'w') as f:
-            f.write('''log_wave -recursive * \nrun all \nexit''')
-        assert subprocess.run(fr'{XIL_PATH}\xsim {TB_MODULE} --tclbatch xsim_cfg.tcl', cwd="build", shell=True).returncode == 0
-    if SIM == 'icarus':
-        subprocess.run(["vvp", "build/a.out"])
-    if SIM == 'verilator':
-        subprocess.run([f"./V{TB_MODULE}"], cwd="build")
+    hw.simulate(SIM=SIM, SIM_PATH=SIM_PATH)
 
 
     '''
@@ -455,30 +319,30 @@ def test_dnn_engine(COMPILE):
         for ip in range(b.r.CP):
             for it in range(b.r.IT):
                 y_raw_exp = b.ye_exp_p[ip][it]
-                y_raw_sim = np.loadtxt(f"{DATA_DIR}/{b.idx}_{ip}_{it}_y_raw_sim.txt", np.int32).reshape(y_raw_exp.shape)
+                y_raw_sim = np.loadtxt(f"{hw.DATA_DIR}/{b.idx}_{ip}_{it}_y_raw_sim.txt", np.int32).reshape(y_raw_exp.shape)
                 error = np.sum(np.abs(y_raw_exp-y_raw_sim))
                 assert error == 0, f"Error={error}, for y_raw_sim at {b.idx=}_{ip=}_{it=}"
 
         ''' Verify sum output '''
         y_sum_exp = b.oe_sum_exp
-        y_sum_sim = np.loadtxt(f"{DATA_DIR}/{b.idx}_y_sum_sim.txt", np.int32).reshape(y_sum_exp.shape)
+        y_sum_sim = np.loadtxt(f"{hw.DATA_DIR}/{b.idx}_y_sum_sim.txt", np.int32).reshape(y_sum_exp.shape)
         error = np.sum(np.abs(y_sum_exp-y_sum_sim))
         assert error == 0, f"Error={error}, for y_sum_sim at {b.idx=}"
 
         ''' Verify processed output HWC'''
-        y_nhwc_sim = np.loadtxt(f"{DATA_DIR}/{b.idx}_y_nhwc_sim.txt",np.int32).reshape(b.oe_exp_nhwc.shape)
+        y_nhwc_sim = np.loadtxt(f"{hw.DATA_DIR}/{b.idx}_y_nhwc_sim.txt",np.int32).reshape(b.oe_exp_nhwc.shape)
         error = np.sum(np.abs(y_nhwc_sim - b.oe_exp_nhwc))
         assert error == 0, f"sim:\n{y_nhwc_sim[0,:,:,0]}\n exp:\n{b.oe_exp_nhwc[0,:,:,0]}\n input:\n{b.before_pool[0,:,:,0] if b.pool else None}"
 
         ''' Verify tiled output'''
         y_tiled_exp = b.o_int if ib == len(bundles)-1 else np.concatenate([a.flatten() for a in bundles[ib+1].xe])
-        y_tiled_sim = np.loadtxt(f"{DATA_DIR}/{b.idx}_y_tiled_sim.txt", np.int32).reshape(y_tiled_exp.shape)
+        y_tiled_sim = np.loadtxt(f"{hw.DATA_DIR}/{b.idx}_y_tiled_sim.txt", np.int32).reshape(y_tiled_exp.shape)
         error = np.sum(np.abs(y_tiled_sim-y_tiled_exp))
         assert error == 0, f"Error={error}, for y_tiled_sim at {b.idx=}"
 
         ''' Verify packed output'''
         if ib != len(bundles)-1:
-            with open(f'{DATA_DIR}/{ib}_y_packed_sim.bin', 'rb') as f_sim, open(f'{DATA_DIR}/{ib+1}_x_sim.bin', 'rb') as f_exp:
+            with open(f'{hw.DATA_DIR}/{ib}_y_packed_sim.bin', 'rb') as f_sim, open(f'{hw.DATA_DIR}/{ib+1}_x_sim.bin', 'rb') as f_exp:
                 y_packed_sim = np.frombuffer(f_sim.read(), dtype=np.uint8)
                 y_packed_exp = np.frombuffer(f_exp.read(), dtype=np.uint8)
             error = np.sum(np.abs(y_packed_sim-y_packed_exp))
diff --git a/run/work/config_hw.svh b/run/work/config_hw.svh
index c7d8333..6e99d35 100644
--- a/run/work/config_hw.svh
+++ b/run/work/config_hw.svh
@@ -1,27 +1,26 @@
 
-    // Written from param_tests.py
+// Written from Hardware.export()
 
-    `define ROWS                8                 	// PE rows, constrained by resources
-    `define COLS                24                 	// PE cols, constrained by resources
-    `define X_BITS              4               	// Bits per word in input
-    `define K_BITS              4               	// Bits per word in input
-    `define Y_BITS              24               	// Bits per word in output of conv
+`define ROWS                8           // PE rows, constrained by resources
+`define COLS                24          // PE cols, constrained by resources
+`define X_BITS              4           // Bits per word in input
+`define K_BITS              4           // Bits per word in weights (kernel)
+`define Y_BITS              24          // Bits per word in output of conv
 
-    `define KH_MAX              13               	// max of kernel height, across layers
-    `define KW_MAX              13               	// max of kernel width, across layers
-    `define XH_MAX              512               	// max of input image height, across layers
-    `define XW_MAX              512               	// max of input image width, across layers
-    `define XN_MAX              64               	// max of input batch size, across layers
-    `define CI_MAX              2048               	// max of input channels, across layers
-    `define CONFIG_BEATS        0         	// constant, for now
-    `define RAM_WEIGHTS_DEPTH   20    	// CONFIG_BEATS + max(KW * CI), across layers
-    `define RAM_EDGES_DEPTH     288      	// max (KW * CI * XW), across layers when KW != 1
+`define KH_MAX              13          // max of kernel height, across layers
+`define KW_MAX              13          // max of kernel width, across layers
+`define XH_MAX              512         // max of input image height, across layers
+`define XW_MAX              512         // max of input image width, across layers
+`define XN_MAX              64          // max of input batch size, across layers
+`define CI_MAX              2048        // max of input channels, across layers
+`define CONFIG_BEATS        0           // constant, for now
+`define RAM_WEIGHTS_DEPTH   20          // CONFIG_BEATS + max(KW * CI), across layers
+`define RAM_EDGES_DEPTH     288         // max (KW * CI * XW), across layers when KW != 1
 
-    `define DELAY_ACC    1                               	// constant, for now
-    `define DELAY_MUL    2                               	// constant, for now 
-    `define DELAY_W_RAM  2                               	// constant, for now 
+`define DELAY_ACC           1            // constant, for now
+`define DELAY_MUL           2            // constant, for now 
+`define DELAY_W_RAM         2            // constant, for now 
 
-    `define S_WEIGHTS_WIDTH_LF  64              	// constant (64), for now
-    `define S_PIXELS_WIDTH_LF   64              	// constant (64), for now
-    `define M_OUTPUT_WIDTH_LF   64             	// constant (64), for now
-    
\ No newline at end of file
+`define S_WEIGHTS_WIDTH_LF  64          // constant (64), for now
+`define S_PIXELS_WIDTH_LF   64          // constant (64), for now
+`define M_OUTPUT_WIDTH_LF   64          // constant (64), for now
diff --git a/run/work/config_hw.tcl b/run/work/config_hw.tcl
index a619bf3..adcc8f0 100644
--- a/run/work/config_hw.tcl
+++ b/run/work/config_hw.tcl
@@ -1,15 +1,15 @@
 
-    # Written from param_tests.py
-    set RAM_WEIGHTS_DEPTH 20
-    set ROWS               8
-    set COLS               24
-    set X_BITS             4
-    set K_BITS             4
-    set Y_BITS             24
-    set DELAY_W_RAM        2
-    set RAM_EDGES_DEPTH    288
-    set KH_MAX             13
-    set S_WEIGHTS_WIDTH_LF  64
-    set S_PIXELS_WIDTH_LF   64
-    set M_OUTPUT_WIDTH_LF   64
-        
\ No newline at end of file
+# Written from Hardware.export()
+                    
+set RAM_WEIGHTS_DEPTH  20
+set ROWS               8
+set COLS               24
+set X_BITS             4
+set K_BITS             4
+set Y_BITS             24
+set DELAY_W_RAM        2
+set RAM_EDGES_DEPTH    288
+set KH_MAX             13
+set S_WEIGHTS_WIDTH_LF 64
+set S_PIXELS_WIDTH_LF  64
+set M_OUTPUT_WIDTH_LF  64
diff --git a/run/work/config_tb.svh b/run/work/config_tb.svh
index 6dc12a8..701a452 100644
--- a/run/work/config_tb.svh
+++ b/run/work/config_tb.svh
@@ -1,2 +1,2 @@
-`define VALID_PROB 1 
+`define VALID_PROB 100 
 `define READY_PROB 100
\ No newline at end of file
diff --git a/run/work/hardware.json b/run/work/hardware.json
new file mode 100644
index 0000000..471e523
--- /dev/null
+++ b/run/work/hardware.json
@@ -0,0 +1,28 @@
+{
+    "processing_elements": [
+        8,
+        24
+    ],
+    "frequency_mhz": 250,
+    "bits_input": 4,
+    "bits_weights": 4,
+    "bits_sum": 24,
+    "bits_bias": 16,
+    "max_batch_size": 64,
+    "max_channels_in": 2048,
+    "max_kernel_size": [
+        13,
+        13
+    ],
+    "max_image_size": [
+        512,
+        512
+    ],
+    "ram_weights_depth": 20,
+    "ram_edges_depth": 288,
+    "axi_width": 64,
+    "target_cpu_int_bits": 32,
+    "valid_prob": 0.1,
+    "ready_prob": 0.1,
+    "data_dir": "vectors"
+}
\ No newline at end of file
diff --git a/run/work/sources.txt b/run/work/sources.txt
index fb56987..781cfcf 100644
--- a/run/work/sources.txt
+++ b/run/work/sources.txt
@@ -1,23 +1,23 @@
-C:\Users\abara\AppData\Roaming\Python\Python310\site-packages\deepsocflow\test\sv\axis_tb.sv
-C:\Users\abara\AppData\Roaming\Python\Python310\site-packages\deepsocflow\test\sv\counter_tb.sv
-C:\Users\abara\AppData\Roaming\Python\Python310\site-packages\deepsocflow\test\sv\dma.sv
-C:\Users\abara\AppData\Roaming\Python\Python310\site-packages\deepsocflow\test\sv\dnn_engine_tb.sv
-C:\Users\abara\AppData\Roaming\Python\Python310\site-packages\deepsocflow\test\sv\ram_raw.sv
-C:\Users\abara\AppData\Roaming\Python\Python310\site-packages\deepsocflow\test\sv\skid_buffer_tb.sv
-C:\Users\abara\AppData\Roaming\Python\Python310\site-packages\deepsocflow\rtl\dnn_engine.v
-C:\Users\abara\AppData\Roaming\Python\Python310\site-packages\deepsocflow\rtl\ext\alex_axis_adapter.v
-C:\Users\abara\AppData\Roaming\Python\Python310\site-packages\deepsocflow\rtl\ext\alex_axis_pipeline_register.v
-C:\Users\abara\AppData\Roaming\Python\Python310\site-packages\deepsocflow\rtl\ext\alex_axis_register.v
-C:\Users\abara\AppData\Roaming\Python\Python310\site-packages\deepsocflow\rtl\axis_out_shift.sv
-C:\Users\abara\AppData\Roaming\Python\Python310\site-packages\deepsocflow\rtl\axis_pixels.sv
-C:\Users\abara\AppData\Roaming\Python\Python310\site-packages\deepsocflow\rtl\axis_weight_rotator.sv
-C:\Users\abara\AppData\Roaming\Python\Python310\site-packages\deepsocflow\rtl\counter.sv
-C:\Users\abara\AppData\Roaming\Python\Python310\site-packages\deepsocflow\rtl\cyclic_bram.sv
-C:\Users\abara\AppData\Roaming\Python\Python310\site-packages\deepsocflow\rtl\huffman_2_decoder.sv
-C:\Users\abara\AppData\Roaming\Python\Python310\site-packages\deepsocflow\rtl\n_delay.sv
-C:\Users\abara\AppData\Roaming\Python\Python310\site-packages\deepsocflow\rtl\out_ram_switch.sv
-C:\Users\abara\AppData\Roaming\Python\Python310\site-packages\deepsocflow\rtl\proc_element.sv
-C:\Users\abara\AppData\Roaming\Python\Python310\site-packages\deepsocflow\rtl\proc_engine.sv
-C:\Users\abara\AppData\Roaming\Python\Python310\site-packages\deepsocflow\rtl\ext\alex_axis_adapter_any.sv
+D:\dnn-engine\deepsocflow\test\sv\axis_tb.sv
+D:\dnn-engine\deepsocflow\test\sv\counter_tb.sv
+D:\dnn-engine\deepsocflow\test\sv\dma.sv
+D:\dnn-engine\deepsocflow\test\sv\dnn_engine_tb.sv
+D:\dnn-engine\deepsocflow\test\sv\ram_raw.sv
+D:\dnn-engine\deepsocflow\test\sv\skid_buffer_tb.sv
+D:\dnn-engine\deepsocflow\rtl\dnn_engine.v
+D:\dnn-engine\deepsocflow\rtl\ext\alex_axis_adapter.v
+D:\dnn-engine\deepsocflow\rtl\ext\alex_axis_pipeline_register.v
+D:\dnn-engine\deepsocflow\rtl\ext\alex_axis_register.v
+D:\dnn-engine\deepsocflow\rtl\axis_out_shift.sv
+D:\dnn-engine\deepsocflow\rtl\axis_pixels.sv
+D:\dnn-engine\deepsocflow\rtl\axis_weight_rotator.sv
+D:\dnn-engine\deepsocflow\rtl\counter.sv
+D:\dnn-engine\deepsocflow\rtl\cyclic_bram.sv
+D:\dnn-engine\deepsocflow\rtl\huffman_2_decoder.sv
+D:\dnn-engine\deepsocflow\rtl\n_delay.sv
+D:\dnn-engine\deepsocflow\rtl\out_ram_switch.sv
+D:\dnn-engine\deepsocflow\rtl\proc_element.sv
+D:\dnn-engine\deepsocflow\rtl\proc_engine.sv
+D:\dnn-engine\deepsocflow\rtl\ext\alex_axis_adapter_any.sv
 D:\dnn-engine\run\work\config_hw.svh
 D:\dnn-engine\run\work\config_tb.svh
\ No newline at end of file