diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml
new file mode 100644
index 0000000..9087804
--- /dev/null
+++ b/.github/workflows/python-publish.yml
@@ -0,0 +1,31 @@
+# This workflow uploads a Python package to PyPI using Twine when a release is published
+# For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries
+
+name: Upload Python Package
+
+on:
+  release:
+    # "published" rather than "created": workflows are not triggered by the
+    # "created" activity type for draft releases, so a release that is first
+    # saved as a draft and published later would never be uploaded.
+    types: [published]
+
+jobs:
+  deploy:
+
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/checkout@v2
+      - name: Set up Python
+        uses: actions/setup-python@v2
+        with:
+          python-version: '3.x'
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install setuptools wheel twine
+      - name: Build and publish
+        env:
+          # PyPI API-token authentication: the user name is the literal
+          # string "__token__" and the password is the token itself
+          TWINE_USERNAME: __token__
+          TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }}
+        run: |
+          python setup.py sdist
+          twine upload dist/*
diff --git a/AUTHORS.rst b/AUTHORS.rst
new file mode 100644
index 0000000..ba06e5d
--- /dev/null
+++ b/AUTHORS.rst
@@ -0,0 +1,10 @@
+============
+Contributors
+============
+
+* Yaman Umuroglu (@maltanar) (maintainer)
+* Jakoba Petri-Koenig (@auphelia)
+* Lucian Petrica (@quetric)
+* Tobias Alonso (@Tobi-Alonso)
+* Hendrik Borras (@HenniOVP)
+* Felix Paul Jentzsch (@felixpj)
diff --git a/README.md b/README.md
index 5168620..8467a64 100644
--- a/README.md
+++ b/README.md
@@ -14,10 +14,13 @@ pre-built bitfiles, PYNQ Python drivers and Jupyter notebooks to get started,
and you can rebuild them from source.
Both PYNQ on Zynq and Alveo are supported.
+Need help with a problem in this repo, or got a question? Feel free to ask for help in the [FINN Gitter channel](https://gitter.im/xilinx-finn/community).
+
## Quickstart
*For Alveo we recommend setting up everything inside a virtualenv as described [here](https://pynq.readthedocs.io/en/v2.6.1/getting_started/alveo_getting_started.html?highlight=alveo#install-conda).*
+*For PYNQ boards, all commands below must be run as root: either prefix each command with `sudo`, or switch to a root shell first with `sudo su`.*
First, ensure that your `pip` and `setuptools` installations are up-to-date
on your PYNQ board or Alveo server:
@@ -62,11 +65,11 @@ dummy_out = accel.execute(dummy_in)
|----------------------------------------------------------------|-------------------------|------------------------------------------------------------|------------------|
| CIFAR-10 | CNV (VGG-11-like) | several variants: 1/2-bit weights/activations | all |
| MNIST | 3-layer fully-connected | several variants: 1/2-bit weights/activations | all |
-| ImageNet | MobileNet-v1 | 4-bit weights and activations 8-bit first layer weights | Alveo U250 |
+| ImageNet | MobileNet-v1 | 4-bit weights and activations, 8-bit first layer weights | Alveo U250, ZCU104 |
## Supported Boards
-*Note that the larger NNs are only available on Alveo boards.*
+*Note that the larger NNs are only available on Alveo or selected Zynq boards.*
`finn-examples` provides pre-built FPGA bitfiles for the following boards:
diff --git a/build/README.md b/build/README.md
index cd699c9..e284deb 100644
--- a/build/README.md
+++ b/build/README.md
@@ -27,8 +27,7 @@ Please see the READMEs under the respective subfolders here for instructions on
All examples in this repo use the same Python PYNQ driver, located under
`finn_examples/driver.py` in the repo. This driver can support any FINN-generated
-accelerator that doesn't use external weights, the only thing that needs to be
-specified is the configuration for the input and output tensors in the `io_shape_dict`. Have a look at `finn_examples/models.py` to see how this is done for the example models in this repo:
+accelerator; the only thing that needs to be specified is the configuration for the input and output tensors in the `io_shape_dict`. Have a look at `finn_examples/models.py` to see how this is done for the example models in this repo:
```python
_cifar10_cnv_io_shape_dict = {
diff --git a/build/get-finn.sh b/build/get-finn.sh
index 3f5cf53..016a69c 100755
--- a/build/get-finn.sh
+++ b/build/get-finn.sh
@@ -30,7 +30,7 @@
# URL for git repo to be cloned
REPO_URL=https://github.com/Xilinx/finn
# commit hash for repo
-REPO_COMMIT=4fee6ffd8e13f91314ec9086e9ce9b2ea9de15c7
+REPO_COMMIT=e5da788bdc74fc9c234bb0176521ad51e830c22e
# directory (under the same folder as this script) to clone to
REPO_DIR=finn
diff --git a/build/mobilenet-v1/README.md b/build/mobilenet-v1/README.md
index b929679..38b69a2 100644
--- a/build/mobilenet-v1/README.md
+++ b/build/mobilenet-v1/README.md
@@ -17,7 +17,8 @@ It requires about 2 MB of weight storage and 1.1 GMACs per inference, yielding
Due to the depthwise separable convolutions in MobileNet-v1,
we use a specialized build script that replaces a few of the standard steps
in FINN with custom ones.
-**MobileNet-v1 is currently only supported on Alveo U250.**
+**MobileNet-v1 is currently only supported on Alveo U250 and ZCU104.**
+We also provide a folding configuration for the **ZCU102**, but there is no pre-built PYNQ image available for this board.
0. Ensure you have performed the *Setup* steps in the top-level README for setting up the FINN requirements and environment variables.
diff --git a/build/mobilenet-v1/build.py b/build/mobilenet-v1/build.py
index 5d67f1c..fc4c48c 100644
--- a/build/mobilenet-v1/build.py
+++ b/build/mobilenet-v1/build.py
@@ -28,53 +28,147 @@
import finn.builder.build_dataflow as build
import finn.builder.build_dataflow_config as build_cfg
+from finn.util.basic import alveo_default_platform
+import os
+import shutil
# custom steps for mobilenetv1
from custom_steps import (
step_mobilenet_streamline,
step_mobilenet_convert_to_hls_layers,
+ step_mobilenet_convert_to_hls_layers_separate_th,
step_mobilenet_lower_convs,
+ step_mobilenet_slr_floorplan,
)
model_name = "mobilenetv1-w4a4"
-board = "U250"
-vitis_platform = "xilinx_u250_xdma_201830_2"
-synth_clk_period_ns = 3.0
-mobilenet_build_steps = [
- step_mobilenet_streamline,
- step_mobilenet_lower_convs,
- step_mobilenet_convert_to_hls_layers,
- "step_create_dataflow_partition",
- "step_apply_folding_config",
- "step_generate_estimate_reports",
- "step_hls_ipgen",
- "step_set_fifo_depths",
- "step_create_stitched_ip",
- "step_make_pynq_driver",
- "step_synthesize_bitfile",
- "step_deployment_package",
-]
-
-
-cfg = build_cfg.DataflowBuildConfig(
- steps=mobilenet_build_steps,
- output_dir="output_%s_%s" % (model_name, board),
- folding_config_file="folding_config/%s_folding_config.json" % board,
- synth_clk_period_ns=synth_clk_period_ns,
- board=board,
- shell_flow_type=build_cfg.ShellFlowType.VITIS_ALVEO,
- # folding config comes with FIFO depths already
- auto_fifo_depths=False,
- vitis_platform=vitis_platform,
- # enable extra performance optimizations (physopt)
- vitis_opt_strategy=build_cfg.VitisOptStrategyCfg.PERFORMANCE_BEST,
- generate_outputs=[
- build_cfg.DataflowOutputType.PYNQ_DRIVER,
- build_cfg.DataflowOutputType.ESTIMATE_REPORTS,
- build_cfg.DataflowOutputType.BITFILE,
- build_cfg.DataflowOutputType.DEPLOYMENT_PACKAGE,
- ],
-)
-model_file = "models/%s_pre_post_tidy.onnx" % model_name
-build.build_dataflow_cfg(model_file, cfg)
+# boards to build the network for, grouped by the shell flow they use (Zynq vs. Alveo)
+zynq_platforms = ["ZCU102", "ZCU104"]
+# to also build for other Alveo cards, use e.g.: alveo_platforms = ["U50", "U200", "U250", "U280"]
+alveo_platforms = ["U250"]
+platforms_to_build = zynq_platforms + alveo_platforms
+
+
+# determine which shell flow to use for a given platform
+def platform_to_shell(platform):
+    """Map a board name to the FINN shell flow used to build for it.
+
+    Boards listed in ``zynq_platforms`` use the Vivado Zynq flow, boards
+    listed in ``alveo_platforms`` use the Vitis Alveo flow.
+
+    Raises:
+        Exception: if the board appears in neither list.
+    """
+    if platform in zynq_platforms:
+        return build_cfg.ShellFlowType.VIVADO_ZYNQ
+    if platform in alveo_platforms:
+        return build_cfg.ShellFlowType.VITIS_ALVEO
+    raise Exception("Unknown platform, can't determine ShellFlowType")
+
+
+# select target clock frequency
+def select_clk_period(platform):
+    """Return the synthesis clock period (in ns) to target for a platform.
+
+    Boards in ``zynq_platforms`` are built with a 5.4 ns period, boards in
+    ``alveo_platforms`` with a 3.0 ns period.
+
+    Raises:
+        Exception: if the platform is in neither list. Failing here mirrors
+            ``platform_to_shell``; silently returning ``None`` would hand an
+            invalid clock period to the build configuration.
+    """
+    if platform in zynq_platforms:
+        return 5.4
+    elif platform in alveo_platforms:
+        return 3.0
+    else:
+        raise Exception("Unknown platform, can't determine clock period")
+
+
+# select build steps (ZCU104/102 folding config is based on separate thresholding nodes)
+def select_build_steps(platform):
+    """Return the ordered list of build steps for a platform.
+
+    The two flows share most steps and differ in two places:
+
+    * Zynq boards convert to HLS layers with
+      ``step_mobilenet_convert_to_hls_layers_separate_th`` and run
+      ``"step_create_stitched_ip"`` after the FIFO depths are set.
+    * Alveo boards convert with ``step_mobilenet_convert_to_hls_layers`` and
+      run ``step_mobilenet_slr_floorplan`` in that position instead.
+
+    Entries are either custom step functions or names of built-in steps.
+
+    Raises:
+        Exception: if the platform is in neither ``zynq_platforms`` nor
+            ``alveo_platforms``. Failing here mirrors ``platform_to_shell``;
+            silently returning ``None`` would leave the build configuration
+            without a step list.
+    """
+    if platform in zynq_platforms:
+        return [
+            step_mobilenet_streamline,
+            step_mobilenet_lower_convs,
+            step_mobilenet_convert_to_hls_layers_separate_th,
+            "step_create_dataflow_partition",
+            "step_apply_folding_config",
+            "step_generate_estimate_reports",
+            "step_hls_codegen",
+            "step_hls_ipgen",
+            "step_set_fifo_depths",
+            "step_create_stitched_ip",
+            "step_synthesize_bitfile",
+            "step_make_pynq_driver",
+            "step_deployment_package",
+        ]
+    elif platform in alveo_platforms:
+        return [
+            step_mobilenet_streamline,
+            step_mobilenet_lower_convs,
+            step_mobilenet_convert_to_hls_layers,
+            "step_create_dataflow_partition",
+            "step_apply_folding_config",
+            "step_generate_estimate_reports",
+            "step_hls_codegen",
+            "step_hls_ipgen",
+            "step_set_fifo_depths",
+            step_mobilenet_slr_floorplan,
+            "step_synthesize_bitfile",
+            "step_make_pynq_driver",
+            "step_deployment_package",
+        ]
+    else:
+        raise Exception("Unknown platform, can't determine build steps")
+
+
+# create a release dir, used for finn-examples release packaging
+os.makedirs("release", exist_ok=True)
+
+
+# Build the network for every requested platform, then collect the generated
+# artifacts under release/<release name> and zip that folder.
+for platform_name in platforms_to_build:
+    shell_flow_type = platform_to_shell(platform_name)
+    if shell_flow_type == build_cfg.ShellFlowType.VITIS_ALVEO:
+        vitis_platform = alveo_default_platform[platform_name]
+        # for Alveo, use the Vitis platform name as the release name
+        # e.g. xilinx_u250_xdma_201830_2
+        release_platform_name = vitis_platform
+    else:
+        vitis_platform = None
+        # for Zynq, use the board name as the release name
+        # e.g. ZCU104
+        release_platform_name = platform_name
+    platform_dir = "release/%s" % release_platform_name
+    os.makedirs(platform_dir, exist_ok=True)
+
+    cfg = build_cfg.DataflowBuildConfig(
+        steps=select_build_steps(platform_name),
+        output_dir="output_%s_%s" % (model_name, release_platform_name),
+        folding_config_file="folding_config/%s_folding_config.json" % platform_name,
+        synth_clk_period_ns=select_clk_period(platform_name),
+        board=platform_name,
+        shell_flow_type=shell_flow_type,
+        vitis_platform=vitis_platform,
+        # folding config comes with FIFO depths already
+        auto_fifo_depths=False,
+        # enable extra performance optimizations (physopt)
+        vitis_opt_strategy=build_cfg.VitisOptStrategyCfg.PERFORMANCE_BEST,
+        generate_outputs=[
+            build_cfg.DataflowOutputType.PYNQ_DRIVER,
+            build_cfg.DataflowOutputType.ESTIMATE_REPORTS,
+            build_cfg.DataflowOutputType.BITFILE,
+            build_cfg.DataflowOutputType.DEPLOYMENT_PACKAGE,
+        ],
+    )
+    model_file = "models/%s_pre_post_tidy.onnx" % model_name
+    build.build_dataflow_cfg(model_file, cfg)
+
+    # copy bitfiles and runtime weights into release dir if found
+    bitfile_gen_dir = cfg.output_dir + "/bitfile"
+    # not every file exists for every flow, so each one is checked before
+    # copying; copies are renamed from "finn-accel.*" to "<model_name>.*"
+    files_to_check_and_copy = [
+        "finn-accel.bit",
+        "finn-accel.hwh",
+        "finn-accel.xclbin",
+    ]
+    for f in files_to_check_and_copy:
+        src_file = bitfile_gen_dir + "/" + f
+        dst_file = platform_dir + "/" + f.replace("finn-accel", model_name)
+        if os.path.isfile(src_file):
+            shutil.copy(src_file, dst_file)
+
+    weight_gen_dir = cfg.output_dir + "/driver/runtime_weights"
+    weight_dst_dir = platform_dir + "/%s_runtime_weights" % model_name
+    if os.path.isdir(weight_gen_dir):
+        weight_files = os.listdir(weight_gen_dir)
+        # only ship a runtime weights folder if it actually contains files
+        if weight_files:
+            # copytree() refuses to copy into an existing directory, so drop
+            # weights left over from an earlier run of this script first;
+            # otherwise a re-run would crash here, after the build finished
+            if os.path.isdir(weight_dst_dir):
+                shutil.rmtree(weight_dst_dir)
+            shutil.copytree(weight_gen_dir, weight_dst_dir)
+
+    # create zipfile for all examples for this platform
+    shutil.make_archive(
+        "release/" + release_platform_name,
+        "zip",
+        root_dir="release",
+        base_dir=release_platform_name,
+    )
diff --git a/build/mobilenet-v1/custom_steps.py b/build/mobilenet-v1/custom_steps.py
index a18faee..9f30597 100644
--- a/build/mobilenet-v1/custom_steps.py
+++ b/build/mobilenet-v1/custom_steps.py
@@ -26,7 +26,10 @@
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
from finn.core.modelwrapper import ModelWrapper
-from finn.builder.build_dataflow_config import DataflowBuildConfig
+from finn.builder.build_dataflow_config import (
+ DataflowBuildConfig,
+ ShellFlowType,
+)
from finn.transformation.streamline import Streamline
from finn.transformation.double_to_single_float import DoubleToSingleFloat
import finn.transformation.streamline.absorb as absorb
@@ -39,6 +42,7 @@
from finn.transformation.general import (
GiveReadableTensorNames,
GiveUniqueNodeNames,
+ ApplyConfig,
)
import finn.transformation.fpgadataflow.convert_to_hls_layers as to_hls
from finn.transformation.infer_shapes import InferShapes
@@ -94,3 +98,36 @@ def step_mobilenet_convert_to_hls_layers(model: ModelWrapper, cfg: DataflowBuild
model = model.transform(GiveUniqueNodeNames())
model = model.transform(GiveReadableTensorNames())
return model
+
+
+def step_mobilenet_slr_floorplan(model: ModelWrapper, cfg: DataflowBuildConfig):
+    """Best-effort SLR floorplanning step for Alveo builds.
+
+    When the build uses the VITIS_ALVEO shell flow, a floorplan is computed
+    with ``finn.analysis.partitioning.partition`` (anchoring the first and
+    last layers to SLR 0) and applied to the model through ``ApplyConfig``.
+    If the partitioning module cannot be imported or any part of the
+    floorplanning raises, the reason is printed and the model is returned
+    without a floorplan. For any other shell flow the model is returned as-is.
+
+    Args:
+        model: the model to floorplan.
+        cfg: build configuration; ``shell_flow_type``, ``synth_clk_period_ns``
+            and ``board`` are read.
+
+    Returns:
+        The model with the floorplan applied, or the input model unchanged.
+    """
+    if cfg.shell_flow_type == ShellFlowType.VITIS_ALVEO:
+        try:
+            # imported lazily so that a FINN version without the partitioning
+            # module only disables floorplanning instead of breaking the build
+            from finn.analysis.partitioning import partition
+
+            # apply partitioning of the model, restricting the first and last
+            # layers to SLR0
+            default_slr = 0
+            abs_anchors = [(0, [default_slr]), (-1, [default_slr])]
+            floorplan = partition(
+                model,
+                cfg.synth_clk_period_ns,
+                cfg.board,
+                abs_anchors=abs_anchors,
+                multivariant=False,
+            )[0]
+            # apply floorplan to model
+            model = model.transform(ApplyConfig(floorplan))
+            print("SLR floorplanning applied")
+        except Exception as e:
+            # floorplanning stays optional, but catch Exception instead of a
+            # bare except so Ctrl-C / SystemExit still abort the build, and
+            # report why floorplanning was skipped
+            print("No SLR floorplanning applied (%s: %s)" % (type(e).__name__, e))
+    return model
+
+
+def step_mobilenet_convert_to_hls_layers_separate_th(
+    model: ModelWrapper, cfg: DataflowBuildConfig
+):
+    """Convert the MobileNet graph to HLS layers, inferring thresholding first.
+
+    Applies the HLS conversion transformations in a fixed order, with
+    ``InferThresholdingLayer`` placed ahead of ``InferVVAU`` and
+    ``InferQuantizedStreamingFCLayer``, then re-infers shapes and renames
+    nodes and tensors. The memory mode for the FC layers comes from
+    ``cfg.default_mem_mode``.
+    NOTE(review): presumably this ordering is what keeps thresholds as
+    standalone nodes rather than absorbed into the compute layers - confirm
+    against the FINN transformation documentation.
+    """
+    mem_mode = cfg.default_mem_mode.value
+    # (transformation class, constructor arguments), in application order;
+    # each transformation is instantiated right before it is applied
+    conversion_sequence = (
+        (to_hls.InferPool_Batch, ()),
+        (to_hls.InferConvInpGen, ()),
+        (to_hls.InferThresholdingLayer, ()),
+        (to_hls.InferVVAU, ()),
+        (to_hls.InferQuantizedStreamingFCLayer, (mem_mode,)),
+        (to_hls.InferChannelwiseLinearLayer, ()),
+        (to_hls.InferLabelSelectLayer, ()),
+        (InferShapes, ()),
+        (GiveUniqueNodeNames, ()),
+        (GiveReadableTensorNames, ()),
+    )
+    for transformation_cls, ctor_args in conversion_sequence:
+        model = model.transform(transformation_cls(*ctor_args))
+    return model
diff --git a/build/mobilenet-v1/folding_config/U200_folding_config.json b/build/mobilenet-v1/folding_config/U200_folding_config.json
new file mode 100644
index 0000000..f5ccf9b
--- /dev/null
+++ b/build/mobilenet-v1/folding_config/U200_folding_config.json
@@ -0,0 +1,499 @@
+{
+ "Defaults": {},
+ "StreamingFIFO_0": {
+ "ram_style": "ultra",
+ "depth": 512,
+ "impl_style": "vivado"
+ },
+ "ConvolutionInputGenerator_0": {
+ "SIMD": 3,
+ "ram_style": "distributed"
+ },
+ "StreamingFCLayer_Batch_0": {
+ "PE": 32,
+ "SIMD": 3,
+ "ram_style": "block",
+ "mem_mode": "decoupled",
+ "resType": "dsp"
+ },
+ "FMPadding_Batch_0": {
+ "SIMD": 32
+ },
+ "StreamingFIFO_3": {
+ "ram_style": "auto",
+ "depth": 256,
+ "impl_style": "rtl"
+ },
+ "ConvolutionInputGenerator_1": {
+ "SIMD": 32,
+ "ram_style": "distributed"
+ },
+ "Vector_Vector_Activate_Batch_0": {
+ "PE": 32,
+ "resType": "lut"
+ },
+ "StreamingDataWidthConverter_Batch_0": {
+ "impl_style": "hls"
+ },
+ "StreamingFCLayer_Batch_1": {
+ "PE": 16,
+ "SIMD": 16,
+ "ram_style": "block",
+ "mem_mode": "decoupled",
+ "resType": "lut"
+ },
+ "StreamingDataWidthConverter_Batch_1": {
+ "impl_style": "hls"
+ },
+ "FMPadding_Batch_1": {
+ "SIMD": 32
+ },
+ "StreamingFIFO_9": {
+ "ram_style": "ultra",
+ "depth": 512,
+ "impl_style": "vivado"
+ },
+ "ConvolutionInputGenerator_2": {
+ "SIMD": 32,
+ "ram_style": "distributed"
+ },
+ "Vector_Vector_Activate_Batch_1": {
+ "PE": 32,
+ "resType": "lut"
+ },
+ "StreamingDataWidthConverter_Batch_2": {
+ "impl_style": "hls"
+ },
+ "StreamingFIFO_12": {
+ "ram_style": "auto",
+ "depth": 256,
+ "impl_style": "rtl"
+ },
+ "StreamingFCLayer_Batch_2": {
+ "PE": 16,
+ "SIMD": 16,
+ "ram_style": "block",
+ "mem_mode": "decoupled",
+ "resType": "lut"
+ },
+ "StreamingDataWidthConverter_Batch_3": {
+ "impl_style": "hls"
+ },
+ "FMPadding_Batch_2": {
+ "SIMD": 64
+ },
+ "StreamingFIFO_15": {
+ "ram_style": "auto",
+ "depth": 256,
+ "impl_style": "rtl"
+ },
+ "ConvolutionInputGenerator_3": {
+ "SIMD": 64,
+ "ram_style": "distributed"
+ },
+ "Vector_Vector_Activate_Batch_2": {
+ "PE": 64,
+ "resType": "lut"
+ },
+ "StreamingDataWidthConverter_Batch_4": {
+ "impl_style": "hls"
+ },
+ "StreamingFIFO_18": {
+ "ram_style": "ultra",
+ "depth": 1024,
+ "impl_style": "vivado"
+ },
+ "StreamingFCLayer_Batch_3": {
+ "PE": 32,
+ "SIMD": 16,
+ "ram_style": "block",
+ "mem_mode": "decoupled",
+ "resType": "lut"
+ },
+ "StreamingDataWidthConverter_Batch_5": {
+ "impl_style": "hls"
+ },
+ "StreamingFIFO_20": {
+ "ram_style": "auto",
+ "depth": 32,
+ "impl_style": "rtl"
+ },
+ "FMPadding_Batch_3": {
+ "SIMD": 16
+ },
+ "StreamingFIFO_21": {
+ "ram_style": "ultra",
+ "depth": 1024,
+ "impl_style": "vivado"
+ },
+ "ConvolutionInputGenerator_4": {
+ "SIMD": 16,
+ "ram_style": "distributed"
+ },
+ "Vector_Vector_Activate_Batch_3": {
+ "PE": 16,
+ "resType": "lut"
+ },
+ "StreamingFIFO_23": {
+ "ram_style": "auto",
+ "depth": 256,
+ "impl_style": "rtl"
+ },
+ "StreamingFCLayer_Batch_4": {
+ "PE": 16,
+ "SIMD": 16,
+ "ram_style": "block",
+ "mem_mode": "decoupled",
+ "resType": "lut"
+ },
+ "StreamingDataWidthConverter_Batch_6": {
+ "impl_style": "hls"
+ },
+ "FMPadding_Batch_4": {
+ "SIMD": 32
+ },
+ "StreamingFIFO_26": {
+ "ram_style": "ultra",
+ "depth": 512,
+ "impl_style": "vivado"
+ },
+ "ConvolutionInputGenerator_5": {
+ "SIMD": 32,
+ "ram_style": "distributed"
+ },
+ "Vector_Vector_Activate_Batch_4": {
+ "PE": 32,
+ "resType": "lut"
+ },
+ "StreamingDataWidthConverter_Batch_7": {
+ "impl_style": "hls"
+ },
+ "StreamingFIFO_29": {
+ "ram_style": "ultra",
+ "depth": 1024,
+ "impl_style": "vivado"
+ },
+ "StreamingFCLayer_Batch_5": {
+ "PE": 32,
+ "SIMD": 16,
+ "ram_style": "block",
+ "mem_mode": "decoupled",
+ "resType": "lut"
+ },
+ "StreamingDataWidthConverter_Batch_8": {
+ "impl_style": "hls"
+ },
+ "StreamingFIFO_31": {
+ "ram_style": "auto",
+ "depth": 32,
+ "impl_style": "rtl"
+ },
+ "FMPadding_Batch_5": {
+ "SIMD": 8
+ },
+ "StreamingFIFO_32": {
+ "ram_style": "ultra",
+ "depth": 2048,
+ "impl_style": "vivado"
+ },
+ "ConvolutionInputGenerator_6": {
+ "SIMD": 8,
+ "ram_style": "distributed"
+ },
+ "Vector_Vector_Activate_Batch_5": {
+ "PE": 8,
+ "resType": "lut"
+ },
+ "StreamingDataWidthConverter_Batch_9": {
+ "impl_style": "hls"
+ },
+ "StreamingFIFO_35": {
+ "ram_style": "auto",
+ "depth": 256,
+ "impl_style": "rtl"
+ },
+ "StreamingFCLayer_Batch_6": {
+ "PE": 16,
+ "SIMD": 16,
+ "ram_style": "block",
+ "mem_mode": "decoupled",
+ "resType": "lut"
+ },
+ "FMPadding_Batch_6": {
+ "SIMD": 16
+ },
+ "StreamingFIFO_37": {
+ "ram_style": "ultra",
+ "depth": 2048,
+ "impl_style": "vivado"
+ },
+ "ConvolutionInputGenerator_7": {
+ "SIMD": 16,
+ "ram_style": "distributed"
+ },
+ "Vector_Vector_Activate_Batch_6": {
+ "PE": 16,
+ "resType": "lut"
+ },
+ "StreamingFIFO_39": {
+ "ram_style": "ultra",
+ "depth": 1024,
+ "impl_style": "vivado"
+ },
+ "StreamingFCLayer_Batch_7": {
+ "PE": 32,
+ "SIMD": 16,
+ "ram_style": "block",
+ "mem_mode": "decoupled",
+ "resType": "lut"
+ },
+ "StreamingDataWidthConverter_Batch_10": {
+ "impl_style": "hls"
+ },
+ "StreamingFIFO_41": {
+ "ram_style": "auto",
+ "depth": 32,
+ "impl_style": "rtl"
+ },
+ "FMPadding_Batch_7": {
+ "SIMD": 16
+ },
+ "StreamingFIFO_42": {
+ "ram_style": "ultra",
+ "depth": 2048,
+ "impl_style": "vivado"
+ },
+ "ConvolutionInputGenerator_8": {
+ "SIMD": 16,
+ "ram_style": "distributed"
+ },
+ "Vector_Vector_Activate_Batch_7": {
+ "PE": 16,
+ "resType": "lut"
+ },
+ "StreamingFIFO_44": {
+ "ram_style": "ultra",
+ "depth": 1024,
+ "impl_style": "vivado"
+ },
+ "StreamingFCLayer_Batch_8": {
+ "PE": 32,
+ "SIMD": 16,
+ "ram_style": "block",
+ "mem_mode": "decoupled",
+ "resType": "lut"
+ },
+ "StreamingDataWidthConverter_Batch_11": {
+ "impl_style": "hls"
+ },
+ "StreamingFIFO_46": {
+ "ram_style": "auto",
+ "depth": 32,
+ "impl_style": "rtl"
+ },
+ "FMPadding_Batch_8": {
+ "SIMD": 16
+ },
+ "StreamingFIFO_47": {
+ "ram_style": "ultra",
+ "depth": 2048,
+ "impl_style": "vivado"
+ },
+ "ConvolutionInputGenerator_9": {
+ "SIMD": 16,
+ "ram_style": "distributed"
+ },
+ "Vector_Vector_Activate_Batch_8": {
+ "PE": 16,
+ "resType": "lut"
+ },
+ "StreamingFIFO_49": {
+ "ram_style": "ultra",
+ "depth": 1024,
+ "impl_style": "vivado"
+ },
+ "StreamingFCLayer_Batch_9": {
+ "PE": 32,
+ "SIMD": 16,
+ "ram_style": "block",
+ "mem_mode": "decoupled",
+ "resType": "lut"
+ },
+ "StreamingDataWidthConverter_Batch_12": {
+ "impl_style": "hls"
+ },
+ "StreamingFIFO_51": {
+ "ram_style": "auto",
+ "depth": 32,
+ "impl_style": "rtl"
+ },
+ "FMPadding_Batch_9": {
+ "SIMD": 16
+ },
+ "StreamingFIFO_52": {
+ "ram_style": "ultra",
+ "depth": 2048,
+ "impl_style": "vivado"
+ },
+ "ConvolutionInputGenerator_10": {
+ "SIMD": 16,
+ "ram_style": "distributed"
+ },
+ "Vector_Vector_Activate_Batch_9": {
+ "PE": 16,
+ "resType": "lut"
+ },
+ "StreamingFIFO_54": {
+ "ram_style": "ultra",
+ "depth": 1024,
+ "impl_style": "vivado"
+ },
+ "StreamingFCLayer_Batch_10": {
+ "PE": 32,
+ "SIMD": 16,
+ "ram_style": "block",
+ "mem_mode": "decoupled",
+ "resType": "lut"
+ },
+ "StreamingDataWidthConverter_Batch_13": {
+ "impl_style": "hls"
+ },
+ "StreamingFIFO_56": {
+ "ram_style": "auto",
+ "depth": 32,
+ "impl_style": "rtl"
+ },
+ "FMPadding_Batch_10": {
+ "SIMD": 16
+ },
+ "StreamingFIFO_57": {
+ "ram_style": "ultra",
+ "depth": 2048,
+ "impl_style": "vivado"
+ },
+ "ConvolutionInputGenerator_11": {
+ "SIMD": 16,
+ "ram_style": "distributed"
+ },
+ "Vector_Vector_Activate_Batch_10": {
+ "PE": 16,
+ "resType": "lut"
+ },
+ "StreamingFIFO_59": {
+ "ram_style": "ultra",
+ "depth": 1024,
+ "impl_style": "vivado"
+ },
+ "StreamingFCLayer_Batch_11": {
+ "PE": 32,
+ "SIMD": 16,
+ "ram_style": "block",
+ "mem_mode": "decoupled",
+ "resType": "lut"
+ },
+ "StreamingDataWidthConverter_Batch_14": {
+ "impl_style": "hls"
+ },
+ "StreamingFIFO_61": {
+ "ram_style": "auto",
+ "depth": 128,
+ "impl_style": "rtl"
+ },
+ "FMPadding_Batch_11": {
+ "SIMD": 4
+ },
+ "StreamingFIFO_62": {
+ "ram_style": "ultra",
+ "depth": 4096,
+ "impl_style": "vivado"
+ },
+ "ConvolutionInputGenerator_12": {
+ "SIMD": 4,
+ "ram_style": "distributed"
+ },
+ "Vector_Vector_Activate_Batch_11": {
+ "PE": 4,
+ "resType": "lut"
+ },
+ "StreamingDataWidthConverter_Batch_15": {
+ "impl_style": "hls"
+ },
+ "StreamingFIFO_65": {
+ "ram_style": "auto",
+ "depth": 256,
+ "impl_style": "rtl"
+ },
+ "StreamingFCLayer_Batch_12": {
+ "PE": 16,
+ "SIMD": 16,
+ "ram_style": "block",
+ "mem_mode": "decoupled",
+ "resType": "lut"
+ },
+ "StreamingDataWidthConverter_Batch_16": {
+ "impl_style": "hls"
+ },
+ "StreamingFIFO_67": {
+ "ram_style": "auto",
+ "depth": 32,
+ "impl_style": "rtl"
+ },
+ "FMPadding_Batch_12": {
+ "SIMD": 8
+ },
+ "StreamingFIFO_68": {
+ "ram_style": "ultra",
+ "depth": 4096,
+ "impl_style": "vivado"
+ },
+ "ConvolutionInputGenerator_13": {
+ "SIMD": 8,
+ "ram_style": "distributed"
+ },
+ "Vector_Vector_Activate_Batch_12": {
+ "PE": 8,
+ "resType": "lut"
+ },
+ "StreamingDataWidthConverter_Batch_17": {
+ "impl_style": "hls"
+ },
+ "StreamingFIFO_71": {
+ "ram_style": "ultra",
+ "depth": 1024,
+ "impl_style": "vivado"
+ },
+ "StreamingFCLayer_Batch_13": {
+ "PE": 32,
+ "SIMD": 16,
+ "ram_style": "block",
+ "mem_mode": "decoupled",
+ "resType": "lut"
+ },
+ "StreamingDataWidthConverter_Batch_18": {
+ "impl_style": "hls"
+ },
+ "ConvolutionInputGenerator_14": {
+ "SIMD": 4,
+ "ram_style": "distributed"
+ },
+ "Pool_Batch_0": {
+ "PE": 4
+ },
+ "StreamingFCLayer_Batch_14": {
+ "PE": 4,
+ "SIMD": 4,
+ "ram_style": "block",
+ "mem_mode": "decoupled",
+ "resType": "lut"
+ },
+ "StreamingDataWidthConverter_Batch_19": {
+ "impl_style": "hls"
+ },
+ "ChannelwiseOp_Batch_0": {
+ "PE": 1,
+ "ram_style": "distributed"
+ },
+ "LabelSelect_Batch_0": {
+ "PE": 1
+ }
+}
diff --git a/build/mobilenet-v1/folding_config/U280_folding_config.json b/build/mobilenet-v1/folding_config/U280_folding_config.json
new file mode 100644
index 0000000..f5ccf9b
--- /dev/null
+++ b/build/mobilenet-v1/folding_config/U280_folding_config.json
@@ -0,0 +1,499 @@
+{
+ "Defaults": {},
+ "StreamingFIFO_0": {
+ "ram_style": "ultra",
+ "depth": 512,
+ "impl_style": "vivado"
+ },
+ "ConvolutionInputGenerator_0": {
+ "SIMD": 3,
+ "ram_style": "distributed"
+ },
+ "StreamingFCLayer_Batch_0": {
+ "PE": 32,
+ "SIMD": 3,
+ "ram_style": "block",
+ "mem_mode": "decoupled",
+ "resType": "dsp"
+ },
+ "FMPadding_Batch_0": {
+ "SIMD": 32
+ },
+ "StreamingFIFO_3": {
+ "ram_style": "auto",
+ "depth": 256,
+ "impl_style": "rtl"
+ },
+ "ConvolutionInputGenerator_1": {
+ "SIMD": 32,
+ "ram_style": "distributed"
+ },
+ "Vector_Vector_Activate_Batch_0": {
+ "PE": 32,
+ "resType": "lut"
+ },
+ "StreamingDataWidthConverter_Batch_0": {
+ "impl_style": "hls"
+ },
+ "StreamingFCLayer_Batch_1": {
+ "PE": 16,
+ "SIMD": 16,
+ "ram_style": "block",
+ "mem_mode": "decoupled",
+ "resType": "lut"
+ },
+ "StreamingDataWidthConverter_Batch_1": {
+ "impl_style": "hls"
+ },
+ "FMPadding_Batch_1": {
+ "SIMD": 32
+ },
+ "StreamingFIFO_9": {
+ "ram_style": "ultra",
+ "depth": 512,
+ "impl_style": "vivado"
+ },
+ "ConvolutionInputGenerator_2": {
+ "SIMD": 32,
+ "ram_style": "distributed"
+ },
+ "Vector_Vector_Activate_Batch_1": {
+ "PE": 32,
+ "resType": "lut"
+ },
+ "StreamingDataWidthConverter_Batch_2": {
+ "impl_style": "hls"
+ },
+ "StreamingFIFO_12": {
+ "ram_style": "auto",
+ "depth": 256,
+ "impl_style": "rtl"
+ },
+ "StreamingFCLayer_Batch_2": {
+ "PE": 16,
+ "SIMD": 16,
+ "ram_style": "block",
+ "mem_mode": "decoupled",
+ "resType": "lut"
+ },
+ "StreamingDataWidthConverter_Batch_3": {
+ "impl_style": "hls"
+ },
+ "FMPadding_Batch_2": {
+ "SIMD": 64
+ },
+ "StreamingFIFO_15": {
+ "ram_style": "auto",
+ "depth": 256,
+ "impl_style": "rtl"
+ },
+ "ConvolutionInputGenerator_3": {
+ "SIMD": 64,
+ "ram_style": "distributed"
+ },
+ "Vector_Vector_Activate_Batch_2": {
+ "PE": 64,
+ "resType": "lut"
+ },
+ "StreamingDataWidthConverter_Batch_4": {
+ "impl_style": "hls"
+ },
+ "StreamingFIFO_18": {
+ "ram_style": "ultra",
+ "depth": 1024,
+ "impl_style": "vivado"
+ },
+ "StreamingFCLayer_Batch_3": {
+ "PE": 32,
+ "SIMD": 16,
+ "ram_style": "block",
+ "mem_mode": "decoupled",
+ "resType": "lut"
+ },
+ "StreamingDataWidthConverter_Batch_5": {
+ "impl_style": "hls"
+ },
+ "StreamingFIFO_20": {
+ "ram_style": "auto",
+ "depth": 32,
+ "impl_style": "rtl"
+ },
+ "FMPadding_Batch_3": {
+ "SIMD": 16
+ },
+ "StreamingFIFO_21": {
+ "ram_style": "ultra",
+ "depth": 1024,
+ "impl_style": "vivado"
+ },
+ "ConvolutionInputGenerator_4": {
+ "SIMD": 16,
+ "ram_style": "distributed"
+ },
+ "Vector_Vector_Activate_Batch_3": {
+ "PE": 16,
+ "resType": "lut"
+ },
+ "StreamingFIFO_23": {
+ "ram_style": "auto",
+ "depth": 256,
+ "impl_style": "rtl"
+ },
+ "StreamingFCLayer_Batch_4": {
+ "PE": 16,
+ "SIMD": 16,
+ "ram_style": "block",
+ "mem_mode": "decoupled",
+ "resType": "lut"
+ },
+ "StreamingDataWidthConverter_Batch_6": {
+ "impl_style": "hls"
+ },
+ "FMPadding_Batch_4": {
+ "SIMD": 32
+ },
+ "StreamingFIFO_26": {
+ "ram_style": "ultra",
+ "depth": 512,
+ "impl_style": "vivado"
+ },
+ "ConvolutionInputGenerator_5": {
+ "SIMD": 32,
+ "ram_style": "distributed"
+ },
+ "Vector_Vector_Activate_Batch_4": {
+ "PE": 32,
+ "resType": "lut"
+ },
+ "StreamingDataWidthConverter_Batch_7": {
+ "impl_style": "hls"
+ },
+ "StreamingFIFO_29": {
+ "ram_style": "ultra",
+ "depth": 1024,
+ "impl_style": "vivado"
+ },
+ "StreamingFCLayer_Batch_5": {
+ "PE": 32,
+ "SIMD": 16,
+ "ram_style": "block",
+ "mem_mode": "decoupled",
+ "resType": "lut"
+ },
+ "StreamingDataWidthConverter_Batch_8": {
+ "impl_style": "hls"
+ },
+ "StreamingFIFO_31": {
+ "ram_style": "auto",
+ "depth": 32,
+ "impl_style": "rtl"
+ },
+ "FMPadding_Batch_5": {
+ "SIMD": 8
+ },
+ "StreamingFIFO_32": {
+ "ram_style": "ultra",
+ "depth": 2048,
+ "impl_style": "vivado"
+ },
+ "ConvolutionInputGenerator_6": {
+ "SIMD": 8,
+ "ram_style": "distributed"
+ },
+ "Vector_Vector_Activate_Batch_5": {
+ "PE": 8,
+ "resType": "lut"
+ },
+ "StreamingDataWidthConverter_Batch_9": {
+ "impl_style": "hls"
+ },
+ "StreamingFIFO_35": {
+ "ram_style": "auto",
+ "depth": 256,
+ "impl_style": "rtl"
+ },
+ "StreamingFCLayer_Batch_6": {
+ "PE": 16,
+ "SIMD": 16,
+ "ram_style": "block",
+ "mem_mode": "decoupled",
+ "resType": "lut"
+ },
+ "FMPadding_Batch_6": {
+ "SIMD": 16
+ },
+ "StreamingFIFO_37": {
+ "ram_style": "ultra",
+ "depth": 2048,
+ "impl_style": "vivado"
+ },
+ "ConvolutionInputGenerator_7": {
+ "SIMD": 16,
+ "ram_style": "distributed"
+ },
+ "Vector_Vector_Activate_Batch_6": {
+ "PE": 16,
+ "resType": "lut"
+ },
+ "StreamingFIFO_39": {
+ "ram_style": "ultra",
+ "depth": 1024,
+ "impl_style": "vivado"
+ },
+ "StreamingFCLayer_Batch_7": {
+ "PE": 32,
+ "SIMD": 16,
+ "ram_style": "block",
+ "mem_mode": "decoupled",
+ "resType": "lut"
+ },
+ "StreamingDataWidthConverter_Batch_10": {
+ "impl_style": "hls"
+ },
+ "StreamingFIFO_41": {
+ "ram_style": "auto",
+ "depth": 32,
+ "impl_style": "rtl"
+ },
+ "FMPadding_Batch_7": {
+ "SIMD": 16
+ },
+ "StreamingFIFO_42": {
+ "ram_style": "ultra",
+ "depth": 2048,
+ "impl_style": "vivado"
+ },
+ "ConvolutionInputGenerator_8": {
+ "SIMD": 16,
+ "ram_style": "distributed"
+ },
+ "Vector_Vector_Activate_Batch_7": {
+ "PE": 16,
+ "resType": "lut"
+ },
+ "StreamingFIFO_44": {
+ "ram_style": "ultra",
+ "depth": 1024,
+ "impl_style": "vivado"
+ },
+ "StreamingFCLayer_Batch_8": {
+ "PE": 32,
+ "SIMD": 16,
+ "ram_style": "block",
+ "mem_mode": "decoupled",
+ "resType": "lut"
+ },
+ "StreamingDataWidthConverter_Batch_11": {
+ "impl_style": "hls"
+ },
+ "StreamingFIFO_46": {
+ "ram_style": "auto",
+ "depth": 32,
+ "impl_style": "rtl"
+ },
+ "FMPadding_Batch_8": {
+ "SIMD": 16
+ },
+ "StreamingFIFO_47": {
+ "ram_style": "ultra",
+ "depth": 2048,
+ "impl_style": "vivado"
+ },
+ "ConvolutionInputGenerator_9": {
+ "SIMD": 16,
+ "ram_style": "distributed"
+ },
+ "Vector_Vector_Activate_Batch_8": {
+ "PE": 16,
+ "resType": "lut"
+ },
+ "StreamingFIFO_49": {
+ "ram_style": "ultra",
+ "depth": 1024,
+ "impl_style": "vivado"
+ },
+ "StreamingFCLayer_Batch_9": {
+ "PE": 32,
+ "SIMD": 16,
+ "ram_style": "block",
+ "mem_mode": "decoupled",
+ "resType": "lut"
+ },
+ "StreamingDataWidthConverter_Batch_12": {
+ "impl_style": "hls"
+ },
+ "StreamingFIFO_51": {
+ "ram_style": "auto",
+ "depth": 32,
+ "impl_style": "rtl"
+ },
+ "FMPadding_Batch_9": {
+ "SIMD": 16
+ },
+ "StreamingFIFO_52": {
+ "ram_style": "ultra",
+ "depth": 2048,
+ "impl_style": "vivado"
+ },
+ "ConvolutionInputGenerator_10": {
+ "SIMD": 16,
+ "ram_style": "distributed"
+ },
+ "Vector_Vector_Activate_Batch_9": {
+ "PE": 16,
+ "resType": "lut"
+ },
+ "StreamingFIFO_54": {
+ "ram_style": "ultra",
+ "depth": 1024,
+ "impl_style": "vivado"
+ },
+ "StreamingFCLayer_Batch_10": {
+ "PE": 32,
+ "SIMD": 16,
+ "ram_style": "block",
+ "mem_mode": "decoupled",
+ "resType": "lut"
+ },
+ "StreamingDataWidthConverter_Batch_13": {
+ "impl_style": "hls"
+ },
+ "StreamingFIFO_56": {
+ "ram_style": "auto",
+ "depth": 32,
+ "impl_style": "rtl"
+ },
+ "FMPadding_Batch_10": {
+ "SIMD": 16
+ },
+ "StreamingFIFO_57": {
+ "ram_style": "ultra",
+ "depth": 2048,
+ "impl_style": "vivado"
+ },
+ "ConvolutionInputGenerator_11": {
+ "SIMD": 16,
+ "ram_style": "distributed"
+ },
+ "Vector_Vector_Activate_Batch_10": {
+ "PE": 16,
+ "resType": "lut"
+ },
+ "StreamingFIFO_59": {
+ "ram_style": "ultra",
+ "depth": 1024,
+ "impl_style": "vivado"
+ },
+ "StreamingFCLayer_Batch_11": {
+ "PE": 32,
+ "SIMD": 16,
+ "ram_style": "block",
+ "mem_mode": "decoupled",
+ "resType": "lut"
+ },
+ "StreamingDataWidthConverter_Batch_14": {
+ "impl_style": "hls"
+ },
+ "StreamingFIFO_61": {
+ "ram_style": "auto",
+ "depth": 128,
+ "impl_style": "rtl"
+ },
+ "FMPadding_Batch_11": {
+ "SIMD": 4
+ },
+ "StreamingFIFO_62": {
+ "ram_style": "ultra",
+ "depth": 4096,
+ "impl_style": "vivado"
+ },
+ "ConvolutionInputGenerator_12": {
+ "SIMD": 4,
+ "ram_style": "distributed"
+ },
+ "Vector_Vector_Activate_Batch_11": {
+ "PE": 4,
+ "resType": "lut"
+ },
+ "StreamingDataWidthConverter_Batch_15": {
+ "impl_style": "hls"
+ },
+ "StreamingFIFO_65": {
+ "ram_style": "auto",
+ "depth": 256,
+ "impl_style": "rtl"
+ },
+ "StreamingFCLayer_Batch_12": {
+ "PE": 16,
+ "SIMD": 16,
+ "ram_style": "block",
+ "mem_mode": "decoupled",
+ "resType": "lut"
+ },
+ "StreamingDataWidthConverter_Batch_16": {
+ "impl_style": "hls"
+ },
+ "StreamingFIFO_67": {
+ "ram_style": "auto",
+ "depth": 32,
+ "impl_style": "rtl"
+ },
+ "FMPadding_Batch_12": {
+ "SIMD": 8
+ },
+ "StreamingFIFO_68": {
+ "ram_style": "ultra",
+ "depth": 4096,
+ "impl_style": "vivado"
+ },
+ "ConvolutionInputGenerator_13": {
+ "SIMD": 8,
+ "ram_style": "distributed"
+ },
+ "Vector_Vector_Activate_Batch_12": {
+ "PE": 8,
+ "resType": "lut"
+ },
+ "StreamingDataWidthConverter_Batch_17": {
+ "impl_style": "hls"
+ },
+ "StreamingFIFO_71": {
+ "ram_style": "ultra",
+ "depth": 1024,
+ "impl_style": "vivado"
+ },
+ "StreamingFCLayer_Batch_13": {
+ "PE": 32,
+ "SIMD": 16,
+ "ram_style": "block",
+ "mem_mode": "decoupled",
+ "resType": "lut"
+ },
+ "StreamingDataWidthConverter_Batch_18": {
+ "impl_style": "hls"
+ },
+ "ConvolutionInputGenerator_14": {
+ "SIMD": 4,
+ "ram_style": "distributed"
+ },
+ "Pool_Batch_0": {
+ "PE": 4
+ },
+ "StreamingFCLayer_Batch_14": {
+ "PE": 4,
+ "SIMD": 4,
+ "ram_style": "block",
+ "mem_mode": "decoupled",
+ "resType": "lut"
+ },
+ "StreamingDataWidthConverter_Batch_19": {
+ "impl_style": "hls"
+ },
+ "ChannelwiseOp_Batch_0": {
+ "PE": 1,
+ "ram_style": "distributed"
+ },
+ "LabelSelect_Batch_0": {
+ "PE": 1
+ }
+}
diff --git a/build/mobilenet-v1/folding_config/U50_folding_config.json b/build/mobilenet-v1/folding_config/U50_folding_config.json
new file mode 100644
index 0000000..f5ccf9b
--- /dev/null
+++ b/build/mobilenet-v1/folding_config/U50_folding_config.json
@@ -0,0 +1,499 @@
+{
+ "Defaults": {},
+ "StreamingFIFO_0": {
+ "ram_style": "ultra",
+ "depth": 512,
+ "impl_style": "vivado"
+ },
+ "ConvolutionInputGenerator_0": {
+ "SIMD": 3,
+ "ram_style": "distributed"
+ },
+ "StreamingFCLayer_Batch_0": {
+ "PE": 32,
+ "SIMD": 3,
+ "ram_style": "block",
+ "mem_mode": "decoupled",
+ "resType": "dsp"
+ },
+ "FMPadding_Batch_0": {
+ "SIMD": 32
+ },
+ "StreamingFIFO_3": {
+ "ram_style": "auto",
+ "depth": 256,
+ "impl_style": "rtl"
+ },
+ "ConvolutionInputGenerator_1": {
+ "SIMD": 32,
+ "ram_style": "distributed"
+ },
+ "Vector_Vector_Activate_Batch_0": {
+ "PE": 32,
+ "resType": "lut"
+ },
+ "StreamingDataWidthConverter_Batch_0": {
+ "impl_style": "hls"
+ },
+ "StreamingFCLayer_Batch_1": {
+ "PE": 16,
+ "SIMD": 16,
+ "ram_style": "block",
+ "mem_mode": "decoupled",
+ "resType": "lut"
+ },
+ "StreamingDataWidthConverter_Batch_1": {
+ "impl_style": "hls"
+ },
+ "FMPadding_Batch_1": {
+ "SIMD": 32
+ },
+ "StreamingFIFO_9": {
+ "ram_style": "ultra",
+ "depth": 512,
+ "impl_style": "vivado"
+ },
+ "ConvolutionInputGenerator_2": {
+ "SIMD": 32,
+ "ram_style": "distributed"
+ },
+ "Vector_Vector_Activate_Batch_1": {
+ "PE": 32,
+ "resType": "lut"
+ },
+ "StreamingDataWidthConverter_Batch_2": {
+ "impl_style": "hls"
+ },
+ "StreamingFIFO_12": {
+ "ram_style": "auto",
+ "depth": 256,
+ "impl_style": "rtl"
+ },
+ "StreamingFCLayer_Batch_2": {
+ "PE": 16,
+ "SIMD": 16,
+ "ram_style": "block",
+ "mem_mode": "decoupled",
+ "resType": "lut"
+ },
+ "StreamingDataWidthConverter_Batch_3": {
+ "impl_style": "hls"
+ },
+ "FMPadding_Batch_2": {
+ "SIMD": 64
+ },
+ "StreamingFIFO_15": {
+ "ram_style": "auto",
+ "depth": 256,
+ "impl_style": "rtl"
+ },
+ "ConvolutionInputGenerator_3": {
+ "SIMD": 64,
+ "ram_style": "distributed"
+ },
+ "Vector_Vector_Activate_Batch_2": {
+ "PE": 64,
+ "resType": "lut"
+ },
+ "StreamingDataWidthConverter_Batch_4": {
+ "impl_style": "hls"
+ },
+ "StreamingFIFO_18": {
+ "ram_style": "ultra",
+ "depth": 1024,
+ "impl_style": "vivado"
+ },
+ "StreamingFCLayer_Batch_3": {
+ "PE": 32,
+ "SIMD": 16,
+ "ram_style": "block",
+ "mem_mode": "decoupled",
+ "resType": "lut"
+ },
+ "StreamingDataWidthConverter_Batch_5": {
+ "impl_style": "hls"
+ },
+ "StreamingFIFO_20": {
+ "ram_style": "auto",
+ "depth": 32,
+ "impl_style": "rtl"
+ },
+ "FMPadding_Batch_3": {
+ "SIMD": 16
+ },
+ "StreamingFIFO_21": {
+ "ram_style": "ultra",
+ "depth": 1024,
+ "impl_style": "vivado"
+ },
+ "ConvolutionInputGenerator_4": {
+ "SIMD": 16,
+ "ram_style": "distributed"
+ },
+ "Vector_Vector_Activate_Batch_3": {
+ "PE": 16,
+ "resType": "lut"
+ },
+ "StreamingFIFO_23": {
+ "ram_style": "auto",
+ "depth": 256,
+ "impl_style": "rtl"
+ },
+ "StreamingFCLayer_Batch_4": {
+ "PE": 16,
+ "SIMD": 16,
+ "ram_style": "block",
+ "mem_mode": "decoupled",
+ "resType": "lut"
+ },
+ "StreamingDataWidthConverter_Batch_6": {
+ "impl_style": "hls"
+ },
+ "FMPadding_Batch_4": {
+ "SIMD": 32
+ },
+ "StreamingFIFO_26": {
+ "ram_style": "ultra",
+ "depth": 512,
+ "impl_style": "vivado"
+ },
+ "ConvolutionInputGenerator_5": {
+ "SIMD": 32,
+ "ram_style": "distributed"
+ },
+ "Vector_Vector_Activate_Batch_4": {
+ "PE": 32,
+ "resType": "lut"
+ },
+ "StreamingDataWidthConverter_Batch_7": {
+ "impl_style": "hls"
+ },
+ "StreamingFIFO_29": {
+ "ram_style": "ultra",
+ "depth": 1024,
+ "impl_style": "vivado"
+ },
+ "StreamingFCLayer_Batch_5": {
+ "PE": 32,
+ "SIMD": 16,
+ "ram_style": "block",
+ "mem_mode": "decoupled",
+ "resType": "lut"
+ },
+ "StreamingDataWidthConverter_Batch_8": {
+ "impl_style": "hls"
+ },
+ "StreamingFIFO_31": {
+ "ram_style": "auto",
+ "depth": 32,
+ "impl_style": "rtl"
+ },
+ "FMPadding_Batch_5": {
+ "SIMD": 8
+ },
+ "StreamingFIFO_32": {
+ "ram_style": "ultra",
+ "depth": 2048,
+ "impl_style": "vivado"
+ },
+ "ConvolutionInputGenerator_6": {
+ "SIMD": 8,
+ "ram_style": "distributed"
+ },
+ "Vector_Vector_Activate_Batch_5": {
+ "PE": 8,
+ "resType": "lut"
+ },
+ "StreamingDataWidthConverter_Batch_9": {
+ "impl_style": "hls"
+ },
+ "StreamingFIFO_35": {
+ "ram_style": "auto",
+ "depth": 256,
+ "impl_style": "rtl"
+ },
+ "StreamingFCLayer_Batch_6": {
+ "PE": 16,
+ "SIMD": 16,
+ "ram_style": "block",
+ "mem_mode": "decoupled",
+ "resType": "lut"
+ },
+ "FMPadding_Batch_6": {
+ "SIMD": 16
+ },
+ "StreamingFIFO_37": {
+ "ram_style": "ultra",
+ "depth": 2048,
+ "impl_style": "vivado"
+ },
+ "ConvolutionInputGenerator_7": {
+ "SIMD": 16,
+ "ram_style": "distributed"
+ },
+ "Vector_Vector_Activate_Batch_6": {
+ "PE": 16,
+ "resType": "lut"
+ },
+ "StreamingFIFO_39": {
+ "ram_style": "ultra",
+ "depth": 1024,
+ "impl_style": "vivado"
+ },
+ "StreamingFCLayer_Batch_7": {
+ "PE": 32,
+ "SIMD": 16,
+ "ram_style": "block",
+ "mem_mode": "decoupled",
+ "resType": "lut"
+ },
+ "StreamingDataWidthConverter_Batch_10": {
+ "impl_style": "hls"
+ },
+ "StreamingFIFO_41": {
+ "ram_style": "auto",
+ "depth": 32,
+ "impl_style": "rtl"
+ },
+ "FMPadding_Batch_7": {
+ "SIMD": 16
+ },
+ "StreamingFIFO_42": {
+ "ram_style": "ultra",
+ "depth": 2048,
+ "impl_style": "vivado"
+ },
+ "ConvolutionInputGenerator_8": {
+ "SIMD": 16,
+ "ram_style": "distributed"
+ },
+ "Vector_Vector_Activate_Batch_7": {
+ "PE": 16,
+ "resType": "lut"
+ },
+ "StreamingFIFO_44": {
+ "ram_style": "ultra",
+ "depth": 1024,
+ "impl_style": "vivado"
+ },
+ "StreamingFCLayer_Batch_8": {
+ "PE": 32,
+ "SIMD": 16,
+ "ram_style": "block",
+ "mem_mode": "decoupled",
+ "resType": "lut"
+ },
+ "StreamingDataWidthConverter_Batch_11": {
+ "impl_style": "hls"
+ },
+ "StreamingFIFO_46": {
+ "ram_style": "auto",
+ "depth": 32,
+ "impl_style": "rtl"
+ },
+ "FMPadding_Batch_8": {
+ "SIMD": 16
+ },
+ "StreamingFIFO_47": {
+ "ram_style": "ultra",
+ "depth": 2048,
+ "impl_style": "vivado"
+ },
+ "ConvolutionInputGenerator_9": {
+ "SIMD": 16,
+ "ram_style": "distributed"
+ },
+ "Vector_Vector_Activate_Batch_8": {
+ "PE": 16,
+ "resType": "lut"
+ },
+ "StreamingFIFO_49": {
+ "ram_style": "ultra",
+ "depth": 1024,
+ "impl_style": "vivado"
+ },
+ "StreamingFCLayer_Batch_9": {
+ "PE": 32,
+ "SIMD": 16,
+ "ram_style": "block",
+ "mem_mode": "decoupled",
+ "resType": "lut"
+ },
+ "StreamingDataWidthConverter_Batch_12": {
+ "impl_style": "hls"
+ },
+ "StreamingFIFO_51": {
+ "ram_style": "auto",
+ "depth": 32,
+ "impl_style": "rtl"
+ },
+ "FMPadding_Batch_9": {
+ "SIMD": 16
+ },
+ "StreamingFIFO_52": {
+ "ram_style": "ultra",
+ "depth": 2048,
+ "impl_style": "vivado"
+ },
+ "ConvolutionInputGenerator_10": {
+ "SIMD": 16,
+ "ram_style": "distributed"
+ },
+ "Vector_Vector_Activate_Batch_9": {
+ "PE": 16,
+ "resType": "lut"
+ },
+ "StreamingFIFO_54": {
+ "ram_style": "ultra",
+ "depth": 1024,
+ "impl_style": "vivado"
+ },
+ "StreamingFCLayer_Batch_10": {
+ "PE": 32,
+ "SIMD": 16,
+ "ram_style": "block",
+ "mem_mode": "decoupled",
+ "resType": "lut"
+ },
+ "StreamingDataWidthConverter_Batch_13": {
+ "impl_style": "hls"
+ },
+ "StreamingFIFO_56": {
+ "ram_style": "auto",
+ "depth": 32,
+ "impl_style": "rtl"
+ },
+ "FMPadding_Batch_10": {
+ "SIMD": 16
+ },
+ "StreamingFIFO_57": {
+ "ram_style": "ultra",
+ "depth": 2048,
+ "impl_style": "vivado"
+ },
+ "ConvolutionInputGenerator_11": {
+ "SIMD": 16,
+ "ram_style": "distributed"
+ },
+ "Vector_Vector_Activate_Batch_10": {
+ "PE": 16,
+ "resType": "lut"
+ },
+ "StreamingFIFO_59": {
+ "ram_style": "ultra",
+ "depth": 1024,
+ "impl_style": "vivado"
+ },
+ "StreamingFCLayer_Batch_11": {
+ "PE": 32,
+ "SIMD": 16,
+ "ram_style": "block",
+ "mem_mode": "decoupled",
+ "resType": "lut"
+ },
+ "StreamingDataWidthConverter_Batch_14": {
+ "impl_style": "hls"
+ },
+ "StreamingFIFO_61": {
+ "ram_style": "auto",
+ "depth": 128,
+ "impl_style": "rtl"
+ },
+ "FMPadding_Batch_11": {
+ "SIMD": 4
+ },
+ "StreamingFIFO_62": {
+ "ram_style": "ultra",
+ "depth": 4096,
+ "impl_style": "vivado"
+ },
+ "ConvolutionInputGenerator_12": {
+ "SIMD": 4,
+ "ram_style": "distributed"
+ },
+ "Vector_Vector_Activate_Batch_11": {
+ "PE": 4,
+ "resType": "lut"
+ },
+ "StreamingDataWidthConverter_Batch_15": {
+ "impl_style": "hls"
+ },
+ "StreamingFIFO_65": {
+ "ram_style": "auto",
+ "depth": 256,
+ "impl_style": "rtl"
+ },
+ "StreamingFCLayer_Batch_12": {
+ "PE": 16,
+ "SIMD": 16,
+ "ram_style": "block",
+ "mem_mode": "decoupled",
+ "resType": "lut"
+ },
+ "StreamingDataWidthConverter_Batch_16": {
+ "impl_style": "hls"
+ },
+ "StreamingFIFO_67": {
+ "ram_style": "auto",
+ "depth": 32,
+ "impl_style": "rtl"
+ },
+ "FMPadding_Batch_12": {
+ "SIMD": 8
+ },
+ "StreamingFIFO_68": {
+ "ram_style": "ultra",
+ "depth": 4096,
+ "impl_style": "vivado"
+ },
+ "ConvolutionInputGenerator_13": {
+ "SIMD": 8,
+ "ram_style": "distributed"
+ },
+ "Vector_Vector_Activate_Batch_12": {
+ "PE": 8,
+ "resType": "lut"
+ },
+ "StreamingDataWidthConverter_Batch_17": {
+ "impl_style": "hls"
+ },
+ "StreamingFIFO_71": {
+ "ram_style": "ultra",
+ "depth": 1024,
+ "impl_style": "vivado"
+ },
+ "StreamingFCLayer_Batch_13": {
+ "PE": 32,
+ "SIMD": 16,
+ "ram_style": "block",
+ "mem_mode": "decoupled",
+ "resType": "lut"
+ },
+ "StreamingDataWidthConverter_Batch_18": {
+ "impl_style": "hls"
+ },
+ "ConvolutionInputGenerator_14": {
+ "SIMD": 4,
+ "ram_style": "distributed"
+ },
+ "Pool_Batch_0": {
+ "PE": 4
+ },
+ "StreamingFCLayer_Batch_14": {
+ "PE": 4,
+ "SIMD": 4,
+ "ram_style": "block",
+ "mem_mode": "decoupled",
+ "resType": "lut"
+ },
+ "StreamingDataWidthConverter_Batch_19": {
+ "impl_style": "hls"
+ },
+ "ChannelwiseOp_Batch_0": {
+ "PE": 1,
+ "ram_style": "distributed"
+ },
+ "LabelSelect_Batch_0": {
+ "PE": 1
+ }
+}
diff --git a/build/mobilenet-v1/folding_config/ZCU102_folding_config.json b/build/mobilenet-v1/folding_config/ZCU102_folding_config.json
new file mode 100755
index 0000000..02d6d6e
--- /dev/null
+++ b/build/mobilenet-v1/folding_config/ZCU102_folding_config.json
@@ -0,0 +1,816 @@
+{
+ "Defaults": {},
+ "StreamingFIFO_0": {
+ "ram_style": "block",
+ "depth": 1024,
+ "impl_style": "vivado"
+ },
+ "ConvolutionInputGenerator_0": {
+ "SIMD": 1,
+ "ram_style": "distributed"
+ },
+ "StreamingDataWidthConverter_Batch_0": {
+ "impl_style": "hls"
+ },
+ "StreamingFCLayer_Batch_0": {
+ "PE": 16,
+ "SIMD": 3,
+ "ram_style": "auto",
+ "resType": "dsp",
+ "mem_mode": "decoupled",
+ "runtime_writeable_weights": 0
+ },
+ "StreamingFIFO_3": {
+ "ram_style": "auto",
+ "depth": 64,
+ "impl_style": "rtl"
+ },
+ "StreamingDataWidthConverter_Batch_1": {
+ "impl_style": "hls"
+ },
+ "Thresholding_Batch_0": {
+ "PE": 1,
+ "ram_style": "distributed",
+ "mem_mode": "const",
+ "runtime_writeable_weights": 0
+ },
+ "StreamingDataWidthConverter_Batch_2": {
+ "impl_style": "hls"
+ },
+ "StreamingFIFO_6": {
+ "ram_style": "auto",
+ "depth": 256,
+ "impl_style": "rtl"
+ },
+ "FMPadding_Batch_0": {
+ "SIMD": 2
+ },
+ "StreamingDataWidthConverter_Batch_3": {
+ "impl_style": "hls"
+ },
+ "StreamingFIFO_8": {
+ "ram_style": "block",
+ "depth": 512,
+ "impl_style": "vivado"
+ },
+ "ConvolutionInputGenerator_1": {
+ "SIMD": 16,
+ "ram_style": "distributed"
+ },
+ "Vector_Vector_Activate_Batch_0": {
+ "PE": 16,
+ "resType": "lut"
+ },
+ "StreamingFIFO_10": {
+ "ram_style": "auto",
+ "depth": 256,
+ "impl_style": "rtl"
+ },
+ "StreamingDataWidthConverter_Batch_4": {
+ "impl_style": "hls"
+ },
+ "Thresholding_Batch_1": {
+ "PE": 1,
+ "ram_style": "distributed",
+ "mem_mode": "const",
+ "runtime_writeable_weights": 0
+ },
+ "StreamingDataWidthConverter_Batch_5": {
+ "impl_style": "hls"
+ },
+ "StreamingFCLayer_Batch_1": {
+ "PE": 8,
+ "SIMD": 8,
+ "ram_style": "auto",
+ "resType": "lut",
+ "mem_mode": "decoupled",
+ "runtime_writeable_weights": 0
+ },
+ "StreamingDataWidthConverter_Batch_6": {
+ "impl_style": "hls"
+ },
+ "Thresholding_Batch_2": {
+ "PE": 2,
+ "ram_style": "distributed",
+ "mem_mode": "const",
+ "runtime_writeable_weights": 0
+ },
+ "StreamingDataWidthConverter_Batch_7": {
+ "impl_style": "hls"
+ },
+ "StreamingFIFO_17": {
+ "ram_style": "auto",
+ "depth": 32,
+ "impl_style": "rtl"
+ },
+ "FMPadding_Batch_1": {
+ "SIMD": 4
+ },
+ "StreamingDataWidthConverter_Batch_8": {
+ "impl_style": "hls"
+ },
+ "StreamingFIFO_19": {
+ "ram_style": "block",
+ "depth": 1024,
+ "impl_style": "vivado"
+ },
+ "ConvolutionInputGenerator_2": {
+ "SIMD": 8,
+ "ram_style": "distributed"
+ },
+ "Vector_Vector_Activate_Batch_1": {
+ "PE": 8,
+ "resType": "lut"
+ },
+ "StreamingDataWidthConverter_Batch_9": {
+ "impl_style": "hls"
+ },
+ "Thresholding_Batch_3": {
+ "PE": 1,
+ "ram_style": "distributed",
+ "mem_mode": "const",
+ "runtime_writeable_weights": 0
+ },
+ "StreamingDataWidthConverter_Batch_10": {
+ "impl_style": "hls"
+ },
+ "StreamingFIFO_24": {
+ "ram_style": "auto",
+ "depth": 32,
+ "impl_style": "rtl"
+ },
+ "StreamingFCLayer_Batch_2": {
+ "PE": 16,
+ "SIMD": 8,
+ "ram_style": "auto",
+ "resType": "lut",
+ "mem_mode": "decoupled",
+ "runtime_writeable_weights": 0
+ },
+ "StreamingDataWidthConverter_Batch_11": {
+ "impl_style": "hls"
+ },
+ "Thresholding_Batch_4": {
+ "PE": 2,
+ "ram_style": "distributed",
+ "mem_mode": "const",
+ "runtime_writeable_weights": 0
+ },
+ "StreamingFIFO_27": {
+ "ram_style": "auto",
+ "depth": 128,
+ "impl_style": "rtl"
+ },
+ "FMPadding_Batch_2": {
+ "SIMD": 2
+ },
+ "StreamingDataWidthConverter_Batch_12": {
+ "impl_style": "hls"
+ },
+ "StreamingFIFO_29": {
+ "ram_style": "block",
+ "depth": 1024,
+ "impl_style": "vivado"
+ },
+ "ConvolutionInputGenerator_3": {
+ "SIMD": 16,
+ "ram_style": "distributed"
+ },
+ "Vector_Vector_Activate_Batch_2": {
+ "PE": 16,
+ "resType": "lut"
+ },
+ "StreamingDataWidthConverter_Batch_13": {
+ "impl_style": "hls"
+ },
+ "Thresholding_Batch_5": {
+ "PE": 2,
+ "ram_style": "distributed",
+ "mem_mode": "const",
+ "runtime_writeable_weights": 0
+ },
+ "StreamingDataWidthConverter_Batch_14": {
+ "impl_style": "hls"
+ },
+ "StreamingFIFO_34": {
+ "ram_style": "auto",
+ "depth": 32,
+ "impl_style": "rtl"
+ },
+ "StreamingFCLayer_Batch_3": {
+ "PE": 32,
+ "SIMD": 8,
+ "ram_style": "auto",
+ "resType": "lut",
+ "mem_mode": "decoupled",
+ "runtime_writeable_weights": 0
+ },
+ "StreamingDataWidthConverter_Batch_15": {
+ "impl_style": "hls"
+ },
+ "Thresholding_Batch_6": {
+ "PE": 2,
+ "ram_style": "distributed",
+ "mem_mode": "const",
+ "runtime_writeable_weights": 0
+ },
+ "StreamingFIFO_37": {
+ "ram_style": "auto",
+ "depth": 128,
+ "impl_style": "rtl"
+ },
+ "FMPadding_Batch_3": {
+ "SIMD": 2
+ },
+ "StreamingDataWidthConverter_Batch_16": {
+ "impl_style": "hls"
+ },
+ "StreamingFIFO_39": {
+ "ram_style": "block",
+ "depth": 4096,
+ "impl_style": "vivado"
+ },
+ "ConvolutionInputGenerator_4": {
+ "SIMD": 4,
+ "ram_style": "distributed"
+ },
+ "Vector_Vector_Activate_Batch_3": {
+ "PE": 4,
+ "resType": "lut"
+ },
+ "StreamingDataWidthConverter_Batch_17": {
+ "impl_style": "hls"
+ },
+ "Thresholding_Batch_7": {
+ "PE": 1,
+ "ram_style": "distributed",
+ "mem_mode": "const",
+ "runtime_writeable_weights": 0
+ },
+ "StreamingDataWidthConverter_Batch_18": {
+ "impl_style": "hls"
+ },
+ "StreamingFIFO_44": {
+ "ram_style": "auto",
+ "depth": 32,
+ "impl_style": "rtl"
+ },
+ "StreamingFCLayer_Batch_4": {
+ "PE": 16,
+ "SIMD": 8,
+ "ram_style": "auto",
+ "resType": "lut",
+ "mem_mode": "decoupled",
+ "runtime_writeable_weights": 0
+ },
+ "StreamingDataWidthConverter_Batch_19": {
+ "impl_style": "hls"
+ },
+ "Thresholding_Batch_8": {
+ "PE": 1,
+ "ram_style": "distributed",
+ "mem_mode": "const",
+ "runtime_writeable_weights": 0
+ },
+ "StreamingFIFO_47": {
+ "ram_style": "block",
+ "depth": 512,
+ "impl_style": "vivado"
+ },
+ "FMPadding_Batch_4": {
+ "SIMD": 1
+ },
+ "StreamingDataWidthConverter_Batch_20": {
+ "impl_style": "hls"
+ },
+ "StreamingFIFO_49": {
+ "ram_style": "block",
+ "depth": 1024,
+ "impl_style": "vivado"
+ },
+ "ConvolutionInputGenerator_5": {
+ "SIMD": 8,
+ "ram_style": "distributed"
+ },
+ "Vector_Vector_Activate_Batch_4": {
+ "PE": 8,
+ "resType": "lut"
+ },
+ "StreamingDataWidthConverter_Batch_21": {
+ "impl_style": "hls"
+ },
+ "Thresholding_Batch_9": {
+ "PE": 1,
+ "ram_style": "distributed",
+ "mem_mode": "const",
+ "runtime_writeable_weights": 0
+ },
+ "StreamingDataWidthConverter_Batch_22": {
+ "impl_style": "hls"
+ },
+ "StreamingFIFO_54": {
+ "ram_style": "auto",
+ "depth": 32,
+ "impl_style": "rtl"
+ },
+ "StreamingFCLayer_Batch_5": {
+ "PE": 32,
+ "SIMD": 8,
+ "ram_style": "auto",
+ "resType": "lut",
+ "mem_mode": "decoupled",
+ "runtime_writeable_weights": 0
+ },
+ "StreamingDataWidthConverter_Batch_23": {
+ "impl_style": "hls"
+ },
+ "Thresholding_Batch_10": {
+ "PE": 1,
+ "ram_style": "distributed",
+ "mem_mode": "const",
+ "runtime_writeable_weights": 0
+ },
+ "StreamingFIFO_57": {
+ "ram_style": "block",
+ "depth": 512,
+ "impl_style": "vivado"
+ },
+ "FMPadding_Batch_5": {
+ "SIMD": 1
+ },
+ "StreamingDataWidthConverter_Batch_24": {
+ "impl_style": "hls"
+ },
+ "StreamingFIFO_59": {
+ "ram_style": "block",
+ "depth": 8192,
+ "impl_style": "vivado"
+ },
+ "ConvolutionInputGenerator_6": {
+ "SIMD": 2,
+ "ram_style": "distributed"
+ },
+ "Vector_Vector_Activate_Batch_5": {
+ "PE": 2,
+ "resType": "lut"
+ },
+ "StreamingDataWidthConverter_Batch_25": {
+ "impl_style": "hls"
+ },
+ "Thresholding_Batch_11": {
+ "PE": 1,
+ "ram_style": "distributed",
+ "mem_mode": "const",
+ "runtime_writeable_weights": 0
+ },
+ "StreamingDataWidthConverter_Batch_26": {
+ "impl_style": "hls"
+ },
+ "StreamingFIFO_64": {
+ "ram_style": "auto",
+ "depth": 32,
+ "impl_style": "rtl"
+ },
+ "StreamingFCLayer_Batch_6": {
+ "PE": 16,
+ "SIMD": 8,
+ "ram_style": "auto",
+ "resType": "lut",
+ "mem_mode": "decoupled",
+ "runtime_writeable_weights": 0
+ },
+ "StreamingDataWidthConverter_Batch_27": {
+ "impl_style": "hls"
+ },
+ "Thresholding_Batch_12": {
+ "PE": 1,
+ "ram_style": "distributed",
+ "mem_mode": "const",
+ "runtime_writeable_weights": 0
+ },
+ "StreamingFIFO_67": {
+ "ram_style": "block",
+ "depth": 512,
+ "impl_style": "vivado"
+ },
+ "FMPadding_Batch_6": {
+ "SIMD": 1
+ },
+ "StreamingDataWidthConverter_Batch_28": {
+ "impl_style": "hls"
+ },
+ "StreamingFIFO_69": {
+ "ram_style": "block",
+ "depth": 4096,
+ "impl_style": "vivado"
+ },
+ "ConvolutionInputGenerator_7": {
+ "SIMD": 4,
+ "ram_style": "distributed"
+ },
+ "Vector_Vector_Activate_Batch_6": {
+ "PE": 4,
+ "resType": "lut"
+ },
+ "StreamingDataWidthConverter_Batch_29": {
+ "impl_style": "hls"
+ },
+ "Thresholding_Batch_13": {
+ "PE": 1,
+ "ram_style": "distributed",
+ "mem_mode": "const",
+ "runtime_writeable_weights": 0
+ },
+ "StreamingDataWidthConverter_Batch_30": {
+ "impl_style": "hls"
+ },
+ "StreamingFIFO_74": {
+ "ram_style": "auto",
+ "depth": 64,
+ "impl_style": "rtl"
+ },
+ "StreamingFCLayer_Batch_7": {
+ "PE": 32,
+ "SIMD": 8,
+ "ram_style": "auto",
+ "resType": "lut",
+ "mem_mode": "decoupled",
+ "runtime_writeable_weights": 0
+ },
+ "StreamingDataWidthConverter_Batch_31": {
+ "impl_style": "hls"
+ },
+ "Thresholding_Batch_14": {
+ "PE": 1,
+ "ram_style": "distributed",
+ "mem_mode": "const",
+ "runtime_writeable_weights": 0
+ },
+ "StreamingFIFO_77": {
+ "ram_style": "block",
+ "depth": 512,
+ "impl_style": "vivado"
+ },
+ "FMPadding_Batch_7": {
+ "SIMD": 1
+ },
+ "StreamingDataWidthConverter_Batch_32": {
+ "impl_style": "hls"
+ },
+ "StreamingFIFO_79": {
+ "ram_style": "block",
+ "depth": 4096,
+ "impl_style": "vivado"
+ },
+ "ConvolutionInputGenerator_8": {
+ "SIMD": 4,
+ "ram_style": "distributed"
+ },
+ "Vector_Vector_Activate_Batch_7": {
+ "PE": 4,
+ "resType": "lut"
+ },
+ "StreamingDataWidthConverter_Batch_33": {
+ "impl_style": "hls"
+ },
+ "Thresholding_Batch_15": {
+ "PE": 1,
+ "ram_style": "distributed",
+ "mem_mode": "const",
+ "runtime_writeable_weights": 0
+ },
+ "StreamingDataWidthConverter_Batch_34": {
+ "impl_style": "hls"
+ },
+ "StreamingFIFO_84": {
+ "ram_style": "auto",
+ "depth": 64,
+ "impl_style": "rtl"
+ },
+ "StreamingFCLayer_Batch_8": {
+ "PE": 32,
+ "SIMD": 8,
+ "ram_style": "auto",
+ "resType": "lut",
+ "mem_mode": "decoupled",
+ "runtime_writeable_weights": 0
+ },
+ "StreamingDataWidthConverter_Batch_35": {
+ "impl_style": "hls"
+ },
+ "Thresholding_Batch_16": {
+ "PE": 1,
+ "ram_style": "distributed",
+ "mem_mode": "const",
+ "runtime_writeable_weights": 0
+ },
+ "StreamingFIFO_87": {
+ "ram_style": "block",
+ "depth": 512,
+ "impl_style": "vivado"
+ },
+ "FMPadding_Batch_8": {
+ "SIMD": 1
+ },
+ "StreamingDataWidthConverter_Batch_36": {
+ "impl_style": "hls"
+ },
+ "StreamingFIFO_89": {
+ "ram_style": "block",
+ "depth": 4096,
+ "impl_style": "vivado"
+ },
+ "ConvolutionInputGenerator_9": {
+ "SIMD": 4,
+ "ram_style": "distributed"
+ },
+ "Vector_Vector_Activate_Batch_8": {
+ "PE": 4,
+ "resType": "lut"
+ },
+ "StreamingDataWidthConverter_Batch_37": {
+ "impl_style": "hls"
+ },
+ "Thresholding_Batch_17": {
+ "PE": 1,
+ "ram_style": "distributed",
+ "mem_mode": "const",
+ "runtime_writeable_weights": 0
+ },
+ "StreamingDataWidthConverter_Batch_38": {
+ "impl_style": "hls"
+ },
+ "StreamingFIFO_94": {
+ "ram_style": "auto",
+ "depth": 64,
+ "impl_style": "rtl"
+ },
+ "StreamingFCLayer_Batch_9": {
+ "PE": 32,
+ "SIMD": 8,
+ "ram_style": "auto",
+ "resType": "lut",
+ "mem_mode": "decoupled",
+ "runtime_writeable_weights": 0
+ },
+ "StreamingDataWidthConverter_Batch_39": {
+ "impl_style": "hls"
+ },
+ "Thresholding_Batch_18": {
+ "PE": 1,
+ "ram_style": "distributed",
+ "mem_mode": "const",
+ "runtime_writeable_weights": 0
+ },
+ "StreamingFIFO_97": {
+ "ram_style": "block",
+ "depth": 512,
+ "impl_style": "vivado"
+ },
+ "FMPadding_Batch_9": {
+ "SIMD": 1
+ },
+ "StreamingDataWidthConverter_Batch_40": {
+ "impl_style": "hls"
+ },
+ "StreamingFIFO_99": {
+ "ram_style": "block",
+ "depth": 4096,
+ "impl_style": "vivado"
+ },
+ "ConvolutionInputGenerator_10": {
+ "SIMD": 4,
+ "ram_style": "distributed"
+ },
+ "Vector_Vector_Activate_Batch_9": {
+ "PE": 4,
+ "resType": "lut"
+ },
+ "StreamingDataWidthConverter_Batch_41": {
+ "impl_style": "hls"
+ },
+ "Thresholding_Batch_19": {
+ "PE": 1,
+ "ram_style": "distributed",
+ "mem_mode": "const",
+ "runtime_writeable_weights": 0
+ },
+ "StreamingDataWidthConverter_Batch_42": {
+ "impl_style": "hls"
+ },
+ "StreamingFIFO_104": {
+ "ram_style": "auto",
+ "depth": 64,
+ "impl_style": "rtl"
+ },
+ "StreamingFCLayer_Batch_10": {
+ "PE": 32,
+ "SIMD": 8,
+ "ram_style": "auto",
+ "resType": "lut",
+ "mem_mode": "decoupled",
+ "runtime_writeable_weights": 0
+ },
+ "StreamingDataWidthConverter_Batch_43": {
+ "impl_style": "hls"
+ },
+ "Thresholding_Batch_20": {
+ "PE": 1,
+ "ram_style": "distributed",
+ "mem_mode": "const",
+ "runtime_writeable_weights": 0
+ },
+ "StreamingFIFO_107": {
+ "ram_style": "block",
+ "depth": 512,
+ "impl_style": "vivado"
+ },
+ "FMPadding_Batch_10": {
+ "SIMD": 1
+ },
+ "StreamingDataWidthConverter_Batch_44": {
+ "impl_style": "hls"
+ },
+ "StreamingFIFO_109": {
+ "ram_style": "block",
+ "depth": 4096,
+ "impl_style": "vivado"
+ },
+ "ConvolutionInputGenerator_11": {
+ "SIMD": 4,
+ "ram_style": "block"
+ },
+ "Vector_Vector_Activate_Batch_10": {
+ "PE": 4,
+ "resType": "lut"
+ },
+ "StreamingDataWidthConverter_Batch_45": {
+ "impl_style": "hls"
+ },
+ "Thresholding_Batch_21": {
+ "PE": 1,
+ "ram_style": "distributed",
+ "mem_mode": "const",
+ "runtime_writeable_weights": 0
+ },
+ "StreamingDataWidthConverter_Batch_46": {
+ "impl_style": "hls"
+ },
+ "StreamingFIFO_114": {
+ "ram_style": "auto",
+ "depth": 64,
+ "impl_style": "rtl"
+ },
+ "StreamingFCLayer_Batch_11": {
+ "PE": 32,
+ "SIMD": 8,
+ "ram_style": "auto",
+ "resType": "lut",
+ "mem_mode": "decoupled",
+ "runtime_writeable_weights": 0
+ },
+ "StreamingDataWidthConverter_Batch_47": {
+ "impl_style": "hls"
+ },
+ "Thresholding_Batch_22": {
+ "PE": 1,
+ "ram_style": "distributed",
+ "mem_mode": "const",
+ "runtime_writeable_weights": 0
+ },
+ "StreamingFIFO_117": {
+ "ram_style": "block",
+ "depth": 512,
+ "impl_style": "vivado"
+ },
+ "FMPadding_Batch_11": {
+ "SIMD": 1
+ },
+ "StreamingFIFO_118": {
+ "ram_style": "block",
+ "depth": 16384,
+ "impl_style": "vivado"
+ },
+ "ConvolutionInputGenerator_12": {
+ "SIMD": 1,
+ "ram_style": "block"
+ },
+ "Vector_Vector_Activate_Batch_11": {
+ "PE": 1,
+ "resType": "lut"
+ },
+ "Thresholding_Batch_23": {
+ "PE": 1,
+ "ram_style": "distributed",
+ "mem_mode": "const",
+ "runtime_writeable_weights": 0
+ },
+ "StreamingDataWidthConverter_Batch_48": {
+ "impl_style": "hls"
+ },
+ "StreamingFIFO_122": {
+ "ram_style": "auto",
+ "depth": 64,
+ "impl_style": "rtl"
+ },
+ "StreamingFCLayer_Batch_12": {
+ "PE": 16,
+ "SIMD": 8,
+ "ram_style": "auto",
+ "resType": "lut",
+ "mem_mode": "decoupled",
+ "runtime_writeable_weights": 0
+ },
+ "StreamingDataWidthConverter_Batch_49": {
+ "impl_style": "hls"
+ },
+ "Thresholding_Batch_24": {
+ "PE": 1,
+ "ram_style": "distributed",
+ "mem_mode": "const",
+ "runtime_writeable_weights": 0
+ },
+ "StreamingFIFO_125": {
+ "ram_style": "block",
+ "depth": 1024,
+ "impl_style": "vivado"
+ },
+ "FMPadding_Batch_12": {
+ "SIMD": 1
+ },
+ "StreamingDataWidthConverter_Batch_50": {
+ "impl_style": "hls"
+ },
+ "StreamingFIFO_127": {
+ "ram_style": "block",
+ "depth": 16384,
+ "impl_style": "vivado"
+ },
+ "ConvolutionInputGenerator_13": {
+ "SIMD": 2,
+ "ram_style": "block"
+ },
+ "Vector_Vector_Activate_Batch_12": {
+ "PE": 2,
+ "resType": "lut"
+ },
+ "StreamingDataWidthConverter_Batch_51": {
+ "impl_style": "hls"
+ },
+ "Thresholding_Batch_25": {
+ "PE": 1,
+ "ram_style": "distributed",
+ "mem_mode": "const",
+ "runtime_writeable_weights": 0
+ },
+ "StreamingDataWidthConverter_Batch_52": {
+ "impl_style": "hls"
+ },
+ "StreamingFIFO_132": {
+ "ram_style": "auto",
+ "depth": 128,
+ "impl_style": "rtl"
+ },
+ "StreamingFCLayer_Batch_13": {
+ "PE": 32,
+ "SIMD": 8,
+ "ram_style": "block",
+ "resType": "lut",
+ "mem_mode": "decoupled",
+ "runtime_writeable_weights": 0
+ },
+ "StreamingDataWidthConverter_Batch_53": {
+ "impl_style": "hls"
+ },
+ "Thresholding_Batch_26": {
+ "PE": 1,
+ "ram_style": "distributed",
+ "mem_mode": "const",
+ "runtime_writeable_weights": 0
+ },
+ "ConvolutionInputGenerator_14": {
+ "SIMD": 1,
+ "ram_style": "block"
+ },
+ "Pool_Batch_0": {
+ "PE": 1
+ },
+ "StreamingDataWidthConverter_Batch_54": {
+ "impl_style": "hls"
+ },
+ "StreamingFCLayer_Batch_14": {
+ "PE": 1,
+ "SIMD": 16,
+ "ram_style": "block",
+ "resType": "lut",
+ "mem_mode": "decoupled",
+ "runtime_writeable_weights": 0
+ },
+ "ChannelwiseOp_Batch_0": {
+ "PE": 1,
+ "ram_style": "distributed"
+ },
+ "LabelSelect_Batch_0": {
+ "PE": 1
+ }
+}
diff --git a/build/mobilenet-v1/folding_config/ZCU104_folding_config.json b/build/mobilenet-v1/folding_config/ZCU104_folding_config.json
new file mode 100755
index 0000000..b441206
--- /dev/null
+++ b/build/mobilenet-v1/folding_config/ZCU104_folding_config.json
@@ -0,0 +1,816 @@
+{
+ "Defaults": {},
+ "StreamingFIFO_0": {
+ "ram_style": "ultra",
+ "depth": 1024,
+ "impl_style": "vivado"
+ },
+ "ConvolutionInputGenerator_0": {
+ "SIMD": 1,
+ "ram_style": "distributed"
+ },
+ "StreamingDataWidthConverter_Batch_0": {
+ "impl_style": "hls"
+ },
+ "StreamingFCLayer_Batch_0": {
+ "PE": 16,
+ "SIMD": 3,
+ "ram_style": "auto",
+ "resType": "dsp",
+ "mem_mode": "decoupled",
+ "runtime_writeable_weights": 0
+ },
+ "StreamingFIFO_3": {
+ "ram_style": "auto",
+ "depth": 64,
+ "impl_style": "rtl"
+ },
+ "StreamingDataWidthConverter_Batch_1": {
+ "impl_style": "hls"
+ },
+ "Thresholding_Batch_0": {
+ "PE": 1,
+ "ram_style": "distributed",
+ "mem_mode": "const",
+ "runtime_writeable_weights": 0
+ },
+ "StreamingDataWidthConverter_Batch_2": {
+ "impl_style": "hls"
+ },
+ "StreamingFIFO_6": {
+ "ram_style": "auto",
+ "depth": 256,
+ "impl_style": "rtl"
+ },
+ "FMPadding_Batch_0": {
+ "SIMD": 2
+ },
+ "StreamingDataWidthConverter_Batch_3": {
+ "impl_style": "hls"
+ },
+ "StreamingFIFO_8": {
+ "ram_style": "ultra",
+ "depth": 512,
+ "impl_style": "vivado"
+ },
+ "ConvolutionInputGenerator_1": {
+ "SIMD": 16,
+ "ram_style": "distributed"
+ },
+ "Vector_Vector_Activate_Batch_0": {
+ "PE": 16,
+ "resType": "lut"
+ },
+ "StreamingFIFO_10": {
+ "ram_style": "auto",
+ "depth": 256,
+ "impl_style": "rtl"
+ },
+ "StreamingDataWidthConverter_Batch_4": {
+ "impl_style": "hls"
+ },
+ "Thresholding_Batch_1": {
+ "PE": 1,
+ "ram_style": "distributed",
+ "mem_mode": "const",
+ "runtime_writeable_weights": 0
+ },
+ "StreamingDataWidthConverter_Batch_5": {
+ "impl_style": "hls"
+ },
+ "StreamingFCLayer_Batch_1": {
+ "PE": 8,
+ "SIMD": 8,
+ "ram_style": "auto",
+ "resType": "lut",
+ "mem_mode": "decoupled",
+ "runtime_writeable_weights": 0
+ },
+ "StreamingDataWidthConverter_Batch_6": {
+ "impl_style": "hls"
+ },
+ "Thresholding_Batch_2": {
+ "PE": 2,
+ "ram_style": "distributed",
+ "mem_mode": "const",
+ "runtime_writeable_weights": 0
+ },
+ "StreamingDataWidthConverter_Batch_7": {
+ "impl_style": "hls"
+ },
+ "StreamingFIFO_17": {
+ "ram_style": "auto",
+ "depth": 32,
+ "impl_style": "rtl"
+ },
+ "FMPadding_Batch_1": {
+ "SIMD": 4
+ },
+ "StreamingDataWidthConverter_Batch_8": {
+ "impl_style": "hls"
+ },
+ "StreamingFIFO_19": {
+ "ram_style": "ultra",
+ "depth": 1024,
+ "impl_style": "vivado"
+ },
+ "ConvolutionInputGenerator_2": {
+ "SIMD": 8,
+ "ram_style": "distributed"
+ },
+ "Vector_Vector_Activate_Batch_1": {
+ "PE": 8,
+ "resType": "lut"
+ },
+ "StreamingDataWidthConverter_Batch_9": {
+ "impl_style": "hls"
+ },
+ "Thresholding_Batch_3": {
+ "PE": 1,
+ "ram_style": "distributed",
+ "mem_mode": "const",
+ "runtime_writeable_weights": 0
+ },
+ "StreamingDataWidthConverter_Batch_10": {
+ "impl_style": "hls"
+ },
+ "StreamingFIFO_24": {
+ "ram_style": "auto",
+ "depth": 32,
+ "impl_style": "rtl"
+ },
+ "StreamingFCLayer_Batch_2": {
+ "PE": 16,
+ "SIMD": 8,
+ "ram_style": "auto",
+ "resType": "lut",
+ "mem_mode": "decoupled",
+ "runtime_writeable_weights": 0
+ },
+ "StreamingDataWidthConverter_Batch_11": {
+ "impl_style": "hls"
+ },
+ "Thresholding_Batch_4": {
+ "PE": 2,
+ "ram_style": "distributed",
+ "mem_mode": "const",
+ "runtime_writeable_weights": 0
+ },
+ "StreamingFIFO_27": {
+ "ram_style": "auto",
+ "depth": 128,
+ "impl_style": "rtl"
+ },
+ "FMPadding_Batch_2": {
+ "SIMD": 2
+ },
+ "StreamingDataWidthConverter_Batch_12": {
+ "impl_style": "hls"
+ },
+ "StreamingFIFO_29": {
+ "ram_style": "ultra",
+ "depth": 1024,
+ "impl_style": "vivado"
+ },
+ "ConvolutionInputGenerator_3": {
+ "SIMD": 16,
+ "ram_style": "block"
+ },
+ "Vector_Vector_Activate_Batch_2": {
+ "PE": 16,
+ "resType": "lut"
+ },
+ "StreamingDataWidthConverter_Batch_13": {
+ "impl_style": "hls"
+ },
+ "Thresholding_Batch_5": {
+ "PE": 2,
+ "ram_style": "distributed",
+ "mem_mode": "const",
+ "runtime_writeable_weights": 0
+ },
+ "StreamingDataWidthConverter_Batch_14": {
+ "impl_style": "hls"
+ },
+ "StreamingFIFO_34": {
+ "ram_style": "auto",
+ "depth": 32,
+ "impl_style": "rtl"
+ },
+ "StreamingFCLayer_Batch_3": {
+ "PE": 32,
+ "SIMD": 8,
+ "ram_style": "auto",
+ "resType": "lut",
+ "mem_mode": "decoupled",
+ "runtime_writeable_weights": 0
+ },
+ "StreamingDataWidthConverter_Batch_15": {
+ "impl_style": "hls"
+ },
+ "Thresholding_Batch_6": {
+ "PE": 2,
+ "ram_style": "distributed",
+ "mem_mode": "const",
+ "runtime_writeable_weights": 0
+ },
+ "StreamingFIFO_37": {
+ "ram_style": "auto",
+ "depth": 128,
+ "impl_style": "rtl"
+ },
+ "FMPadding_Batch_3": {
+ "SIMD": 2
+ },
+ "StreamingDataWidthConverter_Batch_16": {
+ "impl_style": "hls"
+ },
+ "StreamingFIFO_39": {
+ "ram_style": "ultra",
+ "depth": 4096,
+ "impl_style": "vivado"
+ },
+ "ConvolutionInputGenerator_4": {
+ "SIMD": 4,
+ "ram_style": "block"
+ },
+ "Vector_Vector_Activate_Batch_3": {
+ "PE": 4,
+ "resType": "lut"
+ },
+ "StreamingDataWidthConverter_Batch_17": {
+ "impl_style": "hls"
+ },
+ "Thresholding_Batch_7": {
+ "PE": 1,
+ "ram_style": "distributed",
+ "mem_mode": "const",
+ "runtime_writeable_weights": 0
+ },
+ "StreamingDataWidthConverter_Batch_18": {
+ "impl_style": "hls"
+ },
+ "StreamingFIFO_44": {
+ "ram_style": "auto",
+ "depth": 32,
+ "impl_style": "rtl"
+ },
+ "StreamingFCLayer_Batch_4": {
+ "PE": 16,
+ "SIMD": 8,
+ "ram_style": "auto",
+ "resType": "lut",
+ "mem_mode": "decoupled",
+ "runtime_writeable_weights": 0
+ },
+ "StreamingDataWidthConverter_Batch_19": {
+ "impl_style": "hls"
+ },
+ "Thresholding_Batch_8": {
+ "PE": 1,
+ "ram_style": "distributed",
+ "mem_mode": "const",
+ "runtime_writeable_weights": 0
+ },
+ "StreamingFIFO_47": {
+ "ram_style": "ultra",
+ "depth": 512,
+ "impl_style": "vivado"
+ },
+ "FMPadding_Batch_4": {
+ "SIMD": 1
+ },
+ "StreamingDataWidthConverter_Batch_20": {
+ "impl_style": "hls"
+ },
+ "StreamingFIFO_49": {
+ "ram_style": "ultra",
+ "depth": 1024,
+ "impl_style": "vivado"
+ },
+ "ConvolutionInputGenerator_5": {
+ "SIMD": 8,
+ "ram_style": "block"
+ },
+ "Vector_Vector_Activate_Batch_4": {
+ "PE": 8,
+ "resType": "lut"
+ },
+ "StreamingDataWidthConverter_Batch_21": {
+ "impl_style": "hls"
+ },
+ "Thresholding_Batch_9": {
+ "PE": 1,
+ "ram_style": "distributed",
+ "mem_mode": "const",
+ "runtime_writeable_weights": 0
+ },
+ "StreamingDataWidthConverter_Batch_22": {
+ "impl_style": "hls"
+ },
+ "StreamingFIFO_54": {
+ "ram_style": "auto",
+ "depth": 32,
+ "impl_style": "rtl"
+ },
+ "StreamingFCLayer_Batch_5": {
+ "PE": 32,
+ "SIMD": 8,
+ "ram_style": "auto",
+ "resType": "lut",
+ "mem_mode": "decoupled",
+ "runtime_writeable_weights": 0
+ },
+ "StreamingDataWidthConverter_Batch_23": {
+ "impl_style": "hls"
+ },
+ "Thresholding_Batch_10": {
+ "PE": 1,
+ "ram_style": "distributed",
+ "mem_mode": "const",
+ "runtime_writeable_weights": 0
+ },
+ "StreamingFIFO_57": {
+ "ram_style": "ultra",
+ "depth": 512,
+ "impl_style": "vivado"
+ },
+ "FMPadding_Batch_5": {
+ "SIMD": 1
+ },
+ "StreamingDataWidthConverter_Batch_24": {
+ "impl_style": "hls"
+ },
+ "StreamingFIFO_59": {
+ "ram_style": "ultra",
+ "depth": 8192,
+ "impl_style": "vivado"
+ },
+ "ConvolutionInputGenerator_6": {
+ "SIMD": 2,
+ "ram_style": "block"
+ },
+ "Vector_Vector_Activate_Batch_5": {
+ "PE": 2,
+ "resType": "lut"
+ },
+ "StreamingDataWidthConverter_Batch_25": {
+ "impl_style": "hls"
+ },
+ "Thresholding_Batch_11": {
+ "PE": 1,
+ "ram_style": "distributed",
+ "mem_mode": "const",
+ "runtime_writeable_weights": 0
+ },
+ "StreamingDataWidthConverter_Batch_26": {
+ "impl_style": "hls"
+ },
+ "StreamingFIFO_64": {
+ "ram_style": "auto",
+ "depth": 32,
+ "impl_style": "rtl"
+ },
+ "StreamingFCLayer_Batch_6": {
+ "PE": 16,
+ "SIMD": 8,
+ "ram_style": "auto",
+ "resType": "lut",
+ "mem_mode": "decoupled",
+ "runtime_writeable_weights": 0
+ },
+ "StreamingDataWidthConverter_Batch_27": {
+ "impl_style": "hls"
+ },
+ "Thresholding_Batch_12": {
+ "PE": 1,
+ "ram_style": "distributed",
+ "mem_mode": "const",
+ "runtime_writeable_weights": 0
+ },
+ "StreamingFIFO_67": {
+ "ram_style": "ultra",
+ "depth": 512,
+ "impl_style": "vivado"
+ },
+ "FMPadding_Batch_6": {
+ "SIMD": 1
+ },
+ "StreamingDataWidthConverter_Batch_28": {
+ "impl_style": "hls"
+ },
+ "StreamingFIFO_69": {
+ "ram_style": "ultra",
+ "depth": 4096,
+ "impl_style": "vivado"
+ },
+ "ConvolutionInputGenerator_7": {
+ "SIMD": 4,
+ "ram_style": "block"
+ },
+ "Vector_Vector_Activate_Batch_6": {
+ "PE": 4,
+ "resType": "lut"
+ },
+ "StreamingDataWidthConverter_Batch_29": {
+ "impl_style": "hls"
+ },
+ "Thresholding_Batch_13": {
+ "PE": 1,
+ "ram_style": "distributed",
+ "mem_mode": "const",
+ "runtime_writeable_weights": 0
+ },
+ "StreamingDataWidthConverter_Batch_30": {
+ "impl_style": "hls"
+ },
+ "StreamingFIFO_74": {
+ "ram_style": "auto",
+ "depth": 64,
+ "impl_style": "rtl"
+ },
+ "StreamingFCLayer_Batch_7": {
+ "PE": 32,
+ "SIMD": 8,
+ "ram_style": "auto",
+ "resType": "lut",
+ "mem_mode": "decoupled",
+ "runtime_writeable_weights": 0
+ },
+ "StreamingDataWidthConverter_Batch_31": {
+ "impl_style": "hls"
+ },
+ "Thresholding_Batch_14": {
+ "PE": 1,
+ "ram_style": "distributed",
+ "mem_mode": "const",
+ "runtime_writeable_weights": 0
+ },
+ "StreamingFIFO_77": {
+ "ram_style": "ultra",
+ "depth": 512,
+ "impl_style": "vivado"
+ },
+ "FMPadding_Batch_7": {
+ "SIMD": 1
+ },
+ "StreamingDataWidthConverter_Batch_32": {
+ "impl_style": "hls"
+ },
+ "StreamingFIFO_79": {
+ "ram_style": "ultra",
+ "depth": 4096,
+ "impl_style": "vivado"
+ },
+ "ConvolutionInputGenerator_8": {
+ "SIMD": 4,
+ "ram_style": "block"
+ },
+ "Vector_Vector_Activate_Batch_7": {
+ "PE": 4,
+ "resType": "lut"
+ },
+ "StreamingDataWidthConverter_Batch_33": {
+ "impl_style": "hls"
+ },
+ "Thresholding_Batch_15": {
+ "PE": 1,
+ "ram_style": "distributed",
+ "mem_mode": "const",
+ "runtime_writeable_weights": 0
+ },
+ "StreamingDataWidthConverter_Batch_34": {
+ "impl_style": "hls"
+ },
+ "StreamingFIFO_84": {
+ "ram_style": "auto",
+ "depth": 64,
+ "impl_style": "rtl"
+ },
+ "StreamingFCLayer_Batch_8": {
+ "PE": 32,
+ "SIMD": 8,
+ "ram_style": "auto",
+ "resType": "lut",
+ "mem_mode": "decoupled",
+ "runtime_writeable_weights": 0
+ },
+ "StreamingDataWidthConverter_Batch_35": {
+ "impl_style": "hls"
+ },
+ "Thresholding_Batch_16": {
+ "PE": 1,
+ "ram_style": "distributed",
+ "mem_mode": "const",
+ "runtime_writeable_weights": 0
+ },
+ "StreamingFIFO_87": {
+ "ram_style": "ultra",
+ "depth": 512,
+ "impl_style": "vivado"
+ },
+ "FMPadding_Batch_8": {
+ "SIMD": 1
+ },
+ "StreamingDataWidthConverter_Batch_36": {
+ "impl_style": "hls"
+ },
+ "StreamingFIFO_89": {
+ "ram_style": "ultra",
+ "depth": 4096,
+ "impl_style": "vivado"
+ },
+ "ConvolutionInputGenerator_9": {
+ "SIMD": 4,
+ "ram_style": "block"
+ },
+ "Vector_Vector_Activate_Batch_8": {
+ "PE": 4,
+ "resType": "lut"
+ },
+ "StreamingDataWidthConverter_Batch_37": {
+ "impl_style": "hls"
+ },
+ "Thresholding_Batch_17": {
+ "PE": 1,
+ "ram_style": "distributed",
+ "mem_mode": "const",
+ "runtime_writeable_weights": 0
+ },
+ "StreamingDataWidthConverter_Batch_38": {
+ "impl_style": "hls"
+ },
+ "StreamingFIFO_94": {
+ "ram_style": "auto",
+ "depth": 64,
+ "impl_style": "rtl"
+ },
+ "StreamingFCLayer_Batch_9": {
+ "PE": 32,
+ "SIMD": 8,
+ "ram_style": "auto",
+ "resType": "lut",
+ "mem_mode": "decoupled",
+ "runtime_writeable_weights": 0
+ },
+ "StreamingDataWidthConverter_Batch_39": {
+ "impl_style": "hls"
+ },
+ "Thresholding_Batch_18": {
+ "PE": 1,
+ "ram_style": "distributed",
+ "mem_mode": "const",
+ "runtime_writeable_weights": 0
+ },
+ "StreamingFIFO_97": {
+ "ram_style": "ultra",
+ "depth": 512,
+ "impl_style": "vivado"
+ },
+ "FMPadding_Batch_9": {
+ "SIMD": 1
+ },
+ "StreamingDataWidthConverter_Batch_40": {
+ "impl_style": "hls"
+ },
+ "StreamingFIFO_99": {
+ "ram_style": "ultra",
+ "depth": 4096,
+ "impl_style": "vivado"
+ },
+ "ConvolutionInputGenerator_10": {
+ "SIMD": 4,
+ "ram_style": "block"
+ },
+ "Vector_Vector_Activate_Batch_9": {
+ "PE": 4,
+ "resType": "lut"
+ },
+ "StreamingDataWidthConverter_Batch_41": {
+ "impl_style": "hls"
+ },
+ "Thresholding_Batch_19": {
+ "PE": 1,
+ "ram_style": "distributed",
+ "mem_mode": "const",
+ "runtime_writeable_weights": 0
+ },
+ "StreamingDataWidthConverter_Batch_42": {
+ "impl_style": "hls"
+ },
+ "StreamingFIFO_104": {
+ "ram_style": "auto",
+ "depth": 64,
+ "impl_style": "rtl"
+ },
+ "StreamingFCLayer_Batch_10": {
+ "PE": 32,
+ "SIMD": 8,
+ "ram_style": "auto",
+ "resType": "lut",
+ "mem_mode": "decoupled",
+ "runtime_writeable_weights": 0
+ },
+ "StreamingDataWidthConverter_Batch_43": {
+ "impl_style": "hls"
+ },
+ "Thresholding_Batch_20": {
+ "PE": 1,
+ "ram_style": "distributed",
+ "mem_mode": "const",
+ "runtime_writeable_weights": 0
+ },
+ "StreamingFIFO_107": {
+ "ram_style": "ultra",
+ "depth": 512,
+ "impl_style": "vivado"
+ },
+ "FMPadding_Batch_10": {
+ "SIMD": 1
+ },
+ "StreamingDataWidthConverter_Batch_44": {
+ "impl_style": "hls"
+ },
+ "StreamingFIFO_109": {
+ "ram_style": "ultra",
+ "depth": 4096,
+ "impl_style": "vivado"
+ },
+ "ConvolutionInputGenerator_11": {
+ "SIMD": 4,
+ "ram_style": "block"
+ },
+ "Vector_Vector_Activate_Batch_10": {
+ "PE": 4,
+ "resType": "lut"
+ },
+ "StreamingDataWidthConverter_Batch_45": {
+ "impl_style": "hls"
+ },
+ "Thresholding_Batch_21": {
+ "PE": 1,
+ "ram_style": "distributed",
+ "mem_mode": "const",
+ "runtime_writeable_weights": 0
+ },
+ "StreamingDataWidthConverter_Batch_46": {
+ "impl_style": "hls"
+ },
+ "StreamingFIFO_114": {
+ "ram_style": "auto",
+ "depth": 64,
+ "impl_style": "rtl"
+ },
+ "StreamingFCLayer_Batch_11": {
+ "PE": 32,
+ "SIMD": 8,
+ "ram_style": "auto",
+ "resType": "lut",
+ "mem_mode": "decoupled",
+ "runtime_writeable_weights": 0
+ },
+ "StreamingDataWidthConverter_Batch_47": {
+ "impl_style": "hls"
+ },
+ "Thresholding_Batch_22": {
+ "PE": 1,
+ "ram_style": "distributed",
+ "mem_mode": "const",
+ "runtime_writeable_weights": 0
+ },
+ "StreamingFIFO_117": {
+ "ram_style": "ultra",
+ "depth": 512,
+ "impl_style": "vivado"
+ },
+ "FMPadding_Batch_11": {
+ "SIMD": 1
+ },
+ "StreamingFIFO_118": {
+ "ram_style": "ultra",
+ "depth": 16384,
+ "impl_style": "vivado"
+ },
+ "ConvolutionInputGenerator_12": {
+ "SIMD": 1,
+ "ram_style": "block"
+ },
+ "Vector_Vector_Activate_Batch_11": {
+ "PE": 1,
+ "resType": "lut"
+ },
+ "Thresholding_Batch_23": {
+ "PE": 1,
+ "ram_style": "distributed",
+ "mem_mode": "const",
+ "runtime_writeable_weights": 0
+ },
+ "StreamingDataWidthConverter_Batch_48": {
+ "impl_style": "hls"
+ },
+ "StreamingFIFO_122": {
+ "ram_style": "auto",
+ "depth": 64,
+ "impl_style": "rtl"
+ },
+ "StreamingFCLayer_Batch_12": {
+ "PE": 16,
+ "SIMD": 8,
+ "ram_style": "ultra",
+ "resType": "lut",
+ "mem_mode": "decoupled",
+ "runtime_writeable_weights": 1
+ },
+ "StreamingDataWidthConverter_Batch_49": {
+ "impl_style": "hls"
+ },
+ "Thresholding_Batch_24": {
+ "PE": 1,
+ "ram_style": "distributed",
+ "mem_mode": "const",
+ "runtime_writeable_weights": 0
+ },
+ "StreamingFIFO_125": {
+ "ram_style": "ultra",
+ "depth": 1024,
+ "impl_style": "vivado"
+ },
+ "FMPadding_Batch_12": {
+ "SIMD": 1
+ },
+ "StreamingDataWidthConverter_Batch_50": {
+ "impl_style": "hls"
+ },
+ "StreamingFIFO_127": {
+ "ram_style": "ultra",
+ "depth": 16384,
+ "impl_style": "vivado"
+ },
+ "ConvolutionInputGenerator_13": {
+ "SIMD": 2,
+ "ram_style": "block"
+ },
+ "Vector_Vector_Activate_Batch_12": {
+ "PE": 2,
+ "resType": "lut"
+ },
+ "StreamingDataWidthConverter_Batch_51": {
+ "impl_style": "hls"
+ },
+ "Thresholding_Batch_25": {
+ "PE": 1,
+ "ram_style": "distributed",
+ "mem_mode": "const",
+ "runtime_writeable_weights": 0
+ },
+ "StreamingDataWidthConverter_Batch_52": {
+ "impl_style": "hls"
+ },
+ "StreamingFIFO_132": {
+ "ram_style": "auto",
+ "depth": 128,
+ "impl_style": "rtl"
+ },
+ "StreamingFCLayer_Batch_13": {
+ "PE": 32,
+ "SIMD": 8,
+ "ram_style": "ultra",
+ "resType": "lut",
+ "mem_mode": "decoupled",
+ "runtime_writeable_weights": 1
+ },
+ "StreamingDataWidthConverter_Batch_53": {
+ "impl_style": "hls"
+ },
+ "Thresholding_Batch_26": {
+ "PE": 1,
+ "ram_style": "distributed",
+ "mem_mode": "const",
+ "runtime_writeable_weights": 0
+ },
+ "ConvolutionInputGenerator_14": {
+ "SIMD": 1,
+ "ram_style": "block"
+ },
+ "Pool_Batch_0": {
+ "PE": 1
+ },
+ "StreamingDataWidthConverter_Batch_54": {
+ "impl_style": "hls"
+ },
+ "StreamingFCLayer_Batch_14": {
+ "PE": 1,
+ "SIMD": 16,
+ "ram_style": "ultra",
+ "resType": "lut",
+ "mem_mode": "decoupled",
+ "runtime_writeable_weights": 1
+ },
+ "ChannelwiseOp_Batch_0": {
+ "PE": 1,
+ "ram_style": "distributed"
+ },
+ "LabelSelect_Batch_0": {
+ "PE": 1
+ }
+}
diff --git a/finn_examples/bitfiles/bitfiles.zip.link b/finn_examples/bitfiles/bitfiles.zip.link
index b9e0520..6c05a3a 100644
--- a/finn_examples/bitfiles/bitfiles.zip.link
+++ b/finn_examples/bitfiles/bitfiles.zip.link
@@ -12,11 +12,11 @@
"md5sum": "59598d7f36ffdc74a0a0262f5b67423c"
},
"ZCU104": {
- "url": "https://github.com/Xilinx/finn-examples/releases/download/v0.0.1a/ZCU104.zip",
- "md5sum": "cdc1b757a059b0bb2b7270b3081ae52e"
+ "url": "https://github.com/Xilinx/finn-examples/releases/download/mnv1-zcu104/ZCU104.zip",
+ "md5sum": "1ed10d74e85eec70fd094b2947b5b8e3"
},
"xilinx_u250_xdma_201830_2": {
- "url": "https://github.com/Xilinx/finn-examples/releases/download/v0.0.1a/xilinx_u250_xdma_201830_2.zip",
- "md5sum": "5e8f3625fcf14aaa4fc7416fd9f15450"
+ "url": "https://github.com/Xilinx/finn-examples/releases/download/mnv1-u250-partitioned/xilinx_u250_xdma_201830_2.zip",
+ "md5sum": "d8c7d67c688f3471b6e2c53762b8b258"
}
}
diff --git a/finn_examples/driver.py b/finn_examples/driver.py
index 24f9f11..4dd5a08 100644
--- a/finn_examples/driver.py
+++ b/finn_examples/driver.py
@@ -32,6 +32,8 @@
from pynq import Overlay, allocate
from pynq.ps import Clocks
+from finn.core.datatype import DataType
+from finn.util.basic import gen_finn_dt_tensor
from finn.util.data_packing import (
finnpy_to_packed_bytearray,
packed_bytearray_to_finnpy,
@@ -84,25 +86,78 @@ def __init__(
self.batch_size = batch_size
self.fclk_mhz = fclk_mhz
if self.platform == "alveo":
- self.idma = self.idma0
+ if "input_dma_name" in io_shape_dict.keys():
+ self.idma = getattr(self, io_shape_dict["input_dma_name"])
+ else:
+ self.idma = self.idma0
self.odma = self.odma0
self.odma_handle = None
elif self.platform == "zynq-iodma":
- self.idma = self.idma0
+ if "input_dma_name" in io_shape_dict.keys():
+ self.idma = getattr(self, io_shape_dict["input_dma_name"])
+ else:
+ self.idma = self.idma0
self.odma = self.odma0
# set the clock frequency as specified by user during transformations
if self.fclk_mhz > 0:
Clocks.fclk0_mhz = self.fclk_mhz
else:
raise ValueError("Supported platforms are zynq-iodma alveo")
- # load any runtime weights
+ # load any external + runtime weights
+ self.load_external_weights()
self.load_runtime_weights()
+    def load_external_weights(self):
+        """Load any existing external (DRAM) weights from the specified dir into the
+        appropriate layer of the accelerator. Note that this must be enabled
+        during the accelerator build process. The weights directory
+        is specified as the class member ``runtime_weight_dir``. External (DRAM)
+        weights are one .npy file per layer; the part of the file name before the
+        first "." is used as the name of the weight DMA IP the tensor is fed to.
+
+        Fills ``self.external_weights`` with ``(dma, buffer, dma_name)`` tuples.
+        The list stays empty when ``runtime_weight_dir`` is None or is not an
+        existing directory.
+        """
+
+        self.external_weights = []
+        # os.path.isdir(None) raises TypeError, so handle an unset dir explicitly
+        if self.runtime_weight_dir is None:
+            return
+        if not os.path.isdir(self.runtime_weight_dir):
+            return
+
+        tmp_weight_dict = {}
+        for (dirpath, dirnames, filenames) in os.walk(self.runtime_weight_dir):
+            for w_filename in filenames:
+                if not w_filename.endswith(".npy"):
+                    continue
+                # os.walk also descends into subdirectories: join with dirpath
+                # (not the top-level dir) so that every file found can be opened
+                weight_tensor = np.load(os.path.join(dirpath, w_filename))
+                idma_name = w_filename.split(".")[0]
+                tmp_weight_dict[idma_name] = weight_tensor
+
+        for idma_name in tmp_weight_dict.keys():
+            # only tensors whose name matches an IP in the overlay are used
+            if idma_name in self.ip_dict.keys():
+                iwdma = getattr(self, idma_name)
+                weight_tensor = tmp_weight_dict[idma_name]
+                weight_buf = allocate(weight_tensor.shape, dtype=np.uint8)
+                weight_buf[:] = weight_tensor
+                # weight_buf.sync_to_device()
+                weight_buf.flush()
+
+                self.external_weights += [(iwdma, weight_buf, idma_name)]
+
+        if "number_of_external_weights" in self._io_shape_dict:
+            hw_ext_weights = self._io_shape_dict["number_of_external_weights"]
+            assert len(self.external_weights) == hw_ext_weights, (
+                "Number of hardware external weights and number of external "
+                + "weight tensors available do not match. \n"
+                + "Is runtime_weight_dir pointing to the correct folder?"
+            )
+
def load_runtime_weights(self, flush_accel=True, verify=True):
- """Load any existing runtime weights from the specified dir into the
+ """Load any existing runtime-writable weights from the specified dir into the
appropriate layer of the accelerator. Note that this must be enabled
during the accelerator build process. The runtime weights directory
- is specified as the class member ``runtime_weight_dir``.
+ is specified as the class member ``runtime_weight_dir``. Runtime-writable
+ weights are provided as one .dat file per layer.
Parameters
----------
@@ -122,18 +177,25 @@ def load_runtime_weights(self, flush_accel=True, verify=True):
if w_filename.endswith(".dat"):
with open(self.runtime_weight_dir + "/" + w_filename, "r") as f:
dat = f.read()
+ else:
+ continue
layer_w = np.fromiter(
[int(x, 16) for x in dat.strip().split()], dtype=np.uint32
)
- layer_ind = int(w_filename.split("_")[0])
- rt_weight_dict[layer_ind] = layer_w
- for layer_ind in rt_weight_dict.keys():
- cand_if_name = "StreamingDataflowPartition_1/s_axilite_%d" % layer_ind
+ sdp_ind = int(w_filename.split("_")[0])
+ layer_ind = int(w_filename.split("_")[1])
+ rt_weight_dict[(sdp_ind, layer_ind)] = layer_w
+ for sdp_ind, layer_ind in rt_weight_dict.keys():
+ cand_if_name = "StreamingDataflowPartition_%d/s_axilite_%d" % (
+ sdp_ind,
+ layer_ind,
+ )
if cand_if_name in self.ip_dict.keys():
layer_mmio = getattr(
- self.StreamingDataflowPartition_1, "s_axilite_%d" % layer_ind
+ getattr(self, "StreamingDataflowPartition_%d" % sdp_ind),
+ "s_axilite_%d" % layer_ind,
).mmio
- layer_w = rt_weight_dict[layer_ind]
+ layer_w = rt_weight_dict[(sdp_ind, layer_ind)]
layer_mmio.write_mm(0, layer_w.tobytes())
if verify:
new_w = np.copy(layer_mmio.array[: layer_w.shape[0]])
@@ -278,6 +340,10 @@ def execute_on_buffers(self, asynch=False, batch_size=None):
if self.platform == "zynq-iodma":
assert self.odma.read(0x00) & 0x4 != 0, "Output DMA is not idle"
# manually launch IODMAs since signatures are missing
+ for iwdma, iwbuf, iwdma_name in self.external_weights:
+ iwdma.write(0x10, iwbuf.device_address)
+ iwdma.write(0x1C, batch_size)
+ iwdma.write(0x00, 1)
self.idma.write(0x10, self.ibuf_packed_device.device_address)
self.idma.write(0x1C, batch_size)
self.odma.write(0x10, self.obuf_packed_device.device_address)
@@ -287,6 +353,8 @@ def execute_on_buffers(self, asynch=False, batch_size=None):
elif self.platform == "alveo":
assert self.odma_handle is None, "Output DMA is already running"
self.idma.start(self.ibuf_packed_device, batch_size)
+ for iwdma, iwbuf, iwdma_name in self.external_weights:
+ iwdma.start(iwbuf, batch_size)
self.odma_handle = self.odma.start(self.obuf_packed_device, batch_size)
else:
raise Exception("Unrecognized platform: %s" % self.platform)
@@ -338,46 +406,55 @@ def throughput_test(self):
res["DRAM_out_bandwidth[Mb/s]"] = (
np.prod(self.oshape_packed) * 0.000001 / runtime
)
- if self.platform != "alveo":
+ for iwdma, iwbuf, iwdma_name in self.external_weights:
+ res["DRAM_extw_%s_bandwidth[Mb/s]" % iwdma_name] = (
+ self.batch_size * np.prod(iwbuf.shape) * 0.000001 / runtime
+ )
+ if self.platform == "zynq-iodma":
res["fclk[mhz]"] = Clocks.fclk0_mhz
- else:
- res["fclk[mhz]"] = self.fclk_mhz
+ elif self.platform == "alveo":
+ res["fclk[mhz]"] = self.clock_dict["clock0"]["frequency"]
res["batch_size"] = self.batch_size
# also benchmark driver-related overheads
- input_npy = np.zeros(self.ishape_normal, dtype=self.idt.to_numpy_dt())
+ input_npy = gen_finn_dt_tensor(self.idt, self.ishape_normal)
+ # provide as int8/uint8 to support fast packing path where possible
+ if self.idt == DataType.UINT8:
+ input_npy = input_npy.astype(np.uint8)
+ elif self.idt == DataType.INT8:
+ input_npy = input_npy.astype(np.int8)
start = time.time()
ibuf_folded = self.fold_input(input_npy)
end = time.time()
runtime = end - start
- res["fold_input[ms]"] = runtime
+ res["fold_input[ms]"] = runtime * 1000
start = time.time()
ibuf_packed = self.pack_input(ibuf_folded)
end = time.time()
runtime = end - start
- res["pack_input[ms]"] = runtime
+ res["pack_input[ms]"] = runtime * 1000
start = time.time()
self.copy_input_data_to_device(ibuf_packed)
end = time.time()
runtime = end - start
- res["copy_input_data_to_device[ms]"] = runtime
+ res["copy_input_data_to_device[ms]"] = runtime * 1000
start = time.time()
self.copy_output_data_from_device(self.obuf_packed)
end = time.time()
runtime = end - start
- res["copy_output_data_from_device[ms]"] = runtime
+ res["copy_output_data_from_device[ms]"] = runtime * 1000
start = time.time()
obuf_folded = self.unpack_output(self.obuf_packed)
end = time.time()
runtime = end - start
- res["unpack_output[ms]"] = runtime
+ res["unpack_output[ms]"] = runtime * 1000
start = time.time()
self.unfold_output(obuf_folded)
end = time.time()
runtime = end - start
- res["unfold_output[ms]"] = runtime
+ res["unfold_output[ms]"] = runtime * 1000
return res
diff --git a/finn_examples/models.py b/finn_examples/models.py
index fc5da8e..6e6e147 100644
--- a/finn_examples/models.py
+++ b/finn_examples/models.py
@@ -104,6 +104,28 @@ def find_bitfile(model_name, target_platform):
)
+def find_runtime_weights(model_name, target_platform):
+    """Return the directory holding the runtime weights for a model/platform pair.
+
+    Two locations inside the ``finn_examples`` package are probed in order:
+    ``bitfiles/<platform>/<model>_runtime_weights`` and the same relative path
+    under ``bitfiles/bitfiles.zip.d``. The first candidate that is an existing,
+    non-empty directory is returned; an Exception listing both candidates is
+    raised when neither qualifies.
+    """
+    weight_dir = "%s_runtime_weights" % (model_name)
+    layouts = ("bitfiles/%s/%s", "bitfiles/bitfiles.zip.d/%s/%s")
+    weight_dir_candidates = [
+        pk.resource_filename("finn_examples", layout % (target_platform, weight_dir))
+        for layout in layouts
+    ]
+    for candidate in weight_dir_candidates:
+        if not os.path.isdir(candidate):
+            continue
+        # an existing but empty directory does not count as a hit
+        if os.listdir(candidate):
+            return candidate
+    raise Exception(
+        "Runtime weights for model = %s target platform = %s not found. Looked in: %s"
+        % (model_name, target_platform, str(weight_dir_candidates))
+    )
+
+
def get_driver_mode():
driver_modes = {"edge": "zynq-iodma", "pcie": "alveo"}
return driver_modes[get_edge_or_pcie()]
@@ -170,4 +192,16 @@ def mobilenetv1_w4a4_imagenet(target_platform=None):
driver_mode = get_driver_mode()
model_name = "mobilenetv1-w4a4"
filename = find_bitfile(model_name, target_platform)
- return FINNExampleOverlay(filename, driver_mode, _imagenet_top5inds_io_shape_dict)
+ if target_platform in ["ZCU104"]:
+ runtime_weight_dir = find_runtime_weights(model_name, target_platform)
+ else:
+ runtime_weight_dir = None
+ # target 185 MHz for Zynq (this is ignored for Alveo)
+ fclk_mhz = 185.0
+ return FINNExampleOverlay(
+ filename,
+ driver_mode,
+ _imagenet_top5inds_io_shape_dict,
+ runtime_weight_dir=runtime_weight_dir,
+ fclk_mhz=fclk_mhz,
+ )
diff --git a/finn_examples/notebooks/2_imagenet_with_mobilenet_v1.ipynb b/finn_examples/notebooks/2_imagenet_with_mobilenet_v1.ipynb
old mode 100644
new mode 100755
index 327d701..4574435
--- a/finn_examples/notebooks/2_imagenet_with_mobilenet_v1.ipynb
+++ b/finn_examples/notebooks/2_imagenet_with_mobilenet_v1.ipynb
@@ -69,7 +69,9 @@
"metadata": {},
"outputs": [],
"source": [
- "accel = models.mobilenetv1_w4a4_imagenet()"
+ "accel = models.mobilenetv1_w4a4_imagenet()\n",
+ "#some systems might require a manual platform setting:\n",
+ "#accel = models.mobilenetv1_w4a4_imagenet(\"ZCU102\")"
]
},
{
@@ -91,87 +93,94 @@
"print(\"Expected output shape and datatype: %s %s\" % (str(accel.oshape_normal), str(accel.odt)))"
]
},
- {
- "cell_type": "code",
- "execution_count": 4,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "WARNING: pip is being invoked by an old script wrapper. This will fail in a future version of pip.\n",
- "Please see https://github.com/pypa/pip/issues/5599 for advice on fixing the underlying issue.\n",
- "To avoid this problem you can invoke Python with '-m pip' instead of running pip directly.\n",
- "Requirement already satisfied: torchvision in /scratch/users/yamanu/conda/finn-examples/lib/python3.7/site-packages (0.8.2)\n",
- "Requirement already satisfied: pillow>=4.1.1 in /scratch/users/yamanu/conda/finn-examples/lib/python3.7/site-packages (from torchvision) (7.0.0)\n",
- "Requirement already satisfied: numpy in /scratch/users/yamanu/conda/finn-examples/lib/python3.7/site-packages (from torchvision) (1.18.1)\n",
- "Requirement already satisfied: torch==1.7.1 in /scratch/users/yamanu/conda/finn-examples/lib/python3.7/site-packages (from torchvision) (1.7.1)\n",
- "Requirement already satisfied: typing-extensions in /scratch/users/yamanu/conda/finn-examples/lib/python3.7/site-packages (from torch==1.7.1->torchvision) (3.7.4.3)\n"
- ]
- }
- ],
- "source": [
- "! pip install torchvision"
- ]
- },
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "# Load the ImageNet validation dataset"
+ "# Prepare loading of ImageNet validation dataset"
]
},
{
"cell_type": "code",
- "execution_count": 5,
+ "execution_count": 4,
"metadata": {},
"outputs": [
{
- "data": {
- "text/plain": [
- "'/proj/xlabs_t3/users/ml-workspace/datasets/imagenet/raw-images/imagenet_symlink/val'"
- ]
- },
- "execution_count": 5,
- "metadata": {},
- "output_type": "execute_result"
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "/proj/xlabs_t3/users/ml-workspace/datasets/imagenet/raw-images/imagenet_symlink/val\n"
+ ]
}
],
"source": [
- "import torchvision.transforms as transforms\n",
- "import torchvision.datasets as datasets\n",
- "import torch\n",
"import numpy as np\n",
+ "from PIL import Image\n",
+ "from dataset_loading import FileQueue, ImgQueue\n",
"import os\n",
"\n",
- "os.environ[\"IMAGENET_VAL_PATH\"]"
+ "# 2 ways to provide the data:\n",
+ "# without a label file: expect images in 1000 sorted subfolders\n",
+ "# with a label file: expect images directly in val directory\n",
+ "val_dir = os.environ[\"IMAGENET_VAL_PATH\"]\n",
+ "label_file = None\n",
+ "print(val_dir)"
]
},
{
"cell_type": "code",
- "execution_count": 6,
- "metadata": {},
- "outputs": [],
- "source": [
- "valdir = os.environ[\"IMAGENET_VAL_PATH\"]\n",
- "batch_size = 1\n",
- "val_loader = torch.utils.data.DataLoader(\n",
- " datasets.ImageFolder(valdir, transforms.Compose([\n",
- " transforms.Resize(256),\n",
- " transforms.CenterCrop(224),\n",
- " transforms.Lambda(lambda x: np.array(x, dtype=np.uint8))\n",
- " ])),\n",
- " batch_size=batch_size, shuffle=False, num_workers=0, pin_memory=True)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 7,
+ "execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
- "test_single_x, test_single_y = val_loader.sampler.data_source.__getitem__(0)"
+ "def img_resize(img, size):\n",
+ "    # Scale img so that its shorter edge equals size, keeping the aspect ratio.\n",
+ "    # The image is returned untouched when the shorter edge already matches.\n",
+ "    width, height = img.size\n",
+ "    if min(width, height) == size:\n",
+ "        return img\n",
+ "    if width < height:\n",
+ "        target = (size, int(size * height / width))\n",
+ "    else:\n",
+ "        target = (int(size * width / height), size)\n",
+ "    return img.resize(target, Image.BILINEAR)\n",
+ "\n",
+ "def img_center_crop(img, size):\n",
+ "    # Cut a size x size window out of the middle of img.\n",
+ "    full_width, full_height = img.size\n",
+ "    top = int(round((full_height - size) / 2.))\n",
+ "    left = int(round((full_width - size) / 2.))\n",
+ "    box = (left, top, left + size, top + size)\n",
+ "    return img.crop(box)\n",
+ "\n",
+ "def pre_process(img_np):\n",
+ "    # uint8 image array: resize shorter edge to 256, center-crop 224x224, back to uint8 array\n",
+ "    pil_img = Image.fromarray(img_np.astype(np.uint8))\n",
+ "    cropped = img_center_crop(img_resize(pil_img, 256), 224)\n",
+ "    return np.array(cropped, dtype=np.uint8)\n",
+ "\n",
+ "def setup_dataloader(val_path, label_file_path = None, batch_size=100, n_images = 50000):\n",
+ "    # Build an ImgQueue that yields pre-processed (image, label) pairs from val_path.\n",
+ "    # Without a label file, images are expected in 1000 class subfolders and the label\n",
+ "    # of an image is the position of its folder in the sorted folder list.\n",
+ "    # With a label file, images are expected directly in val_path and labels are read\n",
+ "    # from column 1 of that file.\n",
+ "    if label_file_path is None:\n",
+ "        # os.scandir yields entries in arbitrary order, so sort to get stable labels\n",
+ "        val_folders = sorted(f.name for f in os.scandir(val_path) if f.is_dir())\n",
+ "        assert len(val_folders) == 1000, \"Expected 1000 subfolders in ILSVRC2012 val\"\n",
+ "        files = []\n",
+ "        labels = []\n",
+ "        for idx, folder in enumerate(val_folders):\n",
+ "            current_files = sorted(os.listdir(os.path.join(val_path, folder)))\n",
+ "            current_files = [os.path.join(folder, file) for file in current_files]\n",
+ "            files.extend(current_files)\n",
+ "            labels.extend([idx]*len(current_files))\n",
+ "        # labels is not truncated here; zip() below stops at the shorter list\n",
+ "        files = files[:n_images]\n",
+ "    else:\n",
+ "        files = ['ILSVRC2012_val_{:08d}.JPEG'.format(i) for i in range(1,n_images+1)]\n",
+ "        labels = np.loadtxt(label_file_path, dtype=int, usecols=1)\n",
+ "\n",
+ "    file_queue = FileQueue()\n",
+ "    file_queue.load_epochs(list(zip(files,labels)), shuffle=False)\n",
+ "    img_queue = ImgQueue(maxsize=batch_size)\n",
+ "    img_queue.start_loaders(file_queue, num_threads=4, img_dir=val_path, transform=pre_process)\n",
+ "    return img_queue"
]
},
{
@@ -183,12 +192,12 @@
},
{
"cell_type": "code",
- "execution_count": 8,
+ "execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
- "image/png": "\n",
+ "image/png": "\n",
"text/plain": [
""
]
@@ -200,15 +209,20 @@
}
],
"source": [
+ "%matplotlib inline\n",
"from matplotlib import pyplot as plt\n",
"\n",
- "plt.imshow(test_single_x.reshape(224,224,3))\n",
+ "img_queue = setup_dataloader(val_dir, label_file, 1, 1)\n",
+ "\n",
+ "test_single_x, test_single_y = img_queue.get()\n",
+ "\n",
+ "plt.imshow(test_single_x)\n",
"plt.show()"
]
},
{
"cell_type": "code",
- "execution_count": 9,
+ "execution_count": 7,
"metadata": {},
"outputs": [
{
@@ -217,7 +231,7 @@
"0"
]
},
- "execution_count": 9,
+ "execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
@@ -228,7 +242,7 @@
},
{
"cell_type": "code",
- "execution_count": 10,
+ "execution_count": 8,
"metadata": {},
"outputs": [
{
@@ -246,7 +260,7 @@
},
{
"cell_type": "code",
- "execution_count": 11,
+ "execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
@@ -255,14 +269,14 @@
},
{
"cell_type": "code",
- "execution_count": 12,
+ "execution_count": 10,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
- "Top-5 classes predicted by the accelerator: [[[[391. 48. 0. 39. 395.]]]]\n"
+ "Top-5 classes predicted by the accelerator: [[[[391. 0. 395. 394. 48.]]]]\n"
]
}
],
@@ -272,14 +286,14 @@
},
{
"cell_type": "code",
- "execution_count": 13,
+ "execution_count": 11,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
- "2.71 ms ± 22.8 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n"
+ "2.18 ms ± 6.29 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n"
]
}
],
@@ -297,102 +311,561 @@
},
{
"cell_type": "code",
- "execution_count": 14,
+ "execution_count": 12,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
- "Accelerator buffer shapes are (1000, 224, 224, 1, 3) for input, (1000, 1, 1, 1, 10) for output\n"
+ "Accelerator buffer shapes are (100, 224, 224, 1, 3) for input, (100, 1, 1, 1, 10) for output\n"
]
}
],
"source": [
- "import numpy as np\n",
- "\n",
- "batch_size = 1000\n",
+ "batch_size = 100\n",
"accel.batch_size = batch_size\n",
- "print(\"Accelerator buffer shapes are %s for input, %s for output\" % (str(accel.ishape_packed), str(accel.oshape_packed)) )\n",
- "obuf_packed = np.empty_like(accel.obuf_packed_device)\n",
- "val_loader = torch.utils.data.DataLoader(\n",
- " datasets.ImageFolder(valdir, transforms.Compose([\n",
- " transforms.Resize(256),\n",
- " transforms.CenterCrop(224),\n",
- " transforms.Lambda(lambda x: np.array(x, dtype=np.uint8))\n",
- " ])),\n",
- " batch_size=batch_size, shuffle=False, num_workers=16, pin_memory=True)"
+ "print(\"Accelerator buffer shapes are %s for input, %s for output\" % (str(accel.ishape_packed), str(accel.oshape_packed)) )"
]
},
{
"cell_type": "code",
- "execution_count": 15,
- "metadata": {},
+ "execution_count": 13,
+ "metadata": {
+ "scrolled": true
+ },
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
- "batch 1 : total OK 844 NOK 156\n",
- "batch 2 : total OK 1589 NOK 411\n",
- "batch 3 : total OK 2296 NOK 704\n",
- "batch 4 : total OK 2967 NOK 1033\n",
- "batch 5 : total OK 3842 NOK 1158\n",
- "batch 6 : total OK 4622 NOK 1378\n",
- "batch 7 : total OK 5437 NOK 1563\n",
- "batch 8 : total OK 6247 NOK 1753\n",
- "batch 9 : total OK 6949 NOK 2051\n",
- "batch 10 : total OK 7675 NOK 2325\n",
- "batch 11 : total OK 8445 NOK 2555\n",
- "batch 12 : total OK 9172 NOK 2828\n",
- "batch 13 : total OK 9935 NOK 3065\n",
- "batch 14 : total OK 10671 NOK 3329\n",
- "batch 15 : total OK 11444 NOK 3556\n",
- "batch 16 : total OK 12173 NOK 3827\n",
- "batch 17 : total OK 13030 NOK 3970\n",
- "batch 18 : total OK 13765 NOK 4235\n",
- "batch 19 : total OK 14550 NOK 4450\n",
- "batch 20 : total OK 15292 NOK 4708\n",
- "batch 21 : total OK 15973 NOK 5027\n",
- "batch 22 : total OK 16643 NOK 5357\n",
- "batch 23 : total OK 17294 NOK 5706\n",
- "batch 24 : total OK 17922 NOK 6078\n",
- "batch 25 : total OK 18528 NOK 6472\n",
- "batch 26 : total OK 19139 NOK 6861\n",
- "batch 27 : total OK 19806 NOK 7194\n",
- "batch 28 : total OK 20487 NOK 7513\n",
- "batch 29 : total OK 21251 NOK 7749\n",
- "batch 30 : total OK 21891 NOK 8109\n",
- "batch 31 : total OK 22590 NOK 8410\n",
- "batch 32 : total OK 23150 NOK 8850\n",
- "batch 33 : total OK 23804 NOK 9196\n",
- "batch 34 : total OK 24450 NOK 9550\n",
- "batch 35 : total OK 25115 NOK 9885\n",
- "batch 36 : total OK 25799 NOK 10201\n",
- "batch 37 : total OK 26470 NOK 10530\n",
- "batch 38 : total OK 27106 NOK 10894\n",
- "batch 39 : total OK 27777 NOK 11223\n",
- "batch 40 : total OK 28422 NOK 11578\n",
- "batch 41 : total OK 29092 NOK 11908\n",
- "batch 42 : total OK 29712 NOK 12288\n",
- "batch 43 : total OK 30363 NOK 12637\n",
- "batch 44 : total OK 31033 NOK 12967\n",
- "batch 45 : total OK 31664 NOK 13336\n",
- "batch 46 : total OK 32305 NOK 13695\n",
- "batch 47 : total OK 33019 NOK 13981\n",
- "batch 48 : total OK 33809 NOK 14191\n",
- "batch 49 : total OK 34391 NOK 14609\n",
- "batch 50 : total OK 35206 NOK 14794\n"
+ "batch 1 : total OK 88 NOK 12\n",
+ "batch 2 : total OK 164 NOK 36\n",
+ "batch 3 : total OK 241 NOK 59\n",
+ "batch 4 : total OK 311 NOK 89\n",
+ "batch 5 : total OK 403 NOK 97\n",
+ "batch 6 : total OK 491 NOK 109\n",
+ "batch 7 : total OK 580 NOK 120\n",
+ "batch 8 : total OK 670 NOK 130\n",
+ "batch 9 : total OK 757 NOK 143\n",
+ "batch 10 : total OK 846 NOK 154\n",
+ "batch 11 : total OK 927 NOK 173\n",
+ "batch 12 : total OK 1018 NOK 182\n",
+ "batch 13 : total OK 1110 NOK 190\n",
+ "batch 14 : total OK 1180 NOK 220\n",
+ "batch 15 : total OK 1262 NOK 238\n",
+ "batch 16 : total OK 1337 NOK 263\n",
+ "batch 17 : total OK 1395 NOK 305\n",
+ "batch 18 : total OK 1454 NOK 346\n",
+ "batch 19 : total OK 1521 NOK 379\n",
+ "batch 20 : total OK 1590 NOK 410\n",
+ "batch 21 : total OK 1650 NOK 450\n",
+ "batch 22 : total OK 1728 NOK 472\n",
+ "batch 23 : total OK 1798 NOK 502\n",
+ "batch 24 : total OK 1861 NOK 539\n",
+ "batch 25 : total OK 1933 NOK 567\n",
+ "batch 26 : total OK 2019 NOK 581\n",
+ "batch 27 : total OK 2093 NOK 607\n",
+ "batch 28 : total OK 2145 NOK 655\n",
+ "batch 29 : total OK 2225 NOK 675\n",
+ "batch 30 : total OK 2294 NOK 706\n",
+ "batch 31 : total OK 2352 NOK 748\n",
+ "batch 32 : total OK 2412 NOK 788\n",
+ "batch 33 : total OK 2474 NOK 826\n",
+ "batch 34 : total OK 2529 NOK 871\n",
+ "batch 35 : total OK 2593 NOK 907\n",
+ "batch 36 : total OK 2677 NOK 923\n",
+ "batch 37 : total OK 2750 NOK 950\n",
+ "batch 38 : total OK 2817 NOK 983\n",
+ "batch 39 : total OK 2895 NOK 1005\n",
+ "batch 40 : total OK 2965 NOK 1035\n",
+ "batch 41 : total OK 3051 NOK 1049\n",
+ "batch 42 : total OK 3138 NOK 1062\n",
+ "batch 43 : total OK 3226 NOK 1074\n",
+ "batch 44 : total OK 3310 NOK 1090\n",
+ "batch 45 : total OK 3402 NOK 1098\n",
+ "batch 46 : total OK 3491 NOK 1109\n",
+ "batch 47 : total OK 3580 NOK 1120\n",
+ "batch 48 : total OK 3672 NOK 1128\n",
+ "batch 49 : total OK 3756 NOK 1144\n",
+ "batch 50 : total OK 3836 NOK 1164\n",
+ "batch 51 : total OK 3915 NOK 1185\n",
+ "batch 52 : total OK 3997 NOK 1203\n",
+ "batch 53 : total OK 4079 NOK 1221\n",
+ "batch 54 : total OK 4154 NOK 1246\n",
+ "batch 55 : total OK 4230 NOK 1270\n",
+ "batch 56 : total OK 4309 NOK 1291\n",
+ "batch 57 : total OK 4383 NOK 1317\n",
+ "batch 58 : total OK 4458 NOK 1342\n",
+ "batch 59 : total OK 4538 NOK 1362\n",
+ "batch 60 : total OK 4606 NOK 1394\n",
+ "batch 61 : total OK 4677 NOK 1423\n",
+ "batch 62 : total OK 4756 NOK 1444\n",
+ "batch 63 : total OK 4817 NOK 1483\n",
+ "batch 64 : total OK 4891 NOK 1509\n",
+ "batch 65 : total OK 4978 NOK 1522\n",
+ "batch 66 : total OK 5067 NOK 1533\n",
+ "batch 67 : total OK 5152 NOK 1548\n",
+ "batch 68 : total OK 5235 NOK 1565\n",
+ "batch 69 : total OK 5326 NOK 1574\n",
+ "batch 70 : total OK 5418 NOK 1582\n",
+ "batch 71 : total OK 5503 NOK 1597\n",
+ "batch 72 : total OK 5589 NOK 1611\n",
+ "batch 73 : total OK 5678 NOK 1622\n",
+ "batch 74 : total OK 5763 NOK 1637\n",
+ "batch 75 : total OK 5853 NOK 1647\n",
+ "batch 76 : total OK 5923 NOK 1677\n",
+ "batch 77 : total OK 6000 NOK 1700\n",
+ "batch 78 : total OK 6081 NOK 1719\n",
+ "batch 79 : total OK 6172 NOK 1728\n",
+ "batch 80 : total OK 6231 NOK 1769\n",
+ "batch 81 : total OK 6314 NOK 1786\n",
+ "batch 82 : total OK 6374 NOK 1826\n",
+ "batch 83 : total OK 6441 NOK 1859\n",
+ "batch 84 : total OK 6490 NOK 1910\n",
+ "batch 85 : total OK 6570 NOK 1930\n",
+ "batch 86 : total OK 6638 NOK 1962\n",
+ "batch 87 : total OK 6709 NOK 1991\n",
+ "batch 88 : total OK 6779 NOK 2021\n",
+ "batch 89 : total OK 6858 NOK 2042\n",
+ "batch 90 : total OK 6934 NOK 2066\n",
+ "batch 91 : total OK 7007 NOK 2093\n",
+ "batch 92 : total OK 7086 NOK 2114\n",
+ "batch 93 : total OK 7158 NOK 2142\n",
+ "batch 94 : total OK 7224 NOK 2176\n",
+ "batch 95 : total OK 7290 NOK 2210\n",
+ "batch 96 : total OK 7368 NOK 2232\n",
+ "batch 97 : total OK 7426 NOK 2274\n",
+ "batch 98 : total OK 7510 NOK 2290\n",
+ "batch 99 : total OK 7581 NOK 2319\n",
+ "batch 100 : total OK 7661 NOK 2339\n",
+ "batch 101 : total OK 7720 NOK 2380\n",
+ "batch 102 : total OK 7805 NOK 2395\n",
+ "batch 103 : total OK 7874 NOK 2426\n",
+ "batch 104 : total OK 7957 NOK 2443\n",
+ "batch 105 : total OK 8038 NOK 2462\n",
+ "batch 106 : total OK 8111 NOK 2489\n",
+ "batch 107 : total OK 8185 NOK 2515\n",
+ "batch 108 : total OK 8269 NOK 2531\n",
+ "batch 109 : total OK 8358 NOK 2542\n",
+ "batch 110 : total OK 8439 NOK 2561\n",
+ "batch 111 : total OK 8514 NOK 2586\n",
+ "batch 112 : total OK 8585 NOK 2615\n",
+ "batch 113 : total OK 8666 NOK 2634\n",
+ "batch 114 : total OK 8722 NOK 2678\n",
+ "batch 115 : total OK 8808 NOK 2692\n",
+ "batch 116 : total OK 8868 NOK 2732\n",
+ "batch 117 : total OK 8939 NOK 2761\n",
+ "batch 118 : total OK 9022 NOK 2778\n",
+ "batch 119 : total OK 9094 NOK 2806\n",
+ "batch 120 : total OK 9165 NOK 2835\n",
+ "batch 121 : total OK 9213 NOK 2887\n",
+ "batch 122 : total OK 9287 NOK 2913\n",
+ "batch 123 : total OK 9376 NOK 2924\n",
+ "batch 124 : total OK 9446 NOK 2954\n",
+ "batch 125 : total OK 9510 NOK 2990\n",
+ "batch 126 : total OK 9579 NOK 3021\n",
+ "batch 127 : total OK 9659 NOK 3041\n",
+ "batch 128 : total OK 9757 NOK 3043\n",
+ "batch 129 : total OK 9832 NOK 3068\n",
+ "batch 130 : total OK 9926 NOK 3074\n",
+ "batch 131 : total OK 10010 NOK 3090\n",
+ "batch 132 : total OK 10097 NOK 3103\n",
+ "batch 133 : total OK 10153 NOK 3147\n",
+ "batch 134 : total OK 10212 NOK 3188\n",
+ "batch 135 : total OK 10292 NOK 3208\n",
+ "batch 136 : total OK 10353 NOK 3247\n",
+ "batch 137 : total OK 10423 NOK 3277\n",
+ "batch 138 : total OK 10516 NOK 3284\n",
+ "batch 139 : total OK 10588 NOK 3312\n",
+ "batch 140 : total OK 10666 NOK 3334\n",
+ "batch 141 : total OK 10731 NOK 3369\n",
+ "batch 142 : total OK 10790 NOK 3410\n",
+ "batch 143 : total OK 10860 NOK 3440\n",
+ "batch 144 : total OK 10938 NOK 3462\n",
+ "batch 145 : total OK 11025 NOK 3475\n",
+ "batch 146 : total OK 11106 NOK 3494\n",
+ "batch 147 : total OK 11197 NOK 3503\n",
+ "batch 148 : total OK 11283 NOK 3517\n",
+ "batch 149 : total OK 11363 NOK 3537\n",
+ "batch 150 : total OK 11439 NOK 3561\n",
+ "batch 151 : total OK 11522 NOK 3578\n",
+ "batch 152 : total OK 11585 NOK 3615\n",
+ "batch 153 : total OK 11658 NOK 3642\n",
+ "batch 154 : total OK 11745 NOK 3655\n",
+ "batch 155 : total OK 11824 NOK 3676\n",
+ "batch 156 : total OK 11889 NOK 3711\n",
+ "batch 157 : total OK 11950 NOK 3750\n",
+ "batch 158 : total OK 12010 NOK 3790\n",
+ "batch 159 : total OK 12098 NOK 3802\n",
+ "batch 160 : total OK 12177 NOK 3823\n",
+ "batch 161 : total OK 12270 NOK 3830\n",
+ "batch 162 : total OK 12359 NOK 3841\n",
+ "batch 163 : total OK 12450 NOK 3850\n",
+ "batch 164 : total OK 12540 NOK 3860\n",
+ "batch 165 : total OK 12613 NOK 3887\n",
+ "batch 166 : total OK 12695 NOK 3905\n",
+ "batch 167 : total OK 12789 NOK 3911\n",
+ "batch 168 : total OK 12871 NOK 3929\n",
+ "batch 169 : total OK 12955 NOK 3945\n",
+ "batch 170 : total OK 13047 NOK 3953\n",
+ "batch 171 : total OK 13119 NOK 3981\n",
+ "batch 172 : total OK 13202 NOK 3998\n",
+ "batch 173 : total OK 13276 NOK 4024\n",
+ "batch 174 : total OK 13363 NOK 4037\n",
+ "batch 175 : total OK 13422 NOK 4078\n",
+ "batch 176 : total OK 13514 NOK 4086\n",
+ "batch 177 : total OK 13579 NOK 4121\n",
+ "batch 178 : total OK 13668 NOK 4132\n",
+ "batch 179 : total OK 13728 NOK 4172\n",
+ "batch 180 : total OK 13785 NOK 4215\n",
+ "batch 181 : total OK 13866 NOK 4234\n",
+ "batch 182 : total OK 13947 NOK 4253\n",
+ "batch 183 : total OK 14037 NOK 4263\n",
+ "batch 184 : total OK 14115 NOK 4285\n",
+ "batch 185 : total OK 14188 NOK 4312\n",
+ "batch 186 : total OK 14261 NOK 4339\n",
+ "batch 187 : total OK 14335 NOK 4365\n",
+ "batch 188 : total OK 14401 NOK 4399\n",
+ "batch 189 : total OK 14489 NOK 4411\n",
+ "batch 190 : total OK 14564 NOK 4436\n",
+ "batch 191 : total OK 14610 NOK 4490\n",
+ "batch 192 : total OK 14680 NOK 4520\n",
+ "batch 193 : total OK 14747 NOK 4553\n",
+ "batch 194 : total OK 14828 NOK 4572\n",
+ "batch 195 : total OK 14914 NOK 4586\n",
+ "batch 196 : total OK 14979 NOK 4621\n",
+ "batch 197 : total OK 15067 NOK 4633\n",
+ "batch 198 : total OK 15134 NOK 4666\n",
+ "batch 199 : total OK 15225 NOK 4675\n",
+ "batch 200 : total OK 15306 NOK 4694\n",
+ "batch 201 : total OK 15371 NOK 4729\n",
+ "batch 202 : total OK 15434 NOK 4766\n",
+ "batch 203 : total OK 15516 NOK 4784\n",
+ "batch 204 : total OK 15595 NOK 4805\n",
+ "batch 205 : total OK 15664 NOK 4836\n",
+ "batch 206 : total OK 15742 NOK 4858\n",
+ "batch 207 : total OK 15797 NOK 4903\n",
+ "batch 208 : total OK 15834 NOK 4966\n",
+ "batch 209 : total OK 15920 NOK 4980\n",
+ "batch 210 : total OK 15973 NOK 5027\n",
+ "batch 211 : total OK 16048 NOK 5052\n",
+ "batch 212 : total OK 16108 NOK 5092\n",
+ "batch 213 : total OK 16183 NOK 5117\n",
+ "batch 214 : total OK 16258 NOK 5142\n",
+ "batch 215 : total OK 16328 NOK 5172\n",
+ "batch 216 : total OK 16410 NOK 5190\n",
+ "batch 217 : total OK 16476 NOK 5224\n",
+ "batch 218 : total OK 16520 NOK 5280\n",
+ "batch 219 : total OK 16587 NOK 5313\n",
+ "batch 220 : total OK 16649 NOK 5351\n",
+ "batch 221 : total OK 16719 NOK 5381\n",
+ "batch 222 : total OK 16768 NOK 5432\n",
+ "batch 223 : total OK 16839 NOK 5461\n",
+ "batch 224 : total OK 16893 NOK 5507\n",
+ "batch 225 : total OK 16971 NOK 5529\n",
+ "batch 226 : total OK 17056 NOK 5544\n",
+ "batch 227 : total OK 17121 NOK 5579\n",
+ "batch 228 : total OK 17175 NOK 5625\n",
+ "batch 229 : total OK 17241 NOK 5659\n",
+ "batch 230 : total OK 17313 NOK 5687\n",
+ "batch 231 : total OK 17368 NOK 5732\n",
+ "batch 232 : total OK 17422 NOK 5778\n",
+ "batch 233 : total OK 17462 NOK 5838\n",
+ "batch 234 : total OK 17545 NOK 5855\n",
+ "batch 235 : total OK 17596 NOK 5904\n",
+ "batch 236 : total OK 17663 NOK 5937\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "batch 237 : total OK 17729 NOK 5971\n",
+ "batch 238 : total OK 17807 NOK 5993\n",
+ "batch 239 : total OK 17882 NOK 6018\n",
+ "batch 240 : total OK 17926 NOK 6074\n",
+ "batch 241 : total OK 17989 NOK 6111\n",
+ "batch 242 : total OK 18044 NOK 6156\n",
+ "batch 243 : total OK 18096 NOK 6204\n",
+ "batch 244 : total OK 18172 NOK 6228\n",
+ "batch 245 : total OK 18220 NOK 6280\n",
+ "batch 246 : total OK 18297 NOK 6303\n",
+ "batch 247 : total OK 18342 NOK 6358\n",
+ "batch 248 : total OK 18412 NOK 6388\n",
+ "batch 249 : total OK 18491 NOK 6409\n",
+ "batch 250 : total OK 18536 NOK 6464\n",
+ "batch 251 : total OK 18592 NOK 6508\n",
+ "batch 252 : total OK 18644 NOK 6556\n",
+ "batch 253 : total OK 18693 NOK 6607\n",
+ "batch 254 : total OK 18760 NOK 6640\n",
+ "batch 255 : total OK 18824 NOK 6676\n",
+ "batch 256 : total OK 18902 NOK 6698\n",
+ "batch 257 : total OK 18960 NOK 6740\n",
+ "batch 258 : total OK 19022 NOK 6778\n",
+ "batch 259 : total OK 19081 NOK 6819\n",
+ "batch 260 : total OK 19153 NOK 6847\n",
+ "batch 261 : total OK 19230 NOK 6870\n",
+ "batch 262 : total OK 19290 NOK 6910\n",
+ "batch 263 : total OK 19351 NOK 6949\n",
+ "batch 264 : total OK 19407 NOK 6993\n",
+ "batch 265 : total OK 19483 NOK 7017\n",
+ "batch 266 : total OK 19540 NOK 7060\n",
+ "batch 267 : total OK 19619 NOK 7081\n",
+ "batch 268 : total OK 19693 NOK 7107\n",
+ "batch 269 : total OK 19766 NOK 7134\n",
+ "batch 270 : total OK 19831 NOK 7169\n",
+ "batch 271 : total OK 19897 NOK 7203\n",
+ "batch 272 : total OK 19943 NOK 7257\n",
+ "batch 273 : total OK 20018 NOK 7282\n",
+ "batch 274 : total OK 20100 NOK 7300\n",
+ "batch 275 : total OK 20167 NOK 7333\n",
+ "batch 276 : total OK 20241 NOK 7359\n",
+ "batch 277 : total OK 20320 NOK 7380\n",
+ "batch 278 : total OK 20406 NOK 7394\n",
+ "batch 279 : total OK 20463 NOK 7437\n",
+ "batch 280 : total OK 20511 NOK 7489\n",
+ "batch 281 : total OK 20595 NOK 7505\n",
+ "batch 282 : total OK 20665 NOK 7535\n",
+ "batch 283 : total OK 20750 NOK 7550\n",
+ "batch 284 : total OK 20805 NOK 7595\n",
+ "batch 285 : total OK 20885 NOK 7615\n",
+ "batch 286 : total OK 20962 NOK 7638\n",
+ "batch 287 : total OK 21041 NOK 7659\n",
+ "batch 288 : total OK 21124 NOK 7676\n",
+ "batch 289 : total OK 21208 NOK 7692\n",
+ "batch 290 : total OK 21273 NOK 7727\n",
+ "batch 291 : total OK 21352 NOK 7748\n",
+ "batch 292 : total OK 21424 NOK 7776\n",
+ "batch 293 : total OK 21461 NOK 7839\n",
+ "batch 294 : total OK 21523 NOK 7877\n",
+ "batch 295 : total OK 21577 NOK 7923\n",
+ "batch 296 : total OK 21635 NOK 7965\n",
+ "batch 297 : total OK 21707 NOK 7993\n",
+ "batch 298 : total OK 21788 NOK 8012\n",
+ "batch 299 : total OK 21841 NOK 8059\n",
+ "batch 300 : total OK 21905 NOK 8095\n",
+ "batch 301 : total OK 21944 NOK 8156\n",
+ "batch 302 : total OK 22022 NOK 8178\n",
+ "batch 303 : total OK 22104 NOK 8196\n",
+ "batch 304 : total OK 22188 NOK 8212\n",
+ "batch 305 : total OK 22259 NOK 8241\n",
+ "batch 306 : total OK 22339 NOK 8261\n",
+ "batch 307 : total OK 22420 NOK 8280\n",
+ "batch 308 : total OK 22494 NOK 8306\n",
+ "batch 309 : total OK 22575 NOK 8325\n",
+ "batch 310 : total OK 22610 NOK 8390\n",
+ "batch 311 : total OK 22658 NOK 8442\n",
+ "batch 312 : total OK 22694 NOK 8506\n",
+ "batch 313 : total OK 22768 NOK 8532\n",
+ "batch 314 : total OK 22829 NOK 8571\n",
+ "batch 315 : total OK 22907 NOK 8593\n",
+ "batch 316 : total OK 22976 NOK 8624\n",
+ "batch 317 : total OK 23012 NOK 8688\n",
+ "batch 318 : total OK 23069 NOK 8731\n",
+ "batch 319 : total OK 23138 NOK 8762\n",
+ "batch 320 : total OK 23166 NOK 8834\n",
+ "batch 321 : total OK 23243 NOK 8857\n",
+ "batch 322 : total OK 23312 NOK 8888\n",
+ "batch 323 : total OK 23395 NOK 8905\n",
+ "batch 324 : total OK 23467 NOK 8933\n",
+ "batch 325 : total OK 23534 NOK 8966\n",
+ "batch 326 : total OK 23583 NOK 9017\n",
+ "batch 327 : total OK 23643 NOK 9057\n",
+ "batch 328 : total OK 23704 NOK 9096\n",
+ "batch 329 : total OK 23741 NOK 9159\n",
+ "batch 330 : total OK 23813 NOK 9187\n",
+ "batch 331 : total OK 23885 NOK 9215\n",
+ "batch 332 : total OK 23935 NOK 9265\n",
+ "batch 333 : total OK 23976 NOK 9324\n",
+ "batch 334 : total OK 24035 NOK 9365\n",
+ "batch 335 : total OK 24124 NOK 9376\n",
+ "batch 336 : total OK 24201 NOK 9399\n",
+ "batch 337 : total OK 24266 NOK 9434\n",
+ "batch 338 : total OK 24327 NOK 9473\n",
+ "batch 339 : total OK 24383 NOK 9517\n",
+ "batch 340 : total OK 24458 NOK 9542\n",
+ "batch 341 : total OK 24507 NOK 9593\n",
+ "batch 342 : total OK 24579 NOK 9621\n",
+ "batch 343 : total OK 24664 NOK 9636\n",
+ "batch 344 : total OK 24731 NOK 9669\n",
+ "batch 345 : total OK 24792 NOK 9708\n",
+ "batch 346 : total OK 24852 NOK 9748\n",
+ "batch 347 : total OK 24907 NOK 9793\n",
+ "batch 348 : total OK 24978 NOK 9822\n",
+ "batch 349 : total OK 25041 NOK 9859\n",
+ "batch 350 : total OK 25113 NOK 9887\n",
+ "batch 351 : total OK 25184 NOK 9916\n",
+ "batch 352 : total OK 25247 NOK 9953\n",
+ "batch 353 : total OK 25315 NOK 9985\n",
+ "batch 354 : total OK 25385 NOK 10015\n",
+ "batch 355 : total OK 25454 NOK 10046\n",
+ "batch 356 : total OK 25505 NOK 10095\n",
+ "batch 357 : total OK 25580 NOK 10120\n",
+ "batch 358 : total OK 25655 NOK 10145\n",
+ "batch 359 : total OK 25727 NOK 10173\n",
+ "batch 360 : total OK 25794 NOK 10206\n",
+ "batch 361 : total OK 25863 NOK 10237\n",
+ "batch 362 : total OK 25946 NOK 10254\n",
+ "batch 363 : total OK 26008 NOK 10292\n",
+ "batch 364 : total OK 26088 NOK 10312\n",
+ "batch 365 : total OK 26129 NOK 10371\n",
+ "batch 366 : total OK 26183 NOK 10417\n",
+ "batch 367 : total OK 26240 NOK 10460\n",
+ "batch 368 : total OK 26308 NOK 10492\n",
+ "batch 369 : total OK 26383 NOK 10517\n",
+ "batch 370 : total OK 26468 NOK 10532\n",
+ "batch 371 : total OK 26518 NOK 10582\n",
+ "batch 372 : total OK 26578 NOK 10622\n",
+ "batch 373 : total OK 26630 NOK 10670\n",
+ "batch 374 : total OK 26701 NOK 10699\n",
+ "batch 375 : total OK 26753 NOK 10747\n",
+ "batch 376 : total OK 26820 NOK 10780\n",
+ "batch 377 : total OK 26891 NOK 10809\n",
+ "batch 378 : total OK 26961 NOK 10839\n",
+ "batch 379 : total OK 27037 NOK 10863\n",
+ "batch 380 : total OK 27103 NOK 10897\n",
+ "batch 381 : total OK 27175 NOK 10925\n",
+ "batch 382 : total OK 27242 NOK 10958\n",
+ "batch 383 : total OK 27297 NOK 11003\n",
+ "batch 384 : total OK 27366 NOK 11034\n",
+ "batch 385 : total OK 27442 NOK 11058\n",
+ "batch 386 : total OK 27524 NOK 11076\n",
+ "batch 387 : total OK 27575 NOK 11125\n",
+ "batch 388 : total OK 27634 NOK 11166\n",
+ "batch 389 : total OK 27703 NOK 11197\n",
+ "batch 390 : total OK 27776 NOK 11224\n",
+ "batch 391 : total OK 27860 NOK 11240\n",
+ "batch 392 : total OK 27916 NOK 11284\n",
+ "batch 393 : total OK 27970 NOK 11330\n",
+ "batch 394 : total OK 28032 NOK 11368\n",
+ "batch 395 : total OK 28106 NOK 11394\n",
+ "batch 396 : total OK 28171 NOK 11429\n",
+ "batch 397 : total OK 28233 NOK 11467\n",
+ "batch 398 : total OK 28302 NOK 11498\n",
+ "batch 399 : total OK 28368 NOK 11532\n",
+ "batch 400 : total OK 28427 NOK 11573\n",
+ "batch 401 : total OK 28518 NOK 11582\n",
+ "batch 402 : total OK 28605 NOK 11595\n",
+ "batch 403 : total OK 28677 NOK 11623\n",
+ "batch 404 : total OK 28741 NOK 11659\n",
+ "batch 405 : total OK 28797 NOK 11703\n",
+ "batch 406 : total OK 28842 NOK 11758\n",
+ "batch 407 : total OK 28897 NOK 11803\n",
+ "batch 408 : total OK 28975 NOK 11825\n",
+ "batch 409 : total OK 29047 NOK 11853\n",
+ "batch 410 : total OK 29101 NOK 11899\n",
+ "batch 411 : total OK 29193 NOK 11907\n",
+ "batch 412 : total OK 29264 NOK 11936\n",
+ "batch 413 : total OK 29319 NOK 11981\n",
+ "batch 414 : total OK 29367 NOK 12033\n",
+ "batch 415 : total OK 29439 NOK 12061\n",
+ "batch 416 : total OK 29507 NOK 12093\n",
+ "batch 417 : total OK 29584 NOK 12116\n",
+ "batch 418 : total OK 29639 NOK 12161\n",
+ "batch 419 : total OK 29663 NOK 12237\n",
+ "batch 420 : total OK 29707 NOK 12293\n",
+ "batch 421 : total OK 29759 NOK 12341\n",
+ "batch 422 : total OK 29828 NOK 12372\n",
+ "batch 423 : total OK 29885 NOK 12415\n",
+ "batch 424 : total OK 29953 NOK 12447\n",
+ "batch 425 : total OK 30012 NOK 12488\n",
+ "batch 426 : total OK 30079 NOK 12521\n",
+ "batch 427 : total OK 30161 NOK 12539\n",
+ "batch 428 : total OK 30231 NOK 12569\n",
+ "batch 429 : total OK 30294 NOK 12606\n",
+ "batch 430 : total OK 30356 NOK 12644\n",
+ "batch 431 : total OK 30407 NOK 12693\n",
+ "batch 432 : total OK 30480 NOK 12720\n",
+ "batch 433 : total OK 30545 NOK 12755\n",
+ "batch 434 : total OK 30620 NOK 12780\n",
+ "batch 435 : total OK 30672 NOK 12828\n",
+ "batch 436 : total OK 30746 NOK 12854\n",
+ "batch 437 : total OK 30822 NOK 12878\n",
+ "batch 438 : total OK 30900 NOK 12900\n",
+ "batch 439 : total OK 30962 NOK 12938\n",
+ "batch 440 : total OK 31025 NOK 12975\n",
+ "batch 441 : total OK 31093 NOK 13007\n",
+ "batch 442 : total OK 31147 NOK 13053\n",
+ "batch 443 : total OK 31187 NOK 13113\n",
+ "batch 444 : total OK 31259 NOK 13141\n",
+ "batch 445 : total OK 31329 NOK 13171\n",
+ "batch 446 : total OK 31408 NOK 13192\n",
+ "batch 447 : total OK 31460 NOK 13240\n",
+ "batch 448 : total OK 31535 NOK 13265\n",
+ "batch 449 : total OK 31611 NOK 13289\n",
+ "batch 450 : total OK 31651 NOK 13349\n",
+ "batch 451 : total OK 31724 NOK 13376\n",
+ "batch 452 : total OK 31798 NOK 13402\n",
+ "batch 453 : total OK 31854 NOK 13446\n",
+ "batch 454 : total OK 31887 NOK 13513\n",
+ "batch 455 : total OK 31936 NOK 13564\n",
+ "batch 456 : total OK 31980 NOK 13620\n",
+ "batch 457 : total OK 32055 NOK 13645\n",
+ "batch 458 : total OK 32133 NOK 13667\n",
+ "batch 459 : total OK 32215 NOK 13685\n",
+ "batch 460 : total OK 32295 NOK 13705\n",
+ "batch 461 : total OK 32357 NOK 13743\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "batch 462 : total OK 32421 NOK 13779\n",
+ "batch 463 : total OK 32487 NOK 13813\n",
+ "batch 464 : total OK 32574 NOK 13826\n",
+ "batch 465 : total OK 32643 NOK 13857\n",
+ "batch 466 : total OK 32703 NOK 13897\n",
+ "batch 467 : total OK 32777 NOK 13923\n",
+ "batch 468 : total OK 32843 NOK 13957\n",
+ "batch 469 : total OK 32932 NOK 13968\n",
+ "batch 470 : total OK 33008 NOK 13992\n",
+ "batch 471 : total OK 33090 NOK 14010\n",
+ "batch 472 : total OK 33159 NOK 14041\n",
+ "batch 473 : total OK 33240 NOK 14060\n",
+ "batch 474 : total OK 33304 NOK 14096\n",
+ "batch 475 : total OK 33384 NOK 14116\n",
+ "batch 476 : total OK 33461 NOK 14139\n",
+ "batch 477 : total OK 33544 NOK 14156\n",
+ "batch 478 : total OK 33631 NOK 14169\n",
+ "batch 479 : total OK 33716 NOK 14184\n",
+ "batch 480 : total OK 33797 NOK 14203\n",
+ "batch 481 : total OK 33835 NOK 14265\n",
+ "batch 482 : total OK 33903 NOK 14297\n",
+ "batch 483 : total OK 33972 NOK 14328\n",
+ "batch 484 : total OK 34032 NOK 14368\n",
+ "batch 485 : total OK 34072 NOK 14428\n",
+ "batch 486 : total OK 34135 NOK 14465\n",
+ "batch 487 : total OK 34198 NOK 14502\n",
+ "batch 488 : total OK 34271 NOK 14529\n",
+ "batch 489 : total OK 34327 NOK 14573\n",
+ "batch 490 : total OK 34381 NOK 14619\n",
+ "batch 491 : total OK 34461 NOK 14639\n",
+ "batch 492 : total OK 34544 NOK 14656\n",
+ "batch 493 : total OK 34640 NOK 14660\n",
+ "batch 494 : total OK 34711 NOK 14689\n",
+ "batch 495 : total OK 34804 NOK 14696\n",
+ "batch 496 : total OK 34896 NOK 14704\n",
+ "batch 497 : total OK 34986 NOK 14714\n",
+ "batch 498 : total OK 35080 NOK 14720\n",
+ "batch 499 : total OK 35153 NOK 14747\n",
+ "batch 500 : total OK 35196 NOK 14804\n"
]
}
],
"source": [
+ "img_queue = setup_dataloader(val_dir, label_file, batch_size)\n",
+ "\n",
"ok = 0\n",
"nok = 0\n",
"i = 0\n",
- "for (imgs, lbls) in val_loader:\n",
- " ibuf_normal = imgs.numpy().reshape(accel.ishape_normal)\n",
- " exp = lbls.numpy()\n",
+ "while not img_queue.last_batch:\n",
+ " imgs, lbls = img_queue.get_batch(batch_size, timeout=None)\n",
+ " imgs = np.array(imgs)\n",
+ " exp = np.array(lbls)\n",
+ " \n",
+ " ibuf_normal = imgs.reshape(accel.ishape_normal)\n",
" obuf_normal = accel.execute(ibuf_normal)\n",
" obuf_normal = obuf_normal.reshape(batch_size, -1)[:,0]\n",
" ret = np.bincount(obuf_normal.flatten() == exp.flatten())\n",
@@ -404,14 +877,14 @@
},
{
"cell_type": "code",
- "execution_count": 16,
+ "execution_count": 14,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
- "Final top-1 accuracy: 70.412%\n"
+ "Final top-1 accuracy: 70.392%\n"
]
}
],
@@ -430,27 +903,27 @@
},
{
"cell_type": "code",
- "execution_count": 17,
+ "execution_count": 15,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
- "{'runtime[ms]': 551.5413284301758,\n",
- " 'throughput[images/s]': 1813.1007568304074,\n",
- " 'DRAM_in_bandwidth[Mb/s]': 272.92243072416755,\n",
- " 'DRAM_out_bandwidth[Mb/s]': 0.018131007568304075,\n",
- " 'fclk[mhz]': 206,\n",
- " 'batch_size': 1000,\n",
- " 'fold_input[ms]': 1.0013580322265625e-05,\n",
- " 'pack_input[ms]': 1.52587890625e-05,\n",
- " 'copy_input_data_to_device[ms]': 0.07888936996459961,\n",
- " 'copy_output_data_from_device[ms]': 0.00015473365783691406,\n",
- " 'unpack_output[ms]': 0.040567874908447266,\n",
- " 'unfold_output[ms]': 6.4373016357421875e-06}"
+ "{'runtime[ms]': 50.49920082092285,\n",
+ " 'throughput[images/s]': 1980.2293575815947,\n",
+ " 'DRAM_in_bandwidth[Mb/s]': 298.0799647380423,\n",
+ " 'DRAM_out_bandwidth[Mb/s]': 0.01980229357581595,\n",
+ " 'fclk[mhz]': 100.0,\n",
+ " 'batch_size': 100,\n",
+ " 'fold_input[ms]': 1.5020370483398438e-05,\n",
+ " 'pack_input[ms]': 2.4080276489257812e-05,\n",
+ " 'copy_input_data_to_device[ms]': 0.006676673889160156,\n",
+ " 'copy_output_data_from_device[ms]': 0.00022292137145996094,\n",
+ " 'unpack_output[ms]': 0.004586219787597656,\n",
+ " 'unfold_output[ms]': 6.9141387939453125e-06}"
]
},
- "execution_count": 17,
+ "execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
diff --git a/setup.py b/setup.py
index 7edd82b..1eb0930 100644
--- a/setup.py
+++ b/setup.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2020 Xilinx, Inc
+# Copyright (C) 2020-2021 Xilinx, Inc
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -20,7 +20,7 @@
from pynq.utils import build_py as _build_py
__author__ = "Yaman Umuroglu"
-__copyright__ = "Copyright 2020, Xilinx"
+__copyright__ = "Copyright 2020-2021, Xilinx"
__email__ = "yamanu@xilinx.com"
@@ -90,7 +90,7 @@ def extend_package(path):
setup(
name=module_name,
- version="0.0.1b",
+ version="0.0.2b",
description="FINN Examples on PYNQ for Zynq and Alveo",
long_description=long_description,
long_description_content_type="text/markdown",
@@ -108,8 +108,8 @@ def extend_package(path):
setup_requires=["pynq>=2.5.1"],
install_requires=[
"pynq>=2.5.1",
- "finn-base==0.0.1b0",
- "finn-dataset_loading==0.0.4", # noqa
+ "finn-base==0.0.2b0",
+ "finn-dataset_loading==0.0.5", # noqa
],
extras_require={
':python_version<"3.6"': ["matplotlib<3.1", "ipython==7.9"],