diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml new file mode 100644 index 0000000..9087804 --- /dev/null +++ b/.github/workflows/python-publish.yml @@ -0,0 +1,31 @@ +# This workflow will upload a Python Package using Twine when a release is created +# For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries + +name: Upload Python Package + +on: + release: + types: [created] + +jobs: + deploy: + + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v2 + - name: Set up Python + uses: actions/setup-python@v2 + with: + python-version: '3.x' + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install setuptools wheel twine + - name: Build and publish + env: + TWINE_USERNAME: __token__ + TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }} + run: | + python setup.py sdist + twine upload dist/* diff --git a/AUTHORS.rst b/AUTHORS.rst new file mode 100644 index 0000000..ba06e5d --- /dev/null +++ b/AUTHORS.rst @@ -0,0 +1,10 @@ +============ +Contributors +============ + +* Yaman Umuroglu (@maltanar) (maintainer) +* Jakoba Petri-Koenig (@auphelia) +* Lucian Petrica (@quetric) +* Tobias Alonso (@Tobi-Alonso) +* Hendrik Borras (@HenniOVP) +* Felix Paul Jentzsch (@felixpj) diff --git a/README.md b/README.md index 5168620..8467a64 100644 --- a/README.md +++ b/README.md @@ -14,10 +14,13 @@ pre-built bitfiles, PYNQ Python drivers and Jupyter notebooks to get started, and you can rebuild them from source. Both PYNQ on Zynq and Alveo are supported. +Need help with a problem in this repo, or got a question? Feel free to ask for help in the [FINN Gitter channel](https://gitter.im/xilinx-finn/community). 
+ ## Quickstart *For Alveo we recommend setting up everything inside a virtualenv as described [here](https://pynq.readthedocs.io/en/v2.6.1/getting_started/alveo_getting_started.html?highlight=alveo#install-conda).* +*For PYNQ boards, all commands below must be run as root: either prefix each one with `sudo` or switch to a root shell first with `sudo su`.* First, ensure that your `pip` and `setuptools` installations are up-to-date on your PYNQ board or Alveo server: @@ -62,11 +65,11 @@ dummy_out = accel.execute(dummy_in) |----------------------------------------------------------------|-------------------------|------------------------------------------------------------|------------------| |
CIFAR-10 | CNV (VGG-11-like) | several variants:
1/2-bit weights/activations | all | |

MNIST | 3-layer fully-connected | several variants:
1/2-bit weights/activations | all | -|

ImageNet | MobileNet-v1 | 4-bit weights and activations
8-bit first layer weights | Alveo U250 | +|

ImageNet | MobileNet-v1 | 4-bit weights and activations
8-bit first layer weights | Alveo U250
ZCU104 | ## Supported Boards -*Note that the larger NNs are only available on Alveo boards.* +*Note that the larger NNs are only available on Alveo or selected Zynq boards.* `finn-examples` provides pre-built FPGA bitfiles for the following boards: diff --git a/build/README.md b/build/README.md index cd699c9..e284deb 100644 --- a/build/README.md +++ b/build/README.md @@ -27,8 +27,7 @@ Please see the READMEs under the respective subfolders here for instructions on All examples in this repo use the same Python PYNQ driver, located under `finn_examples/driver.py` in the repo. This driver can support any FINN-generated -accelerator that doesn't use external weights, the only thing that needs to be -specified is the configuration for the input and output tensors in the `io_shape_dict`. Have a look at `finn_examples/models.py` to see how this is done for the example models in this repo: +accelerator, the only thing that needs to be specified is the configuration for the input and output tensors in the `io_shape_dict`. 
Have a look at `finn_examples/models.py` to see how this is done for the example models in this repo: ```python _cifar10_cnv_io_shape_dict = { diff --git a/build/get-finn.sh b/build/get-finn.sh index 3f5cf53..016a69c 100755 --- a/build/get-finn.sh +++ b/build/get-finn.sh @@ -30,7 +30,7 @@ # URL for git repo to be cloned REPO_URL=https://github.com/Xilinx/finn # commit hash for repo -REPO_COMMIT=4fee6ffd8e13f91314ec9086e9ce9b2ea9de15c7 +REPO_COMMIT=e5da788bdc74fc9c234bb0176521ad51e830c22e # directory (under the same folder as this script) to clone to REPO_DIR=finn diff --git a/build/mobilenet-v1/README.md b/build/mobilenet-v1/README.md index b929679..38b69a2 100644 --- a/build/mobilenet-v1/README.md +++ b/build/mobilenet-v1/README.md @@ -17,7 +17,8 @@ It requires about 2 MB of weight storage and 1.1 GMACs per inference, yielding Due to the depthwise separable convolutions in MobileNet-v1, we use a specialized build script that replaces a few of the standard steps in FINN with custom ones. -**MobileNet-v1 is currently only supported on Alveo U250.** +**MobileNet-v1 is currently only supported on Alveo U250 and ZCU104.** +We also provide a folding configuration for the **ZCU102**, but there is no pre-built Pynq image available for this board. 0. Ensure you have performed the *Setup* steps in the top-level README for setting up the FINN requirements and environment variables. 
diff --git a/build/mobilenet-v1/build.py b/build/mobilenet-v1/build.py
index 5d67f1c..fc4c48c 100644
--- a/build/mobilenet-v1/build.py
+++ b/build/mobilenet-v1/build.py
@@ -28,53 +28,153 @@
 
 import finn.builder.build_dataflow as build
 import finn.builder.build_dataflow_config as build_cfg
+from finn.util.basic import alveo_default_platform
+import os
+import shutil
 
 # custom steps for mobilenetv1
 from custom_steps import (
     step_mobilenet_streamline,
     step_mobilenet_convert_to_hls_layers,
+    step_mobilenet_convert_to_hls_layers_separate_th,
     step_mobilenet_lower_convs,
+    step_mobilenet_slr_floorplan,
 )
 
 model_name = "mobilenetv1-w4a4"
-board = "U250"
-vitis_platform = "xilinx_u250_xdma_201830_2"
-synth_clk_period_ns = 3.0
 
-mobilenet_build_steps = [
-    step_mobilenet_streamline,
-    step_mobilenet_lower_convs,
-    step_mobilenet_convert_to_hls_layers,
-    "step_create_dataflow_partition",
-    "step_apply_folding_config",
-    "step_generate_estimate_reports",
-    "step_hls_ipgen",
-    "step_set_fifo_depths",
-    "step_create_stitched_ip",
-    "step_make_pynq_driver",
-    "step_synthesize_bitfile",
-    "step_deployment_package",
-]
-
-
-cfg = build_cfg.DataflowBuildConfig(
-    steps=mobilenet_build_steps,
-    output_dir="output_%s_%s" % (model_name, board),
-    folding_config_file="folding_config/%s_folding_config.json" % board,
-    synth_clk_period_ns=synth_clk_period_ns,
-    board=board,
-    shell_flow_type=build_cfg.ShellFlowType.VITIS_ALVEO,
-    # folding config comes with FIFO depths already
-    auto_fifo_depths=False,
-    vitis_platform=vitis_platform,
-    # enable extra performance optimizations (physopt)
-    vitis_opt_strategy=build_cfg.VitisOptStrategyCfg.PERFORMANCE_BEST,
-    generate_outputs=[
-        build_cfg.DataflowOutputType.PYNQ_DRIVER,
-        build_cfg.DataflowOutputType.ESTIMATE_REPORTS,
-        build_cfg.DataflowOutputType.BITFILE,
-        build_cfg.DataflowOutputType.DEPLOYMENT_PACKAGE,
-    ],
-)
-model_file = "models/%s_pre_post_tidy.onnx" % model_name
-build.build_dataflow_cfg(model_file, cfg)
+# which platforms to build the networks for
+zynq_platforms = ["ZCU102", "ZCU104"]
+# folding configs are also provided for U50, U200 and U280; add them here if needed
+alveo_platforms = ["U250"]
+platforms_to_build = zynq_platforms + alveo_platforms
+
+
+# determine which shell flow to use for a given platform
+def platform_to_shell(platform):
+    if platform in zynq_platforms:
+        return build_cfg.ShellFlowType.VIVADO_ZYNQ
+    elif platform in alveo_platforms:
+        return build_cfg.ShellFlowType.VITIS_ALVEO
+    else:
+        raise Exception("Unknown platform, can't determine ShellFlowType")
+
+
+# select target clock period in ns (fails loudly for unknown platforms)
+def select_clk_period(platform):
+    if platform in zynq_platforms:
+        return 5.4
+    elif platform in alveo_platforms:
+        return 3.0
+    else:
+        raise Exception("Unknown platform, can't determine clock period")
+
+
+# select build steps (ZCU104/102 folding config is based on separate thresholding nodes)
+def select_build_steps(platform):
+    if platform in zynq_platforms:
+        return [
+            step_mobilenet_streamline,
+            step_mobilenet_lower_convs,
+            step_mobilenet_convert_to_hls_layers_separate_th,
+            "step_create_dataflow_partition",
+            "step_apply_folding_config",
+            "step_generate_estimate_reports",
+            "step_hls_codegen",
+            "step_hls_ipgen",
+            "step_set_fifo_depths",
+            "step_create_stitched_ip",
+            "step_synthesize_bitfile",
+            "step_make_pynq_driver",
+            "step_deployment_package",
+        ]
+    elif platform in alveo_platforms:
+        return [
+            step_mobilenet_streamline,
+            step_mobilenet_lower_convs,
+            step_mobilenet_convert_to_hls_layers,
+            "step_create_dataflow_partition",
+            "step_apply_folding_config",
+            "step_generate_estimate_reports",
+            "step_hls_codegen",
+            "step_hls_ipgen",
+            "step_set_fifo_depths",
+            step_mobilenet_slr_floorplan,
+            "step_synthesize_bitfile",
+            "step_make_pynq_driver",
+            "step_deployment_package",
+        ]
+    else:
+        raise Exception("Unknown platform, can't determine build steps")
+
+
+# create a release dir, used for finn-examples release packaging
+os.makedirs("release", exist_ok=True)
+
+
+for platform_name in platforms_to_build:
+    shell_flow_type = platform_to_shell(platform_name)
+    if shell_flow_type == build_cfg.ShellFlowType.VITIS_ALVEO:
+        vitis_platform = alveo_default_platform[platform_name]
+        # for Alveo, use the Vitis platform name as the release name
+        # e.g. xilinx_u250_xdma_201830_2
+        release_platform_name = vitis_platform
+    else:
+        vitis_platform = None
+        # for Zynq, use the board name as the release name
+        # e.g. ZCU104
+        release_platform_name = platform_name
+    platform_dir = "release/%s" % release_platform_name
+    os.makedirs(platform_dir, exist_ok=True)
+
+    cfg = build_cfg.DataflowBuildConfig(
+        steps=select_build_steps(platform_name),
+        output_dir="output_%s_%s" % (model_name, release_platform_name),
+        folding_config_file="folding_config/%s_folding_config.json" % platform_name,
+        synth_clk_period_ns=select_clk_period(platform_name),
+        board=platform_name,
+        shell_flow_type=shell_flow_type,
+        vitis_platform=vitis_platform,
+        # folding config comes with FIFO depths already
+        auto_fifo_depths=False,
+        # enable extra performance optimizations (physopt)
+        vitis_opt_strategy=build_cfg.VitisOptStrategyCfg.PERFORMANCE_BEST,
+        generate_outputs=[
+            build_cfg.DataflowOutputType.PYNQ_DRIVER,
+            build_cfg.DataflowOutputType.ESTIMATE_REPORTS,
+            build_cfg.DataflowOutputType.BITFILE,
+            build_cfg.DataflowOutputType.DEPLOYMENT_PACKAGE,
+        ],
+    )
+    model_file = "models/%s_pre_post_tidy.onnx" % model_name
+    build.build_dataflow_cfg(model_file, cfg)
+
+    # copy bitfiles and runtime weights into release dir if found
+    bitfile_gen_dir = cfg.output_dir + "/bitfile"
+    files_to_check_and_copy = [
+        "finn-accel.bit",
+        "finn-accel.hwh",
+        "finn-accel.xclbin",
+    ]
+    for f in files_to_check_and_copy:
+        src_file = bitfile_gen_dir + "/" + f
+        dst_file = platform_dir + "/" + f.replace("finn-accel", model_name)
+        if os.path.isfile(src_file):
+            shutil.copy(src_file, dst_file)
+
+    weight_gen_dir = cfg.output_dir + "/driver/runtime_weights"
+    weight_dst_dir = platform_dir + "/%s_runtime_weights" % model_name
+    if os.path.isdir(weight_gen_dir):
+        weight_files = os.listdir(weight_gen_dir)
+        if weight_files:
+            # copytree refuses an existing destination; clear leftovers from earlier runs
+            shutil.rmtree(weight_dst_dir, ignore_errors=True)
+            shutil.copytree(weight_gen_dir, weight_dst_dir)
+
+    # create zipfile for all examples for this platform
+    shutil.make_archive(
+        "release/" + release_platform_name,
+        "zip",
+        root_dir="release",
+        base_dir=release_platform_name,
+    )
diff --git a/build/mobilenet-v1/custom_steps.py b/build/mobilenet-v1/custom_steps.py
index a18faee..9f30597 100644
--- a/build/mobilenet-v1/custom_steps.py
+++ b/build/mobilenet-v1/custom_steps.py
@@ -26,7 +26,10 @@
 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 from finn.core.modelwrapper import ModelWrapper
-from finn.builder.build_dataflow_config import DataflowBuildConfig
+from finn.builder.build_dataflow_config import (
+    DataflowBuildConfig,
+    ShellFlowType,
+)
 from finn.transformation.streamline import Streamline
 from finn.transformation.double_to_single_float import DoubleToSingleFloat
 import finn.transformation.streamline.absorb as absorb
@@ -39,6 +42,7 @@
 from finn.transformation.general import (
     GiveReadableTensorNames,
     GiveUniqueNodeNames,
+    ApplyConfig,
 )
 import finn.transformation.fpgadataflow.convert_to_hls_layers as to_hls
 from finn.transformation.infer_shapes import InferShapes
@@ -94,3 +98,51 @@ def step_mobilenet_convert_to_hls_layers(model: ModelWrapper, cfg: DataflowBuild
     model = model.transform(GiveUniqueNodeNames())
     model = model.transform(GiveReadableTensorNames())
     return model
+
+
+def step_mobilenet_slr_floorplan(model: ModelWrapper, cfg: DataflowBuildConfig):
+    """Best-effort SLR floorplanning; only attempted for the Vitis/Alveo shell flow."""
+    if cfg.shell_flow_type == ShellFlowType.VITIS_ALVEO:
+        try:
+            from finn.analysis.partitioning import partition
+
+            # apply partitioning of the model, restricting the first and last
+            # layers to SLR0
+            default_slr = 0
+            abs_anchors = [(0, [default_slr]), (-1, [default_slr])]
+            floorplan = partition(
+                model,
+                cfg.synth_clk_period_ns,
+                cfg.board,
+                abs_anchors=abs_anchors,
+                multivariant=False,
+            )[0]
+            # apply floorplan to model
+            model = model.transform(ApplyConfig(floorplan))
+            print("SLR floorplanning applied")
+        except Exception as e:
+            # floorplanning is optional, so carry on without it -- but catch only
+            # Exception (not KeyboardInterrupt/SystemExit) and report the cause
+            print("No SLR floorplanning applied: %s" % e)
+    return model
+
+
+def step_mobilenet_convert_to_hls_layers_separate_th(
+    model: ModelWrapper, cfg: DataflowBuildConfig
+):
+    """Convert to HLS layers, inferring thresholding as separate layers.
+
+    InferThresholdingLayer is applied before the VVAU/StreamingFCLayer inference.
+    """
+    mem_mode = cfg.default_mem_mode.value
+    model = model.transform(to_hls.InferPool_Batch())
+    model = model.transform(to_hls.InferConvInpGen())
+    model = model.transform(to_hls.InferThresholdingLayer())
+    model = model.transform(to_hls.InferVVAU())
+    model = model.transform(to_hls.InferQuantizedStreamingFCLayer(mem_mode))
+    model = model.transform(to_hls.InferChannelwiseLinearLayer())
+    model = model.transform(to_hls.InferLabelSelectLayer())
+    model = model.transform(InferShapes())
+    model = model.transform(GiveUniqueNodeNames())
+    model = model.transform(GiveReadableTensorNames())
+    return model
diff --git a/build/mobilenet-v1/folding_config/U200_folding_config.json b/build/mobilenet-v1/folding_config/U200_folding_config.json
new file mode 100644
index 0000000..f5ccf9b
--- /dev/null
+++ b/build/mobilenet-v1/folding_config/U200_folding_config.json
@@ -0,0 +1,499 @@
+{
+  "Defaults": {},
+  "StreamingFIFO_0": {
+    "ram_style": "ultra",
+    "depth": 512,
+    "impl_style": "vivado"
+  },
+  "ConvolutionInputGenerator_0": {
+    "SIMD": 3,
+    "ram_style": "distributed"
+  },
+  "StreamingFCLayer_Batch_0": {
+    "PE": 32,
+    "SIMD": 3,
+    "ram_style": "block",
+    "mem_mode": "decoupled",
+    "resType": "dsp"
+  },
+  "FMPadding_Batch_0": {
+    "SIMD": 32
+  },
+  "StreamingFIFO_3": {
+    "ram_style": "auto",
+    "depth": 256,
+    "impl_style": "rtl"
+  },
+  "ConvolutionInputGenerator_1": {
+    "SIMD": 32,
+    "ram_style": "distributed"
+  },
+  "Vector_Vector_Activate_Batch_0": {
+    "PE": 32,
+    "resType": "lut"
+  },
+  "StreamingDataWidthConverter_Batch_0": {
+    "impl_style": "hls"
+  },
+  "StreamingFCLayer_Batch_1": {
+    "PE": 16,
+    "SIMD": 16,
+    "ram_style": "block",
+    "mem_mode": "decoupled",
+    "resType": "lut"
+  },
+  "StreamingDataWidthConverter_Batch_1": {
+    "impl_style": "hls"
+  },
+  "FMPadding_Batch_1": {
+ 
"SIMD": 32 + }, + "StreamingFIFO_9": { + "ram_style": "ultra", + "depth": 512, + "impl_style": "vivado" + }, + "ConvolutionInputGenerator_2": { + "SIMD": 32, + "ram_style": "distributed" + }, + "Vector_Vector_Activate_Batch_1": { + "PE": 32, + "resType": "lut" + }, + "StreamingDataWidthConverter_Batch_2": { + "impl_style": "hls" + }, + "StreamingFIFO_12": { + "ram_style": "auto", + "depth": 256, + "impl_style": "rtl" + }, + "StreamingFCLayer_Batch_2": { + "PE": 16, + "SIMD": 16, + "ram_style": "block", + "mem_mode": "decoupled", + "resType": "lut" + }, + "StreamingDataWidthConverter_Batch_3": { + "impl_style": "hls" + }, + "FMPadding_Batch_2": { + "SIMD": 64 + }, + "StreamingFIFO_15": { + "ram_style": "auto", + "depth": 256, + "impl_style": "rtl" + }, + "ConvolutionInputGenerator_3": { + "SIMD": 64, + "ram_style": "distributed" + }, + "Vector_Vector_Activate_Batch_2": { + "PE": 64, + "resType": "lut" + }, + "StreamingDataWidthConverter_Batch_4": { + "impl_style": "hls" + }, + "StreamingFIFO_18": { + "ram_style": "ultra", + "depth": 1024, + "impl_style": "vivado" + }, + "StreamingFCLayer_Batch_3": { + "PE": 32, + "SIMD": 16, + "ram_style": "block", + "mem_mode": "decoupled", + "resType": "lut" + }, + "StreamingDataWidthConverter_Batch_5": { + "impl_style": "hls" + }, + "StreamingFIFO_20": { + "ram_style": "auto", + "depth": 32, + "impl_style": "rtl" + }, + "FMPadding_Batch_3": { + "SIMD": 16 + }, + "StreamingFIFO_21": { + "ram_style": "ultra", + "depth": 1024, + "impl_style": "vivado" + }, + "ConvolutionInputGenerator_4": { + "SIMD": 16, + "ram_style": "distributed" + }, + "Vector_Vector_Activate_Batch_3": { + "PE": 16, + "resType": "lut" + }, + "StreamingFIFO_23": { + "ram_style": "auto", + "depth": 256, + "impl_style": "rtl" + }, + "StreamingFCLayer_Batch_4": { + "PE": 16, + "SIMD": 16, + "ram_style": "block", + "mem_mode": "decoupled", + "resType": "lut" + }, + "StreamingDataWidthConverter_Batch_6": { + "impl_style": "hls" + }, + "FMPadding_Batch_4": { + "SIMD": 
32 + }, + "StreamingFIFO_26": { + "ram_style": "ultra", + "depth": 512, + "impl_style": "vivado" + }, + "ConvolutionInputGenerator_5": { + "SIMD": 32, + "ram_style": "distributed" + }, + "Vector_Vector_Activate_Batch_4": { + "PE": 32, + "resType": "lut" + }, + "StreamingDataWidthConverter_Batch_7": { + "impl_style": "hls" + }, + "StreamingFIFO_29": { + "ram_style": "ultra", + "depth": 1024, + "impl_style": "vivado" + }, + "StreamingFCLayer_Batch_5": { + "PE": 32, + "SIMD": 16, + "ram_style": "block", + "mem_mode": "decoupled", + "resType": "lut" + }, + "StreamingDataWidthConverter_Batch_8": { + "impl_style": "hls" + }, + "StreamingFIFO_31": { + "ram_style": "auto", + "depth": 32, + "impl_style": "rtl" + }, + "FMPadding_Batch_5": { + "SIMD": 8 + }, + "StreamingFIFO_32": { + "ram_style": "ultra", + "depth": 2048, + "impl_style": "vivado" + }, + "ConvolutionInputGenerator_6": { + "SIMD": 8, + "ram_style": "distributed" + }, + "Vector_Vector_Activate_Batch_5": { + "PE": 8, + "resType": "lut" + }, + "StreamingDataWidthConverter_Batch_9": { + "impl_style": "hls" + }, + "StreamingFIFO_35": { + "ram_style": "auto", + "depth": 256, + "impl_style": "rtl" + }, + "StreamingFCLayer_Batch_6": { + "PE": 16, + "SIMD": 16, + "ram_style": "block", + "mem_mode": "decoupled", + "resType": "lut" + }, + "FMPadding_Batch_6": { + "SIMD": 16 + }, + "StreamingFIFO_37": { + "ram_style": "ultra", + "depth": 2048, + "impl_style": "vivado" + }, + "ConvolutionInputGenerator_7": { + "SIMD": 16, + "ram_style": "distributed" + }, + "Vector_Vector_Activate_Batch_6": { + "PE": 16, + "resType": "lut" + }, + "StreamingFIFO_39": { + "ram_style": "ultra", + "depth": 1024, + "impl_style": "vivado" + }, + "StreamingFCLayer_Batch_7": { + "PE": 32, + "SIMD": 16, + "ram_style": "block", + "mem_mode": "decoupled", + "resType": "lut" + }, + "StreamingDataWidthConverter_Batch_10": { + "impl_style": "hls" + }, + "StreamingFIFO_41": { + "ram_style": "auto", + "depth": 32, + "impl_style": "rtl" + }, + 
"FMPadding_Batch_7": { + "SIMD": 16 + }, + "StreamingFIFO_42": { + "ram_style": "ultra", + "depth": 2048, + "impl_style": "vivado" + }, + "ConvolutionInputGenerator_8": { + "SIMD": 16, + "ram_style": "distributed" + }, + "Vector_Vector_Activate_Batch_7": { + "PE": 16, + "resType": "lut" + }, + "StreamingFIFO_44": { + "ram_style": "ultra", + "depth": 1024, + "impl_style": "vivado" + }, + "StreamingFCLayer_Batch_8": { + "PE": 32, + "SIMD": 16, + "ram_style": "block", + "mem_mode": "decoupled", + "resType": "lut" + }, + "StreamingDataWidthConverter_Batch_11": { + "impl_style": "hls" + }, + "StreamingFIFO_46": { + "ram_style": "auto", + "depth": 32, + "impl_style": "rtl" + }, + "FMPadding_Batch_8": { + "SIMD": 16 + }, + "StreamingFIFO_47": { + "ram_style": "ultra", + "depth": 2048, + "impl_style": "vivado" + }, + "ConvolutionInputGenerator_9": { + "SIMD": 16, + "ram_style": "distributed" + }, + "Vector_Vector_Activate_Batch_8": { + "PE": 16, + "resType": "lut" + }, + "StreamingFIFO_49": { + "ram_style": "ultra", + "depth": 1024, + "impl_style": "vivado" + }, + "StreamingFCLayer_Batch_9": { + "PE": 32, + "SIMD": 16, + "ram_style": "block", + "mem_mode": "decoupled", + "resType": "lut" + }, + "StreamingDataWidthConverter_Batch_12": { + "impl_style": "hls" + }, + "StreamingFIFO_51": { + "ram_style": "auto", + "depth": 32, + "impl_style": "rtl" + }, + "FMPadding_Batch_9": { + "SIMD": 16 + }, + "StreamingFIFO_52": { + "ram_style": "ultra", + "depth": 2048, + "impl_style": "vivado" + }, + "ConvolutionInputGenerator_10": { + "SIMD": 16, + "ram_style": "distributed" + }, + "Vector_Vector_Activate_Batch_9": { + "PE": 16, + "resType": "lut" + }, + "StreamingFIFO_54": { + "ram_style": "ultra", + "depth": 1024, + "impl_style": "vivado" + }, + "StreamingFCLayer_Batch_10": { + "PE": 32, + "SIMD": 16, + "ram_style": "block", + "mem_mode": "decoupled", + "resType": "lut" + }, + "StreamingDataWidthConverter_Batch_13": { + "impl_style": "hls" + }, + "StreamingFIFO_56": { + "ram_style": 
"auto", + "depth": 32, + "impl_style": "rtl" + }, + "FMPadding_Batch_10": { + "SIMD": 16 + }, + "StreamingFIFO_57": { + "ram_style": "ultra", + "depth": 2048, + "impl_style": "vivado" + }, + "ConvolutionInputGenerator_11": { + "SIMD": 16, + "ram_style": "distributed" + }, + "Vector_Vector_Activate_Batch_10": { + "PE": 16, + "resType": "lut" + }, + "StreamingFIFO_59": { + "ram_style": "ultra", + "depth": 1024, + "impl_style": "vivado" + }, + "StreamingFCLayer_Batch_11": { + "PE": 32, + "SIMD": 16, + "ram_style": "block", + "mem_mode": "decoupled", + "resType": "lut" + }, + "StreamingDataWidthConverter_Batch_14": { + "impl_style": "hls" + }, + "StreamingFIFO_61": { + "ram_style": "auto", + "depth": 128, + "impl_style": "rtl" + }, + "FMPadding_Batch_11": { + "SIMD": 4 + }, + "StreamingFIFO_62": { + "ram_style": "ultra", + "depth": 4096, + "impl_style": "vivado" + }, + "ConvolutionInputGenerator_12": { + "SIMD": 4, + "ram_style": "distributed" + }, + "Vector_Vector_Activate_Batch_11": { + "PE": 4, + "resType": "lut" + }, + "StreamingDataWidthConverter_Batch_15": { + "impl_style": "hls" + }, + "StreamingFIFO_65": { + "ram_style": "auto", + "depth": 256, + "impl_style": "rtl" + }, + "StreamingFCLayer_Batch_12": { + "PE": 16, + "SIMD": 16, + "ram_style": "block", + "mem_mode": "decoupled", + "resType": "lut" + }, + "StreamingDataWidthConverter_Batch_16": { + "impl_style": "hls" + }, + "StreamingFIFO_67": { + "ram_style": "auto", + "depth": 32, + "impl_style": "rtl" + }, + "FMPadding_Batch_12": { + "SIMD": 8 + }, + "StreamingFIFO_68": { + "ram_style": "ultra", + "depth": 4096, + "impl_style": "vivado" + }, + "ConvolutionInputGenerator_13": { + "SIMD": 8, + "ram_style": "distributed" + }, + "Vector_Vector_Activate_Batch_12": { + "PE": 8, + "resType": "lut" + }, + "StreamingDataWidthConverter_Batch_17": { + "impl_style": "hls" + }, + "StreamingFIFO_71": { + "ram_style": "ultra", + "depth": 1024, + "impl_style": "vivado" + }, + "StreamingFCLayer_Batch_13": { + "PE": 32, + 
"SIMD": 16, + "ram_style": "block", + "mem_mode": "decoupled", + "resType": "lut" + }, + "StreamingDataWidthConverter_Batch_18": { + "impl_style": "hls" + }, + "ConvolutionInputGenerator_14": { + "SIMD": 4, + "ram_style": "distributed" + }, + "Pool_Batch_0": { + "PE": 4 + }, + "StreamingFCLayer_Batch_14": { + "PE": 4, + "SIMD": 4, + "ram_style": "block", + "mem_mode": "decoupled", + "resType": "lut" + }, + "StreamingDataWidthConverter_Batch_19": { + "impl_style": "hls" + }, + "ChannelwiseOp_Batch_0": { + "PE": 1, + "ram_style": "distributed" + }, + "LabelSelect_Batch_0": { + "PE": 1 + } +} diff --git a/build/mobilenet-v1/folding_config/U280_folding_config.json b/build/mobilenet-v1/folding_config/U280_folding_config.json new file mode 100644 index 0000000..f5ccf9b --- /dev/null +++ b/build/mobilenet-v1/folding_config/U280_folding_config.json @@ -0,0 +1,499 @@ +{ + "Defaults": {}, + "StreamingFIFO_0": { + "ram_style": "ultra", + "depth": 512, + "impl_style": "vivado" + }, + "ConvolutionInputGenerator_0": { + "SIMD": 3, + "ram_style": "distributed" + }, + "StreamingFCLayer_Batch_0": { + "PE": 32, + "SIMD": 3, + "ram_style": "block", + "mem_mode": "decoupled", + "resType": "dsp" + }, + "FMPadding_Batch_0": { + "SIMD": 32 + }, + "StreamingFIFO_3": { + "ram_style": "auto", + "depth": 256, + "impl_style": "rtl" + }, + "ConvolutionInputGenerator_1": { + "SIMD": 32, + "ram_style": "distributed" + }, + "Vector_Vector_Activate_Batch_0": { + "PE": 32, + "resType": "lut" + }, + "StreamingDataWidthConverter_Batch_0": { + "impl_style": "hls" + }, + "StreamingFCLayer_Batch_1": { + "PE": 16, + "SIMD": 16, + "ram_style": "block", + "mem_mode": "decoupled", + "resType": "lut" + }, + "StreamingDataWidthConverter_Batch_1": { + "impl_style": "hls" + }, + "FMPadding_Batch_1": { + "SIMD": 32 + }, + "StreamingFIFO_9": { + "ram_style": "ultra", + "depth": 512, + "impl_style": "vivado" + }, + "ConvolutionInputGenerator_2": { + "SIMD": 32, + "ram_style": "distributed" + }, + 
"Vector_Vector_Activate_Batch_1": { + "PE": 32, + "resType": "lut" + }, + "StreamingDataWidthConverter_Batch_2": { + "impl_style": "hls" + }, + "StreamingFIFO_12": { + "ram_style": "auto", + "depth": 256, + "impl_style": "rtl" + }, + "StreamingFCLayer_Batch_2": { + "PE": 16, + "SIMD": 16, + "ram_style": "block", + "mem_mode": "decoupled", + "resType": "lut" + }, + "StreamingDataWidthConverter_Batch_3": { + "impl_style": "hls" + }, + "FMPadding_Batch_2": { + "SIMD": 64 + }, + "StreamingFIFO_15": { + "ram_style": "auto", + "depth": 256, + "impl_style": "rtl" + }, + "ConvolutionInputGenerator_3": { + "SIMD": 64, + "ram_style": "distributed" + }, + "Vector_Vector_Activate_Batch_2": { + "PE": 64, + "resType": "lut" + }, + "StreamingDataWidthConverter_Batch_4": { + "impl_style": "hls" + }, + "StreamingFIFO_18": { + "ram_style": "ultra", + "depth": 1024, + "impl_style": "vivado" + }, + "StreamingFCLayer_Batch_3": { + "PE": 32, + "SIMD": 16, + "ram_style": "block", + "mem_mode": "decoupled", + "resType": "lut" + }, + "StreamingDataWidthConverter_Batch_5": { + "impl_style": "hls" + }, + "StreamingFIFO_20": { + "ram_style": "auto", + "depth": 32, + "impl_style": "rtl" + }, + "FMPadding_Batch_3": { + "SIMD": 16 + }, + "StreamingFIFO_21": { + "ram_style": "ultra", + "depth": 1024, + "impl_style": "vivado" + }, + "ConvolutionInputGenerator_4": { + "SIMD": 16, + "ram_style": "distributed" + }, + "Vector_Vector_Activate_Batch_3": { + "PE": 16, + "resType": "lut" + }, + "StreamingFIFO_23": { + "ram_style": "auto", + "depth": 256, + "impl_style": "rtl" + }, + "StreamingFCLayer_Batch_4": { + "PE": 16, + "SIMD": 16, + "ram_style": "block", + "mem_mode": "decoupled", + "resType": "lut" + }, + "StreamingDataWidthConverter_Batch_6": { + "impl_style": "hls" + }, + "FMPadding_Batch_4": { + "SIMD": 32 + }, + "StreamingFIFO_26": { + "ram_style": "ultra", + "depth": 512, + "impl_style": "vivado" + }, + "ConvolutionInputGenerator_5": { + "SIMD": 32, + "ram_style": "distributed" + }, + 
"Vector_Vector_Activate_Batch_4": { + "PE": 32, + "resType": "lut" + }, + "StreamingDataWidthConverter_Batch_7": { + "impl_style": "hls" + }, + "StreamingFIFO_29": { + "ram_style": "ultra", + "depth": 1024, + "impl_style": "vivado" + }, + "StreamingFCLayer_Batch_5": { + "PE": 32, + "SIMD": 16, + "ram_style": "block", + "mem_mode": "decoupled", + "resType": "lut" + }, + "StreamingDataWidthConverter_Batch_8": { + "impl_style": "hls" + }, + "StreamingFIFO_31": { + "ram_style": "auto", + "depth": 32, + "impl_style": "rtl" + }, + "FMPadding_Batch_5": { + "SIMD": 8 + }, + "StreamingFIFO_32": { + "ram_style": "ultra", + "depth": 2048, + "impl_style": "vivado" + }, + "ConvolutionInputGenerator_6": { + "SIMD": 8, + "ram_style": "distributed" + }, + "Vector_Vector_Activate_Batch_5": { + "PE": 8, + "resType": "lut" + }, + "StreamingDataWidthConverter_Batch_9": { + "impl_style": "hls" + }, + "StreamingFIFO_35": { + "ram_style": "auto", + "depth": 256, + "impl_style": "rtl" + }, + "StreamingFCLayer_Batch_6": { + "PE": 16, + "SIMD": 16, + "ram_style": "block", + "mem_mode": "decoupled", + "resType": "lut" + }, + "FMPadding_Batch_6": { + "SIMD": 16 + }, + "StreamingFIFO_37": { + "ram_style": "ultra", + "depth": 2048, + "impl_style": "vivado" + }, + "ConvolutionInputGenerator_7": { + "SIMD": 16, + "ram_style": "distributed" + }, + "Vector_Vector_Activate_Batch_6": { + "PE": 16, + "resType": "lut" + }, + "StreamingFIFO_39": { + "ram_style": "ultra", + "depth": 1024, + "impl_style": "vivado" + }, + "StreamingFCLayer_Batch_7": { + "PE": 32, + "SIMD": 16, + "ram_style": "block", + "mem_mode": "decoupled", + "resType": "lut" + }, + "StreamingDataWidthConverter_Batch_10": { + "impl_style": "hls" + }, + "StreamingFIFO_41": { + "ram_style": "auto", + "depth": 32, + "impl_style": "rtl" + }, + "FMPadding_Batch_7": { + "SIMD": 16 + }, + "StreamingFIFO_42": { + "ram_style": "ultra", + "depth": 2048, + "impl_style": "vivado" + }, + "ConvolutionInputGenerator_8": { + "SIMD": 16, + "ram_style": 
"distributed" + }, + "Vector_Vector_Activate_Batch_7": { + "PE": 16, + "resType": "lut" + }, + "StreamingFIFO_44": { + "ram_style": "ultra", + "depth": 1024, + "impl_style": "vivado" + }, + "StreamingFCLayer_Batch_8": { + "PE": 32, + "SIMD": 16, + "ram_style": "block", + "mem_mode": "decoupled", + "resType": "lut" + }, + "StreamingDataWidthConverter_Batch_11": { + "impl_style": "hls" + }, + "StreamingFIFO_46": { + "ram_style": "auto", + "depth": 32, + "impl_style": "rtl" + }, + "FMPadding_Batch_8": { + "SIMD": 16 + }, + "StreamingFIFO_47": { + "ram_style": "ultra", + "depth": 2048, + "impl_style": "vivado" + }, + "ConvolutionInputGenerator_9": { + "SIMD": 16, + "ram_style": "distributed" + }, + "Vector_Vector_Activate_Batch_8": { + "PE": 16, + "resType": "lut" + }, + "StreamingFIFO_49": { + "ram_style": "ultra", + "depth": 1024, + "impl_style": "vivado" + }, + "StreamingFCLayer_Batch_9": { + "PE": 32, + "SIMD": 16, + "ram_style": "block", + "mem_mode": "decoupled", + "resType": "lut" + }, + "StreamingDataWidthConverter_Batch_12": { + "impl_style": "hls" + }, + "StreamingFIFO_51": { + "ram_style": "auto", + "depth": 32, + "impl_style": "rtl" + }, + "FMPadding_Batch_9": { + "SIMD": 16 + }, + "StreamingFIFO_52": { + "ram_style": "ultra", + "depth": 2048, + "impl_style": "vivado" + }, + "ConvolutionInputGenerator_10": { + "SIMD": 16, + "ram_style": "distributed" + }, + "Vector_Vector_Activate_Batch_9": { + "PE": 16, + "resType": "lut" + }, + "StreamingFIFO_54": { + "ram_style": "ultra", + "depth": 1024, + "impl_style": "vivado" + }, + "StreamingFCLayer_Batch_10": { + "PE": 32, + "SIMD": 16, + "ram_style": "block", + "mem_mode": "decoupled", + "resType": "lut" + }, + "StreamingDataWidthConverter_Batch_13": { + "impl_style": "hls" + }, + "StreamingFIFO_56": { + "ram_style": "auto", + "depth": 32, + "impl_style": "rtl" + }, + "FMPadding_Batch_10": { + "SIMD": 16 + }, + "StreamingFIFO_57": { + "ram_style": "ultra", + "depth": 2048, + "impl_style": "vivado" + }, + 
"ConvolutionInputGenerator_11": { + "SIMD": 16, + "ram_style": "distributed" + }, + "Vector_Vector_Activate_Batch_10": { + "PE": 16, + "resType": "lut" + }, + "StreamingFIFO_59": { + "ram_style": "ultra", + "depth": 1024, + "impl_style": "vivado" + }, + "StreamingFCLayer_Batch_11": { + "PE": 32, + "SIMD": 16, + "ram_style": "block", + "mem_mode": "decoupled", + "resType": "lut" + }, + "StreamingDataWidthConverter_Batch_14": { + "impl_style": "hls" + }, + "StreamingFIFO_61": { + "ram_style": "auto", + "depth": 128, + "impl_style": "rtl" + }, + "FMPadding_Batch_11": { + "SIMD": 4 + }, + "StreamingFIFO_62": { + "ram_style": "ultra", + "depth": 4096, + "impl_style": "vivado" + }, + "ConvolutionInputGenerator_12": { + "SIMD": 4, + "ram_style": "distributed" + }, + "Vector_Vector_Activate_Batch_11": { + "PE": 4, + "resType": "lut" + }, + "StreamingDataWidthConverter_Batch_15": { + "impl_style": "hls" + }, + "StreamingFIFO_65": { + "ram_style": "auto", + "depth": 256, + "impl_style": "rtl" + }, + "StreamingFCLayer_Batch_12": { + "PE": 16, + "SIMD": 16, + "ram_style": "block", + "mem_mode": "decoupled", + "resType": "lut" + }, + "StreamingDataWidthConverter_Batch_16": { + "impl_style": "hls" + }, + "StreamingFIFO_67": { + "ram_style": "auto", + "depth": 32, + "impl_style": "rtl" + }, + "FMPadding_Batch_12": { + "SIMD": 8 + }, + "StreamingFIFO_68": { + "ram_style": "ultra", + "depth": 4096, + "impl_style": "vivado" + }, + "ConvolutionInputGenerator_13": { + "SIMD": 8, + "ram_style": "distributed" + }, + "Vector_Vector_Activate_Batch_12": { + "PE": 8, + "resType": "lut" + }, + "StreamingDataWidthConverter_Batch_17": { + "impl_style": "hls" + }, + "StreamingFIFO_71": { + "ram_style": "ultra", + "depth": 1024, + "impl_style": "vivado" + }, + "StreamingFCLayer_Batch_13": { + "PE": 32, + "SIMD": 16, + "ram_style": "block", + "mem_mode": "decoupled", + "resType": "lut" + }, + "StreamingDataWidthConverter_Batch_18": { + "impl_style": "hls" + }, + "ConvolutionInputGenerator_14": { 
+ "SIMD": 4, + "ram_style": "distributed" + }, + "Pool_Batch_0": { + "PE": 4 + }, + "StreamingFCLayer_Batch_14": { + "PE": 4, + "SIMD": 4, + "ram_style": "block", + "mem_mode": "decoupled", + "resType": "lut" + }, + "StreamingDataWidthConverter_Batch_19": { + "impl_style": "hls" + }, + "ChannelwiseOp_Batch_0": { + "PE": 1, + "ram_style": "distributed" + }, + "LabelSelect_Batch_0": { + "PE": 1 + } +} diff --git a/build/mobilenet-v1/folding_config/U50_folding_config.json b/build/mobilenet-v1/folding_config/U50_folding_config.json new file mode 100644 index 0000000..f5ccf9b --- /dev/null +++ b/build/mobilenet-v1/folding_config/U50_folding_config.json @@ -0,0 +1,499 @@ +{ + "Defaults": {}, + "StreamingFIFO_0": { + "ram_style": "ultra", + "depth": 512, + "impl_style": "vivado" + }, + "ConvolutionInputGenerator_0": { + "SIMD": 3, + "ram_style": "distributed" + }, + "StreamingFCLayer_Batch_0": { + "PE": 32, + "SIMD": 3, + "ram_style": "block", + "mem_mode": "decoupled", + "resType": "dsp" + }, + "FMPadding_Batch_0": { + "SIMD": 32 + }, + "StreamingFIFO_3": { + "ram_style": "auto", + "depth": 256, + "impl_style": "rtl" + }, + "ConvolutionInputGenerator_1": { + "SIMD": 32, + "ram_style": "distributed" + }, + "Vector_Vector_Activate_Batch_0": { + "PE": 32, + "resType": "lut" + }, + "StreamingDataWidthConverter_Batch_0": { + "impl_style": "hls" + }, + "StreamingFCLayer_Batch_1": { + "PE": 16, + "SIMD": 16, + "ram_style": "block", + "mem_mode": "decoupled", + "resType": "lut" + }, + "StreamingDataWidthConverter_Batch_1": { + "impl_style": "hls" + }, + "FMPadding_Batch_1": { + "SIMD": 32 + }, + "StreamingFIFO_9": { + "ram_style": "ultra", + "depth": 512, + "impl_style": "vivado" + }, + "ConvolutionInputGenerator_2": { + "SIMD": 32, + "ram_style": "distributed" + }, + "Vector_Vector_Activate_Batch_1": { + "PE": 32, + "resType": "lut" + }, + "StreamingDataWidthConverter_Batch_2": { + "impl_style": "hls" + }, + "StreamingFIFO_12": { + "ram_style": "auto", + "depth": 256, + 
"impl_style": "rtl" + }, + "StreamingFCLayer_Batch_2": { + "PE": 16, + "SIMD": 16, + "ram_style": "block", + "mem_mode": "decoupled", + "resType": "lut" + }, + "StreamingDataWidthConverter_Batch_3": { + "impl_style": "hls" + }, + "FMPadding_Batch_2": { + "SIMD": 64 + }, + "StreamingFIFO_15": { + "ram_style": "auto", + "depth": 256, + "impl_style": "rtl" + }, + "ConvolutionInputGenerator_3": { + "SIMD": 64, + "ram_style": "distributed" + }, + "Vector_Vector_Activate_Batch_2": { + "PE": 64, + "resType": "lut" + }, + "StreamingDataWidthConverter_Batch_4": { + "impl_style": "hls" + }, + "StreamingFIFO_18": { + "ram_style": "ultra", + "depth": 1024, + "impl_style": "vivado" + }, + "StreamingFCLayer_Batch_3": { + "PE": 32, + "SIMD": 16, + "ram_style": "block", + "mem_mode": "decoupled", + "resType": "lut" + }, + "StreamingDataWidthConverter_Batch_5": { + "impl_style": "hls" + }, + "StreamingFIFO_20": { + "ram_style": "auto", + "depth": 32, + "impl_style": "rtl" + }, + "FMPadding_Batch_3": { + "SIMD": 16 + }, + "StreamingFIFO_21": { + "ram_style": "ultra", + "depth": 1024, + "impl_style": "vivado" + }, + "ConvolutionInputGenerator_4": { + "SIMD": 16, + "ram_style": "distributed" + }, + "Vector_Vector_Activate_Batch_3": { + "PE": 16, + "resType": "lut" + }, + "StreamingFIFO_23": { + "ram_style": "auto", + "depth": 256, + "impl_style": "rtl" + }, + "StreamingFCLayer_Batch_4": { + "PE": 16, + "SIMD": 16, + "ram_style": "block", + "mem_mode": "decoupled", + "resType": "lut" + }, + "StreamingDataWidthConverter_Batch_6": { + "impl_style": "hls" + }, + "FMPadding_Batch_4": { + "SIMD": 32 + }, + "StreamingFIFO_26": { + "ram_style": "ultra", + "depth": 512, + "impl_style": "vivado" + }, + "ConvolutionInputGenerator_5": { + "SIMD": 32, + "ram_style": "distributed" + }, + "Vector_Vector_Activate_Batch_4": { + "PE": 32, + "resType": "lut" + }, + "StreamingDataWidthConverter_Batch_7": { + "impl_style": "hls" + }, + "StreamingFIFO_29": { + "ram_style": "ultra", + "depth": 1024, + 
"impl_style": "vivado" + }, + "StreamingFCLayer_Batch_5": { + "PE": 32, + "SIMD": 16, + "ram_style": "block", + "mem_mode": "decoupled", + "resType": "lut" + }, + "StreamingDataWidthConverter_Batch_8": { + "impl_style": "hls" + }, + "StreamingFIFO_31": { + "ram_style": "auto", + "depth": 32, + "impl_style": "rtl" + }, + "FMPadding_Batch_5": { + "SIMD": 8 + }, + "StreamingFIFO_32": { + "ram_style": "ultra", + "depth": 2048, + "impl_style": "vivado" + }, + "ConvolutionInputGenerator_6": { + "SIMD": 8, + "ram_style": "distributed" + }, + "Vector_Vector_Activate_Batch_5": { + "PE": 8, + "resType": "lut" + }, + "StreamingDataWidthConverter_Batch_9": { + "impl_style": "hls" + }, + "StreamingFIFO_35": { + "ram_style": "auto", + "depth": 256, + "impl_style": "rtl" + }, + "StreamingFCLayer_Batch_6": { + "PE": 16, + "SIMD": 16, + "ram_style": "block", + "mem_mode": "decoupled", + "resType": "lut" + }, + "FMPadding_Batch_6": { + "SIMD": 16 + }, + "StreamingFIFO_37": { + "ram_style": "ultra", + "depth": 2048, + "impl_style": "vivado" + }, + "ConvolutionInputGenerator_7": { + "SIMD": 16, + "ram_style": "distributed" + }, + "Vector_Vector_Activate_Batch_6": { + "PE": 16, + "resType": "lut" + }, + "StreamingFIFO_39": { + "ram_style": "ultra", + "depth": 1024, + "impl_style": "vivado" + }, + "StreamingFCLayer_Batch_7": { + "PE": 32, + "SIMD": 16, + "ram_style": "block", + "mem_mode": "decoupled", + "resType": "lut" + }, + "StreamingDataWidthConverter_Batch_10": { + "impl_style": "hls" + }, + "StreamingFIFO_41": { + "ram_style": "auto", + "depth": 32, + "impl_style": "rtl" + }, + "FMPadding_Batch_7": { + "SIMD": 16 + }, + "StreamingFIFO_42": { + "ram_style": "ultra", + "depth": 2048, + "impl_style": "vivado" + }, + "ConvolutionInputGenerator_8": { + "SIMD": 16, + "ram_style": "distributed" + }, + "Vector_Vector_Activate_Batch_7": { + "PE": 16, + "resType": "lut" + }, + "StreamingFIFO_44": { + "ram_style": "ultra", + "depth": 1024, + "impl_style": "vivado" + }, + 
"StreamingFCLayer_Batch_8": { + "PE": 32, + "SIMD": 16, + "ram_style": "block", + "mem_mode": "decoupled", + "resType": "lut" + }, + "StreamingDataWidthConverter_Batch_11": { + "impl_style": "hls" + }, + "StreamingFIFO_46": { + "ram_style": "auto", + "depth": 32, + "impl_style": "rtl" + }, + "FMPadding_Batch_8": { + "SIMD": 16 + }, + "StreamingFIFO_47": { + "ram_style": "ultra", + "depth": 2048, + "impl_style": "vivado" + }, + "ConvolutionInputGenerator_9": { + "SIMD": 16, + "ram_style": "distributed" + }, + "Vector_Vector_Activate_Batch_8": { + "PE": 16, + "resType": "lut" + }, + "StreamingFIFO_49": { + "ram_style": "ultra", + "depth": 1024, + "impl_style": "vivado" + }, + "StreamingFCLayer_Batch_9": { + "PE": 32, + "SIMD": 16, + "ram_style": "block", + "mem_mode": "decoupled", + "resType": "lut" + }, + "StreamingDataWidthConverter_Batch_12": { + "impl_style": "hls" + }, + "StreamingFIFO_51": { + "ram_style": "auto", + "depth": 32, + "impl_style": "rtl" + }, + "FMPadding_Batch_9": { + "SIMD": 16 + }, + "StreamingFIFO_52": { + "ram_style": "ultra", + "depth": 2048, + "impl_style": "vivado" + }, + "ConvolutionInputGenerator_10": { + "SIMD": 16, + "ram_style": "distributed" + }, + "Vector_Vector_Activate_Batch_9": { + "PE": 16, + "resType": "lut" + }, + "StreamingFIFO_54": { + "ram_style": "ultra", + "depth": 1024, + "impl_style": "vivado" + }, + "StreamingFCLayer_Batch_10": { + "PE": 32, + "SIMD": 16, + "ram_style": "block", + "mem_mode": "decoupled", + "resType": "lut" + }, + "StreamingDataWidthConverter_Batch_13": { + "impl_style": "hls" + }, + "StreamingFIFO_56": { + "ram_style": "auto", + "depth": 32, + "impl_style": "rtl" + }, + "FMPadding_Batch_10": { + "SIMD": 16 + }, + "StreamingFIFO_57": { + "ram_style": "ultra", + "depth": 2048, + "impl_style": "vivado" + }, + "ConvolutionInputGenerator_11": { + "SIMD": 16, + "ram_style": "distributed" + }, + "Vector_Vector_Activate_Batch_10": { + "PE": 16, + "resType": "lut" + }, + "StreamingFIFO_59": { + "ram_style": 
"ultra", + "depth": 1024, + "impl_style": "vivado" + }, + "StreamingFCLayer_Batch_11": { + "PE": 32, + "SIMD": 16, + "ram_style": "block", + "mem_mode": "decoupled", + "resType": "lut" + }, + "StreamingDataWidthConverter_Batch_14": { + "impl_style": "hls" + }, + "StreamingFIFO_61": { + "ram_style": "auto", + "depth": 128, + "impl_style": "rtl" + }, + "FMPadding_Batch_11": { + "SIMD": 4 + }, + "StreamingFIFO_62": { + "ram_style": "ultra", + "depth": 4096, + "impl_style": "vivado" + }, + "ConvolutionInputGenerator_12": { + "SIMD": 4, + "ram_style": "distributed" + }, + "Vector_Vector_Activate_Batch_11": { + "PE": 4, + "resType": "lut" + }, + "StreamingDataWidthConverter_Batch_15": { + "impl_style": "hls" + }, + "StreamingFIFO_65": { + "ram_style": "auto", + "depth": 256, + "impl_style": "rtl" + }, + "StreamingFCLayer_Batch_12": { + "PE": 16, + "SIMD": 16, + "ram_style": "block", + "mem_mode": "decoupled", + "resType": "lut" + }, + "StreamingDataWidthConverter_Batch_16": { + "impl_style": "hls" + }, + "StreamingFIFO_67": { + "ram_style": "auto", + "depth": 32, + "impl_style": "rtl" + }, + "FMPadding_Batch_12": { + "SIMD": 8 + }, + "StreamingFIFO_68": { + "ram_style": "ultra", + "depth": 4096, + "impl_style": "vivado" + }, + "ConvolutionInputGenerator_13": { + "SIMD": 8, + "ram_style": "distributed" + }, + "Vector_Vector_Activate_Batch_12": { + "PE": 8, + "resType": "lut" + }, + "StreamingDataWidthConverter_Batch_17": { + "impl_style": "hls" + }, + "StreamingFIFO_71": { + "ram_style": "ultra", + "depth": 1024, + "impl_style": "vivado" + }, + "StreamingFCLayer_Batch_13": { + "PE": 32, + "SIMD": 16, + "ram_style": "block", + "mem_mode": "decoupled", + "resType": "lut" + }, + "StreamingDataWidthConverter_Batch_18": { + "impl_style": "hls" + }, + "ConvolutionInputGenerator_14": { + "SIMD": 4, + "ram_style": "distributed" + }, + "Pool_Batch_0": { + "PE": 4 + }, + "StreamingFCLayer_Batch_14": { + "PE": 4, + "SIMD": 4, + "ram_style": "block", + "mem_mode": "decoupled", + 
"resType": "lut" + }, + "StreamingDataWidthConverter_Batch_19": { + "impl_style": "hls" + }, + "ChannelwiseOp_Batch_0": { + "PE": 1, + "ram_style": "distributed" + }, + "LabelSelect_Batch_0": { + "PE": 1 + } +} diff --git a/build/mobilenet-v1/folding_config/ZCU102_folding_config.json b/build/mobilenet-v1/folding_config/ZCU102_folding_config.json new file mode 100755 index 0000000..02d6d6e --- /dev/null +++ b/build/mobilenet-v1/folding_config/ZCU102_folding_config.json @@ -0,0 +1,816 @@ +{ + "Defaults": {}, + "StreamingFIFO_0": { + "ram_style": "block", + "depth": 1024, + "impl_style": "vivado" + }, + "ConvolutionInputGenerator_0": { + "SIMD": 1, + "ram_style": "distributed" + }, + "StreamingDataWidthConverter_Batch_0": { + "impl_style": "hls" + }, + "StreamingFCLayer_Batch_0": { + "PE": 16, + "SIMD": 3, + "ram_style": "auto", + "resType": "dsp", + "mem_mode": "decoupled", + "runtime_writeable_weights": 0 + }, + "StreamingFIFO_3": { + "ram_style": "auto", + "depth": 64, + "impl_style": "rtl" + }, + "StreamingDataWidthConverter_Batch_1": { + "impl_style": "hls" + }, + "Thresholding_Batch_0": { + "PE": 1, + "ram_style": "distributed", + "mem_mode": "const", + "runtime_writeable_weights": 0 + }, + "StreamingDataWidthConverter_Batch_2": { + "impl_style": "hls" + }, + "StreamingFIFO_6": { + "ram_style": "auto", + "depth": 256, + "impl_style": "rtl" + }, + "FMPadding_Batch_0": { + "SIMD": 2 + }, + "StreamingDataWidthConverter_Batch_3": { + "impl_style": "hls" + }, + "StreamingFIFO_8": { + "ram_style": "block", + "depth": 512, + "impl_style": "vivado" + }, + "ConvolutionInputGenerator_1": { + "SIMD": 16, + "ram_style": "distributed" + }, + "Vector_Vector_Activate_Batch_0": { + "PE": 16, + "resType": "lut" + }, + "StreamingFIFO_10": { + "ram_style": "auto", + "depth": 256, + "impl_style": "rtl" + }, + "StreamingDataWidthConverter_Batch_4": { + "impl_style": "hls" + }, + "Thresholding_Batch_1": { + "PE": 1, + "ram_style": "distributed", + "mem_mode": "const", + 
"runtime_writeable_weights": 0 + }, + "StreamingDataWidthConverter_Batch_5": { + "impl_style": "hls" + }, + "StreamingFCLayer_Batch_1": { + "PE": 8, + "SIMD": 8, + "ram_style": "auto", + "resType": "lut", + "mem_mode": "decoupled", + "runtime_writeable_weights": 0 + }, + "StreamingDataWidthConverter_Batch_6": { + "impl_style": "hls" + }, + "Thresholding_Batch_2": { + "PE": 2, + "ram_style": "distributed", + "mem_mode": "const", + "runtime_writeable_weights": 0 + }, + "StreamingDataWidthConverter_Batch_7": { + "impl_style": "hls" + }, + "StreamingFIFO_17": { + "ram_style": "auto", + "depth": 32, + "impl_style": "rtl" + }, + "FMPadding_Batch_1": { + "SIMD": 4 + }, + "StreamingDataWidthConverter_Batch_8": { + "impl_style": "hls" + }, + "StreamingFIFO_19": { + "ram_style": "block", + "depth": 1024, + "impl_style": "vivado" + }, + "ConvolutionInputGenerator_2": { + "SIMD": 8, + "ram_style": "distributed" + }, + "Vector_Vector_Activate_Batch_1": { + "PE": 8, + "resType": "lut" + }, + "StreamingDataWidthConverter_Batch_9": { + "impl_style": "hls" + }, + "Thresholding_Batch_3": { + "PE": 1, + "ram_style": "distributed", + "mem_mode": "const", + "runtime_writeable_weights": 0 + }, + "StreamingDataWidthConverter_Batch_10": { + "impl_style": "hls" + }, + "StreamingFIFO_24": { + "ram_style": "auto", + "depth": 32, + "impl_style": "rtl" + }, + "StreamingFCLayer_Batch_2": { + "PE": 16, + "SIMD": 8, + "ram_style": "auto", + "resType": "lut", + "mem_mode": "decoupled", + "runtime_writeable_weights": 0 + }, + "StreamingDataWidthConverter_Batch_11": { + "impl_style": "hls" + }, + "Thresholding_Batch_4": { + "PE": 2, + "ram_style": "distributed", + "mem_mode": "const", + "runtime_writeable_weights": 0 + }, + "StreamingFIFO_27": { + "ram_style": "auto", + "depth": 128, + "impl_style": "rtl" + }, + "FMPadding_Batch_2": { + "SIMD": 2 + }, + "StreamingDataWidthConverter_Batch_12": { + "impl_style": "hls" + }, + "StreamingFIFO_29": { + "ram_style": "block", + "depth": 1024, + 
"impl_style": "vivado" + }, + "ConvolutionInputGenerator_3": { + "SIMD": 16, + "ram_style": "distributed" + }, + "Vector_Vector_Activate_Batch_2": { + "PE": 16, + "resType": "lut" + }, + "StreamingDataWidthConverter_Batch_13": { + "impl_style": "hls" + }, + "Thresholding_Batch_5": { + "PE": 2, + "ram_style": "distributed", + "mem_mode": "const", + "runtime_writeable_weights": 0 + }, + "StreamingDataWidthConverter_Batch_14": { + "impl_style": "hls" + }, + "StreamingFIFO_34": { + "ram_style": "auto", + "depth": 32, + "impl_style": "rtl" + }, + "StreamingFCLayer_Batch_3": { + "PE": 32, + "SIMD": 8, + "ram_style": "auto", + "resType": "lut", + "mem_mode": "decoupled", + "runtime_writeable_weights": 0 + }, + "StreamingDataWidthConverter_Batch_15": { + "impl_style": "hls" + }, + "Thresholding_Batch_6": { + "PE": 2, + "ram_style": "distributed", + "mem_mode": "const", + "runtime_writeable_weights": 0 + }, + "StreamingFIFO_37": { + "ram_style": "auto", + "depth": 128, + "impl_style": "rtl" + }, + "FMPadding_Batch_3": { + "SIMD": 2 + }, + "StreamingDataWidthConverter_Batch_16": { + "impl_style": "hls" + }, + "StreamingFIFO_39": { + "ram_style": "block", + "depth": 4096, + "impl_style": "vivado" + }, + "ConvolutionInputGenerator_4": { + "SIMD": 4, + "ram_style": "distributed" + }, + "Vector_Vector_Activate_Batch_3": { + "PE": 4, + "resType": "lut" + }, + "StreamingDataWidthConverter_Batch_17": { + "impl_style": "hls" + }, + "Thresholding_Batch_7": { + "PE": 1, + "ram_style": "distributed", + "mem_mode": "const", + "runtime_writeable_weights": 0 + }, + "StreamingDataWidthConverter_Batch_18": { + "impl_style": "hls" + }, + "StreamingFIFO_44": { + "ram_style": "auto", + "depth": 32, + "impl_style": "rtl" + }, + "StreamingFCLayer_Batch_4": { + "PE": 16, + "SIMD": 8, + "ram_style": "auto", + "resType": "lut", + "mem_mode": "decoupled", + "runtime_writeable_weights": 0 + }, + "StreamingDataWidthConverter_Batch_19": { + "impl_style": "hls" + }, + "Thresholding_Batch_8": { + "PE": 
1, + "ram_style": "distributed", + "mem_mode": "const", + "runtime_writeable_weights": 0 + }, + "StreamingFIFO_47": { + "ram_style": "block", + "depth": 512, + "impl_style": "vivado" + }, + "FMPadding_Batch_4": { + "SIMD": 1 + }, + "StreamingDataWidthConverter_Batch_20": { + "impl_style": "hls" + }, + "StreamingFIFO_49": { + "ram_style": "block", + "depth": 1024, + "impl_style": "vivado" + }, + "ConvolutionInputGenerator_5": { + "SIMD": 8, + "ram_style": "distributed" + }, + "Vector_Vector_Activate_Batch_4": { + "PE": 8, + "resType": "lut" + }, + "StreamingDataWidthConverter_Batch_21": { + "impl_style": "hls" + }, + "Thresholding_Batch_9": { + "PE": 1, + "ram_style": "distributed", + "mem_mode": "const", + "runtime_writeable_weights": 0 + }, + "StreamingDataWidthConverter_Batch_22": { + "impl_style": "hls" + }, + "StreamingFIFO_54": { + "ram_style": "auto", + "depth": 32, + "impl_style": "rtl" + }, + "StreamingFCLayer_Batch_5": { + "PE": 32, + "SIMD": 8, + "ram_style": "auto", + "resType": "lut", + "mem_mode": "decoupled", + "runtime_writeable_weights": 0 + }, + "StreamingDataWidthConverter_Batch_23": { + "impl_style": "hls" + }, + "Thresholding_Batch_10": { + "PE": 1, + "ram_style": "distributed", + "mem_mode": "const", + "runtime_writeable_weights": 0 + }, + "StreamingFIFO_57": { + "ram_style": "block", + "depth": 512, + "impl_style": "vivado" + }, + "FMPadding_Batch_5": { + "SIMD": 1 + }, + "StreamingDataWidthConverter_Batch_24": { + "impl_style": "hls" + }, + "StreamingFIFO_59": { + "ram_style": "block", + "depth": 8192, + "impl_style": "vivado" + }, + "ConvolutionInputGenerator_6": { + "SIMD": 2, + "ram_style": "distributed" + }, + "Vector_Vector_Activate_Batch_5": { + "PE": 2, + "resType": "lut" + }, + "StreamingDataWidthConverter_Batch_25": { + "impl_style": "hls" + }, + "Thresholding_Batch_11": { + "PE": 1, + "ram_style": "distributed", + "mem_mode": "const", + "runtime_writeable_weights": 0 + }, + "StreamingDataWidthConverter_Batch_26": { + "impl_style": 
"hls" + }, + "StreamingFIFO_64": { + "ram_style": "auto", + "depth": 32, + "impl_style": "rtl" + }, + "StreamingFCLayer_Batch_6": { + "PE": 16, + "SIMD": 8, + "ram_style": "auto", + "resType": "lut", + "mem_mode": "decoupled", + "runtime_writeable_weights": 0 + }, + "StreamingDataWidthConverter_Batch_27": { + "impl_style": "hls" + }, + "Thresholding_Batch_12": { + "PE": 1, + "ram_style": "distributed", + "mem_mode": "const", + "runtime_writeable_weights": 0 + }, + "StreamingFIFO_67": { + "ram_style": "block", + "depth": 512, + "impl_style": "vivado" + }, + "FMPadding_Batch_6": { + "SIMD": 1 + }, + "StreamingDataWidthConverter_Batch_28": { + "impl_style": "hls" + }, + "StreamingFIFO_69": { + "ram_style": "block", + "depth": 4096, + "impl_style": "vivado" + }, + "ConvolutionInputGenerator_7": { + "SIMD": 4, + "ram_style": "distributed" + }, + "Vector_Vector_Activate_Batch_6": { + "PE": 4, + "resType": "lut" + }, + "StreamingDataWidthConverter_Batch_29": { + "impl_style": "hls" + }, + "Thresholding_Batch_13": { + "PE": 1, + "ram_style": "distributed", + "mem_mode": "const", + "runtime_writeable_weights": 0 + }, + "StreamingDataWidthConverter_Batch_30": { + "impl_style": "hls" + }, + "StreamingFIFO_74": { + "ram_style": "auto", + "depth": 64, + "impl_style": "rtl" + }, + "StreamingFCLayer_Batch_7": { + "PE": 32, + "SIMD": 8, + "ram_style": "auto", + "resType": "lut", + "mem_mode": "decoupled", + "runtime_writeable_weights": 0 + }, + "StreamingDataWidthConverter_Batch_31": { + "impl_style": "hls" + }, + "Thresholding_Batch_14": { + "PE": 1, + "ram_style": "distributed", + "mem_mode": "const", + "runtime_writeable_weights": 0 + }, + "StreamingFIFO_77": { + "ram_style": "block", + "depth": 512, + "impl_style": "vivado" + }, + "FMPadding_Batch_7": { + "SIMD": 1 + }, + "StreamingDataWidthConverter_Batch_32": { + "impl_style": "hls" + }, + "StreamingFIFO_79": { + "ram_style": "block", + "depth": 4096, + "impl_style": "vivado" + }, + "ConvolutionInputGenerator_8": { + "SIMD": 
4, + "ram_style": "distributed" + }, + "Vector_Vector_Activate_Batch_7": { + "PE": 4, + "resType": "lut" + }, + "StreamingDataWidthConverter_Batch_33": { + "impl_style": "hls" + }, + "Thresholding_Batch_15": { + "PE": 1, + "ram_style": "distributed", + "mem_mode": "const", + "runtime_writeable_weights": 0 + }, + "StreamingDataWidthConverter_Batch_34": { + "impl_style": "hls" + }, + "StreamingFIFO_84": { + "ram_style": "auto", + "depth": 64, + "impl_style": "rtl" + }, + "StreamingFCLayer_Batch_8": { + "PE": 32, + "SIMD": 8, + "ram_style": "auto", + "resType": "lut", + "mem_mode": "decoupled", + "runtime_writeable_weights": 0 + }, + "StreamingDataWidthConverter_Batch_35": { + "impl_style": "hls" + }, + "Thresholding_Batch_16": { + "PE": 1, + "ram_style": "distributed", + "mem_mode": "const", + "runtime_writeable_weights": 0 + }, + "StreamingFIFO_87": { + "ram_style": "block", + "depth": 512, + "impl_style": "vivado" + }, + "FMPadding_Batch_8": { + "SIMD": 1 + }, + "StreamingDataWidthConverter_Batch_36": { + "impl_style": "hls" + }, + "StreamingFIFO_89": { + "ram_style": "block", + "depth": 4096, + "impl_style": "vivado" + }, + "ConvolutionInputGenerator_9": { + "SIMD": 4, + "ram_style": "distributed" + }, + "Vector_Vector_Activate_Batch_8": { + "PE": 4, + "resType": "lut" + }, + "StreamingDataWidthConverter_Batch_37": { + "impl_style": "hls" + }, + "Thresholding_Batch_17": { + "PE": 1, + "ram_style": "distributed", + "mem_mode": "const", + "runtime_writeable_weights": 0 + }, + "StreamingDataWidthConverter_Batch_38": { + "impl_style": "hls" + }, + "StreamingFIFO_94": { + "ram_style": "auto", + "depth": 64, + "impl_style": "rtl" + }, + "StreamingFCLayer_Batch_9": { + "PE": 32, + "SIMD": 8, + "ram_style": "auto", + "resType": "lut", + "mem_mode": "decoupled", + "runtime_writeable_weights": 0 + }, + "StreamingDataWidthConverter_Batch_39": { + "impl_style": "hls" + }, + "Thresholding_Batch_18": { + "PE": 1, + "ram_style": "distributed", + "mem_mode": "const", + 
"runtime_writeable_weights": 0 + }, + "StreamingFIFO_97": { + "ram_style": "block", + "depth": 512, + "impl_style": "vivado" + }, + "FMPadding_Batch_9": { + "SIMD": 1 + }, + "StreamingDataWidthConverter_Batch_40": { + "impl_style": "hls" + }, + "StreamingFIFO_99": { + "ram_style": "block", + "depth": 4096, + "impl_style": "vivado" + }, + "ConvolutionInputGenerator_10": { + "SIMD": 4, + "ram_style": "distributed" + }, + "Vector_Vector_Activate_Batch_9": { + "PE": 4, + "resType": "lut" + }, + "StreamingDataWidthConverter_Batch_41": { + "impl_style": "hls" + }, + "Thresholding_Batch_19": { + "PE": 1, + "ram_style": "distributed", + "mem_mode": "const", + "runtime_writeable_weights": 0 + }, + "StreamingDataWidthConverter_Batch_42": { + "impl_style": "hls" + }, + "StreamingFIFO_104": { + "ram_style": "auto", + "depth": 64, + "impl_style": "rtl" + }, + "StreamingFCLayer_Batch_10": { + "PE": 32, + "SIMD": 8, + "ram_style": "auto", + "resType": "lut", + "mem_mode": "decoupled", + "runtime_writeable_weights": 0 + }, + "StreamingDataWidthConverter_Batch_43": { + "impl_style": "hls" + }, + "Thresholding_Batch_20": { + "PE": 1, + "ram_style": "distributed", + "mem_mode": "const", + "runtime_writeable_weights": 0 + }, + "StreamingFIFO_107": { + "ram_style": "block", + "depth": 512, + "impl_style": "vivado" + }, + "FMPadding_Batch_10": { + "SIMD": 1 + }, + "StreamingDataWidthConverter_Batch_44": { + "impl_style": "hls" + }, + "StreamingFIFO_109": { + "ram_style": "block", + "depth": 4096, + "impl_style": "vivado" + }, + "ConvolutionInputGenerator_11": { + "SIMD": 4, + "ram_style": "block" + }, + "Vector_Vector_Activate_Batch_10": { + "PE": 4, + "resType": "lut" + }, + "StreamingDataWidthConverter_Batch_45": { + "impl_style": "hls" + }, + "Thresholding_Batch_21": { + "PE": 1, + "ram_style": "distributed", + "mem_mode": "const", + "runtime_writeable_weights": 0 + }, + "StreamingDataWidthConverter_Batch_46": { + "impl_style": "hls" + }, + "StreamingFIFO_114": { + "ram_style": 
"auto", + "depth": 64, + "impl_style": "rtl" + }, + "StreamingFCLayer_Batch_11": { + "PE": 32, + "SIMD": 8, + "ram_style": "auto", + "resType": "lut", + "mem_mode": "decoupled", + "runtime_writeable_weights": 0 + }, + "StreamingDataWidthConverter_Batch_47": { + "impl_style": "hls" + }, + "Thresholding_Batch_22": { + "PE": 1, + "ram_style": "distributed", + "mem_mode": "const", + "runtime_writeable_weights": 0 + }, + "StreamingFIFO_117": { + "ram_style": "block", + "depth": 512, + "impl_style": "vivado" + }, + "FMPadding_Batch_11": { + "SIMD": 1 + }, + "StreamingFIFO_118": { + "ram_style": "block", + "depth": 16384, + "impl_style": "vivado" + }, + "ConvolutionInputGenerator_12": { + "SIMD": 1, + "ram_style": "block" + }, + "Vector_Vector_Activate_Batch_11": { + "PE": 1, + "resType": "lut" + }, + "Thresholding_Batch_23": { + "PE": 1, + "ram_style": "distributed", + "mem_mode": "const", + "runtime_writeable_weights": 0 + }, + "StreamingDataWidthConverter_Batch_48": { + "impl_style": "hls" + }, + "StreamingFIFO_122": { + "ram_style": "auto", + "depth": 64, + "impl_style": "rtl" + }, + "StreamingFCLayer_Batch_12": { + "PE": 16, + "SIMD": 8, + "ram_style": "auto", + "resType": "lut", + "mem_mode": "decoupled", + "runtime_writeable_weights": 0 + }, + "StreamingDataWidthConverter_Batch_49": { + "impl_style": "hls" + }, + "Thresholding_Batch_24": { + "PE": 1, + "ram_style": "distributed", + "mem_mode": "const", + "runtime_writeable_weights": 0 + }, + "StreamingFIFO_125": { + "ram_style": "block", + "depth": 1024, + "impl_style": "vivado" + }, + "FMPadding_Batch_12": { + "SIMD": 1 + }, + "StreamingDataWidthConverter_Batch_50": { + "impl_style": "hls" + }, + "StreamingFIFO_127": { + "ram_style": "block", + "depth": 16384, + "impl_style": "vivado" + }, + "ConvolutionInputGenerator_13": { + "SIMD": 2, + "ram_style": "block" + }, + "Vector_Vector_Activate_Batch_12": { + "PE": 2, + "resType": "lut" + }, + "StreamingDataWidthConverter_Batch_51": { + "impl_style": "hls" + }, + 
"Thresholding_Batch_25": { + "PE": 1, + "ram_style": "distributed", + "mem_mode": "const", + "runtime_writeable_weights": 0 + }, + "StreamingDataWidthConverter_Batch_52": { + "impl_style": "hls" + }, + "StreamingFIFO_132": { + "ram_style": "auto", + "depth": 128, + "impl_style": "rtl" + }, + "StreamingFCLayer_Batch_13": { + "PE": 32, + "SIMD": 8, + "ram_style": "block", + "resType": "lut", + "mem_mode": "decoupled", + "runtime_writeable_weights": 0 + }, + "StreamingDataWidthConverter_Batch_53": { + "impl_style": "hls" + }, + "Thresholding_Batch_26": { + "PE": 1, + "ram_style": "distributed", + "mem_mode": "const", + "runtime_writeable_weights": 0 + }, + "ConvolutionInputGenerator_14": { + "SIMD": 1, + "ram_style": "block" + }, + "Pool_Batch_0": { + "PE": 1 + }, + "StreamingDataWidthConverter_Batch_54": { + "impl_style": "hls" + }, + "StreamingFCLayer_Batch_14": { + "PE": 1, + "SIMD": 16, + "ram_style": "block", + "resType": "lut", + "mem_mode": "decoupled", + "runtime_writeable_weights": 0 + }, + "ChannelwiseOp_Batch_0": { + "PE": 1, + "ram_style": "distributed" + }, + "LabelSelect_Batch_0": { + "PE": 1 + } +} diff --git a/build/mobilenet-v1/folding_config/ZCU104_folding_config.json b/build/mobilenet-v1/folding_config/ZCU104_folding_config.json new file mode 100755 index 0000000..b441206 --- /dev/null +++ b/build/mobilenet-v1/folding_config/ZCU104_folding_config.json @@ -0,0 +1,816 @@ +{ + "Defaults": {}, + "StreamingFIFO_0": { + "ram_style": "ultra", + "depth": 1024, + "impl_style": "vivado" + }, + "ConvolutionInputGenerator_0": { + "SIMD": 1, + "ram_style": "distributed" + }, + "StreamingDataWidthConverter_Batch_0": { + "impl_style": "hls" + }, + "StreamingFCLayer_Batch_0": { + "PE": 16, + "SIMD": 3, + "ram_style": "auto", + "resType": "dsp", + "mem_mode": "decoupled", + "runtime_writeable_weights": 0 + }, + "StreamingFIFO_3": { + "ram_style": "auto", + "depth": 64, + "impl_style": "rtl" + }, + "StreamingDataWidthConverter_Batch_1": { + "impl_style": "hls" + }, + 
"Thresholding_Batch_0": { + "PE": 1, + "ram_style": "distributed", + "mem_mode": "const", + "runtime_writeable_weights": 0 + }, + "StreamingDataWidthConverter_Batch_2": { + "impl_style": "hls" + }, + "StreamingFIFO_6": { + "ram_style": "auto", + "depth": 256, + "impl_style": "rtl" + }, + "FMPadding_Batch_0": { + "SIMD": 2 + }, + "StreamingDataWidthConverter_Batch_3": { + "impl_style": "hls" + }, + "StreamingFIFO_8": { + "ram_style": "ultra", + "depth": 512, + "impl_style": "vivado" + }, + "ConvolutionInputGenerator_1": { + "SIMD": 16, + "ram_style": "distributed" + }, + "Vector_Vector_Activate_Batch_0": { + "PE": 16, + "resType": "lut" + }, + "StreamingFIFO_10": { + "ram_style": "auto", + "depth": 256, + "impl_style": "rtl" + }, + "StreamingDataWidthConverter_Batch_4": { + "impl_style": "hls" + }, + "Thresholding_Batch_1": { + "PE": 1, + "ram_style": "distributed", + "mem_mode": "const", + "runtime_writeable_weights": 0 + }, + "StreamingDataWidthConverter_Batch_5": { + "impl_style": "hls" + }, + "StreamingFCLayer_Batch_1": { + "PE": 8, + "SIMD": 8, + "ram_style": "auto", + "resType": "lut", + "mem_mode": "decoupled", + "runtime_writeable_weights": 0 + }, + "StreamingDataWidthConverter_Batch_6": { + "impl_style": "hls" + }, + "Thresholding_Batch_2": { + "PE": 2, + "ram_style": "distributed", + "mem_mode": "const", + "runtime_writeable_weights": 0 + }, + "StreamingDataWidthConverter_Batch_7": { + "impl_style": "hls" + }, + "StreamingFIFO_17": { + "ram_style": "auto", + "depth": 32, + "impl_style": "rtl" + }, + "FMPadding_Batch_1": { + "SIMD": 4 + }, + "StreamingDataWidthConverter_Batch_8": { + "impl_style": "hls" + }, + "StreamingFIFO_19": { + "ram_style": "ultra", + "depth": 1024, + "impl_style": "vivado" + }, + "ConvolutionInputGenerator_2": { + "SIMD": 8, + "ram_style": "distributed" + }, + "Vector_Vector_Activate_Batch_1": { + "PE": 8, + "resType": "lut" + }, + "StreamingDataWidthConverter_Batch_9": { + "impl_style": "hls" + }, + "Thresholding_Batch_3": { + "PE": 
1, + "ram_style": "distributed", + "mem_mode": "const", + "runtime_writeable_weights": 0 + }, + "StreamingDataWidthConverter_Batch_10": { + "impl_style": "hls" + }, + "StreamingFIFO_24": { + "ram_style": "auto", + "depth": 32, + "impl_style": "rtl" + }, + "StreamingFCLayer_Batch_2": { + "PE": 16, + "SIMD": 8, + "ram_style": "auto", + "resType": "lut", + "mem_mode": "decoupled", + "runtime_writeable_weights": 0 + }, + "StreamingDataWidthConverter_Batch_11": { + "impl_style": "hls" + }, + "Thresholding_Batch_4": { + "PE": 2, + "ram_style": "distributed", + "mem_mode": "const", + "runtime_writeable_weights": 0 + }, + "StreamingFIFO_27": { + "ram_style": "auto", + "depth": 128, + "impl_style": "rtl" + }, + "FMPadding_Batch_2": { + "SIMD": 2 + }, + "StreamingDataWidthConverter_Batch_12": { + "impl_style": "hls" + }, + "StreamingFIFO_29": { + "ram_style": "ultra", + "depth": 1024, + "impl_style": "vivado" + }, + "ConvolutionInputGenerator_3": { + "SIMD": 16, + "ram_style": "block" + }, + "Vector_Vector_Activate_Batch_2": { + "PE": 16, + "resType": "lut" + }, + "StreamingDataWidthConverter_Batch_13": { + "impl_style": "hls" + }, + "Thresholding_Batch_5": { + "PE": 2, + "ram_style": "distributed", + "mem_mode": "const", + "runtime_writeable_weights": 0 + }, + "StreamingDataWidthConverter_Batch_14": { + "impl_style": "hls" + }, + "StreamingFIFO_34": { + "ram_style": "auto", + "depth": 32, + "impl_style": "rtl" + }, + "StreamingFCLayer_Batch_3": { + "PE": 32, + "SIMD": 8, + "ram_style": "auto", + "resType": "lut", + "mem_mode": "decoupled", + "runtime_writeable_weights": 0 + }, + "StreamingDataWidthConverter_Batch_15": { + "impl_style": "hls" + }, + "Thresholding_Batch_6": { + "PE": 2, + "ram_style": "distributed", + "mem_mode": "const", + "runtime_writeable_weights": 0 + }, + "StreamingFIFO_37": { + "ram_style": "auto", + "depth": 128, + "impl_style": "rtl" + }, + "FMPadding_Batch_3": { + "SIMD": 2 + }, + "StreamingDataWidthConverter_Batch_16": { + "impl_style": "hls" + }, 
+ "StreamingFIFO_39": { + "ram_style": "ultra", + "depth": 4096, + "impl_style": "vivado" + }, + "ConvolutionInputGenerator_4": { + "SIMD": 4, + "ram_style": "block" + }, + "Vector_Vector_Activate_Batch_3": { + "PE": 4, + "resType": "lut" + }, + "StreamingDataWidthConverter_Batch_17": { + "impl_style": "hls" + }, + "Thresholding_Batch_7": { + "PE": 1, + "ram_style": "distributed", + "mem_mode": "const", + "runtime_writeable_weights": 0 + }, + "StreamingDataWidthConverter_Batch_18": { + "impl_style": "hls" + }, + "StreamingFIFO_44": { + "ram_style": "auto", + "depth": 32, + "impl_style": "rtl" + }, + "StreamingFCLayer_Batch_4": { + "PE": 16, + "SIMD": 8, + "ram_style": "auto", + "resType": "lut", + "mem_mode": "decoupled", + "runtime_writeable_weights": 0 + }, + "StreamingDataWidthConverter_Batch_19": { + "impl_style": "hls" + }, + "Thresholding_Batch_8": { + "PE": 1, + "ram_style": "distributed", + "mem_mode": "const", + "runtime_writeable_weights": 0 + }, + "StreamingFIFO_47": { + "ram_style": "ultra", + "depth": 512, + "impl_style": "vivado" + }, + "FMPadding_Batch_4": { + "SIMD": 1 + }, + "StreamingDataWidthConverter_Batch_20": { + "impl_style": "hls" + }, + "StreamingFIFO_49": { + "ram_style": "ultra", + "depth": 1024, + "impl_style": "vivado" + }, + "ConvolutionInputGenerator_5": { + "SIMD": 8, + "ram_style": "block" + }, + "Vector_Vector_Activate_Batch_4": { + "PE": 8, + "resType": "lut" + }, + "StreamingDataWidthConverter_Batch_21": { + "impl_style": "hls" + }, + "Thresholding_Batch_9": { + "PE": 1, + "ram_style": "distributed", + "mem_mode": "const", + "runtime_writeable_weights": 0 + }, + "StreamingDataWidthConverter_Batch_22": { + "impl_style": "hls" + }, + "StreamingFIFO_54": { + "ram_style": "auto", + "depth": 32, + "impl_style": "rtl" + }, + "StreamingFCLayer_Batch_5": { + "PE": 32, + "SIMD": 8, + "ram_style": "auto", + "resType": "lut", + "mem_mode": "decoupled", + "runtime_writeable_weights": 0 + }, + "StreamingDataWidthConverter_Batch_23": { + 
"impl_style": "hls" + }, + "Thresholding_Batch_10": { + "PE": 1, + "ram_style": "distributed", + "mem_mode": "const", + "runtime_writeable_weights": 0 + }, + "StreamingFIFO_57": { + "ram_style": "ultra", + "depth": 512, + "impl_style": "vivado" + }, + "FMPadding_Batch_5": { + "SIMD": 1 + }, + "StreamingDataWidthConverter_Batch_24": { + "impl_style": "hls" + }, + "StreamingFIFO_59": { + "ram_style": "ultra", + "depth": 8192, + "impl_style": "vivado" + }, + "ConvolutionInputGenerator_6": { + "SIMD": 2, + "ram_style": "block" + }, + "Vector_Vector_Activate_Batch_5": { + "PE": 2, + "resType": "lut" + }, + "StreamingDataWidthConverter_Batch_25": { + "impl_style": "hls" + }, + "Thresholding_Batch_11": { + "PE": 1, + "ram_style": "distributed", + "mem_mode": "const", + "runtime_writeable_weights": 0 + }, + "StreamingDataWidthConverter_Batch_26": { + "impl_style": "hls" + }, + "StreamingFIFO_64": { + "ram_style": "auto", + "depth": 32, + "impl_style": "rtl" + }, + "StreamingFCLayer_Batch_6": { + "PE": 16, + "SIMD": 8, + "ram_style": "auto", + "resType": "lut", + "mem_mode": "decoupled", + "runtime_writeable_weights": 0 + }, + "StreamingDataWidthConverter_Batch_27": { + "impl_style": "hls" + }, + "Thresholding_Batch_12": { + "PE": 1, + "ram_style": "distributed", + "mem_mode": "const", + "runtime_writeable_weights": 0 + }, + "StreamingFIFO_67": { + "ram_style": "ultra", + "depth": 512, + "impl_style": "vivado" + }, + "FMPadding_Batch_6": { + "SIMD": 1 + }, + "StreamingDataWidthConverter_Batch_28": { + "impl_style": "hls" + }, + "StreamingFIFO_69": { + "ram_style": "ultra", + "depth": 4096, + "impl_style": "vivado" + }, + "ConvolutionInputGenerator_7": { + "SIMD": 4, + "ram_style": "block" + }, + "Vector_Vector_Activate_Batch_6": { + "PE": 4, + "resType": "lut" + }, + "StreamingDataWidthConverter_Batch_29": { + "impl_style": "hls" + }, + "Thresholding_Batch_13": { + "PE": 1, + "ram_style": "distributed", + "mem_mode": "const", + "runtime_writeable_weights": 0 + }, + 
"StreamingDataWidthConverter_Batch_30": { + "impl_style": "hls" + }, + "StreamingFIFO_74": { + "ram_style": "auto", + "depth": 64, + "impl_style": "rtl" + }, + "StreamingFCLayer_Batch_7": { + "PE": 32, + "SIMD": 8, + "ram_style": "auto", + "resType": "lut", + "mem_mode": "decoupled", + "runtime_writeable_weights": 0 + }, + "StreamingDataWidthConverter_Batch_31": { + "impl_style": "hls" + }, + "Thresholding_Batch_14": { + "PE": 1, + "ram_style": "distributed", + "mem_mode": "const", + "runtime_writeable_weights": 0 + }, + "StreamingFIFO_77": { + "ram_style": "ultra", + "depth": 512, + "impl_style": "vivado" + }, + "FMPadding_Batch_7": { + "SIMD": 1 + }, + "StreamingDataWidthConverter_Batch_32": { + "impl_style": "hls" + }, + "StreamingFIFO_79": { + "ram_style": "ultra", + "depth": 4096, + "impl_style": "vivado" + }, + "ConvolutionInputGenerator_8": { + "SIMD": 4, + "ram_style": "block" + }, + "Vector_Vector_Activate_Batch_7": { + "PE": 4, + "resType": "lut" + }, + "StreamingDataWidthConverter_Batch_33": { + "impl_style": "hls" + }, + "Thresholding_Batch_15": { + "PE": 1, + "ram_style": "distributed", + "mem_mode": "const", + "runtime_writeable_weights": 0 + }, + "StreamingDataWidthConverter_Batch_34": { + "impl_style": "hls" + }, + "StreamingFIFO_84": { + "ram_style": "auto", + "depth": 64, + "impl_style": "rtl" + }, + "StreamingFCLayer_Batch_8": { + "PE": 32, + "SIMD": 8, + "ram_style": "auto", + "resType": "lut", + "mem_mode": "decoupled", + "runtime_writeable_weights": 0 + }, + "StreamingDataWidthConverter_Batch_35": { + "impl_style": "hls" + }, + "Thresholding_Batch_16": { + "PE": 1, + "ram_style": "distributed", + "mem_mode": "const", + "runtime_writeable_weights": 0 + }, + "StreamingFIFO_87": { + "ram_style": "ultra", + "depth": 512, + "impl_style": "vivado" + }, + "FMPadding_Batch_8": { + "SIMD": 1 + }, + "StreamingDataWidthConverter_Batch_36": { + "impl_style": "hls" + }, + "StreamingFIFO_89": { + "ram_style": "ultra", + "depth": 4096, + "impl_style": 
"vivado" + }, + "ConvolutionInputGenerator_9": { + "SIMD": 4, + "ram_style": "block" + }, + "Vector_Vector_Activate_Batch_8": { + "PE": 4, + "resType": "lut" + }, + "StreamingDataWidthConverter_Batch_37": { + "impl_style": "hls" + }, + "Thresholding_Batch_17": { + "PE": 1, + "ram_style": "distributed", + "mem_mode": "const", + "runtime_writeable_weights": 0 + }, + "StreamingDataWidthConverter_Batch_38": { + "impl_style": "hls" + }, + "StreamingFIFO_94": { + "ram_style": "auto", + "depth": 64, + "impl_style": "rtl" + }, + "StreamingFCLayer_Batch_9": { + "PE": 32, + "SIMD": 8, + "ram_style": "auto", + "resType": "lut", + "mem_mode": "decoupled", + "runtime_writeable_weights": 0 + }, + "StreamingDataWidthConverter_Batch_39": { + "impl_style": "hls" + }, + "Thresholding_Batch_18": { + "PE": 1, + "ram_style": "distributed", + "mem_mode": "const", + "runtime_writeable_weights": 0 + }, + "StreamingFIFO_97": { + "ram_style": "ultra", + "depth": 512, + "impl_style": "vivado" + }, + "FMPadding_Batch_9": { + "SIMD": 1 + }, + "StreamingDataWidthConverter_Batch_40": { + "impl_style": "hls" + }, + "StreamingFIFO_99": { + "ram_style": "ultra", + "depth": 4096, + "impl_style": "vivado" + }, + "ConvolutionInputGenerator_10": { + "SIMD": 4, + "ram_style": "block" + }, + "Vector_Vector_Activate_Batch_9": { + "PE": 4, + "resType": "lut" + }, + "StreamingDataWidthConverter_Batch_41": { + "impl_style": "hls" + }, + "Thresholding_Batch_19": { + "PE": 1, + "ram_style": "distributed", + "mem_mode": "const", + "runtime_writeable_weights": 0 + }, + "StreamingDataWidthConverter_Batch_42": { + "impl_style": "hls" + }, + "StreamingFIFO_104": { + "ram_style": "auto", + "depth": 64, + "impl_style": "rtl" + }, + "StreamingFCLayer_Batch_10": { + "PE": 32, + "SIMD": 8, + "ram_style": "auto", + "resType": "lut", + "mem_mode": "decoupled", + "runtime_writeable_weights": 0 + }, + "StreamingDataWidthConverter_Batch_43": { + "impl_style": "hls" + }, + "Thresholding_Batch_20": { + "PE": 1, + "ram_style": 
"distributed", + "mem_mode": "const", + "runtime_writeable_weights": 0 + }, + "StreamingFIFO_107": { + "ram_style": "ultra", + "depth": 512, + "impl_style": "vivado" + }, + "FMPadding_Batch_10": { + "SIMD": 1 + }, + "StreamingDataWidthConverter_Batch_44": { + "impl_style": "hls" + }, + "StreamingFIFO_109": { + "ram_style": "ultra", + "depth": 4096, + "impl_style": "vivado" + }, + "ConvolutionInputGenerator_11": { + "SIMD": 4, + "ram_style": "block" + }, + "Vector_Vector_Activate_Batch_10": { + "PE": 4, + "resType": "lut" + }, + "StreamingDataWidthConverter_Batch_45": { + "impl_style": "hls" + }, + "Thresholding_Batch_21": { + "PE": 1, + "ram_style": "distributed", + "mem_mode": "const", + "runtime_writeable_weights": 0 + }, + "StreamingDataWidthConverter_Batch_46": { + "impl_style": "hls" + }, + "StreamingFIFO_114": { + "ram_style": "auto", + "depth": 64, + "impl_style": "rtl" + }, + "StreamingFCLayer_Batch_11": { + "PE": 32, + "SIMD": 8, + "ram_style": "auto", + "resType": "lut", + "mem_mode": "decoupled", + "runtime_writeable_weights": 0 + }, + "StreamingDataWidthConverter_Batch_47": { + "impl_style": "hls" + }, + "Thresholding_Batch_22": { + "PE": 1, + "ram_style": "distributed", + "mem_mode": "const", + "runtime_writeable_weights": 0 + }, + "StreamingFIFO_117": { + "ram_style": "ultra", + "depth": 512, + "impl_style": "vivado" + }, + "FMPadding_Batch_11": { + "SIMD": 1 + }, + "StreamingFIFO_118": { + "ram_style": "ultra", + "depth": 16384, + "impl_style": "vivado" + }, + "ConvolutionInputGenerator_12": { + "SIMD": 1, + "ram_style": "block" + }, + "Vector_Vector_Activate_Batch_11": { + "PE": 1, + "resType": "lut" + }, + "Thresholding_Batch_23": { + "PE": 1, + "ram_style": "distributed", + "mem_mode": "const", + "runtime_writeable_weights": 0 + }, + "StreamingDataWidthConverter_Batch_48": { + "impl_style": "hls" + }, + "StreamingFIFO_122": { + "ram_style": "auto", + "depth": 64, + "impl_style": "rtl" + }, + "StreamingFCLayer_Batch_12": { + "PE": 16, + "SIMD": 8, 
+ "ram_style": "ultra", + "resType": "lut", + "mem_mode": "decoupled", + "runtime_writeable_weights": 1 + }, + "StreamingDataWidthConverter_Batch_49": { + "impl_style": "hls" + }, + "Thresholding_Batch_24": { + "PE": 1, + "ram_style": "distributed", + "mem_mode": "const", + "runtime_writeable_weights": 0 + }, + "StreamingFIFO_125": { + "ram_style": "ultra", + "depth": 1024, + "impl_style": "vivado" + }, + "FMPadding_Batch_12": { + "SIMD": 1 + }, + "StreamingDataWidthConverter_Batch_50": { + "impl_style": "hls" + }, + "StreamingFIFO_127": { + "ram_style": "ultra", + "depth": 16384, + "impl_style": "vivado" + }, + "ConvolutionInputGenerator_13": { + "SIMD": 2, + "ram_style": "block" + }, + "Vector_Vector_Activate_Batch_12": { + "PE": 2, + "resType": "lut" + }, + "StreamingDataWidthConverter_Batch_51": { + "impl_style": "hls" + }, + "Thresholding_Batch_25": { + "PE": 1, + "ram_style": "distributed", + "mem_mode": "const", + "runtime_writeable_weights": 0 + }, + "StreamingDataWidthConverter_Batch_52": { + "impl_style": "hls" + }, + "StreamingFIFO_132": { + "ram_style": "auto", + "depth": 128, + "impl_style": "rtl" + }, + "StreamingFCLayer_Batch_13": { + "PE": 32, + "SIMD": 8, + "ram_style": "ultra", + "resType": "lut", + "mem_mode": "decoupled", + "runtime_writeable_weights": 1 + }, + "StreamingDataWidthConverter_Batch_53": { + "impl_style": "hls" + }, + "Thresholding_Batch_26": { + "PE": 1, + "ram_style": "distributed", + "mem_mode": "const", + "runtime_writeable_weights": 0 + }, + "ConvolutionInputGenerator_14": { + "SIMD": 1, + "ram_style": "block" + }, + "Pool_Batch_0": { + "PE": 1 + }, + "StreamingDataWidthConverter_Batch_54": { + "impl_style": "hls" + }, + "StreamingFCLayer_Batch_14": { + "PE": 1, + "SIMD": 16, + "ram_style": "ultra", + "resType": "lut", + "mem_mode": "decoupled", + "runtime_writeable_weights": 1 + }, + "ChannelwiseOp_Batch_0": { + "PE": 1, + "ram_style": "distributed" + }, + "LabelSelect_Batch_0": { + "PE": 1 + } +} diff --git 
a/finn_examples/bitfiles/bitfiles.zip.link b/finn_examples/bitfiles/bitfiles.zip.link index b9e0520..6c05a3a 100644 --- a/finn_examples/bitfiles/bitfiles.zip.link +++ b/finn_examples/bitfiles/bitfiles.zip.link @@ -12,11 +12,11 @@ "md5sum": "59598d7f36ffdc74a0a0262f5b67423c" }, "ZCU104": { - "url": "https://github.com/Xilinx/finn-examples/releases/download/v0.0.1a/ZCU104.zip", - "md5sum": "cdc1b757a059b0bb2b7270b3081ae52e" + "url": "https://github.com/Xilinx/finn-examples/releases/download/mnv1-zcu104/ZCU104.zip", + "md5sum": "1ed10d74e85eec70fd094b2947b5b8e3" }, "xilinx_u250_xdma_201830_2": { - "url": "https://github.com/Xilinx/finn-examples/releases/download/v0.0.1a/xilinx_u250_xdma_201830_2.zip", - "md5sum": "5e8f3625fcf14aaa4fc7416fd9f15450" + "url": "https://github.com/Xilinx/finn-examples/releases/download/mnv1-u250-partitioned/xilinx_u250_xdma_201830_2.zip", + "md5sum": "d8c7d67c688f3471b6e2c53762b8b258" } } diff --git a/finn_examples/driver.py b/finn_examples/driver.py index 24f9f11..4dd5a08 100644 --- a/finn_examples/driver.py +++ b/finn_examples/driver.py @@ -32,6 +32,8 @@ from pynq import Overlay, allocate from pynq.ps import Clocks +from finn.core.datatype import DataType +from finn.util.basic import gen_finn_dt_tensor from finn.util.data_packing import ( finnpy_to_packed_bytearray, packed_bytearray_to_finnpy, @@ -84,25 +86,78 @@ def __init__( self.batch_size = batch_size self.fclk_mhz = fclk_mhz if self.platform == "alveo": - self.idma = self.idma0 + if "input_dma_name" in io_shape_dict.keys(): + self.idma = getattr(self, io_shape_dict["input_dma_name"]) + else: + self.idma = self.idma0 self.odma = self.odma0 self.odma_handle = None elif self.platform == "zynq-iodma": - self.idma = self.idma0 + if "input_dma_name" in io_shape_dict.keys(): + self.idma = getattr(self, io_shape_dict["input_dma_name"]) + else: + self.idma = self.idma0 self.odma = self.odma0 # set the clock frequency as specified by user during transformations if self.fclk_mhz > 0: 
Clocks.fclk0_mhz = self.fclk_mhz else: raise ValueError("Supported platforms are zynq-iodma alveo") - # load any runtime weights + # load any external + runtime weights + self.load_external_weights() self.load_runtime_weights() + def load_external_weights(self): + """Load any existing external (DRAM) weights from the specified dir into the + appropriate layer of the accelerator. Note that this must be enabled + during the accelerator build process. The weights directory + is specified as the class member ``runtime_weight_dir``. External (DRAM) + weights are one .npy file per layer. + """ + + self.external_weights = [] + w_filenames = [] + if not os.path.isdir(self.runtime_weight_dir): + return + for (dirpath, dirnames, filenames) in os.walk(self.runtime_weight_dir): + w_filenames.extend(filenames) + + tmp_weight_dict = {} + + for w_filename in w_filenames: + if w_filename.endswith(".npy"): + weight_tensor = np.load(self.runtime_weight_dir + "/" + w_filename) + else: + continue + + idma_name = w_filename.split(".")[0] + tmp_weight_dict[idma_name] = weight_tensor + + for idma_name in tmp_weight_dict.keys(): + if idma_name in self.ip_dict.keys(): + iwdma = getattr(self, idma_name) + weight_tensor = tmp_weight_dict[idma_name] + weight_buf = allocate(weight_tensor.shape, dtype=np.uint8) + weight_buf[:] = weight_tensor + # weight_buf.sync_to_device() + weight_buf.flush() + + self.external_weights += [(iwdma, weight_buf, idma_name)] + + if "number_of_external_weights" in self._io_shape_dict: + hw_ext_weights = self._io_shape_dict["number_of_external_weights"] + assert len(self.external_weights) == hw_ext_weights, ( + "Number of hardware external weights and number of external " + + "weight tensors available do not match. \n" + + "Is runtime_weight_dir pointing to the correct folder?" 
+ ) + def load_runtime_weights(self, flush_accel=True, verify=True): - """Load any existing runtime weights from the specified dir into the + """Load any existing runtime-writable weights from the specified dir into the appropriate layer of the accelerator. Note that this must be enabled during the accelerator build process. The runtime weights directory - is specified as the class member ``runtime_weight_dir``. + is specified as the class member ``runtime_weight_dir``. Runtime-writable + weights are provided as one .dat file per layer. Parameters ---------- @@ -122,18 +177,25 @@ def load_runtime_weights(self, flush_accel=True, verify=True): if w_filename.endswith(".dat"): with open(self.runtime_weight_dir + "/" + w_filename, "r") as f: dat = f.read() + else: + continue layer_w = np.fromiter( [int(x, 16) for x in dat.strip().split()], dtype=np.uint32 ) - layer_ind = int(w_filename.split("_")[0]) - rt_weight_dict[layer_ind] = layer_w - for layer_ind in rt_weight_dict.keys(): - cand_if_name = "StreamingDataflowPartition_1/s_axilite_%d" % layer_ind + sdp_ind = int(w_filename.split("_")[0]) + layer_ind = int(w_filename.split("_")[1]) + rt_weight_dict[(sdp_ind, layer_ind)] = layer_w + for sdp_ind, layer_ind in rt_weight_dict.keys(): + cand_if_name = "StreamingDataflowPartition_%d/s_axilite_%d" % ( + sdp_ind, + layer_ind, + ) if cand_if_name in self.ip_dict.keys(): layer_mmio = getattr( - self.StreamingDataflowPartition_1, "s_axilite_%d" % layer_ind + getattr(self, "StreamingDataflowPartition_%d" % sdp_ind), + "s_axilite_%d" % layer_ind, ).mmio - layer_w = rt_weight_dict[layer_ind] + layer_w = rt_weight_dict[(sdp_ind, layer_ind)] layer_mmio.write_mm(0, layer_w.tobytes()) if verify: new_w = np.copy(layer_mmio.array[: layer_w.shape[0]]) @@ -278,6 +340,10 @@ def execute_on_buffers(self, asynch=False, batch_size=None): if self.platform == "zynq-iodma": assert self.odma.read(0x00) & 0x4 != 0, "Output DMA is not idle" # manually launch IODMAs since signatures are missing + for 
iwdma, iwbuf, iwdma_name in self.external_weights: + iwdma.write(0x10, iwbuf.device_address) + iwdma.write(0x1C, batch_size) + iwdma.write(0x00, 1) self.idma.write(0x10, self.ibuf_packed_device.device_address) self.idma.write(0x1C, batch_size) self.odma.write(0x10, self.obuf_packed_device.device_address) @@ -287,6 +353,8 @@ def execute_on_buffers(self, asynch=False, batch_size=None): elif self.platform == "alveo": assert self.odma_handle is None, "Output DMA is already running" self.idma.start(self.ibuf_packed_device, batch_size) + for iwdma, iwbuf, iwdma_name in self.external_weights: + iwdma.start(iwbuf, batch_size) self.odma_handle = self.odma.start(self.obuf_packed_device, batch_size) else: raise Exception("Unrecognized platform: %s" % self.platform) @@ -338,46 +406,55 @@ def throughput_test(self): res["DRAM_out_bandwidth[Mb/s]"] = ( np.prod(self.oshape_packed) * 0.000001 / runtime ) - if self.platform != "alveo": + for iwdma, iwbuf, iwdma_name in self.external_weights: + res["DRAM_extw_%s_bandwidth[Mb/s]" % iwdma_name] = ( + self.batch_size * np.prod(iwbuf.shape) * 0.000001 / runtime + ) + if self.platform == "zynq-iodma": res["fclk[mhz]"] = Clocks.fclk0_mhz - else: - res["fclk[mhz]"] = self.fclk_mhz + elif self.platform == "alveo": + res["fclk[mhz]"] = self.clock_dict["clock0"]["frequency"] res["batch_size"] = self.batch_size # also benchmark driver-related overheads - input_npy = np.zeros(self.ishape_normal, dtype=self.idt.to_numpy_dt()) + input_npy = gen_finn_dt_tensor(self.idt, self.ishape_normal) + # provide as int8/uint8 to support fast packing path where possible + if self.idt == DataType.UINT8: + input_npy = input_npy.astype(np.uint8) + elif self.idt == DataType.INT8: + input_npy = input_npy.astype(np.int8) start = time.time() ibuf_folded = self.fold_input(input_npy) end = time.time() runtime = end - start - res["fold_input[ms]"] = runtime + res["fold_input[ms]"] = runtime * 1000 start = time.time() ibuf_packed = self.pack_input(ibuf_folded) end = 
time.time() runtime = end - start - res["pack_input[ms]"] = runtime + res["pack_input[ms]"] = runtime * 1000 start = time.time() self.copy_input_data_to_device(ibuf_packed) end = time.time() runtime = end - start - res["copy_input_data_to_device[ms]"] = runtime + res["copy_input_data_to_device[ms]"] = runtime * 1000 start = time.time() self.copy_output_data_from_device(self.obuf_packed) end = time.time() runtime = end - start - res["copy_output_data_from_device[ms]"] = runtime + res["copy_output_data_from_device[ms]"] = runtime * 1000 start = time.time() obuf_folded = self.unpack_output(self.obuf_packed) end = time.time() runtime = end - start - res["unpack_output[ms]"] = runtime + res["unpack_output[ms]"] = runtime * 1000 start = time.time() self.unfold_output(obuf_folded) end = time.time() runtime = end - start - res["unfold_output[ms]"] = runtime + res["unfold_output[ms]"] = runtime * 1000 return res diff --git a/finn_examples/models.py b/finn_examples/models.py index fc5da8e..6e6e147 100644 --- a/finn_examples/models.py +++ b/finn_examples/models.py @@ -104,6 +104,28 @@ def find_bitfile(model_name, target_platform): ) +def find_runtime_weights(model_name, target_platform): + weight_dir = "%s_runtime_weights" % (model_name) + weight_dir_candidates = [ + pk.resource_filename( + "finn_examples", "bitfiles/%s/%s" % (target_platform, weight_dir) + ), + pk.resource_filename( + "finn_examples", + "bitfiles/bitfiles.zip.d/%s/%s" % (target_platform, weight_dir), + ), + ] + for candidate in weight_dir_candidates: + if os.path.isdir(candidate): + weight_files = os.listdir(candidate) + if weight_files: + return candidate + raise Exception( + "Runtime weights for model = %s target platform = %s not found. 
Looked in: %s" + % (model_name, target_platform, str(weight_dir_candidates)) + ) + + def get_driver_mode(): driver_modes = {"edge": "zynq-iodma", "pcie": "alveo"} return driver_modes[get_edge_or_pcie()] @@ -170,4 +192,16 @@ def mobilenetv1_w4a4_imagenet(target_platform=None): driver_mode = get_driver_mode() model_name = "mobilenetv1-w4a4" filename = find_bitfile(model_name, target_platform) - return FINNExampleOverlay(filename, driver_mode, _imagenet_top5inds_io_shape_dict) + if target_platform in ["ZCU104"]: + runtime_weight_dir = find_runtime_weights(model_name, target_platform) + else: + runtime_weight_dir = None + # target 185 MHz for Zynq (this is ignored for Alveo) + fclk_mhz = 185.0 + return FINNExampleOverlay( + filename, + driver_mode, + _imagenet_top5inds_io_shape_dict, + runtime_weight_dir=runtime_weight_dir, + fclk_mhz=fclk_mhz, + ) diff --git a/finn_examples/notebooks/2_imagenet_with_mobilenet_v1.ipynb b/finn_examples/notebooks/2_imagenet_with_mobilenet_v1.ipynb old mode 100644 new mode 100755 index 327d701..4574435 --- a/finn_examples/notebooks/2_imagenet_with_mobilenet_v1.ipynb +++ b/finn_examples/notebooks/2_imagenet_with_mobilenet_v1.ipynb @@ -69,7 +69,9 @@ "metadata": {}, "outputs": [], "source": [ - "accel = models.mobilenetv1_w4a4_imagenet()" + "accel = models.mobilenetv1_w4a4_imagenet()\n", + "#some systems might require a manual platform setting:\n", + "#accel = models.mobilenetv1_w4a4_imagenet(\"ZCU102\")" ] }, { @@ -91,87 +93,94 @@ "print(\"Expected output shape and datatype: %s %s\" % (str(accel.oshape_normal), str(accel.odt)))" ] }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "WARNING: pip is being invoked by an old script wrapper. 
This will fail in a future version of pip.\n", - "Please see https://github.com/pypa/pip/issues/5599 for advice on fixing the underlying issue.\n", - "To avoid this problem you can invoke Python with '-m pip' instead of running pip directly.\n", - "Requirement already satisfied: torchvision in /scratch/users/yamanu/conda/finn-examples/lib/python3.7/site-packages (0.8.2)\n", - "Requirement already satisfied: pillow>=4.1.1 in /scratch/users/yamanu/conda/finn-examples/lib/python3.7/site-packages (from torchvision) (7.0.0)\n", - "Requirement already satisfied: numpy in /scratch/users/yamanu/conda/finn-examples/lib/python3.7/site-packages (from torchvision) (1.18.1)\n", - "Requirement already satisfied: torch==1.7.1 in /scratch/users/yamanu/conda/finn-examples/lib/python3.7/site-packages (from torchvision) (1.7.1)\n", - "Requirement already satisfied: typing-extensions in /scratch/users/yamanu/conda/finn-examples/lib/python3.7/site-packages (from torch==1.7.1->torchvision) (3.7.4.3)\n" - ] - } - ], - "source": [ - "! 
pip install torchvision" - ] - }, { "cell_type": "markdown", "metadata": {}, "source": [ - "# Load the ImageNet validation dataset" + "# Prepare loading of ImageNet validation dataset" ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 4, "metadata": {}, "outputs": [ { - "data": { - "text/plain": [ - "'/proj/xlabs_t3/users/ml-workspace/datasets/imagenet/raw-images/imagenet_symlink/val'" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" + "name": "stdout", + "output_type": "stream", + "text": [ + "/proj/xlabs_t3/users/ml-workspace/datasets/imagenet/raw-images/imagenet_symlink/val\n" + ] } ], "source": [ - "import torchvision.transforms as transforms\n", - "import torchvision.datasets as datasets\n", - "import torch\n", "import numpy as np\n", + "from PIL import Image\n", + "from dataset_loading import FileQueue, ImgQueue\n", "import os\n", "\n", - "os.environ[\"IMAGENET_VAL_PATH\"]" + "# 2 ways to provide the data:\n", + "# without a label file: expect images in 1000 sorted subfolders\n", + "# with a label file: expect images directly in val directory\n", + "val_dir = os.environ[\"IMAGENET_VAL_PATH\"]\n", + "label_file = None\n", + "print(val_dir)" ] }, { "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [], - "source": [ - "valdir = os.environ[\"IMAGENET_VAL_PATH\"]\n", - "batch_size = 1\n", - "val_loader = torch.utils.data.DataLoader(\n", - " datasets.ImageFolder(valdir, transforms.Compose([\n", - " transforms.Resize(256),\n", - " transforms.CenterCrop(224),\n", - " transforms.Lambda(lambda x: np.array(x, dtype=np.uint8))\n", - " ])),\n", - " batch_size=batch_size, shuffle=False, num_workers=0, pin_memory=True)" - ] - }, - { - "cell_type": "code", - "execution_count": 7, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ - "test_single_x, test_single_y = val_loader.sampler.data_source.__getitem__(0)" + "def img_resize(img, size):\n", + " w, h = img.size\n", + " if (w 
<= h and w == size) or (h <= w and h == size):\n", + " return img\n", + " if w < h:\n", + " ow = size\n", + " oh = int(size * h / w)\n", + " return img.resize((ow, oh), Image.BILINEAR)\n", + " else:\n", + " oh = size\n", + " ow = int(size * w / h)\n", + " return img.resize((ow, oh), Image.BILINEAR)\n", + "\n", + "def img_center_crop(img, size):\n", + " crop_height, crop_width = (size, size)\n", + " image_width, image_height = img.size\n", + " crop_top = int(round((image_height - crop_height) / 2.))\n", + " crop_left = int(round((image_width - crop_width) / 2.))\n", + " return img.crop((crop_left, crop_top, crop_left + crop_width, crop_top + crop_height))\n", + "\n", + "def pre_process(img_np):\n", + " img = Image.fromarray(img_np.astype(np.uint8))\n", + " img = img_resize(img, 256)\n", + " img = img_center_crop(img, 224)\n", + " img = np.array(img, dtype=np.uint8)\n", + " return img\n", + "\n", + "def setup_dataloader(val_path, label_file_path = None, batch_size=100, n_images = 50000):\n", + " if label_file_path is None:\n", + " val_folders = [ f.name for f in os.scandir(val_path) if f.is_dir() ]\n", + " assert len(val_folders) == 1000, \"Expected 1000 subfolders in ILSVRC2012 val\"\n", + " files = []\n", + " labels = []\n", + " for idx, folder in enumerate(val_folders):\n", + " current_files = sorted(os.listdir(os.path.join(val_path, folder)))\n", + " current_files = [os.path.join(folder, file) for file in current_files]\n", + " files.extend(current_files)\n", + " labels.extend([idx]*len(current_files))\n", + " files = files[:n_images]\n", + " else:\n", + " files = ['ILSVRC2012_val_{:08d}.JPEG'.format(i) for i in range(1,n_images+1)]\n", + " labels = np.loadtxt(label_file_path, dtype=int, usecols=1)\n", + "\n", + " file_queue = FileQueue()\n", + " file_queue.load_epochs(list(zip(files,labels)), shuffle=False)\n", + " img_queue = ImgQueue(maxsize=batch_size)\n", + " img_queue.start_loaders(file_queue, num_threads=4, img_dir=val_path, transform=pre_process)\n", + " 
return img_queue" ] }, { @@ -183,12 +192,12 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 6, "metadata": {}, "outputs": [ { "data": { - "image/png": "\n", + "image/png": "\n", "text/plain": [ "
" ] @@ -200,15 +209,20 @@ } ], "source": [ + "%matplotlib inline\n", "from matplotlib import pyplot as plt\n", "\n", - "plt.imshow(test_single_x.reshape(224,224,3))\n", + "img_queue = setup_dataloader(val_dir, label_file, 1, 1)\n", + "\n", + "test_single_x, test_single_y = img_queue.get()\n", + "\n", + "plt.imshow(test_single_x)\n", "plt.show()" ] }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 7, "metadata": {}, "outputs": [ { @@ -217,7 +231,7 @@ "0" ] }, - "execution_count": 9, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -228,7 +242,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 8, "metadata": {}, "outputs": [ { @@ -246,7 +260,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ @@ -255,14 +269,14 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 10, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Top-5 classes predicted by the accelerator: [[[[391. 48. 0. 39. 395.]]]]\n" + "Top-5 classes predicted by the accelerator: [[[[391. 0. 395. 394. 48.]]]]\n" ] } ], @@ -272,14 +286,14 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 11, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "2.71 ms ± 22.8 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n" + "2.18 ms ± 6.29 µs per loop (mean ± std. dev. 
of 7 runs, 100 loops each)\n" ] } ], @@ -297,102 +311,561 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 12, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Accelerator buffer shapes are (1000, 224, 224, 1, 3) for input, (1000, 1, 1, 1, 10) for output\n" + "Accelerator buffer shapes are (100, 224, 224, 1, 3) for input, (100, 1, 1, 1, 10) for output\n" ] } ], "source": [ - "import numpy as np\n", - "\n", - "batch_size = 1000\n", + "batch_size = 100\n", "accel.batch_size = batch_size\n", - "print(\"Accelerator buffer shapes are %s for input, %s for output\" % (str(accel.ishape_packed), str(accel.oshape_packed)) )\n", - "obuf_packed = np.empty_like(accel.obuf_packed_device)\n", - "val_loader = torch.utils.data.DataLoader(\n", - " datasets.ImageFolder(valdir, transforms.Compose([\n", - " transforms.Resize(256),\n", - " transforms.CenterCrop(224),\n", - " transforms.Lambda(lambda x: np.array(x, dtype=np.uint8))\n", - " ])),\n", - " batch_size=batch_size, shuffle=False, num_workers=16, pin_memory=True)" + "print(\"Accelerator buffer shapes are %s for input, %s for output\" % (str(accel.ishape_packed), str(accel.oshape_packed)) )" ] }, { "cell_type": "code", - "execution_count": 15, - "metadata": {}, + "execution_count": 13, + "metadata": { + "scrolled": true + }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "batch 1 : total OK 844 NOK 156\n", - "batch 2 : total OK 1589 NOK 411\n", - "batch 3 : total OK 2296 NOK 704\n", - "batch 4 : total OK 2967 NOK 1033\n", - "batch 5 : total OK 3842 NOK 1158\n", - "batch 6 : total OK 4622 NOK 1378\n", - "batch 7 : total OK 5437 NOK 1563\n", - "batch 8 : total OK 6247 NOK 1753\n", - "batch 9 : total OK 6949 NOK 2051\n", - "batch 10 : total OK 7675 NOK 2325\n", - "batch 11 : total OK 8445 NOK 2555\n", - "batch 12 : total OK 9172 NOK 2828\n", - "batch 13 : total OK 9935 NOK 3065\n", - "batch 14 : total OK 10671 NOK 3329\n", - "batch 15 : total OK 
11444 NOK 3556\n", - "batch 16 : total OK 12173 NOK 3827\n", - "batch 17 : total OK 13030 NOK 3970\n", - "batch 18 : total OK 13765 NOK 4235\n", - "batch 19 : total OK 14550 NOK 4450\n", - "batch 20 : total OK 15292 NOK 4708\n", - "batch 21 : total OK 15973 NOK 5027\n", - "batch 22 : total OK 16643 NOK 5357\n", - "batch 23 : total OK 17294 NOK 5706\n", - "batch 24 : total OK 17922 NOK 6078\n", - "batch 25 : total OK 18528 NOK 6472\n", - "batch 26 : total OK 19139 NOK 6861\n", - "batch 27 : total OK 19806 NOK 7194\n", - "batch 28 : total OK 20487 NOK 7513\n", - "batch 29 : total OK 21251 NOK 7749\n", - "batch 30 : total OK 21891 NOK 8109\n", - "batch 31 : total OK 22590 NOK 8410\n", - "batch 32 : total OK 23150 NOK 8850\n", - "batch 33 : total OK 23804 NOK 9196\n", - "batch 34 : total OK 24450 NOK 9550\n", - "batch 35 : total OK 25115 NOK 9885\n", - "batch 36 : total OK 25799 NOK 10201\n", - "batch 37 : total OK 26470 NOK 10530\n", - "batch 38 : total OK 27106 NOK 10894\n", - "batch 39 : total OK 27777 NOK 11223\n", - "batch 40 : total OK 28422 NOK 11578\n", - "batch 41 : total OK 29092 NOK 11908\n", - "batch 42 : total OK 29712 NOK 12288\n", - "batch 43 : total OK 30363 NOK 12637\n", - "batch 44 : total OK 31033 NOK 12967\n", - "batch 45 : total OK 31664 NOK 13336\n", - "batch 46 : total OK 32305 NOK 13695\n", - "batch 47 : total OK 33019 NOK 13981\n", - "batch 48 : total OK 33809 NOK 14191\n", - "batch 49 : total OK 34391 NOK 14609\n", - "batch 50 : total OK 35206 NOK 14794\n" + "batch 1 : total OK 88 NOK 12\n", + "batch 2 : total OK 164 NOK 36\n", + "batch 3 : total OK 241 NOK 59\n", + "batch 4 : total OK 311 NOK 89\n", + "batch 5 : total OK 403 NOK 97\n", + "batch 6 : total OK 491 NOK 109\n", + "batch 7 : total OK 580 NOK 120\n", + "batch 8 : total OK 670 NOK 130\n", + "batch 9 : total OK 757 NOK 143\n", + "batch 10 : total OK 846 NOK 154\n", + "batch 11 : total OK 927 NOK 173\n", + "batch 12 : total OK 1018 NOK 182\n", + "batch 13 : total OK 1110 NOK 190\n", + 
"batch 14 : total OK 1180 NOK 220\n", + "batch 15 : total OK 1262 NOK 238\n", + "batch 16 : total OK 1337 NOK 263\n", + "batch 17 : total OK 1395 NOK 305\n", + "batch 18 : total OK 1454 NOK 346\n", + "batch 19 : total OK 1521 NOK 379\n", + "batch 20 : total OK 1590 NOK 410\n", + "batch 21 : total OK 1650 NOK 450\n", + "batch 22 : total OK 1728 NOK 472\n", + "batch 23 : total OK 1798 NOK 502\n", + "batch 24 : total OK 1861 NOK 539\n", + "batch 25 : total OK 1933 NOK 567\n", + "batch 26 : total OK 2019 NOK 581\n", + "batch 27 : total OK 2093 NOK 607\n", + "batch 28 : total OK 2145 NOK 655\n", + "batch 29 : total OK 2225 NOK 675\n", + "batch 30 : total OK 2294 NOK 706\n", + "batch 31 : total OK 2352 NOK 748\n", + "batch 32 : total OK 2412 NOK 788\n", + "batch 33 : total OK 2474 NOK 826\n", + "batch 34 : total OK 2529 NOK 871\n", + "batch 35 : total OK 2593 NOK 907\n", + "batch 36 : total OK 2677 NOK 923\n", + "batch 37 : total OK 2750 NOK 950\n", + "batch 38 : total OK 2817 NOK 983\n", + "batch 39 : total OK 2895 NOK 1005\n", + "batch 40 : total OK 2965 NOK 1035\n", + "batch 41 : total OK 3051 NOK 1049\n", + "batch 42 : total OK 3138 NOK 1062\n", + "batch 43 : total OK 3226 NOK 1074\n", + "batch 44 : total OK 3310 NOK 1090\n", + "batch 45 : total OK 3402 NOK 1098\n", + "batch 46 : total OK 3491 NOK 1109\n", + "batch 47 : total OK 3580 NOK 1120\n", + "batch 48 : total OK 3672 NOK 1128\n", + "batch 49 : total OK 3756 NOK 1144\n", + "batch 50 : total OK 3836 NOK 1164\n", + "batch 51 : total OK 3915 NOK 1185\n", + "batch 52 : total OK 3997 NOK 1203\n", + "batch 53 : total OK 4079 NOK 1221\n", + "batch 54 : total OK 4154 NOK 1246\n", + "batch 55 : total OK 4230 NOK 1270\n", + "batch 56 : total OK 4309 NOK 1291\n", + "batch 57 : total OK 4383 NOK 1317\n", + "batch 58 : total OK 4458 NOK 1342\n", + "batch 59 : total OK 4538 NOK 1362\n", + "batch 60 : total OK 4606 NOK 1394\n", + "batch 61 : total OK 4677 NOK 1423\n", + "batch 62 : total OK 4756 NOK 1444\n", + "batch 63 : 
total OK 4817 NOK 1483\n", + "batch 64 : total OK 4891 NOK 1509\n", + "batch 65 : total OK 4978 NOK 1522\n", + "batch 66 : total OK 5067 NOK 1533\n", + "batch 67 : total OK 5152 NOK 1548\n", + "batch 68 : total OK 5235 NOK 1565\n", + "batch 69 : total OK 5326 NOK 1574\n", + "batch 70 : total OK 5418 NOK 1582\n", + "batch 71 : total OK 5503 NOK 1597\n", + "batch 72 : total OK 5589 NOK 1611\n", + "batch 73 : total OK 5678 NOK 1622\n", + "batch 74 : total OK 5763 NOK 1637\n", + "batch 75 : total OK 5853 NOK 1647\n", + "batch 76 : total OK 5923 NOK 1677\n", + "batch 77 : total OK 6000 NOK 1700\n", + "batch 78 : total OK 6081 NOK 1719\n", + "batch 79 : total OK 6172 NOK 1728\n", + "batch 80 : total OK 6231 NOK 1769\n", + "batch 81 : total OK 6314 NOK 1786\n", + "batch 82 : total OK 6374 NOK 1826\n", + "batch 83 : total OK 6441 NOK 1859\n", + "batch 84 : total OK 6490 NOK 1910\n", + "batch 85 : total OK 6570 NOK 1930\n", + "batch 86 : total OK 6638 NOK 1962\n", + "batch 87 : total OK 6709 NOK 1991\n", + "batch 88 : total OK 6779 NOK 2021\n", + "batch 89 : total OK 6858 NOK 2042\n", + "batch 90 : total OK 6934 NOK 2066\n", + "batch 91 : total OK 7007 NOK 2093\n", + "batch 92 : total OK 7086 NOK 2114\n", + "batch 93 : total OK 7158 NOK 2142\n", + "batch 94 : total OK 7224 NOK 2176\n", + "batch 95 : total OK 7290 NOK 2210\n", + "batch 96 : total OK 7368 NOK 2232\n", + "batch 97 : total OK 7426 NOK 2274\n", + "batch 98 : total OK 7510 NOK 2290\n", + "batch 99 : total OK 7581 NOK 2319\n", + "batch 100 : total OK 7661 NOK 2339\n", + "batch 101 : total OK 7720 NOK 2380\n", + "batch 102 : total OK 7805 NOK 2395\n", + "batch 103 : total OK 7874 NOK 2426\n", + "batch 104 : total OK 7957 NOK 2443\n", + "batch 105 : total OK 8038 NOK 2462\n", + "batch 106 : total OK 8111 NOK 2489\n", + "batch 107 : total OK 8185 NOK 2515\n", + "batch 108 : total OK 8269 NOK 2531\n", + "batch 109 : total OK 8358 NOK 2542\n", + "batch 110 : total OK 8439 NOK 2561\n", + "batch 111 : total OK 8514 NOK 
2586\n", + "batch 112 : total OK 8585 NOK 2615\n", + "batch 113 : total OK 8666 NOK 2634\n", + "batch 114 : total OK 8722 NOK 2678\n", + "batch 115 : total OK 8808 NOK 2692\n", + "batch 116 : total OK 8868 NOK 2732\n", + "batch 117 : total OK 8939 NOK 2761\n", + "batch 118 : total OK 9022 NOK 2778\n", + "batch 119 : total OK 9094 NOK 2806\n", + "batch 120 : total OK 9165 NOK 2835\n", + "batch 121 : total OK 9213 NOK 2887\n", + "batch 122 : total OK 9287 NOK 2913\n", + "batch 123 : total OK 9376 NOK 2924\n", + "batch 124 : total OK 9446 NOK 2954\n", + "batch 125 : total OK 9510 NOK 2990\n", + "batch 126 : total OK 9579 NOK 3021\n", + "batch 127 : total OK 9659 NOK 3041\n", + "batch 128 : total OK 9757 NOK 3043\n", + "batch 129 : total OK 9832 NOK 3068\n", + "batch 130 : total OK 9926 NOK 3074\n", + "batch 131 : total OK 10010 NOK 3090\n", + "batch 132 : total OK 10097 NOK 3103\n", + "batch 133 : total OK 10153 NOK 3147\n", + "batch 134 : total OK 10212 NOK 3188\n", + "batch 135 : total OK 10292 NOK 3208\n", + "batch 136 : total OK 10353 NOK 3247\n", + "batch 137 : total OK 10423 NOK 3277\n", + "batch 138 : total OK 10516 NOK 3284\n", + "batch 139 : total OK 10588 NOK 3312\n", + "batch 140 : total OK 10666 NOK 3334\n", + "batch 141 : total OK 10731 NOK 3369\n", + "batch 142 : total OK 10790 NOK 3410\n", + "batch 143 : total OK 10860 NOK 3440\n", + "batch 144 : total OK 10938 NOK 3462\n", + "batch 145 : total OK 11025 NOK 3475\n", + "batch 146 : total OK 11106 NOK 3494\n", + "batch 147 : total OK 11197 NOK 3503\n", + "batch 148 : total OK 11283 NOK 3517\n", + "batch 149 : total OK 11363 NOK 3537\n", + "batch 150 : total OK 11439 NOK 3561\n", + "batch 151 : total OK 11522 NOK 3578\n", + "batch 152 : total OK 11585 NOK 3615\n", + "batch 153 : total OK 11658 NOK 3642\n", + "batch 154 : total OK 11745 NOK 3655\n", + "batch 155 : total OK 11824 NOK 3676\n", + "batch 156 : total OK 11889 NOK 3711\n", + "batch 157 : total OK 11950 NOK 3750\n", + "batch 158 : total OK 12010 
NOK 3790\n", + "batch 159 : total OK 12098 NOK 3802\n", + "batch 160 : total OK 12177 NOK 3823\n", + "batch 161 : total OK 12270 NOK 3830\n", + "batch 162 : total OK 12359 NOK 3841\n", + "batch 163 : total OK 12450 NOK 3850\n", + "batch 164 : total OK 12540 NOK 3860\n", + "batch 165 : total OK 12613 NOK 3887\n", + "batch 166 : total OK 12695 NOK 3905\n", + "batch 167 : total OK 12789 NOK 3911\n", + "batch 168 : total OK 12871 NOK 3929\n", + "batch 169 : total OK 12955 NOK 3945\n", + "batch 170 : total OK 13047 NOK 3953\n", + "batch 171 : total OK 13119 NOK 3981\n", + "batch 172 : total OK 13202 NOK 3998\n", + "batch 173 : total OK 13276 NOK 4024\n", + "batch 174 : total OK 13363 NOK 4037\n", + "batch 175 : total OK 13422 NOK 4078\n", + "batch 176 : total OK 13514 NOK 4086\n", + "batch 177 : total OK 13579 NOK 4121\n", + "batch 178 : total OK 13668 NOK 4132\n", + "batch 179 : total OK 13728 NOK 4172\n", + "batch 180 : total OK 13785 NOK 4215\n", + "batch 181 : total OK 13866 NOK 4234\n", + "batch 182 : total OK 13947 NOK 4253\n", + "batch 183 : total OK 14037 NOK 4263\n", + "batch 184 : total OK 14115 NOK 4285\n", + "batch 185 : total OK 14188 NOK 4312\n", + "batch 186 : total OK 14261 NOK 4339\n", + "batch 187 : total OK 14335 NOK 4365\n", + "batch 188 : total OK 14401 NOK 4399\n", + "batch 189 : total OK 14489 NOK 4411\n", + "batch 190 : total OK 14564 NOK 4436\n", + "batch 191 : total OK 14610 NOK 4490\n", + "batch 192 : total OK 14680 NOK 4520\n", + "batch 193 : total OK 14747 NOK 4553\n", + "batch 194 : total OK 14828 NOK 4572\n", + "batch 195 : total OK 14914 NOK 4586\n", + "batch 196 : total OK 14979 NOK 4621\n", + "batch 197 : total OK 15067 NOK 4633\n", + "batch 198 : total OK 15134 NOK 4666\n", + "batch 199 : total OK 15225 NOK 4675\n", + "batch 200 : total OK 15306 NOK 4694\n", + "batch 201 : total OK 15371 NOK 4729\n", + "batch 202 : total OK 15434 NOK 4766\n", + "batch 203 : total OK 15516 NOK 4784\n", + "batch 204 : total OK 15595 NOK 4805\n", + "batch 
205 : total OK 15664 NOK 4836\n", + "batch 206 : total OK 15742 NOK 4858\n", + "batch 207 : total OK 15797 NOK 4903\n", + "batch 208 : total OK 15834 NOK 4966\n", + "batch 209 : total OK 15920 NOK 4980\n", + "batch 210 : total OK 15973 NOK 5027\n", + "batch 211 : total OK 16048 NOK 5052\n", + "batch 212 : total OK 16108 NOK 5092\n", + "batch 213 : total OK 16183 NOK 5117\n", + "batch 214 : total OK 16258 NOK 5142\n", + "batch 215 : total OK 16328 NOK 5172\n", + "batch 216 : total OK 16410 NOK 5190\n", + "batch 217 : total OK 16476 NOK 5224\n", + "batch 218 : total OK 16520 NOK 5280\n", + "batch 219 : total OK 16587 NOK 5313\n", + "batch 220 : total OK 16649 NOK 5351\n", + "batch 221 : total OK 16719 NOK 5381\n", + "batch 222 : total OK 16768 NOK 5432\n", + "batch 223 : total OK 16839 NOK 5461\n", + "batch 224 : total OK 16893 NOK 5507\n", + "batch 225 : total OK 16971 NOK 5529\n", + "batch 226 : total OK 17056 NOK 5544\n", + "batch 227 : total OK 17121 NOK 5579\n", + "batch 228 : total OK 17175 NOK 5625\n", + "batch 229 : total OK 17241 NOK 5659\n", + "batch 230 : total OK 17313 NOK 5687\n", + "batch 231 : total OK 17368 NOK 5732\n", + "batch 232 : total OK 17422 NOK 5778\n", + "batch 233 : total OK 17462 NOK 5838\n", + "batch 234 : total OK 17545 NOK 5855\n", + "batch 235 : total OK 17596 NOK 5904\n", + "batch 236 : total OK 17663 NOK 5937\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "batch 237 : total OK 17729 NOK 5971\n", + "batch 238 : total OK 17807 NOK 5993\n", + "batch 239 : total OK 17882 NOK 6018\n", + "batch 240 : total OK 17926 NOK 6074\n", + "batch 241 : total OK 17989 NOK 6111\n", + "batch 242 : total OK 18044 NOK 6156\n", + "batch 243 : total OK 18096 NOK 6204\n", + "batch 244 : total OK 18172 NOK 6228\n", + "batch 245 : total OK 18220 NOK 6280\n", + "batch 246 : total OK 18297 NOK 6303\n", + "batch 247 : total OK 18342 NOK 6358\n", + "batch 248 : total OK 18412 NOK 6388\n", + "batch 249 : total OK 18491 NOK 6409\n", + 
"batch 250 : total OK 18536 NOK 6464\n", + "batch 251 : total OK 18592 NOK 6508\n", + "batch 252 : total OK 18644 NOK 6556\n", + "batch 253 : total OK 18693 NOK 6607\n", + "batch 254 : total OK 18760 NOK 6640\n", + "batch 255 : total OK 18824 NOK 6676\n", + "batch 256 : total OK 18902 NOK 6698\n", + "batch 257 : total OK 18960 NOK 6740\n", + "batch 258 : total OK 19022 NOK 6778\n", + "batch 259 : total OK 19081 NOK 6819\n", + "batch 260 : total OK 19153 NOK 6847\n", + "batch 261 : total OK 19230 NOK 6870\n", + "batch 262 : total OK 19290 NOK 6910\n", + "batch 263 : total OK 19351 NOK 6949\n", + "batch 264 : total OK 19407 NOK 6993\n", + "batch 265 : total OK 19483 NOK 7017\n", + "batch 266 : total OK 19540 NOK 7060\n", + "batch 267 : total OK 19619 NOK 7081\n", + "batch 268 : total OK 19693 NOK 7107\n", + "batch 269 : total OK 19766 NOK 7134\n", + "batch 270 : total OK 19831 NOK 7169\n", + "batch 271 : total OK 19897 NOK 7203\n", + "batch 272 : total OK 19943 NOK 7257\n", + "batch 273 : total OK 20018 NOK 7282\n", + "batch 274 : total OK 20100 NOK 7300\n", + "batch 275 : total OK 20167 NOK 7333\n", + "batch 276 : total OK 20241 NOK 7359\n", + "batch 277 : total OK 20320 NOK 7380\n", + "batch 278 : total OK 20406 NOK 7394\n", + "batch 279 : total OK 20463 NOK 7437\n", + "batch 280 : total OK 20511 NOK 7489\n", + "batch 281 : total OK 20595 NOK 7505\n", + "batch 282 : total OK 20665 NOK 7535\n", + "batch 283 : total OK 20750 NOK 7550\n", + "batch 284 : total OK 20805 NOK 7595\n", + "batch 285 : total OK 20885 NOK 7615\n", + "batch 286 : total OK 20962 NOK 7638\n", + "batch 287 : total OK 21041 NOK 7659\n", + "batch 288 : total OK 21124 NOK 7676\n", + "batch 289 : total OK 21208 NOK 7692\n", + "batch 290 : total OK 21273 NOK 7727\n", + "batch 291 : total OK 21352 NOK 7748\n", + "batch 292 : total OK 21424 NOK 7776\n", + "batch 293 : total OK 21461 NOK 7839\n", + "batch 294 : total OK 21523 NOK 7877\n", + "batch 295 : total OK 21577 NOK 7923\n", + "batch 296 : total OK 
21635 NOK 7965\n", + "batch 297 : total OK 21707 NOK 7993\n", + "batch 298 : total OK 21788 NOK 8012\n", + "batch 299 : total OK 21841 NOK 8059\n", + "batch 300 : total OK 21905 NOK 8095\n", + "batch 301 : total OK 21944 NOK 8156\n", + "batch 302 : total OK 22022 NOK 8178\n", + "batch 303 : total OK 22104 NOK 8196\n", + "batch 304 : total OK 22188 NOK 8212\n", + "batch 305 : total OK 22259 NOK 8241\n", + "batch 306 : total OK 22339 NOK 8261\n", + "batch 307 : total OK 22420 NOK 8280\n", + "batch 308 : total OK 22494 NOK 8306\n", + "batch 309 : total OK 22575 NOK 8325\n", + "batch 310 : total OK 22610 NOK 8390\n", + "batch 311 : total OK 22658 NOK 8442\n", + "batch 312 : total OK 22694 NOK 8506\n", + "batch 313 : total OK 22768 NOK 8532\n", + "batch 314 : total OK 22829 NOK 8571\n", + "batch 315 : total OK 22907 NOK 8593\n", + "batch 316 : total OK 22976 NOK 8624\n", + "batch 317 : total OK 23012 NOK 8688\n", + "batch 318 : total OK 23069 NOK 8731\n", + "batch 319 : total OK 23138 NOK 8762\n", + "batch 320 : total OK 23166 NOK 8834\n", + "batch 321 : total OK 23243 NOK 8857\n", + "batch 322 : total OK 23312 NOK 8888\n", + "batch 323 : total OK 23395 NOK 8905\n", + "batch 324 : total OK 23467 NOK 8933\n", + "batch 325 : total OK 23534 NOK 8966\n", + "batch 326 : total OK 23583 NOK 9017\n", + "batch 327 : total OK 23643 NOK 9057\n", + "batch 328 : total OK 23704 NOK 9096\n", + "batch 329 : total OK 23741 NOK 9159\n", + "batch 330 : total OK 23813 NOK 9187\n", + "batch 331 : total OK 23885 NOK 9215\n", + "batch 332 : total OK 23935 NOK 9265\n", + "batch 333 : total OK 23976 NOK 9324\n", + "batch 334 : total OK 24035 NOK 9365\n", + "batch 335 : total OK 24124 NOK 9376\n", + "batch 336 : total OK 24201 NOK 9399\n", + "batch 337 : total OK 24266 NOK 9434\n", + "batch 338 : total OK 24327 NOK 9473\n", + "batch 339 : total OK 24383 NOK 9517\n", + "batch 340 : total OK 24458 NOK 9542\n", + "batch 341 : total OK 24507 NOK 9593\n", + "batch 342 : total OK 24579 NOK 9621\n", + 
"batch 343 : total OK 24664 NOK 9636\n", + "batch 344 : total OK 24731 NOK 9669\n", + "batch 345 : total OK 24792 NOK 9708\n", + "batch 346 : total OK 24852 NOK 9748\n", + "batch 347 : total OK 24907 NOK 9793\n", + "batch 348 : total OK 24978 NOK 9822\n", + "batch 349 : total OK 25041 NOK 9859\n", + "batch 350 : total OK 25113 NOK 9887\n", + "batch 351 : total OK 25184 NOK 9916\n", + "batch 352 : total OK 25247 NOK 9953\n", + "batch 353 : total OK 25315 NOK 9985\n", + "batch 354 : total OK 25385 NOK 10015\n", + "batch 355 : total OK 25454 NOK 10046\n", + "batch 356 : total OK 25505 NOK 10095\n", + "batch 357 : total OK 25580 NOK 10120\n", + "batch 358 : total OK 25655 NOK 10145\n", + "batch 359 : total OK 25727 NOK 10173\n", + "batch 360 : total OK 25794 NOK 10206\n", + "batch 361 : total OK 25863 NOK 10237\n", + "batch 362 : total OK 25946 NOK 10254\n", + "batch 363 : total OK 26008 NOK 10292\n", + "batch 364 : total OK 26088 NOK 10312\n", + "batch 365 : total OK 26129 NOK 10371\n", + "batch 366 : total OK 26183 NOK 10417\n", + "batch 367 : total OK 26240 NOK 10460\n", + "batch 368 : total OK 26308 NOK 10492\n", + "batch 369 : total OK 26383 NOK 10517\n", + "batch 370 : total OK 26468 NOK 10532\n", + "batch 371 : total OK 26518 NOK 10582\n", + "batch 372 : total OK 26578 NOK 10622\n", + "batch 373 : total OK 26630 NOK 10670\n", + "batch 374 : total OK 26701 NOK 10699\n", + "batch 375 : total OK 26753 NOK 10747\n", + "batch 376 : total OK 26820 NOK 10780\n", + "batch 377 : total OK 26891 NOK 10809\n", + "batch 378 : total OK 26961 NOK 10839\n", + "batch 379 : total OK 27037 NOK 10863\n", + "batch 380 : total OK 27103 NOK 10897\n", + "batch 381 : total OK 27175 NOK 10925\n", + "batch 382 : total OK 27242 NOK 10958\n", + "batch 383 : total OK 27297 NOK 11003\n", + "batch 384 : total OK 27366 NOK 11034\n", + "batch 385 : total OK 27442 NOK 11058\n", + "batch 386 : total OK 27524 NOK 11076\n", + "batch 387 : total OK 27575 NOK 11125\n", + "batch 388 : total OK 27634 
NOK 11166\n", + "batch 389 : total OK 27703 NOK 11197\n", + "batch 390 : total OK 27776 NOK 11224\n", + "batch 391 : total OK 27860 NOK 11240\n", + "batch 392 : total OK 27916 NOK 11284\n", + "batch 393 : total OK 27970 NOK 11330\n", + "batch 394 : total OK 28032 NOK 11368\n", + "batch 395 : total OK 28106 NOK 11394\n", + "batch 396 : total OK 28171 NOK 11429\n", + "batch 397 : total OK 28233 NOK 11467\n", + "batch 398 : total OK 28302 NOK 11498\n", + "batch 399 : total OK 28368 NOK 11532\n", + "batch 400 : total OK 28427 NOK 11573\n", + "batch 401 : total OK 28518 NOK 11582\n", + "batch 402 : total OK 28605 NOK 11595\n", + "batch 403 : total OK 28677 NOK 11623\n", + "batch 404 : total OK 28741 NOK 11659\n", + "batch 405 : total OK 28797 NOK 11703\n", + "batch 406 : total OK 28842 NOK 11758\n", + "batch 407 : total OK 28897 NOK 11803\n", + "batch 408 : total OK 28975 NOK 11825\n", + "batch 409 : total OK 29047 NOK 11853\n", + "batch 410 : total OK 29101 NOK 11899\n", + "batch 411 : total OK 29193 NOK 11907\n", + "batch 412 : total OK 29264 NOK 11936\n", + "batch 413 : total OK 29319 NOK 11981\n", + "batch 414 : total OK 29367 NOK 12033\n", + "batch 415 : total OK 29439 NOK 12061\n", + "batch 416 : total OK 29507 NOK 12093\n", + "batch 417 : total OK 29584 NOK 12116\n", + "batch 418 : total OK 29639 NOK 12161\n", + "batch 419 : total OK 29663 NOK 12237\n", + "batch 420 : total OK 29707 NOK 12293\n", + "batch 421 : total OK 29759 NOK 12341\n", + "batch 422 : total OK 29828 NOK 12372\n", + "batch 423 : total OK 29885 NOK 12415\n", + "batch 424 : total OK 29953 NOK 12447\n", + "batch 425 : total OK 30012 NOK 12488\n", + "batch 426 : total OK 30079 NOK 12521\n", + "batch 427 : total OK 30161 NOK 12539\n", + "batch 428 : total OK 30231 NOK 12569\n", + "batch 429 : total OK 30294 NOK 12606\n", + "batch 430 : total OK 30356 NOK 12644\n", + "batch 431 : total OK 30407 NOK 12693\n", + "batch 432 : total OK 30480 NOK 12720\n", + "batch 433 : total OK 30545 NOK 12755\n", + 
"batch 434 : total OK 30620 NOK 12780\n", + "batch 435 : total OK 30672 NOK 12828\n", + "batch 436 : total OK 30746 NOK 12854\n", + "batch 437 : total OK 30822 NOK 12878\n", + "batch 438 : total OK 30900 NOK 12900\n", + "batch 439 : total OK 30962 NOK 12938\n", + "batch 440 : total OK 31025 NOK 12975\n", + "batch 441 : total OK 31093 NOK 13007\n", + "batch 442 : total OK 31147 NOK 13053\n", + "batch 443 : total OK 31187 NOK 13113\n", + "batch 444 : total OK 31259 NOK 13141\n", + "batch 445 : total OK 31329 NOK 13171\n", + "batch 446 : total OK 31408 NOK 13192\n", + "batch 447 : total OK 31460 NOK 13240\n", + "batch 448 : total OK 31535 NOK 13265\n", + "batch 449 : total OK 31611 NOK 13289\n", + "batch 450 : total OK 31651 NOK 13349\n", + "batch 451 : total OK 31724 NOK 13376\n", + "batch 452 : total OK 31798 NOK 13402\n", + "batch 453 : total OK 31854 NOK 13446\n", + "batch 454 : total OK 31887 NOK 13513\n", + "batch 455 : total OK 31936 NOK 13564\n", + "batch 456 : total OK 31980 NOK 13620\n", + "batch 457 : total OK 32055 NOK 13645\n", + "batch 458 : total OK 32133 NOK 13667\n", + "batch 459 : total OK 32215 NOK 13685\n", + "batch 460 : total OK 32295 NOK 13705\n", + "batch 461 : total OK 32357 NOK 13743\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "batch 462 : total OK 32421 NOK 13779\n", + "batch 463 : total OK 32487 NOK 13813\n", + "batch 464 : total OK 32574 NOK 13826\n", + "batch 465 : total OK 32643 NOK 13857\n", + "batch 466 : total OK 32703 NOK 13897\n", + "batch 467 : total OK 32777 NOK 13923\n", + "batch 468 : total OK 32843 NOK 13957\n", + "batch 469 : total OK 32932 NOK 13968\n", + "batch 470 : total OK 33008 NOK 13992\n", + "batch 471 : total OK 33090 NOK 14010\n", + "batch 472 : total OK 33159 NOK 14041\n", + "batch 473 : total OK 33240 NOK 14060\n", + "batch 474 : total OK 33304 NOK 14096\n", + "batch 475 : total OK 33384 NOK 14116\n", + "batch 476 : total OK 33461 NOK 14139\n", + "batch 477 : total OK 33544 NOK 
14156\n", + "batch 478 : total OK 33631 NOK 14169\n", + "batch 479 : total OK 33716 NOK 14184\n", + "batch 480 : total OK 33797 NOK 14203\n", + "batch 481 : total OK 33835 NOK 14265\n", + "batch 482 : total OK 33903 NOK 14297\n", + "batch 483 : total OK 33972 NOK 14328\n", + "batch 484 : total OK 34032 NOK 14368\n", + "batch 485 : total OK 34072 NOK 14428\n", + "batch 486 : total OK 34135 NOK 14465\n", + "batch 487 : total OK 34198 NOK 14502\n", + "batch 488 : total OK 34271 NOK 14529\n", + "batch 489 : total OK 34327 NOK 14573\n", + "batch 490 : total OK 34381 NOK 14619\n", + "batch 491 : total OK 34461 NOK 14639\n", + "batch 492 : total OK 34544 NOK 14656\n", + "batch 493 : total OK 34640 NOK 14660\n", + "batch 494 : total OK 34711 NOK 14689\n", + "batch 495 : total OK 34804 NOK 14696\n", + "batch 496 : total OK 34896 NOK 14704\n", + "batch 497 : total OK 34986 NOK 14714\n", + "batch 498 : total OK 35080 NOK 14720\n", + "batch 499 : total OK 35153 NOK 14747\n", + "batch 500 : total OK 35196 NOK 14804\n" ] } ], "source": [ + "img_queue = setup_dataloader(val_dir, label_file, batch_size)\n", + "\n", "ok = 0\n", "nok = 0\n", "i = 0\n", - "for (imgs, lbls) in val_loader:\n", - " ibuf_normal = imgs.numpy().reshape(accel.ishape_normal)\n", - " exp = lbls.numpy()\n", + "while not img_queue.last_batch:\n", + " imgs, lbls = img_queue.get_batch(batch_size, timeout=None)\n", + " imgs = np.array(imgs)\n", + " exp = np.array(lbls)\n", + " \n", + " ibuf_normal = imgs.reshape(accel.ishape_normal)\n", " obuf_normal = accel.execute(ibuf_normal)\n", " obuf_normal = obuf_normal.reshape(batch_size, -1)[:,0]\n", " ret = np.bincount(obuf_normal.flatten() == exp.flatten())\n", @@ -404,14 +877,14 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 14, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Final top-1 accuracy: 70.412%\n" + "Final top-1 accuracy: 70.392%\n" ] } ], @@ -430,27 +903,27 @@ }, { "cell_type": "code", - 
"execution_count": 17, + "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "{'runtime[ms]': 551.5413284301758,\n", - " 'throughput[images/s]': 1813.1007568304074,\n", - " 'DRAM_in_bandwidth[Mb/s]': 272.92243072416755,\n", - " 'DRAM_out_bandwidth[Mb/s]': 0.018131007568304075,\n", - " 'fclk[mhz]': 206,\n", - " 'batch_size': 1000,\n", - " 'fold_input[ms]': 1.0013580322265625e-05,\n", - " 'pack_input[ms]': 1.52587890625e-05,\n", - " 'copy_input_data_to_device[ms]': 0.07888936996459961,\n", - " 'copy_output_data_from_device[ms]': 0.00015473365783691406,\n", - " 'unpack_output[ms]': 0.040567874908447266,\n", - " 'unfold_output[ms]': 6.4373016357421875e-06}" + "{'runtime[ms]': 50.49920082092285,\n", + " 'throughput[images/s]': 1980.2293575815947,\n", + " 'DRAM_in_bandwidth[Mb/s]': 298.0799647380423,\n", + " 'DRAM_out_bandwidth[Mb/s]': 0.01980229357581595,\n", + " 'fclk[mhz]': 100.0,\n", + " 'batch_size': 100,\n", + " 'fold_input[ms]': 1.5020370483398438e-05,\n", + " 'pack_input[ms]': 2.4080276489257812e-05,\n", + " 'copy_input_data_to_device[ms]': 0.006676673889160156,\n", + " 'copy_output_data_from_device[ms]': 0.00022292137145996094,\n", + " 'unpack_output[ms]': 0.004586219787597656,\n", + " 'unfold_output[ms]': 6.9141387939453125e-06}" ] }, - "execution_count": 17, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } diff --git a/setup.py b/setup.py index 7edd82b..1eb0930 100644 --- a/setup.py +++ b/setup.py @@ -1,4 +1,4 @@ -# Copyright (C) 2020 Xilinx, Inc +# Copyright (C) 2020-2021 Xilinx, Inc # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -20,7 +20,7 @@ from pynq.utils import build_py as _build_py __author__ = "Yaman Umuroglu" -__copyright__ = "Copyright 2020, Xilinx" +__copyright__ = "Copyright 2020-2021, Xilinx" __email__ = "yamanu@xilinx.com" @@ -90,7 +90,7 @@ def extend_package(path): setup( name=module_name, - version="0.0.1b", + version="0.0.2b", description="FINN Examples on PYNQ for Zynq and Alveo", long_description=long_description, long_description_content_type="text/markdown", @@ -108,8 +108,8 @@ def extend_package(path): setup_requires=["pynq>=2.5.1"], install_requires=[ "pynq>=2.5.1", - "finn-base==0.0.1b0", - "finn-dataset_loading==0.0.4", # noqa + "finn-base==0.0.2b0", + "finn-dataset_loading==0.0.5", # noqa ], extras_require={ ':python_version<"3.6"': ["matplotlib<3.1", "ipython==7.9"],