Merge pull request #43 from JiacongSun/master

Merging ZigZag-IMC repository into ZigZag repository
KULeuven-MICAS · Mar 13, 2024 · 866cf03 · 866cf03
2 parents f6342c6 + 8f1c06f
commit 866cf03
Show file tree

Hide file tree

Showing 40 changed files with 18,267 additions and 72 deletions.
diff --git a/.gitignore b/.gitignore
@@ -1,6 +1,7 @@
 # Byte-compiled / optimized / DLL files
 __pycache__/
 *.py[cod]
+*.pyc
 *$py.class
 .idea
 
@@ -152,4 +153,7 @@ docs/make.bat
 html/
 
 # debug file
-debug*
+debug*
+
+# cacti cache files for imc
+zigzag/classes/cacti/cacti_master/self_gen/
diff --git a/README.md b/README.md
@@ -53,3 +53,8 @@ L. Mei, K. Goetschalckx, A. Symons and M. Verhelst, " DeFiNES: Enabling Fast Exp
 A. Symons, L. Mei, S. Colleman, P. Houshmand, S. Karl and M. Verhelst, “Towards Heterogeneous Multi-core Accelerators Exploiting Fine-grained Scheduling of Layer-Fused Deep Neural Networks”, <i>arXiv e-prints</i>, 2022. doi:10.48550/arXiv.2212.10612. [paper](https://arxiv.org/abs/2212.10612), [github](https://github.com/ZigZag-Project/stream)
 
 S. Karl, A. Symons, N. Fasfous and M. Verhelst, "Genetic Algorithm-based Framework for Layer-Fused Scheduling of Multiple DNNs on Multi-core Systems," 2023 Design, Automation & Test in Europe Conference & Exhibition (DATE), Antwerp, Belgium, 2023, pp. 1-6, doi: 10.23919/DATE56975.2023.10137070. [paper](https://ieeexplore.ieee.org/document/10137070), [slides](https://www.dropbox.com/s/rv8qiko59h4pp0s/Genetic%20Algorithm-based%20Framework%20for.pptx?dl=0), [video](https://www.dropbox.com/s/12v94stvevj9xns/Genetic%20Algorithm-based%20Framework%20for.mp4?dl=0)
+
+#### Extend ZigZag to support In-Memory-Computing cores
+J. Sun, P. Houshmand and M. Verhelst, "Analog or Digital In-Memory Computing? Benchmarking through Quantitative Modeling," Proceedings of the IEEE/ACM International Conference On Computer Aided Design (ICCAD), October 2023. [paper](https://ieeexplore.ieee.org/document/10323763), [poster](https://drive.google.com/file/d/1EVdua-y2Wg8WL-ovUIw7KUR9kpnpN4AS/view?usp=sharing), [slides](https://docs.google.com/presentation/d/19OXRDh6NCBUIOVGneO3lrZfVT58xh06U/edit?usp=sharing&ouid=108247328431603587200&rtpof=true&sd=true), [video](https://drive.google.com/file/d/10-k4XEPan-O-QAH4Q0uvone36qfNRCpK/view?usp=sharing)
+
+P. Houshmand, J. Sun and M. Verhelst, "Benchmarking and modeling of analog and digital SRAM in-memory computing architectures," arXiv preprint arXiv:2305.18335 (2023). [paper](https://arxiv.org/abs/2305.18335)
diff --git a/tests/main/test_imc/__init__.py b/tests/main/test_imc/__init__.py
diff --git a/tests/main/test_imc/test_aimc.py b/tests/main/test_imc/test_aimc.py
@@ -0,0 +1,40 @@
+import pytest
+
+from zigzag.api import get_hardware_performance_zigzag_imc
+
+workloads = (
+    "zigzag/inputs/examples/workload/alexnet.onnx",
+    "zigzag/inputs/examples/workload/mobilenetv2.onnx",
+    "zigzag/inputs/examples/workload/resnet18.onnx",
+    "zigzag.inputs.examples.workload.resnet18",
+)
+
+# Expected energy, latency (#cycles), clk time and area for each workload defined above
+ens_lats_clks_areas = {
+    "zigzag/inputs/examples/workload/alexnet.onnx": (2557076250.266322, 44012016.0, 6.61184, 0.7892517658006044),
+    "zigzag/inputs/examples/workload/mobilenetv2.onnx": (802185102.578702, 14939020.0, 6.61184, 0.7892517658006044),
+    "zigzag/inputs/examples/workload/resnet18.onnx": (2252151728.145326, 62079022.0, 6.61184, 0.7892517658006044),
+    "zigzag.inputs.examples.workload.resnet18": (2466090000.2577806, 67309272.0, 6.61184, 0.7892517658006044),
+}
+
+
+@pytest.fixture
+def mapping():
+    return "zigzag.inputs.examples.mapping.default_imc"
+
+
+@pytest.fixture
+def accelerator():
+    return "zigzag.inputs.examples.hardware.Aimc"
+
+
+@pytest.mark.parametrize("workload", workloads)
+def test_api(workload, accelerator, mapping):
+    (energy, latency, tclk, area, cmes) = get_hardware_performance_zigzag_imc(
+        workload, accelerator, mapping
+    )
+    (expected_energy, expected_latency, expected_tclk, expected_area) = ens_lats_clks_areas[workload]
+    assert energy == pytest.approx(expected_energy)
+    assert latency == pytest.approx(expected_latency)
+    assert tclk == pytest.approx(expected_tclk)
+    assert area == pytest.approx(expected_area)
diff --git a/tests/main/test_imc/test_dimc.py b/tests/main/test_imc/test_dimc.py
@@ -0,0 +1,40 @@
+import pytest
+
+from zigzag.api import get_hardware_performance_zigzag_imc
+
+workloads = (
+    "zigzag/inputs/examples/workload/alexnet.onnx",
+    "zigzag/inputs/examples/workload/mobilenetv2.onnx",
+    "zigzag/inputs/examples/workload/resnet18.onnx",
+    "zigzag.inputs.examples.workload.resnet18",
+)
+
+# Expected energy, latency (#cycles), clk time and area for each workload defined above
+ens_lats_clks_areas = {
+    "zigzag/inputs/examples/workload/alexnet.onnx": (2340181787.2719307, 72692592.0, 3.2026, 0.785592664),
+    "zigzag/inputs/examples/workload/mobilenetv2.onnx": (703506891.3687075, 28005964.0, 3.2026, 0.785592664),
+    "zigzag/inputs/examples/workload/resnet18.onnx": (1828766840.9463186, 120700590.0, 3.2026, 0.785592664),
+    "zigzag.inputs.examples.workload.resnet18": (2008581031.8287854, 130747736.0, 3.2026, 0.785592664),
+}
+
+
+@pytest.fixture
+def mapping():
+    return "zigzag.inputs.examples.mapping.default_imc"
+
+
+@pytest.fixture
+def accelerator():
+    return "zigzag.inputs.examples.hardware.Dimc"
+
+
+@pytest.mark.parametrize("workload", workloads)
+def test_api(workload, accelerator, mapping):
+    (energy, latency, tclk, area, cmes) = get_hardware_performance_zigzag_imc(
+        workload, accelerator, mapping
+    )
+    (expected_energy, expected_latency, expected_tclk, expected_area) = ens_lats_clks_areas[workload]
+    assert energy == pytest.approx(expected_energy)
+    assert latency == pytest.approx(expected_latency)
+    assert tclk == pytest.approx(expected_tclk)
+    assert area == pytest.approx(expected_area)
diff --git a/zigzag/api.py b/zigzag/api.py
@@ -81,6 +81,84 @@ def get_hardware_performance_zigzag(
 
     return cmes[0][0].energy_total, cmes[0][0].latency_total2, cmes
 
+def get_hardware_performance_zigzag_imc(
+    workload,
+    accelerator,
+    mapping,
+    opt="latency",
+    dump_filename_pattern="outputs/layer_?.json",
+    pickle_filename="outputs/list_of_cmes.pickle",
+):
+    # Initialize the logger
+    import logging as _logging
+
+    _logging_level = _logging.INFO
+    _logging_format = ( 
+        "%(asctime)s - %(funcName)s +%(lineno)s - %(levelname)s - %(message)s"
+    )   
+    _logging.basicConfig(level=_logging_level, format=_logging_format)
+
+    # Sanity check on the optimization criterion
+    if opt == "energy":
+        opt_stage = MinimalEnergyStage
+    elif opt == "latency":
+        opt_stage = MinimalLatencyStage
+    elif opt == "EDP":
+        opt_stage = MinimalEDPStage
+    else:
+        raise NotImplementedError(
+            "Optimization criterion 'opt' should be either 'energy' or 'latency' or 'EDP'."
+        )   
+
+    # Check workload format and based on it select the correct workload parser stage
+    try:
+        if workload.split(".")[-1] == "onnx":
+            workload_parser_stage = ONNXModelParserStage
+        else:
+            workload_parser_stage = WorkloadParserStage
+    except:
+        workload_parser_stage = WorkloadParserStage
+
+    mainstage = MainStage(
+        [  # Initialize the MainStage as entry point
+            workload_parser_stage,  # Parse the ONNX Model into the workload
+            AcceleratorParserStage,  # Parse the accelerator module/passthrough given accelerator
+            SimpleSaveStage,  # Save the summed CME energy and latency to a json
+            PickleSaveStage,  # Save all received CMEs in a list to a pickle file
+            SumStage,  # Sum up the received best CME across all layers of the workload
+            SearchUnusedMemoryStage,  # Detect unnecessary memory instances
+            WorkloadStage,  # Iterate through the different layers in the workload
+            RemoveUnusedMemoryStage,  # Remove unnecessary memory instances
+            CompleteSaveStage,  # Save each processed layer to a json
+            opt_stage,  # Reduce all CMEs, returning minimal energy/latency one
+            SpatialMappingGeneratorStage,  # Generate multiple spatial mappings (SM)
+            opt_stage,  # Reduce all CMEs, returning minimal energy/latency one
+            LomaStage,  # Generate multiple temporal mappings (TM)
+            # TemporalOrderingConversionStage,  # Based on the fixed temporal mapping order, generate one temporal mapping (TM)
+            CostModelStage,  # Evaluate generated SM and TM through cost model
+        ],
+        accelerator=accelerator,  # required by AcceleratorParserStage
+        workload=workload,  # required by workload_parser_stage
+        mapping=mapping,  # required by workload_parser_stage
+        dump_filename_pattern=dump_filename_pattern,  # output file save pattern
+        pickle_filename=pickle_filename,  # filename for pickled list of cmes
+        loma_lpf_limit=6,  # required by LomaStage
+        enable_mix_spatial_mapping_generation=True,  # enable auto-generation of mix spatial mapping
+        maximize_hardware_utilization=True, # only evaluate spatial mapping with top2 utilization (fast simulation)
+        enable_weight_diagonal_mapping=True,  # required by SpatialMappingGeneratorStage
+        loma_show_progress_bar=True,
+        # If we need access the same input data multiple times from the innermost memory level and the data size is smaller than the memory read bw,
+        # take into account only one-time access cost (assume the data can stay at the output pins of the memory as long as it is needed).
+        # By default, if the parameter is not defined, it will be set as False internally.
+        access_same_data_considered_as_no_access=True,
+    )
+
+    # Launch the MainStage
+    answers = mainstage.run()
+    # Get CME from answer
+    cmes = answers
+
+    return cmes[0][0].energy_total, cmes[0][0].latency_total2, cmes[0][0].tclk, cmes[0][0].area_total, cmes
 
 def get_hardware_performance_zigzag_pe_array_scaling(
     workload,