Skip to content

Commit

Permalink
Merge pull request #43 from JiacongSun/master
Browse files Browse the repository at this point in the history
Merging ZigZag-IMC repository into ZigZag repository
  • Loading branch information
asyms authored Mar 13, 2024
2 parents f6342c6 + 8f1c06f commit 866cf03
Show file tree
Hide file tree
Showing 40 changed files with 18,267 additions and 72 deletions.
6 changes: 5 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*.pyc
*$py.class
.idea

Expand Down Expand Up @@ -152,4 +153,7 @@ docs/make.bat
html/

# debug file
debug*
debug*

# cacti cache files for imc
zigzag/classes/cacti/cacti_master/self_gen/
5 changes: 5 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -53,3 +53,8 @@ L. Mei, K. Goetschalckx, A. Symons and M. Verhelst, " DeFiNES: Enabling Fast Exp
A. Symons, L. Mei, S. Colleman, P. Houshmand, S. Karl and M. Verhelst, “Towards Heterogeneous Multi-core Accelerators Exploiting Fine-grained Scheduling of Layer-Fused Deep Neural Networks”, <i>arXiv e-prints</i>, 2022. doi:10.48550/arXiv.2212.10612. [paper](https://arxiv.org/abs/2212.10612), [github](https://github.com/ZigZag-Project/stream)

S. Karl, A. Symons, N. Fasfous and M. Verhelst, "Genetic Algorithm-based Framework for Layer-Fused Scheduling of Multiple DNNs on Multi-core Systems," 2023 Design, Automation & Test in Europe Conference & Exhibition (DATE), Antwerp, Belgium, 2023, pp. 1-6, doi: 10.23919/DATE56975.2023.10137070. [paper](https://ieeexplore.ieee.org/document/10137070), [slides](https://www.dropbox.com/s/rv8qiko59h4pp0s/Genetic%20Algorithm-based%20Framework%20for.pptx?dl=0), [video](https://www.dropbox.com/s/12v94stvevj9xns/Genetic%20Algorithm-based%20Framework%20for.mp4?dl=0)

#### Extend ZigZag to support In-Memory-Computing cores
J. Sun, P. Houshmand and M. Verhelst, "Analog or Digital In-Memory Computing? Benchmarking through Quantitative Modeling," Proceedings of the IEEE/ACM International Conference On Computer Aided Design (ICCAD), October 2023. [paper](https://ieeexplore.ieee.org/document/10323763), [poster](https://drive.google.com/file/d/1EVdua-y2Wg8WL-ovUIw7KUR9kpnpN4AS/view?usp=sharing), [slides](https://docs.google.com/presentation/d/19OXRDh6NCBUIOVGneO3lrZfVT58xh06U/edit?usp=sharing&ouid=108247328431603587200&rtpof=true&sd=true), [video](https://drive.google.com/file/d/10-k4XEPan-O-QAH4Q0uvone36qfNRCpK/view?usp=sharing)

P. Houshmand, J. Sun and M. Verhelst, "Benchmarking and modeling of analog and digital SRAM in-memory computing architectures," arXiv preprint arXiv:2305.18335 (2023). [paper](https://arxiv.org/abs/2305.18335)
Empty file added tests/main/test_imc/__init__.py
Empty file.
40 changes: 40 additions & 0 deletions tests/main/test_imc/test_aimc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
import pytest

from zigzag.api import get_hardware_performance_zigzag_imc

# Reference results per workload, stored as a 4-tuple:
# (energy, latency in #cycles, clk time, area).
ens_lats_clks_areas = {
    "zigzag/inputs/examples/workload/alexnet.onnx": (
        2557076250.266322,
        44012016.0,
        6.61184,
        0.7892517658006044,
    ),
    "zigzag/inputs/examples/workload/mobilenetv2.onnx": (
        802185102.578702,
        14939020.0,
        6.61184,
        0.7892517658006044,
    ),
    "zigzag/inputs/examples/workload/resnet18.onnx": (
        2252151728.145326,
        62079022.0,
        6.61184,
        0.7892517658006044,
    ),
    "zigzag.inputs.examples.workload.resnet18": (
        2466090000.2577806,
        67309272.0,
        6.61184,
        0.7892517658006044,
    ),
}

# Workloads to run: exactly the keys of the reference table, in declaration order.
workloads = tuple(ens_lats_clks_areas)


@pytest.fixture
def mapping():
    """Provide the dotted path string of the mapping passed to the IMC API."""
    mapping_path = "zigzag.inputs.examples.mapping.default_imc"
    return mapping_path


@pytest.fixture
def accelerator():
    """Provide the dotted path string of the accelerator passed to the IMC API."""
    accelerator_path = "zigzag.inputs.examples.hardware.Aimc"
    return accelerator_path


@pytest.mark.parametrize("workload", workloads)
def test_api(workload, accelerator, mapping):
    """Compare the IMC API results for one workload with its recorded reference values."""
    outcome = get_hardware_performance_zigzag_imc(workload, accelerator, mapping)
    # The fifth element (the CMEs) is returned by the API but not checked here.
    energy, latency, tclk, area, _cmes = outcome

    ref_energy, ref_latency, ref_tclk, ref_area = ens_lats_clks_areas[workload]

    assert energy == pytest.approx(ref_energy)
    assert latency == pytest.approx(ref_latency)
    assert tclk == pytest.approx(ref_tclk)
    assert area == pytest.approx(ref_area)
40 changes: 40 additions & 0 deletions tests/main/test_imc/test_dimc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
import pytest

from zigzag.api import get_hardware_performance_zigzag_imc

# Reference results per workload, stored as a 4-tuple:
# (energy, latency in #cycles, clk time, area).
ens_lats_clks_areas = {
    "zigzag/inputs/examples/workload/alexnet.onnx": (
        2340181787.2719307,
        72692592.0,
        3.2026,
        0.785592664,
    ),
    "zigzag/inputs/examples/workload/mobilenetv2.onnx": (
        703506891.3687075,
        28005964.0,
        3.2026,
        0.785592664,
    ),
    "zigzag/inputs/examples/workload/resnet18.onnx": (
        1828766840.9463186,
        120700590.0,
        3.2026,
        0.785592664,
    ),
    "zigzag.inputs.examples.workload.resnet18": (
        2008581031.8287854,
        130747736.0,
        3.2026,
        0.785592664,
    ),
}

# Workloads to run: exactly the keys of the reference table, in declaration order.
workloads = tuple(ens_lats_clks_areas)


@pytest.fixture
def mapping():
    """Provide the dotted path string of the mapping passed to the IMC API."""
    mapping_path = "zigzag.inputs.examples.mapping.default_imc"
    return mapping_path


@pytest.fixture
def accelerator():
    """Provide the dotted path string of the accelerator passed to the IMC API."""
    accelerator_path = "zigzag.inputs.examples.hardware.Dimc"
    return accelerator_path


@pytest.mark.parametrize("workload", workloads)
def test_api(workload, accelerator, mapping):
    """Compare the IMC API results for one workload with its recorded reference values."""
    outcome = get_hardware_performance_zigzag_imc(workload, accelerator, mapping)
    # The fifth element (the CMEs) is returned by the API but not checked here.
    energy, latency, tclk, area, _cmes = outcome

    ref_energy, ref_latency, ref_tclk, ref_area = ens_lats_clks_areas[workload]

    assert energy == pytest.approx(ref_energy)
    assert latency == pytest.approx(ref_latency)
    assert tclk == pytest.approx(ref_tclk)
    assert area == pytest.approx(ref_area)
78 changes: 78 additions & 0 deletions zigzag/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,84 @@ def get_hardware_performance_zigzag(

return cmes[0][0].energy_total, cmes[0][0].latency_total2, cmes

def get_hardware_performance_zigzag_imc(
    workload,
    accelerator,
    mapping,
    opt="latency",
    dump_filename_pattern="outputs/layer_?.json",
    pickle_filename="outputs/list_of_cmes.pickle",
):
    """Run the ZigZag stage pipeline configured for in-memory-computing (IMC) cores.

    Args:
        workload: Workload to evaluate. A string whose last "."-separated
            component is "onnx" is parsed with ONNXModelParserStage; any other
            value (including non-string objects) is handed to
            WorkloadParserStage.
        accelerator: Forwarded to MainStage as ``accelerator`` (consumed by
            AcceleratorParserStage).
        mapping: Forwarded to MainStage as ``mapping`` (consumed by the
            workload parser stage).
        opt: Optimization criterion: "energy", "latency" or "EDP".
        dump_filename_pattern: Output file save pattern forwarded to the
            save stages.
        pickle_filename: Filename for the pickled list of CMEs.

    Returns:
        A 5-tuple ``(energy_total, latency_total2, tclk, area_total, cmes)``.
        The first four values are attributes of ``cmes[0][0]``; ``cmes`` is
        the unmodified result of ``MainStage.run()``.

    Raises:
        NotImplementedError: If ``opt`` is not one of the supported criteria.
    """
    # Initialize the logger
    import logging as _logging

    _logging_level = _logging.INFO
    _logging_format = (
        "%(asctime)s - %(funcName)s +%(lineno)s - %(levelname)s - %(message)s"
    )
    _logging.basicConfig(level=_logging_level, format=_logging_format)

    # Sanity check on the optimization criterion
    if opt == "energy":
        opt_stage = MinimalEnergyStage
    elif opt == "latency":
        opt_stage = MinimalLatencyStage
    elif opt == "EDP":
        opt_stage = MinimalEDPStage
    else:
        raise NotImplementedError(
            "Optimization criterion 'opt' should be either 'energy' or 'latency' or 'EDP'."
        )

    # Check workload format and based on it select the correct workload parser stage.
    # Only a string can name an ONNX file; every other object goes to the generic
    # parser. An explicit type check is used instead of a bare `except:` so that
    # KeyboardInterrupt/SystemExit and unrelated errors are never swallowed here.
    if isinstance(workload, str) and workload.split(".")[-1] == "onnx":
        workload_parser_stage = ONNXModelParserStage
    else:
        workload_parser_stage = WorkloadParserStage

    mainstage = MainStage(
        [  # Initialize the MainStage as entry point
            workload_parser_stage,  # Parse the workload (ONNX model or generic workload definition)
            AcceleratorParserStage,  # Parse the accelerator module/passthrough given accelerator
            SimpleSaveStage,  # Save the summed CME energy and latency to a json
            PickleSaveStage,  # Save all received CMEs in a list to a pickle file
            SumStage,  # Sum up the received best CME across all layers of the workload
            SearchUnusedMemoryStage,  # Detect unnecessary memory instances
            WorkloadStage,  # Iterate through the different layers in the workload
            RemoveUnusedMemoryStage,  # Remove unnecessary memory instances
            CompleteSaveStage,  # Save each processed layer to a json
            opt_stage,  # Reduce all CMEs, returning minimal energy/latency one
            SpatialMappingGeneratorStage,  # Generate multiple spatial mappings (SM)
            opt_stage,  # Reduce all CMEs, returning minimal energy/latency one
            LomaStage,  # Generate multiple temporal mappings (TM)
            # TemporalOrderingConversionStage,  # Based on the fixed temporal mapping order, generate one temporal mapping (TM)
            CostModelStage,  # Evaluate generated SM and TM through cost model
        ],
        accelerator=accelerator,  # required by AcceleratorParserStage
        workload=workload,  # required by workload_parser_stage
        mapping=mapping,  # required by workload_parser_stage
        dump_filename_pattern=dump_filename_pattern,  # output file save pattern
        pickle_filename=pickle_filename,  # filename for pickled list of cmes
        loma_lpf_limit=6,  # required by LomaStage
        enable_mix_spatial_mapping_generation=True,  # enable auto-generation of mix spatial mapping
        maximize_hardware_utilization=True,  # only evaluate spatial mapping with top2 utilization (fast simulation)
        enable_weight_diagonal_mapping=True,  # required by SpatialMappingGeneratorStage
        loma_show_progress_bar=True,
        # If the same input data must be accessed multiple times from the innermost
        # memory level and the data size is smaller than the memory read bandwidth,
        # take into account only a one-time access cost (assume the data can stay at
        # the output pins of the memory as long as it is needed).
        # By default, if the parameter is not defined, it will be set as False internally.
        access_same_data_considered_as_no_access=True,
    )

    # Launch the MainStage; its answers are the CMEs returned to the caller.
    cmes = mainstage.run()

    # The summary values are read from the first entry of the first answer.
    total_cme = cmes[0][0]
    return (
        total_cme.energy_total,
        total_cme.latency_total2,
        total_cme.tclk,
        total_cme.area_total,
        cmes,
    )

def get_hardware_performance_zigzag_pe_array_scaling(
workload,
Expand Down
Loading

0 comments on commit 866cf03

Please sign in to comment.