Merge pull request #25 from JiacongSun/master

New feature: allow a layer's output data to remain in low-level memory for later use when possible
KULeuven-MICAS · Oct 23, 2023 · 2b2e076 · 2b2e076
2 parents 364f0a5 + aeb74ab
commit 2b2e076
Show file tree

Hide file tree

Showing 16 changed files with 987 additions and 2 deletions.
diff --git a/main.py b/main.py
@@ -49,4 +49,4 @@
 )
 
 # Launch the MainStage
-mainstage.run()
+mainstage.run()
diff --git a/main_onnx.py b/main_onnx.py
@@ -45,4 +45,4 @@
 )
 
 # Launch the MainStage
-mainstage.run()
+mainstage.run()
diff --git a/tests/main/test_ascend_like.py → tests/main/test_origin/test_ascend_like.py b/tests/main/test_ascend_like.py → tests/main/test_origin/test_ascend_like.py
diff --git a/tests/main/test_edge_tpu_like.py → tests/main/test_origin/test_edge_tpu_like.py b/tests/main/test_edge_tpu_like.py → tests/main/test_origin/test_edge_tpu_like.py
diff --git a/tests/main/test_meta_prototype_like.py → ...n/test_origin/test_meta_prototype_like.py b/tests/main/test_meta_prototype_like.py → ...n/test_origin/test_meta_prototype_like.py
diff --git a/tests/main/test_tesla_npu_like.py → ...s/main/test_origin/test_tesla_npu_like.py b/tests/main/test_tesla_npu_like.py → ...s/main/test_origin/test_tesla_npu_like.py
diff --git a/tests/main/test_tpu_like.py → tests/main/test_origin/test_tpu_like.py b/tests/main/test_tpu_like.py → tests/main/test_origin/test_tpu_like.py
diff --git a/tests/main/test_without_unused_memory/test_ascend_like.py b/tests/main/test_without_unused_memory/test_ascend_like.py
@@ -0,0 +1,38 @@
+import pytest
+
+from zigzag.api import get_hardware_performance_zigzag_without_unused_memory
+
+# Expected (energy, latency) for each workload; the keys are also the test parameters
+ens_lats = {
+    "zigzag/inputs/examples/workload/alexnet.onnx": (5649555894.9, 8637780),
+    "zigzag/inputs/examples/workload/mobilenetv2.onnx": (1881386179.71, 6486685),
+    "zigzag/inputs/examples/workload/resnet18.onnx": (1709089377.83, 3583047),
+    "zigzag.inputs.examples.workload.resnet18": (2243493483.15, 4657130),
+}
+
+# Derived from ens_lats so the parametrized workloads and expectations cannot diverge
+workloads = tuple(ens_lats)
+
+
+@pytest.fixture
+def mapping():
+    """Return the mapping path string passed to the API under test."""
+    return "zigzag.inputs.examples.mapping.ascend_like"
+
+
+@pytest.fixture
+def accelerator():
+    """Return the accelerator path string passed to the API under test."""
+    return "zigzag.inputs.examples.hardware.Ascend_like"
+
+
+@pytest.mark.parametrize("workload", workloads)
+def test_api(workload, accelerator, mapping):
+    """Compare the API's energy and latency with the recorded values in ens_lats."""
+    # The third returned value is not checked by this test
+    energy, latency, _ = get_hardware_performance_zigzag_without_unused_memory(
+        workload, accelerator, mapping
+    )
+    expected_energy, expected_latency = ens_lats[workload]
+    assert energy == pytest.approx(expected_energy)
+    assert latency == pytest.approx(expected_latency)
diff --git a/tests/main/test_without_unused_memory/test_edge_tpu_like.py b/tests/main/test_without_unused_memory/test_edge_tpu_like.py
@@ -0,0 +1,38 @@
+import pytest
+
+from zigzag.api import get_hardware_performance_zigzag_without_unused_memory
+
+# Expected (energy, latency) for each workload; the keys are also the test parameters
+ens_lats = {
+    "zigzag/inputs/examples/workload/alexnet.onnx": (5568602396.684999, 8134431),
+    "zigzag/inputs/examples/workload/mobilenetv2.onnx": (751128562.4699999, 2427487),
+    "zigzag/inputs/examples/workload/resnet18.onnx": (1784539639.4349997, 3176546),
+    "zigzag.inputs.examples.workload.resnet18": (2115122870.395, 3884789),
+}
+
+# Derived from ens_lats so the parametrized workloads and expectations cannot diverge
+workloads = tuple(ens_lats)
+
+
+@pytest.fixture
+def mapping():
+    """Return the mapping path string passed to the API under test."""
+    return "zigzag.inputs.examples.mapping.edge_tpu_like"
+
+
+@pytest.fixture
+def accelerator():
+    """Return the accelerator path string passed to the API under test."""
+    return "zigzag.inputs.examples.hardware.Edge_TPU_like"
+
+
+@pytest.mark.parametrize("workload", workloads)
+def test_api(workload, accelerator, mapping):
+    """Compare the API's energy and latency with the recorded values in ens_lats."""
+    # The third returned value is not checked by this test
+    energy, latency, _ = get_hardware_performance_zigzag_without_unused_memory(
+        workload, accelerator, mapping
+    )
+    expected_energy, expected_latency = ens_lats[workload]
+    assert energy == pytest.approx(expected_energy)
+    assert latency == pytest.approx(expected_latency)
diff --git a/tests/main/test_without_unused_memory/test_meta_prototype_like.py b/tests/main/test_without_unused_memory/test_meta_prototype_like.py
@@ -0,0 +1,38 @@
+import pytest
+
+from zigzag.api import get_hardware_performance_zigzag_without_unused_memory
+
+# Expected (energy, latency) for each workload; the keys are also the test parameters
+ens_lats = {
+    "zigzag/inputs/examples/workload/alexnet.onnx": (5679695605.4400015, 8299150),
+    "zigzag/inputs/examples/workload/mobilenetv2.onnx": (901092009.6000001, 2610609),
+    "zigzag/inputs/examples/workload/resnet18.onnx": (1730672410.3200004, 3262009),
+    "zigzag.inputs.examples.workload.resnet18": (2265438430.2299995, 4017227),
+}
+
+# Derived from ens_lats so the parametrized workloads and expectations cannot diverge
+workloads = tuple(ens_lats)
+
+
+@pytest.fixture
+def mapping():
+    """Return the mapping path string passed to the API under test."""
+    return "zigzag.inputs.examples.mapping.meta_prototype_like"
+
+
+@pytest.fixture
+def accelerator():
+    """Return the accelerator path string passed to the API under test."""
+    return "zigzag.inputs.examples.hardware.Meta_prototype"
+
+
+@pytest.mark.parametrize("workload", workloads)
+def test_api(workload, accelerator, mapping):
+    """Compare the API's energy and latency with the recorded values in ens_lats."""
+    # The third returned value is not checked by this test
+    energy, latency, _ = get_hardware_performance_zigzag_without_unused_memory(
+        workload, accelerator, mapping
+    )
+    expected_energy, expected_latency = ens_lats[workload]
+    assert energy == pytest.approx(expected_energy)
+    assert latency == pytest.approx(expected_latency)
diff --git a/tests/main/test_without_unused_memory/test_tesla_npu_like.py b/tests/main/test_without_unused_memory/test_tesla_npu_like.py
@@ -0,0 +1,38 @@
+import pytest
+
+from zigzag.api import get_hardware_performance_zigzag_without_unused_memory
+
+# Expected (energy, latency) for each workload; the keys are also the test parameters
+ens_lats = {
+    "zigzag/inputs/examples/workload/alexnet.onnx": (6040086796.366001, 8389669),
+    "zigzag/inputs/examples/workload/mobilenetv2.onnx": (930702060.6110002, 1965457),
+    "zigzag/inputs/examples/workload/resnet18.onnx": (1724869681.4799998, 3257898),
+    "zigzag.inputs.examples.workload.resnet18": (2220861655.6660004, 3934616),
+}
+
+# Derived from ens_lats so the parametrized workloads and expectations cannot diverge
+workloads = tuple(ens_lats)
+
+
+@pytest.fixture
+def mapping():
+    """Return the mapping path string passed to the API under test."""
+    return "zigzag.inputs.examples.mapping.tesla_npu_like"
+
+
+@pytest.fixture
+def accelerator():
+    """Return the accelerator path string passed to the API under test."""
+    return "zigzag.inputs.examples.hardware.Tesla_NPU_like"
+
+
+@pytest.mark.parametrize("workload", workloads)
+def test_api(workload, accelerator, mapping):
+    """Compare the API's energy and latency with the recorded values in ens_lats."""
+    # The third returned value is not checked by this test
+    energy, latency, _ = get_hardware_performance_zigzag_without_unused_memory(
+        workload, accelerator, mapping
+    )
+    expected_energy, expected_latency = ens_lats[workload]
+    assert energy == pytest.approx(expected_energy)
+    assert latency == pytest.approx(expected_latency)
diff --git a/tests/main/test_without_unused_memory/test_tpu_like.py b/tests/main/test_without_unused_memory/test_tpu_like.py
@@ -0,0 +1,38 @@
+import pytest
+
+from zigzag.api import get_hardware_performance_zigzag_without_unused_memory
+
+# Expected (energy, latency) for each workload; the keys are also the test parameters
+ens_lats = {
+    "zigzag/inputs/examples/workload/alexnet.onnx": (5475639384.492001, 8979956),
+    "zigzag/inputs/examples/workload/mobilenetv2.onnx": (952688145.0069999, 21873214),
+    "zigzag/inputs/examples/workload/resnet18.onnx": (1659252422.016, 4000289),
+    "zigzag.inputs.examples.workload.resnet18": (1982830786.5119998, 4509235),
+}
+
+# Derived from ens_lats so the parametrized workloads and expectations cannot diverge
+workloads = tuple(ens_lats)
+
+
+@pytest.fixture
+def mapping():
+    """Return the mapping path string passed to the API under test."""
+    return "zigzag.inputs.examples.mapping.tpu_like"
+
+
+@pytest.fixture
+def accelerator():
+    """Return the accelerator path string passed to the API under test."""
+    return "zigzag.inputs.examples.hardware.TPU_like"
+
+
+@pytest.mark.parametrize("workload", workloads)
+def test_api(workload, accelerator, mapping):
+    """Compare the API's energy and latency with the recorded values in ens_lats."""
+    # The third returned value is not checked by this test
+    energy, latency, _ = get_hardware_performance_zigzag_without_unused_memory(
+        workload, accelerator, mapping
+    )
+    expected_energy, expected_latency = ens_lats[workload]
+    assert energy == pytest.approx(expected_energy)
+    assert latency == pytest.approx(expected_latency)
diff --git a/zigzag/api.py b/zigzag/api.py
@@ -155,6 +155,82 @@ def get_hardware_performance_zigzag_pe_array_scaling(
     return cmes[0][0].energy_total, cmes[0][0].latency_total2, cmes
 
 
+def get_hardware_performance_zigzag_without_unused_memory(
+    workload,
+    accelerator,
+    mapping,
+    opt="latency",
+    dump_filename_pattern="outputs/{datetime}.json",
+    pickle_filename="outputs/list_of_cmes.pickle",
+):
+    """Evaluate `workload` on `accelerator` with unused-memory search and removal.
+
+    `opt` picks the reduction stage ("energy", "latency" or "EDP"); other values
+    raise NotImplementedError. All other arguments are forwarded to MainStage.
+    Returns (energy_total, latency_total2, cmes), read from the MainStage result.
+    """
+    # Initialize the logger
+    import logging as _logging
+
+    _logging_format = (
+        "%(asctime)s - %(funcName)s +%(lineno)s - %(levelname)s - %(message)s"
+    )
+    _logging.basicConfig(level=_logging.INFO, format=_logging_format)
+
+    # Sanity check on the optimization criterion
+    if opt == "energy":
+        opt_stage = MinimalEnergyStage
+    elif opt == "latency":
+        opt_stage = MinimalLatencyStage
+    elif opt == "EDP":
+        opt_stage = MinimalEDPStage
+    else:
+        raise NotImplementedError(
+            "Optimization criterion 'opt' should be either 'energy' or 'latency' or 'EDP'."
+        )
+
+    # Pick the parser stage; a workload without a usable str .split() is not an ONNX path
+    try:
+        is_onnx = workload.split(".")[-1] == "onnx"
+    except (AttributeError, TypeError):
+        is_onnx = False
+    workload_parser_stage = ONNXModelParserStage if is_onnx else WorkloadParserStage
+
+    mainstage = MainStage(
+        [  # Initialize the MainStage as entry point
+            workload_parser_stage,  # Parse the workload with the parser stage selected above
+            AcceleratorParserStage,  # Parse the accelerator module/passthrough given accelerator
+            SimpleSaveStage,  # Save the summed CME energy and latency to a json
+            PickleSaveStage,  # Save all received CMEs in a list to a pickle file
+            SumStage,  # Sum up the received best CME across all layers of the workload
+            SearchUnusedMemoryStage,  # Search for unused memory instance
+            WorkloadStage,  # Iterate through the different layers in the workload
+            RemoveUnusedMemoryStage,  # Remove unused memory instance
+            CompleteSaveStage,  # Save each processed layer to a json
+            opt_stage,  # Reduce all CMEs, returning minimal energy/latency one
+            SpatialMappingGeneratorStage,  # Generate multiple spatial mappings (SM)
+            opt_stage,  # Reduce all CMEs, returning minimal energy/latency one
+            LomaStage,  # Generate multiple temporal mappings (TM)
+            # TemporalOrderingConversionStage,  # Based on the fixed temporal mapping order, generate one temporal mapping (TM)
+            CostModelStage,  # Evaluate generated SM and TM through cost model
+        ],
+        accelerator=accelerator,  # required by AcceleratorParserStage
+        workload=workload,  # required by workload_parser_stage
+        mapping=mapping,  # required by workload_parser_stage
+        dump_filename_pattern=dump_filename_pattern,  # output file save pattern
+        pickle_filename=pickle_filename,  # filename for pickled list of cmes
+        loma_lpf_limit=6,  # required by LomaStage
+        loma_show_progress_bar=True,
+        # If we need to access the same input data multiple times from the innermost memory level and the data size is smaller than the memory read bw,
+        # take into account only one-time access cost (assume the data can stay at the output pins of the memory as long as it is needed).
+        # By default, if the parameter is not defined, it will be set as False internally.
+        access_same_data_considered_as_no_access=True,
+    )
+
+    # Launch the MainStage; the answers it returns are the CMEs
+    cmes = mainstage.run()
+    return cmes[0][0].energy_total, cmes[0][0].latency_total2, cmes
+
 if __name__ == "__main__":
     workload = "zigzag/inputs/examples/workload/mobilenetv2.onnx"
     # workload = 'inputs.examples.workload.resnet18'