Skip to content

Commit

Permalink
Merge pull request #25 from JiacongSun/master
Browse files Browse the repository at this point in the history
New feature: Allows the output data of a layer to remain in the low-level memory for later use when possible
  • Loading branch information
asyms authored Oct 23, 2023
2 parents 364f0a5 + aeb74ab commit 2b2e076
Show file tree
Hide file tree
Showing 16 changed files with 987 additions and 2 deletions.
2 changes: 1 addition & 1 deletion main.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,4 +49,4 @@
)

# Launch the MainStage
mainstage.run()
mainstage.run()
2 changes: 1 addition & 1 deletion main_onnx.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,4 +45,4 @@
)

# Launch the MainStage
mainstage.run()
mainstage.run()
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
38 changes: 38 additions & 0 deletions tests/main/test_without_unused_memory/test_ascend_like.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
import pytest

from zigzag.api import get_hardware_performance_zigzag_without_unused_memory

# Reference (energy, latency) pair for every workload exercised by this module.
ens_lats = {
    "zigzag/inputs/examples/workload/alexnet.onnx": (5649555894.9, 8637780),
    "zigzag/inputs/examples/workload/mobilenetv2.onnx": (1881386179.71, 6486685),
    "zigzag/inputs/examples/workload/resnet18.onnx": (1709089377.83, 3583047),
    "zigzag.inputs.examples.workload.resnet18": (2243493483.15, 4657130),
}

# Workloads to test: the keys of ens_lats, in their insertion order.
workloads = tuple(ens_lats)


@pytest.fixture
def mapping():
    """Provide the dotted path string of the Ascend-like mapping."""
    mapping_path = "zigzag.inputs.examples.mapping.ascend_like"
    return mapping_path


@pytest.fixture
def accelerator():
    """Provide the dotted path string of the Ascend-like hardware description."""
    accelerator_path = "zigzag.inputs.examples.hardware.Ascend_like"
    return accelerator_path


@pytest.mark.parametrize("workload", workloads)
def test_api(workload, accelerator, mapping):
    """Compare the API's energy and latency for one workload with ens_lats."""
    result = get_hardware_performance_zigzag_without_unused_memory(
        workload, accelerator, mapping
    )
    # The API returns a 3-tuple; the third element (the CMEs) is not checked here.
    measured_energy, measured_latency, _cmes = result
    reference_energy, reference_latency = ens_lats[workload]
    assert measured_energy == pytest.approx(reference_energy)
    assert measured_latency == pytest.approx(reference_latency)
38 changes: 38 additions & 0 deletions tests/main/test_without_unused_memory/test_edge_tpu_like.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
import pytest

from zigzag.api import get_hardware_performance_zigzag_without_unused_memory

# Reference (energy, latency) pair for every workload exercised by this module.
ens_lats = {
    "zigzag/inputs/examples/workload/alexnet.onnx": (5568602396.684999, 8134431),
    "zigzag/inputs/examples/workload/mobilenetv2.onnx": (751128562.4699999, 2427487),
    "zigzag/inputs/examples/workload/resnet18.onnx": (1784539639.4349997, 3176546),
    "zigzag.inputs.examples.workload.resnet18": (2115122870.395, 3884789),
}

# Workloads to test: the keys of ens_lats, in their insertion order.
workloads = tuple(ens_lats)


@pytest.fixture
def mapping():
    """Provide the dotted path string of the Edge-TPU-like mapping."""
    mapping_path = "zigzag.inputs.examples.mapping.edge_tpu_like"
    return mapping_path


@pytest.fixture
def accelerator():
    """Provide the dotted path string of the Edge-TPU-like hardware description."""
    accelerator_path = "zigzag.inputs.examples.hardware.Edge_TPU_like"
    return accelerator_path


@pytest.mark.parametrize("workload", workloads)
def test_api(workload, accelerator, mapping):
    """Compare the API's energy and latency for one workload with ens_lats."""
    result = get_hardware_performance_zigzag_without_unused_memory(
        workload, accelerator, mapping
    )
    # The API returns a 3-tuple; the third element (the CMEs) is not checked here.
    measured_energy, measured_latency, _cmes = result
    reference_energy, reference_latency = ens_lats[workload]
    assert measured_energy == pytest.approx(reference_energy)
    assert measured_latency == pytest.approx(reference_latency)
38 changes: 38 additions & 0 deletions tests/main/test_without_unused_memory/test_meta_prototype_like.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
import pytest

from zigzag.api import get_hardware_performance_zigzag_without_unused_memory

# Reference (energy, latency) pair for every workload exercised by this module.
ens_lats = {
    "zigzag/inputs/examples/workload/alexnet.onnx": (5679695605.4400015, 8299150),
    "zigzag/inputs/examples/workload/mobilenetv2.onnx": (901092009.6000001, 2610609),
    "zigzag/inputs/examples/workload/resnet18.onnx": (1730672410.3200004, 3262009),
    "zigzag.inputs.examples.workload.resnet18": (2265438430.2299995, 4017227),
}

# Workloads to test: the keys of ens_lats, in their insertion order.
workloads = tuple(ens_lats)


@pytest.fixture
def mapping():
    """Provide the dotted path string of the Meta-prototype-like mapping."""
    mapping_path = "zigzag.inputs.examples.mapping.meta_prototype_like"
    return mapping_path


@pytest.fixture
def accelerator():
    """Provide the dotted path string of the Meta-prototype hardware description."""
    accelerator_path = "zigzag.inputs.examples.hardware.Meta_prototype"
    return accelerator_path


@pytest.mark.parametrize("workload", workloads)
def test_api(workload, accelerator, mapping):
    """Compare the API's energy and latency for one workload with ens_lats."""
    result = get_hardware_performance_zigzag_without_unused_memory(
        workload, accelerator, mapping
    )
    # The API returns a 3-tuple; the third element (the CMEs) is not checked here.
    measured_energy, measured_latency, _cmes = result
    reference_energy, reference_latency = ens_lats[workload]
    assert measured_energy == pytest.approx(reference_energy)
    assert measured_latency == pytest.approx(reference_latency)
38 changes: 38 additions & 0 deletions tests/main/test_without_unused_memory/test_tesla_npu_like.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
import pytest

from zigzag.api import get_hardware_performance_zigzag_without_unused_memory

# Reference (energy, latency) pair for every workload exercised by this module.
ens_lats = {
    "zigzag/inputs/examples/workload/alexnet.onnx": (6040086796.366001, 8389669),
    "zigzag/inputs/examples/workload/mobilenetv2.onnx": (930702060.6110002, 1965457),
    "zigzag/inputs/examples/workload/resnet18.onnx": (1724869681.4799998, 3257898),
    "zigzag.inputs.examples.workload.resnet18": (2220861655.6660004, 3934616),
}

# Workloads to test: the keys of ens_lats, in their insertion order.
workloads = tuple(ens_lats)


@pytest.fixture
def mapping():
    """Provide the dotted path string of the Tesla-NPU-like mapping."""
    mapping_path = "zigzag.inputs.examples.mapping.tesla_npu_like"
    return mapping_path


@pytest.fixture
def accelerator():
    """Provide the dotted path string of the Tesla-NPU-like hardware description."""
    accelerator_path = "zigzag.inputs.examples.hardware.Tesla_NPU_like"
    return accelerator_path


@pytest.mark.parametrize("workload", workloads)
def test_api(workload, accelerator, mapping):
    """Compare the API's energy and latency for one workload with ens_lats."""
    result = get_hardware_performance_zigzag_without_unused_memory(
        workload, accelerator, mapping
    )
    # The API returns a 3-tuple; the third element (the CMEs) is not checked here.
    measured_energy, measured_latency, _cmes = result
    reference_energy, reference_latency = ens_lats[workload]
    assert measured_energy == pytest.approx(reference_energy)
    assert measured_latency == pytest.approx(reference_latency)
38 changes: 38 additions & 0 deletions tests/main/test_without_unused_memory/test_tpu_like.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
import pytest

from zigzag.api import get_hardware_performance_zigzag_without_unused_memory

# Reference (energy, latency) pair for every workload exercised by this module.
ens_lats = {
    "zigzag/inputs/examples/workload/alexnet.onnx": (5475639384.492001, 8979956),
    "zigzag/inputs/examples/workload/mobilenetv2.onnx": (952688145.0069999, 21873214),
    "zigzag/inputs/examples/workload/resnet18.onnx": (1659252422.016, 4000289),
    "zigzag.inputs.examples.workload.resnet18": (1982830786.5119998, 4509235),
}

# Workloads to test: the keys of ens_lats, in their insertion order.
workloads = tuple(ens_lats)


@pytest.fixture
def mapping():
    """Provide the dotted path string of the TPU-like mapping."""
    mapping_path = "zigzag.inputs.examples.mapping.tpu_like"
    return mapping_path


@pytest.fixture
def accelerator():
    """Provide the dotted path string of the TPU-like hardware description."""
    accelerator_path = "zigzag.inputs.examples.hardware.TPU_like"
    return accelerator_path


@pytest.mark.parametrize("workload", workloads)
def test_api(workload, accelerator, mapping):
    """Compare the API's energy and latency for one workload with ens_lats."""
    result = get_hardware_performance_zigzag_without_unused_memory(
        workload, accelerator, mapping
    )
    # The API returns a 3-tuple; the third element (the CMEs) is not checked here.
    measured_energy, measured_latency, _cmes = result
    reference_energy, reference_latency = ens_lats[workload]
    assert measured_energy == pytest.approx(reference_energy)
    assert measured_latency == pytest.approx(reference_latency)
76 changes: 76 additions & 0 deletions zigzag/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,82 @@ def get_hardware_performance_zigzag_pe_array_scaling(
return cmes[0][0].energy_total, cmes[0][0].latency_total2, cmes


def get_hardware_performance_zigzag_without_unused_memory(
    workload,
    accelerator,
    mapping,
    opt="latency",
    dump_filename_pattern="outputs/{datetime}.json",
    pickle_filename="outputs/list_of_cmes.pickle",
):
    """Run the stage pipeline that includes the unused-memory search/removal stages.

    Args:
        workload: Workload to evaluate. A string whose last "."-separated
            component is "onnx" is handled by ONNXModelParserStage; any other
            value (including non-string objects) goes to WorkloadParserStage.
        accelerator: Forwarded to MainStage (required by AcceleratorParserStage).
        mapping: Forwarded to MainStage (required by the workload parser stage).
        opt: Optimization criterion, one of "energy", "latency" or "EDP".
        dump_filename_pattern: Output file save pattern forwarded to MainStage.
        pickle_filename: Filename for the pickled list of CMEs.

    Returns:
        A tuple ``(energy_total, latency_total2, cmes)``: the first two values
        are read from ``cmes[0][0]`` and ``cmes`` is the complete result of
        ``MainStage.run()``.

    Raises:
        NotImplementedError: If ``opt`` is not one of the supported criteria.
    """
    # Initialize the logger (note: this configures the root logger via basicConfig)
    import logging as _logging

    _logging_level = _logging.INFO
    _logging_format = (
        "%(asctime)s - %(funcName)s +%(lineno)s - %(levelname)s - %(message)s"
    )
    _logging.basicConfig(level=_logging_level, format=_logging_format)

    # Sanity check on the optimization criterion
    if opt == "energy":
        opt_stage = MinimalEnergyStage
    elif opt == "latency":
        opt_stage = MinimalLatencyStage
    elif opt == "EDP":
        opt_stage = MinimalEDPStage
    else:
        raise NotImplementedError(
            "Optimization criterion 'opt' should be either 'energy' or 'latency' or 'EDP'."
        )

    # Check workload format and based on it select the correct workload parser stage.
    # Non-string workloads fall back to WorkloadParserStage; an explicit type check
    # replaces the former bare `except:`, which also swallowed KeyboardInterrupt
    # and SystemExit.
    if isinstance(workload, str) and workload.split(".")[-1] == "onnx":
        workload_parser_stage = ONNXModelParserStage
    else:
        workload_parser_stage = WorkloadParserStage

    mainstage = MainStage(
        [  # Initialize the MainStage as entry point
            workload_parser_stage,  # Parse the workload (ONNX model or workload definition)
            AcceleratorParserStage,  # Parse the accelerator module/passthrough given accelerator
            SimpleSaveStage,  # Save the summed CME energy and latency to a json
            PickleSaveStage,  # Save all received CMEs in a list to a pickle file
            SumStage,  # Sum up the received best CME across all layers of the workload
            SearchUnusedMemoryStage,  # Search for unused memory instance
            WorkloadStage,  # Iterate through the different layers in the workload
            RemoveUnusedMemoryStage,  # Remove unused memory instance
            CompleteSaveStage,  # Save each processed layer to a json
            opt_stage,  # Reduce all CMEs, returning minimal energy/latency one
            SpatialMappingGeneratorStage,  # Generate multiple spatial mappings (SM)
            opt_stage,  # Reduce all CMEs, returning minimal energy/latency one
            LomaStage,  # Generate multiple temporal mappings (TM)
            # TemporalOrderingConversionStage,  # Based on the fixed temporal mapping order, generate one temporal mapping (TM)
            CostModelStage,  # Evaluate generated SM and TM through cost model
        ],
        accelerator=accelerator,  # required by AcceleratorParserStage
        workload=workload,  # required by workload_parser_stage
        mapping=mapping,  # required by workload_parser_stage
        dump_filename_pattern=dump_filename_pattern,  # output file save pattern
        pickle_filename=pickle_filename,  # filename for pickled list of cmes
        loma_lpf_limit=6,  # required by LomaStage
        loma_show_progress_bar=True,
        # If the same input data must be accessed multiple times from the innermost
        # memory level and the data size is smaller than the memory read bandwidth,
        # only a one-time access cost is taken into account (the data is assumed to
        # stay at the output pins of the memory for as long as it is needed).
        # By default, if the parameter is not defined, it will be set as False internally.
        access_same_data_considered_as_no_access=True,
    )

    # Launch the MainStage; its answer holds the CMEs
    cmes = mainstage.run()

    return cmes[0][0].energy_total, cmes[0][0].latency_total2, cmes

if __name__ == "__main__":
workload = "zigzag/inputs/examples/workload/mobilenetv2.onnx"
# workload = 'inputs.examples.workload.resnet18'
Expand Down
Loading

0 comments on commit 2b2e076

Please sign in to comment.