Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

New feature: Allows the output data of a layer to remain in the low-level memory for later use when possible #25

Merged
merged 8 commits into from
Oct 23, 2023
2 changes: 1 addition & 1 deletion main.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,4 +49,4 @@
)

# Launch the MainStage
mainstage.run()
mainstage.run()
2 changes: 1 addition & 1 deletion main_onnx.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,4 +45,4 @@
)

# Launch the MainStage
mainstage.run()
mainstage.run()
File renamed without changes.
38 changes: 38 additions & 0 deletions tests/main/test_without_unused_memory/test_ascend_like.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
import pytest

from zigzag.api import get_hardware_performance_zigzag_without_unused_memory

# Expected (energy, latency) pair for every workload exercised by this module.
# The insertion order of this dict defines the parametrization order below.
ens_lats = {
    "zigzag/inputs/examples/workload/alexnet.onnx": (5649555894.9, 8637780),
    "zigzag/inputs/examples/workload/mobilenetv2.onnx": (1881386179.71, 6486685),
    "zigzag/inputs/examples/workload/resnet18.onnx": (1709089377.83, 3583047),
    "zigzag.inputs.examples.workload.resnet18": (2243493483.15, 4657130),
}

# Workloads to test, taken from the keys above so the two can never drift apart.
workloads = tuple(ens_lats)


@pytest.fixture
def mapping():
    """Return the dotted-path string of the ascend_like example mapping."""
    mapping_path = "zigzag.inputs.examples.mapping.ascend_like"
    return mapping_path


@pytest.fixture
def accelerator():
    """Return the dotted-path string of the Ascend_like example hardware."""
    accelerator_path = "zigzag.inputs.examples.hardware.Ascend_like"
    return accelerator_path


@pytest.mark.parametrize("workload", workloads)
def test_api(workload, accelerator, mapping):
    """Compare the API's energy and latency for one workload to the recorded values.

    The reference numbers come from ``ens_lats``; both quantities are compared
    with ``pytest.approx`` using its default tolerance.
    """
    result = get_hardware_performance_zigzag_without_unused_memory(
        workload, accelerator, mapping
    )
    # The third element (the CMEs) is returned by the API but not checked here.
    energy, latency, _cmes = result
    reference = ens_lats[workload]
    expected_energy, expected_latency = reference
    assert energy == pytest.approx(expected_energy)
    assert latency == pytest.approx(expected_latency)
38 changes: 38 additions & 0 deletions tests/main/test_without_unused_memory/test_edge_tpu_like.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
import pytest

from zigzag.api import get_hardware_performance_zigzag_without_unused_memory

# Expected (energy, latency) pair for every workload exercised by this module.
# The insertion order of this dict defines the parametrization order below.
ens_lats = {
    "zigzag/inputs/examples/workload/alexnet.onnx": (5568602396.684999, 8134431),
    "zigzag/inputs/examples/workload/mobilenetv2.onnx": (751128562.4699999, 2427487),
    "zigzag/inputs/examples/workload/resnet18.onnx": (1784539639.4349997, 3176546),
    "zigzag.inputs.examples.workload.resnet18": (2115122870.395, 3884789),
}

# Workloads to test, taken from the keys above so the two can never drift apart.
workloads = tuple(ens_lats)


@pytest.fixture
def mapping():
    """Return the dotted-path string of the edge_tpu_like example mapping."""
    mapping_path = "zigzag.inputs.examples.mapping.edge_tpu_like"
    return mapping_path


@pytest.fixture
def accelerator():
    """Return the dotted-path string of the Edge_TPU_like example hardware."""
    accelerator_path = "zigzag.inputs.examples.hardware.Edge_TPU_like"
    return accelerator_path


@pytest.mark.parametrize("workload", workloads)
def test_api(workload, accelerator, mapping):
    """Compare the API's energy and latency for one workload to the recorded values.

    The reference numbers come from ``ens_lats``; both quantities are compared
    with ``pytest.approx`` using its default tolerance.
    """
    result = get_hardware_performance_zigzag_without_unused_memory(
        workload, accelerator, mapping
    )
    # The third element (the CMEs) is returned by the API but not checked here.
    energy, latency, _cmes = result
    reference = ens_lats[workload]
    expected_energy, expected_latency = reference
    assert energy == pytest.approx(expected_energy)
    assert latency == pytest.approx(expected_latency)
38 changes: 38 additions & 0 deletions tests/main/test_without_unused_memory/test_meta_prototype_like.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
import pytest

from zigzag.api import get_hardware_performance_zigzag_without_unused_memory

# Expected (energy, latency) pair for every workload exercised by this module.
# The insertion order of this dict defines the parametrization order below.
ens_lats = {
    "zigzag/inputs/examples/workload/alexnet.onnx": (5679695605.4400015, 8299150),
    "zigzag/inputs/examples/workload/mobilenetv2.onnx": (901092009.6000001, 2610609),
    "zigzag/inputs/examples/workload/resnet18.onnx": (1730672410.3200004, 3262009),
    "zigzag.inputs.examples.workload.resnet18": (2265438430.2299995, 4017227),
}

# Workloads to test, taken from the keys above so the two can never drift apart.
workloads = tuple(ens_lats)


@pytest.fixture
def mapping():
    """Return the dotted-path string of the meta_prototype_like example mapping."""
    mapping_path = "zigzag.inputs.examples.mapping.meta_prototype_like"
    return mapping_path


@pytest.fixture
def accelerator():
    """Return the dotted-path string of the Meta_prototype example hardware."""
    accelerator_path = "zigzag.inputs.examples.hardware.Meta_prototype"
    return accelerator_path


@pytest.mark.parametrize("workload", workloads)
def test_api(workload, accelerator, mapping):
    """Compare the API's energy and latency for one workload to the recorded values.

    The reference numbers come from ``ens_lats``; both quantities are compared
    with ``pytest.approx`` using its default tolerance.
    """
    result = get_hardware_performance_zigzag_without_unused_memory(
        workload, accelerator, mapping
    )
    # The third element (the CMEs) is returned by the API but not checked here.
    energy, latency, _cmes = result
    reference = ens_lats[workload]
    expected_energy, expected_latency = reference
    assert energy == pytest.approx(expected_energy)
    assert latency == pytest.approx(expected_latency)
38 changes: 38 additions & 0 deletions tests/main/test_without_unused_memory/test_tesla_npu_like.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
import pytest

from zigzag.api import get_hardware_performance_zigzag_without_unused_memory

# Expected (energy, latency) pair for every workload exercised by this module.
# The insertion order of this dict defines the parametrization order below.
ens_lats = {
    "zigzag/inputs/examples/workload/alexnet.onnx": (6040086796.366001, 8389669),
    "zigzag/inputs/examples/workload/mobilenetv2.onnx": (930702060.6110002, 1965457),
    "zigzag/inputs/examples/workload/resnet18.onnx": (1724869681.4799998, 3257898),
    "zigzag.inputs.examples.workload.resnet18": (2220861655.6660004, 3934616),
}

# Workloads to test, taken from the keys above so the two can never drift apart.
workloads = tuple(ens_lats)


@pytest.fixture
def mapping():
    """Return the dotted-path string of the tesla_npu_like example mapping."""
    mapping_path = "zigzag.inputs.examples.mapping.tesla_npu_like"
    return mapping_path


@pytest.fixture
def accelerator():
    """Return the dotted-path string of the Tesla_NPU_like example hardware."""
    accelerator_path = "zigzag.inputs.examples.hardware.Tesla_NPU_like"
    return accelerator_path


@pytest.mark.parametrize("workload", workloads)
def test_api(workload, accelerator, mapping):
    """Compare the API's energy and latency for one workload to the recorded values.

    The reference numbers come from ``ens_lats``; both quantities are compared
    with ``pytest.approx`` using its default tolerance.
    """
    result = get_hardware_performance_zigzag_without_unused_memory(
        workload, accelerator, mapping
    )
    # The third element (the CMEs) is returned by the API but not checked here.
    energy, latency, _cmes = result
    reference = ens_lats[workload]
    expected_energy, expected_latency = reference
    assert energy == pytest.approx(expected_energy)
    assert latency == pytest.approx(expected_latency)
38 changes: 38 additions & 0 deletions tests/main/test_without_unused_memory/test_tpu_like.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
import pytest

from zigzag.api import get_hardware_performance_zigzag_without_unused_memory

# Expected (energy, latency) pair for every workload exercised by this module.
# The insertion order of this dict defines the parametrization order below.
ens_lats = {
    "zigzag/inputs/examples/workload/alexnet.onnx": (5475639384.492001, 8979956),
    "zigzag/inputs/examples/workload/mobilenetv2.onnx": (952688145.0069999, 21873214),
    "zigzag/inputs/examples/workload/resnet18.onnx": (1659252422.016, 4000289),
    "zigzag.inputs.examples.workload.resnet18": (1982830786.5119998, 4509235),
}

# Workloads to test, taken from the keys above so the two can never drift apart.
workloads = tuple(ens_lats)


@pytest.fixture
def mapping():
    """Return the dotted-path string of the tpu_like example mapping."""
    mapping_path = "zigzag.inputs.examples.mapping.tpu_like"
    return mapping_path


@pytest.fixture
def accelerator():
    """Return the dotted-path string of the TPU_like example hardware."""
    accelerator_path = "zigzag.inputs.examples.hardware.TPU_like"
    return accelerator_path


@pytest.mark.parametrize("workload", workloads)
def test_api(workload, accelerator, mapping):
    """Compare the API's energy and latency for one workload to the recorded values.

    The reference numbers come from ``ens_lats``; both quantities are compared
    with ``pytest.approx`` using its default tolerance.
    """
    result = get_hardware_performance_zigzag_without_unused_memory(
        workload, accelerator, mapping
    )
    # The third element (the CMEs) is returned by the API but not checked here.
    energy, latency, _cmes = result
    reference = ens_lats[workload]
    expected_energy, expected_latency = reference
    assert energy == pytest.approx(expected_energy)
    assert latency == pytest.approx(expected_latency)
76 changes: 76 additions & 0 deletions zigzag/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,82 @@ def get_hardware_performance_zigzag_pe_array_scaling(
return cmes[0][0].energy_total, cmes[0][0].latency_total2, cmes


def get_hardware_performance_zigzag_without_unused_memory(
    workload,
    accelerator,
    mapping,
    opt="latency",
    dump_filename_pattern="outputs/{datetime}.json",
    pickle_filename="outputs/list_of_cmes.pickle",
):
    """Run the stage pipeline that includes the unused-memory search/removal stages.

    The pipeline is the list of stages passed to ``MainStage`` below; compared
    with a plain per-layer flow it additionally contains
    ``SearchUnusedMemoryStage`` (before ``WorkloadStage``) and
    ``RemoveUnusedMemoryStage`` (after it).

    Args:
        workload: Workload description handed to the workload parser stage.
            A string whose last dot-separated component is ``"onnx"`` selects
            ``ONNXModelParserStage``; any other value (including non-string
            workloads) selects ``WorkloadParserStage``.
        accelerator: Accelerator description handed to ``AcceleratorParserStage``.
        mapping: Mapping description handed to the workload parser stage.
        opt: Optimization criterion, one of ``"energy"``, ``"latency"`` or
            ``"EDP"``.
        dump_filename_pattern: Output file pattern forwarded to the save stages.
        pickle_filename: File name forwarded to ``PickleSaveStage``.

    Returns:
        A tuple ``(energy_total, latency_total2, cmes)`` where the first two
        values are read from ``cmes[0][0]`` and ``cmes`` is whatever
        ``MainStage.run()`` returned.

    Raises:
        NotImplementedError: If ``opt`` is not one of the supported criteria.
    """
    # Initialize the logger
    import logging as _logging

    _logging.basicConfig(
        level=_logging.INFO,
        format="%(asctime)s - %(funcName)s +%(lineno)s - %(levelname)s - %(message)s",
    )

    # Sanity check on the optimization criterion
    if opt == "energy":
        opt_stage = MinimalEnergyStage
    elif opt == "latency":
        opt_stage = MinimalLatencyStage
    elif opt == "EDP":
        opt_stage = MinimalEDPStage
    else:
        raise NotImplementedError(
            "Optimization criterion 'opt' should be either 'energy' or 'latency' or 'EDP'."
        )

    # Check workload format and based on it select the correct workload parser stage.
    # Only strings can name an ONNX file; every other workload object falls back to
    # the generic parser. The explicit type check replaces a bare ``except:`` that
    # would also have swallowed KeyboardInterrupt/SystemExit.
    is_onnx_workload = isinstance(workload, str) and workload.split(".")[-1] == "onnx"
    if is_onnx_workload:
        workload_parser_stage = ONNXModelParserStage
    else:
        workload_parser_stage = WorkloadParserStage

    mainstage = MainStage(
        [  # Initialize the MainStage as entry point
            workload_parser_stage,  # Parse the workload (ONNX model or workload module)
            AcceleratorParserStage,  # Parse the accelerator module/passthrough given accelerator
            SimpleSaveStage,  # Save the summed CME energy and latency to a json
            PickleSaveStage,  # Save all received CMEs in a list to a pickle file
            SumStage,  # Sum up the received best CME across all layers of the workload
            SearchUnusedMemoryStage,  # Search for unused memory instance
            WorkloadStage,  # Iterate through the different layers in the workload
            RemoveUnusedMemoryStage,  # Remove unused memory instance
            CompleteSaveStage,  # Save each processed layer to a json
            opt_stage,  # Reduce all CMEs, returning minimal energy/latency one
            SpatialMappingGeneratorStage,  # Generate multiple spatial mappings (SM)
            opt_stage,  # Reduce all CMEs, returning minimal energy/latency one
            LomaStage,  # Generate multiple temporal mappings (TM)
            # TemporalOrderingConversionStage,  # Based on the fixed temporal mapping order, generate one temporal mapping (TM)
            CostModelStage,  # Evaluate generated SM and TM through cost model
        ],
        accelerator=accelerator,  # required by AcceleratorParserStage
        workload=workload,  # required by workload_parser_stage
        mapping=mapping,  # required by workload_parser_stage
        dump_filename_pattern=dump_filename_pattern,  # output file save pattern
        pickle_filename=pickle_filename,  # filename for pickled list of cmes
        loma_lpf_limit=6,  # required by LomaStage
        loma_show_progress_bar=True,
        # If the same input data must be accessed multiple times from the innermost
        # memory level and the data size is smaller than the memory read bandwidth,
        # take into account only a one-time access cost (assume the data can stay at
        # the output pins of the memory as long as it is needed).
        # By default, if the parameter is not defined, it will be set as False internally.
        access_same_data_considered_as_no_access=True,
    )

    # Launch the MainStage and read the summed result from the first answer
    cmes = mainstage.run()
    best_cme = cmes[0][0]

    return best_cme.energy_total, best_cme.latency_total2, cmes

if __name__ == "__main__":
workload = "zigzag/inputs/examples/workload/mobilenetv2.onnx"
# workload = 'inputs.examples.workload.resnet18'
Expand Down
Loading
Loading