Commit

New User Interface (#50) (#53)
* Create AcceleratorValidator and Factory

* Create WorkloadValidator and Factory

* test works for tpu_like

* tests work with new hardware definitions

* Minor cleanup: example file, nicer printing

* restore get_total_inst_bandwidth function (used in stream)

* Refactor AcceleratorFactory into CoreFactory, re-introduce  in Core

* make memory area a float

* Multiplier area can be float

* Extract LayerAttributes class from LayerNode for Stream compatibility

* Make core allocation a list

* Create ABC for LayerNode and DummyNode

* Fix very nasty bug in LayerEquation

* minor bug in spatial mapping generator

* fix ruff error in AimcArray so CI/CD can run tests

* add cerberus library to requirements.txt

* Fix bug in workload validator when using padding

* fix minor bug in parsing

* Fix __eq__ for LayerAttributes

* Fix test energy/latency values after fixing bug in commit c408a13

* edit .pylintrc indentation from 2 to 4
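
Several of the commits above introduce validators for the hardware and workload definitions (backed by the cerberus library added to requirements.txt). As a dependency-free sketch of the kind of per-field check such a validator performs — the function name and field lists here are illustrative, not the actual AcceleratorValidator API:

```python
# Illustrative sketch of schema validation for one memory definition,
# in the spirit of a cerberus schema (field names taken from the YAML files).
REQUIRED_INT_FIELDS = ("size", "r_bw", "w_bw", "r_port", "w_port", "rw_port", "latency")
REQUIRED_NUM_FIELDS = ("r_cost", "w_cost", "area")

def validate_memory(name, entry):
    """Return a list of error strings for one memory definition (empty if valid)."""
    errors = []
    for field in REQUIRED_INT_FIELDS:
        if not isinstance(entry.get(field), int):
            errors.append(f"{name}.{field}: required integer missing or wrong type")
    for field in REQUIRED_NUM_FIELDS:
        if not isinstance(entry.get(field), (int, float)):
            errors.append(f"{name}.{field}: required number missing or wrong type")
    if not entry.get("operands"):
        errors.append(f"{name}.operands: at least one operand required")
    return errors

# The rf_1B entry from the hardware YAMLs below, as a parsed dict:
rf_1b = {"size": 8, "r_bw": 8, "w_bw": 8, "r_cost": 0.01, "w_cost": 0.01,
         "area": 0, "r_port": 1, "w_port": 1, "rw_port": 0, "latency": 1,
         "operands": ["I2"]}
print(validate_memory("rf_1B", rf_1b))  # → []
```

Collecting error strings rather than raising on the first failure mirrors how cerberus reports all schema violations at once via `Validator.errors`.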

Co-authored-by: RobinGeens <[email protected]>
asyms and RobinGeens authored May 15, 2024
1 parent e0ba0b4 commit 4bef22b
Showing 110 changed files with 3,509 additions and 3,305 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/.pylintrc
@@ -259,7 +259,7 @@ max-module-lines=99999
# spaces. Google's externally-published style guide says 4, consistent with
# PEP 8. Here, we use 2 spaces, for conformity with many open-sourced Google
# projects (like TensorFlow).
-indent-string='  '
+indent-string='    '

# Number of spaces of indent required inside a hanging or continued line.
indent-after-paren=4
40 changes: 40 additions & 0 deletions example.py
@@ -0,0 +1,40 @@
import pickle
from zigzag import api
from zigzag.visualization.results.plot_cme import (
    bar_plot_cost_model_evaluations_breakdown,
)
from zigzag.visualization.results.print_mapping import print_mapping
from zigzag.visualization.graph.memory_hierarchy import visualize_memory_hierarchy_graph


model = "resnet"
workload_path = "inputs/workload/resnet18.onnx"
accelerator_path = "inputs/hardware/tpu_like.yaml"
mapping_path = "inputs/mapping/tpu_like.yaml"
pickle_filename = f"outputs/TPU-{model}-saved_list_of_cmes.pickle"


energy, latency, cmes = api.get_hardware_performance_zigzag(
    workload=workload_path,
    accelerator=accelerator_path,
    mapping=mapping_path,
    opt="energy",
    pickle_filename=pickle_filename,
)
print(f"Total network energy = {energy:.2e} pJ")
print(f"Total network latency = {latency:.2e} cycles")

with open(pickle_filename, "rb") as fp:
    cmes = pickle.load(fp)


bar_plot_cost_model_evaluations_breakdown(cmes, save_path="outputs/plot_breakdown.png")


visualize_memory_hierarchy_graph(
    cmes[0].accelerator.cores[0].memory_hierarchy,
    save_path="outputs/mem_hierarchy.png",
)

for cme in cmes:
    print_mapping(cme)
169 changes: 169 additions & 0 deletions inputs/hardware/ascend_like.yaml
@@ -0,0 +1,169 @@
name: ascend_like

memories:
  rf_1B:
    size: 8
    r_bw: 8
    w_bw: 8
    r_cost: 0.01
    w_cost: 0.01
    area: 0
    r_port: 1
    w_port: 1
    rw_port: 0
    latency: 1
    operands: [I2]
    ports:
      - fh: w_port_1
        tl: r_port_1
    served_dimensions: [D3, D4]

  rf_2B:
    size: 16
    r_bw: 16
    w_bw: 16
    r_cost: 0.02
    w_cost: 0.02
    area: 0
    r_port: 2
    w_port: 2
    rw_port: 0
    latency: 1
    operands: [O]
    ports:
      - fh: w_port_1
        tl: r_port_1
        fl: w_port_2
        th: r_port_2
    served_dimensions: [D2]

  rf_64KB_I:
    size: 65536
    r_bw: 512
    w_bw: 512
    r_cost: 26.56
    w_cost: 30.72
    area: 0
    r_port: 1
    w_port: 1
    rw_port: 0
    latency: 1
    min_r_granularity: 64
    min_w_granularity: 64
    operands: [I1]
    ports:
      - fh: w_port_1
        tl: r_port_1
    served_dimensions: [D1, D2, D3, D4]

  rf_64KB_W:
    size: 65536
    r_bw: 2048
    w_bw: 2048
    r_cost: 50.16
    w_cost: 108.0
    area: 0
    r_port: 1
    w_port: 1
    rw_port: 0
    latency: 1
    min_r_granularity: 64
    min_w_granularity: 64
    operands: [I2]
    ports:
      - fh: w_port_1
        tl: r_port_1
    served_dimensions: [D1, D2, D3, D4]

  sram_256KB_O:
    size: 2097152
    r_bw: 2048
    w_bw: 2048
    r_cost: 123.2
    w_cost: 212.8
    area: 0
    r_port: 1
    w_port: 1
    rw_port: 0
    latency: 1
    min_r_granularity: 64
    min_w_granularity: 64
    operands: [O]
    ports:
      - fh: w_port_1
        tl: r_port_1
        fl: w_port_1
        th: r_port_1
    served_dimensions: [D1, D2, D3, D4]

  sram_1MB_A:
    size: 8388608
    r_bw: 4096
    w_bw: 4096
    r_cost: 465.6
    w_cost: 825.6
    area: 0
    r_port: 1
    w_port: 1
    rw_port: 0
    min_r_granularity: 64
    min_w_granularity: 64
    latency: 1
    operands: [I1, O]
    ports:
      - fh: w_port_1
        tl: r_port_1
      - fh: w_port_1
        tl: r_port_1
        fl: w_port_1
        th: r_port_1
    served_dimensions: [D1, D2, D3, D4]

  sram_1MB_W:
    size: 8388608
    r_bw: 4096
    w_bw: 4096
    r_cost: 465.6
    w_cost: 825.6
    area: 0
    r_port: 1
    w_port: 1
    rw_port: 0
    min_r_granularity: 64
    min_w_granularity: 64
    latency: 1
    operands: [I2]
    ports:
      - fh: w_port_1
        tl: r_port_1
    served_dimensions: [D1, D2, D3, D4]

  dram:
    size: 10000000000
    r_bw: 64
    w_bw: 64
    r_cost: 700
    w_cost: 750
    area: 0
    r_port: 0
    w_port: 0
    rw_port: 1
    latency: 1
    operands: [I1, I2, O]
    ports:
      - fh: rw_port_1
        tl: rw_port_1
      - fh: rw_port_1
        tl: rw_port_1
      - fh: rw_port_1
        tl: rw_port_1
        fl: rw_port_1
        th: rw_port_1
    served_dimensions: [D1, D2]

multipliers:
  input_precision: [8, 8]
  multiplier_energy: 0.04 # pJ
  multiplier_area: 1 # unit
  dimensions: [D1, D2, D3, D4]
  sizes: [16, 16, 2, 2]
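
The `multipliers` section defines a 4-D PE array: the total multiplier count is the product of `sizes`, and array-level energy and area scale linearly with it. A quick sanity check in plain Python, with the values copied from the YAML above:

```python
import math

# The `multipliers` section of ascend_like.yaml, copied as a Python dict.
multipliers = {
    "input_precision": [8, 8],
    "multiplier_energy": 0.04,  # pJ per MAC
    "multiplier_area": 1,       # unit
    "dimensions": ["D1", "D2", "D3", "D4"],
    "sizes": [16, 16, 2, 2],
}

num_macs = math.prod(multipliers["sizes"])                  # 16*16*2*2 = 1024 PEs
array_energy = num_macs * multipliers["multiplier_energy"]  # pJ for one fully-utilized cycle
array_area = num_macs * multipliers["multiplier_area"]

print(num_macs, array_energy, array_area)  # → 1024 40.96 1024
```

The same arithmetic applies to the edge_tpu_like definition, which reshapes a differently sized array across the same four dimensions.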
111 changes: 111 additions & 0 deletions inputs/hardware/edge_tpu_like.yaml
@@ -0,0 +1,111 @@
name: edge_tpu_like

memories:
  rf_1B:
    size: 8
    r_bw: 8
    w_bw: 8
    r_cost: 0.01
    w_cost: 0.01
    area: 0
    r_port: 1
    w_port: 1
    rw_port: 0
    latency: 1
    auto_cost_extraction: False
    operands: [I2]
    ports:
      - fh: w_port_1
        tl: r_port_1
    served_dimensions: [D3, D4]

  rf_2B:
    size: 16
    r_bw: 16
    w_bw: 16
    r_cost: 0.02
    w_cost: 0.02
    area: 0
    r_port: 2
    w_port: 2
    rw_port: 0
    latency: 1
    operands: [O]
    ports:
      - fh: w_port_1
        tl: r_port_1
        fl: w_port_2
        th: r_port_2
    served_dimensions: [D2]

  sram_32KB:
    size: 262144
    r_bw: 512
    w_bw: 512
    r_cost: 22.9
    w_cost: 52.01
    area: 0
    r_port: 1
    w_port: 1
    rw_port: 0
    latency: 1
    min_r_granularity: 64
    min_w_granularity: 64
    operands: [I2]
    ports:
      - fh: w_port_1
        tl: r_port_1
    served_dimensions: [D1, D2, D3, D4]

  sram_2MB:
    size: 16777216
    r_bw: 2048
    w_bw: 2048
    r_cost: 416.16
    w_cost: 378.4
    area: 0
    r_port: 1
    w_port: 1
    rw_port: 0
    latency: 1
    min_r_granularity: 64
    min_w_granularity: 64
    operands: [I1, O]
    ports:
      - fh: w_port_1
        tl: r_port_1
      - fh: w_port_1
        tl: r_port_1
        fl: w_port_1
        th: r_port_1
    served_dimensions: [D1, D2, D3, D4]

  dram:
    size: 10000000000
    r_bw: 64
    w_bw: 64
    r_cost: 700
    w_cost: 750
    area: 0
    r_port: 0
    w_port: 0
    rw_port: 1
    latency: 1
    operands: [I1, I2, O]
    ports:
      - fh: rw_port_1
        tl: rw_port_1
      - fh: rw_port_1
        tl: rw_port_1
      - fh: rw_port_1
        tl: rw_port_1
        fl: rw_port_1
        th: rw_port_1
    served_dimensions: [D1, D2, D3, D4]

multipliers:
  input_precision: [8, 8]
  multiplier_energy: 0.04 # pJ
  multiplier_area: 1 # unit
  dimensions: [D1, D2, D3, D4]
  sizes: [8, 8, 4, 4]
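
One detail worth noting when reading these files: the `size` fields appear to be in bits, while the block names are in bytes — an inference from the values, not something the files state. Checking the edge_tpu_like entries against their names:

```python
# Memory sizes from edge_tpu_like.yaml, interpreted as bits (an assumption
# inferred from the block names, which read as byte capacities).
sizes_bits = {
    "rf_1B": 8,
    "rf_2B": 16,
    "sram_32KB": 262144,
    "sram_2MB": 16777216,
}

for name, bits in sizes_bits.items():
    print(f"{name}: {bits} bits = {bits // 8} bytes")
```

Each entry divides cleanly by 8 into the byte count its name suggests (e.g. 262144 bits = 32768 bytes = 32 KB), which supports the bits interpretation for this file.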