diff --git a/.github/workflows/.pylintrc b/.github/workflows/.pylintrc index 24bc5cf1..0529b013 100644 --- a/.github/workflows/.pylintrc +++ b/.github/workflows/.pylintrc @@ -259,7 +259,7 @@ max-module-lines=99999 # spaces. Google's externaly-published style guide says 4, consistent with # PEP 8. Here, we use 2 spaces, for conformity with many open-sourced Google # projects (like TensorFlow). -indent-string=' ' +indent-string=' ' # Number of spaces of indent required inside a hanging or continued line. indent-after-paren=4 diff --git a/example.py b/example.py new file mode 100644 index 00000000..fe73e676 --- /dev/null +++ b/example.py @@ -0,0 +1,40 @@ +import pickle +from zigzag import api +from zigzag.visualization.results.plot_cme import ( + bar_plot_cost_model_evaluations_breakdown, +) +from zigzag.visualization.results.print_mapping import print_mapping +from zigzag.visualization.graph.memory_hierarchy import visualize_memory_hierarchy_graph + + +model = "resnet" +workload_path = "inputs/workload/resnet18.onnx" +accelerator_path = "inputs/hardware/tpu_like.yaml" +mapping_path = "inputs/mapping/tpu_like.yaml" +pickle_filename = f"outputs/TPU-{model}-saved_list_of_cmes.pickle" + + +energy, latency, cmes = api.get_hardware_performance_zigzag( + workload=workload_path, + accelerator=accelerator_path, + mapping=mapping_path, + opt="energy", + pickle_filename=pickle_filename, +) +print(f"Total network energy = {energy:.2e} pJ") +print(f"Total network latency = {latency:.2e} cycles") + +with open(pickle_filename, "rb") as fp: + cmes = pickle.load(fp) + + +bar_plot_cost_model_evaluations_breakdown(cmes, save_path="outputs/plot_breakdown.png") + + +visualize_memory_hierarchy_graph( + cmes[0].accelerator.cores[0].memory_hierarchy, + save_path="outputs/mem_hierarchy.png", +) + +for cme in cmes: + print_mapping(cme) diff --git a/inputs/hardware/ascend_like.yaml b/inputs/hardware/ascend_like.yaml new file mode 100644 index 00000000..4a6bdeea --- /dev/null +++ b/inputs/hardware/ascend_like.yaml @@ -0,0 +1,169 @@ +name: ascend_like + +memories: + rf_1B: + size: 8 + r_bw: 8 + w_bw: 8 + r_cost: 0.01 + w_cost: 0.01 + area: 0 + r_port: 1 + w_port: 1 + rw_port: 0 + latency: 1 + operands: [I2] + ports: + - fh: w_port_1 + tl: r_port_1 + served_dimensions: [D3, D4] + + rf_2B: + size: 16 + r_bw: 16 + w_bw: 16 + r_cost: 0.02 + w_cost: 0.02 + area: 0 + r_port: 2 + w_port: 2 + rw_port: 0 + latency: 1 + operands: [O] + ports: + - fh: w_port_1 + tl: r_port_1 + fl: w_port_2 + th: r_port_2 + served_dimensions: [D2] + + rf_64KB_I: + size: 65536 + r_bw: 512 + w_bw: 512 + r_cost: 26.56 + w_cost: 30.72 + area: 0 + r_port: 1 + w_port: 1 + rw_port: 0 + latency: 1 + min_r_granularity: 64 + min_w_granularity: 64 + operands: [I1] + ports: + - fh: w_port_1 + tl: r_port_1 + served_dimensions: [D1, D2, D3, D4] + + rf_64KB_W: + size: 65536 + r_bw: 2048 + w_bw: 2048 + r_cost: 50.16 + w_cost: 108.0 + area: 0 + r_port: 1 + w_port: 1 + rw_port: 0 + latency: 1 + min_r_granularity: 64 + min_w_granularity: 64 + operands: [I2] + ports: + - fh: w_port_1 + tl: r_port_1 + served_dimensions: [D1, D2, D3, D4] + + sram_256KB_O: + size: 2097152 + r_bw: 2048 + w_bw: 2048 + r_cost: 123.2 + w_cost: 212.8 + area: 0 + r_port: 1 + w_port: 1 + rw_port: 0 + latency: 1 + min_r_granularity: 64 + min_w_granularity: 64 + operands: [O] + ports: + - fh: w_port_1 + tl: r_port_1 + fl: w_port_1 + th: r_port_1 + served_dimensions: [D1, D2, D3, D4] + + sram_1MB_A: + size: 8388608 + r_bw: 4096 + w_bw: 4096 + r_cost: 465.6 + w_cost: 825.6 + area: 0 + r_port: 1 + 
w_port: 1 + rw_port: 0 + min_r_granularity: 64 + min_w_granularity: 64 + latency: 1 + operands: [I1, O] + ports: + - fh: w_port_1 + tl: r_port_1 + - fh: w_port_1 + tl: r_port_1 + fl: w_port_1 + th: r_port_1 + served_dimensions: [D1, D2, D3, D4] + + sram_1MB_W: + size: 8388608 + r_bw: 4096 + w_bw: 4096 + r_cost: 465.6 + w_cost: 825.6 + area: 0 + r_port: 1 + w_port: 1 + rw_port: 0 + min_r_granularity: 64 + min_w_granularity: 64 + latency: 1 + operands: [I2] + ports: + - fh: w_port_1 + tl: r_port_1 + served_dimensions: [D1, D2, D3, D4] + + dram: + size: 10000000000 + r_bw: 64 + w_bw: 64 + r_cost: 700 + w_cost: 750 + area: 0 + r_port: 0 + w_port: 0 + rw_port: 1 + latency: 1 + operands: [I1, I2, O] + ports: + - fh: rw_port_1 + tl: rw_port_1 + - fh: rw_port_1 + tl: rw_port_1 + - fh: rw_port_1 + tl: rw_port_1 + fl: rw_port_1 + th: rw_port_1 + served_dimensions: [D1, D2] + +multipliers: + input_precision: [8, 8] + multiplier_energy: 0.04 # pJ + multiplier_area: 1 # unit + dimensions: [D1, D2, D3, D4] + sizes: [16, 16, 2, 2] diff --git a/inputs/hardware/edge_tpu_like.yaml b/inputs/hardware/edge_tpu_like.yaml new file mode 100644 index 00000000..03bfad28 --- /dev/null +++ b/inputs/hardware/edge_tpu_like.yaml @@ -0,0 +1,111 @@ +name: edge_tpu_like + +memories: + rf_1B: + size: 8 + r_bw: 8 + w_bw: 8 + r_cost: 0.01 + w_cost: 0.01 + area: 0 + r_port: 1 + w_port: 1 + rw_port: 0 + latency: 1 + auto_cost_extraction: False + operands: [I2] + ports: + - fh: w_port_1 + tl: r_port_1 + served_dimensions: [D3, D4] + + rf_2B: + size: 16 + r_bw: 16 + w_bw: 16 + r_cost: 0.02 + w_cost: 0.02 + area: 0 + r_port: 2 + w_port: 2 + rw_port: 0 + latency: 1 + operands: [O] + ports: + - fh: w_port_1 + tl: r_port_1 + fl: w_port_2 + th: r_port_2 + served_dimensions: [D2] + + sram_32KB: + size: 262144 + r_bw: 512 + w_bw: 512 + r_cost: 22.9 + w_cost: 52.01 + area: 0 + r_port: 1 + w_port: 1 + rw_port: 0 + latency: 1 + min_r_granularity: 64 + min_w_granularity: 64 + operands: [I2] + ports: + - fh: w_port_1 + tl: r_port_1 + served_dimensions: [D1, D2, D3, D4] + + sram_2MB: + size: 16777216 + r_bw: 2048 + w_bw: 2048 + r_cost: 416.16 + w_cost: 378.4 + area: 0 + r_port: 1 + w_port: 1 + rw_port: 0 + latency: 1 + min_r_granularity: 64 + min_w_granularity: 64 + operands: [I1, O] + ports: + - fh: w_port_1 + tl: r_port_1 + - fh: w_port_1 + tl: r_port_1 + fl: w_port_1 + th: r_port_1 + served_dimensions: [D1, D2, D3, D4] + + dram: + size: 10000000000 + r_bw: 64 + w_bw: 64 + r_cost: 700 + w_cost: 750 + area: 0 + r_port: 0 + w_port: 0 + rw_port: 1 + latency: 1 + operands: [I1, I2, O] + ports: + - fh: rw_port_1 + tl: rw_port_1 + - fh: rw_port_1 + tl: rw_port_1 + - fh: rw_port_1 + tl: rw_port_1 + fl: rw_port_1 + th: rw_port_1 + served_dimensions: [D1, D2, D3, D4] + +multipliers: + input_precision: [8, 8] + multiplier_energy: 0.04 # pJ + multiplier_area: 1 # unit + dimensions: [D1, D2, D3, D4] + sizes: [8, 8, 4, 4] diff --git a/inputs/hardware/eyeriss_like.yaml b/inputs/hardware/eyeriss_like.yaml new file mode 100644 index 00000000..f6db5270 --- /dev/null +++ b/inputs/hardware/eyeriss_like.yaml @@ -0,0 +1,142 @@ +name: eyeriss_like + +memories: + rf_64B_A: + size: 512 + r_bw: 8 + w_bw: 8 + r_cost: 1.0 + w_cost: 1.5 + area: 0.3 + r_port: 1 + w_port: 1 + rw_port: 0 + latency: 1 + operands: [I1] + ports: + - fh: w_port_1 + tl: r_port_1 + served_dimensions: [] + + rf_64B_W: + size: 512 + r_bw: 8 + w_bw: 8 + r_cost: 1.0 + w_cost: 1.5 + area: 0.3 + r_port: 1 + w_port: 1 + rw_port: 0 + latency: 1 + operands: [I2] + ports: + - fh: w_port_1 + tl: r_port_1 
+ served_dimensions: [] + + rf_16B: + size: 128 + r_bw: 24 + w_bw: 24 + r_cost: 1.5 + w_cost: 2.0 + area: 0.95 + r_port: 1 + w_port: 1 + rw_port: 1 + latency: 1 + operands: [O] + ports: + - fh: w_port_1 + tl: r_port_1 + fl: w_port_2 + th: r_port_2 + served_dimensions: [] + + sram_8KB: + size: 65536 + r_bw: 128 + w_bw: 128 + r_cost: 10.0 + w_cost: 15.0 + area: 3 + r_port: 0 + w_port: 1 + rw_port: 2 + latency: 1 + operands: [O] + ports: + - fh: rw_port_1 + tl: rw_port_2 + fl: rw_port_2 + th: rw_port_1 + served_dimensions: [D1, D2] + + sram_64KB: + size: 524288 + r_bw: 128 + w_bw: 128 + r_cost: 20 + w_cost: 25 + area: 6 + r_port: 1 + w_port: 1 + rw_port: 0 + latency: 1 + operands: [I2] + ports: + - fh: w_port_1 + tl: r_port_1 + served_dimensions: [D1, D2] + + sram_1M: + size: 8388608 + r_bw: 384 + w_bw: 384 + r_cost: 100 + w_cost: 130 + area: 25 + r_port: 0 + w_port: 0 + rw_port: 2 + latency: 1 + operands: [I1, O] + ports: + - fh: rw_port_1 + tl: rw_port_2 + - fh: rw_port_1 + tl: rw_port_2 + fl: rw_port_2 + th: rw_port_1 + served_dimensions: [D1, D2] + + dram: + size: 10000000000 + r_bw: 64 + w_bw: 64 + r_cost: 1000 + w_cost: 1000 + area: 0 + r_port: 0 + w_port: 0 + rw_port: 1 + latency: 1 + operands: [I1, I2, O] + ports: + - fh: rw_port_1 + tl: rw_port_1 + - fh: rw_port_1 + tl: rw_port_1 + - fh: rw_port_1 + tl: rw_port_1 + fl: rw_port_1 + th: rw_port_1 + served_dimensions: [D1, D2] + +multipliers: + input_precision: [8, 8] + multiplier_energy: 0.5 # pJ + multiplier_area: 0.1 # unit + dimensions: [D1, D2] + sizes: [14, 12] diff --git a/inputs/hardware/meta_prototype.yaml b/inputs/hardware/meta_prototype.yaml new file mode 100644 index 00000000..1ca03e23 --- /dev/null +++ b/inputs/hardware/meta_prototype.yaml @@ -0,0 +1,149 @@ +name: meta_prototype + +memories: + rf_1B: + size: 8 + r_bw: 8 + w_bw: 8 + r_cost: 0.01 + w_cost: 0.01 + area: 0 + r_port: 1 + w_port: 1 + rw_port: 0 + latency: 1 + auto_cost_extraction: False + operands: [I2] + ports: + - fh: w_port_1 + tl: r_port_1 + served_dimensions: [D3, D4] + + rf_2B: + size: 16 + r_bw: 16 + w_bw: 16 + r_cost: 0.02 + w_cost: 0.02 + area: 0 + r_port: 2 + w_port: 2 + rw_port: 0 + latency: 1 + operands: [O] + ports: + - fh: w_port_1 + tl: r_port_1 + fl: w_port_2 + th: r_port_2 + served_dimensions: [D2] + + sram_64KB: + size: 524288 + r_bw: 512 + w_bw: 512 + r_cost: 26.56 + w_cost: 30.8 + area: 0 + r_port: 1 + w_port: 1 + rw_port: 0 + latency: 1 + min_r_granularity: 64 + min_w_granularity: 64 + operands: [I2] + ports: + - fh: w_port_1 + tl: r_port_1 + served_dimensions: [D1, D2, D3, D4] + + sram_32KB: + size: 262144 + r_bw: 256 + w_bw: 256 + r_cost: 13.28 + w_cost: 15.4 + area: 0 + r_port: 1 + w_port: 1 + rw_port: 0 + latency: 1 + min_r_granularity: 64 + min_w_granularity: 64 + operands: [I1] + ports: + - fh: w_port_1 + tl: r_port_1 + served_dimensions: [D1, D2, D3, D4] + + sram_1MB_A: + size: 8388608 + r_bw: 1024 + w_bw: 1024 + r_cost: 208.08 + w_cost: 189.2 + area: 0 + r_port: 1 + w_port: 1 + rw_port: 0 + latency: 1 + min_r_granularity: 64 + min_w_granularity: 64 + operands: [I1, O] + ports: + - fh: w_port_1 + tl: r_port_1 + - fh: w_port_1 + tl: r_port_1 + fl: w_port_1 + th: r_port_1 + served_dimensions: [D1, D2, D3, D4] + + sram_1MB_W: + size: 8388608 + r_bw: 1024 + w_bw: 1024 + r_cost: 208.08 + w_cost: 189.2 + area: 0 + r_port: 1 + w_port: 1 + rw_port: 0 + latency: 1 + min_r_granularity: 64 + min_w_granularity: 64 + operands: [I2] + ports: + - fh: w_port_1 + tl: r_port_1 + served_dimensions: [D1, D2, D3, D4] + + dram: + size: 10000000000 + r_bw: 
64 + w_bw: 64 + r_cost: 700 + w_cost: 750 + area: 0 + r_port: 0 + w_port: 0 + rw_port: 1 + latency: 1 + operands: [I1, I2, O] + ports: + - fh: rw_port_1 + tl: rw_port_1 + - fh: rw_port_1 + tl: rw_port_1 + - fh: rw_port_1 + tl: rw_port_1 + fl: rw_port_1 + th: rw_port_1 + served_dimensions: [D1, D2, D3, D4] + +multipliers: + input_precision: [8, 8] + multiplier_energy: 0.04 # pJ + multiplier_area: 1 # unit + dimensions: [D1, D2, D3, D4] + sizes: [32, 2, 4, 4] diff --git a/inputs/hardware/tesla_npu_like.yaml b/inputs/hardware/tesla_npu_like.yaml new file mode 100644 index 00000000..4e217788 --- /dev/null +++ b/inputs/hardware/tesla_npu_like.yaml @@ -0,0 +1,149 @@ +name: npu_like + +memories: + rf_1B: + size: 8 + r_bw: 8 + w_bw: 8 + r_cost: 0.01 + w_cost: 0.01 + area: 0 + r_port: 1 + w_port: 1 + rw_port: 0 + latency: 1 + auto_cost_extraction: False + operands: [I2] + ports: + - fh: w_port_1 + tl: r_port_1 + served_dimensions: [D2, D3] + + rf_4B: + size: 32 + r_bw: 16 + w_bw: 16 + r_cost: 0.022 + w_cost: 0.022 + area: 0 + r_port: 2 + w_port: 2 + rw_port: 0 + latency: 1 + operands: [O] + ports: + - fh: w_port_1 + tl: r_port_1 + fl: w_port_2 + th: r_port_2 + served_dimensions: [] + + sram_1KB_I: + size: 8192 + r_bw: 256 + w_bw: 256 + r_cost: 4.78 + w_cost: 5.59 + area: 0 + r_port: 1 + w_port: 1 + rw_port: 0 + latency: 1 + min_r_granularity: 64 + min_w_granularity: 64 + operands: [I1] + ports: + - fh: w_port_1 + tl: r_port_1 + served_dimensions: [D1, D2, D3] + + sram_1KB_W: + size: 8192 + r_bw: 256 + w_bw: 256 + r_cost: 4.78 + w_cost: 5.59 + area: 0 + r_port: 1 + w_port: 1 + rw_port: 0 + latency: 1 + min_r_granularity: 64 + min_w_granularity: 64 + operands: [I2] + ports: + - fh: w_port_1 + tl: r_port_1 + served_dimensions: [D1, D2, D3] + + sram_1MB_A: + size: 8388608 + r_bw: 1024 + w_bw: 1024 + r_cost: 208.08 + w_cost: 189.2 + area: 0 + r_port: 1 + w_port: 1 + rw_port: 0 + latency: 1 + min_r_granularity: 64 + min_w_granularity: 64 + operands: [I1, O] + ports: + - fh: w_port_1 + tl: r_port_1 + - fh: w_port_1 + tl: r_port_1 + fl: w_port_1 + th: r_port_1 + served_dimensions: [D1, D2, D3] + + sram_1MB_W: + size: 8388608 + r_bw: 1024 + w_bw: 1024 + r_cost: 208.08 + w_cost: 189.2 + area: 0 + r_port: 1 + w_port: 1 + rw_port: 0 + latency: 1 + min_r_granularity: 64 + min_w_granularity: 64 + operands: [I2] + ports: + - fh: w_port_1 + tl: r_port_1 + served_dimensions: [D1, D2, D3] + + dram: + size: 10000000000 + r_bw: 64 + w_bw: 64 + r_cost: 700 + w_cost: 750 + area: 0 + r_port: 0 + w_port: 0 + rw_port: 1 + latency: 1 + operands: [I1, I2, O] + ports: + - fh: rw_port_1 + tl: rw_port_1 + - fh: rw_port_1 + tl: rw_port_1 + - fh: rw_port_1 + tl: rw_port_1 + fl: rw_port_1 + th: rw_port_1 + served_dimensions: [D1, D2, D3] + +multipliers: + input_precision: [8, 8] + multiplier_energy: 0.04 # pJ + multiplier_area: 1 # unit + dimensions: [D1, D2, D3] + sizes: [32, 32, 4] diff --git a/inputs/hardware/tpu_like.yaml b/inputs/hardware/tpu_like.yaml new file mode 100644 index 00000000..e85d5721 --- /dev/null +++ b/inputs/hardware/tpu_like.yaml @@ -0,0 +1,92 @@ +name: tpu_like + +memories: + rf_128B: + size: 1024 + r_bw: 8 + w_bw: 8 + r_cost: 0.095 + w_cost: 0.095 + area: 0 + r_port: 1 + w_port: 1 + rw_port: 0 + latency: 1 + auto_cost_extraction: False + operands: [I2] + ports: + - fh: w_port_1 + tl: r_port_1 + served_dimensions: [] # Fully unrolled over all multipliers + + rf_2B: + size: 16 + r_bw: 16 + w_bw: 16 + r_cost: 0.021 + w_cost: 0.021 + area: 0 + r_port: 2 + w_port: 2 + rw_port: 0 + latency: 1 + operands: [O] + 
ports: + - fh: w_port_1 + tl: r_port_1 + fl: w_port_2 + th: r_port_2 + served_dimensions: [D2] + + sram_2MB: + size: 16777216 + r_bw: 2048 + w_bw: 2048 + r_cost: 416.16 + w_cost: 378.4 + area: 0 + r_port: 1 + w_port: 1 + rw_port: 0 + latency: 1 + min_r_granularity: 64 + min_w_granularity: 64 + operands: [I1, O] + ports: + - fh: w_port_1 + tl: r_port_1 + - fh: w_port_1 + tl: r_port_1 + fl: w_port_1 + th: r_port_1 + served_dimensions: [D1, D2] + + dram: + size: 10000000000 + r_bw: 64 + w_bw: 64 + r_cost: 700 + w_cost: 750 + area: 0 + r_port: 0 + w_port: 0 + rw_port: 1 + latency: 1 + operands: [I1, I2, O] + ports: + - fh: rw_port_1 + tl: rw_port_1 + - fh: rw_port_1 + tl: rw_port_1 + - fh: rw_port_1 + tl: rw_port_1 + fl: rw_port_1 + th: rw_port_1 + served_dimensions: [D1, D2] + +multipliers: + input_precision: [8, 8] + multiplier_energy: 0.04 # pJ + multiplier_area: 1 # unit + dimensions: [D1, D2] + sizes: [32, 32] diff --git a/inputs/mapping/ascend_like.yaml b/inputs/mapping/ascend_like.yaml new file mode 100644 index 00000000..51c72bc0 --- /dev/null +++ b/inputs/mapping/ascend_like.yaml @@ -0,0 +1,31 @@ +- name: default + core_allocation: [1] + spatial_mapping: + D1: + - K, 16 + D2: + - C, 16 + D3: + - OX, 2 + D4: + - OY, 2 + memory_operand_links: + O: O + W: I2 + I: I1 + +- name: Add + core_allocation: [1] + spatial_mapping: + D1: + - G, 16 + D2: + - C, 1 + D3: + - OX, 1 + D4: + - OY, 1 + memory_operand_links: + O: O + W: I2 + I: I1 diff --git a/inputs/mapping/default.yaml b/inputs/mapping/default.yaml new file mode 100644 index 00000000..2917fb49 --- /dev/null +++ b/inputs/mapping/default.yaml @@ -0,0 +1,11 @@ +- name: default + core_allocation: [1] + spatial_mapping: + D1: + - K, 32 + D2: + - C, 32 + memory_operand_links: + O: O + W: I2 + I: I1 diff --git a/inputs/mapping/default_imc.yaml b/inputs/mapping/default_imc.yaml new file mode 100644 index 00000000..844cefcb --- /dev/null +++ b/inputs/mapping/default_imc.yaml @@ -0,0 +1,26 @@ +- name: default + core_allocation: [1] + memory_operand_links: + O: O + W: I2 + I: I1 + spatial_mapping_hint: + D1: + - K + - OX + D2: + - C + - FX + - FY + +- name: Add + core_allocation: [1] + memory_operand_links: + O: O + W: I2 + I: I1 + spatial_mapping_hint: + D1: + - G + D2: + - C diff --git a/inputs/mapping/edge_tpu_like.yaml b/inputs/mapping/edge_tpu_like.yaml new file mode 100644 index 00000000..8aad013e --- /dev/null +++ b/inputs/mapping/edge_tpu_like.yaml @@ -0,0 +1,47 @@ +- name: default + core_allocation: [1] + spatial_mapping: + D1: + - K, 8 + D2: + - C, 8 + D3: + - OX, 4 + D4: + - OY, 4 + memory_operand_links: + O: O + W: I2 + I: I1 + +- name: Add + core_allocation: [1] + spatial_mapping: + D1: + - G, 8 + D2: + - C, 1 + D3: + - OX, 1 + D4: + - OY, 1 + memory_operand_links: + O: O + W: I2 + I: I1 + +- name: Pooling + core_allocation: [1] + spatial_mapping: + D1: + - G, 8 + D2: + - C, 1 + D3: + - OX, 1 + D4: + - OY, 1 + memory_operand_links: + O: O + W: I2 + I: I1 diff --git a/inputs/mapping/meta_prototype_like.yaml b/inputs/mapping/meta_prototype_like.yaml new file mode 100644 index 00000000..72d2078a --- /dev/null +++ b/inputs/mapping/meta_prototype_like.yaml @@ -0,0 +1,31 @@ +- name: default + core_allocation: [1] + spatial_mapping: + D1: + - K, 32 + D2: + - C, 2 + D3: + - OX, 4 + D4: + - OY, 4 + memory_operand_links: + O: O + W: I2 + I: I1 + +- name: Add + core_allocation: [1] + spatial_mapping: + D1: + - G, 32 + D2: + - C, 1 + D3: + - OX, 1 + D4: + - OY, 1 + memory_operand_links: + O: O + W: I2 + I: I1 diff --git 
a/inputs/mapping/tesla_npu_like.yaml b/inputs/mapping/tesla_npu_like.yaml new file mode 100644 index 00000000..79719105 --- /dev/null +++ b/inputs/mapping/tesla_npu_like.yaml @@ -0,0 +1,41 @@ +- name: default + core_allocation: [1] + spatial_mapping: + D1: + - K, 32 + D2: + - OX, 8 + D3: + - OY, 4 + memory_operand_links: + O: O + W: I2 + I: I1 + +- name: Add + core_allocation: [1] + spatial_mapping: + D1: + - G, 32 + D2: + - OX, 1 + D3: + - OY, 1 + memory_operand_links: + O: O + W: I2 + I: I1 + +- name: Pooling + core_allocation: [1] + spatial_mapping: + D1: + - G, 32 + D2: + - OX, 1 + D3: + - OY, 1 + memory_operand_links: + O: O + W: I2 + I: I1 diff --git a/inputs/mapping/tpu_like.yaml b/inputs/mapping/tpu_like.yaml new file mode 100644 index 00000000..9528b97f --- /dev/null +++ b/inputs/mapping/tpu_like.yaml @@ -0,0 +1,35 @@ +- name: default + core_allocation: [1] + spatial_mapping: + D1: + - K, 32 + D2: + - C, 32 + memory_operand_links: + O: O + W: I2 + I: I1 + +- name: Add + core_allocation: [1] + spatial_mapping: + D1: + - G, 32 + D2: + - C, 1 + memory_operand_links: + O: O + W: I2 + I: I1 + +- name: Pooling + core_allocation: [1] + spatial_mapping: + D1: + - G, 32 + D2: + - C, 1 + memory_operand_links: + O: O + W: I2 + I: I1 diff --git a/zigzag/inputs/examples/workload/.gitignore b/inputs/workload/.gitignore similarity index 100% rename from zigzag/inputs/examples/workload/.gitignore rename to inputs/workload/.gitignore diff --git a/zigzag/inputs/examples/workload/alexnet.onnx b/inputs/workload/alexnet.onnx similarity index 100% rename from zigzag/inputs/examples/workload/alexnet.onnx rename to inputs/workload/alexnet.onnx diff --git a/zigzag/inputs/examples/workload/mobilenetv2.onnx b/inputs/workload/mobilenetv2.onnx similarity index 100% rename from zigzag/inputs/examples/workload/mobilenetv2.onnx rename to inputs/workload/mobilenetv2.onnx diff --git a/zigzag/inputs/examples/workload/resnet18.onnx b/inputs/workload/resnet18.onnx similarity index 100% rename from zigzag/inputs/examples/workload/resnet18.onnx rename to inputs/workload/resnet18.onnx diff --git a/zigzag/inputs/examples/workload/resnet18.py b/inputs/workload/resnet18.py similarity index 100% rename from zigzag/inputs/examples/workload/resnet18.py rename to inputs/workload/resnet18.py diff --git a/inputs/workload/resnet18.yaml b/inputs/workload/resnet18.yaml new file mode 100644 index 00000000..be3fb6b7 --- /dev/null +++ b/inputs/workload/resnet18.yaml @@ -0,0 +1,456 @@ +- id: 0 # conv1 stride 2 + operator_type: Conv + equation: O[b][k][oy][ox]+=W[k][c][fy][fx]*I[b][c][iy][ix] + dimension_relations: [ix=2*ox+1*fx, iy=2*oy+1*fy] + loop_dims: [B, K, C, OY, OX, FY, FX] + loop_sizes: [1, 64, 3, 112, 112, 7, 7] + operand_precision: + W: 8 + I: 8 + O: 16 + O_final: 8 + operand_source: + I: 0 + W: 0 + +- id: 1 # max pool, stride 2 + operator_type: Pooling + equation: O[b][g][oy][ox]+=W[fx][fy]*I[b][g][iy][ix] + dimension_relations: [ix=2*ox+1*fx, iy=2*oy+1*fy] + loop_dims: [B, G, OY, OX, FX, FY] + loop_sizes: [1, 64, 56, 56, 3, 3] + operand_precision: + I: 8 + W: 0 + O: 16 + O_final: 8 + operand_source: + I: 0 + W: 1 # Constant operand + +- id: 2 # conv2_1 + operator_type: Conv + equation: O[b][k][oy][ox]+=W[k][c][fy][fx]*I[b][c][iy][ix] + dimension_relations: [ix=1*ox+1*fx, iy=1*oy+1*fy] + loop_dims: [B, K, C, OY, OX, FY, FX] + loop_sizes: [1, 64, 64, 56, 56, 3, 3] + operand_precision: + W: 8 + I: 8 + O: 16 + O_final: 8 + operand_source: + I: 1 + W: 2 + +- id: 3 # conv2_2 + operator_type: Conv + equation: 
O[b][k][oy][ox]+=W[k][c][fy][fx]*I[b][c][iy][ix] + dimension_relations: [ix=1*ox+1*fx, iy=1*oy+1*fy] + loop_dims: [B, K, C, OY, OX, FY, FX] + loop_sizes: [1, 64, 64, 56, 56, 3, 3] + operand_precision: + W: 8 + I: 8 + O: 16 + O_final: 8 + operand_source: + I: 2 + W: 3 + +- id: 4 # Addition of layer 1 (residual path) and layer 3 (main path) + operator_type: Add + equation: O[b][g][oy][ox]=W[b][g][oy][ox]+I[b][g][oy][ox] + loop_dims: [B, G, OY, OX] + loop_sizes: [1, 64, 56, 56] + operand_precision: + W: 8 + I: 8 + O: 16 + O_final: 8 + operand_source: + W: 1 + I: 3 + +- id: 5 # conv2_3 + operator_type: Conv + equation: O[b][k][oy][ox]+=W[k][c][fy][fx]*I[b][c][iy][ix] + dimension_relations: [ix=1*ox+1*fx, iy=1*oy+1*fy] + loop_dims: [B, K, C, OY, OX, FY, FX] + loop_sizes: [1, 64, 64, 56, 56, 3, 3] + operand_precision: + W: 8 + I: 8 + O: 16 + O_final: 8 + operand_source: + I: 4 + W: 5 + +- id: 6 # conv2_4 + operator_type: Conv + equation: O[b][k][oy][ox]+=W[k][c][fy][fx]*I[b][c][iy][ix] + dimension_relations: [ix=1*ox+1*fx, iy=1*oy+1*fy] + loop_dims: [B, K, C, OY, OX, FY, FX] + loop_sizes: [1, 64, 64, 56, 56, 3, 3] + operand_precision: + W: 8 + I: 8 + O: 16 + O_final: 8 + operand_source: + I: 5 + W: 6 + +- id: 7 # Addition of layer 4 (residual connection) and layer 6 (main path) + operator_type: Add + equation: O[b][g][oy][ox]=W[b][g][oy][ox]+I[b][g][oy][ox] + loop_dims: [B, G, OY, OX] + loop_sizes: [1, 64, 56, 56] + operand_precision: + W: 8 + I: 8 + O: 16 + O_final: 8 + operand_source: + W: 4 + I: 6 + +- id: 8 # conv3_1 stride 2 + operator_type: Conv + equation: O[b][k][oy][ox]+=W[k][c][fy][fx]*I[b][c][iy][ix] + dimension_relations: [ix=2*ox+1*fx, iy=2*oy+1*fy] + loop_dims: [B, K, C, OY, OX, FY, FX] + loop_sizes: [1, 128, 64, 28, 28, 3, 3] + operand_precision: + W: 8 + I: 8 + O: 16 + O_final: 8 + operand_source: + I: 7 + W: 8 + +- id: 9 # conv3_2 + operator_type: Conv + equation: O[b][k][oy][ox]+=W[k][c][fy][fx]*I[b][c][iy][ix] + dimension_relations: [ix=1*ox+1*fx, iy=1*oy+1*fy] + loop_dims: [B, K, C, OY, OX, FY, FX] + loop_sizes: [1, 128, 128, 28, 28, 3, 3] + operand_precision: + W: 8 + I: 8 + O: 16 + O_final: 8 + operand_source: + I: 8 + W: 9 + +- id: 10 # conv downsample of layer 7 + operator_type: Conv_downsample + equation: O[b][k][oy][ox]+=W[k][c][fy][fx]*I[b][c][iy][ix] + dimension_relations: [ix=2*ox+1*fx, iy=2*oy+1*fy] + loop_dims: [B, K, C, OY, OX, FY, FX] + loop_sizes: [1, 128, 64, 28, 28, 3, 3] + operand_precision: + W: 8 + I: 8 + O: 16 + O_final: 8 + operand_source: + I: 7 + W: 10 + +- id: 11 # Addition of layer 10 (residual connection) and layer 9 (main path) + operator_type: Add + equation: O[b][g][oy][ox]=W[b][g][oy][ox]+I[b][g][oy][ox] + loop_dims: [B, G, OY, OX] + loop_sizes: [1, 128, 28, 28] + operand_precision: + W: 8 + I: 8 + O: 16 + O_final: 8 + operand_source: + W: 10 + I: 9 + +- id: 12 # conv3_3 + operator_type: Conv + equation: O[b][k][oy][ox]+=W[k][c][fy][fx]*I[b][c][iy][ix] + dimension_relations: [ix=1*ox+1*fx, iy=1*oy+1*fy] + loop_dims: [B, K, C, OY, OX, FY, FX] + loop_sizes: [1, 128, 128, 28, 28, 3, 3] + operand_precision: + W: 8 + I: 8 + O: 16 + O_final: 8 + operand_source: + I: 11 + W: 12 + +- id: 13 # conv3_4 + operator_type: Conv + equation: O[b][k][oy][ox]+=W[k][c][fy][fx]*I[b][c][iy][ix] + dimension_relations: [ix=1*ox+1*fx, iy=1*oy+1*fy] + loop_dims: [B, K, C, OY, OX, FY, FX] + loop_sizes: [1, 128, 128, 28, 28, 3, 3] + operand_precision: + W: 8 + I: 8 + O: 16 + O_final: 8 + operand_source: + I: 12 + W: 13 + +- id: 14 # Addition of layer 11 (residual 
connection) and layer 13 (main path) + operator_type: Add + equation: O[b][g][oy][ox]=W[b][g][oy][ox]+I[b][g][oy][ox] + loop_dims: [B, G, OY, OX] + loop_sizes: [1, 128, 28, 28] + operand_precision: + W: 8 + I: 8 + O: 16 + O_final: 8 + operand_source: + W: 11 + I: 13 + +- id: 15 # conv4_1 stride 2 + operator_type: Conv + equation: O[b][k][oy][ox]+=W[k][c][fy][fx]*I[b][c][iy][ix] + dimension_relations: [ix=2*ox+1*fx, iy=2*oy+1*fy] + loop_dims: [B, K, C, OY, OX, FY, FX] + loop_sizes: [1, 256, 128, 14, 14, 3, 3] + operand_precision: + W: 8 + I: 8 + O: 16 + O_final: 8 + operand_source: + I: 14 + W: 15 + +- id: 16 # conv4_2 + operator_type: Conv + equation: O[b][k][oy][ox]+=W[k][c][fy][fx]*I[b][c][iy][ix] + dimension_relations: [ix=1*ox+1*fx, iy=1*oy+1*fy] + loop_dims: [B, K, C, OY, OX, FY, FX] + loop_sizes: [1, 256, 256, 14, 14, 3, 3] + operand_precision: + W: 8 + I: 8 + O: 16 + O_final: 8 + operand_source: + I: 15 + W: 16 + +- id: 17 # conv downsample of layer 14 + operator_type: Conv_downsample + equation: O[b][k][oy][ox]+=W[k][c][fy][fx]*I[b][c][iy][ix] + dimension_relations: [ix=2*ox+1*fx, iy=2*oy+1*fy] + loop_dims: [B, K, C, OY, OX, FY, FX] + loop_sizes: [1, 256, 128, 14, 14, 3, 3] + operand_precision: + W: 8 + I: 8 + O: 16 + O_final: 8 + operand_source: + I: 14 + W: 17 + +- id: 18 # Addition of layer 17 (residual connection) and layer 16 (main path) + operator_type: Add + equation: O[b][g][oy][ox]=W[b][g][oy][ox]+I[b][g][oy][ox] + loop_dims: [B, G, OY, OX] + loop_sizes: [1, 256, 14, 14] + operand_precision: + W: 8 + I: 8 + O: 16 + O_final: 8 + operand_source: + W: 17 + I: 16 + +- id: 19 # conv4_3 + operator_type: Conv + equation: O[b][k][oy][ox]+=W[k][c][fy][fx]*I[b][c][iy][ix] + dimension_relations: [ix=1*ox+1*fx, iy=1*oy+1*fy] + loop_dims: [B, K, C, OY, OX, FY, FX] + loop_sizes: [1, 256, 256, 14, 14, 3, 3] + operand_precision: + W: 8 + I: 8 + O: 16 + O_final: 8 + operand_source: + I: 18 + W: 19 + +- id: 20 # conv4_4 + operator_type: Conv + equation: O[b][k][oy][ox]+=W[k][c][fy][fx]*I[b][c][iy][ix] + dimension_relations: [ix=1*ox+1*fx, iy=1*oy+1*fy] + loop_dims: [B, K, C, OY, OX, FY, FX] + loop_sizes: [1, 256, 256, 14, 14, 3, 3] + operand_precision: + W: 8 + I: 8 + O: 16 + O_final: 8 + operand_source: + I: 19 + W: 20 + +- id: 21 # Addition of layer 18 (residual connection) and layer 20 (main path) + operator_type: Add + equation: O[b][g][oy][ox]=W[b][g][oy][ox]+I[b][g][oy][ox] + loop_dims: [B, G, OY, OX] + loop_sizes: [1, 256, 14, 14] + operand_precision: + W: 8 + I: 8 + O: 16 + O_final: 8 + operand_source: + W: 18 + I: 20 + +- id: 22 # conv5_1 stride 2 + operator_type: Conv + equation: O[b][k][oy][ox]+=W[k][c][fy][fx]*I[b][c][iy][ix] + dimension_relations: [ix=2*ox+1*fx, iy=2*oy+1*fy] + loop_dims: [B, K, C, OY, OX, FY, FX] + loop_sizes: [1, 512, 256, 7, 7, 3, 3] + operand_precision: + W: 8 + I: 8 + O: 16 + O_final: 8 + operand_source: + I: 21 + W: 22 + +- id: 23 # conv5_2 + operator_type: Conv + equation: O[b][k][oy][ox]+=W[k][c][fy][fx]*I[b][c][iy][ix] + dimension_relations: [ix=1*ox+1*fx, iy=1*oy+1*fy] + loop_dims: [B, K, C, OY, OX, FY, FX] + loop_sizes: [1, 512, 512, 7, 7, 3, 3] + operand_precision: + W: 8 + I: 8 + O: 16 + O_final: 8 + operand_source: + I: 22 + W: 23 + +- id: 24 # conv downsample of layer 21 + operator_type: Conv_downsample + equation: O[b][k][oy][ox]+=W[k][c][fy][fx]*I[b][c][iy][ix] + dimension_relations: [ix=2*ox+1*fx, iy=2*oy+1*fy] + loop_dims: [B, K, C, OY, OX, FY, FX] + loop_sizes: [1, 512, 256, 7, 7, 3, 3] + operand_precision: + W: 8 + I: 8 + O: 16 + O_final: 8 
+ operand_source: + I: 21 + W: 24 + +- id: 25 # Addition of layer 24 (residual connection) and layer 23 (main path) + operator_type: Add + equation: O[b][g][oy][ox]=W[b][g][oy][ox]+I[b][g][oy][ox] + loop_dims: [B, G, OY, OX] + loop_sizes: [1, 512, 7, 7] + operand_precision: + W: 8 + I: 8 + O: 16 + O_final: 8 + operand_source: + W: 24 + I: 23 + +- id: 26 # conv5_3 + operator_type: Conv + equation: O[b][k][oy][ox]+=W[k][c][fy][fx]*I[b][c][iy][ix] + dimension_relations: [ix=1*ox+1*fx, iy=1*oy+1*fy] + loop_dims: [B, K, C, OY, OX, FY, FX] + loop_sizes: [1, 512, 512, 7, 7, 3, 3] + operand_precision: + W: 8 + I: 8 + O: 16 + O_final: 8 + operand_source: + I: 25 + W: 26 + +- id: 27 # conv4_4 + operator_type: Conv + equation: O[b][k][oy][ox]+=W[k][c][fy][fx]*I[b][c][iy][ix] + dimension_relations: [ix=1*ox+1*fx, iy=1*oy+1*fy] + loop_dims: [B, K, C, OY, OX, FY, FX] + loop_sizes: [1, 512, 512, 7, 7, 3, 3] + operand_precision: + W: 8 + I: 8 + O: 16 + O_final: 8 + operand_source: + I: 26 + W: 27 + +- id: 28 # Addition of layer 25 (residual connection) and layer 27 (main path) + operator_type: Add + equation: O[b][g][oy][ox]=W[b][g][oy][ox]+I[b][g][oy][ox] + loop_dims: [B, G, OY, OX] + loop_sizes: [1, 512, 7, 7] + operand_precision: + W: 8 + I: 8 + O: 16 + O_final: 8 + operand_source: + W: 25 + I: 27 + +- id: 29 # aver pool + operator_type: Pooling + equation: O[b][g][oy][ox]+=W[fx][fy]*I[b][g][iy][ix] + dimension_relations: [ix=1*ox+1*fx, iy=1*oy+1*fy] + loop_dims: [B, G, OY, OX, FX, FY] + loop_sizes: [1, 512, 1, 1, 7, 7] + operand_precision: + I: 8 + W: 0 + O: 16 + O_final: 8 + operand_source: + I: 28 + W: 29 + +- id: 30 # fc + operator_type: Conv + equation: O[b][k][oy][ox]+=W[k][c][fy][fx]*I[b][c][iy][ix] + dimension_relations: [ix=1*ox+1*fx, iy=1*oy+1*fy] + loop_dims: [B, K, C, OY, OX, FY, FX] + loop_sizes: [1, 1000, 512, 1, 1, 1, 1] + operand_precision: + W: 8 + I: 8 + O: 16 + O_final: 8 + operand_source: + I: 29 + W: 30 diff --git a/main.py b/main.py index c44c8ca8..83a0d85f 100644 --- a/main.py +++ b/main.py @@ -5,7 +5,8 @@ from zigzag.stages.MainStage import MainStage from zigzag.stages.SpatialMappingGeneratorStage import SpatialMappingGeneratorStage from zigzag.stages.WorkloadStage import WorkloadStage -from zigzag.stages.input_parser_stages import AcceleratorParserStage, WorkloadParserStage +from zigzag.stages.WorkloadParserStage import WorkloadParserStage +from zigzag.stages.AcceleratorParserStage import AcceleratorParserStage from zigzag.stages.reduce_stages import MinimalLatencyStage, SumStage from zigzag.stages.save_stages import CompleteSaveStage, PickleSaveStage, SimpleSaveStage from zigzag.stages.LomaStage import LomaStage diff --git a/main_onnx.py b/main_onnx.py index b2a94510..baee9ecf 100644 --- a/main_onnx.py +++ b/main_onnx.py @@ -6,7 +6,7 @@ from zigzag.stages.ONNXModelParserStage import ONNXModelParserStage from zigzag.stages.SpatialMappingGeneratorStage import SpatialMappingGeneratorStage from zigzag.stages.WorkloadStage import WorkloadStage -from zigzag.stages.input_parser_stages import AcceleratorParserStage +from zigzag.stages.AcceleratorParserStage import AcceleratorParserStage from zigzag.stages.reduce_stages import MinimalLatencyStage from zigzag.stages.save_stages import SimpleSaveStage from zigzag.stages.LomaStage import LomaStage diff --git a/main_onnx_salsa.py b/main_onnx_salsa.py index b31e1940..691b2833 100644 --- a/main_onnx_salsa.py +++ b/main_onnx_salsa.py @@ -35,7 +35,7 @@ from zigzag.stages.SalsaStage import SalsaStage from 
zigzag.stages.SpatialMappingGeneratorStage import SpatialMappingGeneratorStage from zigzag.stages.WorkloadStage import WorkloadStage -from zigzag.stages.input_parser_stages import AcceleratorParserStage +from zigzag.stages.AcceleratorParserStage import AcceleratorParserStage from zigzag.stages.reduce_stages import MinimalLatencyStage from zigzag.stages.save_stages import SimpleSaveStage diff --git a/requirements.txt b/requirements.txt index 729881dc..221ea7e5 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,4 +7,5 @@ tqdm multiprocessing_on_dill pyyaml pytest -typeguard \ No newline at end of file +typeguard +cerberus diff --git a/tests/main/test_origin/test_ascend_like.py b/tests/main/test_origin/test_ascend_like.py index 15c3578c..d7a6073b 100644 --- a/tests/main/test_origin/test_ascend_like.py +++ b/tests/main/test_origin/test_ascend_like.py @@ -3,34 +3,35 @@ from zigzag.api import get_hardware_performance_zigzag workloads = ( - "zigzag/inputs/examples/workload/alexnet.onnx", - "zigzag/inputs/examples/workload/mobilenetv2.onnx", - "zigzag/inputs/examples/workload/resnet18.onnx", - "zigzag.inputs.examples.workload.resnet18", + "inputs/workload/alexnet.onnx", + "inputs/workload/mobilenetv2.onnx", + "inputs/workload/resnet18.onnx", + "inputs/workload/resnet18.yaml", ) # Expected energy and latency for each workload defined above ens_lats = { - "zigzag/inputs/examples/workload/alexnet.onnx": (5737868753.12, 8696023), - "zigzag/inputs/examples/workload/mobilenetv2.onnx": (1913561726.0000005, 7359650), - "zigzag/inputs/examples/workload/resnet18.onnx": (1860918012.2400002, 3698589), - "zigzag.inputs.examples.workload.resnet18": (2345967030.96, 4779555), + "inputs/workload/alexnet.onnx": (5738188827.200001, 8696068.0), + "inputs/workload/mobilenetv2.onnx": (1913154775.600001, 7359658.0), + "inputs/workload/resnet18.onnx": (1860963861.6800003, 3698589.0), + "inputs/workload/resnet18.yaml": (2411783423.28, 4779709.0), } @pytest.fixture def mapping(): - return "zigzag.inputs.examples.mapping.ascend_like" + return "inputs/mapping/ascend_like.yaml" @pytest.fixture def accelerator(): - return "zigzag.inputs.examples.hardware.Ascend_like" + return "inputs/hardware/ascend_like.yaml" @pytest.mark.parametrize("workload", workloads) def test_api(workload, accelerator, mapping): (energy, latency, cmes) = get_hardware_performance_zigzag(workload, accelerator, mapping) (expected_energy, expected_latency) = ens_lats[workload] + print(f"'{workload}': ({energy}, {latency}),") assert energy == pytest.approx(expected_energy) assert latency == pytest.approx(expected_latency) diff --git a/tests/main/test_origin/test_edge_tpu_like.py b/tests/main/test_origin/test_edge_tpu_like.py index 98547c4a..5f244eb1 100644 --- a/tests/main/test_origin/test_edge_tpu_like.py +++ b/tests/main/test_origin/test_edge_tpu_like.py @@ -3,34 +3,35 @@ from zigzag.api import get_hardware_performance_zigzag workloads = ( - "zigzag/inputs/examples/workload/alexnet.onnx", - "zigzag/inputs/examples/workload/mobilenetv2.onnx", - "zigzag/inputs/examples/workload/resnet18.onnx", - "zigzag.inputs.examples.workload.resnet18", + "inputs/workload/alexnet.onnx", + "inputs/workload/mobilenetv2.onnx", + "inputs/workload/resnet18.onnx", + "inputs/workload/resnet18.yaml", ) # Expected energy and latency for each workload defined above ens_lats = { - "zigzag/inputs/examples/workload/alexnet.onnx": (5646369654.200001, 8221207), - "zigzag/inputs/examples/workload/mobilenetv2.onnx": (1680400085.4500012, 3562331), - 
"zigzag/inputs/examples/workload/resnet18.onnx": (1902488242.3499994, 3333310), - "zigzag.inputs.examples.workload.resnet18": (2347758970.83, 4187369), + "inputs/workload/alexnet.onnx": (5657178248.6, 8221391.0), + "inputs/workload/mobilenetv2.onnx": (1680773377.4499998, 3562331.0), + "inputs/workload/resnet18.onnx": (1902637139.1499994, 3333310.0), + "inputs/workload/resnet18.yaml": (2413348670.67, 4268451.0), } @pytest.fixture def mapping(): - return "zigzag.inputs.examples.mapping.edge_tpu_like" + return "inputs/mapping/edge_tpu_like.yaml" @pytest.fixture def accelerator(): - return "zigzag.inputs.examples.hardware.Edge_TPU_like" + return "inputs/hardware/edge_tpu_like.yaml" @pytest.mark.parametrize("workload", workloads) def test_api(workload, accelerator, mapping): (energy, latency, cmes) = get_hardware_performance_zigzag(workload, accelerator, mapping) (expected_energy, expected_latency) = ens_lats[workload] + print(f"'{workload}': ({energy}, {latency}),") assert energy == pytest.approx(expected_energy) assert latency == pytest.approx(expected_latency) diff --git a/tests/main/test_origin/test_meta_prototype_like.py b/tests/main/test_origin/test_meta_prototype_like.py index 47ce3c12..8afbe549 100644 --- a/tests/main/test_origin/test_meta_prototype_like.py +++ b/tests/main/test_origin/test_meta_prototype_like.py @@ -3,34 +3,35 @@ from zigzag.api import get_hardware_performance_zigzag workloads = ( - "zigzag/inputs/examples/workload/alexnet.onnx", - "zigzag/inputs/examples/workload/mobilenetv2.onnx", - "zigzag/inputs/examples/workload/resnet18.onnx", - "zigzag.inputs.examples.workload.resnet18", + "inputs/workload/alexnet.onnx", + "inputs/workload/mobilenetv2.onnx", + "inputs/workload/resnet18.onnx", + "inputs/workload/resnet18.yaml", ) # Expected energy and latency for each workload defined above ens_lats = { - "zigzag/inputs/examples/workload/alexnet.onnx": (5766869514.52, 8338950), - "zigzag/inputs/examples/workload/mobilenetv2.onnx": (1728388906.7599993, 3429446), - "zigzag/inputs/examples/workload/resnet18.onnx": (1868963025.12, 3366695), - "zigzag.inputs.examples.workload.resnet18": (2352271282.04, 4129027), + "inputs/workload/alexnet.onnx": (5771499135.4800005, 8338950.0), + "inputs/workload/mobilenetv2.onnx": (1728572789.1600003, 3429446.0), + "inputs/workload/resnet18.onnx": (1869036158.08, 3366695.0), + "inputs/workload/resnet18.yaml": (2418511845.2400002, 4130645.0), } @pytest.fixture def mapping(): - return "zigzag.inputs.examples.mapping.meta_prototype_like" + return "inputs/mapping/meta_prototype_like.yaml" @pytest.fixture def accelerator(): - return "zigzag.inputs.examples.hardware.Meta_prototype" + return "inputs/hardware/meta_prototype.yaml" @pytest.mark.parametrize("workload", workloads) def test_api(workload, accelerator, mapping): (energy, latency, cmes) = get_hardware_performance_zigzag(workload, accelerator, mapping) (expected_energy, expected_latency) = ens_lats[workload] + print(f"'{workload}': ({energy}, {latency}),") assert energy == pytest.approx(expected_energy) assert latency == pytest.approx(expected_latency) diff --git a/tests/main/test_origin/test_tesla_npu_like.py b/tests/main/test_origin/test_tesla_npu_like.py index caa08a58..20a3bf5c 100644 --- a/tests/main/test_origin/test_tesla_npu_like.py +++ b/tests/main/test_origin/test_tesla_npu_like.py @@ -3,34 +3,35 @@ from zigzag.api import get_hardware_performance_zigzag workloads = ( - "zigzag/inputs/examples/workload/alexnet.onnx", - "zigzag/inputs/examples/workload/mobilenetv2.onnx", - 
"zigzag/inputs/examples/workload/resnet18.onnx", - "zigzag.inputs.examples.workload.resnet18", + "inputs/workload/alexnet.onnx", + "inputs/workload/mobilenetv2.onnx", + "inputs/workload/resnet18.onnx", + "inputs/workload/resnet18.yaml", ) # Expected energy and latency for each workload defined above ens_lats = { - "zigzag/inputs/examples/workload/alexnet.onnx": (6129156613.576, 8442657), - "zigzag/inputs/examples/workload/mobilenetv2.onnx": (1682873656.7980008, 2824122), - "zigzag/inputs/examples/workload/resnet18.onnx": (1863651442.3999999, 3380242), - "zigzag.inputs.examples.workload.resnet18": (2308838375.536, 4066942), + "inputs/workload/alexnet.onnx": (6131856327.976001, 8442657.0), + "inputs/workload/mobilenetv2.onnx": (1683001389.998, 2824122.0), + "inputs/workload/resnet18.onnx": (1863716799.84, 3380242.0), + "inputs/workload/resnet18.yaml": (2374655424.176, 4066942.0), } @pytest.fixture def mapping(): - return "zigzag.inputs.examples.mapping.tesla_npu_like" + return "inputs/mapping/tesla_npu_like.yaml" @pytest.fixture def accelerator(): - return "zigzag.inputs.examples.hardware.Tesla_NPU_like" + return "inputs/hardware/tesla_npu_like.yaml" @pytest.mark.parametrize("workload", workloads) def test_api(workload, accelerator, mapping): (energy, latency, cmes) = get_hardware_performance_zigzag(workload, accelerator, mapping) (expected_energy, expected_latency) = ens_lats[workload] + print(f"'{workload}': ({energy}, {latency}),") assert energy == pytest.approx(expected_energy) assert latency == pytest.approx(expected_latency) diff --git a/tests/main/test_origin/test_tpu_like.py b/tests/main/test_origin/test_tpu_like.py index b93f97c7..c7f3c6ba 100644 --- a/tests/main/test_origin/test_tpu_like.py +++ b/tests/main/test_origin/test_tpu_like.py @@ -3,34 +3,35 @@ from zigzag.api import get_hardware_performance_zigzag workloads = ( - "zigzag/inputs/examples/workload/alexnet.onnx", - "zigzag/inputs/examples/workload/mobilenetv2.onnx", - "zigzag/inputs/examples/workload/resnet18.onnx", - "zigzag.inputs.examples.workload.resnet18", + "inputs/workload/alexnet.onnx", + "inputs/workload/mobilenetv2.onnx", + "inputs/workload/resnet18.onnx", + "inputs/workload/resnet18.yaml", ) # Expected energy and latency for each workload defined above ens_lats = { - "zigzag/inputs/examples/workload/alexnet.onnx": (5562971337.551999, 9061821), - "zigzag/inputs/examples/workload/mobilenetv2.onnx": (1904302988.3070006, 23101112), - "zigzag/inputs/examples/workload/resnet18.onnx": (1795832911.4720004, 4158539), - "zigzag.inputs.examples.workload.resnet18": (2230898567.856, 4816575), + "inputs/workload/alexnet.onnx": (5567501203.632, 9061821.0), + "inputs/workload/mobilenetv2.onnx": (1904482765.907, 23101112.0), + "inputs/workload/resnet18.onnx": (1795904402.5120003, 4158539.0), + "inputs/workload/resnet18.yaml": (2296490149.296, 4906975.0), } @pytest.fixture def mapping(): - return "zigzag.inputs.examples.mapping.tpu_like" + return "inputs/mapping/tpu_like.yaml" @pytest.fixture def accelerator(): - return "zigzag.inputs.examples.hardware.TPU_like" + return "inputs/hardware/tpu_like.yaml" @pytest.mark.parametrize("workload", workloads) def test_api(workload, accelerator, mapping): (energy, latency, _) = get_hardware_performance_zigzag(workload, accelerator, mapping) (expected_energy, expected_latency) = ens_lats[workload] + print(f"{workload}: ({energy}, {latency}),") assert energy == pytest.approx(expected_energy) assert latency == pytest.approx(expected_latency) diff --git a/zigzag/__main__.py b/zigzag/__main__.py 
index 0e7ec95b..c62b5520 100644 --- a/zigzag/__main__.py +++ b/zigzag/__main__.py @@ -6,7 +6,7 @@ from zigzag.stages.ONNXModelParserStage import ONNXModelParserStage from zigzag.stages.SpatialMappingConversionStage import SpatialMappingConversionStage from zigzag.stages.WorkloadStage import WorkloadStage -from zigzag.stages.input_parser_stages import AcceleratorParserStage +from zigzag.stages.AcceleratorParserStage import AcceleratorParserStage from zigzag.stages.reduce_stages import MinimalLatencyStage from zigzag.stages.save_stages import SimpleSaveStage from zigzag.stages.LomaStage import LomaStage diff --git a/zigzag/api.py b/zigzag/api.py index f1ea84a9..b651d773 100644 --- a/zigzag/api.py +++ b/zigzag/api.py @@ -1,7 +1,7 @@ from onnx import ModelProto - -from typing import Any import re +from datetime import datetime +from typing import Any from zigzag.stages.CostModelStage import CostModelStage from zigzag.stages.MainStage import MainStage @@ -9,7 +9,8 @@ from zigzag.stages.PEArrayScalingStage import PEArrayScalingStage from zigzag.stages.SpatialMappingGeneratorStage import SpatialMappingGeneratorStage from zigzag.stages.WorkloadStage import WorkloadStage -from zigzag.stages.input_parser_stages import AcceleratorParserStage, WorkloadParserStage +from zigzag.stages.WorkloadParserStage import WorkloadParserStage +from zigzag.stages.AcceleratorParserStage import AcceleratorParserStage from zigzag.stages.reduce_stages import MinimalEDPStage, MinimalEnergyStage, MinimalLatencyStage, SumStage from zigzag.stages.save_stages import CompleteSaveStage, PickleSaveStage, SimpleSaveStage from zigzag.stages.LomaStage import LomaStage @@ -23,12 +24,12 @@ def get_hardware_performance_zigzag( accelerator: str, mapping: str | dict[str, dict[str, Any]], opt: str = "latency", - dump_filename_pattern: str = "outputs/{datetime}.json", + dump_filename_pattern: str = f"outputs/{datetime.now()}.json", pickle_filename: str = "outputs/list_of_cmes.pickle", lpf_limit: int = 6, ) -> tuple[float, float, list[tuple[CostModelEvaluationABC, Any]]]: """ - # TODO the API should probably be better documented + # TODO the API should be better documented """ # Initialize the logger import logging as _logging diff --git a/zigzag/cost_model/CostModelEvaluationForIMC.py b/zigzag/cost_model/CostModelEvaluationForIMC.py index 4aedc2a9..f75461eb 100644 --- a/zigzag/cost_model/CostModelEvaluationForIMC.py +++ b/zigzag/cost_model/CostModelEvaluationForIMC.py @@ -1,4 +1,6 @@ import logging +from termios import ICANON +from zigzag.hardware.architecture.ImcArray import ImcArray from zigzag.utils import pickle_deepcopy from zigzag.cost_model.cost_model import CostModelEvaluation, PortActivity @@ -25,6 +27,13 @@ class CostModelEvaluationForIMC(CostModelEvaluation): After initialization, the cost model evaluation is run. """ + def __init__(self): + super().__init__(...) # TODO + operational_array = self.accelerator.get_core(self.core_id).operational_array + self.imc_area = operational_array.total_area + assert isinstance(operational_array, ImcArray) + self.operational_array: ImcArray = operational_array + def run(self) -> None: """! 
Run the cost model evaluation.""" super().calc_memory_utilization() @@ -35,9 +44,8 @@ def run(self) -> None: def collect_area_data(self): # get imc area - operational_array = self.accelerator.get_core(self.core_id).operational_array - self.imc_area = operational_array.total_area - self.imc_area_breakdown = operational_array.area_breakdown + + self.imc_area_breakdown = self.operational_array.area_breakdown # get mem area self.mem_area = 0 self.mem_area_breakdown = {} diff --git a/zigzag/cost_model/cost_model.py b/zigzag/cost_model/cost_model.py index 5bc99ad4..d878a1df 100644 --- a/zigzag/cost_model/cost_model.py +++ b/zigzag/cost_model/cost_model.py @@ -5,6 +5,7 @@ from zigzag.cost_model.port_activity import PortActivity, PortBeginOrEndActivity from zigzag.datatypes import Constants, LayerOperand, MemoryOperand from zigzag.hardware.architecture.Accelerator import Accelerator +from zigzag.hardware.architecture.MemoryInstance import MemoryInstance from zigzag.mapping.Mapping import Mapping from zigzag.mapping.data_movement import DataDirection, FourWayDataMoving from zigzag.mapping.SpatialMappingInternal import SpatialMappingInternal @@ -298,7 +299,7 @@ def __init__( self.temporal_mapping = temporal_mapping self.access_same_data_considered_as_no_access = access_same_data_considered_as_no_access - self.core_id = layer.core_allocation + self.core_id = layer.core_allocation[0] core = accelerator.get_core(self.core_id) self.mem_level_list = core.memory_hierarchy.mem_level_list self.mem_hierarchy_dict = core.mem_hierarchy_dict @@ -1069,6 +1070,26 @@ def calc_overall_latency(self, cycles_per_mac: int = 1) -> None: self.MAC_utilization1 = MAC_utilization1 self.MAC_utilization2 = MAC_utilization2 + def get_total_inst_bandwidth(self, memory_instance: MemoryInstance) -> FourWayDataMoving: + """Given a cost model evaluation and a memory instance, compute the memory's total instantaneous bandwidth + required throughout the execution of the layer that corresponds to this CME. Returns empty bandwidth + requirements if the given memory instance is not included in this CME's memory hierarchy. + NOTE: this function is used in Stream + """ + # Check which operands require offchip memory throughout the computation + offchip_mem_operands: list[MemoryOperand] = [] + for op, memory_levels in self.mem_hierarchy_dict.items(): + last_mem_level = memory_levels[-1] + if last_mem_level.memory_instance == memory_instance: + offchip_mem_operands.append(op) + # Obtain the required instantaneous bandwidth to/from offchip for these operands + total_inst_bw = FourWayDataMoving(0, 0, 0, 0) + for mem_op in offchip_mem_operands: + layer_op = self.layer.memory_operand_links.mem_to_layer_op(mem_op) + inst_bw_4way = self.mapping.unit_mem_data_movement[layer_op][-1].req_mem_bw_inst + total_inst_bw += inst_bw_4way + return total_inst_bw + def __str__(self): return f"CostModelEvaluation({self.layer}, core {self.core_id})" diff --git a/zigzag/datatypes.py b/zigzag/datatypes.py index 74d76ed2..9d0ed5c3 100644 --- a/zigzag/datatypes.py +++ b/zigzag/datatypes.py @@ -2,6 +2,8 @@ import re from typing import Any, TypeAlias +from zigzag.parser.AcceleratorValidator import AcceleratorValidator + class OperandABC(metaclass=ABCMeta): """! Abstract Base Class for all dimension- and operand-like classes""" @@ -77,7 +79,7 @@ class OADimension(OperandABC): """! 
Operational Array Dimension""" def __init__(self, name: str): - assert bool(re.match(r"D\d", name)), f"OADimension {name} does not resemble `D1`" + assert bool(re.match(AcceleratorValidator.DIMENSION_REGEX, name)), f"OADimension {name} does not resemble `D1`" super().__init__(name) def __eq__(self, other: Any): @@ -86,29 +88,17 @@ def __eq__(self, other: Any): def __hash__(self): return hash(self.name) - @staticmethod - def parse_user_input(x: str): - assert bool(re.match(r"D\d", x)), f"OADimension {x} does not resemble `D1`" - return OADimension(x) - class Constants: """! Store constant objects used throughout ZigZag (instead of hardcoding them)""" - # Intermediate output operand. Hard coded, and must be specified by the user as such - OUTPUT_OPERAND_STR = "O" - # Final output operand after scaling. Hard coded, and must be specified by the user as such - FINAL_OUTPUT_OPERAND_STR = "O_final" - - OUTPUT_LAYER_OP = LayerOperand(OUTPUT_OPERAND_STR) - FINAL_OUTPUT_LAYER_OP = LayerOperand(FINAL_OUTPUT_OPERAND_STR) - OUTPUT_MEM_OP = MemoryOperand(OUTPUT_OPERAND_STR) - FINAL_OUTPUT_MEM_OP = MemoryOperand(FINAL_OUTPUT_OPERAND_STR) + OUTPUT_LAYER_OP = LayerOperand(AcceleratorValidator.OUTPUT_OPERAND_STR) + FINAL_OUTPUT_LAYER_OP = LayerOperand(AcceleratorValidator.FINAL_OUTPUT_OPERAND_STR) + OUTPUT_MEM_OP = MemoryOperand(AcceleratorValidator.OUTPUT_OPERAND_STR) + FINAL_OUTPUT_MEM_OP = MemoryOperand(AcceleratorValidator.FINAL_OUTPUT_OPERAND_STR) - MEM_OP_1_STR = "I1" - MEM_OP_2_STR = "I2" - MEM_OP_1 = MemoryOperand(MEM_OP_1_STR) - MEM_OP_2 = MemoryOperand(MEM_OP_2_STR) + MEM_OP_1 = MemoryOperand(AcceleratorValidator.MEM_OP_1_STR) + MEM_OP_2 = MemoryOperand(AcceleratorValidator.MEM_OP_2_STR) ###### Type aliases ###### diff --git a/zigzag/hardware/architecture/Accelerator.py b/zigzag/hardware/architecture/Accelerator.py index 7c620848..eb880835 100644 --- a/zigzag/hardware/architecture/Accelerator.py +++ b/zigzag/hardware/architecture/Accelerator.py @@ -21,7 +21,7 @@ def __jsonrepr__(self): """! JSON representation used for saving this object to a json file.""" return json_repr_handler({"name": self.name, "cores": self.cores}) - def get_core(self, core_id: int | str) -> Core: + def get_core(self, core_id: int) -> Core: """! Return the core with id 'core_id'. Raises ValueError() when a core_id is not found in the available cores. 
""" diff --git a/zigzag/hardware/architecture/AimcArray.py b/zigzag/hardware/architecture/AimcArray.py index 4460e979..9f63146a 100644 --- a/zigzag/hardware/architecture/AimcArray.py +++ b/zigzag/hardware/architecture/AimcArray.py @@ -10,6 +10,7 @@ from zigzag.hardware.architecture.DimcArray import UserSpatialMappingGenerator + if __name__ == "__main__": from imc_unit import ImcUnit from DimcArray import DimcArray diff --git a/zigzag/hardware/architecture/Core.py b/zigzag/hardware/architecture/Core.py index d43f9c2f..7fe970e4 100644 --- a/zigzag/hardware/architecture/Core.py +++ b/zigzag/hardware/architecture/Core.py @@ -1,8 +1,10 @@ from zigzag.datatypes import MemoryOperand +from zigzag.hardware.architecture.MemoryInstance import MemoryInstance from zigzag.hardware.architecture.memory_level import MemoryLevel from zigzag.hardware.architecture.operational_array import OperationalArray from zigzag.hardware.architecture.MemoryHierarchy import MemoryHierarchy +from zigzag.mapping.spatial_mapping import SpatialMapping from zigzag.utils import json_repr_handler @@ -14,14 +16,18 @@ class Core: def __init__( self, - id: int, + core_id: int, operational_array: OperationalArray, memory_hierarchy: MemoryHierarchy, + dataflows: SpatialMapping | None = None, ): - self.id = id + self.id = core_id self.operational_array = operational_array self.memory_hierarchy = memory_hierarchy + self.mem_hierarchy_dict: dict[MemoryOperand, list[MemoryLevel]] = {} + + self.dataflows = dataflows self.recalculate_memory_hierarchy_information() def get_memory_level(self, mem_op: MemoryOperand, mem_lv: int) -> MemoryLevel: @@ -90,6 +96,13 @@ def __generate_memory_sharing_list(self): self.mem_sharing_list = memory_sharing_list + def get_top_memory_instance(self, mem_op: MemoryOperand) -> MemoryInstance: + if mem_op not in self.memory_hierarchy.get_operands(): + raise ValueError(f"Memory operand {mem_op} not in {self}.") + mem_level = self.memory_hierarchy.get_operand_top_level(mem_op) + mem_instance = mem_level.memory_instance + return mem_instance + def get_memory_bw_dict(self): return self.mem_r_bw_dict, self.mem_w_bw_dict diff --git a/zigzag/hardware/architecture/DimcArray.py b/zigzag/hardware/architecture/DimcArray.py index 68982c8f..31477b1c 100644 --- a/zigzag/hardware/architecture/DimcArray.py +++ b/zigzag/hardware/architecture/DimcArray.py @@ -24,6 +24,7 @@ import logging as _logging from zigzag.hardware.architecture.imc_unit import ImcUnit + class UserSpatialMappingGenerator: """Dummy class to get rid of ruff lint check warnings. This should be removed and the code should be updated accordingly. 
diff --git a/zigzag/hardware/architecture/MemoryHierarchy.py b/zigzag/hardware/architecture/MemoryHierarchy.py index f302a8b7..b60ff555 100644 --- a/zigzag/hardware/architecture/MemoryHierarchy.py +++ b/zigzag/hardware/architecture/MemoryHierarchy.py @@ -5,8 +5,8 @@ from zigzag.datatypes import MemoryOperand from zigzag.hardware.architecture.MemoryInstance import MemoryInstance -from zigzag.hardware.architecture.memory_level import MemoryLevel, ServedMemDimensions, ServedMemDimsUserFormat -from zigzag.hardware.architecture.memory_port import PortAllocUserFormat, PortAllocation +from zigzag.hardware.architecture.memory_level import MemoryLevel, ServedMemDimensions +from zigzag.hardware.architecture.memory_port import PortAllocation from zigzag.hardware.architecture.operational_array import OperationalArray from zigzag.utils import json_repr_handler @@ -42,9 +42,9 @@ def __init__( def add_memory( self, memory_instance: MemoryInstance, - operands: tuple[str, ...], - port_alloc: PortAllocUserFormat = (), - served_dimensions: ServedMemDimsUserFormat = (), + operands: list[MemoryOperand], + port_alloc: PortAllocation, + served_dimensions: ServedMemDimensions, ): """! Adds a memory to the memory hierarchy graph. NOTE: memory level need to be added from bottom level (e.g., Reg) to top level (e.g., DRAM) for each operand !!! @@ -54,20 +54,11 @@ def add_memory( Edges are added from all sink nodes in the graph to this node if the memory operands match @param memory_instance: The MemoryInstance containing the different memory characteristics. @param operands: The memory operands the memory level stores. - @param served_dimensions: The operational array dimensions this memory level serves. Default: no served - dimensions -> unroll over every Operational Array unit + @param served_dimensions: The operational array dimensions this memory level serves. Default: no served dimensions -> unroll over every Operational Array unit """ - operands_parsed: list[MemoryOperand] = [MemoryOperand(x) for x in operands] - served_dims_parsed = ServedMemDimensions.parse_user_format(served_dimensions) - - port_alloc_parsed: PortAllocation = ( - PortAllocation.get_default(operands_parsed) - if port_alloc == () - else PortAllocation.parse_user_input(port_alloc, operands_parsed) - ) # Add the memory operands to the self.operands set attribute that stores all memory operands. 
- for mem_op in operands_parsed: + for mem_op in operands: if mem_op not in self.operands: self.nb_levels[mem_op] = 1 self.operands.add(mem_op) @@ -77,7 +68,7 @@ def add_memory( # Compute which memory level this is for all the operands mem_level_of_operands: dict[MemoryOperand, int] = {} - for mem_op in operands_parsed: + for mem_op in operands: nb_levels_so_far = len([node for node in self.memory_nodes if mem_op in node.operands]) mem_level_of_operands[mem_op] = nb_levels_so_far @@ -85,17 +76,17 @@ def add_memory( memory_instance=memory_instance, operands=operands, mem_level_of_operands=mem_level_of_operands, - port_alloc=port_alloc_parsed, - served_dimensions=served_dims_parsed, + port_alloc=port_alloc, + served_dimensions=served_dimensions, operational_array=self.operational_array, - id=self.memory_level_id, + identifier=self.memory_level_id, ) self.mem_level_list.append(memory_level) self.memory_level_id += 1 # Pre-compute appropriate edges to_edge_from: set[MemoryLevel] = set() - for mem_op in operands_parsed: + for mem_op in operands: # Find top level memories of the operands for m in self.get_operator_top_level(mem_op)[0]: to_edge_from.add(m) @@ -120,11 +111,11 @@ def get_operands(self) -> set[MemoryOperand]: def get_inner_memories(self) -> list[MemoryLevel]: """! Returns the inner-most memory levels for all memory operands.""" - return [node for node, in_degree in self.in_degree() if in_degree == 0] + return [node for node, in_degree in self.in_degree() if in_degree == 0] # type: ignore def get_outer_memories(self) -> list[MemoryLevel]: """! Returns the outer-most memory levels for all memory operands.""" - return [node for node, out_degree in self.out_degree() if out_degree == 0] + return [node for node, out_degree in self.out_degree() if out_degree == 0] # type: ignore def get_top_memories(self) -> tuple[list[MemoryLevel], int]: """! Returns the 'top'-most MemoryLevels, where 'the' level of MemoryLevel is considered to be the largest diff --git a/zigzag/hardware/architecture/memory_level.py b/zigzag/hardware/architecture/memory_level.py index 9e7a0705..3485a71d 100644 --- a/zigzag/hardware/architecture/memory_level.py +++ b/zigzag/hardware/architecture/memory_level.py @@ -34,23 +34,10 @@ def to_vec_format(self, nb_oa_dims: int, nb_operands: int) -> tuple[set[tuple[in return tuple(vec_single_operand for _ in range(nb_operands)) + @property def nb_dims(self): return len(self.data) - def assert_valid(self, oa_dim_sizes: dict[OADimension, int]) -> None: - """! Return True iff: - - all served dimensions are contained within the given Operational Array Dimensions - (Not the other way around: the served dimensions are a subset of the Dimensions of the Operational Array) - @param oa_dims a list with OA Dimensions to compare to - """ - assert all( - [served_dim in oa_dim_sizes for served_dim in self] - ), f"""User-specified served dimensions {self.data} contains element not part of the Operational - Array Dimensions {oa_dim_sizes.keys()}""" - - def to_user_format(self) -> ServedMemDimsUserFormat: - return tuple(oa_dim.name for oa_dim in self) - def __eq__(self, other: Any): return ( isinstance(other, ServedMemDimensions) @@ -70,40 +57,29 @@ def __iter__(self): def __len__(self): return len(self.data) - @staticmethod - def parse_user_format(x: ServedMemDimsUserFormat) -> "ServedMemDimensions": - """! 
Initialize an instance from the given data in user format""" - assert isinstance(x, tuple), "User provided served memory dimensions must be a tuple" - assert all([isinstance(x, str) for x in x]) - - data = {OADimension.parse_user_input(oa_dim) for oa_dim in x} - return ServedMemDimensions(data) - class MemoryLevel: def __init__( self, memory_instance: MemoryInstance, - operands: tuple[str, ...], + operands: list[MemoryOperand], mem_level_of_operands: dict[MemoryOperand, int], port_alloc: PortAllocation, served_dimensions: ServedMemDimensions, operational_array: OperationalArray, - id: int, + identifier: int, ): """! Initialize the memory level in the hierarchy with the physical memory instance @param port_alloc: memory port allocation (physical memory port -> functional memory port) @param id: an identifier used for reference check. """ self.memory_instance = memory_instance - # Convert to MemoryOperand - self.operands = [MemoryOperand(x) for x in operands] + self.operands = operands self.mem_level_of_operands = mem_level_of_operands - self.oa_dim_sizes: dict[OADimension, int] = operational_array.oa_dim_sizes - self.id: int = id - self.served_dimensions: ServedMemDimensions = served_dimensions - self.served_dimensions.assert_valid(self.oa_dim_sizes) + self.oa_dim_sizes = operational_array.oa_dim_sizes + self.id: int = identifier + self.served_dimensions = served_dimensions self.name = self.memory_instance.name # To be compatible with legacy code @@ -143,7 +119,8 @@ def __allocate_ports(self): port_list.append(new_port) for i in range(1, rw_port_nb + 1): port_name = "rw_port_" + str(i) - port_bw = self.memory_instance.r_bw # we assume the read-write port has the same bw for read and write + # we assume the read-write port has the same bw for read and write + port_bw = self.memory_instance.r_bw port_bw_min = self.memory_instance.r_bw_min port_attr = MemoryPortType.READ_WRITE new_port = MemoryPort(port_name, port_bw, port_bw_min, port_attr) @@ -170,8 +147,10 @@ def __jsonrepr__(self): return str(self) def __update_formatted_string(self): - self.formatted_string = f"""MemoryLevel(instance={self.memory_instance.name},operands={self.operands}, - served_dimensions={self.served_dimensions})""" + self.formatted_string = ( + f"MemoryLevel(instance={self.memory_instance.name},operands={self.operands}, " + f"served_dimensions={self.served_dimensions})" + ) def __str__(self): self.__update_formatted_string() diff --git a/zigzag/hardware/architecture/memory_port.py b/zigzag/hardware/architecture/memory_port.py index e6ecb972..bef7c487 100644 --- a/zigzag/hardware/architecture/memory_port.py +++ b/zigzag/hardware/architecture/memory_port.py @@ -2,7 +2,8 @@ import re from typing import Any, TypeAlias -from zigzag.datatypes import Constants, MemoryOperand +from zigzag.datatypes import MemoryOperand +from zigzag.parser.AcceleratorValidator import AcceleratorValidator class MemoryPortType(StrEnum): @@ -77,67 +78,19 @@ def __hash__(self): class PortAllocation: + """Port allocation for a single memory instance. Stores which ports are available for which memory operands and + their corresponding direction. 
+ + """ + def __init__(self, data: dict[MemoryOperand, dict[DataDirection, str]]): assert all( [ - all([isinstance(v, str) and re.match(r"^[r]?[w]?_port_\d+$", v) for v in d.values()]) + all([isinstance(v, str) and re.match(AcceleratorValidator.PORT_REGEX, v) for v in d.values()]) for d in data.values() ] ) self.data = data - def get_alloc_for_mem_op(self, mem_op: MemoryOperand): + def get_alloc_for_mem_op(self, mem_op: MemoryOperand) -> dict[DataDirection, str]: return self.data[mem_op] - - @staticmethod - def get_default(mem_operands: list[MemoryOperand]) -> "PortAllocation": - data: dict[MemoryOperand, dict[DataDirection, str]] = dict() - for mem_op in mem_operands: - if mem_op == Constants.OUTPUT_MEM_OP: - data[mem_op] = { - DataDirection.WR_IN_BY_HIGH: "w_port_1", - DataDirection.WR_IN_BY_LOW: "w_port_1", - DataDirection.RD_OUT_TO_HIGH: "r_port_1", - DataDirection.RD_OUT_TO_LOW: "r_port_1", - } - else: - data[mem_op] = { - DataDirection.WR_IN_BY_HIGH: "w_port_1", - DataDirection.RD_OUT_TO_LOW: "r_port_1", - } - return PortAllocation(data) - - @staticmethod - def parse_user_input(x: PortAllocUserFormat, mem_operands: list[MemoryOperand]) -> "PortAllocation": - """! - The order of the port allocations matches the order of the MemoryOperands from the given list. - """ - - def translate_to_data_direction(x: str) -> DataDirection: - match x: - case "fh": - return DataDirection.WR_IN_BY_HIGH - case "fl": - return DataDirection.WR_IN_BY_LOW - case "th": - return DataDirection.RD_OUT_TO_HIGH - case "tl": - return DataDirection.RD_OUT_TO_LOW - case _: - raise ValueError(f"Data direction must be either `fh`, `th`, `fl`, or `tl`. Not {x}") - - assert isinstance(x, tuple) - assert all([isinstance(d, dict) for d in x]) - assert all([isinstance(d, dict) for d in x]) - assert all([all([isinstance(k, str) for k in d.keys()]) for d in x]) - assert all([all([isinstance(v, str) for v in d.values()]) for d in x]) - assert all( - [all([re.match(r"^[r]?[w]?_port_\d+$", v) for v in d.values()]) for d in x] - ), "Port name should follow the pattern `r_`, `w_` or `rw_port_1`" + str(x) - assert len(x) == len(mem_operands) - - data: dict[MemoryOperand, dict[DataDirection, str]] = { - mem_op: {translate_to_data_direction(k): v for k, v in x[idx].items()} - for idx, mem_op in enumerate(mem_operands) - } - return PortAllocation(data) diff --git a/zigzag/hardware/architecture/operational_array.py b/zigzag/hardware/architecture/operational_array.py index 3cdc592f..37c9611d 100644 --- a/zigzag/hardware/architecture/operational_array.py +++ b/zigzag/hardware/architecture/operational_array.py @@ -1,7 +1,6 @@ from typing import Any import numpy as np from zigzag.datatypes import OADimension -from zigzag.hardware.architecture.imc_unit import ImcUnit from zigzag.hardware.architecture.operational_unit import ( OperationalUnit, Multiplier, @@ -12,7 +11,7 @@ class OperationalArray: """! This class captures multi-dimensional operational array size.""" - def __init__(self, operational_unit: OperationalUnit, dimensions: dict[str, int]): + def __init__(self, operational_unit: OperationalUnit, dimensions: dict[OADimension, int]): """ @param operational_unit: an OperationalUnit object including precision and single operation energy, later we can add idle energy also (e.g. for situations that one or two of the input operands is zero). 
@@ -20,14 +19,8 @@ def __init__(self, operational_unit: OperationalUnit, dimensions: dict[str, int] """ self.unit: OperationalUnit = operational_unit self.total_unit_count = int(np.prod(list(dimensions.values()))) - self.oa_dim_sizes: dict[OADimension, int] = { - OADimension(oa_dim_str): size for oa_dim_str, size in dimensions.items() - } - - if isinstance(operational_unit, OperationalUnit): - self.total_area = operational_unit.area * self.total_unit_count - else: # branch for IMC - self.total_area = operational_unit.area + self.oa_dim_sizes = dimensions + self.total_area = operational_unit.area * self.total_unit_count def __jsonrepr__(self): return json_repr_handler({"operational_unit": self.unit, "dimensions": self.oa_dim_sizes}) @@ -43,39 +36,39 @@ class MultiplierArray(OperationalArray): def __init__( self, multiplier: Multiplier, - dimensions: dict[str, int], - operand_spatial_sharing: dict[str, set[tuple[int, ...]]] = {}, + dimensions: dict[OADimension, int], + operand_spatial_sharing: dict[str, set[tuple[int, ...]]] | None = None, ): super(MultiplierArray, self).__init__(multiplier, dimensions) self.multiplier = self.unit self.operand_spatial_sharing = operand_spatial_sharing -def multiplier_array_example1(): - """Multiplier array variables""" - multiplier_input_precision = [8, 8] - multiplier_energy = 0.5 - multiplier_area = 0.1 - dimensions = {"D1": 14, "D2": 3, "D3": 4} - operand_spatial_sharing = { - "I1": {(1, 0, 0)}, - "O": {(0, 1, 0)}, - "I2": {(0, 0, 1), (1, 1, 0)}, - } - multiplier = Multiplier(multiplier_input_precision, multiplier_energy, multiplier_area) - multiplier_array = MultiplierArray(multiplier, dimensions, operand_spatial_sharing) +# def multiplier_array_example1(): +# """Multiplier array variables""" +# multiplier_input_precision = [8, 8] +# multiplier_energy = 0.5 +# multiplier_area = 0.1 +# dimensions = {"D1": 14, "D2": 3, "D3": 4} +# operand_spatial_sharing = { +# "I1": {(1, 0, 0)}, +# "O": {(0, 1, 0)}, +# "I2": {(0, 0, 1), (1, 1, 0)}, +# } +# multiplier = Multiplier(multiplier_input_precision, multiplier_energy, multiplier_area) +# multiplier_array = MultiplierArray(multiplier, dimensions, operand_spatial_sharing) - return multiplier_array +# return multiplier_array -def multiplier_array_example2(): - """Multiplier array variables""" - multiplier_input_precision = [8, 8] - multiplier_energy = 0.5 - multiplier_area = 0.1 - dimensions = {"D1": 14, "D2": 12} - operand_spatial_sharing = {"I1": {(1, 0)}, "O": {(0, 1)}, "I2": {(1, 1)}} - multiplier = Multiplier(multiplier_input_precision, multiplier_energy, multiplier_area) - multiplier_array = MultiplierArray(multiplier, dimensions, operand_spatial_sharing) +# def multiplier_array_example2(): +# """Multiplier array variables""" +# multiplier_input_precision = [8, 8] +# multiplier_energy = 0.5 +# multiplier_area = 0.1 +# dimensions = {"D1": 14, "D2": 12} +# operand_spatial_sharing = {"I1": {(1, 0)}, "O": {(0, 1)}, "I2": {(1, 1)}} +# multiplier = Multiplier(multiplier_input_precision, multiplier_energy, multiplier_area) +# multiplier_array = MultiplierArray(multiplier, dimensions, operand_spatial_sharing) - return multiplier_array +# return multiplier_array diff --git a/zigzag/inputs/examples/hardware/Aimc.py b/zigzag/inputs/examples/hardware/Aimc.py deleted file mode 100755 index c9d805df..00000000 --- a/zigzag/inputs/examples/hardware/Aimc.py +++ /dev/null @@ -1,248 +0,0 @@ -""" -Analog In-Memory Computing (AIMC) core definition -This example will define an AIMC core with a single macro, sized 32 rows x 32 
columns. -Supported operand precision: 8 bit -Technology node: 28 nm -The architecture hierarchy looks like: - ------- dram (I, W, O) ---------- - | | - sram (I, O) cell_group (W) - |-> reg_I1 (I) --> imc_array <--| - | | - | <---> reg_O1 (O) <--> | -""" - -import os, math -import random - -from zigzag.hardware.architecture.MemoryHierarchy import MemoryHierarchy -from zigzag.hardware.architecture.MemoryInstance import MemoryInstance -from zigzag.hardware.architecture.Accelerator import Accelerator -from zigzag.hardware.architecture.Core import Core -from zigzag.hardware.architecture.ImcArray import ImcArray -from zigzag.hardware.architecture.get_cacti_cost import ( - get_w_cost_per_weight_from_cacti, -) -from zigzag.hardware.architecture.get_cacti_cost import get_cacti_cost - - -def memory_hierarchy_dut(imc_array, visualize=False): - """[OPTIONAL] Get w_cost of imc cell group from CACTI if required""" - cacti_path = "zigzag/classes/cacti/cacti_master" - tech_param = imc_array.unit.logic_unit.tech_param - hd_param = imc_array.unit.hd_param - dimensions = imc_array.unit.dimensions - output_precision = hd_param["input_precision"] + hd_param["weight_precision"] - if hd_param["enable_cacti"]: - # unit: pJ/weight writing - w_cost_per_weight_writing = get_w_cost_per_weight_from_cacti(cacti_path, tech_param, hd_param, dimensions) - else: - w_cost_per_weight_writing = hd_param["w_cost_per_weight_writing"] # user-provided value (unit: pJ/weight) - - # Memory hierarchy variables - # size=#bit, bw=(read bw, write bw), cost=(read word energy, write work energy) - cell_group = MemoryInstance( - name="cell_group", - size=hd_param["weight_precision"] * hd_param["group_depth"], - r_bw=hd_param["weight_precision"], - w_bw=hd_param["weight_precision"], - r_cost=0, - w_cost=w_cost_per_weight_writing, # unit: pJ/weight - area=0, # this area is already included in imc_array - r_port=0, # no standalone read port - w_port=0, # no standalone write port - rw_port=1, # 1 port for both reading and writing - latency=0, # no extra clock cycle required - ) - reg_I1 = MemoryInstance( - name="rf_I1", - size=hd_param["input_precision"], - r_bw=hd_param["input_precision"], - w_bw=hd_param["input_precision"], - r_cost=0, - w_cost=tech_param["dff_cap"] * (tech_param["vdd"] ** 2) * hd_param["input_precision"], # pJ/access - area=tech_param["dff_area"] * hd_param["input_precision"], # mm^2 - r_port=1, - w_port=1, - rw_port=0, - latency=1, - ) - - reg_O1 = MemoryInstance( - name="rf_O1", - size=output_precision, - r_bw=output_precision, - w_bw=output_precision, - r_cost=0, - w_cost=tech_param["dff_cap"] * (tech_param["vdd"] ** 2) * output_precision, # pJ/access - area=tech_param["dff_area"] * output_precision, # mm^2 - r_port=2, - w_port=2, - rw_port=0, - latency=1, - ) - - ##################################### on-chip memory hierarchy building blocks ##################################### - - sram_size = 256 * 1024 # unit: byte - sram_bw = max( - imc_array.unit.bl_dim_size * hd_param["input_precision"] * imc_array.unit.nb_of_banks, - imc_array.unit.wl_dim_size * output_precision * imc_array.unit.nb_of_banks, - ) - ac_time, sram_area, sram_r_cost, sram_w_cost = get_cacti_cost( - cacti_path, - tech_param["tech_node"], - "sram", - sram_size, - sram_bw, - hd_hash=str(hash((sram_size, sram_bw, random.randbytes(8)))), - ) - sram_256KB_256_3r_3w = MemoryInstance( - name="sram_256KB", - size=sram_size * 8, # byte -> bit - r_bw=sram_bw, - w_bw=sram_bw, - r_cost=sram_r_cost, - w_cost=sram_w_cost, - area=sram_area, - r_port=3, - w_port=3, 
- rw_port=0, - latency=1, - min_r_granularity=sram_bw // 16, # assume there are 16 sub-banks - min_w_granularity=sram_bw // 16, # assume there are 16 sub-banks - ) - - ####################################################################################################################### - - dram_size = 1 * 1024 * 1024 * 1024 # unit: byte - dram_ac_cost_per_bit = 3.7 # unit: pJ/bit - dram_bw = imc_array.unit.wl_dim_size * hd_param["weight_precision"] * imc_array.unit.nb_of_banks - dram_100MB_32_3r_3w = MemoryInstance( - name="dram_1GB", - size=dram_size * 8, # byte -> bit - r_bw=dram_bw, - w_bw=dram_bw, - r_cost=dram_ac_cost_per_bit * dram_bw, # pJ/access - w_cost=dram_ac_cost_per_bit * dram_bw, # pJ/access - area=0, - r_port=3, - w_port=3, - rw_port=0, - latency=1, - min_r_granularity=dram_bw // 16, # assume there are 16 sub-banks - min_w_granularity=dram_bw // 16, # assume there are 16 sub-banks - ) - - memory_hierarchy_graph = MemoryHierarchy(operational_array=imc_array) - - # fh: from high = wr_in_by_high - # fl: from low = wr_in_by_low - # th: to high = rd_out_to_high - # tl: to low = rd_out_to_low - memory_hierarchy_graph.add_memory( - memory_instance=cell_group, - operands=("I2",), - port_alloc=({"fh": "rw_port_1", "tl": "rw_port_1"},), - ) - memory_hierarchy_graph.add_memory( - memory_instance=reg_I1, - operands=("I1",), - port_alloc=({"fh": "w_port_1", "tl": "r_port_1"},), - served_dimensions=("D1",), - ) - memory_hierarchy_graph.add_memory( - memory_instance=reg_O1, - operands=("O",), - port_alloc=({"fh": "w_port_1", "tl": "r_port_1", "fl": "w_port_2", "th": "r_port_2"},), - served_dimensions=("D2",), - ) - - ##################################### on-chip highest memory hierarchy initialization ##################################### - - memory_hierarchy_graph.add_memory( - memory_instance=sram_256KB_256_3r_3w, - operands=("I1", "O"), - port_alloc=( - {"fh": "w_port_1", "tl": "r_port_1"}, - {"fh": "w_port_2", "tl": "r_port_2", "fl": "w_port_3", "th": "r_port_3"}, - ), - served_dimensions=("D1", "D2", "D3"), - ) - - #################################################################################################################### - - memory_hierarchy_graph.add_memory( - memory_instance=dram_100MB_32_3r_3w, - operands=("I1", "I2", "O"), - port_alloc=( - {"fh": "w_port_1", "tl": "r_port_1"}, - {"fh": "w_port_2", "tl": "r_port_2"}, - {"fh": "w_port_1", "tl": "r_port_1", "fl": "w_port_3", "th": "r_port_3"}, - ), - served_dimensions=("D1", "D2", "D3"), - ) - - if visualize: - from zigzag.visualization.graph.memory_hierarchy import ( - visualize_memory_hierarchy_graph, - ) - - visualize_memory_hierarchy_graph(memory_hierarchy_graph) - return memory_hierarchy_graph - - -def imc_array_dut(): - """Multiplier array variables""" - tech_param = { # 28nm - "tech_node": 0.028, # unit: um - "vdd": 0.9, # unit: V - "nd2_cap": 0.7 / 1e3, # unit: pF - "xor2_cap": 0.7 * 1.5 / 1e3, # unit: pF - "dff_cap": 0.7 * 3 / 1e3, # unit: pF - "nd2_area": 0.614 / 1e6, # unit: mm^2 - "xor2_area": 0.614 * 2.4 / 1e6, # unit: mm^2 - "dff_area": 0.614 * 6 / 1e6, # unit: mm^2 - "nd2_dly": 0.0478, # unit: ns - "xor2_dly": 0.0478 * 2.4, # unit: ns - # "dff_dly": 0.0478*3.4, # unit: ns - } - hd_param = { - "pe_type": "in_sram_computing", # for in-memory-computing. Digital core for different values. 
- "imc_type": "analog", # "digital" or "analog" - "input_precision": 8, # activation precision - "weight_precision": 8, # weight precision - "input_bit_per_cycle": 2, # nb_bits of input/cycle (treated as DAC resolution) - "group_depth": 1, # #cells/multiplier - "adc_resolution": 8, # ADC resolution - "wordline_dimension": "D1", # hardware dimension where wordline is (corresponds to the served dimension of input regs) - "bitline_dimension": "D2", # hardware dimension where bitline is (corresponds to the served dimension of output regs) - "enable_cacti": True, # use CACTI to estimated cell array area cost (cell array exclude build-in logic part) - # Energy of writing weight. Required when enable_cacti is False. - # "w_cost_per_weight_writing": 0.08, # [OPTIONAL] unit: pJ/weight. - } - - dimensions = { - "D1": 4, # wordline dimension - "D2": 32, # bitline dimension - "D3": 1, # nb_macros (nb_arrays) - } # {"D1": ("K", 4), "D2": ("C", 32),} - hd_param["adc_resolution"] = hd_param["input_bit_per_cycle"] + 0.5 * int(math.log2(dimensions["D2"])) - - aimc_array = ImcArray(tech_param, hd_param, dimensions) - - return aimc_array - - -def cores_dut(): - imc_array1 = imc_array_dut() - memory_hierarchy1 = memory_hierarchy_dut(imc_array1) - - core1 = Core(1, imc_array1, memory_hierarchy1) - - return {core1} - - -cores = cores_dut() -acc_name = os.path.basename(__file__)[:-3] -accelerator = Accelerator(acc_name, cores) diff --git a/zigzag/inputs/examples/hardware/Ascend_like.py b/zigzag/inputs/examples/hardware/Ascend_like.py deleted file mode 100644 index 7853d637..00000000 --- a/zigzag/inputs/examples/hardware/Ascend_like.py +++ /dev/null @@ -1,279 +0,0 @@ -import os -from zigzag.hardware.architecture.MemoryHierarchy import MemoryHierarchy -from zigzag.hardware.architecture.operational_unit import Multiplier -from zigzag.hardware.architecture.operational_array import MultiplierArray -from zigzag.hardware.architecture.MemoryInstance import MemoryInstance -from zigzag.hardware.architecture.Accelerator import Accelerator -from zigzag.hardware.architecture.Core import Core - - -def memory_hierarchy_dut(multiplier_array, visualize=False): - """! 
Memory hierarchy variables - size=#bit, bw=(read bw, write bw), cost=(read word energy, write work energy)""" - - reg_W1 = MemoryInstance( - name="rf_1B", - size=8, - r_bw=8, - w_bw=8, - r_cost=0.01, - w_cost=0.01, - area=0, - r_port=1, - w_port=1, - rw_port=0, - latency=1, - ) - - reg_O1 = MemoryInstance( - name="rf_2B", - size=16, - r_bw=16, - w_bw=16, - r_cost=0.02, - w_cost=0.02, - area=0, - r_port=2, - w_port=2, - rw_port=0, - latency=1, - ) - - ##################################### on-chip memory hierarchy building blocks ##################################### - - sram_64KB_with_8_8K_64_1r_1w_I = MemoryInstance( - name="sram_64KB_I", - size=8192 * 8, - r_bw=64 * 8, - w_bw=64 * 8, - r_cost=3.32 * 8, - w_cost=3.84 * 8, - area=0, - r_port=1, - w_port=1, - rw_port=0, - latency=1, - min_r_granularity=64, - min_w_granularity=64, - ) - - sram_64KB_with_8_8K_256_1r_1w_W = MemoryInstance( - name="sram_64KB_W", - size=8192 * 8, - r_bw=256 * 8, - w_bw=256 * 8, - r_cost=6.27 * 8, - w_cost=13.5 * 8, - area=0, - r_port=1, - w_port=1, - rw_port=0, - latency=1, - min_r_granularity=64, - min_w_granularity=64, - ) - - sram_256KB_with_8_32KB_256_1r_1w_O = MemoryInstance( - name="sram_256KB_O", - size=32768 * 8 * 8, - r_bw=256 * 8, - w_bw=256 * 8, - r_cost=15.4 * 8, - w_cost=26.6 * 8, - area=0, - r_port=1, - w_port=1, - rw_port=0, - latency=1, - min_r_granularity=64, - min_w_granularity=64, - ) - - sram_256KB_with_8_32KB_256_1r_1w_O_staging = MemoryInstance( - name="sram_256KB_O_staging", - size=32768 * 8 * 8 + 1, - r_bw=256 * 8, - w_bw=256 * 8, - r_cost=15.4 * 8, - w_cost=26.6 * 8, - area=0, - r_port=1, - w_port=1, - rw_port=0, - latency=1, - min_r_granularity=64, - min_w_granularity=64, - ) - - sram_1M_with_8_128K_bank_128_1r_1w_A = MemoryInstance( - name="sram_1MB_A", - size=131072 * 8 * 8, - r_bw=512 * 8, - w_bw=512 * 8, - r_cost=58.2 * 8, - w_cost=103.2 * 8, - area=0, - r_port=1, - w_port=1, - rw_port=0, - latency=1, - min_r_granularity=64, - min_w_granularity=64, - ) - - sram_1M_with_8_128K_bank_128_1r_1w_W = MemoryInstance( - name="sram_1MB_W", - size=131072 * 8 * 8, - r_bw=512 * 8, - w_bw=512 * 8, - r_cost=58.2 * 8, - w_cost=103.2 * 8, - area=0, - r_port=1, - w_port=1, - rw_port=0, - latency=1, - min_r_granularity=64, - min_w_granularity=64, - ) - - ####################################################################################################################### - - dram = MemoryInstance( - name="dram", - size=10000000000, - r_bw=64, - w_bw=64, - r_cost=700, - w_cost=750, - area=0, - r_port=0, - w_port=0, - rw_port=1, - latency=1, - ) - - memory_hierarchy_graph = MemoryHierarchy(operational_array=multiplier_array) - - """ - fh: from high = wr_in_by_high - fl: from low = wr_in_by_low - th: to high = rd_out_to_high - tl: to low = rd_out_to_low - """ - # we don't have unrolled I-Reg to better support G unrolling - # memory_hierarchy_graph.add_memory(memory_instance=reg_IW1, operands=('I1',), - # port_alloc=({'fh': 'w_port_1', 'tl': 'r_port_1', 'fl': None, 'th': None},), - # served_dimensions={(0, 0, 0, 0)}) - memory_hierarchy_graph.add_memory( - memory_instance=reg_W1, - operands=("I2",), - port_alloc=({"fh": "w_port_1", "tl": "r_port_1"},), - served_dimensions=("D3", "D4"), - ) - memory_hierarchy_graph.add_memory( - memory_instance=reg_O1, - operands=("O",), - port_alloc=({"fh": "w_port_1", "tl": "r_port_1", "fl": "w_port_2", "th": "r_port_2"},), - served_dimensions=("D2",), - ) - - ##################################### on-chip highest memory hierarchy initialization 
##################################### - - memory_hierarchy_graph.add_memory( - memory_instance=sram_64KB_with_8_8K_256_1r_1w_W, - operands=("I2",), - port_alloc=({"fh": "w_port_1", "tl": "r_port_1"},), - served_dimensions=("D1", "D2", "D3", "D4"), - ) - - memory_hierarchy_graph.add_memory( - memory_instance=sram_64KB_with_8_8K_64_1r_1w_I, - operands=("I1",), - port_alloc=({"fh": "w_port_1", "tl": "r_port_1"},), - served_dimensions=("D1", "D2", "D3", "D4"), - ) - - memory_hierarchy_graph.add_memory( - memory_instance=sram_256KB_with_8_32KB_256_1r_1w_O, - operands=("O",), - port_alloc=({"fh": "w_port_1", "tl": "r_port_1", "fl": "w_port_1", "th": "r_port_1"},), - served_dimensions=("D1", "D2", "D3", "D4"), - ) - - # memory_hierarchy_graph.add_memory(memory_instance=sram_256KB_with_8_32KB_256_1r_1w_O_staging, operands=('O',), - # port_alloc=({'fh': 'w_port_1', 'tl': 'r_port_1', 'fl': 'w_port_1', 'th': 'r_port_1'},), - # served_dimensions='all') - - memory_hierarchy_graph.add_memory( - memory_instance=sram_1M_with_8_128K_bank_128_1r_1w_W, - operands=("I2",), - port_alloc=({"fh": "w_port_1", "tl": "r_port_1"},), - served_dimensions=("D1", "D2", "D3", "D4"), - ) - memory_hierarchy_graph.add_memory( - memory_instance=sram_1M_with_8_128K_bank_128_1r_1w_A, - operands=("I1", "O"), - port_alloc=( - {"fh": "w_port_1", "tl": "r_port_1"}, - {"fh": "w_port_1", "tl": "r_port_1", "fl": "w_port_1", "th": "r_port_1"}, - ), - served_dimensions=("D1", "D2", "D3", "D4"), - ) - - #################################################################################################################### - - memory_hierarchy_graph.add_memory( - memory_instance=dram, - operands=("I1", "I2", "O"), - port_alloc=( - {"fh": "rw_port_1", "tl": "rw_port_1"}, - {"fh": "rw_port_1", "tl": "rw_port_1"}, - { - "fh": "rw_port_1", - "tl": "rw_port_1", - "fl": "rw_port_1", - "th": "rw_port_1", - }, - ), - served_dimensions=("D1", "D2", "D3", "D4"), - ) - if visualize: - from zigzag.visualization.graph.memory_hierarchy import ( - visualize_memory_hierarchy_graph, - ) - - visualize_memory_hierarchy_graph(memory_hierarchy_graph) - return memory_hierarchy_graph - - -def multiplier_array_dut(): - """Multiplier array variables""" - multiplier_input_precision = [8, 8] - multiplier_energy = 0.04 - multiplier_area = 1 - dimensions = { - "D1": 16, - "D2": 16, - "D3": 2, - "D4": 2, - } # {'D1': ('K', 16), 'D2': ('C', 16), 'D3': ('OX', 2), 'D4': ('OY', 2),} - - multiplier = Multiplier(multiplier_input_precision, multiplier_energy, multiplier_area) - multiplier_array = MultiplierArray(multiplier, dimensions) - - return multiplier_array - - -def cores_dut(): - multiplier_array1 = multiplier_array_dut() - memory_hierarchy1 = memory_hierarchy_dut(multiplier_array1) - - core1 = Core(1, multiplier_array1, memory_hierarchy1) - - return {core1} - - -cores = cores_dut() -acc_name = os.path.basename(__file__)[:-3] -accelerator = Accelerator(acc_name, cores) diff --git a/zigzag/inputs/examples/hardware/Dimc.py b/zigzag/inputs/examples/hardware/Dimc.py deleted file mode 100644 index dfa3fd94..00000000 --- a/zigzag/inputs/examples/hardware/Dimc.py +++ /dev/null @@ -1,248 +0,0 @@ -import os -import random -from zigzag.hardware.architecture.MemoryHierarchy import MemoryHierarchy -from zigzag.hardware.architecture.MemoryInstance import MemoryInstance -from zigzag.hardware.architecture.Accelerator import Accelerator -from zigzag.hardware.architecture.Core import Core -from zigzag.hardware.architecture.ImcArray import ImcArray -from 
zigzag.hardware.architecture.get_cacti_cost import ( - get_w_cost_per_weight_from_cacti, -) -from zigzag.hardware.architecture.get_cacti_cost import get_cacti_cost - -# Digital In-Memory Computing (DIMC) core definition -# This example will define an DIMC core with a single macro, sized 32 rows x 32 columns. -# Supported operand precision: 8 bit -# Technology node: 28 nm -# The architecture hierarchy looks like: -# ------- dram (I, W, O) ---------- -# | | -# sram (I, O) cell_group (W) -# |-> reg_I1 (I) --> imc_array <--| -# | | -# | <---> reg_O1 (O) <--> | - - -def memory_hierarchy_dut(imc_array, visualize=False): - """[OPTIONAL] Get w_cost of imc cell group from CACTI if required""" - cacti_path = "zigzag/classes/cacti/cacti_master" - tech_param = imc_array.unit.logic_unit.tech_param - hd_param = imc_array.unit.hd_param - dimensions = imc_array.unit.dimensions - output_precision = hd_param["input_precision"] + hd_param["weight_precision"] - if hd_param["enable_cacti"]: - # unit: pJ/weight writing - w_cost_per_weight_writing = get_w_cost_per_weight_from_cacti(cacti_path, tech_param, hd_param, dimensions) - else: - w_cost_per_weight_writing = hd_param["w_cost_per_weight_writing"] # user-provided value (unit: pJ/weight) - - """Memory hierarchy variables""" - """ size=#bit, bw=(read bw, write bw), cost=(read word energy, write work energy) """ - cell_group = MemoryInstance( - name="cell_group", - size=hd_param["weight_precision"] * hd_param["group_depth"], - r_bw=hd_param["weight_precision"], - w_bw=hd_param["weight_precision"], - r_cost=0, - w_cost=w_cost_per_weight_writing, # unit: pJ/weight - area=0, # this area is already included in imc_array - r_port=0, # no standalone read port - w_port=0, # no standalone write port - rw_port=1, # 1 port for both reading and writing - latency=0, # no extra clock cycle required - ) - reg_I1 = MemoryInstance( - name="rf_I1", - size=hd_param["input_precision"], - r_bw=hd_param["input_precision"], - w_bw=hd_param["input_precision"], - r_cost=0, - w_cost=tech_param["dff_cap"] * (tech_param["vdd"] ** 2) * hd_param["input_precision"], # pJ/access - area=tech_param["dff_area"] * hd_param["input_precision"], # mm^2 - r_port=1, - w_port=1, - rw_port=0, - latency=1, - ) - - reg_O1 = MemoryInstance( - name="rf_O1", - size=output_precision, - r_bw=output_precision, - w_bw=output_precision, - r_cost=0, - w_cost=tech_param["dff_cap"] * (tech_param["vdd"] ** 2) * output_precision, # pJ/access - area=tech_param["dff_area"] * output_precision, # mm^2 - r_port=2, - w_port=2, - rw_port=0, - latency=1, - ) - - ##################################### on-chip memory hierarchy building blocks ##################################### - - sram_size = 256 * 1024 # unit: byte - sram_bw = max( - imc_array.unit.bl_dim_size * hd_param["input_precision"] * imc_array.unit.nb_of_banks, - imc_array.unit.wl_dim_size * output_precision * imc_array.unit.nb_of_banks, - ) - ac_time, sram_area, sram_r_cost, sram_w_cost = get_cacti_cost( - cacti_path, - tech_param["tech_node"], - "sram", - sram_size, - sram_bw, - hd_hash=str(hash((sram_size, sram_bw, random.randbytes(8)))), - ) - sram_256KB_256_3r_3w = MemoryInstance( - name="sram_256KB", - size=sram_size * 8, # byte -> bit - r_bw=sram_bw, - w_bw=sram_bw, - r_cost=sram_r_cost, - w_cost=sram_w_cost, - area=sram_area, - r_port=3, - w_port=3, - rw_port=0, - latency=1, - min_r_granularity=sram_bw // 16, # assume there are 16 sub-banks - min_w_granularity=sram_bw // 16, # assume there are 16 sub-banks - ) - - 
####################################################################################################################### - - dram_size = 1 * 1024 * 1024 * 1024 # unit: byte - dram_ac_cost_per_bit = 3.7 # unit: pJ/bit - dram_bw = imc_array.unit.wl_dim_size * hd_param["weight_precision"] * imc_array.unit.nb_of_banks - dram_100MB_32_3r_3w = MemoryInstance( - name="dram_1GB", - size=dram_size * 8, # byte -> bit - r_bw=dram_bw, - w_bw=dram_bw, - r_cost=dram_ac_cost_per_bit * dram_bw, # pJ/access - w_cost=dram_ac_cost_per_bit * dram_bw, # pJ/access - area=0, - r_port=3, - w_port=3, - rw_port=0, - latency=1, - min_r_granularity=dram_bw // 16, # assume there are 16 sub-banks - min_w_granularity=dram_bw // 16, # assume there are 16 sub-banks - ) - - memory_hierarchy_graph = MemoryHierarchy(operational_array=imc_array) - - """ - fh: from high = wr_in_by_high - fl: from low = wr_in_by_low - th: to high = rd_out_to_high - tl: to low = rd_out_to_low - """ - memory_hierarchy_graph.add_memory( - memory_instance=cell_group, - operands=("I2",), - port_alloc=({"fh": "rw_port_1", "tl": "rw_port_1"},), - ) - memory_hierarchy_graph.add_memory( - memory_instance=reg_I1, - operands=("I1",), - port_alloc=({"fh": "w_port_1", "tl": "r_port_1"},), - served_dimensions=("D1",), - ) - memory_hierarchy_graph.add_memory( - memory_instance=reg_O1, - operands=("O",), - port_alloc=({"fh": "w_port_1", "tl": "r_port_1", "fl": "w_port_2", "th": "r_port_2"},), - served_dimensions=("D2",), - ) - - ##################################### on-chip highest memory hierarchy initialization ##################################### - - memory_hierarchy_graph.add_memory( - memory_instance=sram_256KB_256_3r_3w, - operands=( - "I1", - "O", - ), - port_alloc=( - {"fh": "w_port_1", "tl": "r_port_1"}, - {"fh": "w_port_2", "tl": "r_port_2", "fl": "w_port_3", "th": "r_port_3"}, - ), - served_dimensions=("D1", "D2", "D3"), - ) - - #################################################################################################################### - - memory_hierarchy_graph.add_memory( - memory_instance=dram_100MB_32_3r_3w, - operands=("I1", "I2", "O"), - port_alloc=( - {"fh": "w_port_1", "tl": "r_port_1"}, - {"fh": "w_port_2", "tl": "r_port_2"}, - {"fh": "w_port_1", "tl": "r_port_1", "fl": "w_port_3", "th": "r_port_3"}, - ), - served_dimensions=("D1", "D2", "D3"), - ) - - if visualize: - from zigzag.visualization.graph.memory_hierarchy import ( - visualize_memory_hierarchy_graph, - ) - - visualize_memory_hierarchy_graph(memory_hierarchy_graph) - return memory_hierarchy_graph - - -def imc_array_dut(): - """Multiplier array variables""" - tech_param = { # 28nm - "tech_node": 0.028, # unit: um - "vdd": 0.9, # unit: V - "nd2_cap": 0.7 / 1e3, # unit: pF - "xor2_cap": 0.7 * 1.5 / 1e3, # unit: pF - "dff_cap": 0.7 * 3 / 1e3, # unit: pF - "nd2_area": 0.614 / 1e6, # unit: mm^2 - "xor2_area": 0.614 * 2.4 / 1e6, # unit: mm^2 - "dff_area": 0.614 * 6 / 1e6, # unit: mm^2 - "nd2_dly": 0.0478, # unit: ns - "xor2_dly": 0.0478 * 2.4, # unit: ns - # "dff_dly": 0.0478*3.4, # unit: ns - } - hd_param = { - "pe_type": "in_sram_computing", # for in-memory-computing. Digital core for different values. 
- "imc_type": "digital", # "digital" or "analog" - "input_precision": 8, # activation precision expected in the hardware - "weight_precision": 8, # weight precision expected in the hardware - "input_bit_per_cycle": 1, # nb_bits of input/cycle/PE - "group_depth": 1, # #cells/multiplier - "wordline_dimension": "D1", # hardware dimension where wordline is (corresponds to the served dimension of input regs) - "bitline_dimension": "D2", # hardware dimension where bitline is (corresponds to the served dimension of output regs) - "enable_cacti": True, # use CACTI to estimated cell array area cost (cell array exclude build-in logic part) - # Energy of writing weight. Required when enable_cacti is False. - # "w_cost_per_weight_writing": 0.08, # [OPTIONAL] unit: pJ/weight. - } - - dimensions = { - "D1": 4, # wordline dimension - "D2": 32, # bitline dimension - "D3": 1, # nb_macros (nb_arrays) - } # e.g. {"D1": ("K", 4), "D2": ("C", 32),} - - imc_array = ImcArray(tech_param, hd_param, dimensions) - - return imc_array - - -def cores_dut(): - imc_array1 = imc_array_dut() - memory_hierarchy1 = memory_hierarchy_dut(imc_array1) - - core1 = Core(1, imc_array1, memory_hierarchy1) - - return {core1} - - -cores = cores_dut() -acc_name = os.path.basename(__file__)[:-3] -accelerator = Accelerator(acc_name, cores) diff --git a/zigzag/inputs/examples/hardware/Edge_TPU_like.py b/zigzag/inputs/examples/hardware/Edge_TPU_like.py deleted file mode 100644 index 67e7f366..00000000 --- a/zigzag/inputs/examples/hardware/Edge_TPU_like.py +++ /dev/null @@ -1,188 +0,0 @@ -import os -from zigzag.hardware.architecture.MemoryHierarchy import MemoryHierarchy -from zigzag.hardware.architecture.operational_unit import Multiplier -from zigzag.hardware.architecture.operational_array import MultiplierArray -from zigzag.hardware.architecture.MemoryInstance import MemoryInstance -from zigzag.hardware.architecture.Accelerator import Accelerator -from zigzag.hardware.architecture.Core import Core - - -def memory_hierarchy_dut(multiplier_array, visualize=False): - """! 
Memory hierarchy variables - size=#bit, bw=(read bw, write bw), cost=(read word energy, write work energy)""" - - reg_IW1 = MemoryInstance( - name="rf_1B", - size=8, - r_bw=8, - w_bw=8, - r_cost=0.01, - w_cost=0.01, - area=0, - r_port=1, - w_port=1, - rw_port=0, - latency=1, - ) - - reg_O1 = MemoryInstance( - name="rf_2B", - size=16, - r_bw=16, - w_bw=16, - r_cost=0.02, - w_cost=0.02, - area=0, - r_port=2, - w_port=2, - rw_port=0, - latency=1, - ) - - ##################################### on-chip memory hierarchy building blocks ##################################### - - sram_32KB_512_1r_1w = MemoryInstance( - name="sram_32KB", - size=32768 * 8, - r_bw=512, - w_bw=512, - r_cost=22.9, - w_cost=52.01, - area=0, - r_port=1, - w_port=1, - rw_port=0, - latency=1, - min_r_granularity=64, - min_w_granularity=64, - ) - - sram_2M_with_16_128K_bank_128_1r_1w = MemoryInstance( - name="sram_2MB", - size=131072 * 16 * 8, - r_bw=128 * 16, - w_bw=128 * 16, - r_cost=26.01 * 16, - w_cost=23.65 * 16, - area=0, - r_port=1, - w_port=1, - rw_port=0, - latency=1, - min_r_granularity=64, - min_w_granularity=64, - ) - - ####################################################################################################################### - - dram = MemoryInstance( - name="dram", - size=10000000000, - r_bw=64, - w_bw=64, - r_cost=700, - w_cost=750, - area=0, - r_port=0, - w_port=0, - rw_port=1, - latency=1, - ) - - memory_hierarchy_graph = MemoryHierarchy(operational_array=multiplier_array) - - # fh: from high = wr_in_by_high - # fl: from low = wr_in_by_low - # th: to high = rd_out_to_high - # tl: to low = rd_out_to_low - # we don't have unrolled I-Reg to better support G unrolling - # memory_hierarchy_graph.add_memory(memory_instance=reg_IW1, operands=('I1',), - # port_alloc=({'fh': 'w_port_1', 'tl': 'r_port_1', 'fl': None, 'th': None},), - # served_dimensions={(0, 0, 0, 0)}) - memory_hierarchy_graph.add_memory( - memory_instance=reg_IW1, - operands=("I2",), - port_alloc=({"fh": "w_port_1", "tl": "r_port_1"},), - served_dimensions=("D3", "D4"), - ) - memory_hierarchy_graph.add_memory( - memory_instance=reg_O1, - operands=("O",), - port_alloc=({"fh": "w_port_1", "tl": "r_port_1", "fl": "w_port_2", "th": "r_port_2"},), - served_dimensions=("D2",), - ) - - ##################################### on-chip highest memory hierarchy initialization ##################################### - - memory_hierarchy_graph.add_memory( - memory_instance=sram_32KB_512_1r_1w, - operands=("I2",), - port_alloc=({"fh": "w_port_1", "tl": "r_port_1"},), - served_dimensions=("D1", "D2", "D3", "D4"), - ) - memory_hierarchy_graph.add_memory( - memory_instance=sram_2M_with_16_128K_bank_128_1r_1w, - operands=("I1", "O"), - port_alloc=( - {"fh": "w_port_1", "tl": "r_port_1"}, - {"fh": "w_port_1", "tl": "r_port_1", "fl": "w_port_1", "th": "r_port_1"}, - ), - served_dimensions=("D1", "D2", "D3", "D4"), - ) - - #################################################################################################################### - - memory_hierarchy_graph.add_memory( - memory_instance=dram, - operands=("I1", "I2", "O"), - port_alloc=( - {"fh": "rw_port_1", "tl": "rw_port_1"}, - {"fh": "rw_port_1", "tl": "rw_port_1"}, - { - "fh": "rw_port_1", - "tl": "rw_port_1", - "fl": "rw_port_1", - "th": "rw_port_1", - }, - ), - served_dimensions=("D1", "D2", "D3", "D4"), - ) - if visualize: - from zigzag.visualization.graph.memory_hierarchy import ( - visualize_memory_hierarchy_graph, - ) - - visualize_memory_hierarchy_graph(memory_hierarchy_graph) - return 
memory_hierarchy_graph - - -def multiplier_array_dut(): - """Multiplier array variables""" - multiplier_input_precision = [8, 8] - multiplier_energy = 0.04 - multiplier_area = 1 - dimensions = { - "D1": 8, - "D2": 8, - "D3": 4, - "D4": 4, - } # {'D1': ('K', 8), 'D2': ('C', 8), 'D3': ('OX', 4), 'D4': ('OY', 4),} - - multiplier = Multiplier(multiplier_input_precision, multiplier_energy, multiplier_area) - multiplier_array = MultiplierArray(multiplier, dimensions) - - return multiplier_array - - -def cores_dut(): - multiplier_array1 = multiplier_array_dut() - memory_hierarchy1 = memory_hierarchy_dut(multiplier_array1) - - core1 = Core(1, multiplier_array1, memory_hierarchy1) - - return {core1} - - -cores = cores_dut() -acc_name = os.path.basename(__file__)[:-3] -accelerator = Accelerator(acc_name, cores) diff --git a/zigzag/inputs/examples/hardware/Eyeriss_like.py b/zigzag/inputs/examples/hardware/Eyeriss_like.py deleted file mode 100644 index 870b6f86..00000000 --- a/zigzag/inputs/examples/hardware/Eyeriss_like.py +++ /dev/null @@ -1,196 +0,0 @@ -from zigzag.hardware.architecture.MemoryHierarchy import MemoryHierarchy -from zigzag.hardware.architecture.memory_level import MemoryLevel -from zigzag.hardware.architecture.operational_unit import Multiplier -from zigzag.hardware.architecture.operational_array import MultiplierArray -from zigzag.hardware.architecture.MemoryInstance import MemoryInstance -from zigzag.hardware.architecture.Accelerator import Accelerator -from zigzag.hardware.architecture.Core import Core - - -def memory_hierarchy_latency_test1(multiplier_array, visualize=False): - """! Memory hierarchy variables - size=#bit, bw=(read bw, write bw), cost=(read word energy, write work energy)""" - rf1 = MemoryInstance( - name="rf_64B", - size=512, - r_bw=8, - w_bw=8, - r_cost=1.0, - w_cost=1.5, - area=0.3, - r_port=1, - w_port=1, - rw_port=0, - latency=1, - ) # rd E per bit 0.125 - rf2 = MemoryInstance( - name="rf_16B", - size=128, - r_bw=24, - w_bw=24, - r_cost=1.5, - w_cost=2, - area=0.95, - r_port=1, - w_port=1, - rw_port=1, - latency=1, - ) # rd E per bit 0.0625 - # lb1 = MemoryInstance(name="sram_64KB", size=524288, r_bw=128, w_bw=128, r_cost=20, w_cost=25, area=6, r_port=1, w_port=1, rw_port=0, latency=1) # rd E per bit 0.16 - lb2 = MemoryInstance( - name="sram_8KB", - size=65536, - r_bw=128, - w_bw=128, - r_cost=10, - w_cost=15, - r_port=0, - area=3, - w_port=0, - rw_port=2, - latency=1, - ) # rd E per bit 0.08 - lb2_64KB = MemoryInstance( - name="sram_64KB", - size=524288, - r_bw=128, - w_bw=128, - r_cost=20, - w_cost=25, - area=6, - r_port=1, - w_port=1, - rw_port=0, - latency=1, - ) # rd E per bit 0.08 - gb = MemoryInstance( - name="sram_1M", - size=8388608, - r_bw=384, - w_bw=384, - r_cost=100, - w_cost=130, - area=25, - r_port=0, - w_port=0, - rw_port=2, - latency=1, - ) # rd E per bit 0.26 - dram = MemoryInstance( - name="dram", - size=10000000000, - r_bw=64, - w_bw=64, - r_cost=1000, - w_cost=1000, - area=0, - r_port=0, - w_port=0, - rw_port=1, - latency=1, - ) # rd E per bit 16 - - memory_hierarchy_graph = MemoryHierarchy(operational_array=multiplier_array) - - """ - fh: from high = wr_in_by_high - fl: from low = wr_in_by_low - th: to high = rd_out_to_high - tl: to low = rd_out_to_low - """ - memory_hierarchy_graph.add_memory( - memory_instance=rf1, - operands=("I1",), - port_alloc=({"fh": "w_port_1", "tl": "r_port_1"},), - ) - memory_hierarchy_graph.add_memory( - memory_instance=rf1, - operands=("I2",), - port_alloc=({"fh": "w_port_1", "tl": "r_port_1"},), - ) - 
memory_hierarchy_graph.add_memory( - memory_instance=rf2, - operands=("O",), - port_alloc=({"fh": "rw_port_1", "tl": "r_port_1", "fl": "w_port_1", "th": "rw_port_1"},), - ) - - memory_hierarchy_graph.add_memory( - memory_instance=lb2, - operands=("O",), - port_alloc=( - { - "fh": "rw_port_1", - "tl": "rw_port_2", - "fl": "rw_port_2", - "th": "rw_port_1", - }, - ), - served_dimensions=("D1", "D2"), - ) - memory_hierarchy_graph.add_memory( - memory_instance=lb2_64KB, - operands=("I2",), - port_alloc=({"fh": "w_port_1", "tl": "r_port_1"},), - served_dimensions=("D1", "D2"), - ) - memory_hierarchy_graph.add_memory( - memory_instance=gb, - operands=("I1", "O"), - port_alloc=( - {"fh": "rw_port_1", "tl": "rw_port_2"}, - { - "fh": "rw_port_1", - "tl": "rw_port_2", - "fl": "rw_port_2", - "th": "rw_port_1", - }, - ), - served_dimensions=("D1", "D2"), - ) - memory_hierarchy_graph.add_memory( - memory_instance=dram, - operands=("I1", "I2", "O"), - port_alloc=( - {"fh": "rw_port_1", "tl": "rw_port_1"}, - {"fh": "rw_port_1", "tl": "rw_port_1"}, - { - "fh": "rw_port_1", - "tl": "rw_port_1", - "fl": "rw_port_1", - "th": "rw_port_1", - }, - ), - served_dimensions=("D1", "D2"), - ) - if visualize: - from zigzag.visualization.graph.memory_hierarchy import ( - visualize_memory_hierarchy_graph, - ) - - visualize_memory_hierarchy_graph(memory_hierarchy_graph) - return memory_hierarchy_graph - - -def multiplier_array_latency_test1(): - """Multiplier array variables""" - multiplier_input_precision = [8, 8] - multiplier_energy = 0.5 - multiplier_area = 0.1 - dimensions = {"D1": 14, "D2": 12} - multiplier = Multiplier(multiplier_input_precision, multiplier_energy, multiplier_area) - multiplier_array = MultiplierArray(multiplier, dimensions) - - return multiplier_array - - -def cores_dut(): - multiplier_array1 = multiplier_array_latency_test1() - memory_hierarchy1 = memory_hierarchy_latency_test1(multiplier_array1) - - core1 = Core(1, multiplier_array1, memory_hierarchy1) - - return {core1} - - -cores = cores_dut() -accelerator = Accelerator("Eyeriss-like-simple", cores) diff --git a/zigzag/inputs/examples/hardware/Meta_prototype.py b/zigzag/inputs/examples/hardware/Meta_prototype.py deleted file mode 100644 index 76ce063c..00000000 --- a/zigzag/inputs/examples/hardware/Meta_prototype.py +++ /dev/null @@ -1,236 +0,0 @@ -import os -from zigzag.hardware.architecture.MemoryHierarchy import MemoryHierarchy -from zigzag.hardware.architecture.memory_level import MemoryLevel -from zigzag.hardware.architecture.operational_unit import Multiplier -from zigzag.hardware.architecture.operational_array import MultiplierArray -from zigzag.hardware.architecture.MemoryInstance import MemoryInstance -from zigzag.hardware.architecture.Accelerator import Accelerator -from zigzag.hardware.architecture.Core import Core - - -def memory_hierarchy_dut(multiplier_array, visualize=False): - """! 
Memory hierarchy variables - size=#bit, bw=(read bw, write bw), cost=(read word energy, write work energy)""" - - reg_IW1 = MemoryInstance( - name="rf_1B", - size=8, - r_bw=8, - w_bw=8, - r_cost=0.01, - w_cost=0.01, - area=0, - r_port=1, - w_port=1, - rw_port=0, - latency=1, - ) - - reg_O1 = MemoryInstance( - name="rf_2B", - size=16, - r_bw=16, - w_bw=16, - r_cost=0.02, - w_cost=0.02, - area=0, - r_port=2, - w_port=2, - rw_port=0, - latency=1, - ) - - ##################################### on-chip memory hierarchy building blocks ##################################### - - sram_64KB_with_8_8K_64_1r_1w = MemoryInstance( - name="sram_64KB", - size=8192 * 8 * 8, - r_bw=64 * 8, - w_bw=64 * 8, - r_cost=3.32 * 8, - w_cost=3.85 * 8, - area=0, - r_port=1, - w_port=1, - rw_port=0, - latency=1, - min_r_granularity=64, - min_w_granularity=64, - ) - - sram_32KB_with_4_8K_64_1r_1w = MemoryInstance( - name="sram_32KB", - size=8192 * 4 * 8, - r_bw=64 * 4, - w_bw=64 * 4, - r_cost=3.32 * 4, - w_cost=3.85 * 4, - area=0, - r_port=1, - w_port=1, - rw_port=0, - latency=1, - min_r_granularity=64, - min_w_granularity=64, - ) - - sram_1M_with_8_128K_bank_128_1r_1w_A = MemoryInstance( - name="sram_1MB_A", - size=131072 * 8 * 8, - r_bw=128 * 8, - w_bw=128 * 8, - r_cost=26.01 * 8, - w_cost=23.65 * 8, - area=0, - r_port=1, - w_port=1, - rw_port=0, - latency=1, - min_r_granularity=64, - min_w_granularity=64, - ) - - sram_1M_with_8_128K_bank_128_1r_1w_W = MemoryInstance( - name="sram_1MB_W", - size=131072 * 8 * 8, - r_bw=128 * 8, - w_bw=128 * 8, - r_cost=26.01 * 8, - w_cost=23.65 * 8, - area=0, - r_port=1, - w_port=1, - rw_port=0, - latency=1, - min_r_granularity=64, - min_w_granularity=64, - ) - - ####################################################################################################################### - - dram = MemoryInstance( - name="dram", - size=10000000000, - r_bw=64, - w_bw=64, - r_cost=700, - w_cost=750, - area=0, - r_port=0, - w_port=0, - rw_port=1, - latency=1, - ) - - memory_hierarchy_graph = MemoryHierarchy(operational_array=multiplier_array) - - """ - fh: from high = wr_in_by_high - fl: from low = wr_in_by_low - th: to high = rd_out_to_high - tl: to low = rd_out_to_low - """ - # we don't have unrolled I-Reg to better support G unrolling - # memory_hierarchy_graph.add_memory(memory_instance=reg_IW1, operands=('I1',), - # port_alloc=({'fh': 'w_port_1', 'tl': 'r_port_1', 'fl': None, 'th': None},), - # served_dimensions={(0, 0, 0, 0)}) - memory_hierarchy_graph.add_memory( - memory_instance=reg_IW1, - operands=("I2",), - port_alloc=({"fh": "w_port_1", "tl": "r_port_1"},), - served_dimensions=("D3", "D4"), - ) - memory_hierarchy_graph.add_memory( - memory_instance=reg_O1, - operands=("O",), - port_alloc=({"fh": "w_port_1", "tl": "r_port_1", "fl": "w_port_2", "th": "r_port_2"},), - served_dimensions=("D2",), - ) - - ##################################### on-chip highest memory hierarchy initialization ##################################### - - memory_hierarchy_graph.add_memory( - memory_instance=sram_64KB_with_8_8K_64_1r_1w, - operands=("I2",), - port_alloc=({"fh": "w_port_1", "tl": "r_port_1"},), - served_dimensions=("D1", "D2", "D3", "D4"), - ) - memory_hierarchy_graph.add_memory( - memory_instance=sram_1M_with_8_128K_bank_128_1r_1w_W, - operands=("I2",), - port_alloc=({"fh": "w_port_1", "tl": "r_port_1"},), - served_dimensions=("D1", "D2", "D3", "D4"), - ) - - memory_hierarchy_graph.add_memory( - memory_instance=sram_32KB_with_4_8K_64_1r_1w, - operands=("I1",), - port_alloc=({"fh": "w_port_1", "tl": 
"r_port_1"},), - served_dimensions=("D1", "D2", "D3", "D4"), - ) - memory_hierarchy_graph.add_memory( - memory_instance=sram_1M_with_8_128K_bank_128_1r_1w_A, - operands=("I1", "O"), - port_alloc=( - {"fh": "w_port_1", "tl": "r_port_1"}, - {"fh": "w_port_1", "tl": "r_port_1", "fl": "w_port_1", "th": "r_port_1"}, - ), - served_dimensions=("D1", "D2", "D3", "D4"), - ) - - #################################################################################################################### - - memory_hierarchy_graph.add_memory( - memory_instance=dram, - operands=("I1", "I2", "O"), - port_alloc=( - {"fh": "rw_port_1", "tl": "rw_port_1"}, - {"fh": "rw_port_1", "tl": "rw_port_1"}, - { - "fh": "rw_port_1", - "tl": "rw_port_1", - "fl": "rw_port_1", - "th": "rw_port_1", - }, - ), - served_dimensions=("D1", "D2", "D3", "D4"), - ) - if visualize: - from zigzag.visualization.graph.memory_hierarchy import ( - visualize_memory_hierarchy_graph, - ) - - visualize_memory_hierarchy_graph(memory_hierarchy_graph) - return memory_hierarchy_graph - - -def multiplier_array_dut(): - """Multiplier array variables""" - multiplier_input_precision = [8, 8] - multiplier_energy = 0.04 - multiplier_area = 1 - dimensions = { - "D1": 32, - "D2": 2, - "D3": 4, - "D4": 4, - } # {'D1': ('K', 32), 'D2': ('C', 2), 'D3': ('OX', 4), 'D4': ('OY', 4),} - - multiplier = Multiplier(multiplier_input_precision, multiplier_energy, multiplier_area) - multiplier_array = MultiplierArray(multiplier, dimensions) - - return multiplier_array - - -def cores_dut(): - multiplier_array1 = multiplier_array_dut() - memory_hierarchy1 = memory_hierarchy_dut(multiplier_array1) - - core1 = Core(1, multiplier_array1, memory_hierarchy1) - - return {core1} - - -cores = cores_dut() -acc_name = os.path.basename(__file__)[:-3] -accelerator = Accelerator(acc_name, cores) diff --git a/zigzag/inputs/examples/hardware/TPU_like.py b/zigzag/inputs/examples/hardware/TPU_like.py deleted file mode 100644 index 1e007eae..00000000 --- a/zigzag/inputs/examples/hardware/TPU_like.py +++ /dev/null @@ -1,163 +0,0 @@ -import os -from zigzag.hardware.architecture.MemoryHierarchy import MemoryHierarchy -from zigzag.hardware.architecture.memory_level import MemoryLevel -from zigzag.hardware.architecture.operational_unit import Multiplier -from zigzag.hardware.architecture.operational_array import MultiplierArray -from zigzag.hardware.architecture.MemoryInstance import MemoryInstance -from zigzag.hardware.architecture.Accelerator import Accelerator -from zigzag.hardware.architecture.Core import Core - - -def memory_hierarchy_dut(multiplier_array, visualize=False): - """! 
Memory hierarchy variables - size=#bit, bw=(read bw, write bw), cost=(read word energy, write work energy)""" - - reg_W_128B = MemoryInstance( - name="rf_128B", - size=128 * 8, - r_bw=8, - w_bw=8, - r_cost=0.095, - w_cost=0.095, - area=0, - r_port=1, - w_port=1, - rw_port=0, - latency=1, - ) - - reg_O_2B = MemoryInstance( - name="rf_2B", - size=16, - r_bw=16, - w_bw=16, - r_cost=0.021, - w_cost=0.021, - area=0, - r_port=2, - w_port=2, - rw_port=0, - latency=1, - ) - - ##################################### on-chip memory hierarchy building blocks ##################################### - - # sram_32KB_512_1r_1w = \ - # MemoryInstance(name="sram_32KB", size=32768 * 8, r_bw=512, w_bw=512, r_cost=22.9, w_cost=52.01, area=0, - # r_port=1, w_port=1, rw_port=0, latency=1, min_r_granularity=64, min_w_granularity=64) - - sram_2M_with_16_128K_bank_128_1r_1w = MemoryInstance( - name="sram_2MB", - size=131072 * 16 * 8, - r_bw=128 * 16, - w_bw=128 * 16, - r_cost=26.01 * 16, - w_cost=23.65 * 16, - area=0, - r_port=1, - w_port=1, - rw_port=0, - latency=1, - min_r_granularity=64, - min_w_granularity=64, - ) - - ####################################################################################################################### - - dram = MemoryInstance( - name="dram", - size=10000000000, - r_bw=64, - w_bw=64, - r_cost=700, - w_cost=750, - area=0, - r_port=0, - w_port=0, - rw_port=1, - latency=1, - ) - - memory_hierarchy_graph = MemoryHierarchy(operational_array=multiplier_array) - - """ - fh: from high = wr_in_by_high - fl: from low = wr_in_by_low - th: to high = rd_out_to_high - tl: to low = rd_out_to_low - """ - memory_hierarchy_graph.add_memory( - memory_instance=reg_W_128B, - operands=("I2",), - port_alloc=({"fh": "w_port_1", "tl": "r_port_1"},), - ) - memory_hierarchy_graph.add_memory( - memory_instance=reg_O_2B, - operands=("O",), - port_alloc=({"fh": "w_port_1", "tl": "r_port_1", "fl": "w_port_2", "th": "r_port_2"},), - served_dimensions=("D2",), - ) - - ##################################### on-chip highest memory hierarchy initialization ##################################### - - memory_hierarchy_graph.add_memory( - memory_instance=sram_2M_with_16_128K_bank_128_1r_1w, - operands=("I1", "O"), - port_alloc=( - {"fh": "w_port_1", "tl": "r_port_1"}, - {"fh": "w_port_1", "tl": "r_port_1", "fl": "w_port_1", "th": "r_port_1"}, - ), - served_dimensions=("D1", "D2"), - ) - - #################################################################################################################### - - memory_hierarchy_graph.add_memory( - memory_instance=dram, - operands=("I1", "I2", "O"), - port_alloc=( - {"fh": "rw_port_1", "tl": "rw_port_1"}, - {"fh": "rw_port_1", "tl": "rw_port_1"}, - { - "fh": "rw_port_1", - "tl": "rw_port_1", - "fl": "rw_port_1", - "th": "rw_port_1", - }, - ), - served_dimensions=("D1", "D2"), - ) - if visualize: - from zigzag.visualization.graph.memory_hierarchy import ( - visualize_memory_hierarchy_graph, - ) - - visualize_memory_hierarchy_graph(memory_hierarchy_graph) - return memory_hierarchy_graph - - -def multiplier_array_dut(): - """Multiplier array variables""" - multiplier_input_precision = [8, 8] - multiplier_energy = 0.04 - multiplier_area = 1 - dimensions = {"D1": 32, "D2": 32} # {'D1': ('K', 32), 'D2': ('C', 32)} - - multiplier = Multiplier(multiplier_input_precision, multiplier_energy, multiplier_area) - multiplier_array = MultiplierArray(multiplier, dimensions) - - return multiplier_array - - -def cores_dut(): - multiplier_array1 = multiplier_array_dut() - memory_hierarchy1 = 
memory_hierarchy_dut(multiplier_array1) - - core1 = Core(1, multiplier_array1, memory_hierarchy1) - - return {core1} - - -cores = cores_dut() -acc_name = os.path.basename(__file__)[:-3] -accelerator = Accelerator(acc_name, cores) diff --git a/zigzag/inputs/examples/hardware/Tesla_NPU_like.py b/zigzag/inputs/examples/hardware/Tesla_NPU_like.py deleted file mode 100644 index 92a4c015..00000000 --- a/zigzag/inputs/examples/hardware/Tesla_NPU_like.py +++ /dev/null @@ -1,237 +0,0 @@ -import os -from zigzag.hardware.architecture.MemoryHierarchy import MemoryHierarchy -from zigzag.hardware.architecture.memory_level import MemoryLevel -from zigzag.hardware.architecture.operational_unit import Multiplier -from zigzag.hardware.architecture.operational_array import MultiplierArray -from zigzag.hardware.architecture.MemoryInstance import MemoryInstance -from zigzag.hardware.architecture.Accelerator import Accelerator -from zigzag.hardware.architecture.Core import Core - - -def memory_hierarchy_dut(multiplier_array, visualize=False): - """! Memory hierarchy variables - size=#bit, bw=(read bw, write bw), cost=(read word energy, write work energy)""" - - reg_W1 = MemoryInstance( - name="rf_1B", - size=8, - r_bw=8, - w_bw=8, - r_cost=0.01, - w_cost=0.01, - area=0, - r_port=1, - w_port=1, - rw_port=0, - latency=1, - ) - - reg_O4 = MemoryInstance( - name="rf_4B", - size=32, - r_bw=16, - w_bw=16, - r_cost=0.022, - w_cost=0.022, - area=0, - r_port=2, - w_port=2, - rw_port=0, - latency=1, - ) - - ##################################### on-chip memory hierarchy building blocks ##################################### - - sram_1KB_256_1r_1w_I = MemoryInstance( - name="sram_1KB_I", - size=1024 * 8, - r_bw=256, - w_bw=256, - r_cost=4.78, - w_cost=5.59, - area=0, - r_port=1, - w_port=1, - rw_port=0, - latency=1, - min_r_granularity=64, - min_w_granularity=64, - ) - - sram_1KB_256_1r_1w_W = MemoryInstance( - name="sram_1KB_W", - size=1024 * 8, - r_bw=256, - w_bw=256, - r_cost=4.78, - w_cost=5.59, - area=0, - r_port=1, - w_port=1, - rw_port=0, - latency=1, - min_r_granularity=64, - min_w_granularity=64, - ) - - sram_1M_with_8_128K_bank_128_1r_1w_A = MemoryInstance( - name="sram_1MB_A", - size=131072 * 8 * 8, - r_bw=128 * 8, - w_bw=128 * 8, - r_cost=26.01 * 8, - w_cost=23.65 * 8, - area=0, - r_port=1, - w_port=1, - rw_port=0, - latency=1, - min_r_granularity=64, - min_w_granularity=64, - ) - - sram_1M_with_8_128K_bank_128_1r_1w_W = MemoryInstance( - name="sram_1MB_W", - size=131072 * 8 * 8, - r_bw=128 * 8, - w_bw=128 * 8, - r_cost=26.01 * 8, - w_cost=23.65 * 8, - area=0, - r_port=1, - w_port=1, - rw_port=0, - latency=1, - min_r_granularity=64, - min_w_granularity=64, - ) - - ####################################################################################################################### - - dram = MemoryInstance( - name="dram", - size=10000000000, - r_bw=64, - w_bw=64, - r_cost=700, - w_cost=750, - area=0, - r_port=0, - w_port=0, - rw_port=1, - latency=1, - ) - - memory_hierarchy_graph = MemoryHierarchy(operational_array=multiplier_array) - - """ - fh: from high = wr_in_by_high - fl: from low = wr_in_by_low - th: to high = rd_out_to_high - tl: to low = rd_out_to_low - """ - # we don't have unrolled I-Reg to better support G unrolling - # memory_hierarchy_graph.add_memory(memory_instance=reg_IW1, operands=('I1',), - # port_alloc=({'fh': 'w_port_1', 'tl': 'r_port_1', 'fl': None, 'th': None},), - # served_dimensions={(0, 0, 0, 0)}) - memory_hierarchy_graph.add_memory( - memory_instance=reg_W1, - operands=("I2",), - 
port_alloc=({"fh": "w_port_1", "tl": "r_port_1"},), - served_dimensions=("D2", "D3"), - ) - memory_hierarchy_graph.add_memory( - memory_instance=reg_O4, - operands=("O",), - port_alloc=({"fh": "w_port_1", "tl": "r_port_1", "fl": "w_port_2", "th": "r_port_2"},), - ) - - ##################################### on-chip highest memory hierarchy initialization ##################################### - - memory_hierarchy_graph.add_memory( - memory_instance=sram_1KB_256_1r_1w_I, - operands=("I1",), - port_alloc=({"fh": "w_port_1", "tl": "r_port_1"},), - served_dimensions=("D1", "D2", "D3"), - ) - memory_hierarchy_graph.add_memory( - memory_instance=sram_1KB_256_1r_1w_W, - operands=("I2",), - port_alloc=({"fh": "w_port_1", "tl": "r_port_1"},), - served_dimensions=("D1", "D2", "D3"), - ) - # memory_hierarchy_graph.add_memory(memory_instance=sram_2KB_with_2_1KB_256_1r_1w, operands=('O',), - # port_alloc=({'fh': 'w_port_1', 'tl': 'r_port_1', 'fl': 'w_port_1', 'th': 'r_port_1'},), - # served_dimensions='all') - - memory_hierarchy_graph.add_memory( - memory_instance=sram_1M_with_8_128K_bank_128_1r_1w_W, - operands=("I2",), - port_alloc=({"fh": "w_port_1", "tl": "r_port_1"},), - served_dimensions=("D1", "D2", "D3"), - ) - memory_hierarchy_graph.add_memory( - memory_instance=sram_1M_with_8_128K_bank_128_1r_1w_A, - operands=("I1", "O"), - port_alloc=( - {"fh": "w_port_1", "tl": "r_port_1"}, - {"fh": "w_port_1", "tl": "r_port_1", "fl": "w_port_1", "th": "r_port_1"}, - ), - served_dimensions=("D1", "D2", "D3"), - ) - - #################################################################################################################### - - memory_hierarchy_graph.add_memory( - memory_instance=dram, - operands=("I1", "I2", "O"), - port_alloc=( - {"fh": "rw_port_1", "tl": "rw_port_1"}, - {"fh": "rw_port_1", "tl": "rw_port_1"}, - { - "fh": "rw_port_1", - "tl": "rw_port_1", - "fl": "rw_port_1", - "th": "rw_port_1", - }, - ), - served_dimensions=("D1", "D2", "D3"), - ) - if visualize: - from zigzag.visualization.graph.memory_hierarchy import ( - visualize_memory_hierarchy_graph, - ) - - visualize_memory_hierarchy_graph(memory_hierarchy_graph) - return memory_hierarchy_graph - - -def multiplier_array_dut(): - """Multiplier array variables""" - multiplier_input_precision = [8, 8] - multiplier_energy = 0.04 - multiplier_area = 1 - dimensions = { - "D1": 32, - "D2": 8, - "D3": 4, - } # {'D1': ('K', 32), 'D2': ('OX', 8), 'D3': ('OY', 4),} - - multiplier = Multiplier(multiplier_input_precision, multiplier_energy, multiplier_area) - multiplier_array = MultiplierArray(multiplier, dimensions) - - return multiplier_array - - -def cores_dut(): - multiplier_array1 = multiplier_array_dut() - memory_hierarchy1 = memory_hierarchy_dut(multiplier_array1) - - core1 = Core(1, multiplier_array1, memory_hierarchy1) - - return {core1} - - -cores = cores_dut() -acc_name = os.path.basename(__file__)[:-3] -accelerator = Accelerator(acc_name, cores) diff --git a/zigzag/inputs/examples/mapping/ascend_like.py b/zigzag/inputs/examples/mapping/ascend_like.py deleted file mode 100644 index 17a179e0..00000000 --- a/zigzag/inputs/examples/mapping/ascend_like.py +++ /dev/null @@ -1,22 +0,0 @@ -mapping = { - "default": { - "core_allocation": 1, - "spatial_mapping": { - "D1": ("K", 16), - "D2": ("C", 16), - "D3": ("OX", 2), - "D4": ("OY", 2), - }, - "memory_operand_links": {"O": "O", "W": "I2", "I": "I1"}, - }, - "Add": { - "core_allocation": 1, - "spatial_mapping": { - "D1": ("G", 16), - "D2": ("C", 1), - "D3": ("OX", 1), - "D4": ("OY", 1), - }, - 
"memory_operand_links": {"O": "O", "X": "I2", "Y": "I1"}, - }, -} diff --git a/zigzag/inputs/examples/mapping/default.py b/zigzag/inputs/examples/mapping/default.py deleted file mode 100644 index d355a9fe..00000000 --- a/zigzag/inputs/examples/mapping/default.py +++ /dev/null @@ -1,7 +0,0 @@ -mapping = { - "default": { - "core_allocation": 1, - "spatial_mapping": {"D1": ("K", 32), "D2": ("C", 32)}, - "memory_operand_links": {"O": "O", "W": "I2", "I": "I1"}, - } -} diff --git a/zigzag/inputs/examples/mapping/default_imc.py b/zigzag/inputs/examples/mapping/default_imc.py deleted file mode 100755 index 978ef4cd..00000000 --- a/zigzag/inputs/examples/mapping/default_imc.py +++ /dev/null @@ -1,13 +0,0 @@ -mapping = { - "default": { - "core_allocation": 1, - # "spatial_mapping": {"D1": ("OX", 25), "D2": (("FX", 3), ("FY", 3))}, - "memory_operand_links": {"O": "O", "W": "I2", "I": "I1"}, - "spatial_mapping_hint": {"D1": ["K", "OX"], "D2": ["C", "FX", "FY"]}, - }, - "Add": { # to avoid errors when the workload is manually defined and contains Add layers. - "core_allocation": 1, - "memory_operand_links": {"O": "O", "X": "I2", "Y": "I1"}, - "spatial_mapping_hint": {"D1": ["G"], "D2": ["C"]}, - }, -} diff --git a/zigzag/inputs/examples/mapping/edge_tpu_like.py b/zigzag/inputs/examples/mapping/edge_tpu_like.py deleted file mode 100644 index 297d5f67..00000000 --- a/zigzag/inputs/examples/mapping/edge_tpu_like.py +++ /dev/null @@ -1,32 +0,0 @@ -mapping = { - "default": { - "core_allocation": 1, - "spatial_mapping": { - "D1": ("K", 8), - "D2": ("C", 8), - "D3": ("OX", 4), - "D4": ("OY", 4), - }, - "memory_operand_links": {"O": "O", "W": "I2", "I": "I1"}, - }, - "Add": { - "core_allocation": 1, - "spatial_mapping": { - "D1": ("G", 8), - "D2": ("C", 1), - "D3": ("OX", 1), - "D4": ("OY", 1), - }, - "memory_operand_links": {"O": "O", "X": "I2", "Y": "I1"}, - }, - "Pooling": { - "core_allocation": 1, - "spatial_mapping": { - "D1": ("G", 8), - "D2": ("C", 1), - "D3": ("OX", 1), - "D4": ("OY", 1), - }, - "memory_operand_links": {"O": "O", "W": "I2", "I": "I1"}, - }, -} diff --git a/zigzag/inputs/examples/mapping/meta_prototype_like.py b/zigzag/inputs/examples/mapping/meta_prototype_like.py deleted file mode 100644 index 7db185f2..00000000 --- a/zigzag/inputs/examples/mapping/meta_prototype_like.py +++ /dev/null @@ -1,22 +0,0 @@ -mapping = { - "default": { - "core_allocation": 1, - "spatial_mapping": { - "D1": ("K", 32), - "D2": ("C", 2), - "D3": ("OX", 4), - "D4": ("OY", 4), - }, - "memory_operand_links": {"O": "O", "W": "I2", "I": "I1"}, - }, - "Add": { - "core_allocation": 1, - "spatial_mapping": { - "D1": ("G", 32), - "D2": ("C", 1), - "D3": ("OX", 1), - "D4": ("OY", 1), - }, - "memory_operand_links": {"O": "O", "X": "I2", "Y": "I1"}, - }, -} diff --git a/zigzag/inputs/examples/mapping/tesla_npu_like.py b/zigzag/inputs/examples/mapping/tesla_npu_like.py deleted file mode 100644 index cd3b2cf3..00000000 --- a/zigzag/inputs/examples/mapping/tesla_npu_like.py +++ /dev/null @@ -1,17 +0,0 @@ -mapping = { - "default": { - "core_allocation": 1, - "spatial_mapping": {"D1": ("K", 32), "D2": ("OX", 8), "D3": ("OY", 4)}, - "memory_operand_links": {"O": "O", "W": "I2", "I": "I1"}, - }, - "Add": { - "core_allocation": 1, - "spatial_mapping": {"D1": ("G", 32), "D2": ("OX", 1), "D3": ("OY", 1)}, - "memory_operand_links": {"O": "O", "X": "I2", "Y": "I1"}, - }, - "Pooling": { - "core_allocation": 1, - "spatial_mapping": {"D1": ("G", 32), "D2": ("OX", 1), "D3": ("OY", 1)}, - "memory_operand_links": {"O": "O", "W": "I2", "I": 
"I1"}, - }, -} diff --git a/zigzag/inputs/examples/mapping/tpu_like.py b/zigzag/inputs/examples/mapping/tpu_like.py deleted file mode 100644 index efa73943..00000000 --- a/zigzag/inputs/examples/mapping/tpu_like.py +++ /dev/null @@ -1,21 +0,0 @@ -mapping = { - "default": { - "core_allocation": 1, - "spatial_mapping": {"D1": ("K", 32), "D2": ("C", 32)}, - "memory_operand_links": {"O": "O", "W": "I2", "I": "I1"}, - }, - "Add": { - "core_allocation": 1, - "spatial_mapping": {"D1": ("G", 32), "D2": ("C", 1)}, - "memory_operand_links": { - "O": "O", - "X": "I2", - "Y": "I1", - }, - }, - "Pooling": { - "core_allocation": 1, - "spatial_mapping": {"D1": ("G", 32), "D2": ("C", 1)}, - "memory_operand_links": {"O": "O", "W": "I2", "I": "I1"}, - }, -} diff --git a/zigzag/inputs/examples/workload/mlperf_tiny/deepautoencoder.onnx b/zigzag/inputs/examples/workload/mlperf_tiny/deepautoencoder.onnx deleted file mode 100644 index b1df94b4..00000000 Binary files a/zigzag/inputs/examples/workload/mlperf_tiny/deepautoencoder.onnx and /dev/null differ diff --git a/zigzag/inputs/examples/workload/mlperf_tiny/ds_cnn.onnx b/zigzag/inputs/examples/workload/mlperf_tiny/ds_cnn.onnx deleted file mode 100644 index d70b918d..00000000 Binary files a/zigzag/inputs/examples/workload/mlperf_tiny/ds_cnn.onnx and /dev/null differ diff --git a/zigzag/inputs/examples/workload/mlperf_tiny/mobilenet_v1.onnx b/zigzag/inputs/examples/workload/mlperf_tiny/mobilenet_v1.onnx deleted file mode 100644 index c50f4d60..00000000 Binary files a/zigzag/inputs/examples/workload/mlperf_tiny/mobilenet_v1.onnx and /dev/null differ diff --git a/zigzag/inputs/examples/workload/mlperf_tiny/resnet8.onnx b/zigzag/inputs/examples/workload/mlperf_tiny/resnet8.onnx deleted file mode 100644 index e36519b4..00000000 Binary files a/zigzag/inputs/examples/workload/mlperf_tiny/resnet8.onnx and /dev/null differ diff --git a/zigzag/io/AcceleratorParser.py b/zigzag/io/AcceleratorParser.py deleted file mode 100644 index 9552d62d..00000000 --- a/zigzag/io/AcceleratorParser.py +++ /dev/null @@ -1,66 +0,0 @@ -import importlib -import logging - -logger = logging.getLogger(__name__) - - -class AcceleratorParser: - """! 
Parse an accelerator module path into an accelerator object""" - - def __init__(self, accelerator) -> None: - """ - Initialize the parser by checking if the provided argument is a module path or accelerator object - @param accelerator_path (str or Accelerator): The accelerator path or accelerator object - """ - if isinstance(accelerator, str): - self.accelerator_path = accelerator - self.accelerator = None - elif type(accelerator).__name__ == "Accelerator": - self.accelerator_path = None - self.accelerator = accelerator - else: - raise TypeError("Given accelerator is nor a module path string or an Accelerator object.") - - self.supported_accelerators = { - "ascend": "zigzag.inputs.examples.hardware.Ascend_like", - "edge-tpu": "zigzag.inputs.examples.hardware.Edge_TPU_like", - "eyeriss": "zigzag.inputs.examples.hardware.Eyeriss_like", - "meta-prototype": "zigzag.inputs.examples.hardware.Meta_prototype", - "tesla-npu": "zigzag.inputs.examples.hardware.Tesla_NPU_like", - "tpu": "zigzag.inputs.examples.hardware.TPU_like", - } - - def run(self): - if not self.accelerator: - try: - accelerator = self.parse_accelerator_from_path(self.accelerator_path) - except ModuleNotFoundError: - try: - accelerator = self.parse_supported_accelerator(self.accelerator_path) - except KeyError: - raise ValueError( - f"Provided accelerator path ({self.accelerator_path}) is not a valid module path, nor a supported standard accelerator. \ - Supported standard accelerators = {self.get_supported_accelerators()}" - ) - self.accelerator = accelerator - - @staticmethod - def parse_accelerator_from_path(accelerator_path): - """! Parse the input accelerator residing in accelerator_path - @param accelerator_path - """ - global module - module = importlib.import_module(accelerator_path) - accelerator = module.accelerator - logger.info(f"Parsed accelerator with cores {[core.id for core in accelerator.cores]}.") - return accelerator - - def parse_supported_accelerator(self, standard_accelerator): - accelerator_path = self.supported_accelerators[standard_accelerator] - return self.parse_accelerator_from_path(accelerator_path) - - def get_accelerator(self): - return self.accelerator - - def get_supported_accelerators(self): - return list(self.supported_accelerators.keys()) diff --git a/zigzag/io/onnx/ConvParser.py b/zigzag/io/onnx/ConvParser.py deleted file mode 100644 index 65ef815c..00000000 --- a/zigzag/io/onnx/ConvParser.py +++ /dev/null @@ -1,190 +0,0 @@ -from math import ceil -from typing import Any - - -from zigzag.io.onnx.Parser import Parser -from zigzag.io.onnx.utils import ( - get_attribute_ints_with_name, - get_node_input_output_dimension_shapes, - get_onnx_tensor_type, -) -from zigzag.workload.layer_attributes import LayerAttributes -from zigzag.workload.layer_node import LayerNode -from zigzag.utils import pickle_deepcopy - -import logging - -logger = logging.getLogger(__name__) - - -class ConvParser(Parser): - """! Parser for ONNX Conv and QLinearConv nodes into LayerNode.""" - - def __init__(self, node_id, node, nodes_outputs, mapping, onnx_model) -> None: - - super().__init__(node_id, node, nodes_outputs, mapping, onnx_model) - - def run(self) -> LayerNode: - """! Run the parser and return the created LayerNode object""" - layer_node = self.generate_layer_node_for_conv() - return layer_node - - def generate_layer_node_for_conv(self): - def get_weight_name(node): - """! 
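# Illustrative sketch (not part of this diff): the removed AcceleratorParser resolved a string
# either as an importable module path exposing a module-level `accelerator` attribute, or as
# one of a few shorthand names mapped to the bundled example modules. A stripped-down version
# of that lookup (only the "tpu" alias listed in the removed parser is kept here):
import importlib

SUPPORTED = {"tpu": "zigzag.inputs.examples.hardware.TPU_like"}


def load_accelerator(spec: str):
    """Import `spec` (or its shorthand expansion) and return the module-level `accelerator`."""
    try:
        module = importlib.import_module(spec)
    except ModuleNotFoundError:
        module = importlib.import_module(SUPPORTED[spec])  # raises KeyError for unknown names
    return module.accelerator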
Return the name of the weight input of this node depending on its operator type - @param node (NodeProto): The node - """ - op_type = node.op_type # 'Conv', 'QLinearConv', ... - if op_type == "Conv": - return node.input[1] - elif op_type == "QLinearConv": - return node.input[3] - else: - raise NotImplementedError(f"Retrieving weight name for onnx node of type {op_type} is not supported.") - - def get_input_output_weight_data_type(node, model): - """! Return the data type of the input, output and weight tensors of this node. - @param node - @param model - """ - input_name = node.input[0] - output_name = node.output[0] - weight_name = get_weight_name(node) - - input_elem_type = get_onnx_tensor_type(input_name, model).elem_type - output_elem_type = get_onnx_tensor_type(output_name, model).elem_type - weight_elem_type = get_onnx_tensor_type(weight_name, model).elem_type - - return input_elem_type, output_elem_type, weight_elem_type - - def get_layer_node_input_format( - kernel_shape, - strides, - dilations, - groups, - padding, - ia_shape, - oa_shape, - node_mapping, - ) -> dict[str, Any]: - """! Generate the necessary dictionary items required for the LayerNode creation. If there is no data for a given Layer Attribute, the Layer Attribute is not included in the returned dict.""" - # convert the data types to precisions based on the onnx definition - - # Equation - d = {} - # IMPORTANT: If any of the input loops require padding, they should be defined as the rightmost dimensions in the equation - # This is because we construct the dimensionality order and then add the padding to those last dimensions in the order - d["equation"] = "O[b][g][k][oy][ox]+=W[g][k][c][fy][fx]*I[b][g][c][iy][ix]" - - # Get dimension sizes from input parameters - assert ia_shape[0] == oa_shape[0], "Batch size is different for input and output activations." 
- B = oa_shape[0] - if B == 0: - B = 1 - G = groups - K = ceil(oa_shape[1] / G) - OX = oa_shape[3] - OY = oa_shape[2] - C = ceil(ia_shape[1] / G) - IX = ia_shape[3] - IY = ia_shape[2] - FX = kernel_shape[0] - FY = kernel_shape[1] - d["loop_dim_size"] = { - "B": B, - "K": K, - "G": G, - "OX": OX, - "OY": OY, - "C": C, - "FX": FX, - "FY": FY, - } - d["pr_loop_dim_size"] = {"IX": IX, "IY": IY} - d["dimension_relations"] = [ - f"ix={strides[0]}*ox+{dilations[0]}*fx", - f"iy={strides[1]}*oy+{dilations[1]}*fy", - ] - d["operand_precision"] = {"O": 16, "O_final": 8, "W": 8, "I": 8} - # d["operand_source"] = {'W': [], 'I': []} - d["constant_operands"] = ["W"] - d["core_allocation"] = node_mapping["core_allocation"] - d["memory_operand_links"] = node_mapping["memory_operand_links"] - - if "temporal_ordering" in node_mapping: - d["temporal_ordering"] = node_mapping["temporal_ordering"] - if "spatial_mapping" in node_mapping: - d["spatial_mapping"] = node_mapping["spatial_mapping"] - if "spatial_mapping_hint" in node_mapping: - d["spatial_mapping_hint"] = node_mapping["spatial_mapping_hint"] - - # Find the previous layer(s) that should be this node's parent(s) - node_inputs = self.node.input - preds = [] - for node_input in node_inputs: - for n in self.nodes_outputs: - if node_input in self.nodes_outputs[n]: - preds.append(n) - d["operand_source"] = {"I": preds} - - # Add padding information - - d["padding"] = { - "IY": (padding[0], padding[2]), - "IX": (padding[1], padding[3]), - } - - return d - - attrs = self.node.attribute - # Find kernel shape in attrs - kernel_shape = get_attribute_ints_with_name("kernel_shape", attrs, default=None) - # Find strides in attrs - strides = get_attribute_ints_with_name("strides", attrs, default=[1, 1]) - # Find dilation rate in attrs - dilations = get_attribute_ints_with_name("dilations", attrs, default=[1, 1]) - # Find number of groups in attrs - groups = get_attribute_ints_with_name("group", attrs, default=1) - # Find padding in attrs - padding = get_attribute_ints_with_name("pads", attrs, default=[0, 0, 0, 0]) - - # Get the input and output activation shapes - ia_dimension_shape, oa_dimension_shape = get_node_input_output_dimension_shapes(self.node, self.onnx_model) - - # Get the input and output activation and weight data type (precision) - ia_data_type, oa_data_type, w_data_type = get_input_output_weight_data_type(self.node, self.onnx_model) - - # Get the hw mapping of this node. - if self.node.name in self.mapping: - node_mapping = self.mapping[self.node.name] - else: - try: - node_mapping = self.mapping["default"] - except: - raise ValueError(f"There is no mapping provided for node {self.node.name}, nor a default one.") - - # Take a deepcopy of the mapping, otherwise it will be changed for other layers if using default - node_mapping = pickle_deepcopy(node_mapping) - - node_attrs = get_layer_node_input_format( - kernel_shape, - strides, - dilations, - groups, - padding, - ia_dimension_shape, - oa_dimension_shape, - node_mapping, - ) - - node_obj = LayerNode( - self.node_id, - # NOTE we first generate the layer attributes in user input format and then parse to `LayerAttributes`. 
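# Illustrative sketch (not part of this diff): the loop sizes derived above, written out as a
# standalone function. Shapes follow the NCHW convention used in the parser ([batch, channels,
# height, width]); the example numbers in the usage line are hypothetical.
from math import ceil


def conv_loop_sizes(ia_shape, oa_shape, kernel_shape, groups):
    batch = oa_shape[0] if oa_shape[0] > 0 else 1  # a symbolic/zero batch is treated as 1
    return {
        "B": batch,
        "G": groups,
        "K": ceil(oa_shape[1] / groups),  # output channels per group
        "C": ceil(ia_shape[1] / groups),  # input channels per group
        "OX": oa_shape[3], "OY": oa_shape[2],
        "FX": kernel_shape[0], "FY": kernel_shape[1],
    }


# conv_loop_sizes([1, 64, 56, 56], [1, 128, 56, 56], [3, 3], 1)
# -> {'B': 1, 'G': 1, 'K': 128, 'C': 64, 'OX': 56, 'OY': 56, 'FX': 3, 'FY': 3}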
This is redundant - LayerAttributes.parse_user_input(node_attrs), - node_name=self.node.name, - layer_type=self.node.op_type.lower(), - ) - - logger.info(f"Parsed Conv node {self.node.name}") - - return node_obj diff --git a/zigzag/io/onnx/GemmParser.py b/zigzag/io/onnx/GemmParser.py deleted file mode 100644 index 5f6cbe0a..00000000 --- a/zigzag/io/onnx/GemmParser.py +++ /dev/null @@ -1,147 +0,0 @@ -from typing import Any - -from onnx import ModelProto, NodeProto -from zigzag.io.onnx.Parser import Parser -from zigzag.io.onnx.utils import ( - get_node_input_output_dimension_shapes, - get_attribute_ints_with_name, -) -from zigzag.workload.layer_attributes import LayerAttributes -from zigzag.workload.layer_node import LayerNode - -import logging - -logger = logging.getLogger(__name__) - - -class GemmParser(Parser): - """! Parses an ONNX Gemm operator into a LayerNode""" - - def run(self): - """! Run the parser""" - layer_node = self.generate_layer_node_for_gemm() - return layer_node - - def __init__( - self, - node_id: int, - node: NodeProto, - nodes_outputs: dict[int, Any], - mapping: dict[str, dict[str, Any]] | None, - onnx_model: ModelProto | None, - ) -> None: - assert mapping is not None - assert onnx_model is not None - super().__init__(node_id, node, nodes_outputs, mapping, onnx_model) - - def generate_layer_node_for_gemm(self): - def get_layer_node_input_format( - B: int, C: int, K: int, node_mapping: dict[str, Any], nodes_outputs: dict[int, Any] - ) -> dict[str, Any]: - """! Generate the necessary dictionary items required for the Node creation.""" - # convert the data types to precisions based on the onnx definition - - # Equation - d: dict[str, Any] = {} - d["equation"] = "O[b][k]+=W[k][c]*I[b][c]" - - # Get dimension sizes from input parameters - K = K - C = C - B = B # Not to be confused with operand 'B' which is the weights - d["loop_dim_size"] = {"K": K, "C": C, "B": B} - d["dimension_relations"] = [] - d["operand_precision"] = {"O": 16, "O_final": 8, "W": 8, "I": 8} - d["operand_source"] = {"W": [], "I": []} - d["constant_operands"] = ["W"] - - d["core_allocation"] = node_mapping["core_allocation"] - d["memory_operand_links"] = {"O": "O", "W": "I2", "I": "I1"} - - if "temporal_ordering" in node_mapping: - d["temporal_ordering"] = node_mapping["temporal_ordering"] - if "spatial_mapping" in node_mapping: - d["spatial_mapping"] = node_mapping["spatial_mapping"] - if "spatial_mapping_hint" in node_mapping: - d["spatial_mapping_hint"] = node_mapping["spatial_mapping_hint"] - - # Find the previous layer(s) that should be this node's parent(s) - node_inputs = self.node.input - preds: list[int] = [] - for node_input in node_inputs: - for n in nodes_outputs: - if node_input in nodes_outputs[n]: - preds.append(n) - d["operand_source"] = {"I": preds} - - return d - - # Already verified in __init__, but helps with type hint - assert self.onnx_model is not None - ia_dimension_shape, oa_dimension_shape = get_node_input_output_dimension_shapes(self.node, self.onnx_model) - - # The Gemm node includes flags for transpose of both of its inputs. - # If the first input is transposed, we need to transpose its shape here. 
- transA = get_attribute_ints_with_name("transA", self.node.attribute, default=0) - if transA: - assert len(ia_dimension_shape) == 2 - ia_dimension_shape = (ia_dimension_shape[1], ia_dimension_shape[0]) - - # If the input activations are empty (which can happen if there is a shape operator in the path) - # we try to extract the weights from the model graph initializer to get the correct input activation size - # TODO having a shape operator in the ONNX graph should be dealt with at a higher level - if not ia_dimension_shape: - weight_name = self.node.input[1] - initializer_names = [i.name for i in self.onnx_model.graph.initializer] - weight_name_index = initializer_names.index(weight_name) - # Get the weight dimensions - weights = self.onnx_model.graph.initializer[weight_name_index] - weight_dims = list(weights.dims) - assert ( - len(weight_dims) == 2 - ), f"There are {len(weight_dims)} weight dimensions for Gemm node {self.node.name}" - # Check if the weights are transposed - transB = get_attribute_ints_with_name("transB", self.node.attribute, default=0) - if transB: - weight_dims = [weight_dims[1], weight_dims[0]] - assert ( - len(oa_dimension_shape) == 2 - ), "Can't infer ia_dimension_shape if oa_dimension_shape is also not known." - B = oa_dimension_shape[0] - C = weight_dims[0] - ia_dimension_shape = [B, C] - - assert ( - len(ia_dimension_shape) == len(oa_dimension_shape) == 2 - ) # First element is batch size, second is input/output channel - assert ia_dimension_shape[0] == oa_dimension_shape[0] # Batch size should be the same for input and output - # If the batch size is 0, we discard it by setting it to 1 internally inside ZigZag - batch_size = ia_dimension_shape[0] - if batch_size == 0: - B = 1 - else: - B = batch_size - C = ia_dimension_shape[1] - K = oa_dimension_shape[1] - - # Get the hw mapping of this node. - if self.node.name in self.mapping: - node_mapping = self.mapping[self.node.name] - else: - try: - node_mapping = self.mapping["default"] - except: - raise ValueError(f"There is no mapping provided for node {self.node.name}, nor a default one.") - - node_attrs = get_layer_node_input_format(B, C, K, node_mapping, self.nodes_outputs) - node_obj = LayerNode( - self.node_id, - # NOTE we first generate the layer attributes in user input format and then parse to `LayerAttributes`. 
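# Illustrative sketch (not part of this diff): the fallback above that recovers the Gemm input
# activation shape from the weight initializer when the activation shape is missing. The
# weight dimensions and flag in the usage line are hypothetical.
def infer_gemm_ia_shape(oa_shape: list[int], weight_dims: list[int], trans_b: int = 0) -> list[int]:
    """Rebuild [batch, in_channels] for a Gemm whose input activation shape is unknown."""
    assert len(oa_shape) == 2 and len(weight_dims) == 2
    if trans_b:  # weights stored transposed: flip so index 0 is the input-channel dimension
        weight_dims = [weight_dims[1], weight_dims[0]]
    return [oa_shape[0], weight_dims[0]]


# infer_gemm_ia_shape([1, 1000], [512, 1000], trans_b=0) -> [1, 512]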
This is redundant - LayerAttributes.parse_user_input(node_attrs), - node_name=self.node.name, - layer_type=self.node.op_type.lower(), - ) - - logger.info(f"Parsed Gemm node {self.node.name}") - - return node_obj diff --git a/zigzag/io/onnx/MatMulParser.py b/zigzag/io/onnx/MatMulParser.py deleted file mode 100644 index 4a031a6e..00000000 --- a/zigzag/io/onnx/MatMulParser.py +++ /dev/null @@ -1,116 +0,0 @@ -from typing import Any -import onnx -from onnx import ModelProto -from onnx import NodeProto - -from zigzag.io.onnx.Parser import Parser -from zigzag.io.onnx.utils import get_node_input_output_dimension_shapes -from zigzag.workload.layer_attributes import LayerAttributes -from zigzag.workload.layer_node import LayerNode - -import logging - -logger = logging.getLogger(__name__) - - -class MatMulParser(Parser): - """ - Parses an ONNX MatMul operator into a LayerNode - """ - - def run(self) -> LayerNode: - """Run the parser""" - layer_node = self.generate_layer_node_for_matmul() - return layer_node - - def __init__( - self, - node_id: int, - node: NodeProto, - nodes_outputs: dict[int, Any], - mapping: dict[str, dict[str, Any]] | None, - onnx_model: ModelProto | None, - ) -> None: - assert mapping is not None - assert onnx_model is not None - super().__init__(node_id, node, nodes_outputs, mapping, onnx_model) - - def generate_layer_node_for_matmul(self): - def get_layer_node_input_format( - B: int, C: int, K: int, node_mapping: dict[str, Any], nodes_outputs: dict[int, Any] - ) -> dict[str, Any]: - """! Generate the necessary dictionary items required for the Node creation.""" - # convert the data types to precisions based on the onnx definition - - # Equation - d: dict[str, Any] = {} - d["equation"] = "O[b][k]+=B[k][c]*A[b][c]" - - # Get dimension sizes from input parameters - K = K - C = C - B = B # Not to be confused with operand 'B' which is the weights - d["loop_dim_size"] = {"K": K, "C": C, "B": B} - d["dimension_relations"] = [] - d["operand_precision"] = {"O": 16, "O_final": 8, "B": 8, "A": 8} - d["operand_source"] = {"B": [], "A": []} - d["constant_operands"] = ["B"] - - d["core_allocation"] = node_mapping["core_allocation"] - d["memory_operand_links"] = {"O": "O", "B": "I2", "A": "I1"} - - if "temporal_ordering" in node_mapping: - d["temporal_ordering"] = node_mapping["temporal_ordering"] - if "spatial_mapping" in node_mapping: - d["spatial_mapping"] = node_mapping["spatial_mapping"] - if "spatial_mapping_hint" in node_mapping: - d["spatial_mapping_hint"] = node_mapping["spatial_mapping_hint"] - - # Find the previous layer(s) that should be this node's parent(s) - node_inputs = self.node.input - preds = [] - for node_input in node_inputs: - for n in nodes_outputs: - if node_input in nodes_outputs[n]: - preds.append(n) - d["operand_source"] = {"A": preds} - - return d - - ia_dimension_shape, oa_dimension_shape = get_node_input_output_dimension_shapes(self.node, self.onnx_model) - - # TODO it should be able to deal with tensors - assert ( - len(ia_dimension_shape) == len(oa_dimension_shape) == 2 - ) # First element is batch size, second is input/output channel - assert ia_dimension_shape[0] == oa_dimension_shape[0] # Batch size should be the same for input and output - # If the batch size is 0, we discard it by setting it to 1 internally inside ZigZag - batch_size = ia_dimension_shape[0] - if batch_size == 0: - B = 1 - else: - B = batch_size - C = ia_dimension_shape[1] - K = oa_dimension_shape[1] - - # Get the hw mapping of this node. 
- if self.node.name in self.mapping: - node_mapping = self.mapping[self.node.name] - else: - try: - node_mapping = self.mapping["default"] - except: - raise ValueError(f"There is no mapping provided for node {self.node.name}, nor a default one.") - - node_attrs = get_layer_node_input_format(B, C, K, node_mapping, self.nodes_outputs) - node_obj = LayerNode( - self.node_id, - # NOTE we first generate the layer attributes in user input format and then parse to `LayerAttributes`. This is redundant - LayerAttributes.parse_user_input(node_attrs), - node_name=self.node.name, - layer_type=self.node.op_type.lower(), - ) - - logger.info(f"Parsed MatMul node {self.node.name}") # pylint disable=W1203 - - return node_obj diff --git a/zigzag/io/onnx/Parser.py b/zigzag/io/onnx/Parser.py deleted file mode 100644 index 8dcfb8fd..00000000 --- a/zigzag/io/onnx/Parser.py +++ /dev/null @@ -1,28 +0,0 @@ -from abc import ABCMeta, abstractmethod -from typing import Any - -from onnx import ModelProto, NodeProto - -from zigzag.workload.DummyNode import DummyNode -from zigzag.workload.layer_node import LayerNode - - -class Parser(metaclass=ABCMeta): - """! Abstract base class that represents a parser of an onnx operator. Example: Conv, MatMul, etc.""" - - def __init__( - self, - node_id: int, - node: NodeProto, - nodes_outputs: dict[int, Any], - mapping: dict[str, dict[str, Any]] | None, - onnx_model: ModelProto | None, - ) -> None: - self.node_id = node_id - self.node = node - self.nodes_outputs = nodes_outputs - self.mapping = mapping - self.onnx_model = onnx_model - - @abstractmethod - def run(self) -> LayerNode | DummyNode: ... diff --git a/zigzag/mapping/Mapping.py b/zigzag/mapping/Mapping.py index e3de77e4..0740ebae 100644 --- a/zigzag/mapping/Mapping.py +++ b/zigzag/mapping/Mapping.py @@ -364,7 +364,7 @@ def calc_data_access(self): if ( self.access_same_data_considered_as_no_access and mem_level == 0 - and self.accelerator.get_core(self.layer_node.core_allocation).mem_r_bw_dict[ + and self.accelerator.get_core(self.layer_node.core_allocation[0]).mem_r_bw_dict[ self.layer_node.memory_operand_links[operand] ][mem_level] >= self.data_bit_per_level[operand][mem_level] diff --git a/zigzag/mapping/spatial_mapping.py b/zigzag/mapping/spatial_mapping.py index a1e06d3e..8e04a74e 100644 --- a/zigzag/mapping/spatial_mapping.py +++ b/zigzag/mapping/spatial_mapping.py @@ -5,10 +5,11 @@ from zigzag.datatypes import OADimension, LayerDim, UnrollFactor, UnrollFactorInt from zigzag.workload.LayerAttribute import LayerAttribute -from zigzag.utils import json_repr_handler +from zigzag.utils import UniqueMessageFilter, json_repr_handler logger = logging.getLogger(__name__) +logger.addFilter(UniqueMessageFilter()) class MappingSingleOADim: @@ -16,7 +17,7 @@ class MappingSingleOADim: def __init__(self, data: dict[LayerDim, UnrollFactor]): # float type is used in `SpatialMappingConversionStage` - self.data: dict[LayerDim, UnrollFactor] | dict[LayerDim, float] = data + self.data: dict[LayerDim, UnrollFactor] = data @property def utilization(self): @@ -46,7 +47,7 @@ def __setitem__(self, key: LayerDim, value: UnrollFactor | float): self.data[key] = value # type: ignore def __str__(self): - return str({str(k): str(v) for k, v in self.items()}) + return str({str(k): str(v) for k, v in self.items()}).replace("'", "") def __repr__(self): return str(self) @@ -167,8 +168,8 @@ def check_and_reduce( max_unrolling = max_unrollings[oa_dim][layer_dim] if unrolling > max_unrolling: logger.warning( - """User provided spatial unrolling (%s:%i) in 
Dimension %s exceeded maximally allowed unrolling - of %i. Reducing unrolling to this value.""", + "User provided spatial unrolling (%s:%i) in Dimension %s exceeded maximally allowed unrolling " + "of %i. Reducing unrolling to this value.", layer_dim, unrolling, oa_dim, @@ -285,7 +286,7 @@ def copy(self) -> "SpatialMapping": return copy.deepcopy(self) def __str__(self): - return str({str(k): str(v) for k, v in self.items()}) + return str({str(k): str(v) for k, v in self.items()}).replace('"', "").replace("'", "") def __eq__(self, other: Any) -> bool: """! Return true if the contained dimensions are the same and all MappingSingleOADims are the same""" @@ -303,44 +304,6 @@ def __hash__(self): def empty() -> "SpatialMapping": return SpatialMapping({}) - @staticmethod - def parse_user_input(x: dict[str, tuple[str, int] | tuple[tuple[str, int], ...]]) -> "SpatialMapping": - """! Parse legacy notation - Example input: {"D1": ("OX", 25), "D2": (("FX", 3), ("FY", 3))} - NOTE: this does not (yet) check wether the input sizes are valid - """ - if x is None: # type: ignore - return SpatialMapping.empty() - - if isinstance(x, list): - raise NotImplementedError("No support for multiple provided spatial mappings by user") - - assert isinstance(x, dict) - - data: dict[OADimension, MappingSingleOADim] = {} - for k, v in x.items(): - assert isinstance(k, str) - assert isinstance(v, tuple) - oa_dim = OADimension.parse_user_input(k) - mapping_single_dim_dict: dict[LayerDim, UnrollFactor] = {} - - ## Nested layer dimensions e.g. (("FX", 3), ("FY", 3)) - if all([isinstance(elem, tuple) and len(elem) == 2 for elem in v]): - v_nested: tuple[tuple[str, int], ...] = v # type: ignore - for layer_dim, factor in v_nested: - assert isinstance(layer_dim, str) - assert isinstance(factor, int) - mapping_single_dim_dict[LayerDim(layer_dim)] = int(factor) - # e.g. ("OX", 3) - else: - assert len(v) == 2 - v_single: tuple[str, int] = v # type: ignore - layer_dim, factor = v_single - mapping_single_dim_dict[LayerDim(layer_dim)] = int(factor) - - data[oa_dim] = MappingSingleOADim(mapping_single_dim_dict) - return SpatialMapping(data) - class SpatialMappingHint(LayerAttribute): """! Suggested LayerDims to be unrolled for every OADimension""" @@ -358,12 +321,3 @@ def __getitem__(self, key: OADimension): @staticmethod def empty() -> "SpatialMappingHint": return SpatialMappingHint({}) - - @staticmethod - def parse_user_input(x: dict[str, list[str]]) -> "SpatialMappingHint": - if x is None: # type: ignore - return SpatialMappingHint.empty() - assert isinstance(x, dict) - return SpatialMappingHint( - {OADimension.parse_user_input(k): {LayerDim(layer_dim_str) for layer_dim_str in v} for k, v in x.items()} - ) diff --git a/zigzag/opt/loma/LomaEngine.py b/zigzag/opt/loma/LomaEngine.py index 38cc0ff5..d3f8d240 100644 --- a/zigzag/opt/loma/LomaEngine.py +++ b/zigzag/opt/loma/LomaEngine.py @@ -1,4 +1,4 @@ -from math import factorial, inf +from math import factorial import operator from typing import Any, Generator from tqdm import tqdm @@ -68,7 +68,7 @@ def __init__( # TODO: Take into account that data might be stored in lower level, # TODO: thus adapt the memory hierarchy. # TODO: The fact that there is a global buffer above the cores requires attention. 
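# Illustrative sketch (not part of this diff): the clamping behaviour behind the reworded
# warning above. A user-given unroll factor larger than what the hardware allows is reduced to
# that maximum rather than rejected. Standalone version with hypothetical numbers:
import logging

logger = logging.getLogger(__name__)


def clamp_unrolling(layer_dim: str, unrolling: int, oa_dim: str, max_unrolling: int) -> int:
    if unrolling > max_unrolling:
        logger.warning(
            "User provided spatial unrolling (%s:%i) in Dimension %s exceeded maximally allowed "
            "unrolling of %i. Reducing unrolling to this value.",
            layer_dim, unrolling, oa_dim, max_unrolling,
        )
        return max_unrolling
    return unrolling


# clamp_unrolling("K", 64, "D1", 32) -> 32, with one warning logged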
- core_id = layer.core_allocation + core_id = layer.core_allocation[0] self.memory_hierarchy: MemoryHierarchy = accelerator.get_core(core_id).memory_hierarchy self.show_progress_bar = kwargs.get("loma_show_progress_bar", False) @@ -104,8 +104,8 @@ def run(self): if not yielded: # TODO this warning is unclear: an invalid spatial mapping is not necessarily its cause raise NoValidLoopOrderingFoundException( - f"""No valid loop ordering was found for layer {self.layer}. Please make sure the spatial mapping is - compatible with the architecture.""" + f"No valid loop ordering was found for layer {self.layer}. Please make sure the spatial mapping is " + f"compatible with the architecture." ) def get_temporal_loops(self) -> None: diff --git a/zigzag/opt/loma/MemoryAllocator.py b/zigzag/opt/loma/MemoryAllocator.py index 9a31650b..11fc02f4 100644 --- a/zigzag/opt/loma/MemoryAllocator.py +++ b/zigzag/opt/loma/MemoryAllocator.py @@ -88,7 +88,7 @@ def run(self): """ # self.nodes contains the different memory nodes in bottom-up fashion - core_id = self.layer.core_allocation + core_id = self.layer.core_allocation[0] memory_hierarchy: MemoryHierarchy = self.accelerator.get_core(core_id).memory_hierarchy top_levels = {mem_op: memory_hierarchy.get_operand_top_level(mem_op) for mem_op in self.mem_ops} nodes = memory_hierarchy.nodes diff --git a/zigzag/opt/salsa/SalsaEngine.py b/zigzag/opt/salsa/SalsaEngine.py index 56e2a347..a867f041 100644 --- a/zigzag/opt/salsa/SalsaEngine.py +++ b/zigzag/opt/salsa/SalsaEngine.py @@ -77,7 +77,6 @@ def __init__( self.accelerator = accelerator self.layer = layer self.spatial_mapping = spatial_mapping - # self.memory_hierarchy: MemoryHierarchy = self.accelerator.get_core(layer.core_allocation).memory_hierarchy # Algorithm related inputs self.iteration_number = kwargs.get("salsa_iteration_number", 1000) diff --git a/zigzag/opt/salsa/SalsaState.py b/zigzag/opt/salsa/SalsaState.py index c665118c..2f03cdc7 100644 --- a/zigzag/opt/salsa/SalsaState.py +++ b/zigzag/opt/salsa/SalsaState.py @@ -53,7 +53,7 @@ def __init__( self.accelerator = accelerator self.layer = layer self.spatial_mapping = spatial_mapping - self.memory_hierarchy: MemoryHierarchy = self.accelerator.get_core(layer.core_allocation).memory_hierarchy + self.memory_hierarchy: MemoryHierarchy = self.accelerator.get_core(layer.core_allocation[0]).memory_hierarchy self.opt_criterion_name = opt_criterion_name allocator = MemoryAllocator(self.accelerator, self.layer, self.spatial_mapping, ordering) diff --git a/zigzag/parser/AcceleratorValidator.py b/zigzag/parser/AcceleratorValidator.py new file mode 100644 index 00000000..cc8b456e --- /dev/null +++ b/zigzag/parser/AcceleratorValidator.py @@ -0,0 +1,200 @@ +import logging +from typing import Any +from cerberus import Validator + + +logger = logging.getLogger(__name__) + + +class AcceleratorValidator: + OPERAND_REGEX = r"^I[12]$|^O$" + DIMENSION_REGEX = r"^D\d$" + PORT_REGEX = r"^[r]?[w]?_port_\d+$" + + # Intermediate output operand. Hard coded, and must be specified by the user as such + OUTPUT_OPERAND_STR = "O" + # Final output operand after scaling. 
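# Illustrative sketch (not part of this diff): what the three name patterns defined above
# accept, checked with the standard-library `re` module.
import re

OPERAND_REGEX = r"^I[12]$|^O$"       # memory operands: I1, I2 or O
DIMENSION_REGEX = r"^D\d$"           # operational-array dimensions: D1 ... D9
PORT_REGEX = r"^[r]?[w]?_port_\d+$"  # port names: r_port_1, w_port_2, rw_port_1, ...

assert re.match(OPERAND_REGEX, "I1") and re.match(OPERAND_REGEX, "O")
assert re.match(OPERAND_REGEX, "I3") is None
assert re.match(DIMENSION_REGEX, "D4") and re.match(DIMENSION_REGEX, "D10") is None
assert all(re.match(PORT_REGEX, p) for p in ("r_port_1", "w_port_2", "rw_port_1"))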
Hard coded, and must be specified by the user as such + FINAL_OUTPUT_OPERAND_STR = "O_final" + MEM_OP_1_STR = "I1" + MEM_OP_2_STR = "I2" + + SCHEMA = { + "name": {"type": "string", "required": True}, + "memories": { + "type": "dict", + "required": True, + "valuesrules": { + "type": "dict", + "schema": { + "size": {"type": "integer", "required": True}, + "r_bw": {"type": "integer", "required": True}, + "w_bw": {"type": "integer", "required": True}, + "r_cost": {"type": "float", "required": True}, + "w_cost": {"type": "float", "required": True}, + "area": {"type": "float", "required": True}, + "r_port": {"type": "integer", "required": True}, + "w_port": {"type": "integer", "required": True}, + "rw_port": {"type": "integer", "required": True}, + "latency": {"type": "integer", "required": True}, + "min_r_granularity": {"type": "integer", "required": False, "nullable": True, "default": None}, + "min_w_granularity": {"type": "integer", "required": False, "nullable": True, "default": None}, + "mem_type": {"type": "string", "required": False, "default": "sram"}, + "auto_cost_extraction": {"type": "boolean", "default": False}, + "operands": { + "type": "list", + "required": True, + "schema": {"type": "string", "regex": OPERAND_REGEX}, + }, + "ports": { + "type": "list", + "required": True, + "schema": { + "type": "dict", + "schema": { + "fh": {"type": "string", "required": False, "regex": PORT_REGEX}, + "tl": {"type": "string", "required": False, "regex": PORT_REGEX}, + "fl": {"type": "string", "required": False, "regex": PORT_REGEX}, + "th": {"type": "string", "required": False, "regex": PORT_REGEX}, + }, + }, + }, + "served_dimensions": { + "type": "list", + "required": True, + "schema": {"type": "string", "regex": DIMENSION_REGEX}, + }, + }, + }, + }, + "multipliers": { + "type": "dict", + "required": True, + "schema": { + "input_precision": { + "type": "list", + "required": True, + "schema": {"type": "integer"}, + "minlength": 2, + "maxlength": 2, + }, + "multiplier_energy": {"type": "float", "required": True}, + "multiplier_area": {"type": "float", "required": True}, + "dimensions": { + "type": "list", + "required": True, + "schema": {"type": "string", "regex": DIMENSION_REGEX}, + }, + "sizes": {"type": "list", "required": True, "schema": {"type": "integer", "min": 0}}, + }, + }, + "dataflows": { + "type": "dict", + "schema": { + "D1": {"type": "list", "schema": {"type": "string", "regex": r"^[A-Z]+, [0-9]+$"}, "required": False}, + "D2": {"type": "list", "schema": {"type": "string", "regex": r"^[A-Z]+, [0-9]+$"}, "required": False}, + "D3": {"type": "list", "schema": {"type": "string", "regex": r"^[A-Z]+, [0-9]+$"}, "required": False}, + "D4": {"type": "list", "schema": {"type": "string", "regex": r"^[A-Z]+, [0-9]+$"}, "required": False}, + }, + "required": False, + "nullable": False, + }, + } + + def __init__(self, data: Any): + """Initialize Validator object, assign schema and store normalize user-given data""" + self.validator = Validator() + self.validator.schema = AcceleratorValidator.SCHEMA + self.data: dict[str, Any] = self.validator.normalized(data) + self.is_valid = True + + def invalidate(self, extra_msg: str): + self.is_valid = False + logger.critical("User-defined accelerator is invalid. %s", extra_msg) + + def validate(self) -> bool: + """! Validate the user-provided accelerator data. Log a critical warning when invalid data is encountered and + return true iff valid. 
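# Illustrative sketch (not part of this diff): the validator above relies on cerberus
# normalisation to fill in schema defaults before validation. A reduced schema showing the
# same mechanism; the field names mirror the memory schema, the document is hypothetical.
from cerberus import Validator

mini_schema = {
    "size": {"type": "integer", "required": True},
    "mem_type": {"type": "string", "required": False, "default": "sram"},
    "auto_cost_extraction": {"type": "boolean", "default": False},
    "min_r_granularity": {"type": "integer", "required": False, "nullable": True, "default": None},
}

v = Validator(mini_schema)
print(v.normalized({"size": 1024}))
# -> {'size': 1024, 'mem_type': 'sram', 'auto_cost_extraction': False, 'min_r_granularity': None}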
+ """ + # Validate according to schema + validate_success = self.validator.validate(self.data) + errors = self.validator.errors + if not validate_success: + self.invalidate(f"The following restrictions apply: {errors}") + + # Extra validation rules outside of schema + + # Dimension sizes are consistent + oa_dims: list[str] = self.data["multipliers"]["dimensions"] + if len(oa_dims) != len(self.data["multipliers"]["sizes"]): + self.invalidate("Multiplier dimensions and sizes do not match.") + + for mem_name in self.data["memories"]: + self.validate_single_memory(mem_name, oa_dims) + + return self.is_valid + + def validate_single_memory(self, mem_name: str, expected_oa_dims: list[str]) -> None: + mem_data: dict[str, Any] = self.data["memories"][mem_name] + + # Number of port allocations is consistent with memory operands + nb_operands = len(mem_data["operands"]) + nb_ports = len(mem_data["ports"]) + if nb_ports != nb_operands: + self.invalidate( + f"Number of memory ports ({nb_ports}) does not equal number of operands ({nb_operands}) for {mem_name}" + ) + + # No unexpected served dimensions + for served_dimension in mem_data["served_dimensions"]: + if served_dimension not in expected_oa_dims: + self.invalidate(f"Invalid served dimension {served_dimension} in memory {mem_name}") + + # Number of allocated ports per type equals given number of ports + port_data: list[dict[str, str]] = mem_data["ports"] + r_ports: set[str] = set() + w_ports: set[str] = set() + rw_ports: set[str] = set() + for port_dict in port_data: + for port_name in port_dict.values(): + match port_name[0:2]: + case "r_": + r_ports.add(port_name) + case "w_": + w_ports.add(port_name) + case "rw": + rw_ports.add(port_name) + case _: + raise ValueError("Invalid port name") + if len(r_ports) != mem_data["r_port"]: + self.invalidate( + f"Number of given read ports ({mem_data['r_port']}) does not equal number of allocated read ports " + f"({len(r_ports)}) for {mem_name}" + ) + if len(w_ports) != mem_data["w_port"]: + self.invalidate( + f"Number of given write ports ({mem_data['w_port']}) does not equal number of allocated write ports " + f"({len(w_ports)}) for {mem_name}" + ) + if len(rw_ports) != mem_data["rw_port"]: + self.invalidate( + f"Number of given read/write ports ({mem_data['rw_port']}) does not equal number of allocated " + f"read/write ports ({len(rw_ports)}) for {mem_name}" + ) + + # Direction of ports is valid + for port_dict in port_data: + for direction, port_name in port_dict.items(): + if (direction == "fh" or direction == "fl") and (port_name.startswith("r_")): + self.invalidate(f"Read port given for write direction in {mem_name}") + if (direction == "th" or direction == "tl") and (port_name.startswith("w_")): + self.invalidate(f"Write port given for read direction in {mem_name}") + + # # Contains output operand - This is not required + # if AcceleratorValidator.OUTPUT_OPERAND_STR not in mem_data["operands"]: + # self.invalidate(f"{mem_name} does not contain output operand `{AcceleratorValidator.OUTPUT_OPERAND_STR}`") + + @property + def normalized_data(self) -> dict[str, Any]: + """Returns the user-provided data after normalization by the validator. 
(Normalization happens during + initialization)""" + return self.data diff --git a/zigzag/parser/MappingValidator.py b/zigzag/parser/MappingValidator.py new file mode 100644 index 00000000..ced1da99 --- /dev/null +++ b/zigzag/parser/MappingValidator.py @@ -0,0 +1,89 @@ +import logging +from typing import Any + +from zigzag.parser.UpgradedValidator import UpgradedValidator +from zigzag.parser.WorkloadValidator import WorkloadValidator + + +logger = logging.getLogger(__name__) + + +class MappingValidator: + + # Schema for a single operation, UpgradeValidator extrapolates to list of operations + SCHEMA_SINGLE = { + "name": {"type": "string", "required": True, "allowed": WorkloadValidator.ALLOWED_OPERATORS + ["default"]}, + "core_allocation": {"type": "list", "schema": {"type": "integer"}, "default": [0]}, + "spatial_mapping": { + "type": "dict", + "schema": { + "D1": {"type": "list", "schema": {"type": "string", "regex": r"^[A-Z]+, [0-9]+$"}, "required": False}, + "D2": {"type": "list", "schema": {"type": "string", "regex": r"^[A-Z]+, [0-9]+$"}, "required": False}, + "D3": {"type": "list", "schema": {"type": "string", "regex": r"^[A-Z]+, [0-9]+$"}, "required": False}, + "D4": {"type": "list", "schema": {"type": "string", "regex": r"^[A-Z]+, [0-9]+$"}, "required": False}, + }, + "required": False, + "nullable": True, + }, + "memory_operand_links": { + "type": "dict", + "schema": { + "O": {"type": "string", "required": True}, + "W": {"type": "string", "required": True}, + "I": {"type": "string", "required": True}, + }, + "default": {"O": "O", "I": "I1", "W": "I2"}, + }, + "spatial_mapping_hint": { + "type": "dict", + "schema": { + "D1": {"type": "list", "schema": {"type": "string", "regex": r"^[A-Z]+$"}, "required": False}, + "D2": {"type": "list", "schema": {"type": "string", "regex": r"^[A-Z]+$"}, "required": False}, + "D3": {"type": "list", "schema": {"type": "string", "regex": r"^[A-Z]+$"}, "required": False}, + "D4": {"type": "list", "schema": {"type": "string", "regex": r"^[A-Z]+$"}, "required": False}, + }, + "required": False, + }, + } + + def __init__(self, data: Any): + """Initialize Validator object, assign schema and store normalize user-given data""" + self.validator = UpgradedValidator(is_array=True) + self.schema = MappingValidator.SCHEMA_SINGLE + self.data: list[dict[str, Any]] = self.validator.normalize_list(data, schema=self.schema) + self.is_valid = True + + @property + def normalized_data(self): + """! Return normalized, user-provided data.""" + # Can only be called after __init__, where data is automatically normalized + return self.data + + def invalidate(self, extra_msg: str): + self.is_valid = False + logger.critical("User-defined mapping is invalid. %s", extra_msg) + + def validate(self) -> bool: + """! Validate the user-provided accelerator data. Log a critical warning when invalid data is encountered and + return true iff valid. 
+ """ + # Validate according to schema + validate_success = self.validator.validate(self.data, schema=self.schema) + errors = self.validator.errors + if not validate_success: + self.invalidate(f"The following restrictions apply: {errors}") + + # Extra checks + if "default" not in map(lambda x: x["name"], self.data): + self.invalidate("No default mapping defined.") + + for mapping_data in self.data: + self.validate_single_mapping(mapping_data) + + return self.is_valid + + def validate_single_mapping(self, layer_data: dict[str, Any]) -> None: + """ + # TODO check that there are no OADimensions that are not defined in the architecture + """ + pass diff --git a/zigzag/parser/UpgradedValidator.py b/zigzag/parser/UpgradedValidator.py new file mode 100644 index 00000000..9a07b1bd --- /dev/null +++ b/zigzag/parser/UpgradedValidator.py @@ -0,0 +1,97 @@ +""" +Copyright jdotjdot (https://github.com/pyeve/cerberus/issues/220#issuecomment-205047415) +""" + +import copy +from typing import Any +from cerberus import Validator + +# using Cerberus 0.9.2 +import six + + +class UpgradedValidator(Validator): + """ + Subclass of Cerberus's Validator that adds some custom types and allows for the document to be a top-level array by + setting is_array=True + """ + + def __init__(self, *args, **kwargs): + self.is_array: bool = kwargs.get("is_array", False) + super(UpgradedValidator, self).__init__(*args, **kwargs) + + def validate( + self, + document: list[dict[str, Any]], + schema: dict[str, Any] | None = None, + update: bool = False, + context: Any | None = None, + ) -> bool: + + # This gets confusing because this method seems to be called internally for validation as well + # and we don't want to add "rows" to sub-schemas as well, only the + # top-level. + + if self.is_array and not context: # checking for "context" seems to help with not adding 'rows' to every dict + schema = schema or self.schema + + if "rows" not in schema: + if "type" in schema: # is a list + schema = {"rows": {"type": "list", "required": True, "schema": schema}} + else: # is a dict + schema = {"rows": {"type": "list", "required": True, "schema": {"type": "dict", "schema": schema}}} + + if "rows" not in document: + document_dict = {"rows": document} + else: + document_dict = document + return super(UpgradedValidator, self).validate(document_dict, schema, update, context) + + @property + def errors(self) -> dict[str, Any]: + errors = super(UpgradedValidator, self).errors + if self.is_array and "rows" in errors: + return errors["rows"] + else: + return errors + + _type_defaults = { + "integer": 0, + "list": [], + "dict": {}, + "string": "", + } + + def get_type_default(self, type_): + return self._type_defaults.get(type_) + + def get_default(self, field_schema): + if "default" in field_schema: + return field_schema.get("default") + + if field_schema.get("nullable", False): + return None + + return self.get_type_default(field_schema["type"]) + + def add_defaults_to_doc(self, document: dict[str, Any], doc_schema: dict[str, Any]) -> dict[str, Any]: + new_doc: dict[str, Any] = copy.deepcopy(document) + for field, field_schema in doc_schema.items(): + if field not in document: + new_doc[six.u(field)] = self.get_default(field_schema) + + return new_doc + + def normalize_list( + self, document: list[dict[str, Any]], schema: dict[str, Any] | None = None + ) -> list[dict[str, Any]]: + # Needed to write this because the .normalized() method doesn't come out until Cerberus 0.10 + # which has not yet been released + + # This is a bit lazy and assumes a 
list of dicts, since that's what + # this whole subclass was written for + + schema = schema or self.schema + schema = schema["rows"]["schema"] if "rows" in schema else schema + assert isinstance(document, (list, tuple, set)) + return [self.add_defaults_to_doc(doc, schema) for doc in document] diff --git a/zigzag/parser/WorkloadValidator.py b/zigzag/parser/WorkloadValidator.py new file mode 100644 index 00000000..4e8e758a --- /dev/null +++ b/zigzag/parser/WorkloadValidator.py @@ -0,0 +1,143 @@ +import logging +from typing import Any + +# from cerberus import Validator + +from zigzag.parser.UpgradedValidator import UpgradedValidator + + +logger = logging.getLogger(__name__) + + +class WorkloadValidator: + EQUATION_REGEX = r"^O(\[\w+\])+\+?=\w(\[\w+\])+[*+]\w(\[\w+\])+$" + LAYER_DIM_RELATION_REGEX = r"^(\w+)\s*=\s*(?:(\w+)\s*\*\s*)?(\w+)\s*\+\s*(?:(\w+)\s*\*\s*)?(\w+)$" + # TODO add more operators from ONNX + ALLOWED_OPERATORS: list[str] = [ + "Conv", + "Pooling", + "Add", + "Conv_downsample", + "Gemm", + "Pool", + "MaxPool", + "AveragePool", + "GlobalAveragePool", + # Used for testing + "layer_on_core0", + "layer_on_core1", + "layer_on_core2", + "layer_on_core3", + ] + + # Schema for a single layer, UpgradeValidator extrapolates to list of layers + LAYER_SCHEMA: dict[str, Any] = { + "id": {"type": "integer", "required": True}, + "operator_type": { + "type": "string", + "allowed": ALLOWED_OPERATORS, + "required": True, + }, + "equation": {"type": "string", "required": True, "regex": EQUATION_REGEX}, + "dimension_relations": { + "type": "list", + "schema": {"type": "string", "regex": LAYER_DIM_RELATION_REGEX}, + "required": False, + }, + "loop_dims": {"type": "list", "schema": {"type": "string"}, "required": True}, + "loop_sizes": {"type": "list", "schema": {"type": "integer"}, "required": True}, + "operand_precision": { + "type": "dict", + "required": True, + "schema": { + "I": {"type": "integer", "required": False}, + "W": {"type": "integer", "required": False}, + "O": {"type": "integer", "required": True}, + "O_final": {"type": "integer", "required": True}, + }, + }, + "operand_source": { + "type": "dict", + "required": False, + "schema": { + "W": {"type": "integer", "required": False}, + "I": {"type": "integer", "required": False}, + }, + }, + "pr_loop_dims": { + "type": "list", + "schema": {"type": "string"}, + "required": False, + "nullable": True, + "default": None, + }, + "pr_loop_sizes": { + "type": "list", + "schema": {"type": "integer"}, + "required": False, + "nullable": True, + "default": None, + }, + "padding": { + "type": "list", + "schema": { + "type": "list", + "schema": {"type": "integer"}, + "default": [0, 0], + "minlength": 2, + "maxlength": 2, + }, + "required": False, + "nullable": True, + "default": None, + }, + } + + def __init__(self, data: Any): + """Initialize Validator object, assign schema and store normalized user-given data""" + self.validator = UpgradedValidator(is_array=True) + self.schema = WorkloadValidator.LAYER_SCHEMA + self.data: list[dict[str, Any]] = self.validator.normalize_list(data, schema=self.schema) + + self.is_valid = True + + @property + def normalized_data(self): + """! Return normalized, user-provided data.""" + # Can only be called after __init__, where data is automatically normalized + return self.data + + def __invalidate(self, extra_msg: str): + self.is_valid = False + logger.critical("User-defined workload is invalid. %s", extra_msg) + + def validate(self) -> bool: + """! Validate the user-provided accelerator data. 
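# Illustrative sketch (not part of this diff): what the two regexes above accept. The sample
# strings are the equation/dimension-relation forms used elsewhere in this changeset for
# Gemm and Conv layers.
import re

EQUATION_REGEX = r"^O(\[\w+\])+\+?=\w(\[\w+\])+[*+]\w(\[\w+\])+$"
LAYER_DIM_RELATION_REGEX = r"^(\w+)\s*=\s*(?:(\w+)\s*\*\s*)?(\w+)\s*\+\s*(?:(\w+)\s*\*\s*)?(\w+)$"

print(bool(re.match(EQUATION_REGEX, "O[b][k]+=W[k][c]*I[b][c]")))  # True
print(bool(re.match(EQUATION_REGEX, "O[b][g][k][oy][ox]+=W[g][k][c][fy][fx]*I[b][g][c][iy][ix]")))  # True
print(bool(re.match(LAYER_DIM_RELATION_REGEX, "ix=1*ox+1*fx")))    # True
print(bool(re.match(LAYER_DIM_RELATION_REGEX, "iy=2*oy+fy")))      # True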
Log a critical warning when invalid data is encountered and + return true iff valid. + """ + # Validate according to schema + validate_success = self.validator.validate(self.data, schema=self.schema) + errors = self.validator.errors + if not validate_success: + self.__invalidate(f"The following restrictions apply: {errors}") + + for layer_data in self.data: + self.__validate_single_layer(layer_data) + + return self.is_valid + + def __validate_single_layer(self, layer_data: dict[str, Any]) -> None: + """Run extra checks on a single layer""" + + # Check PR loop dims + if "padding" in layer_data and layer_data["padding"] is not None: + if "pr_loop_dims" not in layer_data: + self.__invalidate("Padding defined, but no corresponding PR loop dimensions") + elif len(layer_data["padding"]) != len(layer_data["pr_loop_dims"]): + self.__invalidate("Number of PR loop dimensions not equal to number of corresponding paddings") + + if "pr_loop_sizes" in layer_data and layer_data["pr_loop_sizes"] is not None: + if "pr_loop_dims" not in layer_data: + self.__invalidate("PR loop sizes defined, but no corresponding PR loop dimensions") + elif len(layer_data["pr_loop_sizes"]) != len(layer_data["pr_loop_dims"]): + self.__invalidate("Number of PR loop dimensions not equal to number of corresponding sizes") diff --git a/zigzag/parser/accelerator_factory.py b/zigzag/parser/accelerator_factory.py new file mode 100644 index 00000000..35034f0f --- /dev/null +++ b/zigzag/parser/accelerator_factory.py @@ -0,0 +1,183 @@ +from typing import Any +from zigzag.datatypes import Constants, LayerDim, MemoryOperand, OADimension, UnrollFactor +from zigzag.hardware.architecture.Accelerator import Accelerator +from zigzag.hardware.architecture.Core import Core +from zigzag.hardware.architecture.MemoryHierarchy import MemoryHierarchy +from zigzag.hardware.architecture.MemoryInstance import MemoryInstance +from zigzag.hardware.architecture.memory_level import ServedMemDimensions +from zigzag.hardware.architecture.memory_port import DataDirection, PortAllocation +from zigzag.hardware.architecture.operational_array import MultiplierArray, OperationalArray +from zigzag.hardware.architecture.operational_unit import Multiplier +from zigzag.mapping.spatial_mapping import MappingSingleOADim, SpatialMapping + + +class AcceleratorFactory: + """! Converts valid user-provided accelerator data into an `Accelerator` instance""" + + def __init__(self, data: dict[str, Any]): + """! Generate an `Accelerator` instance from the validated user-provided data.""" + self.data = data + + def create(self) -> Accelerator: + """! Create an Accelerator instance from the user-provided data. + NOTE the memory instances must be defined from lowest to highest. + """ + core_factory = CoreFactory(self.data) + core = core_factory.create() + return Accelerator(name=self.data["name"], core_set={core}) + + +class CoreFactory: + """! Converts valid user-provided accelerator data into a `Core` instance""" + + def __init__(self, data: dict[str, Any]): + """! Generate an `Core` instance from the validated user-provided data.""" + self.data = data + + def create(self, core_id: int = 1) -> Core: + """! Create an Core instance from the user-provided data. + NOTE the memory instances must be defined from lowest to highest. 
+ """ + operational_array = self.create_operational_array() + mem_graph = MemoryHierarchy(operational_array) + dataflows = self.create_dataflows() + + for mem_name in self.data["memories"]: + memory_factory = MemoryFactory(mem_name, self.data["memories"][mem_name]) + memory_factory.add_memory_to_graph(mem_graph) + + return Core( + core_id=core_id, operational_array=operational_array, memory_hierarchy=mem_graph, dataflows=dataflows + ) + + def create_operational_array(self) -> OperationalArray: + mul_data: dict[str, Any] = self.data["multipliers"] + multiplier = Multiplier( + input_precision=mul_data["input_precision"], + energy_cost=mul_data["multiplier_energy"], + area=mul_data["multiplier_area"], + ) + + oa_dims: list[str] = mul_data["dimensions"] + dimension_sizes: dict[OADimension, int] = { + OADimension(oa_dim): mul_data["sizes"][i] for i, oa_dim in enumerate(oa_dims) + } + multiplier_array = MultiplierArray(multiplier, dimension_sizes) + return multiplier_array + + def create_dataflows(self) -> SpatialMapping | None: + if "dataflows" not in self.data: + return None + if self.data["dataflows"] is None: + return None + + user_data: dict[str, list[str]] = self.data["dataflows"] + spatial_mapping_dict: dict[OADimension, MappingSingleOADim] = {} + + for oa_dim_str, unrolling_list in user_data.items(): + oa_dim = OADimension(oa_dim_str) + mapping_this_oa_dim = self.__create_dataflow_single_oa_dim(unrolling_list) + spatial_mapping_dict[oa_dim] = mapping_this_oa_dim + + return SpatialMapping(spatial_mapping_dict) + + def __create_dataflow_single_oa_dim(self, mapping_data: list[str]) -> MappingSingleOADim: + mapping_dict: dict[LayerDim, UnrollFactor] = {} + + for single_unrolling in mapping_data: + layer_dim_str = single_unrolling.split(",")[0] + unrolling = int(single_unrolling.split(",")[-1]) + layer_dim = LayerDim(layer_dim_str) + mapping_dict[layer_dim] = unrolling + + return MappingSingleOADim(mapping_dict) + + +class MemoryFactory: + """! Create MemoryInstances and adds them to memory hierarchy.""" + + def __init__(self, name: str, mem_data: dict[str, Any]): + self.data = mem_data + self.name = name + + def create_memory_instance(self) -> MemoryInstance: + return MemoryInstance( + name=self.name, + size=self.data["size"], + r_bw=self.data["r_bw"], + w_bw=self.data["w_bw"], + r_cost=self.data["r_cost"], + w_cost=self.data["w_cost"], + area=self.data["area"], + r_port=self.data["r_port"], + w_port=self.data["w_port"], + rw_port=self.data["rw_port"], + latency=self.data["latency"], + min_r_granularity=self.data["min_r_granularity"], + min_w_granularity=self.data["min_w_granularity"], + ) + + def add_memory_to_graph(self, mem_graph: MemoryHierarchy) -> None: + """Create a new MemoryInstance and add it to the given MemoryHierarchy""" + instance = self.create_memory_instance() + + operands: list[MemoryOperand] = [MemoryOperand(x) for x in self.data["operands"]] + port_allocation = self.create_port_allocation() + served_dimensions = self.create_served_mem_dimensions() + + mem_graph.add_memory( + memory_instance=instance, + operands=operands, + port_alloc=port_allocation, + served_dimensions=served_dimensions, + ) + + def create_served_mem_dimensions(self) -> ServedMemDimensions: + data = {OADimension(oa_dim_str) for oa_dim_str in self.data["served_dimensions"]} + return ServedMemDimensions(data) + + def create_port_allocation(self) -> PortAllocation: + """The order of the port allocations matches the order of the MemoryOperands. 
+ # TODO support empty allocation -> return default configuration + """ + port_data: list[dict[str, str]] = self.data["ports"] + + data: dict[MemoryOperand, dict[DataDirection, str]] = { + MemoryOperand(mem_op_str): { + self.translate_to_data_direction(direction): port_name + for direction, port_name in port_data[idx].items() + } + for idx, mem_op_str in enumerate(self.data["operands"]) + } + return PortAllocation(data) + + def create_default_port_allocation(self) -> PortAllocation: + data: dict[MemoryOperand, dict[DataDirection, str]] = dict() + for mem_op_str in self.data["operands"]: + mem_op = MemoryOperand(mem_op_str) + if mem_op == Constants.OUTPUT_MEM_OP: + data[mem_op] = { + DataDirection.WR_IN_BY_HIGH: "w_port_1", + DataDirection.WR_IN_BY_LOW: "w_port_1", + DataDirection.RD_OUT_TO_HIGH: "r_port_1", + DataDirection.RD_OUT_TO_LOW: "r_port_1", + } + else: + data[mem_op] = { + DataDirection.WR_IN_BY_HIGH: "w_port_1", + DataDirection.RD_OUT_TO_LOW: "r_port_1", + } + return PortAllocation(data) + + def translate_to_data_direction(self, x: str) -> DataDirection: + match x: + case "fh": + return DataDirection.WR_IN_BY_HIGH + case "fl": + return DataDirection.WR_IN_BY_LOW + case "th": + return DataDirection.RD_OUT_TO_HIGH + case "tl": + return DataDirection.RD_OUT_TO_LOW + case _: + raise ValueError(f"Data direction must be either `fh`, `th`, `fl`, or `tl`. Not {x}") diff --git a/zigzag/parser/onnx/ConvParser.py b/zigzag/parser/onnx/ConvParser.py new file mode 100644 index 00000000..4510c4a0 --- /dev/null +++ b/zigzag/parser/onnx/ConvParser.py @@ -0,0 +1,167 @@ +from math import ceil +from typing import Any + +from onnx import ModelProto, NodeProto + + +from zigzag.parser.onnx.ONNXOperatorParser import ONNXOperatorParser +from zigzag.parser.onnx.utils import ( + get_attribute_ints_with_name, + get_node_input_output_dimension_shapes, +) +from zigzag.parser.workload_factory import LayerNodeFactory +from zigzag.workload.layer_node import LayerNode + + +class ConvParser(ONNXOperatorParser): + """! Parser for ONNX Conv and QLinearConv nodes into LayerNode.""" + + def __init__( + self, + node_id: int, + node: NodeProto, + nodes_outputs: dict[int, Any], + mapping_data: list[dict[str, Any]], + onnx_model: ModelProto, + ) -> None: + super().__init__(node_id, node, nodes_outputs, onnx_model) + self.mapping_data = mapping_data + self.onnx_model = onnx_model + + def run(self) -> LayerNode: + """! Run the parser and return the created LayerNode object""" + return self.generate_layer_node_for_conv() + + # def get_weight_name(self, node: NodeProto): + # """! Return the name of the weight input of this node depending on its operator type + # @param node (NodeProto): The node + # """ + # op_type = node.op_type # 'Conv', 'QLinearConv', ... + # if op_type == "Conv": + # return node.input[1] + # elif op_type == "QLinearConv": + # return node.input[3] + # else: + # raise NotImplementedError(f"Retrieving weight name for onnx node of type {op_type} is not supported.") + + # def get_input_output_weight_data_type(self): + # """! 
Return the data type of the input, output and weight tensors of this node.""" + # input_name = self.node.input[0] + # output_name = self.node.output[0] + # weight_name = self.get_weight_name(self.node) + + # input_elem_type = get_onnx_tensor_type(input_name, self.onnx_model).elem_type + # output_elem_type = get_onnx_tensor_type(output_name, self.onnx_model).elem_type + # weight_elem_type = get_onnx_tensor_type(weight_name, self.onnx_model).elem_type + + # return input_elem_type, output_elem_type, weight_elem_type + + def get_layer_node_input_format( + self, + kernel_shape: list[int], + strides: list[int], + dilations: list[int], + group_size: int, + padding: list[int], + ia_shape: list[int], + oa_shape: list[int], + prev_node_id: int | None = None, + ) -> dict[str, Any]: + """! Generate the necessary dictionary items required for the LayerNode creation. If there is no data for a + given Layer Attribute, the Layer Attribute is not included in the returned dict. + """ + + data: dict[str, Any] = {} + data["id"] = self.node_id + data["name"] = f"Layer{self.node_id}" + data["operator_type"] = self.node.op_type + # IMPORTANT: If any of the input loops require padding, they should be defined as the rightmost dimensions + # in the equation. This is because we construct the dimensionality order and then add the padding to those last + # dimensions in the order + data["equation"] = "O[b][g][k][oy][ox]+=W[g][k][c][fy][fx]*I[b][g][c][iy][ix]" + + # Get dimension sizes from input parameters + assert ( + ia_shape[0] == oa_shape[0] + ), "Batch size is different for input and output activations." + batch_size = oa_shape[0] if oa_shape[0] > 0 else 1 + size_k = ceil(oa_shape[1] / group_size) + size_ox = oa_shape[3] + size_oy = oa_shape[2] + size_c = ceil(ia_shape[1] / group_size) + size_ix = ia_shape[3] + size_iy = ia_shape[2] + size_fx = kernel_shape[0] + size_fy = kernel_shape[1] + data["loop_dims"] = ["B", "K", "G", "OX", "OY", "C", "FX", "FY"] + data["loop_sizes"] = [ + batch_size, + size_k, + group_size, + size_ox, + size_oy, + size_c, + size_fx, + size_fy, + ] + data["dimension_relations"] = [ + f"ix={strides[0]}*ox+{dilations[0]}*fx", + f"iy={strides[1]}*oy+{dilations[1]}*fy", + ] + data["operand_precision"] = {"O": 16, "O_final": 8, "W": 8, "I": 8} + # Constant operand + data["operand_source"] = {"W": self.node_id} + if prev_node_id is not None: + data["operand_source"]["I"] = prev_node_id + + # Add padding information + data["pr_loop_dims"] = ["IX", "IY"] + data["pr_loop_sizes"] = [size_ix, size_iy] + data["padding"] = [ + [padding[0], padding[2]], + [padding[1], padding[3]], + ] + + return data + + def generate_layer_node_for_conv(self): + + attrs = self.node.attribute + kernel_shape: list[int] = get_attribute_ints_with_name("kernel_shape", attrs, default=None) # type: ignore + strides: list[int] = get_attribute_ints_with_name("strides", attrs, default=[1, 1]) # type: ignore + dilations: list[int] = get_attribute_ints_with_name("dilations", attrs, default=[1, 1]) # type: ignore + group_size: int = get_attribute_ints_with_name("group", attrs, default=1) # type: ignore + padding: list[int] = get_attribute_ints_with_name("pads", attrs, default=[0, 0, 0, 0]) # type: ignore + + # Get the input and output activation shapes + ia_dimension_shape, oa_dimension_shape = get_node_input_output_dimension_shapes( + self.node, self.onnx_model + ) + + # Get the input and output activation and weight data type (precision) # TODO this is not used + # ia_data_type, oa_data_type, w_data_type = 
self.get_input_output_weight_data_type() + + # Compute node input source + predecessors: list[int] = [] + for node_input in self.node.input: + for n in self.nodes_outputs: + if node_input in self.nodes_outputs[n]: + predecessors.append(n) + assert len(predecessors) <= 1, "Only a single layer operand source expected" + prev_node_id = None if len(predecessors) == 0 else predecessors.pop() + + # Create LayerNode + layer_data = self.get_layer_node_input_format( + kernel_shape, + strides, + dilations, + group_size, + padding, + ia_dimension_shape, + oa_dimension_shape, + prev_node_id, + ) + factory = LayerNodeFactory(layer_data, self.mapping_data) + layer_node = factory.create() + + return layer_node diff --git a/zigzag/io/onnx/DefaultNodeParser.py b/zigzag/parser/onnx/DefaultNodeParser.py similarity index 52% rename from zigzag/io/onnx/DefaultNodeParser.py rename to zigzag/parser/onnx/DefaultNodeParser.py index 466ee8e4..f45cbb3a 100644 --- a/zigzag/io/onnx/DefaultNodeParser.py +++ b/zigzag/parser/onnx/DefaultNodeParser.py @@ -1,32 +1,28 @@ -from typing import Any - -from onnx import NodeProto -from zigzag.io.onnx.Parser import Parser +from zigzag.parser.onnx.ONNXOperatorParser import ONNXOperatorParser from zigzag.workload.DummyNode import DummyNode -class DefaultNodeParser(Parser): +class DefaultNodeParser(ONNXOperatorParser): """! This class parses an ONNX node into a DummyNode.""" - def __init__(self, node_id: int, node: NodeProto, nodes_outputs: dict[int, Any]) -> None: - - super().__init__(node_id, node, nodes_outputs, mapping=None, onnx_model=None) - def run(self) -> DummyNode: """! Run the parser""" - dummy_node = self.generate_dummy_node() - return dummy_node + return self.generate_dummy_node() def generate_dummy_node(self) -> DummyNode: - preds: list[int] = [] + predecessors: list[int] = [] for node_input in self.node.input: for n in self.nodes_outputs: if node_input in self.nodes_outputs[n]: - preds.append(n) + predecessors.append(n) + + # TODO DummyLayer cannot deal with two operand sources + # assert len(predecessors) <= 1, "Only a single layer operand source expected" + prev_node_id = None if len(predecessors) == 0 else predecessors.pop() node_obj = DummyNode( self.node_id, - preds, + prev_node_id, node_name=self.node.name, type=self.node.op_type.lower(), ) diff --git a/zigzag/parser/onnx/GemmParser.py b/zigzag/parser/onnx/GemmParser.py new file mode 100644 index 00000000..826eecc9 --- /dev/null +++ b/zigzag/parser/onnx/GemmParser.py @@ -0,0 +1,117 @@ +from typing import Any + +from onnx import ModelProto, NodeProto +from zigzag.parser.onnx.ONNXOperatorParser import ONNXOperatorParser +from zigzag.parser.onnx.utils import ( + get_node_input_output_dimension_shapes, + get_attribute_ints_with_name, +) +from zigzag.parser.workload_factory import LayerNodeFactory +from zigzag.workload.layer_node import LayerNode + + +class GemmParser(ONNXOperatorParser): + """! Parses an ONNX Gemm operator into a LayerNode""" + + def __init__( + self, + node_id: int, + node: NodeProto, + nodes_outputs: dict[int, Any], + mapping_data: list[dict[str, Any]], + onnx_model: ModelProto, + ) -> None: + super().__init__(node_id, node, nodes_outputs, onnx_model) + self.mapping_data = mapping_data + self.onnx_model = onnx_model + + def run(self) -> LayerNode: + """! Run the parser""" + return self.generate_layer_node_for_gemm() + + def get_layer_node_input_format( + self, + batch_size: int, + size_in: int, + size_out: int, + prev_node_id: int | None = None, + ) -> dict[str, Any]: + """! 
Generate the necessary dictionary items required for the Node creation.""" + + data: dict[str, Any] = {} + data["id"] = self.node_id + data["name"] = f"Layer{self.node_id}" + data["operator_type"] = self.node.op_type + data["equation"] = "O[b][k]+=W[k][c]*I[b][c]" + data["loop_dims"] = ["B", "C", "K"] + data["loop_sizes"] = [batch_size, size_in, size_out] + + data["dimension_relations"] = [] + data["operand_precision"] = {"O": 16, "O_final": 8, "W": 8, "I": 8} + # Constant operand + data["operand_source"] = {"W": self.node_id} + if prev_node_id is not None: + data["operand_source"]["I"] = prev_node_id + + return data + + def generate_layer_node_for_gemm(self): + ia_dimension_shape, oa_dimension_shape = get_node_input_output_dimension_shapes(self.node, self.onnx_model) + + # The Gemm node includes flags for transpose of both of its inputs. + # If the first input is transposed, we need to transpose its shape here. + transA = get_attribute_ints_with_name("transA", self.node.attribute, default=0) + if transA: + assert len(ia_dimension_shape) == 2 + ia_dimension_shape = (ia_dimension_shape[1], ia_dimension_shape[0]) + + # If the input activations are empty (which can happen if there is a shape operator in the path) + # we try to extract the weights from the model graph initializer to get the correct input activation size + # TODO having a shape operator in the ONNX graph should be dealt with at a higher level + if not ia_dimension_shape: + weight_name = self.node.input[1] + initializer_names = [i.name for i in self.onnx_model.graph.initializer] + weight_name_index = initializer_names.index(weight_name) + # Get the weight dimensions + weights = self.onnx_model.graph.initializer[weight_name_index] + weight_dims = list(weights.dims) + assert ( + len(weight_dims) == 2 + ), f"There are {len(weight_dims)} weight dimensions for Gemm node {self.node.name}" + # Check if the weights are transposed + transB = get_attribute_ints_with_name("transB", self.node.attribute, default=0) + if transB: + weight_dims = [weight_dims[1], weight_dims[0]] + assert ( + len(oa_dimension_shape) == 2 + ), "Can't infer ia_dimension_shape if oa_dimension_shape is also not known." 
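+ # Reconstruct the missing input activation shape as [batch_size, in_channels] from the output batch size and the (possibly transposed) weight matrix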
+ size_b = oa_dimension_shape[0] + size_c = weight_dims[0] + ia_dimension_shape = [size_b, size_c] + + # Unpack sizes + # First element is batch size, second is input/output channel + assert len(ia_dimension_shape) == len(oa_dimension_shape) == 2 + # Batch size should be the same for input and output + assert ia_dimension_shape[0] == oa_dimension_shape[0] + # If the batch size is 0, we discard it by setting it to 1 internally inside ZigZag + batch_size = ia_dimension_shape[0] + size_b = 1 if batch_size == 0 else batch_size + size_c = ia_dimension_shape[1] + size_k = oa_dimension_shape[1] + + # Compute node input source + predecessors: list[int] = [] + for node_input in self.node.input: + for n in self.nodes_outputs: + if node_input in self.nodes_outputs[n]: + predecessors.append(n) + assert len(predecessors) <= 1, "Only a single layer operand source expected" + prev_node_id = None if len(predecessors) == 0 else predecessors.pop() + + # Create LayerNode + layer_data = self.get_layer_node_input_format(size_b, size_c, size_k, prev_node_id) + factory = LayerNodeFactory(layer_data, self.mapping_data) + layer_node = factory.create() + + return layer_node diff --git a/zigzag/parser/onnx/MatMulParser.py b/zigzag/parser/onnx/MatMulParser.py new file mode 100644 index 00000000..cf83500d --- /dev/null +++ b/zigzag/parser/onnx/MatMulParser.py @@ -0,0 +1,94 @@ +from typing import Any +from onnx import ModelProto +from onnx import NodeProto + +from zigzag.parser.onnx.ONNXOperatorParser import ONNXOperatorParser +from zigzag.parser.onnx.utils import get_node_input_output_dimension_shapes +from zigzag.parser.workload_factory import LayerNodeFactory +from zigzag.workload.layer_node import LayerNode + +import logging + +logger = logging.getLogger(__name__) + + +class MatMulParser(ONNXOperatorParser): + """! Parses an ONNX MatMul operator into a LayerNode. + # TODO this is identical to GemmParser + """ + + def __init__( + self, + node_id: int, + node: NodeProto, + nodes_outputs: dict[int, Any], + mapping_data: list[dict[str, Any]], + onnx_model: ModelProto, + ) -> None: + super().__init__(node_id, node, nodes_outputs, onnx_model) + self.mapping_data = mapping_data + self.onnx_model = onnx_model + + def run(self) -> LayerNode: + """Run the parser""" + layer_node = self.generate_layer_node_for_matmul() + return layer_node + + def get_layer_node_input_format( + self, + batch_size: int, + size_in: int, + size_out: int, + prev_node_id: int | None = None, + ) -> dict[str, Any]: + """! Generate the necessary dictionary items required for the Node creation. 
+ # TODO this is identical to the one from `GemmParser` + """ + + data: dict[str, Any] = {} + data["id"] = self.node_id + data["name"] = f"Layer{self.node_id}" + data["operator_type"] = self.node.op_type + data["equation"] = "O[b][k]+=W[k][c]*I[b][c]" + data["loop_dims"] = ["B", "C", "K"] + data["loop_sizes"] = [batch_size, size_in, size_out] + + data["dimension_relations"] = [] + data["operand_precision"] = {"O": 16, "O_final": 8, "W": 8, "I": 8} + # Constant operand + data["operand_source"] = {"W": self.node_id} + if prev_node_id is not None: + data["operand_source"]["I"] = prev_node_id + + return data + + def generate_layer_node_for_matmul(self): + + ia_dimension_shape, oa_dimension_shape = get_node_input_output_dimension_shapes(self.node, self.onnx_model) + + # TODO it should be able to deal with tensors + # First element is batch size, second is input/output channel + assert len(ia_dimension_shape) == len(oa_dimension_shape) == 2 + # Batch size should be the same for input and output + assert ia_dimension_shape[0] == oa_dimension_shape[0] + # If the batch size is 0, we discard it by setting it to 1 internally inside ZigZag + batch_size = ia_dimension_shape[0] + size_b = 1 if batch_size == 0 else batch_size + size_c = ia_dimension_shape[1] + size_k = oa_dimension_shape[1] + + # Compute node input source + predecessors: list[int] = [] + for node_input in self.node.input: + for n in self.nodes_outputs: + if node_input in self.nodes_outputs[n]: + predecessors.append(n) + assert len(predecessors) <= 1, "Only a single layer operand source expected" + prev_node_id = None if len(predecessors) == 0 else predecessors.pop() + + # Create LayerNode + layer_data = self.get_layer_node_input_format(size_b, size_c, size_k, prev_node_id) + factory = LayerNodeFactory(layer_data, self.mapping_data) + layer_node = factory.create() + + return layer_node diff --git a/zigzag/io/onnx/ONNXModelParser.py b/zigzag/parser/onnx/ONNXModelParser.py similarity index 51% rename from zigzag/io/onnx/ONNXModelParser.py rename to zigzag/parser/onnx/ONNXModelParser.py index 06c52fda..2aba4432 100644 --- a/zigzag/io/onnx/ONNXModelParser.py +++ b/zigzag/parser/onnx/ONNXModelParser.py @@ -1,71 +1,47 @@ from typing import Any from onnx import ModelProto -from zigzag.io.onnx.DefaultNodeParser import DefaultNodeParser -from zigzag.io.onnx.GemmParser import GemmParser -from zigzag.io.onnx.MatMulParser import MatMulParser -from zigzag.io.onnx.ConvParser import ConvParser -from zigzag.io.onnx.utils import ( - parse_mapping_from_path, +from zigzag.parser.onnx.DefaultNodeParser import DefaultNodeParser +from zigzag.parser.onnx.GemmParser import GemmParser +from zigzag.parser.onnx.MatMulParser import MatMulParser +from zigzag.parser.onnx.ConvParser import ConvParser +from zigzag.parser.onnx.utils import ( parse_onnx_model_from_path, parse_dynamic_onnx_model, ) +from zigzag.stages.WorkloadParserStage import WorkloadParserStage from zigzag.workload.ONNXWorkload import ONNXWorkload import logging + logger = logging.getLogger(__name__) class ONNXModelParser: - """! Parse the ONNX model into a workload.""" + """! Parses the ONNX model into a workload.""" - def __init__(self, onnx_model: str | ModelProto, mapping_path: str | dict[str, dict[str, Any]] | None) -> None: + def __init__(self, onnx_model: str | ModelProto, mapping_yaml_path: str) -> None: + assert isinstance(onnx_model, (str, ModelProto)), f"Given onnx_model is of type {type(onnx_model)}." 
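+ # The mapping must now be provided as a path to a yaml file; it is parsed later in `run` together with the onnx model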
+ assert isinstance(mapping_yaml_path, str) and mapping_yaml_path.split(".")[-1] == "yaml" - # Sanity checks on given onnx_model if isinstance(onnx_model, str): - self.onnx_model_path = onnx_model - self.onnx_model = None - elif isinstance(onnx_model, ModelProto): - self.onnx_model_path = None - self.onnx_model = onnx_model - else: - raise TypeError(f"Given onnx_model is of type {type(onnx_model)}.") - - # Sanity checks on given mapping - if isinstance(mapping_path, str): - self.mapping_path = mapping_path - self.mapping = None - elif isinstance(mapping_path, dict): - self.mapping_path = None - self.mapping = mapping_path - elif mapping_path is None: - self.mapping_path = None - self.mapping = None + self.onnx_model: ModelProto = parse_onnx_model_from_path(onnx_model) else: - raise TypeError(f"Given mapping is of type {type(mapping_path)}.") + self.onnx_model = onnx_model self.workload = None + self.mapping_yaml_path = mapping_yaml_path + + def run(self) -> ONNXWorkload: + """! Iterate through the onnx model and generate the workload consisting of LayerNodes and DummyNodes""" - def run(self): - """! Run the parser - - parse the onnx_model_path into an onnx model - - parse the mapping_path into a mapping dict - - iterate through the onnx model and generate the workload consisting of LayerNodes and DummyNodes - """ - if self.onnx_model_path is not None: - self.onnx_model = parse_onnx_model_from_path(self.onnx_model_path) - # Check for dynamicity in the model in the form of if statements assert self.onnx_model is not None self.onnx_model = parse_dynamic_onnx_model(self.onnx_model) + self.mapping_data = WorkloadParserStage.parse_mapping_data(self.mapping_yaml_path) - if not self.mapping: - mapping = parse_mapping_from_path(self.mapping_path) - self.mapping = mapping - - workload = self.parse_workload_from_onnx_model_and_mapping() - self.workload = workload + return self.parse_workload_from_onnx_model_and_mapping() def parse_workload_from_onnx_model_and_mapping(self): """! Converts an onnx model into a workload object. @@ -74,7 +50,8 @@ def parse_workload_from_onnx_model_and_mapping(self): If the model isn't in the format with external data, it will be slow to manipulate it, so better to work with raw models with external data. The line below accomplishes this. 
- onnx.save_model(model, 'model_external.onnx', save_as_external_data=True, all_tensors_to_one_file=True, location='model_external_raw_data', size_threshold=1024, convert_attribute=False) + onnx.save_model(model, 'model_external.onnx', save_as_external_data=True, all_tensors_to_one_file=True, + location='model_external_raw_data', size_threshold=1024, convert_attribute=False) In the future, assume we will have a model saved with external data, then we have to execute the code below if the model isn't inferred yet @@ -98,28 +75,22 @@ def parse_workload_from_onnx_model_and_mapping(self): nodes_outputs[node_id] = node.output if node.op_type in ["QLinearConv", "Conv"]: - parser = ConvParser(node_id, node, nodes_outputs, self.mapping, self.onnx_model) + parser = ConvParser(node_id, node, nodes_outputs, self.mapping_data, self.onnx_model) elif node.op_type in ["MatMul"]: - parser = MatMulParser(node_id, node, nodes_outputs, self.mapping, self.onnx_model) + parser = MatMulParser(node_id, node, nodes_outputs, self.mapping_data, self.onnx_model) elif node.op_type in ["Gemm"]: - parser = GemmParser(node_id, node, nodes_outputs, self.mapping, self.onnx_model) - else: # it is not a convolutional node, so create a DummyNode - parser = DefaultNodeParser(node_id, node, nodes_outputs) + parser = GemmParser(node_id, node, nodes_outputs, self.mapping_data, self.onnx_model) + # it is not a convolutional node, so create a DummyNode + else: + parser = DefaultNodeParser(node_id, node, nodes_outputs, self.onnx_model) + node_obj = parser.run() # Add the node_obj to the ONNXWorkload workload.add(node_id, node_obj) logger.info( # pylint: disable=W1203 - f"Created ONNXWorkload graph with {workload.number_of_nodes()} nodes and {workload.number_of_edges()} edges." + f"Created ONNXWorkload graph with {workload.number_of_nodes()} nodes and " + f"{workload.number_of_edges()} edges." # type: ignore ) return workload - - def get_onnx_model(self): - return self.onnx_model - - def get_mapping(self): - return self.mapping - - def get_workload(self): - return self.workload diff --git a/zigzag/parser/onnx/ONNXOperatorParser.py b/zigzag/parser/onnx/ONNXOperatorParser.py new file mode 100644 index 00000000..27e52e29 --- /dev/null +++ b/zigzag/parser/onnx/ONNXOperatorParser.py @@ -0,0 +1,50 @@ +from abc import ABCMeta, abstractmethod +from typing import Any + +from onnx import ModelProto, NodeProto + +from zigzag.parser.onnx.utils import get_onnx_tensor_type +from zigzag.workload.LayerNodeABC import LayerNodeABC + + +class ONNXOperatorParser(metaclass=ABCMeta): + """! Abstract base class that represents a parser of an onnx operator. Example: Conv, MatMul, etc.""" + + def __init__( + self, + node_id: int, + node: NodeProto, + nodes_outputs: dict[int, Any], + onnx_model: ModelProto, + ) -> None: + self.node_id = node_id + self.node = node + self.nodes_outputs = nodes_outputs + self.onnx_model = onnx_model + + @abstractmethod + def run(self) -> LayerNodeABC: ... + + def get_input_output_weight_data_type(self): + """! 
Return the data type of the input, output and weight tensors of this node.""" + input_name = self.node.input[0] + output_name = self.node.output[0] + weight_name = self.get_weight_name(self.node) + + input_elem_type = get_onnx_tensor_type(input_name, self.onnx_model).elem_type + output_elem_type = get_onnx_tensor_type(output_name, self.onnx_model).elem_type + weight_elem_type = get_onnx_tensor_type(weight_name, self.onnx_model).elem_type + + return input_elem_type, output_elem_type, weight_elem_type + + def get_weight_name(self, node: NodeProto): + """! Return the name of the weight input of this node depending on its operator type + @param node (NodeProto): The node + """ + op_type = node.op_type # 'Conv', 'QLinearConv', ... + if op_type == "Conv": + return node.input[1] + elif op_type == "QLinearConv": + return node.input[3] + else: + raise NotImplementedError(f"Retrieving weight name for onnx node of type {op_type} is not supported.") diff --git a/zigzag/io/onnx/utils.py b/zigzag/parser/onnx/utils.py similarity index 96% rename from zigzag/io/onnx/utils.py rename to zigzag/parser/onnx/utils.py index 2d91a1dc..6d6c558c 100644 --- a/zigzag/io/onnx/utils.py +++ b/zigzag/parser/onnx/utils.py @@ -1,18 +1,13 @@ -import enum import importlib import logging from dataclasses import dataclass from enum import auto from os import path from typing import Any, List +from enum import Enum import pickle - import onnx -from onnx import AttributeProto, helper, compose -from onnx import ModelProto -from onnx import GraphProto -from onnx import NodeProto -from onnx import TypeProto +from onnx import AttributeProto, helper, compose, ModelProto, GraphProto, NodeProto, TypeProto logger = logging.getLogger(__name__) @@ -121,7 +116,7 @@ def parse_dynamic_onnx_model(model: ModelProto) -> ModelProto: return new_model -def get_attribute_ints_with_name(name: str, attrs: Any, default: Any = None): +def get_attribute_ints_with_name(name: str, attrs: Any, default: list[int] | int | None = None) -> list[int] | int: """! Retrieves the attrs[name_idx].ints from attrs. If attrs[name_idx] is of type INTS, attrs[name_idx].ints is returned. If attrs[name_idx] is of type INT, attrs[name_idx].i is returned. @@ -134,9 +129,9 @@ def get_attribute_ints_with_name(name: str, attrs: Any, default: Any = None): name_idx = attrs_names.index(name) attr_type = attrs[name_idx].type if attr_type == AttributeProto.AttributeType.INT: - return attrs[name_idx].i + return int(attrs[name_idx].i) elif attr_type == AttributeProto.AttributeType.INTS: - return attrs[name_idx].ints + return list(attrs[name_idx].ints) else: raise NotImplementedError(f"Attribute extraction of type {attr_type} not supported.") except ValueError: @@ -146,7 +141,7 @@ def get_attribute_ints_with_name(name: str, attrs: Any, default: Any = None): raise ValueError(f"attrs has no attribute called {name} and no default was given. 
Names = {attrs_names}.") -class OnnxTensorCategory(enum.Enum): +class OnnxTensorCategory(Enum): Input = auto() Output = auto() diff --git a/zigzag/parser/workload_factory.py b/zigzag/parser/workload_factory.py new file mode 100644 index 00000000..c56bcf34 --- /dev/null +++ b/zigzag/parser/workload_factory.py @@ -0,0 +1,241 @@ +import re +import logging +from typing import Any + +from zigzag.datatypes import LayerDim, LayerOperand, MemoryOperand, OADimension, UnrollFactor, UnrollFactorInt +from zigzag.mapping.spatial_mapping import MappingSingleOADim, SpatialMapping, SpatialMappingHint +from zigzag.parser.WorkloadValidator import WorkloadValidator +from zigzag.utils import UniqueMessageFilter +from zigzag.workload.DNNWorkload import DNNWorkload +from zigzag.workload.layer_attributes import ( + InputOperandSource, + LayerDimRelation, + LayerDimSizes, + LayerEquation, + LayerOperandPrecision, + LayerPadding, + LayerTemporalOrdering, + MemoryOperandLinks, +) +from zigzag.workload.layer_node import LayerNode, LayerNodeAttributes + +logger = logging.getLogger(__name__) +logger.addFilter(UniqueMessageFilter()) + + +class WorkloadFactory: + """! Generates a `Workload` instance from the validated and normalized user-provided data.""" + + def __init__(self, workload_data: list[dict[str, Any]], mapping_data: list[dict[str, Any]]): + self.workload_data = workload_data + self.mapping_data = mapping_data + + def create(self) -> DNNWorkload: + node_list: list[LayerNode] = [] + + for layer_data in self.workload_data: + layer_node_factory = LayerNodeFactory(layer_data, self.mapping_data) + layer_node = layer_node_factory.create() + node_list.append(layer_node) + + return DNNWorkload(node_list) + + +class LayerNodeFactory: + """Creates a LayerNode instance from a validated and normalized user definition of a single workload layer""" + + def __init__(self, node_data: dict[str, Any], mapping_data: list[dict[str, Any]]): + """! 
+ @node_data validated and normalized user-defined data for a single workload layer + @mapping_data validated and normalized user-defined data for all mappings + """ + self.node_data = node_data + self.mapping_data = mapping_data + + def create(self) -> LayerNode: + layer_id: int = self.node_data["id"] + node_name: str = f"Layer{layer_id}" + node_attr = self.create_node_attr() + + return LayerNode(layer_id=layer_id, node_name=node_name, node_attr=node_attr) + + def create_node_attr(self) -> LayerNodeAttributes: + # From node data + layer_type: str = self.node_data["operator_type"] + equation = self.create_equation() + layer_dim_sizes = self.create_layer_dim_sizes() + operand_precision = self.create_operand_precision() + dimension_relations = self.create_layer_dim_relations() + constant_operands = self.create_constant_operands() + input_operand_source = self.create_operand_source() + padding = self.create_padding() + pr_layer_dim_sizes = self.create_pr_layer_dim_sizes() + + # From mapping data + mapping_factory = MappingFactory(layer_type, self.mapping_data) + spatial_mapping = mapping_factory.create_spatial_mapping() + spatial_mapping_hint = mapping_factory.create_spatial_mapping_hint() + core_allocation = mapping_factory.get_core_allocation() + memory_operand_links = mapping_factory.create_memory_operand_links() + temporal_ordering = mapping_factory.create_temporal_ordering() + + return LayerNodeAttributes( + layer_type=layer_type, + equation=equation, + layer_dim_sizes=layer_dim_sizes, + operand_precision=operand_precision, + dimension_relations=dimension_relations, + constant_operands=constant_operands, + input_operand_source=input_operand_source, + spatial_mapping=spatial_mapping, + spatial_mapping_hint=spatial_mapping_hint, + core_allocation=core_allocation, + memory_operand_links=memory_operand_links, + temporal_ordering=temporal_ordering, + padding=padding, + pr_layer_dim_sizes=pr_layer_dim_sizes, + ) + + def create_equation(self) -> LayerEquation: + equation: str = self.node_data["equation"] + equation = equation.replace("+=", "=") + equation = equation.replace("++", "+") + equation = equation.replace("*", " * ") + equation = equation.replace("=", " = ") + equation = equation.replace("+", " + ") + return LayerEquation(equation) + + def create_layer_dim_sizes(self) -> LayerDimSizes: + loop_dims = [LayerDim(x) for x in self.node_data["loop_dims"]] + loop_sizes: list[UnrollFactorInt] = self.node_data["loop_sizes"] + + data = {dim: size for dim, size in zip(loop_dims, loop_sizes)} + return LayerDimSizes(data) + + def create_operand_precision(self) -> LayerOperandPrecision: + precisions: dict[str, int] = self.node_data["operand_precision"] + data: dict[LayerOperand, int] = {LayerOperand(operand_str): size for operand_str, size in precisions.items()} + return LayerOperandPrecision(data) + + def create_layer_dim_relations(self) -> list[LayerDimRelation]: + relations: list[LayerDimRelation] = [] + for relation_str in self.node_data["dimension_relations"]: + match = re.search(WorkloadValidator.LAYER_DIM_RELATION_REGEX, relation_str) + assert match is not None + dim_1, coef_2, dim_2, coef_3, dim_3 = match.groups() + layer_dim_relation = LayerDimRelation( + dim_1=LayerDim(dim_1), + dim_2=LayerDim(dim_2), + dim_3=LayerDim(dim_3), + coef_2=int(coef_2) if coef_2 is not None else 1, + coef_3=int(coef_3) if coef_3 is not None else 1, + ) + relations.append(layer_dim_relation) + + return relations + + def create_constant_operands(self) -> list[LayerOperand]: + operand_sources: dict[str, int] = 
self.node_data["operand_source"] + constant_operands: list[str] = [op for op, source in operand_sources.items() if source == self.node_data["id"]] + return [LayerOperand(layer_op_str) for layer_op_str in constant_operands] + + def create_operand_source(self) -> InputOperandSource: + operand_sources: dict[str, int] = self.node_data["operand_source"] + return { + LayerOperand(layer_dim_str): source + for layer_dim_str, source in operand_sources.items() + if source != self.node_data["id"] + } + + def create_padding(self) -> LayerPadding: + if "pr_loop_dims" not in self.node_data or self.node_data["pr_loop_dims"] is None: + return LayerPadding.empty() + if "padding" not in self.node_data or self.node_data["padding"] is None: + return LayerPadding.empty() + + pr_layer_dims: list[LayerDim] = [LayerDim(x) for x in self.node_data["pr_loop_dims"]] + # length of the inner list equals 2 + padding_data: list[list[int]] = self.node_data["padding"] + padding_dict: dict[LayerDim, tuple[int, int]] = { + layer_dim: (padding_data[i][0], padding_data[i][1]) for i, layer_dim in enumerate(pr_layer_dims) + } + return LayerPadding(padding_dict) + + def create_pr_layer_dim_sizes(self) -> LayerDimSizes | None: + if "pr_loop_dims" not in self.node_data or self.node_data["pr_loop_dims"] is None: + return None + if "pr_loop_sizes" not in self.node_data or self.node_data["pr_loop_sizes"] is None: + return None + + pr_layer_dims: list[LayerDim] = [LayerDim(x) for x in self.node_data["pr_loop_dims"]] + pr_sizes: list[int] = self.node_data["pr_loop_sizes"] + size_dict = {layer_dim: size for layer_dim, size in zip(pr_layer_dims, pr_sizes)} + return LayerDimSizes(size_dict) + + +class MappingFactory: + def __init__(self, operation_type: str, mapping_data: list[dict[str, Any]]): + """ + @param operation_type Name of the layer operation for which the Mapping is being constructed. + @param mapping_data user-given, validated and normalized mapping data for all operation types. + """ + if operation_type in map(lambda x: x["name"], mapping_data): + self.mapping_data: dict[str, Any] = next(filter(lambda x: x["name"] == operation_type, mapping_data)) + else: + self.mapping_data = next(filter(lambda x: x["name"] == "default", mapping_data)) + logger.warning("Operator %s not defined in mapping. 
Using default mapping instead.", operation_type) + + def get_core_allocation(self) -> list[int]: + return self.mapping_data["core_allocation"] + + def create_spatial_mapping(self) -> SpatialMapping: + if self.mapping_data["spatial_mapping"] is None: + return SpatialMapping.empty() + + user_data: dict[str, list[str]] = self.mapping_data["spatial_mapping"] + spatial_mapping_dict: dict[OADimension, MappingSingleOADim] = {} + + for oa_dim_str, unrolling_list in user_data.items(): + oa_dim = OADimension(oa_dim_str) + mapping_this_oa_dim = self.create_mapping_single_oa_dim(unrolling_list) + spatial_mapping_dict[oa_dim] = mapping_this_oa_dim + + return SpatialMapping(spatial_mapping_dict) + + def create_mapping_single_oa_dim(self, mapping_data: list[str]) -> MappingSingleOADim: + mapping_dict: dict[LayerDim, UnrollFactor] = {} + + for single_unrolling in mapping_data: + layer_dim_str = single_unrolling.split(",")[0] + unrolling = int(single_unrolling.split(",")[-1]) + layer_dim = LayerDim(layer_dim_str) + mapping_dict[layer_dim] = unrolling + + return MappingSingleOADim(mapping_dict) + + def create_spatial_mapping_hint(self) -> SpatialMappingHint: + if "spatial_mapping_hint" not in self.mapping_data or self.mapping_data["spatial_mapping_hint"] is None: + return SpatialMappingHint.empty() + + user_data: dict[str, list[str]] = self.mapping_data["spatial_mapping_hint"] + mapping_hint_dict: dict[OADimension, set[LayerDim]] = { + OADimension(oa_dim_str): {LayerDim(layer_dim_str) for layer_dim_str in hint_list} + for oa_dim_str, hint_list in user_data.items() + } + return SpatialMappingHint(mapping_hint_dict) + + def create_memory_operand_links(self) -> MemoryOperandLinks: + user_data: dict[str, str] = self.mapping_data["memory_operand_links"] + links_dict = { + LayerOperand(layer_op_str): MemoryOperand(mem_op_str) for layer_op_str, mem_op_str in user_data.items() + } + return MemoryOperandLinks(links_dict) + + def create_temporal_ordering(self) -> LayerTemporalOrdering: + """! This attribute lacks support within the MappingValidator. Returns an empty instance in case it is not + provided (to be compatible with older code) or raises an error if it is present in the user-provided data. + """ + if "temporal_ordering" not in self.mapping_data or self.mapping_data["temporal_ordering"] is None: + return LayerTemporalOrdering.empty() + + raise NotImplementedError() diff --git a/zigzag/stages/AcceleratorParserStage.py b/zigzag/stages/AcceleratorParserStage.py new file mode 100644 index 00000000..faede8dd --- /dev/null +++ b/zigzag/stages/AcceleratorParserStage.py @@ -0,0 +1,39 @@ +from typing import Any + +from zigzag.hardware.architecture.Accelerator import Accelerator +from zigzag.parser.accelerator_factory import AcceleratorFactory +from zigzag.parser.AcceleratorValidator import AcceleratorValidator +from zigzag.stages.Stage import Stage, StageCallable + +import logging + +from zigzag.utils import open_yaml + +logger = logging.getLogger(__name__) + + +class AcceleratorParserStage(Stage): + """! 
Stage to parse an accelerator from a user-defined yaml file.""" + + def __init__(self, list_of_callables: list[StageCallable], *, accelerator: str, **kwargs: Any): + super().__init__(list_of_callables, **kwargs) + assert accelerator.split(".")[-1] == "yaml", "Expected a yaml file as accelerator input" + self.accelerator_yaml_path = accelerator + + def run(self): + accelerator = self.parse_accelerator() + sub_stage = self.list_of_callables[0](self.list_of_callables[1:], accelerator=accelerator, **self.kwargs) + for cme, extra_info in sub_stage.run(): + yield cme, extra_info + + def parse_accelerator(self) -> Accelerator: + accelerator_data = open_yaml(self.accelerator_yaml_path) + + validator = AcceleratorValidator(accelerator_data) + accelerator_data = validator.normalized_data + validate_success = validator.validate() + if not validate_success: + raise ValueError("Failed to validate user provided accelerator.") + + factory = AcceleratorFactory(accelerator_data) + return factory.create() diff --git a/zigzag/stages/CostModelStage.py b/zigzag/stages/CostModelStage.py index c1faadf3..477e6474 100644 --- a/zigzag/stages/CostModelStage.py +++ b/zigzag/stages/CostModelStage.py @@ -40,7 +40,7 @@ def __init__( def run(self): """! Run the cost model stage by calling the internal zigzag cost model with the correct inputs.""" - core_id = self.layer.core_allocation + core_id = self.layer.core_allocation[0] core = self.accelerator.get_core(core_id) operational_array = core.operational_array pe_type = getattr(operational_array, "pe_type", None) diff --git a/zigzag/stages/ONNXModelParserStage.py b/zigzag/stages/ONNXModelParserStage.py index c62555d9..49efc130 100644 --- a/zigzag/stages/ONNXModelParserStage.py +++ b/zigzag/stages/ONNXModelParserStage.py @@ -1,7 +1,7 @@ from typing import Any -from zigzag.io.onnx.ONNXModelParser import ONNXModelParser +from zigzag.parser.onnx.ONNXModelParser import ONNXModelParser from zigzag.stages.Stage import Stage, StageCallable import logging @@ -16,9 +16,8 @@ def __init__(self, list_of_callables: list[StageCallable], *, workload: str, map self.onnx_model_parser = ONNXModelParser(workload, mapping) def run(self): - self.onnx_model_parser.run() - onnx_model = self.onnx_model_parser.get_onnx_model() - workload = self.onnx_model_parser.get_workload() + workload = self.onnx_model_parser.run() + onnx_model = self.onnx_model_parser.onnx_model sub_stage = self.list_of_callables[0]( self.list_of_callables[1:], diff --git a/zigzag/stages/PEArrayScalingStage.py b/zigzag/stages/PEArrayScalingStage.py index 274e9e7d..a6e9af63 100644 --- a/zigzag/stages/PEArrayScalingStage.py +++ b/zigzag/stages/PEArrayScalingStage.py @@ -10,6 +10,7 @@ from zigzag.hardware.architecture.Core import Core from zigzag.hardware.architecture.MemoryHierarchy import MemoryHierarchy from zigzag.hardware.architecture.MemoryInstance import MemoryInstance +from zigzag.hardware.architecture.memory_port import PortAllocation from zigzag.hardware.architecture.operational_array import OperationalArray from zigzag.hardware.architecture.operational_unit import OperationalUnit from zigzag.utils import pickle_deepcopy @@ -80,9 +81,9 @@ def generate_scaled_accelerator(self): """ # Get the relevant accelerator attributes core = next(iter(self.accelerator.cores)) - operational_array = core.operational_array + operational_array: OperationalArray = core.operational_array operational_unit = operational_array.unit - dimension_sizes = operational_array.dimension_sizes + dimension_sizes = operational_array.oa_dim_sizes
memory_hierarchy = core.memory_hierarchy # Create new operational array @@ -107,7 +108,7 @@ def generate_scaled_accelerator(self): new_memory_instance: MemoryInstance = pickle_deepcopy(memory_instance) new_operands: tuple[str] = pickle_deepcopy(operands) - new_port_alloc: tuple[dict] = pickle_deepcopy(port_alloc) + new_port_alloc: PortAllocation = pickle_deepcopy(port_alloc) new_served_dimensions = pickle_deepcopy(served_dimensions) new_memory_hierarchy.add_memory( memory_instance=new_memory_instance, diff --git a/zigzag/stages/SearchUnusedMemoryStage.py b/zigzag/stages/SearchUnusedMemoryStage.py index e5ec2c05..79b5a5d9 100644 --- a/zigzag/stages/SearchUnusedMemoryStage.py +++ b/zigzag/stages/SearchUnusedMemoryStage.py @@ -223,7 +223,7 @@ def update_top_mem_level(self): curr_id = self.layer_list[layer] # current layer id (key) in mem_update_list # weight representation in memory if len(layer.constant_operands) == 1: - const_operand: MemoryOperand = layer.memory_operand_links[layer.constant_operands[0]] + const_operand = layer.memory_operand_links[layer.constant_operands[0]] # act representation in memory act_operand: MemoryOperand = layer.memory_operand_links[ [operand for operand in layer.input_operands if operand not in layer.constant_operands][0] @@ -388,7 +388,7 @@ def check_if_mem_serve_all_oa_dims(self, mem: MemoryLevel, accelerator: Accelera core = accelerator.cores[0] operational_array = core.operational_array oa_dim_nb = len(operational_array.oa_dim_sizes) - mem_served_oa_dim_nb = mem.served_dimensions.nb_dims() + mem_served_oa_dim_nb = mem.served_dimensions.nb_dims return mem_served_oa_dim_nb == oa_dim_nb def update_mem_level_for_loading_data(self): @@ -403,8 +403,10 @@ def update_mem_level_for_loading_data(self): self.remove_dummy_nodes_in_workload() # remove dummy nodes for the ease of telling the branch starting or final nodes # Update mem_update_list and mem_update_weight - for id, layer in enumerate(nx.topological_sort(self.workload)): - layer: LayerNode + for id, layer in enumerate(self.workload.topological_sort()): + if isinstance(layer, DummyNode): + continue + # act representation act_operand = layer.memory_operand_links[ [operand for operand in layer.input_operands if operand not in layer.constant_operands][0] diff --git a/zigzag/stages/SpatialMappingConversionStage.py b/zigzag/stages/SpatialMappingConversionStage.py index 46af6c42..106b3dba 100644 --- a/zigzag/stages/SpatialMappingConversionStage.py +++ b/zigzag/stages/SpatialMappingConversionStage.py @@ -35,7 +35,7 @@ def __init__( self.layer = layer self.accelerator = accelerator self.memory_operand_links = layer.memory_operand_links - self.user_spatial_mapping = self.layer.user_spatial_mapping + self.user_spatial_mapping = self.layer.spatial_mapping assert ( self.user_spatial_mapping.oa_dim_sizes is not None @@ -189,9 +189,7 @@ def generate_mapping_per_mem_lvl(self, user_spatial_mapping: SpatialMapping) -> # TODO This should be a class """ mapping_per_mem_lvl: SpatialMappingPerMemLvl = {} - # layer_to_mem_op = self.layer.memory_operand_links - # mem_to_layer_op = {mem_op: layer_op for (layer_op, mem_op) in layer_to_mem_op.items()} - core_id = self.layer.core_allocation + core_id = self.layer.core_allocation[0] mem_hierarchy = self.accelerator.get_core(core_id).memory_hierarchy for layer_op in self.memory_operand_links.layer_operands: mem_op = self.memory_operand_links.layer_to_mem_op(layer_op) diff --git a/zigzag/stages/SpatialMappingGeneratorStage.py b/zigzag/stages/SpatialMappingGeneratorStage.py index 
8f9b7dfa..fffda43c 100644 --- a/zigzag/stages/SpatialMappingGeneratorStage.py +++ b/zigzag/stages/SpatialMappingGeneratorStage.py @@ -17,7 +17,7 @@ SpatialMappingConversionStage, ) from zigzag.workload.layer_node import LayerNode -from zigzag.utils import pickle_deepcopy +from zigzag.utils import UniqueMessageFilter, pickle_deepcopy from zigzag.workload.layer_attributes import MemoryOperandLinks from zigzag.mapping.spatial_mapping import ( SpatialMapping, @@ -26,6 +26,7 @@ ) logger = logging.getLogger(__name__) +logger.addFilter(UniqueMessageFilter()) class SpatialMappingGeneratorStage(Stage): @@ -60,7 +61,7 @@ def __init__( self.accelerator = accelerator self.layer = layer - self.provided_mapping = self.layer.user_spatial_mapping + self.provided_mapping = self.layer.spatial_mapping # Control parameters self.enable_mix_spatial_mapping_generation = enable_mix_spatial_mapping_generation @@ -68,12 +69,12 @@ def __init__( self.nb_mappings_generated = nb_mappings_generated self.layer_dim_sizes = self.layer.layer_dim_sizes - core_id = layer.core_allocation + core_id = layer.core_allocation[0] self.core = self.accelerator.get_core(core_id) self.oa_dim_sizes = self.core.operational_array.oa_dim_sizes self.memory_hierarchy = self.core.memory_hierarchy - self.spatial_mapping_hint: SpatialMappingHint = self.layer.user_spatial_mapping_hint + self.spatial_mapping_hint: SpatialMappingHint = self.layer.spatial_mapping_hint self.spatial_mapping_hint.complete_with_defaults(self.oa_dim_sizes, set(self.layer.layer_dims)) def run(self): @@ -84,8 +85,13 @@ def run(self): assert nb_generated_mappings > 0, "No SpatialMappings found" for i, generated_mapping in enumerate(generated_mappings): - logger.info( # pylint: disable=W1203 - f"Launching spatial mapping {i+1}/{nb_generated_mappings}: {generated_mapping}." + self.layer.spatial_mapping = generated_mapping + logger.info( + "%s: Launching spatial mapping %i/%i :%s.", + self.layer.name, + (i + 1), + nb_generated_mappings, + generated_mapping, ) # Modify the size of lower input mem to support weight diagonal spatial unrolling (for OX/OY) @@ -103,7 +109,7 @@ def run(self): ) # Set the generated_mapping in the layer, as this is required by SpatialMappingConversionStage - self.layer.user_spatial_mapping = generated_mapping + self.layer.spatial_mapping = generated_mapping for cme, extra_info in spatial_mapping_conversion_stage.run(): # recover back the accelerator in case the memory size had been adjusted @@ -117,7 +123,7 @@ def generate_spatial_mappings(self) -> Generator[SpatialMapping, None, None]: max_unrollings = self.get_max_unrolling() # Start from the given mapping - mapping_template = self.provided_mapping + mapping_template = copy.deepcopy(self.provided_mapping) mapping_template.initialize_oa_dims(self.oa_dim_sizes) mapping_template.check_and_reduce(max_unrollings, self.layer_dim_sizes.data) diff --git a/zigzag/stages/TemporalOrderingConversionStage.py b/zigzag/stages/TemporalOrderingConversionStage.py index 58e72fea..ebe3f9b5 100644 --- a/zigzag/stages/TemporalOrderingConversionStage.py +++ b/zigzag/stages/TemporalOrderingConversionStage.py @@ -38,7 +38,7 @@ def run(self): """! Run this stage by converting the user-defined temporal loop ordering to the memory-level based temporal mapping representation. 
""" - temporal_mapping = self.convert_user_temporal_mapping(self.layer.user_temporal_ordering) + temporal_mapping = self.convert_user_temporal_mapping(self.layer.temporal_ordering) kwargs = self.kwargs.copy() kwargs["temporal_mapping"] = temporal_mapping kwargs["spatial_mapping"] = self.spatial_mapping @@ -48,9 +48,9 @@ def run(self): for cme, extra_info in substage.run(): yield cme, extra_info - def convert_user_temporal_mapping(self, user_temporal_mapping: LayerTemporalOrdering | None) -> TemporalMapping: + def convert_user_temporal_mapping(self, user_temporal_mapping: LayerTemporalOrdering) -> TemporalMapping: """! - # TODO move to `LayerTemporalOrdering`, fix types. What is user_temporal_mapping is None? + # TODO move to `LayerTemporalOrdering`, fix types. """ spatial_mapping = self.spatial_mapping layer = self.layer diff --git a/zigzag/stages/WorkloadParserStage.py b/zigzag/stages/WorkloadParserStage.py new file mode 100644 index 00000000..4f2a12e1 --- /dev/null +++ b/zigzag/stages/WorkloadParserStage.py @@ -0,0 +1,57 @@ +from typing import Any + +from zigzag.parser.MappingValidator import MappingValidator +from zigzag.parser.WorkloadValidator import WorkloadValidator +from zigzag.parser.workload_factory import WorkloadFactory +from zigzag.stages.Stage import Stage, StageCallable +from zigzag.utils import open_yaml +from zigzag.workload.DNNWorkload import DNNWorkload + +import logging + +logger = logging.getLogger(__name__) + + +class WorkloadParserStage(Stage): + """! Parses a user-provided workload from a yaml file.""" + + def __init__(self, list_of_callables: list[StageCallable], *, workload: str, mapping: str, **kwargs: Any): + super().__init__(list_of_callables, **kwargs) + self.workload_yaml_path = workload + self.mapping_yaml_path = mapping + + def run(self): + workload = self.parse_workload() + sub_stage = self.list_of_callables[0](self.list_of_callables[1:], workload=workload, **self.kwargs) + for cme, extra_info in sub_stage.run(): + yield cme, extra_info + + def parse_workload(self) -> DNNWorkload: + workload_data = self._parse_workload_data() + mapping_data = self._parse_mapping_data() + factory = WorkloadFactory(workload_data, mapping_data) + return factory.create() + + def _parse_workload_data(self) -> list[dict[str, Any]]: + """! 
Parse, validate and normalize workload""" + workload_data = open_yaml(self.workload_yaml_path) + workload_validator = WorkloadValidator(workload_data) + workload_data = workload_validator.normalized_data + workload_validate_succes = workload_validator.validate() + if not workload_validate_succes: + raise ValueError("Failed to validate user provided workload.") + return workload_data + + def _parse_mapping_data(self) -> list[dict[str, Any]]: + return self.parse_mapping_data(self.mapping_yaml_path) + + @staticmethod + def parse_mapping_data(mapping_yaml_path: str) -> list[dict[str, Any]]: + """Parse, validate and normalize workload mapping from a given yaml file path""" + mapping_data = open_yaml(mapping_yaml_path) + mapping_validator = MappingValidator(mapping_data) + mapping_data = mapping_validator.normalized_data + mapping_validate_succes = mapping_validator.validate() + if not mapping_validate_succes: + raise ValueError("Failed to validate user provided mapping.") + return mapping_data diff --git a/zigzag/stages/WorkloadStage.py b/zigzag/stages/WorkloadStage.py index cb5965df..ad3c962b 100644 --- a/zigzag/stages/WorkloadStage.py +++ b/zigzag/stages/WorkloadStage.py @@ -25,18 +25,18 @@ def __init__( self.accelerator = accelerator def run(self): - for id, layer in enumerate(self.workload.topological_sort()): + for layer in self.workload.topological_sort(): # skip the DummyNodes if isinstance(layer, DummyNode): continue # Skip a layer if the layer type is "Pooling" and the hardware template is an IMC core. # This wil have impact when the workload is defined manually. # If the workload is from onnx, no skipping will be done. - core_id: int = layer.core_allocation + core_id: int = layer.core_allocation[0] core = self.accelerator.get_core(core_id) operational_array = core.operational_array - pe_type = getattr(operational_array, "pe_type", None) # return None if it does not exist - layer_type: str | None = layer.layer_attrs.parse_operator_type() + pe_type = getattr(operational_array, "pe_type", None) + layer_type = layer.type if (pe_type in ["in_sram_computing"]) and (layer_type in ["Pooling", "Add"]): continue @@ -44,9 +44,8 @@ def run(self): kwargs = self.kwargs.copy() kwargs["layer"] = layer kwargs["accelerator"] = self.accelerator - layer_name = layer.name if layer.name is not None else id - logger.info(f"Processing {layer_name}...") + logger.info(f"Processing {layer.name}...") sub_stage = self.list_of_callables[0](self.list_of_callables[1:], **kwargs) for cme, extra_info in sub_stage.run(): yield cme, (layer, extra_info) diff --git a/zigzag/stages/input_parser_stages.py b/zigzag/stages/input_parser_stages.py deleted file mode 100644 index 24add01a..00000000 --- a/zigzag/stages/input_parser_stages.py +++ /dev/null @@ -1,68 +0,0 @@ -import importlib -from typing import Any - - -from zigzag.io.AcceleratorParser import AcceleratorParser -from zigzag.stages.Stage import Stage, StageCallable -from zigzag.workload.DNNWorkload import DNNWorkload -from zigzag.utils import pickle_deepcopy - -import logging - -logger = logging.getLogger(__name__) - - -class AcceleratorParserStage(Stage): - - def __init__(self, list_of_callables: list[StageCallable], *, accelerator: str, **kwargs: Any): - super().__init__(list_of_callables, **kwargs) - self.accelerator_parser = AcceleratorParser(accelerator) - - def run(self): - self.accelerator_parser.run() - accelerator = self.accelerator_parser.get_accelerator() - sub_stage = self.list_of_callables[0](self.list_of_callables[1:], accelerator=accelerator, 
**self.kwargs) - for cme, extra_info in sub_stage.run(): - yield cme, extra_info - - -class WorkloadParserStage(Stage): - - def __init__(self, list_of_callables: list[StageCallable], *, workload: str, mapping: str, **kwargs: Any): - super().__init__(list_of_callables, **kwargs) - self.workload = workload - self.mapping = mapping - - def run(self): - workload = self.parse_workload_from_path_or_from_module(self.workload, self.mapping) - sub_stage = self.list_of_callables[0](self.list_of_callables[1:], workload=workload, **self.kwargs) - for cme, extra_info in sub_stage.run(): - yield cme, extra_info - - def parse_workload_from_path_or_from_module( - self, workload: str | dict[int, dict[str, Any]], mapping: str | dict[str, dict[str, Any]] - ) -> DNNWorkload: - """! Parse the input workload residing in workload_path. - The "workload" dict is converted to a NetworkX graph. - """ - if isinstance(workload, str): # load from path - module = importlib.import_module(workload) - workload_dict: dict[int, dict[str, Any]] = module.workload - else: - workload_dict = workload - - if isinstance(mapping, str): # load from path - module = importlib.import_module(mapping) - mapping_dict: dict[str, dict[str, Any]] = module.mapping - else: - mapping_dict = mapping - - # make a copy here to prevent later it is being changed in the following stages - workload_copy: dict[int, dict[str, Any]] = pickle_deepcopy(workload_dict) - workload_converted = DNNWorkload(workload_copy, mapping_dict) - logger.info( - f"""Created workload graph with {workload_converted.number_of_nodes()} nodes and - {workload_converted.number_of_edges()} edges.""" - ) - - return workload_converted diff --git a/zigzag/stages/reduce_stages.py b/zigzag/stages/reduce_stages.py index ad4b22d9..a3d61417 100644 --- a/zigzag/stages/reduce_stages.py +++ b/zigzag/stages/reduce_stages.py @@ -134,22 +134,3 @@ def run(self): total_cme += cme all_cmes.append((cme, extra_info)) yield total_cme, all_cmes - - -# -# class ListifyStage: -# """! Class yields all the cost model evaluations yielded by its substages as a single list instead of as a generator. -# NOTE this cannot inherit from `Stage` because the return type of `run` is different""" - -# def __init__(self, list_of_callables : list[StageCallable], **kwargs : Any) -> None: -# """! Initialize the compare stage.""" -# super().__init__(list_of_callables, **kwargs) -# self.list: list[tuple[CostModelEvaluation, Any]] = [] - -# def run(self) -> Generator[tuple[list[tuple[CostModelEvaluation, Any]], Any], None, None]: -# """! Run the compare stage by comparing a new cost model output with the current best found result.""" -# substage = self.list_of_callables[0](self.list_of_callables[1:], **self.kwargs) - -# for cme, extra_info in substage.run(): -# self.list.append((cme, extra_info)) -# yield self.list, None diff --git a/zigzag/utils.py b/zigzag/utils.py index 940c872b..202283ca 100644 --- a/zigzag/utils.py +++ b/zigzag/utils.py @@ -1,8 +1,10 @@ +import logging import pickle from copy import deepcopy from typing import Any import numpy as np +import yaml def pickle_deepcopy(to_copy: Any) -> Any: @@ -25,6 +27,12 @@ def pickle_load(path: str): return obj +def open_yaml(path: str): + with open(path, encoding="utf-8") as f: + data = yaml.safe_load(f) + return data + + def json_repr_handler(obj: Any, simple: bool = False) -> Any: """! 
Recursively converts objects into a json representation""" attr = "__simplejsonrepr__" if simple else "__jsonrepr__" @@ -50,3 +58,18 @@ def json_repr_handler(obj: Any, simple: bool = False) -> Any: return tuple(json_repr_handler(x, simple) for x in obj) raise TypeError(f"Object of type {type(obj)} is not serializable. Create a {attr} method.") + + +class UniqueMessageFilter(logging.Filter): + """! Prevents the logger from filtering duplicate messages""" + + def __init__(self): + self.recorded_messages: set[str] = set() + + def filter(self, record): + message = record.getMessage() + if message in self.recorded_messages: + return False # Skip this message + else: + self.recorded_messages.add(message) + return True diff --git a/zigzag/visualization/graph/memory_hierarchy.py b/zigzag/visualization/graph/memory_hierarchy.py index 03aedd04..f1956e12 100644 --- a/zigzag/visualization/graph/memory_hierarchy.py +++ b/zigzag/visualization/graph/memory_hierarchy.py @@ -3,7 +3,6 @@ import matplotlib.pyplot as plt from networkx import Graph -from zigzag.hardware.architecture.MemoryHierarchy import MemoryHierarchy from zigzag.hardware.architecture.memory_level import MemoryLevel diff --git a/zigzag/visualization/results/plot_cme.py b/zigzag/visualization/results/plot_cme.py index 5675e5b8..cd058d7d 100644 --- a/zigzag/visualization/results/plot_cme.py +++ b/zigzag/visualization/results/plot_cme.py @@ -3,7 +3,7 @@ import matplotlib.pyplot as plt from matplotlib.colors import hsv_to_rgb import numpy as np -from zigzag.cost_model.cost_model import CostModelEvaluation +from zigzag.cost_model.cost_model import CostModelEvaluation, CostModelEvaluationABC, CumulativeCME from zigzag.datatypes import LayerOperand from zigzag.hardware.architecture.MemoryInstance import MemoryInstance from zigzag.hardware.architecture.memory_level import MemoryLevel @@ -80,7 +80,7 @@ def bar_plot_cost_model_evaluations_total( def bar_plot_cost_model_evaluations_breakdown( - cmes: list[CostModelEvaluation], save_path: str, xtick_rotation: int = 90 + cmes: list[CostModelEvaluationABC], save_path: str, xtick_rotation: int = 90 ): memory_word_access_summed: dict[int, defaultdict[LayerOperand, defaultdict[str, FourWayDataMoving]]] = { idx: defaultdict(lambda: defaultdict(lambda: FourWayDataMoving(0, 0, 0, 0))) for idx in range(len(cmes)) @@ -100,7 +100,11 @@ def bar_plot_cost_model_evaluations_breakdown( la_tot: dict[int, float] = {idx: 0 for idx in range(len(cmes))} for idx, cme in enumerate(cmes): - mem_hierarchy = cme.accelerator.get_core(cme.layer.core_allocation).memory_hierarchy + if isinstance(cme, CumulativeCME): + continue + assert isinstance(cme, CostModelEvaluation) + + mem_hierarchy = cme.accelerator.get_core(cme.layer.core_allocation[0]).memory_hierarchy mac_costs[idx] = cme.MAC_energy la_break_down[idx]["Ideal computation"] = cme.ideal_cycle la_break_down[idx]["Spatial stall"] = cme.ideal_temporal_cycle - cme.ideal_cycle diff --git a/zigzag/visualization/results/print_mapping.py b/zigzag/visualization/results/print_mapping.py index 2b223302..c4487a16 100644 --- a/zigzag/visualization/results/print_mapping.py +++ b/zigzag/visualization/results/print_mapping.py @@ -1,4 +1,4 @@ -from zigzag.cost_model.cost_model import CostModelEvaluation +from zigzag.cost_model.cost_model import CostModelEvaluation, CostModelEvaluationABC, CumulativeCME from zigzag.datatypes import Constants, LayerDim, LayerOperand, UnrollFactor from zigzag.utils import pickle_deepcopy @@ -13,7 +13,7 @@ def get_temporal_spatial_loops( """ # TODO 
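`UniqueMessageFilter` above is a standard `logging.Filter`. A usage sketch showing how it could be attached to the package logger; the log message text is made up for illustration, and whether zigzag installs the filter itself is not visible in this diff:

import logging

from zigzag.utils import UniqueMessageFilter

# The filter remembers every message it has let through and drops exact
# duplicates, e.g. the same warning emitted once per layer.
logger = logging.getLogger("zigzag")
logger.addFilter(UniqueMessageFilter())
logger.warning("Memory allocation defaulted to lowest level")  # shown
logger.warning("Memory allocation defaulted to lowest level")  # dropped by the filter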
documentation, split this up into multiple, sensible functions """ - core = cme.accelerator.get_core(cme.layer.core_allocation) + core = cme.accelerator.get_core(cme.layer.core_allocation[0]) operand_links = cme.layer.memory_operand_links tm: dict[LayerOperand, list[list[tuple[LayerDim, UnrollFactor]]]] = pickle_deepcopy( @@ -48,13 +48,18 @@ def get_temporal_spatial_loops( return temporal_loops, spatial_loops, memories -def print_mapping(cme: CostModelEvaluation, offsets: int = 2): +def print_mapping(cme: CostModelEvaluationABC, offsets: int = 2): """ Prints a structured representation of a CostModelEvaluation mapping. :param cme: The CostModelEvaluation to print the mapping of. :param offsets: The number of spaces to offset nested loops. """ + # Skip CumulativeCMEs + if isinstance(cme, CumulativeCME): + return + assert isinstance(cme, CostModelEvaluation) + # Extract the temporal loops, spatial loops, and memories from the cme temporal_loops, spatial_loops, memories = get_temporal_spatial_loops(cme) loop_column_width = ( @@ -99,8 +104,8 @@ def print_header(text: str, operands: list[str]): print("".ljust(loop_column_width + 3 * memory_column_width, "=")) print(f"{text.ljust(loop_column_width)}", end="") print( - f"""{operands[0]:<{memory_column_width}}{operands[1]:<{memory_column_width}} - {operands[2]:<{memory_column_width}}""" + f"{operands[0]:<{memory_column_width}}{operands[1]:<{memory_column_width}}" + f"{operands[2]:<{memory_column_width}}" ) print("".ljust(loop_column_width + 3 * memory_column_width, "=")) diff --git a/zigzag/workload/DNNWorkload.py b/zigzag/workload/DNNWorkload.py index c4c13e1e..a59ea601 100644 --- a/zigzag/workload/DNNWorkload.py +++ b/zigzag/workload/DNNWorkload.py @@ -1,51 +1,29 @@ from zigzag.workload.Workload import Workload -from zigzag.workload.layer_attributes import LayerAttributes from zigzag.workload.layer_node import LayerNode from typing import Any class DNNWorkload(Workload): - def __init__(self, workload: dict[int, dict[str, Any]], mapping: dict[str, dict[str, Any]], **attr: Any): - """! Collect all the algorithmic workload information here. + def __init__(self, nodes: list[LayerNode], **attr: Any): + """! @return (self): Directed Graph with nodes the layers and edges the connections between layers. """ super().__init__(**attr) - layer_id_to_obj: dict[int, LayerNode] = {} # Lookup dict for id to LayerNode object translation - self.layer_node_list: list[LayerNode] = [] + layer_id_to_obj: dict[int, LayerNode] = {} + self.layer_node_list = nodes - for layer_id, layer in workload.items(): - # TODO Support other type of layers, such as concatenation, max pooling, BN, etc. - # What is special about max pooling? 
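With `print_mapping` now accepting any `CostModelEvaluationABC` and silently skipping `CumulativeCME`s, a result list can be printed without pre-filtering. A small helper sketch:

from zigzag.cost_model.cost_model import CostModelEvaluationABC
from zigzag.visualization.results.print_mapping import print_mapping


def print_all_mappings(cmes: list[CostModelEvaluationABC]) -> None:
    """Print the mapping of every per-layer CME in a result list; any
    CumulativeCME in the list is skipped inside print_mapping itself."""
    for cme in cmes:
        print_mapping(cme)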
- # elif type(layer_id) == str and layer_id[0:6] == 'concat': - # continue - layer_name = layer.get("name", f"Layer {layer_id}") - operator_type = layer.get("operator_type", None) - if layer_name in mapping.keys(): - for attr_name, attr_va in mapping[layer_name].items(): - layer[attr_name] = attr_va - elif operator_type in mapping.keys(): - for attr_name, attr_va in mapping[operator_type].items(): - layer[attr_name] = attr_va - else: - for attr_name, attr_va in mapping["default"].items(): - layer[attr_name] = attr_va + for layer_node in nodes: + layer_id_to_obj[layer_node.id] = layer_node - # For each item in the dict generate the LayerNode and add it to the dnn graph G - layer_attributes = LayerAttributes.parse_user_input(layer) - layer_node = LayerNode(layer_id, layer_attributes, node_name=layer_name) - # Save this layer_id and LayerNode pair in the layer_id_to_obj dict - layer_id_to_obj[layer_id] = layer_node - # self.add_node(layer_id, info=layer_node) self.add_workload_node(layer_node) - self.layer_node_list.append(layer_node) # Find all of its operand sources and add edges accordingly edges: list[tuple[LayerNode, LayerNode]] = [] - for _, parent_list in layer.get("operand_source", {}).items(): - for parent_id in parent_list: - assert parent_id in layer_id_to_obj, f"Illegal reference to non-existent layer with id {parent_id}" - parent_layer = layer_id_to_obj[parent_id] - edges.append((parent_layer, layer_node)) - # layer_node.input_operand_source[op] = parent_layer # TODO This feature is not used? + for _, parent_id in layer_node.input_operand_source.items(): + # for parent_id in parent_list: + assert parent_id in layer_id_to_obj, f"Illegal reference to non-existent layer with id {parent_id}" + parent_layer = layer_id_to_obj[parent_id] + edges.append((parent_layer, layer_node)) + self.add_workload_edges_from(edges) diff --git a/zigzag/workload/DummyNode.py b/zigzag/workload/DummyNode.py index e85ce0bb..143f01fb 100644 --- a/zigzag/workload/DummyNode.py +++ b/zigzag/workload/DummyNode.py @@ -1,25 +1,28 @@ from zigzag.datatypes import LayerOperand +from zigzag.workload.LayerNodeABC import LayerNodeABC from zigzag.workload.layer_attributes import InputOperandSource -class DummyNode: +class DummyNode(LayerNodeABC): """! A class to represent an ONNX node that is not "accelerateable". This node is created to preserve the original ONNX model graph structure, but will be skipped by the underlying engines, treating it as a 0 HW cost node. """ - def __init__(self, id: int, preds: list[int], node_name: str = "", type: str | None = None) -> None: + def __init__(self, layer_id: int, predecessor: int | None, node_name: str = "", type: str | None = None) -> None: """ Initialize the DummyNode by setting its id, the node's predecessors and optionally giving it a name. @param id (int): id for this node - @param preds (list): list of ids of this node's predecessor nodes + @param predecessor (list): list of ids of this node's predecessor nodes @param node_name (str, optional): a name for this node, e.g. 
the node's name within the onnx model """ - self.id = id - self.input_operand_source: InputOperandSource = {LayerOperand("I"): preds} - self.name = node_name + super().__init__(layer_id, node_name) + self.input_operand_source: InputOperandSource = ( + {LayerOperand("I"): predecessor} if predecessor is not None else {} + ) self.type = type - self.core_allocation = -1 # We assume these nodes are mapped on a core with id -1 + # We assume these nodes are mapped on a core with id -1 + self.core_allocation = -1 self.runtime = 0 self.start = None self.end = None @@ -27,13 +30,6 @@ def __init__(self, id: int, preds: list[int], node_name: str = "", type: str | N def __str__(self): return f"DummyNode({self.id})" - def __repr__(self) -> str: - return str(self) - - def __jsonrepr__(self): - """! JSON representation used for saving this object to a json file.""" - return {"id": self.id} - def set_start(self, start: int): """! Set the start time in ccyles of this node @param start : start time in cycles diff --git a/zigzag/workload/LayerAttribute.py b/zigzag/workload/LayerAttribute.py index 8bac4f65..4ff94ed6 100644 --- a/zigzag/workload/LayerAttribute.py +++ b/zigzag/workload/LayerAttribute.py @@ -4,9 +4,7 @@ class LayerAttribute(metaclass=ABCMeta): - """! Abstract Base Class to represent any layer attribute - # TODO make this `user_defined_attribute` - """ + """! Abstract Base Class to represent any layer attribute""" @abstractmethod def __init__(self, data: Any): @@ -33,6 +31,5 @@ def __repr__(self): def __jsonrepr__(self) -> Any: return json_repr_handler(self.data) - @staticmethod - @abstractmethod - def parse_user_input(x: Any) -> "LayerAttribute": ... + def __eq__(self, other: object): + return isinstance(other, LayerAttribute) and self.data == other.data diff --git a/zigzag/workload/LayerNodeABC.py b/zigzag/workload/LayerNodeABC.py new file mode 100644 index 00000000..c0de748d --- /dev/null +++ b/zigzag/workload/LayerNodeABC.py @@ -0,0 +1,22 @@ +from abc import ABCMeta + +from zigzag.workload.layer_attributes import InputOperandSource + + +class LayerNodeABC(metaclass=ABCMeta): + """Represents a single layer of a workload in any form.""" + + def __init__(self, node_id: int, node_name: str): + self.id = node_id + self.name = node_name + self.input_operand_source: InputOperandSource + + def __repr__(self) -> str: + return str(self) + + def __str__(self) -> str: + return self.name + + def __jsonrepr__(self): + """! JSON representation used for saving this object to a json file.""" + return {"id": self.id} diff --git a/zigzag/workload/ONNXWorkload.py b/zigzag/workload/ONNXWorkload.py index 7c57c662..d3246f06 100644 --- a/zigzag/workload/ONNXWorkload.py +++ b/zigzag/workload/ONNXWorkload.py @@ -1,8 +1,7 @@ from typing import Any +from zigzag.workload.LayerNodeABC import LayerNodeABC from zigzag.workload.Workload import Workload -from zigzag.workload.DummyNode import DummyNode -from zigzag.workload.layer_node import LayerNode class ONNXWorkload(Workload): @@ -11,10 +10,10 @@ def __init__(self, **attr: Any): """! Collect all the algorithmic workload information here.""" super().__init__(**attr) - self.node_id_to_obj: dict[int, LayerNode | DummyNode] = {} - self.node_list: list[LayerNode | DummyNode] = [] + self.node_id_to_obj: dict[int, LayerNodeABC] = {} + self.node_list: list[LayerNodeABC] = [] - def add(self, node_id: int, node_obj: LayerNode | DummyNode): + def add(self, node_id: int, node_obj: LayerNodeABC): """! Add a node object to the ONNX workload graph. 
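The new `LayerNodeABC` gives every node type a shared id/name contract plus `__str__`, `__repr__` and `__jsonrepr__`. A hedged sketch of a hypothetical subclass (the `TransferNode` name is illustrative and not part of zigzag):

from zigzag.workload.LayerNodeABC import LayerNodeABC


class TransferNode(LayerNodeABC):
    """Hypothetical zero-cost node type: forwarding an id and a name is enough
    to inherit the string and JSON representations from LayerNodeABC."""

    def __init__(self, node_id: int, node_name: str):
        super().__init__(node_id, node_name)
        self.input_operand_source = {}


node = TransferNode(7, "transfer_7")
print(node)                 # -> transfer_7
print(node.__jsonrepr__())  # -> {'id': 7}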
This can be a different object based on if it's an "accelerateable" node or not. """ @@ -22,10 +21,9 @@ def add(self, node_id: int, node_obj: LayerNode | DummyNode): self.node_id_to_obj[node_id] = node_obj self.add_workload_node(node_obj) - edges: list[tuple[LayerNode | DummyNode, LayerNode | DummyNode]] = [] - for _, parents in node_obj.input_operand_source.items(): - for parent_id in parents: - parent_node_obj = self.node_id_to_obj[parent_id] - edges.append((parent_node_obj, node_obj)) - # node_obj.input_operand_source[op] = parent_node_obj # TODO This feature is not used? + edges: list[tuple[LayerNodeABC, LayerNodeABC]] = [] + for _, parent_id in node_obj.input_operand_source.items(): + # for parent_id in parents: + parent_node_obj = self.node_id_to_obj[parent_id] + edges.append((parent_node_obj, node_obj)) self.add_workload_edges_from(edges) diff --git a/zigzag/workload/Workload.py b/zigzag/workload/Workload.py index bfae517f..9c185b03 100644 --- a/zigzag/workload/Workload.py +++ b/zigzag/workload/Workload.py @@ -3,8 +3,7 @@ from typing import Any, Iterator, Sequence from networkx import DiGraph -from zigzag.workload.DummyNode import DummyNode -from zigzag.workload.layer_node import LayerNode +from zigzag.workload.LayerNodeABC import LayerNodeABC class Workload(DiGraph, metaclass=ABCMeta): @@ -13,17 +12,21 @@ class Workload(DiGraph, metaclass=ABCMeta): def __init__(self, **attr: Any): super().__init__(**attr) # type: ignore - def topological_sort(self) -> Iterator[LayerNode | DummyNode]: + def topological_sort(self) -> Iterator[LayerNodeABC]: return nx.topological_sort(self) # type: ignore - def add_workload_node(self, node: LayerNode | DummyNode) -> None: + def add_workload_node(self, node: LayerNodeABC) -> None: self.add_node(node) # type: ignore - def add_workload_edges_from(self, edges: Sequence[tuple[LayerNode | DummyNode, LayerNode | DummyNode]]) -> None: + def add_workload_edges_from(self, edges: Sequence[tuple[LayerNodeABC, LayerNodeABC]]) -> None: self.add_edges_from(edges) # type: ignore - def get_node_with_id(self, node_id: int) -> LayerNode | DummyNode: - for node in self.nodes: # type: ignore - if node.id == node_id: # type: ignore - return node # type: ignore + def get_node_with_id(self, node_id: int) -> LayerNodeABC: + for node in self.node_iterator: + if node.id == node_id: + return node raise ValueError(f"Node with id {node_id} not found in workload") + + @property + def node_iterator(self) -> Iterator[LayerNodeABC]: + return self.nodes() diff --git a/zigzag/workload/layer_attributes.py b/zigzag/workload/layer_attributes.py index 74844398..b0e475b8 100644 --- a/zigzag/workload/layer_attributes.py +++ b/zigzag/workload/layer_attributes.py @@ -1,9 +1,8 @@ import math import re -from typing import Any, TypeAlias +from typing import TypeAlias -from zigzag.mapping.spatial_mapping import SpatialMapping, SpatialMappingHint from zigzag.workload.LayerAttribute import LayerAttribute from zigzag.datatypes import ( Constants, @@ -17,7 +16,7 @@ UnrollFactorInt, ) -InputOperandSource: TypeAlias = dict[LayerOperand, list[int]] +InputOperandSource: TypeAlias = dict[LayerOperand, int] class LayerEquation(LayerAttribute): @@ -43,26 +42,14 @@ def get_contained_operands(self) -> list[LayerOperand]: def get_r_layer_dims(self, layer_op: LayerOperand) -> list[LayerDim]: """! 
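With the single-predecessor `DummyNode` and the `LayerNodeABC`-typed `ONNXWorkload`, a toy graph can be built and traversed directly. A small sketch (node names are illustrative):

from zigzag.workload.DummyNode import DummyNode
from zigzag.workload.ONNXWorkload import ONNXWorkload

# Tiny graph (input -> Reshape) built from DummyNodes only, so no mapping or
# layer attributes are needed; the edge is derived from the single
# predecessor id stored in input_operand_source.
workload = ONNXWorkload()
workload.add(0, DummyNode(layer_id=0, predecessor=None, node_name="input"))
workload.add(1, DummyNode(layer_id=1, predecessor=0, node_name="Reshape_0", type="Reshape"))

for node in workload.topological_sort():
    print(node.id, node.name)  # 0 input, then 1 Reshape_0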
Return a list with all LayerDims that are `relevant` for the given LayerOperand""" layer_operands = self.get_contained_operands() - assert layer_op in layer_operands + assert layer_op in layer_operands, f"Given LayerOperand {layer_op} is not part of this equation" layer_op_idx = layer_operands.index(layer_op) - slice_indices = self.__get_operand_start_indices() + [len(self.disassembly)] + slice_indices = self.__get_operand_start_indices() + [len(self.disassembly) + 1] disassembly_start_idx = slice_indices[layer_op_idx] + 1 disassembly_end_idx = slice_indices[layer_op_idx + 1] - 1 equation_slice = self.disassembly[disassembly_start_idx:disassembly_end_idx] return [LayerDim(x.upper()) for x in equation_slice] - @staticmethod - def parse_user_input(x: str) -> "LayerEquation": - assert isinstance(x, str) - assert " " not in x, f"Please remove all spaces from `equation` {x}" - x = x.replace("+=", "=") - x = x.replace("++", "+") - x = x.replace("*", " * ") - x = x.replace("=", " = ") - x = x.replace("+", " + ") - - return LayerEquation(x) - class LayerDimSizes(LayerAttribute): """! Contains the size of each computation loop as defined in the workload, @@ -91,13 +78,8 @@ def __setitem__(self, key: LayerDim, value: int): def __delitem__(self, key: LayerDim): del self.data[key] - @staticmethod - def parse_user_input(x: dict[str, UnrollFactor]): - assert isinstance(x, dict) - assert all([isinstance(k, str) for k in x.keys()]) - assert all([isinstance(k, UnrollFactor) for k in x.values()]) - data = {LayerDim(layer_dim_str): size for layer_dim_str, size in x.items()} - return LayerDimSizes(data) + def __add__(self, other: "LayerDimSizes"): + return LayerDimSizes(self.data | other.data) class LayerOperandPrecision(LayerAttribute): @@ -113,17 +95,6 @@ def final_output_precision(self) -> int: return self.data[Constants.FINAL_OUTPUT_LAYER_OP] return self.data[Constants.OUTPUT_LAYER_OP] - @staticmethod - def parse_user_input(x: dict[str, int]): - assert isinstance(x, dict) - assert all([isinstance(k, str) for k in x.keys()]) - assert all([isinstance(k, int) for k in x.values()]) - assert ( - Constants.OUTPUT_OPERAND_STR in x or Constants.FINAL_OUTPUT_OPERAND_STR in x - ), "Operand precision does not contain `O` or `O_final` as operand" - data = {LayerOperand(operand_str): size for operand_str, size in x.items()} - return LayerOperandPrecision(data) - class MemoryOperandLinks(LayerAttribute): """! Links LayerOperand to MemoryOperand.""" @@ -164,69 +135,51 @@ def copy(self): def __str__(self): return str({str(k): str(v) for k, v in self.data.items()}) - @staticmethod - def parse_user_input(x: dict[str, str]): - assert isinstance(x, dict) - assert all([isinstance(k, str) for k in x.keys()]) - assert all([isinstance(k, str) for k in x.values()]) - data = {LayerOperand(layer_op_str): MemoryOperand(mem_op_str) for layer_op_str, mem_op_str in x.items()} - return MemoryOperandLinks(data) - -class LayerDimRelations(LayerAttribute): - """! For the operand dimension that is not directly a loop dimension, a set of specific relation equations between - them (operand dimension and loop dimension) is required, e.g. ['ix=ox+fx-1', 'iy=oy+fy-1'] +class LayerDimRelation(LayerAttribute): + """! For the operand dimension that is not directly a loop dimension, a relation equations between them (operand + dimension) and the loop dimension is required. e.g. 
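A possible use of the new `LayerDimSizes.__add__`, assuming the class is constructed from the raw dict as its other methods imply; on duplicate dims the right-hand operand wins because the merge is a plain dict union:

from zigzag.datatypes import LayerDim
from zigzag.workload.layer_attributes import LayerDimSizes

# Merge two size collections into one.
conv_dims = LayerDimSizes({LayerDim("K"): 64, LayerDim("C"): 3})
spatial_dims = LayerDimSizes({LayerDim("OX"): 112, LayerDim("OY"): 112})
all_dims = conv_dims + spatial_dims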
`dim1 = coef2*dim2 + coef3*dim3` """ - def __init__(self, data: list[str]): - self.data = data + def __init__(self, dim_1: LayerDim, dim_2: LayerDim, dim_3: LayerDim, coef_2: int, coef_3: int): + self.dim_1 = dim_1 + self.dim_2 = dim_2 + self.dim_3 = dim_3 + self.coef_2 = coef_2 + self.coef_3 = coef_3 + self.data = f"{dim_1} = {coef_2}*{dim_2} + {coef_3}*{dim_3}" - def extract_pr_loop_info(self) -> tuple[PrLoop, LoopList, PrScalingFactors]: + @staticmethod + def extract_pr_loop_info(relations: list["LayerDimRelation"]) -> tuple[PrLoop, LoopList, PrScalingFactors]: """! # TODO requires cleanup and documentation """ pr_loop: PrLoop = {} pr_loop_list: LoopList = [] pr_scaling_factors: PrScalingFactors = {} - # Regex pattern to find dimensions and coefficients of form dim1 = coef_2*dim2 + coef_3*dim3 - pattern = r"(\w+)\s*=\s*(?:(\w+)\s*\*\s*)?(\w+)\s*\+\s*(?:(\w+)\s*\*\s*)?(\w+)" - for relation in self.data: - match = re.search(pattern, relation) - if match: - dim1, coef_2, dim2, coef_3, dim3 = match.groups() - dim1, dim2, dim3 = LayerDim(dim1), LayerDim(dim2), LayerDim(dim3) - coef_2 = int(coef_2) if coef_2 is not None else 1 - coef_3 = int(coef_3) if coef_3 is not None else 1 - else: - raise ValueError(f"Please make sure {relation} is of the form 'dim1 = a*dim2 + b*dim3'") - - key = dim1 - val = [dim2, dim3] + + for relation in relations: + key = relation.dim_1 + val = [relation.dim_2, relation.dim_3] pr_loop[key] = val pr_loop_list.extend([key] + val) - scaling_factors = {dim2: coef_2, dim3: coef_3} + scaling_factors = {relation.dim_2: relation.coef_2, relation.dim_3: relation.coef_3} pr_scaling_factors[key] = scaling_factors return pr_loop, pr_loop_list, pr_scaling_factors - @staticmethod - def parse_user_input(x: list[str]): - assert isinstance(x, list) - assert all([isinstance(elem, str) for elem in x]) - return LayerDimRelations(x) - class LayerTemporalOrdering(LayerAttribute): + """ + # TODO is this ever used? + """ + def __init__(self, data: dict[LayerOperand, UnrollFactorInt]): self.data = data @staticmethod - def parse_user_input(x: dict[str, int]): - assert isinstance(x, dict) - assert all([isinstance(k, str) for k in x.keys()]) - assert all([isinstance(v, int) for v in x.values()]) - data = {LayerOperand(layer_op_str): factor for layer_op_str, factor in x.items()} - return LayerTemporalOrdering(data) + def empty(): + return LayerTemporalOrdering({}) def __delitem__(self, x: LayerOperand): del self.data[x] @@ -242,136 +195,5 @@ def __getitem__(self, key: LayerDim) -> tuple[int, int]: return self.data[key] if key in self.data else LayerPadding.DEFAULT @staticmethod - def parse_user_input(x: dict[str, tuple[int, int]]): - assert isinstance(x, dict) - assert all([isinstance(k, str) for k in x.keys()]) - assert all( - [isinstance(v, tuple) and len(v) == 2 and all([isinstance(elem, int) for elem in v]) for v in x.values()] - ) - data = {LayerDim(layer_op_str): value for layer_op_str, value in x.items()} - return LayerPadding(data) - - -class LayerConstantOperands(LayerAttribute): - # TODO maybe this class is excessive and should just be list[LayerOperand] or empty list - def __init__(self, data: list[LayerOperand]): - self.data = data - - @staticmethod - def parse_user_input(x: list[str]): - # TODO should this check wether the list is empty? - assert isinstance(x, list) - assert all([isinstance(elem, str) for elem in x]) - data = [LayerOperand(layer_op_str) for layer_op_str in x] - return LayerConstantOperands(data) - - -class LayerAttributes: - """! 
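A sketch of how relations of the form `dim1 = coef2*dim2 + coef3*dim3` could now be built with `LayerDimRelation` instead of being parsed from strings; the dimension names follow zigzag's uppercase convention and coefficients of 1 are written out explicitly:

from zigzag.datatypes import LayerDim
from zigzag.workload.layer_attributes import LayerDimRelation

# The convolution index relations IX = OX + FX and IY = OY + FY.
relations = [
    LayerDimRelation(LayerDim("IX"), LayerDim("OX"), LayerDim("FX"), coef_2=1, coef_3=1),
    LayerDimRelation(LayerDim("IY"), LayerDim("OY"), LayerDim("FY"), coef_2=1, coef_3=1),
]
# pr_loop maps IX -> [OX, FX] and IY -> [OY, FY]; the scaling factors hold the
# coefficients per partially relevant dimension.
pr_loop, pr_loop_list, pr_scaling_factors = LayerDimRelation.extract_pr_loop_info(relations)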
Represents the layer attributes as given by the user and contains methods to parse each attribute. - Rationale: only this class contains the (hard-coded) layer attribute strings from the user input format. - """ - - def __init__(self, data: dict[str, Any]): - self.data = data - - def parse_equation(self) -> LayerEquation: - key = "equation" - assert key in self, f"Workload does not contain `{key}` definition" - return LayerEquation.parse_user_input(self.data[key]) - - def parse_layer_dim_sizes(self) -> LayerDimSizes: - key = "loop_dim_size" - assert key in self, f"Workload does not contain `{key}` definition" - return LayerDimSizes.parse_user_input(self.data[key]) - - def parse_pr_layer_dim_sizes(self) -> LayerDimSizes | None: - key = "pr_loop_dim_size" - # Fail soft - if key not in self: - return None - return LayerDimSizes.parse_user_input(self.data[key]) - - def parse_operand_precision(self) -> LayerOperandPrecision: - key = "operand_precision" - assert key in self, f"Workload does not contain `{key}` definition" - return LayerOperandPrecision.parse_user_input(self.data[key]) - - def parse_operand_source(self) -> InputOperandSource: - key: str = "operand_source" - assert key in self, f"Workload does not contain `{key}` definition" - x: dict[str, list[int]] = self.data[key] - assert isinstance(x, dict) - assert all([isinstance(k, str) for k in x.keys()]) - assert all([isinstance(v, list) for v in x.values()]) - assert all([all([isinstance(elem, int) for elem in v]) for v in x.values()]) - return {LayerOperand(k): [elem for elem in v] for k, v in x.items()} - - def parse_layer_dim_relations(self) -> LayerDimRelations | None: - key = "dimension_relations" - # Fail soft - if key not in self: - return None - return LayerDimRelations.parse_user_input(self.data[key]) - - def parse_spatial_mapping(self) -> SpatialMapping: - key = "spatial_mapping" - assert key in self, f"Workload does not contain `{key}` definition" - return SpatialMapping.parse_user_input(self.data[key]) - - def parse_spatial_mapping_hint(self) -> SpatialMappingHint: - key = "spatial_mapping_hint" - # Fail soft - if key not in self: - return SpatialMappingHint.empty() - return SpatialMappingHint.parse_user_input(self.data[key]) - - def parse_core_allocation(self) -> int: - key = "core_allocation" - assert key in self, f"Workload does not contain `{key}` definition" - value = self.data[key] - assert isinstance(value, int) - return value - - def parse_mem_operand_links(self) -> MemoryOperandLinks: - key = "memory_operand_links" - assert key in self, f"Workload does not contain `{key}` definition" - return MemoryOperandLinks.parse_user_input(self.data[key]) - - def parse_temporal_ordering(self) -> LayerTemporalOrdering | None: - key = "temporal_ordering" - if key not in self: - return None - return LayerTemporalOrdering.parse_user_input(self.data[key]) - - def parse_padding(self) -> LayerPadding | None: - key = "padding" - # Fail soft - if key not in self: - return None - return LayerPadding.parse_user_input(self.data[key]) - - def parse_constant_operands(self) -> list[LayerOperand]: - key = "constant_operands" - # Fail soft - if key not in self: - return list() - x = self.data[key] - x: list[str] - assert isinstance(x, list) - assert all([isinstance(elem, str) for elem in x]) - return [LayerOperand(layer_op_str) for layer_op_str in x] - - def parse_operator_type(self) -> str | None: - key = "operator_type" - if key not in self: - return None - return self.data[key] - - def __contains__(self, x: str): - return x in self.data - - 
@staticmethod - def parse_user_input(x: dict[str, Any]) -> "LayerAttributes": - assert isinstance(x, dict) - assert all([isinstance(elem, str) for elem in x]) - return LayerAttributes(x) + def empty(): + return LayerPadding({}) diff --git a/zigzag/workload/layer_node.py b/zigzag/workload/layer_node.py index f1b0cb10..cb053a80 100644 --- a/zigzag/workload/layer_node.py +++ b/zigzag/workload/layer_node.py @@ -1,4 +1,5 @@ from copy import deepcopy +from dataclasses import dataclass from math import gcd import logging as _logging import math @@ -13,10 +14,10 @@ UnrollFactor, ) from zigzag.mapping.spatial_mapping import SpatialMapping, SpatialMappingHint +from zigzag.workload.LayerNodeABC import LayerNodeABC from zigzag.workload.layer_attributes import ( InputOperandSource, - LayerAttributes, - LayerDimRelations, + LayerDimRelation, LayerDimSizes, LayerEquation, LayerOperandPrecision, @@ -95,12 +96,28 @@ def extract_relevancy_info( return self -class LayerNode: +@dataclass +class LayerNodeAttributes: + layer_type: str + equation: LayerEquation + layer_dim_sizes: LayerDimSizes + operand_precision: LayerOperandPrecision + dimension_relations: list[LayerDimRelation] + spatial_mapping: SpatialMapping + spatial_mapping_hint: SpatialMappingHint + core_allocation: list[int] + memory_operand_links: MemoryOperandLinks + temporal_ordering: LayerTemporalOrdering + padding: LayerPadding + constant_operands: list[LayerOperand] + input_operand_source: InputOperandSource + pr_layer_dim_sizes: LayerDimSizes | None + + +class LayerNode(LayerNodeABC): """! Represents a single layer in a workload.""" - def __init__( - self, layer_id: int, layer_attrs: LayerAttributes, node_name: str | None = None, layer_type: str | None = None - ): + def __init__(self, layer_id: int, node_name: str, node_attr: LayerNodeAttributes): """ To construct each layer node, algorithm equation/dimension/indirect relation are parsed. This parser collects information of operand, loop dimension, and loop relevance. @@ -110,25 +127,23 @@ def __init__( # TODO clean up this method. Too many lines for a clean init method. 
""" - self.id = layer_id - self.layer_attrs = layer_attrs - self.name = node_name - self.type: str | None = layer_type - - # Parsed attributes - self.equation: LayerEquation = layer_attrs.parse_equation() - self.layer_dim_sizes: LayerDimSizes = layer_attrs.parse_layer_dim_sizes() - self.operand_precision: LayerOperandPrecision = layer_attrs.parse_operand_precision() - self.dimension_relations: LayerDimRelations | None = layer_attrs.parse_layer_dim_relations() - self.user_spatial_mapping: SpatialMapping = layer_attrs.parse_spatial_mapping() - self.user_spatial_mapping_hint: SpatialMappingHint = layer_attrs.parse_spatial_mapping_hint() - self.core_allocation: int = layer_attrs.parse_core_allocation() - self.memory_operand_links: MemoryOperandLinks = layer_attrs.parse_mem_operand_links() - self.user_temporal_ordering: LayerTemporalOrdering | None = layer_attrs.parse_temporal_ordering() - self.padding: LayerPadding | None = layer_attrs.parse_padding() - self.constant_operands: list[LayerOperand] = layer_attrs.parse_constant_operands() - pr_layer_dim_sizes: LayerDimSizes | None = layer_attrs.parse_pr_layer_dim_sizes() - self.input_operand_source: InputOperandSource = layer_attrs.parse_operand_source() + LayerNodeABC.__init__(self, node_id=layer_id, node_name=node_name) + + # Unpack attributes + self.type = node_attr.layer_type + self.equation = node_attr.equation + self.layer_dim_sizes = node_attr.layer_dim_sizes + self.operand_precision = node_attr.operand_precision + self.dimension_relations = node_attr.dimension_relations + self.spatial_mapping = node_attr.spatial_mapping + self.spatial_mapping_hint = node_attr.spatial_mapping_hint + self.core_allocation = node_attr.core_allocation + self.memory_operand_links = node_attr.memory_operand_links + self.temporal_ordering = node_attr.temporal_ordering + self.padding = node_attr.padding + self.constant_operands = node_attr.constant_operands + self.input_operand_source = node_attr.input_operand_source + pr_layer_dim_sizes = node_attr.pr_layer_dim_sizes # Derived attributes self.layer_operands = self.equation.get_contained_operands() @@ -155,18 +170,15 @@ def build_pr_funcs(self) -> tuple[PrLoop, LoopList, PrScalingFactors]: """! # TODO requires documentation """ - if self.dimension_relations is not None and len(self.dimension_relations) > 0: - pr_loop, pr_loop_list, pr_scaling_factors = self.dimension_relations.extract_pr_loop_info() + if len(self.dimension_relations) > 0: + pr_loop, pr_loop_list, pr_scaling_factors = LayerDimRelation.extract_pr_loop_info(self.dimension_relations) else: pr_loop, pr_loop_list, pr_scaling_factors = {}, [], {} return pr_loop, pr_loop_list, pr_scaling_factors def __str__(self): - return f"LayerNode_{self.name}" - - def __repr__(self): - return str(self) + return self.name def __jsonrepr__(self): """! JSON representation used for saving this object to a json file.""" @@ -177,7 +189,7 @@ def __jsonrepr__(self): "loop_dimensions": self.layer_dim_sizes, "operand_precision": self.operand_precision, "core_allocation": self.core_allocation, - "user_spatial_mapping": self.user_spatial_mapping, + "user_spatial_mapping": self.spatial_mapping, "memory_operand_links": self.memory_operand_links, # "source_storage_level": self.source_storage_level, # NOTE not used? 
} @@ -204,9 +216,10 @@ def calc_tensor_dim(self, dim: LayerDim, layer_dim_sizes: LayerDimSizes): pr_dim_size = min(self.pr_layer_dim_sizes[dim], pr_dim_size) return pr_dim_size elif dim in self.layer_dim_sizes: - assert ( - self.layer_dim_sizes[dim] == 1 - ), "This line should only be reached when the dim has a size of 1 in the layer." + # This case is possible when the `layer_dim_sizes` is used to scope which LayerDims should be accounted for + # assert ( + # self.layer_dim_sizes[dim] == 1 + # ), "This line should only be reached when the dim has a size of 1 in the layer." return 1 else: raise ValueError("Something went wrong in the initialization of the layer, or in the caller function.") @@ -237,7 +250,7 @@ def calc_pr_dimension_size_total(self, dim: LayerDim) -> int: total_pr_dim_size = self.calc_pr_dimension_size(*args) # Partially relevant loop dimensions can also have padding, so get the padding for this pr dimension and # subtract - padding = LayerPadding.DEFAULT if self.padding is None else self.padding[dim] + padding = LayerPadding.DEFAULT if dim not in self.padding else self.padding[dim] total_pr_dim_size_without_padding = int(total_pr_dim_size - sum(padding)) return total_pr_dim_size_without_padding @@ -278,3 +291,25 @@ def extract_layer_info(self): def get_operand_irrelevant_layer_dims(self, layer_op: LayerOperand) -> list[LayerDim]: """! Return the irrelevant dimensions of layer operand 'layer_op'.""" return self.loop_relevancy_info.get_ir_layer_dims(layer_op) + + def extract_node_attr(self) -> LayerNodeAttributes: + """Pack this layer node's attributes in a LayerNodeAttributes instance. Useful for instantiating new layer nodes + (used in Stream)""" + attributes = LayerNodeAttributes( + layer_type=self.type, + equation=self.equation, + layer_dim_sizes=self.layer_dim_sizes, + operand_precision=self.operand_precision, + dimension_relations=self.dimension_relations, + spatial_mapping=self.spatial_mapping, + spatial_mapping_hint=self.spatial_mapping_hint, + core_allocation=self.core_allocation, + memory_operand_links=self.memory_operand_links, + temporal_ordering=self.temporal_ordering, + padding=self.padding, + constant_operands=self.constant_operands, + input_operand_source=self.input_operand_source, + pr_layer_dim_sizes=self.pr_layer_dim_sizes, + ) + # Make sure the new attributes don't simply point to the old instances + return deepcopy(attributes)
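`extract_node_attr` plus the new three-argument `LayerNode` constructor allow a node to be re-instantiated from its packed attributes. A minimal sketch of such a round trip:

from zigzag.workload.layer_node import LayerNode


def clone_layer_node(node: LayerNode, new_id: int, new_name: str) -> LayerNode:
    """Re-instantiate a layer from its packed attributes, the way downstream
    tools (e.g. Stream) are expected to; extract_node_attr() already returns a
    deep copy, so the clone shares no mutable state with the original."""
    return LayerNode(layer_id=new_id, node_name=new_name, node_attr=node.extract_node_attr())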