diff --git a/lab3/inputs/hardware/accelerator1.yaml b/lab3/inputs/hardware/accelerator1.yaml new file mode 100644 index 00000000..ea1fffca --- /dev/null +++ b/lab3/inputs/hardware/accelerator1.yaml @@ -0,0 +1,125 @@ +name: accelerator1 + +multipliers: + input_precision: [8, 8] + multiplier_energy: 1 # pJ + multiplier_area: 1 # unit + dimensions: [D1, D2] + sizes: [32, 32] + +memories: + rf_1B_I2: + size: 8 + r_bw: 8 + w_bw: 8 + r_cost: 0.095 # TODO + w_cost: 0.095 # TODO + area: 0 + r_port: 1 + w_port: 1 + rw_port: 0 + latency: 1 + auto_cost_extraction: False + operands: [I2] + ports: + - fh: w_port_1 + tl: r_port_1 + served_dimensions: [] # Fully unrolled over all multipliers + + rf_1B_I1: + size: 8 + r_bw: 8 + w_bw: 8 + r_cost: 0.095 # TODO + w_cost: 0.095 # TODO + area: 0 + r_port: 1 + w_port: 1 + rw_port: 0 + latency: 1 + auto_cost_extraction: False + operands: [I1] + ports: + - fh: w_port_1 + tl: r_port_1 + served_dimensions: [D2] # One RF per column + + rf_4B: + size: 32 + r_bw: 32 + w_bw: 32 + r_cost: 0.021 # TODO + w_cost: 0.021 # TODO + area: 0 + r_port: 2 + w_port: 2 + rw_port: 0 + latency: 1 + operands: [O] + ports: + - fh: w_port_1 + tl: r_port_1 + fl: w_port_2 + th: r_port_2 + served_dimensions: [D1] # One RF per row + + sram_16KB_I2: + size: 131072 + r_bw: 128 + w_bw: 128 + r_cost: 416.16 # TODO + w_cost: 378.4 # TODO + area: 0 + r_port: 1 + w_port: 1 + rw_port: 0 + latency: 1 + operands: [I2] + ports: + - fh: w_port_1 + tl: r_port_1 + served_dimensions: [D1, D2] + + sram_64KB_I1_O: + size: 524288 + r_bw: 512 + w_bw: 512 + r_cost: 416.16 # TODO + w_cost: 378.4 # TODO + area: 0 + r_port: 0 + w_port: 0 + rw_port: 2 + latency: 1 + operands: [I1, O] + ports: + - fh: rw_port_1 + tl: rw_port_1 + - fh: rw_port_1 + tl: rw_port_1 + fl: rw_port_2 + th: rw_port_2 + served_dimensions: [D1, D2] + + dram: + size: 4294967296 + r_bw: 64 + w_bw: 64 + r_cost: 700 # TODO + w_cost: 750 # TODO + area: 0 + r_port: 0 + w_port: 0 + rw_port: 1 + latency: 1 + operands: [I1, I2, O] + 
ports: + - fh: rw_port_1 + tl: rw_port_1 + - fh: rw_port_1 + tl: rw_port_1 + - fh: rw_port_1 + tl: rw_port_1 + fl: rw_port_1 + th: rw_port_1 + served_dimensions: [D1, D2] diff --git a/lab3/inputs/hardware/accelerator2.yaml b/lab3/inputs/hardware/accelerator2.yaml new file mode 100644 index 00000000..57e8dec7 --- /dev/null +++ b/lab3/inputs/hardware/accelerator2.yaml @@ -0,0 +1,125 @@ +name: accelerator2 + +multipliers: + input_precision: [8, 8] + multiplier_energy: 1 # pJ + multiplier_area: 1 # unit + dimensions: [D1, D2] + sizes: [32, 32] + +memories: + rf_1B_I2: + size: 8 + r_bw: 8 + w_bw: 8 + r_cost: 0.095 # TODO + w_cost: 0.095 # TODO + area: 0 + r_port: 1 + w_port: 1 + rw_port: 0 + latency: 1 + auto_cost_extraction: False + operands: [I2] + ports: + - fh: w_port_1 + tl: r_port_1 + served_dimensions: [D1] # One per column + + rf_1B_I1: + size: 8 + r_bw: 8 + w_bw: 8 + r_cost: 0.095 # TODO + w_cost: 0.095 # TODO + area: 0 + r_port: 1 + w_port: 1 + rw_port: 0 + latency: 1 + auto_cost_extraction: False + operands: [I1] + ports: + - fh: w_port_1 + tl: r_port_1 + served_dimensions: [D2] # One per row + + rf_4B: + size: 32 + r_bw: 32 + w_bw: 32 + r_cost: 0.021 # TODO + w_cost: 0.021 # TODO + area: 0 + r_port: 2 + w_port: 2 + rw_port: 0 + latency: 1 + operands: [O] + ports: + - fh: w_port_1 + tl: r_port_1 + fl: w_port_2 + th: r_port_2 + served_dimensions: [] # One per PE + + sram_16KB_I2: + size: 131072 + r_bw: 128 + w_bw: 128 + r_cost: 416.16 # TODO + w_cost: 378.4 # TODO + area: 0 + r_port: 1 + w_port: 1 + rw_port: 0 + latency: 1 + operands: [I2] + ports: + - fh: w_port_1 + tl: r_port_1 + served_dimensions: [D1, D2] + + sram_64KB_I1_O: + size: 524288 + r_bw: 512 + w_bw: 512 + r_cost: 416.16 # TODO + w_cost: 378.4 # TODO + area: 0 + r_port: 0 + w_port: 0 + rw_port: 2 + latency: 1 + operands: [I1, O] + ports: + - fh: rw_port_1 + tl: rw_port_1 + - fh: rw_port_1 + tl: rw_port_1 + fl: rw_port_2 + th: rw_port_2 + served_dimensions: [D1, D2] + + dram: + size: 4294967296 + r_bw: 
64 + w_bw: 64 + r_cost: 700 # TODO + w_cost: 750 # TODO + area: 0 + r_port: 0 + w_port: 0 + rw_port: 1 + latency: 1 + operands: [I1, I2, O] + ports: + - fh: rw_port_1 + tl: rw_port_1 + - fh: rw_port_1 + tl: rw_port_1 + - fh: rw_port_1 + tl: rw_port_1 + fl: rw_port_1 + th: rw_port_1 + served_dimensions: [D1, D2] diff --git a/lab3/inputs/hardware/accelerator3.yaml b/lab3/inputs/hardware/accelerator3.yaml new file mode 100644 index 00000000..20918664 --- /dev/null +++ b/lab3/inputs/hardware/accelerator3.yaml @@ -0,0 +1,125 @@ +name: accelerator3 + +multipliers: + input_precision: [8, 8] + multiplier_energy: 1 # pJ + multiplier_area: 1 # unit + dimensions: [D1, D2, D3] + sizes: [64, 4, 4] + +memories: + rf_1B_I2: + size: 8 + r_bw: 8 + w_bw: 8 + r_cost: 0.095 # TODO + w_cost: 0.095 # TODO + area: 0 + r_port: 1 + w_port: 1 + rw_port: 0 + latency: 1 + auto_cost_extraction: False + operands: [I2] + ports: + - fh: w_port_1 + tl: r_port_1 + served_dimensions: [] # One per PE + + rf_1B_I1: + size: 8 + r_bw: 8 + w_bw: 8 + r_cost: 0.095 # TODO + w_cost: 0.095 # TODO + area: 0 + r_port: 1 + w_port: 1 + rw_port: 0 + latency: 1 + auto_cost_extraction: False + operands: [I1] + ports: + - fh: w_port_1 + tl: r_port_1 + served_dimensions: [] # One per PE + + rf_4B: + size: 32 + r_bw: 32 + w_bw: 32 + r_cost: 0.021 # TODO + w_cost: 0.021 # TODO + area: 0 + r_port: 2 + w_port: 2 + rw_port: 0 + latency: 1 + operands: [O] + ports: + - fh: w_port_1 + tl: r_port_1 + fl: w_port_2 + th: r_port_2 + served_dimensions: [D2, D3] # One per PE + + sram_16KB_I2: + size: 131072 + r_bw: 128 + w_bw: 128 + r_cost: 416.16 # TODO + w_cost: 378.4 # TODO + area: 0 + r_port: 1 + w_port: 1 + rw_port: 0 + latency: 1 + operands: [I2] + ports: + - fh: w_port_1 + tl: r_port_1 + served_dimensions: [D1, D2, D3] + + sram_64KB_I1_O: + size: 524288 + r_bw: 512 + w_bw: 512 + r_cost: 416.16 # TODO + w_cost: 378.4 # TODO + area: 0 + r_port: 0 + w_port: 0 + rw_port: 2 + latency: 1 + operands: [I1, O] + ports: + - fh: rw_port_1 + 
tl: rw_port_1 + - fh: rw_port_1 + tl: rw_port_1 + fl: rw_port_2 + th: rw_port_2 + served_dimensions: [D1, D2, D3] + + dram: + size: 4294967296 + r_bw: 64 + w_bw: 64 + r_cost: 700 # TODO + w_cost: 750 # TODO + area: 0 + r_port: 0 + w_port: 0 + rw_port: 1 + latency: 1 + operands: [I1, I2, O] + ports: + - fh: rw_port_1 + tl: rw_port_1 + - fh: rw_port_1 + tl: rw_port_1 + - fh: rw_port_1 + tl: rw_port_1 + fl: rw_port_1 + th: rw_port_1 + served_dimensions: [D1, D2, D3] diff --git a/lab3/inputs/hardware/c_k.py b/lab3/inputs/hardware/c_k.py deleted file mode 100644 index aff05a22..00000000 --- a/lab3/inputs/hardware/c_k.py +++ /dev/null @@ -1,222 +0,0 @@ -from zigzag.classes.hardware.architecture.accelerator import Accelerator -from zigzag.classes.hardware.architecture.operational_unit import Multiplier -from zigzag.classes.hardware.architecture.operational_array import MultiplierArray -from zigzag.classes.hardware.architecture.memory_instance import MemoryInstance -from zigzag.classes.hardware.architecture.memory_instance import MemoryInstance -from zigzag.classes.hardware.architecture.memory_hierarchy import MemoryHierarchy -from zigzag.classes.hardware.architecture.core import Core - - -def get_multiplier_array(): - """Multiplier array variables""" - multiplier_input_precision = [8, 8] - multiplier_energy = 1.0 - multiplier_area = 0 - dimensions = {"D1": 32, "D2": 32} - - multiplier = Multiplier( - multiplier_input_precision, multiplier_energy, multiplier_area - ) - multiplier_array = MultiplierArray(multiplier, dimensions) - - return multiplier_array - - -def get_memory_hierarchy(multiplier_array): - - """Memory hierarchy variables""" - """ size=#bit, bw=#bit""" - # Defintion of register file for inputs and weights - rf_1B = MemoryInstance( - name="rf_1B", - mem_type="rf", - size=1 * 8, - r_bw=1 * 8, - r_port=1, - w_port=1, - rw_port=0, - auto_cost_extraction=True, - ) - # Defintion of register file for outputs - rf_2B = MemoryInstance( - name="rf_4B", - mem_type="rf", 
- size=4 * 8, - r_bw=4 * 8, - r_port=2, - w_port=2, - rw_port=0, - auto_cost_extraction=True, - ) - # Defintion of first SRAM for weights - l1_w = MemoryInstance( - name="l1_w", - mem_type="sram", - size=16384 * 8, - r_bw=16 * 8, - r_port=1, - w_port=1, - rw_port=0, - auto_cost_extraction=True, - ) - # Defintion of first SRAM for inputs and outputs - l1_io = MemoryInstance( - name="l1_io", - mem_type="sram", - size=65536 * 8, - r_bw=64 * 8, - r_port=0, - w_port=0, - rw_port=2, - auto_cost_extraction=True, - ) - # Defintion of first SRAM for weights - l2_w = MemoryInstance( - name="l2_w", - mem_type="sram", - size=1048576 * 8, # 1 MB - r_bw=32 * 8, - r_port=1, - w_port=1, - rw_port=0, - latency=1, - auto_cost_extraction=True, - ) - # Defintion of first SRAM for inputs and outputs - l2_io = MemoryInstance( - name="l2_io", - mem_type="sram", - size=1048576 * 8, # 1 MB - r_bw=32 * 8, - r_port=0, - w_port=0, - rw_port=2, - latency=1, - auto_cost_extraction=True, - ) - - dram = MemoryInstance( - name="dram", - mem_type="dram", - size=1073741824 * 8, - r_bw=8 * 8, - r_port=0, - w_port=0, - rw_port=2, - latency=1, - auto_cost_extraction=True, - ) - - memory_hierarchy_graph = MemoryHierarchy(operational_array=multiplier_array) - - """ - fh: from high = wr_in_by_high = - fl: from low = wr_in_by_low - th: to high = rd_out_to_high = - tl: to low = rd_out_to_low = - """ - # Register file for weight - memory_hierarchy_graph.add_memory( - memory_instance=rf_1B, - operands=("I2",), - port_alloc=({"fh": "w_port_1", "tl": "r_port_1", "fl": None, "th": None},), - served_dimensions=set(), - ) - # Register file for input - memory_hierarchy_graph.add_memory( - memory_instance=rf_1B, - operands=("I1",), - port_alloc=({"fh": "w_port_1", "tl": "r_port_1", "fl": None, "th": None},), - served_dimensions={(0, 1)}, - ) - # Register file for output - memory_hierarchy_graph.add_memory( - memory_instance=rf_2B, - operands=("O",), - port_alloc=( - {"fh": "w_port_1", "tl": "r_port_1", "fl": 
"w_port_2", "th": "r_port_2"}, - ), - served_dimensions={(1, 0)}, - ) - # First SRAM for weights - memory_hierarchy_graph.add_memory( - memory_instance=l1_w, - operands=("I2",), - port_alloc=({"fh": "w_port_1", "tl": "r_port_1", "fl": None, "th": None},), - served_dimensions="all", - ) - - # First SRAM for inputs and outputs - memory_hierarchy_graph.add_memory( - memory_instance=l1_io, - operands=("I1", "O"), - port_alloc=( - {"fh": "rw_port_1", "tl": "rw_port_1", "fl": None, "th": None}, - { - "fh": "rw_port_1", - "tl": "rw_port_1", - "fl": "rw_port_2", - "th": "rw_port_2", - }, - ), - served_dimensions="all", - ) - # Second SRAM for weights - memory_hierarchy_graph.add_memory( - memory_instance=l2_w, - operands=("I2",), - port_alloc=({"fh": "w_port_1", "tl": "r_port_1", "fl": None, "th": None},), - served_dimensions="all", - ) - # Second SRAM for inputs and output - memory_hierarchy_graph.add_memory( - memory_instance=l2_io, - operands=("I1", "O"), - port_alloc=( - {"fh": "rw_port_1", "tl": "rw_port_1", "fl": None, "th": None}, - { - "fh": "rw_port_1", - "tl": "rw_port_1", - "fl": "rw_port_2", - "th": "rw_port_2", - }, - ), - served_dimensions="all", - ) - - memory_hierarchy_graph.add_memory( - memory_instance=dram, - operands=("I1", "I2", "O"), - port_alloc=( - {"fh": "rw_port_1", "tl": "rw_port_2", "fl": None, "th": None}, - {"fh": "rw_port_1", "tl": "rw_port_2", "fl": None, "th": None}, - { - "fh": "rw_port_1", - "tl": "rw_port_2", - "fl": "rw_port_1", - "th": "rw_port_2", - }, - ), - served_dimensions="all", - ) - - return memory_hierarchy_graph - - -def get_dataflows(): - return [ - {"D1": ("C", 32), "D2": ("K", 32)}, - {"D1": ("G", 32)}, - ] - - -def get_core(id): - operational_array = get_multiplier_array() - memory_hierarchy = get_memory_hierarchy(operational_array) - core = Core(id, operational_array, memory_hierarchy) - return core - - -cores = {get_core(id=0)} -name = "accelerator-c-k" -accelerator = Accelerator(name, cores) diff --git 
a/lab3/inputs/hardware/ox_fx_fy.py b/lab3/inputs/hardware/ox_fx_fy.py deleted file mode 100644 index 47ee6b82..00000000 --- a/lab3/inputs/hardware/ox_fx_fy.py +++ /dev/null @@ -1,216 +0,0 @@ -from zigzag.classes.hardware.architecture.accelerator import Accelerator -from zigzag.classes.hardware.architecture.operational_unit import Multiplier -from zigzag.classes.hardware.architecture.operational_array import MultiplierArray -from zigzag.classes.hardware.architecture.memory_instance import MemoryInstance -from zigzag.classes.hardware.architecture.memory_instance import MemoryInstance -from zigzag.classes.hardware.architecture.memory_hierarchy import MemoryHierarchy -from zigzag.classes.hardware.architecture.core import Core - - -def get_multiplier_array(): - """Multiplier array variables""" - multiplier_input_precision = [8, 8] - multiplier_energy = 1.0 - multiplier_area = 0 - - dimensions = {"D1": 64, "D2": 4, "D3": 4} - - multiplier = Multiplier( - multiplier_input_precision, multiplier_energy, multiplier_area - ) - multiplier_array = MultiplierArray(multiplier, dimensions) - - return multiplier_array - - -def get_memory_hierarchy(multiplier_array): - - """Memory hierarchy variables""" - """ size=#bit, bw=#bit""" - # Defintion of register file for inputs and weights - rf_1B = MemoryInstance( - name="rf_1B", - mem_type="rf", - size=1 * 8, - r_bw=1 * 8, - r_port=1, - w_port=1, - rw_port=0, - auto_cost_extraction=True, - ) - # Defintion of rRegister file for outputs - rf_2B = MemoryInstance( - name="rf_4B", - mem_type="rf", - size=4 * 8, - r_bw=4 * 8, - r_port=2, - w_port=2, - rw_port=0, - auto_cost_extraction=True, - ) - # Defintion of first SRAM for weights - l1_w = MemoryInstance( - name="l1_w", - mem_type="sram", - size=16384 * 8, - r_bw=16 * 8, - r_port=1, - w_port=1, - rw_port=0, - auto_cost_extraction=True, - ) - # Defintion of first SRAM for inputs and outputs - l1_io = MemoryInstance( - name="l1_io", - mem_type="sram", - size=65536 * 8, - r_bw=64 * 8, - 
r_port=0, - w_port=0, - rw_port=2, - auto_cost_extraction=True, - ) - # Defintion of first SRAM for weights - l2_w = MemoryInstance( - name="l2_w", - mem_type="sram", - size=1048576 * 8, # 1 MB - r_bw=32 * 8, - r_port=1, - w_port=1, - rw_port=0, - latency=1, - auto_cost_extraction=True, - ) - # Defintion of first SRAM for inputs and outputs - l2_io = MemoryInstance( - name="l2_io", - mem_type="sram", - size=1048576 * 8, # 1 MB - r_bw=32 * 8, - r_port=0, - w_port=0, - rw_port=2, - latency=1, - auto_cost_extraction=True, - ) - - dram = MemoryInstance( - name="dram", - mem_type="dram", - size=1073741824 * 8, - r_bw=8 * 8, - r_port=0, - w_port=0, - rw_port=2, - latency=1, - auto_cost_extraction=True, - ) - - memory_hierarchy_graph = MemoryHierarchy(operational_array=multiplier_array) - - """ - fh: from high = wr_in_by_high = - fl: from low = wr_in_by_low - th: to high = rd_out_to_high = - tl: to low = rd_out_to_low = - """ - # Register file for input - memory_hierarchy_graph.add_memory( - memory_instance=rf_1B, - operands=("I1",), - port_alloc=({"fh": "w_port_1", "tl": "r_port_1", "fl": None, "th": None},), - served_dimensions=set(), - ) - # Register file for weight - memory_hierarchy_graph.add_memory( - memory_instance=rf_1B, - operands=("I2",), - port_alloc=({"fh": "w_port_1", "tl": "r_port_1", "fl": None, "th": None},), - served_dimensions=set(), - ) - # Register file for output - memory_hierarchy_graph.add_memory( - memory_instance=rf_2B, - operands=("O",), - port_alloc=( - {"fh": "w_port_1", "tl": "r_port_1", "fl": "w_port_2", "th": "r_port_2"}, - ), - served_dimensions={(0, 1, 0), (0, 0, 1)}, - ) - # First SRAM for weights - memory_hierarchy_graph.add_memory( - memory_instance=l1_w, - operands=("I2",), - port_alloc=({"fh": "w_port_1", "tl": "r_port_1", "fl": None, "th": None},), - served_dimensions="all", - ) - - # First SRAM for inputs and outputs - memory_hierarchy_graph.add_memory( - memory_instance=l1_io, - operands=("I1", "O"), - port_alloc=( - {"fh": 
"rw_port_1", "tl": "rw_port_1", "fl": None, "th": None}, - { - "fh": "rw_port_1", - "tl": "rw_port_1", - "fl": "rw_port_2", - "th": "rw_port_2", - }, - ), - served_dimensions="all", - ) - # Second SRAM for weights - memory_hierarchy_graph.add_memory( - memory_instance=l2_w, - operands=("I2",), - port_alloc=({"fh": "w_port_1", "tl": "r_port_1", "fl": None, "th": None},), - served_dimensions="all", - ) - # Second SRAM for inputs and output - memory_hierarchy_graph.add_memory( - memory_instance=l2_io, - operands=("I1", "O"), - port_alloc=( - {"fh": "rw_port_1", "tl": "rw_port_1", "fl": None, "th": None}, - { - "fh": "rw_port_1", - "tl": "rw_port_1", - "fl": "rw_port_2", - "th": "rw_port_2", - }, - ), - served_dimensions="all", - ) - - memory_hierarchy_graph.add_memory( - memory_instance=dram, - operands=("I1", "I2", "O"), - port_alloc=( - {"fh": "rw_port_1", "tl": "rw_port_2", "fl": None, "th": None}, - {"fh": "rw_port_1", "tl": "rw_port_2", "fl": None, "th": None}, - { - "fh": "rw_port_1", - "tl": "rw_port_2", - "fl": "rw_port_1", - "th": "rw_port_2", - }, - ), - served_dimensions="all", - ) - - return memory_hierarchy_graph - - -def get_core(id, quad_core=False): - operational_array = get_multiplier_array() - memory_hierarchy = get_memory_hierarchy(operational_array) - core = Core(id, operational_array, memory_hierarchy) - return core - - -cores = {get_core(id=0)} -name = "accelerator-ox-fx-fy" -accelerator = Accelerator(name, cores) diff --git a/lab3/inputs/hardware/ox_k.py b/lab3/inputs/hardware/ox_k.py deleted file mode 100644 index b5ac6ab1..00000000 --- a/lab3/inputs/hardware/ox_k.py +++ /dev/null @@ -1,215 +0,0 @@ -from zigzag.classes.hardware.architecture.accelerator import Accelerator -from zigzag.classes.hardware.architecture.operational_unit import Multiplier -from zigzag.classes.hardware.architecture.operational_array import MultiplierArray -from zigzag.classes.hardware.architecture.memory_instance import MemoryInstance -from 
zigzag.classes.hardware.architecture.memory_instance import MemoryInstance -from zigzag.classes.hardware.architecture.memory_hierarchy import MemoryHierarchy -from zigzag.classes.hardware.architecture.core import Core - - -def get_multiplier_array(): - """Multiplier array variables""" - multiplier_input_precision = [8, 8] - multiplier_energy = 1.0 - multiplier_area = 0 - dimensions = {"D1": 32, "D2": 32} - - multiplier = Multiplier( - multiplier_input_precision, multiplier_energy, multiplier_area - ) - multiplier_array = MultiplierArray(multiplier, dimensions) - - return multiplier_array - - -def get_memory_hierarchy(multiplier_array): - - """Memory hierarchy variables""" - """ size=#bit, bw=#bit""" - # Defintion of register file for inputs and weights - rf_1B = MemoryInstance( - name="rf_1B", - mem_type="rf", - size=1 * 8, - r_bw=1 * 8, - r_port=1, - w_port=1, - rw_port=0, - auto_cost_extraction=True, - ) - # Defintion of rRegister file for outputs - rf_2B = MemoryInstance( - name="rf_4B", - mem_type="rf", - size=4 * 8, - r_bw=4 * 8, - r_port=2, - w_port=2, - rw_port=0, - auto_cost_extraction=True, - ) - # Defintion of first SRAM for weights - l1_w = MemoryInstance( - name="l1_w", - mem_type="sram", - size=16384 * 8, - r_bw=16 * 8, - r_port=1, - w_port=1, - rw_port=0, - auto_cost_extraction=True, - ) - # Defintion of first SRAM for inputs and outputs - l1_io = MemoryInstance( - name="l1_io", - mem_type="sram", - size=65536 * 8, - r_bw=64 * 8, - r_port=0, - w_port=0, - rw_port=2, - auto_cost_extraction=True, - ) - # Defintion of first SRAM for weights - l2_w = MemoryInstance( - name="l2_w", - mem_type="sram", - size=1048576 * 8, # 1 MB - r_bw=32 * 8, - r_port=1, - w_port=1, - rw_port=0, - latency=1, - auto_cost_extraction=True, - ) - # Defintion of first SRAM for inputs and outputs - l2_io = MemoryInstance( - name="l2_io", - mem_type="sram", - size=1048576 * 8, # 1 MB - r_bw=16 * 8, - r_port=0, - w_port=0, - rw_port=2, - latency=1, - auto_cost_extraction=True, - ) 
- - dram = MemoryInstance( - name="dram", - mem_type="dram", - size=1073741824 * 8, - r_bw=8 * 8, - r_port=0, - w_port=0, - rw_port=2, - latency=1, - auto_cost_extraction=True, - ) - - memory_hierarchy_graph = MemoryHierarchy(operational_array=multiplier_array) - - """ - fh: from high = wr_in_by_high = - fl: from low = wr_in_by_low - th: to high = rd_out_to_high = - tl: to low = rd_out_to_low = - """ - # Register file for input - memory_hierarchy_graph.add_memory( - memory_instance=rf_1B, - operands=("I1",), - port_alloc=({"fh": "w_port_1", "tl": "r_port_1", "fl": None, "th": None},), - served_dimensions={(0, 1)}, - ) - # Register file for weight - memory_hierarchy_graph.add_memory( - memory_instance=rf_1B, - operands=("I2",), - port_alloc=({"fh": "w_port_1", "tl": "r_port_1", "fl": None, "th": None},), - served_dimensions={(1, 0)}, - ) - # Register file for output - memory_hierarchy_graph.add_memory( - memory_instance=rf_2B, - operands=("O",), - port_alloc=( - {"fh": "w_port_1", "tl": "r_port_1", "fl": "w_port_2", "th": "r_port_2"}, - ), - served_dimensions=set(), - ) - # First SRAM for weights - memory_hierarchy_graph.add_memory( - memory_instance=l1_w, - operands=("I2",), - port_alloc=({"fh": "w_port_1", "tl": "r_port_1", "fl": None, "th": None},), - served_dimensions="all", - ) - - # First SRAM for inputs and outputs - memory_hierarchy_graph.add_memory( - memory_instance=l1_io, - operands=("I1", "O"), - port_alloc=( - {"fh": "rw_port_1", "tl": "rw_port_1", "fl": None, "th": None}, - { - "fh": "rw_port_1", - "tl": "rw_port_1", - "fl": "rw_port_2", - "th": "rw_port_2", - }, - ), - served_dimensions="all", - ) - # Second SRAM for weights - memory_hierarchy_graph.add_memory( - memory_instance=l2_w, - operands=("I2",), - port_alloc=({"fh": "w_port_1", "tl": "r_port_1", "fl": None, "th": None},), - served_dimensions="all", - ) - # Second SRAM for inputs and output - memory_hierarchy_graph.add_memory( - memory_instance=l2_io, - operands=("I1", "O"), - port_alloc=( - 
{"fh": "rw_port_1", "tl": "rw_port_1", "fl": None, "th": None}, - { - "fh": "rw_port_1", - "tl": "rw_port_1", - "fl": "rw_port_2", - "th": "rw_port_2", - }, - ), - served_dimensions="all", - ) - - memory_hierarchy_graph.add_memory( - memory_instance=dram, - operands=("I1", "I2", "O"), - port_alloc=( - {"fh": "rw_port_1", "tl": "rw_port_2", "fl": None, "th": None}, - {"fh": "rw_port_1", "tl": "rw_port_2", "fl": None, "th": None}, - { - "fh": "rw_port_1", - "tl": "rw_port_2", - "fl": "rw_port_1", - "th": "rw_port_2", - }, - ), - served_dimensions="all", - ) - - return memory_hierarchy_graph - - -def get_core(id): - operational_array = get_multiplier_array() - memory_hierarchy = get_memory_hierarchy(operational_array) - core = Core(id, operational_array, memory_hierarchy) - return core - - -cores = {get_core(id=0)} -name = "accelerator-ox-k" -accelerator = Accelerator(name, cores) diff --git a/lab3/inputs/mapping/accelerator1.yaml b/lab3/inputs/mapping/accelerator1.yaml new file mode 100644 index 00000000..2c00cc40 --- /dev/null +++ b/lab3/inputs/mapping/accelerator1.yaml @@ -0,0 +1,11 @@ +- name: default + core_allocation: [1] + spatial_mapping: + D1: + - C, 32 + D2: + - K, 32 + memory_operand_links: + O: O + W: I2 + I: I1 diff --git a/lab3/inputs/mapping/accelerator2.yaml b/lab3/inputs/mapping/accelerator2.yaml new file mode 100644 index 00000000..e2c00c4e --- /dev/null +++ b/lab3/inputs/mapping/accelerator2.yaml @@ -0,0 +1,11 @@ +- name: default + core_allocation: [1] + spatial_mapping: + D1: + - OX, 32 + D2: + - K, 32 + memory_operand_links: + O: O + W: I2 + I: I1 diff --git a/lab3/inputs/mapping/accelerator3.yaml b/lab3/inputs/mapping/accelerator3.yaml new file mode 100644 index 00000000..c2d4b272 --- /dev/null +++ b/lab3/inputs/mapping/accelerator3.yaml @@ -0,0 +1,13 @@ +- name: default + core_allocation: [1] + spatial_mapping: + D1: + - OX, 64 + D2: + - FX, 4 + D3: + - FY, 4 + memory_operand_links: + O: O + W: I2 + I: I1 diff --git 
a/lab3/inputs/mapping/mapping_c_k.py b/lab3/inputs/mapping/mapping_c_k.py deleted file mode 100644 index 0b6b65ae..00000000 --- a/lab3/inputs/mapping/mapping_c_k.py +++ /dev/null @@ -1,7 +0,0 @@ -mapping = { - "default": { - "core_allocation": 0, - "spatial_mapping": {"D1": ("C", 32), "D2": ("K", 32)}, - "memory_operand_links": {"O": "O", "W": "I2", "I": "I1"}, - }, -} diff --git a/lab3/inputs/mapping/mapping_ox_fx_fy.py b/lab3/inputs/mapping/mapping_ox_fx_fy.py deleted file mode 100644 index e3b73fb0..00000000 --- a/lab3/inputs/mapping/mapping_ox_fx_fy.py +++ /dev/null @@ -1,7 +0,0 @@ -mapping = { - "default": { - "core_allocation": 0, - "spatial_mapping": {"D1": ("OX", 64), "D2": ("FX", 4), "D3": ("FY", 4)}, - "memory_operand_links": {"O": "O", "W": "I2", "I": "I1"}, - }, -} diff --git a/lab3/inputs/mapping/mapping_ox_k.py b/lab3/inputs/mapping/mapping_ox_k.py deleted file mode 100644 index 35f8e435..00000000 --- a/lab3/inputs/mapping/mapping_ox_k.py +++ /dev/null @@ -1,7 +0,0 @@ -mapping = { - "default": { - "core_allocation": 0, - "spatial_mapping": {"D1": ("OX", 32), "D2": ("K", 32)}, - "memory_operand_links": {"O": "O", "W": "I2", "I": "I1"}, - }, -} diff --git a/lab3/main.py b/lab3/main.py index f4eb5726..ea2086fd 100644 --- a/lab3/main.py +++ b/lab3/main.py @@ -6,39 +6,40 @@ from zigzag.api import get_hardware_performance_zigzag from zigzag.visualization.results.plot_cme import bar_plot_cost_model_evaluations_total -from inputs.hardware.c_k import accelerator as accelerator_c_k -from inputs.hardware.ox_k import accelerator as accelerator_ox_k -from inputs.hardware.ox_fx_fy import accelerator as accelerator_ox_fx_fy -from inputs.mapping.mapping_c_k import mapping as mapping_c_k -from inputs.mapping.mapping_ox_k import mapping as mapping_ox_k -from inputs.mapping.mapping_ox_fx_fy import mapping as mapping_ox_fx_fy - # Path to the workload onnx model # onnx_model_path = "zigzag/inputs/examples/workload/resnet18.onnx" onnx_model_path = 
"lab1/resnet18_first_layer.onnx" -# List of hardware architectures we run our experiment for -hardwares = [accelerator_c_k, accelerator_ox_k, accelerator_ox_fx_fy] -# List of mappings for each hardware (encodes the spatial dataflow) -mappings = [mapping_c_k, mapping_ox_k, mapping_ox_fx_fy] +# List of accelerator architectures we run our experiment for +hardwares = [ + "lab3/inputs/hardware/accelerator1.yaml", + "lab3/inputs/hardware/accelerator2.yaml", + "lab3/inputs/hardware/accelerator3.yaml", +] +# List of mappings for each accelerator +mappings = [ + "lab3/inputs/mapping/accelerator1.yaml", + "lab3/inputs/mapping/accelerator2.yaml", + "lab3/inputs/mapping/accelerator3.yaml", +] cmes = [] -for (hardware, mapping) in zip(hardwares, mappings): +for i, (hardware, mapping) in enumerate(zip(hardwares, mappings), start=1): # Pickle filename to save list of cmes - pickle_filename = "lab3/outputs/list_of_cmes.pickle" + pickle_filename = f"lab3/outputs/list_of_cmes_{i}.pickle" # Call the zigzag api, using a provided accelerator and mapping energy, latency, results = get_hardware_performance_zigzag( onnx_model_path, hardware, mapping, opt="latency", - dump_filename_pattern=f"lab3/outputs/{hardware.name}.json", + dump_filename_pattern=f"lab3/outputs/accelerator{i}.json", pickle_filename=pickle_filename, ) cmes.append(results[0][0]) -x_labels = [hardware.name for hardware in hardwares] +x_labels = [f"accelerator{i}" for i in range(1, len(hardwares) + 1)] bar_plot_cost_model_evaluations_total( cmes, labels=x_labels,