Commit 13f48a7
Merge remote-tracking branch 'origin/master'
asyms committed Feb 21, 2024
2 parents a94ab34 + 9032261
Showing 6 changed files with 28 additions and 25 deletions.
7 changes: 6 additions & 1 deletion zigzag/api.py
@@ -1,4 +1,6 @@
 from zigzag.classes.stages import *
+from zigzag.classes.cost_model.cost_model import CostModelEvaluation
+from typing import Type
 import re
 from onnx import ModelProto

@@ -9,6 +11,8 @@ def get_hardware_performance_zigzag(
     opt="latency",
     dump_filename_pattern="outputs/{datetime}.json",
     pickle_filename="outputs/list_of_cmes.pickle",
+    lpf_limit: int = 6,
+    cost_model_class: Type = CostModelEvaluation,
 ):
     # Initialize the logger
     import logging as _logging
@@ -61,12 +65,13 @@ def get_hardware_performance_zigzag(
         mapping=mapping,  # required by workload_parser_stage
         dump_filename_pattern=dump_filename_pattern,  # output file save pattern
         pickle_filename=pickle_filename,  # filename for pickled list of cmes
-        loma_lpf_limit=6,  # required by LomaStage
+        loma_lpf_limit=lpf_limit,  # required by LomaStage
         loma_show_progress_bar=True,
         # If we need to access the same input data multiple times from the innermost memory level, and the data size is smaller than the memory read bw,
         # take into account only the one-time access cost (assume the data can stay at the output pins of the memory as long as it is needed).
         # By default, if this parameter is not defined, it is set to False internally.
         access_same_data_considered_as_no_access=True,
+        cost_model_class=cost_model_class,
     )

     # Launch the MainStage
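Taken together, the api.py changes expose two new knobs to callers. Below is a minimal usage sketch; the input file paths and the MyCostModel subclass are hypothetical placeholders, and the (energy, latency, cmes) return triple is assumed from the surrounding API — only lpf_limit and cost_model_class come from this commit.

```python
# Hypothetical usage sketch: paths and MyCostModel are placeholders.
from zigzag.api import get_hardware_performance_zigzag
from zigzag.classes.cost_model.cost_model import CostModelEvaluation


class MyCostModel(CostModelEvaluation):
    """Placeholder subclass; see the CostModelStage notes further down."""


energy, latency, cmes = get_hardware_performance_zigzag(
    workload="inputs/my_model.onnx",         # hypothetical ONNX model
    accelerator="inputs/my_accelerator.py",  # hypothetical accelerator file
    mapping="inputs/my_mapping.py",          # hypothetical mapping file
    lpf_limit=8,                   # new: forwarded to LomaStage as loma_lpf_limit
    cost_model_class=MyCostModel,  # new: class instantiated by CostModelStage
)
```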
3 changes: 3 additions & 0 deletions zigzag/classes/hardware/architecture/memory_instance.py
@@ -18,6 +18,7 @@ class MemoryInstance:
     # @param min_w_granularity (int): The minimal number of bits that can be written in a clock cycle (can be less than w_bw)
     # @param mem_type (str): The type of memory. Used for CACTI cost extraction.
     # @param auto_cost_extraction (bool): Automatically extract the read cost, write cost and area using CACTI.
+    # @param double_buffering_support (bool): Support for double buffering on this memory instance.
     def __init__(
         self,
         name: str,
@@ -35,6 +36,7 @@ def __init__(
         min_w_granularity=None,
         mem_type: str = "sram",
         auto_cost_extraction: bool = False,
+        double_buffering_support: bool = False,
     ):
         if auto_cost_extraction:
             # Size must be a multiple of 8 when using CACTI
@@ -74,6 +76,7 @@ def __init__(
         self.w_port = w_port
         self.rw_port = rw_port
         self.latency = latency
+        self.double_buffering_support = double_buffering_support
         if not min_r_granularity:
             self.r_bw_min = r_bw
         else:
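For context, constructing a memory instance that opts into the new flag looks like the sketch below; apart from double_buffering_support, the keyword names follow the attributes visible in this diff and zigzag's usual MemoryInstance signature, and the numbers are toy values.

```python
from zigzag.classes.hardware.architecture.memory_instance import MemoryInstance

# Toy values throughout; only double_buffering_support is new in this commit.
sram_1kb_db = MemoryInstance(
    name="sram_1KB_db",
    size=1024 * 8,             # capacity in bits
    r_bw=128, w_bw=128,        # read/write bandwidth in bits per cycle
    r_cost=10.0, w_cost=12.0,  # per-access energy costs
    area=0.01,
    r_port=1, w_port=1, rw_port=0,
    latency=1,
    double_buffering_support=True,  # new flag: allow ping-pong buffering here
)
```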
15 changes: 13 additions & 2 deletions zigzag/classes/opt/temporal/loma/memory_allocator.py
@@ -119,7 +119,8 @@ def allocate_node(self, node: MemoryLevel, top_levels: List[MemoryLevel]):
         mem_ops = node.operands
         # Then select only the mem operands that are required for this layer (e.g. pooling has no weights so one mem op less)
         mem_ops = [mem_op for mem_op in mem_ops if mem_op in self.mem_ops]
-
+        # Does this node support double buffering?
+        db_support = node.memory_instance.double_buffering_support
         # Get the capacity of this memory node (in bits)
         mem_capacity = node.memory_instance.size

@@ -178,7 +179,8 @@ def allocate_node(self, node: MemoryLevel, top_levels: List[MemoryLevel]):
     # slices of the unallocated loops, with 'mem_capacity' as an upper bound.
     # @param mem_op
     # @param mem_capacity Capacity of the memory node in bits.
-    def calc_size_slices(self, mem_op: str, mem_capacity: int):
+    # @param db_support Double buffering support of this node
+    def calc_size_slices(self, mem_op: str, mem_capacity: int, db_support: bool = False):

         # Already allocated loops for this mem_op
         allocated_loops = self.allocated[mem_op]
@@ -187,6 +189,11 @@ def calc_size_slices(self, mem_op: str, mem_capacity: int):
         unallocated_loops = self.unallocated[mem_op]
         sizes = []

+        # If this memory supports double buffering, get the size it would take to allocate all loops
+        if db_support:
+            all_loops = allocated_loops + unallocated_loops
+            all_loops_size = self.calc_loops_size(all_loops, mem_op, unallocated_loops)
+
         for i in range(
             len(unallocated_loops) + 1
         ):  # Go through all slices (includes empty slice)
@@ -197,6 +204,10 @@ def calc_size_slices(self, mem_op: str, mem_capacity: int):
                 allocated_loops + unallocated_slice
             )  # Join them with already allocated loops
             size = self.calc_loops_size(loops, mem_op, unallocated_loops)
+            # Double the allocated size if the node uses double buffering and this slice does not yet cover all loops
+            if db_support:
+                if len(unallocated_loops[i:]) > 0 and size < all_loops_size:
+                    size *= 2
             if size <= mem_capacity:
                 sizes.append(size)
             else:
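The effect of the new branch: with double buffering, a candidate slice must reserve twice its footprint (one buffer is consumed while the next is filled), except when the slice already covers all loops and nothing remains to be streamed in. A standalone toy illustration of that rule, using made-up bit counts rather than zigzag's loop objects:

```python
def slice_sizes(allocated_bits, unallocated_bits, mem_capacity, db_support):
    # Footprint if every loop were allocated at this level
    all_bits = allocated_bits + sum(unallocated_bits)
    sizes = []
    for i in range(len(unallocated_bits) + 1):  # all slices, incl. empty
        size = allocated_bits + sum(unallocated_bits[:i])
        # Double the footprint unless this slice already holds everything
        if db_support and i < len(unallocated_bits) and size < all_bits:
            size *= 2
        if size <= mem_capacity:
            sizes.append(size)
    return sizes


print(slice_sizes(64, [64, 128], mem_capacity=512, db_support=True))
# [128, 256, 256] -- the final slice fits all loops, so it is not doubled
```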
4 changes: 3 additions & 1 deletion zigzag/classes/stages/CostModelStage.py
@@ -33,6 +33,7 @@ def __init__(
         spatial_mapping_int,
         temporal_mapping,
         access_same_data_considered_as_no_access=True,
+        cost_model_class=CostModelEvaluation,
         **kwargs
     ):
         super().__init__(list_of_callables, **kwargs)
@@ -51,10 +52,11 @@ def __init__(
             temporal_mapping,
             access_same_data_considered_as_no_access,
         )
+        self.cost_model_class = cost_model_class

     ## Run the cost model stage by calling the internal zigzag cost model with the correct inputs.
     def run(self) -> Generator[Tuple[CostModelEvaluation, Any], None, None]:
-        self.cme = CostModelEvaluation(
+        self.cme = self.cost_model_class(
             accelerator=self.accelerator,
             layer=self.layer,
             spatial_mapping=self.spatial_mapping,
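These two hunks turn the stage into simple dependency injection: run() instantiates whatever class was passed in rather than the hard-coded CostModelEvaluation. A hedged sketch of a custom evaluation follows; the calc_energy hook and energy_total attribute are assumptions about the upstream class, so substitute whatever your zigzag version actually exposes.

```python
from zigzag.classes.cost_model.cost_model import CostModelEvaluation


class PessimisticCostModel(CostModelEvaluation):
    """Adds a 10% margin on the default energy estimate (illustrative only)."""

    def calc_energy(self):             # assumed upstream hook name
        super().calc_energy()
        self.energy_total *= 1.10      # attribute name assumed


# CostModelStage(..., cost_model_class=PessimisticCostModel) will now
# evaluate every mapping with the subclass.
```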
19 changes: 0 additions & 19 deletions zigzag/classes/stages/SpatialMappingConversionStage.py
@@ -55,25 +55,6 @@ def run(self):
         spatial_mapping, spatial_mapping_int = self.convert_user_spatial_mapping(
             user_spatial_mapping
         )
-        # Since the spatial_mapping may be modified in the previous step,
-        # we have to update this change to self.layer
-        updated_user_spatial_mapping = {}
-        for oa_dim, sm_loop in user_spatial_mapping.items():
-            if self.is_nested_tuple(sm_loop):  # a mix sm loop
-                sm_comb = []
-                for sub_sm_loop in sm_loop:
-                    sm_layer_dim = sub_sm_loop[0]
-                    for sm_element in spatial_mapping.spatial_loop_dim_size:
-                        if sm_element[0] == sm_layer_dim:
-                            sm_comb.append(sm_element)
-                sm_comb = tuple(sm_comb)
-                updated_user_spatial_mapping[oa_dim] = sm_comb
-            else:
-                sm_layer_dim = sm_loop[0]
-                for sm_element in spatial_mapping.spatial_loop_dim_size:
-                    if sm_element[0] == sm_layer_dim:
-                        updated_user_spatial_mapping[oa_dim] = sm_element
-        self.layer.user_spatial_mapping = updated_user_spatial_mapping

         kwargs = self.kwargs.copy()
         kwargs["spatial_mapping"] = spatial_mapping
5 changes: 3 additions & 2 deletions zigzag/classes/stages/SpatialMappingGeneratorStage.py
@@ -215,11 +215,12 @@ def modify_innermost_input_mem_size(self, core_id, user_spatial_mapping):
             if layer_op_to_mem_op[act_operand] in mem_ops:
                 act_innermost_mem_level = memory_level
                 act_served_oa_dim: set = memory_level.served_dimensions
-                act_served_oa_dim_name = list(act_served_oa_dim)[0].name
         # check if act is not served in the innermost memories, or whether it is multi-casting for act.
         # keep the spatial loop as it was if act is not served.
-        if "act_served_oa_dim" not in locals() or len(act_served_oa_dim) == 0:
+        if "act_served_oa_dim" not in locals() or len(act_served_oa_dim) != 1:
             return input_mem_size_updated, self.accelerator
+        else:
+            act_served_oa_dim_name = list(act_served_oa_dim)[0].name
         # get the mem scaling factor if OX, OY exist
         mem_scaling_factor = 1
         if (
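The reordering here is a guard: list(act_served_oa_dim)[0] raises an IndexError when the set is empty, so the name lookup now happens only after the length check confirms exactly one served dimension. In plain Python:

```python
served = set()  # no served operational-array dimensions
if len(served) != 1:
    # Bail out before list(served)[0] can raise IndexError
    print("skip: act not served, or served by several dimensions")
else:
    name = list(served)[0]
```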
