Commit 13f48a7
Merge remote-tracking branch 'origin/master'
asyms committed Feb 21, 2024
2 parents a94ab34 + 9032261
Showing 6 changed files with 28 additions and 25 deletions.
7 changes: 6 additions & 1 deletion zigzag/api.py
@@ -1,4 +1,6 @@
 from zigzag.classes.stages import *
+from zigzag.classes.cost_model.cost_model import CostModelEvaluation
+from typing import Type
 import re
 from onnx import ModelProto

@@ -9,6 +11,8 @@ def get_hardware_performance_zigzag(
     opt="latency",
     dump_filename_pattern="outputs/{datetime}.json",
     pickle_filename="outputs/list_of_cmes.pickle",
+    lpf_limit: int = 6,
+    cost_model_class: Type = CostModelEvaluation,
 ):
     # Initialize the logger
     import logging as _logging
@@ -61,12 +65,13 @@ def get_hardware_performance_zigzag(
         mapping=mapping,  # required by workload_parser_stage
         dump_filename_pattern=dump_filename_pattern,  # output file save pattern
         pickle_filename=pickle_filename,  # filename for pickled list of cmes
-        loma_lpf_limit=6,  # required by LomaStage
+        loma_lpf_limit=lpf_limit,  # required by LomaStage
         loma_show_progress_bar=True,
         # If we need to access the same input data multiple times from the innermost memory level, and the data size is smaller than the memory read bw,
         # take into account only the one-time access cost (assume the data can stay at the output pins of the memory as long as it is needed).
         # By default, if this parameter is not defined, it is set to False internally.
         access_same_data_considered_as_no_access=True,
+        cost_model_class=cost_model_class,
     )

     # Launch the MainStage
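Taken together, the api.py changes expose two new knobs to callers. Below is a minimal usage sketch; the input file paths and the MyCostModel subclass are hypothetical placeholders, and the (energy, latency, cmes) return triple is assumed from the surrounding API — only lpf_limit and cost_model_class come from this commit.

```python
# Hypothetical usage sketch: paths and MyCostModel are placeholders.
from zigzag.api import get_hardware_performance_zigzag
from zigzag.classes.cost_model.cost_model import CostModelEvaluation


class MyCostModel(CostModelEvaluation):
    """Placeholder subclass; see the CostModelStage notes further down."""


energy, latency, cmes = get_hardware_performance_zigzag(
    workload="inputs/my_model.onnx",         # hypothetical ONNX model
    accelerator="inputs/my_accelerator.py",  # hypothetical accelerator file
    mapping="inputs/my_mapping.py",          # hypothetical mapping file
    lpf_limit=8,                   # new: forwarded to LomaStage as loma_lpf_limit
    cost_model_class=MyCostModel,  # new: class instantiated by CostModelStage
)
```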
3 changes: 3 additions & 0 deletions zigzag/classes/hardware/architecture/memory_instance.py
@@ -18,6 +18,7 @@ class MemoryInstance:
     # @param min_w_granularity (int): The minimal number of bits that can be written in a clock cycle (can be less than w_bw)
     # @param mem_type (str): The type of memory. Used for CACTI cost extraction.
     # @param auto_cost_extraction (bool): Automatically extract the read cost, write cost and area using CACTI.
+    # @param double_buffering_support (bool): Support for double buffering on this memory instance.
     def __init__(
         self,
         name: str,
@@ -35,6 +36,7 @@ def __init__(
         min_w_granularity=None,
         mem_type: str = "sram",
         auto_cost_extraction: bool = False,
+        double_buffering_support: bool = False,
     ):
         if auto_cost_extraction:
             # Size must be a multiple of 8 when using CACTI
@@ -74,6 +76,7 @@ def __init__(
         self.w_port = w_port
         self.rw_port = rw_port
         self.latency = latency
+        self.double_buffering_support = double_buffering_support
         if not min_r_granularity:
             self.r_bw_min = r_bw
         else:
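For context, constructing a memory instance that opts into the new flag looks like the sketch below; apart from double_buffering_support, the keyword names follow the attributes visible in this diff and zigzag's usual MemoryInstance signature, and the numbers are toy values.

```python
from zigzag.classes.hardware.architecture.memory_instance import MemoryInstance

# Toy values throughout; only double_buffering_support is new in this commit.
sram_1kb_db = MemoryInstance(
    name="sram_1KB_db",
    size=1024 * 8,             # capacity in bits
    r_bw=128, w_bw=128,        # read/write bandwidth in bits per cycle
    r_cost=10.0, w_cost=12.0,  # per-access energy costs
    area=0.01,
    r_port=1, w_port=1, rw_port=0,
    latency=1,
    double_buffering_support=True,  # new flag: allow ping-pong buffering here
)
```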
15 changes: 13 additions & 2 deletions zigzag/classes/opt/temporal/loma/memory_allocator.py
@@ -119,7 +119,8 @@ def allocate_node(self, node: MemoryLevel, top_levels: List[MemoryLevel]):
         mem_ops = node.operands
         # Then select only the mem operands that are required for this layer (e.g. pooling has no weights so one mem op less)
         mem_ops = [mem_op for mem_op in mem_ops if mem_op in self.mem_ops]
-
+        # Does this node support double buffering?
+        db_support = node.memory_instance.double_buffering_support
         # Get the capacity of this memory node (in bits)
         mem_capacity = node.memory_instance.size

@@ -178,7 +179,8 @@ def allocate_node(self, node: MemoryLevel, top_levels: List[MemoryLevel]):
     # slices of the unallocated loops, with 'mem_capacity' as an upper bound.
     # @param mem_op
     # @param mem_capacity Capacity of the memory node in bits.
-    def calc_size_slices(self, mem_op: str, mem_capacity: int):
+    # @param db_support Double buffering support of this node
+    def calc_size_slices(self, mem_op: str, mem_capacity: int, db_support: bool = False):

         # Already allocated loops for this mem_op
         allocated_loops = self.allocated[mem_op]
@@ -187,6 +189,11 @@ def calc_size_slices(self, mem_op: str, mem_capacity: int):
         unallocated_loops = self.unallocated[mem_op]
         sizes = []

+        # If this memory supports double buffering, get the size it would take to allocate all loops
+        if db_support:
+            all_loops = allocated_loops + unallocated_loops
+            all_loops_size = self.calc_loops_size(all_loops, mem_op, unallocated_loops)
+
         for i in range(
             len(unallocated_loops) + 1
         ):  # Go through all slices (includes empty slice)
@@ -197,6 +204,10 @@ def calc_size_slices(self, mem_op: str, mem_capacity: int):
                 allocated_loops + unallocated_slice
             )  # Join them with already allocated loops
             size = self.calc_loops_size(loops, mem_op, unallocated_loops)
+            # Double the allocated size if the node uses double buffering and this slice does not yet cover all loops
+            if db_support:
+                if len(unallocated_loops[i:]) > 0 and size < all_loops_size:
+                    size *= 2
             if size <= mem_capacity:
                 sizes.append(size)
             else:
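The effect of the new branch: with double buffering, a candidate slice must reserve twice its footprint (one buffer is consumed while the next is filled), except when the slice already covers all loops and nothing remains to be streamed in. A standalone toy illustration of that rule, using made-up bit counts rather than zigzag's loop objects:

```python
def slice_sizes(allocated_bits, unallocated_bits, mem_capacity, db_support):
    # Footprint if every loop were allocated at this level
    all_bits = allocated_bits + sum(unallocated_bits)
    sizes = []
    for i in range(len(unallocated_bits) + 1):  # all slices, incl. empty
        size = allocated_bits + sum(unallocated_bits[:i])
        # Double the footprint unless this slice already holds everything
        if db_support and i < len(unallocated_bits) and size < all_bits:
            size *= 2
        if size <= mem_capacity:
            sizes.append(size)
    return sizes


print(slice_sizes(64, [64, 128], mem_capacity=512, db_support=True))
# [128, 256, 256] -- the final slice fits all loops, so it is not doubled
```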
4 changes: 3 additions & 1 deletion zigzag/classes/stages/CostModelStage.py
@@ -33,6 +33,7 @@ def __init__(
         spatial_mapping_int,
         temporal_mapping,
         access_same_data_considered_as_no_access=True,
+        cost_model_class=CostModelEvaluation,
         **kwargs
     ):
         super().__init__(list_of_callables, **kwargs)
@@ -51,10 +52,11 @@ def __init__(
             temporal_mapping,
             access_same_data_considered_as_no_access,
         )
+        self.cost_model_class = cost_model_class

     ## Run the cost model stage by calling the internal zigzag cost model with the correct inputs.
     def run(self) -> Generator[Tuple[CostModelEvaluation, Any], None, None]:
-        self.cme = CostModelEvaluation(
+        self.cme = self.cost_model_class(
             accelerator=self.accelerator,
             layer=self.layer,
             spatial_mapping=self.spatial_mapping,
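These two hunks turn the stage into simple dependency injection: run() instantiates whatever class was passed in rather than the hard-coded CostModelEvaluation. A hedged sketch of a custom evaluation follows; the calc_energy hook and energy_total attribute are assumptions about the upstream class, so substitute whatever your zigzag version actually exposes.

```python
from zigzag.classes.cost_model.cost_model import CostModelEvaluation


class PessimisticCostModel(CostModelEvaluation):
    """Adds a 10% margin on the default energy estimate (illustrative only)."""

    def calc_energy(self):             # assumed upstream hook name
        super().calc_energy()
        self.energy_total *= 1.10      # attribute name assumed


# CostModelStage(..., cost_model_class=PessimisticCostModel) will now
# evaluate every mapping with the subclass.
```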
19 changes: 0 additions & 19 deletions zigzag/classes/stages/SpatialMappingConversionStage.py
@@ -55,25 +55,6 @@ def run(self):
         spatial_mapping, spatial_mapping_int = self.convert_user_spatial_mapping(
             user_spatial_mapping
         )
-        # Since the spatial_mapping may be modified in the previous step,
-        # we have to update this change to self.layer
-        updated_user_spatial_mapping = {}
-        for oa_dim, sm_loop in user_spatial_mapping.items():
-            if self.is_nested_tuple(sm_loop):  # a mix sm loop
-                sm_comb = []
-                for sub_sm_loop in sm_loop:
-                    sm_layer_dim = sub_sm_loop[0]
-                    for sm_element in spatial_mapping.spatial_loop_dim_size:
-                        if sm_element[0] == sm_layer_dim:
-                            sm_comb.append(sm_element)
-                sm_comb = tuple(sm_comb)
-                updated_user_spatial_mapping[oa_dim] = sm_comb
-            else:
-                sm_layer_dim = sm_loop[0]
-                for sm_element in spatial_mapping.spatial_loop_dim_size:
-                    if sm_element[0] == sm_layer_dim:
-                        updated_user_spatial_mapping[oa_dim] = sm_element
-        self.layer.user_spatial_mapping = updated_user_spatial_mapping

         kwargs = self.kwargs.copy()
         kwargs["spatial_mapping"] = spatial_mapping
5 changes: 3 additions & 2 deletions zigzag/classes/stages/SpatialMappingGeneratorStage.py
@@ -215,11 +215,12 @@ def modify_innermost_input_mem_size(self, core_id, user_spatial_mapping):
             if layer_op_to_mem_op[act_operand] in mem_ops:
                 act_innermost_mem_level = memory_level
                 act_served_oa_dim: set = memory_level.served_dimensions
-                act_served_oa_dim_name = list(act_served_oa_dim)[0].name
         # check if act is not served in the innermost memories, or whether it is multi-casting for act.
         # keep the spatial loop as it was if act is not served.
-        if "act_served_oa_dim" not in locals() or len(act_served_oa_dim) == 0:
+        if "act_served_oa_dim" not in locals() or len(act_served_oa_dim) != 1:
             return input_mem_size_updated, self.accelerator
+        else:
+            act_served_oa_dim_name = list(act_served_oa_dim)[0].name
         # get the mem scaling factor if OX, OY exist
         mem_scaling_factor = 1
         if (
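The reordering here is a guard: list(act_served_oa_dim)[0] raises an IndexError when the set is empty, so the name lookup now happens only after the length check confirms exactly one served dimension. In plain Python:

```python
served = set()  # no served operational-array dimensions
if len(served) != 1:
    # Bail out before list(served)[0] can raise IndexError
    print("skip: act not served, or served by several dimensions")
else:
    name = list(served)[0]
```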
