From 3944d8317cafa1cb9ef7d57ffa2029660ef0421b Mon Sep 17 00:00:00 2001 From: JiacongSun Date: Fri, 10 Nov 2023 10:34:24 +0100 Subject: [PATCH 01/14] fix: filter out impossible spatial loop when one layer dim is mapped on multiple hardware dims; new: adjust the last oa mapping when updating the spatial mapping to be a multiple of the layer dim size; new: only yield 2 spatial mapping options when maximize_hardware_utilization=True (default) --- zigzag/classes/cost_model/cost_model.py | 1 + .../mapping/spatial/spatial_mapping.py | 3 + zigzag/classes/opt/spatial/generator.py | 27 ++++--- .../stages/SpatialMappingConversionStage.py | 70 +++++++++++++++++-- 4 files changed, 81 insertions(+), 20 deletions(-) diff --git a/zigzag/classes/cost_model/cost_model.py b/zigzag/classes/cost_model/cost_model.py index cf684c0c..2a312969 100644 --- a/zigzag/classes/cost_model/cost_model.py +++ b/zigzag/classes/cost_model/cost_model.py @@ -249,6 +249,7 @@ def __init__( self.spatial_mapping_dict_int = spatial_mapping_fractional_to_int( self.spatial_mapping.mapping_dict_origin ) + self.spatial_mapping_dict_int = self.spatial_mapping.mapping_dict_origin # For constructing Mapping object, the last parameter "self.access_same_data_considered_as_no_access" is optional self.mapping = Mapping( diff --git a/zigzag/classes/mapping/spatial/spatial_mapping.py b/zigzag/classes/mapping/spatial/spatial_mapping.py index 7db9b6b0..2a4268d9 100644 --- a/zigzag/classes/mapping/spatial/spatial_mapping.py +++ b/zigzag/classes/mapping/spatial/spatial_mapping.py @@ -119,6 +119,9 @@ def calc_unit_count(self): """ ASSERT: The bottom level (MAC level) unit count must be the same for all operand """ bottom_unit_count = [unit_count[op][0] for op in unit_count.keys()] + for x in bottom_unit_count: + if not x == bottom_unit_count[0]: + pass assert all( x == bottom_unit_count[0] for x in bottom_unit_count ), f"The MAC level unit count is not the same for all operand {bottom_unit_count}, please correct the spatial 
mapping." diff --git a/zigzag/classes/opt/spatial/generator.py b/zigzag/classes/opt/spatial/generator.py index e10453eb..3743afb8 100644 --- a/zigzag/classes/opt/spatial/generator.py +++ b/zigzag/classes/opt/spatial/generator.py @@ -161,20 +161,6 @@ def generate_user_spatial_mappings( ) = self.sort_oa_dim_unrollings_in_the_order_of_utilization( oa_dim_unrollings, descending=True ) - if len(oa_dim_unrollings) > 0: # oa_dim_unrollings is not [] - # Then only keep the combs in oa_dim_unrollings that have the highest oa_dim mapping utilization - # The closer to the front, the higher the oa_dim utilization rate. - updated_oa_dim_unrollings = [oa_dim_unrollings[0]] - # Check if there are other sm loops that has the same utilization with the highest one. - for i in range(1, len(hardware_utilization)): - if hardware_utilization[i] == hardware_utilization[0]: - updated_oa_dim_unrollings.append(oa_dim_unrollings[i]) - # [Optional] To reduce the simulation time, when there are still too many spatial unrollings, - # We keep only the first two unrollings for each oa_dim. - # You can comment out the next two lines if you want to check all spatial unrollings. - if len(updated_oa_dim_unrollings) > 2: - updated_oa_dim_unrollings = updated_oa_dim_unrollings[0:2] - oa_dim_unrollings = updated_oa_dim_unrollings # In case there are no unrollings (of size > 1) possible, add a single unrolling of size 1. # The loop dimension we pick is randomly chosen as the first loop dimension in the layer. @@ -186,7 +172,13 @@ def generate_user_spatial_mappings( # Now we have for each operational array dimension the layer dimensions and size they can be unrolled without fractional remainder. # Now we have to combine them into user-defined spatial mappings. 
+ # record down the number of yield + yield_count = 0 for combination in itertools.product(*unrollings): + if maximize_hardware_utilization and yield_count >= 2: + # 2 means: only check the top 2 spatial mapping with the highest hardware utilization + # Please modify "2" to other numbers if you want to check on more spatial mappings. + break # Zip the combination (which is a (layer_dim, layer_size) for each oa_dim with the oa_dim names. oa_dim_names = [oa_dim.name for oa_dim in oa_dims] # Extra check on the total unrolling size of a layer dim, if it is mapped on >=2 dimensions. @@ -194,6 +186,7 @@ def generate_user_spatial_mappings( layer_dim: layer_size for layer_dim, layer_size in self.layer.loop_dim_size.items() } + check_passed = True # initialization for unrolling_in_combination in combination: if unrolling_in_combination is None: continue @@ -223,7 +216,10 @@ def generate_user_spatial_mappings( for layer_dim, layer_size in combination_check.items(): if layer_size < 1: # the layer size/the unrolling size < 1 # It means the unrolling size > the layer size, which is incorrect and impossible. 
- continue + check_passed = False + break + if not check_passed: + continue user_spatial_mapping = { oa_dim_name: unrolling @@ -231,6 +227,7 @@ def generate_user_spatial_mappings( if unrolling is not None } yield user_spatial_mapping + yield_count += 1 def append_mix_spatial_unrollings( self, provided_oa_dim_unrollings, provided_oa_dim_unrolling_hints, oa_dim diff --git a/zigzag/classes/stages/SpatialMappingConversionStage.py b/zigzag/classes/stages/SpatialMappingConversionStage.py index 7707d172..44cbf89f 100644 --- a/zigzag/classes/stages/SpatialMappingConversionStage.py +++ b/zigzag/classes/stages/SpatialMappingConversionStage.py @@ -110,7 +110,7 @@ def convert_user_spatial_mapping(self, user_spatial_mapping): for spatial_loop_element in spatial_loop: limited_user_spatial_mapping_to_check = ( self.generate_limited_user_spatial_mapping( - layer_dim_sizes, oa_dims, oa_dim_name, spatial_loop_element + layer_dim_sizes, oa_dims, oa_dim_name, spatial_loop_element, user_spatial_mapping ) ) if limited_user_spatial_mapping_to_check == None: @@ -131,7 +131,7 @@ def convert_user_spatial_mapping(self, user_spatial_mapping): else: # single-dim sm loop limited_user_spatial_mapping_to_check = ( self.generate_limited_user_spatial_mapping( - layer_dim_sizes, oa_dims, oa_dim_name, spatial_loop + layer_dim_sizes, oa_dims, oa_dim_name, spatial_loop, user_spatial_mapping ) ) if limited_user_spatial_mapping_to_check == None: @@ -237,7 +237,7 @@ def convert_user_spatial_mapping(self, user_spatial_mapping): ) def generate_limited_user_spatial_mapping( - self, layer_dim_sizes, oa_dims, oa_dim_name, spatial_loop + self, layer_dim_sizes, oa_dims, oa_dim_name, spatial_loop, user_spatial_mapping ): ## Do check on spatial mapping, and convert the mapping to a tuple (loop_dim_unrolled, loop_size_unrolled) = spatial_loop @@ -253,9 +253,69 @@ def generate_limited_user_spatial_mapping( layer_dim_size = layer_dim_sizes.get(loop_dim_unrolled, 1) loop_size_unrolled = min(layer_dim_size, 
loop_size_unrolled) # Check 3: Adjust unrolling if it is not a multiple of the layer dimension size - temporal_remainder = int(np.ceil(layer_dim_size / loop_size_unrolled)) - loop_size_unrolled = layer_dim_size / temporal_remainder + # and if there is no more mapping for this layer dimension + no_more_mapping_for_current_layer_dim = self.check_if_there_is_further_oa_mapping_for_current_layer_dim( + oa_dim_name=oa_dim_name, + loop_dim_unrolled=loop_dim_unrolled, + user_spatial_mapping=user_spatial_mapping + ) + if no_more_mapping_for_current_layer_dim: + loop_size_unrolled_on_early_oa_dims = self.calc_unrolled_loop_size_on_early_oa_dims( + oa_dim_name=oa_dim_name, + loop_dim_unrolled=loop_dim_unrolled, + user_spatial_mapping=user_spatial_mapping + ) + temporal_remainder = int(np.ceil(layer_dim_size / (loop_size_unrolled*loop_size_unrolled_on_early_oa_dims))) + loop_size_unrolled = layer_dim_size / temporal_remainder / loop_size_unrolled_on_early_oa_dims return ( loop_dim_unrolled, loop_size_unrolled, ) + + def check_if_there_is_further_oa_mapping_for_current_layer_dim( + self, oa_dim_name, loop_dim_unrolled, user_spatial_mapping + ): + # For the case when there is layer dimension that is mapped on multiple oa dimensions. + # We need to decide on which oa dimension to adjust the unrolling + # if the total unrolling size is not a multiple of the layer dimension size. + # In this case, we decide to only adjust the unrolling size on the last oa dimension, + # This function is to check if the current oa dimension is the last oa dimension for the current layer dim. 
+ start_check_on_layer_dim_mapping = False + no_more_mapping_for_current_layer_dim = True + for oa_dim_name_private, spatial_loop_private in user_spatial_mapping.items(): + if oa_dim_name == oa_dim_name_private: + start_check_on_layer_dim_mapping = True + continue + if start_check_on_layer_dim_mapping: + if self.is_nested_tuple(spatial_loop_private): # mix sm loop + for spatial_loop_element in spatial_loop_private: + loop_dim_unrolled_private = spatial_loop_element[0] + if loop_dim_unrolled == loop_dim_unrolled_private: + no_more_mapping_for_current_layer_dim = False + break + else: + loop_dim_unrolled_private = spatial_loop_private[0] + if loop_dim_unrolled == loop_dim_unrolled_private: + no_more_mapping_for_current_layer_dim = False + if not no_more_mapping_for_current_layer_dim: # early exit if the flag is already False + break + return no_more_mapping_for_current_layer_dim + + def calc_unrolled_loop_size_on_early_oa_dims( + self, oa_dim_name, loop_dim_unrolled, user_spatial_mapping + ): + # calculate the unrolled loop size for the specific layer dim on oa dims earlier than current oa dim + loop_unrolled_size_already = 1 + for oa_dim_name_private, spatial_loop_private in user_spatial_mapping.items(): + if oa_dim_name == oa_dim_name_private: + break + if self.is_nested_tuple(spatial_loop_private): # mix sm loop + for spatial_loop_element in spatial_loop_private: + (loop_dim_unrolled_private, loop_size_unrolled_private) = spatial_loop_element + if loop_dim_unrolled == loop_dim_unrolled_private: + loop_unrolled_size_already *= loop_size_unrolled_private + else: + (loop_dim_unrolled_private, loop_size_unrolled_private) = spatial_loop_private + if loop_dim_unrolled == loop_dim_unrolled_private: + loop_unrolled_size_already *= loop_size_unrolled_private + return loop_unrolled_size_already \ No newline at end of file From 3f0f389f99abafaa6b287281e30fe6cfdd2d093d Mon Sep 17 00:00:00 2001 From: JiacongSun Date: Fri, 10 Nov 2023 10:35:21 +0100 Subject: [PATCH 02/14] 
remove code for debugging --- zigzag/classes/cost_model/cost_model.py | 6 +++--- zigzag/classes/mapping/spatial/spatial_mapping.py | 3 --- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/zigzag/classes/cost_model/cost_model.py b/zigzag/classes/cost_model/cost_model.py index 2a312969..cf25622a 100644 --- a/zigzag/classes/cost_model/cost_model.py +++ b/zigzag/classes/cost_model/cost_model.py @@ -246,9 +246,9 @@ def __init__( """ generate the integer spatial mapping from fractional spatial mapping (due to greedy mapping support). Later the fractional one is used for calculating energy, and the integer one is used for calculating latency""" - self.spatial_mapping_dict_int = spatial_mapping_fractional_to_int( - self.spatial_mapping.mapping_dict_origin - ) + # self.spatial_mapping_dict_int = spatial_mapping_fractional_to_int( + # self.spatial_mapping.mapping_dict_origin + # ) self.spatial_mapping_dict_int = self.spatial_mapping.mapping_dict_origin # For constructing Mapping object, the last parameter "self.access_same_data_considered_as_no_access" is optional diff --git a/zigzag/classes/mapping/spatial/spatial_mapping.py b/zigzag/classes/mapping/spatial/spatial_mapping.py index 2a4268d9..7db9b6b0 100644 --- a/zigzag/classes/mapping/spatial/spatial_mapping.py +++ b/zigzag/classes/mapping/spatial/spatial_mapping.py @@ -119,9 +119,6 @@ def calc_unit_count(self): """ ASSERT: The bottom level (MAC level) unit count must be the same for all operand """ bottom_unit_count = [unit_count[op][0] for op in unit_count.keys()] - for x in bottom_unit_count: - if not x == bottom_unit_count[0]: - pass assert all( x == bottom_unit_count[0] for x in bottom_unit_count ), f"The MAC level unit count is not the same for all operand {bottom_unit_count}, please correct the spatial mapping." 
From 675fbfb8f26097049aeb1dd8325feb7295431a0f Mon Sep 17 00:00:00 2001 From: JiacongSun Date: Fri, 10 Nov 2023 12:45:53 +0100 Subject: [PATCH 03/14] Fix: add spatial_mapping_hint completion when a partial spatial_mapping is provided. New: add assertion when no legal spatial mapping is found. --- .gitignore | 5 +- debug.py | 26 ++++++++ zigzag/classes/opt/spatial/generator.py | 6 +- .../.SpatialMappingGeneratorStage.py.swp | Bin 0 -> 16384 bytes .../stages/SpatialMappingGeneratorStage.py | 57 +++++++++++------- 5 files changed, 70 insertions(+), 24 deletions(-) create mode 100644 debug.py create mode 100644 zigzag/classes/stages/.SpatialMappingGeneratorStage.py.swp diff --git a/.gitignore b/.gitignore index 08983801..89807f12 100644 --- a/.gitignore +++ b/.gitignore @@ -149,4 +149,7 @@ docs/Makefile docs/make.bat # documentation output -html/ \ No newline at end of file +html/ + +# debug file +debug* \ No newline at end of file diff --git a/debug.py b/debug.py new file mode 100644 index 00000000..918bf98a --- /dev/null +++ b/debug.py @@ -0,0 +1,26 @@ +from zigzag.api import get_hardware_performance_zigzag + +opt = 'EDP' +model = "alexnet" +onnx_model_path = f"zigzag/inputs/examples/workload/{model}.onnx" +workload = onnx_model_path + +hwarchs = ["Edge_TPU_like", "Ascend_like", "Eyeriss_like", "Meta_prototype", "Tesla_NPU_like", "TPU_like"] + +for hwarch in hwarchs: + + mapping = f"zigzag.inputs.examples.mapping.default" + accelerator = f"zigzag.inputs.examples.hardware.{hwarch}" + + dump_filename_pattern=f"outputs/{hwarch}-{model}-layer_?.json" + pickle_filename = f"outputs/{hwarch}-{model}-saved_list_of_cmes.pickle" + + energy, latency, cme = get_hardware_performance_zigzag(workload=workload, + accelerator=accelerator, + mapping=mapping, + opt=opt, + dump_filename_pattern=dump_filename_pattern, + pickle_filename=pickle_filename) + print(f"Total network energy = {energy:.2e} pJ") + print(f"Total network latency = {latency:.2e} cycles") + print(f"Total edp = 
{energy*latency:.2e} pJ*cycles") \ No newline at end of file diff --git a/zigzag/classes/opt/spatial/generator.py b/zigzag/classes/opt/spatial/generator.py index 3743afb8..3e5fe140 100644 --- a/zigzag/classes/opt/spatial/generator.py +++ b/zigzag/classes/opt/spatial/generator.py @@ -177,7 +177,7 @@ def generate_user_spatial_mappings( for combination in itertools.product(*unrollings): if maximize_hardware_utilization and yield_count >= 2: # 2 means: only check the top 2 spatial mapping with the highest hardware utilization - # Please modify "2" to other numbers if you want to check on more spatial mappings. + # Modify "2" to other numbers if you want to check on more spatial mappings. break # Zip the combination (which is a (layer_dim, layer_size) for each oa_dim with the oa_dim names. oa_dim_names = [oa_dim.name for oa_dim in oa_dims] @@ -228,6 +228,10 @@ def generate_user_spatial_mappings( } yield user_spatial_mapping yield_count += 1 + # If yield_count==0, it means there is no legal spatial mapping found. + # The reason is that the spatial mapping provided by the user has exceeded the layer dim size, + # therefore the loop cannot pass the check. + assert yield_count > 0, "There is no legal spatial mapping found. Please make sure the provided spatial mappings do not exceed the layer dimension size." 
def append_mix_spatial_unrollings( self, provided_oa_dim_unrollings, provided_oa_dim_unrolling_hints, oa_dim diff --git a/zigzag/classes/stages/.SpatialMappingGeneratorStage.py.swp b/zigzag/classes/stages/.SpatialMappingGeneratorStage.py.swp new file mode 100644 index 0000000000000000000000000000000000000000..967ac254e9fd94a420b53a665ecee44f6e05fdf8 GIT binary patch literal 16384 zcmeHNU8o&b72c^fCfaCets<$o&9U~}>$!7t4TxTHp)u7o(4=B=Qz6D-?#w>tOz)hT zlli$fC*fA;4+w%_tq6&&UrOM+L6ih1hrKtoqIA8@H{!_pnxP^qHntN_B|FMDw$!-!qeO5Oc*I_{14l)xTcPFKeX|>cW-@fF z*!6qnmK7=Us@uD2^;Qf2snYhUF1I}^iqJlatsZXYhG=SeXRP566iz8$g;pGd=jg35 z9E}Ii{Ana`Q%m5GIQGFcHuct#_sTo(*u1Hw(Jxfd=^aKq8a~;zVDm3zd=8r(06mKqG-h0*wS32{aOD zB+y8pk-+Ov0+u7hdoZr=Qo@h#|5^V3*S87r7vMV32iAdC-YUeefL{PV27Uy59e4)# z8qfzm1>6tZ0UQAK1K&O*#J7M5I0hU5Zrmosqre5=e&9pEVc>S)UvClO&%krQ=YbIr z0>^>7fPcJMh~EO&fy+P-xEnYO90dMwP>AmX-vhn?d=7X3_$csp;FSYHyafCJ_%iSi za1^*3I1K#oO+q{l41s%rm-Y+sEN~gv0FDFq0Dr#~asZwH-Us~djTkfV4d7v54R{qB zC(i?$z^m9~c^P;XcpUf$a4&Ed@Ds?N=HNRt53?_+FXg#i=|--PQxLwb+9G?A-4mS0 zFr)NrggHnLM?`W%UgQ_UFxYV&~ zfpx_lxL1|gw?b#v3YD3}uIFCG8I!=@h?LhgW>)D``XQ>yx3)ZG4%{n~0?#BeroK?s z`d%E>b$!gKp6!j9feXR3uR$>Us0bGnq<=hC{AEqT7LLIu|#EeZF~^VlwAMy@27*&TJHul(IKJ&UDYn)C87s3|=Hupd7Y^w0f&8Jo?6UqcRX~^WH=I90!f{~JgYExZ3cP$wQ z?5qZA5QL)@PwgmA((yM(YNdo<;^S>HRKrk3%8!{xgGshp@{@@V3Yc_06h}@_J3jJ* z#Mufw^T3MZD#=Lh_FcO#;p;Gi{*DS!NsO5eRh)#rJRc@%TqbdamK!N~!SWLIu`mq6 zc0N1w+6#I;6&iNv#yBA8wYx_iBtIdq@m%f6z7?@*l2ajLpeDw|ypn34 z4g869X~u9eLgj`u4|ZSK+a~wY=HKyz+?TSw&V0)vg$N4{MieSL_C|tOZKc2RbeAWQ z3DE*Luw37C{b3SE)U_YPP>{GIKW<@FrING{2z!DGYIb0~xoo*I$=lHru6tbJ4oN_ndkIcrrXJ)zs1D|8pyGZ|n19yknM)oNf98800Bp7Lr59A10LFfr9`Y?^I~ zRLr1jLhC(R^`nklUfxD1(Th?66g+nJB41`^N^*m&Ft59iI5t?_@m+kC|&*};8-ftGGMu^_t>ny$dzxw(Nm)IlAMrf?^2>{uAyHDGQV!U7*+#AzF2?Vx>7s zw!%fS=i=zNwAPUbrK<>;E|wE8N1?%kyt1@S)3*l7G?#>M2g4vFuZ+{)sfm@RazaNs zq8kPSnVu#$^l5T~#}yefTT0zidnN@sc%Ic{wW+P0c{2*G7gKy={hT`Fu`YY7oI1a;@PubX3qo`e2?P*Z7K~kg!N;-k!5#-4nRc^@>4psw7>k&r7|59r| 
z@K@9oFq|(F6m2dh`C?zhZ2D}FD=iF-=;@mI6jk?v{tsG_cSrnZS3dtFfcy!=Tv!|C z&sR|xRZYkzt{|QAbj|MqBEPmLGPdYmj)|${2zl_8EX}MgkF;&TO2~`8Ax|KxS}A0E zdS%FVf!Hz)68PQ-5e-)SfGjgNgD8zDiAf5xFw$<~+iW=aj~v?$l@+V$rh+UAkw@AqrE6Kw zf=EsnGAUkXI;l=(K-`v-%>_1txH5H?T_aa{jzm({q5akBhH6KuD{*Kc_vi*?RU&=m zc6kL-+)3>~kcf<*;%Ad1pmblhtk-42G_>Vqw!1W|$&-a_)NG;HwHb~O&iO@V8A?C+c3OT_JE+)ocD%?fz=Cw=S>M@wc~#GbHnI>+PGER(>*| zeNI(tl@WZ!n6GKWgY?Iny_&PkXX}ju%phOdTRUni>0u-7Y{1htV~MC=8L$YHdDU)%SU zmOM?7riBk)iVb^ndN!%I9CuxOeJkzkWLwWl;(MH64c0%^snb#B)Xz4xtWgBC=qH<8 zX8+sIH5?76afbffg57L Date: Fri, 10 Nov 2023 12:46:42 +0100 Subject: [PATCH 04/14] Remove temporal file generated by gvim --- .../stages/.SpatialMappingGeneratorStage.py.swp | Bin 16384 -> 0 bytes 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 zigzag/classes/stages/.SpatialMappingGeneratorStage.py.swp diff --git a/zigzag/classes/stages/.SpatialMappingGeneratorStage.py.swp b/zigzag/classes/stages/.SpatialMappingGeneratorStage.py.swp deleted file mode 100644 index 967ac254e9fd94a420b53a665ecee44f6e05fdf8..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 16384 zcmeHNU8o&b72c^fCfaCets<$o&9U~}>$!7t4TxTHp)u7o(4=B=Qz6D-?#w>tOz)hT zlli$fC*fA;4+w%_tq6&&UrOM+L6ih1hrKtoqIA8@H{!_pnxP^qHntN_B|FMDw$!-!qeO5Oc*I_{14l)xTcPFKeX|>cW-@fF z*!6qnmK7=Us@uD2^;Qf2snYhUF1I}^iqJlatsZXYhG=SeXRP566iz8$g;pGd=jg35 z9E}Ii{Ana`Q%m5GIQGFcHuct#_sTo(*u1Hw(Jxfd=^aKq8a~;zVDm3zd=8r(06mKqG-h0*wS32{aOD zB+y8pk-+Ov0+u7hdoZr=Qo@h#|5^V3*S87r7vMV32iAdC-YUeefL{PV27Uy59e4)# z8qfzm1>6tZ0UQAK1K&O*#J7M5I0hU5Zrmosqre5=e&9pEVc>S)UvClO&%krQ=YbIr z0>^>7fPcJMh~EO&fy+P-xEnYO90dMwP>AmX-vhn?d=7X3_$csp;FSYHyafCJ_%iSi za1^*3I1K#oO+q{l41s%rm-Y+sEN~gv0FDFq0Dr#~asZwH-Us~djTkfV4d7v54R{qB zC(i?$z^m9~c^P;XcpUf$a4&Ed@Ds?N=HNRt53?_+FXg#i=|--PQxLwb+9G?A-4mS0 zFr)NrggHnLM?`W%UgQ_UFxYV&~ zfpx_lxL1|gw?b#v3YD3}uIFCG8I!=@h?LhgW>)D``XQ>yx3)ZG4%{n~0?#BeroK?s 
z`d%E>b$!gKp6!j9feXR3uR$>Us0bGnq<=hC{AEqT7LLIu|#EeZF~^VlwAMy@27*&TJHul(IKJ&UDYn)C87s3|=Hupd7Y^w0f&8Jo?6UqcRX~^WH=I90!f{~JgYExZ3cP$wQ z?5qZA5QL)@PwgmA((yM(YNdo<;^S>HRKrk3%8!{xgGshp@{@@V3Yc_06h}@_J3jJ* z#Mufw^T3MZD#=Lh_FcO#;p;Gi{*DS!NsO5eRh)#rJRc@%TqbdamK!N~!SWLIu`mq6 zc0N1w+6#I;6&iNv#yBA8wYx_iBtIdq@m%f6z7?@*l2ajLpeDw|ypn34 z4g869X~u9eLgj`u4|ZSK+a~wY=HKyz+?TSw&V0)vg$N4{MieSL_C|tOZKc2RbeAWQ z3DE*Luw37C{b3SE)U_YPP>{GIKW<@FrING{2z!DGYIb0~xoo*I$=lHru6tbJ4oN_ndkIcrrXJ)zs1D|8pyGZ|n19yknM)oNf98800Bp7Lr59A10LFfr9`Y?^I~ zRLr1jLhC(R^`nklUfxD1(Th?66g+nJB41`^N^*m&Ft59iI5t?_@m+kC|&*};8-ftGGMu^_t>ny$dzxw(Nm)IlAMrf?^2>{uAyHDGQV!U7*+#AzF2?Vx>7s zw!%fS=i=zNwAPUbrK<>;E|wE8N1?%kyt1@S)3*l7G?#>M2g4vFuZ+{)sfm@RazaNs zq8kPSnVu#$^l5T~#}yefTT0zidnN@sc%Ic{wW+P0c{2*G7gKy={hT`Fu`YY7oI1a;@PubX3qo`e2?P*Z7K~kg!N;-k!5#-4nRc^@>4psw7>k&r7|59r| z@K@9oFq|(F6m2dh`C?zhZ2D}FD=iF-=;@mI6jk?v{tsG_cSrnZS3dtFfcy!=Tv!|C z&sR|xRZYkzt{|QAbj|MqBEPmLGPdYmj)|${2zl_8EX}MgkF;&TO2~`8Ax|KxS}A0E zdS%FVf!Hz)68PQ-5e-)SfGjgNgD8zDiAf5xFw$<~+iW=aj~v?$l@+V$rh+UAkw@AqrE6Kw zf=EsnGAUkXI;l=(K-`v-%>_1txH5H?T_aa{jzm({q5akBhH6KuD{*Kc_vi*?RU&=m zc6kL-+)3>~kcf<*;%Ad1pmblhtk-42G_>Vqw!1W|$&-a_)NG;HwHb~O&iO@V8A?C+c3OT_JE+)ocD%?fz=Cw=S>M@wc~#GbHnI>+PGER(>*| zeNI(tl@WZ!n6GKWgY?Iny_&PkXX}ju%phOdTRUni>0u-7Y{1htV~MC=8L$YHdDU)%SU zmOM?7riBk)iVb^ndN!%I9CuxOeJkzkWLwWl;(MH64c0%^snb#B)Xz4xtWgBC=qH<8 zX8+sIH5?76afbffg57L Date: Fri, 10 Nov 2023 12:50:25 +0100 Subject: [PATCH 05/14] reformat generator.py for a black coding style --- zigzag/classes/opt/spatial/generator.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/zigzag/classes/opt/spatial/generator.py b/zigzag/classes/opt/spatial/generator.py index 3e5fe140..5dfcd9f8 100644 --- a/zigzag/classes/opt/spatial/generator.py +++ b/zigzag/classes/opt/spatial/generator.py @@ -186,7 +186,7 @@ def generate_user_spatial_mappings( layer_dim: layer_size for layer_dim, layer_size in self.layer.loop_dim_size.items() } - check_passed = True # initialization + check_passed = True # 
initialization for unrolling_in_combination in combination: if unrolling_in_combination is None: continue @@ -231,7 +231,9 @@ def generate_user_spatial_mappings( # If yield_count==0, it means there is no legal spatial mapping found. # The reason is that the spatial mapping provided by the user has exceeded the layer dim size, # therefore the loop cannot pass the check. - assert yield_count > 0, "There is no legal spatial mapping found. Please make sure the provided spatial mappings do not exceed the layer dimension size." + assert ( + yield_count > 0 + ), "There is no legal spatial mapping found. Please make sure the provided spatial mappings do not exceed the layer dimension size." def append_mix_spatial_unrollings( self, provided_oa_dim_unrollings, provided_oa_dim_unrolling_hints, oa_dim From b9738e1b09df26540fa0bcfd3f546795439a15e8 Mon Sep 17 00:00:00 2001 From: JiacongSun Date: Fri, 10 Nov 2023 20:14:22 +0100 Subject: [PATCH 06/14] New feature: supporting ox/oy diagonal mapping. Fix: spatial_mapping_int in cost_model.py is fetched from SpatialGeneratorStage now to avoid the mapping inconsistence. 
--- zigzag/classes/cost_model/cost_model.py | 4 +- zigzag/classes/opt/spatial/generator.py | 410 ++++++++++++++++-- zigzag/classes/stages/CostModelStage.py | 4 + .../stages/SpatialMappingConversionStage.py | 124 +++--- .../stages/SpatialMappingGeneratorStage.py | 180 +++++++- 5 files changed, 610 insertions(+), 112 deletions(-) diff --git a/zigzag/classes/cost_model/cost_model.py b/zigzag/classes/cost_model/cost_model.py index cf25622a..d4296170 100644 --- a/zigzag/classes/cost_model/cost_model.py +++ b/zigzag/classes/cost_model/cost_model.py @@ -211,12 +211,14 @@ def __init__( accelerator, layer, spatial_mapping, + spatial_mapping_int, temporal_mapping, access_same_data_considered_as_no_access=True, ): self.accelerator = accelerator self.layer = layer self.spatial_mapping = spatial_mapping + self.spatial_mapping_int = spatial_mapping_int # the original spatial mapping without decimal self.temporal_mapping = temporal_mapping self.access_same_data_considered_as_no_access = ( access_same_data_considered_as_no_access @@ -249,7 +251,7 @@ def __init__( # self.spatial_mapping_dict_int = spatial_mapping_fractional_to_int( # self.spatial_mapping.mapping_dict_origin # ) - self.spatial_mapping_dict_int = self.spatial_mapping.mapping_dict_origin + self.spatial_mapping_dict_int = self.spatial_mapping_int # For constructing Mapping object, the last parameter "self.access_same_data_considered_as_no_access" is optional self.mapping = Mapping( diff --git a/zigzag/classes/opt/spatial/generator.py b/zigzag/classes/opt/spatial/generator.py index 5dfcd9f8..c3e35582 100644 --- a/zigzag/classes/opt/spatial/generator.py +++ b/zigzag/classes/opt/spatial/generator.py @@ -6,6 +6,8 @@ from zigzag.classes.hardware.architecture.memory_hierarchy import MemoryHierarchy from zigzag.classes.hardware.architecture.operational_array import OperationalArray +import math + ## Class that generates valid user-format spatial mappings. 
class UserSpatialMappingGenerator: @@ -19,6 +21,7 @@ def __init__( defined_mapping=None, enable_mix_spatial_mapping_generation=False, maximize_hardware_utilization=True, + enable_weight_diagonal_mapping=False, ) -> None: self.layer = layer self.accelerator = accelerator @@ -27,11 +30,13 @@ def __init__( enable_mix_spatial_mapping_generation ) self.maximize_hardware_utilization = maximize_hardware_utilization + self.enable_weight_diagonal_mapping = enable_weight_diagonal_mapping def run(self): return self.generate_user_spatial_mappings( enable_mix_spatial_mapping_generation=self.enable_mix_spatial_mapping_generation, maximize_hardware_utilization=self.maximize_hardware_utilization, + enable_weight_diagonal_mapping=self.enable_weight_diagonal_mapping, ) ## Generator that yields user-defined spatial mappings. @@ -51,7 +56,10 @@ def run(self): # layer_dim can be unrolled if the BW allows it (assumes flexible "bus" reads) # \endcode def generate_user_spatial_mappings( - self, enable_mix_spatial_mapping_generation, maximize_hardware_utilization + self, + enable_mix_spatial_mapping_generation, + maximize_hardware_utilization, + enable_weight_diagonal_mapping, ): core_id = self.layer.core_allocation core: Core = self.accelerator.get_core(core_id=core_id) @@ -179,53 +187,30 @@ def generate_user_spatial_mappings( # 2 means: only check the top 2 spatial mapping with the highest hardware utilization # Modify "2" to other numbers if you want to check on more spatial mappings. break + + legal_spatial_loop, left_layer_dim_size = self.check_spatial_loop_legality( + combination=combination, layer=self.layer + ) + if not legal_spatial_loop: + continue # Zip the combination (which is a (layer_dim, layer_size) for each oa_dim with the oa_dim names. oa_dim_names = [oa_dim.name for oa_dim in oa_dims] - # Extra check on the total unrolling size of a layer dim, if it is mapped on >=2 dimensions. 
- combination_check = { - layer_dim: layer_size - for layer_dim, layer_size in self.layer.loop_dim_size.items() - } - check_passed = True # initialization - for unrolling_in_combination in combination: - if unrolling_in_combination is None: - continue - if self.is_nested_tuple(unrolling_in_combination): - for sub_unrolling_in_combination in unrolling_in_combination: - unrolling_layer_dim = sub_unrolling_in_combination[0] - unrolling_layer_size = sub_unrolling_in_combination[1] - if unrolling_layer_dim in combination_check.keys(): - combination_check[ - unrolling_layer_dim - ] /= unrolling_layer_size - else: - # The unrolled layer dim does not exist in current layer. - # This only happens when the spatial mapping is user-defined, which - # contains non-existent layer dims in current layer. - pass - else: - unrolling_layer_dim = unrolling_in_combination[0] - unrolling_layer_size = unrolling_in_combination[1] - if unrolling_layer_dim in combination_check.keys(): - combination_check[unrolling_layer_dim] /= unrolling_layer_size - else: - # The unrolled layer dim does not exist in current layer. - # This only happens when the spatial mapping is user-defined, which - # contains non-existent layer dims in current layer. - pass - for layer_dim, layer_size in combination_check.items(): - if layer_size < 1: # the layer size/the unrolling size < 1 - # It means the unrolling size > the layer size, which is incorrect and impossible. - check_passed = False - break - if not check_passed: - continue user_spatial_mapping = { oa_dim_name: unrolling for (oa_dim_name, unrolling) in zip(oa_dim_names, combination) if unrolling is not None } + # Add act ir loop if it is weight stationary and the innermost memories serve for act. 
+ if enable_weight_diagonal_mapping: + user_spatial_mapping = self.add_input_pr_spatial_loop_if_enabled( + layer=self.layer, + provided_user_spatial_mapping=user_spatial_mapping, + user_spatial_mapping_hint=user_spatial_mapping_hint, + innermost_levels=innermost_levels, + left_layer_dim_size=left_layer_dim_size, + enable_mix_spatial_mapping_generation=enable_mix_spatial_mapping_generation, + ) yield user_spatial_mapping yield_count += 1 # If yield_count==0, it means there is no legal spatial mapping found. @@ -235,13 +220,49 @@ def generate_user_spatial_mappings( yield_count > 0 ), "There is no legal spatial mapping found. Please make sure the provided spatial mappings do not exceed the layer dimension size." + def check_spatial_loop_legality(self, combination, layer): + # Extra check on the total unrolling size of a layer dim, if it is mapped on >=2 dimensions. + combination_check = { + layer_dim: layer_size + for layer_dim, layer_size in layer.loop_dim_size.items() + } + legal_spatial_loop = True # initialization + for unrolling_in_combination in combination: + if unrolling_in_combination is None: + continue + if self.is_nested_tuple(unrolling_in_combination): + for sub_unrolling_in_combination in unrolling_in_combination: + unrolling_layer_dim = sub_unrolling_in_combination[0] + unrolling_layer_size = sub_unrolling_in_combination[1] + if unrolling_layer_dim in combination_check.keys(): + combination_check[unrolling_layer_dim] /= unrolling_layer_size + else: + # The unrolled layer dim does not exist in current layer. + # This only happens when the spatial mapping is user-defined, which + # contains non-existent layer dims in current layer. + pass + else: + unrolling_layer_dim = unrolling_in_combination[0] + unrolling_layer_size = unrolling_in_combination[1] + if unrolling_layer_dim in combination_check.keys(): + combination_check[unrolling_layer_dim] /= unrolling_layer_size + else: + # The unrolled layer dim does not exist in current layer. 
+ # This only happens when the spatial mapping is user-defined, which + # contains non-existent layer dims in current layer. + pass + for layer_dim, layer_size in combination_check.items(): + if layer_size < 1: # the layer size/the unrolling size < 1 + # It means the unrolling size > the layer size, which is incorrect and impossible. + legal_spatial_loop = False + break + return legal_spatial_loop, combination_check + def append_mix_spatial_unrollings( self, provided_oa_dim_unrollings, provided_oa_dim_unrolling_hints, oa_dim ): # Create and append new mix spatial unrollings to original oa_dim_unrollings # An example of mix: (("K",2), ("OX", 2)) - import math - oa_dim_unrollings = provided_oa_dim_unrollings oa_dim_unrolling_hints = provided_oa_dim_unrolling_hints if ( @@ -387,8 +408,6 @@ def sort_oa_dim_unrollings_in_the_order_of_utilization( # @param descending: # True -- the higher the mapping utilization is, the closer to the front it is. # False -- the lower the mapping utilization is, the closer to the front it is. - import math - oa_dim_unrollings = provided_oa_dim_unrollings if len(oa_dim_unrollings) > 1: # First we will record down the hardware utilization of each spatial unrolling in comb_value @@ -426,6 +445,309 @@ def sort_oa_dim_unrollings_in_the_order_of_utilization( hardware_utilization = None return oa_dim_unrollings, hardware_utilization + def add_input_pr_spatial_loop_if_enabled( + self, + layer, + provided_user_spatial_mapping, + user_spatial_mapping_hint, + innermost_levels, + left_layer_dim_size, + enable_mix_spatial_mapping_generation, + ): + # This function is used to support diagonal spatial mapping + # when input/activation is served in the innermost memories and the weight is stationary. + user_spatial_mapping = provided_user_spatial_mapping + # get the link from layer op to mem op + layer_op_to_mem_op: dict = layer.memory_operand_links + # check if it is weight stationary. + # keep the spatial loop as it was if it is not weight stationary. 
+ if len(layer.constant_operands) > 1: + return user_spatial_mapping + # get weight operand name + const_operand = layer.constant_operands[0] # weight representation + # get activation operand name + act_operand = [ + operand for operand in layer.input_operands if operand != const_operand + ][0] + # get output operand name + output_operand = layer.output_operand + # get name of OX, OY (weight ir layer dims) + weight_ir_layer_dims: list = layer.operand_loop_dim[const_operand]["ir"] + # get the oa_dim name served by input / output innermost memory level + for memory_level in innermost_levels: + mem_ops = memory_level.operands + if layer_op_to_mem_op[act_operand] in mem_ops: + act_served_oa_dim: set = memory_level.served_dimensions + if layer_op_to_mem_op[output_operand] in mem_ops: + output_served_oa_dim: set = memory_level.served_dimensions + # check if act is not served in the innermost memories, or it is uti-casting for act. + # keep the spatial loop as it was if act is not served. + if "act_served_oa_dim" not in locals() or len(act_served_oa_dim) == 0: + return user_spatial_mapping + + act_served_oa_dim_name = list(act_served_oa_dim)[0].name + output_served_oa_dim_name = list(output_served_oa_dim)[0].name + act_served_oa_dim_size = list(act_served_oa_dim)[0].size + output_served_oa_dim_size = list(output_served_oa_dim)[0].size + + # check if OX / OY in user_spatial_mapping_hint. Or else target_layer_dim will be empty. + target_layer_dim = [] # OX or OY or both + for layer_dim in weight_ir_layer_dims: + if layer_dim in user_spatial_mapping_hint[act_served_oa_dim_name]: + target_layer_dim.append(layer_dim) + + # no further execution if OX / OY unrolling is not in user_spatial_mapping_hint + if len(target_layer_dim) == 0: + return user_spatial_mapping + + ############################################ + # Get existed mapping size on act_served_oa_dim, which will be added with OX, OY later. 
+ if ( + act_served_oa_dim_name in user_spatial_mapping.keys() + ): # there already is sm loop + sm_loop = user_spatial_mapping[act_served_oa_dim_name] + if self.is_nested_tuple(sm_loop): # a mix layer sm mapping + exist_act_loop_size = 1 + for element in sm_loop: + exist_act_loop_size *= element[1] + else: # a single layer sm mapping + exist_act_loop_size = sm_loop[1] + else: # there is no sm loop mapped on act served dim + exist_act_loop_size = 1 + + # Check if the existed mapping size is more than half of current oa dim size. + # If so, it means there is no space for extra mapping even with a size of 2. + # In that case, we will do nothing but return the original spatial mapping + if exist_act_loop_size * 2 > act_served_oa_dim_size: + return user_spatial_mapping + + # fetch pr loop pairs for activation, e.g. {"IX": ["OX", "FX"]} + act_pr_layer_dims: dict = layer.operand_loop_dim[act_operand]["pr"] + + # Next we get existed mapping size on output_served_oa_dim + # there are two classes of mapping: + # (1) ir mapping to weight, e.g. "C" + # (2) r mapping to weight, e.g. "FX", "FY" (kernel size) + + # We firstly create a dict for later recording down existed r mapping to weight + # it will be like: + # weight_r_loop = {"OX": {"FX": 1}, "OY": {"FY": 1}} + weight_r_loop: dict = {} # here we put a nested dict for recording + loops_name_for_kernel_size: list = [] + pr_sm_link: dict = ( + {} + ) # here we record down the link between pr loops, e.g. link["FX"]="OX" + + for weight_ir_layer_dim in weight_ir_layer_dims: + for [layer_dim1, layer_dim2] in act_pr_layer_dims.values(): + if weight_ir_layer_dim in [layer_dim1, layer_dim2]: + break + # as we are unsure in act_pr_layer_dims, it is [OX, FX] or [FX, OX], we consider two possibilities. 
+ if layer_dim1 == weight_ir_layer_dim: # if the first one is OX / OY + weight_r_loop[layer_dim1] = {layer_dim2: 1} # 1 by default + loops_name_for_kernel_size.append(layer_dim2) + pr_sm_link[layer_dim2] = layer_dim1 + else: # layer_dim2 == weight_ir_layer_dim, the second one is OX / OY + weight_r_loop[layer_dim2] = {layer_dim1: 1} # 1 by default + loops_name_for_kernel_size.append(layer_dim1) + pr_sm_link[layer_dim1] = layer_dim2 + + # Next we will update the dict, and also find the mapping size (weight ir loop size) we do not care about. + weight_ir_loop_size = 1 # default value + sm_loop = user_spatial_mapping[output_served_oa_dim_name] + if self.is_nested_tuple(sm_loop): # a mix sm mapping + for element in sm_loop: + # same operation as above + layer_dim = element[0] + mapping_size = element[1] + if layer_dim in loops_name_for_kernel_size: # layer_dim in ["FX", "FY"] + paired_pr_layer_dim = pr_sm_link[ + layer_dim + ] # "FX" -> "OX", "FY" -> "OY" + weight_r_loop[paired_pr_layer_dim][layer_dim] *= mapping_size + else: # not care + weight_ir_loop_size *= mapping_size + else: # a single layer sm mapping + layer_dim = sm_loop[0] + mapping_size = sm_loop[1] + if layer_dim in loops_name_for_kernel_size: # layer_dim in ["FX", "FY"] + paired_pr_layer_dim = pr_sm_link[ + layer_dim + ] # "FX" -> "OX", "FY" -> "OY" + weight_r_loop[paired_pr_layer_dim][layer_dim] *= mapping_size + else: # not care + weight_ir_loop_size *= mapping_size + + # At this point, we already know what sm mapping existed. 
+ ############################################ + + # Next we will try to add possible OX / OY mapping + # find all possible OX / OY mapping breakdown and put them in the pool + # It looks like: + # sm_pools = {"OX": [("OX",2),("OX",5),("OX",5)], "OY": [("OY",2),("OY",5),("OY",5)]} + sm_pools_to_add: dict = {} + for layer_dim in target_layer_dim: + layer_size = self.layer.loop_dim_size[layer_dim] + layer_size_breakdown: list = self.prime_factors(layer_size) + + # try to find the maximum OX / OY and add it to the list + # (1) check on act_served_oa_dim (round down to integer) + max_allowed_dim_size_on_act_served_dim = math.floor( + act_served_oa_dim_size / exist_act_loop_size + ) + # (2) check on output_served_oa_dim + existed_pr_mapping = list(weight_r_loop[layer_dim].values())[0] + for key in weight_r_loop.keys(): + if key != layer_dim: + ir_layer_dim_to_current_layer_dim = key + existed_pr_mapping_but_ir_to_current_layer_dim = list( + weight_r_loop[ir_layer_dim_to_current_layer_dim].values() + )[0] + max_allowed_dim_size_on_output_served_dim = ( + output_served_oa_dim_size + / weight_ir_loop_size + / existed_pr_mapping_but_ir_to_current_layer_dim + ) - (existed_pr_mapping - 1) + # round down to integer + max_allowed_dim_size_on_output_served_dim = math.floor( + max_allowed_dim_size_on_output_served_dim + ) + max_allowed_target_dim_size = min( + max_allowed_dim_size_on_act_served_dim, + max_allowed_dim_size_on_output_served_dim, + ) + # check whether the element in layer_size_breakdown is allowed to add + legal_layer_size_breakdown = [] + for factor in layer_size_breakdown: + if ( + factor <= max_allowed_target_dim_size + and factor <= left_layer_dim_size[layer_dim] + ): + legal_layer_size_breakdown.append(factor) + if len(legal_layer_size_breakdown) > 0: + sm_pools_to_add[layer_dim] = [ + tuple([layer_dim, size]) for size in legal_layer_size_breakdown + ] + + # check if there is anything in the pool + if len(sm_pools_to_add) == 0: + return user_spatial_mapping + + # 
Generate possible combination + # In the for loop below, we will first try only with OX or OY. Then with their combination. + # In the end, we will only keep the best one, which has the maximal value of OX*OY. + # If there are multiple combs having the same OX*OY, we will keep the first one, as their costs are the same. + best_comb = [] # list initialization + best_comb_size = 0 # reference value to find the best comb + target_layer_dim = [ + layer_dim + for layer_dim in target_layer_dim + if layer_dim in sm_pools_to_add.keys() + ] + if enable_mix_spatial_mapping_generation: + allowed_dim_comb_length = len(target_layer_dim) + else: + allowed_dim_comb_length = 1 + for dim_comb_length in range(1, allowed_dim_comb_length + 1): + for dim_comb in itertools.combinations(target_layer_dim, dim_comb_length): + # we will create a temporary pool for each dim combination + sm_pools_mix = [] + for layer_dim in dim_comb: + sm_pools_mix += sm_pools_to_add[layer_dim] + max_comb_length = len( + sm_pools_mix + ) # the max possible length of combination + for comb_length in range(1, max_comb_length + 1): + for comb in itertools.combinations(sm_pools_mix, comb_length): + # At this point, in comb, we have a possible OX / OY mapping + # First we get current comb size + # Example: comb_mapping = {"OX": 5, "OY": 10} + comb_mapping: dict = {} + for layer_dim in dim_comb: + comb_mapping[layer_dim] = 1 # default value + for element in comb: + layer_dim = element[0] + mapping_size = element[1] + comb_mapping[layer_dim] *= mapping_size + # Skip if current unrolling on a layer_dim is 1, which means it has been checked already. 
+ curr_comb_already_checked = False + for unroll_size in comb_mapping.values(): + if unroll_size == 1: + curr_comb_already_checked = True + break + if curr_comb_already_checked: + continue + # We will check if this comb is possible + # (1) check on left_layer_dim_size + curr_comb_illegal = False + for unroll_dim, unroll_size in comb_mapping.items(): + if unroll_size > left_layer_dim_size[unroll_dim]: + curr_comb_illegal = True + break + if curr_comb_illegal: + continue + # (2) check on act_served_oa_dim + comb_size = math.prod([v for v in comb_mapping.values()]) + required_oa_dim_size = exist_act_loop_size * comb_size + if required_oa_dim_size > act_served_oa_dim_size: + continue # the comb is not possible on act_served_oa_dim + # (3) check on output_served_oa_dim + required_oa_dim_size = weight_ir_loop_size + for layer_dim in comb_mapping.keys(): + existed_pr_mapping = list( + weight_r_loop[layer_dim].values() + )[0] + pr_mapping_to_add = comb_mapping[layer_dim] + new_mapping_size = ( + existed_pr_mapping + pr_mapping_to_add - 1 + ) + required_oa_dim_size *= new_mapping_size + if len(comb_mapping) == 1: # only OX or OY + # add the other existed pr loop to required_oa_dim_size, + # because previously it is not counted in output_served_oa_dim_size. 
+ sole_dim = list(comb_mapping.keys())[0] + the_other_pr_mapping_name = [ + key for key in weight_r_loop.keys() if key != sole_dim + ][0] + the_other_pr_mapping_size = list( + weight_r_loop[the_other_pr_mapping_name].values() + )[0] + required_oa_dim_size *= the_other_pr_mapping_size + if required_oa_dim_size > output_served_oa_dim_size: + continue # this comb is not possible on output_served_oa_dim + # (4) compare with best_comb + if comb_size > best_comb_size: + # reformat the comb and merge repetitive elements + # example: (("OX", 5), ("OY", 2)) + new_comb: list = [ + (layer_dim, mapping_size) + for (layer_dim, mapping_size) in comb_mapping.items() + ] + best_comb = new_comb + + # At this point, we get the best possible comb to add. Then we can add that to the current sm mapping + if len(best_comb) == 0: # did not find any comb + return user_spatial_mapping + else: + if ( + act_served_oa_dim_name in user_spatial_mapping.keys() + ): # there already is sm loop previously + act_served_mapping_to_change = user_spatial_mapping[ + act_served_oa_dim_name + ] + if self.is_nested_tuple( + act_served_mapping_to_change + ): # originally it is a mix mapping + reformed_sm = list(act_served_mapping_to_change) + best_comb + else: # originally it is a single layer mapping + reformed_sm = [act_served_mapping_to_change] + best_comb + else: # there is no sm loop on act served oa dim previously + reformed_sm = best_comb + reformed_sm = tuple(reformed_sm) + user_spatial_mapping[act_served_oa_dim_name] = reformed_sm + + return user_spatial_mapping + @staticmethod def all_unique(items): return len(set(items)) == len(items) diff --git a/zigzag/classes/stages/CostModelStage.py b/zigzag/classes/stages/CostModelStage.py index 941c7db0..ce2d135b 100644 --- a/zigzag/classes/stages/CostModelStage.py +++ b/zigzag/classes/stages/CostModelStage.py @@ -30,6 +30,7 @@ def __init__( accelerator, layer, spatial_mapping, + spatial_mapping_int, temporal_mapping, 
access_same_data_considered_as_no_access=True, **kwargs @@ -39,12 +40,14 @@ def __init__( self.accelerator, self.layer, self.spatial_mapping, + self.spatial_mapping_int, self.temporal_mapping, self.access_same_data_considered_as_no_access, ) = ( accelerator, layer, spatial_mapping, + spatial_mapping_int, temporal_mapping, access_same_data_considered_as_no_access, ) @@ -55,6 +58,7 @@ def run(self) -> Generator[Tuple[CostModelEvaluation, Any], None, None]: accelerator=self.accelerator, layer=self.layer, spatial_mapping=self.spatial_mapping, + spatial_mapping_int=self.spatial_mapping_int, temporal_mapping=self.temporal_mapping, # the below parameter is optional access_same_data_considered_as_no_access=self.access_same_data_considered_as_no_access, diff --git a/zigzag/classes/stages/SpatialMappingConversionStage.py b/zigzag/classes/stages/SpatialMappingConversionStage.py index 44cbf89f..6f89c9fc 100644 --- a/zigzag/classes/stages/SpatialMappingConversionStage.py +++ b/zigzag/classes/stages/SpatialMappingConversionStage.py @@ -52,7 +52,7 @@ def is_nested_tuple(obj): def run(self): user_spatial_mapping = self.layer.user_spatial_mapping - spatial_mapping = self.convert_user_spatial_mapping(user_spatial_mapping) + spatial_mapping, spatial_mapping_int = self.convert_user_spatial_mapping(user_spatial_mapping) # Since the spatial_mapping may be modified in the previous step, # we have to update this change to self.layer updated_user_spatial_mapping = {} @@ -75,6 +75,7 @@ def run(self): kwargs = self.kwargs.copy() kwargs["spatial_mapping"] = spatial_mapping + kwargs["spatial_mapping_int"] = spatial_mapping_int kwargs["accelerator"] = self.accelerator kwargs["layer"] = self.layer @@ -159,15 +160,75 @@ def convert_user_spatial_mapping(self, user_spatial_mapping): f"User-provided spatial mapping converted to: {user_spatial_mapping_for_log}" ) + spatial_mapping_dict = self.generate_spatial_mapping_dict( + user_spatial_mapping=limited_user_spatial_mapping, + layer=self.layer, + 
accelerator=self.accelerator + ) + # The next spatial_mapping_dict is used in cost model to calculate the interval between different data transfer. + # Different with the one above, there must only be integer numbers (corresponding to the real cases) + spatial_mapping_dict_int = self.generate_spatial_mapping_dict( + user_spatial_mapping=user_spatial_mapping, + layer=self.layer, + accelerator=self.accelerator + ) + + return SpatialMapping( + spatial_mapping_dict=spatial_mapping_dict, layer_node=self.layer + ), SpatialMapping( + spatial_mapping_dict=spatial_mapping_dict_int, layer_node=self.layer + ) + + def generate_limited_user_spatial_mapping( + self, layer_dim_sizes, oa_dims, oa_dim_name, spatial_loop, user_spatial_mapping + ): + ## Do check on spatial mapping, and convert the mapping to a tuple + (loop_dim_unrolled, loop_size_unrolled) = spatial_loop + # Check 0: Skip this spatial dimension if it doesn't exist in the layer + if loop_dim_unrolled not in layer_dim_sizes.keys(): + return None + # Check 1: Limit unrolling if operational array dimension is smaller than provided unrolling + oa_dim_size = next( + (oa_dim for oa_dim in oa_dims if oa_dim.name == oa_dim_name) + ).size + loop_size_unrolled = min(oa_dim_size, loop_size_unrolled) + # Check 2: Limit unrolling if layer dimension is smaller than provided unrolling or if the loop dim doesn't exist + layer_dim_size = layer_dim_sizes.get(loop_dim_unrolled, 1) + loop_size_unrolled = min(layer_dim_size, loop_size_unrolled) + # Check 3: Adjust unrolling if it is not a multiple of the layer dimension size + # and if there is no more mapping for this layer dimension + no_more_mapping_for_current_layer_dim = self.check_if_there_is_further_oa_mapping_for_current_layer_dim( + oa_dim_name=oa_dim_name, + loop_dim_unrolled=loop_dim_unrolled, + user_spatial_mapping=user_spatial_mapping + ) + if no_more_mapping_for_current_layer_dim: + loop_size_unrolled_on_early_oa_dims = self.calc_unrolled_loop_size_on_early_oa_dims( + 
oa_dim_name=oa_dim_name, + loop_dim_unrolled=loop_dim_unrolled, + user_spatial_mapping=user_spatial_mapping + ) + temporal_remainder = int(np.ceil(layer_dim_size / (loop_size_unrolled*loop_size_unrolled_on_early_oa_dims))) + loop_size_unrolled = layer_dim_size / temporal_remainder / loop_size_unrolled_on_early_oa_dims + return ( + loop_dim_unrolled, + loop_size_unrolled, + ) + + def generate_spatial_mapping_dict( + self, user_spatial_mapping, layer, accelerator + ): + # This function is to convert spatial mapping to spatial_mapping_dict, + # which attaches spatial mapping to different memory levels. spatial_mapping_dict = {} - layer_to_mem_op = self.layer.memory_operand_links + layer_to_mem_op = layer.memory_operand_links mem_to_layer_op = { mem_op: layer_op for (layer_op, mem_op) in layer_to_mem_op.items() } - core_id = self.layer.core_allocation - mem_hierarchy = self.accelerator.get_core(core_id).memory_hierarchy + core_id = layer.core_allocation + mem_hierarchy = accelerator.get_core(core_id).memory_hierarchy for mem_op, layer_op in mem_to_layer_op.items(): - user_sm_copy = limited_user_spatial_mapping.copy() + user_sm_copy = user_spatial_mapping.copy() # layer_op = mem_to_layer_op[mem_op] spatial_mapping_dict[layer_op] = [] memory_levels = mem_hierarchy.get_memory_levels( @@ -192,8 +253,8 @@ def convert_user_spatial_mapping(self, user_spatial_mapping): spatial_mapping_size, ) = sub_spatial_loop if ( - spatial_mapping_dim - in spatial_mapping_lvl_dict.keys() + spatial_mapping_dim + in spatial_mapping_lvl_dict.keys() ): spatial_mapping_lvl_dict[ spatial_mapping_dim @@ -216,8 +277,8 @@ def convert_user_spatial_mapping(self, user_spatial_mapping): # as the spatial mapping representation is a level-by-level one. 
del user_sm_copy[dim_name] for ( - spatial_mapping_lvl_dict_dim, - spatial_mapping_lvl_dict_size, + spatial_mapping_lvl_dict_dim, + spatial_mapping_lvl_dict_size, ) in spatial_mapping_lvl_dict.items(): spatial_mapping_lvl.append( (spatial_mapping_lvl_dict_dim, spatial_mapping_lvl_dict_size) @@ -231,49 +292,10 @@ def convert_user_spatial_mapping(self, user_spatial_mapping): spatial_loop for (dim_name, spatial_loop) in user_sm_copy.items() ] spatial_mapping_dict[layer_op].append(top_level_spatial_mapping) - - return SpatialMapping( - spatial_mapping_dict=spatial_mapping_dict, layer_node=self.layer - ) - - def generate_limited_user_spatial_mapping( - self, layer_dim_sizes, oa_dims, oa_dim_name, spatial_loop, user_spatial_mapping - ): - ## Do check on spatial mapping, and convert the mapping to a tuple - (loop_dim_unrolled, loop_size_unrolled) = spatial_loop - # Check 0: Skip this spatial dimension if it doesn't exist in the layer - if loop_dim_unrolled not in layer_dim_sizes.keys(): - return None - # Check 1: Limit unrolling if operational array dimension is smaller than provided unrolling - oa_dim_size = next( - (oa_dim for oa_dim in oa_dims if oa_dim.name == oa_dim_name) - ).size - loop_size_unrolled = min(oa_dim_size, loop_size_unrolled) - # Check 2: Limit unrolling if layer dimension is smaller than provided unrolling or if the loop dim doesn't exist - layer_dim_size = layer_dim_sizes.get(loop_dim_unrolled, 1) - loop_size_unrolled = min(layer_dim_size, loop_size_unrolled) - # Check 3: Adjust unrolling if it is not a multiple of the layer dimension size - # and if there is no more mapping for this layer dimension - no_more_mapping_for_current_layer_dim = self.check_if_there_is_further_oa_mapping_for_current_layer_dim( - oa_dim_name=oa_dim_name, - loop_dim_unrolled=loop_dim_unrolled, - user_spatial_mapping=user_spatial_mapping - ) - if no_more_mapping_for_current_layer_dim: - loop_size_unrolled_on_early_oa_dims = self.calc_unrolled_loop_size_on_early_oa_dims( - 
oa_dim_name=oa_dim_name, - loop_dim_unrolled=loop_dim_unrolled, - user_spatial_mapping=user_spatial_mapping - ) - temporal_remainder = int(np.ceil(layer_dim_size / (loop_size_unrolled*loop_size_unrolled_on_early_oa_dims))) - loop_size_unrolled = layer_dim_size / temporal_remainder / loop_size_unrolled_on_early_oa_dims - return ( - loop_dim_unrolled, - loop_size_unrolled, - ) + return spatial_mapping_dict def check_if_there_is_further_oa_mapping_for_current_layer_dim( - self, oa_dim_name, loop_dim_unrolled, user_spatial_mapping + self, oa_dim_name, loop_dim_unrolled, user_spatial_mapping ): # For the case when there is layer dimension that is mapped on multiple oa dimensions. # We need to decide on which oa dimension to adjust the unrolling @@ -302,7 +324,7 @@ def check_if_there_is_further_oa_mapping_for_current_layer_dim( return no_more_mapping_for_current_layer_dim def calc_unrolled_loop_size_on_early_oa_dims( - self, oa_dim_name, loop_dim_unrolled, user_spatial_mapping + self, oa_dim_name, loop_dim_unrolled, user_spatial_mapping ): # calculate the unrolled loop size for the specific layer dim on oa dims earlier than current oa dim loop_unrolled_size_already = 1 diff --git a/zigzag/classes/stages/SpatialMappingGeneratorStage.py b/zigzag/classes/stages/SpatialMappingGeneratorStage.py index 85b81719..8f7bf9fc 100644 --- a/zigzag/classes/stages/SpatialMappingGeneratorStage.py +++ b/zigzag/classes/stages/SpatialMappingGeneratorStage.py @@ -2,11 +2,14 @@ from zigzag.classes.opt.spatial.generator import UserSpatialMappingGenerator from zigzag.classes.hardware.architecture.core import Core +from zigzag.classes.hardware.architecture.accelerator import Accelerator +from zigzag.classes.hardware.architecture.memory_hierarchy import MemoryHierarchy from zigzag.classes.stages.Stage import Stage from zigzag.classes.stages.SpatialMappingConversionStage import ( SpatialMappingConversionStage, ) import copy +from zigzag.utils import pickle_deepcopy logger = 
logging.getLogger(__name__) @@ -33,6 +36,7 @@ def __init__( layer, enable_mix_spatial_mapping_generation=False, maximize_hardware_utilization=True, + enable_weight_diagonal_mapping=False, **kwargs, ): super().__init__(list_of_callables, **kwargs) @@ -43,6 +47,7 @@ def __init__( enable_mix_spatial_mapping_generation ) self.maximize_hardware_utilization = maximize_hardware_utilization + self.enable_weight_diagonal_mapping = enable_weight_diagonal_mapping @staticmethod # Check that the layer includes: @@ -74,9 +79,11 @@ def run(self): user_provided_spatial_mappings, dict ): # There is a single USM provided if len(user_provided_spatial_mappings) < len(oa_dims): - self.layer.user_spatial_mapping_hint = self.complete_user_spatial_mapping_hint( - user_spatial_mapping_hint=user_spatial_mapping_hint, - oa_dims=oa_dims + self.layer.user_spatial_mapping_hint = ( + self.complete_user_spatial_mapping_hint( + user_spatial_mapping_hint=user_spatial_mapping_hint, + oa_dims=oa_dims, + ) ) user_spatial_mapping_generator = UserSpatialMappingGenerator( layer=self.layer, @@ -96,9 +103,10 @@ def run(self): ): # There are multiple USMs provided user_spatial_mappings = user_provided_spatial_mappings else: # There is no USM provided - self.layer.user_spatial_mapping_hint = self.complete_user_spatial_mapping_hint( - user_spatial_mapping_hint=user_spatial_mapping_hint, - oa_dims=oa_dims + self.layer.user_spatial_mapping_hint = ( + self.complete_user_spatial_mapping_hint( + user_spatial_mapping_hint=user_spatial_mapping_hint, oa_dims=oa_dims + ) ) # Initialize the UserSpatialMappingGenerator which will automatically generate SMs user_spatial_mapping_generator = UserSpatialMappingGenerator( @@ -106,6 +114,7 @@ def run(self): accelerator=self.accelerator, enable_mix_spatial_mapping_generation=self.enable_mix_spatial_mapping_generation, maximize_hardware_utilization=self.maximize_hardware_utilization, + enable_weight_diagonal_mapping=self.enable_weight_diagonal_mapping, ) # Get all the USMs by 
running the generator user_spatial_mappings = list( @@ -122,18 +131,35 @@ def run(self): self.layer.user_spatial_mapping = user_spatial_mapping # Note: manual instantiation of spatial mapping conversion stage here. We let that class deal with # everything else, including instantion of the actual substages - spatial_mapping_conversion_stage = SpatialMappingConversionStage( - self.list_of_callables, - accelerator=self.accelerator, - layer=copy.copy(self.layer), - **self.kwargs, - ) + + # Modify the size of lower input mem to support weight diagonal spatial unrolling (for OX/OY) + if self.enable_weight_diagonal_mapping: + ( + input_mem_size_updated, + new_accelerator, + ) = self.modify_innermost_input_mem_size(core_id, user_spatial_mapping) + if self.enable_weight_diagonal_mapping and input_mem_size_updated: + original_accelerator = self.accelerator + spatial_mapping_conversion_stage = SpatialMappingConversionStage( + self.list_of_callables, + accelerator=new_accelerator, + layer=copy.copy(self.layer), + **self.kwargs, + ) + else: + spatial_mapping_conversion_stage = SpatialMappingConversionStage( + self.list_of_callables, + accelerator=self.accelerator, + layer=copy.copy(self.layer), + **self.kwargs, + ) for cme, extra_info in spatial_mapping_conversion_stage.run(): + if self.enable_weight_diagonal_mapping and input_mem_size_updated: + # recover back the accelerator if its mem size is adjusted before + cme.accelerator = original_accelerator yield cme, (user_spatial_mapping, extra_info) - def complete_user_spatial_mapping_hint( - self, user_spatial_mapping_hint, oa_dims - ): + def complete_user_spatial_mapping_hint(self, user_spatial_mapping_hint, oa_dims): # This function is to create user_spatial_mapping_hint when it is not provided # or complete it if it is provided but on only part of oa dimensions. 
complete_user_spatial_mapping_hint = user_spatial_mapping_hint @@ -156,4 +182,126 @@ def complete_user_spatial_mapping_hint( layer_dim for layer_dim in self.layer.loop_dim_list ] # self.layer.user_spatial_mapping_hint = user_spatial_mapping_hint - return complete_user_spatial_mapping_hint \ No newline at end of file + return complete_user_spatial_mapping_hint + + def modify_innermost_input_mem_size(self, core_id, user_spatial_mapping): + # To support OX, OY unrolling, we will scale the lowest input mem size by OXu*OYu + # to avoid the MemoryTooSmallException in loma stage. + input_mem_size_updated = ( + False # flag to indicate if the accelerator is modified. + ) + core = self.accelerator.get_core(core_id=core_id) + operational_array = core.operational_array + oa_dims = operational_array.dimensions + memory_hierarchy = copy.deepcopy(core.memory_hierarchy) + innermost_levels = memory_hierarchy.get_inner_memories() + # get the link from layer op to mem op + layer_op_to_mem_op: dict = self.layer.memory_operand_links + # check if it is weight stationary. + # keep the spatial loop as it was if it is not weight stationary. 
+ if len(self.layer.constant_operands) > 1: + return input_mem_size_updated, self.accelerator + # get weight operand name + const_operand = self.layer.constant_operands[0] # weight representation + # get activation operand name + act_operand = [ + operand for operand in self.layer.input_operands if operand != const_operand + ][0] + # get name of OX, OY (weight ir layer dims) + weight_ir_layer_dims: list = self.layer.operand_loop_dim[const_operand]["ir"] + # get the oa_dim name served by input innermost memory level + for memory_level in innermost_levels: + mem_ops = memory_level.operands + if layer_op_to_mem_op[act_operand] in mem_ops: + act_innermost_mem_level = memory_level + act_served_oa_dim: set = memory_level.served_dimensions + act_served_oa_dim_name = list(act_served_oa_dim)[0].name + # check if act is not served in the innermost memories, or it is uti-casting for act. + # keep the spatial loop as it was if act is not served. + if "act_served_oa_dim" not in locals() or len(act_served_oa_dim) == 0: + return input_mem_size_updated, self.accelerator + # get the mem scaling factor if OX, OY exist + mem_scaling_factor = 1 + if ( + act_served_oa_dim_name not in user_spatial_mapping.keys() + ): # there is no sm loop + pass + else: # there is sm loop on act served oa dim + act_served_oa_mapping = user_spatial_mapping[act_served_oa_dim_name] + if self.is_nested_tuple( + act_served_oa_mapping + ): # a mix sm mapping, e.g. 
(("K", 2), ("OX", 5)) + for element in act_served_oa_mapping: + layer_dim = element[0] + if layer_dim in weight_ir_layer_dims: + layer_size = element[1] + mem_scaling_factor *= layer_size + else: # a single layer dim mapping + layer_dim = act_served_oa_mapping[0] + if layer_dim in weight_ir_layer_dims: + layer_size = act_served_oa_mapping[1] + mem_scaling_factor *= layer_size + # scale the mem size + if mem_scaling_factor == 1: + # No need to change the input mem size + return input_mem_size_updated, self.accelerator + else: + input_mem_size_updated = True + # Initialize the new memory hierarchy + mh_name = memory_hierarchy.name + new_mh_name = mh_name + "-supporting-diagonal-map" + new_memory_hierarchy = MemoryHierarchy(operational_array, new_mh_name) + # Add memories to the new memory hierarchy with the correct attributes + for curr_mem_level, memory_level in enumerate( + memory_hierarchy.mem_level_list + ): + memory_instance = memory_level.memory_instance + if memory_level == act_innermost_mem_level: + memory_instance.size *= mem_scaling_factor # scale here. For others, keep them unchanged. 
+ operands = tuple(memory_level.operands) + port_alloc = memory_level.port_alloc_raw + served_dimensions_vec = memory_level.served_dimensions_vec + assert len(served_dimensions_vec) >= 1 + served_dimensions = served_dimensions_vec[0] + + new_memory_instance = pickle_deepcopy(memory_instance) + new_operands = pickle_deepcopy(operands) + new_port_alloc = pickle_deepcopy(port_alloc) + new_served_dimensions = pickle_deepcopy(served_dimensions) + new_memory_hierarchy.add_memory( + memory_instance=new_memory_instance, + operands=new_operands, + port_alloc=new_port_alloc, + served_dimensions=new_served_dimensions, + ) + # Create the new core + id = core.id + dataflows = core.dataflows + new_id = id + new_dataflows = pickle_deepcopy(dataflows) + + new_core = Core( + id=new_id, + operational_array=operational_array, + memory_hierarchy=new_memory_hierarchy, + dataflows=new_dataflows, + ) + + # Create the new accelerator + name = self.accelerator.name + new_name = name + "-supporting-diagonal-map" + new_cores = {new_core} + new_accelerator = Accelerator( + name=new_name, + core_set=new_cores, + ) + return input_mem_size_updated, new_accelerator + + @staticmethod + def is_nested_tuple(obj): + if isinstance(obj, tuple): + for item in obj: + if isinstance(item, tuple): + # If any item within the tuple is itself a tuple, it's a nested tuple + return True + return False From 1f24d5636d934d50ba0f2f160d1a0b827f2e3f74 Mon Sep 17 00:00:00 2001 From: JiacongSun Date: Fri, 10 Nov 2023 20:20:06 +0100 Subject: [PATCH 07/14] delete debug.py --- debug.py | 26 -------------------------- 1 file changed, 26 deletions(-) delete mode 100644 debug.py diff --git a/debug.py b/debug.py deleted file mode 100644 index 918bf98a..00000000 --- a/debug.py +++ /dev/null @@ -1,26 +0,0 @@ -from zigzag.api import get_hardware_performance_zigzag - -opt = 'EDP' -model = "alexnet" -onnx_model_path = f"zigzag/inputs/examples/workload/{model}.onnx" -workload = onnx_model_path - -hwarchs = ["Edge_TPU_like", 
"Ascend_like", "Eyeriss_like", "Meta_prototype", "Tesla_NPU_like", "TPU_like"] - -for hwarch in hwarchs: - - mapping = f"zigzag.inputs.examples.mapping.default" - accelerator = f"zigzag.inputs.examples.hardware.{hwarch}" - - dump_filename_pattern=f"outputs/{hwarch}-{model}-layer_?.json" - pickle_filename = f"outputs/{hwarch}-{model}-saved_list_of_cmes.pickle" - - energy, latency, cme = get_hardware_performance_zigzag(workload=workload, - accelerator=accelerator, - mapping=mapping, - opt=opt, - dump_filename_pattern=dump_filename_pattern, - pickle_filename=pickle_filename) - print(f"Total network energy = {energy:.2e} pJ") - print(f"Total network latency = {latency:.2e} cycles") - print(f"Total edp = {energy*latency:.2e} pJ*cycles") \ No newline at end of file From 9b481bba991c98224996cd5f94e121570344220f Mon Sep 17 00:00:00 2001 From: JiacongSun Date: Sat, 11 Nov 2023 19:55:03 +0100 Subject: [PATCH 08/14] Update the check condition when applying weight diagonal mapping. Now the check condition is more strict. --- zigzag/classes/opt/spatial/generator.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/zigzag/classes/opt/spatial/generator.py b/zigzag/classes/opt/spatial/generator.py index c3e35582..6d4f4a84 100644 --- a/zigzag/classes/opt/spatial/generator.py +++ b/zigzag/classes/opt/spatial/generator.py @@ -480,9 +480,11 @@ def add_input_pr_spatial_loop_if_enabled( act_served_oa_dim: set = memory_level.served_dimensions if layer_op_to_mem_op[output_operand] in mem_ops: output_served_oa_dim: set = memory_level.served_dimensions - # check if act is not served in the innermost memories, or it is uti-casting for act. + # check if act is not served in the innermost memories, or act/output is not multicasting on only one dimension. # keep the spatial loop as it was if act is not served. 
- if "act_served_oa_dim" not in locals() or len(act_served_oa_dim) == 0: + if "act_served_oa_dim" not in locals() or len(act_served_oa_dim) != 1: + return user_spatial_mapping + if "output_served_oa_dim" not in locals() or len(output_served_oa_dim) != 1: return user_spatial_mapping act_served_oa_dim_name = list(act_served_oa_dim)[0].name From 18c39d2838458fd55de2a946a1abec245ea931c8 Mon Sep 17 00:00:00 2001 From: JiacongSun Date: Sun, 12 Nov 2023 19:18:36 +0100 Subject: [PATCH 09/14] Fix the issue that sometimes there is no sm loop yielded when a layer dim is mapped on multiple hardware dims --- zigzag/classes/opt/spatial/generator.py | 97 ++++++++++++++++++++++++- 1 file changed, 95 insertions(+), 2 deletions(-) diff --git a/zigzag/classes/opt/spatial/generator.py b/zigzag/classes/opt/spatial/generator.py index 6d4f4a84..f5d338c8 100644 --- a/zigzag/classes/opt/spatial/generator.py +++ b/zigzag/classes/opt/spatial/generator.py @@ -182,8 +182,9 @@ def generate_user_spatial_mappings( # Now we have to combine them into user-defined spatial mappings. # record down the number of yield yield_count = 0 + yield_count_limit = 2 # used to control the yield count when maximize_hardware_utilization == True for combination in itertools.product(*unrollings): - if maximize_hardware_utilization and yield_count >= 2: + if maximize_hardware_utilization and yield_count >= yield_count_limit: # 2 means: only check the top 2 spatial mapping with the highest hardware utilization # Modify "2" to other numbers if you want to check on more spatial mappings. break @@ -214,12 +215,104 @@ def generate_user_spatial_mappings( yield user_spatial_mapping yield_count += 1 # If yield_count==0, it means there is no legal spatial mapping found. - # The reason is that the spatial mapping provided by the user has exceeded the layer dim size, + # One reason is that the spatial mapping provided by the user has exceeded the layer dim size, # therefore the loop cannot pass the check. 
+ # The other reason could be: there is a layer dim mapped on multiple oa dims, + # so the product has exceeded the layer dim size. + # For a quick fix on the second cause, we will reform the sm loop only for single layer dim mapping. + if yield_count == 0: + for combination in itertools.product(*unrollings): + is_mix_comb = False + for loop in combination: + if self.is_nested_tuple(loop): + is_mix_comb = True + continue + if is_mix_comb: + # The fix is not applied for mix sm loop. + continue + if maximize_hardware_utilization and yield_count >= yield_count_limit: + # 2 means: only check the top 2 spatial mapping with the highest hardware utilization + # Modify "2" to other numbers if you want to check on more spatial mappings. + break + new_combination, left_layer_dim_size = self.shrink_combination_when_a_layer_dim_is_mapped_on_multiple_oa_dims( + combination=combination, + layer=self.layer, + ) + # Zip the combination (which is a (layer_dim, layer_size) for each oa_dim with the oa_dim names. + oa_dim_names = [oa_dim.name for oa_dim in oa_dims] + + user_spatial_mapping = { + oa_dim_name: unrolling + for (oa_dim_name, unrolling) in zip(oa_dim_names, new_combination) + if unrolling is not None + } + # Add act ir loop if it is weight stationary and the innermost memories serve for act. + if enable_weight_diagonal_mapping: + user_spatial_mapping = self.add_input_pr_spatial_loop_if_enabled( + layer=self.layer, + provided_user_spatial_mapping=user_spatial_mapping, + user_spatial_mapping_hint=user_spatial_mapping_hint, + innermost_levels=innermost_levels, + left_layer_dim_size=left_layer_dim_size, + enable_mix_spatial_mapping_generation=enable_mix_spatial_mapping_generation, + ) + yield user_spatial_mapping + yield_count += 1 + assert ( yield_count > 0 ), "There is no legal spatial mapping found. Please make sure the provided spatial mappings do not exceed the layer dimension size." 
+ def shrink_combination_when_a_layer_dim_is_mapped_on_multiple_oa_dims( + self, combination, layer + ): + new_combination = combination + legal_spatial_loop, left_layer_dim_size = self.check_spatial_loop_legality( + combination=new_combination, layer=layer + ) + while not legal_spatial_loop: + new_combination_next = list(new_combination) + for layer_dim, layer_dim_size in left_layer_dim_size.items(): + if layer_dim_size < 1: + scaled_success = False + for oa_index in range(len(new_combination_next)-1, -1, -1): # reverse order on oa dims + (mapped_layer_dim, mapped_layer_dim_size) = new_combination_next[oa_index] + if mapped_layer_dim_size > 1: + # shrink the mapped layer dim size + mapped_layer_dim_size -= 1 + new_combination_next[oa_index] = (mapped_layer_dim, mapped_layer_dim_size) + scaled_success = True + break + else: + # because a layer can be mapped on multiple oa dims, we will move to the next oa dim. + pass + # assert: if not scaled_success, + # it means the sm loop cannot pass the check, even though all mapped size on this layer dim is 1 + assert scaled_success, \ + f"The spatial loop cannot meet the current hardware dimension after scaling, " \ + f"Current spatial loop: {new_combination}" + new_combination_next = tuple(new_combination_next) + # Next we will judge if new_combination_next is a legal loop + # If it is, then we will keep the current combination, rather than new_combination_next, + # the reason is: new_combination can cover the entire layer dim, but new_combination_next is smaller than + # the layer dim, therefore the actual sm loop for the layer dim is a decimal number. + # In that case, we will ceil it up to mimic the real case on hardware. 
+ legal_spatial_loop, left_layer_dim_size_next = self.check_spatial_loop_legality( + combination=new_combination_next, layer=layer + ) + if not legal_spatial_loop: + new_combination = new_combination_next + left_layer_dim_size = left_layer_dim_size_next + else: + for layer_dim, layer_dim_size in left_layer_dim_size.items(): + # A special case when we will use new_combination_next when legal_spatial_loop == True + # This case is when new_combination_next exactly match the layer dim size (left size == 1) + if layer_dim_size < 1 and left_layer_dim_size_next[layer_dim] == 1: + new_combination = new_combination_next + left_layer_dim_size = left_layer_dim_size_next + break + return new_combination, left_layer_dim_size + def check_spatial_loop_legality(self, combination, layer): # Extra check on the total unrolling size of a layer dim, if it is mapped on >=2 dimensions. combination_check = { From 20f541c719564da69476dc5efd0fb8d6042fcd33 Mon Sep 17 00:00:00 2001 From: JiacongSun Date: Mon, 13 Nov 2023 14:31:12 +0100 Subject: [PATCH 10/14] fix typo in cost models for pure pe and imc --- tests/main/test_origin/test_ascend_like.py | 2 +- .../test_origin/test_meta_prototype_like.py | 2 +- tests/main/test_origin/test_tesla_npu_like.py | 8 +- tests/main/test_origin/test_tpu_like.py | 4 +- .../test_ascend_like.py | 2 +- .../test_ascend_like.py | 2 +- .../test_tesla_npu_like.py | 6 +- .../test_tpu_like.py | 4 +- zigzag/classes/cost_model/cost_model.py | 2 +- zigzag/classes/opt/spatial/generator.py | 29 +++- .../stages/SpatialMappingConversionStage.py | 142 +++++++++++++----- 11 files changed, 144 insertions(+), 59 deletions(-) diff --git a/tests/main/test_origin/test_ascend_like.py b/tests/main/test_origin/test_ascend_like.py index 248c6d0c..f92e9788 100644 --- a/tests/main/test_origin/test_ascend_like.py +++ b/tests/main/test_origin/test_ascend_like.py @@ -12,7 +12,7 @@ # Expected energy and latency for each workload defined above ens_lats = { 
"zigzag/inputs/examples/workload/alexnet.onnx": (5738192980.375, 8728331), - "zigzag/inputs/examples/workload/mobilenetv2.onnx": (1913797698.5250015, 7426499), + "zigzag/inputs/examples/workload/mobilenetv2.onnx": (1913797698.5250015, 7439255), "zigzag/inputs/examples/workload/resnet18.onnx": (1858697886.165, 3720129), "zigzag.inputs.examples.workload.resnet18": (2408671233.7250004, 4804196), } diff --git a/tests/main/test_origin/test_meta_prototype_like.py b/tests/main/test_origin/test_meta_prototype_like.py index e4299fba..4d8f397f 100644 --- a/tests/main/test_origin/test_meta_prototype_like.py +++ b/tests/main/test_origin/test_meta_prototype_like.py @@ -12,7 +12,7 @@ # Expected energy and latency for each workload defined above ens_lats = { "zigzag/inputs/examples/workload/alexnet.onnx": (5771558839.89, 8400651), - "zigzag/inputs/examples/workload/mobilenetv2.onnx": (1731935837.864999, 3594631), + "zigzag/inputs/examples/workload/mobilenetv2.onnx": (1731935837.864999, 3606391), "zigzag/inputs/examples/workload/resnet18.onnx": (1869519792.3449998, 3408373), "zigzag.inputs.examples.workload.resnet18": (2419893343.4549994, 4176163), } diff --git a/tests/main/test_origin/test_tesla_npu_like.py b/tests/main/test_origin/test_tesla_npu_like.py index 11a53097..f8a98a2c 100644 --- a/tests/main/test_origin/test_tesla_npu_like.py +++ b/tests/main/test_origin/test_tesla_npu_like.py @@ -11,10 +11,10 @@ # Expected energy and latency for each workload defined above ens_lats = { - "zigzag/inputs/examples/workload/alexnet.onnx": (6131950030.816001, 8486444), - "zigzag/inputs/examples/workload/mobilenetv2.onnx": (1671933042.2130003, 2909436), - "zigzag/inputs/examples/workload/resnet18.onnx": (1863717063.505, 3395752), - "zigzag.inputs.examples.workload.resnet18": (2375316568.8910007, 4082454), + "zigzag/inputs/examples/workload/alexnet.onnx": (6131950030.816001, 8496179), + "zigzag/inputs/examples/workload/mobilenetv2.onnx": (1671933042.2130003, 2964784), + 
"zigzag/inputs/examples/workload/resnet18.onnx": (1863717063.505, 3410738), + "zigzag.inputs.examples.workload.resnet18": (2375316568.8910007, 4096544), } diff --git a/tests/main/test_origin/test_tpu_like.py b/tests/main/test_origin/test_tpu_like.py index a2ca227f..d59700e6 100644 --- a/tests/main/test_origin/test_tpu_like.py +++ b/tests/main/test_origin/test_tpu_like.py @@ -11,8 +11,8 @@ # Expected energy and latency for each workload defined above ens_lats = { - "zigzag/inputs/examples/workload/alexnet.onnx": (5567502618.941999, 9078209), - "zigzag/inputs/examples/workload/mobilenetv2.onnx": (1904494517.552001, 23112606), + "zigzag/inputs/examples/workload/alexnet.onnx": (5567502618.941999, 9080913), + "zigzag/inputs/examples/workload/mobilenetv2.onnx": (1904494517.552001, 23131716), "zigzag/inputs/examples/workload/resnet18.onnx": (1795904779.6570003, 4160591), "zigzag.inputs.examples.workload.resnet18": (2296491401.491, 4909027), } diff --git a/tests/main/test_with_mix_spatial_mapping/test_ascend_like.py b/tests/main/test_with_mix_spatial_mapping/test_ascend_like.py index 182a872f..fcc09fa1 100644 --- a/tests/main/test_with_mix_spatial_mapping/test_ascend_like.py +++ b/tests/main/test_with_mix_spatial_mapping/test_ascend_like.py @@ -14,7 +14,7 @@ # Expected energy and latency for each workload defined above ens_lats = { "zigzag/inputs/examples/workload/alexnet.onnx": (5667407342.66, 8528846), - "zigzag/inputs/examples/workload/mobilenetv2.onnx": (921552096.0700004, 3828967), + "zigzag/inputs/examples/workload/mobilenetv2.onnx": (921552096.0700004, 3835435), "zigzag/inputs/examples/workload/resnet18.onnx": (1679218425.5100002, 3713386), "zigzag.inputs.examples.workload.resnet18": (2290766279.31, 4442443), } diff --git a/tests/main/test_without_unused_memory/test_ascend_like.py b/tests/main/test_without_unused_memory/test_ascend_like.py index 4eee129a..b6fc7a72 100644 --- a/tests/main/test_without_unused_memory/test_ascend_like.py +++ 
b/tests/main/test_without_unused_memory/test_ascend_like.py @@ -12,7 +12,7 @@ # Expected energy and latency for each workload defined above ens_lats = { "zigzag/inputs/examples/workload/alexnet.onnx": (5649555894.9, 8637780), - "zigzag/inputs/examples/workload/mobilenetv2.onnx": (1881386179.71, 6486685), + "zigzag/inputs/examples/workload/mobilenetv2.onnx": (1881386179.71, 6499441), "zigzag/inputs/examples/workload/resnet18.onnx": (1709089377.83, 3583047), "zigzag.inputs.examples.workload.resnet18": (2243493483.15, 4657130), } diff --git a/tests/main/test_without_unused_memory/test_tesla_npu_like.py b/tests/main/test_without_unused_memory/test_tesla_npu_like.py index 25eb9648..3ccaafb2 100644 --- a/tests/main/test_without_unused_memory/test_tesla_npu_like.py +++ b/tests/main/test_without_unused_memory/test_tesla_npu_like.py @@ -12,9 +12,9 @@ # Expected energy and latency for each workload defined above ens_lats = { "zigzag/inputs/examples/workload/alexnet.onnx": (6040086796.366001, 8389669), - "zigzag/inputs/examples/workload/mobilenetv2.onnx": (930702060.6110002, 1965457), - "zigzag/inputs/examples/workload/resnet18.onnx": (1724869681.4799998, 3257898), - "zigzag.inputs.examples.workload.resnet18": (2220861655.6660004, 3934616), + "zigzag/inputs/examples/workload/mobilenetv2.onnx": (930702060.6110002, 1969009), + "zigzag/inputs/examples/workload/resnet18.onnx": (1724869681.4799998, 3267252), + "zigzag.inputs.examples.workload.resnet18": (2220861655.6660004, 3943074), } diff --git a/tests/main/test_without_unused_memory/test_tpu_like.py b/tests/main/test_without_unused_memory/test_tpu_like.py index 28df3fa1..ae1fe912 100644 --- a/tests/main/test_without_unused_memory/test_tpu_like.py +++ b/tests/main/test_without_unused_memory/test_tpu_like.py @@ -11,8 +11,8 @@ # Expected energy and latency for each workload defined above ens_lats = { - "zigzag/inputs/examples/workload/alexnet.onnx": (5475639384.492001, 8979956), - 
"zigzag/inputs/examples/workload/mobilenetv2.onnx": (952688145.0069999, 21873214), + "zigzag/inputs/examples/workload/alexnet.onnx": (5475639384.492001, 8981556), + "zigzag/inputs/examples/workload/mobilenetv2.onnx": (952688145.0069999, 21873319), "zigzag/inputs/examples/workload/resnet18.onnx": (1659252422.016, 4000289), "zigzag.inputs.examples.workload.resnet18": (1982830786.5119998, 4509235), } diff --git a/zigzag/classes/cost_model/cost_model.py b/zigzag/classes/cost_model/cost_model.py index d4296170..5369dfd1 100644 --- a/zigzag/classes/cost_model/cost_model.py +++ b/zigzag/classes/cost_model/cost_model.py @@ -251,7 +251,7 @@ def __init__( # self.spatial_mapping_dict_int = spatial_mapping_fractional_to_int( # self.spatial_mapping.mapping_dict_origin # ) - self.spatial_mapping_dict_int = self.spatial_mapping_int + self.spatial_mapping_dict_int = self.spatial_mapping_int.mapping_dict_origin # For constructing Mapping object, the last parameter "self.access_same_data_considered_as_no_access" is optional self.mapping = Mapping( diff --git a/zigzag/classes/opt/spatial/generator.py b/zigzag/classes/opt/spatial/generator.py index f5d338c8..cf7d74c6 100644 --- a/zigzag/classes/opt/spatial/generator.py +++ b/zigzag/classes/opt/spatial/generator.py @@ -234,7 +234,10 @@ def generate_user_spatial_mappings( # 2 means: only check the top 2 spatial mapping with the highest hardware utilization # Modify "2" to other numbers if you want to check on more spatial mappings. 
break - new_combination, left_layer_dim_size = self.shrink_combination_when_a_layer_dim_is_mapped_on_multiple_oa_dims( + ( + new_combination, + left_layer_dim_size, + ) = self.shrink_combination_when_a_layer_dim_is_mapped_on_multiple_oa_dims( combination=combination, layer=self.layer, ) @@ -275,12 +278,20 @@ def shrink_combination_when_a_layer_dim_is_mapped_on_multiple_oa_dims( for layer_dim, layer_dim_size in left_layer_dim_size.items(): if layer_dim_size < 1: scaled_success = False - for oa_index in range(len(new_combination_next)-1, -1, -1): # reverse order on oa dims - (mapped_layer_dim, mapped_layer_dim_size) = new_combination_next[oa_index] + for oa_index in range( + len(new_combination_next) - 1, -1, -1 + ): # reverse order on oa dims + ( + mapped_layer_dim, + mapped_layer_dim_size, + ) = new_combination_next[oa_index] if mapped_layer_dim_size > 1: # shrink the mapped layer dim size mapped_layer_dim_size -= 1 - new_combination_next[oa_index] = (mapped_layer_dim, mapped_layer_dim_size) + new_combination_next[oa_index] = ( + mapped_layer_dim, + mapped_layer_dim_size, + ) scaled_success = True break else: @@ -288,16 +299,20 @@ def shrink_combination_when_a_layer_dim_is_mapped_on_multiple_oa_dims( pass # assert: if not scaled_success, # it means the sm loop cannot pass the check, even though all mapped size on this layer dim is 1 - assert scaled_success, \ - f"The spatial loop cannot meet the current hardware dimension after scaling, " \ + assert scaled_success, ( + f"The spatial loop cannot meet the current hardware dimension after scaling, " f"Current spatial loop: {new_combination}" + ) new_combination_next = tuple(new_combination_next) # Next we will judge if new_combination_next is a legal loop # If it is, then we will keep the current combination, rather than new_combination_next, # the reason is: new_combination can cover the entire layer dim, but new_combination_next is smaller than # the layer dim, therefore the actual sm loop for the layer dim is a 
decimal number. # In that case, we will ceil it up to mimic the real case on hardware. - legal_spatial_loop, left_layer_dim_size_next = self.check_spatial_loop_legality( + ( + legal_spatial_loop, + left_layer_dim_size_next, + ) = self.check_spatial_loop_legality( combination=new_combination_next, layer=layer ) if not legal_spatial_loop: diff --git a/zigzag/classes/stages/SpatialMappingConversionStage.py b/zigzag/classes/stages/SpatialMappingConversionStage.py index 6f89c9fc..f995df88 100644 --- a/zigzag/classes/stages/SpatialMappingConversionStage.py +++ b/zigzag/classes/stages/SpatialMappingConversionStage.py @@ -52,7 +52,9 @@ def is_nested_tuple(obj): def run(self): user_spatial_mapping = self.layer.user_spatial_mapping - spatial_mapping, spatial_mapping_int = self.convert_user_spatial_mapping(user_spatial_mapping) + spatial_mapping, spatial_mapping_int = self.convert_user_spatial_mapping( + user_spatial_mapping + ) # Since the spatial_mapping may be modified in the previous step, # we have to update this change to self.layer updated_user_spatial_mapping = {} @@ -105,13 +107,29 @@ def convert_user_spatial_mapping(self, user_spatial_mapping): oa_dims = core.operational_array.dimensions layer_dim_sizes = self.layer.loop_dim_size.copy() limited_user_spatial_mapping = {} # init dict we will be filling + limited_user_spatial_mapping_int = {} # init dict int we will be filling for oa_dim_name, spatial_loop in user_spatial_mapping.items(): if self.is_nested_tuple(spatial_loop): # mix sm loop limited_mix_user_spatial_mapping_on_dim = [] + limited_mix_user_spatial_mapping_int_on_dim = [] for spatial_loop_element in spatial_loop: limited_user_spatial_mapping_to_check = ( self.generate_limited_user_spatial_mapping( - layer_dim_sizes, oa_dims, oa_dim_name, spatial_loop_element, user_spatial_mapping + layer_dim_sizes, + oa_dims, + oa_dim_name, + spatial_loop_element, + user_spatial_mapping, + ) + ) + limited_user_spatial_mapping_int_to_check = ( + 
self.generate_limited_user_spatial_mapping( + layer_dim_sizes, + oa_dims, + oa_dim_name, + spatial_loop_element, + user_spatial_mapping, + False, ) ) if limited_user_spatial_mapping_to_check == None: @@ -120,19 +138,42 @@ def convert_user_spatial_mapping(self, user_spatial_mapping): limited_mix_user_spatial_mapping_on_dim.append( limited_user_spatial_mapping_to_check ) + limited_mix_user_spatial_mapping_int_on_dim.append( + limited_user_spatial_mapping_int_to_check + ) if len(limited_mix_user_spatial_mapping_on_dim) == 0: continue # Skip this spatial dimension if the defined dims in sm don't exist in the layer else: limited_mix_user_spatial_mapping_on_dim = tuple( limited_mix_user_spatial_mapping_on_dim ) + limited_mix_user_spatial_mapping_int_on_dim = tuple( + limited_mix_user_spatial_mapping_int_on_dim + ) limited_user_spatial_mapping[ oa_dim_name ] = limited_mix_user_spatial_mapping_on_dim + limited_user_spatial_mapping_int[ + oa_dim_name + ] = limited_mix_user_spatial_mapping_int_on_dim else: # single-dim sm loop limited_user_spatial_mapping_to_check = ( self.generate_limited_user_spatial_mapping( - layer_dim_sizes, oa_dims, oa_dim_name, spatial_loop, user_spatial_mapping + layer_dim_sizes, + oa_dims, + oa_dim_name, + spatial_loop, + user_spatial_mapping, + ) + ) + limited_user_spatial_mapping_int_to_check = ( + self.generate_limited_user_spatial_mapping( + layer_dim_sizes, + oa_dims, + oa_dim_name, + spatial_loop, + user_spatial_mapping, + False, ) ) if limited_user_spatial_mapping_to_check == None: @@ -141,6 +182,9 @@ def convert_user_spatial_mapping(self, user_spatial_mapping): limited_user_spatial_mapping[ oa_dim_name ] = limited_user_spatial_mapping_to_check + limited_user_spatial_mapping_int[ + oa_dim_name + ] = limited_user_spatial_mapping_int_to_check # Update the layer_dim_size to support multiple oa dims unrolling the same loop dim but not unrolling it more than the total layer dim # if ( # temporal_remainder == 1 @@ -163,14 +207,14 @@ def 
convert_user_spatial_mapping(self, user_spatial_mapping): spatial_mapping_dict = self.generate_spatial_mapping_dict( user_spatial_mapping=limited_user_spatial_mapping, layer=self.layer, - accelerator=self.accelerator + accelerator=self.accelerator, ) # The next spatial_mapping_dict is used in cost model to calculate the interval between different data transfer. - # Different with the one above, there must only be integer numbers (corresponding to the real cases) + # Different with the one above, there are only integer numbers (corresponding to the real cases) spatial_mapping_dict_int = self.generate_spatial_mapping_dict( - user_spatial_mapping=user_spatial_mapping, + user_spatial_mapping=limited_user_spatial_mapping_int, layer=self.layer, - accelerator=self.accelerator + accelerator=self.accelerator, ) return SpatialMapping( @@ -180,7 +224,13 @@ def convert_user_spatial_mapping(self, user_spatial_mapping): ) def generate_limited_user_spatial_mapping( - self, layer_dim_sizes, oa_dims, oa_dim_name, spatial_loop, user_spatial_mapping + self, + layer_dim_sizes, + oa_dims, + oa_dim_name, + spatial_loop, + user_spatial_mapping, + check_3=True, ): ## Do check on spatial mapping, and convert the mapping to a tuple (loop_dim_unrolled, loop_size_unrolled) = spatial_loop @@ -195,29 +245,41 @@ def generate_limited_user_spatial_mapping( # Check 2: Limit unrolling if layer dimension is smaller than provided unrolling or if the loop dim doesn't exist layer_dim_size = layer_dim_sizes.get(loop_dim_unrolled, 1) loop_size_unrolled = min(layer_dim_size, loop_size_unrolled) - # Check 3: Adjust unrolling if it is not a multiple of the layer dimension size - # and if there is no more mapping for this layer dimension - no_more_mapping_for_current_layer_dim = self.check_if_there_is_further_oa_mapping_for_current_layer_dim( - oa_dim_name=oa_dim_name, - loop_dim_unrolled=loop_dim_unrolled, - user_spatial_mapping=user_spatial_mapping - ) - if no_more_mapping_for_current_layer_dim: - 
loop_size_unrolled_on_early_oa_dims = self.calc_unrolled_loop_size_on_early_oa_dims( - oa_dim_name=oa_dim_name, - loop_dim_unrolled=loop_dim_unrolled, - user_spatial_mapping=user_spatial_mapping + if check_3: + # Check 3: Adjust unrolling if it is not a multiple of the layer dimension size + # and if there is no more mapping for this layer dimension + no_more_mapping_for_current_layer_dim = ( + self.check_if_there_is_further_oa_mapping_for_current_layer_dim( + oa_dim_name=oa_dim_name, + loop_dim_unrolled=loop_dim_unrolled, + user_spatial_mapping=user_spatial_mapping, + ) ) - temporal_remainder = int(np.ceil(layer_dim_size / (loop_size_unrolled*loop_size_unrolled_on_early_oa_dims))) - loop_size_unrolled = layer_dim_size / temporal_remainder / loop_size_unrolled_on_early_oa_dims + if no_more_mapping_for_current_layer_dim: + loop_size_unrolled_on_early_oa_dims = ( + self.calc_unrolled_loop_size_on_early_oa_dims( + oa_dim_name=oa_dim_name, + loop_dim_unrolled=loop_dim_unrolled, + user_spatial_mapping=user_spatial_mapping, + ) + ) + temporal_remainder = int( + np.ceil( + layer_dim_size + / (loop_size_unrolled * loop_size_unrolled_on_early_oa_dims) + ) + ) + loop_size_unrolled = ( + layer_dim_size + / temporal_remainder + / loop_size_unrolled_on_early_oa_dims + ) return ( loop_dim_unrolled, loop_size_unrolled, ) - def generate_spatial_mapping_dict( - self, user_spatial_mapping, layer, accelerator - ): + def generate_spatial_mapping_dict(self, user_spatial_mapping, layer, accelerator): # This function is to convert spatial mapping to spatial_mapping_dict, # which attaches spatial mapping to different memory levels. 
spatial_mapping_dict = {} @@ -253,8 +315,8 @@ def generate_spatial_mapping_dict( spatial_mapping_size, ) = sub_spatial_loop if ( - spatial_mapping_dim - in spatial_mapping_lvl_dict.keys() + spatial_mapping_dim + in spatial_mapping_lvl_dict.keys() ): spatial_mapping_lvl_dict[ spatial_mapping_dim @@ -277,8 +339,8 @@ def generate_spatial_mapping_dict( # as the spatial mapping representation is a level-by-level one. del user_sm_copy[dim_name] for ( - spatial_mapping_lvl_dict_dim, - spatial_mapping_lvl_dict_size, + spatial_mapping_lvl_dict_dim, + spatial_mapping_lvl_dict_size, ) in spatial_mapping_lvl_dict.items(): spatial_mapping_lvl.append( (spatial_mapping_lvl_dict_dim, spatial_mapping_lvl_dict_size) @@ -319,7 +381,9 @@ def check_if_there_is_further_oa_mapping_for_current_layer_dim( loop_dim_unrolled_private = spatial_loop_private[0] if loop_dim_unrolled == loop_dim_unrolled_private: no_more_mapping_for_current_layer_dim = False - if not no_more_mapping_for_current_layer_dim: # early exit if the flag is already False + if ( + not no_more_mapping_for_current_layer_dim + ): # early exit if the flag is already False break return no_more_mapping_for_current_layer_dim @@ -332,12 +396,18 @@ def calc_unrolled_loop_size_on_early_oa_dims( if oa_dim_name == oa_dim_name_private: break if self.is_nested_tuple(spatial_loop_private): # mix sm loop - for spatial_loop_element in spatial_loop_private: - (loop_dim_unrolled_private, loop_size_unrolled_private) = spatial_loop_element - if loop_dim_unrolled == loop_dim_unrolled_private: - loop_unrolled_size_already *= loop_size_unrolled_private + for spatial_loop_element in spatial_loop_private: + ( + loop_dim_unrolled_private, + loop_size_unrolled_private, + ) = spatial_loop_element + if loop_dim_unrolled == loop_dim_unrolled_private: + loop_unrolled_size_already *= loop_size_unrolled_private else: - (loop_dim_unrolled_private, loop_size_unrolled_private) = spatial_loop_private + ( + loop_dim_unrolled_private, + 
loop_size_unrolled_private, + ) = spatial_loop_private if loop_dim_unrolled == loop_dim_unrolled_private: loop_unrolled_size_already *= loop_size_unrolled_private - return loop_unrolled_size_already \ No newline at end of file + return loop_unrolled_size_already From c684a49c0c6dd980f77019f9c86b7d8c27320eb3 Mon Sep 17 00:00:00 2001 From: JiacongSun Date: Mon, 13 Nov 2023 18:51:17 +0100 Subject: [PATCH 11/14] Reform the user-provided sm loop if it exceeds the layer size. Update pytest results. --- tests/main/test_origin/test_ascend_like.py | 2 +- .../test_origin/test_meta_prototype_like.py | 2 +- tests/main/test_origin/test_tesla_npu_like.py | 8 +-- tests/main/test_origin/test_tpu_like.py | 4 +- .../test_ascend_like.py | 2 +- .../test_edge_tpu_like.py | 8 +-- .../test_meta_prototype_like.py | 8 +-- .../test_tesla_npu_like.py | 8 +-- .../test_ascend_like.py | 2 +- .../test_tesla_npu_like.py | 6 +-- .../test_tpu_like.py | 4 +- zigzag/classes/opt/spatial/generator.py | 23 +++++++- .../stages/SpatialMappingConversionStage.py | 53 ++++++++++++------- 13 files changed, 82 insertions(+), 48 deletions(-) diff --git a/tests/main/test_origin/test_ascend_like.py b/tests/main/test_origin/test_ascend_like.py index f92e9788..248c6d0c 100644 --- a/tests/main/test_origin/test_ascend_like.py +++ b/tests/main/test_origin/test_ascend_like.py @@ -12,7 +12,7 @@ # Expected energy and latency for each workload defined above ens_lats = { "zigzag/inputs/examples/workload/alexnet.onnx": (5738192980.375, 8728331), - "zigzag/inputs/examples/workload/mobilenetv2.onnx": (1913797698.5250015, 7439255), + "zigzag/inputs/examples/workload/mobilenetv2.onnx": (1913797698.5250015, 7426499), "zigzag/inputs/examples/workload/resnet18.onnx": (1858697886.165, 3720129), "zigzag.inputs.examples.workload.resnet18": (2408671233.7250004, 4804196), } diff --git a/tests/main/test_origin/test_meta_prototype_like.py b/tests/main/test_origin/test_meta_prototype_like.py index 4d8f397f..e4299fba 100644 --- 
a/tests/main/test_origin/test_meta_prototype_like.py +++ b/tests/main/test_origin/test_meta_prototype_like.py @@ -12,7 +12,7 @@ # Expected energy and latency for each workload defined above ens_lats = { "zigzag/inputs/examples/workload/alexnet.onnx": (5771558839.89, 8400651), - "zigzag/inputs/examples/workload/mobilenetv2.onnx": (1731935837.864999, 3606391), + "zigzag/inputs/examples/workload/mobilenetv2.onnx": (1731935837.864999, 3594631), "zigzag/inputs/examples/workload/resnet18.onnx": (1869519792.3449998, 3408373), "zigzag.inputs.examples.workload.resnet18": (2419893343.4549994, 4176163), } diff --git a/tests/main/test_origin/test_tesla_npu_like.py b/tests/main/test_origin/test_tesla_npu_like.py index f8a98a2c..11a53097 100644 --- a/tests/main/test_origin/test_tesla_npu_like.py +++ b/tests/main/test_origin/test_tesla_npu_like.py @@ -11,10 +11,10 @@ # Expected energy and latency for each workload defined above ens_lats = { - "zigzag/inputs/examples/workload/alexnet.onnx": (6131950030.816001, 8496179), - "zigzag/inputs/examples/workload/mobilenetv2.onnx": (1671933042.2130003, 2964784), - "zigzag/inputs/examples/workload/resnet18.onnx": (1863717063.505, 3410738), - "zigzag.inputs.examples.workload.resnet18": (2375316568.8910007, 4096544), + "zigzag/inputs/examples/workload/alexnet.onnx": (6131950030.816001, 8486444), + "zigzag/inputs/examples/workload/mobilenetv2.onnx": (1671933042.2130003, 2909436), + "zigzag/inputs/examples/workload/resnet18.onnx": (1863717063.505, 3395752), + "zigzag.inputs.examples.workload.resnet18": (2375316568.8910007, 4082454), } diff --git a/tests/main/test_origin/test_tpu_like.py b/tests/main/test_origin/test_tpu_like.py index d59700e6..a2ca227f 100644 --- a/tests/main/test_origin/test_tpu_like.py +++ b/tests/main/test_origin/test_tpu_like.py @@ -11,8 +11,8 @@ # Expected energy and latency for each workload defined above ens_lats = { - "zigzag/inputs/examples/workload/alexnet.onnx": (5567502618.941999, 9080913), - 
"zigzag/inputs/examples/workload/mobilenetv2.onnx": (1904494517.552001, 23131716), + "zigzag/inputs/examples/workload/alexnet.onnx": (5567502618.941999, 9078209), + "zigzag/inputs/examples/workload/mobilenetv2.onnx": (1904494517.552001, 23112606), "zigzag/inputs/examples/workload/resnet18.onnx": (1795904779.6570003, 4160591), "zigzag.inputs.examples.workload.resnet18": (2296491401.491, 4909027), } diff --git a/tests/main/test_with_mix_spatial_mapping/test_ascend_like.py b/tests/main/test_with_mix_spatial_mapping/test_ascend_like.py index fcc09fa1..182a872f 100644 --- a/tests/main/test_with_mix_spatial_mapping/test_ascend_like.py +++ b/tests/main/test_with_mix_spatial_mapping/test_ascend_like.py @@ -14,7 +14,7 @@ # Expected energy and latency for each workload defined above ens_lats = { "zigzag/inputs/examples/workload/alexnet.onnx": (5667407342.66, 8528846), - "zigzag/inputs/examples/workload/mobilenetv2.onnx": (921552096.0700004, 3835435), + "zigzag/inputs/examples/workload/mobilenetv2.onnx": (921552096.0700004, 3828967), "zigzag/inputs/examples/workload/resnet18.onnx": (1679218425.5100002, 3713386), "zigzag.inputs.examples.workload.resnet18": (2290766279.31, 4442443), } diff --git a/tests/main/test_with_mix_spatial_mapping/test_edge_tpu_like.py b/tests/main/test_with_mix_spatial_mapping/test_edge_tpu_like.py index 86acd714..8287ba69 100644 --- a/tests/main/test_with_mix_spatial_mapping/test_edge_tpu_like.py +++ b/tests/main/test_with_mix_spatial_mapping/test_edge_tpu_like.py @@ -13,10 +13,10 @@ # Expected energy and latency for each workload defined above ens_lats = { - "zigzag/inputs/examples/workload/alexnet.onnx": (5582430184.085, 8343378), - "zigzag/inputs/examples/workload/mobilenetv2.onnx": (762066732.5049998, 3003074), - "zigzag/inputs/examples/workload/resnet18.onnx": (1743190534.155, 5305825), - "zigzag.inputs.examples.workload.resnet18": (2087322696.315, 6155355), + "zigzag/inputs/examples/workload/alexnet.onnx": (5582059481.445, 8343378), + 
"zigzag/inputs/examples/workload/mobilenetv2.onnx": (819971935.77, 2430583), + "zigzag/inputs/examples/workload/resnet18.onnx": (1763135800.67, 5001291), + "zigzag.inputs.examples.workload.resnet18": (2090252961.0700002, 5858437), } diff --git a/tests/main/test_with_mix_spatial_mapping/test_meta_prototype_like.py b/tests/main/test_with_mix_spatial_mapping/test_meta_prototype_like.py index ff7ea9a8..c002b8d9 100644 --- a/tests/main/test_with_mix_spatial_mapping/test_meta_prototype_like.py +++ b/tests/main/test_with_mix_spatial_mapping/test_meta_prototype_like.py @@ -13,10 +13,10 @@ # Expected energy and latency for each workload defined above ens_lats = { - "zigzag/inputs/examples/workload/alexnet.onnx": (5681909351.240001, 8299150), - "zigzag/inputs/examples/workload/mobilenetv2.onnx": (919452681.2249999, 2894129), - "zigzag/inputs/examples/workload/resnet18.onnx": (1789888904.4450002, 3472280), - "zigzag.inputs.examples.workload.resnet18": (2348207081.7949996, 4238517), + "zigzag/inputs/examples/workload/alexnet.onnx": (5679695605, 8299150), + "zigzag/inputs/examples/workload/mobilenetv2.onnx": (901092009, 2610609), + "zigzag/inputs/examples/workload/resnet18.onnx": (1730672410, 3262009), + "zigzag.inputs.examples.workload.resnet18": (2265438430, 4017227), } diff --git a/tests/main/test_with_mix_spatial_mapping/test_tesla_npu_like.py b/tests/main/test_with_mix_spatial_mapping/test_tesla_npu_like.py index 682604d4..c4b0c5e6 100644 --- a/tests/main/test_with_mix_spatial_mapping/test_tesla_npu_like.py +++ b/tests/main/test_with_mix_spatial_mapping/test_tesla_npu_like.py @@ -13,10 +13,10 @@ # Expected energy and latency for each workload defined above ens_lats = { - "zigzag/inputs/examples/workload/alexnet.onnx": (6040086796.366001, 8389669), - "zigzag/inputs/examples/workload/mobilenetv2.onnx": (958401881.3470002, 1964453), - "zigzag/inputs/examples/workload/resnet18.onnx": (1724869681.4799998, 3257898), - "zigzag.inputs.examples.workload.resnet18": 
(2220861655.6660004, 3934616), + "zigzag/inputs/examples/workload/alexnet.onnx": (6044768678, 8370470), + "zigzag/inputs/examples/workload/mobilenetv2.onnx": (930702060, 1965457), + "zigzag/inputs/examples/workload/resnet18.onnx": (1724869681, 3257898), + "zigzag.inputs.examples.workload.resnet18": (2220861655, 3934616), } diff --git a/tests/main/test_without_unused_memory/test_ascend_like.py b/tests/main/test_without_unused_memory/test_ascend_like.py index b6fc7a72..4eee129a 100644 --- a/tests/main/test_without_unused_memory/test_ascend_like.py +++ b/tests/main/test_without_unused_memory/test_ascend_like.py @@ -12,7 +12,7 @@ # Expected energy and latency for each workload defined above ens_lats = { "zigzag/inputs/examples/workload/alexnet.onnx": (5649555894.9, 8637780), - "zigzag/inputs/examples/workload/mobilenetv2.onnx": (1881386179.71, 6499441), + "zigzag/inputs/examples/workload/mobilenetv2.onnx": (1881386179.71, 6486685), "zigzag/inputs/examples/workload/resnet18.onnx": (1709089377.83, 3583047), "zigzag.inputs.examples.workload.resnet18": (2243493483.15, 4657130), } diff --git a/tests/main/test_without_unused_memory/test_tesla_npu_like.py b/tests/main/test_without_unused_memory/test_tesla_npu_like.py index 3ccaafb2..25eb9648 100644 --- a/tests/main/test_without_unused_memory/test_tesla_npu_like.py +++ b/tests/main/test_without_unused_memory/test_tesla_npu_like.py @@ -12,9 +12,9 @@ # Expected energy and latency for each workload defined above ens_lats = { "zigzag/inputs/examples/workload/alexnet.onnx": (6040086796.366001, 8389669), - "zigzag/inputs/examples/workload/mobilenetv2.onnx": (930702060.6110002, 1969009), - "zigzag/inputs/examples/workload/resnet18.onnx": (1724869681.4799998, 3267252), - "zigzag.inputs.examples.workload.resnet18": (2220861655.6660004, 3943074), + "zigzag/inputs/examples/workload/mobilenetv2.onnx": (930702060.6110002, 1965457), + "zigzag/inputs/examples/workload/resnet18.onnx": (1724869681.4799998, 3257898), + 
"zigzag.inputs.examples.workload.resnet18": (2220861655.6660004, 3934616), } diff --git a/tests/main/test_without_unused_memory/test_tpu_like.py b/tests/main/test_without_unused_memory/test_tpu_like.py index ae1fe912..28df3fa1 100644 --- a/tests/main/test_without_unused_memory/test_tpu_like.py +++ b/tests/main/test_without_unused_memory/test_tpu_like.py @@ -11,8 +11,8 @@ # Expected energy and latency for each workload defined above ens_lats = { - "zigzag/inputs/examples/workload/alexnet.onnx": (5475639384.492001, 8981556), - "zigzag/inputs/examples/workload/mobilenetv2.onnx": (952688145.0069999, 21873319), + "zigzag/inputs/examples/workload/alexnet.onnx": (5475639384.492001, 8979956), + "zigzag/inputs/examples/workload/mobilenetv2.onnx": (952688145.0069999, 21873214), "zigzag/inputs/examples/workload/resnet18.onnx": (1659252422.016, 4000289), "zigzag.inputs.examples.workload.resnet18": (1982830786.5119998, 4509235), } diff --git a/zigzag/classes/opt/spatial/generator.py b/zigzag/classes/opt/spatial/generator.py index cf7d74c6..4a7d4ddc 100644 --- a/zigzag/classes/opt/spatial/generator.py +++ b/zigzag/classes/opt/spatial/generator.py @@ -134,7 +134,28 @@ def generate_user_spatial_mappings( defined_mapping is not None and defined_mapping.get(oa_dim.name) is not None ): - oa_dim_unrollings = [defined_mapping.get(oa_dim.name)] + # scale down the defined_mapping size if it exceeds the layer dim size + ori_loop = defined_mapping.get(oa_dim.name) + loop_to_reform = [] + if self.is_nested_tuple(ori_loop): # mix sm loop + for sub_loop in ori_loop: + sub_loop_dim = sub_loop[0] + sub_loop_size = sub_loop[1] + if sub_loop_dim in self.layer.loop_dim_size.keys(): + if sub_loop_size > self.layer.loop_dim_size[sub_loop_dim]: + sub_loop_size = self.layer.loop_dim_size[sub_loop_dim] + loop_to_reform.append((sub_loop_dim, sub_loop_size)) + else: # single layer sm loop + loop_dim = ori_loop[0] + loop_size = ori_loop[1] + if loop_dim in self.layer.loop_dim_size.keys(): + if loop_size > 
self.layer.loop_dim_size[loop_dim]: + loop_size = self.layer.loop_dim_size[loop_dim] + loop_to_reform.append((loop_dim, loop_size)) + loop_to_reform = tuple(loop_to_reform) + if len(loop_to_reform) == 0: + loop_to_reform = None + oa_dim_unrollings = [loop_to_reform] else: oa_dim_unrollings = [] oa_dim_unrolling_hints = user_spatial_mapping_hint[oa_dim.name] diff --git a/zigzag/classes/stages/SpatialMappingConversionStage.py b/zigzag/classes/stages/SpatialMappingConversionStage.py index f995df88..b55d3cf4 100644 --- a/zigzag/classes/stages/SpatialMappingConversionStage.py +++ b/zigzag/classes/stages/SpatialMappingConversionStage.py @@ -120,6 +120,7 @@ def convert_user_spatial_mapping(self, user_spatial_mapping): oa_dim_name, spatial_loop_element, user_spatial_mapping, + limited_user_spatial_mapping, ) ) limited_user_spatial_mapping_int_to_check = ( @@ -129,6 +130,7 @@ def convert_user_spatial_mapping(self, user_spatial_mapping): oa_dim_name, spatial_loop_element, user_spatial_mapping, + limited_user_spatial_mapping, False, ) ) @@ -164,6 +166,7 @@ def convert_user_spatial_mapping(self, user_spatial_mapping): oa_dim_name, spatial_loop, user_spatial_mapping, + limited_user_spatial_mapping, ) ) limited_user_spatial_mapping_int_to_check = ( @@ -173,6 +176,7 @@ def convert_user_spatial_mapping(self, user_spatial_mapping): oa_dim_name, spatial_loop, user_spatial_mapping, + limited_user_spatial_mapping, False, ) ) @@ -230,7 +234,8 @@ def generate_limited_user_spatial_mapping( oa_dim_name, spatial_loop, user_spatial_mapping, - check_3=True, + limited_user_spatial_mapping, + allow_decimal_sm_loop_size=True, ): ## Do check on spatial mapping, and convert the mapping to a tuple (loop_dim_unrolled, loop_size_unrolled) = spatial_loop @@ -245,35 +250,43 @@ def generate_limited_user_spatial_mapping( # Check 2: Limit unrolling if layer dimension is smaller than provided unrolling or if the loop dim doesn't exist layer_dim_size = layer_dim_sizes.get(loop_dim_unrolled, 1) 
loop_size_unrolled = min(layer_dim_size, loop_size_unrolled) - if check_3: - # Check 3: Adjust unrolling if it is not a multiple of the layer dimension size - # and if there is no more mapping for this layer dimension - no_more_mapping_for_current_layer_dim = ( - self.check_if_there_is_further_oa_mapping_for_current_layer_dim( + # Check 3: Adjust unrolling if it is not a multiple of the layer dimension size + # and if there is no more mapping for this layer dimension + no_more_mapping_for_current_layer_dim = ( + self.check_if_there_is_further_oa_mapping_for_current_layer_dim( + oa_dim_name=oa_dim_name, + loop_dim_unrolled=loop_dim_unrolled, + user_spatial_mapping=user_spatial_mapping, + ) + ) + if no_more_mapping_for_current_layer_dim: + loop_size_unrolled_on_early_oa_dims = ( + self.calc_unrolled_loop_size_on_early_oa_dims( oa_dim_name=oa_dim_name, loop_dim_unrolled=loop_dim_unrolled, - user_spatial_mapping=user_spatial_mapping, + user_spatial_mapping=limited_user_spatial_mapping, ) ) - if no_more_mapping_for_current_layer_dim: - loop_size_unrolled_on_early_oa_dims = ( - self.calc_unrolled_loop_size_on_early_oa_dims( - oa_dim_name=oa_dim_name, - loop_dim_unrolled=loop_dim_unrolled, - user_spatial_mapping=user_spatial_mapping, - ) - ) - temporal_remainder = int( - np.ceil( - layer_dim_size - / (loop_size_unrolled * loop_size_unrolled_on_early_oa_dims) - ) + temporal_remainder = int( + np.ceil( + layer_dim_size + / (loop_size_unrolled * loop_size_unrolled_on_early_oa_dims) ) + ) + if allow_decimal_sm_loop_size: loop_size_unrolled = ( layer_dim_size / temporal_remainder / loop_size_unrolled_on_early_oa_dims ) + else: + loop_size_unrolled = int( + np.ceil( + layer_dim_size + / temporal_remainder + / loop_size_unrolled_on_early_oa_dims + ) + ) return ( loop_dim_unrolled, loop_size_unrolled, From 79df7f8c7773a1e70e624f69cc8583f633ffc9a2 Mon Sep 17 00:00:00 2001 From: JiacongSun Date: Mon, 13 Nov 2023 18:59:44 +0100 Subject: [PATCH 12/14] update top_level_spatial_mapping 
in SpatialMappingConversionStage to support mix sm loop --- .../stages/SpatialMappingConversionStage.py | 41 ++++++++++++++++--- 1 file changed, 35 insertions(+), 6 deletions(-) diff --git a/zigzag/classes/stages/SpatialMappingConversionStage.py b/zigzag/classes/stages/SpatialMappingConversionStage.py index b55d3cf4..7d08ae4f 100644 --- a/zigzag/classes/stages/SpatialMappingConversionStage.py +++ b/zigzag/classes/stages/SpatialMappingConversionStage.py @@ -220,12 +220,21 @@ def convert_user_spatial_mapping(self, user_spatial_mapping): layer=self.layer, accelerator=self.accelerator, ) + try: + SpatialMapping(spatial_mapping_dict=spatial_mapping_dict, layer_node=self.layer), SpatialMapping( + spatial_mapping_dict=spatial_mapping_dict_int, layer_node=self.layer + ) + except: + pass - return SpatialMapping( - spatial_mapping_dict=spatial_mapping_dict, layer_node=self.layer - ), SpatialMapping( - spatial_mapping_dict=spatial_mapping_dict_int, layer_node=self.layer - ) + try: + return SpatialMapping( + spatial_mapping_dict=spatial_mapping_dict, layer_node=self.layer + ), SpatialMapping( + spatial_mapping_dict=spatial_mapping_dict_int, layer_node=self.layer + ) + except: + pass def generate_limited_user_spatial_mapping( self, @@ -363,8 +372,28 @@ def generate_spatial_mapping_dict(self, user_spatial_mapping, layer, accelerator # After we have gone through the memory levels, if there are still user-defined dimensions # present, add them as the top level. Otherwise add an empty list to make arch levels correct: # because first list we added was the operational array level. + + # We will merge together if the top memory level is serving multiple oa dims + # and there are layer dims existing on multiple oa dims. 
+ top_level_spatial_mapping_dict = {} + for (dim_name, spatial_loop) in user_sm_copy.items(): + if self.is_nested_tuple(spatial_loop): # mix sm loop + for sub_spatial_loop in spatial_loop: + spatial_loop_dim = sub_spatial_loop[0] + spatial_loop_size = sub_spatial_loop[1] + if spatial_loop_dim not in top_level_spatial_mapping_dict.keys(): + top_level_spatial_mapping_dict[spatial_loop_dim] = spatial_loop_size + else: + top_level_spatial_mapping_dict[spatial_loop_dim] *= spatial_loop_size + else: + spatial_loop_dim = spatial_loop[0] + spatial_loop_size = spatial_loop[1] + if spatial_loop_dim not in top_level_spatial_mapping_dict.keys(): + top_level_spatial_mapping_dict[spatial_loop_dim] = spatial_loop_size + else: + top_level_spatial_mapping_dict[spatial_loop_dim] *= spatial_loop_size top_level_spatial_mapping = [ - spatial_loop for (dim_name, spatial_loop) in user_sm_copy.items() + (layer_dim, layer_size) for (layer_dim, layer_size) in top_level_spatial_mapping_dict.items() ] spatial_mapping_dict[layer_op].append(top_level_spatial_mapping) return spatial_mapping_dict From aff0de7a3aac42427a53b3a12e7a6c4aabd2a297 Mon Sep 17 00:00:00 2001 From: JiacongSun Date: Mon, 13 Nov 2023 19:18:33 +0100 Subject: [PATCH 13/14] update SearchUnusedMemoryStage and keep top weight mem to be a mem that serves all hardware dims --- .../classes/stages/SearchUnusedMemoryStage.py | 33 +++++++++++++++++-- 1 file changed, 30 insertions(+), 3 deletions(-) diff --git a/zigzag/classes/stages/SearchUnusedMemoryStage.py b/zigzag/classes/stages/SearchUnusedMemoryStage.py index c63136b0..5143ee9f 100644 --- a/zigzag/classes/stages/SearchUnusedMemoryStage.py +++ b/zigzag/classes/stages/SearchUnusedMemoryStage.py @@ -302,9 +302,16 @@ def update_top_mem_level(self): if ( const_operand in served_operands ): # identify the top weight mem level + # We need to check if the current mem serves all oa dims, otherwise we will not decrease + # the mem_update_weight.
+ # The reason is if the current mem not serve all oa dims, the mapping will impact the memory + # utilization, so solely comparing with total memory size will be incorrect. + mem_serve_all_oa_dims = self.check_if_mem_serve_all_oa_dims( + mem, self.accelerator + ) if ( curr_mem_level < self.mem_update_weight - ): # mem_update_weight is bigger than the top weight mem level + ) and mem_serve_all_oa_dims: # mem_update_weight is bigger than the top weight mem level self.mem_update_weight = curr_mem_level break else: ## node (layer) that is not a branch starting node or a branch final node @@ -402,9 +409,18 @@ def update_top_mem_level(self): self.update_IO_mem_level( curr_id, output_operand, curr_mem_level ) # update output mem level + # For weight, we need to check if the current mem serve all oa dims, otherwise we will not + # decrease the mem_update_weight. + # The reason is if the current mem not serve all oa dims, the mapping will impact the memory + # utilization, so solely comparing with total memory size will be incorrect. + mem_serve_all_oa_dims = self.check_if_mem_serve_all_oa_dims( + mem, self.accelerator + ) if ( - curr_mem_level < self.mem_update_weight - ) and mem_serve_weight: # update weight mem level + (curr_mem_level < self.mem_update_weight) + and mem_serve_all_oa_dims + and mem_serve_weight + ): # update weight mem level self.mem_update_weight = curr_mem_level ## [OPTIONAL CHECK] assert check if there is -1 value in mem_update_list ## [NOTE] Until here, if there is still -1 value in mem_update_list, it means the size of top mem level for IO is not big enough. @@ -414,6 +430,17 @@ def update_top_mem_level(self): list(operand_dict.values())[0] >= 0 ), "SearchUnusedMemoryStage fisnishes abnormally, there are still layers with top mem levels not figured out." 
+ def check_if_mem_serve_all_oa_dims(self, mem, accelerator): + # check if mem serves all hardware dimensions + core = accelerator.cores[0] + operational_array = core.operational_array + oa_dim_nb = len(operational_array.dimensions) + mem_served_oa_dim_nb = len(mem.served_dimensions) + if mem_served_oa_dim_nb == oa_dim_nb: + return True + else: + return False + def update_mem_level_for_loading_data(self): """ [OPTIONAL FUNCTION] This is an optional function. From 47c2cf585b4032f7aed8b5b5a99d70026f3e72e1 Mon Sep 17 00:00:00 2001 From: JiacongSun Date: Mon, 13 Nov 2023 20:09:29 +0100 Subject: [PATCH 14/14] remove codes for debugging --- .../stages/SpatialMappingConversionStage.py | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-) diff --git a/zigzag/classes/stages/SpatialMappingConversionStage.py b/zigzag/classes/stages/SpatialMappingConversionStage.py index 7d08ae4f..617921e7 100644 --- a/zigzag/classes/stages/SpatialMappingConversionStage.py +++ b/zigzag/classes/stages/SpatialMappingConversionStage.py @@ -220,21 +220,12 @@ def convert_user_spatial_mapping(self, user_spatial_mapping): layer=self.layer, accelerator=self.accelerator, ) - try: - SpatialMapping(spatial_mapping_dict=spatial_mapping_dict, layer_node=self.layer), SpatialMapping( - spatial_mapping_dict=spatial_mapping_dict_int, layer_node=self.layer - ) - except: - pass - - try: - return SpatialMapping( - spatial_mapping_dict=spatial_mapping_dict, layer_node=self.layer - ), SpatialMapping( - spatial_mapping_dict=spatial_mapping_dict_int, layer_node=self.layer - ) - except: - pass + return SpatialMapping( + spatial_mapping_dict=spatial_mapping_dict, layer_node=self.layer + ), SpatialMapping( + spatial_mapping_dict=spatial_mapping_dict_int, layer_node=self.layer + ) def generate_limited_user_spatial_mapping( self,