diff --git a/tests/main/test_origin/test_ascend_like.py b/tests/main/test_origin/test_ascend_like.py index f92e9788..248c6d0c 100644 --- a/tests/main/test_origin/test_ascend_like.py +++ b/tests/main/test_origin/test_ascend_like.py @@ -12,7 +12,7 @@ # Expected energy and latency for each workload defined above ens_lats = { "zigzag/inputs/examples/workload/alexnet.onnx": (5738192980.375, 8728331), - "zigzag/inputs/examples/workload/mobilenetv2.onnx": (1913797698.5250015, 7439255), + "zigzag/inputs/examples/workload/mobilenetv2.onnx": (1913797698.5250015, 7426499), "zigzag/inputs/examples/workload/resnet18.onnx": (1858697886.165, 3720129), "zigzag.inputs.examples.workload.resnet18": (2408671233.7250004, 4804196), } diff --git a/tests/main/test_origin/test_meta_prototype_like.py b/tests/main/test_origin/test_meta_prototype_like.py index 4d8f397f..e4299fba 100644 --- a/tests/main/test_origin/test_meta_prototype_like.py +++ b/tests/main/test_origin/test_meta_prototype_like.py @@ -12,7 +12,7 @@ # Expected energy and latency for each workload defined above ens_lats = { "zigzag/inputs/examples/workload/alexnet.onnx": (5771558839.89, 8400651), - "zigzag/inputs/examples/workload/mobilenetv2.onnx": (1731935837.864999, 3606391), + "zigzag/inputs/examples/workload/mobilenetv2.onnx": (1731935837.864999, 3594631), "zigzag/inputs/examples/workload/resnet18.onnx": (1869519792.3449998, 3408373), "zigzag.inputs.examples.workload.resnet18": (2419893343.4549994, 4176163), } diff --git a/tests/main/test_origin/test_tesla_npu_like.py b/tests/main/test_origin/test_tesla_npu_like.py index f8a98a2c..11a53097 100644 --- a/tests/main/test_origin/test_tesla_npu_like.py +++ b/tests/main/test_origin/test_tesla_npu_like.py @@ -11,10 +11,10 @@ # Expected energy and latency for each workload defined above ens_lats = { - "zigzag/inputs/examples/workload/alexnet.onnx": (6131950030.816001, 8496179), - "zigzag/inputs/examples/workload/mobilenetv2.onnx": (1671933042.2130003, 2964784), - "zigzag/inputs/examples/workload/resnet18.onnx": (1863717063.505, 3410738), - "zigzag.inputs.examples.workload.resnet18": (2375316568.8910007, 4096544), + "zigzag/inputs/examples/workload/alexnet.onnx": (6131950030.816001, 8486444), + "zigzag/inputs/examples/workload/mobilenetv2.onnx": (1671933042.2130003, 2909436), + "zigzag/inputs/examples/workload/resnet18.onnx": (1863717063.505, 3395752), + "zigzag.inputs.examples.workload.resnet18": (2375316568.8910007, 4082454), } diff --git a/tests/main/test_origin/test_tpu_like.py b/tests/main/test_origin/test_tpu_like.py index d59700e6..a2ca227f 100644 --- a/tests/main/test_origin/test_tpu_like.py +++ b/tests/main/test_origin/test_tpu_like.py @@ -11,8 +11,8 @@ # Expected energy and latency for each workload defined above ens_lats = { - "zigzag/inputs/examples/workload/alexnet.onnx": (5567502618.941999, 9080913), - "zigzag/inputs/examples/workload/mobilenetv2.onnx": (1904494517.552001, 23131716), + "zigzag/inputs/examples/workload/alexnet.onnx": (5567502618.941999, 9078209), + "zigzag/inputs/examples/workload/mobilenetv2.onnx": (1904494517.552001, 23112606), "zigzag/inputs/examples/workload/resnet18.onnx": (1795904779.6570003, 4160591), "zigzag.inputs.examples.workload.resnet18": (2296491401.491, 4909027), } diff --git a/tests/main/test_with_mix_spatial_mapping/test_ascend_like.py b/tests/main/test_with_mix_spatial_mapping/test_ascend_like.py index fcc09fa1..182a872f 100644 --- a/tests/main/test_with_mix_spatial_mapping/test_ascend_like.py +++ b/tests/main/test_with_mix_spatial_mapping/test_ascend_like.py @@ -14,7 +14,7 @@ # Expected energy and latency for each workload defined above ens_lats = { "zigzag/inputs/examples/workload/alexnet.onnx": (5667407342.66, 8528846), - "zigzag/inputs/examples/workload/mobilenetv2.onnx": (921552096.0700004, 3835435), + "zigzag/inputs/examples/workload/mobilenetv2.onnx": (921552096.0700004, 3828967), "zigzag/inputs/examples/workload/resnet18.onnx": (1679218425.5100002, 3713386), "zigzag.inputs.examples.workload.resnet18": (2290766279.31, 4442443), } diff --git a/tests/main/test_with_mix_spatial_mapping/test_edge_tpu_like.py b/tests/main/test_with_mix_spatial_mapping/test_edge_tpu_like.py index 86acd714..8287ba69 100644 --- a/tests/main/test_with_mix_spatial_mapping/test_edge_tpu_like.py +++ b/tests/main/test_with_mix_spatial_mapping/test_edge_tpu_like.py @@ -13,10 +13,10 @@ # Expected energy and latency for each workload defined above ens_lats = { - "zigzag/inputs/examples/workload/alexnet.onnx": (5582430184.085, 8343378), - "zigzag/inputs/examples/workload/mobilenetv2.onnx": (762066732.5049998, 3003074), - "zigzag/inputs/examples/workload/resnet18.onnx": (1743190534.155, 5305825), - "zigzag.inputs.examples.workload.resnet18": (2087322696.315, 6155355), + "zigzag/inputs/examples/workload/alexnet.onnx": (5582059481.445, 8343378), + "zigzag/inputs/examples/workload/mobilenetv2.onnx": (819971935.77, 2430583), + "zigzag/inputs/examples/workload/resnet18.onnx": (1763135800.67, 5001291), + "zigzag.inputs.examples.workload.resnet18": (2090252961.0700002, 5858437), } diff --git a/tests/main/test_with_mix_spatial_mapping/test_meta_prototype_like.py b/tests/main/test_with_mix_spatial_mapping/test_meta_prototype_like.py index ff7ea9a8..c002b8d9 100644 --- a/tests/main/test_with_mix_spatial_mapping/test_meta_prototype_like.py +++ b/tests/main/test_with_mix_spatial_mapping/test_meta_prototype_like.py @@ -13,10 +13,10 @@ # Expected energy and latency for each workload defined above ens_lats = { - "zigzag/inputs/examples/workload/alexnet.onnx": (5681909351.240001, 8299150), - "zigzag/inputs/examples/workload/mobilenetv2.onnx": (919452681.2249999, 2894129), - "zigzag/inputs/examples/workload/resnet18.onnx": (1789888904.4450002, 3472280), - "zigzag.inputs.examples.workload.resnet18": (2348207081.7949996, 4238517), + "zigzag/inputs/examples/workload/alexnet.onnx": (5679695605, 8299150), + "zigzag/inputs/examples/workload/mobilenetv2.onnx": (901092009, 2610609), + "zigzag/inputs/examples/workload/resnet18.onnx": (1730672410, 3262009), + "zigzag.inputs.examples.workload.resnet18": (2265438430, 4017227), } diff --git a/tests/main/test_with_mix_spatial_mapping/test_tesla_npu_like.py b/tests/main/test_with_mix_spatial_mapping/test_tesla_npu_like.py index 682604d4..c4b0c5e6 100644 --- a/tests/main/test_with_mix_spatial_mapping/test_tesla_npu_like.py +++ b/tests/main/test_with_mix_spatial_mapping/test_tesla_npu_like.py @@ -13,10 +13,10 @@ # Expected energy and latency for each workload defined above ens_lats = { - "zigzag/inputs/examples/workload/alexnet.onnx": (6040086796.366001, 8389669), - "zigzag/inputs/examples/workload/mobilenetv2.onnx": (958401881.3470002, 1964453), - "zigzag/inputs/examples/workload/resnet18.onnx": (1724869681.4799998, 3257898), - "zigzag.inputs.examples.workload.resnet18": (2220861655.6660004, 3934616), + "zigzag/inputs/examples/workload/alexnet.onnx": (6044768678, 8370470), + "zigzag/inputs/examples/workload/mobilenetv2.onnx": (930702060, 1965457), + "zigzag/inputs/examples/workload/resnet18.onnx": (1724869681, 3257898), + "zigzag.inputs.examples.workload.resnet18": (2220861655, 3934616), } diff --git a/tests/main/test_without_unused_memory/test_ascend_like.py b/tests/main/test_without_unused_memory/test_ascend_like.py index b6fc7a72..4eee129a 100644 --- a/tests/main/test_without_unused_memory/test_ascend_like.py +++ b/tests/main/test_without_unused_memory/test_ascend_like.py @@ -12,7 +12,7 @@ # Expected energy and latency for each workload defined above ens_lats = { "zigzag/inputs/examples/workload/alexnet.onnx": (5649555894.9, 8637780), - "zigzag/inputs/examples/workload/mobilenetv2.onnx": (1881386179.71, 6499441), + "zigzag/inputs/examples/workload/mobilenetv2.onnx": (1881386179.71, 6486685), "zigzag/inputs/examples/workload/resnet18.onnx": (1709089377.83, 3583047), "zigzag.inputs.examples.workload.resnet18": (2243493483.15, 4657130), } diff --git a/tests/main/test_without_unused_memory/test_tesla_npu_like.py b/tests/main/test_without_unused_memory/test_tesla_npu_like.py index 3ccaafb2..25eb9648 100644 --- a/tests/main/test_without_unused_memory/test_tesla_npu_like.py +++ b/tests/main/test_without_unused_memory/test_tesla_npu_like.py @@ -12,9 +12,9 @@ # Expected energy and latency for each workload defined above ens_lats = { "zigzag/inputs/examples/workload/alexnet.onnx": (6040086796.366001, 8389669), - "zigzag/inputs/examples/workload/mobilenetv2.onnx": (930702060.6110002, 1969009), - "zigzag/inputs/examples/workload/resnet18.onnx": (1724869681.4799998, 3267252), - "zigzag.inputs.examples.workload.resnet18": (2220861655.6660004, 3943074), + "zigzag/inputs/examples/workload/mobilenetv2.onnx": (930702060.6110002, 1965457), + "zigzag/inputs/examples/workload/resnet18.onnx": (1724869681.4799998, 3257898), + "zigzag.inputs.examples.workload.resnet18": (2220861655.6660004, 3934616), } diff --git a/tests/main/test_without_unused_memory/test_tpu_like.py b/tests/main/test_without_unused_memory/test_tpu_like.py index ae1fe912..28df3fa1 100644 --- a/tests/main/test_without_unused_memory/test_tpu_like.py +++ b/tests/main/test_without_unused_memory/test_tpu_like.py @@ -11,8 +11,8 @@ # Expected energy and latency for each workload defined above ens_lats = { - "zigzag/inputs/examples/workload/alexnet.onnx": (5475639384.492001, 8981556), - "zigzag/inputs/examples/workload/mobilenetv2.onnx": (952688145.0069999, 21873319), + "zigzag/inputs/examples/workload/alexnet.onnx": (5475639384.492001, 8979956), + "zigzag/inputs/examples/workload/mobilenetv2.onnx": (952688145.0069999, 21873214), "zigzag/inputs/examples/workload/resnet18.onnx": (1659252422.016, 4000289), "zigzag.inputs.examples.workload.resnet18": (1982830786.5119998, 4509235), } diff --git a/zigzag/classes/opt/spatial/generator.py b/zigzag/classes/opt/spatial/generator.py index cf7d74c6..4a7d4ddc 100644 --- a/zigzag/classes/opt/spatial/generator.py +++ b/zigzag/classes/opt/spatial/generator.py @@ -134,7 +134,28 @@ def generate_user_spatial_mappings( defined_mapping is not None and defined_mapping.get(oa_dim.name) is not None ): - oa_dim_unrollings = [defined_mapping.get(oa_dim.name)] + # scale down the defined_mapping size if it exceeds the layer dim size + ori_loop = defined_mapping.get(oa_dim.name) + loop_to_reform = [] + if self.is_nested_tuple(ori_loop): # mix sm loop + for sub_loop in ori_loop: + sub_loop_dim = sub_loop[0] + sub_loop_size = sub_loop[1] + if sub_loop_dim in self.layer.loop_dim_size.keys(): + if sub_loop_size > self.layer.loop_dim_size[sub_loop_dim]: + sub_loop_size = self.layer.loop_dim_size[sub_loop_dim] + loop_to_reform.append((sub_loop_dim, sub_loop_size)) + else: # single layer sm loop + loop_dim = ori_loop[0] + loop_size = ori_loop[1] + if loop_dim in self.layer.loop_dim_size.keys(): + if loop_size > self.layer.loop_dim_size[loop_dim]: + loop_size = self.layer.loop_dim_size[loop_dim] + loop_to_reform.append((loop_dim, loop_size)) + loop_to_reform = tuple(loop_to_reform) + if len(loop_to_reform) == 0: + loop_to_reform = None + oa_dim_unrollings = [loop_to_reform] else: oa_dim_unrollings = [] oa_dim_unrolling_hints = user_spatial_mapping_hint[oa_dim.name] diff --git a/zigzag/classes/stages/SpatialMappingConversionStage.py b/zigzag/classes/stages/SpatialMappingConversionStage.py index f995df88..b55d3cf4 100644 --- a/zigzag/classes/stages/SpatialMappingConversionStage.py +++ b/zigzag/classes/stages/SpatialMappingConversionStage.py @@ -120,6 +120,7 @@ def convert_user_spatial_mapping(self, user_spatial_mapping): oa_dim_name, spatial_loop_element, user_spatial_mapping, + limited_user_spatial_mapping, ) ) limited_user_spatial_mapping_int_to_check = ( @@ -129,6 +130,7 @@ def convert_user_spatial_mapping(self, user_spatial_mapping): oa_dim_name, spatial_loop_element, user_spatial_mapping, + limited_user_spatial_mapping, False, ) ) @@ -164,6 +166,7 @@ def convert_user_spatial_mapping(self, user_spatial_mapping): oa_dim_name, spatial_loop, user_spatial_mapping, + limited_user_spatial_mapping, ) ) limited_user_spatial_mapping_int_to_check = ( @@ -173,6 +176,7 @@ def convert_user_spatial_mapping(self, user_spatial_mapping): oa_dim_name, spatial_loop, user_spatial_mapping, + limited_user_spatial_mapping, False, ) ) @@ -230,7 +234,8 @@ def generate_limited_user_spatial_mapping( oa_dim_name, spatial_loop, user_spatial_mapping, - check_3=True, + limited_user_spatial_mapping, + allow_decimal_sm_loop_size=True, ): ## Do check on spatial mapping, and convert the mapping to a tuple (loop_dim_unrolled, loop_size_unrolled) = spatial_loop @@ -245,35 +250,43 @@ def generate_limited_user_spatial_mapping( # Check 2: Limit unrolling if layer dimension is smaller than provided unrolling or if the loop dim doesn't exist layer_dim_size = layer_dim_sizes.get(loop_dim_unrolled, 1) loop_size_unrolled = min(layer_dim_size, loop_size_unrolled) - if check_3: - # Check 3: Adjust unrolling if it is not a multiple of the layer dimension size - # and if there is no more mapping for this layer dimension - no_more_mapping_for_current_layer_dim = ( - self.check_if_there_is_further_oa_mapping_for_current_layer_dim( + # Check 3: Adjust unrolling if it is not a multiple of the layer dimension size + # and if there is no more mapping for this layer dimension + no_more_mapping_for_current_layer_dim = ( + self.check_if_there_is_further_oa_mapping_for_current_layer_dim( + oa_dim_name=oa_dim_name, + loop_dim_unrolled=loop_dim_unrolled, + user_spatial_mapping=user_spatial_mapping, + ) + ) + if no_more_mapping_for_current_layer_dim: + loop_size_unrolled_on_early_oa_dims = ( + self.calc_unrolled_loop_size_on_early_oa_dims( oa_dim_name=oa_dim_name, loop_dim_unrolled=loop_dim_unrolled, - user_spatial_mapping=user_spatial_mapping, + user_spatial_mapping=limited_user_spatial_mapping, ) ) - if no_more_mapping_for_current_layer_dim: - loop_size_unrolled_on_early_oa_dims = ( - self.calc_unrolled_loop_size_on_early_oa_dims( - oa_dim_name=oa_dim_name, - loop_dim_unrolled=loop_dim_unrolled, - user_spatial_mapping=user_spatial_mapping, - ) - ) - temporal_remainder = int( - np.ceil( - layer_dim_size - / (loop_size_unrolled * loop_size_unrolled_on_early_oa_dims) - ) + temporal_remainder = int( + np.ceil( + layer_dim_size + / (loop_size_unrolled * loop_size_unrolled_on_early_oa_dims) ) + ) + if allow_decimal_sm_loop_size: loop_size_unrolled = ( layer_dim_size / temporal_remainder / loop_size_unrolled_on_early_oa_dims ) + else: + loop_size_unrolled = int( + np.ceil( + layer_dim_size + / temporal_remainder + / loop_size_unrolled_on_early_oa_dims + ) + ) return ( loop_dim_unrolled, loop_size_unrolled,