
Commit

feat: experimental performance modeller / browser for CPU multithreading
Added the experimental "discopop_optimizer" to the discopop_library and GUI
lukasrothenberger authored Sep 27, 2023
1 parent 31b2150 commit b6d2d0a
Showing 74 changed files with 5,180 additions and 107 deletions.
83 changes: 49 additions & 34 deletions discopop_explorer/pattern_detection.py
@@ -7,9 +7,11 @@
# directory for details.
import os
import sys
from typing import Dict, Union

from discopop_library.discopop_optimizer.OptimizationGraph import OptimizationGraph
from discopop_library.discopop_optimizer.Variables.Experiment import Experiment
from discopop_library.discopop_optimizer.classes.system.System import System
from discopop_library.discopop_optimizer.scheduling.workload_delta import (
get_workload_delta_for_cu_node,
)
@@ -124,39 +126,52 @@ def __identify_scheduling_clauses(
) -> DetectionResult:
"""Identifies scheduling clauses for suggestions and returns the updated DetectionResult"""
# construct optimization graph (basically an acyclic representation of the PET)
experiment = Experiment(project_folder_path, res, file_mapping_path)
print("\tcreating optimization graph...")
# saves optimization graph in experiment
optimization_graph = OptimizationGraph(project_folder_path, experiment)
print("\tDetermining scheduling clauses...")
with alive_bar(len(res.do_all)) as progress_bar:
for do_all_suggestion in res.do_all:
for node_id in get_nodes_from_cu_id(
experiment.optimization_graph, do_all_suggestion.node_id
):
workload_delta, min_workload, max_workload = get_workload_delta_for_cu_node(
experiment, node_id
)
print(
"DOALL @ ",
do_all_suggestion.node_id,
" -> ",
"node_id: ",
node_id,
" --> Delta WL: ",
workload_delta,
" (",
min_workload,
"/",
max_workload,
")",
file=sys.stderr,
)
# todo
# very naive and non-robust approach, needs improvement in the future
# reflects the behavior as described in https://dl.acm.org/doi/pdf/10.1145/3330345.3330375
if workload_delta != 0:
do_all_suggestion.scheduling_clause = "dynamic"
progress_bar()
system = System(headless=True)
discopop_output_path = project_folder_path
discopop_optimizer_path = "INVALID_DUMMY"
code_export_path = "INVALID_DUMMY"
arguments_1 = {"--compile-command": "make"}
experiment = Experiment(
project_folder_path,
discopop_output_path,
discopop_optimizer_path,
code_export_path,
file_mapping_path,
system,
res,
arguments_1,
)
arguments_2 = {"--exhaustive-search": False, "--headless-mode": True}
optimization_graph = OptimizationGraph(
project_folder_path, experiment, arguments_2, None, False
)

for do_all_suggestion in res.do_all:
for node_id in get_nodes_from_cu_id(
experiment.optimization_graph, do_all_suggestion.node_id
):
workload_delta, min_workload, max_workload = get_workload_delta_for_cu_node(
experiment, node_id
)
print(
"DOALL @ ",
do_all_suggestion.node_id,
" -> ",
"node_id: ",
node_id,
" --> Delta WL: ",
workload_delta,
" (",
min_workload,
"/",
max_workload,
")",
file=sys.stderr,
)
# todo
# very naive and non-robust approach, needs improvement in the future
# reflects the behavior as described in https://dl.acm.org/doi/pdf/10.1145/3330345.3330375
if workload_delta != 0:
do_all_suggestion.scheduling_clause = "dynamic"

return res
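
The heuristic above switches a do-all suggestion to a "dynamic" scheduling clause as soon as any of its CU nodes reports a non-zero workload delta; otherwise the clause is left unchanged. Below is a minimal standalone sketch of that decision rule; CuWorkload and pick_scheduling_clause are illustrative names standing in for the (workload_delta, min_workload, max_workload) tuples returned by get_workload_delta_for_cu_node, not part of the DiscoPoP API:

from typing import List, Tuple

# Illustrative alias for the (workload_delta, min_workload, max_workload) tuple
# produced by get_workload_delta_for_cu_node for each CU node of a do-all loop.
CuWorkload = Tuple[int, int, int]

def pick_scheduling_clause(cu_workloads: List[CuWorkload], default: str = "") -> str:
    """Return "dynamic" if any CU shows a workload imbalance, otherwise keep the default clause."""
    for workload_delta, _min_wl, _max_wl in cu_workloads:
        if workload_delta != 0:
            return "dynamic"
    return default

# Example: the second CU is imbalanced by 5 units of workload -> "dynamic"
print(pick_scheduling_clause([(0, 12, 12), (5, 10, 15)]))
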
61 changes: 48 additions & 13 deletions discopop_explorer/utils.py
@@ -98,7 +98,9 @@ def is_loop_index2(pet: PETGraphX, root_loop: Node, var_name: str) -> bool:

# NOTE: kept the old code as it may become relevant again in the near future
# We decided to omit the computed workload information and the relevant code sections, since for large programs (e.g., ffmpeg) the generated Data.xml file becomes very large. However, we keep the code here because we would like to integrate a hotspot detection algorithm (TODO: Bertin) with the parallelism discovery. Then we need to retrieve this information to decide which code sections (loops or functions) are worth parallelizing.
def calculate_workload(pet: PETGraphX, node: Node) -> int:
def calculate_workload(
pet: PETGraphX, node: Node, ignore_function_calls_and_cached_values: bool = False
) -> int:
"""Calculates and stores the workload for a given node
The workload is the number of instructions multiplied by the respective number of iterations
@@ -108,7 +110,8 @@ def calculate_workload(pet: PETGraphX, node: Node) -> int:
"""
# check if value already present
if node.workload is not None:
return node.workload
if not ignore_function_calls_and_cached_values:
return node.workload
res = 0
if node.type == NodeType.DUMMY:
# store workload
@@ -118,15 +121,25 @@ def calculate_workload(pet: PETGraphX, node: Node) -> int:
# if a function is called, replace the instruction with the costs of the called function
# note: recursive function calls are counted as a single instruction
res += cast(CUNode, node).instructions_count
for calls_edge in pet.out_edges(cast(CUNode, node).id, EdgeType.CALLSNODE):
# add costs of the called function
res += calculate_workload(pet, pet.node_at(calls_edge[1]))
# subtract 1 to ignore the call instruction
# todo: should we keep the cost for the call instruction and just add the costs of the called function?
res -= 1
if not ignore_function_calls_and_cached_values:
for calls_edge in pet.out_edges(cast(CUNode, node).id, EdgeType.CALLSNODE):
# add costs of the called function
res += calculate_workload(
pet,
pet.node_at(calls_edge[1]),
ignore_function_calls_and_cached_values=ignore_function_calls_and_cached_values,
)
# subtract 1 to ignore the call instruction
# todo: should we keep the cost for the call instruction and just add the costs of the called function?
res -= 1
elif node.type == NodeType.FUNC:
for child in find_subnodes(pet, node, EdgeType.CHILD):
res += calculate_workload(pet, child)
if not ignore_function_calls_and_cached_values:
for child in find_subnodes(pet, node, EdgeType.CHILD):
res += calculate_workload(
pet,
child,
ignore_function_calls_and_cached_values=ignore_function_calls_and_cached_values,
)
elif node.type == NodeType.LOOP:
for child in find_subnodes(pet, node, EdgeType.CHILD):
if child.type == NodeType.CU:
@@ -139,23 +152,45 @@
if cast(LoopNode, node).loop_data is None
else cast(LoopData, cast(LoopNode, node).loop_data).average_iteration_count
)
res += calculate_workload(pet, child) * average_iteration_count + 1
res += (
calculate_workload(
pet,
child,
ignore_function_calls_and_cached_values=ignore_function_calls_and_cached_values,
)
* average_iteration_count
+ 1
)
else:
# determine average iteration count. Use traditional iteration count as a fallback
average_iteration_count = (
cast(LoopNode, node).loop_iterations
if cast(LoopNode, node).loop_data is None
else cast(LoopData, cast(LoopNode, node).loop_data).average_iteration_count
)
res += calculate_workload(pet, child) * average_iteration_count
res += (
calculate_workload(
pet,
child,
ignore_function_calls_and_cached_values=ignore_function_calls_and_cached_values,
)
* average_iteration_count
)
else:
# determine average iteration count. Use traditional iteration count as a fallback
average_iteration_count = (
cast(LoopNode, node).loop_iterations
if cast(LoopNode, node).loop_data is None
else cast(LoopData, cast(LoopNode, node).loop_data).average_iteration_count
)
res += calculate_workload(pet, child) * average_iteration_count
res += (
calculate_workload(
pet,
child,
ignore_function_calls_and_cached_values=ignore_function_calls_and_cached_values,
)
* average_iteration_count
)
# store workload
node.workload = res
return res
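
The new ignore_function_calls_and_cached_values flag affects two things in the hunk above: a cached node.workload is no longer returned early, and the costs of called functions (and, for FUNC nodes, of their children) are not added. Below is a stripped-down sketch of just the caching part of that behavior, using a hypothetical FakeNode stand-in rather than the real PETGraphX node types:

from typing import List, Optional

class FakeNode:
    # Hypothetical stand-in for a PET node: an instruction count, children, and a workload cache.
    def __init__(self, instructions: int, children: Optional[List["FakeNode"]] = None):
        self.instructions = instructions
        self.children = children or []
        self.workload: Optional[int] = None  # cache, analogous to node.workload

def calc_workload(node: FakeNode, ignore_cached: bool = False) -> int:
    # Fast path: reuse the cached value unless the caller asks for a fresh computation.
    if node.workload is not None and not ignore_cached:
        return node.workload
    res = node.instructions + sum(calc_workload(c, ignore_cached) for c in node.children)
    node.workload = res  # store the result for later calls
    return res

leaf = FakeNode(3)
root = FakeNode(2, [leaf, FakeNode(5)])
print(calc_workload(root))                      # 10, computed and cached
leaf.instructions = 100
print(calc_workload(root))                      # still 10, served from the cache
print(calc_workload(root, ignore_cached=True))  # 107, recomputed while ignoring caches
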
