From 84d171bd927f054efe6cd4d07ca8a725b47abd43 Mon Sep 17 00:00:00 2001 From: Lukas Rothenberger Date: Wed, 23 Oct 2024 09:53:32 +0200 Subject: [PATCH] feat(explorer): calculate code statistics --- discopop_explorer/__main__.py | 4 + discopop_explorer/discopop_explorer.py | 6 + .../statistics/collect_statistics.py | 125 +++++++++ .../cyclomatic_complexity/boxplot.py | 63 +++++ .../cyclomatic_complexity/cc_dictionary.py | 60 ++++ .../cyclomatic_complexity/subtree.py | 52 ++++ .../statistics/cyclomatic_complexity/total.py | 32 +++ .../statistics/maximum_call_path_depth.py | 21 ++ .../statistics/num_function_calls.py | 24 ++ .../utilities/statistics/output_statistics.py | 259 ++++++++++++++++++ .../statistics/suggestion_call_path_depths.py | 23 ++ .../suggestion_cyclomatic_complexity.py | 39 +++ .../statistics/suggestion_lines_of_code.py | 56 ++++ .../statistics/utilities/call_path_depth.py | 39 +++ .../utilities/num_function_calls.py | 34 +++ 15 files changed, 837 insertions(+) create mode 100644 discopop_explorer/utilities/statistics/collect_statistics.py create mode 100644 discopop_explorer/utilities/statistics/cyclomatic_complexity/boxplot.py create mode 100644 discopop_explorer/utilities/statistics/cyclomatic_complexity/cc_dictionary.py create mode 100644 discopop_explorer/utilities/statistics/cyclomatic_complexity/subtree.py create mode 100644 discopop_explorer/utilities/statistics/cyclomatic_complexity/total.py create mode 100644 discopop_explorer/utilities/statistics/maximum_call_path_depth.py create mode 100644 discopop_explorer/utilities/statistics/num_function_calls.py create mode 100644 discopop_explorer/utilities/statistics/output_statistics.py create mode 100644 discopop_explorer/utilities/statistics/suggestion_call_path_depths.py create mode 100644 discopop_explorer/utilities/statistics/suggestion_cyclomatic_complexity.py create mode 100644 discopop_explorer/utilities/statistics/suggestion_lines_of_code.py create mode 100644 
discopop_explorer/utilities/statistics/utilities/call_path_depth.py create mode 100644 discopop_explorer/utilities/statistics/utilities/num_function_calls.py diff --git a/discopop_explorer/__main__.py b/discopop_explorer/__main__.py index dd523cbf4..b0dee494c 100644 --- a/discopop_explorer/__main__.py +++ b/discopop_explorer/__main__.py @@ -104,6 +104,9 @@ def parse_args() -> ExplorerArguments: "--llvm-cxxfilt-path", type=str, default=None, help="Path to llvm-cxxfilt executable. Required for task pattern detector if non-standard path should be used.", ) + experimental_parser.add_argument( + "--disable-statistics", action="store_false", help="Disable the calculation and storing of statistics for code and generated suggestions." + ) # fmt: on arguments = parser.parse_args() @@ -156,6 +159,7 @@ def parse_args() -> ExplorerArguments: log_level=arguments.log.upper(), write_log=arguments.write_log, load_existing_doall_and_reduction_patterns=arguments.load_existing_doall_and_reduction_patterns, + collect_statistics=arguments.disable_statistics, ) diff --git a/discopop_explorer/discopop_explorer.py b/discopop_explorer/discopop_explorer.py index d9fd4aa0a..b5cb51895 100644 --- a/discopop_explorer/discopop_explorer.py +++ b/discopop_explorer/discopop_explorer.py @@ -19,6 +19,7 @@ import pstats2 # type:ignore from pluginbase import PluginBase # type: ignore from discopop_explorer.functions.PEGraph.output.json import dump_to_pickled_json +from discopop_explorer.utilities.statistics.collect_statistics import collect_statistics from discopop_library.ArgumentClasses.GeneralArguments import GeneralArguments # type: ignore from discopop_library.HostpotLoader.HotspotLoaderArguments import HotspotLoaderArguments from discopop_library.HostpotLoader.HotspotNodeType import HotspotNodeType @@ -65,6 +66,7 @@ class ExplorerArguments(GeneralArguments): llvm_cxxfilt_path: Optional[str] microbench_file: Optional[str] load_existing_doall_and_reduction_patterns: bool + collect_statistics: bool 
def __post_init__(self) -> None: self.__validate() @@ -245,6 +247,10 @@ def run(arguments: ExplorerArguments) -> None: end = time.time() + # collect code and suggestions statistics, if requested + if arguments.collect_statistics: + collect_statistics(arguments, res) + if arguments.enable_pet_dump_file is not None: with open(arguments.enable_pet_dump_file, "w+") as f: f.write(dump_to_pickled_json(res.pet)) diff --git a/discopop_explorer/utilities/statistics/collect_statistics.py b/discopop_explorer/utilities/statistics/collect_statistics.py new file mode 100644 index 000000000..5c2ac560d --- /dev/null +++ b/discopop_explorer/utilities/statistics/collect_statistics.py @@ -0,0 +1,125 @@ +# This file is part of the DiscoPoP software (http://www.discopop.tu-darmstadt.de) +# +# Copyright (c) 2020, Technische Universitaet Darmstadt, Germany +# +# This software may be modified and distributed under the terms of +# the 3-Clause BSD License. See the LICENSE file in the package base +# directory for details. 
logger = logging.getLogger("statistics")


def collect_statistics(arguments: ExplorerArguments, res: DetectionResult) -> None:
    """Compute the code and suggestion statistics and hand them to the output routines.

    Every intermediate result is reported on debug level right after it has
    been computed. Afterwards the code statistics, the per-suggestion
    statistics and the aggregated suggestion statistics are written via the
    three ``output_*`` functions.

    Args:
        arguments: Explorer arguments; passed on to the cyclomatic complexity
            helpers and to the output functions.
        res: Detection result; ``res.pet`` is used for the maximum call path
            depth, the complete object for all suggestion related statistics.
    """
    logger.info("Collecting code statistics...")

    maximum_call_path_depth = get_maximum_call_path_depth(res.pet)
    logger.debug(f"--> maximum_call_path_depth: {maximum_call_path_depth}")

    suggestion_call_path_depths = get_suggestion_call_path_depths(res)
    logger.debug(
        "--> suggestion_call_path_depths: "
        + str([f"{key} => {value}" for key, value in suggestion_call_path_depths.items()])
    )

    suggestion_num_function_calls = get_suggestion_num_function_calls(res)
    logger.debug(
        "--> suggestion_num_function_calls: "
        + str([f"{key} => {value}" for key, value in suggestion_num_function_calls.items()])
    )

    suggestion_immediate_lines_of_code = get_suggestion_immediate_lines_of_code(res)
    logger.debug(
        "--> suggestion_immediate_lines_of_code: "
        + str([f"{key} => {value}" for key, value in suggestion_immediate_lines_of_code.items()])
    )

    suggestion_lines_of_code_including_calls = get_suggestion_lines_of_code_including_calls(res)
    logger.debug(
        "--> suggestion_lines_of_code_including_calls: "
        + str([f"{key} => {value}" for key, value in suggestion_lines_of_code_including_calls.items()])
    )

    summed_cyclomatic_complexity = get_summed_cyclomatic_complexity(arguments, res)
    logger.debug(f"--> summed_cyclomatic_complexity = {summed_cyclomatic_complexity}")

    cc_min, cc_max, cc_avg, cc_lower_quart, cc_upper_quart = get_cyclomatic_complexities_for_boxplot(arguments, res)
    for label, value in (
        ("cc_min", cc_min),
        ("cc_max", cc_max),
        ("cc_avg", cc_avg),
        ("cc_lower_quart", cc_lower_quart),
        ("cc_upper_quart", cc_upper_quart),
    ):
        logger.debug("--> " + label + ": " + str(value))

    suggestion_summed_cyclomatic_complexity_from_calls = get_suggestion_summed_cyclomatic_complexity_from_calls(
        arguments, res
    )
    logger.debug(
        "--> suggestion_summed_cyclomatic_complexity_from_calls: "
        + str([f"{key} => {value}" for key, value in suggestion_summed_cyclomatic_complexity_from_calls.items()])
    )

    # output statistics to file
    output_code_statistics(
        arguments,
        maximum_call_path_depth,
        summed_cyclomatic_complexity,
        cc_min,
        cc_max,
        cc_avg,
        cc_lower_quart,
        cc_upper_quart,
    )

    # both suggestion outputs receive the same five per-suggestion mappings, in the same order
    per_suggestion_statistics = (
        suggestion_call_path_depths,
        suggestion_num_function_calls,
        suggestion_immediate_lines_of_code,
        suggestion_lines_of_code_including_calls,
        suggestion_summed_cyclomatic_complexity_from_calls,
    )
    output_suggestion_statistics(arguments, *per_suggestion_statistics)
    output_aggregated_suggestion_statistics(arguments, *per_suggestion_statistics)


# define aliases for readability
MIN = int
MAX = int
AVG = int
LOWER_QUART = int
UPPER_QUART = int


def get_cyclomatic_complexities_for_boxplot(
    arguments: ExplorerArguments, res: DetectionResult
) -> Tuple[MIN, MAX, AVG, LOWER_QUART, UPPER_QUART]:
    """Return boxplot figures for the per-function cyclomatic complexities.

    ``pmccabe -C`` is invoked on every file listed in the file mapping. Output
    lines with fewer than nine tab-separated fields are ignored; the second
    field of the remaining lines is read as the cyclomatic complexity.

    Args:
        arguments: Explorer arguments; only ``file_mapping_file`` is read.
        res: Detection result (not used by this function).

    Returns:
        ``(min, max, avg, lower_quartile, upper_quartile)``. The average is
        truncated to an integer. All values are 0 if pmccabe reported no
        function.
    """
    file_mapping = load_file_mapping(arguments.file_mapping_file)
    # get cyclomatic complexity for all functions in all files
    cmd = ["pmccabe", "-C"] + [str(file_mapping[file_id]) for file_id in file_mapping]
    out = check_output(cmd).decode("utf-8")

    # unpack the complexity column of every complete output line
    cyclomatic_complexities: List[int] = []
    for line in out.split("\n"):
        split_line = line.split("\t")
        if len(split_line) < 9:
            continue
        cyclomatic_complexities.append(int(split_line[1]))

    if not cyclomatic_complexities:
        # nothing was reported; return all-zero statistics instead of failing in min() / max()
        return 0, 0, 0, 0, 0

    sorted_cyclomatic_complexities = sorted(cyclomatic_complexities)
    count = len(sorted_cyclomatic_complexities)
    last_idx = count - 1
    # int((count + 1) * 3 / 4) exceeds the last valid index for 1, 2 and 3 entries,
    # hence both quartile indices are clamped to the list bounds
    lower_quartile_idx = min(last_idx, int((count + 1) * 1 / 4))
    upper_quartile_idx = min(last_idx, int((count + 1) * 3 / 4))

    cc_min: MIN = sorted_cyclomatic_complexities[0]
    cc_max: MAX = sorted_cyclomatic_complexities[-1]
    cc_avg: AVG = int(sum(sorted_cyclomatic_complexities) / count)
    lower_quartile: LOWER_QUART = sorted_cyclomatic_complexities[lower_quartile_idx]
    upper_quartile: UPPER_QUART = sorted_cyclomatic_complexities[upper_quartile_idx]

    return cc_min, cc_max, cc_avg, lower_quartile, upper_quartile
FILE_ID = int
FILEPATH = str
FUNC_NAME = str
CYC_COMP = int

CC_DICT = Dict[FILE_ID, Dict[FUNC_NAME, CYC_COMP]]


def get_cyclomatic_complexity_dictionary(arguments: ExplorerArguments, res: DetectionResult) -> CC_DICT:
    """Collect the cyclomatic complexity of every function, grouped by file id.

    ``pmccabe -C`` is invoked on every file listed in the file mapping. Output
    lines with fewer than nine tab-separated fields are ignored. From the
    remaining lines the second field is read as cyclomatic complexity, the
    eighth as file path and the ninth as function name. Lines whose file path
    does not occur in the file mapping are dropped.

    Args:
        arguments: Explorer arguments; only ``file_mapping_file`` is read.
        res: Detection result (not used by this function).

    Returns:
        ``{file_id: {function_name: cyclomatic_complexity}}``. If a function
        name is reported more than once for a file, the last report wins.
    """
    file_mapping = load_file_mapping(arguments.file_mapping_file)

    # Reverse lookup (path -> file id), built once instead of scanning the whole
    # mapping for every output line. Later ids overwrite earlier ones, so a path
    # shared by several ids resolves to the id listed last, as a full scan would.
    file_id_by_path: Dict[FILEPATH, FILE_ID] = dict()
    source_files = []
    for file_id in file_mapping:
        file_path = str(file_mapping[file_id])
        source_files.append(file_path)
        file_id_by_path[file_path] = file_id

    # get cyclomatic complexity for all functions in all files
    out = check_output(["pmccabe", "-C"] + source_files).decode("utf-8")

    # unpack and repack results
    cc_dict: Dict[FILE_ID, Dict[FUNC_NAME, CYC_COMP]] = dict()
    for line in out.split("\n"):
        split_line = line.split("\t")
        if len(split_line) < 9:
            continue
        cyclomatic_complexity: CYC_COMP = int(split_line[1])
        reported_file_id = file_id_by_path.get(split_line[7])
        if reported_file_id is None:
            # pmccabe reported a path that is not part of the file mapping
            continue
        func_name: FUNC_NAME = split_line[8]
        cc_dict.setdefault(reported_file_id, dict())[func_name] = cyclomatic_complexity

    return cc_dict
def get_subtree_cyclomatic_complexity_from_calls(
    arguments: ExplorerArguments, res: DetectionResult, cc_dict: CC_DICT, root_node_id: NodeID
) -> int:
    """Sum the cyclomatic complexities of all functions called from a subtree.

    Args:
        arguments: Explorer arguments (not used by this function).
        res: Detection result providing the graph ``res.pet``.
        cc_dict: Cyclomatic complexities per file id and function name.
        root_node_id: Id of the node whose subtree is inspected.

    Returns:
        The summed complexity of every call target that could be matched
        against ``cc_dict``; call targets without a match contribute nothing.
    """
    pet = res.pet

    # collect the targets of all CALLSNODE edges leaving a node of the subtree
    callees: List[FunctionNode] = [
        cast(FunctionNode, pet.node_at(target))
        for node in subtree_of_type(pet, pet.node_at(root_node_id))
        for _, target, _ in out_edges(pet, node.id, EdgeType.CALLSNODE)
    ]

    total = 0
    for callee in callees:
        complexities_in_file = cc_dict.get(callee.file_id)
        if complexities_in_file is None:
            continue
        # An entry matches if its name is contained in the name of the called function.
        # Due to overloading or name extensions (i.e. get_a vs. get_a_and_b), the
        # shortest matching name is selected (the first one on ties).
        matching_names = [known_name for known_name in complexities_in_file if known_name in callee.name]
        if matching_names:
            total += complexities_in_file[min(matching_names, key=len)]

    return total
def get_summed_cyclomatic_complexity(arguments: ExplorerArguments, res: DetectionResult) -> int:
    """Calculate the total cyclomatic complexity.

    ``pmccabe -T`` is invoked on every file listed in the file mapping and the
    second tab-separated field of its output is returned as an integer.

    Args:
        arguments: Explorer arguments; only ``file_mapping_file`` is read.
        res: Detection result (not used by this function).
    """
    file_mapping = load_file_mapping(arguments.file_mapping_file)
    source_files = [str(file_mapping[file_id]) for file_id in file_mapping]

    # get summed cyclomatic complexity for all functions in all files
    raw_output = check_output(["pmccabe", "-T"] + source_files)
    fields = raw_output.decode("utf-8").split("\t")
    return int(fields[1])
def get_maximum_call_path_depth(pet: PEGraphX) -> int:
    """Return the largest outgoing call path depth of any function node in ``pet``.

    The result is never smaller than 0, which is also returned if the graph
    contains no function node.
    """
    function_depths = [
        get_outgoing_call_path_depth(pet, node) for node in all_nodes(pet) if node.type == NodeType.FUNC
    ]
    # the leading 0 is the lower bound of the result
    return max([0] + function_depths)
def get_suggestion_num_function_calls(res: DetectionResult) -> Dict[int, int]:
    """Map each pattern id to the number of function calls found for its node.

    All attributes of ``res.patterns`` are treated as collections of patterns.
    For every pattern, ``get_num_function_calls`` is evaluated on the node
    ``pattern.node_id`` with a fresh, empty list as third argument.
    """
    pet = res.pet
    calls_by_suggestion: Dict[int, int] = {
        pattern.pattern_id: get_num_function_calls(pet, pet.node_at(pattern.node_id), [])
        for detected_patterns in vars(res.patterns).values()
        for pattern in detected_patterns
    }
    return calls_by_suggestion
def _prepare_statistics_file(arguments: ExplorerArguments, file_name: str) -> str:
    """Create the statistics directory, remove a stale result and return the path of ``file_name``."""
    statistics_dir = os.path.join(arguments.project_path, "explorer", "statistics")
    # makedirs also creates a missing "explorer" parent directory and does not
    # fail if the statistics directory already exists
    os.makedirs(statistics_dir, exist_ok=True)
    statistics_file = os.path.join(statistics_dir, file_name)
    # clear existing result
    if os.path.exists(statistics_file):
        os.remove(statistics_file)
    return statistics_file


def _write_json_line(statistics_file: str, content: object) -> None:
    """Write ``content`` as a single line of JSON, followed by a newline."""
    with open(statistics_file, "w+") as f:
        f.write(json.dumps(content) + "\n")


def _aggregate(values: List[int]) -> Dict[str, int]:
    """Return min, max, truncated average and both quartiles of ``values`` (all 0 if empty)."""
    if not values:
        return {"min": 0, "max": 0, "avg": 0, "lower_quartile": 0, "upper_quartile": 0}
    ordered = sorted(values)
    count = len(ordered)
    lower_quartile_idx = int((count + 1) * 1 / 4)
    # the unclamped index exceeds the list bounds for fewer than four values
    upper_quartile_idx = min(count - 1, int((count + 1) * 3 / 4))
    return {
        "min": ordered[0],
        "max": ordered[-1],
        "avg": int(sum(ordered) / count),
        "lower_quartile": ordered[lower_quartile_idx],
        "upper_quartile": ordered[upper_quartile_idx],
    }


def output_code_statistics(
    arguments: ExplorerArguments,
    maximum_call_path_depth: int,
    summed_cyclomatic_complexity: int,
    cc_min: int,
    cc_max: int,
    cc_avg: int,
    cc_lower_quart: int,
    cc_upper_quart: int,
) -> None:
    """Write the code statistics to ``<project_path>/explorer/statistics/code_statistics.json``.

    The directory is created if necessary and an existing file is replaced.
    The file holds one JSON object whose keys are the names of the numeric
    parameters of this function.
    """
    statistics_file = _prepare_statistics_file(arguments, "code_statistics.json")

    statistics_dict: Dict[str, int] = {
        "maximum_call_path_depth": maximum_call_path_depth,
        "summed_cyclomatic_complexity": summed_cyclomatic_complexity,
        "cc_min": cc_min,
        "cc_max": cc_max,
        "cc_avg": cc_avg,
        "cc_lower_quart": cc_lower_quart,
        "cc_upper_quart": cc_upper_quart,
    }
    _write_json_line(statistics_file, statistics_dict)


def output_suggestion_statistics(
    arguments: ExplorerArguments,
    suggestion_call_path_depths: Dict[int, int],
    suggestion_num_function_calls: Dict[int, int],
    suggestion_immediate_lines_of_code: Dict[int, int],
    suggestion_lines_of_code_including_calls: Dict[int, int],
    suggestion_summed_cyclomatic_complexity_from_calls: Dict[int, int],
) -> None:
    """Write all statistics grouped by suggestion id.

    The result is stored in
    ``<project_path>/explorer/statistics/suggestion_statistics_by_suggestionID.json``
    as one JSON object of the form ``{suggestion_id: {statistic_name: value}}``.
    A suggestion only lists the statistics it occurs in. The directory is
    created if necessary and an existing file is replaced.
    """
    statistics_file_by_suggestionID = _prepare_statistics_file(
        arguments, "suggestion_statistics_by_suggestionID.json"
    )

    # the order of this sequence determines the order of the entries in the output file
    named_statistics = (
        ("suggestion_call_path_depth", suggestion_call_path_depths),
        ("suggestion_num_function_calls", suggestion_num_function_calls),
        (
            "suggestion_summed_cyclomatic_complexity_from_calls",
            suggestion_summed_cyclomatic_complexity_from_calls,
        ),
        ("suggestion_immediate_lines_of_code", suggestion_immediate_lines_of_code),
        ("suggestion_lines_of_code_including_calls", suggestion_lines_of_code_including_calls),
    )

    statistics_dict_by_suggestionID: Dict[int, Dict[str, int]] = dict()
    for statistic_name, values_by_suggestion in named_statistics:
        for suggestion_id in values_by_suggestion:
            statistics_dict_by_suggestionID.setdefault(suggestion_id, dict())[statistic_name] = values_by_suggestion[
                suggestion_id
            ]

    _write_json_line(statistics_file_by_suggestionID, statistics_dict_by_suggestionID)


def output_aggregated_suggestion_statistics(
    arguments: ExplorerArguments,
    suggestion_call_path_depths: Dict[int, int],
    suggestion_num_function_calls: Dict[int, int],
    suggestion_immediate_lines_of_code: Dict[int, int],
    suggestion_lines_of_code_including_calls: Dict[int, int],
    suggestion_summed_cyclomatic_complexity_from_calls: Dict[int, int],
) -> None:
    """Write aggregated statistics over all suggestions.

    The result is stored in
    ``<project_path>/explorer/statistics/suggestion_statistics.json``. For each
    of the five mappings the minimum, maximum, truncated average, lower and
    upper quartile of its values are stored under the name of the parameter
    (all 0 if the mapping is empty). The additional entry
    ``suggestion_count`` holds the size of the largest mapping under
    ``total``. The directory is created if necessary and an existing file is
    replaced.
    """
    statistics_file = _prepare_statistics_file(arguments, "suggestion_statistics.json")

    named_statistics = (
        ("suggestion_call_path_depths", suggestion_call_path_depths),
        ("suggestion_num_function_calls", suggestion_num_function_calls),
        ("suggestion_immediate_lines_of_code", suggestion_immediate_lines_of_code),
        ("suggestion_lines_of_code_including_calls", suggestion_lines_of_code_including_calls),
        (
            "suggestion_summed_cyclomatic_complexity_from_calls",
            suggestion_summed_cyclomatic_complexity_from_calls,
        ),
    )

    res_dict: Dict[str, Dict[str, int]] = dict()  # {value_identifier : {value_descriptor: value}}
    for value_identifier, values_by_suggestion in named_statistics:
        res_dict[value_identifier] = _aggregate(list(values_by_suggestion.values()))

    # suggestion_count
    suggestion_count = max(len(values_by_suggestion) for _, values_by_suggestion in named_statistics)
    res_dict["suggestion_count"] = {"total": suggestion_count}

    _write_json_line(statistics_file, res_dict)
def get_suggestion_call_path_depths(res: DetectionResult) -> Dict[int, int]:
    """Map each pattern id to the outgoing call path depth of the node it is located at.

    All attributes of ``res.patterns`` are treated as collections of patterns.
    """
    pet = res.pet
    depth_by_suggestion: Dict[int, int] = {}
    for detected_patterns in vars(res.patterns).values():
        for pattern in detected_patterns:
            suggestion_node = pet.node_at(pattern.node_id)
            depth_by_suggestion[pattern.pattern_id] = get_outgoing_call_path_depth(pet, suggestion_node)
    return depth_by_suggestion
def get_suggestion_summed_cyclomatic_complexity_from_calls(
    arguments: ExplorerArguments, res: DetectionResult
) -> Dict[int, int]:
    """Compute the call-based subtree cyclomatic complexity for every pattern.

    The cyclomatic complexity dictionary is built once and then reused for
    all patterns. All attributes of ``res.patterns`` are treated as iterables
    of patterns.

    Args:
        arguments: Explorer arguments, forwarded unchanged to the helpers.
        res: Detection result providing the patterns to evaluate.

    Returns:
        Dictionary of the form ``{pattern_id: value}``, where the value is
        whatever ``get_subtree_cyclomatic_complexity_from_calls`` reports for
        the node the pattern is attached to.
    """
    cc_dict = get_cyclomatic_complexity_dictionary(arguments, res)
    return {
        pattern.pattern_id: get_subtree_cyclomatic_complexity_from_calls(arguments, res, cc_dict, pattern.node_id)
        for pattern_list in vars(res.patterns).values()
        for pattern in pattern_list
    }
def get_suggestion_immediate_lines_of_code(res: DetectionResult) -> Dict[int, int]:  # pattern_id: lines of code
    """Return the scope size of every pattern without following function calls.

    ``pattern.start_line`` and ``pattern.end_line`` are strings whose second
    ``:``-separated field is the line number; the reported size is the
    difference between the two line numbers.

    Args:
        res: Detection result; every attribute of ``res.patterns`` is treated
            as an iterable of patterns.

    Returns:
        Dictionary of the form ``{pattern_id: end line - start line}``.
    """
    res_dict: Dict[int, int] = dict()

    for patterns in vars(res.patterns).values():
        for pattern in patterns:
            start_line_num = int(pattern.start_line.split(":")[1])
            end_line_num = int(pattern.end_line.split(":")[1])
            # NOTE(review): the difference excludes one boundary line, i.e. a
            # pattern that starts and ends on the same line counts as 0.
            # Confirm that this is the intended definition.
            res_dict[pattern.pattern_id] = end_line_num - start_line_num

    return res_dict


def get_suggestion_lines_of_code_including_calls(
    res: DetectionResult,
) -> Dict[int, int]:  # pattern_id : lines of inlined code
    """Return the scope size of every pattern including its subtree.

    Lines of code are counted as if every function call would be inlined:
    the immediate size of the pattern is increased by the number of distinct
    ``file_id:line`` ids covered by the nodes that ``subtree_of_type`` yields
    for the pattern's base node.

    Args:
        res: Detection result providing the patterns and the PET (``res.pet``).

    Returns:
        Dictionary of the form ``{pattern_id: lines of code}``.
    """
    # step 1: get immediate lines of code per pattern (shared helper instead
    # of a second copy of the same loop)
    res_dict = get_suggestion_immediate_lines_of_code(res)

    # step 2: add lines of code from function calls
    for patterns in vars(res.patterns).values():
        for pattern in patterns:
            pattern_base_node = res.pet.node_at(pattern.node_id)
            subtree = subtree_of_type(res.pet, pattern_base_node)
            # a set is used so that lines shared by several nodes count once
            # NOTE(review): presumably the subtree also covers the pattern's
            # own lines, which step 1 already counted - verify.
            lines_in_subtree: Set[LineID] = {
                LineID(f"{node.file_id}:{line_num_in_scope}")
                for node in subtree
                for line_num_in_scope in range(node.start_line, node.end_line + 1)
            }
            res_dict[pattern.pattern_id] += len(lines_in_subtree)

    return res_dict


# NOTE: the following function belongs to utilities/call_path_depth.py in the patch.
def get_outgoing_call_path_depth(pet: PEGraphX, node: Node) -> int:
    """Return the largest number of nested calls reachable from ``node``.

    The graph is traversed with an explicit stack. Moving to a direct child
    keeps the current call depth, following a ``CALLSNODE`` edge of a child
    increases it by one. Call targets that were already taken from the stack
    are not followed again, which keeps recursive call chains finite.

    Args:
        pet: Graph that is queried for children, call edges and nodes.
        node: Node at which the traversal starts.

    Returns:
        Maximum call depth that was encountered; 0 if no call edge is found.
    """
    visited: List[Node] = []
    # entries are (node, number of call edges followed to reach it)
    stack: List[tuple[Node, int]] = [(node, 0)]
    max_depth = 0

    while stack:
        cur_node, cur_call_depth = stack.pop()
        visited.append(cur_node)

        for child in direct_children(pet, cur_node):
            for _, target_id, _ in out_edges(pet, child.id, EdgeType.CALLSNODE):
                callee = pet.node_at(target_id)
                if callee in visited:
                    continue
                max_depth = max(max_depth, cur_call_depth + 1)
                stack.append((callee, cur_call_depth + 1))
            if child not in visited:
                stack.append((child, cur_call_depth))

    return max_depth
def get_num_function_calls(pet: PEGraphX, node: Node, visited: List[Node]) -> int:
    """Count the function calls reachable from ``node``, following callees transitively.

    Every direct child of ``node`` is inspected. A child with outgoing
    ``CALLSNODE`` edges contributes one call per edge plus, recursively, the
    calls found inside the called node. A child without call edges is
    descended into directly.

    Args:
        pet: Graph that is queried for children, call edges and nodes.
        node: Node at which counting starts.
        visited: Nodes on the current traversal path. ``node`` is appended to
            this list in place; recursive invocations receive their own copy.

    Returns:
        Number of call edges encountered. A call whose target is already on
        the current path is counted once but not expanded again.
    """
    function_calls = 0
    visited.append(node)

    for child in direct_children(pet, node):
        out_call_edges = out_edges(pet, child.id, EdgeType.CALLSNODE)
        if not out_call_edges:
            # no call at this child: continue through the children hierarchy
            function_calls += get_num_function_calls(pet, child, list(visited))
            continue

        for _, target_id, _ in out_call_edges:
            function_calls += 1
            # visited stores Node objects, so the edge target has to be
            # resolved to its node before the membership test. Testing the
            # raw target id would never match and recursive call chains
            # would not be cut off.
            callee = pet.node_at(target_id)
            if callee in visited:
                continue
            # a shallow copy is sufficient: only the list has to be private
            # to the recursive call, the nodes themselves are not modified
            function_calls += get_num_function_calls(pet, callee, list(visited))

    return function_calls