Skip to content

Commit

Permalink
feat(explorer): calculate code statistics
Browse files Browse the repository at this point in the history
  • Loading branch information
lukasrothenberger committed Oct 23, 2024
1 parent 1bcc70b commit c4a8834
Show file tree
Hide file tree
Showing 15 changed files with 837 additions and 0 deletions.
4 changes: 4 additions & 0 deletions discopop_explorer/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,9 @@ def parse_args() -> ExplorerArguments:
"--llvm-cxxfilt-path", type=str, default=None,
help="Path to llvm-cxxfilt executable. Required for task pattern detector if non-standard path should be used.",
)
experimental_parser.add_argument(
"--disable-statistics", action="store_false", help="Disable the calculation and storing of statistics for code and generated suggestions."
)
# fmt: on

arguments = parser.parse_args()
Expand Down Expand Up @@ -156,6 +159,7 @@ def parse_args() -> ExplorerArguments:
log_level=arguments.log.upper(),
write_log=arguments.write_log,
load_existing_doall_and_reduction_patterns=arguments.load_existing_doall_and_reduction_patterns,
collect_statistics=arguments.disable_statistics,
)


Expand Down
6 changes: 6 additions & 0 deletions discopop_explorer/discopop_explorer.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import pstats2 # type:ignore
from pluginbase import PluginBase # type: ignore
from discopop_explorer.functions.PEGraph.output.json import dump_to_pickled_json
from discopop_explorer.utilities.statistics.collect_statistics import collect_statistics
from discopop_library.ArgumentClasses.GeneralArguments import GeneralArguments # type: ignore
from discopop_library.HostpotLoader.HotspotLoaderArguments import HotspotLoaderArguments
from discopop_library.HostpotLoader.HotspotNodeType import HotspotNodeType
Expand Down Expand Up @@ -65,6 +66,7 @@ class ExplorerArguments(GeneralArguments):
llvm_cxxfilt_path: Optional[str]
microbench_file: Optional[str]
load_existing_doall_and_reduction_patterns: bool
collect_statistics: bool

def __post_init__(self) -> None:
self.__validate()
Expand Down Expand Up @@ -245,6 +247,10 @@ def run(arguments: ExplorerArguments) -> None:

end = time.time()

# collect code and suggestions statistics, if requested
if arguments.collect_statistics:
collect_statistics(arguments, res)

if arguments.enable_pet_dump_file is not None:
with open(arguments.enable_pet_dump_file, "w+") as f:
f.write(dump_to_pickled_json(res.pet))
Expand Down
125 changes: 125 additions & 0 deletions discopop_explorer/utilities/statistics/collect_statistics.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
# This file is part of the DiscoPoP software (http://www.discopop.tu-darmstadt.de)
#
# Copyright (c) 2020, Technische Universitaet Darmstadt, Germany
#
# This software may be modified and distributed under the terms of
# the 3-Clause BSD License. See the LICENSE file in the package base
# directory for details.

from __future__ import annotations
from typing import TYPE_CHECKING
import logging

from discopop_explorer.utilities.statistics.cyclomatic_complexity.boxplot import get_cyclomatic_complexities_for_boxplot
from discopop_explorer.utilities.statistics.cyclomatic_complexity.total import get_summed_cyclomatic_complexity
from discopop_explorer.utilities.statistics.maximum_call_path_depth import get_maximum_call_path_depth
from discopop_explorer.utilities.statistics.num_function_calls import get_suggestion_num_function_calls
from discopop_explorer.utilities.statistics.output_statistics import (
output_aggregated_suggestion_statistics,
output_code_statistics,
output_suggestion_statistics,
)
from discopop_explorer.utilities.statistics.suggestion_call_path_depths import get_suggestion_call_path_depths
from discopop_explorer.utilities.statistics.suggestion_cyclomatic_complexity import (
get_suggestion_summed_cyclomatic_complexity_from_calls,
)
from discopop_explorer.utilities.statistics.suggestion_lines_of_code import (
get_suggestion_immediate_lines_of_code,
get_suggestion_lines_of_code_including_calls,
)

if TYPE_CHECKING:
from discopop_explorer.discopop_explorer import ExplorerArguments
from discopop_library.result_classes.DetectionResult import DetectionResult

logger = logging.getLogger("statistics")


def collect_statistics(arguments: ExplorerArguments, res: DetectionResult) -> None:
    """Compute code and suggestion statistics, log them, and hand them to the output routines.

    The individual metrics are computed one after another by the imported
    ``get_*`` helpers; every intermediate result is logged on debug level
    before all results are passed on to the ``output_*`` functions.

    Args:
        arguments: explorer arguments, forwarded to the metric and output helpers.
        res: detection result; ``res.pet`` is used for the maximum call path depth,
            the remaining helpers receive ``res`` itself.
    """

    def _render(mapping) -> str:  # type: ignore
        # string form of a list of "key => value" entries, in iteration order of the mapping
        return str([f"{key!s} => {mapping[key]!s}" for key in mapping])

    logger.info("Collecting code statistics...")

    # --- call path depths ---
    maximum_call_path_depth = get_maximum_call_path_depth(res.pet)
    logger.debug(f"--> maximum_call_path_depth: {maximum_call_path_depth!s}")

    suggestion_call_path_depths = get_suggestion_call_path_depths(res)
    logger.debug("--> suggestion_call_path_depths: " + _render(suggestion_call_path_depths))

    # --- function calls per suggestion ---
    suggestion_num_function_calls = get_suggestion_num_function_calls(res)
    logger.debug("--> suggestion_num_function_calls: " + _render(suggestion_num_function_calls))

    # --- lines of code per suggestion ---
    suggestion_immediate_lines_of_code = get_suggestion_immediate_lines_of_code(res)
    logger.debug("--> suggestion_immediate_lines_of_code: " + _render(suggestion_immediate_lines_of_code))

    suggestion_lines_of_code_including_calls = get_suggestion_lines_of_code_including_calls(res)
    logger.debug(
        "--> suggestion_lines_of_code_including_calls: " + _render(suggestion_lines_of_code_including_calls)
    )

    # --- cyclomatic complexity ---
    summed_cyclomatic_complexity = get_summed_cyclomatic_complexity(arguments, res)
    logger.debug(f"--> summed_cyclomatic_complexity = {summed_cyclomatic_complexity!s}")

    boxplot_values = get_cyclomatic_complexities_for_boxplot(arguments, res)
    cc_min, cc_max, cc_avg, cc_lower_quart, cc_upper_quart = boxplot_values
    boxplot_labels = ("cc_min", "cc_max", "cc_avg", "cc_lower_quart", "cc_upper_quart")
    for label, value in zip(
        boxplot_labels, (cc_min, cc_max, cc_avg, cc_lower_quart, cc_upper_quart)
    ):
        logger.debug(f"--> {label}: {value!s}")

    suggestion_summed_cyclomatic_complexity_from_calls = get_suggestion_summed_cyclomatic_complexity_from_calls(
        arguments, res
    )
    logger.debug(
        "--> suggestion_summed_cyclomatic_complexity_from_calls: "
        + _render(suggestion_summed_cyclomatic_complexity_from_calls)
    )

    # output statistics to file
    output_code_statistics(
        arguments,
        maximum_call_path_depth,
        summed_cyclomatic_complexity,
        cc_min,
        cc_max,
        cc_avg,
        cc_lower_quart,
        cc_upper_quart,
    )

    # both suggestion-level outputs receive the same per-suggestion metrics in the same order
    per_suggestion_metrics = (
        suggestion_call_path_depths,
        suggestion_num_function_calls,
        suggestion_immediate_lines_of_code,
        suggestion_lines_of_code_including_calls,
        suggestion_summed_cyclomatic_complexity_from_calls,
    )
    output_suggestion_statistics(arguments, *per_suggestion_metrics)
    output_aggregated_suggestion_statistics(arguments, *per_suggestion_metrics)
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
# This file is part of the DiscoPoP software (http://www.discopop.tu-darmstadt.de)
#
# Copyright (c) 2020, Technische Universitaet Darmstadt, Germany
#
# This software may be modified and distributed under the terms of
# the 3-Clause BSD License. See the LICENSE file in the package base
# directory for details.

from __future__ import annotations
from typing import TYPE_CHECKING, Dict, List, Tuple

if TYPE_CHECKING:
from discopop_explorer.discopop_explorer import ExplorerArguments
from discopop_library.result_classes.DetectionResult import DetectionResult

# runtime imports (the type aliases used for readability are defined below)
from subprocess import check_output
from discopop_library.PathManagement.PathManagement import load_file_mapping


# Type aliases for readability: each one labels a position in the tuple
# returned by get_cyclomatic_complexities_for_boxplot. All of them are plain
# ints (the average is truncated to an int as well).
MIN = int
MAX = int
AVG = int
LOWER_QUART = int
UPPER_QUART = int


def _boxplot_statistics(complexities: List[int]) -> Tuple[MIN, MAX, AVG, LOWER_QUART, UPPER_QUART]:
    """Reduce a list of complexity values to (min, max, avg, lower quartile, upper quartile)."""
    if not complexities:
        # nothing was measured: report zeros instead of failing in min() or the division
        return 0, 0, 0, 0, 0

    ordered = sorted(complexities)
    count = len(ordered)
    last_idx = count - 1
    # The quartile position (count + 1) * q / 4 is truncated and used as an index.
    # For small inputs (e.g. 2 or 3 values) it can reach count, so clamp it to the
    # last valid index to avoid an IndexError.
    lower_quartile_idx = min(int((count + 1) * 1 / 4), last_idx)
    upper_quartile_idx = min(int((count + 1) * 3 / 4), last_idx)

    cc_min: MIN = ordered[0]
    cc_max: MAX = ordered[-1]
    cc_avg: AVG = int(sum(ordered) / count)
    return cc_min, cc_max, cc_avg, ordered[lower_quartile_idx], ordered[upper_quartile_idx]


def get_cyclomatic_complexities_for_boxplot(
    arguments: ExplorerArguments, res: DetectionResult
) -> Tuple[MIN, MAX, AVG, LOWER_QUART, UPPER_QUART]:
    """Compute boxplot values over the per-function cyclomatic complexities.

    Runs ``pmccabe -C`` on every file listed in the file mapping and collects
    the second tab-separated field of each output line that has at least
    nine fields.

    Args:
        arguments: explorer arguments; only ``file_mapping_file`` is read here.
        res: detection result (not used by this function).

    Returns:
        Tuple ``(min, max, avg, lower quartile, upper quartile)``. The average is
        truncated to an int. All values are 0 if no complexity value was found.

    Raises:
        subprocess.CalledProcessError: if ``pmccabe`` exits with a non-zero status.
        OSError: if the ``pmccabe`` executable can not be started.
    """
    file_mapping = load_file_mapping(arguments.file_mapping_file)
    # query the cyclomatic complexity of every function in all mapped files
    cmd = ["pmccabe", "-C"]
    cmd.extend(str(file_mapping[file_id]) for file_id in file_mapping)
    out = check_output(cmd).decode("utf-8")

    # unpack results; lines with fewer than 9 tab-separated fields are skipped
    cyclomatic_complexities: List[int] = []
    for line in out.split("\n"):
        split_line = line.split("\t")
        if len(split_line) < 9:
            continue
        # split_line[7] holds the file path, split_line[8] the function name
        cyclomatic_complexities.append(int(split_line[1]))

    return _boxplot_statistics(cyclomatic_complexities)
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
# This file is part of the DiscoPoP software (http://www.discopop.tu-darmstadt.de)
#
# Copyright (c) 2020, Technische Universitaet Darmstadt, Germany
#
# This software may be modified and distributed under the terms of
# the 3-Clause BSD License. See the LICENSE file in the package base
# directory for details.

from __future__ import annotations
from subprocess import check_output
from typing import TYPE_CHECKING, Dict, Optional

from discopop_library.PathManagement.PathManagement import load_file_mapping

if TYPE_CHECKING:
from discopop_explorer.discopop_explorer import ExplorerArguments
from discopop_library.result_classes.DetectionResult import DetectionResult


# Type aliases for readability of the dictionary layout below.
FILE_ID = int
FILEPATH = str
FUNC_NAME = str
CYC_COMP = int

# Maps file id -> function name -> cyclomatic complexity.
CC_DICT = Dict[FILE_ID, Dict[FUNC_NAME, CYC_COMP]]


def get_cyclomatic_complexity_dictionary(arguments: ExplorerArguments, res: DetectionResult) -> CC_DICT:
    """Build a lookup of per-function cyclomatic complexities, grouped by file id.

    ``pmccabe -C`` is executed on all files of the file mapping. From every output
    line with at least nine tab-separated fields, field 1 is taken as complexity,
    field 7 as file path and field 8 as function name. Lines whose path does not
    match any entry of the file mapping are dropped.

    Args:
        arguments: explorer arguments; only ``file_mapping_file`` is read here.
        res: detection result (not used by this function).

    Returns:
        Dictionary mapping file id -> function name -> cyclomatic complexity.
    """
    file_mapping = load_file_mapping(arguments.file_mapping_file)
    mapped_paths = [str(file_mapping[fid]) for fid in file_mapping]

    # invoke pmccabe once for all mapped files
    report = check_output(["pmccabe", "-C"] + mapped_paths).decode("utf-8")

    # reverse lookup path -> file id; if a path occurs more than once, the last id wins
    id_by_path: Dict[FILEPATH, FILE_ID] = {str(file_mapping[fid]): fid for fid in file_mapping}

    # unpack and repack results
    cc_dict: Dict[FILE_ID, Dict[FUNC_NAME, CYC_COMP]] = {}
    for row in report.split("\n"):
        fields = row.split("\t")
        if len(fields) < 9:
            continue
        complexity: CYC_COMP = int(fields[1])
        reported_path: FILEPATH = fields[7]

        owning_file_id: Optional[int] = id_by_path.get(reported_path)
        if owning_file_id is None:
            # the reported file is not part of the file mapping
            continue

        function_name: FUNC_NAME = fields[8]
        cc_dict.setdefault(owning_file_id, {})[function_name] = complexity

    return cc_dict
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
# This file is part of the DiscoPoP software (http://www.discopop.tu-darmstadt.de)
#
# Copyright (c) 2020, Technische Universitaet Darmstadt, Germany
#
# This software may be modified and distributed under the terms of
# the 3-Clause BSD License. See the LICENSE file in the package base
# directory for details.

from __future__ import annotations

from typing import TYPE_CHECKING, List, cast

from discopop_explorer.enums.EdgeType import EdgeType
from discopop_explorer.functions.PEGraph.queries.edges import out_edges

if TYPE_CHECKING:
from discopop_explorer.discopop_explorer import ExplorerArguments
from discopop_library.result_classes.DetectionResult import DetectionResult
from discopop_explorer.utilities.statistics.cyclomatic_complexity.cc_dictionary import CC_DICT
from discopop_explorer.aliases.NodeID import NodeID

from discopop_explorer.classes.PEGraph.FunctionNode import FunctionNode
from discopop_explorer.functions.PEGraph.queries.subtree import subtree_of_type


def get_subtree_cyclomatic_complexity_from_calls(
    arguments: ExplorerArguments, res: DetectionResult, cc_dict: CC_DICT, root_node_id: NodeID
) -> int:
    """Sum the cyclomatic complexities of all functions called from within a subtree.

    Every node in the subtree below ``root_node_id`` is inspected for outgoing
    ``CALLSNODE`` edges. Each call target is matched against the function names
    stored in ``cc_dict`` for the target's file; a call target reached via
    several edges is counted once per edge.

    Args:
        arguments: explorer arguments (not used by this function).
        res: detection result providing the graph ``res.pet``.
        cc_dict: file id -> function name -> cyclomatic complexity.
        root_node_id: id of the node whose subtree is inspected.

    Returns:
        Sum of the complexities of all matched call targets; 0 if none matched.
    """
    pet = res.pet

    # collect called functions
    callees: List[FunctionNode] = []
    for member in subtree_of_type(pet, pet.node_at(root_node_id)):
        callees.extend(
            cast(FunctionNode, pet.node_at(target))
            for _, target, _ in out_edges(pet, member.id, EdgeType.CALLSNODE)
        )

    # Match each callee against the cc_dict entries of its file. Due to overloading
    # or name extensions (i.e. get_a vs. get_a_and_b), the shortest function name
    # contained in the callee's name is selected.
    total = 0
    for callee in callees:
        if callee.file_id not in cc_dict:
            continue
        known_functions = cc_dict[callee.file_id]
        matching_names = [name for name in known_functions if name in callee.name]
        if not matching_names:
            continue
        # min() yields the first of several equally short names, like a stable sort would
        total += known_functions[min(matching_names, key=len)]

    return total
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# This file is part of the DiscoPoP software (http://www.discopop.tu-darmstadt.de)
#
# Copyright (c) 2020, Technische Universitaet Darmstadt, Germany
#
# This software may be modified and distributed under the terms of
# the 3-Clause BSD License. See the LICENSE file in the package base
# directory for details.

from __future__ import annotations
from typing import TYPE_CHECKING

if TYPE_CHECKING:
from discopop_explorer.discopop_explorer import ExplorerArguments
from discopop_library.result_classes.DetectionResult import DetectionResult
from discopop_library.PathManagement.PathManagement import load_file_mapping


from subprocess import check_output


def get_summed_cyclomatic_complexity(arguments: ExplorerArguments, res: DetectionResult) -> int:
    """Calculate the total cyclomatic complexity of all files in the file mapping.

    Runs ``pmccabe -T`` on every mapped file and returns the second
    tab-separated field of its output, converted to an int.

    Args:
        arguments: explorer arguments; only ``file_mapping_file`` is read here.
        res: detection result (not used by this function).
    """
    mapping = load_file_mapping(arguments.file_mapping_file)

    # a single pmccabe invocation reports the totals over all mapped files
    command = ["pmccabe", "-T"]
    command.extend(str(mapping[fid]) for fid in mapping)
    totals = check_output(command).decode("utf-8")

    return int(totals.split("\t")[1])
Loading

0 comments on commit c4a8834

Please sign in to comment.