define decorators to remove repetitive code that shows up at the begi…

…nning of CodeMods
pixee · LucasFaudman · Apr 19, 2024 · Apr 19, 2024 · Apr 19, 2024 · Apr 19, 2024
commit 05876b310107f4e6940ef6d6121435ddb4b1a5f4
@@ -1,6 +1,6 @@
 from enum import Enum
 from pathlib import Path
-from typing import Any, Optional, TypeAlias
+from typing import Any, Iterator, Optional, Type, TypeAlias, TypeVar
 
 import libcst as cst
 from libcst import MetadataDependent, matchers
@@ -216,3 +216,23 @@ def is_zero(node: cst.CSTNode) -> bool:
         case cst.Call(func=cst.Name("int")) | cst.Call(func=cst.Name("float")):
             return is_zero(node.args[0].value)
     return False
+
+
+_CSTNode = TypeVar("_CSTNode", bound=cst.CSTNode)
+
+
+def extract_boolean_operands(
+    node: cst.BooleanOperation, ensure_type: Type[_CSTNode] = cst.CSTNode
+) -> Iterator[_CSTNode]:
+    """
+    Recursively extract operands from a cst.BooleanOperation node from left to right as an iterator of nodes.
+    """
+    if isinstance(node.left, cst.BooleanOperation):
+        # Recursively yield operands from the boolean operation on the left
+        yield from extract_boolean_operands(node.left, ensure_type)
+    else:
+        # Yield left operand
+        yield cst.ensure_type(node.left, ensure_type)
+
+    # Yield right operand
+    yield cst.ensure_type(node.right, ensure_type)
@@ -0,0 +1,96 @@
+from functools import wraps
+from typing import Callable, TypeVar
+
+import libcst as cst
+
+from codemodder.codemods.base_visitor import BaseTransformer, BaseVisitor
+
+# TypeVars for decorators below
+_BaseVisitorOrTransformer = TypeVar(
+    "_BaseVisitorOrTransformer", BaseVisitor, BaseTransformer
+)
+_CSTNode = TypeVar("_CSTNode", bound=cst.CSTNode)
+
+
+def check_filter_by_path_includes_or_excludes(
+    func: Callable[[_BaseVisitorOrTransformer, _CSTNode, _CSTNode], cst.CSTNode]
+) -> Callable[[_BaseVisitorOrTransformer, _CSTNode, _CSTNode], cst.CSTNode]:
+    """
+    Decorator to filter nodes based on path-includes or path-excludes flags.
+
+    Calls the decorated func only if the original node is not filtered.
+    otherwise, returns the updated node as is.
+
+    ```
+    @check_filter_by_path_includes_or_excludes
+    def leave_Call(self, original_node: cst.Call, updated_node: cst.Call) -> cst.Call
+        # rest of the method
+    ```
+
+    Is equivalent to:
+
+    ```
+    def leave_Call(self, original_node: cst.Call, updated_node: cst.Call) -> cst.Call
+        # Added by the decorator
+        if not self.filter_by_path_includes_or_excludes(
+            self.node_position(original_node)
+            ):
+            return updated_node
+
+        # rest of the method
+    ```
+    """
+
+    @wraps(func)
+    def wrapper(
+        instance: _BaseVisitorOrTransformer,
+        original_node: _CSTNode,
+        updated_node: _CSTNode,
+    ) -> cst.CSTNode:
+        if not instance.filter_by_path_includes_or_excludes(
+            instance.node_position(original_node)
+        ):
+            return updated_node
+        return func(instance, original_node, updated_node)
+
+    return wrapper
+
+
+def check_node_is_not_selected(
+    func: Callable[[_BaseVisitorOrTransformer, _CSTNode, _CSTNode], cst.CSTNode]
+) -> Callable[[_BaseVisitorOrTransformer, _CSTNode, _CSTNode], cst.CSTNode]:
+    """
+    Decorator to filter nodes based on whether the node is selected or not.
+
+    Calls the decorated func only if the original node is not selected.
+    otherwise, returns the updated node as is.
+
+    ```
+    @check_node_is_not_selected
+    def leave_Call(self, original_node: cst.Call, updated_node: cst.Call) -> cst.Call
+        # rest of the method
+    ```
+
+    Is equivalent to:
+
+    ```
+    def leave_Call(self, original_node: cst.Call, updated_node: cst.Call) -> cst.Call
+        # Added by the decorator
+        if not self.node_is_selected(original_node):
+            return updated_node
+
+        # rest of the method
+    ```
+    """
+
+    @wraps(func)
+    def wrapper(
+        instance: _BaseVisitorOrTransformer,
+        original_node: _CSTNode,
+        updated_node: _CSTNode,
+    ) -> cst.CSTNode:
+        if not instance.node_is_selected(original_node):
+            return updated_node
+        return func(instance, original_node, updated_node)
+
+    return wrapper
@@ -0,0 +1,98 @@
+import libcst as cst
+from libcst import matchers as m
+
+from codemodder.codemods.utils import extract_boolean_operands
+from codemodder.codemods.utils_decorators import (
+    check_filter_by_path_includes_or_excludes,
+)
+from codemodder.codemods.utils_mixin import NameResolutionMixin
+from core_codemods.api import Metadata, ReviewGuidance, SimpleCodemod
+
+
+class CombineIsinstanceIssubclass(SimpleCodemod, NameResolutionMixin):
+    metadata = Metadata(
+        name="combine-isinstance-issubclass",
+        summary="Simplify Boolean Expressions Using `isinstance` and `issubclass`",
+        review_guidance=ReviewGuidance.MERGE_WITHOUT_REVIEW,
+        references=[],
+    )
+    change_description = "Use tuple of matches instead of boolean expression"
+
+    @check_filter_by_path_includes_or_excludes
+    def leave_BooleanOperation(
+        self, original_node: cst.BooleanOperation, updated_node: cst.BooleanOperation
+    ) -> cst.CSTNode:
+        if self.matches_isinstance_issubclass_or_pattern(original_node):
+            self.report_change(original_node)
+
+            elements = []
+            seen_values = set()
+            for call in extract_boolean_operands(updated_node, ensure_type=cst.Call):
+                class_tuple_arg_value = call.args[1].value
+                if isinstance(class_tuple_arg_value, cst.Tuple):
+                    arg_elements = class_tuple_arg_value.elements
+                else:
+                    arg_elements = (cst.Element(value=class_tuple_arg_value),)
+
+                for element in arg_elements:
+                    if (value := getattr(element.value, "value", None)) in seen_values:
+                        # If an element has a non-None evaluated value that has already been seen, continue to avoid duplicates
+                        continue
+                    if value is not None:
+                        seen_values.add(value)
+                    elements.append(element)
+
+            instance_arg = updated_node.left.args[0]
+            new_class_tuple_arg = cst.Arg(value=cst.Tuple(elements=elements))
+            return cst.Call(func=call.func, args=[instance_arg, new_class_tuple_arg])
+
+        return updated_node
+
+    def matches_isinstance_issubclass_or_pattern(
+        self, node: cst.BooleanOperation
+    ) -> bool:
+        # Match the pattern: isinstance(x, cls1) or isinstance(x, cls2) or isinstance(x, cls3) or ...
+        # and the same but with issubclass
+        args = [m.Arg(value=m.Name()), m.Arg(value=m.Name() | m.Tuple())]
+        isinstance_call = m.Call(
+            func=m.Name("isinstance"),
+            args=args,
+        )
+        issubclass_call = m.Call(
+            func=m.Name("issubclass"),
+            args=args,
+        )
+        isinstance_or = m.BooleanOperation(
+            left=isinstance_call, operator=m.Or(), right=isinstance_call
+        )
+        issubclass_or = m.BooleanOperation(
+            left=issubclass_call, operator=m.Or(), right=issubclass_call
+        )
+
+        # Check for simple case: isinstance(x, cls) or issubclass(x, cls)
+        if (
+            m.matches(node, isinstance_or | issubclass_or)
+            and node.left.func.value == node.right.func.value  # Same function
+            and node.left.args[0].value.value
+            == node.right.args[0].value.value  # Same first argument (instance)
+        ):
+            return True
+
+        # Check for chained case: isinstance(x, cls1) or isinstance(x, cls2) or isinstance(x, cls3) or ...
+        chained_or = m.BooleanOperation(
+            left=m.BooleanOperation(operator=m.Or()),
+            operator=m.Or(),
+            right=isinstance_call | issubclass_call,
+        )
+        if m.matches(node, chained_or):
+            return all(
+                (
+                    call.func.value == node.right.func.value  # Same function
+                    and call.args[0].value.value
+                    == node.right.args[0].value.value  # Same first argument (instance)
+                )
+                for call in extract_boolean_operands(node, ensure_type=cst.Call)
+            )
+
+        # No match
+        return False
@@ -0,0 +1,135 @@
+import pytest
+
+from codemodder.codemods.test import BaseCodemodTest
+from core_codemods.combine_isinstance_issubclass import CombineIsinstanceIssubclass
+
+each_func = pytest.mark.parametrize("func", ["isinstance", "issubclass"])
+
+
+class TestCombineIsinstanceIssubclass(BaseCodemodTest):
+    codemod = CombineIsinstanceIssubclass
+
+    def test_name(self):
+        assert self.codemod.name == "combine-isinstance-issubclass"
+
+    @each_func
+    def test_combine(self, tmpdir, func):
+        input_code = f"""
+        {func}(x, str) or {func}(x, bytes)
+        """
+        expected = f"""
+        {func}(x, (str, bytes))
+        """
+        self.run_and_assert(tmpdir, input_code, expected)
+
+    @pytest.mark.parametrize(
+        "code",
+        [
+            "isinstance(x, str)",
+            "isinstance(x, (str, bytes))",
+            "isinstance(x, str) and isinstance(x, bytes)",
+            "isinstance(x, str) and isinstance(x, str) or True",
+            "isinstance(x, str) or issubclass(x, str)",
+            "isinstance(x, str) or isinstance(y, str)",
+        ],
+    )
+    def test_no_change(self, tmpdir, code):
+        self.run_and_assert(tmpdir, code, code)
+
+    def test_exclude_line(self, tmpdir):
+        input_code = (
+            expected
+        ) = """
+        x = "foo"
+        isinstance(x, str) or isinstance(x, bytes)
+        """
+        lines_to_exclude = [3]
+        self.run_and_assert(
+            tmpdir,
+            input_code,
+            expected,
+            lines_to_exclude=lines_to_exclude,
+        )
+
+    def _format_func_run_test(self, tmpdir, func, input_code, expected, num_changes=1):
+        self.run_and_assert(
+            tmpdir,
+            input_code.replace("{func}", func),
+            expected.replace("{func}", func),
+            num_changes,
+        )
+
+    @each_func
+    @pytest.mark.parametrize(
+        "input_code, expected",
+        [
+            # Tuple on the left
+            (
+                "{func}(x, (str, bytes)) or {func}(x, bytearray)",
+                "{func}(x, (str, bytes, bytearray))",
+            ),
+            # Tuple on the right
+            (
+                "{func}(x, str) or {func}(x, (bytes, bytearray))",
+                "{func}(x, (str, bytes, bytearray))",
+            ),
+            # Tuple on both sides no duplicates
+            (
+                "{func}(x, (str, bytes)) or {func}(x, (bytearray, memoryview))",
+                "{func}(x, (str, bytes, bytearray, memoryview))",
+            ),
+            # Tuple on both sides with duplicates
+            (
+                "{func}(x, (str, bytes)) or {func}(x, (str, bytes, bytearray))",
+                "{func}(x, (str, bytes, bytearray))",
+            ),
+        ],
+    )
+    def test_combine_tuples(self, tmpdir, func, input_code, expected):
+        self._format_func_run_test(tmpdir, func, input_code, expected)
+
+    @each_func
+    @pytest.mark.parametrize(
+        "input_code, expected",
+        [
+            # 3 cst.Names
+            (
+                "{func}(x, str) or {func}(x, bytes) or {func}(x, bytearray)",
+                "{func}(x, (str, bytes, bytearray))",
+            ),
+            # 4 cst.Names
+            (
+                "{func}(x, str) or {func}(x, bytes) or {func}(x, bytearray) or {func}(x, some_type)",
+                "{func}(x, (str, bytes, bytearray, some_type))",
+            ),
+            # 5 cst.Names
+            (
+                "{func}(x, str) or {func}(x, bytes) or {func}(x, bytearray) or {func}(x, some_type) or {func}(x, another_type)",
+                "{func}(x, (str, bytes, bytearray, some_type, another_type))",
+            ),
+            # 2 cst.Names and 1 cst.Tuple
+            (
+                "{func}(x, str) or {func}(x, bytes) or {func}(x, (bytearray, memoryview))",
+                "{func}(x, (str, bytes, bytearray, memoryview))",
+            ),
+            # 2 cst.Name and 2 cst.Tuples
+            (
+                "{func}(x, str) or {func}(x, (bytes, bytearray)) or {func}(x, (memoryview, bytearray)) or {func}(x, list)",
+                "{func}(x, (str, bytes, bytearray, memoryview, list))",
+            ),
+            # 3 cst.Tuples
+            (
+                "{func}(x, (str, bytes)) or {func}(x, (bytes, bytearray)) or {func}(x, (bytearray, memoryview))",
+                "{func}(x, (str, bytes, bytearray, memoryview))",
+            ),
+            # 4 cst.Tuples
+            (
+                "{func}(x, (str, bytes)) or {func}(x, (bytes, bytearray)) or {func}(x, (bytearray, memoryview)) or {func}(x, (memoryview, str))",
+                "{func}(x, (str, bytes, bytearray, memoryview))",
+            ),
+        ],
+    )
+    def test_more_than_two_calls(self, tmpdir, func, input_code, expected):
+        self._format_func_run_test(
+            tmpdir, func, input_code, expected, input_code.count(" or ")
+        )