Skip to content

Commit

Permalink
initial implementation for fluid interface check
Browse files Browse the repository at this point in the history
  • Loading branch information
sbrugman committed Dec 9, 2023
1 parent 435963c commit bb8e5f8
Show file tree
Hide file tree
Showing 3 changed files with 193 additions and 0 deletions.
107 changes: 107 additions & 0 deletions refurb/checks/readability/fluid_interface.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
from dataclasses import dataclass

from mypy.nodes import (
Block,
Statement,
AssignmentStmt,
MypyFile,
CallExpr,
MemberExpr,
NameExpr,
)

from refurb.checks.common import check_block_like
from refurb.error import Error


@dataclass
class ErrorInfo(Error):
    r"""When an API has a Fluent Interface (the ability to chain multiple
    calls together), you should chain those calls instead of repeatedly
    assigning the intermediate result back to a variable and then calling a
    method on it.

    Bad:

    ```python
    def get_tensors(device: str) -> torch.Tensor:
        a = torch.ones(2, 1)
        a = a.long()
        a = a.to(device)
        return a


    def process(file_name: str):
        common_columns = ["col1_renamed", "col2_renamed", "custom_col"]
        df = spark.read.parquet(file_name)
        df = df \
            .withColumnRenamed('col1', 'col1_renamed') \
            .withColumnRenamed('col2', 'col2_renamed')
        df = df \
            .select(common_columns) \
            .withColumn('service_type', F.lit('green'))
        return df
    ```

    Good:

    ```python
    def get_tensors(device: str) -> torch.Tensor:
        a = (
            torch.ones(2, 1)
            .long()
            .to(device)
        )
        return a


    def process(file_name: str):
        common_columns = ["col1_renamed", "col2_renamed", "custom_col"]
        df = (
            spark.read.parquet(file_name)
            .withColumnRenamed('col1', 'col1_renamed')
            .withColumnRenamed('col2', 'col2_renamed')
            .select(common_columns)
            .withColumn('service_type', F.lit('green'))
        )
        return df
    ```
    """

    # Identifier of this check.
    name = "use-fluid-interface"
    # Numeric id; the expected test output reports it as FURB184.
    code = 184
    # Category tags this check is filed under.
    categories = ("readability",)


def check(node: Block | MypyFile, errors: list[Error]) -> None:
    """Entry point of the check: run ``check_stmts`` over ``node``.

    The traversal is delegated to ``check_block_like``; presumably it calls
    ``check_stmts`` with the statement list of the block/file (confirm in
    ``refurb.checks.common``). ``errors`` is passed through so that findings
    can be appended to it.
    """
    check_block_like(check_stmts, node, errors)


def check_call(node) -> bool:
match node:
# Single chain
case CallExpr(callee=MemberExpr(expr=NameExpr(name=x), name=y)):
return True
# Nested
case CallExpr(callee=MemberExpr(expr=call_node, name=y)):
return check_call(call_node)

return False


def check_stmts(stmts: list[Statement], errors: list[Error]) -> None:
last = ""

for stmt in stmts:
match stmt:
case AssignmentStmt(lvalues=[NameExpr(name=name)], rvalue=rvalue):
if last and f"{last}'" == name and check_call(rvalue):
errors.append(
ErrorInfo.from_node(
stmt,
f"Assignment statements should be chained",
)
)

last = name

case _:
last = ""
82 changes: 82 additions & 0 deletions test/data/err_184.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
class torch:  # stub: static methods only, enough for the chained calls in the examples below
    @staticmethod
    def ones(*args):
        return torch  # hands back the class itself so another call can follow

    @staticmethod
    def long():
        return torch  # same idea: the result still exposes `.to(...)`

    @staticmethod
    def to(device: str):
        return torch.Tensor()

    class Tensor:
        pass


def transform(x):  # identity helper: a plain function (not a method) used by `no_match`
    return x


class spark:  # stub namespace: nested classes with static methods used by the examples below
    class read:
        @staticmethod
        def parquet(file_name: str):
            return spark.DataFrame()  # starting point of the DataFrame chains

    class functions:
        @staticmethod
        def lit(constant):
            return constant  # pass-through; only needs to be callable

        @staticmethod
        def col(col_name):
            return col_name  # pass-through; returns its argument unchanged

    class DataFrame:
        @staticmethod
        def withColumnRenamed(col_in, col_out):
            return spark.DataFrame()  # every method returns a new DataFrame so calls chain

        @staticmethod
        def withColumn(col_in, col_out):
            return spark.DataFrame()

        @staticmethod
        def select(*args):
            return spark.DataFrame()

# these will match
def get_tensors(device: str) -> torch.Tensor:
    a = torch.ones(2, 1)  # first assignment: nothing to chain onto yet
    a = a.long()  # expected to be reported (FURB184)
    a = a.to(device)  # expected to be reported (FURB184)
    return a


def process(file_name: str):
    common_columns = ["col1_renamed", "col2_renamed", "custom_col"]
    df = spark.read.parquet(file_name)  # first assignment of `df`: not reported
    df = df \
        .withColumnRenamed('col1', 'col1_renamed') \
        .withColumnRenamed('col2', 'col2_renamed')  # this `df = df ...` statement is expected to be reported
    df = df \
        .select(common_columns) \
        .withColumn('service_type', spark.functions.lit('green'))  # expected to be reported as well
    return df


def projection(df_in: spark.DataFrame) -> spark.DataFrame:
    df = (  # NOTE(review): sits under "these will match", yet the expected output has no entry for this function -- confirm intent
        df_in.select(["col1", "col2"])
        .withColumnRenamed("col1", "col1a")
    )
    return df.withColumn("col2a", spark.functions.col("col2").cast("date"))  # a return, not an assignment


# these will not
def no_match():
    y = 10
    y = transform(y)  # plain function call rather than a method call on `y`, so not reported
    return y
4 changes: 4 additions & 0 deletions test/data/err_184.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
test/data/err_184.py:53:5 [FURB184]: Assignment statements should be chained
test/data/err_184.py:54:5 [FURB184]: Assignment statements should be chained
test/data/err_184.py:61:5 [FURB184]: Assignment statements should be chained
test/data/err_184.py:64:5 [FURB184]: Assignment statements should be chained

0 comments on commit bb8e5f8

Please sign in to comment.