Commit: some lint
tssweeney committed Jul 31, 2024
1 parent a739c45 commit 2a16c0e
Showing 1 changed file with 12 additions and 15 deletions.
27 changes: 12 additions & 15 deletions weave/flow/eval.py
@@ -52,20 +52,6 @@ class EvaluationResults(weave.Object):
rows: weave.Table


-def make_evaluation_results(eval_rows: list[dict]) -> EvaluationResults:
-# The need for this pattern is quite unfortunate and highlights a gap in our
-# data model. As a user, i just want to pass a list of data `eval_rows` to
-# summarize. Under the hood, Weave should choose the appropriate storage
-# format (in this case `Table`) and serialize it that way. Right now, it is
-# just a huge list of dicts. The fact that "as a user" I need to construct
-# `weave.Table` at all is a leaky abstraction. Moreover, the need to
-# construct `EvaluationResults` just so that tracing and the UI works is
-# also bad. In the near-term, this will at least solve the problem of
-# breaking summarization with big datasets, but this is not the correct
-# long-term play.
-return EvaluationResults(rows=weave.Table(eval_rows))


class Evaluation(Object):
"""
Sets up an evaluation which includes a set of scorers and a dataset.
@@ -331,7 +317,18 @@ async def eval_example(example: dict) -> dict:
eval_row["scores"][scorer_name] = {}
eval_rows.append(eval_row)

-summary = await self.summarize(make_evaluation_results(eval_rows))
+# The need for this pattern is quite unfortunate and highlights a gap in our
+# data model. As a user, I just want to pass a list of data `eval_rows` to
+# summarize. Under the hood, Weave should choose the appropriate storage
+# format (in this case `Table`) and serialize it that way. Right now, it is
+# just a huge list of dicts. The fact that "as a user" I need to construct
+# `weave.Table` at all is a leaky abstraction. Moreover, the need to
+# construct `EvaluationResults` just so that tracing and the UI works is
+# also bad. In the near-term, this will at least solve the problem of
+# breaking summarization with big datasets, but this is not the correct
+# long-term solution.
+eval_results = EvaluationResults(rows=weave.Table(eval_rows))
+summary = await self.summarize(eval_results)

print("Evaluation summary", summary)

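For readers of this diff, the pattern described by the new inline comment looks roughly like the sketch below when written out on its own: plain dict rows have to be wrapped in weave.Table and then in EvaluationResults by hand before they can be summarized. The import path and the row shape are assumptions for illustration, not taken from this commit.

# Minimal sketch of the inlined pattern (assumed import path and row shape).
import weave
from weave.flow.eval import EvaluationResults  # path assumed from the file changed above

# Plain dict rows, as produced by the evaluation loop (shape is illustrative).
eval_rows = [
    {"scores": {"my_scorer": {"correct": True}}},
    {"scores": {"my_scorer": {"correct": False}}},
]

# The caller picks the storage format (weave.Table) and wraps it in
# EvaluationResults manually -- the leaky abstraction the comment calls out.
eval_results = EvaluationResults(rows=weave.Table(eval_rows))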
