From 2a16c0e2d2bc6f5e6b50a0cfbb5d9edcc5e50b59 Mon Sep 17 00:00:00 2001 From: Tim Sweeney Date: Wed, 31 Jul 2024 12:51:20 -0700 Subject: [PATCH] some lint --- weave/flow/eval.py | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/weave/flow/eval.py b/weave/flow/eval.py index 273562ba6f3..8cdee87da4b 100644 --- a/weave/flow/eval.py +++ b/weave/flow/eval.py @@ -52,20 +52,6 @@ class EvaluationResults(weave.Object): rows: weave.Table -def make_evaluation_results(eval_rows: list[dict]) -> EvaluationResults: - # The need for this pattern is quite unfortunate and highlights a gap in our - # data model. As a user, i just want to pass a list of data `eval_rows` to - # summarize. Under the hood, Weave should choose the appropriate storage - # format (in this case `Table`) and serialize it that way. Right now, it is - # just a huge list of dicts. The fact that "as a user" I need to construct - # `weave.Table` at all is a leaky abstraction. Moreover, the need to - # construct `EvaluationResults` just so that tracing and the UI works is - # also bad. In the near-term, this will at least solve the problem of - # breaking summarization with big datasets, but this is not the correct - # long-term play. - return EvaluationResults(rows=weave.Table(eval_rows)) - - class Evaluation(Object): """ Sets up an evaluation which includes a set of scorers and a dataset. @@ -331,7 +317,18 @@ async def eval_example(example: dict) -> dict: eval_row["scores"][scorer_name] = {} eval_rows.append(eval_row) - summary = await self.summarize(make_evaluation_results(eval_rows)) + # The need for this pattern is quite unfortunate and highlights a gap in our + # data model. As a user, I just want to pass a list of data `eval_rows` to + # summarize. Under the hood, Weave should choose the appropriate storage + # format (in this case `Table`) and serialize it that way. Right now, it is + # just a huge list of dicts. The fact that "as a user" I need to construct + # `weave.Table` at all is a leaky abstraction. Moreover, the need to + # construct `EvaluationResults` just so that tracing and the UI works is + # also bad. In the near-term, this will at least solve the problem of + # breaking summarization with big datasets, but this is not the correct + # long-term solution. + eval_results = EvaluationResults(rows=weave.Table(eval_rows)) + summary = await self.summarize(eval_results) print("Evaluation summary", summary)