rename model_output -> output in tests
tcapelle committed Oct 12, 2024
1 parent 831db85 · commit 23dcd6a
Showing 4 changed files with 21 additions and 21 deletions.
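
In practice, the rename means a scorer's prediction parameter must now be called output rather than model_output. A minimal sketch of the new convention, assembled from the patterns in this diff — the scorers= keyword and the weave.init project name are assumptions, since the diff truncates the Evaluation calls:

import asyncio
import weave
from weave import Evaluation

weave.init("quickstart")  # assumed placeholder project name

dataset_rows = [{"input": "1 + 2", "target": 3}, {"input": "2**4", "target": 15}]

@weave.op()
async def model_predict(input):
    # toy model: evaluate the arithmetic expression
    # (consistent with the tests' expected output mean of 9.5)
    return eval(input)

@weave.op()
def score(target, output):  # `output` replaces the old `model_output` name
    return target == output

evaluation = Evaluation(name="my-eval", dataset=dataset_rows, scorers=[score])
result = asyncio.run(evaluation.evaluate(model_predict))
# per the tests: eval("2**4") == 16 != 15, so true_fraction comes out 0.5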
2 changes: 1 addition & 1 deletion tests/trace/test_client_trace.py
@@ -1443,7 +1443,7 @@ def test_named_reuse(client):
     dataset = weave.ref(d_ref.uri()).get()
 
     @weave.op()
-    async def dummy_score(model_output):
+    async def dummy_score(output):
        return 1
 
    class SimpleModel(weave.Model):
22 changes: 11 additions & 11 deletions tests/trace/test_evaluate.py
@@ -11,7 +11,7 @@
 
 
 expected_eval_result = {
-    "model_output": {"mean": 9.5},
+    "output": {"mean": 9.5},
     "score": {"true_count": 1, "true_fraction": 0.5},
     "model_latency": {"mean": pytest.approx(0, abs=1)},
 }
@@ -24,8 +24,8 @@ async def predict(self, input) -> str:
 
 
 @weave.op()
-def score(target, model_output):
-    return target == model_output
+def score(target, output):
+    return target == output
 
 
 @weave.op()
@@ -57,7 +57,7 @@ async def model_predict(input, target) -> str:
     )
     result = asyncio.run(evaluation.evaluate(model_predict))
     assert result == {
-        "model_output": {"mean": 18.5},
+        "output": {"mean": 18.5},
         "score": {"true_count": 0, "true_fraction": 0.0},
         "model_latency": {
             "mean": pytest.approx(0, abs=1),
@@ -111,8 +111,8 @@ async def infer(self, input) -> str:
 def test_score_as_class(client):
     class MyScorer(weave.Scorer):
         @weave.op()
-        def score(self, target, model_output):
-            return target == model_output
+        def score(self, target, output):
+            return target == output
 
     evaluation = Evaluation(
         dataset=dataset_rows,
@@ -121,7 +121,7 @@ def score(self, target, model_output):
     model = EvalModel()
     result = asyncio.run(evaluation.evaluate(model))
     assert result == {
-        "model_output": {"mean": 9.5},
+        "output": {"mean": 9.5},
         "MyScorer": {"true_count": 1, "true_fraction": 0.5},
         "model_latency": {
             "mean": pytest.approx(0, abs=1),
@@ -137,8 +137,8 @@ def summarize(self, score_rows):
             return {"awesome": 3}
 
         @weave.op()
-        def score(self, target, model_output):
-            return target == model_output
+        def score(self, target, output):
+            return target == output
 
     evaluation = Evaluation(
         dataset=dataset_rows,
@@ -147,7 +147,7 @@ def score(self, target, model_output):
     model = EvalModel()
     result = asyncio.run(evaluation.evaluate(model))
     assert result == {
-        "model_output": {"mean": 9.5},
+        "output": {"mean": 9.5},
         "MyScorer": {"awesome": 3},
         "model_latency": {
             "mean": pytest.approx(0, abs=1),
@@ -167,7 +167,7 @@ def return_pred(pred):
 
     result = asyncio.run(evaluation.evaluate(return_pred))
     assert result == {
-        "model_output": {
+        "output": {
            "a": {"true_count": 1, "true_fraction": 1.0},
            "b": {"true_count": 0, "true_fraction": 0.0},
        },
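
Class-based scorers follow the same convention: the prediction parameter of the score method is now output. A sketch of the weave.Scorer pattern exercised by test_score_as_class above, including the optional summarize override from the tests:

import weave

class MyScorer(weave.Scorer):
    @weave.op()
    def score(self, target, output):  # was: score(self, target, model_output)
        return target == output

    # optional override: collapse the per-row score rows into a custom summary,
    # which appears under the scorer's name in the evaluation result
    def summarize(self, score_rows):
        return {"awesome": 3}

Passed to the evaluation (presumably as scorers=[MyScorer()], instantiated unlike plain op scorers), this summary replaces the default true_count / true_fraction aggregation, as the second assertion above shows.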
4 changes: 2 additions & 2 deletions tests/trace/test_evaluation_performance.py
@@ -91,8 +91,8 @@ def predict(question: str):
         return "I don't know"
 
     @weave.op()
-    def score(question: str, expected: str, model_output: str):
-        return model_output == expected
+    def score(question: str, expected: str, output: str):
+        return output == expected
 
     evaluation = weave.Evaluation(
         name="My Evaluation",
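
The scorer in this performance test also shows how parameters are matched by name: question and expected are pulled from each dataset row, while output carries predict's return value. A hedged sketch of that wiring, with row contents invented for illustration:

import weave

# each row's keys feed the same-named model and scorer parameters
dataset = [
    {"question": "What is 2 + 2?", "expected": "I don't know"},
]

@weave.op()
def predict(question: str):
    return "I don't know"

@weave.op()
def score(question: str, expected: str, output: str):
    # `question`/`expected` come from the row; `output` is predict's result
    return output == expected

evaluation = weave.Evaluation(name="My Evaluation", dataset=dataset, scorers=[score])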
14 changes: 7 additions & 7 deletions tests/trace/test_weave_client.py
@@ -393,8 +393,8 @@ async def model_predict(input) -> str:
     dataset_rows = [{"input": "1 + 2", "target": 3}, {"input": "2**4", "target": 15}]
 
     @weave.op()
-    async def score(target, model_output):
-        return target == model_output
+    async def score(target, output):
+        return target == output
 
     evaluation = Evaluation(
         name="my-eval",
@@ -747,8 +747,8 @@ async def model_predict(input) -> str:
     dataset_rows = [{"input": "1 + 2", "target": 3}, {"input": "2**4", "target": 15}]
 
     @weave.op()
-    async def score(target, model_output):
-        return target == model_output
+    async def score(target, output):
+        return target == output
 
     evaluation = Evaluation(
         name="my-eval",
@@ -757,7 +757,7 @@ async def score(target, model_output):
     )
     result = asyncio.run(evaluation.evaluate(model_predict))
     expected_eval_result = {
-        "model_output": {"mean": 9.5},
+        "output": {"mean": 9.5},
         "score": {"true_count": 1, "true_fraction": 0.5},
     }
     assert result == expected_eval_result
@@ -857,8 +857,8 @@ def test_nested_ref_is_inner(client):
     dataset_rows = [{"input": "1 + 2", "target": 3}, {"input": "2**4", "target": 15}]
 
     @weave.op()
-    async def score(target, model_output):
-        return target == model_output
+    async def score(target, output):
+        return target == output
 
     evaluation = Evaluation(
         name="my-eval",
