Skip to content

Commit

Permalink
[AIC-py][eval] make e2e test a little more robust (#435)
Browse files: browse the repository at this point in the history
[AIC-py][eval] make e2e test a little more robust

---
Stack created with [Sapling](https://sapling-scm.com). Best reviewed
with
[ReviewStack](https://reviewstack.dev/lastmile-ai/aiconfig/pull/435).
* __->__ #435
* #432
  • Loading branch information
jonathanlastmileai authored Dec 8, 2023
2 parents a4376d1 + 24e1394 commit 85fa6fa
Showing 1 changed file with 14 additions and 4 deletions.
18 changes: 14 additions & 4 deletions python/tests/test_eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,8 +140,6 @@ async def test_run_test_suite_with_inputs(data: st.DataObject):
)
)

input_data, _ = cu.unzip(user_test_suite_with_inputs)

mock_aiconfig = MockAIConfigRuntime()

out = await run_test_suite_helper(
Expand All @@ -166,13 +164,25 @@ async def test_run_test_suite_with_inputs(data: st.DataObject):
"best_possible_value",
"worst_possible_value",
]
inputs = df["input"].astype(str).tolist() # type: ignore[no-untyped-call]
assert set(inputs) == set(input_data) # type: ignore[no-untyped-call]

input_pairs = {
(input_datum, metric.interpretation.id)
for input_datum, metric in user_test_suite_with_inputs
}
result_pairs = set( # type: ignore[no-untyped-call]
df[["input", "metric_id"]].itertuples(index=False, name=None) # type: ignore[no-untyped-call]
)

assert input_pairs == result_pairs

df_brevity = df[df["metric_name"] == "brevity"]
assert (
df_brevity["aiconfig_output"].apply(len) # type: ignore[no-untyped-call]
== df_brevity["value"] # type: ignore[no-untyped-call]
).all()

df_substring = df[df["metric_name"] == "substring_match"]
assert (df_substring["value"].apply(lambda x: x in {0.0, 0.1})).all() # type: ignore[no-untyped-call]

case Err(e):
assert False, f"expected Ok, got Err({e})"

0 comments on commit 85fa6fa

Please sign in to comment.