diff --git a/Carrot-Assistant/evaluation/eval_tests.py b/Carrot-Assistant/evaluation/eval_tests.py
index 8bfc43d..64932fd 100644
--- a/Carrot-Assistant/evaluation/eval_tests.py
+++ b/Carrot-Assistant/evaluation/eval_tests.py
@@ -1,3 +1,4 @@
+from typing import Dict
 from evaluation.evaltypes import (
     SingleResultPipelineTest,
     SingleResultMetric,
@@ -6,16 +7,60 @@
 
 
 class LLMPipelineTest(SingleResultPipelineTest):
+    """
+    This class provides a pipeline test for LLM pipelines that return a single result
+    """
+
     def __init__(
         self,
         name: str,
         pipeline: LLMPipeline,
         metrics: list[SingleResultMetric],
     ):
+        """
+        Initialises the LLMPipelineTest class
+
+        Parameters
+        ----------
+        name: str
+            Name given to the test
+        pipeline: LLMPipeline
+            The pipeline used to generate output
+        metrics: list[SingleResultMetric]
+            A list of metrics used to compare the pipeline output with the expected output
+        """
         super().__init__(name, pipeline, metrics)
 
-    def run_pipeline(self, input_data):
+    def run_pipeline(self, input_data) -> str:
+        """
+        Runs the provided pipeline on the input_data
+
+        Parameters
+        ----------
+        input_data
+            The data used for input to the pipeline
+
+        Returns
+        -------
+        str
+            The reply from the pipeline
+        """
         return super().run_pipeline(input_data)
 
-    def evaluate(self, input_data, expected_output):
+    def evaluate(self, input_data, expected_output) -> Dict:
+        """
+        Evaluates the attached pipeline's output against the expected output using the metrics
+
+        Parameters
+        ----------
+        input_data
+            The data used for input to the pipeline
+        expected_output
+            The expected result of running the input data through the pipeline
+
+        Returns
+        -------
+        Dict
+            A dictionary of results from evaluating the pipeline.
+        """
         return super().evaluate(input_data, expected_output)
diff --git a/Carrot-Assistant/tests/test_evals.py b/Carrot-Assistant/tests/test_evals.py
index a8fc99c..3969788 100644
--- a/Carrot-Assistant/tests/test_evals.py
+++ b/Carrot-Assistant/tests/test_evals.py
@@ -104,6 +104,7 @@ def test_pipeline_called_from_eval_returns_string(self, llm_pipeline_test):
 
     def test_llm_pipelinetest_evaluates(self, llm_pipeline_test):
         model_eval = llm_pipeline_test.evaluate(
+            name="Testing the parrot pipeline",
             input_data={"input_sentence": "Polly wants a cracker"},
             expected_output="Polly wants a cracker",
         )
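
For context, a minimal sketch of how the annotated LLMPipelineTest might be exercised. Only the constructor, run_pipeline and evaluate signatures are taken from the diff above; ParrotPipeline and ExactMatchMetric are hypothetical stand-ins, and the assumption that the SingleResultPipelineTest base class drives them through run() and calculate() is not shown in this change.

# Hypothetical usage sketch. ParrotPipeline and ExactMatchMetric are illustrative
# stand-ins; the run()/calculate() interface is an assumption about the base
# classes in evaluation.evaltypes, which this diff does not show.
from evaluation.eval_tests import LLMPipelineTest


class ParrotPipeline:
    """Assumed stand-in for an LLMPipeline: repeats the input sentence back."""

    def run(self, input_data: dict) -> str:
        return input_data["input_sentence"]


class ExactMatchMetric:
    """Assumed stand-in for a SingleResultMetric: exact string comparison."""

    def calculate(self, output: str, expected: str) -> float:
        return float(output == expected)


test = LLMPipelineTest(
    name="Parrot round-trip",
    pipeline=ParrotPipeline(),      # would be an LLMPipeline in real use
    metrics=[ExactMatchMetric()],
)

# These calls follow the signatures in the diff: run_pipeline returns a str,
# evaluate returns a Dict of metric results.
reply = test.run_pipeline({"input_sentence": "Polly wants a cracker"})
results = test.evaluate(
    input_data={"input_sentence": "Polly wants a cracker"},
    expected_output="Polly wants a cracker",
)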