diff --git a/README.md b/README.md
index 85b37ee46..d69f84b01 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
 # Aleph Alpha Intelligence Layer ☯️
 
-The Aleph Alpha Intelligence Layer ☯️ offers a comprehensive suite of development tools for crafting solutions that harness the capabilities of large language models (LLMs).
+The Aleph Alpha Intelligence Layer ☯️ offers a comprehensive suite of development tools for crafting solutions that harness the capabilities of large language models (LLMs). With a unified framework for LLM-based workflows, it facilitates seamless AI product development, from prototyping and prompt experimentation to result evaluation and deployment.
 
 The key features of the Intelligence Layer are:
 
@@ -77,8 +77,8 @@ Set your access token:
 ```bash
 GITHUB_TOKEN=
 ```
 
-We recommend setting up a dedicated virtual environment. You can do so by using [conda](https://conda.io/projects/conda/en/latest/user-guide/tasks/manage-environments.html#creating-an-environment-with-commands) or [venv](https://docs.python.org/3/library/venv.html).
+We recommend setting up a dedicated virtual environment. You can do so by using [conda](https://conda.io/projects/conda/en/latest/user-guide/tasks/manage-environments.html#creating-an-environment-with-commands) or [venv](https://docs.python.org/3/library/venv.html).
 
 Let's install the package:
 
@@ -118,6 +118,10 @@ From here, you can customize everything, including the prompt, model, and more i
 This not only saves you time but also ensures you're building on a tried and tested foundation.
 Therefore, think of these use-cases as stepping stones, guiding you towards crafting tailored solutions that best fit your unique requirements.
 
+## References
+
+- Full documentation: https://glowing-tribble-223446r.pages.github.io/
+
 ## License
 
 This project can only be used after signing the agreement with Aleph Alpha®. Please refer to the [LICENSE](LICENSE.md) file for more details.
diff --git a/src/intelligence_layer/core/evaluator.py b/src/intelligence_layer/core/evaluator.py
index 8f9656097..b41805859 100644
--- a/src/intelligence_layer/core/evaluator.py
+++ b/src/intelligence_layer/core/evaluator.py
@@ -61,13 +61,35 @@ def evaluate(
         logger: DebugLogger,
         expected_output: ExpectedOutput,
     ) -> Evaluation:
-        """Executes the evaluation for this use-case."""
+        """Executes the evaluation for this use-case.
+
+        The implementation of this method is responsible for running the task
+        (usually supplied via the `__init__` method) and performing any comparisons
+        relevant to the evaluation. Based on the results, it should create an
+        `Evaluation` instance containing all the metrics and return it.
+
+        Args:
+            input: Input to be passed to the task that shall be evaluated.
+            logger: Debug logger used for tracing the task.
+            expected_output: Output that is expected from the task run with the supplied input.
+
+        Returns:
+            An `Evaluation` containing the metrics produced for the evaluated task.
+        """
         pass
 
     def evaluate_dataset(
         self, dataset: Dataset[Input, ExpectedOutput], logger: DebugLogger
     ) -> AggregatedEvaluation:
-        """Evaluates an entire datasets in a threaded manner and aggregates the results into an `AggregatedEvaluation`."""
+        """Evaluates an entire dataset in a threaded manner and aggregates the results into an `AggregatedEvaluation`.
+
+        This will call the `evaluate` method for each example in the dataset.
+        Finally, it will call the `aggregate` method and return the aggregated results.
+
+        Args:
+            dataset: Dataset that will be used to evaluate the task.
+            logger: Logger used for tracing.
+
+        Returns:
+            The aggregated results of the evaluation run over the dataset.
+        """
         with ThreadPoolExecutor(max_workers=10) as executor:
             evaluations = list(
                 tqdm(
@@ -87,5 +109,14 @@
 
     @abstractmethod
     def aggregate(self, evaluations: Sequence[Evaluation]) -> AggregatedEvaluation:
-        """`Evaluator`-specific method for aggregating individual `Evaluations` into report-like `Aggregated Evaluation`."""
+        """`Evaluator`-specific method for aggregating individual `Evaluations` into a report-like `AggregatedEvaluation`.
+
+        This method is responsible for taking the results of an evaluation run and aggregating them.
+        It should create an `AggregatedEvaluation` instance and return it.
+
+        Args:
+            evaluations: The results from running `evaluate_dataset` with a task.
+
+        Returns:
+            The aggregated results of an evaluation run over a dataset.
+        """
         pass
diff --git a/src/intelligence_layer/core/task.py b/src/intelligence_layer/core/task.py
index 2f9fccaad..4da6ffbc0 100644
--- a/src/intelligence_layer/core/task.py
+++ b/src/intelligence_layer/core/task.py
@@ -91,7 +91,17 @@ def inner(
 
     @abstractmethod
     def run(self, input: Input, logger: DebugLogger) -> Output:
-        """Executes the process for this use-case."""
+        """Executes the process for this use-case.
+
+        This method takes an input and runs the implementation to generate an output.
+        It takes a `DebugLogger` for tracing of the process.
+        The input and output are logged by default.
+
+        Args:
+            input: Generic input defined by the task implementation.
+            logger: Debug logger used for tracing of the process.
+
+        Returns:
+            Generic output defined by the task implementation.
+        """
         ...
 
     def run_concurrently(
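To make the documented `Task` contract concrete, here is a minimal sketch (not part of this PR) of a `run` implementation. The import path `intelligence_layer.core.task` is inferred from the file paths above, and `ReverseTask` is purely illustrative; `Task` is assumed to be generic over its input and output types, as the `Input`/`Output` type variables in the diff suggest.

```python
# Hypothetical example; the `Task` and `DebugLogger` import paths are assumptions
# inferred from src/intelligence_layer/core/task.py, not confirmed by this diff.
from intelligence_layer.core.task import DebugLogger, Task


class ReverseTask(Task[str, str]):
    """Toy task that reverses its input string."""

    def run(self, input: str, logger: DebugLogger) -> str:
        # Per the docstring above, the input and output are logged by default,
        # so no explicit logging calls are needed here.
        return input[::-1]
```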
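Similarly, a sketch of an `Evaluator` subclass implementing the two abstract methods documented above, reusing `ReverseTask` from the previous snippet. The generic parameter order `Evaluator[Input, ExpectedOutput, Evaluation, AggregatedEvaluation]` and the pydantic-based result models are assumptions for illustration, not confirmed by this diff.

```python
# Hypothetical example; import paths, the generic parameter order, and the use
# of pydantic models for results are assumptions, not confirmed by this diff.
from typing import Sequence

from pydantic import BaseModel

from intelligence_layer.core.evaluator import Evaluator
from intelligence_layer.core.task import DebugLogger


class ExactMatchEvaluation(BaseModel):
    # Per-example metric: did the task output equal the expected output?
    correct: bool


class ExactMatchAggregate(BaseModel):
    # Report-like summary over the whole dataset.
    accuracy: float


class ReverseTaskEvaluator(
    Evaluator[str, str, ExactMatchEvaluation, ExactMatchAggregate]
):
    def __init__(self, task: ReverseTask) -> None:
        # The task under evaluation is supplied at construction time,
        # as the `evaluate` docstring suggests.
        self.task = task

    def evaluate(
        self, input: str, logger: DebugLogger, expected_output: str
    ) -> ExactMatchEvaluation:
        # Run the task and compare its output against the expected output.
        output = self.task.run(input, logger)
        return ExactMatchEvaluation(correct=output == expected_output)

    def aggregate(
        self, evaluations: Sequence[ExactMatchEvaluation]
    ) -> ExactMatchAggregate:
        # Collapse the per-example results into a single accuracy figure.
        total = len(evaluations)
        correct = sum(e.correct for e in evaluations)
        return ExactMatchAggregate(accuracy=correct / total if total else 0.0)
```

With such an evaluator, the `evaluate_dataset` implementation in the diff fans `evaluate` out over the dataset's examples in a thread pool and passes the collected evaluations to `aggregate`.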