diff --git a/README.md b/README.md
index 85b37ee46..d69f84b01 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
 # Aleph Alpha Intelligence Layer ☯️
 
-The Aleph Alpha Intelligence Layer ☯️ offers a comprehensive suite of development tools for crafting solutions that harness the capabilities of large language models (LLMs).
+The Aleph Alpha Intelligence Layer ☯️ offers a comprehensive suite of development tools for crafting solutions that harness the capabilities of large language models (LLMs). With a unified framework for LLM-based workflows, it facilitates seamless AI product development, from prototyping and prompt experimentation to result evaluation and deployment.
 
 The key features of the Intelligence Layer are:
 
@@ -77,8 +77,8 @@ Set your access token:
 ```bash
 GITHUB_TOKEN=
 ```
 
-We recommend setting up a dedicated virtual environment. You can do so by using [conda](https://conda.io/projects/conda/en/latest/user-guide/tasks/manage-environments.html#creating-an-environment-with-commands) or [venv](https://docs.python.org/3/library/venv.html).
+We recommend setting up a dedicated virtual environment. You can do so by using [conda](https://conda.io/projects/conda/en/latest/user-guide/tasks/manage-environments.html#creating-an-environment-with-commands) or [venv](https://docs.python.org/3/library/venv.html).
 
 Let's install the package:
 
@@ -118,6 +118,10 @@ From here, you can customize everything, including the prompt, model, and more i
 This not only saves you time but also ensures you're building on a tried and tested foundation.
 Therefore, think of these use-cases as stepping stones, guiding you towards crafting tailored solutions that best fit your unique requirements.
 
+## References
+
+- Full documentation: https://glowing-tribble-223446r.pages.github.io/
+
 ## License
 
 This project can only be used after signing the agreement with Aleph Alpha®. Please refer to the [LICENSE](LICENSE.md) file for more details.
diff --git a/src/intelligence_layer/core/evaluator.py b/src/intelligence_layer/core/evaluator.py
index 8f9656097..b41805859 100644
--- a/src/intelligence_layer/core/evaluator.py
+++ b/src/intelligence_layer/core/evaluator.py
@@ -61,13 +61,35 @@ def evaluate(
         logger: DebugLogger,
         expected_output: ExpectedOutput,
     ) -> Evaluation:
-        """Executes the evaluation for this use-case."""
+        """Executes the evaluation for this use-case.
+
+        The implementation of this method is responsible for running the task
+        (usually supplied via the `__init__` method) and performing any comparisons
+        relevant to the evaluation. Based on the results, it should create an
+        `Evaluation` instance containing all the metrics and return it.
+
+        Args:
+            input: Input to be passed to the task that shall be evaluated.
+            logger: Debug logger used for tracing the task.
+            expected_output: Output that is expected from the task run with the supplied input.
+
+        Returns:
+            An `Evaluation` containing the metrics produced for the evaluated task.
+        """
         pass
 
     def evaluate_dataset(
         self, dataset: Dataset[Input, ExpectedOutput], logger: DebugLogger
     ) -> AggregatedEvaluation:
-        """Evaluates an entire datasets in a threaded manner and aggregates the results into an `AggregatedEvaluation`."""
+        """Evaluates an entire dataset in a threaded manner and aggregates the results into an `AggregatedEvaluation`.
+
+        This will call the `evaluate` method for each example in the dataset.
+        Finally, it will call the `aggregate` method and return the aggregated results.
+
+        Args:
+            dataset: Dataset that will be used to evaluate the task.
+            logger: Logger used for tracing.
+
+        Returns:
+            The aggregated results of the evaluation run over the dataset.
+        """
         with ThreadPoolExecutor(max_workers=10) as executor:
             evaluations = list(
                 tqdm(
@@ -87,5 +109,14 @@
 
     @abstractmethod
     def aggregate(self, evaluations: Sequence[Evaluation]) -> AggregatedEvaluation:
-        """`Evaluator`-specific method for aggregating individual `Evaluations` into report-like `Aggregated Evaluation`."""
+        """`Evaluator`-specific method for aggregating individual `Evaluations` into a report-like `AggregatedEvaluation`.
+
+        This method is responsible for taking the results of an evaluation run and aggregating them.
+        It should create an `AggregatedEvaluation` instance and return it.
+
+        Args:
+            evaluations: The results from running `evaluate_dataset` with a task.
+
+        Returns:
+            The aggregated results of an evaluation run over a dataset.
+        """
         pass
diff --git a/src/intelligence_layer/core/task.py b/src/intelligence_layer/core/task.py
index 2f9fccaad..4da6ffbc0 100644
--- a/src/intelligence_layer/core/task.py
+++ b/src/intelligence_layer/core/task.py
@@ -91,7 +91,17 @@ def inner(
 
     @abstractmethod
     def run(self, input: Input, logger: DebugLogger) -> Output:
-        """Executes the process for this use-case."""
+        """Executes the process for this use-case.
+
+        This method takes an input and runs the implementation to generate an output.
+        It takes a `DebugLogger` for tracing of the process.
+        The input and output are logged by default.
+
+        Args:
+            input: Generic input defined by the task implementation.
+            logger: Debug logger used for tracing of the process.
+
+        Returns:
+            Generic output defined by the task implementation.
+        """
         ...
 
     def run_concurrently(
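To make the documented `Task` contract concrete, here is a minimal sketch (not part of this PR) of a `run` implementation. The import path `intelligence_layer.core.task` is inferred from the file paths above, and `ReverseTask` is purely illustrative; `Task` is assumed to be generic over its input and output types, as the `Input`/`Output` type variables in the diff suggest.

```python
# Hypothetical example; the `Task` and `DebugLogger` import paths are assumptions
# inferred from src/intelligence_layer/core/task.py, not confirmed by this diff.
from intelligence_layer.core.task import DebugLogger, Task


class ReverseTask(Task[str, str]):
    """Toy task that reverses its input string."""

    def run(self, input: str, logger: DebugLogger) -> str:
        # Per the docstring above, the input and output are logged by default,
        # so no explicit logging calls are needed here.
        return input[::-1]
```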
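Similarly, a sketch of an `Evaluator` subclass implementing the two abstract methods documented above, reusing `ReverseTask` from the previous snippet. The generic parameter order `Evaluator[Input, ExpectedOutput, Evaluation, AggregatedEvaluation]` and the pydantic-based result models are assumptions for illustration, not confirmed by this diff.

```python
# Hypothetical example; import paths, the generic parameter order, and the use
# of pydantic models for results are assumptions, not confirmed by this diff.
from typing import Sequence

from pydantic import BaseModel

from intelligence_layer.core.evaluator import Evaluator
from intelligence_layer.core.task import DebugLogger


class ExactMatchEvaluation(BaseModel):
    # Per-example metric: did the task output equal the expected output?
    correct: bool


class ExactMatchAggregate(BaseModel):
    # Report-like summary over the whole dataset.
    accuracy: float


class ReverseTaskEvaluator(
    Evaluator[str, str, ExactMatchEvaluation, ExactMatchAggregate]
):
    def __init__(self, task: ReverseTask) -> None:
        # The task under evaluation is supplied at construction time,
        # as the `evaluate` docstring suggests.
        self.task = task

    def evaluate(
        self, input: str, logger: DebugLogger, expected_output: str
    ) -> ExactMatchEvaluation:
        # Run the task and compare its output against the expected output.
        output = self.task.run(input, logger)
        return ExactMatchEvaluation(correct=output == expected_output)

    def aggregate(
        self, evaluations: Sequence[ExactMatchEvaluation]
    ) -> ExactMatchAggregate:
        # Collapse the per-example results into a single accuracy figure.
        total = len(evaluations)
        correct = sum(e.correct for e in evaluations)
        return ExactMatchAggregate(accuracy=correct / total if total else 0.0)
```

With such an evaluator, the `evaluate_dataset` implementation in the diff fans `evaluate` out over the dataset's examples in a thread pool and passes the collected evaluations to `aggregate`.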