diff --git a/evaluator-example/README.md b/evaluator-example/README.md
new file mode 100644
index 000000000..7897b4aa5
--- /dev/null
+++ b/evaluator-example/README.md
@@ -0,0 +1,38 @@
+## Evaluator Example
+
+This is an example of how to use the evaluator. The evaluator measures the
+performance of a model on a dataset and can also compare the performance of
+several models on the same dataset.
+
+This module contains an example that you can copy into your project and
+adapt to your needs.
+
+### Prerequisites
+
+You need to have the following installed:
+
+- [Poetry](https://python-poetry.org/docs/#installing-with-pipx)
+- **Python 3.10:** you can set it up with virtualenv:
+
+```bash
+virtualenv venv --python=python3.10
+source venv/bin/activate
+```
+
+Once Poetry is installed, install the Python dependencies. Move to the
+`evalTest` folder and run:
+
+```bash
+poetry install
+```
+
+### Usage
+
+To try this example, run the following command:
+
+```bash
+./gradlew evaluator
+```
+
+After the command finishes, the results are rendered as a web page that you
+can view by opening `evalTest/publish/index.html`.
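+
+### Generated `data.json` (illustrative)
+
+The `evaluator` task first runs `TestExample`, which serializes a `TestsSpec` to
+`evalTest/data.json`; the Python evaluator then reads that file. As a rough sketch
+(only the first item is shown, and the answer produced by the OpenAI call at runtime
+is replaced by a placeholder), the generated file has this shape:
+
+```json
+{
+  "description": "Check GPT3.5 and fake outputs",
+  "metric": "FactualConsistencyMetric",
+  "outputs_description": ["Using GPT3.5", "Fake outputs with errors"],
+  "minimum_score": 0.7,
+  "items": [
+    {
+      "input": "Please provide a movie title, genre and director",
+      "context": ["Contains information about a movie"],
+      "actual_outputs": ["<answer generated by GPT-3.5 at runtime>", "I don't know"]
+    }
+  ]
+}
+```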
diff --git a/evaluator-example/build.gradle.kts b/evaluator-example/build.gradle.kts
new file mode 100644
index 000000000..2694e6891
--- /dev/null
+++ b/evaluator-example/build.gradle.kts
@@ -0,0 +1,60 @@
+import java.io.OutputStream
+
+plugins {
+  id(libs.plugins.kotlin.jvm.get().pluginId)
+  id(libs.plugins.kotlinx.serialization.get().pluginId)
+  alias(libs.plugins.spotless)
+}
+
+repositories { mavenCentral() }
+
+java {
+  sourceCompatibility = JavaVersion.VERSION_11
+  targetCompatibility = JavaVersion.VERSION_11
+  toolchain { languageVersion = JavaLanguageVersion.of(11) }
+}
+
+dependencies {
+  implementation(projects.xefCore)
+  implementation(projects.xefOpenai)
+  implementation(projects.xefEvaluator)
+  implementation(libs.suspendApp.core)
+  implementation(libs.bundles.arrow)
+}
+
+spotless {
+  kotlin {
+    target("**/*.kt")
+    ktfmt().googleStyle().configure { it.setRemoveUnusedImport(true) }
+  }
+}
+
+// Runs TestExample, which writes the test specification to evalTest/data.json.
+tasks.create<JavaExec>("test-example") {
+  dependsOn("compileKotlin")
+
+  workingDir("./evalTest")
+
+  group = "Execution"
+  description = "Test example"
+  classpath = sourceSets.main.get().runtimeClasspath
+  mainClass = "com.xebia.funcional.xef.evaluator.examples.TestExample"
+
+  doLast { println(">> data.json created!") }
+}
+
+// Runs the DeepEval suite (via Poetry) against the generated data.json.
+tasks.create<Exec>("evaluator") {
+  dependsOn("test-example")
+
+  this.standardOutput = OutputStream.nullOutputStream()
+
+  workingDir("./evalTest")
+
+  commandLine("poetry", "run", "deepeval", "test", "run", "py-evaluator/test_evaluator.py")
+
+  doLast { println(">> Open evalTest/publish/index.html in your browser") }
+}
diff --git a/evaluator-example/evalTest/.gitignore b/evaluator-example/evalTest/.gitignore
new file mode 100644
index 000000000..8bbfc017c
--- /dev/null
+++ b/evaluator-example/evalTest/.gitignore
@@ -0,0 +1,6 @@
+__pycache__
+results.json
+data.json
+publish/content.js
+.pytest_cache
+poetry.lock
diff --git a/evaluator-example/evalTest/publish/index.html b/evaluator-example/evalTest/publish/index.html
new file mode 100644
index 000000000..eae13b286
--- /dev/null
+++ b/evaluator-example/evalTest/publish/index.html
@@ -0,0 +1,13 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <title>Tests</title>
+    <link rel="stylesheet" href="styles.css">
+</head>
+<body>
+    <div id="test-container"></div>
+    <script src="content.js"></script>
+    <script src="script.js"></script>
+</body>
+</html>
diff --git a/evaluator-example/evalTest/publish/script.js b/evaluator-example/evalTest/publish/script.js
new file mode 100644
index 000000000..000ec719f
--- /dev/null
+++ b/evaluator-example/evalTest/publish/script.js
@@ -0,0 +1,60 @@
+// Renders the evaluation results (testData is defined by the generated content.js).
+document.addEventListener('DOMContentLoaded', function() {
+
+    const container = document.getElementById('test-container');
+    const summaryDiv = document.createElement('div');
+    summaryDiv.classList.add('test-summary');
+
+    testData.results.forEach(block => {
+        const blockDiv = document.createElement('div');
+        blockDiv.classList.add('test-block');
+
+        const title = document.createElement('h2');
+        title.classList.add('test-title');
+        title.textContent = block.description;
+        blockDiv.appendChild(title);
+
+        block.tests.forEach(test => {
+            const inputDiv = document.createElement('div');
+            inputDiv.classList.add(test.assert ? 'input-passed' : 'input-failed');
+            inputDiv.textContent = 'Input: ' + test.input;
+            blockDiv.appendChild(inputDiv);
+
+            const outputDiv = document.createElement('div');
+            outputDiv.classList.add('output');
+            outputDiv.textContent = 'Output: ' + test.output;
+            outputDiv.addEventListener('click', function() {
+                this.classList.toggle('expanded');
+            });
+            blockDiv.appendChild(outputDiv);
+
+            const scoreDiv = document.createElement('div');
+            scoreDiv.classList.add('score', test.assert ? 'score-passed' : 'score-failed');
+            scoreDiv.textContent = 'Score: ' + test.score.toFixed(3);
+            blockDiv.appendChild(scoreDiv);
+        });
+
+        const avgScoreDiv = document.createElement('div');
+        avgScoreDiv.classList.add('avg-score');
+        avgScoreDiv.textContent = 'Average Score: ' + block.avg.toFixed(3);
+        blockDiv.appendChild(avgScoreDiv);
+
+        const testInfoDiv = document.createElement('div');
+        testInfoDiv.classList.add('test-info');
+        testInfoDiv.innerHTML = `
+            Tests Passed: ${block.tests_successful}<br>
+            Tests Failed: ${block.tests_failures}<br>
+            Success Rate: ${block.success_rate.toFixed(2)}%
+        `;
+        blockDiv.appendChild(testInfoDiv);
+
+        container.appendChild(blockDiv);
+
+        summaryDiv.innerHTML += `
+            <h3>${block.description}</h3>
+            Average Score: ${block.avg.toFixed(3)}<br>
+            Success Rate: ${block.success_rate.toFixed(2)}%<br>
+        `;
+    });
+
+    container.appendChild(summaryDiv);
+});
diff --git a/evaluator-example/evalTest/publish/styles.css b/evaluator-example/evalTest/publish/styles.css
new file mode 100644
index 000000000..a14683826
--- /dev/null
+++ b/evaluator-example/evalTest/publish/styles.css
@@ -0,0 +1,87 @@
+body {
+    font-family: Arial, sans-serif;
+    margin: 0;
+    padding: 0;
+    background-color: #f4f4f4;
+}
+
+#test-container {
+    width: 80%;
+    margin: 20px auto;
+    padding: 15px;
+    background-color: white;
+    border-radius: 8px;
+    box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
+}
+
+.test-block {
+    margin-bottom: 20px;
+    border-bottom: 1px solid #eee;
+    padding-bottom: 20px;
+}
+
+.test-title {
+    font-size: 1.2em;
+    color: #333;
+}
+
+.input, .output {
+    margin: 5px 0;
+}
+
+.input-passed {
+    margin-top: 25px;
+    color: green;
+    font-weight: bold;
+}
+
+.input-failed {
+    margin-top: 25px;
+    color: red;
+    font-weight: bold;
+}
+
+.output {
+    color: #666;
+    cursor: pointer;
+    white-space: nowrap;
+    overflow: hidden;
+    text-overflow: ellipsis;
+}
+
+.output.expanded {
+    white-space: normal;
+}
+
+.score {
+    font-weight: bold;
+}
+
+.score-passed {
+    margin-bottom: 25px;
+    color: #008000;
+}
+
+.score-failed {
+    margin-bottom: 25px;
+    color: red;
+}
+
+.avg-score, .test-info {
+    font-size: 1.2em;
+    color: #d35400;
+    margin-top: 10px;
+}
+
+.test-summary {
+    background-color: #e7e7e7;
+    padding: 15px;
+    margin-top: 20px;
+    border-radius: 8px;
+}
+
+.test-summary h3 {
+    font-size: 1.1em;
+    color: #555;
+    margin-top: 0;
+}
diff --git a/evaluator-example/evalTest/py-evaluator/test_evaluator.py b/evaluator-example/evalTest/py-evaluator/test_evaluator.py
new file mode 100644
index 000000000..c54cb82f8
--- /dev/null
+++ b/evaluator-example/evalTest/py-evaluator/test_evaluator.py
@@ -0,0 +1,96 @@
+from deepeval.metrics.answer_relevancy import AnswerRelevancyMetric
+from deepeval.metrics.factual_consistency import FactualConsistencyMetric
+from deepeval.test_case import LLMTestCase
+from deepeval.evaluator import execute_test
+import json
+
+# Load the test spec generated by the Gradle `test-example` task.
+f = open('data.json')
+data = json.load(f)
+
+appDescription = data['description']
+
+outputs = data['outputs_description']
+
+numberOfOutputs = len(outputs)
+minimumScore = float(data['minimum_score'])
+metric = data['metric']
+
+print()
+print()
+print(appDescription)
+print("================")
+print()
+print(f"Using {metric} metric with {numberOfOutputs} different outputs ({minimumScore} minimum score)")
+
+currentOutput = 0
+
+# FactualConsistencyMetric is the default; the spec can select AnswerRelevancyMetric instead.
+metricObj = FactualConsistencyMetric(minimum_score=minimumScore)
+
+if metric == "AnswerRelevancyMetric":
+    metricObj = AnswerRelevancyMetric(minimum_score=minimumScore)
+
+jsonResponse = {
+    "description": appDescription,
+}
+
+jsonItemResultResponses = []
+
+# Evaluate every item once per declared output variant.
+for x in range(numberOfOutputs):
+    jsonItemResponse = {
+        "description": outputs[x],
+    }
+    cases = []
+    for item in data['items']:
+        context = []
+        if "context" in item:
+            context = item['context']
+        cases.append(LLMTestCase(input=item['input'], actual_output=item['actual_outputs'][x], context=context))
+
+    print()
+    results = execute_test(cases, [metricObj])
+    print(f"Results: {outputs[x]}:")
+    totalScore = 0
+
+    jsonResultResponses = []
+
+    numberTestSuccessful = 0
+    for r in results:
+        score = float(r.metrics[0].score)
+        testsSuccessful = score >= minimumScore
+        jsonResultResponse = {
+            "input": r.input,
+            "output": r.actual_output,
+            "score": score,
+            "assert": testsSuccessful
+        }
+        if testsSuccessful:
+            numberTestSuccessful += 1
+        jsonResultResponses.append(jsonResultResponse)
+        totalScore += r.metrics[0].score
+        print(f"- {r.input} -> {r.metrics[0].score}")
+    avg = totalScore / len(results)
+    successRate = numberTestSuccessful * 100 / len(results)
+    jsonItemResponse["tests"] = jsonResultResponses
+    jsonItemResponse["avg"] = avg
+    jsonItemResponse["tests_successful"] = numberTestSuccessful
+    jsonItemResponse["tests_failures"] = len(results) - numberTestSuccessful
+    jsonItemResponse["success_rate"] = successRate
+    jsonItemResultResponses.append(jsonItemResponse)
+    print()
+    print(f"Average: {avg}")
+    print(f"Success rate: {successRate}")
+    print()
+
+jsonResponse["results"] = jsonItemResultResponses
+
+# Persist the raw results and publish them as a JS constant for the report page.
+with open("results.json", "w") as outfile:
+    json.dump(jsonResponse, outfile)
+
+with open("publish/content.js", "w") as outfile:
+    jsonStr = json.dumps(jsonResponse)
+    outfile.write(f"const testData = {jsonStr};")
+
+print()
+
+f.close()
diff --git a/evaluator-example/evalTest/pyproject.toml b/evaluator-example/evalTest/pyproject.toml
new file mode 100644
index 000000000..8293197ca
--- /dev/null
+++ b/evaluator-example/evalTest/pyproject.toml
@@ -0,0 +1,13 @@
+[tool.poetry]
+name = "py-evaluator"
+version = "0.1.0"
+description = "Python evaluator for DeepEval"
+authors = ["Xef"]
+
+[tool.poetry.dependencies]
+python = "~3.10.0"
+deepeval = "0.20.19"
+
+[build-system]
+requires = ["poetry-core"]
+build-backend = "poetry.core.masonry.api"
diff --git a/evaluator-example/src/main/kotlin/com/xebia/funcional/xef/evaluator/examples/TestExample.kt b/evaluator-example/src/main/kotlin/com/xebia/funcional/xef/evaluator/examples/TestExample.kt
new file mode 100644
index 000000000..466088640
--- /dev/null
+++ b/evaluator-example/src/main/kotlin/com/xebia/funcional/xef/evaluator/examples/TestExample.kt
@@ -0,0 +1,47 @@
+package com.xebia.funcional.xef.evaluator.examples
+
+import arrow.continuations.SuspendApp
+import com.xebia.funcional.xef.evaluator.TestSpecItem
+import com.xebia.funcional.xef.evaluator.TestsSpec
+import com.xebia.funcional.xef.evaluator.models.ContextDescription
+import com.xebia.funcional.xef.evaluator.models.OutputDescription
+import com.xebia.funcional.xef.evaluator.models.OutputResponse
+import com.xebia.functional.xef.conversation.llm.openai.OpenAI
+import com.xebia.functional.xef.conversation.llm.openai.promptMessage
+import java.io.File
+
+object TestExample {
+
+  @JvmStatic
+  fun main(args: Array<String>) = SuspendApp {
+    val output: String = args.getOrNull(0) ?: "."
+
+    val file = File("$output/data.json")
+
+    val spec =
+      TestsSpec(description = "Check GPT3.5 and fake outputs") {
+        +OutputDescription("Using GPT3.5")
+        +OutputDescription("Fake outputs with errors")
+
+        +TestSpecItem("Please provide a movie title, genre and director") {
+          +ContextDescription("Contains information about a movie")
+
+          +OutputResponse { OpenAI.conversation { promptMessage(input) } }
+
+          +OutputResponse("I don't know")
+        }
+
+        // The second fake answer is deliberately wrong so the metric has a failure to report.
+        +TestSpecItem("Recipe for a chocolate cake") {
+          +ContextDescription("Contains instructions for making a cake")
+
+          +OutputResponse { OpenAI.conversation { promptMessage(input) } }
+
+          +OutputResponse("The movie is Jurassic Park")
+        }
+      }
+
+    file.writeText(spec.toJSON())
+
+    println("JSON created successfully")
+  }
+}
diff --git a/evaluator/build.gradle.kts b/evaluator/build.gradle.kts
new file mode 100644
index 000000000..d84265e83
--- /dev/null
+++ b/evaluator/build.gradle.kts
@@ -0,0 +1,24 @@
+plugins {
+  id(libs.plugins.kotlin.jvm.get().pluginId)
+  id(libs.plugins.kotlinx.serialization.get().pluginId)
+  alias(libs.plugins.spotless)
+}
+
+repositories { mavenCentral() }
+
+java {
+  sourceCompatibility = JavaVersion.VERSION_11
+  targetCompatibility = JavaVersion.VERSION_11
+  toolchain { languageVersion = JavaLanguageVersion.of(11) }
+}
+
+dependencies {
+  api(libs.kotlinx.serialization.json)
+}
+
+spotless {
+  kotlin {
+    target("**/*.kt")
+    ktfmt().googleStyle().configure { it.setRemoveUnusedImport(true) }
+  }
+}
diff --git a/evaluator/src/main/kotlin/com/xebia/funcional/xef/evaluator/SuiteBuilder.kt b/evaluator/src/main/kotlin/com/xebia/funcional/xef/evaluator/SuiteBuilder.kt
new file mode 100644
index 000000000..e1186c9b4
--- /dev/null
+++ b/evaluator/src/main/kotlin/com/xebia/funcional/xef/evaluator/SuiteBuilder.kt
@@ -0,0 +1,63 @@
+package com.xebia.funcional.xef.evaluator
+
+import com.xebia.funcional.xef.evaluator.models.OutputDescription
+import kotlin.jvm.JvmSynthetic
+import kotlinx.serialization.SerialName
+import kotlinx.serialization.Serializable
+import kotlinx.serialization.encodeToString
+import kotlinx.serialization.json.Json
+
+/** DSL builder that collects output descriptions and test items into a [TestsSpec]. */
+class SuiteBuilder(private val description: String, private val metric: String) {
+
+  private val outputsDescription: MutableList<String> = mutableListOf()
+
+  private var minimumScore: Double = 0.7
+
+  private val items = mutableListOf<TestSpecItem>()
+
+  operator fun TestSpecItem.unaryPlus() {
+    items.add(this)
+  }
+
+  operator fun OutputDescription.unaryPlus() {
+    outputsDescription.add(this.value)
+  }
+
+  fun build() = TestsSpec(description, metric, outputsDescription, minimumScore, items)
+}
+
+@Serializable
+data class TestsSpec(
+  val description: String,
+  val metric: String,
+  @SerialName("outputs_description") val outputsDescription: List<String>,
+  @SerialName("minimum_score") val minimumScore: Double,
+  val items: List<TestSpecItem>
+) {
+
+  fun toJSON(): String = Json.encodeToString(this)
+
+  companion object {
+    @JvmSynthetic
+    suspend operator fun invoke(
+      description: String,
+      metric: String = "FactualConsistencyMetric",
+      block: suspend SuiteBuilder.() -> Unit
+    ): TestsSpec = SuiteBuilder(description, metric).apply { block() }.build()
+  }
+}
+
+@Serializable
+data class TestSpecItem(
+  val input: String,
+  val context: List<String>,
+  @SerialName("actual_outputs") val outputs: List<String>
+) {
+  companion object {
+    @JvmSynthetic
+    suspend operator fun invoke(
+      input: String,
+      block: suspend TestItemBuilder.() -> Unit
+    ): TestSpecItem = TestItemBuilder(input).apply { block() }.build()
+  }
+}
diff --git a/evaluator/src/main/kotlin/com/xebia/funcional/xef/evaluator/TestItemBuilder.kt b/evaluator/src/main/kotlin/com/xebia/funcional/xef/evaluator/TestItemBuilder.kt
new file mode 100644
index 000000000..047386123
--- /dev/null
+++ b/evaluator/src/main/kotlin/com/xebia/funcional/xef/evaluator/TestItemBuilder.kt
@@ -0,0 +1,21 @@
+package com.xebia.funcional.xef.evaluator
+
+import com.xebia.funcional.xef.evaluator.models.ContextDescription
+import com.xebia.funcional.xef.evaluator.models.OutputResponse
+
+/** DSL builder for a single [TestSpecItem]: collects context lines and candidate outputs. */
+class TestItemBuilder(val input: String) {
+
+  private val context = mutableListOf<String>()
+
+  private val outputs = mutableListOf<String>()
+
+  operator fun ContextDescription.unaryPlus() {
+    context.add(value)
+  }
+
+  operator fun OutputResponse.unaryPlus() {
+    outputs.add(value)
+  }
+
+  fun build() = TestSpecItem(input, context, outputs)
+}
diff --git a/evaluator/src/main/kotlin/com/xebia/funcional/xef/evaluator/models/TestModels.kt b/evaluator/src/main/kotlin/com/xebia/funcional/xef/evaluator/models/TestModels.kt
new file mode 100644
index 000000000..e933d50ae
--- /dev/null
+++ b/evaluator/src/main/kotlin/com/xebia/funcional/xef/evaluator/models/TestModels.kt
@@ -0,0 +1,15 @@
+package com.xebia.funcional.xef.evaluator.models
+
+import kotlin.jvm.JvmSynthetic
+
+data class OutputDescription(val value: String)
+
+data class OutputResponse(val value: String) {
+  companion object {
+    @JvmSynthetic
+    suspend operator fun invoke(block: suspend () -> String): OutputResponse =
+      OutputResponse(block())
+  }
+}
+
+data class ContextDescription(val value: String)
diff --git a/settings.gradle.kts b/settings.gradle.kts
index d985c66d8..97bac7835 100644
--- a/settings.gradle.kts
+++ b/settings.gradle.kts
@@ -66,6 +66,12 @@ project(":xef-examples").projectDir = file("examples")
 include("xef-reasoning")
 project(":xef-reasoning").projectDir = file("reasoning")
 
+include("xef-evaluator")
+project(":xef-evaluator").projectDir = file("evaluator")
+
+include("xef-evaluator-example")
+project(":xef-evaluator-example").projectDir = file("evaluator-example")
+
 // include("xef-server")
 project(":xef-server").projectDir = file("server")