diff --git a/src/documentation/evaluate_with_studio.ipynb b/src/documentation/evaluate_with_studio.ipynb
index 2f5e3495..06261c58 100644
--- a/src/documentation/evaluate_with_studio.ipynb
+++ b/src/documentation/evaluate_with_studio.ipynb
@@ -2,20 +2,9 @@
"cells": [
{
"cell_type": "code",
- "execution_count": 1,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "True"
- ]
- },
- "execution_count": 1,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"import json\n",
"from pathlib import Path\n",
@@ -61,7 +50,7 @@
},
{
"cell_type": "code",
- "execution_count": 2,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -79,7 +68,7 @@
},
{
"cell_type": "code",
- "execution_count": 3,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -104,21 +93,9 @@
},
{
"cell_type": "code",
- "execution_count": 4,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "{'label': 'Finance',\n",
- " 'message': 'I just traveled to Paris for a conference, where can I get the train ride refunded?'}"
- ]
- },
- "execution_count": 4,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"data[0]"
]
@@ -145,17 +122,9 @@
},
{
"cell_type": "code",
- "execution_count": 5,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Dataset ID: 5e3ce59d-f87c-448d-b811-c02ecae1588d\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"all_labels = list(set(item[\"label\"] for item in data))\n",
"dataset = studio_dataset_repository.create_dataset(\n",
@@ -183,17 +152,9 @@
},
{
"cell_type": "code",
- "execution_count": 6,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Benchmark ID: ec40c09a-0472-4c70-bd48-7427e4abc87f\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"import random\n",
"import string\n",
@@ -224,32 +185,9 @@
},
{
"cell_type": "code",
- "execution_count": 7,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "Running Task: 100%|██████████| 24/24 [00:56<00:00, 2.37s/it]\n",
- "Evaluating: 24it [00:00, 92691.80it/s]\n",
- "Submitting traces to Studio: 100%|██████████| 24/24 [00:01<00:00, 21.70it/s]\n"
- ]
- },
- {
- "ename": "AttributeError",
- "evalue": "'list' object has no attribute 'model_dump_json'",
- "output_type": "error",
- "traceback": [
- "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
- "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)",
- "Cell \u001b[0;32mIn[7], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mbenchmark\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mexecute\u001b[49m\u001b[43m(\u001b[49m\u001b[43mPromptBasedClassify\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mClassify v0.0 with Luminous\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n",
- "File \u001b[0;32m~/Documents/GitHub/intelligence-layer-sdk/src/intelligence_layer/evaluation/benchmark/studio_benchmark.py:161\u001b[0m, in \u001b[0;36mStudioBenchmark.execute\u001b[0;34m(self, task, name, description, labels, metadata)\u001b[0m\n\u001b[1;32m 155\u001b[0m trace_ids\u001b[38;5;241m.\u001b[39mappend(trace_id)\n\u001b[1;32m 157\u001b[0m benchmark_lineages \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_create_benchmark_lineages(\n\u001b[1;32m 158\u001b[0m eval_lineages\u001b[38;5;241m=\u001b[39mevaluation_lineages,\n\u001b[1;32m 159\u001b[0m trace_ids\u001b[38;5;241m=\u001b[39mtrace_ids,\n\u001b[1;32m 160\u001b[0m )\n\u001b[0;32m--> 161\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mclient\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msubmit_benchmark_lineages\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 162\u001b[0m \u001b[43m \u001b[49m\u001b[43mbenchmark_lineages\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mbenchmark_lineages\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 163\u001b[0m \u001b[43m \u001b[49m\u001b[43mexecution_id\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mbenchmark_execution_id\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 164\u001b[0m \u001b[43m \u001b[49m\u001b[43mbenchmark_id\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mid\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 165\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 167\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m benchmark_execution_id\n",
- "File \u001b[0;32m~/Documents/GitHub/intelligence-layer-sdk/src/intelligence_layer/connectors/studio/studio.py:501\u001b[0m, in \u001b[0;36mStudioClient.submit_benchmark_lineages\u001b[0;34m(self, benchmark_lineages, benchmark_id, execution_id)\u001b[0m\n\u001b[1;32m 487\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21msubmit_benchmark_lineages\u001b[39m(\n\u001b[1;32m 488\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[1;32m 489\u001b[0m benchmark_lineages: PostBenchmarkLineagesRequest,\n\u001b[1;32m 490\u001b[0m benchmark_id: \u001b[38;5;28mstr\u001b[39m,\n\u001b[1;32m 491\u001b[0m execution_id: \u001b[38;5;28mstr\u001b[39m,\n\u001b[1;32m 492\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[38;5;28mstr\u001b[39m:\n\u001b[1;32m 493\u001b[0m url \u001b[38;5;241m=\u001b[39m urljoin(\n\u001b[1;32m 494\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39murl,\n\u001b[1;32m 495\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m/api/projects/\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mproject_id\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m/evaluation/benchmarks/\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mbenchmark_id\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m/executions/\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mexecution_id\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m/lineages\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 496\u001b[0m )\n\u001b[1;32m 498\u001b[0m response \u001b[38;5;241m=\u001b[39m requests\u001b[38;5;241m.\u001b[39mpost(\n\u001b[1;32m 499\u001b[0m url,\n\u001b[1;32m 500\u001b[0m headers\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_headers,\n\u001b[0;32m--> 501\u001b[0m data\u001b[38;5;241m=\u001b[39m\u001b[43mbenchmark_lineages\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmodel_dump_json\u001b[49m(),\n\u001b[1;32m 502\u001b[0m )\n\u001b[1;32m 504\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_raise_for_status(response)\n\u001b[1;32m 505\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mstr\u001b[39m(response\u001b[38;5;241m.\u001b[39mjson())\n",
- "\u001b[0;31mAttributeError\u001b[0m: 'list' object has no attribute 'model_dump_json'"
- ]
- }
- ],
+ "outputs": [],
"source": [
"benchmark.execute(PromptBasedClassify(), \"Classify v0.0 with Luminous\")"
]
@@ -265,19 +203,7 @@
"cell_type": "code",
"execution_count": null,
"metadata": {},
- "outputs": [
- {
- "ename": "NameError",
- "evalue": "name 'benchmark' is not defined",
- "output_type": "error",
- "traceback": [
- "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
- "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
- "Cell \u001b[0;32mIn[2], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mbenchmark\u001b[49m\u001b[38;5;241m.\u001b[39mexecute(\n\u001b[1;32m 2\u001b[0m PromptBasedClassify(model\u001b[38;5;241m=\u001b[39mLlama3InstructModel(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mllama-3.1-8b-instruct\u001b[39m\u001b[38;5;124m\"\u001b[39m)),\n\u001b[1;32m 3\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mClassify v0.1 with Llama\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 4\u001b[0m )\n",
- "\u001b[0;31mNameError\u001b[0m: name 'benchmark' is not defined"
- ]
- }
- ],
+ "outputs": [],
"source": [
"benchmark.execute(\n",
" PromptBasedClassify(model=Llama3InstructModel(\"llama-3.1-8b-instruct\")),\n",
diff --git a/src/documentation/how_tos/how_to_retrieve_data_for_analysis.ipynb b/src/documentation/how_tos/how_to_retrieve_data_for_analysis.ipynb
index 1562abd6..e212fa24 100644
--- a/src/documentation/how_tos/how_to_retrieve_data_for_analysis.ipynb
+++ b/src/documentation/how_tos/how_to_retrieve_data_for_analysis.ipynb
@@ -2,20 +2,9 @@
"cells": [
{
"cell_type": "code",
- "execution_count": 1,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "Running Task: 100%|██████████| 2/2 [00:00<00:00, 5866.16it/s]\n",
- "Running Task: 100%|██████████| 2/2 [00:00<00:00, 6533.18it/s]\n",
- "Evaluating: 2it [00:00, 73584.28it/s]\n",
- "Evaluating: 2it [00:00, 66576.25it/s]\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"from example_data import DummyEvaluation, example_data\n",
"\n",
@@ -43,7 +32,7 @@
},
{
"cell_type": "code",
- "execution_count": 2,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -81,207 +70,9 @@
},
{
"cell_type": "code",
- "execution_count": 3,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "
\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " | \n",
- " | \n",
- " input | \n",
- " expected_output | \n",
- " metadata | \n",
- " data | \n",
- " output | \n",
- " result | \n",
- " tracer | \n",
- " lineage | \n",
- "
\n",
- " \n",
- " example_id | \n",
- " evaluation_id | \n",
- " run_id | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 42738278-d0c7-4be1-a004-5a2c6c2f87dd | \n",
- " 22060193-9854-40dd-b368-507ace613fc8 | \n",
- " 80dd94d6-a188-4e34-8d80-f2ed56ac3e77 | \n",
- " input1 | \n",
- " expected_output1 | \n",
- " None | \n",
- " data1 | \n",
- " input1 -> output | \n",
- " eval='input1, expected_output1, (input1 -> out... | \n",
- " <intelligence_layer.core.tracer.in_memory_trac... | \n",
- " <intelligence_layer.evaluation.infrastructure.... | \n",
- "
\n",
- " \n",
- " 2c9fe8be-db65-4589-9b28-7aba7ab74f0f | \n",
- " input1 | \n",
- " expected_output1 | \n",
- " None | \n",
- " data1 | \n",
- " input1 -> output | \n",
- " eval='input1, expected_output1, (input1 -> out... | \n",
- " <intelligence_layer.core.tracer.in_memory_trac... | \n",
- " <intelligence_layer.evaluation.infrastructure.... | \n",
- "
\n",
- " \n",
- " e02b58a2-a1c1-49d3-99db-c13cc79a2359 | \n",
- " 22060193-9854-40dd-b368-507ace613fc8 | \n",
- " 80dd94d6-a188-4e34-8d80-f2ed56ac3e77 | \n",
- " input0 | \n",
- " expected_output0 | \n",
- " None | \n",
- " data0 | \n",
- " input0 -> output | \n",
- " eval='input0, expected_output0, (input0 -> out... | \n",
- " <intelligence_layer.core.tracer.in_memory_trac... | \n",
- " <intelligence_layer.evaluation.infrastructure.... | \n",
- "
\n",
- " \n",
- " 2c9fe8be-db65-4589-9b28-7aba7ab74f0f | \n",
- " input0 | \n",
- " expected_output0 | \n",
- " None | \n",
- " data0 | \n",
- " input0 -> output | \n",
- " eval='input0, expected_output0, (input0 -> out... | \n",
- " <intelligence_layer.core.tracer.in_memory_trac... | \n",
- " <intelligence_layer.evaluation.infrastructure.... | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " input \\\n",
- "example_id evaluation_id run_id \n",
- "42738278-d0c7-4be1-a004-5a2c6c2f87dd 22060193-9854-40dd-b368-507ace613fc8 80dd94d6-a188-4e34-8d80-f2ed56ac3e77 input1 \n",
- " 2c9fe8be-db65-4589-9b28-7aba7ab74f0f input1 \n",
- "e02b58a2-a1c1-49d3-99db-c13cc79a2359 22060193-9854-40dd-b368-507ace613fc8 80dd94d6-a188-4e34-8d80-f2ed56ac3e77 input0 \n",
- " 2c9fe8be-db65-4589-9b28-7aba7ab74f0f input0 \n",
- "\n",
- " expected_output \\\n",
- "example_id evaluation_id run_id \n",
- "42738278-d0c7-4be1-a004-5a2c6c2f87dd 22060193-9854-40dd-b368-507ace613fc8 80dd94d6-a188-4e34-8d80-f2ed56ac3e77 expected_output1 \n",
- " 2c9fe8be-db65-4589-9b28-7aba7ab74f0f expected_output1 \n",
- "e02b58a2-a1c1-49d3-99db-c13cc79a2359 22060193-9854-40dd-b368-507ace613fc8 80dd94d6-a188-4e34-8d80-f2ed56ac3e77 expected_output0 \n",
- " 2c9fe8be-db65-4589-9b28-7aba7ab74f0f expected_output0 \n",
- "\n",
- " metadata \\\n",
- "example_id evaluation_id run_id \n",
- "42738278-d0c7-4be1-a004-5a2c6c2f87dd 22060193-9854-40dd-b368-507ace613fc8 80dd94d6-a188-4e34-8d80-f2ed56ac3e77 None \n",
- " 2c9fe8be-db65-4589-9b28-7aba7ab74f0f None \n",
- "e02b58a2-a1c1-49d3-99db-c13cc79a2359 22060193-9854-40dd-b368-507ace613fc8 80dd94d6-a188-4e34-8d80-f2ed56ac3e77 None \n",
- " 2c9fe8be-db65-4589-9b28-7aba7ab74f0f None \n",
- "\n",
- " data \\\n",
- "example_id evaluation_id run_id \n",
- "42738278-d0c7-4be1-a004-5a2c6c2f87dd 22060193-9854-40dd-b368-507ace613fc8 80dd94d6-a188-4e34-8d80-f2ed56ac3e77 data1 \n",
- " 2c9fe8be-db65-4589-9b28-7aba7ab74f0f data1 \n",
- "e02b58a2-a1c1-49d3-99db-c13cc79a2359 22060193-9854-40dd-b368-507ace613fc8 80dd94d6-a188-4e34-8d80-f2ed56ac3e77 data0 \n",
- " 2c9fe8be-db65-4589-9b28-7aba7ab74f0f data0 \n",
- "\n",
- " output \\\n",
- "example_id evaluation_id run_id \n",
- "42738278-d0c7-4be1-a004-5a2c6c2f87dd 22060193-9854-40dd-b368-507ace613fc8 80dd94d6-a188-4e34-8d80-f2ed56ac3e77 input1 -> output \n",
- " 2c9fe8be-db65-4589-9b28-7aba7ab74f0f input1 -> output \n",
- "e02b58a2-a1c1-49d3-99db-c13cc79a2359 22060193-9854-40dd-b368-507ace613fc8 80dd94d6-a188-4e34-8d80-f2ed56ac3e77 input0 -> output \n",
- " 2c9fe8be-db65-4589-9b28-7aba7ab74f0f input0 -> output \n",
- "\n",
- " result \\\n",
- "example_id evaluation_id run_id \n",
- "42738278-d0c7-4be1-a004-5a2c6c2f87dd 22060193-9854-40dd-b368-507ace613fc8 80dd94d6-a188-4e34-8d80-f2ed56ac3e77 eval='input1, expected_output1, (input1 -> out... \n",
- " 2c9fe8be-db65-4589-9b28-7aba7ab74f0f eval='input1, expected_output1, (input1 -> out... \n",
- "e02b58a2-a1c1-49d3-99db-c13cc79a2359 22060193-9854-40dd-b368-507ace613fc8 80dd94d6-a188-4e34-8d80-f2ed56ac3e77 eval='input0, expected_output0, (input0 -> out... \n",
- " 2c9fe8be-db65-4589-9b28-7aba7ab74f0f eval='input0, expected_output0, (input0 -> out... \n",
- "\n",
- " tracer \\\n",
- "example_id evaluation_id run_id \n",
- "42738278-d0c7-4be1-a004-5a2c6c2f87dd 22060193-9854-40dd-b368-507ace613fc8 80dd94d6-a188-4e34-8d80-f2ed56ac3e77 Run Lineage\n",
- "├── Example: e02b58a2-a1c1-49d3-99db-c13cc79a2359\n",
- "│ ├── Input\n",
- "│ │ └── input0\n",
- "│ └── Expected Output\n",
- "│ └── expected_output0\n",
- "├── Outputs\n",
- "│ ├── Output: 80dd94d6-a188-4e34-8d80-f2ed56ac3e77\n",
- "│ │ └── input0 -> output\n",
- "│ └── Output: 2c9fe8be-db65-4589-9b28-7aba7ab74f0f\n",
- "│ └── input0 -> output\n",
- "└── Evaluation: 22060193-9854-40dd-b368-507ace613fc8\n",
- " └── eval='input0, expected_output0, (input0 -> output, input0 -> output) -> evaluation'\n",
- "\n"
- ],
- "text/plain": [
- "Run Lineage\n",
- "├── Example: e02b58a2-a1c1-49d3-99db-c13cc79a2359\n",
- "│ ├── Input\n",
- "│ │ └── input0\n",
- "│ └── Expected Output\n",
- "│ └── expected_output0\n",
- "├── Outputs\n",
- "│ ├── Output: 80dd94d6-a188-4e34-8d80-f2ed56ac3e77\n",
- "│ │ └── input0 -> output\n",
- "│ └── Output: 2c9fe8be-db65-4589-9b28-7aba7ab74f0f\n",
- "│ └── input0 -> output\n",
- "└── Evaluation: 22060193-9854-40dd-b368-507ace613fc8\n",
- " └── eval='input0, expected_output0, (input0 -> output, input0 -> output) -> evaluation'\n"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- }
- ],
+ "outputs": [],
"source": [
"# Step 1 - retrieve linked data across all repositories\n",
"\n",
@@ -313,207 +104,9 @@
},
{
"cell_type": "code",
- "execution_count": 4,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " | \n",
- " | \n",
- " input | \n",
- " expected_output | \n",
- " metadata | \n",
- " data | \n",
- " output | \n",
- " result | \n",
- " tracer | \n",
- " lineage | \n",
- "
\n",
- " \n",
- " example_id | \n",
- " evaluation_id | \n",
- " run_id | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 42738278-d0c7-4be1-a004-5a2c6c2f87dd | \n",
- " 22060193-9854-40dd-b368-507ace613fc8 | \n",
- " 80dd94d6-a188-4e34-8d80-f2ed56ac3e77 | \n",
- " input1 | \n",
- " expected_output1 | \n",
- " None | \n",
- " data1 | \n",
- " input1 -> output | \n",
- " eval='input1, expected_output1, (input1 -> out... | \n",
- " <intelligence_layer.core.tracer.in_memory_trac... | \n",
- " <intelligence_layer.evaluation.infrastructure.... | \n",
- "
\n",
- " \n",
- " 2c9fe8be-db65-4589-9b28-7aba7ab74f0f | \n",
- " input1 | \n",
- " expected_output1 | \n",
- " None | \n",
- " data1 | \n",
- " input1 -> output | \n",
- " eval='input1, expected_output1, (input1 -> out... | \n",
- " <intelligence_layer.core.tracer.in_memory_trac... | \n",
- " <intelligence_layer.evaluation.infrastructure.... | \n",
- "
\n",
- " \n",
- " e02b58a2-a1c1-49d3-99db-c13cc79a2359 | \n",
- " 22060193-9854-40dd-b368-507ace613fc8 | \n",
- " 80dd94d6-a188-4e34-8d80-f2ed56ac3e77 | \n",
- " input0 | \n",
- " expected_output0 | \n",
- " None | \n",
- " data0 | \n",
- " input0 -> output | \n",
- " eval='input0, expected_output0, (input0 -> out... | \n",
- " <intelligence_layer.core.tracer.in_memory_trac... | \n",
- " <intelligence_layer.evaluation.infrastructure.... | \n",
- "
\n",
- " \n",
- " 2c9fe8be-db65-4589-9b28-7aba7ab74f0f | \n",
- " input0 | \n",
- " expected_output0 | \n",
- " None | \n",
- " data0 | \n",
- " input0 -> output | \n",
- " eval='input0, expected_output0, (input0 -> out... | \n",
- " <intelligence_layer.core.tracer.in_memory_trac... | \n",
- " <intelligence_layer.evaluation.infrastructure.... | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " input \\\n",
- "example_id evaluation_id run_id \n",
- "42738278-d0c7-4be1-a004-5a2c6c2f87dd 22060193-9854-40dd-b368-507ace613fc8 80dd94d6-a188-4e34-8d80-f2ed56ac3e77 input1 \n",
- " 2c9fe8be-db65-4589-9b28-7aba7ab74f0f input1 \n",
- "e02b58a2-a1c1-49d3-99db-c13cc79a2359 22060193-9854-40dd-b368-507ace613fc8 80dd94d6-a188-4e34-8d80-f2ed56ac3e77 input0 \n",
- " 2c9fe8be-db65-4589-9b28-7aba7ab74f0f input0 \n",
- "\n",
- " expected_output \\\n",
- "example_id evaluation_id run_id \n",
- "42738278-d0c7-4be1-a004-5a2c6c2f87dd 22060193-9854-40dd-b368-507ace613fc8 80dd94d6-a188-4e34-8d80-f2ed56ac3e77 expected_output1 \n",
- " 2c9fe8be-db65-4589-9b28-7aba7ab74f0f expected_output1 \n",
- "e02b58a2-a1c1-49d3-99db-c13cc79a2359 22060193-9854-40dd-b368-507ace613fc8 80dd94d6-a188-4e34-8d80-f2ed56ac3e77 expected_output0 \n",
- " 2c9fe8be-db65-4589-9b28-7aba7ab74f0f expected_output0 \n",
- "\n",
- " metadata \\\n",
- "example_id evaluation_id run_id \n",
- "42738278-d0c7-4be1-a004-5a2c6c2f87dd 22060193-9854-40dd-b368-507ace613fc8 80dd94d6-a188-4e34-8d80-f2ed56ac3e77 None \n",
- " 2c9fe8be-db65-4589-9b28-7aba7ab74f0f None \n",
- "e02b58a2-a1c1-49d3-99db-c13cc79a2359 22060193-9854-40dd-b368-507ace613fc8 80dd94d6-a188-4e34-8d80-f2ed56ac3e77 None \n",
- " 2c9fe8be-db65-4589-9b28-7aba7ab74f0f None \n",
- "\n",
- " data \\\n",
- "example_id evaluation_id run_id \n",
- "42738278-d0c7-4be1-a004-5a2c6c2f87dd 22060193-9854-40dd-b368-507ace613fc8 80dd94d6-a188-4e34-8d80-f2ed56ac3e77 data1 \n",
- " 2c9fe8be-db65-4589-9b28-7aba7ab74f0f data1 \n",
- "e02b58a2-a1c1-49d3-99db-c13cc79a2359 22060193-9854-40dd-b368-507ace613fc8 80dd94d6-a188-4e34-8d80-f2ed56ac3e77 data0 \n",
- " 2c9fe8be-db65-4589-9b28-7aba7ab74f0f data0 \n",
- "\n",
- " output \\\n",
- "example_id evaluation_id run_id \n",
- "42738278-d0c7-4be1-a004-5a2c6c2f87dd 22060193-9854-40dd-b368-507ace613fc8 80dd94d6-a188-4e34-8d80-f2ed56ac3e77 input1 -> output \n",
- " 2c9fe8be-db65-4589-9b28-7aba7ab74f0f input1 -> output \n",
- "e02b58a2-a1c1-49d3-99db-c13cc79a2359 22060193-9854-40dd-b368-507ace613fc8 80dd94d6-a188-4e34-8d80-f2ed56ac3e77 input0 -> output \n",
- " 2c9fe8be-db65-4589-9b28-7aba7ab74f0f input0 -> output \n",
- "\n",
- " result \\\n",
- "example_id evaluation_id run_id \n",
- "42738278-d0c7-4be1-a004-5a2c6c2f87dd 22060193-9854-40dd-b368-507ace613fc8 80dd94d6-a188-4e34-8d80-f2ed56ac3e77 eval='input1, expected_output1, (input1 -> out... \n",
- " 2c9fe8be-db65-4589-9b28-7aba7ab74f0f eval='input1, expected_output1, (input1 -> out... \n",
- "e02b58a2-a1c1-49d3-99db-c13cc79a2359 22060193-9854-40dd-b368-507ace613fc8 80dd94d6-a188-4e34-8d80-f2ed56ac3e77 eval='input0, expected_output0, (input0 -> out... \n",
- " 2c9fe8be-db65-4589-9b28-7aba7ab74f0f eval='input0, expected_output0, (input0 -> out... \n",
- "\n",
- " tracer \\\n",
- "example_id evaluation_id run_id \n",
- "42738278-d0c7-4be1-a004-5a2c6c2f87dd 22060193-9854-40dd-b368-507ace613fc8 80dd94d6-a188-4e34-8d80-f2ed56ac3e77 Run Lineage\n",
- "├── Example: e02b58a2-a1c1-49d3-99db-c13cc79a2359\n",
- "│ ├── Input\n",
- "│ │ └── input0\n",
- "│ └── Expected Output\n",
- "│ └── expected_output0\n",
- "├── Outputs\n",
- "│ ├── Output: 80dd94d6-a188-4e34-8d80-f2ed56ac3e77\n",
- "│ │ └── input0 -> output\n",
- "│ └── Output: 2c9fe8be-db65-4589-9b28-7aba7ab74f0f\n",
- "│ └── input0 -> output\n",
- "└── Evaluation: 22060193-9854-40dd-b368-507ace613fc8\n",
- " └── eval='input0, expected_output0, (input0 -> output, input0 -> output) -> evaluation'\n",
- "\n"
- ],
- "text/plain": [
- "Run Lineage\n",
- "├── Example: e02b58a2-a1c1-49d3-99db-c13cc79a2359\n",
- "│ ├── Input\n",
- "│ │ └── input0\n",
- "│ └── Expected Output\n",
- "│ └── expected_output0\n",
- "├── Outputs\n",
- "│ ├── Output: 80dd94d6-a188-4e34-8d80-f2ed56ac3e77\n",
- "│ │ └── input0 -> output\n",
- "│ └── Output: 2c9fe8be-db65-4589-9b28-7aba7ab74f0f\n",
- "│ └── input0 -> output\n",
- "└── Evaluation: 22060193-9854-40dd-b368-507ace613fc8\n",
- " └── eval='input0, expected_output0, (input0 -> output, input0 -> output) -> evaluation'\n"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- }
- ],
+ "outputs": [],
"source": [
"# Step 1 - retrieve linked data via the `Runner`/`Evaluator`\n",
"my_evaluator = example_data.evaluator\n",