From 2427592331bb3061db057a7953cc60ac33c0abd6 Mon Sep 17 00:00:00 2001 From: Merlin Kallenborn Date: Thu, 28 Nov 2024 17:46:56 +0100 Subject: [PATCH] fix: clean notebooks --- src/documentation/evaluate_with_studio.ipynb | 100 +---- .../how_to_retrieve_data_for_analysis.ipynb | 421 +----------------- 2 files changed, 20 insertions(+), 501 deletions(-) diff --git a/src/documentation/evaluate_with_studio.ipynb b/src/documentation/evaluate_with_studio.ipynb index 2f5e3495..06261c58 100644 --- a/src/documentation/evaluate_with_studio.ipynb +++ b/src/documentation/evaluate_with_studio.ipynb @@ -2,20 +2,9 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "True" - ] - }, - "execution_count": 1, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "import json\n", "from pathlib import Path\n", @@ -61,7 +50,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -79,7 +68,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -104,21 +93,9 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'label': 'Finance',\n", - " 'message': 'I just traveled to Paris for a conference, where can I get the train ride refunded?'}" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "data[0]" ] @@ -145,17 +122,9 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Dataset ID: 5e3ce59d-f87c-448d-b811-c02ecae1588d\n" - ] - } - ], + "outputs": [], "source": [ "all_labels = list(set(item[\"label\"] for item in data))\n", "dataset = studio_dataset_repository.create_dataset(\n", @@ -183,17 +152,9 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Benchmark ID: ec40c09a-0472-4c70-bd48-7427e4abc87f\n" - ] - } - ], + "outputs": [], "source": [ "import random\n", "import string\n", @@ -224,32 +185,9 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Running Task: 100%|██████████| 24/24 [00:56<00:00, 2.37s/it]\n", - "Evaluating: 24it [00:00, 92691.80it/s]\n", - "Submitting traces to Studio: 100%|██████████| 24/24 [00:01<00:00, 21.70it/s]\n" - ] - }, - { - "ename": "AttributeError", - "evalue": "'list' object has no attribute 'model_dump_json'", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[7], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mbenchmark\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mexecute\u001b[49m\u001b[43m(\u001b[49m\u001b[43mPromptBasedClassify\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mClassify v0.0 with Luminous\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/Documents/GitHub/intelligence-layer-sdk/src/intelligence_layer/evaluation/benchmark/studio_benchmark.py:161\u001b[0m, in \u001b[0;36mStudioBenchmark.execute\u001b[0;34m(self, task, name, description, labels, metadata)\u001b[0m\n\u001b[1;32m 155\u001b[0m trace_ids\u001b[38;5;241m.\u001b[39mappend(trace_id)\n\u001b[1;32m 157\u001b[0m benchmark_lineages \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_create_benchmark_lineages(\n\u001b[1;32m 158\u001b[0m eval_lineages\u001b[38;5;241m=\u001b[39mevaluation_lineages,\n\u001b[1;32m 159\u001b[0m trace_ids\u001b[38;5;241m=\u001b[39mtrace_ids,\n\u001b[1;32m 160\u001b[0m )\n\u001b[0;32m--> 161\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mclient\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msubmit_benchmark_lineages\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 162\u001b[0m \u001b[43m \u001b[49m\u001b[43mbenchmark_lineages\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mbenchmark_lineages\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 163\u001b[0m \u001b[43m \u001b[49m\u001b[43mexecution_id\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mbenchmark_execution_id\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 164\u001b[0m \u001b[43m \u001b[49m\u001b[43mbenchmark_id\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mid\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 165\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 167\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m benchmark_execution_id\n", - "File \u001b[0;32m~/Documents/GitHub/intelligence-layer-sdk/src/intelligence_layer/connectors/studio/studio.py:501\u001b[0m, in \u001b[0;36mStudioClient.submit_benchmark_lineages\u001b[0;34m(self, benchmark_lineages, benchmark_id, execution_id)\u001b[0m\n\u001b[1;32m 487\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21msubmit_benchmark_lineages\u001b[39m(\n\u001b[1;32m 488\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[1;32m 489\u001b[0m benchmark_lineages: PostBenchmarkLineagesRequest,\n\u001b[1;32m 490\u001b[0m benchmark_id: \u001b[38;5;28mstr\u001b[39m,\n\u001b[1;32m 491\u001b[0m execution_id: \u001b[38;5;28mstr\u001b[39m,\n\u001b[1;32m 492\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[38;5;28mstr\u001b[39m:\n\u001b[1;32m 493\u001b[0m url \u001b[38;5;241m=\u001b[39m urljoin(\n\u001b[1;32m 494\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39murl,\n\u001b[1;32m 495\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m/api/projects/\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mproject_id\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m/evaluation/benchmarks/\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mbenchmark_id\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m/executions/\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mexecution_id\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m/lineages\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 496\u001b[0m )\n\u001b[1;32m 498\u001b[0m response \u001b[38;5;241m=\u001b[39m requests\u001b[38;5;241m.\u001b[39mpost(\n\u001b[1;32m 499\u001b[0m url,\n\u001b[1;32m 500\u001b[0m headers\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_headers,\n\u001b[0;32m--> 501\u001b[0m data\u001b[38;5;241m=\u001b[39m\u001b[43mbenchmark_lineages\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmodel_dump_json\u001b[49m(),\n\u001b[1;32m 502\u001b[0m )\n\u001b[1;32m 504\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_raise_for_status(response)\n\u001b[1;32m 505\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mstr\u001b[39m(response\u001b[38;5;241m.\u001b[39mjson())\n", - "\u001b[0;31mAttributeError\u001b[0m: 'list' object has no attribute 'model_dump_json'" - ] - } - ], + "outputs": [], "source": [ "benchmark.execute(PromptBasedClassify(), \"Classify v0.0 with Luminous\")" ] @@ -265,19 +203,7 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [ - { - "ename": "NameError", - "evalue": "name 'benchmark' is not defined", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[2], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mbenchmark\u001b[49m\u001b[38;5;241m.\u001b[39mexecute(\n\u001b[1;32m 2\u001b[0m PromptBasedClassify(model\u001b[38;5;241m=\u001b[39mLlama3InstructModel(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mllama-3.1-8b-instruct\u001b[39m\u001b[38;5;124m\"\u001b[39m)),\n\u001b[1;32m 3\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mClassify v0.1 with Llama\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 4\u001b[0m )\n", - "\u001b[0;31mNameError\u001b[0m: name 'benchmark' is not defined" - ] - } - ], + "outputs": [], "source": [ "benchmark.execute(\n", " PromptBasedClassify(model=Llama3InstructModel(\"llama-3.1-8b-instruct\")),\n", diff --git a/src/documentation/how_tos/how_to_retrieve_data_for_analysis.ipynb b/src/documentation/how_tos/how_to_retrieve_data_for_analysis.ipynb index 1562abd6..e212fa24 100644 --- a/src/documentation/how_tos/how_to_retrieve_data_for_analysis.ipynb +++ b/src/documentation/how_tos/how_to_retrieve_data_for_analysis.ipynb @@ -2,20 +2,9 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Running Task: 100%|██████████| 2/2 [00:00<00:00, 5866.16it/s]\n", - "Running Task: 100%|██████████| 2/2 [00:00<00:00, 6533.18it/s]\n", - "Evaluating: 2it [00:00, 73584.28it/s]\n", - "Evaluating: 2it [00:00, 66576.25it/s]\n" - ] - } - ], + "outputs": [], "source": [ "from example_data import DummyEvaluation, example_data\n", "\n", @@ -43,7 +32,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -81,207 +70,9 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
inputexpected_outputmetadatadataoutputresulttracerlineage
example_idevaluation_idrun_id
42738278-d0c7-4be1-a004-5a2c6c2f87dd22060193-9854-40dd-b368-507ace613fc880dd94d6-a188-4e34-8d80-f2ed56ac3e77input1expected_output1Nonedata1input1 -> outputeval='input1, expected_output1, (input1 -> out...<intelligence_layer.core.tracer.in_memory_trac...<intelligence_layer.evaluation.infrastructure....
2c9fe8be-db65-4589-9b28-7aba7ab74f0finput1expected_output1Nonedata1input1 -> outputeval='input1, expected_output1, (input1 -> out...<intelligence_layer.core.tracer.in_memory_trac...<intelligence_layer.evaluation.infrastructure....
e02b58a2-a1c1-49d3-99db-c13cc79a235922060193-9854-40dd-b368-507ace613fc880dd94d6-a188-4e34-8d80-f2ed56ac3e77input0expected_output0Nonedata0input0 -> outputeval='input0, expected_output0, (input0 -> out...<intelligence_layer.core.tracer.in_memory_trac...<intelligence_layer.evaluation.infrastructure....
2c9fe8be-db65-4589-9b28-7aba7ab74f0finput0expected_output0Nonedata0input0 -> outputeval='input0, expected_output0, (input0 -> out...<intelligence_layer.core.tracer.in_memory_trac...<intelligence_layer.evaluation.infrastructure....
\n", - "
" - ], - "text/plain": [ - " input \\\n", - "example_id evaluation_id run_id \n", - "42738278-d0c7-4be1-a004-5a2c6c2f87dd 22060193-9854-40dd-b368-507ace613fc8 80dd94d6-a188-4e34-8d80-f2ed56ac3e77 input1 \n", - " 2c9fe8be-db65-4589-9b28-7aba7ab74f0f input1 \n", - "e02b58a2-a1c1-49d3-99db-c13cc79a2359 22060193-9854-40dd-b368-507ace613fc8 80dd94d6-a188-4e34-8d80-f2ed56ac3e77 input0 \n", - " 2c9fe8be-db65-4589-9b28-7aba7ab74f0f input0 \n", - "\n", - " expected_output \\\n", - "example_id evaluation_id run_id \n", - "42738278-d0c7-4be1-a004-5a2c6c2f87dd 22060193-9854-40dd-b368-507ace613fc8 80dd94d6-a188-4e34-8d80-f2ed56ac3e77 expected_output1 \n", - " 2c9fe8be-db65-4589-9b28-7aba7ab74f0f expected_output1 \n", - "e02b58a2-a1c1-49d3-99db-c13cc79a2359 22060193-9854-40dd-b368-507ace613fc8 80dd94d6-a188-4e34-8d80-f2ed56ac3e77 expected_output0 \n", - " 2c9fe8be-db65-4589-9b28-7aba7ab74f0f expected_output0 \n", - "\n", - " metadata \\\n", - "example_id evaluation_id run_id \n", - "42738278-d0c7-4be1-a004-5a2c6c2f87dd 22060193-9854-40dd-b368-507ace613fc8 80dd94d6-a188-4e34-8d80-f2ed56ac3e77 None \n", - " 2c9fe8be-db65-4589-9b28-7aba7ab74f0f None \n", - "e02b58a2-a1c1-49d3-99db-c13cc79a2359 22060193-9854-40dd-b368-507ace613fc8 80dd94d6-a188-4e34-8d80-f2ed56ac3e77 None \n", - " 2c9fe8be-db65-4589-9b28-7aba7ab74f0f None \n", - "\n", - " data \\\n", - "example_id evaluation_id run_id \n", - "42738278-d0c7-4be1-a004-5a2c6c2f87dd 22060193-9854-40dd-b368-507ace613fc8 80dd94d6-a188-4e34-8d80-f2ed56ac3e77 data1 \n", - " 2c9fe8be-db65-4589-9b28-7aba7ab74f0f data1 \n", - "e02b58a2-a1c1-49d3-99db-c13cc79a2359 22060193-9854-40dd-b368-507ace613fc8 80dd94d6-a188-4e34-8d80-f2ed56ac3e77 data0 \n", - " 2c9fe8be-db65-4589-9b28-7aba7ab74f0f data0 \n", - "\n", - " output \\\n", - "example_id evaluation_id run_id \n", - "42738278-d0c7-4be1-a004-5a2c6c2f87dd 22060193-9854-40dd-b368-507ace613fc8 80dd94d6-a188-4e34-8d80-f2ed56ac3e77 input1 -> output \n", - " 2c9fe8be-db65-4589-9b28-7aba7ab74f0f input1 -> output \n", - "e02b58a2-a1c1-49d3-99db-c13cc79a2359 22060193-9854-40dd-b368-507ace613fc8 80dd94d6-a188-4e34-8d80-f2ed56ac3e77 input0 -> output \n", - " 2c9fe8be-db65-4589-9b28-7aba7ab74f0f input0 -> output \n", - "\n", - " result \\\n", - "example_id evaluation_id run_id \n", - "42738278-d0c7-4be1-a004-5a2c6c2f87dd 22060193-9854-40dd-b368-507ace613fc8 80dd94d6-a188-4e34-8d80-f2ed56ac3e77 eval='input1, expected_output1, (input1 -> out... \n", - " 2c9fe8be-db65-4589-9b28-7aba7ab74f0f eval='input1, expected_output1, (input1 -> out... \n", - "e02b58a2-a1c1-49d3-99db-c13cc79a2359 22060193-9854-40dd-b368-507ace613fc8 80dd94d6-a188-4e34-8d80-f2ed56ac3e77 eval='input0, expected_output0, (input0 -> out... \n", - " 2c9fe8be-db65-4589-9b28-7aba7ab74f0f eval='input0, expected_output0, (input0 -> out... \n", - "\n", - " tracer \\\n", - "example_id evaluation_id run_id \n", - "42738278-d0c7-4be1-a004-5a2c6c2f87dd 22060193-9854-40dd-b368-507ace613fc8 80dd94d6-a188-4e34-8d80-f2ed56ac3e77 Run Lineage\n", - "├── Example: e02b58a2-a1c1-49d3-99db-c13cc79a2359\n", - "│ ├── Input\n", - "│ │ └── input0\n", - "│ └── Expected Output\n", - "│ └── expected_output0\n", - "├── Outputs\n", - "│ ├── Output: 80dd94d6-a188-4e34-8d80-f2ed56ac3e77\n", - "│ │ └── input0 -> output\n", - "│ └── Output: 2c9fe8be-db65-4589-9b28-7aba7ab74f0f\n", - "│ └── input0 -> output\n", - "└── Evaluation: 22060193-9854-40dd-b368-507ace613fc8\n", - " └── eval='input0, expected_output0, (input0 -> output, input0 -> output) -> evaluation'\n", - "\n" - ], - "text/plain": [ - "Run Lineage\n", - "├── Example: e02b58a2-a1c1-49d3-99db-c13cc79a2359\n", - "│ ├── Input\n", - "│ │ └── input0\n", - "│ └── Expected Output\n", - "│ └── expected_output0\n", - "├── Outputs\n", - "│ ├── Output: 80dd94d6-a188-4e34-8d80-f2ed56ac3e77\n", - "│ │ └── input0 -> output\n", - "│ └── Output: 2c9fe8be-db65-4589-9b28-7aba7ab74f0f\n", - "│ └── input0 -> output\n", - "└── Evaluation: 22060193-9854-40dd-b368-507ace613fc8\n", - " └── eval='input0, expected_output0, (input0 -> output, input0 -> output) -> evaluation'\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "# Step 1 - retrieve linked data across all repositories\n", "\n", @@ -313,207 +104,9 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
inputexpected_outputmetadatadataoutputresulttracerlineage
example_idevaluation_idrun_id
42738278-d0c7-4be1-a004-5a2c6c2f87dd22060193-9854-40dd-b368-507ace613fc880dd94d6-a188-4e34-8d80-f2ed56ac3e77input1expected_output1Nonedata1input1 -> outputeval='input1, expected_output1, (input1 -> out...<intelligence_layer.core.tracer.in_memory_trac...<intelligence_layer.evaluation.infrastructure....
2c9fe8be-db65-4589-9b28-7aba7ab74f0finput1expected_output1Nonedata1input1 -> outputeval='input1, expected_output1, (input1 -> out...<intelligence_layer.core.tracer.in_memory_trac...<intelligence_layer.evaluation.infrastructure....
e02b58a2-a1c1-49d3-99db-c13cc79a235922060193-9854-40dd-b368-507ace613fc880dd94d6-a188-4e34-8d80-f2ed56ac3e77input0expected_output0Nonedata0input0 -> outputeval='input0, expected_output0, (input0 -> out...<intelligence_layer.core.tracer.in_memory_trac...<intelligence_layer.evaluation.infrastructure....
2c9fe8be-db65-4589-9b28-7aba7ab74f0finput0expected_output0Nonedata0input0 -> outputeval='input0, expected_output0, (input0 -> out...<intelligence_layer.core.tracer.in_memory_trac...<intelligence_layer.evaluation.infrastructure....
\n", - "
" - ], - "text/plain": [ - " input \\\n", - "example_id evaluation_id run_id \n", - "42738278-d0c7-4be1-a004-5a2c6c2f87dd 22060193-9854-40dd-b368-507ace613fc8 80dd94d6-a188-4e34-8d80-f2ed56ac3e77 input1 \n", - " 2c9fe8be-db65-4589-9b28-7aba7ab74f0f input1 \n", - "e02b58a2-a1c1-49d3-99db-c13cc79a2359 22060193-9854-40dd-b368-507ace613fc8 80dd94d6-a188-4e34-8d80-f2ed56ac3e77 input0 \n", - " 2c9fe8be-db65-4589-9b28-7aba7ab74f0f input0 \n", - "\n", - " expected_output \\\n", - "example_id evaluation_id run_id \n", - "42738278-d0c7-4be1-a004-5a2c6c2f87dd 22060193-9854-40dd-b368-507ace613fc8 80dd94d6-a188-4e34-8d80-f2ed56ac3e77 expected_output1 \n", - " 2c9fe8be-db65-4589-9b28-7aba7ab74f0f expected_output1 \n", - "e02b58a2-a1c1-49d3-99db-c13cc79a2359 22060193-9854-40dd-b368-507ace613fc8 80dd94d6-a188-4e34-8d80-f2ed56ac3e77 expected_output0 \n", - " 2c9fe8be-db65-4589-9b28-7aba7ab74f0f expected_output0 \n", - "\n", - " metadata \\\n", - "example_id evaluation_id run_id \n", - "42738278-d0c7-4be1-a004-5a2c6c2f87dd 22060193-9854-40dd-b368-507ace613fc8 80dd94d6-a188-4e34-8d80-f2ed56ac3e77 None \n", - " 2c9fe8be-db65-4589-9b28-7aba7ab74f0f None \n", - "e02b58a2-a1c1-49d3-99db-c13cc79a2359 22060193-9854-40dd-b368-507ace613fc8 80dd94d6-a188-4e34-8d80-f2ed56ac3e77 None \n", - " 2c9fe8be-db65-4589-9b28-7aba7ab74f0f None \n", - "\n", - " data \\\n", - "example_id evaluation_id run_id \n", - "42738278-d0c7-4be1-a004-5a2c6c2f87dd 22060193-9854-40dd-b368-507ace613fc8 80dd94d6-a188-4e34-8d80-f2ed56ac3e77 data1 \n", - " 2c9fe8be-db65-4589-9b28-7aba7ab74f0f data1 \n", - "e02b58a2-a1c1-49d3-99db-c13cc79a2359 22060193-9854-40dd-b368-507ace613fc8 80dd94d6-a188-4e34-8d80-f2ed56ac3e77 data0 \n", - " 2c9fe8be-db65-4589-9b28-7aba7ab74f0f data0 \n", - "\n", - " output \\\n", - "example_id evaluation_id run_id \n", - "42738278-d0c7-4be1-a004-5a2c6c2f87dd 22060193-9854-40dd-b368-507ace613fc8 80dd94d6-a188-4e34-8d80-f2ed56ac3e77 input1 -> output \n", - " 2c9fe8be-db65-4589-9b28-7aba7ab74f0f input1 -> output \n", - "e02b58a2-a1c1-49d3-99db-c13cc79a2359 22060193-9854-40dd-b368-507ace613fc8 80dd94d6-a188-4e34-8d80-f2ed56ac3e77 input0 -> output \n", - " 2c9fe8be-db65-4589-9b28-7aba7ab74f0f input0 -> output \n", - "\n", - " result \\\n", - "example_id evaluation_id run_id \n", - "42738278-d0c7-4be1-a004-5a2c6c2f87dd 22060193-9854-40dd-b368-507ace613fc8 80dd94d6-a188-4e34-8d80-f2ed56ac3e77 eval='input1, expected_output1, (input1 -> out... \n", - " 2c9fe8be-db65-4589-9b28-7aba7ab74f0f eval='input1, expected_output1, (input1 -> out... \n", - "e02b58a2-a1c1-49d3-99db-c13cc79a2359 22060193-9854-40dd-b368-507ace613fc8 80dd94d6-a188-4e34-8d80-f2ed56ac3e77 eval='input0, expected_output0, (input0 -> out... \n", - " 2c9fe8be-db65-4589-9b28-7aba7ab74f0f eval='input0, expected_output0, (input0 -> out... \n", - "\n", - " tracer \\\n", - "example_id evaluation_id run_id \n", - "42738278-d0c7-4be1-a004-5a2c6c2f87dd 22060193-9854-40dd-b368-507ace613fc8 80dd94d6-a188-4e34-8d80-f2ed56ac3e77 Run Lineage\n", - "├── Example: e02b58a2-a1c1-49d3-99db-c13cc79a2359\n", - "│ ├── Input\n", - "│ │ └── input0\n", - "│ └── Expected Output\n", - "│ └── expected_output0\n", - "├── Outputs\n", - "│ ├── Output: 80dd94d6-a188-4e34-8d80-f2ed56ac3e77\n", - "│ │ └── input0 -> output\n", - "│ └── Output: 2c9fe8be-db65-4589-9b28-7aba7ab74f0f\n", - "│ └── input0 -> output\n", - "└── Evaluation: 22060193-9854-40dd-b368-507ace613fc8\n", - " └── eval='input0, expected_output0, (input0 -> output, input0 -> output) -> evaluation'\n", - "\n" - ], - "text/plain": [ - "Run Lineage\n", - "├── Example: e02b58a2-a1c1-49d3-99db-c13cc79a2359\n", - "│ ├── Input\n", - "│ │ └── input0\n", - "│ └── Expected Output\n", - "│ └── expected_output0\n", - "├── Outputs\n", - "│ ├── Output: 80dd94d6-a188-4e34-8d80-f2ed56ac3e77\n", - "│ │ └── input0 -> output\n", - "│ └── Output: 2c9fe8be-db65-4589-9b28-7aba7ab74f0f\n", - "│ └── input0 -> output\n", - "└── Evaluation: 22060193-9854-40dd-b368-507ace613fc8\n", - " └── eval='input0, expected_output0, (input0 -> output, input0 -> output) -> evaluation'\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "# Step 1 - retrieve linked data via the `Runner`/`Evaluator`\n", "my_evaluator = example_data.evaluator\n",