Skip to content

Commit

Permalink
fix: clean notebooks
Browse files Browse the repository at this point in the history
  • Loading branch information
MerlinKallenbornAA committed Nov 28, 2024
1 parent 10afe8b commit 2427592
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 501 deletions.
100 changes: 13 additions & 87 deletions src/documentation/evaluate_with_studio.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -2,20 +2,9 @@
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"execution_count": null,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"True"
]
},
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
],
"outputs": [],
"source": [
"import json\n",
"from pathlib import Path\n",
Expand Down Expand Up @@ -61,7 +50,7 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -79,7 +68,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -104,21 +93,9 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": null,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'label': 'Finance',\n",
" 'message': 'I just traveled to Paris for a conference, where can I get the train ride refunded?'}"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"outputs": [],
"source": [
"data[0]"
]
Expand All @@ -145,17 +122,9 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Dataset ID: 5e3ce59d-f87c-448d-b811-c02ecae1588d\n"
]
}
],
"outputs": [],
"source": [
"all_labels = list(set(item[\"label\"] for item in data))\n",
"dataset = studio_dataset_repository.create_dataset(\n",
Expand Down Expand Up @@ -183,17 +152,9 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Benchmark ID: ec40c09a-0472-4c70-bd48-7427e4abc87f\n"
]
}
],
"outputs": [],
"source": [
"import random\n",
"import string\n",
Expand Down Expand Up @@ -224,32 +185,9 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Running Task: 100%|██████████| 24/24 [00:56<00:00, 2.37s/it]\n",
"Evaluating: 24it [00:00, 92691.80it/s]\n",
"Submitting traces to Studio: 100%|██████████| 24/24 [00:01<00:00, 21.70it/s]\n"
]
},
{
"ename": "AttributeError",
"evalue": "'list' object has no attribute 'model_dump_json'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[7], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mbenchmark\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mexecute\u001b[49m\u001b[43m(\u001b[49m\u001b[43mPromptBasedClassify\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mClassify v0.0 with Luminous\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n",
"File \u001b[0;32m~/Documents/GitHub/intelligence-layer-sdk/src/intelligence_layer/evaluation/benchmark/studio_benchmark.py:161\u001b[0m, in \u001b[0;36mStudioBenchmark.execute\u001b[0;34m(self, task, name, description, labels, metadata)\u001b[0m\n\u001b[1;32m 155\u001b[0m trace_ids\u001b[38;5;241m.\u001b[39mappend(trace_id)\n\u001b[1;32m 157\u001b[0m benchmark_lineages \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_create_benchmark_lineages(\n\u001b[1;32m 158\u001b[0m eval_lineages\u001b[38;5;241m=\u001b[39mevaluation_lineages,\n\u001b[1;32m 159\u001b[0m trace_ids\u001b[38;5;241m=\u001b[39mtrace_ids,\n\u001b[1;32m 160\u001b[0m )\n\u001b[0;32m--> 161\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mclient\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msubmit_benchmark_lineages\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 162\u001b[0m \u001b[43m \u001b[49m\u001b[43mbenchmark_lineages\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mbenchmark_lineages\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 163\u001b[0m \u001b[43m \u001b[49m\u001b[43mexecution_id\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mbenchmark_execution_id\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 164\u001b[0m \u001b[43m \u001b[49m\u001b[43mbenchmark_id\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mid\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 165\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 167\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m benchmark_execution_id\n",
"File \u001b[0;32m~/Documents/GitHub/intelligence-layer-sdk/src/intelligence_layer/connectors/studio/studio.py:501\u001b[0m, in \u001b[0;36mStudioClient.submit_benchmark_lineages\u001b[0;34m(self, benchmark_lineages, benchmark_id, execution_id)\u001b[0m\n\u001b[1;32m 487\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21msubmit_benchmark_lineages\u001b[39m(\n\u001b[1;32m 488\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[1;32m 489\u001b[0m benchmark_lineages: PostBenchmarkLineagesRequest,\n\u001b[1;32m 490\u001b[0m benchmark_id: \u001b[38;5;28mstr\u001b[39m,\n\u001b[1;32m 491\u001b[0m execution_id: \u001b[38;5;28mstr\u001b[39m,\n\u001b[1;32m 492\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[38;5;28mstr\u001b[39m:\n\u001b[1;32m 493\u001b[0m url \u001b[38;5;241m=\u001b[39m urljoin(\n\u001b[1;32m 494\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39murl,\n\u001b[1;32m 495\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m/api/projects/\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mproject_id\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m/evaluation/benchmarks/\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mbenchmark_id\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m/executions/\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mexecution_id\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m/lineages\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 496\u001b[0m )\n\u001b[1;32m 498\u001b[0m response \u001b[38;5;241m=\u001b[39m requests\u001b[38;5;241m.\u001b[39mpost(\n\u001b[1;32m 499\u001b[0m url,\n\u001b[1;32m 500\u001b[0m headers\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_headers,\n\u001b[0;32m--> 501\u001b[0m data\u001b[38;5;241m=\u001b[39m\u001b[43mbenchmark_lineages\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmodel_dump_json\u001b[49m(),\n\u001b[1;32m 502\u001b[0m )\n\u001b[1;32m 504\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_raise_for_status(response)\n\u001b[1;32m 505\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mstr\u001b[39m(response\u001b[38;5;241m.\u001b[39mjson())\n",
"\u001b[0;31mAttributeError\u001b[0m: 'list' object has no attribute 'model_dump_json'"
]
}
],
"outputs": [],
"source": [
"benchmark.execute(PromptBasedClassify(), \"Classify v0.0 with Luminous\")"
]
Expand All @@ -265,19 +203,7 @@
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"ename": "NameError",
"evalue": "name 'benchmark' is not defined",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[2], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mbenchmark\u001b[49m\u001b[38;5;241m.\u001b[39mexecute(\n\u001b[1;32m 2\u001b[0m PromptBasedClassify(model\u001b[38;5;241m=\u001b[39mLlama3InstructModel(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mllama-3.1-8b-instruct\u001b[39m\u001b[38;5;124m\"\u001b[39m)),\n\u001b[1;32m 3\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mClassify v0.1 with Llama\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 4\u001b[0m )\n",
"\u001b[0;31mNameError\u001b[0m: name 'benchmark' is not defined"
]
}
],
"outputs": [],
"source": [
"benchmark.execute(\n",
" PromptBasedClassify(model=Llama3InstructModel(\"llama-3.1-8b-instruct\")),\n",
Expand Down
Loading

0 comments on commit 2427592

Please sign in to comment.