Skip to content

Commit

Permalink
Merge pull request #241 from Renumics/story/fine_tune
Browse files Browse the repository at this point in the history
Story/fine tune
  • Loading branch information
neindochoh authored Sep 21, 2023
2 parents 69fa300 + ef13acd commit e78a0ee
Show file tree
Hide file tree
Showing 2 changed files with 117 additions and 1 deletion.
116 changes: 116 additions & 0 deletions playbook/stories/fine_tune_img.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
{
"cells": [
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"## Easily fine-tune a ViT with images from Bing search"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"Use the sliceguard library and Spotlight to fine-tune a ViT model for image classification and detect problematic clusters in a dataset created from Bing search in a few lines of code."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# The Imports\n",
"from renumics import spotlight\n",
"from sliceguard.data import create_imagedataset_from_bing\n",
"from sliceguard.models.huggingface import (\n",
" finetune_image_classifier,\n",
" generate_image_pred_probs_embeddings,\n",
")\n",
"from sliceguard.embeddings import generate_image_embeddings"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Create an Image Dataset from Bing\n",
"class_names = [\n",
" \"Blue Tang\",\n",
" \"Clownfish\",\n",
" \"Spotted Eagle Ray\",\n",
" \"Longnose Butterfly Fish\",\n",
" \"Moorish Idol\",\n",
" \"Royal Gramma Fish\",\n",
"]\n",
"df = create_imagedataset_from_bing(\n",
" class_names, 25, \"data\", test_split=0.2, license=\"Free to share and use\"\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Fine-tune a ViT Model with the data (in 1-2 minutes on a GPU)\n",
"finetune_image_classifier(\n",
" df[df[\"split\"] == \"train\"],\n",
" model_name=\"google/vit-base-patch16-224-in21k\",\n",
" output_model_folder=\"./model_folder\",\n",
" epochs=15,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Enrich the DataFrame with Predictions, Probabilities and Embeddings\n",
"df[\"prediction\"], df[\"probs\"], df[\"embeddings\"] = generate_image_pred_probs_embeddings(\n",
" df[\"image\"].values, model_name=\"./model_folder\"\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Check the result and detect problematic clusters\n",
"spotlight.show(\n",
" df, layout=\"https://spotlight.renumics.com/resources/image_classification_v1.0.json\"\n",
")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.6"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
2 changes: 1 addition & 1 deletion playbook/stories/navigating_data_issues.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
"metadata": {},
"source": [
"## Navigating Data Issues\n",
"This notebook is part of an article not published, yet. URL tbd"
"This notebook is part of an [article at medium.](https://medium.com/@markus.stoll/navigate-data-issues-d05ae8c45841)\n"
]
},
{
Expand Down

0 comments on commit e78a0ee

Please sign in to comment.