Add examples for image-to-text task (#2754)
* add blip2 examples
* fix formatting
* address pr comments
* fix formatting
* use blip model
* use blip for cli examples
* format
1 parent 1e45121, commit 78b637d
Showing 7 changed files with 1,187 additions and 0 deletions.
9 changes: 9 additions & 0 deletions
cli/foundation-models/system/inference/image-to-text/deploy-batch.yaml
$schema: https://azuremlschemas.azureedge.net/latest/batchDeployment.schema.json
name: demo
description: "Batch endpoint for image-to-text task"
type: model
resources:
  instance_count: 1
settings:
  mini_batch_size: 1
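deploy-batch.yaml is deliberately a partial template: endpoint_name, model, compute, and name are left out of the file and supplied at deployment time, as the batch script below does with --set. A minimal sketch with placeholder values:

az ml batch-deployment create --file ./deploy-batch.yaml \
  --resource-group <RESOURCE_GROUP> --workspace-name <WORKSPACE_NAME> \
  --set endpoint_name=<ENDPOINT_NAME> \
        model=azureml://registries/azureml/models/Salesforce-BLIP-image-captioning-base/versions/<VERSION> \
        compute=cpu-cluster \
        name=demo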
12 changes: 12 additions & 0 deletions
cli/foundation-models/system/inference/image-to-text/deploy-online.yaml
$schema: https://azuremlschemas.azureedge.net/latest/managedOnlineDeployment.schema.json
name: image-to-text-demo
instance_type: Standard_DS5_v2
instance_count: 1
liveness_probe:
  initial_delay: 180
  period: 180
  failure_threshold: 49
  timeout: 299
request_settings:
  request_timeout_ms: 90000
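The long liveness-probe delays and the 90-second request timeout leave room for slow model-container startup and for scoring large base64 image payloads; instance_type is likewise overridden by the online script below via --set. The request body the deployed endpoint expects matches what prepare_data.py (at the end of this commit) writes; a minimal hand-written payload, with the base64 string as a placeholder:

# Write a minimal scoring payload by hand (placeholder base64 string);
# prepare_data.py below generates the same shape from real images.
cat > sample_request_data.json <<'EOF'
{
  "input_data": {
    "columns": ["image"],
    "index": [0],
    "data": [["<base64-encoded-image-bytes>"]]
  }
}
EOF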
111 changes: 111 additions & 0 deletions
cli/foundation-models/system/inference/image-to-text/image-to-text-batch-endpoint.sh
set -x
# The commands in this file map to steps in this notebook: https://aka.ms/azureml-infer-batch-sdk-blip-image-to-text
# The sample scoring file is available in the same folder as the above notebook.

# script inputs
registry_name="azureml"
subscription_id="<SUBSCRIPTION_ID>"
resource_group_name="<RESOURCE_GROUP>"
workspace_name="<WORKSPACE_NAME>"

# This is the model from the system registry that needs to be deployed
model_name="Salesforce-BLIP-image-captioning-base"
model_label="latest"

deployment_compute="cpu-cluster"
# todo: fetch deployment_sku from the min_inference_sku tag of the model
deployment_sku="Standard_DS5_v2"

version=$(date +%s)
endpoint_name="image-to-text-$version"
deployment_name="demo-$version"

# Prepare data for deployment
data_path="data_batch"
python ./prepare_data.py --mode "batch" --data_path $data_path
# sample request data: a folder of csv files with a base64-encoded image column
sample_request_csv_folder="./data_batch/odfridgeObjects/batch"
# 1. Set up prerequisites
if [ "$subscription_id" = "<SUBSCRIPTION_ID>" ] || \
   [ "$resource_group_name" = "<RESOURCE_GROUP>" ] || \
   [ "$workspace_name" = "<WORKSPACE_NAME>" ]; then
    echo "Please update the script with the subscription_id, resource_group_name and workspace_name"
    exit 1
fi

az account set -s $subscription_id
workspace_info="--resource-group $resource_group_name --workspace-name $workspace_name"

# 2. Check if the model exists in the registry
# Need to confirm model show command works for registries outside the tenant (aka system registry)
if ! az ml model show --name $model_name --label $model_label --registry-name $registry_name
then
    echo "Model $model_name:$model_label does not exist in registry $registry_name"
    exit 1
fi

# Get the latest model version
model_version=$(az ml model show --name $model_name --label $model_label --registry-name $registry_name --query version --output tsv)

# 3. Check if compute $deployment_compute exists, else create it
if az ml compute show --name $deployment_compute $workspace_info
then
    echo "Compute cluster $deployment_compute already exists"
else
    echo "Creating compute cluster $deployment_compute"
    az ml compute create --name $deployment_compute --type amlcompute --min-instances 0 --max-instances 2 --size $deployment_sku $workspace_info || {
        echo "Failed to create compute cluster $deployment_compute"
        exit 1
    }
fi

# 4. Deploy the model to an endpoint
# Create batch endpoint
az ml batch-endpoint create --name $endpoint_name $workspace_info || {
    echo "endpoint create failed"; exit 1;
}

# Deploy model from registry to endpoint in workspace
az ml batch-deployment create --file ./deploy-batch.yaml $workspace_info --set \
  endpoint_name=$endpoint_name model=azureml://registries/$registry_name/models/$model_name/versions/$model_version \
  compute=$deployment_compute \
  name=$deployment_name || {
    echo "deployment create failed"; exit 1;
}

# 5. Try a scoring request with the csv folder

# Check if the scoring data folder exists
if [ -d $sample_request_csv_folder ]; then
    echo "Invoking endpoint $endpoint_name with input folder $sample_request_csv_folder"
else
    echo "Scoring folder $sample_request_csv_folder does not exist"
    exit 1
fi

# Invoke the endpoint
# Note: if the job fails with an Out of Memory error,
# try splitting your input into smaller csv files or
# decrease the mini_batch_size for the deployment (see deploy-batch.yaml).
csv_inference_job=$(az ml batch-endpoint invoke --name $endpoint_name \
  --deployment-name $deployment_name --input $sample_request_csv_folder --input-type \
  uri_folder $workspace_info --query name --output tsv) || {
    echo "endpoint invoke failed"; exit 1;
}

# Wait for the job to complete
az ml job stream --name $csv_inference_job $workspace_info || {
    echo "job stream failed"; exit 1;
}

# 6. Delete the endpoint
# Batch endpoints use compute resources only when jobs are submitted. You can keep the
# batch endpoint for your reference without worrying about compute bills, or choose to delete the endpoint.
# If you created your compute cluster with zero minimum instances and it scales down soon after being idle,
# you won't be charged for unused compute.
az ml batch-endpoint delete --name $endpoint_name $workspace_info --yes || {
    echo "endpoint delete failed"; exit 1;
}
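Before the cleanup step, the batch job's predictions can be downloaded for inspection. A hedged sketch, assuming the scoring results land in the default named output score, as in other azureml-examples batch walkthroughs:

# Download the job's scoring output (assumes the default output name "score")
az ml job download --name $csv_inference_job --output-name score \
  --download-path ./batch_results $workspace_info || {
    echo "job download failed"; exit 1;
}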
80 changes: 80 additions & 0 deletions
cli/foundation-models/system/inference/image-to-text/image-to-text-online-endpoint.sh
set -x
# The commands in this file map to steps in this notebook: https://aka.ms/azureml-infer-online-sdk-blip-image-to-text
# The sample scoring file is available in the same folder as the above notebook.

# script inputs
registry_name="azureml"
subscription_id="<SUBSCRIPTION_ID>"
resource_group_name="<RESOURCE_GROUP>"
workspace_name="<WORKSPACE_NAME>"

# This is the model from the system registry that needs to be deployed
model_name="Salesforce-BLIP-image-captioning-base"
model_label="latest"

version=$(date +%s)
endpoint_name="image-to-text-$version"

# Todo: fetch deployment_sku from the min_inference_sku tag of the model
deployment_sku="Standard_DS5_v2"

# Prepare data for deployment
data_path="./data_online"
python ./prepare_data.py --data_path $data_path --mode "online"
# sample request data
sample_request_data="$data_path/odfridgeObjects/sample_request_data.json"

# 1. Set up prerequisites
if [ "$subscription_id" = "<SUBSCRIPTION_ID>" ] || \
   [ "$resource_group_name" = "<RESOURCE_GROUP>" ] || \
   [ "$workspace_name" = "<WORKSPACE_NAME>" ]; then
    echo "Please update the script with the subscription_id, resource_group_name and workspace_name"
    exit 1
fi

az account set -s $subscription_id
workspace_info="--resource-group $resource_group_name --workspace-name $workspace_name"

# 2. Check if the model exists in the registry
# Need to confirm model show command works for registries outside the tenant (aka system registry)
if ! az ml model show --name $model_name --label $model_label --registry-name $registry_name
then
    echo "Model $model_name:$model_label does not exist in registry $registry_name"
    exit 1
fi

# Get the latest model version
model_version=$(az ml model show --name $model_name --label $model_label --registry-name $registry_name --query version --output tsv)

# 3. Deploy the model to an endpoint
# Create online endpoint
az ml online-endpoint create --name $endpoint_name $workspace_info || {
    echo "endpoint create failed"; exit 1;
}

# Deploy model from registry to endpoint in workspace
az ml online-deployment create --file deploy-online.yaml $workspace_info --all-traffic --set \
  endpoint_name=$endpoint_name model=azureml://registries/$registry_name/models/$model_name/versions/$model_version \
  instance_type=$deployment_sku || {
    echo "deployment create failed"; exit 1;
}

# 4. Try a sample scoring request

# Check if the scoring data file exists
if [ -f $sample_request_data ]; then
    echo "Invoking endpoint $endpoint_name with $sample_request_data"
else
    echo "Scoring file $sample_request_data does not exist"
    exit 1
fi

az ml online-endpoint invoke --name $endpoint_name --request-file $sample_request_data $workspace_info || {
    echo "endpoint invoke failed"; exit 1;
}

# 5. Delete the endpoint and sample_request_data.json
az ml online-endpoint delete --name $endpoint_name $workspace_info --yes || {
    echo "endpoint delete failed"; exit 1;
}

rm $sample_request_data
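The same request can also be sent over plain REST instead of az ml online-endpoint invoke. A sketch that assumes key-based auth (the default for managed online endpoints) and runs before the endpoint and request file are deleted:

# Score via REST: fetch the scoring URI and key, then POST the same payload
scoring_uri=$(az ml online-endpoint show --name $endpoint_name $workspace_info --query scoring_uri --output tsv)
key=$(az ml online-endpoint get-credentials --name $endpoint_name $workspace_info --query primaryKey --output tsv)
curl --request POST "$scoring_uri" \
  --header "Authorization: Bearer $key" \
  --header "Content-Type: application/json" \
  --data @$sample_request_data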
146 changes: 146 additions & 0 deletions
cli/foundation-models/system/inference/image-to-text/prepare_data.py
import argparse
import base64
import json
import os
import shutil
import urllib.request
from zipfile import ZipFile

import pandas as pd


def download_and_unzip(dataset_parent_dir: str) -> str:
    """Download the image dataset and unzip it.

    :param dataset_parent_dir: dataset parent directory to which dataset will be downloaded
    :type dataset_parent_dir: str
    :return: path of the extracted dataset directory
    :rtype: str
    """
    # Create directory, if it does not exist
    os.makedirs(dataset_parent_dir, exist_ok=True)

    # Download data
    download_url = "https://cvbp-secondary.z19.web.core.windows.net/datasets/object_detection/odFridgeObjects.zip"
    print(f"Downloading data from {download_url}")

    # Extract current dataset name from dataset url
    dataset_name = os.path.basename(download_url).split(".")[0]
    # Get dataset path for later use
    dataset_dir = os.path.join(dataset_parent_dir, dataset_name)

    if os.path.exists(dataset_dir):
        shutil.rmtree(dataset_dir)

    # Get the name of the zip file
    data_file = os.path.join(dataset_parent_dir, f"{dataset_name}.zip")

    # Download data from the public url
    urllib.request.urlretrieve(download_url, filename=data_file)

    # Extract files
    with ZipFile(data_file, "r") as zip_file:
        print("extracting files...")
        zip_file.extractall(path=dataset_parent_dir)
        print("done")
    # Delete zip file
    os.remove(data_file)
    return dataset_dir


def read_image(image_path: str) -> bytes:
    """Read image from path.

    :param image_path: image path
    :type image_path: str
    :return: image in bytes format
    :rtype: bytes
    """
    with open(image_path, "rb") as f:
        return f.read()


def prepare_data_for_online_inference(dataset_dir: str) -> None:
    """Prepare a request json for online inference.

    :param dataset_dir: dataset directory
    :type dataset_dir: str
    """
    sample_image_1 = os.path.join(dataset_dir, "images", "99.jpg")
    sample_image_2 = os.path.join(dataset_dir, "images", "1.jpg")

    # Each row of "data" carries one base64-encoded image string
    request_json = {
        "input_data": {
            "columns": ["image"],
            "index": [0, 1],
            "data": [
                [base64.encodebytes(read_image(sample_image_1)).decode("utf-8")],
                [base64.encodebytes(read_image(sample_image_2)).decode("utf-8")],
            ],
        }
    }

    request_file_name = os.path.join(dataset_dir, "sample_request_data.json")

    with open(request_file_name, "w") as request_file:
        json.dump(request_json, request_file)


def prepare_data_for_batch_inference(dataset_dir: str) -> None:
    """Prepare an image folder and csv files for batch inference.

    This function creates a folder of csv files with images in base64 format.
    :param dataset_dir: dataset directory
    :type dataset_dir: str
    """
    csv_folder_path = os.path.join(dataset_dir, "batch")
    os.makedirs(csv_folder_path, exist_ok=True)
    batch_input_file = "batch_input.csv"
    image_dir = os.path.join(dataset_dir, "images")

    image_list = []
    for image_name in os.listdir(image_dir):
        image = read_image(os.path.join(image_dir, image_name))
        data = base64.encodebytes(image).decode("utf-8")
        image_list.append(data)

    # Divide the image list into chunks of 10 rows each,
    # rounding the number of chunks up with ceiling division
    batch_size_per_predict = 10
    divided_list = [
        image_list[i * batch_size_per_predict : (i + 1) * batch_size_per_predict]
        for i in range(
            (len(image_list) + batch_size_per_predict - 1) // batch_size_per_predict
        )
    ]

    # Write each chunk to its own numbered csv file, e.g. 0batch_input.csv
    for list_num, chunk in enumerate(divided_list):
        batch_df = pd.DataFrame(chunk, columns=["image"])
        filepath = os.path.join(csv_folder_path, str(list_num) + batch_input_file)
        batch_df.to_csv(filepath)


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Prepare data for image-to-text task")
    parser.add_argument(
        "--data_path", type=str, default="data", help="Dataset location"
    )
    parser.add_argument(
        "--mode",
        type=str,
        default="online",
        help="prepare data for online or batch inference",
    )

    args, unknown = parser.parse_known_args()

    dataset_dir = download_and_unzip(
        dataset_parent_dir=os.path.join(
            os.path.dirname(os.path.realpath(__file__)), args.data_path
        ),
    )

    if args.mode == "online":
        prepare_data_for_online_inference(dataset_dir=dataset_dir)
    else:
        prepare_data_for_batch_inference(dataset_dir=dataset_dir)
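For reference, the two shell scripts above drive this file as follows; it can also be run standalone with the same flags:

# csv folder for the batch endpoint
python ./prepare_data.py --mode "batch" --data_path "data_batch"
# sample_request_data.json for the online endpoint
python ./prepare_data.py --mode "online" --data_path "./data_online"

Each mode first downloads and extracts the odFridgeObjects dataset under the given path, then writes the request data next to the extracted images.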