Commit
* add blip2 examples
* fix formatting
* address pr comments
* .
* fix formatting
* use blip model
* use blip for cli examples
* add examples for vqa
* .
* fix inference data
* address pr comments
* .
* .
1 parent 9c762dd · commit cc86c0a
Showing 9 changed files with 1,285 additions and 0 deletions.
9 changes: 9 additions & 0 deletions
cli/foundation-models/system/inference/visual-question-answering/deploy-batch.yaml
@@ -0,0 +1,9 @@
$schema: https://azuremlschemas.azureedge.net/latest/batchDeployment.schema.json
name: demo
description: "Batch endpoint for visual-question-answering task"
type: model
resources:
  instance_count: 1
settings:
  mini_batch_size: 1
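As a sketch of how this deployment YAML is consumed (the full end-to-end flow is in the batch-endpoint script below; the endpoint, model version, and resource names here are placeholders, not part of this commit):

az ml batch-deployment create --file deploy-batch.yaml \
  --resource-group <RESOURCE_GROUP> --workspace-name <WORKSPACE_NAME> \
  --set endpoint_name=<ENDPOINT_NAME> \
        model=azureml://registries/azureml/models/Salesforce-BLIP-vqa-base/versions/<VERSION> \
        compute=cpu-cluster \
        name=<DEPLOYMENT_NAME>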
12 changes: 12 additions & 0 deletions
cli/foundation-models/system/inference/visual-question-answering/deploy-online.yaml
@@ -0,0 +1,12 @@
$schema: https://azuremlschemas.azureedge.net/latest/managedOnlineDeployment.schema.json
name: vqa-demo
instance_type: Standard_DS5_v2
instance_count: 1
liveness_probe:
  initial_delay: 180
  period: 180
  failure_threshold: 49
  timeout: 299
request_settings:
  request_timeout_ms: 90000
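A minimal sketch of deploying this online configuration with the Azure ML CLI (the endpoint name and model version are illustrative placeholders; the model URI mirrors the one used in the batch script below):

# Create the endpoint, then attach the deployment defined in deploy-online.yaml
az ml online-endpoint create --name <ENDPOINT_NAME> \
  --resource-group <RESOURCE_GROUP> --workspace-name <WORKSPACE_NAME>
az ml online-deployment create --file deploy-online.yaml --all-traffic \
  --resource-group <RESOURCE_GROUP> --workspace-name <WORKSPACE_NAME> \
  --set endpoint_name=<ENDPOINT_NAME> \
        model=azureml://registries/azureml/models/Salesforce-BLIP-vqa-base/versions/<VERSION>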
20 changes: 20 additions & 0 deletions
cli/foundation-models/system/inference/visual-question-answering/list_of_questions.txt
@@ -0,0 +1,20 @@
What is the brand of the half and half in the carton?
What is the color of the bottle of yogurt?
What is the color of the can of soda on the table?
What is the name of the bottled water brand in the image?
Is the bottle tilted?
Is the object tilted?
Is the bottle tilted?
Is the can tilted?
What is the difference between the two beverages in terms of their contents and purpose?
What is the difference between the two beverages on the table?
What are the beverages in the image?
What is the difference between the two beverages, one being a milk drink and the other being a juice drink?
What is the brand of the half and half that is sitting next to the can of soda?
What color is the can of soda?
What are the drinks on the table?
Is there a soda on the table?
How many objects are in the image?
Describe the objects in the image?
What objects are in the image?
How many objects are in the image?
171 changes: 171 additions & 0 deletions
cli/foundation-models/system/inference/visual-question-answering/prepare_data.py
@@ -0,0 +1,171 @@
import argparse
import base64
import json
import os
import shutil
import urllib.request
import pandas as pd
from zipfile import ZipFile


def download_and_unzip(dataset_parent_dir: str) -> str:
    """Download image dataset and unzip it.

    :param dataset_parent_dir: dataset parent directory to which dataset will be downloaded
    :type dataset_parent_dir: str
    :return: path to the unzipped dataset directory
    :rtype: str
    """
    # Create directory, if it does not exist
    os.makedirs(dataset_parent_dir, exist_ok=True)

    # Download data
    download_url = "https://cvbp-secondary.z19.web.core.windows.net/datasets/object_detection/odFridgeObjects.zip"
    print(f"Downloading data from {download_url}")

    # Extract current dataset name from dataset url
    dataset_name = os.path.basename(download_url).split(".")[0]
    # Get dataset path for later use
    dataset_dir = os.path.join(dataset_parent_dir, dataset_name)

    if os.path.exists(dataset_dir):
        shutil.rmtree(dataset_dir)

    # Get the name of the zip file
    data_file = os.path.join(dataset_parent_dir, f"{dataset_name}.zip")

    # Download data from public url
    urllib.request.urlretrieve(download_url, filename=data_file)

    # Extract files
    with ZipFile(data_file, "r") as zip_file:
        print("extracting files...")
        zip_file.extractall(path=dataset_parent_dir)
        print("done")
    # Delete zip file
    os.remove(data_file)
    return dataset_dir


def read_image(image_path: str) -> bytes:
    """Read image from path.

    :param image_path: image path
    :type image_path: str
    :return: image in bytes format
    :rtype: bytes
    """
    with open(image_path, "rb") as f:
        return f.read()


def prepare_data_for_online_inference(dataset_dir: str) -> None:
    """Prepare request json for online inference.

    :param dataset_dir: dataset directory
    :type dataset_dir: str
    """
    sample_image_1 = os.path.join(dataset_dir, "images", "99.jpg")
    sample_image_2 = os.path.join(dataset_dir, "images", "1.jpg")

    request_json = {
        "input_data": {
            "columns": ["image", "text"],
            "index": [0, 1],
            "data": [
                [
                    base64.encodebytes(read_image(sample_image_1)).decode("utf-8"),
                    # For BLIP2, append "Answer:" to the below prompt
                    "Describe the beverage in the image?",
                ],
                [
                    base64.encodebytes(read_image(sample_image_2)).decode("utf-8"),
                    # For BLIP2, append "Answer:" to the below prompt
                    "What are the drinks on the table?",
                ],
            ],
        }
    }

    request_file_name = os.path.join(dataset_dir, "sample_request_data.json")

    with open(request_file_name, "w") as request_file:
        json.dump(request_json, request_file)


def prepare_data_for_batch_inference(dataset_dir: str) -> None:
    """Prepare image folder and csv files for batch inference.

    This function will create a folder of csv files with images in base64 format.
    :param dataset_dir: dataset directory
    :type dataset_dir: str
    """
    csv_folder_path = os.path.join(dataset_dir, "batch")
    os.makedirs(csv_folder_path, exist_ok=True)
    batch_input_file = "batch_input.csv"
    dataset_dir = os.path.join(dataset_dir, "images")

    # Take 20 images
    image_list = []
    for i in range(1, 21):
        image_path = os.path.join(dataset_dir, str(i) + ".jpg")
        data = read_image(image_path)
        data = base64.encodebytes(data).decode("utf-8")
        image_list.append(data)

    # Read the questions list file
    with open("list_of_questions.txt", "r") as f:
        data = f.read()
    question_list = data.split("\n")
    # For BLIP2, append " Answer:" to the questions
    # question_list = [s + " Answer:" for s in question_list]

    # Divide the image & question lists into files of 10 rows each
    batch_size_per_predict = 10
    divided_image_list = [
        image_list[i * batch_size_per_predict : (i + 1) * batch_size_per_predict]
        for i in range(
            (len(image_list) + batch_size_per_predict - 1) // batch_size_per_predict
        )
    ]
    divided_question_list = [
        question_list[i * batch_size_per_predict : (i + 1) * batch_size_per_predict]
        for i in range(
            (len(question_list) + batch_size_per_predict - 1) // batch_size_per_predict
        )
    ]

    # Write to CSV files
    for l in range(0, len(divided_image_list)):
        dictionary = {"image": divided_image_list[l], "text": divided_question_list[l]}
        batch_df = pd.DataFrame(dictionary)
        filepath = os.path.join(csv_folder_path, str(l) + batch_input_file)
        batch_df.to_csv(filepath)


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Prepare data for visual-question-answering task"
    )
    parser.add_argument(
        "--data_path", type=str, default="data", help="Dataset location"
    )
    parser.add_argument(
        "--mode",
        type=str,
        default="online",
        help="prepare data for online or batch inference",
    )

    args, unknown = parser.parse_known_args()
    args_dict = vars(args)

    dataset_dir = download_and_unzip(
        dataset_parent_dir=os.path.join(
            os.path.dirname(os.path.realpath(__file__)), args.data_path
        ),
    )

    if args.mode == "online":
        prepare_data_for_online_inference(dataset_dir=dataset_dir)
    else:
        prepare_data_for_batch_inference(dataset_dir=dataset_dir)
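A usage sketch for this script (run it from the folder containing list_of_questions.txt, since the batch path reads that file relative to the working directory; output paths follow the defaults above):

# Online: writes sample_request_data.json under data/odFridgeObjects/
python ./prepare_data.py --mode online --data_path data
# Batch: writes chunked csv files under data_batch/odFridgeObjects/batch/
python ./prepare_data.py --mode batch --data_path data_batch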
111 changes: 111 additions & 0 deletions
...ls/system/inference/visual-question-answering/visual-question-answering-batch-endpoint.sh
@@ -0,0 +1,111 @@
set -x
# The commands in this file map to steps in this notebook: https://aka.ms/azureml-infer-batch-sdk-blip-image-to-text
# The sample scoring file is available in the same folder as the above notebook.

# script inputs
registry_name="azureml"
subscription_id="<SUBSCRIPTION_ID>"
resource_group_name="<RESOURCE_GROUP>"
workspace_name="<WORKSPACE_NAME>"

# This is the model from the system registry that needs to be deployed
model_name="Salesforce-BLIP-vqa-base"
model_label="latest"

deployment_compute="cpu-cluster"
# todo: fetch deployment_sku from the min_inference_sku tag of the model
deployment_sku="Standard_DS5_v2"

version=$(date +%s)
endpoint_name="vqa-$version"
deployment_name="demo-$version"

# Prepare data for deployment
data_path="data_batch"
python ./prepare_data.py --mode "batch" --data_path $data_path
# sample request data in a folder of csv files with image and text columns
sample_request_csv_folder="./data_batch/odFridgeObjects/batch"

# 1. Set up pre-requisites
if [ "$subscription_id" = "<SUBSCRIPTION_ID>" ] || \
   [ "$resource_group_name" = "<RESOURCE_GROUP>" ] || \
   [ "$workspace_name" = "<WORKSPACE_NAME>" ]; then
    echo "Please update the script with the subscription_id, resource_group_name and workspace_name"
    exit 1
fi

az account set -s $subscription_id
workspace_info="--resource-group $resource_group_name --workspace-name $workspace_name"

# 2. Check if the model exists in the registry
# Need to confirm model show command works for registries outside the tenant (aka system registry)
if ! az ml model show --name $model_name --label $model_label --registry-name $registry_name
then
    echo "Model $model_name:$model_label does not exist in registry $registry_name"
    exit 1
fi

# Get the latest model version
model_version=$(az ml model show --name $model_name --label $model_label --registry-name $registry_name --query version --output tsv)

# 3. Check if compute $deployment_compute exists, else create it
if az ml compute show --name $deployment_compute $workspace_info
then
    echo "Compute cluster $deployment_compute already exists"
else
    echo "Creating compute cluster $deployment_compute"
    az ml compute create --name $deployment_compute --type amlcompute --min-instances 0 --max-instances 2 --size $deployment_sku $workspace_info || {
        echo "Failed to create compute cluster $deployment_compute"
        exit 1
    }
fi

# 4. Deploy the model to an endpoint
# Create batch endpoint
az ml batch-endpoint create --name $endpoint_name $workspace_info || {
    echo "endpoint create failed"; exit 1;
}

# Deploy model from registry to endpoint in workspace
az ml batch-deployment create --file ./deploy-batch.yaml $workspace_info --set \
    endpoint_name=$endpoint_name model=azureml://registries/$registry_name/models/$model_name/versions/$model_version \
    compute=$deployment_compute \
    name=$deployment_name || {
    echo "deployment create failed"; exit 1;
}

# 5. Try a scoring request with a folder of csv files

# Check if the scoring data folder exists
if [ -d $sample_request_csv_folder ]; then
    echo "Invoking endpoint $endpoint_name with input folder $sample_request_csv_folder"
else
    echo "Scoring folder $sample_request_csv_folder does not exist"
    exit 1
fi

# Invoke the endpoint
# Note: If the job fails with an Out of Memory Error,
# try splitting your input into smaller csv files or
# decrease the mini_batch_size for the deployment (see deploy-batch.yaml).
csv_inference_job=$(az ml batch-endpoint invoke --name $endpoint_name \
    --deployment-name $deployment_name --input $sample_request_csv_folder --input-type \
    uri_folder $workspace_info --query name --output tsv) || {
    echo "endpoint invoke failed"; exit 1;
}

# Wait for the job to complete
az ml job stream --name $csv_inference_job $workspace_info || {
    echo "job stream failed"; exit 1;
}

# 6. Delete the endpoint
# Batch endpoints use compute resources only when jobs are submitted. You can keep the
# batch endpoint for your reference without worrying about compute bills, or choose to delete the endpoint.
# If you created your compute cluster to have zero minimum instances and to scale down soon after being idle,
# you won't be charged for unused compute.
az ml batch-endpoint delete --name $endpoint_name $workspace_info --yes || {
    echo "endpoint delete failed"; exit 1;
}