Add batch inference notebook and cli examples for CLIP (#2644)
* start batch inference notebook

* clean up batch notebook

* add cli example

* update call to prepare_data

* infer labels from directories for batch inference

* run black reformatter

* address pr comments

* add comments to clarify csv format

* update csv name in bash script

* respond to PR comments

* restore registry name

* change batch input from single csv to folder of csv

* update cli example to use folder of csv, run black reformatter

* update comments for mini_batch_size

* update get model from mlclient

* respond to PR comments
bhimar authored Oct 10, 2023
1 parent f2b2826 commit a93774b
Showing 4 changed files with 676 additions and 0 deletions.
deploy-batch.yaml
@@ -0,0 +1,9 @@
$schema: https://azuremlschemas.azureedge.net/latest/batchDeployment.schema.json
name: demo
description: "Batch endpoint for zero-shot-image-classification task"
type: model
resources:
  instance_count: 1 # number of compute instances used by each scoring job
settings:
  mini_batch_size: 1 # number of input csv files handed to each scoring call
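
For reference, the companion notebook added in this commit drives the same deployment through the Python SDK instead of this yaml. Below is a rough sketch of the equivalent azure-ai-ml (SDK v2) calls; the "<...>" placeholders and the model version are assumptions for illustration, not values from this commit.

from azure.ai.ml import MLClient
from azure.ai.ml.entities import BatchDeployment
from azure.identity import DefaultAzureCredential

# Sketch only: mirrors deploy-batch.yaml above; all "<...>" values are placeholders.
ml_client = MLClient(
    DefaultAzureCredential(),
    subscription_id="<SUBSCRIPTION_ID>",
    resource_group_name="<RESOURCE_GROUP>",
    workspace_name="<WORKSPACE_NAME>",
)
deployment = BatchDeployment(
    name="demo",
    endpoint_name="<ENDPOINT_NAME>",
    model="azureml://registries/azureml/models/openai-clip-vit-base-patch32/versions/<VERSION>",
    compute="cpu-cluster",
    instance_count=1,  # resources.instance_count in the yaml
    mini_batch_size=1,  # settings.mini_batch_size in the yaml
)
ml_client.batch_deployments.begin_create_or_update(deployment).result()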

prepare_data.py
@@ -90,6 +90,48 @@ def prepare_data_for_online_inference(dataset_dir: str) -> None:
    with open(request_file_name, "w") as request_file:
        json.dump(request_json, request_file)

def prepare_data_for_batch_inference(dataset_dir: str) -> None:
    """Prepare image folder and csv files for batch inference.

    This function will move all images to a single image folder and also create a folder of csv
    files with images in base64 format and the candidate labels.
    :param dataset_dir: dataset directory
    :type dataset_dir: str
    """
    image_list = []

    dir_names = []
    for dir_name in os.listdir(dataset_dir):
        dir_names.append(dir_name)
        dir_path = os.path.join(dataset_dir, dir_name)
        for path, _, files in os.walk(dir_path):
            for file in files:
                image = read_image(os.path.join(path, file))
                image_list.append(base64.encodebytes(image).decode("utf-8"))
                shutil.move(os.path.join(path, file), dataset_dir)
        if os.path.isdir(dir_path):
            shutil.rmtree(dir_path)
        else:
            os.remove(dir_path)

    # candidate labels are inferred from the class directory names and are
    # only added to the first row of each csv file; all other rows in the
    # "text" column are ignored
    labels = ",".join(dir_names)
    data = [[image, ""] for image in image_list]
    batch_df = pd.DataFrame(data, columns=["image", "text"])

    csv_folder_path = os.path.join(dataset_dir, "batch")
    os.makedirs(csv_folder_path, exist_ok=True)
    batch_input_file = "batch_input.csv"
    # divide the dataframe into csv files of 10 rows each
    batch_size_per_predict = 10
    for i in range(0, len(batch_df), batch_size_per_predict):
        j = i + batch_size_per_predict
        # each csv file includes the labels in its first row
        batch_df["text"].iloc[i] = labels
        batch_df[i:j].to_csv(os.path.join(csv_folder_path, str(i) + batch_input_file))


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
@@ -116,3 +158,5 @@ def prepare_data_for_online_inference(dataset_dir: str) -> None:

    if args.mode == "online":
        prepare_data_for_online_inference(dataset_dir=dataset_dir)
    else:
        prepare_data_for_batch_inference(dataset_dir=dataset_dir)
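
The csv convention above is easy to miss: the candidate labels ride in the "text" column of the first row only, and every other "text" cell is left empty. A minimal, illustrative sketch of building one such file by hand follows; the image file names and the fridgeObjects label set are assumptions for illustration.

import base64

import pandas as pd

# Hypothetical image files; any local jpg/png works.
images_b64 = [
    base64.encodebytes(open(p, "rb").read()).decode("utf-8")
    for p in ["milk_bottle_1.jpg", "can_1.jpg"]
]
df = pd.DataFrame({"image": images_b64, "text": ["", ""]})
# comma-separated labels in the first row only, mirroring prepare_data.py
df.loc[0, "text"] = "milk_bottle,carton,can,water_bottle"
df.to_csv("0batch_input.csv")  # name pattern: str(i) + "batch_input.csv"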
(batch endpoint CLI script)
@@ -0,0 +1,111 @@
set -x
# The commands in this file map to steps in this notebook: https://aka.ms/azureml-infer-batch-sdk-image-classification
# The sample scoring file is available in the same folder as the above notebook.

# script inputs
registry_name="azureml"
subscription_id="<SUBSCRIPTION_ID>"
resource_group_name="<RESOURCE_GROUP>"
workspace_name="<WORKSPACE_NAME>"

# This is the model from system registry that needs to be deployed
model_name="openai-clip-vit-base-patch32"
model_label="latest"

deployment_compute="cpu-cluster"
# todo: fetch deployment_sku from the min_inference_sku tag of the model
deployment_sku="Standard_DS3_v2"


version=$(date +%s)
endpoint_name="zero-shot-image-cls-$version"
deployment_name="demo-$version"

# Prepare data for deployment
data_path="data_batch"
python ./prepare_data.py --mode "batch" --data_path $data_path
# sample request data in folder of csv files with image and text columns
sample_request_csv_folder="./data_batch/fridgeObjects/batch"

# 1. Setup pre-requisites
if [ "$subscription_id" = "<SUBSCRIPTION_ID>" ] || \
["$resource_group_name" = "<RESOURCE_GROUP>" ] || \
[ "$workspace_name" = "<WORKSPACE_NAME>" ]; then
echo "Please update the script with the subscription_id, resource_group_name and workspace_name"
exit 1
fi

az account set -s $subscription_id
workspace_info="--resource-group $resource_group_name --workspace-name $workspace_name"

# 2. Check if the model exists in the registry
# Need to confirm model show command works for registries outside the tenant (aka system registry)
if ! az ml model show --name $model_name --label $model_label --registry-name $registry_name
then
echo "Model $model_name:$model_label does not exist in registry $registry_name"
exit 1
fi

# Get the latest model version
model_version=$(az ml model show --name $model_name --label $model_label --registry-name $registry_name --query version --output tsv)

# 3. Check if compute $deployment_compute exists, else create it
if az ml compute show --name $deployment_compute $workspace_info
then
    echo "Compute cluster $deployment_compute already exists"
else
    echo "Creating compute cluster $deployment_compute"
    az ml compute create --name $deployment_compute --type amlcompute --min-instances 0 --max-instances 2 --size $deployment_sku $workspace_info || {
        echo "Failed to create compute cluster $deployment_compute"
        exit 1
    }
fi

# 4. Deploy the model to an endpoint
# Create batch endpoint
az ml batch-endpoint create --name $endpoint_name $workspace_info || {
    echo "endpoint create failed"; exit 1;
}

# Deploy model from registry to endpoint in workspace
az ml batch-deployment create --file ./deploy-batch.yaml $workspace_info --set \
    endpoint_name=$endpoint_name model=azureml://registries/$registry_name/models/$model_name/versions/$model_version \
    compute=$deployment_compute \
    name=$deployment_name || {
    echo "deployment create failed"; exit 1;
}

# 5. Try a scoring request with a folder of csv files

# Check if the scoring data folder exists
if [ -d $sample_request_csv_folder ]; then
    echo "Invoking endpoint $endpoint_name with input folder $sample_request_csv_folder"
else
    echo "Scoring folder $sample_request_csv_folder does not exist"
    exit 1
fi

# Invoke the endpoint
# Note: if the job fails with an Out of Memory error, try splitting your
# input into smaller csv files or decreasing the mini_batch_size of the
# deployment (see deploy-batch.yaml).
csv_inference_job=$(az ml batch-endpoint invoke --name $endpoint_name \
    --deployment-name $deployment_name --input $sample_request_csv_folder --input-type \
    uri_folder $workspace_info --query name --output tsv) || {
    echo "endpoint invoke failed"; exit 1;
}

# wait for the job to complete
az ml job stream --name $csv_inference_job $workspace_info || {
echo "job stream failed"; exit 1;
}

# 6. Delete the endpoint
# Batch endpoints use compute resources only when jobs are submitted. You can keep the
# batch endpoint for your reference without worrying about compute bills, or choose to delete the endpoint.
# If you created your compute cluster with zero minimum instances and it scales down soon after being idle,
# you won't be charged for unused compute.
az ml batch-endpoint delete --name $endpoint_name $workspace_info --yes || {
    echo "endpoint delete failed"; exit 1;
}
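
For completeness, the notebook counterpart of step 5 invokes the endpoint through the SDK rather than the CLI. A hedged sketch follows, assuming the azure-ai-ml v2 client and the folder of csv files produced by prepare_data.py; the "<...>" values are placeholders.

from azure.ai.ml import Input, MLClient
from azure.identity import DefaultAzureCredential

# Sketch only: SDK-side equivalent of `az ml batch-endpoint invoke` above.
ml_client = MLClient(
    DefaultAzureCredential(),
    subscription_id="<SUBSCRIPTION_ID>",
    resource_group_name="<RESOURCE_GROUP>",
    workspace_name="<WORKSPACE_NAME>",
)
job = ml_client.batch_endpoints.invoke(
    endpoint_name="<ENDPOINT_NAME>",
    deployment_name="<DEPLOYMENT_NAME>",
    input=Input(type="uri_folder", path="./data_batch/fridgeObjects/batch"),
)
ml_client.jobs.stream(job.name)  # equivalent of `az ml job stream`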
