From cc86c0a0d1a4283a2c9f64f5f893ccb095a54ed3 Mon Sep 17 00:00:00 2001 From: Riti Sharma <52715641+sharma-riti@users.noreply.github.com> Date: Mon, 13 Nov 2023 10:46:03 -0800 Subject: [PATCH] Add BLIP vqa examples (#2790) * add blip2 examples * fix formatting * address pr comments * . * fix formatting * use blip model * use blip for cli examples * add examples for vqa * . * fix inference data * address pr comments * . * . --- .../deploy-batch.yaml | 9 + .../deploy-online.yaml | 12 + .../list_of_questions.txt | 20 + .../visual-question-answering/prepare_data.py | 171 ++++++ ...isual-question-answering-batch-endpoint.sh | 111 ++++ ...sual-question-answering-online-endpoint.sh | 80 +++ .../list_of_questions.txt | 20 + ...al-question-answering-batch-endpoint.ipynb | 503 ++++++++++++++++++ ...l-question-answering-online-endpoint.ipynb | 359 +++++++++++++ 9 files changed, 1285 insertions(+) create mode 100644 cli/foundation-models/system/inference/visual-question-answering/deploy-batch.yaml create mode 100644 cli/foundation-models/system/inference/visual-question-answering/deploy-online.yaml create mode 100644 cli/foundation-models/system/inference/visual-question-answering/list_of_questions.txt create mode 100644 cli/foundation-models/system/inference/visual-question-answering/prepare_data.py create mode 100644 cli/foundation-models/system/inference/visual-question-answering/visual-question-answering-batch-endpoint.sh create mode 100644 cli/foundation-models/system/inference/visual-question-answering/visual-question-answering-online-endpoint.sh create mode 100644 sdk/python/foundation-models/system/inference/visual-question-answering/list_of_questions.txt create mode 100644 sdk/python/foundation-models/system/inference/visual-question-answering/visual-question-answering-batch-endpoint.ipynb create mode 100644 sdk/python/foundation-models/system/inference/visual-question-answering/visual-question-answering-online-endpoint.ipynb diff --git a/cli/foundation-models/system/inference/visual-question-answering/deploy-batch.yaml b/cli/foundation-models/system/inference/visual-question-answering/deploy-batch.yaml new file mode 100644 index 0000000000..b5125f802a --- /dev/null +++ b/cli/foundation-models/system/inference/visual-question-answering/deploy-batch.yaml @@ -0,0 +1,9 @@ +$schema: https://azuremlschemas.azureedge.net/latest/batchDeployment.schema.json +name: demo +description: "Batch endpoint for visual-question-answering task" +type: model +resources: + instance_count: 1 +settings: + mini_batch_size: 1 + diff --git a/cli/foundation-models/system/inference/visual-question-answering/deploy-online.yaml b/cli/foundation-models/system/inference/visual-question-answering/deploy-online.yaml new file mode 100644 index 0000000000..a0af4c956a --- /dev/null +++ b/cli/foundation-models/system/inference/visual-question-answering/deploy-online.yaml @@ -0,0 +1,12 @@ +$schema: https://azuremlschemas.azureedge.net/latest/managedOnlineDeployment.schema.json +name: vqa-demo +instance_type: Standard_DS5_v2 +instance_count: 1 +liveness_probe: + initial_delay: 180 + period: 180 + failure_threshold: 49 + timeout: 299 +request_settings: + request_timeout_ms: 90000 + diff --git a/cli/foundation-models/system/inference/visual-question-answering/list_of_questions.txt b/cli/foundation-models/system/inference/visual-question-answering/list_of_questions.txt new file mode 100644 index 0000000000..65f93ff672 --- /dev/null +++ b/cli/foundation-models/system/inference/visual-question-answering/list_of_questions.txt @@ -0,0 
+1,20 @@ +What is the brand of the half and half in the carton? +What is the color of the bottle of yogurt? +What is the color of the can of soda on the table? +What is the name of the bottled water brand in the image? +Is the bottle tilted? +Is the object tilted? +Is the bottle tilted? +Is the can tilted? +What is the difference between the two beverages in terms of their contents and purpose? +What is the difference between the two beverages on the table? +What are the beverages in the image? +What is the difference between the two beverages, one being a milk drink and the other being a juice drink? +What is the brand of the half and half that is sitting next to the can of soda? +What color is the can of soda? +What are the drinks on the table? +Is there a soda on the table? +How many objects are in the image? +Describe the objects in the image? +What objects are in the image? +How many objects are in the image? \ No newline at end of file diff --git a/cli/foundation-models/system/inference/visual-question-answering/prepare_data.py b/cli/foundation-models/system/inference/visual-question-answering/prepare_data.py new file mode 100644 index 0000000000..bcec0eadf5 --- /dev/null +++ b/cli/foundation-models/system/inference/visual-question-answering/prepare_data.py @@ -0,0 +1,171 @@ +import argparse +import base64 +import json +import os +import shutil +import urllib.request +import pandas as pd +from zipfile import ZipFile + + +def download_and_unzip(dataset_parent_dir: str) -> None: + """Download image dataset and unzip it. + + :param dataset_parent_dir: dataset parent directory to which dataset will be downloaded + :type dataset_parent_dir: str + """ + # Create directory, if it does not exist + os.makedirs(dataset_parent_dir, exist_ok=True) + + # download data + download_url = "https://cvbp-secondary.z19.web.core.windows.net/datasets/object_detection/odFridgeObjects.zip" + print(f"Downloading data from {download_url}") + + # Extract current dataset name from dataset url + dataset_name = os.path.basename(download_url).split(".")[0] + # Get dataset path for later use + dataset_dir = os.path.join(dataset_parent_dir, dataset_name) + + if os.path.exists(dataset_dir): + shutil.rmtree(dataset_dir) + + # Get the name of zip file + data_file = os.path.join(dataset_parent_dir, f"{dataset_name}.zip") + + # Download data from public url + urllib.request.urlretrieve(download_url, filename=data_file) + + # extract files + with ZipFile(data_file, "r") as zip: + print("extracting files...") + zip.extractall(path=dataset_parent_dir) + print("done") + # delete zip file + os.remove(data_file) + return dataset_dir + + +def read_image(image_path: str) -> bytes: + """Read image from path. + + :param image_path: image path + :type image_path: str + :return: image in bytes format + :rtype: bytes + """ + with open(image_path, "rb") as f: + return f.read() + + +def prepare_data_for_online_inference(dataset_dir: str) -> None: + """Prepare request json for online inference. 
+ + :param dataset_dir: dataset directory + :type dataset_dir: str + """ + sample_image_1 = os.path.join(dataset_dir, "images", "99.jpg") + sample_image_2 = os.path.join(dataset_dir, "images", "1.jpg") + + request_json = { + "input_data": { + "columns": ["image", "text"], + "index": [0, 1], + "data": [ + [ + base64.encodebytes(read_image(sample_image_1)).decode("utf-8"), + # For BLIP2 append "Answer:" to the below prompt + "Describe the beverage in the image?", + ], + [ + base64.encodebytes(read_image(sample_image_2)).decode("utf-8"), + # For BLIP2 append "Answer:" to the below prompt + "What are the drinks on the table?", + ], + ], + } + } + + request_file_name = os.path.join(dataset_dir, "sample_request_data.json") + + with open(request_file_name, "w") as request_file: + json.dump(request_json, request_file) + + +def prepare_data_for_batch_inference(dataset_dir: str) -> None: + """Prepare image folder and csv file for batch inference. + + This function will create a folder of csv files with images in base64 format. + :param dataset_dir: dataset directory + :type dataset_dir: str + """ + + csv_folder_path = os.path.join(dataset_dir, "batch") + os.makedirs(csv_folder_path, exist_ok=True) + batch_input_file = "batch_input.csv" + dataset_dir = os.path.join(dataset_dir, "images") + + # Take 20 images + image_list = [] + for i in range(1, 21): + image_path = os.path.join(dataset_dir, str(i) + ".jpg") + data = read_image(image_path) + data = base64.encodebytes(data).decode("utf-8") + image_list.append(data) + + # Read questions list file + with open("list_of_questions.txt", "r") as f: + data = f.read() + question_list = data.split("\n") + # For BLIP2, append "Answer: " to the questions + # question_list = [s + " Answer:" for s in question_list] + + # Divide the image & questions list into files of 10 rows each + batch_size_per_predict = 10 + divided_image_list = [ + image_list[i * batch_size_per_predict : (i + 1) * batch_size_per_predict] + for i in range( + (len(image_list) + batch_size_per_predict - 1) // batch_size_per_predict + ) + ] + divided_question_list = [ + question_list[i * batch_size_per_predict : (i + 1) * batch_size_per_predict] + for i in range( + (len(question_list) + batch_size_per_predict - 1) // batch_size_per_predict + ) + ] + + # Write to CSV files + for l in range(0, len(divided_image_list)): + dictionary = {"image": divided_image_list[l], "text": divided_question_list[l]} + batch_df = pd.DataFrame(dictionary) + filepath = os.path.join(csv_folder_path, str(l) + batch_input_file) + batch_df.to_csv(filepath) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="Prepare data for visual-question-answering task" + ) + parser.add_argument( + "--data_path", type=str, default="data", help="Dataset location" + ) + parser.add_argument( + "--mode", + type=str, + default="online", + help="prepare data for online or batch inference", + ) + + args, unknown = parser.parse_known_args() + args_dict = vars(args) + + dataset_dir = download_and_unzip( + dataset_parent_dir=os.path.join( + os.path.dirname(os.path.realpath(__file__)), args.data_path + ), + ) + + if args.mode == "online": + prepare_data_for_online_inference(dataset_dir=dataset_dir) + else: + prepare_data_for_batch_inference(dataset_dir=dataset_dir) diff --git a/cli/foundation-models/system/inference/visual-question-answering/visual-question-answering-batch-endpoint.sh b/cli/foundation-models/system/inference/visual-question-answering/visual-question-answering-batch-endpoint.sh new file mode 100644 
index 0000000000..8677d2d518 --- /dev/null +++ b/cli/foundation-models/system/inference/visual-question-answering/visual-question-answering-batch-endpoint.sh @@ -0,0 +1,111 @@ +set -x +# The commands in this file map to steps in this notebook: https://aka.ms/azureml-infer-batch-sdk-blip-image-to-text +# The sample scoring file available in the same folder as the above notebook. + +# script inputs +registry_name="azureml" +subscription_id="" +resource_group_name="" +workspace_name="" + +# This is the model from system registry that needs to be deployed +model_name="Salesforce-BLIP-vqa-base" +model_label="latest" + +deployment_compute="cpu-cluster" +# todo: fetch deployment_sku from the min_inference_sku tag of the model +deployment_sku="Standard_DS5_v2" + + +version=$(date +%s) +endpoint_name="vqa-$version" +deployment_name="demo-$version" + +# Prepare data for deployment +data_path="data_batch" +python ./prepare_data.py --mode "batch" --data_path $data_path +# sample request data in folder of csv files with image and text columns +sample_request_csv_folder="./data_batch/odfridgeObjects/batch" + +# 1. Setup pre-requisites +if [ "$subscription_id" = "" ] || \ + ["$resource_group_name" = "" ] || \ + [ "$workspace_name" = "" ]; then + echo "Please update the script with the subscription_id, resource_group_name and workspace_name" + exit 1 +fi + +az account set -s $subscription_id +workspace_info="--resource-group $resource_group_name --workspace-name $workspace_name" + +# 2. Check if the model exists in the registry +# Need to confirm model show command works for registries outside the tenant (aka system registry) +if ! az ml model show --name $model_name --label $model_label --registry-name $registry_name +then + echo "Model $model_name:$model_label does not exist in registry $registry_name" + exit 1 +fi + +# Get the latest model version +model_version=$(az ml model show --name $model_name --label $model_label --registry-name $registry_name --query version --output tsv) + +# 3. Check if compute $deployment_compute exists, else create it +if az ml compute show --name $deployment_compute $workspace_info +then + echo "Compute cluster $deployment_compute already exists" +else + echo "Creating compute cluster $deployment_compute" + az ml compute create --name $deployment_compute --type amlcompute --min-instances 0 --max-instances 2 --size $deployment_sku $workspace_info || { + echo "Failed to create compute cluster $deployment_compute" + exit 1 + } +fi + +# 4. Deploy the model to an endpoint +# Create batch endpoint +az ml batch-endpoint create --name $endpoint_name $workspace_info || { + echo "endpoint create failed"; exit 1; +} + +# Deploy model from registry to endpoint in workspace +az ml batch-deployment create --file ./deploy-batch.yaml $workspace_info --set \ + endpoint_name=$endpoint_name model=azureml://registries/$registry_name/models/$model_name/versions/$model_version \ + compute=$deployment_compute \ + name=$deployment_name || { + echo "deployment create failed"; exit 1; +} + +# 5 Try a scoring request with csv file + +# Check if scoring data file exists +if [ -d $sample_request_csv_folder ]; then + echo "Invoking endpoint $endpoint_name with following input:\n\n" + echo "\n\n" +else + echo "Scoring file $sample_request_csv_folder does not exist" + exit 1 +fi + +# Invoke the endpoint +# Note: If job failed with Out of Memory Error then +# please try splitting your input into smaller csv files or +# decrease the mini_batch_size for the deployment (see deploy-batch.yaml). 
+csv_inference_job=$(az ml batch-endpoint invoke --name $endpoint_name \ + --deployment-name $deployment_name --input $sample_request_csv_folder --input-type \ + uri_folder $workspace_info --query name --output tsv) || { + echo "endpoint invoke failed"; exit 1; +} + +# wait for the job to complete +az ml job stream --name $csv_inference_job $workspace_info || { + echo "job stream failed"; exit 1; +} + +# 6. Delete the endpoint +# Batch endpoints use compute resources only when jobs are submitted. You can keep the +# batch endpoint for your reference without worrying about compute bills, or choose to delete the endpoint. +# If you created your compute cluster to have zero minimum instances and scale down soon after being idle, +# you won't be charged for an unused compute. +az ml batch-endpoint delete --name $endpoint_name $workspace_info --yes || { + echo "endpoint delete failed"; exit 1; +} diff --git a/cli/foundation-models/system/inference/visual-question-answering/visual-question-answering-online-endpoint.sh b/cli/foundation-models/system/inference/visual-question-answering/visual-question-answering-online-endpoint.sh new file mode 100644 index 0000000000..931a1fef5d --- /dev/null +++ b/cli/foundation-models/system/inference/visual-question-answering/visual-question-answering-online-endpoint.sh @@ -0,0 +1,80 @@ +set -x +# The commands in this file map to steps in this notebook: https://aka.ms/azureml-infer-online-sdk-blip-vqa +# The sample scoring file available in the same folder as the above notebook + +# script inputs +registry_name="azureml" +subscription_id="" +resource_group_name="" +workspace_name="" + +# This is the model from system registry that needs to be deployed +model_name="Salesforce-BLIP-vqa-base" +model_label="latest" + +version=$(date +%s) +endpoint_name="vqa-$version" + +# Todo: fetch deployment_sku from the min_inference_sku tag of the model +deployment_sku="Standard_DS5_v2" + +# Prepare data for deployment +data_path="./data_online" +python ./prepare_data.py --data_path $data_path --mode "online" +# sample_request_data +sample_request_data="$data_path/odfridgeObjects/sample_request_data.json" +# 1. Setup pre-requisites +if [ "$subscription_id" = "" ] || \ + ["$resource_group_name" = "" ] || \ + [ "$workspace_name" = "" ]; then + echo "Please update the script with the subscription_id, resource_group_name and workspace_name" + exit 1 +fi + +az account set -s $subscription_id +workspace_info="--resource-group $resource_group_name --workspace-name $workspace_name" + +# 2. Check if the model exists in the registry +# Need to confirm model show command works for registries outside the tenant (aka system registry) +if ! az ml model show --name $model_name --label $model_label --registry-name $registry_name +then + echo "Model $model_name:$model_label does not exist in registry $registry_name" + exit 1 +fi + +# Get the latest model version +model_version=$(az ml model show --name $model_name --label $model_label --registry-name $registry_name --query version --output tsv) + +# 3. 
Deploy the model to an endpoint +# Create online endpoint +az ml online-endpoint create --name $endpoint_name $workspace_info || { + echo "endpoint create failed"; exit 1; +} + +# Deploy model from registry to endpoint in workspace +az ml online-deployment create --file deploy-online.yaml $workspace_info --all-traffic --set \ + endpoint_name=$endpoint_name model=azureml://registries/$registry_name/models/$model_name/versions/$model_version \ + instance_type=$deployment_sku || { + echo "deployment create failed"; exit 1; +} + +# 4. Try a sample scoring request + +# Check if scoring data file exists +if [ -f $sample_request_data ]; then + echo "Invoking endpoint $endpoint_name with $sample_request_data\n\n" +else + echo "Scoring file $sample_request_data does not exist" + exit 1 +fi + +az ml online-endpoint invoke --name $endpoint_name --request-file $sample_request_data $workspace_info || { + echo "endpoint invoke failed"; exit 1; +} + +# 6. Delete the endpoint and sample_request_data.json +az ml online-endpoint delete --name $endpoint_name $workspace_info --yes || { + echo "endpoint delete failed"; exit 1; +} + +rm $sample_request_data diff --git a/sdk/python/foundation-models/system/inference/visual-question-answering/list_of_questions.txt b/sdk/python/foundation-models/system/inference/visual-question-answering/list_of_questions.txt new file mode 100644 index 0000000000..65f93ff672 --- /dev/null +++ b/sdk/python/foundation-models/system/inference/visual-question-answering/list_of_questions.txt @@ -0,0 +1,20 @@ +What is the brand of the half and half in the carton? +What is the color of the bottle of yogurt? +What is the color of the can of soda on the table? +What is the name of the bottled water brand in the image? +Is the bottle tilted? +Is the object tilted? +Is the bottle tilted? +Is the can tilted? +What is the difference between the two beverages in terms of their contents and purpose? +What is the difference between the two beverages on the table? +What are the beverages in the image? +What is the difference between the two beverages, one being a milk drink and the other being a juice drink? +What is the brand of the half and half that is sitting next to the can of soda? +What color is the can of soda? +What are the drinks on the table? +Is there a soda on the table? +How many objects are in the image? +Describe the objects in the image? +What objects are in the image? +How many objects are in the image? \ No newline at end of file diff --git a/sdk/python/foundation-models/system/inference/visual-question-answering/visual-question-answering-batch-endpoint.ipynb b/sdk/python/foundation-models/system/inference/visual-question-answering/visual-question-answering-batch-endpoint.ipynb new file mode 100644 index 0000000000..e15f270a02 --- /dev/null +++ b/sdk/python/foundation-models/system/inference/visual-question-answering/visual-question-answering-batch-endpoint.ipynb @@ -0,0 +1,503 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Visual Question Answering Inference using Batch Endpoints\n", + "\n", + "This sample shows how to deploy `visual-question-answering` type models to a batch endpoint for inference.\n", + "\n", + "### Task\n", + "`visual-question-answering` takes in image(s) and question text prompt(s) corresponding to the image. For each image, it answers the user's question.\n", + "\n", + "### Model\n", + "Models that can perform the `visual-question-answering` task are tagged with `visual-question-answering`. 
We will use the `Salesforce/blip-vqa-base` model in this notebook. If you opened this notebook from a specific model card, remember to replace the specific model name.\n", + "\n", + "### Inference data\n", + "We will use the [odFridgeObjects](https://cvbp-secondary.z19.web.core.windows.net/datasets/object_detection/odFridgeObjects.zip) dataset.\n", + "\n", + "\n", + "### Outline\n", + "1. Setup pre-requisites\n", + "2. Pick a model to deploy\n", + "3. Prepare data for inference - Using a folder of CSV files with base64 images and text prompt corresponding to each image\n", + "4. Deploy the model to a batch endpoint\n", + "5. Test the endpoint - Using a folder of CSV files\n", + "6. Clean up resources - delete the endpoint" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 1. Setup pre-requisites\n", + "* Install dependencies\n", + "* Connect to AzureML Workspace. Learn more at [set up SDK authentication](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-setup-authentication?tabs=sdk). Replace ``, `` and `` below.\n", + "* Connect to `azureml` system registry" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azure.ai.ml import MLClient, Input\n", + "from azure.ai.ml.constants import AssetTypes\n", + "from azure.identity import DefaultAzureCredential, InteractiveBrowserCredential\n", + "\n", + "try:\n", + " credential = DefaultAzureCredential()\n", + " credential.get_token(\"https://management.azure.com/.default\")\n", + "except Exception as ex:\n", + " credential = InteractiveBrowserCredential()\n", + "\n", + "try:\n", + " workspace_ml_client = MLClient.from_config(credential)\n", + " subscription_id = workspace_ml_client.subscription_id\n", + " resource_group = workspace_ml_client.resource_group_name\n", + " workspace_name = workspace_ml_client.workspace_name\n", + "except Exception as ex:\n", + " print(ex)\n", + " # Enter details of your AML workspace\n", + " subscription_id = \"\"\n", + " resource_group = \"\"\n", + " workspace_name = \"\"\n", + "\n", + "workspace_ml_client = MLClient(\n", + " credential, subscription_id, resource_group, workspace_name\n", + ")\n", + "\n", + "# The models are available in the AzureML system registry, \"azureml\"\n", + "registry_ml_client = MLClient(\n", + " credential,\n", + " subscription_id,\n", + " resource_group,\n", + " registry_name=\"azureml\",\n", + ")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Create a compute cluster\n", + "Use the model card from the AzureML system registry to check the minimum required inferencing SKU, referenced as size below. If you already have a sufficient compute cluster, you can simply define the name in compute_name in the following code block." 
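Before creating the cluster in the next cell, it can help to confirm that the chosen SKU is actually offered for the workspace region. This is an optional sketch only; it assumes the `workspace_ml_client` created above, and `list_sizes` availability may vary by SDK version:

```python
# Optional sketch: check that the SKU used below is offered in the workspace region.
# Assumes workspace_ml_client from the setup cell above.
available_skus = {size.name.lower() for size in workspace_ml_client.compute.list_sizes()}
print("Standard_DS5_v2 available:", "standard_ds5_v2" in available_skus)
```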
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azure.ai.ml.entities import AmlCompute\n", + "from azure.core.exceptions import ResourceNotFoundError\n", + "\n", + "compute_name = \"cpu-cluster\"\n", + "\n", + "try:\n", + " _ = workspace_ml_client.compute.get(compute_name)\n", + " print(\"Found existing compute target.\")\n", + "except ResourceNotFoundError:\n", + " print(\"Creating a new compute target...\")\n", + " compute_config = AmlCompute(\n", + " name=compute_name,\n", + " description=\"An AML compute cluster\",\n", + " size=\"STANDARD_DS5_V2\",\n", + " min_instances=0,\n", + " max_instances=3,\n", + " idle_time_before_scale_down=120,\n", + " )\n", + " workspace_ml_client.begin_create_or_update(compute_config).result()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 2. Pick a model to deploy\n", + "\n", + "Browse models in the Model Catalog in the AzureML Studio, filtering by the `visual-question-answering` task. In this example, we use the `Salesforce-BLIP-vqa-base` model. If you have opened this notebook for a different model, replace the model name accordingly." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "model_name = \"Salesforce-BLIP-vqa-base\"\n", + "\n", + "# Use model name below for BLIP-2\n", + "# model_name = \"Salesforce-BLIP-2-opt-2-7b-vqa\"\n", + "\n", + "foundation_model = registry_ml_client.models.get(name=model_name, label=\"latest\")\n", + "print(\n", + " f\"\\n\\nUsing model name: {foundation_model.name}, version: {foundation_model.version}, id: {foundation_model.id} for inferencing\"\n", + ")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 3. 
Prepare data for inference - Using a folder of csv files with base64 images\n", + "\n", + "We will use the [odFridgeObjects](https://cvbp-secondary.z19.web.core.windows.net/datasets/object_detection/odFridgeObjects.zip) dataset for this image-to-text task.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import urllib\n", + "import shutil\n", + "from zipfile import ZipFile\n", + "\n", + "# Change to a different location if you prefer\n", + "dataset_parent_dir = \"./batchdata\"\n", + "\n", + "# create data folder if it doesnt exist.\n", + "os.makedirs(dataset_parent_dir, exist_ok=True)\n", + "\n", + "# Download data\n", + "download_url = \"https://cvbp-secondary.z19.web.core.windows.net/datasets/object_detection/odFridgeObjects.zip\"\n", + "\n", + "# Extract current dataset name from dataset url\n", + "dataset_name = os.path.split(download_url)[-1].split(\".\")[0]\n", + "# Get dataset path for later use\n", + "dataset_dir = os.path.join(dataset_parent_dir, dataset_name)\n", + "\n", + "if os.path.exists(dataset_dir):\n", + " shutil.rmtree(dataset_dir)\n", + "\n", + "# Get the data zip file path\n", + "data_file = os.path.join(dataset_parent_dir, f\"{dataset_name}.zip\")\n", + "\n", + "# Download the dataset\n", + "urllib.request.urlretrieve(download_url, filename=data_file)\n", + "\n", + "# Extract files\n", + "with ZipFile(data_file, \"r\") as zip:\n", + " print(\"extracting files...\")\n", + " zip.extractall(path=dataset_parent_dir)\n", + " print(\"done\")\n", + "\n", + "# Delete zip file\n", + "os.remove(data_file)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### 3.1 Prepare a folder of CSV files with base64 images and text prompt corresponding to each image for batch inference input\n", + "\n", + "We can provide input images to batch inference in a csv file containing a column named \"image\" having either images in base64 format or publicly accessible image URLs. The text prompt is added under column named \"text\".\n", + "The deployment in the `Create batch deployment` section below takes the argument `mini_batch_size`, which is the number of CSV files processed by the model in a single mini_batch. To limit the number of images processed in each mini_batch we split the dataset into multiple csv files." 
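To make the expected input layout concrete, here is a small illustrative sketch of one such CSV: one row carries a base64-encoded image from the downloaded dataset, and one row uses a publicly accessible image URL instead (the URL below is a placeholder, not part of the sample dataset; `dataset_dir` comes from the download cell above):

```python
import base64
import os
import pandas as pd

# Illustrative only: one row with a base64-encoded image, one with a public image URL.
# The URL is a placeholder; replace it with any publicly accessible image if you use this form.
with open(os.path.join(dataset_dir, "images", "1.jpg"), "rb") as f:
    encoded_image = base64.encodebytes(f.read()).decode("utf-8")

example_df = pd.DataFrame(
    {
        "image": [encoded_image, "https://example.com/fridge.jpg"],
        "text": ["What are the drinks on the table?", "Is there a soda on the table?"],
    }
)
print(example_df.head())
```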
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import base64\n", + "import pandas as pd\n", + "from pathlib import Path\n", + "\n", + "csv_folder_path = os.path.join(dataset_parent_dir, \"batch\")\n", + "os.makedirs(csv_folder_path, exist_ok=True)\n", + "batch_input_file = \"batch_input.csv\"\n", + "dataset_dir = os.path.join(dataset_dir, \"images\")\n", + "\n", + "# Take 20 images\n", + "image_list = []\n", + "for i in range(1, 21):\n", + " image_path = os.path.join(dataset_dir, str(i) + \".jpg\")\n", + " with open(image_path, \"rb\") as f:\n", + " data = f.read()\n", + " data = base64.encodebytes(data).decode(\"utf-8\")\n", + " image_list.append(data)\n", + "\n", + "# Read questions list file\n", + "with open(\"list_of_questions.txt\", \"r\") as f:\n", + " data = f.read()\n", + " question_list = data.split(\"\\n\")\n", + " # For BLIP2, append \"Answer: \" to the questions\n", + " # question_list = [s + \" Answer:\" for s in question_list]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Divide the image & questions list into files of 10 rows each\n", + "batch_size_per_predict = 10\n", + "divided_image_list = [\n", + " image_list[i * batch_size_per_predict : (i + 1) * batch_size_per_predict]\n", + " for i in range(\n", + " (len(image_list) + batch_size_per_predict - 1) // batch_size_per_predict\n", + " )\n", + "]\n", + "\n", + "divided_question_list = [\n", + " question_list[i * batch_size_per_predict : (i + 1) * batch_size_per_predict]\n", + " for i in range(\n", + " (len(question_list) + batch_size_per_predict - 1) // batch_size_per_predict\n", + " )\n", + "]\n", + "\n", + "assert len(divided_image_list) == len(divided_question_list)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Write to CSV files\n", + "for l in range(0, len(divided_image_list)):\n", + " dictionary = {\"image\": divided_image_list[l], \"text\": divided_question_list[l]}\n", + " batch_df = pd.DataFrame(dictionary)\n", + " filepath = os.path.join(csv_folder_path, str(l) + batch_input_file)\n", + " batch_df.to_csv(filepath)\n", + "\n", + "# Check out the first and last file name created\n", + "input_paths = sorted(Path(csv_folder_path).iterdir(), key=os.path.getmtime)\n", + "input_files = [os.path.basename(path) for path in input_paths]\n", + "print(input_files)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from IPython.display import Image\n", + "\n", + "sample_image = os.path.join(dataset_dir, \"99.jpg\")\n", + "Image(filename=sample_image)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 4. Deploy the model to a batch endpoint\n", + "Batch endpoints are endpoints that are used to do batch inferencing on large volumes of data over a period of time. The endpoints receive pointers to data and run jobs asynchronously to process the data in parallel on compute clusters. Batch endpoints store outputs to a data store for further analysis. 
For more information on batch endpoints and deployments see [What are batch endpoints?](https://learn.microsoft.com/en-us/azure/machine-learning/concept-endpoints?view=azureml-api-2#what-are-batch-endpoints).\n", + "\n", + "* Create a batch endpoint.\n", + "* Create a batch deployment.\n", + "* Set the deployment as default; doing so allows invoking the endpoint without specifying the deployment's name." + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Create a batch endpoint" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import time\n", + "from azure.ai.ml.entities import (\n", + " BatchEndpoint,\n", + " BatchDeployment,\n", + " BatchRetrySettings,\n", + ")\n", + "\n", + "# Endpoint names need to be unique in a region, hence using timestamp to create unique endpoint name\n", + "timestamp = int(time.time())\n", + "endpoint_name = \"vqa-\" + str(timestamp)\n", + "# Create a batch endpoint\n", + "endpoint = BatchEndpoint(\n", + " name=endpoint_name,\n", + " description=\"Batch endpoint for \"\n", + " + foundation_model.name\n", + " + \", for visual-question-answering task\",\n", + ")\n", + "workspace_ml_client.begin_create_or_update(endpoint).result()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Create a batch deployment" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "deployment_name = \"vqa-demo\"\n", + "\n", + "deployment = BatchDeployment(\n", + " name=deployment_name,\n", + " endpoint_name=endpoint_name,\n", + " model=foundation_model.id,\n", + " compute=compute_name,\n", + " error_threshold=0,\n", + " instance_count=1,\n", + " logging_level=\"info\",\n", + " max_concurrency_per_instance=1,\n", + " mini_batch_size=2,\n", + " output_file_name=\"predictions.csv\",\n", + " retry_settings=BatchRetrySettings(max_retries=3, timeout=600),\n", + ")\n", + "workspace_ml_client.begin_create_or_update(deployment).result()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Set the deployment as default" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "endpoint = workspace_ml_client.batch_endpoints.get(endpoint_name)\n", + "endpoint.defaults.deployment_name = deployment_name\n", + "workspace_ml_client.begin_create_or_update(endpoint).result()\n", + "\n", + "endpoint = workspace_ml_client.batch_endpoints.get(endpoint_name)\n", + "print(f\"The default deployment is {endpoint.defaults.deployment_name}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 5. Test the endpoint - Using a folder of CSV files\n", + "\n", + "Invoke the batch endpoint with the input parameter pointing to the folder of CSV files containing the batch inference input. This creates a pipeline job using the default deployment in the endpoint. Wait for the job to complete. \n", + "Note: If job failed with Out of Memory Error then please try splitting your input into smaller csv files or decreasing `mini_batch_size` for the deployment." 
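If out-of-memory errors do occur, one remedy is to lower `mini_batch_size` on the deployment created above and push the update before re-invoking the endpoint. A small sketch, reusing the `deployment` object defined earlier:

```python
# Sketch: reduce the number of CSV files handled per mini batch and update the deployment.
deployment.mini_batch_size = 1
workspace_ml_client.begin_create_or_update(deployment).result()
```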
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "job = None\n", + "input = Input(path=csv_folder_path, type=AssetTypes.URI_FOLDER)\n", + "num_retries = 3\n", + "for i in range(num_retries):\n", + " try:\n", + " job = workspace_ml_client.batch_endpoints.invoke(\n", + " endpoint_name=endpoint.name, input=input\n", + " )\n", + " break\n", + " except Exception as e:\n", + " if i == num_retries - 1:\n", + " raise e\n", + " else:\n", + " print(\"Endpoint invocation failed. Retrying after 5 seconds...\")\n", + " time.sleep(5)\n", + "if job is not None:\n", + " workspace_ml_client.jobs.stream(job.name)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "scoring_job = list(workspace_ml_client.jobs.list(parent_job_name=job.name))[0]\n", + "\n", + "workspace_ml_client.jobs.download(\n", + " name=scoring_job.name,\n", + " download_path=os.path.join(dataset_parent_dir, \"csv-output\"),\n", + " output_name=\"score\",\n", + ")\n", + "\n", + "predictions_file = os.path.join(\n", + " dataset_parent_dir, \"csv-output\", \"named-outputs\", \"score\", \"predictions.csv\"\n", + ")\n", + "\n", + "# Load the batch predictions file with no headers into a dataframe and set your column names\n", + "score_df = pd.read_csv(\n", + " predictions_file,\n", + " header=None,\n", + " names=[\"row_number_per_file\", \"text\", \"file_name\"],\n", + ")\n", + "print(score_df[\"text\"].tolist())" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 6. Clean up resources - delete the endpoint\n", + "Batch endpoints use compute resources only when jobs are submitted. You can keep the batch endpoint for your reference without worrying about compute bills, or choose to delete the endpoint. If you created your compute cluster to have zero minimum instances and scale down soon after being idle, you won't be charged for an unused compute." 
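In addition to deleting the endpoint in the next cell, the compute cluster can also be removed if it was created solely for this sample. This is an optional sketch; skip it if the cluster is shared with other workloads:

```python
# Optional: remove the compute cluster created earlier. Skip if the cluster is shared.
workspace_ml_client.compute.begin_delete(name=compute_name).result()
```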
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "workspace_ml_client.batch_endpoints.begin_delete(name=endpoint_name).result()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "sdkv2", + "language": "python", + "name": "sdkv2" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.0" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/sdk/python/foundation-models/system/inference/visual-question-answering/visual-question-answering-online-endpoint.ipynb b/sdk/python/foundation-models/system/inference/visual-question-answering/visual-question-answering-online-endpoint.ipynb new file mode 100644 index 0000000000..f3341e62f2 --- /dev/null +++ b/sdk/python/foundation-models/system/inference/visual-question-answering/visual-question-answering-online-endpoint.ipynb @@ -0,0 +1,359 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Visual Question Answering Inference using Online Endpoints\n", + "\n", + "This sample shows how to deploy `visual-question-answering` type models to an online endpoint for inference.\n", + "\n", + "### Task\n", + "`visual-question-answering` takes in images and for each image, generates a text/caption describing the image.\n", + "\n", + "### Model\n", + "Models that can perform the `visual-question-answering` task are tagged with `visual-question-answering`. We will use the `Salesforce/blip-vqa-base` model in this notebook. If you opened this notebook from a specific model card, remember to replace the specific model name.\n", + "\n", + "### Inference data\n", + "We will use the [odFridgeObjects](https://cvbp-secondary.z19.web.core.windows.net/datasets/object_detection/odFridgeObjects.zip) dataset.\n", + "\n", + "\n", + "### Outline\n", + "1. Setup pre-requisites\n", + "2. Pick a model to deploy\n", + "3. Prepare data for inference\n", + "4. Deploy the model to an online endpoint for real time inference\n", + "5. Test the endpoint\n", + "6. Clean up resources - delete the online endpoint" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 1. Setup pre-requisites\n", + "* Install dependencies\n", + "* Connect to AzureML Workspace. Learn more at [set up SDK authentication](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-setup-authentication?tabs=sdk). 
Replace ``, `` and `` below.\n", + "* Connect to `azureml` system registry" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azure.ai.ml import MLClient\n", + "from azure.identity import (\n", + " DefaultAzureCredential,\n", + " InteractiveBrowserCredential,\n", + ")\n", + "import time\n", + "\n", + "try:\n", + " credential = DefaultAzureCredential()\n", + " credential.get_token(\"https://management.azure.com/.default\")\n", + "except Exception as ex:\n", + " credential = InteractiveBrowserCredential()\n", + "\n", + "try:\n", + " workspace_ml_client = MLClient.from_config(credential)\n", + " subscription_id = workspace_ml_client.subscription_id\n", + " resource_group = workspace_ml_client.resource_group_name\n", + " workspace_name = workspace_ml_client.workspace_name\n", + "except Exception as ex:\n", + " print(ex)\n", + " # Enter details of your AML workspace\n", + " subscription_id = \"\"\n", + " resource_group = \"\"\n", + " workspace_name = \"\"\n", + "workspace_ml_client = MLClient(\n", + " credential, subscription_id, resource_group, workspace_name\n", + ")\n", + "\n", + "# The models are available in the AzureML system registry, \"azureml\"\n", + "registry_ml_client = MLClient(\n", + " credential,\n", + " subscription_id,\n", + " resource_group,\n", + " registry_name=\"azureml\",\n", + ")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 2. Pick a model to deploy\n", + "\n", + "Browse models in the Model Catalog in the AzureML Studio, filtering by the `visual-question-answering` task. In this example, we use the `Salesforce-BLIP-vqa-base` model. If you have opened this notebook for a different model, replace the model name accordingly." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "model_name = \"Salesforce-BLIP-vqa-base\"\n", + "\n", + "# Use model name below for BLIP-2\n", + "# model_name = \"Salesforce-BLIP-2-opt-2-7b-vqa\"\n", + "\n", + "foundation_model = registry_ml_client.models.get(name=model_name, label=\"latest\")\n", + "print(\n", + " f\"\\n\\nUsing model name: {foundation_model.name}, version: {foundation_model.version}, id: {foundation_model.id} for inferencing\"\n", + ")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 3. Prepare data for inference\n", + "\n", + "We will use the [odFridgeObjects](https://cvbp-secondary.z19.web.core.windows.net/datasets/object_detection/odFridgeObjects.zip) dataset for this image-to-text task." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import urllib\n", + "from zipfile import ZipFile\n", + "\n", + "# Change to a different location if you prefer\n", + "dataset_parent_dir = \"./data\"\n", + "\n", + "# Create data folder if it doesnt exist.\n", + "os.makedirs(dataset_parent_dir, exist_ok=True)\n", + "\n", + "# Download data\n", + "download_url = \"https://cvbp-secondary.z19.web.core.windows.net/datasets/object_detection/odFridgeObjects.zip\"\n", + "\n", + "# Extract current dataset name from dataset url\n", + "dataset_name = os.path.split(download_url)[-1].split(\".\")[0]\n", + "# Get dataset path for later use\n", + "dataset_dir = os.path.join(dataset_parent_dir, dataset_name)\n", + "\n", + "# Get the data zip file path\n", + "data_file = os.path.join(dataset_parent_dir, f\"{dataset_name}.zip\")\n", + "\n", + "# Download the dataset\n", + "urllib.request.urlretrieve(download_url, filename=data_file)\n", + "\n", + "# Extract files\n", + "with ZipFile(data_file, \"r\") as zip:\n", + " print(\"extracting files...\")\n", + " zip.extractall(path=dataset_parent_dir)\n", + " print(\"done\")\n", + "# Delete zip file\n", + "os.remove(data_file)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from IPython.display import Image\n", + "\n", + "sample_image = os.path.join(dataset_dir, \"images\", \"99.jpg\")\n", + "Image(filename=sample_image)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 4. Deploy the model to an online endpoint for real time inference\n", + "Online endpoints give a durable REST API that can be used to integrate with applications that need to use the model." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import time\n", + "from azure.ai.ml.entities import (\n", + " ManagedOnlineEndpoint,\n", + " ManagedOnlineDeployment,\n", + ")\n", + "\n", + "# Endpoint names need to be unique in a region, hence using timestamp to create unique endpoint name\n", + "timestamp = int(time.time())\n", + "online_endpoint_name = \"vqa-\" + str(timestamp)\n", + "# Create an online endpoint\n", + "endpoint = ManagedOnlineEndpoint(\n", + " name=online_endpoint_name,\n", + " description=\"Online endpoint for \"\n", + " + foundation_model.name\n", + " + \", for visual-question-answering task\",\n", + " auth_mode=\"key\",\n", + ")\n", + "workspace_ml_client.begin_create_or_update(endpoint).wait()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azure.ai.ml.entities import OnlineRequestSettings, ProbeSettings\n", + "\n", + "deployment_name = \"vqa-mlflow-deploy\"\n", + "\n", + "# Create a deployment\n", + "demo_deployment = ManagedOnlineDeployment(\n", + " name=deployment_name,\n", + " endpoint_name=online_endpoint_name,\n", + " model=foundation_model.id,\n", + " instance_type=\"Standard_DS5_V2\", # Use GPU instance type like Standard_NC6s_v3 for faster inference\n", + " instance_count=1,\n", + " request_settings=OnlineRequestSettings(\n", + " max_concurrent_requests_per_instance=1,\n", + " request_timeout_ms=90000,\n", + " max_queue_wait_ms=500,\n", + " ),\n", + " liveness_probe=ProbeSettings(\n", + " failure_threshold=49,\n", + " success_threshold=1,\n", + " timeout=299,\n", + " period=180,\n", + " initial_delay=180,\n", + " ),\n", + " readiness_probe=ProbeSettings(\n", + " failure_threshold=10,\n", + " success_threshold=1,\n", + " timeout=10,\n", + " period=10,\n", + " initial_delay=10,\n", + " ),\n", + ")\n", + "workspace_ml_client.online_deployments.begin_create_or_update(demo_deployment).wait()\n", + "endpoint.traffic = {deployment_name: 100}\n", + "workspace_ml_client.begin_create_or_update(endpoint).result()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 5. Test the endpoint\n", + "\n", + "We will fetch some sample data from the test dataset and submit to online endpoint for inference." 
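The cells below build a request file and call the endpoint through the SDK. As an aside, the same scoring can also be done against the endpoint's REST API directly; a minimal sketch using the `requests` package (assumes `sample_request_data.json` has been written by the next cell and that key authentication is used, as configured above):

```python
import json
import requests

# Sketch: score the managed online endpoint over plain REST.
# Assumes sample_request_data.json exists (written by the next cell).
endpoint = workspace_ml_client.online_endpoints.get(name=online_endpoint_name)
keys = workspace_ml_client.online_endpoints.get_keys(name=online_endpoint_name)

with open("sample_request_data.json") as f:
    payload = json.load(f)

headers = {
    "Content-Type": "application/json",
    "Authorization": f"Bearer {keys.primary_key}",
}
response = requests.post(endpoint.scoring_uri, json=payload, headers=headers)
print(response.status_code, response.text)
```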
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import base64\n", + "import json\n", + "\n", + "sample_image_1 = os.path.join(dataset_dir, \"images\", \"99.jpg\")\n", + "sample_image_2 = os.path.join(dataset_dir, \"images\", \"1.jpg\")\n", + "\n", + "\n", + "def read_image(image_path):\n", + " with open(image_path, \"rb\") as f:\n", + " return f.read()\n", + "\n", + "\n", + "request_json = {\n", + " \"input_data\": {\n", + " \"columns\": [\"image\", \"text\"],\n", + " \"index\": [0, 1],\n", + " \"data\": [\n", + " [\n", + " base64.encodebytes(read_image(sample_image_1)).decode(\"utf-8\"),\n", + " # For BLIP2 append \"Answer:\" to the below prompt\n", + " \"Describe the beverage in the image?\",\n", + " ],\n", + " [\n", + " base64.encodebytes(read_image(sample_image_2)).decode(\"utf-8\"),\n", + " # For BLIP2 append \"Answer:\" to the below prompt\n", + " \"What are the drinks on the table?\",\n", + " ],\n", + " ],\n", + " }\n", + "}\n", + "\n", + "# Create request json\n", + "request_file_name = \"sample_request_data.json\"\n", + "with open(request_file_name, \"w\") as request_file:\n", + " json.dump(request_json, request_file)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Score the sample_score.json file using the online endpoint with the azureml endpoint invoke method\n", + "response = workspace_ml_client.online_endpoints.invoke(\n", + " endpoint_name=online_endpoint_name,\n", + " deployment_name=demo_deployment.name,\n", + " request_file=request_file_name,\n", + ")\n", + "\n", + "print(f\"raw response: {response}\\n\")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 6. Clean up resources - delete the online endpoint\n", + "Don't forget to delete the online endpoint, else you will leave the billing meter running for the compute used by the endpoint." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "workspace_ml_client.online_endpoints.begin_delete(name=online_endpoint_name).wait()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "sdkv2", + "language": "python", + "name": "sdkv2" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.0" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}