Merge branch 'main' into vis-yadav/distillation_notebook

Azure · Nov 20, 2024 · f71bedc · f71bedc
2 parents d253098 + 17c67f3
commit f71bedc
Show file tree

Hide file tree

Showing 27 changed files with 3,775 additions and 1,235 deletions.
diff --git a/cli/foundation-models/system/distillation/conversation/README.md b/cli/foundation-models/system/distillation/conversation/README.md
@@ -0,0 +1,33 @@
+# Distillation with CLI (Conversation)
+
+## 1. Create the Job
+Ensure you have the proper setup.
+1. Run `az version` and ensure the `ml` extension is installed. The `ml` extension version should be greater than or equal to 2.32.0.
+2. If the `ml` extension is not installed, run `az extension add -n ml`
+
+Run the distillation CLI command below, pointing it at the YAML file in this folder and filling in your Azure ML workspace, resource group, and subscription:
+
+```text
+az ml job create --file distillation_conversation.yaml --workspace-name [YOUR_AZURE_WORKSPACE] --resource-group [YOUR_AZURE_RESOURCE_GROUP] --subscription [YOUR_AZURE_SUBSCRIPTION]
+```
+
+**Note:** To see how the train and validation files were created, see section 2 of this [notebook](/sdk/python/foundation-models/system/distillation/conversation/distillation_conversational_task.ipynb)
+
+## 2. Deploy to Endpoint
+Once the distilled model is ready, you can deploy the model through the UI or CLI.
+
+### UI Deployment
+1. Navigate to the `model` tab in [ml studio](https://ml.azure.com), or to the `Finetuning` tab in the [ai platform](https://ai.azure.com).
+2. If using ml studio, locate the model using the `name` of the `registered_model` in the yaml file used to create this job, then select Deploy to deploy a serverless endpoint. If using the ai platform, search for the name of the job, which in this example is `Distillation-conversation-llama`; click on that name, then select Deploy to deploy a serverless endpoint.
+
+### CLI Deployment
+Fill out the `serverless_endpoint.yaml` file in this folder. To find the necessary information:
+1. Navigate to the `model` tab in [ml studio](https://ml.azure.com).
+2. Select the model whose name matches the `name` of the `registered_model` in the yaml file used to create this job. In this example, the name to use is `llama-conversation-distilled`.
+3. Use the model's `asset_id` to fill out the `model_id` in the yaml.
+
+With the information filled out, run the following command:
+
+```text
+az ml serverless-endpoint create -f serverless_endpoint.yaml
+```
diff --git a/cli/foundation-models/system/distillation/conversation/distillation_conversation.yaml b/cli/foundation-models/system/distillation/conversation/distillation_conversation.yaml
@@ -0,0 +1,55 @@
+type: distillation
+
+name: 'Distillation-conversation-llama'
+description: 'Distill student model using a teacher model'
+experiment_name: 'Distillation-Conversation'
+
+# Data generation properties (generation type and task type)
+data_generation_type: label_generation
+data_generation_task_type: conversation
+
+# Input data: JSONL files referenced relative to this YAML file
+training_data:
+  type: uri_file
+  path: ./train_conversation.jsonl
+validation_data:
+  type: uri_file
+  path: ./validation_conversation.jsonl
+
+# Serverless endpoint of the teacher model
+# REPLACE WITH YOUR ENDPOINT INFORMATION
+teacher_model_endpoint_connection:
+  type: serverless
+  name: Meta-Llama-3-1-405B-Instruct-vkn
+  endpoint: https://Meta-Llama-3-1-405B-Instruct-vkn.westus3.models.ai.azure.com/chat/completions
+  api_key: EXAMPLE_API_KEY  # placeholder value; do not commit a real key
+
+# Student model, referenced by its registry model ID
+student_model: azureml://registries/azureml-meta/models/Meta-Llama-3.1-8B-Instruct/versions/2
+
+# Output distilled model; `name` is the registered model name to look up when deploying
+outputs:
+  registered_model:
+    type: mlflow_model
+    name: llama-conversation-distilled
+
+
+# Settings for calls to the teacher model (OPTIONAL)
+teacher_model_settings:
+  inference_parameters:
+    temperature: 0.1
+    max_tokens: 100
+    top_p: 0.95
+  endpoint_request_settings:
+    request_batch_size: 10
+    min_endpoint_success_ratio: 0.7
+
+# Finetuning hyperparameters (OPTIONAL); values are written as quoted strings
+hyperparameters:
+  learning_rate_multiplier: '0.2'
+  n_epochs: '5'
+  batch_size: '2'
+
+# Compute resource for the data generation step (OPTIONAL)
+resources:
+  instance_type: Standard_D2_v2
diff --git a/cli/foundation-models/system/distillation/conversation/serverless_endpoint.yaml b/cli/foundation-models/system/distillation/conversation/serverless_endpoint.yaml
@@ -0,0 +1,2 @@
+name: llama-conversation-distilled  # name given to the serverless endpoint
+model_id: 'azureml://locations/{AI_PROJECT_LOCATION}/workspaces/{WORKSPACE_ID}/models/llama-conversation-distilled/versions/{VERSION}'  # replace the {PLACEHOLDER} segments using the registered model's asset_id
diff --git a/cli/foundation-models/system/distillation/conversation/train_conversation.jsonl b/cli/foundation-models/system/distillation/conversation/train_conversation.jsonl
diff --git a/cli/foundation-models/system/distillation/conversation/validation_conversation.jsonl b/cli/foundation-models/system/distillation/conversation/validation_conversation.jsonl
diff --git a/cli/foundation-models/system/distillation/math/README.md b/cli/foundation-models/system/distillation/math/README.md
@@ -0,0 +1,32 @@
+# Distillation with CLI (Math)
+
+## 1. Create the Job
+Ensure you have the proper setup.
+1. Run `az version` and ensure the `ml` extension is installed. The `ml` extension version should be greater than or equal to 2.32.0.
+2. If the `ml` extension is not installed, run `az extension add -n ml`
+
+Run the distillation CLI command below, pointing it at the YAML file in this folder and filling in your Azure ML workspace, resource group, and subscription:
+
+```text
+az ml job create --file distillation_math.yaml --workspace-name [YOUR_AZURE_WORKSPACE] --resource-group [YOUR_AZURE_RESOURCE_GROUP] --subscription [YOUR_AZURE_SUBSCRIPTION]
+```
+**Note:** To see how the train and validation files were created, see section 2 of this [notebook](/sdk/python/foundation-models/system/distillation/math/distillation_math.ipynb)
+
+## 2. Deploy to Endpoint
+Once the distilled model is ready, you can deploy the model through the UI or CLI.
+
+### UI Deployment
+1. Navigate to the `model` tab in [ml studio](https://ml.azure.com), or to the `Finetuning` tab in the [ai platform](https://ai.azure.com).
+2. If using ml studio, locate the model using the `name` of the `registered_model` in the yaml file used to create this job, then select Deploy to deploy a serverless endpoint. If using the ai platform, search for the name of the job, which in this example is `Distillation-math-llama`; click on that name, then select Deploy to deploy a serverless endpoint.
+
+### CLI Deployment
+Fill out the `serverless_endpoint.yaml` file in this folder. To find the necessary information:
+1. Navigate to the `model` tab in [ml studio](https://ml.azure.com).
+2. Select the model whose name matches the `name` of the `registered_model` in the yaml file used to create this job. In this example, the name to use is `llama-math-distilled`.
+3. Use the model's `asset_id` to fill out the `model_id` in the yaml.
+
+With the information filled out, run the following command:
+
+```text
+az ml serverless-endpoint create -f serverless_endpoint.yaml
+```
diff --git a/cli/foundation-models/system/distillation/math/distillation_math.yaml b/cli/foundation-models/system/distillation/math/distillation_math.yaml
@@ -0,0 +1,59 @@
+type: distillation
+
+name: 'Distillation-math-llama'
+description: 'Distill student model using a teacher model'
+experiment_name: 'Distillation-Math'
+
+# Data generation properties (generation type and task type)
+data_generation_type: label_generation
+data_generation_task_type: math
+
+# Input data: JSONL files referenced relative to this YAML file
+training_data:
+  type: uri_file
+  path: ./train_math.jsonl
+validation_data:
+  type: uri_file
+  path: ./validation_math.jsonl
+
+# Serverless endpoint of the teacher model
+# REPLACE WITH YOUR ENDPOINT INFORMATION
+teacher_model_endpoint_connection:
+  type: serverless
+  name: Meta-Llama-3-1-405B-Instruct-vkn
+  endpoint: https://Meta-Llama-3-1-405B-Instruct-vkn.westus3.models.ai.azure.com/chat/completions
+  api_key: EXAMPLE_API_KEY  # placeholder value; do not commit a real key
+
+# Student model, referenced by its registry model ID
+student_model: azureml://registries/azureml-meta/models/Meta-Llama-3.1-8B-Instruct/versions/2
+
+# Output distilled model; `name` is the registered model name to look up when deploying
+outputs:
+  registered_model:
+    type: mlflow_model
+    name: llama-math-distilled
+
+
+# Settings for calls to the teacher model (OPTIONAL)
+teacher_model_settings:
+  inference_parameters:
+    temperature: 0.1
+    max_tokens: 1024
+    top_p: 0.95
+  endpoint_request_settings:
+    request_batch_size: 10
+    min_endpoint_success_ratio: 0.7
+
+# System prompt settings (OPTIONAL); chain of thought is switched on here
+prompt_settings:
+  enable_chain_of_thought: true
+
+# Finetuning hyperparameters (OPTIONAL); values are written as quoted strings
+hyperparameters:
+  learning_rate_multiplier: '0.2'
+  n_epochs: '5'
+  batch_size: '2'
+
+# Compute resource for the data generation step (OPTIONAL)
+resources:
+  instance_type: Standard_D2_v2
diff --git a/cli/foundation-models/system/distillation/math/serverless_endpoint.yaml b/cli/foundation-models/system/distillation/math/serverless_endpoint.yaml
@@ -0,0 +1,2 @@
+name: llama-math-distilled  # name given to the serverless endpoint
+model_id: 'azureml://locations/{AI_PROJECT_LOCATION}/workspaces/{WORKSPACE_ID}/models/llama-math-distilled/versions/{VERSION}'  # replace the {PLACEHOLDER} segments using the registered model's asset_id