diff --git a/sdk/python/distillation/benchmarking/pipelines/nli/snli.yaml b/sdk/python/distillation/benchmarking/pipelines/nli/snli.yaml new file mode 100644 index 0000000000..ebc7773450 --- /dev/null +++ b/sdk/python/distillation/benchmarking/pipelines/nli/snli.yaml @@ -0,0 +1,242 @@ +$schema: https://azuremlschemas.azureedge.net/latest/pipelineJob.schema.json +type: pipeline +display_name: snli_eval +description: Evaluate distilled models on snli dataset +inputs: + task: question-answering + sample_ratio: 0.01 + ground_truth_column_name: completion + prediction_column_name: prediction + # batch_score inputs + endpoint_url: "" + deployment_name: "" + authentication_type: azureml_workspace_connection + connection_name: "" + debug_mode: false +jobs: + downloader: + type: command + component: azureml://registries/azureml/components/dataset_downloader/labels/latest + limits: {} + inputs: + dataset_name: stanfordnlp/snli + split: validation + outputs: + output_dataset: + type: uri_folder + sampler: + type: command + component: azureml://registries/azureml/components/dataset_sampler/labels/latest + limits: {} + inputs: + dataset: + type: uri_folder + path: ${{parent.jobs.downloader.outputs.output_dataset}} + sampling_style: head + sampling_ratio: ${{parent.inputs.sample_ratio}} + random_seed: 0 + outputs: + output_dataset: + type: uri_file + path: azureml://datastores/${{default_datastore}}/paths/azureml/${{name}}/${{output_name}}.jsonl + downloader_fewshot: + type: command + component: azureml://registries/azureml/components/dataset_downloader/labels/latest + limits: {} + inputs: + dataset_name: stanfordnlp/snli + split: test + outputs: + output_dataset: + type: uri_folder + sampler_fewshot: + type: command + component: azureml://registries/azureml/components/dataset_sampler/labels/latest + limits: {} + inputs: + dataset: + type: uri_folder + path: ${{parent.jobs.downloader_fewshot.outputs.output_dataset}} + sampling_style: head + n_samples: 8 + random_seed: 0 + 
outputs: + output_dataset: + type: uri_file + path: azureml://datastores/${{default_datastore}}/paths/azureml/${{name}}/${{output_name}}.jsonl + promptcrafter: + type: command + component: azureml://registries/azureml/components/prompt_crafter/labels/latest + limits: {} + inputs: + test_data: + type: uri_folder + path: ${{parent.jobs.sampler.outputs.output_dataset}} + few_shot_data: + type: uri_file + path: ${{parent.jobs.sampler_fewshot.outputs.output_dataset}} + prompt_type: completions + prompt_pattern: 'Premise: {{premise}} + + Hypothesis: {{hypothesis}} + + The label is: ' + n_shots: 5 + output_pattern: '{{label}}' + few_shot_separator: "\n\n" + prefix: Given a partial description of an event as premise and hypothesis, + your task is to select the most appropriate label from the 3 options. + The 3 options 0, 1, 2 are - entailment, contradiction, and neutral respectively. + Carefully analyze the context and use your understanding of the world to make + the best choice.\n + random_seed: 0 + outputs: + output_file: + type: uri_file + path: azureml://datastores/${{default_datastore}}/paths/azureml/${{name}}/${{output_name}}.jsonl + batch_score_preprocessor: + type: command + component: azureml://registries/azureml/components/batch_inference_preparer/versions/0.0.14 + limits: {} + inputs: + input_dataset: + type: uri_file + path: ${{parent.jobs.promptcrafter.outputs.output_file}} + model_type: oai + batch_input_pattern: '{"messages": [{"role": "user", "content": "###"}], "temperature": 0.6, "top_p": 1.0, "max_new_tokens": 2048, "frequency_penalty": 0.0, "presence_penalty": 0.0}' + label_column_name: ${{parent.inputs.ground_truth_column_name}} + is_performance_test: false + outputs: + formatted_data: + type: mltable + path: azureml://datastores/${{default_datastore}}/paths/azureml/${{name}}/${{output_name}} + ground_truth_metadata: + type: uri_folder + path: azureml://datastores/${{default_datastore}}/paths/azureml/${{name}}/${{output_name}} + config_generator: + 
type: command + component: azureml://registries/azureml/components/batch_benchmark_config_generator/versions/0.0.9 + inputs: + scoring_url: ${{parent.inputs.endpoint_url}} + deployment_name: ${{parent.inputs.deployment_name}} + authentication_type: ${{parent.inputs.authentication_type}} + connection_name: ${{parent.inputs.connection_name}} + additional_headers: "" + debug_mode: ${{parent.inputs.debug_mode}} + ensure_ascii: false + max_retry_time_interval: 300 + initial_worker_count: 5 + max_worker_count: 200 + model_type: oss + outputs: + batch_score_config: + type: uri_file + path: azureml://datastores/${{default_datastore}}/paths/azureml/${{name}}/${{output_name}}.jsonl + # Batch score job + batch_score: + type: parallel + component: azureml://registries/azureml-preview-test1/components/batch_score_oss/versions/0.0.1.dev0 + inputs: + async_mode: false + data_input_table: ${{parent.jobs.batch_score_preprocessor.outputs.formatted_data}} + configuration_file: ${{parent.jobs.config_generator.outputs.batch_score_config}} + outputs: + job_output_path: + type: uri_file + mini_batch_results_output_directory: + type: uri_folder + resources: + instance_count: 1 + max_concurrency_per_instance: 8 + retry_settings: + timeout: 6000 + max_retries: 10 + environment_variables: + BATCH_SCORE_INITIAL_REQUEST_TIMEOUT: '180' + BATCH_SCORE_DELAY_AFTER_SUCCESSFUL_REQUEST: 'False' + BATCH_SCORE_MAX_REQUEST_TIMEOUT: '300' + batch_score_postprocessor: + type: command + component: azureml://registries/azureml/components/batch_output_formatter/versions/0.0.14 + limits: {} + inputs: + batch_inference_output: + type: uri_folder + path: ${{parent.jobs.batch_score.outputs.mini_batch_results_output_directory}} + ground_truth_input: + type: uri_file + path: ${{parent.jobs.batch_score_preprocessor.outputs.ground_truth_metadata}} + model_type: oai + label_column_name: ${{parent.inputs.ground_truth_column_name}} + endpoint_url: ${{parent.inputs.endpoint_url}} + handle_response_failure: use_fallback 
+ min_endpoint_success_ratio: 0.0 + is_performance_test: false + use_tiktoken: false + outputs: + predictions: + type: uri_file + path: azureml://datastores/${{default_datastore}}/paths/azureml/${{name}}/${{output_name}}.jsonl + performance_metadata: + type: uri_file + path: azureml://datastores/${{default_datastore}}/paths/azureml/${{name}}/${{output_name}}.jsonl + ground_truth: + type: uri_file + path: azureml://datastores/${{default_datastore}}/paths/azureml/${{name}}/${{output_name}}.jsonl + successful_requests: + type: uri_file + path: azureml://datastores/${{default_datastore}}/paths/azureml/${{name}}/${{output_name}}.jsonl + failed_requests: + type: uri_file + path: azureml://datastores/${{default_datastore}}/paths/azureml/${{name}}/${{output_name}}.jsonl + unsafe_content_blocked_requests: + type: uri_file + path: azureml://datastores/${{default_datastore}}/paths/azureml/${{name}}/${{output_name}}.jsonl + postprocessor: + type: command + component: azureml://registries/azureml/components/inference_postprocessor/labels/latest + limits: {} + inputs: + ground_truth_dataset: + type: uri_folder + path: ${{parent.jobs.batch_score_postprocessor.outputs.ground_truth}} + prediction_dataset: + type: uri_folder + path: ${{parent.jobs.batch_score_postprocessor.outputs.predictions}} + ground_truth_column_name: ${{parent.inputs.ground_truth_column_name}} + prediction_column_name: ${{parent.inputs.prediction_column_name}} + separator: ' + + + ' + find_first: 0,1,2 + outputs: + output_dataset_result: + type: uri_file + path: azureml://datastores/${{default_datastore}}/paths/azureml/${{name}}/${{output_name}}.jsonl + compute_metrics: + type: command + component: azureml://registries/azureml/components/compute_metrics/labels/latest + limits: {} + inputs: + ground_truth: + type: uri_folder + path: ${{parent.jobs.postprocessor.outputs.output_dataset_result}} + prediction: + type: uri_folder + path: ${{parent.jobs.postprocessor.outputs.output_dataset_result}} + task: 
${{parent.inputs.task}} + ground_truth_column_name: ${{parent.inputs.ground_truth_column_name}} + prediction_column_name: ${{parent.inputs.prediction_column_name}} + evaluation_config_params: '{"regexes_to_ignore": ["\\W"]}' + outputs: + evaluation_result: + type: uri_folder +tags: + workflow: distill_llm_benchmark + evaluation_type: text-generation +properties: + _azureml.evaluation_run: Benchmark +settings: + force_rerun: false + default_compute: azureml:serverless \ No newline at end of file