diff --git a/.Triage.py b/.Triage.py
new file mode 100644
index 000000000..8b5974aab
--- /dev/null
+++ b/.Triage.py
@@ -0,0 +1,502 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
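+
+"""Triage utility for OPEA ChatQnA deployments.
+
+Runs the ordered unittest suite below: checks required environment variables,
+records system and "docker ps" info, sends a test request to each
+microservice described in the data JSON file, and writes an HTML report
+(plus a zip archive of the result folder) with per-container logs and,
+for vLLM backends, torch profiler traces.
+
+Usage: python .Triage.py ChatQnA_Xeon.json
+"""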
+
+import json
+import os
+import unittest
+
+import requests
+
+# Do not re-sort test methods; the test_<n>_ names already encode the intended order.
+unittest.TestLoader.sortTestMethodsUsing = None
+
+
+class RunCmd:
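+    """Minimal helper that runs a shell command and returns (status, stdout)."""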
+
+ def run(self, cmd):
+ import subprocess
+
+        p = subprocess.Popen(cmd, stdout=subprocess.PIPE, shell=True)
+        (output, err) = p.communicate()
+        p_status = p.returncode  # communicate() already waited for the process
+        return p_status, output
+
+
+class ProfileUtility:
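+    """Per-backend profiling hooks: currently only the "vllm-service"
+    container is supported, via its /start_profile and /stop_profile
+    endpoints and a "docker cp" of the resulting torch trace files."""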
+
+    def __init__(self):
+        self.docker_name = ""
+        self.start_profile_func = None
+        self.stop_profile_func = None
+        self.get_profile_result_func = None
+        return
+
+ def set_profile_funcs(self, docker_name):
+ if docker_name == "vllm-service":
+ self.docker_name = docker_name
+ self.start_profile_func = self.vllm_start_profile
+ self.stop_profile_func = self.vllm_stop_profile
+ self.get_profile_result_func = self.vllm_get_profile_result
+        else:
+            self.start_profile_func = None
+            self.stop_profile_func = None
+            self.get_profile_result_func = None
+            self.docker_name = ""
+
+    def vllm_start_profile(self, ip, port, data):
+        print("vllm_start_profile")
+        # Ask the vLLM server to start its torch profiler
+        endpoint = ip + ":" + port + "/" + "start_profile"
+        data.pop("messages", None)
+        requests.post(
+            url=endpoint, json=data, headers={"Content-Type": "application/json"}, proxies={"http": None}
+        )
+        return
+
+    def vllm_stop_profile(self, ip, port, data):
+        print("vllm_stop_profile")
+        # Ask the vLLM server to stop profiling and flush the trace to disk
+        endpoint = ip + ":" + port + "/" + "stop_profile"
+        data.pop("messages", None)
+        requests.post(
+            url=endpoint, json=data, headers={"Content-Type": "application/json"}, proxies={"http": None}
+        )
+        return
+
+ def vllm_get_profile_result(self, result_folder):
+ print("vllm_get_profile_result, result folder : " + result_folder)
+ result_folder_path = "./" + result_folder
+ cmd = "docker cp " + self.docker_name + ":/mnt/ " + result_folder_path
+ status, output = RunCmd().run(cmd)
+
+ pattern = r".*\.pt.trace.json.gz$" # Match all files ending with ".txt"
+ files_list = []
+ import os
+ import re
+
+ for filename in os.listdir(result_folder_path + os.sep + "mnt/"):
+ if re.search(pattern, filename):
+ files_list.append(filename)
+ return files_list
+
+
+class TriageUtility:
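+    """Reads per-service request payloads from the data JSON file and drives
+    the health-check, test-request, and statistics calls for one service."""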
+
+ def __init__(self, filename):
+ self.class_name = filename.split(".")[0]
+ self.filename = filename # self.__class__.__name__
+ self.prof_utils = ProfileUtility()
+ return
+
+    def load_input_data(self, service):
+        # Each entry in data[<class_name>] describes one service: the
+        # "service", "port", "endpoint", and "output" keys are popped off and
+        # whatever remains becomes the request payload.
+        class_name = self.class_name
+        with open(self.filename, "r") as file:
+            try:
+                data = json.load(file)
+            except json.JSONDecodeError:
+                print("json load failed: " + self.filename)
+                return None, None, None, None
+        for i in data[class_name]:
+            if i["service"] == service:
+                i.pop("service")
+                port = i.pop("port")
+                endpoint = i.pop("endpoint")
+                output = i.pop("output")
+                return i, port, endpoint, output
+        return None, None, None, None
+
+ def service_health_check(self, ip, port, triage_report, triage_level):
+ # Health Check
+ if triage_level < 1:
+ return
+ endpoint = ip + ":" + port + "/v1/health_check"
+ response = requests.get(url=endpoint, headers={"Content-Type": "application/json"}, proxies={"http": None})
+ triage_report.update_docker_report(port, "Health", response.status_code == 200)
+ return response.status_code
+
+ def service_test(self, ip, port, endpoint_name, data, triage_report, triage_level):
+ if triage_level < 2:
+ return
+
+ # Start Profiling
+ docker_name = triage_report.get_docker_name(port)
+ self.prof_utils.set_profile_funcs(docker_name)
+ if triage_level > 2 and self.prof_utils.start_profile_func is not None:
+ print("start profiling")
+ tmp_data = data.copy()
+ self.prof_utils.start_profile_func(ip, port, tmp_data)
+
+ # Send out test request
+ endpoint = ip + ":" + port + "/" + endpoint_name
+ response = requests.post(
+ url=endpoint, json=data, headers={"Content-Type": "application/json"}, proxies={"http": None}
+ )
+
+ # End Profiling
+ if triage_level > 2 and self.prof_utils.stop_profile_func is not None:
+ print("end profiling")
+ tmp_data = data.copy()
+ self.prof_utils.stop_profile_func(ip, port, tmp_data)
+ # Save Profile results
+ profile_files_list = self.prof_utils.get_profile_result_func(triage_report.result_folder_name)
+ if profile_files_list != []:
+ triage_report.update_docker_report(port, "Profile", profile_files_list[0])
+
+ triage_report.update_docker_report(port, "Test", response.status_code == 200)
+ log_fname = triage_report.dump_docker_logs(port)
+ triage_report.update_docker_report(port, "Log", log_fname)
+ return response.status_code
+
+ def service_statistics(self, ip, port, triage_report, triage_level):
+ # statistics
+ if triage_level < 1:
+ return
+ endpoint = ip + ":" + port + "/v1/statistics"
+ response = requests.get(url=endpoint, headers={"Content-Type": "application/json"}, proxies={"http": None})
+ triage_report.update_docker_report(port, "Stats", response.status_code == 200)
+ return response.status_code
+
+
+class TriageReport:
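+    """Accumulates results in pandas DataFrames and renders the final HTML
+    report; all artifacts land in a timestamped result folder that is zipped
+    at the end of the run."""
+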
+ def __init__(self, name):
+ self.name = name
+ self.env_vars_df = None
+ self.system_info_df = None
+ self.docker_ps = ""
+ self.docker_ps_df = None
+ self.docker_report_df = None
+ import datetime
+
+ d = datetime.datetime.now()
+ dateinfo = d.strftime("%m-%d_%H-%M")
+ self.result_folder_name = self.name + "_" + dateinfo
+
+        if not os.path.exists(self.result_folder_name):
+            os.makedirs(self.result_folder_name)
+
+ def parse_docker_ps_to_df(self, output):
+ columns = []
+ rows = []
+ for line in output.splitlines():
+            # splitlines() yields bytes; str(line)[2:-1] strips the b'...' wrapper
+            line = str(line)[2:-1]
+ row = line.split()
+ if columns != []:
+ row.pop(1)
+            if len(row) > 3:
+                # Collapse the multi-token PORTS field into a single column
+                for i in range(len(row)):
+                    if i >= 1 and i < (len(row) - 1):
+                        pr = row.pop(2)
+                # Keep only the host port: "0.0.0.0:6006->80/tcp" -> "6006"
+                row[1] = row[1].split("->")[0]
+                row[1] = row[1].split(":")[-1]
+                rows.append(row)
+ else:
+ columns = row
+
+ import pandas as pd
+
+ df = pd.DataFrame(rows, columns=columns)
+ self.docker_ps_df = df
+ return
+
+ def init_docker_report(self):
+
+ df = self.docker_ps_df.copy()
+ new_col = [""] * df.shape[0]
+ df["Health"] = new_col
+ df["Test"] = new_col
+ df["Stats"] = new_col
+ df["Log"] = new_col
+ df["Profile"] = new_col
+ self.docker_report_df = df
+
+ def update_docker_report(self, port, key, value):
+
+ df = self.docker_report_df
+ index_list = df.index[df["PORTS"] == port].tolist()
+ df.at[index_list[0], key] = value
+ return
+
+ def get_docker_name(self, port):
+
+ df = self.docker_ps_df
+ docker_name = df.loc[df["PORTS"] == port, "NAMES"].values[0]
+ return docker_name
+
+ def dump_docker_logs(self, port):
+
+ df = self.docker_ps_df
+ docker_name = df.loc[df["PORTS"] == port, "NAMES"].values[0]
+
+ cmd = "docker logs " + docker_name
+ status, output = RunCmd().run(cmd)
+ output = output.decode("utf-8")
+ filename = docker_name + "_docker_log.txt"
+ self.dump_log_to_file(output, filename)
+ return filename
+
+ def dump_log_to_file(self, output, filename):
+ filepath = self.result_folder_name + os.sep + filename
+ fd = open(filepath, "w") # append mode
+ fd.write(output)
+ fd.close()
+ return
+
+ def generate_triage_report(self):
+        import re
+
+ print(" Example Name:" + self.name)
+ print(" ### ENV Variables###")
+ print(self.env_vars_df)
+ print(" ### System Info###")
+ print(self.system_info_df)
+ self.docker_ps_df = None
+ print(" ### Services Status###")
+ print(self.docker_report_df)
+
+ report_name = self.name + ".html"
+
+ report_path = self.result_folder_name + os.sep + report_name
+
+ # Log Files
+
+ docker_log_html_content = ""
+ pattern = r".*\_docker_log.txt$" # Match all files ending with ".txt"
+ for filename in os.listdir(self.result_folder_name):
+ if re.search(pattern, filename):
+                # Link each dumped docker log into the report; the log files
+                # sit next to the HTML inside the result folder/zip.
+                html_content = (
+                    "<br />\n\n<h3>"
+                    + filename
+                    + "</h3>\n"
+                    + '<a href="' + filename + '">' + filename + "</a>"
+                )
+ docker_log_html_content = docker_log_html_content + html_content
+
+ with open(report_path, "w") as hfile:
+ hfile.write(
+ "\n\n 1. Services Status
\n\n"
+ + self.docker_report_df.to_html()
+ + "\n\n 2. System Info
\n\n"
+ + self.system_info_df.head().to_html()
+ + "\n\n 3. Environment Variables
\n\n"
+ + self.env_vars_df.head().to_html()
+ + "\n\n 4. Docker Log Files
\n\n"
+ + docker_log_html_content
+ )
+
+ print("\nReport File is : " + report_path)
+ import shutil
+
+ shutil.make_archive(self.result_folder_name, "zip", self.result_folder_name)
+ return
+
+
+class ChatQnA(unittest.TestCase):
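+    """Ordered triage tests; the test_<n>_ name prefixes encode the order in
+    which services are checked."""
+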
+ def setUp(self):
+ self.triage_level = triage_level
+ self.triage_report = triage_report
+ self.ip = "http://0.0.0.0"
+ self.datafile = DataJsonFileName
+ self.classname = DataJsonFileName.split(".")[0]
+ self.utils = TriageUtility(self.datafile)
+ return
+
+ def tearDown(self):
+ return
+
+ def test_1_env_vars(self):
+ EmptyVar = False
+
+ service_name = "env"
+ data, port, endpoint_name, output = self.utils.load_input_data(service_name)
+ self.assertNotEqual(data, None)
+
+ rows = []
+ from os import environ
+
+ for key, val in data.items():
+ var = environ.get(key)
+ row = [key, var, val]
+ rows.append(row)
+ if val is True:
+ if var is None:
+ EmptyVar = True
+ import pandas as pd
+
+ columns = ["env", "value", "required"]
+ df = pd.DataFrame(rows, columns=columns)
+ self.triage_report.env_vars_df = df
+ self.assertEqual(EmptyVar, False)
+
+ def test_2_system(self):
+
+ import socket
+
+ hostname = socket.gethostname()
+ IPAddr = socket.gethostbyname(hostname)
+
+ import platform
+
+ system_info = platform.uname()
+
+ import pandas as pd
+
+ rows = []
+ columns = ["info", "value"]
+ rows.append(["hostname", hostname])
+ rows.append(["ip", IPAddr])
+ rows.append(["system", system_info.system])
+ rows.append(["node", system_info.node])
+ rows.append(["release", system_info.release])
+ rows.append(["version", system_info.version])
+ rows.append(["machine", system_info.machine])
+ rows.append(["processor", system_info.processor])
+ df = pd.DataFrame(rows, columns=columns)
+ self.triage_report.system_info_df = df
+
+ status, output = RunCmd().run("docker ps --format '" "table {{.Image}}\t{{.Ports}}\t{{.Names}}" "'")
+ self.triage_report.parse_docker_ps_to_df(output)
+ self.triage_report.docker_ps = output
+
+ self.triage_report.init_docker_report()
+
+        self.assertIsNotNone(self.triage_report.docker_report_df)
+
+ def test_3_embed(self):
+
+ service_name = "embed"
+ # Get configs/data
+ data, port, endpoint_name, output = self.utils.load_input_data(service_name)
+ self.assertNotEqual(data, None)
+
+ # Testing
+ response_status_code = self.utils.service_test(
+ self.ip, port, endpoint_name, data, self.triage_report, self.triage_level
+ )
+ self.assertEqual(response_status_code, 200)
+
+ def test_4_dataprep(self):
+
+ service_name = "dataprep"
+ # Get configs/data
+ data, port, endpoint_name, output = self.utils.load_input_data(service_name)
+ self.assertNotEqual(data, None)
+
+ # Health Check
+ response_status_code = self.utils.service_health_check(self.ip, port, self.triage_report, self.triage_level)
+ self.assertEqual(response_status_code, 200)
+
+ # Testing
+ # response_status_code = utils.service_test(self.ip, port, endpoint_name, data, self.triage_report, self.triage_level)
+ # self.assertEqual(response_status_code, 200)
+
+ # Statistic
+ response_status_code = self.utils.service_statistics(self.ip, port, self.triage_report, self.triage_level)
+ self.assertEqual(response_status_code, 200)
+
+    def test_5_retrieval(self):
+ service_name = "retrieval"
+ # Get configs/data
+ data, port, endpoint_name, output = self.utils.load_input_data(service_name)
+        self.assertNotEqual(data, None)
+        import random
+
+        # Fill the "embedding" placeholder from the config with a random 768-dim vector
+        embedding = [random.uniform(-1, 1) for _ in range(768)]
+        data["embedding"] = embedding
+
+ # Health Check
+ response_status_code = self.utils.service_health_check(self.ip, port, self.triage_report, self.triage_level)
+ self.assertEqual(response_status_code, 200)
+
+ # Testing
+ response_status_code = self.utils.service_test(
+ self.ip, port, endpoint_name, data, self.triage_report, self.triage_level
+ )
+ self.assertEqual(response_status_code, 200)
+
+ # Statistic
+ response_status_code = self.utils.service_statistics(self.ip, port, self.triage_report, self.triage_level)
+ self.assertEqual(response_status_code, 200)
+
+ def test_6_rerank(self):
+
+ service_name = "rerank"
+ # Get configs/data
+ data, port, endpoint_name, output = self.utils.load_input_data(service_name)
+ self.assertNotEqual(data, None)
+
+ # Testing
+ response_status_code = self.utils.service_test(
+ self.ip, port, endpoint_name, data, self.triage_report, self.triage_level
+ )
+ self.assertEqual(response_status_code, 200)
+
+ def test_7_nginx(self):
+
+ service_name = "nginx"
+ # Get configs/data
+ data, port, endpoint_name, output = self.utils.load_input_data(service_name)
+ self.assertNotEqual(data, None)
+
+ # Testing
+ response_status_code = self.utils.service_test(
+ self.ip, port, endpoint_name, data, self.triage_report, self.triage_level
+ )
+ self.assertEqual(response_status_code, 200)
+
+ def test_8_llm(self):
+
+ service_name = "llm"
+ # Get configs/data
+ data, port, endpoint_name, output = self.utils.load_input_data(service_name)
+ self.assertNotEqual(data, None)
+
+ # Testing
+ response_status_code = self.utils.service_test(
+ self.ip, port, endpoint_name, data, self.triage_report, self.triage_level
+ )
+ self.assertEqual(response_status_code, 200)
+
+ def test_9_mega(self):
+
+ service_name = "mega"
+ # Get configs/data
+ data, port, endpoint_name, output = self.utils.load_input_data(service_name)
+ self.assertNotEqual(data, None)
+
+ # Health Check
+ response_status_code = self.utils.service_health_check(self.ip, port, self.triage_report, self.triage_level)
+ self.assertEqual(response_status_code, 200)
+
+ # Testing
+ response_status_code = self.utils.service_test(
+ self.ip, port, endpoint_name, data, self.triage_report, self.triage_level
+ )
+ self.assertEqual(response_status_code, 200)
+
+
+if __name__ == "__main__":
+ import sys
+
+ if len(sys.argv) < 2:
+ raise IndexError("Please provide data json file.")
+ triage_level = 3 # low, medium, high
+ DataJsonFileName = sys.argv[1] # "ChatQnA_Xeon.json"
+ triage_report = TriageReport(DataJsonFileName)
+ test_loader = unittest.TestLoader()
+ suite = test_loader.loadTestsFromTestCase(ChatQnA)
+ unittest.TextTestRunner(verbosity=3).run(suite)
+ triage_report.generate_triage_report()
diff --git a/ChatQnA/docker_compose/intel/cpu/xeon/set_env.sh b/ChatQnA/docker_compose/intel/cpu/xeon/set_env.sh
index 07fde1cf1..070becd78 100755
--- a/ChatQnA/docker_compose/intel/cpu/xeon/set_env.sh
+++ b/ChatQnA/docker_compose/intel/cpu/xeon/set_env.sh
@@ -6,7 +6,6 @@
pushd "../../../../../" > /dev/null
source .set_env.sh
popd > /dev/null
-
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
diff --git a/ChatQnA/tests/ChatQnA_AIPC.json b/ChatQnA/tests/ChatQnA_AIPC.json
new file mode 100644
index 000000000..47894a456
--- /dev/null
+++ b/ChatQnA/tests/ChatQnA_AIPC.json
@@ -0,0 +1,71 @@
+{
+ "ChatQnA_AIPC": [
+ {
+ "service": "env",
+ "port": "",
+ "endpoint": "",
+ "EMBEDDING_MODEL_ID": true,
+ "RERANK_MODEL_ID": true,
+ "OLLAMA_MODEL": true,
+ "OLLAMA_HOST": true,
+ "INDEX_NAME": true,
+ "HUGGINGFACEHUB_API_TOKEN": true,
+ "http_proxy": false,
+ "https_proxy": false,
+ "no_proxy": false,
+ "output": false
+ },
+ {
+ "service": "embed",
+ "port": "6006",
+ "endpoint": "embed",
+ "inputs": "What is Deep Learning?",
+ "output": "[["
+ },
+ {
+ "service": "dataprep",
+ "port": "6007",
+ "endpoint": "/v1/dataprep",
+ "file_path": "",
+ "output": "Data preparation succeeded"
+ },
+ {
+ "service": "retrieval",
+ "port": "7000",
+ "endpoint": "/v1/retrieval",
+ "text": "test",
+ "embedding": "",
+ "output": "retrieved_docs"
+ },
+ {
+ "service": "rerank",
+ "port": "8808",
+ "endpoint": "rerank",
+ "query": "What is Deep Learning?",
+ "texts": ["Deep Learning is not...", "Deep learning is..."],
+ "output": "index"
+ },
+ {
+ "service": "llm",
+ "port": "11434",
+ "endpoint": "api/generate",
+ "model": "llama3.2",
+ "prompt": "What is Deep Learning?",
+ "output": "generated_text"
+ },
+ {
+ "service": "nginx",
+ "port": "80",
+ "endpoint": "v1/chatqna",
+ "messages": "What is the revenue of Nike in 2023?",
+ "output": "data: "
+ },
+ {
+ "service": "mega",
+ "port": "8888",
+ "endpoint": "v1/chatqna",
+ "messages": "What is the revenue of Nike in 2023?",
+ "output": "data: "
+ }
+ ]
+}
diff --git a/ChatQnA/tests/ChatQnA_Gaudi.json b/ChatQnA/tests/ChatQnA_Gaudi.json
new file mode 100644
index 000000000..4afe93879
--- /dev/null
+++ b/ChatQnA/tests/ChatQnA_Gaudi.json
@@ -0,0 +1,70 @@
+{
+ "ChatQnA_Xeon": [
+ {
+ "service": "env",
+ "port": "",
+ "endpoint": "",
+ "EMBEDDING_MODEL_ID": true,
+ "RERANK_MODEL_ID": true,
+ "LLM_MODEL_ID": true,
+ "INDEX_NAME": true,
+ "HUGGINGFACEHUB_API_TOKEN": true,
+ "http_proxy": false,
+ "https_proxy": false,
+ "no_proxy": false,
+ "output": false
+ },
+ {
+ "service": "embed",
+ "port": "8090",
+ "endpoint": "embed",
+ "inputs": "What is Deep Learning?",
+ "output": "[["
+ },
+ {
+ "service": "dataprep",
+ "port": "6007",
+ "endpoint": "/v1/dataprep",
+ "file_path": "",
+ "output": "Data preparation succeeded"
+ },
+ {
+ "service": "retrieval",
+ "port": "7000",
+ "endpoint": "/v1/retrieval",
+ "text": "test",
+ "embedding": "",
+ "output": "retrieved_docs"
+ },
+ {
+ "service": "rerank",
+ "port": "8808",
+ "endpoint": "rerank",
+ "query": "What is Deep Learning?",
+ "texts": ["Deep Learning is not...", "Deep learning is..."],
+ "output": "index"
+ },
+ {
+ "service": "llm",
+ "port": "8005",
+ "endpoint": "v1/chat/completions",
+ "model": "Intel/neural-chat-7b-v3-3",
+ "messages": [{ "role": "user", "content": "What is Deep Learning?" }],
+ "output": "generated_text"
+ },
+ {
+ "service": "nginx",
+ "port": "80",
+ "endpoint": "v1/chatqna",
+ "messages": "What is the revenue of Nike in 2023?",
+ "output": "data: "
+ },
+ {
+ "service": "mega",
+ "port": "8888",
+ "endpoint": "v1/chatqna",
+ "messages": "What is the revenue of Nike in 2023?",
+ "output": "data: "
+ }
+ ]
+}
diff --git a/ChatQnA/tests/ChatQnA_Gaudi_vllm.json b/ChatQnA/tests/ChatQnA_Gaudi_vllm.json
new file mode 100644
index 000000000..f8cdefa74
--- /dev/null
+++ b/ChatQnA/tests/ChatQnA_Gaudi_vllm.json
@@ -0,0 +1,70 @@
+{
+ "ChatQnA_Xeon": [
+ {
+ "service": "env",
+ "port": "",
+ "endpoint": "",
+ "EMBEDDING_MODEL_ID": true,
+ "RERANK_MODEL_ID": true,
+ "LLM_MODEL_ID": true,
+ "INDEX_NAME": true,
+ "HUGGINGFACEHUB_API_TOKEN": true,
+ "http_proxy": false,
+ "https_proxy": false,
+ "no_proxy": false,
+ "output": false
+ },
+ {
+ "service": "embed",
+ "port": "8090",
+ "endpoint": "embed",
+ "inputs": "What is Deep Learning?",
+ "output": "[["
+ },
+ {
+ "service": "dataprep",
+ "port": "6007",
+ "endpoint": "/v1/dataprep",
+ "file_path": "",
+ "output": "Data preparation succeeded"
+ },
+ {
+ "service": "retrieval",
+ "port": "7000",
+ "endpoint": "/v1/retrieval",
+ "text": "test",
+ "embedding": "",
+ "output": "retrieved_docs"
+ },
+ {
+ "service": "rerank",
+ "port": "8808",
+ "endpoint": "rerank",
+ "query": "What is Deep Learning?",
+ "texts": ["Deep Learning is not...", "Deep learning is..."],
+ "output": "index"
+ },
+ {
+ "service": "llm",
+ "port": "8007",
+ "endpoint": "v1/chat/completions",
+ "model": "Intel/neural-chat-7b-v3-3",
+ "messages": [{ "role": "user", "content": "What is Deep Learning?" }],
+ "output": "generated_text"
+ },
+ {
+ "service": "nginx",
+ "port": "80",
+ "endpoint": "v1/chatqna",
+ "messages": "What is the revenue of Nike in 2023?",
+ "output": "data: "
+ },
+ {
+ "service": "mega",
+ "port": "8888",
+ "endpoint": "v1/chatqna",
+ "messages": "What is the revenue of Nike in 2023?",
+ "output": "data: "
+ }
+ ]
+}
diff --git a/ChatQnA/tests/ChatQnA_NVGPU.json b/ChatQnA/tests/ChatQnA_NVGPU.json
new file mode 100644
index 000000000..7aefa97ed
--- /dev/null
+++ b/ChatQnA/tests/ChatQnA_NVGPU.json
@@ -0,0 +1,82 @@
+{
+ "ChatQnA_NVGPU": [
+ {
+ "service": "env",
+ "port": "",
+ "endpoint": "",
+ "EMBEDDING_MODEL_ID": true,
+ "RERANK_MODEL_ID": true,
+ "LLM_MODEL_ID": true,
+ "INDEX_NAME": true,
+ "HUGGINGFACEHUB_API_TOKEN": true,
+ "TEI_EMBEDDING_ENDPOINT": true,
+ "MEGA_SERVICE_HOST_IP": true,
+ "RETRIEVER_SERVICE_HOST_IP": true,
+ "BACKEND_SERVICE_ENDPOINT": true,
+ "DATAPREP_SERVICE_ENDPOINT": true,
+ "DATAPREP_GET_FILE_ENDPOINT": true,
+ "DATAPREP_DELETE_FILE_ENDPOINT": true,
+ "FRONTEND_SERVICE_IP": true,
+ "FRONTEND_SERVICE_PORT": true,
+ "BACKEND_SERVICE_NAME": true,
+ "BACKEND_SERVICE_IP": true,
+ "BACKEND_SERVICE_PORT": true,
+ "http_proxy": false,
+ "https_proxy": false,
+ "no_proxy": false,
+ "output": false
+ },
+ {
+ "service": "embed",
+ "port": "8090",
+ "endpoint": "embed",
+ "inputs": "What is Deep Learning?",
+ "output": "[["
+ },
+ {
+ "service": "dataprep",
+ "port": "6007",
+ "endpoint": "/v1/dataprep",
+ "file_path": "",
+ "output": "Data preparation succeeded"
+ },
+ {
+ "service": "retrieval",
+ "port": "7000",
+ "endpoint": "/v1/retrieval",
+ "text": "test",
+ "embedding": "",
+ "output": "retrieved_docs"
+ },
+ {
+ "service": "rerank",
+ "port": "8808",
+ "endpoint": "rerank",
+ "query": "What is Deep Learning?",
+ "texts": ["Deep Learning is not...", "Deep learning is..."],
+ "output": "index"
+ },
+ {
+ "service": "llm",
+ "port": "8008",
+ "endpoint": "v1/chat/completions",
+ "model": "Intel/neural-chat-7b-v3-3",
+ "messages": [{ "role": "user", "content": "What is Deep Learning?" }],
+ "output": "generated_text"
+ },
+ {
+ "service": "nginx",
+ "port": "80",
+ "endpoint": "v1/chatqna",
+ "messages": "What is the revenue of Nike in 2023?",
+ "output": "data: "
+ },
+ {
+ "service": "mega",
+ "port": "8888",
+ "endpoint": "v1/chatqna",
+ "messages": "What is the revenue of Nike in 2023?",
+ "output": "data: "
+ }
+ ]
+}
diff --git a/ChatQnA/tests/ChatQnA_ROCm.json b/ChatQnA/tests/ChatQnA_ROCm.json
new file mode 100644
index 000000000..893e6bef6
--- /dev/null
+++ b/ChatQnA/tests/ChatQnA_ROCm.json
@@ -0,0 +1,95 @@
+{
+ "ChatQnA_ROCm": [
+ {
+ "service": "env",
+ "port": "",
+ "endpoint": "",
+ "CHATQNA_HUGGINGFACEHUB_API_TOKEN": true,
+ "CHATQNA_TGI_SERVICE_IMAGE": true,
+ "CHATQNA_EMBEDDING_MODEL_ID": true,
+ "CHATQNA_RERANK_MODEL_ID": true,
+ "CHATQNA_LLM_MODEL_ID": true,
+ "CHATQNA_TGI_SERVICE_PORT": true,
+ "CHATQNA_TEI_EMBEDDING_PORT": true,
+ "CHATQNA_TEI_EMBEDDING_ENDPOINT": true,
+ "CHATQNA_TEI_RERANKING_PORT": true,
+ "CHATQNA_REDIS_VECTOR_PORT": true,
+ "CHATQNA_REDIS_VECTOR_INSIGHT_PORT": true,
+ "CHATQNA_REDIS_DATAPREP_PORT": true,
+ "CHATQNA_REDIS_RETRIEVER_PORT": true,
+ "CHATQNA_INDEX_NAME": true,
+ "CHATQNA_MEGA_SERVICE_HOST_IP": true,
+ "CHATQNA_RETRIEVER_SERVICE_HOST_IP": true,
+ "CHATQNA_BACKEND_SERVICE_ENDPOINT": true,
+ "CHATQNA_DATAPREP_SERVICE_ENDPOINT": true,
+ "CHATQNA_DATAPREP_GET_FILE_ENDPOINT": true,
+ "CHATQNA_DATAPREP_DELETE_FILE_ENDPOINT": true,
+ "CHATQNA_FRONTEND_SERVICE_IP": true,
+ "CHATQNA_FRONTEND_SERVICE_PORT": true,
+ "CHATQNA_BACKEND_SERVICE_NAME": true,
+ "CHATQNA_BACKEND_SERVICE_IP": true,
+ "CHATQNA_BACKEND_SERVICE_PORT": true,
+ "CHATQNA_REDIS_URL": true,
+ "CHATQNA_EMBEDDING_SERVICE_HOST_IP": true,
+ "CHATQNA_RERANK_SERVICE_HOST_IP": true,
+ "CHATQNA_LLM_SERVICE_HOST_IP": true,
+ "CHATQNA_NGINX_PORT": true,
+ "http_proxy": false,
+ "https_proxy": false,
+ "no_proxy": false,
+ "output": false
+ },
+ {
+ "service": "embed",
+ "port": "8090",
+ "endpoint": "embed",
+ "inputs": "What is Deep Learning?",
+ "output": "[["
+ },
+ {
+ "service": "dataprep",
+ "port": "6007",
+ "endpoint": "/v1/dataprep",
+ "file_path": "",
+ "output": "Data preparation succeeded"
+ },
+ {
+ "service": "retrieval",
+ "port": "7000",
+ "endpoint": "/v1/retrieval",
+ "text": "test",
+ "embedding": "",
+ "output": "retrieved_docs"
+ },
+ {
+ "service": "rerank",
+ "port": "8808",
+ "endpoint": "rerank",
+ "query": "What is Deep Learning?",
+ "texts": ["Deep Learning is not...", "Deep learning is..."],
+ "output": "index"
+ },
+ {
+ "service": "llm",
+ "port": "8008",
+ "endpoint": "generate",
+ "inputs": "What is Deep Learning?",
+ "parameters": { "max_new_tokens": 64, "do_sample": true },
+ "output": "generated_text"
+ },
+ {
+ "service": "nginx",
+ "port": "80",
+ "endpoint": "v1/chatqna",
+ "messages": "What is the revenue of Nike in 2023?",
+ "output": "data: "
+ },
+ {
+ "service": "mega",
+ "port": "8888",
+ "endpoint": "v1/chatqna",
+ "messages": "What is the revenue of Nike in 2023?",
+ "output": "data: "
+ }
+ ]
+}
diff --git a/ChatQnA/tests/ChatQnA_Xeon.json b/ChatQnA/tests/ChatQnA_Xeon.json
new file mode 100644
index 000000000..476967abf
--- /dev/null
+++ b/ChatQnA/tests/ChatQnA_Xeon.json
@@ -0,0 +1,70 @@
+{
+ "ChatQnA_Xeon": [
+ {
+ "service": "env",
+ "port": "",
+ "endpoint": "",
+ "EMBEDDING_MODEL_ID": true,
+ "RERANK_MODEL_ID": true,
+ "LLM_MODEL_ID": true,
+ "INDEX_NAME": true,
+ "HUGGINGFACEHUB_API_TOKEN": true,
+ "http_proxy": false,
+ "https_proxy": false,
+ "no_proxy": false,
+ "output": false
+ },
+ {
+ "service": "embed",
+ "port": "6006",
+ "endpoint": "embed",
+ "inputs": "What is Deep Learning?",
+ "output": "[["
+ },
+ {
+ "service": "dataprep",
+ "port": "6007",
+ "endpoint": "/v1/dataprep",
+ "file_path": "",
+ "output": "Data preparation succeeded"
+ },
+ {
+ "service": "retrieval",
+ "port": "7000",
+ "endpoint": "/v1/retrieval",
+ "text": "test",
+ "embedding": "",
+ "output": "retrieved_docs"
+ },
+ {
+ "service": "rerank",
+ "port": "8808",
+ "endpoint": "rerank",
+ "query": "What is Deep Learning?",
+ "texts": ["Deep Learning is not...", "Deep learning is..."],
+ "output": "index"
+ },
+ {
+ "service": "llm",
+ "port": "9009",
+ "endpoint": "v1/chat/completions",
+ "model": "Intel/neural-chat-7b-v3-3",
+ "messages": [{"role": "user", "content": "What is Deep Learning?"}],
+ "output": "generated_text"
+ },
+ {
+ "service": "nginx",
+ "port": "80",
+ "endpoint": "v1/chatqna",
+ "messages": "What is the revenue of Nike in 2023?",
+ "output": "data: "
+ },
+ {
+ "service": "mega",
+ "port": "8888",
+ "endpoint": "v1/chatqna",
+ "messages": "What is the revenue of Nike in 2023?",
+ "output": "data: "
+ }
+ ]
+}