diff --git a/.Triage.py b/.Triage.py
new file mode 100644
index 000000000..8b5974aab
--- /dev/null
+++ b/.Triage.py
@@ -0,0 +1,502 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+import json
+import os
+import unittest
+
+import requests
+
+# Run the test methods in name order (test_1 ... test_9) instead of re-sorting.
+unittest.TestLoader.sortTestMethodsUsing = None
+
+
+class RunCmd:
+
+    def run(self, cmd):
+        import subprocess
+
+        p = subprocess.Popen(cmd, stdout=subprocess.PIPE, shell=True)
+        (output, err) = p.communicate()
+        p_status = p.returncode  # communicate() has already waited for the process
+        return p_status, output
+
+
+class ProfileUtility:
+
+    def __init__(self):
+        self.docker_name = ""
+        self.start_profile_func = None
+        self.stop_profile_func = None
+        self.get_profile_result_func = None
+
+    def set_profile_funcs(self, docker_name):
+        # Only the vLLM service exposes start/stop profile endpoints today.
+        if docker_name == "vllm-service":
+            self.docker_name = docker_name
+            self.start_profile_func = self.vllm_start_profile
+            self.stop_profile_func = self.vllm_stop_profile
+            self.get_profile_result_func = self.vllm_get_profile_result
+        else:
+            self.start_profile_func = None
+            self.stop_profile_func = None
+            self.get_profile_result_func = None
+            self.docker_name = ""
+
+    def vllm_start_profile(self, ip, port, data):
+        print("vllm_start_profile")
+        # Send out the start-profile request
+        endpoint = ip + ":" + port + "/start_profile"
+        data.pop("messages", None)
+        requests.post(
+            url=endpoint, json=data, headers={"Content-Type": "application/json"}, proxies={"http": None}
+        )
+
+    def vllm_stop_profile(self, ip, port, data):
+        print("vllm_stop_profile")
+        endpoint = ip + ":" + port + "/stop_profile"
+        data.pop("messages", None)
+        requests.post(
+            url=endpoint, json=data, headers={"Content-Type": "application/json"}, proxies={"http": None}
+        )
+
+    def vllm_get_profile_result(self, result_folder):
+        print("vllm_get_profile_result, result folder : " + result_folder)
+        result_folder_path = "./" + result_folder
+        # Copy the trace files out of the container into the result folder.
+        cmd = "docker cp " + self.docker_name + ":/mnt/ " + result_folder_path
+        status, output = RunCmd().run(cmd)
+
+        import re
+
+        pattern = r".*\.pt\.trace\.json\.gz$"  # Match trace files ending with ".pt.trace.json.gz"
+        files_list = []
+        for filename in os.listdir(result_folder_path + os.sep + "mnt/"):
+            if re.search(pattern, filename):
+                files_list.append(filename)
+        return files_list
+
+
+class TriageUtility:
+
+    def __init__(self, filename):
+        # The top-level key in the data JSON must match the file's base name.
+        self.class_name = filename.split(".")[0]
+        self.filename = filename
+        self.prof_utils = ProfileUtility()
+
+    def load_input_data(self, service):
+        class_name = self.class_name
+        with open(self.filename, "r") as file:
+            try:
+                data = json.load(file)
+            except json.JSONDecodeError:
+                print("json load failed: " + self.filename)
+                return None, None, None, None
+        for i in data[class_name]:
+            if i["service"] == service:
+                i.pop("service")
+                port = i.pop("port")
+                endpoint = i.pop("endpoint")
+                output = i.pop("output")
+                return i, port, endpoint, output
+        return None, None, None, None
+
+    def service_health_check(self, ip, port, triage_report, triage_level):
+        # Health Check
+        if triage_level < 1:
+            return
+        endpoint = ip + ":" + port + "/v1/health_check"
+        response = requests.get(url=endpoint, headers={"Content-Type": "application/json"}, proxies={"http": None})
+        triage_report.update_docker_report(port, "Health", response.status_code == 200)
+        return response.status_code
+
+    def service_test(self, ip, port, endpoint_name, data, triage_report, triage_level):
+        if triage_level < 2:
+            return
+
+        # Start Profiling
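+        # Profiling is only attempted at triage_level > 2, and only when the
+        # container behind this port has a registered profiler (currently vllm-service).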
+        docker_name = triage_report.get_docker_name(port)
+        self.prof_utils.set_profile_funcs(docker_name)
+        if triage_level > 2 and self.prof_utils.start_profile_func is not None:
+            print("start profiling")
+            tmp_data = data.copy()
+            self.prof_utils.start_profile_func(ip, port, tmp_data)
+
+        # Send out test request
+        endpoint = ip + ":" + port + "/" + endpoint_name
+        response = requests.post(
+            url=endpoint, json=data, headers={"Content-Type": "application/json"}, proxies={"http": None}
+        )
+
+        # End Profiling
+        if triage_level > 2 and self.prof_utils.stop_profile_func is not None:
+            print("end profiling")
+            tmp_data = data.copy()
+            self.prof_utils.stop_profile_func(ip, port, tmp_data)
+            # Save Profile results
+            profile_files_list = self.prof_utils.get_profile_result_func(triage_report.result_folder_name)
+            if profile_files_list:
+                triage_report.update_docker_report(port, "Profile", profile_files_list[0])
+
+        triage_report.update_docker_report(port, "Test", response.status_code == 200)
+        log_fname = triage_report.dump_docker_logs(port)
+        triage_report.update_docker_report(port, "Log", log_fname)
+        return response.status_code
+
+    def service_statistics(self, ip, port, triage_report, triage_level):
+        # statistics
+        if triage_level < 1:
+            return
+        endpoint = ip + ":" + port + "/v1/statistics"
+        response = requests.get(url=endpoint, headers={"Content-Type": "application/json"}, proxies={"http": None})
+        triage_report.update_docker_report(port, "Stats", response.status_code == 200)
+        return response.status_code
+
+
+class TriageReport:
+    def __init__(self, name):
+        self.name = name
+        self.env_vars_df = None
+        self.system_info_df = None
+        self.docker_ps = ""
+        self.docker_ps_df = None
+        self.docker_report_df = None
+        import datetime
+
+        # A timestamped folder keeps results from repeated runs separate.
+        d = datetime.datetime.now()
+        dateinfo = d.strftime("%m-%d_%H-%M")
+        self.result_folder_name = self.name + "_" + dateinfo
+        if not os.path.exists(self.result_folder_name):
+            os.makedirs(self.result_folder_name)
+
+    def parse_docker_ps_to_df(self, output):
+        # Best-effort parsing of `docker ps` output: the header row becomes the
+        # columns, and each data row is reduced to [IMAGE, host port, NAMES].
+        columns = []
+        rows = []
+        for line in output.decode("utf-8").splitlines():
+            row = line.split()
+            if columns != []:
+                if len(row) < 3:
+                    continue  # container with no published ports
+                # Assumes dual-stack port output ("0.0.0.0:p->c/tcp, :::p->c/tcp"):
+                # drop the first mapping, collapse any extras, keep the host port.
+                row.pop(1)
+                if len(row) > 3:
+                    for i in range(len(row)):
+                        if i >= 1 and i < (len(row) - 1):
+                            row.pop(2)
+                row[1] = row[1].split("->")[0]
+                row[1] = row[1].split(":")[-1]
+                rows.append(row)
+            else:
+                columns = row
+
+        import pandas as pd
+
+        df = pd.DataFrame(rows, columns=columns)
+        self.docker_ps_df = df
+
+    def init_docker_report(self):
+        df = self.docker_ps_df.copy()
+        new_col = [""] * df.shape[0]
+        df["Health"] = new_col
+        df["Test"] = new_col
+        df["Stats"] = new_col
+        df["Log"] = new_col
+        df["Profile"] = new_col
+        self.docker_report_df = df
+
+    def update_docker_report(self, port, key, value):
+        df = self.docker_report_df
+        index_list = df.index[df["PORTS"] == port].tolist()
+        if index_list:
+            df.at[index_list[0], key] = value
+
+    def get_docker_name(self, port):
+        df = self.docker_ps_df
+        docker_name = df.loc[df["PORTS"] == port, "NAMES"].values[0]
+        return docker_name
+
+    def dump_docker_logs(self, port):
+        df = self.docker_ps_df
+        docker_name = df.loc[df["PORTS"] == port, "NAMES"].values[0]
+
+        cmd = "docker logs " + docker_name
+        status, output = RunCmd().run(cmd)
+        output = output.decode("utf-8")
+        filename = docker_name + "_docker_log.txt"
+        self.dump_log_to_file(output, filename)
+        return filename
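+    # Captured logs and the HTML report land in the timestamped result folder,
+    # so the report can link each log file by relative path.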
+    def dump_log_to_file(self, output, filename):
+        filepath = self.result_folder_name + os.sep + filename
+        with open(filepath, "w") as fd:  # overwrite any log from a previous run
+            fd.write(output)
+
+    def generate_triage_report(self):
+        import re
+
+        print(" Example Name: " + self.name)
+        print(" ### ENV Variables ###")
+        print(self.env_vars_df)
+        print(" ### System Info ###")
+        print(self.system_info_df)
+        print(" ### Services Status ###")
+        print(self.docker_report_df)
+
+        report_name = self.name + ".html"
+        report_path = self.result_folder_name + os.sep + report_name
+
+        # Log Files: link every captured docker log from the report.
+        docker_log_html_content = ""
+        pattern = r".*_docker_log\.txt$"  # Match the log files dumped above
+        for filename in os.listdir(self.result_folder_name):
+            if re.search(pattern, filename):
+                html_content = (
+                    "\n\n<h3>"
+                    + filename
+                    + "</h3>\n"
+                    + '<a href="' + filename + '">' + filename + "</a>"
+                )
+                docker_log_html_content = docker_log_html_content + html_content
+
+        with open(report_path, "w") as hfile:
+            hfile.write(
+                "<html><body>\n\n<h2>1. Services Status</h2>\n\n"
+                + self.docker_report_df.to_html()
+                + "\n\n<h2>2. System Info</h2>\n\n"
+                + self.system_info_df.to_html()
+                + "\n\n<h2>3. Environment Variables</h2>\n\n"
+                + self.env_vars_df.to_html()
+                + "\n\n<h2>4. Docker Log Files</h2>\n\n"
+                + docker_log_html_content
+                + "\n\n</body></html>"
+            )
+
+        print("\nReport File is : " + report_path)
+        import shutil
+
+        # Bundle the whole result folder for easy sharing.
+        shutil.make_archive(self.result_folder_name, "zip", self.result_folder_name)
+
+
+class ChatQnA(unittest.TestCase):
+    def setUp(self):
+        # triage_level, triage_report and DataJsonFileName are set in __main__.
+        self.triage_level = triage_level
+        self.triage_report = triage_report
+        self.ip = "http://0.0.0.0"
+        self.datafile = DataJsonFileName
+        self.classname = DataJsonFileName.split(".")[0]
+        self.utils = TriageUtility(self.datafile)
+
+    def tearDown(self):
+        return
+
+    def test_1_env_vars(self):
+        EmptyVar = False
+
+        service_name = "env"
+        data, port, endpoint_name, output = self.utils.load_input_data(service_name)
+        self.assertNotEqual(data, None)
+
+        rows = []
+        from os import environ
+
+        for key, val in data.items():
+            var = environ.get(key)
+            row = [key, var, val]
+            rows.append(row)
+            # A variable marked "true" in the data JSON is required.
+            if val is True:
+                if var is None:
+                    EmptyVar = True
+        import pandas as pd
+
+        columns = ["env", "value", "required"]
+        df = pd.DataFrame(rows, columns=columns)
+        self.triage_report.env_vars_df = df
+        self.assertEqual(EmptyVar, False)
+
+    def test_2_system(self):
+        import socket
+
+        hostname = socket.gethostname()
+        IPAddr = socket.gethostbyname(hostname)
+
+        import platform
+
+        system_info = platform.uname()
+
+        import pandas as pd
+
+        rows = []
+        columns = ["info", "value"]
+        rows.append(["hostname", hostname])
+        rows.append(["ip", IPAddr])
+        rows.append(["system", system_info.system])
+        rows.append(["node", system_info.node])
+        rows.append(["release", system_info.release])
+        rows.append(["version", system_info.version])
+        rows.append(["machine", system_info.machine])
+        rows.append(["processor", system_info.processor])
+        df = pd.DataFrame(rows, columns=columns)
+        self.triage_report.system_info_df = df
+
+        status, output = RunCmd().run("docker ps --format 'table {{.Image}}\t{{.Ports}}\t{{.Names}}'")
+        self.triage_report.parse_docker_ps_to_df(output)
+        self.triage_report.docker_ps = output
+
+        self.triage_report.init_docker_report()
+
+    def test_3_embed(self):
+        service_name = "embed"
+        # Get configs/data
+        data, port, endpoint_name, output = self.utils.load_input_data(service_name)
+        self.assertNotEqual(data, None)
+
+        # Testing
+        response_status_code = self.utils.service_test(
+            self.ip, port, endpoint_name, data, self.triage_report, self.triage_level
+        )
+        self.assertEqual(response_status_code, 200)
+
+    def test_4_dataprep(self):
+        service_name = "dataprep"
+        # Get configs/data
+        data, port, endpoint_name, output = self.utils.load_input_data(service_name)
+        self.assertNotEqual(data, None)
+
+        # Health Check
+        response_status_code = self.utils.service_health_check(self.ip, port, self.triage_report, self.triage_level)
+        self.assertEqual(response_status_code, 200)
+
+        # Testing: file-upload requests for dataprep are not automated here.
+
+        # Statistic
+        response_status_code = self.utils.service_statistics(self.ip, port, self.triage_report, self.triage_level)
+        self.assertEqual(response_status_code, 200)
+
+    def test_5_retrieval(self):
+        service_name = "retrieval"
+        # Get configs/data
+        data, port, endpoint_name, output = self.utils.load_input_data(service_name)
+        self.assertNotEqual(data, None)
+        import random
+
+        embedding = [random.uniform(-1, 1) for _ in range(768)]
+        data = {"text": "test", "embedding": embedding}
+
+        # Health Check
+        response_status_code = self.utils.service_health_check(self.ip, port, self.triage_report, self.triage_level)
+        self.assertEqual(response_status_code, 200)
+
+        # Testing
+        response_status_code = self.utils.service_test(
+            self.ip, port, endpoint_name, data, self.triage_report, self.triage_level
+        )
+        self.assertEqual(response_status_code, 200)
+
+        # Statistic
+        response_status_code = self.utils.service_statistics(self.ip, port, self.triage_report, self.triage_level)
+        self.assertEqual(response_status_code, 200)
+
+    def test_6_rerank(self):
+        service_name = "rerank"
+        # Get configs/data
+        data, port, endpoint_name, output = self.utils.load_input_data(service_name)
+        self.assertNotEqual(data, None)
+
+        # Testing
+        response_status_code = self.utils.service_test(
+            self.ip, port, endpoint_name, data, self.triage_report, self.triage_level
+        )
+        self.assertEqual(response_status_code, 200)
+
+    def test_7_nginx(self):
+        service_name = "nginx"
+        # Get configs/data
+        data, port, endpoint_name, output = self.utils.load_input_data(service_name)
+        self.assertNotEqual(data, None)
+
+        # Testing
+        response_status_code = self.utils.service_test(
+            self.ip, port, endpoint_name, data, self.triage_report, self.triage_level
+        )
+        self.assertEqual(response_status_code, 200)
+
+    def test_8_llm(self):
+        service_name = "llm"
+        # Get configs/data
+        data, port, endpoint_name, output = self.utils.load_input_data(service_name)
+        self.assertNotEqual(data, None)
+
+        # Testing
+        response_status_code = self.utils.service_test(
+            self.ip, port, endpoint_name, data, self.triage_report, self.triage_level
+        )
+        self.assertEqual(response_status_code, 200)
+
+    def test_9_mega(self):
+        service_name = "mega"
+        # Get configs/data
+        data, port, endpoint_name, output = self.utils.load_input_data(service_name)
+        self.assertNotEqual(data, None)
+
+        # Health Check
+        response_status_code = self.utils.service_health_check(self.ip, port, self.triage_report, self.triage_level)
+        self.assertEqual(response_status_code, 200)
+
+        # Testing
+        response_status_code = self.utils.service_test(
+            self.ip, port, endpoint_name, data, self.triage_report, self.triage_level
+        )
+        self.assertEqual(response_status_code, 200)
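+
+
+# Usage: python .Triage.py <data-json>, e.g. ChatQnA_Xeon.json. The top-level
+# key inside the data JSON must match the file's base name (see TriageUtility).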
+if __name__ == "__main__":
+    import sys
+
+    if len(sys.argv) < 2:
+        raise IndexError("Please provide data json file.")
+    triage_level = 3  # 1 = health/stats only, 2 = + service tests, 3 = + profiling
+    DataJsonFileName = sys.argv[1]  # e.g. "ChatQnA_Xeon.json"
+    triage_report = TriageReport(DataJsonFileName)
+    test_loader = unittest.TestLoader()
+    suite = test_loader.loadTestsFromTestCase(ChatQnA)
+    unittest.TextTestRunner(verbosity=3).run(suite)
+    triage_report.generate_triage_report()
diff --git a/ChatQnA/docker_compose/intel/cpu/xeon/set_env.sh b/ChatQnA/docker_compose/intel/cpu/xeon/set_env.sh
index 07fde1cf1..070becd78 100755
--- a/ChatQnA/docker_compose/intel/cpu/xeon/set_env.sh
+++ b/ChatQnA/docker_compose/intel/cpu/xeon/set_env.sh
@@ -6,7 +6,6 @@
 pushd "../../../../../" > /dev/null
 source .set_env.sh
 popd > /dev/null
-
 export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
 export RERANK_MODEL_ID="BAAI/bge-reranker-base"
 export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
diff --git a/ChatQnA/tests/ChatQnA_AIPC.json b/ChatQnA/tests/ChatQnA_AIPC.json
new file mode 100644
index 000000000..47894a456
--- /dev/null
+++ b/ChatQnA/tests/ChatQnA_AIPC.json
@@ -0,0 +1,71 @@
+{
+  "ChatQnA_AIPC": [
+    {
+      "service": "env",
+      "port": "",
+      "endpoint": "",
+      "EMBEDDING_MODEL_ID": true,
+      "RERANK_MODEL_ID": true,
+      "OLLAMA_MODEL": true,
+      "OLLAMA_HOST": true,
+      "INDEX_NAME": true,
+      "HUGGINGFACEHUB_API_TOKEN": true,
+      "http_proxy": false,
+      "https_proxy": false,
+      "no_proxy": false,
+      "output": false
+    },
+    {
+      "service": "embed",
+      "port": "6006",
+      "endpoint": "embed",
+      "inputs": "What is Deep Learning?",
+      "output": "[["
+    },
+    {
+      "service": "dataprep",
+      "port": "6007",
+      "endpoint": "/v1/dataprep",
+      "file_path": "",
+      "output": "Data preparation succeeded"
+    },
+    {
+      "service": "retrieval",
+      "port": "7000",
+      "endpoint": "/v1/retrieval",
+      "text": "test",
+      "embedding": "",
+      "output": "retrieved_docs"
+    },
+    {
+      "service": "rerank",
+      "port": "8808",
+      "endpoint": "rerank",
+      "query": "What is Deep Learning?",
+      "texts": ["Deep Learning is not...", "Deep learning is..."],
+      "output": "index"
+    },
+    {
+      "service": "llm",
+      "port": "11434",
+      "endpoint": "api/generate",
+      "model": "llama3.2",
+      "prompt": "What is Deep Learning?",
+      "output": "generated_text"
+    },
+    {
+      "service": "nginx",
+      "port": "80",
+      "endpoint": "v1/chatqna",
+      "messages": "What is the revenue of Nike in 2023?",
+      "output": "data: "
+    },
+    {
+      "service": "mega",
+      "port": "8888",
+      "endpoint": "v1/chatqna",
+      "messages": "What is the revenue of Nike in 2023?",
+      "output": "data: "
+    }
+  ]
+}
diff --git a/ChatQnA/tests/ChatQnA_Gaudi.json b/ChatQnA/tests/ChatQnA_Gaudi.json
new file mode 100644
index 000000000..4afe93879
--- /dev/null
+++ b/ChatQnA/tests/ChatQnA_Gaudi.json
@@ -0,0 +1,70 @@
+{
+  "ChatQnA_Gaudi": [
+    {
+      "service": "env",
+      "port": "",
+      "endpoint": "",
+      "EMBEDDING_MODEL_ID": true,
+      "RERANK_MODEL_ID": true,
+      "LLM_MODEL_ID": true,
+      "INDEX_NAME": true,
+      "HUGGINGFACEHUB_API_TOKEN": true,
+      "http_proxy": false,
+      "https_proxy": false,
+      "no_proxy": false,
+      "output": false
+    },
+    {
+      "service": "embed",
+      "port": "8090",
+      "endpoint": "embed",
+      "inputs": "What is Deep Learning?",
+      "output": "[["
+    },
+    {
+      "service": "dataprep",
+      "port": "6007",
+      "endpoint": "/v1/dataprep",
+      "file_path": "",
+      "output": "Data preparation succeeded"
+    },
+    {
+      "service": "retrieval",
+      "port": "7000",
+      "endpoint": "/v1/retrieval",
+      "text": "test",
+      "embedding": "",
+      "output": "retrieved_docs"
+    },
+    {
+      "service": "rerank",
+      "port": "8808",
+      "endpoint": "rerank",
+      "query": "What is Deep Learning?",
+      "texts": ["Deep Learning is not...", "Deep learning is..."],
+      "output": "index"
+    },
+    {
+      "service": "llm",
+      "port": "8005",
+      "endpoint": "v1/chat/completions",
+      "model": "Intel/neural-chat-7b-v3-3",
+      "messages": [{ "role": "user", "content": "What is Deep Learning?" }],
+      "output": "generated_text"
+    },
+    {
+      "service": "nginx",
+      "port": "80",
+      "endpoint": "v1/chatqna",
+      "messages": "What is the revenue of Nike in 2023?",
+      "output": "data: "
+    },
+    {
+      "service": "mega",
+      "port": "8888",
+      "endpoint": "v1/chatqna",
+      "messages": "What is the revenue of Nike in 2023?",
+      "output": "data: "
+    }
+  ]
+}
diff --git a/ChatQnA/tests/ChatQnA_Gaudi_vllm.json b/ChatQnA/tests/ChatQnA_Gaudi_vllm.json
new file mode 100644
index 000000000..f8cdefa74
--- /dev/null
+++ b/ChatQnA/tests/ChatQnA_Gaudi_vllm.json
@@ -0,0 +1,70 @@
+{
+  "ChatQnA_Gaudi_vllm": [
+    {
+      "service": "env",
+      "port": "",
+      "endpoint": "",
+      "EMBEDDING_MODEL_ID": true,
+      "RERANK_MODEL_ID": true,
+      "LLM_MODEL_ID": true,
+      "INDEX_NAME": true,
+      "HUGGINGFACEHUB_API_TOKEN": true,
+      "http_proxy": false,
+      "https_proxy": false,
+      "no_proxy": false,
+      "output": false
+    },
+    {
+      "service": "embed",
+      "port": "8090",
+      "endpoint": "embed",
+      "inputs": "What is Deep Learning?",
+      "output": "[["
+    },
+    {
+      "service": "dataprep",
+      "port": "6007",
+      "endpoint": "/v1/dataprep",
+      "file_path": "",
+      "output": "Data preparation succeeded"
+    },
+    {
+      "service": "retrieval",
+      "port": "7000",
+      "endpoint": "/v1/retrieval",
+      "text": "test",
+      "embedding": "",
+      "output": "retrieved_docs"
+    },
+    {
+      "service": "rerank",
+      "port": "8808",
+      "endpoint": "rerank",
+      "query": "What is Deep Learning?",
+      "texts": ["Deep Learning is not...", "Deep learning is..."],
+      "output": "index"
+    },
+    {
+      "service": "llm",
+      "port": "8007",
+      "endpoint": "v1/chat/completions",
+      "model": "Intel/neural-chat-7b-v3-3",
+      "messages": [{ "role": "user", "content": "What is Deep Learning?" }],
+      "output": "generated_text"
+    },
+    {
+      "service": "nginx",
+      "port": "80",
+      "endpoint": "v1/chatqna",
+      "messages": "What is the revenue of Nike in 2023?",
+      "output": "data: "
+    },
+    {
+      "service": "mega",
+      "port": "8888",
+      "endpoint": "v1/chatqna",
+      "messages": "What is the revenue of Nike in 2023?",
+      "output": "data: "
+    }
+  ]
+}
"service": "llm", + "port": "8008", + "endpoint": "v1/chat/completions", + "model": "Intel/neural-chat-7b-v3-3", + "messages": [{ "role": "user", "content": "What is Deep Learning?" }], + "output": "generated_text" + }, + { + "service": "nginx", + "port": "80", + "endpoint": "v1/chatqna", + "messages": "What is the revenue of Nike in 2023?", + "output": "data: " + }, + { + "service": "mega", + "port": "8888", + "endpoint": "v1/chatqna", + "messages": "What is the revenue of Nike in 2023?", + "output": "data: " + } + ] +} diff --git a/ChatQnA/tests/ChatQnA_ROCm.json b/ChatQnA/tests/ChatQnA_ROCm.json new file mode 100644 index 000000000..893e6bef6 --- /dev/null +++ b/ChatQnA/tests/ChatQnA_ROCm.json @@ -0,0 +1,95 @@ +{ + "ChatQnA_ROCm": [ + { + "service": "env", + "port": "", + "endpoint": "", + "CHATQNA_HUGGINGFACEHUB_API_TOKEN": true, + "CHATQNA_TGI_SERVICE_IMAGE": true, + "CHATQNA_EMBEDDING_MODEL_ID": true, + "CHATQNA_RERANK_MODEL_ID": true, + "CHATQNA_LLM_MODEL_ID": true, + "CHATQNA_TGI_SERVICE_PORT": true, + "CHATQNA_TEI_EMBEDDING_PORT": true, + "CHATQNA_TEI_EMBEDDING_ENDPOINT": true, + "CHATQNA_TEI_RERANKING_PORT": true, + "CHATQNA_REDIS_VECTOR_PORT": true, + "CHATQNA_REDIS_VECTOR_INSIGHT_PORT": true, + "CHATQNA_REDIS_DATAPREP_PORT": true, + "CHATQNA_REDIS_RETRIEVER_PORT": true, + "CHATQNA_INDEX_NAME": true, + "CHATQNA_MEGA_SERVICE_HOST_IP": true, + "CHATQNA_RETRIEVER_SERVICE_HOST_IP": true, + "CHATQNA_BACKEND_SERVICE_ENDPOINT": true, + "CHATQNA_DATAPREP_SERVICE_ENDPOINT": true, + "CHATQNA_DATAPREP_GET_FILE_ENDPOINT": true, + "CHATQNA_DATAPREP_DELETE_FILE_ENDPOINT": true, + "CHATQNA_FRONTEND_SERVICE_IP": true, + "CHATQNA_FRONTEND_SERVICE_PORT": true, + "CHATQNA_BACKEND_SERVICE_NAME": true, + "CHATQNA_BACKEND_SERVICE_IP": true, + "CHATQNA_BACKEND_SERVICE_PORT": true, + "CHATQNA_REDIS_URL": true, + "CHATQNA_EMBEDDING_SERVICE_HOST_IP": true, + "CHATQNA_RERANK_SERVICE_HOST_IP": true, + "CHATQNA_LLM_SERVICE_HOST_IP": true, + "CHATQNA_NGINX_PORT": true, + "http_proxy": false, + "https_proxy": false, + "no_proxy": false, + "output": false + }, + { + "service": "embed", + "port": "8090", + "endpoint": "embed", + "inputs": "What is Deep Learning?", + "output": "[[" + }, + { + "service": "dataprep", + "port": "6007", + "endpoint": "/v1/dataprep", + "file_path": "", + "output": "Data preparation succeeded" + }, + { + "service": "retrieval", + "port": "7000", + "endpoint": "/v1/retrieval", + "text": "test", + "embedding": "", + "output": "retrieved_docs" + }, + { + "service": "rerank", + "port": "8808", + "endpoint": "rerank", + "query": "What is Deep Learning?", + "texts": ["Deep Learning is not...", "Deep learning is..."], + "output": "index" + }, + { + "service": "llm", + "port": "8008", + "endpoint": "generate", + "inputs": "What is Deep Learning?", + "parameters": { "max_new_tokens": 64, "do_sample": true }, + "output": "generated_text" + }, + { + "service": "nginx", + "port": "80", + "endpoint": "v1/chatqna", + "messages": "What is the revenue of Nike in 2023?", + "output": "data: " + }, + { + "service": "mega", + "port": "8888", + "endpoint": "v1/chatqna", + "messages": "What is the revenue of Nike in 2023?", + "output": "data: " + } + ] +} diff --git a/ChatQnA/tests/ChatQnA_Xeon.json b/ChatQnA/tests/ChatQnA_Xeon.json new file mode 100644 index 000000000..476967abf --- /dev/null +++ b/ChatQnA/tests/ChatQnA_Xeon.json @@ -0,0 +1,70 @@ +{ + "ChatQnA_Xeon": [ + { + "service": "env", + "port": "", + "endpoint": "", + "EMBEDDING_MODEL_ID": true, + "RERANK_MODEL_ID": true, + "LLM_MODEL_ID": true, + 
"INDEX_NAME": true, + "HUGGINGFACEHUB_API_TOKEN": true, + "http_proxy": false, + "https_proxy": false, + "no_proxy": false, + "output": false + }, + { + "service": "embed", + "port": "6006", + "endpoint": "embed", + "inputs": "What is Deep Learning?", + "output": "[[" + }, + { + "service": "dataprep", + "port": "6007", + "endpoint": "/v1/dataprep", + "file_path": "", + "output": "Data preparation succeeded" + }, + { + "service": "retrieval", + "port": "7000", + "endpoint": "/v1/retrieval", + "text": "test", + "embedding": "", + "output": "retrieved_docs" + }, + { + "service": "rerank", + "port": "8808", + "endpoint": "rerank", + "query": "What is Deep Learning?", + "texts": ["Deep Learning is not...", "Deep learning is..."], + "output": "index" + }, + { + "service": "llm", + "port": "9009", + "endpoint": "v1/chat/completions", + "model": "Intel/neural-chat-7b-v3-3", + "messages": [{"role": "user", "content": "What is Deep Learning?"}], + "output": "generated_text" + }, + { + "service": "nginx", + "port": "80", + "endpoint": "v1/chatqna", + "messages": "What is the revenue of Nike in 2023?", + "output": "data: " + }, + { + "service": "mega", + "port": "8888", + "endpoint": "v1/chatqna", + "messages": "What is the revenue of Nike in 2023?", + "output": "data: " + } + ] +}