Merge branch 'main' into omar/gradio-default
okhleif-IL authored Dec 10, 2024
2 parents 16f39ca + 4c01e14 commit 588b6ab
Showing 38 changed files with 2,042 additions and 608 deletions.
46 changes: 41 additions & 5 deletions AudioQnA/audioqna.py
@@ -4,9 +4,11 @@
import asyncio
import os

from comps import AudioQnAGateway, MicroService, ServiceOrchestrator, ServiceType
from comps import Gateway, MegaServiceEndpoint, MicroService, ServiceOrchestrator, ServiceType
from comps.cores.proto.api_protocol import AudioChatCompletionRequest, ChatCompletionResponse
from comps.cores.proto.docarray import LLMParams
from fastapi import Request

MEGA_SERVICE_HOST_IP = os.getenv("MEGA_SERVICE_HOST_IP", "0.0.0.0")
MEGA_SERVICE_PORT = int(os.getenv("MEGA_SERVICE_PORT", 8888))
ASR_SERVICE_HOST_IP = os.getenv("ASR_SERVICE_HOST_IP", "0.0.0.0")
ASR_SERVICE_PORT = int(os.getenv("ASR_SERVICE_PORT", 9099))
@@ -16,7 +18,7 @@
TTS_SERVICE_PORT = int(os.getenv("TTS_SERVICE_PORT", 9088))


class AudioQnAService:
class AudioQnAService(Gateway):
def __init__(self, host="0.0.0.0", port=8000):
self.host = host
self.port = port
@@ -50,9 +52,43 @@ def add_remote_service(self):
self.megaservice.add(asr).add(llm).add(tts)
self.megaservice.flow_to(asr, llm)
self.megaservice.flow_to(llm, tts)
self.gateway = AudioQnAGateway(megaservice=self.megaservice, host="0.0.0.0", port=self.port)

async def handle_request(self, request: Request):
data = await request.json()

chat_request = AudioChatCompletionRequest.parse_obj(data)
parameters = LLMParams(
# relatively lower max_tokens for audio conversation
max_tokens=chat_request.max_tokens if chat_request.max_tokens else 128,
top_k=chat_request.top_k if chat_request.top_k else 10,
top_p=chat_request.top_p if chat_request.top_p else 0.95,
temperature=chat_request.temperature if chat_request.temperature else 0.01,
frequency_penalty=chat_request.frequency_penalty if chat_request.frequency_penalty else 0.0,
presence_penalty=chat_request.presence_penalty if chat_request.presence_penalty else 0.0,
repetition_penalty=chat_request.repetition_penalty if chat_request.repetition_penalty else 1.03,
streaming=False, # TODO add streaming LLM output as input to TTS
)
result_dict, runtime_graph = await self.megaservice.schedule(
initial_inputs={"byte_str": chat_request.audio}, llm_parameters=parameters
)

last_node = runtime_graph.all_leaves()[-1]
response = result_dict[last_node]["byte_str"]

return response

def start(self):
super().__init__(
megaservice=self.megaservice,
host=self.host,
port=self.port,
endpoint=str(MegaServiceEndpoint.AUDIO_QNA),
input_datatype=AudioChatCompletionRequest,
output_datatype=ChatCompletionResponse,
)


if __name__ == "__main__":
audioqna = AudioQnAService(host=MEGA_SERVICE_HOST_IP, port=MEGA_SERVICE_PORT)
audioqna = AudioQnAService(port=MEGA_SERVICE_PORT)
audioqna.add_remote_service()
audioqna.start()
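With the dedicated AudioQnAGateway gone, the service class itself now serves HTTP. For reference, a minimal client sketch for the rewritten gateway; it assumes MegaServiceEndpoint.AUDIO_QNA resolves to /v1/audioqna and that the base64 string returned by handle_request comes back as the JSON body (neither detail is shown in this diff):

```python
# Hedged client sketch for the AudioQnA megaservice above.
# Assumptions: the endpoint path is /v1/audioqna and the response body is
# the bare base64 audio string returned by handle_request.
import base64

import requests

with open("sample.wav", "rb") as f:
    audio_b64 = base64.b64encode(f.read()).decode("utf-8")

resp = requests.post(
    "http://localhost:8888/v1/audioqna",  # MEGA_SERVICE_PORT defaults to 8888
    json={"audio": audio_b64, "max_tokens": 64},  # AudioChatCompletionRequest fields
    timeout=120,
)
resp.raise_for_status()

# handle_request returns the TTS leaf's "byte_str", i.e. base64-encoded audio.
with open("reply.wav", "wb") as f:
    f.write(base64.b64decode(resp.json()))
```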
46 changes: 41 additions & 5 deletions AudioQnA/audioqna_multilang.py
@@ -5,9 +5,11 @@
import base64
import os

from comps import AudioQnAGateway, MicroService, ServiceOrchestrator, ServiceType
from comps import Gateway, MegaServiceEndpoint, MicroService, ServiceOrchestrator, ServiceType
from comps.cores.proto.api_protocol import AudioChatCompletionRequest, ChatCompletionResponse
from comps.cores.proto.docarray import LLMParams
from fastapi import Request

MEGA_SERVICE_HOST_IP = os.getenv("MEGA_SERVICE_HOST_IP", "0.0.0.0")
MEGA_SERVICE_PORT = int(os.getenv("MEGA_SERVICE_PORT", 8888))

WHISPER_SERVER_HOST_IP = os.getenv("WHISPER_SERVER_HOST_IP", "0.0.0.0")
@@ -52,7 +54,7 @@ def align_outputs(self, data, cur_node, inputs, runtime_graph, llm_parameters_di
return data


class AudioQnAService:
class AudioQnAService(Gateway):
def __init__(self, host="0.0.0.0", port=8000):
self.host = host
self.port = port
@@ -90,9 +92,43 @@ def add_remote_service(self):
self.megaservice.add(asr).add(llm).add(tts)
self.megaservice.flow_to(asr, llm)
self.megaservice.flow_to(llm, tts)
self.gateway = AudioQnAGateway(megaservice=self.megaservice, host="0.0.0.0", port=self.port)

async def handle_request(self, request: Request):
data = await request.json()

chat_request = AudioChatCompletionRequest.parse_obj(data)
parameters = LLMParams(
# relatively lower max_tokens for audio conversation
max_tokens=chat_request.max_tokens if chat_request.max_tokens else 128,
top_k=chat_request.top_k if chat_request.top_k else 10,
top_p=chat_request.top_p if chat_request.top_p else 0.95,
temperature=chat_request.temperature if chat_request.temperature else 0.01,
frequency_penalty=chat_request.frequency_penalty if chat_request.frequency_penalty else 0.0,
presence_penalty=chat_request.presence_penalty if chat_request.presence_penalty else 0.0,
repetition_penalty=chat_request.repetition_penalty if chat_request.repetition_penalty else 1.03,
streaming=False, # TODO add streaming LLM output as input to TTS
)
result_dict, runtime_graph = await self.megaservice.schedule(
initial_inputs={"byte_str": chat_request.audio}, llm_parameters=parameters
)

last_node = runtime_graph.all_leaves()[-1]
response = result_dict[last_node]["byte_str"]

return response

def start(self):
super().__init__(
megaservice=self.megaservice,
host=self.host,
port=self.port,
endpoint=str(MegaServiceEndpoint.AUDIO_QNA),
input_datatype=AudioChatCompletionRequest,
output_datatype=ChatCompletionResponse,
)


if __name__ == "__main__":
audioqna = AudioQnAService(host=MEGA_SERVICE_HOST_IP, port=MEGA_SERVICE_PORT)
audioqna = AudioQnAService(port=MEGA_SERVICE_PORT)
audioqna.add_remote_service()
audioqna.start()
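Both AudioQnA entrypoints above receive the same refactor: the dedicated gateway class is dropped, the service inherits Gateway directly, and start() defers the base-class constructor until the ASR → LLM → TTS graph is wired. A condensed sketch of the pattern, with a hypothetical class name, assuming (as the diff implies) that Gateway.__init__ binds handle_request to the endpoint and brings up the HTTP server:

```python
# Condensed sketch of the Gateway-inheritance pattern used in this commit.
# ExampleAudioService is a hypothetical name; the real classes are above.
from comps import Gateway, MegaServiceEndpoint, ServiceOrchestrator


class ExampleAudioService(Gateway):
    def __init__(self, host="0.0.0.0", port=8000):
        # Gateway.__init__ is deliberately not called yet: the orchestrator
        # graph must be built before the server starts accepting requests.
        self.host = host
        self.port = port
        self.megaservice = ServiceOrchestrator()

    async def handle_request(self, request):
        ...  # validate the request, schedule self.megaservice, return the leaf output

    def start(self):
        # Per the diff, super().__init__ registers handle_request on the
        # endpoint (plus input/output datatypes, as above) and starts serving.
        super().__init__(
            megaservice=self.megaservice,
            host=self.host,
            port=self.port,
            endpoint=str(MegaServiceEndpoint.AUDIO_QNA),
        )
```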
14 changes: 14 additions & 0 deletions AudioQnA/docker_compose/intel/cpu/xeon/compose.yaml
@@ -91,6 +91,20 @@ services:
- TTS_SERVICE_PORT=${TTS_SERVICE_PORT}
ipc: host
restart: always
audioqna-xeon-ui-server:
image: ${REGISTRY:-opea}/audioqna-ui:${TAG:-latest}
container_name: audioqna-xeon-ui-server
depends_on:
- audioqna-xeon-backend-server
ports:
- "5173:5173"
environment:
- no_proxy=${no_proxy}
- https_proxy=${https_proxy}
- http_proxy=${http_proxy}
- CHAT_URL=${BACKEND_SERVICE_ENDPOINT}
ipc: host
restart: always

networks:
default:
14 changes: 14 additions & 0 deletions AudioQnA/docker_compose/intel/hpu/gaudi/compose.yaml
@@ -112,6 +112,20 @@ services:
- TTS_SERVICE_PORT=${TTS_SERVICE_PORT}
ipc: host
restart: always
audioqna-gaudi-ui-server:
image: ${REGISTRY:-opea}/audioqna-ui:${TAG:-latest}
container_name: audioqna-gaudi-ui-server
depends_on:
- audioqna-gaudi-backend-server
ports:
- "5173:5173"
environment:
- no_proxy=${no_proxy}
- https_proxy=${https_proxy}
- http_proxy=${http_proxy}
- CHAT_URL=${BACKEND_SERVICE_ENDPOINT}
ipc: host
restart: always

networks:
default:
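Both compose files above point the new UI container's CHAT_URL at ${BACKEND_SERVICE_ENDPOINT}. Before opening the UI on port 5173, it can help to probe that endpoint from the host; a minimal sketch, where the fallback URL is only an illustrative assumption (the real value is deployment-specific):

```python
# Sanity check for the backend endpoint the UI container will call.
import os

import requests

# The default below is an assumption for illustration only.
backend = os.getenv("BACKEND_SERVICE_ENDPOINT", "http://localhost:8888/v1/audioqna")

try:
    # Any HTTP status (even 405/422 for a bare GET) proves the route is up.
    r = requests.get(backend, timeout=5)
    print(f"{backend} reachable (HTTP {r.status_code})")
except requests.exceptions.ConnectionError:
    print(f"{backend} unreachable; check the backend container and port mapping")
```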
2 changes: 1 addition & 1 deletion AudioQnA/tests/test_compose_on_gaudi.sh
@@ -19,7 +19,7 @@ function build_docker_images() {
git clone https://github.com/opea-project/GenAIComps.git && cd GenAIComps && git checkout "${opea_branch:-"main"}" && cd ../

echo "Build all the images with --no-cache, check docker_image_build.log for details..."
service_list="audioqna whisper-gaudi asr llm-tgi speecht5-gaudi tts"
service_list="audioqna audioqna-ui whisper-gaudi asr llm-tgi speecht5-gaudi tts"
docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log

docker pull ghcr.io/huggingface/tgi-gaudi:2.0.6
2 changes: 1 addition & 1 deletion AudioQnA/tests/test_compose_on_xeon.sh
@@ -19,7 +19,7 @@ function build_docker_images() {
git clone https://github.com/opea-project/GenAIComps.git && cd GenAIComps && git checkout "${opea_branch:-"main"}" && cd ../

echo "Build all the images with --no-cache, check docker_image_build.log for details..."
service_list="audioqna whisper asr llm-tgi speecht5 tts"
service_list="audioqna audioqna-ui whisper asr llm-tgi speecht5 tts"
docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log

docker pull ghcr.io/huggingface/tgi-gaudi:2.0.6
1 change: 1 addition & 0 deletions AudioQnA/ui/svelte/package.json
@@ -25,6 +25,7 @@
"@typescript-eslint/eslint-plugin": "^5.27.0",
"@typescript-eslint/parser": "^5.27.0",
"autoprefixer": "^10.4.16",
"daisyui": "^3.5.0",
"debug": "4.3.4",
"neverthrow": "5.0.0",
"pocketbase": "0.7.0",
45 changes: 40 additions & 5 deletions AvatarChatbot/avatarchatbot.py
@@ -5,9 +5,11 @@
import os
import sys

from comps import AvatarChatbotGateway, MicroService, ServiceOrchestrator, ServiceType
from comps import Gateway, MegaServiceEndpoint, MicroService, ServiceOrchestrator, ServiceType
from comps.cores.proto.api_protocol import AudioChatCompletionRequest, ChatCompletionResponse
from comps.cores.proto.docarray import LLMParams
from fastapi import Request

MEGA_SERVICE_HOST_IP = os.getenv("MEGA_SERVICE_HOST_IP", "0.0.0.0")
MEGA_SERVICE_PORT = int(os.getenv("MEGA_SERVICE_PORT", 8888))
ASR_SERVICE_HOST_IP = os.getenv("ASR_SERVICE_HOST_IP", "0.0.0.0")
ASR_SERVICE_PORT = int(os.getenv("ASR_SERVICE_PORT", 9099))
@@ -27,7 +29,7 @@ def check_env_vars(env_var_list):
print("All environment variables are set.")


class AvatarChatbotService:
class AvatarChatbotService(Gateway):
def __init__(self, host="0.0.0.0", port=8000):
self.host = host
self.port = port
@@ -70,7 +72,39 @@ def add_remote_service(self):
self.megaservice.flow_to(asr, llm)
self.megaservice.flow_to(llm, tts)
self.megaservice.flow_to(tts, animation)
self.gateway = AvatarChatbotGateway(megaservice=self.megaservice, host="0.0.0.0", port=self.port)

async def handle_request(self, request: Request):
data = await request.json()

chat_request = AudioChatCompletionRequest.model_validate(data)
parameters = LLMParams(
# relatively lower max_tokens for audio conversation
max_tokens=chat_request.max_tokens if chat_request.max_tokens else 128,
top_k=chat_request.top_k if chat_request.top_k else 10,
top_p=chat_request.top_p if chat_request.top_p else 0.95,
temperature=chat_request.temperature if chat_request.temperature else 0.01,
repetition_penalty=chat_request.presence_penalty if chat_request.presence_penalty else 1.03,
streaming=False, # TODO add streaming LLM output as input to TTS
)
# print(parameters)

result_dict, runtime_graph = await self.megaservice.schedule(
initial_inputs={"byte_str": chat_request.audio}, llm_parameters=parameters
)

last_node = runtime_graph.all_leaves()[-1]
response = result_dict[last_node]["video_path"]
return response

def start(self):
super().__init__(
megaservice=self.megaservice,
host=self.host,
port=self.port,
endpoint=str(MegaServiceEndpoint.AVATAR_CHATBOT),
input_datatype=AudioChatCompletionRequest,
output_datatype=ChatCompletionResponse,
)


if __name__ == "__main__":
@@ -89,5 +123,6 @@ def add_remote_service(self):
]
)

avatarchatbot = AvatarChatbotService(host=MEGA_SERVICE_HOST_IP, port=MEGA_SERVICE_PORT)
avatarchatbot = AvatarChatbotService(port=MEGA_SERVICE_PORT)
avatarchatbot.add_remote_service()
avatarchatbot.start()
2 changes: 1 addition & 1 deletion ChatQnA/README.md
@@ -196,7 +196,7 @@ In the below, we provide a table that describes for each microservice component
Gaudi default compose.yaml
| MicroService | Open Source Project | HW | Port | Endpoint |
| ------------ | ------------------- | ----- | ---- | -------------------- |
| Embedding | Langchain | Xeon | 6000 | /v1/embaddings |
| Embedding | Langchain | Xeon | 6000 | /v1/embeddings |
| Retriever | Langchain, Redis | Xeon | 7000 | /v1/retrieval |
| Reranking | Langchain, TEI | Gaudi | 8000 | /v1/reranking |
| LLM | Langchain, TGI | Gaudi | 9000 | /v1/chat/completions |
(remaining changed files not shown)
