Merge branch 'main' into omar/gradio-default
okhleif-IL authored Dec 10, 2024
2 parents 16f39ca + 4c01e14 commit 588b6ab
Showing 38 changed files with 2,042 additions and 608 deletions.
46 changes: 41 additions & 5 deletions AudioQnA/audioqna.py
@@ -4,9 +4,11 @@
import asyncio
import os

from comps import AudioQnAGateway, MicroService, ServiceOrchestrator, ServiceType
from comps import Gateway, MegaServiceEndpoint, MicroService, ServiceOrchestrator, ServiceType
from comps.cores.proto.api_protocol import AudioChatCompletionRequest, ChatCompletionResponse
from comps.cores.proto.docarray import LLMParams
from fastapi import Request

MEGA_SERVICE_HOST_IP = os.getenv("MEGA_SERVICE_HOST_IP", "0.0.0.0")
MEGA_SERVICE_PORT = int(os.getenv("MEGA_SERVICE_PORT", 8888))
ASR_SERVICE_HOST_IP = os.getenv("ASR_SERVICE_HOST_IP", "0.0.0.0")
ASR_SERVICE_PORT = int(os.getenv("ASR_SERVICE_PORT", 9099))
@@ -16,7 +18,7 @@
TTS_SERVICE_PORT = int(os.getenv("TTS_SERVICE_PORT", 9088))


class AudioQnAService:
class AudioQnAService(Gateway):
def __init__(self, host="0.0.0.0", port=8000):
self.host = host
self.port = port
@@ -50,9 +52,43 @@ def add_remote_service(self):
self.megaservice.add(asr).add(llm).add(tts)
self.megaservice.flow_to(asr, llm)
self.megaservice.flow_to(llm, tts)
self.gateway = AudioQnAGateway(megaservice=self.megaservice, host="0.0.0.0", port=self.port)

async def handle_request(self, request: Request):
data = await request.json()

chat_request = AudioChatCompletionRequest.parse_obj(data)
parameters = LLMParams(
# relatively lower max_tokens for audio conversation
max_tokens=chat_request.max_tokens if chat_request.max_tokens else 128,
top_k=chat_request.top_k if chat_request.top_k else 10,
top_p=chat_request.top_p if chat_request.top_p else 0.95,
temperature=chat_request.temperature if chat_request.temperature else 0.01,
frequency_penalty=chat_request.frequency_penalty if chat_request.frequency_penalty else 0.0,
presence_penalty=chat_request.presence_penalty if chat_request.presence_penalty else 0.0,
repetition_penalty=chat_request.repetition_penalty if chat_request.repetition_penalty else 1.03,
streaming=False, # TODO add streaming LLM output as input to TTS
)
result_dict, runtime_graph = await self.megaservice.schedule(
initial_inputs={"byte_str": chat_request.audio}, llm_parameters=parameters
)

last_node = runtime_graph.all_leaves()[-1]
response = result_dict[last_node]["byte_str"]

return response

def start(self):
super().__init__(
megaservice=self.megaservice,
host=self.host,
port=self.port,
endpoint=str(MegaServiceEndpoint.AUDIO_QNA),
input_datatype=AudioChatCompletionRequest,
output_datatype=ChatCompletionResponse,
)


if __name__ == "__main__":
audioqna = AudioQnAService(host=MEGA_SERVICE_HOST_IP, port=MEGA_SERVICE_PORT)
audioqna = AudioQnAService(port=MEGA_SERVICE_PORT)
audioqna.add_remote_service()
audioqna.start()
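With the dedicated AudioQnAGateway gone, the service class itself now serves HTTP. For reference, a minimal client sketch for the rewritten gateway; it assumes MegaServiceEndpoint.AUDIO_QNA resolves to /v1/audioqna and that the base64 string returned by handle_request comes back as the JSON body (neither detail is shown in this diff):

```python
# Hedged client sketch for the AudioQnA megaservice above.
# Assumptions: the endpoint path is /v1/audioqna and the response body is
# the bare base64 audio string returned by handle_request.
import base64

import requests

with open("sample.wav", "rb") as f:
    audio_b64 = base64.b64encode(f.read()).decode("utf-8")

resp = requests.post(
    "http://localhost:8888/v1/audioqna",  # MEGA_SERVICE_PORT defaults to 8888
    json={"audio": audio_b64, "max_tokens": 64},  # AudioChatCompletionRequest fields
    timeout=120,
)
resp.raise_for_status()

# handle_request returns the TTS leaf's "byte_str", i.e. base64-encoded audio.
with open("reply.wav", "wb") as f:
    f.write(base64.b64decode(resp.json()))
```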
46 changes: 41 additions & 5 deletions AudioQnA/audioqna_multilang.py
@@ -5,9 +5,11 @@
import base64
import os

from comps import AudioQnAGateway, MicroService, ServiceOrchestrator, ServiceType
from comps import Gateway, MegaServiceEndpoint, MicroService, ServiceOrchestrator, ServiceType
from comps.cores.proto.api_protocol import AudioChatCompletionRequest, ChatCompletionResponse
from comps.cores.proto.docarray import LLMParams
from fastapi import Request

MEGA_SERVICE_HOST_IP = os.getenv("MEGA_SERVICE_HOST_IP", "0.0.0.0")
MEGA_SERVICE_PORT = int(os.getenv("MEGA_SERVICE_PORT", 8888))

WHISPER_SERVER_HOST_IP = os.getenv("WHISPER_SERVER_HOST_IP", "0.0.0.0")
@@ -52,7 +54,7 @@ def align_outputs(self, data, cur_node, inputs, runtime_graph, llm_parameters_di
return data


class AudioQnAService:
class AudioQnAService(Gateway):
def __init__(self, host="0.0.0.0", port=8000):
self.host = host
self.port = port
@@ -90,9 +92,43 @@ def add_remote_service(self):
self.megaservice.add(asr).add(llm).add(tts)
self.megaservice.flow_to(asr, llm)
self.megaservice.flow_to(llm, tts)
self.gateway = AudioQnAGateway(megaservice=self.megaservice, host="0.0.0.0", port=self.port)

async def handle_request(self, request: Request):
data = await request.json()

chat_request = AudioChatCompletionRequest.parse_obj(data)
parameters = LLMParams(
# relatively lower max_tokens for audio conversation
max_tokens=chat_request.max_tokens if chat_request.max_tokens else 128,
top_k=chat_request.top_k if chat_request.top_k else 10,
top_p=chat_request.top_p if chat_request.top_p else 0.95,
temperature=chat_request.temperature if chat_request.temperature else 0.01,
frequency_penalty=chat_request.frequency_penalty if chat_request.frequency_penalty else 0.0,
presence_penalty=chat_request.presence_penalty if chat_request.presence_penalty else 0.0,
repetition_penalty=chat_request.repetition_penalty if chat_request.repetition_penalty else 1.03,
streaming=False, # TODO add streaming LLM output as input to TTS
)
result_dict, runtime_graph = await self.megaservice.schedule(
initial_inputs={"byte_str": chat_request.audio}, llm_parameters=parameters
)

last_node = runtime_graph.all_leaves()[-1]
response = result_dict[last_node]["byte_str"]

return response

def start(self):
super().__init__(
megaservice=self.megaservice,
host=self.host,
port=self.port,
endpoint=str(MegaServiceEndpoint.AUDIO_QNA),
input_datatype=AudioChatCompletionRequest,
output_datatype=ChatCompletionResponse,
)


if __name__ == "__main__":
audioqna = AudioQnAService(host=MEGA_SERVICE_HOST_IP, port=MEGA_SERVICE_PORT)
audioqna = AudioQnAService(port=MEGA_SERVICE_PORT)
audioqna.add_remote_service()
audioqna.start()
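Both AudioQnA entrypoints above receive the same refactor: the dedicated gateway class is dropped, the service inherits Gateway directly, and start() defers the base-class constructor until the ASR → LLM → TTS graph is wired. A condensed sketch of the pattern, with a hypothetical class name, assuming (as the diff implies) that Gateway.__init__ binds handle_request to the endpoint and brings up the HTTP server:

```python
# Condensed sketch of the Gateway-inheritance pattern used in this commit.
# ExampleAudioService is a hypothetical name; the real classes are above.
from comps import Gateway, MegaServiceEndpoint, ServiceOrchestrator


class ExampleAudioService(Gateway):
    def __init__(self, host="0.0.0.0", port=8000):
        # Gateway.__init__ is deliberately not called yet: the orchestrator
        # graph must be built before the server starts accepting requests.
        self.host = host
        self.port = port
        self.megaservice = ServiceOrchestrator()

    async def handle_request(self, request):
        ...  # validate the request, schedule self.megaservice, return the leaf output

    def start(self):
        # Per the diff, super().__init__ registers handle_request on the
        # endpoint (plus input/output datatypes, as above) and starts serving.
        super().__init__(
            megaservice=self.megaservice,
            host=self.host,
            port=self.port,
            endpoint=str(MegaServiceEndpoint.AUDIO_QNA),
        )
```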
14 changes: 14 additions & 0 deletions AudioQnA/docker_compose/intel/cpu/xeon/compose.yaml
@@ -91,6 +91,20 @@ services:
- TTS_SERVICE_PORT=${TTS_SERVICE_PORT}
ipc: host
restart: always
audioqna-xeon-ui-server:
image: ${REGISTRY:-opea}/audioqna-ui:${TAG:-latest}
container_name: audioqna-xeon-ui-server
depends_on:
- audioqna-xeon-backend-server
ports:
- "5173:5173"
environment:
- no_proxy=${no_proxy}
- https_proxy=${https_proxy}
- http_proxy=${http_proxy}
- CHAT_URL=${BACKEND_SERVICE_ENDPOINT}
ipc: host
restart: always

networks:
default:
14 changes: 14 additions & 0 deletions AudioQnA/docker_compose/intel/hpu/gaudi/compose.yaml
@@ -112,6 +112,20 @@ services:
- TTS_SERVICE_PORT=${TTS_SERVICE_PORT}
ipc: host
restart: always
audioqna-gaudi-ui-server:
image: ${REGISTRY:-opea}/audioqna-ui:${TAG:-latest}
container_name: audioqna-gaudi-ui-server
depends_on:
- audioqna-gaudi-backend-server
ports:
- "5173:5173"
environment:
- no_proxy=${no_proxy}
- https_proxy=${https_proxy}
- http_proxy=${http_proxy}
- CHAT_URL=${BACKEND_SERVICE_ENDPOINT}
ipc: host
restart: always

networks:
default:
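Both compose files above point the new UI container's CHAT_URL at ${BACKEND_SERVICE_ENDPOINT}. Before opening the UI on port 5173, it can help to probe that endpoint from the host; a minimal sketch, where the fallback URL is only an illustrative assumption (the real value is deployment-specific):

```python
# Sanity check for the backend endpoint the UI container will call.
import os

import requests

# The default below is an assumption for illustration only.
backend = os.getenv("BACKEND_SERVICE_ENDPOINT", "http://localhost:8888/v1/audioqna")

try:
    # Any HTTP status (even 405/422 for a bare GET) proves the route is up.
    r = requests.get(backend, timeout=5)
    print(f"{backend} reachable (HTTP {r.status_code})")
except requests.exceptions.ConnectionError:
    print(f"{backend} unreachable; check the backend container and port mapping")
```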
2 changes: 1 addition & 1 deletion AudioQnA/tests/test_compose_on_gaudi.sh
@@ -19,7 +19,7 @@ function build_docker_images() {
git clone https://github.com/opea-project/GenAIComps.git && cd GenAIComps && git checkout "${opea_branch:-"main"}" && cd ../

echo "Build all the images with --no-cache, check docker_image_build.log for details..."
service_list="audioqna whisper-gaudi asr llm-tgi speecht5-gaudi tts"
service_list="audioqna audioqna-ui whisper-gaudi asr llm-tgi speecht5-gaudi tts"
docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log

docker pull ghcr.io/huggingface/tgi-gaudi:2.0.6
2 changes: 1 addition & 1 deletion AudioQnA/tests/test_compose_on_xeon.sh
@@ -19,7 +19,7 @@ function build_docker_images() {
git clone https://github.com/opea-project/GenAIComps.git && cd GenAIComps && git checkout "${opea_branch:-"main"}" && cd ../

echo "Build all the images with --no-cache, check docker_image_build.log for details..."
service_list="audioqna whisper asr llm-tgi speecht5 tts"
service_list="audioqna audioqna-ui whisper asr llm-tgi speecht5 tts"
docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log

docker pull ghcr.io/huggingface/tgi-gaudi:2.0.6
1 change: 1 addition & 0 deletions AudioQnA/ui/svelte/package.json
@@ -25,6 +25,7 @@
"@typescript-eslint/eslint-plugin": "^5.27.0",
"@typescript-eslint/parser": "^5.27.0",
"autoprefixer": "^10.4.16",
"daisyui": "^3.5.0",
"debug": "4.3.4",
"neverthrow": "5.0.0",
"pocketbase": "0.7.0",
45 changes: 40 additions & 5 deletions AvatarChatbot/avatarchatbot.py
@@ -5,9 +5,11 @@
import os
import sys

from comps import AvatarChatbotGateway, MicroService, ServiceOrchestrator, ServiceType
from comps import Gateway, MegaServiceEndpoint, MicroService, ServiceOrchestrator, ServiceType
from comps.cores.proto.api_protocol import AudioChatCompletionRequest, ChatCompletionResponse
from comps.cores.proto.docarray import LLMParams
from fastapi import Request

MEGA_SERVICE_HOST_IP = os.getenv("MEGA_SERVICE_HOST_IP", "0.0.0.0")
MEGA_SERVICE_PORT = int(os.getenv("MEGA_SERVICE_PORT", 8888))
ASR_SERVICE_HOST_IP = os.getenv("ASR_SERVICE_HOST_IP", "0.0.0.0")
ASR_SERVICE_PORT = int(os.getenv("ASR_SERVICE_PORT", 9099))
@@ -27,7 +29,7 @@ def check_env_vars(env_var_list):
print("All environment variables are set.")


class AvatarChatbotService:
class AvatarChatbotService(Gateway):
def __init__(self, host="0.0.0.0", port=8000):
self.host = host
self.port = port
@@ -70,7 +72,39 @@ def add_remote_service(self):
self.megaservice.flow_to(asr, llm)
self.megaservice.flow_to(llm, tts)
self.megaservice.flow_to(tts, animation)
self.gateway = AvatarChatbotGateway(megaservice=self.megaservice, host="0.0.0.0", port=self.port)

async def handle_request(self, request: Request):
data = await request.json()

chat_request = AudioChatCompletionRequest.model_validate(data)
parameters = LLMParams(
# relatively lower max_tokens for audio conversation
max_tokens=chat_request.max_tokens if chat_request.max_tokens else 128,
top_k=chat_request.top_k if chat_request.top_k else 10,
top_p=chat_request.top_p if chat_request.top_p else 0.95,
temperature=chat_request.temperature if chat_request.temperature else 0.01,
repetition_penalty=chat_request.presence_penalty if chat_request.presence_penalty else 1.03,
streaming=False, # TODO add streaming LLM output as input to TTS
)
# print(parameters)

result_dict, runtime_graph = await self.megaservice.schedule(
initial_inputs={"byte_str": chat_request.audio}, llm_parameters=parameters
)

last_node = runtime_graph.all_leaves()[-1]
response = result_dict[last_node]["video_path"]
return response

def start(self):
super().__init__(
megaservice=self.megaservice,
host=self.host,
port=self.port,
endpoint=str(MegaServiceEndpoint.AVATAR_CHATBOT),
input_datatype=AudioChatCompletionRequest,
output_datatype=ChatCompletionResponse,
)


if __name__ == "__main__":
@@ -89,5 +123,6 @@ def add_remote_service(self):
]
)

avatarchatbot = AvatarChatbotService(host=MEGA_SERVICE_HOST_IP, port=MEGA_SERVICE_PORT)
avatarchatbot = AvatarChatbotService(port=MEGA_SERVICE_PORT)
avatarchatbot.add_remote_service()
avatarchatbot.start()
2 changes: 1 addition & 1 deletion ChatQnA/README.md
@@ -196,7 +196,7 @@ In the below, we provide a table that describes for each microservice component
Gaudi default compose.yaml
| MicroService | Open Source Project | HW | Port | Endpoint |
| ------------ | ------------------- | ----- | ---- | -------------------- |
| Embedding | Langchain | Xeon | 6000 | /v1/embaddings |
| Embedding | Langchain | Xeon | 6000 | /v1/embeddings |
| Retriever | Langchain, Redis | Xeon | 7000 | /v1/retrieval |
| Reranking | Langchain, TEI | Gaudi | 8000 | /v1/reranking |
| LLM | Langchain, TGI | Gaudi | 9000 | /v1/chat/completions |
(remaining changed files not shown)
