From c20b82176cc4226e5d90035d4a61cd6ae713c544 Mon Sep 17 00:00:00 2001 From: pallavi jaini Date: Tue, 6 Aug 2024 19:08:20 +0000 Subject: [PATCH 01/57] Added the pinecone support documentation and docker compose file Signed-off-by: pallavi jaini Signed-off-by: AI Workloads Signed-off-by: Pallavi Jaini --- ChatQnA/docker/xeon/README_pinecone.md | 402 ++++++++++++++++++ .../docker/xeon/docker_compose_pinecone.yaml | 199 +++++++++ 2 files changed, 601 insertions(+) create mode 100644 ChatQnA/docker/xeon/README_pinecone.md create mode 100644 ChatQnA/docker/xeon/docker_compose_pinecone.yaml diff --git a/ChatQnA/docker/xeon/README_pinecone.md b/ChatQnA/docker/xeon/README_pinecone.md new file mode 100644 index 000000000..a9f29e32b --- /dev/null +++ b/ChatQnA/docker/xeon/README_pinecone.md @@ -0,0 +1,402 @@ +# Build Mega Service of ChatQnA (with Pinecone) on Xeon

This document outlines the deployment process for a ChatQnA application utilizing the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline on an Intel Xeon server. The steps include Docker image creation, container deployment via Docker Compose, and service execution to integrate microservices such as `embedding`, `retriever`, `rerank`, and `llm`. We will publish the Docker images to Docker Hub soon; this will simplify the deployment process for this service.

## 🚀 Apply Xeon Server on AWS

To provision a Xeon server on AWS, start by creating an AWS account if you don't have one already. Then, head to the [EC2 Console](https://console.aws.amazon.com/ec2/v2/home) to begin the process. Within the EC2 service, select the Amazon EC2 M7i or M7i-flex instance type to leverage the power of 4th Generation Intel Xeon Scalable processors. These instances are optimized for high-performance computing and demanding workloads.

For detailed information about these instance types, you can refer to this [link](https://aws.amazon.com/ec2/instance-types/m7i/). Once you've chosen the appropriate instance type, proceed with configuring your instance settings, including network configurations, security groups, and storage options.

After launching your instance, you can connect to it using SSH (for Linux instances) or Remote Desktop Protocol (RDP) (for Windows instances). From there, you'll have full access to your Xeon server, allowing you to install, configure, and manage your applications as needed.

**Certain ports in the EC2 instance need to be opened up in the security group for the microservices to work with the curl commands**

> See one example below. Please open up these ports in the EC2 instance based on the IP addresses you want to allow

```

data_prep_service
=====================
Port 6007 - Open to 0.0.0.0/0
Port 6008 - Open to 0.0.0.0/0

tei_embedding_service
=====================
Port 6006 - Open to 0.0.0.0/0

embedding
=========
Port 6000 - Open to 0.0.0.0/0

retriever
=========
Port 7000 - Open to 0.0.0.0/0

tei_xeon_service
================
Port 8808 - Open to 0.0.0.0/0

reranking
=========
Port 8000 - Open to 0.0.0.0/0

tgi-service
===========
Port 9009 - Open to 0.0.0.0/0

llm
===
Port 9000 - Open to 0.0.0.0/0

chaqna-xeon-backend-server
==========================
Port 8888 - Open to 0.0.0.0/0

chaqna-xeon-ui-server
=====================
Port 5173 - Open to 0.0.0.0/0
```

## 🚀 Build Docker Images

First of all, you need to build the Docker images locally from the GenAIComps repository and install its Python package.
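Note that every `docker build` command below forwards the host's proxy settings as build arguments. If your server sits behind a corporate proxy, export `http_proxy` and `https_proxy` first; the URL in this sketch is only a placeholder for your own proxy endpoint, and you can leave both unset if you have direct internet access.

```bash
# Only needed when building behind a proxy; replace the placeholder URL with your proxy endpoint
export http_proxy="http://your-proxy.example.com:3128"
export https_proxy="http://your-proxy.example.com:3128"
```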
+ +```bash +git clone https://github.com/opea-project/GenAIComps.git +cd GenAIComps +``` + +### 1. Build Embedding Image + +```bash +docker build --no-cache -t opea/embedding-tei:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/embeddings/langchain/docker/Dockerfile . +``` + +### 2. Build Retriever Image + +```bash +docker build --no-cache -t opea/retriever-pinecone:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/langchain/pinecone/docker/Dockerfile . +``` + +### 3. Build Rerank Image + +```bash +docker build --no-cache -t opea/reranking-tei:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/reranks/tei/docker/Dockerfile . +``` + +### 4. Build LLM Image + +```bash +docker build --no-cache -t opea/llm-tgi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/text-generation/tgi/Dockerfile . +``` + +### 5. Build Dataprep Image + +```bash +docker build --no-cache -t opea/dataprep-pinecone:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/pinecone/docker/Dockerfile . +cd .. +``` + +### 6. Build MegaService Docker Image + +To construct the Mega Service, we utilize the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline within the `chatqna.py` Python script. Build MegaService Docker image via below command: + +```bash +git clone https://github.com/opea-project/GenAIExamples.git +cd GenAIExamples/ChatQnA/docker +docker build --no-cache -t opea/chatqna:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile . +cd ../../.. +``` + +### 7. Build UI Docker Image + +Build frontend Docker image via below command: + +```bash +cd GenAIExamples/ChatQnA/docker/ui/ +docker build --no-cache -t opea/chatqna-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f ./docker/Dockerfile . +cd ../../../.. +``` + +### 8. Build Conversational React UI Docker Image (Optional) + +Build frontend Docker image that enables Conversational experience with ChatQnA megaservice via below command: + +**Export the value of the public IP address of your Xeon server to the `host_ip` environment variable** + +```bash +cd GenAIExamples/ChatQnA/docker/ui/ +export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/chatqna" +export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep" +export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:6008/v1/dataprep/get_file" +docker build --no-cache -t opea/chatqna-conversation-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy --build-arg BACKEND_SERVICE_ENDPOINT=$BACKEND_SERVICE_ENDPOINT --build-arg DATAPREP_SERVICE_ENDPOINT=$DATAPREP_SERVICE_ENDPOINT --build-arg DATAPREP_GET_FILE_ENDPOINT=$DATAPREP_GET_FILE_ENDPOINT -f ./docker/Dockerfile.react . +cd ../../../.. +``` + +Then run the command `docker images`, you will have the following 7 Docker Images: + +1. `opea/dataprep-pinecone:latest` +2. `opea/embedding-tei:latest` +3. `opea/retriever-pinecone:latest` +4. `opea/reranking-tei:latest` +5. `opea/llm-tgi:latest` +6. `opea/chatqna:latest` +7. `opea/chatqna-ui:latest` + +## 🚀 Start Microservices + +### Setup Environment Variables + +Since the `compose.yaml` will consume some environment variables, you need to setup them in advance as below. 
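(If you are not sure which IP address to use for `host_ip`, the test scripts later in this patch series look it up with `hostname -I`; a similar optional one-liner is sketched below. On AWS this returns the instance's private address, so use the public IP from the EC2 console when you need external access.)

```bash
# Optional shortcut: take the first address reported by the host and verify it is the one you want to expose
export host_ip=$(hostname -I | awk '{print $1}')
```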
**Export the value of the public IP address of your Xeon server to the `host_ip` environment variable**

> Replace External_Public_IP below with the actual IPv4 value

```
export host_ip="External_Public_IP"
```

**Export the value of your Huggingface API token to the `your_hf_api_token` environment variable**

> Replace Your_Huggingface_API_Token below with your actual Huggingface API token value

```
export your_hf_api_token="Your_Huggingface_API_Token"
```

**Append the value of the public IP address to the no_proxy list**

```
export your_no_proxy=${your_no_proxy},"External_Public_IP"
```

**Get the PINECONE_API_KEY and the INDEX_NAME**

```
export pinecone_api_key=${api_key}
export pinecone_index_name=${pinecone_index}
```

```bash
export no_proxy=${your_no_proxy}
export http_proxy=${your_http_proxy}
export https_proxy=${your_http_proxy}
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:6006"
export TEI_RERANKING_ENDPOINT="http://${host_ip}:8808"
export TGI_LLM_ENDPOINT="http://${host_ip}:9009"
export PINECONE_API_KEY=${pinecone_api_key}
export PINECONE_INDEX_NAME=${pinecone_index_name}
export INDEX_NAME=${pinecone_index_name}
export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token}
export MEGA_SERVICE_HOST_IP=${host_ip}
export EMBEDDING_SERVICE_HOST_IP=${host_ip}
export RETRIEVER_SERVICE_HOST_IP=${host_ip}
export RERANK_SERVICE_HOST_IP=${host_ip}
export LLM_SERVICE_HOST_IP=${host_ip}
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/chatqna"
export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep"
export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:6008/v1/dataprep/get_file"
export DATAPREP_DELETE_FILE_ENDPOINT="http://${host_ip}:6009/v1/dataprep/delete_file"
```

Note: Please replace `host_ip` with your external IP address; do not use localhost.

### Start all the services Docker Containers

> Before running the docker compose command, you need to be in the folder that has the docker compose yaml file

```bash
cd GenAIExamples/ChatQnA/docker/xeon/
docker compose up -d
```

### Validate Microservices

1. TEI Embedding Service

```bash
curl ${host_ip}:6006/embed \
    -X POST \
    -d '{"inputs":"What is Deep Learning?"}' \
    -H 'Content-Type: application/json'
```

2. Embedding Microservice

```bash
curl http://${host_ip}:6000/v1/embeddings \
  -X POST \
  -d '{"text":"hello"}' \
  -H 'Content-Type: application/json'
```

3. Retriever Microservice
   To validate the retriever microservice, you need to generate a mock embedding vector of length 768 with a Python script:

```Python
import random
embedding = [random.uniform(-1, 1) for _ in range(768)]
print(embedding)
```

Then substitute your mock embedding vector for `${your_embedding}` in the following cURL command:

```bash
curl http://${host_ip}:7000/v1/retrieval \
  -X POST \
  -d '{"text":"What is the revenue of Nike in 2023?","embedding":"'"${your_embedding}"'"}' \
  -H 'Content-Type: application/json'
```

4. TEI Reranking Service

```bash
curl http://${host_ip}:8808/rerank \
    -X POST \
    -d '{"query":"What is Deep Learning?", "texts": ["Deep Learning is not...", "Deep learning is..."]}' \
    -H 'Content-Type: application/json'
```

5.
Reranking Microservice

```bash
curl http://${host_ip}:8000/v1/reranking \
  -X POST \
  -d '{"initial_query":"What is Deep Learning?", "retrieved_docs": [{"text":"Deep Learning is not..."}, {"text":"Deep learning is..."}]}' \
  -H 'Content-Type: application/json'
```

6. TGI Service

```bash
curl http://${host_ip}:9009/generate \
  -X POST \
  -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' \
  -H 'Content-Type: application/json'
```

7. LLM Microservice

```bash
curl http://${host_ip}:9000/v1/chat/completions \
  -X POST \
  -d '{"query":"What is Deep Learning?","max_new_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \
  -H 'Content-Type: application/json'
```

8. MegaService

```bash
curl http://${host_ip}:8888/v1/chatqna -H "Content-Type: application/json" -d '{
     "messages": "What is the revenue of Nike in 2023?"
     }'
```

9. Dataprep Microservice (Optional)

If you want to update the default knowledge base, you can use the following commands:

Update Knowledge Base via Local File Upload:

```bash
curl -X POST "http://${host_ip}:6007/v1/dataprep" \
     -H "Content-Type: multipart/form-data" \
     -F "files=@./nke-10k-2023.pdf"
```

This command updates a knowledge base by uploading a local file for processing. Update the file path according to your environment.

Add Knowledge Base via HTTP Links:

```bash
curl -X POST "http://${host_ip}:6007/v1/dataprep" \
     -H "Content-Type: multipart/form-data" \
     -F 'link_list=["https://opea.dev"]'
```

This command updates a knowledge base by submitting a list of HTTP links for processing.

You can also get the list of files you uploaded:

```bash
curl -X POST "http://${host_ip}:6008/v1/dataprep/get_file" \
     -H "Content-Type: application/json"
```

To delete the file/link you uploaded:

```bash
# delete link
curl -X POST "http://${host_ip}:6009/v1/dataprep/delete_file" \
     -d '{"file_path": "https://opea.dev"}' \
     -H "Content-Type: application/json"

# delete file
curl -X POST "http://${host_ip}:6009/v1/dataprep/delete_file" \
     -d '{"file_path": "nke-10k-2023.pdf"}' \
     -H "Content-Type: application/json"

# delete all uploaded files and links
curl -X POST "http://${host_ip}:6009/v1/dataprep/delete_file" \
     -d '{"file_path": "all"}' \
     -H "Content-Type: application/json"
```

## Enable LangSmith for Monitoring Application (Optional)

LangSmith offers tools to debug, evaluate, and monitor language models and intelligent agents. It can be used to assess benchmark data for each microservice. Before launching your services with `docker compose -f compose.yaml up -d`, you need to enable LangSmith tracing by setting the `LANGCHAIN_TRACING_V2` environment variable to true and configuring your LangChain API key.

Here's how you can do it:

1. Install the latest version of LangSmith:

```bash
pip install -U langsmith
```

2. Set the necessary environment variables:

```bash
export LANGCHAIN_TRACING_V2=true
export LANGCHAIN_API_KEY=ls_...
```

## 🚀 Launch the UI

To access the frontend, open the following URL in your browser: http://{host_ip}:5173. By default, the UI runs on port 5173 internally. If you prefer to use a different host port to access the frontend, you can modify the port mapping in the `compose.yaml` file as shown below:

```yaml
  chaqna-xeon-ui-server:
    image: opea/chatqna-ui:latest
    ...
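    # Port mapping is "host_port:container_port"; the UI listens on 5173 inside the container,
    # so the mapping below exposes it on host port 80 instead of the default 5173.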
+ ports: + - "80:5173" +``` + +## 🚀 Launch the Conversational UI (react) + +To access the Conversational UI frontend, open the following URL in your browser: http://{host_ip}:5174. By default, the UI runs on port 80 internally. If you prefer to use a different host port to access the frontend, you can modify the port mapping in the `compose.yaml` file as shown below: + +```yaml + chaqna-xeon-conversation-ui-server: + image: opea/chatqna-conversation-ui:latest + ... + ports: + - "80:80" +``` + +![project-screenshot](../../assets/img/chat_ui_init.png) + +Here is an example of running ChatQnA: + +![project-screenshot](../../assets/img/chat_ui_response.png) + +Here is an example of running ChatQnA with Conversational UI (React): + +![project-screenshot](../../assets/img/conversation_ui_response.png) diff --git a/ChatQnA/docker/xeon/docker_compose_pinecone.yaml b/ChatQnA/docker/xeon/docker_compose_pinecone.yaml new file mode 100644 index 000000000..9c4ae2eb2 --- /dev/null +++ b/ChatQnA/docker/xeon/docker_compose_pinecone.yaml @@ -0,0 +1,199 @@ + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +version: "3.8" + +services: + dataprep-pinecone-service: + image: opea/dataprep-pinecone:latest + container_name: dataprep-pinecone-server + depends_on: + - tei-embedding-service + ports: + - "6007:6007" + - "6008:6008" + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + PINECONE_API_KEY: ${PINECONE_API_KEY} + PINECONE_INDEX_NAME: ${PINECONE_INDEX_NAME} + TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} + LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY} + HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + tei-embedding-service: + image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + container_name: tei-embedding-server + ports: + - "6006:80" + volumes: + - "./data:/data" + shm_size: 1g + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate + embedding: + image: opea/embedding-tei:latest + container_name: embedding-tei-server + depends_on: + - tei-embedding-service + ports: + - "6000:6000" + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} + LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY} + LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2} + LANGCHAIN_PROJECT: "opea-embedding-service" + restart: unless-stopped + retriever: + image: opea/retriever-pinecone:latest + container_name: retriever-pinecone-server + ports: + - "7000:7000" + ipc: host + environment: + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + PINECONE_API_KEY: ${PINECONE_API_KEY} + INDEX_NAME: ${PINECONE_INDEX_NAME} + PINECONE_INDEX_NAME: ${PINECONE_INDEX_NAME} + LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY} + TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} + restart: unless-stopped + tei-reranking-service: + image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + container_name: tei-reranking-server + ports: + - "8808:80" + volumes: + - "./data:/data" + shm_size: 1g + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_HUB_DISABLE_PROGRESS_BARS: 1 + HF_HUB_ENABLE_HF_TRANSFER: 0 + command: --model-id ${RERANK_MODEL_ID} --auto-truncate + reranking: + image: opea/reranking-tei:latest + container_name: reranking-tei-xeon-server + depends_on: + - 
tei-reranking-service + ports: + - "8000:8000" + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + TEI_RERANKING_ENDPOINT: ${TEI_RERANKING_ENDPOINT} + HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_HUB_DISABLE_PROGRESS_BARS: 1 + HF_HUB_ENABLE_HF_TRANSFER: 0 + LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY} + LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2} + LANGCHAIN_PROJECT: "opea-reranking-service" + restart: unless-stopped + tgi-service: + image: ghcr.io/huggingface/text-generation-inference:2.1.0 + container_name: tgi-service + ports: + - "9009:80" + volumes: + - "./data:/data" + shm_size: 1g + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_HUB_DISABLE_PROGRESS_BARS: 1 + HF_HUB_ENABLE_HF_TRANSFER: 0 + command: --model-id ${LLM_MODEL_ID} + llm: + image: opea/llm-tgi:latest + container_name: llm-tgi-server + depends_on: + - tgi-service + ports: + - "9000:9000" + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT} + HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_HUB_DISABLE_PROGRESS_BARS: 1 + HF_HUB_ENABLE_HF_TRANSFER: 0 + LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY} + LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2} + LANGCHAIN_PROJECT: "opea-llm-service" + restart: unless-stopped + chaqna-xeon-backend-server: + image: opea/chatqna:latest + container_name: chatqna-xeon-backend-server + depends_on: + - tei-embedding-service + - embedding + - retriever + - tei-reranking-service + - reranking + - tgi-service + - llm + ports: + - "8888:8888" + environment: + - no_proxy=${no_proxy} + - https_proxy=${https_proxy} + - http_proxy=${http_proxy} + - MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP} + - EMBEDDING_SERVICE_HOST_IP=${EMBEDDING_SERVICE_HOST_IP} + - RETRIEVER_SERVICE_HOST_IP=${RETRIEVER_SERVICE_HOST_IP} + - RERANK_SERVICE_HOST_IP=${RERANK_SERVICE_HOST_IP} + - LLM_SERVICE_HOST_IP=${LLM_SERVICE_HOST_IP} + ipc: host + restart: always + chaqna-xeon-ui-server: + image: opea/chatqna-ui:latest + container_name: chatqna-xeon-ui-server + depends_on: + - chaqna-xeon-backend-server + ports: + - "5173:5173" + environment: + - no_proxy=${no_proxy} + - https_proxy=${https_proxy} + - http_proxy=${http_proxy} + - CHAT_BASE_URL=${BACKEND_SERVICE_ENDPOINT} + - UPLOAD_FILE_BASE_URL=${DATAPREP_SERVICE_ENDPOINT} + - GET_FILE=${DATAPREP_GET_FILE_ENDPOINT} + - DELETE_FILE=${DATAPREP_DELETE_FILE_ENDPOINT} + ipc: host + restart: always + chaqna-xeon-conversation-ui-server: + image: opea/chatqna-conversation-ui:latest + container_name: chatqna-xeon-conversation-ui-server + environment: + - no_proxy=${no_proxy} + - https_proxy=${https_proxy} + - http_proxy=${http_proxy} + ports: + - 5174:80 + depends_on: + - chaqna-xeon-backend-server + ipc: host + restart: always + +networks: + default: + driver: bridge From 4c60074b969ce6922c2b5885a4b3bec5c419a6b2 Mon Sep 17 00:00:00 2001 From: pallavi jaini Date: Wed, 7 Aug 2024 06:14:54 +0000 Subject: [PATCH 02/57] Updated the readme for pinecone Signed-off-by: pallavi jaini Signed-off-by: AI Workloads Signed-off-by: Pallavi Jaini --- ChatQnA/docker/xeon/README_pinecone.md | 25 +++---------------------- 1 file changed, 3 insertions(+), 22 deletions(-) diff --git a/ChatQnA/docker/xeon/README_pinecone.md b/ChatQnA/docker/xeon/README_pinecone.md index a9f29e32b..44e8e225e 100644 --- a/ChatQnA/docker/xeon/README_pinecone.md +++ 
b/ChatQnA/docker/xeon/README_pinecone.md @@ -148,7 +148,7 @@ Then run the command `docker images`, you will have the following 7 Docker Image ### Setup Environment Variables -Since the `compose.yaml` will consume some environment variables, you need to setup them in advance as below. +Since the `docker_compose_pinecone.yaml` will consume some environment variables, you need to setup them in advance as below. **Export the value of the public IP address of your Xeon server to the `host_ip` environment variable** @@ -213,7 +213,7 @@ Note: Please replace with `host_ip` with you external IP address, do not use loc ```bash cd GenAIExamples/ChatQnA/docker/xeon/ -docker compose up -d +docker compose -f docker_compose_pinecone.yaml up -d ``` ### Validate Microservices @@ -329,28 +329,9 @@ curl -X POST "http://${host_ip}:6008/v1/dataprep/get_file" \ -H "Content-Type: application/json" ``` -To delete the file/link you uploaded: - -```bash -# delete link -curl -X POST "http://${host_ip}:6009/v1/dataprep/delete_file" \ - -d '{"file_path": "https://opea.dev"}' \ - -H "Content-Type: application/json" - -# delete file -curl -X POST "http://${host_ip}:6009/v1/dataprep/delete_file" \ - -d '{"file_path": "nke-10k-2023.pdf"}' \ - -H "Content-Type: application/json" - -# delete all uploaded files and links -curl -X POST "http://${host_ip}:6009/v1/dataprep/delete_file" \ - -d '{"file_path": "all"}' \ - -H "Content-Type: application/json" -``` - ## Enable LangSmith for Monotoring Application (Optional) -LangSmith offers tools to debug, evaluate, and monitor language models and intelligent agents. It can be used to assess benchmark data for each microservice. Before launching your services with `docker compose -f compose.yaml up -d`, you need to enable LangSmith tracing by setting the `LANGCHAIN_TRACING_V2` environment variable to true and configuring your LangChain API key. +LangSmith offers tools to debug, evaluate, and monitor language models and intelligent agents. It can be used to assess benchmark data for each microservice. Before launching your services with `docker compose -f docker_compose_pinecone.yaml up -d`, you need to enable LangSmith tracing by setting the `LANGCHAIN_TRACING_V2` environment variable to true and configuring your LangChain API key. 
Here's how you can do it: From 6b8cbe886d7bef042ab4c414c791a097e24ac6e4 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 7 Aug 2024 06:26:41 +0000 Subject: [PATCH 03/57] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Signed-off-by: pallavi jaini Signed-off-by: AI Workloads Signed-off-by: Pallavi Jaini --- ChatQnA/docker/xeon/README_pinecone.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/ChatQnA/docker/xeon/README_pinecone.md b/ChatQnA/docker/xeon/README_pinecone.md index 44e8e225e..5c1a50c2e 100644 --- a/ChatQnA/docker/xeon/README_pinecone.md +++ b/ChatQnA/docker/xeon/README_pinecone.md @@ -172,8 +172,7 @@ export your_hf_api_token="Your_Huggingface_API_Token" export your_no_proxy=${your_no_proxy},"External_Public_IP" ``` - -**Get the PINECONE_API_KEY and the INDEX_NAME +\*\*Get the PINECONE_API_KEY and the INDEX_NAME ``` export pinecone_api_key=${api_key} From afcb18c385e6fefb69e3b59bc86c0b408caeba4f Mon Sep 17 00:00:00 2001 From: pallavi jaini Date: Thu, 8 Aug 2024 21:37:10 +0000 Subject: [PATCH 04/57] Added port 6009 to pinecone Signed-off-by: pallavi jaini Signed-off-by: AI Workloads Signed-off-by: Pallavi Jaini --- ChatQnA/docker/xeon/docker_compose_pinecone.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/ChatQnA/docker/xeon/docker_compose_pinecone.yaml b/ChatQnA/docker/xeon/docker_compose_pinecone.yaml index 9c4ae2eb2..103a1a911 100644 --- a/ChatQnA/docker/xeon/docker_compose_pinecone.yaml +++ b/ChatQnA/docker/xeon/docker_compose_pinecone.yaml @@ -13,6 +13,7 @@ services: ports: - "6007:6007" - "6008:6008" + - "6009:6009" environment: no_proxy: ${no_proxy} http_proxy: ${http_proxy} From 66ea87043c6bde3207c4eabbeb1579682fdab2b5 Mon Sep 17 00:00:00 2001 From: Pallavi Jaini Date: Tue, 13 Aug 2024 15:20:38 -0700 Subject: [PATCH 05/57] Added the tests and renamed the files Signed-off-by: Pallavi Jaini Signed-off-by: AI Workloads Signed-off-by: Pallavi Jaini --- ChatQnA/docker/xeon/README_pinecone.md | 6 +- ...se_pinecone.yaml => compose_pinecone.yaml} | 0 .../tests/_test_chatqna_pinecone_on_xeon.sh | 232 ++++++++++++++++++ 3 files changed, 235 insertions(+), 3 deletions(-) rename ChatQnA/docker/xeon/{docker_compose_pinecone.yaml => compose_pinecone.yaml} (100%) create mode 100644 ChatQnA/tests/_test_chatqna_pinecone_on_xeon.sh diff --git a/ChatQnA/docker/xeon/README_pinecone.md b/ChatQnA/docker/xeon/README_pinecone.md index 5c1a50c2e..2b365c70f 100644 --- a/ChatQnA/docker/xeon/README_pinecone.md +++ b/ChatQnA/docker/xeon/README_pinecone.md @@ -148,7 +148,7 @@ Then run the command `docker images`, you will have the following 7 Docker Image ### Setup Environment Variables -Since the `docker_compose_pinecone.yaml` will consume some environment variables, you need to setup them in advance as below. +Since the `compose_pinecone.yaml` will consume some environment variables, you need to setup them in advance as below. 
**Export the value of the public IP address of your Xeon server to the `host_ip` environment variable** @@ -212,7 +212,7 @@ Note: Please replace with `host_ip` with you external IP address, do not use loc ```bash cd GenAIExamples/ChatQnA/docker/xeon/ -docker compose -f docker_compose_pinecone.yaml up -d +docker compose -f compose_pinecone.yaml up -d ``` ### Validate Microservices @@ -330,7 +330,7 @@ curl -X POST "http://${host_ip}:6008/v1/dataprep/get_file" \ ## Enable LangSmith for Monotoring Application (Optional) -LangSmith offers tools to debug, evaluate, and monitor language models and intelligent agents. It can be used to assess benchmark data for each microservice. Before launching your services with `docker compose -f docker_compose_pinecone.yaml up -d`, you need to enable LangSmith tracing by setting the `LANGCHAIN_TRACING_V2` environment variable to true and configuring your LangChain API key. +LangSmith offers tools to debug, evaluate, and monitor language models and intelligent agents. It can be used to assess benchmark data for each microservice. Before launching your services with `docker compose -f compose_pinecone.yaml up -d`, you need to enable LangSmith tracing by setting the `LANGCHAIN_TRACING_V2` environment variable to true and configuring your LangChain API key. Here's how you can do it: diff --git a/ChatQnA/docker/xeon/docker_compose_pinecone.yaml b/ChatQnA/docker/xeon/compose_pinecone.yaml similarity index 100% rename from ChatQnA/docker/xeon/docker_compose_pinecone.yaml rename to ChatQnA/docker/xeon/compose_pinecone.yaml diff --git a/ChatQnA/tests/_test_chatqna_pinecone_on_xeon.sh b/ChatQnA/tests/_test_chatqna_pinecone_on_xeon.sh new file mode 100644 index 000000000..392941994 --- /dev/null +++ b/ChatQnA/tests/_test_chatqna_pinecone_on_xeon.sh @@ -0,0 +1,232 @@ +#!/bin/bash +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +set -e +echo "IMAGE_REPO=${IMAGE_REPO}" + +WORKPATH=$(dirname "$PWD") +LOG_PATH="$WORKPATH/tests" +ip_address=$(hostname -I | awk '{print $1}') + +function build_docker_images() { + cd $WORKPATH + git clone https://github.com/opea-project/GenAIComps.git + cd GenAIComps + + docker build -t opea/embedding-tei:latest -f comps/embeddings/langchain/docker/Dockerfile . + docker build -t opea/retriever-pinecone:latest -f comps/retrievers/langchain/pinecone/docker/Dockerfile . + docker build -t opea/reranking-tei:latest -f comps/reranks/tei/docker/Dockerfile . + docker build -t opea/llm-tgi:latest -f comps/llms/text-generation/tgi/Dockerfile . + docker build -t opea/dataprep-pinecone:latest -f comps/dataprep/pinecone/docker/Dockerfile . + + cd $WORKPATH/docker + docker build --no-cache -t opea/chatqna:latest -f Dockerfile . + + cd $WORKPATH/docker/ui + docker build --no-cache -t opea/chatqna-ui:latest -f docker/Dockerfile . 
+ + docker images +} + +function start_services() { + cd $WORKPATH/docker/xeon + + export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5" + export RERANK_MODEL_ID="BAAI/bge-reranker-base" + export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" + export TEI_EMBEDDING_ENDPOINT="http://${ip_address}:6006" + export TEI_RERANKING_ENDPOINT="http://${ip_address}:8808" + export TGI_LLM_ENDPOINT="http://${ip_address}:9009" + export PINECONE_API_KEY=${PINECONE_KEY} + export PINECONE_INDEX_NAME="langchain-test" + export INDEX_NAME="langchain-test" + export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} + export MEGA_SERVICE_HOST_IP=${ip_address} + export EMBEDDING_SERVICE_HOST_IP=${ip_address} + export RETRIEVER_SERVICE_HOST_IP=${ip_address} + export RERANK_SERVICE_HOST_IP=${ip_address} + export LLM_SERVICE_HOST_IP=${ip_address} + export BACKEND_SERVICE_ENDPOINT="http://${ip_address}:8888/v1/chatqna" + export DATAPREP_SERVICE_ENDPOINT="http://${ip_address}:6007/v1/dataprep" + export DATAPREP_GET_FILE_ENDPOINT="http://${ip_address}:6007/v1/dataprep/get_file" + export DATAPREP_DELETE_FILE_ENDPOINT="http://${ip_address}:6007/v1/dataprep/delete_file" + + sed -i "s/backend_address/$ip_address/g" $WORKPATH/docker/ui/svelte/.env + + if [[ "$IMAGE_REPO" != "" ]]; then + # Replace the container name with a test-specific name + echo "using image repository $IMAGE_REPO and image tag $IMAGE_TAG" + sed -i "s#image: opea/chatqna:latest#image: opea/chatqna:${IMAGE_TAG}#g" compose_pinecone.yaml + sed -i "s#image: opea/chatqna-ui:latest#image: opea/chatqna-ui:${IMAGE_TAG}#g" compose_pinecone.yaml + sed -i "s#image: opea/chatqna-conversation-ui:latest#image: opea/chatqna-conversation-ui:${IMAGE_TAG}#g" compose_pinecone.yaml + sed -i "s#image: opea/*#image: ${IMAGE_REPO}opea/#g" compose_pinecone.yaml + cat compose_pinecone.yaml + fi + + # Start Docker Containers + docker compose -f compose_pinecone.yaml up -d + n=0 + until [[ "$n" -ge 200 ]]; do + docker logs tgi-service > tgi_service_start.log + if grep -q Connected tgi_service_start.log; then + break + fi + sleep 1s + n=$((n+1)) + done +} + +function validate_services() { + local URL="$1" + local EXPECTED_RESULT="$2" + local SERVICE_NAME="$3" + local DOCKER_NAME="$4" + local INPUT_DATA="$5" + + local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL") + if [ "$HTTP_STATUS" -eq 200 ]; then + echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." + + local CONTENT=$(curl -s -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log) + + if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then + echo "[ $SERVICE_NAME ] Content is as expected." + else + echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT" + docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log + exit 1 + fi + else + echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" + docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log + exit 1 + fi + sleep 1s +} + +function validate_microservices() { + # Check if the microservices are running correctly. 
+ + # tei for embedding service + validate_services \ + "${ip_address}:6006/embed" \ + "\[\[" \ + "tei-embedding" \ + "tei-embedding-server" \ + '{"inputs":"What is Deep Learning?"}' + + # embedding microservice + validate_services \ + "${ip_address}:6000/v1/embeddings" \ + '"text":"What is Deep Learning?","embedding":\[' \ + "embedding" \ + "embedding-tei-server" \ + '{"text":"What is Deep Learning?"}' + + sleep 1m # retrieval can't curl as expected, try to wait for more time + + # retrieval microservice + test_embedding=$(python3 -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)") + validate_services \ + "${ip_address}:7000/v1/retrieval" \ + " " \ + "retrieval" \ + "retriever-pinecone-server" \ + "{\"text\":\"What is the revenue of Nike in 2023?\",\"embedding\":${test_embedding}}" + + # tei for rerank microservice + validate_services \ + "${ip_address}:8808/rerank" \ + '{"index":1,"score":' \ + "tei-rerank" \ + "tei-reranking-server" \ + '{"query":"What is Deep Learning?", "texts": ["Deep Learning is not...", "Deep learning is..."]}' + + # rerank microservice + validate_services \ + "${ip_address}:8000/v1/reranking" \ + "Deep learning is..." \ + "rerank" \ + "reranking-tei-xeon-server" \ + '{"initial_query":"What is Deep Learning?", "retrieved_docs": [{"text":"Deep Learning is not..."}, {"text":"Deep learning is..."}]}' + + # tgi for llm service + validate_services \ + "${ip_address}:9009/generate" \ + "generated_text" \ + "tgi-llm" \ + "tgi-service" \ + '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' + + # llm microservice + validate_services \ + "${ip_address}:9000/v1/chat/completions" \ + "data: " \ + "llm" \ + "llm-tgi-server" \ + '{"query":"What is Deep Learning?"}' + +} + +function validate_megaservice() { + # Curl the Mega Service + validate_services \ + "${ip_address}:8888/v1/chatqna" \ + "billion" \ + "mega-chatqna" \ + "chatqna-xeon-backend-server" \ + '{"messages": "What is the revenue of Nike in 2023?"}' + +} + +function validate_frontend() { + cd $WORKPATH/docker/ui/svelte + local conda_env_name="OPEA_e2e" + export PATH=${HOME}/miniforge3/bin/:$PATH +# conda remove -n ${conda_env_name} --all -y +# conda create -n ${conda_env_name} python=3.12 -y + source activate ${conda_env_name} + + sed -i "s/localhost/$ip_address/g" playwright.config.ts + +# conda install -c conda-forge nodejs -y + npm install && npm ci && npx playwright install --with-deps + node -v && npm -v && pip list + + exit_status=0 + npx playwright test || exit_status=$? 
+ + if [ $exit_status -ne 0 ]; then + echo "[TEST INFO]: ---------frontend test failed---------" + exit $exit_status + else + echo "[TEST INFO]: ---------frontend test passed---------" + fi +} + +function stop_docker() { + cd $WORKPATH/docker/xeon + docker compose stop && docker compose rm -f +} + +function main() { + + stop_docker + if [[ "$IMAGE_REPO" == "" ]]; then build_docker_images; fi + start_time=$(date +%s) + start_services + end_time=$(date +%s) + duration=$((end_time-start_time)) + echo "Mega service start duration is $duration s" && sleep 1s + + validate_microservices + validate_megaservice + validate_frontend + + stop_docker + echo y | docker system prune + +} + +main From 1e5e9332be506ded8cb2d7f6f3580a493131dc79 Mon Sep 17 00:00:00 2001 From: AI Workloads Date: Wed, 11 Sep 2024 11:42:17 +0530 Subject: [PATCH 06/57] Updated the pinecone tests as per the xeon Signed-off-by: AI Workloads Signed-off-by: Pallavi Jaini --- .../intel/cpu}/xeon/README_pinecone.md | 8 +- .../intel/cpu}/xeon/compose_pinecone.yaml | 0 ChatQnA/docker_image_build/build.yaml | 12 + .../tests/test_chatqna_pinecone_on_xeon.sh | 258 ++++++++++++++++++ 4 files changed, 274 insertions(+), 4 deletions(-) rename ChatQnA/{docker => docker_compose/intel/cpu}/xeon/README_pinecone.md (98%) rename ChatQnA/{docker => docker_compose/intel/cpu}/xeon/compose_pinecone.yaml (100%) create mode 100644 ChatQnA/tests/test_chatqna_pinecone_on_xeon.sh diff --git a/ChatQnA/docker/xeon/README_pinecone.md b/ChatQnA/docker_compose/intel/cpu/xeon/README_pinecone.md similarity index 98% rename from ChatQnA/docker/xeon/README_pinecone.md rename to ChatQnA/docker_compose/intel/cpu/xeon/README_pinecone.md index 2b365c70f..6cbf45135 100644 --- a/ChatQnA/docker/xeon/README_pinecone.md +++ b/ChatQnA/docker_compose/intel/cpu/xeon/README_pinecone.md @@ -70,19 +70,19 @@ cd GenAIComps ### 1. Build Embedding Image ```bash -docker build --no-cache -t opea/embedding-tei:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/embeddings/langchain/docker/Dockerfile . +docker build --no-cache -t opea/embedding-tei:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/embeddings/tei/langchain/Dockerfile . ``` ### 2. Build Retriever Image ```bash -docker build --no-cache -t opea/retriever-pinecone:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/langchain/pinecone/docker/Dockerfile . +docker build --no-cache -t opea/retriever-pinecone:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/pinecone/langchain/Dockerfile . ``` ### 3. Build Rerank Image ```bash -docker build --no-cache -t opea/reranking-tei:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/reranks/tei/docker/Dockerfile . +docker build --no-cache -t opea/reranking-tei:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/reranks/tei/langchain/Dockerfile . ``` ### 4. Build LLM Image @@ -94,7 +94,7 @@ docker build --no-cache -t opea/llm-tgi:latest --build-arg https_proxy=$https_pr ### 5. Build Dataprep Image ```bash -docker build --no-cache -t opea/dataprep-pinecone:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/pinecone/docker/Dockerfile . +docker build --no-cache -t opea/dataprep-pinecone:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/pinecone/langchain/Dockerfile . 
cd .. ``` diff --git a/ChatQnA/docker/xeon/compose_pinecone.yaml b/ChatQnA/docker_compose/intel/cpu/xeon/compose_pinecone.yaml similarity index 100% rename from ChatQnA/docker/xeon/compose_pinecone.yaml rename to ChatQnA/docker_compose/intel/cpu/xeon/compose_pinecone.yaml diff --git a/ChatQnA/docker_image_build/build.yaml b/ChatQnA/docker_image_build/build.yaml index 390231320..168a78a6f 100644 --- a/ChatQnA/docker_image_build/build.yaml +++ b/ChatQnA/docker_image_build/build.yaml @@ -53,6 +53,12 @@ services: dockerfile: comps/retrievers/qdrant/haystack/Dockerfile extends: chatqna image: ${REGISTRY:-opea}/retriever-qdrant:${TAG:-latest} + retriever-pinecone: + build: + context: GenAIComps + dockerfile: comps/retrievers/pinecone/langchain/Dockerfile + extends: chatqna + image: ${REGISTRY:-opea}/retriever-pinecone:${TAG:-latest} reranking-tei: build: context: GenAIComps @@ -107,6 +113,12 @@ services: dockerfile: comps/dataprep/qdrant/langchain/Dockerfile extends: chatqna image: ${REGISTRY:-opea}/dataprep-qdrant:${TAG:-latest} + dataprep-pinecone: + build: + context: GenAIComps + dockerfile: comps/dataprep/pinecone/langchain/Dockerfile + extends: chatqna + image: ${REGISTRY:-opea}/dataprep-redis:${TAG:-latest} guardrails-tgi: build: context: GenAIComps diff --git a/ChatQnA/tests/test_chatqna_pinecone_on_xeon.sh b/ChatQnA/tests/test_chatqna_pinecone_on_xeon.sh new file mode 100644 index 000000000..5cecd1ea9 --- /dev/null +++ b/ChatQnA/tests/test_chatqna_pinecone_on_xeon.sh @@ -0,0 +1,258 @@ +#!/bin/bash +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +set -e +IMAGE_REPO=${IMAGE_REPO:-"opea"} +IMAGE_TAG=${IMAGE_TAG:-"latest"} +echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}" +echo "TAG=IMAGE_TAG=${IMAGE_TAG}" +export REGISTRY=${IMAGE_REPO} +export TAG=${IMAGE_TAG} + +WORKPATH=$(dirname "$PWD") +LOG_PATH="$WORKPATH/tests" +ip_address=$(hostname -I | awk '{print $1}') + +function build_docker_images() { + cd $WORKPATH/docker_image_build + git clone https://github.com/opea-project/GenAIComps.git + + echo "Build all the images with --no-cache, check docker_image_build.log for details..." 
+ service_list="chatqna chatqna-ui chatqna-conversation-ui dataprep-pinecone embedding-tei retriever-pinecone reranking-tei llm-tgi" + docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log + + docker pull ghcr.io/huggingface/tgi-gaudi:2.0.1 + docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + + docker images && sleep 1s +} + +function start_services() { + cd $WORKPATH/docker/xeon + + export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5" + export RERANK_MODEL_ID="BAAI/bge-reranker-base" + export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" + export TEI_EMBEDDING_ENDPOINT="http://${ip_address}:6006" + export TEI_RERANKING_ENDPOINT="http://${ip_address}:8808" + export TGI_LLM_ENDPOINT="http://${ip_address}:9009" + export PINECONE_API_KEY=${PINECONE_KEY} + export PINECONE_INDEX_NAME="langchain-test" + export INDEX_NAME="langchain-test" + export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} + export MEGA_SERVICE_HOST_IP=${ip_address} + export EMBEDDING_SERVICE_HOST_IP=${ip_address} + export RETRIEVER_SERVICE_HOST_IP=${ip_address} + export RERANK_SERVICE_HOST_IP=${ip_address} + export LLM_SERVICE_HOST_IP=${ip_address} + export BACKEND_SERVICE_ENDPOINT="http://${ip_address}:8888/v1/chatqna" + export DATAPREP_SERVICE_ENDPOINT="http://${ip_address}:6007/v1/dataprep" + export DATAPREP_GET_FILE_ENDPOINT="http://${ip_address}:6007/v1/dataprep/get_file" + export DATAPREP_DELETE_FILE_ENDPOINT="http://${ip_address}:6007/v1/dataprep/delete_file" + + sed -i "s/backend_address/$ip_address/g" $WORKPATH/docker/ui/svelte/.env + + # Start Docker Containers + docker compose -f compose_pinecone.yaml up -d + n=0 + until [[ "$n" -ge 200 ]]; do + docker logs tgi-service > tgi_service_start.log + if grep -q Connected tgi_service_start.log; then + break + fi + sleep 1s + n=$((n+1)) + done +} + +function validate_services() { + local URL="$1" + local EXPECTED_RESULT="$2" + local SERVICE_NAME="$3" + local DOCKER_NAME="$4" + local INPUT_DATA="$5" + + if [[ $SERVICE_NAME == *"dataprep_upload_file"* ]]; then + cd $LOG_PATH + HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -F 'files=@./dataprep_file.txt' -H 'Content-Type: multipart/form-data' "$URL") + elif [[ $SERVICE_NAME == *"dataprep_del"* ]]; then + HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -d '{"file_path": "all"}' -H 'Content-Type: application/json' "$URL") + else + HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL") + fi + HTTP_STATUS=$(echo $HTTP_RESPONSE | tr -d '\n' | sed -e 's/.*HTTPSTATUS://') + RESPONSE_BODY=$(echo $HTTP_RESPONSE | sed -e 's/HTTPSTATUS\:.*//g') + + docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log + + # check response status + if [ "$HTTP_STATUS" -ne "200" ]; then + echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" + exit 1 + else + echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." + fi + # check response body + if [[ "$RESPONSE_BODY" != *"$EXPECTED_RESULT"* ]]; then + echo "[ $SERVICE_NAME ] Content does not match the expected result: $RESPONSE_BODY" + exit 1 + else + echo "[ $SERVICE_NAME ] Content is as expected." + fi + + sleep 1s +} + +function validate_microservices() { + # Check if the microservices are running correctly. 
+ + # tei for embedding service + validate_services \ + "${ip_address}:6006/embed" \ + "\[\[" \ + "tei-embedding" \ + "tei-embedding-server" \ + '{"inputs":"What is Deep Learning?"}' + + # embedding microservice + validate_services \ + "${ip_address}:6000/v1/embeddings" \ + '"text":"What is Deep Learning?","embedding":\[' \ + "embedding" \ + "embedding-tei-server" \ + '{"text":"What is Deep Learning?"}' + + sleep 1m # retrieval can't curl as expected, try to wait for more time + + # test /v1/dataprep upload file + echo "Deep learning is a subset of machine learning that utilizes neural networks with multiple layers to analyze various levels of abstract data representations. It enables computers to identify patterns and make decisions with minimal human intervention by learning from large amounts of data." > $LOG_PATH/dataprep_file.txt + validate_service \ + "http://${ip_address}:6007/v1/dataprep" \ + "Data preparation succeeded" \ + "dataprep_upload_file" \ + "dataprep-pinecone-server" + + # test /v1/dataprep/delete_file + validate_service \ + "http://${ip_address}:6007/v1/dataprep/delete_file" \ + '{"status":true}' \ + "dataprep_del" \ + "dataprep-pinecone-server" + + # retrieval microservice + test_embedding=$(python3 -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)") + validate_services \ + "${ip_address}:7000/v1/retrieval" \ + " " \ + "retrieval" \ + "retriever-pinecone-server" \ + "{\"text\":\"What is the revenue of Nike in 2023?\",\"embedding\":${test_embedding}}" + + # tei for rerank microservice + validate_services \ + "${ip_address}:8808/rerank" \ + '{"index":1,"score":' \ + "tei-rerank" \ + "tei-reranking-server" \ + '{"query":"What is Deep Learning?", "texts": ["Deep Learning is not...", "Deep learning is..."]}' + + # rerank microservice + validate_services \ + "${ip_address}:8000/v1/reranking" \ + "Deep learning is..." \ + "rerank" \ + "reranking-tei-xeon-server" \ + '{"initial_query":"What is Deep Learning?", "retrieved_docs": [{"text":"Deep Learning is not..."}, {"text":"Deep learning is..."}]}' + + # tgi for llm service + validate_services \ + "${ip_address}:9009/generate" \ + "generated_text" \ + "tgi-llm" \ + "tgi-service" \ + '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' + + # llm microservice + validate_services \ + "${ip_address}:9000/v1/chat/completions" \ + "data: " \ + "llm" \ + "llm-tgi-server" \ + '{"query":"What is Deep Learning?"}' + +} + +function validate_megaservice() { + # Curl the Mega Service + validate_services \ + "${ip_address}:8888/v1/chatqna" \ + "billion" \ + "mega-chatqna" \ + "chatqna-xeon-backend-server" \ + '{"messages": "What is the revenue of Nike in 2023?"}' + +} + +function validate_frontend() { + echo "[ TEST INFO ]: --------- frontend test started ---------" + cd $WORKPATH/ui/svelte + local conda_env_name="OPEA_e2e" + export PATH=${HOME}/miniforge3/bin/:$PATH + if conda info --envs | grep -q "$conda_env_name"; then + echo "$conda_env_name exist!" + else + conda create -n ${conda_env_name} python=3.12 -y + fi + source activate ${conda_env_name} + echo "[ TEST INFO ]: --------- conda env activated ---------" + + sed -i "s/localhost/$ip_address/g" playwright.config.ts + + conda install -c conda-forge nodejs -y + npm install && npm ci && npx playwright install --with-deps + node -v && npm -v && pip list + + exit_status=0 + npx playwright test || exit_status=$? 
+ + if [ $exit_status -ne 0 ]; then + echo "[TEST INFO]: ---------frontend test failed---------" + exit $exit_status + else + echo "[TEST INFO]: ---------frontend test passed---------" + fi +} + +function stop_docker() { + cd $WORKPATH/docker_compose/intel/cpu/xeon/ + docker compose stop -f compose_pinecone.yaml && docker compose rm -f compose_pinecone.yaml +} + +function main() { + + stop_docker + if [[ "$IMAGE_REPO" == "" ]]; then build_docker_images; fi + start_time=$(date +%s) + start_services + end_time=$(date +%s) + duration=$((end_time-start_time)) + echo "Mega service start duration is $duration s" && sleep 1s + + if [ "${mode}" == "perf" ]; then + python3 $WORKPATH/tests/chatqna_benchmark.py + elif [ "${mode}" == "" ]; then + validate_microservices + echo "==== microservices validated ====" + validate_megaservice + echo "==== megaservice validated ====" + validate_frontend + echo "==== frontend validated ====" + fi + + stop_docker + echo y | docker system prune + +} + +main From 098a4cee00c7348c0b7f9f7e9033b2d838e83457 Mon Sep 17 00:00:00 2001 From: AI Workloads Date: Wed, 11 Sep 2024 21:12:19 +0530 Subject: [PATCH 07/57] Updated the Dockerfile pathf or rerank Signed-off-by: AI Workloads Signed-off-by: Pallavi Jaini --- ChatQnA/docker_compose/intel/cpu/xeon/README_pinecone.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ChatQnA/docker_compose/intel/cpu/xeon/README_pinecone.md b/ChatQnA/docker_compose/intel/cpu/xeon/README_pinecone.md index 6cbf45135..decb374c6 100644 --- a/ChatQnA/docker_compose/intel/cpu/xeon/README_pinecone.md +++ b/ChatQnA/docker_compose/intel/cpu/xeon/README_pinecone.md @@ -82,7 +82,7 @@ docker build --no-cache -t opea/retriever-pinecone:latest --build-arg https_prox ### 3. Build Rerank Image ```bash -docker build --no-cache -t opea/reranking-tei:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/reranks/tei/langchain/Dockerfile . +docker build --no-cache -t opea/reranking-tei:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/reranks/tei/Dockerfile . ``` ### 4. Build LLM Image From c87911c223d1d94906aa4898edfb8edf1fe97ee6 Mon Sep 17 00:00:00 2001 From: AI Workloads Date: Fri, 13 Sep 2024 10:33:24 +0530 Subject: [PATCH 08/57] Updated the file name for the tests Signed-off-by: AI Workloads Signed-off-by: Pallavi Jaini --- .../tests/_test_chatqna_pinecone_on_xeon.sh | 232 ------------------ ...on.sh => test_compose_pinecone_on_xeon.sh} | 2 +- 2 files changed, 1 insertion(+), 233 deletions(-) delete mode 100644 ChatQnA/tests/_test_chatqna_pinecone_on_xeon.sh rename ChatQnA/tests/{test_chatqna_pinecone_on_xeon.sh => test_compose_pinecone_on_xeon.sh} (99%) diff --git a/ChatQnA/tests/_test_chatqna_pinecone_on_xeon.sh b/ChatQnA/tests/_test_chatqna_pinecone_on_xeon.sh deleted file mode 100644 index 392941994..000000000 --- a/ChatQnA/tests/_test_chatqna_pinecone_on_xeon.sh +++ /dev/null @@ -1,232 +0,0 @@ -#!/bin/bash -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -set -e -echo "IMAGE_REPO=${IMAGE_REPO}" - -WORKPATH=$(dirname "$PWD") -LOG_PATH="$WORKPATH/tests" -ip_address=$(hostname -I | awk '{print $1}') - -function build_docker_images() { - cd $WORKPATH - git clone https://github.com/opea-project/GenAIComps.git - cd GenAIComps - - docker build -t opea/embedding-tei:latest -f comps/embeddings/langchain/docker/Dockerfile . - docker build -t opea/retriever-pinecone:latest -f comps/retrievers/langchain/pinecone/docker/Dockerfile . 
- docker build -t opea/reranking-tei:latest -f comps/reranks/tei/docker/Dockerfile . - docker build -t opea/llm-tgi:latest -f comps/llms/text-generation/tgi/Dockerfile . - docker build -t opea/dataprep-pinecone:latest -f comps/dataprep/pinecone/docker/Dockerfile . - - cd $WORKPATH/docker - docker build --no-cache -t opea/chatqna:latest -f Dockerfile . - - cd $WORKPATH/docker/ui - docker build --no-cache -t opea/chatqna-ui:latest -f docker/Dockerfile . - - docker images -} - -function start_services() { - cd $WORKPATH/docker/xeon - - export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5" - export RERANK_MODEL_ID="BAAI/bge-reranker-base" - export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" - export TEI_EMBEDDING_ENDPOINT="http://${ip_address}:6006" - export TEI_RERANKING_ENDPOINT="http://${ip_address}:8808" - export TGI_LLM_ENDPOINT="http://${ip_address}:9009" - export PINECONE_API_KEY=${PINECONE_KEY} - export PINECONE_INDEX_NAME="langchain-test" - export INDEX_NAME="langchain-test" - export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} - export MEGA_SERVICE_HOST_IP=${ip_address} - export EMBEDDING_SERVICE_HOST_IP=${ip_address} - export RETRIEVER_SERVICE_HOST_IP=${ip_address} - export RERANK_SERVICE_HOST_IP=${ip_address} - export LLM_SERVICE_HOST_IP=${ip_address} - export BACKEND_SERVICE_ENDPOINT="http://${ip_address}:8888/v1/chatqna" - export DATAPREP_SERVICE_ENDPOINT="http://${ip_address}:6007/v1/dataprep" - export DATAPREP_GET_FILE_ENDPOINT="http://${ip_address}:6007/v1/dataprep/get_file" - export DATAPREP_DELETE_FILE_ENDPOINT="http://${ip_address}:6007/v1/dataprep/delete_file" - - sed -i "s/backend_address/$ip_address/g" $WORKPATH/docker/ui/svelte/.env - - if [[ "$IMAGE_REPO" != "" ]]; then - # Replace the container name with a test-specific name - echo "using image repository $IMAGE_REPO and image tag $IMAGE_TAG" - sed -i "s#image: opea/chatqna:latest#image: opea/chatqna:${IMAGE_TAG}#g" compose_pinecone.yaml - sed -i "s#image: opea/chatqna-ui:latest#image: opea/chatqna-ui:${IMAGE_TAG}#g" compose_pinecone.yaml - sed -i "s#image: opea/chatqna-conversation-ui:latest#image: opea/chatqna-conversation-ui:${IMAGE_TAG}#g" compose_pinecone.yaml - sed -i "s#image: opea/*#image: ${IMAGE_REPO}opea/#g" compose_pinecone.yaml - cat compose_pinecone.yaml - fi - - # Start Docker Containers - docker compose -f compose_pinecone.yaml up -d - n=0 - until [[ "$n" -ge 200 ]]; do - docker logs tgi-service > tgi_service_start.log - if grep -q Connected tgi_service_start.log; then - break - fi - sleep 1s - n=$((n+1)) - done -} - -function validate_services() { - local URL="$1" - local EXPECTED_RESULT="$2" - local SERVICE_NAME="$3" - local DOCKER_NAME="$4" - local INPUT_DATA="$5" - - local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL") - if [ "$HTTP_STATUS" -eq 200 ]; then - echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." - - local CONTENT=$(curl -s -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log) - - if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then - echo "[ $SERVICE_NAME ] Content is as expected." - else - echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT" - docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log - exit 1 - fi - else - echo "[ $SERVICE_NAME ] HTTP status is not 200. 
Received status was $HTTP_STATUS" - docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log - exit 1 - fi - sleep 1s -} - -function validate_microservices() { - # Check if the microservices are running correctly. - - # tei for embedding service - validate_services \ - "${ip_address}:6006/embed" \ - "\[\[" \ - "tei-embedding" \ - "tei-embedding-server" \ - '{"inputs":"What is Deep Learning?"}' - - # embedding microservice - validate_services \ - "${ip_address}:6000/v1/embeddings" \ - '"text":"What is Deep Learning?","embedding":\[' \ - "embedding" \ - "embedding-tei-server" \ - '{"text":"What is Deep Learning?"}' - - sleep 1m # retrieval can't curl as expected, try to wait for more time - - # retrieval microservice - test_embedding=$(python3 -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)") - validate_services \ - "${ip_address}:7000/v1/retrieval" \ - " " \ - "retrieval" \ - "retriever-pinecone-server" \ - "{\"text\":\"What is the revenue of Nike in 2023?\",\"embedding\":${test_embedding}}" - - # tei for rerank microservice - validate_services \ - "${ip_address}:8808/rerank" \ - '{"index":1,"score":' \ - "tei-rerank" \ - "tei-reranking-server" \ - '{"query":"What is Deep Learning?", "texts": ["Deep Learning is not...", "Deep learning is..."]}' - - # rerank microservice - validate_services \ - "${ip_address}:8000/v1/reranking" \ - "Deep learning is..." \ - "rerank" \ - "reranking-tei-xeon-server" \ - '{"initial_query":"What is Deep Learning?", "retrieved_docs": [{"text":"Deep Learning is not..."}, {"text":"Deep learning is..."}]}' - - # tgi for llm service - validate_services \ - "${ip_address}:9009/generate" \ - "generated_text" \ - "tgi-llm" \ - "tgi-service" \ - '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' - - # llm microservice - validate_services \ - "${ip_address}:9000/v1/chat/completions" \ - "data: " \ - "llm" \ - "llm-tgi-server" \ - '{"query":"What is Deep Learning?"}' - -} - -function validate_megaservice() { - # Curl the Mega Service - validate_services \ - "${ip_address}:8888/v1/chatqna" \ - "billion" \ - "mega-chatqna" \ - "chatqna-xeon-backend-server" \ - '{"messages": "What is the revenue of Nike in 2023?"}' - -} - -function validate_frontend() { - cd $WORKPATH/docker/ui/svelte - local conda_env_name="OPEA_e2e" - export PATH=${HOME}/miniforge3/bin/:$PATH -# conda remove -n ${conda_env_name} --all -y -# conda create -n ${conda_env_name} python=3.12 -y - source activate ${conda_env_name} - - sed -i "s/localhost/$ip_address/g" playwright.config.ts - -# conda install -c conda-forge nodejs -y - npm install && npm ci && npx playwright install --with-deps - node -v && npm -v && pip list - - exit_status=0 - npx playwright test || exit_status=$? 
- - if [ $exit_status -ne 0 ]; then - echo "[TEST INFO]: ---------frontend test failed---------" - exit $exit_status - else - echo "[TEST INFO]: ---------frontend test passed---------" - fi -} - -function stop_docker() { - cd $WORKPATH/docker/xeon - docker compose stop && docker compose rm -f -} - -function main() { - - stop_docker - if [[ "$IMAGE_REPO" == "" ]]; then build_docker_images; fi - start_time=$(date +%s) - start_services - end_time=$(date +%s) - duration=$((end_time-start_time)) - echo "Mega service start duration is $duration s" && sleep 1s - - validate_microservices - validate_megaservice - validate_frontend - - stop_docker - echo y | docker system prune - -} - -main diff --git a/ChatQnA/tests/test_chatqna_pinecone_on_xeon.sh b/ChatQnA/tests/test_compose_pinecone_on_xeon.sh similarity index 99% rename from ChatQnA/tests/test_chatqna_pinecone_on_xeon.sh rename to ChatQnA/tests/test_compose_pinecone_on_xeon.sh index 5cecd1ea9..9ae9d06c7 100644 --- a/ChatQnA/tests/test_chatqna_pinecone_on_xeon.sh +++ b/ChatQnA/tests/test_compose_pinecone_on_xeon.sh @@ -29,7 +29,7 @@ function build_docker_images() { } function start_services() { - cd $WORKPATH/docker/xeon + cd $WORKPATH/docker_compose/intel/cpu/xeon/ export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5" export RERANK_MODEL_ID="BAAI/bge-reranker-base" From 2698d7cb6b9795c9fd5839dc44f39e473689b04f Mon Sep 17 00:00:00 2001 From: AI Workloads Date: Fri, 13 Sep 2024 10:53:34 +0530 Subject: [PATCH 09/57] Corrected the docker compose for pinecone Signed-off-by: AI Workloads Signed-off-by: Pallavi Jaini --- ChatQnA/tests/test_compose_pinecone_on_xeon.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ChatQnA/tests/test_compose_pinecone_on_xeon.sh b/ChatQnA/tests/test_compose_pinecone_on_xeon.sh index 9ae9d06c7..32831f012 100644 --- a/ChatQnA/tests/test_compose_pinecone_on_xeon.sh +++ b/ChatQnA/tests/test_compose_pinecone_on_xeon.sh @@ -54,7 +54,7 @@ function start_services() { sed -i "s/backend_address/$ip_address/g" $WORKPATH/docker/ui/svelte/.env # Start Docker Containers - docker compose -f compose_pinecone.yaml up -d + docker compose up -d > ${LOG_PATH}/start_services_with_compose.log n=0 until [[ "$n" -ge 200 ]]; do docker logs tgi-service > tgi_service_start.log From 63ed2799f947c2c2cfab3fb688d58482b532768e Mon Sep 17 00:00:00 2001 From: AI Workloads Date: Fri, 13 Sep 2024 10:54:34 +0530 Subject: [PATCH 10/57] Corrected the docker compose for pinecone Signed-off-by: AI Workloads Signed-off-by: Pallavi Jaini --- ChatQnA/tests/test_compose_pinecone_on_xeon.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/ChatQnA/tests/test_compose_pinecone_on_xeon.sh b/ChatQnA/tests/test_compose_pinecone_on_xeon.sh index 32831f012..6ee9147ee 100644 --- a/ChatQnA/tests/test_compose_pinecone_on_xeon.sh +++ b/ChatQnA/tests/test_compose_pinecone_on_xeon.sh @@ -55,6 +55,7 @@ function start_services() { # Start Docker Containers docker compose up -d > ${LOG_PATH}/start_services_with_compose.log + n=0 until [[ "$n" -ge 200 ]]; do docker logs tgi-service > tgi_service_start.log From 5723ccf4aaff745c864a92751c70bf0cb8ed022b Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 13 Sep 2024 05:25:14 +0000 Subject: [PATCH 11/57] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Signed-off-by: AI Workloads Signed-off-by: Pallavi Jaini --- ChatQnA/tests/test_compose_pinecone_on_xeon.sh | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/ChatQnA/tests/test_compose_pinecone_on_xeon.sh b/ChatQnA/tests/test_compose_pinecone_on_xeon.sh index 6ee9147ee..aa9ebcbd7 100644 --- a/ChatQnA/tests/test_compose_pinecone_on_xeon.sh +++ b/ChatQnA/tests/test_compose_pinecone_on_xeon.sh @@ -55,7 +55,7 @@ function start_services() { # Start Docker Containers docker compose up -d > ${LOG_PATH}/start_services_with_compose.log - + n=0 until [[ "$n" -ge 200 ]]; do docker logs tgi-service > tgi_service_start.log From 1622246f5cc51897c2cccfd4a66ae2aff316a44c Mon Sep 17 00:00:00 2001 From: AI Workloads Date: Sat, 14 Sep 2024 05:42:16 +0530 Subject: [PATCH 12/57] Corrected the test script issue Signed-off-by: AI Workloads Signed-off-by: Pallavi Jaini --- ChatQnA/tests/test_compose_pinecone_on_xeon.sh | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) mode change 100644 => 100755 ChatQnA/tests/test_compose_pinecone_on_xeon.sh diff --git a/ChatQnA/tests/test_compose_pinecone_on_xeon.sh b/ChatQnA/tests/test_compose_pinecone_on_xeon.sh old mode 100644 new mode 100755 index aa9ebcbd7..0a562a9e3 --- a/ChatQnA/tests/test_compose_pinecone_on_xeon.sh +++ b/ChatQnA/tests/test_compose_pinecone_on_xeon.sh @@ -51,7 +51,7 @@ function start_services() { export DATAPREP_GET_FILE_ENDPOINT="http://${ip_address}:6007/v1/dataprep/get_file" export DATAPREP_DELETE_FILE_ENDPOINT="http://${ip_address}:6007/v1/dataprep/delete_file" - sed -i "s/backend_address/$ip_address/g" $WORKPATH/docker/ui/svelte/.env + sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env # Start Docker Containers docker compose up -d > ${LOG_PATH}/start_services_with_compose.log @@ -226,8 +226,9 @@ function validate_frontend() { } function stop_docker() { + echo $WORKPATH cd $WORKPATH/docker_compose/intel/cpu/xeon/ - docker compose stop -f compose_pinecone.yaml && docker compose rm -f compose_pinecone.yaml + docker compose -f compose_pinecone.yaml down } function main() { From 573058162a38867650ae630e9296d22e54be3876 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 14 Sep 2024 00:14:39 +0000 Subject: [PATCH 13/57] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Signed-off-by: AI Workloads Signed-off-by: Pallavi Jaini --- ChatQnA/tests/test_compose_pinecone_on_xeon.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ChatQnA/tests/test_compose_pinecone_on_xeon.sh b/ChatQnA/tests/test_compose_pinecone_on_xeon.sh index 0a562a9e3..2c3413910 100755 --- a/ChatQnA/tests/test_compose_pinecone_on_xeon.sh +++ b/ChatQnA/tests/test_compose_pinecone_on_xeon.sh @@ -228,7 +228,7 @@ function validate_frontend() { function stop_docker() { echo $WORKPATH cd $WORKPATH/docker_compose/intel/cpu/xeon/ - docker compose -f compose_pinecone.yaml down + docker compose -f compose_pinecone.yaml down } function main() { From 4753a8d1cd6144fb59c8afed02e110fea5d51535 Mon Sep 17 00:00:00 2001 From: AI Workloads Date: Wed, 18 Sep 2024 04:14:21 +0530 Subject: [PATCH 14/57] Fixed the issues with tests Signed-off-by: AI Workloads Signed-off-by: Pallavi Jaini --- .../intel/cpu/xeon/compose_pinecone.yaml | 1 + ChatQnA/docker_image_build/build.yaml | 2 +- .../tests/test_compose_pinecone_on_xeon.sh | 26 +++++++++++++++---- 3 files changed, 23 insertions(+), 6 deletions(-) diff --git a/ChatQnA/docker_compose/intel/cpu/xeon/compose_pinecone.yaml b/ChatQnA/docker_compose/intel/cpu/xeon/compose_pinecone.yaml index 103a1a911..8f1d9ad84 100644 --- 
a/ChatQnA/docker_compose/intel/cpu/xeon/compose_pinecone.yaml +++ b/ChatQnA/docker_compose/intel/cpu/xeon/compose_pinecone.yaml @@ -67,6 +67,7 @@ services: PINECONE_INDEX_NAME: ${PINECONE_INDEX_NAME} LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY} TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} + HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} restart: unless-stopped tei-reranking-service: image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 diff --git a/ChatQnA/docker_image_build/build.yaml b/ChatQnA/docker_image_build/build.yaml index 168a78a6f..5b3e8813e 100644 --- a/ChatQnA/docker_image_build/build.yaml +++ b/ChatQnA/docker_image_build/build.yaml @@ -118,7 +118,7 @@ services: context: GenAIComps dockerfile: comps/dataprep/pinecone/langchain/Dockerfile extends: chatqna - image: ${REGISTRY:-opea}/dataprep-redis:${TAG:-latest} + image: ${REGISTRY:-opea}/dataprep-pinecone:${TAG:-latest} guardrails-tgi: build: context: GenAIComps diff --git a/ChatQnA/tests/test_compose_pinecone_on_xeon.sh b/ChatQnA/tests/test_compose_pinecone_on_xeon.sh index 2c3413910..676e12a19 100755 --- a/ChatQnA/tests/test_compose_pinecone_on_xeon.sh +++ b/ChatQnA/tests/test_compose_pinecone_on_xeon.sh @@ -28,9 +28,22 @@ function build_docker_images() { docker images && sleep 1s } +function build_pinecone_docker_images() { + echo "In pinecone build" + cd $WORKPATH/docker_image_build + git clone https://github.com/opea-project/GenAIComps.git + + echo "Build all the images with --no-cache, check docker_image_build.log for details..." + service_list="dataprep-pinecone retriever-pinecone" + docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log + + docker images && sleep 1s +} + function start_services() { cd $WORKPATH/docker_compose/intel/cpu/xeon/ + export no_proxy=${no_proxy},${ip_address} export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5" export RERANK_MODEL_ID="BAAI/bge-reranker-base" export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" @@ -54,7 +67,7 @@ function start_services() { sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env # Start Docker Containers - docker compose up -d > ${LOG_PATH}/start_services_with_compose.log + docker compose -f compose_pinecone.yaml up -d > ${LOG_PATH}/start_services_with_compose.log n=0 until [[ "$n" -ge 200 ]]; do @@ -94,6 +107,7 @@ function validate_services() { else echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." fi + # check response body if [[ "$RESPONSE_BODY" != *"$EXPECTED_RESULT"* ]]; then echo "[ $SERVICE_NAME ] Content does not match the expected result: $RESPONSE_BODY" @@ -111,7 +125,7 @@ function validate_microservices() { # tei for embedding service validate_services \ "${ip_address}:6006/embed" \ - "\[\[" \ + "[[" \ "tei-embedding" \ "tei-embedding-server" \ '{"inputs":"What is Deep Learning?"}' @@ -119,7 +133,7 @@ function validate_microservices() { # embedding microservice validate_services \ "${ip_address}:6000/v1/embeddings" \ - '"text":"What is Deep Learning?","embedding":\[' \ + '"text":"What is Deep Learning?","embedding":[' \ "embedding" \ "embedding-tei-server" \ '{"text":"What is Deep Learning?"}' @@ -128,14 +142,14 @@ function validate_microservices() { # test /v1/dataprep upload file echo "Deep learning is a subset of machine learning that utilizes neural networks with multiple layers to analyze various levels of abstract data representations. It enables computers to identify patterns and make decisions with minimal human intervention by learning from large amounts of data." 
> $LOG_PATH/dataprep_file.txt - validate_service \ + validate_services \ "http://${ip_address}:6007/v1/dataprep" \ "Data preparation succeeded" \ "dataprep_upload_file" \ "dataprep-pinecone-server" # test /v1/dataprep/delete_file - validate_service \ + validate_services \ "http://${ip_address}:6007/v1/dataprep/delete_file" \ '{"status":true}' \ "dataprep_del" \ @@ -226,6 +240,7 @@ function validate_frontend() { } function stop_docker() { + echo "In stop docker" echo $WORKPATH cd $WORKPATH/docker_compose/intel/cpu/xeon/ docker compose -f compose_pinecone.yaml down @@ -235,6 +250,7 @@ function main() { stop_docker if [[ "$IMAGE_REPO" == "" ]]; then build_docker_images; fi + build_pinecone_docker_images start_time=$(date +%s) start_services end_time=$(date +%s) From 7ba78b17f00ceba1c3a1c753ee71be19b7f6ea2e Mon Sep 17 00:00:00 2001 From: AI Workloads Date: Wed, 18 Sep 2024 04:21:39 +0530 Subject: [PATCH 15/57] Fixed the links Signed-off-by: AI Workloads Signed-off-by: Pallavi Jaini --- ChatQnA/docker_compose/intel/cpu/xeon/README_pinecone.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ChatQnA/docker_compose/intel/cpu/xeon/README_pinecone.md b/ChatQnA/docker_compose/intel/cpu/xeon/README_pinecone.md index decb374c6..f730a91ae 100644 --- a/ChatQnA/docker_compose/intel/cpu/xeon/README_pinecone.md +++ b/ChatQnA/docker_compose/intel/cpu/xeon/README_pinecone.md @@ -371,12 +371,12 @@ To access the Conversational UI frontend, open the following URL in your browser - "80:80" ``` -![project-screenshot](../../assets/img/chat_ui_init.png) +![project-screenshot](../../../../assets/img/chat_ui_init.png) Here is an example of running ChatQnA: -![project-screenshot](../../assets/img/chat_ui_response.png) +![project-screenshot](../../../../assets/img/chat_ui_response.png) Here is an example of running ChatQnA with Conversational UI (React): -![project-screenshot](../../assets/img/conversation_ui_response.png) +![project-screenshot](../../../../assets/img/conversation_ui_response.png) From e37b3840836369d7e3e8b9d1a485cd807cfd19db Mon Sep 17 00:00:00 2001 From: AI Workloads Date: Wed, 18 Sep 2024 11:30:43 +0530 Subject: [PATCH 16/57] Corrected the tag Signed-off-by: AI Workloads Signed-off-by: Pallavi Jaini --- ChatQnA/tests/test_compose_pinecone_on_xeon.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ChatQnA/tests/test_compose_pinecone_on_xeon.sh b/ChatQnA/tests/test_compose_pinecone_on_xeon.sh index 676e12a19..ffdd067e3 100755 --- a/ChatQnA/tests/test_compose_pinecone_on_xeon.sh +++ b/ChatQnA/tests/test_compose_pinecone_on_xeon.sh @@ -8,7 +8,7 @@ IMAGE_TAG=${IMAGE_TAG:-"latest"} echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}" echo "TAG=IMAGE_TAG=${IMAGE_TAG}" export REGISTRY=${IMAGE_REPO} -export TAG=${IMAGE_TAG} +export TAG=latest WORKPATH=$(dirname "$PWD") LOG_PATH="$WORKPATH/tests" From 48c52c293557deee48393c8093c6c54f1daeaf99 Mon Sep 17 00:00:00 2001 From: AI Workloads Date: Wed, 18 Sep 2024 21:03:11 +0530 Subject: [PATCH 17/57] Corrected the ports Signed-off-by: AI Workloads Signed-off-by: Pallavi Jaini --- ChatQnA/tests/test_compose_pinecone_on_xeon.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ChatQnA/tests/test_compose_pinecone_on_xeon.sh b/ChatQnA/tests/test_compose_pinecone_on_xeon.sh index ffdd067e3..0c51d75e8 100755 --- a/ChatQnA/tests/test_compose_pinecone_on_xeon.sh +++ b/ChatQnA/tests/test_compose_pinecone_on_xeon.sh @@ -61,8 +61,8 @@ function start_services() { export LLM_SERVICE_HOST_IP=${ip_address} export 
BACKEND_SERVICE_ENDPOINT="http://${ip_address}:8888/v1/chatqna" export DATAPREP_SERVICE_ENDPOINT="http://${ip_address}:6007/v1/dataprep" - export DATAPREP_GET_FILE_ENDPOINT="http://${ip_address}:6007/v1/dataprep/get_file" - export DATAPREP_DELETE_FILE_ENDPOINT="http://${ip_address}:6007/v1/dataprep/delete_file" + export DATAPREP_GET_FILE_ENDPOINT="http://${ip_address}:6008/v1/dataprep/get_file" + export DATAPREP_DELETE_FILE_ENDPOINT="http://${ip_address}:6009/v1/dataprep/delete_file" sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env @@ -150,7 +150,7 @@ function validate_microservices() { # test /v1/dataprep/delete_file validate_services \ - "http://${ip_address}:6007/v1/dataprep/delete_file" \ + "http://${ip_address}:6009/v1/dataprep/delete_file" \ '{"status":true}' \ "dataprep_del" \ "dataprep-pinecone-server" From a44f173a469a5748f3d6058070727c55b68b524b Mon Sep 17 00:00:00 2001 From: Pallavi Jaini Date: Tue, 24 Sep 2024 14:10:50 -0700 Subject: [PATCH 18/57] Commented out dataprep as not able to connect to pineconeserver Signed-off-by: Pallavi Jaini Signed-off-by: Pallavi Jaini --- .../tests/test_compose_pinecone_on_xeon.sh | 42 ++++++++++--------- 1 file changed, 23 insertions(+), 19 deletions(-) diff --git a/ChatQnA/tests/test_compose_pinecone_on_xeon.sh b/ChatQnA/tests/test_compose_pinecone_on_xeon.sh index 0c51d75e8..84b4647b6 100755 --- a/ChatQnA/tests/test_compose_pinecone_on_xeon.sh +++ b/ChatQnA/tests/test_compose_pinecone_on_xeon.sh @@ -140,31 +140,32 @@ function validate_microservices() { sleep 1m # retrieval can't curl as expected, try to wait for more time - # test /v1/dataprep upload file + # test /v1/dataprep upload file -> Not able to connect to pinecone server from test server so commenting out echo "Deep learning is a subset of machine learning that utilizes neural networks with multiple layers to analyze various levels of abstract data representations. It enables computers to identify patterns and make decisions with minimal human intervention by learning from large amounts of data." 
> $LOG_PATH/dataprep_file.txt - validate_services \ - "http://${ip_address}:6007/v1/dataprep" \ - "Data preparation succeeded" \ - "dataprep_upload_file" \ - "dataprep-pinecone-server" + #validate_services \ + # "http://${ip_address}:6007/v1/dataprep" \ + # "Data preparation succeeded" \ + # "dataprep_upload_file" \ + # "dataprep-pinecone-server" # test /v1/dataprep/delete_file - validate_services \ - "http://${ip_address}:6009/v1/dataprep/delete_file" \ - '{"status":true}' \ - "dataprep_del" \ - "dataprep-pinecone-server" - + #validate_services \ + # "http://${ip_address}:6009/v1/dataprep/delete_file" \ + # '{"status":true}' \ + # "dataprep_del" \ + # "dataprep-pinecone-server" + # retrieval microservice - test_embedding=$(python3 -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)") - validate_services \ - "${ip_address}:7000/v1/retrieval" \ - " " \ - "retrieval" \ - "retriever-pinecone-server" \ - "{\"text\":\"What is the revenue of Nike in 2023?\",\"embedding\":${test_embedding}}" + #test_embedding=$(python3 -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)") + #validate_services \ + # "${ip_address}:7000/v1/retrieval" \ + # " " \ + # "retrieval" \ + # "retriever-pinecone-server" \ + # "{\"text\":\"What is the revenue of Nike in 2023?\",\"embedding\":${test_embedding}}" # tei for rerank microservice + echo "Validating reranking service" validate_services \ "${ip_address}:8808/rerank" \ '{"index":1,"score":' \ @@ -173,6 +174,7 @@ function validate_microservices() { '{"query":"What is Deep Learning?", "texts": ["Deep Learning is not...", "Deep learning is..."]}' # rerank microservice + echo "Validating reranking micro service" validate_services \ "${ip_address}:8000/v1/reranking" \ "Deep learning is..." 
\ @@ -181,6 +183,7 @@ function validate_microservices() { '{"initial_query":"What is Deep Learning?", "retrieved_docs": [{"text":"Deep Learning is not..."}, {"text":"Deep learning is..."}]}' # tgi for llm service + echo "Validating llm service" validate_services \ "${ip_address}:9009/generate" \ "generated_text" \ @@ -189,6 +192,7 @@ function validate_microservices() { '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' # llm microservice + echo "Validating llm microservice" validate_services \ "${ip_address}:9000/v1/chat/completions" \ "data: " \ From 709127084687907d035638c002f23e615181d02b Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 24 Sep 2024 21:11:29 +0000 Subject: [PATCH 19/57] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Signed-off-by: Pallavi Jaini --- ChatQnA/tests/test_compose_pinecone_on_xeon.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ChatQnA/tests/test_compose_pinecone_on_xeon.sh b/ChatQnA/tests/test_compose_pinecone_on_xeon.sh index 84b4647b6..e4c48f1eb 100755 --- a/ChatQnA/tests/test_compose_pinecone_on_xeon.sh +++ b/ChatQnA/tests/test_compose_pinecone_on_xeon.sh @@ -154,7 +154,7 @@ function validate_microservices() { # '{"status":true}' \ # "dataprep_del" \ # "dataprep-pinecone-server" - + # retrieval microservice #test_embedding=$(python3 -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)") #validate_services \ From f07f0d703ac3d739d6e1cca041686c02feaedee7 Mon Sep 17 00:00:00 2001 From: Pallavi Jaini Date: Tue, 24 Sep 2024 21:25:40 -0700 Subject: [PATCH 20/57] Added more comments Signed-off-by: Pallavi Jaini Signed-off-by: Pallavi Jaini --- ChatQnA/tests/test_compose_pinecone_on_xeon.sh | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/ChatQnA/tests/test_compose_pinecone_on_xeon.sh b/ChatQnA/tests/test_compose_pinecone_on_xeon.sh index e4c48f1eb..822482aec 100755 --- a/ChatQnA/tests/test_compose_pinecone_on_xeon.sh +++ b/ChatQnA/tests/test_compose_pinecone_on_xeon.sh @@ -107,7 +107,10 @@ function validate_services() { else echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." 
fi - + echo "Response" + echo $RESPONSE_BODY + echo "Expected Result" + echo $EXPECTED_RESULT # check response body if [[ "$RESPONSE_BODY" != *"$EXPECTED_RESULT"* ]]; then echo "[ $SERVICE_NAME ] Content does not match the expected result: $RESPONSE_BODY" @@ -266,7 +269,7 @@ function main() { elif [ "${mode}" == "" ]; then validate_microservices echo "==== microservices validated ====" - validate_megaservice + #validate_megaservice echo "==== megaservice validated ====" validate_frontend echo "==== frontend validated ====" From 8c6326f99f05cf96bfc412f9a96932746269d6bb Mon Sep 17 00:00:00 2001 From: Pallavi Jaini Date: Tue, 15 Oct 2024 18:22:08 +0000 Subject: [PATCH 21/57] Validated in IDC and enabled all the tests in pinecone Signed-off-by: Pallavi Jaini --- .../tests/test_compose_pinecone_on_xeon.sh | 48 +++++++++---------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/ChatQnA/tests/test_compose_pinecone_on_xeon.sh b/ChatQnA/tests/test_compose_pinecone_on_xeon.sh index 822482aec..12c56c704 100755 --- a/ChatQnA/tests/test_compose_pinecone_on_xeon.sh +++ b/ChatQnA/tests/test_compose_pinecone_on_xeon.sh @@ -8,7 +8,7 @@ IMAGE_TAG=${IMAGE_TAG:-"latest"} echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}" echo "TAG=IMAGE_TAG=${IMAGE_TAG}" export REGISTRY=${IMAGE_REPO} -export TAG=latest +export TAG=${IMAGE_TAG} WORKPATH=$(dirname "$PWD") LOG_PATH="$WORKPATH/tests" @@ -143,29 +143,31 @@ function validate_microservices() { sleep 1m # retrieval can't curl as expected, try to wait for more time - # test /v1/dataprep upload file -> Not able to connect to pinecone server from test server so commenting out + # test /v1/dataprep/delete_file + validate_services \ + "http://${ip_address}:6009/v1/dataprep/delete_file" \ + '{"status":true}' \ + "dataprep_del" \ + "dataprep-pinecone-server" + + + # test /v1/dataprep upload file echo "Deep learning is a subset of machine learning that utilizes neural networks with multiple layers to analyze various levels of abstract data representations. It enables computers to identify patterns and make decisions with minimal human intervention by learning from large amounts of data." 
> $LOG_PATH/dataprep_file.txt - #validate_services \ - # "http://${ip_address}:6007/v1/dataprep" \ - # "Data preparation succeeded" \ - # "dataprep_upload_file" \ - # "dataprep-pinecone-server" - - # test /v1/dataprep/delete_file - #validate_services \ - # "http://${ip_address}:6009/v1/dataprep/delete_file" \ - # '{"status":true}' \ - # "dataprep_del" \ - # "dataprep-pinecone-server" + validate_services \ + "http://${ip_address}:6007/v1/dataprep" \ + "Data preparation succeeded" \ + "dataprep_upload_file" \ + "dataprep-pinecone-server" + # retrieval microservice - #test_embedding=$(python3 -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)") - #validate_services \ - # "${ip_address}:7000/v1/retrieval" \ - # " " \ - # "retrieval" \ - # "retriever-pinecone-server" \ - # "{\"text\":\"What is the revenue of Nike in 2023?\",\"embedding\":${test_embedding}}" + test_embedding=$(python3 -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)") + validate_services \ + "${ip_address}:7000/v1/retrieval" \ + " " \ + "retrieval" \ + "retriever-pinecone-server" \ + "{\"text\":\"What is the revenue of Nike in 2023?\",\"embedding\":${test_embedding}}" # tei for rerank microservice echo "Validating reranking service" @@ -269,10 +271,8 @@ function main() { elif [ "${mode}" == "" ]; then validate_microservices echo "==== microservices validated ====" - #validate_megaservice + validate_megaservice echo "==== megaservice validated ====" - validate_frontend - echo "==== frontend validated ====" fi stop_docker From 93e37804902cc11e31327594ab43b3f40470c19d Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 15 Oct 2024 18:23:35 +0000 Subject: [PATCH 22/57] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Signed-off-by: Pallavi Jaini --- ChatQnA/tests/test_compose_pinecone_on_xeon.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ChatQnA/tests/test_compose_pinecone_on_xeon.sh b/ChatQnA/tests/test_compose_pinecone_on_xeon.sh index 12c56c704..0a624188e 100755 --- a/ChatQnA/tests/test_compose_pinecone_on_xeon.sh +++ b/ChatQnA/tests/test_compose_pinecone_on_xeon.sh @@ -151,7 +151,7 @@ function validate_microservices() { "dataprep-pinecone-server" - # test /v1/dataprep upload file + # test /v1/dataprep upload file echo "Deep learning is a subset of machine learning that utilizes neural networks with multiple layers to analyze various levels of abstract data representations. It enables computers to identify patterns and make decisions with minimal human intervention by learning from large amounts of data." 
> $LOG_PATH/dataprep_file.txt validate_services \ "http://${ip_address}:6007/v1/dataprep" \ @@ -159,7 +159,7 @@ function validate_microservices() { "dataprep_upload_file" \ "dataprep-pinecone-server" - + # retrieval microservice test_embedding=$(python3 -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)") validate_services \ From 9c30df4e9322129364d2ee4ca71e5294d4d972e9 Mon Sep 17 00:00:00 2001 From: pallavi jaini Date: Tue, 6 Aug 2024 19:08:20 +0000 Subject: [PATCH 23/57] Added the pinecone support documentation and docker compose file Signed-off-by: pallavi jaini Signed-off-by: AI Workloads Signed-off-by: Pallavi Jaini --- ChatQnA/docker/xeon/README_pinecone.md | 402 ++++++++++++++++++ .../docker/xeon/docker_compose_pinecone.yaml | 199 +++++++++ 2 files changed, 601 insertions(+) create mode 100644 ChatQnA/docker/xeon/README_pinecone.md create mode 100644 ChatQnA/docker/xeon/docker_compose_pinecone.yaml diff --git a/ChatQnA/docker/xeon/README_pinecone.md b/ChatQnA/docker/xeon/README_pinecone.md new file mode 100644 index 000000000..a9f29e32b --- /dev/null +++ b/ChatQnA/docker/xeon/README_pinecone.md @@ -0,0 +1,402 @@ +# Build Mega Service of ChatQnA (with Pinecone) on Xeon + +This document outlines the deployment process for a ChatQnA application utilizing the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline on Intel Xeon server. The steps include Docker image creation, container deployment via Docker Compose, and service execution to integrate microservices such as `embedding`, `retriever`, `rerank`, and `llm`. We will publish the Docker images to Docker Hub soon, it will simplify the deployment process for this service. + +## 🚀 Apply Xeon Server on AWS + +To apply a Xeon server on AWS, start by creating an AWS account if you don't have one already. Then, head to the [EC2 Console](https://console.aws.amazon.com/ec2/v2/home) to begin the process. Within the EC2 service, select the Amazon EC2 M7i or M7i-flex instance type to leverage the power of 4th Generation Intel Xeon Scalable processors. These instances are optimized for high-performance computing and demanding workloads. + +For detailed information about these instance types, you can refer to this [link](https://aws.amazon.com/ec2/instance-types/m7i/). Once you've chosen the appropriate instance type, proceed with configuring your instance settings, including network configurations, security groups, and storage options. + +After launching your instance, you can connect to it using SSH (for Linux instances) or Remote Desktop Protocol (RDP) (for Windows instances). From there, you'll have full access to your Xeon server, allowing you to install, configure, and manage your applications as needed. + +**Certain ports in the EC2 instance need to opened up in the security group, for the microservices to work with the curl commands** + +> See one example below. 
Please open up these ports in the EC2 instance based on the IP addresses you want to allow + +``` + +data_prep_service +===================== +Port 6007 - Open to 0.0.0.0/0 +Port 6008 - Open to 0.0.0.0/0 + +tei_embedding_service +===================== +Port 6006 - Open to 0.0.0.0/0 + +embedding +========= +Port 6000 - Open to 0.0.0.0/0 + +retriever +========= +Port 7000 - Open to 0.0.0.0/0 + +tei_xeon_service +================ +Port 8808 - Open to 0.0.0.0/0 + +reranking +========= +Port 8000 - Open to 0.0.0.0/0 + +tgi-service +=========== +Port 9009 - Open to 0.0.0.0/0 + +llm +=== +Port 9000 - Open to 0.0.0.0/0 + +chaqna-xeon-backend-server +========================== +Port 8888 - Open to 0.0.0.0/0 + +chaqna-xeon-ui-server +===================== +Port 5173 - Open to 0.0.0.0/0 +``` + +## 🚀 Build Docker Images + +First of all, you need to build Docker Images locally and install the python package of it. + +```bash +git clone https://github.com/opea-project/GenAIComps.git +cd GenAIComps +``` + +### 1. Build Embedding Image + +```bash +docker build --no-cache -t opea/embedding-tei:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/embeddings/langchain/docker/Dockerfile . +``` + +### 2. Build Retriever Image + +```bash +docker build --no-cache -t opea/retriever-pinecone:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/langchain/pinecone/docker/Dockerfile . +``` + +### 3. Build Rerank Image + +```bash +docker build --no-cache -t opea/reranking-tei:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/reranks/tei/docker/Dockerfile . +``` + +### 4. Build LLM Image + +```bash +docker build --no-cache -t opea/llm-tgi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/text-generation/tgi/Dockerfile . +``` + +### 5. Build Dataprep Image + +```bash +docker build --no-cache -t opea/dataprep-pinecone:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/pinecone/docker/Dockerfile . +cd .. +``` + +### 6. Build MegaService Docker Image + +To construct the Mega Service, we utilize the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline within the `chatqna.py` Python script. Build MegaService Docker image via below command: + +```bash +git clone https://github.com/opea-project/GenAIExamples.git +cd GenAIExamples/ChatQnA/docker +docker build --no-cache -t opea/chatqna:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile . +cd ../../.. +``` + +### 7. Build UI Docker Image + +Build frontend Docker image via below command: + +```bash +cd GenAIExamples/ChatQnA/docker/ui/ +docker build --no-cache -t opea/chatqna-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f ./docker/Dockerfile . +cd ../../../.. +``` + +### 8. 
Build Conversational React UI Docker Image (Optional) + +Build frontend Docker image that enables Conversational experience with ChatQnA megaservice via below command: + +**Export the value of the public IP address of your Xeon server to the `host_ip` environment variable** + +```bash +cd GenAIExamples/ChatQnA/docker/ui/ +export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/chatqna" +export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep" +export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:6008/v1/dataprep/get_file" +docker build --no-cache -t opea/chatqna-conversation-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy --build-arg BACKEND_SERVICE_ENDPOINT=$BACKEND_SERVICE_ENDPOINT --build-arg DATAPREP_SERVICE_ENDPOINT=$DATAPREP_SERVICE_ENDPOINT --build-arg DATAPREP_GET_FILE_ENDPOINT=$DATAPREP_GET_FILE_ENDPOINT -f ./docker/Dockerfile.react . +cd ../../../.. +``` + +Then run the command `docker images`, you will have the following 7 Docker Images: + +1. `opea/dataprep-pinecone:latest` +2. `opea/embedding-tei:latest` +3. `opea/retriever-pinecone:latest` +4. `opea/reranking-tei:latest` +5. `opea/llm-tgi:latest` +6. `opea/chatqna:latest` +7. `opea/chatqna-ui:latest` + +## 🚀 Start Microservices + +### Setup Environment Variables + +Since the `compose.yaml` will consume some environment variables, you need to setup them in advance as below. + +**Export the value of the public IP address of your Xeon server to the `host_ip` environment variable** + +> Change the External_Public_IP below with the actual IPV4 value + +``` +export host_ip="External_Public_IP" +``` + +**Export the value of your Huggingface API token to the `your_hf_api_token` environment variable** + +> Change the Your_Huggingface_API_Token below with tyour actual Huggingface API Token value + +``` +export your_hf_api_token="Your_Huggingface_API_Token" +``` + +**Append the value of the public IP address to the no_proxy list** + +``` +export your_no_proxy=${your_no_proxy},"External_Public_IP" +``` + + +**Get the PINECONE_API_KEY and the INDEX_NAME + +``` +export pinecone_api_key=${api_key} +export pinecone_index_name=${pinecone_index} +``` + +```bash +export no_proxy=${your_no_proxy} +export http_proxy=${your_http_proxy} +export https_proxy=${your_http_proxy} +export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5" +export RERANK_MODEL_ID="BAAI/bge-reranker-base" +export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" +export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:6006" +export TEI_RERANKING_ENDPOINT="http://${host_ip}:8808" +export TGI_LLM_ENDPOINT="http://${host_ip}:9009" +export PINECONE_API_KEY=${pinecone_api_key} +export PINECONE_INDEX_NAME=${pinecone_index_name} +export INDEX_NAME=${pinecone_index_name} +export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token} +export MEGA_SERVICE_HOST_IP=${host_ip} +export EMBEDDING_SERVICE_HOST_IP=${host_ip} +export RETRIEVER_SERVICE_HOST_IP=${host_ip} +export RERANK_SERVICE_HOST_IP=${host_ip} +export LLM_SERVICE_HOST_IP=${host_ip} +export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/chatqna" +export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep" +export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:6008/v1/dataprep/get_file" +export DATAPREP_DELETE_FILE_ENDPOINT="http://${host_ip}:6009/v1/dataprep/delete_file" +``` + +Note: Please replace with `host_ip` with you external IP address, do not use localhost. 
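If you are unsure which address to export, one way to look it up from the server itself is sketched below. This assumes the first IPv4 reported by `hostname -I` is reachable by your clients (typically the private VPC address on EC2); use the EC2 console or instance metadata if you need the public IPv4 instead.

```bash
# Assumption: the first address returned by `hostname -I` is the reachable IPv4.
export host_ip=$(hostname -I | awk '{print $1}')
echo "Using host_ip=${host_ip}"
```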
+ +### Start all the services Docker Containers + +> Before running the docker compose command, you need to be in the folder that has the docker compose yaml file + +```bash +cd GenAIExamples/ChatQnA/docker/xeon/ +docker compose up -d +``` + +### Validate Microservices + +1. TEI Embedding Service + +```bash +curl ${host_ip}:6006/embed \ + -X POST \ + -d '{"inputs":"What is Deep Learning?"}' \ + -H 'Content-Type: application/json' +``` + +2. Embedding Microservice + +```bash +curl http://${host_ip}:6000/v1/embeddings\ + -X POST \ + -d '{"text":"hello"}' \ + -H 'Content-Type: application/json' +``` + +3. Retriever Microservice + To validate the retriever microservice, you need to generate a mock embedding vector of length 768 in Python script: + +```Python +import random +embedding = [random.uniform(-1, 1) for _ in range(768)] +print(embedding) +``` + +Then substitute your mock embedding vector for the `${your_embedding}` in the following cURL command: + +```bash +curl http://${host_ip}:7000/v1/retrieval \ + -X POST \ + -d '{"text":"What is the revenue of Nike in 2023?","embedding":"'"${your_embedding}"'"}' \ + -H 'Content-Type: application/json' +``` + +4. TEI Reranking Service + +```bash +curl http://${host_ip}:8808/rerank \ + -X POST \ + -d '{"query":"What is Deep Learning?", "texts": ["Deep Learning is not...", "Deep learning is..."]}' \ + -H 'Content-Type: application/json' +``` + +5. Reranking Microservice + +```bash +curl http://${host_ip}:8000/v1/reranking\ + -X POST \ + -d '{"initial_query":"What is Deep Learning?", "retrieved_docs": [{"text":"Deep Learning is not..."}, {"text":"Deep learning is..."}]}' \ + -H 'Content-Type: application/json' +``` + +6. TGI Service + +```bash +curl http://${host_ip}:9009/generate \ + -X POST \ + -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' \ + -H 'Content-Type: application/json' +``` + +7. LLM Microservice + +```bash +curl http://${host_ip}:9000/v1/chat/completions\ + -X POST \ + -d '{"query":"What is Deep Learning?","max_new_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \ + -H 'Content-Type: application/json' +``` + +8. MegaService + +```bash +curl http://${host_ip}:8888/v1/chatqna -H "Content-Type: application/json" -d '{ + "messages": "What is the revenue of Nike in 2023?" + }' +``` + +9. Dataprep Microservice(Optional) + +If you want to update the default knowledge base, you can use the following commands: + +Update Knowledge Base via Local File Upload: + +```bash +curl -X POST "http://${host_ip}:6007/v1/dataprep" \ + -H "Content-Type: multipart/form-data" \ + -F "files=@./nke-10k-2023.pdf" +``` + +This command updates a knowledge base by uploading a local file for processing. Update the file path according to your environment. + +Add Knowledge Base via HTTP Links: + +```bash +curl -X POST "http://${host_ip}:6007/v1/dataprep" \ + -H "Content-Type: multipart/form-data" \ + -F 'link_list=["https://opea.dev"]' +``` + +This command updates a knowledge base by submitting a list of HTTP links for processing. 
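After uploading files or links, you can optionally confirm that the ingested chunks actually landed in your Pinecone index. The snippet below is a minimal sketch, assuming the `pinecone` Python client (v3 or later) is installed on the host and that the same `PINECONE_API_KEY` and `PINECONE_INDEX_NAME` values exported earlier are in the environment.

```bash
# Minimal sketch: print Pinecone index stats to verify dataprep ingestion.
# Assumption: `pip install pinecone-client` (v3+) has been run on this host.
python3 - <<'EOF'
import os
from pinecone import Pinecone

pc = Pinecone(api_key=os.environ["PINECONE_API_KEY"])
index = pc.Index(os.environ["PINECONE_INDEX_NAME"])
# total_vector_count should increase after successful uploads
print(index.describe_index_stats())
EOF
```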
+ +Also, you are able to get the file list that you uploaded: + +```bash +curl -X POST "http://${host_ip}:6008/v1/dataprep/get_file" \ + -H "Content-Type: application/json" +``` + +To delete the file/link you uploaded: + +```bash +# delete link +curl -X POST "http://${host_ip}:6009/v1/dataprep/delete_file" \ + -d '{"file_path": "https://opea.dev"}' \ + -H "Content-Type: application/json" + +# delete file +curl -X POST "http://${host_ip}:6009/v1/dataprep/delete_file" \ + -d '{"file_path": "nke-10k-2023.pdf"}' \ + -H "Content-Type: application/json" + +# delete all uploaded files and links +curl -X POST "http://${host_ip}:6009/v1/dataprep/delete_file" \ + -d '{"file_path": "all"}' \ + -H "Content-Type: application/json" +``` + +## Enable LangSmith for Monotoring Application (Optional) + +LangSmith offers tools to debug, evaluate, and monitor language models and intelligent agents. It can be used to assess benchmark data for each microservice. Before launching your services with `docker compose -f compose.yaml up -d`, you need to enable LangSmith tracing by setting the `LANGCHAIN_TRACING_V2` environment variable to true and configuring your LangChain API key. + +Here's how you can do it: + +1. Install the latest version of LangSmith: + +```bash +pip install -U langsmith +``` + +2. Set the necessary environment variables: + +```bash +export LANGCHAIN_TRACING_V2=true +export LANGCHAIN_API_KEY=ls_... +``` + +## 🚀 Launch the UI + +To access the frontend, open the following URL in your browser: http://{host_ip}:5173. By default, the UI runs on port 5173 internally. If you prefer to use a different host port to access the frontend, you can modify the port mapping in the `compose.yaml` file as shown below: + +```yaml + chaqna-gaudi-ui-server: + image: opea/chatqna-ui:latest + ... + ports: + - "80:5173" +``` + +## 🚀 Launch the Conversational UI (react) + +To access the Conversational UI frontend, open the following URL in your browser: http://{host_ip}:5174. By default, the UI runs on port 80 internally. If you prefer to use a different host port to access the frontend, you can modify the port mapping in the `compose.yaml` file as shown below: + +```yaml + chaqna-xeon-conversation-ui-server: + image: opea/chatqna-conversation-ui:latest + ... 
+ ports: + - "80:80" +``` + +![project-screenshot](../../assets/img/chat_ui_init.png) + +Here is an example of running ChatQnA: + +![project-screenshot](../../assets/img/chat_ui_response.png) + +Here is an example of running ChatQnA with Conversational UI (React): + +![project-screenshot](../../assets/img/conversation_ui_response.png) diff --git a/ChatQnA/docker/xeon/docker_compose_pinecone.yaml b/ChatQnA/docker/xeon/docker_compose_pinecone.yaml new file mode 100644 index 000000000..9c4ae2eb2 --- /dev/null +++ b/ChatQnA/docker/xeon/docker_compose_pinecone.yaml @@ -0,0 +1,199 @@ + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +version: "3.8" + +services: + dataprep-pinecone-service: + image: opea/dataprep-pinecone:latest + container_name: dataprep-pinecone-server + depends_on: + - tei-embedding-service + ports: + - "6007:6007" + - "6008:6008" + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + PINECONE_API_KEY: ${PINECONE_API_KEY} + PINECONE_INDEX_NAME: ${PINECONE_INDEX_NAME} + TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} + LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY} + HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + tei-embedding-service: + image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + container_name: tei-embedding-server + ports: + - "6006:80" + volumes: + - "./data:/data" + shm_size: 1g + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate + embedding: + image: opea/embedding-tei:latest + container_name: embedding-tei-server + depends_on: + - tei-embedding-service + ports: + - "6000:6000" + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} + LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY} + LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2} + LANGCHAIN_PROJECT: "opea-embedding-service" + restart: unless-stopped + retriever: + image: opea/retriever-pinecone:latest + container_name: retriever-pinecone-server + ports: + - "7000:7000" + ipc: host + environment: + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + PINECONE_API_KEY: ${PINECONE_API_KEY} + INDEX_NAME: ${PINECONE_INDEX_NAME} + PINECONE_INDEX_NAME: ${PINECONE_INDEX_NAME} + LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY} + TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} + restart: unless-stopped + tei-reranking-service: + image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + container_name: tei-reranking-server + ports: + - "8808:80" + volumes: + - "./data:/data" + shm_size: 1g + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_HUB_DISABLE_PROGRESS_BARS: 1 + HF_HUB_ENABLE_HF_TRANSFER: 0 + command: --model-id ${RERANK_MODEL_ID} --auto-truncate + reranking: + image: opea/reranking-tei:latest + container_name: reranking-tei-xeon-server + depends_on: + - tei-reranking-service + ports: + - "8000:8000" + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + TEI_RERANKING_ENDPOINT: ${TEI_RERANKING_ENDPOINT} + HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_HUB_DISABLE_PROGRESS_BARS: 1 + HF_HUB_ENABLE_HF_TRANSFER: 0 + LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY} + LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2} + LANGCHAIN_PROJECT: "opea-reranking-service" + restart: 
unless-stopped + tgi-service: + image: ghcr.io/huggingface/text-generation-inference:2.1.0 + container_name: tgi-service + ports: + - "9009:80" + volumes: + - "./data:/data" + shm_size: 1g + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_HUB_DISABLE_PROGRESS_BARS: 1 + HF_HUB_ENABLE_HF_TRANSFER: 0 + command: --model-id ${LLM_MODEL_ID} + llm: + image: opea/llm-tgi:latest + container_name: llm-tgi-server + depends_on: + - tgi-service + ports: + - "9000:9000" + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT} + HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_HUB_DISABLE_PROGRESS_BARS: 1 + HF_HUB_ENABLE_HF_TRANSFER: 0 + LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY} + LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2} + LANGCHAIN_PROJECT: "opea-llm-service" + restart: unless-stopped + chaqna-xeon-backend-server: + image: opea/chatqna:latest + container_name: chatqna-xeon-backend-server + depends_on: + - tei-embedding-service + - embedding + - retriever + - tei-reranking-service + - reranking + - tgi-service + - llm + ports: + - "8888:8888" + environment: + - no_proxy=${no_proxy} + - https_proxy=${https_proxy} + - http_proxy=${http_proxy} + - MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP} + - EMBEDDING_SERVICE_HOST_IP=${EMBEDDING_SERVICE_HOST_IP} + - RETRIEVER_SERVICE_HOST_IP=${RETRIEVER_SERVICE_HOST_IP} + - RERANK_SERVICE_HOST_IP=${RERANK_SERVICE_HOST_IP} + - LLM_SERVICE_HOST_IP=${LLM_SERVICE_HOST_IP} + ipc: host + restart: always + chaqna-xeon-ui-server: + image: opea/chatqna-ui:latest + container_name: chatqna-xeon-ui-server + depends_on: + - chaqna-xeon-backend-server + ports: + - "5173:5173" + environment: + - no_proxy=${no_proxy} + - https_proxy=${https_proxy} + - http_proxy=${http_proxy} + - CHAT_BASE_URL=${BACKEND_SERVICE_ENDPOINT} + - UPLOAD_FILE_BASE_URL=${DATAPREP_SERVICE_ENDPOINT} + - GET_FILE=${DATAPREP_GET_FILE_ENDPOINT} + - DELETE_FILE=${DATAPREP_DELETE_FILE_ENDPOINT} + ipc: host + restart: always + chaqna-xeon-conversation-ui-server: + image: opea/chatqna-conversation-ui:latest + container_name: chatqna-xeon-conversation-ui-server + environment: + - no_proxy=${no_proxy} + - https_proxy=${https_proxy} + - http_proxy=${http_proxy} + ports: + - 5174:80 + depends_on: + - chaqna-xeon-backend-server + ipc: host + restart: always + +networks: + default: + driver: bridge From 22e918fb395b78748aa1ad70cae607b15de4e971 Mon Sep 17 00:00:00 2001 From: pallavi jaini Date: Wed, 7 Aug 2024 06:14:54 +0000 Subject: [PATCH 24/57] Updated the readme for pinecone Signed-off-by: pallavi jaini Signed-off-by: AI Workloads Signed-off-by: Pallavi Jaini --- ChatQnA/docker/xeon/README_pinecone.md | 25 +++---------------------- 1 file changed, 3 insertions(+), 22 deletions(-) diff --git a/ChatQnA/docker/xeon/README_pinecone.md b/ChatQnA/docker/xeon/README_pinecone.md index a9f29e32b..44e8e225e 100644 --- a/ChatQnA/docker/xeon/README_pinecone.md +++ b/ChatQnA/docker/xeon/README_pinecone.md @@ -148,7 +148,7 @@ Then run the command `docker images`, you will have the following 7 Docker Image ### Setup Environment Variables -Since the `compose.yaml` will consume some environment variables, you need to setup them in advance as below. +Since the `docker_compose_pinecone.yaml` will consume some environment variables, you need to setup them in advance as below. 
**Export the value of the public IP address of your Xeon server to the `host_ip` environment variable** @@ -213,7 +213,7 @@ Note: Please replace with `host_ip` with you external IP address, do not use loc ```bash cd GenAIExamples/ChatQnA/docker/xeon/ -docker compose up -d +docker compose -f docker_compose_pinecone.yaml up -d ``` ### Validate Microservices @@ -329,28 +329,9 @@ curl -X POST "http://${host_ip}:6008/v1/dataprep/get_file" \ -H "Content-Type: application/json" ``` -To delete the file/link you uploaded: - -```bash -# delete link -curl -X POST "http://${host_ip}:6009/v1/dataprep/delete_file" \ - -d '{"file_path": "https://opea.dev"}' \ - -H "Content-Type: application/json" - -# delete file -curl -X POST "http://${host_ip}:6009/v1/dataprep/delete_file" \ - -d '{"file_path": "nke-10k-2023.pdf"}' \ - -H "Content-Type: application/json" - -# delete all uploaded files and links -curl -X POST "http://${host_ip}:6009/v1/dataprep/delete_file" \ - -d '{"file_path": "all"}' \ - -H "Content-Type: application/json" -``` - ## Enable LangSmith for Monotoring Application (Optional) -LangSmith offers tools to debug, evaluate, and monitor language models and intelligent agents. It can be used to assess benchmark data for each microservice. Before launching your services with `docker compose -f compose.yaml up -d`, you need to enable LangSmith tracing by setting the `LANGCHAIN_TRACING_V2` environment variable to true and configuring your LangChain API key. +LangSmith offers tools to debug, evaluate, and monitor language models and intelligent agents. It can be used to assess benchmark data for each microservice. Before launching your services with `docker compose -f docker_compose_pinecone.yaml up -d`, you need to enable LangSmith tracing by setting the `LANGCHAIN_TRACING_V2` environment variable to true and configuring your LangChain API key. 
Here's how you can do it: From 20adebe6b429a1a8a3193ab8aa1d414b7db6cfd1 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 7 Aug 2024 06:26:41 +0000 Subject: [PATCH 25/57] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Signed-off-by: pallavi jaini Signed-off-by: AI Workloads Signed-off-by: Pallavi Jaini --- ChatQnA/docker/xeon/README_pinecone.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/ChatQnA/docker/xeon/README_pinecone.md b/ChatQnA/docker/xeon/README_pinecone.md index 44e8e225e..5c1a50c2e 100644 --- a/ChatQnA/docker/xeon/README_pinecone.md +++ b/ChatQnA/docker/xeon/README_pinecone.md @@ -172,8 +172,7 @@ export your_hf_api_token="Your_Huggingface_API_Token" export your_no_proxy=${your_no_proxy},"External_Public_IP" ``` - -**Get the PINECONE_API_KEY and the INDEX_NAME +\*\*Get the PINECONE_API_KEY and the INDEX_NAME ``` export pinecone_api_key=${api_key} From 6bae689097d787f5ad87337a48d61ae1e7c884e6 Mon Sep 17 00:00:00 2001 From: pallavi jaini Date: Thu, 8 Aug 2024 21:37:10 +0000 Subject: [PATCH 26/57] Added port 6009 to pinecone Signed-off-by: pallavi jaini Signed-off-by: AI Workloads Signed-off-by: Pallavi Jaini --- ChatQnA/docker/xeon/docker_compose_pinecone.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/ChatQnA/docker/xeon/docker_compose_pinecone.yaml b/ChatQnA/docker/xeon/docker_compose_pinecone.yaml index 9c4ae2eb2..103a1a911 100644 --- a/ChatQnA/docker/xeon/docker_compose_pinecone.yaml +++ b/ChatQnA/docker/xeon/docker_compose_pinecone.yaml @@ -13,6 +13,7 @@ services: ports: - "6007:6007" - "6008:6008" + - "6009:6009" environment: no_proxy: ${no_proxy} http_proxy: ${http_proxy} From aa213523a76572d93daa42a761407dfc0f6cbc8c Mon Sep 17 00:00:00 2001 From: Pallavi Jaini Date: Tue, 13 Aug 2024 15:20:38 -0700 Subject: [PATCH 27/57] Added the tests and renamed the files Signed-off-by: Pallavi Jaini Signed-off-by: AI Workloads Signed-off-by: Pallavi Jaini --- ChatQnA/docker/xeon/README_pinecone.md | 6 +- ...se_pinecone.yaml => compose_pinecone.yaml} | 0 .../tests/_test_chatqna_pinecone_on_xeon.sh | 232 ++++++++++++++++++ 3 files changed, 235 insertions(+), 3 deletions(-) rename ChatQnA/docker/xeon/{docker_compose_pinecone.yaml => compose_pinecone.yaml} (100%) create mode 100644 ChatQnA/tests/_test_chatqna_pinecone_on_xeon.sh diff --git a/ChatQnA/docker/xeon/README_pinecone.md b/ChatQnA/docker/xeon/README_pinecone.md index 5c1a50c2e..2b365c70f 100644 --- a/ChatQnA/docker/xeon/README_pinecone.md +++ b/ChatQnA/docker/xeon/README_pinecone.md @@ -148,7 +148,7 @@ Then run the command `docker images`, you will have the following 7 Docker Image ### Setup Environment Variables -Since the `docker_compose_pinecone.yaml` will consume some environment variables, you need to setup them in advance as below. +Since the `compose_pinecone.yaml` will consume some environment variables, you need to setup them in advance as below. 
**Export the value of the public IP address of your Xeon server to the `host_ip` environment variable** @@ -212,7 +212,7 @@ Note: Please replace with `host_ip` with you external IP address, do not use loc ```bash cd GenAIExamples/ChatQnA/docker/xeon/ -docker compose -f docker_compose_pinecone.yaml up -d +docker compose -f compose_pinecone.yaml up -d ``` ### Validate Microservices @@ -330,7 +330,7 @@ curl -X POST "http://${host_ip}:6008/v1/dataprep/get_file" \ ## Enable LangSmith for Monotoring Application (Optional) -LangSmith offers tools to debug, evaluate, and monitor language models and intelligent agents. It can be used to assess benchmark data for each microservice. Before launching your services with `docker compose -f docker_compose_pinecone.yaml up -d`, you need to enable LangSmith tracing by setting the `LANGCHAIN_TRACING_V2` environment variable to true and configuring your LangChain API key. +LangSmith offers tools to debug, evaluate, and monitor language models and intelligent agents. It can be used to assess benchmark data for each microservice. Before launching your services with `docker compose -f compose_pinecone.yaml up -d`, you need to enable LangSmith tracing by setting the `LANGCHAIN_TRACING_V2` environment variable to true and configuring your LangChain API key. Here's how you can do it: diff --git a/ChatQnA/docker/xeon/docker_compose_pinecone.yaml b/ChatQnA/docker/xeon/compose_pinecone.yaml similarity index 100% rename from ChatQnA/docker/xeon/docker_compose_pinecone.yaml rename to ChatQnA/docker/xeon/compose_pinecone.yaml diff --git a/ChatQnA/tests/_test_chatqna_pinecone_on_xeon.sh b/ChatQnA/tests/_test_chatqna_pinecone_on_xeon.sh new file mode 100644 index 000000000..392941994 --- /dev/null +++ b/ChatQnA/tests/_test_chatqna_pinecone_on_xeon.sh @@ -0,0 +1,232 @@ +#!/bin/bash +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +set -e +echo "IMAGE_REPO=${IMAGE_REPO}" + +WORKPATH=$(dirname "$PWD") +LOG_PATH="$WORKPATH/tests" +ip_address=$(hostname -I | awk '{print $1}') + +function build_docker_images() { + cd $WORKPATH + git clone https://github.com/opea-project/GenAIComps.git + cd GenAIComps + + docker build -t opea/embedding-tei:latest -f comps/embeddings/langchain/docker/Dockerfile . + docker build -t opea/retriever-pinecone:latest -f comps/retrievers/langchain/pinecone/docker/Dockerfile . + docker build -t opea/reranking-tei:latest -f comps/reranks/tei/docker/Dockerfile . + docker build -t opea/llm-tgi:latest -f comps/llms/text-generation/tgi/Dockerfile . + docker build -t opea/dataprep-pinecone:latest -f comps/dataprep/pinecone/docker/Dockerfile . + + cd $WORKPATH/docker + docker build --no-cache -t opea/chatqna:latest -f Dockerfile . + + cd $WORKPATH/docker/ui + docker build --no-cache -t opea/chatqna-ui:latest -f docker/Dockerfile . 
+ + docker images +} + +function start_services() { + cd $WORKPATH/docker/xeon + + export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5" + export RERANK_MODEL_ID="BAAI/bge-reranker-base" + export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" + export TEI_EMBEDDING_ENDPOINT="http://${ip_address}:6006" + export TEI_RERANKING_ENDPOINT="http://${ip_address}:8808" + export TGI_LLM_ENDPOINT="http://${ip_address}:9009" + export PINECONE_API_KEY=${PINECONE_KEY} + export PINECONE_INDEX_NAME="langchain-test" + export INDEX_NAME="langchain-test" + export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} + export MEGA_SERVICE_HOST_IP=${ip_address} + export EMBEDDING_SERVICE_HOST_IP=${ip_address} + export RETRIEVER_SERVICE_HOST_IP=${ip_address} + export RERANK_SERVICE_HOST_IP=${ip_address} + export LLM_SERVICE_HOST_IP=${ip_address} + export BACKEND_SERVICE_ENDPOINT="http://${ip_address}:8888/v1/chatqna" + export DATAPREP_SERVICE_ENDPOINT="http://${ip_address}:6007/v1/dataprep" + export DATAPREP_GET_FILE_ENDPOINT="http://${ip_address}:6007/v1/dataprep/get_file" + export DATAPREP_DELETE_FILE_ENDPOINT="http://${ip_address}:6007/v1/dataprep/delete_file" + + sed -i "s/backend_address/$ip_address/g" $WORKPATH/docker/ui/svelte/.env + + if [[ "$IMAGE_REPO" != "" ]]; then + # Replace the container name with a test-specific name + echo "using image repository $IMAGE_REPO and image tag $IMAGE_TAG" + sed -i "s#image: opea/chatqna:latest#image: opea/chatqna:${IMAGE_TAG}#g" compose_pinecone.yaml + sed -i "s#image: opea/chatqna-ui:latest#image: opea/chatqna-ui:${IMAGE_TAG}#g" compose_pinecone.yaml + sed -i "s#image: opea/chatqna-conversation-ui:latest#image: opea/chatqna-conversation-ui:${IMAGE_TAG}#g" compose_pinecone.yaml + sed -i "s#image: opea/*#image: ${IMAGE_REPO}opea/#g" compose_pinecone.yaml + cat compose_pinecone.yaml + fi + + # Start Docker Containers + docker compose -f compose_pinecone.yaml up -d + n=0 + until [[ "$n" -ge 200 ]]; do + docker logs tgi-service > tgi_service_start.log + if grep -q Connected tgi_service_start.log; then + break + fi + sleep 1s + n=$((n+1)) + done +} + +function validate_services() { + local URL="$1" + local EXPECTED_RESULT="$2" + local SERVICE_NAME="$3" + local DOCKER_NAME="$4" + local INPUT_DATA="$5" + + local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL") + if [ "$HTTP_STATUS" -eq 200 ]; then + echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." + + local CONTENT=$(curl -s -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log) + + if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then + echo "[ $SERVICE_NAME ] Content is as expected." + else + echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT" + docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log + exit 1 + fi + else + echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" + docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log + exit 1 + fi + sleep 1s +} + +function validate_microservices() { + # Check if the microservices are running correctly. 
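    # Each validate_services call below POSTs a sample payload with curl, expects an
    # HTTP 200, and greps the response body for a service-specific marker; on any
    # mismatch the container logs are appended to ${LOG_PATH}/<service>.log and the
    # script exits non-zero so the failing microservice is easy to spot.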
+ + # tei for embedding service + validate_services \ + "${ip_address}:6006/embed" \ + "\[\[" \ + "tei-embedding" \ + "tei-embedding-server" \ + '{"inputs":"What is Deep Learning?"}' + + # embedding microservice + validate_services \ + "${ip_address}:6000/v1/embeddings" \ + '"text":"What is Deep Learning?","embedding":\[' \ + "embedding" \ + "embedding-tei-server" \ + '{"text":"What is Deep Learning?"}' + + sleep 1m # retrieval can't curl as expected, try to wait for more time + + # retrieval microservice + test_embedding=$(python3 -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)") + validate_services \ + "${ip_address}:7000/v1/retrieval" \ + " " \ + "retrieval" \ + "retriever-pinecone-server" \ + "{\"text\":\"What is the revenue of Nike in 2023?\",\"embedding\":${test_embedding}}" + + # tei for rerank microservice + validate_services \ + "${ip_address}:8808/rerank" \ + '{"index":1,"score":' \ + "tei-rerank" \ + "tei-reranking-server" \ + '{"query":"What is Deep Learning?", "texts": ["Deep Learning is not...", "Deep learning is..."]}' + + # rerank microservice + validate_services \ + "${ip_address}:8000/v1/reranking" \ + "Deep learning is..." \ + "rerank" \ + "reranking-tei-xeon-server" \ + '{"initial_query":"What is Deep Learning?", "retrieved_docs": [{"text":"Deep Learning is not..."}, {"text":"Deep learning is..."}]}' + + # tgi for llm service + validate_services \ + "${ip_address}:9009/generate" \ + "generated_text" \ + "tgi-llm" \ + "tgi-service" \ + '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' + + # llm microservice + validate_services \ + "${ip_address}:9000/v1/chat/completions" \ + "data: " \ + "llm" \ + "llm-tgi-server" \ + '{"query":"What is Deep Learning?"}' + +} + +function validate_megaservice() { + # Curl the Mega Service + validate_services \ + "${ip_address}:8888/v1/chatqna" \ + "billion" \ + "mega-chatqna" \ + "chatqna-xeon-backend-server" \ + '{"messages": "What is the revenue of Nike in 2023?"}' + +} + +function validate_frontend() { + cd $WORKPATH/docker/ui/svelte + local conda_env_name="OPEA_e2e" + export PATH=${HOME}/miniforge3/bin/:$PATH +# conda remove -n ${conda_env_name} --all -y +# conda create -n ${conda_env_name} python=3.12 -y + source activate ${conda_env_name} + + sed -i "s/localhost/$ip_address/g" playwright.config.ts + +# conda install -c conda-forge nodejs -y + npm install && npm ci && npx playwright install --with-deps + node -v && npm -v && pip list + + exit_status=0 + npx playwright test || exit_status=$? 
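+    # set -e is active, so the || fallback above keeps the script running and records the
+    # Playwright exit code for the check below.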
+ + if [ $exit_status -ne 0 ]; then + echo "[TEST INFO]: ---------frontend test failed---------" + exit $exit_status + else + echo "[TEST INFO]: ---------frontend test passed---------" + fi +} + +function stop_docker() { + cd $WORKPATH/docker/xeon + docker compose stop && docker compose rm -f +} + +function main() { + + stop_docker + if [[ "$IMAGE_REPO" == "" ]]; then build_docker_images; fi + start_time=$(date +%s) + start_services + end_time=$(date +%s) + duration=$((end_time-start_time)) + echo "Mega service start duration is $duration s" && sleep 1s + + validate_microservices + validate_megaservice + validate_frontend + + stop_docker + echo y | docker system prune + +} + +main From d0b1ba36d502fde83f8cb7f9a72bc92db7e8aae3 Mon Sep 17 00:00:00 2001 From: AI Workloads Date: Wed, 11 Sep 2024 11:42:17 +0530 Subject: [PATCH 28/57] Updated the pinecone tests as per the xeon Signed-off-by: AI Workloads Signed-off-by: Pallavi Jaini --- .../intel/cpu}/xeon/README_pinecone.md | 8 +- .../intel/cpu}/xeon/compose_pinecone.yaml | 0 ChatQnA/docker_image_build/build.yaml | 12 + .../tests/test_chatqna_pinecone_on_xeon.sh | 258 ++++++++++++++++++ 4 files changed, 274 insertions(+), 4 deletions(-) rename ChatQnA/{docker => docker_compose/intel/cpu}/xeon/README_pinecone.md (98%) rename ChatQnA/{docker => docker_compose/intel/cpu}/xeon/compose_pinecone.yaml (100%) create mode 100644 ChatQnA/tests/test_chatqna_pinecone_on_xeon.sh diff --git a/ChatQnA/docker/xeon/README_pinecone.md b/ChatQnA/docker_compose/intel/cpu/xeon/README_pinecone.md similarity index 98% rename from ChatQnA/docker/xeon/README_pinecone.md rename to ChatQnA/docker_compose/intel/cpu/xeon/README_pinecone.md index 2b365c70f..6cbf45135 100644 --- a/ChatQnA/docker/xeon/README_pinecone.md +++ b/ChatQnA/docker_compose/intel/cpu/xeon/README_pinecone.md @@ -70,19 +70,19 @@ cd GenAIComps ### 1. Build Embedding Image ```bash -docker build --no-cache -t opea/embedding-tei:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/embeddings/langchain/docker/Dockerfile . +docker build --no-cache -t opea/embedding-tei:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/embeddings/tei/langchain/Dockerfile . ``` ### 2. Build Retriever Image ```bash -docker build --no-cache -t opea/retriever-pinecone:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/langchain/pinecone/docker/Dockerfile . +docker build --no-cache -t opea/retriever-pinecone:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/pinecone/langchain/Dockerfile . ``` ### 3. Build Rerank Image ```bash -docker build --no-cache -t opea/reranking-tei:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/reranks/tei/docker/Dockerfile . +docker build --no-cache -t opea/reranking-tei:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/reranks/tei/langchain/Dockerfile . ``` ### 4. Build LLM Image @@ -94,7 +94,7 @@ docker build --no-cache -t opea/llm-tgi:latest --build-arg https_proxy=$https_pr ### 5. Build Dataprep Image ```bash -docker build --no-cache -t opea/dataprep-pinecone:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/pinecone/docker/Dockerfile . +docker build --no-cache -t opea/dataprep-pinecone:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/pinecone/langchain/Dockerfile . 
cd .. ``` diff --git a/ChatQnA/docker/xeon/compose_pinecone.yaml b/ChatQnA/docker_compose/intel/cpu/xeon/compose_pinecone.yaml similarity index 100% rename from ChatQnA/docker/xeon/compose_pinecone.yaml rename to ChatQnA/docker_compose/intel/cpu/xeon/compose_pinecone.yaml diff --git a/ChatQnA/docker_image_build/build.yaml b/ChatQnA/docker_image_build/build.yaml index 390231320..168a78a6f 100644 --- a/ChatQnA/docker_image_build/build.yaml +++ b/ChatQnA/docker_image_build/build.yaml @@ -53,6 +53,12 @@ services: dockerfile: comps/retrievers/qdrant/haystack/Dockerfile extends: chatqna image: ${REGISTRY:-opea}/retriever-qdrant:${TAG:-latest} + retriever-pinecone: + build: + context: GenAIComps + dockerfile: comps/retrievers/pinecone/langchain/Dockerfile + extends: chatqna + image: ${REGISTRY:-opea}/retriever-pinecone:${TAG:-latest} reranking-tei: build: context: GenAIComps @@ -107,6 +113,12 @@ services: dockerfile: comps/dataprep/qdrant/langchain/Dockerfile extends: chatqna image: ${REGISTRY:-opea}/dataprep-qdrant:${TAG:-latest} + dataprep-pinecone: + build: + context: GenAIComps + dockerfile: comps/dataprep/pinecone/langchain/Dockerfile + extends: chatqna + image: ${REGISTRY:-opea}/dataprep-redis:${TAG:-latest} guardrails-tgi: build: context: GenAIComps diff --git a/ChatQnA/tests/test_chatqna_pinecone_on_xeon.sh b/ChatQnA/tests/test_chatqna_pinecone_on_xeon.sh new file mode 100644 index 000000000..5cecd1ea9 --- /dev/null +++ b/ChatQnA/tests/test_chatqna_pinecone_on_xeon.sh @@ -0,0 +1,258 @@ +#!/bin/bash +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +set -e +IMAGE_REPO=${IMAGE_REPO:-"opea"} +IMAGE_TAG=${IMAGE_TAG:-"latest"} +echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}" +echo "TAG=IMAGE_TAG=${IMAGE_TAG}" +export REGISTRY=${IMAGE_REPO} +export TAG=${IMAGE_TAG} + +WORKPATH=$(dirname "$PWD") +LOG_PATH="$WORKPATH/tests" +ip_address=$(hostname -I | awk '{print $1}') + +function build_docker_images() { + cd $WORKPATH/docker_image_build + git clone https://github.com/opea-project/GenAIComps.git + + echo "Build all the images with --no-cache, check docker_image_build.log for details..." 
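+    # Build only the images this example needs from the shared build.yaml; the TGI and TEI
+    # serving images are pulled prebuilt from ghcr.io afterwards.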
+ service_list="chatqna chatqna-ui chatqna-conversation-ui dataprep-pinecone embedding-tei retriever-pinecone reranking-tei llm-tgi" + docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log + + docker pull ghcr.io/huggingface/tgi-gaudi:2.0.1 + docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + + docker images && sleep 1s +} + +function start_services() { + cd $WORKPATH/docker/xeon + + export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5" + export RERANK_MODEL_ID="BAAI/bge-reranker-base" + export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" + export TEI_EMBEDDING_ENDPOINT="http://${ip_address}:6006" + export TEI_RERANKING_ENDPOINT="http://${ip_address}:8808" + export TGI_LLM_ENDPOINT="http://${ip_address}:9009" + export PINECONE_API_KEY=${PINECONE_KEY} + export PINECONE_INDEX_NAME="langchain-test" + export INDEX_NAME="langchain-test" + export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} + export MEGA_SERVICE_HOST_IP=${ip_address} + export EMBEDDING_SERVICE_HOST_IP=${ip_address} + export RETRIEVER_SERVICE_HOST_IP=${ip_address} + export RERANK_SERVICE_HOST_IP=${ip_address} + export LLM_SERVICE_HOST_IP=${ip_address} + export BACKEND_SERVICE_ENDPOINT="http://${ip_address}:8888/v1/chatqna" + export DATAPREP_SERVICE_ENDPOINT="http://${ip_address}:6007/v1/dataprep" + export DATAPREP_GET_FILE_ENDPOINT="http://${ip_address}:6007/v1/dataprep/get_file" + export DATAPREP_DELETE_FILE_ENDPOINT="http://${ip_address}:6007/v1/dataprep/delete_file" + + sed -i "s/backend_address/$ip_address/g" $WORKPATH/docker/ui/svelte/.env + + # Start Docker Containers + docker compose -f compose_pinecone.yaml up -d + n=0 + until [[ "$n" -ge 200 ]]; do + docker logs tgi-service > tgi_service_start.log + if grep -q Connected tgi_service_start.log; then + break + fi + sleep 1s + n=$((n+1)) + done +} + +function validate_services() { + local URL="$1" + local EXPECTED_RESULT="$2" + local SERVICE_NAME="$3" + local DOCKER_NAME="$4" + local INPUT_DATA="$5" + + if [[ $SERVICE_NAME == *"dataprep_upload_file"* ]]; then + cd $LOG_PATH + HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -F 'files=@./dataprep_file.txt' -H 'Content-Type: multipart/form-data' "$URL") + elif [[ $SERVICE_NAME == *"dataprep_del"* ]]; then + HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -d '{"file_path": "all"}' -H 'Content-Type: application/json' "$URL") + else + HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL") + fi + HTTP_STATUS=$(echo $HTTP_RESPONSE | tr -d '\n' | sed -e 's/.*HTTPSTATUS://') + RESPONSE_BODY=$(echo $HTTP_RESPONSE | sed -e 's/HTTPSTATUS\:.*//g') + + docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log + + # check response status + if [ "$HTTP_STATUS" -ne "200" ]; then + echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" + exit 1 + else + echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." + fi + # check response body + if [[ "$RESPONSE_BODY" != *"$EXPECTED_RESULT"* ]]; then + echo "[ $SERVICE_NAME ] Content does not match the expected result: $RESPONSE_BODY" + exit 1 + else + echo "[ $SERVICE_NAME ] Content is as expected." + fi + + sleep 1s +} + +function validate_microservices() { + # Check if the microservices are running correctly. 
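+    # validate_services switches to a multipart file upload or a JSON delete payload when the
+    # service name contains dataprep_upload_file or dataprep_del; other checks POST the given JSON.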
+ + # tei for embedding service + validate_services \ + "${ip_address}:6006/embed" \ + "\[\[" \ + "tei-embedding" \ + "tei-embedding-server" \ + '{"inputs":"What is Deep Learning?"}' + + # embedding microservice + validate_services \ + "${ip_address}:6000/v1/embeddings" \ + '"text":"What is Deep Learning?","embedding":\[' \ + "embedding" \ + "embedding-tei-server" \ + '{"text":"What is Deep Learning?"}' + + sleep 1m # retrieval can't curl as expected, try to wait for more time + + # test /v1/dataprep upload file + echo "Deep learning is a subset of machine learning that utilizes neural networks with multiple layers to analyze various levels of abstract data representations. It enables computers to identify patterns and make decisions with minimal human intervention by learning from large amounts of data." > $LOG_PATH/dataprep_file.txt + validate_service \ + "http://${ip_address}:6007/v1/dataprep" \ + "Data preparation succeeded" \ + "dataprep_upload_file" \ + "dataprep-pinecone-server" + + # test /v1/dataprep/delete_file + validate_service \ + "http://${ip_address}:6007/v1/dataprep/delete_file" \ + '{"status":true}' \ + "dataprep_del" \ + "dataprep-pinecone-server" + + # retrieval microservice + test_embedding=$(python3 -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)") + validate_services \ + "${ip_address}:7000/v1/retrieval" \ + " " \ + "retrieval" \ + "retriever-pinecone-server" \ + "{\"text\":\"What is the revenue of Nike in 2023?\",\"embedding\":${test_embedding}}" + + # tei for rerank microservice + validate_services \ + "${ip_address}:8808/rerank" \ + '{"index":1,"score":' \ + "tei-rerank" \ + "tei-reranking-server" \ + '{"query":"What is Deep Learning?", "texts": ["Deep Learning is not...", "Deep learning is..."]}' + + # rerank microservice + validate_services \ + "${ip_address}:8000/v1/reranking" \ + "Deep learning is..." \ + "rerank" \ + "reranking-tei-xeon-server" \ + '{"initial_query":"What is Deep Learning?", "retrieved_docs": [{"text":"Deep Learning is not..."}, {"text":"Deep learning is..."}]}' + + # tgi for llm service + validate_services \ + "${ip_address}:9009/generate" \ + "generated_text" \ + "tgi-llm" \ + "tgi-service" \ + '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' + + # llm microservice + validate_services \ + "${ip_address}:9000/v1/chat/completions" \ + "data: " \ + "llm" \ + "llm-tgi-server" \ + '{"query":"What is Deep Learning?"}' + +} + +function validate_megaservice() { + # Curl the Mega Service + validate_services \ + "${ip_address}:8888/v1/chatqna" \ + "billion" \ + "mega-chatqna" \ + "chatqna-xeon-backend-server" \ + '{"messages": "What is the revenue of Nike in 2023?"}' + +} + +function validate_frontend() { + echo "[ TEST INFO ]: --------- frontend test started ---------" + cd $WORKPATH/ui/svelte + local conda_env_name="OPEA_e2e" + export PATH=${HOME}/miniforge3/bin/:$PATH + if conda info --envs | grep -q "$conda_env_name"; then + echo "$conda_env_name exist!" + else + conda create -n ${conda_env_name} python=3.12 -y + fi + source activate ${conda_env_name} + echo "[ TEST INFO ]: --------- conda env activated ---------" + + sed -i "s/localhost/$ip_address/g" playwright.config.ts + + conda install -c conda-forge nodejs -y + npm install && npm ci && npx playwright install --with-deps + node -v && npm -v && pip list + + exit_status=0 + npx playwright test || exit_status=$? 
+ + if [ $exit_status -ne 0 ]; then + echo "[TEST INFO]: ---------frontend test failed---------" + exit $exit_status + else + echo "[TEST INFO]: ---------frontend test passed---------" + fi +} + +function stop_docker() { + cd $WORKPATH/docker_compose/intel/cpu/xeon/ + docker compose stop -f compose_pinecone.yaml && docker compose rm -f compose_pinecone.yaml +} + +function main() { + + stop_docker + if [[ "$IMAGE_REPO" == "" ]]; then build_docker_images; fi + start_time=$(date +%s) + start_services + end_time=$(date +%s) + duration=$((end_time-start_time)) + echo "Mega service start duration is $duration s" && sleep 1s + + if [ "${mode}" == "perf" ]; then + python3 $WORKPATH/tests/chatqna_benchmark.py + elif [ "${mode}" == "" ]; then + validate_microservices + echo "==== microservices validated ====" + validate_megaservice + echo "==== megaservice validated ====" + validate_frontend + echo "==== frontend validated ====" + fi + + stop_docker + echo y | docker system prune + +} + +main From 1345ea8b06430bdf58916444a880c69bd82ed44f Mon Sep 17 00:00:00 2001 From: AI Workloads Date: Wed, 11 Sep 2024 21:12:19 +0530 Subject: [PATCH 29/57] Updated the Dockerfile pathf or rerank Signed-off-by: AI Workloads Signed-off-by: Pallavi Jaini --- ChatQnA/docker_compose/intel/cpu/xeon/README_pinecone.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ChatQnA/docker_compose/intel/cpu/xeon/README_pinecone.md b/ChatQnA/docker_compose/intel/cpu/xeon/README_pinecone.md index 6cbf45135..decb374c6 100644 --- a/ChatQnA/docker_compose/intel/cpu/xeon/README_pinecone.md +++ b/ChatQnA/docker_compose/intel/cpu/xeon/README_pinecone.md @@ -82,7 +82,7 @@ docker build --no-cache -t opea/retriever-pinecone:latest --build-arg https_prox ### 3. Build Rerank Image ```bash -docker build --no-cache -t opea/reranking-tei:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/reranks/tei/langchain/Dockerfile . +docker build --no-cache -t opea/reranking-tei:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/reranks/tei/Dockerfile . ``` ### 4. Build LLM Image From 4133cfa995fffc1f2f185782dbe2fcae7e06a2cf Mon Sep 17 00:00:00 2001 From: AI Workloads Date: Fri, 13 Sep 2024 10:33:24 +0530 Subject: [PATCH 30/57] Updated the file name for the tests Signed-off-by: AI Workloads Signed-off-by: Pallavi Jaini --- .../tests/_test_chatqna_pinecone_on_xeon.sh | 232 ------------------ ...on.sh => test_compose_pinecone_on_xeon.sh} | 2 +- 2 files changed, 1 insertion(+), 233 deletions(-) delete mode 100644 ChatQnA/tests/_test_chatqna_pinecone_on_xeon.sh rename ChatQnA/tests/{test_chatqna_pinecone_on_xeon.sh => test_compose_pinecone_on_xeon.sh} (99%) diff --git a/ChatQnA/tests/_test_chatqna_pinecone_on_xeon.sh b/ChatQnA/tests/_test_chatqna_pinecone_on_xeon.sh deleted file mode 100644 index 392941994..000000000 --- a/ChatQnA/tests/_test_chatqna_pinecone_on_xeon.sh +++ /dev/null @@ -1,232 +0,0 @@ -#!/bin/bash -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -set -e -echo "IMAGE_REPO=${IMAGE_REPO}" - -WORKPATH=$(dirname "$PWD") -LOG_PATH="$WORKPATH/tests" -ip_address=$(hostname -I | awk '{print $1}') - -function build_docker_images() { - cd $WORKPATH - git clone https://github.com/opea-project/GenAIComps.git - cd GenAIComps - - docker build -t opea/embedding-tei:latest -f comps/embeddings/langchain/docker/Dockerfile . - docker build -t opea/retriever-pinecone:latest -f comps/retrievers/langchain/pinecone/docker/Dockerfile . 
- docker build -t opea/reranking-tei:latest -f comps/reranks/tei/docker/Dockerfile . - docker build -t opea/llm-tgi:latest -f comps/llms/text-generation/tgi/Dockerfile . - docker build -t opea/dataprep-pinecone:latest -f comps/dataprep/pinecone/docker/Dockerfile . - - cd $WORKPATH/docker - docker build --no-cache -t opea/chatqna:latest -f Dockerfile . - - cd $WORKPATH/docker/ui - docker build --no-cache -t opea/chatqna-ui:latest -f docker/Dockerfile . - - docker images -} - -function start_services() { - cd $WORKPATH/docker/xeon - - export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5" - export RERANK_MODEL_ID="BAAI/bge-reranker-base" - export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" - export TEI_EMBEDDING_ENDPOINT="http://${ip_address}:6006" - export TEI_RERANKING_ENDPOINT="http://${ip_address}:8808" - export TGI_LLM_ENDPOINT="http://${ip_address}:9009" - export PINECONE_API_KEY=${PINECONE_KEY} - export PINECONE_INDEX_NAME="langchain-test" - export INDEX_NAME="langchain-test" - export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} - export MEGA_SERVICE_HOST_IP=${ip_address} - export EMBEDDING_SERVICE_HOST_IP=${ip_address} - export RETRIEVER_SERVICE_HOST_IP=${ip_address} - export RERANK_SERVICE_HOST_IP=${ip_address} - export LLM_SERVICE_HOST_IP=${ip_address} - export BACKEND_SERVICE_ENDPOINT="http://${ip_address}:8888/v1/chatqna" - export DATAPREP_SERVICE_ENDPOINT="http://${ip_address}:6007/v1/dataprep" - export DATAPREP_GET_FILE_ENDPOINT="http://${ip_address}:6007/v1/dataprep/get_file" - export DATAPREP_DELETE_FILE_ENDPOINT="http://${ip_address}:6007/v1/dataprep/delete_file" - - sed -i "s/backend_address/$ip_address/g" $WORKPATH/docker/ui/svelte/.env - - if [[ "$IMAGE_REPO" != "" ]]; then - # Replace the container name with a test-specific name - echo "using image repository $IMAGE_REPO and image tag $IMAGE_TAG" - sed -i "s#image: opea/chatqna:latest#image: opea/chatqna:${IMAGE_TAG}#g" compose_pinecone.yaml - sed -i "s#image: opea/chatqna-ui:latest#image: opea/chatqna-ui:${IMAGE_TAG}#g" compose_pinecone.yaml - sed -i "s#image: opea/chatqna-conversation-ui:latest#image: opea/chatqna-conversation-ui:${IMAGE_TAG}#g" compose_pinecone.yaml - sed -i "s#image: opea/*#image: ${IMAGE_REPO}opea/#g" compose_pinecone.yaml - cat compose_pinecone.yaml - fi - - # Start Docker Containers - docker compose -f compose_pinecone.yaml up -d - n=0 - until [[ "$n" -ge 200 ]]; do - docker logs tgi-service > tgi_service_start.log - if grep -q Connected tgi_service_start.log; then - break - fi - sleep 1s - n=$((n+1)) - done -} - -function validate_services() { - local URL="$1" - local EXPECTED_RESULT="$2" - local SERVICE_NAME="$3" - local DOCKER_NAME="$4" - local INPUT_DATA="$5" - - local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL") - if [ "$HTTP_STATUS" -eq 200 ]; then - echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." - - local CONTENT=$(curl -s -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log) - - if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then - echo "[ $SERVICE_NAME ] Content is as expected." - else - echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT" - docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log - exit 1 - fi - else - echo "[ $SERVICE_NAME ] HTTP status is not 200. 
Received status was $HTTP_STATUS" - docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log - exit 1 - fi - sleep 1s -} - -function validate_microservices() { - # Check if the microservices are running correctly. - - # tei for embedding service - validate_services \ - "${ip_address}:6006/embed" \ - "\[\[" \ - "tei-embedding" \ - "tei-embedding-server" \ - '{"inputs":"What is Deep Learning?"}' - - # embedding microservice - validate_services \ - "${ip_address}:6000/v1/embeddings" \ - '"text":"What is Deep Learning?","embedding":\[' \ - "embedding" \ - "embedding-tei-server" \ - '{"text":"What is Deep Learning?"}' - - sleep 1m # retrieval can't curl as expected, try to wait for more time - - # retrieval microservice - test_embedding=$(python3 -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)") - validate_services \ - "${ip_address}:7000/v1/retrieval" \ - " " \ - "retrieval" \ - "retriever-pinecone-server" \ - "{\"text\":\"What is the revenue of Nike in 2023?\",\"embedding\":${test_embedding}}" - - # tei for rerank microservice - validate_services \ - "${ip_address}:8808/rerank" \ - '{"index":1,"score":' \ - "tei-rerank" \ - "tei-reranking-server" \ - '{"query":"What is Deep Learning?", "texts": ["Deep Learning is not...", "Deep learning is..."]}' - - # rerank microservice - validate_services \ - "${ip_address}:8000/v1/reranking" \ - "Deep learning is..." \ - "rerank" \ - "reranking-tei-xeon-server" \ - '{"initial_query":"What is Deep Learning?", "retrieved_docs": [{"text":"Deep Learning is not..."}, {"text":"Deep learning is..."}]}' - - # tgi for llm service - validate_services \ - "${ip_address}:9009/generate" \ - "generated_text" \ - "tgi-llm" \ - "tgi-service" \ - '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' - - # llm microservice - validate_services \ - "${ip_address}:9000/v1/chat/completions" \ - "data: " \ - "llm" \ - "llm-tgi-server" \ - '{"query":"What is Deep Learning?"}' - -} - -function validate_megaservice() { - # Curl the Mega Service - validate_services \ - "${ip_address}:8888/v1/chatqna" \ - "billion" \ - "mega-chatqna" \ - "chatqna-xeon-backend-server" \ - '{"messages": "What is the revenue of Nike in 2023?"}' - -} - -function validate_frontend() { - cd $WORKPATH/docker/ui/svelte - local conda_env_name="OPEA_e2e" - export PATH=${HOME}/miniforge3/bin/:$PATH -# conda remove -n ${conda_env_name} --all -y -# conda create -n ${conda_env_name} python=3.12 -y - source activate ${conda_env_name} - - sed -i "s/localhost/$ip_address/g" playwright.config.ts - -# conda install -c conda-forge nodejs -y - npm install && npm ci && npx playwright install --with-deps - node -v && npm -v && pip list - - exit_status=0 - npx playwright test || exit_status=$? 
- - if [ $exit_status -ne 0 ]; then - echo "[TEST INFO]: ---------frontend test failed---------" - exit $exit_status - else - echo "[TEST INFO]: ---------frontend test passed---------" - fi -} - -function stop_docker() { - cd $WORKPATH/docker/xeon - docker compose stop && docker compose rm -f -} - -function main() { - - stop_docker - if [[ "$IMAGE_REPO" == "" ]]; then build_docker_images; fi - start_time=$(date +%s) - start_services - end_time=$(date +%s) - duration=$((end_time-start_time)) - echo "Mega service start duration is $duration s" && sleep 1s - - validate_microservices - validate_megaservice - validate_frontend - - stop_docker - echo y | docker system prune - -} - -main diff --git a/ChatQnA/tests/test_chatqna_pinecone_on_xeon.sh b/ChatQnA/tests/test_compose_pinecone_on_xeon.sh similarity index 99% rename from ChatQnA/tests/test_chatqna_pinecone_on_xeon.sh rename to ChatQnA/tests/test_compose_pinecone_on_xeon.sh index 5cecd1ea9..9ae9d06c7 100644 --- a/ChatQnA/tests/test_chatqna_pinecone_on_xeon.sh +++ b/ChatQnA/tests/test_compose_pinecone_on_xeon.sh @@ -29,7 +29,7 @@ function build_docker_images() { } function start_services() { - cd $WORKPATH/docker/xeon + cd $WORKPATH/docker_compose/intel/cpu/xeon/ export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5" export RERANK_MODEL_ID="BAAI/bge-reranker-base" From 1a3247cbfb6c25be85dfe955cca38f5bb741a3ba Mon Sep 17 00:00:00 2001 From: AI Workloads Date: Fri, 13 Sep 2024 10:53:34 +0530 Subject: [PATCH 31/57] Corrected the docker compose for pinecone Signed-off-by: AI Workloads Signed-off-by: Pallavi Jaini --- ChatQnA/tests/test_compose_pinecone_on_xeon.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ChatQnA/tests/test_compose_pinecone_on_xeon.sh b/ChatQnA/tests/test_compose_pinecone_on_xeon.sh index 9ae9d06c7..32831f012 100644 --- a/ChatQnA/tests/test_compose_pinecone_on_xeon.sh +++ b/ChatQnA/tests/test_compose_pinecone_on_xeon.sh @@ -54,7 +54,7 @@ function start_services() { sed -i "s/backend_address/$ip_address/g" $WORKPATH/docker/ui/svelte/.env # Start Docker Containers - docker compose -f compose_pinecone.yaml up -d + docker compose up -d > ${LOG_PATH}/start_services_with_compose.log n=0 until [[ "$n" -ge 200 ]]; do docker logs tgi-service > tgi_service_start.log From 49b8708b8dc43b85c8779f300dc15c076124cf25 Mon Sep 17 00:00:00 2001 From: AI Workloads Date: Fri, 13 Sep 2024 10:54:34 +0530 Subject: [PATCH 32/57] Corrected the docker compose for pinecone Signed-off-by: AI Workloads Signed-off-by: Pallavi Jaini --- ChatQnA/tests/test_compose_pinecone_on_xeon.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/ChatQnA/tests/test_compose_pinecone_on_xeon.sh b/ChatQnA/tests/test_compose_pinecone_on_xeon.sh index 32831f012..6ee9147ee 100644 --- a/ChatQnA/tests/test_compose_pinecone_on_xeon.sh +++ b/ChatQnA/tests/test_compose_pinecone_on_xeon.sh @@ -55,6 +55,7 @@ function start_services() { # Start Docker Containers docker compose up -d > ${LOG_PATH}/start_services_with_compose.log + n=0 until [[ "$n" -ge 200 ]]; do docker logs tgi-service > tgi_service_start.log From 75135b02306024d99ff899e32068929af7930944 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 13 Sep 2024 05:25:14 +0000 Subject: [PATCH 33/57] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Signed-off-by: AI Workloads Signed-off-by: Pallavi Jaini --- ChatQnA/tests/test_compose_pinecone_on_xeon.sh | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/ChatQnA/tests/test_compose_pinecone_on_xeon.sh b/ChatQnA/tests/test_compose_pinecone_on_xeon.sh index 6ee9147ee..aa9ebcbd7 100644 --- a/ChatQnA/tests/test_compose_pinecone_on_xeon.sh +++ b/ChatQnA/tests/test_compose_pinecone_on_xeon.sh @@ -55,7 +55,7 @@ function start_services() { # Start Docker Containers docker compose up -d > ${LOG_PATH}/start_services_with_compose.log - + n=0 until [[ "$n" -ge 200 ]]; do docker logs tgi-service > tgi_service_start.log From d5da89140ea3dd9bc3d339372d1e9e5e32f7bdfd Mon Sep 17 00:00:00 2001 From: AI Workloads Date: Sat, 14 Sep 2024 05:42:16 +0530 Subject: [PATCH 34/57] Corrected the test script issue Signed-off-by: AI Workloads Signed-off-by: Pallavi Jaini --- ChatQnA/tests/test_compose_pinecone_on_xeon.sh | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) mode change 100644 => 100755 ChatQnA/tests/test_compose_pinecone_on_xeon.sh diff --git a/ChatQnA/tests/test_compose_pinecone_on_xeon.sh b/ChatQnA/tests/test_compose_pinecone_on_xeon.sh old mode 100644 new mode 100755 index aa9ebcbd7..0a562a9e3 --- a/ChatQnA/tests/test_compose_pinecone_on_xeon.sh +++ b/ChatQnA/tests/test_compose_pinecone_on_xeon.sh @@ -51,7 +51,7 @@ function start_services() { export DATAPREP_GET_FILE_ENDPOINT="http://${ip_address}:6007/v1/dataprep/get_file" export DATAPREP_DELETE_FILE_ENDPOINT="http://${ip_address}:6007/v1/dataprep/delete_file" - sed -i "s/backend_address/$ip_address/g" $WORKPATH/docker/ui/svelte/.env + sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env # Start Docker Containers docker compose up -d > ${LOG_PATH}/start_services_with_compose.log @@ -226,8 +226,9 @@ function validate_frontend() { } function stop_docker() { + echo $WORKPATH cd $WORKPATH/docker_compose/intel/cpu/xeon/ - docker compose stop -f compose_pinecone.yaml && docker compose rm -f compose_pinecone.yaml + docker compose -f compose_pinecone.yaml down } function main() { From 468957d21e7d2a4bf147c01a2d16b6127b2d71a6 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 14 Sep 2024 00:14:39 +0000 Subject: [PATCH 35/57] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Signed-off-by: AI Workloads Signed-off-by: Pallavi Jaini --- ChatQnA/tests/test_compose_pinecone_on_xeon.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ChatQnA/tests/test_compose_pinecone_on_xeon.sh b/ChatQnA/tests/test_compose_pinecone_on_xeon.sh index 0a562a9e3..2c3413910 100755 --- a/ChatQnA/tests/test_compose_pinecone_on_xeon.sh +++ b/ChatQnA/tests/test_compose_pinecone_on_xeon.sh @@ -228,7 +228,7 @@ function validate_frontend() { function stop_docker() { echo $WORKPATH cd $WORKPATH/docker_compose/intel/cpu/xeon/ - docker compose -f compose_pinecone.yaml down + docker compose -f compose_pinecone.yaml down } function main() { From 17fd96b4e435e1d32e68acaf08a0ef712d45c302 Mon Sep 17 00:00:00 2001 From: AI Workloads Date: Wed, 18 Sep 2024 04:14:21 +0530 Subject: [PATCH 36/57] Fixed the issues with tests Signed-off-by: AI Workloads Signed-off-by: Pallavi Jaini --- .../intel/cpu/xeon/compose_pinecone.yaml | 1 + ChatQnA/docker_image_build/build.yaml | 2 +- .../tests/test_compose_pinecone_on_xeon.sh | 26 +++++++++++++++---- 3 files changed, 23 insertions(+), 6 deletions(-) diff --git a/ChatQnA/docker_compose/intel/cpu/xeon/compose_pinecone.yaml b/ChatQnA/docker_compose/intel/cpu/xeon/compose_pinecone.yaml index 103a1a911..8f1d9ad84 100644 --- 
a/ChatQnA/docker_compose/intel/cpu/xeon/compose_pinecone.yaml +++ b/ChatQnA/docker_compose/intel/cpu/xeon/compose_pinecone.yaml @@ -67,6 +67,7 @@ services: PINECONE_INDEX_NAME: ${PINECONE_INDEX_NAME} LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY} TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} + HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} restart: unless-stopped tei-reranking-service: image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 diff --git a/ChatQnA/docker_image_build/build.yaml b/ChatQnA/docker_image_build/build.yaml index 168a78a6f..5b3e8813e 100644 --- a/ChatQnA/docker_image_build/build.yaml +++ b/ChatQnA/docker_image_build/build.yaml @@ -118,7 +118,7 @@ services: context: GenAIComps dockerfile: comps/dataprep/pinecone/langchain/Dockerfile extends: chatqna - image: ${REGISTRY:-opea}/dataprep-redis:${TAG:-latest} + image: ${REGISTRY:-opea}/dataprep-pinecone:${TAG:-latest} guardrails-tgi: build: context: GenAIComps diff --git a/ChatQnA/tests/test_compose_pinecone_on_xeon.sh b/ChatQnA/tests/test_compose_pinecone_on_xeon.sh index 2c3413910..676e12a19 100755 --- a/ChatQnA/tests/test_compose_pinecone_on_xeon.sh +++ b/ChatQnA/tests/test_compose_pinecone_on_xeon.sh @@ -28,9 +28,22 @@ function build_docker_images() { docker images && sleep 1s } +function build_pinecone_docker_images() { + echo "In pinecone build" + cd $WORKPATH/docker_image_build + git clone https://github.com/opea-project/GenAIComps.git + + echo "Build all the images with --no-cache, check docker_image_build.log for details..." + service_list="dataprep-pinecone retriever-pinecone" + docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log + + docker images && sleep 1s +} + function start_services() { cd $WORKPATH/docker_compose/intel/cpu/xeon/ + export no_proxy=${no_proxy},${ip_address} export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5" export RERANK_MODEL_ID="BAAI/bge-reranker-base" export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" @@ -54,7 +67,7 @@ function start_services() { sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env # Start Docker Containers - docker compose up -d > ${LOG_PATH}/start_services_with_compose.log + docker compose -f compose_pinecone.yaml up -d > ${LOG_PATH}/start_services_with_compose.log n=0 until [[ "$n" -ge 200 ]]; do @@ -94,6 +107,7 @@ function validate_services() { else echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." fi + # check response body if [[ "$RESPONSE_BODY" != *"$EXPECTED_RESULT"* ]]; then echo "[ $SERVICE_NAME ] Content does not match the expected result: $RESPONSE_BODY" @@ -111,7 +125,7 @@ function validate_microservices() { # tei for embedding service validate_services \ "${ip_address}:6006/embed" \ - "\[\[" \ + "[[" \ "tei-embedding" \ "tei-embedding-server" \ '{"inputs":"What is Deep Learning?"}' @@ -119,7 +133,7 @@ function validate_microservices() { # embedding microservice validate_services \ "${ip_address}:6000/v1/embeddings" \ - '"text":"What is Deep Learning?","embedding":\[' \ + '"text":"What is Deep Learning?","embedding":[' \ "embedding" \ "embedding-tei-server" \ '{"text":"What is Deep Learning?"}' @@ -128,14 +142,14 @@ function validate_microservices() { # test /v1/dataprep upload file echo "Deep learning is a subset of machine learning that utilizes neural networks with multiple layers to analyze various levels of abstract data representations. It enables computers to identify patterns and make decisions with minimal human intervention by learning from large amounts of data." 
> $LOG_PATH/dataprep_file.txt - validate_service \ + validate_services \ "http://${ip_address}:6007/v1/dataprep" \ "Data preparation succeeded" \ "dataprep_upload_file" \ "dataprep-pinecone-server" # test /v1/dataprep/delete_file - validate_service \ + validate_services \ "http://${ip_address}:6007/v1/dataprep/delete_file" \ '{"status":true}' \ "dataprep_del" \ @@ -226,6 +240,7 @@ function validate_frontend() { } function stop_docker() { + echo "In stop docker" echo $WORKPATH cd $WORKPATH/docker_compose/intel/cpu/xeon/ docker compose -f compose_pinecone.yaml down @@ -235,6 +250,7 @@ function main() { stop_docker if [[ "$IMAGE_REPO" == "" ]]; then build_docker_images; fi + build_pinecone_docker_images start_time=$(date +%s) start_services end_time=$(date +%s) From f32236f14c1fa9ea30f8eb16b39a4b68ec09a36c Mon Sep 17 00:00:00 2001 From: AI Workloads Date: Wed, 18 Sep 2024 04:21:39 +0530 Subject: [PATCH 37/57] Fixed the links Signed-off-by: AI Workloads Signed-off-by: Pallavi Jaini --- ChatQnA/docker_compose/intel/cpu/xeon/README_pinecone.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ChatQnA/docker_compose/intel/cpu/xeon/README_pinecone.md b/ChatQnA/docker_compose/intel/cpu/xeon/README_pinecone.md index decb374c6..f730a91ae 100644 --- a/ChatQnA/docker_compose/intel/cpu/xeon/README_pinecone.md +++ b/ChatQnA/docker_compose/intel/cpu/xeon/README_pinecone.md @@ -371,12 +371,12 @@ To access the Conversational UI frontend, open the following URL in your browser - "80:80" ``` -![project-screenshot](../../assets/img/chat_ui_init.png) +![project-screenshot](../../../../assets/img/chat_ui_init.png) Here is an example of running ChatQnA: -![project-screenshot](../../assets/img/chat_ui_response.png) +![project-screenshot](../../../../assets/img/chat_ui_response.png) Here is an example of running ChatQnA with Conversational UI (React): -![project-screenshot](../../assets/img/conversation_ui_response.png) +![project-screenshot](../../../../assets/img/conversation_ui_response.png) From 429be00667499bb54e684065a89fab1c0ddc600d Mon Sep 17 00:00:00 2001 From: AI Workloads Date: Wed, 18 Sep 2024 11:30:43 +0530 Subject: [PATCH 38/57] Corrected the tag Signed-off-by: AI Workloads Signed-off-by: Pallavi Jaini --- ChatQnA/tests/test_compose_pinecone_on_xeon.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ChatQnA/tests/test_compose_pinecone_on_xeon.sh b/ChatQnA/tests/test_compose_pinecone_on_xeon.sh index 676e12a19..ffdd067e3 100755 --- a/ChatQnA/tests/test_compose_pinecone_on_xeon.sh +++ b/ChatQnA/tests/test_compose_pinecone_on_xeon.sh @@ -8,7 +8,7 @@ IMAGE_TAG=${IMAGE_TAG:-"latest"} echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}" echo "TAG=IMAGE_TAG=${IMAGE_TAG}" export REGISTRY=${IMAGE_REPO} -export TAG=${IMAGE_TAG} +export TAG=latest WORKPATH=$(dirname "$PWD") LOG_PATH="$WORKPATH/tests" From 1ab8626556128eb7dab9244d9a50d1cc18a72333 Mon Sep 17 00:00:00 2001 From: AI Workloads Date: Wed, 18 Sep 2024 21:03:11 +0530 Subject: [PATCH 39/57] Corrected the ports Signed-off-by: AI Workloads Signed-off-by: Pallavi Jaini --- ChatQnA/tests/test_compose_pinecone_on_xeon.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ChatQnA/tests/test_compose_pinecone_on_xeon.sh b/ChatQnA/tests/test_compose_pinecone_on_xeon.sh index ffdd067e3..0c51d75e8 100755 --- a/ChatQnA/tests/test_compose_pinecone_on_xeon.sh +++ b/ChatQnA/tests/test_compose_pinecone_on_xeon.sh @@ -61,8 +61,8 @@ function start_services() { export LLM_SERVICE_HOST_IP=${ip_address} export 
BACKEND_SERVICE_ENDPOINT="http://${ip_address}:8888/v1/chatqna" export DATAPREP_SERVICE_ENDPOINT="http://${ip_address}:6007/v1/dataprep" - export DATAPREP_GET_FILE_ENDPOINT="http://${ip_address}:6007/v1/dataprep/get_file" - export DATAPREP_DELETE_FILE_ENDPOINT="http://${ip_address}:6007/v1/dataprep/delete_file" + export DATAPREP_GET_FILE_ENDPOINT="http://${ip_address}:6008/v1/dataprep/get_file" + export DATAPREP_DELETE_FILE_ENDPOINT="http://${ip_address}:6009/v1/dataprep/delete_file" sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env @@ -150,7 +150,7 @@ function validate_microservices() { # test /v1/dataprep/delete_file validate_services \ - "http://${ip_address}:6007/v1/dataprep/delete_file" \ + "http://${ip_address}:6009/v1/dataprep/delete_file" \ '{"status":true}' \ "dataprep_del" \ "dataprep-pinecone-server" From 05a6b655f349c8ce1072d40eced207f4e1593d6c Mon Sep 17 00:00:00 2001 From: Pallavi Jaini Date: Tue, 24 Sep 2024 14:10:50 -0700 Subject: [PATCH 40/57] Commented out dataprep as not able to connect to pineconeserver Signed-off-by: Pallavi Jaini Signed-off-by: Pallavi Jaini --- .../tests/test_compose_pinecone_on_xeon.sh | 42 ++++++++++--------- 1 file changed, 23 insertions(+), 19 deletions(-) diff --git a/ChatQnA/tests/test_compose_pinecone_on_xeon.sh b/ChatQnA/tests/test_compose_pinecone_on_xeon.sh index 0c51d75e8..84b4647b6 100755 --- a/ChatQnA/tests/test_compose_pinecone_on_xeon.sh +++ b/ChatQnA/tests/test_compose_pinecone_on_xeon.sh @@ -140,31 +140,32 @@ function validate_microservices() { sleep 1m # retrieval can't curl as expected, try to wait for more time - # test /v1/dataprep upload file + # test /v1/dataprep upload file -> Not able to connect to pinecone server from test server so commenting out echo "Deep learning is a subset of machine learning that utilizes neural networks with multiple layers to analyze various levels of abstract data representations. It enables computers to identify patterns and make decisions with minimal human intervention by learning from large amounts of data." 
> $LOG_PATH/dataprep_file.txt - validate_services \ - "http://${ip_address}:6007/v1/dataprep" \ - "Data preparation succeeded" \ - "dataprep_upload_file" \ - "dataprep-pinecone-server" + #validate_services \ + # "http://${ip_address}:6007/v1/dataprep" \ + # "Data preparation succeeded" \ + # "dataprep_upload_file" \ + # "dataprep-pinecone-server" # test /v1/dataprep/delete_file - validate_services \ - "http://${ip_address}:6009/v1/dataprep/delete_file" \ - '{"status":true}' \ - "dataprep_del" \ - "dataprep-pinecone-server" - + #validate_services \ + # "http://${ip_address}:6009/v1/dataprep/delete_file" \ + # '{"status":true}' \ + # "dataprep_del" \ + # "dataprep-pinecone-server" + # retrieval microservice - test_embedding=$(python3 -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)") - validate_services \ - "${ip_address}:7000/v1/retrieval" \ - " " \ - "retrieval" \ - "retriever-pinecone-server" \ - "{\"text\":\"What is the revenue of Nike in 2023?\",\"embedding\":${test_embedding}}" + #test_embedding=$(python3 -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)") + #validate_services \ + # "${ip_address}:7000/v1/retrieval" \ + # " " \ + # "retrieval" \ + # "retriever-pinecone-server" \ + # "{\"text\":\"What is the revenue of Nike in 2023?\",\"embedding\":${test_embedding}}" # tei for rerank microservice + echo "Validating reranking service" validate_services \ "${ip_address}:8808/rerank" \ '{"index":1,"score":' \ @@ -173,6 +174,7 @@ function validate_microservices() { '{"query":"What is Deep Learning?", "texts": ["Deep Learning is not...", "Deep learning is..."]}' # rerank microservice + echo "Validating reranking micro service" validate_services \ "${ip_address}:8000/v1/reranking" \ "Deep learning is..." 
\ @@ -181,6 +183,7 @@ function validate_microservices() { '{"initial_query":"What is Deep Learning?", "retrieved_docs": [{"text":"Deep Learning is not..."}, {"text":"Deep learning is..."}]}' # tgi for llm service + echo "Validating llm service" validate_services \ "${ip_address}:9009/generate" \ "generated_text" \ @@ -189,6 +192,7 @@ function validate_microservices() { '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' # llm microservice + echo "Validating llm microservice" validate_services \ "${ip_address}:9000/v1/chat/completions" \ "data: " \ From 70b71590c45001c3f68dc37fb95b98be076ed98f Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 24 Sep 2024 21:11:29 +0000 Subject: [PATCH 41/57] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Signed-off-by: Pallavi Jaini --- ChatQnA/tests/test_compose_pinecone_on_xeon.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ChatQnA/tests/test_compose_pinecone_on_xeon.sh b/ChatQnA/tests/test_compose_pinecone_on_xeon.sh index 84b4647b6..e4c48f1eb 100755 --- a/ChatQnA/tests/test_compose_pinecone_on_xeon.sh +++ b/ChatQnA/tests/test_compose_pinecone_on_xeon.sh @@ -154,7 +154,7 @@ function validate_microservices() { # '{"status":true}' \ # "dataprep_del" \ # "dataprep-pinecone-server" - + # retrieval microservice #test_embedding=$(python3 -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)") #validate_services \ From 7628f33553ab284d001d851d4d56c16c576f4371 Mon Sep 17 00:00:00 2001 From: Pallavi Jaini Date: Tue, 24 Sep 2024 21:25:40 -0700 Subject: [PATCH 42/57] Added more comments Signed-off-by: Pallavi Jaini Signed-off-by: Pallavi Jaini --- ChatQnA/tests/test_compose_pinecone_on_xeon.sh | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/ChatQnA/tests/test_compose_pinecone_on_xeon.sh b/ChatQnA/tests/test_compose_pinecone_on_xeon.sh index e4c48f1eb..822482aec 100755 --- a/ChatQnA/tests/test_compose_pinecone_on_xeon.sh +++ b/ChatQnA/tests/test_compose_pinecone_on_xeon.sh @@ -107,7 +107,10 @@ function validate_services() { else echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." 
fi - + echo "Response" + echo $RESPONSE_BODY + echo "Expected Result" + echo $EXPECTED_RESULT # check response body if [[ "$RESPONSE_BODY" != *"$EXPECTED_RESULT"* ]]; then echo "[ $SERVICE_NAME ] Content does not match the expected result: $RESPONSE_BODY" @@ -266,7 +269,7 @@ function main() { elif [ "${mode}" == "" ]; then validate_microservices echo "==== microservices validated ====" - validate_megaservice + #validate_megaservice echo "==== megaservice validated ====" validate_frontend echo "==== frontend validated ====" From 98d8eb45822639888f09a54cd7ba3cb31957d49b Mon Sep 17 00:00:00 2001 From: Pallavi Jaini Date: Tue, 15 Oct 2024 18:22:08 +0000 Subject: [PATCH 43/57] Validated in IDC and enabled all the tests in pinecone Signed-off-by: Pallavi Jaini --- .../tests/test_compose_pinecone_on_xeon.sh | 48 +++++++++---------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/ChatQnA/tests/test_compose_pinecone_on_xeon.sh b/ChatQnA/tests/test_compose_pinecone_on_xeon.sh index 822482aec..12c56c704 100755 --- a/ChatQnA/tests/test_compose_pinecone_on_xeon.sh +++ b/ChatQnA/tests/test_compose_pinecone_on_xeon.sh @@ -8,7 +8,7 @@ IMAGE_TAG=${IMAGE_TAG:-"latest"} echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}" echo "TAG=IMAGE_TAG=${IMAGE_TAG}" export REGISTRY=${IMAGE_REPO} -export TAG=latest +export TAG=${IMAGE_TAG} WORKPATH=$(dirname "$PWD") LOG_PATH="$WORKPATH/tests" @@ -143,29 +143,31 @@ function validate_microservices() { sleep 1m # retrieval can't curl as expected, try to wait for more time - # test /v1/dataprep upload file -> Not able to connect to pinecone server from test server so commenting out + # test /v1/dataprep/delete_file + validate_services \ + "http://${ip_address}:6009/v1/dataprep/delete_file" \ + '{"status":true}' \ + "dataprep_del" \ + "dataprep-pinecone-server" + + + # test /v1/dataprep upload file echo "Deep learning is a subset of machine learning that utilizes neural networks with multiple layers to analyze various levels of abstract data representations. It enables computers to identify patterns and make decisions with minimal human intervention by learning from large amounts of data." 
> $LOG_PATH/dataprep_file.txt - #validate_services \ - # "http://${ip_address}:6007/v1/dataprep" \ - # "Data preparation succeeded" \ - # "dataprep_upload_file" \ - # "dataprep-pinecone-server" - - # test /v1/dataprep/delete_file - #validate_services \ - # "http://${ip_address}:6009/v1/dataprep/delete_file" \ - # '{"status":true}' \ - # "dataprep_del" \ - # "dataprep-pinecone-server" + validate_services \ + "http://${ip_address}:6007/v1/dataprep" \ + "Data preparation succeeded" \ + "dataprep_upload_file" \ + "dataprep-pinecone-server" + # retrieval microservice - #test_embedding=$(python3 -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)") - #validate_services \ - # "${ip_address}:7000/v1/retrieval" \ - # " " \ - # "retrieval" \ - # "retriever-pinecone-server" \ - # "{\"text\":\"What is the revenue of Nike in 2023?\",\"embedding\":${test_embedding}}" + test_embedding=$(python3 -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)") + validate_services \ + "${ip_address}:7000/v1/retrieval" \ + " " \ + "retrieval" \ + "retriever-pinecone-server" \ + "{\"text\":\"What is the revenue of Nike in 2023?\",\"embedding\":${test_embedding}}" # tei for rerank microservice echo "Validating reranking service" @@ -269,10 +271,8 @@ function main() { elif [ "${mode}" == "" ]; then validate_microservices echo "==== microservices validated ====" - #validate_megaservice + validate_megaservice echo "==== megaservice validated ====" - validate_frontend - echo "==== frontend validated ====" fi stop_docker From 19f95bf1993974a3ea8f5dc63b3d1e8ba5b88603 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 15 Oct 2024 18:23:35 +0000 Subject: [PATCH 44/57] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Signed-off-by: Pallavi Jaini --- ChatQnA/tests/test_compose_pinecone_on_xeon.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ChatQnA/tests/test_compose_pinecone_on_xeon.sh b/ChatQnA/tests/test_compose_pinecone_on_xeon.sh index 12c56c704..0a624188e 100755 --- a/ChatQnA/tests/test_compose_pinecone_on_xeon.sh +++ b/ChatQnA/tests/test_compose_pinecone_on_xeon.sh @@ -151,7 +151,7 @@ function validate_microservices() { "dataprep-pinecone-server" - # test /v1/dataprep upload file + # test /v1/dataprep upload file echo "Deep learning is a subset of machine learning that utilizes neural networks with multiple layers to analyze various levels of abstract data representations. It enables computers to identify patterns and make decisions with minimal human intervention by learning from large amounts of data." 
> $LOG_PATH/dataprep_file.txt validate_services \ "http://${ip_address}:6007/v1/dataprep" \ @@ -159,7 +159,7 @@ function validate_microservices() { "dataprep_upload_file" \ "dataprep-pinecone-server" - + # retrieval microservice test_embedding=$(python3 -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)") validate_services \ From 6aa1720f5f3173a74e88913d8bdcece50582e6ca Mon Sep 17 00:00:00 2001 From: Pallavi Jaini Date: Wed, 16 Oct 2024 05:27:49 +0000 Subject: [PATCH 45/57] Implemented all review comments Signed-off-by: Pallavi Jaini --- .../intel/cpu/xeon/compose_pinecone.yaml | 16 ++++++++-------- ChatQnA/tests/test_compose_pinecone_on_xeon.sh | 15 +-------------- 2 files changed, 9 insertions(+), 22 deletions(-) diff --git a/ChatQnA/docker_compose/intel/cpu/xeon/compose_pinecone.yaml b/ChatQnA/docker_compose/intel/cpu/xeon/compose_pinecone.yaml index 8f1d9ad84..ef2df2b7e 100644 --- a/ChatQnA/docker_compose/intel/cpu/xeon/compose_pinecone.yaml +++ b/ChatQnA/docker_compose/intel/cpu/xeon/compose_pinecone.yaml @@ -6,7 +6,7 @@ version: "3.8" services: dataprep-pinecone-service: - image: opea/dataprep-pinecone:latest + image: ${REGISTRY:-opea}/dataprep-pinecone:${TAG:-latest} container_name: dataprep-pinecone-server depends_on: - tei-embedding-service @@ -37,7 +37,7 @@ services: https_proxy: ${https_proxy} command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate embedding: - image: opea/embedding-tei:latest + image: ${REGISTRY:-opea}/embedding-tei:${TAG:-latest} container_name: embedding-tei-server depends_on: - tei-embedding-service @@ -54,7 +54,7 @@ services: LANGCHAIN_PROJECT: "opea-embedding-service" restart: unless-stopped retriever: - image: opea/retriever-pinecone:latest + image: ${REGISTRY:-opea}/retriever-pinecone:${TAG:-latest} container_name: retriever-pinecone-server ports: - "7000:7000" @@ -86,7 +86,7 @@ services: HF_HUB_ENABLE_HF_TRANSFER: 0 command: --model-id ${RERANK_MODEL_ID} --auto-truncate reranking: - image: opea/reranking-tei:latest + image: ${REGISTRY:-opea}/reranking-tei:${TAG:-latest} container_name: reranking-tei-xeon-server depends_on: - tei-reranking-service @@ -122,7 +122,7 @@ services: HF_HUB_ENABLE_HF_TRANSFER: 0 command: --model-id ${LLM_MODEL_ID} llm: - image: opea/llm-tgi:latest + image: ${REGISTRY:-opea}/llm-tgi:${TAG:-latest} container_name: llm-tgi-server depends_on: - tgi-service @@ -142,7 +142,7 @@ services: LANGCHAIN_PROJECT: "opea-llm-service" restart: unless-stopped chaqna-xeon-backend-server: - image: opea/chatqna:latest + image: ${REGISTRY:-opea}/chatqna:${TAG:-latest} container_name: chatqna-xeon-backend-server depends_on: - tei-embedding-service @@ -166,7 +166,7 @@ services: ipc: host restart: always chaqna-xeon-ui-server: - image: opea/chatqna-ui:latest + image: ${REGISTRY:-opea}/chatqna-ui:${TAG:-latest} container_name: chatqna-xeon-ui-server depends_on: - chaqna-xeon-backend-server @@ -183,7 +183,7 @@ services: ipc: host restart: always chaqna-xeon-conversation-ui-server: - image: opea/chatqna-conversation-ui:latest + image: ${REGISTRY:-opea}/chatqna-conversation-ui:${TAG:-latest} container_name: chatqna-xeon-conversation-ui-server environment: - no_proxy=${no_proxy} diff --git a/ChatQnA/tests/test_compose_pinecone_on_xeon.sh b/ChatQnA/tests/test_compose_pinecone_on_xeon.sh index 0a624188e..5d5768c5e 100755 --- a/ChatQnA/tests/test_compose_pinecone_on_xeon.sh +++ b/ChatQnA/tests/test_compose_pinecone_on_xeon.sh @@ -28,18 +28,6 @@ function build_docker_images() { docker images && sleep 1s } -function 
build_pinecone_docker_images() { - echo "In pinecone build" - cd $WORKPATH/docker_image_build - git clone https://github.com/opea-project/GenAIComps.git - - echo "Build all the images with --no-cache, check docker_image_build.log for details..." - service_list="dataprep-pinecone retriever-pinecone" - docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log - - docker images && sleep 1s -} - function start_services() { cd $WORKPATH/docker_compose/intel/cpu/xeon/ @@ -258,8 +246,7 @@ function stop_docker() { function main() { stop_docker - if [[ "$IMAGE_REPO" == "" ]]; then build_docker_images; fi - build_pinecone_docker_images + if [[ "$IMAGE_REPO" == "opea" ]]; then build_docker_images; fi start_time=$(date +%s) start_services end_time=$(date +%s) From 822d1093b68c303e1ead704bada966042666a79d Mon Sep 17 00:00:00 2001 From: root Date: Wed, 16 Oct 2024 07:09:15 +0000 Subject: [PATCH 46/57] Updated the pinecone key Signed-off-by: root --- ChatQnA/tests/test_compose_pinecone_on_xeon.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ChatQnA/tests/test_compose_pinecone_on_xeon.sh b/ChatQnA/tests/test_compose_pinecone_on_xeon.sh index 5d5768c5e..2422f359b 100755 --- a/ChatQnA/tests/test_compose_pinecone_on_xeon.sh +++ b/ChatQnA/tests/test_compose_pinecone_on_xeon.sh @@ -38,7 +38,7 @@ function start_services() { export TEI_EMBEDDING_ENDPOINT="http://${ip_address}:6006" export TEI_RERANKING_ENDPOINT="http://${ip_address}:8808" export TGI_LLM_ENDPOINT="http://${ip_address}:9009" - export PINECONE_API_KEY=${PINECONE_KEY} + export PINECONE_API_KEY=${PINECONE_KEY_LANGCHAIN_TEST} export PINECONE_INDEX_NAME="langchain-test" export INDEX_NAME="langchain-test" export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} From 15362dccdf83bc448164aa0d351e01a4bd078fd9 Mon Sep 17 00:00:00 2001 From: Pallavi Jaini Date: Thu, 17 Oct 2024 23:04:21 -0700 Subject: [PATCH 47/57] Made changes to reflect the new framework Signed-off-by: Pallavi Jaini --- .../intel/cpu/xeon/compose_pinecone.yaml | 103 +++++------------- .../tests/test_compose_pinecone_on_xeon.sh | 45 +------- 2 files changed, 31 insertions(+), 117 deletions(-) diff --git a/ChatQnA/docker_compose/intel/cpu/xeon/compose_pinecone.yaml b/ChatQnA/docker_compose/intel/cpu/xeon/compose_pinecone.yaml index ef2df2b7e..7a2735fe9 100644 --- a/ChatQnA/docker_compose/intel/cpu/xeon/compose_pinecone.yaml +++ b/ChatQnA/docker_compose/intel/cpu/xeon/compose_pinecone.yaml @@ -20,7 +20,7 @@ services: https_proxy: ${https_proxy} PINECONE_API_KEY: ${PINECONE_API_KEY} PINECONE_INDEX_NAME: ${PINECONE_INDEX_NAME} - TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} + TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80 LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY} HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} tei-embedding-service: @@ -36,23 +36,6 @@ services: http_proxy: ${http_proxy} https_proxy: ${https_proxy} command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate - embedding: - image: ${REGISTRY:-opea}/embedding-tei:${TAG:-latest} - container_name: embedding-tei-server - depends_on: - - tei-embedding-service - ports: - - "6000:6000" - ipc: host - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} - LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY} - LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2} - LANGCHAIN_PROJECT: "opea-embedding-service" - restart: unless-stopped retriever: image: 
${REGISTRY:-opea}/retriever-pinecone:${TAG:-latest} container_name: retriever-pinecone-server @@ -66,7 +49,7 @@ services: INDEX_NAME: ${PINECONE_INDEX_NAME} PINECONE_INDEX_NAME: ${PINECONE_INDEX_NAME} LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY} - TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} + TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80 HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} restart: unless-stopped tei-reranking-service: @@ -85,28 +68,8 @@ services: HF_HUB_DISABLE_PROGRESS_BARS: 1 HF_HUB_ENABLE_HF_TRANSFER: 0 command: --model-id ${RERANK_MODEL_ID} --auto-truncate - reranking: - image: ${REGISTRY:-opea}/reranking-tei:${TAG:-latest} - container_name: reranking-tei-xeon-server - depends_on: - - tei-reranking-service - ports: - - "8000:8000" - ipc: host - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - TEI_RERANKING_ENDPOINT: ${TEI_RERANKING_ENDPOINT} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} - HF_HUB_DISABLE_PROGRESS_BARS: 1 - HF_HUB_ENABLE_HF_TRANSFER: 0 - LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY} - LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2} - LANGCHAIN_PROJECT: "opea-reranking-service" - restart: unless-stopped tgi-service: - image: ghcr.io/huggingface/text-generation-inference:2.1.0 + image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu container_name: tgi-service ports: - "9009:80" @@ -121,48 +84,30 @@ services: HF_HUB_DISABLE_PROGRESS_BARS: 1 HF_HUB_ENABLE_HF_TRANSFER: 0 command: --model-id ${LLM_MODEL_ID} - llm: - image: ${REGISTRY:-opea}/llm-tgi:${TAG:-latest} - container_name: llm-tgi-server - depends_on: - - tgi-service - ports: - - "9000:9000" - ipc: host - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} - HF_HUB_DISABLE_PROGRESS_BARS: 1 - HF_HUB_ENABLE_HF_TRANSFER: 0 - LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY} - LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2} - LANGCHAIN_PROJECT: "opea-llm-service" - restart: unless-stopped chaqna-xeon-backend-server: image: ${REGISTRY:-opea}/chatqna:${TAG:-latest} container_name: chatqna-xeon-backend-server depends_on: - tei-embedding-service - - embedding + - dataprep-pinecone-service - retriever - tei-reranking-service - - reranking - tgi-service - - llm ports: - "8888:8888" environment: - no_proxy=${no_proxy} - https_proxy=${https_proxy} - http_proxy=${http_proxy} - - MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP} - - EMBEDDING_SERVICE_HOST_IP=${EMBEDDING_SERVICE_HOST_IP} - - RETRIEVER_SERVICE_HOST_IP=${RETRIEVER_SERVICE_HOST_IP} - - RERANK_SERVICE_HOST_IP=${RERANK_SERVICE_HOST_IP} - - LLM_SERVICE_HOST_IP=${LLM_SERVICE_HOST_IP} + - MEGA_SERVICE_HOST_IP=chatqna-xeon-backend-server + - EMBEDDING_SERVER_HOST_IP=tei-embedding-service + - EMBEDDING_SERVER_PORT=${EMBEDDING_SERVER_PORT:-80} + - RETRIEVER_SERVICE_HOST_IP=retriever + - RERANK_SERVER_HOST_IP=tei-reranking-service + - RERANK_SERVER_PORT=${RERANK_SERVER_PORT:-80} + - LLM_SERVER_HOST_IP=tgi-service + - LLM_SERVER_PORT=${LLM_SERVER_PORT:-80} + - LOGFLAG=${LOGFLAG} ipc: host restart: always chaqna-xeon-ui-server: @@ -182,17 +127,25 @@ services: - DELETE_FILE=${DATAPREP_DELETE_FILE_ENDPOINT} ipc: host restart: always - chaqna-xeon-conversation-ui-server: - image: ${REGISTRY:-opea}/chatqna-conversation-ui:${TAG:-latest} - container_name: chatqna-xeon-conversation-ui-server +chatqna-xeon-nginx-server: + image: ${REGISTRY:-opea}/nginx:${TAG:-latest} + 
container_name: chatqna-xeon-nginx-server + depends_on: + - chatqna-xeon-backend-server + - chatqna-xeon-ui-server + ports: + - "${NGINX_PORT:-80}:80" environment: - no_proxy=${no_proxy} - https_proxy=${https_proxy} - http_proxy=${http_proxy} - ports: - - 5174:80 - depends_on: - - chaqna-xeon-backend-server + - FRONTEND_SERVICE_IP=chatqna-xeon-ui-server + - FRONTEND_SERVICE_PORT=5173 + - BACKEND_SERVICE_NAME=chatqna + - BACKEND_SERVICE_IP=chatqna-xeon-backend-server + - BACKEND_SERVICE_PORT=8888 + - DATAPREP_SERVICE_IP=dataprep-pinecone-service + - DATAPREP_SERVICE_PORT=6007 ipc: host restart: always diff --git a/ChatQnA/tests/test_compose_pinecone_on_xeon.sh b/ChatQnA/tests/test_compose_pinecone_on_xeon.sh index 2422f359b..ed2234027 100755 --- a/ChatQnA/tests/test_compose_pinecone_on_xeon.sh +++ b/ChatQnA/tests/test_compose_pinecone_on_xeon.sh @@ -19,10 +19,10 @@ function build_docker_images() { git clone https://github.com/opea-project/GenAIComps.git echo "Build all the images with --no-cache, check docker_image_build.log for details..." - service_list="chatqna chatqna-ui chatqna-conversation-ui dataprep-pinecone embedding-tei retriever-pinecone reranking-tei llm-tgi" + service_list="chatqna chatqna-ui dataprep-pinecone retriever-pinecone nginx" docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log - docker pull ghcr.io/huggingface/tgi-gaudi:2.0.1 + docker pull ghcr.io/huggingface/tgi-gaudi:2.0.5 docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 docker images && sleep 1s @@ -35,24 +35,10 @@ function start_services() { export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5" export RERANK_MODEL_ID="BAAI/bge-reranker-base" export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" - export TEI_EMBEDDING_ENDPOINT="http://${ip_address}:6006" - export TEI_RERANKING_ENDPOINT="http://${ip_address}:8808" - export TGI_LLM_ENDPOINT="http://${ip_address}:9009" export PINECONE_API_KEY=${PINECONE_KEY_LANGCHAIN_TEST} export PINECONE_INDEX_NAME="langchain-test" export INDEX_NAME="langchain-test" export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} - export MEGA_SERVICE_HOST_IP=${ip_address} - export EMBEDDING_SERVICE_HOST_IP=${ip_address} - export RETRIEVER_SERVICE_HOST_IP=${ip_address} - export RERANK_SERVICE_HOST_IP=${ip_address} - export LLM_SERVICE_HOST_IP=${ip_address} - export BACKEND_SERVICE_ENDPOINT="http://${ip_address}:8888/v1/chatqna" - export DATAPREP_SERVICE_ENDPOINT="http://${ip_address}:6007/v1/dataprep" - export DATAPREP_GET_FILE_ENDPOINT="http://${ip_address}:6008/v1/dataprep/get_file" - export DATAPREP_DELETE_FILE_ENDPOINT="http://${ip_address}:6009/v1/dataprep/delete_file" - - sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env # Start Docker Containers docker compose -f compose_pinecone.yaml up -d > ${LOG_PATH}/start_services_with_compose.log @@ -121,14 +107,6 @@ function validate_microservices() { "tei-embedding-server" \ '{"inputs":"What is Deep Learning?"}' - # embedding microservice - validate_services \ - "${ip_address}:6000/v1/embeddings" \ - '"text":"What is Deep Learning?","embedding":[' \ - "embedding" \ - "embedding-tei-server" \ - '{"text":"What is Deep Learning?"}' - sleep 1m # retrieval can't curl as expected, try to wait for more time # test /v1/dataprep/delete_file @@ -166,14 +144,6 @@ function validate_microservices() { "tei-reranking-server" \ '{"query":"What is Deep Learning?", "texts": ["Deep Learning is not...", "Deep learning is..."]}' - # rerank microservice - echo "Validating reranking micro 
service" - validate_services \ - "${ip_address}:8000/v1/reranking" \ - "Deep learning is..." \ - "rerank" \ - "reranking-tei-xeon-server" \ - '{"initial_query":"What is Deep Learning?", "retrieved_docs": [{"text":"Deep Learning is not..."}, {"text":"Deep learning is..."}]}' # tgi for llm service echo "Validating llm service" @@ -184,15 +154,6 @@ function validate_microservices() { "tgi-service" \ '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' - # llm microservice - echo "Validating llm microservice" - validate_services \ - "${ip_address}:9000/v1/chat/completions" \ - "data: " \ - "llm" \ - "llm-tgi-server" \ - '{"query":"What is Deep Learning?"}' - } function validate_megaservice() { @@ -200,7 +161,7 @@ function validate_megaservice() { validate_services \ "${ip_address}:8888/v1/chatqna" \ "billion" \ - "mega-chatqna" \ + "chatqna-megaservice" \ "chatqna-xeon-backend-server" \ '{"messages": "What is the revenue of Nike in 2023?"}' From 0b559c6573078c831c5505b26b68064648499c75 Mon Sep 17 00:00:00 2001 From: Pallavi Jaini Date: Thu, 17 Oct 2024 23:20:14 -0700 Subject: [PATCH 48/57] Fixed the typos Signed-off-by: Pallavi Jaini --- .../docker_compose/intel/cpu/xeon/compose_pinecone.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/ChatQnA/docker_compose/intel/cpu/xeon/compose_pinecone.yaml b/ChatQnA/docker_compose/intel/cpu/xeon/compose_pinecone.yaml index 7a2735fe9..18ec2d5e3 100644 --- a/ChatQnA/docker_compose/intel/cpu/xeon/compose_pinecone.yaml +++ b/ChatQnA/docker_compose/intel/cpu/xeon/compose_pinecone.yaml @@ -84,7 +84,7 @@ services: HF_HUB_DISABLE_PROGRESS_BARS: 1 HF_HUB_ENABLE_HF_TRANSFER: 0 command: --model-id ${LLM_MODEL_ID} - chaqna-xeon-backend-server: + chatqna-xeon-backend-server: image: ${REGISTRY:-opea}/chatqna:${TAG:-latest} container_name: chatqna-xeon-backend-server depends_on: @@ -110,11 +110,11 @@ services: - LOGFLAG=${LOGFLAG} ipc: host restart: always - chaqna-xeon-ui-server: + chatqna-xeon-ui-server: image: ${REGISTRY:-opea}/chatqna-ui:${TAG:-latest} container_name: chatqna-xeon-ui-server depends_on: - - chaqna-xeon-backend-server + - chatqna-xeon-backend-server ports: - "5173:5173" environment: @@ -127,7 +127,7 @@ services: - DELETE_FILE=${DATAPREP_DELETE_FILE_ENDPOINT} ipc: host restart: always -chatqna-xeon-nginx-server: + chatqna-xeon-nginx-server: image: ${REGISTRY:-opea}/nginx:${TAG:-latest} container_name: chatqna-xeon-nginx-server depends_on: From a722ba1efeb6ac59338fc0343e12eabe741a387e Mon Sep 17 00:00:00 2001 From: Pallavi Jaini Date: Mon, 28 Oct 2024 21:52:18 -0700 Subject: [PATCH 49/57] added more logs Signed-off-by: Pallavi Jaini --- ChatQnA/tests/test_compose_pinecone_on_xeon.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/ChatQnA/tests/test_compose_pinecone_on_xeon.sh b/ChatQnA/tests/test_compose_pinecone_on_xeon.sh index ed2234027..2425543c7 100755 --- a/ChatQnA/tests/test_compose_pinecone_on_xeon.sh +++ b/ChatQnA/tests/test_compose_pinecone_on_xeon.sh @@ -16,13 +16,13 @@ ip_address=$(hostname -I | awk '{print $1}') function build_docker_images() { cd $WORKPATH/docker_image_build - git clone https://github.com/opea-project/GenAIComps.git + git clone https://github.com/opea-project/GenAIComps.git && cd GenAIComps && git checkout "${opea_branch:-"main"}" && cd ../ echo "Build all the images with --no-cache, check docker_image_build.log for details..." 
service_list="chatqna chatqna-ui dataprep-pinecone retriever-pinecone nginx" docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log - docker pull ghcr.io/huggingface/tgi-gaudi:2.0.5 + docker pull ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 docker images && sleep 1s @@ -34,7 +34,7 @@ function start_services() { export no_proxy=${no_proxy},${ip_address} export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5" export RERANK_MODEL_ID="BAAI/bge-reranker-base" - export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" + export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct" export PINECONE_API_KEY=${PINECONE_KEY_LANGCHAIN_TEST} export PINECONE_INDEX_NAME="langchain-test" export INDEX_NAME="langchain-test" @@ -44,7 +44,7 @@ function start_services() { docker compose -f compose_pinecone.yaml up -d > ${LOG_PATH}/start_services_with_compose.log n=0 - until [[ "$n" -ge 200 ]]; do + until [[ "$n" -ge 500 ]]; do docker logs tgi-service > tgi_service_start.log if grep -q Connected tgi_service_start.log; then break From 700671c01e9dd8ba62e04fab2e4ad6270108c839 Mon Sep 17 00:00:00 2001 From: Pallavi Jaini Date: Mon, 28 Oct 2024 22:00:51 -0700 Subject: [PATCH 50/57] Updated the log path --- ChatQnA/tests/test_compose_pinecone_on_xeon.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ChatQnA/tests/test_compose_pinecone_on_xeon.sh b/ChatQnA/tests/test_compose_pinecone_on_xeon.sh index 2425543c7..fdc08482d 100755 --- a/ChatQnA/tests/test_compose_pinecone_on_xeon.sh +++ b/ChatQnA/tests/test_compose_pinecone_on_xeon.sh @@ -45,8 +45,8 @@ function start_services() { n=0 until [[ "$n" -ge 500 ]]; do - docker logs tgi-service > tgi_service_start.log - if grep -q Connected tgi_service_start.log; then + docker logs tgi-service > ${LOG_PATH}/tgi_service_start.log + if grep -q Connected ${LOG_PATH}/tgi_service_start.log; then break fi sleep 1s From 810a70b4169972634eff76e0590554dab3ff1ed2 Mon Sep 17 00:00:00 2001 From: Pallavi Jaini Date: Mon, 28 Oct 2024 22:02:43 -0700 Subject: [PATCH 51/57] Updated the log path Signed-off-by: Pallavi Jaini --- ChatQnA/tests/test_compose_pinecone_on_xeon.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/ChatQnA/tests/test_compose_pinecone_on_xeon.sh b/ChatQnA/tests/test_compose_pinecone_on_xeon.sh index fdc08482d..3ffe85380 100755 --- a/ChatQnA/tests/test_compose_pinecone_on_xeon.sh +++ b/ChatQnA/tests/test_compose_pinecone_on_xeon.sh @@ -73,6 +73,7 @@ function validate_services() { RESPONSE_BODY=$(echo $HTTP_RESPONSE | sed -e 's/HTTPSTATUS\:.*//g') docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log + # check response status if [ "$HTTP_STATUS" -ne "200" ]; then From 9b5a7e61504d6801e00425ead883304f4b405ae4 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 29 Oct 2024 05:03:22 +0000 Subject: [PATCH 52/57] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- ChatQnA/tests/test_compose_pinecone_on_xeon.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ChatQnA/tests/test_compose_pinecone_on_xeon.sh b/ChatQnA/tests/test_compose_pinecone_on_xeon.sh index 3ffe85380..cdb9eec8d 100755 --- a/ChatQnA/tests/test_compose_pinecone_on_xeon.sh +++ b/ChatQnA/tests/test_compose_pinecone_on_xeon.sh @@ -73,7 +73,7 @@ function validate_services() { RESPONSE_BODY=$(echo $HTTP_RESPONSE | sed -e 
's/HTTPSTATUS\:.*//g') docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log - + # check response status if [ "$HTTP_STATUS" -ne "200" ]; then From 53e50137e03a40b156bef84eccb2397fa1af11b7 Mon Sep 17 00:00:00 2001 From: Pallavi Jaini Date: Mon, 11 Nov 2024 23:15:24 -0800 Subject: [PATCH 53/57] Updated the llm image --- .../intel/cpu/xeon/compose_pinecone.yaml | 9 +++----- .../tests/test_compose_pinecone_on_xeon.sh | 22 +++++++++---------- 2 files changed, 14 insertions(+), 17 deletions(-) diff --git a/ChatQnA/docker_compose/intel/cpu/xeon/compose_pinecone.yaml b/ChatQnA/docker_compose/intel/cpu/xeon/compose_pinecone.yaml index 18ec2d5e3..f42fd6fd2 100644 --- a/ChatQnA/docker_compose/intel/cpu/xeon/compose_pinecone.yaml +++ b/ChatQnA/docker_compose/intel/cpu/xeon/compose_pinecone.yaml @@ -69,7 +69,7 @@ services: HF_HUB_ENABLE_HF_TRANSFER: 0 command: --model-id ${RERANK_MODEL_ID} --auto-truncate tgi-service: - image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu + image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu container_name: tgi-service ports: - "9009:80" @@ -83,7 +83,7 @@ services: HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} HF_HUB_DISABLE_PROGRESS_BARS: 1 HF_HUB_ENABLE_HF_TRANSFER: 0 - command: --model-id ${LLM_MODEL_ID} + command: --model-id ${LLM_MODEL_ID} --cuda-graphs 0 chatqna-xeon-backend-server: image: ${REGISTRY:-opea}/chatqna:${TAG:-latest} container_name: chatqna-xeon-backend-server @@ -108,6 +108,7 @@ services: - LLM_SERVER_HOST_IP=tgi-service - LLM_SERVER_PORT=${LLM_SERVER_PORT:-80} - LOGFLAG=${LOGFLAG} + - LLM_MODEL=${LLM_MODEL_ID} ipc: host restart: always chatqna-xeon-ui-server: @@ -121,10 +122,6 @@ services: - no_proxy=${no_proxy} - https_proxy=${https_proxy} - http_proxy=${http_proxy} - - CHAT_BASE_URL=${BACKEND_SERVICE_ENDPOINT} - - UPLOAD_FILE_BASE_URL=${DATAPREP_SERVICE_ENDPOINT} - - GET_FILE=${DATAPREP_GET_FILE_ENDPOINT} - - DELETE_FILE=${DATAPREP_DELETE_FILE_ENDPOINT} ipc: host restart: always chatqna-xeon-nginx-server: diff --git a/ChatQnA/tests/test_compose_pinecone_on_xeon.sh b/ChatQnA/tests/test_compose_pinecone_on_xeon.sh index cdb9eec8d..93cd45239 100755 --- a/ChatQnA/tests/test_compose_pinecone_on_xeon.sh +++ b/ChatQnA/tests/test_compose_pinecone_on_xeon.sh @@ -22,7 +22,7 @@ function build_docker_images() { service_list="chatqna chatqna-ui dataprep-pinecone retriever-pinecone nginx" docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log - docker pull ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu + docker pull ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 docker images && sleep 1s @@ -54,7 +54,7 @@ function start_services() { done } -function validate_services() { +function validate_service() { local URL="$1" local EXPECTED_RESULT="$2" local SERVICE_NAME="$3" @@ -101,7 +101,7 @@ function validate_microservices() { # Check if the microservices are running correctly. 
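    # (Illustrative annotation, not part of the recorded change.) Each check below
    # passes five arguments to validate_service: the endpoint URL, a substring
    # expected in the response, a short service name used for the
    # ${LOG_PATH}/<name>.log file, the container whose logs are collected, and the
    # JSON payload to POST. The helper requires an HTTP 200 status and matches the
    # response body against the expected substring before the next check runs.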
# tei for embedding service - validate_services \ + validate_service \ "${ip_address}:6006/embed" \ "[[" \ "tei-embedding" \ @@ -111,7 +111,7 @@ function validate_microservices() { sleep 1m # retrieval can't curl as expected, try to wait for more time # test /v1/dataprep/delete_file - validate_services \ + validate_service \ "http://${ip_address}:6009/v1/dataprep/delete_file" \ '{"status":true}' \ "dataprep_del" \ @@ -120,7 +120,7 @@ function validate_microservices() { # test /v1/dataprep upload file echo "Deep learning is a subset of machine learning that utilizes neural networks with multiple layers to analyze various levels of abstract data representations. It enables computers to identify patterns and make decisions with minimal human intervention by learning from large amounts of data." > $LOG_PATH/dataprep_file.txt - validate_services \ + validate_service \ "http://${ip_address}:6007/v1/dataprep" \ "Data preparation succeeded" \ "dataprep_upload_file" \ @@ -129,7 +129,7 @@ function validate_microservices() { # retrieval microservice test_embedding=$(python3 -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)") - validate_services \ + validate_service \ "${ip_address}:7000/v1/retrieval" \ " " \ "retrieval" \ @@ -138,7 +138,7 @@ function validate_microservices() { # tei for rerank microservice echo "Validating reranking service" - validate_services \ + validate_service \ "${ip_address}:8808/rerank" \ '{"index":1,"score":' \ "tei-rerank" \ @@ -148,7 +148,7 @@ function validate_microservices() { # tgi for llm service echo "Validating llm service" - validate_services \ + validate_service \ "${ip_address}:9009/generate" \ "generated_text" \ "tgi-llm" \ @@ -159,9 +159,9 @@ function validate_microservices() { function validate_megaservice() { # Curl the Mega Service - validate_services \ + validate_service \ "${ip_address}:8888/v1/chatqna" \ - "billion" \ + "data: " \ "chatqna-megaservice" \ "chatqna-xeon-backend-server" \ '{"messages": "What is the revenue of Nike in 2023?"}' @@ -208,7 +208,7 @@ function stop_docker() { function main() { stop_docker - if [[ "$IMAGE_REPO" == "opea" ]]; then build_docker_images; fi + #if [[ "$IMAGE_REPO" == "opea" ]]; then build_docker_images; fi start_time=$(date +%s) start_services end_time=$(date +%s) From 2dbdffa1dc70b81ad19313bc0b7cef88865e1bfc Mon Sep 17 00:00:00 2001 From: Pallavi Jaini Date: Mon, 11 Nov 2024 23:15:50 -0800 Subject: [PATCH 54/57] Updated the llm image Signed-off-by: Pallavi Jaini --- ChatQnA/tests/test_compose_pinecone_on_xeon.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/ChatQnA/tests/test_compose_pinecone_on_xeon.sh b/ChatQnA/tests/test_compose_pinecone_on_xeon.sh index 93cd45239..8c102edf6 100755 --- a/ChatQnA/tests/test_compose_pinecone_on_xeon.sh +++ b/ChatQnA/tests/test_compose_pinecone_on_xeon.sh @@ -30,7 +30,6 @@ function build_docker_images() { function start_services() { cd $WORKPATH/docker_compose/intel/cpu/xeon/ - export no_proxy=${no_proxy},${ip_address} export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5" export RERANK_MODEL_ID="BAAI/bge-reranker-base" From 2dd0339c80cbc0e6de51737778a8050328b85830 Mon Sep 17 00:00:00 2001 From: Pallavi Jaini Date: Tue, 12 Nov 2024 08:25:37 -0800 Subject: [PATCH 55/57] uncommented building the images --- ChatQnA/tests/test_compose_pinecone_on_xeon.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ChatQnA/tests/test_compose_pinecone_on_xeon.sh b/ChatQnA/tests/test_compose_pinecone_on_xeon.sh index 8c102edf6..de43d4c16 100755 --- 
a/ChatQnA/tests/test_compose_pinecone_on_xeon.sh +++ b/ChatQnA/tests/test_compose_pinecone_on_xeon.sh @@ -207,7 +207,7 @@ function stop_docker() { function main() { stop_docker - #if [[ "$IMAGE_REPO" == "opea" ]]; then build_docker_images; fi + if [[ "$IMAGE_REPO" == "opea" ]]; then build_docker_images; fi start_time=$(date +%s) start_services end_time=$(date +%s) From de13ffeb9230f4c5ef0edf7979dd2b5483c4f226 Mon Sep 17 00:00:00 2001 From: Pallavi Jaini Date: Tue, 12 Nov 2024 08:25:59 -0800 Subject: [PATCH 56/57] uncommented building the images Signed-off-by: Pallavi Jaini --- ChatQnA/tests/test_compose_pinecone_on_xeon.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ChatQnA/tests/test_compose_pinecone_on_xeon.sh b/ChatQnA/tests/test_compose_pinecone_on_xeon.sh index de43d4c16..743312461 100755 --- a/ChatQnA/tests/test_compose_pinecone_on_xeon.sh +++ b/ChatQnA/tests/test_compose_pinecone_on_xeon.sh @@ -207,7 +207,9 @@ function stop_docker() { function main() { stop_docker + if [[ "$IMAGE_REPO" == "opea" ]]; then build_docker_images; fi + start_time=$(date +%s) start_services end_time=$(date +%s) From d7b040a9eab832d86564729b292b9048eabf7b49 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 12 Nov 2024 16:26:44 +0000 Subject: [PATCH 57/57] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- ChatQnA/tests/test_compose_pinecone_on_xeon.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ChatQnA/tests/test_compose_pinecone_on_xeon.sh b/ChatQnA/tests/test_compose_pinecone_on_xeon.sh index 743312461..a95b90c16 100755 --- a/ChatQnA/tests/test_compose_pinecone_on_xeon.sh +++ b/ChatQnA/tests/test_compose_pinecone_on_xeon.sh @@ -207,9 +207,9 @@ function stop_docker() { function main() { stop_docker - + if [[ "$IMAGE_REPO" == "opea" ]]; then build_docker_images; fi - + start_time=$(date +%s) start_services end_time=$(date +%s)
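Taken together, the final revisions leave the Pinecone test driven by a small set of environment variables. The snippet below is a hypothetical local invocation rather than the CI recipe: the placeholder values are assumptions, and any remaining variables the script relies on (proxy settings, work and log paths) are expected to come from the usual CI environment.

```bash
# Hypothetical local run of the updated Pinecone test on a Xeon host; placeholder values only.
export IMAGE_REPO=opea                                 # "opea" makes main() rebuild the images locally
export opea_branch=main                                # GenAIComps branch checked out in build_docker_images()
export PINECONE_KEY_LANGCHAIN_TEST=<pinecone-api-key>  # exported as PINECONE_API_KEY in start_services()
export HUGGINGFACEHUB_API_TOKEN=<hf-token>
bash ChatQnA/tests/test_compose_pinecone_on_xeon.sh
```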