diff --git a/.github/code_spell_ignore.txt b/.github/code_spell_ignore.txt index e69de29bb..047d573d2 100644 --- a/.github/code_spell_ignore.txt +++ b/.github/code_spell_ignore.txt @@ -0,0 +1,2 @@ +ModelIn +modelin \ No newline at end of file diff --git a/.github/workflows/_example-workflow.yml b/.github/workflows/_example-workflow.yml index cfed39c95..9b50d9349 100644 --- a/.github/workflows/_example-workflow.yml +++ b/.github/workflows/_example-workflow.yml @@ -77,6 +77,10 @@ jobs: git clone https://github.com/vllm-project/vllm.git cd vllm && git rev-parse HEAD && cd ../ fi + if [[ $(grep -c "vllm-hpu:" ${docker_compose_path}) != 0 ]]; then + git clone https://github.com/HabanaAI/vllm-fork.git + cd vllm-fork && git rev-parse HEAD && cd ../ + fi git clone https://github.com/opea-project/GenAIComps.git cd GenAIComps && git checkout ${{ inputs.opea_branch }} && git rev-parse HEAD && cd ../ diff --git a/.github/workflows/check-online-doc-build.yml b/.github/workflows/check-online-doc-build.yml new file mode 100644 index 000000000..4972f398d --- /dev/null +++ b/.github/workflows/check-online-doc-build.yml @@ -0,0 +1,35 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +name: Check Online Document Building +permissions: {} + +on: + pull_request: + branches: [main] + paths: + - "**.md" + - "**.rst" + +jobs: + build: + runs-on: ubuntu-latest + steps: + + - name: Checkout + uses: actions/checkout@v4 + with: + path: GenAIExamples + + - name: Checkout docs + uses: actions/checkout@v4 + with: + repository: opea-project/docs + path: docs + + - name: Build Online Document + shell: bash + run: | + echo "build online doc" + cd docs + bash scripts/build.sh diff --git a/.github/workflows/nightly-docker-build-publish.yml b/.github/workflows/nightly-docker-build-publish.yml index 544c69924..d30562224 100644 --- a/.github/workflows/nightly-docker-build-publish.yml +++ b/.github/workflows/nightly-docker-build-publish.yml @@ -42,7 +42,6 @@ jobs: with: node: gaudi example: ${{ matrix.example }} - inject_commit: true secrets: inherit get-image-list: diff --git a/.github/workflows/pr-path-detection.yml b/.github/workflows/pr-path-detection.yml index 3a1c6b0e7..c314bd614 100644 --- a/.github/workflows/pr-path-detection.yml +++ b/.github/workflows/pr-path-detection.yml @@ -68,7 +68,7 @@ jobs: # echo $url_line url=$(echo "$url_line"|cut -d '(' -f2 | cut -d ')' -f1|sed 's/\.git$//') path=$(echo "$url_line"|cut -d':' -f1 | cut -d'/' -f2-) - response=$(curl -L -s -o /dev/null -w "%{http_code}" "$url") + response=$(curl -L -s -o /dev/null -w "%{http_code}" "$url")|| true if [ "$response" -ne 200 ]; then echo "**********Validation failed, try again**********" response_retry=$(curl -s -o /dev/null -w "%{http_code}" "$url") diff --git a/ChatQnA/docker_compose/intel/hpu/gaudi/README.md b/ChatQnA/docker_compose/intel/hpu/gaudi/README.md index d3237e2fe..a922ec031 100644 --- a/ChatQnA/docker_compose/intel/hpu/gaudi/README.md +++ b/ChatQnA/docker_compose/intel/hpu/gaudi/README.md @@ -26,7 +26,7 @@ To set up environment variables for deploying ChatQnA services, follow these ste export http_proxy="Your_HTTP_Proxy" export https_proxy="Your_HTTPs_Proxy" # Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1" - export no_proxy="Your_No_Proxy",chatqna-gaudi-ui-server,chatqna-gaudi-backend-server,dataprep-redis-service,tei-embedding-service,retriever,tei-reranking-service,tgi-service,vllm_service,vllm-ray-service,guardrails + export 
no_proxy="Your_No_Proxy",chatqna-gaudi-ui-server,chatqna-gaudi-backend-server,dataprep-redis-service,tei-embedding-service,retriever,tei-reranking-service,tgi-service,vllm_service,guardrails ``` 3. Set up other environment variables: @@ -227,7 +227,7 @@ For users in China who are unable to download models directly from Huggingface, export http_proxy="Your_HTTP_Proxy" export https_proxy="Your_HTTPs_Proxy" # Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1" - export no_proxy="Your_No_Proxy",chatqna-gaudi-ui-server,chatqna-gaudi-backend-server,dataprep-redis-service,tei-embedding-service,retriever,tei-reranking-service,tgi-service,vllm_service,vllm-ray-service,guardrails + export no_proxy="Your_No_Proxy",chatqna-gaudi-ui-server,chatqna-gaudi-backend-server,dataprep-redis-service,tei-embedding-service,retriever,tei-reranking-service,tgi-service,vllm_service,guardrails ``` 3. Set up other environment variables: @@ -257,12 +257,6 @@ If use vllm for llm backend. docker compose -f compose_vllm.yaml up -d ``` -If use vllm-on-ray for llm backend. - -```bash -docker compose -f compose_vllm_ray.yaml up -d -``` - If you want to enable guardrails microservice in the pipeline, please follow the below command instead: ```bash @@ -351,13 +345,6 @@ For validation details, please refer to [how-to-validate_service](./how_to_valid }' ``` - ```bash - #vLLM-on-Ray Service - curl http://${host_ip}:8006/v1/chat/completions \ - -H "Content-Type: application/json" \ - -d '{"model": "${LLM_MODEL_ID}", "messages": [{"role": "user", "content": "What is Deep Learning?"}]}' - ``` - 5. MegaService ```bash diff --git a/ChatQnA/docker_compose/intel/hpu/gaudi/compose_vllm_ray.yaml b/ChatQnA/docker_compose/intel/hpu/gaudi/compose_vllm_ray.yaml deleted file mode 100644 index 1f067e77e..000000000 --- a/ChatQnA/docker_compose/intel/hpu/gaudi/compose_vllm_ray.yaml +++ /dev/null @@ -1,164 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -services: - redis-vector-db: - image: redis/redis-stack:7.2.0-v9 - container_name: redis-vector-db - ports: - - "6379:6379" - - "8001:8001" - dataprep-redis-service: - image: ${REGISTRY:-opea}/dataprep-redis:${TAG:-latest} - container_name: dataprep-redis-server - depends_on: - - redis-vector-db - - tei-embedding-service - ports: - - "6007:6007" - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - REDIS_URL: redis://redis-vector-db:6379 - REDIS_HOST: redis-vector-db - INDEX_NAME: ${INDEX_NAME} - TEI_ENDPOINT: http://tei-embedding-service:80 - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} - tei-embedding-service: - image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 - container_name: tei-embedding-gaudi-server - ports: - - "8090:80" - volumes: - - "./data:/data" - shm_size: 1g - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate - retriever: - image: ${REGISTRY:-opea}/retriever-redis:${TAG:-latest} - container_name: retriever-redis-server - depends_on: - - redis-vector-db - ports: - - "7000:7000" - ipc: host - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - REDIS_URL: redis://redis-vector-db:6379 - REDIS_HOST: redis-vector-db - INDEX_NAME: ${INDEX_NAME} - TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80 - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} - restart: unless-stopped - tei-reranking-service: - image: 
ghcr.io/huggingface/tei-gaudi:latest - container_name: tei-reranking-gaudi-server - ports: - - "8808:80" - volumes: - - "./data:/data" - runtime: habana - cap_add: - - SYS_NICE - ipc: host - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - HF_HUB_DISABLE_PROGRESS_BARS: 1 - HF_HUB_ENABLE_HF_TRANSFER: 0 - HABANA_VISIBLE_DEVICES: all - OMPI_MCA_btl_vader_single_copy_mechanism: none - MAX_WARMUP_SEQUENCE_LENGTH: 512 - command: --model-id ${RERANK_MODEL_ID} --auto-truncate - vllm-ray-service: - image: ${REGISTRY:-opea}/llm-vllm-ray-hpu:${TAG:-latest} - container_name: vllm-ray-gaudi-server - ports: - - "8006:8000" - volumes: - - "./data:/data" - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} - HABANA_VISIBLE_DEVICES: all - OMPI_MCA_btl_vader_single_copy_mechanism: none - LLM_MODEL_ID: ${LLM_MODEL_ID} - runtime: habana - cap_add: - - SYS_NICE - ipc: host - command: /bin/bash -c "ray start --head && python vllm_ray_openai.py --port_number 8000 --model_id_or_path $LLM_MODEL_ID --tensor_parallel_size 2 --enforce_eager True" - chatqna-gaudi-backend-server: - image: ${REGISTRY:-opea}/chatqna:${TAG:-latest} - container_name: chatqna-gaudi-backend-server - depends_on: - - redis-vector-db - - tei-embedding-service - - retriever - - tei-reranking-service - - vllm-ray-service - ports: - - "8888:8888" - environment: - - no_proxy=${no_proxy} - - https_proxy=${https_proxy} - - http_proxy=${http_proxy} - - MEGA_SERVICE_HOST_IP=chatqna-gaudi-backend-server - - EMBEDDING_SERVER_HOST_IP=tei-embedding-service - - EMBEDDING_SERVER_PORT=${EMBEDDING_SERVER_PORT:-80} - - RETRIEVER_SERVICE_HOST_IP=retriever - - RERANK_SERVER_HOST_IP=tei-reranking-service - - RERANK_SERVER_PORT=${RERANK_SERVER_PORT:-80} - - LLM_SERVER_HOST_IP=vllm-ray-service - - LLM_SERVER_PORT=${LLM_SERVER_PORT:-8000} - - LLM_MODEL=${LLM_MODEL_ID} - - LOGFLAG=${LOGFLAG} - ipc: host - restart: always - chatqna-gaudi-ui-server: - image: ${REGISTRY:-opea}/chatqna-ui:${TAG:-latest} - container_name: chatqna-gaudi-ui-server - depends_on: - - chatqna-gaudi-backend-server - ports: - - "5173:5173" - environment: - - no_proxy=${no_proxy} - - https_proxy=${https_proxy} - - http_proxy=${http_proxy} - ipc: host - restart: always - chatqna-gaudi-nginx-server: - image: ${REGISTRY:-opea}/nginx:${TAG:-latest} - container_name: chatqna-gaudi-nginx-server - depends_on: - - chatqna-gaudi-backend-server - - chatqna-gaudi-ui-server - ports: - - "${NGINX_PORT:-80}:80" - environment: - - no_proxy=${no_proxy} - - https_proxy=${https_proxy} - - http_proxy=${http_proxy} - - FRONTEND_SERVICE_IP=chatqna-gaudi-ui-server - - FRONTEND_SERVICE_PORT=5173 - - BACKEND_SERVICE_NAME=chatqna - - BACKEND_SERVICE_IP=chatqna-gaudi-backend-server - - BACKEND_SERVICE_PORT=8888 - - DATAPREP_SERVICE_IP=dataprep-redis-service - - DATAPREP_SERVICE_PORT=6007 - ipc: host - restart: always - -networks: - default: - driver: bridge diff --git a/ChatQnA/docker_compose/nvidia/gpu/README.md b/ChatQnA/docker_compose/nvidia/gpu/README.md index 31ab0549b..24eb39f98 100644 --- a/ChatQnA/docker_compose/nvidia/gpu/README.md +++ b/ChatQnA/docker_compose/nvidia/gpu/README.md @@ -17,8 +17,6 @@ To set up environment variables for deploying ChatQnA services, follow these ste ```bash # Example: host_ip="192.168.1.1" export host_ip="External_Public_IP" - # Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1" - export no_proxy="Your_No_Proxy" export 
HUGGINGFACEHUB_API_TOKEN="Your_Huggingface_API_Token" ``` @@ -27,6 +25,8 @@ To set up environment variables for deploying ChatQnA services, follow these ste ```bash export http_proxy="Your_HTTP_Proxy" export https_proxy="Your_HTTPs_Proxy" + # Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1" + export no_proxy="Your_No_Proxy",chatqna-ui-server,chatqna-backend-server,dataprep-redis-service,tei-embedding-service,retriever,tei-reranking-service,tgi-service ``` 3. Set up other environment variables: @@ -156,8 +156,6 @@ Change the `xxx_MODEL_ID` below for your needs. ```bash # Example: host_ip="192.168.1.1" export host_ip="External_Public_IP" - # Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1" - export no_proxy="Your_No_Proxy" export HUGGINGFACEHUB_API_TOKEN="Your_Huggingface_API_Token" # Example: NGINX_PORT=80 export NGINX_PORT=${your_nginx_port} @@ -168,6 +166,8 @@ Change the `xxx_MODEL_ID` below for your needs. ```bash export http_proxy="Your_HTTP_Proxy" export https_proxy="Your_HTTPs_Proxy" + # Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1" + export no_proxy="Your_No_Proxy",chatqna-ui-server,chatqna-backend-server,dataprep-redis-service,tei-embedding-service,retriever,tei-reranking-service,tgi-service ``` 3. Set up other environment variables: diff --git a/ChatQnA/docker_image_build/build.yaml b/ChatQnA/docker_image_build/build.yaml index 9684c0183..b84fa0796 100644 --- a/ChatQnA/docker_image_build/build.yaml +++ b/ChatQnA/docker_image_build/build.yaml @@ -77,12 +77,6 @@ services: dockerfile: comps/llms/text-generation/vllm/langchain/Dockerfile extends: chatqna image: ${REGISTRY:-opea}/llm-vllm:${TAG:-latest} - llm-vllm-ray-hpu: - build: - context: GenAIComps - dockerfile: comps/llms/text-generation/vllm/ray/dependency/Dockerfile - extends: chatqna - image: ${REGISTRY:-opea}/llm-vllm-ray-hpu:${TAG:-latest} dataprep-redis: build: context: GenAIComps diff --git a/ChatQnA/tests/test_compose_vllm_ray_on_gaudi.sh b/ChatQnA/tests/test_compose_vllm_ray_on_gaudi.sh deleted file mode 100644 index d7d1dbe6b..000000000 --- a/ChatQnA/tests/test_compose_vllm_ray_on_gaudi.sh +++ /dev/null @@ -1,183 +0,0 @@ -#!/bin/bash -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -set -e -IMAGE_REPO=${IMAGE_REPO:-"opea"} -IMAGE_TAG=${IMAGE_TAG:-"latest"} -echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}" -echo "TAG=IMAGE_TAG=${IMAGE_TAG}" -export REGISTRY=${IMAGE_REPO} -export TAG=${IMAGE_TAG} - -WORKPATH=$(dirname "$PWD") -LOG_PATH="$WORKPATH/tests" -ip_address=$(hostname -I | awk '{print $1}') - -function build_docker_images() { - cd $WORKPATH/docker_image_build - git clone https://github.com/opea-project/GenAIComps.git && cd GenAIComps && git checkout "${opea_branch:-"main"}" && cd ../ - - echo "Build all the images with --no-cache, check docker_image_build.log for details..." 
- service_list="chatqna chatqna-ui dataprep-redis retriever-redis llm-vllm-ray-hpu nginx" - docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log - - docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 - docker pull ghcr.io/huggingface/tei-gaudi:latest - docker images && sleep 1s -} - -function start_services() { - - cd $WORKPATH/docker_compose/intel/hpu/gaudi - export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5" - export RERANK_MODEL_ID="BAAI/bge-reranker-base" - export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" - export INDEX_NAME="rag-redis" - export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} - - # Start Docker Containers - docker compose -f compose_vllm_ray.yaml up -d > ${LOG_PATH}/start_services_with_compose.log - n=0 - until [[ "$n" -ge 100 ]]; do - echo "n=$n" - docker logs vllm-ray-gaudi-server > vllm_ray_service_start.log - if grep -q "Warmup finished" vllm_ray_service_start.log; then - break - fi - sleep 5s - n=$((n+1)) - done -} - -function validate_services() { - local URL="$1" - local EXPECTED_RESULT="$2" - local SERVICE_NAME="$3" - local DOCKER_NAME="$4" - local INPUT_DATA="$5" - - local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL") - if [ "$HTTP_STATUS" -eq 200 ]; then - echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." - - local CONTENT=$(curl -s -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log) - - if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then - echo "[ $SERVICE_NAME ] Content is as expected." - else - echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT" - docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log - exit 1 - fi - else - echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" - docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log - exit 1 - fi - sleep 1s -} - -function validate_microservices() { - # Check if the microservices are running correctly. 
- - # tei for embedding service - validate_services \ - "${ip_address}:8090/embed" \ - "\[\[" \ - "tei-embedding" \ - "tei-embedding-gaudi-server" \ - '{"inputs":"What is Deep Learning?"}' - - sleep 1m # retrieval can't curl as expected, try to wait for more time - - # retrieval microservice - test_embedding=$(python3 -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)") - validate_services \ - "${ip_address}:7000/v1/retrieval" \ - " " \ - "retrieval" \ - "retriever-redis-server" \ - "{\"text\":\"What is the revenue of Nike in 2023?\",\"embedding\":${test_embedding}}" - - # tei for rerank microservice - validate_services \ - "${ip_address}:8808/rerank" \ - '{"index":1,"score":' \ - "tei-rerank" \ - "tei-reranking-gaudi-server" \ - '{"query":"What is Deep Learning?", "texts": ["Deep Learning is not...", "Deep learning is..."]}' - - # vllm-on-ray for llm service - validate_services \ - "${ip_address}:8006/v1/chat/completions" \ - "content" \ - "vllm-ray-llm" \ - "vllm-ray-gaudi-server" \ - '{"model": "Intel/neural-chat-7b-v3-3", "messages": [{"role": "user", "content": "What is Deep Learning?"}]}' -} - -function validate_megaservice() { - # Curl the Mega Service - validate_services \ - "${ip_address}:8888/v1/chatqna" \ - "data: " \ - "mega-chatqna" \ - "chatqna-gaudi-backend-server" \ - '{"messages": "What is the revenue of Nike in 2023?"}' - -} - -function validate_frontend() { - cd $WORKPATH/ui/svelte - local conda_env_name="OPEA_e2e" - export PATH=${HOME}/miniforge3/bin/:$PATH - if conda info --envs | grep -q "$conda_env_name"; then - echo "$conda_env_name exist!" - else - conda create -n ${conda_env_name} python=3.12 -y - fi - source activate ${conda_env_name} - - sed -i "s/localhost/$ip_address/g" playwright.config.ts - - conda install -c conda-forge nodejs -y - npm install && npm ci && npx playwright install --with-deps - node -v && npm -v && pip list - - exit_status=0 - npx playwright test || exit_status=$? 
- - if [ $exit_status -ne 0 ]; then - echo "[TEST INFO]: ---------frontend test failed---------" - exit $exit_status - else - echo "[TEST INFO]: ---------frontend test passed---------" - fi -} - -function stop_docker() { - cd $WORKPATH/docker_compose/intel/hpu/gaudi - docker compose -f compose_vllm_ray.yaml down -} - -function main() { - - stop_docker - if [[ "$IMAGE_REPO" == "opea" ]]; then build_docker_images; fi - start_time=$(date +%s) - start_services - end_time=$(date +%s) - duration=$((end_time-start_time)) - echo "Mega service start duration is $duration s" - - validate_microservices - validate_megaservice - # validate_frontend - - stop_docker - echo y | docker system prune - -} - -main diff --git a/EdgeCraftRAG/Dockerfile b/EdgeCraftRAG/Dockerfile new file mode 100644 index 000000000..a15136464 --- /dev/null +++ b/EdgeCraftRAG/Dockerfile @@ -0,0 +1,28 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +FROM python:3.11-slim + +SHELL ["/bin/bash", "-o", "pipefail", "-c"] + +RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ + libgl1-mesa-glx \ + libjemalloc-dev + +RUN useradd -m -s /bin/bash user && \ + mkdir -p /home/user && \ + chown -R user /home/user/ + +COPY ./edgecraftrag /home/user/edgecraftrag +COPY ./chatqna.py /home/user/chatqna.py + +WORKDIR /home/user/edgecraftrag +RUN pip install --no-cache-dir -r requirements.txt + +WORKDIR /home/user + +USER user + +RUN echo 'ulimit -S -n 999999' >> ~/.bashrc + +ENTRYPOINT ["python", "chatqna.py"] \ No newline at end of file diff --git a/EdgeCraftRAG/Dockerfile.server b/EdgeCraftRAG/Dockerfile.server new file mode 100644 index 000000000..c04dc0a54 --- /dev/null +++ b/EdgeCraftRAG/Dockerfile.server @@ -0,0 +1,35 @@ +FROM python:3.11-slim + +SHELL ["/bin/bash", "-o", "pipefail", "-c"] + +RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ + libgl1-mesa-glx \ + libjemalloc-dev + +RUN apt-get update && apt-get install -y gnupg wget +RUN wget -qO - https://repositories.intel.com/gpu/intel-graphics.key | \ + gpg --yes --dearmor --output /usr/share/keyrings/intel-graphics.gpg +RUN echo "deb [arch=amd64,i386 signed-by=/usr/share/keyrings/intel-graphics.gpg] https://repositories.intel.com/gpu/ubuntu jammy client" | \ + tee /etc/apt/sources.list.d/intel-gpu-jammy.list +RUN apt-get update +RUN apt-get install -y \ + intel-opencl-icd intel-level-zero-gpu level-zero intel-level-zero-gpu-raytracing \ + intel-media-va-driver-non-free libmfx1 libmfxgen1 libvpl2 \ + libegl-mesa0 libegl1-mesa libegl1-mesa-dev libgbm1 libgl1-mesa-dev libgl1-mesa-dri \ + libglapi-mesa libgles2-mesa-dev libglx-mesa0 libigdgmm12 libxatracker2 mesa-va-drivers \ + mesa-vdpau-drivers mesa-vulkan-drivers va-driver-all vainfo hwinfo clinfo + +RUN useradd -m -s /bin/bash user && \ + mkdir -p /home/user && \ + chown -R user /home/user/ + +COPY ./edgecraftrag /home/user/edgecraftrag + +WORKDIR /home/user/edgecraftrag +RUN pip install --no-cache-dir -r requirements.txt + +WORKDIR /home/user/ + +USER user + +ENTRYPOINT ["python", "-m", "edgecraftrag.server"] diff --git a/EdgeCraftRAG/README.md b/EdgeCraftRAG/README.md new file mode 100644 index 000000000..da8d2efb0 --- /dev/null +++ b/EdgeCraftRAG/README.md @@ -0,0 +1,274 @@ +# Edge Craft Retrieval-Augmented Generation + +Edge Craft RAG (EC-RAG) is a customizable, tunable and production-ready +Retrieval-Augmented Generation system for edge solutions. 
It is designed to +curate the RAG pipeline to meet hardware requirements at edge with guaranteed +quality and performance. + +## Quick Start Guide + +### Run Containers with Docker Compose + +```bash +cd GenAIExamples/EdgeCraftRAG/docker_compose/intel/gpu/arc + +export MODEL_PATH="your model path for all your models" +export DOC_PATH="your doc path for uploading a dir of files" +export HOST_IP="your host ip" +export UI_SERVICE_PORT="port for UI service" + +# Optional for vllm endpoint +export vLLM_ENDPOINT="http://${HOST_IP}:8008" + +# If you have a proxy configured, uncomment below line +# export no_proxy=$no_proxy,${HOST_IP},edgecraftrag,edgecraftrag-server +# If you have a HF mirror configured, it will be imported to the container +# export HF_ENDPOINT="your HF mirror endpoint" + +# By default, the ports of the containers are set, uncomment if you want to change +# export MEGA_SERVICE_PORT=16011 +# export PIPELINE_SERVICE_PORT=16011 + +docker compose up -d +``` + +### (Optional) Build Docker Images for Mega Service, Server and UI by your own + +```bash +cd GenAIExamples/EdgeCraftRAG + +docker build --build-arg http_proxy=$HTTP_PROXY --build-arg https_proxy=$HTTPS_PROXY --build-arg no_proxy=$NO_PROXY -t opea/edgecraftrag:latest -f Dockerfile . +docker build --build-arg http_proxy=$HTTP_PROXY --build-arg https_proxy=$HTTPS_PROXY --build-arg no_proxy=$NO_PROXY -t opea/edgecraftrag-server:latest -f Dockerfile.server . +docker build --build-arg http_proxy=$HTTP_PROXY --build-arg https_proxy=$HTTPS_PROXY --build-arg no_proxy=$NO_PROXY -t opea/edgecraftrag-ui:latest -f ui/docker/Dockerfile.ui . +``` + +### ChatQnA with LLM Example (Command Line) + +```bash +cd GenAIExamples/EdgeCraftRAG + +# Activate pipeline test_pipeline_local_llm +curl -X POST http://${HOST_IP}:16010/v1/settings/pipelines -H "Content-Type: application/json" -d @tests/test_pipeline_local_llm.json | jq '.' 
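+# tests/test_pipeline_local_llm.json is not reproduced here; as a rough sketch only
+# (field names follow the PipelineCreateIn schema added in edgecraftrag/api_schema.py,
+# and all values are illustrative), the request body has roughly this shape:
+# {
+#   "name": "rag_test", "active": true,
+#   "node_parser": {"parser_type": "simple", "chunk_size": 400, "chunk_overlap": 48},
+#   "indexer": {"indexer_type": "faiss_vector",
+#               "embedding_model": {"model_id": "BAAI/bge-small-en-v1.5",
+#                                   "model_path": "/home/user/models/bge_ov_embedding",
+#                                   "device": "auto"}},
+#   "retriever": {"retriever_type": "vectorsimilarity", "retrieve_topk": 30},
+#   "postprocessor": [{"processor_type": "reranker", "top_n": 2,
+#                      "reranker_model": {"model_id": "BAAI/bge-reranker-large",
+#                                         "model_path": "/home/user/models/bge_ov_reranker",
+#                                         "device": "auto"}}],
+#   "generator": {"inference_type": "local",
+#                 "model": {"model_id": "qwen2-7b-instruct",
+#                           "model_path": "/home/user/models/qwen2-7b-instruct/INT4_compressed_weights",
+#                           "device": "auto"}}
+# }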
+ +# Will need to wait for several minutes +# Expected output: +# { +# "idx": "3214cf25-8dff-46e6-b7d1-1811f237cf8c", +# "name": "rag_test", +# "comp_type": "pipeline", +# "node_parser": { +# "idx": "ababed12-c192-4cbb-b27e-e49c76a751ca", +# "parser_type": "simple", +# "chunk_size": 400, +# "chunk_overlap": 48 +# }, +# "indexer": { +# "idx": "46969b63-8a32-4142-874d-d5c86ee9e228", +# "indexer_type": "faiss_vector", +# "model": { +# "idx": "7aae57c0-13a4-4a15-aecb-46c2ec8fe738", +# "type": "embedding", +# "model_id": "BAAI/bge-small-en-v1.5", +# "model_path": "/home/user/models/bge_ov_embedding", +# "device": "auto" +# } +# }, +# "retriever": { +# "idx": "3747fa59-ff9b-49b6-a8e8-03cdf8c979a4", +# "retriever_type": "vectorsimilarity", +# "retrieve_topk": 30 +# }, +# "postprocessor": [ +# { +# "idx": "d46a6cae-ba7a-412e-85b7-d334f175efaa", +# "postprocessor_type": "reranker", +# "model": { +# "idx": "374e7471-bd7d-41d0-b69d-a749a052b4b0", +# "type": "reranker", +# "model_id": "BAAI/bge-reranker-large", +# "model_path": "/home/user/models/bge_ov_reranker", +# "device": "auto" +# }, +# "top_n": 2 +# } +# ], +# "generator": { +# "idx": "52d8f112-6290-4dd3-bc28-f9bd5deeb7c8", +# "generator_type": "local", +# "model": { +# "idx": "fa0c11e1-46d1-4df8-a6d8-48cf6b99eff3", +# "type": "llm", +# "model_id": "qwen2-7b-instruct", +# "model_path": "/home/user/models/qwen2-7b-instruct/INT4_compressed_weights", +# "device": "auto" +# } +# }, +# "status": { +# "active": true +# } +# } + +# Prepare data from local directory +curl -X POST http://${HOST_IP}:16010/v1/data -H "Content-Type: application/json" -d '{"local_path":"#REPLACE WITH YOUR LOCAL DOC DIR#"}' | jq '.' + +# Validate Mega Service +curl -X POST http://${HOST_IP}:16011/v1/chatqna -H "Content-Type: application/json" -d '{"messages":"#REPLACE WITH YOUR QUESTION HERE#", "top_n":5, "max_tokens":512}' | jq '.' +``` + +### ChatQnA with LLM Example (UI) + +Open your browser, access http://${HOST_IP}:8082 + +> Your browser should be running on the same host of your console, otherwise you will need to access UI with your host domain name instead of ${HOST_IP}. + +### (Optional) Launch vLLM with OpenVINO service + +```bash +# 1. export LLM_MODEL +export LLM_MODEL="your model id" +# 2. Uncomment below code in 'GenAIExamples/EdgeCraftRAG/docker_compose/intel/gpu/arc/compose.yaml' + # vllm-service: + # image: vllm:openvino + # container_name: vllm-openvino-server + # depends_on: + # - vllm-service + # ports: + # - "8008:80" + # environment: + # no_proxy: ${no_proxy} + # http_proxy: ${http_proxy} + # https_proxy: ${https_proxy} + # vLLM_ENDPOINT: ${vLLM_ENDPOINT} + # LLM_MODEL: ${LLM_MODEL} + # entrypoint: /bin/bash -c "\ + # cd / && \ + # export VLLM_CPU_KVCACHE_SPACE=50 && \ + # python3 -m vllm.entrypoints.openai.api_server \ + # --model '${LLM_MODEL}' \ + # --host 0.0.0.0 \ + # --port 80" +``` + +## Advanced User Guide + +### Pipeline Management + +#### Create a pipeline + +```bash +curl -X POST http://${HOST_IP}:16010/v1/settings/pipelines -H "Content-Type: application/json" -d @examples/test_pipeline.json | jq '.' +``` + +It will take some time to prepare the embedding model. + +#### Upload a text + +```bash +curl -X POST http://${HOST_IP}:16010/v1/data -H "Content-Type: application/json" -d @examples/test_data.json | jq '.' +``` + +#### Provide a query to retrieve context with similarity search. + +```bash +curl -X POST http://${HOST_IP}:16010/v1/retrieval -H "Content-Type: application/json" -d @examples/test_query.json | jq '.' 
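+# examples/test_query.json is not shown in this diff; judging from the inline retrieval
+# calls further down in this guide, a minimal query body only needs a "messages" field, e.g.:
+# {"messages": "#REPLACE WITH YOUR QUESTION HERE#"}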
+``` + +#### Create the second pipeline test2 + +```bash +curl -X POST http://${HOST_IP}:16010/v1/settings/pipelines -H "Content-Type: application/json" -d @examples/test_pipeline2.json | jq '.' +``` + +#### Check all pipelines + +```bash +curl -X GET http://${HOST_IP}:16010/v1/settings/pipelines -H "Content-Type: application/json" | jq '.' +``` + +#### Compare similarity retrieval (test1) and keyword retrieval (test2) + +```bash +# Activate pipeline test1 +curl -X PATCH http://${HOST_IP}:16010/v1/settings/pipelines/test1 -H "Content-Type: application/json" -d '{"active": "true"}' | jq '.' +# Similarity retrieval +curl -X POST http://${HOST_IP}:16010/v1/retrieval -H "Content-Type: application/json" -d '{"messages":"number"}' | jq '.' + +# Activate pipeline test2 +curl -X PATCH http://${HOST_IP}:16010/v1/settings/pipelines/test2 -H "Content-Type: application/json" -d '{"active": "true"}' | jq '.' +# Keyword retrieval +curl -X POST http://${HOST_IP}:16010/v1/retrieval -H "Content-Type: application/json" -d '{"messages":"number"}' | jq '.' + +``` + +### Model Management + +#### Load a model + +```bash +curl -X POST http://${HOST_IP}:16010/v1/settings/models -H "Content-Type: application/json" -d @examples/test_model_load.json | jq '.' +``` + +It will take some time to load the model. + +#### Check all models + +```bash +curl -X GET http://${HOST_IP}:16010/v1/settings/models -H "Content-Type: application/json" | jq '.' +``` + +#### Update a model + +```bash +curl -X PATCH http://${HOST_IP}:16010/v1/settings/models/BAAI/bge-reranker-large -H "Content-Type: application/json" -d @examples/test_model_update.json | jq '.' +``` + +#### Check a certain model + +```bash +curl -X GET http://${HOST_IP}:16010/v1/settings/models/BAAI/bge-reranker-large -H "Content-Type: application/json" | jq '.' +``` + +#### Delete a model + +```bash +curl -X DELETE http://${HOST_IP}:16010/v1/settings/models/BAAI/bge-reranker-large -H "Content-Type: application/json" | jq '.' +``` + +### File Management + +#### Add a text + +```bash +curl -X POST http://${HOST_IP}:16010/v1/data -H "Content-Type: application/json" -d @examples/test_data.json | jq '.' +``` + +#### Add files from existed file path + +```bash +curl -X POST http://${HOST_IP}:16010/v1/data -H "Content-Type: application/json" -d @examples/test_data_dir.json | jq '.' +curl -X POST http://${HOST_IP}:16010/v1/data -H "Content-Type: application/json" -d @examples/test_data_file.json | jq '.' +``` + +#### Check all files + +```bash +curl -X GET http://${HOST_IP}:16010/v1/data/files -H "Content-Type: application/json" | jq '.' +``` + +#### Check one file + +```bash +curl -X GET http://${HOST_IP}:16010/v1/data/files/test2.docx -H "Content-Type: application/json" | jq '.' +``` + +#### Delete a file + +```bash +curl -X DELETE http://${HOST_IP}:16010/v1/data/files/test2.docx -H "Content-Type: application/json" | jq '.' +``` + +#### Update a file + +```bash +curl -X PATCH http://${HOST_IP}:16010/v1/data/files/test.pdf -H "Content-Type: application/json" -d @examples/test_data_file.json | jq '.' 
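+# examples/test_data_file.json is not included in this diff; based on the DataIn schema
+# added in edgecraftrag/api_schema.py ("text" and "local_path" fields), it is expected to
+# look roughly like the following, with an illustrative path:
+# {"local_path": "/home/user/docs/test.pdf"}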
+``` diff --git a/EdgeCraftRAG/chatqna.py b/EdgeCraftRAG/chatqna.py new file mode 100644 index 000000000..1afa9621c --- /dev/null +++ b/EdgeCraftRAG/chatqna.py @@ -0,0 +1,72 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import os + +from comps import MicroService, ServiceOrchestrator, ServiceType + +MEGA_SERVICE_HOST_IP = os.getenv("MEGA_SERVICE_HOST_IP", "127.0.0.1") +MEGA_SERVICE_PORT = int(os.getenv("MEGA_SERVICE_PORT", 16011)) +PIPELINE_SERVICE_HOST_IP = os.getenv("PIPELINE_SERVICE_HOST_IP", "127.0.0.1") +PIPELINE_SERVICE_PORT = int(os.getenv("PIPELINE_SERVICE_PORT", 16010)) + +from comps import Gateway, MegaServiceEndpoint +from comps.cores.proto.api_protocol import ( + ChatCompletionRequest, + ChatCompletionResponse, + ChatCompletionResponseChoice, + ChatMessage, + UsageInfo, +) +from fastapi import Request +from fastapi.responses import StreamingResponse + + +class EdgeCraftRagGateway(Gateway): + def __init__(self, megaservice, host="0.0.0.0", port=16011): + super().__init__( + megaservice, host, port, str(MegaServiceEndpoint.CHAT_QNA), ChatCompletionRequest, ChatCompletionResponse + ) + + async def handle_request(self, request: Request): + input = await request.json() + result_dict, runtime_graph = await self.megaservice.schedule(initial_inputs=input) + for node, response in result_dict.items(): + if isinstance(response, StreamingResponse): + return response + last_node = runtime_graph.all_leaves()[-1] + response = result_dict[last_node] + choices = [] + usage = UsageInfo() + choices.append( + ChatCompletionResponseChoice( + index=0, + message=ChatMessage(role="assistant", content=response), + finish_reason="stop", + ) + ) + return ChatCompletionResponse(model="edgecraftrag", choices=choices, usage=usage) + + +class EdgeCraftRagService: + def __init__(self, host="0.0.0.0", port=16010): + self.host = host + self.port = port + self.megaservice = ServiceOrchestrator() + + def add_remote_service(self): + edgecraftrag = MicroService( + name="pipeline", + host=PIPELINE_SERVICE_HOST_IP, + port=PIPELINE_SERVICE_PORT, + endpoint="/v1/chatqna", + use_remote_service=True, + service_type=ServiceType.UNDEFINED, + ) + self.megaservice.add(edgecraftrag) + self.gateway = EdgeCraftRagGateway(megaservice=self.megaservice, host="0.0.0.0", port=self.port) + + +if __name__ == "__main__": + edgecraftrag = EdgeCraftRagService(host=MEGA_SERVICE_HOST_IP, port=MEGA_SERVICE_PORT) + edgecraftrag.add_remote_service() diff --git a/EdgeCraftRAG/docker_compose/intel/gpu/arc/compose.yaml b/EdgeCraftRAG/docker_compose/intel/gpu/arc/compose.yaml new file mode 100644 index 000000000..f877b7c58 --- /dev/null +++ b/EdgeCraftRAG/docker_compose/intel/gpu/arc/compose.yaml @@ -0,0 +1,78 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +services: + server: + image: ${REGISTRY:-opea}/edgecraftrag-server:${TAG:-latest} + container_name: edgecraftrag-server + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + HF_ENDPOINT: ${HF_ENDPOINT} + vLLM_ENDPOINT: ${vLLM_ENDPOINT} + volumes: + - ${MODEL_PATH:-${PWD}}:/home/user/models + - ${DOC_PATH:-${PWD}}:/home/user/docs + ports: + - ${PIPELINE_SERVICE_PORT:-16010}:${PIPELINE_SERVICE_PORT:-16010} + devices: + - /dev/dri:/dev/dri + group_add: + - video + ecrag: + image: ${REGISTRY:-opea}/edgecraftrag:${TAG:-latest} + container_name: edgecraftrag + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + MEGA_SERVICE_PORT: 
${MEGA_SERVICE_PORT:-16011} + MEGA_SERVICE_HOST_IP: ${MEGA_SERVICE_HOST_IP:-${HOST_IP}} + PIPELINE_SERVICE_PORT: ${PIPELINE_SERVICE_PORT:-16010} + PIPELINE_SERVICE_HOST_IP: ${PIPELINE_SERVICE_HOST_IP:-${HOST_IP}} + ports: + - ${MEGA_SERVICE_PORT:-16011}:${MEGA_SERVICE_PORT:-16011} + depends_on: + - server + ui: + image: ${REGISTRY:-opea}/edgecraftrag-ui:${TAG:-latest} + container_name: edgecraftrag-ui + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + MEGA_SERVICE_PORT: ${MEGA_SERVICE_PORT:-16011} + MEGA_SERVICE_HOST_IP: ${MEGA_SERVICE_HOST_IP:-${HOST_IP}} + PIPELINE_SERVICE_PORT: ${PIPELINE_SERVICE_PORT:-16010} + PIPELINE_SERVICE_HOST_IP: ${PIPELINE_SERVICE_HOST_IP:-${HOST_IP}} + UI_SERVICE_PORT: ${UI_SERVICE_PORT:-8082} + UI_SERVICE_HOST_IP: ${UI_SERVICE_HOST_IP:-0.0.0.0} + ports: + - ${UI_SERVICE_PORT:-8082}:${UI_SERVICE_PORT:-8082} + restart: always + depends_on: + - server + - ecrag + # vllm-service: + # image: vllm:openvino + # container_name: vllm-openvino-server + # ports: + # - "8008:80" + # environment: + # no_proxy: ${no_proxy} + # http_proxy: ${http_proxy} + # https_proxy: ${https_proxy} + # vLLM_ENDPOINT: ${vLLM_ENDPOINT} + # LLM_MODEL: ${LLM_MODEL} + # entrypoint: /bin/bash -c "\ + # cd / && \ + # export VLLM_CPU_KVCACHE_SPACE=50 && \ + # python3 -m vllm.entrypoints.openai.api_server \ + # --model '${LLM_MODEL}' \ + # --host 0.0.0.0 \ + # --port 80" + +networks: + default: + driver: bridge diff --git a/EdgeCraftRAG/docker_image_build/build.yaml b/EdgeCraftRAG/docker_image_build/build.yaml new file mode 100644 index 000000000..e0cc355cc --- /dev/null +++ b/EdgeCraftRAG/docker_image_build/build.yaml @@ -0,0 +1,28 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +services: + server: + build: + context: .. + args: + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + dockerfile: ./Dockerfile.server + image: ${REGISTRY:-opea}/edgecraftrag-server:${TAG:-latest} + ui: + build: + context: .. + args: + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + dockerfile: ./ui/docker/Dockerfile.ui + image: ${REGISTRY:-opea}/edgecraftrag-ui:${TAG:-latest} + ecrag: + build: + context: .. 
+ args: + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + dockerfile: ./Dockerfile + image: ${REGISTRY:-opea}/edgecraftrag:${TAG:-latest} diff --git a/EdgeCraftRAG/edgecraftrag/__init__.py b/EdgeCraftRAG/edgecraftrag/__init__.py new file mode 100644 index 000000000..916f3a44b --- /dev/null +++ b/EdgeCraftRAG/edgecraftrag/__init__.py @@ -0,0 +1,2 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 diff --git a/EdgeCraftRAG/edgecraftrag/api/__init__.py b/EdgeCraftRAG/edgecraftrag/api/__init__.py new file mode 100644 index 000000000..916f3a44b --- /dev/null +++ b/EdgeCraftRAG/edgecraftrag/api/__init__.py @@ -0,0 +1,2 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 diff --git a/EdgeCraftRAG/edgecraftrag/api/v1/__init__.py b/EdgeCraftRAG/edgecraftrag/api/v1/__init__.py new file mode 100644 index 000000000..916f3a44b --- /dev/null +++ b/EdgeCraftRAG/edgecraftrag/api/v1/__init__.py @@ -0,0 +1,2 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 diff --git a/EdgeCraftRAG/edgecraftrag/api/v1/chatqna.py b/EdgeCraftRAG/edgecraftrag/api/v1/chatqna.py new file mode 100644 index 000000000..dfd32c29e --- /dev/null +++ b/EdgeCraftRAG/edgecraftrag/api/v1/chatqna.py @@ -0,0 +1,29 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from comps.cores.proto.api_protocol import ChatCompletionRequest +from edgecraftrag.context import ctx +from fastapi import FastAPI + +chatqna_app = FastAPI() + + +# Retrieval +@chatqna_app.post(path="/v1/retrieval") +async def retrieval(request: ChatCompletionRequest): + nodeswithscore = ctx.get_pipeline_mgr().run_retrieve(chat_request=request) + print(nodeswithscore) + if nodeswithscore is not None: + ret = [] + for n in nodeswithscore: + ret.append((n.node.node_id, n.node.text, n.score)) + return ret + + return "Not found" + + +# ChatQnA +@chatqna_app.post(path="/v1/chatqna") +async def chatqna(request: ChatCompletionRequest): + ret = ctx.get_pipeline_mgr().run_pipeline(chat_request=request) + return str(ret) diff --git a/EdgeCraftRAG/edgecraftrag/api/v1/data.py b/EdgeCraftRAG/edgecraftrag/api/v1/data.py new file mode 100644 index 000000000..fb5b32792 --- /dev/null +++ b/EdgeCraftRAG/edgecraftrag/api/v1/data.py @@ -0,0 +1,102 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from edgecraftrag.api_schema import DataIn, FilesIn +from edgecraftrag.context import ctx +from fastapi import FastAPI + +data_app = FastAPI() + + +# Upload a text or files +@data_app.post(path="/v1/data") +async def add_data(request: DataIn): + nodelist = None + + docs = [] + if request.text is not None: + docs.extend(ctx.get_file_mgr().add_text(text=request.text)) + if request.local_path is not None: + docs.extend(ctx.get_file_mgr().add_files(docs=request.local_path)) + + nodelist = ctx.get_pipeline_mgr().run_data_prepare(docs=docs) + if nodelist is None: + return "Error" + pl = ctx.get_pipeline_mgr().get_active_pipeline() + # TODO: Need bug fix, when node_parser is None + ctx.get_node_mgr().add_nodes(pl.node_parser.idx, nodelist) + return "Done" + + +# Upload files by a list of file_path +@data_app.post(path="/v1/data/files") +async def add_files(request: FilesIn): + nodelist = None + + docs = [] + if request.local_paths is not None: + docs.extend(ctx.get_file_mgr().add_files(docs=request.local_paths)) + + nodelist = ctx.get_pipeline_mgr().run_data_prepare(docs=docs) + if nodelist is None: + return "Error" + pl = 
ctx.get_pipeline_mgr().get_active_pipeline() + # TODO: Need bug fix, when node_parser is None + ctx.get_node_mgr().add_nodes(pl.node_parser.idx, nodelist) + return "Done" + + +# GET files +@data_app.get(path="/v1/data/files") +async def get_files(): + return ctx.get_file_mgr().get_files() + + +# GET a file +@data_app.get(path="/v1/data/files") +async def get_file_docs(name): + return ctx.get_file_mgr().get_docs_by_file(name) + + +# DELETE a file +@data_app.delete(path="/v1/data/files/{name}") +async def delete_file(name): + if ctx.get_file_mgr().del_file(name): + # TODO: delete the nodes related to the file + all_docs = ctx.get_file_mgr().get_all_docs() + + nodelist = ctx.get_pipeline_mgr().run_data_prepare(docs=all_docs) + if nodelist is None: + return "Error" + pl = ctx.get_pipeline_mgr().get_active_pipeline() + ctx.get_node_mgr().del_nodes_by_np_idx(pl.node_parser.idx) + ctx.get_node_mgr().add_nodes(pl.node_parser.idx, nodelist) + return f"File {name} is deleted" + else: + return f"File {name} not found" + + +# UPDATE a file +@data_app.patch(path="/v1/data/files/{name}") +async def update_file(name, request: DataIn): + # 1. Delete + if ctx.get_file_mgr().del_file(name): + # 2. Add + docs = [] + if request.text is not None: + docs.extend(ctx.get_file_mgr().add_text(text=request.text)) + if request.local_path is not None: + docs.extend(ctx.get_file_mgr().add_files(docs=request.local_path)) + + # 3. Re-run the pipeline + # TODO: update the nodes related to the file + all_docs = ctx.get_file_mgr().get_all_docs() + nodelist = ctx.get_pipeline_mgr().run_data_prepare(docs=all_docs) + if nodelist is None: + return "Error" + pl = ctx.get_pipeline_mgr().get_active_pipeline() + ctx.get_node_mgr().del_nodes_by_np_idx(pl.node_parser.idx) + ctx.get_node_mgr().add_nodes(pl.node_parser.idx, nodelist) + return f"File {name} is updated" + else: + return f"File {name} not found" diff --git a/EdgeCraftRAG/edgecraftrag/api/v1/model.py b/EdgeCraftRAG/edgecraftrag/api/v1/model.py new file mode 100644 index 000000000..17044ae91 --- /dev/null +++ b/EdgeCraftRAG/edgecraftrag/api/v1/model.py @@ -0,0 +1,76 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import gc + +from edgecraftrag.api_schema import ModelIn +from edgecraftrag.context import ctx +from fastapi import FastAPI + +model_app = FastAPI() + + +# GET Models +@model_app.get(path="/v1/settings/models") +async def get_models(): + return ctx.get_model_mgr().get_models() + + +# GET Model +@model_app.get(path="/v1/settings/models/{model_id:path}") +async def get_model_by_name(model_id): + return ctx.get_model_mgr().get_model_by_name(model_id) + + +# POST Model +@model_app.post(path="/v1/settings/models") +async def add_model(request: ModelIn): + modelmgr = ctx.get_model_mgr() + # Currently use asyncio.Lock() to deal with multi-requests + async with modelmgr._lock: + model = modelmgr.search_model(request) + if model is None: + model = modelmgr.load_model(request) + modelmgr.add(model) + return model.model_id + " model loaded" + + +# PATCH Model +@model_app.patch(path="/v1/settings/models/{model_id:path}") +async def update_model(model_id, request: ModelIn): + # The process of patch model is : 1.delete model 2.create model + active_pl = ctx.get_pipeline_mgr().get_active_pipeline() + modelmgr = ctx.get_model_mgr() + if active_pl and active_pl.model_existed(model_id): + return "Model is being used by active pipeline, unable to update model" + else: + async with modelmgr._lock: + if modelmgr.get_model_by_name(model_id) is None: 
+ # Need to make sure original model still exists before updating model + # to prevent memory leak in concurrent requests situation + return "Model " + model_id + " not exists" + model = modelmgr.search_model(request) + if model is None: + modelmgr.del_model_by_name(model_id) + # Clean up memory occupation + gc.collect() + # load new model + model = modelmgr.load_model(request) + modelmgr.add(model) + return model + + +# DELETE Model +@model_app.delete(path="/v1/settings/models/{model_id:path}") +async def delete_model(model_id): + active_pl = ctx.get_pipeline_mgr().get_active_pipeline() + if active_pl and active_pl.model_existed(model_id): + return "Model is being used by active pipeline, unable to remove" + else: + modelmgr = ctx.get_model_mgr() + # Currently use asyncio.Lock() to deal with multi-requests + async with modelmgr._lock: + response = modelmgr.del_model_by_name(model_id) + # Clean up memory occupation + gc.collect() + return response diff --git a/EdgeCraftRAG/edgecraftrag/api/v1/pipeline.py b/EdgeCraftRAG/edgecraftrag/api/v1/pipeline.py new file mode 100644 index 000000000..9d008e82f --- /dev/null +++ b/EdgeCraftRAG/edgecraftrag/api/v1/pipeline.py @@ -0,0 +1,180 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import weakref + +from edgecraftrag.api_schema import PipelineCreateIn +from edgecraftrag.base import IndexerType, InferenceType, ModelType, NodeParserType, PostProcessorType, RetrieverType +from edgecraftrag.components.generator import QnAGenerator +from edgecraftrag.components.indexer import VectorIndexer +from edgecraftrag.components.node_parser import HierarchyNodeParser, SimpleNodeParser, SWindowNodeParser +from edgecraftrag.components.postprocessor import MetadataReplaceProcessor, RerankProcessor +from edgecraftrag.components.retriever import AutoMergeRetriever, SimpleBM25Retriever, VectorSimRetriever +from edgecraftrag.context import ctx +from fastapi import FastAPI + +pipeline_app = FastAPI() + + +# GET Pipelines +@pipeline_app.get(path="/v1/settings/pipelines") +async def get_pipelines(): + return ctx.get_pipeline_mgr().get_pipelines() + + +# GET Pipeline +@pipeline_app.get(path="/v1/settings/pipelines/{name}") +async def get_pipeline(name): + return ctx.get_pipeline_mgr().get_pipeline_by_name_or_id(name) + + +# POST Pipeline +@pipeline_app.post(path="/v1/settings/pipelines") +async def add_pipeline(request: PipelineCreateIn): + pl = ctx.get_pipeline_mgr().get_pipeline_by_name_or_id(request.name) + if pl is None: + pl = ctx.get_pipeline_mgr().create_pipeline(request.name) + active_pl = ctx.get_pipeline_mgr().get_active_pipeline() + if pl == active_pl: + if not request.active: + pass + else: + return "Unable to patch an active pipeline..." + update_pipeline_handler(pl, request) + return pl + + +# PATCH Pipeline +@pipeline_app.patch(path="/v1/settings/pipelines/{name}") +async def update_pipeline(name, request: PipelineCreateIn): + pl = ctx.get_pipeline_mgr().get_pipeline_by_name_or_id(name) + if pl is None: + return None + active_pl = ctx.get_pipeline_mgr().get_active_pipeline() + if pl == active_pl: + if not request.active: + pass + else: + return "Unable to patch an active pipeline..." 
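+    # Pipeline updates are serialized behind the pipeline manager lock; update_pipeline_handler
+    # below rebuilds node parser, indexer, retriever, postprocessors and generator from the
+    # request, reusing already-registered components and models where they match.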
+ async with ctx.get_pipeline_mgr()._lock: + update_pipeline_handler(pl, request) + return pl + + +def update_pipeline_handler(pl, req): + if req.node_parser is not None: + np = req.node_parser + found_parser = ctx.get_node_parser_mgr().search_parser(np) + if found_parser is not None: + pl.node_parser = found_parser + else: + match np.parser_type: + case NodeParserType.SIMPLE: + pl.node_parser = SimpleNodeParser(chunk_size=np.chunk_size, chunk_overlap=np.chunk_overlap) + case NodeParserType.HIERARCHY: + """ + HierarchyNodeParser is for Auto Merging Retriever + (https://docs.llamaindex.ai/en/stable/examples/retrievers/auto_merging_retriever/) + By default, the hierarchy is: + 1st level: chunk size 2048 + 2nd level: chunk size 512 + 3rd level: chunk size 128 + Please set chunk size with List. e.g. chunk_size=[2048,512,128] + """ + pl.node_parser = HierarchyNodeParser.from_defaults( + chunk_sizes=np.chunk_sizes, chunk_overlap=np.chunk_overlap + ) + case NodeParserType.SENTENCEWINDOW: + pl.node_parser = SWindowNodeParser.from_defaults(window_size=np.window_size) + ctx.get_node_parser_mgr().add(pl.node_parser) + + if req.indexer is not None: + ind = req.indexer + found_indexer = ctx.get_indexer_mgr().search_indexer(ind) + if found_indexer is not None: + pl.indexer = found_indexer + else: + embed_model = None + if ind.embedding_model: + embed_model = ctx.get_model_mgr().search_model(ind.embedding_model) + if embed_model is None: + ind.embedding_model.model_type = ModelType.EMBEDDING + embed_model = ctx.get_model_mgr().load_model(ind.embedding_model) + ctx.get_model_mgr().add(embed_model) + match ind.indexer_type: + case IndexerType.DEFAULT_VECTOR | IndexerType.FAISS_VECTOR: + # TODO: **RISK** if considering 2 pipelines with different + # nodes, but same indexer, what will happen? 
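+                    # VectorIndexer (edgecraftrag/components/indexer.py) wraps a LlamaIndex
+                    # VectorStoreIndex built on the configured embedding model, FAISS-backed
+                    # for the faiss_vector indexer type.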
+ pl.indexer = VectorIndexer(embed_model, ind.indexer_type) + case _: + pass + ctx.get_indexer_mgr().add(pl.indexer) + + if req.retriever is not None: + retr = req.retriever + match retr.retriever_type: + case RetrieverType.VECTORSIMILARITY: + if pl.indexer is not None: + pl.retriever = VectorSimRetriever(pl.indexer, similarity_top_k=retr.retrieve_topk) + else: + return "No indexer" + case RetrieverType.AUTOMERGE: + # AutoMergeRetriever looks at a set of leaf nodes and recursively "merges" subsets of leaf nodes that reference a parent node + if pl.indexer is not None: + pl.retriever = AutoMergeRetriever(pl.indexer, similarity_top_k=retr.retrieve_topk) + else: + return "No indexer" + case RetrieverType.BM25: + if pl.indexer is not None: + pl.retriever = SimpleBM25Retriever(pl.indexer, similarity_top_k=retr.retrieve_topk) + else: + return "No indexer" + case _: + pass + + if req.postprocessor is not None: + pp = req.postprocessor + pl.postprocessor = [] + for processor in pp: + match processor.processor_type: + case PostProcessorType.RERANKER: + if processor.reranker_model: + prm = processor.reranker_model + reranker_model = ctx.get_model_mgr().search_model(prm) + if reranker_model is None: + prm.model_type = ModelType.RERANKER + reranker_model = ctx.get_model_mgr().load_model(prm) + ctx.get_model_mgr().add(reranker_model) + postprocessor = RerankProcessor(reranker_model, processor.top_n) + pl.postprocessor.append(postprocessor) + else: + return "No reranker model" + case PostProcessorType.METADATAREPLACE: + postprocessor = MetadataReplaceProcessor(target_metadata_key="window") + pl.postprocessor.append(postprocessor) + + if req.generator: + gen = req.generator + if gen.model is None: + return "No ChatQnA Model" + if gen.inference_type == InferenceType.VLLM: + if gen.model.model_id: + model_ref = gen.model.model_id + else: + model_ref = gen.model.model_path + pl.generator = QnAGenerator(model_ref, gen.prompt_path, gen.inference_type) + elif gen.inference_type == InferenceType.LOCAL: + model = ctx.get_model_mgr().search_model(gen.model) + if model is None: + gen.model.model_type = ModelType.LLM + model = ctx.get_model_mgr().load_model(gen.model) + ctx.get_model_mgr().add(model) + # Use weakref to achieve model deletion and memory release + model_ref = weakref.ref(model) + pl.generator = QnAGenerator(model_ref, gen.prompt_path, gen.inference_type) + else: + return "Inference Type Not Supported" + + if pl.status.active != req.active: + ctx.get_pipeline_mgr().activate_pipeline(pl.name, req.active, ctx.get_node_mgr()) + return pl diff --git a/EdgeCraftRAG/edgecraftrag/api_schema.py b/EdgeCraftRAG/edgecraftrag/api_schema.py new file mode 100644 index 000000000..1f124a7f9 --- /dev/null +++ b/EdgeCraftRAG/edgecraftrag/api_schema.py @@ -0,0 +1,62 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from typing import Optional + +from pydantic import BaseModel + + +class ModelIn(BaseModel): + model_type: Optional[str] = "LLM" + model_id: Optional[str] + model_path: Optional[str] = "./" + device: Optional[str] = "cpu" + + +class NodeParserIn(BaseModel): + chunk_size: Optional[int] = None + chunk_overlap: Optional[int] = None + chunk_sizes: Optional[list] = None + parser_type: str + window_size: Optional[int] = None + + +class IndexerIn(BaseModel): + indexer_type: str + embedding_model: Optional[ModelIn] = None + + +class RetrieverIn(BaseModel): + retriever_type: str + retrieve_topk: Optional[int] = 3 + + +class PostProcessorIn(BaseModel): + processor_type: str + 
reranker_model: Optional[ModelIn] = None + top_n: Optional[int] = 5 + + +class GeneratorIn(BaseModel): + prompt_path: Optional[str] = None + model: Optional[ModelIn] = None + inference_type: Optional[str] = "local" + + +class PipelineCreateIn(BaseModel): + name: Optional[str] = None + node_parser: Optional[NodeParserIn] = None + indexer: Optional[IndexerIn] = None + retriever: Optional[RetrieverIn] = None + postprocessor: Optional[list[PostProcessorIn]] = None + generator: Optional[GeneratorIn] = None + active: Optional[bool] = False + + +class DataIn(BaseModel): + text: Optional[str] = None + local_path: Optional[str] = None + + +class FilesIn(BaseModel): + local_paths: Optional[list[str]] = None diff --git a/EdgeCraftRAG/edgecraftrag/base.py b/EdgeCraftRAG/edgecraftrag/base.py new file mode 100644 index 000000000..d8c7aaef8 --- /dev/null +++ b/EdgeCraftRAG/edgecraftrag/base.py @@ -0,0 +1,128 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import abc +import uuid +from enum import Enum +from typing import Any, Callable, List, Optional + +from pydantic import BaseModel, ConfigDict, Field, model_serializer + + +class CompType(str, Enum): + + DEFAULT = "default" + MODEL = "model" + PIPELINE = "pipeline" + NODEPARSER = "node_parser" + INDEXER = "indexer" + RETRIEVER = "retriever" + POSTPROCESSOR = "postprocessor" + GENERATOR = "generator" + FILE = "file" + + +class ModelType(str, Enum): + + EMBEDDING = "embedding" + RERANKER = "reranker" + LLM = "llm" + + +class FileType(str, Enum): + TEXT = "text" + VISUAL = "visual" + AURAL = "aural" + VIRTUAL = "virtual" + OTHER = "other" + + +class NodeParserType(str, Enum): + + DEFAULT = "default" + SIMPLE = "simple" + HIERARCHY = "hierarchical" + SENTENCEWINDOW = "sentencewindow" + + +class IndexerType(str, Enum): + + DEFAULT = "default" + FAISS_VECTOR = "faiss_vector" + DEFAULT_VECTOR = "vector" + + +class RetrieverType(str, Enum): + + DEFAULT = "default" + VECTORSIMILARITY = "vectorsimilarity" + AUTOMERGE = "auto_merge" + BM25 = "bm25" + + +class PostProcessorType(str, Enum): + + RERANKER = "reranker" + METADATAREPLACE = "metadata_replace" + + +class GeneratorType(str, Enum): + + CHATQNA = "chatqna" + + +class InferenceType(str, Enum): + + LOCAL = "local" + VLLM = "vllm" + + +class CallbackType(str, Enum): + + DATAPREP = "dataprep" + RETRIEVE = "retrieve" + PIPELINE = "pipeline" + + +class BaseComponent(BaseModel): + + model_config = ConfigDict(extra="allow", arbitrary_types_allowed=True) + + idx: str = Field(default_factory=lambda: str(uuid.uuid4())) + name: Optional[str] = Field(default="") + comp_type: str = Field(default="") + comp_subtype: Optional[str] = Field(default="") + + @model_serializer + def ser_model(self): + set = { + "idx": self.idx, + "name": self.name, + "comp_type": self.comp_type, + "comp_subtype": self.comp_subtype, + } + return set + + @abc.abstractmethod + def run(self, **kwargs) -> Any: + pass + + +class BaseMgr: + + def __init__(self): + self.components = {} + + def add(self, comp: BaseComponent): + self.components[comp.idx] = comp + + def get(self, idx: str) -> BaseComponent: + if idx in self.components: + return self.components[idx] + else: + return None + + def remove(self, idx): + # remove the reference count + # after reference count == 0, object memory can be freed with Garbage Collector + del self.components[idx] diff --git a/EdgeCraftRAG/edgecraftrag/components/__init__.py b/EdgeCraftRAG/edgecraftrag/components/__init__.py new file mode 100644 index 000000000..916f3a44b --- 
/dev/null +++ b/EdgeCraftRAG/edgecraftrag/components/__init__.py @@ -0,0 +1,2 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 diff --git a/EdgeCraftRAG/edgecraftrag/components/data.py b/EdgeCraftRAG/edgecraftrag/components/data.py new file mode 100644 index 000000000..e7fa19e7a --- /dev/null +++ b/EdgeCraftRAG/edgecraftrag/components/data.py @@ -0,0 +1,65 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from pathlib import Path +from typing import Any, List, Optional + +from edgecraftrag.base import BaseComponent, CompType, FileType +from llama_index.core.schema import Document +from pydantic import BaseModel, Field, model_serializer + + +class File(BaseComponent): + file_path: str = Field(default="") + comp_subtype: str = Field(default="") + documents: List[Document] = Field(default=[]) + + def __init__(self, file_name: Optional[str] = None, file_path: Optional[str] = None, content: Optional[str] = None): + super().__init__(comp_type=CompType.FILE) + + if not file_name and not file_path: + raise ValueError("File name or path must be provided") + + _path = Path(file_path) if file_path else None + if file_name: + self.name = file_name + else: + self.name = _path.name + self.file_path = _path + self.comp_subtype = FileType.TEXT + if _path and _path.exists(): + self.documents.extend(convert_file_to_documents(_path)) + if content: + self.documents.extend(convert_text_to_documents(content)) + + def run(self, **kwargs) -> Any: + pass + + @model_serializer + def ser_model(self): + set = { + "file_name": self.name, + "file_id": self.idx, + "file_type": self.comp_subtype, + "file_path": str(self.file_path), + "docs_count": len(self.documents), + } + return set + + +def convert_text_to_documents(text) -> List[Document]: + return [Document(text=text, metadata={"file_name": "text"})] + + +def convert_file_to_documents(file_path) -> List[Document]: + from llama_index.core import SimpleDirectoryReader + + supported_exts = [".pdf", ".txt", ".doc", ".docx", ".pptx", ".ppt", ".csv", ".md", ".html", ".rst"] + if file_path.is_dir(): + docs = SimpleDirectoryReader(input_dir=file_path, recursive=True, required_exts=supported_exts).load_data() + elif file_path.is_file(): + docs = SimpleDirectoryReader(input_files=[file_path], required_exts=supported_exts).load_data() + else: + docs = [] + + return docs diff --git a/EdgeCraftRAG/edgecraftrag/components/generator.py b/EdgeCraftRAG/edgecraftrag/components/generator.py new file mode 100644 index 000000000..cbfd6686d --- /dev/null +++ b/EdgeCraftRAG/edgecraftrag/components/generator.py @@ -0,0 +1,194 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import dataclasses +import os + +from comps import GeneratedDoc, opea_telemetry +from edgecraftrag.base import BaseComponent, CompType, GeneratorType +from fastapi.responses import StreamingResponse +from langchain_core.prompts import PromptTemplate +from llama_index.llms.openai_like import OpenAILike +from pydantic import model_serializer + + +@opea_telemetry +def post_process_text(text: str): + if text == " ": + return "data: @#$\n\n" + if text == "\n": + return "data:
\n\n" + if text.isspace(): + return None + new_text = text.replace(" ", "@#$") + return f"data: {new_text}\n\n" + + +class QnAGenerator(BaseComponent): + + def __init__(self, llm_model, prompt_template, inference_type, **kwargs): + BaseComponent.__init__( + self, + comp_type=CompType.GENERATOR, + comp_subtype=GeneratorType.CHATQNA, + ) + self.inference_type = inference_type + self._REPLACE_PAIRS = ( + ("\n\n", "\n"), + ("\t\n", "\n"), + ) + template = prompt_template + self.prompt = ( + DocumentedContextRagPromptTemplate.from_file(template) + if os.path.isfile(template) + else DocumentedContextRagPromptTemplate.from_template(template) + ) + self.llm = llm_model + if isinstance(llm_model, str): + self.model_id = llm_model + else: + self.model_id = llm_model().model_id + + def clean_string(self, string): + ret = string + for p in self._REPLACE_PAIRS: + ret = ret.replace(*p) + return ret + + def run(self, chat_request, retrieved_nodes, **kwargs): + if self.llm() is None: + # This could happen when User delete all LLMs through RESTful API + return "No LLM available, please load LLM" + # query transformation + text_gen_context = "" + for n in retrieved_nodes: + origin_text = n.node.get_text() + text_gen_context += self.clean_string(origin_text.strip()) + + query = chat_request.messages + prompt_str = self.prompt.format(input=query, context=text_gen_context) + generate_kwargs = dict( + temperature=chat_request.temperature, + do_sample=chat_request.temperature > 0.0, + top_p=chat_request.top_p, + top_k=chat_request.top_k, + typical_p=chat_request.typical_p, + repetition_penalty=chat_request.repetition_penalty, + ) + self.llm().generate_kwargs = generate_kwargs + + return self.llm().complete(prompt_str) + + def run_vllm(self, chat_request, retrieved_nodes, **kwargs): + if self.llm is None: + return "No LLM provided, please provide model_id_or_path" + # query transformation + text_gen_context = "" + for n in retrieved_nodes: + origin_text = n.node.get_text() + text_gen_context += self.clean_string(origin_text.strip()) + + query = chat_request.messages + prompt_str = self.prompt.format(input=query, context=text_gen_context) + + llm_endpoint = os.getenv("vLLM_ENDPOINT", "http://localhost:8008") + model_name = self.llm + llm = OpenAILike( + api_key="fake", + api_base=llm_endpoint + "/v1", + max_tokens=chat_request.max_tokens, + model=model_name, + top_p=chat_request.top_p, + temperature=chat_request.temperature, + streaming=chat_request.stream, + ) + + if chat_request.stream: + + async def stream_generator(): + response = await llm.astream_complete(prompt_str) + async for text in response: + output = text.text + yield f"data: {output}\n\n" + + yield "data: [DONE]\n\n" + + return StreamingResponse(stream_generator(), media_type="text/event-stream") + else: + response = llm.complete(prompt_str) + response = response.text + + return GeneratedDoc(text=response, prompt=prompt_str) + + @model_serializer + def ser_model(self): + set = {"idx": self.idx, "generator_type": self.comp_subtype, "model": self.model_id} + return set + + +@dataclasses.dataclass +class INSTRUCTIONS: + IM_START = "You are an AI assistant that helps users answer questions given a specific context." + SUCCINCT = "Ensure your response is succinct" + ACCURATE = "Ensure your response is accurate." + SUCCINCT_AND_ACCURATE = "Ensure your response is succinct. Try to be accurate if possible." + ACCURATE_AND_SUCCINCT = "Ensure your response is accurate. Try to be succinct if possible." 
+ NO_RAMBLING = "Avoid posing new questions or self-questioning and answering, and refrain from repeating words in your response." + SAY_SOMETHING = "Avoid meaningless answer such a random symbol or blanks." + ENCOURAGE = "If you cannot well understand the question, try to translate it into English, and translate the answer back to the language of the question." + NO_IDEA = ( + 'If the answer is not discernible, please respond with "Sorry. I have no idea" in the language of the question.' + ) + CLOZE_TEST = """The task is a fill-in-the-blank/cloze test.""" + NO_MEANINGLESS_SYMBOLS = "Meaningless symbols and ``` should not be included in your response." + ADAPT_NATIVE_LANGUAGE = "Please try to think like a person that speak the same language that the question used." + + +def _is_cloze(question): + return ("()" in question or "()" in question) and ("填" in question or "fill" in question or "cloze" in question) + + +# depreciated +def get_instructions(question): + # naive pre-retrieval rewrite + # cloze + if _is_cloze(question): + instructions = [ + INSTRUCTIONS.CLOZE_TEST, + ] + else: + instructions = [ + INSTRUCTIONS.ACCURATE_AND_SUCCINCT, + INSTRUCTIONS.NO_RAMBLING, + INSTRUCTIONS.NO_MEANINGLESS_SYMBOLS, + ] + return ["System: {}".format(_) for _ in instructions] + + +def preprocess_question(question): + if _is_cloze(question): + question = question.replace(" ", "").replace("(", "(").replace(")", ")") + # .replace("()", " <|blank|> ") + ret = "User: Please finish the following fill-in-the-blank question marked by $$$ at the beginning and end. Make sure all the () are filled.\n$$$\n{}\n$$$\nAssistant: ".format( + question + ) + else: + ret = "User: {}\nAssistant: 从上下文提供的信息中可以知道,".format(question) + return ret + + +class DocumentedContextRagPromptTemplate(PromptTemplate): + + def format(self, **kwargs) -> str: + # context = '\n'.join([clean_string(f"{_.page_content}".strip()) for i, _ in enumerate(kwargs["context"])]) + context = kwargs["context"] + question = kwargs["input"] + preprocessed_question = preprocess_question(question) + if "instructions" in self.template: + instructions = get_instructions(question) + prompt_str = self.template.format( + context=context, instructions="\n".join(instructions), input=preprocessed_question + ) + else: + prompt_str = self.template.format(context=context, input=preprocessed_question) + return prompt_str diff --git a/EdgeCraftRAG/edgecraftrag/components/indexer.py b/EdgeCraftRAG/edgecraftrag/components/indexer.py new file mode 100644 index 000000000..83346d490 --- /dev/null +++ b/EdgeCraftRAG/edgecraftrag/components/indexer.py @@ -0,0 +1,45 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from typing import Any + +import faiss +from edgecraftrag.base import BaseComponent, CompType, IndexerType +from llama_index.core import StorageContext, VectorStoreIndex +from llama_index.vector_stores.faiss import FaissVectorStore +from pydantic import model_serializer + + +class VectorIndexer(BaseComponent, VectorStoreIndex): + + def __init__(self, embed_model, vector_type): + BaseComponent.__init__( + self, + comp_type=CompType.INDEXER, + comp_subtype=vector_type, + ) + self.model = embed_model + if not embed_model: + # Settings.embed_model should be set to None when embed_model is None to avoid 'no oneapi key' error + from llama_index.core import Settings + + Settings.embed_model = None + match vector_type: + case IndexerType.DEFAULT_VECTOR: + VectorStoreIndex.__init__(self, embed_model=embed_model, nodes=[]) + case 
IndexerType.FAISS_VECTOR: + if embed_model: + d = embed_model._model.request.outputs[0].get_partial_shape()[2].get_length() + else: + d = 128 + faiss_index = faiss.IndexFlatL2(d) + faiss_store = StorageContext.from_defaults(vector_store=FaissVectorStore(faiss_index=faiss_index)) + VectorStoreIndex.__init__(self, embed_model=embed_model, nodes=[], storage_context=faiss_store) + + def run(self, **kwargs) -> Any: + pass + + @model_serializer + def ser_model(self): + set = {"idx": self.idx, "indexer_type": self.comp_subtype, "model": self.model} + return set diff --git a/EdgeCraftRAG/edgecraftrag/components/model.py b/EdgeCraftRAG/edgecraftrag/components/model.py new file mode 100644 index 000000000..72ee7f16e --- /dev/null +++ b/EdgeCraftRAG/edgecraftrag/components/model.py @@ -0,0 +1,74 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from typing import Any, Optional + +from edgecraftrag.base import BaseComponent, CompType, ModelType +from llama_index.embeddings.huggingface_openvino import OpenVINOEmbedding +from llama_index.llms.openvino import OpenVINOLLM +from llama_index.postprocessor.openvino_rerank import OpenVINORerank +from pydantic import Field, model_serializer + + +class BaseModelComponent(BaseComponent): + + model_id: Optional[str] = Field(default="") + model_path: Optional[str] = Field(default="") + device: Optional[str] = Field(default="cpu") + + def run(self, **kwargs) -> Any: + pass + + @model_serializer + def ser_model(self): + set = { + "idx": self.idx, + "type": self.comp_subtype, + "model_id": self.model_id, + "model_path": self.model_path, + "device": self.device, + } + return set + + +class OpenVINOEmbeddingModel(BaseModelComponent, OpenVINOEmbedding): + + def __init__(self, model_id, model_path, device): + OpenVINOEmbedding.create_and_save_openvino_model(model_id, model_path) + OpenVINOEmbedding.__init__(self, model_id_or_path=model_path, device=device) + self.comp_type = CompType.MODEL + self.comp_subtype = ModelType.EMBEDDING + self.model_id = model_id + self.model_path = model_path + self.device = device + + +class OpenVINORerankModel(BaseModelComponent, OpenVINORerank): + + def __init__(self, model_id, model_path, device): + OpenVINORerank.create_and_save_openvino_model(model_id, model_path) + OpenVINORerank.__init__( + self, + model_id_or_path=model_path, + device=device, + ) + self.comp_type = CompType.MODEL + self.comp_subtype = ModelType.RERANKER + self.model_id = model_id + self.model_path = model_path + self.device = device + + +class OpenVINOLLMModel(BaseModelComponent, OpenVINOLLM): + + def __init__(self, model_id, model_path, device): + OpenVINOLLM.__init__( + self, + model_id_or_path=model_path, + device_map=device, + ) + self.comp_type = CompType.MODEL + self.comp_subtype = ModelType.LLM + self.model_id = model_id + self.model_path = model_path + self.device = device diff --git a/EdgeCraftRAG/edgecraftrag/components/node_parser.py b/EdgeCraftRAG/edgecraftrag/components/node_parser.py new file mode 100644 index 000000000..cd50f4534 --- /dev/null +++ b/EdgeCraftRAG/edgecraftrag/components/node_parser.py @@ -0,0 +1,85 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from typing import Any + +from edgecraftrag.base import BaseComponent, CompType, NodeParserType +from llama_index.core.node_parser import HierarchicalNodeParser, SentenceSplitter, SentenceWindowNodeParser +from pydantic import model_serializer + + +class SimpleNodeParser(BaseComponent, SentenceSplitter): + + # Use super for 
SentenceSplitter since it's __init__ will cleanup + # BaseComponent fields + def __init__(self, **kwargs): + super().__init__(**kwargs) + self.comp_type = CompType.NODEPARSER + self.comp_subtype = NodeParserType.SIMPLE + + def run(self, **kwargs) -> Any: + for k, v in kwargs.items(): + if k == "docs": + return self.get_nodes_from_documents(v, show_progress=False) + + return None + + @model_serializer + def ser_model(self): + set = { + "idx": self.idx, + "parser_type": self.comp_subtype, + "chunk_size": self.chunk_size, + "chunk_overlap": self.chunk_overlap, + } + return set + + +class HierarchyNodeParser(BaseComponent, HierarchicalNodeParser): + + def __init__(self, **kwargs): + super().__init__(**kwargs) + self.comp_type = CompType.NODEPARSER + self.comp_subtype = NodeParserType.HIERARCHY + + def run(self, **kwargs) -> Any: + for k, v in kwargs.items(): + if k == "docs": + return self.get_nodes_from_documents(v, show_progress=False) + + return None + + @model_serializer + def ser_model(self): + set = { + "idx": self.idx, + "parser_type": self.comp_subtype, + "chunk_size": self.chunk_sizes, + "chunk_overlap": None, + } + return set + + +class SWindowNodeParser(BaseComponent, SentenceWindowNodeParser): + + def __init__(self, **kwargs): + super().__init__(**kwargs) + self.comp_type = CompType.NODEPARSER + self.comp_subtype = NodeParserType.SENTENCEWINDOW + + def run(self, **kwargs) -> Any: + for k, v in kwargs.items(): + if k == "docs": + return self.get_nodes_from_documents(v, show_progress=False) + + return None + + @model_serializer + def ser_model(self): + set = { + "idx": self.idx, + "parser_type": self.comp_subtype, + "chunk_size": None, + "chunk_overlap": None, + } + return set diff --git a/EdgeCraftRAG/edgecraftrag/components/pipeline.py b/EdgeCraftRAG/edgecraftrag/components/pipeline.py new file mode 100644 index 000000000..4a2932e00 --- /dev/null +++ b/EdgeCraftRAG/edgecraftrag/components/pipeline.py @@ -0,0 +1,160 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from typing import Any, Callable, List, Optional + +from comps.cores.proto.api_protocol import ChatCompletionRequest +from edgecraftrag.base import BaseComponent, CallbackType, CompType, InferenceType +from edgecraftrag.components.postprocessor import RerankProcessor +from llama_index.core.schema import Document, QueryBundle +from pydantic import BaseModel, Field, model_serializer + + +class PipelineStatus(BaseModel): + active: bool = False + + +class Pipeline(BaseComponent): + + node_parser: Optional[BaseComponent] = Field(default=None) + indexer: Optional[BaseComponent] = Field(default=None) + retriever: Optional[BaseComponent] = Field(default=None) + postprocessor: Optional[List[BaseComponent]] = Field(default=None) + generator: Optional[BaseComponent] = Field(default=None) + status: PipelineStatus = Field(default=PipelineStatus()) + run_pipeline_cb: Optional[Callable[..., Any]] = Field(default=None) + run_retriever_cb: Optional[Callable[..., Any]] = Field(default=None) + run_data_prepare_cb: Optional[Callable[..., Any]] = Field(default=None) + + def __init__( + self, + name, + ): + super().__init__(name=name, comp_type=CompType.PIPELINE) + if self.name == "" or self.name is None: + self.name = self.idx + self.run_pipeline_cb = run_test_generator + self.run_retriever_cb = run_test_retrieve + self.run_data_prepare_cb = run_simple_doc + self._node_changed = True + + # TODO: consider race condition + @property + def node_changed(self) -> bool: + return self._node_changed + + # TODO: 
update doc changes + # TODO: more operations needed, add, del, modify + def update_nodes(self, nodes): + print("updating nodes ", nodes) + if self.indexer is not None: + self.indexer.insert_nodes(nodes) + + # TODO: check more conditions + def check_active(self, nodelist): + if self._node_changed and nodelist is not None: + self.update_nodes(nodelist) + + # Implement abstract run function + # callback dispatcher + def run(self, **kwargs) -> Any: + print(kwargs) + if "cbtype" in kwargs: + if kwargs["cbtype"] == CallbackType.DATAPREP: + if "docs" in kwargs: + return self.run_data_prepare_cb(self, docs=kwargs["docs"]) + if kwargs["cbtype"] == CallbackType.RETRIEVE: + if "chat_request" in kwargs: + return self.run_retriever_cb(self, chat_request=kwargs["chat_request"]) + if kwargs["cbtype"] == CallbackType.PIPELINE: + if "chat_request" in kwargs: + return self.run_pipeline_cb(self, chat_request=kwargs["chat_request"]) + + def update(self, node_parser=None, indexer=None, retriever=None, postprocessor=None, generator=None): + if node_parser is not None: + self.node_parser = node_parser + if indexer is not None: + self.indexer = indexer + if retriever is not None: + self.retriever = retriever + if postprocessor is not None: + self.postprocessor = postprocessor + if generator is not None: + self.generator = generator + + @model_serializer + def ser_model(self): + set = { + "idx": self.idx, + "name": self.name, + "comp_type": self.comp_type, + "node_parser": self.node_parser, + "indexer": self.indexer, + "retriever": self.retriever, + "postprocessor": self.postprocessor, + "generator": self.generator, + "status": self.status, + } + return set + + def model_existed(self, model_id: str) -> bool: + # judge if the given model is existed in a pipeline by model_id + if self.indexer: + if hasattr(self.indexer, "_embed_model") and self.indexer._embed_model.model_id == model_id: + return True + if hasattr(self.indexer, "_llm") and self.indexer._llm.model_id == model_id: + return True + if self.postprocessor: + for processor in self.postprocessor: + if hasattr(processor, "model_id") and processor.model_id == model_id: + return True + if self.generator: + llm = self.generator.llm + if llm() and llm().model_id == model_id: + return True + return False + + +# Test callback to retrieve nodes from query +def run_test_retrieve(pl: Pipeline, chat_request: ChatCompletionRequest) -> Any: + query = chat_request.messages + retri_res = pl.retriever.run(query=query) + query_bundle = QueryBundle(query) + if pl.postprocessor: + for processor in pl.postprocessor: + if ( + isinstance(processor, RerankProcessor) + and chat_request.top_n != ChatCompletionRequest.model_fields["top_n"].default + ): + processor.top_n = chat_request.top_n + retri_res = processor.run(retri_res=retri_res, query_bundle=query_bundle) + return retri_res + + +def run_simple_doc(pl: Pipeline, docs: List[Document]) -> Any: + n = pl.node_parser.run(docs=docs) + if pl.indexer is not None: + pl.indexer.insert_nodes(n) + print(pl.indexer._index_struct) + return n + + +def run_test_generator(pl: Pipeline, chat_request: ChatCompletionRequest) -> Any: + query = chat_request.messages + retri_res = pl.retriever.run(query=query) + query_bundle = QueryBundle(query) + if pl.postprocessor: + for processor in pl.postprocessor: + if ( + isinstance(processor, RerankProcessor) + and chat_request.top_n != ChatCompletionRequest.model_fields["top_n"].default + ): + processor.top_n = chat_request.top_n + retri_res = processor.run(retri_res=retri_res, query_bundle=query_bundle) 
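+ # Post-processing done; hand the retrieved nodes to the generator. inference_type selects the local OpenVINO path (generator.run) or the vLLM serving path (generator.run_vllm).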
+ if pl.generator is None: + return "No Generator Specified" + if pl.generator.inference_type == InferenceType.LOCAL: + answer = pl.generator.run(chat_request, retri_res) + elif pl.generator.inference_type == InferenceType.VLLM: + answer = pl.generator.run_vllm(chat_request, retri_res) + return answer diff --git a/EdgeCraftRAG/edgecraftrag/components/postprocessor.py b/EdgeCraftRAG/edgecraftrag/components/postprocessor.py new file mode 100644 index 000000000..672826bdb --- /dev/null +++ b/EdgeCraftRAG/edgecraftrag/components/postprocessor.py @@ -0,0 +1,64 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from typing import Any + +from edgecraftrag.base import BaseComponent, CompType, PostProcessorType +from llama_index.core.postprocessor import MetadataReplacementPostProcessor +from pydantic import model_serializer + + +class RerankProcessor(BaseComponent): + + def __init__(self, rerank_model, top_n): + BaseComponent.__init__( + self, + comp_type=CompType.POSTPROCESSOR, + comp_subtype=PostProcessorType.RERANKER, + ) + self.model = rerank_model + self.top_n = top_n + + def run(self, **kwargs) -> Any: + self.model.top_n = self.top_n + query_bundle = None + query_str = None + if "retri_res" in kwargs: + nodes = kwargs["retri_res"] + if "query_bundle" in kwargs: + query_bundle = kwargs["query_bundle"] + if "query_str" in kwargs: + query_str = kwargs["query_str"] + return self.model.postprocess_nodes(nodes, query_bundle=query_bundle, query_str=query_str) + + @model_serializer + def ser_model(self): + set = {"idx": self.idx, "postprocessor_type": self.comp_subtype, "model": self.model, "top_n": self.top_n} + return set + + +class MetadataReplaceProcessor(BaseComponent, MetadataReplacementPostProcessor): + + def __init__(self, target_metadata_key="window"): + BaseComponent.__init__( + self, + target_metadata_key=target_metadata_key, + comp_type=CompType.POSTPROCESSOR, + comp_subtype=PostProcessorType.METADATAREPLACE, + ) + + def run(self, **kwargs) -> Any: + query_bundle = None + query_str = None + if "retri_res" in kwargs: + nodes = kwargs["retri_res"] + if "query_bundle" in kwargs: + query_bundle = kwargs["query_bundle"] + if "query_str" in kwargs: + query_str = kwargs["query_str"] + return self.postprocess_nodes(nodes, query_bundle=query_bundle, query_str=query_str) + + @model_serializer + def ser_model(self): + set = {"idx": self.idx, "postprocessor_type": self.comp_subtype, "model": None, "top_n": None} + return set diff --git a/EdgeCraftRAG/edgecraftrag/components/retriever.py b/EdgeCraftRAG/edgecraftrag/components/retriever.py new file mode 100644 index 000000000..cba251b2a --- /dev/null +++ b/EdgeCraftRAG/edgecraftrag/components/retriever.py @@ -0,0 +1,104 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from typing import Any, List, cast + +from edgecraftrag.base import BaseComponent, CompType, RetrieverType +from llama_index.core.indices.vector_store.retrievers import VectorIndexRetriever +from llama_index.core.retrievers import AutoMergingRetriever +from llama_index.core.schema import BaseNode +from llama_index.retrievers.bm25 import BM25Retriever +from pydantic import model_serializer + + +class VectorSimRetriever(BaseComponent, VectorIndexRetriever): + + def __init__(self, indexer, **kwargs): + BaseComponent.__init__( + self, + comp_type=CompType.RETRIEVER, + comp_subtype=RetrieverType.VECTORSIMILARITY, + ) + VectorIndexRetriever.__init__( + self, + index=indexer, + 
node_ids=list(indexer.index_struct.nodes_dict.values()), + callback_manager=indexer._callback_manager, + object_map=indexer._object_map, + **kwargs, + ) + # This might be a bug of llamaindex retriever. + # The node_ids will never be updated after the retriever's + # creation. However, the node_ids decides the available node + # ids to be retrieved which means the target nodes to be + # retrieved are freezed to the time of the retriever's creation. + self._node_ids = None + + def run(self, **kwargs) -> Any: + for k, v in kwargs.items(): + if k == "query": + return self.retrieve(v) + + return None + + @model_serializer + def ser_model(self): + set = { + "idx": self.idx, + "retriever_type": self.comp_subtype, + "retrieve_topk": self.similarity_top_k, + } + return set + + +class AutoMergeRetriever(BaseComponent, AutoMergingRetriever): + + def __init__(self, indexer, **kwargs): + BaseComponent.__init__( + self, + comp_type=CompType.RETRIEVER, + comp_subtype=RetrieverType.AUTOMERGE, + ) + self._index = indexer + self.topk = kwargs["similarity_top_k"] + + AutoMergingRetriever.__init__( + self, + vector_retriever=indexer.as_retriever(**kwargs), + storage_context=indexer._storage_context, + object_map=indexer._object_map, + callback_manager=indexer._callback_manager, + ) + + def run(self, **kwargs) -> Any: + for k, v in kwargs.items(): + if k == "query": + # vector_retriever needs to be updated + self._vector_retriever = self._index.as_retriever(similarity_top_k=self.topk) + return self.retrieve(v) + + return None + + +class SimpleBM25Retriever(BaseComponent): + # The nodes parameter in BM25Retriever is not from index, + # nodes in BM25Retriever can not be updated through 'indexer.insert_nodes()', + # which means nodes should be passed to BM25Retriever after data preparation stage, not init stage + + def __init__(self, indexer, **kwargs): + BaseComponent.__init__( + self, + comp_type=CompType.RETRIEVER, + comp_subtype=RetrieverType.BM25, + ) + self._docstore = indexer._docstore + self.topk = kwargs["similarity_top_k"] + + def run(self, **kwargs) -> Any: + for k, v in kwargs.items(): + if k == "query": + nodes = cast(List[BaseNode], list(self._docstore.docs.values())) + bm25_retr = BM25Retriever.from_defaults(nodes=nodes, similarity_top_k=self.topk) + return bm25_retr.retrieve(v) + + return None diff --git a/EdgeCraftRAG/edgecraftrag/context.py b/EdgeCraftRAG/edgecraftrag/context.py new file mode 100644 index 000000000..3555ce4be --- /dev/null +++ b/EdgeCraftRAG/edgecraftrag/context.py @@ -0,0 +1,52 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from edgecraftrag.controllers.compmgr import GeneratorMgr, IndexerMgr, NodeParserMgr, PostProcessorMgr, RetrieverMgr +from edgecraftrag.controllers.filemgr import FilelMgr +from edgecraftrag.controllers.modelmgr import ModelMgr +from edgecraftrag.controllers.nodemgr import NodeMgr +from edgecraftrag.controllers.pipelinemgr import PipelineMgr + + +class Context: + + def __init__(self): + self.plmgr = PipelineMgr() + self.nodemgr = NodeMgr() + self.npmgr = NodeParserMgr() + self.idxmgr = IndexerMgr() + self.rtvmgr = RetrieverMgr() + self.ppmgr = PostProcessorMgr() + self.modmgr = ModelMgr() + self.genmgr = GeneratorMgr() + self.filemgr = FilelMgr() + + def get_pipeline_mgr(self): + return self.plmgr + + def get_node_mgr(self): + return self.nodemgr + + def get_node_parser_mgr(self): + return self.npmgr + + def get_indexer_mgr(self): + return self.idxmgr + + def get_retriever_mgr(self): + return self.rtvmgr + + def 
get_postprocessor_mgr(self): + return self.ppmgr + + def get_model_mgr(self): + return self.modmgr + + def get_generator_mgr(self): + return self.genmgr + + def get_file_mgr(self): + return self.filemgr + + +ctx = Context() diff --git a/EdgeCraftRAG/edgecraftrag/controllers/__init__.py b/EdgeCraftRAG/edgecraftrag/controllers/__init__.py new file mode 100644 index 000000000..916f3a44b --- /dev/null +++ b/EdgeCraftRAG/edgecraftrag/controllers/__init__.py @@ -0,0 +1,2 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 diff --git a/EdgeCraftRAG/edgecraftrag/controllers/compmgr.py b/EdgeCraftRAG/edgecraftrag/controllers/compmgr.py new file mode 100644 index 000000000..b8dd82ab7 --- /dev/null +++ b/EdgeCraftRAG/edgecraftrag/controllers/compmgr.py @@ -0,0 +1,66 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from edgecraftrag.api_schema import IndexerIn, ModelIn, NodeParserIn +from edgecraftrag.base import BaseComponent, BaseMgr, CallbackType, ModelType, NodeParserType + + +class NodeParserMgr(BaseMgr): + + def __init__(self): + super().__init__() + + def search_parser(self, npin: NodeParserIn) -> BaseComponent: + for _, v in self.components.items(): + v_parser_type = v.comp_subtype + if v_parser_type == npin.parser_type: + if v_parser_type == NodeParserType.HIERARCHY and v.chunk_sizes == npin.chunk_sizes: + return v + elif v_parser_type == NodeParserType.SENTENCEWINDOW and v.window_size == npin.window_size: + return v + elif ( + v_parser_type == NodeParserType.SIMPLE + and v.chunk_size == npin.chunk_size + and v.chunk_overlap == npin.chunk_overlap + ): + return v + return None + + +class IndexerMgr(BaseMgr): + + def __init__(self): + super().__init__() + + def search_indexer(self, indin: IndexerIn) -> BaseComponent: + for _, v in self.components.items(): + if v.comp_subtype == indin.indexer_type: + if ( + hasattr(v, "model") + and v.model + and indin.embedding_model + and ( + (v.model.model_id_or_path == indin.embedding_model.model_id) + or (v.model.model_id_or_path == indin.embedding_model.model_path) + ) + ): + return v + return None + + +class RetrieverMgr(BaseMgr): + + def __init__(self): + super().__init__() + + +class PostProcessorMgr(BaseMgr): + + def __init__(self): + super().__init__() + + +class GeneratorMgr(BaseMgr): + + def __init__(self): + super().__init__() diff --git a/EdgeCraftRAG/edgecraftrag/controllers/filemgr.py b/EdgeCraftRAG/edgecraftrag/controllers/filemgr.py new file mode 100644 index 000000000..0278f1f6a --- /dev/null +++ b/EdgeCraftRAG/edgecraftrag/controllers/filemgr.py @@ -0,0 +1,83 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import asyncio +import os +from typing import Any, Callable, List, Optional + +from edgecraftrag.base import BaseMgr +from edgecraftrag.components.data import File +from llama_index.core.schema import Document + + +class FilelMgr(BaseMgr): + + def __init__(self): + super().__init__() + + def add_text(self, text: str): + file = File(file_name="text", content=text) + self.add(file) + return file.documents + + def add_files(self, docs: Any): + if not isinstance(docs, list): + docs = [docs] + + input_docs = [] + for doc in docs: + if not os.path.exists(doc): + continue + + if os.path.isfile(doc): + files = [doc] + elif os.path.isdir(doc): + files = [os.path.join(root, f) for root, _, files in os.walk(doc) for f in files] + else: + continue + + if not files: + continue + + for file_path in files: + file = File(file_path=file_path) + 
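+ # File parses its documents at construction time (convert_file_to_documents); register it with the manager so it can later be fetched or deleted by name/idx, and collect its documents for the caller.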
self.add(file) + input_docs.extend(file.documents) + + return input_docs + + def get_file_by_name_or_id(self, name: str): + for _, file in self.components.items(): + if file.name == name or file.idx == name: + return file + return None + + def get_files(self): + return [file for _, file in self.components.items()] + + def get_all_docs(self) -> List[Document]: + all_docs = [] + for _, file in self.components.items(): + all_docs.extend(file.documents) + return all_docs + + def get_docs_by_file(self, name) -> List[Document]: + file = self.get_file_by_name_or_id(name) + return file.documents if file else [] + + def del_file(self, name): + file = self.get_file_by_name_or_id(name) + if file: + self.remove(file.idx) + return True + else: + return False + + def update_file(self, name): + file = self.get_file_by_name_or_id(name) + if file: + self.remove(file.idx) + self.add_files(docs=name) + return True + else: + return False diff --git a/EdgeCraftRAG/edgecraftrag/controllers/modelmgr.py b/EdgeCraftRAG/edgecraftrag/controllers/modelmgr.py new file mode 100644 index 000000000..73a77e48a --- /dev/null +++ b/EdgeCraftRAG/edgecraftrag/controllers/modelmgr.py @@ -0,0 +1,94 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import asyncio + +from edgecraftrag.api_schema import IndexerIn, ModelIn, NodeParserIn +from edgecraftrag.base import BaseComponent, BaseMgr, CallbackType, ModelType +from edgecraftrag.components.model import OpenVINOEmbeddingModel, OpenVINOLLMModel, OpenVINORerankModel + + +class ModelMgr(BaseMgr): + + def __init__(self): + self._lock = asyncio.Lock() + super().__init__() + + def get_model_by_name(self, name: str): + for _, v in self.components.items(): + if v.model_id == name: + model_type = v.comp_subtype.value + model_info = { + "model_type": model_type, + "model_id": getattr(v, "model_id", "Unknown"), + } + if model_type == ModelType.LLM: + model_info["model_path"] = getattr(v, "model_name", "Unknown") + model_info["device"] = getattr(v, "device_map", "Unknown") + else: + model_info["model_path"] = getattr(v, "model_id_or_path", "Unknown") + model_info["device"] = getattr(v, "device", getattr(v, "_device", "Unknown")) + return model_info + return None + + def get_models(self): + model = {} + for k, v in self.components.items(): + # Supplement the information of the model + model_type = v.comp_subtype.value + model_info = { + "model_type": model_type, + "model_id": getattr(v, "model_id", "Unknown"), + } + if model_type == ModelType.LLM: + model_info["model_path"] = getattr(v, "model_name", "Unknown") + model_info["device"] = getattr(v, "device_map", "Unknown") + else: + model_info["model_path"] = getattr(v, "model_id_or_path", "Unknown") + model_info["device"] = getattr(v, "device", getattr(v, "_device", "Unknown")) + model[k] = model_info + return model + + def search_model(self, modelin: ModelIn) -> BaseComponent: + # Compare model_path and device to search model + for _, v in self.components.items(): + model_path = v.model_name if v.comp_subtype.value == "llm" else v.model_id_or_path + model_dev = ( + v.device_map + if v.comp_subtype.value == "llm" + else getattr(v, "device", getattr(v, "_device", "Unknown")) + ) + if model_path == modelin.model_path and model_dev == modelin.device: + return v + return None + + def del_model_by_name(self, name: str): + for key, v in self.components.items(): + if v and v.model_id == name: + self.remove(key) + return "Model deleted" + return "Model not found" + + @staticmethod + def load_model(model_para: 
ModelIn): + model = None + match model_para.model_type: + case ModelType.EMBEDDING: + model = OpenVINOEmbeddingModel( + model_id=model_para.model_id, + model_path=model_para.model_path, + device=model_para.device, + ) + case ModelType.RERANKER: + model = OpenVINORerankModel( + model_id=model_para.model_id, + model_path=model_para.model_path, + device=model_para.device, + ) + case ModelType.LLM: + model = OpenVINOLLMModel( + model_id=model_para.model_id, + model_path=model_para.model_path, + device=model_para.device, + ) + return model diff --git a/EdgeCraftRAG/edgecraftrag/controllers/nodemgr.py b/EdgeCraftRAG/edgecraftrag/controllers/nodemgr.py new file mode 100644 index 000000000..13a41117c --- /dev/null +++ b/EdgeCraftRAG/edgecraftrag/controllers/nodemgr.py @@ -0,0 +1,34 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from typing import List + +from edgecraftrag.api_schema import IndexerIn, ModelIn, NodeParserIn +from edgecraftrag.base import BaseComponent, BaseMgr, CallbackType, ModelType +from llama_index.core.schema import BaseNode + + +class NodeMgr: + + def __init__(self): + self.nodes = {} + + # idx: index of node_parser + def add_nodes(self, np_idx, nodes): + if np_idx in self.nodes: + self.nodes[np_idx].append(nodes) + else: + self.nodes[np_idx] = nodes + + # TODO: to be implemented + def del_nodes(self, nodes): + pass + + def del_nodes_by_np_idx(self, np_idx): + del self.nodes[np_idx] + + def get_nodes(self, np_idx) -> List[BaseNode]: + if np_idx in self.nodes: + return self.nodes[np_idx] + else: + return [] diff --git a/EdgeCraftRAG/edgecraftrag/controllers/pipelinemgr.py b/EdgeCraftRAG/edgecraftrag/controllers/pipelinemgr.py new file mode 100644 index 000000000..d0b8e0780 --- /dev/null +++ b/EdgeCraftRAG/edgecraftrag/controllers/pipelinemgr.py @@ -0,0 +1,79 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import asyncio +from typing import Any, List + +from comps.cores.proto.api_protocol import ChatCompletionRequest +from edgecraftrag.base import BaseMgr, CallbackType +from edgecraftrag.components.pipeline import Pipeline +from edgecraftrag.controllers.nodemgr import NodeMgr +from llama_index.core.schema import Document + + +class PipelineMgr(BaseMgr): + + def __init__(self): + self._active_pipeline = None + self._lock = asyncio.Lock() + super().__init__() + + def create_pipeline(self, name: str): + pl = Pipeline(name) + self.add(pl) + return pl + + def get_pipeline_by_name_or_id(self, name: str): + for _, pl in self.components.items(): + if pl.name == name or pl.idx == name: + return pl + return None + + def get_pipelines(self): + return [pl for _, pl in self.components.items()] + + def activate_pipeline(self, name: str, active: bool, nm: NodeMgr): + pl = self.get_pipeline_by_name_or_id(name) + nodelist = None + if pl is not None: + if not active: + pl.status.active = False + self._active_pipeline = None + return + if pl.node_changed: + nodelist = nm.get_nodes(pl.node_parser.idx) + pl.check_active(nodelist) + prevactive = self._active_pipeline + if prevactive: + prevactive.status.active = False + pl.status.active = True + self._active_pipeline = pl + + def get_active_pipeline(self) -> Pipeline: + return self._active_pipeline + + def notify_node_change(self): + for _, pl in self.components.items(): + pl.set_node_change() + + def run_pipeline(self, chat_request: ChatCompletionRequest) -> Any: + ap = self.get_active_pipeline() + out = None + if ap is not None: + out = ap.run(cbtype=CallbackType.PIPELINE, 
chat_request=chat_request) + return out + return -1 + + def run_retrieve(self, chat_request: ChatCompletionRequest) -> Any: + ap = self.get_active_pipeline() + out = None + if ap is not None: + out = ap.run(cbtype=CallbackType.RETRIEVE, chat_request=chat_request) + return out + return -1 + + def run_data_prepare(self, docs: List[Document]) -> Any: + ap = self.get_active_pipeline() + if ap is not None: + return ap.run(cbtype=CallbackType.DATAPREP, docs=docs) + return -1 diff --git a/EdgeCraftRAG/edgecraftrag/prompt_template/default_prompt.txt b/EdgeCraftRAG/edgecraftrag/prompt_template/default_prompt.txt new file mode 100644 index 000000000..aa57e6059 --- /dev/null +++ b/EdgeCraftRAG/edgecraftrag/prompt_template/default_prompt.txt @@ -0,0 +1,8 @@ +<|im_start|>System: You are an AI assistant. Your task is to learn from the following context. Then answer the user's question based on what you learned from the context but not your own knowledge.<|im_end|> + +<|im_start|>{context}<|im_end|> + +<|im_start|>System: Pay attention to your formatting of response. If you need to reference content from context, try to keep the formatting.<|im_end|> +<|im_start|>System: Try to summarize from the context, do some reasoning before response, then response. Make sure your response is logically sound and self-consistent.<|im_end|> + +<|im_start|>{input} \ No newline at end of file diff --git a/EdgeCraftRAG/edgecraftrag/requirements.txt b/EdgeCraftRAG/edgecraftrag/requirements.txt new file mode 100644 index 000000000..3756c732a --- /dev/null +++ b/EdgeCraftRAG/edgecraftrag/requirements.txt @@ -0,0 +1,16 @@ +docx2txt +faiss-cpu>=1.8.0.post1 +gradio>=4.44.1 +langchain-core==0.2.29 +llama-index>=0.11.0 +llama-index-embeddings-openvino>=0.4.0 +llama-index-llms-openai-like>=0.2.0 +llama-index-llms-openvino>=0.3.1 +llama-index-postprocessor-openvino-rerank>=0.3.0 +llama-index-retrievers-bm25>=0.3.0 +llama-index-vector-stores-faiss>=0.2.1 +loguru>=0.7.2 +omegaconf>=2.3.0 +opea-comps>=0.9 +py-cpuinfo>=9.0.0 +uvicorn>=0.30.6 diff --git a/EdgeCraftRAG/edgecraftrag/server.py b/EdgeCraftRAG/edgecraftrag/server.py new file mode 100644 index 000000000..705c3f07b --- /dev/null +++ b/EdgeCraftRAG/edgecraftrag/server.py @@ -0,0 +1,27 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import os + +import uvicorn +from edgecraftrag.api.v1.chatqna import chatqna_app +from edgecraftrag.api.v1.data import data_app +from edgecraftrag.api.v1.model import model_app +from edgecraftrag.api.v1.pipeline import pipeline_app +from fastapi import FastAPI +from llama_index.core.settings import Settings + +app = FastAPI() + +sub_apps = [data_app, model_app, pipeline_app, chatqna_app] +for sub_app in sub_apps: + for route in sub_app.routes: + app.router.routes.append(route) + + +if __name__ == "__main__": + Settings.llm = None + + host = os.getenv("PIPELINE_SERVICE_HOST_IP", "0.0.0.0") + port = int(os.getenv("PIPELINE_SERVICE_PORT", 16010)) + uvicorn.run(app, host=host, port=port) diff --git a/EdgeCraftRAG/tests/test_pipeline_local_llm.json b/EdgeCraftRAG/tests/test_pipeline_local_llm.json new file mode 100644 index 000000000..18895d6e5 --- /dev/null +++ b/EdgeCraftRAG/tests/test_pipeline_local_llm.json @@ -0,0 +1,41 @@ +{ + "name": "rag_test_local_llm", + "node_parser": { + "chunk_size": 400, + "chunk_overlap": 48, + "parser_type": "simple" + }, + "indexer": { + "indexer_type": "faiss_vector", + "embedding_model": { + "model_id": "BAAI/bge-small-en-v1.5", + "model_path": "./models/bge_ov_embedding", + 
"device": "auto" + } + }, + "retriever": { + "retriever_type": "vectorsimilarity", + "retrieve_topk": 30 + }, + "postprocessor": [ + { + "processor_type": "reranker", + "top_n": 2, + "reranker_model": { + "model_id": "BAAI/bge-reranker-large", + "model_path": "./models/bge_ov_reranker", + "device": "auto" + } + } + ], + "generator": { + "model": { + "model_id": "Qwen/Qwen2-7B-Instruct", + "model_path": "./models/qwen2-7b-instruct/INT4_compressed_weights", + "device": "cpu" + }, + "prompt_path": "./edgecraftrag/prompt_template/default_prompt.txt", + "inference_type": "local" + }, + "active": "True" +} diff --git a/EdgeCraftRAG/ui/docker/Dockerfile.ui b/EdgeCraftRAG/ui/docker/Dockerfile.ui new file mode 100644 index 000000000..d8e5eec8e --- /dev/null +++ b/EdgeCraftRAG/ui/docker/Dockerfile.ui @@ -0,0 +1,23 @@ +FROM python:3.11-slim + +RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ + libgl1-mesa-glx \ + libjemalloc-dev + +RUN useradd -m -s /bin/bash user && \ + mkdir -p /home/user && \ + chown -R user /home/user/ + +COPY ./ui/gradio /home/user/ui +COPY ./edgecraftrag /home/user/edgecraftrag + +WORKDIR /home/user/edgecraftrag +RUN pip install --no-cache-dir -r requirements.txt + +WORKDIR /home/user/ui + +USER user + +RUN echo 'ulimit -S -n 999999' >> ~/.bashrc + +ENTRYPOINT ["python", "ecragui.py"] \ No newline at end of file diff --git a/EdgeCraftRAG/ui/gradio/__init__.py b/EdgeCraftRAG/ui/gradio/__init__.py new file mode 100644 index 000000000..916f3a44b --- /dev/null +++ b/EdgeCraftRAG/ui/gradio/__init__.py @@ -0,0 +1,2 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 diff --git a/EdgeCraftRAG/ui/gradio/assets/ai-logo-inline-ondark-3000.png b/EdgeCraftRAG/ui/gradio/assets/ai-logo-inline-ondark-3000.png new file mode 100644 index 000000000..527b9ad94 Binary files /dev/null and b/EdgeCraftRAG/ui/gradio/assets/ai-logo-inline-ondark-3000.png differ diff --git a/EdgeCraftRAG/ui/gradio/assets/ai-logo-inline-onlight-3000.png b/EdgeCraftRAG/ui/gradio/assets/ai-logo-inline-onlight-3000.png new file mode 100644 index 000000000..707ddd251 Binary files /dev/null and b/EdgeCraftRAG/ui/gradio/assets/ai-logo-inline-onlight-3000.png differ diff --git a/EdgeCraftRAG/ui/gradio/config.py b/EdgeCraftRAG/ui/gradio/config.py new file mode 100644 index 000000000..477aba7c2 --- /dev/null +++ b/EdgeCraftRAG/ui/gradio/config.py @@ -0,0 +1,358 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +DEFAULT_SYSTEM_PROMPT = """\ +You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature. +If a question does not make any sense or is not factually coherent, explain why instead of answering something not correct. 
If you don't know the answer to a question, please don't share false information.\ +""" + +DEFAULT_SYSTEM_PROMPT_CHINESE = """\ +你是一个乐于助人、尊重他人以及诚实可靠的助手。在安全的情况下,始终尽可能有帮助地回答。 您的回答不应包含任何有害、不道德、种族主义、性别歧视、有毒、危险或非法的内容。请确保您的回答在社会上是公正的和积极的。 +如果一个问题没有任何意义或与事实不符,请解释原因,而不是回答错误的问题。如果您不知道问题的答案,请不要分享虚假信息。另外,答案请使用中文。\ +""" + +DEFAULT_SYSTEM_PROMPT_JAPANESE = """\ +あなたは親切で、礼儀正しく、誠実なアシスタントです。 常に安全を保ちながら、できるだけ役立つように答えてください。 回答には、有害、非倫理的、人種差別的、性差別的、有毒、危険、または違法なコンテンツを含めてはいけません。 回答は社会的に偏見がなく、本質的に前向きなものであることを確認してください。 +質問が意味をなさない場合、または事実に一貫性がない場合は、正しくないことに答えるのではなく、その理由を説明してください。 質問の答えがわからない場合は、誤った情報を共有しないでください。\ +""" + +DEFAULT_RAG_PROMPT = """\ +You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\ +""" + +DEFAULT_RAG_PROMPT_CHINESE = """\ +基于以下已知信息,请简洁并专业地回答用户的问题。如果无法从中得到答案,请说 "根据已知信息无法回答该问题" 或 "没有提供足够的相关信息"。不允许在答案中添加编造成分。另外,答案请使用中文。\ +""" + + +def red_pijama_partial_text_processor(partial_text, new_text): + if new_text == "<": + return partial_text + + partial_text += new_text + return partial_text.split(":")[-1] + + +def llama_partial_text_processor(partial_text, new_text): + new_text = new_text.replace("[INST]", "").replace("[/INST]", "") + partial_text += new_text + return partial_text + + +def chatglm_partial_text_processor(partial_text, new_text): + new_text = new_text.strip() + new_text = new_text.replace("[[训练时间]]", "2023年") + partial_text += new_text + return partial_text + + +def youri_partial_text_processor(partial_text, new_text): + new_text = new_text.replace("システム:", "") + partial_text += new_text + return partial_text + + +def internlm_partial_text_processor(partial_text, new_text): + partial_text += new_text + return partial_text.split("<|im_end|>")[0] + + +SUPPORTED_LLM_MODELS = { + "English": { + "tiny-llama-1b-chat": { + "model_id": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", + "remote_code": False, + "start_message": f"<|system|>\n{DEFAULT_SYSTEM_PROMPT}\n", + "history_template": "<|user|>\n{user} \n<|assistant|>\n{assistant} \n", + "current_message_template": "<|user|>\n{user} \n<|assistant|>\n{assistant}", + "rag_prompt_template": f"""<|system|> {DEFAULT_RAG_PROMPT }""" + + """ + <|user|> + Question: {input} + Context: {context} + Answer: + <|assistant|>""", + }, + "gemma-2b-it": { + "model_id": "google/gemma-2b-it", + "remote_code": False, + "start_message": DEFAULT_SYSTEM_PROMPT + ", ", + "history_template": "user{user}model{assistant}", + "current_message_template": "user{user}model{assistant}", + "rag_prompt_template": f"""{DEFAULT_RAG_PROMPT},""" + + """user{input}context{context}model""", + }, + "red-pajama-3b-chat": { + "model_id": "togethercomputer/RedPajama-INCITE-Chat-3B-v1", + "remote_code": False, + "start_message": "", + "history_template": "\n:{user}\n:{assistant}", + "stop_tokens": [29, 0], + "partial_text_processor": red_pijama_partial_text_processor, + "current_message_template": "\n:{user}\n:{assistant}", + "rag_prompt_template": f"""{DEFAULT_RAG_PROMPT }""" + + """ + : Question: {input} + Context: {context} + Answer: """, + }, + "gemma-7b-it": { + "model_id": "google/gemma-7b-it", + "remote_code": False, + "start_message": DEFAULT_SYSTEM_PROMPT + ", ", + "history_template": "user{user}model{assistant}", + "current_message_template": "user{user}model{assistant}", + "rag_prompt_template": f"""{DEFAULT_RAG_PROMPT},""" + + """user{input}context{context}model""", + }, + "llama-2-chat-7b": { + 
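+ # Note: this checkpoint is gated on Hugging Face; the Meta license must be accepted (and an access token provided) before it can be downloaded.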
"model_id": "meta-llama/Llama-2-7b-chat-hf", + "remote_code": False, + "start_message": f"[INST] <>\n{DEFAULT_SYSTEM_PROMPT }\n<>\n\n", + "history_template": "{user}[/INST]{assistant}[INST]", + "current_message_template": "{user} [/INST]{assistant}", + "tokenizer_kwargs": {"add_special_tokens": False}, + "partial_text_processor": llama_partial_text_processor, + "rag_prompt_template": f"""[INST]Human: <> {DEFAULT_RAG_PROMPT }<>""" + + """ + Question: {input} + Context: {context} + Answer: [/INST]""", + }, + "mpt-7b-chat": { + "model_id": "mosaicml/mpt-7b-chat", + "remote_code": False, + "start_message": f"<|im_start|>system\n {DEFAULT_SYSTEM_PROMPT }<|im_end|>", + "history_template": "<|im_start|>user\n{user}<|im_start|>assistant\n{assistant}<|im_end|>", + "current_message_template": '"<|im_start|>user\n{user}<|im_start|>assistant\n{assistant}', + "stop_tokens": ["<|im_end|>", "<|endoftext|>"], + "rag_prompt_template": f"""<|im_start|>system + {DEFAULT_RAG_PROMPT }<|im_end|>""" + + """ + <|im_start|>user + Question: {input} + Context: {context} + Answer: <|im_start|>assistant""", + }, + "mistral-7b": { + "model_id": "mistralai/Mistral-7B-v0.1", + "remote_code": False, + "start_message": f"[INST] <>\n{DEFAULT_SYSTEM_PROMPT }\n<>\n\n", + "history_template": "{user}[/INST]{assistant}[INST]", + "current_message_template": "{user} [/INST]{assistant}", + "tokenizer_kwargs": {"add_special_tokens": False}, + "partial_text_processor": llama_partial_text_processor, + "rag_prompt_template": f""" [INST] {DEFAULT_RAG_PROMPT } [/INST] """ + + """ + [INST] Question: {input} + Context: {context} + Answer: [/INST]""", + }, + "zephyr-7b-beta": { + "model_id": "HuggingFaceH4/zephyr-7b-beta", + "remote_code": False, + "start_message": f"<|system|>\n{DEFAULT_SYSTEM_PROMPT}\n", + "history_template": "<|user|>\n{user} \n<|assistant|>\n{assistant} \n", + "current_message_template": "<|user|>\n{user} \n<|assistant|>\n{assistant}", + "rag_prompt_template": f"""<|system|> {DEFAULT_RAG_PROMPT }""" + + """ + <|user|> + Question: {input} + Context: {context} + Answer: + <|assistant|>""", + }, + "notus-7b-v1": { + "model_id": "argilla/notus-7b-v1", + "remote_code": False, + "start_message": f"<|system|>\n{DEFAULT_SYSTEM_PROMPT}\n", + "history_template": "<|user|>\n{user} \n<|assistant|>\n{assistant} \n", + "current_message_template": "<|user|>\n{user} \n<|assistant|>\n{assistant}", + "rag_prompt_template": f"""<|system|> {DEFAULT_RAG_PROMPT }""" + + """ + <|user|> + Question: {input} + Context: {context} + Answer: + <|assistant|>""", + }, + "neural-chat-7b-v3-1": { + "model_id": "Intel/neural-chat-7b-v3-3", + "remote_code": False, + "start_message": f"[INST] <>\n{DEFAULT_SYSTEM_PROMPT }\n<>\n\n", + "history_template": "{user}[/INST]{assistant}[INST]", + "current_message_template": "{user} [/INST]{assistant}", + "tokenizer_kwargs": {"add_special_tokens": False}, + "partial_text_processor": llama_partial_text_processor, + "rag_prompt_template": f""" [INST] {DEFAULT_RAG_PROMPT } [/INST] """ + + """ + [INST] Question: {input} + Context: {context} + Answer: [/INST]""", + }, + }, + "Chinese": { + "qwen1.5-0.5b-chat": { + "model_id": "Qwen/Qwen1.5-0.5B-Chat", + "remote_code": False, + "start_message": DEFAULT_SYSTEM_PROMPT_CHINESE, + "stop_tokens": ["<|im_end|>", "<|endoftext|>"], + }, + "qwen1.5-7b-chat": { + "model_id": "Qwen/Qwen1.5-7B-Chat", + "remote_code": False, + "start_message": DEFAULT_SYSTEM_PROMPT_CHINESE, + "stop_tokens": ["<|im_end|>", "<|endoftext|>"], + "summarization_prompt_template": """ + <|im_start|>user + 
问题: 总结下文内容,不少于{character_num}字. + 已知内容: {text} + 回答: <|im_end|><|im_start|>assistant""", + "split_summary_template": """ + <|im_start|>user + 问题: 根据已知内容写一篇简短的摘要. + 已知内容: {text} + 回答: <|im_end|><|im_start|>assistant""", + "combine_summary_template": """ + <|im_start|>user + 问题: 根据已知内容写一篇摘要,不少于{character_num}字. + 已知内容: {text} + 回答: <|im_end|><|im_start|>assistant""", + "rag_prompt_template": f"""<|im_start|>system + {DEFAULT_RAG_PROMPT_CHINESE }<|im_end|>""" + + """ + <|im_start|>user + 问题: {input} + 已知内容: {context} + 回答: <|im_end|><|im_start|>assistant""", + }, + "qwen-7b-chat": { + "model_id": "Qwen/Qwen-7B-Chat", + "remote_code": True, + "start_message": f"<|im_start|>system\n {DEFAULT_SYSTEM_PROMPT_CHINESE }<|im_end|>", + "history_template": "<|im_start|>user\n{user}<|im_start|>assistant\n{assistant}<|im_end|>", + "current_message_template": '"<|im_start|>user\n{user}<|im_start|>assistant\n{assistant}', + "stop_tokens": ["<|im_end|>", "<|endoftext|>"], + "revision": "2abd8e5777bb4ce9c8ab4be7dbbd0fe4526db78d", + "rag_prompt_template": f"""<|im_start|>system + {DEFAULT_RAG_PROMPT_CHINESE }<|im_end|>""" + + """ + <|im_start|>user + 问题: {input} + 已知内容: {context} + 回答: <|im_end|><|im_start|>assistant""", + }, + "qwen2-7b-instruct": { + "model_id": "Qwen/Qwen2-7B-Instruct", + "remote_code": True, + "start_message": f"<|im_start|>system\n {DEFAULT_SYSTEM_PROMPT_CHINESE }<|im_end|>", + "history_template": "<|im_start|>user\n{user}<|im_start|>assistant\n{assistant}<|im_end|>", + "current_message_template": '"<|im_start|>user\n{user}<|im_start|>assistant\n{assistant}', + "stop_tokens": ["<|im_end|>", "<|endoftext|>"], + "revision": "2abd8e5777bb4ce9c8ab4be7dbbd0fe4526db78d", + "rag_prompt_template": f"""<|im_start|>system + {DEFAULT_RAG_PROMPT_CHINESE }<|im_end|>""" + + """ + <|im_start|>user + 问题: {input} + 已知内容: {context} + 回答: <|im_end|><|im_start|>assistant""", + }, + "chatglm3-6b": { + "model_id": "THUDM/chatglm3-6b", + "remote_code": True, + "start_message": DEFAULT_SYSTEM_PROMPT_CHINESE, + "tokenizer_kwargs": {"add_special_tokens": False}, + "stop_tokens": [0, 2], + "rag_prompt_template": f"""{DEFAULT_RAG_PROMPT_CHINESE }""" + + """ + 问题: {input} + 已知内容: {context} + 回答: + """, + }, + "baichuan2-7b-chat": { + "model_id": "baichuan-inc/Baichuan2-7B-Chat", + "remote_code": True, + "start_message": DEFAULT_SYSTEM_PROMPT_CHINESE, + "tokenizer_kwargs": {"add_special_tokens": False}, + "stop_tokens": [0, 2], + "rag_prompt_template": f"""{DEFAULT_RAG_PROMPT_CHINESE }""" + + """ + 问题: {input} + 已知内容: {context} + 回答: + """, + }, + "minicpm-2b-dpo": { + "model_id": "openbmb/MiniCPM-2B-dpo-fp16", + "remote_code": True, + "start_message": DEFAULT_SYSTEM_PROMPT_CHINESE, + "stop_tokens": [2], + }, + "internlm2-chat-1.8b": { + "model_id": "internlm/internlm2-chat-1_8b", + "remote_code": True, + "start_message": DEFAULT_SYSTEM_PROMPT_CHINESE, + "stop_tokens": [2, 92542], + "partial_text_processor": internlm_partial_text_processor, + }, + "qwen1.5-1.8b-chat": { + "model_id": "Qwen/Qwen1.5-1.8B-Chat", + "remote_code": False, + "start_message": DEFAULT_SYSTEM_PROMPT_CHINESE, + "stop_tokens": ["<|im_end|>", "<|endoftext|>"], + "rag_prompt_template": f"""<|im_start|>system + {DEFAULT_RAG_PROMPT_CHINESE }<|im_end|>""" + + """ + <|im_start|>user + 问题: {input} + 已知内容: {context} + 回答: <|im_end|><|im_start|>assistant""", + }, + }, + "Japanese": { + "youri-7b-chat": { + "model_id": "rinna/youri-7b-chat", + "remote_code": False, + "start_message": f"設定: {DEFAULT_SYSTEM_PROMPT_JAPANESE}\n", + "history_template": "ユーザー: 
{user}\nシステム: {assistant}\n", + "current_message_template": "ユーザー: {user}\nシステム: {assistant}", + "tokenizer_kwargs": {"add_special_tokens": False}, + "partial_text_processor": youri_partial_text_processor, + }, + }, +} + +SUPPORTED_EMBEDDING_MODELS = { + "English": { + "bge-small-en-v1.5": { + "model_id": "BAAI/bge-small-en-v1.5", + "mean_pooling": False, + "normalize_embeddings": True, + }, + "bge-large-en-v1.5": { + "model_id": "BAAI/bge-large-en-v1.5", + "mean_pooling": False, + "normalize_embeddings": True, + }, + }, + "Chinese": { + "bge-small-zh-v1.5": { + "model_id": "BAAI/bge-small-zh-v1.5", + "mean_pooling": False, + "normalize_embeddings": True, + }, + "bge-large-zh-v1.5": { + "model_id": "bge-large-zh-v1.5", + "mean_pooling": False, + "normalize_embeddings": True, + }, + }, +} + + +SUPPORTED_RERANK_MODELS = { + "bge-reranker-large": {"model_id": "BAAI/bge-reranker-large"}, + "bge-reranker-base": {"model_id": "BAAI/bge-reranker-base"}, +} diff --git a/EdgeCraftRAG/ui/gradio/default.yaml b/EdgeCraftRAG/ui/gradio/default.yaml new file mode 100644 index 000000000..1421da8f4 --- /dev/null +++ b/EdgeCraftRAG/ui/gradio/default.yaml @@ -0,0 +1,49 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# Model language for LLM +model_language: "Chinese" +vector_db: "FAISS" +splitter_name: "RecursiveCharacter" +k_rerank: 5 +search_method: "similarity" +score_threshold: 0.5 +bm25_weight: 0 + +# Pipeline +name: "default" + +# Node parser +node_parser: "simple" +chunk_size: 192 +chunk_overlap: 48 + +# Indexer +indexer: "faiss_vector" + +# Retriever +retriever: "vectorsimilarity" +k_retrieval: 30 + +# Post Processor +postprocessor: "reranker" + +# Generator +generator: "local" +prompt_path: "./data/default_prompt.txt" + +# Models +embedding_model_id: "BAAI/bge-small-en-v1.5" +embedding_model_path: "./bge_ov_embedding" +# Device for embedding model inference +embedding_device: "AUTO" + +rerank_model_id: "BAAI/bge-reranker-large" +rerank_model_path: "./bge_ov_reranker" +# Device for reranking model inference +rerank_device: "AUTO" + +llm_model_id: "qwen2-7b-instruct" +llm_model_path: "./qwen2-7b-instruct/INT4_compressed_weights" +# Device for LLM model inference +llm_device: "AUTO" diff --git a/EdgeCraftRAG/ui/gradio/ecrag_client.py b/EdgeCraftRAG/ui/gradio/ecrag_client.py new file mode 100644 index 000000000..47b5f776d --- /dev/null +++ b/EdgeCraftRAG/ui/gradio/ecrag_client.py @@ -0,0 +1,124 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import sys + +import requests + +sys.path.append("..") +import os + +from edgecraftrag import api_schema + +PIPELINE_SERVICE_HOST_IP = os.getenv("PIPELINE_SERVICE_HOST_IP", "127.0.0.1") +PIPELINE_SERVICE_PORT = int(os.getenv("PIPELINE_SERVICE_PORT", 16010)) +server_addr = f"http://{PIPELINE_SERVICE_HOST_IP}:{PIPELINE_SERVICE_PORT}" + + +def get_current_pipelines(): + res = requests.get(f"{server_addr}/v1/settings/pipelines", proxies={"http": None}) + pls = [] + for pl in res.json(): + if pl["status"]["active"]: + pls.append((pl["idx"], pl["name"] + " (active)")) + else: + pls.append((pl["idx"], pl["name"])) + return pls + + +def get_pipeline(name): + res = requests.get(f"{server_addr}/v1/settings/pipelines/{name}", proxies={"http": None}) + return res.json() + + +def create_update_pipeline( + name, + active, + node_parser, + chunk_size, + chunk_overlap, + indexer, + retriever, + vector_search_top_k, + postprocessor, + generator, + llm_id, + llm_device, + llm_weights, + embedding_id, + 
embedding_device, + rerank_id, + rerank_device, +): + req_dict = api_schema.PipelineCreateIn( + name=name, + active=active, + node_parser=api_schema.NodeParserIn( + parser_type=node_parser, chunk_size=chunk_size, chunk_overlap=chunk_overlap + ), + indexer=api_schema.IndexerIn( + indexer_type=indexer, + embedding_model=api_schema.ModelIn( + model_id=embedding_id, + # TODO: remove hardcoding + model_path="./bge_ov_embedding", + device=embedding_device, + ), + ), + retriever=api_schema.RetrieverIn(retriever_type=retriever, retriever_topk=vector_search_top_k), + postprocessor=[ + api_schema.PostProcessorIn( + processor_type=postprocessor[0], + reranker_model=api_schema.ModelIn( + model_id=rerank_id, + # TODO: remove hardcoding + model_path="./bge_ov_reranker", + device=rerank_device, + ), + ) + ], + generator=api_schema.GeneratorIn( + # TODO: remove hardcoding + prompt_path="./edgecraftrag/prompt_template/default_prompt.txt", + model=api_schema.ModelIn( + model_id=llm_id, + # TODO: remove hardcoding + model_path="./models/qwen2-7b-instruct/INT4_compressed_weights", + device=llm_device, + ), + ), + ) + # hard code only for test + print(req_dict) + res = requests.post(f"{server_addr}/v1/settings/pipelines", json=req_dict.dict(), proxies={"http": None}) + return res.text + + +def activate_pipeline(name): + active_dict = {"active": "True"} + res = requests.patch(f"{server_addr}/v1/settings/pipelines/{name}", json=active_dict, proxies={"http": None}) + status = False + restext = f"Activate pipeline {name} failed." + if res.ok: + status = True + restext = f"Activate pipeline {name} successfully." + return restext, status + + +def create_vectordb(docs, spliter, vector_db): + req_dict = api_schema.FilesIn(local_paths=docs) + res = requests.post(f"{server_addr}/v1/data/files", json=req_dict.dict(), proxies={"http": None}) + return res.text + + +def get_files(): + res = requests.get(f"{server_addr}/v1/data/files", proxies={"http": None}) + files = [] + for file in res.json(): + files.append((file["file_name"], file["file_id"])) + return files + + +def delete_file(file_name_or_id): + res = requests.delete(f"{server_addr}/v1/data/files/{file_name_or_id}", proxies={"http": None}) + return res.text diff --git a/EdgeCraftRAG/ui/gradio/ecragui.py b/EdgeCraftRAG/ui/gradio/ecragui.py new file mode 100644 index 000000000..3c198bf2a --- /dev/null +++ b/EdgeCraftRAG/ui/gradio/ecragui.py @@ -0,0 +1,983 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import argparse +import json +import platform +import re +from datetime import datetime +from pathlib import Path + +import cpuinfo +import distro # if running Python 3.8 or above +import ecrag_client as cli +import gradio as gr +import httpx + +# Creation of the ModelLoader instance and loading models remain the same +import platform_config as pconf +import psutil +import requests +from loguru import logger +from omegaconf import OmegaConf +from platform_config import get_available_devices, get_available_weights, get_local_available_models + +pipeline_df = [] + +import os + +MEGA_SERVICE_HOST_IP = os.getenv("MEGA_SERVICE_HOST_IP", "127.0.0.1") +MEGA_SERVICE_PORT = int(os.getenv("MEGA_SERVICE_PORT", 16011)) +UI_SERVICE_HOST_IP = os.getenv("UI_SERVICE_HOST_IP", "0.0.0.0") +UI_SERVICE_PORT = int(os.getenv("UI_SERVICE_PORT", 8084)) + + +def get_llm_model_dir(llm_model_id, weights_compression): + model_dirs = { + "fp16_model_dir": Path(llm_model_id) / "FP16", + "int8_model_dir": Path(llm_model_id) / "INT8_compressed_weights", + 
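+ # Assumed on-disk layout: one subfolder per precision (FP16, INT8_compressed_weights, INT4_compressed_weights) under the directory named after the model id.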
"int4_model_dir": Path(llm_model_id) / "INT4_compressed_weights", + } + + if weights_compression == "INT4": + model_dir = model_dirs["int4_model_dir"] + elif weights_compression == "INT8": + model_dir = model_dirs["int8_model_dir"] + else: + model_dir = model_dirs["fp16_model_dir"] + + if not model_dir.exists(): + raise FileNotFoundError(f"The model directory {model_dir} does not exist.") + elif not model_dir.is_dir(): + raise NotADirectoryError(f"The path {model_dir} is not a directory.") + + return model_dir + + +def get_system_status(): + cpu_usage = psutil.cpu_percent(interval=1) + memory_info = psutil.virtual_memory() + memory_usage = memory_info.percent + memory_total_gb = memory_info.total / (1024**3) + memory_used_gb = memory_info.used / (1024**3) + # uptime_seconds = time.time() - psutil.boot_time() + # uptime_hours, uptime_minutes = divmod(uptime_seconds // 60, 60) + disk_usage = psutil.disk_usage("/").percent + # net_io = psutil.net_io_counters() + os_info = platform.uname() + kernel_version = os_info.release + processor = cpuinfo.get_cpu_info()["brand_raw"] + dist_name = distro.name(pretty=True) + + now = datetime.now() + current_time_str = now.strftime("%Y-%m-%d %H:%M") + + status = ( + f"{current_time_str} \t" + f"CPU Usage: {cpu_usage}% \t" + f"Memory Usage: {memory_usage}% {memory_used_gb:.2f}GB / {memory_total_gb:.2f}GB \t" + # f"System Uptime: {int(uptime_hours)} hours, {int(uptime_minutes)} minutes \t" + f"Disk Usage: {disk_usage}% \t" + # f"Bytes Sent: {net_io.bytes_sent}\n" + # f"Bytes Received: {net_io.bytes_recv}\n" + f"Kernel: {kernel_version} \t" + f"Processor: {processor} \t" + f"OS: {dist_name} \n" + ) + return status + + +def build_demo(cfg, args): + + def load_chatbot_models( + llm_id, + llm_device, + llm_weights, + embedding_id, + embedding_device, + rerank_id, + rerank_device, + ): + req_dict = { + "llm_id": llm_id, + "llm_device": llm_device, + "llm_weights": llm_weights, + "embedding_id": embedding_id, + "embedding_device": embedding_device, + "rerank_id": rerank_id, + "rerank_device": rerank_device, + } + # hard code only for test + worker_addr = "http://127.0.0.1:8084" + print(req_dict) + result = requests.post(f"{worker_addr}/load", json=req_dict, proxies={"http": None}) + return result.text + + def user(message, history): + """Callback function for updating user messages in interface on submit button click. + + Params: + message: current message + history: conversation history + Returns: + None + """ + # Append the user's message to the conversation history + return "", history + [[message, ""]] + + async def bot( + history, + temperature, + top_p, + top_k, + repetition_penalty, + hide_full_prompt, + do_rag, + docs, + spliter_name, + vector_db, + chunk_size, + chunk_overlap, + vector_search_top_k, + vector_search_top_n, + run_rerank, + search_method, + score_threshold, + ): + """Callback function for running chatbot on submit button click. + + Params: + history: conversation history + temperature: parameter for control the level of creativity in AI-generated text. + By adjusting the `temperature`, you can influence the AI model's probability distribution, making the text more focused or diverse. + top_p: parameter for control the range of tokens considered by the AI model based on their cumulative probability. + top_k: parameter for control the range of tokens considered by the AI model based on their cumulative probability, selecting number of tokens with highest probability. 
+ repetition_penalty: parameter for penalizing tokens based on how frequently they occur in the text. + conversation_id: unique conversation identifier. + """ + # req_dict = { + # "history": history, + # "temperature": temperature, + # "top_p": top_p, + # "top_k": top_k, + # "repetition_penalty": repetition_penalty, + # "hide_full_prompt": hide_full_prompt, + # "do_rag": do_rag, + # "docs": docs, + # "spliter_name": spliter_name, + # "vector_db": vector_db, + # "chunk_size": chunk_size, + # "chunk_overlap": chunk_overlap, + # "vector_search_top_k": vector_search_top_k, + # "vector_search_top_n": vector_search_top_n, + # "run_rerank": run_rerank, + # "search_method": search_method, + # "score_threshold": score_threshold, + # "streaming": True + # } + print(history) + new_req = {"messages": history[-1][0]} + server_addr = f"http://{MEGA_SERVICE_HOST_IP}:{MEGA_SERVICE_PORT}" + + # Async for streaming response + partial_text = "" + async with httpx.AsyncClient() as client: + async with client.stream("POST", f"{server_addr}/v1/chatqna", json=new_req, timeout=None) as response: + partial_text = "" + async for chunk in response.aiter_lines(): + new_text = chunk + if new_text.startswith("data"): + new_text = re.sub(r"\r\n", "", chunk.split("data: ")[-1]) + new_text = json.loads(chunk)["choices"][0]["message"]["content"] + partial_text = partial_text + new_text + history[-1][1] = partial_text + yield history + + avail_llms = get_local_available_models("llm") + avail_embed_models = get_local_available_models("embed") + avail_rerank_models = get_local_available_models("rerank") + avail_devices = get_available_devices() + avail_weights_compression = get_available_weights() + avail_node_parsers = pconf.get_available_node_parsers() + avail_indexers = pconf.get_available_indexers() + avail_retrievers = pconf.get_available_retrievers() + avail_postprocessors = pconf.get_available_postprocessors() + avail_generators = pconf.get_available_generators() + + css = """ + .feedback textarea {font-size: 18px; !important } + #blude_border {border: 1px solid #0000FF} + #white_border {border: 2px solid #FFFFFF} + .test textarea {color: E0E0FF; border: 1px solid #0000FF} + .disclaimer {font-variant-caps: all-small-caps} + """ + + with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo: + gr.HTML( + """ + + + + + + + +
+            <div>
+              <img alt="Sample Image" />
+              <h1>Edge Craft RAG based Q&A Chatbot</h1>
+              <h3>Powered by Intel NEXC Edge AI solutions</h3>
+            </div>
+ + + + """ + ) + _ = gr.Textbox( + label="System Status", + value=get_system_status, + max_lines=1, + every=1, + info="", + elem_id="white_border", + ) + + def get_pipeline_df(): + global pipeline_df + pipeline_df = cli.get_current_pipelines() + return pipeline_df + + # ------------------- + # RAG Settings Layout + # ------------------- + with gr.Tab("RAG Settings"): + with gr.Row(): + with gr.Column(scale=2): + u_pipelines = gr.Dataframe( + headers=["ID", "Name"], + column_widths=[70, 30], + value=get_pipeline_df, + label="Pipelines", + show_label=True, + interactive=False, + every=5, + ) + + u_rag_pipeline_status = gr.Textbox(label="Status", value="", interactive=False) + + with gr.Column(scale=3): + with gr.Accordion("Pipeline Configuration"): + with gr.Row(): + rag_create_pipeline = gr.Button("Create Pipeline") + rag_activate_pipeline = gr.Button("Activate Pipeline") + rag_remove_pipeline = gr.Button("Remove Pipeline") + + with gr.Column(variant="panel"): + u_pipeline_name = gr.Textbox( + label="Name", + value=cfg.name, + interactive=True, + ) + u_active = gr.Checkbox( + value=True, + label="Activated", + interactive=True, + ) + + with gr.Column(variant="panel"): + with gr.Accordion("Node Parser"): + u_node_parser = gr.Dropdown( + choices=avail_node_parsers, + label="Node Parser", + value=cfg.node_parser, + info="Select a parser to split documents.", + multiselect=False, + interactive=True, + ) + u_chunk_size = gr.Slider( + label="Chunk size", + value=cfg.chunk_size, + minimum=100, + maximum=2000, + step=50, + interactive=True, + info="Size of sentence chunk", + ) + + u_chunk_overlap = gr.Slider( + label="Chunk overlap", + value=cfg.chunk_overlap, + minimum=0, + maximum=400, + step=1, + interactive=True, + info=("Overlap between 2 chunks"), + ) + + with gr.Column(variant="panel"): + with gr.Accordion("Indexer"): + u_indexer = gr.Dropdown( + choices=avail_indexers, + label="Indexer", + value=cfg.indexer, + info="Select an indexer for indexing content of the documents.", + multiselect=False, + interactive=True, + ) + + with gr.Accordion("Embedding Model Configuration"): + u_embed_model_id = gr.Dropdown( + choices=avail_embed_models, + value=cfg.embedding_model_id, + label="Embedding Model", + # info="Select a Embedding Model", + multiselect=False, + allow_custom_value=True, + ) + + u_embed_device = gr.Dropdown( + choices=avail_devices, + value=cfg.embedding_device, + label="Embedding run device", + # info="Run embedding model on which device?", + multiselect=False, + ) + + with gr.Column(variant="panel"): + with gr.Accordion("Retriever"): + u_retriever = gr.Dropdown( + choices=avail_retrievers, + value=cfg.retriever, + label="Retriever", + info="Select a retriever for retrieving context.", + multiselect=False, + interactive=True, + ) + u_vector_search_top_k = gr.Slider( + 1, + 50, + value=cfg.k_retrieval, + step=1, + label="Search top k", + info="Number of searching results, must >= Rerank top n", + interactive=True, + ) + + with gr.Column(variant="panel"): + with gr.Accordion("Postprocessor"): + u_postprocessor = gr.Dropdown( + choices=avail_postprocessors, + value=cfg.postprocessor, + label="Postprocessor", + info="Select postprocessors for post-processing of the context.", + multiselect=True, + interactive=True, + ) + + with gr.Accordion("Rerank Model Configuration", open=True): + u_rerank_model_id = gr.Dropdown( + choices=avail_rerank_models, + value=cfg.rerank_model_id, + label="Rerank Model", + # info="Select a Rerank Model", + multiselect=False, + allow_custom_value=True, + ) + + 
u_rerank_device = gr.Dropdown( + choices=avail_devices, + value=cfg.rerank_device, + label="Rerank run device", + # info="Run rerank model on which device?", + multiselect=False, + ) + + with gr.Column(variant="panel"): + with gr.Accordion("Generator"): + u_generator = gr.Dropdown( + choices=avail_generators, + value=cfg.generator, + label="Generator", + info="Select a generator for AI inference.", + multiselect=False, + interactive=True, + ) + + with gr.Accordion("LLM Configuration", open=True): + u_llm_model_id = gr.Dropdown( + choices=avail_llms, + value=cfg.llm_model_id, + label="Large Language Model", + # info="Select a Large Language Model", + multiselect=False, + allow_custom_value=True, + ) + + u_llm_device = gr.Dropdown( + choices=avail_devices, + value=cfg.llm_device, + label="LLM run device", + # info="Run LLM on which device?", + multiselect=False, + ) + + u_llm_weights = gr.Radio( + avail_weights_compression, + label="Weights", + info="weights compression", + ) + + # ------------------- + # RAG Settings Events + # ------------------- + # Event handlers + def show_pipeline_detail(evt: gr.SelectData): + # get selected pipeline id + # Dataframe: {'headers': '', 'data': [[x00, x01], [x10, x11]} + # SelectData.index: [i, j] + print(u_pipelines.value["data"]) + print(evt.index) + # always use pipeline id for indexing + selected_id = pipeline_df[evt.index[0]][0] + pl = cli.get_pipeline(selected_id) + # TODO: change to json fomart + # pl["postprocessor"][0]["processor_type"] + # pl["postprocessor"]["model"]["model_id"], pl["postprocessor"]["model"]["device"] + return ( + pl["name"], + pl["status"]["active"], + pl["node_parser"]["parser_type"], + pl["node_parser"]["chunk_size"], + pl["node_parser"]["chunk_overlap"], + pl["indexer"]["indexer_type"], + pl["retriever"]["retriever_type"], + pl["retriever"]["retrieve_topk"], + pl["generator"]["generator_type"], + pl["generator"]["model"]["model_id"], + pl["generator"]["model"]["device"], + "", + pl["indexer"]["model"]["model_id"], + pl["indexer"]["model"]["device"], + ) + + def modify_create_pipeline_button(): + return "Create Pipeline" + + def modify_update_pipeline_button(): + return "Update Pipeline" + + def create_update_pipeline( + name, + active, + node_parser, + chunk_size, + chunk_overlap, + indexer, + retriever, + vector_search_top_k, + postprocessor, + generator, + llm_id, + llm_device, + llm_weights, + embedding_id, + embedding_device, + rerank_id, + rerank_device, + ): + res = cli.create_update_pipeline( + name, + active, + node_parser, + chunk_size, + chunk_overlap, + indexer, + retriever, + vector_search_top_k, + postprocessor, + generator, + llm_id, + llm_device, + llm_weights, + embedding_id, + embedding_device, + rerank_id, + rerank_device, + ) + return res, get_pipeline_df() + + # Events + u_pipelines.select( + show_pipeline_detail, + inputs=None, + outputs=[ + u_pipeline_name, + u_active, + # node parser + u_node_parser, + u_chunk_size, + u_chunk_overlap, + # indexer + u_indexer, + # retriever + u_retriever, + u_vector_search_top_k, + # postprocessor + # u_postprocessor, + # generator + u_generator, + # models + u_llm_model_id, + u_llm_device, + u_llm_weights, + u_embed_model_id, + u_embed_device, + # u_rerank_model_id, + # u_rerank_device + ], + ) + + u_pipeline_name.input(modify_create_pipeline_button, inputs=None, outputs=rag_create_pipeline) + + # Create pipeline button will change to update pipeline button if any + # of the listed fields changed + gr.on( + triggers=[ + u_active.input, + # node parser + 
u_node_parser.input, + u_chunk_size.input, + u_chunk_overlap.input, + # indexer + u_indexer.input, + # retriever + u_retriever.input, + u_vector_search_top_k.input, + # postprocessor + u_postprocessor.input, + # generator + u_generator.input, + # models + u_llm_model_id.input, + u_llm_device.input, + u_llm_weights.input, + u_embed_model_id.input, + u_embed_device.input, + u_rerank_model_id.input, + u_rerank_device.input, + ], + fn=modify_update_pipeline_button, + inputs=None, + outputs=rag_create_pipeline, + ) + + rag_create_pipeline.click( + create_update_pipeline, + inputs=[ + u_pipeline_name, + u_active, + u_node_parser, + u_chunk_size, + u_chunk_overlap, + u_indexer, + u_retriever, + u_vector_search_top_k, + u_postprocessor, + u_generator, + u_llm_model_id, + u_llm_device, + u_llm_weights, + u_embed_model_id, + u_embed_device, + u_rerank_model_id, + u_rerank_device, + ], + outputs=[u_rag_pipeline_status, u_pipelines], + queue=False, + ) + + rag_activate_pipeline.click( + cli.activate_pipeline, + inputs=[u_pipeline_name], + outputs=[u_rag_pipeline_status, u_active], + queue=False, + ) + + # -------------- + # Chatbot Layout + # -------------- + def get_files(): + return cli.get_files() + + def create_vectordb(docs, spliter, vector_db): + res = cli.create_vectordb(docs, spliter, vector_db) + return gr.update(value=get_files()), res + + global u_files_selected_row + u_files_selected_row = None + + def select_file(data, evt: gr.SelectData): + if not evt.selected or len(evt.index) == 0: + return "No file selected" + global u_files_selected_row + row_index = evt.index[0] + u_files_selected_row = data.iloc[row_index] + file_name, file_id = u_files_selected_row + return f"File Name: {file_name}\nFile ID: {file_id}" + + def deselect_file(): + global u_files_selected_row + u_files_selected_row = None + return gr.update(value=get_files()), "Selection cleared" + + def delete_file(): + global u_files_selected_row + if u_files_selected_row is None: + res = "Please select a file first." 
+ else: + file_name, file_id = u_files_selected_row + u_files_selected_row = None + res = cli.delete_file(file_id) + return gr.update(value=get_files()), res + + with gr.Tab("Chatbot"): + with gr.Row(): + with gr.Column(scale=1): + docs = gr.File( + label="Step 1: Load text files", + file_count="multiple", + file_types=[ + ".csv", + ".doc", + ".docx", + ".enex", + ".epub", + ".html", + ".md", + ".odt", + ".pdf", + ".ppt", + ".pptx", + ".txt", + ], + ) + retriever_argument = gr.Accordion("Vector Store Configuration", open=False) + with retriever_argument: + spliter = gr.Dropdown( + ["Character", "RecursiveCharacter", "Markdown", "Chinese"], + value=cfg.splitter_name, + label="Text Spliter", + info="Method used to split the documents", + multiselect=False, + ) + + vector_db = gr.Dropdown( + ["FAISS", "Chroma"], + value=cfg.vector_db, + label="Vector Stores", + info="Stores embedded data and performs vector search.", + multiselect=False, + ) + load_docs = gr.Button("Upload files") + + u_files_status = gr.Textbox(label="File Processing Status", value="", interactive=False) + u_files = gr.Dataframe( + headers=["Loaded File Name", "File ID"], + value=get_files, + label="Loaded Files", + show_label=False, + interactive=False, + every=5, + ) + + with gr.Accordion("Delete File", open=False): + selected_files = gr.Textbox(label="Click file to select", value="", interactive=False) + with gr.Row(): + with gr.Column(): + delete_button = gr.Button("Delete Selected File") + with gr.Column(): + deselect_button = gr.Button("Clear Selection") + + do_rag = gr.Checkbox( + value=True, + label="RAG is ON", + interactive=True, + info="Whether to do RAG for generation", + ) + with gr.Accordion("Generation Configuration", open=False): + with gr.Row(): + with gr.Column(): + with gr.Row(): + temperature = gr.Slider( + label="Temperature", + value=0.1, + minimum=0.0, + maximum=1.0, + step=0.1, + interactive=True, + info="Higher values produce more diverse outputs", + ) + with gr.Column(): + with gr.Row(): + top_p = gr.Slider( + label="Top-p (nucleus sampling)", + value=1.0, + minimum=0.0, + maximum=1, + step=0.01, + interactive=True, + info=( + "Sample from the smallest possible set of tokens whose cumulative probability " + "exceeds top_p. Set to 1 to disable and sample from all tokens." 
+ ), + ) + with gr.Column(): + with gr.Row(): + top_k = gr.Slider( + label="Top-k", + value=50, + minimum=0.0, + maximum=200, + step=1, + interactive=True, + info="Sample from a shortlist of top-k tokens — 0 to disable and sample from all tokens.", + ) + with gr.Column(): + with gr.Row(): + repetition_penalty = gr.Slider( + label="Repetition Penalty", + value=1.1, + minimum=1.0, + maximum=2.0, + step=0.1, + interactive=True, + info="Penalize repetition — 1.0 to disable.", + ) + with gr.Column(scale=4): + chatbot = gr.Chatbot( + height=600, + label="Step 2: Input Query", + show_copy_button=True, + ) + with gr.Row(): + with gr.Column(): + msg = gr.Textbox( + label="QA Message Box", + placeholder="Chat Message Box", + show_label=False, + container=False, + ) + with gr.Column(): + with gr.Row(): + submit = gr.Button("Submit") + stop = gr.Button("Stop") + clear = gr.Button("Clear") + retriever_argument = gr.Accordion("Retriever Configuration", open=True) + with retriever_argument: + with gr.Row(): + with gr.Row(): + do_rerank = gr.Checkbox( + value=True, + label="Rerank searching result", + interactive=True, + ) + hide_context = gr.Checkbox( + value=True, + label="Hide searching result in prompt", + interactive=True, + ) + with gr.Row(): + search_method = gr.Dropdown( + ["similarity_score_threshold", "similarity", "mmr"], + value=cfg.search_method, + label="Searching Method", + info="Method used to search vector store", + multiselect=False, + interactive=True, + ) + with gr.Row(): + score_threshold = gr.Slider( + 0.01, + 0.99, + value=cfg.score_threshold, + step=0.01, + label="Similarity Threshold", + info="Only working for 'similarity score threshold' method", + interactive=True, + ) + with gr.Row(): + vector_rerank_top_n = gr.Slider( + 1, + 10, + value=cfg.k_rerank, + step=1, + label="Rerank top n", + info="Number of rerank results", + interactive=True, + ) + load_docs.click( + create_vectordb, + inputs=[ + docs, + spliter, + vector_db, + ], + outputs=[u_files, u_files_status], + queue=True, + ) + # TODO: Need to de-select the dataframe, + # otherwise every time the dataframe is updated, a select event is triggered + u_files.select(select_file, inputs=[u_files], outputs=selected_files, queue=True) + + delete_button.click( + delete_file, + outputs=[u_files, u_files_status], + queue=True, + ) + deselect_button.click( + deselect_file, + outputs=[u_files, selected_files], + queue=True, + ) + + submit_event = msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then( + bot, + [ + chatbot, + temperature, + top_p, + top_k, + repetition_penalty, + hide_context, + do_rag, + docs, + spliter, + vector_db, + u_chunk_size, + u_chunk_overlap, + u_vector_search_top_k, + vector_rerank_top_n, + do_rerank, + search_method, + score_threshold, + ], + chatbot, + queue=True, + ) + submit_click_event = submit.click(user, [msg, chatbot], [msg, chatbot], queue=False).then( + bot, + [ + chatbot, + temperature, + top_p, + top_k, + repetition_penalty, + hide_context, + do_rag, + docs, + spliter, + vector_db, + u_chunk_size, + u_chunk_overlap, + u_vector_search_top_k, + vector_rerank_top_n, + do_rerank, + search_method, + score_threshold, + ], + chatbot, + queue=True, + ) + # stop.click( + # fn=request_cancel, + # inputs=None, + # outputs=None, + # cancels=[submit_event, submit_click_event], + # queue=False, + # ) + clear.click(lambda: None, None, chatbot, queue=False) + return demo + + +def main(): + # Create the parser + parser = argparse.ArgumentParser(description="Load Embedding and LLM Models with 
OpenVino.") + # Add the arguments + parser.add_argument("--prompt_template", type=str, required=False, help="User specific template") + # parser.add_argument("--server_name", type=str, default="0.0.0.0") + # parser.add_argument("--server_port", type=int, default=8082) + parser.add_argument("--config", type=str, default="./default.yaml", help="configuration file path") + parser.add_argument("--share", action="store_true", help="share model") + parser.add_argument("--debug", action="store_true", help="enable debugging") + + # Execute the parse_args() method to collect command line arguments + args = parser.parse_args() + logger.info(args) + cfg = OmegaConf.load(args.config) + init_cfg_(cfg) + logger.info(cfg) + + demo = build_demo(cfg, args) + # if you are launching remotely, specify server_name and server_port + # demo.launch(server_name='your server name', server_port='server port in int') + # if you have any issue to launch on your platform, you can pass share=True to launch method: + # demo.launch(share=True) + # it creates a publicly shareable link for the interface. Read more in the docs: https://gradio.app/docs/ + # demo.launch(share=True) + demo.queue().launch( + server_name=UI_SERVICE_HOST_IP, server_port=UI_SERVICE_PORT, share=args.share, allowed_paths=["."] + ) + + # %% + # please run this cell for stopping gradio interface + demo.close() + + +def init_cfg_(cfg): + if "name" not in cfg: + cfg.name = "default" + if "embedding_device" not in cfg: + cfg.embedding_device = "CPU" + if "rerank_device" not in cfg: + cfg.rerank_device = "CPU" + if "llm_device" not in cfg: + cfg.llm_device = "CPU" + if "model_language" not in cfg: + cfg.model_language = "Chinese" + if "vector_db" not in cfg: + cfg.vector_db = "FAISS" + if "splitter_name" not in cfg: + cfg.splitter_name = "RecursiveCharacter" # or "Chinese" + if "search_method" not in cfg: + cfg.search_method = "similarity" + if "score_threshold" not in cfg: + cfg.score_threshold = 0.5 + + +if __name__ == "__main__": + main() diff --git a/EdgeCraftRAG/ui/gradio/platform_config.py b/EdgeCraftRAG/ui/gradio/platform_config.py new file mode 100644 index 000000000..852409c1c --- /dev/null +++ b/EdgeCraftRAG/ui/gradio/platform_config.py @@ -0,0 +1,114 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import os +import sys +from enum import Enum + +import openvino.runtime as ov +from config import SUPPORTED_EMBEDDING_MODELS, SUPPORTED_LLM_MODELS, SUPPORTED_RERANK_MODELS + +sys.path.append("..") +from edgecraftrag.base import GeneratorType, IndexerType, NodeParserType, PostProcessorType, RetrieverType + + +def _get_llm_model_ids(supported_models, model_language=None): + if model_language is None: + model_ids = [model_id for model_id, _ in supported_models.items()] + return model_ids + + if model_language not in supported_models: + print("Invalid model language! Please choose from the available options.") + return None + + # Create a list of model IDs based on the selected language + llm_model_ids = [ + model_id + for model_id, model_config in supported_models[model_language].items() + if model_config.get("rag_prompt_template") or model_config.get("normalize_embeddings") + ] + + return llm_model_ids + + +def _list_subdirectories(parent_directory): + """List all subdirectories under the given parent directory using os.listdir. + + Parameters: + parent_directory (str): The path to the parent directory from which to list subdirectories. + + Returns: + list: A list of subdirectory names found in the parent directory. 
+ """ + # Get a list of all entries in the parent directory + entries = os.listdir(parent_directory) + + # Filter out the entries to only keep directories + subdirectories = [entry for entry in entries if os.path.isdir(os.path.join(parent_directory, entry))] + + return sorted(subdirectories) + + +def _get_available_models(model_ids, local_dirs): + """Filters and sorts model IDs based on their presence in the local directories. + + Parameters: + model_ids (list): A list of model IDs to check. + local_dirs (list): A list of local directory names to check against. + + Returns: + list: A sorted list of available model IDs. + """ + # Filter model_ids for those that are present in local directories + return sorted([model_id for model_id in model_ids if model_id in local_dirs]) + + +def get_local_available_models(model_type: str, local_path: str = "./"): + local_dirs = _list_subdirectories(local_path) + if model_type == "llm": + model_ids = _get_llm_model_ids(SUPPORTED_LLM_MODELS, "Chinese") + elif model_type == "embed": + model_ids = _get_llm_model_ids(SUPPORTED_EMBEDDING_MODELS, "Chinese") + elif model_type == "rerank": + model_ids = _get_llm_model_ids(SUPPORTED_RERANK_MODELS) + else: + print("Unknown model type") + avail_models = _get_available_models(model_ids, local_dirs) + return avail_models + + +def get_available_devices(): + core = ov.Core() + avail_devices = core.available_devices + ["AUTO"] + if "NPU" in avail_devices: + avail_devices.remove("NPU") + return avail_devices + + +def get_available_weights(): + avail_weights_compression = ["FP16", "INT8", "INT4"] + return avail_weights_compression + + +def get_enum_values(c: Enum): + return [v.value for k, v in vars(c).items() if not callable(v) and not k.startswith("__") and not k.startswith("_")] + + +def get_available_node_parsers(): + return get_enum_values(NodeParserType) + + +def get_available_indexers(): + return get_enum_values(IndexerType) + + +def get_available_retrievers(): + return get_enum_values(RetrieverType) + + +def get_available_postprocessors(): + return get_enum_values(PostProcessorType) + + +def get_available_generators(): + return get_enum_values(GeneratorType) diff --git a/FaqGen/docker_compose/intel/cpu/xeon/README.md b/FaqGen/docker_compose/intel/cpu/xeon/README.md index 04fea0f85..c512621b0 100644 --- a/FaqGen/docker_compose/intel/cpu/xeon/README.md +++ b/FaqGen/docker_compose/intel/cpu/xeon/README.md @@ -114,9 +114,11 @@ docker compose up -d 3. MegaService ```bash - curl http://${host_ip}:8888/v1/faqgen -H "Content-Type: application/json" -d '{ - "messages": "Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5." - }' + curl http://${host_ip}:8888/v1/faqgen \ + -H "Content-Type: multipart/form-data" \ + -F "messages=Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5." \ + -F "max_tokens=32" \ + -F "stream=false" ``` Following the validation of all aforementioned microservices, we are now prepared to construct a mega-service. 
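For reference, the updated multipart `/v1/faqgen` call shown in the Xeon and Gaudi READMEs can also be issued from Python. This is a minimal sketch, assuming the default MegaService port 8888 from the compose files and a `host_ip` placeholder for the machine running the FaqGen backend; the form fields mirror the curl `-F` options above.

```python
# Minimal sketch of the updated FaqGen MegaService request (multipart/form-data).
# Assumes the service is reachable at http://<host_ip>:8888 as in the README examples.
import requests

host_ip = "localhost"  # placeholder: replace with the host running the FaqGen backend

# Passing (None, value) tuples via `files` makes requests encode plain
# multipart/form-data fields, matching curl's -F "messages=...",
# -F "max_tokens=32", and -F "stream=false".
form = {
    "messages": (
        None,
        "Text Embeddings Inference (TEI) is a toolkit for deploying and serving "
        "open source text embeddings and sequence classification models.",
    ),
    "max_tokens": (None, "32"),
    "stream": (None, "false"),
}

resp = requests.post(f"http://{host_ip}:8888/v1/faqgen", files=form, timeout=120)
resp.raise_for_status()
print(resp.text)
```

With `stream` set to `false`, the full generated text comes back in a single response body, which is what the rewritten `validate_megaservice` functions in the test scripts grep for.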
diff --git a/FaqGen/docker_compose/intel/hpu/gaudi/README.md b/FaqGen/docker_compose/intel/hpu/gaudi/README.md index acdded9c2..548a94e16 100644 --- a/FaqGen/docker_compose/intel/hpu/gaudi/README.md +++ b/FaqGen/docker_compose/intel/hpu/gaudi/README.md @@ -28,7 +28,7 @@ To construct the Mega Service, we utilize the [GenAIComps](https://github.com/op ```bash git clone https://github.com/opea-project/GenAIExamples -cd GenAIExamples/FaqGen/docker/ +cd GenAIExamples/FaqGen/ docker build --no-cache -t opea/faqgen:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile . ``` @@ -37,7 +37,7 @@ docker build --no-cache -t opea/faqgen:latest --build-arg https_proxy=$https_pro Construct the frontend Docker image using the command below: ```bash -cd GenAIExamples/FaqGen/ +cd GenAIExamples/FaqGen/ui docker build -t opea/faqgen-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f ./docker/Dockerfile . ``` @@ -115,9 +115,11 @@ docker compose up -d 3. MegaService ```bash - curl http://${host_ip}:8888/v1/faqgen -H "Content-Type: application/json" -d '{ - "messages": "Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5." - }' + curl http://${host_ip}:8888/v1/faqgen \ + -H "Content-Type: multipart/form-data" \ + -F "messages=Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5." \ + -F "max_tokens=32" \ + -F "stream=false" ``` ## 🚀 Launch the UI diff --git a/FaqGen/tests/test_compose_on_gaudi.sh b/FaqGen/tests/test_compose_on_gaudi.sh index a58339780..161c1e2a7 100644 --- a/FaqGen/tests/test_compose_on_gaudi.sh +++ b/FaqGen/tests/test_compose_on_gaudi.sh @@ -101,13 +101,30 @@ function validate_microservices() { } function validate_megaservice() { - # Curl the Mega Service - validate_services \ - "${ip_address}:8888/v1/faqgen" \ - "Text Embeddings Inference" \ - "mega-faqgen" \ - "faqgen-gaudi-backend-server" \ - '{"messages": "Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}' + local SERVICE_NAME="mega-faqgen" + local DOCKER_NAME="faqgen-gaudi-backend-server" + local EXPECTED_RESULT="Embeddings" + local INPUT_DATA="messages=Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5." + local URL="${ip_address}:8888/v1/faqgen" + local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -F "$INPUT_DATA" -H 'Content-Type: multipart/form-data' "$URL") + if [ "$HTTP_STATUS" -eq 200 ]; then + echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." + + local CONTENT=$(curl -s -X POST -F "$INPUT_DATA" -H 'Content-Type: multipart/form-data' "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log) + + if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then + echo "[ $SERVICE_NAME ] Content is as expected." 
+ else + echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT" + docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log + exit 1 + fi + else + echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" + docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log + exit 1 + fi + sleep 1s } function validate_frontend() { @@ -152,7 +169,7 @@ function main() { validate_microservices validate_megaservice - validate_frontend + # validate_frontend stop_docker echo y | docker system prune diff --git a/FaqGen/tests/test_compose_on_xeon.sh b/FaqGen/tests/test_compose_on_xeon.sh index c6265e02d..e9ed4bf1e 100755 --- a/FaqGen/tests/test_compose_on_xeon.sh +++ b/FaqGen/tests/test_compose_on_xeon.sh @@ -101,13 +101,30 @@ function validate_microservices() { } function validate_megaservice() { - # Curl the Mega Service - validate_services \ - "${ip_address}:8888/v1/faqgen" \ - "Text Embeddings Inference" \ - "mega-faqgen" \ - "faqgen-xeon-backend-server" \ - '{"messages": "Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}' + local SERVICE_NAME="mega-faqgen" + local DOCKER_NAME="faqgen-xeon-backend-server" + local EXPECTED_RESULT="Embeddings" + local INPUT_DATA="messages=Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5." + local URL="${ip_address}:8888/v1/faqgen" + local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -F "$INPUT_DATA" -H 'Content-Type: multipart/form-data' "$URL") + if [ "$HTTP_STATUS" -eq 200 ]; then + echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." + + local CONTENT=$(curl -s -X POST -F "$INPUT_DATA" -H 'Content-Type: multipart/form-data' "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log) + + if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then + echo "[ $SERVICE_NAME ] Content is as expected." + else + echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT" + docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log + exit 1 + fi + else + echo "[ $SERVICE_NAME ] HTTP status is not 200. 
Received status was $HTTP_STATUS" + docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log + exit 1 + fi + sleep 1s } function validate_frontend() { @@ -152,7 +169,7 @@ function main() { validate_microservices validate_megaservice - validate_frontend + # validate_frontend stop_docker echo y | docker system prune diff --git a/FaqGen/ui/svelte/.env b/FaqGen/ui/svelte/.env index bfdca1c9a..4d0880c76 100644 --- a/FaqGen/ui/svelte/.env +++ b/FaqGen/ui/svelte/.env @@ -1 +1 @@ -DOC_BASE_URL = 'http://backend_address:8888/v1/faqgen' +FAQ_BASE_URL = 'http://backend_address:8888/v1/faqgen' diff --git a/FaqGen/ui/svelte/src/lib/doc.svelte b/FaqGen/ui/svelte/src/lib/doc.svelte index bae896ba3..f9ea33584 100644 --- a/FaqGen/ui/svelte/src/lib/doc.svelte +++ b/FaqGen/ui/svelte/src/lib/doc.svelte @@ -38,8 +38,8 @@ } else { currentIdx = index; if ( - (currentIdx === 1 && message !== "") || - (currentIdx === 2 && $kb_id !== "") + (currentIdx === 2 && message !== "") || + (currentIdx === 1 && $kb_id !== "") ) { formModal = true; } else { @@ -49,10 +49,10 @@ } function panelExchange() { - if (currentIdx === 2) { + if (currentIdx === 1) { kb_id.set(""); dispatch("clearMsg", { status: true }); - } else if (currentIdx === 1) { + } else if (currentIdx === 2) { message = ""; dispatch("clearMsg", { status: true }); } @@ -152,7 +152,7 @@ type="submit" data-testid="sum-click" class="xl:my-12 inline-flex items-center px-5 py-2.5 text-sm font-medium text-center text-white bg-blue-700 mt-2 focus:ring-4 focus:ring-blue-200 dark:focus:ring-blue-900 hover:bg-blue-800" - on:click={() => generateFaq()} + on:click={() => generateFaq()} > Generate FAQs @@ -165,11 +165,11 @@ /> {#if currentIdx === 1}

- The current content will be cleared. + The currently uploaded file will be cleared.

{:else if currentIdx === 2}

- The currently uploaded file will be cleared. + The current content will be cleared.

{/if} diff --git a/FaqGen/ui/svelte/src/lib/dropFile.svelte b/FaqGen/ui/svelte/src/lib/dropFile.svelte index ef52ca1d0..fcc972c54 100644 --- a/FaqGen/ui/svelte/src/lib/dropFile.svelte +++ b/FaqGen/ui/svelte/src/lib/dropFile.svelte @@ -15,30 +15,36 @@ -->