diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS new file mode 100644 index 000000000..972b2a181 --- /dev/null +++ b/.github/CODEOWNERS @@ -0,0 +1,9 @@ +/comps/asr/ sihan.chen@intel.com +/comps/cores/ liang1.lv@intel.com +/comps/dataprep/ xinyu.ye@intel.com +/comps/embeddings/ xuhui.ren@intel.com +/comps/guardrails/ letong.han@intel.com +/comps/llms/ liang1.lv@intel.com +/comps/reranks/ xuhui.ren@intel.com +/comps/retrievers/ xuhui.ren@intel.com +/comps/tts/ sihan.chen@intel.com diff --git a/.github/workflows/reuse-get-test-matrix.yml b/.github/workflows/_get-test-matrix.yml similarity index 98% rename from .github/workflows/reuse-get-test-matrix.yml rename to .github/workflows/_get-test-matrix.yml index 6860f8e9b..09be978cb 100644 --- a/.github/workflows/reuse-get-test-matrix.yml +++ b/.github/workflows/_get-test-matrix.yml @@ -45,7 +45,7 @@ jobs: merged_commit=$(git log -1 --format='%H') changed_files="$(git diff --name-only ${base_commit} ${merged_commit} | \ - grep 'comps/' | grep -vE '*.md|*.txt|comps/cores')" || true + grep 'comps/' | grep -vE '*.md|comps/cores')" || true services=$(printf '%s\n' "${changed_files[@]}" | cut -d'/' -f2 | grep -vE '*.py' | sort -u) || true run_matrix="{\"include\":[" for service in ${services}; do diff --git a/.github/workflows/container-build.yml b/.github/workflows/container-build.yml deleted file mode 100644 index b3c2f7bc6..000000000 --- a/.github/workflows/container-build.yml +++ /dev/null @@ -1,48 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -name: Container Build -permissions: read-all -on: - workflow_dispatch: -jobs: - # https://github.com/intel/ai-containers/blob/main/.github/action.yml - build-containers: - runs-on: docker - env: - REGISTRY: ${{ secrets.REGISTRY }} - REPO: ${{ secrets.REPO }} - steps: - - uses: step-security/harden-runner@v2 - with: - egress-policy: audit - - uses: actions/checkout@v4 - - uses: actions/checkout@v4 - with: - path: tei-gaudi - ref: habana-main - repository: huggingface/tei-gaudi - token: ${{ secrets.ACTION_TOKEN }} - - uses: docker/login-action@v3 - with: - registry: ${{ secrets.REGISTRY }} - username: ${{ secrets.REGISTRY_USER }} - password: ${{ secrets.REGISTRY_TOKEN }} - - name: Build Containers - run: | - docker compose -p ${GITHUB_RUN_NUMBER} build --no-cache - working-directory: .github/workflows/docker - - name: Print Containers to Summary - run: | - docker compose -p ${GITHUB_RUN_NUMBER} images --format json | jq -r --arg registry "$REGISTRY" '.[] | select(.Repository | contains($registry)) | .Tag' >> $GITHUB_STEP_SUMMARY - - name: Push Containers - run: | - docker compose -p ${GITHUB_RUN_NUMBER} push - working-directory: .github/workflows/docker - - name: Un-Tag Containers - run: | - docker compose -p ${GITHUB_RUN_NUMBER} down --rmi all - working-directory: .github/workflows/docker - - name: Remove Containers - if: always() - run: docker system prune --all --force diff --git a/.github/workflows/docker/code-scan.dockerfile b/.github/workflows/docker/code-scan.dockerfile index 786ec3ad8..129e146b0 100644 --- a/.github/workflows/docker/code-scan.dockerfile +++ b/.github/workflows/docker/code-scan.dockerfile @@ -4,7 +4,7 @@ ARG UBUNTU_VER=22.04 FROM ubuntu:${UBUNTU_VER} as devel -ENV LANG C.UTF-8 +ENV LANG=C.UTF-8 RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ aspell \ diff --git a/.github/workflows/docker/compose/asr-compose.yaml b/.github/workflows/docker/compose/asr-compose.yaml new file mode 100644 index 
000000000..b790fa0ed --- /dev/null +++ b/.github/workflows/docker/compose/asr-compose.yaml @@ -0,0 +1,18 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# this file should be run in the root of the repo +# images used by GenAIExamples: asr,whisper,whisper-gaudi +services: + asr: + build: + dockerfile: comps/asr/Dockerfile + image: ${REGISTRY}opea/asr:${TAG:-latest} + whisper: + build: + dockerfile: comps/asr/whisper/Dockerfile + image: ${REGISTRY}opea/whisper:${TAG:-latest} + whisper-gaudi: + build: + dockerfile: comps/asr/whisper/Dockerfile_hpu + image: ${REGISTRY}opea/whisper-gaudi:${TAG:-latest} diff --git a/.github/workflows/docker/compose/dataprep-compose.yaml b/.github/workflows/docker/compose/dataprep-compose.yaml new file mode 100644 index 000000000..5cca84cb4 --- /dev/null +++ b/.github/workflows/docker/compose/dataprep-compose.yaml @@ -0,0 +1,23 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# this file should be run in the root of the repo +# images used by GenAIExamples: dataprep,dataprep-qdrant +# others: dataprep-redis-llama-index,dataprep-on-ray-redis +services: + dataprep-redis: + build: + dockerfile: comps/dataprep/redis/langchain/docker/Dockerfile + image: ${REGISTRY}opea/dataprep-redis:${TAG:-latest} + dataprep-qdrant: + build: + dockerfile: comps/dataprep/qdrant/docker/Dockerfile + image: ${REGISTRY}opea/dataprep-qdrant:${TAG:-latest} + dataprep-redis-llama-index: + build: + dockerfile: comps/dataprep/redis/llama_index/docker/Dockerfile + image: ${REGISTRY}opea/dataprep-redis-llama-index:${TAG:-latest} + dataprep-on-ray-redis: + build: + dockerfile: comps/dataprep/redis/langchain_ray/docker/Dockerfile + image: ${REGISTRY}opea/dataprep-on-ray-redis:${TAG:-latest} diff --git a/.github/workflows/docker/compose/embeddings-compose.yaml b/.github/workflows/docker/compose/embeddings-compose.yaml new file mode 100644 index 000000000..e442b179b --- /dev/null +++ b/.github/workflows/docker/compose/embeddings-compose.yaml @@ -0,0 +1,10 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# this file should be run in the root of the repo +# images used by GenAIExamples: embedding-tei +services: + embedding-tei: + build: + dockerfile: comps/embeddings/langchain/docker/Dockerfile + image: ${REGISTRY}opea/embedding-tei:${TAG:-latest} diff --git a/.github/workflows/docker/compose/guardrails-compose.yaml b/.github/workflows/docker/compose/guardrails-compose.yaml new file mode 100644 index 000000000..30592aecb --- /dev/null +++ b/.github/workflows/docker/compose/guardrails-compose.yaml @@ -0,0 +1,15 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# this file should be run in the root of the repo +# images used by GenAIExamples: guardrails-tgi +# others: guardrails-pii-detection +services: + guardrails-tgi: + build: + dockerfile: comps/guardrails/llama_guard/docker/Dockerfile + image: ${REGISTRY}opea/guardrails-tgi:${TAG:-latest} + guardrails-pii-detection: + build: + dockerfile: comps/guardrails/pii_detection/docker/Dockerfile + image: ${REGISTRY}opea/guardrails-pii-detection:${TAG:-latest} diff --git a/.github/workflows/docker/compose/llms-compose.yaml b/.github/workflows/docker/compose/llms-compose.yaml new file mode 100644 index 000000000..e722682e8 --- /dev/null +++ b/.github/workflows/docker/compose/llms-compose.yaml @@ -0,0 +1,38 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# this file should be run 
in the root of the repo +# images used by GenAIExamples: llm-tgi,llm-ollama,llm-docsum-tgi,llm-faqgen-tgi,llm-vllm,llm-vllm-hpu,llm-vllm-ray,llm-vllm-ray-hpu +services: + llm-tgi: + build: + dockerfile: comps/llms/text-generation/tgi/Dockerfile + image: ${REGISTRY}opea/llm-tgi:${TAG:-latest} + llm-ollama: + build: + dockerfile: comps/llms/text-generation/ollama/Dockerfile + image: ${REGISTRY}opea/llm-ollama:${TAG:-latest} + llm-docsum-tgi: + build: + dockerfile: comps/llms/summarization/tgi/Dockerfile + image: ${REGISTRY}opea/llm-docsum-tgi:${TAG:-latest} + llm-faqgen-tgi: + build: + dockerfile: comps/llms/faq-generation/tgi/Dockerfile + image: ${REGISTRY}opea/llm-faqgen-tgi:${TAG:-latest} + llm-vllm: + build: + dockerfile: comps/llms/text-generation/vllm/docker/Dockerfile.microservice + image: ${REGISTRY}opea/llm-vllm:${TAG:-latest} + llm-vllm-hpu: + build: + dockerfile: comps/llms/text-generation/vllm/docker/Dockerfile.hpu + image: ${REGISTRY}opea/llm-vllm-hpu:${TAG:-latest} + llm-vllm-ray: + build: + dockerfile: comps/llms/text-generation/vllm-ray/docker/Dockerfile.microservice + image: ${REGISTRY}opea/llm-vllm-ray:${TAG:-latest} + llm-vllm-ray-hpu: + build: + dockerfile: comps/llms/text-generation/vllm-ray/docker/Dockerfile.vllmray + image: ${REGISTRY}opea/llm-vllm-ray-hpu:${TAG:-latest} diff --git a/.github/workflows/docker/compose/reranks-compose.yaml b/.github/workflows/docker/compose/reranks-compose.yaml new file mode 100644 index 000000000..e3743d652 --- /dev/null +++ b/.github/workflows/docker/compose/reranks-compose.yaml @@ -0,0 +1,10 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# this file should be run in the root of the repo +# images used by GenAIExamples: reranking-tei +services: + reranking-tei: + build: + dockerfile: comps/reranks/tei/docker/Dockerfile + image: ${REGISTRY}opea/reranking-tei:${TAG:-latest} diff --git a/.github/workflows/docker/compose/retrievers-compose.yaml b/.github/workflows/docker/compose/retrievers-compose.yaml new file mode 100644 index 000000000..d9de4b27f --- /dev/null +++ b/.github/workflows/docker/compose/retrievers-compose.yaml @@ -0,0 +1,13 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# this file should be run in the root of the repo +services: + retriever-redis: + build: + dockerfile: comps/retrievers/langchain/redis/docker/Dockerfile + image: ${REGISTRY}opea/retriever-redis:${TAG:-latest} + retriever-qdrant: + build: + dockerfile: comps/retrievers/haystack/qdrant/docker/Dockerfile + image: ${REGISTRY}opea/retriever-qdrant:${TAG:-latest} diff --git a/.github/workflows/docker/compose/tts-compose.yaml b/.github/workflows/docker/compose/tts-compose.yaml new file mode 100644 index 000000000..dd7766345 --- /dev/null +++ b/.github/workflows/docker/compose/tts-compose.yaml @@ -0,0 +1,18 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# this file should be run in the root of the repo +# images used by GenAIExamples: tts,speecht5,speecht5-gaudi +services: + tts: + build: + dockerfile: comps/tts/Dockerfile + image: ${REGISTRY}opea/tts:${TAG:-latest} + speecht5: + build: + dockerfile: comps/tts/speecht5/Dockerfile + image: ${REGISTRY}opea/speecht5:${TAG:-latest} + speecht5-gaudi: + build: + dockerfile: comps/tts/speecht5/Dockerfile_hpu + image: ${REGISTRY}opea/speecht5-gaudi:${TAG:-latest} diff --git a/.github/workflows/docker/compose/web_retrievers-compose.yaml b/.github/workflows/docker/compose/web_retrievers-compose.yaml new file mode 100644 index
000000000..ae3de3571 --- /dev/null +++ b/.github/workflows/docker/compose/web_retrievers-compose.yaml @@ -0,0 +1,10 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# this file should be run in the root of the repo +# images used by GenAIExamples: web-retriever-chroma +services: + web-retriever-chroma: + build: + dockerfile: comps/web_retrievers/langchain/chroma/docker/Dockerfile + image: ${REGISTRY}opea/web-retriever-chroma:${TAG:-latest} diff --git a/.github/workflows/docker/docker-compose.yaml b/.github/workflows/docker/docker-compose.yaml deleted file mode 100644 index 4a0b165a7..000000000 --- a/.github/workflows/docker/docker-compose.yaml +++ /dev/null @@ -1,40 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -services: - embedding-tei-server: - build: - args: - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - no_proxy: ${no_proxy} - context: ../../.. - dockerfile: comps/embeddings/langchain/docker/Dockerfile - image: ${REGISTRY}/${REPO}:embedding-tei-server - pull_policy: always - retriever-redis-server: - build: - dockerfile: comps/retrievers/langchain/redis/docker/Dockerfile - extends: embedding-tei-server - image: ${REGISTRY}/${REPO}:retriever-redis-server - reranking-tei-server: - build: - dockerfile: comps/reranks/langchain/docker/Dockerfile - extends: embedding-tei-server - image: ${REGISTRY}/${REPO}:reranking-tei-server - llm-tgi-server: - build: - dockerfile: comps/llms/langchain/docker/Dockerfile - extends: embedding-tei-server - image: ${REGISTRY}/${REPO}:llm-tgi-server - dataprep-redis-server: - build: - dockerfile: comps/dataprep/redis/docker/Dockerfile - extends: embedding-tei-server - image: ${REGISTRY}/${REPO}:dataprep-redis-server - tei-gaudi: - build: - context: ../../../tei-gaudi - dockerfile: Dockerfile-hpu - extends: embedding-tei-server - image: ${REGISTRY}/${REPO}:tei-gaudi diff --git a/.github/workflows/docker/ut.dockerfile b/.github/workflows/docker/ut.dockerfile index 328984ff5..1453b1693 100644 --- a/.github/workflows/docker/ut.dockerfile +++ b/.github/workflows/docker/ut.dockerfile @@ -4,7 +4,7 @@ ARG UBUNTU_VER=22.04 FROM ubuntu:${UBUNTU_VER} as devel -ENV LANG C.UTF-8 +ENV LANG=C.UTF-8 RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ aspell \ diff --git a/.github/workflows/image-build-on-push.yml b/.github/workflows/image-build-on-push.yml deleted file mode 100644 index 44010fdb4..000000000 --- a/.github/workflows/image-build-on-push.yml +++ /dev/null @@ -1,29 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 -# Test -name: Build latest images on push event - -on: - push: - branches: ["main"] - paths: - - comps/** - - "!**.md" - - "!**.txt" - - .github/workflows/image-build-on-push.yml - -concurrency: - group: ${{ github.workflow }}-${{ github.ref }}-on-push - cancel-in-progress: true - -jobs: - job1: - uses: ./.github/workflows/reuse-get-test-matrix.yml - - image-build: - needs: job1 - strategy: - matrix: ${{ fromJSON(needs.job1.outputs.run_matrix) }} - uses: ./.github/workflows/reuse-image-build.yml - with: - micro_service: "${{ matrix.service }}" diff --git a/.github/workflows/manual-image-build.yml b/.github/workflows/manual-image-build.yml new file mode 100644 index 000000000..4d1d01eee --- /dev/null +++ b/.github/workflows/manual-image-build.yml @@ -0,0 +1,67 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +name: Build latest images on manual event + +on: + 
workflow_dispatch: + inputs: + services: + default: "asr,dataprep" + description: "List of services to build including [asr,dataprep,embeddings,guardrails,llms,reranks,retrievers,tts,web_retrievers]" + required: true + type: string + tag: + default: "latest" + description: "Tag to apply to images" + required: true + type: string + nodes: + default: "docker-build-xeon,docker-build-gaudi" + description: "List of nodes to run the build including [docker-build-xeon,docker-build-gaudi]" + required: true + type: string + +jobs: + get-build-matrix: + runs-on: ubuntu-latest + outputs: + services: ${{ steps.get-services.outputs.services }} + nodes: ${{ steps.get-services.outputs.nodes }} + steps: + - name: Get test Services + id: get-services + run: | + set -x + service_list=($(echo ${{ github.event.inputs.services }} | tr ',' ' ')) + services=$(printf '%s\n' "${service_list[@]}" | sort -u | jq -R '.' | jq -sc '.') + echo "services=$services" >> $GITHUB_OUTPUT + node_list=($(echo ${{ github.event.inputs.nodes }} | tr ',' ' ')) + nodes=$(printf '%s\n' "${node_list[@]}" | sort -u | jq -R '.' | jq -sc '.') + echo "nodes=$nodes" >> $GITHUB_OUTPUT + + image-build: + needs: get-build-matrix + strategy: + matrix: + service: ${{ fromJSON(needs.get-build-matrix.outputs.services) }} + node: ${{ fromJSON(needs.get-build-matrix.outputs.nodes) }} + runs-on: ${{ matrix.node }} + continue-on-error: true + steps: + - name: Clean Up Working Directory + run: | + sudo rm -rf ${{github.workspace}}/* + + - name: Checkout out Repo + uses: actions/checkout@v4 + + - name: Build image + env: + service: ${{ matrix.service }} + uses: opea-project/validation/actions/image-build@main + with: + work_dir: ${{ github.workspace }} + docker_compose_path: ${{ github.workspace }}/.github/workflows/docker/compose/${service}-compose.yaml + registry: ${OPEA_IMAGE_REPO} + tag: ${{ github.event.inputs.tag }} diff --git a/.github/workflows/code-scan.yml b/.github/workflows/mix-code-scan.yml similarity index 100% rename from .github/workflows/code-scan.yml rename to .github/workflows/mix-code-scan.yml diff --git a/.github/workflows/megaservice-test.yml b/.github/workflows/mix-megaservice-test.yml similarity index 97% rename from .github/workflows/megaservice-test.yml rename to .github/workflows/mix-megaservice-test.yml index 291af0276..83c826cd7 100644 --- a/.github/workflows/megaservice-test.yml +++ b/.github/workflows/mix-megaservice-test.yml @@ -10,7 +10,7 @@ on: branches: [main] types: [opened, reopened, ready_for_review, synchronize] # added `ready_for_review` since draft is skipped paths: - - .github/workflows/megaservice-test.yml + - .github/workflows/mix-megaservice-test.yml - comps/cores/** - requirements.txt - setup.py @@ -31,7 +31,7 @@ env: jobs: MegaService: - runs-on: aise-cluster + runs-on: gaudi steps: - name: Clean Up Working Directory run: | diff --git a/.github/workflows/trellix.yml b/.github/workflows/mix-trellix.yml similarity index 100% rename from .github/workflows/trellix.yml rename to .github/workflows/mix-trellix.yml diff --git a/.github/workflows/pr-dockerfile-path-scan.yaml b/.github/workflows/pr-dockerfile-path-scan.yaml new file mode 100644 index 000000000..0595e4bcb --- /dev/null +++ b/.github/workflows/pr-dockerfile-path-scan.yaml @@ -0,0 +1,117 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +name: File Change Warning + +on: + pull_request: + branches: [main] + types: [opened, reopened, ready_for_review, synchronize] + +# If there is a new commit, the previous jobs will be 
canceled +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + +jobs: + Dockerfile-path-change-detection-in-GenAIComps: + runs-on: ubuntu-latest + steps: + - name: Clean Up Working Directory + run: sudo rm -rf ${{github.workspace}}/* + + - name: Checkout code + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Check for changed Dockerfile paths in yaml + run: | + set -xe + shopt -s globstar + cd ${{github.workspace}} + is_use="FALSE" + used_files="" + merged_commit=$(git log -1 --format='%H') + changed_files="$(git diff --name-status --diff-filter=DR ${{ github.event.pull_request.base.sha }} ${merged_commit} -- '**/Dockerfile**' | cut -f2)" + if [ -n "$changed_files" ]; then + for file in $changed_files; do + if grep -q "$file" .github/workflows/docker/compose/*.yaml; then + is_use="TRUE" + used_files+="$file " + fi + done + fi + + if [[ "$is_use" == "TRUE" ]]; then + echo "Warning: Changed Dockerfile paths:" + echo "$used_files" + echo "Please modify the corresponding yaml in GenAIComps/.github/workflows/docker/compose and ask suyue.chen@intel.com for final confirmation." + exit 1 + fi + + - name: Check for changed Dockerfile paths in readme + run: | + set -xe + shopt -s globstar + cd ${{github.workspace}} + is_use="FALSE" + used_files="" + merged_commit=$(git log -1 --format='%H') + changed_files="$(git diff --name-status --diff-filter=DR ${{ github.event.pull_request.base.sha }} ${merged_commit} -- '**/Dockerfile**' | cut -f2)" + if [ -n "$changed_files" ]; then + for file in $changed_files; do + if grep -q "$file" ./**/*.md; then + is_use="TRUE" + used_files+="$file " + fi + done + fi + + if [[ "$is_use" == "TRUE" ]]; then + echo "Warning: Changed Dockerfile paths:" + echo "$used_files" + echo "Please modify the corresponding README in GenAIComps and ask suyue.chen@intel.com for final confirmation." + exit 1 + fi + + Dockerfile-path-change-detection-in-GenAIExamples: + runs-on: ubuntu-latest + steps: + - name: Clean Up Working Directory + run: sudo rm -rf ${{github.workspace}}/* + + - name: Checkout code + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Clone repo GenAIExamples + run: | + cd .. + git clone https://github.com/opea-project/GenAIExamples + + - name: Check for changed Dockerfile paths + run: | + set -xe + shopt -s globstar + cd ${{github.workspace}} + is_use="FALSE" + used_files="" + merged_commit=$(git log -1 --format='%H') + changed_files="$(git diff --name-status --diff-filter=DR ${{ github.event.pull_request.base.sha }} ${merged_commit} -- '**/Dockerfile' | cut -f2)" + if [ -n "$changed_files" ]; then + for file in $changed_files; do + if grep -q "$file" ../GenAIExamples/**/*.md; then + is_use="TRUE" + used_files+="$file " + fi + done + fi + + if [[ "$is_use" == "TRUE" ]]; then + echo "Warning: Changed Dockerfile paths:" + echo "$used_files" + echo "Please modify the corresponding README in GenAIExamples repo and ask suyue.chen@intel.com for final confirmation." 
+ exit 1 + fi diff --git a/.github/workflows/pr-examples-test.yml b/.github/workflows/pr-examples-test.yml new file mode 100644 index 000000000..92354c5c2 --- /dev/null +++ b/.github/workflows/pr-examples-test.yml @@ -0,0 +1,72 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +name: Example-test + +on: + pull_request_target: + branches: [main] + types: [opened, reopened, ready_for_review, synchronize] # added `ready_for_review` since draft is skipped + paths: + - .github/workflows/pr-examples-test.yml + - comps/cores/** + - comps/embeddings/langchain/** + - comps/retrievers/langchain/redis/** + - comps/reranks/tei/** + - comps/llms/text-generation/tgi/** + - comps/dataprep/redis/langchain/** + - requirements.txt + +# If there is a new commit, the previous jobs will be canceled +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + +jobs: + example-test: + runs-on: gaudi-01-3 + steps: + - name: Clean Up Working Directory + run: | + sudo rm -rf ${{github.workspace}}/* || true + echo y | docker system prune + docker rmi $(docker images --filter reference="*/*:comps" -q) || true + + - name: Checkout out Repo + uses: actions/checkout@v4 + with: + ref: "refs/pull/${{ github.event.number }}/merge" + + - name: Run ChatQnA + env: + HUGGINGFACEHUB_API_TOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }} + run: | + git clone https://github.com/opea-project/GenAIExamples.git + cd ${{ github.workspace }}/GenAIExamples/ChatQnA/docker/gaudi + sed -i "s#:latest#:comps#g" compose.yaml + cat compose.yaml + + cd ${{ github.workspace }}/GenAIExamples/ChatQnA/tests + GenAIComps_dir=${{github.workspace}} + sed -i '/GenAIComps.git/d' test_chatqna_on_gaudi.sh + sed -i "s#cd GenAIComps#cd ${GenAIComps_dir}#g" test_chatqna_on_gaudi.sh + sed -i "s#docker build -t#docker build --no-cache -q -t#g" test_chatqna_on_gaudi.sh + sed -i "s#:latest#:comps#g" test_chatqna_on_gaudi.sh + cat test_chatqna_on_gaudi.sh + + echo "Run test..." 
+ timeout 50m bash test_chatqna_on_gaudi.sh + + - name: Clean up container + if: cancelled() || failure() + run: | + cd ${{ github.workspace }}/GenAIExamples/ChatQnA/docker/gaudi + docker compose stop && docker compose rm -f + docker system prune -f + + - name: Publish pipeline artifact + if: ${{ !cancelled() }} + uses: actions/upload-artifact@v4 + with: + name: "Examples-Test-Logs" + path: ${{ github.workspace }}/GenAIExamples/ChatQnA/tests/*.log diff --git a/.github/workflows/microservice-test.yml b/.github/workflows/pr-microservice-test.yml similarity index 84% rename from .github/workflows/microservice-test.yml rename to .github/workflows/pr-microservice-test.yml index d8000b93d..8dd5e6e6c 100644 --- a/.github/workflows/microservice-test.yml +++ b/.github/workflows/pr-microservice-test.yml @@ -11,8 +11,7 @@ on: - comps/** - tests/** - "!**.md" - - "!**.txt" - - .github/workflows/microservice-test.yml + - .github/workflows/pr-microservice-test.yml # If there is a new commit, the previous jobs will be canceled concurrency: @@ -21,7 +20,7 @@ concurrency: jobs: job1: - uses: ./.github/workflows/reuse-get-test-matrix.yml + uses: ./.github/workflows/_get-test-matrix.yml Microservice-test: needs: job1 @@ -31,7 +30,10 @@ jobs: continue-on-error: true steps: - name: Clean Up Working Directory - run: sudo rm -rf ${{github.workspace}}/* + run: | + sudo rm -rf ${{github.workspace}}/* + docker system prune -f + docker rmi $(docker images --filter reference="*/*:comps" -q) || true - name: Checkout out Repo uses: actions/checkout@v4 @@ -43,6 +45,7 @@ jobs: HF_TOKEN: ${{ secrets.HF_TOKEN }} GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }} GOOGLE_CSE_ID: ${{ secrets.GOOGLE_CSE_ID }} + PINECONE_KEY: ${{ secrets.PINECONE_KEY }} service_path: ${{ matrix.service }} hardware: ${{ matrix.hardware }} run: | @@ -56,7 +59,7 @@ jobs: run: | cid=$(docker ps -aq --filter "name=test-comps-*") if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi - echo y | docker system prune --all + docker system prune -f - name: Publish pipeline artifact if: ${{ !cancelled() }} diff --git a/.github/workflows/push-image-build.yml b/.github/workflows/push-image-build.yml new file mode 100644 index 000000000..5472111dd --- /dev/null +++ b/.github/workflows/push-image-build.yml @@ -0,0 +1,75 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# Test +name: Build latest images on push event + +on: + push: + branches: ["main"] + paths: + - comps/** + - "!**.md" + - "!**.txt" + - .github/workflows/push-image-build.yml + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }}-on-push + cancel-in-progress: true + +jobs: + get-build-matrix: + runs-on: ubuntu-latest + outputs: + services: ${{ steps.get-services.outputs.services }} + steps: + - name: Checkout out Repo + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Get Test Services + id: get-services + run: | + base_commit=$(git rev-parse HEAD~1) + merged_commit=$(git log -1 --format='%H') + changed_files="$(git diff --name-only ${base_commit} ${merged_commit} | grep 'comps/' | grep -vE '*.md|*.txt|comps/cores')" || true + services=$(printf '%s\n' "${changed_files[@]}" | cut -d'/' -f2 | grep -vE '*.py' | sort -u | jq -R '.' 
| jq -sc '.') || true + echo "services=$services" + echo "services=$services" >> $GITHUB_OUTPUT + + image-build: + needs: get-build-matrix + strategy: + matrix: + service: ${{ fromJSON(needs.get-build-matrix.outputs.services) }} + node: [docker-build-xeon, docker-build-gaudi] + runs-on: ${{ matrix.node }} + continue-on-error: true + steps: + - name: Clean up Working Directory + run: | + sudo rm -rf ${{github.workspace}}/* + + - name: Checkout out Repo + uses: actions/checkout@v4 + + - name: Check Docker Compose File Exists + env: + service: ${{ matrix.service }} + run: | + docker_compose_path="${{ github.workspace }}/.github/workflows/docker/compose/${service}-compose.yaml" + if [ -e $docker_compose_path ]; then + echo "file_exists=true" >> $GITHUB_ENV + echo "docker_compose_path=${docker_compose_path}" >> $GITHUB_ENV + else + echo "file_exists=false" >> $GITHUB_ENV + echo "docker_compose_path=${docker_compose_path} for this service does not exist, so skipping image build for this service!!!" + fi + + - name: Build Image + if: env.file_exists == 'true' + uses: opea-project/validation/actions/image-build@main + with: + work_dir: ${{ github.workspace }} + docker_compose_path: ${{ env.docker_compose_path }} + registry: ${OPEA_IMAGE_REPO} diff --git a/.github/workflows/reuse-image-build.yml b/.github/workflows/reuse-image-build.yml deleted file mode 100644 index e2ed6883b..000000000 --- a/.github/workflows/reuse-image-build.yml +++ /dev/null @@ -1,32 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -name: Image Build -permissions: read-all -on: - workflow_call: - inputs: - micro_service: - required: true - type: string - -jobs: - micro-image-build: - continue-on-error: true - strategy: - matrix: - node: [docker-build-xeon, docker-build-gaudi] - runs-on: ${{ matrix.node }} - steps: - - name: Checkout out Repo - uses: actions/checkout@v4 - with: - fetch-depth: 0 - - - name: Building MicroService Docker Image - id: build-microservice-image - env: - micro_service: ${{ inputs.micro_service }} - hardware: ${{ matrix.node }} - run: | - bash .github/workflows/scripts/docker_images_build_push.sh ${micro_service} ${hardware} diff --git a/.github/workflows/schedule-image-build.yml b/.github/workflows/schedule-image-build.yml new file mode 100644 index 000000000..6db94a7fc --- /dev/null +++ b/.github/workflows/schedule-image-build.yml @@ -0,0 +1,63 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +name: Build latest images on schedule event + +on: + schedule: + - cron: "0 17 * * *" # 5:00 PM UTC every day, 1:00 AM CST every day + +env: + tag: "latest" + +jobs: + check-build: + runs-on: ubuntu-latest + outputs: + run_build: ${{ steps.get-changes.outputs.run_build }} + steps: + - name: Checkout out Repo + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Check if build is needed + id: get-changes + run: | + set -x + changed_files=$(git log --since="24 hours ago" --name-only --pretty=format: | grep comps/cores | grep -vE '*.md') || true + if [ -z "$changed_files" ]; then + echo "No changes in the last 24 hours for the 'comps/cores' directory" + echo "run_build=false" >> $GITHUB_OUTPUT + else + echo "Changes detected in the last 24 hours for the 'comps/cores' directory" + echo "run_build=true" >> $GITHUB_OUTPUT + fi + + image-build: + needs: check-build + if: ${{ needs.check-build.outputs.run_build == 'true' }} + strategy: + matrix: + service: + ["asr", "dataprep", "embeddings", "guardrails", "llms", "reranks", 
"retrievers", "tts", "web_retrievers"] + node: ["docker-build-xeon", "docker-build-gaudi"] + runs-on: ${{ matrix.node }} + continue-on-error: true + steps: + - name: Clean Up Working Directory + run: | + sudo rm -rf ${{github.workspace}}/* + + - name: Checkout out Repo + uses: actions/checkout@v4 + + - name: Build image + env: + service: ${{ matrix.service }} + uses: opea-project/validation/actions/image-build@main + with: + work_dir: ${{ github.workspace }} + docker_compose_path: ${{ github.workspace }}/.github/workflows/docker/compose/${service}-compose.yaml + registry: ${OPEA_IMAGE_REPO} + tag: ${tag} diff --git a/.github/workflows/scripts/codeScan/bandit.sh b/.github/workflows/scripts/codeScan/bandit.sh index e0f5137d2..aa5aa93a7 100644 --- a/.github/workflows/scripts/codeScan/bandit.sh +++ b/.github/workflows/scripts/codeScan/bandit.sh @@ -6,7 +6,7 @@ source /GenAIComps/.github/workflows/scripts/change_color pip install bandit==1.7.8 log_dir=/GenAIComps/.github/workflows/scripts/codeScan -python -m bandit -r -lll -iii /GenAIComps 2>&1 | tee ${log_dir}/bandit.log +python -m bandit -r -lll -iii /GenAIComps > ${log_dir}/bandit.log exit_code=$? $BOLD_YELLOW && echo " ----------------- Current log file output start --------------------------" diff --git a/.github/workflows/scripts/docker_images_build_push.sh b/.github/workflows/scripts/docker_images_build_push.sh deleted file mode 100644 index a2ce94581..000000000 --- a/.github/workflows/scripts/docker_images_build_push.sh +++ /dev/null @@ -1,77 +0,0 @@ -#!/bin/bash -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -set -xe - -WORKSPACE=$PWD -IMAGE_REPO=${IMAGE_REPO:-$OPEA_IMAGE_REPO} -IMAGE_TAG=${IMAGE_TAG:-latest} - -function docker_build() { - # docker_build - IMAGE_NAME=$1 - micro_service=$2 - dockerfile_path=${WORKSPACE}/comps/${micro_service} - if [[ "$IMAGE_NAME" == *"gaudi" ]]; then - dockerfile_name="Dockerfile_hpu" - else - dockerfile_name="Dockerfile" - fi - if [ -f "$dockerfile_path/$dockerfile_name" ]; then - DOCKERFILE_PATH="$dockerfile_path/$dockerfile_name" - elif [ -f "$dockerfile_path/docker/$dockerfile_name" ]; then - DOCKERFILE_PATH="$dockerfile_path/docker/$dockerfile_name" - else - echo "Dockerfile not found" - exit 1 - fi - echo "Building ${IMAGE_REPO}${IMAGE_NAME}:$IMAGE_TAG using Dockerfile $DOCKERFILE_PATH" - - docker build --no-cache -t ${IMAGE_REPO}${IMAGE_NAME}:$IMAGE_TAG -f $DOCKERFILE_PATH . 
- docker push ${IMAGE_REPO}${IMAGE_NAME}:$IMAGE_TAG - docker rmi ${IMAGE_REPO}${IMAGE_NAME}:$IMAGE_TAG -} - -micro_service=$1 -hardware=$(echo $2 | cut -d- -f3) -case ${micro_service} in - "asr"|"tts") - IMAGE_NAME="opea/${micro_service}" - ;; - "embeddings/langchain") - IMAGE_NAME="opea/embedding-tei" - ;; - "retrievers/langchain") - IMAGE_NAME="opea/retriever-redis" - ;; - "reranks/tei") - IMAGE_NAME="opea/reranking-tei" - ;; - "llms/text-generation/tgi") - IMAGE_NAME="opea/llm-tgi" - ;; - "dataprep/redis/langchain") - IMAGE_NAME="opea/dataprep-redis" - ;; - "llms/summarization/tgi") - IMAGE_NAME="opea/llm-docsum-tgi" - ;; - "llms/faq-generation/tgi") - IMAGE_NAME="opea/llm-faqgen-tgi" - ;; - "web_retrievers/langchain/chroma") - IMAGE_NAME="opea/web-retriever-chroma" - ;; - "tts/speecht5") - if [ "${hardware}" == "gaudi" ]; then IMAGE_NAME="opea/speecht5-gaudi"; else IMAGE_NAME="opea/speecht5"; fi - ;; - "asr/whisper") - if [ "${hardware}" == "gaudi" ]; then IMAGE_NAME="opea/whisper-gaudi"; else IMAGE_NAME="opea/whisper"; fi - ;; - *) - echo "Not supported yet" - exit 0 - ;; -esac -docker_build "${IMAGE_NAME}" "${micro_service}" diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml deleted file mode 100644 index 94b6ef31e..000000000 --- a/.github/workflows/test.yaml +++ /dev/null @@ -1,55 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -name: File Change Warning - -on: - pull_request: - branches: [main] - types: [opened, reopened, ready_for_review, synchronize] - -# If there is a new commit, the previous jobs will be canceled -concurrency: - group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} - cancel-in-progress: true - -jobs: - file-change-detection: - runs-on: ubuntu-latest - steps: - - name: Clean Up Working Directory - run: sudo rm -rf ${{github.workspace}}/* - - - name: Checkout code - uses: actions/checkout@v4 - with: - fetch-depth: 0 - - - name: Clone repo GenAIExamples - run: | - cd .. - git clone https://github.com/opea-project/GenAIExamples - - - name: Check for changed Dockerfile paths - run: | - shopt -s globstar - cd ${{github.workspace}} - is_use="FALSE" - used_files="" - merged_commit=$(git log -1 --format='%H') - changed_files="$(git diff --name-status --diff-filter=DR ${{ github.event.pull_request.base.sha }} ${merged_commit} -- '**/Dockerfile' | cut -f2)" - if [ -n "$changed_files" ]; then - for file in $changed_files; do - if grep -q "$file" ../GenAIExamples/**/*.md; then - is_use="TRUE" - used_files+="$file " - fi - done - fi - - if [[ "$is_use" == "TRUE" ]]; then - echo "Warning: Changed Dockerfile paths:" - echo "$used_files" - echo "Please modify the corresponding README in GenAIExamples repo and ask suyue.chen@intel.com for final confirmation." - exit 1 - fi diff --git a/README.md b/README.md index c40d060d0..8c3af2373 100644 --- a/README.md +++ b/README.md @@ -175,7 +175,7 @@ A `Microservices` can be created by using the decorator `register_microservice`. 
from langchain_community.embeddings import HuggingFaceHubEmbeddings from langsmith import traceable -from comps import register_microservice, EmbedDoc768, ServiceType, TextDoc +from comps import register_microservice, EmbedDoc, ServiceType, TextDoc @register_microservice( @@ -185,13 +185,12 @@ from comps import register_microservice, EmbedDoc768, ServiceType, TextDoc host="0.0.0.0", port=6000, input_datatype=TextDoc, - output_datatype=EmbedDoc768, + output_datatype=EmbedDoc, ) @traceable(run_type="embedding") -def embedding(input: TextDoc) -> EmbedDoc768: +def embedding(input: TextDoc) -> EmbedDoc: embed_vector = embeddings.embed_query(input.text) - embed_vector = embed_vector[:768] # Keep only the first 768 elements - res = EmbedDoc768(text=input.text, embedding=embed_vector) + res = EmbedDoc(text=input.text, embedding=embed_vector) return res ``` diff --git a/comps/__init__.py b/comps/__init__.py index e537f4081..cabff34fe 100644 --- a/comps/__init__.py +++ b/comps/__init__.py @@ -8,8 +8,7 @@ Audio2TextDoc, Base64ByteStrDoc, DocPath, - EmbedDoc768, - EmbedDoc1024, + EmbedDoc, GeneratedDoc, LLMParamsDoc, SearchedDoc, @@ -41,6 +40,7 @@ SearchQnAGateway, AudioQnAGateway, FaqGenGateway, + VisualQnAGateway, ) # Telemetry diff --git a/comps/agent/langchain/docker/Dockerfile b/comps/agent/langchain/docker/Dockerfile index 2540c7bad..9a966b952 100644 --- a/comps/agent/langchain/docker/Dockerfile +++ b/comps/agent/langchain/docker/Dockerfile @@ -3,7 +3,8 @@ FROM python:3.11-slim -ENV LANG C.UTF-8 +ENV LANG=C.UTF-8 +ARG ARCH=cpu RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ build-essential \ @@ -19,8 +20,11 @@ USER user COPY comps /home/user/comps RUN pip install --no-cache-dir --upgrade pip setuptools && \ - if [ ${ARCH} = "cpu" ]; then pip install torch --index-url https://download.pytorch.org/whl/cpu; fi && \ - pip install --no-cache-dir -r /home/user/comps/agent/langchain/requirements.txt + if [ ${ARCH} = "cpu" ]; then \ + pip install --no-cache-dir --extra-index-url https://download.pytorch.org/whl/cpu -r /home/user/comps/agent/langchain/requirements.txt; \ + else \ + pip install --no-cache-dir -r /home/user/comps/agent/langchain/requirements.txt; \ + fi ENV PYTHONPATH=$PYTHONPATH:/home/user diff --git a/comps/agent/langchain/requirements.txt b/comps/agent/langchain/requirements.txt index 94bff2a3c..16859ac2c 100644 --- a/comps/agent/langchain/requirements.txt +++ b/comps/agent/langchain/requirements.txt @@ -29,6 +29,8 @@ tavily-python transformers transformers[sentencepiece] +uvicorn + # used by document loader # beautifulsoup4 # easyocr diff --git a/comps/asr/Dockerfile b/comps/asr/Dockerfile index cc2740b7d..3a4425437 100644 --- a/comps/asr/Dockerfile +++ b/comps/asr/Dockerfile @@ -3,15 +3,25 @@ FROM python:3.11-slim +RUN useradd -m -s /bin/bash user && \ + mkdir -p /home/user && \ + chown -R user /home/user/ +USER user + ENV LANG=C.UTF-8 +ARG ARCH=cpu -COPY comps /home/comps +COPY comps /home/user/comps RUN pip install --no-cache-dir --upgrade pip && \ - pip install --no-cache-dir -r /home/comps/asr/requirements.txt + if [ "${ARCH}" = "cpu" ]; then \ + pip install --no-cache-dir --extra-index-url https://download.pytorch.org/whl/cpu -r /home/user/comps/asr/requirements.txt ; \ + else \ + pip install --no-cache-dir -r /home/user/comps/asr/requirements.txt ; \ + fi -ENV PYTHONPATH=$PYTHONPATH:/home +ENV PYTHONPATH=$PYTHONPATH:/home/user -WORKDIR /home/comps/asr +WORKDIR /home/user/comps/asr ENTRYPOINT ["python", "asr.py"] \ No newline at end of file 
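The README change earlier in this patch replaces the fixed-width `EmbedDoc768` with the size-agnostic `EmbedDoc`, so the example embedding microservice no longer clips vectors to 768 elements. As a rough, hypothetical illustration of what that means for a caller (the `/v1/embeddings` path and the local host/port are assumptions, not part of this patch), a client could check that the returned vector keeps the model's native dimension:

```python
# Hypothetical client-side check against the README's embedding microservice (not part of this patch).
# Assumes the service from the README snippet is running locally on port 6000 at /v1/embeddings.
import requests

resp = requests.post(
    "http://localhost:6000/v1/embeddings",
    json={"text": "What is Deep Learning?"},
    timeout=30,
)
resp.raise_for_status()

doc = resp.json()
# EmbedDoc declares `embedding: conlist(float, min_length=0)` (see the docarray.py hunk below),
# so the vector keeps whatever dimension the underlying embedding model produces instead of
# being truncated to 768 as EmbedDoc768 required.
print(len(doc["embedding"]))
```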
diff --git a/comps/asr/requirements.txt b/comps/asr/requirements.txt index 1698cbe80..3ebeff787 100644 --- a/comps/asr/requirements.txt +++ b/comps/asr/requirements.txt @@ -11,4 +11,5 @@ pydub shortuuid torch transformers +uvicorn zhconv diff --git a/comps/asr/whisper/Dockerfile b/comps/asr/whisper/Dockerfile index c3e2a0025..57e186a4e 100644 --- a/comps/asr/whisper/Dockerfile +++ b/comps/asr/whisper/Dockerfile @@ -3,21 +3,33 @@ FROM python:3.11-slim + +RUN useradd -m -s /bin/bash user && \ + mkdir -p /home/user && \ + chown -R user /home/user/ + # Set environment variables ENV LANG=en_US.UTF-8 -ENV PYTHONPATH=/home/user +ARG ARCH=cpu # Install system dependencies RUN apt-get update \ && apt-get install -y ffmpeg -COPY comps /home/comps +COPY --chown=user:user comps /home/user/comps + +USER user RUN pip install --no-cache-dir --upgrade pip && \ - pip install --no-cache-dir -r /home/comps/asr/requirements.txt + pip install --no-cache-dir -r /home/user/comps/asr/requirements.txt && \ + if [ "${ARCH}" = "cpu" ]; then \ + pip install --no-cache-dir --extra-index-url https://download.pytorch.org/whl/cpu -r /home/user/comps/asr/requirements.txt ; \ + else \ + pip install --no-cache-dir -r /home/user/comps/asr/requirements.txt ; \ + fi -ENV PYTHONPATH=$PYTHONPATH:/home +ENV PYTHONPATH=$PYTHONPATH:/home/user -WORKDIR /home/comps/asr/whisper +WORKDIR /home/user/comps/asr/whisper -ENTRYPOINT ["python", "whisper_server.py", "--device", "cpu"] \ No newline at end of file +ENTRYPOINT ["python", "whisper_server.py", "--device", "cpu"] diff --git a/comps/asr/whisper/Dockerfile_hpu b/comps/asr/whisper/Dockerfile_hpu index 128b8d5cc..15b2688f0 100644 --- a/comps/asr/whisper/Dockerfile_hpu +++ b/comps/asr/whisper/Dockerfile_hpu @@ -4,23 +4,30 @@ # HABANA environment FROM vault.habana.ai/gaudi-docker/1.16.1/ubuntu22.04/habanalabs/pytorch-installer-2.2.2:latest AS hpu +RUN useradd -m -s /bin/bash user && \ + mkdir -p /home/user && \ + chown -R user /home/user/ + # Set environment variables ENV LANG=en_US.UTF-8 ENV PYTHONPATH=/home/user:/usr/lib/habanalabs/:/optimum-habana +ENV LD_LIBRARY_PATH=/lib/x86_64-linux-gnu:$LD_LIBRARY_PATH # Install system dependencies RUN apt-get update \ && apt-get install -y ffmpeg -COPY comps /home/comps +COPY --chown=user:user comps /home/user/comps + +USER user # Install requirements and optimum habana RUN pip install --no-cache-dir --upgrade pip && \ - pip install --no-cache-dir -r /home/comps/asr/requirements.txt && \ + pip install --no-cache-dir -r /home/user/comps/asr/requirements.txt && \ pip install optimum[habana] -ENV PYTHONPATH=$PYTHONPATH:/home +ENV PYTHONPATH=$PYTHONPATH:/home/user -WORKDIR /home/comps/asr/whisper +WORKDIR /home/user/comps/asr/whisper ENTRYPOINT ["python", "whisper_server.py", "--device", "hpu"] \ No newline at end of file diff --git a/comps/chathistory/mongo/README.md b/comps/chathistory/mongo/README.md index 4a132e9c8..2eaa62e55 100644 --- a/comps/chathistory/mongo/README.md +++ b/comps/chathistory/mongo/README.md @@ -60,7 +60,7 @@ curl -X 'POST' \ ```bash curl -X 'POST' \ - http://${host_ip}:6013/v1/chathistory/get \ + http://${host_ip}:6012/v1/chathistory/get \ -H 'accept: application/json' \ -H 'Content-Type: application/json' \ -d '{ @@ -71,7 +71,7 @@ curl -X 'POST' \ ```bash curl -X 'POST' \ - http://${host_ip}:6013/v1/chathistory/get \ + http://${host_ip}:6012/v1/chathistory/get \ -H 'accept: application/json' \ -H 'Content-Type: application/json' \ -d '{ @@ -97,7 +97,7 @@ curl -X 'POST' \ ```bash curl -X 'POST' \ - 
http://${host_ip}:6014/v1/chathistory/delete \ + http://${host_ip}:6012/v1/chathistory/delete \ -H 'accept: application/json' \ -H 'Content-Type: application/json' \ -d '{ diff --git a/comps/chathistory/mongo/chathistory_mongo.py b/comps/chathistory/mongo/chathistory_mongo.py index 5b65d1d8e..1993503da 100644 --- a/comps/chathistory/mongo/chathistory_mongo.py +++ b/comps/chathistory/mongo/chathistory_mongo.py @@ -35,7 +35,7 @@ def get_first_string(value): @register_microservice( - name="opea_service@chathistory_mongo_create", + name="opea_service@chathistory_mongo", endpoint="/v1/chathistory/create", host="0.0.0.0", input_datatype=ChatMessage, @@ -70,11 +70,11 @@ async def create_documents(document: ChatMessage): @register_microservice( - name="opea_service@chathistory_mongo_get", + name="opea_service@chathistory_mongo", endpoint="/v1/chathistory/get", host="0.0.0.0", input_datatype=ChatId, - port=6013, + port=6012, ) async def get_documents(document: ChatId): """Retrieves documents from the document store based on the provided ChatId. @@ -100,11 +100,11 @@ async def get_documents(document: ChatId): @register_microservice( - name="opea_service@chathistory_mongo_delete", + name="opea_service@chathistory_mongo", endpoint="/v1/chathistory/delete", host="0.0.0.0", input_datatype=ChatId, - port=6014, + port=6012, ) async def delete_documents(document: ChatId): """Deletes a document from the document store based on the provided ChatId. @@ -130,6 +130,4 @@ async def delete_documents(document: ChatId): if __name__ == "__main__": - opea_microservices["opea_service@chathistory_mongo_get"].start() - opea_microservices["opea_service@chathistory_mongo_create"].start() - opea_microservices["opea_service@chathistory_mongo_delete"].start() + opea_microservices["opea_service@chathistory_mongo"].start() diff --git a/comps/chathistory/mongo/docker/Dockerfile b/comps/chathistory/mongo/docker/Dockerfile index 986aac504..5209af835 100644 --- a/comps/chathistory/mongo/docker/Dockerfile +++ b/comps/chathistory/mongo/docker/Dockerfile @@ -3,7 +3,7 @@ FROM python:3.11-slim -ENV LANG C.UTF-8 +ENV LANG=C.UTF-8 RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ build-essential \ diff --git a/comps/chathistory/mongo/docker/docker-compose-chathistory-mongo.yaml b/comps/chathistory/mongo/docker/docker-compose-chathistory-mongo.yaml index 97e17e077..e272d4f91 100644 --- a/comps/chathistory/mongo/docker/docker-compose-chathistory-mongo.yaml +++ b/comps/chathistory/mongo/docker/docker-compose-chathistory-mongo.yaml @@ -19,8 +19,6 @@ services: container_name: chathistory-mongo-server ports: - "6012:6012" - - "6013:6013" - - "6014:6014" ipc: host environment: http_proxy: ${http_proxy} diff --git a/comps/cores/mega/README.md b/comps/cores/mega/README.md deleted file mode 100644 index e69de29bb..000000000 diff --git a/comps/cores/mega/gateway.py b/comps/cores/mega/gateway.py index 862205414..6eb069e6e 100644 --- a/comps/cores/mega/gateway.py +++ b/comps/cores/mega/gateway.py @@ -1,8 +1,14 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import base64 +import os +from io import BytesIO + +import requests from fastapi import Request from fastapi.responses import StreamingResponse +from PIL import Image from ..proto.api_protocol import ( AudioChatCompletionRequest, @@ -71,10 +77,13 @@ def list_parameter(self): pass def _handle_message(self, messages): + images = [] if isinstance(messages, str): prompt = messages else: messages_dict = {} + system_prompt = "" + prompt 
= "" for message in messages: msg_role = message["role"] if msg_role == "system": @@ -84,20 +93,56 @@ def _handle_message(self, messages): text = "" text_list = [item["text"] for item in message["content"] if item["type"] == "text"] text += "\n".join(text_list) - messages_dict[msg_role] = text + image_list = [ + item["image_url"]["url"] for item in message["content"] if item["type"] == "image_url" + ] + if image_list: + messages_dict[msg_role] = (text, image_list) + else: + messages_dict[msg_role] = text else: messages_dict[msg_role] = message["content"] elif msg_role == "assistant": messages_dict[msg_role] = message["content"] else: raise ValueError(f"Unknown role: {msg_role}") - prompt = system_prompt + "\n" + if system_prompt: + prompt = system_prompt + "\n" for role, message in messages_dict.items(): - if message: - prompt += role + ": " + message + "\n" + if isinstance(message, tuple): + text, image_list = message + if text: + prompt += role + ": " + text + "\n" + else: + prompt += role + ":" + for img in image_list: + # URL + if img.startswith("http://") or img.startswith("https://"): + response = requests.get(img) + image = Image.open(BytesIO(response.content)).convert("RGBA") + image_bytes = BytesIO() + image.save(image_bytes, format="PNG") + img_b64_str = base64.b64encode(image_bytes.getvalue()).decode() + # Local Path + elif os.path.exists(img): + image = Image.open(img).convert("RGBA") + image_bytes = BytesIO() + image.save(image_bytes, format="PNG") + img_b64_str = base64.b64encode(image_bytes.getvalue()).decode() + # Bytes + else: + img_b64_str = img + + images.append(img_b64_str) else: - prompt += role + ":" - return prompt + if message: + prompt += role + ": " + message + "\n" + else: + prompt += role + ":" + if images: + return prompt, images + else: + return prompt class ChatQnAGateway(Gateway): @@ -118,6 +163,7 @@ async def handle_request(self, request: Request): temperature=chat_request.temperature if chat_request.temperature else 0.01, repetition_penalty=chat_request.presence_penalty if chat_request.presence_penalty else 1.03, streaming=stream_opt, + chat_template=chat_request.chat_template if chat_request.chat_template else None, ) result_dict, runtime_graph = await self.megaservice.schedule( initial_inputs={"text": prompt}, llm_parameters=parameters @@ -439,3 +485,47 @@ async def handle_request(self, request: Request): ) ) return ChatCompletionResponse(model="faqgen", choices=choices, usage=usage) + + +class VisualQnAGateway(Gateway): + def __init__(self, megaservice, host="0.0.0.0", port=8888): + super().__init__( + megaservice, host, port, str(MegaServiceEndpoint.VISUAL_QNA), ChatCompletionRequest, ChatCompletionResponse + ) + + async def handle_request(self, request: Request): + data = await request.json() + stream_opt = data.get("stream", False) + chat_request = ChatCompletionRequest.parse_obj(data) + prompt, images = self._handle_message(chat_request.messages) + parameters = LLMParams( + max_new_tokens=chat_request.max_tokens if chat_request.max_tokens else 1024, + top_k=chat_request.top_k if chat_request.top_k else 10, + top_p=chat_request.top_p if chat_request.top_p else 0.95, + temperature=chat_request.temperature if chat_request.temperature else 0.01, + repetition_penalty=chat_request.presence_penalty if chat_request.presence_penalty else 1.03, + streaming=stream_opt, + ) + result_dict, runtime_graph = await self.megaservice.schedule( + initial_inputs={"prompt": prompt, "image": images[0]}, llm_parameters=parameters + ) + for node, response in 
result_dict.items(): + # Here it suppose the last microservice in the megaservice is LVM. + if ( + isinstance(response, StreamingResponse) + and node == list(self.megaservice.services.keys())[-1] + and self.megaservice.services[node].service_type == ServiceType.LVM + ): + return response + last_node = runtime_graph.all_leaves()[-1] + response = result_dict[last_node]["text"] + choices = [] + usage = UsageInfo() + choices.append( + ChatCompletionResponseChoice( + index=0, + message=ChatMessage(role="assistant", content=response), + finish_reason="stop", + ) + ) + return ChatCompletionResponse(model="visualqna", choices=choices, usage=usage) diff --git a/comps/cores/mega/micro_service.py b/comps/cores/mega/micro_service.py index e83a2836b..e1276716c 100644 --- a/comps/cores/mega/micro_service.py +++ b/comps/cores/mega/micro_service.py @@ -156,23 +156,24 @@ def register_microservice( provider_endpoint: Optional[str] = None, ): def decorator(func): - micro_service = MicroService( - name=name, - service_role=service_role, - service_type=service_type, - protocol=protocol, - host=host, - port=port, - ssl_keyfile=ssl_keyfile, - ssl_certfile=ssl_certfile, - endpoint=endpoint, - input_datatype=input_datatype, - output_datatype=output_datatype, - provider=provider, - provider_endpoint=provider_endpoint, - ) - micro_service.app.router.add_api_route(endpoint, func, methods=["POST"]) - opea_microservices[name] = micro_service + if name not in opea_microservices: + micro_service = MicroService( + name=name, + service_role=service_role, + service_type=service_type, + protocol=protocol, + host=host, + port=port, + ssl_keyfile=ssl_keyfile, + ssl_certfile=ssl_certfile, + endpoint=endpoint, + input_datatype=input_datatype, + output_datatype=output_datatype, + provider=provider, + provider_endpoint=provider_endpoint, + ) + opea_microservices[name] = micro_service + opea_microservices[name].app.router.add_api_route(endpoint, func, methods=["POST"]) return func return decorator diff --git a/comps/cores/mega/orchestrator.py b/comps/cores/mega/orchestrator.py index 2e33a564d..616af41c8 100644 --- a/comps/cores/mega/orchestrator.py +++ b/comps/cores/mega/orchestrator.py @@ -47,7 +47,7 @@ async def schedule(self, initial_inputs: Dict, llm_parameters: LLMParams = LLMPa timeout = aiohttp.ClientTimeout(total=1000) async with aiohttp.ClientSession(trust_env=True, timeout=timeout) as session: pending = { - asyncio.create_task(self.execute(session, node, initial_inputs, runtime_graph)) + asyncio.create_task(self.execute(session, node, initial_inputs, runtime_graph, llm_parameters)) for node in self.ind_nodes() } ind_nodes = self.ind_nodes() @@ -117,7 +117,10 @@ async def execute( if inputs.get(field) != value: inputs[field] = value - if self.services[cur_node].service_type == ServiceType.LLM and llm_parameters.streaming: + if ( + self.services[cur_node].service_type == ServiceType.LLM + or self.services[cur_node].service_type == ServiceType.LVM + ) and llm_parameters.streaming: # Still leave to sync requests.post for StreamingResponse response = requests.post( url=endpoint, data=json.dumps(inputs), proxies={"http": None}, stream=True, timeout=1000 @@ -126,8 +129,8 @@ async def execute( if downstream: assert len(downstream) == 1, "Not supported multiple streaming downstreams yet!" 
cur_node = downstream[0] - hitted_ends = [".", "?", "!", "。", ",", "!"] - endpoint = self.services[downstream[0]].endpoint_path + hitted_ends = [".", "?", "!", "。", ",", "!"] + downstream_endpoint = self.services[downstream[0]].endpoint_path def generate(): if response: @@ -140,7 +143,7 @@ def generate(): is_last = chunk.endswith("[DONE]\n\n") if (buffered_chunk_str and buffered_chunk_str[-1] in hitted_ends) or is_last: res = requests.post( - url=endpoint, + url=downstream_endpoint, data=json.dumps({"text": buffered_chunk_str}), proxies={"http": None}, ) diff --git a/comps/cores/proto/api_protocol.py b/comps/cores/proto/api_protocol.py index 957fc9d95..bd52d7274 100644 --- a/comps/cores/proto/api_protocol.py +++ b/comps/cores/proto/api_protocol.py @@ -30,24 +30,243 @@ class UsageInfo(BaseModel): completion_tokens: Optional[int] = 0 +class ResponseFormat(BaseModel): + # type must be "json_object" or "text" + type: Literal["text", "json_object"] + + +class StreamOptions(BaseModel): + # refer https://github.com/vllm-project/vllm/blob/main/vllm/entrypoints/openai/protocol.py#L105 + include_usage: Optional[bool] + + +class FunctionDefinition(BaseModel): + name: str + description: Optional[str] = None + parameters: Optional[Dict[str, Any]] = None + + +class ChatCompletionToolsParam(BaseModel): + type: Literal["function"] = "function" + function: FunctionDefinition + + +class ChatCompletionNamedFunction(BaseModel): + name: str + + +class ChatCompletionNamedToolChoiceParam(BaseModel): + function: ChatCompletionNamedFunction + type: Literal["function"] = "function" + + +class TokenCheckRequestItem(BaseModel): + model: str + prompt: str + max_tokens: int + + +class TokenCheckRequest(BaseModel): + prompts: List[TokenCheckRequestItem] + + +class TokenCheckResponseItem(BaseModel): + fits: bool + tokenCount: int + contextLength: int + + +class TokenCheckResponse(BaseModel): + prompts: List[TokenCheckResponseItem] + + +class EmbeddingRequest(BaseModel): + # Ordered by official OpenAI API documentation + # https://platform.openai.com/docs/api-reference/embeddings + model: Optional[str] = None + input: Union[List[int], List[List[int]], str, List[str]] + encoding_format: Optional[str] = Field("float", pattern="^(float|base64)$") + dimensions: Optional[int] = None + user: Optional[str] = None + + # define + request_type: Literal["embedding"] = "embedding" + + +class EmbeddingResponseData(BaseModel): + index: int + object: str = "embedding" + embedding: Union[List[float], str] + + +class EmbeddingResponse(BaseModel): + object: str = "list" + model: Optional[str] = None + data: List[EmbeddingResponseData] + usage: Optional[UsageInfo] = None + + +class RetrievalRequest(BaseModel): + embedding: Union[EmbeddingResponse, List[float]] = None + input: Optional[str] = None # search_type maybe need, like "mmr" + search_type: str = "similarity" + k: int = 4 + distance_threshold: Optional[float] = None + fetch_k: int = 20 + lambda_mult: float = 0.5 + score_threshold: float = 0.2 + + # define + request_type: Literal["retrieval"] = "retrieval" + + +class RetrievalResponseData(BaseModel): + text: str + metadata: Optional[Dict[str, Any]] = None + + +class RetrievalResponse(BaseModel): + retrieved_docs: List[RetrievalResponseData] + + +class RerankingRequest(BaseModel): + input: str + retrieved_docs: Union[List[RetrievalResponseData], List[Dict[str, Any]], List[str]] + top_n: int = 1 + + # define + request_type: Literal["reranking"] = "reranking" + + +class RerankingResponseData(BaseModel): + text: str + score: 
Optional[float] = 0.0 + + +class RerankingResponse(BaseModel): + reranked_docs: List[RerankingResponseData] + + class ChatCompletionRequest(BaseModel): + # Ordered by official OpenAI API documentation + # https://platform.openai.com/docs/api-reference/chat/create messages: Union[ str, List[Dict[str, str]], List[Dict[str, Union[str, List[Dict[str, Union[str, Dict[str, str]]]]]]], ] model: Optional[str] = "Intel/neural-chat-7b-v3-3" - temperature: Optional[float] = 0.01 - top_p: Optional[float] = 0.95 - top_k: Optional[int] = 10 + frequency_penalty: Optional[float] = 0.0 + logit_bias: Optional[Dict[str, float]] = None + logprobs: Optional[bool] = False + top_logprobs: Optional[int] = 0 + max_tokens: Optional[int] = 16 # use https://platform.openai.com/docs/api-reference/completions/create n: Optional[int] = 1 - max_tokens: Optional[int] = 1024 - stop: Optional[Union[str, List[str]]] = None + presence_penalty: Optional[float] = 0.0 + response_format: Optional[ResponseFormat] = None + seed: Optional[int] = None + service_tier: Optional[str] = None + stop: Union[str, List[str], None] = Field(default_factory=list) stream: Optional[bool] = False - presence_penalty: Optional[float] = 1.03 - frequency_penalty: Optional[float] = 0.0 + stream_options: Optional[StreamOptions] = None + temperature: Optional[float] = 1.0 # vllm default 0.7 + top_p: Optional[float] = None # openai default 1.0, but tgi needs `top_p` must be > 0.0 and < 1.0, set None + tools: Optional[List[ChatCompletionToolsParam]] = None + tool_choice: Optional[Union[Literal["none"], ChatCompletionNamedToolChoiceParam]] = "none" + parallel_tool_calls: Optional[bool] = True user: Optional[str] = None + # Ordered by official OpenAI API documentation + # default values are same with + # https://platform.openai.com/docs/api-reference/completions/create + best_of: Optional[int] = 1 + suffix: Optional[str] = None + + # vllm reference: https://github.com/vllm-project/vllm/blob/main/vllm/entrypoints/openai/protocol.py#L130 + repetition_penalty: Optional[float] = 1.0 + + # tgi reference: https://huggingface.github.io/text-generation-inference/#/Text%20Generation%20Inference/generate + # some tgi parameters in use + # default values are same with + # https://github.com/huggingface/text-generation-inference/blob/main/router/src/lib.rs#L190 + # max_new_tokens: Optional[int] = 100 # Priority use openai + top_k: Optional[int] = None + # top_p: Optional[float] = None # Priority use openai + typical_p: Optional[float] = None + # repetition_penalty: Optional[float] = None + + # doc: begin-chat-completion-extra-params + echo: Optional[bool] = Field( + default=False, + description=( + "If true, the new message will be prepended with the last message " "if they belong to the same role." + ), + ) + add_generation_prompt: Optional[bool] = Field( + default=True, + description=( + "If true, the generation prompt will be added to the chat template. " + "This is a parameter used by chat template in tokenizer config of the " + "model." + ), + ) + add_special_tokens: Optional[bool] = Field( + default=False, + description=( + "If true, special tokens (e.g. BOS) will be added to the prompt " + "on top of what is added by the chat template. " + "For most models, the chat template takes care of adding the " + "special tokens so this should be set to False (as is the " + "default)." 
+ ), + ) + documents: Optional[Union[List[Dict[str, str]], List[str]]] = Field( + default=None, + description=( + "A list of dicts representing documents that will be accessible to " + "the model if it is performing RAG (retrieval-augmented generation)." + " If the template does not support RAG, this argument will have no " + "effect. We recommend that each document should be a dict containing " + '"title" and "text" keys.' + ), + ) + chat_template: Optional[str] = Field( + default=None, + description=( + "A template to use for this conversion. " + "If this is not passed, the model's default chat template will be " + "used instead. We recommend that the template contains {context} and {question} for rag," + "or only contains {question} for chat completion without rag." + ), + ) + chat_template_kwargs: Optional[Dict[str, Any]] = Field( + default=None, + description=("Additional kwargs to pass to the template renderer. " "Will be accessible by the chat template."), + ) + # doc: end-chat-completion-extra-params + + # embedding + input: Union[List[int], List[List[int]], str, List[str]] = None # user query/question from messages[-] + encoding_format: Optional[str] = Field("float", pattern="^(float|base64)$") + dimensions: Optional[int] = None + embedding: Union[EmbeddingResponse, List[float]] = Field(default_factory=list) + + # retrieval + search_type: str = "similarity" + k: int = 4 + distance_threshold: Optional[float] = None + fetch_k: int = 20 + lambda_mult: float = 0.5 + score_threshold: float = 0.2 + retrieved_docs: Union[List[RetrievalResponseData], List[Dict[str, Any]]] = Field(default_factory=list) + + # reranking + top_n: int = 1 + reranked_docs: Union[List[RerankingResponseData], List[Dict[str, Any]]] = Field(default_factory=list) + + # define + request_type: Literal["chat"] = "chat" + class AudioChatCompletionRequest(BaseModel): audio: str @@ -110,41 +329,6 @@ class ChatCompletionStreamResponse(BaseModel): choices: List[ChatCompletionResponseStreamChoice] -class TokenCheckRequestItem(BaseModel): - model: str - prompt: str - max_tokens: int - - -class TokenCheckRequest(BaseModel): - prompts: List[TokenCheckRequestItem] - - -class TokenCheckResponseItem(BaseModel): - fits: bool - tokenCount: int - contextLength: int - - -class TokenCheckResponse(BaseModel): - prompts: List[TokenCheckResponseItem] - - -class EmbeddingsRequest(BaseModel): - model: Optional[str] = None - engine: Optional[str] = None - input: Union[str, List[Any]] - user: Optional[str] = None - encoding_format: Optional[str] = None - - -class EmbeddingsResponse(BaseModel): - object: str = "list" - data: List[Dict[str, Any]] - model: str - usage: UsageInfo - - class CompletionRequest(BaseModel): model: str prompt: Union[str, List[Any]] diff --git a/comps/cores/proto/docarray.py b/comps/cores/proto/docarray.py index c21fe727d..b225758e7 100644 --- a/comps/cores/proto/docarray.py +++ b/comps/cores/proto/docarray.py @@ -1,13 +1,13 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -from typing import Optional +from typing import Dict, List, Optional, Union import numpy as np from docarray import BaseDoc, DocList from docarray.documents import AudioDoc from docarray.typing import AudioUrl -from pydantic import Field, conint, conlist +from pydantic import Field, conint, conlist, field_validator class TopologyInfo: @@ -32,9 +32,9 @@ class DocPath(BaseDoc): table_strategy: str = "fast" -class EmbedDoc768(BaseDoc): +class EmbedDoc(BaseDoc): text: str - embedding: conlist(float, min_length=768, 
max_length=768) + embedding: conlist(float, min_length=0) search_type: str = "similarity" k: int = 4 distance_threshold: Optional[float] = None @@ -58,11 +58,6 @@ class Audio2TextDoc(AudioDoc): ) -class EmbedDoc1024(BaseDoc): - text: str - embedding: conlist(float, min_length=1024, max_length=1024) - - class SearchedDoc(BaseDoc): retrieved_docs: DocList[TextDoc] initial_query: str @@ -93,6 +88,30 @@ class LLMParamsDoc(BaseDoc): repetition_penalty: float = 1.03 streaming: bool = True + chat_template: Optional[str] = Field( + default=None, + description=( + "A template to use for this conversion. " + "If this is not passed, the model's default chat template will be " + "used instead. We recommend that the template contains {context} and {question} for rag," + "or only contains {question} for chat completion without rag." + ), + ) + documents: Optional[Union[List[Dict[str, str]], List[str]]] = Field( + default=[], + description=( + "A list of dicts representing documents that will be accessible to " + "the model if it is performing RAG (retrieval-augmented generation)." + " If the template does not support RAG, this argument will have no " + "effect. We recommend that each document should be a dict containing " + '"title" and "text" keys.' + ), + ) + + @field_validator("chat_template") + def chat_template_must_contain_variables(cls, v): + return v + class LLMParams(BaseDoc): max_new_tokens: int = 1024 @@ -103,6 +122,16 @@ class LLMParams(BaseDoc): repetition_penalty: float = 1.03 streaming: bool = True + chat_template: Optional[str] = Field( + default=None, + description=( + "A template to use for this conversion. " + "If this is not passed, the model's default chat template will be " + "used instead. We recommend that the template contains {context} and {question} for rag," + "or only contains {question} for chat completion without rag." 
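The chat_template/documents convention described in the field docstrings above now appears both on the OpenAI-style ChatCompletionRequest and on the docarray LLM parameter classes. As a rough illustration only (the import path and field names are taken from the hunks above; the message content, document text, and template string are made-up placeholders), a request carrying RAG documents and a custom template might be built like this:

```python
# Illustrative only, not part of the patch. Assumes the module path
# comps.cores.proto.api_protocol and the fields shown in the diff above.
from comps.cores.proto.api_protocol import ChatCompletionRequest

req = ChatCompletionRequest(
    messages=[{"role": "user", "content": "What is OPEA?"}],
    documents=[
        {"title": "OPEA overview", "text": "OPEA provides GenAI microservices."},
    ],
    # Per the field description, a RAG template should contain {context} and
    # {question}; chat-only templates need just {question}.
    chat_template="### Context: {context}\n### Question: {question}\n### Answer:",
)
print(req.model, req.request_type)  # defaults: Intel/neural-chat-7b-v3-3, chat
```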
+ ), + ) + class RAGASParams(BaseDoc): questions: DocList[TextDoc] @@ -135,6 +164,12 @@ class LVMDoc(BaseDoc): image: str prompt: str max_new_tokens: conint(ge=0, le=1024) = 512 + top_k: int = 10 + top_p: float = 0.95 + typical_p: float = 0.95 + temperature: float = 0.01 + repetition_penalty: float = 1.03 + streaming: bool = False class ImagePath(BaseDoc): @@ -146,4 +181,4 @@ class ImagesPath(BaseDoc): class VideoPath(BaseDoc): - video_path: str \ No newline at end of file + video_path: str diff --git a/comps/dataprep/milvus/config.py b/comps/dataprep/milvus/config.py index 06aa60975..0f8c57139 100644 --- a/comps/dataprep/milvus/config.py +++ b/comps/dataprep/milvus/config.py @@ -12,7 +12,7 @@ MILVUS_PORT = int(os.getenv("MILVUS_PORT", 19530)) COLLECTION_NAME = os.getenv("COLLECTION_NAME", "rag_milvus") -MOSEC_EMBEDDING_MODEL = "/root/bce-embedding-base_v1" +MOSEC_EMBEDDING_MODEL = os.environ.get("MOSEC_EMBEDDING_MODEL", "/root/bce-embedding-base_v1") MOSEC_EMBEDDING_ENDPOINT = os.environ.get("MOSEC_EMBEDDING_ENDPOINT", "") os.environ["OPENAI_API_BASE"] = MOSEC_EMBEDDING_ENDPOINT os.environ["OPENAI_API_KEY"] = "Dummy key" diff --git a/comps/dataprep/milvus/prepare_doc_milvus.py b/comps/dataprep/milvus/prepare_doc_milvus.py index 46a81e3f3..143010c06 100644 --- a/comps/dataprep/milvus/prepare_doc_milvus.py +++ b/comps/dataprep/milvus/prepare_doc_milvus.py @@ -16,20 +16,33 @@ TEI_EMBEDDING_ENDPOINT, TEI_EMBEDDING_MODEL, ) -from fastapi import File, Form, HTTPException, UploadFile +from fastapi import Body, File, Form, HTTPException, UploadFile from langchain.text_splitter import RecursiveCharacterTextSplitter from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceHubEmbeddings, OpenAIEmbeddings +from langchain_core.documents import Document from langchain_milvus.vectorstores import Milvus from langchain_text_splitters import HTMLHeaderTextSplitter from langsmith import traceable from pyspark import SparkConf, SparkContext from comps import DocPath, opea_microservices, register_microservice -from comps.dataprep.utils import document_loader, get_separators, get_tables_result, parse_html +from comps.dataprep.utils import ( + create_upload_folder, + document_loader, + encode_filename, + get_file_structure, + get_separators, + get_tables_result, + parse_html, + remove_folder_with_ignore, + save_content_to_local_disk, +) # workaround notes: cp comps/dataprep/utils.py ./milvus/utils.py # from utils import document_loader, get_tables_result, parse_html index_params = {"index_type": "FLAT", "metric_type": "IP", "params": {}} +partition_field_name = "filename" +upload_folder = "./uploaded_files/" class MosecEmbeddings(OpenAIEmbeddings): @@ -57,21 +70,11 @@ def empty_embedding() -> List[float]: return [e if e is not None else empty_embedding() for e in batched_embeddings] -async def save_file_to_local_disk(save_path: str, file): - save_path = Path(save_path) - with save_path.open("wb") as fout: - try: - content = await file.read() - fout.write(content) - except Exception as e: - print(f"Write file failed. Exception: {e}") - raise HTTPException(status_code=500, detail=f"Write file {save_path} failed. 
Exception: {e}") - - def ingest_data_to_milvus(doc_path: DocPath): """Ingest document to Milvus.""" path = doc_path.path - print(f"Parsing document {path}.") + file_name = path.split("/")[-1] + print(f"[ ingest data ] Parsing document {path}, file name: {file_name}.") if path.endswith(".html"): headers_to_split_on = [ @@ -90,51 +93,47 @@ def ingest_data_to_milvus(doc_path: DocPath): if doc_path.process_table and path.endswith(".pdf"): table_chunks = get_tables_result(path, doc_path.table_strategy) chunks = chunks + table_chunks - print("Done preprocessing. Created ", len(chunks), " chunks of the original pdf") + print("[ ingest data ] Done preprocessing. Created ", len(chunks), " chunks of the original pdf") # Create vectorstore if MOSEC_EMBEDDING_ENDPOINT: # create embeddings using MOSEC endpoint service - print(f"MOSEC_EMBEDDING_ENDPOINT:{MOSEC_EMBEDDING_ENDPOINT},MOSEC_EMBEDDING_MODEL:{MOSEC_EMBEDDING_MODEL}") + print( + f"[ ingest data ] MOSEC_EMBEDDING_ENDPOINT:{MOSEC_EMBEDDING_ENDPOINT}, MOSEC_EMBEDDING_MODEL:{MOSEC_EMBEDDING_MODEL}" + ) embedder = MosecEmbeddings(model=MOSEC_EMBEDDING_MODEL) elif TEI_EMBEDDING_ENDPOINT: # create embeddings using TEI endpoint service - print(f"TEI_EMBEDDING_ENDPOINT:{TEI_EMBEDDING_ENDPOINT}") + print(f"[ ingest data ] TEI_EMBEDDING_ENDPOINT:{TEI_EMBEDDING_ENDPOINT}") embedder = HuggingFaceHubEmbeddings(model=TEI_EMBEDDING_ENDPOINT) else: # create embeddings using local embedding model - print(f"Local TEI_EMBEDDING_MODEL:{TEI_EMBEDDING_MODEL}") + print(f"[ ingest data ] Local TEI_EMBEDDING_MODEL:{TEI_EMBEDDING_MODEL}") embedder = HuggingFaceBgeEmbeddings(model_name=TEI_EMBEDDING_MODEL) - # Batch size - batch_size = 32 - num_chunks = len(chunks) - for i in range(0, num_chunks, batch_size): - batch_chunks = chunks[i : i + batch_size] - batch_texts = batch_chunks - _ = Milvus.from_texts( - texts=batch_texts, - embedding=embedder, + # insert documents to Milvus + insert_docs = [] + for chunk in chunks: + insert_docs.append(Document(page_content=chunk, metadata={partition_field_name: file_name})) + + try: + _ = Milvus.from_documents( + insert_docs, + embedder, collection_name=COLLECTION_NAME, connection_args={"host": MILVUS_HOST, "port": MILVUS_PORT}, - index_params=index_params, + partition_key_field=partition_field_name, ) - print(f"Processed batch {i//batch_size + 1}/{(num_chunks-1)//batch_size + 1}") - - return True + except Exception as e: + print(f"[ ingest data ] fail to ingest data into Milvus. 
error: {e}") + return False + print(f"[ ingest data ] Docs ingested from {path} to Milvus collection {COLLECTION_NAME}.") -def ingest_link_to_milvus(link_list: List[str]): - data_collection = parse_html(link_list) + return True - texts = [] - metadatas = [] - for data, meta in data_collection: - doc_id = str(uuid.uuid4()) - metadata = {"source": meta, "identify_id": doc_id} - texts.append(data) - metadatas.append(metadata) +async def ingest_link_to_milvus(link_list: List[str]): # Create vectorstore if MOSEC_EMBEDDING_ENDPOINT: # create embeddings using MOSEC endpoint service @@ -149,14 +148,22 @@ def ingest_link_to_milvus(link_list: List[str]): print(f"Local TEI_EMBEDDING_MODEL:{TEI_EMBEDDING_MODEL}") embedder = HuggingFaceBgeEmbeddings(model_name=TEI_EMBEDDING_MODEL) - _ = Milvus.from_texts( - texts=texts, - metadatas=metadatas, - embedding=embedder, - collection_name=COLLECTION_NAME, - connection_args={"host": MILVUS_HOST, "port": MILVUS_PORT}, - index_params=index_params, - ) + for link in link_list: + content = parse_html([link])[0][0] + print(f"[ ingest link ] link: {link} content: {content}") + encoded_link = encode_filename(link) + save_path = upload_folder + encoded_link + ".txt" + print(f"[ ingest link ] save_path: {save_path}") + await save_content_to_local_disk(save_path, content) + + document = Document(page_content=content, metadata={partition_field_name: encoded_link + ".txt"}) + _ = Milvus.from_documents( + document, + embedder, + collection_name=COLLECTION_NAME, + connection_args={"host": MILVUS_HOST, "port": MILVUS_PORT}, + partition_key_field=partition_field_name, + ) @register_microservice(name="opea_service@prepare_doc_milvus", endpoint="/v1/dataprep", host="0.0.0.0", port=6010) @@ -177,22 +184,10 @@ async def ingest_documents( if files: if not isinstance(files, list): files = [files] - upload_folder = "./uploaded_files/" - if not os.path.exists(upload_folder): - Path(upload_folder).mkdir(parents=True, exist_ok=True) uploaded_files = [] for file in files: save_path = upload_folder + file.filename - await save_file_to_local_disk(save_path, file) - ingest_data_to_milvus( - DocPath( - path=save_path, - chunk_size=chunk_size, - chunk_overlap=chunk_overlap, - process_table=process_table, - table_strategy=table_strategy, - ) - ) + await save_content_to_local_disk(save_path, file) uploaded_files.append(save_path) print(f"Successfully saved file {save_path}") @@ -200,7 +195,15 @@ def process_files_wrapper(files): if not isinstance(files, list): files = [files] for file in files: - ingest_data_to_milvus(DocPath(path=file, chunk_size=chunk_size, chunk_overlap=chunk_overlap)) + assert ingest_data_to_milvus( + DocPath( + path=file, + chunk_size=chunk_size, + chunk_overlap=chunk_overlap, + process_table=process_table, + table_strategy=table_strategy, + ) + ) try: # Create a SparkContext @@ -224,7 +227,7 @@ def process_files_wrapper(files): link_list = json.loads(link_list) # Parse JSON string to list if not isinstance(link_list, list): raise HTTPException(status_code=400, detail="link_list should be a list.") - ingest_link_to_milvus(link_list) + await ingest_link_to_milvus(link_list) print(f"Successfully saved link list {link_list}") return {"status": 200, "message": "Data preparation succeeded"} except json.JSONDecodeError: @@ -233,5 +236,109 @@ def process_files_wrapper(files): raise HTTPException(status_code=400, detail="Must provide either a file or a string list.") +@register_microservice( + name="opea_service@prepare_doc_milvus_file", endpoint="/v1/dataprep/get_file", 
host="0.0.0.0", port=6011 +) +@traceable(run_type="tool") +async def rag_get_file_structure(): + print("[ dataprep - get file ] start to get file structure") + + if not Path(upload_folder).exists(): + print("No file uploaded, return empty list.") + return [] + + file_content = get_file_structure(upload_folder) + return file_content + + +def delete_all_data(my_milvus): + print("[ delete ] deleting all data in milvus") + my_milvus.delete(expr="pk >= 0") + my_milvus.col.flush() + print("[ delete ] delete success: all data") + + +def delete_by_partition_field(my_milvus, partition_field): + print(f"[ delete ] deleting {partition_field_name} {partition_field}") + pks = my_milvus.get_pks(f'{partition_field_name} == "{partition_field}"') + print(f"[ delete ] target pks: {pks}") + res = my_milvus.delete(pks) + my_milvus.col.flush() + print(f"[ delete ] delete success: {res}") + + +@register_microservice( + name="opea_service@prepare_doc_milvus_del", endpoint="/v1/dataprep/delete_file", host="0.0.0.0", port=6012 +) +@traceable(run_type="tool") +async def delete_single_file(file_path: str = Body(..., embed=True)): + """Delete file according to `file_path`. + + `file_path`: + - file/link path (e.g. /path/to/file.txt) + - "all": delete all files uploaded + """ + # create embedder obj + if MOSEC_EMBEDDING_ENDPOINT: + # create embeddings using MOSEC endpoint service + print( + f"[ dataprep - del ] MOSEC_EMBEDDING_ENDPOINT:{MOSEC_EMBEDDING_ENDPOINT},MOSEC_EMBEDDING_MODEL:{MOSEC_EMBEDDING_MODEL}" + ) + embedder = MosecEmbeddings(model=MOSEC_EMBEDDING_MODEL) + elif TEI_EMBEDDING_ENDPOINT: + # create embeddings using TEI endpoint service + print(f"[ dataprep - del ] TEI_EMBEDDING_ENDPOINT:{TEI_EMBEDDING_ENDPOINT}") + embedder = HuggingFaceHubEmbeddings(model=TEI_EMBEDDING_ENDPOINT) + else: + # create embeddings using local embedding model + print(f"[ dataprep - del ] Local TEI_EMBEDDING_MODEL:{TEI_EMBEDDING_MODEL}") + embedder = HuggingFaceBgeEmbeddings(model_name=TEI_EMBEDDING_MODEL) + + # define Milvus obj + my_milvus = Milvus( + embedding_function=embedder, + collection_name=COLLECTION_NAME, + connection_args={"host": MILVUS_HOST, "port": MILVUS_PORT}, + index_params=index_params, + auto_id=True, + ) + + # delete all uploaded files + if file_path == "all": + print("[ dataprep - del ] deleting all files") + delete_all_data(my_milvus) + remove_folder_with_ignore(upload_folder) + print("[ dataprep - del ] successfully delete all files.") + create_upload_folder(upload_folder) + return {"status": True} + + encode_file_name = encode_filename(file_path) + delete_path = Path(upload_folder + "/" + encode_file_name) + print(f"[dataprep - del] delete_path: {delete_path}") + + # partially delete files + if delete_path.exists(): + # file + if delete_path.is_file(): + print(f"[dataprep - del] deleting file {encode_file_name}") + try: + delete_by_partition_field(my_milvus, encode_file_name) + delete_path.unlink() + print(f"[dataprep - del] file {encode_file_name} deleted") + return {"status": True} + except Exception as e: + print(f"[dataprep - del] fail to delete file {delete_path}: {e}") + return {"status": False} + # folder + else: + print("[dataprep - del] delete folder is not supported for now.") + return {"status": False} + else: + raise HTTPException(status_code=404, detail="File/folder not found. 
Please check del_path.") + + if __name__ == "__main__": + create_upload_folder(upload_folder) opea_microservices["opea_service@prepare_doc_milvus"].start() + opea_microservices["opea_service@prepare_doc_milvus_file"].start() + opea_microservices["opea_service@prepare_doc_milvus_del"].start() diff --git a/comps/dataprep/milvus/requirements.txt b/comps/dataprep/milvus/requirements.txt index c6f5f4fd2..cf088a1c0 100644 --- a/comps/dataprep/milvus/requirements.txt +++ b/comps/dataprep/milvus/requirements.txt @@ -27,3 +27,4 @@ sentence_transformers shortuuid tiktoken unstructured[all-docs]==0.11.5 +uvicorn diff --git a/comps/dataprep/pgvector/README.md b/comps/dataprep/pgvector/README.md index 36b99b6eb..af25ae56d 100644 --- a/comps/dataprep/pgvector/README.md +++ b/comps/dataprep/pgvector/README.md @@ -8,11 +8,7 @@ pip install -r requirements.txt ``` -## 1.2 Start PGVector - -Please refer to this [readme](../../../vectorstores/langchain/pgvcetor/README.md). - -## 1.3 Setup Environment Variables +## 1.2 Setup Environment Variables ```bash export PG_CONNECTION_STRING=postgresql+psycopg2://testuser:testpwd@${your_ip}:5432/vectordb @@ -22,6 +18,10 @@ export LANGCHAIN_API_KEY=${your_langchain_api_key} export LANGCHAIN_PROJECT="opea/gen-ai-comps:dataprep" ``` +## 1.3 Start PGVector + +Please refer to this [readme](../../vectorstores/langchain/pgvector/README.md). + ## 1.4 Start Document Preparation Microservice for PGVector with Python Script Start document preparation microservice for PGVector with below command. @@ -34,7 +34,7 @@ python prepare_doc_pgvector.py ## 2.1 Start PGVector -Please refer to this [readme](../../../vectorstores/langchain/pgvector/README.md). +Please refer to this [readme](../../vectorstores/langchain/pgvector/README.md). ## 2.2 Setup Environment Variables @@ -49,14 +49,14 @@ export LANGCHAIN_PROJECT="opea/dataprep" ## 2.3 Build Docker Image ```bash -cd comps/dataprep/langchain/pgvector/docker -docker build -t opea/dataprep-pgvector:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/langchain/pgvector/docker/Dockerfile . +cd GenAIComps +docker build -t opea/dataprep-pgvector:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/pgvector/langchain/docker/Dockerfile . ``` ## 2.4 Run Docker with CLI (Option A) ```bash -docker run -d --name="dataprep-pgvector" -p 6007:6007 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e PG_CONNECTION_STRING=$PG_CONNECTION_STRING -e INDEX_NAME=$INDEX_NAME -e TEI_ENDPOINT=$TEI_ENDPOINT opea/dataprep-pgvector:latest +docker run --name="dataprep-pgvector" -p 6007:6007 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e PG_CONNECTION_STRING=$PG_CONNECTION_STRING -e INDEX_NAME=$INDEX_NAME -e TEI_ENDPOINT=$TEI_ENDPOINT opea/dataprep-pgvector:latest ``` ## 2.5 Run with Docker Compose (Option B) @@ -68,6 +68,8 @@ docker compose -f docker-compose-dataprep-pgvector.yaml up -d # 🚀3. Consume Microservice +## 3.1 Consume Upload API + Once document preparation microservice for PGVector is started, user can use below command to invoke the microservice to convert the document to embedding and save to the database. 
```bash @@ -76,3 +78,58 @@ curl -X POST \ -d '{"path":"/path/to/document"}' \ http://localhost:6007/v1/dataprep ``` + +## 3.2 Consume get_file API + +To get uploaded file structures, use the following command: + +```bash +curl -X POST \ + -H "Content-Type: application/json" \ + http://localhost:6007/v1/dataprep/get_file +``` + +Then you will get the response JSON like this: + +```json +[ + { + "name": "uploaded_file_1.txt", + "id": "uploaded_file_1.txt", + "type": "File", + "parent": "" + }, + { + "name": "uploaded_file_2.txt", + "id": "uploaded_file_2.txt", + "type": "File", + "parent": "" + } +] +``` + +## 4.3 Consume delete_file API + +To delete uploaded file/link, use the following command. + +The `file_path` here should be the `id` get from `/v1/dataprep/get_file` API. + +```bash +# delete link +curl -X POST \ + -H "Content-Type: application/json" \ + -d '{"file_path": "https://www.ces.tech/.txt"}' \ + http://localhost:6007/v1/dataprep/delete_file + +# delete file +curl -X POST \ + -H "Content-Type: application/json" \ + -d '{"file_path": "uploaded_file_1.txt"}' \ + http://localhost:6007/v1/dataprep/delete_file + +# delete all files and links +curl -X POST \ + -H "Content-Type: application/json" \ + -d '{"file_path": "all"}' \ + http://localhost:6007/v1/dataprep/delete_file +``` diff --git a/comps/dataprep/pgvector/langchain/docker/docker-compose-dataprep-pgvector.yaml b/comps/dataprep/pgvector/langchain/docker/docker-compose-dataprep-pgvector.yaml index f11a88b93..d396bda3a 100644 --- a/comps/dataprep/pgvector/langchain/docker/docker-compose-dataprep-pgvector.yaml +++ b/comps/dataprep/pgvector/langchain/docker/docker-compose-dataprep-pgvector.yaml @@ -16,6 +16,9 @@ services: - POSTGRES_USER=testuser - POSTGRES_PASSWORD=testpwd - POSTGRES_HOST_AUTH_METHOD=trust + - no_proxy= ${no_proxy} + - http_proxy= ${http_proxy} + - https_proxy= ${https_proxy} volumes: - ./init.sql:/docker-entrypoint-initdb.d/init.sql diff --git a/comps/dataprep/pgvector/langchain/prepare_doc_pgvector.py b/comps/dataprep/pgvector/langchain/prepare_doc_pgvector.py index 9c38cbe6a..efb394991 100644 --- a/comps/dataprep/pgvector/langchain/prepare_doc_pgvector.py +++ b/comps/dataprep/pgvector/langchain/prepare_doc_pgvector.py @@ -3,21 +3,32 @@ import json import os -import uuid from pathlib import Path from typing import List, Optional, Union +from urllib.parse import urlparse +import psycopg2 from config import CHUNK_OVERLAP, CHUNK_SIZE, EMBED_MODEL, INDEX_NAME, PG_CONNECTION_STRING -from fastapi import File, Form, HTTPException, UploadFile +from fastapi import Body, File, Form, HTTPException, UploadFile from langchain.text_splitter import RecursiveCharacterTextSplitter from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceHubEmbeddings from langchain_community.vectorstores import PGVector from langsmith import traceable -from comps import DocPath, ServiceType, opea_microservices, register_microservice, register_statistics -from comps.dataprep.utils import document_loader, get_separators, parse_html +from comps import DocPath, opea_microservices, register_microservice +from comps.dataprep.utils import ( + create_upload_folder, + document_loader, + encode_filename, + get_file_structure, + get_separators, + parse_html, + remove_folder_with_ignore, + save_content_to_local_disk, +) tei_embedding_endpoint = os.getenv("TEI_ENDPOINT") +upload_folder = "./uploaded_files/" async def save_file_to_local_disk(save_path: str, file): @@ -31,18 +42,61 @@ async def save_file_to_local_disk(save_path: str, 
file): raise HTTPException(status_code=500, detail=f"Write file {save_path} failed. Exception: {e}") +def delete_embeddings(doc_name): + """Get all ids from a vectorstore.""" + try: + result = urlparse(PG_CONNECTION_STRING) + username = result.username + password = result.password + database = result.path[1:] + hostname = result.hostname + port = result.port + + connection = psycopg2.connect(database=database, user=username, password=password, host=hostname, port=port) + + # Create a cursor object to execute SQL queries + print(f"Deleting {doc_name} from vectorstore") + + cur = connection.cursor() + if doc_name == "all": + cur.execute( + "DELETE FROM langchain_pg_collection lpe WHERE lpe.name = %(index_name)s", + {"index_name": INDEX_NAME}, + ) + else: + cur.execute( + "DELETE FROM langchain_pg_embedding lpe WHERE lpe.uuid in (SELECT lpc.uuid\ + FROM langchain_pg_embedding lpc where lpc.cmetadata ->> 'doc_name' = %(doc_name)s)", + {"doc_name": doc_name}, + ) + + connection.commit() # commit the transaction + cur.close() + + return True + + except psycopg2.Error as e: + print(f"Error deleting document from vectorstore: {e}") + return False + + except Exception as e: + print(f"An unexpected error occurred: {e}") + return False + + def ingest_doc_to_pgvector(doc_path: DocPath): """Ingest document to PGVector.""" doc_path = doc_path.path print(f"Parsing document {doc_path}.") text_splitter = RecursiveCharacterTextSplitter( - chunk_size=1500, chunk_overlap=100, add_start_index=True, separators=get_separators() + chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP, add_start_index=True, separators=get_separators() ) content = document_loader(doc_path) chunks = text_splitter.split_text(content) print("Done preprocessing. Created ", len(chunks), " chunks of the original pdf") print("PG Connection", PG_CONNECTION_STRING) + metadata = [dict({"doc_name": str(doc_path)})] # Create vectorstore if tei_embedding_endpoint: @@ -60,23 +114,17 @@ def ingest_doc_to_pgvector(doc_path: DocPath): batch_texts = batch_chunks _ = PGVector.from_texts( - texts=batch_texts, embedding=embedder, collection_name=INDEX_NAME, connection_string=PG_CONNECTION_STRING + texts=batch_texts, + embedding=embedder, + metadatas=metadata, + collection_name=INDEX_NAME, + connection_string=PG_CONNECTION_STRING, ) print(f"Processed batch {i//batch_size + 1}/{(num_chunks-1)//batch_size + 1}") return True -def ingest_link_to_pgvector(link_list: List[str]): - data_collection = parse_html(link_list) - - texts = [] - metadatas = [] - for data, meta in data_collection: - doc_id = str(uuid.uuid4()) - metadata = {"source": meta, "identify_id": doc_id} - texts.append(data) - metadatas.append(metadata) - +async def ingest_link_to_pgvector(link_list: List[str]): # Create vectorstore if tei_embedding_endpoint: # create embeddings using TEI endpoint service @@ -85,24 +133,48 @@ def ingest_link_to_pgvector(link_list: List[str]): # create embeddings using local embedding model embedder = HuggingFaceBgeEmbeddings(model_name=EMBED_MODEL) - _ = PGVector.from_texts( - texts=texts, - embedding=embedder, - metadatas=metadatas, - collection_name=INDEX_NAME, - connection_string=PG_CONNECTION_STRING, + text_splitter = RecursiveCharacterTextSplitter( + chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP, add_start_index=True, separators=get_separators() ) + for link in link_list: + texts = [] + content = parse_html([link])[0][0] + print(f"[ ingest link ] link: {link} content: {content}") + encoded_link = encode_filename(link) + save_path = upload_folder + 
encoded_link + ".txt" + doc_path = upload_folder + link + ".txt" + print(f"[ ingest link ] save_path: {save_path}") + await save_content_to_local_disk(save_path, content) + metadata = [dict({"doc_name": str(doc_path)})] + + chunks = text_splitter.split_text(content) + + batch_size = 32 + num_chunks = len(chunks) + for i in range(0, num_chunks, batch_size): + batch_chunks = chunks[i : i + batch_size] + batch_texts = batch_chunks + + _ = PGVector.from_texts( + texts=batch_texts, + embedding=embedder, + metadatas=metadata, + collection_name=INDEX_NAME, + connection_string=PG_CONNECTION_STRING, + ) + print(f"Processed batch {i//batch_size + 1}/{(num_chunks-1)//batch_size + 1}") + + return True + @register_microservice( name="opea_service@prepare_doc_pgvector", - service_type=ServiceType.DATAPREP, endpoint="/v1/dataprep", host="0.0.0.0", port=6007, ) @traceable(run_type="tool") -@register_statistics(names=["opea_service@dataprep_pgvector"]) async def ingest_documents( files: Optional[Union[UploadFile, List[UploadFile]]] = File(None), link_list: Optional[str] = Form(None) ): @@ -114,12 +186,13 @@ async def ingest_documents( if files: if not isinstance(files, list): files = [files] - upload_folder = "./uploaded_files/" + if not os.path.exists(upload_folder): Path(upload_folder).mkdir(parents=True, exist_ok=True) for file in files: save_path = upload_folder + file.filename await save_file_to_local_disk(save_path, file) + ingest_doc_to_pgvector(DocPath(path=save_path)) print(f"Successfully saved file {save_path}") return {"status": 200, "message": "Data preparation succeeded"} @@ -129,7 +202,7 @@ async def ingest_documents( link_list = json.loads(link_list) # Parse JSON string to list if not isinstance(link_list, list): raise HTTPException(status_code=400, detail="link_list should be a list.") - ingest_link_to_pgvector(link_list) + await ingest_link_to_pgvector(link_list) print(f"Successfully saved link list {link_list}") return {"status": 200, "message": "Data preparation succeeded"} except json.JSONDecodeError: @@ -138,5 +211,64 @@ async def ingest_documents( raise HTTPException(status_code=400, detail="Must provide either a file or a string list.") +@register_microservice( + name="opea_service@prepare_doc_pgvector", endpoint="/v1/dataprep/get_file", host="0.0.0.0", port=6007 +) +@traceable(run_type="tool") +async def rag_get_file_structure(): + print("[ dataprep - get file ] start to get file structure") + + if not Path(upload_folder).exists(): + print("No file uploaded, return empty list.") + return [] + + file_content = get_file_structure(upload_folder) + return file_content + + +@register_microservice( + name="opea_service@prepare_doc_pgvector", endpoint="/v1/dataprep/delete_file", host="0.0.0.0", port=6007 +) +@traceable(run_type="tool") +async def delete_single_file(file_path: str = Body(..., embed=True)): + """Delete file according to `file_path`. + + `file_path`: + - specific file path (e.g. /path/to/file.txt) + - folder path (e.g. 
/path/to/folder) + - "all": delete all files uploaded + """ + if file_path == "all": + print("[dataprep - del] delete all files") + remove_folder_with_ignore(upload_folder) + assert delete_embeddings(file_path) + print("[dataprep - del] successfully delete all files.") + create_upload_folder(upload_folder) + return {"status": True} + + delete_path = Path(upload_folder + "/" + encode_filename(file_path)) + doc_path = upload_folder + file_path + print(f"[dataprep - del] delete_path: {delete_path}") + + # partially delete files/folders + if delete_path.exists(): + # delete file + if delete_path.is_file(): + try: + assert delete_embeddings(doc_path) + delete_path.unlink() + except Exception as e: + print(f"[dataprep - del] fail to delete file {delete_path}: {e}") + return {"status": False} + # delete folder + else: + print("[dataprep - del] delete folder is not supported for now.") + return {"status": False} + return {"status": True} + else: + raise HTTPException(status_code=404, detail="File/folder not found. Please check del_path.") + + if __name__ == "__main__": + create_upload_folder(upload_folder) opea_microservices["opea_service@prepare_doc_pgvector"].start() diff --git a/comps/dataprep/pgvector/langchain/requirements.txt b/comps/dataprep/pgvector/langchain/requirements.txt index 0dbbf9e2e..84fd48e52 100644 --- a/comps/dataprep/pgvector/langchain/requirements.txt +++ b/comps/dataprep/pgvector/langchain/requirements.txt @@ -7,6 +7,7 @@ fastapi huggingface_hub langchain langchain-community +langchain-text-splitters langsmith markdown numpy @@ -16,10 +17,16 @@ opentelemetry-sdk pandas pgvector==0.2.5 Pillow -prometheus-fastapi-instrumentator==7.0.0 +prometheus-fastapi-instrumentator psycopg2-binary pymupdf +pyspark python-docx +python-multipart python-pptx sentence_transformers shortuuid +tiktoken +unstructured[all-docs]==0.11.5 +uvicorn + diff --git a/comps/dataprep/pinecone/config.py b/comps/dataprep/pinecone/config.py index e6e62db6c..7a761a09c 100644 --- a/comps/dataprep/pinecone/config.py +++ b/comps/dataprep/pinecone/config.py @@ -4,13 +4,13 @@ import os # Embedding model -EMBED_MODEL = os.getenv("EMBED_MODEL", "sentence-transformers/all-MiniLM-L6-v2") +EMBED_MODEL = os.getenv("EMBED_MODEL", "BAAI/bge-base-en-v1.5") # Pinecone configuration PINECONE_API_KEY = os.getenv("PINECONE_API_KEY", "xxx_xxx") -PINECONE_INDEX_NAME = int(os.getenv("PINECONE_INDEX_NAME", "langchain-test")) +PINECONE_INDEX_NAME = os.getenv("PINECONE_INDEX_NAME", "langchain-test") # LLM/Embedding endpoints TGI_LLM_ENDPOINT = os.getenv("TGI_LLM_ENDPOINT", "http://localhost:8080") TGI_LLM_ENDPOINT_NO_RAG = os.getenv("TGI_LLM_ENDPOINT_NO_RAG", "http://localhost:8081") -TEI_EMBEDDING_ENDPOINT = os.getenv("TEI_ENDPOINT") +TEI_EMBEDDING_ENDPOINT = os.getenv("TEI_EMBEDDING_ENDPOINT") diff --git a/comps/dataprep/pinecone/docker/Dockerfile b/comps/dataprep/pinecone/docker/Dockerfile index d19ff6ab4..d61ecf65f 100644 --- a/comps/dataprep/pinecone/docker/Dockerfile +++ b/comps/dataprep/pinecone/docker/Dockerfile @@ -6,11 +6,16 @@ FROM python:3.11-slim ENV LANG=C.UTF-8 +ARG ARCH="cpu" + RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ build-essential \ libgl1-mesa-glx \ libjemalloc-dev \ - vim + default-jre \ + vim \ + libcairo2 + RUN useradd -m -s /bin/bash user && \ mkdir -p /home/user && \ @@ -23,8 +28,18 @@ COPY comps /home/user/comps RUN pip install --no-cache-dir --upgrade pip && \ pip install --no-cache-dir -r /home/user/comps/dataprep/pinecone/requirements.txt +RUN pip install 
--no-cache-dir --upgrade pip setuptools && \ + if [ ${ARCH} = "cpu" ]; then pip install torch torchvision --index-url https://download.pytorch.org/whl/cpu; fi && \ + pip install --no-cache-dir -r /home/user/comps/dataprep/pinecone/requirements.txt + ENV PYTHONPATH=$PYTHONPATH:/home/user +USER root + +RUN mkdir -p /home/user/comps/dataprep/pinecone/uploaded_files && chown -R user /home/user/comps/dataprep/pinecone/uploaded_files + +USER user + WORKDIR /home/user/comps/dataprep/pinecone ENTRYPOINT ["python", "prepare_doc_pinecone.py"] diff --git a/comps/dataprep/pinecone/docker/docker-compose-dataprep-pinecone.yaml b/comps/dataprep/pinecone/docker/docker-compose-dataprep-pinecone.yaml index 93636f3d0..0ee20389d 100644 --- a/comps/dataprep/pinecone/docker/docker-compose-dataprep-pinecone.yaml +++ b/comps/dataprep/pinecone/docker/docker-compose-dataprep-pinecone.yaml @@ -1,19 +1,40 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 version: "3" services: + tei-embedding-service: + image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + container_name: tei-embedding-server + ports: + - "6006:80" + volumes: + - "./data:/data" + shm_size: 1g + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate dataprep-pinecone: - image: opea/gen-ai-comps:dataprep-pinecone-xeon-server + image: opea/dataprep-pinecone:latest container_name: dataprep-pinecone-server ports: - - "6000:6000" + - "6007:6007" + - "6008:6008" + - "6009:6009" ipc: host environment: + no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} PINECONE_API_KEY: ${PINECONE_API_KEY} PINECONE_INDEX_NAME: ${PINECONE_INDEX_NAME} + TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} + LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY} + HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} restart: unless-stopped networks: diff --git a/comps/dataprep/pinecone/prepare_doc_pinecone.py b/comps/dataprep/pinecone/prepare_doc_pinecone.py index 1a001a1fd..ec0e200a2 100644 --- a/comps/dataprep/pinecone/prepare_doc_pinecone.py +++ b/comps/dataprep/pinecone/prepare_doc_pinecone.py @@ -1,40 +1,105 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import json import os +import shutil +import uuid +from pathlib import Path +from typing import List, Optional, Union from config import EMBED_MODEL, PINECONE_API_KEY, PINECONE_INDEX_NAME +from fastapi import Body, File, Form, HTTPException, UploadFile from langchain.text_splitter import RecursiveCharacterTextSplitter from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceEmbeddings, HuggingFaceHubEmbeddings -from langchain_community.vectorstores import Pinecone +from langchain_pinecone import PineconeVectorStore +from langchain_text_splitters import HTMLHeaderTextSplitter +from langsmith import traceable +from pinecone import Pinecone, ServerlessSpec from comps import DocPath, opea_microservices, opea_telemetry, register_microservice -from comps.dataprep.utils import document_loader, get_separators +from comps.dataprep.utils import ( + create_upload_folder, + document_loader, + encode_filename, + get_file_structure, + get_separators, + get_tables_result, + parse_html, + remove_folder_with_ignore, + save_content_to_local_disk, +) -tei_embedding_endpoint = os.getenv("TEI_ENDPOINT") +tei_embedding_endpoint = os.getenv("TEI_EMBEDDING_ENDPOINT") 
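Across these reworked dataprep services the embedding backend is chosen the same way: use the TEI server added to the compose files when TEI_EMBEDDING_ENDPOINT is set, otherwise fall back to a local BGE model. A minimal standalone sketch of that convention (assuming langchain_community is installed; the endpoint URL and query string are placeholders):

```python
# Sketch of the shared embedder-selection convention; values are placeholders.
import os

from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceHubEmbeddings

TEI_EMBEDDING_ENDPOINT = os.getenv("TEI_EMBEDDING_ENDPOINT")  # e.g. "http://localhost:6006"
EMBED_MODEL = os.getenv("EMBED_MODEL", "BAAI/bge-base-en-v1.5")

if TEI_EMBEDDING_ENDPOINT:
    # remote embeddings served by the tei-embedding-service container
    embedder = HuggingFaceHubEmbeddings(model=TEI_EMBEDDING_ENDPOINT)
else:
    # local fallback model
    embedder = HuggingFaceBgeEmbeddings(model_name=EMBED_MODEL)

print(len(embedder.embed_query("What is Deep Learning?")))  # embedding dimension
```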
+upload_folder = "./uploaded_files/" -@register_microservice( - name="opea_service@prepare_doc_pinecone", - endpoint="/v1/dataprep", - host="0.0.0.0", - port=6000, - input_datatype=DocPath, - output_datatype=None, -) -@opea_telemetry -def ingest_documents(doc_path: DocPath): +def check_index_existance(): + print(f"[ check index existence ] checking {PINECONE_INDEX_NAME}") + pc = Pinecone(api_key=PINECONE_API_KEY) + existing_indexes = [index_info["name"] for index_info in pc.list_indexes()] + if PINECONE_INDEX_NAME not in existing_indexes: + print("[ check index existence ] index does not exist") + return None + else: + return True + + +def create_index(client): + print(f"[ create index ] creating index {PINECONE_INDEX_NAME}") + try: + client.create_index( + name=PINECONE_INDEX_NAME, + dimension=768, + metric="cosine", + spec=ServerlessSpec(cloud="aws", region="us-east-1"), + ) + print(f"[ create index ] index {PINECONE_INDEX_NAME} successfully created") + except Exception as e: + print(f"[ create index ] fail to create index {PINECONE_INDEX_NAME}: {e}") + return False + return True + + +def drop_index(index_name): + print(f"[ drop index ] dropping index {index_name}") + pc = Pinecone(api_key=PINECONE_API_KEY) + try: + pc.delete_index(index_name) + print(f"[ drop index ] index {index_name} deleted") + except Exception as e: + print(f"[ drop index ] index {index_name} delete failed: {e}") + return False + return True + + +def ingest_data_to_pinecone(doc_path: DocPath): """Ingest document to Pinecone.""" - doc_path = doc_path.path - print(f"Parsing document {doc_path}.") + path = doc_path.path + print(f"Parsing document {path}.") - text_splitter = RecursiveCharacterTextSplitter( - chunk_size=1500, chunk_overlap=100, add_start_index=True, separators=get_separators() - ) - content = document_loader(doc_path) - chunks = text_splitter.split_text(content) + if path.endswith(".html"): + headers_to_split_on = [ + ("h1", "Header 1"), + ("h2", "Header 2"), + ("h3", "Header 3"), + ] + text_splitter = HTMLHeaderTextSplitter(headers_to_split_on=headers_to_split_on) + else: + text_splitter = RecursiveCharacterTextSplitter( + chunk_size=doc_path.chunk_size, + chunk_overlap=doc_path.chunk_overlap, + add_start_index=True, + separators=get_separators(), + ) + content = document_loader(path) + chunks = text_splitter.split_text(content) + if doc_path.process_table and path.endswith(".pdf"): + table_chunks = get_tables_result(path, doc_path.table_strategy) + chunks = chunks + table_chunks print("Done preprocessing. 
Created ", len(chunks), " chunks of the original pdf") + # Create vectorstore if tei_embedding_endpoint: # create embeddings using TEI endpoint service @@ -43,20 +108,157 @@ def ingest_documents(doc_path: DocPath): # create embeddings using local embedding model embedder = HuggingFaceBgeEmbeddings(model_name=EMBED_MODEL) + pc = Pinecone(api_key=PINECONE_API_KEY) + + # Checking Index existence + if not check_index_existance(): + # Creating the index + create_index(pc) + print("Successfully created the index", PINECONE_INDEX_NAME) + # Batch size batch_size = 32 num_chunks = len(chunks) + file_ids = [] + for i in range(0, num_chunks, batch_size): batch_chunks = chunks[i : i + batch_size] batch_texts = batch_chunks - _ = Pinecone.from_texts( + vectorstore = PineconeVectorStore.from_texts( texts=batch_texts, embedding=embedder, index_name=PINECONE_INDEX_NAME, ) print(f"Processed batch {i//batch_size + 1}/{(num_chunks-1)//batch_size + 1}") + # store file_ids into index file-keys + pc = Pinecone(api_key=PINECONE_API_KEY) + + +async def ingest_link_to_pinecone(link_list: List[str]): + # Create embedding obj + if tei_embedding_endpoint: + # create embeddings using TEI endpoint service + embedder = HuggingFaceHubEmbeddings(model=tei_embedding_endpoint) + else: + # create embeddings using local embedding model + embedder = HuggingFaceBgeEmbeddings(model_name=EMBED_MODEL) + + pc = Pinecone(api_key=PINECONE_API_KEY) + + # Checking Index existence + if not check_index_existance(): + # Creating the index + create_index(pc) + print("Successfully created the index", PINECONE_INDEX_NAME) + + # save link contents and doc_ids one by one + for link in link_list: + content = parse_html([link])[0][0] + print(f"[ ingest link ] link: {link} content: {content}") + encoded_link = encode_filename(link) + save_path = upload_folder + encoded_link + ".txt" + print(f"[ ingest link ] save_path: {save_path}") + await save_content_to_local_disk(save_path, content) + + vectorstore = PineconeVectorStore.from_texts( + texts=content, + embedding=embedder, + index_name=PINECONE_INDEX_NAME, + ) + + return True + + +@register_microservice(name="opea_service@prepare_doc_pinecone", endpoint="/v1/dataprep", host="0.0.0.0", port=6007) +@traceable(run_type="tool") +async def ingest_documents( + files: Optional[Union[UploadFile, List[UploadFile]]] = File(None), + link_list: Optional[str] = Form(None), + chunk_size: int = Form(1500), + chunk_overlap: int = Form(100), + process_table: bool = Form(False), + table_strategy: str = Form("fast"), +): + print(f"files:{files}") + print(f"link_list:{link_list}") + + if files: + if not isinstance(files, list): + files = [files] + uploaded_files = [] + for file in files: + encode_file = encode_filename(file.filename) + save_path = upload_folder + encode_file + await save_content_to_local_disk(save_path, file) + ingest_data_to_pinecone( + DocPath( + path=save_path, + chunk_size=chunk_size, + chunk_overlap=chunk_overlap, + process_table=process_table, + table_strategy=table_strategy, + ) + ) + uploaded_files.append(save_path) + print(f"Successfully saved file {save_path}") + + return {"status": 200, "message": "Data preparation succeeded"} + + if link_list: + try: + link_list = json.loads(link_list) # Parse JSON string to list + if not isinstance(link_list, list): + raise HTTPException(status_code=400, detail="link_list should be a list.") + await ingest_link_to_pinecone(link_list) + print(f"Successfully saved link list {link_list}") + return {"status": 200, "message": "Data preparation succeeded"} + 
except json.JSONDecodeError: + raise HTTPException(status_code=400, detail="Invalid JSON format for link_list.") + + raise HTTPException(status_code=400, detail="Must provide either a file or a string list.") + + +@register_microservice( + name="opea_service@prepare_doc_pinecone_file", endpoint="/v1/dataprep/get_file", host="0.0.0.0", port=6008 +) +@traceable(run_type="tool") +async def rag_get_file_structure(): + print("[ dataprep - get file ] start to get file structure") + + if not Path(upload_folder).exists(): + print("No file uploaded, return empty list.") + return [] + + file_content = get_file_structure(upload_folder) + return file_content + + +@register_microservice( + name="opea_service@prepare_doc_pinecone_del", endpoint="/v1/dataprep/delete_file", host="0.0.0.0", port=6009 +) +@traceable(run_type="tool") +async def delete_all(file_path: str = Body(..., embed=True)): + """Delete file according to `file_path`. + + `file_path`: + - "all": delete all files uploaded + """ + # delete all uploaded files + if file_path == "all": + print("[dataprep - del] delete all files") + remove_folder_with_ignore(upload_folder) + assert drop_index(index_name=PINECONE_INDEX_NAME) + print("[dataprep - del] successfully delete all files.") + create_upload_folder(upload_folder) + return {"status": True} + else: + raise HTTPException(status_code=404, detail="Single file deletion is not implemented yet") + if __name__ == "__main__": + create_upload_folder(upload_folder) opea_microservices["opea_service@prepare_doc_pinecone"].start() + opea_microservices["opea_service@prepare_doc_pinecone_file"].start() + opea_microservices["opea_service@prepare_doc_pinecone_del"].start() diff --git a/comps/dataprep/pinecone/requirements.txt b/comps/dataprep/pinecone/requirements.txt index ee308fd17..a2d5c4298 100644 --- a/comps/dataprep/pinecone/requirements.txt +++ b/comps/dataprep/pinecone/requirements.txt @@ -1,12 +1,17 @@ beautifulsoup4 +cairosvg docarray[full] +docx2txt easyocr fastapi huggingface_hub langchain langchain-community +langchain-openai langchain-pinecone +langchain-text-splitters langsmith +markdown numpy opentelemetry-api opentelemetry-exporter-otlp @@ -14,7 +19,13 @@ opentelemetry-sdk pandas Pillow pinecone-client +prometheus-fastapi-instrumentator pymupdf +pyspark +python-bidi==0.4.2 python-docx +python-pptx sentence_transformers shortuuid +unstructured[all-docs]==0.11.5 +uvicorn diff --git a/comps/dataprep/qdrant/README.md b/comps/dataprep/qdrant/README.md index 24f58fc09..30aefb43d 100644 --- a/comps/dataprep/qdrant/README.md +++ b/comps/dataprep/qdrant/README.md @@ -47,7 +47,7 @@ docker build -t opea/dataprep-qdrant:latest --build-arg https_proxy=$https_proxy ## Run Docker with CLI ```bash -docker run -d --name="dataprep-qdrant-server" -p 6000:6000 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy opea/dataprep-qdrant:latest +docker run -d --name="dataprep-qdrant-server" -p 6007:6007 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy opea/dataprep-qdrant:latest ``` ## Setup Environment Variables @@ -55,7 +55,7 @@ docker run -d --name="dataprep-qdrant-server" -p 6000:6000 --ipc=host -e http_pr ```bash export http_proxy=${your_http_proxy} export https_proxy=${your_http_proxy} -export QDRANT=${host_ip} +export QDRANT_HOST=${host_ip} export QDRANT_PORT=6333 export COLLECTION_NAME=${your_collection_name} ``` @@ -72,13 +72,21 @@ docker compose -f docker-compose-dataprep-qdrant.yaml up -d Once document preparation microservice for Qdrant is started, user can use below command 
to invoke the microservice to convert the document to embedding and save to the database. ```bash -curl -X POST -H "Content-Type: application/json" -d '{"path":"/path/to/document"}' http://localhost:6000/v1/dataprep +curl -X POST \ + -H "Content-Type: multipart/form-data" \ + -F "files=@./file1.txt" \ + http://localhost:6007/v1/dataprep ``` You can specify chunk_size and chunk_size by the following commands. ```bash -curl -X POST -H "Content-Type: application/json" -d '{"path":"/path/to/document","chunk_size":1500,"chunk_overlap":100}' http://localhost:6000/v1/dataprep +curl -X POST \ + -H "Content-Type: multipart/form-data" \ + -F "files=@./file1.txt" \ + -F "chunk_size=1500" \ + -F "chunk_overlap=100" \ + http://localhost:6007/v1/dataprep ``` We support table extraction from pdf documents. You can specify process_table and table_strategy by the following commands. "table_strategy" refers to the strategies to understand tables for table retrieval. As the setting progresses from "fast" to "hq" to "llm," the focus shifts towards deeper table understanding at the expense of processing speed. The default strategy is "fast". @@ -86,5 +94,10 @@ We support table extraction from pdf documents. You can specify process_table an Note: If you specify "table_strategy=llm", You should first start TGI Service, please refer to 1.2.1, 1.3.1 in https://github.com/opea-project/GenAIComps/tree/main/comps/llms/README.md, and then `export TGI_LLM_ENDPOINT="http://${your_ip}:8008"`. ```bash -curl -X POST -H "Content-Type: application/json" -d '{"path":"/path/to/document","process_table":true,"table_strategy":"hq"}' http://localhost:6000/v1/dataprep +curl -X POST \ + -H "Content-Type: multipart/form-data" \ + -F "files=@./your_file.pdf" \ + -F "process_table=true" \ + -F "table_strategy=hq" \ + http://localhost:6007/v1/dataprep ``` diff --git a/comps/dataprep/qdrant/config.py b/comps/dataprep/qdrant/config.py index 2b30a3682..7cf37f404 100644 --- a/comps/dataprep/qdrant/config.py +++ b/comps/dataprep/qdrant/config.py @@ -7,7 +7,7 @@ EMBED_MODEL = os.getenv("EMBED_MODEL", "sentence-transformers/all-MiniLM-L6-v2") # Qdrant configuration -QDRANT_HOST = os.getenv("QDRANT", "localhost") +QDRANT_HOST = os.getenv("QDRANT_HOST", "localhost") QDRANT_PORT = int(os.getenv("QDRANT_PORT", 6333)) COLLECTION_NAME = os.getenv("COLLECTION_NAME", "rag-qdrant") diff --git a/comps/dataprep/qdrant/docker/Dockerfile b/comps/dataprep/qdrant/docker/Dockerfile index bdf0315e2..ff9f6b253 100644 --- a/comps/dataprep/qdrant/docker/Dockerfile +++ b/comps/dataprep/qdrant/docker/Dockerfile @@ -12,6 +12,7 @@ RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missin build-essential \ libgl1-mesa-glx \ libjemalloc-dev \ + default-jre \ vim RUN useradd -m -s /bin/bash user && \ @@ -22,13 +23,18 @@ USER user COPY comps /home/user/comps -RUN pip install --no-cache-dir --upgrade pip && \ - if [ ${ARCH} = "cpu" ]; then pip install torch --index-url https://download.pytorch.org/whl/cpu; fi && \ +RUN pip install --no-cache-dir --upgrade pip setuptools && \ + if [ ${ARCH} = "cpu" ]; then pip install torch torchvision --index-url https://download.pytorch.org/whl/cpu; fi && \ pip install --no-cache-dir -r /home/user/comps/dataprep/qdrant/requirements.txt ENV PYTHONPATH=$PYTHONPATH:/home/user +USER root + +RUN mkdir -p /home/user/comps/dataprep/qdrant/uploaded_files && chown -R user /home/user/comps/dataprep/qdrant/uploaded_files + +USER user + WORKDIR /home/user/comps/dataprep/qdrant ENTRYPOINT ["python", "prepare_doc_qdrant.py"] - 
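Besides the multipart file upload shown in the README above, the reworked Qdrant dataprep (see the prepare_doc_qdrant.py changes further down) also accepts a link_list form field carrying a JSON-encoded list of URLs. A rough client sketch, assuming the service runs locally on port 6007 and using an example URL:

```python
# Illustrative only; the link_list field is parsed with json.loads on the server,
# so it is sent here as a JSON string form value.
import json

import requests

links = ["https://www.ces.tech/"]
resp = requests.post(
    "http://localhost:6007/v1/dataprep",
    data={"link_list": json.dumps(links), "chunk_size": 1500, "chunk_overlap": 100},
)
print(resp.json())  # expected: {"status": 200, "message": "Data preparation succeeded"}
```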
diff --git a/comps/dataprep/qdrant/docker/docker-compose-dataprep-qdrant.yaml b/comps/dataprep/qdrant/docker/docker-compose-dataprep-qdrant.yaml index e86dc2c4e..aaf2a17dd 100644 --- a/comps/dataprep/qdrant/docker/docker-compose-dataprep-qdrant.yaml +++ b/comps/dataprep/qdrant/docker/docker-compose-dataprep-qdrant.yaml @@ -9,19 +9,36 @@ services: ports: - "6333:6333" - "6334:6334" + tei-embedding-service: + image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + container_name: tei-embedding-server + ports: + - "6006:80" + volumes: + - "./data:/data" + shm_size: 1g + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate dataprep-qdrant: image: opea/gen-ai-comps:dataprep-qdrant-xeon-server container_name: dataprep-qdrant-server + depends_on: + - qdrant-vector-db + - tei-embedding-service ports: - - "6000:6000" + - "6007:6007" ipc: host environment: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - QDRANT: ${QDRANT} + QDRANT_HOST: ${QDRANT_HOST} QDRANT_PORT: ${QDRANT_PORT} COLLECTION_NAME: ${COLLECTION_NAME} + TEI_ENDPOINT: ${TEI_ENDPOINT} restart: unless-stopped networks: diff --git a/comps/dataprep/qdrant/prepare_doc_qdrant.py b/comps/dataprep/qdrant/prepare_doc_qdrant.py index 422854eec..fb8d66571 100644 --- a/comps/dataprep/qdrant/prepare_doc_qdrant.py +++ b/comps/dataprep/qdrant/prepare_doc_qdrant.py @@ -1,30 +1,31 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -import os +import json +from typing import List, Optional, Union -from config import COLLECTION_NAME, EMBED_MODEL, QDRANT_HOST, QDRANT_PORT +from config import COLLECTION_NAME, EMBED_MODEL, QDRANT_HOST, QDRANT_PORT, TEI_EMBEDDING_ENDPOINT +from fastapi import File, Form, HTTPException, UploadFile from langchain.text_splitter import RecursiveCharacterTextSplitter -from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceEmbeddings, HuggingFaceHubEmbeddings +from langchain_community.embeddings import HuggingFaceBgeEmbeddings from langchain_community.vectorstores import Qdrant +from langchain_huggingface import HuggingFaceEndpointEmbeddings from langchain_text_splitters import HTMLHeaderTextSplitter -from comps import DocPath, opea_microservices, opea_telemetry, register_microservice -from comps.dataprep.utils import document_loader, get_separators, get_tables_result +from comps import DocPath, opea_microservices, register_microservice +from comps.dataprep.utils import ( + document_loader, + encode_filename, + get_separators, + get_tables_result, + parse_html, + save_content_to_local_disk, +) -tei_embedding_endpoint = os.getenv("TEI_ENDPOINT") +upload_folder = "./uploaded_files/" -@register_microservice( - name="opea_service@prepare_doc_qdrant", - endpoint="/v1/dataprep", - host="0.0.0.0", - port=6000, - input_datatype=DocPath, - output_datatype=None, -) -@opea_telemetry -def ingest_documents(doc_path: DocPath): +def ingest_data_to_qdrant(doc_path: DocPath): """Ingest document to Qdrant.""" path = doc_path.path print(f"Parsing document {path}.") @@ -38,23 +39,30 @@ def ingest_documents(doc_path: DocPath): text_splitter = HTMLHeaderTextSplitter(headers_to_split_on=headers_to_split_on) else: text_splitter = RecursiveCharacterTextSplitter( - chunk_size=doc_path.chunk_size, chunk_overlap=100, add_start_index=True, separators=get_separators() + chunk_size=doc_path.chunk_size, + chunk_overlap=doc_path.chunk_overlap, + add_start_index=True, + 
separators=get_separators(), ) content = document_loader(path) + chunks = text_splitter.split_text(content) if doc_path.process_table and path.endswith(".pdf"): table_chunks = get_tables_result(path, doc_path.table_strategy) chunks = chunks + table_chunks print("Done preprocessing. Created ", len(chunks), " chunks of the original pdf") + # Create vectorstore - if tei_embedding_endpoint: + if TEI_EMBEDDING_ENDPOINT: # create embeddings using TEI endpoint service - embedder = HuggingFaceHubEmbeddings(model=tei_embedding_endpoint) + embedder = HuggingFaceEndpointEmbeddings(model=TEI_EMBEDDING_ENDPOINT) else: # create embeddings using local embedding model embedder = HuggingFaceBgeEmbeddings(model_name=EMBED_MODEL) + print("embedder created.") + # Batch size batch_size = 32 num_chunks = len(chunks) @@ -71,6 +79,78 @@ def ingest_documents(doc_path: DocPath): ) print(f"Processed batch {i//batch_size + 1}/{(num_chunks-1)//batch_size + 1}") + return True + + +@register_microservice( + name="opea_service@prepare_doc_qdrant", + endpoint="/v1/dataprep", + host="0.0.0.0", + port=6007, + input_datatype=DocPath, + output_datatype=None, +) +async def ingest_documents( + files: Optional[Union[UploadFile, List[UploadFile]]] = File(None), + link_list: Optional[str] = Form(None), + chunk_size: int = Form(1500), + chunk_overlap: int = Form(100), + process_table: bool = Form(False), + table_strategy: str = Form("fast"), +): + print(f"files:{files}") + print(f"link_list:{link_list}") + + if files: + if not isinstance(files, list): + files = [files] + uploaded_files = [] + for file in files: + encode_file = encode_filename(file.filename) + save_path = upload_folder + encode_file + await save_content_to_local_disk(save_path, file) + ingest_data_to_qdrant( + DocPath( + path=save_path, + chunk_size=chunk_size, + chunk_overlap=chunk_overlap, + process_table=process_table, + table_strategy=table_strategy, + ) + ) + uploaded_files.append(save_path) + print(f"Successfully saved file {save_path}") + + return {"status": 200, "message": "Data preparation succeeded"} + + if link_list: + link_list = json.loads(link_list) # Parse JSON string to list + if not isinstance(link_list, list): + raise HTTPException(status_code=400, detail="link_list should be a list.") + for link in link_list: + encoded_link = encode_filename(link) + save_path = upload_folder + encoded_link + ".txt" + content = parse_html([link])[0][0] + try: + await save_content_to_local_disk(save_path, content) + ingest_data_to_qdrant( + DocPath( + path=save_path, + chunk_size=chunk_size, + chunk_overlap=chunk_overlap, + process_table=process_table, + table_strategy=table_strategy, + ) + ) + except json.JSONDecodeError: + raise HTTPException(status_code=500, detail="Fail to ingest data into qdrant.") + + print(f"Successfully saved link {link}") + + return {"status": 200, "message": "Data preparation succeeded"} + + raise HTTPException(status_code=400, detail="Must provide either a file or a string list.") + if __name__ == "__main__": opea_microservices["opea_service@prepare_doc_qdrant"].start() diff --git a/comps/dataprep/qdrant/requirements.txt b/comps/dataprep/qdrant/requirements.txt index 2c9df40f5..e5bcf80b3 100644 --- a/comps/dataprep/qdrant/requirements.txt +++ b/comps/dataprep/qdrant/requirements.txt @@ -8,6 +8,7 @@ huggingface_hub langchain langchain-community langchain-text-splitters +langchain_huggingface markdown numpy opentelemetry-api @@ -23,3 +24,4 @@ qdrant-client sentence_transformers shortuuid unstructured[all-docs]==0.11.5 +uvicorn diff --git 
a/comps/dataprep/redis/README.md b/comps/dataprep/redis/README.md index 58fe3b34d..0845254df 100644 --- a/comps/dataprep/redis/README.md +++ b/comps/dataprep/redis/README.md @@ -38,12 +38,35 @@ Please refer to this [readme](../../vectorstores/langchain/redis/README.md). ```bash export REDIS_URL="redis://${your_ip}:6379" export INDEX_NAME=${your_index_name} -export LANGCHAIN_TRACING_V2=true -export LANGCHAIN_API_KEY=${your_langchain_api_key} -export LANGCHAIN_PROJECT="opea/gen-ai-comps:dataprep" export PYTHONPATH=${path_to_comps} ``` +## 1.4 Start Embedding Service + +First, you need to start a TEI service. + +```bash +your_port=6006 +model="BAAI/bge-large-en-v1.5" +revision="refs/pr/5" +docker run -p $your_port:80 -v ./data:/data --name tei_server -e http_proxy=$http_proxy -e https_proxy=$https_proxy --pull always ghcr.io/huggingface/text-embeddings-inference:cpu-1.2 --model-id $model --revision $revision +``` + +Then you need to test your TEI service using the following commands: + +```bash +curl localhost:$your_port/embed \ + -X POST \ + -d '{"inputs":"What is Deep Learning?"}' \ + -H 'Content-Type: application/json' +``` + +After checking that it works, set up environment variables. + +```bash +export TEI_ENDPOINT="http://localhost:$your_port" +``` + ## 1.4 Start Document Preparation Microservice for Redis with Python Script Start document preparation microservice for Redis with below command. @@ -64,16 +87,16 @@ python prepare_doc_redis_on_ray.py ## 2.1 Start Redis Stack Server -Please refer to this [readme](../../../vectorstores/langchain/redis/README.md). +Please refer to this [readme](../../vectorstores/langchain/redis/README.md). ## 2.2 Setup Environment Variables ```bash +export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5" +export TEI_ENDPOINT="http://${your_ip}:6006" export REDIS_URL="redis://${your_ip}:6379" export INDEX_NAME=${your_index_name} -export LANGCHAIN_TRACING_V2=true -export LANGCHAIN_API_KEY=${your_langchain_api_key} -export LANGCHAIN_PROJECT="opea/dataprep" +export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token} ``` ## 2.3 Build Docker Image @@ -106,13 +129,13 @@ docker build -t opea/dataprep-on-ray-redis:latest --build-arg https_proxy=$https - option 1: Start single-process version (for 1-10 files processing) ```bash -docker run -d --name="dataprep-redis-server" -p 6007:6007 -p 6008:6008 -p 6009:6009 --runtime=runc --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e REDIS_URL=$REDIS_URL -e INDEX_NAME=$INDEX_NAME -e TEI_ENDPOINT=$TEI_ENDPOINT opea/dataprep-redis:latest +docker run -d --name="dataprep-redis-server" -p 6007:6007 --runtime=runc --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e REDIS_URL=$REDIS_URL -e INDEX_NAME=$INDEX_NAME -e TEI_ENDPOINT=$TEI_ENDPOINT -e HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN opea/dataprep-redis:latest ``` - option 2: Start multi-process version (for >10 files processing) ```bash -docker run -d --name="dataprep-redis-server" -p 6007:6007 -p 6008:6008 -p 6009:6009 --runtime=runc --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e REDIS_URL=$REDIS_URL -e INDEX_NAME=$INDEX_NAME -e TEI_ENDPOINT=$TEI_ENDPOINT -e TIMEOUT_SECONDS=600 opea/dataprep-on-ray-redis:latest +docker run -d --name="dataprep-redis-server" -p 6007:6007 --runtime=runc --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e REDIS_URL=$REDIS_URL -e INDEX_NAME=$INDEX_NAME -e TEI_ENDPOINT=$TEI_ENDPOINT -e HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN -e TIMEOUT_SECONDS=600 
opea/dataprep-on-ray-redis:latest ``` ## 2.5 Run with Docker Compose (Option B - deprecated, will move to genAIExample in future) @@ -220,7 +243,7 @@ To get uploaded file structures, use the following command: ```bash curl -X POST \ -H "Content-Type: application/json" \ - http://localhost:6008/v1/dataprep/get_file + http://localhost:6007/v1/dataprep/get_file ``` Then you will get the response JSON like this: @@ -253,17 +276,17 @@ The `file_path` here should be the `id` get from `/v1/dataprep/get_file` API. curl -X POST \ -H "Content-Type: application/json" \ -d '{"file_path": "https://www.ces.tech/.txt"}' \ - http://10.165.57.68:6009/v1/dataprep/delete_file + http://localhost:6007/v1/dataprep/delete_file # delete file curl -X POST \ -H "Content-Type: application/json" \ -d '{"file_path": "uploaded_file_1.txt"}' \ - http://10.165.57.68:6009/v1/dataprep/delete_file + http://localhost:6007/v1/dataprep/delete_file # delete all files and links curl -X POST \ -H "Content-Type: application/json" \ -d '{"file_path": "all"}' \ - http://10.165.57.68:6009/v1/dataprep/delete_file + http://localhost:6007/v1/dataprep/delete_file ``` diff --git a/comps/dataprep/redis/langchain/config.py b/comps/dataprep/redis/langchain/config.py index b0b19d651..75715912c 100644 --- a/comps/dataprep/redis/langchain/config.py +++ b/comps/dataprep/redis/langchain/config.py @@ -5,7 +5,7 @@ # Embedding model -EMBED_MODEL = os.getenv("EMBED_MODEL", "BAAI/bge-base-en-v1.5") +EMBED_MODEL = os.getenv("EMBED_MODEL", "BAAI/bge-large-en-v1.5") # Redis Connection Information REDIS_HOST = os.getenv("REDIS_HOST", "localhost") @@ -61,9 +61,6 @@ def format_redis_conn_from_env(): INDEX_NAME = os.getenv("INDEX_NAME", "rag-redis") KEY_INDEX_NAME = os.getenv("KEY_INDEX_NAME", "file-keys") -current_file_path = os.path.abspath(__file__) -parent_dir = os.path.dirname(current_file_path) -REDIS_SCHEMA = os.getenv("REDIS_SCHEMA", "schema_dim_768.yml") TIMEOUT_SECONDS = int(os.getenv("TIMEOUT_SECONDS", 600)) -schema_path = os.path.join(parent_dir, REDIS_SCHEMA) -INDEX_SCHEMA = schema_path + +SEARCH_BATCH_SIZE = int(os.getenv("SEARCH_BATCH_SIZE", 10)) diff --git a/comps/dataprep/redis/langchain/docker/docker-compose-dataprep-redis.yaml b/comps/dataprep/redis/langchain/docker/docker-compose-dataprep-redis.yaml index e2775972d..0ef8a1f1a 100644 --- a/comps/dataprep/redis/langchain/docker/docker-compose-dataprep-redis.yaml +++ b/comps/dataprep/redis/langchain/docker/docker-compose-dataprep-redis.yaml @@ -9,22 +9,35 @@ services: ports: - "6379:6379" - "8001:8001" + tei-embedding-service: + image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + container_name: tei-embedding-server + ports: + - "6006:80" + volumes: + - "./data:/data" + shm_size: 1g + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate dataprep-redis: image: opea/dataprep-redis:latest container_name: dataprep-redis-server ports: - "6007:6007" - - "6008:6008" - - "6009:6009" ipc: host environment: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} + REDIS_HOST: ${REDIS_HOST} + REDIS_PORT: ${REDIS_PORT} REDIS_URL: ${REDIS_URL} INDEX_NAME: ${INDEX_NAME} TEI_ENDPOINT: ${TEI_ENDPOINT} - LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY} + HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} restart: unless-stopped networks: diff --git a/comps/dataprep/redis/langchain/prepare_doc_redis.py b/comps/dataprep/redis/langchain/prepare_doc_redis.py index 2a99a11ee..a749cd557 
100644 --- a/comps/dataprep/redis/langchain/prepare_doc_redis.py +++ b/comps/dataprep/redis/langchain/prepare_doc_redis.py @@ -10,13 +10,13 @@ # from pyspark import SparkConf, SparkContext import redis -from config import EMBED_MODEL, INDEX_NAME, INDEX_SCHEMA, KEY_INDEX_NAME, REDIS_HOST, REDIS_PORT, REDIS_URL +from config import EMBED_MODEL, INDEX_NAME, KEY_INDEX_NAME, REDIS_URL, SEARCH_BATCH_SIZE from fastapi import Body, File, Form, HTTPException, UploadFile from langchain.text_splitter import RecursiveCharacterTextSplitter -from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceHubEmbeddings +from langchain_community.embeddings import HuggingFaceBgeEmbeddings from langchain_community.vectorstores import Redis +from langchain_huggingface import HuggingFaceEndpointEmbeddings from langchain_text_splitters import HTMLHeaderTextSplitter -from langsmith import traceable from redis.commands.search.field import TextField from redis.commands.search.indexDefinition import IndexDefinition, IndexType @@ -25,7 +25,7 @@ create_upload_folder, document_loader, encode_filename, - get_file_structure, + format_search_results, get_separators, get_tables_result, parse_html, @@ -35,7 +35,7 @@ tei_embedding_endpoint = os.getenv("TEI_ENDPOINT") upload_folder = "./uploaded_files/" -redis_pool = redis.ConnectionPool(host=REDIS_HOST, port=REDIS_PORT) +redis_pool = redis.ConnectionPool.from_url(REDIS_URL) def check_index_existance(client): @@ -76,7 +76,7 @@ def search_by_id(client, doc_id): print(f"[ search by id ] searching docs of {doc_id}") try: results = client.load_document(doc_id) - print(f"[ search by id ] search success of {doc_id}") + print(f"[ search by id ] search success of {doc_id}: {results}") return results except Exception as e: print(f"[ search by id ] fail to search docs of {doc_id}: {e}") @@ -104,35 +104,12 @@ def delete_by_id(client, id): return True -def ingest_data_to_redis(doc_path: DocPath): - """Ingest document to Redis.""" - path = doc_path.path - print(f"Parsing document {path}.") - - if path.endswith(".html"): - headers_to_split_on = [ - ("h1", "Header 1"), - ("h2", "Header 2"), - ("h3", "Header 3"), - ] - text_splitter = HTMLHeaderTextSplitter(headers_to_split_on=headers_to_split_on) - else: - text_splitter = RecursiveCharacterTextSplitter( - chunk_size=doc_path.chunk_size, chunk_overlap=100, add_start_index=True, separators=get_separators() - ) - - content = document_loader(path) - - chunks = text_splitter.split_text(content) - if doc_path.process_table and path.endswith(".pdf"): - table_chunks = get_tables_result(path, doc_path.table_strategy) - chunks = chunks + table_chunks - print("Done preprocessing. 
Created ", len(chunks), " chunks of the original pdf") - +def ingest_chunks_to_redis(file_name: str, chunks: List): + print(f"[ ingest chunks ] file name: {file_name}") # Create vectorstore if tei_embedding_endpoint: # create embeddings using TEI endpoint service - embedder = HuggingFaceHubEmbeddings(model=tei_embedding_endpoint) + embedder = HuggingFaceEndpointEmbeddings(model=tei_embedding_endpoint) else: # create embeddings using local embedding model embedder = HuggingFaceBgeEmbeddings(model_name=EMBED_MODEL) @@ -143,7 +120,7 @@ def ingest_data_to_redis(doc_path: DocPath): file_ids = [] for i in range(0, num_chunks, batch_size): - print(f"Current batch: {i}") + print(f"[ ingest chunks ] Current batch: {i}") batch_chunks = chunks[i : i + batch_size] batch_texts = batch_chunks @@ -151,64 +128,59 @@ def ingest_data_to_redis(doc_path: DocPath): texts=batch_texts, embedding=embedder, index_name=INDEX_NAME, - index_schema=INDEX_SCHEMA, redis_url=REDIS_URL, ) - print(f"keys: {keys}") + print(f"[ ingest chunks ] keys: {keys}") file_ids.extend(keys) - print(f"Processed batch {i//batch_size + 1}/{(num_chunks-1)//batch_size + 1}") + print(f"[ ingest chunks ] Processed batch {i//batch_size + 1}/{(num_chunks-1)//batch_size + 1}") # store file_ids into index file-keys r = redis.Redis(connection_pool=redis_pool) client = r.ft(KEY_INDEX_NAME) if not check_index_existance(client): assert create_index(client) - file_name = doc_path.path.split("/")[-1] - assert store_by_id(client, key=file_name, value="#".join(file_ids)) + try: + assert store_by_id(client, key=file_name, value="#".join(file_ids)) + except Exception as e: + print(f"[ ingest chunks ] {e}. Fail to store chunks of file {file_name}.") + raise HTTPException(status_code=500, detail=f"Fail to store chunks of file {file_name}.") return True -async def ingest_link_to_redis(link_list: List[str]): - # Create embedding obj - if tei_embedding_endpoint: - # create embeddings using TEI endpoint service - embedder = HuggingFaceHubEmbeddings(model=tei_embedding_endpoint) - else: - # create embeddings using local embedding model - embedder = HuggingFaceBgeEmbeddings(model_name=EMBED_MODEL) +def ingest_data_to_redis(doc_path: DocPath): + """Ingest document to Redis.""" + path = doc_path.path + print(f"Parsing document {path}.") - # Create redis connection obj - r = redis.Redis(connection_pool=redis_pool) - client = r.ft(KEY_INDEX_NAME) + if path.endswith(".html"): + headers_to_split_on = [ + ("h1", "Header 1"), + ("h2", "Header 2"), + ("h3", "Header 3"), + ] + text_splitter = HTMLHeaderTextSplitter(headers_to_split_on=headers_to_split_on) + else: + text_splitter = RecursiveCharacterTextSplitter( + chunk_size=doc_path.chunk_size, + chunk_overlap=doc_path.chunk_overlap, + add_start_index=True, + separators=get_separators(), + ) - # save link contents and doc_ids one by one - for link in link_list: - content = parse_html([link])[0][0] - print(f"[ ingest link ] link: {link} content: {content}") - encoded_link = encode_filename(link) - save_path = upload_folder + encoded_link + ".txt" - print(f"[ ingest link ] save_path: {save_path}") - await save_content_to_local_disk(save_path, content) + content = document_loader(path) - _, keys = Redis.from_texts_return_keys( - texts=content, - embedding=embedder, - index_name=INDEX_NAME, - index_schema=INDEX_SCHEMA, - redis_url=REDIS_URL, - ) - print(f"keys: {keys}") - if not check_index_existance(client): - assert create_index(client) - file_name = encoded_link + ".txt" - assert store_by_id(client, key=file_name, 
value="#".join(keys)) + chunks = text_splitter.split_text(content) + if doc_path.process_table and path.endswith(".pdf"): + table_chunks = get_tables_result(path, doc_path.table_strategy) + chunks = chunks + table_chunks + print("Done preprocessing. Created ", len(chunks), " chunks of the original pdf") - return True + file_name = doc_path.path.split("/")[-1] + return ingest_chunks_to_redis(file_name, chunks) @register_microservice(name="opea_service@prepare_doc_redis", endpoint="/v1/dataprep", host="0.0.0.0", port=6007) -@traceable(run_type="tool") async def ingest_documents( files: Optional[Union[UploadFile, List[UploadFile]]] = File(None), link_list: Optional[str] = Form(None), @@ -220,12 +192,30 @@ async def ingest_documents( print(f"files:{files}") print(f"link_list:{link_list}") + r = redis.Redis(connection_pool=redis_pool) + client = r.ft(KEY_INDEX_NAME) + if files: if not isinstance(files, list): files = [files] uploaded_files = [] + for file in files: encode_file = encode_filename(file.filename) + doc_id = "file:" + encode_file + + # check whether the file already exists + key_ids = None + try: + key_ids = search_by_id(client, doc_id).key_ids + print(f"[ upload file ] File {file.filename} already exists.") + except Exception as e: + print(f"[ upload file ] File {file.filename} does not exist.") + if key_ids: + raise HTTPException( + status_code=400, detail=f"Uploaded file {file.filename} already exists. Please change file name." + ) + save_path = upload_folder + encode_file await save_content_to_local_disk(save_path, file) ingest_data_to_redis( @@ -265,52 +255,113 @@ async def ingest_documents( return {"status": 200, "message": "Data preparation succeeded"} if link_list: - try: - link_list = json.loads(link_list) # Parse JSON string to list - if not isinstance(link_list, list): - raise HTTPException(status_code=400, detail="link_list should be a list.") - await ingest_link_to_redis(link_list) - print(f"Successfully saved link list {link_list}") - return {"status": 200, "message": "Data preparation succeeded"} - except json.JSONDecodeError: - raise HTTPException(status_code=400, detail="Invalid JSON format for link_list.") + link_list = json.loads(link_list) # Parse JSON string to list + if not isinstance(link_list, list): + raise HTTPException(status_code=400, detail=f"Link_list {link_list} should be a list.") + for link in link_list: + encoded_link = encode_filename(link) + doc_id = "file:" + encoded_link + ".txt" + + # check whether the link file already exists + key_ids = None + try: + key_ids = search_by_id(client, doc_id).key_ids + print(f"[ upload file ] Link {link} already exists.") + except Exception as e: + print(f"[ upload file ] Link {link} does not exist. Keep storing.") + if key_ids: + raise HTTPException( + status_code=400, detail=f"Uploaded link {link} already exists. Please change another link." 
+ ) + + save_path = upload_folder + encoded_link + ".txt" + content = parse_html([link])[0][0] + await save_content_to_local_disk(save_path, content) + ingest_data_to_redis( + DocPath( + path=save_path, + chunk_size=chunk_size, + chunk_overlap=chunk_overlap, + process_table=process_table, + table_strategy=table_strategy, + ) + ) + print(f"Successfully saved link list {link_list}") + return {"status": 200, "message": "Data preparation succeeded"} raise HTTPException(status_code=400, detail="Must provide either a file or a string list.") @register_microservice( - name="opea_service@prepare_doc_redis_file", endpoint="/v1/dataprep/get_file", host="0.0.0.0", port=6008 + name="opea_service@prepare_doc_redis", endpoint="/v1/dataprep/get_file", host="0.0.0.0", port=6007 ) -@traceable(run_type="tool") async def rag_get_file_structure(): print("[ dataprep - get file ] start to get file structure") - if not Path(upload_folder).exists(): - print("No file uploaded, return empty list.") - return [] - - file_content = get_file_structure(upload_folder) - return file_content + # define redis client + r = redis.Redis(connection_pool=redis_pool) + offset = 0 + file_list = [] + while True: + response = r.execute_command("FT.SEARCH", KEY_INDEX_NAME, "*", "LIMIT", offset, offset + SEARCH_BATCH_SIZE) + # no doc retrieved + if len(response) < 2: + break + file_list = format_search_results(response, file_list) + offset += SEARCH_BATCH_SIZE + # last batch + if (len(response) - 1) // 2 < SEARCH_BATCH_SIZE: + break + return file_list @register_microservice( - name="opea_service@prepare_doc_redis_del", endpoint="/v1/dataprep/delete_file", host="0.0.0.0", port=6009 + name="opea_service@prepare_doc_redis", endpoint="/v1/dataprep/delete_file", host="0.0.0.0", port=6007 ) -@traceable(run_type="tool") async def delete_single_file(file_path: str = Body(..., embed=True)): """Delete file according to `file_path`. `file_path`: - specific file path (e.g. /path/to/file.txt) - - folder path (e.g. /path/to/folder) - "all": delete all files uploaded """ + + # define redis client + r = redis.Redis(connection_pool=redis_pool) + client = r.ft(KEY_INDEX_NAME) + client2 = r.ft(INDEX_NAME) + # delete all uploaded files if file_path == "all": print("[dataprep - del] delete all files") - remove_folder_with_ignore(upload_folder) - assert drop_index(index_name=INDEX_NAME) - assert drop_index(index_name=KEY_INDEX_NAME) + + # drop index KEY_INDEX_NAME + if check_index_existance(client): + try: + assert drop_index(index_name=KEY_INDEX_NAME) + except Exception as e: + print(f"[dataprep - del] {e}. Fail to drop index {KEY_INDEX_NAME}.") + raise HTTPException(status_code=500, detail=f"Fail to drop index {KEY_INDEX_NAME}.") + else: + print(f"[dataprep - del] Index {KEY_INDEX_NAME} does not exits.") + + # drop index INDEX_NAME + if check_index_existance(client2): + try: + assert drop_index(index_name=INDEX_NAME) + except Exception as e: + print(f"[dataprep - del] {e}. Fail to drop index {INDEX_NAME}.") + raise HTTPException(status_code=500, detail=f"Fail to drop index {INDEX_NAME}.") + else: + print(f"[dataprep - del] Index {INDEX_NAME} does not exits.") + + # delete files on local disk + try: + remove_folder_with_ignore(upload_folder) + except Exception as e: + print(f"[dataprep - del] {e}. 
Fail to delete {upload_folder}.") + raise HTTPException(status_code=500, detail=f"Fail to delete {upload_folder}.") + print("[dataprep - del] successfully delete all files.") create_upload_folder(upload_folder) return {"status": True} @@ -318,39 +369,60 @@ async def delete_single_file(file_path: str = Body(..., embed=True)): delete_path = Path(upload_folder + "/" + encode_filename(file_path)) print(f"[dataprep - del] delete_path: {delete_path}") - # partially delete files/folders + # partially delete files if delete_path.exists(): - r = redis.Redis(host=REDIS_HOST, port=REDIS_PORT) - client = r.ft(KEY_INDEX_NAME) - client2 = r.ft(INDEX_NAME) doc_id = "file:" + encode_filename(file_path) - objs = search_by_id(client, doc_id).key_ids - file_ids = objs.split("#") + + # determine whether this file exists in db KEY_INDEX_NAME + try: + key_ids = search_by_id(client, doc_id).key_ids + except Exception as e: + print(f"[dataprep - del] {e}, File {file_path} does not exists.") + raise HTTPException( + status_code=404, detail=f"File not found in db {KEY_INDEX_NAME}. Please check file_path." + ) + file_ids = key_ids.split("#") # delete file if delete_path.is_file(): + # delete file keys id in db KEY_INDEX_NAME try: - for file_id in file_ids: - assert delete_by_id(client2, file_id) assert delete_by_id(client, doc_id) - delete_path.unlink() except Exception as e: - print(f"[dataprep - del] fail to delete file {delete_path}: {e}") - return {"status": False} + print(f"[dataprep - del] {e}. File {file_path} delete failed for db {KEY_INDEX_NAME}.") + raise HTTPException(status_code=500, detail=f"File {file_path} delete failed.") + + # delete file content in db INDEX_NAME + for file_id in file_ids: + # determine whether this file exists in db INDEX_NAME + try: + content = search_by_id(client2, file_id).content + except Exception as e: + print(f"[dataprep - del] {e}. File {file_path} does not exists.") + raise HTTPException( + status_code=404, detail=f"File not found in db {INDEX_NAME}. Please check file_path." + ) + + # delete file content + try: + assert delete_by_id(client2, file_id) + except Exception as e: + print(f"[dataprep - del] {e}. File {file_path} delete failed for db {INDEX_NAME}") + raise HTTPException(status_code=500, detail=f"File {file_path} delete failed.") + + # delete file on local disk + delete_path.unlink() + + return {"status": True} + # delete folder else: - try: - shutil.rmtree(delete_path) - except Exception as e: - print(f"[dataprep - del] fail to delete folder {delete_path}: {e}") - return {"status": False} - return {"status": True} + print(f"[dataprep - del] Delete folder {file_path} is not supported for now.") + raise HTTPException(status_code=404, detail=f"Delete folder {file_path} is not supported for now.") else: - raise HTTPException(status_code=404, detail="File/folder not found. Please check del_path.") + raise HTTPException(status_code=404, detail=f"File {file_path} not found. 
Please check file_path.") if __name__ == "__main__": create_upload_folder(upload_folder) opea_microservices["opea_service@prepare_doc_redis"].start() - opea_microservices["opea_service@prepare_doc_redis_file"].start() - opea_microservices["opea_service@prepare_doc_redis_del"].start() diff --git a/comps/dataprep/redis/langchain/requirements.txt b/comps/dataprep/redis/langchain/requirements.txt index 55bf1ebbc..284b9379b 100644 --- a/comps/dataprep/redis/langchain/requirements.txt +++ b/comps/dataprep/redis/langchain/requirements.txt @@ -5,10 +5,10 @@ docx2txt easyocr fastapi huggingface_hub -langchain +langchain==0.2.12 langchain-community langchain-text-splitters -langsmith +langchain_huggingface markdown numpy opentelemetry-api @@ -26,3 +26,4 @@ redis sentence_transformers shortuuid unstructured[all-docs]==0.11.5 +uvicorn diff --git a/comps/dataprep/redis/langchain/schema.yml b/comps/dataprep/redis/langchain/schema.yml deleted file mode 100644 index 0c0ca9711..000000000 --- a/comps/dataprep/redis/langchain/schema.yml +++ /dev/null @@ -1,14 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -text: - - name: content - - name: source -numeric: - - name: start_index -vector: - - name: content_vector - algorithm: HNSW - datatype: FLOAT32 - dims: 384 - distance_metric: COSINE diff --git a/comps/dataprep/redis/langchain/schema_dim_1024.yml b/comps/dataprep/redis/langchain/schema_dim_1024.yml deleted file mode 100644 index 0515e3e7f..000000000 --- a/comps/dataprep/redis/langchain/schema_dim_1024.yml +++ /dev/null @@ -1,14 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -text: - - name: content - - name: source -numeric: - - name: start_index -vector: - - name: content_vector - algorithm: HNSW - datatype: FLOAT32 - dims: 1024 - distance_metric: COSINE diff --git a/comps/dataprep/redis/langchain/schema_dim_768.yml b/comps/dataprep/redis/langchain/schema_dim_768.yml deleted file mode 100644 index adacf9865..000000000 --- a/comps/dataprep/redis/langchain/schema_dim_768.yml +++ /dev/null @@ -1,14 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -text: - - name: content - - name: source -numeric: - - name: start_index -vector: - - name: content_vector - algorithm: HNSW - datatype: FLOAT32 - dims: 768 - distance_metric: COSINE diff --git a/comps/dataprep/redis/langchain/schema_lcdocs_dim_768.yml b/comps/dataprep/redis/langchain/schema_lcdocs_dim_768.yml deleted file mode 100644 index f36660ddd..000000000 --- a/comps/dataprep/redis/langchain/schema_lcdocs_dim_768.yml +++ /dev/null @@ -1,18 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -text: - - name: content - - name: changefreq - - name: description - - name: language - - name: loc - - name: priority - - name: source - - name: title -vector: - - name: content_vector - algorithm: HNSW - datatype: FLOAT32 - dims: 768 - distance_metric: COSINE diff --git a/comps/dataprep/redis/langchain_ray/config.py b/comps/dataprep/redis/langchain_ray/config.py index 05e155bfc..7fd0b2678 100644 --- a/comps/dataprep/redis/langchain_ray/config.py +++ b/comps/dataprep/redis/langchain_ray/config.py @@ -60,9 +60,4 @@ def format_redis_conn_from_env(): # Vector Index Configuration INDEX_NAME = os.getenv("INDEX_NAME", "rag-redis") -current_file_path = os.path.abspath(__file__) -parent_dir = os.path.dirname(current_file_path) -REDIS_SCHEMA = os.getenv("REDIS_SCHEMA", "schema_dim_768.yml") TIMEOUT_SECONDS = 
int(os.getenv("TIMEOUT_SECONDS", 600)) -schema_path = os.path.join(parent_dir, REDIS_SCHEMA) -INDEX_SCHEMA = schema_path diff --git a/comps/dataprep/redis/langchain_ray/prepare_doc_redis_on_ray.py b/comps/dataprep/redis/langchain_ray/prepare_doc_redis_on_ray.py index eca41f649..c55165061 100644 --- a/comps/dataprep/redis/langchain_ray/prepare_doc_redis_on_ray.py +++ b/comps/dataprep/redis/langchain_ray/prepare_doc_redis_on_ray.py @@ -21,7 +21,7 @@ from typing import Callable, List, Optional, Union import pandas as pd -from config import EMBED_MODEL, INDEX_NAME, INDEX_SCHEMA, REDIS_URL, TIMEOUT_SECONDS +from config import EMBED_MODEL, INDEX_NAME, REDIS_URL, TIMEOUT_SECONDS from fastapi import Body, File, Form, HTTPException, UploadFile from langchain.text_splitter import RecursiveCharacterTextSplitter from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceHubEmbeddings @@ -75,7 +75,7 @@ def prepare_env(enable_ray=False, pip_requirements=None): def generate_log_name(file_list): file_set = f"{sorted(file_list)}" # print(f"file_set: {file_set}") - md5_str = hashlib.md5(file_set.encode()).hexdigest() + md5_str = hashlib.md5(file_set.encode(), usedforsecurity=False).hexdigest() return f"status/status_{md5_str}.log" @@ -195,7 +195,6 @@ def data_to_redis(data): texts=batch_texts, embedding=embedder, index_name=INDEX_NAME, - index_schema=INDEX_SCHEMA, redis_url=REDIS_URL, ) # print(f"Processed batch {i//batch_size + 1}/{(num_chunks-1)//batch_size + 1}") diff --git a/comps/dataprep/redis/langchain_ray/requirements.txt b/comps/dataprep/redis/langchain_ray/requirements.txt index ff994c9e9..b16a4ac82 100644 --- a/comps/dataprep/redis/langchain_ray/requirements.txt +++ b/comps/dataprep/redis/langchain_ray/requirements.txt @@ -24,4 +24,5 @@ ray redis sentence_transformers shortuuid +uvicorn virtualenv diff --git a/comps/dataprep/redis/llama_index/requirements.txt b/comps/dataprep/redis/llama_index/requirements.txt index 5780c3899..e754a4275 100644 --- a/comps/dataprep/redis/llama_index/requirements.txt +++ b/comps/dataprep/redis/llama_index/requirements.txt @@ -15,3 +15,4 @@ python-bidi==0.4.2 redis sentence_transformers shortuuid +uvicorn diff --git a/comps/dataprep/utils.py b/comps/dataprep/utils.py index 786366a12..ae8361539 100644 --- a/comps/dataprep/utils.py +++ b/comps/dataprep/utils.py @@ -11,6 +11,7 @@ import re import shutil import signal +import subprocess import timeit import unicodedata import urllib.parse @@ -157,7 +158,19 @@ def load_doc(doc_path): """Load doc file.""" print("Converting doc file to docx file...") docx_path = doc_path + "x" - os.system(f"libreoffice --headless --invisible --convert-to docx --outdir {os.path.dirname(docx_path)} {doc_path}") + subprocess.run( + [ + "libreoffice", + "--headless", + "--invisible", + "--convert-to", + "docx", + "--outdir", + os.path.dirname(docx_path), + doc_path, + ], + check=True, + ) print("Converted doc file to docx file.") text = load_docx(docx_path) os.remove(docx_path) @@ -196,7 +209,19 @@ def load_ppt(ppt_path): """Load ppt file.""" print("Converting ppt file to pptx file...") pptx_path = ppt_path + "x" - os.system(f"libreoffice --headless --invisible --convert-to pptx --outdir {os.path.dirname(pptx_path)} {ppt_path}") + subprocess.run( + [ + "libreoffice", + "--headless", + "--invisible", + "--convert-to", + "pptx", + "--outdir", + os.path.dirname(pptx_path), + ppt_path, + ], + check=True, + ) print("Converted ppt file to pptx file.") text = load_pptx(pptx_path) os.remove(pptx_path) @@ -692,6 +717,19 @@ def
get_file_structure(root_path: str, parent_path: str = "") -> List[Dict[str, return result +def format_search_results(response, file_list: list): + for i in range(1, len(response), 2): + file_name = response[i].decode()[5:] + file_dict = { + "name": decode_filename(file_name), + "id": decode_filename(file_name), + "type": "File", + "parent": "", + } + file_list.append(file_dict) + return file_list + + def remove_folder_with_ignore(folder_path: str, except_patterns: List = []): """Remove the specific folder, and ignore some files/folders. diff --git a/comps/embeddings/README.md b/comps/embeddings/README.md index eacefac1f..ce4b4fa46 100644 --- a/comps/embeddings/README.md +++ b/comps/embeddings/README.md @@ -155,6 +155,6 @@ curl http://localhost:6000/v1/health_check\ ```bash curl http://localhost:6000/v1/embeddings\ -X POST \ - -d '{"input":"Hello, world!"}' \ + -d '{"text":"Hello, world!"}' \ -H 'Content-Type: application/json' ``` diff --git a/comps/embeddings/langchain-mosec/README.md b/comps/embeddings/langchain-mosec/README.md index 4ceedc2fa..624fcf6a2 100644 --- a/comps/embeddings/langchain-mosec/README.md +++ b/comps/embeddings/langchain-mosec/README.md @@ -1,7 +1,7 @@ # build Mosec endpoint docker image ``` -docker build --build-arg http_proxy=$http_proxy --build-arg https_proxy=$https_proxy -t langchain-mosec:latest -f comps/embeddings/langchain-mosec/mosec-docker/Dockerfile . +docker build --build-arg http_proxy=$http_proxy --build-arg https_proxy=$https_proxy -t opea/embedding-langchain-mosec-endpoint:latest -f comps/embeddings/langchain-mosec/mosec-docker/Dockerfile . ``` # build embedding microservice docker image @@ -13,13 +13,13 @@ docker build --build-arg http_proxy=$http_proxy --build-arg https_proxy=$https_p # launch Mosec endpoint docker container ``` -docker run -d --name="embedding-langchain-mosec-endpoint" -p 6001:8000 langchain-mosec:latest +docker run -d --name="embedding-langchain-mosec-endpoint" -p 6001:8000 opea/embedding-langchain-mosec-endpoint:latest ``` # launch embedding microservice docker container ``` -export MOSEC_EMBEDDING_ENDPOINT=http://127.0.0.1:6001 +export MOSEC_EMBEDDING_ENDPOINT=http://{mosec_embedding_host_ip}:6001 docker run -d --name="embedding-langchain-mosec-server" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p 6000:6000 --ipc=host -e MOSEC_EMBEDDING_ENDPOINT=$MOSEC_EMBEDDING_ENDPOINT opea/embedding-langchain-mosec:latest ``` diff --git a/comps/embeddings/langchain-mosec/embedding_mosec.py b/comps/embeddings/langchain-mosec/embedding_mosec.py index 2e033ccc0..f34b56a18 100644 --- a/comps/embeddings/langchain-mosec/embedding_mosec.py +++ b/comps/embeddings/langchain-mosec/embedding_mosec.py @@ -9,7 +9,7 @@ from langsmith import traceable from comps import ( - EmbedDoc768, + EmbedDoc, ServiceType, TextDoc, opea_microservices, @@ -51,15 +51,14 @@ def empty_embedding() -> List[float]: host="0.0.0.0", port=6000, input_datatype=TextDoc, - output_datatype=EmbedDoc768, + output_datatype=EmbedDoc, ) @traceable(run_type="embedding") @register_statistics(names=["opea_service@embedding_mosec"]) -def embedding(input: TextDoc) -> EmbedDoc768: +def embedding(input: TextDoc) -> EmbedDoc: start = time.time() embed_vector = embeddings.embed_query(input.text) - embed_vector = embed_vector[:768] # Keep only the first 768 elements - res = EmbedDoc768(text=input.text, embedding=embed_vector) + res = EmbedDoc(text=input.text, embedding=embed_vector) statistics_dict["opea_service@embedding_mosec"].append_latency(time.time() - start, None) return res @@ 
-68,7 +67,7 @@ def embedding(input: TextDoc) -> EmbedDoc768: MOSEC_EMBEDDING_ENDPOINT = os.environ.get("MOSEC_EMBEDDING_ENDPOINT", "http://127.0.0.1:8080") os.environ["OPENAI_API_BASE"] = MOSEC_EMBEDDING_ENDPOINT os.environ["OPENAI_API_KEY"] = "Dummy key" - MODEL_ID = "/root/bge-large-zh" + MODEL_ID = "/root/bge-large-zh-v1.5" embeddings = MosecEmbeddings(model=MODEL_ID) print("Mosec Embedding initialized.") opea_microservices["opea_service@embedding_mosec"].start() diff --git a/comps/embeddings/langchain-mosec/mosec-docker/Dockerfile b/comps/embeddings/langchain-mosec/mosec-docker/Dockerfile index eb1e510a7..945f7b90c 100644 --- a/comps/embeddings/langchain-mosec/mosec-docker/Dockerfile +++ b/comps/embeddings/langchain-mosec/mosec-docker/Dockerfile @@ -2,22 +2,25 @@ # SPDX-License-Identifier: Apache-2.0 From ubuntu:22.04 +RUN useradd -m -s /bin/bash user && \ + mkdir -p /home/user && \ + chown -R user /home/user/ ARG DEBIAN_FRONTEND=noninteractive ENV GLIBC_TUNABLES glibc.cpu.x86_shstk=permissive +RUN apt update && apt install -y python3 python3-pip -COPY comps /root/comps +COPY comps /home/user/comps -RUN apt update && apt install -y python3 python3-pip RUN pip3 install torch==2.2.2 torchvision --index-url https://download.pytorch.org/whl/cpu RUN pip3 install intel-extension-for-pytorch==2.2.0 RUN pip3 install transformers RUN pip3 install llmspec mosec -RUN cd /root/ && export HF_ENDPOINT=https://hf-mirror.com && huggingface-cli download --resume-download BAAI/bge-large-zh --local-dir /root/bge-large-zh - -ENV EMB_MODEL="/root/bge-large-zh/" +RUN cd /home/user/ && export HF_ENDPOINT=https://hf-mirror.com && huggingface-cli download --resume-download BAAI/bge-large-zh-v1.5 --local-dir /home/user/bge-large-zh-v1.5 +USER user +ENV EMB_MODEL="/home/user/bge-large-zh-v1.5/" -WORKDIR /root/comps/embeddings/langchain-mosec/mosec-docker +WORKDIR /home/user/comps/embeddings/langchain-mosec/mosec-docker CMD ["python3", "server-ipex.py"] diff --git a/comps/embeddings/langchain-mosec/mosec-docker/README.md b/comps/embeddings/langchain-mosec/mosec-docker/README.md index 2f87dd30b..e7f59d616 100644 --- a/comps/embeddings/langchain-mosec/mosec-docker/README.md +++ b/comps/embeddings/langchain-mosec/mosec-docker/README.md @@ -4,7 +4,7 @@ This service has an OpenAI compatible restful API to extract text features. It is dedicated to be used on Xeon to accelerate embedding model serving. -Currently the local model is BGE-large-zh. +Currently the local model is BGE-large-zh-v1.5. ## 2. Quick Start @@ -25,13 +25,13 @@ docker run -itd -p 8000:8000 embedding:latest - Restful API by curl ```shell -curl -X POST http://127.0.0.1:8000/v1/embeddings -H "Content-Type: application/json" -d '{ "model": "/root/bge-large-zh/", "input": "hello world"}' +curl -X POST http://127.0.0.1:8000/v1/embeddings -H "Content-Type: application/json" -d '{ "model": "/root/bge-large-zh-v1.5/", "input": "hello world"}' ``` - generate embedding from python ```python -DEFAULT_MODEL = "/root/bge-large-zh/" +DEFAULT_MODEL = "/root/bge-large-zh-v1.5/" SERVICE_URL = "http://127.0.0.1:8000" INPUT_STR = "Hello world!" 
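For reference, the truncated Python snippet above can be completed into a minimal client script. This is an illustrative sketch only, not part of the patch: it assumes the Mosec container is reachable on port 8000, that its OpenAI-compatible route is `/v1/embeddings` as registered in `server-ipex.py`, and that the `model` value matches the `EMB_MODEL` path baked into the image (`/root/bge-large-zh-v1.5/` in the README examples; `/home/user/bge-large-zh-v1.5/` after this Dockerfile change).

```python
# Illustrative sketch: query the Mosec embedding endpoint via the OpenAI-compatible API.
# Adjust SERVICE_URL and DEFAULT_MODEL to match your deployment (port mapping, EMB_MODEL path).
from openai import Client

DEFAULT_MODEL = "/root/bge-large-zh-v1.5/"  # assumed to match EMB_MODEL inside the container
SERVICE_URL = "http://127.0.0.1:8000"
INPUT_STR = "Hello world!"

# The server registers "/v1/embeddings", so the client base_url includes the "/v1" prefix.
client = Client(api_key="dummy", base_url=SERVICE_URL + "/v1")
response = client.embeddings.create(model=DEFAULT_MODEL, input=INPUT_STR)
print(f"embedding dimension: {len(response.data[0].embedding)}")
```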
diff --git a/comps/embeddings/langchain-mosec/mosec-docker/server-ipex.py b/comps/embeddings/langchain-mosec/mosec-docker/server-ipex.py index 05c1c63f3..9639b424a 100644 --- a/comps/embeddings/langchain-mosec/mosec-docker/server-ipex.py +++ b/comps/embeddings/langchain-mosec/mosec-docker/server-ipex.py @@ -13,7 +13,7 @@ from llmspec import EmbeddingData, EmbeddingRequest, EmbeddingResponse, TokenUsage from mosec import ClientError, Runtime, Server, Worker -DEFAULT_MODEL = "/root/bge-large-zh/" +DEFAULT_MODEL = "/home/user/bge-large-zh-v1.5/" class Embedding(Worker): @@ -113,8 +113,9 @@ def forward(self, data: List[EmbeddingRequest]) -> List[EmbeddingResponse]: if __name__ == "__main__": MAX_BATCH_SIZE = int(os.environ.get("MAX_BATCH_SIZE", 128)) MAX_WAIT_TIME = int(os.environ.get("MAX_WAIT_TIME", 10)) + MAX_FORWARD_TIMEOUT = int(os.environ.get("FORWARD_TIMEOUT", 60)) server = Server() - emb = Runtime(Embedding, max_batch_size=MAX_BATCH_SIZE, max_wait_time=MAX_WAIT_TIME) + emb = Runtime(Embedding, max_batch_size=MAX_BATCH_SIZE, max_wait_time=MAX_WAIT_TIME, timeout=MAX_FORWARD_TIMEOUT) server.register_runtime( { "/v1/embeddings": [emb], diff --git a/comps/embeddings/langchain-mosec/mosec-docker/test-embedding.py b/comps/embeddings/langchain-mosec/mosec-docker/test-embedding.py index 4334249b9..d2d67c836 100644 --- a/comps/embeddings/langchain-mosec/mosec-docker/test-embedding.py +++ b/comps/embeddings/langchain-mosec/mosec-docker/test-embedding.py @@ -4,7 +4,7 @@ from openai import Client -DEFAULT_MODEL = "/root/bge-large-zh/" +DEFAULT_MODEL = "/root/bge-large-zh-v1.5/" SERVICE_URL = "http://127.0.0.1:8000" INPUT_STR = "Hello world!" diff --git a/comps/embeddings/langchain-mosec/requirements.txt b/comps/embeddings/langchain-mosec/requirements.txt index efa1a6514..9fa1a059c 100644 --- a/comps/embeddings/langchain-mosec/requirements.txt +++ b/comps/embeddings/langchain-mosec/requirements.txt @@ -8,3 +8,4 @@ opentelemetry-exporter-otlp opentelemetry-sdk prometheus-fastapi-instrumentator shortuuid +uvicorn diff --git a/comps/embeddings/langchain/embedding_tei.py b/comps/embeddings/langchain/embedding_tei.py index 539a2167d..6a5fbc614 100644 --- a/comps/embeddings/langchain/embedding_tei.py +++ b/comps/embeddings/langchain/embedding_tei.py @@ -3,12 +3,13 @@ import os import time +from typing import Union -from langchain_community.embeddings import HuggingFaceHubEmbeddings +from langchain_huggingface import HuggingFaceEndpointEmbeddings from langsmith import traceable from comps import ( - EmbedDoc768, + EmbedDoc, ServiceType, TextDoc, opea_microservices, @@ -16,6 +17,12 @@ register_statistics, statistics_dict, ) +from comps.cores.proto.api_protocol import ( + ChatCompletionRequest, + EmbeddingRequest, + EmbeddingResponse, + EmbeddingResponseData, +) @register_microservice( @@ -24,22 +31,36 @@ endpoint="/v1/embeddings", host="0.0.0.0", port=6000, - input_datatype=TextDoc, - output_datatype=EmbedDoc768, ) @traceable(run_type="embedding") @register_statistics(names=["opea_service@embedding_tei_langchain"]) -def embedding(input: TextDoc) -> EmbedDoc768: +def embedding( + input: Union[TextDoc, EmbeddingRequest, ChatCompletionRequest] +) -> Union[EmbedDoc, EmbeddingResponse, ChatCompletionRequest]: start = time.time() - embed_vector = embeddings.embed_query(input.text) - embed_vector = embed_vector[:768] # Keep only the first 768 elements - res = EmbedDoc768(text=input.text, embedding=embed_vector) + + if isinstance(input, TextDoc): + embed_vector = embeddings.embed_query(input.text) + res = 
EmbedDoc(text=input.text, embedding=embed_vector) + else: + embed_vector = embeddings.embed_query(input.input) + if input.dimensions is not None: + embed_vector = embed_vector[: input.dimensions] + + if isinstance(input, ChatCompletionRequest): + input.embedding = embed_vector + # keep + res = input + if isinstance(input, EmbeddingRequest): + # for standard openai embedding format + res = EmbeddingResponse(data=[EmbeddingResponseData(index=0, embedding=embed_vector)]) + statistics_dict["opea_service@embedding_tei_langchain"].append_latency(time.time() - start, None) return res if __name__ == "__main__": tei_embedding_endpoint = os.getenv("TEI_EMBEDDING_ENDPOINT", "http://localhost:8080") - embeddings = HuggingFaceHubEmbeddings(model=tei_embedding_endpoint) + embeddings = HuggingFaceEndpointEmbeddings(model=tei_embedding_endpoint) print("TEI Gaudi Embedding initialized.") opea_microservices["opea_service@embedding_tei_langchain"].start() diff --git a/comps/embeddings/langchain/local_embedding.py b/comps/embeddings/langchain/local_embedding.py index 5740eda07..1a3825c40 100644 --- a/comps/embeddings/langchain/local_embedding.py +++ b/comps/embeddings/langchain/local_embedding.py @@ -1,9 +1,9 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -from langchain_community.embeddings import HuggingFaceBgeEmbeddings +from langchain_huggingface import HuggingFaceEmbeddings -from comps import EmbedDoc1024, ServiceType, TextDoc, opea_microservices, opea_telemetry, register_microservice +from comps import EmbedDoc, ServiceType, TextDoc, opea_microservices, opea_telemetry, register_microservice @register_microservice( @@ -13,15 +13,15 @@ host="0.0.0.0", port=6000, input_datatype=TextDoc, - output_datatype=EmbedDoc1024, + output_datatype=EmbedDoc, ) @opea_telemetry -def embedding(input: TextDoc) -> EmbedDoc1024: +def embedding(input: TextDoc) -> EmbedDoc: embed_vector = embeddings.embed_query(input.text) - res = EmbedDoc1024(text=input.text, embedding=embed_vector) + res = EmbedDoc(text=input.text, embedding=embed_vector) return res if __name__ == "__main__": - embeddings = HuggingFaceBgeEmbeddings(model_name="BAAI/bge-large-en-v1.5") + embeddings = HuggingFaceEmbeddings(model_name="BAAI/bge-large-en-v1.5") opea_microservices["opea_service@local_embedding"].start() diff --git a/comps/embeddings/langchain/requirements.txt b/comps/embeddings/langchain/requirements.txt index 533186b97..8f0dd3ad4 100644 --- a/comps/embeddings/langchain/requirements.txt +++ b/comps/embeddings/langchain/requirements.txt @@ -2,6 +2,7 @@ docarray[full] fastapi huggingface_hub langchain +langchain_huggingface langsmith opentelemetry-api opentelemetry-exporter-otlp @@ -9,3 +10,4 @@ opentelemetry-sdk prometheus-fastapi-instrumentator sentence_transformers shortuuid +uvicorn diff --git a/comps/embeddings/llama_index/embedding_tei.py b/comps/embeddings/llama_index/embedding_tei.py index ef6bdc7a3..4f3920d32 100644 --- a/comps/embeddings/llama_index/embedding_tei.py +++ b/comps/embeddings/llama_index/embedding_tei.py @@ -6,7 +6,7 @@ from langsmith import traceable from llama_index.embeddings.text_embeddings_inference import TextEmbeddingsInference -from comps import EmbedDoc768, ServiceType, TextDoc, opea_microservices, register_microservice +from comps import EmbedDoc, ServiceType, TextDoc, opea_microservices, register_microservice @register_microservice( @@ -16,13 +16,12 @@ host="0.0.0.0", port=6000, input_datatype=TextDoc, - output_datatype=EmbedDoc768, + output_datatype=EmbedDoc, ) 
@traceable(run_type="embedding") -def embedding(input: TextDoc) -> EmbedDoc768: +def embedding(input: TextDoc) -> EmbedDoc: embed_vector = embeddings._get_query_embedding(input.text) - embed_vector = embed_vector[:768] # Keep only the first 768 elements - res = EmbedDoc768(text=input.text, embedding=embed_vector) + res = EmbedDoc(text=input.text, embedding=embed_vector) return res diff --git a/comps/embeddings/llama_index/local_embedding.py b/comps/embeddings/llama_index/local_embedding.py index 84a61806e..f6a69afaf 100644 --- a/comps/embeddings/llama_index/local_embedding.py +++ b/comps/embeddings/llama_index/local_embedding.py @@ -2,9 +2,9 @@ # SPDX-License-Identifier: Apache-2.0 from langsmith import traceable -from llama_index.embeddings.huggingface import HuggingFaceEmbedding +from llama_index.embeddings.huggingface_api import HuggingFaceInferenceAPIEmbedding -from comps import EmbedDoc1024, ServiceType, TextDoc, opea_microservices, register_microservice +from comps import EmbedDoc, ServiceType, TextDoc, opea_microservices, register_microservice @register_microservice( @@ -14,15 +14,15 @@ host="0.0.0.0", port=6000, input_datatype=TextDoc, - output_datatype=EmbedDoc1024, + output_datatype=EmbedDoc, ) @traceable(run_type="embedding") -def embedding(input: TextDoc) -> EmbedDoc1024: +def embedding(input: TextDoc) -> EmbedDoc: embed_vector = embeddings.get_text_embedding(input.text) - res = EmbedDoc1024(text=input.text, embedding=embed_vector) + res = EmbedDoc(text=input.text, embedding=embed_vector) return res if __name__ == "__main__": - embeddings = HuggingFaceEmbedding(model_name="BAAI/bge-large-en-v1.5") + embeddings = HuggingFaceInferenceAPIEmbedding(model_name="BAAI/bge-large-en-v1.5") opea_microservices["opea_service@local_embedding"].start() diff --git a/comps/embeddings/llama_index/requirements.txt b/comps/embeddings/llama_index/requirements.txt index 245710f7f..908c38b06 100644 --- a/comps/embeddings/llama_index/requirements.txt +++ b/comps/embeddings/llama_index/requirements.txt @@ -2,9 +2,11 @@ docarray[full] fastapi huggingface_hub langsmith +llama-index-embeddings-huggingface-api llama-index-embeddings-text-embeddings-inference opentelemetry-api opentelemetry-exporter-otlp opentelemetry-sdk prometheus-fastapi-instrumentator shortuuid +uvicorn diff --git a/comps/guardrails/README.md b/comps/guardrails/README.md index 6a7d40bcb..1ee662853 100644 --- a/comps/guardrails/README.md +++ b/comps/guardrails/README.md @@ -1,122 +1,10 @@ -# Guardrails Microservice +# Trust and Safety with LLM -To fortify AI initiatives in production, this microservice introduces guardrails designed to encapsulate LLMs, ensuring the enforcement of responsible behavior. With this microservice, you can secure model inputs and outputs, hastening your journey to production and democratizing AI within your organization, building Trustworthy, Safe, and Secure LLM-based Applications. +The Guardrails service enhances the security of LLM-based applications by offering a suite of microservices designed to ensure trustworthiness, safety, and security. -These guardrails actively prevent the model from interacting with unsafe content, promptly signaling its inability to assist with such requests. With these protective measures in place, you can expedite production timelines and alleviate concerns about unpredictable model responses. 
+| MicroService | Description | +| ------------------------------------------ | ------------------------------------------------------------------------------------------ | +| [Llama Guard](./llama_guard/README.md) | Provides guardrails for inputs and outputs to ensure safe interactions | +| [PII Detection](./pii_detection/README.md) | Detects Personally Identifiable Information (PII) and Business Sensitive Information (BSI) | -The Guardrails Microservice now offers two primary types of guardrails: - -- Input Guardrails: These are applied to user inputs. An input guardrail can either reject the input, halting further processing. -- Output Guardrails: These are applied to outputs generated by the LLM. An output guardrail can reject the output, preventing it from being returned to the user. - -We offer content moderation support utilizing Meta's [Llama Guard](https://huggingface.co/meta-llama/LlamaGuard-7b) model. - -Any content that is detected in the following categories is determined as unsafe: - -- Violence and Hate -- Sexual Content -- Criminal Planning -- Guns and Illegal Weapons -- Regulated or Controlled Substances -- Suicide & Self Harm - -# 🚀1. Start Microservice with Python (Option 1) - -To start the Guardrails microservice, you need to install python packages first. - -## 1.1 Install Requirements - -```bash -pip install -r requirements.txt -``` - -## 1.2 Start TGI Gaudi Service - -```bash -export HF_TOKEN=${your_hf_api_token} -export LANGCHAIN_TRACING_V2=true -export LANGCHAIN_API_KEY=${your_langchain_api_key} -export LANGCHAIN_PROJECT="opea/gaurdrails" -volume=$PWD/data -model_id="meta-llama/Meta-Llama-Guard-2-8B" -docker pull ghcr.io/huggingface/tgi-gaudi:2.0.1 -docker run -p 8088:80 -v $volume:/data --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host -e HTTPS_PROXY=$https_proxy -e HTTP_PROXY=$https_proxy -e HF_TOKEN=$HF_TOKEN ghcr.io/huggingface/tgi-gaudi:2.0.1 --model-id $model_id --max-input-length 1024 --max-total-tokens 2048 -``` - -## 1.3 Verify the TGI Gaudi Service - -```bash -curl 127.0.0.1:8088/generate \ - -X POST \ - -d '{"inputs":"How do you buy a tiger in the US?","parameters":{"max_new_tokens":32}}' \ - -H 'Content-Type: application/json' -``` - -## 1.4 Start Guardrails Service - -Optional: If you have deployed a Guardrails model with TGI Gaudi Service other than default model (i.e., `meta-llama/LlamaGuard-7b`) [from section 1.2](## 1.2 Start TGI Gaudi Service), you will need to add the eviornment variable `SAFETY_GUARD_MODEL_ID` containing the model id. For example, the following informs the Guardrails Service the deployed model used LlamaGuard2: - -```bash -export SAFETY_GUARD_MODEL_ID="meta-llama/Meta-Llama-Guard-2-8B" -``` - -```bash -export SAFETY_GUARD_ENDPOINT="http://${your_ip}:8088" -python langchain/guardrails_tgi_gaudi.py -``` - -# 🚀2. Start Microservice with Docker (Option 2) - -If you start an Guardrails microservice with docker, the `docker_compose_guardrails.yaml` file will automatically start a TGI gaudi service with docker. - -## 2.1 Setup Environment Variables - -In order to start TGI and LLM services, you need to setup the following environment variables first. 
- -```bash -export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token} -export SAFETY_GUARD_ENDPOINT="http://${your_ip}:8088" -export LLM_MODEL_ID=${your_hf_llm_model} -export LANGCHAIN_TRACING_V2=true -export LANGCHAIN_API_KEY=${your_langchain_api_key} -export LANGCHAIN_PROJECT="opea/gen-ai-comps:gaurdrails" -``` - -## 2.2 Build Docker Image - -```bash -cd ../../ -docker build -t opea/guardrails-tgi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/guardrails/langchain/docker/Dockerfile . -``` - -## 2.3 Run Docker with CLI - -```bash -docker run -d --name="guardrails-tgi-server" -p 9090:9090 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e no_proxy=$no_proxy -e SAFETY_GUARD_ENDPOINT=$SAFETY_GUARD_ENDPOINT -e HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN opea/guardrails-tgi:latest -``` - -## 2.4 Run Docker with Docker Compose - -```bash -cd langchain/docker -docker compose -f docker_compose_guardrails.yaml up -d -``` - -# 🚀3. Consume Guardrails Service - -## 3.1 Check Service Status - -```bash -curl http://localhost:9090/v1/health_check\ - -X GET \ - -H 'Content-Type: application/json' -``` - -## 3.2 Consume Guardrails Service - -```bash -curl http://localhost:9090/v1/guardrails\ - -X POST \ - -d '{"text":"How do you buy a tiger in the US?","parameters":{"max_new_tokens":32}}' \ - -H 'Content-Type: application/json' -``` +Additional safety-related microservices will be available soon. diff --git a/comps/guardrails/langchain/README.md b/comps/guardrails/langchain/README.md deleted file mode 100644 index e69de29bb..000000000 diff --git a/comps/guardrails/llama_guard/README.md b/comps/guardrails/llama_guard/README.md new file mode 100644 index 000000000..94bdcd952 --- /dev/null +++ b/comps/guardrails/llama_guard/README.md @@ -0,0 +1,119 @@ +# Guardrails Microservice + +To fortify AI initiatives in production, this microservice introduces guardrails designed to encapsulate LLMs, ensuring the enforcement of responsible behavior. With this microservice, you can secure model inputs and outputs, hastening your journey to production and democratizing AI within your organization, building Trustworthy, Safe, and Secure LLM-based Applications. + +These guardrails actively prevent the model from interacting with unsafe content, promptly signaling its inability to assist with such requests. With these protective measures in place, you can expedite production timelines and alleviate concerns about unpredictable model responses. + +The Guardrails Microservice now offers two primary types of guardrails: + +- Input Guardrails: These are applied to user inputs. An input guardrail can either reject the input, halting further processing. +- Output Guardrails: These are applied to outputs generated by the LLM. An output guardrail can reject the output, preventing it from being returned to the user. + +We offer content moderation support utilizing Meta's [Llama Guard](https://huggingface.co/meta-llama/Meta-Llama-Guard-2-8B) model. + +Any content that is detected in the following categories is determined as unsafe: + +- Violence and Hate +- Sexual Content +- Criminal Planning +- Guns and Illegal Weapons +- Regulated or Controlled Substances +- Suicide & Self Harm + +# 🚀1. Start Microservice with Python (Option 1) + +To start the Guardrails microservice, you need to install python packages first. 
+ +## 1.1 Install Requirements + +```bash +pip install -r requirements.txt +``` + +## 1.2 Start TGI Gaudi Service + +```bash +export HF_TOKEN=${your_hf_api_token} +export LANGCHAIN_TRACING_V2=true +export LANGCHAIN_API_KEY=${your_langchain_api_key} +export LANGCHAIN_PROJECT="opea/gaurdrails" +volume=$PWD/data +model_id="meta-llama/Meta-Llama-Guard-2-8B" +docker pull ghcr.io/huggingface/tgi-gaudi:2.0.1 +docker run -p 8088:80 -v $volume:/data --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host -e HTTPS_PROXY=$https_proxy -e HTTP_PROXY=$https_proxy -e HF_TOKEN=$HF_TOKEN ghcr.io/huggingface/tgi-gaudi:2.0.1 --model-id $model_id --max-input-length 1024 --max-total-tokens 2048 +``` + +## 1.3 Verify the TGI Gaudi Service + +```bash +curl 127.0.0.1:8088/generate \ + -X POST \ + -d '{"inputs":"How do you buy a tiger in the US?","parameters":{"max_new_tokens":32}}' \ + -H 'Content-Type: application/json' +``` + +## 1.4 Start Guardrails Service + +Optional: If you have deployed a Guardrails model with TGI Gaudi Service other than default model (i.e., `meta-llama/Meta-Llama-Guard-2-8B`) [from section 1.2](## 1.2 Start TGI Gaudi Service), you will need to add the eviornment variable `SAFETY_GUARD_MODEL_ID` containing the model id. For example, the following informs the Guardrails Service the deployed model used LlamaGuard2: + +```bash +export SAFETY_GUARD_MODEL_ID="meta-llama/Meta-Llama-Guard-2-8B" +``` + +```bash +export SAFETY_GUARD_ENDPOINT="http://${your_ip}:8088" +python langchain/guardrails_tgi.py +``` + +# 🚀2. Start Microservice with Docker (Option 2) + +If you start an Guardrails microservice with docker, the `docker_compose_guardrails.yaml` file will automatically start a TGI gaudi service with docker. + +## 2.1 Setup Environment Variables + +In order to start TGI and LLM services, you need to setup the following environment variables first. + +```bash +export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token} +export SAFETY_GUARD_ENDPOINT="http://${your_ip}:8088" +export LLM_MODEL_ID=${your_hf_llm_model} +``` + +## 2.2 Build Docker Image + +```bash +cd ../../ +docker build -t opea/guardrails-tgi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/guardrails/llama_guard/docker/Dockerfile . +``` + +## 2.3 Run Docker with CLI + +```bash +docker run -d --name="guardrails-tgi-server" -p 9090:9090 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e no_proxy=$no_proxy -e SAFETY_GUARD_ENDPOINT=$SAFETY_GUARD_ENDPOINT -e HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN opea/guardrails-tgi:latest +``` + +## 2.4 Run Docker with Docker Compose + +```bash +cd langchain/docker +docker compose -f docker_compose_guardrails.yaml up -d +``` + +# 🚀3. 
Consume Guardrails Service + +## 3.1 Check Service Status + +```bash +curl http://localhost:9090/v1/health_check\ + -X GET \ + -H 'Content-Type: application/json' +``` + +## 3.2 Consume Guardrails Service + +```bash +curl http://localhost:9090/v1/guardrails\ + -X POST \ + -d '{"text":"How do you buy a tiger in the US?","parameters":{"max_new_tokens":32}}' \ + -H 'Content-Type: application/json' +``` diff --git a/comps/guardrails/langchain/__init__.py b/comps/guardrails/llama_guard/__init__.py similarity index 100% rename from comps/guardrails/langchain/__init__.py rename to comps/guardrails/llama_guard/__init__.py diff --git a/comps/guardrails/langchain/docker/Dockerfile b/comps/guardrails/llama_guard/docker/Dockerfile similarity index 77% rename from comps/guardrails/langchain/docker/Dockerfile rename to comps/guardrails/llama_guard/docker/Dockerfile index 0cb6df85f..aaec44a07 100644 --- a/comps/guardrails/langchain/docker/Dockerfile +++ b/comps/guardrails/llama_guard/docker/Dockerfile @@ -22,10 +22,10 @@ COPY comps /home/user/comps RUN pip install --no-cache-dir --upgrade pip && \ if [ ${ARCH} = "cpu" ]; then pip install torch --index-url https://download.pytorch.org/whl/cpu; fi && \ - pip install --no-cache-dir -r /home/user/comps/guardrails/requirements.txt + pip install --no-cache-dir -r /home/user/comps/guardrails/llama_guard/requirements.txt ENV PYTHONPATH=$PYTHONPATH:/home/user -WORKDIR /home/user/comps/guardrails/langchain +WORKDIR /home/user/comps/guardrails/llama_guard/ -ENTRYPOINT ["python", "guardrails_tgi_gaudi.py"] +ENTRYPOINT ["python", "guardrails_tgi.py"] diff --git a/comps/guardrails/langchain/docker/docker_compose_guardrails.yaml b/comps/guardrails/llama_guard/docker/docker_compose_guardrails.yaml similarity index 80% rename from comps/guardrails/langchain/docker/docker_compose_guardrails.yaml rename to comps/guardrails/llama_guard/docker/docker_compose_guardrails.yaml index 625b7a223..f9c2c2cae 100644 --- a/comps/guardrails/langchain/docker/docker_compose_guardrails.yaml +++ b/comps/guardrails/llama_guard/docker/docker_compose_guardrails.yaml @@ -5,7 +5,7 @@ version: "3.8" services: tgi_gaudi_service: - image: ghcr.io/huggingface/tgi-gaudi:1.2.1 + image: ghcr.io/huggingface/tgi-gaudi:2.0.1 container_name: tgi-service ports: - "8088:80" @@ -14,9 +14,9 @@ services: environment: HF_TOKEN: ${HF_TOKEN} shm_size: 1g - command: --model-id ${LLM_MODEL_ID} + command: --model-id ${LLM_MODEL_ID} --max-input-tokens 1024 --max-total-tokens 2048 guardrails: - image: opea/gen-ai-comps:guardrails-tgi-gaudi-server + image: opea/guardrails-tgi:latest container_name: guardrails-tgi-gaudi-server ports: - "9090:9090" diff --git a/comps/guardrails/langchain/guardrails_tgi_gaudi.py b/comps/guardrails/llama_guard/guardrails_tgi.py similarity index 96% rename from comps/guardrails/langchain/guardrails_tgi_gaudi.py rename to comps/guardrails/llama_guard/guardrails_tgi.py index 9de0193be..96a89b8c8 100644 --- a/comps/guardrails/langchain/guardrails_tgi_gaudi.py +++ b/comps/guardrails/llama_guard/guardrails_tgi.py @@ -54,7 +54,7 @@ def get_tgi_service_model_id(endpoint_url, default=DEFAULT_MODEL): @register_microservice( - name="opea_service@guardrails_tgi_gaudi", + name="opea_service@guardrails_tgi", service_type=ServiceType.GUARDRAIL, endpoint="/v1/guardrails", host="0.0.0.0", @@ -94,4 +94,4 @@ def safety_guard(input: TextDoc) -> TextDoc: # chat engine for server-side prompt templating llm_engine_hf = ChatHuggingFace(llm=llm_guard, model_id=safety_guard_model) print("guardrails - router] LLM 
initialized.") - opea_microservices["opea_service@guardrails_tgi_gaudi"].start() + opea_microservices["opea_service@guardrails_tgi"].start() diff --git a/comps/guardrails/requirements.txt b/comps/guardrails/llama_guard/requirements.txt similarity index 96% rename from comps/guardrails/requirements.txt rename to comps/guardrails/llama_guard/requirements.txt index 10ef1bb44..5fd992e66 100644 --- a/comps/guardrails/requirements.txt +++ b/comps/guardrails/llama_guard/requirements.txt @@ -9,3 +9,4 @@ opentelemetry-exporter-otlp opentelemetry-sdk prometheus-fastapi-instrumentator shortuuid +uvicorn diff --git a/comps/guardrails/pii_detection/README.md b/comps/guardrails/pii_detection/README.md index dba386e38..3c1f1180e 100644 --- a/comps/guardrails/pii_detection/README.md +++ b/comps/guardrails/pii_detection/README.md @@ -1,6 +1,31 @@ # PII Detection Microservice -PII Detection a method to detect Personal Identifiable Information in text. This microservice provides users a unified API to either upload your files or send a list of text, and return with a list following original sequence of labels marking if it contains PII or not. +This microservice provides a unified API to detect if there is Personal Identifiable Information or Business Sensitive Information in text. + +We provide 2 detection strategies: + +1. Regular expression matching + named entity recognition (NER) - pass "ner" as strategy in your request to the microservice. +2. Logistic regression classifier - pass "ml" as strategy in your request to the microservice. **Note**: Currently this strategy is for demo only, and only supports using `nomic-ai/nomic-embed-text-v1` as the embedding model and the `Intel/business_safety_logistic_regression_classifier` model as the classifier. Please read the [full disclaimers in the model card](https://huggingface.co/Intel/business_safety_logistic_regression_classifier) before using this strategy. + +## NER strategy + +We adopted the [pii detection code](https://github.com/bigcode-project/bigcode-dataset/tree/main/pii) of the [BigCode](https://www.bigcode-project.org/) project and use the bigcode/starpii model for NER. Currently this strategy can detect IP address, emails, phone numbers, alphanumeric keys, names and passwords. The IP address, emails, phone numbers, alphanumeric keys are detected with regular expression matching. The names and passwords are detected with NER. Please refer to the starpii [model card](https://huggingface.co/bigcode/starpii) for more information of the detection performance. + +## ML strategy + +We have trained a classifier model using the [Patronus EnterprisePII dataset](https://www.patronus.ai/announcements/patronus-ai-launches-enterprisepii-the-industrys-first-llm-dataset-for-detecting-business-sensitive-information) for the demo purpose only. Please note that the demo model has not been extensively tested so is not intended for use in production environment. Please read the [full disclaimers in the model card](https://huggingface.co/Intel/business_safety_logistic_regression_classifier). + +The classifiler model is used together with an embedding model to make predictions. The embedding model used for demo is `nomic-ai/nomic-embed-text-v1` [model](https://blog.nomic.ai/posts/nomic-embed-text-v1) available on Huggingface hub. We picked this open-source embedding model for demo as it is one of the top-performing long-context (max sequence length = 8192 vs. 
+# Input and output
+
+Users can send a list of files, a list of text strings, or a list of URLs to the microservice, and the microservice will return a list of True or False for each piece of text following the original sequence.
+
+For a concrete example of what input should look like, please refer to the [Consume Microservice](#4-consume-microservice) section below.
+
+The output will be a list of booleans, which can be parsed and used as conditions in a bigger application.
 
 # 🚀1. Start Microservice with Python(Option 1)
@@ -62,11 +87,18 @@
 import requests
 import json
 
 proxies = {"http": ""}
-url = "http://localhost:6357/v1/dataprep"
-urls = [
-    "https://towardsdatascience.com/no-gpu-no-party-fine-tune-bert-for-sentiment-analysis-with-vertex-ai-custom-jobs-d8fc410e908b?source=rss----7f60cf5620c9---4"
+url = "http://localhost:6357/v1/piidetect"
+
+strategy = "ml"  # options: "ner", "ml"
+content = [
+    "Q1 revenue was $1.23 billion, up 12% year over year. ",
+    "We are excited to announce the opening of our new office in Miami! ",
+    "Mary Smith, 123-456-7890,",
+    "John is a good team leader",
+    "meeting minutes: sync up with sales team on the new product launch",
 ]
-payload = {"link_list": json.dumps(urls)}
+
+payload = {"text_list": json.dumps(content), "strategy": strategy}
 
 try:
     resp = requests.post(url=url, data=payload, proxies=proxies)
diff --git a/comps/guardrails/pii_detection/data_utils.py b/comps/guardrails/pii_detection/data_utils.py
index 29e9c4196..8340579fb 100644
--- a/comps/guardrails/pii_detection/data_utils.py
+++ b/comps/guardrails/pii_detection/data_utils.py
@@ -6,6 +6,7 @@
 import multiprocessing
 import os
 import re
+import subprocess
 import unicodedata
 from urllib.parse import urlparse, urlunparse
 
@@ -79,7 +80,8 @@ def load_doc(doc_path):
     """Load doc file."""
     txt_path = doc_path.replace(".doc", ".txt")
     try:
-        os.system(f'antiword "{doc_path}" > "{txt_path}"')
+        with open(txt_path, "w") as outfile:
+            subprocess.run(["antiword", doc_path], stdout=outfile, check=True)
     except:
         raise AssertionError(
             "antiword failed or not installed, if not installed,"
diff --git a/comps/guardrails/pii_detection/pii/pii_utils.py b/comps/guardrails/pii_detection/pii/pii_utils.py
index 1d6b02144..9102b8195 100644
--- a/comps/guardrails/pii_detection/pii/pii_utils.py
+++ b/comps/guardrails/pii_detection/pii/pii_utils.py
@@ -22,14 +22,6 @@ def detect_pii(self, data):
         return random.choice([True, False])
 
 
-class PIIDetectorWithLLM(PIIDetector):
-    def __init__(self):
-        super().__init__()
-
-    def detect_pii(self, text):
-        return True
-
-
 class PIIDetectorWithNER(PIIDetector):
     def __init__(self, model_path=None):
         super().__init__()
@@ -42,11 +34,13 @@ def __init__(self, model_path=None):
             self.pipeline = pipeline(
                 model=_model_key, task="token-classification", tokenizer=tokenizer, grouped_entities=True
             )
+            print("NER detector instantiated 
successfully!") except Exception as e: print("Failed to load model, skip NER classification", e) self.pipeline = None def detect_pii(self, text): + print("Scanning text with NER detector...") result = [] # use a regex to detect ip addresses @@ -71,7 +65,26 @@ def detect_pii(self, text): class PIIDetectorWithML(PIIDetector): def __init__(self): + import joblib + from huggingface_hub import hf_hub_download + from sentence_transformers import SentenceTransformer + super().__init__() + print("Loading embedding model...") + embed_model_id = "nomic-ai/nomic-embed-text-v1" + self.model = SentenceTransformer(model_name_or_path=embed_model_id, trust_remote_code=True) + + print("Loading classifier model...") + REPO_ID = "Intel/business_safety_logistic_regression_classifier" + FILENAME = "lr_clf.joblib" + + self.clf = joblib.load(hf_hub_download(repo_id=REPO_ID, filename=FILENAME)) + + print("ML detector instantiated successfully!") def detect_pii(self, text): - return True + # text is a string + print("Scanning text with ML detector...") + embeddings = self.model.encode(text, convert_to_tensor=True).reshape(1, -1).cpu() + predictions = self.clf.predict(embeddings) + return True if predictions[0] == 1 else False diff --git a/comps/guardrails/pii_detection/pii_detection.py b/comps/guardrails/pii_detection/pii_detection.py index b49ac7065..feecf3baf 100644 --- a/comps/guardrails/pii_detection/pii_detection.py +++ b/comps/guardrails/pii_detection/pii_detection.py @@ -20,12 +20,7 @@ from comps import DocPath, opea_microservices, register_microservice from comps.guardrails.pii_detection.data_utils import document_loader, parse_html -from comps.guardrails.pii_detection.pii.pii_utils import ( - PIIDetector, - PIIDetectorWithLLM, - PIIDetectorWithML, - PIIDetectorWithNER, -) +from comps.guardrails.pii_detection.pii.pii_utils import PIIDetector, PIIDetectorWithML, PIIDetectorWithNER from comps.guardrails.pii_detection.ray_utils import ray_execute, ray_runner_initialization, rayds_initialization from comps.guardrails.pii_detection.utils import ( Timer, @@ -38,14 +33,13 @@ def get_pii_detection_inst(strategy="dummy", settings=None): if strategy == "ner": + print("invoking NER detector.......") return PIIDetectorWithNER() elif strategy == "ml": + print("invoking ML detector.......") return PIIDetectorWithML() - elif strategy == "llm": - return PIIDetectorWithLLM() else: - # Default strategy - dummy - return PIIDetector() + raise ValueError(f"Invalid strategy: {strategy}") def file_based_pii_detect(file_list: List[DocPath], strategy, enable_ray=False, debug=False): @@ -67,7 +61,7 @@ def file_based_pii_detect(file_list: List[DocPath], strategy, enable_ray=False, for file in tqdm(file_list, total=len(file_list)): with Timer(f"read document {file}."): data = document_loader(file) - with Timer(f"detect pii on document {file} to Redis."): + with Timer(f"detect pii on document {file}"): ret.append(pii_detector.detect_pii(data)) return ret @@ -95,7 +89,7 @@ def _parse_html(link): data = _parse_html(link) if debug: print("content is: ", data) - with Timer(f"detect pii on document {link} to Redis."): + with Timer(f"detect pii on document {link}"): ret.append(pii_detector.detect_pii(data)) return ret @@ -117,7 +111,7 @@ def text_based_pii_detect(text_list: List[str], strategy, enable_ray=False, debu for data in tqdm(text_list, total=len(text_list)): if debug: print("content is: ", data) - with Timer(f"detect pii on document {data[:50]} to Redis."): + with Timer(f"detect pii on document {data[:50]}"): 
             ret.append(pii_detector.detect_pii(data))
     return ret
@@ -125,11 +119,20 @@ def text_based_pii_detect(text_list: List[str], strategy, enable_ray=False, debu
 
 
 @register_microservice(
     name="opea_service@guardrails-pii-detection", endpoint="/v1/piidetect", host="0.0.0.0", port=6357
 )
-async def pii_detection(files: List[UploadFile] = File(None), link_list: str = Form(None), text_list: str = Form(None)):
+async def pii_detection(
+    files: List[UploadFile] = File(None),
+    link_list: str = Form(None),
+    text_list: str = Form(None),
+    strategy: str = Form(None),
+):
     if not files and not link_list and not text_list:
         raise HTTPException(status_code=400, detail="Either files, link_list, or text_list must be provided.")
 
-    strategy = "ner"  # Default strategy
+    if strategy is None:
+        strategy = "ner"
+
+    print("PII detection using strategy: ", strategy)
+
     pip_requirement = ["detect-secrets", "phonenumbers", "gibberish-detector"]
 
     if files:
@@ -147,7 +150,7 @@ async def pii_detection(files: List[UploadFile] = File(None), link_list: str = F
             await save_file_to_local_disk(save_path, file)
             saved_path_list.append(DocPath(path=save_path))
 
-        enable_ray = False if len(saved_path_list) <= 10 else True
+        enable_ray = False if (len(saved_path_list) <= 10 or strategy == "ml") else True
         if enable_ray:
             prepare_env(enable_ray=enable_ray, pip_requirements=pip_requirement, comps_path=comps_path)
         ret = file_based_pii_detect(saved_path_list, strategy, enable_ray=enable_ray)
@@ -160,7 +163,7 @@ async def pii_detection(files: List[UploadFile] = File(None), link_list: str = F
         text_list = json.loads(text_list)  # Parse JSON string to list
         if not isinstance(text_list, list):
             text_list = [text_list]
-        enable_ray = False if len(text_list) <= 10 else True
+        enable_ray = False if (len(text_list) <= 10 or strategy == "ml") else True
         if enable_ray:
             prepare_env(enable_ray=enable_ray, pip_requirements=pip_requirement, comps_path=comps_path)
         ret = text_based_pii_detect(text_list, strategy, enable_ray=enable_ray)
@@ -175,7 +178,7 @@ async def pii_detection(files: List[UploadFile] = File(None), link_list: str = F
         link_list = json.loads(link_list)  # Parse JSON string to list
         if not isinstance(link_list, list):
             link_list = [link_list]
-        enable_ray = False if len(link_list) <= 10 else True
+        enable_ray = False if (len(link_list) <= 10 or strategy == "ml") else True
         if enable_ray:
             prepare_env(enable_ray=enable_ray, pip_requirements=pip_requirement, comps_path=comps_path)
         ret = link_based_pii_detect(link_list, strategy, enable_ray=enable_ray)
diff --git a/comps/guardrails/pii_detection/requirements.txt b/comps/guardrails/pii_detection/requirements.txt
index 88690093f..9ca5116da 100644
--- a/comps/guardrails/pii_detection/requirements.txt
+++ b/comps/guardrails/pii_detection/requirements.txt
@@ -2,6 +2,7 @@ beautifulsoup4
 detect_secrets
 docarray[full]
 easyocr
+einops
 fastapi
 gibberish-detector
 huggingface_hub
@@ -21,6 +22,8 @@ pymupdf
 python-docx
 ray
 redis
+scikit-learn
 sentence_transformers
 shortuuid
+uvicorn
 virtualenv
diff --git a/comps/guardrails/pii_detection/test.py b/comps/guardrails/pii_detection/test.py
index 214c0d0b9..ab1229402 100644
--- a/comps/guardrails/pii_detection/test.py
+++ b/comps/guardrails/pii_detection/test.py
@@ -9,14 +9,13 @@ from utils import Timer
 
 
-def test_html(ip_addr="localhost", batch_size=20):
+def test_html(ip_addr="localhost", batch_size=20, strategy=None):
     import pandas as pd
 
     proxies = {"http": ""}
     url = f"http://{ip_addr}:6357/v1/piidetect"
-    urls = pd.read_csv("data/ai_rss.csv")["Permalink"]
-    urls = 
urls[:batch_size].to_list() - payload = {"link_list": json.dumps(urls)} + urls = ["https://opea.dev/"] * batch_size + payload = {"link_list": json.dumps(urls), "strategy": strategy} with Timer(f"send {len(urls)} link to pii detection endpoint"): try: @@ -28,33 +27,19 @@ def test_html(ip_addr="localhost", batch_size=20): print("An error occurred:", e) -def test_text(ip_addr="localhost", batch_size=20): +def test_text(ip_addr="localhost", batch_size=20, strategy=None): proxies = {"http": ""} url = f"http://{ip_addr}:6357/v1/piidetect" - if os.path.exists("data/ai_rss.csv"): - import pandas as pd - content = pd.read_csv("data/ai_rss.csv")["Description"] - content = content[:batch_size].to_list() - else: - content = ( - [ - """With new architectures, there comes a bit of a dilemma. After having spent billions of dollars training models with older architectures, companies rightfully wonder if it is worth spending billions more on a newer architecture that may itself be outmoded soon. -One possible solution to this dilemma is transfer learning. The idea here is to put noise into the trained model and then use the output given to then backpropagate on the new model. The idea here is that you don’t need to worry about generating huge amounts of novel data and potentially the number of epochs you have to train for is also significantly reduced. This idea has not been perfected yet, so it remains to be seen the role it will play in the future. -Nevertheless, as businesses become more invested in these architectures the potential for newer architectures that improve cost will only increase. Time will tell how quickly the industry moves to adopt them. -For those who are building apps that allow for a seamless transition between models, you can look at the major strives made in throughput and latency by YOCO and have hope that the major bottlenecks your app is having may soon be resolved. -It’s an exciting time to be building. -With special thanks to Christopher Taylor for his feedback on this blog post. -[1] Sun, Y., et al. “You Only Cache Once: Decoder-Decoder Architectures for Language Models” (2024), arXiv -[2] Sun, Y., et al. “Retentive Network: A Successor to Transformer for Large Language Models” (2023), arXiv -[3] Wikimedia Foundation, et al. “Hadamard product (matrices)” (2024), Wikipedia -[4] Sanderson, G. et al., “Attention in transformers, visually explained | Chapter 6, Deep Learning” (2024), YouTube -[5] A. Vaswani, et al., “Attention Is All You Need” (2017), arXiv -Understanding You Only Cache Once was originally published in Towards Data Science on Medium, where people are continuing the conversation by highlighting and responding to this story.""" - ] - * batch_size - ) - payload = {"text_list": json.dumps(content)} + content = [ + "Q1 revenue was $1.23 billion, up 12% year over year. ", + "We are excited to announce the opening of our new office in Miami! 
", + "Mary Smith, 123-456-7890,", + "John is a good team leader", + "meeting minutes: sync up with sales team on the new product launch", + ] + + payload = {"text_list": json.dumps(content), "strategy": strategy} with Timer(f"send {len(content)} text to pii detection endpoint"): try: @@ -90,13 +75,17 @@ def test_pdf(ip_addr="localhost", batch_size=20): parser.add_argument("--test_text", action="store_true", help="Test Text pii detection") parser.add_argument("--batch_size", type=int, default=20, help="Batch size for testing") parser.add_argument("--ip_addr", type=str, default="localhost", help="IP address of the server") + parser.add_argument("--strategy", type=str, default="ml", help="Strategy for pii detection") args = parser.parse_args() + + print(args) + if args.test_html: test_html(ip_addr=args.ip_addr, batch_size=args.batch_size) elif args.test_pdf: test_pdf(ip_addr=args.ip_addr, batch_size=args.batch_size) elif args.test_text: - test_text(ip_addr=args.ip_addr, batch_size=args.batch_size) + test_text(ip_addr=args.ip_addr, batch_size=args.batch_size, strategy=args.strategy) else: print("Please specify the test type") diff --git a/comps/guardrails/pii_detection/utils.py b/comps/guardrails/pii_detection/utils.py index 0766bec70..21f402c2a 100644 --- a/comps/guardrails/pii_detection/utils.py +++ b/comps/guardrails/pii_detection/utils.py @@ -74,7 +74,7 @@ def wrapper(*args, **kwargs): def generate_log_name(file_list): file_set = f"{sorted(file_list)}" # print(f"file_set: {file_set}") - md5_str = hashlib.md5(file_set.encode()).hexdigest() + md5_str = hashlib.md5(file_set.encode(), usedforsecurity=False).hexdigest() return f"status/status_{md5_str}.log" diff --git a/comps/knowledgegraphs/langchain/README.md b/comps/knowledgegraphs/langchain/README.md deleted file mode 100755 index e69de29bb..000000000 diff --git a/comps/knowledgegraphs/requirements.txt b/comps/knowledgegraphs/requirements.txt index a6318d715..ecb5228af 100755 --- a/comps/knowledgegraphs/requirements.txt +++ b/comps/knowledgegraphs/requirements.txt @@ -23,3 +23,4 @@ redis sentence-transformers shortuuid tiktoken +uvicorn diff --git a/comps/llms/README.md b/comps/llms/README.md index 15c7c366c..584f2ba12 100644 --- a/comps/llms/README.md +++ b/comps/llms/README.md @@ -32,7 +32,7 @@ docker run -p 8008:80 -v ./data:/data --name tgi_service --shm-size 1g ghcr.io/h ```bash export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token} -docker run -it --name vllm_service -p 8008:80 -e HF_TOKEN=${HUGGINGFACEHUB_API_TOKEN} -v ./data:/data vllm:cpu /bin/bash -c "cd / && export VLLM_CPU_KVCACHE_SPACE=40 && python3 -m vllm.entrypoints.openai.api_server --model ${your_hf_llm_model} --port 80" +docker run -it --name vllm_service -p 8008:80 -e HF_TOKEN=${HUGGINGFACEHUB_API_TOKEN} -v ./data:/data opea/vllm:cpu /bin/bash -c "cd / && export VLLM_CPU_KVCACHE_SPACE=40 && python3 -m vllm.entrypoints.openai.api_server --model ${your_hf_llm_model} --port 80" ``` ## 1.2.3 Start Ray Service diff --git a/comps/llms/faq-generation/tgi/requirements.txt b/comps/llms/faq-generation/tgi/requirements.txt index f770cf5d7..623a8f667 100644 --- a/comps/llms/faq-generation/tgi/requirements.txt +++ b/comps/llms/faq-generation/tgi/requirements.txt @@ -10,3 +10,4 @@ opentelemetry-sdk prometheus-fastapi-instrumentator shortuuid transformers +uvicorn diff --git a/comps/llms/requirements.txt b/comps/llms/requirements.txt index 1de1eaedc..888eefd31 100644 --- a/comps/llms/requirements.txt +++ b/comps/llms/requirements.txt @@ -6,3 +6,4 @@ opentelemetry-api 
opentelemetry-exporter-otlp opentelemetry-sdk shortuuid +uvicorn diff --git a/comps/llms/summarization/tgi/README.md b/comps/llms/summarization/tgi/README.md index e69de29bb..9e5858b4b 100644 --- a/comps/llms/summarization/tgi/README.md +++ b/comps/llms/summarization/tgi/README.md @@ -0,0 +1,96 @@ +# Document Summary TGI Microservice + +In this microservice, we utilize LangChain to implement summarization strategies and facilitate LLM inference using Text Generation Inference on Intel Xeon and Gaudi2 processors. +[Text Generation Inference](https://github.com/huggingface/text-generation-inference) (TGI) is a toolkit for deploying and serving Large Language Models (LLMs). TGI enables high-performance text generation for the most popular open-source LLMs, including Llama, Falcon, StarCoder, BLOOM, GPT-NeoX, and more. + +# 🚀1. Start Microservice with Python (Option 1) + +To start the LLM microservice, you need to install python packages first. + +## 1.1 Install Requirements + +```bash +pip install -r requirements.txt +``` + +## 1.2 Start LLM Service + +```bash +export HF_TOKEN=${your_hf_api_token} +docker run -p 8008:80 -v ./data:/data --name llm-docsum-tgi --shm-size 1g ghcr.io/huggingface/text-generation-inference:2.1.0 --model-id ${your_hf_llm_model} +``` + +## 1.3 Verify the TGI Service + +```bash +curl http://${your_ip}:8008/generate \ + -X POST \ + -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' \ + -H 'Content-Type: application/json' +``` + +## 1.4 Start LLM Service with Python Script + +```bash +export TGI_LLM_ENDPOINT="http://${your_ip}:8008" +python llm.py +``` + +# 🚀2. Start Microservice with Docker (Option 2) + +If you start an LLM microservice with docker, the `docker_compose_llm.yaml` file will automatically start a TGI/vLLM service with docker. + +## 2.1 Setup Environment Variables + +In order to start TGI and LLM services, you need to setup the following environment variables first. + +```bash +export HF_TOKEN=${your_hf_api_token} +export TGI_LLM_ENDPOINT="http://${your_ip}:8008" +export LLM_MODEL_ID=${your_hf_llm_model} +``` + +## 2.2 Build Docker Image + +```bash +cd ../../ +docker build -t opea/llm-docsum-tgi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/summarization/tgi/Dockerfile . +``` + +To start a docker container, you have two options: + +- A. Run Docker with CLI +- B. Run Docker with Docker Compose + +You can choose one as needed. + +## 2.3 Run Docker with CLI (Option A) + +```bash +docker run -d --name="llm-docsum-tgi-server" -p 9000:9000 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e TGI_LLM_ENDPOINT=$TGI_LLM_ENDPOINT -e HF_TOKEN=$HF_TOKEN opea/llm-docsum-tgi:latest +``` + +## 2.4 Run Docker with Docker Compose (Option B) + +```bash +docker compose -f docker_compose_llm.yaml up -d +``` + +# 🚀3. Consume LLM Service + +## 3.1 Check Service Status + +```bash +curl http://${your_ip}:9000/v1/health_check\ + -X GET \ + -H 'Content-Type: application/json' +``` + +## 3.2 Consume LLM Service + +```bash +curl http://${your_ip}:9000/v1/chat/docsum \ + -X POST \ + -d '{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. 
TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}' \ + -H 'Content-Type: application/json' +``` diff --git a/comps/llms/summarization/tgi/docker_compose_llm.yaml b/comps/llms/summarization/tgi/docker_compose_llm.yaml index 41ae5d076..2b517333e 100644 --- a/comps/llms/summarization/tgi/docker_compose_llm.yaml +++ b/comps/llms/summarization/tgi/docker_compose_llm.yaml @@ -5,7 +5,7 @@ version: "3.8" services: tgi_service: - image: ghcr.io/huggingface/text-generation-inference:1.4 + image: ghcr.io/huggingface/text-generation-inference:2.1.0 container_name: tgi-service ports: - "8008:80" @@ -16,8 +16,8 @@ services: shm_size: 1g command: --model-id ${LLM_MODEL_ID} llm: - image: opea/gen-ai-comps:llm-tgi-server - container_name: llm-tgi-server + image: opea/llm-docsum-tgi:latest + container_name: llm-docsum-tgi-server ports: - "9000:9000" ipc: host @@ -27,7 +27,6 @@ services: https_proxy: ${https_proxy} TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT} HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} - LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY} restart: unless-stopped networks: diff --git a/comps/llms/summarization/tgi/requirements.txt b/comps/llms/summarization/tgi/requirements.txt index a79486a55..c6c151f6e 100644 --- a/comps/llms/summarization/tgi/requirements.txt +++ b/comps/llms/summarization/tgi/requirements.txt @@ -13,3 +13,4 @@ opentelemetry-sdk prometheus-fastapi-instrumentator shortuuid transformers +uvicorn diff --git a/comps/llms/text-generation/native/requirements.txt b/comps/llms/text-generation/native/requirements.txt index 1b0acdfce..e8473a80c 100644 --- a/comps/llms/text-generation/native/requirements.txt +++ b/comps/llms/text-generation/native/requirements.txt @@ -7,3 +7,4 @@ opentelemetry-sdk prometheus-fastapi-instrumentator shortuuid transformers +uvicorn diff --git a/comps/llms/text-generation/ollama/llm.py b/comps/llms/text-generation/ollama/llm.py index 5374cfa69..aadb2e2fa 100644 --- a/comps/llms/text-generation/ollama/llm.py +++ b/comps/llms/text-generation/ollama/llm.py @@ -21,7 +21,7 @@ def llm_generate(input: LLMParamsDoc): ollama = Ollama( base_url=ollama_endpoint, - model=input.model, + model=input.model if input.model else model_name, num_predict=input.max_new_tokens, top_k=input.top_k, top_p=input.top_p, @@ -49,4 +49,5 @@ async def stream_generator(): if __name__ == "__main__": ollama_endpoint = os.getenv("OLLAMA_ENDPOINT", "http://localhost:11434") + model_name = os.getenv("OLLAMA_MODEL", "meta-llama/Meta-Llama-3-8B-Instruct") opea_microservices["opea_service@llm_ollama"].start() diff --git a/comps/llms/text-generation/ollama/requirements.txt b/comps/llms/text-generation/ollama/requirements.txt index 92c652351..e224aaaa5 100644 --- a/comps/llms/text-generation/ollama/requirements.txt +++ b/comps/llms/text-generation/ollama/requirements.txt @@ -9,3 +9,4 @@ opentelemetry-sdk prometheus-fastapi-instrumentator shortuuid transformers +uvicorn diff --git a/comps/llms/text-generation/ray_serve/README.md b/comps/llms/text-generation/ray_serve/README.md deleted file mode 100644 index ce58f6347..000000000 --- a/comps/llms/text-generation/ray_serve/README.md +++ /dev/null @@ -1,78 +0,0 @@ -# Ray-Serve Endpoint Service - -[Ray](https://docs.ray.io/en/latest/serve/index.html) is an LLM serving solution that makes it easy to deploy and manage a variety of open source LLMs, built on [Ray Serve](https://docs.ray.io/en/latest/serve/index.html), has native support for autoscaling and multi-node deployments, which is easy to use for LLM 
inference serving on Intel Gaudi2 accelerators. The Intel Gaudi2 accelerator supports both training and inference for deep learning models in particular for LLMs. Please visit [Habana AI products](<(https://habana.ai/products)>) for more details. - -## set up environment variables - -```bash -export HUGGINGFACEHUB_API_TOKEN= -export RAY_Serve_ENDPOINT="http://${your_ip}:8008" -export LLM_MODEL="meta-llama/Llama-2-7b-chat-hf" -``` - -For gated models such as `LLAMA-2`, you will have to pass the environment HUGGINGFACEHUB_API_TOKEN. Please follow this link [huggingface token](https://huggingface.co/docs/hub/security-tokens) to get the access token and export `HUGGINGFACEHUB_API_TOKEN` environment with the token. - -## Set up Ray Serve Service - -### Build docker - -```bash -bash build_docker_rayserve.sh -``` - -### Launch Ray Serve service - -```bash -bash launch_ray_service.sh -``` - -The `launch_vllm_service.sh` script accepts five parameters: - -- port_number: The port number assigned to the Ray Gaudi endpoint, with the default being 8008. -- model_name: The model name utilized for LLM, with the default set to meta-llama/Llama-2-7b-chat-hf. -- chat_processor: The chat processor for handling the prompts, with the default set to 'ChatModelNoFormat', and the optional selection can be 'ChatModelLlama', 'ChatModelGptJ" and "ChatModelGemma'. -- num_cpus_per_worker: The number of CPUs specifies the number of CPUs per worker process. -- num_hpus_per_worker: The number of HPUs specifies the number of HPUs per worker process. - -If you want to customize the port or model_name, can run: - -```bash -bash ./launch_ray_service.sh ${port_number} ${model_name} ${chat_processor} ${num_cpus_per_worker} ${num_hpus_per_worker} -``` - -### Query the service - -And then you can make requests with the OpenAI-compatible APIs like below to check the service status: - -```bash -curl http://${your_ip}:8008/v1/chat/completions \ - -H "Content-Type: application/json" \ - -d '{"model": "Llama-2-7b-chat-hf", "messages": [{"role": "user", "content": "What is Deep Learning?"}], "max_tokens": 32 }' -``` - -For more information about the OpenAI APIs, you can checkeck the [OpenAI official document](https://platform.openai.com/docs/api-reference/). - -## Set up OPEA microservice - -Then we warp the Ray Serve service into OPEA microcervice. 
- -### Build docker - -```bash -bash build_docker_microservice.sh -``` - -### Launch the microservice - -```bash -bash launch_microservice.sh -``` - -### Query the microservice - -```bash -curl http://${your_ip}:9000/v1/chat/completions \ - -X POST \ - -d '{"query":"What is Deep Learning?","max_new_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":false}' \ - -H 'Content-Type: application/json' -``` diff --git a/comps/llms/text-generation/ray_serve/__init__.py b/comps/llms/text-generation/ray_serve/__init__.py deleted file mode 100644 index 916f3a44b..000000000 --- a/comps/llms/text-generation/ray_serve/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 diff --git a/comps/llms/text-generation/ray_serve/api_openai_backend/__init__.py b/comps/llms/text-generation/ray_serve/api_openai_backend/__init__.py deleted file mode 100644 index 916f3a44b..000000000 --- a/comps/llms/text-generation/ray_serve/api_openai_backend/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 diff --git a/comps/llms/text-generation/ray_serve/api_openai_backend/openai_protocol.py b/comps/llms/text-generation/ray_serve/api_openai_backend/openai_protocol.py deleted file mode 100644 index 7135f2be3..000000000 --- a/comps/llms/text-generation/ray_serve/api_openai_backend/openai_protocol.py +++ /dev/null @@ -1,381 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -import time -import uuid -from enum import Enum -from typing import Any, Dict, List, Literal, Optional, Tuple, Type, TypeVar, Union - -import yaml -from pydantic import BaseModel, Field, root_validator - -TModel = TypeVar("TModel", bound="ModelList") -TCompletion = TypeVar("TCompletion", bound="CompletionResponse") -TChatCompletion = TypeVar("TChatCompletion", bound="ChatCompletionResponse") -ModelT = TypeVar("ModelT", bound=BaseModel) - - -class ErrorResponse(BaseModel): - object: str = "error" - message: str - internal_message: str - type: str - param: Dict[str, Any] = {} - code: int - - -class ModelCard(BaseModel): - id: str - object: str = "model" - created: int = Field(default_factory=lambda: int(time.time())) - owned_by: str = "llmonray" - root: Optional[str] = None - parent: Optional[str] = None - - -class ModelList(BaseModel): - object: str = "list" - data: List[ModelCard] = [] - - -class UsageInfo(BaseModel): - prompt_tokens: int - total_tokens: int - completion_tokens: Optional[int] = 0 - - @classmethod - def from_response(cls, response: Union["ModelResponse", Dict[str, Any]]) -> "UsageInfo": - if isinstance(response, BaseModel): - response_dict = response.dict() - else: - response_dict = response - return cls( - prompt_tokens=response_dict["num_input_tokens"] or 0, - completion_tokens=response_dict["num_generated_tokens"] or 0, - total_tokens=(response_dict["num_input_tokens"] or 0) + (response_dict["num_generated_tokens"] or 0), - ) - - -class CompletionResponseChoice(BaseModel): - index: int - text: str - logprobs: Optional[int] = None - finish_reason: Optional[str] - - -class CompletionResponse(BaseModel): - id: str = Field(default_factory=lambda: f"cmpl-{str(uuid.uuid4().hex)}") - object: str = "text_completion" - created: int = Field(default_factory=lambda: int(time.time())) - model: str - choices: List[CompletionResponseChoice] - usage: Optional[UsageInfo] - - -class FunctionCall(BaseModel): - name: str - 
arguments: Optional[str] = None - - -class ToolCall(BaseModel): - function: FunctionCall - type: Literal["function"] - id: str - - def __str__(self): - return str(self.dict()) - - -class ChatMessage(BaseModel): - role: Literal["system", "assistant", "user", "tool"] - content: Optional[Union[str, list]] = None - tool_calls: Optional[List[ToolCall]] = None - tool_call_id: Optional[str] = None - - def __str__(self): - # if tool_calls is not None, then we are passing a tool message - # using get attr instead of just in case the attribute is deleted off of - # the object - if getattr(self, "tool_calls", None): - return str(self.content) - return str(self.dict()) - - -class ChatCompletionResponseChoice(BaseModel): - index: int - message: ChatMessage - finish_reason: Optional[str] - - -class Function(BaseModel): - name: str - description: Optional[str] = None - parameters: Optional[Dict[str, Any]] = None - - -class ToolChoice(BaseModel): - type: Literal["function"] - function: Function - - -class Tool(BaseModel): - type: Literal["function"] - function: Function - - -class DeltaRole(BaseModel): - role: Literal["system", "assistant", "user"] - - def __str__(self): - return self.role - - -class DeltaEOS(BaseModel): - class Config: - extra = "forbid" - - -class DeltaContent(BaseModel): - content: str - tool_calls: Optional[List[ToolCall]] = None - - def __str__(self): - if self.tool_calls: - return str(self.tool_calls) - else: - return str(self.dict()) - - -class DeltaChoices(BaseModel): - delta: Union[DeltaRole, DeltaContent, DeltaEOS] - index: int - finish_reason: Optional[str] - - -class ChatCompletionResponse(BaseModel): - id: str = Field(default_factory=lambda: f"chatcmpl-{str(uuid.uuid4().hex)}") - object: str - created: int = Field(default_factory=lambda: int(time.time())) - model: str - choices: List[Union[ChatCompletionResponseChoice, DeltaChoices]] - usage: Optional[UsageInfo] - - -class Prompt(BaseModel): - prompt: Union[str, List[ChatMessage]] - use_prompt_format: bool = True - parameters: Optional[Union[Dict[str, Any], BaseModel]] = None - tools: Optional[List[Tool]] = None - tool_choice: Union[Literal["auto", "none"], ToolChoice] = "auto" - - -class BaseModelExtended(BaseModel): - @classmethod - def parse_yaml(cls: Type[ModelT], file, **kwargs) -> ModelT: - kwargs.setdefault("Loader", yaml.SafeLoader) - dict_args = yaml.load(file, **kwargs) - return cls.parse_obj(dict_args) - - def yaml( - self, - *, - stream=None, - include=None, - exclude=None, - by_alias: bool = False, - skip_defaults: Union[bool, None] = None, - exclude_unset: bool = False, - exclude_defaults: bool = False, - exclude_none: bool = False, - **kwargs, - ): - """Generate a YAML representation of the model, `include` and `exclude` - arguments as per `dict()`.""" - return yaml.dump( - self.dict( - include=include, - exclude=exclude, - by_alias=by_alias, - skip_defaults=skip_defaults, - exclude_unset=exclude_unset, - exclude_defaults=exclude_defaults, - exclude_none=exclude_none, - ), - stream=stream, - **kwargs, - ) - - -class ComputedPropertyMixin: - """Include properties in the dict and json representations of the model.""" - - # Replace with pydantic.computed_field once it's available - @classmethod - def get_properties(cls): - return [prop for prop in dir(cls) if isinstance(getattr(cls, prop), property)] - - def dict(self, *args, **kwargs): - self.__dict__.update({prop: getattr(self, prop) for prop in self.get_properties()}) - return super().dict(*args, **kwargs) # type: ignore - - def json( - self, - *args, - 
**kwargs, - ) -> str: - self.__dict__.update({prop: getattr(self, prop) for prop in self.get_properties()}) - - return super().json(*args, **kwargs) # type: ignore - - -class ModelResponse(ComputedPropertyMixin, BaseModelExtended): - generated_text: Optional[str] = None - tool_calls: Optional[List[ToolCall]] = None - num_input_tokens: Optional[int] = None - num_input_tokens_batch: Optional[int] = None - num_generated_tokens: Optional[int] = None - num_generated_tokens_batch: Optional[int] = None - preprocessing_time: Optional[float] = None - generation_time: Optional[float] = None - timestamp: Optional[float] = Field(default_factory=time.time) - finish_reason: Optional[str] = None - error: Optional[ErrorResponse] = None - - @root_validator(skip_on_failure=True) - def text_or_error_or_finish_reason(cls, values): - if values.get("generated_text") is None and values.get("error") is None and values.get("finish_reason") is None: - raise ValueError("Either 'generated_text' or 'error' or 'finish_reason' must be set") - return values - - @classmethod - def merge_stream(cls, *responses: "ModelResponse") -> "ModelResponse": - """Merge a stream of responses into a single response. - - The generated text is concatenated. Fields are maxed, except for - num_generated_tokens and generation_time, which are summed. - """ - if len(responses) == 1: - return responses[0] - - generated_text = "".join([response.generated_text or "" for response in responses]) - num_input_tokens = [ - response.num_input_tokens for response in responses if response.num_input_tokens is not None - ] - max_num_input_tokens = max(num_input_tokens) if num_input_tokens else None - num_input_tokens_batch = [ - response.num_input_tokens_batch for response in responses if response.num_input_tokens_batch is not None - ] - max_num_input_tokens_batch = max(num_input_tokens_batch) if num_input_tokens_batch else None - num_generated_tokens = [ - response.num_generated_tokens for response in responses if response.num_generated_tokens is not None - ] - total_generated_tokens = sum(num_generated_tokens) if num_generated_tokens else None - num_generated_tokens_batch = [ - response.num_generated_tokens_batch - for response in responses - if response.num_generated_tokens_batch is not None - ] - total_generated_tokens_batch = sum(num_generated_tokens_batch) if num_generated_tokens_batch else None - preprocessing_time = [ - response.preprocessing_time for response in responses if response.preprocessing_time is not None - ] - max_preprocessing_time = max(preprocessing_time) if preprocessing_time else None - generation_time = [response.generation_time for response in responses if response.generation_time is not None] - total_generation_time = sum(generation_time) if generation_time else None - error = next((response.error for response in reversed(responses) if response.error), None) - - return cls( - generated_text=generated_text, - num_input_tokens=max_num_input_tokens, - num_input_tokens_batch=max_num_input_tokens_batch, - num_generated_tokens=total_generated_tokens, - num_generated_tokens_batch=total_generated_tokens_batch, - preprocessing_time=max_preprocessing_time, - generation_time=total_generation_time, - timestamp=responses[-1].timestamp, - finish_reason=responses[-1].finish_reason, - error=error, - ) - - @property - def total_time(self) -> Optional[float]: - if self.generation_time is None and self.preprocessing_time is None: - return None - return (self.preprocessing_time or 0) + (self.generation_time or 0) - - @property - def 
num_total_tokens(self) -> Optional[float]: - try: - return (self.num_input_tokens or 0) + (self.num_generated_tokens or 0) - except Exception: - return None - - @property - def num_total_tokens_batch(self) -> Optional[float]: - try: - return (self.num_input_tokens_batch or 0) + (self.num_generated_tokens_batch or 0) - except Exception: - return None - - def unpack(self) -> Tuple["ModelResponse", ...]: - return (self,) - - -class CompletionRequest(BaseModel): - model: str - prompt: str - suffix: Optional[str] = None - temperature: Optional[float] = None - top_p: Optional[float] = None - n: int = 1 - max_tokens: Optional[int] = 16 - stop: Optional[List[str]] = None - stream: bool = False - echo: Optional[bool] = False - presence_penalty: Optional[float] = None - frequency_penalty: Optional[float] = None - logprobs: Optional[int] = None - logit_bias: Optional[Dict[str, float]] = None - user: Optional[str] = None - - -class ChatCompletionRequest(BaseModel): - model: str - messages: List[ChatMessage] - temperature: Optional[float] = None - top_p: Optional[float] = None - n: int = 1 - max_tokens: Optional[int] = None - stop: Optional[List[str]] = None - stream: bool = False - presence_penalty: Optional[float] = None - frequency_penalty: Optional[float] = None - logprobs: Optional[int] = None - logit_bias: Optional[Dict[str, float]] = None - user: Optional[str] = None - tools: Optional[List[Tool]] = None - tool_choice: Union[Literal["auto", "none"], ToolChoice] = "auto" - ignore_eos: bool = False # used in vllm engine benchmark - - -class FinishReason(str, Enum): - LENGTH = "length" - STOP = "stop" - ERROR = "error" - CANCELLED = "cancelled" - TOOL_CALLS = "tool_calls" - - def __str__(self) -> str: - return self.value - - @classmethod - def from_vllm_finish_reason(cls, finish_reason: Optional[str]) -> Optional["FinishReason"]: - if finish_reason is None: - return None - if finish_reason == "stop": - return cls.STOP - if finish_reason == "length": - return cls.LENGTH - if finish_reason == "abort": - return cls.CANCELLED - return cls.STOP diff --git a/comps/llms/text-generation/ray_serve/api_openai_backend/query_client.py b/comps/llms/text-generation/ray_serve/api_openai_backend/query_client.py deleted file mode 100644 index 66b0c9d09..000000000 --- a/comps/llms/text-generation/ray_serve/api_openai_backend/query_client.py +++ /dev/null @@ -1,59 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -from typing import Dict - -from fastapi import HTTPException -from ray_serve.api_openai_backend.openai_protocol import ModelCard, Prompt -from ray_serve.api_openai_backend.request_handler import handle_request - - -class RouterQueryClient: - def __init__(self, serve_deployments): - self.serve_deployments = serve_deployments - - async def query(self, model: str, prompt: Prompt, request_id: str, streaming_reponse: bool): - if model in self.serve_deployments: - deploy_handle = self.serve_deployments[model] - else: - raise HTTPException(404, f"Could not find model with id {model}") - - request_config = prompt.parameters - temperature = request_config.get("temperature", 1.0) - top_p = request_config.get("top_p", 1.0) - max_new_tokens = request_config.get("max_tokens", None) - gen_config = {"max_new_tokens": max_new_tokens, "temperature": temperature, "top_p": top_p} - if temperature != 1.0 or top_p != 1.0: - gen_config.update({"do_sample": True}) - if request_config.get("ignore_eos", False): - gen_config.update({"ignore_eos": True}) - - async for x in handle_request( - 
model=model, - prompt=prompt, - request_id=request_id, - async_iterator=deploy_handle.options(stream=True) - .openai_call.options(stream=True, use_new_handle_api=True) - .remote( - prompt.prompt, - gen_config, - streaming_response=streaming_reponse, - tools=prompt.tools, - tool_choice=prompt.tool_choice, - ), - ): - yield x - - async def model(self, model_id: str) -> ModelCard: - """Get configurations for a supported model.""" - return ModelCard( - id=model_id, - root=model_id, - ) - - async def models(self) -> Dict[str, ModelCard]: - """Get configurations for supported models.""" - metadatas = {} - for model_id in self.serve_deployments: - metadatas[model_id] = await self.model(model_id) - return metadatas diff --git a/comps/llms/text-generation/ray_serve/api_openai_backend/request_handler.py b/comps/llms/text-generation/ray_serve/api_openai_backend/request_handler.py deleted file mode 100644 index 60b4aba9a..000000000 --- a/comps/llms/text-generation/ray_serve/api_openai_backend/request_handler.py +++ /dev/null @@ -1,124 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -import asyncio -import traceback -from typing import AsyncIterator, List - -from fastapi import HTTPException, Request, status -from pydantic import ValidationError as PydanticValidationError -from ray_serve.api_openai_backend.openai_protocol import ErrorResponse, FinishReason, ModelResponse, Prompt -from starlette.responses import JSONResponse - - -class OpenAIHTTPException(Exception): - def __init__( - self, - status_code: int, - message: str, - type: str = "Unknown", - ) -> None: - self.status_code = status_code - self.message = message - self.type = type - - -def openai_exception_handler(r: Request, exc: OpenAIHTTPException): - assert isinstance(exc, OpenAIHTTPException), f"Unable to handle invalid exception {type(exc)}" - if exc.status_code == status.HTTP_500_INTERNAL_SERVER_ERROR: - message = "Internal Server Error" - internal_message = message - exc_type = "InternalServerError" - else: - internal_message = extract_message_from_exception(exc) - message = exc.message - exc_type = exc.type - err_response = ModelResponse( - error=ErrorResponse( - message=message, - code=exc.status_code, - internal_message=internal_message, - type=exc_type, - ) - ) - return JSONResponse(content=err_response.dict(), status_code=exc.status_code) - - -def extract_message_from_exception(e: Exception) -> str: - # If the exception is a Ray exception, we need to dig through the text to get just - # the exception message without the stack trace - # This also works for normal exceptions (we will just return everything from - # format_exception_only in that case) - message_lines = traceback.format_exception_only(type(e), e)[-1].strip().split("\n") - message = "" - # The stack trace lines will be prefixed with spaces, so we need to start - # from the bottom and stop at the last line before a line with a space - found_last_line_before_stack_trace = False - for line in reversed(message_lines): - if not line.startswith(" "): - found_last_line_before_stack_trace = True - if found_last_line_before_stack_trace and line.startswith(" "): - break - message = line + "\n" + message - message = message.strip() - return message - - -async def handle_request( - model: str, - request_id: str, - prompt: Prompt, - async_iterator: AsyncIterator[ModelResponse], -): - # Handle errors for an ModelResopnse stream. 
- model_tags = {"model_id": model} - print("handle_request: ", model_tags) - - responses: List[ModelResponse] = [] - try: - async for response in async_iterator: - responses.append(response) - yield response - except asyncio.CancelledError as e: - # The request is cancelled. Try to return a last Model response, then raise - # We raise here because we don't want to interrupt the cancellation - yield _get_response_for_error(e, request_id=request_id) - raise - except Exception as e: - # Something went wrong. - yield _get_response_for_error(e, request_id=request_id) - # DO NOT RAISE. - # We do not raise here because that would cause a disconnection for streaming. - - -def _get_response_for_error(e, request_id: str): - """Convert an exception to an ModelResponse object.""" - status_code = status.HTTP_500_INTERNAL_SERVER_ERROR - if isinstance(e, HTTPException): - status_code = e.status_code - elif isinstance(e, OpenAIHTTPException): - status_code = e.status_code - elif isinstance(e, PydanticValidationError): - status_code = 400 - else: - # Try to get the status code attribute - status_code = getattr(e, "status_code", status_code) - - if status_code == status.HTTP_500_INTERNAL_SERVER_ERROR: - message = "Internal Server Error" - exc_type = "InternalServerError" - else: - message = extract_message_from_exception(e) - exc_type = e.__class__.__name__ - - message += f" (Request ID: {request_id})" - - return ModelResponse( - error=ErrorResponse( - message=message, - code=status_code, - internal_message=message, - type=exc_type, - ), - finish_reason=FinishReason.ERROR, - ) diff --git a/comps/llms/text-generation/ray_serve/api_openai_backend/router_app.py b/comps/llms/text-generation/ray_serve/api_openai_backend/router_app.py deleted file mode 100644 index 8e6e946ab..000000000 --- a/comps/llms/text-generation/ray_serve/api_openai_backend/router_app.py +++ /dev/null @@ -1,349 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -import os -import uuid -from typing import AsyncGenerator, List - -import async_timeout -from fastapi import FastAPI -from fastapi import Response as FastAPIResponse -from fastapi import status -from fastapi.middleware.cors import CORSMiddleware -from ray_serve.api_openai_backend.openai_protocol import ( - ChatCompletionRequest, - ChatCompletionResponse, - ChatCompletionResponseChoice, - ChatMessage, - CompletionRequest, - CompletionResponse, - CompletionResponseChoice, - DeltaChoices, - DeltaContent, - DeltaEOS, - DeltaRole, - ModelCard, - ModelList, - ModelResponse, - Prompt, - UsageInfo, -) -from ray_serve.api_openai_backend.query_client import RouterQueryClient -from ray_serve.api_openai_backend.request_handler import OpenAIHTTPException, openai_exception_handler -from starlette.responses import Response, StreamingResponse - -# timeout in 10 minutes. 
Streaming can take longer than 3 min -TIMEOUT = float(os.environ.get("ROUTER_HTTP_TIMEOUT", 1800)) - - -def init() -> FastAPI: - router_app = FastAPI() - router_app.add_exception_handler(OpenAIHTTPException, openai_exception_handler) - router_app.add_middleware( - CORSMiddleware, - allow_origins=["*"], - allow_credentials=True, - allow_methods=["*"], - allow_headers=["*"], - ) - - return router_app - - -router_app = init() - - -async def _completions_wrapper( - completion_id: str, - body: CompletionRequest, - response: Response, - generator: AsyncGenerator[ModelResponse, None], -) -> AsyncGenerator[str, None]: - had_error = False - async with async_timeout.timeout(TIMEOUT): - all_results = [] - async for results in generator: - for subresult in results.unpack(): - all_results.append(subresult) - subresult_dict = subresult.dict() - if subresult_dict.get("error"): - response.status_code = subresult_dict["error"]["code"] - # Drop finish reason as OpenAI doesn't expect it - # for errors in streaming - subresult_dict["finish_reason"] = None - all_results.pop() - had_error = True - yield "data: " + ModelResponse(**subresult_dict).json() + "\n\n" - # Return early in case of an error - break - choices = [ - CompletionResponseChoice( - index=0, - text=subresult_dict["generated_text"] or "", - finish_reason=subresult_dict["finish_reason"], - ) - ] - usage = None - yield "data: " + CompletionResponse( - id=completion_id, - object="text_completion", - model=body.model, - choices=choices, - usage=usage, - ).json() + "\n\n" - if had_error: - # Return early in case of an error - break - if not had_error: - usage = UsageInfo.from_response(ModelResponse.merge_stream(*all_results)) if all_results else None - yield "data: " + CompletionResponse( - id=completion_id, - object="text_completion", - model=body.model, - choices=choices, - usage=usage, - ).json() + "\n\n" - yield "data: [DONE]\n\n" - - -async def _chat_completions_wrapper( - completion_id: str, - body: ChatCompletionRequest, - response: Response, - generator: AsyncGenerator[ModelResponse, None], -) -> AsyncGenerator[str, None]: - had_error = False - async with async_timeout.timeout(TIMEOUT): - finish_reason = None - choices: List[DeltaChoices] = [ - DeltaChoices( - delta=DeltaRole(role="assistant"), - index=0, - finish_reason=None, - ) - ] - chunk = ChatCompletionResponse( - id=completion_id, - object="chat.completion.chunk", - model=body.model, - choices=choices, - usage=None, - ) - data = chunk.json() - yield f"data: {data}\n\n" - - all_results = [] - async for results in generator: - for subresult in results.unpack(): - all_results.append(subresult) - subresult_dict = subresult.dict() - if subresult_dict.get("error"): - response.status_code = subresult_dict["error"]["code"] - # Drop finish reason as OpenAI doesn't expect it - # for errors in streaming - subresult_dict["finish_reason"] = None - all_results.pop() - had_error = True - yield "data: " + ModelResponse(**subresult_dict).json() + "\n\n" - # Return early in case of an error - break - else: - finish_reason = subresult_dict["finish_reason"] - choices = [ - DeltaChoices( - delta=DeltaContent( - content=subresult_dict["generated_text"] or "", - tool_calls=subresult_dict["tool_calls"] or None, - ), - index=0, - finish_reason=None, - ) - ] - chunk = ChatCompletionResponse( - id=completion_id, - object="chat.completion.chunk", - model=body.model, - choices=choices, - usage=None, - ) - # data = chunk.json(exclude_unset=True, ensure_ascii=False) - data = chunk.json() - yield f"data: {data}\n\n" - 
if had_error: - # Return early in case of an error - break - if not had_error: - choices = [ - DeltaChoices( - delta=DeltaEOS(), - index=0, - finish_reason=finish_reason, - ) - ] - usage = UsageInfo.from_response(ModelResponse.merge_stream(*all_results)) if all_results else None - chunk = ChatCompletionResponse( - id=completion_id, - object="chat.completion.result", - model=body.model, - choices=choices, - usage=usage, - ) - data = chunk.json() - yield f"data: {data}\n\n" - yield "data: [DONE]\n\n" - - -class Router: - def __init__( - self, - query_client: RouterQueryClient, - ) -> None: - self.query_client = query_client - - @router_app.get("/v1/models", response_model=ModelList) - async def models(self) -> ModelList: - """OpenAI API-compliant endpoint to get all models.""" - models = await self.query_client.models() - return ModelList(data=list(models.values())) - - # :path allows us to have slashes in the model name - @router_app.get("/v1/models/{model:path}", response_model=ModelCard) - async def model_data(self, model: str) -> ModelCard: - """OpenAI API-compliant endpoint to get one model. - - :param model: The model ID (e.g. "amazon/LightGPT") - """ - model = model.replace("--", "/") - model_data = await self.query_client.model(model) - if model_data is None: - raise OpenAIHTTPException( - message=f"Invalid model '{model}'", - status_code=status.HTTP_400_BAD_REQUEST, - type="InvalidModel", - ) - return model_data - - @router_app.post("/v1/completions") - async def completions( - self, - body: CompletionRequest, - response: FastAPIResponse, - ): - """Given a prompt, the model will return one or more predicted completions, - and can also return the probabilities of alternative tokens at each position. - - Returns: - A response object with completions. - """ - prompt = Prompt( - prompt=body.prompt, - parameters=dict(body), - use_prompt_format=False, - ) - request_id = f"cmpl-{str(uuid.uuid4().hex)}" - - if body.stream: - return StreamingResponse( - _completions_wrapper( - request_id, - body, - response, - self.query_client.query(body.model, prompt, request_id, body.stream), - ), - media_type="text/event-stream", - ) - else: - async with async_timeout.timeout(TIMEOUT): - results_reponse = self.query_client.query(body.model, prompt, request_id, body.stream) - async for results in results_reponse: - if results.error: - raise OpenAIHTTPException( - message=results.error.message, - status_code=results.error.code, - type=results.error.type, - ) - results = results.dict() - - choices = [ - CompletionResponseChoice( - index=0, - text=results["generated_text"] or "", - finish_reason=results["finish_reason"], - ) - ] - usage = UsageInfo.from_response(results) - - return CompletionResponse( - id=request_id, - object="text_completion", - model=body.model, - choices=choices, - usage=usage, - ) - - @router_app.post("/v1/chat/completions") - async def chat( - self, - body: ChatCompletionRequest, - response: FastAPIResponse, - ): - """Given a prompt, the model will return one or more predicted completions, - and can also return the probabilities of alternative tokens at each position. - - Returns: - A response object with completions. 
- """ - prompt = Prompt( - prompt=body.messages, - parameters=dict(body), - tools=body.tools, - tool_choice=body.tool_choice, - ) - request_id = f"chatcmpl-{str(uuid.uuid4().hex)}" - if body.stream: - return StreamingResponse( - _chat_completions_wrapper( - request_id, - body, - response, - self.query_client.query(body.model, prompt, request_id, body.stream), - ), - media_type="text/event-stream", - ) - else: - async with async_timeout.timeout(TIMEOUT): - results_reponse = self.query_client.query(body.model, prompt, request_id, body.stream) - async for results in results_reponse: - if results.error: - raise OpenAIHTTPException( - message=results.error.message, - status_code=results.error.code, - type=results.error.type, - ) - - if results.tool_calls is not None: - msg = ChatMessage(role="assistant", tool_calls=results.tool_calls) - # deleting this fields so that they don't appear in the response - del msg.tool_call_id - else: - msg = ChatMessage(role="assistant", content=results.generated_text or "") - - usage = UsageInfo.from_response(results.dict()) - return ChatCompletionResponse( - id=request_id, - object="chat.completion", - model=body.model, - choices=[ - ChatCompletionResponseChoice( - index=0, - message=msg, - finish_reason=results.finish_reason, - ) - ], - usage=usage, - ) - - @router_app.get("/v1/health_check") - async def health_check(self) -> bool: - """Check if the routher is still running.""" - return True diff --git a/comps/llms/text-generation/ray_serve/api_openai_backend/tools.py b/comps/llms/text-generation/ray_serve/api_openai_backend/tools.py deleted file mode 100644 index e815eb146..000000000 --- a/comps/llms/text-generation/ray_serve/api_openai_backend/tools.py +++ /dev/null @@ -1,180 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -import json -import os -import re -from enum import Enum -from typing import List, Union - -import jinja2 -from ray_serve.api_openai_backend.openai_protocol import ChatMessage, FunctionCall, ToolCall - - -class ToolsCallsTemplateContext(Enum): - """This is used within the template to generate depending on the context.""" - - CALL_TOKEN = 1 - FUNCTIONS_LIST = 2 - FORCE_CALL = 3 - CALLS_NOTIF = 4 - TOOL_RESPONSE = 5 - - -class ToolsCallsTemplate: - def __init__(self, template_path=None): - self.trim_blocks = True - self.lstrip_blocks = True - if template_path is None: - template_path = os.path.dirname(__file__) + "/templates/tools_functions.jinja" - self.environment = jinja2.Environment(loader=jinja2.FileSystemLoader(os.path.dirname(template_path))) - self.template = self.environment.get_template(os.path.basename(template_path)) - self.template.globals["FUNCTIONS_LIST"] = ToolsCallsTemplateContext.FUNCTIONS_LIST - self.template.globals["FORCE_CALL"] = ToolsCallsTemplateContext.FORCE_CALL - self.template.globals["CALL_TOKEN"] = ToolsCallsTemplateContext.CALL_TOKEN - self.template.globals["CALLS_NOTIF"] = ToolsCallsTemplateContext.CALLS_NOTIF - self.template.globals["TOOL_RESPONSE"] = ToolsCallsTemplateContext.TOOL_RESPONSE - - def get_func_call_token(self) -> str: - """Return the special token used to find functions calls.""" - return self.template.render(CONTEXT=ToolsCallsTemplateContext.CALL_TOKEN) - - def render_toolcalls(self, tool_calls: List[ToolCall]): - return self.template.render(CONTEXT=ToolsCallsTemplateContext.CALLS_NOTIF, tool_calls=tool_calls) - - def render_toolmessage(self, message: ChatMessage): - return self.template.render(CONTEXT=ToolsCallsTemplateContext.TOOL_RESPONSE, 
message=message) - - def render_toolslist(self, tool_choice: Union[str, None], tools_list) -> str: - if isinstance(tool_choice, str) and tool_choice == "auto": - tool_choice = None - if tool_choice is not None: - for tool in tools_list: - # Search if the tool_choice is in the tools_list - if tool.type == "function" and tool.function.name == tool_choice: - return self.template.render(CONTEXT=ToolsCallsTemplateContext.FORCE_CALL, tool=tool) - return "" - else: - return self.template.render(CONTEXT=ToolsCallsTemplateContext.FUNCTIONS_LIST, tools_list=tools_list) - - -class OpenAIToolsPrompter: - """ - https://platform.openai.com/docs/assistants/tools - """ - - def __init__(self, template_path=None): - self.template = ToolsCallsTemplate(template_path) - self.call_token_str = self.template.get_func_call_token() - if self.call_token_str is None: - raise ValueError("There is something wrong with the tools template.") - else: - self.call_token_pre = self.call_token_str[0] - - def func_call_token_pre(self) -> str: - return self.call_token_pre - - def func_call_token_size(self) -> int: - return len(self.call_token_str) - - def func_call_token(self) -> str: - return self.call_token_str - - def content_from_assistant(self, message: ChatMessage) -> str: - text = self.template.render_toolcalls(message.tool_calls) - if message.content is None: - return text - else: - return message.content + "\n" + text - - def content_from_tool(self, message: ChatMessage) -> str: - return self.template.render_toolmessage(message) - - def inject_prompt(self, request, tools, tool_choice): - """Generate and inject the prompt for tools calls.""" - if tools is not None and self.call_token_str is not None and len(tools): - select_tool_choice = tool_choice if (tool_choice is not None and tool_choice != "auto") else None - text_inject = self.template.render_toolslist(tool_choice=select_tool_choice, tools_list=tools) - if request[-1].role == "user": - request[-1].content = text_inject + "\n The following is User Question: \n" + request[-1].content - return request - - -class ChatPromptCapture: - def __init__(self): - self.content: str = "" - self.func_call_content: str = "" - self.func_start_pos: int = -1 - self.print_end_pos: int = 0 - self.calls_list = [] - self.call_indx = 0 - - def reset(self): - self.content: str = "" - self.func_call_content: str = "" - self.func_start_pos: int = -1 - self.print_end_pos: int = 0 - self.calls_list = [] - - def make_calls_list(self, call_id: int, func_call_content): - if func_call_content is None: - return - try: - call_dict = json.loads(func_call_content) - call_dict["arguments"] = json.dumps(call_dict["arguments"]) - self.calls_list.append(ToolCall(id=f"call_{call_id}", type="function", function=FunctionCall(**call_dict))) - except Exception: - pass - - def process_full_output(self, output: str, openai_tools_prompter: OpenAIToolsPrompter, original_prompts): - ret_output = "" - # FIXME: for some model, prompt will be append along with answer, need to remove - start_pos = sum([len(prompt) for prompt in original_prompts]) - 6 - - if openai_tools_prompter.func_call_token() in output[start_pos:]: # we found func_call - if is_func := re.findall("(\{(.*)\})", output[start_pos:]): - for idx, found in enumerate(is_func): - func_call_content = found[0] - c1 = func_call_content.count("{") - c2 = func_call_content.count("}") - if c1 == c2: # We have the complete call block - func_call_content = found[0] - self.make_calls_list(idx, func_call_content) - else: - ret_output = output[start_pos:] - - return 
ret_output, self.calls_list - - def process_stream_output(self, output: str, openai_tools_prompter: OpenAIToolsPrompter): - ret_output = "" - self.content += output - - # scenario 1: not reach the length for identifying a func call. - if len(self.content) < openai_tools_prompter.func_call_token_size(): - # wait for possible function call - return ret_output, self.calls_list - - # scenario 2: reach the length for identifying if a func call. - if self.func_start_pos == -1: - if openai_tools_prompter.func_call_token() in self.content: # we found func_call - self.func_start_pos = self.content.index(openai_tools_prompter.func_call_token()) - return ret_output, self.calls_list - else: # unhold self.content - print_start_pos = self.print_end_pos - self.print_end_pos = len(self.content) - ret_output = self.content[print_start_pos : self.print_end_pos] - return ret_output, self.calls_list - - # scenario 3: wait until we can extract the function call - calls_list = [] - if is_func := re.findall("(\{(.*)\})", self.content): - for idx, found in enumerate(is_func): - func_call_content = found[0] - c1 = func_call_content.count("{") - c2 = func_call_content.count("}") - if c1 == c2: # We have the complete call block - self.make_calls_list(self.call_indx, func_call_content) - calls_list = self.calls_list - self.call_indx += 1 - self.reset() - return ret_output, calls_list diff --git a/comps/llms/text-generation/ray_serve/api_server_openai.py b/comps/llms/text-generation/ray_serve/api_server_openai.py deleted file mode 100644 index 002b3a633..000000000 --- a/comps/llms/text-generation/ray_serve/api_server_openai.py +++ /dev/null @@ -1,130 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -import os -import re -import sys -from typing import Any, Dict - -import ray -from easydict import EasyDict as edict -from ray import serve -from ray_serve.api_openai_backend.query_client import RouterQueryClient -from ray_serve.api_openai_backend.router_app import Router, router_app -from ray_serve.serve import LLMServe - - -def router_application(deployments, max_concurrent_queries): - """Create a Router Deployment. - - Router Deployment will point to a Serve Deployment for each specified base model, - and have a client to query each one. 
- """ - merged_client = RouterQueryClient(deployments) - - RouterDeployment = serve.deployment( - route_prefix="/", - max_concurrent_queries=max_concurrent_queries, # Maximum backlog for a single replica - )(serve.ingress(router_app)(Router)) - - return RouterDeployment.bind(merged_client) - - -def openai_serve_run(deployments, host, route_prefix, port, max_concurrent_queries): - router_app = router_application(deployments, max_concurrent_queries) - - serve.start(http_options={"host": host, "port": port}) - serve.run( - router_app, - name="router", - route_prefix=route_prefix, - ).options( - stream=True, - use_new_handle_api=True, - ) - deployment_address = f"http://{host}:{port}{route_prefix}" - print(f"Deployment is ready at `{deployment_address}`.") - return deployment_address - - -def get_deployment_actor_options(hpus_per_worker, ipex_enabled=False): - _ray_env_key = "env_vars" - # OMP_NUM_THREADS will be set by num_cpus, so not set in env - _predictor_runtime_env_ipex = { - "KMP_BLOCKTIME": "1", - "KMP_SETTINGS": "1", - "KMP_AFFINITY": "granularity=fine,compact,1,0", - "MALLOC_CONF": "oversize_threshold:1,background_thread:true,\ - metadata_thp:auto,dirty_decay_ms:9000000000,muzzy_decay_ms:9000000000", - } - runtime_env: Dict[str, Any] = {_ray_env_key: {}} - if ipex_enabled: - runtime_env[_ray_env_key].update(_predictor_runtime_env_ipex) - ray_actor_options: Dict[str, Any] = {"runtime_env": runtime_env} - ray_actor_options["resources"] = {"HPU": hpus_per_worker} - - return ray_actor_options - - -def main(argv=None): - import argparse - - parser = argparse.ArgumentParser(description="Serve LLM models with Ray Serve.", add_help=True) - parser.add_argument("--port_number", default=8080, type=int, help="Port number to serve on.") - parser.add_argument( - "--model_id_or_path", default="meta-llama/Llama-2-7b-chat-hf", type=str, help="Model id or path." - ) - parser.add_argument( - "--chat_processor", default="ChatModelNoFormat", type=str, help="Chat processor for aligning the prompts." 
- ) - parser.add_argument("--max_num_seqs", default=256, type=int, help="Maximum number of sequences to generate.") - parser.add_argument("--max_batch_size", default=8, type=int, help="Maximum batch size.") - parser.add_argument("--num_replicas", default=1, type=int, help="Number of replicas to start.") - parser.add_argument("--num_cpus_per_worker", default=8, type=int, help="Number of CPUs per worker.") - parser.add_argument("--num_hpus_per_worker", default=1, type=int, help="Number of HPUs per worker.") - - if len(sys.argv) == 1: - parser.print_help(sys.stderr) - sys.exit(1) - - args = parser.parse_args(argv) - - ray.init(address="auto") - - host_port = os.environ.get("RAY_Serve_ENDPOINT", "http://0.0.0.0:8080") - host = re.search(r"([\d\.]+)", host_port).group(1) - port = args.port_number - model_name = args.model_id_or_path.split("/")[-1] if args.model_id_or_path else "" - route_prefix = "/" - - infer_conf = {} - infer_conf["use_auth_token"] = os.environ.get("HF_TOKEN", None) - infer_conf["trust_remote_code"] = os.environ.get("TRUST_REMOTE_CODE", None) - infer_conf["model_id_or_path"] = args.model_id_or_path - infer_conf["chat_processor"] = args.chat_processor - infer_conf["max_batch_size"] = args.max_batch_size - infer_conf["max_num_seqs"] = args.max_num_seqs - infer_conf["num_replicas"] = args.num_replicas - infer_conf["num_cpus_per_worker"] = args.num_cpus_per_worker - infer_conf["num_hpus_per_worker"] = args.num_hpus_per_worker - infer_conf["max_concurrent_queries"] = int(os.environ.get("MAX_CONCURRENT_QUERIES", 100)) - infer_conf = edict(infer_conf) - - print(f"infer_conf: {infer_conf}") - - deployment = {} - ray_actor_options = get_deployment_actor_options(infer_conf["num_hpus_per_worker"]) - deployment[model_name] = LLMServe.options( - num_replicas=infer_conf["num_replicas"], - ray_actor_options=ray_actor_options, - max_concurrent_queries=infer_conf["max_concurrent_queries"], - ).bind(infer_conf, infer_conf["max_num_seqs"], infer_conf["max_batch_size"]) - deployment = edict(deployment) - openai_serve_run(deployment, host, route_prefix, port, infer_conf["max_concurrent_queries"]) - # input("Service is deployed successfully.") - while 1: - pass - - -if __name__ == "__main__": - main(sys.argv[1:]) diff --git a/comps/llms/text-generation/ray_serve/build_docker_microservice.sh b/comps/llms/text-generation/ray_serve/build_docker_microservice.sh deleted file mode 100644 index f317f89d4..000000000 --- a/comps/llms/text-generation/ray_serve/build_docker_microservice.sh +++ /dev/null @@ -1,9 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -cd ../../../../ -docker build \ - -t opea/llm-ray:latest \ - --build-arg https_proxy=$https_proxy \ - --build-arg http_proxy=$http_proxy \ - -f comps/llms/text-generation/ray_serve/docker/Dockerfile.microservice . diff --git a/comps/llms/text-generation/ray_serve/build_docker_rayserve.sh b/comps/llms/text-generation/ray_serve/build_docker_rayserve.sh deleted file mode 100755 index 4ea462722..000000000 --- a/comps/llms/text-generation/ray_serve/build_docker_rayserve.sh +++ /dev/null @@ -1,15 +0,0 @@ -#!/bin/bash - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -cd ../../../../ - -docker build \ - -f comps/llms/text-generation/ray_serve/docker/Dockerfile.rayserve \ - -t ray_serve:habana \ - --network=host \ - --build-arg http_proxy=${http_proxy} \ - --build-arg https_proxy=${https_proxy} \ - --build-arg no_proxy=${no_proxy} . 
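For orientation, a minimal client sketch of how the OpenAI-compatible routes exposed by the removed Ray Serve router (`/v1/models`, `/v1/completions`, `/v1/chat/completions`) could be queried. The endpoint `http://localhost:8080/v1` and the model name `Llama-2-7b-chat-hf` are assumptions taken from the defaults in the deleted `api_server_openai.py`; this is a sketch, not part of the patch.

```python
# Hypothetical client sketch: assumes the (now removed) Ray Serve OpenAI-compatible
# router is running locally on port 8080 and serving "Llama-2-7b-chat-hf".
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8080/v1", api_key="not_needed")

# List the models registered with the router (GET /v1/models).
print([m.id for m in client.models.list().data])

# Request a chat completion (POST /v1/chat/completions).
resp = client.chat.completions.create(
    model="Llama-2-7b-chat-hf",
    messages=[{"role": "user", "content": "What is Deep Learning?"}],
    max_tokens=32,
)
print(resp.choices[0].message.content)
```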
diff --git a/comps/llms/text-generation/ray_serve/docker/Dockerfile.microservice b/comps/llms/text-generation/ray_serve/docker/Dockerfile.microservice deleted file mode 100644 index 39a50ac8a..000000000 --- a/comps/llms/text-generation/ray_serve/docker/Dockerfile.microservice +++ /dev/null @@ -1,37 +0,0 @@ -# Copyright (c) 2024 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -FROM langchain/langchain:latest - -RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ - libgl1-mesa-glx \ - libjemalloc-dev \ - vim - -RUN useradd -m -s /bin/bash user && \ - mkdir -p /home/user && \ - chown -R user /home/user/ - -USER user - -COPY comps /home/user/comps - -RUN pip install --no-cache-dir --upgrade pip && \ - pip install --no-cache-dir -r /home/user/comps/llms/text-generation/ray_serve/requirements.txt - -ENV PYTHONPATH=$PYTHONPATH:/home/user - -WORKDIR /home/user/comps/llms/text-generation/ray_serve - -ENTRYPOINT ["bash", "entrypoint.sh"] diff --git a/comps/llms/text-generation/ray_serve/docker/Dockerfile.rayserve b/comps/llms/text-generation/ray_serve/docker/Dockerfile.rayserve deleted file mode 100644 index 220acc237..000000000 --- a/comps/llms/text-generation/ray_serve/docker/Dockerfile.rayserve +++ /dev/null @@ -1,32 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -# FROM vault.habana.ai/gaudi-docker/1.15.1/ubuntu22.04/habanalabs/pytorch-installer-2.2.0:latest -FROM vault.habana.ai/gaudi-docker/1.16.0/ubuntu22.04/habanalabs/pytorch-installer-2.2.2:latest - -ENV LANG=en_US.UTF-8 - -WORKDIR /root/ray_serve - -# copy the source code to the package directory -COPY comps/llms/text-generation/ray_serve/ /root/ray_serve - -RUN pip install -r /root/ray_serve/docker/requirements.txt && \ - pip install --upgrade-strategy eager optimum[habana] - -RUN sed -i 's/#PermitRootLogin prohibit-password/PermitRootLogin yes/' /etc/ssh/sshd_config && \ - service ssh restart - -ENV no_proxy=localhost,127.0.0.1 -ENV PYTHONPATH=$PYTHONPATH:/root:/root/ray_serve - -# Required by DeepSpeed -ENV RAY_EXPERIMENTAL_NOSET_HABANA_VISIBLE_MODULES=1 - -ENV PT_HPU_LAZY_ACC_PAR_MODE=0 - -ENV PT_HPU_ENABLE_REFINE_DYNAMIC_SHAPES=0 - -ENV PT_HPU_ENABLE_WEIGHT_CPU_PERMUTE=0 - -ENV PT_HPU_ENABLE_LAZY_COLLECTIVES=true \ No newline at end of file diff --git a/comps/llms/text-generation/ray_serve/docker/requirements.txt b/comps/llms/text-generation/ray_serve/docker/requirements.txt deleted file mode 100644 index da4e88e13..000000000 --- a/comps/llms/text-generation/ray_serve/docker/requirements.txt +++ /dev/null @@ -1,10 +0,0 @@ - -async_timeout -easydict -numpy -py-cpuinfo -pydantic-yaml -ray>=2.10 -ray[serve,tune]>=2.10 -typer -typing>=3.7.4.3 diff --git a/comps/llms/text-generation/ray_serve/docker_compose_llm.yaml b/comps/llms/text-generation/ray_serve/docker_compose_llm.yaml deleted file mode 100644 index 0bfed637d..000000000 --- a/comps/llms/text-generation/ray_serve/docker_compose_llm.yaml +++ /dev/null @@ -1,58 +0,0 @@ -# Copyright (c) 2024 
Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -version: "3.8" - -services: - ray-service: - image: ray_serve:habana - container_name: ray-gaudi-server - ports: - - "8008:80" - volumes: - - "./data:/data" - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} - HABANA_VISIBLE_DEVICES: all - OMPI_MCA_btl_vader_single_copy_mechanism: none - LLM_MODEL: ${LLM_MODEL} - TRUST_REMOTE_CODE: True - runtime: habana - cap_add: - - SYS_NICE - ipc: host - command: /bin/bash -c "ray start --head && python api_server_openai.py --port_number 80 --model_id_or_path $LLM_MODEL --chat_processor ChatModelLlama --num_cpus_per_worker 8 --num_hpus_per_worker 1" - llm: - image: opea/llm-ray:latest - container_name: llm-ray-gaudi-server - depends_on: - - ray-service - ports: - - "9000:9000" - ipc: host - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - RAY_Serve_ENDPOINT: ${RAY_Serve_ENDPOINT} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} - LLM_MODEL: ${LLM_MODEL} - restart: unless-stopped - -networks: - default: - driver: bridge diff --git a/comps/llms/text-generation/ray_serve/entrypoint.sh b/comps/llms/text-generation/ray_serve/entrypoint.sh deleted file mode 100644 index d60eddd36..000000000 --- a/comps/llms/text-generation/ray_serve/entrypoint.sh +++ /dev/null @@ -1,8 +0,0 @@ -#!/usr/bin/env bash - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -pip --no-cache-dir install -r requirements-runtime.txt - -python llm.py diff --git a/comps/llms/text-generation/ray_serve/launch_microservice.sh b/comps/llms/text-generation/ray_serve/launch_microservice.sh deleted file mode 100644 index 5037fe62a..000000000 --- a/comps/llms/text-generation/ray_serve/launch_microservice.sh +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -docker run -d --rm \ - --name="llm-ray-server" \ - -p 9000:9000 \ - --ipc=host \ - -e http_proxy=$http_proxy \ - -e https_proxy=$https_proxy \ - -e RAY_Serve_ENDPOINT=$RAY_Serve_ENDPOINT \ - -e HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN \ - -e LLM_MODEL=$LLM_MODEL \ - opea/llm-ray:latest diff --git a/comps/llms/text-generation/ray_serve/launch_ray_service.sh b/comps/llms/text-generation/ray_serve/launch_ray_service.sh deleted file mode 100755 index 7a98acf07..000000000 --- a/comps/llms/text-generation/ray_serve/launch_ray_service.sh +++ /dev/null @@ -1,46 +0,0 @@ -#!/bin/bash - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -# Set default values -default_port=8008 -default_model=${LLM_MODEL} -default_chat_processor="ChatModelLlama" -default_num_cpus_per_worker=8 -default_num_hpus_per_worker=1 - -# Assign arguments to variables -port_number=${1:-$default_port} -model_name=${2:-$default_model} -chat_processor=${3:-$default_chat_processor} -num_cpus_per_worker=${4:-$default_num_cpus_per_worker} 
-num_hpus_per_worker=${5:-$default_num_hpus_per_worker} - -# Check if all required arguments are provided -if [ "$#" -lt 0 ] || [ "$#" -gt 5 ]; then - echo "Usage: $0 [port_number] [model_name] [chat_processor] [num_cpus_per_worker] [num_hpus_per_worker]" - echo "Please customize the arguments you want to use. - - port_number: The port number assigned to the Ray Gaudi endpoint, with the default being 8080. - - model_name: The model name utilized for LLM, with the default set to meta-llama/Llama-2-7b-chat-hf. - - chat_processor: The chat processor for handling the prompts, with the default set to 'ChatModelNoFormat', and the optional selection can be 'ChatModelLlama', 'ChatModelGptJ" and "ChatModelGemma'. - - num_cpus_per_worker: The number of CPUs specifies the number of CPUs per worker process. - - num_hpus_per_worker: The number of HPUs specifies the number of HPUs per worker process." - exit 1 -fi - -# Build the Docker run command based on the number of cards -docker run -d --rm \ - --runtime=habana \ - --name="ray-service" \ - -v $PWD/data:/data \ - -e HABANA_VISIBLE_DEVICES=all \ - -e OMPI_MCA_btl_vader_single_copy_mechanism=none \ - --cap-add=sys_nice \ - --ipc=host \ - -p $port_number:80 \ - -e HF_TOKEN=$HUGGINGFACEHUB_API_TOKEN \ - -e TRUST_REMOTE_CODE=True \ - ray_serve:habana \ - /bin/bash -c "ray start --head && python api_server_openai.py --port_number 80 --model_id_or_path $model_name --chat_processor $chat_processor --num_cpus_per_worker $num_cpus_per_worker --num_hpus_per_worker $num_hpus_per_worker" diff --git a/comps/llms/text-generation/ray_serve/llm.py b/comps/llms/text-generation/ray_serve/llm.py deleted file mode 100644 index 5dad1fdd0..000000000 --- a/comps/llms/text-generation/ray_serve/llm.py +++ /dev/null @@ -1,85 +0,0 @@ -# Copyright (c) 2024 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os - -from fastapi.responses import StreamingResponse -from langchain_openai import ChatOpenAI -from langsmith import traceable - -from comps import GeneratedDoc, LLMParamsDoc, ServiceType, opea_microservices, register_microservice - - -@traceable(run_type="tool") -def post_process_text(text: str): - if text == " ": - return "data: @#$\n\n" - if text == "\n": - return "data:
\n\n" - if text.isspace(): - return None - new_text = text.replace(" ", "@#$") - return f"data: {new_text}\n\n" - - -@register_microservice( - name="opea_service@llm_ray", - service_type=ServiceType.LLM, - endpoint="/v1/chat/completions", - host="0.0.0.0", - port=9000, -) -@traceable(run_type="llm") -def llm_generate(input: LLMParamsDoc): - llm_endpoint = os.getenv("RAY_Serve_ENDPOINT", "http://localhost:8080") - llm_model = os.getenv("LLM_MODEL", "Llama-2-7b-chat-hf") - if "/" in llm_model: - llm_model = llm_model.split("/")[-1] - llm = ChatOpenAI( - openai_api_base=llm_endpoint + "/v1", - model_name=llm_model, - openai_api_key=os.getenv("OPENAI_API_KEY", "not_needed"), - max_tokens=input.max_new_tokens, - temperature=input.temperature, - streaming=input.streaming, - request_timeout=600, - ) - - if input.streaming: - - async def stream_generator(): - chat_response = "" - async for text in llm.astream(input.query): - text = text.content - chat_response += text - processed_text = post_process_text(text) - if text and processed_text: - if "" in text: - res = text.split("")[0] - if res != "": - yield res - break - yield processed_text - print(f"[llm - chat_stream] stream response: {chat_response}") - yield "data: [DONE]\n\n" - - return StreamingResponse(stream_generator(), media_type="text/event-stream") - else: - response = llm.invoke(input.query) - response = response.content - return GeneratedDoc(text=response, prompt=input.query) - - -if __name__ == "__main__": - opea_microservices["opea_service@llm_ray"].start() diff --git a/comps/llms/text-generation/ray_serve/requirements-runtime.txt b/comps/llms/text-generation/ray_serve/requirements-runtime.txt deleted file mode 100644 index 225adde27..000000000 --- a/comps/llms/text-generation/ray_serve/requirements-runtime.txt +++ /dev/null @@ -1 +0,0 @@ -langserve diff --git a/comps/llms/text-generation/ray_serve/requirements.txt b/comps/llms/text-generation/ray_serve/requirements.txt deleted file mode 100644 index 2f8b2ff4e..000000000 --- a/comps/llms/text-generation/ray_serve/requirements.txt +++ /dev/null @@ -1,14 +0,0 @@ -docarray[full] -fastapi -huggingface_hub -langchain==0.1.16 -langchain_openai -langsmith -openai -opentelemetry-api -opentelemetry-exporter-otlp -opentelemetry-sdk -prometheus-fastapi-instrumentator -ray[serve]>=2.10 -shortuuid -transformers diff --git a/comps/llms/text-generation/ray_serve/serve.py b/comps/llms/text-generation/ray_serve/serve.py deleted file mode 100644 index 5c33dde00..000000000 --- a/comps/llms/text-generation/ray_serve/serve.py +++ /dev/null @@ -1,569 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -import asyncio -import functools -import os -import re -from enum import Enum -from queue import Empty -from typing import Any, AsyncGenerator, Dict, List, Literal, Optional, Tuple, Union - -import ray -import torch -from fastapi import HTTPException -from pydantic import BaseModel -from ray import serve -from ray_serve.api_openai_backend.openai_protocol import ChatMessage, ErrorResponse, ModelResponse -from ray_serve.api_openai_backend.tools import ChatPromptCapture, OpenAIToolsPrompter -from starlette.requests import Request -from starlette.responses import JSONResponse, StreamingResponse -from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer - -DEVICE_CPU = "cpu" -DEVICE_HPU = "hpu" - - -def load_tokenizer(model, tokenizer_name_or_path): - tokenizer = AutoTokenizer.from_pretrained(tokenizer_name_or_path) - if not 
model.config.is_encoder_decoder: - tokenizer.padding_side = "left" - # Some models like GPT2 do not have a PAD token so we have to set it if necessary - if model.config.model_type == "llama": - # unwind broken decapoda-research config - model.generation_config.pad_token_id = 0 - model.generation_config.bos_token_id = 1 - model.generation_config.eos_token_id = 2 - tokenizer.bos_token_id = model.generation_config.bos_token_id - tokenizer.eos_token_id = model.generation_config.eos_token_id - tokenizer.pad_token_id = model.generation_config.pad_token_id - tokenizer.pad_token = tokenizer.decode(tokenizer.pad_token_id) - tokenizer.eos_token = tokenizer.decode(tokenizer.eos_token_id) - tokenizer.bos_token = tokenizer.decode(tokenizer.bos_token_id) - - if tokenizer.pad_token is None: - tokenizer.pad_token = tokenizer.eos_token - model.generation_config.pad_token_id = model.generation_config.eos_token_id - - return tokenizer - - -class PromptFormat(Enum): - CHAT_FORMAT = 1 - PROMPTS_FORMAT = 2 - INVALID_FORMAT = 3 - - -def get_prompt_format(input: Union[List[str], List[dict], List[ChatMessage]]): - chat_format = True - prompts_format = True - for item in input: - if isinstance(item, str) or isinstance(item, list): - chat_format = False - elif isinstance(item, dict) or isinstance(item, ChatMessage): - prompts_format = False - else: - chat_format = False - prompts_format = False - break - if chat_format: - return PromptFormat.CHAT_FORMAT - if prompts_format: - return PromptFormat.PROMPTS_FORMAT - return PromptFormat.INVALID_FORMAT - - -class ChatModel: - human_id = "" - bot_id = "" - unknown_id = "" - MEANINGLESS_WORDS = ["", "", "<|endoftext|>", "
"] - stop_words = [""] - - def __init__(self, intro, human_id, bot_id, stop_words) -> None: - self.intro = intro - self.human_id = human_id - self.bot_id = bot_id - self.stop_words = stop_words - self.MEANINGLESS_WORDS.extend(self.stop_words) - - def prepare_prompt(self, messages: list): - """Prepare prompt from history messages.""" - prompt = "" - for msg in messages: - role, content = msg.role, msg.content - if role == "user": - prompt += f"{self.human_id}: {content}\n" - elif role == "assistant": - prompt += f"{self.bot_id}: {content}\n" - else: - prompt += f"{self.unknown_id}: {content}\n" - prompt += f"{self.bot_id}:" - return prompt - - def convert_output(self, output: str): - """Convert the model output to final answer.""" - human_id = self.human_id.strip() - bot_id = self.bot_id.strip() - if human_id != "": - output = output.split(human_id)[0] - if bot_id != "": - output = output.split(bot_id)[0] - for word in self.MEANINGLESS_WORDS: - output = output.replace(word, "") - text = output - # remove partial human_id or bot id - if "\n" in text and ( - human_id.startswith(text[text.rfind("\n") + 1 :]) or bot_id.startswith(text[text.rfind("\n") + 1]) - ): - text = text[: text.rfind("\n")] - return text - - def get_prompt(self, messages): - """Generate response based on messages.""" - prompt = self.prepare_prompt(messages) - return prompt - - -class ChatModelGptJ(ChatModel): - def __init__(self, intro, human_id, bot_id, stop_words): - super().__init__(intro, human_id, bot_id, stop_words) - - def prepare_prompt(self, messages: list): - """Prepare prompt from history messages.""" - prompt = self.intro - for msg in messages: - msg = dict(msg) - role, content = msg["role"], msg["content"] - if role == "user": - if self.human_id != "": - prompt += f"{self.human_id}:\n{content}\n" - else: - prompt += f"{content}\n" - elif role == "assistant": - if self.bot_id != "": - prompt += f"{self.bot_id}:\n{content}\n" - else: - prompt += f"{content}\n" - else: - prompt += f"### Unknown:\n{content}\n" - if self.bot_id != "": - prompt += f"{self.bot_id}:\n" - return prompt - - -class ChatModelLLama(ChatModel): - def __init__(self, intro="", human_id="[INST] {msg} [/INST]", bot_id="", stop_words=[]): - super().__init__(intro, human_id, bot_id, stop_words) - - def prepare_prompt(self, messages: list): - """Prepare prompt from history messages.""" - prompt = self.intro - for msg in messages: - msg = dict(msg) - role, content = msg["role"], msg["content"] - if role == "user": - if self.human_id != "": - prompt += self.human_id.format(msg=content) - else: - prompt += f"{content}\n" - elif role == "assistant": - prompt += f"{content}\n" - elif role == "tool": - prompt += f"{content}\n" - elif role == "system": - prompt += f"### system:\n{content}\n" - else: - prompt += f"### Unknown:\n{content}\n" - if self.bot_id != "": - prompt += f"{self.bot_id}:\n" - return prompt - - -class ChatModelGemma(ChatModel): - def __init__(self, intro, human_id, bot_id, stop_words): - super().__init__(intro, human_id, bot_id, stop_words) - - def prepare_prompt(self, messages: list): - """Prepare prompt from history messages.""" - prompt = self.intro - for msg in messages: - msg = dict(msg) - role, content = msg["role"], msg["content"] - if role == "user": - if self.human_id != "": - prompt += f"{self.human_id} {content}\n" - else: - prompt += f"{content}\n" - elif role == "assistant": - if self.bot_id != "": - prompt += f"{self.bot_id} {content}\n" - else: - prompt += f"{content}\n" - else: - prompt += f"### Unknown:\n{content}\n" 
- if self.bot_id != "": - prompt += f"{self.bot_id}:\n" - return prompt - - -class ChatModelNoFormat(ChatModel): - def __init__(self, intro, human_id, bot_id, stop_words): - super().__init__(intro, human_id, bot_id, stop_words) - - def prepare_prompt(self, messages: list): - """Prepare prompt from history messages.""" - prompt = "" - for msg in messages: - msg = dict(msg) - prompt += msg["content"] - return prompt - - -class GenerateResult(BaseModel): - text: str = "" - input_length: int = None - generate_length: int = None - - -class Predictor: - def __init__(self, infer_conf: dict) -> None: - model_id_or_path = infer_conf["model_id_or_path"] - use_auth_token = infer_conf["use_auth_token"] - trust_remote_code = infer_conf["trust_remote_code"] - - device = os.environ.get("DEVICE", "hpu") - - self.tokenizer = AutoTokenizer.from_pretrained( - model_id_or_path, use_auth_token=use_auth_token, trust_remote_code=trust_remote_code - ) - self.device = torch.device(device) - # now deepspeed predictor don't have the model - # so configure_tokenizer cannot be called - # this should be solved in the next pr - # where it is also a worker - # This can be removed then - if self.tokenizer.pad_token_id is None: - self.tokenizer.pad_token_id = self.tokenizer.eos_token_id - - self.input_length = None - - def tokenize_inputs(self, text): - input_tokens = self.tokenizer(text, return_tensors="pt", padding=True) - input_ids = input_tokens.input_ids - self.input_length = input_ids.size()[1] - input_ids = input_ids.to(device=self.device) - return input_ids, self.input_length - - def configure_tokenizer(self, model_name): - model = self.model - tokenizer = self.tokenizer - if re.search("llama", model.config.architectures[0], re.IGNORECASE): - # unwind broken decapoda-research config - model.generation_config.pad_token_id = 0 - model.generation_config.bos_token_id = 1 - model.generation_config.eos_token_id = 2 - - if ( - hasattr(model.generation_config, "pad_token_id") - and model.generation_config.pad_token_id is not None - and "chatglm" not in model_name - ): - tokenizer.pad_token_id = model.generation_config.pad_token_id - if ( - hasattr(model.generation_config, "eos_token_id") - and model.generation_config.eos_token_id is not None - and "chatglm" not in model_name - ): - tokenizer.eos_token_id = model.generation_config.eos_token_id - if hasattr(model.generation_config, "bos_token_id") and model.generation_config.bos_token_id is not None: - tokenizer.bos_token_id = model.generation_config.bos_token_id - - if tokenizer.pad_token_id is None: - model.generation_config.pad_token_id = tokenizer.pad_token_id = tokenizer.eos_token_id - - if model.generation_config.eos_token_id is None: - model.generation_config.eos_token_id = tokenizer.eos_token_id - - if not model.config.is_encoder_decoder: - tokenizer.padding_side = "left" - - if tokenizer.pad_token is None and tokenizer.pad_token_id is None: - tokenizer.pad_token = tokenizer.eos_token - model.generation_config.pad_token_id = model.generation_config.eos_token_id - - def generate(self, prompts: Union[str, List[str]], **config) -> Union[GenerateResult, List[GenerateResult], None]: - pass - - async def generate_async(self, prompts: Union[str, List[str]], **config) -> Union[str, List[str]]: - pass - - # output is streamed into streamer - def streaming_generate(self, prompt: str, streamer, **config) -> None: - pass - - def get_streamer(self): - pass - - async def stream_results(self, results_generator) -> AsyncGenerator[str, None]: - pass - - -class 
HPUPredictor(Predictor): - def __init__(self, infer_conf: dict): - super().__init__(infer_conf) - - model_id_or_path = infer_conf["model_id_or_path"] - use_auth_token = infer_conf["use_auth_token"] - trust_remote_code = infer_conf["trust_remote_code"] - self.cpus_per_worker = infer_conf["num_cpus_per_worker"] - self.hpus_per_worker = infer_conf["num_hpus_per_worker"] - # decide correct torch type for loading HF model - self.use_lazy_mode = True - self.use_hpu_graphs = False - # TODO add torch_compile, i.e. hpu specific configs. including quant - # if args.torch_compile and model.config.model_type == "llama": - # self.use_lazy_mode = False - - from optimum.habana.transformers.modeling_utils import adapt_transformers_to_gaudi - - # Tweak transformer to optimize performance on Gaudi - adapt_transformers_to_gaudi() - # Not using DeepSpeed, load model locally - self.device = torch.device("hpu") - model = AutoModelForCausalLM.from_pretrained( - model_id_or_path, use_auth_token=use_auth_token, trust_remote_code=trust_remote_code - ) - self.model = model.eval().to(self.device) - if self.use_hpu_graphs: - from habana_frameworks.torch.hpu import wrap_in_hpu_graph # pylint: disable=E0401 - - self.model = wrap_in_hpu_graph(self.model) - else: - print("Warning: use_hpu_graphs is set to False. This will hurt the performance.") - self.tokenizer = load_tokenizer(model, model_id_or_path) - - # Use dummy streamer to ignore other workers' outputs - def _create_dummy_streamer(self): - class DummyStreamer: - def put(self, value): - pass - - def end(self): - pass - - return DummyStreamer() - - def _process_config(self, config): - config["lazy_mode"] = self.use_lazy_mode - config["hpu_graphs"] = self.use_hpu_graphs - # max_new_tokens is required for hpu - if "max_new_tokens" not in config: - config["max_new_tokens"] = 128 - - def get_streamer(self): - return TextIteratorStreamer(self.tokenizer, skip_prompt=True, timeout=0, skip_special_tokens=True) - - def generate(self, prompt, **config): - self._process_config(config) - - input_ids, input_length = self.tokenize_inputs(prompt) - gen_tokens = self.model.generate(input_ids, **config) - decode_result = self.tokenizer.batch_decode(gen_tokens, skip_special_tokens=True) - if isinstance(decode_result, list) and len(decode_result) > 1: - return decode_result - elif isinstance(decode_result, list) and len(decode_result) == 1: - decode_result = decode_result[0] - return GenerateResult( - text=decode_result, - input_length=input_length, - generate_length=gen_tokens.size()[1] - input_length, - ) - - def streaming_generate(self, prompt, streamer, **config): - self._process_config(config) - input_ids, _ = self.tokenize_inputs(prompt) - self.model.generate( - input_ids, - streamer=streamer, - **config, - ) - - -chat_processor = { - "ChatModelLlama": ChatModelLLama, - "ChatModelGptJ": ChatModelGptJ, - "ChatModelGemma": ChatModelGemma, - "ChatModelNoFormat": ChatModelNoFormat, -} - - -# 1: Define a Ray Serve deployment. 
-@serve.deployment -class LLMServe: - _DEFAULT_MAX_BATCH_SIZE = 8 - _DEFAULT_MAX_NUM_SEQS = 256 - - def __init__( - self, infer_conf: dict, max_batch_size=_DEFAULT_MAX_BATCH_SIZE, max_num_seqs=_DEFAULT_MAX_NUM_SEQS - ) -> None: - # All the initialization code goes here - self.predictor = HPUPredictor(infer_conf) - self.loop = asyncio.get_running_loop() - self.process_tool = chat_processor[infer_conf["chat_processor"]]() - self.use_openai = False - - def consume_streamer(self, streamer): - for text in streamer: - yield text - - async def consume_streamer_async(self, streamer: TextIteratorStreamer): - while True: - try: - for token in streamer: - yield token - break - except Empty: - await asyncio.sleep(0.001) - - async def handle_streaming(self, prompt: Union[str, List[str]], config: Dict[str, Any]): - if isinstance(prompt, List): - error_message = "Streaming response is not supported when multiple prompts are provided." - if not self.use_openai: - yield JSONResponse( - status_code=400, - content=error_message, - ) - else: - yield ModelResponse( - error=ErrorResponse( - message=error_message, - code=400, - internal_message=error_message, - type="InternalServerError", - ) - ) - streamer = self.predictor.get_streamer() - self.loop.run_in_executor( - None, functools.partial(self.predictor.streaming_generate, prompt, streamer, **config) - ) - - if not self.use_openai: - yield StreamingResponse(self.consume_streamer_async(streamer), status_code=200, media_type="text/plain") - else: - async for output in self.consume_streamer_async(streamer): - processed_output = output - tool_call_list = None - if self.tools_capture_texts is not None: - (processed_output, tool_call_list) = self.tools_capture_texts( - output, self.openai_tools_prompter, prompt - ) - model_reponse = ModelResponse( - generated_text=processed_output, - tool_calls=tool_call_list, - num_input_tokens=self.predictor.input_length, - num_generate_tokens=1, - preprocessing_time=0, - ) - yield model_reponse - - async def handle_non_streaming(self, prompts, config) -> Union[JSONResponse, str]: - if isinstance(prompts, list): - return await self.handle_static_batch(prompts, **config) - return await self.handle_dynamic_batch((prompts, config)) - - @serve.batch(max_batch_size=_DEFAULT_MAX_BATCH_SIZE) - async def handle_dynamic_batch(self, requests): - batched_prompts: Dict[str, Tuple[Union[str, List[str]]]] = {} - for i, request in enumerate(requests): - prompt = request[0] - config = request[1] - key = str(dict(sorted(config.items()))) - batched_prompts.setdefault(key, ([], [])) - batched_prompts[key][0].append(prompt) - batched_prompts[key][1].append(i) - - results = [None] * len(requests) - for key, (prompts, indices) in batched_prompts.items(): - config = dict(eval(key)) - batched_results = self.predictor.generate(prompts, **config) - for index, result in zip(indices, batched_results): - results[index] = result - if not self.use_openai: - return results - else: - responses = [] - tool_call_list = None - for result in results: - if self.tools_capture_texts is not None: - result.text, tool_call_list = self.tools_capture_texts.process_full_output( - result.text, self.openai_tools_prompter, prompt - ) - responses.append( - ModelResponse( - generated_text=result[-1], - tool_calls=tool_call_list, - num_input_tokens=self.predictor.input_length, - num_generated_tokens=len(result[-1]), - preprocessing_time=0, - ) - ) - return responses - - async def handle_static_batch(self, prompts: List[str], **config: Dict[str, any]): - results = 
self.predictor.generate(prompts, **config) - if not self.use_openai: - return results - else: - return ModelResponse( - generated_text=results[0].text, - num_input_tokens=results[0].input_length, - num_input_tokens_batch=results[0].input_length, - num_generated_tokens=results[0].generate_length, - preprocessing_time=0, - ) - - def preprocess_prompts(self, input: Union[str, List], tools=None, tool_choice=None): - if isinstance(input, str): - return input - elif isinstance(input, List): - prompts = [] - images = [] - - prompt_format = get_prompt_format(input) - if prompt_format == PromptFormat.CHAT_FORMAT: - # Process the input prompts with tools - self.tool_call_list = None - self.openai_tools_prompter: OpenAIToolsPrompter = OpenAIToolsPrompter() if tools is not None else None - self.tools_capture_texts: ChatPromptCapture = None - if self.openai_tools_prompter is not None: - input = self.openai_tools_prompter.inject_prompt(input, tools, tool_choice) - self.tools_capture_texts = ChatPromptCapture() - for m in input: - if m.tool_calls is not None: # type: ignore - m.content = self.openai_tools_prompter.content_from_assistant(m) # type: ignore - elif m.tool_call_id is not None: # type: ignore - m.content = self.openai_tools_prompter.content_from_tool(m) # type: ignore - # Process the input prompts with MLLM tool - if self.process_tool is not None: - prompt = self.process_tool.get_prompt(input) - return prompt - else: - prompts.extend(input) - elif prompt_format == PromptFormat.PROMPTS_FORMAT: - prompts.extend(input) - else: - raise HTTPException(400, "Invalid prompt format.") - return prompts - else: - raise HTTPException(400, "Invalid prompt format.") - - async def openai_call(self, input: str, config: Dict, streaming_response=True, tools=None, tool_choice=None): - self.use_openai = True - prompts = self.preprocess_prompts(input, tools, tool_choice) - - if streaming_response: - async for result in self.handle_streaming(prompts, config): - yield result - else: - yield await self.handle_non_streaming(prompts, config) diff --git a/comps/llms/text-generation/tgi/README.md b/comps/llms/text-generation/tgi/README.md index 6c9607ca9..57f476720 100644 --- a/comps/llms/text-generation/tgi/README.md +++ b/comps/llms/text-generation/tgi/README.md @@ -110,6 +110,12 @@ curl http://${your_ip}:9000/v1/chat/completions \ -X POST \ -d '{"query":"What is Deep Learning?","max_new_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \ -H 'Content-Type: application/json' + +# custom chat template +curl http://${your_ip}:9000/v1/chat/completions \ + -X POST \ + -d '{"query":"What is Deep Learning?","max_new_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true, "chat_template":"### You are a helpful, respectful and honest assistant to help the user with questions.\n### Context: {context}\n### Question: {question}\n### Answer:"}' \ + -H 'Content-Type: application/json' ``` ## 4. 
Validated Model diff --git a/comps/llms/text-generation/tgi/llm.py b/comps/llms/text-generation/tgi/llm.py index e267c21dc..c202aede7 100644 --- a/comps/llms/text-generation/tgi/llm.py +++ b/comps/llms/text-generation/tgi/llm.py @@ -3,10 +3,14 @@ import os import time +from typing import Union from fastapi.responses import StreamingResponse from huggingface_hub import AsyncInferenceClient +from langchain_core.prompts import PromptTemplate from langsmith import traceable +from openai import OpenAI +from template import ChatTemplate from comps import ( GeneratedDoc, @@ -17,6 +21,13 @@ register_statistics, statistics_dict, ) +from comps.cores.proto.api_protocol import ChatCompletionRequest, ChatCompletionResponse, ChatCompletionStreamResponse + +llm_endpoint = os.getenv("TGI_LLM_ENDPOINT", "http://localhost:8080") +llm = AsyncInferenceClient( + model=llm_endpoint, + timeout=600, +) @register_microservice( @@ -28,36 +39,32 @@ ) @traceable(run_type="llm") @register_statistics(names=["opea_service@llm_tgi"]) -async def llm_generate(input: LLMParamsDoc): +async def llm_generate(input: Union[LLMParamsDoc, ChatCompletionRequest]): + + prompt_template = None + if input.chat_template: + prompt_template = PromptTemplate.from_template(input.chat_template) + input_variables = prompt_template.input_variables + stream_gen_time = [] start = time.time() - if input.streaming: - - async def stream_generator(): - chat_response = "" - text_generation = await llm.text_generation( - prompt=input.query, - stream=input.streaming, - max_new_tokens=input.max_new_tokens, - repetition_penalty=input.repetition_penalty, - temperature=input.temperature, - top_k=input.top_k, - top_p=input.top_p, - ) - async for text in text_generation: - stream_gen_time.append(time.time() - start) - chat_response += text - chunk_repr = repr(text.encode("utf-8")) - print(f"[llm - chat_stream] chunk:{chunk_repr}") - yield f"data: {chunk_repr}\n\n" - print(f"[llm - chat_stream] stream response: {chat_response}") - statistics_dict["opea_service@llm_tgi"].append_latency(stream_gen_time[-1], stream_gen_time[0]) - yield "data: [DONE]\n\n" - - return StreamingResponse(stream_generator(), media_type="text/event-stream") - else: - response = await llm.text_generation( - prompt=input.query, + + if isinstance(input, LLMParamsDoc): + prompt = input.query + if prompt_template: + if sorted(input_variables) == ["context", "question"]: + prompt = prompt_template.format(question=input.query, context="\n".join(input.documents)) + elif input_variables == ["question"]: + prompt = prompt_template.format(question=input.query) + else: + print(f"{prompt_template} not used, we only support 2 input variables ['question', 'context']") + else: + if input.documents: + # use rag default template + prompt = ChatTemplate.generate_rag_prompt(input.query, input.documents) + + text_generation = await llm.text_generation( + prompt=prompt, stream=input.streaming, max_new_tokens=input.max_new_tokens, repetition_penalty=input.repetition_penalty, @@ -65,14 +72,117 @@ async def stream_generator(): top_k=input.top_k, top_p=input.top_p, ) - statistics_dict["opea_service@llm_tgi"].append_latency(time.time() - start, None) - return GeneratedDoc(text=response, prompt=input.query) + if input.streaming: + + async def stream_generator(): + chat_response = "" + async for text in text_generation: + stream_gen_time.append(time.time() - start) + chat_response += text + chunk_repr = repr(text.encode("utf-8")) + print(f"[llm - chat_stream] chunk:{chunk_repr}") + yield f"data: {chunk_repr}\n\n" 
+ print(f"[llm - chat_stream] stream response: {chat_response}") + statistics_dict["opea_service@llm_tgi"].append_latency(stream_gen_time[-1], stream_gen_time[0]) + yield "data: [DONE]\n\n" + + return StreamingResponse(stream_generator(), media_type="text/event-stream") + else: + statistics_dict["opea_service@llm_tgi"].append_latency(time.time() - start, None) + return GeneratedDoc(text=text_generation, prompt=input.query) + + else: + client = OpenAI( + api_key="EMPTY", + base_url=llm_endpoint + "/v1", + ) + + if isinstance(input.messages, str): + prompt = input.messages + if prompt_template: + if sorted(input_variables) == ["context", "question"]: + prompt = prompt_template.format(question=input.messages, context="\n".join(input.documents)) + elif input_variables == ["question"]: + prompt = prompt_template.format(question=input.messages) + else: + print(f"{prompt_template} not used, we only support 2 input variables ['question', 'context']") + else: + if input.documents: + # use rag default template + prompt = ChatTemplate.generate_rag_prompt(input.messages, input.documents) + + chat_completion = client.completions.create( + model="tgi", + prompt=prompt, + best_of=input.best_of, + echo=input.echo, + frequency_penalty=input.frequency_penalty, + logit_bias=input.logit_bias, + logprobs=input.logprobs, + max_tokens=input.max_tokens, + n=input.n, + presence_penalty=input.presence_penalty, + seed=input.seed, + stop=input.stop, + stream=input.stream, + suffix=input.suffix, + temperature=input.temperature, + top_p=input.top_p, + user=input.user, + ) + else: + if input.messages[0]["role"] == "system": + if "{context}" in input.messages[0]["content"]: + if input.documents is None or input.documents == []: + input.messages[0]["content"].format(context="") + else: + input.messages[0]["content"].format(context="\n".join(input.documents)) + else: + if prompt_template: + system_prompt = prompt_template + if input_variables == ["context"]: + system_prompt = prompt_template.format(context="\n".join(input.documents)) + else: + print(f"{prompt_template} not used, only support 1 input variables ['context']") + + input.messages.insert(0, {"role": "system", "content": system_prompt}) + + chat_completion = client.chat.completions.create( + model="tgi", + messages=input.messages, + frequency_penalty=input.frequency_penalty, + logit_bias=input.logit_bias, + logprobs=input.logprobs, + top_logprobs=input.top_logprobs, + max_tokens=input.max_tokens, + n=input.n, + presence_penalty=input.presence_penalty, + response_format=input.response_format, + seed=input.seed, + service_tier=input.service_tier, + stop=input.stop, + stream=input.stream, + stream_options=input.stream_options, + temperature=input.temperature, + top_p=input.top_p, + tools=input.tools, + tool_choice=input.tool_choice, + parallel_tool_calls=input.parallel_tool_calls, + user=input.user, + ) + + if input.stream: + + def stream_generator(): + for c in chat_completion: + print(c) + yield f"data: {c.model_dump_json()}\n\n" + yield "data: [DONE]\n\n" + + return StreamingResponse(stream_generator(), media_type="text/event-stream") + else: + return chat_completion if __name__ == "__main__": - llm_endpoint = os.getenv("TGI_LLM_ENDPOINT", "http://localhost:8080") - llm = AsyncInferenceClient( - model=llm_endpoint, - timeout=600, - ) opea_microservices["opea_service@llm_tgi"].start() diff --git a/comps/llms/text-generation/tgi/requirements.txt b/comps/llms/text-generation/tgi/requirements.txt index 621b52cc0..9670813d6 100644 --- 
a/comps/llms/text-generation/tgi/requirements.txt +++ b/comps/llms/text-generation/tgi/requirements.txt @@ -4,9 +4,11 @@ fastapi httpx huggingface_hub langsmith +openai==1.35.13 opentelemetry-api opentelemetry-exporter-otlp opentelemetry-sdk prometheus-fastapi-instrumentator shortuuid transformers +uvicorn diff --git a/comps/llms/text-generation/tgi/template.py b/comps/llms/text-generation/tgi/template.py new file mode 100644 index 000000000..447efcc67 --- /dev/null +++ b/comps/llms/text-generation/tgi/template.py @@ -0,0 +1,29 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import re + + +class ChatTemplate: + @staticmethod + def generate_rag_prompt(question, documents): + context_str = "\n".join(documents) + if context_str and len(re.findall("[\u4E00-\u9FFF]", context_str)) / len(context_str) >= 0.3: + # chinese context + template = """ +### 你将扮演一个乐于助人、尊重他人并诚实的助手,你的目标是帮助用户解答问题。有效地利用来自本地知识库的搜索结果。确保你的回答中只包含相关信息。如果你不确定问题的答案,请避免分享不准确的信息。 +### 搜索结果:{context} +### 问题:{question} +### 回答: +""" + else: + template = """ +### You are a helpful, respectful and honest assistant to help the user with questions. \ +Please refer to the search results obtained from the local knowledge base. \ +But be careful to not incorporate the information that you think is not relevant to the question. \ +If you don't know the answer to a question, please don't share false information. \n +### Search results: {context} \n +### Question: {question} \n +### Answer: +""" + return template.format(context=context_str, question=question) diff --git a/comps/llms/text-generation/vllm-openvino/README.md b/comps/llms/text-generation/vllm-openvino/README.md index 48f8f3305..d26a7f569 100644 --- a/comps/llms/text-generation/vllm-openvino/README.md +++ b/comps/llms/text-generation/vllm-openvino/README.md @@ -1,5 +1,10 @@ # Use vLLM with OpenVINO +vLLM powered by OpenVINO supports all LLM models from [vLLM supported models list](https://github.com/vllm-project/vllm/blob/main/docs/source/models/supported_models.rst) and can perform optimal model serving on all x86-64 CPUs with, at least, AVX2 support. OpenVINO vLLM backend supports the following advanced vLLM features: + +- Prefix caching (`--enable-prefix-caching`) +- Chunked prefill (`--enable-chunked-prefill`) + ## Build Docker Image To build the docker image, run the command @@ -59,15 +64,19 @@ export vLLM_LLM_ENDPOINT="http://xxx.xxx.xxx.xxx:8000" export LLM_MODEL= # example: export LLM_MODEL="meta-llama/Llama-2-7b-hf" ``` -## Use Int-8 Weights Compression +## Performance tips + +vLLM OpenVINO backend uses the following environment variables to control behavior: + +- `VLLM_OPENVINO_KVCACHE_SPACE` to specify the KV Cache size (e.g, `VLLM_OPENVINO_KVCACHE_SPACE=40` means 40 GB space for KV cache), larger setting will allow vLLM running more requests in parallel. This parameter should be set based on the hardware configuration and memory management pattern of users. + +- `VLLM_OPENVINO_CPU_KV_CACHE_PRECISION=u8` to control KV cache precision. By default, FP16 / BF16 is used depending on platform. -Weights int-8 compression is disabled by default. For better performance and lower memory consumption, the weights compression can be enabled by setting the environment variable `VLLM_OPENVINO_ENABLE_QUANTIZED_WEIGHTS=1`. -To pass the variable in docker, use `-e VLLM_OPENVINO_ENABLE_QUANTIZED_WEIGHTS=1` as an additional argument to `docker run` command in the examples above. 
+- `VLLM_OPENVINO_ENABLE_QUANTIZED_WEIGHTS=ON` to enable U8 weights compression during the model loading stage. By default, compression is turned off. -The variable enables weights compression logic described in [optimum-intel 8-bit weights quantization](https://huggingface.co/docs/optimum/intel/optimization_ov#8-bit). -Hence, even if the variable is enabled, the compression is applied only for models starting with a certain size and avoids compression of too small models due to a significant accuracy drop. +To improve TPOT / TTFT latency, you can use vLLM's chunked prefill feature (`--enable-chunked-prefill`). Based on experiments, the recommended batch size is `256` (`--max-num-batched-tokens`). -## Use UInt-8 KV cache Compression +The OpenVINO best-known configuration is: -KV cache uint-8 compression is disabled by default. For better performance and lower memory consumption, the KV cache compression can be enabled by setting the environment variable `VLLM_OPENVINO_CPU_KV_CACHE_PRECISION=u8`. -To pass the variable in docker, use `-e VLLM_OPENVINO_CPU_KV_CACHE_PRECISION=u8` as an additional argument to `docker run` command in the examples above. + $ VLLM_OPENVINO_KVCACHE_SPACE=100 VLLM_OPENVINO_CPU_KV_CACHE_PRECISION=u8 VLLM_OPENVINO_ENABLE_QUANTIZED_WEIGHTS=ON \ + python3 vllm/benchmarks/benchmark_throughput.py --model meta-llama/Llama-2-7b-chat-hf --dataset vllm/benchmarks/ShareGPT_V3_unfiltered_cleaned_split.json --enable-chunked-prefill --max-num-batched-tokens 256 diff --git a/comps/llms/text-generation/vllm-openvino/build_vllm_openvino.sh b/comps/llms/text-generation/vllm-openvino/build_vllm_openvino.sh index 1b3e159fc..4566263bc 100755 --- a/comps/llms/text-generation/vllm-openvino/build_vllm_openvino.sh +++ b/comps/llms/text-generation/vllm-openvino/build_vllm_openvino.sh @@ -3,7 +3,8 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 - -git clone --branch openvino-model-executor https://github.com/ilya-lavrenov/vllm.git +BASEDIR="$( cd "$( dirname "$0" )" && pwd )" +git clone https://github.com/vllm-project/vllm.git vllm cd ./vllm/ docker build -t vllm:openvino -f Dockerfile.openvino . --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy +cd $BASEDIR && rm -rf vllm diff --git a/comps/llms/text-generation/vllm-openvino/launch_model_server.sh b/comps/llms/text-generation/vllm-openvino/launch_model_server.sh index 887c31629..8eef92f52 100755 --- a/comps/llms/text-generation/vllm-openvino/launch_model_server.sh +++ b/comps/llms/text-generation/vllm-openvino/launch_model_server.sh @@ -42,5 +42,20 @@ port_number=${port:-$default_port} # Set the Huggingface cache directory variable HF_CACHE_DIR=$HOME/.cache/huggingface -# Start the model server using Openvino as the backend inference engine. Provide the container name that is unique and meaningful, typically one that includes the model name. -docker run --rm --name="vllm-openvino-server" -p $port_number:$port_number -v $HF_CACHE_DIR:/root/.cache/huggingface vllm:openvino --model $model_name --port $port_number --disable-log-requests --swap-space $swap_space +# Start the model server using OpenVINO as the backend inference engine. +# Provide the container name that is unique and meaningful, typically one that includes the model name.
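+# The docker run command below maps the host port ($port_number) to port 80 inside the container, where the OpenAI-compatible API server listens. +# The local Hugging Face cache is mounted so previously downloaded model weights are reused across container restarts. +# VLLM_CPU_KVCACHE_SPACE sets the KV cache size in GB; adjust it to match the memory available on the host.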
+ +docker run -d --rm --name="vllm-openvino-server" \ + -p $port_number:80 \ + --ipc=host \ + -e HTTPS_PROXY=$https_proxy \ + -e HTTP_PROXY=$https_proxy \ + -e HF_TOKEN=${HUGGINGFACEHUB_API_TOKEN} \ + -v $HOME/.cache/huggingface:/root/.cache/huggingface \ + vllm:openvino /bin/bash -c "\ + cd / && \ + export VLLM_CPU_KVCACHE_SPACE=50 && \ + python3 -m vllm.entrypoints.openai.api_server \ + --model \"$model_name\" \ + --host 0.0.0.0 \ + --port 80" diff --git a/comps/llms/text-generation/vllm-ray/build_docker_vllmray.sh b/comps/llms/text-generation/vllm-ray/build_docker_vllmray.sh index 9e9fe3b71..8c4c13d3b 100755 --- a/comps/llms/text-generation/vllm-ray/build_docker_vllmray.sh +++ b/comps/llms/text-generation/vllm-ray/build_docker_vllmray.sh @@ -5,7 +5,7 @@ cd ../../../../ docker build \ -f comps/llms/text-generation/vllm-ray/docker/Dockerfile.vllmray \ - -t vllm_ray:habana \ + -t opea/vllm_ray:habana \ --network=host \ --build-arg http_proxy=${http_proxy} \ --build-arg https_proxy=${https_proxy} \ diff --git a/comps/llms/text-generation/vllm-ray/docker_compose_llm.yaml b/comps/llms/text-generation/vllm-ray/docker_compose_llm.yaml index a3ae3ec04..76d3423f1 100644 --- a/comps/llms/text-generation/vllm-ray/docker_compose_llm.yaml +++ b/comps/llms/text-generation/vllm-ray/docker_compose_llm.yaml @@ -5,7 +5,7 @@ version: "3.8" services: vllm-ray-service: - image: vllm_ray:habana + image: opea/vllm_ray:habana container_name: vllm-ray-gaudi-server ports: - "8006:8000" diff --git a/comps/llms/text-generation/vllm-ray/launch_vllmray.sh b/comps/llms/text-generation/vllm-ray/launch_vllmray.sh index fcff33265..895e6a066 100755 --- a/comps/llms/text-generation/vllm-ray/launch_vllmray.sh +++ b/comps/llms/text-generation/vllm-ray/launch_vllmray.sh @@ -39,5 +39,5 @@ docker run -d --rm \ -e HTTPS_PROXY=$https_proxy \ -e HTTP_PROXY=$https_proxy \ -e HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN \ - vllm_ray:habana \ + opea/vllm_ray:habana \ /bin/bash -c "ray start --head && python vllm_ray_openai.py --port_number 8000 --model_id_or_path $model_name --tensor_parallel_size $parallel_number --enforce_eager $enforce_eager" diff --git a/comps/llms/text-generation/vllm-ray/llm.py b/comps/llms/text-generation/vllm-ray/llm.py index dc0c4b669..6d8abd028 100644 --- a/comps/llms/text-generation/vllm-ray/llm.py +++ b/comps/llms/text-generation/vllm-ray/llm.py @@ -21,18 +21,6 @@ from comps import GeneratedDoc, LLMParamsDoc, ServiceType, opea_microservices, register_microservice -@traceable(run_type="tool") -def post_process_text(text: str): - if text == " ": - return "data: @#$\n\n" - if text == "\n": - return "data:
\n\n" - if text.isspace(): - return None - new_text = text.replace(" ", "@#$") - return f"data: {new_text}\n\n" - - @register_microservice( name="opea_service@llm_vllm_ray", service_type=ServiceType.LLM, @@ -56,19 +44,13 @@ def llm_generate(input: LLMParamsDoc): if input.streaming: - async def stream_generator(): + def stream_generator(): chat_response = "" - async for text in llm.astream(input.query): + for text in llm.stream(input.query): text = text.content chat_response += text - processed_text = post_process_text(text) - if text and processed_text: - if "
" in text: - res = text.split("")[0] - if res != "": - yield res - break - yield processed_text + chunk_repr = repr(text.encode("utf-8")) + yield f"data: {chunk_repr}\n\n" print(f"[llm - chat_stream] stream response: {chat_response}") yield "data: [DONE]\n\n" diff --git a/comps/llms/text-generation/vllm-ray/requirements.txt b/comps/llms/text-generation/vllm-ray/requirements.txt index 1c511793d..083a2910b 100644 --- a/comps/llms/text-generation/vllm-ray/requirements.txt +++ b/comps/llms/text-generation/vllm-ray/requirements.txt @@ -11,7 +11,8 @@ opentelemetry-exporter-otlp opentelemetry-sdk prometheus-fastapi-instrumentator ray[serve]>=2.10 -setuptools==69.5.1 +setuptools shortuuid transformers +uvicorn vllm diff --git a/comps/llms/text-generation/vllm-xft/docker/Dockerfile b/comps/llms/text-generation/vllm-xft/docker/Dockerfile index db682e04f..95cd596d7 100644 --- a/comps/llms/text-generation/vllm-xft/docker/Dockerfile +++ b/comps/llms/text-generation/vllm-xft/docker/Dockerfile @@ -95,4 +95,3 @@ RUN chmod +x /root/comps/llms/text-generation/vllm-xft/run.sh WORKDIR /root/comps/llms/text-generation/vllm-xft/ ENTRYPOINT ["/root/comps/llms/text-generation/vllm-xft/run.sh"] - diff --git a/comps/llms/text-generation/vllm-xft/requirements.txt b/comps/llms/text-generation/vllm-xft/requirements.txt index ddf7310b6..bc9f457c4 100644 --- a/comps/llms/text-generation/vllm-xft/requirements.txt +++ b/comps/llms/text-generation/vllm-xft/requirements.txt @@ -7,4 +7,5 @@ opentelemetry-exporter-otlp opentelemetry-sdk prometheus-fastapi-instrumentator shortuuid +uvicorn vllm-xft diff --git a/comps/llms/text-generation/vllm/README.md b/comps/llms/text-generation/vllm/README.md index 1445d1bd1..3f0184ed9 100644 --- a/comps/llms/text-generation/vllm/README.md +++ b/comps/llms/text-generation/vllm/README.md @@ -50,6 +50,12 @@ bash ./build_docker_vllm.sh hpu Set `hw_mode` to `hpu`. +Note: If you want to enable tensor parallel, please set `setuptools==69.5.1` in Dockerfile.hpu before build docker with following command. + +``` +sed -i "s/RUN pip install setuptools/RUN pip install setuptools==69.5.1/g" docker/Dockerfile.hpu +``` + #### Launch vLLM service on single node For small model, we can just use single node. diff --git a/comps/llms/text-generation/vllm/build_docker_vllm.sh b/comps/llms/text-generation/vllm/build_docker_vllm.sh index 3680f076c..c1037a5c7 100644 --- a/comps/llms/text-generation/vllm/build_docker_vllm.sh +++ b/comps/llms/text-generation/vllm/build_docker_vllm.sh @@ -30,9 +30,9 @@ fi # Build the docker image for vLLM based on the hardware mode if [ "$hw_mode" = "hpu" ]; then - docker build -f docker/Dockerfile.hpu -t vllm:hpu --shm-size=128g . --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy + docker build -f docker/Dockerfile.hpu -t opea/vllm:hpu --shm-size=128g . --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy else git clone https://github.com/vllm-project/vllm.git cd ./vllm/ - docker build -f Dockerfile.cpu -t vllm:cpu --shm-size=128g . --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy + docker build -f Dockerfile.cpu -t opea/vllm:cpu --shm-size=128g . 
--build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy fi diff --git a/comps/llms/text-generation/vllm/docker/Dockerfile.hpu b/comps/llms/text-generation/vllm/docker/Dockerfile.hpu index c7093d4c0..730fe37e7 100644 --- a/comps/llms/text-generation/vllm/docker/Dockerfile.hpu +++ b/comps/llms/text-generation/vllm/docker/Dockerfile.hpu @@ -1,18 +1,19 @@ # FROM vault.habana.ai/gaudi-docker/1.15.1/ubuntu22.04/habanalabs/pytorch-installer-2.2.0:latest FROM vault.habana.ai/gaudi-docker/1.16.0/ubuntu22.04/habanalabs/pytorch-installer-2.2.2:latest - +RUN useradd -m -s /bin/bash user && \ + mkdir -p /home/user && \ + chown -R user /home/user/ ENV LANG=en_US.UTF-8 - +RUN sed -i 's/#PermitRootLogin prohibit-password/PermitRootLogin yes/' /etc/ssh/sshd_config && \ + service ssh restart +USER user WORKDIR /root RUN pip install --upgrade-strategy eager optimum[habana] RUN pip install -v git+https://github.com/HabanaAI/vllm-fork.git@cf6952d -RUN pip install setuptools==69.5.1 - -RUN sed -i 's/#PermitRootLogin prohibit-password/PermitRootLogin yes/' /etc/ssh/sshd_config && \ - service ssh restart +RUN pip install setuptools ENV no_proxy=localhost,127.0.0.1 diff --git a/comps/llms/text-generation/vllm/docker_compose_llm.yaml b/comps/llms/text-generation/vllm/docker_compose_llm.yaml index 818fdf54a..205c9293a 100644 --- a/comps/llms/text-generation/vllm/docker_compose_llm.yaml +++ b/comps/llms/text-generation/vllm/docker_compose_llm.yaml @@ -5,7 +5,7 @@ version: "3.8" services: vllm-service: - image: vllm:hpu + image: opea/vllm:hpu container_name: vllm-gaudi-server ports: - "8008:80" diff --git a/comps/llms/text-generation/vllm/launch_vllm_service.sh b/comps/llms/text-generation/vllm/launch_vllm_service.sh index b94bbc183..0c7ed90de 100644 --- a/comps/llms/text-generation/vllm/launch_vllm_service.sh +++ b/comps/llms/text-generation/vllm/launch_vllm_service.sh @@ -38,7 +38,7 @@ volume=$PWD/data # Build the Docker run command based on hardware mode if [ "$hw_mode" = "hpu" ]; then - docker run -d --rm --runtime=habana --name="vllm-service" -p $port_number:80 -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host -e HTTPS_PROXY=$https_proxy -e HTTP_PROXY=$https_proxy -e HF_TOKEN=${HUGGINGFACEHUB_API_TOKEN} vllm:hpu /bin/bash -c "export VLLM_CPU_KVCACHE_SPACE=40 && python3 -m vllm.entrypoints.openai.api_server --enforce-eager --model $model_name --tensor-parallel-size $parallel_number --host 0.0.0.0 --port 80 --block-size $block_size --max-num-seqs $max_num_seqs --max-seq_len-to-capture $max_seq_len_to_capture " + docker run -d --rm --runtime=habana --name="vllm-service" -p $port_number:80 -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host -e HTTPS_PROXY=$https_proxy -e HTTP_PROXY=$https_proxy -e HF_TOKEN=${HUGGINGFACEHUB_API_TOKEN} opea/vllm:hpu /bin/bash -c "export VLLM_CPU_KVCACHE_SPACE=40 && python3 -m vllm.entrypoints.openai.api_server --enforce-eager --model $model_name --tensor-parallel-size $parallel_number --host 0.0.0.0 --port 80 --block-size $block_size --max-num-seqs $max_num_seqs --max-seq_len-to-capture $max_seq_len_to_capture " else - docker run -d --rm --name="vllm-service" -p $port_number:80 --network=host -v $volume:/data -e HTTPS_PROXY=$https_proxy -e HTTP_PROXY=$https_proxy -e HF_TOKEN=${HUGGINGFACEHUB_API_TOKEN} vllm:cpu /bin/bash -c "cd / && export VLLM_CPU_KVCACHE_SPACE=40 && python3 -m vllm.entrypoints.openai.api_server --model $model_name --host 0.0.0.0 --port 
80" + docker run -d --rm --name="vllm-service" -p $port_number:80 --network=host -v $volume:/data -e HTTPS_PROXY=$https_proxy -e HTTP_PROXY=$https_proxy -e HF_TOKEN=${HUGGINGFACEHUB_API_TOKEN} -e VLLM_CPU_KVCACHE_SPACE=40 opea/vllm:cpu --model $model_name --host 0.0.0.0 --port 80 fi diff --git a/comps/llms/text-generation/vllm/llm.py b/comps/llms/text-generation/vllm/llm.py index 439e233d0..ea8691f1a 100644 --- a/comps/llms/text-generation/vllm/llm.py +++ b/comps/llms/text-generation/vllm/llm.py @@ -49,14 +49,8 @@ def stream_generator(): chat_response = "" for text in llm.stream(input.query): chat_response += text - processed_text = post_process_text(text) - if text and processed_text: - if "" in text: - res = text.split("")[0] - if res != "": - yield res - break - yield processed_text + chunk_repr = repr(text.encode("utf-8")) + yield f"data: {chunk_repr}\n\n" print(f"[llm - chat_stream] stream response: {chat_response}") yield "data: [DONE]\n\n" diff --git a/comps/llms/text-generation/vllm/requirements.txt b/comps/llms/text-generation/vllm/requirements.txt index 8cf73d924..d096a69ca 100644 --- a/comps/llms/text-generation/vllm/requirements.txt +++ b/comps/llms/text-generation/vllm/requirements.txt @@ -8,4 +8,5 @@ opentelemetry-sdk prometheus-fastapi-instrumentator shortuuid transformers +uvicorn vllm diff --git a/comps/llms/utils/lm-eval/Dockerfile.cpu b/comps/llms/utils/lm-eval/Dockerfile.cpu index 933a523a5..ceb98887d 100644 --- a/comps/llms/utils/lm-eval/Dockerfile.cpu +++ b/comps/llms/utils/lm-eval/Dockerfile.cpu @@ -1,6 +1,8 @@ ARG UBUNTU_VER=22.04 FROM ubuntu:${UBUNTU_VER} as devel - +RUN useradd -m -s /bin/bash user && \ + mkdir -p /home/user && \ + chown -R user /home/user/ ARG REPO_COMPS=https://github.com/opea-project/GenAIComps.git ARG BRANCH=main ENV LANG=C.UTF-8 @@ -16,7 +18,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ git \ vim \ wget - +USER user RUN git clone --single-branch --branch=${BRANCH} ${REPO_COMPS} /home/user/GenAIComps/ && \ cd /home/user/GenAIComps/ && python3 setup.py install && \ pip install --no-cache-dir -r /home/user/GenAIComps/comps/llms/utils/lm-eval/requirements.txt diff --git a/comps/lvms/Dockerfile b/comps/lvms/Dockerfile index 73be60ba6..734d2cdb6 100644 --- a/comps/lvms/Dockerfile +++ b/comps/lvms/Dockerfile @@ -2,17 +2,20 @@ # SPDX-License-Identifier: Apache-2.0 FROM python:3.11-slim - +RUN useradd -m -s /bin/bash user && \ + mkdir -p /home/user && \ + chown -R user /home/user/ +USER user # Set environment variables ENV LANG=en_US.UTF-8 -COPY comps /home/comps +COPY comps /home/user/comps RUN pip install --no-cache-dir --upgrade pip && \ - pip install --no-cache-dir -r /home/comps/lvms/requirements.txt + pip install --no-cache-dir -r /home/user/comps/lvms/requirements.txt -ENV PYTHONPATH=$PYTHONPATH:/home +ENV PYTHONPATH=$PYTHONPATH:/home/user -WORKDIR /home/comps/lvms +WORKDIR /home/user/comps/lvms ENTRYPOINT ["python", "lvm.py"] \ No newline at end of file diff --git a/comps/lvms/Dockerfile_tgi b/comps/lvms/Dockerfile_tgi new file mode 100644 index 000000000..c6412ac5e --- /dev/null +++ b/comps/lvms/Dockerfile_tgi @@ -0,0 +1,19 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +FROM python:3.11-slim + +# Set environment variables +ENV LANG=en_US.UTF-8 + +COPY comps /home/comps + +RUN pip install --no-cache-dir --upgrade pip && \ + pip install --no-cache-dir -r /home/comps/lvms/requirements.txt + +ENV PYTHONPATH=$PYTHONPATH:/home + +WORKDIR /home/comps/lvms + +ENTRYPOINT 
["python", "lvm_tgi.py"] + diff --git a/comps/lvms/llava/Dockerfile b/comps/lvms/llava/Dockerfile index efd2b1d45..07d5cc41d 100644 --- a/comps/lvms/llava/Dockerfile +++ b/comps/lvms/llava/Dockerfile @@ -2,18 +2,21 @@ # SPDX-License-Identifier: Apache-2.0 FROM python:3.11-slim - +RUN useradd -m -s /bin/bash user && \ + mkdir -p /home/user && \ + chown -R user /home/user/ +USER user # Set environment variables ENV LANG=en_US.UTF-8 ENV PYTHONPATH=/home/user:/usr/lib/habanalabs/:/optimum-habana -COPY comps /home/comps +COPY comps /home/user/comps RUN pip install --no-cache-dir --upgrade pip && \ - pip install --no-cache-dir -r /home/comps/lvms/requirements.txt + pip install --no-cache-dir -r /home/user/comps/lvms/requirements.txt -ENV PYTHONPATH=$PYTHONPATH:/home +ENV PYTHONPATH=$PYTHONPATH:/home/user -WORKDIR /home/comps/lvms/llava +WORKDIR /home/user/comps/lvms/llava ENTRYPOINT ["python", "llava_server.py", "--device", "cpu"] \ No newline at end of file diff --git a/comps/lvms/llava/Dockerfile_hpu b/comps/lvms/llava/Dockerfile_hpu index bb2bf0676..272fad826 100644 --- a/comps/lvms/llava/Dockerfile_hpu +++ b/comps/lvms/llava/Dockerfile_hpu @@ -3,21 +3,25 @@ # HABANA environment FROM vault.habana.ai/gaudi-docker/1.16.1/ubuntu22.04/habanalabs/pytorch-installer-2.2.2:latest AS hpu -RUN rm -rf /etc/ssh/ssh_host* +RUN useradd -m -s /bin/bash user && \ + mkdir -p /home/user && \ + chown -R user /home/user/ +RUN rm -rf /etc/ssh/ssh_host* +USER user # Set environment variables ENV LANG=en_US.UTF-8 ENV PYTHONPATH=/home/user:/usr/lib/habanalabs/:/optimum-habana -COPY comps /home/comps +COPY comps /home/user/comps # Install requirements and optimum habana RUN pip install --no-cache-dir --upgrade pip && \ - pip install --no-cache-dir -r /home/comps/lvms/requirements.txt && \ + pip install --no-cache-dir -r /home/user/comps/lvms/requirements.txt && \ pip install optimum[habana] -ENV PYTHONPATH=$PYTHONPATH:/home +ENV PYTHONPATH=$PYTHONPATH:/home/user -WORKDIR /home/comps/lvms/llava +WORKDIR /home/user/comps/lvms/llava ENTRYPOINT ["python", "llava_server.py"] \ No newline at end of file diff --git a/comps/lvms/lvm_tgi.py b/comps/lvms/lvm_tgi.py new file mode 100644 index 000000000..b2eddf9f1 --- /dev/null +++ b/comps/lvms/lvm_tgi.py @@ -0,0 +1,87 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import os +import time + +from fastapi.responses import StreamingResponse +from huggingface_hub import AsyncInferenceClient + +from comps import ( + LVMDoc, + ServiceType, + TextDoc, + opea_microservices, + register_microservice, + register_statistics, + statistics_dict, +) + + +@register_microservice( + name="opea_service@lvm_tgi", + service_type=ServiceType.LVM, + endpoint="/v1/lvm", + host="0.0.0.0", + port=9399, + input_datatype=LVMDoc, + output_datatype=TextDoc, +) +@register_statistics(names=["opea_service@lvm_tgi"]) +async def lvm(request: LVMDoc): + start = time.time() + stream_gen_time = [] + img_b64_str = request.image + prompt = request.prompt + max_new_tokens = request.max_new_tokens + streaming = request.streaming + repetition_penalty = request.repetition_penalty + temperature = request.temperature + top_k = request.top_k + top_p = request.top_p + + image = f"data:image/png;base64,{img_b64_str}" + image_prompt = f"![]({image})\n{prompt}\nASSISTANT:" + + if streaming: + + async def stream_generator(): + chat_response = "" + text_generation = await lvm_client.text_generation( + prompt=image_prompt, + stream=streaming, + max_new_tokens=max_new_tokens, + 
repetition_penalty=repetition_penalty, + temperature=temperature, + top_k=top_k, + top_p=top_p, + ) + async for text in text_generation: + stream_gen_time.append(time.time() - start) + chat_response += text + chunk_repr = repr(text.encode("utf-8")) + print(f"[llm - chat_stream] chunk:{chunk_repr}") + yield f"data: {chunk_repr}\n\n" + print(f"[llm - chat_stream] stream response: {chat_response}") + statistics_dict["opea_service@lvm_tgi"].append_latency(stream_gen_time[-1], stream_gen_time[0]) + yield "data: [DONE]\n\n" + + return StreamingResponse(stream_generator(), media_type="text/event-stream") + else: + generated_str = await lvm_client.text_generation( + image_prompt, + max_new_tokens=max_new_tokens, + repetition_penalty=repetition_penalty, + temperature=temperature, + top_k=top_k, + top_p=top_p, + ) + statistics_dict["opea_service@lvm_tgi"].append_latency(time.time() - start, None) + return TextDoc(text=generated_str) + + +if __name__ == "__main__": + lvm_endpoint = os.getenv("LVM_ENDPOINT", "http://localhost:8399") + lvm_client = AsyncInferenceClient(lvm_endpoint) + print("[LVM] LVM initialized.") + opea_microservices["opea_service@lvm_tgi"].start() diff --git a/comps/lvms/requirements.txt b/comps/lvms/requirements.txt index 32076a20d..556dfb0c1 100644 --- a/comps/lvms/requirements.txt +++ b/comps/lvms/requirements.txt @@ -1,6 +1,7 @@ datasets docarray[full] fastapi +huggingface_hub opentelemetry-api opentelemetry-exporter-otlp opentelemetry-sdk @@ -9,3 +10,4 @@ prometheus-fastapi-instrumentator pydantic==2.7.2 pydub shortuuid +uvicorn diff --git a/comps/prompt_registry/mongo/README.md b/comps/prompt_registry/mongo/README.md index 799fec7ca..0cbfd6f99 100644 --- a/comps/prompt_registry/mongo/README.md +++ b/comps/prompt_registry/mongo/README.md @@ -41,7 +41,7 @@ docker run -d -p 27017:27017 --name=mongo mongo:latest 2. 
Run prompt_registry service ```bash -docker run -d --name="promptregistry-mongo-server" -p 6012:6012 -p 6013:6013 -p 6014:6014 -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e no_proxy=$no_proxy -e MONGO_HOST=${MONGO_HOST} -e MONGO_PORT=${MONGO_PORT} -e DB_NAME=${DB_NAME} -e COLLECTION_NAME=${COLLECTION_NAME} opea/promptregistry-mongo-server:latest +docker run -d --name="promptregistry-mongo-server" -p 6012:6012 -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e no_proxy=$no_proxy -e MONGO_HOST=${MONGO_HOST} -e MONGO_PORT=${MONGO_PORT} -e DB_NAME=${DB_NAME} -e COLLECTION_NAME=${COLLECTION_NAME} opea/promptregistry-mongo-server:latest ``` ## Invoke Microservice @@ -64,7 +64,7 @@ curl -X 'POST' \ ```bash curl -X 'POST' \ - http://{host_ip}:6013/v1/prompt/get \ + http://{host_ip}:6012/v1/prompt/get \ -H 'accept: application/json' \ -H 'Content-Type: application/json' \ -d '{ @@ -73,7 +73,7 @@ curl -X 'POST' \ ```bash curl -X 'POST' \ - http://{host_ip}:6013/v1/prompt/get \ + http://{host_ip}:6012/v1/prompt/get \ -H 'accept: application/json' \ -H 'Content-Type: application/json' \ -d '{ @@ -84,7 +84,7 @@ curl -X 'POST' \ ```bash curl -X 'POST' \ - http://{host_ip}:6013/v1/prompt/get \ + http://{host_ip}:6012/v1/prompt/get \ -H 'accept: application/json' \ -H 'Content-Type: application/json' \ -d '{ @@ -95,7 +95,7 @@ curl -X 'POST' \ ```bash curl -X 'POST' \ - http://{host_ip}:6014/v1/prompt/delete \ + http://{host_ip}:6012/v1/prompt/delete \ -H 'accept: application/json' \ -H 'Content-Type: application/json' \ -d '{ diff --git a/comps/prompt_registry/mongo/docker/Dockerfile b/comps/prompt_registry/mongo/docker/Dockerfile index 3438c86fb..db2e9c59d 100644 --- a/comps/prompt_registry/mongo/docker/Dockerfile +++ b/comps/prompt_registry/mongo/docker/Dockerfile @@ -3,7 +3,7 @@ FROM python:3.11-slim -ENV LANG C.UTF-8 +ENV LANG=C.UTF-8 RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ build-essential \ diff --git a/comps/prompt_registry/mongo/docker/docker-compose-prompt-registry-mongo.yaml b/comps/prompt_registry/mongo/docker/docker-compose-prompt-registry-mongo.yaml index 23db61c89..f6cb68831 100644 --- a/comps/prompt_registry/mongo/docker/docker-compose-prompt-registry-mongo.yaml +++ b/comps/prompt_registry/mongo/docker/docker-compose-prompt-registry-mongo.yaml @@ -19,8 +19,6 @@ services: container_name: promptregistry-mongo-server ports: - "6012:6012" - - "6013:6013" - - "6014:6014" ipc: host environment: http_proxy: ${http_proxy} @@ -28,6 +26,7 @@ services: no_proxy: ${no_proxy} MONGO_HOST: ${MONGO_HOST} MONGO_PORT: ${MONGO_PORT} + DB_NAME: ${DB_NAME} COLLECTION_NAME: ${COLLECTION_NAME} restart: unless-stopped diff --git a/comps/prompt_registry/mongo/prompt.py b/comps/prompt_registry/mongo/prompt.py index 4a3f52bc1..e8d7d285e 100644 --- a/comps/prompt_registry/mongo/prompt.py +++ b/comps/prompt_registry/mongo/prompt.py @@ -34,7 +34,7 @@ class PromptId(BaseModel): @register_microservice( - name="opea_service@prompt_mongo_create", + name="opea_service@prompt_mongo", endpoint="/v1/prompt/create", host="0.0.0.0", input_datatype=PromptCreate, @@ -62,11 +62,11 @@ async def create_prompt(prompt: PromptCreate): @register_microservice( - name="opea_service@prompt_mongo_get", + name="opea_service@prompt_mongo", endpoint="/v1/prompt/get", host="0.0.0.0", input_datatype=PromptId, - port=6013, + port=6012, ) async def get_prompt(prompt: PromptId): """Retrieves prompt from prompt store based on provided PromptId or user. 
@@ -95,11 +95,11 @@ async def get_prompt(prompt: PromptId): @register_microservice( - name="opea_service@prompt_mongo_delete", + name="opea_service@prompt_mongo", endpoint="/v1/prompt/delete", host="0.0.0.0", input_datatype=PromptId, - port=6014, + port=6012, ) async def delete_prompt(prompt: PromptId): """Delete a prompt from prompt store by given PromptId. @@ -125,6 +125,4 @@ async def delete_prompt(prompt: PromptId): if __name__ == "__main__": - opea_microservices["opea_service@prompt_mongo_get"].start() - opea_microservices["opea_service@prompt_mongo_create"].start() - opea_microservices["opea_service@prompt_mongo_delete"].start() + opea_microservices["opea_service@prompt_mongo"].start() diff --git a/comps/ragas/tgi/requirements.txt b/comps/ragas/tgi/requirements.txt index 29cad9670..3fa49150e 100644 --- a/comps/ragas/tgi/requirements.txt +++ b/comps/ragas/tgi/requirements.txt @@ -11,3 +11,4 @@ prometheus-fastapi-instrumentator ragas shortuuid transformers +uvicorn diff --git a/comps/reranks/fastrag/requirements.txt b/comps/reranks/fastrag/requirements.txt index 5cf7a7c44..c4ded91c8 100644 --- a/comps/reranks/fastrag/requirements.txt +++ b/comps/reranks/fastrag/requirements.txt @@ -8,3 +8,4 @@ opentelemetry-exporter-otlp opentelemetry-sdk sentence_transformers shortuuid +uvicorn diff --git a/comps/reranks/langchain-mosec/README.md b/comps/reranks/langchain-mosec/README.md index d67cf78b0..59592a4ba 100644 --- a/comps/reranks/langchain-mosec/README.md +++ b/comps/reranks/langchain-mosec/README.md @@ -1,7 +1,7 @@ # build reranking Mosec endpoint docker image ``` -docker build --build-arg http_proxy=$http_proxy --build-arg https_proxy=$https_proxy -t reranking-langchain-mosec:latest -f comps/reranks/langchain-mosec/mosec-docker/Dockerfile . +docker build --build-arg http_proxy=$http_proxy --build-arg https_proxy=$https_proxy -t opea/reranking-langchain-mosec-endpoint:latest -f comps/reranks/langchain-mosec/mosec-docker/Dockerfile . 
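+# Optionally verify that the image was built before running it: +docker images | grep reranking-langchain-mosec-endpoint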
``` # build reranking microservice docker image @@ -13,7 +13,7 @@ docker build --build-arg http_proxy=$http_proxy --build-arg https_proxy=$https_p # launch Mosec endpoint docker container ``` -docker run -d --name="reranking-langchain-mosec-endpoint" -p 6001:8000 reranking-langchain-mosec:latest +docker run -d --name="reranking-langchain-mosec-endpoint" -p 6001:8000 opea/reranking-langchain-mosec-endpoint:latest ``` # launch embedding microservice docker container diff --git a/comps/reranks/langchain-mosec/mosec-docker/Dockerfile b/comps/reranks/langchain-mosec/mosec-docker/Dockerfile index 0c634fb90..dcf38aee5 100644 --- a/comps/reranks/langchain-mosec/mosec-docker/Dockerfile +++ b/comps/reranks/langchain-mosec/mosec-docker/Dockerfile @@ -2,22 +2,26 @@ # SPDX-License-Identifier: Apache-2.0 From ubuntu:22.04 +RUN useradd -m -s /bin/bash user && \ + mkdir -p /home/user && \ + chown -R user /home/user/ ARG DEBIAN_FRONTEND=noninteractive ENV GLIBC_TUNABLES glibc.cpu.x86_shstk=permissive -COPY comps /root/comps +COPY comps /home/user/comps RUN apt update && apt install -y python3 python3-pip + RUN pip3 install torch==2.2.2 torchvision --trusted-host download.pytorch.org --index-url https://download.pytorch.org/whl/cpu RUN pip3 install intel-extension-for-pytorch==2.2.0 RUN pip3 install transformers sentence-transformers RUN pip3 install llmspec mosec -RUN cd /root/ && export HF_ENDPOINT=https://hf-mirror.com && huggingface-cli download --resume-download BAAI/bge-reranker-large --local-dir /root/bge-reranker-large - -ENV EMB_MODEL="/root/bge-reranker-large/" +RUN cd /home/user/ && export HF_ENDPOINT=https://hf-mirror.com && huggingface-cli download --resume-download BAAI/bge-reranker-large --local-dir /home/user/bge-reranker-large +USER user +ENV EMB_MODEL="/home/user/bge-reranker-large/" -WORKDIR /root/comps/reranks/langchain-mosec/mosec-docker +WORKDIR /home/user/comps/reranks/langchain-mosec/mosec-docker CMD ["python3", "server-ipex.py"] diff --git a/comps/reranks/langchain-mosec/mosec-docker/server-ipex.py b/comps/reranks/langchain-mosec/mosec-docker/server-ipex.py index cd81fbf33..c7127c6ea 100644 --- a/comps/reranks/langchain-mosec/mosec-docker/server-ipex.py +++ b/comps/reranks/langchain-mosec/mosec-docker/server-ipex.py @@ -16,7 +16,7 @@ from torch.utils.data import DataLoader from tqdm.autonotebook import tqdm, trange -DEFAULT_MODEL = "/root/bge-reranker-large" +DEFAULT_MODEL = "/home/user/bge-reranker-large" class MyCrossEncoder(CrossEncoder): diff --git a/comps/reranks/langchain-mosec/requirements.txt b/comps/reranks/langchain-mosec/requirements.txt index f71cfaeb6..f9327419d 100644 --- a/comps/reranks/langchain-mosec/requirements.txt +++ b/comps/reranks/langchain-mosec/requirements.txt @@ -8,3 +8,4 @@ opentelemetry-exporter-otlp opentelemetry-sdk prometheus_fastapi_instrumentator shortuuid +uvicorn diff --git a/comps/reranks/requirements.txt b/comps/reranks/requirements.txt index 36e38e89e..67503038f 100644 --- a/comps/reranks/requirements.txt +++ b/comps/reranks/requirements.txt @@ -9,3 +9,4 @@ opentelemetry-sdk prometheus-fastapi-instrumentator sentence_transformers shortuuid +uvicorn diff --git a/comps/reranks/tei/reranking_tei.py b/comps/reranks/tei/reranking_tei.py index 1beaa83f7..2440f800a 100644 --- a/comps/reranks/tei/reranking_tei.py +++ b/comps/reranks/tei/reranking_tei.py @@ -6,6 +6,7 @@ import os import re import time +from typing import Union import requests from langsmith import traceable @@ -19,6 +20,12 @@ register_statistics, statistics_dict, ) +from 
comps.cores.proto.api_protocol import ( + ChatCompletionRequest, + RerankingRequest, + RerankingResponse, + RerankingResponseData, +) @register_microservice( @@ -32,42 +39,44 @@ ) @traceable(run_type="llm") @register_statistics(names=["opea_service@reranking_tgi_gaudi"]) -def reranking(input: SearchedDoc) -> LLMParamsDoc: +def reranking( + input: Union[SearchedDoc, RerankingRequest, ChatCompletionRequest] +) -> Union[LLMParamsDoc, RerankingResponse, ChatCompletionRequest]: + start = time.time() + reranking_results = [] if input.retrieved_docs: docs = [doc.text for doc in input.retrieved_docs] url = tei_reranking_endpoint + "/rerank" - data = {"query": input.initial_query, "texts": docs} + if isinstance(input, SearchedDoc): + query = input.initial_query + else: + # for RerankingRequest, ChatCompletionRequest + query = input.input + data = {"query": query, "texts": docs} headers = {"Content-Type": "application/json"} response = requests.post(url, data=json.dumps(data), headers=headers) response_data = response.json() - best_response_list = heapq.nlargest(input.top_n, response_data, key=lambda x: x["score"]) - context_str = "" - for best_response in best_response_list: - context_str = context_str + " " + input.retrieved_docs[best_response["index"]].text - if context_str and len(re.findall("[\u4E00-\u9FFF]", context_str)) / len(context_str) >= 0.3: - # chinese context - template = """ -### 你将扮演一个乐于助人、尊重他人并诚实的助手,你的目标是帮助用户解答问题。有效地利用来自本地知识库的搜索结果。确保你的回答中只包含相关信息。如果你不确定问题的答案,请避免分享不准确的信息。 -### 搜索结果:{context} -### 问题:{question} -### 回答: -""" - else: - template = """ -### You are a helpful, respectful and honest assistant to help the user with questions. \ -Please refer to the search results obtained from the local knowledge base. \ -But be careful to not incorporate the information that you think is not relevant to the question. \ -If you don't know the answer to a question, please don't share false information. \ -### Search results: {context} \n -### Question: {question} \n -### Answer: -""" - final_prompt = template.format(context=context_str, question=input.initial_query) - statistics_dict["opea_service@reranking_tgi_gaudi"].append_latency(time.time() - start, None) - return LLMParamsDoc(query=final_prompt.strip()) + + for best_response in response_data[: input.top_n]: + reranking_results.append( + {"text": input.retrieved_docs[best_response["index"]].text, "score": best_response["score"]} + ) + + statistics_dict["opea_service@reranking_tgi_gaudi"].append_latency(time.time() - start, None) + if isinstance(input, SearchedDoc): + return LLMParamsDoc(query=input.initial_query, documents=[doc["text"] for doc in reranking_results]) else: - return LLMParamsDoc(query=input.initial_query) + reranking_docs = [] + for doc in reranking_results: + reranking_docs.append(RerankingResponseData(text=doc["text"], score=doc["score"])) + if isinstance(input, RerankingRequest): + return RerankingResponse(reranked_docs=reranking_docs) + + if isinstance(input, ChatCompletionRequest): + input.reranked_docs = reranking_docs + input.documents = [doc["text"] for doc in reranking_results] + return input if __name__ == "__main__": diff --git a/comps/retrievers/haystack/qdrant/README.md b/comps/retrievers/haystack/qdrant/README.md index 70d2845ed..66da3c627 100644 --- a/comps/retrievers/haystack/qdrant/README.md +++ b/comps/retrievers/haystack/qdrant/README.md @@ -1,49 +1,54 @@ # Retriever Microservice with Qdrant -# 🚀Start Microservice with Python +# 1. 
🚀Start Microservice with Python (Option 1) -## Install Requirements +## 1.1 Install Requirements ```bash pip install -r requirements.txt ``` -## Start Qdrant Server +## 1.2 Start Qdrant Server Please refer to this [readme](../../../vectorstores/langchain/qdrant/README.md). -## Setup Environment Variables +## 1.3 Setup Environment Variables ```bash -export http_proxy=${your_http_proxy} -export https_proxy=${your_https_proxy} export QDRANT_HOST=${your_qdrant_host_ip} export QDRANT_PORT=6333 export EMBED_DIMENSION=${your_embedding_dimension} export INDEX_NAME=${your_index_name} -export TEI_EMBEDDING_ENDPOINT=${your_tei_endpoint} ``` -## Start Retriever Service +## 1.4 Start Retriever Service ```bash export TEI_EMBEDDING_ENDPOINT="http://${your_ip}:6060" python haystack/qdrant/retriever_qdrant.py ``` -# 🚀Start Microservice with Docker +# 2. 🚀Start Microservice with Docker (Option 2) -## Build Docker Image +## 2.1 Setup Environment Variables + +```bash +export QDRANT_HOST=${your_qdrant_host_ip} +export QDRANT_PORT=6333 +export TEI_EMBEDDING_ENDPOINT="http://${your_ip}:6060" +``` + +## 2.2 Build Docker Image ```bash cd ../../ docker build -t opea/retriever-qdrant:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/haystack/qdrant/docker/Dockerfile . ``` -## Run Docker with CLI +## 2.3 Run Docker with CLI ```bash -docker run -d --name="retriever-qdrant-server" -p 7000:7000 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e TEI_EMBEDDING_ENDPOINT=${your_tei_endpoint} -e QDRANT_HOST=${your_qdrant_host_ip} -e QDRANT_PORT=${your_qdrant_port} opea/retriever-qdrant:latest +docker run -d --name="retriever-qdrant-server" -p 7000:7000 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e TEI_EMBEDDING_ENDPOINT=$TEI_EMBEDDING_ENDPOINT -e QDRANT_HOST=$QDRANT_HOST -e QDRANT_PORT=$QDRANT_PORT opea/retriever-qdrant:latest ``` # 🚀3. 
Consume Retriever Service diff --git a/comps/retrievers/haystack/qdrant/requirements.txt b/comps/retrievers/haystack/qdrant/requirements.txt index 8cee8ce36..9b99c00fb 100644 --- a/comps/retrievers/haystack/qdrant/requirements.txt +++ b/comps/retrievers/haystack/qdrant/requirements.txt @@ -1,13 +1,15 @@ docarray[full] easyocr fastapi -haystack-ai +haystack-ai==2.2.4 langchain_community langsmith opentelemetry-api opentelemetry-exporter-otlp opentelemetry-sdk +prometheus_fastapi_instrumentator pymupdf qdrant-haystack sentence_transformers shortuuid +uvicorn diff --git a/comps/retrievers/haystack/qdrant/retriever_qdrant.py b/comps/retrievers/haystack/qdrant/retriever_qdrant.py index 83ee64a2e..d57232563 100644 --- a/comps/retrievers/haystack/qdrant/retriever_qdrant.py +++ b/comps/retrievers/haystack/qdrant/retriever_qdrant.py @@ -7,7 +7,7 @@ from langsmith import traceable from qdrant_config import EMBED_DIMENSION, EMBED_ENDPOINT, EMBED_MODEL, INDEX_NAME, QDRANT_HOST, QDRANT_PORT -from comps import EmbedDoc768, SearchedDoc, ServiceType, TextDoc, opea_microservices, register_microservice +from comps import EmbedDoc, SearchedDoc, ServiceType, TextDoc, opea_microservices, register_microservice # Create a pipeline for querying a Qdrant document store @@ -29,7 +29,7 @@ def initialize_qdrant_retriever() -> QdrantEmbeddingRetriever: port=7000, ) @traceable(run_type="retriever") -def retrieve(input: EmbedDoc768) -> SearchedDoc: +def retrieve(input: EmbedDoc) -> SearchedDoc: search_res = retriever.run(query_embedding=input.embedding)["documents"] searched_docs = [TextDoc(text=r.content) for r in search_res] result = SearchedDoc(retrieved_docs=searched_docs, initial_query=input.text) diff --git a/comps/retrievers/langchain/milvus/requirements.txt b/comps/retrievers/langchain/milvus/requirements.txt index 8aa51936b..fd6e197c8 100644 --- a/comps/retrievers/langchain/milvus/requirements.txt +++ b/comps/retrievers/langchain/milvus/requirements.txt @@ -22,3 +22,4 @@ python-docx==0.8.11 sentence_transformers shortuuid tiktoken +uvicorn diff --git a/comps/retrievers/langchain/milvus/retriever_milvus.py b/comps/retrievers/langchain/milvus/retriever_milvus.py index ba7bb38db..1625eed0a 100644 --- a/comps/retrievers/langchain/milvus/retriever_milvus.py +++ b/comps/retrievers/langchain/milvus/retriever_milvus.py @@ -20,7 +20,7 @@ from langsmith import traceable from comps import ( - EmbedDoc768, + EmbedDoc, SearchedDoc, ServiceType, TextDoc, @@ -65,7 +65,7 @@ def empty_embedding() -> List[float]: ) @traceable(run_type="retriever") @register_statistics(names=["opea_service@retriever_milvus"]) -def retrieve(input: EmbedDoc768) -> SearchedDoc: +def retrieve(input: EmbedDoc) -> SearchedDoc: vector_db = Milvus( embeddings, connection_args={"host": MILVUS_HOST, "port": MILVUS_PORT}, diff --git a/comps/retrievers/langchain/pgvector/requirements.txt b/comps/retrievers/langchain/pgvector/requirements.txt index d5caecc40..18609d361 100644 --- a/comps/retrievers/langchain/pgvector/requirements.txt +++ b/comps/retrievers/langchain/pgvector/requirements.txt @@ -12,3 +12,4 @@ psycopg2-binary pymupdf sentence_transformers shortuuid +uvicorn diff --git a/comps/retrievers/langchain/pgvector/retriever_pgvector.py b/comps/retrievers/langchain/pgvector/retriever_pgvector.py index 7460b801d..2fba1f1c0 100644 --- a/comps/retrievers/langchain/pgvector/retriever_pgvector.py +++ b/comps/retrievers/langchain/pgvector/retriever_pgvector.py @@ -10,7 +10,7 @@ from langsmith import traceable from comps import ( - EmbedDoc768, + EmbedDoc, 
SearchedDoc, ServiceType, TextDoc, @@ -32,7 +32,7 @@ ) @traceable(run_type="retriever") @register_statistics(names=["opea_service@retriever_pgvector"]) -def retrieve(input: EmbedDoc768) -> SearchedDoc: +def retrieve(input: EmbedDoc) -> SearchedDoc: start = time.time() search_res = vector_db.similarity_search_by_vector(embedding=input.embedding) searched_docs = [] diff --git a/comps/retrievers/langchain/pinecone/config.py b/comps/retrievers/langchain/pinecone/config.py index e6e62db6c..cd7f9e508 100644 --- a/comps/retrievers/langchain/pinecone/config.py +++ b/comps/retrievers/langchain/pinecone/config.py @@ -8,9 +8,9 @@ # Pinecone configuration PINECONE_API_KEY = os.getenv("PINECONE_API_KEY", "xxx_xxx") -PINECONE_INDEX_NAME = int(os.getenv("PINECONE_INDEX_NAME", "langchain-test")) +PINECONE_INDEX_NAME = os.getenv("PINECONE_INDEX_NAME", "langchain-test") # LLM/Embedding endpoints TGI_LLM_ENDPOINT = os.getenv("TGI_LLM_ENDPOINT", "http://localhost:8080") TGI_LLM_ENDPOINT_NO_RAG = os.getenv("TGI_LLM_ENDPOINT_NO_RAG", "http://localhost:8081") -TEI_EMBEDDING_ENDPOINT = os.getenv("TEI_ENDPOINT") +TEI_EMBEDDING_ENDPOINT = os.getenv("TEI_EMBEDDING_ENDPOINT") diff --git a/comps/retrievers/langchain/pinecone/docker/Dockerfile b/comps/retrievers/langchain/pinecone/docker/Dockerfile index 7eedfab10..dbb6d57c2 100644 --- a/comps/retrievers/langchain/pinecone/docker/Dockerfile +++ b/comps/retrievers/langchain/pinecone/docker/Dockerfile @@ -4,6 +4,8 @@ FROM langchain/langchain:latest +ARG ARCH="cpu" + RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ libgl1-mesa-glx \ libjemalloc-dev \ @@ -15,15 +17,15 @@ RUN useradd -m -s /bin/bash user && \ COPY comps /home/user/comps -RUN chmod +x /home/user/comps/retrievers/langchain/pinecone/run.sh - USER user RUN pip install --no-cache-dir --upgrade pip && \ + if [ ${ARCH} = "cpu" ]; then pip install torch torchvision --index-url https://download.pytorch.org/whl/cpu; fi && \ pip install --no-cache-dir -r /home/user/comps/retrievers/langchain/pinecone/requirements.txt + ENV PYTHONPATH=$PYTHONPATH:/home/user WORKDIR /home/user/comps/retrievers/langchain/pinecone -ENTRYPOINT ["/home/user/comps/retrievers/langchain/pinecone/run.sh"] +ENTRYPOINT ["python", "retriever_pinecone.py"] \ No newline at end of file diff --git a/comps/retrievers/langchain/pinecone/docker/docker_compose_retriever.yaml b/comps/retrievers/langchain/pinecone/docker/docker_compose_retriever.yaml index f9aac5b0b..3c0f7cef2 100644 --- a/comps/retrievers/langchain/pinecone/docker/docker_compose_retriever.yaml +++ b/comps/retrievers/langchain/pinecone/docker/docker_compose_retriever.yaml @@ -24,7 +24,9 @@ services: https_proxy: ${https_proxy} PINECONE_API_KEY: ${PINECONE_API_KEY} INDEX_NAME: ${PINECONE_INDEX_NAME} + PINECONE_INDEX_NAME: ${PINECONE_INDEX_NAME} LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY} + TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} restart: unless-stopped networks: diff --git a/comps/retrievers/langchain/pinecone/ingest.py b/comps/retrievers/langchain/pinecone/ingest.py deleted file mode 100644 index e17b5ebf5..000000000 --- a/comps/retrievers/langchain/pinecone/ingest.py +++ /dev/null @@ -1,100 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -# - -import io -import os - -import numpy as np -from config import EMBED_MODEL, INDEX_NAME, PINECONE_API_KEY -from langchain.text_splitter import RecursiveCharacterTextSplitter -from langchain_community.embeddings import 
HuggingFaceBgeEmbeddings, HuggingFaceEmbeddings, HuggingFaceHubEmbeddings -from langchain_community.vectorstores import Pinecone -from PIL import Image - -tei_embedding_endpoint = os.getenv("TEI_EMBEDDING_ENDPOINT") - -if os.getenv("PINECONE_API_KEY", None) is None: - raise Exception("Missing `PINECONE_API_KEY` environment variable.") - - -def pdf_loader(file_path): - try: - import easyocr - import fitz - except ImportError: - raise ImportError( - "`PyMuPDF` or 'easyocr' package is not found, please install it with " - "`pip install pymupdf or pip install easyocr.`" - ) - - doc = fitz.open(file_path) - reader = easyocr.Reader(["en"]) - result = "" - for i in range(doc.page_count): - page = doc.load_page(i) - pagetext = page.get_text().strip() - if pagetext: - result = result + pagetext - if len(doc.get_page_images(i)) > 0: - for img in doc.get_page_images(i): - if img: - pageimg = "" - xref = img[0] - img_data = doc.extract_image(xref) - img_bytes = img_data["image"] - pil_image = Image.open(io.BytesIO(img_bytes)) - img = np.array(pil_image) - img_result = reader.readtext(img, paragraph=True, detail=0) - pageimg = pageimg + ", ".join(img_result).strip() - if pageimg.endswith("!") or pageimg.endswith("?") or pageimg.endswith("."): - pass - else: - pageimg = pageimg + "." - result = result + pageimg - return result - - -def ingest_documents(): - """Ingest PDF to Pinecone from the data/ directory that - contains Edgar 10k filings data for Nike.""" - # Load list of pdfs - company_name = "Nike" - data_path = "../data/" - doc_path = [os.path.join(data_path, file) for file in os.listdir(data_path)][0] - - print("Parsing 10k filing doc for NIKE", doc_path) - - text_splitter = RecursiveCharacterTextSplitter(chunk_size=1500, chunk_overlap=100, add_start_index=True) - content = pdf_loader(doc_path) - chunks = text_splitter.split_text(content) - - print("Done preprocessing. Created ", len(chunks), " chunks of the original pdf") - # Create vectorstore - if tei_embedding_endpoint: - # create embeddings using TEI endpoint service - embedder = HuggingFaceHubEmbeddings(model=tei_embedding_endpoint) - else: - # create embeddings using local embedding model - embedder = HuggingFaceBgeEmbeddings(model_name=EMBED_MODEL) - - # Batch size - batch_size = 32 - num_chunks = len(chunks) - for i in range(0, num_chunks, batch_size): - batch_chunks = chunks[i : i + batch_size] - batch_texts = [f"Company: {company_name}. 
" + chunk for chunk in batch_chunks] - - _ = Pinecone.from_texts( - texts=batch_texts, - embedding=embedder, - index_name=INDEX_NAME, - ) - print(f"Processed batch {i//batch_size + 1}/{(num_chunks-1)//batch_size + 1}") - - -if __name__ == "__main__": - ingest_documents() diff --git a/comps/retrievers/langchain/pinecone/requirements.txt b/comps/retrievers/langchain/pinecone/requirements.txt new file mode 100644 index 000000000..32df7f40f --- /dev/null +++ b/comps/retrievers/langchain/pinecone/requirements.txt @@ -0,0 +1,22 @@ +beautifulsoup4 +docarray[full] +easyocr +fastapi +huggingface_hub +langchain +langchain-community +langchain-pinecone +langsmith +numpy +opentelemetry-api +opentelemetry-exporter-otlp +opentelemetry-sdk +pandas +Pillow +pinecone-client +prometheus_fastapi_instrumentator +pymupdf +python-docx +sentence_transformers +shortuuid +uvicorn diff --git a/comps/retrievers/langchain/pinecone/retriever_pinecone.py b/comps/retrievers/langchain/pinecone/retriever_pinecone.py new file mode 100644 index 000000000..ba8e6526f --- /dev/null +++ b/comps/retrievers/langchain/pinecone/retriever_pinecone.py @@ -0,0 +1,84 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import os +import time + +from config import EMBED_MODEL, PINECONE_API_KEY, PINECONE_INDEX_NAME +from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceHubEmbeddings +from langchain_pinecone import PineconeVectorStore +from langsmith import traceable +from pinecone import Pinecone, ServerlessSpec + +from comps import ( + EmbedDoc, + SearchedDoc, + ServiceType, + TextDoc, + opea_microservices, + register_microservice, + register_statistics, + statistics_dict, +) + +tei_embedding_endpoint = os.getenv("TEI_EMBEDDING_ENDPOINT") + + +@register_microservice( + name="opea_service@retriever_pinecone", + service_type=ServiceType.RETRIEVER, + endpoint="/v1/retrieval", + host="0.0.0.0", + port=7000, +) +@traceable(run_type="retriever") +@register_statistics(names=["opea_service@retriever_pinecone"]) +def retrieve(input: EmbedDoc) -> SearchedDoc: + start = time.time() + + pc = Pinecone(api_key=PINECONE_API_KEY) + + index = pc.Index(PINECONE_INDEX_NAME) + print(index.describe_index_stats()["total_vector_count"]) + # check if the Pinecone index has data + if index.describe_index_stats()["total_vector_count"] == 0: + result = SearchedDoc(retrieved_docs=[], initial_query=input.text) + statistics_dict["opea_service@retriever_pinecone"].append_latency(time.time() - start, None) + return result + + search_res = vector_db.max_marginal_relevance_search(query=input.text, k=input.k, fetch_k=input.fetch_k) + # if the Pinecone index has data, perform the search + if input.search_type == "similarity": + docs_and_similarities = vector_db.similarity_search_by_vector_with_score(embedding=input.embedding, k=input.k) + search_res = [doc for doc, _ in docs_and_similarities] + elif input.search_type == "similarity_distance_threshold": + if input.distance_threshold is None: + raise ValueError("distance_threshold must be provided for " + "similarity_distance_threshold retriever") + docs_and_similarities = vector_db.similarity_search_by_vector_with_score(embedding=input.embedding, k=input.k) + search_res = [doc for doc, similarity in docs_and_similarities if similarity > input.distance_threshold] + elif input.search_type == "similarity_score_threshold": + docs_and_similarities = vector_db.similarity_search_by_vector_with_score(query=input.text, k=input.k) + search_res = [doc for doc, similarity in 
docs_and_similarities if similarity > input.score_threshold] + elif input.search_type == "mmr": + search_res = vector_db.max_marginal_relevance_search( + query=input.text, k=input.k, fetch_k=input.fetch_k, lambda_mult=input.lambda_mult + ) + searched_docs = [] + for r in search_res: + searched_docs.append(TextDoc(text=r.page_content)) + result = SearchedDoc(retrieved_docs=searched_docs, initial_query=input.text) + statistics_dict["opea_service@retriever_pinecone"].append_latency(time.time() - start, None) + return result + + +if __name__ == "__main__": + # Create vectorstore + if tei_embedding_endpoint: + # create embeddings using TEI endpoint service + embeddings = HuggingFaceHubEmbeddings(model=tei_embedding_endpoint) + else: + # create embeddings using local embedding model + embeddings = HuggingFaceBgeEmbeddings(model_name=EMBED_MODEL) + + vector_db = PineconeVectorStore(embedding=embeddings, index_name=PINECONE_INDEX_NAME) + opea_microservices["opea_service@retriever_pinecone"].start() diff --git a/comps/retrievers/langchain/pinecone/run.sh b/comps/retrievers/langchain/pinecone/run.sh deleted file mode 100644 index ba658360b..000000000 --- a/comps/retrievers/langchain/pinecone/run.sh +++ /dev/null @@ -1,9 +0,0 @@ -#!/bin/sh - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -cd /home/user/comps/retrievers/langchain/pinecone -python ingest.py - -python retriever_pinecone.py diff --git a/comps/retrievers/langchain/redis/ingest.py b/comps/retrievers/langchain/redis/ingest.py index dbd01c65e..eeb1bc77b 100644 --- a/comps/retrievers/langchain/redis/ingest.py +++ b/comps/retrievers/langchain/redis/ingest.py @@ -14,7 +14,7 @@ from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceEmbeddings, HuggingFaceHubEmbeddings from langchain_community.vectorstores import Redis from PIL import Image -from redis_config import EMBED_MODEL, INDEX_NAME, INDEX_SCHEMA, REDIS_URL +from redis_config import EMBED_MODEL, INDEX_NAME, REDIS_URL tei_embedding_endpoint = os.getenv("TEI_EMBEDDING_ENDPOINT") @@ -112,7 +112,6 @@ def ingest_documents(): texts=batch_texts, embedding=embedder, index_name=INDEX_NAME, - index_schema=INDEX_SCHEMA, redis_url=REDIS_URL, ) print(f"Processed batch {i//batch_size + 1}/{(num_chunks-1)//batch_size + 1}") diff --git a/comps/retrievers/langchain/redis/redis_config.py b/comps/retrievers/langchain/redis/redis_config.py index 93946fcef..35233d8ff 100644 --- a/comps/retrievers/langchain/redis/redis_config.py +++ b/comps/retrievers/langchain/redis/redis_config.py @@ -73,5 +73,3 @@ def format_redis_conn_from_env(): current_file_path = os.path.abspath(__file__) parent_dir = os.path.dirname(current_file_path) -REDIS_SCHEMA = os.getenv("REDIS_SCHEMA", "redis_schema.yml") -INDEX_SCHEMA = os.path.join(parent_dir, REDIS_SCHEMA) diff --git a/comps/retrievers/langchain/redis/requirements.txt b/comps/retrievers/langchain/redis/requirements.txt index a763d10af..3720190d3 100644 --- a/comps/retrievers/langchain/redis/requirements.txt +++ b/comps/retrievers/langchain/redis/requirements.txt @@ -11,3 +11,4 @@ pymupdf redis sentence_transformers shortuuid +uvicorn diff --git a/comps/retrievers/langchain/redis/retriever_redis.py b/comps/retrievers/langchain/redis/retriever_redis.py index a1d70b48a..43f3e0c05 100644 --- a/comps/retrievers/langchain/redis/retriever_redis.py +++ b/comps/retrievers/langchain/redis/retriever_redis.py @@ -3,6 +3,7 @@ import os import time +from typing import Union from langchain_community.embeddings import 
HuggingFaceBgeEmbeddings, HuggingFaceHubEmbeddings from langchain_community.vectorstores import Redis @@ -10,7 +11,7 @@ from redis_config import EMBED_MODEL, INDEX_NAME, REDIS_URL from comps import ( - EmbedDoc768, + EmbedDoc, SearchedDoc, ServiceType, TextDoc, @@ -19,6 +20,12 @@ register_statistics, statistics_dict, ) +from comps.cores.proto.api_protocol import ( + ChatCompletionRequest, + RetrievalRequest, + RetrievalResponse, + RetrievalResponseData, +) tei_embedding_endpoint = os.getenv("TEI_EMBEDDING_ENDPOINT") @@ -32,36 +39,57 @@ ) @traceable(run_type="retriever") @register_statistics(names=["opea_service@retriever_redis"]) -def retrieve(input: EmbedDoc768) -> SearchedDoc: +def retrieve( + input: Union[EmbedDoc, RetrievalRequest, ChatCompletionRequest] +) -> Union[SearchedDoc, RetrievalResponse, ChatCompletionRequest]: + start = time.time() # check if the Redis index has data if vector_db.client.keys() == []: - result = SearchedDoc(retrieved_docs=[], initial_query=input.text) - statistics_dict["opea_service@retriever_redis"].append_latency(time.time() - start, None) - return result + search_res = [] + else: + if isinstance(input, EmbedDoc): + query = input.text + else: + # for RetrievalRequest, ChatCompletionRequest + query = input.input + # if the Redis index has data, perform the search + if input.search_type == "similarity": + search_res = vector_db.similarity_search_by_vector(embedding=input.embedding, k=input.k) + elif input.search_type == "similarity_distance_threshold": + if input.distance_threshold is None: + raise ValueError("distance_threshold must be provided for " + "similarity_distance_threshold retriever") + search_res = vector_db.similarity_search_by_vector( + embedding=input.embedding, k=input.k, distance_threshold=input.distance_threshold + ) + elif input.search_type == "similarity_score_threshold": + docs_and_similarities = vector_db.similarity_search_with_relevance_scores( + query=input.text, k=input.k, score_threshold=input.score_threshold + ) + search_res = [doc for doc, _ in docs_and_similarities] + elif input.search_type == "mmr": + search_res = vector_db.max_marginal_relevance_search( + query=input.text, k=input.k, fetch_k=input.fetch_k, lambda_mult=input.lambda_mult + ) + else: + raise ValueError(f"{input.search_type} not valid") + + # return different response format + retrieved_docs = [] + if isinstance(input, EmbedDoc): + for r in search_res: + retrieved_docs.append(TextDoc(text=r.page_content)) + result = SearchedDoc(retrieved_docs=retrieved_docs, initial_query=input.text) + else: + for r in search_res: + retrieved_docs.append(RetrievalResponseData(text=r.page_content, metadata=r.metadata)) + if isinstance(input, RetrievalRequest): + result = RetrievalResponse(retrieved_docs=retrieved_docs) + elif isinstance(input, ChatCompletionRequest): + input.retrieved_docs = retrieved_docs + input.documents = [doc.text for doc in retrieved_docs] + result = input - # if the Redis index has data, perform the search - if input.search_type == "similarity": - search_res = vector_db.similarity_search_by_vector(embedding=input.embedding, k=input.k) - elif input.search_type == "similarity_distance_threshold": - if input.distance_threshold is None: - raise ValueError("distance_threshold must be provided for " + "similarity_distance_threshold retriever") - search_res = vector_db.similarity_search_by_vector( - embedding=input.embedding, k=input.k, distance_threshold=input.distance_threshold - ) - elif input.search_type == "similarity_score_threshold": - docs_and_similarities = 
vector_db.similarity_search_with_relevance_scores( - query=input.text, k=input.k, score_threshold=input.score_threshold - ) - search_res = [doc for doc, _ in docs_and_similarities] - elif input.search_type == "mmr": - search_res = vector_db.max_marginal_relevance_search( - query=input.text, k=input.k, fetch_k=input.fetch_k, lambda_mult=input.lambda_mult - ) - searched_docs = [] - for r in search_res: - searched_docs.append(TextDoc(text=r.page_content)) - result = SearchedDoc(retrieved_docs=searched_docs, initial_query=input.text) statistics_dict["opea_service@retriever_redis"].append_latency(time.time() - start, None) return result diff --git a/comps/retrievers/llamaindex/redis_config.py b/comps/retrievers/llamaindex/redis_config.py index 93946fcef..619b2b822 100644 --- a/comps/retrievers/llamaindex/redis_config.py +++ b/comps/retrievers/llamaindex/redis_config.py @@ -69,9 +69,3 @@ def format_redis_conn_from_env(): # Vector Index Configuration INDEX_NAME = os.getenv("INDEX_NAME", "rag-redis") - - -current_file_path = os.path.abspath(__file__) -parent_dir = os.path.dirname(current_file_path) -REDIS_SCHEMA = os.getenv("REDIS_SCHEMA", "redis_schema.yml") -INDEX_SCHEMA = os.path.join(parent_dir, REDIS_SCHEMA) diff --git a/comps/retrievers/llamaindex/requirements.txt b/comps/retrievers/llamaindex/requirements.txt index d6bc736e3..236ea9af8 100644 --- a/comps/retrievers/llamaindex/requirements.txt +++ b/comps/retrievers/llamaindex/requirements.txt @@ -13,4 +13,4 @@ pymupdf redis sentence_transformers shortuuid - +uvicorn diff --git a/comps/retrievers/llamaindex/retriever_redis.py b/comps/retrievers/llamaindex/retriever_redis.py index 965aecd88..4999a7235 100644 --- a/comps/retrievers/llamaindex/retriever_redis.py +++ b/comps/retrievers/llamaindex/retriever_redis.py @@ -7,9 +7,8 @@ from llama_index.core.vector_stores.types import VectorStoreQuery from llama_index.vector_stores.redis import RedisVectorStore from redis_config import INDEX_NAME, REDIS_URL -from redisvl.schema import IndexSchema -from comps import EmbedDoc768, SearchedDoc, ServiceType, TextDoc, opea_microservices, register_microservice +from comps import EmbedDoc, SearchedDoc, ServiceType, TextDoc, opea_microservices, register_microservice tei_embedding_endpoint = os.getenv("TEI_EMBEDDING_ENDPOINT") @@ -22,7 +21,7 @@ port=7000, ) @traceable(run_type="retriever") -def retrieve(input: EmbedDoc768) -> SearchedDoc: +def retrieve(input: EmbedDoc) -> SearchedDoc: vector_store_query = VectorStoreQuery(query_embedding=input.embedding) search_res = vector_store.query(query=vector_store_query) searched_docs = [] @@ -33,27 +32,8 @@ def retrieve(input: EmbedDoc768) -> SearchedDoc: if __name__ == "__main__": - custom_schema = IndexSchema.from_dict( - { - "index": {"name": INDEX_NAME, "prefix": "doc"}, - "fields": [ - {"name": "id", "type": "tag"}, - {"name": "doc_id", "type": "tag"}, - {"name": "text", "type": "text"}, - {"name": "content", "type": "text"}, - {"name": "source", "type": "text"}, - {"name": "start_index", "type": "numeric"}, - { - "name": "vector", - "type": "vector", - "attrs": {"dims": 768, "algorithm": "HNSW", "date_type": "FLOAT32"}, - }, - ], - } - ) vector_store = RedisVectorStore( - schema=custom_schema, redis_url=REDIS_URL, ) opea_microservices["opea_service@retriever_redis"].start() diff --git a/comps/tts/Dockerfile b/comps/tts/Dockerfile index 73272567d..69d3c8a77 100644 --- a/comps/tts/Dockerfile +++ b/comps/tts/Dockerfile @@ -2,16 +2,24 @@ # SPDX-License-Identifier: Apache-2.0 FROM python:3.11-slim - +RUN useradd -m -s 
/bin/bash user && \ + mkdir -p /home/user && \ + chown -R user /home/user/ +USER user ENV LANG=C.UTF-8 +ARG ARCH=cpu -COPY comps /home/comps +COPY comps /home/user/comps RUN pip install --no-cache-dir --upgrade pip && \ - pip install --no-cache-dir -r /home/comps/tts/requirements.txt + if [ "${ARCH}" = "cpu" ]; then \ + pip install --no-cache-dir --extra-index-url https://download.pytorch.org/whl/cpu -r /home/user/comps/tts/requirements.txt ; \ + else \ + pip install --no-cache-dir -r /home/user/comps/tts/requirements.txt ; \ + fi -ENV PYTHONPATH=$PYTHONPATH:/home +ENV PYTHONPATH=$PYTHONPATH:/home/user -WORKDIR /home/comps/tts +WORKDIR /home/user/comps/tts ENTRYPOINT ["python", "tts.py"] \ No newline at end of file diff --git a/comps/tts/requirements.txt b/comps/tts/requirements.txt index 3234d8559..7f9363676 100644 --- a/comps/tts/requirements.txt +++ b/comps/tts/requirements.txt @@ -11,3 +11,4 @@ shortuuid soundfile torch transformers +uvicorn diff --git a/comps/tts/speecht5/Dockerfile b/comps/tts/speecht5/Dockerfile index e4afd07db..fd34aa0df 100644 --- a/comps/tts/speecht5/Dockerfile +++ b/comps/tts/speecht5/Dockerfile @@ -2,23 +2,33 @@ # SPDX-License-Identifier: Apache-2.0 FROM python:3.11-slim +RUN useradd -m -s /bin/bash user && \ + mkdir -p /home/user && \ + chown -R user /home/user/ # Set environment variables ENV LANG=en_US.UTF-8 ENV PYTHONPATH=/home/user +ARG ARCH=cpu # Install system dependencies RUN apt-get update \ && apt-get install -y ffmpeg \ && apt-get install -y curl -COPY comps /home/comps +COPY --chown=user:user comps /home/user/comps + +USER user RUN pip install --no-cache-dir --upgrade pip && \ - pip install --no-cache-dir -r /home/comps/tts/requirements.txt + if [ "${ARCH}" = "cpu" ]; then \ + pip install --no-cache-dir --extra-index-url https://download.pytorch.org/whl/cpu -r /home/user/comps/tts/requirements.txt ; \ + else \ + pip install --no-cache-dir -r /home/user/comps/tts/requirements.txt ; \ + fi -ENV PYTHONPATH=$PYTHONPATH:/home +ENV PYTHONPATH=$PYTHONPATH:/home/user -WORKDIR /home/comps/tts/speecht5 +WORKDIR /home/user/comps/tts/speecht5 ENTRYPOINT ["python", "speecht5_server.py", "--device", "cpu"] \ No newline at end of file diff --git a/comps/tts/speecht5/Dockerfile_hpu b/comps/tts/speecht5/Dockerfile_hpu index 8f889b86a..7fd53e913 100644 --- a/comps/tts/speecht5/Dockerfile_hpu +++ b/comps/tts/speecht5/Dockerfile_hpu @@ -3,27 +3,32 @@ # HABANA environment FROM vault.habana.ai/gaudi-docker/1.16.1/ubuntu22.04/habanalabs/pytorch-installer-2.2.2:latest AS hpu - +RUN useradd -m -s /bin/bash user && \ + mkdir -p /home/user && \ + chown -R user /home/user/ RUN rm -rf /etc/ssh/ssh_host* # Set environment variables ENV LANG=en_US.UTF-8 ENV PYTHONPATH=/home/user:/usr/lib/habanalabs/:/optimum-habana +ENV LD_LIBRARY_PATH=/lib/x86_64-linux-gnu:$LD_LIBRARY_PATH # Install system dependencies RUN apt-get update \ && apt-get install -y ffmpeg \ && apt-get install -y curl -COPY comps /home/comps +COPY --chown=user:user comps /home/user/comps + +USER user # Install requirements and optimum habana RUN pip install --no-cache-dir --upgrade pip && \ - pip install --no-cache-dir -r /home/comps/tts/requirements.txt && \ + pip install --no-cache-dir -r /home/user/comps/tts/requirements.txt && \ pip install optimum[habana] -ENV PYTHONPATH=$PYTHONPATH:/home +ENV PYTHONPATH=$PYTHONPATH:/home/user -WORKDIR /home/comps/tts/speecht5 +WORKDIR /home/user/comps/tts/speecht5 ENTRYPOINT ["python", "speecht5_server.py", "--device", "hpu"] \ No newline at end of file diff --git 
a/comps/vectorstores/langchain/chroma/README.md b/comps/vectorstores/langchain/chroma/README.md index e69de29bb..d7399b8fb 100644 --- a/comps/vectorstores/langchain/chroma/README.md +++ b/comps/vectorstores/langchain/chroma/README.md @@ -0,0 +1,36 @@ +# Introduction + +Chroma is an AI-native open-source vector database focused on developer productivity and happiness. Chroma is licensed under Apache 2.0. Chroma runs in various modes; it can be deployed as a server running on your local machine or in the cloud. + +# Getting Started + +## Start Chroma Server + +To start the Chroma server on your local machine, follow these steps: + +```bash +git clone https://github.com/chroma-core/chroma.git +cd chroma +docker compose up -d +``` + +## Expected Log Output + +Upon starting the server, you should see log output similar to the following: + +```log +server-1 | Starting 'uvicorn chromadb.app:app' with args: --workers 1 --host 0.0.0.0 --port 8000 --proxy-headers --log-config chromadb/log_config.yml --timeout-keep-alive 30 +server-1 | INFO: [02-08-2024 07:03:19] Set chroma_server_nofile to 65536 +server-1 | INFO: [02-08-2024 07:03:19] Anonymized telemetry enabled. See https://docs.trychroma.com/telemetry for more information. +server-1 | DEBUG: [02-08-2024 07:03:19] Starting component System +server-1 | DEBUG: [02-08-2024 07:03:19] Starting component OpenTelemetryClient +server-1 | DEBUG: [02-08-2024 07:03:19] Starting component SqliteDB +server-1 | DEBUG: [02-08-2024 07:03:19] Starting component QuotaEnforcer +server-1 | DEBUG: [02-08-2024 07:03:19] Starting component Posthog +server-1 | DEBUG: [02-08-2024 07:03:19] Starting component LocalSegmentManager +server-1 | DEBUG: [02-08-2024 07:03:19] Starting component SegmentAPI +server-1 | INFO: [02-08-2024 07:03:19] Started server process [1] +server-1 | INFO: [02-08-2024 07:03:19] Waiting for application startup. +server-1 | INFO: [02-08-2024 07:03:19] Application startup complete. +server-1 | INFO: [02-08-2024 07:03:19] Uvicorn running on http://0.0.0.0:8000 (Press CTRL+C to quit) +``` diff --git a/comps/vectorstores/langchain/milvus/README.md b/comps/vectorstores/langchain/milvus/README.md index d02508351..b0f19caf4 100644 --- a/comps/vectorstores/langchain/milvus/README.md +++ b/comps/vectorstores/langchain/milvus/README.md @@ -6,7 +6,7 @@ Configure your Milvus instance to suit your application scenarios by adjusting c Customized the path to store data, default is /volumes ```bash -export DOCKER_VOLUME_DIRECTORY=./your_path +export DOCKER_VOLUME_DIRECTORY=${your_path} ``` ## 2.
Run Milvus service diff --git a/comps/vectorstores/langchain/milvus/docker-compose.yml b/comps/vectorstores/langchain/milvus/docker-compose.yml index 125463752..d6c39d0f0 100644 --- a/comps/vectorstores/langchain/milvus/docker-compose.yml +++ b/comps/vectorstores/langchain/milvus/docker-compose.yml @@ -7,10 +7,6 @@ services: etcd: container_name: milvus-etcd image: quay.io/coreos/etcd:v3.5.5 - deploy: - resources: - limits: - cpus: "0.5" environment: - ETCD_AUTO_COMPACTION_MODE=revision - ETCD_AUTO_COMPACTION_RETENTION=1000 @@ -28,10 +24,6 @@ services: minio: container_name: milvus-minio image: minio/minio:RELEASE.2023-03-20T20-16-18Z - deploy: - resources: - limits: - cpus: "0.5" environment: MINIO_ACCESS_KEY: minioadmin MINIO_SECRET_KEY: minioadmin @@ -49,31 +41,25 @@ services: standalone: container_name: milvus-standalone - image: milvusdb/milvus:latest - deploy: - resources: - limits: - cpus: "8" - memory: 32G + image: milvusdb/milvus:v2.4.6 command: ["milvus", "run", "standalone"] security_opt: - seccomp:unconfined environment: ETCD_ENDPOINTS: etcd:2379 MINIO_ADDRESS: minio:9000 - DNNL_ENABLE: 0 volumes: - - ./milvus.yaml:/milvus/configs/milvus.yaml + - ${DOCKER_VOLUME_DIRECTORY:-.}/milvus.yaml:/milvus/configs/milvus.yaml - ${DOCKER_VOLUME_DIRECTORY:-.}/volumes/milvus:/var/lib/milvus healthcheck: - test: ["CMD", "curl", "-f", "http://localhost:9092/healthz"] + test: ["CMD", "curl", "-f", "http://localhost:9091/healthz"] interval: 30s start_period: 90s timeout: 20s retries: 3 ports: - "19530:19530" - - "9092:9092" + - "9091:9091" depends_on: - "etcd" - "minio" diff --git a/comps/vectorstores/langchain/milvus/milvus.yaml b/comps/vectorstores/langchain/milvus/milvus.yaml index de29dfe3d..b9f22cb3d 100644 --- a/comps/vectorstores/langchain/milvus/milvus.yaml +++ b/comps/vectorstores/langchain/milvus/milvus.yaml @@ -105,7 +105,9 @@ minio: region: # Specify minio storage system location region useVirtualHost: false # Whether use virtual host mode for bucket requestTimeoutMs: 10000 # minio timeout for request time in milliseconds - listObjectsMaxKeys: 0 # The maximum number of objects requested per batch in minio ListObjects rpc, 0 means using oss client by default, decrease these configuration if ListObjects timeout + # The maximum number of objects requested per batch in minio ListObjects rpc, + # 0 means using oss client by default, decrease these configuration if ListObjects timeout + listObjectsMaxKeys: 0 # Milvus supports four MQ: rocksmq(based on RockDB), natsmq(embedded nats-server), Pulsar and Kafka. # You can change your mq by setting mq.type field. @@ -120,6 +122,10 @@ mq: pursuitLag: 10 # time tick lag threshold to enter pursuit mode, in seconds pursuitBufferSize: 8388608 # pursuit mode buffer size in bytes mqBufSize: 16 # MQ client consumer buffer length + dispatcher: + mergeCheckInterval: 1 # the interval time(in seconds) for dispatcher to check whether to merge + targetBufSize: 16 # the length of channel buffer for targe + maxTolerantLag: 3 # Default value: "3", the timeout(in seconds) that target sends msgPack # Related configuration of pulsar, used to manage Milvus logs of recent mutation operations, output streaming log, and provide log publish-subscribe services. 
pulsar: @@ -182,7 +188,7 @@ natsmq: # Related configuration of rootCoord, used to handle data definition language (DDL) and data control language (DCL) requests rootCoord: dmlChannelNum: 16 # The number of dml channels created at system startup - maxPartitionNum: 4096 # Maximum number of partitions in a collection + maxPartitionNum: 1024 # Maximum number of partitions in a collection minSegmentSizeToEnableIndex: 1024 # It's a threshold. When the segment size is less than this value, the segment will not be indexed enableActiveStandby: false maxDatabaseNum: 64 # Maximum number of database @@ -200,7 +206,6 @@ rootCoord: proxy: timeTickInterval: 200 # ms, the interval that proxy synchronize the time tick healthCheckTimeout: 3000 # ms, the interval that to do component healthy check - healthCheckTimetout: 3000 # ms, the interval that to do component healthy check msgStream: timeTick: bufSize: 512 @@ -217,6 +222,7 @@ proxy: ginLogging: true ginLogSkipPaths: / # skip url path for gin log maxTaskNum: 1024 # max task number of proxy task queue + mustUsePartitionKey: false # switch for whether proxy must use partition key for the collection accessLog: enable: false # if use access log minioEnable: false # if upload sealed access log file to minio @@ -244,7 +250,7 @@ proxy: port: # high-level restful api acceptTypeAllowInt64: true # high-level restful api, whether http client can deal with int64 enablePprof: true # Whether to enable pprof middleware on the metrics port - ip: 0.0.0.0 # if not specified, use the first unicastable address + ip: # if not specified, use the first unicastable address port: 19530 internalPort: 19529 grpc: @@ -282,6 +288,8 @@ queryCoord: channelTaskTimeout: 60000 # 1 minute segmentTaskTimeout: 120000 # 2 minute distPullInterval: 500 + collectionObserverInterval: 200 + checkExecutedFlagInterval: 100 heartbeatAvailableInterval: 10000 # 10s, Only QueryNodes which fetched heartbeats within the duration are available loadTimeoutSeconds: 600 distRequestTimeout: 5000 # the request timeout for querycoord fetching data distribution from querynodes, in milliseconds @@ -298,6 +306,7 @@ queryCoord: checkNodeSessionInterval: 60 # the interval(in seconds) of check querynode cluster session gracefulStopTimeout: 5 # seconds. force stop node without graceful stop enableStoppingBalance: true # whether enable stopping balance + channelExclusiveNodeFactor: 4 # the least node number for enable channel's exclusive mode cleanExcludeSegmentInterval: 60 # the time duration of clean pipeline exclude segment which used for filter invalid data, in seconds ip: # if not specified, use the first unicastable address port: 19531 @@ -320,6 +329,7 @@ queryNode: nprobe: 16 # nprobe to search small index, based on your accuracy requirement, must smaller than nlist memExpansionRate: 1.15 # extra memory needed by building interim index buildParallelRate: 0.5 # the ratio of building interim index parallel matched with cpu num + knowhereScoreConsistency: false # Enable knowhere strong consistency score computation logic loadMemoryUsageFactor: 1 # The multiply factor of calculating the memory usage while loading segments enableDisk: false # enable querynode load disk index, and search on disk index maxDiskUsagePercentage: 95 @@ -327,17 +337,22 @@ queryNode: enabled: true memoryLimit: 2147483648 # 2 GB, 2 * 1024 *1024 *1024 readAheadPolicy: willneed # The read ahead policy of chunk cache, options: `normal, random, sequential, willneed, dontneed` - # options: async, sync, off. + # options: async, sync, disable. 
# Specifies the necessity for warming up the chunk cache. - # 1. If set to "sync" or "async," the original vector data will be synchronously/asynchronously loaded into the + # 1. If set to "sync" or "async" the original vector data will be synchronously/asynchronously loaded into the # chunk cache during the load process. This approach has the potential to substantially reduce query/search latency # for a specific duration post-load, albeit accompanied by a concurrent increase in disk usage; - # 2. If set to "off," original vector data will only be loaded into the chunk cache during search/query. - warmup: async + # 2. If set to "disable" original vector data will only be loaded into the chunk cache during search/query. + warmup: disable mmap: mmapEnabled: false # Enable mmap for loading data - mmapEnabled: false # Enable mmap for loading data - lazyloadEnabled: false # Enable lazyload for loading data + lazyload: + enabled: false # Enable lazyload for loading data + waitTimeout: 30000 # max wait timeout duration in milliseconds before start to do lazyload search and retrieve + requestResourceTimeout: 5000 # max timeout in milliseconds for waiting request resource for lazy load, 5s by default + requestResourceRetryInterval: 2000 # retry interval in milliseconds for waiting request resource for lazy load, 2s by default + maxRetryTimes: 1 # max retry times for lazy load, 1 by default + maxEvictPerRetry: 1 # max evict count for lazy load, 1 by default grouping: enabled: true maxNQ: 1000 @@ -403,9 +418,11 @@ indexNode: dataCoord: channel: watchTimeoutInterval: 300 # Timeout on watching channels (in seconds). Datanode tickler update watch progress will reset timeout timer. + balanceWithRpc: true # Whether to enable balance with RPC, default to use etcd watch + legacyVersionWithoutRPCWatch: 2.4.1 # Datanodes <= this version are considered as legacy nodes, which doesn't have rpc based watch(). This is only used during rolling upgrade where legacy nodes won't get new channels balanceSilentDuration: 300 # The duration after which the channel manager start background channel balancing balanceInterval: 360 # The interval with which the channel manager check dml channel balance status - checkInterval: 10 # The interval in seconds with which the channel manager advances channel states + checkInterval: 1 # The interval in seconds with which the channel manager advances channel states notifyChannelOperationTimeout: 5 # Timeout notifing channel operations (in seconds). segment: maxSize: 1024 # Maximum size of a segment in MB @@ -485,7 +502,7 @@ dataNode: coldTime: 60 # Turn on skip mode after there are only timetick msg for x seconds segment: insertBufSize: 16777216 # Max buffer size to flush for a single segment. - deleteBufBytes: 67108864 # Max buffer size in bytes to flush del for a single channel, default as 16MB + deleteBufBytes: 16777216 # Max buffer size in bytes to flush del for a single channel, default as 16MB syncPeriod: 600 # The period to sync segments if buffer is not empty. memory: forceSyncEnable: true # Set true to force sync if memory usage is too high @@ -536,8 +553,6 @@ log: grpc: log: level: WARNING - serverMaxSendSize: 536870912 - serverMaxRecvSize: 268435456 gracefulStopTimeout: 10 # second, time to wait graceful stop finish client: compressionEnabled: false @@ -550,8 +565,6 @@ grpc: minResetInterval: 1000 maxCancelError: 32 minSessionCheckInterval: 200 - clientMaxSendSize: 268435456 - clientMaxRecvSize: 536870912 # Configure the proxy tls enable. 
tls: @@ -560,18 +573,6 @@ tls: caPemPath: configs/cert/ca.pem common: - chanNamePrefix: - cluster: by-dev - rootCoordTimeTick: rootcoord-timetick - rootCoordStatistics: rootcoord-statistics - rootCoordDml: rootcoord-dml - replicateMsg: replicate-msg - queryTimeTick: queryTimeTick - dataCoordTimeTick: datacoord-timetick-channel - dataCoordSegmentInfo: segment-info-channel - subNamePrefix: - dataCoordSubNamePrefix: dataCoord - dataNodeSubNamePrefix: dataNode defaultPartitionName: _default # default partition name for a collection defaultIndexName: _default_idx # default index name entityExpiration: -1 # Entity expiration in seconds, CAUTION -1 means never expire @@ -617,7 +618,7 @@ common: ttMsgEnabled: true # Whether the instance disable sending ts messages traceLogMode: 0 # trace request info bloomFilterSize: 100000 # bloom filter initial size - maxBloomFalsePositive: 0.05 # max false positive rate for bloom filter + maxBloomFalsePositive: 0.001 # max false positive rate for bloom filter # QuotaConfig, configurations of Milvus quota and limits. # By default, we enable: @@ -631,7 +632,7 @@ common: # 4. DQL result rate protection; # If necessary, you can also manually force to deny RW requests. quotaAndLimits: - enabled: false # `true` to enable quota and limits, `false` to disable. + enabled: true # `true` to enable quota and limits, `false` to disable. # quotaCenterCollectInterval is the time interval that quotaCenter # collects metrics from Proxies, Query cluster and Data cluster. # seconds, (0 ~ 65536) @@ -649,10 +650,10 @@ quotaAndLimits: db: max: -1 # qps of db level, default no limit, rate for CreateIndex, DropIndex flushRate: - enabled: false + enabled: true max: -1 # qps, default no limit, rate for flush collection: - max: -1 # qps, default no limit, rate for flush at collection level. + max: 0.1 # qps, default no limit, rate for flush at collection level. db: max: -1 # qps of db level, default no limit, rate for flush compactionRate: @@ -719,6 +720,7 @@ quotaAndLimits: limits: maxCollectionNum: 65536 maxCollectionNumPerDB: 65536 + maxInsertSize: -1 # maximum size of a single insert request, in bytes, -1 means no limit maxResourceGroupNumOfQueryNode: 1024 # maximum number of resource groups of query nodes limitWriting: # forceDeny false means dml requests are allowed (except for some @@ -786,8 +788,8 @@ quotaAndLimits: trace: # trace exporter type, default is stdout, - # optional values: ['stdout', 'jaeger', 'otlp'] - exporter: stdout + # optional values: ['noop','stdout', 'jaeger', 'otlp'] + exporter: noop # fraction of traceID based sampler, # optional values: [0, 1] # Fractions >= 1 will always sample. Fractions < 0 are treated as zero. 
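
The Milvus changes above pin the standalone image to v2.4.6, mount milvus.yaml from ${DOCKER_VOLUME_DIRECTORY}, and move the compose healthcheck from port 9092 to the metrics port 9091 while client traffic stays on 19530. As a quick sanity check of those ports, here is a minimal sketch; it assumes a local standalone instance brought up from this compose file with the default host/ports, and that the `pymilvus` and `requests` packages are installed:

```python
# Sanity-check sketch for the Milvus standalone service configured above.
# Assumptions: Milvus v2.4.6 is running locally via the compose file, and the
# pymilvus and requests packages are installed.
import requests
from pymilvus import connections, utility

# The compose healthcheck now probes the metrics port 9091 (previously 9092).
resp = requests.get("http://localhost:9091/healthz", timeout=5)
print("healthz:", resp.status_code, resp.text.strip())

# Client traffic still goes through the default gRPC port 19530.
connections.connect(alias="default", host="localhost", port="19530")
print("server version:", utility.get_server_version())
connections.disconnect("default")
```

A non-200 healthz response or a connection error here usually means the standalone container is still starting; the compose healthcheck allows a 90s start period before marking the service healthy.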
diff --git a/comps/web_retrievers/langchain/chroma/docker/Dockerfile b/comps/web_retrievers/langchain/chroma/docker/Dockerfile index a6c3d80d5..c391fefe2 100644 --- a/comps/web_retrievers/langchain/chroma/docker/Dockerfile +++ b/comps/web_retrievers/langchain/chroma/docker/Dockerfile @@ -2,14 +2,16 @@ # SPDX-License-Identifier: Apache-2.0 FROM langchain/langchain:latest - +RUN useradd -m -s /bin/bash user && \ + mkdir -p /home/user && \ + chown -R user /home/user/ ARG ARCH="cpu" # Set this to "cpu" or "gpu" RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ libgl1-mesa-glx \ libjemalloc-dev \ vim - +USER user COPY comps /home/user/comps RUN pip install --no-cache-dir --upgrade pip && \ diff --git a/comps/web_retrievers/langchain/chroma/requirements.txt b/comps/web_retrievers/langchain/chroma/requirements.txt index e72c13650..c3b013496 100644 --- a/comps/web_retrievers/langchain/chroma/requirements.txt +++ b/comps/web_retrievers/langchain/chroma/requirements.txt @@ -12,3 +12,4 @@ opentelemetry-sdk prometheus-fastapi-instrumentator sentence_transformers shortuuid +uvicorn diff --git a/comps/web_retrievers/langchain/chroma/retriever_chroma.py b/comps/web_retrievers/langchain/chroma/retriever_chroma.py index d699b3eb3..3fbd1b755 100644 --- a/comps/web_retrievers/langchain/chroma/retriever_chroma.py +++ b/comps/web_retrievers/langchain/chroma/retriever_chroma.py @@ -12,7 +12,7 @@ from langchain_huggingface import HuggingFaceEndpointEmbeddings from comps import ( - EmbedDoc768, + EmbedDoc, SearchedDoc, ServiceType, TextDoc, @@ -58,7 +58,7 @@ def dump_docs(docs): port=7077, ) @register_statistics(names=["opea_service@web_retriever_chroma", "opea_service@search"]) -def web_retrieve(input: EmbedDoc768) -> SearchedDoc: +def web_retrieve(input: EmbedDoc) -> SearchedDoc: start = time.time() query = input.text embedding = input.embedding diff --git a/requirements.txt b/requirements.txt index 6a453d50f..ef12b2fc4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,7 +5,9 @@ httpx opentelemetry-api opentelemetry-exporter-otlp opentelemetry-sdk +Pillow prometheus-fastapi-instrumentator pyyaml requests shortuuid +uvicorn diff --git a/tests/test_agent_langchain.sh b/tests/test_agent_langchain.sh index db19f6c0f..ad9aae145 100644 --- a/tests/test_agent_langchain.sh +++ b/tests/test_agent_langchain.sh @@ -5,6 +5,7 @@ #set -xe WORKPATH=$(dirname "$PWD") +LOG_PATH="$WORKPATH/tests" ip_address=$(hostname -I | awk '{print $1}') function build_docker_images() { diff --git a/tests/test_asr_whisper.sh b/tests/test_asr.sh similarity index 76% rename from tests/test_asr_whisper.sh rename to tests/test_asr.sh index 5e6e4a8c8..87553f6ab 100644 --- a/tests/test_asr_whisper.sh +++ b/tests/test_asr.sh @@ -2,7 +2,7 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -set -xe +set -x WORKPATH=$(dirname "$PWD") ip_address=$(hostname -I | awk '{print $1}') @@ -10,23 +10,25 @@ ip_address=$(hostname -I | awk '{print $1}') function build_docker_images() { cd $WORKPATH echo $(pwd) - docker build -t opea/whisper:latest -f comps/asr/whisper/Dockerfile . - docker build -t opea/asr:latest -f comps/asr/Dockerfile . + docker build --no-cache -t opea/whisper:comps -f comps/asr/whisper/Dockerfile . + docker build --no-cache -t opea/asr:comps -f comps/asr/Dockerfile . 
} function start_service() { unset http_proxy - docker run -d --name="test-comps-asr-whisper" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p 7066:7066 --ipc=host opea/whisper:latest - docker run -d --name="test-comps-asr" -e ASR_ENDPOINT=http://$ip_address:7066 -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p 9099:9099 --ipc=host opea/asr:latest + docker run -d --name="test-comps-asr-whisper" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p 7066:7066 --ipc=host opea/whisper:comps + docker run -d --name="test-comps-asr" -e ASR_ENDPOINT=http://$ip_address:7066 -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p 9089:9099 --ipc=host opea/asr:comps sleep 3m } function validate_microservice() { - result=$(http_proxy="" curl http://localhost:9099/v1/audio/transcriptions -XPOST -d '{"byte_str": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA"}' -H 'Content-Type: application/json') + result=$(http_proxy="" curl http://localhost:9089/v1/audio/transcriptions -XPOST -d '{"byte_str": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA"}' -H 'Content-Type: application/json') if [[ $result == *"you"* ]]; then echo "Result correct." else echo "Result wrong." + docker logs test-comps-asr-whisper + docker logs test-comps-asr exit 1 fi diff --git a/tests/test_chathistory_mongo.sh b/tests/test_chathistory_mongo.sh index 1e60a59c9..005a1a6ef 100755 --- a/tests/test_chathistory_mongo.sh +++ b/tests/test_chathistory_mongo.sh @@ -22,7 +22,7 @@ function build_docker_images() { function start_service() { - docker run -d --name="test-comps-chathistory-mongo-server" -p 6013:6013 -p 6012:6012 -p 6014:6014 -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e no_proxy=$no_proxy -e MONGO_HOST=${MONGO_HOST} -e MONGO_PORT=${MONGO_PORT} -e DB_NAME=${DB_NAME} -e COLLECTION_NAME=${COLLECTION_NAME} opea/chathistory-mongo-server:comps + docker run -d --name="test-comps-chathistory-mongo-server" -p 6012:6012 -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e no_proxy=$no_proxy -e MONGO_HOST=${MONGO_HOST} -e MONGO_PORT=${MONGO_PORT} -e DB_NAME=${DB_NAME} -e COLLECTION_NAME=${COLLECTION_NAME} opea/chathistory-mongo-server:comps sleep 10s } diff --git a/tests/test_dataprep_milvus.sh b/tests/test_dataprep_milvus.sh new file mode 100644 index 000000000..e379882d5 --- /dev/null +++ b/tests/test_dataprep_milvus.sh @@ -0,0 +1,129 @@ +#!/bin/bash +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +set -x + +WORKPATH=$(dirname "$PWD") +LOG_PATH="$WORKPATH/tests" +ip_address=$(hostname -I | awk '{print $1}') + +function build_docker_images() { + cd $WORKPATH + echo $(pwd) + # langchain mosec embedding image + docker build --no-cache -t opea/langchain-mosec:comps --build-arg http_proxy=$http_proxy --build-arg https_proxy=$https_proxy -f comps/embeddings/langchain-mosec/mosec-docker/Dockerfile . + # dataprep milvus image + docker build --no-cache -t opea/dataprep-milvus:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/milvus/docker/Dockerfile . 
+} + +function start_service() { + # start milvus vector db + mkdir $WORKPATH/milvus + cd $WORKPATH/milvus + wget https://raw.githubusercontent.com/milvus-io/milvus/v2.4.6/configs/milvus.yaml + wget https://github.com/milvus-io/milvus/releases/download/v2.4.6/milvus-standalone-docker-compose.yml -O docker-compose.yml + sed '/- \${DOCKER_VOLUME_DIRECTORY:-\.}\/volumes\/milvus:\/var\/lib\/milvus/a \ \ \ \ \ \ - \${DOCKER_VOLUME_DIRECTORY:-\.}\/milvus.yaml:\/milvus\/configs\/milvus.yaml' -i docker-compose.yml + docker compose up -d + + # set service ports + mosec_embedding_port=5021 + dataprep_service_port=5022 + dataprep_file_service_port=5023 + dataprep_del_service_port=5024 + + # start mosec embedding service + docker run -d --name="test-comps-dataprep-milvus-mosec-server" -p $mosec_embedding_port:8000 -e http_proxy=$http_proxy -e https_proxy=$https_proxy opea/langchain-mosec:comps + + # start dataprep service + MOSEC_EMBEDDING_ENDPOINT="http://${ip_address}:${mosec_embedding_port}" + MILVUS=${ip_address} + docker run -d --name="test-comps-dataprep-milvus-server" -p ${dataprep_service_port}:6010 -p ${dataprep_file_service_port}:6011 -p ${dataprep_del_service_port}:6012 -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e no_proxy=$no_proxy -e MOSEC_EMBEDDING_ENDPOINT=${MOSEC_EMBEDDING_ENDPOINT} -e MILVUS=${MILVUS} --ipc=host opea/dataprep-milvus:comps + sleep 1m +} + +function validate_microservice() { + cd $LOG_PATH + + # test /v1/dataprep + dataprep_service_port=5022 + URL="http://${ip_address}:$dataprep_service_port/v1/dataprep" + echo "Deep learning is a subset of machine learning that utilizes neural networks with multiple layers to analyze various levels of abstract data representations. It enables computers to identify patterns and make decisions with minimal human intervention by learning from large amounts of data." > $LOG_PATH/dataprep_file.txt + HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -F 'files=@./dataprep_file.txt' -H 'Content-Type: multipart/form-data' "$URL") + if [ "$HTTP_STATUS" -eq 200 ]; then + echo "[ dataprep ] HTTP status is 200. Checking content..." + cp ./dataprep_file.txt ./dataprep_file2.txt + local CONTENT=$(curl -s -X POST -F 'files=@./dataprep_file2.txt' -H 'Content-Type: multipart/form-data' "$URL" | tee ${LOG_PATH}/dataprep.log) + + if echo "$CONTENT" | grep -q "Data preparation succeeded"; then + echo "[ dataprep ] Content is as expected." + else + echo "[ dataprep ] Content does not match the expected result: $CONTENT" + docker logs test-comps-dataprep-milvus-server >> ${LOG_PATH}/dataprep.log + exit 1 + fi + else + echo "[ dataprep ] HTTP status is not 200. Received status was $HTTP_STATUS" + docker logs test-comps-dataprep-milvus-server >> ${LOG_PATH}/dataprep.log + exit 1 + fi + + # test /v1/dataprep/get_file + dataprep_file_service_port=5023 + URL="http://${ip_address}:$dataprep_file_service_port/v1/dataprep/get_file" + HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -H 'Content-Type: application/json' "$URL") + if [ "$HTTP_STATUS" -eq 200 ]; then + echo "[ dataprep - file ] HTTP status is 200. Checking content..." + local CONTENT=$(curl -s -X POST -H 'Content-Type: application/json' "$URL" | tee ${LOG_PATH}/dataprep_file.log) + + if echo "$CONTENT" | grep -q '{"name":'; then + echo "[ dataprep - file ] Content is as expected." 
+ else + echo "[ dataprep - file ] Content does not match the expected result: $CONTENT" + docker logs test-comps-dataprep-milvus-server >> ${LOG_PATH}/dataprep_file.log + exit 1 + fi + else + echo "[ dataprep - file ] HTTP status is not 200. Received status was $HTTP_STATUS" + docker logs test-comps-dataprep-milvus-server >> ${LOG_PATH}/dataprep_file.log + exit 1 + fi + + # test /v1/dataprep/delete_file + dataprep_del_service_port=5024 + URL="http://${ip_address}:$dataprep_del_service_port/v1/dataprep/delete_file" + HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -d '{"file_path": "dataprep_file.txt"}' -H 'Content-Type: application/json' "$URL") + if [ "$HTTP_STATUS" -eq 200 ]; then + echo "[ dataprep - del ] HTTP status is 200." + docker logs test-comps-dataprep-milvus-server >> ${LOG_PATH}/dataprep_del.log + else + echo "[ dataprep - del ] HTTP status is not 200. Received status was $HTTP_STATUS" + docker logs test-comps-dataprep-milvus-server >> ${LOG_PATH}/dataprep_del.log + exit 1 + fi +} + +function stop_docker() { + cd $WORKPATH + rm -rf milvus/ + cid=$(docker ps -aq --filter "name=test-comps-dataprep-milvus*") + if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi + cid=$(docker ps -aq --filter "name=milvus-*") + if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi +} + +function main() { + + stop_docker + + build_docker_images + start_service + + validate_microservice + + stop_docker + echo y | docker system prune + +} + +main diff --git a/tests/test_dataprep_pgvector.sh b/tests/test_dataprep_pgvector.sh index c9daba9fc..c4c892ee9 100755 --- a/tests/test_dataprep_pgvector.sh +++ b/tests/test_dataprep_pgvector.sh @@ -5,7 +5,10 @@ set -xe WORKPATH=$(dirname "$PWD") +LOG_PATH="$WORKPATH/tests" ip_address=$(hostname -I | awk '{print $1}') +dataprep_service_port=5013 + function build_docker_images() { cd $WORKPATH @@ -21,20 +24,72 @@ function start_service() { export POSTGRES_PASSWORD=testpwd export POSTGRES_DB=vectordb + docker run --name vectorstore-postgres -e POSTGRES_USER=${POSTGRES_USER} -e POSTGRES_HOST_AUTH_METHOD=trust -e POSTGRES_DB=${POSTGRES_DB} -e POSTGRES_PASSWORD=${POSTGRES_PASSWORD} -p 5432:5432 -d -v $WORKPATH/comps/vectorstores/langchain/pgvector/init.sql:/docker-entrypoint-initdb.d/init.sql pgvector/pgvector:0.7.0-pg16 sleep 10s - docker run -d --name="dataprep-pgvector" -p 6007:6007 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e PG_CONNECTION_STRING=postgresql+psycopg2://${POSTGRES_USER}:${POSTGRES_PASSWORD}@$ip_address:5432/${POSTGRES_DB} opea/dataprep-pgvector:latest + docker run -d --name="dataprep-pgvector" -p ${dataprep_service_port}:6007 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e PG_CONNECTION_STRING=postgresql+psycopg2://${POSTGRES_USER}:${POSTGRES_PASSWORD}@$ip_address:5432/${POSTGRES_DB} opea/dataprep-pgvector:latest sleep 3m } function validate_microservice() { - URL="http://$ip_address:6007/v1/dataprep" - echo 'The OPEA platform includes: Detailed framework of composable building blocks for state-of-the-art generative AI systems including LLMs, data stores, and prompt engines' > ./dataprep_file.txt - curl --noproxy $ip_address --location --request POST \ - --form 'files=@./dataprep_file.txt' $URL + cd $LOG_PATH + + # test /v1/dataprep + URL="http://${ip_address}:$dataprep_service_port/v1/dataprep" + echo "Deep learning is a subset of machine learning that utilizes neural networks with multiple layers to analyze various levels of abstract data 
representations. It enables computers to identify patterns and make decisions with minimal human intervention by learning from large amounts of data." > $LOG_PATH/dataprep_file.txt + HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -F 'files=@./dataprep_file.txt' -H 'Content-Type: multipart/form-data' "$URL") + if [ "$HTTP_STATUS" -eq 200 ]; then + echo "[ dataprep ] HTTP status is 200. Checking content..." + cp ./dataprep_file.txt ./dataprep_file2.txt + local CONTENT=$(curl -s -X POST -F 'files=@./dataprep_file2.txt' -H 'Content-Type: multipart/form-data' "$URL" | tee ${LOG_PATH}/dataprep.log) + + if echo "$CONTENT" | grep -q "Data preparation succeeded"; then + echo "[ dataprep ] Content is as expected." + else + echo "[ dataprep ] Content does not match the expected result: $CONTENT" + docker logs dataprep-pgvector >> ${LOG_PATH}/dataprep.log + exit 1 + fi + else + echo "[ dataprep ] HTTP status is not 200. Received status was $HTTP_STATUS" + docker logs dataprep-pgvector >> ${LOG_PATH}/dataprep.log + exit 1 + fi + + # test /v1/dataprep/get_file + URL="http://${ip_address}:$dataprep_service_port/v1/dataprep/get_file" + HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -H 'Content-Type: application/json' "$URL") + if [ "$HTTP_STATUS" -eq 200 ]; then + echo "[ dataprep - file ] HTTP status is 200. Checking content..." + local CONTENT=$(curl -s -X POST -H 'Content-Type: application/json' "$URL" | tee ${LOG_PATH}/dataprep_file.log) + + if echo "$CONTENT" | grep -q '{"name":'; then + echo "[ dataprep - file ] Content is as expected." + else + echo "[ dataprep - file ] Content does not match the expected result: $CONTENT" + docker logs dataprep-pgvector >> ${LOG_PATH}/dataprep_file.log + exit 1 + fi + else + echo "[ dataprep - file ] HTTP status is not 200. Received status was $HTTP_STATUS" + docker logs dataprep-pgvector >> ${LOG_PATH}/dataprep_file.log + exit 1 + fi + + # test /v1/dataprep/delete_file + URL="http://${ip_address}:$dataprep_service_port/v1/dataprep/delete_file" + HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -d '{"file_path": "dataprep_file.txt"}' -H 'Content-Type: application/json' "$URL") + if [ "$HTTP_STATUS" -eq 200 ]; then + echo "[ dataprep - del ] HTTP status is 200." + docker logs dataprep-pgvector >> ${LOG_PATH}/dataprep_del.log + else + echo "[ dataprep - del ] HTTP status is not 200. Received status was $HTTP_STATUS" + docker logs dataprep-pgvector >> ${LOG_PATH}/dataprep_del.log + exit 1 + fi } function stop_docker() { diff --git a/tests/test_dataprep_pinecone.sh b/tests/test_dataprep_pinecone.sh new file mode 100755 index 000000000..a92a86c64 --- /dev/null +++ b/tests/test_dataprep_pinecone.sh @@ -0,0 +1,59 @@ +#!/bin/bash +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +set -xe + +WORKPATH=$(dirname "$PWD") +ip_address=$(hostname -I | awk '{print $1}') +function build_docker_images() { + cd $WORKPATH + + # build dataprep image for pinecone + docker build -t opea/dataprep-pinecone:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f $WORKPATH/comps/dataprep/pinecone/docker/Dockerfile . 
+} + +function start_service() { + export PINECONE_API_KEY=$PINECONE_KEY + export PINECONE_INDEX_NAME="test-index" + export HUGGINGFACEHUB_API_TOKEN=$HF_TOKEN + + docker run -d --name="dataprep-pinecone" -p 6007:6007 -p 6008:6008 -p 6009:6009 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e no_proxy=$no_proxy -e PINECONE_API_KEY=$PINECONE_API_KEY -e PINECONE_INDEX_NAME=$PINECONE_INDEX_NAME opea/dataprep-pinecone:latest + + sleep 1m +} + +function validate_microservice() { + URL="http://$ip_address:6007/v1/dataprep" + echo 'The OPEA platform includes: Detailed framework of composable building blocks for state-of-the-art generative AI systems including LLMs, data stores, and prompt engines' > ./dataprep_file.txt + curl --noproxy $ip_address --location --request POST \ + --form 'files=@./dataprep_file.txt' $URL + + DELETE_URL="http://$ip_address:6009/v1/dataprep/delete_file" + curl --noproxy $ip_address --location --request POST \ + -d '{"file_path": "all"}' -H 'Content-Type: application/json' $DELETE_URL +} + +function stop_docker() { + cid=$(docker ps -aq --filter "name=vectorstore-pinecone*") + if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi + + cid=$(docker ps -aq --filter "name=dataprep-pinecone*") + if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi +} + +function main() { + + stop_docker + + build_docker_images + start_service + + validate_microservice + + stop_docker + echo y | docker system prune + +} + +main diff --git a/tests/test_dataprep_qdrant_langchain.sh b/tests/test_dataprep_qdrant_langchain.sh new file mode 100644 index 000000000..7d9e47708 --- /dev/null +++ b/tests/test_dataprep_qdrant_langchain.sh @@ -0,0 +1,116 @@ +#!/bin/bash +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +set -x + +WORKPATH=$(dirname "$PWD") +LOG_PATH="$WORKPATH/tests" +ip_address=$(hostname -I | awk '{print $1}') + +function build_docker_images() { + cd $WORKPATH + + # dataprep qdrant image + docker build --no-cache -t opea/dataprep-qdrant:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/qdrant/docker/Dockerfile . 
+} + +function start_service() { + QDRANT_PORT=6360 + docker run -d --name="test-comps-dataprep-qdrant-langchain" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p $QDRANT_PORT:6333 -p 6334:6334 --ipc=host qdrant/qdrant + tei_embedding_port=6361 + model="BAAI/bge-base-en-v1.5" + docker run -d --name="test-comps-dataprep-qdrant-langchain-tei" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p $tei_embedding_port:80 -v ./data:/data --pull always ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 --model-id $model + dataprep_service_port=6362 + TEI_EMBEDDING_ENDPOINT="http://${ip_address}:${tei_embedding_port}" + COLLECTION_NAME="rag-qdrant" + docker run -d --name="test-comps-dataprep-qdrant-langchain-server" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e QDRANT_HOST=$ip_address -e QDRANT_PORT=$QDRANT_PORT -e COLLECTION_NAME=$COLLECTION_NAME -e TEI_ENDPOINT=$TEI_EMBEDDING_ENDPOINT -p ${dataprep_service_port}:6007 --ipc=host opea/dataprep-qdrant:comps + sleep 1m +} + +function validate_services() { + local URL="$1" + local EXPECTED_RESULT="$2" + local SERVICE_NAME="$3" + local DOCKER_NAME="$4" + local INPUT_DATA="$5" + + if [[ $SERVICE_NAME == *"dataprep_upload_file"* ]]; then + cd $LOG_PATH + HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -F 'files=@./dataprep_file.txt' -H 'Content-Type: multipart/form-data' "$URL") + elif [[ $SERVICE_NAME == *"dataprep_upload_link"* ]]; then + HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -F 'link_list=["https://www.ces.tech/"]' "$URL") + else + HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL") + fi + HTTP_STATUS=$(echo $HTTP_RESPONSE | tr -d '\n' | sed -e 's/.*HTTPSTATUS://') + RESPONSE_BODY=$(echo $HTTP_RESPONSE | sed -e 's/HTTPSTATUS\:.*//g') + + docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log + + # check response status + if [ "$HTTP_STATUS" -ne "200" ]; then + echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" + exit 1 + else + echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." + fi + # check response body + if [[ "$RESPONSE_BODY" != *"$EXPECTED_RESULT"* ]]; then + echo "[ $SERVICE_NAME ] Content does not match the expected result: $RESPONSE_BODY" + exit 1 + else + echo "[ $SERVICE_NAME ] Content is as expected." + fi + + sleep 1s +} + +function validate_microservice() { + # tei for embedding service + validate_services \ + "${ip_address}:6361/embed" \ + "[[" \ + "tei_embedding" \ + "test-comps-dataprep-qdrant-langchain-tei" \ + '{"inputs":"What is Deep Learning?"}' + + # dataprep upload file + echo "Deep learning is a subset of machine learning that utilizes neural networks with multiple layers to analyze various levels of abstract data representations. It enables computers to identify patterns and make decisions with minimal human intervention by learning from large amounts of data." > $LOG_PATH/dataprep_file.txt + validate_services \ + "${ip_address}:6362/v1/dataprep" \ + "Data preparation succeeded" \ + "dataprep_upload_file" \ + "test-comps-dataprep-qdrant-langchain-server" + + # dataprep upload link + validate_services \ + "${ip_address}:6362/v1/dataprep" \ + "Data preparation succeeded" \ + "dataprep_upload_link" \ + "test-comps-dataprep-qdrant-langchain-server" + +} + +function stop_docker() { + cid=$(docker ps -aq --filter "name=test-comps-dataprep-qdrant-langchain*") + if [[ ! 
-z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi + + rm $LOG_PATH/dataprep_file.txt +} + +function main() { + + stop_docker + + build_docker_images + start_service + + validate_microservice + + stop_docker + echo y | docker system prune + +} + +main diff --git a/tests/test_dataprep_redis_langchain.sh b/tests/test_dataprep_redis_langchain.sh index 325bc4652..1a4b06ef7 100644 --- a/tests/test_dataprep_redis_langchain.sh +++ b/tests/test_dataprep_redis_langchain.sh @@ -18,71 +18,99 @@ function start_service() { REDIS_PORT=6380 docker run -d --name="test-comps-dataprep-redis-langchain" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p $REDIS_PORT:6379 -p 8002:8001 --ipc=host redis/redis-stack:7.2.0-v9 dataprep_service_port=5013 - dataprep_file_service_port=5016 - dataprep_del_service_port=5020 REDIS_URL="redis://${ip_address}:${REDIS_PORT}" - docker run -d --name="test-comps-dataprep-redis-langchain-server" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e REDIS_URL=$REDIS_URL -e REDIS_HOST=$ip_address -e REDIS_PORT=$REDIS_PORT -p ${dataprep_service_port}:6007 -p ${dataprep_file_service_port}:6008 -p ${dataprep_del_service_port}:6009 --ipc=host opea/dataprep-redis:comps + docker run -d --name="test-comps-dataprep-redis-langchain-server" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e REDIS_URL=$REDIS_URL -e REDIS_HOST=$ip_address -e REDIS_PORT=$REDIS_PORT -p ${dataprep_service_port}:6007 --ipc=host opea/dataprep-redis:comps sleep 1m } function validate_microservice() { cd $LOG_PATH - # test /v1/dataprep - dataprep_service_port=5013 + # test /v1/dataprep upload file URL="http://${ip_address}:$dataprep_service_port/v1/dataprep" echo "Deep learning is a subset of machine learning that utilizes neural networks with multiple layers to analyze various levels of abstract data representations. It enables computers to identify patterns and make decisions with minimal human intervention by learning from large amounts of data." > $LOG_PATH/dataprep_file.txt - HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -F 'files=@./dataprep_file.txt' -H 'Content-Type: multipart/form-data' "$URL") - if [ "$HTTP_STATUS" -eq 200 ]; then - echo "[ dataprep ] HTTP status is 200. Checking content..." - cp ./dataprep_file.txt ./dataprep_file2.txt - local CONTENT=$(curl -s -X POST -F 'files=@./dataprep_file2.txt' -H 'Content-Type: multipart/form-data' "$URL" | tee ${LOG_PATH}/dataprep.log) - - if echo "$CONTENT" | grep -q "Data preparation succeeded"; then - echo "[ dataprep ] Content is as expected." - else - echo "[ dataprep ] Content does not match the expected result: $CONTENT" - docker logs test-comps-dataprep-redis-langchain-server >> ${LOG_PATH}/dataprep.log - exit 1 - fi + HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -F 'files=@./dataprep_file.txt' -H 'Content-Type: multipart/form-data' "$URL") + HTTP_STATUS=$(echo $HTTP_RESPONSE | tr -d '\n' | sed -e 's/.*HTTPSTATUS://') + RESPONSE_BODY=$(echo $HTTP_RESPONSE | sed -e 's/HTTPSTATUS\:.*//g') + SERVICE_NAME="dataprep - upload - file" + docker logs test-comps-dataprep-redis-langchain-server >> ${LOG_PATH}/dataprep_upload_file.log + + if [ "$HTTP_STATUS" -ne "200" ]; then + echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" + exit 1 + else + echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." 
+ fi + if [[ "$RESPONSE_BODY" != *"Data preparation succeeded"* ]]; then + echo "[ $SERVICE_NAME ] Content does not match the expected result: $RESPONSE_BODY" + exit 1 + else + echo "[ $SERVICE_NAME ] Content is as expected." + fi + + # test /v1/dataprep upload link + URL="http://${ip_address}:$dataprep_service_port/v1/dataprep" + HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -F 'link_list=["https://www.ces.tech/"]' "$URL") + HTTP_STATUS=$(echo $HTTP_RESPONSE | tr -d '\n' | sed -e 's/.*HTTPSTATUS://') + RESPONSE_BODY=$(echo $HTTP_RESPONSE | sed -e 's/HTTPSTATUS\:.*//g') + SERVICE_NAME="dataprep - upload - link" + docker logs test-comps-dataprep-redis-langchain-server >> ${LOG_PATH}/dataprep_upload_link.log + + if [ "$HTTP_STATUS" -ne "200" ]; then + echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" + exit 1 else - echo "[ dataprep ] HTTP status is not 200. Received status was $HTTP_STATUS" - docker logs test-comps-dataprep-redis-langchain-server >> ${LOG_PATH}/dataprep.log + echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." + fi + if [[ "$RESPONSE_BODY" != *"Data preparation succeeded"* ]]; then + echo "[ $SERVICE_NAME ] Content does not match the expected result: $RESPONSE_BODY" exit 1 + else + echo "[ $SERVICE_NAME ] Content is as expected." fi # test /v1/dataprep/get_file - dataprep_file_service_port=5016 - URL="http://${ip_address}:$dataprep_file_service_port/v1/dataprep/get_file" - HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -H 'Content-Type: application/json' "$URL") - if [ "$HTTP_STATUS" -eq 200 ]; then - echo "[ dataprep - file ] HTTP status is 200. Checking content..." - local CONTENT=$(curl -s -X POST -H 'Content-Type: application/json' "$URL" | tee ${LOG_PATH}/dataprep_file.log) - - if echo "$CONTENT" | grep -q '{"name":'; then - echo "[ dataprep - file ] Content is as expected." - else - echo "[ dataprep - file ] Content does not match the expected result: $CONTENT" - docker logs test-comps-dataprep-redis-langchain-server >> ${LOG_PATH}/dataprep_file.log - exit 1 - fi + URL="http://${ip_address}:$dataprep_service_port/v1/dataprep/get_file" + HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST "$URL") + HTTP_STATUS=$(echo $HTTP_RESPONSE | tr -d '\n' | sed -e 's/.*HTTPSTATUS://') + RESPONSE_BODY=$(echo $HTTP_RESPONSE | sed -e 's/HTTPSTATUS\:.*//g') + SERVICE_NAME="dataprep - get" + docker logs test-comps-dataprep-redis-langchain-server >> ${LOG_PATH}/dataprep_file.log + + if [ "$HTTP_STATUS" -ne "200" ]; then + echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" + exit 1 else - echo "[ dataprep - file ] HTTP status is not 200. Received status was $HTTP_STATUS" - docker logs test-comps-dataprep-redis-langchain-server >> ${LOG_PATH}/dataprep_file.log + echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." + fi + if [[ "$RESPONSE_BODY" != *'{"name":'* ]]; then + echo "[ $SERVICE_NAME ] Content does not match the expected result: $RESPONSE_BODY" exit 1 + else + echo "[ $SERVICE_NAME ] Content is as expected." fi # test /v1/dataprep/delete_file - dataprep_file_service_port=5016 - URL="http://${ip_address}:$dataprep_del_service_port/v1/dataprep/delete_file" - HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -d '{"file_path": "dataprep_file.txt"}' -H 'Content-Type: application/json' "$URL") - if [ "$HTTP_STATUS" -eq 200 ]; then - echo "[ dataprep - del ] HTTP status is 200." 
- docker logs test-comps-dataprep-redis-langchain-server >> ${LOG_PATH}/dataprep_del.log + URL="http://${ip_address}:$dataprep_service_port/v1/dataprep/delete_file" + HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -d '{"file_path": "dataprep_file.txt"}' -H 'Content-Type: application/json' "$URL") + HTTP_STATUS=$(echo $HTTP_RESPONSE | tr -d '\n' | sed -e 's/.*HTTPSTATUS://') + RESPONSE_BODY=$(echo $HTTP_RESPONSE | sed -e 's/HTTPSTATUS\:.*//g') + SERVICE_NAME="dataprep - del" + docker logs test-comps-dataprep-redis-langchain-server >> ${LOG_PATH}/dataprep_del.log + + # check response status + if [ "$HTTP_STATUS" -ne "200" ]; then + echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" + exit 1 else - echo "[ dataprep - del ] HTTP status is not 200. Received status was $HTTP_STATUS" - docker logs test-comps-dataprep-redis-langchain-server >> ${LOG_PATH}/dataprep_del.log + echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." + fi + # check response body + if [[ "$RESPONSE_BODY" != *'{"status":true}'* ]]; then + echo "[ $SERVICE_NAME ] Content does not match the expected result: $RESPONSE_BODY" exit 1 + else + echo "[ $SERVICE_NAME ] Content is as expected." fi } diff --git a/tests/test_embeddings_langchain-mosec.sh b/tests/test_embeddings_langchain-mosec.sh index 1381a6dcb..a2f9aeb2a 100644 --- a/tests/test_embeddings_langchain-mosec.sh +++ b/tests/test_embeddings_langchain-mosec.sh @@ -2,7 +2,7 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -set -xe +set -x WORKPATH=$(dirname "$PWD") ip_address=$(hostname -I | awk '{print $1}') @@ -10,7 +10,7 @@ ip_address=$(hostname -I | awk '{print $1}') function build_mosec_docker_images() { cd $WORKPATH echo $(pwd) - docker build --build-arg http_proxy=$http_proxy --build-arg https_proxy=$https_proxy --no-cache -t langchain-mosec:comps -f comps/embeddings/langchain-mosec/mosec-docker/Dockerfile . + docker build --build-arg http_proxy=$http_proxy --build-arg https_proxy=$https_proxy --no-cache -t opea/embedding-langchain-mosec-endpoint:comps -f comps/embeddings/langchain-mosec/mosec-docker/Dockerfile . } function build_docker_images() { @@ -23,7 +23,7 @@ function start_service() { mosec_endpoint=5001 model="BAAI/bge-large-en-v1.5" unset http_proxy - docker run -d --name="test-comps-embedding-langchain-mosec-endpoint" -p $mosec_endpoint:8000 langchain-mosec:comps + docker run -d --name="test-comps-embedding-langchain-mosec-endpoint" -p $mosec_endpoint:8000 opea/embedding-langchain-mosec-endpoint:comps export MOSEC_EMBEDDING_ENDPOINT="http://${ip_address}:${mosec_endpoint}" mosec_service_port=5002 docker run -d --name="test-comps-embedding-langchain-mosec-server" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p ${mosec_service_port}:6000 --ipc=host -e MOSEC_EMBEDDING_ENDPOINT=$MOSEC_EMBEDDING_ENDPOINT opea/embedding-langchain-mosec:comps @@ -36,6 +36,14 @@ function validate_microservice() { -X POST \ -d '{"text":"What is Deep Learning?"}' \ -H 'Content-Type: application/json' + if [ $? 
-eq 0 ]; then + echo "curl command executed successfully" + else + echo "curl command failed" + docker logs test-comps-embedding-langchain-mosec-endpoint + docker logs test-comps-embedding-langchain-mosec-server + exit 1 + fi } function stop_docker() { diff --git a/tests/test_guardrails_langchain.sh b/tests/test_guardrails_llama_guard.sh similarity index 86% rename from tests/test_guardrails_langchain.sh rename to tests/test_guardrails_llama_guard.sh index e2032435c..1462611aa 100644 --- a/tests/test_guardrails_langchain.sh +++ b/tests/test_guardrails_llama_guard.sh @@ -11,18 +11,19 @@ function build_docker_images() { echo "Start building docker images for microservice" cd $WORKPATH docker pull ghcr.io/huggingface/tgi-gaudi:2.0.1 - docker build --no-cache -t opea/guardrails-tgi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/guardrails/langchain/docker/Dockerfile . + docker build --no-cache -t opea/guardrails-tgi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/guardrails/llama_guard/docker/Dockerfile . echo "Docker images built" } function start_service() { echo "Starting microservice" export model_id="meta-llama/Meta-Llama-Guard-2-8B" - export SAFETY_GUARD_ENDPOINT=http://${ip_address}:8088 + export SAFETY_GUARD_MODEL_ID="meta-llama/Meta-Llama-Guard-2-8B" + export SAFETY_GUARD_ENDPOINT=http://${ip_address}:8088/v1/chat/completions docker run -d --name="test-guardrails-langchain-tgi-server" -p 8088:80 --runtime=habana -e HF_TOKEN=$HF_TOKEN -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host -e HTTPS_PROXY=$https_proxy -e HTTP_PROXY=$https_proxy ghcr.io/huggingface/tgi-gaudi:2.0.1 --model-id $model_id --max-input-length 1024 --max-total-tokens 2048 sleep 4m - docker run -d --name="test-guardrails-langchain-service" -p 9090:9090 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e no_proxy=$no_proxy -e SAFETY_GUARD_ENDPOINT=$SAFETY_GUARD_ENDPOINT -e HUGGINGFACEHUB_API_TOKEN=$HF_TOKEN opea/guardrails-tgi:latest + docker run -d --name="test-guardrails-langchain-service" -p 9090:9090 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e no_proxy=$no_proxy -e SAFETY_GUARD_MODEL_ID=$SAFETY_GUARD_MODEL_ID -e SAFETY_GUARD_ENDPOINT=$SAFETY_GUARD_ENDPOINT -e HUGGINGFACEHUB_API_TOKEN=$HF_TOKEN opea/guardrails-tgi:latest sleep 10s echo "Microservice started" diff --git a/tests/test_guardrails_pii_detection.sh b/tests/test_guardrails_pii_detection.sh index 4466992b5..1ba8202f6 100644 --- a/tests/test_guardrails_pii_detection.sh +++ b/tests/test_guardrails_pii_detection.sh @@ -25,11 +25,16 @@ function validate_microservice() { echo "Validate microservice started" export PATH="${HOME}/miniforge3/bin:$PATH" source activate - echo "test 1 - single task" - python comps/guardrails/pii_detection/test.py --test_text --batch_size 1 --ip_addr $ip_address - echo "test 2 - 20 tasks in parallel" - python comps/guardrails/pii_detection/test.py --test_text --batch_size 20 --ip_addr $ip_address + echo "test 1 - single task - ner" + python comps/guardrails/pii_detection/test.py --test_text --batch_size 1 --ip_addr $ip_address --strategy ner + echo "test 2 - 20 tasks in parallel - ner" + python comps/guardrails/pii_detection/test.py --test_text --batch_size 20 --ip_addr $ip_address --strategy ner + echo "test 3 - single task - ml" + python comps/guardrails/pii_detection/test.py --test_text --batch_size 1 --ip_addr $ip_address --strategy ml + echo "test 4 - 
20 tasks in parallel - ml" + python comps/guardrails/pii_detection/test.py --test_text --batch_size 20 --ip_addr $ip_address --strategy ml echo "Validate microservice completed" + docker logs test-guardrails-pii-detection-endpoint } function stop_docker() { diff --git a/tests/test_llms_text-generation_vllm-openvino.sh b/tests/test_llms_text-generation_vllm-openvino.sh new file mode 100755 index 000000000..f2df98584 --- /dev/null +++ b/tests/test_llms_text-generation_vllm-openvino.sh @@ -0,0 +1,118 @@ +#!/bin/bash +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +set -xe + +WORKPATH="$( cd "$( dirname "$0" )" && pwd )" + +# Define variables +port=8123 +HF_CACHE_DIR=$HOME/.cache/huggingface +DOCKER_IMAGE="vllm:openvino" +CONTAINER_NAME="vllm-openvino-container" + +function build_container() { + cd $WORKPATH + git clone https://github.com/vllm-project/vllm.git vllm-openvino + cd ./vllm-openvino/ + docker build -t $DOCKER_IMAGE \ + -f Dockerfile.openvino \ + . \ + --build-arg https_proxy=$https_proxy \ + --build-arg http_proxy=$http_proxy + cd $WORKPATH + rm -rf vllm-openvino +} + +# Function to start Docker container +start_container() { + + docker run -d --rm --name=$CONTAINER_NAME \ + -p $port:$port \ + --ipc=host \ + -e HTTPS_PROXY=$https_proxy \ + -e HTTP_PROXY=$https_proxy \ + -v $HF_CACHE_DIR:/root/.cache/huggingface \ + vllm:openvino /bin/bash -c "\ + cd / && \ + export VLLM_CPU_KVCACHE_SPACE=50 && \ + python3 -m vllm.entrypoints.openai.api_server \ + --model \"Intel/neural-chat-7b-v3-3\" \ + --host 0.0.0.0 \ + --port $port" + + # check whether service is fully ready + n=0 + until [[ "$n" -ge 300 ]]; do + docker logs $CONTAINER_NAME > /tmp/$CONTAINER_NAME.log 2>&1 + n=$((n+1)) + if grep -q "Uvicorn running on" /tmp/$CONTAINER_NAME.log; then + break + fi + sleep 3s + done + +} + +# Cleanup Function +cleanup() { + # Stop and remove Docker container and images + cid=$(docker ps -aq --filter "name=$CONTAINER_NAME") + if [[ ! 
-z "$cid" ]]; then docker stop $cid || docker rm $cid && sleep 1s; fi + docker rmi -f $DOCKER_IMAGE + rm /tmp/$CONTAINER_NAME.log +} + +# Function to test API endpoint +function test_api_endpoint { + local endpoint="$1" + local expected_status="$2" + + # Make the HTTP request + if test "$1" = "v1/completions" + then + local response=$(curl "http://localhost:$port/$endpoint" \ + -H "Content-Type: application/json" \ + -d '{ + "model": "Intel/neural-chat-7b-v3-3", + "prompt": "What is the key advantage of Openvino framework", + "max_tokens": 300, + "temperature": 0.7 + }' \ + --write-out '%{http_code}' \ + --silent \ + --output /dev/null) + else + local response=$(curl "http://localhost:$port/$endpoint" \ + --write-out '%{http_code}' \ + --silent \ + --output /dev/null) + fi + + # Assert the response status code + if [[ "$response" -eq "$expected_status" ]]; then + echo "PASS: $endpoint returned expected status code: $expected_status" + else + echo "FAIL: $endpoint returned unexpected status code: $response (expected: $expected_status)" + fi +} +# Main function +main() { + + build_container + start_container + + # Sleep to allow the container to start up fully + sleep 10 + # Test the /v1/models API + test_api_endpoint "v1/models" 200 + + # Test the /v1/completions API + test_api_endpoint "v1/completions" 200 + + cleanup +} + +# Call main function +main diff --git a/tests/test_llms_text-generation_vllm-ray.sh b/tests/test_llms_text-generation_vllm-ray.sh index 8ecb487e9..7ab235a93 100644 --- a/tests/test_llms_text-generation_vllm-ray.sh +++ b/tests/test_llms_text-generation_vllm-ray.sh @@ -12,7 +12,7 @@ function build_docker_images() { cd $WORKPATH docker build \ -f comps/llms/text-generation/vllm-ray/docker/Dockerfile.vllmray \ - -t vllm_ray:habana --network=host . + -t opea/vllm_ray:habana --network=host . ## Build OPEA microservice docker cd $WORKPATH @@ -34,7 +34,7 @@ function start_service() { --ipc=host \ -e HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN \ -p $port_number:8000 \ - vllm_ray:habana \ + opea/vllm_ray:habana \ /bin/bash -c "ray start --head && python vllm_ray_openai.py --port_number 8000 --model_id_or_path $LLM_MODEL --tensor_parallel_size 2 --enforce_eager False" export vLLM_RAY_ENDPOINT="http://${ip_address}:${port_number}" diff --git a/tests/test_llms_text-generation_vllm.sh b/tests/test_llms_text-generation_vllm.sh index c5e7faa4b..48bee9ae8 100644 --- a/tests/test_llms_text-generation_vllm.sh +++ b/tests/test_llms_text-generation_vllm.sh @@ -12,7 +12,7 @@ function build_docker_images() { cd $WORKPATH/comps/llms/text-generation/vllm docker build \ -f docker/Dockerfile.hpu \ - -t vllm:hpu \ + -t opea/vllm:hpu \ --shm-size=128g . 
## Build OPEA microservice docker @@ -35,7 +35,7 @@ function start_service() { --cap-add=sys_nice \ --ipc=host \ -e HF_TOKEN=${HUGGINGFACEHUB_API_TOKEN} \ - vllm:hpu \ + opea/vllm:hpu \ /bin/bash -c "export VLLM_CPU_KVCACHE_SPACE=40 && python3 -m vllm.entrypoints.openai.api_server --enforce-eager --model $LLM_MODEL --tensor-parallel-size 1 --host 0.0.0.0 --port 80 --block-size 128 --max-num-seqs 256 --max-seq_len-to-capture 2048" export vLLM_ENDPOINT="http://${ip_address}:${port_number}" diff --git a/tests/test_lvms_llava.sh b/tests/test_lvms_llava.sh index da7c740a9..d9d4258e7 100644 --- a/tests/test_lvms_llava.sh +++ b/tests/test_lvms_llava.sh @@ -2,7 +2,7 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -set -xe +set -x WORKPATH=$(dirname "$PWD") ip_address=$(hostname -I | awk '{print $1}') @@ -10,23 +10,26 @@ ip_address=$(hostname -I | awk '{print $1}') function build_docker_images() { cd $WORKPATH echo $(pwd) - docker build -t opea/llava:latest -f comps/lvms/llava/Dockerfile . - docker build --no-cache -t opea/lvm:latest -f comps/lvms/Dockerfile . + docker build -t opea/llava:comps -f comps/lvms/llava/Dockerfile . + docker build --no-cache -t opea/lvm:comps -f comps/lvms/Dockerfile . } function start_service() { unset http_proxy - docker run -d --name="test-comps-lvm-llava" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p 8399:8399 --ipc=host opea/llava:latest - docker run -d --name="test-comps-lvm" -e LVM_ENDPOINT=http://$ip_address:8399 -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p 9399:9399 --ipc=host opea/lvm:latest + docker run -d --name="test-comps-lvm-llava" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p 8399:8399 --ipc=host opea/llava:comps + docker run -d --name="test-comps-lvm" -e LVM_ENDPOINT=http://$ip_address:8399 -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p 9399:9399 --ipc=host opea/lvm:comps sleep 8m } function validate_microservice() { + result=$(http_proxy="" curl http://localhost:9399/v1/lvm -XPOST -d '{"image": "iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAAFUlEQVR42mP8/5+hnoEIwDiqkL4KAcT9GO0U4BxoAAAAAElFTkSuQmCC", "prompt":"What is this?"}' -H 'Content-Type: application/json') if [[ $result == *"yellow"* ]]; then echo "Result correct." else echo "Result wrong." + docker logs test-comps-lvm-llava + docker logs test-comps-lvm exit 1 fi diff --git a/tests/test_lvms_tgi_llava_next.sh b/tests/test_lvms_tgi_llava_next.sh new file mode 100644 index 000000000..970e3004f --- /dev/null +++ b/tests/test_lvms_tgi_llava_next.sh @@ -0,0 +1,57 @@ +#!/bin/bash +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +set -xe + +WORKPATH=$(dirname "$PWD") +ip_address=$(hostname -I | awk '{print $1}') + +function build_docker_images() { + cd $WORKPATH + echo $(pwd) + git clone https://github.com/yuanwu2017/tgi-gaudi.git && cd tgi-gaudi && git checkout v2.0.4 + docker build -t opea/llava-tgi:latest . + cd .. + docker build --no-cache -t opea/lvm-tgi:latest -f comps/lvms/Dockerfile_tgi . 
+} + +function start_service() { + unset http_proxy + model="llava-hf/llava-v1.6-mistral-7b-hf" + docker run -d --name="test-comps-lvm-llava-tgi" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p 8399:80 --runtime=habana -e PT_HPU_ENABLE_LAZY_COLLECTIVES=true -e SKIP_TOKENIZER_IN_TGI=true -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host opea/llava-tgi:latest --model-id $model --max-input-tokens 4096 --max-total-tokens 8192 + docker run -d --name="test-comps-lvm-tgi" -e LVM_ENDPOINT=http://$ip_address:8399 -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p 9399:9399 --ipc=host opea/lvm-tgi:latest + sleep 3m +} + +function validate_microservice() { + result=$(http_proxy="" curl http://localhost:9399/v1/lvm -XPOST -d '{"image": "iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAAFUlEQVR42mP8/5+hnoEIwDiqkL4KAcT9GO0U4BxoAAAAAElFTkSuQmCC", "prompt":"What is this?"}' -H 'Content-Type: application/json') + if [[ $result == *"yellow"* ]]; then + echo "Result correct." + else + echo "Result wrong." + exit 1 + fi + +} + +function stop_docker() { + cid=$(docker ps -aq --filter "name=test-comps-lvm*") + if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi +} + +function main() { + + stop_docker + + build_docker_images + start_service + + validate_microservice + + stop_docker + echo y | docker system prune + +} + +main diff --git a/tests/test_prompt_registry_mongo.sh b/tests/test_prompt_registry_mongo.sh index bdf5d907c..e91bf225c 100644 --- a/tests/test_prompt_registry_mongo.sh +++ b/tests/test_prompt_registry_mongo.sh @@ -22,7 +22,7 @@ function build_docker_images() { function start_service() { - docker run -d --name="test-comps-promptregistry-mongo-server" -p 6012:6012 -p 6013:6013 -p 6014:6014 -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e no_proxy=$no_proxy -e MONGO_HOST=${MONGO_HOST} -e MONGO_PORT=${MONGO_PORT} -e DB_NAME=${DB_NAME} -e COLLECTION_NAME=${COLLECTION_NAME} opea/promptregistry-mongo-server:latest + docker run -d --name="test-comps-promptregistry-mongo-server" -p 6012:6012 -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e no_proxy=$no_proxy -e MONGO_HOST=${MONGO_HOST} -e MONGO_PORT=${MONGO_PORT} -e DB_NAME=${DB_NAME} -e COLLECTION_NAME=${COLLECTION_NAME} opea/promptregistry-mongo-server:latest sleep 10s } diff --git a/tests/test_reranks_langchain-mosec.sh b/tests/test_reranks_langchain-mosec.sh index 899db5122..ba675bccf 100644 --- a/tests/test_reranks_langchain-mosec.sh +++ b/tests/test_reranks_langchain-mosec.sh @@ -2,7 +2,7 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -set -xe +set -x WORKPATH=$(dirname "$PWD") ip_address=$(hostname -I | awk '{print $1}') @@ -10,7 +10,7 @@ ip_address=$(hostname -I | awk '{print $1}') function build_mosec_docker_images() { cd $WORKPATH echo $(pwd) - docker build --build-arg http_proxy=$http_proxy --build-arg https_proxy=$https_proxy --no-cache -t reranking-langchain-mosec:comps -f comps/reranks/langchain-mosec/mosec-docker/Dockerfile . + docker build --build-arg http_proxy=$http_proxy --build-arg https_proxy=$https_proxy --no-cache -t opea/reranking-langchain-mosec-endpoint:comps -f comps/reranks/langchain-mosec/mosec-docker/Dockerfile . 
} function build_docker_images() { @@ -23,7 +23,7 @@ function start_service() { mosec_endpoint=5006 model="BAAI/bge-reranker-large" unset http_proxy - docker run -d --name="test-comps-reranking-langchain-mosec-endpoint" -p $mosec_endpoint:8000 reranking-langchain-mosec:comps + docker run -d --name="test-comps-reranking-langchain-mosec-endpoint" -p $mosec_endpoint:8000 opea/reranking-langchain-mosec-endpoint:comps export MOSEC_RERANKING_ENDPOINT="http://${ip_address}:${mosec_endpoint}" mosec_service_port=5007 docker run -d --name="test-comps-reranking-langchain-mosec-server" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p ${mosec_service_port}:8000 --ipc=host -e MOSEC_RERANKING_ENDPOINT=$MOSEC_RERANKING_ENDPOINT opea/reranking-langchain-mosec:comps @@ -32,12 +32,18 @@ function start_service() { function validate_microservice() { mosec_service_port=5007 - http_proxy="" curl http://${ip_address}:${mosec_service_port}/v1/reranking\ + result=$(http_proxy="" curl http://${ip_address}:${mosec_service_port}/v1/reranking\ -X POST \ -d '{"initial_query":"What is Deep Learning?", "retrieved_docs": [{"text":"Deep Learning is not..."}, {"text":"Deep learning is..."}]}' \ - -H 'Content-Type: application/json' - docker logs test-comps-reranking-langchain-mosec-server - docker logs test-comps-reranking-langchain-mosec-endpoint + -H 'Content-Type: application/json') + if [[ $result == *"Human"* ]]; then + echo "Result correct." + else + echo "Result wrong. Received was $result" + docker logs test-comps-reranking-langchain-mosec-endpoint + docker logs test-comps-reranking-langchain-mosec-server + exit 1 + fi } function stop_docker() { diff --git a/tests/test_reranks_tei.sh b/tests/test_reranks_tei.sh index 0777e7e4d..4a8c77aad 100644 --- a/tests/test_reranks_tei.sh +++ b/tests/test_reranks_tei.sh @@ -34,7 +34,7 @@ function validate_microservice() { -d '{"initial_query":"What is Deep Learning?", "retrieved_docs": [{"text":"Deep Learning is not..."}, {"text":"Deep learning is..."}]}' \ -H 'Content-Type: application/json') - if echo "$CONTENT" | grep -q "### Search results:"; then + if echo "$CONTENT" | grep -q "documents"; then echo "Content is as expected." 
else echo "Content does not match the expected result: $CONTENT" diff --git a/tests/test_retrievers_haystack_qdrant.sh b/tests/test_retrievers_haystack_qdrant.sh index 6b11eba5a..b1f8a02e8 100644 --- a/tests/test_retrievers_haystack_qdrant.sh +++ b/tests/test_retrievers_haystack_qdrant.sh @@ -19,7 +19,7 @@ function start_service() { # tei endpoint tei_endpoint=5008 model="BAAI/bge-base-en-v1.5" - docker run -d --name="test-comps-retriever-tei-endpoint" -p $tei_endpoint:80 -v ./data:/data --pull always ghcr.io/huggingface/text-embeddings-inference:cpu-1.2 --model-id $model + docker run -d --name="test-comps-retriever-tei-endpoint" -e HTTPS_PROXY=$https_proxy -e HTTP_PROXY=$https_proxy -p $tei_endpoint:80 -v ./data:/data --pull always ghcr.io/huggingface/text-embeddings-inference:cpu-1.2 --model-id $model sleep 30s export TEI_EMBEDDING_ENDPOINT="http://${ip_address}:${tei_endpoint}" diff --git a/tests/test_retrievers_langchain_pinecone.sh b/tests/test_retrievers_langchain_pinecone.sh new file mode 100755 index 000000000..3e5215ba7 --- /dev/null +++ b/tests/test_retrievers_langchain_pinecone.sh @@ -0,0 +1,71 @@ +#!/bin/bash +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +set -xe + +WORKPATH=$(dirname "$PWD") +ip_address=$(hostname -I | awk '{print $1}') +function build_docker_images() { + cd $WORKPATH + docker build --no-cache -t opea/retriever-pinecone:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/langchain/pinecone/docker/Dockerfile . +} + +function start_service() { + + # tei endpoint + tei_endpoint=5008 + model="BAAI/bge-base-en-v1.5" + docker run -d --name="test-comps-retriever-tei-endpoint" -e HTTPS_PROXY=$https_proxy -e HTTP_PROXY=$https_proxy -p $tei_endpoint:80 -v ./data:/data --pull always ghcr.io/huggingface/text-embeddings-inference:cpu-1.2 --model-id $model + sleep 30s + export TEI_EMBEDDING_ENDPOINT="http://${ip_address}:${tei_endpoint}" + + # pinecone retriever + export PINECONE_API_KEY=$PINECONE_KEY + export PINECONE_INDEX_NAME="langchain-test" + export HUGGINGFACEHUB_API_TOKEN=$HF_TOKEN + retriever_port=5009 + unset http_proxy + docker run -d --name="test-comps-retriever-pinecone-server" -p ${retriever_port}:7000 --ipc=host -e TEI_EMBEDDING_ENDPOINT=$TEI_EMBEDDING_ENDPOINT -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e PINECONE_API_KEY=$PINECONE_API_KEY -e PINECONE_INDEX_NAME=$PINECONE_INDEX_NAME -e INDEX_NAME=$PINECONE_INDEX_NAME opea/retriever-pinecone:comps + + sleep 2m +} + +function validate_microservice() { + retriever_port=5009 + test_embedding="[0.3212316218862614, 0.05284697028105079, 0.792736615029739, -0.01450667589035648, -0.7358454555705813, -0.5159104761926909, 0.3535153166047822, -0.6465310827905328, -0.3260418169245214, 0.5427377177268364, 0.839674125021304, 0.27459120894125255, -0.9833857616143291, 0.4763752586395751, 0.7048355150785723, 0.4935209825796325, -0.09655411499027178, -0.5739389241976944, 0.34450497876796815, -0.03401327136919208, -0.8247080270670755, -0.9430721851019634, 0.4702688485035773, 0.3872526674852217, -0.13436894777006136, 0.27166203983338266, 0.7724679346611174, 0.49524109590526666, 0.9810730976435518, 0.2143402533230332, 0.35235793217357947, -0.3199320624935764, -0.3535996110405917, 0.1982603781951089, -0.37547349902996063, -0.6148649695355071, 0.388521078627599, 0.7073360849235228, 0.1768845283243352, -0.38289339223361885, 0.36390326284734775, -0.4790146416310761, -0.5412301982310956, 0.33793186533237507, -0.7028178009236765, 
-0.6850965350085609, -0.519584428926227, 0.07610032557230206, 0.8173990245819258, 0.6620078274633294, 0.9159029345791101, -0.6353085978752564, 0.5816911666251467, -0.03007583916355916, 0.7405029634324471, 0.43720248036100817, -0.8588961125219283, -0.5267610831146254, 0.17242810571201828, -0.5958637989986995, -0.9424146892733949, 0.593549429279222, -0.6516554787902789, -0.5666971591678356, -0.942676397097636, -0.7754876202156127, 0.4981071621118629, 0.3479716647812874, -0.20905562164787628, -0.01239748867059931, -0.39282697259470645, -0.682776727276128, 0.8490471472078613, 0.9407846472878745, 0.38429459825058054, -0.6217288222979798, 0.7017039943902317, 0.2666859825508645, -0.8350624589077213, -0.6844099142855995, 0.7150220289787632, 0.6172753342426756, 0.3411977212235433, -0.6885106120374, -0.9063819220399785, -0.8409372842391187, -0.8297926800281972, -0.7209991962325382, -0.10750064217958677, 0.3293914797165298, -0.7839812511866298, 0.3413595850264284, 0.9251256529601857, -0.7129635996889019, 0.2032168270911272, -0.744174955251268, 0.7691350055313244, -0.20065548721684312, 0.8869269473893813, -0.02043469943990095, 0.6747773545635596, -0.08840723444251264, 0.29835753335664084, -0.06410433319206965, 0.6915278973312651, 0.35470936730145075, -0.8143883316077478, 0.3700125242841532, 0.21752822647915626, -0.8620510146349405, -0.9872766671960136, -0.4418160577207253, -0.22054594310628928, -0.12301077500821433, -0.32532691454130314, -0.13151154223491113, -0.11476973253362455, -0.6347877217496254, -0.7764229239974911, 0.8494414471799672, -0.8096141861298036, -0.126108099532108, -0.3910538453811505, 0.7416491690145808, -0.9147820237179922, -0.09053536925720418, 0.6536341825563443, 0.655602583013402, 0.1757558598054938, -0.2501459855449637, 0.23414048418314914, -0.2944157385030681, 0.9386472406881659, -0.18806566910431344, -0.29109490690006345, -0.06582041104197667, -0.24458043176038613, 0.22893907834264082, -0.6322528508563678, -0.7885667746432836, 0.10383516801892911, 0.25661930212021256, 0.48395546864077654, 0.25074187080653787, 0.7878158493705165, 0.23874513474134984, -0.18963037155323526, 0.6768315857746809, 0.5323731821887652, 0.23324330999046516, -0.738289178845237, 0.8231931441360549, -0.5243106029457096, 0.21804967641989204, 0.3707592922049536, 0.1970890658467559, 0.6290401053696923, -0.6193312718716564, 0.4319818453521995, -0.4373242547587233, -0.20412719166280646, -0.868724458613944, -0.9426457085574942, 0.7688331784589177, 0.8429476319014946, -0.6928872166553237, -0.3089062124196522, -0.4951601658025162, -0.20786350848417157, -0.1834098357401246, 0.6258630377921288, -0.25204085881527294, -0.6433661815891194, 0.24194250996512046, 0.7945180851525879, 0.6730215739979015, 0.45995755232419877, 0.27685945410814927, 0.7529674957244883, -0.4439881981193141, 0.38722277085649703, 0.4225851985441007, 0.5151867308566294, 0.8592936274009735, -0.5577167356519221, -0.22541015002223674, 0.7872403040580904, -0.12895843621078895, 0.5887160803674254, -0.6121486933005933, -0.45190497189987, 0.5882515994898736, -0.20915972333667443, 0.6412544240387859, -0.9812292190679823, 0.23598351448404986, -0.01874477123769469, -0.5571884049798792, -0.21717058226127106, -0.8566428604555374, -0.7698283820683764, -0.7788953845967042, -0.9695043602118194, 0.2531642774513472, 0.24476771264255004, 0.799177428779027, 0.15892099361251932, 0.2675472976400166, 0.7977537791258142, 0.5682082238828539, -0.45861936031507833, 0.976812562932188, 0.7074171102968665, -0.255345769250928, -0.8903371790301657, 0.7704811965386686, 
0.7499406836491052, 0.015867022798163433, 0.023343856172087563, -0.8985882333056163, 0.967943518200411, 0.6738003473613683, 0.500027753964835, -0.25086930359627546, 0.8192342987623937, -0.5553572601867272, -0.5869387659256808, 0.8105241617485164, 0.26722188191476604, -0.3958252448602495, -0.5045071968072412, -0.28738102025143886, 0.9466985876572256, 0.7491954841518662, -0.05398806963889902, 0.5602374066760636, -0.7105267600964871, 0.9183176656578995, -0.7484524873628995, -0.9707740622635459, -0.835248467210193, -0.6698976002755301, -0.9157167347077453, 0.8385470752014215, -0.8484323571440642, 0.1488482374866753, 0.3535389435893035, 0.40201643606217297, -0.39307181109310174, -0.651228451786785, 0.9707155460374848, 0.7578035730666239, -0.916880505891617, 0.7976566483403702, 0.4769359186496589, -0.9056872532891009, 0.5018227509242583, 0.06634988131602104, -0.38876676686204537, -0.20473802582321277, 0.5980365889203325, -0.34935300908506206, 0.5873905336860825, -0.8339160527604776, 0.2903116937984762, -0.9254374424169307, 0.6580958452134436, 0.15246698154103022, -0.6646130474515959, 0.8207084174685697, 0.06879769054023499, 0.6856796611464853, 0.7434402148947985, -0.07417300955086725, -0.37981881059511857, 0.7945700979382095, 0.9465476443316254, 0.7045891367557522, -0.21374560717812052, 0.09707043886320443, 0.40542472035097754, -0.21295063208183063, -0.3638798039778244, 0.27259830494730597, -0.9679565648433712, 0.574009198040323, 0.5453104171463734, 0.4226578254247848, 0.8135241112071945, -0.9913587704531821, -0.5117490950168377, 0.31240764840477486, 0.05726091394767008, -0.44352035546239654, 0.973651830312322, -0.30089019754641044, -0.38110683211990515, 0.12746451891554633, -0.44142668003974683, -0.6085743100333996, 0.6897705314589502, 0.9941017194163115, 0.22931154106427631, -0.38393397164902865, -0.487276417971108, 0.9823011016539693, -0.525188403356583, 0.20472304461076174, -0.549309125745228, 0.8391439613819196, -0.29947371410247614, -0.9587993477785177, 0.49169643064876745, -0.8450431739492874, 0.4992908092405386, 0.8214166011949593, 0.3514461197612715, 0.7052749449063302, -0.456428137096097, -0.21613329759075817, -0.4240696515484821, -0.6072280877366947, -0.19019911975234938, 0.03207563995916485, 0.7832264288656379, -0.9848532944591397, 0.2814057130788894, 0.860398099217986, -0.5757789213121853, -0.6403226820347003, 0.6276892831123779, 0.6966115314942829, -0.5964071917752842, 0.44624318175630373, 0.7747997483259705, -0.5274892594576506, -0.00345488047657061, 0.39694784159551255, -0.32018146543784254, 0.7503113292041483, 0.2279567107684024, -0.6993797573511833, 0.07551046336599065, 0.34912828888955083, 0.4590408940147299, 0.25454507513086266, -0.30882522463970363, -0.4080889783776509, -0.3123706885833979, -0.8906352519220135, -0.8139972234039548, -0.08828963608894047, 0.14503312886836617, -0.3714118896544083, 0.3827783378301277, 0.5438460044018558, 0.5097760438462526, 0.15715247575456592, 0.7656929283612122, 0.2920396353744734, 0.2373440190759446, 0.9526910643357105, 0.1250822784239567, 0.8541819063485603, -0.12747895073713877, 0.5735382473541981, -0.5032516001742902, 0.7413632640531032, -0.7276977107465363, 0.843580565716205, 0.7018464054348241, 0.5586022744519274, 0.8087171435922904, -0.21245941454116735, -0.948838383837346, -0.33122336674310726, -0.6044852681843789, 0.9537863293189539, 0.2536799406315282, -0.6165803849255769, 0.7101896753682724, -0.7295247078012181, -0.7614076971639918, -0.26355996174665797, 0.2821572530049805, -0.31435759840484767, 0.4606279529588946, 
-0.6454718015595133, 0.29204230021467015, -0.9773214517280517, 0.9018006022750058, 0.41864735598581615, -0.6362219585524242, 0.6393270283675747, 0.8775458814947836, -0.8151570635893794, 0.3439568607968999, 0.29709851503999474, -0.757078876496533, 0.5012539900859203, 0.9894088580102554, -0.7830638861580885, -0.2991021462567893, 0.106227593453466, 0.475717480159388, -0.8190837445165258, 0.7235860704831878, 0.7463245164230621, -0.5005231847044065, 0.6040314499611552, 0.6735380082955229, -0.5547291176872893, -0.9090102518914822, 0.13079236830880614, 0.30122136258272514, -0.6417236467561747, 0.2630310905704383, -0.37163926901056077, 0.20821525595060142, 0.058213575984825905, -0.7186424501121726, 0.7186917038077467, 0.20368227867764155, 0.7957158871869667, -0.8553769107478018, 0.8475526085456688, -0.929286319233819, -0.4084410910607217, -0.18451194893213185, -0.2629665470348457, 0.36380699955097695, 0.2762298083541519, 0.8264334555626198, -0.022207373606218495, -0.32224911623004626, -0.18947254078026798, 0.33627343422225175, 0.6906306880901341, -0.5248865356053838, -0.8976978225060646, -0.9198989266658277, -0.9045058048590318, -0.43074279628622225, 0.9599523380525761, 0.16694571818827875, 0.08638717900194992, 0.24369341180939874, -0.29293980835779454, 0.13980998987643733, -0.9103052978285509, 0.9109674748745353, -0.6189652187256851, -0.30507868365416413, -0.4232217216255978, 0.34784431052206877, -0.8235167119697908, 0.1565512568825982, -0.11476153735499195, -0.5476852944817927, -0.9695366885614041, 0.31387227761880165, -0.8460727492314095, 0.5313339961520958, 0.5605009436841186, 0.04504755045556719, -0.10937916620725119, -0.40867992424849797, -0.9148814576758182, 0.41260731002228, 0.6535850987782705, -0.3956136730481463, 0.03633719317271722, -0.26520169024611917, -0.39307279913859916, 0.8389708129910836, -0.10965192030153337, -0.8114479506343715, 0.6624055258346568, -0.12364857684372677, -0.3391386034226034, 0.5064344415363975, 0.4222558794792024, -0.8920802019539475, 0.8403881748708741, -0.5144930020007417, -0.3961429483392995, -0.9112376538340263, 0.5369991550001529, 0.4099994212177125, 0.8971702224538953, -0.07250674251100442, -0.4123232887614461, -0.4122138364547645, 0.30115503935936516, 0.9140832812087094, -0.37996517983025035, 0.45766194212423583, 0.8778668278803266, -0.871373882496363, 0.9061603981794313, -0.4815792838295849, -0.3540250825062252, 0.47058280496548677, 0.6353307464139133, -0.9084299203157564, 0.32569503818833767, -0.5917177728092791, 0.017982667746413883, -0.39657854384311597, 0.30240291420731147, -0.8789617636583977, 0.398601970442066, -0.9537566407528597, -0.7326801366509474, 0.6394091009367926, -0.24018952260048332, -0.4410443985541457, -0.715250103875068, -0.9531170489995859, 0.8907413230296786, -0.6270483513933209, -0.1278281545077713, 0.6205668124687644, -0.5880492136441298, 0.8458960227498347, 0.5156432304509859, -0.41522707199863196, -0.9971627462302537, 0.967570980171752, -0.1258013547750596, -0.3920054384667395, -0.7579953976551077, -0.5047276085442098, -0.742917134758996, 0.307776046578512, 0.33240724082891204, -0.12439712701067074, 0.8297068611891512, 0.9092972699438713, -0.5553533790744807, -0.9327632085647035, 0.4797798607215402, -0.6407284323825371, 0.23503537288803233, 0.7356444783186646, 0.550461677629142, -0.8859356421536595, -0.06157466053719496, 0.2628024780598055, -0.14515603184459613, -0.9382781600128365, -0.9076306357777459, -0.5661586668239169, -0.5778188698610502, -0.343591139945177, -0.9957519288956789, 3.652203366399931e-05, -0.2850434941249338, 
0.9450784913510459, -0.7344049612004591, 0.3966551077940945, 0.9820403785569927, 0.7132254472780228, 0.04475455308790677, 0.7149662286904288, 0.30640286803677386, -0.11825818002978239, 0.9475071024012094, -0.4020573255284672, -0.25210492474829316, -0.9864930649895771, -0.3662338670933165, 0.6528806547589174, 0.23157758222346203, -0.5707934304014186, -0.12462852967839688, 0.1912875382350001, 0.9111205883142817, -0.7227638014501978, -0.36537014763125186, -0.37380198030841805, 0.4707867786085871, -0.5824192322860218, -0.47547092650542666, 0.7836345381645189, 0.7843678847969751, 0.6754328587362883, -0.6670404362153401, 0.7372872996570987, -0.8333262364813818, -0.41971949504499273, -0.7600660277081586, 0.22809249636551576, -0.8923092554006928, -0.28910705230462663, 0.17556387278264474, -0.3120642961908995, -0.08857040909612457, 0.9736924099705169, -0.6425732085916924, 0.5667862783362607, -0.45242262118684295, -0.3366537122702131, -0.21042580668493605, -0.969230642055972, -0.6986186588663355, -0.5420629464988849, 0.8012632695329027, 0.10364503122371205, -0.8288649738571241, -0.7488901002163446, -0.2086447971105505, 0.24528530567671103, -0.1194706644737491, -0.4487125509839567, 0.19757079065420702, 0.9701391397770309, 0.6918580324259651, -0.6609864495230626, -0.5767397650124655, 0.13274852903677803, 0.45790899492650117, 0.6156249211932037, -0.5400854790245104, -0.4871335994554471, -0.37124459518957686, -0.9740961061020355, 0.8132186161153883, 0.5432742278375737, -0.7555629992450097, -0.3626273029276168, 0.3273351801156006, 0.2950481130490956, 0.5899713501222568, 0.1290258276325824, 0.14809153246329188, -0.8527458869128903, -0.45135237009997664, -0.78966354981686, -0.9869505409499153, 0.5440922045096472, -0.5065478252374527, 0.8914118613097968, -0.7009799840752231, -0.37720301784400667, -0.1990418958793818, 0.07895118490326825, 0.43246496862820827, 0.06871630683294172, 0.04584623777009278, -0.34229499350310455, 0.9387219959330184, -0.5381844165951264, 0.4794422861285379, 0.8534951958829573, 0.5734335942167272, -0.85412829706822, -0.7352963908032732, -0.12895000820916747, -0.22552570725823173, -0.5976878733463429, -0.32791035485443487, 0.7202059113861725, 0.39099290295132905, 0.30525825694263764, -0.2266469266742548, -0.03379388729241706, -0.5954645444941691, -0.02422270847921526, 0.2367051711225363, 0.0254309367030352, -0.8571941247598263, 0.6036464885617703, 0.780145197998714, -0.18486284139078912, -0.4861368589284454, -0.2789831003703762, -0.695370188724934, 0.20748300875047643, 0.613995882433769, -0.20040817194169125, 0.8373240273873666, 0.6138944053316708, -0.7863205352137852, -0.7823411702718377, 0.79906295867358, -0.5467331800231525, -0.6344655458958364, -0.9818941753091346, 0.5525644258030062, 0.6262889073747209, 0.9963129049354384, -0.6272737000603017, -0.2716262931036606, 0.2096677033434846, -0.6982262682600213, -0.5674210473085657, 0.24902399542030595, -0.5657568018493333, 0.08618618872017958, 0.5489764282591345, -0.8941510222698827, 0.41351613826944567, -0.5112980841262675, 0.4470615015729351, -0.20725162805621333, -0.08479642143543553, -0.1278591923549064, -0.4999896814124227, 0.9888904679503661, -0.048462424602504495, -0.7019088972627803, 0.24200967459107448, -0.07080934919496995, -0.7205222066189325, 0.8569714457890816, -0.16535406501060956, -0.6995151061411666, -0.002471197183836038, 0.36657456718336245, -0.21418945415378254, 0.8960422717208372, -0.8112144998402944, 0.3367368342692487, -0.1409734233274329, 0.9270438056838188, 0.6449085435355675, -0.42063510394970094, 
-0.5514753035609532, -0.7824719546926855, 0.27064161179409774, 0.7610801292513893, 0.041332375564573365, -0.4938906089444197, 0.6565606828711339, -0.8175201877660032, -0.7145428710506601, 0.5266689558422335, -0.36373337569732045, -0.4295940430516798, 0.6614123405581125, -0.5795867768963181, 0.09683447902632913, -0.7233160622088481, -0.035259383881968365, 0.44407987368431834, 0.5080824859277744, -0.025605597564321236, -0.33746311986945, 0.8643101724003239, -0.6590382567793307, 0.11251953056040387, -0.5283365207737802, 0.8881578952123139, -0.9796498715072419, -0.8206325632112821, -0.5431772730915239, -0.09628735573638458, 0.8509192593020449, 0.6468967965920123, -0.5886852895684587, -0.25974684548008664, 0.4474352123365879, -0.2199845691372495, 0.7554317108927318, 0.9809450136647395, -0.9430090133566618, 0.23635288316941683]" + http_proxy='' curl --noproxy $ip_address http://${ip_address}:$retriever_port/v1/retrieval \ + -X POST \ + -d "{\"text\":\"test\",\"embedding\":${test_embedding}}" \ + -H 'Content-Type: application/json' + docker logs test-comps-retriever-pinecone-server + docker logs test-comps-retriever-tei-endpoint +} + +function stop_docker() { + cid_retrievers=$(docker ps -aq --filter "name=test-comps-retrievers*") + if [[ ! -z "$cid_retrievers" ]]; then + docker stop $cid_retrievers && docker rm $cid_retrievers && sleep 1s + fi + + cid_pinecone=$(docker ps -aq --filter "name=test-pinecone-vector-db") + if [[ ! -z "$cid_pinecone" ]]; then + docker stop $cid_pinecone && docker rm $cid_pinecone && sleep 1s + fi +} + +function main() { + + stop_docker + + build_docker_images + start_service + + validate_microservice + + stop_docker + echo y | docker system prune + +} + +main diff --git a/tests/test_tts_speecht5.sh b/tests/test_tts.sh similarity index 79% rename from tests/test_tts_speecht5.sh rename to tests/test_tts.sh index d9426bdff..943ae1854 100644 --- a/tests/test_tts_speecht5.sh +++ b/tests/test_tts.sh @@ -2,7 +2,7 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -set -xe +set -x WORKPATH=$(dirname "$PWD") ip_address=$(hostname -I | awk '{print $1}') @@ -10,14 +10,14 @@ ip_address=$(hostname -I | awk '{print $1}') function build_docker_images() { cd $WORKPATH echo $(pwd) - docker build -t opea/speecht5:latest -f comps/tts/speecht5/Dockerfile . - docker build -t opea/tts:latest -f comps/tts/Dockerfile . + docker build --no-cache -t opea/speecht5:comps -f comps/tts/speecht5/Dockerfile . + docker build --no-cache -t opea/tts:comps -f comps/tts/Dockerfile . } function start_service() { unset http_proxy - docker run -d --name="test-comps-tts-speecht5" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p 7055:7055 --ipc=host opea/speecht5:latest - docker run -d --name="test-comps-tts" -e TTS_ENDPOINT=http://$ip_address:7055 -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p 9088:9088 --ipc=host opea/tts:latest + docker run -d --name="test-comps-tts-speecht5" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p 7055:7055 --ipc=host opea/speecht5:comps + docker run -d --name="test-comps-tts" -e TTS_ENDPOINT=http://$ip_address:7055 -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p 9088:9088 --ipc=host opea/tts:comps sleep 3m } @@ -27,6 +27,8 @@ function validate_microservice() { echo "Result correct." else echo "Result wrong." 
+ docker logs test-comps-tts-speecht5 + docker logs test-comps-tts exit 1 fi diff --git a/tests/test_vectorstores_langchain_milvus.sh b/tests/test_vectorstores_langchain_milvus.sh new file mode 100644 index 000000000..60303017d --- /dev/null +++ b/tests/test_vectorstores_langchain_milvus.sh @@ -0,0 +1,77 @@ +#!/bin/bash +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +set -xe + +WORKPATH=$(dirname "$PWD") +LOG_PATH="$WORKPATH/tests" +ip_address=$(hostname -I | awk '{print $1}') + + +function start_service() { + cd $WORKPATH/comps/vectorstores/langchain/milvus + rm -rf volumes/ + + docker compose up -d + + sleep 60s +} + +function validate_vectorstore() { + PORT="19530" + COLLECTION_NAME="test_col" + + # test create collection + echo "[ test create ] creating collection.." + create_response=$(curl -X POST "http://$ip_address:$PORT/v1/vector/collections/create" -H "accept: application/json" -H "Content-Type: application/json" -d "{ \"collectionName\": \"$COLLECTION_NAME\", \"dbName\": \"default\", \"dimension\": 2, \"metricType\": \"L2\", \"primaryField\": \"id\", \"vectorField\": \"vector\"}") + echo $create_response >> ${LOG_PATH}/milvus_create_col.log + if [[ $(echo $create_response | grep '{"code":200') ]]; then + echo "[ test create ] create collection succeed" + else + echo "[ test create ] create collection failed" + exit 1 + fi + + # test insert data + echo "[ test insert ] inserting data.." + insert_response=$(curl -X POST "http://$ip_address:$PORT/v1/vector/insert" -H "accept: application/json" -H "Content-Type: application/json" -d "{ \"collectionName\": \"$COLLECTION_NAME\", \"data\": [{\"vector\":[1,2]}] }") + echo $insert_response >> ${LOG_PATH}/milvus_insert_data.log + if [[ $(echo $insert_response | grep '{"code":200,"data":{"insertCount":1') ]]; then + echo "[ test insert ] insert data succeed" + else + echo "[ test insert ] insert data failed" + exit 1 + fi + + # test search data + echo "[ test search ] searching data.." + search_response=$(curl -X POST "http://$ip_address:$PORT/v1/vector/search" -H "accept: application/json" -H "Content-Type: application/json" -d "{ \"collectionName\": \"$COLLECTION_NAME\", \"vector\":[1,2] }") + echo $search_response>> ${LOG_PATH}/milvus_search_data.log + if [[ $(echo $search_response | grep '{"code":200,"data":') ]]; then + echo "[ test search ] search data succeed" + else + echo "[ test search ] search data failed" + exit 1 + fi +} + +function stop_docker() { + cid=$(docker ps -aq --filter "name=milvus-*") + if [[ ! 
-z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi +} + +function main() { + + stop_docker + + start_service + + validate_vectorstore + + stop_docker + echo y | docker system prune + +} + +main diff --git a/tests/test_web_retrievers_langchain_chroma.sh b/tests/test_web_retrievers_langchain_chroma.sh index d1e2c3ed5..288d4fe6b 100644 --- a/tests/test_web_retrievers_langchain_chroma.sh +++ b/tests/test_web_retrievers_langchain_chroma.sh @@ -2,7 +2,7 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -set -xe +set -x WORKPATH=$(dirname "$PWD") ip_address=$(hostname -I | awk '{print $1}') @@ -31,11 +31,18 @@ function validate_microservice() { retriever_port=5019 export PATH="${HOME}/miniforge3/bin:$PATH" test_embedding=$(python -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)") - http_proxy='' curl http://${ip_address}:$retriever_port/v1/web_retrieval \ + result=$(http_proxy='' curl http://${ip_address}:$retriever_port/v1/web_retrieval \ -X POST \ -d "{\"text\":\"What is OPEA?\",\"embedding\":${test_embedding}}" \ - -H 'Content-Type: application/json' - docker logs test-comps-web-retriever-tei-endpoint + -H 'Content-Type: application/json') + if [[ $result == *"title"* ]]; then + echo "Result correct." + else + echo "Result wrong. Received status was $result" + docker logs test-comps-web-retriever-tei-endpoint + docker logs test-comps-web-retriever-chroma-server + exit 1 + fi } function stop_docker() { diff --git a/tests/test_workflow_chatqna.py b/tests/test_workflow_chatqna.py deleted file mode 100644 index a2ea0f2d0..000000000 --- a/tests/test_workflow_chatqna.py +++ /dev/null @@ -1,80 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -# - -import asyncio -import os - -from comps import ChatQnAGateway, MicroService, ServiceOrchestrator, ServiceType - -MEGA_SERVICE_HOST_IP = os.getenv("MEGA_SERVICE_HOST_IP", "0.0.0.0") -MEGA_SERVICE_PORT = os.getenv("MEGA_SERVICE_PORT", 8888) -EMBEDDING_SERVICE_HOST_IP = os.getenv("EMBEDDING_SERVICE_HOST_IP", "0.0.0.0") -EMBEDDING_SERVICE_PORT = os.getenv("EMBEDDING_SERVICE_PORT", 6000) -RETRIEVER_SERVICE_HOST_IP = os.getenv("RETRIEVER_SERVICE_HOST_IP", "0.0.0.0") -RETRIEVER_SERVICE_PORT = os.getenv("RETRIEVER_SERVICE_PORT", 7000) -RERANK_SERVICE_HOST_IP = os.getenv("RERANK_SERVICE_HOST_IP", "0.0.0.0") -RERANK_SERVICE_PORT = os.getenv("RERANK_SERVICE_PORT", 8000) -LLM_SERVICE_HOST_IP = os.getenv("LLM_SERVICE_HOST_IP", "0.0.0.0") -LLM_SERVICE_PORT = os.getenv("LLM_SERVICE_PORT", 9000) - - -class ChatQnAService: - def __init__(self, host="0.0.0.0", port=8000): - self.host = host - self.port = port - self.megaservice = ServiceOrchestrator() - - def add_remote_service(self): - embedding = MicroService( - name="embedding", - host=EMBEDDING_SERVICE_HOST_IP, - port=EMBEDDING_SERVICE_PORT, - endpoint="/v1/embeddings", - use_remote_service=True, - service_type=ServiceType.EMBEDDING, - ) - retriever = MicroService( - name="retriever", - host=RETRIEVER_SERVICE_HOST_IP, - port=RETRIEVER_SERVICE_PORT, - endpoint="/v1/retrieval", - use_remote_service=True, - service_type=ServiceType.RETRIEVER, - ) - rerank = MicroService( - name="rerank", - host=RERANK_SERVICE_HOST_IP, - port=RERANK_SERVICE_PORT, - endpoint="/v1/reranking", - use_remote_service=True, - service_type=ServiceType.RERANK, - ) - llm = MicroService( - name="llm", - host=LLM_SERVICE_HOST_IP, - port=LLM_SERVICE_PORT, - 
endpoint="/v1/chat/completions", - use_remote_service=True, - service_type=ServiceType.LLM, - ) - self.megaservice.add(embedding).add(retriever).add(rerank).add(llm) - self.megaservice.flow_to(embedding, retriever) - self.megaservice.flow_to(retriever, rerank) - self.megaservice.flow_to(rerank, llm) - self.gateway = ChatQnAGateway(megaservice=self.megaservice, host="0.0.0.0", port=self.port) - - async def schedule(self): - result_dict, runtime_graph = await self.megaservice.schedule( - initial_inputs={"text": "What is the revenue of Nike in 2023?"} - ) - print(result_dict) - - -if __name__ == "__main__": - chatqna = ChatQnAService(host=MEGA_SERVICE_HOST_IP, port=MEGA_SERVICE_PORT) - chatqna.add_remote_service() - asyncio.run(chatqna.schedule())