diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 972b2a181..2636246ae 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -1,9 +1,21 @@ +/comps/agent/ xuhui.ren@intel.com +/comps/finetuning/ xinyu.ye@intel.com +/comps/guardrails/ liang1.lv@intel.com /comps/asr/ sihan.chen@intel.com +/comps/intent_detection/ liang1.lv@intel.com +/comps/knowledgegraphs/ xuhui.ren@intel.com /comps/cores/ liang1.lv@intel.com /comps/dataprep/ xinyu.ye@intel.com /comps/embeddings/ xuhui.ren@intel.com /comps/guardrails/ letong.han@intel.com /comps/llms/ liang1.lv@intel.com +/comps/lvms/ sihan.chen@intel.com +/comps/nginx/ letong.han@intel.com +/comps/prompt_registry/ hoong.tee.yeoh@intel.com +/comps/chathistory/ yogesh.pandey@intel.com /comps/reranks/ xuhui.ren@intel.com /comps/retrievers/ xuhui.ren@intel.com /comps/tts/ sihan.chen@intel.com +/comps/ragas/ xuhui.ren@intel.com +/comps/vectorstores/ xinyu.ye@intel.com +/comps/web_retrievers/ sihan.chen@intel.com diff --git a/.github/workflows/_comps-workflow.yml b/.github/workflows/_comps-workflow.yml new file mode 100644 index 000000000..ca402c445 --- /dev/null +++ b/.github/workflows/_comps-workflow.yml @@ -0,0 +1,95 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +name: Comps jobs +permissions: read-all +on: + workflow_call: + inputs: + node: + required: true + type: string + service: + required: true + type: string + tag: + default: "comps" + required: false + type: string + build: + default: true + required: false + type: boolean + test: + default: true + description: "Test comps with docker compose" + required: false + type: boolean + mode: + default: "CD" + description: "Whether the test range is CI or CD" + required: false + type: string + +jobs: + #################################################################################################### + # Image Build + #################################################################################################### + build-images: + runs-on: "docker-build-${{ inputs.node }}" + continue-on-error: true + outputs: + file_exists: ${{ steps.get-yaml-path.outputs.file_exists }} + steps: + - name: Clean Up Working Directory + run: sudo rm -rf ${{github.workspace}}/* + + - name: Checkout out Repo + uses: actions/checkout@v4 + + - name: Clone required Repo + id: get-yaml-path + run: | + cd ${{ github.workspace }}/.github/workflows/docker/compose + # service=$(echo ${{ inputs.service }} | cut -d'_' -f1) + if [[ "${{ inputs.mode }}" == "CD" ]]; then + docker_compose_yml=${{ github.workspace }}/.github/workflows/docker/compose/${{ inputs.service }}-compose-cd.yaml + else + docker_compose_yml=${{ github.workspace }}/.github/workflows/docker/compose/${{ inputs.service }}-compose.yaml + fi + echo "docker_compose_path=${docker_compose_yml}" >> $GITHUB_OUTPUT + if [ -f "$docker_compose_yml" ]; then + echo "file_exists=true" >> $GITHUB_OUTPUT + else + echo "There is no ${{ inputs.mode }} part of ${{ inputs.service }} that needs to be executed." 
+ echo "file_exists=false" >> $GITHUB_OUTPUT + fi + + if [[ $(grep -c "llava-tgi:" ${docker_compose_yml}) != 0 ]]; then + git clone https://github.com/yuanwu2017/tgi-gaudi.git && cd tgi-gaudi && git checkout v2.0.4 + fi + if [[ $(grep -c "vllm-openvino:" ${docker_compose_yml}) != 0 ]]; then + git clone https://github.com/vllm-project/vllm.git vllm-openvino + fi + + - name: Build Image + if: ${{ fromJSON(inputs.build) && steps.get-yaml-path.outputs.file_exists == 'true' }} + uses: opea-project/validation/actions/image-build@main + with: + work_dir: ${{ github.workspace }} + docker_compose_path: ${{ steps.get-yaml-path.outputs.docker_compose_path }} + registry: ${OPEA_IMAGE_REPO}opea + tag: ${{ inputs.tag }} + + #################################################################################################### + # Docker Compose Test + #################################################################################################### + test-service-compose: + needs: [build-images] + if: ${{ fromJSON(inputs.test) && needs.build-images.outputs.file_exists == 'true' }} + uses: ./.github/workflows/_run-docker-compose.yml + with: + tag: ${{ inputs.tag }} + service: ${{ inputs.service }} + hardware: ${{ inputs.node }} + secrets: inherit diff --git a/.github/workflows/reuse-get-test-matrix.yml b/.github/workflows/_get-test-matrix.yml similarity index 91% rename from .github/workflows/reuse-get-test-matrix.yml rename to .github/workflows/_get-test-matrix.yml index 6860f8e9b..301835fc8 100644 --- a/.github/workflows/reuse-get-test-matrix.yml +++ b/.github/workflows/_get-test-matrix.yml @@ -25,7 +25,6 @@ jobs: else echo "CHECKOUT_REF=${{ github.ref }}" >> $GITHUB_ENV fi - echo "checkout ref ${{ env.CHECKOUT_REF }}" - name: Checkout out Repo uses: actions/checkout@v4 @@ -38,14 +37,17 @@ jobs: run: | set -xe if [ "${{ github.event_name }}" == "pull_request" ] || [ "${{ github.event_name }}" == "pull_request_target" ]; then - base_commit=${{ github.event.pull_request.base.sha }} + LATEST_COMMIT_SHA=$(curl -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" \ + "https://api.github.com/repos/opea-project/GenAIComps/commits?sha=${{ github.event.pull_request.base.ref }}" | jq -r '.[0].sha') + echo "Latest commit SHA is $LATEST_COMMIT_SHA" + base_commit=$LATEST_COMMIT_SHA else base_commit=$(git rev-parse HEAD~1) # push event fi merged_commit=$(git log -1 --format='%H') changed_files="$(git diff --name-only ${base_commit} ${merged_commit} | \ - grep 'comps/' | grep -vE '*.md|*.txt|comps/cores')" || true + grep 'comps/' | grep -vE '*.md|comps/cores')" || true services=$(printf '%s\n' "${changed_files[@]}" | cut -d'/' -f2 | grep -vE '*.py' | sort -u) || true run_matrix="{\"include\":[" for service in ${services}; do diff --git a/.github/workflows/_run-docker-compose.yml b/.github/workflows/_run-docker-compose.yml new file mode 100644 index 000000000..5f7ac7270 --- /dev/null +++ b/.github/workflows/_run-docker-compose.yml @@ -0,0 +1,111 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +name: Image Build +permissions: read-all +on: + workflow_call: + inputs: + registry: + description: Container Registry URL + required: false + default: "" + type: string + tag: + description: Container Tag + required: false + default: "latest" + type: string + service: + description: Example to test + required: true + type: string + hardware: + description: Hardware to run the test on + required: true + type: string +jobs: + get-test-case: + runs-on: ubuntu-latest + outputs: + test_cases: ${{ 
steps.test-case-matrix.outputs.test_cases }} + CHECKOUT_REF: ${{ steps.get-checkout-ref.outputs.CHECKOUT_REF }} + steps: + - name: Get checkout ref + id: get-checkout-ref + run: | + if [ "${{ github.event_name }}" == "pull_request" ] || [ "${{ github.event_name }}" == "pull_request_target" ]; then + CHECKOUT_REF=refs/pull/${{ github.event.number }}/merge + else + CHECKOUT_REF=${{ github.ref }} + fi + echo "CHECKOUT_REF=${CHECKOUT_REF}" >> $GITHUB_OUTPUT + echo "checkout ref ${CHECKOUT_REF}" + + - name: Checkout out Repo + uses: actions/checkout@v4 + with: + ref: ${{ steps.get-checkout-ref.outputs.CHECKOUT_REF }} + fetch-depth: 0 + + - name: Get test matrix + shell: bash + id: test-case-matrix + run: | + set -x + service_l=$(echo ${{ inputs.service }} | tr '[:upper:]' '[:lower:]') + cd ${{ github.workspace }}/tests + test_cases=$(find . -type f -name "test_${service_l}*.sh" -print | cut -d/ -f2 | jq -R '.' | jq -sc '.') + echo "test_cases=$test_cases" >> $GITHUB_OUTPUT + + run-test: + needs: [get-test-case] + strategy: + matrix: + test_case: ${{ fromJSON(needs.get-test-case.outputs.test_cases) }} + fail-fast: false + runs-on: ${{ inputs.hardware }} + continue-on-error: true + steps: + - name: Clean up Working Directory + run: | + sudo rm -rf ${{github.workspace}}/* + docker system prune -f + docker rmi $(docker images --filter reference="*/*:comps" -q) || true + + - name: Checkout out Repo + uses: actions/checkout@v4 + with: + ref: ${{ needs.get-test-case.outputs.CHECKOUT_REF }} + fetch-depth: 0 + + - name: Run test + shell: bash + env: + HUGGINGFACEHUB_API_TOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }} + HF_TOKEN: ${{ secrets.HF_TOKEN }} + GOOGLE_CSE_ID: ${{ secrets.GOOGLE_CSE_ID }} + GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }} + PINECONE_KEY: ${{ secrets.PINECONE_KEY }} + service: ${{ inputs.service }} + hardware: ${{ inputs.hardware }} + test_case: ${{ matrix.test_case }} + run: | + cd ${{ github.workspace }}/tests + service=$(echo "${test_case}" | sed 's/test_\(.*\)\.sh/\1/') + echo "service=${service}" >> $GITHUB_ENV + if [ -f ${test_case} ]; then timeout 30m bash ${test_case}; else echo "Test script {${test_case}} not found, skip test!"; fi + + - name: Clean up container + if: cancelled() || failure() + run: | + cid=$(docker ps -aq --filter "name=test-comps-*") + if [[ ! 
-z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi + docker system prune -f + + - name: Publish pipeline artifact + if: ${{ !cancelled() }} + uses: actions/upload-artifact@v4 + with: + name: ${{ env.service }} + path: ${{ github.workspace }}/tests/*.log diff --git a/.github/workflows/docker/compose/agent-compose-cd.yaml b/.github/workflows/docker/compose/agent-compose-cd.yaml new file mode 100644 index 000000000..a285ecc34 --- /dev/null +++ b/.github/workflows/docker/compose/agent-compose-cd.yaml @@ -0,0 +1,9 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# this file should be run in the root of the repo +services: + comps-agent-langchain: + build: + dockerfile: comps/agent/langchain/docker/Dockerfile + image: ${REGISTRY}opea/comps-agent-langchain:${TAG:-latest} diff --git a/.github/workflows/docker/compose/chathistory-compose-cd.yaml b/.github/workflows/docker/compose/chathistory-compose-cd.yaml new file mode 100644 index 000000000..f8930cde8 --- /dev/null +++ b/.github/workflows/docker/compose/chathistory-compose-cd.yaml @@ -0,0 +1,9 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# this file should be run in the root of the repo +services: + chathistory-mongo-server: + build: + dockerfile: comps/chathistory/mongo/docker/Dockerfile + image: ${REGISTRY}opea/chathistory-mongo-server:${TAG:-latest} diff --git a/.github/workflows/docker/compose/dataprep-compose-cd.yaml b/.github/workflows/docker/compose/dataprep-compose-cd.yaml new file mode 100644 index 000000000..19f4c063d --- /dev/null +++ b/.github/workflows/docker/compose/dataprep-compose-cd.yaml @@ -0,0 +1,25 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# others: dataprep-redis-llama-index,dataprep-on-ray-redis +services: + dataprep-redis-llama-index: + build: + dockerfile: comps/dataprep/redis/llama_index/docker/Dockerfile + image: ${REGISTRY}opea/dataprep-redis-llama-index:${TAG:-latest} + dataprep-on-ray-redis: + build: + dockerfile: comps/dataprep/redis/langchain_ray/docker/Dockerfile + image: ${REGISTRY}opea/dataprep-on-ray-redis:${TAG:-latest} + dataprep-milvus: + build: + dockerfile: comps/dataprep/milvus/docker/Dockerfile + image: ${REGISTRY}opea/dataprep-milvus:${TAG:-latest} + dataprep-pgvector: + build: + dockerfile: comps/dataprep/pgvector/langchain/docker/Dockerfile + image: ${REGISTRY}opea/dataprep-pgvector:${TAG:-latest} + dataprep-pinecone: + build: + dockerfile: comps/dataprep/pinecone/docker/Dockerfile + image: ${REGISTRY}opea/dataprep-pinecone:${TAG:-latest} diff --git a/.github/workflows/docker/compose/dataprep-compose.yaml b/.github/workflows/docker/compose/dataprep-compose.yaml index 5cca84cb4..1671235f4 100644 --- a/.github/workflows/docker/compose/dataprep-compose.yaml +++ b/.github/workflows/docker/compose/dataprep-compose.yaml @@ -13,11 +13,3 @@ services: build: dockerfile: comps/dataprep/qdrant/docker/Dockerfile image: ${REGISTRY}opea/dataprep-qdrant:${TAG:-latest} - dataprep-redis-llama-index: - build: - dockerfile: comps/dataprep/redis/llama_index/docker/Dockerfile - image: ${REGISTRY}opea/dataprep-redis-llama-index:${TAG:-latest} - dataprep-on-ray-redis: - build: - dockerfile: comps/dataprep/redis/langchain_ray/docker/Dockerfile - image: ${REGISTRY}opea/dataprep-on-ray-redis:${TAG:-latest} diff --git a/.github/workflows/docker/compose/embeddings-compose-cd.yaml b/.github/workflows/docker/compose/embeddings-compose-cd.yaml new file mode 100644 index 000000000..3d08a1b53 --- 
/dev/null +++ b/.github/workflows/docker/compose/embeddings-compose-cd.yaml @@ -0,0 +1,16 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +services: + embedding-langchain-mosec-endpoint: + build: + dockerfile: comps/embeddings/langchain-mosec/mosec-docker/Dockerfile + image: ${REGISTRY}opea/embedding-langchain-mosec-endpoint:${TAG:-latest} + embedding-langchain-mosec: + build: + dockerfile: comps/embeddings/langchain-mosec/docker/Dockerfile + image: ${REGISTRY}opea/embedding-langchain-mosec:${TAG:-latest} + embedding-tei-llama-index: + build: + dockerfile: comps/embeddings/llama_index/docker/Dockerfile + image: ${REGISTRY}opea/embedding-tei-llama-index:${TAG:-latest} diff --git a/.github/workflows/docker/compose/guardrails-compose-cd.yaml b/.github/workflows/docker/compose/guardrails-compose-cd.yaml new file mode 100644 index 000000000..e6365a99d --- /dev/null +++ b/.github/workflows/docker/compose/guardrails-compose-cd.yaml @@ -0,0 +1,8 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +services: + guardrails-pii-detection: + build: + dockerfile: comps/guardrails/pii_detection/docker/Dockerfile + image: ${REGISTRY}opea/guardrails-pii-detection:${TAG:-latest} diff --git a/.github/workflows/docker/compose/llms-compose-cd.yaml b/.github/workflows/docker/compose/llms-compose-cd.yaml new file mode 100644 index 000000000..f60e0e921 --- /dev/null +++ b/.github/workflows/docker/compose/llms-compose-cd.yaml @@ -0,0 +1,8 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +services: + llm-native: + build: + dockerfile: comps/llms/text-generation/native/docker/Dockerfile + image: ${REGISTRY}opea/llm-native:${TAG:-latest} diff --git a/.github/workflows/docker/compose/llms-compose.yaml b/.github/workflows/docker/compose/llms-compose.yaml index a899bcfce..36b8adfc8 100644 --- a/.github/workflows/docker/compose/llms-compose.yaml +++ b/.github/workflows/docker/compose/llms-compose.yaml @@ -2,7 +2,7 @@ # SPDX-License-Identifier: Apache-2.0 # this file should be run in the root of the repo -# images used by GenAIExamples: llm-tgi,llm-ollama,llm-docsum-tgi,llm-faqgen-tgi,llm-vllm,llm-vllm-ray +# images used by GenAIExamples: llm-tgi,llm-ollama,llm-docsum-tgi,llm-faqgen-tgi,llm-vllm,llm-vllm-hpu,llm-vllm-ray,llm-vllm-ray-hpu services: llm-tgi: build: @@ -24,6 +24,10 @@ services: build: dockerfile: comps/llms/text-generation/vllm/docker/Dockerfile.microservice image: ${REGISTRY}opea/llm-vllm:${TAG:-latest} + llm-vllm-hpu: + build: + dockerfile: comps/llms/text-generation/vllm/docker/Dockerfile.hpu + image: ${REGISTRY}opea/llm-vllm-hpu:${TAG:-latest} llm-vllm-ray: build: dockerfile: comps/llms/text-generation/vllm-ray/docker/Dockerfile.microservice @@ -36,3 +40,7 @@ services: build: dockerfile: comps/llms/text-generation/predictionguard/docker/Dockerfile image: ${REGISTRY}opea/llm-predictionguard:${TAG:-latest} + llm-vllm-ray-hpu: + build: + dockerfile: comps/llms/text-generation/vllm-ray/docker/Dockerfile.vllmray + image: ${REGISTRY}opea/llm-vllm-ray-hpu:${TAG:-latest} diff --git a/.github/workflows/docker/compose/lvms-compose-cd.yaml b/.github/workflows/docker/compose/lvms-compose-cd.yaml new file mode 100644 index 000000000..fbdad3011 --- /dev/null +++ b/.github/workflows/docker/compose/lvms-compose-cd.yaml @@ -0,0 +1,23 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# this file should be run in the root of the repo +services: + lvm: + build: + dockerfile: 
comps/lvms/Dockerfile + image: ${REGISTRY}opea/lvm:${TAG:-latest} + # Xeon CPU + llava: + build: + dockerfile: comps/lvms/llava/Dockerfile + image: ${REGISTRY}opea/llava:${TAG:-latest} + # Gaudi2 HPU + llava_hpu: + build: + dockerfile: comps/lvms/llava/Dockerfile_hpu + image: ${REGISTRY}opea/llava_hpu:${TAG:-latest} + lvm-tgi: + build: + dockerfile: comps/lvms/Dockerfile_tgi + image: ${REGISTRY}opea/lvm-tgi:${TAG:-latest} diff --git a/.github/workflows/docker/compose/nginx-compose-cd.yaml b/.github/workflows/docker/compose/nginx-compose-cd.yaml new file mode 100644 index 000000000..e6cf05aa4 --- /dev/null +++ b/.github/workflows/docker/compose/nginx-compose-cd.yaml @@ -0,0 +1,9 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# this file should be run in the root of the repo +services: + nginx: + build: + dockerfile: comps/nginx/docker/Dockerfile + image: ${REGISTRY}opea/nginx:${TAG:-latest} diff --git a/.github/workflows/docker/compose/prompt_registry-compose-cd.yaml b/.github/workflows/docker/compose/prompt_registry-compose-cd.yaml new file mode 100644 index 000000000..52923a2f1 --- /dev/null +++ b/.github/workflows/docker/compose/prompt_registry-compose-cd.yaml @@ -0,0 +1,9 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# this file should be run in the root of the repo +services: + promptregistry-mongo-server: + build: + dockerfile: comps/prompt_registry/mongo/docker/Dockerfile + image: ${REGISTRY}opea/promptregistry-mongo-server:${TAG:-latest} diff --git a/.github/workflows/docker/compose/reranks-compose-cd.yaml b/.github/workflows/docker/compose/reranks-compose-cd.yaml new file mode 100644 index 000000000..85339c8b8 --- /dev/null +++ b/.github/workflows/docker/compose/reranks-compose-cd.yaml @@ -0,0 +1,16 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +services: + reranking-fastrag: + build: + dockerfile: comps/reranks/fastrag/docker/Dockerfile + image: ${REGISTRY}opea/reranking-fastrag:${TAG:-latest} + reranking-langchain-mosec-endpoint: + build: + dockerfile: comps/reranks/langchain-mosec/mosec-docker/Dockerfile + image: ${REGISTRY}opea/reranking-langchain-mosec-endpoint:${TAG:-latest} + reranking-langchain-mosec: + build: + dockerfile: comps/reranks/langchain-mosec/docker/Dockerfile + image: ${REGISTRY}opea/reranking-langchain-mosec:${TAG:-latest} diff --git a/.github/workflows/docker/compose/retrievers-compose.yaml b/.github/workflows/docker/compose/retrievers-compose.yaml index d9de4b27f..289871ed5 100644 --- a/.github/workflows/docker/compose/retrievers-compose.yaml +++ b/.github/workflows/docker/compose/retrievers-compose.yaml @@ -11,3 +11,19 @@ services: build: dockerfile: comps/retrievers/haystack/qdrant/docker/Dockerfile image: ${REGISTRY}opea/retriever-qdrant:${TAG:-latest} + retriever-pgvector: + build: + dockerfile: comps/retrievers/langchain/pgvector/docker/Dockerfile + image: ${REGISTRY}opea/retriever-pgvector:${TAG:-latest} + retriever-pinecone: + build: + dockerfile: comps/retrievers/langchain/pinecone/docker/Dockerfile + image: ${REGISTRY}opea/retriever-pinecone:${TAG:-latest} + retriever-milvus: + build: + dockerfile: comps/retrievers/langchain/milvus/docker/Dockerfile + image: ${REGISTRY}opea/retriever-milvus:${TAG:-latest} + retriever-redis-llamaindex: + build: + dockerfile: comps/retrievers/llamaindex/docker/Dockerfile + image: ${REGISTRY}opea/retriever-redis-llamaindex:${TAG:-latest} diff --git a/.github/workflows/docker/compose/tts-compose.yaml 
b/.github/workflows/docker/compose/tts-compose.yaml index dd7766345..358aa74a1 100644 --- a/.github/workflows/docker/compose/tts-compose.yaml +++ b/.github/workflows/docker/compose/tts-compose.yaml @@ -4,10 +4,10 @@ # this file should be run in the root of the repo # images used by GenAIExamples: reranking-tei services: - asr: + tts: build: dockerfile: comps/tts/Dockerfile - image: ${REGISTRY}opea/asr:${TAG:-latest} + image: ${REGISTRY}opea/tts:${TAG:-latest} speecht5: build: dockerfile: comps/tts/speecht5/Dockerfile diff --git a/.github/workflows/manual-bom-scan.yml b/.github/workflows/manual-bom-scan.yml new file mode 100644 index 000000000..4a781b254 --- /dev/null +++ b/.github/workflows/manual-bom-scan.yml @@ -0,0 +1,102 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +name: Comps docker images BoM scan on manual event +on: + workflow_dispatch: + inputs: + services: + default: "asr" + description: "List of services to test [agent_langchain,asr,chathistory_mongo,dataprep_milvus...]" #,embeddings,guardrails,knowledgegraphs,llms,lvms,prompt_registry,ragas,reranks,retrievers,tts,vectorstores,web_retrievers]" + required: true + type: string + tag: + default: "comps" + description: "Tag to apply to images" + required: true + type: string + sbom-scan: + default: true + description: "Enable sbom-scan" + required: false + type: boolean + trivy-scan: + default: true + description: "Enable trivy-scan" + required: false + type: boolean + +permissions: read-all +jobs: + get-image-list: + runs-on: ubuntu-latest + outputs: + matrix: ${{ steps.scan-matrix.outputs.matrix }} + steps: + - name: Checkout out Repo + uses: actions/checkout@v4 + + - name: Set Matrix + id: scan-matrix + run: | + pip install yq + services=($(echo ${{ inputs.services }} | tr ',' ' ')) + image_list=[] + for service in ${services[@]} + do + images=$(cat ${{ github.workspace }}/.github/workflows/docker/compose/${service}-compose.yaml | yq -r '.[]' | jq 'keys' | jq -c '.') + image_list=$(echo ${image_list} | jq -s '.[0] + .[1] | unique' - <(echo ${images})) + done + echo "matrix=$(echo ${image_list} | jq -c '.')" >> $GITHUB_OUTPUT + + scan-license: + needs: get-image-list + runs-on: "docker-build-gaudi" + strategy: + matrix: + image: ${{ fromJson(needs.get-image-list.outputs.matrix) }} + fail-fast: false + steps: + - name: Pull Image + run: | + docker pull ${OPEA_IMAGE_REPO}opea/${{ matrix.image }}:latest + # ${{ inputs.tag }} + echo "OPEA_IMAGE_REPO=${OPEA_IMAGE_REPO}" >> $GITHUB_ENV + + - name: SBOM Scan Container + uses: anchore/sbom-action@v0.17.1 + if: ${{ fromJSON(inputs.sbom-scan) }} + with: + image: ${{ env.OPEA_IMAGE_REPO }}opea/${{ matrix.image }}:${{ inputs.tag }} + output-file: ${{ matrix.image }}-sbom-scan.txt + format: "spdx-json" + + - name: Security Scan Container + uses: aquasecurity/trivy-action@0.24.0 + if: ${{ fromJSON(inputs.trivy-scan) }} + with: + image-ref: ${{ env.OPEA_IMAGE_REPO }}opea/${{ matrix.image }}:${{ inputs.tag }} + output: ${{ matrix.image }}-trivy-scan.txt + format: "table" + exit-code: "1" + ignore-unfixed: true + vuln-type: "os,library" + severity: "CRITICAL,HIGH" + + - name: Cleanup + if: always() + run: docker rmi -f ${OPEA_IMAGE_REPO}opea/${{ matrix.image }}:${{ inputs.tag }} + + - uses: actions/upload-artifact@v4.3.4 + if: always() + with: + name: sbom-scan + path: ${{ matrix.image }}-sbom-scan.txt + overwrite: true + + - uses: actions/upload-artifact@v4.3.4 + if: always() + with: + name: trivy-scan + path: ${{ matrix.image }}-trivy-scan.txt + overwrite: 
true diff --git a/.github/workflows/manual-comps-test.yml b/.github/workflows/manual-comps-test.yml new file mode 100644 index 000000000..bde3bf9fa --- /dev/null +++ b/.github/workflows/manual-comps-test.yml @@ -0,0 +1,62 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +name: Comps CD workflow on manual event +on: + workflow_dispatch: + inputs: + services: + default: "asr" + description: "List of services to test [agent,asr,chathistory,dataprep,embeddings,guardrails,llms,lvms,nginx,prompt_registry,reranks,retrievers,tts,web_retrievers]" + required: true + type: string + build: + default: true + description: "Build test required images for Comps" + required: false + type: boolean + test: + default: true + description: "Test comps with docker compose" + required: false + type: boolean + tag: + default: "comps" + description: "Tag to apply to images" + required: true + type: string + mode: + default: "CD" + description: "Whether the test range is CI or CD" + required: false + type: string + +permissions: read-all + +jobs: + get-test-matrix: + runs-on: ubuntu-latest + outputs: + services: ${{ steps.get-matrix.outputs.services }} + steps: + - name: Create Matrix + id: get-matrix + run: | + services=($(echo ${{ inputs.services }} | tr ',' ' ')) + services_json=$(printf '%s\n' "${services[@]}" | sort -u | jq -R '.' | jq -sc '.') + echo "services=$services_json" >> $GITHUB_OUTPUT + + run-services: + needs: [get-test-matrix] + strategy: + matrix: + service: ${{ fromJson(needs.get-test-matrix.outputs.services) }} + fail-fast: false + uses: ./.github/workflows/_comps-workflow.yml + with: + service: ${{ matrix.service }} + tag: ${{ inputs.tag }} + node: gaudi + mode: ${{ inputs.mode }} + test: ${{ inputs.test }} + secrets: inherit diff --git a/.github/workflows/manual-freeze-images.yml b/.github/workflows/manual-freeze-images.yml new file mode 100644 index 000000000..2df3b1978 --- /dev/null +++ b/.github/workflows/manual-freeze-images.yml @@ -0,0 +1,43 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +name: Freeze base images and 3rd part images on manual event + +on: + workflow_dispatch: + +jobs: + freeze-images: + runs-on: ubuntu-latest + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + with: + fetch-depth: 0 + ref: ${{ github.ref }} + + - name: install skopeo + run: | + sudo apt update + sudo apt -y install skopeo + + - uses: actions/setup-python@v5 + with: + python-version: "3.10" + + - name: Set up Git + run: | + git config --global user.name "NeuralChatBot" + git config --global user.email "grp_neural_chat_bot@intel.com" + git remote set-url origin https://NeuralChatBot:"${{ secrets.ACTION_TOKEN }}"@github.com/opea-project/GenAIComps.git + + - name: Run script + run: | + bash .github/workflows/scripts/freeze_images.sh + + - name: Commit changes + run: | + git add . 
+ git commit -s -m "Freeze base images tag" + git push diff --git a/.github/workflows/manual-freeze-requirements.yml b/.github/workflows/manual-freeze-requirements.yml new file mode 100644 index 000000000..8f00ca423 --- /dev/null +++ b/.github/workflows/manual-freeze-requirements.yml @@ -0,0 +1,38 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +name: Freeze-requirements + +on: + workflow_dispatch: + +jobs: + freeze-requirements: + runs-on: ubuntu-latest + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + with: + fetch-depth: 0 + ref: ${{ github.ref }} + + - uses: actions/setup-python@v5 + with: + python-version: "3.10" + + - name: Set up Git + run: | + git config --global user.name "NeuralChatBot" + git config --global user.email "grp_neural_chat_bot@intel.com" + git remote set-url origin https://NeuralChatBot:"${{ secrets.ACTION_TOKEN }}"@github.com/opea-project/GenAIComps.git + + - name: Run script + run: | + bash .github/workflows/scripts/freeze_requirements.sh + + - name: Commit changes + run: | + git add . + git commit -s -m "Freeze requirements" + git push diff --git a/.github/workflows/image-build-on-manual.yml b/.github/workflows/manual-image-build.yml similarity index 100% rename from .github/workflows/image-build-on-manual.yml rename to .github/workflows/manual-image-build.yml diff --git a/.github/workflows/code-scan.yml b/.github/workflows/mix-code-scan.yml similarity index 100% rename from .github/workflows/code-scan.yml rename to .github/workflows/mix-code-scan.yml diff --git a/.github/workflows/megaservice-test.yml b/.github/workflows/mix-megaservice-test.yml similarity index 98% rename from .github/workflows/megaservice-test.yml rename to .github/workflows/mix-megaservice-test.yml index 7e55e2bf2..83c826cd7 100644 --- a/.github/workflows/megaservice-test.yml +++ b/.github/workflows/mix-megaservice-test.yml @@ -10,7 +10,7 @@ on: branches: [main] types: [opened, reopened, ready_for_review, synchronize] # added `ready_for_review` since draft is skipped paths: - - .github/workflows/megaservice-test.yml + - .github/workflows/mix-megaservice-test.yml - comps/cores/** - requirements.txt - setup.py diff --git a/.github/workflows/trellix.yml b/.github/workflows/mix-trellix.yml similarity index 100% rename from .github/workflows/trellix.yml rename to .github/workflows/mix-trellix.yml diff --git a/.github/workflows/pr-dockerfile-path-scan.yaml b/.github/workflows/pr-dockerfile-path-scan.yaml new file mode 100644 index 000000000..b5e1ce753 --- /dev/null +++ b/.github/workflows/pr-dockerfile-path-scan.yaml @@ -0,0 +1,120 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +name: File Change Warning + +on: + pull_request: + branches: [main] + types: [opened, reopened, ready_for_review, synchronize] + +# If there is a new commit, the previous jobs will be canceled +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + +jobs: + Dockerfile-path-change-detection-in-GenAIComps: + runs-on: ubuntu-latest + steps: + - name: Clean Up Working Directory + run: sudo rm -rf ${{github.workspace}}/* + + - name: Checkout code + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Check for changed Dockerfile paths in yaml + run: | + set -e + shopt -s globstar + cd ${{github.workspace}} + is_use="FALSE" + used_files="" + merged_commit=$(git log -1 --format='%H') + changed_files="$(git diff --name-status --diff-filter=DR ${{ 
github.event.pull_request.base.sha }} ${merged_commit} -- '**/Dockerfile**' | cut -f2)" + if [ -n "$changed_files" ]; then + for file in $changed_files; do + if grep -q "$file" .github/workflows/docker/compose/*.yaml; then + is_use="TRUE" + used_files+="$file " + fi + done + fi + + if [[ "$is_use" == "TRUE" ]]; then + echo "Warning: Changed Dockerfile paths:" + echo "$used_files" + echo "Please modify the corresponding yaml in GenAIComps/.github/workflows/docker/compose and ask suyue.chen@intel.com for final confirmation." + exit 1 + fi + + - name: Check for changed Dockerfile paths in readme + run: | + set -e + shopt -s globstar + cd ${{github.workspace}} + is_use="FALSE" + used_files="" + merged_commit=$(git log -1 --format='%H') + changed_files="$(git diff --name-status --diff-filter=DR ${{ github.event.pull_request.base.sha }} ${merged_commit} -- '**/Dockerfile**' | cut -f2)" + if [ -n "$changed_files" ]; then + for file in $changed_files; do + if grep -q "$file" ./**/*.md; then + is_use="TRUE" + used_files+="$file " + fi + done + fi + + if [[ "$is_use" == "TRUE" ]]; then + echo "Warning: Changed Dockerfile paths:" + echo "$used_files" + echo "Please modify the corresponding README in GenAIComps and ask suyue.chen@intel.com for final confirmation." + exit 1 + fi + + Dockerfile-path-change-detection-in-GenAIExamples: + runs-on: ubuntu-latest + steps: + - name: Clean Up Working Directory + run: sudo rm -rf ${{github.workspace}}/* + + - name: Checkout code + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Clone repo GenAIExamples + run: | + cd .. + git clone https://github.com/opea-project/GenAIExamples + + - name: Check for changed Dockerfile paths + run: | + set -e + shopt -s globstar + cd ${{github.workspace}} + is_use="FALSE" + used_files="" + merged_commit=$(git log -1 --format='%H') + changed_files="$(git diff --name-status --diff-filter=DR ${{ github.event.pull_request.base.sha }} ${merged_commit} -- '**/Dockerfile' | cut -f2)" + if [ -n "$changed_files" ]; then + for file in $changed_files; do + matching_files=$(grep -rl "$file" ../GenAIExamples/**/*.md) + if [ -n "$matching_files" ]; then + is_use="TRUE" + used_files+="$file " + echo "Modified Dockerfile '$file' is referenced in:" + echo "$matching_files" + fi + done + fi + + if [[ "$is_use" == "TRUE" ]]; then + echo "Warning: Changed Dockerfile paths:" + echo "$used_files" + echo "Please modify the corresponding README in GenAIExamples repo and ask suyue.chen@intel.com for final confirmation." 
+ exit 1 + fi diff --git a/.github/workflows/pr-examples-test.yml b/.github/workflows/pr-examples-test.yml new file mode 100644 index 000000000..e3fe2b6ab --- /dev/null +++ b/.github/workflows/pr-examples-test.yml @@ -0,0 +1,71 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +name: Example-test + +on: + pull_request_target: + branches: [main] + types: [opened, reopened, ready_for_review, synchronize] # added `ready_for_review` since draft is skipped + paths: + - .github/workflows/pr-examples-test.yml + - comps/cores/** + - comps/embeddings/langchain/** + - comps/retrievers/langchain/redis/** + - comps/reranks/tei/** + - comps/llms/text-generation/tgi/** + - comps/dataprep/redis/langchain/** + - requirements.txt + - "!**.md" + +# If there is a new commit, the previous jobs will be canceled +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + +jobs: + example-test: + runs-on: gaudi-01-3 + steps: + - name: Clean Up Working Directory + run: | + sudo rm -rf ${{github.workspace}}/* || true + echo y | docker system prune + docker rmi $(docker images --filter reference="*/*:comps" -q) || true + + - name: Checkout out Repo + uses: actions/checkout@v4 + with: + ref: "refs/pull/${{ github.event.number }}/merge" + + - name: Run ChatQnA + env: + HUGGINGFACEHUB_API_TOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }} + run: | + cd ../ && sudo rm -rf GenAIExamples + git clone https://github.com/opea-project/GenAIExamples.git + cd GenAIExamples/ChatQnA/docker + cp -r ${{ github.workspace }}/../GenAIComps . + + cd ../tests + sed -i '/GenAIComps.git/d' test_chatqna_on_gaudi.sh + cat test_chatqna_on_gaudi.sh + + echo "Run test..." + echo "LOG_DIR=$(pwd)" >> $GITHUB_ENV + export IMAGE_TAG="comps" + timeout 50m bash test_chatqna_on_gaudi.sh + + - name: Clean up container + if: cancelled() || failure() + run: | + cd ${{ github.workspace }}/../GenAIExamples/ChatQnA/docker/gaudi + docker compose stop && docker compose rm -f + docker system prune -f + + - name: Publish pipeline artifact + if: ${{ !cancelled() }} + uses: actions/upload-artifact@v4 + with: + name: "Examples-Test-Logs" + path: ${{ env.LOG_DIR }}/*.log diff --git a/.github/workflows/microservice-test.yml b/.github/workflows/pr-microservice-test.yml similarity index 80% rename from .github/workflows/microservice-test.yml rename to .github/workflows/pr-microservice-test.yml index d8000b93d..786b887c5 100644 --- a/.github/workflows/microservice-test.yml +++ b/.github/workflows/pr-microservice-test.yml @@ -5,14 +5,13 @@ name: MicroService-test on: pull_request_target: - branches: [main] + branches: ["main", "*rc"] types: [opened, reopened, ready_for_review, synchronize] # added `ready_for_review` since draft is skipped paths: - comps/** - tests/** - "!**.md" - - "!**.txt" - - .github/workflows/microservice-test.yml + - .github/workflows/pr-microservice-test.yml # If there is a new commit, the previous jobs will be canceled concurrency: @@ -21,7 +20,7 @@ concurrency: jobs: job1: - uses: ./.github/workflows/reuse-get-test-matrix.yml + uses: ./.github/workflows/_get-test-matrix.yml Microservice-test: needs: job1 @@ -31,7 +30,10 @@ jobs: continue-on-error: true steps: - name: Clean Up Working Directory - run: sudo rm -rf ${{github.workspace}}/* + run: | + sudo rm -rf ${{github.workspace}}/* + docker system prune -f + docker rmi $(docker images --filter reference="*/*:comps" -q) || true - name: Checkout out Repo uses: actions/checkout@v4 @@ -43,20 +45,21 
@@ jobs: HF_TOKEN: ${{ secrets.HF_TOKEN }} GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }} GOOGLE_CSE_ID: ${{ secrets.GOOGLE_CSE_ID }} + PINECONE_KEY: ${{ secrets.PINECONE_KEY }} service_path: ${{ matrix.service }} hardware: ${{ matrix.hardware }} run: | cd tests service=$(echo $service_path | tr '/' '_') echo "service=${service}" >> $GITHUB_ENV - if [ -f test_${service}.sh ]; then timeout 30m bash test_${service}.sh; else echo "Test script not found, skip test!"; fi + if [ -f test_${service}.sh ]; then timeout 60m bash test_${service}.sh; else echo "Test script not found, skip test!"; fi - name: Clean up container if: cancelled() || failure() run: | cid=$(docker ps -aq --filter "name=test-comps-*") if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi - echo y | docker system prune --all + docker system prune -f - name: Publish pipeline artifact if: ${{ !cancelled() }} diff --git a/.github/workflows/image-build-on-push.yml b/.github/workflows/push-image-build.yml similarity index 98% rename from .github/workflows/image-build-on-push.yml rename to .github/workflows/push-image-build.yml index a72d13a61..5472111dd 100644 --- a/.github/workflows/image-build-on-push.yml +++ b/.github/workflows/push-image-build.yml @@ -10,7 +10,7 @@ on: - comps/** - "!**.md" - "!**.txt" - - .github/workflows/image-build-on-push.yml + - .github/workflows/push-image-build.yml concurrency: group: ${{ github.workflow }}-${{ github.ref }}-on-push diff --git a/.github/workflows/reuse-image-build.yml b/.github/workflows/reuse-image-build.yml deleted file mode 100644 index e2ed6883b..000000000 --- a/.github/workflows/reuse-image-build.yml +++ /dev/null @@ -1,32 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -name: Image Build -permissions: read-all -on: - workflow_call: - inputs: - micro_service: - required: true - type: string - -jobs: - micro-image-build: - continue-on-error: true - strategy: - matrix: - node: [docker-build-xeon, docker-build-gaudi] - runs-on: ${{ matrix.node }} - steps: - - name: Checkout out Repo - uses: actions/checkout@v4 - with: - fetch-depth: 0 - - - name: Building MicroService Docker Image - id: build-microservice-image - env: - micro_service: ${{ inputs.micro_service }} - hardware: ${{ matrix.node }} - run: | - bash .github/workflows/scripts/docker_images_build_push.sh ${micro_service} ${hardware} diff --git a/.github/workflows/image-build-on-schedule.yml b/.github/workflows/schedule-image-build.yml similarity index 100% rename from .github/workflows/image-build-on-schedule.yml rename to .github/workflows/schedule-image-build.yml diff --git a/.github/workflows/scripts/docker_images_build_push.sh b/.github/workflows/scripts/docker_images_build_push.sh deleted file mode 100644 index eaf4d78f3..000000000 --- a/.github/workflows/scripts/docker_images_build_push.sh +++ /dev/null @@ -1,77 +0,0 @@ -#!/bin/bash -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -set -xe - -WORKSPACE=$PWD -IMAGE_REPO=${IMAGE_REPO:-$OPEA_IMAGE_REPO} -IMAGE_TAG=${IMAGE_TAG:-latest} - -function docker_build() { - # docker_build - IMAGE_NAME=$1 - micro_service=$2 - dockerfile_path=${WORKSPACE}/comps/${micro_service} - if [[ "$IMAGE_NAME" == *"gaudi" ]]; then - dockerfile_name="Dockerfile_hpu" - else - dockerfile_name="Dockerfile" - fi - if [ -f "$dockerfile_path/$dockerfile_name" ]; then - DOCKERFILE_PATH="$dockerfile_path/$dockerfile_name" - elif [ -f "$dockerfile_path/docker/$dockerfile_name" ]; then - 
DOCKERFILE_PATH="$dockerfile_path/docker/$dockerfile_name" - else - echo "Dockerfile not found" - exit 1 - fi - echo "Building ${IMAGE_REPO}${IMAGE_NAME}:$IMAGE_TAG using Dockerfile $DOCKERFILE_PATH" - - docker build --no-cache -t ${IMAGE_REPO}${IMAGE_NAME}:$IMAGE_TAG -f $DOCKERFILE_PATH . - docker push ${IMAGE_REPO}${IMAGE_NAME}:$IMAGE_TAG - docker rmi ${IMAGE_REPO}${IMAGE_NAME}:$IMAGE_TAG -} - -micro_service=$1 -hardware=$(echo $2 | cut -d- -f3) -case ${micro_service} in - "asr"|"tts") - IMAGE_NAME="opea/${micro_service}" - ;; - "embeddings/langchain") - IMAGE_NAME="opea/embedding-tei" - ;; - "retrievers/langchain/redis") - IMAGE_NAME="opea/retriever-redis" - ;; - "reranks/tei") - IMAGE_NAME="opea/reranking-tei" - ;; - "llms/text-generation/tgi") - IMAGE_NAME="opea/llm-tgi" - ;; - "dataprep/redis/langchain") - IMAGE_NAME="opea/dataprep-redis" - ;; - "llms/summarization/tgi") - IMAGE_NAME="opea/llm-docsum-tgi" - ;; - "llms/faq-generation/tgi") - IMAGE_NAME="opea/llm-faqgen-tgi" - ;; - "web_retrievers/langchain/chroma") - IMAGE_NAME="opea/web-retriever-chroma" - ;; - "tts/speecht5") - if [ "${hardware}" == "gaudi" ]; then IMAGE_NAME="opea/speecht5-gaudi"; else IMAGE_NAME="opea/speecht5"; fi - ;; - "asr/whisper") - if [ "${hardware}" == "gaudi" ]; then IMAGE_NAME="opea/whisper-gaudi"; else IMAGE_NAME="opea/whisper"; fi - ;; - *) - echo "Not supported yet" - exit 0 - ;; -esac -docker_build "${IMAGE_NAME}" "${micro_service}" diff --git a/.github/workflows/scripts/freeze_images.sh b/.github/workflows/scripts/freeze_images.sh new file mode 100644 index 000000000..27743fd90 --- /dev/null +++ b/.github/workflows/scripts/freeze_images.sh @@ -0,0 +1,51 @@ +#!/bin/bash + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +declare -A dict +dict["langchain/langchain"]="docker://docker.io/langchain/langchain" +# dict["vault.habana.ai/gaudi-docker/1.16.1/ubuntu22.04/habanalabs/pytorch-installer-2.2.2"]="docker://vault.habana.ai/gaudi-docker/1.16.1/ubuntu22.04/habanalabs/pytorch-installer-2.2.2" +dict["opea/habanalabs:1.16.1-pytorch-installer-2.2.2"]="docker://docker.io/opea/habanalabs:1.16.1-pytorch-installer-2.2.2" + +function get_latest_version() { + repo_image=$1 + versions=$(skopeo list-tags ${dict[$repo_image]} | jq -r '.Tags[]') + printf "version list:\n$versions\n" + latest_version=$(printf "%s\n" "${versions[@]}" | grep -E '^[\.0-9\-]+$' | sort -V | tail -n 1) + echo "latest version: $latest_version" + replace_image_version $repo_image $latest_version +} + +function replace_image_version() { + repo_image=$1 + version=$2 + if [[ -z "$version" ]]; then + echo "version is empty" + else + echo "replace $repo_image:latest with $repo_image:$version" + find . -name "Dockerfile*" | xargs sed -i "s|$repo_image:latest[A-Za-z0-9\-]*|$repo_image:$version|g" + find . -name "*.yaml" | xargs sed -i "s|$repo_image:latest[A-Za-z0-9\-]*|$repo_image:$version|g" + find . 
-name "*.md" | xargs sed -i "s|$repo_image:latest[A-Za-z0-9\-]*|$repo_image:$version|g" + fi +} + +function check_branch_name() { + if [[ "$GITHUB_REF_NAME" == "main" ]]; then + echo "$GITHUB_REF_NAME is protected branch" + exit 0 + else + echo "branch name is $GITHUB_REF_NAME" + fi +} + +function main() { + check_branch_name + for repo_image in "${!dict[@]}"; do + echo "::group::check $repo_image" + get_latest_version $repo_image + echo "::endgroup::" + done +} + +main diff --git a/.github/workflows/scripts/freeze_requirements.sh b/.github/workflows/scripts/freeze_requirements.sh new file mode 100644 index 000000000..431dadaef --- /dev/null +++ b/.github/workflows/scripts/freeze_requirements.sh @@ -0,0 +1,63 @@ +#!/bin/bash + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +function freeze() { + local file=$1 + local folder=$(dirname "$file") + local keep_origin_packages="true" + echo "::group::Check $file ..." + pip-compile \ + --no-upgrade \ + --no-annotate \ + --no-header \ + --output-file "$folder/freeze.txt" \ + "$file" + echo "::endgroup::" + + if [[ -e "$folder/freeze.txt" ]]; then + if [[ "$keep_origin_packages" == "true" ]]; then + # fix corner cases + sed -i '/^\s*#/d; s/#.*//; /^\s*$/d; s/ //g' "$file" + sed -i '/^\s*#/d; s/#.*//; /^\s*$/d; s/ //g; s/huggingface-hub\[inference\]/huggingface-hub/g; s/uvicorn\[standard\]/uvicorn/g' "$folder/freeze.txt" + if grep -q '^transformers$' $file && ! grep -q '^transformers\[sentencepiece\]$' $file; then + sed -i "s/transformers\[sentencepiece\]/transformers/" "$folder/freeze.txt" + fi + packages1=$(tr '><' '=' <"$file" | cut -d'=' -f1 | tr '[:upper:]' '[:lower:]' | sed 's/[-_]/-/g') + packages2=$(cut -d'=' -f1 "$folder/freeze.txt" | tr '[:upper:]' '[:lower:]' | sed 's/[-_]/-/g') + common_packages=$(comm -12 <(echo "$packages2" | sort) <(echo "$packages1" | sort)) + grep '^git\+' "$file" >temp_file || touch temp_file + rm -rf "$file" && mv temp_file "$file" + while IFS= read -r line; do + package=$(echo "$line" | cut -d'=' -f1) + package_transformed=$(echo "$package" | tr '[:upper:]' '[:lower:]' | sed 's/[_-]/-/g') + pattern=$(echo "$package_transformed" | sed 's/\[/\\\[/g; s/\]/\\\]/g') + if echo "$common_packages" | grep -q "^$pattern$"; then + echo "$line" >>"$file" + fi + done <"$folder/freeze.txt" + rm "$folder/freeze.txt" + else + mv "$folder/freeze.txt" "$file" + fi + fi +} + +function check_branch_name() { + if [[ "$GITHUB_REF_NAME" == "main" ]]; then + echo "$GITHUB_REF_NAME is protected branch" + exit 0 + else + echo "branch name is $GITHUB_REF_NAME" + fi +} + +function main() { + check_branch_name + echo "::group::pip install pip-tools" && pip install pip-tools --upgrade && echo "::endgroup::" + export -f freeze + find . 
-name "requirements.txt" | xargs -n 1 -I {} bash -c 'freeze "$@"' _ {} +} + +main diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml deleted file mode 100644 index 94b6ef31e..000000000 --- a/.github/workflows/test.yaml +++ /dev/null @@ -1,55 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -name: File Change Warning - -on: - pull_request: - branches: [main] - types: [opened, reopened, ready_for_review, synchronize] - -# If there is a new commit, the previous jobs will be canceled -concurrency: - group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} - cancel-in-progress: true - -jobs: - file-change-detection: - runs-on: ubuntu-latest - steps: - - name: Clean Up Working Directory - run: sudo rm -rf ${{github.workspace}}/* - - - name: Checkout code - uses: actions/checkout@v4 - with: - fetch-depth: 0 - - - name: Clone repo GenAIExamples - run: | - cd .. - git clone https://github.com/opea-project/GenAIExamples - - - name: Check for changed Dockerfile paths - run: | - shopt -s globstar - cd ${{github.workspace}} - is_use="FALSE" - used_files="" - merged_commit=$(git log -1 --format='%H') - changed_files="$(git diff --name-status --diff-filter=DR ${{ github.event.pull_request.base.sha }} ${merged_commit} -- '**/Dockerfile' | cut -f2)" - if [ -n "$changed_files" ]; then - for file in $changed_files; do - if grep -q "$file" ../GenAIExamples/**/*.md; then - is_use="TRUE" - used_files+="$file " - fi - done - fi - - if [[ "$is_use" == "TRUE" ]]; then - echo "Warning: Changed Dockerfile paths:" - echo "$used_files" - echo "Please modify the corresponding README in GenAIExamples repo and ask suyue.chen@intel.com for final confirmation." - exit 1 - fi diff --git a/.gitignore b/.gitignore index 23ed2b293..c07df7717 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ __pycache__ -venv/ \ No newline at end of file +venv/ +*.egg-info/ diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 5686fa13b..e6d820b83 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -15,6 +15,8 @@ repos: - id: check-yaml - id: debug-statements exclude: venv/ + - id: mixed-line-ending + args: [--fix=lf] - id: requirements-txt-fixer - id: trailing-whitespace files: (.*\.(py|rst|cmake|yaml|yml|json|ts|js|html|svelte|sh))$ diff --git a/README.md b/README.md index 8c3af2373..e1b48a08c 100644 --- a/README.md +++ b/README.md @@ -173,7 +173,6 @@ A `Microservices` can be created by using the decorator `register_microservice`. 
```python from langchain_community.embeddings import HuggingFaceHubEmbeddings -from langsmith import traceable from comps import register_microservice, EmbedDoc, ServiceType, TextDoc @@ -187,7 +186,6 @@ from comps import register_microservice, EmbedDoc, ServiceType, TextDoc input_datatype=TextDoc, output_datatype=EmbedDoc, ) -@traceable(run_type="embedding") def embedding(input: TextDoc) -> EmbedDoc: embed_vector = embeddings.embed_query(input.text) res = EmbedDoc(text=input.text, embedding=embed_vector) diff --git a/comps/__init__.py b/comps/__init__.py index b483f46a7..c58ae42fe 100644 --- a/comps/__init__.py +++ b/comps/__init__.py @@ -12,12 +12,18 @@ GeneratedDoc, LLMParamsDoc, SearchedDoc, + SearchedMultimodalDoc, RerankedDoc, TextDoc, RAGASParams, RAGASScores, GraphDoc, LVMDoc, + LVMVideoDoc, + ImageDoc, + TextImageDoc, + MultimodalDoc, + EmbedMultimodalDoc, ) # Constants @@ -36,7 +42,9 @@ TranslationGateway, SearchQnAGateway, AudioQnAGateway, + RetrievalToolGateway, FaqGenGateway, + VisualQnAGateway, ) # Telemetry @@ -44,3 +52,6 @@ # Statistics from comps.cores.mega.base_statistics import statistics_dict, register_statistics + +# Logger +from comps.cores.mega.logger import CustomLogger diff --git a/comps/agent/langchain/README.md b/comps/agent/langchain/README.md index 286e95508..bc694cdf1 100644 --- a/comps/agent/langchain/README.md +++ b/comps/agent/langchain/README.md @@ -1,39 +1,70 @@ -# langchain Agent Microservice +# Agent Microservice -The langchain agent model refers to a framework that integrates the reasoning capabilities of large language models (LLMs) with the ability to take actionable steps, creating a more sophisticated system that can understand and process information, evaluate situations, take appropriate actions, communicate responses, and track ongoing situations. +## 1. Overview -![Architecture Overview](agent_arch.jpg) +This agent microservice is built on the Langchain/Langgraph frameworks. Agents integrate the reasoning capabilities of large language models (LLMs) with the ability to take actionable steps, creating a more sophisticated system that can understand and process information, evaluate situations, take appropriate actions, communicate responses, and track ongoing situations. + +### 1.1 Supported agent types + +We currently support the following types of agents: + +1. ReAct: use `react_langchain` or `react_langgraph` as the strategy. First introduced in this seminal [paper](https://arxiv.org/abs/2210.03629). The ReAct agent engages in "reason-act-observe" cycles to solve problems. Please refer to this [doc](https://python.langchain.com/v0.2/docs/how_to/migrate_agent/) to understand the differences between the langchain and langgraph versions of ReAct agents. +2. RAG agent: `rag_agent` strategy. This agent is specifically designed for improving RAG performance. It can rephrase the query, check the relevancy of the retrieved context, and iterate if the context is not relevant. +3. Plan and execute: `plan_execute` strategy. This type of agent first makes a step-by-step plan given a user request, and then executes the plan sequentially (or in parallel, to be implemented in the future). If the execution results can solve the problem, then the agent will output an answer; otherwise, it will replan and execute again. + For advanced developers who want to implement their own agent strategies, please refer to [Section 5](#5-customize-agent-strategy) below. + +### 1.2 LLM engine + +Agents use an LLM for reasoning and planning. We support two options for the LLM engine: + +1. 
Open-source LLMs served with TGI-gaudi. To use open-source LLMs, follow the instructions in [Section 2](#222-start-microservices) below. Note: we recommend using state-of-the-art LLMs, such as llama3.1-70B-instruct, to get a higher success rate. +2. OpenAI LLMs via API calls. To use OpenAI LLMs, specify `llm_engine=openai` and `export OPENAI_API_KEY=` + +### 1.3 Tools -# 🚀1. Start Microservice with Python（Option 1） +The tools are registered with a YAML file. We support the following types of tools: -## 1.1 Install Requirements +1. Endpoint: the user provides a URL +2. User-defined Python functions. These are usually used to wrap endpoints with a POST request or simple pre/post-processing. +3. Langchain tool modules. + Examples of how to register tools can be found in [Section 4](#-4-provide-your-own-tools) below. + +### 1.4 Agent APIs + +Currently we have implemented an OpenAI chat-completion-compatible API for agents. We are working to support the OpenAI Assistants APIs. + +# 🚀2. Start Agent Microservice + +## 2.1 Option 1: with Python + +### 2.1.1 Install Requirements ```bash cd comps/agent/langchain/ pip install -r requirements.txt ``` -## 1.2 Start Microservice with Python Script +### 2.1.2 Start Microservice with Python Script ```bash cd comps/agent/langchain/ python agent.py ``` -# 🚀2. Start Microservice with Docker (Option 2) +## 2.2 Option 2: Start Microservice with Docker -## Build Microservices +### 2.2.1 Build Microservices ```bash cd GenAIComps/ # back to GenAIComps/ folder docker build -t opea/comps-agent-langchain:latest -f comps/agent/langchain/docker/Dockerfile . ``` -## start microservices +### 2.2.2 Start Microservices ```bash export ip_address=$(hostname -I | awk '{print $1}') -export model=meta-llama/Meta-Llama-3-8B-Instruct +export model=mistralai/Mistral-7B-Instruct-v0.3 export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} export HF_TOKEN=${HUGGINGFACEHUB_API_TOKEN} @@ -53,10 +84,10 @@ docker logs comps-langchain-agent-endpoint > debug mode > > ```bash -> docker run --rm --runtime=runc --name="comps-langchain-agent-endpoint" -v ./comps/agent/langchain/:/home/user/comps/agent/langchain/ -p 9090:9090 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} --env-file ${agent_env} opea/comps-agent-langchain:latest +> docker run --rm --runtime=runc --name="comps-langchain-agent-endpoint" -v ./comps/agent/langchain/:/home/user/comps/agent/langchain/ -p 9090:9090 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} -e model=${model} -e ip_address=${ip_address} -e strategy=react -e llm_endpoint_url=http://${ip_address}:8080 -e llm_engine=tgi -e recursion_limit=5 -e require_human_feedback=false -e tools=/home/user/comps/agent/langchain/tools/custom_tools.yaml opea/comps-agent-langchain:latest > ``` -# 🚀3. Validate Microservice +# 🚀 3. Validate Microservice Once microservice starts, user can use below script to invoke. @@ -73,7 +104,7 @@ data: [DONE] ``` -# 🚀4. Provide your own tools +# 🚀 4. Provide your own tools - Define tools @@ -148,3 +179,8 @@ data: 'The weather information in Austin is not available from the Open Platform data: [DONE] ``` + +# 5. Customize agent strategy + +For advanced developers who want to implement their own agent strategies, you can add a separate folder in `src/strategy`, implement your agent by inheriting the `BaseAgent` class, and add your strategy in `src/agent.py`. 
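A minimal sketch of what such a custom strategy could look like is below. The module name `mystrategy`, the class `MyCustomAgent`, and the `my_custom` strategy key are all hypothetical; the `BaseAgent` base class, the `(args, with_memory)` constructor pattern, and the `non_streaming_run`/`stream_generator` hooks follow the existing strategies and the dispatch in `src/agent.py`.

```python
# Hypothetical file: comps/agent/langchain/src/strategy/mystrategy/__init__.py
# Sketch only -- mirror one of the existing strategies (react, ragagent, planexec) for the real interfaces.
from ..base_agent import BaseAgent  # assumption: BaseAgent is defined in src/strategy/base_agent.py


class MyCustomAgent(BaseAgent):
    """Toy strategy that simply echoes the user query back."""

    def __init__(self, args, with_memory=False):
        super().__init__(args)
        self.with_memory = with_memory

    async def non_streaming_run(self, query, config):
        # A real strategy would build and invoke its langgraph graph here,
        # and would also implement stream_generator() for streaming responses.
        return f"echo: {query}"


# Then add a branch for it in instantiate_agent() in src/agent.py:
#     elif strategy == "my_custom":
#         from .strategy.mystrategy import MyCustomAgent
#         return MyCustomAgent(args, with_memory)
```

With such a branch in place, starting the service with `strategy=my_custom` (passed the same way as `strategy=react` in the Docker example above) would route requests to the new class.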
The architecture of this agent microservice is shown in the diagram below as a reference. +![Architecture Overview](agent_arch.jpg) diff --git a/comps/agent/langchain/agent.py b/comps/agent/langchain/agent.py index 4d946eda7..9530d7bb6 100644 --- a/comps/agent/langchain/agent.py +++ b/comps/agent/langchain/agent.py @@ -5,6 +5,8 @@ import os import pathlib import sys +from datetime import datetime +from typing import Union from fastapi.responses import StreamingResponse @@ -12,36 +14,192 @@ comps_path = os.path.join(cur_path, "../../../") sys.path.append(comps_path) -from comps import LLMParamsDoc, ServiceType, opea_microservices, register_microservice +from comps import CustomLogger, GeneratedDoc, LLMParamsDoc, ServiceType, opea_microservices, register_microservice from comps.agent.langchain.src.agent import instantiate_agent +from comps.agent.langchain.src.global_var import assistants_global_kv, threads_global_kv +from comps.agent.langchain.src.thread import instantiate_thread_memory, thread_completion_callback from comps.agent.langchain.src.utils import get_args +from comps.cores.proto.api_protocol import ( + AssistantsObject, + ChatCompletionRequest, + CreateAssistantsRequest, + CreateMessagesRequest, + CreateRunResponse, + CreateThreadsRequest, + MessageContent, + MessageObject, + RunObject, + ThreadObject, +) + +logger = CustomLogger("comps-react-agent") +logflag = os.getenv("LOGFLAG", False) args, _ = get_args() @register_microservice( - name="opea_service@comps-react-agent", + name="opea_service@comps-chat-agent", service_type=ServiceType.LLM, endpoint="/v1/chat/completions", host="0.0.0.0", port=args.port, - input_datatype=LLMParamsDoc, ) -def llm_generate(input: LLMParamsDoc): +async def llm_generate(input: Union[LLMParamsDoc, ChatCompletionRequest]): + if logflag: + logger.info(input) # 1. initialize the agent - print("args: ", args) + if logflag: + logger.info("args: ", args) + input.streaming = args.streaming config = {"recursion_limit": args.recursion_limit} agent_inst = instantiate_agent(args, args.strategy) - print(type(agent_inst)) + if logflag: + logger.info(type(agent_inst)) + + if isinstance(input, LLMParamsDoc): + # use query as input + input_query = input.query + else: + # openai compatible input + if isinstance(input.messages, str): + input_query = input.messages + else: + input_query = input.messages[-1]["content"] # 2. prepare the input for the agent if input.streaming: - return StreamingResponse(agent_inst.stream_generator(input.query, config), media_type="text/event-stream") + print("-----------STREAMING-------------") + return StreamingResponse(agent_inst.stream_generator(input_query, config), media_type="text/event-stream") + + else: + print("-----------NOT STREAMING-------------") + response = await agent_inst.non_streaming_run(input_query, config) + print("-----------Response-------------") + print(response) + return GeneratedDoc(text=response, prompt=input_query) + + +@register_microservice( + name="opea_service@comps-chat-agent", + endpoint="/v1/assistants", + host="0.0.0.0", + port=args.port, +) +def create_assistants(input: CreateAssistantsRequest): + # 1. 
initialize the agent + print("args: ", args) + agent_inst = instantiate_agent(args, args.strategy, with_memory=True) + agent_id = agent_inst.id + created_at = int(datetime.now().timestamp()) + with assistants_global_kv as g_assistants: + g_assistants[agent_id] = (agent_inst, created_at) + print(f"Record assistant inst {agent_id} in global KV") + + # get current time in string format + return AssistantsObject( + id=agent_id, + created_at=created_at, + ) + + +@register_microservice( + name="opea_service@comps-chat-agent", + endpoint="/v1/threads", + host="0.0.0.0", + port=args.port, +) +def create_threads(input: CreateThreadsRequest): + # create a memory KV for the thread + thread_inst, thread_id = instantiate_thread_memory() + created_at = int(datetime.now().timestamp()) + status = "ready" + with threads_global_kv as g_threads: + g_threads[thread_id] = (thread_inst, created_at, status) + print(f"Record thread inst {thread_id} in global KV") + + return ThreadObject( + id=thread_id, + created_at=created_at, + ) + +@register_microservice( + name="opea_service@comps-chat-agent", + endpoint="/v1/threads/{thread_id}/messages", + host="0.0.0.0", + port=args.port, +) +def create_messages(thread_id, input: CreateMessagesRequest): + with threads_global_kv as g_threads: + thread_inst, _, _ = g_threads[thread_id] + + # create a memory KV for the message + role = input.role + if isinstance(input.content, str): + query = input.content else: - # TODO: add support for non-streaming mode - return StreamingResponse(agent_inst.stream_generator(input.query, config), media_type="text/event-stream") + query = input.content[-1]["text"] + msg_id, created_at = thread_inst.add_query(query) + + structured_content = MessageContent(text=query) + return MessageObject( + id=msg_id, + created_at=created_at, + thread_id=thread_id, + role=role, + content=[structured_content], + ) + + +@register_microservice( + name="opea_service@comps-chat-agent", + endpoint="/v1/threads/{thread_id}/runs", + host="0.0.0.0", + port=args.port, +) +def create_run(thread_id, input: CreateRunResponse): + with threads_global_kv as g_threads: + thread_inst, _, status = g_threads[thread_id] + + if status == "running": + return "[error] Thread is already running, need to cancel the current run or wait for it to finish" + + agent_id = input.assistant_id + with assistants_global_kv as g_assistants: + agent_inst, _ = g_assistants[agent_id] + + config = {"recursion_limit": args.recursion_limit} + input_query = thread_inst.get_query() + try: + return StreamingResponse( + thread_completion_callback(agent_inst.stream_generator(input_query, config, thread_id), thread_id), + media_type="text/event-stream", + ) + except Exception as e: + with threads_global_kv as g_threads: + thread_inst, created_at, status = g_threads[thread_id] + g_threads[thread_id] = (thread_inst, created_at, "ready") + return f"An error occurred: {e}. 
This thread is now set as ready" + + +@register_microservice( + name="opea_service@comps-chat-agent", + endpoint="/v1/threads/{thread_id}/runs/cancel", + host="0.0.0.0", + port=args.port, +) +def cancel_run(thread_id): + with threads_global_kv as g_threads: + thread_inst, created_at, status = g_threads[thread_id] + if status == "ready": + return "Thread is not running, no need to cancel" + elif status == "try_cancel": + return "cancel request is submitted" + else: + g_threads[thread_id] = (thread_inst, created_at, "try_cancel") + return "submit cancel request" if __name__ == "__main__": - opea_microservices["opea_service@comps-react-agent"].start() + opea_microservices["opea_service@comps-chat-agent"].start() diff --git a/comps/agent/langchain/docker/Dockerfile b/comps/agent/langchain/docker/Dockerfile index 62b4ea2bc..9a966b952 100644 --- a/comps/agent/langchain/docker/Dockerfile +++ b/comps/agent/langchain/docker/Dockerfile @@ -4,6 +4,7 @@ FROM python:3.11-slim ENV LANG=C.UTF-8 +ARG ARCH=cpu RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ build-essential \ @@ -19,8 +20,11 @@ USER user COPY comps /home/user/comps RUN pip install --no-cache-dir --upgrade pip setuptools && \ - if [ ${ARCH} = "cpu" ]; then pip install torch --index-url https://download.pytorch.org/whl/cpu; fi && \ - pip install --no-cache-dir -r /home/user/comps/agent/langchain/requirements.txt + if [ ${ARCH} = "cpu" ]; then \ + pip install --no-cache-dir --extra-index-url https://download.pytorch.org/whl/cpu -r /home/user/comps/agent/langchain/requirements.txt; \ + else \ + pip install --no-cache-dir -r /home/user/comps/agent/langchain/requirements.txt; \ + fi ENV PYTHONPATH=$PYTHONPATH:/home/user diff --git a/comps/agent/langchain/requirements.txt b/comps/agent/langchain/requirements.txt index 16859ac2c..6e4dd1012 100644 --- a/comps/agent/langchain/requirements.txt +++ b/comps/agent/langchain/requirements.txt @@ -4,12 +4,12 @@ docarray[full] #used by tools duckduckgo-search fastapi -huggingface_hub -langchain #==0.1.12 +huggingface_hub==0.24.0 +langchain==0.2.9 langchain-huggingface langchain-openai -langchain_community -langchainhub +langchain_community==0.2.7 +langchainhub==0.1.20 langgraph langsmith numpy diff --git a/comps/agent/langchain/src/agent.py b/comps/agent/langchain/src/agent.py index 4c20a3858..9accf8a35 100644 --- a/comps/agent/langchain/src/agent.py +++ b/comps/agent/langchain/src/agent.py @@ -2,20 +2,23 @@ # SPDX-License-Identifier: Apache-2.0 -def instantiate_agent(args, strategy="react"): - if strategy == "react": +def instantiate_agent(args, strategy="react_langchain", with_memory=False): + if strategy == "react_langchain": from .strategy.react import ReActAgentwithLangchain - return ReActAgentwithLangchain(args) + return ReActAgentwithLangchain(args, with_memory) + elif strategy == "react_langgraph": + from .strategy.react import ReActAgentwithLanggraph + + return ReActAgentwithLanggraph(args, with_memory) elif strategy == "plan_execute": from .strategy.planexec import PlanExecuteAgentWithLangGraph - return PlanExecuteAgentWithLangGraph(args) - elif strategy == "agentic_rag": - from .strategy.agentic_rag import RAGAgentwithLanggraph + return PlanExecuteAgentWithLangGraph(args, with_memory) - return RAGAgentwithLanggraph(args) - else: - from .strategy.base_agent import BaseAgent, BaseAgentState + elif strategy == "rag_agent": + from .strategy.ragagent import RAGAgent - return BaseAgent(args) + return RAGAgent(args, with_memory) + else: + raise ValueError(f"Agent 
strategy: {strategy} not supported!") diff --git a/comps/agent/langchain/src/global_var.py b/comps/agent/langchain/src/global_var.py new file mode 100644 index 000000000..02a2083b8 --- /dev/null +++ b/comps/agent/langchain/src/global_var.py @@ -0,0 +1,21 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import threading + + +class ThreadSafeDict(dict): + def __init__(self, *p_arg, **n_arg): + dict.__init__(self, *p_arg, **n_arg) + self._lock = threading.Lock() + + def __enter__(self): + self._lock.acquire() + return self + + def __exit__(self, type, value, traceback): + self._lock.release() + + +assistants_global_kv = ThreadSafeDict() +threads_global_kv = ThreadSafeDict() diff --git a/comps/agent/langchain/src/strategy/agentic_rag/README.md b/comps/agent/langchain/src/strategy/agentic_rag/README.md deleted file mode 100644 index 3d6e341a5..000000000 --- a/comps/agent/langchain/src/strategy/agentic_rag/README.md +++ /dev/null @@ -1,25 +0,0 @@ -# Agentic Rag - -This strategy is a practise provided with [LangGraph](https://langchain-ai.github.io/langgraph/tutorials/rag/langgraph_agentic_rag) -This agent strategy includes steps listed below: - -1. RagAgent - decide if this query need to get extra help - - - Yes: Goto 'Retriever' - - No: Complete the query with Final answer - -2. Retriever: - - - Get relative Info from tools, Goto 'DocumentGrader' - -3. DocumentGrader - Judge retrieved info relevance based query - - - Yes: Complete the query with Final answer - - No: Goto 'Rewriter' - -4. Rewriter - Rewrite the query and Goto 'RagAgent' - -![Agentic Rag Workflow](https://blog.langchain.dev/content/images/size/w1000/2024/02/image-16.png) diff --git a/comps/agent/langchain/src/strategy/agentic_rag/prompt.py b/comps/agent/langchain/src/strategy/agentic_rag/prompt.py deleted file mode 100644 index 1f68db32e..000000000 --- a/comps/agent/langchain/src/strategy/agentic_rag/prompt.py +++ /dev/null @@ -1,17 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -from langchain_core.prompts import ChatPromptTemplate, PromptTemplate - -rlm_rag_prompt = ChatPromptTemplate.from_messages( - [ - ( - "human", - "You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.Question: {question} Context: {context} Answer:", - ), - ] -) - -hwchase17_react_prompt = PromptTemplate.from_template( - "Answer the following questions as best you can. You have access to the following tools:\n\n{tools}\n\nUse the following format:\n\nQuestion: the input question you must answer\nThought: you should always think about what to do\nAction: the action to take, should be one of [{tool_names}]\nAction Input: the input to the action\nObservation: the result of the action\n... 
(this Thought/Action/Action Input/Observation can repeat N times)\nThought: I now know the final answer\nFinal Answer: the final answer to the original input question\n\nBegin!\n\nQuestion: {input}\nThought:{agent_scratchpad}" -) diff --git a/comps/agent/langchain/src/strategy/base_agent.py b/comps/agent/langchain/src/strategy/base_agent.py index a0e35e912..ca0e12a96 100644 --- a/comps/agent/langchain/src/strategy/base_agent.py +++ b/comps/agent/langchain/src/strategy/base_agent.py @@ -1,6 +1,8 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +from uuid import uuid4 + from ..tools import get_tools_descriptions from ..utils import setup_llm @@ -10,6 +12,8 @@ def __init__(self, args) -> None: self.llm_endpoint = setup_llm(args) self.tools_descriptions = get_tools_descriptions(args.tools) self.app = None + self.memory = None + self.id = f"assistant_{self.__class__.__name__}_{uuid4()}" print(self.tools_descriptions) def compile(self): @@ -17,3 +21,6 @@ def compile(self): def execute(self, state: dict): pass + + def non_streaming_run(self, query, config): + raise NotImplementedError diff --git a/comps/agent/langchain/src/strategy/planexec/planner.py b/comps/agent/langchain/src/strategy/planexec/planner.py index 601e28bf8..c33c906f4 100644 --- a/comps/agent/langchain/src/strategy/planexec/planner.py +++ b/comps/agent/langchain/src/strategy/planexec/planner.py @@ -16,9 +16,11 @@ from langchain_core.pydantic_v1 import BaseModel, Field from langchain_core.utils.json import parse_partial_json from langchain_huggingface import ChatHuggingFace +from langgraph.checkpoint.memory import MemorySaver from langgraph.graph import END, START, StateGraph from langgraph.graph.message import add_messages +from ...global_var import threads_global_kv from ...utils import has_multi_tool_inputs, tool_renderer from ..base_agent import BaseAgent from .prompt import ( @@ -221,7 +223,7 @@ def __call__(self, state): class PlanExecuteAgentWithLangGraph(BaseAgent): - def __init__(self, args): + def __init__(self, args, with_memory=False): super().__init__(args) # Define Node @@ -231,37 +233,39 @@ def __init__(self, args): execute_step = Executor(self.llm_endpoint, args.model, self.tools_descriptions) make_answer = AnswerMaker(self.llm_endpoint, args.model) - # answer_checker = FinalAnswerChecker(self.llm_endpoint, args.model) - # replan_step = Replanner(self.llm_endpoint, args.model, answer_checker) - # Define Graph workflow = StateGraph(PlanExecute) workflow.add_node("planner", plan_step) workflow.add_node("plan_executor", execute_step) workflow.add_node("answer_maker", make_answer) - # workflow.add_node("replan", replan_step) # Define edges workflow.add_edge(START, "planner") workflow.add_edge("planner", "plan_executor") workflow.add_edge("plan_executor", "answer_maker") workflow.add_edge("answer_maker", END) - # workflow.add_conditional_edges( - # "answer_maker", - # answer_checker, - # {END: END, "replan": "replan"}, - # ) - # workflow.add_edge("replan", "plan_executor") - # Finally, we compile it! 
- self.app = workflow.compile() + if with_memory: + self.app = workflow.compile(checkpointer=MemorySaver()) + else: + self.app = workflow.compile() def prepare_initial_state(self, query): return {"messages": [("user", query)]} - async def stream_generator(self, query, config): + async def stream_generator(self, query, config, thread_id=None): initial_state = self.prepare_initial_state(query) + if thread_id is not None: + config["configurable"] = {"thread_id": thread_id} async for event in self.app.astream(initial_state, config=config): + if thread_id is not None: + with threads_global_kv as g_threads: + thread_inst, created_at, status = g_threads[thread_id] + if status == "try_cancel": + yield "[thread_completion_callback] signal to cancel! Changed status to ready" + print("[thread_completion_callback] signal to cancel! Changed status to ready") + g_threads[thread_id] = (thread_inst, created_at, "ready") + break for node_name, node_state in event.items(): yield f"--- CALL {node_name} ---\n" for k, v in node_state.items(): diff --git a/comps/agent/langchain/src/strategy/ragagent/README.md b/comps/agent/langchain/src/strategy/ragagent/README.md new file mode 100644 index 000000000..23114e1f9 --- /dev/null +++ b/comps/agent/langchain/src/strategy/ragagent/README.md @@ -0,0 +1,31 @@ +# RAG Agent + +This agent is specifically designed to improve answer quality over conventional RAG. +This agent strategy includes the steps listed below: + +1. QueryWriter + This is an LLM with tool-calling capability; it decides whether tool calls are needed to answer the user query or whether it can answer with the LLM's parametric knowledge. + + - Yes: Rephrase the query in the form of a tool call to the Retriever tool, and send the rephrased query to 'Retriever'. The rephrasing is important, as user queries may not be clear, and simply using the raw user query may not retrieve relevant documents. + - No: Complete the query with the final answer. + +2. Retriever: + + - Get related documents from a retrieval tool, then send the documents to 'DocumentGrader'. Note: the retrieval tool here is meant in a broad sense; it can be a text retriever over a proprietary knowledge base, a web search API, a knowledge graph API, a SQL database API, etc. + +3. DocumentGrader + Judges the relevance of the retrieved info with respect to the user query + + - Yes: Go to TextGenerator + - No: Go back to QueryWriter to rewrite the query. + +4. TextGenerator + - Generate an answer based on the query and the last retrieved context. + - After generation, go to END. + +Note: + +- Currently the performance of this RAG agent has been tested and validated with only one retrieval tool. If you want to use multiple retrieval tools, we recommend a hierarchical multi-agent system where a supervisor agent dispatches requests to multiple worker RAG agents and each worker RAG agent uses one type of retrieval tool. +- The maximum number of retrieval attempts is set to 3. +- You can specify a small `recursion_limit` to stop early, or a large `recursion_limit` to make full use of the 3 retrieval attempts. +- The TextGenerator only looks at the most recently retrieved docs.
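For orientation, the control flow described in this README maps onto a LangGraph `StateGraph` roughly as in the simplified sketch below. The node callables (`query_writer`, `doc_grader`, `text_generator`, `should_retry`) are placeholders standing in for the classes defined in `planner.py`; this is a sketch of the wiring, not the actual implementation.

```python
# Simplified sketch of the RAG agent graph wiring; node callables are placeholders.
from langgraph.graph import END, START, StateGraph
from langgraph.prebuilt import ToolNode, tools_condition


def build_rag_agent_graph(state_schema, query_writer, doc_grader, text_generator, should_retry, tools):
    workflow = StateGraph(state_schema)
    workflow.add_node("query_writer", query_writer)  # LLM with tool calling: answer directly or emit a retrieval tool call
    workflow.add_node("retrieve", ToolNode(tools))   # runs the retrieval tool(s)
    workflow.add_node("doc_grader", doc_grader)      # grades relevance of the retrieved docs
    workflow.add_node("generate", text_generator)    # answers from the last retrieved context

    workflow.add_edge(START, "query_writer")
    # If the LLM emitted a tool call, go retrieve; otherwise finish with its direct answer.
    workflow.add_conditional_edges("query_writer", tools_condition, {"tools": "retrieve", END: END})
    workflow.add_edge("retrieve", "doc_grader")
    # Retry (rewrite the query) while docs are graded irrelevant and the retry budget is not exhausted.
    workflow.add_conditional_edges("doc_grader", should_retry, {True: "query_writer", False: "generate"})
    workflow.add_edge("generate", END)
    return workflow.compile()
```

In practice, the `recursion_limit` passed to the service bounds how many query_writer → retrieve → doc_grader cycles can run before the graph stops.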
diff --git a/comps/agent/langchain/src/strategy/agentic_rag/__init__.py b/comps/agent/langchain/src/strategy/ragagent/__init__.py similarity index 64% rename from comps/agent/langchain/src/strategy/agentic_rag/__init__.py rename to comps/agent/langchain/src/strategy/ragagent/__init__.py index 8ed6f0281..f369d1928 100644 --- a/comps/agent/langchain/src/strategy/agentic_rag/__init__.py +++ b/comps/agent/langchain/src/strategy/ragagent/__init__.py @@ -1,4 +1,4 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -from .planner import RAGAgentwithLanggraph +from .planner import RAGAgent diff --git a/comps/agent/langchain/src/strategy/agentic_rag/planner.py b/comps/agent/langchain/src/strategy/ragagent/planner.py similarity index 51% rename from comps/agent/langchain/src/strategy/agentic_rag/planner.py rename to comps/agent/langchain/src/strategy/ragagent/planner.py index 18ab42083..e618aed80 100644 --- a/comps/agent/langchain/src/strategy/agentic_rag/planner.py +++ b/comps/agent/langchain/src/strategy/ragagent/planner.py @@ -4,18 +4,23 @@ from typing import Annotated, Any, Literal, Sequence, TypedDict from langchain.output_parsers import PydanticOutputParser -from langchain_core.messages import BaseMessage, HumanMessage +from langchain_core.messages import BaseMessage, HumanMessage, ToolMessage from langchain_core.output_parsers import StrOutputParser from langchain_core.output_parsers.openai_tools import PydanticToolsParser from langchain_core.prompts import PromptTemplate from langchain_core.pydantic_v1 import BaseModel, Field -from langchain_huggingface import ChatHuggingFace +from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint +from langchain_openai import ChatOpenAI +from langgraph.checkpoint.memory import MemorySaver from langgraph.graph import END, START, StateGraph from langgraph.graph.message import add_messages from langgraph.prebuilt import ToolNode, tools_condition from ..base_agent import BaseAgent -from .prompt import rlm_rag_prompt +from .prompt import DOC_GRADER_PROMPT, RAG_PROMPT + +instruction = "Retrieved document is not sufficient or relevant to answer the query. Reformulate the query to search knowledge base again." +MAX_RETRY = 3 class AgentState(TypedDict): @@ -23,6 +28,40 @@ class AgentState(TypedDict): # Default is to replace. add_messages says "append" messages: Annotated[Sequence[BaseMessage], add_messages] output: str + doc_score: str + query_time: str + + +class QueryWriter: + """Invokes llm to generate a response based on the current state. Given + the question, it will decide to retrieve using the retriever tool, or simply end. 
+ + Args: + state (messages): The current state + + Returns: + dict: The updated state with the response appended to messages + """ + + def __init__(self, llm_endpoint, model_id, tools): + if isinstance(llm_endpoint, HuggingFaceEndpoint): + self.llm = ChatHuggingFace(llm=llm_endpoint, model_id=model_id).bind_tools(tools) + elif isinstance(llm_endpoint, ChatOpenAI): + self.llm = llm_endpoint.bind_tools(tools) + + def __call__(self, state): + print("---CALL QueryWriter---") + messages = state["messages"] + + response = self.llm.invoke(messages) + # We return a list, because this will get added to the existing list + return {"messages": [response], "output": response} + + +class Retriever: + @classmethod + def create(cls, tools_descriptions): + return ToolNode(tools_descriptions) class DocumentGrader: @@ -43,24 +82,23 @@ class grade(BaseModel): # Prompt prompt = PromptTemplate( - template="""You are a grader assessing relevance of a retrieved document to a user question. \n - Here is the retrieved document: \n\n {context} \n\n - Here is the user question: {question} \n - If the document contains keyword(s) or semantic meaning related to the user question, grade it as relevant. \n - Give a binary score 'yes' or 'no' score to indicate whether the document is relevant to the question.""", + template=DOC_GRADER_PROMPT, input_variables=["context", "question"], ) - llm = ChatHuggingFace(llm=llm_endpoint, model_id=model_id).bind_tools([grade]) + if isinstance(llm_endpoint, HuggingFaceEndpoint): + llm = ChatHuggingFace(llm=llm_endpoint, model_id=model_id).bind_tools([grade]) + elif isinstance(llm_endpoint, ChatOpenAI): + llm = llm_endpoint.bind_tools([grade]) output_parser = PydanticToolsParser(tools=[grade], first_tool_only=True) self.chain = prompt | llm | output_parser def __call__(self, state) -> Literal["generate", "rewrite"]: print("---CALL DocumentGrader---") messages = state["messages"] - last_message = messages[-1] + last_message = messages[-1] # the latest retrieved doc - question = messages[0].content + question = messages[0].content # the original query docs = last_message.content scored_result = self.chain.invoke({"question": question, "context": docs}) @@ -69,74 +107,12 @@ def __call__(self, state) -> Literal["generate", "rewrite"]: if score.startswith("yes"): print("---DECISION: DOCS RELEVANT---") - return "generate" + return {"doc_score": "generate"} else: print(f"---DECISION: DOCS NOT RELEVANT, score is {score}---") - return "rewrite" - -class RagAgent: - """Invokes the agent model to generate a response based on the current state. Given - the question, it will decide to retrieve using the retriever tool, or simply end. - - Args: - state (messages): The current state - - Returns: - dict: The updated state with the agent response appended to messages - """ - - def __init__(self, llm_endpoint, model_id, tools): - self.llm = ChatHuggingFace(llm=llm_endpoint, model_id=model_id).bind_tools(tools) - - def __call__(self, state): - print("---CALL RagAgent---") - messages = state["messages"] - - response = self.llm.invoke(messages) - # We return a list, because this will get added to the existing list - return {"messages": [response], "output": response} - - -class Retriever: - @classmethod - def create(cls, tools_descriptions): - return ToolNode(tools_descriptions) - - -class Rewriter: - """Transform the query to produce a better question. 
- - Args: - state (messages): The current state - - Returns: - dict: The updated state with re-phrased question - """ - - def __init__(self, llm_endpoint): - self.llm = llm_endpoint - - def __call__(self, state): - print("---TRANSFORM QUERY---") - messages = state["messages"] - question = messages[0].content - - msg = [ - HumanMessage( - content=f""" \n - Look at the input and try to reason about the underlying semantic intent / meaning. \n - Here is the initial question: - \n ------- \n - {question} - \n ------- \n - Formulate an improved question: """, - ) - ] - - response = self.llm.invoke(msg) - return {"messages": [response]} + return {"messages": [HumanMessage(content=instruction)], "doc_score": "rewrite"} class TextGenerator: @@ -151,77 +127,124 @@ class TextGenerator: def __init__(self, llm_endpoint, model_id=None): # Chain - prompt = rlm_rag_prompt + # prompt = rlm_rag_prompt + prompt = RAG_PROMPT self.rag_chain = prompt | llm_endpoint | StrOutputParser() def __call__(self, state): print("---GENERATE---") messages = state["messages"] question = messages[0].content - last_message = messages[-1] + query_time = state["query_time"] + + # find the latest retrieved doc + # which is a ToolMessage + for m in state["messages"][::-1]: + if isinstance(m, ToolMessage): + last_message = m + break question = messages[0].content docs = last_message.content # Run - response = self.rag_chain.invoke({"context": docs, "question": question}) - return {"output": response} + response = self.rag_chain.invoke({"context": docs, "question": question, "time": query_time}) + print("@@@@ Used this doc for generation:\n", docs) + print("@@@@ Generated response: ", response) + return {"messages": [response], "output": response} -class RAGAgentwithLanggraph(BaseAgent): - def __init__(self, args): +class RAGAgent(BaseAgent): + def __init__(self, args, with_memory=False): super().__init__(args) # Define Nodes document_grader = DocumentGrader(self.llm_endpoint, args.model) - rag_agent = RagAgent(self.llm_endpoint, args.model, self.tools_descriptions) - retriever = Retriever.create(self.tools_descriptions) - rewriter = Rewriter(self.llm_endpoint) + query_writer = QueryWriter(self.llm_endpoint, args.model, self.tools_descriptions) text_generator = TextGenerator(self.llm_endpoint) + retriever = Retriever.create(self.tools_descriptions) # Define graph workflow = StateGraph(AgentState) # Define the nodes we will cycle between - workflow.add_node("agent", rag_agent) + workflow.add_node("query_writer", query_writer) workflow.add_node("retrieve", retriever) - workflow.add_node("rewrite", rewriter) + workflow.add_node("doc_grader", document_grader) workflow.add_node("generate", text_generator) # connect as graph - workflow.add_edge(START, "agent") + workflow.add_edge(START, "query_writer") workflow.add_conditional_edges( - "agent", + "query_writer", tools_condition, { "tools": "retrieve", # if tools_condition return 'tools', then go to 'retrieve' END: END, # if tools_condition return 'END', then go to END }, ) + + workflow.add_edge("retrieve", "doc_grader") + workflow.add_conditional_edges( - "retrieve", - document_grader, + "doc_grader", + self.should_retry, { - "generate": "generate", # if tools_condition return 'generate', then go to 'generate' node - "rewrite": "rewrite", # if tools_condition return 'rewrite', then go to 'rewrite' node + False: "generate", + True: "query_writer", }, ) workflow.add_edge("generate", END) - workflow.add_edge("rewrite", "agent") - self.app = workflow.compile() + if with_memory: + self.app = 
workflow.compile(checkpointer=MemorySaver()) + else: + self.app = workflow.compile() + + def should_retry(self, state): + # first check how many retry attempts have been made + num_retry = 0 + for m in state["messages"]: + if instruction in m.content: + num_retry += 1 + + print("**********Num retry: ", num_retry) + + if (num_retry < MAX_RETRY) and (state["doc_score"] == "rewrite"): + return True + else: + return False def prepare_initial_state(self, query): return {"messages": [HumanMessage(content=query)]} async def stream_generator(self, query, config): initial_state = self.prepare_initial_state(query) - async for event in self.app.astream(initial_state, config=config): - for node_name, node_state in event.items(): - yield f"--- CALL {node_name} ---\n" - for k, v in node_state.items(): - if v is not None: - yield f"{k}: {v}\n" - - yield f"data: {repr(event)}\n\n" - yield "data: [DONE]\n\n" + try: + async for event in self.app.astream(initial_state, config=config): + for node_name, node_state in event.items(): + yield f"--- CALL {node_name} ---\n" + for k, v in node_state.items(): + if v is not None: + yield f"{k}: {v}\n" + + yield f"data: {repr(event)}\n\n" + yield "data: [DONE]\n\n" + except Exception as e: + yield str(e) + + async def non_streaming_run(self, query, config): + initial_state = self.prepare_initial_state(query) + try: + async for s in self.app.astream(initial_state, config=config, stream_mode="values"): + message = s["messages"][-1] + if isinstance(message, tuple): + print(message) + else: + message.pretty_print() + + last_message = s["messages"][-1] + print("******Response: ", last_message.content) + return last_message.content + except Exception as e: + return str(e) diff --git a/comps/agent/langchain/src/strategy/ragagent/prompt.py b/comps/agent/langchain/src/strategy/ragagent/prompt.py new file mode 100644 index 000000000..bf12422ec --- /dev/null +++ b/comps/agent/langchain/src/strategy/ragagent/prompt.py @@ -0,0 +1,36 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from langchain_core.prompts import ChatPromptTemplate, PromptTemplate + +DOC_GRADER_PROMPT = """\ +Given the QUERY, determine if a relevant answer can be derived from the DOCUMENT.\n +QUERY: {question} \n +DOCUMENT:\n{context}\n\n +Give score 'yes' if the document provides sufficient and relevant information to answer the question. Otherwise, give score 'no'. ONLY answer with 'yes' or 'no'. NOTHING ELSE.""" + + +PROMPT = """\ +### You are a helpful, respectful and honest assistant. +You are given a Question and the time when it was asked in the Pacific Time Zone (PT), referred to as "Query +Time". The query time is formatted as "mm/dd/yyyy, hh:mm:ss PT". +Please follow these guidelines when formulating your answer: +1. If the question contains a false premise or assumption, answer โ€œinvalid questionโ€. +2. If you are uncertain or do not know the answer, respond with โ€œI donโ€™t knowโ€. +3. Refer to the search results to form your answer. +4. Give concise, factual and relevant answers. 
+ +### Search results: {context} \n +### Question: {question} \n +### Query Time: {time} \n +### Answer: +""" + +RAG_PROMPT = ChatPromptTemplate.from_messages( + [ + ( + "human", + PROMPT, + ), + ] +) diff --git a/comps/agent/langchain/src/strategy/react/__init__.py b/comps/agent/langchain/src/strategy/react/__init__.py index 63f79e32a..ad1aa456e 100644 --- a/comps/agent/langchain/src/strategy/react/__init__.py +++ b/comps/agent/langchain/src/strategy/react/__init__.py @@ -2,3 +2,4 @@ # SPDX-License-Identifier: Apache-2.0 from .planner import ReActAgentwithLangchain +from .planner import ReActAgentwithLanggraph diff --git a/comps/agent/langchain/src/strategy/react/planner.py b/comps/agent/langchain/src/strategy/react/planner.py index 58cc70104..4466a115f 100644 --- a/comps/agent/langchain/src/strategy/react/planner.py +++ b/comps/agent/langchain/src/strategy/react/planner.py @@ -1,33 +1,70 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -from langchain.agents import AgentExecutor, create_react_agent +from langchain.agents import AgentExecutor +from langchain.agents import create_react_agent as create_react_langchain_agent +from langchain.memory import ChatMessageHistory +from langchain_core.messages import HumanMessage +from langchain_core.runnables.history import RunnableWithMessageHistory +from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint +from langchain_openai import ChatOpenAI +from langgraph.checkpoint.memory import MemorySaver +from langgraph.prebuilt import create_react_agent +from ...global_var import threads_global_kv from ...utils import has_multi_tool_inputs, tool_renderer from ..base_agent import BaseAgent -from .prompt import hwchase17_react_prompt +from .prompt import REACT_SYS_MESSAGE, hwchase17_react_prompt class ReActAgentwithLangchain(BaseAgent): - def __init__(self, args): + def __init__(self, args, with_memory=False): super().__init__(args) prompt = hwchase17_react_prompt if has_multi_tool_inputs(self.tools_descriptions): - raise ValueError("Only supports single input tools when using strategy == react") + raise ValueError("Only supports single input tools when using strategy == react_langchain") else: - agent_chain = create_react_agent( + agent_chain = create_react_langchain_agent( self.llm_endpoint, self.tools_descriptions, prompt, tools_renderer=tool_renderer ) self.app = AgentExecutor( agent=agent_chain, tools=self.tools_descriptions, verbose=True, handle_parsing_errors=True ) + self.memory = {} + + def get_session_history(session_id): + if session_id in self.memory: + return self.memory[session_id] + else: + mem = ChatMessageHistory() + self.memory[session_id] = mem + return mem + + if with_memory: + self.app = RunnableWithMessageHistory( + self.app, + get_session_history, + input_messages_key="input", + history_messages_key="chat_history", + history_factory_config=[], + ) def prepare_initial_state(self, query): return {"input": query} - async def stream_generator(self, query, config): + async def stream_generator(self, query, config, thread_id=None): initial_state = self.prepare_initial_state(query) + if thread_id is not None: + config["configurable"] = {"session_id": thread_id} async for chunk in self.app.astream(initial_state, config=config): + if thread_id is not None: + with threads_global_kv as g_threads: + thread_inst, created_at, status = g_threads[thread_id] + if status == "try_cancel": + yield "[thread_completion_callback] signal to cancel! 
Changed status to ready" + print("[thread_completion_callback] signal to cancel! Changed status to ready") + g_threads[thread_id] = (thread_inst, created_at, "ready") + break if "actions" in chunk: for action in chunk["actions"]: yield f"Calling Tool: `{action.tool}` with input `{action.tool_input}`\n\n" @@ -42,3 +79,56 @@ async def stream_generator(self, query, config): raise ValueError() print("---") yield "data: [DONE]\n\n" + + +class ReActAgentwithLanggraph(BaseAgent): + def __init__(self, args, with_memory=False): + super().__init__(args) + + if isinstance(self.llm_endpoint, HuggingFaceEndpoint): + self.llm = ChatHuggingFace(llm=self.llm_endpoint, model_id=args.model) + elif isinstance(self.llm_endpoint, ChatOpenAI): + self.llm = self.llm_endpoint + + tools = self.tools_descriptions + + if with_memory: + self.app = create_react_agent( + self.llm, tools=tools, state_modifier=REACT_SYS_MESSAGE, checkpointer=MemorySaver() + ) + else: + self.app = create_react_agent(self.llm, tools=tools, state_modifier=REACT_SYS_MESSAGE) + + def prepare_initial_state(self, query): + return {"messages": [HumanMessage(content=query)]} + + async def stream_generator(self, query, config): + initial_state = self.prepare_initial_state(query) + try: + async for event in self.app.astream(initial_state, config=config): + for node_name, node_state in event.items(): + yield f"--- CALL {node_name} ---\n" + for k, v in node_state.items(): + if v is not None: + yield f"{k}: {v}\n" + + yield f"data: {repr(event)}\n\n" + yield "data: [DONE]\n\n" + except Exception as e: + yield str(e) + + async def non_streaming_run(self, query, config): + initial_state = self.prepare_initial_state(query) + try: + async for s in self.app.astream(initial_state, config=config, stream_mode="values"): + message = s["messages"][-1] + if isinstance(message, tuple): + print(message) + else: + message.pretty_print() + + last_message = s["messages"][-1] + print("******Response: ", last_message.content) + return last_message.content + except Exception as e: + return str(e) diff --git a/comps/agent/langchain/src/strategy/react/prompt.py b/comps/agent/langchain/src/strategy/react/prompt.py index bfec54fe3..5404aa73a 100644 --- a/comps/agent/langchain/src/strategy/react/prompt.py +++ b/comps/agent/langchain/src/strategy/react/prompt.py @@ -6,3 +6,13 @@ hwchase17_react_prompt = PromptTemplate.from_template( "Answer the following questions as best you can. You have access to the following tools:\n\n{tools}\n\nUse the following format:\n\nQuestion: the input question you must answer\nThought: you should always think about what to do\nAction: the action to take, should be one of [{tool_names}]\nAction Input: the input to the action\nObservation: the result of the action\n... (this Thought/Action/Action Input/Observation can repeat N times)\nThought: I now know the final answer\nFinal Answer: the final answer to the original input question\n\nBegin!\n\nQuestion: {input}\nThought:{agent_scratchpad}" ) + + +REACT_SYS_MESSAGE = """\ +Decompose the user request into a series of simple tasks when necessary and solve the problem step by step. +When you cannot get the answer at first, do not give up. Reflect on the info you have from the tools and try to solve the problem in a different way. +Please follow these guidelines when formulating your answer: +1. If the question contains a false premise or assumption, answer โ€œinvalid questionโ€. +2. If you are uncertain or do not know the answer, respond with โ€œI donโ€™t knowโ€. +3. 
Give concise, factual and relevant answers. +""" diff --git a/comps/agent/langchain/src/thread.py b/comps/agent/langchain/src/thread.py new file mode 100644 index 000000000..441a2936e --- /dev/null +++ b/comps/agent/langchain/src/thread.py @@ -0,0 +1,43 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from collections import deque +from datetime import datetime +from uuid import uuid4 + +from .global_var import threads_global_kv + + +class ThreadMemory: + def __init__(self): + self.query_list = deque() + + def add_query(self, query): + msg_id = f"msg_{uuid4()}" + created_at = int(datetime.now().timestamp()) + + self.query_list.append((query, msg_id, created_at)) + + return msg_id, created_at + + def get_query(self): + query, _, _ = self.query_list.pop() + return query + + +async def thread_completion_callback(content, thread_id): + with threads_global_kv as g_threads: + thread_inst, created_at, _ = g_threads[thread_id] + g_threads[thread_id] = (thread_inst, created_at, "running") + print("[thread_completion_callback] Changed status to running") + async for chunk in content: + if "data: [DONE]\n\n" == chunk: + with threads_global_kv as g_threads: + thread_inst, created_at, _ = g_threads[thread_id] + g_threads[thread_id] = (thread_inst, created_at, "ready") + yield chunk + + +def instantiate_thread_memory(args=None): + thread_id = f"thread_{uuid4()}" + return ThreadMemory(), thread_id diff --git a/comps/agent/langchain/src/utils.py b/comps/agent/langchain/src/utils.py index d84b7e225..4f5fb450c 100644 --- a/comps/agent/langchain/src/utils.py +++ b/comps/agent/langchain/src/utils.py @@ -68,7 +68,7 @@ def setup_openai_client(args): from langchain_openai import ChatOpenAI params = { - "temperature": 0.5, + "temperature": args.temperature, "max_tokens": args.max_new_tokens, "streaming": args.streaming, } diff --git a/comps/agent/langchain/test.py b/comps/agent/langchain/test.py index d3f5d4506..cb7cc0424 100644 --- a/comps/agent/langchain/test.py +++ b/comps/agent/langchain/test.py @@ -5,6 +5,7 @@ import json import os import traceback +from time import sleep import pandas as pd import requests @@ -85,6 +86,68 @@ def process_request(query): df.to_csv(os.path.join(args.filedir, args.output), index=False) +def test_assistants_http(args): + proxies = {"http": ""} + ip_addr = args.ip_addr + url = f"http://{ip_addr}:9090/v1" + + def process_request(api, query, is_stream=False): + content = json.dumps(query) if query is not None else None + print(f"send request to {url}/{api}, data is {content}") + try: + resp = requests.post(url=f"{url}/{api}", data=content, proxies=proxies, stream=is_stream) + if not is_stream: + ret = resp.json() + print(ret) + else: + for line in resp.iter_lines(decode_unicode=True): + print(line) + ret = None + + resp.raise_for_status() # Raise an exception for unsuccessful HTTP status codes + return ret + except requests.exceptions.RequestException as e: + ret = f"An error occurred:{e}" + print(ret) + return False + + # step 1. create assistants + query = {} + if ret := process_request("assistants", query): + assistant_id = ret.get("id") + print("Created Assistant Id: ", assistant_id) + else: + print("Error when creating assistants !!!!") + return + + # step 2. create threads + query = {} + if ret := process_request("threads", query): + thread_id = ret.get("id") + print("Created Thread Id: ", thread_id) + else: + print("Error when creating threads !!!!") + return + + # step 3. 
add messages + if args.query is None: + query = {"role": "user", "content": "How old was Bill Gates when he built Microsoft?"} + else: + query = {"role": "user", "content": args.query} + if ret := process_request(f"threads/{thread_id}/messages", query): + pass + else: + print("Error when add messages !!!!") + return + + # step 4. run + print("You may cancel the running process with cmdline") + print(f"curl {url}/threads/{thread_id}/runs/cancel -X POST -H 'Content-Type: application/json'") + + query = {"assistant_id": assistant_id} + process_request(f"threads/{thread_id}/runs", query, is_stream=True) + + def test_ut(args): from src.tools import get_tools_descriptions @@ -99,8 +162,10 @@ def test_ut(args): parser.add_argument("--strategy", type=str, default="react") parser.add_argument("--local_test", action="store_true", help="Test with local mode") parser.add_argument("--endpoint_test", action="store_true", help="Test with endpoint mode") + parser.add_argument("--assistants_api_test", action="store_true", help="Test with endpoint mode") parser.add_argument("--q", type=int, default=0) parser.add_argument("--ip_addr", type=str, default="127.0.0.1", help="endpoint ip address") + parser.add_argument("--query", type=str, default=None) parser.add_argument("--filedir", type=str, default="./", help="test file directory") parser.add_argument("--filename", type=str, default="query.csv", help="query_list_file") parser.add_argument("--output", type=str, default="output.csv", help="query_list_file") @@ -117,5 +182,7 @@ def test_ut(args): test_agent_http(args) elif args.ut: test_ut(args) + elif args.assistants_api_test: + test_assistants_http(args) else: print("Please specify the test type") diff --git a/comps/agent/langchain/test_assistant_api.py b/comps/agent/langchain/test_assistant_api.py new file mode 100644 index 000000000..cf398c8bb --- /dev/null +++ b/comps/agent/langchain/test_assistant_api.py @@ -0,0 +1,97 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import argparse +import json + +import requests +from src.utils import get_args + + +def test_assistants_http(args): + proxies = {"http": ""} + url = f"http://{args.ip_addr}:{args.ext_port}/v1" + + def process_request(api, query, is_stream=False): + content = json.dumps(query) if query is not None else None + print(f"send request to {url}/{api}, data is {content}") + try: + resp = requests.post(url=f"{url}/{api}", data=content, proxies=proxies, stream=is_stream) + if not is_stream: + ret = resp.json() + print(ret) + else: + for line in resp.iter_lines(decode_unicode=True): + print(line) + ret = None + + resp.raise_for_status() # Raise an exception for unsuccessful HTTP status codes + return ret + except requests.exceptions.RequestException as e: + ret = f"An error occurred:{e}" + print(ret) + return False + + # step 1. create assistants + query = {} + if ret := process_request("assistants", query): + assistant_id = ret.get("id") + print("Created Assistant Id: ", assistant_id) + else: + print("Error when creating assistants !!!!") + return + + # step 2. create threads + query = {} + if ret := process_request("threads", query): + thread_id = ret.get("id") + print("Created Thread Id: ", thread_id) + else: + print("Error when creating threads !!!!") + return + + # step 3. 
add messages + if args.query is None: + query = {"role": "user", "content": "How old was Bill Gates when he built Microsoft?"} + else: + query = {"role": "user", "content": args.query} + if ret := process_request(f"threads/{thread_id}/messages", query): + pass + else: + print("Error when add messages !!!!") + return + + # step 4. run + print("You may cancel the running process with cmdline") + print(f"curl {url}/threads/{thread_id}/runs/cancel -X POST -H 'Content-Type: application/json'") + + query = {"assistant_id": assistant_id} + process_request(f"threads/{thread_id}/runs", query, is_stream=True) + + +if __name__ == "__main__": + args1, _ = get_args() + parser = argparse.ArgumentParser() + parser.add_argument("--strategy", type=str, default="react") + parser.add_argument("--local_test", action="store_true", help="Test with local mode") + parser.add_argument("--endpoint_test", action="store_true", help="Test with endpoint mode") + parser.add_argument("--assistants_api_test", action="store_true", help="Test with endpoint mode") + parser.add_argument("--q", type=int, default=0) + parser.add_argument("--ip_addr", type=str, default="127.0.0.1", help="endpoint ip address") + parser.add_argument("--ext_port", type=str, default="9090", help="endpoint port") + parser.add_argument("--query", type=str, default=None) + parser.add_argument("--filedir", type=str, default="./", help="test file directory") + parser.add_argument("--filename", type=str, default="query.csv", help="query_list_file") + parser.add_argument("--output", type=str, default="output.csv", help="query_list_file") + parser.add_argument("--ut", action="store_true", help="ut") + + args, _ = parser.parse_known_args() + + for key, value in vars(args1).items(): + setattr(args, key, value) + + if args.assistants_api_test: + print("test args:", args) + test_assistants_http(args) + else: + print("Please specify the test type") diff --git a/comps/asr/Dockerfile b/comps/asr/Dockerfile index cc2740b7d..58f20c03b 100644 --- a/comps/asr/Dockerfile +++ b/comps/asr/Dockerfile @@ -3,15 +3,26 @@ FROM python:3.11-slim +RUN useradd -m -s /bin/bash user && \ + mkdir -p /home/user && \ + chown -R user /home/user/ +USER user + ENV LANG=C.UTF-8 +ARG ARCH=cpu -COPY comps /home/comps +COPY comps /home/user/comps RUN pip install --no-cache-dir --upgrade pip && \ - pip install --no-cache-dir -r /home/comps/asr/requirements.txt + if [ "${ARCH}" = "cpu" ]; then \ + pip install torch torchvision --index-url https://download.pytorch.org/whl/cpu ; \ + pip install --no-cache-dir --extra-index-url https://download.pytorch.org/whl/cpu -r /home/user/comps/asr/requirements.txt ; \ + else \ + pip install --no-cache-dir -r /home/user/comps/asr/requirements.txt ; \ + fi -ENV PYTHONPATH=$PYTHONPATH:/home +ENV PYTHONPATH=$PYTHONPATH:/home/user -WORKDIR /home/comps/asr +WORKDIR /home/user/comps/asr ENTRYPOINT ["python", "asr.py"] \ No newline at end of file diff --git a/comps/asr/README.md b/comps/asr/README.md index 4cf78cbf1..10cac9421 100644 --- a/comps/asr/README.md +++ b/comps/asr/README.md @@ -2,17 +2,17 @@ ASR (Audio-Speech-Recognition) microservice helps users convert speech to text. When building a talking bot with LLM, users will need to convert their audio inputs (What they talk, or Input audio from other sources) to text, so the LLM is able to tokenize the text and generate an answer. This microservice is built for that conversion stage. -# ๐Ÿš€1. Start Microservice with Python (Option 1) +## ๐Ÿš€1. 
Start Microservice with Python (Option 1) To start the ASR microservice with Python, you need to first install python packages. -## 1.1 Install Requirements +### 1.1 Install Requirements ```bash pip install -r requirements.txt ``` -## 1.2 Start Whisper Service/Test +### 1.2 Start Whisper Service/Test - Xeon CPU @@ -40,7 +40,7 @@ nohup python whisper_server.py --device=hpu & python check_whisper_server.py ``` -## 1.3 Start ASR Service/Test +### 1.3 Start ASR Service/Test ```bash cd ../ @@ -54,13 +54,13 @@ While the Whisper service is running, you can start the ASR service. If the ASR {'id': '0e686efd33175ce0ebcf7e0ed7431673', 'text': 'who is pat gelsinger'} ``` -# ๐Ÿš€2. Start Microservice with Docker (Option 2) +## ๐Ÿš€2. Start Microservice with Docker (Option 2) Alternatively, you can also start the ASR microservice with Docker. -## 2.1 Build Images +### 2.1 Build Images -### 2.1.1 Whisper Server Image +#### 2.1.1 Whisper Server Image - Xeon CPU @@ -76,15 +76,15 @@ cd ../.. docker build -t opea/whisper-gaudi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/whisper/Dockerfile_hpu . ``` -### 2.1.2 ASR Service Image +#### 2.1.2 ASR Service Image ```bash docker build -t opea/asr:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/Dockerfile . ``` -## 2.2 Start Whisper and ASR Service +### 2.2 Start Whisper and ASR Service -### 2.2.1 Start Whisper Server +#### 2.2.1 Start Whisper Server - Xeon @@ -98,7 +98,7 @@ docker run -p 7066:7066 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$htt docker run -p 7066:7066 --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy opea/whisper-gaudi:latest ``` -### 2.2.2 Start ASR service +#### 2.2.2 Start ASR service ```bash ip_address=$(hostname -I | awk '{print $1}') @@ -106,7 +106,7 @@ ip_address=$(hostname -I | awk '{print $1}') docker run -d -p 9099:9099 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e ASR_ENDPOINT=http://$ip_address:7066 opea/asr:latest ``` -### 2.2.3 Test +#### 2.2.3 Test ```bash # Use curl or python diff --git a/comps/asr/asr.py b/comps/asr/asr.py index 1f5cf2df4..f687169b0 100644 --- a/comps/asr/asr.py +++ b/comps/asr/asr.py @@ -8,6 +8,11 @@ import numpy as np import requests +from comps import CustomLogger + +logger = CustomLogger("asr") +logflag = os.getenv("LOGFLAG", False) + from comps import ( Base64ByteStrDoc, LLMParamsDoc, @@ -33,14 +38,17 @@ async def audio_to_text(audio: Base64ByteStrDoc): start = time.time() byte_str = audio.byte_str inputs = {"audio": byte_str} + if logflag: + logger.info(inputs) response = requests.post(url=f"{asr_endpoint}/v1/asr", data=json.dumps(inputs), proxies={"http": None}) - + if logflag: + logger.info(response) statistics_dict["opea_service@asr"].append_latency(time.time() - start, None) return LLMParamsDoc(query=response.json()["asr_result"]) if __name__ == "__main__": asr_endpoint = os.getenv("ASR_ENDPOINT", "http://localhost:7066") - print("[asr - router] ASR initialized.") + logger.info("[asr - router] ASR initialized.") opea_microservices["opea_service@asr"].start() diff --git a/comps/asr/requirements.txt b/comps/asr/requirements.txt index 3ebeff787..def6a51b8 100644 --- a/comps/asr/requirements.txt +++ b/comps/asr/requirements.txt @@ -4,12 +4,10 @@ fastapi opentelemetry-api opentelemetry-exporter-otlp opentelemetry-sdk -optimum[habana] 
prometheus-fastapi-instrumentator pydantic==2.7.2 pydub shortuuid -torch transformers uvicorn zhconv diff --git a/comps/asr/whisper/Dockerfile b/comps/asr/whisper/Dockerfile index c3e2a0025..5b50cf371 100644 --- a/comps/asr/whisper/Dockerfile +++ b/comps/asr/whisper/Dockerfile @@ -3,21 +3,34 @@ FROM python:3.11-slim + +RUN useradd -m -s /bin/bash user && \ + mkdir -p /home/user && \ + chown -R user /home/user/ + # Set environment variables ENV LANG=en_US.UTF-8 -ENV PYTHONPATH=/home/user +ARG ARCH=cpu # Install system dependencies RUN apt-get update \ && apt-get install -y ffmpeg -COPY comps /home/comps +COPY --chown=user:user comps /home/user/comps + +USER user RUN pip install --no-cache-dir --upgrade pip && \ - pip install --no-cache-dir -r /home/comps/asr/requirements.txt + pip install --no-cache-dir -r /home/user/comps/asr/requirements.txt && \ + if [ "${ARCH}" = "cpu" ]; then \ + pip install torch torchvision --index-url https://download.pytorch.org/whl/cpu ; \ + pip install --no-cache-dir --extra-index-url https://download.pytorch.org/whl/cpu -r /home/user/comps/asr/requirements.txt ; \ + else \ + pip install --no-cache-dir -r /home/user/comps/asr/requirements.txt ; \ + fi -ENV PYTHONPATH=$PYTHONPATH:/home +ENV PYTHONPATH=$PYTHONPATH:/home/user -WORKDIR /home/comps/asr/whisper +WORKDIR /home/user/comps/asr/whisper -ENTRYPOINT ["python", "whisper_server.py", "--device", "cpu"] \ No newline at end of file +ENTRYPOINT ["python", "whisper_server.py", "--device", "cpu"] diff --git a/comps/asr/whisper/Dockerfile_hpu b/comps/asr/whisper/Dockerfile_hpu index 128b8d5cc..15a14e0be 100644 --- a/comps/asr/whisper/Dockerfile_hpu +++ b/comps/asr/whisper/Dockerfile_hpu @@ -2,25 +2,33 @@ # SPDX-License-Identifier: Apache-2.0 # HABANA environment -FROM vault.habana.ai/gaudi-docker/1.16.1/ubuntu22.04/habanalabs/pytorch-installer-2.2.2:latest AS hpu +# FROM vault.habana.ai/gaudi-docker/1.16.1/ubuntu22.04/habanalabs/pytorch-installer-2.2.2:latest as hpu +FROM opea/habanalabs:1.16.1-pytorch-installer-2.2.2 as hpu + +RUN useradd -m -s /bin/bash user && \ + mkdir -p /home/user && \ + chown -R user /home/user/ # Set environment variables ENV LANG=en_US.UTF-8 ENV PYTHONPATH=/home/user:/usr/lib/habanalabs/:/optimum-habana +ENV LD_LIBRARY_PATH=/lib/x86_64-linux-gnu:$LD_LIBRARY_PATH # Install system dependencies RUN apt-get update \ && apt-get install -y ffmpeg -COPY comps /home/comps +COPY --chown=user:user comps /home/user/comps + +USER user # Install requirements and optimum habana RUN pip install --no-cache-dir --upgrade pip && \ - pip install --no-cache-dir -r /home/comps/asr/requirements.txt && \ + pip install --no-cache-dir -r /home/user/comps/asr/requirements.txt && \ pip install optimum[habana] -ENV PYTHONPATH=$PYTHONPATH:/home +ENV PYTHONPATH=$PYTHONPATH:/home/user -WORKDIR /home/comps/asr/whisper +WORKDIR /home/user/comps/asr/whisper -ENTRYPOINT ["python", "whisper_server.py", "--device", "hpu"] \ No newline at end of file +ENTRYPOINT ["python", "whisper_server.py", "--device", "hpu"] diff --git a/comps/asr/whisper/whisper_model.py b/comps/asr/whisper/whisper_model.py index 0af9ebfcb..85d4126cd 100644 --- a/comps/asr/whisper/whisper_model.py +++ b/comps/asr/whisper/whisper_model.py @@ -16,7 +16,7 @@ class WhisperModel: """Convert audio to text.""" - def __init__(self, model_name_or_path="openai/whisper-small", language="english", device="cpu"): + def __init__(self, model_name_or_path="openai/whisper-small", language="english", device="cpu", hpu_max_len=8192): if device == "hpu": # Explicitly link HPU 
with Torch from optimum.habana.transformers.modeling_utils import adapt_transformers_to_gaudi @@ -31,12 +31,11 @@ def __init__(self, model_name_or_path="openai/whisper-small", language="english" self.model.eval() self.language = language + self.hpu_max_len = hpu_max_len if device == "hpu": - # do hpu graph warmup with a long enough input audio - # whisper has a receptive field of 30 seconds - # here we select a relatively long audio (~15 sec) to quickly warmup - self._warmup_whisper_hpu_graph("https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/labixiaoxin.wav") + self._warmup_whisper_hpu_graph("https://github.com/Spycsh/assets/raw/main/ljspeech_60s_audio.wav") + self._warmup_whisper_hpu_graph("https://github.com/Spycsh/assets/raw/main/ljspeech_30s_audio.wav") def _audiosegment_to_librosawav(self, audiosegment): # https://github.com/jiaaro/pydub/blob/master/API.markdown#audiosegmentget_array_of_samples @@ -59,11 +58,54 @@ def _warmup_whisper_hpu_graph(self, url): print("[ASR] warmup...") waveform = AudioSegment.from_file("warmup.wav").set_frame_rate(16000) waveform = self._audiosegment_to_librosawav(waveform) - # pylint: disable=E1101 - inputs = self.processor.feature_extractor( - waveform, return_tensors="pt", sampling_rate=16_000 - ).input_features.to(self.device) - _ = self.model.generate(inputs, language="chinese") + + try: + processed_inputs = self.processor( + waveform, + return_tensors="pt", + truncation=False, + padding="longest", + return_attention_mask=True, + sampling_rate=16000, + ) + except RuntimeError as e: + if "Padding size should be less than" in str(e): + # short-form + processed_inputs = self.processor( + waveform, + return_tensors="pt", + sampling_rate=16000, + ) + else: + raise e + + if processed_inputs.input_features.shape[-1] < 3000: + # short-form + processed_inputs = self.processor( + waveform, + return_tensors="pt", + sampling_rate=16000, + ) + else: + processed_inputs["input_features"] = torch.nn.functional.pad( + processed_inputs.input_features, + (0, self.hpu_max_len - processed_inputs.input_features.size(-1)), + value=-1.5, + ) + processed_inputs["attention_mask"] = torch.nn.functional.pad( + processed_inputs.attention_mask, + (0, self.hpu_max_len + 1 - processed_inputs.attention_mask.size(-1)), + value=0, + ) + + _ = self.model.generate( + **( + processed_inputs.to( + self.device, + ) + ), + language=self.language, + ) def audio2text(self, audio_path): """Convert audio to text. 
@@ -80,11 +122,52 @@ def audio2text(self, audio_path): audio_dataset = Dataset.from_dict({"audio": [audio_path]}).cast_column("audio", Audio(sampling_rate=16000)) waveform = audio_dataset[0]["audio"]["array"] - # pylint: disable=E1101 - inputs = self.processor.feature_extractor( - waveform, return_tensors="pt", sampling_rate=16_000 - ).input_features.to(self.device) - predicted_ids = self.model.generate(inputs, language=self.language) + try: + processed_inputs = self.processor( + waveform, + return_tensors="pt", + truncation=False, + padding="longest", + return_attention_mask=True, + sampling_rate=16000, + ) + except RuntimeError as e: + if "Padding size should be less than" in str(e): + # short-form + processed_inputs = self.processor( + waveform, + return_tensors="pt", + sampling_rate=16000, + ) + else: + raise e + if processed_inputs.input_features.shape[-1] < 3000: + # short-form + processed_inputs = self.processor( + waveform, + return_tensors="pt", + sampling_rate=16000, + ) + elif self.device == "hpu" and processed_inputs.input_features.shape[-1] > 3000: + processed_inputs["input_features"] = torch.nn.functional.pad( + processed_inputs.input_features, + (0, self.hpu_max_len - processed_inputs.input_features.size(-1)), + value=-1.5, + ) + processed_inputs["attention_mask"] = torch.nn.functional.pad( + processed_inputs.attention_mask, + (0, self.hpu_max_len + 1 - processed_inputs.attention_mask.size(-1)), + value=0, + ) + + predicted_ids = self.model.generate( + **( + processed_inputs.to( + self.device, + ) + ), + language=self.language, + ) # pylint: disable=E1101 result = self.processor.tokenizer.batch_decode(predicted_ids, skip_special_tokens=True, normalize=True)[0] if self.language in ["chinese", "mandarin"]: @@ -96,20 +179,23 @@ def audio2text(self, audio_path): if __name__ == "__main__": - asr = WhisperModel(language="english") + asr = WhisperModel(model_name_or_path="openai/whisper-small", language="english", device="cpu") # Test multilanguage asr + asr.language = "chinese" urllib.request.urlretrieve( "https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/labixiaoxin.wav", "sample.wav", ) - asr.language = "chinese" text = asr.audio2text("sample.wav") + asr.language = "english" urllib.request.urlretrieve( "https://github.com/intel/intel-extension-for-transformers/raw/main/intel_extension_for_transformers/neural_chat/assets/audio/sample.wav", "sample.wav", ) text = asr.audio2text("sample.wav") - os.remove("sample.wav") + for i in [5, 10, 30, 60]: + urllib.request.urlretrieve(f"https://github.com/Spycsh/assets/raw/main/ljspeech_{i}s_audio.wav", "sample.wav") + text = asr.audio2text("sample.wav") diff --git a/comps/chathistory/mongo/README.md b/comps/chathistory/mongo/README.md index 4a132e9c8..6f3f7a93a 100644 --- a/comps/chathistory/mongo/README.md +++ b/comps/chathistory/mongo/README.md @@ -17,16 +17,16 @@ export DB_NAME=${DB_NAME} export COLLECTION_NAME=${COLLECTION_NAME} ``` -# ๐Ÿš€Start Microservice with Docker +## ๐Ÿš€Start Microservice with Docker -## Build Docker Image +### Build Docker Image ```bash cd ../../../../ docker build -t opea/chathistory-mongo-server:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/chathistory/mongo/docker/Dockerfile . 
``` -## Run Docker with CLI +### Run Docker with CLI - Run mongoDB image @@ -40,7 +40,7 @@ docker run -d -p 27017:27017 --name=mongo mongo:latest docker run -d --name="chathistory-mongo-server" -p 6013:6013 -p 6012:6012 -p 6014:6014 -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e no_proxy=$no_proxy -e MONGO_HOST=${MONGO_HOST} -e MONGO_PORT=${MONGO_PORT} -e DB_NAME=${DB_NAME} -e COLLECTION_NAME=${COLLECTION_NAME} opea/chathistory-mongo-server:latest ``` -# Invoke Microservice +## Invoke Microservice Once chathistory service is up and running, users can update the database by using the below API endpoint. The API returns a unique UUID for the saved conversation. @@ -60,7 +60,7 @@ curl -X 'POST' \ ```bash curl -X 'POST' \ - http://${host_ip}:6013/v1/chathistory/get \ + http://${host_ip}:6012/v1/chathistory/get \ -H 'accept: application/json' \ -H 'Content-Type: application/json' \ -d '{ @@ -71,7 +71,7 @@ curl -X 'POST' \ ```bash curl -X 'POST' \ - http://${host_ip}:6013/v1/chathistory/get \ + http://${host_ip}:6012/v1/chathistory/get \ -H 'accept: application/json' \ -H 'Content-Type: application/json' \ -d '{ @@ -97,7 +97,7 @@ curl -X 'POST' \ ```bash curl -X 'POST' \ - http://${host_ip}:6014/v1/chathistory/delete \ + http://${host_ip}:6012/v1/chathistory/delete \ -H 'accept: application/json' \ -H 'Content-Type: application/json' \ -d '{ diff --git a/comps/chathistory/mongo/chathistory_mongo.py b/comps/chathistory/mongo/chathistory_mongo.py index 5b65d1d8e..29f5d41cb 100644 --- a/comps/chathistory/mongo/chathistory_mongo.py +++ b/comps/chathistory/mongo/chathistory_mongo.py @@ -1,14 +1,19 @@ ๏ปฟ# Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import os from typing import Optional from fastapi import HTTPException from mongo_store import DocumentStore from pydantic import BaseModel +from comps import CustomLogger from comps.cores.mega.micro_service import opea_microservices, register_microservice from comps.cores.proto.api_protocol import ChatCompletionRequest +logger = CustomLogger("chathistory_mongo") +logflag = os.getenv("LOGFLAG", False) + class ChatMessage(BaseModel): data: ChatCompletionRequest @@ -35,7 +40,7 @@ def get_first_string(value): @register_microservice( - name="opea_service@chathistory_mongo_create", + name="opea_service@chathistory_mongo", endpoint="/v1/chathistory/create", host="0.0.0.0", input_datatype=ChatMessage, @@ -50,7 +55,8 @@ async def create_documents(document: ChatMessage): Returns: The result of the operation if successful, None otherwise. """ - + if logflag: + logger.info(document) try: if document.data.user is None: raise HTTPException(status_code=500, detail="Please provide the user information") @@ -62,19 +68,21 @@ async def create_documents(document: ChatMessage): res = await store.update_document(document.id, document.data, document.first_query) else: res = await store.save_document(document) + if logflag: + logger.info(res) return res except Exception as e: # Handle the exception here - print(f"An error occurred: {str(e)}") + logger.info(f"An error occurred: {str(e)}") raise HTTPException(status_code=500, detail=str(e)) @register_microservice( - name="opea_service@chathistory_mongo_get", + name="opea_service@chathistory_mongo", endpoint="/v1/chathistory/get", host="0.0.0.0", input_datatype=ChatId, - port=6013, + port=6012, ) async def get_documents(document: ChatId): """Retrieves documents from the document store based on the provided ChatId. 
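Since all three handlers are now registered under the single `opea_service@chathistory_mongo` name, one FastAPI app serves `/v1/chathistory/create`, `/v1/chathistory/get`, and `/v1/chathistory/delete` on port 6012. A small client sketch against those routes; the host and payload values are illustrative, with field names following the handlers and README examples above.

```python
import requests

HOST = "http://localhost:6012"  # assumed host; one port for create/get/delete after this change

# Save a conversation; per the README the service returns a UUID for the stored document.
doc_id = requests.post(
    f"{HOST}/v1/chathistory/create",
    json={"data": {"messages": "test Messages", "user": "test"}},
).json()

# Fetch all of the user's conversations, then the one just created.
all_docs = requests.post(f"{HOST}/v1/chathistory/get", json={"user": "test"}).json()
one_doc = requests.post(f"{HOST}/v1/chathistory/get", json={"user": "test", "id": doc_id}).json()

# Delete it again; the delete handler requires a document id.
requests.post(f"{HOST}/v1/chathistory/delete", json={"user": "test", "id": doc_id})
```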
@@ -85,6 +93,8 @@ async def get_documents(document: ChatId): Returns: The retrieved documents if successful, None otherwise. """ + if logflag: + logger.info(document) try: store = DocumentStore(document.user) store.initialize_storage() @@ -92,19 +102,21 @@ async def get_documents(document: ChatId): res = await store.get_all_documents_of_user() else: res = await store.get_user_documents_by_id(document.id) + if logflag: + logger.info(res) return res except Exception as e: # Handle the exception here - print(f"An error occurred: {str(e)}") + logger.info(f"An error occurred: {str(e)}") raise HTTPException(status_code=500, detail=str(e)) @register_microservice( - name="opea_service@chathistory_mongo_delete", + name="opea_service@chathistory_mongo", endpoint="/v1/chathistory/delete", host="0.0.0.0", input_datatype=ChatId, - port=6014, + port=6012, ) async def delete_documents(document: ChatId): """Deletes a document from the document store based on the provided ChatId. @@ -115,6 +127,8 @@ async def delete_documents(document: ChatId): Returns: The result of the deletion if successful, None otherwise. """ + if logflag: + logger.info(document) try: store = DocumentStore(document.user) store.initialize_storage() @@ -122,14 +136,14 @@ async def delete_documents(document: ChatId): raise Exception("Document id is required.") else: res = await store.delete_document(document.id) + if logflag: + logger.info(res) return res except Exception as e: # Handle the exception here - print(f"An error occurred: {str(e)}") + logger.info(f"An error occurred: {str(e)}") raise HTTPException(status_code=500, detail=str(e)) if __name__ == "__main__": - opea_microservices["opea_service@chathistory_mongo_get"].start() - opea_microservices["opea_service@chathistory_mongo_create"].start() - opea_microservices["opea_service@chathistory_mongo_delete"].start() + opea_microservices["opea_service@chathistory_mongo"].start() diff --git a/comps/chathistory/mongo/docker/Dockerfile b/comps/chathistory/mongo/docker/Dockerfile index 5209af835..81e0fde5e 100644 --- a/comps/chathistory/mongo/docker/Dockerfile +++ b/comps/chathistory/mongo/docker/Dockerfile @@ -7,9 +7,8 @@ ENV LANG=C.UTF-8 RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ build-essential \ - libgl1-mesa-glx \ libjemalloc-dev \ - vim + libgl1-mesa-glx RUN useradd -m -s /bin/bash user && \ mkdir -p /home/user && \ diff --git a/comps/chathistory/mongo/docker/docker-compose-chathistory-mongo.yaml b/comps/chathistory/mongo/docker/docker-compose-chathistory-mongo.yaml index 97e17e077..e272d4f91 100644 --- a/comps/chathistory/mongo/docker/docker-compose-chathistory-mongo.yaml +++ b/comps/chathistory/mongo/docker/docker-compose-chathistory-mongo.yaml @@ -19,8 +19,6 @@ services: container_name: chathistory-mongo-server ports: - "6012:6012" - - "6013:6013" - - "6014:6014" ipc: host environment: http_proxy: ${http_proxy} diff --git a/comps/chathistory/mongo/requirements.txt b/comps/chathistory/mongo/requirements.txt index b0dec78c0..aa08f761a 100644 --- a/comps/chathistory/mongo/requirements.txt +++ b/comps/chathistory/mongo/requirements.txt @@ -1 +1 @@ -๏ปฟmotor==3.4.0 +motor==3.4.0 diff --git a/comps/cores/mega/constants.py b/comps/cores/mega/constants.py index 05eab5284..10863c149 100644 --- a/comps/cores/mega/constants.py +++ b/comps/cores/mega/constants.py @@ -43,6 +43,7 @@ class MegaServiceEndpoint(Enum): DOC_SUMMARY = "/v1/docsum" SEARCH_QNA = "/v1/searchqna" TRANSLATION = "/v1/translation" + RETRIEVALTOOL = "/v1/retrievaltool" FAQ_GEN = 
"/v1/faqgen" # Follow OPENAI EMBEDDINGS = "/v1/embeddings" diff --git a/comps/cores/mega/gateway.py b/comps/cores/mega/gateway.py index 862205414..cc8eaf5d2 100644 --- a/comps/cores/mega/gateway.py +++ b/comps/cores/mega/gateway.py @@ -1,8 +1,15 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import base64 +import os +from io import BytesIO +from typing import Union + +import requests from fastapi import Request from fastapi.responses import StreamingResponse +from PIL import Image from ..proto.api_protocol import ( AudioChatCompletionRequest, @@ -10,9 +17,10 @@ ChatCompletionResponse, ChatCompletionResponseChoice, ChatMessage, + EmbeddingRequest, UsageInfo, ) -from ..proto.docarray import LLMParams +from ..proto.docarray import LLMParams, LLMParamsDoc, RerankedDoc, TextDoc from .constants import MegaServiceEndpoint, ServiceRoleType, ServiceType from .micro_service import MicroService @@ -71,10 +79,13 @@ def list_parameter(self): pass def _handle_message(self, messages): + images = [] if isinstance(messages, str): prompt = messages else: messages_dict = {} + system_prompt = "" + prompt = "" for message in messages: msg_role = message["role"] if msg_role == "system": @@ -84,20 +95,56 @@ def _handle_message(self, messages): text = "" text_list = [item["text"] for item in message["content"] if item["type"] == "text"] text += "\n".join(text_list) - messages_dict[msg_role] = text + image_list = [ + item["image_url"]["url"] for item in message["content"] if item["type"] == "image_url" + ] + if image_list: + messages_dict[msg_role] = (text, image_list) + else: + messages_dict[msg_role] = text else: messages_dict[msg_role] = message["content"] elif msg_role == "assistant": messages_dict[msg_role] = message["content"] else: raise ValueError(f"Unknown role: {msg_role}") - prompt = system_prompt + "\n" + if system_prompt: + prompt = system_prompt + "\n" for role, message in messages_dict.items(): - if message: - prompt += role + ": " + message + "\n" + if isinstance(message, tuple): + text, image_list = message + if text: + prompt += role + ": " + text + "\n" + else: + prompt += role + ":" + for img in image_list: + # URL + if img.startswith("http://") or img.startswith("https://"): + response = requests.get(img) + image = Image.open(BytesIO(response.content)).convert("RGBA") + image_bytes = BytesIO() + image.save(image_bytes, format="PNG") + img_b64_str = base64.b64encode(image_bytes.getvalue()).decode() + # Local Path + elif os.path.exists(img): + image = Image.open(img).convert("RGBA") + image_bytes = BytesIO() + image.save(image_bytes, format="PNG") + img_b64_str = base64.b64encode(image_bytes.getvalue()).decode() + # Bytes + else: + img_b64_str = img + + images.append(img_b64_str) else: - prompt += role + ":" - return prompt + if message: + prompt += role + ": " + message + "\n" + else: + prompt += role + ":" + if images: + return prompt, images + else: + return prompt class ChatQnAGateway(Gateway): @@ -118,6 +165,7 @@ async def handle_request(self, request: Request): temperature=chat_request.temperature if chat_request.temperature else 0.01, repetition_penalty=chat_request.presence_penalty if chat_request.presence_penalty else 1.03, streaming=stream_opt, + chat_template=chat_request.chat_template if chat_request.chat_template else None, ) result_dict, runtime_graph = await self.megaservice.schedule( initial_inputs={"text": prompt}, llm_parameters=parameters @@ -439,3 +487,84 @@ async def handle_request(self, request: Request): ) ) return 
ChatCompletionResponse(model="faqgen", choices=choices, usage=usage) + + +class VisualQnAGateway(Gateway): + def __init__(self, megaservice, host="0.0.0.0", port=8888): + super().__init__( + megaservice, host, port, str(MegaServiceEndpoint.VISUAL_QNA), ChatCompletionRequest, ChatCompletionResponse + ) + + async def handle_request(self, request: Request): + data = await request.json() + stream_opt = data.get("stream", False) + chat_request = ChatCompletionRequest.parse_obj(data) + prompt, images = self._handle_message(chat_request.messages) + parameters = LLMParams( + max_new_tokens=chat_request.max_tokens if chat_request.max_tokens else 1024, + top_k=chat_request.top_k if chat_request.top_k else 10, + top_p=chat_request.top_p if chat_request.top_p else 0.95, + temperature=chat_request.temperature if chat_request.temperature else 0.01, + repetition_penalty=chat_request.presence_penalty if chat_request.presence_penalty else 1.03, + streaming=stream_opt, + ) + result_dict, runtime_graph = await self.megaservice.schedule( + initial_inputs={"prompt": prompt, "image": images[0]}, llm_parameters=parameters + ) + for node, response in result_dict.items(): + # Here it suppose the last microservice in the megaservice is LVM. + if ( + isinstance(response, StreamingResponse) + and node == list(self.megaservice.services.keys())[-1] + and self.megaservice.services[node].service_type == ServiceType.LVM + ): + return response + last_node = runtime_graph.all_leaves()[-1] + response = result_dict[last_node]["text"] + choices = [] + usage = UsageInfo() + choices.append( + ChatCompletionResponseChoice( + index=0, + message=ChatMessage(role="assistant", content=response), + finish_reason="stop", + ) + ) + return ChatCompletionResponse(model="visualqna", choices=choices, usage=usage) + + +class RetrievalToolGateway(Gateway): + """embed+retrieve+rerank.""" + + def __init__(self, megaservice, host="0.0.0.0", port=8889): + super().__init__( + megaservice, + host, + port, + str(MegaServiceEndpoint.RETRIEVALTOOL), + Union[TextDoc, EmbeddingRequest, ChatCompletionRequest], # ChatCompletionRequest, + Union[RerankedDoc, LLMParamsDoc], # ChatCompletionResponse + ) + + async def handle_request(self, request: Request): + def parser_input(data, TypeClass, key): + try: + chat_request = TypeClass.parse_obj(data) + query = getattr(chat_request, key) + except: + query = None + return query + + data = await request.json() + query = None + for key, TypeClass in zip(["text", "input", "input"], [TextDoc, EmbeddingRequest, ChatCompletionRequest]): + query = parser_input(data, TypeClass, key) + if query is not None: + break + if query is None: + raise ValueError(f"Unknown request type: {data}") + result_dict, runtime_graph = await self.megaservice.schedule(initial_inputs={"text": query}) + last_node = runtime_graph.all_leaves()[-1] + response = result_dict[last_node] + print("response is ", response) + return response diff --git a/comps/cores/mega/logger.py b/comps/cores/mega/logger.py index 8cbe59dc3..b556a640f 100644 --- a/comps/cores/mega/logger.py +++ b/comps/cores/mega/logger.py @@ -35,7 +35,7 @@ def __init__(self, name: str = None): self.__dict__[key.lower()] = functools.partial(self.log_message, level) # Set up log format and handler - self.format = logging.Formatter(fmt="[%(asctime)-15s] [%(levelname)8s] - %(message)s") + self.format = logging.Formatter(fmt="[%(asctime)-15s] [%(levelname)8s] - %(name)s - %(message)s") self.handler = logging.StreamHandler() self.handler.setFormatter(self.format) diff --git 
a/comps/cores/mega/micro_service.py b/comps/cores/mega/micro_service.py index e83a2836b..9d707fa68 100644 --- a/comps/cores/mega/micro_service.py +++ b/comps/cores/mega/micro_service.py @@ -3,7 +3,7 @@ import asyncio import multiprocessing -from typing import Any, Optional, Type +from typing import Any, List, Optional, Type from ..proto.docarray import TextDoc from .constants import ServiceRoleType, ServiceType @@ -154,25 +154,28 @@ def register_microservice( output_datatype: Type[Any] = TextDoc, provider: Optional[str] = None, provider_endpoint: Optional[str] = None, + methods: List[str] = ["POST"], ): def decorator(func): - micro_service = MicroService( - name=name, - service_role=service_role, - service_type=service_type, - protocol=protocol, - host=host, - port=port, - ssl_keyfile=ssl_keyfile, - ssl_certfile=ssl_certfile, - endpoint=endpoint, - input_datatype=input_datatype, - output_datatype=output_datatype, - provider=provider, - provider_endpoint=provider_endpoint, - ) - micro_service.app.router.add_api_route(endpoint, func, methods=["POST"]) - opea_microservices[name] = micro_service + if name not in opea_microservices: + micro_service = MicroService( + name=name, + service_role=service_role, + service_type=service_type, + protocol=protocol, + host=host, + port=port, + ssl_keyfile=ssl_keyfile, + ssl_certfile=ssl_certfile, + endpoint=endpoint, + input_datatype=input_datatype, + output_datatype=output_datatype, + provider=provider, + provider_endpoint=provider_endpoint, + ) + opea_microservices[name] = micro_service + opea_microservices[name].app.router.add_api_route(endpoint, func, methods=methods) + return func return decorator diff --git a/comps/cores/mega/orchestrator.py b/comps/cores/mega/orchestrator.py index d4f3ac9b7..92063d498 100644 --- a/comps/cores/mega/orchestrator.py +++ b/comps/cores/mega/orchestrator.py @@ -72,6 +72,18 @@ async def schedule(self, initial_inputs: Dict, llm_parameters: LLMParams = LLMPa downstreams.remove(downstream) except re.error as e: print("Pattern invalid! 
Operation cancelled.") + if len(downstreams) == 0 and llm_parameters.streaming: + # turn the response to a StreamingResponse + # to make the response uniform to UI + def fake_stream(text): + yield "data: b'" + text + "'\n\n" + yield "data: [DONE]\n\n" + + self.dump_outputs( + node, + StreamingResponse(fake_stream(response["text"]), media_type="text/event-stream"), + result_dict, + ) for d_node in downstreams: if all(i in result_dict for i in runtime_graph.predecessors(d_node)): @@ -117,7 +129,10 @@ async def execute( if inputs.get(field) != value: inputs[field] = value - if self.services[cur_node].service_type == ServiceType.LLM and llm_parameters.streaming: + if ( + self.services[cur_node].service_type == ServiceType.LLM + or self.services[cur_node].service_type == ServiceType.LVM + ) and llm_parameters.streaming: # Still leave to sync requests.post for StreamingResponse response = requests.post( url=endpoint, data=json.dumps(inputs), proxies={"http": None}, stream=True, timeout=1000 @@ -173,18 +188,20 @@ def extract_chunk_str(self, chunk_str): if chunk_str == "data: [DONE]\n\n": return "" prefix = "data: b'" + prefix_2 = 'data: b"' suffix = "'\n\n" - if chunk_str.startswith(prefix): + suffix_2 = '"\n\n' + if chunk_str.startswith(prefix) or chunk_str.startswith(prefix_2): chunk_str = chunk_str[len(prefix) :] - if chunk_str.endswith(suffix): + if chunk_str.endswith(suffix) or chunk_str.endswith(suffix_2): chunk_str = chunk_str[: -len(suffix)] return chunk_str def token_generator(self, sentence, is_last=False): prefix = "data: " suffix = "\n\n" - tokens = re.findall(r"\S+\s?", sentence, re.UNICODE) + tokens = re.findall(r"\s?\S+\s?", sentence, re.UNICODE) for token in tokens: - yield prefix + repr(token.encode("utf-8")) + suffix + yield prefix + repr(token.replace("\\n", "\n").encode("utf-8")) + suffix if is_last: yield "data: [DONE]\n\n" diff --git a/comps/cores/proto/api_protocol.py b/comps/cores/proto/api_protocol.py index 957fc9d95..0a8b2de00 100644 --- a/comps/cores/proto/api_protocol.py +++ b/comps/cores/proto/api_protocol.py @@ -30,24 +30,243 @@ class UsageInfo(BaseModel): completion_tokens: Optional[int] = 0 +class ResponseFormat(BaseModel): + # type must be "json_object" or "text" + type: Literal["text", "json_object"] + + +class StreamOptions(BaseModel): + # refer https://github.com/vllm-project/vllm/blob/main/vllm/entrypoints/openai/protocol.py#L105 + include_usage: Optional[bool] + + +class FunctionDefinition(BaseModel): + name: str + description: Optional[str] = None + parameters: Optional[Dict[str, Any]] = None + + +class ChatCompletionToolsParam(BaseModel): + type: Literal["function"] = "function" + function: FunctionDefinition + + +class ChatCompletionNamedFunction(BaseModel): + name: str + + +class ChatCompletionNamedToolChoiceParam(BaseModel): + function: ChatCompletionNamedFunction + type: Literal["function"] = "function" + + +class TokenCheckRequestItem(BaseModel): + model: str + prompt: str + max_tokens: int + + +class TokenCheckRequest(BaseModel): + prompts: List[TokenCheckRequestItem] + + +class TokenCheckResponseItem(BaseModel): + fits: bool + tokenCount: int + contextLength: int + + +class TokenCheckResponse(BaseModel): + prompts: List[TokenCheckResponseItem] + + +class EmbeddingRequest(BaseModel): + # Ordered by official OpenAI API documentation + # https://platform.openai.com/docs/api-reference/embeddings + model: Optional[str] = None + input: Union[List[int], List[List[int]], str, List[str]] + encoding_format: Optional[str] = Field("float", 
pattern="^(float|base64)$") + dimensions: Optional[int] = None + user: Optional[str] = None + + # define + request_type: Literal["embedding"] = "embedding" + + +class EmbeddingResponseData(BaseModel): + index: int + object: str = "embedding" + embedding: Union[List[float], str] + + +class EmbeddingResponse(BaseModel): + object: str = "list" + model: Optional[str] = None + data: List[EmbeddingResponseData] + usage: Optional[UsageInfo] = None + + +class RetrievalRequest(BaseModel): + embedding: Union[EmbeddingResponse, List[float]] = None + input: Optional[str] = None # search_type maybe need, like "mmr" + search_type: str = "similarity" + k: int = 4 + distance_threshold: Optional[float] = None + fetch_k: int = 20 + lambda_mult: float = 0.5 + score_threshold: float = 0.2 + + # define + request_type: Literal["retrieval"] = "retrieval" + + +class RetrievalResponseData(BaseModel): + text: str + metadata: Optional[Dict[str, Any]] = None + + +class RetrievalResponse(BaseModel): + retrieved_docs: List[RetrievalResponseData] + + +class RerankingRequest(BaseModel): + input: str + retrieved_docs: Union[List[RetrievalResponseData], List[Dict[str, Any]], List[str]] + top_n: int = 1 + + # define + request_type: Literal["reranking"] = "reranking" + + +class RerankingResponseData(BaseModel): + text: str + score: Optional[float] = 0.0 + + +class RerankingResponse(BaseModel): + reranked_docs: List[RerankingResponseData] + + class ChatCompletionRequest(BaseModel): + # Ordered by official OpenAI API documentation + # https://platform.openai.com/docs/api-reference/chat/create messages: Union[ str, List[Dict[str, str]], List[Dict[str, Union[str, List[Dict[str, Union[str, Dict[str, str]]]]]]], ] model: Optional[str] = "Intel/neural-chat-7b-v3-3" - temperature: Optional[float] = 0.01 - top_p: Optional[float] = 0.95 - top_k: Optional[int] = 10 + frequency_penalty: Optional[float] = 0.0 + logit_bias: Optional[Dict[str, float]] = None + logprobs: Optional[bool] = False + top_logprobs: Optional[int] = 0 + max_tokens: Optional[int] = 1024 # use https://platform.openai.com/docs/api-reference/completions/create n: Optional[int] = 1 - max_tokens: Optional[int] = 1024 - stop: Optional[Union[str, List[str]]] = None + presence_penalty: Optional[float] = 0.0 + response_format: Optional[ResponseFormat] = None + seed: Optional[int] = None + service_tier: Optional[str] = None + stop: Union[str, List[str], None] = Field(default_factory=list) stream: Optional[bool] = False - presence_penalty: Optional[float] = 1.03 - frequency_penalty: Optional[float] = 0.0 + stream_options: Optional[StreamOptions] = None + temperature: Optional[float] = 0.01 # vllm default 0.7 + top_p: Optional[float] = None # openai default 1.0, but tgi needs `top_p` must be > 0.0 and < 1.0, set None + tools: Optional[List[ChatCompletionToolsParam]] = None + tool_choice: Optional[Union[Literal["none"], ChatCompletionNamedToolChoiceParam]] = "none" + parallel_tool_calls: Optional[bool] = True user: Optional[str] = None + # Ordered by official OpenAI API documentation + # default values are same with + # https://platform.openai.com/docs/api-reference/completions/create + best_of: Optional[int] = 1 + suffix: Optional[str] = None + + # vllm reference: https://github.com/vllm-project/vllm/blob/main/vllm/entrypoints/openai/protocol.py#L130 + repetition_penalty: Optional[float] = 1.0 + + # tgi reference: https://huggingface.github.io/text-generation-inference/#/Text%20Generation%20Inference/generate + # some tgi parameters in use + # default values are same with + # 
https://github.com/huggingface/text-generation-inference/blob/main/router/src/lib.rs#L190 + # max_new_tokens: Optional[int] = 100 # Priority use openai + top_k: Optional[int] = None + # top_p: Optional[float] = None # Priority use openai + typical_p: Optional[float] = None + # repetition_penalty: Optional[float] = None + + # doc: begin-chat-completion-extra-params + echo: Optional[bool] = Field( + default=False, + description=( + "If true, the new message will be prepended with the last message " "if they belong to the same role." + ), + ) + add_generation_prompt: Optional[bool] = Field( + default=True, + description=( + "If true, the generation prompt will be added to the chat template. " + "This is a parameter used by chat template in tokenizer config of the " + "model." + ), + ) + add_special_tokens: Optional[bool] = Field( + default=False, + description=( + "If true, special tokens (e.g. BOS) will be added to the prompt " + "on top of what is added by the chat template. " + "For most models, the chat template takes care of adding the " + "special tokens so this should be set to False (as is the " + "default)." + ), + ) + documents: Optional[Union[List[Dict[str, str]], List[str]]] = Field( + default=None, + description=( + "A list of dicts representing documents that will be accessible to " + "the model if it is performing RAG (retrieval-augmented generation)." + " If the template does not support RAG, this argument will have no " + "effect. We recommend that each document should be a dict containing " + '"title" and "text" keys.' + ), + ) + chat_template: Optional[str] = Field( + default=None, + description=( + "A template to use for this conversion. " + "If this is not passed, the model's default chat template will be " + "used instead. We recommend that the template contains {context} and {question} for rag," + "or only contains {question} for chat completion without rag." + ), + ) + chat_template_kwargs: Optional[Dict[str, Any]] = Field( + default=None, + description=("Additional kwargs to pass to the template renderer. 
" "Will be accessible by the chat template."), + ) + # doc: end-chat-completion-extra-params + + # embedding + input: Union[List[int], List[List[int]], str, List[str]] = None # user query/question from messages[-] + encoding_format: Optional[str] = Field("float", pattern="^(float|base64)$") + dimensions: Optional[int] = None + embedding: Union[EmbeddingResponse, List[float]] = Field(default_factory=list) + + # retrieval + search_type: str = "similarity" + k: int = 4 + distance_threshold: Optional[float] = None + fetch_k: int = 20 + lambda_mult: float = 0.5 + score_threshold: float = 0.2 + retrieved_docs: Union[List[RetrievalResponseData], List[Dict[str, Any]]] = Field(default_factory=list) + + # reranking + top_n: int = 1 + reranked_docs: Union[List[RerankingResponseData], List[Dict[str, Any]]] = Field(default_factory=list) + + # define + request_type: Literal["chat"] = "chat" + class AudioChatCompletionRequest(BaseModel): audio: str @@ -110,41 +329,6 @@ class ChatCompletionStreamResponse(BaseModel): choices: List[ChatCompletionResponseStreamChoice] -class TokenCheckRequestItem(BaseModel): - model: str - prompt: str - max_tokens: int - - -class TokenCheckRequest(BaseModel): - prompts: List[TokenCheckRequestItem] - - -class TokenCheckResponseItem(BaseModel): - fits: bool - tokenCount: int - contextLength: int - - -class TokenCheckResponse(BaseModel): - prompts: List[TokenCheckResponseItem] - - -class EmbeddingsRequest(BaseModel): - model: Optional[str] = None - engine: Optional[str] = None - input: Union[str, List[Any]] - user: Optional[str] = None - encoding_format: Optional[str] = None - - -class EmbeddingsResponse(BaseModel): - object: str = "list" - data: List[Dict[str, Any]] - model: str - usage: UsageInfo - - class CompletionRequest(BaseModel): model: str prompt: Union[str, List[Any]] @@ -205,6 +389,82 @@ class ErrorResponse(BaseModel): code: int +class ThreadObject(BaseModel): + id: str + object: str = "thread" + created_at: int + + +class AssistantsObject(BaseModel): + id: str + object: str = "assistant" + created_at: int + name: Optional[str] = None + description: Optional[str] = None + model: Optional[str] = "Intel/neural-chat-7b-v3-3" + instructions: Optional[str] = None + tools: Optional[List[ChatCompletionToolsParam]] = None + + +class Attachments(BaseModel): + file_list: List[UploadFile] = [] + + +class MessageContent(BaseModel): + type: str = "text" + text: Optional[str] = None + + +class MessageObject(BaseModel): + id: str + object: str = "thread.message" + created_at: int + thread_id: str + role: str + status: Optional[str] = None + content: List[MessageContent] + assistant_id: Optional[str] = None + run_id: Optional[str] = None + attachments: Attachments = None + + +class RunObject(BaseModel): + id: str + object: str = "run" + created_at: int + thread_id: str + assistant_id: str + status: Optional[str] = None + last_error: Optional[str] = None + + +class CreateAssistantsRequest(BaseModel): + model: Optional[str] = None + name: Optional[str] = None + description: Optional[str] = None + instructions: Optional[str] = None + tools: Optional[List[ChatCompletionToolsParam]] = None + + +class CreateMessagesRequest(BaseModel): + role: str = "user" + content: Union[str, List[MessageContent]] + attachments: Attachments = None + + +class CreateThreadsRequest(BaseModel): + messages: Optional[List[CreateMessagesRequest]] = None + + +class CreateRunResponse(BaseModel): + assistant_id: str + + +class ListAssistantsRequest(BaseModel): + limit: int = 10 + order: Optional[str] = "desc" + + 
class ApiErrorCode(IntEnum): """ https://platform.openai.com/docs/guides/error-codes/api-errors @@ -279,3 +539,225 @@ def check_requests(request) -> Optional[JSONResponse]: ) return None + + +class Hyperparameters(BaseModel): + batch_size: Optional[Union[Literal["auto"], int]] = "auto" + """Number of examples in each batch. + + A larger batch size means that model parameters are updated less frequently, but with lower variance. + """ + + learning_rate_multiplier: Optional[Union[Literal["auto"], float]] = "auto" + """Scaling factor for the learning rate. + + A smaller learning rate may be useful to avoid overfitting. + """ + + n_epochs: Optional[Union[Literal["auto"], int]] = "auto" + """The number of epochs to train the model for. + + An epoch refers to one full cycle through the training dataset. "auto" decides + the optimal number of epochs based on the size of the dataset. If setting the + number manually, we support any number between 1 and 50 epochs. + """ + + +class FineTuningJobWandbIntegration(BaseModel): + project: str + """The name of the project that the new run will be created under.""" + + entity: Optional[str] = None + """The entity to use for the run. + + This allows you to set the team or username of the WandB user that you would + like associated with the run. If not set, the default entity for the registered + WandB API key is used. + """ + + name: Optional[str] = None + """A display name to set for the run. + + If not set, we will use the Job ID as the name. + """ + + tags: Optional[List[str]] = None + """A list of tags to be attached to the newly created run. + + These tags are passed through directly to WandB. Some default tags are generated + by OpenAI: "openai/finetune", "openai/{base-model}", "openai/{ftjob-abcdef}". + """ + + +class FineTuningJobWandbIntegrationObject(BaseModel): + type: Literal["wandb"] + """The type of the integration being enabled for the fine-tuning job.""" + + wandb: FineTuningJobWandbIntegration + """The settings for your integration with Weights and Biases. + + This payload specifies the project that metrics will be sent to. Optionally, you + can set an explicit display name for your run, add tags to your run, and set a + default entity (team, username, etc) to be associated with your run. + """ + + +class FineTuningJobsRequest(BaseModel): + # Ordered by official OpenAI API documentation + # https://platform.openai.com/docs/api-reference/fine-tuning/create + model: str + """The name of the model to fine-tune.""" + + training_file: str + """The ID of an uploaded file that contains training data.""" + + hyperparameters: Optional[Hyperparameters] = None + """The hyperparameters used for the fine-tuning job.""" + + suffix: Optional[str] = None + """A string of up to 64 characters that will be added to your fine-tuned model name.""" + + validation_file: Optional[str] = None + """The ID of an uploaded file that contains validation data.""" + + integrations: Optional[List[FineTuningJobWandbIntegrationObject]] = None + """A list of integrations to enable for your fine-tuning job.""" + + seed: Optional[str] = None + + +class Error(BaseModel): + code: str + """A machine-readable error code.""" + + message: str + """A human-readable error message.""" + + param: Optional[str] = None + """The parameter that was invalid, usually `training_file` or `validation_file`. + + This field will be null if the failure was not parameter-specific. 
+ """ + + +class FineTuningJob(BaseModel): + # Ordered by official OpenAI API documentation + # https://platform.openai.com/docs/api-reference/fine-tuning/object + id: str + """The object identifier, which can be referenced in the API endpoints.""" + + created_at: int + """The Unix timestamp (in seconds) for when the fine-tuning job was created.""" + + error: Optional[Error] = None + """For fine-tuning jobs that have `failed`, this will contain more information on + the cause of the failure.""" + + fine_tuned_model: Optional[str] = None + """The name of the fine-tuned model that is being created. + + The value will be null if the fine-tuning job is still running. + """ + + finished_at: Optional[int] = None + """The Unix timestamp (in seconds) for when the fine-tuning job was finished. + + The value will be null if the fine-tuning job is still running. + """ + + hyperparameters: Hyperparameters + """The hyperparameters used for the fine-tuning job. + + See the [fine-tuning guide](https://platform.openai.com/docs/guides/fine-tuning) + for more details. + """ + + model: str + """The base model that is being fine-tuned.""" + + object: Literal["fine_tuning.job"] = "fine_tuning.job" + """The object type, which is always "fine_tuning.job".""" + + organization_id: Optional[str] = None + """The organization that owns the fine-tuning job.""" + + result_files: List[str] = None + """The compiled results file ID(s) for the fine-tuning job. + + You can retrieve the results with the + [Files API](https://platform.openai.com/docs/api-reference/files/retrieve-contents). + """ + + status: Literal["validating_files", "queued", "running", "succeeded", "failed", "cancelled"] + """The current status of the fine-tuning job, which can be either + `validating_files`, `queued`, `running`, `succeeded`, `failed`, or `cancelled`.""" + + trained_tokens: Optional[int] = None + """The total number of billable tokens processed by this fine-tuning job. + + The value will be null if the fine-tuning job is still running. + """ + + training_file: str + """The file ID used for training. + + You can retrieve the training data with the + [Files API](https://platform.openai.com/docs/api-reference/files/retrieve-contents). + """ + + validation_file: Optional[str] = None + """The file ID used for validation. + + You can retrieve the validation results with the + [Files API](https://platform.openai.com/docs/api-reference/files/retrieve-contents). + """ + + integrations: Optional[List[FineTuningJobWandbIntegrationObject]] = None + """A list of integrations to enable for this fine-tuning job.""" + + seed: Optional[int] = None + """The seed used for the fine-tuning job.""" + + estimated_finish: Optional[int] = None + """The Unix timestamp (in seconds) for when the fine-tuning job is estimated to + finish. + + The value will be null if the fine-tuning job is not running. 
+ """ + + +class FineTuningJobIDRequest(BaseModel): + # Ordered by official OpenAI API documentation + # https://platform.openai.com/docs/api-reference/fine-tuning/retrieve + # https://platform.openai.com/docs/api-reference/fine-tuning/cancel + fine_tuning_job_id: str + """The ID of the fine-tuning job.""" + + +class FineTuningJobListRequest(BaseModel): + # Ordered by official OpenAI API documentation + # https://platform.openai.com/docs/api-reference/fine-tuning/list + after: Optional[str] = None + """Identifier for the last job from the previous pagination request.""" + + limit: Optional[int] = 20 + """Number of fine-tuning jobs to retrieve.""" + + +class FineTuningJobList(BaseModel): + # Ordered by official OpenAI API documentation + # https://platform.openai.com/docs/api-reference/fine-tuning/list + object: str = "list" + """The object type, which is always "list". + + This indicates that the returned data is a list of fine-tuning jobs. + """ + + data: List[FineTuningJob] + """A list containing FineTuningJob objects.""" + + has_more: bool + """Indicates whether there are more fine-tuning jobs beyond the current list. + + If true, additional requests can be made to retrieve more jobs. + """ diff --git a/comps/cores/proto/docarray.py b/comps/cores/proto/docarray.py index 6a4e55d4c..1a29aa329 100644 --- a/comps/cores/proto/docarray.py +++ b/comps/cores/proto/docarray.py @@ -1,13 +1,13 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -from typing import Optional +from typing import Any, Dict, List, Optional, Union import numpy as np from docarray import BaseDoc, DocList from docarray.documents import AudioDoc -from docarray.typing import AudioUrl -from pydantic import Field, conint, conlist +from docarray.typing import AudioUrl, ImageUrl +from pydantic import Field, conint, conlist, field_validator class TopologyInfo: @@ -17,7 +17,30 @@ class TopologyInfo: class TextDoc(BaseDoc, TopologyInfo): - text: str + text: str = None + + +class ImageDoc(BaseDoc): + url: Optional[ImageUrl] = Field( + description="The path to the image. It can be remote (Web) URL, or a local file path", + default=None, + ) + base64_image: Optional[str] = Field( + description="The base64-based encoding of the image", + default=None, + ) + + +class TextImageDoc(BaseDoc): + image: ImageDoc = None + text: TextDoc = None + + +MultimodalDoc = Union[ + TextDoc, + ImageDoc, + TextImageDoc, +] class Base64ByteStrDoc(BaseDoc): @@ -43,6 +66,18 @@ class EmbedDoc(BaseDoc): score_threshold: float = 0.2 +class EmbedMultimodalDoc(EmbedDoc): + # extend EmbedDoc with these attributes + url: Optional[ImageUrl] = Field( + description="The path to the image. It can be remote (Web) URL, or a local file path.", + default=None, + ) + base64_image: Optional[str] = Field( + description="The base64-based encoding of the image.", + default=None, + ) + + class Audio2TextDoc(AudioDoc): url: Optional[AudioUrl] = Field( description="The path to the audio.", @@ -67,6 +102,10 @@ class Config: json_encoders = {np.ndarray: lambda x: x.tolist()} +class SearchedMultimodalDoc(SearchedDoc): + metadata: List[Dict[str, Any]] + + class GeneratedDoc(BaseDoc): text: str prompt: str @@ -88,6 +127,30 @@ class LLMParamsDoc(BaseDoc): repetition_penalty: float = 1.03 streaming: bool = True + chat_template: Optional[str] = Field( + default=None, + description=( + "A template to use for this conversion. " + "If this is not passed, the model's default chat template will be " + "used instead. 
We recommend that the template contains {context} and {question} for rag," + "or only contains {question} for chat completion without rag." + ), + ) + documents: Optional[Union[List[Dict[str, str]], List[str]]] = Field( + default=[], + description=( + "A list of dicts representing documents that will be accessible to " + "the model if it is performing RAG (retrieval-augmented generation)." + " If the template does not support RAG, this argument will have no " + "effect. We recommend that each document should be a dict containing " + '"title" and "text" keys.' + ), + ) + + @field_validator("chat_template") + def chat_template_must_contain_variables(cls, v): + return v + class LLMParams(BaseDoc): max_new_tokens: int = 1024 @@ -98,6 +161,16 @@ class LLMParams(BaseDoc): repetition_penalty: float = 1.03 streaming: bool = True + chat_template: Optional[str] = Field( + default=None, + description=( + "A template to use for this conversion. " + "If this is not passed, the model's default chat template will be " + "used instead. We recommend that the template contains {context} and {question} for rag," + "or only contains {question} for chat completion without rag." + ), + ) + class RAGASParams(BaseDoc): questions: DocList[TextDoc] @@ -130,3 +203,17 @@ class LVMDoc(BaseDoc): image: str prompt: str max_new_tokens: conint(ge=0, le=1024) = 512 + top_k: int = 10 + top_p: float = 0.95 + typical_p: float = 0.95 + temperature: float = 0.01 + repetition_penalty: float = 1.03 + streaming: bool = False + + +class LVMVideoDoc(BaseDoc): + video_url: str + chunk_start: float + chunk_duration: float + prompt: str + max_new_tokens: conint(ge=0, le=1024) = 512 diff --git a/comps/cores/telemetry/README.md b/comps/cores/telemetry/README.md new file mode 100644 index 000000000..dda946647 --- /dev/null +++ b/comps/cores/telemetry/README.md @@ -0,0 +1,121 @@ +# Telemetry for OPEA + +OPEA Comps currently provides telemetry functionalities for metrics and tracing using Prometheus, Grafana, and Jaeger. Hereโ€™s a basic introduction to these tools: + +![opea telemetry](https://raw.githubusercontent.com/Spycsh/assets/main/OPEA%20Telemetry.jpg) + +## Metrics + +OPEA microservice metrics are exported in Prometheus format and are divided into two categories: general metrics and specific metrics. + +General metrics, such as `http_requests_total `, `http_request_size_bytes`, are exposed by every microservice endpoint using the [prometheus-fastapi-instrumentator](https://github.com/trallnag/prometheus-fastapi-instrumentator). + +Specific metrics are the built-in metrics exposed under `/metrics` by each specific microservices such as TGI, vLLM, TEI and others. Both types of the metrics adhere to the Prometheus format. + +### General Metrics + +To access the general metrics of each microservice, you can use `curl` as follows: + +```bash +curl localhost:{port of your service}/metrics +``` + +Then you will see Prometheus format metrics printed out as follows: + +```yaml +HELP http_requests_total Total number of requests by method, status and handler. +# TYPE http_requests_total counter +http_requests_total{handler="/metrics",method="GET",status="2xx"} 3.0 +http_requests_total{handler="/v1/chatqna",method="POST",status="2xx"} 2.0 +... +# HELP http_request_size_bytes Content length of incoming requests by handler. Only value of header is respected. Otherwise ignored. No percentile calculated. 
+# TYPE http_request_size_bytes summary +http_request_size_bytes_count{handler="/metrics"} 3.0 +http_request_size_bytes_sum{handler="/metrics"} 0.0 +http_request_size_bytes_count{handler="/v1/chatqna"} 2.0 +http_request_size_bytes_sum{handler="/v1/chatqna"} 128.0 +... +``` + +### Specific Metrics + +To access the metrics exposed by each specific microservice, ensure that you check the specific port and your port mapping to reach the `/metrics` endpoint correctly. + +For example, you can `curl localhost:6006/metrics` to retrieve the TEI embedding metrics, and the output should look like follows: + +```yaml +# TYPE te_embed_count counter +te_embed_count 7 + +# TYPE te_request_success counter +te_request_success{method="batch"} 2 + +# TYPE te_request_count counter +te_request_count{method="single"} 2 +te_request_count{method="batch"} 2 + +# TYPE te_embed_success counter +te_embed_success 7 + +# TYPE te_queue_size gauge +te_queue_size 0 + +# TYPE te_request_inference_duration histogram +te_request_inference_duration_bucket{le="0.000015000000000000002"} 0 +te_request_inference_duration_bucket{le="0.000022500000000000005"} 0 +te_request_inference_duration_bucket{le="0.00003375000000000001"} 0 +``` + +These metrics can be scraped by the Prometheus server into a time-series database and further visualized using Grafana. + +Below are some default metrics endpoints for specific microservices: + +| component | port | endpoint | metircs doc | +| ------------- | ----- | -------- | ------------------------------------------------------------------------------------------------------- | +| TGI | 80 | /metrics | [link](https://huggingface.co/docs/text-generation-inference/en/basic_tutorials/monitoring) | +| milvus | 9091 | /metrics | [link](https://milvus.io/docs/monitor.md) | +| vLLM | 18688 | /metrics | [link](https://docs.vllm.ai/en/v0.5.0/serving/metrics.html) | +| TEI embedding | 6006 | /metrics | [link](https://huggingface.github.io/text-embeddings-inference/#/Text%20Embeddings%20Inference/metrics) | +| TEI reranking | 8808 | /metrics | [link](https://huggingface.github.io/text-embeddings-inference/#/Text%20Embeddings%20Inference/metrics) | + +## Tracing + +OPEA use OpenTelemetry to trace function call stacks. To trace a function, add the `@opea_telemetry` decorator to either an async or sync function. The call stacks and time span data will be exported by OpenTelemetry. You can use Jaeger UI to visualize this tracing data. + +By default, tracing data is exported to `http://localhost:4318/v1/traces`. This endpoint can be customized by editing the `TELEMETRY_ENDPOINT` environment variable. + +```py +from comps import opea_telemetry + + +@opea_telemetry +async def your_async_func(): + pass + + +@opea_telemetry +def your_sync_func(): + pass +``` + +## Visualization + +### Visualize metrics + +Please refer to [OPEA grafana](https://github.com/opea-project/GenAIEval/tree/main/evals/benchmark/grafana) to get the details of Prometheus and Grafana server setup. The Grafana dashboard JSON files are also provided under [OPEA grafana](https://github.com/opea-project/GenAIEval/tree/main/evals/benchmark/grafana) to visualize the metrics. + +### Visualize tracing + +Run the following command to start the Jaeger server. + +```bash +docker run -d --rm \ +-e COLLECTOR_ZIPKIN_HOST_PORT=:9411 \ +-p 16686:16686 \ +-p 4317:4317 \ +-p 4318:4318 \ +-p 9411:9411 \ +jaegertracing/all-in-one:latest +``` + +Access the dashboard UI at `localhost:16686`. 
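As a programmatic alternative to the `curl` checks in the telemetry README above, the Prometheus text exposition format served at `/metrics` can be parsed directly. A short sketch using `requests` together with the `prometheus_client` parser (an assumed extra dependency, not required by the README itself); the port is a placeholder for whichever microservice you are checking.

```python
import requests
from prometheus_client.parser import text_string_to_metric_families  # assumed installed

# Scrape the general metrics exposed by an OPEA microservice endpoint.
raw = requests.get("http://localhost:6012/metrics", timeout=5).text

# Print request counters per handler, e.g. http_requests_total{handler="/v1/chathistory/create",...}.
# Note the parser strips the "_total" suffix from the counter family name.
for family in text_string_to_metric_families(raw):
    if family.name == "http_requests":
        for sample in family.samples:
            if sample.name == "http_requests_total":
                print(sample.labels.get("handler"), sample.labels.get("status"), sample.value)
```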
diff --git a/comps/dataprep/README.md b/comps/dataprep/README.md index 46e5e3c0f..7c3873794 100644 --- a/comps/dataprep/README.md +++ b/comps/dataprep/README.md @@ -17,22 +17,22 @@ Occasionally unstructured data will contain image data, to convert the image dat export SUMMARIZE_IMAGE_VIA_LVM=1 ``` -# Dataprep Microservice with Redis +## Dataprep Microservice with Redis For details, please refer to this [readme](redis/README.md) -# Dataprep Microservice with Milvus +## Dataprep Microservice with Milvus For details, please refer to this [readme](milvus/README.md) -# Dataprep Microservice with Qdrant +## Dataprep Microservice with Qdrant For details, please refer to this [readme](qdrant/README.md) -# Dataprep Microservice with Pinecone +## Dataprep Microservice with Pinecone For details, please refer to this [readme](pinecone/README.md) -# Dataprep Microservice with PGVector +## Dataprep Microservice with PGVector For details, please refer to this [readme](pgvector/README.md) diff --git a/comps/dataprep/milvus/README.md b/comps/dataprep/milvus/README.md index 738869a82..cc958bea0 100644 --- a/comps/dataprep/milvus/README.md +++ b/comps/dataprep/milvus/README.md @@ -1,8 +1,8 @@ # Dataprep Microservice with Milvus -# ๐Ÿš€Start Microservice with Python +## ๐Ÿš€1. Start Microservice with Python (Option 1) -## Install Requirements +### 1.1 Requirements ```bash pip install -r requirements.txt @@ -11,11 +11,11 @@ apt-get install libtesseract-dev -y apt-get install poppler-utils -y ``` -## Start Milvus Server +### 1.2 Start Milvus Server Please refer to this [readme](../../../vectorstores/langchain/milvus/README.md). -## Setup Environment Variables +### 1.3 Setup Environment Variables ```bash export no_proxy=${your_no_proxy} @@ -27,7 +27,30 @@ export COLLECTION_NAME=${your_collection_name} export MOSEC_EMBEDDING_ENDPOINT=${your_embedding_endpoint} ``` -## Start Document Preparation Microservice for Milvus with Python Script +### 1.4 Start Mosec Embedding Service + +First, you need to build a mosec embedding serving docker image. + +```bash +cd ../../.. +docker build --build-arg http_proxy=$http_proxy --build-arg https_proxy=$https_proxy -t opea/embedding-mosec-endpoint:latest -f comps/embeddings/langchain-mosec/mosec-docker/Dockerfile . +``` + +Then start the mosec embedding server. + +```bash +your_port=6010 +docker run -d --name="embedding-mosec-endpoint" -p $your_port:8000 opea/embedding-mosec-endpoint:latest +``` + +Setup environment variables: + +```bash +export MOSEC_EMBEDDING_ENDPOINT="http://localhost:$your_port" +export MILVUS=${your_host_ip} +``` + +### 1.5 Start Document Preparation Microservice for Milvus with Python Script Start document preparation microservice for Milvus with below command. @@ -35,22 +58,45 @@ Start document preparation microservice for Milvus with below command. python prepare_doc_milvus.py ``` -# ๐Ÿš€Start Microservice with Docker +## ๐Ÿš€2. Start Microservice with Docker (Option 2) + +### 2.1 Start Milvus Server + +Please refer to this [readme](../../../vectorstores/langchain/milvus/README.md). -## Build Docker Image +### 2.2 Build Docker Image ```bash -cd ../../../../ +cd ../../.. +# build mosec embedding docker image +docker build --build-arg http_proxy=$http_proxy --build-arg https_proxy=$https_proxy -t opea/embedding-langchain-mosec-endpoint:latest -f comps/embeddings/langchain-mosec/mosec-docker/Dockerfile . 
+# build dataprep milvus docker image docker build -t opea/dataprep-milvus:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy --build-arg no_proxy=$no_proxy -f comps/dataprep/milvus/docker/Dockerfile . ``` -## Run Docker with CLI +### 2.3 Setup Environment Variables + +```bash +export MOSEC_EMBEDDING_ENDPOINT="http://localhost:$your_port" +export MILVUS=${your_host_ip} +``` + +### 2.3 Run Docker with CLI (Option A) + +```bash +docker run -d --name="dataprep-milvus-server" -p 6010:6010 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e no_proxy=$no_proxy -e MOSEC_EMBEDDING_ENDPOINT=${MOSEC_EMBEDDING_ENDPOINT} -e MILVUS=${MILVUS} opea/dataprep-milvus:latest +``` + +### 2.4 Run with Docker Compose (Option B) ```bash -docker run -d --name="dataprep-milvus-server" -p 6010:6010 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e no_proxy=$no_proxy -e MOSEC_EMBEDDING_ENDPOINT=${your_embedding_endpoint} -e MILVUS=${your_milvus_host_ip} opea/dataprep-milvus:latest +cd docker +docker compose -f docker-compose-dataprep-milvus.yaml up -d ``` -# Invoke Microservice +## ๐Ÿš€3. Consume Microservice + +### 3.1 Consume Upload API Once document preparation microservice for Milvus is started, user can use below command to invoke the microservice to convert the document to embedding and save to the database. @@ -65,13 +111,13 @@ curl -X POST \ http://localhost:6010/v1/dataprep ``` -You can specify chunk_size and chunk_size by the following commands. +You can specify chunk_size and chunk_size by the following commands. To avoid big chunks, pass a small chun_size like 500 as below (default 1500). ```bash curl -X POST \ -H "Content-Type: multipart/form-data" \ -F "files=@./file.pdf" \ - -F "chunk_size=1500" \ + -F "chunk_size=500" \ -F "chunk_overlap=100" \ http://localhost:6010/v1/dataprep ``` @@ -132,3 +178,70 @@ Note: If you specify "table_strategy=llm", You should first start TGI Service, p ```bash curl -X POST -H "Content-Type: application/json" -d '{"path":"/home/user/doc/your_document_name","process_table":true,"table_strategy":"hq"}' http://localhost:6010/v1/dataprep ``` + +### 3.2 Consume get_file API + +To get uploaded file structures, use the following command: + +```bash +curl -X POST \ + -H "Content-Type: application/json" \ + http://localhost:6010/v1/dataprep/get_file +``` + +Then you will get the response JSON like this: + +```json +[ + { + "name": "uploaded_file_1.txt", + "id": "uploaded_file_1.txt", + "type": "File", + "parent": "" + }, + { + "name": "uploaded_file_2.txt", + "id": "uploaded_file_2.txt", + "type": "File", + "parent": "" + } +] +``` + +### 3.3 Consume delete_file API + +To delete uploaded file/link, use the following command. + +The `file_path` here should be the `id` get from `/v1/dataprep/get_file` API. + +```bash +# delete link +curl -X POST \ + -H "Content-Type: application/json" \ + -d '{"file_path": "https://www.ces.tech/.txt"}' \ + http://localhost:6010/v1/dataprep/delete_file + +# delete file +curl -X POST \ + -H "Content-Type: application/json" \ + -d '{"file_path": "uploaded_file_1.txt"}' \ + http://localhost:6010/v1/dataprep/delete_file + +# delete all files and links, will drop the entire db collection +curl -X POST \ + -H "Content-Type: application/json" \ + -d '{"file_path": "all"}' \ + http://localhost:6010/v1/dataprep/delete_file +``` + +## ๐Ÿš€4. Troubleshooting + +1. 
If you get errors from Mosec Embedding Endpoint like `cannot find this task, maybe it has expired` while uploading files, try to reduce the `chunk_size` in the curl command like below (the default chunk_size=1500). + + ```bash + curl -X POST \ + -H "Content-Type: multipart/form-data" \ + -F "files=@./file.pdf" \ + -F "chunk_size=500" \ + http://localhost:6010/v1/dataprep + ``` diff --git a/comps/dataprep/milvus/config.py b/comps/dataprep/milvus/config.py index 06aa60975..b4cb72233 100644 --- a/comps/dataprep/milvus/config.py +++ b/comps/dataprep/milvus/config.py @@ -12,7 +12,7 @@ MILVUS_PORT = int(os.getenv("MILVUS_PORT", 19530)) COLLECTION_NAME = os.getenv("COLLECTION_NAME", "rag_milvus") -MOSEC_EMBEDDING_MODEL = "/root/bce-embedding-base_v1" +MOSEC_EMBEDDING_MODEL = os.environ.get("MOSEC_EMBEDDING_MODEL", "/home/user/bce-embedding-base_v1") MOSEC_EMBEDDING_ENDPOINT = os.environ.get("MOSEC_EMBEDDING_ENDPOINT", "") os.environ["OPENAI_API_BASE"] = MOSEC_EMBEDDING_ENDPOINT os.environ["OPENAI_API_KEY"] = "Dummy key" diff --git a/comps/dataprep/milvus/docker/Dockerfile b/comps/dataprep/milvus/docker/Dockerfile index 7e2f2202b..7ce117641 100644 --- a/comps/dataprep/milvus/docker/Dockerfile +++ b/comps/dataprep/milvus/docker/Dockerfile @@ -1,4 +1,3 @@ - # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 @@ -10,10 +9,9 @@ ARG ARCH="cpu" RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ build-essential \ - libgl1-mesa-glx \ - libjemalloc-dev \ default-jre \ - vim + libgl1-mesa-glx \ + libjemalloc-dev RUN useradd -m -s /bin/bash user && \ mkdir -p /home/user && \ @@ -37,4 +35,3 @@ USER user WORKDIR /home/user/comps/dataprep/milvus ENTRYPOINT ["python", "prepare_doc_milvus.py"] - diff --git a/comps/dataprep/milvus/docker/docker-compose-dataprep-milvus.yaml b/comps/dataprep/milvus/docker/docker-compose-dataprep-milvus.yaml new file mode 100644 index 000000000..2b4a05cff --- /dev/null +++ b/comps/dataprep/milvus/docker/docker-compose-dataprep-milvus.yaml @@ -0,0 +1,93 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +version: "3" +services: + etcd: + container_name: milvus-etcd + image: quay.io/coreos/etcd:v3.5.5 + environment: + - ETCD_AUTO_COMPACTION_MODE=revision + - ETCD_AUTO_COMPACTION_RETENTION=1000 + - ETCD_QUOTA_BACKEND_BYTES=4294967296 + - ETCD_SNAPSHOT_COUNT=50000 + volumes: + - ${DOCKER_VOLUME_DIRECTORY:-.}/volumes/etcd:/etcd + command: etcd -advertise-client-urls=http://127.0.0.1:2379 -listen-client-urls http://0.0.0.0:2379 --data-dir /etcd + healthcheck: + test: ["CMD", "etcdctl", "endpoint", "health"] + interval: 30s + timeout: 20s + retries: 3 + + minio: + container_name: milvus-minio + image: minio/minio:RELEASE.2023-03-20T20-16-18Z + environment: + MINIO_ACCESS_KEY: minioadmin + MINIO_SECRET_KEY: minioadmin + ports: + - "9001:9001" + - "9000:9000" + volumes: + - ${DOCKER_VOLUME_DIRECTORY:-.}/volumes/minio:/minio_data + command: minio server /minio_data --console-address ":9001" + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:9000/minio/health/live"] + interval: 30s + timeout: 20s + retries: 3 + + standalone: + container_name: milvus-standalone + image: milvusdb/milvus:v2.4.6 + command: ["milvus", "run", "standalone"] + security_opt: + - seccomp:unconfined + environment: + ETCD_ENDPOINTS: etcd:2379 + MINIO_ADDRESS: minio:9000 + volumes: + - ${DOCKER_VOLUME_DIRECTORY:-.}/milvus.yaml:/milvus/configs/milvus.yaml + - ${DOCKER_VOLUME_DIRECTORY:-.}/volumes/milvus:/var/lib/milvus + 
healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:9091/healthz"] + interval: 30s + start_period: 90s + timeout: 20s + retries: 3 + ports: + - "19530:19530" + - "9091:9091" + depends_on: + - "etcd" + - "minio" + + mosec-embedding: + image: opea/embedding-mosec-endpoint:latest + container_name: embedding-mosec-server + ports: + - "6009:8000" + ipc: host + environment: + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + restart: unless-stopped + + dataprep-milvus: + image: opea/dataprep-milvus:latest + container_name: dataprep-milvus-server + ports: + - "6010:6010" + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + MOSEC_EMBEDDING_ENDPOINT: ${MOSEC_EMBEDDING_ENDPOINT} + MILVUS: ${MILVUS} + restart: unless-stopped + +networks: + default: + driver: bridge diff --git a/comps/dataprep/milvus/docker/milvus.yaml b/comps/dataprep/milvus/docker/milvus.yaml new file mode 100644 index 000000000..b9f22cb3d --- /dev/null +++ b/comps/dataprep/milvus/docker/milvus.yaml @@ -0,0 +1,811 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# Licensed to the LF AI & Data foundation under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Related configuration of etcd, used to store Milvus metadata & service discovery. +etcd: + endpoints: localhost:2379 + rootPath: by-dev # The root path where data is stored in etcd + metaSubPath: meta # metaRootPath = rootPath + '/' + metaSubPath + kvSubPath: kv # kvRootPath = rootPath + '/' + kvSubPath + log: + level: info # Only supports debug, info, warn, error, panic, or fatal. Default 'info'. + # path is one of: + # - "default" as os.Stderr, + # - "stderr" as os.Stderr, + # - "stdout" as os.Stdout, + # - file path to append server logs to. + # please adjust in embedded Milvus: /tmp/milvus/logs/etcd.log + path: stdout + ssl: + enabled: false # Whether to support ETCD secure connection mode + tlsCert: /path/to/etcd-client.pem # path to your cert file + tlsKey: /path/to/etcd-client-key.pem # path to your key file + tlsCACert: /path/to/ca.pem # path to your CACert file + # TLS min version + # Optional values: 1.0, 1.1, 1.2, 1.3ใ€‚ + # We recommend using version 1.2 and above. + tlsMinVersion: 1.3 + requestTimeout: 10000 # Etcd operation timeout in milliseconds + use: + embed: false # Whether to enable embedded Etcd (an in-process EtcdServer). + data: + dir: default.etcd # Embedded Etcd only. please adjust in embedded Milvus: /tmp/milvus/etcdData/ + auth: + enabled: false # Whether to enable authentication + userName: # username for etcd authentication + password: # password for etcd authentication + +metastore: + type: etcd # Default value: etcd, Valid values: [etcd, tikv] + +# Related configuration of tikv, used to store Milvus metadata. 
+# Notice that when TiKV is enabled for metastore, you still need to have etcd for service discovery. +# TiKV is a good option when the metadata size requires better horizontal scalability. +tikv: + endpoints: 127.0.0.1:2389 # Note that the default pd port of tikv is 2379, which conflicts with etcd. + rootPath: by-dev # The root path where data is stored in tikv + metaSubPath: meta # metaRootPath = rootPath + '/' + metaSubPath + kvSubPath: kv # kvRootPath = rootPath + '/' + kvSubPath + requestTimeout: 10000 # ms, tikv request timeout + snapshotScanSize: 256 # batch size of tikv snapshot scan + ssl: + enabled: false # Whether to support TiKV secure connection mode + tlsCert: # path to your cert file + tlsKey: # path to your key file + tlsCACert: # path to your CACert file + +localStorage: + path: /var/lib/milvus/data/ # please adjust in embedded Milvus: /tmp/milvus/data/ + +# Related configuration of MinIO/S3/GCS or any other service supports S3 API, which is responsible for data persistence for Milvus. +# We refer to the storage service as MinIO/S3 in the following description for simplicity. +minio: + address: localhost # Address of MinIO/S3 + port: 9000 # Port of MinIO/S3 + accessKeyID: minioadmin # accessKeyID of MinIO/S3 + secretAccessKey: minioadmin # MinIO/S3 encryption string + useSSL: false # Access to MinIO/S3 with SSL + ssl: + tlsCACert: /path/to/public.crt # path to your CACert file + bucketName: a-bucket # Bucket name in MinIO/S3 + rootPath: files # The root path where the message is stored in MinIO/S3 + # Whether to useIAM role to access S3/GCS instead of access/secret keys + # For more information, refer to + # aws: https://docs.aws.amazon.com/IAM/latest/UserGuide/id_roles_use.html + # gcp: https://cloud.google.com/storage/docs/access-control/iam + # aliyun (ack): https://www.alibabacloud.com/help/en/container-service-for-kubernetes/latest/use-rrsa-to-enforce-access-control + # aliyun (ecs): https://www.alibabacloud.com/help/en/elastic-compute-service/latest/attach-an-instance-ram-role + useIAM: false + # Cloud Provider of S3. Supports: "aws", "gcp", "aliyun". + # You can use "aws" for other cloud provider supports S3 API with signature v4, e.g.: minio + # You can use "gcp" for other cloud provider supports S3 API with signature v2 + # You can use "aliyun" for other cloud provider uses virtual host style bucket + # When useIAM enabled, only "aws", "gcp", "aliyun" is supported for now + cloudProvider: aws + # Custom endpoint for fetch IAM role credentials. when useIAM is true & cloudProvider is "aws". + # Leave it empty if you want to use AWS default endpoint + iamEndpoint: + logLevel: fatal # Log level for aws sdk log. Supported level: off, fatal, error, warn, info, debug, trace + region: # Specify minio storage system location region + useVirtualHost: false # Whether use virtual host mode for bucket + requestTimeoutMs: 10000 # minio timeout for request time in milliseconds + # The maximum number of objects requested per batch in minio ListObjects rpc, + # 0 means using oss client by default, decrease these configuration if ListObjects timeout + listObjectsMaxKeys: 0 + +# Milvus supports four MQ: rocksmq(based on RockDB), natsmq(embedded nats-server), Pulsar and Kafka. +# You can change your mq by setting mq.type field. +# If you don't set mq.type field as default, there is a note about enabling priority if we config multiple mq in this file. +# 1. standalone(local) mode: rocksmq(default) > natsmq > Pulsar > Kafka +# 2. 
cluster mode: Pulsar(default) > Kafka (rocksmq and natsmq is unsupported in cluster mode) +mq: + # Default value: "default" + # Valid values: [default, pulsar, kafka, rocksmq, natsmq] + type: default + enablePursuitMode: true # Default value: "true" + pursuitLag: 10 # time tick lag threshold to enter pursuit mode, in seconds + pursuitBufferSize: 8388608 # pursuit mode buffer size in bytes + mqBufSize: 16 # MQ client consumer buffer length + dispatcher: + mergeCheckInterval: 1 # the interval time(in seconds) for dispatcher to check whether to merge + targetBufSize: 16 # the length of channel buffer for targe + maxTolerantLag: 3 # Default value: "3", the timeout(in seconds) that target sends msgPack + +# Related configuration of pulsar, used to manage Milvus logs of recent mutation operations, output streaming log, and provide log publish-subscribe services. +pulsar: + address: localhost # Address of pulsar + port: 6650 # Port of Pulsar + webport: 80 # Web port of pulsar, if you connect directly without proxy, should use 8080 + maxMessageSize: 5242880 # 5 * 1024 * 1024 Bytes, Maximum size of each message in pulsar. + tenant: public + namespace: default + requestTimeout: 60 # pulsar client global request timeout in seconds + enableClientMetrics: false # Whether to register pulsar client metrics into milvus metrics path. + +# If you want to enable kafka, needs to comment the pulsar configs +# kafka: +# brokerList: +# saslUsername: +# saslPassword: +# saslMechanisms: +# securityProtocol: +# ssl: +# enabled: false # whether to enable ssl mode +# tlsCert: # path to client's public key (PEM) used for authentication +# tlsKey: # path to client's private key (PEM) used for authentication +# tlsCaCert: # file or directory path to CA certificate(s) for verifying the broker's key +# tlsKeyPassword: # private key passphrase for use with ssl.key.location and set_ssl_cert(), if any +# readTimeout: 10 + +rocksmq: + # The path where the message is stored in rocksmq + # please adjust in embedded Milvus: /tmp/milvus/rdb_data + path: /var/lib/milvus/rdb_data + lrucacheratio: 0.06 # rocksdb cache memory ratio + rocksmqPageSize: 67108864 # 64 MB, 64 * 1024 * 1024 bytes, The size of each page of messages in rocksmq + retentionTimeInMinutes: 4320 # 3 days, 3 * 24 * 60 minutes, The retention time of the message in rocksmq. + retentionSizeInMB: 8192 # 8 GB, 8 * 1024 MB, The retention size of the message in rocksmq. + compactionInterval: 86400 # 1 day, trigger rocksdb compaction every day to remove deleted data + compressionTypes: 0,0,7,7,7 # compaction compression type, only support use 0,7. 0 means not compress, 7 will use zstd. Length of types means num of rocksdb level. + +# natsmq configuration. +# more detail: https://docs.nats.io/running-a-nats-service/configuration +natsmq: + server: + port: 4222 # Port for nats server listening + storeDir: /var/lib/milvus/nats # Directory to use for JetStream storage of nats + maxFileStore: 17179869184 # Maximum size of the 'file' storage + maxPayload: 8388608 # Maximum number of bytes in a message payload + maxPending: 67108864 # Maximum number of bytes buffered for a connection Applies to client connections + initializeTimeout: 4000 # waiting for initialization of natsmq finished + monitor: + trace: false # If true enable protocol trace log messages + debug: false # If true enable debug log messages + logTime: true # If set to false, log without timestamps. + logFile: /tmp/milvus/logs/nats.log # Log file path relative to .. 
of milvus binary if use relative path + logSizeLimit: 536870912 # Size in bytes after the log file rolls over to a new one + retention: + maxAge: 4320 # Maximum age of any message in the P-channel + maxBytes: # How many bytes the single P-channel may contain. Removing oldest messages if the P-channel exceeds this size + maxMsgs: # How many message the single P-channel may contain. Removing oldest messages if the P-channel exceeds this limit + +# Related configuration of rootCoord, used to handle data definition language (DDL) and data control language (DCL) requests +rootCoord: + dmlChannelNum: 16 # The number of dml channels created at system startup + maxPartitionNum: 1024 # Maximum number of partitions in a collection + minSegmentSizeToEnableIndex: 1024 # It's a threshold. When the segment size is less than this value, the segment will not be indexed + enableActiveStandby: false + maxDatabaseNum: 64 # Maximum number of database + maxGeneralCapacity: 65536 # upper limit for the sum of of product of partitionNumber and shardNumber + gracefulStopTimeout: 5 # seconds. force stop node without graceful stop + ip: # if not specified, use the first unicastable address + port: 53100 + grpc: + serverMaxSendSize: 536870912 + serverMaxRecvSize: 268435456 + clientMaxSendSize: 268435456 + clientMaxRecvSize: 536870912 + +# Related configuration of proxy, used to validate client requests and reduce the returned results. +proxy: + timeTickInterval: 200 # ms, the interval that proxy synchronize the time tick + healthCheckTimeout: 3000 # ms, the interval that to do component healthy check + msgStream: + timeTick: + bufSize: 512 + maxNameLength: 255 # Maximum length of name for a collection or alias + # Maximum number of fields in a collection. + # As of today (2.2.0 and after) it is strongly DISCOURAGED to set maxFieldNum >= 64. + # So adjust at your risk! + maxFieldNum: 64 + maxVectorFieldNum: 4 # Maximum number of vector fields in a collection. + maxShardNum: 16 # Maximum number of shards in a collection + maxDimension: 32768 # Maximum dimension of a vector + # Whether to produce gin logs.\n + # please adjust in embedded Milvus: false + ginLogging: true + ginLogSkipPaths: / # skip url path for gin log + maxTaskNum: 1024 # max task number of proxy task queue + mustUsePartitionKey: false # switch for whether proxy must use partition key for the collection + accessLog: + enable: false # if use access log + minioEnable: false # if upload sealed access log file to minio + localPath: /tmp/milvus_access + filename: # Log filename, leave empty to use stdout. + maxSize: 64 # Max size for a single file, in MB. 
+ cacheSize: 10240 # Size of log of memory cache, in B + rotatedTime: 0 # Max time for single access log file in seconds + remotePath: access_log/ # File path in minIO + remoteMaxTime: 0 # Max time for log file in minIO, in hours + formatters: + base: + format: "[$time_now] [ACCESS] <$user_name: $user_addr> $method_name [status: $method_status] [code: $error_code] [sdk: $sdk_version] [msg: $error_msg] [traceID: $trace_id] [timeCost: $time_cost]" + query: + format: "[$time_now] [ACCESS] <$user_name: $user_addr> $method_name [status: $method_status] [code: $error_code] [sdk: $sdk_version] [msg: $error_msg] [traceID: $trace_id] [timeCost: $time_cost] [database: $database_name] [collection: $collection_name] [partitions: $partition_name] [expr: $method_expr]" + methods: "Query,Search,Delete" + connectionCheckIntervalSeconds: 120 # the interval time(in seconds) for connection manager to scan inactive client info + connectionClientInfoTTLSeconds: 86400 # inactive client info TTL duration, in seconds + maxConnectionNum: 10000 # the max client info numbers that proxy should manage, avoid too many client infos + gracefulStopTimeout: 30 # seconds. force stop node without graceful stop + slowQuerySpanInSeconds: 5 # query whose executed time exceeds the `slowQuerySpanInSeconds` can be considered slow, in seconds. + http: + enabled: true # Whether to enable the http server + debug_mode: false # Whether to enable http server debug mode + port: # high-level restful api + acceptTypeAllowInt64: true # high-level restful api, whether http client can deal with int64 + enablePprof: true # Whether to enable pprof middleware on the metrics port + ip: # if not specified, use the first unicastable address + port: 19530 + internalPort: 19529 + grpc: + serverMaxSendSize: 268435456 + serverMaxRecvSize: 67108864 + clientMaxSendSize: 268435456 + clientMaxRecvSize: 67108864 + +# Related configuration of queryCoord, used to manage topology and load balancing for the query nodes, and handoff from growing segments to sealed segments. 
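
The proxy ports above are the ones this PR's dataprep service dials: `prepare_doc_milvus.py` connects with `{"host": MILVUS_HOST, "port": MILVUS_PORT}` and `config.py` defaults `MILVUS_PORT` to 19530. An optional sanity check against the ports the compose file publishes (purely illustrative):

```bash
nc -zv localhost 19530                         # gRPC proxy port used by the dataprep service
curl -s http://localhost:9091/metrics | head   # Prometheus metrics, same port that serves /healthz
```
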
+queryCoord: + taskMergeCap: 1 + taskExecutionCap: 256 + autoHandoff: true # Enable auto handoff + autoBalance: true # Enable auto balance + autoBalanceChannel: true # Enable auto balance channel + balancer: ScoreBasedBalancer # auto balancer used for segments on queryNodes + globalRowCountFactor: 0.1 # the weight used when balancing segments among queryNodes + scoreUnbalanceTolerationFactor: 0.05 # the least value for unbalanced extent between from and to nodes when doing balance + reverseUnBalanceTolerationFactor: 1.3 # the largest value for unbalanced extent between from and to nodes after doing balance + overloadedMemoryThresholdPercentage: 90 # The threshold percentage that memory overload + balanceIntervalSeconds: 60 + memoryUsageMaxDifferencePercentage: 30 + rowCountFactor: 0.4 # the row count weight used when balancing segments among queryNodes + segmentCountFactor: 0.4 # the segment count weight used when balancing segments among queryNodes + globalSegmentCountFactor: 0.1 # the segment count weight used when balancing segments among queryNodes + segmentCountMaxSteps: 50 # segment count based plan generator max steps + rowCountMaxSteps: 50 # segment count based plan generator max steps + randomMaxSteps: 10 # segment count based plan generator max steps + growingRowCountWeight: 4 # the memory weight of growing segment row count + balanceCostThreshold: 0.001 # the threshold of balance cost, if the difference of cluster's cost after executing the balance plan is less than this value, the plan will not be executed + checkSegmentInterval: 1000 + checkChannelInterval: 1000 + checkBalanceInterval: 10000 + checkIndexInterval: 10000 + channelTaskTimeout: 60000 # 1 minute + segmentTaskTimeout: 120000 # 2 minute + distPullInterval: 500 + collectionObserverInterval: 200 + checkExecutedFlagInterval: 100 + heartbeatAvailableInterval: 10000 # 10s, Only QueryNodes which fetched heartbeats within the duration are available + loadTimeoutSeconds: 600 + distRequestTimeout: 5000 # the request timeout for querycoord fetching data distribution from querynodes, in milliseconds + heatbeatWarningLag: 5000 # the lag value for querycoord report warning when last heartbeat is too old, in milliseconds + checkHandoffInterval: 5000 + enableActiveStandby: false + checkInterval: 1000 + checkHealthInterval: 3000 # 3s, the interval when query coord try to check health of query node + checkHealthRPCTimeout: 2000 # 100ms, the timeout of check health rpc to query node + brokerTimeout: 5000 # 5000ms, querycoord broker rpc timeout + collectionRecoverTimes: 3 # if collection recover times reach the limit during loading state, release it + observerTaskParallel: 16 # the parallel observer dispatcher task number + checkAutoBalanceConfigInterval: 10 # the interval of check auto balance config + checkNodeSessionInterval: 60 # the interval(in seconds) of check querynode cluster session + gracefulStopTimeout: 5 # seconds. 
force stop node without graceful stop + enableStoppingBalance: true # whether enable stopping balance + channelExclusiveNodeFactor: 4 # the least node number for enable channel's exclusive mode + cleanExcludeSegmentInterval: 60 # the time duration of clean pipeline exclude segment which used for filter invalid data, in seconds + ip: # if not specified, use the first unicastable address + port: 19531 + grpc: + serverMaxSendSize: 536870912 + serverMaxRecvSize: 268435456 + clientMaxSendSize: 268435456 + clientMaxRecvSize: 536870912 + +# Related configuration of queryNode, used to run hybrid search between vector and scalar data. +queryNode: + stats: + publishInterval: 1000 # Interval for querynode to report node information (milliseconds) + segcore: + knowhereThreadPoolNumRatio: 4 # The number of threads in knowhere's thread pool. If disk is enabled, the pool size will multiply with knowhereThreadPoolNumRatio([1, 32]). + chunkRows: 128 # The number of vectors in a chunk. + interimIndex: + enableIndex: true # Enable segment build with index to accelerate vector search when segment is in growing or binlog. + nlist: 128 # temp index nlist, recommend to set sqrt(chunkRows), must smaller than chunkRows/8 + nprobe: 16 # nprobe to search small index, based on your accuracy requirement, must smaller than nlist + memExpansionRate: 1.15 # extra memory needed by building interim index + buildParallelRate: 0.5 # the ratio of building interim index parallel matched with cpu num + knowhereScoreConsistency: false # Enable knowhere strong consistency score computation logic + loadMemoryUsageFactor: 1 # The multiply factor of calculating the memory usage while loading segments + enableDisk: false # enable querynode load disk index, and search on disk index + maxDiskUsagePercentage: 95 + cache: + enabled: true + memoryLimit: 2147483648 # 2 GB, 2 * 1024 *1024 *1024 + readAheadPolicy: willneed # The read ahead policy of chunk cache, options: `normal, random, sequential, willneed, dontneed` + # options: async, sync, disable. + # Specifies the necessity for warming up the chunk cache. + # 1. If set to "sync" or "async" the original vector data will be synchronously/asynchronously loaded into the + # chunk cache during the load process. This approach has the potential to substantially reduce query/search latency + # for a specific duration post-load, albeit accompanied by a concurrent increase in disk usage; + # 2. If set to "disable" original vector data will only be loaded into the chunk cache during search/query. + warmup: disable + mmap: + mmapEnabled: false # Enable mmap for loading data + lazyload: + enabled: false # Enable lazyload for loading data + waitTimeout: 30000 # max wait timeout duration in milliseconds before start to do lazyload search and retrieve + requestResourceTimeout: 5000 # max timeout in milliseconds for waiting request resource for lazy load, 5s by default + requestResourceRetryInterval: 2000 # retry interval in milliseconds for waiting request resource for lazy load, 2s by default + maxRetryTimes: 1 # max retry times for lazy load, 1 by default + maxEvictPerRetry: 1 # max evict count for lazy load, 1 by default + grouping: + enabled: true + maxNQ: 1000 + topKMergeRatio: 20 + scheduler: + receiveChanSize: 10240 + unsolvedQueueSize: 10240 + # maxReadConcurrentRatio is the concurrency ratio of read task (search task and query task). + # Max read concurrency would be the value of hardware.GetCPUNum * maxReadConcurrentRatio. 
+ # It defaults to 2.0, which means max read concurrency would be the value of hardware.GetCPUNum * 2. + # Max read concurrency must greater than or equal to 1, and less than or equal to hardware.GetCPUNum * 100. + # (0, 100] + maxReadConcurrentRatio: 1 + cpuRatio: 10 # ratio used to estimate read task cpu usage. + maxTimestampLag: 86400 + scheduleReadPolicy: + # fifo: A FIFO queue support the schedule. + # user-task-polling: + # The user's tasks will be polled one by one and scheduled. + # Scheduling is fair on task granularity. + # The policy is based on the username for authentication. + # And an empty username is considered the same user. + # When there are no multi-users, the policy decay into FIFO" + name: fifo + taskQueueExpire: 60 # Control how long (many seconds) that queue retains since queue is empty + enableCrossUserGrouping: false # Enable Cross user grouping when using user-task-polling policy. (Disable it if user's task can not merge each other) + maxPendingTaskPerUser: 1024 # Max pending task per user in scheduler + dataSync: + flowGraph: + maxQueueLength: 16 # Maximum length of task queue in flowgraph + maxParallelism: 1024 # Maximum number of tasks executed in parallel in the flowgraph + enableSegmentPrune: false # use partition prune function on shard delegator + ip: # if not specified, use the first unicastable address + port: 21123 + grpc: + serverMaxSendSize: 536870912 + serverMaxRecvSize: 268435456 + clientMaxSendSize: 268435456 + clientMaxRecvSize: 536870912 + +indexCoord: + bindIndexNodeMode: + enable: false + address: localhost:22930 + withCred: false + nodeID: 0 + segment: + minSegmentNumRowsToEnableIndex: 1024 # It's a threshold. When the segment num rows is less than this value, the segment will not be indexed + +indexNode: + scheduler: + buildParallel: 1 + enableDisk: true # enable index node build disk vector index + maxDiskUsagePercentage: 95 + ip: # if not specified, use the first unicastable address + port: 21121 + grpc: + serverMaxSendSize: 536870912 + serverMaxRecvSize: 268435456 + clientMaxSendSize: 268435456 + clientMaxRecvSize: 536870912 + +dataCoord: + channel: + watchTimeoutInterval: 300 # Timeout on watching channels (in seconds). Datanode tickler update watch progress will reset timeout timer. + balanceWithRpc: true # Whether to enable balance with RPC, default to use etcd watch + legacyVersionWithoutRPCWatch: 2.4.1 # Datanodes <= this version are considered as legacy nodes, which doesn't have rpc based watch(). This is only used during rolling upgrade where legacy nodes won't get new channels + balanceSilentDuration: 300 # The duration after which the channel manager start background channel balancing + balanceInterval: 360 # The interval with which the channel manager check dml channel balance status + checkInterval: 1 # The interval in seconds with which the channel manager advances channel states + notifyChannelOperationTimeout: 5 # Timeout notifing channel operations (in seconds). 
+ segment: + maxSize: 1024 # Maximum size of a segment in MB + diskSegmentMaxSize: 2048 # Maximum size of a segment in MB for collection which has Disk index + sealProportion: 0.12 + assignmentExpiration: 2000 # The time of the assignment expiration in ms + allocLatestExpireAttempt: 200 # The time attempting to alloc latest lastExpire from rootCoord after restart + maxLife: 86400 # The max lifetime of segment in seconds, 24*60*60 + # If a segment didn't accept dml records in maxIdleTime and the size of segment is greater than + # minSizeFromIdleToSealed, Milvus will automatically seal it. + # The max idle time of segment in seconds, 10*60. + maxIdleTime: 600 + minSizeFromIdleToSealed: 16 # The min size in MB of segment which can be idle from sealed. + # The max number of binlog file for one segment, the segment will be sealed if + # the number of binlog file reaches to max value. + maxBinlogFileNumber: 32 + smallProportion: 0.5 # The segment is considered as "small segment" when its # of rows is smaller than + # (smallProportion * segment max # of rows). + # A compaction will happen on small segments if the segment after compaction will have + compactableProportion: 0.85 + # over (compactableProportion * segment max # of rows) rows. + # MUST BE GREATER THAN OR EQUAL TO !!! + # During compaction, the size of segment # of rows is able to exceed segment max # of rows by (expansionRate-1) * 100%. + expansionRate: 1.25 + autoUpgradeSegmentIndex: false # whether auto upgrade segment index to index engine's version + enableCompaction: true # Enable data segment compaction + compaction: + enableAutoCompaction: true + indexBasedCompaction: true + rpcTimeout: 10 + maxParallelTaskNum: 10 + workerMaxParallelTaskNum: 2 + levelzero: + forceTrigger: + minSize: 8388608 # The minimum size in bytes to force trigger a LevelZero Compaction, default as 8MB + maxSize: 67108864 # The maxmum size in bytes to force trigger a LevelZero Compaction, default as 64MB + deltalogMinNum: 10 # The minimum number of deltalog files to force trigger a LevelZero Compaction + deltalogMaxNum: 30 # The maxmum number of deltalog files to force trigger a LevelZero Compaction, default as 30 + enableGarbageCollection: true + gc: + interval: 3600 # gc interval in seconds + missingTolerance: 86400 # file meta missing tolerance duration in seconds, default to 24hr(1d) + dropTolerance: 10800 # file belongs to dropped entity tolerance duration in seconds. 3600 + removeConcurrent: 32 # number of concurrent goroutines to remove dropped s3 objects + scanInterval: 168 # garbage collection scan residue interval in hours + enableActiveStandby: false + brokerTimeout: 5000 # 5000ms, dataCoord broker rpc timeout + autoBalance: true # Enable auto balance + checkAutoBalanceConfigInterval: 10 # the interval of check auto balance config + import: + filesPerPreImportTask: 2 # The maximum number of files allowed per pre-import task. + taskRetention: 10800 # The retention period in seconds for tasks in the Completed or Failed state. + maxSizeInMBPerImportTask: 6144 # To prevent generating of small segments, we will re-group imported files. This parameter represents the sum of file sizes in each group (each ImportTask). + scheduleInterval: 2 # The interval for scheduling import, measured in seconds. + checkIntervalHigh: 2 # The interval for checking import, measured in seconds, is set to a high frequency for the import checker. 
+ checkIntervalLow: 120 # The interval for checking import, measured in seconds, is set to a low frequency for the import checker. + maxImportFileNumPerReq: 1024 # The maximum number of files allowed per single import request. + waitForIndex: true # Indicates whether the import operation waits for the completion of index building. + gracefulStopTimeout: 5 # seconds. force stop node without graceful stop + ip: # if not specified, use the first unicastable address + port: 13333 + grpc: + serverMaxSendSize: 536870912 + serverMaxRecvSize: 268435456 + clientMaxSendSize: 268435456 + clientMaxRecvSize: 536870912 + +dataNode: + dataSync: + flowGraph: + maxQueueLength: 16 # Maximum length of task queue in flowgraph + maxParallelism: 1024 # Maximum number of tasks executed in parallel in the flowgraph + maxParallelSyncMgrTasks: 256 # The max concurrent sync task number of datanode sync mgr globally + skipMode: + enable: true # Support skip some timetick message to reduce CPU usage + skipNum: 4 # Consume one for every n records skipped + coldTime: 60 # Turn on skip mode after there are only timetick msg for x seconds + segment: + insertBufSize: 16777216 # Max buffer size to flush for a single segment. + deleteBufBytes: 16777216 # Max buffer size in bytes to flush del for a single channel, default as 16MB + syncPeriod: 600 # The period to sync segments if buffer is not empty. + memory: + forceSyncEnable: true # Set true to force sync if memory usage is too high + forceSyncSegmentNum: 1 # number of segments to sync, segments with top largest buffer will be synced. + checkInterval: 3000 # the interval to check datanode memory usage, in milliseconds + forceSyncWatermark: 0.5 # memory watermark for standalone, upon reaching this watermark, segments will be synced. + timetick: + byRPC: true + interval: 500 + channel: + # specify the size of global work pool of all channels + # if this parameter <= 0, will set it as the maximum number of CPUs that can be executing + # suggest to set it bigger on large collection numbers to avoid blocking + workPoolSize: -1 + # specify the size of global work pool for channel checkpoint updating + # if this parameter <= 0, will set it as 10 + updateChannelCheckpointMaxParallel: 10 + updateChannelCheckpointInterval: 60 # the interval duration(in seconds) for datanode to update channel checkpoint of each channel + updateChannelCheckpointRPCTimeout: 20 # timeout in seconds for UpdateChannelCheckpoint RPC call + maxChannelCheckpointsPerPRC: 128 # The maximum number of channel checkpoints per UpdateChannelCheckpoint RPC. + channelCheckpointUpdateTickInSeconds: 10 # The frequency, in seconds, at which the channel checkpoint updater executes updates. + import: + maxConcurrentTaskNum: 16 # The maximum number of import/pre-import tasks allowed to run concurrently on a datanode. + maxImportFileSizeInGB: 16 # The maximum file size (in GB) for an import file, where an import file refers to either a Row-Based file or a set of Column-Based files. + readBufferSizeInMB: 16 # The data block size (in MB) read from chunk manager by the datanode during import. + compaction: + levelZeroBatchMemoryRatio: 0.05 # The minimal memory ratio of free memory for level zero compaction executing in batch mode + gracefulStopTimeout: 1800 # seconds. 
force stop node without graceful stop + ip: # if not specified, use the first unicastable address + port: 21124 + grpc: + serverMaxSendSize: 536870912 + serverMaxRecvSize: 268435456 + clientMaxSendSize: 268435456 + clientMaxRecvSize: 536870912 + +# Configures the system log output. +log: + level: info # Only supports debug, info, warn, error, panic, or fatal. Default 'info'. + file: + rootPath: # root dir path to put logs, default "" means no log file will print. please adjust in embedded Milvus: /tmp/milvus/logs + maxSize: 300 # MB + maxAge: 10 # Maximum time for log retention in day. + maxBackups: 20 + format: text # text or json + stdout: true # Stdout enable or not + +grpc: + log: + level: WARNING + gracefulStopTimeout: 10 # second, time to wait graceful stop finish + client: + compressionEnabled: false + dialTimeout: 200 + keepAliveTime: 10000 + keepAliveTimeout: 20000 + maxMaxAttempts: 10 + initialBackoff: 0.2 + maxBackoff: 10 + minResetInterval: 1000 + maxCancelError: 32 + minSessionCheckInterval: 200 + +# Configure the proxy tls enable. +tls: + serverPemPath: configs/cert/server.pem + serverKeyPath: configs/cert/server.key + caPemPath: configs/cert/ca.pem + +common: + defaultPartitionName: _default # default partition name for a collection + defaultIndexName: _default_idx # default index name + entityExpiration: -1 # Entity expiration in seconds, CAUTION -1 means never expire + indexSliceSize: 16 # MB + threadCoreCoefficient: + highPriority: 10 # This parameter specify how many times the number of threads is the number of cores in high priority pool + middlePriority: 5 # This parameter specify how many times the number of threads is the number of cores in middle priority pool + lowPriority: 1 # This parameter specify how many times the number of threads is the number of cores in low priority pool + buildIndexThreadPoolRatio: 0.75 + DiskIndex: + MaxDegree: 56 + SearchListSize: 100 + PQCodeBudgetGBRatio: 0.125 + BuildNumThreadsRatio: 1 + SearchCacheBudgetGBRatio: 0.1 + LoadNumThreadRatio: 8 + BeamWidthRatio: 4 + gracefulTime: 5000 # milliseconds. it represents the interval (in ms) by which the request arrival time needs to be subtracted in the case of Bounded Consistency. + gracefulStopTimeout: 1800 # seconds. it will force quit the server if the graceful stop process is not completed during this time. + storageType: remote # please adjust in embedded Milvus: local, available values are [local, remote, opendal], value minio is deprecated, use remote instead + # Default value: auto + # Valid values: [auto, avx512, avx2, avx, sse4_2] + # This configuration is only used by querynode and indexnode, it selects CPU instruction set for Searching and Index-building. 
+ simdType: auto + security: + authorizationEnabled: false + # The superusers will ignore some system check processes, + # like the old password verification when updating the credential + superUsers: + tlsMode: 0 + session: + ttl: 30 # ttl value when session granting a lease to register service + retryTimes: 30 # retry times when session sending etcd requests + locks: + metrics: + enable: false # whether gather statistics for metrics locks + threshold: + info: 500 # minimum milliseconds for printing durations in info level + warn: 1000 # minimum milliseconds for printing durations in warn level + storage: + scheme: s3 + enablev2: false + ttMsgEnabled: true # Whether the instance disable sending ts messages + traceLogMode: 0 # trace request info + bloomFilterSize: 100000 # bloom filter initial size + maxBloomFalsePositive: 0.001 # max false positive rate for bloom filter + +# QuotaConfig, configurations of Milvus quota and limits. +# By default, we enable: +# 1. TT protection; +# 2. Memory protection. +# 3. Disk quota protection. +# You can enable: +# 1. DML throughput limitation; +# 2. DDL, DQL qps/rps limitation; +# 3. DQL Queue length/latency protection; +# 4. DQL result rate protection; +# If necessary, you can also manually force to deny RW requests. +quotaAndLimits: + enabled: true # `true` to enable quota and limits, `false` to disable. + # quotaCenterCollectInterval is the time interval that quotaCenter + # collects metrics from Proxies, Query cluster and Data cluster. + # seconds, (0 ~ 65536) + quotaCenterCollectInterval: 3 + ddl: + enabled: false + collectionRate: -1 # qps, default no limit, rate for CreateCollection, DropCollection, LoadCollection, ReleaseCollection + partitionRate: -1 # qps, default no limit, rate for CreatePartition, DropPartition, LoadPartition, ReleasePartition + db: + collectionRate: -1 # qps of db level , default no limit, rate for CreateCollection, DropCollection, LoadCollection, ReleaseCollection + partitionRate: -1 # qps of db level, default no limit, rate for CreatePartition, DropPartition, LoadPartition, ReleasePartition + indexRate: + enabled: false + max: -1 # qps, default no limit, rate for CreateIndex, DropIndex + db: + max: -1 # qps of db level, default no limit, rate for CreateIndex, DropIndex + flushRate: + enabled: true + max: -1 # qps, default no limit, rate for flush + collection: + max: 0.1 # qps, default no limit, rate for flush at collection level. + db: + max: -1 # qps of db level, default no limit, rate for flush + compactionRate: + enabled: false + max: -1 # qps, default no limit, rate for manualCompaction + db: + max: -1 # qps of db level, default no limit, rate for manualCompaction + dml: + # dml limit rates, default no limit. + # The maximum rate will not be greater than max. + enabled: false + insertRate: + max: -1 # MB/s, default no limit + db: + max: -1 # MB/s, default no limit + collection: + max: -1 # MB/s, default no limit + partition: + max: -1 # MB/s, default no limit + upsertRate: + max: -1 # MB/s, default no limit + db: + max: -1 # MB/s, default no limit + collection: + max: -1 # MB/s, default no limit + partition: + max: -1 # MB/s, default no limit + deleteRate: + max: -1 # MB/s, default no limit + db: + max: -1 # MB/s, default no limit + collection: + max: -1 # MB/s, default no limit + partition: + max: -1 # MB/s, default no limit + bulkLoadRate: + max: -1 # MB/s, default no limit, not support yet. TODO: limit bulkLoad rate + db: + max: -1 # MB/s, default no limit, not support yet. 
TODO: limit db bulkLoad rate + collection: + max: -1 # MB/s, default no limit, not support yet. TODO: limit collection bulkLoad rate + partition: + max: -1 # MB/s, default no limit, not support yet. TODO: limit partition bulkLoad rate + dql: + # dql limit rates, default no limit. + # The maximum rate will not be greater than max. + enabled: false + searchRate: + max: -1 # vps (vectors per second), default no limit + db: + max: -1 # vps (vectors per second), default no limit + collection: + max: -1 # vps (vectors per second), default no limit + partition: + max: -1 # vps (vectors per second), default no limit + queryRate: + max: -1 # qps, default no limit + db: + max: -1 # qps, default no limit + collection: + max: -1 # qps, default no limit + partition: + max: -1 # qps, default no limit + limits: + maxCollectionNum: 65536 + maxCollectionNumPerDB: 65536 + maxInsertSize: -1 # maximum size of a single insert request, in bytes, -1 means no limit + maxResourceGroupNumOfQueryNode: 1024 # maximum number of resource groups of query nodes + limitWriting: + # forceDeny false means dml requests are allowed (except for some + # specific conditions, such as memory of nodes to water marker), true means always reject all dml requests. + forceDeny: false + ttProtection: + enabled: false + # maxTimeTickDelay indicates the backpressure for DML Operations. + # DML rates would be reduced according to the ratio of time tick delay to maxTimeTickDelay, + # if time tick delay is greater than maxTimeTickDelay, all DML requests would be rejected. + # seconds + maxTimeTickDelay: 300 + memProtection: + # When memory usage > memoryHighWaterLevel, all dml requests would be rejected; + # When memoryLowWaterLevel < memory usage < memoryHighWaterLevel, reduce the dml rate; + # When memory usage < memoryLowWaterLevel, no action. + enabled: true + dataNodeMemoryLowWaterLevel: 0.85 # (0, 1], memoryLowWaterLevel in DataNodes + dataNodeMemoryHighWaterLevel: 0.95 # (0, 1], memoryHighWaterLevel in DataNodes + queryNodeMemoryLowWaterLevel: 0.85 # (0, 1], memoryLowWaterLevel in QueryNodes + queryNodeMemoryHighWaterLevel: 0.95 # (0, 1], memoryHighWaterLevel in QueryNodes + growingSegmentsSizeProtection: + # No action will be taken if the growing segments size is less than the low watermark. + # When the growing segments size exceeds the low watermark, the dml rate will be reduced, + # but the rate will not be lower than minRateRatio * dmlRate. + enabled: false + minRateRatio: 0.5 + lowWaterLevel: 0.2 + highWaterLevel: 0.4 + diskProtection: + enabled: true # When the total file size of object storage is greater than `diskQuota`, all dml requests would be rejected; + diskQuota: -1 # MB, (0, +inf), default no limit + diskQuotaPerDB: -1 # MB, (0, +inf), default no limit + diskQuotaPerCollection: -1 # MB, (0, +inf), default no limit + diskQuotaPerPartition: -1 # MB, (0, +inf), default no limit + limitReading: + # forceDeny false means dql requests are allowed (except for some + # specific conditions, such as collection has been dropped), true means always reject all dql requests. + forceDeny: false + queueProtection: + enabled: false + # nqInQueueThreshold indicated that the system was under backpressure for Search/Query path. + # If NQ in any QueryNode's queue is greater than nqInQueueThreshold, search&query rates would gradually cool off + # until the NQ in queue no longer exceeds nqInQueueThreshold. We think of the NQ of query request as 1. 
+ # int, default no limit + nqInQueueThreshold: -1 + # queueLatencyThreshold indicated that the system was under backpressure for Search/Query path. + # If dql latency of queuing is greater than queueLatencyThreshold, search&query rates would gradually cool off + # until the latency of queuing no longer exceeds queueLatencyThreshold. + # The latency here refers to the averaged latency over a period of time. + # milliseconds, default no limit + queueLatencyThreshold: -1 + resultProtection: + enabled: false + # maxReadResultRate indicated that the system was under backpressure for Search/Query path. + # If dql result rate is greater than maxReadResultRate, search&query rates would gradually cool off + # until the read result rate no longer exceeds maxReadResultRate. + # MB/s, default no limit + maxReadResultRate: -1 + maxReadResultRatePerDB: -1 + maxReadResultRatePerCollection: -1 + # colOffSpeed is the speed of search&query rates cool off. + # (0, 1] + coolOffSpeed: 0.9 + +trace: + # trace exporter type, default is stdout, + # optional values: ['noop','stdout', 'jaeger', 'otlp'] + exporter: noop + # fraction of traceID based sampler, + # optional values: [0, 1] + # Fractions >= 1 will always sample. Fractions < 0 are treated as zero. + sampleFraction: 0 + jaeger: + url: # when exporter is jaeger should set the jaeger's URL + otlp: + endpoint: # example: "127.0.0.1:4318" + secure: true + +#when using GPU indexing, Milvus will utilize a memory pool to avoid frequent memory allocation and deallocation. +#here, you can set the size of the memory occupied by the memory pool, with the unit being MB. +#note that there is a possibility of Milvus crashing when the actual memory demand exceeds the value set by maxMemSize. +#if initMemSize and MaxMemSize both set zero, +#milvus will automatically initialize half of the available GPU memory, +#maxMemSize will the whole available GPU memory. 
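
As an aside, the `trace` block above defaults to the `noop` exporter. One way to collect traces from the standalone container is to edit the `milvus.yaml` that `docker-compose-dataprep-milvus.yaml` mounts into `/milvus/configs/` and restart the service. The sketch below assumes mikefarah's yq v4 is installed and reuses the example OTLP address from the comment above; it is not a tested endpoint:

```bash
# Switch the exporter in the mounted config, then restart the Milvus standalone service
yq -i '.trace.exporter = "otlp" | .trace.otlp.endpoint = "127.0.0.1:4318"' milvus.yaml
docker compose -f docker-compose-dataprep-milvus.yaml restart standalone
```
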
+gpu: + initMemSize: # Gpu Memory Pool init size + maxMemSize: # Gpu Memory Pool Max size diff --git a/comps/dataprep/milvus/prepare_doc_milvus.py b/comps/dataprep/milvus/prepare_doc_milvus.py index 143010c06..38ad4ef42 100644 --- a/comps/dataprep/milvus/prepare_doc_milvus.py +++ b/comps/dataprep/milvus/prepare_doc_milvus.py @@ -22,12 +22,12 @@ from langchain_core.documents import Document from langchain_milvus.vectorstores import Milvus from langchain_text_splitters import HTMLHeaderTextSplitter -from langsmith import traceable from pyspark import SparkConf, SparkContext -from comps import DocPath, opea_microservices, register_microservice +from comps import CustomLogger, DocPath, opea_microservices, register_microservice from comps.dataprep.utils import ( create_upload_folder, + decode_filename, document_loader, encode_filename, get_file_structure, @@ -38,6 +38,9 @@ save_content_to_local_disk, ) +logger = CustomLogger("prepare_doc_milvus") +logflag = os.getenv("LOGFLAG", False) + # workaround notes: cp comps/dataprep/utils.py ./milvus/utils.py # from utils import document_loader, get_tables_result, parse_html index_params = {"index_type": "FLAT", "metric_type": "IP", "params": {}} @@ -70,11 +73,49 @@ def empty_embedding() -> List[float]: return [e if e is not None else empty_embedding() for e in batched_embeddings] -def ingest_data_to_milvus(doc_path: DocPath): +def ingest_chunks_to_milvus(file_name: str, chunks: List, embedder): + if logflag: + logger.info(f"[ ingest chunks ] file name: {file_name}") + + # insert documents to Milvus + insert_docs = [] + for chunk in chunks: + insert_docs.append(Document(page_content=chunk, metadata={partition_field_name: file_name})) + + # Batch size + batch_size = 32 + num_chunks = len(chunks) + + for i in range(0, num_chunks, batch_size): + if logflag: + logger.info(f"[ ingest chunks ] Current batch: {i}") + batch_docs = insert_docs[i : i + batch_size] + + try: + _ = Milvus.from_documents( + batch_docs, + embedder, + collection_name=COLLECTION_NAME, + connection_args={"host": MILVUS_HOST, "port": MILVUS_PORT}, + partition_key_field=partition_field_name, + ) + except Exception as e: + if logflag: + logger.info(f"[ ingest chunks ] fail to ingest chunks into Milvus. 
error: {e}") + raise HTTPException(status_code=500, detail=f"Fail to store chunks of file {file_name}.") + + if logflag: + logger.info(f"[ ingest chunks ] Docs ingested file {file_name} to Milvus collection {COLLECTION_NAME}.") + + return True + + +def ingest_data_to_milvus(doc_path: DocPath, embedder): """Ingest document to Milvus.""" path = doc_path.path file_name = path.split("/")[-1] - print(f"[ ingest data ] Parsing document {path}, file name: {file_name}.") + if logflag: + logger.info(f"[ ingest data ] Parsing document {path}, file name: {file_name}.") if path.endswith(".html"): headers_to_split_on = [ @@ -85,192 +126,314 @@ def ingest_data_to_milvus(doc_path: DocPath): text_splitter = HTMLHeaderTextSplitter(headers_to_split_on=headers_to_split_on) else: text_splitter = RecursiveCharacterTextSplitter( - chunk_size=doc_path.chunk_size, chunk_overlap=100, add_start_index=True, separators=get_separators() + chunk_size=doc_path.chunk_size, + chunk_overlap=doc_path.chunk_overlap, + add_start_index=True, + separators=get_separators(), ) content = document_loader(path) - chunks = text_splitter.split_text(content) + if logflag: + logger.info("[ ingest data ] file content loaded") + + structured_types = [".xlsx", ".csv", ".json", "jsonl"] + _, ext = os.path.splitext(path) + + if ext in structured_types: + chunks = content + else: + chunks = text_splitter.split_text(content) + if doc_path.process_table and path.endswith(".pdf"): table_chunks = get_tables_result(path, doc_path.table_strategy) chunks = chunks + table_chunks - print("[ ingest data ] Done preprocessing. Created ", len(chunks), " chunks of the original pdf") + if logflag: + logger.info(f"[ ingest data ] Done preprocessing. Created {len(chunks)} chunks of the original file.") - # Create vectorstore - if MOSEC_EMBEDDING_ENDPOINT: - # create embeddings using MOSEC endpoint service - print( - f"[ ingest data ] MOSEC_EMBEDDING_ENDPOINT:{MOSEC_EMBEDDING_ENDPOINT}, MOSEC_EMBEDDING_MODEL:{MOSEC_EMBEDDING_MODEL}" - ) - embedder = MosecEmbeddings(model=MOSEC_EMBEDDING_MODEL) - elif TEI_EMBEDDING_ENDPOINT: - # create embeddings using TEI endpoint service - print(f"[ ingest data ] TEI_EMBEDDING_ENDPOINT:{TEI_EMBEDDING_ENDPOINT}") - embedder = HuggingFaceHubEmbeddings(model=TEI_EMBEDDING_ENDPOINT) - else: - # create embeddings using local embedding model - print(f"[ ingest data ] Local TEI_EMBEDDING_MODEL:{TEI_EMBEDDING_MODEL}") - embedder = HuggingFaceBgeEmbeddings(model_name=TEI_EMBEDDING_MODEL) + return ingest_chunks_to_milvus(file_name, chunks, embedder) - # insert documents to Milvus - insert_docs = [] - for chunk in chunks: - insert_docs.append(Document(page_content=chunk, metadata={partition_field_name: file_name})) - try: - _ = Milvus.from_documents( - insert_docs, - embedder, - collection_name=COLLECTION_NAME, - connection_args={"host": MILVUS_HOST, "port": MILVUS_PORT}, - partition_key_field=partition_field_name, - ) - except Exception as e: - print(f"[ ingest data ] fail to ingest data into Milvus. 
error: {e}") - return False +def search_by_file(collection, file_name): + query = f"{partition_field_name} == '{file_name}'" + results = collection.query( + expr=query, + output_fields=[partition_field_name, "pk"], + ) + if logflag: + logger.info(f"[ search by file ] searched by {file_name}") + logger.info(f"[ search by file ] {len(results)} results: {results}") + return results - print(f"[ ingest data ] Docs ingested from {path} to Milvus collection {COLLECTION_NAME}.") - return True +def search_all(collection): + results = collection.query(expr="pk >= 0", output_fields=[partition_field_name, "pk"]) + if logflag: + logger.info(f"[ search all ] {len(results)} results: {results}") + return results -async def ingest_link_to_milvus(link_list: List[str]): - # Create vectorstore - if MOSEC_EMBEDDING_ENDPOINT: - # create embeddings using MOSEC endpoint service - print(f"MOSEC_EMBEDDING_ENDPOINT:{MOSEC_EMBEDDING_ENDPOINT},MOSEC_EMBEDDING_MODEL:{MOSEC_EMBEDDING_MODEL}") - embedder = MosecEmbeddings(model=MOSEC_EMBEDDING_MODEL) - elif TEI_EMBEDDING_ENDPOINT: - # create embeddings using TEI endpoint service - print(f"TEI_EMBEDDING_ENDPOINT:{TEI_EMBEDDING_ENDPOINT}") - embedder = HuggingFaceHubEmbeddings(model=TEI_EMBEDDING_ENDPOINT) - else: - # create embeddings using local embedding model - print(f"Local TEI_EMBEDDING_MODEL:{TEI_EMBEDDING_MODEL}") - embedder = HuggingFaceBgeEmbeddings(model_name=TEI_EMBEDDING_MODEL) +def delete_all_data(my_milvus): + if logflag: + logger.info("[ delete all ] deleting all data in milvus") + if my_milvus.col: + my_milvus.col.drop() + if logflag: + logger.info("[ delete all ] delete success: all data") - for link in link_list: - content = parse_html([link])[0][0] - print(f"[ ingest link ] link: {link} content: {content}") - encoded_link = encode_filename(link) - save_path = upload_folder + encoded_link + ".txt" - print(f"[ ingest link ] save_path: {save_path}") - await save_content_to_local_disk(save_path, content) - - document = Document(page_content=content, metadata={partition_field_name: encoded_link + ".txt"}) - _ = Milvus.from_documents( - document, - embedder, - collection_name=COLLECTION_NAME, - connection_args={"host": MILVUS_HOST, "port": MILVUS_PORT}, - partition_key_field=partition_field_name, - ) + +def delete_by_partition_field(my_milvus, partition_field): + if logflag: + logger.info(f"[ delete partition ] deleting {partition_field_name} {partition_field}") + pks = my_milvus.get_pks(f'{partition_field_name} == "{partition_field}"') + if logflag: + logger.info(f"[ delete partition ] target pks: {pks}") + res = my_milvus.delete(pks) + my_milvus.col.flush() + if logflag: + logger.info(f"[ delete partition ] delete success: {res}") @register_microservice(name="opea_service@prepare_doc_milvus", endpoint="/v1/dataprep", host="0.0.0.0", port=6010) -@traceable(run_type="tool") async def ingest_documents( files: Optional[Union[UploadFile, List[UploadFile]]] = File(None), link_list: Optional[str] = Form(None), - chunk_size: int = Form(1500), + chunk_size: int = Form(1000), chunk_overlap: int = Form(100), process_table: bool = Form(False), table_strategy: str = Form("fast"), ): - print(f"files:{files}") - print(f"link_list:{link_list}") + if logflag: + logger.info(f"[ upload ] files:{files}") + logger.info(f"[ upload ] link_list:{link_list}") + if files and link_list: raise HTTPException(status_code=400, detail="Provide either a file or a string list, not both.") + # Create vectorstore + if MOSEC_EMBEDDING_ENDPOINT: + # create embeddings using MOSEC endpoint 
service + if logflag: + logger.info( + f"[ upload ] MOSEC_EMBEDDING_ENDPOINT:{MOSEC_EMBEDDING_ENDPOINT}, MOSEC_EMBEDDING_MODEL:{MOSEC_EMBEDDING_MODEL}" + ) + embedder = MosecEmbeddings(model=MOSEC_EMBEDDING_MODEL) + elif TEI_EMBEDDING_ENDPOINT: + # create embeddings using TEI endpoint service + if logflag: + logger.info(f"[ upload ] TEI_EMBEDDING_ENDPOINT:{TEI_EMBEDDING_ENDPOINT}") + embedder = HuggingFaceHubEmbeddings(model=TEI_EMBEDDING_ENDPOINT) + else: + # create embeddings using local embedding model + if logflag: + logger.info(f"[ upload ] Local TEI_EMBEDDING_MODEL:{TEI_EMBEDDING_MODEL}") + embedder = HuggingFaceBgeEmbeddings(model_name=TEI_EMBEDDING_MODEL) + + # define Milvus obj + my_milvus = Milvus( + embedding_function=embedder, + collection_name=COLLECTION_NAME, + connection_args={"host": MILVUS_HOST, "port": MILVUS_PORT}, + index_params=index_params, + auto_id=True, + ) + if files: if not isinstance(files, list): files = [files] uploaded_files = [] + for file in files: - save_path = upload_folder + file.filename + encode_file = encode_filename(file.filename) + save_path = upload_folder + encode_file + if logflag: + logger.info(f"[ upload ] processing file {save_path}") + + if my_milvus.col: + # check whether the file is already uploaded + try: + search_res = search_by_file(my_milvus.col, encode_file) + except Exception as e: + raise HTTPException( + status_code=500, detail=f"Failed when searching in Milvus db for file {file.filename}." + ) + if len(search_res) > 0: + if logflag: + logger.info(f"[ upload ] File {file.filename} already exists.") + raise HTTPException( + status_code=400, + detail=f"Uploaded file {file.filename} already exists. Please change file name.", + ) + await save_content_to_local_disk(save_path, file) + ingest_data_to_milvus( + DocPath( + path=save_path, + chunk_size=chunk_size, + chunk_overlap=chunk_overlap, + process_table=process_table, + table_strategy=table_strategy, + ), + embedder, + ) uploaded_files.append(save_path) - print(f"Successfully saved file {save_path}") - - def process_files_wrapper(files): - if not isinstance(files, list): - files = [files] - for file in files: - assert ingest_data_to_milvus( - DocPath( - path=file, - chunk_size=chunk_size, - chunk_overlap=chunk_overlap, - process_table=process_table, - table_strategy=table_strategy, + if logflag: + logger.info(f"Saved file {save_path} into local disk.") + + # def process_files_wrapper(files): + # if not isinstance(files, list): + # files = [files] + # for file in files: + # encode_file = encode_filename(file.filename) + # save_path = upload_folder + encode_file + # ingest_data_to_milvus( + # DocPath( + # path=save_path, + # chunk_size=chunk_size, + # chunk_overlap=chunk_overlap, + # process_table=process_table, + # table_strategy=table_strategy, + # ), + # embedder + # ) + + # try: + # # Create a SparkContext + # conf = SparkConf().setAppName("Parallel-dataprep").setMaster("local[*]") + # sc = SparkContext(conf=conf) + # # Create an RDD with parallel processing + # parallel_num = min(len(uploaded_files), os.cpu_count()) + # rdd = sc.parallelize(uploaded_files, parallel_num) + # print(uploaded_files) + # # Perform a parallel operation + # rdd_trans = rdd.map(process_files_wrapper) + # rdd_trans.collect() + # # Stop the SparkContext + # sc.stop() + # except: + # # Stop the SparkContext + # sc.stop() + results = {"status": 200, "message": "Data preparation succeeded"} + if logflag: + logger.info(results) + return results + + if link_list: + link_list = json.loads(link_list) # Parse JSON 
string to list + if not isinstance(link_list, list): + raise HTTPException(status_code=400, detail="link_list should be a list.") + + for link in link_list: + encoded_link = encode_filename(link) + if logflag: + logger.info(f"[ upload ] processing link {encoded_link}") + + # check whether the link file already exists + if my_milvus.col: + try: + search_res = search_by_file(my_milvus.col, encoded_link + ".txt") + except Exception as e: + raise HTTPException(status_code=500, detail=f"Failed when searching in Milvus db for link {link}.") + if len(search_res) > 0: + if logflag: + logger.info(f"[ upload ] Link {link} already exists.") + raise HTTPException( + status_code=400, detail=f"Uploaded link {link} already exists. Please change link." ) - ) - try: - # Create a SparkContext - conf = SparkConf().setAppName("Parallel-dataprep").setMaster("local[*]") - sc = SparkContext(conf=conf) - # Create an RDD with parallel processing - parallel_num = min(len(uploaded_files), os.cpu_count()) - rdd = sc.parallelize(uploaded_files, parallel_num) - # Perform a parallel operation - rdd_trans = rdd.map(process_files_wrapper) - rdd_trans.collect() - # Stop the SparkContext - sc.stop() - except: - # Stop the SparkContext - sc.stop() + save_path = upload_folder + encoded_link + ".txt" + content = parse_html([link])[0][0] + await save_content_to_local_disk(save_path, content) + ingest_data_to_milvus( + DocPath( + path=save_path, + chunk_size=chunk_size, + chunk_overlap=chunk_overlap, + process_table=process_table, + table_strategy=table_strategy, + ), + embedder, + ) + if logflag: + logger.info(f"[ upload ] Successfully saved link list {link_list}") return {"status": 200, "message": "Data preparation succeeded"} - if link_list: - try: - link_list = json.loads(link_list) # Parse JSON string to list - if not isinstance(link_list, list): - raise HTTPException(status_code=400, detail="link_list should be a list.") - await ingest_link_to_milvus(link_list) - print(f"Successfully saved link list {link_list}") - return {"status": 200, "message": "Data preparation succeeded"} - except json.JSONDecodeError: - raise HTTPException(status_code=400, detail="Invalid JSON format for link_list.") - raise HTTPException(status_code=400, detail="Must provide either a file or a string list.") @register_microservice( - name="opea_service@prepare_doc_milvus_file", endpoint="/v1/dataprep/get_file", host="0.0.0.0", port=6011 + name="opea_service@prepare_doc_milvus", endpoint="/v1/dataprep/get_file", host="0.0.0.0", port=6010 ) -@traceable(run_type="tool") async def rag_get_file_structure(): - print("[ dataprep - get file ] start to get file structure") + if logflag: + logger.info("[ get ] start to get file structure") - if not Path(upload_folder).exists(): - print("No file uploaded, return empty list.") + # Create vectorstore + if MOSEC_EMBEDDING_ENDPOINT: + # create embeddings using MOSEC endpoint service + if logflag: + logger.info( + f"[ get ] MOSEC_EMBEDDING_ENDPOINT:{MOSEC_EMBEDDING_ENDPOINT}, MOSEC_EMBEDDING_MODEL:{MOSEC_EMBEDDING_MODEL}" + ) + embedder = MosecEmbeddings(model=MOSEC_EMBEDDING_MODEL) + elif TEI_EMBEDDING_ENDPOINT: + # create embeddings using TEI endpoint service + if logflag: + logger.info(f"[ get ] TEI_EMBEDDING_ENDPOINT:{TEI_EMBEDDING_ENDPOINT}") + embedder = HuggingFaceHubEmbeddings(model=TEI_EMBEDDING_ENDPOINT) + else: + # create embeddings using local embedding model + if logflag: + logger.info(f"[ get ] Local TEI_EMBEDDING_MODEL:{TEI_EMBEDDING_MODEL}") + embedder = 
HuggingFaceBgeEmbeddings(model_name=TEI_EMBEDDING_MODEL) + + # define Milvus obj + my_milvus = Milvus( + embedding_function=embedder, + collection_name=COLLECTION_NAME, + connection_args={"host": MILVUS_HOST, "port": MILVUS_PORT}, + index_params=index_params, + auto_id=True, + ) + + # collection does not exist + if not my_milvus.col: + logger.info(f"[ get ] collection {COLLECTION_NAME} does not exist.") return [] - file_content = get_file_structure(upload_folder) - return file_content + # get all files from db + try: + all_data = search_all(my_milvus.col) + except Exception as e: + raise HTTPException(status_code=500, detail="Failed when searching in Milvus db for all files.") + # return [] if no data in db + if len(all_data) == 0: + return [] -def delete_all_data(my_milvus): - print("[ delete ] deleting all data in milvus") - my_milvus.delete(expr="pk >= 0") - my_milvus.col.flush() - print("[ delete ] delete success: all data") + res_file = [res["filename"] for res in all_data] + unique_list = list(set(res_file)) + if logflag: + logger.info(f"[ get ] unique list from db: {unique_list}") + # construct result file list in format + file_list = [] + for file_name in unique_list: + file_dict = { + "name": decode_filename(file_name), + "id": decode_filename(file_name), + "type": "File", + "parent": "", + } + file_list.append(file_dict) -def delete_by_partition_field(my_milvus, partition_field): - print(f"[ delete ] deleting {partition_field_name} {partition_field}") - pks = my_milvus.get_pks(f'{partition_field_name} == "{partition_field}"') - print(f"[ delete ] target pks: {pks}") - res = my_milvus.delete(pks) - my_milvus.col.flush() - print(f"[ delete ] delete success: {res}") + if logflag: + logger.info(f"[ get ] final file list: {file_list}") + return file_list @register_microservice( - name="opea_service@prepare_doc_milvus_del", endpoint="/v1/dataprep/delete_file", host="0.0.0.0", port=6012 + name="opea_service@prepare_doc_milvus", endpoint="/v1/dataprep/delete_file", host="0.0.0.0", port=6010 ) -@traceable(run_type="tool") async def delete_single_file(file_path: str = Body(..., embed=True)): """Delete file according to `file_path`. @@ -278,20 +441,26 @@ async def delete_single_file(file_path: str = Body(..., embed=True)): - file/link path (e.g. 
/path/to/file.txt) - "all": delete all files uploaded """ - # create embedder obj + if logflag: + logger.info(file_path) + + # Create vectorstore if MOSEC_EMBEDDING_ENDPOINT: # create embeddings using MOSEC endpoint service - print( - f"[ dataprep - del ] MOSEC_EMBEDDING_ENDPOINT:{MOSEC_EMBEDDING_ENDPOINT},MOSEC_EMBEDDING_MODEL:{MOSEC_EMBEDDING_MODEL}" - ) + if logflag: + logger.info( + f"[ delete ] MOSEC_EMBEDDING_ENDPOINT:{MOSEC_EMBEDDING_ENDPOINT}, MOSEC_EMBEDDING_MODEL:{MOSEC_EMBEDDING_MODEL}" + ) embedder = MosecEmbeddings(model=MOSEC_EMBEDDING_MODEL) elif TEI_EMBEDDING_ENDPOINT: # create embeddings using TEI endpoint service - print(f"[ dataprep - del ] TEI_EMBEDDING_ENDPOINT:{TEI_EMBEDDING_ENDPOINT}") + if logflag: + logger.info(f"[ delete ] TEI_EMBEDDING_ENDPOINT:{TEI_EMBEDDING_ENDPOINT}") embedder = HuggingFaceHubEmbeddings(model=TEI_EMBEDDING_ENDPOINT) else: # create embeddings using local embedding model - print(f"[ dataprep - del ] Local TEI_EMBEDDING_MODEL:{TEI_EMBEDDING_MODEL}") + if logflag: + logger.info(f"[ delete ] Local TEI_EMBEDDING_MODEL:{TEI_EMBEDDING_MODEL}") embedder = HuggingFaceBgeEmbeddings(model_name=TEI_EMBEDDING_MODEL) # define Milvus obj @@ -305,40 +474,62 @@ async def delete_single_file(file_path: str = Body(..., embed=True)): # delete all uploaded files if file_path == "all": - print("[ dataprep - del ] deleting all files") + if logflag: + logger.info("[ delete ] deleting all files") + delete_all_data(my_milvus) - remove_folder_with_ignore(upload_folder) - print("[ dataprep - del ] successfully delete all files.") + + # delete files on local disk + try: + remove_folder_with_ignore(upload_folder) + except Exception as e: + if logflag: + logger.info(f"[ delete ] {e}. Fail to delete {upload_folder}.") + raise HTTPException(status_code=500, detail=f"Fail to delete {upload_folder}.") + + if logflag: + logger.info("[ delete ] successfully delete all files.") + create_upload_folder(upload_folder) + if logflag: + logger.info("[ delete ] new upload folder created.") return {"status": True} encode_file_name = encode_filename(file_path) delete_path = Path(upload_folder + "/" + encode_file_name) - print(f"[dataprep - del] delete_path: {delete_path}") + if logflag: + logger.info(f"[delete] delete_path: {delete_path}") # partially delete files if delete_path.exists(): - # file + + # TODO: check existence before delete + + # delete file if delete_path.is_file(): - print(f"[dataprep - del] deleting file {encode_file_name}") + if logflag: + logger.info(f"[delete] deleting file {encode_file_name}") try: delete_by_partition_field(my_milvus, encode_file_name) - delete_path.unlink() - print(f"[dataprep - del] file {encode_file_name} deleted") - return {"status": True} except Exception as e: - print(f"[dataprep - del] fail to delete file {delete_path}: {e}") + if logflag: + logger.info(f"[delete] fail to delete file {delete_path}: {e}") return {"status": False} - # folder + delete_path.unlink() + if logflag: + logger.info(f"[delete] file {file_path} deleted") + return {"status": True} + + # delete folder else: - print("[dataprep - del] delete folder is not supported for now.") - return {"status": False} + if logflag: + logger.info(f"[delete] delete folder {file_path} is not supported for now.") + raise HTTPException(status_code=404, detail=f"Delete folder {file_path} is not supported for now.") else: raise HTTPException(status_code=404, detail="File/folder not found. 
Please check del_path.") if __name__ == "__main__": create_upload_folder(upload_folder) + opea_microservices["opea_service@prepare_doc_milvus"].start() - opea_microservices["opea_service@prepare_doc_milvus_file"].start() - opea_microservices["opea_service@prepare_doc_milvus_del"].start() diff --git a/comps/dataprep/milvus/requirements.txt b/comps/dataprep/milvus/requirements.txt index cf088a1c0..48b8be1d9 100644 --- a/comps/dataprep/milvus/requirements.txt +++ b/comps/dataprep/milvus/requirements.txt @@ -9,7 +9,6 @@ langchain langchain-community langchain-text-splitters langchain_milvus -langsmith markdown numpy openai diff --git a/comps/dataprep/multimodal_utils.py b/comps/dataprep/multimodal_utils.py new file mode 100644 index 000000000..cd71c5fc3 --- /dev/null +++ b/comps/dataprep/multimodal_utils.py @@ -0,0 +1,258 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import base64 +import json +import os +import uuid +from pathlib import Path +from typing import Iterator + +import cv2 +import requests +import webvtt +import whisper +from moviepy.editor import VideoFileClip + + +def create_upload_folder(upload_path): + """Create a directory to store uploaded video data.""" + if not os.path.exists(upload_path): + Path(upload_path).mkdir(parents=True, exist_ok=True) + + +def load_json_file(file_path): + """Read contents of json file.""" + with open(file_path, "r") as file: + data = json.load(file) + return data + + +def clear_upload_folder(upload_path): + """Clear the upload directory.""" + for root, dirs, files in os.walk(upload_path, topdown=False): + for file in files: + file_path = os.path.join(root, file) + os.remove(file_path) + for dir in dirs: + dir_path = os.path.join(root, dir) + os.rmdir(dir_path) + + +def generate_video_id(): + """Generates a unique identifier for a video file.""" + return str(uuid.uuid4()) + + +def convert_video_to_audio(video_path: str, output_audio_path: str): + """Converts video to audio using MoviePy library that uses `ffmpeg` under the hood. + + :param video_path: file path of video file (.mp4) + :param output_audio_path: file path of audio file (.wav) to be created + """ + video_clip = VideoFileClip(video_path) + audio_clip = video_clip.audio + audio_clip.write_audiofile(output_audio_path) + video_clip.close() + audio_clip.close() + + +def load_whisper_model(model_name: str = "base"): + """Load a whisper model for generating video transcripts.""" + return whisper.load_model(model_name) + + +def extract_transcript_from_audio(whisper_model, audio_path: str): + """Generate transcript from audio file. 
+ + :param whisper_model: a pre-loaded whisper model object + :param audio_path: file path of audio file (.wav) + """ + options = dict(task="translate", best_of=5, language="en") + return whisper_model.transcribe(audio_path, **options) + + +def format_timestamp_for_transcript(seconds: float, always_include_hours: bool = True, fractionalSeperator: str = "."): + """Format timestamp for video transcripts.""" + milliseconds = round(seconds * 1000.0) + + hours = milliseconds // 3_600_000 + milliseconds -= hours * 3_600_000 + + minutes = milliseconds // 60_000 + milliseconds -= minutes * 60_000 + + seconds = milliseconds // 1_000 + milliseconds -= seconds * 1_000 + + hours_marker = f"{hours:02d}:" if always_include_hours or hours > 0 else "" + return f"{hours_marker}{minutes:02d}:{seconds:02d}{fractionalSeperator}{milliseconds:03d}" + + +def write_vtt(transcript: Iterator[dict], vtt_path: str): + """Write transcripts to a .vtt file.""" + with open(vtt_path, "a") as file: + file.write("WEBVTT\n\n") + for segment in transcript["segments"]: + text = (segment["text"]).replace("-->", "->") + file.write( + f"{format_timestamp_for_transcript(segment['start'])} --> {format_timestamp_for_transcript(segment['end'])}\n" + ) + file.write(f"{text.strip()}\n\n") + + +def delete_audio_file(audio_path: str): + """Delete audio file after extracting transcript.""" + os.remove(audio_path) + + +def time_to_frame(time: float, fps: float): + """Convert time in seconds into frame number.""" + return int(time * fps - 1) + + +def str2time(strtime: str): + """Get time in seconds from string.""" + strtime = strtime.strip('"') + hrs, mins, seconds = [float(c) for c in strtime.split(":")] + + total_seconds = hrs * 60**2 + mins * 60 + seconds + + return total_seconds + + +def convert_img_to_base64(image): + "Convert image to base64 string" + _, buffer = cv2.imencode(".jpg", image) + encoded_string = base64.b64encode(buffer) + return encoded_string.decode() + + +def extract_frames_and_annotations_from_transcripts(video_id: str, video_path: str, vtt_path: str, output_dir: str): + """Extract frames (.jpg) and annotations (.json) from video file (.mp4) and captions file (.vtt)""" + # Set up location to store frames and annotations + os.makedirs(output_dir, exist_ok=True) + os.makedirs(os.path.join(output_dir, "frames"), exist_ok=True) + + # Load video and get fps + vidcap = cv2.VideoCapture(video_path) + fps = vidcap.get(cv2.CAP_PROP_FPS) + + # read captions file + captions = webvtt.read(vtt_path) + + annotations = [] + for idx, caption in enumerate(captions): + start_time = str2time(caption.start) + end_time = str2time(caption.end) + + mid_time = (end_time + start_time) / 2 + text = caption.text.replace("\n", " ") + + frame_no = time_to_frame(mid_time, fps) + mid_time_ms = mid_time * 1000 + vidcap.set(cv2.CAP_PROP_POS_MSEC, mid_time_ms) + success, frame = vidcap.read() + + if success: + # Save frame for further processing + img_fname = f"frame_{idx}" + img_fpath = os.path.join(output_dir, "frames", img_fname + ".jpg") + cv2.imwrite(img_fpath, frame) + + # Convert image to base64 encoded string + b64_img_str = convert_img_to_base64(frame) + + # Create annotations for frame from transcripts + annotations.append( + { + "video_id": video_id, + "video_name": os.path.basename(video_path), + "b64_img_str": b64_img_str, + "caption": text, + "time": mid_time_ms, + "frame_no": frame_no, + "sub_video_id": idx, + } + ) + + # Save transcript annotations as json file for further processing + with open(os.path.join(output_dir, 
"annotations.json"), "w") as f: + json.dump(annotations, f) + + vidcap.release() + return annotations + + +def use_lvm(endpoint: str, img_b64_string: str, prompt: str = "Provide a short description for this scene."): + """Generate image captions/descriptions using LVM microservice.""" + inputs = {"image": img_b64_string, "prompt": prompt, "max_new_tokens": 32} + response = requests.post(url=endpoint, data=json.dumps(inputs)) + print(response) + return response.json()["text"] + + +def extract_frames_and_generate_captions( + video_id: str, video_path: str, lvm_endpoint: str, output_dir: str, key_frame_per_second: int = 1 +): + """Extract frames (.jpg) and annotations (.json) from video file (.mp4) by generating captions using LVM microservice.""" + # Set up location to store frames and annotations + os.makedirs(output_dir, exist_ok=True) + os.makedirs(os.path.join(output_dir, "frames"), exist_ok=True) + + # Load video and get fps + vidcap = cv2.VideoCapture(video_path) + fps = vidcap.get(cv2.CAP_PROP_FPS) + + annotations = [] + hop = round(fps / key_frame_per_second) + curr_frame = 0 + idx = -1 + + while True: + ret, frame = vidcap.read() + if not ret: + break + + if curr_frame % hop == 0: + idx += 1 + + mid_time = vidcap.get(cv2.CAP_PROP_POS_MSEC) + mid_time_ms = mid_time * 1000 + + frame_no = curr_frame + frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) + + # Save frame for further processing + img_fname = f"frame_{idx}" + img_fpath = os.path.join(output_dir, "frames", img_fname + ".jpg") + cv2.imwrite(img_fpath, frame) + + # Convert image to base64 encoded string + b64_img_str = convert_img_to_base64(frame) + + # Caption generation using LVM microservice + caption = use_lvm(lvm_endpoint, b64_img_str) + caption = caption.strip() + text = caption.replace("\n", " ") + + # Create annotations for frame from transcripts + annotations.append( + { + "video_id": video_id, + "video_name": os.path.basename(video_path), + "b64_img_str": b64_img_str, + "caption": text, + "time": mid_time_ms, + "frame_no": frame_no, + "sub_video_id": idx, + } + ) + + curr_frame += 1 + + # Save caption annotations as json file for further processing + with open(os.path.join(output_dir, "annotations.json"), "w") as f: + json.dump(annotations, f) + + vidcap.release() diff --git a/comps/dataprep/pgvector/README.md b/comps/dataprep/pgvector/README.md index 36b99b6eb..1a7772eb8 100644 --- a/comps/dataprep/pgvector/README.md +++ b/comps/dataprep/pgvector/README.md @@ -1,18 +1,14 @@ # Dataprep Microservice with PGVector -# ๐Ÿš€1. Start Microservice with Python๏ผˆOption 1๏ผ‰ +## ๐Ÿš€1. Start Microservice with Python๏ผˆOption 1๏ผ‰ -## 1.1 Install Requirements +### 1.1 Install Requirements ```bash pip install -r requirements.txt ``` -## 1.2 Start PGVector - -Please refer to this [readme](../../../vectorstores/langchain/pgvcetor/README.md). - -## 1.3 Setup Environment Variables +### 1.2 Setup Environment Variables ```bash export PG_CONNECTION_STRING=postgresql+psycopg2://testuser:testpwd@${your_ip}:5432/vectordb @@ -22,7 +18,11 @@ export LANGCHAIN_API_KEY=${your_langchain_api_key} export LANGCHAIN_PROJECT="opea/gen-ai-comps:dataprep" ``` -## 1.4 Start Document Preparation Microservice for PGVector with Python Script +### 1.3 Start PGVector + +Please refer to this [readme](../../vectorstores/langchain/pgvector/README.md). + +### 1.4 Start Document Preparation Microservice for PGVector with Python Script Start document preparation microservice for PGVector with below command. 
@@ -30,13 +30,13 @@ Start document preparation microservice for PGVector with below command. python prepare_doc_pgvector.py ``` -# 🚀2. Start Microservice with Docker (Option 2) +## 🚀2. Start Microservice with Docker (Option 2) -## 2.1 Start PGVector +### 2.1 Start PGVector -Please refer to this [readme](../../../vectorstores/langchain/pgvector/README.md). +Please refer to this [readme](../../vectorstores/langchain/pgvector/README.md). -## 2.2 Setup Environment Variables +### 2.2 Setup Environment Variables ```bash export PG_CONNECTION_STRING=postgresql+psycopg2://testuser:testpwd@${your_ip}:5432/vectordb @@ -46,27 +46,29 @@ export LANGCHAIN_API_KEY=${your_langchain_api_key} export LANGCHAIN_PROJECT="opea/dataprep" ``` -## 2.3 Build Docker Image +### 2.3 Build Docker Image ```bash -cd comps/dataprep/langchain/pgvector/docker -docker build -t opea/dataprep-pgvector:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/langchain/pgvector/docker/Dockerfile . +cd GenAIComps +docker build -t opea/dataprep-pgvector:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/pgvector/langchain/docker/Dockerfile . ``` -## 2.4 Run Docker with CLI (Option A) +### 2.4 Run Docker with CLI (Option A) ```bash -docker run -d --name="dataprep-pgvector" -p 6007:6007 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e PG_CONNECTION_STRING=$PG_CONNECTION_STRING -e INDEX_NAME=$INDEX_NAME -e TEI_ENDPOINT=$TEI_ENDPOINT opea/dataprep-pgvector:latest +docker run --name="dataprep-pgvector" -p 6007:6007 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e PG_CONNECTION_STRING=$PG_CONNECTION_STRING -e INDEX_NAME=$INDEX_NAME -e TEI_ENDPOINT=$TEI_ENDPOINT opea/dataprep-pgvector:latest ``` -## 2.5 Run with Docker Compose (Option B) +### 2.5 Run with Docker Compose (Option B) ```bash cd comps/dataprep/langchain/pgvector/docker docker compose -f docker-compose-dataprep-pgvector.yaml up -d ``` -# 🚀3. Consume Microservice +## 🚀3. Consume Microservice + +### 3.1 Consume Upload API Once document preparation microservice for PGVector is started, user can use below command to invoke the microservice to convert the document to embedding and save to the database. @@ -76,3 +78,58 @@ curl -X POST \ -d '{"path":"/path/to/document"}' \ http://localhost:6007/v1/dataprep ``` + +### 3.2 Consume get_file API + +To get uploaded file structures, use the following command: + +```bash +curl -X POST \ + -H "Content-Type: application/json" \ + http://localhost:6007/v1/dataprep/get_file +``` + +Then you will get the response JSON like this: + +```json +[ + { + "name": "uploaded_file_1.txt", + "id": "uploaded_file_1.txt", + "type": "File", + "parent": "" + }, + { + "name": "uploaded_file_2.txt", + "id": "uploaded_file_2.txt", + "type": "File", + "parent": "" + } +] +``` + +### 3.3 Consume delete_file API + +To delete uploaded file/link, use the following command. + +The `file_path` here should be the `id` obtained from the `/v1/dataprep/get_file` API.
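For scripting, a rough Python sketch of the same list-then-delete flow is shown here; the host and port follow this README's examples, the file id is a placeholder, and the equivalent `curl` commands follow below.

```python
# Sketch of the get_file -> delete_file flow described above; the file id is illustrative.
import requests

base_url = "http://localhost:6007/v1/dataprep"

# List uploaded files/links; each entry's "id" is the value delete_file expects as file_path.
uploaded = requests.post(f"{base_url}/get_file").json()
print(uploaded)

# Delete one uploaded file by id, or pass "all" to remove every uploaded file and link.
resp = requests.post(f"{base_url}/delete_file", json={"file_path": "uploaded_file_1.txt"})
print(resp.json())  # expected: {"status": true}
```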
+ +```bash +# delete link +curl -X POST \ + -H "Content-Type: application/json" \ + -d '{"file_path": "https://www.ces.tech/.txt"}' \ + http://localhost:6007/v1/dataprep/delete_file + +# delete file +curl -X POST \ + -H "Content-Type: application/json" \ + -d '{"file_path": "uploaded_file_1.txt"}' \ + http://localhost:6007/v1/dataprep/delete_file + +# delete all files and links +curl -X POST \ + -H "Content-Type: application/json" \ + -d '{"file_path": "all"}' \ + http://localhost:6007/v1/dataprep/delete_file +``` diff --git a/comps/dataprep/pgvector/langchain/docker/Dockerfile b/comps/dataprep/pgvector/langchain/docker/Dockerfile index 75e70c524..897d15564 100644 --- a/comps/dataprep/pgvector/langchain/docker/Dockerfile +++ b/comps/dataprep/pgvector/langchain/docker/Dockerfile @@ -1,4 +1,3 @@ - # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 @@ -10,10 +9,9 @@ ARG ARCH="cpu" RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ build-essential \ - libgl1-mesa-glx \ - libjemalloc-dev \ default-jre \ - vim + libgl1-mesa-glx \ + libjemalloc-dev RUN useradd -m -s /bin/bash user && \ mkdir -p /home/user && \ @@ -38,4 +36,3 @@ USER user WORKDIR /home/user/comps/dataprep/pgvector/langchain ENTRYPOINT ["python", "prepare_doc_pgvector.py"] - diff --git a/comps/dataprep/pgvector/langchain/docker/docker-compose-dataprep-pgvector.yaml b/comps/dataprep/pgvector/langchain/docker/docker-compose-dataprep-pgvector.yaml index f11a88b93..d396bda3a 100644 --- a/comps/dataprep/pgvector/langchain/docker/docker-compose-dataprep-pgvector.yaml +++ b/comps/dataprep/pgvector/langchain/docker/docker-compose-dataprep-pgvector.yaml @@ -16,6 +16,9 @@ services: - POSTGRES_USER=testuser - POSTGRES_PASSWORD=testpwd - POSTGRES_HOST_AUTH_METHOD=trust + - no_proxy= ${no_proxy} + - http_proxy= ${http_proxy} + - https_proxy= ${https_proxy} volumes: - ./init.sql:/docker-entrypoint-initdb.d/init.sql diff --git a/comps/dataprep/pgvector/langchain/prepare_doc_pgvector.py b/comps/dataprep/pgvector/langchain/prepare_doc_pgvector.py index 9c38cbe6a..1331f3772 100644 --- a/comps/dataprep/pgvector/langchain/prepare_doc_pgvector.py +++ b/comps/dataprep/pgvector/langchain/prepare_doc_pgvector.py @@ -3,21 +3,34 @@ import json import os -import uuid from pathlib import Path from typing import List, Optional, Union +from urllib.parse import urlparse +import psycopg2 from config import CHUNK_OVERLAP, CHUNK_SIZE, EMBED_MODEL, INDEX_NAME, PG_CONNECTION_STRING -from fastapi import File, Form, HTTPException, UploadFile +from fastapi import Body, File, Form, HTTPException, UploadFile from langchain.text_splitter import RecursiveCharacterTextSplitter from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceHubEmbeddings from langchain_community.vectorstores import PGVector -from langsmith import traceable -from comps import DocPath, ServiceType, opea_microservices, register_microservice, register_statistics -from comps.dataprep.utils import document_loader, get_separators, parse_html +from comps import CustomLogger, DocPath, opea_microservices, register_microservice +from comps.dataprep.utils import ( + create_upload_folder, + document_loader, + encode_filename, + get_file_structure, + get_separators, + parse_html, + remove_folder_with_ignore, + save_content_to_local_disk, +) + +logger = CustomLogger("prepare_doc_pgvector") +logflag = os.getenv("LOGFLAG", False) tei_embedding_endpoint = os.getenv("TEI_ENDPOINT") +upload_folder = "./uploaded_files/" async def 
save_file_to_local_disk(save_path: str, file): @@ -27,22 +40,81 @@ async def save_file_to_local_disk(save_path: str, file): content = await file.read() fout.write(content) except Exception as e: - print(f"Write file failed. Exception: {e}") + if logflag: + logger.info(f"Write file failed. Exception: {e}") raise HTTPException(status_code=500, detail=f"Write file {save_path} failed. Exception: {e}") +def delete_embeddings(doc_name): + """Get all ids from a vectorstore.""" + try: + result = urlparse(PG_CONNECTION_STRING) + username = result.username + password = result.password + database = result.path[1:] + hostname = result.hostname + port = result.port + + connection = psycopg2.connect(database=database, user=username, password=password, host=hostname, port=port) + + # Create a cursor object to execute SQL queries + + if logflag: + logger.info(f"Deleting {doc_name} from vectorstore") + + cur = connection.cursor() + if doc_name == "all": + cur.execute( + "DELETE FROM langchain_pg_collection lpe WHERE lpe.name = %(index_name)s", + {"index_name": INDEX_NAME}, + ) + else: + cur.execute( + "DELETE FROM langchain_pg_embedding lpe WHERE lpe.uuid in (SELECT lpc.uuid\ + FROM langchain_pg_embedding lpc where lpc.cmetadata ->> 'doc_name' = %(doc_name)s)", + {"doc_name": doc_name}, + ) + + connection.commit() # commit the transaction + cur.close() + + return True + + except psycopg2.Error as e: + if logflag: + logger.info(f"Error deleting document from vectorstore: {e}") + return False + + except Exception as e: + if logflag: + logger.info(f"An unexpected error occurred: {e}") + return False + + def ingest_doc_to_pgvector(doc_path: DocPath): """Ingest document to PGVector.""" doc_path = doc_path.path - print(f"Parsing document {doc_path}.") + if logflag: + logger.info(f"Parsing document {doc_path}.") text_splitter = RecursiveCharacterTextSplitter( - chunk_size=1500, chunk_overlap=100, add_start_index=True, separators=get_separators() + chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP, add_start_index=True, separators=get_separators() ) + content = document_loader(doc_path) - chunks = text_splitter.split_text(content) - print("Done preprocessing. Created ", len(chunks), " chunks of the original pdf") - print("PG Connection", PG_CONNECTION_STRING) + + structured_types = [".xlsx", ".csv", ".json", "jsonl"] + _, ext = os.path.splitext(doc_path) + + if ext in structured_types: + chunks = content + else: + chunks = text_splitter.split_text(content) + + if logflag: + logger.info("Done preprocessing. 
Created ", len(chunks), " chunks of the original file.") + logger.info("PG Connection", PG_CONNECTION_STRING) + metadata = [dict({"doc_name": str(doc_path)})] # Create vectorstore if tei_embedding_endpoint: @@ -60,23 +132,18 @@ def ingest_doc_to_pgvector(doc_path: DocPath): batch_texts = batch_chunks _ = PGVector.from_texts( - texts=batch_texts, embedding=embedder, collection_name=INDEX_NAME, connection_string=PG_CONNECTION_STRING + texts=batch_texts, + embedding=embedder, + metadatas=metadata, + collection_name=INDEX_NAME, + connection_string=PG_CONNECTION_STRING, ) - print(f"Processed batch {i//batch_size + 1}/{(num_chunks-1)//batch_size + 1}") + if logflag: + logger.info(f"Processed batch {i//batch_size + 1}/{(num_chunks-1)//batch_size + 1}") return True -def ingest_link_to_pgvector(link_list: List[str]): - data_collection = parse_html(link_list) - - texts = [] - metadatas = [] - for data, meta in data_collection: - doc_id = str(uuid.uuid4()) - metadata = {"source": meta, "identify_id": doc_id} - texts.append(data) - metadatas.append(metadata) - +async def ingest_link_to_pgvector(link_list: List[str]): # Create vectorstore if tei_embedding_endpoint: # create embeddings using TEI endpoint service @@ -85,58 +152,166 @@ def ingest_link_to_pgvector(link_list: List[str]): # create embeddings using local embedding model embedder = HuggingFaceBgeEmbeddings(model_name=EMBED_MODEL) - _ = PGVector.from_texts( - texts=texts, - embedding=embedder, - metadatas=metadatas, - collection_name=INDEX_NAME, - connection_string=PG_CONNECTION_STRING, + text_splitter = RecursiveCharacterTextSplitter( + chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP, add_start_index=True, separators=get_separators() ) + for link in link_list: + texts = [] + content = parse_html([link])[0][0] + if logflag: + logger.info(f"[ ingest link ] link: {link} content: {content}") + encoded_link = encode_filename(link) + save_path = upload_folder + encoded_link + ".txt" + doc_path = upload_folder + link + ".txt" + if logflag: + logger.info(f"[ ingest link ] save_path: {save_path}") + await save_content_to_local_disk(save_path, content) + metadata = [dict({"doc_name": str(doc_path)})] + + chunks = text_splitter.split_text(content) + + batch_size = 32 + num_chunks = len(chunks) + for i in range(0, num_chunks, batch_size): + batch_chunks = chunks[i : i + batch_size] + batch_texts = batch_chunks + + _ = PGVector.from_texts( + texts=batch_texts, + embedding=embedder, + metadatas=metadata, + collection_name=INDEX_NAME, + connection_string=PG_CONNECTION_STRING, + ) + if logflag: + logger.info(f"Processed batch {i//batch_size + 1}/{(num_chunks-1)//batch_size + 1}") + + return True + @register_microservice( name="opea_service@prepare_doc_pgvector", - service_type=ServiceType.DATAPREP, endpoint="/v1/dataprep", host="0.0.0.0", port=6007, ) -@traceable(run_type="tool") -@register_statistics(names=["opea_service@dataprep_pgvector"]) async def ingest_documents( files: Optional[Union[UploadFile, List[UploadFile]]] = File(None), link_list: Optional[str] = Form(None) ): - print(f"files:{files}") - print(f"link_list:{link_list}") + if logflag: + logger.info(f"files:{files}") + logger.info(f"link_list:{link_list}") if files and link_list: raise HTTPException(status_code=400, detail="Provide either a file or a string list, not both.") if files: if not isinstance(files, list): files = [files] - upload_folder = "./uploaded_files/" + if not os.path.exists(upload_folder): Path(upload_folder).mkdir(parents=True, exist_ok=True) for file in files: save_path = 
upload_folder + file.filename await save_file_to_local_disk(save_path, file) + ingest_doc_to_pgvector(DocPath(path=save_path)) - print(f"Successfully saved file {save_path}") - return {"status": 200, "message": "Data preparation succeeded"} + if logflag: + logger.info(f"Successfully saved file {save_path}") + result = {"status": 200, "message": "Data preparation succeeded"} + if logflag: + logger.info(result) + return result if link_list: try: link_list = json.loads(link_list) # Parse JSON string to list if not isinstance(link_list, list): raise HTTPException(status_code=400, detail="link_list should be a list.") - ingest_link_to_pgvector(link_list) - print(f"Successfully saved link list {link_list}") - return {"status": 200, "message": "Data preparation succeeded"} + await ingest_link_to_pgvector(link_list) + if logflag: + logger.info(f"Successfully saved link list {link_list}") + result = {"status": 200, "message": "Data preparation succeeded"} + if logflag: + logger.info(result) + return result except json.JSONDecodeError: raise HTTPException(status_code=400, detail="Invalid JSON format for link_list.") raise HTTPException(status_code=400, detail="Must provide either a file or a string list.") +@register_microservice( + name="opea_service@prepare_doc_pgvector", endpoint="/v1/dataprep/get_file", host="0.0.0.0", port=6007 +) +async def rag_get_file_structure(): + if logflag: + logger.info("[ dataprep - get file ] start to get file structure") + + if not Path(upload_folder).exists(): + if logflag: + logger.info("No file uploaded, return empty list.") + return [] + + file_content = get_file_structure(upload_folder) + if logflag: + logger.info(file_content) + return file_content + + +@register_microservice( + name="opea_service@prepare_doc_pgvector", endpoint="/v1/dataprep/delete_file", host="0.0.0.0", port=6007 +) +async def delete_single_file(file_path: str = Body(..., embed=True)): + """Delete file according to `file_path`. + + `file_path`: + - specific file path (e.g. /path/to/file.txt) + - folder path (e.g. /path/to/folder) + - "all": delete all files uploaded + """ + if file_path == "all": + if logflag: + logger.info("[dataprep - del] delete all files") + remove_folder_with_ignore(upload_folder) + assert delete_embeddings(file_path) + if logflag: + logger.info("[dataprep - del] successfully delete all files.") + create_upload_folder(upload_folder) + if logflag: + logger.info({"status": True}) + return {"status": True} + + delete_path = Path(upload_folder + "/" + encode_filename(file_path)) + doc_path = upload_folder + file_path + if logflag: + logger.info(f"[dataprep - del] delete_path: {delete_path}") + + # partially delete files/folders + if delete_path.exists(): + # delete file + if delete_path.is_file(): + try: + assert delete_embeddings(doc_path) + delete_path.unlink() + except Exception as e: + if logflag: + logger.info(f"[dataprep - del] fail to delete file {delete_path}: {e}") + logger.info({"status": False}) + return {"status": False} + # delete folder + else: + if logflag: + logger.info("[dataprep - del] delete folder is not supported for now.") + logger.info({"status": False}) + return {"status": False} + if logflag: + logger.info({"status": True}) + return {"status": True} + else: + raise HTTPException(status_code=404, detail="File/folder not found. 
Please check del_path.") + + if __name__ == "__main__": + create_upload_folder(upload_folder) opea_microservices["opea_service@prepare_doc_pgvector"].start() diff --git a/comps/dataprep/pgvector/langchain/requirements.txt b/comps/dataprep/pgvector/langchain/requirements.txt index 47b03f429..b5b4f168b 100644 --- a/comps/dataprep/pgvector/langchain/requirements.txt +++ b/comps/dataprep/pgvector/langchain/requirements.txt @@ -7,7 +7,7 @@ fastapi huggingface_hub langchain langchain-community -langsmith +langchain-text-splitters markdown numpy opentelemetry-api @@ -16,11 +16,16 @@ opentelemetry-sdk pandas pgvector==0.2.5 Pillow -prometheus-fastapi-instrumentator==7.0.0 +prometheus-fastapi-instrumentator psycopg2-binary pymupdf +pyspark python-docx +python-multipart python-pptx sentence_transformers shortuuid +tiktoken +unstructured[all-docs]==0.11.5 uvicorn + diff --git a/comps/dataprep/pinecone/README.md b/comps/dataprep/pinecone/README.md index 3a9f6fc30..42e3d048a 100644 --- a/comps/dataprep/pinecone/README.md +++ b/comps/dataprep/pinecone/README.md @@ -1,18 +1,18 @@ # Dataprep Microservice with Pinecone -# ๐Ÿš€Start Microservice with Python +## ๐Ÿš€Start Microservice with Python -## Install Requirements +### Install Requirements ```bash pip install -r requirements.txt ``` -## Start Pinecone Server +### Start Pinecone Server Please refer to this [readme](../../../vectorstores/langchain/pinecone/README.md). -## Setup Environment Variables +### Setup Environment Variables ```bash export http_proxy=${your_http_proxy} @@ -21,7 +21,7 @@ export PINECONE_API_KEY=${PINECONE_API_KEY} export PINECONE_INDEX_NAME=${PINECONE_INDEX_NAME} ``` -## Start Document Preparation Microservice for Pinecone with Python Script +### Start Document Preparation Microservice for Pinecone with Python Script Start document preparation microservice for Pinecone with below command. @@ -29,22 +29,22 @@ Start document preparation microservice for Pinecone with below command. python prepare_doc_pinecone.py ``` -# ๐Ÿš€Start Microservice with Docker +## ๐Ÿš€Start Microservice with Docker -## Build Docker Image +### Build Docker Image ```bash cd ../../../../ docker build -t opea/dataprep-pinecone:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/pinecone/docker/Dockerfile . ``` -## Run Docker with CLI +### Run Docker with CLI ```bash docker run -d --name="dataprep-pinecone-server" -p 6000:6000 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy opea/dataprep-pinecone:latest ``` -## Setup Environment Variables +### Setup Environment Variables ```bash export http_proxy=${your_http_proxy} @@ -53,14 +53,14 @@ export PINECONE_API_KEY=${PINECONE_API_KEY} export PINECONE_INDEX_NAME=${PINECONE_INDEX_NAME} ``` -## Run Docker with Docker Compose +### Run Docker with Docker Compose ```bash cd comps/dataprep/pinecone/docker docker compose -f docker-compose-dataprep-pinecone.yaml up -d ``` -# Invoke Microservice +## Invoke Microservice Once document preparation microservice for Pinecone is started, user can use below command to invoke the microservice to convert the document to embedding and save to the database. 
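Alongside the `curl` command this README refers to, a rough Python sketch of the same calls is shown below. The port (6007) follows the updated service registration in `prepare_doc_pinecone.py` and may differ in your deployment; the file name and link are placeholders.

```python
# Rough client sketch for the updated Pinecone dataprep endpoint; values are placeholders.
import json
import requests

url = "http://localhost:6007/v1/dataprep"

# Upload a local document for chunking and embedding.
with open("file1.txt", "rb") as f:
    print(requests.post(url, files={"files": f}, data={"chunk_size": 1500, "chunk_overlap": 100}).json())

# Or ingest web links; link_list is passed as a JSON-encoded string form field.
print(requests.post(url, data={"link_list": json.dumps(["https://www.ces.tech/"])}).json())
```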
diff --git a/comps/dataprep/pinecone/config.py b/comps/dataprep/pinecone/config.py index e6e62db6c..7a761a09c 100644 --- a/comps/dataprep/pinecone/config.py +++ b/comps/dataprep/pinecone/config.py @@ -4,13 +4,13 @@ import os # Embedding model -EMBED_MODEL = os.getenv("EMBED_MODEL", "sentence-transformers/all-MiniLM-L6-v2") +EMBED_MODEL = os.getenv("EMBED_MODEL", "BAAI/bge-base-en-v1.5") # Pinecone configuration PINECONE_API_KEY = os.getenv("PINECONE_API_KEY", "xxx_xxx") -PINECONE_INDEX_NAME = int(os.getenv("PINECONE_INDEX_NAME", "langchain-test")) +PINECONE_INDEX_NAME = os.getenv("PINECONE_INDEX_NAME", "langchain-test") # LLM/Embedding endpoints TGI_LLM_ENDPOINT = os.getenv("TGI_LLM_ENDPOINT", "http://localhost:8080") TGI_LLM_ENDPOINT_NO_RAG = os.getenv("TGI_LLM_ENDPOINT_NO_RAG", "http://localhost:8081") -TEI_EMBEDDING_ENDPOINT = os.getenv("TEI_ENDPOINT") +TEI_EMBEDDING_ENDPOINT = os.getenv("TEI_EMBEDDING_ENDPOINT") diff --git a/comps/dataprep/pinecone/docker/Dockerfile b/comps/dataprep/pinecone/docker/Dockerfile index d19ff6ab4..4bb51956b 100644 --- a/comps/dataprep/pinecone/docker/Dockerfile +++ b/comps/dataprep/pinecone/docker/Dockerfile @@ -1,4 +1,3 @@ - # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 @@ -6,11 +5,14 @@ FROM python:3.11-slim ENV LANG=C.UTF-8 +ARG ARCH="cpu" + RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ build-essential \ + default-jre \ + libcairo2 \ libgl1-mesa-glx \ - libjemalloc-dev \ - vim + libjemalloc-dev RUN useradd -m -s /bin/bash user && \ mkdir -p /home/user && \ @@ -23,9 +25,18 @@ COPY comps /home/user/comps RUN pip install --no-cache-dir --upgrade pip && \ pip install --no-cache-dir -r /home/user/comps/dataprep/pinecone/requirements.txt +RUN pip install --no-cache-dir --upgrade pip setuptools && \ + if [ ${ARCH} = "cpu" ]; then pip install torch torchvision --index-url https://download.pytorch.org/whl/cpu; fi && \ + pip install --no-cache-dir -r /home/user/comps/dataprep/pinecone/requirements.txt + ENV PYTHONPATH=$PYTHONPATH:/home/user +USER root + +RUN mkdir -p /home/user/comps/dataprep/pinecone/uploaded_files && chown -R user /home/user/comps/dataprep/pinecone/uploaded_files + +USER user + WORKDIR /home/user/comps/dataprep/pinecone ENTRYPOINT ["python", "prepare_doc_pinecone.py"] - diff --git a/comps/dataprep/pinecone/docker/docker-compose-dataprep-pinecone.yaml b/comps/dataprep/pinecone/docker/docker-compose-dataprep-pinecone.yaml index 93636f3d0..0ee20389d 100644 --- a/comps/dataprep/pinecone/docker/docker-compose-dataprep-pinecone.yaml +++ b/comps/dataprep/pinecone/docker/docker-compose-dataprep-pinecone.yaml @@ -1,19 +1,40 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 version: "3" services: + tei-embedding-service: + image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + container_name: tei-embedding-server + ports: + - "6006:80" + volumes: + - "./data:/data" + shm_size: 1g + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate dataprep-pinecone: - image: opea/gen-ai-comps:dataprep-pinecone-xeon-server + image: opea/dataprep-pinecone:latest container_name: dataprep-pinecone-server ports: - - "6000:6000" + - "6007:6007" + - "6008:6008" + - "6009:6009" ipc: host environment: + no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} PINECONE_API_KEY: 
${PINECONE_API_KEY} PINECONE_INDEX_NAME: ${PINECONE_INDEX_NAME} + TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} + LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY} + HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} restart: unless-stopped networks: diff --git a/comps/dataprep/pinecone/prepare_doc_pinecone.py b/comps/dataprep/pinecone/prepare_doc_pinecone.py index 1a001a1fd..9bb5c35ff 100644 --- a/comps/dataprep/pinecone/prepare_doc_pinecone.py +++ b/comps/dataprep/pinecone/prepare_doc_pinecone.py @@ -1,40 +1,125 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import json import os +import shutil +import uuid +from pathlib import Path +from typing import List, Optional, Union from config import EMBED_MODEL, PINECONE_API_KEY, PINECONE_INDEX_NAME +from fastapi import Body, File, Form, HTTPException, UploadFile from langchain.text_splitter import RecursiveCharacterTextSplitter from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceEmbeddings, HuggingFaceHubEmbeddings -from langchain_community.vectorstores import Pinecone +from langchain_pinecone import PineconeVectorStore +from langchain_text_splitters import HTMLHeaderTextSplitter +from pinecone import Pinecone, ServerlessSpec -from comps import DocPath, opea_microservices, opea_telemetry, register_microservice -from comps.dataprep.utils import document_loader, get_separators +from comps import CustomLogger, DocPath, opea_microservices, opea_telemetry, register_microservice +from comps.dataprep.utils import ( + create_upload_folder, + document_loader, + encode_filename, + get_file_structure, + get_separators, + get_tables_result, + parse_html, + remove_folder_with_ignore, + save_content_to_local_disk, +) -tei_embedding_endpoint = os.getenv("TEI_ENDPOINT") +logger = CustomLogger("prepare_doc_pinecone") +logflag = os.getenv("LOGFLAG", False) +tei_embedding_endpoint = os.getenv("TEI_EMBEDDING_ENDPOINT") +upload_folder = "./uploaded_files/" -@register_microservice( - name="opea_service@prepare_doc_pinecone", - endpoint="/v1/dataprep", - host="0.0.0.0", - port=6000, - input_datatype=DocPath, - output_datatype=None, -) -@opea_telemetry -def ingest_documents(doc_path: DocPath): + +def check_index_existance(): + if logflag: + logger.info(f"[ check index existence ] checking {PINECONE_INDEX_NAME}") + pc = Pinecone(api_key=PINECONE_API_KEY) + existing_indexes = [index_info["name"] for index_info in pc.list_indexes()] + if PINECONE_INDEX_NAME not in existing_indexes: + if logflag: + logger.info("[ check index existence ] index does not exist") + return None + else: + return True + + +def create_index(client): + if logflag: + logger.info(f"[ create index ] creating index {PINECONE_INDEX_NAME}") + try: + client.create_index( + name=PINECONE_INDEX_NAME, + dimension=768, + metric="cosine", + spec=ServerlessSpec(cloud="aws", region="us-east-1"), + ) + if logflag: + logger.info(f"[ create index ] index {PINECONE_INDEX_NAME} successfully created") + except Exception as e: + if logflag: + logger.info(f"[ create index ] fail to create index {PINECONE_INDEX_NAME}: {e}") + return False + return True + + +def drop_index(index_name): + if logflag: + logger.info(f"[ drop index ] dropping index {index_name}") + pc = Pinecone(api_key=PINECONE_API_KEY) + try: + pc.delete_index(index_name) + if logflag: + logger.info(f"[ drop index ] index {index_name} deleted") + except Exception as e: + if logflag: + logger.info(f"[ drop index ] index {index_name} delete failed: {e}") + return False + return True + + +def 
ingest_data_to_pinecone(doc_path: DocPath): """Ingest document to Pinecone.""" - doc_path = doc_path.path - print(f"Parsing document {doc_path}.") + path = doc_path.path + if logflag: + logger.info(f"Parsing document {path}.") + + if path.endswith(".html"): + headers_to_split_on = [ + ("h1", "Header 1"), + ("h2", "Header 2"), + ("h3", "Header 3"), + ] + text_splitter = HTMLHeaderTextSplitter(headers_to_split_on=headers_to_split_on) + else: + text_splitter = RecursiveCharacterTextSplitter( + chunk_size=doc_path.chunk_size, + chunk_overlap=doc_path.chunk_overlap, + add_start_index=True, + separators=get_separators(), + ) + + content = document_loader(path) - text_splitter = RecursiveCharacterTextSplitter( - chunk_size=1500, chunk_overlap=100, add_start_index=True, separators=get_separators() - ) - content = document_loader(doc_path) - chunks = text_splitter.split_text(content) + structured_types = [".xlsx", ".csv", ".json", "jsonl"] + _, ext = os.path.splitext(path) + + if ext in structured_types: + chunks = content + else: + chunks = text_splitter.split_text(content) + + if doc_path.process_table and path.endswith(".pdf"): + table_chunks = get_tables_result(path, doc_path.table_strategy) + chunks = chunks + table_chunks + if logflag: + logger.info("Done preprocessing. Created ", len(chunks), " chunks of the original file.") - print("Done preprocessing. Created ", len(chunks), " chunks of the original pdf") # Create vectorstore if tei_embedding_endpoint: # create embeddings using TEI endpoint service @@ -43,20 +128,174 @@ def ingest_documents(doc_path: DocPath): # create embeddings using local embedding model embedder = HuggingFaceBgeEmbeddings(model_name=EMBED_MODEL) + pc = Pinecone(api_key=PINECONE_API_KEY) + + # Checking Index existence + if not check_index_existance(): + # Creating the index + create_index(pc) + if logflag: + logger.info("Successfully created the index", PINECONE_INDEX_NAME) + # Batch size batch_size = 32 num_chunks = len(chunks) + file_ids = [] + for i in range(0, num_chunks, batch_size): batch_chunks = chunks[i : i + batch_size] batch_texts = batch_chunks - _ = Pinecone.from_texts( + vectorstore = PineconeVectorStore.from_texts( texts=batch_texts, embedding=embedder, index_name=PINECONE_INDEX_NAME, ) - print(f"Processed batch {i//batch_size + 1}/{(num_chunks-1)//batch_size + 1}") + if logflag: + logger.info(f"Processed batch {i//batch_size + 1}/{(num_chunks-1)//batch_size + 1}") + + # store file_ids into index file-keys + pc = Pinecone(api_key=PINECONE_API_KEY) + + +async def ingest_link_to_pinecone(link_list: List[str]): + # Create embedding obj + if tei_embedding_endpoint: + # create embeddings using TEI endpoint service + embedder = HuggingFaceHubEmbeddings(model=tei_embedding_endpoint) + else: + # create embeddings using local embedding model + embedder = HuggingFaceBgeEmbeddings(model_name=EMBED_MODEL) + + pc = Pinecone(api_key=PINECONE_API_KEY) + + # Checking Index existence + if not check_index_existance(): + # Creating the index + create_index(pc) + if logflag: + logger.info("Successfully created the index", PINECONE_INDEX_NAME) + + # save link contents and doc_ids one by one + for link in link_list: + content = parse_html([link])[0][0] + if logflag: + logger.info(f"[ ingest link ] link: {link} content: {content}") + encoded_link = encode_filename(link) + save_path = upload_folder + encoded_link + ".txt" + if logflag: + logger.info(f"[ ingest link ] save_path: {save_path}") + await save_content_to_local_disk(save_path, content) + + vectorstore = 
PineconeVectorStore.from_texts( + texts=content, + embedding=embedder, + index_name=PINECONE_INDEX_NAME, + ) + + return True + + +@register_microservice(name="opea_service@prepare_doc_pinecone", endpoint="/v1/dataprep", host="0.0.0.0", port=6007) +async def ingest_documents( + files: Optional[Union[UploadFile, List[UploadFile]]] = File(None), + link_list: Optional[str] = Form(None), + chunk_size: int = Form(1500), + chunk_overlap: int = Form(100), + process_table: bool = Form(False), + table_strategy: str = Form("fast"), +): + if logflag: + logger.info(f"files:{files}") + logger.info(f"link_list:{link_list}") + + if files: + if not isinstance(files, list): + files = [files] + uploaded_files = [] + for file in files: + encode_file = encode_filename(file.filename) + save_path = upload_folder + encode_file + await save_content_to_local_disk(save_path, file) + ingest_data_to_pinecone( + DocPath( + path=save_path, + chunk_size=chunk_size, + chunk_overlap=chunk_overlap, + process_table=process_table, + table_strategy=table_strategy, + ) + ) + uploaded_files.append(save_path) + if logflag: + logger.info(f"Successfully saved file {save_path}") + result = {"status": 200, "message": "Data preparation succeeded"} + if logflag: + logger.info(result) + return result + + if link_list: + try: + link_list = json.loads(link_list) # Parse JSON string to list + if not isinstance(link_list, list): + raise HTTPException(status_code=400, detail="link_list should be a list.") + await ingest_link_to_pinecone(link_list) + result = {"status": 200, "message": "Data preparation succeeded"} + if logflag: + logger.info(f"Successfully saved link list {link_list}") + logger.info(result) + return result + except json.JSONDecodeError: + raise HTTPException(status_code=400, detail="Invalid JSON format for link_list.") + + raise HTTPException(status_code=400, detail="Must provide either a file or a string list.") + + +@register_microservice( + name="opea_service@prepare_doc_pinecone_file", endpoint="/v1/dataprep/get_file", host="0.0.0.0", port=6008 +) +async def rag_get_file_structure(): + if logflag: + logger.info("[ dataprep - get file ] start to get file structure") + + if not Path(upload_folder).exists(): + if logflag: + logger.info("No file uploaded, return empty list.") + return [] + + file_content = get_file_structure(upload_folder) + if logflag: + logger.info(file_content) + return file_content + + +@register_microservice( + name="opea_service@prepare_doc_pinecone_del", endpoint="/v1/dataprep/delete_file", host="0.0.0.0", port=6009 +) +async def delete_all(file_path: str = Body(..., embed=True)): + """Delete file according to `file_path`. 
+ + `file_path`: + - "all": delete all files uploaded + """ + # delete all uploaded files + if file_path == "all": + if logflag: + logger.info("[dataprep - del] delete all files") + remove_folder_with_ignore(upload_folder) + assert drop_index(index_name=PINECONE_INDEX_NAME) + if logflag: + logger.info("[dataprep - del] successfully delete all files.") + create_upload_folder(upload_folder) + if logflag: + logger.info({"status": True}) + return {"status": True} + else: + raise HTTPException(status_code=404, detail="Single file deletion is not implemented yet") if __name__ == "__main__": + create_upload_folder(upload_folder) opea_microservices["opea_service@prepare_doc_pinecone"].start() + opea_microservices["opea_service@prepare_doc_pinecone_file"].start() + opea_microservices["opea_service@prepare_doc_pinecone_del"].start() diff --git a/comps/dataprep/pinecone/requirements.txt b/comps/dataprep/pinecone/requirements.txt index 9e6a21681..06ab12ecd 100644 --- a/comps/dataprep/pinecone/requirements.txt +++ b/comps/dataprep/pinecone/requirements.txt @@ -1,12 +1,16 @@ beautifulsoup4 +cairosvg docarray[full] +docx2txt easyocr fastapi huggingface_hub langchain langchain-community +langchain-openai langchain-pinecone -langsmith +langchain-text-splitters +markdown numpy opentelemetry-api opentelemetry-exporter-otlp @@ -14,8 +18,13 @@ opentelemetry-sdk pandas Pillow pinecone-client +prometheus-fastapi-instrumentator pymupdf +pyspark +python-bidi==0.4.2 python-docx +python-pptx sentence_transformers shortuuid +unstructured[all-docs]==0.11.5 uvicorn diff --git a/comps/dataprep/qdrant/README.md b/comps/dataprep/qdrant/README.md index 24f58fc09..4b52eaeeb 100644 --- a/comps/dataprep/qdrant/README.md +++ b/comps/dataprep/qdrant/README.md @@ -1,8 +1,8 @@ # Dataprep Microservice with Qdrant -# ๐Ÿš€Start Microservice with Python +## ๐Ÿš€Start Microservice with Python -## Install Requirements +### Install Requirements ```bash pip install -r requirements.txt @@ -11,11 +11,11 @@ apt-get install libtesseract-dev -y apt-get install poppler-utils -y ``` -## Start Qdrant Server +### Start Qdrant Server Please refer to this [readme](../../vectorstores/langchain/qdrant/README.md). -## Setup Environment Variables +### Setup Environment Variables ```bash export no_proxy=${your_no_proxy} @@ -27,7 +27,7 @@ export COLLECTION_NAME=${your_collection_name} export PYTHONPATH=${path_to_comps} ``` -## Start Document Preparation Microservice for Qdrant with Python Script +### Start Document Preparation Microservice for Qdrant with Python Script Start document preparation microservice for Qdrant with below command. @@ -35,50 +35,58 @@ Start document preparation microservice for Qdrant with below command. python prepare_doc_qdrant.py ``` -# ๐Ÿš€Start Microservice with Docker +## ๐Ÿš€Start Microservice with Docker -## Build Docker Image +### Build Docker Image ```bash cd ../../../../ docker build -t opea/dataprep-qdrant:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/qdrant/docker/Dockerfile . 
``` -## Run Docker with CLI +### Run Docker with CLI ```bash -docker run -d --name="dataprep-qdrant-server" -p 6000:6000 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy opea/dataprep-qdrant:latest +docker run -d --name="dataprep-qdrant-server" -p 6007:6007 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy opea/dataprep-qdrant:latest ``` -## Setup Environment Variables +### Setup Environment Variables ```bash export http_proxy=${your_http_proxy} export https_proxy=${your_http_proxy} -export QDRANT=${host_ip} +export QDRANT_HOST=${host_ip} export QDRANT_PORT=6333 export COLLECTION_NAME=${your_collection_name} ``` -## Run Docker with Docker Compose +### Run Docker with Docker Compose ```bash cd comps/dataprep/qdrant/docker docker compose -f docker-compose-dataprep-qdrant.yaml up -d ``` -# Invoke Microservice +## Invoke Microservice Once document preparation microservice for Qdrant is started, user can use below command to invoke the microservice to convert the document to embedding and save to the database. ```bash -curl -X POST -H "Content-Type: application/json" -d '{"path":"/path/to/document"}' http://localhost:6000/v1/dataprep +curl -X POST \ + -H "Content-Type: multipart/form-data" \ + -F "files=@./file1.txt" \ + http://localhost:6007/v1/dataprep ``` You can specify chunk_size and chunk_size by the following commands. ```bash -curl -X POST -H "Content-Type: application/json" -d '{"path":"/path/to/document","chunk_size":1500,"chunk_overlap":100}' http://localhost:6000/v1/dataprep +curl -X POST \ + -H "Content-Type: multipart/form-data" \ + -F "files=@./file1.txt" \ + -F "chunk_size=1500" \ + -F "chunk_overlap=100" \ + http://localhost:6007/v1/dataprep ``` We support table extraction from pdf documents. You can specify process_table and table_strategy by the following commands. "table_strategy" refers to the strategies to understand tables for table retrieval. As the setting progresses from "fast" to "hq" to "llm," the focus shifts towards deeper table understanding at the expense of processing speed. The default strategy is "fast". @@ -86,5 +94,10 @@ We support table extraction from pdf documents. You can specify process_table an Note: If you specify "table_strategy=llm", You should first start TGI Service, please refer to 1.2.1, 1.3.1 in https://github.com/opea-project/GenAIComps/tree/main/comps/llms/README.md, and then `export TGI_LLM_ENDPOINT="http://${your_ip}:8008"`. 
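For reference, a rough Python equivalent of this table-extraction request is sketched below (assuming the dataprep service listens on `localhost:6007` as in the examples above and the PDF name is a placeholder); the updated `curl` form follows.

```python
# Rough Python equivalent of the table-extraction upload; endpoint and file name are assumptions.
import requests

url = "http://localhost:6007/v1/dataprep"
with open("your_file.pdf", "rb") as f:
    resp = requests.post(
        url,
        files={"files": f},
        data={"process_table": "true", "table_strategy": "hq"},
    )
print(resp.json())
```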
```bash -curl -X POST -H "Content-Type: application/json" -d '{"path":"/path/to/document","process_table":true,"table_strategy":"hq"}' http://localhost:6000/v1/dataprep +curl -X POST \ + -H "Content-Type: multipart/form-data" \ + -F "files=@./your_file.pdf" \ + -F "process_table=true" \ + -F "table_strategy=hq" \ + http://localhost:6007/v1/dataprep ``` diff --git a/comps/dataprep/qdrant/config.py b/comps/dataprep/qdrant/config.py index 2b30a3682..7cf37f404 100644 --- a/comps/dataprep/qdrant/config.py +++ b/comps/dataprep/qdrant/config.py @@ -7,7 +7,7 @@ EMBED_MODEL = os.getenv("EMBED_MODEL", "sentence-transformers/all-MiniLM-L6-v2") # Qdrant configuration -QDRANT_HOST = os.getenv("QDRANT", "localhost") +QDRANT_HOST = os.getenv("QDRANT_HOST", "localhost") QDRANT_PORT = int(os.getenv("QDRANT_PORT", 6333)) COLLECTION_NAME = os.getenv("COLLECTION_NAME", "rag-qdrant") diff --git a/comps/dataprep/qdrant/docker/Dockerfile b/comps/dataprep/qdrant/docker/Dockerfile index bdf0315e2..f36b80bc9 100644 --- a/comps/dataprep/qdrant/docker/Dockerfile +++ b/comps/dataprep/qdrant/docker/Dockerfile @@ -1,4 +1,3 @@ - # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 @@ -10,9 +9,9 @@ ARG ARCH="cpu" RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ build-essential \ + default-jre \ libgl1-mesa-glx \ - libjemalloc-dev \ - vim + libjemalloc-dev RUN useradd -m -s /bin/bash user && \ mkdir -p /home/user && \ @@ -22,13 +21,18 @@ USER user COPY comps /home/user/comps -RUN pip install --no-cache-dir --upgrade pip && \ - if [ ${ARCH} = "cpu" ]; then pip install torch --index-url https://download.pytorch.org/whl/cpu; fi && \ +RUN pip install --no-cache-dir --upgrade pip setuptools && \ + if [ ${ARCH} = "cpu" ]; then pip install torch torchvision --index-url https://download.pytorch.org/whl/cpu; fi && \ pip install --no-cache-dir -r /home/user/comps/dataprep/qdrant/requirements.txt ENV PYTHONPATH=$PYTHONPATH:/home/user +USER root + +RUN mkdir -p /home/user/comps/dataprep/qdrant/uploaded_files && chown -R user /home/user/comps/dataprep/qdrant/uploaded_files + +USER user + WORKDIR /home/user/comps/dataprep/qdrant ENTRYPOINT ["python", "prepare_doc_qdrant.py"] - diff --git a/comps/dataprep/qdrant/docker/docker-compose-dataprep-qdrant.yaml b/comps/dataprep/qdrant/docker/docker-compose-dataprep-qdrant.yaml index e86dc2c4e..aaf2a17dd 100644 --- a/comps/dataprep/qdrant/docker/docker-compose-dataprep-qdrant.yaml +++ b/comps/dataprep/qdrant/docker/docker-compose-dataprep-qdrant.yaml @@ -9,19 +9,36 @@ services: ports: - "6333:6333" - "6334:6334" + tei-embedding-service: + image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + container_name: tei-embedding-server + ports: + - "6006:80" + volumes: + - "./data:/data" + shm_size: 1g + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate dataprep-qdrant: image: opea/gen-ai-comps:dataprep-qdrant-xeon-server container_name: dataprep-qdrant-server + depends_on: + - qdrant-vector-db + - tei-embedding-service ports: - - "6000:6000" + - "6007:6007" ipc: host environment: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - QDRANT: ${QDRANT} + QDRANT_HOST: ${QDRANT_HOST} QDRANT_PORT: ${QDRANT_PORT} COLLECTION_NAME: ${COLLECTION_NAME} + TEI_ENDPOINT: ${TEI_ENDPOINT} restart: unless-stopped networks: diff --git a/comps/dataprep/qdrant/prepare_doc_qdrant.py b/comps/dataprep/qdrant/prepare_doc_qdrant.py 
index 422854eec..a97987817 100644 --- a/comps/dataprep/qdrant/prepare_doc_qdrant.py +++ b/comps/dataprep/qdrant/prepare_doc_qdrant.py @@ -1,33 +1,39 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import json import os +from typing import List, Optional, Union -from config import COLLECTION_NAME, EMBED_MODEL, QDRANT_HOST, QDRANT_PORT +from config import COLLECTION_NAME, EMBED_MODEL, QDRANT_HOST, QDRANT_PORT, TEI_EMBEDDING_ENDPOINT +from fastapi import File, Form, HTTPException, UploadFile from langchain.text_splitter import RecursiveCharacterTextSplitter -from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceEmbeddings, HuggingFaceHubEmbeddings +from langchain_community.embeddings import HuggingFaceBgeEmbeddings from langchain_community.vectorstores import Qdrant +from langchain_huggingface import HuggingFaceEndpointEmbeddings from langchain_text_splitters import HTMLHeaderTextSplitter -from comps import DocPath, opea_microservices, opea_telemetry, register_microservice -from comps.dataprep.utils import document_loader, get_separators, get_tables_result +from comps import CustomLogger, DocPath, opea_microservices, register_microservice +from comps.dataprep.utils import ( + document_loader, + encode_filename, + get_separators, + get_tables_result, + parse_html, + save_content_to_local_disk, +) -tei_embedding_endpoint = os.getenv("TEI_ENDPOINT") +logger = CustomLogger("prepare_doc_qdrant") +logflag = os.getenv("LOGFLAG", False) +upload_folder = "./uploaded_files/" -@register_microservice( - name="opea_service@prepare_doc_qdrant", - endpoint="/v1/dataprep", - host="0.0.0.0", - port=6000, - input_datatype=DocPath, - output_datatype=None, -) -@opea_telemetry -def ingest_documents(doc_path: DocPath): + +def ingest_data_to_qdrant(doc_path: DocPath): """Ingest document to Qdrant.""" path = doc_path.path - print(f"Parsing document {path}.") + if logflag: + logger.info(f"Parsing document {path}.") if path.endswith(".html"): headers_to_split_on = [ @@ -38,23 +44,39 @@ def ingest_documents(doc_path: DocPath): text_splitter = HTMLHeaderTextSplitter(headers_to_split_on=headers_to_split_on) else: text_splitter = RecursiveCharacterTextSplitter( - chunk_size=doc_path.chunk_size, chunk_overlap=100, add_start_index=True, separators=get_separators() + chunk_size=doc_path.chunk_size, + chunk_overlap=doc_path.chunk_overlap, + add_start_index=True, + separators=get_separators(), ) content = document_loader(path) - chunks = text_splitter.split_text(content) + + structured_types = [".xlsx", ".csv", ".json", "jsonl"] + _, ext = os.path.splitext(path) + + if ext in structured_types: + chunks = content + else: + chunks = text_splitter.split_text(content) + if doc_path.process_table and path.endswith(".pdf"): table_chunks = get_tables_result(path, doc_path.table_strategy) chunks = chunks + table_chunks - print("Done preprocessing. Created ", len(chunks), " chunks of the original pdf") + if logflag: + logger.info("Done preprocessing. 
Created ", len(chunks), " chunks of the original file.") + # Create vectorstore - if tei_embedding_endpoint: + if TEI_EMBEDDING_ENDPOINT: # create embeddings using TEI endpoint service - embedder = HuggingFaceHubEmbeddings(model=tei_embedding_endpoint) + embedder = HuggingFaceEndpointEmbeddings(model=TEI_EMBEDDING_ENDPOINT) else: # create embeddings using local embedding model embedder = HuggingFaceBgeEmbeddings(model_name=EMBED_MODEL) + if logflag: + logger.info("embedder created.") + # Batch size batch_size = 32 num_chunks = len(chunks) @@ -69,7 +91,88 @@ def ingest_documents(doc_path: DocPath): host=QDRANT_HOST, port=QDRANT_PORT, ) - print(f"Processed batch {i//batch_size + 1}/{(num_chunks-1)//batch_size + 1}") + if logflag: + logger.info(f"Processed batch {i//batch_size + 1}/{(num_chunks-1)//batch_size + 1}") + + return True + + +@register_microservice( + name="opea_service@prepare_doc_qdrant", + endpoint="/v1/dataprep", + host="0.0.0.0", + port=6007, + input_datatype=DocPath, + output_datatype=None, +) +async def ingest_documents( + files: Optional[Union[UploadFile, List[UploadFile]]] = File(None), + link_list: Optional[str] = Form(None), + chunk_size: int = Form(1500), + chunk_overlap: int = Form(100), + process_table: bool = Form(False), + table_strategy: str = Form("fast"), +): + if logflag: + logger.info(f"files:{files}") + logger.info(f"link_list:{link_list}") + + if files: + if not isinstance(files, list): + files = [files] + uploaded_files = [] + for file in files: + encode_file = encode_filename(file.filename) + save_path = upload_folder + encode_file + await save_content_to_local_disk(save_path, file) + ingest_data_to_qdrant( + DocPath( + path=save_path, + chunk_size=chunk_size, + chunk_overlap=chunk_overlap, + process_table=process_table, + table_strategy=table_strategy, + ) + ) + uploaded_files.append(save_path) + if logflag: + logger.info(f"Successfully saved file {save_path}") + result = {"status": 200, "message": "Data preparation succeeded"} + if logflag: + logger.info(result) + return result + + if link_list: + link_list = json.loads(link_list) # Parse JSON string to list + if not isinstance(link_list, list): + raise HTTPException(status_code=400, detail="link_list should be a list.") + for link in link_list: + encoded_link = encode_filename(link) + save_path = upload_folder + encoded_link + ".txt" + content = parse_html([link])[0][0] + try: + await save_content_to_local_disk(save_path, content) + ingest_data_to_qdrant( + DocPath( + path=save_path, + chunk_size=chunk_size, + chunk_overlap=chunk_overlap, + process_table=process_table, + table_strategy=table_strategy, + ) + ) + except json.JSONDecodeError: + raise HTTPException(status_code=500, detail="Fail to ingest data into qdrant.") + + if logflag: + logger.info(f"Successfully saved link {link}") + + result = {"status": 200, "message": "Data preparation succeeded"} + if logflag: + logger.info(result) + return result + + raise HTTPException(status_code=400, detail="Must provide either a file or a string list.") if __name__ == "__main__": diff --git a/comps/dataprep/qdrant/requirements.txt b/comps/dataprep/qdrant/requirements.txt index eb92e628f..e5bcf80b3 100644 --- a/comps/dataprep/qdrant/requirements.txt +++ b/comps/dataprep/qdrant/requirements.txt @@ -8,6 +8,7 @@ huggingface_hub langchain langchain-community langchain-text-splitters +langchain_huggingface markdown numpy opentelemetry-api diff --git a/comps/dataprep/redis/README.md b/comps/dataprep/redis/README.md index 8d1d29a97..76361a236 100644 --- 
a/comps/dataprep/redis/README.md +++ b/comps/dataprep/redis/README.md @@ -1,12 +1,14 @@ # Dataprep Microservice with Redis -For dataprep microservice, we provide two frameworks: `Langchain` and `LlamaIndex`. We also provide `Langchain_ray` which uses ray to parallel the data prep for multi-file performance improvement(observed 5x - 15x speedup by processing 1000 files/links.). +We have provided dataprep microservice for multimodal data input (e.g., text and image) [here](multimodal_langchain/README.md). + +For dataprep microservice for text input, we provide here two frameworks: `Langchain` and `LlamaIndex`. We also provide `Langchain_ray` which uses ray to parallel the data prep for multi-file performance improvement(observed 5x - 15x speedup by processing 1000 files/links.). We organized these two folders in the same way, so you can use either framework for dataprep microservice with the following constructions. -# ๐Ÿš€1. Start Microservice with Python๏ผˆOption 1๏ผ‰ +## ๐Ÿš€1. Start Microservice with Python๏ผˆOption 1๏ผ‰ -## 1.1 Install Requirements +### 1.1 Install Requirements - option 1: Install Single-process version (for 1-10 files processing) @@ -29,22 +31,19 @@ pip install -r requirements.txt cd langchain_ray; pip install -r requirements_ray.txt ``` -## 1.2 Start Redis Stack Server +### 1.2 Start Redis Stack Server Please refer to this [readme](../../vectorstores/langchain/redis/README.md). -## 1.3 Setup Environment Variables +### 1.3 Setup Environment Variables ```bash export REDIS_URL="redis://${your_ip}:6379" export INDEX_NAME=${your_index_name} -export LANGCHAIN_TRACING_V2=true -export LANGCHAIN_API_KEY=${your_langchain_api_key} -export LANGCHAIN_PROJECT="opea/gen-ai-comps:dataprep" export PYTHONPATH=${path_to_comps} ``` -## 1.4 Start Embedding Service +### 1.4 Start Embedding Service First, you need to start a TEI service. @@ -70,7 +69,7 @@ After checking that it works, set up environment variables. export TEI_ENDPOINT="http://localhost:$your_port" ``` -## 1.4 Start Document Preparation Microservice for Redis with Python Script +### 1.4 Start Document Preparation Microservice for Redis with Python Script Start document preparation microservice for Redis with below command. @@ -86,27 +85,23 @@ python prepare_doc_redis.py python prepare_doc_redis_on_ray.py ``` -# ๐Ÿš€2. Start Microservice with Docker (Option 2) +## ๐Ÿš€2. Start Microservice with Docker (Option 2) -## 2.1 Start Redis Stack Server +### 2.1 Start Redis Stack Server Please refer to this [readme](../../vectorstores/langchain/redis/README.md). -## 2.2 Setup Environment Variables +### 2.2 Setup Environment Variables ```bash export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5" export TEI_ENDPOINT="http://${your_ip}:6006" -export REDIS_HOST=${your_ip} -export REDIS_PORT=6379 export REDIS_URL="redis://${your_ip}:6379" export INDEX_NAME=${your_index_name} -export LANGCHAIN_TRACING_V2=true -export LANGCHAIN_API_KEY=${your_langchain_api_key} -export LANGCHAIN_PROJECT="opea/dataprep" +export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token} ``` -## 2.3 Build Docker Image +### 2.3 Build Docker Image - Build docker image with langchain @@ -131,21 +126,21 @@ cd ../../../../ docker build -t opea/dataprep-on-ray-redis:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/redis/langchain_ray/docker/Dockerfile . 
``` -## 2.4 Run Docker with CLI (Option A) +### 2.4 Run Docker with CLI (Option A) - option 1: Start single-process version (for 1-10 files processing) ```bash -docker run -d --name="dataprep-redis-server" -p 6007:6007 -p 6008:6008 -p 6009:6009 --runtime=runc --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e REDIS_URL=$REDIS_URL -e INDEX_NAME=$INDEX_NAME -e TEI_ENDPOINT=$TEI_ENDPOINT opea/dataprep-redis:latest +docker run -d --name="dataprep-redis-server" -p 6007:6007 --runtime=runc --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e REDIS_URL=$REDIS_URL -e INDEX_NAME=$INDEX_NAME -e TEI_ENDPOINT=$TEI_ENDPOINT -e HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN opea/dataprep-redis:latest ``` - option 2: Start multi-process version (for >10 files processing) ```bash -docker run -d --name="dataprep-redis-server" -p 6007:6007 -p 6008:6008 -p 6009:6009 --runtime=runc --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e REDIS_URL=$REDIS_URL -e INDEX_NAME=$INDEX_NAME -e TEI_ENDPOINT=$TEI_ENDPOINT -e TIMEOUT_SECONDS=600 opea/dataprep-on-ray-redis:latest +docker run -d --name="dataprep-redis-server" -p 6007:6007 --runtime=runc --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e REDIS_URL=$REDIS_URL -e INDEX_NAME=$INDEX_NAME -e TEI_ENDPOINT=$TEI_ENDPOINT -e HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN -e TIMEOUT_SECONDS=600 opea/dataprep-on-ray-redis:latest ``` -## 2.5 Run with Docker Compose (Option B - deprecated, will move to genAIExample in future) +### 2.5 Run with Docker Compose (Option B - deprecated, will move to genAIExample in future) ```bash # for langchain @@ -155,15 +150,15 @@ cd comps/dataprep/redis/llama_index/docker docker compose -f docker-compose-dataprep-redis.yaml up -d ``` -# ๐Ÿš€3. Status Microservice +## ๐Ÿš€3. Status Microservice ```bash docker container logs -f dataprep-redis-server ``` -# ๐Ÿš€4. Consume Microservice +## ๐Ÿš€4. Consume Microservice -## 4.1 Consume Upload API +### 4.1 Consume Upload API Once document preparation microservice for Redis is started, user can use below command to invoke the microservice to convert the document to embedding and save to the database. @@ -243,14 +238,14 @@ except requests.exceptions.RequestException as e: print("An error occurred:", e) ``` -## 4.2 Consume get_file API +### 4.2 Consume get_file API To get uploaded file structures, use the following command: ```bash curl -X POST \ -H "Content-Type: application/json" \ - http://localhost:6008/v1/dataprep/get_file + http://localhost:6007/v1/dataprep/get_file ``` Then you will get the response JSON like this: @@ -272,7 +267,7 @@ Then you will get the response JSON like this: ] ``` -## 4.3 Consume delete_file API +### 4.3 Consume delete_file API To delete uploaded file/link, use the following command. @@ -283,17 +278,17 @@ The `file_path` here should be the `id` get from `/v1/dataprep/get_file` API. 
curl -X POST \ -H "Content-Type: application/json" \ -d '{"file_path": "https://www.ces.tech/.txt"}' \ - http://10.165.57.68:6009/v1/dataprep/delete_file + http://localhost:6007/v1/dataprep/delete_file # delete file curl -X POST \ -H "Content-Type: application/json" \ -d '{"file_path": "uploaded_file_1.txt"}' \ - http://10.165.57.68:6009/v1/dataprep/delete_file + http://localhost:6007/v1/dataprep/delete_file # delete all files and links curl -X POST \ -H "Content-Type: application/json" \ -d '{"file_path": "all"}' \ - http://10.165.57.68:6009/v1/dataprep/delete_file + http://localhost:6007/v1/dataprep/delete_file ``` diff --git a/comps/dataprep/redis/langchain/config.py b/comps/dataprep/redis/langchain/config.py index b441f80d8..75715912c 100644 --- a/comps/dataprep/redis/langchain/config.py +++ b/comps/dataprep/redis/langchain/config.py @@ -62,3 +62,5 @@ def format_redis_conn_from_env(): KEY_INDEX_NAME = os.getenv("KEY_INDEX_NAME", "file-keys") TIMEOUT_SECONDS = int(os.getenv("TIMEOUT_SECONDS", 600)) + +SEARCH_BATCH_SIZE = int(os.getenv("SEARCH_BATCH_SIZE", 10)) diff --git a/comps/dataprep/redis/langchain/docker/Dockerfile b/comps/dataprep/redis/langchain/docker/Dockerfile index f7fcff5a7..61620b88f 100644 --- a/comps/dataprep/redis/langchain/docker/Dockerfile +++ b/comps/dataprep/redis/langchain/docker/Dockerfile @@ -1,4 +1,3 @@ - # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 @@ -10,10 +9,12 @@ ARG ARCH="cpu" RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ build-essential \ + default-jre \ libgl1-mesa-glx \ libjemalloc-dev \ - default-jre \ - vim + libreoffice \ + poppler-utils \ + tesseract-ocr RUN useradd -m -s /bin/bash user && \ mkdir -p /home/user && \ @@ -38,4 +39,3 @@ USER user WORKDIR /home/user/comps/dataprep/redis/langchain ENTRYPOINT ["python", "prepare_doc_redis.py"] - diff --git a/comps/dataprep/redis/langchain/docker/docker-compose-dataprep-redis.yaml b/comps/dataprep/redis/langchain/docker/docker-compose-dataprep-redis.yaml index 74e2bb78f..0ef8a1f1a 100644 --- a/comps/dataprep/redis/langchain/docker/docker-compose-dataprep-redis.yaml +++ b/comps/dataprep/redis/langchain/docker/docker-compose-dataprep-redis.yaml @@ -27,8 +27,6 @@ services: container_name: dataprep-redis-server ports: - "6007:6007" - - "6008:6008" - - "6009:6009" ipc: host environment: no_proxy: ${no_proxy} @@ -39,7 +37,7 @@ services: REDIS_URL: ${REDIS_URL} INDEX_NAME: ${INDEX_NAME} TEI_ENDPOINT: ${TEI_ENDPOINT} - LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY} + HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} restart: unless-stopped networks: diff --git a/comps/dataprep/redis/langchain/prepare_doc_redis.py b/comps/dataprep/redis/langchain/prepare_doc_redis.py index aff3da605..0082ddcd6 100644 --- a/comps/dataprep/redis/langchain/prepare_doc_redis.py +++ b/comps/dataprep/redis/langchain/prepare_doc_redis.py @@ -10,22 +10,22 @@ # from pyspark import SparkConf, SparkContext import redis -from config import EMBED_MODEL, INDEX_NAME, KEY_INDEX_NAME, REDIS_URL +from config import EMBED_MODEL, INDEX_NAME, KEY_INDEX_NAME, REDIS_URL, SEARCH_BATCH_SIZE from fastapi import Body, File, Form, HTTPException, UploadFile from langchain.text_splitter import RecursiveCharacterTextSplitter -from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceHubEmbeddings +from langchain_community.embeddings import HuggingFaceBgeEmbeddings from langchain_community.vectorstores import Redis +from langchain_huggingface import 
HuggingFaceEndpointEmbeddings from langchain_text_splitters import HTMLHeaderTextSplitter -from langsmith import traceable from redis.commands.search.field import TextField from redis.commands.search.indexDefinition import IndexDefinition, IndexType -from comps import DocPath, opea_microservices, register_microservice +from comps import CustomLogger, DocPath, opea_microservices, register_microservice from comps.dataprep.utils import ( create_upload_folder, document_loader, encode_filename, - get_file_structure, + format_search_results, get_separators, get_tables_result, parse_html, @@ -33,63 +33,81 @@ save_content_to_local_disk, ) +logger = CustomLogger("prepare_doc_redis") +logflag = os.getenv("LOGFLAG", False) + tei_embedding_endpoint = os.getenv("TEI_ENDPOINT") upload_folder = "./uploaded_files/" redis_pool = redis.ConnectionPool.from_url(REDIS_URL) def check_index_existance(client): - print(f"[ check index existence ] checking {client}") + if logflag: + logger.info(f"[ check index existence ] checking {client}") try: results = client.search("*") - print(f"[ check index existence ] index of client exists: {client}") + if logflag: + logger.info(f"[ check index existence ] index of client exists: {client}") return results except Exception as e: - print(f"[ check index existence ] index does not exist: {e}") + if logflag: + logger.info(f"[ check index existence ] index does not exist: {e}") return None def create_index(client, index_name: str = KEY_INDEX_NAME): - print(f"[ create index ] creating index {index_name}") + if logflag: + logger.info(f"[ create index ] creating index {index_name}") try: definition = IndexDefinition(index_type=IndexType.HASH, prefix=["file:"]) client.create_index((TextField("file_name"), TextField("key_ids")), definition=definition) - print(f"[ create index ] index {index_name} successfully created") + if logflag: + logger.info(f"[ create index ] index {index_name} successfully created") except Exception as e: - print(f"[ create index ] fail to create index {index_name}: {e}") + if logflag: + logger.info(f"[ create index ] fail to create index {index_name}: {e}") return False return True def store_by_id(client, key, value): - print(f"[ store by id ] storing ids of {key}") + if logflag: + logger.info(f"[ store by id ] storing ids of {key}") try: client.add_document(doc_id="file:" + key, file_name=key, key_ids=value) - print(f"[ store by id ] store document success. id: file:{key}") + if logflag: + logger.info(f"[ store by id ] store document success. 
id: file:{key}") except Exception as e: - print(f"[ store by id ] fail to store document file:{key}: {e}") + if logflag: + logger.info(f"[ store by id ] fail to store document file:{key}: {e}") return False return True def search_by_id(client, doc_id): - print(f"[ search by id ] searching docs of {doc_id}") + if logflag: + logger.info(f"[ search by id ] searching docs of {doc_id}") try: results = client.load_document(doc_id) - print(f"[ search by id ] search success of {doc_id}") + if logflag: + logger.info(f"[ search by id ] search success of {doc_id}: {results}") return results except Exception as e: - print(f"[ search by id ] fail to search docs of {doc_id}: {e}") + if logflag: + logger.info(f"[ search by id ] fail to search docs of {doc_id}: {e}") return None def drop_index(index_name, redis_url=REDIS_URL): - print(f"[ drop index ] dropping index {index_name}") + if logflag: + logger.info(f"[ drop index ] dropping index {index_name}") try: assert Redis.drop_index(index_name=index_name, delete_documents=True, redis_url=redis_url) - print(f"[ drop index ] index {index_name} deleted") + if logflag: + logger.info(f"[ drop index ] index {index_name} deleted") except Exception as e: - print(f"[ drop index ] index {index_name} delete failed: {e}") + if logflag: + logger.info(f"[ drop index ] index {index_name} delete failed: {e}") return False return True @@ -97,19 +115,22 @@ def drop_index(index_name, redis_url=REDIS_URL): def delete_by_id(client, id): try: assert client.delete_document(id) - print(f"[ delete by id ] delete id success: {id}") + if logflag: + logger.info(f"[ delete by id ] delete id success: {id}") except Exception as e: - print(f"[ delete by id ] fail to delete ids {id}: {e}") + if logflag: + logger.info(f"[ delete by id ] fail to delete ids {id}: {e}") return False return True def ingest_chunks_to_redis(file_name: str, chunks: List): - print(f"[ ingest chunks ] file name: {file_name}") + if logflag: + logger.info(f"[ ingest chunks ] file name: {file_name}") # Create vectorstore if tei_embedding_endpoint: # create embeddings using TEI endpoint service - embedder = HuggingFaceHubEmbeddings(model=tei_embedding_endpoint) + embedder = HuggingFaceEndpointEmbeddings(model=tei_embedding_endpoint) else: # create embeddings using local embedding model embedder = HuggingFaceBgeEmbeddings(model_name=EMBED_MODEL) @@ -120,7 +141,8 @@ def ingest_chunks_to_redis(file_name: str, chunks: List): file_ids = [] for i in range(0, num_chunks, batch_size): - print(f"[ ingest chunks ] Current batch: {i}") + if logflag: + logger.info(f"[ ingest chunks ] Current batch: {i}") batch_chunks = chunks[i : i + batch_size] batch_texts = batch_chunks @@ -130,24 +152,32 @@ def ingest_chunks_to_redis(file_name: str, chunks: List): index_name=INDEX_NAME, redis_url=REDIS_URL, ) - print(f"[ ingest chunks ] keys: {keys}") + if logflag: + logger.info(f"[ ingest chunks ] keys: {keys}") file_ids.extend(keys) - print(f"[ ingest chunks ] Processed batch {i//batch_size + 1}/{(num_chunks-1)//batch_size + 1}") + if logflag: + logger.info(f"[ ingest chunks ] Processed batch {i//batch_size + 1}/{(num_chunks-1)//batch_size + 1}") # store file_ids into index file-keys r = redis.Redis(connection_pool=redis_pool) client = r.ft(KEY_INDEX_NAME) if not check_index_existance(client): assert create_index(client) - assert store_by_id(client, key=file_name, value="#".join(file_ids)) + try: + assert store_by_id(client, key=file_name, value="#".join(file_ids)) + except Exception as e: + if logflag: + logger.info(f"[ ingest chunks ] {e}. 
Fail to store chunks of file {file_name}.") + raise HTTPException(status_code=500, detail=f"Fail to store chunks of file {file_name}.") return True def ingest_data_to_redis(doc_path: DocPath): """Ingest document to Redis.""" path = doc_path.path - print(f"Parsing document {path}.") + if logflag: + logger.info(f"[ ingest data ] Parsing document {path}.") if path.endswith(".html"): headers_to_split_on = [ @@ -165,19 +195,29 @@ def ingest_data_to_redis(doc_path: DocPath): ) content = document_loader(path) + if logflag: + logger.info("[ ingest data ] file content loaded") + + structured_types = [".xlsx", ".csv", ".json", "jsonl"] + _, ext = os.path.splitext(path) - chunks = text_splitter.split_text(content) + if ext in structured_types: + chunks = content + else: + chunks = text_splitter.split_text(content) + + ### Specially processing for the table content in PDFs if doc_path.process_table and path.endswith(".pdf"): table_chunks = get_tables_result(path, doc_path.table_strategy) chunks = chunks + table_chunks - print("Done preprocessing. Created ", len(chunks), " chunks of the original pdf") + if logflag: + logger.info(f"[ ingest data ] Done preprocessing. Created {len(chunks)} chunks of the given file.") file_name = doc_path.path.split("/")[-1] return ingest_chunks_to_redis(file_name, chunks) @register_microservice(name="opea_service@prepare_doc_redis", endpoint="/v1/dataprep", host="0.0.0.0", port=6007) -@traceable(run_type="tool") async def ingest_documents( files: Optional[Union[UploadFile, List[UploadFile]]] = File(None), link_list: Optional[str] = Form(None), @@ -186,15 +226,37 @@ async def ingest_documents( process_table: bool = Form(False), table_strategy: str = Form("fast"), ): - print(f"files:{files}") - print(f"link_list:{link_list}") + if logflag: + logger.info(f"[ upload ] files:{files}") + logger.info(f"[ upload ] link_list:{link_list}") + + r = redis.Redis(connection_pool=redis_pool) + client = r.ft(KEY_INDEX_NAME) if files: if not isinstance(files, list): files = [files] uploaded_files = [] + for file in files: encode_file = encode_filename(file.filename) + doc_id = "file:" + encode_file + if logflag: + logger.info(f"[ upload ] processing file {doc_id}") + + # check whether the file already exists + key_ids = None + try: + key_ids = search_by_id(client, doc_id).key_ids + if logflag: + logger.info(f"[ upload ] File {file.filename} already exists.") + except Exception as e: + logger.info(f"[ upload ] File {file.filename} does not exist.") + if key_ids: + raise HTTPException( + status_code=400, detail=f"Uploaded file {file.filename} already exists. Please change file name." 
+ ) + save_path = upload_folder + encode_file await save_content_to_local_disk(save_path, file) ingest_data_to_redis( @@ -207,7 +269,8 @@ async def ingest_documents( ) ) uploaded_files.append(save_path) - print(f"Successfully saved file {save_path}") + if logflag: + logger.info(f"[ upload ] Successfully saved file {save_path}") # def process_files_wrapper(files): # if not isinstance(files, list): @@ -230,109 +293,210 @@ async def ingest_documents( # except: # # Stop the SparkContext # sc.stop() - - return {"status": 200, "message": "Data preparation succeeded"} + result = {"status": 200, "message": "Data preparation succeeded"} + if logflag: + logger.info(result) + return result if link_list: - try: - link_list = json.loads(link_list) # Parse JSON string to list - if not isinstance(link_list, list): - raise HTTPException(status_code=400, detail="link_list should be a list.") - for link in link_list: - encoded_link = encode_filename(link) - save_path = upload_folder + encoded_link + ".txt" - content = parse_html([link])[0][0] - await save_content_to_local_disk(save_path, content) - ingest_data_to_redis( - DocPath( - path=save_path, - chunk_size=chunk_size, - chunk_overlap=chunk_overlap, - process_table=process_table, - table_strategy=table_strategy, - ) + link_list = json.loads(link_list) # Parse JSON string to list + if not isinstance(link_list, list): + raise HTTPException(status_code=400, detail=f"Link_list {link_list} should be a list.") + for link in link_list: + encoded_link = encode_filename(link) + doc_id = "file:" + encoded_link + ".txt" + if logflag: + logger.info(f"[ upload ] processing link {doc_id}") + + # check whether the link file already exists + key_ids = None + try: + key_ids = search_by_id(client, doc_id).key_ids + if logflag: + logger.info(f"[ upload ] Link {link} already exists.") + except Exception as e: + logger.info(f"[ upload ] Link {link} does not exist. Keep storing.") + if key_ids: + raise HTTPException( + status_code=400, detail=f"Uploaded link {link} already exists. Please change another link." 
+ ) + + save_path = upload_folder + encoded_link + ".txt" + content = parse_html([link])[0][0] + await save_content_to_local_disk(save_path, content) + ingest_data_to_redis( + DocPath( + path=save_path, + chunk_size=chunk_size, + chunk_overlap=chunk_overlap, + process_table=process_table, + table_strategy=table_strategy, ) - print(f"Successfully saved link list {link_list}") - return {"status": 200, "message": "Data preparation succeeded"} - except json.JSONDecodeError: - raise HTTPException(status_code=400, detail="Invalid JSON format for link_list.") + ) + if logflag: + logger.info(f"[ upload ] Successfully saved link list {link_list}") + return {"status": 200, "message": "Data preparation succeeded"} raise HTTPException(status_code=400, detail="Must provide either a file or a string list.") @register_microservice( - name="opea_service@prepare_doc_redis_file", endpoint="/v1/dataprep/get_file", host="0.0.0.0", port=6008 + name="opea_service@prepare_doc_redis", endpoint="/v1/dataprep/get_file", host="0.0.0.0", port=6007 ) -@traceable(run_type="tool") async def rag_get_file_structure(): - print("[ dataprep - get file ] start to get file structure") + if logflag: + logger.info("[ get ] start to get file structure") - if not Path(upload_folder).exists(): - print("No file uploaded, return empty list.") - return [] - - file_content = get_file_structure(upload_folder) - return file_content + # define redis client + r = redis.Redis(connection_pool=redis_pool) + offset = 0 + file_list = [] + + # check index existence + res = check_index_existance(r.ft(KEY_INDEX_NAME)) + if not res: + if logflag: + logger.info(f"[ get ] index {KEY_INDEX_NAME} does not exist") + return file_list + + while True: + response = r.execute_command("FT.SEARCH", KEY_INDEX_NAME, "*", "LIMIT", offset, offset + SEARCH_BATCH_SIZE) + # no doc retrieved + if len(response) < 2: + break + file_list = format_search_results(response, file_list) + offset += SEARCH_BATCH_SIZE + # last batch + if (len(response) - 1) // 2 < SEARCH_BATCH_SIZE: + break + if logflag: + logger.info(f"[get] final file_list: {file_list}") + return file_list @register_microservice( - name="opea_service@prepare_doc_redis_del", endpoint="/v1/dataprep/delete_file", host="0.0.0.0", port=6009 + name="opea_service@prepare_doc_redis", endpoint="/v1/dataprep/delete_file", host="0.0.0.0", port=6007 ) -@traceable(run_type="tool") async def delete_single_file(file_path: str = Body(..., embed=True)): """Delete file according to `file_path`. `file_path`: - specific file path (e.g. /path/to/file.txt) - - folder path (e.g. /path/to/folder) - "all": delete all files uploaded """ + + # define redis client + r = redis.Redis(connection_pool=redis_pool) + client = r.ft(KEY_INDEX_NAME) + client2 = r.ft(INDEX_NAME) + # delete all uploaded files if file_path == "all": - print("[dataprep - del] delete all files") - remove_folder_with_ignore(upload_folder) - assert drop_index(index_name=INDEX_NAME) - assert drop_index(index_name=KEY_INDEX_NAME) - print("[dataprep - del] successfully delete all files.") + if logflag: + logger.info("[ delete ] delete all files") + + # drop index KEY_INDEX_NAME + if check_index_existance(client): + try: + assert drop_index(index_name=KEY_INDEX_NAME) + except Exception as e: + if logflag: + logger.info(f"[ delete ] {e}. 
Fail to drop index {KEY_INDEX_NAME}.") + raise HTTPException(status_code=500, detail=f"Fail to drop index {KEY_INDEX_NAME}.") + else: + logger.info(f"[ delete ] Index {KEY_INDEX_NAME} does not exits.") + + # drop index INDEX_NAME + if check_index_existance(client2): + try: + assert drop_index(index_name=INDEX_NAME) + except Exception as e: + if logflag: + logger.info(f"[ delete ] {e}. Fail to drop index {INDEX_NAME}.") + raise HTTPException(status_code=500, detail=f"Fail to drop index {INDEX_NAME}.") + else: + if logflag: + logger.info(f"[ delete ] Index {INDEX_NAME} does not exits.") + + # delete files on local disk + try: + remove_folder_with_ignore(upload_folder) + except Exception as e: + if logflag: + logger.info(f"[ delete ] {e}. Fail to delete {upload_folder}.") + raise HTTPException(status_code=500, detail=f"Fail to delete {upload_folder}.") + + if logflag: + logger.info("[ delete ] successfully delete all files.") create_upload_folder(upload_folder) + if logflag: + logger.info({"status": True}) return {"status": True} delete_path = Path(upload_folder + "/" + encode_filename(file_path)) - print(f"[dataprep - del] delete_path: {delete_path}") + if logflag: + logger.info(f"[ delete ] delete_path: {delete_path}") - # partially delete files/folders + # partially delete files if delete_path.exists(): - r = redis.Redis(connection_pool=redis_pool) - client = r.ft(KEY_INDEX_NAME) - client2 = r.ft(INDEX_NAME) doc_id = "file:" + encode_filename(file_path) - objs = search_by_id(client, doc_id).key_ids - file_ids = objs.split("#") + + # determine whether this file exists in db KEY_INDEX_NAME + try: + key_ids = search_by_id(client, doc_id).key_ids + except Exception as e: + if logflag: + logger.info(f"[ delete ] {e}, File {file_path} does not exists.") + raise HTTPException( + status_code=404, detail=f"File not found in db {KEY_INDEX_NAME}. Please check file_path." + ) + file_ids = key_ids.split("#") # delete file if delete_path.is_file(): + # delete file keys id in db KEY_INDEX_NAME try: - for file_id in file_ids: - assert delete_by_id(client2, file_id) assert delete_by_id(client, doc_id) - delete_path.unlink() except Exception as e: - print(f"[dataprep - del] fail to delete file {delete_path}: {e}") - return {"status": False} + if logflag: + logger.info(f"[ delete ] {e}. File {file_path} delete failed for db {KEY_INDEX_NAME}.") + raise HTTPException(status_code=500, detail=f"File {file_path} delete failed.") + + # delete file content in db INDEX_NAME + for file_id in file_ids: + # determine whether this file exists in db INDEX_NAME + try: + content = search_by_id(client2, file_id).content + except Exception as e: + if logflag: + logger.info(f"[ delete ] {e}. File {file_path} does not exists.") + raise HTTPException( + status_code=404, detail=f"File not found in db {INDEX_NAME}. Please check file_path." + ) + + # delete file content + try: + assert delete_by_id(client2, file_id) + except Exception as e: + if logflag: + logger.info(f"[ delete ] {e}. 
File {file_path} delete failed for db {INDEX_NAME}") + raise HTTPException(status_code=500, detail=f"File {file_path} delete failed.") + + # delete file on local disk + delete_path.unlink() + if logflag: + logger.info({"status": True}) + return {"status": True} + # delete folder else: - try: - shutil.rmtree(delete_path) - except Exception as e: - print(f"[dataprep - del] fail to delete folder {delete_path}: {e}") - return {"status": False} - return {"status": True} + if logflag: + logger.info(f"[ delete ] Delete folder {file_path} is not supported for now.") + raise HTTPException(status_code=404, detail=f"Delete folder {file_path} is not supported for now.") else: - raise HTTPException(status_code=404, detail="File/folder not found. Please check del_path.") + raise HTTPException(status_code=404, detail=f"File {file_path} not found. Please check file_path.") if __name__ == "__main__": create_upload_folder(upload_folder) opea_microservices["opea_service@prepare_doc_redis"].start() - opea_microservices["opea_service@prepare_doc_redis_file"].start() - opea_microservices["opea_service@prepare_doc_redis_del"].start() diff --git a/comps/dataprep/redis/langchain/requirements.txt b/comps/dataprep/redis/langchain/requirements.txt index 12d389513..284b9379b 100644 --- a/comps/dataprep/redis/langchain/requirements.txt +++ b/comps/dataprep/redis/langchain/requirements.txt @@ -5,10 +5,10 @@ docx2txt easyocr fastapi huggingface_hub -langchain +langchain==0.2.12 langchain-community langchain-text-splitters -langsmith +langchain_huggingface markdown numpy opentelemetry-api diff --git a/comps/dataprep/redis/langchain_ray/docker/Dockerfile b/comps/dataprep/redis/langchain_ray/docker/Dockerfile index e5c27cb34..3f6b10b61 100644 --- a/comps/dataprep/redis/langchain_ray/docker/Dockerfile +++ b/comps/dataprep/redis/langchain_ray/docker/Dockerfile @@ -1,4 +1,3 @@ - # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 @@ -10,10 +9,11 @@ ARG ARCH="cpu" RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ build-essential \ + libcairo2 \ libgl1-mesa-glx \ libjemalloc-dev \ - vim \ - libcairo2 + poppler-utils \ + tesseract-ocr RUN useradd -m -s /bin/bash user && \ mkdir -p /home/user && \ @@ -39,4 +39,3 @@ USER user WORKDIR /home/user/comps/dataprep/redis/langchain_ray ENTRYPOINT ["python", "prepare_doc_redis_on_ray.py"] - diff --git a/comps/dataprep/redis/langchain_ray/prepare_doc_redis_on_ray.py b/comps/dataprep/redis/langchain_ray/prepare_doc_redis_on_ray.py index c55165061..d5ec731ba 100644 --- a/comps/dataprep/redis/langchain_ray/prepare_doc_redis_on_ray.py +++ b/comps/dataprep/redis/langchain_ray/prepare_doc_redis_on_ray.py @@ -26,7 +26,6 @@ from langchain.text_splitter import RecursiveCharacterTextSplitter from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceHubEmbeddings from langchain_community.vectorstores import Redis -from langsmith import traceable cur_path = pathlib.Path(__file__).parent.resolve() comps_path = os.path.join(cur_path, "../../../../") @@ -41,7 +40,7 @@ from ray.data.datasource import FileBasedDatasource from tqdm import tqdm -from comps import DocPath, opea_microservices, register_microservice +from comps import CustomLogger, DocPath, opea_microservices, register_microservice from comps.dataprep.utils import ( Timer, create_upload_folder, @@ -55,6 +54,9 @@ timeout, ) +logger = CustomLogger("prepare_doc_redis") +logflag = os.getenv("LOGFLAG", False) + tei_embedding_endpoint = os.getenv("TEI_ENDPOINT") debug = 
False upload_folder = "./uploaded_files/" @@ -74,7 +76,8 @@ def prepare_env(enable_ray=False, pip_requirements=None): def generate_log_name(file_list): file_set = f"{sorted(file_list)}" - # print(f"file_set: {file_set}") + # if logflag: + # logger.info(f"file_set: {file_set}") md5_str = hashlib.md5(file_set.encode(), usedforsecurity=False).hexdigest() return f"status/status_{md5_str}.log" @@ -174,7 +177,12 @@ def data_to_redis(data): text_splitter = RecursiveCharacterTextSplitter( chunk_size=1500, chunk_overlap=100, add_start_index=True, separators=get_separators(), is_separator_regex=False ) - chunks = text_splitter.split_text(data) + if isinstance(data, list): + chunks = data + elif isinstance(data, str): + chunks = text_splitter.split_text(data) + else: + raise TypeError("The content must be either a list or a string.") # Create vectorstore if tei_embedding_endpoint: @@ -197,7 +205,8 @@ def data_to_redis(data): index_name=INDEX_NAME, redis_url=REDIS_URL, ) - # print(f"Processed batch {i//batch_size + 1}/{(num_chunks-1)//batch_size + 1}") + # if logflag: + # logger.info(f"Processed batch {i//batch_size + 1}/{(num_chunks-1)//batch_size + 1}") return num_chunks @@ -258,8 +267,8 @@ def _parse_html(link): for link in tqdm(link_list, total=len(link_list)): with Timer(f"read document {link}."): data = _parse_html(link) - if debug: - print("content is: ", data) + if logflag: + logger.info("content is: ", data) with Timer(f"ingest document {link} to Redis."): data_to_redis(data) return True @@ -267,6 +276,9 @@ def _parse_html(link): @register_microservice(name="opea_service@prepare_doc_redis", endpoint="/v1/dataprep", host="0.0.0.0", port=6007) async def ingest_documents(files: List[UploadFile] = File(None), link_list: str = Form(None)): + if logflag: + logger.info(files) + logger.info(link_list) if files and link_list: raise HTTPException(status_code=400, detail="Provide either a file or a string list, not both.") @@ -293,9 +305,13 @@ async def ingest_documents(files: List[UploadFile] = File(None), link_list: str enable_ray = True prepare_env(enable_ray=enable_ray) num_cpus = get_max_cpus(len(saved_path_list)) - print(f"per task num_cpus: {num_cpus}") + if logflag: + logger.info(f"per task num_cpus: {num_cpus}") ret = ingest_data_to_redis(saved_path_list, enable_ray=enable_ray, num_cpus=num_cpus) - return {"status": 200, "message": f"Data preparation succeeded. ret msg is {ret}"} + result = {"status": 200, "message": f"Data preparation succeeded. ret msg is {ret}"} + if logflag: + logger.info(result) + return result except Exception as e: raise HTTPException(status_code=400, detail=f"An error occurred: {e}") @@ -310,9 +326,13 @@ async def ingest_documents(files: List[UploadFile] = File(None), link_list: str enable_ray = True prepare_env(enable_ray=enable_ray) num_cpus = get_max_cpus(len(link_list)) - print(f"per task num_cpus: {num_cpus}") + if logflag: + logger.info(f"per task num_cpus: {num_cpus}") ret = ingest_link_to_redis(link_list, enable_ray=enable_ray, num_cpus=num_cpus) - return {"status": 200, "message": f"Data preparation succeeded, ret msg is {ret}"} + result = {"status": 200, "message": f"Data preparation succeeded. 
ret msg is {ret}"} + if logflag: + logger.info(result) + return result except json.JSONDecodeError: raise HTTPException(status_code=400, detail="Invalid JSON format for link_list.") except Exception as e: @@ -322,22 +342,24 @@ async def ingest_documents(files: List[UploadFile] = File(None), link_list: str @register_microservice( name="opea_service@prepare_doc_redis_file", endpoint="/v1/dataprep/get_file", host="0.0.0.0", port=6008 ) -@traceable(run_type="tool") async def rag_get_file_structure(): - print("[ get_file_structure] ") + if logflag: + logger.info("[ get_file_structure] ") if not Path(upload_folder).exists(): - print("No file uploaded, return empty list.") + if logflag: + logger.info("No file uploaded, return empty list.") return [] file_content = get_file_structure(upload_folder) + if logflag: + logger.info(file_content) return file_content @register_microservice( name="opea_service@prepare_doc_redis_del", endpoint="/v1/dataprep/delete_file", host="0.0.0.0", port=6009 ) -@traceable(run_type="tool") async def delete_single_file(file_path: str = Body(..., embed=True)): """Delete file according to `file_path`. @@ -346,16 +368,23 @@ async def delete_single_file(file_path: str = Body(..., embed=True)): - folder path (e.g. /path/to/folder) - "all": delete all files uploaded """ + if logflag: + logger.info(file_path) # delete all uploaded files if file_path == "all": - print("[dataprep - del] delete all files") + if logflag: + logger.info("[dataprep - del] delete all files") remove_folder_with_ignore(upload_folder) - print("[dataprep - del] successfully delete all files.") + if logflag: + logger.info("[dataprep - del] successfully delete all files.") create_upload_folder(upload_folder) + if logflag: + logger.info({"status": True}) return {"status": True} delete_path = Path(upload_folder + "/" + encode_filename(file_path)) - print(f"[dataprep - del] delete_path: {delete_path}") + if logflag: + logger.info(f"[dataprep - del] delete_path: {delete_path}") # partially delete files/folders if delete_path.exists(): @@ -364,15 +393,21 @@ async def delete_single_file(file_path: str = Body(..., embed=True)): try: delete_path.unlink() except Exception as e: - print(f"[dataprep - del] fail to delete file {delete_path}: {e}") + if logflag: + logger.info(f"[dataprep - del] fail to delete file {delete_path}: {e}") + logger.info({"status": False}) return {"status": False} # delete folder else: try: shutil.rmtree(delete_path) except Exception as e: - print(f"[dataprep - del] fail to delete folder {delete_path}: {e}") + if logflag: + logger.info(f"[dataprep - del] fail to delete folder {delete_path}: {e}") + logger.info({"status": False}) return {"status": False} + if logflag: + logger.info({"status": True}) return {"status": True} else: raise HTTPException(status_code=404, detail="File/folder not found. 
Please check del_path.") diff --git a/comps/dataprep/redis/langchain_ray/requirements.txt b/comps/dataprep/redis/langchain_ray/requirements.txt index b16a4ac82..a77ba5c4d 100644 --- a/comps/dataprep/redis/langchain_ray/requirements.txt +++ b/comps/dataprep/redis/langchain_ray/requirements.txt @@ -7,7 +7,6 @@ fastapi huggingface_hub langchain langchain-community -langsmith numpy opentelemetry-api opentelemetry-exporter-otlp @@ -19,6 +18,7 @@ pyarrow pymupdf python-bidi==0.4.2 python-docx +python-multipart python-pptx ray redis diff --git a/comps/dataprep/redis/llama_index/docker/Dockerfile b/comps/dataprep/redis/llama_index/docker/Dockerfile index 1bf0e8d4a..f34930e71 100644 --- a/comps/dataprep/redis/llama_index/docker/Dockerfile +++ b/comps/dataprep/redis/llama_index/docker/Dockerfile @@ -1,4 +1,3 @@ - # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 @@ -10,10 +9,11 @@ ARG ARCH="cpu" RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ build-essential \ + libcairo2 \ libgl1-mesa-glx \ libjemalloc-dev \ - vim \ - libcairo2 + poppler-utils \ + tesseract-ocr RUN useradd -m -s /bin/bash user && \ mkdir -p /home/user && \ @@ -38,4 +38,3 @@ USER user WORKDIR /home/user/comps/dataprep/redis/llama_index ENTRYPOINT ["python", "prepare_doc_redis.py"] - diff --git a/comps/dataprep/redis/llama_index/prepare_doc_redis.py b/comps/dataprep/redis/llama_index/prepare_doc_redis.py index da176555b..fc93ebaad 100644 --- a/comps/dataprep/redis/llama_index/prepare_doc_redis.py +++ b/comps/dataprep/redis/llama_index/prepare_doc_redis.py @@ -8,7 +8,6 @@ from config import EMBED_MODEL, INDEX_NAME, REDIS_URL from fastapi import Body, File, HTTPException, UploadFile -from langsmith import traceable from llama_index.core import SimpleDirectoryReader, StorageContext, VectorStoreIndex from llama_index.core.settings import Settings from llama_index.embeddings.huggingface import HuggingFaceEmbedding @@ -17,7 +16,10 @@ from redisvl.schema import IndexSchema from utils import * -from comps import DocPath, opea_microservices, register_microservice +from comps import CustomLogger, DocPath, opea_microservices, register_microservice + +logger = CustomLogger("prepare_doc_redis") +logflag = os.getenv("LOGFLAG", False) upload_folder = "./uploaded_files/" @@ -50,15 +52,16 @@ async def ingest_data_to_redis(doc_path: DocPath): vector_store = RedisVectorStore(redis_client=redis_client, schema=schema) storage_context = StorageContext.from_defaults(vector_store=vector_store) _ = VectorStoreIndex.from_documents(content, storage_context=storage_context) - print("[ ingest data ] data ingested into Redis DB.") + if logflag: + logger.info("[ ingest data ] data ingested into Redis DB.") return True @register_microservice(name="opea_service@prepare_doc_redis", endpoint="/v1/dataprep", host="0.0.0.0", port=6007) -@traceable(run_type="tool") # llama index only support upload files now async def ingest_documents(files: Optional[Union[UploadFile, List[UploadFile]]] = File(None)): - print(f"files:{files}") + if logflag: + logger.info(f"files:{files}") if not files: raise HTTPException(status_code=400, detail="Please provide at least one file.") @@ -71,32 +74,37 @@ async def ingest_documents(files: Optional[Union[UploadFile, List[UploadFile]]] save_path = upload_folder + file.filename await save_content_to_local_disk(save_path, file) await ingest_data_to_redis(DocPath(path=save_path)) - print(f"Successfully saved file {save_path}") + if logflag: + logger.info(f"Successfully saved file 
{save_path}") + logger.info({"status": 200, "message": "Data preparation succeeded"}) return {"status": 200, "message": "Data preparation succeeded"} except Exception as e: - print(f"Data preparation failed. Exception: {e}") + if logflag: + logger.info(f"Data preparation failed. Exception: {e}") raise HTTPException(status_code=500, detail=f"Data preparation failed. Exception: {e}") @register_microservice( name="opea_service@prepare_doc_redis_file", endpoint="/v1/dataprep/get_file", host="0.0.0.0", port=6008 ) -@traceable(run_type="tool") async def rag_get_file_structure(): - print("[ get_file_structure] ") + if logflag: + logger.info("[ get_file_structure] ") if not Path(upload_folder).exists(): - print("No file uploaded, return empty list.") + if logflag: + logger.info("No file uploaded, return empty list.") return [] file_content = get_file_structure(upload_folder) + if logflag: + logger.info(file_content) return file_content @register_microservice( name="opea_service@prepare_doc_redis_del", endpoint="/v1/dataprep/delete_file", host="0.0.0.0", port=6009 ) -@traceable(run_type="tool") async def delete_single_file(file_path: str = Body(..., embed=True)): """Delete file according to `file_path`. @@ -105,16 +113,23 @@ async def delete_single_file(file_path: str = Body(..., embed=True)): - folder path (e.g. /path/to/folder) - "all": delete all files uploaded """ + if logflag: + logger.info(file_path) # delete all uploaded files if file_path == "all": - print("[dataprep - del] delete all files") + if logflag: + logger.info("[dataprep - del] delete all files") remove_folder_with_ignore(upload_folder) - print("[dataprep - del] successfully delete all files.") + if logflag: + logger.info("[dataprep - del] successfully delete all files.") create_upload_folder(upload_folder) + if logflag: + logger.info({"status": True}) return {"status": True} delete_path = Path(upload_folder + "/" + encode_filename(file_path)) - print(f"[dataprep - del] delete_path: {delete_path}") + if logflag: + logger.info(f"[dataprep - del] delete_path: {delete_path}") # partially delete files/folders if delete_path.exists(): @@ -123,15 +138,21 @@ async def delete_single_file(file_path: str = Body(..., embed=True)): try: delete_path.unlink() except Exception as e: - print(f"[dataprep - del] fail to delete file {delete_path}: {e}") + if logflag: + logger.info(f"[dataprep - del] fail to delete file {delete_path}: {e}") + logger.info({"status": False}) return {"status": False} # delete folder else: try: shutil.rmtree(delete_path) except Exception as e: - print(f"[dataprep - del] fail to delete folder {delete_path}: {e}") + if logflag: + logger.info(f"[dataprep - del] fail to delete folder {delete_path}: {e}") + logger.info({"status": False}) return {"status": False} + if logflag: + logger.info({"status": True}) return {"status": True} else: raise HTTPException(status_code=404, detail="File/folder not found. 
Please check del_path.") diff --git a/comps/dataprep/redis/llama_index/requirements.txt b/comps/dataprep/redis/llama_index/requirements.txt index e754a4275..2f808e534 100644 --- a/comps/dataprep/redis/llama_index/requirements.txt +++ b/comps/dataprep/redis/llama_index/requirements.txt @@ -1,8 +1,7 @@ docarray[full] fastapi huggingface_hub -langsmith -llama-index +llama-index llama-index-embeddings-huggingface==0.2.0 llama-index-readers-file llama-index-vector-stores-redis @@ -12,6 +11,7 @@ opentelemetry-exporter-otlp opentelemetry-sdk prometheus-fastapi-instrumentator python-bidi==0.4.2 +python-multipart redis sentence_transformers shortuuid diff --git a/comps/dataprep/redis/multimodal_langchain/README.md b/comps/dataprep/redis/multimodal_langchain/README.md new file mode 100644 index 000000000..19042c6ae --- /dev/null +++ b/comps/dataprep/redis/multimodal_langchain/README.md @@ -0,0 +1,213 @@ +# Dataprep Microservice for Multimodal Data with Redis + +This `dataprep` microservice accepts videos (mp4 files) and their transcripts (optional) from the user and ingests them into Redis vectorstore. + +# ๐Ÿš€1. Start Microservice with Python๏ผˆOption 1๏ผ‰ + +## 1.1 Install Requirements + +```bash +# Install ffmpeg static build +wget https://johnvansickle.com/ffmpeg/builds/ffmpeg-git-amd64-static.tar.xz +mkdir ffmpeg-git-amd64-static +tar -xvf ffmpeg-git-amd64-static.tar.xz -C ffmpeg-git-amd64-static --strip-components 1 +export PATH=$(pwd)/ffmpeg-git-amd64-static:$PATH +cp $(pwd)/ffmpeg-git-amd64-static/ffmpeg /usr/local/bin/ + +pip install -r requirements.txt +``` + +## 1.2 Start Redis Stack Server + +Please refer to this [readme](../../../vectorstores/langchain/redis/README.md). + +## 1.3 Setup Environment Variables + +```bash +export your_ip=$(hostname -I | awk '{print $1}') +export REDIS_URL="redis://${your_ip}:6379" +export INDEX_NAME=${your_redis_index_name} +export PYTHONPATH=${path_to_comps} +``` + +## 1.4 Start LVM Microservice (Optional) + +This is required only if you are going to consume the _generate_captions_ API of this microservice as in [Section 4.3](#43-consume-generate_captions-api). + +Please refer to this [readme](../../../lvms/README.md) to start the LVM microservice. +After LVM is up, set up environment variables. + +```bash +export your_ip=$(hostname -I | awk '{print $1}') +export LVM_ENDPOINT="http://${your_ip}:9399/v1/lvm" +``` + +## 1.5 Start Data Preparation Microservice for Redis with Python Script + +Start document preparation microservice for Redis with below command. + +```bash +python prepare_videodoc_redis.py +``` + +# ๐Ÿš€2. Start Microservice with Docker (Option 2) + +## 2.1 Start Redis Stack Server + +Please refer to this [readme](../../../vectorstores/langchain/redis/README.md). + +## 2.2 Start LVM Microservice (Optional) + +This is required only if you are going to consume the _generate_captions_ API of this microservice as described [here](#43-consume-generate_captions-api). + +Please refer to this [readme](../../../lvms/README.md) to start the LVM microservice. +After LVM is up, set up environment variables. 
+ +```bash +export your_ip=$(hostname -I | awk '{print $1}') +export LVM_ENDPOINT="http://${your_ip}:9399/v1/lvm" +``` + +## 2.3 Setup Environment Variables + +```bash +export your_ip=$(hostname -I | awk '{print $1}') +export EMBEDDING_MODEL_ID="BridgeTower/bridgetower-large-itm-mlm-itc" +export REDIS_URL="redis://${your_ip}:6379" +export WHISPER_MODEL="base" +export INDEX_NAME=${your_redis_index_name} +export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token} +``` + +## 2.4 Build Docker Image + +```bash +cd ../../../../ +docker build -t opea/dataprep-redis:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/redis/multimodal_langchain/docker/Dockerfile . +``` + +## 2.5 Run Docker with CLI (Option A) + +```bash +docker run -d --name="dataprep-redis-server" -p 6007:6007 --runtime=runc --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e REDIS_URL=$REDIS_URL -e INDEX_NAME=$INDEX_NAME -e LVM_ENDPOINT=$LVM_ENDPOINT -e HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN opea/dataprep-redis:latest +``` + +## 2.6 Run with Docker Compose (Option B - deprecated, will move to genAIExample in future) + +```bash +cd comps/dataprep/redis/multimodal_langchain/docker +docker compose -f docker-compose-dataprep-redis.yaml up -d +``` + +# ๐Ÿš€3. Status Microservice + +```bash +docker container logs -f dataprep-redis-server +``` + +# ๐Ÿš€4. Consume Microservice + +Once this dataprep microservice is started, user can use the below commands to invoke the microservice to convert videos and their transcripts (optional) to embeddings and save to the Redis vector store. + +This mircroservice has provided 3 different ways for users to ingest videos into Redis vector store corresponding to the 3 use cases. + +## 4.1 Consume _videos_with_transcripts_ API + +**Use case:** This API is used when a transcript file (under `.vtt` format) is available for each video. + +**Important notes:** + +- Make sure the file paths after `files=@` are correct. +- Every transcript file's name must be identical with its corresponding video file's name (except their extension .vtt and .mp4). For example, `video1.mp4` and `video1.vtt`. Otherwise, if `video1.vtt` is not included correctly in this API call, this microservice will return error `No captions file video1.vtt found for video1.mp4`. + +### Single video-transcript pair upload + +```bash +curl -X POST \ + -H "Content-Type: multipart/form-data" \ + -F "files=@./video1.mp4" \ + -F "files=@./video1.vtt" \ + http://localhost:6007/v1/videos_with_transcripts +``` + +### Multiple video-transcript pair upload + +```bash +curl -X POST \ + -H "Content-Type: multipart/form-data" \ + -F "files=@./video1.mp4" \ + -F "files=@./video1.vtt" \ + -F "files=@./video2.mp4" \ + -F "files=@./video2.vtt" \ + http://localhost:6007/v1/videos_with_transcripts +``` + +## 4.2 Consume _generate_transcripts_ API + +**Use case:** This API should be used when a video has meaningful audio or recognizable speech but its transcript file is not available. + +In this use case, this microservice will use [`whisper`](https://openai.com/index/whisper/) model to generate the `.vtt` transcript for the video. 
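For reference, the transcript step can be reproduced standalone: whisper transcribes the video's audio track into timed segments, which are then serialized as WebVTT. The sketch below is illustrative only and calls the `openai-whisper` package directly rather than the microservice's internal helpers:

```python
# Illustrative sketch: generate a .vtt transcript for a video with whisper.
import whisper


def to_vtt_timestamp(seconds: float) -> str:
    hours, rem = divmod(seconds, 3600)
    minutes, secs = divmod(rem, 60)
    return f"{int(hours):02d}:{int(minutes):02d}:{secs:06.3f}"


model = whisper.load_model("base")        # matches the WHISPER_MODEL="base" used in the setup above
result = model.transcribe("video1.mp4")   # whisper extracts the audio track via ffmpeg

with open("video1.vtt", "w") as f:
    f.write("WEBVTT\n\n")
    for seg in result["segments"]:
        f.write(f"{to_vtt_timestamp(seg['start'])} --> {to_vtt_timestamp(seg['end'])}\n")
        f.write(seg["text"].strip() + "\n\n")
```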
+ +### Single video upload + +```bash +curl -X POST \ + -H "Content-Type: multipart/form-data" \ + -F "files=@./video1.mp4" \ + http://localhost:6007/v1/generate_transcripts +``` + +### Multiple video upload + +```bash +curl -X POST \ + -H "Content-Type: multipart/form-data" \ + -F "files=@./video1.mp4" \ + -F "files=@./video2.mp4" \ + http://localhost:6007/v1/generate_transcripts +``` + +## 4.3 Consume _generate_captions_ API + +**Use case:** This API should be used when a video does not have meaningful audio or does not have audio. + +In this use case, transcript either does not provide any meaningful information or does not exist. Thus, it is preferred to leverage a LVM microservice to summarize the video frames. + +- Single video upload + +```bash +curl -X POST \ + -H "Content-Type: multipart/form-data" \ + -F "files=@./video1.mp4" \ + http://localhost:6007/v1/generate_captions +``` + +- Multiple video upload + +```bash +curl -X POST \ + -H "Content-Type: multipart/form-data" \ + -F "files=@./video1.mp4" \ + -F "files=@./video2.mp4" \ + http://localhost:6007/v1/generate_captions +``` + +## 4.4 Consume get_videos API + +To get names of uploaded videos, use the following command. + +```bash +curl -X POST \ + -H "Content-Type: application/json" \ + http://localhost:6007/v1/dataprep/get_videos +``` + +## 4.5 Consume delete_videos API + +To delete uploaded videos and clear the database, use the following command. + +```bash +curl -X POST \ + -H "Content-Type: application/json" \ + http://localhost:6007/v1/dataprep/delete_videos +``` diff --git a/comps/dataprep/redis/multimodal_langchain/__init__.py b/comps/dataprep/redis/multimodal_langchain/__init__.py new file mode 100644 index 000000000..916f3a44b --- /dev/null +++ b/comps/dataprep/redis/multimodal_langchain/__init__.py @@ -0,0 +1,2 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 diff --git a/comps/dataprep/redis/multimodal_langchain/config.py b/comps/dataprep/redis/multimodal_langchain/config.py new file mode 100644 index 000000000..0cae53378 --- /dev/null +++ b/comps/dataprep/redis/multimodal_langchain/config.py @@ -0,0 +1,70 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import os + +# Models +EMBED_MODEL = os.getenv("EMBED_MODEL", "BridgeTower/bridgetower-large-itm-mlm-itc") +WHISPER_MODEL = os.getenv("WHISPER_MODEL", "small") + +# Redis Connection Information +REDIS_HOST = os.getenv("REDIS_HOST", "localhost") +REDIS_PORT = int(os.getenv("REDIS_PORT", 6379)) + +# Lvm Microservice Information +LVM_ENDPOINT = os.getenv("LVM_ENDPOINT", "http://localhost:9399/v1/lvm") + + +def get_boolean_env_var(var_name, default_value=False): + """Retrieve the boolean value of an environment variable. + + Args: + var_name (str): The name of the environment variable to retrieve. + default_value (bool): The default value to return if the variable + is not found. + Returns: + bool: The value of the environment variable, interpreted as a boolean. 
+ """ + true_values = {"true", "1", "t", "y", "yes"} + false_values = {"false", "0", "f", "n", "no"} + + # Retrieve the environment variable's value + value = os.getenv(var_name, "").lower() + + # Decide the boolean value based on the content of the string + if value in true_values: + return True + elif value in false_values: + return False + else: + return default_value + + +def format_redis_conn_from_env(): + redis_url = os.getenv("REDIS_URL", None) + if redis_url: + return redis_url + else: + using_ssl = get_boolean_env_var("REDIS_SSL", False) + start = "rediss://" if using_ssl else "redis://" + + # if using RBAC + password = os.getenv("REDIS_PASSWORD", None) + username = os.getenv("REDIS_USERNAME", "default") + if password is not None: + start += f"{username}:{password}@" + + return start + f"{REDIS_HOST}:{REDIS_PORT}" + + +REDIS_URL = format_redis_conn_from_env() + +# Vector Index Configuration +INDEX_NAME = os.getenv("INDEX_NAME", "mm-rag-redis") + +current_file_path = os.path.abspath(__file__) +parent_dir = os.path.dirname(current_file_path) +REDIS_SCHEMA = os.getenv("REDIS_SCHEMA", "schema.yml") +TIMEOUT_SECONDS = int(os.getenv("TIMEOUT_SECONDS", 600)) +schema_path = os.path.join(parent_dir, REDIS_SCHEMA) +INDEX_SCHEMA = schema_path diff --git a/comps/dataprep/redis/multimodal_langchain/docker/Dockerfile b/comps/dataprep/redis/multimodal_langchain/docker/Dockerfile new file mode 100644 index 000000000..a6c2be7e3 --- /dev/null +++ b/comps/dataprep/redis/multimodal_langchain/docker/Dockerfile @@ -0,0 +1,37 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +FROM python:3.11-slim + +ENV LANG=C.UTF-8 + +ARG ARCH="cpu" + +RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ + build-essential \ + libgl1-mesa-glx \ + libjemalloc-dev \ + default-jre \ + wget \ + vim + +# Install ffmpeg static build +RUN cd /root && wget https://johnvansickle.com/ffmpeg/builds/ffmpeg-git-amd64-static.tar.xz && \ + mkdir ffmpeg-git-amd64-static && tar -xvf ffmpeg-git-amd64-static.tar.xz -C ffmpeg-git-amd64-static --strip-components 1 && \ + export PATH=/root/ffmpeg-git-amd64-static:$PATH && \ + cp /root/ffmpeg-git-amd64-static/ffmpeg /usr/local/bin/ + +RUN mkdir -p /home/user + +COPY comps /home/user/comps + +RUN pip install --no-cache-dir --upgrade pip setuptools && \ + if [ ${ARCH} = "cpu" ]; then pip install torch torchvision --index-url https://download.pytorch.org/whl/cpu; fi && \ + pip install --no-cache-dir -r /home/user/comps/dataprep/redis/multimodal_langchain/requirements.txt + +ENV PYTHONPATH=$PYTHONPATH:/home/user + +WORKDIR /home/user/comps/dataprep/redis/multimodal_langchain + +ENTRYPOINT ["python", "prepare_videodoc_redis.py"] + diff --git a/comps/dataprep/redis/multimodal_langchain/docker/docker-compose-dataprep-redis.yaml b/comps/dataprep/redis/multimodal_langchain/docker/docker-compose-dataprep-redis.yaml new file mode 100644 index 000000000..d98ddbd87 --- /dev/null +++ b/comps/dataprep/redis/multimodal_langchain/docker/docker-compose-dataprep-redis.yaml @@ -0,0 +1,29 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +version: "3" +services: + redis-vector-db: + image: redis/redis-stack:7.2.0-v9 + container_name: redis-vector-db + ports: + - "6379:6379" + - "8001:8001" + dataprep-redis: + image: opea/dataprep-redis:latest + container_name: dataprep-redis-server + ports: + - "6007:6007" + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + 
REDIS_URL: ${REDIS_URL} + INDEX_NAME: ${INDEX_NAME} + LVM_ENDPOINT: ${LVM_ENDPOINT} + restart: unless-stopped + +networks: + default: + driver: bridge diff --git a/comps/dataprep/redis/multimodal_langchain/prepare_videodoc_redis.py b/comps/dataprep/redis/multimodal_langchain/prepare_videodoc_redis.py new file mode 100644 index 000000000..d658c58b0 --- /dev/null +++ b/comps/dataprep/redis/multimodal_langchain/prepare_videodoc_redis.py @@ -0,0 +1,527 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import os +import shutil +import subprocess +import time +import uuid +from pathlib import Path +from typing import Any, Dict, Iterable, List, Optional, Type, Union + +from config import EMBED_MODEL, INDEX_NAME, INDEX_SCHEMA, LVM_ENDPOINT, REDIS_URL, WHISPER_MODEL +from fastapi import File, HTTPException, UploadFile +from langchain_community.utilities.redis import _array_to_buffer +from langchain_community.vectorstores import Redis +from langchain_community.vectorstores.redis.base import _generate_field_schema, _prepare_metadata +from langchain_community.vectorstores.redis.schema import read_schema +from langchain_core.embeddings import Embeddings +from langchain_core.utils import get_from_dict_or_env +from PIL import Image + +from comps import opea_microservices, register_microservice +from comps.dataprep.multimodal_utils import ( + clear_upload_folder, + convert_video_to_audio, + create_upload_folder, + delete_audio_file, + extract_frames_and_annotations_from_transcripts, + extract_frames_and_generate_captions, + extract_transcript_from_audio, + generate_video_id, + load_json_file, + load_whisper_model, + write_vtt, +) +from comps.embeddings.multimodal_embeddings.bridgetower.bridgetower_embedding import BridgeTowerEmbedding + +device = "cpu" +upload_folder = "./uploaded_files/" + + +class MultimodalRedis(Redis): + """Redis vector database to process multimodal data.""" + + @classmethod + def from_text_image_pairs_return_keys( + cls: Type[Redis], + texts: List[str], + images: List[str], + embedding: Embeddings = BridgeTowerEmbedding, + metadatas: Optional[List[dict]] = None, + index_name: Optional[str] = None, + index_schema: Optional[Union[Dict[str, str], str, os.PathLike]] = None, + vector_schema: Optional[Dict[str, Union[str, int]]] = None, + **kwargs: Any, + ): + """ + Args: + texts (List[str]): List of texts to add to the vectorstore. + images (List[str]): List of path-to-images to add to the vectorstore. + embedding (Embeddings): Embeddings to use for the vectorstore. + metadatas (Optional[List[dict]], optional): Optional list of metadata + dicts to add to the vectorstore. Defaults to None. + index_name (Optional[str], optional): Optional name of the index to + create or add to. Defaults to None. + index_schema (Optional[Union[Dict[str, str], str, os.PathLike]], optional): + Optional fields to index within the metadata. Overrides generated + schema. Defaults to None. + vector_schema (Optional[Dict[str, Union[str, int]]], optional): Optional + vector schema to use. Defaults to None. + **kwargs (Any): Additional keyword arguments to pass to the Redis client. + Returns: + Tuple[Redis, List[str]]: Tuple of the Redis instance and the keys of + the newly created documents. + Raises: + ValueError: If the number of texts does not equal the number of images. + ValueError: If the number of metadatas does not match the number of texts. 
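+            Example (illustrative; mirrors the call made in ingest_multimodal later in this file):
+                instance, keys = MultimodalRedis.from_text_image_pairs_return_keys(
+                    texts=captions,
+                    images=frame_paths,
+                    embedding=BridgeTowerEmbedding(),
+                    metadatas=metadatas,
+                    index_name=INDEX_NAME,
+                    index_schema=INDEX_SCHEMA,
+                    redis_url=REDIS_URL,
+                )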
+ """ + # the length of texts must be equal to the length of images + if len(texts) != len(images): + raise ValueError(f"the len of captions {len(texts)} does not equal the len of images {len(images)}") + + redis_url = get_from_dict_or_env(kwargs, "redis_url", "REDIS_URL") + + if "redis_url" in kwargs: + kwargs.pop("redis_url") + + # flag to use generated schema + if "generate" in kwargs: + kwargs.pop("generate") + + # see if the user specified keys + keys = None + if "keys" in kwargs: + keys = kwargs.pop("keys") + + # Name of the search index if not given + if not index_name: + index_name = uuid.uuid4().hex + + # type check for metadata + if metadatas: + if isinstance(metadatas, list) and len(metadatas) != len(texts): # type: ignore # noqa: E501 + raise ValueError("Number of metadatas must match number of texts") + if not (isinstance(metadatas, list) and isinstance(metadatas[0], dict)): + raise ValueError("Metadatas must be a list of dicts") + generated_schema = _generate_field_schema(metadatas[0]) + + if not index_schema: + index_schema = generated_schema + + # Create instance + instance = cls( + redis_url, + index_name, + embedding, + index_schema=index_schema, + vector_schema=vector_schema, + **kwargs, + ) + # Add data to Redis + keys = instance.add_text_image_pairs(texts, images, metadatas, keys=keys) + return instance, keys + + def add_text_image_pairs( + self, + texts: Iterable[str], + images: Iterable[str], + metadatas: Optional[List[dict]] = None, + embeddings: Optional[List[List[float]]] = None, + batch_size: int = 2, + clean_metadata: bool = True, + **kwargs: Any, + ) -> List[str]: + """Add more embeddings of text-image pairs to the vectorstore. + + Args: + texts (Iterable[str]): Iterable of strings/text to add to the vectorstore. + images: Iterable[str]: Iterable of strings/text of path-to-image to add to the vectorstore. + metadatas (Optional[List[dict]], optional): Optional list of metadatas. + Defaults to None. + embeddings (Optional[List[List[float]]], optional): Optional pre-generated + embeddings. Defaults to None. + keys (List[str]) or ids (List[str]): Identifiers of entries. + Defaults to None. + batch_size (int, optional): Batch size to use for writes. Defaults to 1000. 
+ Returns: + List[str]: List of ids added to the vectorstore + """ + ids = [] + # Get keys or ids from kwargs + # Other vectorstores use ids + keys_or_ids = kwargs.get("keys", kwargs.get("ids")) + + # type check for metadata + if metadatas: + if isinstance(metadatas, list) and len(metadatas) != len(texts): # type: ignore # noqa: E501 + raise ValueError("Number of metadatas must match number of texts") + if not (isinstance(metadatas, list) and isinstance(metadatas[0], dict)): + raise ValueError("Metadatas must be a list of dicts") + pil_imgs = [Image.open(img) for img in images] + if not embeddings: + embeddings = self._embeddings.embed_image_text_pairs(list(texts), pil_imgs, batch_size=batch_size) + self._create_index_if_not_exist(dim=len(embeddings[0])) + + # Write data to redis + pipeline = self.client.pipeline(transaction=False) + for i, text in enumerate(texts): + # Use provided values by default or fallback + key = keys_or_ids[i] if keys_or_ids else str(uuid.uuid4().hex) + if not key.startswith(self.key_prefix + ":"): + key = self.key_prefix + ":" + key + metadata = metadatas[i] if metadatas else {} + metadata = _prepare_metadata(metadata) if clean_metadata else metadata + pipeline.hset( + key, + mapping={ + self._schema.content_key: text, + self._schema.content_vector_key: _array_to_buffer(embeddings[i], self._schema.vector_dtype), + **metadata, + }, + ) + ids.append(key) + + # Write batch + if i % batch_size == 0: + pipeline.execute() + + # Cleanup final batch + pipeline.execute() + return ids + + +def prepare_data_and_metadata_from_annotation( + annotation, path_to_frames, title, num_transcript_concat_for_ingesting=2, num_transcript_concat_for_inference=7 +): + text_list = [] + image_list = [] + metadatas = [] + for i, frame in enumerate(annotation): + frame_index = frame["sub_video_id"] + path_to_frame = os.path.join(path_to_frames, f"frame_{frame_index}.jpg") + # augment this frame's transcript with a reasonable number of neighboring frames' transcripts helps semantic retrieval + lb_ingesting = max(0, i - num_transcript_concat_for_ingesting) + ub_ingesting = min(len(annotation), i + num_transcript_concat_for_ingesting + 1) + caption_for_ingesting = " ".join([annotation[j]["caption"] for j in range(lb_ingesting, ub_ingesting)]) + + # augment this frame's transcript with more neighboring frames' transcript to provide more context to LVM for question answering + lb_inference = max(0, i - num_transcript_concat_for_inference) + ub_inference = min(len(annotation), i + num_transcript_concat_for_inference + 1) + caption_for_inference = " ".join([annotation[j]["caption"] for j in range(lb_inference, ub_inference)]) + + video_id = frame["video_id"] + b64_img_str = frame["b64_img_str"] + time_of_frame = frame["time"] + embedding_type = "pair" + source_video = frame["video_name"] + + text_list.append(caption_for_ingesting) + image_list.append(path_to_frame) + metadatas.append( + { + "content": caption_for_ingesting, + "b64_img_str": b64_img_str, + "video_id": video_id, + "source_video": source_video, + "time_of_frame_ms": float(time_of_frame), + "embedding_type": embedding_type, + "title": title, + "transcript_for_inference": caption_for_inference, + } + ) + + return text_list, image_list, metadatas + + +def ingest_multimodal(videoname, data_folder, embeddings): + """Ingest text image pairs to Redis from the data/ directory that consists of frames and annotations.""" + data_folder = os.path.abspath(data_folder) + annotation_file_path = os.path.join(data_folder, "annotations.json") + 
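+    # annotations.json and the frames/ directory are written into data_folder beforehand by
+    # extract_frames_and_annotations_from_transcripts or extract_frames_and_generate_captions.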
path_to_frames = os.path.join(data_folder, "frames") + + annotation = load_json_file(annotation_file_path) + + # prepare data to ingest + text_list, image_list, metadatas = prepare_data_and_metadata_from_annotation(annotation, path_to_frames, videoname) + + MultimodalRedis.from_text_image_pairs_return_keys( + texts=[f"From {videoname}. " + text for text in text_list], + images=image_list, + embedding=embeddings, + metadatas=metadatas, + index_name=INDEX_NAME, + index_schema=INDEX_SCHEMA, + redis_url=REDIS_URL, + ) + + +def drop_index(index_name, redis_url=REDIS_URL): + print(f"dropping index {index_name}") + try: + assert Redis.drop_index(index_name=index_name, delete_documents=True, redis_url=redis_url) + print(f"index {index_name} deleted") + except Exception as e: + print(f"index {index_name} delete failed: {e}") + return False + return True + + +@register_microservice( + name="opea_service@prepare_videodoc_redis", endpoint="/v1/generate_transcripts", host="0.0.0.0", port=6007 +) +async def ingest_videos_generate_transcripts(files: List[UploadFile] = File(None)): + """Upload videos with speech, generate transcripts using whisper and ingest into redis.""" + + if files: + video_files = [] + for file in files: + if os.path.splitext(file.filename)[1] == ".mp4": + video_files.append(file) + else: + raise HTTPException( + status_code=400, detail=f"File {file.filename} is not an mp4 file. Please upload mp4 files only." + ) + + for video_file in video_files: + st = time.time() + print(f"Processing video {video_file.filename}") + + # Assign unique identifier to video + video_id = generate_video_id() + + # Create video file name by appending identifier + video_name = os.path.splitext(video_file.filename)[0] + video_file_name = f"{video_name}_{video_id}.mp4" + video_dir_name = os.path.splitext(video_file_name)[0] + + # Save video file in upload_directory + with open(os.path.join(upload_folder, video_file_name), "wb") as f: + shutil.copyfileobj(video_file.file, f) + + # Extract temporary audio wav file from video mp4 + audio_file = video_dir_name + ".wav" + print(f"Extracting {audio_file}") + convert_video_to_audio( + os.path.join(upload_folder, video_file_name), os.path.join(upload_folder, audio_file) + ) + print(f"Done extracting {audio_file}") + + # Load whisper model + print("Loading whisper model....") + whisper_model = load_whisper_model(model_name=WHISPER_MODEL) + print("Done loading whisper!") + + # Extract transcript from audio + print("Extracting transcript from audio") + transcripts = extract_transcript_from_audio(whisper_model, os.path.join(upload_folder, audio_file)) + + # Save transcript as vtt file and delete audio file + vtt_file = video_dir_name + ".vtt" + write_vtt(transcripts, os.path.join(upload_folder, vtt_file)) + delete_audio_file(os.path.join(upload_folder, audio_file)) + print("Done extracting transcript.") + + # Store frames and caption annotations in a new directory + print("Extracting frames and generating annotation") + extract_frames_and_annotations_from_transcripts( + video_id, + os.path.join(upload_folder, video_file_name), + os.path.join(upload_folder, vtt_file), + os.path.join(upload_folder, video_dir_name), + ) + print("Done extracting frames and generating annotation") + # Delete temporary vtt file + os.remove(os.path.join(upload_folder, vtt_file)) + + # Ingest multimodal data into redis + print("Ingesting data to redis vector store") + ingest_multimodal(video_name, os.path.join(upload_folder, video_dir_name), embeddings) + + # Delete temporary video directory 
containing frames and annotations + shutil.rmtree(os.path.join(upload_folder, video_dir_name)) + + print(f"Processed video {video_file.filename}") + end = time.time() + print(str(end - st)) + + return {"status": 200, "message": "Data preparation succeeded"} + + raise HTTPException(status_code=400, detail="Must provide at least one video (.mp4) file.") + + +@register_microservice( + name="opea_service@prepare_videodoc_redis", endpoint="/v1/generate_captions", host="0.0.0.0", port=6007 +) +async def ingest_videos_generate_caption(files: List[UploadFile] = File(None)): + """Upload videos without speech (only background music or no audio), generate captions using lvm microservice and ingest into redis.""" + + if files: + video_files = [] + for file in files: + if os.path.splitext(file.filename)[1] == ".mp4": + video_files.append(file) + else: + raise HTTPException( + status_code=400, detail=f"File {file.filename} is not an mp4 file. Please upload mp4 files only." + ) + + for video_file in video_files: + print(f"Processing video {video_file.filename}") + + # Assign unique identifier to video + video_id = generate_video_id() + + # Create video file name by appending identifier + video_name = os.path.splitext(video_file.filename)[0] + video_file_name = f"{video_name}_{video_id}.mp4" + video_dir_name = os.path.splitext(video_file_name)[0] + + # Save video file in upload_directory + with open(os.path.join(upload_folder, video_file_name), "wb") as f: + shutil.copyfileobj(video_file.file, f) + + # Store frames and caption annotations in a new directory + extract_frames_and_generate_captions( + video_id, + os.path.join(upload_folder, video_file_name), + LVM_ENDPOINT, + os.path.join(upload_folder, video_dir_name), + ) + + # Ingest multimodal data into redis + ingest_multimodal(video_name, os.path.join(upload_folder, video_dir_name), embeddings) + + # Delete temporary video directory containing frames and annotations + # shutil.rmtree(os.path.join(upload_folder, video_dir_name)) + + print(f"Processed video {video_file.filename}") + + return {"status": 200, "message": "Data preparation succeeded"} + + raise HTTPException(status_code=400, detail="Must provide at least one video (.mp4) file.") + + +@register_microservice( + name="opea_service@prepare_videodoc_redis", + endpoint="/v1/videos_with_transcripts", + host="0.0.0.0", + port=6007, +) +async def ingest_videos_with_transcripts(files: List[UploadFile] = File(None)): + + if files: + video_files, video_file_names = [], [] + captions_files, captions_file_names = [], [] + for file in files: + if os.path.splitext(file.filename)[1] == ".mp4": + video_files.append(file) + video_file_names.append(file.filename) + elif os.path.splitext(file.filename)[1] == ".vtt": + captions_files.append(file) + captions_file_names.append(file.filename) + else: + print(f"Skipping file {file.filename} because of unsupported format.") + + # Check if every video file has a captions file + for video_file_name in video_file_names: + file_prefix = os.path.splitext(video_file_name)[0] + if (file_prefix + ".vtt") not in captions_file_names: + raise HTTPException( + status_code=400, detail=f"No captions file {file_prefix}.vtt found for {video_file_name}" + ) + + if len(video_files) == 0: + return HTTPException( + status_code=400, + detail="The uploaded files have unsupported formats. 
Please upload at least one video file (.mp4) with captions (.vtt)", + ) + + for video_file in video_files: + print(f"Processing video {video_file.filename}") + + # Assign unique identifier to video + video_id = generate_video_id() + + # Create video file name by appending identifier + video_name = os.path.splitext(video_file.filename)[0] + video_file_name = f"{video_name}_{video_id}.mp4" + video_dir_name = os.path.splitext(video_file_name)[0] + + # Save video file in upload_directory + with open(os.path.join(upload_folder, video_file_name), "wb") as f: + shutil.copyfileobj(video_file.file, f) + + # Save captions file in upload directory + vtt_file_name = os.path.splitext(video_file.filename)[0] + ".vtt" + vtt_idx = None + for idx, caption_file in enumerate(captions_files): + if caption_file.filename == vtt_file_name: + vtt_idx = idx + break + vtt_file = video_dir_name + ".vtt" + with open(os.path.join(upload_folder, vtt_file), "wb") as f: + shutil.copyfileobj(captions_files[vtt_idx].file, f) + + # Store frames and caption annotations in a new directory + extract_frames_and_annotations_from_transcripts( + video_id, + os.path.join(upload_folder, video_file_name), + os.path.join(upload_folder, vtt_file), + os.path.join(upload_folder, video_dir_name), + ) + + # Delete temporary vtt file + os.remove(os.path.join(upload_folder, vtt_file)) + + # Ingest multimodal data into redis + ingest_multimodal(video_name, os.path.join(upload_folder, video_dir_name), embeddings) + + # Delete temporary video directory containing frames and annotations + shutil.rmtree(os.path.join(upload_folder, video_dir_name)) + + print(f"Processed video {video_file.filename}") + + return {"status": 200, "message": "Data preparation succeeded"} + + raise HTTPException( + status_code=400, detail="Must provide at least one pair consisting of video (.mp4) and captions (.vtt)" + ) + + +@register_microservice( + name="opea_service@prepare_videodoc_redis", endpoint="/v1/dataprep/get_videos", host="0.0.0.0", port=6007 +) +async def rag_get_file_structure(): + """Returns list of names of uploaded videos saved on the server.""" + + if not Path(upload_folder).exists(): + print("No file uploaded, return empty list.") + return [] + + uploaded_videos = os.listdir(upload_folder) + return uploaded_videos + + +@register_microservice( + name="opea_service@prepare_videodoc_redis", endpoint="/v1/dataprep/delete_videos", host="0.0.0.0", port=6007 +) +async def delete_videos(): + """Delete all uploaded videos along with redis index.""" + index_deleted = drop_index(index_name=INDEX_NAME) + + if not index_deleted: + raise HTTPException(status_code=409, detail="Uploaded videos could not be deleted. 
Index does not exist") + + clear_upload_folder(upload_folder) + print("Successfully deleted all uploaded videos.") + return {"status": True} + + +if __name__ == "__main__": + create_upload_folder(upload_folder) + # Load embeddings model + print("Initializing BridgeTower model as embedder...") + embeddings = BridgeTowerEmbedding(model_name=EMBED_MODEL, device=device) + print("Done initialization of embedder!") + opea_microservices["opea_service@prepare_videodoc_redis"].start() diff --git a/comps/dataprep/redis/multimodal_langchain/requirements.txt b/comps/dataprep/redis/multimodal_langchain/requirements.txt new file mode 100644 index 000000000..574d2952a --- /dev/null +++ b/comps/dataprep/redis/multimodal_langchain/requirements.txt @@ -0,0 +1,19 @@ +docarray[full] +fastapi +langchain==0.1.12 +langchain_benchmarks +moviepy +openai-whisper +opencv-python +opentelemetry-api +opentelemetry-exporter-otlp +opentelemetry-sdk +Pillow +prometheus-fastapi-instrumentator +pydantic==2.8.2 +python-multipart +redis +shortuuid +transformers +uvicorn +webvtt-py diff --git a/comps/dataprep/redis/multimodal_langchain/schema.yml b/comps/dataprep/redis/multimodal_langchain/schema.yml new file mode 100644 index 000000000..32f4a79ae --- /dev/null +++ b/comps/dataprep/redis/multimodal_langchain/schema.yml @@ -0,0 +1,19 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +text: + - name: content + - name: b64_img_str + - name: video_id + - name: source_video + - name: embedding_type + - name: title + - name: transcript_for_inference +numeric: + - name: time_of_frame_ms +vector: + - name: content_vector + algorithm: HNSW + datatype: FLOAT32 + dims: 512 + distance_metric: COSINE diff --git a/comps/dataprep/utils.py b/comps/dataprep/utils.py index 46acc8f5b..b300af428 100644 --- a/comps/dataprep/utils.py +++ b/comps/dataprep/utils.py @@ -276,7 +276,8 @@ def load_json(json_path): """Load and process json file.""" with open(json_path, "r") as file: data = json.load(file) - return json.dumps(data) + content_list = [json.dumps(item) for item in data] + return content_list def load_yaml(yaml_path): @@ -289,13 +290,15 @@ def load_yaml(yaml_path): def load_xlsx(input_path): """Load and process xlsx file.""" df = pd.read_excel(input_path) - return df.to_string() + content_list = df.apply(lambda row: ", ".join(row.astype(str)), axis=1).tolist() + return content_list def load_csv(input_path): """Load the csv file.""" df = pd.read_csv(input_path) - return df.to_string() + content_list = df.apply(lambda row: ", ".join(row.astype(str)), axis=1).tolist() + return content_list def load_image(image_path): @@ -717,6 +720,19 @@ def get_file_structure(root_path: str, parent_path: str = "") -> List[Dict[str, return result +def format_search_results(response, file_list: list): + for i in range(1, len(response), 2): + file_name = response[i].decode()[5:] + file_dict = { + "name": decode_filename(file_name), + "id": decode_filename(file_name), + "type": "File", + "parent": "", + } + file_list.append(file_dict) + return file_list + + def remove_folder_with_ignore(folder_path: str, except_patterns: List = []): """Remove the specific folder, and ignore some files/folders. diff --git a/comps/embeddings/README.md b/comps/embeddings/README.md index a4ae1c8e8..1dbac7201 100644 --- a/comps/embeddings/README.md +++ b/comps/embeddings/README.md @@ -14,7 +14,7 @@ Key Features: Users are able to configure and build embedding-related services according to their actual needs. -# ๐Ÿš€1. 
Start Microservice with Python (Option 1) +## ๐Ÿš€1. Start Microservice with Python (Option 1) Currently, we provide three ways to implement the embedding service: @@ -26,7 +26,7 @@ Currently, we provide three ways to implement the embedding service: Regardless of the implementation, you need to install requirements first. -## 1.1 Install Requirements +### 1.1 Install Requirements ```bash # run with langchain @@ -39,7 +39,7 @@ pip install -r llama_index/requirements.txt pip install -r predictionguard/requirements.txt ``` -## 1.2 Start Embedding Service +### 1.2 Start Embedding Service You can select one of following ways to start the embedding service: @@ -73,15 +73,14 @@ export PREDICTIONGUARD_API_KEY=${your_api_key} python embedding_pg.py ``` -### Start Embedding Service with TEI +#### Start Embedding Service with TEI First, you need to start a TEI service. ```bash your_port=8090 model="BAAI/bge-large-en-v1.5" -revision="refs/pr/5" -docker run -p $your_port:80 -v ./data:/data --name tei_server -e http_proxy=$http_proxy -e https_proxy=$https_proxy --pull always ghcr.io/huggingface/text-embeddings-inference:cpu-1.2 --model-id $model --revision $revision +docker run -p $your_port:80 -v ./data:/data --name tei_server -e http_proxy=$http_proxy -e https_proxy=$https_proxy --pull always ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 --model-id $model ``` Then you need to test your TEI service using the following commands: @@ -102,13 +101,10 @@ cd langchain cd llama_index export TEI_EMBEDDING_ENDPOINT="http://localhost:$yourport" export TEI_EMBEDDING_MODEL_NAME="BAAI/bge-large-en-v1.5" -export LANGCHAIN_TRACING_V2=true -export LANGCHAIN_API_KEY=${your_langchain_api_key} -export LANGCHAIN_PROJECT="opea/gen-ai-comps:embeddings" python embedding_tei.py ``` -### Start Embedding Service with Local Model +#### Start Embedding Service with Local Model ```bash # run with langchain @@ -118,17 +114,16 @@ cd llama_index python local_embedding.py ``` -# ๐Ÿš€2. Start Microservice with Docker (Optional 2) +## ๐Ÿš€2. Start Microservice with Docker (Optional 2) -## 2.1 Start Embedding Service with TEI +### 2.1 Start Embedding Service with TEI First, you need to start a TEI service. ```bash your_port=8090 model="BAAI/bge-large-en-v1.5" -revision="refs/pr/5" -docker run -p $your_port:80 -v ./data:/data --name tei_server -e http_proxy=$http_proxy -e https_proxy=$https_proxy --pull always ghcr.io/huggingface/text-embeddings-inference:cpu-1.2 --model-id $model --revision $revision +docker run -p $your_port:80 -v ./data:/data --name tei_server -e http_proxy=$http_proxy -e https_proxy=$https_proxy --pull always ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 --model-id $model ``` Then you need to test your TEI service using the following commands: @@ -147,7 +142,7 @@ export TEI_EMBEDDING_ENDPOINT="http://localhost:$yourport" export TEI_EMBEDDING_MODEL_NAME="BAAI/bge-large-en-v1.5" ``` -## 2.2 Start Embedding Service with PredictionGuard +### 2.2 Start Embedding Service with PredictionGuard First, build the Docker image for the PredictionGuard embedding microservice: @@ -176,18 +171,18 @@ curl http://localhost:6000/v1/embeddings\ ## 2.3 Build Docker Image -### Build Langchain Docker (Option a) +#### Build Langchain Docker (Option a) ```bash cd ../../ docker build -t opea/embedding-tei:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/embeddings/langchain/docker/Dockerfile . 
``` -### Build LlamaIndex Docker (Option b) +#### Build LlamaIndex Docker (Option b) ```bash cd ../../ -docker build -t opea/embedding-tei:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/embeddings/llama_index/docker/Dockerfile . +docker build -t opea/embedding-tei-llama-index:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/embeddings/llama_index/docker/Dockerfile . ``` ### Build PredictionGuard Docker (Option c) @@ -196,26 +191,29 @@ docker build -t opea/embedding-tei:latest --build-arg https_proxy=$https_proxy - docker build -t opea/embedding-predictionguard:latest --build-arg http_proxy=$http_proxy --build-arg https_proxy=$https_proxy -f comps/embeddings/predictionguard/docker/Dockerfile . ``` -## 2.4 Run Docker with CLI +### 2.4 Run Docker with CLI ```bash +# run with langchain docker docker run -d --name="embedding-tei-server" -p 6000:6000 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e TEI_EMBEDDING_ENDPOINT=$TEI_EMBEDDING_ENDPOINT -e TEI_EMBEDDING_MODEL_NAME=$TEI_EMBEDDING_MODEL_NAME opea/embedding-tei:latest +# run with llama-index docker +docker run -d --name="embedding-tei-llama-index-server" -p 6000:6000 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e TEI_EMBEDDING_ENDPOINT=$TEI_EMBEDDING_ENDPOINT -e TEI_EMBEDDING_MODEL_NAME=$TEI_EMBEDDING_MODEL_NAME opea/embedding-tei-llama-index:latest ``` ```bash docker run -d --name="embedding-predictionguard" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p 6000:6000 --ipc=host -e PREDICTIONGUARD_API_KEY=$PREDICTIONGUARD_API_KEY opea/embedding-predictionguard:latest ``` -## 2.5 Run Docker with Docker Compose +### 2.5 Run Docker with Docker Compose ```bash cd docker docker compose -f docker_compose_embedding.yaml up -d ``` -# ๐Ÿš€3. Consume Embedding Service +## ๐Ÿš€3. Consume Embedding Service -## 3.1 Check Service Status +### 3.1 Check Service Status ```bash curl http://localhost:6000/v1/health_check\ @@ -223,11 +221,11 @@ curl http://localhost:6000/v1/health_check\ -H 'Content-Type: application/json' ``` -## 3.2 Consume Embedding Service +### 3.2 Consume Embedding Service ```bash curl http://localhost:6000/v1/embeddings\ -X POST \ - -d '{"input":"Hello, world!"}' \ + -d '{"text":"Hello, world!"}' \ -H 'Content-Type: application/json' ``` diff --git a/comps/embeddings/langchain-mosec/README.md b/comps/embeddings/langchain-mosec/README.md index 788e88dd4..4fd265829 100644 --- a/comps/embeddings/langchain-mosec/README.md +++ b/comps/embeddings/langchain-mosec/README.md @@ -1,29 +1,29 @@ # build Mosec endpoint docker image ``` -docker build --build-arg http_proxy=$http_proxy --build-arg https_proxy=$https_proxy -t langchain-mosec:latest -f comps/embeddings/langchain-mosec/mosec-docker/Dockerfile . +docker build --build-arg http_proxy=$http_proxy --build-arg https_proxy=$https_proxy -t opea/embedding-langchain-mosec-endpoint:latest -f comps/embeddings/langchain-mosec/mosec-docker/Dockerfile . ``` -# build embedding microservice docker image +## build embedding microservice docker image ``` docker build --build-arg http_proxy=$http_proxy --build-arg https_proxy=$https_proxy -t opea/embedding-langchain-mosec:latest -f comps/embeddings/langchain-mosec/docker/Dockerfile . 
``` -# launch Mosec endpoint docker container +## launch Mosec endpoint docker container ``` -docker run -d --name="embedding-langchain-mosec-endpoint" -p 6001:8000 langchain-mosec:latest +docker run -d --name="embedding-langchain-mosec-endpoint" -p 6001:8000 opea/embedding-langchain-mosec-endpoint:latest ``` -# launch embedding microservice docker container +## launch embedding microservice docker container ``` export MOSEC_EMBEDDING_ENDPOINT=http://{mosec_embedding_host_ip}:6001 docker run -d --name="embedding-langchain-mosec-server" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p 6000:6000 --ipc=host -e MOSEC_EMBEDDING_ENDPOINT=$MOSEC_EMBEDDING_ENDPOINT opea/embedding-langchain-mosec:latest ``` -# run client test +## run client test ``` curl localhost:6000/v1/embeddings \ diff --git a/comps/embeddings/langchain-mosec/docker/Dockerfile b/comps/embeddings/langchain-mosec/docker/Dockerfile index 2fa2e7036..4628216f1 100644 --- a/comps/embeddings/langchain-mosec/docker/Dockerfile +++ b/comps/embeddings/langchain-mosec/docker/Dockerfile @@ -1,4 +1,3 @@ - # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 @@ -6,8 +5,7 @@ FROM langchain/langchain:latest RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ libgl1-mesa-glx \ - libjemalloc-dev \ - vim + libjemalloc-dev RUN useradd -m -s /bin/bash user && \ mkdir -p /home/user && \ @@ -25,4 +23,3 @@ ENV PYTHONPATH=$PYTHONPATH:/home/user WORKDIR /home/user/comps/embeddings/langchain-mosec ENTRYPOINT ["python", "embedding_mosec.py"] - diff --git a/comps/embeddings/langchain-mosec/embedding_mosec.py b/comps/embeddings/langchain-mosec/embedding_mosec.py index f34b56a18..61a3db7f2 100644 --- a/comps/embeddings/langchain-mosec/embedding_mosec.py +++ b/comps/embeddings/langchain-mosec/embedding_mosec.py @@ -6,9 +6,9 @@ from typing import List, Optional from langchain_community.embeddings import OpenAIEmbeddings -from langsmith import traceable from comps import ( + CustomLogger, EmbedDoc, ServiceType, TextDoc, @@ -18,6 +18,9 @@ statistics_dict, ) +logger = CustomLogger("embedding_mosec") +logflag = os.getenv("LOGFLAG", False) + class MosecEmbeddings(OpenAIEmbeddings): def _get_len_safe_embeddings( @@ -53,13 +56,16 @@ def empty_embedding() -> List[float]: input_datatype=TextDoc, output_datatype=EmbedDoc, ) -@traceable(run_type="embedding") @register_statistics(names=["opea_service@embedding_mosec"]) def embedding(input: TextDoc) -> EmbedDoc: + if logflag: + logger.info(input) start = time.time() embed_vector = embeddings.embed_query(input.text) res = EmbedDoc(text=input.text, embedding=embed_vector) statistics_dict["opea_service@embedding_mosec"].append_latency(time.time() - start, None) + if logflag: + logger.info(res) return res @@ -67,7 +73,7 @@ def embedding(input: TextDoc) -> EmbedDoc: MOSEC_EMBEDDING_ENDPOINT = os.environ.get("MOSEC_EMBEDDING_ENDPOINT", "http://127.0.0.1:8080") os.environ["OPENAI_API_BASE"] = MOSEC_EMBEDDING_ENDPOINT os.environ["OPENAI_API_KEY"] = "Dummy key" - MODEL_ID = "/root/bge-large-zh-v1.5" + MODEL_ID = "/home/user/bge-large-zh-v1.5" embeddings = MosecEmbeddings(model=MODEL_ID) - print("Mosec Embedding initialized.") + logger.info("Mosec Embedding initialized.") opea_microservices["opea_service@embedding_mosec"].start() diff --git a/comps/embeddings/langchain-mosec/mosec-docker/Dockerfile b/comps/embeddings/langchain-mosec/mosec-docker/Dockerfile index eec3020a4..945f7b90c 100644 --- a/comps/embeddings/langchain-mosec/mosec-docker/Dockerfile +++ 
b/comps/embeddings/langchain-mosec/mosec-docker/Dockerfile @@ -2,22 +2,25 @@ # SPDX-License-Identifier: Apache-2.0 From ubuntu:22.04 +RUN useradd -m -s /bin/bash user && \ + mkdir -p /home/user && \ + chown -R user /home/user/ ARG DEBIAN_FRONTEND=noninteractive ENV GLIBC_TUNABLES glibc.cpu.x86_shstk=permissive +RUN apt update && apt install -y python3 python3-pip -COPY comps /root/comps +COPY comps /home/user/comps -RUN apt update && apt install -y python3 python3-pip RUN pip3 install torch==2.2.2 torchvision --index-url https://download.pytorch.org/whl/cpu RUN pip3 install intel-extension-for-pytorch==2.2.0 RUN pip3 install transformers RUN pip3 install llmspec mosec -RUN cd /root/ && export HF_ENDPOINT=https://hf-mirror.com && huggingface-cli download --resume-download BAAI/bge-large-zh-v1.5 --local-dir /root/bge-large-zh-v1.5 - -ENV EMB_MODEL="/root/bge-large-zh-v1.5/" +RUN cd /home/user/ && export HF_ENDPOINT=https://hf-mirror.com && huggingface-cli download --resume-download BAAI/bge-large-zh-v1.5 --local-dir /home/user/bge-large-zh-v1.5 +USER user +ENV EMB_MODEL="/home/user/bge-large-zh-v1.5/" -WORKDIR /root/comps/embeddings/langchain-mosec/mosec-docker +WORKDIR /home/user/comps/embeddings/langchain-mosec/mosec-docker CMD ["python3", "server-ipex.py"] diff --git a/comps/embeddings/langchain-mosec/mosec-docker/README.md b/comps/embeddings/langchain-mosec/mosec-docker/README.md index e7f59d616..3222a1b1e 100644 --- a/comps/embeddings/langchain-mosec/mosec-docker/README.md +++ b/comps/embeddings/langchain-mosec/mosec-docker/README.md @@ -25,13 +25,13 @@ docker run -itd -p 8000:8000 embedding:latest - Restful API by curl ```shell -curl -X POST http://127.0.0.1:8000/v1/embeddings -H "Content-Type: application/json" -d '{ "model": "/root/bge-large-zh-v1.5/", "input": "hello world"}' +curl -X POST http://127.0.0.1:8000/v1/embeddings -H "Content-Type: application/json" -d '{ "model": "/home/user/bge-large-zh-v1.5/", "input": "hello world"}' ``` - generate embedding from python ```python -DEFAULT_MODEL = "/root/bge-large-zh-v1.5/" +DEFAULT_MODEL = "/home/user/bge-large-zh-v1.5/" SERVICE_URL = "http://127.0.0.1:8000" INPUT_STR = "Hello world!" diff --git a/comps/embeddings/langchain-mosec/mosec-docker/server-ipex.py b/comps/embeddings/langchain-mosec/mosec-docker/server-ipex.py index 6db56fb88..9639b424a 100644 --- a/comps/embeddings/langchain-mosec/mosec-docker/server-ipex.py +++ b/comps/embeddings/langchain-mosec/mosec-docker/server-ipex.py @@ -13,7 +13,7 @@ from llmspec import EmbeddingData, EmbeddingRequest, EmbeddingResponse, TokenUsage from mosec import ClientError, Runtime, Server, Worker -DEFAULT_MODEL = "/root/bge-large-zh-v1.5/" +DEFAULT_MODEL = "/home/user/bge-large-zh-v1.5/" class Embedding(Worker): diff --git a/comps/embeddings/langchain-mosec/mosec-docker/test-embedding.py b/comps/embeddings/langchain-mosec/mosec-docker/test-embedding.py index d2d67c836..67a3939e1 100644 --- a/comps/embeddings/langchain-mosec/mosec-docker/test-embedding.py +++ b/comps/embeddings/langchain-mosec/mosec-docker/test-embedding.py @@ -4,7 +4,7 @@ from openai import Client -DEFAULT_MODEL = "/root/bge-large-zh-v1.5/" +DEFAULT_MODEL = "/home/user/bge-large-zh-v1.5/" SERVICE_URL = "http://127.0.0.1:8000" INPUT_STR = "Hello world!" 
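For context (not part of the patch): the Mosec endpoint exercised by test-embedding.py speaks the OpenAI embeddings protocol, so the stock `openai` client can talk to it directly. A minimal sketch, assuming the endpoint from the mosec-docker README above is running locally on port 8000 and exposes the `/v1/embeddings` route shown in its curl example:

```python
from openai import Client

# Values mirror the defaults used in test-embedding.py after this change.
DEFAULT_MODEL = "/home/user/bge-large-zh-v1.5/"
SERVICE_URL = "http://127.0.0.1:8000"

# Placeholder key; the local endpoint is not expected to validate it
# (embedding_mosec.py likewise sets a dummy OPENAI_API_KEY).
client = Client(api_key="dummy", base_url=f"{SERVICE_URL}/v1")

resp = client.embeddings.create(model=DEFAULT_MODEL, input=["Hello world!"])
print(len(resp.data[0].embedding))  # length of the returned embedding vector
```

This sends the same request shape as the curl command in the README; only the client differs.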
diff --git a/comps/embeddings/langchain/docker/Dockerfile b/comps/embeddings/langchain/docker/Dockerfile index 464bacf66..365c73811 100644 --- a/comps/embeddings/langchain/docker/Dockerfile +++ b/comps/embeddings/langchain/docker/Dockerfile @@ -1,4 +1,3 @@ - # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 @@ -8,8 +7,7 @@ ARG ARCH="cpu" RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ libgl1-mesa-glx \ - libjemalloc-dev \ - vim + libjemalloc-dev RUN useradd -m -s /bin/bash user && \ mkdir -p /home/user && \ @@ -28,4 +26,3 @@ ENV PYTHONPATH=$PYTHONPATH:/home/user WORKDIR /home/user/comps/embeddings/langchain ENTRYPOINT ["python", "embedding_tei.py"] - diff --git a/comps/embeddings/langchain/embedding_tei.py b/comps/embeddings/langchain/embedding_tei.py index 4c482db51..0ddefb49a 100644 --- a/comps/embeddings/langchain/embedding_tei.py +++ b/comps/embeddings/langchain/embedding_tei.py @@ -3,11 +3,12 @@ import os import time +from typing import Union -from langchain_community.embeddings import HuggingFaceHubEmbeddings -from langsmith import traceable +from langchain_huggingface import HuggingFaceEndpointEmbeddings from comps import ( + CustomLogger, EmbedDoc, ServiceType, TextDoc, @@ -16,6 +17,15 @@ register_statistics, statistics_dict, ) +from comps.cores.proto.api_protocol import ( + ChatCompletionRequest, + EmbeddingRequest, + EmbeddingResponse, + EmbeddingResponseData, +) + +logger = CustomLogger("embedding_tei_langchain") +logflag = os.getenv("LOGFLAG", False) @register_microservice( @@ -24,21 +34,38 @@ endpoint="/v1/embeddings", host="0.0.0.0", port=6000, - input_datatype=TextDoc, - output_datatype=EmbedDoc, ) -@traceable(run_type="embedding") @register_statistics(names=["opea_service@embedding_tei_langchain"]) -def embedding(input: TextDoc) -> EmbedDoc: +def embedding( + input: Union[TextDoc, EmbeddingRequest, ChatCompletionRequest] +) -> Union[EmbedDoc, EmbeddingResponse, ChatCompletionRequest]: start = time.time() - embed_vector = embeddings.embed_query(input.text) - res = EmbedDoc(text=input.text, embedding=embed_vector) + if logflag: + logger.info(input) + if isinstance(input, TextDoc): + embed_vector = embeddings.embed_query(input.text) + res = EmbedDoc(text=input.text, embedding=embed_vector) + else: + embed_vector = embeddings.embed_query(input.input) + if input.dimensions is not None: + embed_vector = embed_vector[: input.dimensions] + + if isinstance(input, ChatCompletionRequest): + input.embedding = embed_vector + # keep + res = input + if isinstance(input, EmbeddingRequest): + # for standard openai embedding format + res = EmbeddingResponse(data=[EmbeddingResponseData(index=0, embedding=embed_vector)]) + statistics_dict["opea_service@embedding_tei_langchain"].append_latency(time.time() - start, None) + if logflag: + logger.info(res) return res if __name__ == "__main__": tei_embedding_endpoint = os.getenv("TEI_EMBEDDING_ENDPOINT", "http://localhost:8080") - embeddings = HuggingFaceHubEmbeddings(model=tei_embedding_endpoint) - print("TEI Gaudi Embedding initialized.") + embeddings = HuggingFaceEndpointEmbeddings(model=tei_embedding_endpoint) + logger.info("TEI Gaudi Embedding initialized.") opea_microservices["opea_service@embedding_tei_langchain"].start() diff --git a/comps/embeddings/langchain/local_embedding.py b/comps/embeddings/langchain/local_embedding.py index 4aff07c6d..32f8944a9 100644 --- a/comps/embeddings/langchain/local_embedding.py +++ b/comps/embeddings/langchain/local_embedding.py @@ -1,9 +1,22 @@ # 
Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -from langchain_community.embeddings import HuggingFaceBgeEmbeddings +import os -from comps import EmbedDoc, ServiceType, TextDoc, opea_microservices, opea_telemetry, register_microservice +from langchain_huggingface import HuggingFaceEmbeddings + +from comps import ( + CustomLogger, + EmbedDoc, + ServiceType, + TextDoc, + opea_microservices, + opea_telemetry, + register_microservice, +) + +logger = CustomLogger("local_embedding") +logflag = os.getenv("LOGFLAG", False) @register_microservice( @@ -17,11 +30,15 @@ ) @opea_telemetry def embedding(input: TextDoc) -> EmbedDoc: + if logflag: + logger.info(input) embed_vector = embeddings.embed_query(input.text) res = EmbedDoc(text=input.text, embedding=embed_vector) + if logflag: + logger.info(res) return res if __name__ == "__main__": - embeddings = HuggingFaceBgeEmbeddings(model_name="BAAI/bge-large-en-v1.5") + embeddings = HuggingFaceEmbeddings(model_name="BAAI/bge-large-en-v1.5") opea_microservices["opea_service@local_embedding"].start() diff --git a/comps/embeddings/langchain/local_embedding_768.py b/comps/embeddings/langchain/local_embedding_768.py new file mode 100644 index 000000000..a079bd6ed --- /dev/null +++ b/comps/embeddings/langchain/local_embedding_768.py @@ -0,0 +1,27 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from langchain_community.embeddings import HuggingFaceBgeEmbeddings + +from comps import EmbedDoc768, ServiceType, TextDoc, opea_microservices, opea_telemetry, register_microservice + + +@register_microservice( + name="opea_service@local_embedding", + service_type=ServiceType.EMBEDDING, + endpoint="/v1/embeddings", + host="0.0.0.0", + port=6000, + input_datatype=TextDoc, + output_datatype=EmbedDoc768, +) +@opea_telemetry +def embedding(input: TextDoc) -> EmbedDoc768: + embed_vector = embeddings.embed_query(input.text) + res = EmbedDoc768(text=input.text, embedding=embed_vector) + return res + + +if __name__ == "__main__": + embeddings = HuggingFaceBgeEmbeddings(model_name="BAAI/bge-base-en-v1.5") + opea_microservices["opea_service@local_embedding"].start() diff --git a/comps/embeddings/langchain/requirements.txt b/comps/embeddings/langchain/requirements.txt index eaa946aad..1bfe6f44c 100644 --- a/comps/embeddings/langchain/requirements.txt +++ b/comps/embeddings/langchain/requirements.txt @@ -2,7 +2,7 @@ docarray[full] fastapi huggingface_hub langchain -langsmith +langchain_huggingface opentelemetry-api opentelemetry-exporter-otlp opentelemetry-sdk diff --git a/comps/embeddings/llama_index/docker/Dockerfile b/comps/embeddings/llama_index/docker/Dockerfile index 914293db8..8d17b0dfa 100644 --- a/comps/embeddings/llama_index/docker/Dockerfile +++ b/comps/embeddings/llama_index/docker/Dockerfile @@ -1,4 +1,3 @@ - # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 @@ -7,7 +6,6 @@ FROM ubuntu:22.04 RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ libgl1-mesa-glx \ libjemalloc-dev \ - vim \ python3 \ python3-pip @@ -27,4 +25,3 @@ ENV PYTHONPATH=$PYTHONPATH:/home/user WORKDIR /home/user/comps/embeddings/llama_index ENTRYPOINT ["python3", "embedding_tei.py"] - diff --git a/comps/embeddings/llama_index/docker/docker_compose_embedding.yaml b/comps/embeddings/llama_index/docker/docker_compose_embedding.yaml index 62f5870b7..152f5030b 100644 --- a/comps/embeddings/llama_index/docker/docker_compose_embedding.yaml +++ 
b/comps/embeddings/llama_index/docker/docker_compose_embedding.yaml @@ -5,7 +5,7 @@ version: "3.8" services: embedding: - image: opea/embedding-tei:latest + image: opea/embedding-tei-llama-index:latest container_name: embedding-tei-server ports: - "6000:6000" @@ -16,7 +16,6 @@ services: https_proxy: ${https_proxy} TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} TEI_EMBEDDING_MODEL_NAME: ${TEI_EMBEDDING_MODEL_NAME} - LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY} restart: unless-stopped networks: diff --git a/comps/embeddings/llama_index/embedding_tei.py b/comps/embeddings/llama_index/embedding_tei.py index 4f3920d32..cf14f7790 100644 --- a/comps/embeddings/llama_index/embedding_tei.py +++ b/comps/embeddings/llama_index/embedding_tei.py @@ -3,10 +3,12 @@ import os -from langsmith import traceable from llama_index.embeddings.text_embeddings_inference import TextEmbeddingsInference -from comps import EmbedDoc, ServiceType, TextDoc, opea_microservices, register_microservice +from comps import CustomLogger, EmbedDoc, ServiceType, TextDoc, opea_microservices, register_microservice + +logger = CustomLogger("embedding_tei_llamaindex") +logflag = os.getenv("LOGFLAG", False) @register_microservice( @@ -18,10 +20,13 @@ input_datatype=TextDoc, output_datatype=EmbedDoc, ) -@traceable(run_type="embedding") def embedding(input: TextDoc) -> EmbedDoc: + if logflag: + logger.info(input) embed_vector = embeddings._get_query_embedding(input.text) res = EmbedDoc(text=input.text, embedding=embed_vector) + if logflag: + logger.info(res) return res @@ -29,5 +34,5 @@ def embedding(input: TextDoc) -> EmbedDoc: tei_embedding_model_name = os.getenv("TEI_EMBEDDING_MODEL_NAME", "BAAI/bge-large-en-v1.5") tei_embedding_endpoint = os.getenv("TEI_EMBEDDING_ENDPOINT", "http://localhost:8090") embeddings = TextEmbeddingsInference(model_name=tei_embedding_model_name, base_url=tei_embedding_endpoint) - print("TEI Gaudi Embedding initialized.") + logger.info("TEI Gaudi Embedding initialized.") opea_microservices["opea_service@embedding_tei_llamaindex"].start() diff --git a/comps/embeddings/llama_index/local_embedding.py b/comps/embeddings/llama_index/local_embedding.py index bccec24ca..143d7bb07 100644 --- a/comps/embeddings/llama_index/local_embedding.py +++ b/comps/embeddings/llama_index/local_embedding.py @@ -1,10 +1,14 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -from langsmith import traceable -from llama_index.embeddings.huggingface import HuggingFaceEmbedding +import os -from comps import EmbedDoc, ServiceType, TextDoc, opea_microservices, register_microservice +from llama_index.embeddings.huggingface_api import HuggingFaceInferenceAPIEmbedding + +from comps import CustomLogger, EmbedDoc, ServiceType, TextDoc, opea_microservices, register_microservice + +logger = CustomLogger("local_embedding") +logflag = os.getenv("LOGFLAG", False) @register_microservice( @@ -16,13 +20,16 @@ input_datatype=TextDoc, output_datatype=EmbedDoc, ) -@traceable(run_type="embedding") def embedding(input: TextDoc) -> EmbedDoc: + if logflag: + logger.info(input) embed_vector = embeddings.get_text_embedding(input.text) res = EmbedDoc(text=input.text, embedding=embed_vector) + if logflag: + logger.info(res) return res if __name__ == "__main__": - embeddings = HuggingFaceEmbedding(model_name="BAAI/bge-large-en-v1.5") + embeddings = HuggingFaceInferenceAPIEmbedding(model_name="BAAI/bge-large-en-v1.5") opea_microservices["opea_service@local_embedding"].start() diff --git a/comps/embeddings/llama_index/requirements.txt 
b/comps/embeddings/llama_index/requirements.txt index b1d2beba9..4f1457e4a 100644 --- a/comps/embeddings/llama_index/requirements.txt +++ b/comps/embeddings/llama_index/requirements.txt @@ -1,7 +1,7 @@ docarray[full] fastapi huggingface_hub -langsmith +llama-index-embeddings-huggingface-api llama-index-embeddings-text-embeddings-inference opentelemetry-api opentelemetry-exporter-otlp diff --git a/comps/embeddings/multimodal_embeddings/README.md b/comps/embeddings/multimodal_embeddings/README.md new file mode 100644 index 000000000..c2cf2b875 --- /dev/null +++ b/comps/embeddings/multimodal_embeddings/README.md @@ -0,0 +1,185 @@ +# Multimodal Embeddings Microservice + +The Multimodal Embedding Microservice is designed to efficiently convert pairs of textual string and image into vectorized embeddings, facilitating seamless integration into various machine learning and data processing workflows. This service utilizes advanced algorithms to generate high-quality embeddings that capture the joint semantic essence of the input text-and-image pairs, making it ideal for applications in multi-modal data processing, information retrieval, and similar fields. + +Key Features: + +**High Performance**: Optimized for quick and reliable conversion of textual data and image inputs into vector embeddings. + +**Scalability**: Built to handle high volumes of requests simultaneously, ensuring robust performance even under heavy loads. + +**Ease of Integration**: Provides a simple and intuitive API, allowing for straightforward integration into existing systems and workflows. + +**Customizable**: Supports configuration and customization to meet specific use case requirements, including different embedding models and preprocessing techniques. + +Users are albe to configure and build embedding-related services according to their actual needs. + +## ๐Ÿš€1. Start Microservice with Python (Option 1) + +Currently, we provide two ways to implement the multimodal embedding service: + +1. Build the multimodal embedding model **locally** from the server, which is faster, but takes up memory on the local server. +2. Build it based on the multimodal embedding inference endpoint (**MMEI endpoint**), which provides more flexibility, but may bring some network latency. + +For both of the implementations, you need to install requirements first. + +### 1.1 Install Requirements + +```bash +# run with langchain +pip install -r multimodal_langchain/requirements.txt +``` + +### 1.2 Start Embedding Service + +You can select one of the following to start the multimodal embedding service: + +**Start Multimodal Embedding Service with MMEI** + +First, you need to start a MMEI service. + +```bash +export your_mmei_port=8080 +export EMBEDDER_PORT=$your_mmei_port +``` + +Currently, we employ [**BridgeTower**](https://huggingface.co/BridgeTower/bridgetower-large-itm-mlm-gaudi) model for MMEI and provide two ways to start MMEI: + +1. Start MMEI on Gaudi2 HPU +2. Start MMEI on Xeon CPU (if Gaudi2 HPU is not available) + +- Gaudi2 HPU + +```bash +cd ../../.. +docker build -t opea/bridgetower-embedder:latest --build-arg EMBEDDER_PORT=$EMBEDDER_PORT --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/embeddings/multimodal_embeddings/bridgetower/docker/Dockerfile_hpu . +cd comps/embeddings/multimodal_embeddings/bridgetower/docker/ +docker compose -f docker_compose_bridgetower_embedding_endpoint.yaml up -d +``` + +- Xeon CPU + +```bash +cd ../../.. 
+docker build -t opea/bridgetower-embedder:latest --build-arg EMBEDDER_PORT=$EMBEDDER_PORT --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/embeddings/multimodal_embeddings/bridgetower/docker/Dockerfile . +cd comps/embeddings/multimodal_embeddings/bridgetower/docker/ +docker compose -f docker_compose_bridgetower_embedding_endpoint.yaml up -d +``` + +Then you need to test your MMEI service using the following commands: + +```bash +curl http://localhost:$your_mmei_port/v1/encode \ + -X POST \ + -H "Content-Type:application/json" \ + -d '{"text":"This is example"}' +``` + +Start the embedding service with MMEI_EMBEDDING_ENDPOINT. + +```bash +# run with langchain +cd multimodal_langchain +export MMEI_EMBEDDING_ENDPOINT="http://localhost:$your_mmei_port/v1/encode" +export your_embedding_port_microservice=6600 +export MM_EMBEDDING_PORT_MICROSERVICE=$your_embedding_port_microservice +python mm_embedding_mmei.py +``` + +**Start Embedding Service with Local Model** + +```bash +# run with langchain +cd multimodal_langchain +export your_embedding_port_microservice=6600 +export MM_EMBEDDING_PORT_MICROSERVICE=$your_embedding_port_microservice +python local_mm_embedding.py +``` + +## ๐Ÿš€2. Start Microservice with Docker (Option 2) + +### 2.1 Start Multimodal Embedding Inference (MMEI) Service + +First, you need to start a MMEI service. + +```bash +export your_mmei_port=8080 +export EMBEDDER_PORT=$your_mmei_port +``` + +Currently, we employ [**BridgeTower**](https://huggingface.co/BridgeTower/bridgetower-large-itm-mlm-gaudi) model for MMEI and provide two ways to start MMEI: + +1. Start MMEI on Gaudi2 HPU +2. Start MMEI on Xeon CPU (if Gaudi2 HPU is not available) + +- Gaudi2 HPU + +```bash +cd ../../.. +docker build -t opea/bridgetower-embedder:latest --build-arg EMBEDDER_PORT=$EMBEDDER_PORT --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/embeddings/multimodal_embeddings/bridgetower/docker/Dockerfile_hpu . +cd comps/embeddings/multimodal_embeddings/bridgetower/docker/ +docker compose -f docker_compose_bridgetower_embedding_endpoint.yaml up -d +``` + +- Xeon CPU + +```bash +cd ../../.. +docker build -t opea/bridgetower-embedder:latest --build-arg EMBEDDER_PORT=$EMBEDDER_PORT --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/embeddings/multimodal_embeddings/bridgetower/docker/Dockerfile . +cd comps/embeddings/multimodal_embeddings/bridgetower/docker/ +docker compose -f docker_compose_bridgetower_embedding_endpoint.yaml up -d +``` + +Then you need to test your MMEI service using the following commands: + +```bash +curl http://localhost:$your_mmei_port/v1/encode \ + -X POST \ + -H "Content-Type:application/json" \ + -d '{"text":"This is example"}' +``` + +Export the `MMEI_EMBEDDING_ENDPOINT` for later usage: + +```bash +export ip_address=$(hostname -I | awk '{print $1}') +export MMEI_EMBEDDING_ENDPOINT="http://$ip_address:$your_mmei_port/v1/encode" +``` + +### 2.2 Build Docker Image + +#### Build Langchain Docker + +```bash +cd ../../.. +docker build -t opea/embedding-multimodal:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/embeddings/multimodal_embeddings/multimodal_langchain/docker/Dockerfile . 
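+# (optional) confirm the image is present before wiring it into docker compose
+docker images | grep embedding-multimodal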
+``` + +### 2.3 Run Docker with Docker Compose + +```bash +cd multimodal_langchain/docker +export your_embedding_port_microservice=6600 +export MM_EMBEDDING_PORT_MICROSERVICE=$your_embedding_port_microservice +docker compose -f docker_compose_multimodal_embedding.yaml up -d +``` + +## ๐Ÿš€3. Consume Embedding Service + +### 2.2 Consume Embedding Service + +**Compute a joint embedding of an image-text pair** + +```bash +curl -X POST http://0.0.0.0:6600/v1/embeddings \ + -H "Content-Type: application/json" \ + -d '{"text": {"text" : "This is some sample text."}, "image" : {"url": "https://github.com/docarray/docarray/blob/main/tests/toydata/image-data/apple.png?raw=true"}}' +``` + +**Compute an embedding of a text** + +```bash +curl -X POST http://0.0.0.0:6600/v1/embeddings \ + -H "Content-Type: application/json" \ + -d '{"text" : "This is some sample text."}' +``` diff --git a/comps/embeddings/multimodal_embeddings/__init__.py b/comps/embeddings/multimodal_embeddings/__init__.py new file mode 100644 index 000000000..916f3a44b --- /dev/null +++ b/comps/embeddings/multimodal_embeddings/__init__.py @@ -0,0 +1,2 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 diff --git a/comps/embeddings/multimodal_embeddings/bridgetower/__init__.py b/comps/embeddings/multimodal_embeddings/bridgetower/__init__.py new file mode 100644 index 000000000..e64366189 --- /dev/null +++ b/comps/embeddings/multimodal_embeddings/bridgetower/__init__.py @@ -0,0 +1,5 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from .bridgetower_embedding import BridgeTowerEmbedding +from .bridgetower_custom import BridgeTowerTextFeatureExtractor, BridgeTowerForITC diff --git a/comps/embeddings/multimodal_embeddings/bridgetower/bridgetower_custom.py b/comps/embeddings/multimodal_embeddings/bridgetower/bridgetower_custom.py new file mode 100644 index 000000000..0a89c3fa9 --- /dev/null +++ b/comps/embeddings/multimodal_embeddings/bridgetower/bridgetower_custom.py @@ -0,0 +1,243 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from collections import OrderedDict +from typing import List, Optional, Tuple, Union + +import torch +import torch.nn.functional as F +from torch import nn +from torchvision import transforms +from torchvision.transforms import CenterCrop, Compose, Normalize, Resize, ToTensor +from transformers import BridgeTowerModel, BridgeTowerPreTrainedModel +from transformers.modeling_outputs import SequenceClassifierOutput +from transformers.models.bridgetower.modeling_bridgetower import ( + BridgeTowerContrastiveHead, + BridgeTowerTextModel, + BridgeTowerVisionModel, +) + + +class LayerNorm(nn.LayerNorm): + """Subclass torch's LayerNorm to handle fp16.""" + + def forward(self, x: torch.Tensor): + orig_type = x.dtype + ret = super().forward(x.type(torch.float32)) + return ret.type(orig_type) + + +class BridgeTowerImageFeatureExtractor(nn.Module): + def __init__( + self, + patch_size=14, + width=1024, + resolution_after=294, + ckpt_path=None, + ): + super().__init__() + + self.conv1 = nn.Conv2d(in_channels=3, out_channels=width, kernel_size=patch_size, stride=patch_size, bias=False) + + scale = width**-0.5 + self.class_embedding = nn.Parameter(scale * torch.randn(width)) + self.positional_embedding = nn.Parameter(scale * torch.randn((resolution_after // patch_size) ** 2 + 1, width)) + self.ln_pre = LayerNorm(width) + + if ckpt_path is not None: + sd = torch.load(ckpt_path) + if "state_dict" in sd: + sd = sd["state_dict"] + 
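+            # sd now holds a flat state dict (unwrapped if the checkpoint nested it under "state_dict")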
print(f"Loading feature extractor checkpoint from {ckpt_path}") + self.load_state_dict(sd) + + def forward(self, x: torch.Tensor): + x = self.conv1(x) # shape = [*, width, grid, grid] + x = x.reshape(x.shape[0], x.shape[1], -1) # shape = [*, width, grid ** 2] + x = x.permute(0, 2, 1) # shape = [*, grid ** 2, width] + t = self.class_embedding.to(x.dtype) + torch.zeros(x.shape[0], 1, x.shape[-1], dtype=x.dtype, device=x.device) + x = torch.cat([t, x], dim=1) # shape = [*, grid ** 2 + 1, width] + x = x + self.positional_embedding.to(x.dtype) + x = self.ln_pre(x) + x = x.permute(1, 0, 2) # NLD -> LND + return x + + +class BridgeTowerITCHead(nn.Module): + def __init__(self, hidden_size, embed_size): + super().__init__() + self.fc = nn.Linear(hidden_size, embed_size) + + def forward(self, x): + x = self.fc(x) + return x + + +class _BridgeTowerTextModelWrapper(nn.Module): + def __init__(self, config): + super().__init__() + self.text_model = BridgeTowerTextModel(config) + + def forward(self, **kwargs): + return self.text_model(**kwargs) + + +class _BridgeTowerVisionModelWrapper(nn.Module): + def __init__(self, config): + super().__init__() + self.vision_model = BridgeTowerVisionModel(config.vision_config) + + if config.share_cross_modal_transformer_layers: + self.cross_modal_image_transform = nn.Linear(config.vision_config.hidden_size, config.hidden_size) + else: + self.cross_modal_image_transform = nn.ModuleList( + [ + nn.Linear(config.vision_config.hidden_size, config.hidden_size) + for _ in range(config.num_hidden_layers) + ] + ) + self.token_type_embeddings = nn.Embedding(2, config.hidden_size) + + def forward(self, **kwargs): + return self.vision_model(**kwargs) + + +class BridgeTowerVisionFeatureExtractor(BridgeTowerPreTrainedModel): + def __init__(self, config): + super().__init__(config) + + self.bridgetower = _BridgeTowerVisionModelWrapper(config) + self.itc_image_head = BridgeTowerContrastiveHead(config.hidden_size, config.contrastive_hidden_size) + + def forward( + self, + input_ids: Optional[torch.LongTensor] = None, + attention_mask: Optional[torch.FloatTensor] = None, + token_type_ids: Optional[torch.LongTensor] = None, + head_mask: Optional[torch.FloatTensor] = None, + inputs_embeds: Optional[torch.FloatTensor] = None, + output_attentions: Optional[bool] = None, + output_hidden_states: Optional[bool] = None, + return_dict: Optional[bool] = None, + labels: Optional[torch.LongTensor] = None, + ): + + outputs = self.bridgetower(input_ids=input_ids, attention_mask=attention_mask, output_hidden_states=True) + final_hidden_cls = outputs.hidden_states[-1][:, 0, :] + + image_embeds_with_ln = self.bridgetower.vision_model.visual.forward_post(final_hidden_cls) + image_token_type_embeddings = self.bridgetower.token_type_embeddings( + torch.full((1,), 1, dtype=torch.long, device=self.bridgetower.token_type_embeddings.weight.device) + ).expand_as(image_embeds_with_ln) + + image_embeds = self.bridgetower.cross_modal_image_transform(image_embeds_with_ln) + image_token_type_embeddings + + final_hidden_cls = F.normalize(self.itc_image_head(image_embeds), dim=-1, p=2) + + return final_hidden_cls + + +class BridgeTowerTextFeatureExtractor(BridgeTowerPreTrainedModel): + def __init__(self, config): + super().__init__(config) + + self.bridgetower = _BridgeTowerTextModelWrapper(config.text_config) + self.itc_text_head = BridgeTowerITCHead(config.hidden_size, config.contrastive_hidden_size) + + def forward( + self, + input_ids: Optional[torch.LongTensor] = None, + attention_mask: 
Optional[torch.FloatTensor] = None, + token_type_ids: Optional[torch.LongTensor] = None, + head_mask: Optional[torch.FloatTensor] = None, + inputs_embeds: Optional[torch.FloatTensor] = None, + output_attentions: Optional[bool] = None, + output_hidden_states: Optional[bool] = None, + return_dict: Optional[bool] = None, + labels: Optional[torch.LongTensor] = None, + ): + + outputs = self.bridgetower(input_ids=input_ids, attention_mask=attention_mask, output_hidden_states=True) + final_hidden_cls = outputs.hidden_states[-1][:, 0, :] + final_hidden_cls = F.normalize(self.itc_text_head(final_hidden_cls), dim=-1, p=2) + + return final_hidden_cls + + +class BridgeTowerForITC(BridgeTowerPreTrainedModel): + def __init__(self, config): + super().__init__(config) + + self.bridgetower = BridgeTowerModel(config) + + self.itc_text_head = BridgeTowerITCHead(config.hidden_size, config.contrastive_hidden_size) + self.itc_image_head = BridgeTowerITCHead(config.hidden_size, config.contrastive_hidden_size) + self.itc_cross_modal_head = BridgeTowerITCHead(config.hidden_size * 2, config.contrastive_hidden_size) + + # Initialize weights and apply final processing + self.post_init() + + def forward( + self, + input_ids: Optional[torch.LongTensor] = None, + attention_mask: Optional[torch.FloatTensor] = None, + token_type_ids: Optional[torch.LongTensor] = None, + pixel_values: Optional[torch.FloatTensor] = None, + pixel_mask: Optional[torch.LongTensor] = None, + head_mask: Optional[torch.FloatTensor] = None, + inputs_embeds: Optional[torch.FloatTensor] = None, + image_embeds: Optional[torch.FloatTensor] = None, + output_attentions: Optional[bool] = None, + output_hidden_states: Optional[bool] = None, + return_dict: Optional[bool] = None, + labels: Optional[torch.LongTensor] = None, + ) -> Union[SequenceClassifierOutput, Tuple[torch.FloatTensor]]: + + assert output_hidden_states, "output_hidden_states should be set to True for BridgeTowerForITC" + return_dict = return_dict if return_dict is not None else self.config.use_return_dict + + outputs = self.bridgetower( + input_ids, + attention_mask=attention_mask, + token_type_ids=token_type_ids, + pixel_values=pixel_values, + pixel_mask=pixel_mask, + head_mask=head_mask, + inputs_embeds=inputs_embeds, + image_embeds=image_embeds, + output_attentions=output_attentions, + output_hidden_states=output_hidden_states, + return_dict=return_dict, + ) + + pooler_output = outputs.pooler_output if return_dict else outputs[2] + + hidden_states_txt, hidden_states_img, hidden_states_cross_modal = outputs.hidden_states + + final_hidden_txt = hidden_states_txt[-1] + final_hidden_img = hidden_states_img[-1] + + image_embeds_with_ln = self.bridgetower.vision_model.visual.forward_post(final_hidden_img) + image_token_type_embeddings = self.bridgetower.token_type_embeddings( + torch.full((1,), 1, dtype=torch.long, device=self.bridgetower.token_type_embeddings.weight.device) + ).expand_as(image_embeds_with_ln) + + final_hidden_img = ( + self.bridgetower.cross_modal_image_transform(image_embeds_with_ln) + image_token_type_embeddings + ) + + final_hidden_txt = F.normalize(self.itc_text_head(final_hidden_txt[:, 0, :]), dim=-1, p=2) + final_hidden_img = F.normalize(self.itc_image_head(final_hidden_img[:, 0, :]), dim=-1, p=2) + final_hidden_cross = F.normalize(self.itc_cross_modal_head(pooler_output), dim=-1, p=2) + + logits = torch.stack([final_hidden_txt, final_hidden_img, final_hidden_cross], dim=-2) + + if not return_dict: + return tuple(logits) + + return SequenceClassifierOutput( + 
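+            # No contrastive loss is computed in this forward pass; `logits` carries the
+            # stacked text / image / cross-modal embeddings of shape
+            # [batch, 3, contrastive_hidden_size].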
loss=None, + logits=logits, + hidden_states=outputs.hidden_states, + attentions=outputs.attentions, + ) diff --git a/comps/embeddings/multimodal_embeddings/bridgetower/bridgetower_embedding.py b/comps/embeddings/multimodal_embeddings/bridgetower/bridgetower_embedding.py new file mode 100644 index 000000000..f61d8e1c3 --- /dev/null +++ b/comps/embeddings/multimodal_embeddings/bridgetower/bridgetower_embedding.py @@ -0,0 +1,122 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from typing import Any, List + +import torch +from langchain_core.embeddings import Embeddings +from langchain_core.pydantic_v1 import BaseModel, Extra +from PIL import Image +from transformers import BridgeTowerProcessor + +from .bridgetower_custom import BridgeTowerForITC, BridgeTowerTextFeatureExtractor + + +class BridgeTowerEmbedding(BaseModel, Embeddings): + """BridgeTower embedding model.""" + + model_name: str = "BridgeTower/bridgetower-large-itm-mlm-itc" + device: str = "cpu" + TEXT_MODEL: Any + PROCESSOR: Any + MODEL: Any + + def __init__(self, **kwargs: Any): + """Initialize the BridgeTowerEmbedding class.""" + super().__init__(**kwargs) + + if "device" in kwargs: + if kwargs["device"] == "hpu": + try: + import habana_frameworks.torch.core as htcore + + self.device = torch.device("hpu") + except ImportError: + self.device = "cpu" + elif kwargs["device"] == "gpu": + if torch.cuda.is_available(): + self.device = "cuda" + else: + self.device = "cpu" + + self.TEXT_MODEL = BridgeTowerTextFeatureExtractor.from_pretrained(self.model_name).to(self.device) + self.PROCESSOR = BridgeTowerProcessor.from_pretrained(self.model_name) + self.MODEL = BridgeTowerForITC.from_pretrained(self.model_name).to(self.device) + + class Config: + """Configuration for this pydantic object.""" + + extra = Extra.forbid + + def embed_documents(self, texts: List[str]) -> List[List[float]]: + """Embed a list of documents using BridgeTower. + + Args: + texts: The list of texts to embed. + Returns: + List of embeddings, one for each text. + """ + encodings = self.PROCESSOR.tokenizer(texts, return_tensors="pt").to(self.device) + with torch.no_grad(): + outputs = self.TEXT_MODEL(**encodings) + embeddings = outputs.cpu().numpy().tolist() + return embeddings + + def embed_query(self, text: str) -> List[float]: + """Embed a query using BridgeTower. + + Args: + text: The text to embed. + Returns: + Embeddings for the text. + """ + return self.embed_documents([text])[0] + + def embed_image_text_pairs(self, texts: List[str], images: list[Image], batch_size=2) -> List[List[float]]: # type: ignore + """Embed a list of image-text pairs using BridgeTower. + + Args: + texts: The list of texts to embed. + images: The list of path-to-images to embed + batch_size: the batch size to process, default to 2 + Returns: + List of embeddings, one for each image-text pairs. 
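+        Note: texts and images must have the same length; pairs are processed in
+        mini-batches of `batch_size`.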
+ """ + + # the length of texts must be equal to the length of images + assert len(texts) == len(images), "the number of captions should be equal to the number of images" + + image_list = [] + text_list = [] + embeddings = [] + for pil_img, text in zip(images, texts): + # print(path_to_img) + # img = read_image(path_to_img, mode=ImageReadMode.RGB) + # img = transform.to_pil_image(img) + + img = pil_img.convert("RGB") + image_list.append(img) + text_list.append(text) + if len(text_list) == batch_size: + batch = self.PROCESSOR( + image_list, text_list, return_tensors="pt", max_length=200, padding="max_length", truncation=True + ).to(self.device) + with torch.no_grad(): + batch_embeddings = self.MODEL(**batch, output_hidden_states=True) + + for i in range(len(text_list)): + embeddings.append(batch_embeddings.logits[i, 2, :].detach().cpu().numpy().tolist()) + image_list = [] + text_list = [] + # embedding the remaining + if len(text_list) > 0: + batch = self.PROCESSOR( + image_list, text_list, return_tensors="pt", max_length=100, padding="max_length", truncation=True + ).to(self.device) + with torch.no_grad(): + batch_embeddings = self.MODEL(**batch, output_hidden_states=True) + for i in range(len(text_list)): + embeddings.append(batch_embeddings.logits[i, 2, :].detach().cpu().numpy().tolist()) + image_list = [] + text_list = [] + return embeddings diff --git a/comps/embeddings/multimodal_embeddings/bridgetower/bridgetower_server.py b/comps/embeddings/multimodal_embeddings/bridgetower/bridgetower_server.py new file mode 100644 index 000000000..62e70c74f --- /dev/null +++ b/comps/embeddings/multimodal_embeddings/bridgetower/bridgetower_server.py @@ -0,0 +1,153 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import argparse +import asyncio +import base64 +import os +import uuid +from functools import partial +from io import BytesIO +from typing import List + +import PIL +import PIL.Image +import requests +import uvicorn +from fastapi import BackgroundTasks, FastAPI, Request +from fastapi.responses import JSONResponse, Response +from utils import build_logger + +from comps.embeddings.multimodal_embeddings.bridgetower import BridgeTowerEmbedding + +worker_id = str(uuid.uuid4())[:6] +print(f"worker_id: {worker_id}") +logger = build_logger("embedding_worker", f"bridgetower_embedding_worker_{worker_id}.log") +model_semaphore = None +global_counter = 0 + +model_name_or_path = None +model_dtype = None +use_hpu_graphs = True + + +app = FastAPI() + + +def release_model_semaphore(fn=None): + model_semaphore.release() + if fn is not None: + fn() + + +def get_queue_length(): + if model_semaphore is None: + return 0 + else: + return ( + args.limit_model_concurrency + - model_semaphore._value + + (len(model_semaphore._waiters) if model_semaphore._waiters is not None else 0) + ) + + +def get_status(): + return { + "model_names": [model_name_or_path], + "speed": 1, + "queue_length": get_queue_length(), + "global_counter": global_counter, + } + + +@app.get("/v1/health_check") +async def health() -> Response: + """Health check.""" + return Response(status_code=200, content=b'{"message" : "BridgeTower server is running..."}') + + +@app.post("/v1/encode") +async def encode(request: Request) -> Response: + global model_semaphore, global_counter + global_counter += 1 + + request_dict = await request.json() + if model_semaphore is None: + model_semaphore = asyncio.Semaphore(args.limit_model_concurrency) + await model_semaphore.acquire() + + text = request_dict.pop("text") + image = 
None + if "img_b64_str" in request_dict.keys(): + img_b64_str = request_dict.pop("img_b64_str") + image = PIL.Image.open(BytesIO(base64.b64decode(img_b64_str))) + if image is None: + # embed text only + embeddings = embedder.embed_documents([text])[0] + else: + # embed image and text pair + embeddings = embedder.embed_image_text_pairs([text], [image], batch_size=1)[0] + + background_tasks = BackgroundTasks() + background_tasks.add_task(partial(release_model_semaphore)) + return JSONResponse( + status_code=200, + content={ + "embedding": embeddings, + }, + background=background_tasks, + ) + + +@app.post("/v1/worker_get_status") +async def get_woker_status(): + return get_status() + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--host", type=str, default="0.0.0.0") + parser.add_argument("--model_name_or_path", type=str, default="BridgeTower/bridgetower-large-itm-mlm-itc") + parser.add_argument("--warmup", type=int, default=1, help="Number of warmup iterations for benchmarking.") + parser.add_argument("--device", type=str, default="cpu") + parser.add_argument("--limit-model-concurrency", type=int, default=5) + + args = parser.parse_args() + # get port from env variable if exist + args.port = int(os.getenv("PORT", 8080)) + + print(f"device: {args.device}") + logger.info(f"args: {args}") + + if args.device == "hpu": + try: + import habana_frameworks.torch.core as htcore + except ImportError: + print("device: hpu is not available. Using cpu instead!") + args.device = "cpu" + + model_name_or_path = args.model_name_or_path + + embedder = BridgeTowerEmbedding(device=args.device) + + # warmup + print("Warmup...") + image_paths = ["https://llava-vl.github.io/static/images/view.jpg"] + example_prompts = ["This is test image!"] + images = [] + for image_path in image_paths: + images.append(PIL.Image.open(requests.get(image_path, stream=True, timeout=3000).raw)) + for i in range(args.warmup): + embedder.embed_image_text_pairs( + example_prompts, + images, + batch_size=1, + ) + print("Done warmup...") + + uvicorn.run( + app, + host=args.host, + port=args.port, + log_level="debug", + ) diff --git a/comps/embeddings/multimodal_embeddings/bridgetower/docker/Dockerfile b/comps/embeddings/multimodal_embeddings/bridgetower/docker/Dockerfile new file mode 100644 index 000000000..83cd41ae1 --- /dev/null +++ b/comps/embeddings/multimodal_embeddings/bridgetower/docker/Dockerfile @@ -0,0 +1,25 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +FROM python:3.10-slim +RUN useradd -m -s /bin/bash user && \ + mkdir -p /home/user && \ + chown -R user /home/user/ +USER user +# Set environment variables +ENV LANG=en_US.UTF-8 +ENV PYTHONPATH=/home/user:/usr/lib/habanalabs/:/optimum-habana + +COPY --chown=user comps /home/user/comps + +RUN pip install --no-cache-dir --upgrade pip && \ + pip install --no-cache-dir -r /home/user/comps/embeddings/multimodal_embeddings/multimodal_langchain/requirements.txt + +ENV PYTHONPATH=$PYTHONPATH:/home/user + +ARG EMBEDDER_PORT=8080 +ENV PORT=$EMBEDDER_PORT + +WORKDIR /home/user/comps/embeddings/multimodal_embeddings/bridgetower + +ENTRYPOINT ["python", "bridgetower_server.py", "--device", "cpu"] \ No newline at end of file diff --git a/comps/embeddings/multimodal_embeddings/bridgetower/docker/Dockerfile_hpu b/comps/embeddings/multimodal_embeddings/bridgetower/docker/Dockerfile_hpu new file mode 100644 index 000000000..e571ab253 --- /dev/null +++ 
b/comps/embeddings/multimodal_embeddings/bridgetower/docker/Dockerfile_hpu @@ -0,0 +1,29 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# HABANA environment +FROM vault.habana.ai/gaudi-docker/1.16.1/ubuntu22.04/habanalabs/pytorch-installer-2.2.2:latest AS hpu +RUN useradd -m -s /bin/bash user && \ + mkdir -p /home/user && \ + chown -R user /home/user/ + +RUN rm -rf /etc/ssh/ssh_host* +USER user +# Set environment variables +ENV LANG=en_US.UTF-8 +ENV PYTHONPATH=/home/user:/usr/lib/habanalabs/:/optimum-habana + +COPY --chown=user comps /home/user/comps + +# Install requirements and optimum habana +RUN pip install --no-cache-dir --upgrade pip && \ + pip install --no-cache-dir -r /home/user/comps/embeddings/multimodal_embeddings/multimodal_langchain/requirements.txt && \ + pip install optimum[habana] + +ENV PYTHONPATH=$PYTHONPATH:/home/user + +ARG EMBEDDER_PORT=8080 +ENV PORT=$EMBEDDER_PORT + +WORKDIR /home/user/comps/embeddings/multimodal_embeddings/bridgetower +ENTRYPOINT ["python", "bridgetower_server.py", "--device", "hpu"] \ No newline at end of file diff --git a/comps/embeddings/multimodal_embeddings/bridgetower/docker/docker_compose_bridgetower_embedding_endpoint.yaml b/comps/embeddings/multimodal_embeddings/bridgetower/docker/docker_compose_bridgetower_embedding_endpoint.yaml new file mode 100644 index 000000000..9767490d0 --- /dev/null +++ b/comps/embeddings/multimodal_embeddings/bridgetower/docker/docker_compose_bridgetower_embedding_endpoint.yaml @@ -0,0 +1,19 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +services: + bridgetower: + image: opea/bridgetower-embedder:latest + container_name: bridgetower-embedding-server + ports: + - ${EMBEDDER_PORT}:${EMBEDDER_PORT} + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + restart: unless-stopped + +networks: + default: + driver: bridge diff --git a/comps/embeddings/multimodal_embeddings/bridgetower/utils.py b/comps/embeddings/multimodal_embeddings/bridgetower/utils.py new file mode 100644 index 000000000..673d54dbc --- /dev/null +++ b/comps/embeddings/multimodal_embeddings/bridgetower/utils.py @@ -0,0 +1,90 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import logging +import os +import sys + +handler = None +save_log = True +LOGDIR = "." 
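+# build_logger() below installs a shared TimedRotatingFileHandler (rotated daily, UTC)
+# and redirects sys.stdout / sys.stderr through StreamToLogger so plain prints also
+# end up in the log file.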
+ + +def build_logger(logger_name, logger_filename): + global handler + + formatter = logging.Formatter( + fmt="%(asctime)s | %(levelname)s | %(name)s | %(message)s", + datefmt="%Y-%m-%d %H:%M:%S", + ) + + # Set the format of root handlers + if not logging.getLogger().handlers: + logging.basicConfig(level=logging.INFO) + logging.getLogger().handlers[0].setFormatter(formatter) + + # Redirect stdout and stderr to loggers + stdout_logger = logging.getLogger("stdout") + stdout_logger.setLevel(logging.INFO) + sl = StreamToLogger(stdout_logger, logging.INFO) + sys.stdout = sl + + stderr_logger = logging.getLogger("stderr") + stderr_logger.setLevel(logging.ERROR) + sl = StreamToLogger(stderr_logger, logging.ERROR) + sys.stderr = sl + + # Get logger + logger = logging.getLogger(logger_name) + logger.setLevel(logging.INFO) + + # Add a file handler for all loggers + if handler is None and save_log: + os.makedirs(LOGDIR, exist_ok=True) + filename = os.path.join(LOGDIR, logger_filename) + handler = logging.handlers.TimedRotatingFileHandler(filename, when="D", utc=True, encoding="UTF-8") + handler.setFormatter(formatter) + + for name, item in logging.root.manager.loggerDict.items(): + if isinstance(item, logging.Logger): + item.addHandler(handler) + + return logger + + +class StreamToLogger(object): + """Fake file-like stream object that redirects writes to a logger instance.""" + + def __init__(self, logger, log_level=logging.INFO): + self.terminal = sys.stdout + self.logger = logger + self.log_level = log_level + self.linebuf = "" + + def __getattr__(self, attr): + return getattr(self.terminal, attr) + + def write(self, buf): + temp_linebuf = self.linebuf + buf + self.linebuf = "" + for line in temp_linebuf.splitlines(True): + # From the io.TextIOWrapper docs: + # On output, if newline is None, any '\n' characters written + # are translated to the system default line separator. + # By default sys.stdout.write() expects '\n' newlines and then + # translates them so this is still cross platform. 
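+            # Complete lines are logged immediately; a trailing partial line is buffered
+            # in self.linebuf until a later write() or flush() delivers the rest.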
+ if line[-1] == "\n": + self.logger.log(self.log_level, line.rstrip()) + else: + self.linebuf += line + + def flush(self): + if self.linebuf != "": + self.logger.log(self.log_level, self.linebuf.rstrip()) + self.linebuf = "" + + +def pretty_print_semaphore(semaphore): + if semaphore is None: + return "None" + return f"Semaphore(value={semaphore._value}, locked={semaphore.locked()})" diff --git a/comps/embeddings/multimodal_embeddings/multimodal_langchain/__init__.py b/comps/embeddings/multimodal_embeddings/multimodal_langchain/__init__.py new file mode 100644 index 000000000..916f3a44b --- /dev/null +++ b/comps/embeddings/multimodal_embeddings/multimodal_langchain/__init__.py @@ -0,0 +1,2 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 diff --git a/comps/embeddings/multimodal_embeddings/multimodal_langchain/docker/Dockerfile b/comps/embeddings/multimodal_embeddings/multimodal_langchain/docker/Dockerfile new file mode 100644 index 000000000..97d5906ec --- /dev/null +++ b/comps/embeddings/multimodal_embeddings/multimodal_langchain/docker/Dockerfile @@ -0,0 +1,29 @@ + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +FROM langchain/langchain:latest + +RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ + libgl1-mesa-glx \ + libjemalloc-dev \ + vim + +RUN useradd -m -s /bin/bash user && \ + mkdir -p /home/user && \ + chown -R user /home/user/ + +USER user + +COPY comps /home/user/comps + +RUN pip install --no-cache-dir --upgrade pip && \ + pip install --no-cache-dir -r /home/user/comps/embeddings/multimodal_embeddings/multimodal_langchain/requirements.txt + +# RUN pip install --upgrade pydantic + +ENV PYTHONPATH=$PYTHONPATH:/home/user + +WORKDIR /home/user/comps/embeddings/multimodal_embeddings/multimodal_langchain + +ENTRYPOINT ["python", "mm_embedding_mmei.py"] diff --git a/comps/embeddings/multimodal_embeddings/multimodal_langchain/docker/docker_compose_multimodal_embedding.yaml b/comps/embeddings/multimodal_embeddings/multimodal_langchain/docker/docker_compose_multimodal_embedding.yaml new file mode 100644 index 000000000..314233f93 --- /dev/null +++ b/comps/embeddings/multimodal_embeddings/multimodal_langchain/docker/docker_compose_multimodal_embedding.yaml @@ -0,0 +1,21 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +services: + embedding: + image: opea/embedding-multimodal:latest + container_name: embedding-multimodal-server + ports: + - ${MM_EMBEDDING_PORT_MICROSERVICE}:${MM_EMBEDDING_PORT_MICROSERVICE} + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + MMEI_EMBEDDING_ENDPOINT: ${MMEI_EMBEDDING_ENDPOINT} + MM_EMBEDDING_PORT_MICROSERVICE: ${MM_EMBEDDING_PORT_MICROSERVICE} + restart: unless-stopped + +networks: + default: + driver: bridge diff --git a/comps/embeddings/multimodal_embeddings/multimodal_langchain/local_mm_embedding.py b/comps/embeddings/multimodal_embeddings/multimodal_langchain/local_mm_embedding.py new file mode 100644 index 000000000..7327284a8 --- /dev/null +++ b/comps/embeddings/multimodal_embeddings/multimodal_langchain/local_mm_embedding.py @@ -0,0 +1,58 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import os + +from comps import ( + CustomLogger, + EmbedDoc, + EmbedMultimodalDoc, + MultimodalDoc, + ServiceType, + TextDoc, + TextImageDoc, + opea_microservices, + register_microservice, +) +from 
comps.embeddings.multimodal_embeddings.bridgetower import BridgeTowerEmbedding + +logger = CustomLogger("local_multimodal_embedding") +logflag = os.getenv("LOGFLAG", False) + +port = int(os.getenv("MM_EMBEDDING_PORT_MICROSERVICE", 6600)) + + +@register_microservice( + name="opea_service@local_multimodal_embedding", + service_type=ServiceType.EMBEDDING, + endpoint="/v1/embeddings", + host="0.0.0.0", + port=port, + input_datatype=MultimodalDoc, + output_datatype=EmbedMultimodalDoc, +) +def embedding(input: MultimodalDoc) -> EmbedDoc: + if logflag: + logger.info(input) + + if isinstance(input, TextDoc): + # Handle text input + embed_vector = embeddings.embed_query(input.text) + res = EmbedDoc(text=input.text, embedding=embed_vector) + + elif isinstance(input, TextImageDoc): + # Handle text + image input + pil_image = input.image.url.load_pil() + embed_vector = embeddings.embed_image_text_pairs([input.text.text], [pil_image], batch_size=1)[0] + res = EmbedMultimodalDoc(text=input.text.text, url=input.image.url, embedding=embed_vector) + else: + raise ValueError("Invalid input type") + + if logflag: + logger.info(res) + return res + + +if __name__ == "__main__": + embeddings = BridgeTowerEmbedding() + opea_microservices["opea_service@local_multimodal_embedding"].start() diff --git a/comps/embeddings/multimodal_embeddings/multimodal_langchain/mm_embedding_mmei.py b/comps/embeddings/multimodal_embeddings/multimodal_langchain/mm_embedding_mmei.py new file mode 100644 index 000000000..fbd972a20 --- /dev/null +++ b/comps/embeddings/multimodal_embeddings/multimodal_langchain/mm_embedding_mmei.py @@ -0,0 +1,84 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import base64 +import os +import time + +import requests +from fastapi.responses import JSONResponse + +from comps import ( + CustomLogger, + EmbedDoc, + EmbedMultimodalDoc, + MultimodalDoc, + ServiceType, + TextDoc, + TextImageDoc, + opea_microservices, + register_microservice, + register_statistics, + statistics_dict, +) + +logger = CustomLogger("multimodal_embedding_mmei_langchain") +logflag = os.getenv("LOGFLAG", False) +port = int(os.getenv("MM_EMBEDDING_PORT_MICROSERVICE", 6600)) +headers = {"Content-Type": "application/json"} + + +@register_microservice( + name="opea_service@multimodal_embedding_mmei_langchain", + service_type=ServiceType.EMBEDDING, + endpoint="/v1/embeddings", + host="0.0.0.0", + port=port, + input_datatype=MultimodalDoc, + output_datatype=EmbedMultimodalDoc, +) +@register_statistics(names=["opea_service@multimodal_embedding_mmei_langchain"]) +def embedding(input: MultimodalDoc) -> EmbedDoc: + start = time.time() + if logflag: + logger.info(input) + + json = {} + if isinstance(input, TextDoc): + json["text"] = input.text + elif isinstance(input, TextImageDoc): + json["text"] = input.text.text + img_bytes = input.image.url.load_bytes() + base64_img = base64.b64encode(img_bytes).decode("utf-8") + json["img_b64_str"] = base64_img + else: + return JSONResponse(status_code=400, content={"message": "Bad request!"}) + + # call multimodal embedding endpoint + try: + response = requests.post(mmei_embedding_endpoint, headers=headers, json=json) + if response.status_code != 200: + return JSONResponse(status_code=503, content={"message": "Multimodal embedding endpoint failed!"}) + + response_json = response.json() + embed_vector = response_json["embedding"] + if isinstance(input, TextDoc): + res = EmbedDoc(text=input.text, embedding=embed_vector) + elif isinstance(input, TextImageDoc): + res = 
EmbedMultimodalDoc(text=input.text.text, url=input.image.url, embedding=embed_vector) + except requests.exceptions.ConnectionError: + res = JSONResponse(status_code=503, content={"message": "Multimodal embedding endpoint not started!"}) + statistics_dict["opea_service@multimodal_embedding_mmei_langchain"].append_latency(time.time() - start, None) + if logflag: + logger.info(res) + return res + + +if __name__ == "__main__": + url_endpoint = os.getenv("MMEI_EMBEDDING_HOST_ENDPOINT", "http://0.0.0.0") + port_endpoint = os.getenv("MMEI_EMBEDDING_PORT_ENDPOINT", "8080") + path_endpoint = os.getenv("MMEI_EMBEDDING_PATH_ENDPOINT", "/v1/encode") + + mmei_embedding_endpoint = os.getenv("MMEI_EMBEDDING_ENDPOINT", f"{url_endpoint}:{port_endpoint}{path_endpoint}") + logger.info(f"MMEI Gaudi Embedding initialized at {mmei_embedding_endpoint}") + opea_microservices["opea_service@multimodal_embedding_mmei_langchain"].start() diff --git a/comps/embeddings/multimodal_embeddings/multimodal_langchain/requirements.txt b/comps/embeddings/multimodal_embeddings/multimodal_langchain/requirements.txt new file mode 100644 index 000000000..cc9d77a43 --- /dev/null +++ b/comps/embeddings/multimodal_embeddings/multimodal_langchain/requirements.txt @@ -0,0 +1,14 @@ +docarray[full] +fastapi +huggingface_hub +langchain +opentelemetry-api +opentelemetry-exporter-otlp +opentelemetry-sdk +prometheus-fastapi-instrumentator +pydantic==2.8.2 +shortuuid +torch +torchvision +transformers +uvicorn diff --git a/comps/embeddings/neural-speed/README.md b/comps/embeddings/neural-speed/README.md new file mode 100644 index 000000000..d2d1fff72 --- /dev/null +++ b/comps/embeddings/neural-speed/README.md @@ -0,0 +1,35 @@ +# build Mosec endpoint docker image + +``` +docker build --build-arg http_proxy=$http_proxy --build-arg https_proxy=$https_proxy -t langchain-mosec:neuralspeed -f comps/embeddings/neural-speed/neuralspeed-docker/Dockerfile . +``` + +# build embedding microservice docker image + +``` +docker build --build-arg http_proxy=$http_proxy --build-arg https_proxy=$https_proxy -t opea/embedding-langchain-mosec:neuralspeed -f comps/embeddings/neural-speed/docker/Dockerfile . +``` + +Note: Please contact us to request model files before building images. 
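+
+The Mosec endpoint Dockerfile copies a prebuilt Neural Speed wheel and a quantized model file from the build context, so both need to be present at the root of the build context (the repository root in the build command above) before building. A minimal sketch of staging them, where the source paths are placeholders for wherever the requested files land:
+
+```
+# filenames match the COPY lines in neuralspeed-docker/Dockerfile; source paths are illustrative
+cp /path/to/neural_speed-0.1.dev117+gafc0030.d20240815-cp310-cp310-linux_x86_64.whl .
+cp /path/to/bge-base-q8.bin .
+```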
+ +# launch Mosec endpoint docker container + +``` +docker run -d --name="embedding-langchain-mosec-endpoint" -p 6001:8000 langchain-mosec:neuralspeed +``` + +# launch embedding microservice docker container + +``` +export MOSEC_EMBEDDING_ENDPOINT=http://{mosec_embedding_host_ip}:6001 +docker run -d --name="embedding-langchain-mosec-server" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p 6000:6000 --ipc=host -e MOSEC_EMBEDDING_ENDPOINT=$MOSEC_EMBEDDING_ENDPOINT opea/embedding-langchain-mosec:neuralspeed +``` + +# run client test + +``` +curl localhost:6000/v1/embeddings \ + -X POST \ + -d '{"text":"Hello, world!"}' \ + -H 'Content-Type: application/json' +``` diff --git a/comps/embeddings/neural-speed/__init__.py b/comps/embeddings/neural-speed/__init__.py new file mode 100644 index 000000000..916f3a44b --- /dev/null +++ b/comps/embeddings/neural-speed/__init__.py @@ -0,0 +1,2 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 diff --git a/comps/embeddings/neural-speed/docker/Dockerfile b/comps/embeddings/neural-speed/docker/Dockerfile new file mode 100644 index 000000000..3b495ad54 --- /dev/null +++ b/comps/embeddings/neural-speed/docker/Dockerfile @@ -0,0 +1,30 @@ + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +FROM langchain/langchain:latest + +RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ + libgl1-mesa-glx \ + libjemalloc-dev \ + vim + +RUN useradd -m -s /bin/bash user && \ + mkdir -p /home/user && \ + chown -R user /home/user/ + +USER user + +COPY comps /home/user/comps + +RUN pip install --no-cache-dir --upgrade pip && \ + pip install --no-cache-dir -r /home/user/comps/embeddings/neural-speed/requirements.txt + +RUN pip3 install llmspec mosec msgspec httpx requests + +ENV PYTHONPATH=$PYTHONPATH:/home/user + +WORKDIR /home/user/comps/embeddings/neural-speed + +ENTRYPOINT ["python", "embedding_neuralspeed_svc.py"] + diff --git a/comps/embeddings/neural-speed/docker/docker_compose_embedding.yaml b/comps/embeddings/neural-speed/docker/docker_compose_embedding.yaml new file mode 100644 index 000000000..72535a309 --- /dev/null +++ b/comps/embeddings/neural-speed/docker/docker_compose_embedding.yaml @@ -0,0 +1,22 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +version: "3.8" + +services: + embedding: + image: opea/embedding-langchain-mosec:neuralspeed + container_name: embedding-langchain-mosec-server + ports: + - "6000:6000" + ipc: host + environment: + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + MOSEC_EMBEDDING_ENDPOINT: ${MOSEC_EMBEDDING_ENDPOINT} + LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY} + restart: unless-stopped + +networks: + default: + driver: bridge diff --git a/comps/embeddings/neural-speed/embedding_neuralspeed_svc.py b/comps/embeddings/neural-speed/embedding_neuralspeed_svc.py new file mode 100644 index 000000000..ca2d27d5f --- /dev/null +++ b/comps/embeddings/neural-speed/embedding_neuralspeed_svc.py @@ -0,0 +1,83 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import os +import time +from typing import List, Optional + +import httpx +import msgspec +import requests +from langchain_community.embeddings import OpenAIEmbeddings +from langsmith import traceable + +from comps import ( + EmbedDoc, + ServiceType, + TextDoc, + opea_microservices, + register_microservice, + register_statistics, + statistics_dict, +) + + +class MosecEmbeddings(OpenAIEmbeddings): + + def 
_get_len_safe_embeddings( + self, texts: List[str], *, engine: str, chunk_size: Optional[int] = None + ) -> List[List[float]]: + _chunk_size = chunk_size or self.chunk_size + batched_embeddings: List[List[float]] = [] + response = self.client.create(input=texts, **self._invocation_params) + if not isinstance(response, dict): + response = response.model_dump() + batched_embeddings.extend(r["embedding"] for r in response["data"]) + + _cached_empty_embedding: Optional[List[float]] = None + + def empty_embedding() -> List[float]: + nonlocal _cached_empty_embedding + if _cached_empty_embedding is None: + average_embedded = self.client.create(input="", **self._invocation_params) + if not isinstance(average_embedded, dict): + average_embedded = average_embedded.model_dump() + _cached_empty_embedding = average_embedded["data"][0]["embedding"] + return _cached_empty_embedding + + return [e if e is not None else empty_embedding() for e in batched_embeddings] + + +@register_microservice( + name="opea_service@embedding_mosec", + service_type=ServiceType.EMBEDDING, + endpoint="/v1/embeddings", + host="0.0.0.0", + port=6000, + input_datatype=TextDoc, + output_datatype=EmbedDoc, +) +@traceable(run_type="embedding") +@register_statistics(names=["opea_service@embedding_mosec"]) +def embedding(input: TextDoc) -> EmbedDoc: + start = time.time() + req = { + "query": input.text, + } + request_url = MOSEC_EMBEDDING_ENDPOINT + "/inference" + resp = requests.post(request_url, data=msgspec.msgpack.encode(req)) + + embed_vector = msgspec.msgpack.decode(resp.content)["embeddings"] + res = EmbedDoc(text=req["query"][0], embedding=embed_vector) + statistics_dict["opea_service@embedding_mosec"].append_latency(time.time() - start, None) + return res + + +if __name__ == "__main__": + MOSEC_EMBEDDING_ENDPOINT = os.environ.get("MOSEC_EMBEDDING_ENDPOINT", "http://127.0.0.1:6001") + os.environ["OPENAI_API_BASE"] = MOSEC_EMBEDDING_ENDPOINT + os.environ["OPENAI_API_KEY"] = "Dummy key" + MODEL_ID = os.environ.get("MODEL_ID", "BAAI/bge-base-en-v1.5") + embeddings = MosecEmbeddings(model=MODEL_ID) + print("NeuralSpeed Embedding Microservice Initialized.") + opea_microservices["opea_service@embedding_mosec"].start() diff --git a/comps/embeddings/neural-speed/neuralspeed-docker/Dockerfile b/comps/embeddings/neural-speed/neuralspeed-docker/Dockerfile new file mode 100644 index 000000000..13fbeec12 --- /dev/null +++ b/comps/embeddings/neural-speed/neuralspeed-docker/Dockerfile @@ -0,0 +1,26 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +From ubuntu:22.04 +ARG DEBIAN_FRONTEND=noninteractive + +ENV GLIBC_TUNABLES glibc.cpu.x86_shstk=permissive + +COPY comps /root/comps +COPY neural_speed-0.1.dev117+gafc0030.d20240815-cp310-cp310-linux_x86_64.whl /root/ +COPY bge-base-q8.bin /root/ + +RUN apt update && apt install -y python3 python3-pip +RUN pip3 install -r /root/comps/embeddings/neural-speed/neuralspeed-docker/requirements.txt +RUN pip3 install llmspec mosec msgspec httpx requests +RUN pip3 install /root/neural_speed-0.1.dev117+gafc0030.d20240815-cp310-cp310-linux_x86_64.whl + +RUN cd /root/ && export HF_ENDPOINT=https://hf-mirror.com && huggingface-cli download --resume-download BAAI/bge-base-en-v1.5 --local-dir /root/bge-base-en-v1.5 + + +ENV LD_PRELOAD=/root/libstdc++.so.6 + + +WORKDIR /root/comps/embeddings/neural-speed/neuralspeed-docker + +CMD ["python3", "server.py"] diff --git a/comps/embeddings/neural-speed/neuralspeed-docker/client.py 
b/comps/embeddings/neural-speed/neuralspeed-docker/client.py new file mode 100644 index 000000000..cd718ca5e --- /dev/null +++ b/comps/embeddings/neural-speed/neuralspeed-docker/client.py @@ -0,0 +1,31 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import os +from http import HTTPStatus + +import httpx +import msgspec +import requests + +input_text = "what a nice day" +req = { + "query": input_text, +} + +httpx_response = httpx.post("http://127.0.0.1:6001/inference", content=msgspec.msgpack.encode(req)) + +requests_response = requests.post("http://127.0.0.1:6001/inference", data=msgspec.msgpack.encode(req)) + +MOSEC_EMBEDDING_ENDPOINT = os.environ.get("MOSEC_EMBEDDING_ENDPOINT", "http://127.0.0.1:6001") + +request_url = MOSEC_EMBEDDING_ENDPOINT + "/inference" +print(f"request_url = {request_url}") +resp_3 = requests.post(request_url, data=msgspec.msgpack.encode(req)) + +if httpx_response.status_code == HTTPStatus.OK and requests_response.status_code == HTTPStatus.OK: + print(f"OK: \n {msgspec.msgpack.decode(httpx_response.content)}") + print(f"OK: \n {msgspec.msgpack.decode(requests_response.content)}") + print(f"OK: \n {msgspec.msgpack.decode(resp_3.content)}") +else: + print(f"err[{httpx_response.status_code}] {httpx_response.text}") diff --git a/comps/embeddings/neural-speed/neuralspeed-docker/client_multibatch.py b/comps/embeddings/neural-speed/neuralspeed-docker/client_multibatch.py new file mode 100644 index 000000000..ed49b6322 --- /dev/null +++ b/comps/embeddings/neural-speed/neuralspeed-docker/client_multibatch.py @@ -0,0 +1,40 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from http import HTTPStatus +from threading import Thread + +import httpx +import msgspec + +req = { + "query": "Return the โ€˜thread identifierโ€™ of the current thread. This is a nonzero integer. Its value has no direct meaning; it is intended as a magic cookie to be used e.g. to index a dictionary of thread-specific data. 
Thread identifiers may be recycled when a thread exits and another thread is created.", +} +reqs = [] +BATCH = 32 +for i in range(BATCH): + reqs.append(msgspec.msgpack.encode(req)) + + +def post_func(threadIdx): + resp = httpx.post("http://127.0.0.1:6001/inference", content=reqs[threadIdx]) + ret = f"thread {threadIdx} \n" + if resp.status_code == HTTPStatus.OK: + ret += f"OK: {msgspec.msgpack.decode(resp.content)['embeddings'][:16]}" + else: + ret += f"err[{resp.status_code}] {resp.text}" + print(ret) + + +threads = [] +for i in range(BATCH): + t = Thread( + target=post_func, + args=[ + i, + ], + ) + threads.append(t) + +for i in range(BATCH): + threads[i].start() diff --git a/comps/embeddings/neural-speed/neuralspeed-docker/requirements.txt b/comps/embeddings/neural-speed/neuralspeed-docker/requirements.txt new file mode 100644 index 000000000..50dc540fc --- /dev/null +++ b/comps/embeddings/neural-speed/neuralspeed-docker/requirements.txt @@ -0,0 +1,16 @@ +--extra-index-url https://download.pytorch.org/whl/cpu +accelerate +cmake +datasets +huggingface_hub +matplotlib +numpy +peft +protobuf<3.20 +py-cpuinfo +sentencepiece +tiktoken +torch +transformers +transformers_stream_generator +zipfile38 diff --git a/comps/embeddings/neural-speed/neuralspeed-docker/server.py b/comps/embeddings/neural-speed/neuralspeed-docker/server.py new file mode 100644 index 000000000..b47259968 --- /dev/null +++ b/comps/embeddings/neural-speed/neuralspeed-docker/server.py @@ -0,0 +1,81 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import time +from typing import Any, List + +import numpy +from mosec import Server, Worker, get_logger +from mosec.mixin import TypedMsgPackMixin +from msgspec import Struct +from neural_speed import Model +from transformers import AutoTokenizer + +logger = get_logger() + +INFERENCE_BATCH_SIZE = 32 +INFERENCE_MAX_WAIT_TIME = 30 +INFERENCE_WORKER_NUM = 1 +INFERENCE_CONTEXT = 512 + +TorchModel = "/root/bge-base-en-v1.5" +NS_Bin = "/root/bge-base-q8.bin" + +NS_Model = "bert" + + +class Request(Struct, kw_only=True): + query: str + + +class Response(Struct, kw_only=True): + embeddings: List[float] + + +class Inference(TypedMsgPackMixin, Worker): + + def __init__(self): + super().__init__() + self.tokenizer = AutoTokenizer.from_pretrained(TorchModel) + self.model = Model() + self.model.init_from_bin( + NS_Model, + NS_Bin, + batch_size=INFERENCE_BATCH_SIZE, + n_ctx=INFERENCE_CONTEXT + 2, + ) + + def forward(self, data: List[Request]) -> List[Response]: + batch = len(data) + sequences = [d.query for d in data] + inputs = self.tokenizer( + sequences, + padding=True, + truncation=True, + max_length=INFERENCE_CONTEXT, + return_tensors="pt", + ) + st = time.time() + ns_outputs = self.model( + **inputs, + reinit=True, + logits_all=True, + continuous_batching=False, + ignore_padding=True, + ) + logger.info(f"batch {batch} input shape {inputs.input_ids.shape} time {time.time()-st}") + ns_outputs = ns_outputs[:, 0] + ns_outputs = ns_outputs / numpy.linalg.norm(ns_outputs, axis=1, keepdims=True) + resps = [] + for i in range(batch): + resp = Response(embeddings=ns_outputs[i].tolist()) + resps.append(resp) + return resps + + +if __name__ == "__main__": + server = Server() + server.append_worker( + Inference, max_batch_size=INFERENCE_BATCH_SIZE, max_wait_time=INFERENCE_MAX_WAIT_TIME, num=INFERENCE_WORKER_NUM + ) + server.run() diff --git a/comps/embeddings/neural-speed/requirements.txt b/comps/embeddings/neural-speed/requirements.txt new file mode 100644 index 
000000000..9fa1a059c --- /dev/null +++ b/comps/embeddings/neural-speed/requirements.txt @@ -0,0 +1,11 @@ +docarray[full] +fastapi +langchain +langchain_community +openai +opentelemetry-api +opentelemetry-exporter-otlp +opentelemetry-sdk +prometheus-fastapi-instrumentator +shortuuid +uvicorn diff --git a/comps/finetuning/README.md b/comps/finetuning/README.md new file mode 100644 index 000000000..44ee3d10c --- /dev/null +++ b/comps/finetuning/README.md @@ -0,0 +1,121 @@ +# LLM Fine-tuning Microservice + +LLM Fine-tuning microservice involves adapting a base model to a specific task or dataset to improve its performance on that task. + +# ๐Ÿš€1. Start Microservice with Python (Optional 1) + +## 1.1 Install Requirements + +```bash +python -m pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu +python -m pip install intel-extension-for-pytorch +python -m pip install oneccl_bind_pt --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/cpu/us/ +pip install -r requirements.txt +``` + +## 1.2 Start Finetuning Service with Python Script + +### 1.2.1 Start Ray Cluster + +OneCCL and Intel MPI libraries should be dynamically linked in every node before Ray starts: + +```bash +source $(python -c "import oneccl_bindings_for_pytorch as torch_ccl; print(torch_ccl.cwd)")/env/setvars.sh +``` + +Start Ray locally using the following command. + +```bash +ray start --head +``` + +For a multi-node cluster, start additional Ray worker nodes with below command. + +```bash +ray start --address='${head_node_ip}:6379' +``` + +### 1.2.2 Start Finetuning Service + +```bash +export HF_TOKEN=${your_huggingface_token} +python finetuning_service.py +``` + +# ๐Ÿš€2. Start Microservice with Docker (Optional 2) + +## 2.1 Setup on CPU + +### 2.1.1 Build Docker Image + +Build docker image with below command: + +```bash +export HF_TOKEN=${your_huggingface_token} +cd ../../ +docker build -t opea/finetuning:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy --build-arg HF_TOKEN=$HF_TOKEN -f comps/finetuning/docker/Dockerfile_cpu . +``` + +### 2.1.2 Run Docker with CLI + +Start docker container with below command: + +```bash +docker run -d --name="finetuning-server" -p 8015:8015 --runtime=runc --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy opea/finetuning:latest +``` + +## 2.2 Setup on Gaudi2 + +### 2.2.1 Build Docker Image + +Build docker image with below command: + +```bash +cd ../../ +docker build -t opea/finetuning-gaudi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/finetuning/docker/Dockerfile_hpu . +``` + +### 2.2.2 Run Docker with CLI + +Start docker container with below command: + +```bash +export HF_TOKEN=${your_huggingface_token} +docker run --runtime=habana -e HABANA_VISIBLE_DEVICES=all -p 8015:8015 -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --net=host --ipc=host -e https_proxy=$https_proxy -e http_proxy=$http_proxy -e no_proxy=$no_proxy -e HF_TOKEN=$HF_TOKEN opea/finetuning-gaudi:latest +``` + +# ๐Ÿš€3. 
Consume Finetuning Service + +## 3.1 Create fine-tuning job + +Assuming a training file `alpaca_data.json` is uploaded, it can be downloaded in [here](https://github.com/tatsu-lab/stanford_alpaca/blob/main/alpaca_data.json), the following script launches a finetuning job using `meta-llama/Llama-2-7b-chat-hf` as base model: + +```bash +# upload a training file +curl http://${your_ip}:8015/v1/finetune/upload_training_files -X POST -H "Content-Type: multipart/form-data" -F "files=@./alpaca_data.json" + +# create a finetuning job +curl http://${your_ip}:8015/v1/fine_tuning/jobs \ + -X POST \ + -H "Content-Type: application/json" \ + -d '{ + "training_file": "alpaca_data.json", + "model": "meta-llama/Llama-2-7b-chat-hf" + }' + +# list finetuning jobs +curl http://${your_ip}:8015/v1/fine_tuning/jobs -X GET + +# retrieve one finetuning job +curl http://localhost:8015/v1/fine_tuning/jobs/retrieve -X POST -H "Content-Type: application/json" -d '{ + "fine_tuning_job_id": ${fine_tuning_job_id}}' + +# cancel one finetuning job + +curl http://localhost:8015/v1/fine_tuning/jobs/cancel -X POST -H "Content-Type: application/json" -d '{ + "fine_tuning_job_id": ${fine_tuning_job_id}}' + +# list checkpoints of a finetuning job +curl http://${your_ip}:8015/v1/finetune/list_checkpoints -X POST -H "Content-Type: application/json" -d '{"fine_tuning_job_id": ${fine_tuning_job_id}}' + +``` diff --git a/comps/finetuning/docker/Dockerfile_cpu b/comps/finetuning/docker/Dockerfile_cpu new file mode 100644 index 000000000..1cb391af8 --- /dev/null +++ b/comps/finetuning/docker/Dockerfile_cpu @@ -0,0 +1,38 @@ +# Use the same python version with ray +FROM python:3.10.14 + +ARG HF_TOKEN + +ENV HF_TOKEN=$HF_TOKEN + +RUN apt-get update -y && apt-get install -y vim htop net-tools dnsutils + +RUN useradd -m -s /bin/bash user && \ + mkdir -p /home/user && \ + chown -R user /home/user/ + +COPY comps /home/user/comps + +RUN chown -R user /home/user/comps/finetuning + +USER user + +ENV PATH=$PATH:/home/user/.local/bin + +RUN pip install --no-cache-dir --upgrade pip && \ + python -m pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu && \ + python -m pip install intel-extension-for-pytorch && \ + python -m pip install oneccl_bind_pt --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/cpu/us/ && \ + pip install --no-cache-dir -r /home/user/comps/finetuning/requirements.txt + +ENV PYTHONPATH=$PYTHONPATH:/home/user + +WORKDIR /home/user/comps/finetuning + +RUN echo PKGPATH=$(python3 -c "import pkg_resources; print(pkg_resources.get_distribution('oneccl-bind-pt').location)") >> run.sh && \ + echo 'export LD_LIBRARY_PATH=$PKGPATH/oneccl_bindings_for_pytorch/opt/mpi/lib/:$LD_LIBRARY_PATH' >> run.sh && \ + echo 'source $PKGPATH/oneccl_bindings_for_pytorch/env/setvars.sh' >> run.sh && \ + echo ray start --head >> run.sh && \ + echo python finetuning_service.py >> run.sh + +CMD bash run.sh \ No newline at end of file diff --git a/comps/finetuning/docker/Dockerfile_hpu b/comps/finetuning/docker/Dockerfile_hpu new file mode 100644 index 000000000..1277d76c1 --- /dev/null +++ b/comps/finetuning/docker/Dockerfile_hpu @@ -0,0 +1,31 @@ +# Use the same python version with ray +FROM vault.habana.ai/gaudi-docker/1.16.1/ubuntu22.04/habanalabs/pytorch-installer-2.2.2:latest + +ENV DEVICE="hpu" + +RUN apt-get update -y && apt-get install -y vim htop net-tools dnsutils + +RUN useradd -m -s /bin/bash user && \ + mkdir -p /home/user && \ + chown -R user /home/user/ + +COPY comps /home/user/comps + 
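+# Hand ownership of the finetuning sources to the unprivileged user created above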
+RUN chown -R user /home/user/comps/finetuning + +USER user + +ENV PATH=$PATH:/home/user/.local/bin + +RUN pip install --no-cache-dir --upgrade pip && \ + pip install --no-cache-dir -r /home/user/comps/finetuning/requirements.txt && \ + pip install --no-cache-dir optimum-habana + +ENV PYTHONPATH=$PYTHONPATH:/home/user + +WORKDIR /home/user/comps/finetuning + +ENTRYPOINT ["/bin/bash", "launch.sh"] + +# CMD ["/bin/bash"] + diff --git a/comps/finetuning/finetune_runner.py b/comps/finetuning/finetune_runner.py new file mode 100644 index 000000000..1ddfc4642 --- /dev/null +++ b/comps/finetuning/finetune_runner.py @@ -0,0 +1,38 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import argparse + +from pydantic_yaml import parse_yaml_raw_as +from transformers import TrainerCallback, TrainerControl, TrainerState, TrainingArguments + +from comps.finetuning.llm_on_ray.finetune.finetune_config import FinetuneConfig + + +class FineTuneCallback(TrainerCallback): + def __init__(self) -> None: + super().__init__() + + def on_log(self, args: TrainingArguments, state: TrainerState, control: TrainerControl, **kwargs): + print("FineTuneCallback:", args, state) + + +def main(): + parser = argparse.ArgumentParser(description="Runner for llm_on_ray-finetune") + parser.add_argument("--config_file", type=str, required=True, default=None) + args = parser.parse_args() + model_config_file = args.config_file + + with open(model_config_file) as f: + finetune_config = parse_yaml_raw_as(FinetuneConfig, f).model_dump() + + callback = FineTuneCallback() + finetune_config["Training"]["callbacks"] = [callback] + + from comps.finetuning.llm_on_ray.finetune.finetune import main as llm_on_ray_finetune_main + + llm_on_ray_finetune_main(finetune_config) + + +if __name__ == "__main__": + main() diff --git a/comps/finetuning/finetuning_service.py b/comps/finetuning/finetuning_service.py new file mode 100644 index 000000000..031380a5d --- /dev/null +++ b/comps/finetuning/finetuning_service.py @@ -0,0 +1,80 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import os +import urllib.parse +from typing import List, Optional, Union + +from fastapi import BackgroundTasks, File, UploadFile + +from comps import opea_microservices, register_microservice +from comps.cores.proto.api_protocol import FineTuningJobIDRequest, FineTuningJobsRequest +from comps.finetuning.handlers import ( + DATASET_BASE_PATH, + handle_cancel_finetuning_job, + handle_create_finetuning_jobs, + handle_list_finetuning_checkpoints, + handle_list_finetuning_jobs, + handle_retrieve_finetuning_job, + save_content_to_local_disk, +) + + +@register_microservice(name="opea_service@finetuning", endpoint="/v1/fine_tuning/jobs", host="0.0.0.0", port=8015) +def create_finetuning_jobs(request: FineTuningJobsRequest, background_tasks: BackgroundTasks): + return handle_create_finetuning_jobs(request, background_tasks) + + +@register_microservice( + name="opea_service@finetuning", endpoint="/v1/fine_tuning/jobs", host="0.0.0.0", port=8015, methods=["GET"] +) +def list_finetuning_jobs(): + return handle_list_finetuning_jobs() + + +@register_microservice( + name="opea_service@finetuning", endpoint="/v1/fine_tuning/jobs/retrieve", host="0.0.0.0", port=8015 +) +def retrieve_finetuning_job(request: FineTuningJobIDRequest): + job = handle_retrieve_finetuning_job(request) + return job + + +@register_microservice( + name="opea_service@finetuning", endpoint="/v1/fine_tuning/jobs/cancel", host="0.0.0.0", port=8015 +) +def 
cancel_finetuning_job(request: FineTuningJobIDRequest): + job = handle_cancel_finetuning_job(request) + return job + + +@register_microservice( + name="opea_service@finetuning", + endpoint="/v1/finetune/upload_training_files", + host="0.0.0.0", + port=8015, +) +async def upload_training_files( + files: Optional[Union[UploadFile, List[UploadFile]]] = File(None), +): + if files: + if not isinstance(files, list): + files = [files] + for file in files: + filename = urllib.parse.quote(file.filename, safe="") + save_path = os.path.join(DATASET_BASE_PATH, filename) + await save_content_to_local_disk(save_path, file) + + return {"status": 200, "message": "Training files uploaded."} + + +@register_microservice( + name="opea_service@finetuning", endpoint="/v1/finetune/list_checkpoints", host="0.0.0.0", port=8015 +) +def list_checkpoints(request: FineTuningJobIDRequest): + checkpoints = handle_list_finetuning_checkpoints(request) + return {"status": 200, "checkpoints": str(checkpoints)} + + +if __name__ == "__main__": + opea_microservices["opea_service@finetuning"].start() diff --git a/comps/finetuning/handlers.py b/comps/finetuning/handlers.py new file mode 100644 index 000000000..6aa7e5d3e --- /dev/null +++ b/comps/finetuning/handlers.py @@ -0,0 +1,189 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import os +import random +import time +import uuid +from pathlib import Path +from typing import Dict + +from fastapi import BackgroundTasks, HTTPException +from pydantic_yaml import parse_yaml_raw_as, to_yaml_file +from ray.job_submission import JobSubmissionClient + +from comps import CustomLogger +from comps.cores.proto.api_protocol import ( + FineTuningJob, + FineTuningJobIDRequest, + FineTuningJobList, + FineTuningJobsRequest, +) +from comps.finetuning.llm_on_ray.finetune.finetune_config import FinetuneConfig + +logger = CustomLogger("finetuning_handlers") + +MODEL_CONFIG_FILE_MAP = { + "meta-llama/Llama-2-7b-chat-hf": "./models/llama-2-7b-chat-hf.yaml", + "mistralai/Mistral-7B-v0.1": "./models/mistral-7b-v0.1.yaml", +} + +DATASET_BASE_PATH = "datasets" +JOBS_PATH = "jobs" +if not os.path.exists(DATASET_BASE_PATH): + os.mkdir(DATASET_BASE_PATH) + +if not os.path.exists(JOBS_PATH): + os.mkdir(JOBS_PATH) + +FineTuningJobID = str +CHECK_JOB_STATUS_INTERVAL = 5 # Check every 5 secs + +global ray_client +ray_client: JobSubmissionClient = None + +running_finetuning_jobs: Dict[FineTuningJobID, FineTuningJob] = {} +finetuning_job_to_ray_job: Dict[FineTuningJobID, str] = {} + + +# Add a background task to periodicly update job status +def update_job_status(job_id: FineTuningJobID): + while True: + job_status = ray_client.get_job_status(finetuning_job_to_ray_job[job_id]) + status = str(job_status).lower() + # Ray status "stopped" is OpenAI status "cancelled" + status = "cancelled" if status == "stopped" else status + logger.info(f"Status of job {job_id} is '{status}'") + running_finetuning_jobs[job_id].status = status + if status == "finished" or status == "cancelled" or status == "failed": + break + time.sleep(CHECK_JOB_STATUS_INTERVAL) + + +def handle_create_finetuning_jobs(request: FineTuningJobsRequest, background_tasks: BackgroundTasks): + base_model = request.model + train_file = request.training_file + train_file_path = os.path.join(DATASET_BASE_PATH, train_file) + + model_config_file = MODEL_CONFIG_FILE_MAP.get(base_model) + if not model_config_file: + raise HTTPException(status_code=404, detail=f"Base model '{base_model}' not supported!") + + if not 
os.path.exists(train_file_path): + raise HTTPException(status_code=404, detail=f"Training file '{train_file}' not found!") + + with open(model_config_file) as f: + finetune_config = parse_yaml_raw_as(FinetuneConfig, f) + + finetune_config.Dataset.train_file = train_file_path + + if request.hyperparameters is not None: + if request.hyperparameters.epochs != "auto": + finetune_config.Training.epochs = request.hyperparameters.epochs + + if request.hyperparameters.batch_size != "auto": + finetune_config.Training.batch_size = request.hyperparameters.batch_size + + if request.hyperparameters.learning_rate_multiplier != "auto": + finetune_config.Training.learning_rate = request.hyperparameters.learning_rate_multiplier + + if os.getenv("HF_TOKEN", None): + finetune_config.General.config.use_auth_token = os.getenv("HF_TOKEN", None) + + job = FineTuningJob( + id=f"ft-job-{uuid.uuid4()}", + model=base_model, + created_at=int(time.time()), + training_file=train_file, + hyperparameters={ + "n_epochs": finetune_config.Training.epochs, + "batch_size": finetune_config.Training.batch_size, + "learning_rate_multiplier": finetune_config.Training.learning_rate, + }, + status="running", + seed=random.randint(0, 1000) if request.seed is None else request.seed, + ) + finetune_config.General.output_dir = os.path.join(JOBS_PATH, job.id) + if os.getenv("DEVICE", ""): + logger.info(f"specific device: {os.getenv('DEVICE')}") + finetune_config.Training.device = os.getenv("DEVICE") + + finetune_config_file = f"{JOBS_PATH}/{job.id}.yaml" + to_yaml_file(finetune_config_file, finetune_config) + + global ray_client + ray_client = JobSubmissionClient() if ray_client is None else ray_client + + ray_job_id = ray_client.submit_job( + # Entrypoint shell command to execute + entrypoint=f"python finetune_runner.py --config_file {finetune_config_file}", + # Path to the local directory that contains the script.py file + runtime_env={"working_dir": "./"}, + ) + logger.info(f"Submitted Ray job: {ray_job_id} ...") + + running_finetuning_jobs[job.id] = job + finetuning_job_to_ray_job[job.id] = ray_job_id + + background_tasks.add_task(update_job_status, job.id) + + return job + + +def handle_list_finetuning_jobs(): + finetuning_jobs_list = FineTuningJobList(data=list(running_finetuning_jobs.values()), has_more=False) + + return finetuning_jobs_list + + +def handle_retrieve_finetuning_job(request: FineTuningJobIDRequest): + fine_tuning_job_id = request.fine_tuning_job_id + + job = running_finetuning_jobs.get(fine_tuning_job_id) + if job is None: + raise HTTPException(status_code=404, detail=f"Fine-tuning job '{fine_tuning_job_id}' not found!") + return job + + +def handle_cancel_finetuning_job(request: FineTuningJobIDRequest): + fine_tuning_job_id = request.fine_tuning_job_id + + ray_job_id = finetuning_job_to_ray_job.get(fine_tuning_job_id) + if ray_job_id is None: + raise HTTPException(status_code=404, detail=f"Fine-tuning job '{fine_tuning_job_id}' not found!") + + global ray_client + ray_client = JobSubmissionClient() if ray_client is None else ray_client + ray_client.stop_job(ray_job_id) + + job = running_finetuning_jobs.get(fine_tuning_job_id) + job.status = "cancelled" + return job + + +async def save_content_to_local_disk(save_path: str, content): + save_path = Path(save_path) + try: + if isinstance(content, str): + with open(save_path, "w", encoding="utf-8") as file: + file.write(content) + else: + with save_path.open("wb") as fout: + content = await content.read() + fout.write(content) + except Exception as e: + 
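+        # Record the failure locally before raising it back to the caller.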
logger.info(f"Write file failed. Exception: {e}") + raise Exception(status_code=500, detail=f"Write file {save_path} failed. Exception: {e}") + + +def handle_list_finetuning_checkpoints(request: FineTuningJobIDRequest): + fine_tuning_job_id = request.fine_tuning_job_id + + job = running_finetuning_jobs.get(fine_tuning_job_id) + if job is None: + raise HTTPException(status_code=404, detail=f"Fine-tuning job '{fine_tuning_job_id}' not found!") + output_dir = os.path.join(JOBS_PATH, job.id) + checkpoints = [] + if os.path.exists(output_dir): + checkpoints = os.listdir(output_dir) + return checkpoints diff --git a/comps/finetuning/launch.sh b/comps/finetuning/launch.sh new file mode 100644 index 000000000..bb5042ac6 --- /dev/null +++ b/comps/finetuning/launch.sh @@ -0,0 +1,12 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +if [[ -n "$RAY_PORT" ]];then + ray start --head --port $RAY_PORT +else + ray start --head + export RAY_PORT=8265 +fi + +export RAY_ADDRESS=http://127.0.0.1:$RAY_PORT +python finetuning_service.py diff --git a/comps/finetuning/llm_on_ray/common/__init__.py b/comps/finetuning/llm_on_ray/common/__init__.py new file mode 100644 index 000000000..954b7baa4 --- /dev/null +++ b/comps/finetuning/llm_on_ray/common/__init__.py @@ -0,0 +1,6 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +# Copyright 2023 The LLM-on-Ray Authors. + +from .torch_config import TorchConfig diff --git a/comps/finetuning/llm_on_ray/common/common.py b/comps/finetuning/llm_on_ray/common/common.py new file mode 100644 index 000000000..ac01ae12e --- /dev/null +++ b/comps/finetuning/llm_on_ray/common/common.py @@ -0,0 +1,29 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +# Copyright 2023 The LLM-on-Ray Authors. + +import glob +import importlib +import os + +from comps import CustomLogger + +logger = CustomLogger("llm_on_ray") + + +def import_all_modules(basedir, prefix=None): + all_py_files = glob.glob(basedir + "/*.py") + modules = [os.path.basename(f) for f in all_py_files] + + for module in modules: + if not module.startswith("_"): + module = module.rstrip(".py") + if prefix is None: + module_name = module + else: + module_name = f"{prefix}.{module}" + try: + importlib.import_module(module_name) + except Exception: + logger.warning(f"import {module_name} error", exc_info=True) diff --git a/comps/finetuning/llm_on_ray/common/torch_config.py b/comps/finetuning/llm_on_ray/common/torch_config.py new file mode 100644 index 000000000..9e3f48a7c --- /dev/null +++ b/comps/finetuning/llm_on_ray/common/torch_config.py @@ -0,0 +1,72 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +# Copyright 2023 The LLM-on-Ray Authors. + +import os +import sys +from dataclasses import dataclass +from typing import Optional + +from ray.train._internal.worker_group import WorkerGroup +from ray.train.torch.config import TorchConfig as RayTorchConfig +from ray.train.torch.config import _TorchBackend + +# The package importlib_metadata is in a different place, depending on the Python version. 
+if sys.version_info < (3, 8): + import importlib_metadata +else: + import importlib.metadata as importlib_metadata + + +@dataclass +class TorchConfig(RayTorchConfig): + device: Optional[str] = None + + @property + def backend_cls(self): + EnableCCLBackend.device = self.device + return EnableCCLBackend + + +def xpu_libs_import(): + """Try to import IPEX and oneCCL.""" + try: + import intel_extension_for_pytorch + except ImportError: + raise ImportError("Please install intel_extension_for_pytorch") + try: + ccl_version = importlib_metadata.version("oneccl_bind_pt") + if ccl_version >= "1.12": + import oneccl_bindings_for_pytorch + else: + import torch_ccl + except ImportError as ccl_not_exist: + raise ImportError("Please install torch-ccl") from ccl_not_exist + + +def hpu_libs_import(): + """Try to import habana frameworkfs for torch.""" + try: + import habana_frameworks.torch # noqa: F401 + except ImportError as habana_not_exist: + raise ImportError("Please install habana_frameworks") from habana_not_exist + + +def _set_torch_distributed_env_vars(device): + if device is not None: + os.environ["ACCELERATE_TORCH_DEVICE"] = device + + +class EnableCCLBackend(_TorchBackend): + device: Optional[str] = None + + def on_start(self, worker_group: WorkerGroup, backend_config: RayTorchConfig): + libs_import = hpu_libs_import if self.device is not None and self.device.startswith("hpu") else xpu_libs_import + for i in range(len(worker_group)): + worker_group.execute_single_async(i, libs_import) + super().on_start(worker_group, backend_config) + + def on_training_start(self, worker_group: WorkerGroup, backend_config: RayTorchConfig): + super().on_training_start(worker_group, backend_config) + worker_group.execute(_set_torch_distributed_env_vars, self.device) diff --git a/comps/finetuning/llm_on_ray/finetune/__init__.py b/comps/finetuning/llm_on_ray/finetune/__init__.py new file mode 100644 index 000000000..0262e494a --- /dev/null +++ b/comps/finetuning/llm_on_ray/finetune/__init__.py @@ -0,0 +1,4 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +# Copyright 2023 The LLM-on-Ray Authors. diff --git a/comps/finetuning/llm_on_ray/finetune/data_process.py b/comps/finetuning/llm_on_ray/finetune/data_process.py new file mode 100644 index 000000000..ab5efcc09 --- /dev/null +++ b/comps/finetuning/llm_on_ray/finetune/data_process.py @@ -0,0 +1,196 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +# Copyright 2023 The LLM-on-Ray Authors. + +import copy +import re +from itertools import chain + +import torch + +IGNORE_INDEX = -100 + + +class DataProcessor: + # We used the following prompts for fine-tuning the Alpaca model. You can find reference doc form this URL(https://github.com/tatsu-lab/stanford_alpaca/blob/main/README.md#data-release) + def __init__(self, config, tokenizer): + self.tokenizer = tokenizer + self.end = tokenizer.eos_token + self.intro = ( + "Below is an instruction that describes a task. Write a response that appropriately completes the request." 
+ ) + self.instruction = "### Instruction:\n" + self.input = "### Input:\n" + self.response = "### Response:\n" + self.padding_side = config["Dataset"].get("padding_side", "right") + self.truncation_side = config["Dataset"].get("truncation_side", "right") + self.max_length = self.max_seq_length = config["Dataset"].get("max_length", 512) + self.max_source_length = config["Dataset"].get("max_source_length", 384) + self.truncation = config["Dataset"].get("truncation", True) + self.padding = config["Dataset"].get("padding", True) + self.mask_input = config["Dataset"].get("mask_input", True) + self.mask_response = config["Dataset"].get("mask_response", True) + + def make_prompt(self, examples): + prompts = {} + prompts["prompt_sources"] = [] + prompts["prompt_targets"] = [] + for rec in examples: + instruction = rec["instruction"] + response = rec["input"] + context = rec.get("output") + if not instruction: + raise ValueError(f"Expected an instruction in: {rec}") + # if not response: + # raise ValueError(f"Expected a response in: {rec}") + if context: + prompt = ( + self.intro + + self.end + + "\n" + + self.instruction + + instruction + + self.input + + context + + self.end + + "\n" + + self.response + ) + prompts["prompt_sources"].append(prompt) + else: + prompt = self.intro + self.end + "\n" + self.instruction + instruction + self.end + "\n" + self.response + prompts["prompt_sources"].append(prompt) + prompt_response = response + self.end + prompts["prompt_targets"].append(prompt_response) + return prompts + + def __truncate_sequences(self, sequences, max_length): + """ + Copied from https://github.com/intel/intel-extension-for-transformers/blob/ae54f698b73a66e5729427cb19f69c33e1a5c34d/intel_extension_for_transformers/transformers/llm/finetuning/data_utils.py#L40 + """ + words_to_cut = sum(list(map(len, sequences))) - max_length + if words_to_cut <= 0: + return sequences + + while words_to_cut > 0 and len(sequences) > 0: + words_to_cut -= len(sequences[0]) + sequences = sequences[1:] + return sequences + + def tokenize_by_neural_chat(self, examples): + """ + Copied from https://github.com/intel/intel-extension-for-transformers/blob/ae54f698b73a66e5729427cb19f69c33e1a5c34d/intel_extension_for_transformers/transformers/llm/finetuning/data_utils.py#L225 + The only differences are: + - using our own prompt style + - add left or right padding and truncation + - add mask_input and mask_response + """ + keys = list(examples.data.keys()) + if len(keys) != 2: + raise ValueError("Unsupported dataset format") + assistant_tokens = self.tokenizer.tokenize(self.response) + header = self.intro + self.end + "\n" + + examples["input_ids"] = [] + examples["labels"] = [] + examples["attention_mask"] = [] + for instruction, response in zip(examples[keys[0]], examples[keys[1]]): + convs = re.findall( + r"{0}.*?{2}|{1}.*?{2}".format(self.instruction, self.response, self.end), + instruction, + re.DOTALL, + ) + convs_tokens = [self.tokenizer.tokenize(conv) + self.tokenizer.tokenize("\n") for conv in convs] + header_tokens = self.tokenizer.tokenize(header) + self.tokenizer.tokenize("\n") + max_input = self.max_source_length - len(header_tokens) - len(assistant_tokens) + truncated_convs = self.__truncate_sequences(convs_tokens, max_input) + if len(truncated_convs) == 0: + truncated_convs = [convs_tokens[-1][: max_input - 3] + convs_tokens[-1][-3:]] + + prompt_tokens = [header_tokens] + truncated_convs + [assistant_tokens] + prompt_ids = [self.tokenizer.convert_tokens_to_ids(prompt_token) for prompt_token in 
prompt_tokens] + prompt_ids = list(chain(*prompt_ids)) + + resp_ids = self.tokenizer.convert_tokens_to_ids(self.tokenizer.tokenize(response.strip())) + # keep last and eos_id + max_resp = self.max_seq_length - len(prompt_ids) - 1 + + # truncating response + if len(resp_ids) > max_resp: + if self.truncation_side == "right": + resp_ids = resp_ids[: max_resp - 1] + resp_ids[-1:] + else: + resp_ids = resp_ids[-max_resp:] + + # masking + input_ids = prompt_ids + resp_ids + [self.tokenizer.eos_token_id] + if self.mask_input: + labels = [IGNORE_INDEX] * len(prompt_ids) + resp_ids + [self.tokenizer.eos_token_id] + elif self.mask_response: + labels = prompt_ids + [IGNORE_INDEX] * len(resp_ids) + [self.tokenizer.eos_token_id] + else: + labels = input_ids + + # padding + input_len = len(input_ids) + pad_len = self.max_seq_length - input_len + if self.padding_side == "right": + input_ids = input_ids + [self.tokenizer.eos_token_id] * pad_len + labels = labels + [IGNORE_INDEX] * pad_len + attention_mask = [1] * input_len + [0] * pad_len + else: + input_ids = [self.tokenizer.eos_token_id] * pad_len + input_ids + labels = [IGNORE_INDEX] * pad_len + labels + attention_mask = [0] * pad_len + [1] * input_len + + assert len(input_ids) == self.max_seq_length + assert len(prompt_ids) <= self.max_source_length + assert len(labels) == len(input_ids) == len(attention_mask) + + examples["input_ids"].append(torch.tensor(input_ids)) + examples["labels"].append(labels) + examples["attention_mask"].append(attention_mask) + + return examples + + def tokenize(self, examples): + keys = list(examples.data.keys()) + if len(keys) != 2: + raise ValueError("Unsupported dataset format") + + examples["input_ids"] = [] + examples["labels"] = [] + examples["attention_mask"] = [] + for s, t in zip(examples[keys[0]], examples[keys[1]]): + results = self.tokenizer( + s + t, + padding=self.padding, + truncation=self.truncation, + return_tensors=None, + max_length=self.max_length, + ) + + input_ids = results["input_ids"] + input_len = len(input_ids) + labels = copy.deepcopy(input_ids) + if self.mask_input or self.mask_response: + sources_tokenized = self.tokenizer( + s, + padding=False, + truncation=True, + return_tensors=None, + max_length=self.max_length, + ) + input_id_len = len(sources_tokenized["input_ids"]) + # mask input + if self.mask_input: + labels[:input_id_len] = [IGNORE_INDEX] * input_id_len + # mask response + if self.mask_response: + labels[input_id_len:input_len] = [IGNORE_INDEX] * (input_len - input_id_len) + + examples["input_ids"].append(results["input_ids"]) + examples["labels"].append(labels) + examples["attention_mask"].append(results["attention_mask"]) + return examples diff --git a/comps/finetuning/llm_on_ray/finetune/finetune.py b/comps/finetuning/llm_on_ray/finetune/finetune.py new file mode 100644 index 000000000..03b8adfaa --- /dev/null +++ b/comps/finetuning/llm_on_ray/finetune/finetune.py @@ -0,0 +1,462 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +# Copyright 2023 The LLM-on-Ray Authors. 
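The Alpaca-style prompt assembled by `DataProcessor.make_prompt` in `data_process.py` above is easier to grasp from a concrete record. The sketch below is illustrative only and is not part of this patch: the gpt2 tokenizer and the toy record are placeholder assumptions, and only `make_prompt` is exercised.

```python
# Illustrative sketch, not part of the patch: drive DataProcessor.make_prompt()
# stand-alone to inspect the assembled Alpaca-style prompt.
from transformers import AutoTokenizer

from comps.finetuning.llm_on_ray.finetune.data_process import DataProcessor

tokenizer = AutoTokenizer.from_pretrained("gpt2")  # placeholder tokenizer

# DataProcessor only reads the "Dataset" section of the config; defaults apply
# for any key that is omitted.
config = {"Dataset": {"max_length": 512, "max_source_length": 384}}
processor = DataProcessor(config, tokenizer)

# Note: make_prompt() takes the record's "input" field as the response to learn
# and the optional "output" field as extra context for the prompt.
records = [
    {
        "instruction": "Summarize the passage.",
        "input": "A one-sentence summary.",
        "output": "A longer passage to be summarized ...",
    }
]
prompts = processor.make_prompt(records)
print(prompts["prompt_sources"][0])  # intro + instruction + context + "### Response:\n"
print(prompts["prompt_targets"][0])  # response text followed by the EOS token
```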
+ +#!/usr/bin/env python + +import argparse +import copy +import os +import re +import sys +from itertools import chain +from typing import Any, Dict, Optional, Union + +import datasets +import ray +import torch +import transformers +from peft import LoraConfig, get_peft_model +from pydantic_yaml import parse_yaml_raw_as +from ray.air import FailureConfig, RunConfig +from ray.air.config import ScalingConfig +from ray.train.torch import TorchTrainer + +from comps import CustomLogger +from comps.finetuning.llm_on_ray import common +from comps.finetuning.llm_on_ray.finetune.data_process import DataProcessor +from comps.finetuning.llm_on_ray.finetune.finetune_config import FinetuneConfig + +logger = CustomLogger("llm_on_ray/finetune") + + +def adapt_transformers_to_device(config: Dict): + device = config["Training"]["device"] + if device in ["hpu"]: + from optimum.habana.transformers.modeling_utils import adapt_transformers_to_gaudi + + # adapt transformers to gaudi + adapt_transformers_to_gaudi() + + +def set_seed(config: Dict): + seed = config["Training"].get("seed", None) + if seed is None: + return + device = config["Training"]["device"] + if device in ["cpu", "gpu"]: + from accelerate.utils import set_seed as _set_seed + + _set_seed(seed) + elif device in ["hpu"]: + from optimum.habana.utils import set_seed as _set_seed + + _set_seed(seed) + + +def convert_to_training_args(cls, config: Dict): + device = config["Training"]["device"] + accelerate_mode = config["Training"]["accelerate_mode"] + save_strategy = config["General"]["save_strategy"] + + args = { + "output_dir": config["General"]["output_dir"], + "report_to": config["General"]["report_to"], + "resume_from_checkpoint": config["General"]["resume_from_checkpoint"], + "gradient_checkpointing": config["General"]["enable_gradient_checkpointing"], + "save_strategy": save_strategy if save_strategy != "False" else "no", + "bf16": config["Training"]["mixed_precision"] == "bf16", + "num_train_epochs": config["Training"]["epochs"], + "per_device_train_batch_size": config["Training"]["batch_size"], + "per_device_eval_batch_size": config["Training"]["batch_size"], + "optim": config["Training"]["optimizer"], + "learning_rate": config["Training"]["learning_rate"], + "logging_steps": config["Training"]["logging_steps"], + "lr_scheduler_type": config["Training"]["lr_scheduler"], + "weight_decay": config["Training"]["weight_decay"], + "gradient_accumulation_steps": config["Training"]["gradient_accumulation_steps"], + "do_train": True, + } + + # set attr do_eval + vf = config["Dataset"].get("validation_file", None) + vsp = config["Dataset"].get("validation_split_percentage", 0) + if vf is not None or (vsp / 100 > 0.0 and vsp / 100 < 1.0): + args.update({"do_eval": True}) + + # set attr max_steps + if config["Training"]["max_train_steps"] is not None: + args.update({"max_steps": config["Training"]["max_train_steps"]}) + + # set attr for device cpu + if device == "cpu": + if hasattr(cls, "use_cpu"): + args.update({"use_cpu": True}) + if hasattr(cls, "no_cuda"): + args.update({"no_cuda": True}) + args.update({"use_ipex": True}) + + # set attr 'deepspeed' + if accelerate_mode == "DEEPSPEED": + args.update({"deepspeed": config["Training"]["deepspeed_config_file"]}) + + # set attr for FSDP + # if accelerate_mode == "FSDP": + # args.updatwe({}) + + # set attr for Intel Gaudi + if device == "hpu": + args.update({"use_habana": True}) + args.update({"use_lazy_mode": config["Training"]["hpu_execution_mode"] == "lazy"}) + args.update({"pipelining_fwd_bwd": True}) 
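# Illustrative note, not part of the original file: with the bundled
# models/llama-2-7b-chat-hf.yaml (device: cpu, mixed_precision: bf16,
# batch_size: 2, epochs: 3), the cls(**args) call below is roughly equivalent to
#   TrainingArguments(output_dir="./tmp", report_to="none", bf16=True,
#                     num_train_epochs=3, per_device_train_batch_size=2,
#                     per_device_eval_batch_size=2, optim="adamw_torch",
#                     learning_rate=1e-05, lr_scheduler_type="linear",
#                     weight_decay=0.0, do_train=True, use_ipex=True, ...)
# (exact kwargs depend on the installed transformers version).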
+ + return cls(**args) + + +def convert_dtype(dtype: str) -> Optional[torch.dtype]: + supported_dtypes = { + "fp16": torch.float16, + "bf16": torch.bfloat16, + "no": None, + } + return supported_dtypes[dtype] + + +def load_tokenizer(config: Dict): + if config["General"].get("tokenizer_name") is not None: + tokenizer_name = config["General"].get("tokenizer_name") + else: + tokenizer_name = config["General"]["base_model"] + load_config = config["General"].get("config", {}) + # default padding side is right + padding_side = config["Dataset"].get("padding_side", "right") + # default truncation side is right + truncation_side = config["Dataset"].get("truncation_side", "right") + tokenizer = transformers.AutoTokenizer.from_pretrained( + tokenizer_name, padding_side=padding_side, truncation_side=truncation_side, **load_config + ) + return tokenizer + + +def load_dataset(config: Dict): + dataset_file = config["Dataset"].get("train_file", None) + if dataset_file is None: + return + + if os.path.exists(dataset_file): + # load from local file + def local_load(name, **load_config): + if os.path.isfile(name): + file = os.path.basename(os.path.abspath(name)) + path = os.path.dirname(os.path.abspath(name)) + dataset = datasets.load_dataset(path, data_files=file, **load_config) + else: + dataset = datasets.load_dataset(name, **load_config) + return dataset["train"] + + train_dataset = local_load(dataset_file) + validation_file = config["Dataset"].get("validation_file", None) + if validation_file is not None: + validation_dataset = local_load(validation_file) + return datasets.DatasetDict({"train": train_dataset, "validation": validation_dataset}) + + validation_split_percentage = config["Dataset"].get("validation_split_percentage", 0) + if validation_split_percentage / 100 > 0.0 and validation_split_percentage / 100 < 1.0: + dataset_dict = train_dataset.train_test_split(test_size=validation_split_percentage / 100) + dataset_dict["validation"] = dataset_dict["test"] + return dataset_dict + + return datasets.DatasetDict({"train": train_dataset}) + else: + # try to download and load dataset from huggingface.co + load_config = config["General"].get("config", {}) + use_auth_token = load_config.get("use_auth_token", None) + raw_dataset = datasets.load_dataset(dataset_file, use_auth_token=use_auth_token) + + validation_split_percentage = config["Dataset"].get("validation_split_percentage", 0) + if "validation" not in raw_dataset.keys() and ( + validation_split_percentage / 100 > 0.0 and validation_split_percentage / 100 < 1.0 + ): + dataset_dict = raw_dataset["train"].train_test_split(test_size=validation_split_percentage / 100) + dataset_dict["validation"] = dataset_dict["test"] + return dataset_dict + + return raw_dataset + + +def tokenize_dataset(config: Dict, tokenizer, dataset): + group = config["Dataset"].get("group", True) + block_size = config["Dataset"].get("block_size", 512) + tokenizer.pad_token = tokenizer.eos_token + + processor = DataProcessor(config, tokenizer) + + for key in dataset: + prompts = processor.make_prompt(dataset[key]) + dataset[key] = datasets.Dataset.from_dict(prompts) + + column_names = list(dataset["train"].features) + tokenize_fn = ( + processor.tokenize_by_neural_chat + if config["Dataset"].get("data_preprocess_type", "") == "neural_chat" + else processor.tokenize + ) + + tokenized_dataset = dataset.map( + tokenize_fn, + remove_columns=column_names, + batched=True, + load_from_cache_file=False, + desc="Tokenize dataset", + ) + + if group: + + def group_texts(examples): + # 
Concatenate all texts. + concatenated_examples = {k: list(chain(*examples[k])) for k in examples.keys()} + total_length = len(concatenated_examples[list(examples.keys())[0]]) + # We drop the small remainder, we could add padding if the model supported it instead of this drop, you can + # customize this part to your needs. + if total_length >= block_size: + total_length = (total_length // block_size) * block_size + # Split by chunks of max_len. + result = { + k: [t[i : i + block_size] for i in range(0, total_length, block_size)] + for k, t in concatenated_examples.items() + } + return result + + tokenized_dataset = tokenized_dataset.map( + group_texts, + batched=True, + load_from_cache_file=False, + desc=f"Grouping texts in chunks of {block_size}", + ) + + return tokenized_dataset + + +def prepare_data_collator(config: Dict, tokenizer): + return transformers.DataCollatorForLanguageModeling( + tokenizer=tokenizer, mlm=False, return_tensors="pt", pad_to_multiple_of=8 + ) + + +def load_model(config: Dict): + model_name = config["General"]["base_model"] + model_dtype = convert_dtype(config["Training"].get("mixed_precision", "no")) + model_config = config["General"].get("config", {}) + model = transformers.AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=model_dtype, **model_config) + + lora_config = config["General"].get("lora_config", None) + if lora_config: + peft_config = LoraConfig(**lora_config) + model = get_peft_model(model, peft_config) + + egc = config["General"].get("enable_gradient_checkpointing", False) + if egc: + model.enable_input_require_grads() + model.gradient_checkpointing_enable() + model.config.use_cache = False + + model.to(dtype=model_dtype, device=torch.device(config["Training"]["device"])) + + return model + + +def get_trainer(config: Dict, model, tokenizer, tokenized_dataset, data_collator): + device = config["Training"]["device"] + if device in ["cpu", "gpu"]: + from transformers import Trainer, TrainingArguments + + training_args = convert_to_training_args(TrainingArguments, config) + trainer = Trainer( + model=model, + args=training_args, + train_dataset=tokenized_dataset["train"], + eval_dataset=tokenized_dataset["validation"] if tokenized_dataset.get("validation") is not None else None, + tokenizer=tokenizer, + data_collator=data_collator, + ) + return training_args, trainer + elif device in ["hpu"]: + from optimum.habana import GaudiConfig + from optimum.habana.transformers import GaudiTrainer, GaudiTrainingArguments + + # If gaudi_config_name is provided, load gaudi_config from huggingface model hub(https://huggingface.co/Habana), otherwise use default gaudi_config + gaudi_config_name = config["General"].get("gaudi_config_name", None) + if gaudi_config_name is not None: + gaudi_config = GaudiConfig.from_pretrained(gaudi_config_name) + else: + gaudi_config = GaudiConfig() + gaudi_config.use_fused_adam = True + gaudi_config.use_fused_clip_norm = True + + training_args = convert_to_training_args(GaudiTrainingArguments, config) + trainer = GaudiTrainer( + model=model, + args=training_args, + gaudi_config=gaudi_config, + train_dataset=tokenized_dataset["train"], + eval_dataset=tokenized_dataset["validation"] if tokenized_dataset.get("validation") is not None else None, + tokenizer=tokenizer, + data_collator=data_collator, + ) + return training_args, trainer + return None + + +def train_func(config: Dict[str, Any]): + os.chdir(config["cwd"]) + + adapt_transformers_to_device(config) + + set_seed(config) + + tokenizer = load_tokenizer(config) + + dataset = 
load_dataset(config) + + max_train_samples = config["Dataset"].get("max_train_samples", 0) + if 0 < max_train_samples < len(dataset["train"]): + dataset["train"] = dataset["train"].select(range(max_train_samples)) + + max_eval_samples = config["Dataset"].get("max_eval_samples", 0) + if "validation" in dataset and 0 < max_eval_samples < len(dataset["validation"]): + dataset["validation"] = dataset["validation"].select(range(max_eval_samples)) + + tokenized_dataset = tokenize_dataset(config, tokenizer, dataset) + + data_collator = prepare_data_collator(config, tokenizer) + + model = load_model(config) + + training_args, trainer = get_trainer(config, model, tokenizer, tokenized_dataset, data_collator) + + logger.info("train start") + trainer.train(resume_from_checkpoint=training_args.resume_from_checkpoint) + trainer.save_model() + logger.info("train finish") + + +def get_finetune_config(): + parser = argparse.ArgumentParser(description="Finetune a transformers model on a causal language modeling task") + parser.add_argument( + "--config_file", + type=str, + required=True, + default=None, + help="The name of the dataset to use (via the datasets library).", + ) + + # Print help if no arguments were provided + if len(sys.argv) == 1: + parser.print_help(sys.stderr) + sys.exit(1) + + args = parser.parse_args() + config_file = args.config_file + + with open(config_file) as f: + finetune_config = parse_yaml_raw_as(FinetuneConfig, f) + return finetune_config.dict() + + +def main(external_config=None): + if not external_config: + config = get_finetune_config() + else: + config = external_config + + config["cwd"] = os.getcwd() + + num_training_workers = config["Training"].get("num_training_workers") + resources_per_worker = config["Training"].get("resources_per_worker") + + if num_training_workers > 1 and config["Training"].get("accelerate_mode", None) is None: + config["Training"]["accelerate_mode"] = "DDP" # will use DDP to accelerate if no method specified + + ccl_worker_count = 1 + device = config["Training"]["device"] + if device != "cpu": + ccl_worker_count = num_training_workers + + if not ray.is_initialized(): + runtime_env = { + "env_vars": { + "OMP_NUM_THREADS": str(resources_per_worker["CPU"]), + "CCL_ZE_IPC_EXCHANGE": "sockets", + "CCL_WORKER_COUNT": str(ccl_worker_count), + "CCL_LOG_LEVEL": "info", + "FI_TCP_IFACE": "lo", + "FI_PROVIDER": "tcp", + } + } + + if config["General"]["gpt_base_model"] is True: + runtime_env["pip"] = ["transformers==4.26.0"] + + if device == "gpu": + num_cpus = resources_per_worker["CPU"] * num_training_workers + 1 # additional 1 for head worker + ray.init(num_cpus=num_cpus, runtime_env=runtime_env) + else: + ray.init(runtime_env=runtime_env) + + logger.info(f"ray available resources = {ray.available_resources()}") + use_gpu = True if device == "gpu" else False + scaling_config = ScalingConfig( + num_workers=num_training_workers, + use_gpu=use_gpu, + resources_per_worker=resources_per_worker, + placement_strategy="SPREAD", + ) + + # if try to use Intel GPU, convert device to 'xpu' + # due to accelerate internal use 'xpu' represent Intel GPU + if device == "gpu": + from accelerate.utils import is_xpu_available + + if is_xpu_available(): + device = "xpu" + + if config.get("torch_config", None) is None: + backend = None + if device == "cpu" or device == "xpu" or device == "gpu": + backend = "ccl" + elif device == "hpu": + backend = "hccl" + torch_config = common.TorchConfig(backend=backend, device=device) + else: + customer_torch_config = config.get("torch_config") 
+ torch_config = common.TorchConfig(**customer_torch_config, device=device) + + if config.get("failure_config", None) is None: + failure_config = FailureConfig() + else: + customer_failure_config = config.get("failure_config") + failure_config = FailureConfig(**customer_failure_config) + + if config.get("run_config", None) is None: + run_config = RunConfig(failure_config=failure_config) + else: + customer_run_config = config.get("run_config") + if customer_run_config.get("failure_config", None) is None: + customer_run_config["failure_config"] = failure_config + run_config = RunConfig(**customer_run_config) + + trainer = TorchTrainer( + train_func, + train_loop_config=config, + scaling_config=scaling_config, + torch_config=torch_config, + run_config=run_config, + ) + results = trainer.fit() + if external_config is not None: + return results + + +if __name__ == "__main__": + main() diff --git a/comps/finetuning/llm_on_ray/finetune/finetune_config.py b/comps/finetuning/llm_on_ray/finetune/finetune_config.py new file mode 100644 index 000000000..391c6e6c8 --- /dev/null +++ b/comps/finetuning/llm_on_ray/finetune/finetune_config.py @@ -0,0 +1,156 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +# Copyright 2023 The LLM-on-Ray Authors. + +from typing import List, Optional, Union + +from pydantic import BaseModel, validator + +PRECISION_BF16 = "bf16" +PRECISION_FP16 = "fp16" +PRECISION_NO = "no" + +DEVICE_CPU = "cpu" +DEVICE_HPU = "hpu" +DEVICE_GPU = "gpu" + +ACCELERATE_STRATEGY_DDP = "DDP" +ACCELERATE_STRATEGY_FSDP = "FSDP" +ACCELERATE_STRATEGY_DEEPSPEED = "DEEPSPEED" + + +class GeneralConfig(BaseModel): + trust_remote_code: bool + use_auth_token: Optional[str] + + +class LoraConfig(BaseModel): + task_type: str + r: int + lora_alpha: int + lora_dropout: float + target_modules: Optional[List[str]] = None + + +class General(BaseModel): + base_model: str + tokenizer_name: Optional[str] = None + gaudi_config_name: Optional[str] = None + gpt_base_model: bool + output_dir: str + report_to: str = "none" + resume_from_checkpoint: Optional[str] = None + save_strategy: str = "no" + config: GeneralConfig + lora_config: Optional[LoraConfig] = None + enable_gradient_checkpointing: bool = False + + @validator("report_to") + def check_report_to(cls, v: str): + assert v in ["none", "tensorboard"] + return v + + +class Dataset(BaseModel): + train_file: str + validation_file: Optional[str] + validation_split_percentage: int + max_length: int = 512 + group: bool = True + block_size: int = 512 + shuffle: bool = False + max_source_length: int = 384 + padding_side: str = "right" + truncation_side: str = "right" + max_seq_length: int = 512 + truncation: bool = True + padding: bool = True + mask_input: bool = True + mask_response: bool = True + data_preprocess_type: str = "neural_chat" + max_train_samples: int = 0 + max_eval_samples: int = 0 + + +class RayResourceConfig(BaseModel): + CPU: int + GPU: int = 0 + HPU: int = 0 + + +class Training(BaseModel): + optimizer: str + batch_size: int + epochs: int + max_train_steps: Optional[int] = None + learning_rate: float + lr_scheduler: str + weight_decay: float + device: str = DEVICE_CPU + hpu_execution_mode: str = "lazy" + num_training_workers: int + resources_per_worker: RayResourceConfig + accelerate_mode: str = ACCELERATE_STRATEGY_DDP + mixed_precision: str = PRECISION_NO + gradient_accumulation_steps: int = 1 + logging_steps: int = 10 + deepspeed_config_file: str = "" + + @validator("device") + def check_device(cls, v: str): + # will 
convert to lower case + if v: + assert v.lower() in [DEVICE_CPU, DEVICE_GPU, DEVICE_HPU] + return v.lower() + + @validator("hpu_execution_mode") + def check_hpu_execution_mode(cls, v: str): + if v: + assert v in ["lazy", "eager", "eager.compile"] + return v + + @validator("accelerate_mode") + def check_accelerate_mode(cls, v: str): + if v: + assert v in [ + ACCELERATE_STRATEGY_DDP, + ACCELERATE_STRATEGY_FSDP, + ACCELERATE_STRATEGY_DEEPSPEED, + ] + return v + + @validator("mixed_precision") + def check_mixed_precision(cls, v: str): + if v: + assert v in [PRECISION_BF16, PRECISION_FP16, PRECISION_NO] + return v + + @validator("logging_steps") + def check_logging_steps(cls, v: int): + assert v > 0 + return v + + # @model_validator(mode='after') + # def check_device_and_accelerate_mode(self) -> "Training": + # dev = self.device + # res = self.resources_per_worker + # mode = self.accelerate_mode + # if dev == "CPU": + # if res.GPU is not None and res.GPU > 0: + # raise ValueError("Please not specified GPU resource when use CPU only in Ray.") + # if mode != "CPU_DDP": + # raise ValueError("Please specified CPU related accelerate mode when use CPU only in Ray.") + # elif dev == "GPU": + # if res.GPU is None or res.GPU == 0: + # raise ValueError("Please specified GPU resource when use GPU to fine tune in Ray.") + # if mode not in ["GPU_DDP", "GPU_FSDP"]: + # raise ValueError("Please speicifed GPU related accelerate mode when use GPU to fine tune in Ray.") + + # return self + + +class FinetuneConfig(BaseModel): + General: General + Dataset: Dataset + Training: Training diff --git a/comps/finetuning/models/llama-2-7b-chat-hf.yaml b/comps/finetuning/models/llama-2-7b-chat-hf.yaml new file mode 100644 index 000000000..d6ae5f34d --- /dev/null +++ b/comps/finetuning/models/llama-2-7b-chat-hf.yaml @@ -0,0 +1,39 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +General: + base_model: meta-llama/Llama-2-7b-chat-hf + output_dir: "./tmp" + gpt_base_model: false + save_strategy: no + config: + trust_remote_code: false + use_auth_token: null + lora_config: + task_type: CAUSAL_LM + r: 8 + lora_alpha: 32 + lora_dropout: 0.1 + target_modules: + - q_proj + - v_proj + enable_gradient_checkpointing: false +Dataset: + train_file: examples/data/sample_finetune_data_small.jsonl + group: false + validation_file: null + validation_split_percentage: 5 +Training: + optimizer: adamw_torch + batch_size: 2 + epochs: 3 + learning_rate: 1.0e-05 + lr_scheduler: linear + weight_decay: 0.0 + mixed_precision: bf16 + device: cpu + num_training_workers: 1 + resources_per_worker: + CPU: 32 + gradient_accumulation_steps: 1 + logging_steps: 10 diff --git a/comps/finetuning/models/mistral-7b-v0.1.yaml b/comps/finetuning/models/mistral-7b-v0.1.yaml new file mode 100644 index 000000000..4334fa37e --- /dev/null +++ b/comps/finetuning/models/mistral-7b-v0.1.yaml @@ -0,0 +1,45 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +General: + base_model: mistralai/Mistral-7B-v0.1 + output_dir: "./tmp" + gpt_base_model: false + save_strategy: no + config: + trust_remote_code: false + use_auth_token: null + lora_config: + task_type: CAUSAL_LM + r: 8 + lora_alpha: 32 + lora_dropout: 0.1 + target_modules: + - q_proj + - k_proj + - v_proj + - o_proj + - gate_proj + - up_proj + - down_proj + - lm_head + enable_gradient_checkpointing: false +Dataset: + train_file: examples/data/sample_finetune_data_small.jsonl + validation_file: null + validation_split_percentage: 5 +Training: + 
optimizer: adamw_torch + batch_size: 2 + epochs: 3 + learning_rate: 1.0e-05 + lr_scheduler: linear + weight_decay: 0.0 + mixed_precision: bf16 + device: cpu + num_training_workers: 2 + resources_per_worker: + CPU: 32 + accelerate_mode: DDP + gradient_accumulation_steps: 1 + logging_steps: 10 diff --git a/comps/finetuning/requirements.txt b/comps/finetuning/requirements.txt new file mode 100644 index 000000000..4255a3716 --- /dev/null +++ b/comps/finetuning/requirements.txt @@ -0,0 +1,19 @@ +aiohttp +datasets +docarray +fastapi +httpx +opentelemetry-api +opentelemetry-exporter-otlp +opentelemetry-sdk +peft +prometheus-fastapi-instrumentator +pydantic==2.8.2 +pydantic_yaml +python-multipart +pyyaml +ray[all] +requests +shortuuid +transformers +uvicorn diff --git a/comps/guardrails/llama_guard/README.md b/comps/guardrails/llama_guard/README.md index 94bdcd952..019aab5e3 100644 --- a/comps/guardrails/llama_guard/README.md +++ b/comps/guardrails/llama_guard/README.md @@ -20,30 +20,30 @@ Any content that is detected in the following categories is determined as unsafe - Regulated or Controlled Substances - Suicide & Self Harm -# ๐Ÿš€1. Start Microservice with Python (Option 1) +## ๐Ÿš€1. Start Microservice with Python (Option 1) To start the Guardrails microservice, you need to install python packages first. -## 1.1 Install Requirements +### 1.1 Install Requirements ```bash pip install -r requirements.txt ``` -## 1.2 Start TGI Gaudi Service +### 1.2 Start TGI Gaudi Service ```bash export HF_TOKEN=${your_hf_api_token} export LANGCHAIN_TRACING_V2=true export LANGCHAIN_API_KEY=${your_langchain_api_key} -export LANGCHAIN_PROJECT="opea/gaurdrails" +export LANGCHAIN_PROJECT="opea/guardrails" volume=$PWD/data model_id="meta-llama/Meta-Llama-Guard-2-8B" docker pull ghcr.io/huggingface/tgi-gaudi:2.0.1 docker run -p 8088:80 -v $volume:/data --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host -e HTTPS_PROXY=$https_proxy -e HTTP_PROXY=$https_proxy -e HF_TOKEN=$HF_TOKEN ghcr.io/huggingface/tgi-gaudi:2.0.1 --model-id $model_id --max-input-length 1024 --max-total-tokens 2048 ``` -## 1.3 Verify the TGI Gaudi Service +### 1.3 Verify the TGI Gaudi Service ```bash curl 127.0.0.1:8088/generate \ @@ -52,7 +52,7 @@ curl 127.0.0.1:8088/generate \ -H 'Content-Type: application/json' ``` -## 1.4 Start Guardrails Service +### 1.4 Start Guardrails Service Optional: If you have deployed a Guardrails model with TGI Gaudi Service other than default model (i.e., `meta-llama/Meta-Llama-Guard-2-8B`) [from section 1.2](## 1.2 Start TGI Gaudi Service), you will need to add the eviornment variable `SAFETY_GUARD_MODEL_ID` containing the model id. For example, the following informs the Guardrails Service the deployed model used LlamaGuard2: @@ -65,11 +65,11 @@ export SAFETY_GUARD_ENDPOINT="http://${your_ip}:8088" python langchain/guardrails_tgi.py ``` -# ๐Ÿš€2. Start Microservice with Docker (Option 2) +## ๐Ÿš€2. Start Microservice with Docker (Option 2) If you start an Guardrails microservice with docker, the `docker_compose_guardrails.yaml` file will automatically start a TGI gaudi service with docker. -## 2.1 Setup Environment Variables +### 2.1 Setup Environment Variables In order to start TGI and LLM services, you need to setup the following environment variables first. 
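Beyond the curl commands shown in sections 1.3 and 3.2 of this README, the running guardrails service can also be exercised from Python. The snippet below is a hedged sketch rather than part of this README: it assumes the service is listening on localhost:9090 as configured above, and that the request body is a plain `{"text": ...}` JSON object matching the `TextDoc` input schema.

```python
# Hedged sketch (not part of this README): call the guardrails endpoint from
# Python once the service from section 2.3/2.4 is up. Host, port and route
# follow this README; the "text" field mirrors the TextDoc input schema.
import requests

url = "http://localhost:9090/v1/guardrails"
payload = {"text": "How do I pick a lock?"}

resp = requests.post(url, json=payload, timeout=60)
resp.raise_for_status()
# A safe input is echoed back; an unsafe one returns a
# "Violated policies: ..." message with a downstream block list.
print(resp.json())
```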
@@ -79,29 +79,29 @@ export SAFETY_GUARD_ENDPOINT="http://${your_ip}:8088" export LLM_MODEL_ID=${your_hf_llm_model} ``` -## 2.2 Build Docker Image +### 2.2 Build Docker Image ```bash cd ../../ docker build -t opea/guardrails-tgi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/guardrails/llama_guard/docker/Dockerfile . ``` -## 2.3 Run Docker with CLI +### 2.3 Run Docker with CLI ```bash docker run -d --name="guardrails-tgi-server" -p 9090:9090 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e no_proxy=$no_proxy -e SAFETY_GUARD_ENDPOINT=$SAFETY_GUARD_ENDPOINT -e HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN opea/guardrails-tgi:latest ``` -## 2.4 Run Docker with Docker Compose +### 2.4 Run Docker with Docker Compose ```bash cd langchain/docker docker compose -f docker_compose_guardrails.yaml up -d ``` -# ๐Ÿš€3. Consume Guardrails Service +## ๐Ÿš€3. Consume Guardrails Service -## 3.1 Check Service Status +### 3.1 Check Service Status ```bash curl http://localhost:9090/v1/health_check\ @@ -109,7 +109,7 @@ curl http://localhost:9090/v1/health_check\ -H 'Content-Type: application/json' ``` -## 3.2 Consume Guardrails Service +### 3.2 Consume Guardrails Service ```bash curl http://localhost:9090/v1/guardrails\ diff --git a/comps/guardrails/llama_guard/docker/Dockerfile b/comps/guardrails/llama_guard/docker/Dockerfile index aaec44a07..491a4171f 100644 --- a/comps/guardrails/llama_guard/docker/Dockerfile +++ b/comps/guardrails/llama_guard/docker/Dockerfile @@ -9,8 +9,7 @@ ARG ARCH="cpu" RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ libgl1-mesa-glx \ - libjemalloc-dev \ - vim + libjemalloc-dev RUN useradd -m -s /bin/bash user && \ mkdir -p /home/user && \ diff --git a/comps/guardrails/llama_guard/guardrails_tgi.py b/comps/guardrails/llama_guard/guardrails_tgi.py index 96a89b8c8..ecbcb7778 100644 --- a/comps/guardrails/llama_guard/guardrails_tgi.py +++ b/comps/guardrails/llama_guard/guardrails_tgi.py @@ -2,13 +2,16 @@ # SPDX-License-Identifier: Apache-2.0 import os +from typing import List, Union from langchain_community.utilities.requests import JsonRequestsWrapper from langchain_huggingface import ChatHuggingFace from langchain_huggingface.llms import HuggingFaceEndpoint -from langsmith import traceable -from comps import ServiceType, TextDoc, opea_microservices, register_microservice +from comps import CustomLogger, GeneratedDoc, ServiceType, TextDoc, opea_microservices, register_microservice + +logger = CustomLogger("guardrails_tgi") +logflag = os.getenv("LOGFLAG", False) DEFAULT_MODEL = "meta-llama/LlamaGuard-7b" @@ -59,23 +62,31 @@ def get_tgi_service_model_id(endpoint_url, default=DEFAULT_MODEL): endpoint="/v1/guardrails", host="0.0.0.0", port=9090, - input_datatype=TextDoc, + input_datatype=Union[GeneratedDoc, TextDoc], output_datatype=TextDoc, ) -@traceable(run_type="llm") -def safety_guard(input: TextDoc) -> TextDoc: - response_input_guard = llm_engine_hf.invoke([{"role": "user", "content": input.text}]).content +def safety_guard(input: Union[GeneratedDoc, TextDoc]) -> TextDoc: + if logflag: + logger.info(input) + if isinstance(input, GeneratedDoc): + messages = [{"role": "user", "content": input.prompt}, {"role": "assistant", "content": input.text}] + else: + messages = [{"role": "user", "content": input.text}] + response_input_guard = llm_engine_hf.invoke(messages).content + if "unsafe" in response_input_guard: unsafe_dict = get_unsafe_dict(llm_engine_hf.model_id) policy_violation_level = 
response_input_guard.split("\n")[1].strip() policy_violations = unsafe_dict[policy_violation_level] - print(f"Violated policies: {policy_violations}") + if logflag: + logger.info(f"Violated policies: {policy_violations}") res = TextDoc( text=f"Violated policies: {policy_violations}, please check your input.", downstream_black_list=[".*"] ) else: res = TextDoc(text=input.text) - + if logflag: + logger.info(res) return res @@ -93,5 +104,5 @@ def safety_guard(input: TextDoc) -> TextDoc: ) # chat engine for server-side prompt templating llm_engine_hf = ChatHuggingFace(llm=llm_guard, model_id=safety_guard_model) - print("guardrails - router] LLM initialized.") + logger.info("guardrails - router] LLM initialized.") opea_microservices["opea_service@guardrails_tgi"].start() diff --git a/comps/guardrails/llama_guard/requirements.txt b/comps/guardrails/llama_guard/requirements.txt index 5fd992e66..9f35ee74d 100644 --- a/comps/guardrails/llama_guard/requirements.txt +++ b/comps/guardrails/llama_guard/requirements.txt @@ -1,9 +1,9 @@ docarray[full] fastapi -huggingface_hub +# Fix for issue with langchain-huggingface not using InferenceClient `base_url` kwarg +huggingface-hub<=0.24.0 langchain-community langchain-huggingface -langsmith opentelemetry-api opentelemetry-exporter-otlp opentelemetry-sdk diff --git a/comps/guardrails/pii_detection/custom/README 2.md b/comps/guardrails/pii_detection/custom/README 2.md index ee43ef9a1..a94a99919 100644 --- a/comps/guardrails/pii_detection/custom/README 2.md +++ b/comps/guardrails/pii_detection/custom/README 2.md @@ -19,7 +19,7 @@ The classifiler model is used together with an embedding model to make predictio Currently this strategy can detect both personal sensitive and business sensitive information such as financial figures and performance reviews. Please refer to the [model card](<(https://huggingface.co/Intel/business_safety_logistic_regression_classifier)>) to see the performance of our demo model on the Patronus EnterprisePII dataset. -# Input and output +## Input and output Users can send a list of files, a list of text strings, or a list of urls to the microservice, and the microservice will return a list of True or False for each piece of text following the original sequence. @@ -27,15 +27,15 @@ For a concrete example of what input should look like, please refer to [Consume The output will be a list of booleans, which can be parsed and used as conditions in a bigger application. -# ๐Ÿš€1. Start Microservice with Python๏ผˆOption 1๏ผ‰ +## ๐Ÿš€1. Start Microservice with Python๏ผˆOption 1๏ผ‰ -## 1.1 Install Requirements +### 1.1 Install Requirements ```bash pip install -r requirements.txt ``` -## 1.2 Start PII Detection Microservice with Python Script +### 1.2 Start PII Detection Microservice with Python Script Start pii detection microservice with below command. @@ -43,24 +43,24 @@ Start pii detection microservice with below command. python pii_detection.py ``` -# ๐Ÿš€2. Start Microservice with Docker (Option 2) +## ๐Ÿš€2. 
Start Microservice with Docker (Option 2) -## 2.1 Prepare PII detection model +### 2.1 Prepare PII detection model export HUGGINGFACEHUB_API_TOKEN=${HP_TOKEN} -## 2.1.1 use LLM endpoint (will add later) +#### 2.1.1 use LLM endpoint (will add later) intro placeholder -## 2.2 Build Docker Image +### 2.2 Build Docker Image ```bash cd ../../../ # back to GenAIComps/ folder docker build -t opea/guardrails-pii-detection:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/guardrails/pii_detection/custom/docker/Dockerfile . ``` -## 2.3 Run Docker with CLI +### 2.3 Run Docker with CLI ```bash docker run -d --rm --runtime=runc --name="guardrails-pii-detection-endpoint" -p 6357:6357 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} -e HF_TOKEN=${HUGGINGFACEHUB_API_TOKEN} opea/guardrails-pii-detection:latest @@ -72,13 +72,13 @@ docker run -d --rm --runtime=runc --name="guardrails-pii-detection-endpoint" -p docker run --rm --runtime=runc --name="guardrails-pii-detection-endpoint" -p 6357:6357 -v ./comps/guardrails/pii_detection/:/home/user/comps/guardrails/pii_detection/custom/ --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} -e HF_TOKEN=${HUGGINGFACEHUB_API_TOKEN} opea/guardrails-pii-detection:latest ``` -# ๐Ÿš€3. Get Status of Microservice +## ๐Ÿš€3. Get Status of Microservice ```bash docker container logs -f guardrails-pii-detection-endpoint ``` -# ๐Ÿš€4. Consume Microservice +## ๐Ÿš€4. Consume Microservice Once microservice starts, user can use below script to invoke the microservice for pii detection. diff --git a/comps/guardrails/pii_detection/custom/docker/Dockerfile b/comps/guardrails/pii_detection/custom/docker/Dockerfile index f7a69757c..b29ff09ba 100644 --- a/comps/guardrails/pii_detection/custom/docker/Dockerfile +++ b/comps/guardrails/pii_detection/custom/docker/Dockerfile @@ -10,8 +10,7 @@ ENV LANG=C.UTF-8 RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ build-essential \ libgl1-mesa-glx \ - libjemalloc-dev \ - vim + libjemalloc-dev RUN useradd -m -s /bin/bash user && \ mkdir -p /home/user && \ diff --git a/comps/guardrails/pii_detection/custom/pii_detection.py b/comps/guardrails/pii_detection/custom/pii_detection.py index a5352e615..d8851bc68 100644 --- a/comps/guardrails/pii_detection/custom/pii_detection.py +++ b/comps/guardrails/pii_detection/custom/pii_detection.py @@ -8,7 +8,6 @@ from pathlib import Path from fastapi import File, Form, HTTPException, UploadFile -from langsmith import traceable cur_path = pathlib.Path(__file__).parent.resolve() comps_path = os.path.join(cur_path, "../../../../") @@ -18,7 +17,7 @@ from tqdm import tqdm -from comps import DocPath, opea_microservices, register_microservice +from comps import CustomLogger, DocPath, opea_microservices, register_microservice from comps.guardrails.pii_detection.data_utils import document_loader, parse_html from comps.guardrails.pii_detection.pii.pii_utils import ( PIIDetector, @@ -35,11 +34,23 @@ save_file_to_local_disk, ) +logger = CustomLogger("guardrails-pii-detection") +logflag = os.getenv("LOGFLAG", False) + def get_pii_detection_inst(strategy="dummy", settings=None): if strategy == "ner": +<<<<<<< HEAD:comps/guardrails/pii_detection/custom/pii_detection.py + return PIIDetectorWithNER() + elif strategy == "ml": +======= + if logflag: + logger.info("invoking NER detector.......") return PIIDetectorWithNER() 
elif strategy == "ml": + if logflag: + logger.info("invoking ML detector.......") +>>>>>>> upstream/main:comps/guardrails/pii_detection/pii_detection.py return PIIDetectorWithML() elif strategy == "llm": return PIIDetectorWithLLM() @@ -55,7 +66,8 @@ def file_based_pii_detect(file_list: List[DocPath], strategy, enable_ray=False, if enable_ray: num_cpus = get_max_cpus(len(file_list)) - print(f"per task num_cpus: {num_cpus}") + if logflag: + logger.info(f"per task num_cpus: {num_cpus}") log_name = generate_log_name(file_list) ds = rayds_initialization(file_list, document_loader, lazy_mode=True, num_cpus=num_cpus) @@ -82,7 +94,8 @@ def _parse_html(link): if enable_ray: num_cpus = get_max_cpus(len(link_list)) - print(f"per task num_cpus: {num_cpus}") + if logflag: + logger.info(f"per task num_cpus: {num_cpus}") log_name = generate_log_name(link_list) ds = rayds_initialization(link_list, _parse_html, lazy_mode=True, num_cpus=num_cpus) @@ -93,9 +106,15 @@ def _parse_html(link): for link in tqdm(link_list, total=len(link_list)): with Timer(f"read document {link}."): data = _parse_html(link) +<<<<<<< HEAD:comps/guardrails/pii_detection/custom/pii_detection.py if debug: print("content is: ", data) with Timer(f"detect pii on document {link} to Redis."): +======= + if debug or logflag: + logger.info("content is: ", data) + with Timer(f"detect pii on document {link}"): +>>>>>>> upstream/main:comps/guardrails/pii_detection/pii_detection.py ret.append(pii_detector.detect_pii(data)) return ret @@ -106,7 +125,8 @@ def text_based_pii_detect(text_list: List[str], strategy, enable_ray=False, debu if enable_ray: num_cpus = get_max_cpus(len(text_list)) - print(f"per task num_cpus: {num_cpus}") + if logflag: + logger.info(f"per task num_cpus: {num_cpus}") log_name = generate_log_name(text_list) ds = rayds_initialization(text_list, None, lazy_mode=True, num_cpus=num_cpus) @@ -115,9 +135,15 @@ def text_based_pii_detect(text_list: List[str], strategy, enable_ray=False, debu else: ret = [] for data in tqdm(text_list, total=len(text_list)): +<<<<<<< HEAD:comps/guardrails/pii_detection/custom/pii_detection.py if debug: print("content is: ", data) with Timer(f"detect pii on document {data[:50]} to Redis."): +======= + if debug or logflag: + logger.info("content is: ", data) + with Timer(f"detect pii on document {data[:50]}"): +>>>>>>> upstream/main:comps/guardrails/pii_detection/pii_detection.py ret.append(pii_detector.detect_pii(data)) return ret @@ -125,11 +151,34 @@ def text_based_pii_detect(text_list: List[str], strategy, enable_ray=False, debu @register_microservice( name="opea_service@guardrails-pii-detection", endpoint="/v1/piidetect", host="0.0.0.0", port=6357 ) +<<<<<<< HEAD:comps/guardrails/pii_detection/custom/pii_detection.py async def pii_detection(files: List[UploadFile] = File(None), link_list: str = Form(None), text_list: str = Form(None)): if not files and not link_list and not text_list: raise HTTPException(status_code=400, detail="Either files, link_list, or text_list must be provided.") strategy = "ner" # Default strategy +======= +async def pii_detection( + files: List[UploadFile] = File(None), + link_list: str = Form(None), + text_list: str = Form(None), + strategy: str = Form(None), +): + if logflag: + logger.info(files) + logger.info(link_list) + logger.info(text_list) + logger.info(strategy) + if not files and not link_list and not text_list: + raise HTTPException(status_code=400, detail="Either files, link_list, or text_list must be provided.") + + if strategy is None: + strategy = "ner" + + 
if logflag: + logger.info("PII detection using strategy: ", strategy) + +>>>>>>> upstream/main:comps/guardrails/pii_detection/pii_detection.py pip_requirement = ["detect-secrets", "phonenumbers", "gibberish-detector"] if files: @@ -151,7 +200,10 @@ async def pii_detection(files: List[UploadFile] = File(None), link_list: str = F if enable_ray: prepare_env(enable_ray=enable_ray, pip_requirements=pip_requirement, comps_path=comps_path) ret = file_based_pii_detect(saved_path_list, strategy, enable_ray=enable_ray) - return {"status": 200, "message": json.dumps(ret)} + result = {"status": 200, "message": json.dumps(ret)} + if logflag: + logger.info(result) + return result except Exception as e: raise HTTPException(status_code=400, detail=f"An error occurred: {e}") @@ -164,7 +216,10 @@ async def pii_detection(files: List[UploadFile] = File(None), link_list: str = F if enable_ray: prepare_env(enable_ray=enable_ray, pip_requirements=pip_requirement, comps_path=comps_path) ret = text_based_pii_detect(text_list, strategy, enable_ray=enable_ray) - return {"status": 200, "message": json.dumps(ret)} + result = {"status": 200, "message": json.dumps(ret)} + if logflag: + logger.info(result) + return result except json.JSONDecodeError: raise HTTPException(status_code=400, detail="Invalid JSON format for link_list.") except Exception as e: @@ -179,7 +234,10 @@ async def pii_detection(files: List[UploadFile] = File(None), link_list: str = F if enable_ray: prepare_env(enable_ray=enable_ray, pip_requirements=pip_requirement, comps_path=comps_path) ret = link_based_pii_detect(link_list, strategy, enable_ray=enable_ray) - return {"status": 200, "message": json.dumps(ret)} + result = {"status": 200, "message": json.dumps(ret)} + if logflag: + logger.info(result) + return result except json.JSONDecodeError: raise HTTPException(status_code=400, detail="Invalid JSON format for link_list.") except Exception as e: diff --git a/comps/guardrails/toxicity_detection/README.md b/comps/guardrails/toxicity_detection/README.md new file mode 100644 index 000000000..caeda6726 --- /dev/null +++ b/comps/guardrails/toxicity_detection/README.md @@ -0,0 +1,88 @@ +# โ˜ฃ๏ธ๐Ÿ’ฅ๐Ÿ›ก๏ธToxicity Detection Microservice + +## Introduction + +Toxicity Detection Microservice allows AI Application developers to safeguard user input and LLM output from harmful language in a RAG environment. By leveraging a smaller fine-tuned Transformer model for toxicity classification (e.g. DistilledBERT, RoBERTa, etc.), we maintain a lightweight guardrails microservice without significantly sacrificing performance making it readily deployable on both Intel Gaudi and Xeon. + +Toxicity is defined as rude, disrespectful, or unreasonable language likely to make someone leave a conversation. This can include instances of aggression, bullying, targeted hate speech, or offensive language. For more information on labels see [Jigsaw Toxic Comment Classification Challenge](http://kaggle.com/c/jigsaw-toxic-comment-classification-challenge). + +## Future Development + +- Add a RoBERTa (125M params) toxicity model fine-tuned on Gaudi2 with ToxicChat and Jigsaw dataset in an optimized serving framework. + +# ๐Ÿš€1. Start Microservice with Python๏ผˆOption 1๏ผ‰ + +## 1.1 Install Requirements + +```bash +pip install -r requirements.txt +``` + +## 1.2 Start Toxicity Detection Microservice with Python Script + +```bash +python toxicity_detection.py +``` + +# ๐Ÿš€2. 
Start Microservice with Docker (Option 2) + +## 2.1 Prepare toxicity detection model + +export HUGGINGFACEHUB_API_TOKEN=${HP_TOKEN} + +## 2.2 Build Docker Image + +```bash +cd ../../../ # back to GenAIComps/ folder +docker build -t opea/guardrails-toxicity-detection:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/guardrails/toxicity_detection/docker/Dockerfile . +``` + +## 2.3 Run Docker Container with Microservice + +```bash +docker run -d --rm --runtime=runc --name="guardrails-toxicity-detection-endpoint" -p 9091:9091 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} -e HF_TOKEN=${HUGGINGFACEHUB_API_TOKEN} opea/guardrails-toxicity-detection:latest +``` + +# ๐Ÿš€3. Get Status of Microservice + +```bash +docker container logs -f guardrails-toxicity-detection-endpoint +``` + +# ๐Ÿš€4. Consume Microservice Pre-LLM/Post-LLM + +Once microservice starts, users can use examples (bash or python) below to apply toxicity detection for both user's query (Pre-LLM) or LLM's response (Post-LLM) + +**Bash:** + +```bash +curl localhost:9091/v1/toxicity + -X POST + -d '{"text":"How to poison my neighbor'\''s dog without being caught?"}' + -H 'Content-Type: application/json' +``` + +Example Output: + +```bash +"\nI'm sorry, but your query or LLM's response is TOXIC with an score of 0.97 (0-1)!!!\n" +``` + +**Python Script:** + +```python +import requests +import json + +proxies = {"http": ""} +url = "http://localhost:9091/v1/toxicity" +data = {"text": "How to poison my neighbor'''s dog without being caught?"} + +try: + resp = requests.post(url=url, data=data, proxies=proxies) + print(resp.text) + resp.raise_for_status() # Raise an exception for unsuccessful HTTP status codes + print("Request successful!") +except requests.exceptions.RequestException as e: + print("An error occurred:", e) +``` diff --git a/comps/guardrails/toxicity_detection/docker/Dockerfile b/comps/guardrails/toxicity_detection/docker/Dockerfile new file mode 100644 index 000000000..369b5e5af --- /dev/null +++ b/comps/guardrails/toxicity_detection/docker/Dockerfile @@ -0,0 +1,30 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +FROM langchain/langchain:latest + +ENV LANG=C.UTF-8 + +ARG ARCH="cpu" + +RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ + libgl1-mesa-glx \ + libjemalloc-dev + +RUN useradd -m -s /bin/bash user && \ + mkdir -p /home/user && \ + chown -R user /home/user/ + +USER user + +COPY comps /home/user/comps + +RUN pip install --no-cache-dir --upgrade pip && \ + if [ ${ARCH} = "cpu" ]; then pip install torch --index-url https://download.pytorch.org/whl/cpu; fi && \ + pip install --no-cache-dir -r /home/user/comps/guardrails/toxicity_detection/requirements.txt + +ENV PYTHONPATH=$PYTHONPATH:/home/user + +WORKDIR /home/user/comps/guardrails/toxicity_detection/ + +ENTRYPOINT ["python", "toxicity_detection.py"] diff --git a/comps/guardrails/toxicity_detection/requirements.txt b/comps/guardrails/toxicity_detection/requirements.txt new file mode 100644 index 000000000..64bfa169c --- /dev/null +++ b/comps/guardrails/toxicity_detection/requirements.txt @@ -0,0 +1,15 @@ +aiohttp +docarray[full] +fastapi +httpx +huggingface_hub +langchain-community +langchain-huggingface +opentelemetry-api +opentelemetry-exporter-otlp +opentelemetry-sdk +prometheus-fastapi-instrumentator +pyyaml +requests +shortuuid +uvicorn diff --git 
a/comps/guardrails/toxicity_detection/toxicity_detection.py b/comps/guardrails/toxicity_detection/toxicity_detection.py new file mode 100644 index 000000000..df965505f --- /dev/null +++ b/comps/guardrails/toxicity_detection/toxicity_detection.py @@ -0,0 +1,31 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from transformers import pipeline + +from comps import ServiceType, TextDoc, opea_microservices, register_microservice + + +@register_microservice( + name="opea_service@toxicity_detection", + service_type=ServiceType.GUARDRAIL, + endpoint="/v1/toxicity", + host="0.0.0.0", + port=9091, + input_datatype=TextDoc, + output_datatype=TextDoc, +) +def llm_generate(input: TextDoc): + input_text = input.text + toxic = toxicity_pipeline(input_text) + print("done") + if toxic[0]["label"] == "toxic": + return TextDoc(text="Violated policies: toxicity, please check your input.", downstream_black_list=[".*"]) + else: + return TextDoc(text=input_text) + + +if __name__ == "__main__": + model = "citizenlab/distilbert-base-multilingual-cased-toxicity" + toxicity_pipeline = pipeline("text-classification", model=model, tokenizer=model) + opea_microservices["opea_service@toxicity_detection"].start() diff --git a/comps/intent_detection/README.md b/comps/intent_detection/README.md new file mode 100644 index 000000000..fa9062bb6 --- /dev/null +++ b/comps/intent_detection/README.md @@ -0,0 +1,99 @@ +# Intent Detection Microservice by TGI + +# ๐Ÿš€1. Start Microservice with Python๏ผˆOption 1๏ผ‰ + +## 1.1 Install Requirements + +```bash +pip install -r requirements.txt +``` + +## 1.2 Start TGI Service + +```bash +export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token} +export LANGCHAIN_TRACING_V2=true +export LANGCHAIN_API_KEY=${your_langchain_api_key} +export LANGCHAIN_PROJECT="opea/gen-ai-comps:llms" +docker run -p 8008:80 -v ./data:/data --name tgi_service --shm-size 1g ghcr.io/huggingface/text-generation-inference:1.4 --model-id ${your_hf_llm_model} +``` + +## 1.3 Verify the TGI Service + +```bash +curl http://${your_ip}:8008/generate \ + -X POST \ + -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' \ + -H 'Content-Type: application/json' +``` + +## 1.4 Setup Environment Variables + +```bash +export TGI_LLM_ENDPOINT="http://${your_ip}:8008" +export LANGCHAIN_TRACING_V2=true +export LANGCHAIN_API_KEY=${your_langchain_api_key} +export LANGCHAIN_PROJECT="opea/intent" +``` + +## 1.5 Start Intent Detection Microservice with Python Script + +Start intent detection microservice with below command. + +```bash +cd /your_project_path/GenAIComps/ +cp comps/intent_detection/langchain/intent_detection.py . +python intent_detection.py +``` + +# ๐Ÿš€2. Start Microservice with Docker (Option 2) + +## 2.1 Start TGI Service + +Please refer to 1.2. + +## 2.2 Setup Environment Variables + +```bash +export TGI_LLM_ENDPOINT="http://${your_ip}:8008" +export LANGCHAIN_TRACING_V2=true +export LANGCHAIN_API_KEY=${your_langchain_api_key} +export LANGCHAIN_PROJECT="opea/intent" +``` + +## 2.3 Build Docker Image + +```bash +cd /your_project_path/GenAIComps +docker build --no-cache -t opea/llm-tgi:latest -f comps/intent_detection/langchain/Dockerfile . 
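+# Optionally confirm the image was built before running it (a quick sanity check; the image name is taken from the build command above):
+docker images | grep "opea/llm-tgi"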
+``` + +## 2.4 Run Docker with CLI (Option A) + +```bash +docker run -it --name="intent-tgi-server" --net=host --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e TGI_LLM_ENDPOINT=$TGI_LLM_ENDPOINT -e HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN opea/llm-tgi:latest +``` + +## 2.5 Run with Docker Compose (Option B) + +```bash +cd /your_project_path/GenAIComps/comps/intent_detection/langchain +export LLM_MODEL_ID=${your_hf_llm_model} +export http_proxy=${your_http_proxy} +export https_proxy=${your_http_proxy} +export TGI_LLM_ENDPOINT="http://tgi-service:80" +export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token} +export LANGCHAIN_API_KEY=${your_langchain_api_key} +docker compose -f docker_compose_intent.yaml up -d +``` + +# ๐Ÿš€3. Consume Microservice + +Once intent detection microservice is started, user can use below command to invoke the microservice. + +```bash +curl http://${your_ip}:9000/v1/chat/intent\ + -X POST \ + -d '{"query":"What is Deep Learning?","max_new_tokens":10,"top_k":1,"temperature":0.001,"streaming":false}' \ + -H 'Content-Type: application/json' +``` diff --git a/comps/intent_detection/langchain/Dockerfile b/comps/intent_detection/langchain/Dockerfile new file mode 100644 index 000000000..297b1b88f --- /dev/null +++ b/comps/intent_detection/langchain/Dockerfile @@ -0,0 +1,25 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +FROM langchain/langchain:latest + +RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ + libgl1-mesa-glx \ + libjemalloc-dev \ + vim + +RUN useradd -m -s /bin/bash user && \ + mkdir -p /home/user && \ + chown -R user /home/user/ + +USER user + +COPY comps /home/user/comps + +RUN pip install --no-cache-dir --upgrade pip && \ + pip install --no-cache-dir -r /home/user/comps/intent_detection/langchain/requirements.txt + +ENV PYTHONPATH=$PYTHONPATH:/home/user + +WORKDIR /home/user/comps/intent_detection/langchain +ENTRYPOINT ["python", "intent_detection.py"] diff --git a/comps/intent_detection/langchain/docker_compose_intent.yaml b/comps/intent_detection/langchain/docker_compose_intent.yaml new file mode 100644 index 000000000..2a1524273 --- /dev/null +++ b/comps/intent_detection/langchain/docker_compose_intent.yaml @@ -0,0 +1,32 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +version: "3.8" + +services: + tgi_service: + image: ghcr.io/huggingface/text-generation-inference:1.4 + container_name: tgi-service + ports: + - "8008:80" + volumes: + - "./data:/data" + shm_size: 1g + command: --model-id ${LLM_MODEL_ID} + llm: + image: opea/llm-tgi:latest + container_name: intent-tgi-server + ports: + - "9000:9000" + ipc: host + environment: + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT} + HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY} + restart: unless-stopped + +networks: + default: + driver: bridge diff --git a/comps/intent_detection/langchain/intent_detection.py b/comps/intent_detection/langchain/intent_detection.py new file mode 100644 index 000000000..bf2e430c6 --- /dev/null +++ b/comps/intent_detection/langchain/intent_detection.py @@ -0,0 +1,47 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import os + +from langchain import LLMChain, PromptTemplate +from langchain_community.llms import HuggingFaceEndpoint +from langsmith import traceable + +from comps import GeneratedDoc, LLMParamsDoc, 
ServiceType, opea_microservices, register_microservice +from comps.intent_detection.langchain.template import IntentTemplate + + +@register_microservice( + name="opea_service@llm_intent", + service_type=ServiceType.LLM, + endpoint="/v1/chat/intent", + host="0.0.0.0", + port=9000, +) +@traceable(run_type="llm") +def llm_generate(input: LLMParamsDoc): + llm_endpoint = os.getenv("TGI_LLM_ENDPOINT", "http://localhost:8080") + llm = HuggingFaceEndpoint( + endpoint_url=llm_endpoint, + max_new_tokens=input.max_new_tokens, + top_k=input.top_k, + top_p=input.top_p, + typical_p=input.typical_p, + temperature=input.temperature, + repetition_penalty=input.repetition_penalty, + streaming=input.streaming, + timeout=600, + ) + + prompt = PromptTemplate(template=IntentTemplate.generate_intent_template, input_variables=["query"]) + + llm_chain = LLMChain(prompt=prompt, llm=llm) + + response = llm_chain.invoke(input.query) + response = response["text"] + print("response", response) + return GeneratedDoc(text=response, prompt=input.query) + + +if __name__ == "__main__": + opea_microservices["opea_service@llm_intent"].start() diff --git a/comps/intent_detection/langchain/requirements.txt b/comps/intent_detection/langchain/requirements.txt new file mode 100644 index 000000000..55cf47ae7 --- /dev/null +++ b/comps/intent_detection/langchain/requirements.txt @@ -0,0 +1,9 @@ +docarray[full] +fastapi +huggingface_hub +langchain==0.1.16 +opentelemetry-api +opentelemetry-exporter-otlp +opentelemetry-sdk +prometheus-fastapi-instrumentator +shortuuid diff --git a/comps/intent_detection/langchain/template.py b/comps/intent_detection/langchain/template.py new file mode 100644 index 000000000..1a425ce43 --- /dev/null +++ b/comps/intent_detection/langchain/template.py @@ -0,0 +1,8 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + + +class IntentTemplate: + def generate_intent_template(query): + return f"""Please identify the intent of the user query. You may only respond with "chitchat" or "QA" without explanations or engaging in conversation. +### User Query: {query}, ### Response: """ diff --git a/comps/knowledgegraphs/README.md b/comps/knowledgegraphs/README.md index 248e46eba..db8140b9c 100755 --- a/comps/knowledgegraphs/README.md +++ b/comps/knowledgegraphs/README.md @@ -16,9 +16,9 @@ A prerequisite for using this microservice is that users must have a knowledge g Overall, this microservice provides efficient support for applications related with graph dataset, especially for answering multi-part questions, or any other conditions including comples relationship between entities. -# ๐Ÿš€1. Start Microservice with Docker +## ๐Ÿš€1. Start Microservice with Docker -## 1.1 Setup Environment Variables +### 1.1 Setup Environment Variables ```bash export NEO4J_ENDPOINT="neo4j://${your_ip}:7687" @@ -30,7 +30,7 @@ export LLM_MODEL="meta-llama/Llama-2-7b-hf" export AGENT_LLM="HuggingFaceH4/zephyr-7b-beta" ``` -## 1.2 Start Neo4j Service +### 1.2 Start Neo4j Service ```bash docker pull neo4j @@ -43,7 +43,7 @@ docker run --rm \ neo4j ``` -## 1.3 Start LLM Service for "rag"/"query" mode +### 1.3 Start LLM Service for "rag"/"query" mode You can start any LLM microserve, here we take TGI as an example. @@ -70,7 +70,7 @@ curl $LLM_ENDPOINT/generate \ -H 'Content-Type: application/json' ``` -## 1.4 Start Microservice +### 1.4 Start Microservice ```bash cd ../.. @@ -93,9 +93,9 @@ docker run --rm \ opea/knowledge_graphs:latest ``` -# ๐Ÿš€2. Consume Knowledge Graph Service +## ๐Ÿš€2. 
Consume Knowledge Graph Service -## 2.1 Cypher mode +### 2.1 Cypher mode ```bash curl http://${your_ip}:8060/v1/graphs \ @@ -107,7 +107,7 @@ curl http://${your_ip}:8060/v1/graphs \ Example output: ![Cypher Output](doc/output_cypher.png) -## 2.2 Rag mode +### 2.2 Rag mode ```bash curl http://${your_ip}:8060/v1/graphs \ @@ -119,7 +119,7 @@ curl http://${your_ip}:8060/v1/graphs \ Example output: ![Cypher Output](doc/output_rag.png) -## 2.3 Query mode +### 2.3 Query mode First example: diff --git a/comps/knowledgegraphs/langchain/docker/Dockerfile b/comps/knowledgegraphs/langchain/docker/Dockerfile index dd96f0dbe..655b44d24 100755 --- a/comps/knowledgegraphs/langchain/docker/Dockerfile +++ b/comps/knowledgegraphs/langchain/docker/Dockerfile @@ -7,8 +7,7 @@ ARG ARCH="cpu" # Set this to "cpu" or "gpu" RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ libgl1-mesa-glx \ - libjemalloc-dev \ - vim + libjemalloc-dev RUN useradd -m -s /bin/bash user && \ mkdir -p /home/user && \ diff --git a/comps/knowledgegraphs/langchain/knowledge_graph.py b/comps/knowledgegraphs/langchain/knowledge_graph.py index 9ed2c5b65..3b5b2882e 100755 --- a/comps/knowledgegraphs/langchain/knowledge_graph.py +++ b/comps/knowledgegraphs/langchain/knowledge_graph.py @@ -22,9 +22,11 @@ from langchain_community.graphs import Neo4jGraph from langchain_community.llms import HuggingFaceEndpoint from langchain_community.vectorstores.neo4j_vector import Neo4jVector -from langsmith import traceable -from comps import GeneratedDoc, GraphDoc, ServiceType, opea_microservices, register_microservice +from comps import CustomLogger, GeneratedDoc, GraphDoc, ServiceType, opea_microservices, register_microservice + +logger = CustomLogger("knowledge_graph") +logflag = os.getenv("LOGFLAG", False) def get_retriever(input, neo4j_endpoint, neo4j_username, neo4j_password, llm): @@ -106,7 +108,8 @@ def get_agent(vector_qa, cypher_chain, llm_repo_id): port=8060, ) def graph_query(input: GraphDoc) -> GeneratedDoc: - print(input) + if logflag: + logger.info(input) ## Connect to Neo4j neo4j_endpoint = os.getenv("NEO4J_ENDPOINT", "neo4j://localhost:7687") @@ -155,6 +158,8 @@ def graph_query(input: GraphDoc) -> GeneratedDoc: result = agent_executor.invoke({"input": input.text})["output"] else: result = "Please specify strtype as one of cypher, rag, query." + if logflag: + logger.info(result) return GeneratedDoc(text=result, prompt=input.text) diff --git a/comps/knowledgegraphs/requirements.txt b/comps/knowledgegraphs/requirements.txt index ecb5228af..39b351530 100755 --- a/comps/knowledgegraphs/requirements.txt +++ b/comps/knowledgegraphs/requirements.txt @@ -8,7 +8,6 @@ langchain langchain_community==0.2.5 langchain_openai langchainhub -langsmith neo4j numpy opentelemetry-api diff --git a/comps/llms/README.md b/comps/llms/README.md index 64f7678df..86fc79e7a 100644 --- a/comps/llms/README.md +++ b/comps/llms/README.md @@ -6,19 +6,19 @@ A prerequisite for using this microservice is that users must have a LLM text ge Overall, this microservice offers a streamlined way to integrate large language model inference into applications, requiring minimal setup from the user beyond initiating a TGI/vLLM/Ray/PredictionGuard service and configuring the necessary environment variables. This allows for the seamless processing of queries and documents to generate intelligent, context-aware responses. -# ๐Ÿš€1. Start Microservice with Python (Option 1) +## ๐Ÿš€1. 
Start Microservice with Python (Option 1) To start the LLM microservice, you need to install python packages first. -## 1.1 Install Requirements +### 1.1 Install Requirements ```bash pip install -r requirements.txt ``` -## 1.2 Start LLM Service +### 1.2 Start LLM Service -### 1.2.1 Start TGI Service +#### 1.2.1 Start TGI Service ```bash export HF_TOKEN=${your_hf_api_token} @@ -28,28 +28,28 @@ export LANGCHAIN_PROJECT="opea/gen-ai-comps:llms" docker run -p 8008:80 -v ./data:/data --name tgi_service --shm-size 1g ghcr.io/huggingface/text-generation-inference:1.4 --model-id ${your_hf_llm_model} ``` -### 1.2.2 Start vLLM Service +#### 1.2.2 Start vLLM Service ```bash export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token} -docker run -it --name vllm_service -p 8008:80 -e HF_TOKEN=${HUGGINGFACEHUB_API_TOKEN} -v ./data:/data vllm:cpu /bin/bash -c "cd / && export VLLM_CPU_KVCACHE_SPACE=40 && python3 -m vllm.entrypoints.openai.api_server --model ${your_hf_llm_model} --port 80" +docker run -it --name vllm_service -p 8008:80 -e HF_TOKEN=${HUGGINGFACEHUB_API_TOKEN} -v ./data:/data opea/vllm:cpu /bin/bash -c "cd / && export VLLM_CPU_KVCACHE_SPACE=40 && python3 -m vllm.entrypoints.openai.api_server --model ${your_hf_llm_model} --port 80" ``` -## 1.2.3 Start Ray Service +### 1.2.3 Start Ray Service ```bash export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token} export TRUST_REMOTE_CODE=True -docker run -it --runtime=habana --name ray_serve_service -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host -p 8008:80 -e HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN -e TRUST_REMOTE_CODE=$TRUST_REMOTE_CODE ray_serve:habana /bin/bash -c "ray start --head && python api_server_openai.py --port_number 80 --model_id_or_path ${your_hf_llm_model} --chat_processor ${your_hf_chatprocessor}" +docker run -it --runtime=habana --name ray_serve_service -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host -p 8008:80 -e HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN -e TRUST_REMOTE_CODE=$TRUST_REMOTE_CODE opea/llm-ray:latest /bin/bash -c "ray start --head && python api_server_openai.py --port_number 80 --model_id_or_path ${your_hf_llm_model} --chat_processor ${your_hf_chatprocessor}" ``` ## 1.2.3 Start Prediction Guard Service Not Applicable if using open access LLMs already hosted in Intel Tiber Developer Cloud on Gaudi (or your self-hosted version of Prediction Guard). You will only need your Prediction Guard API key, which will be referenced below. 
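Since the TGI, vLLM, and Ray options above all publish the backend on local port 8008, it can help to confirm which container is actually bound to that port before wiring up the LLM microservice. This is an optional check rather than one of the original steps; the port number is assumed from the commands above.

```bash
docker ps --filter "publish=8008" --format "table {{.Names}}\t{{.Image}}\t{{.Ports}}"
```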
-## 1.3 Verify the LLM Service +### 1.3 Verify the LLM Service -### 1.3.1 Verify the TGI Service +#### 1.3.1 Verify the TGI Service ```bash curl http://${your_ip}:8008/generate \ @@ -58,7 +58,7 @@ curl http://${your_ip}:8008/generate \ -H 'Content-Type: application/json' ``` -### 1.3.2 Verify the vLLM Service +#### 1.3.2 Verify the vLLM Service ```bash curl http://${your_ip}:8008/v1/completions \ @@ -71,7 +71,7 @@ curl http://${your_ip}:8008/v1/completions \ }' ``` -### 1.3.3 Verify the Ray Service +#### 1.3.3 Verify the Ray Service ```bash curl http://${your_ip}:8008/v1/chat/completions \ @@ -109,23 +109,23 @@ curl -X POST https://api.predictionguard.com/chat/completions - Streaming Versio }' ``` -## 1.4 Start LLM Service with Python Script +### 1.4 Start LLM Service with Python Script -### 1.4.1 Start the TGI Service +#### 1.4.1 Start the TGI Service ```bash export TGI_LLM_ENDPOINT="http://${your_ip}:8008" python text-generation/tgi/llm.py ``` -### 1.4.2 Start the vLLM Service +#### 1.4.2 Start the vLLM Service ```bash export vLLM_LLM_ENDPOINT="http://${your_ip}:8008" python text-generation/vllm/llm.py ``` -### 1.4.3 Start the Ray Service +#### 1.4.3 Start the Ray Service ```bash export RAY_Serve_ENDPOINT="http://${your_ip}:8008" @@ -139,11 +139,11 @@ export PREDICTIONGUARD_API_KEY={your_pg_api_key} python text-generation/predictionguard/llm_predictionguard.py ``` -# ๐Ÿš€2. Start Microservice with Docker (Option 2) +## ๐Ÿš€2. Start Microservice with Docker (Option 2) If you start an LLM microservice with docker, the `docker_compose_llm.yaml` file will automatically start a TGI/vLLM service with docker. -## 2.1 Setup Environment Variables +### 2.1 Setup Environment Variables In order to start TGI and LLM services, you need to setup the following environment variables first. @@ -183,16 +183,16 @@ In order to start Prediction Guard and LLM services, you need to setup the follo export PREDICTIONGUARD_API_KEY={your_pg_api_key} ``` -## 2.2 Build Docker Image +### 2.2 Build Docker Image -### 2.2.1 TGI +#### 2.2.1 TGI ```bash cd ../../ docker build -t opea/llm-tgi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/text-generation/tgi/Dockerfile . ``` -### 2.2.2 vLLM +#### 2.2.2 vLLM Build vllm docker. @@ -207,7 +207,7 @@ cd ../../../../ docker build -t opea/llm-vllm:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/text-generation/vllm/docker/Dockerfile.microservice . ``` -### 2.2.3 Ray Serve +#### 2.2.3 Ray Serve Build Ray Serve docker. @@ -303,9 +303,9 @@ cd text-genetation/ray_serve docker compose -f docker_compose_llm.yaml up -d ``` -# ๐Ÿš€3. Consume LLM Service +## ๐Ÿš€3. Consume LLM Service -## 3.1 Check Service Status +### 3.1 Check Service Status ```bash curl http://${your_ip}:9000/v1/health_check\ @@ -313,7 +313,7 @@ curl http://${your_ip}:9000/v1/health_check\ -H 'Content-Type: application/json' ``` -## 3.2 Consume LLM Service +### 3.2 Consume LLM Service You can set the following model parameters according to your actual needs, such as `max_new_tokens`, `streaming`. @@ -333,7 +333,7 @@ curl http://${your_ip}:9000/v1/chat/completions \ -H 'Content-Type: application/json' ``` -## 4. Validated Model +### 4. 
Validated Model | Model | TGI-Gaudi | vLLM-CPU | vLLM-Gaudi | Ray | Prediction Guard | | ---------------------------- | --------- | -------- | ---------- | --- | ---------------- | diff --git a/comps/llms/faq-generation/tgi/Dockerfile b/comps/llms/faq-generation/tgi/Dockerfile index 0c1ee8e7e..0d6bb9d61 100644 --- a/comps/llms/faq-generation/tgi/Dockerfile +++ b/comps/llms/faq-generation/tgi/Dockerfile @@ -1,4 +1,3 @@ - # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 @@ -6,8 +5,7 @@ FROM langchain/langchain:latest RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ libgl1-mesa-glx \ - libjemalloc-dev \ - vim + libjemalloc-dev RUN useradd -m -s /bin/bash user && \ mkdir -p /home/user && \ @@ -24,4 +22,4 @@ ENV PYTHONPATH=$PYTHONPATH:/home/user WORKDIR /home/user/comps/llms/faq-generation/tgi -ENTRYPOINT ["python", "llm.py"] +ENTRYPOINT ["bash", "entrypoint.sh"] diff --git a/comps/llms/faq-generation/tgi/README.md b/comps/llms/faq-generation/tgi/README.md index 45f1ad1bd..9673ee7fe 100644 --- a/comps/llms/faq-generation/tgi/README.md +++ b/comps/llms/faq-generation/tgi/README.md @@ -2,11 +2,11 @@ This microservice interacts with the TGI LLM server to generate FAQs from Input Text.[Text Generation Inference](https://github.com/huggingface/text-generation-inference) (TGI) is a toolkit for deploying and serving Large Language Models (LLMs). TGI enables high-performance text generation for the most popular open-source LLMs, including Llama, Falcon, StarCoder, BLOOM, GPT-NeoX, and more. -# ๐Ÿš€1. Start Microservice with Docker +## ๐Ÿš€1. Start Microservice with Docker If you start an LLM microservice with docker, the `docker_compose_llm.yaml` file will automatically start a TGI service with docker. -## 1.1 Setup Environment Variables +### 1.1 Setup Environment Variables In order to start TGI and LLM services, you need to setup the following environment variables first. @@ -18,7 +18,7 @@ export LANGCHAIN_TRACING_V2=true export LANGCHAIN_API_KEY=${your_langchain_api_key} ``` -## 1.2 Build Docker Image +### 1.2 Build Docker Image ```bash cd ../../../../ @@ -32,7 +32,7 @@ To start a docker container, you have two options: You can choose one as needed. -## 1.3 Run Docker with CLI (Option A) +### 1.3 Run Docker with CLI (Option A) ```bash docker run -d -p 8008:80 -v ./data:/data --name tgi_service --shm-size 1g ghcr.io/huggingface/text-generation-inference:1.4 --model-id ${LLM_MODEL_ID} @@ -42,16 +42,16 @@ docker run -d -p 8008:80 -v ./data:/data --name tgi_service --shm-size 1g ghcr.i docker run -d --name="llm-faqgen-server" -p 9000:9000 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e TGI_LLM_ENDPOINT=$TGI_LLM_ENDPOINT -e HUGGINGFACEHUB_API_TOKEN=$HF_TOKEN opea/llm-faqgen-tgi:latest ``` -## 1.4 Run Docker with Docker Compose (Option B) +### 1.4 Run Docker with Docker Compose (Option B) ```bash cd text-generation/tgi docker compose -f docker_compose_llm.yaml up -d ``` -# ๐Ÿš€3. Consume LLM Service +## ๐Ÿš€3. 
Consume LLM Service -## 3.1 Check Service Status +### 3.1 Check Service Status ```bash curl http://${your_ip}:9000/v1/health_check\ @@ -59,7 +59,7 @@ curl http://${your_ip}:9000/v1/health_check\ -H 'Content-Type: application/json' ``` -## 3.2 Consume FAQGen LLM Service +### 3.2 Consume FAQGen LLM Service ```bash curl http://${your_ip}:9000/v1/faqgen \ diff --git a/comps/llms/faq-generation/tgi/entrypoint.sh b/comps/llms/faq-generation/tgi/entrypoint.sh new file mode 100644 index 000000000..d60eddd36 --- /dev/null +++ b/comps/llms/faq-generation/tgi/entrypoint.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env bash + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +pip --no-cache-dir install -r requirements-runtime.txt + +python llm.py diff --git a/comps/llms/faq-generation/tgi/llm.py b/comps/llms/faq-generation/tgi/llm.py index af23a74af..0b4d70e85 100644 --- a/comps/llms/faq-generation/tgi/llm.py +++ b/comps/llms/faq-generation/tgi/llm.py @@ -9,12 +9,13 @@ from langchain.prompts import PromptTemplate from langchain.text_splitter import CharacterTextSplitter from langchain_community.llms import HuggingFaceEndpoint -from langsmith import traceable -from comps import GeneratedDoc, LLMParamsDoc, ServiceType, opea_microservices, register_microservice +from comps import CustomLogger, GeneratedDoc, LLMParamsDoc, ServiceType, opea_microservices, register_microservice + +logger = CustomLogger("llm_faqgen") +logflag = os.getenv("LOGFLAG", False) -@traceable(run_type="tool") def post_process_text(text: str): if text == " ": return "data: @#$\n\n" @@ -33,8 +34,9 @@ def post_process_text(text: str): host="0.0.0.0", port=9000, ) -@traceable(run_type="llm") def llm_generate(input: LLMParamsDoc): + if logflag: + logger.info(input) llm_endpoint = os.getenv("TGI_LLM_ENDPOINT", "http://localhost:8080") llm = HuggingFaceEndpoint( endpoint_url=llm_endpoint, @@ -74,6 +76,8 @@ async def stream_generator(): else: response = llm_chain.invoke(input.query) response = response["result"].split("")[0].split("\n")[0] + if logflag: + logger.info(response) return GeneratedDoc(text=response, prompt=input.query) diff --git a/comps/llms/faq-generation/tgi/requirements-runtime.txt b/comps/llms/faq-generation/tgi/requirements-runtime.txt new file mode 100644 index 000000000..225adde27 --- /dev/null +++ b/comps/llms/faq-generation/tgi/requirements-runtime.txt @@ -0,0 +1 @@ +langserve diff --git a/comps/llms/faq-generation/tgi/requirements.txt b/comps/llms/faq-generation/tgi/requirements.txt index 623a8f667..78b323c73 100644 --- a/comps/llms/faq-generation/tgi/requirements.txt +++ b/comps/llms/faq-generation/tgi/requirements.txt @@ -2,8 +2,6 @@ docarray[full] fastapi huggingface_hub langchain==0.1.16 -langserve -langsmith opentelemetry-api opentelemetry-exporter-otlp opentelemetry-sdk diff --git a/comps/llms/summarization/tgi/Dockerfile b/comps/llms/summarization/tgi/Dockerfile index c1e1fdcca..da449312b 100644 --- a/comps/llms/summarization/tgi/Dockerfile +++ b/comps/llms/summarization/tgi/Dockerfile @@ -1,4 +1,3 @@ - # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 @@ -6,8 +5,7 @@ FROM langchain/langchain:latest RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ libgl1-mesa-glx \ - libjemalloc-dev \ - vim + libjemalloc-dev RUN useradd -m -s /bin/bash user && \ mkdir -p /home/user && \ diff --git a/comps/llms/summarization/tgi/README.md b/comps/llms/summarization/tgi/README.md index 9e5858b4b..c8cc9c44a 100644 --- 
a/comps/llms/summarization/tgi/README.md +++ b/comps/llms/summarization/tgi/README.md @@ -3,24 +3,24 @@ In this microservice, we utilize LangChain to implement summarization strategies and facilitate LLM inference using Text Generation Inference on Intel Xeon and Gaudi2 processors. [Text Generation Inference](https://github.com/huggingface/text-generation-inference) (TGI) is a toolkit for deploying and serving Large Language Models (LLMs). TGI enables high-performance text generation for the most popular open-source LLMs, including Llama, Falcon, StarCoder, BLOOM, GPT-NeoX, and more. -# ๐Ÿš€1. Start Microservice with Python (Option 1) +## ๐Ÿš€1. Start Microservice with Python (Option 1) To start the LLM microservice, you need to install python packages first. -## 1.1 Install Requirements +### 1.1 Install Requirements ```bash pip install -r requirements.txt ``` -## 1.2 Start LLM Service +### 1.2 Start LLM Service ```bash export HF_TOKEN=${your_hf_api_token} docker run -p 8008:80 -v ./data:/data --name llm-docsum-tgi --shm-size 1g ghcr.io/huggingface/text-generation-inference:2.1.0 --model-id ${your_hf_llm_model} ``` -## 1.3 Verify the TGI Service +### 1.3 Verify the TGI Service ```bash curl http://${your_ip}:8008/generate \ @@ -29,18 +29,18 @@ curl http://${your_ip}:8008/generate \ -H 'Content-Type: application/json' ``` -## 1.4 Start LLM Service with Python Script +### 1.4 Start LLM Service with Python Script ```bash export TGI_LLM_ENDPOINT="http://${your_ip}:8008" python llm.py ``` -# ๐Ÿš€2. Start Microservice with Docker (Option 2) +## ๐Ÿš€2. Start Microservice with Docker (Option 2) If you start an LLM microservice with docker, the `docker_compose_llm.yaml` file will automatically start a TGI/vLLM service with docker. -## 2.1 Setup Environment Variables +### 2.1 Setup Environment Variables In order to start TGI and LLM services, you need to setup the following environment variables first. @@ -50,7 +50,7 @@ export TGI_LLM_ENDPOINT="http://${your_ip}:8008" export LLM_MODEL_ID=${your_hf_llm_model} ``` -## 2.2 Build Docker Image +### 2.2 Build Docker Image ```bash cd ../../ @@ -64,21 +64,21 @@ To start a docker container, you have two options: You can choose one as needed. -## 2.3 Run Docker with CLI (Option A) +### 2.3 Run Docker with CLI (Option A) ```bash docker run -d --name="llm-docsum-tgi-server" -p 9000:9000 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e TGI_LLM_ENDPOINT=$TGI_LLM_ENDPOINT -e HF_TOKEN=$HF_TOKEN opea/llm-docsum-tgi:latest ``` -## 2.4 Run Docker with Docker Compose (Option B) +### 2.4 Run Docker with Docker Compose (Option B) ```bash docker compose -f docker_compose_llm.yaml up -d ``` -# ๐Ÿš€3. Consume LLM Service +## ๐Ÿš€3. 
Consume LLM Service -## 3.1 Check Service Status +### 3.1 Check Service Status ```bash curl http://${your_ip}:9000/v1/health_check\ @@ -86,7 +86,7 @@ curl http://${your_ip}:9000/v1/health_check\ -H 'Content-Type: application/json' ``` -## 3.2 Consume LLM Service +### 3.2 Consume LLM Service ```bash curl http://${your_ip}:9000/v1/chat/docsum \ diff --git a/comps/llms/summarization/tgi/llm.py b/comps/llms/summarization/tgi/llm.py index a9db6248d..80c5d3924 100644 --- a/comps/llms/summarization/tgi/llm.py +++ b/comps/llms/summarization/tgi/llm.py @@ -8,12 +8,13 @@ from langchain.docstore.document import Document from langchain.text_splitter import CharacterTextSplitter from langchain_huggingface import HuggingFaceEndpoint -from langsmith import traceable -from comps import GeneratedDoc, LLMParamsDoc, ServiceType, opea_microservices, register_microservice +from comps import CustomLogger, GeneratedDoc, LLMParamsDoc, ServiceType, opea_microservices, register_microservice + +logger = CustomLogger("llm_docsum") +logflag = os.getenv("LOGFLAG", False) -@traceable(run_type="tool") def post_process_text(text: str): if text == " ": return "data: @#$\n\n" @@ -32,8 +33,9 @@ def post_process_text(text: str): host="0.0.0.0", port=9000, ) -@traceable(run_type="llm") def llm_generate(input: LLMParamsDoc): + if logflag: + logger.info(input) llm_endpoint = os.getenv("TGI_LLM_ENDPOINT", "http://localhost:8080") llm = HuggingFaceEndpoint( endpoint_url=llm_endpoint, @@ -60,7 +62,8 @@ async def stream_generator(): _serializer = WellKnownLCSerializer() async for chunk in llm_chain.astream_log(docs): data = _serializer.dumps({"ops": chunk.ops}).decode("utf-8") - print(f"[docsum - text_summarize] data: {data}") + if logflag: + logger.info(f"[docsum - text_summarize] data: {data}") yield f"data: {data}\n\n" yield "data: [DONE]\n\n" @@ -68,6 +71,8 @@ async def stream_generator(): else: response = llm_chain.invoke(input.query) response = response["result"].split("")[0].split("\n")[0] + if logflag: + logger.info(response) return GeneratedDoc(text=response, prompt=input.query) diff --git a/comps/llms/summarization/tgi/requirements.txt b/comps/llms/summarization/tgi/requirements.txt index c6c151f6e..e074ba8c8 100644 --- a/comps/llms/summarization/tgi/requirements.txt +++ b/comps/llms/summarization/tgi/requirements.txt @@ -6,7 +6,6 @@ langchain-huggingface langchain-openai langchain_community langchainhub -langsmith opentelemetry-api opentelemetry-exporter-otlp opentelemetry-sdk diff --git a/comps/llms/text-generation/native/Dockerfile b/comps/llms/text-generation/native/Dockerfile deleted file mode 100644 index 9d7d1e094..000000000 --- a/comps/llms/text-generation/native/Dockerfile +++ /dev/null @@ -1,41 +0,0 @@ - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -# HABANA environment -FROM vault.habana.ai/gaudi-docker/1.15.1/ubuntu22.04/habanalabs/pytorch-installer-2.2.0:latest as hpu - -ENV LANG=en_US.UTF-8 -ARG REPO=https://github.com/huggingface/optimum-habana.git -ARG REPO_VER=v1.11.1 - -RUN apt-get update && \ - apt-get install git-lfs && \ - git-lfs install && \ - apt-get install -y --no-install-recommends --fix-missing \ - libgl1-mesa-glx \ - libjemalloc-dev \ - vim - -RUN useradd -m -s /bin/bash user && \ - mkdir -p /home/user && \ - chown -R user /home/user/ - -USER user - -COPY comps /home/user/comps -COPY comps/llm/text-generation/qwen2/qwen2.patch /home/user/qwen2.patch - -SHELL ["/bin/bash", "--login", "-c"] -RUN git clone --single-branch -b ${REPO_VER} ${REPO} /optimum-habana 
- -ENV PYTHONPATH=/root:/home/user - -RUN cd /optimum-habana && git apply /qwen2.patch && \ - cd /optimum-habana/examples/text-generation && pip install -r requirements.txt && \ - cd /optimum-habana && python setup.py install - -WORKDIR /home/user/comps/llms/text-generation/qwen2 - -ENTRYPOINT ["python", "llm.py"] diff --git a/comps/llms/text-generation/native/README.md b/comps/llms/text-generation/native/README.md new file mode 100644 index 000000000..a4fcc74c3 --- /dev/null +++ b/comps/llms/text-generation/native/README.md @@ -0,0 +1,61 @@ +# LLM Native Microservice + +LLM Native microservice uses [optimum-habana](https://github.com/huggingface/optimum-habana) for model initialization and warm-up, focusing solely on large language models (LLMs). It operates without frameworks like TGI/VLLM, using PyTorch directly for inference, and supports only non-streaming formats. This streamlined approach optimizes performance on Habana hardware. + +## ๐Ÿš€1. Start Microservice + +If you start an LLM microservice with docker, the `docker_compose_llm.yaml` file will automatically start a Native LLM service with docker. + +### 1.1 Setup Environment Variables + +In order to start Native LLM service, you need to setup the following environment variables first. + +```bash +export LLM_NATIVE_MODEL="Qwen/Qwen2-7B-Instruct" +``` + +### 1.2 Build Docker Image + +```bash +cd ../../../../ +docker build -t opea/llm-native:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/text-generation/native/docker/Dockerfile . +``` + +To start a docker container, you have two options: + +- A. Run Docker with CLI +- B. Run Docker with Docker Compose + +You can choose one as needed. + +### 1.3 Run Docker with CLI (Option A) + +```bash +docker run -d --runtime=habana --name="llm-native-server" -p 9000:9000 -e https_proxy=$https_proxy -e http_proxy=$http_proxy -e TOKENIZERS_PARALLELISM=false -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host -e LLM_NATIVE_MODEL=${LLM_NATIVE_MODEL} opea/llm-native:latest +``` + +### 1.4 Run Docker with Docker Compose (Option B) + +```bash +cd docker +docker compose -f docker_compose_llm.yaml up -d +``` + +## ๐Ÿš€2. 
Consume LLM Service + +### 2.1 Check Service Status + +```bash +curl http://${your_ip}:9000/v1/health_check\ + -X GET \ + -H 'Content-Type: application/json' +``` + +### 2.2 Consume LLM Service + +```bash +curl http://${your_ip}:9000/v1/chat/completions\ + -X POST \ + -d '{"query":"What is Deep Learning?"}' \ + -H 'Content-Type: application/json' +``` diff --git a/comps/llms/text-generation/native/docker/Dockerfile b/comps/llms/text-generation/native/docker/Dockerfile new file mode 100644 index 000000000..f7d32cdb9 --- /dev/null +++ b/comps/llms/text-generation/native/docker/Dockerfile @@ -0,0 +1,40 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# HABANA environment +# FROM vault.habana.ai/gaudi-docker/1.16.1/ubuntu22.04/habanalabs/pytorch-installer-2.2.2:latest as hpu +FROM opea/habanalabs:1.16.1-pytorch-installer-2.2.2 as hpu + +ENV LANG=en_US.UTF-8 +ARG REPO=https://github.com/huggingface/optimum-habana.git +ARG REPO_VER=v1.12.1 + +RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ + git-lfs \ + libgl1-mesa-glx \ + libjemalloc-dev + +RUN useradd -m -s /bin/bash user && \ + mkdir -p /home/user && \ + chown -R user /home/user/ + +USER user + +RUN git lfs install + +COPY comps /home/user/comps + +RUN pip install --upgrade-strategy eager optimum[habana] && \ + pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.17.0 + +RUN git clone ${REPO} /home/user/optimum-habana && \ + cd /home/user/optimum-habana && git checkout ${REPO_VER} && \ + cd examples/text-generation && pip install -r requirements.txt && \ + cd /home/user/comps/llms/text-generation/native && pip install -r requirements.txt && \ + pip install --upgrade --force-reinstall pydantic + +ENV PYTHONPATH=/root:/home/user + +WORKDIR /home/user/comps/llms/text-generation/native + +ENTRYPOINT ["python", "llm.py"] diff --git a/comps/llms/text-generation/native/docker/docker_compose_llm.yaml b/comps/llms/text-generation/native/docker/docker_compose_llm.yaml new file mode 100644 index 000000000..f3a36e5bb --- /dev/null +++ b/comps/llms/text-generation/native/docker/docker_compose_llm.yaml @@ -0,0 +1,28 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +version: "3.8" + +services: + llm: + image: opea/llm-native:latest + container_name: llm-native-server + ports: + - "9000:9000" + runtime: habana + cap_add: + - SYS_NICE + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + LLM_NATIVE_MODEL: ${LLM_NATIVE_MODEL} + HABANA_VISIBLE_DEVICES: all + OMPI_MCA_btl_vader_single_copy_mechanism: none + TOKENIZERS_PARALLELISM: false + restart: unless-stopped + +networks: + default: + driver: bridge diff --git a/comps/llms/text-generation/native/llm.py b/comps/llms/text-generation/native/llm.py index 4f407ccd6..6008a91b8 100644 --- a/comps/llms/text-generation/native/llm.py +++ b/comps/llms/text-generation/native/llm.py @@ -11,87 +11,161 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+import sys +sys.path.append("/test/GenAIComps/") + +import logging import os -from datetime import datetime +import threading +import time import torch -from fastapi.responses import StreamingResponse -from langsmith import traceable +from langchain_core.prompts import PromptTemplate +from template import ChatTemplate, args_dict, input_sentences from utils import initialize_model -from comps import GeneratedDoc, LLMParamsDoc, ServiceType, opea_microservices, register_microservice +from comps import ( + GeneratedDoc, + LLMParamsDoc, + ServiceType, + opea_microservices, + register_microservice, + register_statistics, +) +logflag = os.getenv("LOGFLAG", False) -def warmup(): - input_sentences = ["DeepSpeed is a machine learning framework", "He is working on", "He has a", "He got all"] - input_tokens = tokenizer.batch_encode_plus(input_sentences, return_tensors="pt", padding=True) - for t in input_tokens: - if torch.is_tensor(input_tokens[t]): - input_tokens[t] = input_tokens[t].to("hpu") - for i in range(3): - print(f"Current time: {datetime.now()}") - print(f"Warming up {i+1}...") - outputs = model.generate( - **input_tokens, - generation_config=generation_config, - lazy_mode=True, - hpu_graphs=True, - profiling_steps=0, - profiling_warmup_steps=0, - ).cpu() - res = tokenizer.batch_decode(outputs, skip_special_tokens=True) - print(f"res: {res}") +logging.basicConfig( + format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", + datefmt="%m/%d/%Y %H:%M:%S", + level=logging.INFO, +) +logger = logging.getLogger(__name__) -@register_microservice( - name="opea_service@llm_qwen", - service_type=ServiceType.LLM, - endpoint="/v1/chat/completions", - host="0.0.0.0", - port=8000, -) -@traceable(run_type="llm") -def llm_generate(input: LLMParamsDoc): - input_query = input.query - input_tokens = tokenizer.batch_encode_plus([input_query], return_tensors="pt", padding=True) +class Args: + def __init__(self, **entries): + self.__dict__.update(entries) + + +model = None +assistant_model = None +tokenizer = None +generation_config = None +args = Args(**args_dict) +initialization_lock = threading.Lock() +initialized = False + + +def generate( + input_query: list, + device="hpu", + use_lazy_mode=True, + use_hpu_graphs=True, + profiling_steps=0, + profiling_warmup_steps=0, + ignore_eos=True, + profiling_record_shapes=False, +): + """Generates sequences from the input sentences and returns them.""" + logger.info(f"[llm - generate] starting to inference with prompt {input_query}") + encode_t0 = time.perf_counter() + + # Tokenization + input_tokens = tokenizer.batch_encode_plus(input_query, return_tensors="pt", padding=True) + encode_duration = time.perf_counter() - encode_t0 + logger.info(f"[llm - generate] input tokenized: {input_tokens}") + + # Move inputs to target device(s) for t in input_tokens: + logger.info(f"[llm - generate] t: {t}") if torch.is_tensor(input_tokens[t]): - input_tokens[t] = input_tokens[t].to("hpu") + logger.info("[llm - generate] input[t] is tensor") + logger.info(f"[llm - generate] device: {model.device}") + input_tokens[t] = input_tokens[t].to(model.device) + + logger.info("[llm - generate] inputs transferred.") - print(f"[llm - qwen] Current time: {datetime.now()}") - output = model.generate( + iteration_times = [] + outputs = model.generate( **input_tokens, generation_config=generation_config, - lazy_mode=True, - hpu_graphs=True, - profiling_steps=0, - profiling_warmup_steps=0, + assistant_model=assistant_model, + lazy_mode=use_lazy_mode, + hpu_graphs=use_hpu_graphs, + 
profiling_steps=profiling_steps, + profiling_warmup_steps=profiling_warmup_steps, + ignore_eos=ignore_eos, + iteration_times=iteration_times, + profiling_record_shapes=profiling_record_shapes, ).cpu() - res = tokenizer.batch_decode(output, skip_special_tokens=True)[0] - print(f"[llm - qwen] res: {res}") - return res + logger.info("[llm - generate] result generated") + first_token_time = iteration_times[0] + encode_duration + result = tokenizer.batch_decode(outputs, skip_special_tokens=True) + logger.info(f"[llm - generate] result: {result}") + logger.info(f"[llm - generate] Time to first token = {first_token_time*1000}ms") + return result -if __name__ == "__main__": - model, tokenizer, generation_config = initialize_model( - model_name_or_path="Qwen/Qwen1.5-7B-Chat", max_new_tokens=128 - ) - import habana_frameworks.torch.hpu as torch_hpu +def initialize(): + global model, assistant_model, tokenizer, generation_config, initialized + with initialization_lock: + if not initialized: + # initialize model and tokenizer + import habana_frameworks.torch.hpu as torch_hpu + from optimum.habana.utils import HabanaProfile + + model, assistant_model, tokenizer, generation_config = initialize_model(args, logger) + logger.info("[llm] model and tokenizer initialized.") + + # compilation and model warmup + HabanaProfile.disable() + logger.info("[llm - native] Graph compilation...") + for _ in range(args.warmup): + generate(input_sentences) + logger.info("[llm - native] model warm up finished.") + torch_hpu.synchronize() + HabanaProfile.enable() + logger.info("[llm - native] Ready to inference") + res = generate(["What is Deep Learning?"]) + logger.info(f"[llm - native] test result: {res}") + initialized = True - print("[llm - qwen] model and tokenizer initialized.") - from optimum.habana.utils import HabanaProfile +@register_microservice( + name="opea_service@llm_native", + service_type=ServiceType.LLM, + endpoint="/v1/chat/completions", + host="0.0.0.0", + port=9000, +) +@register_statistics(names=["opea_service@llm_native"]) +def llm_generate(input: LLMParamsDoc): + initialize() + if logflag: + logger.info(input) + prompt = input.query + prompt_template = None + if input.chat_template: + prompt_template = PromptTemplate.from_template(input.chat_template) + input_variables = prompt_template.input_variables + if prompt_template: + if sorted(input_variables) == ["context", "question"]: + prompt = prompt_template.format(question=input.query, context="\n".join(input.documents)) + elif input_variables == ["question"]: + prompt = prompt_template.format(question=input.query) + else: + logger.info(f"{prompt_template} not used, we only support 2 input variables ['question', 'context']") + else: + if input.documents: + prompt = ChatTemplate.generate_rag_prompt(input.query, input.documents) + res = generate([prompt]) - # compilation stage disable profiling - HabanaProfile.disable() - # Compilation - print("Graph compilation...") - warmup() - print("[llm - qwen] model warm up finished.") + if logflag: + logger.info(f"[llm - native] inference result: {res}") + return GeneratedDoc(text=res[0], prompt=input.query) - torch_hpu.synchronize() - HabanaProfile.enable() - print("[llm - qwen] Ready to inference") - opea_microservices["opea_service@llm_qwen"].start() +if __name__ == "__main__": + opea_microservices["opea_service@llm_native"].start() diff --git a/comps/llms/text-generation/native/qwen2.patch b/comps/llms/text-generation/native/qwen2.patch deleted file mode 100644 index 9b5d93567..000000000 --- 
a/comps/llms/text-generation/native/qwen2.patch +++ /dev/null @@ -1,127 +0,0 @@ -diff --git a/examples/text-generation/run_lm_eval.py b/examples/text-generation/run_lm_eval.py -index b086c80..e0e5a9f 100644 ---- a/examples/text-generation/run_lm_eval.py -+++ b/examples/text-generation/run_lm_eval.py -@@ -75,13 +75,13 @@ class HabanaModelAdapter(lm_eval.base.BaseLM): - self.options = options - self._device = args.device - self.model_inputs = {"use_cache": self.options.use_cache} -- if self.model.config.model_type in ["llama", "falcon"]: -+ if self.model.config.model_type in ["llama", "falcon", "qwen2"]: - self.model_inputs.update( - { - "reuse_cache": self.options.reuse_cache, - } - ) -- if self.model.config.model_type == "llama": -+ if self.model.config.model_type in ["llama","mistral","qwen2"]: - self.model_inputs.update( - { - "attn_softmax_bf16": self.options.attn_softmax_bf16, -diff --git a/examples/text-generation/utils.py b/examples/text-generation/utils.py -index 8bce0ae..c29f458 100644 ---- a/examples/text-generation/utils.py -+++ b/examples/text-generation/utils.py -@@ -234,7 +234,7 @@ def setup_distributed_model(args, model_dtype, model_kwargs, logger): - - model = deepspeed.init_inference(model, **ds_inference_kwargs) - model = model.module -- if model.config.model_type in ["llama", "falcon"]: -+ if model.config.model_type in ["llama", "falcon","qwen2"]: - patch_scoped_linear_all_reduce(model) - - if args.quant_config: -diff --git a/optimum/habana/transformers/generation/utils.py b/optimum/habana/transformers/generation/utils.py -index 0d50470..94cc7eb 100755 ---- a/optimum/habana/transformers/generation/utils.py -+++ b/optimum/habana/transformers/generation/utils.py -@@ -740,7 +740,7 @@ class GaudiGenerationMixin(GenerationMixin): - ) - model_kwargs["kv_cache_len"] = calculated_max_length - -- if self.config.model_type in ["llama", "falcon"]: -+ if self.config.model_type in ["llama", "falcon","qwen2"]: - if self.config.max_position_embeddings < calculated_max_length: - unwrap_deepspeed_model(self).update_sincos_cache(seq_len=calculated_max_length) - -diff --git a/optimum/habana/transformers/modeling_utils.py b/optimum/habana/transformers/modeling_utils.py -index 6dc40a7..b5044af 100644 ---- a/optimum/habana/transformers/modeling_utils.py -+++ b/optimum/habana/transformers/modeling_utils.py -@@ -55,6 +55,9 @@ from .models import ( - GaudiOPTForCausalLM, - GaudiOPTLearnedPositionalEmbedding, - GaudiPhiForCausalLM, -+ GaudiQwen2Model, -+ GaudiQwen2Attention, -+ GaudiQwen2MLP, - _gaudi_wav2vec2_compute_mask_indices, - _gaudi_wav2vec2_mask_hidden_states, - gaudi_albert_forward, -@@ -118,6 +121,7 @@ from .models import ( - gaudi_phi_attention_forward, - gaudi_phi_decoder_layer_forward, - gaudi_phi_model_forward, -+ gaudi_qwen2_rmsnorm_forward, - gaudi_rot_matmul, - gaudi_rot_vec_mul, - gaudi_SpeechT5Attention_forward, -@@ -367,3 +371,11 @@ def adapt_transformers_to_gaudi(): - transformers.models.speecht5.modeling_speecht5.SpeechT5SpeechDecoderPrenet.forward = ( - gaudi_SpeechT5SpeechDecoderPrenet_forward - ) -+ -+ # Optimization for qwen2 on Gaudi -+ transformers.models.qwen2.modeling_qwen2.Qwen2ForCausalLM = GaudiQwen2ForCausalLM -+ transformers.models.qwen2.modeling_qwen2.Qwen2Model = GaudiQwen2Model -+ transformers.models.qwen2.modeling_qwen2.Qwen2Attention = GaudiQwen2Attention -+ transformers.models.qwen2.modeling_qwen2.Qwen2MLP = GaudiQwen2MLP -+ transformers.models.qwen2.modeling_qwen2.Qwen2DecoderLayer = GaudiQwen2DecoderLayer -+ 
transformers.models.qwen2.modeling_qwen2.Qwen2RMSNorm.forward = gaudi_qwen2_rmsnorm_forward -diff --git a/optimum/habana/transformers/models/__init__.py b/optimum/habana/transformers/models/__init__.py -index 1582d3f..41fdfdc 100644 ---- a/optimum/habana/transformers/models/__init__.py -+++ b/optimum/habana/transformers/models/__init__.py -@@ -122,6 +122,14 @@ from .phi import ( - gaudi_phi_decoder_layer_forward, - gaudi_phi_model_forward, - ) -+from .qwen2 import ( -+ GaudiQwen2Attention, -+ GaudiQwen2DecoderLayer, -+ GaudiQwen2ForCausalLM, -+ GaudiQwen2MLP, -+ GaudiQwen2Model, -+ gaudi_qwen2_rmsnorm_forward, -+) - from .speecht5 import ( - gaudi_generate_speech, - gaudi_SpeechT5Attention_forward, -diff --git a/optimum/habana/transformers/trainer.py b/optimum/habana/transformers/trainer.py -index dc6e136..7dfebaa 100644 ---- a/optimum/habana/transformers/trainer.py -+++ b/optimum/habana/transformers/trainer.py -@@ -916,9 +916,9 @@ class GaudiTrainer(Trainer): - if step % args.gradient_accumulation_steps == 0: - self.control = self.callback_handler.on_step_begin(args, self.state, self.control) - -- # attn_softmax_bf16 and use_flash_attention is enabled only for llama -+ # attn_softmax_bf16 and use_flash_attention is enabled only for llama and qwen2 - if hasattr(self.model, "generation_config") and self.model.generation_config is not None: -- if self.model.config.model_type == "llama": -+ if self.model.config.model_type in ["llama", "qwen2"]: - if self.model.generation_config.attn_softmax_bf16: - inputs["attn_softmax_bf16"] = True - if self.model.generation_config.use_flash_attention: -@@ -1799,9 +1799,9 @@ class GaudiTrainer(Trainer): - if batch_size is None: - batch_size = observed_batch_size - -- # attn_softmax_bf16 and use_flash_attention are enabled only for llama -+ # attn_softmax_bf16 and use_flash_attention are enabled only for llama and qwen2 - if hasattr(self.model, "generation_config") and self.model.generation_config is not None: -- if self.model.config.model_type == "llama": -+ if self.model.config.model_type in ["llama", "qwen2"]: - if self.model.generation_config.attn_softmax_bf16: - inputs["attn_softmax_bf16"] = True - if self.model.generation_config.use_flash_attention: diff --git a/comps/llms/text-generation/native/requirements.txt b/comps/llms/text-generation/native/requirements.txt index e8473a80c..806f2d29f 100644 --- a/comps/llms/text-generation/native/requirements.txt +++ b/comps/llms/text-generation/native/requirements.txt @@ -1,10 +1,10 @@ -docarray[full] +docarray fastapi -langsmith +httpx +langchain_core opentelemetry-api opentelemetry-exporter-otlp opentelemetry-sdk prometheus-fastapi-instrumentator shortuuid -transformers uvicorn diff --git a/comps/llms/text-generation/native/template.py b/comps/llms/text-generation/native/template.py new file mode 100644 index 000000000..c43205a0a --- /dev/null +++ b/comps/llms/text-generation/native/template.py @@ -0,0 +1,99 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import os +import re + + +class ChatTemplate: + @staticmethod + def generate_rag_prompt(question, documents): + context_str = "\n".join(documents) + if context_str and len(re.findall("[\u4E00-\u9FFF]", context_str)) / len(context_str) >= 0.3: + # chinese context + template = """ +### ไฝ ๅฐ†ๆ‰ฎๆผ”ไธ€ไธชไนไบŽๅŠฉไบบใ€ๅฐŠ้‡ไป–ไบบๅนถ่ฏšๅฎž็š„ๅŠฉๆ‰‹๏ผŒไฝ ็š„็›ฎๆ ‡ๆ˜ฏๅธฎๅŠฉ็”จๆˆท่งฃ็ญ”้—ฎ้ข˜ใ€‚ๆœ‰ๆ•ˆๅœฐๅˆฉ็”จๆฅ่‡ชๆœฌๅœฐ็Ÿฅ่ฏ†ๅบ“็š„ๆœ็ดข็ป“ๆžœใ€‚็กฎไฟไฝ ็š„ๅ›ž็ญ”ไธญๅชๅŒ…ๅซ็›ธๅ…ณไฟกๆฏใ€‚ๅฆ‚ๆžœไฝ 
ไธ็กฎๅฎš้—ฎ้ข˜็š„็ญ”ๆกˆ๏ผŒ่ฏท้ฟๅ…ๅˆ†ไบซไธๅ‡†็กฎ็š„ไฟกๆฏใ€‚ +### ๆœ็ดข็ป“ๆžœ๏ผš{context} +### ้—ฎ้ข˜๏ผš{question} +### ๅ›ž็ญ”๏ผš +""" + else: + template = """ +### You are a helpful, respectful and honest assistant to help the user with questions. \ +Please refer to the search results obtained from the local knowledge base. \ +But be careful to not incorporate the information that you think is not relevant to the question. \ +If you don't know the answer to a question, please don't share false information. \n +### Search results: {context} \n +### Question: {question} \n +### Answer: +""" + return template.format(context=context_str, question=question) + + +input_sentences = [ + "DeepSpeed is a machine learning framework", + "He is working on", + "He has a", + "He got all", + "Everyone is happy and I can", + "The new movie that got Oscar this year", + "In the far far distance from our galaxy,", + "Peace is the only way", +] + + +llm_model = os.getenv("LLM_NATIVE_MODEL", "Qwen/Qwen2-7B-Instruct") +args_dict = { + "device": "hpu", + "model_name_or_path": llm_model, + "bf16": True, + "max_new_tokens": 100, + "max_input_tokens": 0, + "batch_size": 1, + "warmup": 3, + "n_iterations": 5, + "local_rank": 0, + "use_kv_cache": True, + "use_hpu_graphs": True, + "dataset_name": None, + "column_name": None, + "do_sample": False, + "num_beams": 1, + "trim_logits": False, + "seed": 27, + "profiling_warmup_steps": 0, + "profiling_steps": 0, + "profiling_record_shapes": False, + "prompt": None, + "bad_words": None, + "force_words": None, + "assistant_model": None, + "peft_model": None, + "num_return_sequences": 1, + "token": None, + "model_revision": "main", + "attn_softmax_bf16": False, + "output_dir": None, + "bucket_size": -1, + "bucket_internal": False, + "dataset_max_samples": -1, + "limit_hpu_graphs": False, + "reuse_cache": False, + "verbose_workers": False, + "simulate_dyn_prompt": None, + "reduce_recompile": False, + "use_flash_attention": False, + "flash_attention_recompute": False, + "flash_attention_causal_mask": False, + "flash_attention_fast_softmax": False, + "book_source": False, + "torch_compile": False, + "ignore_eos": True, + "temperature": 1.0, + "top_p": 1.0, + "const_serialization_path": None, + "disk_offload": False, + "trust_remote_code": False, + "quant_config": "", + "world_size": 0, +} diff --git a/comps/llms/text-generation/native/utils.py b/comps/llms/text-generation/native/utils.py index 3eef7a6e2..04cebfbd4 100644 --- a/comps/llms/text-generation/native/utils.py +++ b/comps/llms/text-generation/native/utils.py @@ -1,10 +1,11 @@ -# Copyright (c) 2024 Intel Corporation +# coding=utf-8 +# Copyright 2022 The HuggingFace Team. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, @@ -12,11 +13,17 @@ # See the License for the specific language governing permissions and # limitations under the License. +############################################################################### +# Copyright (C) 2020-2021 Habana Labs, Ltd. 
an Intel Company +############################################################################### import copy +import glob import os import shutil +import tempfile import time +from pathlib import Path import torch from optimum.habana.checkpoint_utils import ( @@ -26,66 +33,376 @@ model_on_meta, write_checkpoints_json, ) -from optimum.habana.utils import check_habana_frameworks_version, check_optimum_habana_min_version, set_seed +from optimum.habana.utils import ( + check_habana_frameworks_version, + check_optimum_habana_min_version, + get_habana_frameworks_version, + set_seed, +) from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer from transformers.utils import check_min_version -def setup_env(): +def adjust_batch(batch, size): + curr_size = batch["input_ids"].shape[1] + if curr_size >= size: + adjusted_batch = { + "input_ids": batch["input_ids"][:, :size], + "attention_mask": batch["attention_mask"][:, :size], + } + else: + adjusted_batch = {} + for k in batch.keys(): + last_colm = batch[k][:, -1] + expanded = last_colm.tile((size - curr_size, 1)).T + adjusted_batch[k] = torch.concat([batch[k], expanded], 1) + assert adjusted_batch["input_ids"].shape[1] == size + assert adjusted_batch["attention_mask"].shape[1] == size + return adjusted_batch + + +def override_print(enable): + import builtins as __builtin__ + + builtin_print = __builtin__.print + + def print(*args, **kwargs): + force = kwargs.pop("force", False) + if force or enable: + builtin_print(*args, **kwargs) + + __builtin__.print = print + + +def override_logger(logger, enable): + logger_info = logger.info + + def info(*args, **kwargs): + force = kwargs.pop("force", False) + if force or enable: + logger_info(*args, **kwargs) + + logger.info = info + + +def count_hpu_graphs(): + return len(glob.glob(".graph_dumps/*PreGraph*")) + + +def override_prints(enable, logger): + override_print(enable) + override_logger(logger, enable) + + +def setup_distributed(args): + args.local_rank = int(os.getenv("LOCAL_RANK", "0")) + args.world_size = int(os.getenv("WORLD_SIZE", "0")) + args.global_rank = int(os.getenv("RANK", "0")) + + +def setup_inference(args, model): + import habana_frameworks.torch.core as htcore + + habana_version = get_habana_frameworks_version() + + print("Initializing inference mode") + # Keeping the if-else here for back compat. TODO remove later + if habana_version.major >= 1 and habana_version.minor >= 16: + htcore.hpu_initialize(model, mark_only_scales_as_const=True) + else: + const_marking = os.getenv("ENABLE_CONST_MARKING", "True") + if const_marking == "True": + htcore.hpu_initialize(model) + return model + + +def setup_const_serialization(const_serialization_path): + import uuid + + const_serialization_path = os.path.join(const_serialization_path + uuid.uuid4().hex) + os.makedirs(const_serialization_path) + from habana_frameworks.torch.hpu import enable_const_section_serialization + + print("Serializing const params to {}".format(const_serialization_path)) + enable_const_section_serialization(const_serialization_path, True) + + +def setup_env(args): # Will error if the minimal version of Transformers is not installed. Remove at your own risks. 
check_min_version("4.34.0") check_optimum_habana_min_version("1.9.0.dev0") # TODO: SW-167588 - WA for memory issue in hqt prep_model os.environ.setdefault("EXPERIMENTAL_WEIGHT_SHARING", "FALSE") + if args.global_rank == 0 and not args.torch_compile: + os.environ.setdefault("GRAPH_VISUALIZATION", "true") + shutil.rmtree(".graph_dumps", ignore_errors=True) + + if args.world_size > 0: + os.environ.setdefault("PT_HPU_LAZY_ACC_PAR_MODE", "0") + os.environ.setdefault("PT_HPU_ENABLE_LAZY_COLLECTIVES", "true") + + if args.use_hpu_graphs and args.limit_hpu_graphs and not args.reuse_cache and args.bucket_internal: + # Based upon above conditions and below env variable, + # we can call HPU graphs clear_inputs(). + os.environ.setdefault("PT_HPUGRAPH_DISABLE_TENSOR_CACHE", "1") + # Tweak generation so that it runs faster on Gaudi from optimum.habana.transformers.modeling_utils import adapt_transformers_to_gaudi adapt_transformers_to_gaudi() -def setup_device(): - import habana_frameworks.torch.core as htcore +def setup_device(args): + if args.device == "hpu": + import habana_frameworks.torch.core as htcore + + if args.quant_config: + htcore.hpu_set_env() + return torch.device(args.device) + - return torch.device("hpu") +# patching LinearAllreduce to use ScopedLinearAllReduce +def patch_scoped_linear_all_reduce(model): + from deepspeed.module_inject.layers import LinearAllreduce + from optimum.habana.transformers.models.modeling_all_models import ScopedLinearAllReduce + + for name, module in model.named_children(): + if type(module) is LinearAllreduce: + SL = ScopedLinearAllReduce(mod=module) + setattr(model, name, SL) + patch_scoped_linear_all_reduce(module) def get_torch_compiled_model(model): - model.model = torch.compile(model.model, backend="hpu_backend") + model.model = torch.compile(model.model, backend="hpu_backend", options={"keep_input_mutations": True}) return model -def setup_model(model_name_or_path, model_dtype, model_kwargs): - model = AutoModelForCausalLM.from_pretrained(model_name_or_path, torch_dtype=model_dtype, **model_kwargs) - model = model.eval().to("hpu") +def setup_model(args, model_dtype, model_kwargs, logger): + logger.info("Single-device run.") + if args.assistant_model is None: + assistant_model = None + else: + logger.info(f"Using asssitant model {args.assistant_model}.") + if args.disk_offload: + from accelerate import infer_auto_device_map, init_empty_weights + + config = AutoConfig.from_pretrained(args.model_name_or_path) + with init_empty_weights(): + model = AutoModelForCausalLM.from_config(config) + max_memory = {"cpu": "10GiB"} + device_map = infer_auto_device_map(model, max_memory=max_memory, dtype=model_dtype) + model = AutoModelForCausalLM.from_pretrained( + args.model_name_or_path, + device_map=device_map, + offload_folder="/tmp/offload_folder/", + offload_state_dict=True, + torch_dtype=model_dtype, + **model_kwargs, + ) + else: + if args.assistant_model is not None: + assistant_model = AutoModelForCausalLM.from_pretrained( + args.assistant_model, torch_dtype=model_dtype, **model_kwargs + ) + if args.peft_model is not None: + model = peft_model(args, model_dtype, logger, **model_kwargs) + else: + model = AutoModelForCausalLM.from_pretrained( + args.model_name_or_path, torch_dtype=model_dtype, **model_kwargs + ) + if args.quant_config: + import habana_quantization_toolkit + + habana_quantization_toolkit.prep_model(model) + if args.assistant_model is not None: + habana_quantization_toolkit.quantize_model(assistant_model) + + model = model.eval().to(args.device) + 
if args.assistant_model is not None: + assistant_model = assistant_model.eval().to(args.device) + + if args.use_hpu_graphs: + from habana_frameworks.torch.hpu import wrap_in_hpu_graph + from optimum.habana.transformers.trainer import _is_peft_model + + if check_habana_frameworks_version("1.13.0") and model.config.model_type == "falcon": + model = wrap_in_hpu_graph(model, hash_with_views=False) + else: + model = wrap_in_hpu_graph(model) + if args.assistant_model is not None: + assistant_model = wrap_in_hpu_graph(assistant_model) + if _is_peft_model(model): + model.base_model = wrap_in_hpu_graph(model.base_model) - from habana_frameworks.torch.hpu import wrap_in_hpu_graph + if args.torch_compile and model.config.model_type == "llama": + model = get_torch_compiled_model(model) + # if args.assistant_model is not None: + # assistant_model = get_torch_compiled_model(assistant_model) + return model, assistant_model + + +def setup_distributed_model(args, model_dtype, model_kwargs, logger): + import deepspeed - if check_habana_frameworks_version("1.13.0") and model.config.model_type == "falcon": - model = wrap_in_hpu_graph(model, hash_with_views=False) + logger.info("DeepSpeed is enabled.") + deepspeed.init_distributed(dist_backend="hccl") + config = AutoConfig.from_pretrained(args.model_name_or_path, torch_dtype=model_dtype, **model_kwargs) + load_to_meta = model_on_meta(config) + + if args.assistant_model is None: + assistant_model = None else: - model = wrap_in_hpu_graph(model) + logger.info(f"Using asssitant model {args.assistant_model}.") - if model.config.model_type == "llama": + if load_to_meta: + # Construct model with fake meta tensors, later will be replaced on devices during ds-inference ckpt load + with deepspeed.OnDevice(dtype=model_dtype, device="meta"): + model = AutoModelForCausalLM.from_config(config, torch_dtype=model_dtype) + + # Model loaded to meta is managed differently + checkpoints_json = tempfile.NamedTemporaryFile(suffix=".json", mode="+w") + + # For PEFT models, write the merged model on disk to be able to load it on the meta device + if args.peft_model is not None: + merged_model_dir = "/tmp/text_generation_merged_peft_model" + if args.local_rank == 0: + if Path(merged_model_dir).is_dir(): + shutil.rmtree(merged_model_dir) + peft_model(args, model_dtype, logger, **model_kwargs).save_pretrained(merged_model_dir) + torch.distributed.barrier() + + write_checkpoints_json( + merged_model_dir if args.peft_model is not None else args.model_name_or_path, + args.local_rank, + checkpoints_json, + token=args.token, + ) + else: + # TODO: revisit placement on CPU when auto-injection is possible + with deepspeed.OnDevice(dtype=model_dtype, device="cpu"): + if args.peft_model is not None: + model = peft_model(args, model_dtype, logger, **model_kwargs) + else: + model = AutoModelForCausalLM.from_pretrained( + args.model_name_or_path, torch_dtype=model_dtype, **model_kwargs + ) + model.eval() + + if args.assistant_model is not None: + assistant_model = AutoModelForCausalLM.from_pretrained( + args.assistant_model, torch_dtype=model_dtype, **model_kwargs + ).eval() + + # Initialize the model + ds_inference_kwargs = {"dtype": model_dtype} + ds_inference_kwargs["tensor_parallel"] = {"tp_size": args.world_size} + ds_inference_kwargs["enable_cuda_graph"] = args.use_hpu_graphs + ds_inference_kwargs["injection_policy"] = get_ds_injection_policy(config) + if load_to_meta: + ds_inference_kwargs["checkpoint"] = checkpoints_json.name + + model = deepspeed.init_inference(model, **ds_inference_kwargs) 
+ model = model.module + if model.config.model_type in ["llama", "falcon", "qwen2"]: + patch_scoped_linear_all_reduce(model) + + if args.quant_config: + import habana_quantization_toolkit + + habana_quantization_toolkit.prep_model(model) + if args.assistant_model is not None: + habana_quantization_toolkit.prep_model(assistant_model) + + if args.torch_compile and model.config.model_type == "llama": model = get_torch_compiled_model(model) + # if args.assistant_model is not None: + # assistant_model = get_torch_compiled_model(assistant_model) + return model, assistant_model - return model +def peft_model(args, model_dtype, logger, **model_kwargs): + import importlib.util + + if importlib.util.find_spec("peft") is None: + raise ImportError("The `peft` package is not installed, please run: `pip install peft`.") + from peft import AutoPeftModelForCausalLM + from peft.config import PeftConfigMixin + + base_model_name = PeftConfigMixin.from_pretrained( + args.peft_model, + token=model_kwargs["token"] if "token" in model_kwargs else None, + ).base_model_name_or_path + + base_model_is_local = Path(base_model_name).is_dir() + if not base_model_is_local: + # Check if the base model path to a remote repository on the HF Hub exists + from huggingface_hub import list_repo_files + + try: + list_repo_files(base_model_name) + base_model_is_remote = True + except Exception: + base_model_is_remote = False + + if base_model_is_local or base_model_is_remote: + model = AutoPeftModelForCausalLM.from_pretrained(args.peft_model, torch_dtype=model_dtype, **model_kwargs) + else: + # Since the base model doesn't exist locally nor remotely, use `args.model_name_or_path` as the base model + logger.warning( + f"The base model `{base_model_name}` of the LoRA configuration associated" + f" to `{args.peft_model}` does not exist locally or remotely. Using " + f"`--model_name_or_path {args.model_name_or_path}` as a fall back for the base model." 
+ ) + from peft import PeftModel + + model = AutoModelForCausalLM.from_pretrained(args.model_name_or_path, torch_dtype=model_dtype, **model_kwargs) + model = PeftModel.from_pretrained(model, args.peft_model, torch_dtype=model_dtype, **model_kwargs) + if hasattr(model, "merge_and_unload"): + model = model.merge_and_unload() + if model_dtype == torch.bfloat16: + model = model.to(torch.bfloat16) + return model + else: + from optimum.habana.peft.peft_model import gaudi_generate, gaudi_prepare_inputs_for_generation + + model.__class__.generate = gaudi_generate + model.__class__.prepare_inputs_for_generation = gaudi_prepare_inputs_for_generation + return model -def setup_tokenizer(model_name_or_path, model): + +def setup_tokenizer(args, model, assistant_model): tokenizer_kwargs = { - "revision": "main", - "token": None, + "revision": args.model_revision, + "token": args.token, + "trust_remote_code": args.trust_remote_code, } - tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, **tokenizer_kwargs) + if args.bad_words is not None or args.force_words is not None: + tokenizer_kwargs["add_prefix_space"] = True + tokenizer = AutoTokenizer.from_pretrained(args.model_name_or_path, **tokenizer_kwargs) if not model.config.is_encoder_decoder: tokenizer.padding_side = "left" - # Some models like GPT2 do not have a PAD token so we have to set it if necessary + if model.config.model_type == "llama": # unwind broken decapoda-research config model.generation_config.pad_token_id = 0 model.generation_config.bos_token_id = 1 model.generation_config.eos_token_id = 2 + if assistant_model is not None: + assistant_model.generation_config.pad_token_id = 0 + assistant_model.generation_config.bos_token_id = 1 + assistant_model.generation_config.eos_token_id = 2 + tokenizer.bos_token_id = model.generation_config.bos_token_id + tokenizer.eos_token_id = model.generation_config.eos_token_id + tokenizer.pad_token_id = model.generation_config.pad_token_id + tokenizer.pad_token = tokenizer.decode(tokenizer.pad_token_id) + tokenizer.eos_token = tokenizer.decode(tokenizer.eos_token_id) + tokenizer.bos_token = tokenizer.decode(tokenizer.bos_token_id) + if model.config.model_type == "persimmon": + model.generation_config.pad_token_id = model.generation_config.eos_token_id + if assistant_model is not None: + assistant_model.generation_config.pad_token_id = assistant_model.generation_config.eos_token_id tokenizer.bos_token_id = model.generation_config.bos_token_id tokenizer.eos_token_id = model.generation_config.eos_token_id tokenizer.pad_token_id = model.generation_config.pad_token_id @@ -93,54 +410,112 @@ def setup_tokenizer(model_name_or_path, model): tokenizer.eos_token = tokenizer.decode(tokenizer.eos_token_id) tokenizer.bos_token = tokenizer.decode(tokenizer.bos_token_id) + # Some models like GPT2 do not have a PAD token so we have to set it if necessary if tokenizer.pad_token is None: tokenizer.pad_token = tokenizer.eos_token model.generation_config.pad_token_id = model.generation_config.eos_token_id - return tokenizer, model + if assistant_model is not None: + assistant_model.generation_config.pad_token_id = assistant_model.generation_config.eos_token_id + return tokenizer, model, assistant_model -def setup_generation_config(model, tokenizer, max_new_tokens): + +def setup_generation_config(args, model, assistant_model, tokenizer): bad_words_ids = None force_words_ids = None + if args.bad_words is not None: + bad_words_ids = [tokenizer.encode(bad_word, add_special_tokens=False) for bad_word in args.bad_words] + if 
args.force_words is not None: + force_words_ids = [tokenizer.encode(force_word, add_special_tokens=False) for force_word in args.force_words] is_optimized = model_is_optimized(model.config) + # Generation configuration generation_config = copy.deepcopy(model.generation_config) - generation_config.max_new_tokens = max_new_tokens - generation_config.use_cache = True - generation_config.static_shapes = is_optimized - generation_config.bucket_size = -1 - generation_config.bucket_internal = True - generation_config.do_sample = True - generation_config.num_beams = 1 + generation_config.max_new_tokens = args.max_new_tokens + generation_config.use_cache = args.use_kv_cache + generation_config.static_shapes = is_optimized and assistant_model is None + generation_config.bucket_size = args.bucket_size if is_optimized else -1 + generation_config.bucket_internal = args.bucket_internal + generation_config.do_sample = args.do_sample + generation_config.num_beams = args.num_beams generation_config.bad_words_ids = bad_words_ids generation_config.force_words_ids = force_words_ids - generation_config.num_return_sequences = 1 - generation_config.trim_logits = True - generation_config.attn_softmax_bf16 = True - generation_config.limit_hpu_graphs = True - generation_config.reuse_cache = False - generation_config.reduce_recompile = False - generation_config.use_flash_attention = False - generation_config.flash_attention_recompute = True - generation_config.flash_attention_causal_mask = True + generation_config.num_return_sequences = args.num_return_sequences + generation_config.trim_logits = args.trim_logits + generation_config.attn_softmax_bf16 = args.attn_softmax_bf16 + generation_config.limit_hpu_graphs = args.limit_hpu_graphs + generation_config.reuse_cache = args.reuse_cache + generation_config.reduce_recompile = args.reduce_recompile + if generation_config.reduce_recompile: + assert generation_config.bucket_size > 0 + generation_config.use_flash_attention = args.use_flash_attention + generation_config.flash_attention_recompute = args.flash_attention_recompute + generation_config.flash_attention_causal_mask = args.flash_attention_causal_mask + generation_config.flash_attention_fast_softmax = args.flash_attention_fast_softmax + generation_config.trust_remote_code = args.trust_remote_code + return generation_config -def initialize_model(model_name_or_path, max_new_tokens=128): +def exclude_hpu_graph_configs(args): + # Excluded configs for batch size 1 for hpu graph + if args.batch_size == 1 and args.limit_hpu_graphs: + if "falcon-180B" in args.model_name_or_path or "falcon-180b" in args.model_name_or_path: + return False + if args.world_size == 2 or args.world_size == 4 or args.world_size == 8: + if args.quant_config: + if args.max_input_tokens >= 8192 and args.max_new_tokens >= 128: + return False + else: + if args.max_input_tokens >= 4096 and args.max_new_tokens >= 128: + return False + return True + else: + return False + + +def initialize_model(args, logger): init_start = time.perf_counter() - setup_env() - setup_device() - set_seed(17) - get_repo_root(model_name_or_path, local_rank=0, token=None) - model_dtype = torch.bfloat16 + setup_distributed(args) + if exclude_hpu_graph_configs(args): + args.limit_hpu_graphs = False + override_prints(args.global_rank == 0 or args.verbose_workers, logger) + setup_env(args) + setup_device(args) + set_seed(args.seed) + get_repo_root(args.model_name_or_path, local_rank=args.local_rank, token=args.token) + if args.assistant_model is not None: + 
get_repo_root(args.assistant_model, local_rank=args.local_rank, token=args.token) + use_deepspeed = args.world_size > 0 + if use_deepspeed or args.bf16: + model_dtype = torch.bfloat16 + else: + model_dtype = torch.float + args.attn_softmax_bf16 = False - model_kwargs = {"revision": "main", "token": None, "device_map": "auto", "offload_folder": "/tmp/offload_folder/"} + model_kwargs = { + "revision": args.model_revision, + "token": args.token, + "trust_remote_code": args.trust_remote_code, + } + if args.trust_remote_code: + logger.warning("`trust_remote_code` is set, there is no guarantee this model works properly and it may fail") - model = setup_model(model_name_or_path, model_dtype, model_kwargs) - tokenizer, model = setup_tokenizer(model_name_or_path, model) - generation_config = setup_generation_config(model, tokenizer, max_new_tokens) + model, assistant_model = ( + setup_model(args, model_dtype, model_kwargs, logger) + if not use_deepspeed + else setup_distributed_model(args, model_dtype, model_kwargs, logger) + ) + tokenizer, model, assistant_model = setup_tokenizer(args, model, assistant_model) + generation_config = setup_generation_config(args, model, assistant_model, tokenizer) + if args.const_serialization_path: + setup_const_serialization(args.const_serialization_path) + if args.quant_config: + model = setup_inference(args, model) init_end = time.perf_counter() - print(f"Model initialization took {(init_end - init_start):.3f}s") - return model, tokenizer, generation_config + logger.info(f"Args: {args}") + logger.info(f"device: {args.device}, n_hpu: {args.world_size}, bf16: {model_dtype == torch.bfloat16}") + logger.info(f"Model initialization took {(init_end - init_start):.3f}s") + return model, assistant_model, tokenizer, generation_config diff --git a/comps/llms/text-generation/ollama/Dockerfile b/comps/llms/text-generation/ollama/Dockerfile index 876ca1eef..bf78ff394 100644 --- a/comps/llms/text-generation/ollama/Dockerfile +++ b/comps/llms/text-generation/ollama/Dockerfile @@ -1,14 +1,12 @@ - # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 FROM langchain/langchain:latest RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ + curl \ libgl1-mesa-glx \ - libjemalloc-dev \ - vim \ - curl + libjemalloc-dev RUN useradd -m -s /bin/bash user && \ mkdir -p /home/user && \ diff --git a/comps/llms/text-generation/ollama/README.md b/comps/llms/text-generation/ollama/README.md index 1ad636098..5a86e8c61 100644 --- a/comps/llms/text-generation/ollama/README.md +++ b/comps/llms/text-generation/ollama/README.md @@ -2,9 +2,9 @@ [Ollama](https://github.com/ollama/ollama) allows you to run open-source large language models, such as Llama 3, locally. Ollama bundles model weights, configuration, and data into a single package, defined by a Modelfile. Ollama is a lightweight, extensible framework for building and running language models on the local machine. It provides a simple API for creating, running, and managing models, as well as a library of pre-built models that can be easily used in a variety of applications. It's the best choice to deploy large language models on AIPC locally. -# Get Started +## Get Started -## Setup +### Setup Follow [these instructions](https://github.com/ollama/ollama) to set up and run a local Ollama instance. @@ -15,27 +15,35 @@ Follow [these instructions](https://github.com/ollama/ollama) to set up and run Note: Special settings are necessary to pull models behind the proxy. 
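As a quick sanity check once the proxy settings described in the steps below are in place, the standard Ollama CLI and HTTP API can be used to pull a model and list what is served locally. This is an illustrative sketch rather than part of the patch; `llama3` is just an example model name.

```bash
# Pull a model through the configured proxy (example model name)
ollama pull llama3
# List the models Ollama currently serves on localhost:11434
curl http://localhost:11434/api/tags
```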
-```bash -sudo vim /etc/systemd/system/ollama.service -``` +- Step1: Modify the ollama service configure file. -Add your proxy to the above configure file. + ```bash + sudo vim /etc/systemd/system/ollama.service + ``` -```markdown -[Service] -Environment="http_proxy=${your_proxy}" -Environment="https_proxy=${your_proxy}" -``` + Add your proxy to the above configure file. + + ```markdown + [Service] + Environment="http_proxy=${your_proxy}" + Environment="https_proxy=${your_proxy}" + ``` + +- Step2: Restart the ollama service. + ```bash + sudo systemctl daemon-reload + sudo systemctl restart ollama + ``` -## Usage +### Usage Here are a few ways to interact with pulled local models: -### In the terminal +#### In the terminal All of your local models are automatically served on localhost:11434. Run ollama run to start interacting via the command line directly. -### API access +#### API access Send an application/json request to the API endpoint of Ollama to interact. @@ -46,20 +54,20 @@ curl http://localhost:11434/api/generate -d '{ }' ``` -# Build Docker Image +## Build Docker Image ```bash cd GenAIComps/ docker build --no-cache -t opea/llm-ollama:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/text-generation/ollama/Dockerfile . ``` -# Run the Ollama Microservice +## Run the Ollama Microservice ```bash -docker run --network host opea/llm-ollama:latest +docker run --network host -e http_proxy=$http_proxy -e https_proxy=$https_proxy opea/llm-ollama:latest ``` -# Consume the Ollama Microservice +## Consume the Ollama Microservice ```bash curl http://127.0.0.1:9000/v1/chat/completions -X POST -d '{"model": "llama3", "query":"What is Deep Learning?","max_new_tokens":32,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' -H 'Content-Type: application/json' diff --git a/comps/llms/text-generation/ollama/llm.py b/comps/llms/text-generation/ollama/llm.py index 5374cfa69..06d02461c 100644 --- a/comps/llms/text-generation/ollama/llm.py +++ b/comps/llms/text-generation/ollama/llm.py @@ -5,9 +5,11 @@ from fastapi.responses import StreamingResponse from langchain_community.llms import Ollama -from langsmith import traceable -from comps import GeneratedDoc, LLMParamsDoc, ServiceType, opea_microservices, register_microservice +from comps import CustomLogger, GeneratedDoc, LLMParamsDoc, ServiceType, opea_microservices, register_microservice + +logger = CustomLogger("llm_ollama") +logflag = os.getenv("LOGFLAG", False) @register_microservice( @@ -17,11 +19,12 @@ host="0.0.0.0", port=9000, ) -@traceable(run_type="llm") def llm_generate(input: LLMParamsDoc): + if logflag: + logger.info(input) ollama = Ollama( base_url=ollama_endpoint, - model=input.model, + model=input.model if input.model else model_name, num_predict=input.max_new_tokens, top_k=input.top_k, top_p=input.top_p, @@ -36,17 +39,22 @@ async def stream_generator(): async for text in ollama.astream(input.query): chat_response += text chunk_repr = repr(text.encode("utf-8")) - print(f"[llm - chat_stream] chunk:{chunk_repr}") + if logflag: + logger.info(f"[llm - chat_stream] chunk:{chunk_repr}") yield f"data: {chunk_repr}\n\n" - print(f"[llm - chat_stream] stream response: {chat_response}") + if logflag: + logger.info(f"[llm - chat_stream] stream response: {chat_response}") yield "data: [DONE]\n\n" return StreamingResponse(stream_generator(), media_type="text/event-stream") else: response = ollama.invoke(input.query) + if logflag: + logger.info(response) 
return GeneratedDoc(text=response, prompt=input.query) if __name__ == "__main__": ollama_endpoint = os.getenv("OLLAMA_ENDPOINT", "http://localhost:11434") + model_name = os.getenv("OLLAMA_MODEL", "meta-llama/Meta-Llama-3-8B-Instruct") opea_microservices["opea_service@llm_ollama"].start() diff --git a/comps/llms/text-generation/ollama/requirements.txt b/comps/llms/text-generation/ollama/requirements.txt index e224aaaa5..78b323c73 100644 --- a/comps/llms/text-generation/ollama/requirements.txt +++ b/comps/llms/text-generation/ollama/requirements.txt @@ -2,7 +2,6 @@ docarray[full] fastapi huggingface_hub langchain==0.1.16 -langsmith opentelemetry-api opentelemetry-exporter-otlp opentelemetry-sdk diff --git a/comps/llms/text-generation/ray_serve/llm.py b/comps/llms/text-generation/ray_serve/llm.py new file mode 100644 index 000000000..c86025625 --- /dev/null +++ b/comps/llms/text-generation/ray_serve/llm.py @@ -0,0 +1,82 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os + +from fastapi.responses import StreamingResponse +from langchain_openai import ChatOpenAI + +from comps import GeneratedDoc, LLMParamsDoc, ServiceType, opea_microservices, register_microservice + + +def post_process_text(text: str): + if text == " ": + return "data: @#$\n\n" + if text == "\n": + return "data:
\n\n" + if text.isspace(): + return None + new_text = text.replace(" ", "@#$") + return f"data: {new_text}\n\n" + + +@register_microservice( + name="opea_service@llm_ray", + service_type=ServiceType.LLM, + endpoint="/v1/chat/completions", + host="0.0.0.0", + port=9000, +) +def llm_generate(input: LLMParamsDoc): + llm_endpoint = os.getenv("RAY_Serve_ENDPOINT", "http://localhost:8080") + llm_model = os.getenv("LLM_MODEL", "Llama-2-7b-chat-hf") + if "/" in llm_model: + llm_model = llm_model.split("/")[-1] + llm = ChatOpenAI( + openai_api_base=llm_endpoint + "/v1", + model_name=llm_model, + openai_api_key=os.getenv("OPENAI_API_KEY", "not_needed"), + max_tokens=input.max_new_tokens, + temperature=input.temperature, + streaming=input.streaming, + request_timeout=600, + ) + + if input.streaming: + + async def stream_generator(): + chat_response = "" + async for text in llm.astream(input.query): + text = text.content + chat_response += text + processed_text = post_process_text(text) + if text and processed_text: + if "" in text: + res = text.split("")[0] + if res != "": + yield res + break + yield processed_text + print(f"[llm - chat_stream] stream response: {chat_response}") + yield "data: [DONE]\n\n" + + return StreamingResponse(stream_generator(), media_type="text/event-stream") + else: + response = llm.invoke(input.query) + response = response.content + return GeneratedDoc(text=response, prompt=input.query) + + +if __name__ == "__main__": + opea_microservices["opea_service@llm_ray"].start() diff --git a/comps/llms/text-generation/ray_serve/requirements.txt b/comps/llms/text-generation/ray_serve/requirements.txt new file mode 100644 index 000000000..d97ce32e2 --- /dev/null +++ b/comps/llms/text-generation/ray_serve/requirements.txt @@ -0,0 +1,13 @@ +docarray[full] +fastapi +huggingface_hub +langchain==0.1.16 +langchain_openai +openai +opentelemetry-api +opentelemetry-exporter-otlp +opentelemetry-sdk +prometheus-fastapi-instrumentator +ray[serve]>=2.10 +shortuuid +transformers diff --git a/comps/llms/text-generation/tgi/Dockerfile b/comps/llms/text-generation/tgi/Dockerfile index 545af59df..6797f8603 100644 --- a/comps/llms/text-generation/tgi/Dockerfile +++ b/comps/llms/text-generation/tgi/Dockerfile @@ -1,4 +1,3 @@ - # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 @@ -6,8 +5,7 @@ FROM langchain/langchain:latest RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ libgl1-mesa-glx \ - libjemalloc-dev \ - vim + libjemalloc-dev RUN useradd -m -s /bin/bash user && \ mkdir -p /home/user && \ diff --git a/comps/llms/text-generation/tgi/README.md b/comps/llms/text-generation/tgi/README.md index 6c9607ca9..f34dd0374 100644 --- a/comps/llms/text-generation/tgi/README.md +++ b/comps/llms/text-generation/tgi/README.md @@ -2,27 +2,24 @@ [Text Generation Inference](https://github.com/huggingface/text-generation-inference) (TGI) is a toolkit for deploying and serving Large Language Models (LLMs). TGI enables high-performance text generation for the most popular open-source LLMs, including Llama, Falcon, StarCoder, BLOOM, GPT-NeoX, and more. -# ๐Ÿš€1. Start Microservice with Python (Option 1) +## ๐Ÿš€1. Start Microservice with Python (Option 1) To start the LLM microservice, you need to install python packages first. 
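If you prefer to keep the microservice dependencies isolated, a virtual environment can be created before running the `pip install` step shown below. This is an optional sketch, not part of the patch; the requirements path assumes you start from the repository root, so adjust it if you work from the component directory.

```bash
# Optional: isolate dependencies before installing the requirements listed below
python3 -m venv .venv && source .venv/bin/activate
pip install -r comps/llms/text-generation/tgi/requirements.txt
```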
-## 1.1 Install Requirements +### 1.1 Install Requirements ```bash pip install -r requirements.txt ``` -## 1.2 Start LLM Service +### 1.2 Start LLM Service ```bash export HF_TOKEN=${your_hf_api_token} -export LANGCHAIN_TRACING_V2=true -export LANGCHAIN_API_KEY=${your_langchain_api_key} -export LANGCHAIN_PROJECT="opea/gen-ai-comps:llms" docker run -p 8008:80 -v ./data:/data --name tgi_service --shm-size 1g ghcr.io/huggingface/text-generation-inference:2.1.0 --model-id ${your_hf_llm_model} ``` -## 1.3 Verify the TGI Service +### 1.3 Verify the TGI Service ```bash curl http://${your_ip}:8008/generate \ @@ -31,18 +28,18 @@ curl http://${your_ip}:8008/generate \ -H 'Content-Type: application/json' ``` -## 1.4 Start LLM Service with Python Script +### 1.4 Start LLM Service with Python Script ```bash export TGI_LLM_ENDPOINT="http://${your_ip}:8008" python text-generation/tgi/llm.py ``` -# ๐Ÿš€2. Start Microservice with Docker (Option 2) +## ๐Ÿš€2. Start Microservice with Docker (Option 2) If you start an LLM microservice with docker, the `docker_compose_llm.yaml` file will automatically start a TGI/vLLM service with docker. -## 2.1 Setup Environment Variables +### 2.1 Setup Environment Variables In order to start TGI and LLM services, you need to setup the following environment variables first. @@ -50,12 +47,9 @@ In order to start TGI and LLM services, you need to setup the following environm export HF_TOKEN=${your_hf_api_token} export TGI_LLM_ENDPOINT="http://${your_ip}:8008" export LLM_MODEL_ID=${your_hf_llm_model} -export LANGCHAIN_TRACING_V2=true -export LANGCHAIN_API_KEY=${your_langchain_api_key} -export LANGCHAIN_PROJECT="opea/llms" ``` -## 2.2 Build Docker Image +### 2.2 Build Docker Image ```bash cd ../../ @@ -69,22 +63,22 @@ To start a docker container, you have two options: You can choose one as needed. -## 2.3 Run Docker with CLI (Option A) +### 2.3 Run Docker with CLI (Option A) ```bash docker run -d --name="llm-tgi-server" -p 9000:9000 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e TGI_LLM_ENDPOINT=$TGI_LLM_ENDPOINT -e HF_TOKEN=$HF_TOKEN opea/llm-tgi:latest ``` -## 2.4 Run Docker with Docker Compose (Option B) +### 2.4 Run Docker with Docker Compose (Option B) ```bash cd text-generation/tgi docker compose -f docker_compose_llm.yaml up -d ``` -# ๐Ÿš€3. Consume LLM Service +## ๐Ÿš€3. Consume LLM Service -## 3.1 Check Service Status +### 3.1 Check Service Status ```bash curl http://${your_ip}:9000/v1/health_check\ @@ -92,7 +86,7 @@ curl http://${your_ip}:9000/v1/health_check\ -H 'Content-Type: application/json' ``` -## 3.2 Consume LLM Service +### 3.2 Consume LLM Service You can set the following model parameters according to your actual needs, such as `max_new_tokens`, `streaming`. 
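Besides the streaming examples added below, the same endpoint returns a single JSON document when `streaming` is set to `false`, since the service then takes the non-streaming `GeneratedDoc` path. The following request is an illustrative variation with arbitrary parameter values, not part of the patch.

```bash
# Non-streaming request: the service returns one GeneratedDoc JSON object
curl http://${your_ip}:9000/v1/chat/completions \
  -X POST \
  -d '{"query":"What is Deep Learning?","max_new_tokens":32,"streaming":false}' \
  -H 'Content-Type: application/json'
```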
@@ -110,9 +104,21 @@ curl http://${your_ip}:9000/v1/chat/completions \ -X POST \ -d '{"query":"What is Deep Learning?","max_new_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \ -H 'Content-Type: application/json' + +# custom chat template +curl http://${your_ip}:9000/v1/chat/completions \ + -X POST \ + -d '{"query":"What is Deep Learning?","max_new_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true, "chat_template":"### You are a helpful, respectful and honest assistant to help the user with questions.\n### Context: {context}\n### Question: {question}\n### Answer:"}' \ + -H 'Content-Type: application/json' + +# consume with SearchedDoc +curl http://${your_ip}:9000/v1/chat/completions \ + -X POST \ + -d '{"initial_query":"What is Deep Learning?","retrieved_docs":[{"text":"Deep Learning is a ..."},{"text":"Deep Learning is b ..."}]}' \ + -H 'Content-Type: application/json' ``` -## 4. Validated Model +### 4. Validated Model | Model | TGI | | ------------------------- | --- | diff --git a/comps/llms/text-generation/tgi/docker_compose_llm.yaml b/comps/llms/text-generation/tgi/docker_compose_llm.yaml index c1ab98dcc..36269aeea 100644 --- a/comps/llms/text-generation/tgi/docker_compose_llm.yaml +++ b/comps/llms/text-generation/tgi/docker_compose_llm.yaml @@ -12,9 +12,16 @@ services: volumes: - "./data:/data" shm_size: 1g + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + HF_TOKEN: ${HF_TOKEN} + HF_HUB_DISABLE_PROGRESS_BARS: 1 + HF_HUB_ENABLE_HF_TRANSFER: 0 command: --model-id ${LLM_MODEL_ID} llm: - image: opea/gen-ai-comps:llm-tgi-server + image: opea/llm-tgi:latest container_name: llm-tgi-server ports: - "9000:9000" @@ -25,7 +32,6 @@ services: https_proxy: ${https_proxy} TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT} HF_TOKEN: ${HF_TOKEN} - LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY} restart: unless-stopped networks: diff --git a/comps/llms/text-generation/tgi/llm.py b/comps/llms/text-generation/tgi/llm.py index e267c21dc..d0ad2dbf1 100644 --- a/comps/llms/text-generation/tgi/llm.py +++ b/comps/llms/text-generation/tgi/llm.py @@ -3,20 +3,35 @@ import os import time +from typing import Union from fastapi.responses import StreamingResponse from huggingface_hub import AsyncInferenceClient -from langsmith import traceable +from langchain_core.prompts import PromptTemplate +from openai import OpenAI +from template import ChatTemplate from comps import ( + CustomLogger, GeneratedDoc, LLMParamsDoc, + SearchedDoc, ServiceType, opea_microservices, register_microservice, register_statistics, statistics_dict, ) +from comps.cores.proto.api_protocol import ChatCompletionRequest, ChatCompletionResponse, ChatCompletionStreamResponse + +logger = CustomLogger("llm_tgi") +logflag = os.getenv("LOGFLAG", False) + +llm_endpoint = os.getenv("TGI_LLM_ENDPOINT", "http://localhost:8080") +llm = AsyncInferenceClient( + model=llm_endpoint, + timeout=600, +) @register_microservice( @@ -26,38 +41,83 @@ host="0.0.0.0", port=9000, ) -@traceable(run_type="llm") @register_statistics(names=["opea_service@llm_tgi"]) -async def llm_generate(input: LLMParamsDoc): +async def llm_generate(input: Union[LLMParamsDoc, ChatCompletionRequest, SearchedDoc]): + if logflag: + logger.info(input) + prompt_template = None + if not isinstance(input, SearchedDoc) and input.chat_template: + prompt_template = PromptTemplate.from_template(input.chat_template) + input_variables = 
prompt_template.input_variables + stream_gen_time = [] start = time.time() - if input.streaming: - - async def stream_generator(): - chat_response = "" - text_generation = await llm.text_generation( - prompt=input.query, - stream=input.streaming, - max_new_tokens=input.max_new_tokens, - repetition_penalty=input.repetition_penalty, - temperature=input.temperature, - top_k=input.top_k, - top_p=input.top_p, - ) - async for text in text_generation: - stream_gen_time.append(time.time() - start) - chat_response += text - chunk_repr = repr(text.encode("utf-8")) - print(f"[llm - chat_stream] chunk:{chunk_repr}") - yield f"data: {chunk_repr}\n\n" - print(f"[llm - chat_stream] stream response: {chat_response}") - statistics_dict["opea_service@llm_tgi"].append_latency(stream_gen_time[-1], stream_gen_time[0]) - yield "data: [DONE]\n\n" - - return StreamingResponse(stream_generator(), media_type="text/event-stream") - else: - response = await llm.text_generation( - prompt=input.query, + + if isinstance(input, SearchedDoc): + if logflag: + logger.info("[ SearchedDoc ] input from retriever microservice") + prompt = input.initial_query + if input.retrieved_docs: + docs = [doc.text for doc in input.retrieved_docs] + if logflag: + logger.info(f"[ SearchedDoc ] combined retrieved docs: {docs}") + prompt = ChatTemplate.generate_rag_prompt(input.initial_query, docs) + # use default llm parameters for inferencing + new_input = LLMParamsDoc(query=prompt) + if logflag: + logger.info(f"[ SearchedDoc ] final input: {new_input}") + text_generation = await llm.text_generation( + prompt=prompt, + stream=new_input.streaming, + max_new_tokens=new_input.max_new_tokens, + repetition_penalty=new_input.repetition_penalty, + temperature=new_input.temperature, + top_k=new_input.top_k, + top_p=new_input.top_p, + ) + if new_input.streaming: + + async def stream_generator(): + chat_response = "" + async for text in text_generation: + stream_gen_time.append(time.time() - start) + chat_response += text + chunk_repr = repr(text.encode("utf-8")) + if logflag: + logger.info(f"[ SearchedDoc ] chunk:{chunk_repr}") + yield f"data: {chunk_repr}\n\n" + if logflag: + logger.info(f"[ SearchedDoc ] stream response: {chat_response}") + statistics_dict["opea_service@llm_tgi"].append_latency(stream_gen_time[-1], stream_gen_time[0]) + yield "data: [DONE]\n\n" + + return StreamingResponse(stream_generator(), media_type="text/event-stream") + else: + statistics_dict["opea_service@llm_tgi"].append_latency(time.time() - start, None) + if logflag: + logger.info(text_generation) + return GeneratedDoc(text=text_generation, prompt=new_input.query) + + elif isinstance(input, LLMParamsDoc): + if logflag: + logger.info("[ LLMParamsDoc ] input from rerank microservice") + prompt = input.query + if prompt_template: + if sorted(input_variables) == ["context", "question"]: + prompt = prompt_template.format(question=input.query, context="\n".join(input.documents)) + elif input_variables == ["question"]: + prompt = prompt_template.format(question=input.query) + else: + logger.info( + f"[ LLMParamsDoc ] {prompt_template} not used, we only support 2 input variables ['question', 'context']" + ) + else: + if input.documents: + # use rag default template + prompt = ChatTemplate.generate_rag_prompt(input.query, input.documents) + + text_generation = await llm.text_generation( + prompt=prompt, stream=input.streaming, max_new_tokens=input.max_new_tokens, repetition_penalty=input.repetition_penalty, @@ -65,14 +125,130 @@ async def stream_generator(): top_k=input.top_k, 
top_p=input.top_p, ) - statistics_dict["opea_service@llm_tgi"].append_latency(time.time() - start, None) - return GeneratedDoc(text=response, prompt=input.query) + if input.streaming: + + async def stream_generator(): + chat_response = "" + async for text in text_generation: + stream_gen_time.append(time.time() - start) + chat_response += text + chunk_repr = repr(text.encode("utf-8")) + if logflag: + logger.info(f"[ LLMParamsDoc ] chunk:{chunk_repr}") + yield f"data: {chunk_repr}\n\n" + if logflag: + logger.info(f"[ LLMParamsDoc ] stream response: {chat_response}") + statistics_dict["opea_service@llm_tgi"].append_latency(stream_gen_time[-1], stream_gen_time[0]) + yield "data: [DONE]\n\n" + + return StreamingResponse(stream_generator(), media_type="text/event-stream") + else: + statistics_dict["opea_service@llm_tgi"].append_latency(time.time() - start, None) + if logflag: + logger.info(text_generation) + return GeneratedDoc(text=text_generation, prompt=input.query) + + else: + if logflag: + logger.info("[ ChatCompletionRequest ] input in opea format") + client = OpenAI( + api_key="EMPTY", + base_url=llm_endpoint + "/v1", + ) + + if isinstance(input.messages, str): + prompt = input.messages + if prompt_template: + if sorted(input_variables) == ["context", "question"]: + prompt = prompt_template.format(question=input.messages, context="\n".join(input.documents)) + elif input_variables == ["question"]: + prompt = prompt_template.format(question=input.messages) + else: + logger.info( + f"[ ChatCompletionRequest ] {prompt_template} not used, we only support 2 input variables ['question', 'context']" + ) + else: + if input.documents: + # use rag default template + prompt = ChatTemplate.generate_rag_prompt(input.messages, input.documents) + + chat_completion = client.completions.create( + model="tgi", + prompt=prompt, + best_of=input.best_of, + echo=input.echo, + frequency_penalty=input.frequency_penalty, + logit_bias=input.logit_bias, + logprobs=input.logprobs, + max_tokens=input.max_tokens, + n=input.n, + presence_penalty=input.presence_penalty, + seed=input.seed, + stop=input.stop, + stream=input.stream, + suffix=input.suffix, + temperature=input.temperature, + top_p=input.top_p, + user=input.user, + ) + else: + if input.messages[0]["role"] == "system": + if "{context}" in input.messages[0]["content"]: + if input.documents is None or input.documents == []: + input.messages[0]["content"].format(context="") + else: + input.messages[0]["content"].format(context="\n".join(input.documents)) + else: + if prompt_template: + system_prompt = prompt_template + if input_variables == ["context"]: + system_prompt = prompt_template.format(context="\n".join(input.documents)) + else: + logger.info( + f"[ ChatCompletionRequest ] {prompt_template} not used, only support 1 input variables ['context']" + ) + + input.messages.insert(0, {"role": "system", "content": system_prompt}) + + chat_completion = client.chat.completions.create( + model="tgi", + messages=input.messages, + frequency_penalty=input.frequency_penalty, + logit_bias=input.logit_bias, + logprobs=input.logprobs, + top_logprobs=input.top_logprobs, + max_tokens=input.max_tokens, + n=input.n, + presence_penalty=input.presence_penalty, + response_format=input.response_format, + seed=input.seed, + service_tier=input.service_tier, + stop=input.stop, + stream=input.stream, + stream_options=input.stream_options, + temperature=input.temperature, + top_p=input.top_p, + tools=input.tools, + tool_choice=input.tool_choice, + 
parallel_tool_calls=input.parallel_tool_calls, + user=input.user, + ) + + if input.stream: + + def stream_generator(): + for c in chat_completion: + if logflag: + logger.info(c) + yield f"data: {c.model_dump_json()}\n\n" + yield "data: [DONE]\n\n" + + return StreamingResponse(stream_generator(), media_type="text/event-stream") + else: + if logflag: + logger.info(chat_completion) + return chat_completion if __name__ == "__main__": - llm_endpoint = os.getenv("TGI_LLM_ENDPOINT", "http://localhost:8080") - llm = AsyncInferenceClient( - model=llm_endpoint, - timeout=600, - ) opea_microservices["opea_service@llm_tgi"].start() diff --git a/comps/llms/text-generation/tgi/requirements.txt b/comps/llms/text-generation/tgi/requirements.txt index 1e62d477d..6b6f11ee4 100644 --- a/comps/llms/text-generation/tgi/requirements.txt +++ b/comps/llms/text-generation/tgi/requirements.txt @@ -3,7 +3,7 @@ docarray[full] fastapi httpx huggingface_hub -langsmith +openai==1.35.13 opentelemetry-api opentelemetry-exporter-otlp opentelemetry-sdk diff --git a/comps/llms/text-generation/tgi/template.py b/comps/llms/text-generation/tgi/template.py new file mode 100644 index 000000000..447efcc67 --- /dev/null +++ b/comps/llms/text-generation/tgi/template.py @@ -0,0 +1,29 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import re + + +class ChatTemplate: + @staticmethod + def generate_rag_prompt(question, documents): + context_str = "\n".join(documents) + if context_str and len(re.findall("[\u4E00-\u9FFF]", context_str)) / len(context_str) >= 0.3: + # chinese context + template = """ +### ไฝ ๅฐ†ๆ‰ฎๆผ”ไธ€ไธชไนไบŽๅŠฉไบบใ€ๅฐŠ้‡ไป–ไบบๅนถ่ฏšๅฎž็š„ๅŠฉๆ‰‹๏ผŒไฝ ็š„็›ฎๆ ‡ๆ˜ฏๅธฎๅŠฉ็”จๆˆท่งฃ็ญ”้—ฎ้ข˜ใ€‚ๆœ‰ๆ•ˆๅœฐๅˆฉ็”จๆฅ่‡ชๆœฌๅœฐ็Ÿฅ่ฏ†ๅบ“็š„ๆœ็ดข็ป“ๆžœใ€‚็กฎไฟไฝ ็š„ๅ›ž็ญ”ไธญๅชๅŒ…ๅซ็›ธๅ…ณไฟกๆฏใ€‚ๅฆ‚ๆžœไฝ ไธ็กฎๅฎš้—ฎ้ข˜็š„็ญ”ๆกˆ๏ผŒ่ฏท้ฟๅ…ๅˆ†ไบซไธๅ‡†็กฎ็š„ไฟกๆฏใ€‚ +### ๆœ็ดข็ป“ๆžœ๏ผš{context} +### ้—ฎ้ข˜๏ผš{question} +### ๅ›ž็ญ”๏ผš +""" + else: + template = """ +### You are a helpful, respectful and honest assistant to help the user with questions. \ +Please refer to the search results obtained from the local knowledge base. \ +But be careful to not incorporate the information that you think is not relevant to the question. \ +If you don't know the answer to a question, please don't share false information. \n +### Search results: {context} \n +### Question: {question} \n +### Answer: +""" + return template.format(context=context_str, question=question) diff --git a/comps/llms/text-generation/vllm-openvino/README.md b/comps/llms/text-generation/vllm-openvino/README.md index 48f8f3305..d26a7f569 100644 --- a/comps/llms/text-generation/vllm-openvino/README.md +++ b/comps/llms/text-generation/vllm-openvino/README.md @@ -1,5 +1,10 @@ # Use vLLM with OpenVINO +vLLM powered by OpenVINO supports all LLM models from [vLLM supported models list](https://github.com/vllm-project/vllm/blob/main/docs/source/models/supported_models.rst) and can perform optimal model serving on all x86-64 CPUs with, at least, AVX2 support. 
OpenVINO vLLM backend supports the following advanced vLLM features: + +- Prefix caching (`--enable-prefix-caching`) +- Chunked prefill (`--enable-chunked-prefill`) + ## Build Docker Image To build the docker image, run the command @@ -59,15 +64,19 @@ export vLLM_LLM_ENDPOINT="http://xxx.xxx.xxx.xxx:8000" export LLM_MODEL= # example: export LLM_MODEL="meta-llama/Llama-2-7b-hf" ``` -## Use Int-8 Weights Compression +## Performance tips + +vLLM OpenVINO backend uses the following environment variables to control behavior: + +- `VLLM_OPENVINO_KVCACHE_SPACE` to specify the KV Cache size (e.g, `VLLM_OPENVINO_KVCACHE_SPACE=40` means 40 GB space for KV cache), larger setting will allow vLLM running more requests in parallel. This parameter should be set based on the hardware configuration and memory management pattern of users. + +- `VLLM_OPENVINO_CPU_KV_CACHE_PRECISION=u8` to control KV cache precision. By default, FP16 / BF16 is used depending on platform. -Weights int-8 compression is disabled by default. For better performance and lower memory consumption, the weights compression can be enabled by setting the environment variable `VLLM_OPENVINO_ENABLE_QUANTIZED_WEIGHTS=1`. -To pass the variable in docker, use `-e VLLM_OPENVINO_ENABLE_QUANTIZED_WEIGHTS=1` as an additional argument to `docker run` command in the examples above. +- `VLLM_OPENVINO_ENABLE_QUANTIZED_WEIGHTS=ON` to enable U8 weights compression during model loading stage. By default, compression is turned off. -The variable enables weights compression logic described in [optimum-intel 8-bit weights quantization](https://huggingface.co/docs/optimum/intel/optimization_ov#8-bit). -Hence, even if the variable is enabled, the compression is applied only for models starting with a certain size and avoids compression of too small models due to a significant accuracy drop. +To enable better TPOT / TTFT latency, you can use vLLM's chunked prefill feature (`--enable-chunked-prefill`). Based on the experiments, the recommended batch size is `256` (`--max-num-batched-tokens`) -## Use UInt-8 KV cache Compression +OpenVINO best known configuration is: -KV cache uint-8 compression is disabled by default. For better performance and lower memory consumption, the KV cache compression can be enabled by setting the environment variable `VLLM_OPENVINO_CPU_KV_CACHE_PRECISION=u8`. -To pass the variable in docker, use `-e VLLM_OPENVINO_CPU_KV_CACHE_PRECISION=u8` as an additional argument to `docker run` command in the examples above. + $ VLLM_OPENVINO_KVCACHE_SPACE=100 VLLM_OPENVINO_CPU_KV_CACHE_PRECISION=u8 VLLM_OPENVINO_ENABLE_QUANTIZED_WEIGHTS=ON \ + python3 vllm/benchmarks/benchmark_throughput.py --model meta-llama/Llama-2-7b-chat-hf --dataset vllm/benchmarks/ShareGPT_V3_unfiltered_cleaned_split.json --enable-chunked-prefill --max-num-batched-tokens 256 diff --git a/comps/llms/text-generation/vllm-openvino/build_vllm_openvino.sh b/comps/llms/text-generation/vllm-openvino/build_vllm_openvino.sh index 1b3e159fc..4566263bc 100755 --- a/comps/llms/text-generation/vllm-openvino/build_vllm_openvino.sh +++ b/comps/llms/text-generation/vllm-openvino/build_vllm_openvino.sh @@ -3,7 +3,8 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 - -git clone --branch openvino-model-executor https://github.com/ilya-lavrenov/vllm.git +BASEDIR="$( cd "$( dirname "$0" )" && pwd )" +git clone https://github.com/vllm-project/vllm.git vllm cd ./vllm/ docker build -t vllm:openvino -f Dockerfile.openvino . 
--build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy +cd $BASEDIR && rm -rf vllm diff --git a/comps/llms/text-generation/vllm-openvino/launch_model_server.sh b/comps/llms/text-generation/vllm-openvino/launch_model_server.sh index 887c31629..575088876 100755 --- a/comps/llms/text-generation/vllm-openvino/launch_model_server.sh +++ b/comps/llms/text-generation/vllm-openvino/launch_model_server.sh @@ -42,5 +42,20 @@ port_number=${port:-$default_port} # Set the Huggingface cache directory variable HF_CACHE_DIR=$HOME/.cache/huggingface -# Start the model server using Openvino as the backend inference engine. Provide the container name that is unique and meaningful, typically one that includes the model name. -docker run --rm --name="vllm-openvino-server" -p $port_number:$port_number -v $HF_CACHE_DIR:/root/.cache/huggingface vllm:openvino --model $model_name --port $port_number --disable-log-requests --swap-space $swap_space +# Start the model server using Openvino as the backend inference engine. +# Provide the container name that is unique and meaningful, typically one that includes the model name. + +docker run -d --rm --name="vllm-openvino-server" \ + -p $port_number:80 \ + --ipc=host \ + -e HTTPS_PROXY=$https_proxy \ + -e HTTP_PROXY=$https_proxy \ + -e HF_TOKEN=${HUGGINGFACEHUB_API_TOKEN} \ + -v $HOME/.cache/huggingface:/home/user/.cache/huggingface \ + vllm:openvino /bin/bash -c "\ + cd / && \ + export VLLM_CPU_KVCACHE_SPACE=50 && \ + python3 -m vllm.entrypoints.openai.api_server \ + --model \"$model_name\" \ + --host 0.0.0.0 \ + --port 80" diff --git a/comps/llms/text-generation/vllm-ray/build_docker_vllmray.sh b/comps/llms/text-generation/vllm-ray/build_docker_vllmray.sh index 9e9fe3b71..8c4c13d3b 100755 --- a/comps/llms/text-generation/vllm-ray/build_docker_vllmray.sh +++ b/comps/llms/text-generation/vllm-ray/build_docker_vllmray.sh @@ -5,7 +5,7 @@ cd ../../../../ docker build \ -f comps/llms/text-generation/vllm-ray/docker/Dockerfile.vllmray \ - -t vllm_ray:habana \ + -t opea/vllm_ray:habana \ --network=host \ --build-arg http_proxy=${http_proxy} \ --build-arg https_proxy=${https_proxy} \ diff --git a/comps/llms/text-generation/vllm-ray/docker/Dockerfile.microservice b/comps/llms/text-generation/vllm-ray/docker/Dockerfile.microservice index 10d6500a1..516ad1a4b 100644 --- a/comps/llms/text-generation/vllm-ray/docker/Dockerfile.microservice +++ b/comps/llms/text-generation/vllm-ray/docker/Dockerfile.microservice @@ -1,23 +1,11 @@ -# Copyright (c) 2024 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
+# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 FROM langchain/langchain:latest RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ libgl1-mesa-glx \ - libjemalloc-dev \ - vim + libjemalloc-dev RUN useradd -m -s /bin/bash user && \ mkdir -p /home/user && \ @@ -34,4 +22,4 @@ ENV PYTHONPATH=$PYTHONPATH:/home/user WORKDIR /home/user/comps/llms/text-generation/vllm-ray -ENTRYPOINT ["python", "llm.py"] \ No newline at end of file +ENTRYPOINT ["python", "llm.py"] diff --git a/comps/llms/text-generation/vllm-ray/docker/Dockerfile.vllmray b/comps/llms/text-generation/vllm-ray/docker/Dockerfile.vllmray index 53e900716..34105e2b0 100644 --- a/comps/llms/text-generation/vllm-ray/docker/Dockerfile.vllmray +++ b/comps/llms/text-generation/vllm-ray/docker/Dockerfile.vllmray @@ -1,15 +1,15 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -# FROM vault.habana.ai/gaudi-docker/1.15.1/ubuntu22.04/habanalabs/pytorch-installer-2.2.0:latest -FROM vault.habana.ai/gaudi-docker/1.16.0/ubuntu22.04/habanalabs/pytorch-installer-2.2.2:latest +# FROM vault.habana.ai/gaudi-docker/1.16.1/ubuntu22.04/habanalabs/pytorch-installer-2.2.2:latest as hpu +FROM opea/habanalabs:1.16.1-pytorch-installer-2.2.2 as hpu ENV LANG=en_US.UTF-8 -WORKDIR /root/vllm-ray +WORKDIR /home/user/vllm-ray # copy the source code to the package directory -COPY comps/llms/text-generation/vllm-ray /root/vllm-ray +COPY comps/llms/text-generation/vllm-ray /home/user/vllm-ray RUN pip install --upgrade-strategy eager optimum[habana] && \ pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.15.1 @@ -21,11 +21,11 @@ RUN sed -i 's/#PermitRootLogin prohibit-password/PermitRootLogin yes/' /etc/ssh/ service ssh restart ENV no_proxy=localhost,127.0.0.1 -ENV PYTHONPATH=$PYTHONPATH:/root:/root/vllm-ray +ENV PYTHONPATH=$PYTHONPATH:/root:/home/user/vllm-ray # Required by DeepSpeed ENV RAY_EXPERIMENTAL_NOSET_HABANA_VISIBLE_MODULES=1 ENV PT_HPU_LAZY_ACC_PAR_MODE=0 -ENV PT_HPU_ENABLE_LAZY_COLLECTIVES=true \ No newline at end of file +ENV PT_HPU_ENABLE_LAZY_COLLECTIVES=true diff --git a/comps/llms/text-generation/vllm-ray/docker_compose_llm.yaml b/comps/llms/text-generation/vllm-ray/docker_compose_llm.yaml index a3ae3ec04..76d3423f1 100644 --- a/comps/llms/text-generation/vllm-ray/docker_compose_llm.yaml +++ b/comps/llms/text-generation/vllm-ray/docker_compose_llm.yaml @@ -5,7 +5,7 @@ version: "3.8" services: vllm-ray-service: - image: vllm_ray:habana + image: opea/vllm_ray:habana container_name: vllm-ray-gaudi-server ports: - "8006:8000" diff --git a/comps/llms/text-generation/vllm-ray/launch_vllmray.sh b/comps/llms/text-generation/vllm-ray/launch_vllmray.sh index fcff33265..895e6a066 100755 --- a/comps/llms/text-generation/vllm-ray/launch_vllmray.sh +++ b/comps/llms/text-generation/vllm-ray/launch_vllmray.sh @@ -39,5 +39,5 @@ docker run -d --rm \ -e HTTPS_PROXY=$https_proxy \ -e HTTP_PROXY=$https_proxy \ -e HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN \ - vllm_ray:habana \ + opea/vllm_ray:habana \ /bin/bash -c "ray start --head && python vllm_ray_openai.py --port_number 8000 --model_id_or_path $model_name --tensor_parallel_size $parallel_number --enforce_eager $enforce_eager" diff --git a/comps/llms/text-generation/vllm-ray/llm.py b/comps/llms/text-generation/vllm-ray/llm.py index dc0c4b669..e7efe6527 100644 --- a/comps/llms/text-generation/vllm-ray/llm.py +++ b/comps/llms/text-generation/vllm-ray/llm.py @@ -16,21 +16,11 @@ from fastapi.responses import 
StreamingResponse from langchain_openai import ChatOpenAI -from langsmith import traceable -from comps import GeneratedDoc, LLMParamsDoc, ServiceType, opea_microservices, register_microservice +from comps import CustomLogger, GeneratedDoc, LLMParamsDoc, ServiceType, opea_microservices, register_microservice - -@traceable(run_type="tool") -def post_process_text(text: str): - if text == " ": - return "data: @#$\n\n" - if text == "\n": - return "data:
\n\n" - if text.isspace(): - return None - new_text = text.replace(" ", "@#$") - return f"data: {new_text}\n\n" +logger = CustomLogger("llm_vllm_ray") +logflag = os.getenv("LOGFLAG", False) @register_microservice( @@ -40,8 +30,9 @@ def post_process_text(text: str): host="0.0.0.0", port=9000, ) -@traceable(run_type="llm") def llm_generate(input: LLMParamsDoc): + if logflag: + logger.info(input) llm_endpoint = os.getenv("vLLM_RAY_ENDPOINT", "http://localhost:8006") llm_model = os.getenv("LLM_MODEL", "meta-llama/Llama-2-7b-chat-hf") llm = ChatOpenAI( @@ -56,26 +47,23 @@ def llm_generate(input: LLMParamsDoc): if input.streaming: - async def stream_generator(): + def stream_generator(): chat_response = "" - async for text in llm.astream(input.query): + for text in llm.stream(input.query): text = text.content chat_response += text - processed_text = post_process_text(text) - if text and processed_text: - if "" in text: - res = text.split("")[0] - if res != "": - yield res - break - yield processed_text - print(f"[llm - chat_stream] stream response: {chat_response}") + chunk_repr = repr(text.encode("utf-8")) + yield f"data: {chunk_repr}\n\n" + if logflag: + logger.info(f"[llm - chat_stream] stream response: {chat_response}") yield "data: [DONE]\n\n" return StreamingResponse(stream_generator(), media_type="text/event-stream") else: response = llm.invoke(input.query) response = response.content + if logflag: + logger.info(response) return GeneratedDoc(text=response, prompt=input.query) diff --git a/comps/llms/text-generation/vllm-ray/requirements.txt b/comps/llms/text-generation/vllm-ray/requirements.txt index 2e8b8c578..55b308768 100644 --- a/comps/llms/text-generation/vllm-ray/requirements.txt +++ b/comps/llms/text-generation/vllm-ray/requirements.txt @@ -3,15 +3,13 @@ fastapi huggingface_hub langchain==0.1.16 langchain_openai -langserve -langsmith openai opentelemetry-api opentelemetry-exporter-otlp opentelemetry-sdk prometheus-fastapi-instrumentator ray[serve]>=2.10 -setuptools==69.5.1 +setuptools shortuuid transformers uvicorn diff --git a/comps/llms/text-generation/vllm-xft/README.md b/comps/llms/text-generation/vllm-xft/README.md index 68931d4ca..4b39709a8 100644 --- a/comps/llms/text-generation/vllm-xft/README.md +++ b/comps/llms/text-generation/vllm-xft/README.md @@ -1,24 +1,26 @@ +# vLLM-xFT + vLLM-xFT is a fork of vLLM to integrate the xfastertransformer backend, maintaining compatibility with most of the official vLLM's features. For usage of vllm-xFT, please refer to [xFasterTransformer/vllm-xft](https://github.com/intel/xFasterTransformer/blob/main/serving/vllm-xft.md) -# ๐Ÿš€ Start Microservice with Docker +## ๐Ÿš€ Start Microservice with Docker -## 1 Build Docker Image +### 1 Build Docker Image ```bash cd ../../../ docker build -t opea/llm-vllm-xft:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/text-generation/vllm-xft/docker/Dockerfile . ``` -## 2 Run Docker with CLI +### 2 Run Docker with CLI ```bash docker run -it -p 9000:9000 -v /home/sdp/Qwen2-7B-Instruct/:/Qwen2-7B-Instruct/ -e vLLM_LLM_ENDPOINT="http://localhost:18688" -e HF_DATASET_DIR="/Qwen2-7B-Instruct/" -e OUTPUT_DIR="./output" -e TOKEN_PATH="/Qwen2-7B-Instruct/" -e https_proxy=$https_proxy -e http_proxy=$http_proxy -e no_proxy=$no_proxy --ipc=host opea/llm-vllm-xft:latest ``` -# ๐Ÿš€3. Consume LLM Service +## ๐Ÿš€3. 
Consume LLM Service -## 3.1 Check Service Status +### 3.1 Check Service Status ```bash curl http://${your_ip}:9000/v1/health_check\ @@ -26,7 +28,7 @@ curl http://${your_ip}:9000/v1/health_check\ -H 'Content-Type: application/json' ``` -## 3.2 Consume LLM Service +### 3.2 Consume LLM Service You can set the following model parameters according to your actual needs, such as `max_new_tokens`, `streaming`. diff --git a/comps/llms/text-generation/vllm-xft/docker/Dockerfile b/comps/llms/text-generation/vllm-xft/docker/Dockerfile index db682e04f..3742bcb2f 100644 --- a/comps/llms/text-generation/vllm-xft/docker/Dockerfile +++ b/comps/llms/text-generation/vllm-xft/docker/Dockerfile @@ -58,13 +58,13 @@ RUN cmake .. -DCMAKE_INSTALL_PREFIX=/usr/local/oneCCL \ RUN echo "source /usr/local/oneCCL/env/setvars.sh" >> ~/.bashrc -WORKDIR /root/ +WORKDIR /home/user/ RUN rm -rf /tmp/oneCCL RUN git clone https://github.com/intel/xFasterTransformer.git SHELL ["/bin/bash", "-c"] -WORKDIR /root/xFasterTransformer +WORKDIR /home/user/xFasterTransformer RUN git checkout ${TAG} \ && export "LD_LIBRARY_PATH=/usr/local/mklml_lnx_2019.0.5.20190502/lib:$LD_LIBRARY_PATH" \ && export "PATH=/usr/bin/python3.8:$PATH" \ @@ -75,24 +75,23 @@ RUN git checkout ${TAG} \ && pip install --no-cache-dir dist/* RUN mkdir -p /usr/local/xft/lib \ - && cp /root/xFasterTransformer/build/libxfastertransformer.so /usr/local/xft/lib \ - && cp /root/xFasterTransformer/build/libxft_comm_helper.so /usr/local/xft/lib \ - && cp -r /root/xFasterTransformer/include /usr/local/xft/ \ + && cp /home/user/xFasterTransformer/build/libxfastertransformer.so /usr/local/xft/lib \ + && cp /home/user/xFasterTransformer/build/libxft_comm_helper.so /usr/local/xft/lib \ + && cp -r /home/user/xFasterTransformer/include /usr/local/xft/ \ && mkdir -p /usr/local/include/xft/ \ && ln -s /usr/local/xft/include /usr/local/include/xft/include RUN echo "export \$(python -c 'import xfastertransformer as xft; print(xft.get_env())')" >> ~/.bashrc -COPY comps /root/comps +COPY comps /home/user/comps RUN pip install --no-cache-dir --upgrade pip && \ - pip install --no-cache-dir -r /root/comps/llms/text-generation/vllm-xft/requirements.txt + pip install --no-cache-dir -r /home/user/comps/llms/text-generation/vllm-xft/requirements.txt ENV PYTHONPATH=$PYTHONPATH:/root -RUN chmod +x /root/comps/llms/text-generation/vllm-xft/run.sh +RUN chmod +x /home/user/comps/llms/text-generation/vllm-xft/run.sh -WORKDIR /root/comps/llms/text-generation/vllm-xft/ - -ENTRYPOINT ["/root/comps/llms/text-generation/vllm-xft/run.sh"] +WORKDIR /home/user/comps/llms/text-generation/vllm-xft/ +ENTRYPOINT ["/home/user/comps/llms/text-generation/vllm-xft/run.sh"] diff --git a/comps/llms/text-generation/vllm-xft/llm.py b/comps/llms/text-generation/vllm-xft/llm.py index 02446baa6..07d892bde 100644 --- a/comps/llms/text-generation/vllm-xft/llm.py +++ b/comps/llms/text-generation/vllm-xft/llm.py @@ -5,9 +5,11 @@ from fastapi.responses import StreamingResponse from langchain_community.llms import VLLMOpenAI -from langsmith import traceable -from comps import GeneratedDoc, LLMParamsDoc, ServiceType, opea_microservices, register_microservice +from comps import CustomLogger, GeneratedDoc, LLMParamsDoc, ServiceType, opea_microservices, register_microservice + +logger = CustomLogger("llm_vllm_xft") +logflag = os.getenv("LOGFLAG", False) @register_microservice( @@ -17,8 +19,9 @@ host="0.0.0.0", port=9000, ) -@traceable(run_type="llm") def llm_generate(input: LLMParamsDoc): + if logflag: + logger.info(input) 
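+    # Note: logflag comes from os.getenv("LOGFLAG", False), so it is a string whenever LOGFLAG is set;
+    # any non-empty value (even "False" or "0") enables this logging, and leaving LOGFLAG unset keeps it off.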
llm_endpoint = os.getenv("vLLM_LLM_ENDPOINT", "http://localhost:18688") llm = VLLMOpenAI( openai_api_key="EMPTY", @@ -38,14 +41,18 @@ def stream_generator(): for text in llm.stream(input.query): chat_response += text chunk_repr = repr(text.encode("utf-8")) - print(f"[llm - chat_stream] chunk:{chunk_repr}") + if logflag: + logger.info(f"[llm - chat_stream] chunk:{chunk_repr}") yield f"data: {chunk_repr}\n\n" - print(f"[llm - chat_stream] stream response: {chat_response}") + if logflag: + logger.info(f"[llm - chat_stream] stream response: {chat_response}") yield "data: [DONE]\n\n" return StreamingResponse(stream_generator(), media_type="text/event-stream") else: response = llm.invoke(input.query) + if logflag: + logger.info(response) return GeneratedDoc(text=response, prompt=input.query) diff --git a/comps/llms/text-generation/vllm-xft/requirements.txt b/comps/llms/text-generation/vllm-xft/requirements.txt index bc9f457c4..a4accaed2 100644 --- a/comps/llms/text-generation/vllm-xft/requirements.txt +++ b/comps/llms/text-generation/vllm-xft/requirements.txt @@ -1,7 +1,6 @@ docarray[full] fastapi langchain==0.1.16 -langsmith opentelemetry-api opentelemetry-exporter-otlp opentelemetry-sdk diff --git a/comps/llms/text-generation/vllm/README.md b/comps/llms/text-generation/vllm/README.md index 1445d1bd1..3f0184ed9 100644 --- a/comps/llms/text-generation/vllm/README.md +++ b/comps/llms/text-generation/vllm/README.md @@ -50,6 +50,12 @@ bash ./build_docker_vllm.sh hpu Set `hw_mode` to `hpu`. +Note: If you want to enable tensor parallel, please set `setuptools==69.5.1` in Dockerfile.hpu before build docker with following command. + +``` +sed -i "s/RUN pip install setuptools/RUN pip install setuptools==69.5.1/g" docker/Dockerfile.hpu +``` + #### Launch vLLM service on single node For small model, we can just use single node. diff --git a/comps/llms/text-generation/vllm/build_docker_vllm.sh b/comps/llms/text-generation/vllm/build_docker_vllm.sh index 3680f076c..c1037a5c7 100644 --- a/comps/llms/text-generation/vllm/build_docker_vllm.sh +++ b/comps/llms/text-generation/vllm/build_docker_vllm.sh @@ -30,9 +30,9 @@ fi # Build the docker image for vLLM based on the hardware mode if [ "$hw_mode" = "hpu" ]; then - docker build -f docker/Dockerfile.hpu -t vllm:hpu --shm-size=128g . --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy + docker build -f docker/Dockerfile.hpu -t opea/vllm:hpu --shm-size=128g . --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy else git clone https://github.com/vllm-project/vllm.git cd ./vllm/ - docker build -f Dockerfile.cpu -t vllm:cpu --shm-size=128g . --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy + docker build -f Dockerfile.cpu -t opea/vllm:cpu --shm-size=128g . 
--build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy fi diff --git a/comps/llms/text-generation/vllm/docker/Dockerfile.hpu b/comps/llms/text-generation/vllm/docker/Dockerfile.hpu index c7093d4c0..a1ab93a15 100644 --- a/comps/llms/text-generation/vllm/docker/Dockerfile.hpu +++ b/comps/llms/text-generation/vllm/docker/Dockerfile.hpu @@ -1,18 +1,23 @@ -# FROM vault.habana.ai/gaudi-docker/1.15.1/ubuntu22.04/habanalabs/pytorch-installer-2.2.0:latest -FROM vault.habana.ai/gaudi-docker/1.16.0/ubuntu22.04/habanalabs/pytorch-installer-2.2.2:latest +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 -ENV LANG=en_US.UTF-8 +# FROM vault.habana.ai/gaudi-docker/1.16.1/ubuntu22.04/habanalabs/pytorch-installer-2.2.2:latest as hpu +FROM opea/habanalabs:1.16.1-pytorch-installer-2.2.2 as hpu +RUN useradd -m -s /bin/bash user && \ + mkdir -p /home/user && \ + chown -R user /home/user/ +ENV LANG=en_US.UTF-8 +RUN sed -i 's/#PermitRootLogin prohibit-password/PermitRootLogin yes/' /etc/ssh/sshd_config && \ + service ssh restart +USER user WORKDIR /root RUN pip install --upgrade-strategy eager optimum[habana] RUN pip install -v git+https://github.com/HabanaAI/vllm-fork.git@cf6952d -RUN pip install setuptools==69.5.1 - -RUN sed -i 's/#PermitRootLogin prohibit-password/PermitRootLogin yes/' /etc/ssh/sshd_config && \ - service ssh restart +RUN pip install setuptools ENV no_proxy=localhost,127.0.0.1 @@ -20,4 +25,4 @@ ENV PT_HPU_LAZY_ACC_PAR_MODE=0 ENV PT_HPU_ENABLE_LAZY_COLLECTIVES=true -CMD ["/bin/bash"] \ No newline at end of file +CMD ["/bin/bash"] diff --git a/comps/llms/text-generation/vllm/docker/Dockerfile.microservice b/comps/llms/text-generation/vllm/docker/Dockerfile.microservice index ccd977273..ebc1638e0 100644 --- a/comps/llms/text-generation/vllm/docker/Dockerfile.microservice +++ b/comps/llms/text-generation/vllm/docker/Dockerfile.microservice @@ -7,8 +7,7 @@ ARG ARCH="cpu" # Set this to "cpu" or "gpu" RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ libgl1-mesa-glx \ - libjemalloc-dev \ - vim + libjemalloc-dev RUN useradd -m -s /bin/bash user && \ mkdir -p /home/user && \ diff --git a/comps/llms/text-generation/vllm/docker_compose_llm.yaml b/comps/llms/text-generation/vllm/docker_compose_llm.yaml index 818fdf54a..205c9293a 100644 --- a/comps/llms/text-generation/vllm/docker_compose_llm.yaml +++ b/comps/llms/text-generation/vllm/docker_compose_llm.yaml @@ -5,7 +5,7 @@ version: "3.8" services: vllm-service: - image: vllm:hpu + image: opea/vllm:hpu container_name: vllm-gaudi-server ports: - "8008:80" diff --git a/comps/llms/text-generation/vllm/launch_microservice.sh b/comps/llms/text-generation/vllm/launch_microservice.sh index 6e8246601..01bd0f6f5 100644 --- a/comps/llms/text-generation/vllm/launch_microservice.sh +++ b/comps/llms/text-generation/vllm/launch_microservice.sh @@ -10,4 +10,5 @@ docker run -d --rm \ -e vLLM_ENDPOINT=$vLLM_ENDPOINT \ -e HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN \ -e LLM_MODEL=$LLM_MODEL \ + -e LOGFLAG=$LOGFLAG \ opea/llm-vllm:latest diff --git a/comps/llms/text-generation/vllm/launch_vllm_service.sh b/comps/llms/text-generation/vllm/launch_vllm_service.sh index 0b225023c..0c7ed90de 100644 --- a/comps/llms/text-generation/vllm/launch_vllm_service.sh +++ b/comps/llms/text-generation/vllm/launch_vllm_service.sh @@ -38,7 +38,7 @@ volume=$PWD/data # Build the Docker run command based on hardware mode if [ "$hw_mode" = "hpu" ]; then - docker run -d --rm --runtime=habana --name="vllm-service" 
-p $port_number:80 -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host -e HTTPS_PROXY=$https_proxy -e HTTP_PROXY=$https_proxy -e HF_TOKEN=${HUGGINGFACEHUB_API_TOKEN} vllm:hpu /bin/bash -c "export VLLM_CPU_KVCACHE_SPACE=40 && python3 -m vllm.entrypoints.openai.api_server --enforce-eager --model $model_name --tensor-parallel-size $parallel_number --host 0.0.0.0 --port 80 --block-size $block_size --max-num-seqs $max_num_seqs --max-seq_len-to-capture $max_seq_len_to_capture " + docker run -d --rm --runtime=habana --name="vllm-service" -p $port_number:80 -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host -e HTTPS_PROXY=$https_proxy -e HTTP_PROXY=$https_proxy -e HF_TOKEN=${HUGGINGFACEHUB_API_TOKEN} opea/vllm:hpu /bin/bash -c "export VLLM_CPU_KVCACHE_SPACE=40 && python3 -m vllm.entrypoints.openai.api_server --enforce-eager --model $model_name --tensor-parallel-size $parallel_number --host 0.0.0.0 --port 80 --block-size $block_size --max-num-seqs $max_num_seqs --max-seq_len-to-capture $max_seq_len_to_capture " else - docker run -d --rm --name="vllm-service" -p $port_number:80 --network=host -v $volume:/data -e HTTPS_PROXY=$https_proxy -e HTTP_PROXY=$https_proxy -e HF_TOKEN=${HUGGINGFACEHUB_API_TOKEN} -e VLLM_CPU_KVCACHE_SPACE=40 vllm:cpu --model $model_name --host 0.0.0.0 --port 80 + docker run -d --rm --name="vllm-service" -p $port_number:80 --network=host -v $volume:/data -e HTTPS_PROXY=$https_proxy -e HTTP_PROXY=$https_proxy -e HF_TOKEN=${HUGGINGFACEHUB_API_TOKEN} -e VLLM_CPU_KVCACHE_SPACE=40 opea/vllm:cpu --model $model_name --host 0.0.0.0 --port 80 fi diff --git a/comps/llms/text-generation/vllm/llm.py b/comps/llms/text-generation/vllm/llm.py index ea8691f1a..c730dd66b 100644 --- a/comps/llms/text-generation/vllm/llm.py +++ b/comps/llms/text-generation/vllm/llm.py @@ -5,9 +5,19 @@ from fastapi.responses import StreamingResponse from langchain_community.llms import VLLMOpenAI -from langsmith import traceable -from comps import GeneratedDoc, LLMParamsDoc, ServiceType, opea_microservices, opea_telemetry, register_microservice +from comps import ( + CustomLogger, + GeneratedDoc, + LLMParamsDoc, + ServiceType, + opea_microservices, + opea_telemetry, + register_microservice, +) + +logger = CustomLogger("llm_vllm") +logflag = os.getenv("LOGFLAG", False) @opea_telemetry @@ -29,8 +39,9 @@ def post_process_text(text: str): host="0.0.0.0", port=9000, ) -@traceable(run_type="llm") def llm_generate(input: LLMParamsDoc): + if logflag: + logger.info(input) llm_endpoint = os.getenv("vLLM_ENDPOINT", "http://localhost:8008") model_name = os.getenv("LLM_MODEL", "meta-llama/Meta-Llama-3-8B-Instruct") llm = VLLMOpenAI( @@ -51,12 +62,15 @@ def stream_generator(): chat_response += text chunk_repr = repr(text.encode("utf-8")) yield f"data: {chunk_repr}\n\n" - print(f"[llm - chat_stream] stream response: {chat_response}") + if logflag: + logger.info(f"[llm - chat_stream] stream response: {chat_response}") yield "data: [DONE]\n\n" return StreamingResponse(stream_generator(), media_type="text/event-stream") else: response = llm.invoke(input.query) + if logflag: + logger.info(response) return GeneratedDoc(text=response, prompt=input.query) diff --git a/comps/llms/utils/lm-eval/Dockerfile.cpu b/comps/llms/utils/lm-eval/Dockerfile.cpu index 933a523a5..5f419bfbf 100644 --- a/comps/llms/utils/lm-eval/Dockerfile.cpu +++ b/comps/llms/utils/lm-eval/Dockerfile.cpu @@ -1,22 +1,30 @@ +# Copyright (C) 2024 Intel 
Corporation +# SPDX-License-Identifier: Apache-2.0 + ARG UBUNTU_VER=22.04 FROM ubuntu:${UBUNTU_VER} as devel -ARG REPO_COMPS=https://github.com/opea-project/GenAIComps.git -ARG BRANCH=main ENV LANG=C.UTF-8 +RUN useradd -m -s /bin/bash user && \ + mkdir -p /home/user && \ + chown -R user /home/user/ + RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ aspell \ aspell-en \ build-essential \ + git \ python3 \ - python3-pip \ python3-dev \ python3-distutils \ - git \ - vim \ + python3-pip \ wget +USER user + +ARG REPO_COMPS=https://github.com/opea-project/GenAIComps.git +ARG BRANCH=main RUN git clone --single-branch --branch=${BRANCH} ${REPO_COMPS} /home/user/GenAIComps/ && \ cd /home/user/GenAIComps/ && python3 setup.py install && \ pip install --no-cache-dir -r /home/user/GenAIComps/comps/llms/utils/lm-eval/requirements.txt diff --git a/comps/llms/utils/lm-eval/self_hosted_hf.py b/comps/llms/utils/lm-eval/self_hosted_hf.py index b5eebaa2b..441605be0 100644 --- a/comps/llms/utils/lm-eval/self_hosted_hf.py +++ b/comps/llms/utils/lm-eval/self_hosted_hf.py @@ -10,7 +10,10 @@ from docarray import BaseDoc from evals.evaluation.lm_evaluation_harness.lm_eval.models.huggingface import HFLM, GaudiHFModelAdapter -from comps import ServiceType, opea_microservices, opea_telemetry, register_microservice +from comps import CustomLogger, ServiceType, opea_microservices, opea_telemetry, register_microservice + +logger = CustomLogger("self_hosted_hf") +logflag = os.getenv("LOGFLAG", False) lm_eval.api.registry.MODEL_REGISTRY["hf"] = HFLM lm_eval.api.registry.MODEL_REGISTRY["gaudi-hf"] = GaudiHFModelAdapter @@ -46,6 +49,8 @@ class LLMCompletionDoc(BaseDoc): ) @opea_telemetry def llm_generate(input: LLMCompletionDoc): + if logflag: + logger.info(input) global llm batched_inputs = torch.tensor(input.batched_inputs, dtype=torch.long, device=llm.device) with torch.no_grad(): @@ -56,12 +61,14 @@ def llm_generate(input: LLMCompletionDoc): # Check if per-token argmax is exactly equal to continuation greedy_tokens = logits.argmax(dim=-1) logprobs = torch.gather(logits, 2, batched_inputs[:, 1:].unsqueeze(-1)).squeeze(-1) - - return { + result = { "greedy_tokens": greedy_tokens.detach().cpu().tolist(), "logprobs": logprobs.detach().cpu().tolist(), "batched_inputs": input.batched_inputs, } + if logflag: + logger.info(result) + return result if __name__ == "__main__": diff --git a/comps/lvms/Dockerfile_tgi b/comps/lvms/Dockerfile_tgi new file mode 100644 index 000000000..c6412ac5e --- /dev/null +++ b/comps/lvms/Dockerfile_tgi @@ -0,0 +1,19 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +FROM python:3.11-slim + +# Set environment variables +ENV LANG=en_US.UTF-8 + +COPY comps /home/comps + +RUN pip install --no-cache-dir --upgrade pip && \ + pip install --no-cache-dir -r /home/comps/lvms/requirements.txt + +ENV PYTHONPATH=$PYTHONPATH:/home + +WORKDIR /home/comps/lvms + +ENTRYPOINT ["python", "lvm_tgi.py"] + diff --git a/comps/lvms/README.md b/comps/lvms/README.md index aec41922c..866cde708 100644 --- a/comps/lvms/README.md +++ b/comps/lvms/README.md @@ -16,3 +16,114 @@ See [transformers/README.md](transformers/README.md) for more information. ## Getting started with Prediction Guard The [predictionguard](predictionguard) directory contains instructions for running a single service that serves predictions from a LLaVA LVM via the Prediction Guard framework hosted on Intel Tiber Developer Cloud (ITDC). 
See [predictionguard](predictionguard) for more information. +# LVM Microservice + +Visual Question and Answering is one of the multimodal tasks empowered by LVMs (Large Visual Models). This microservice supports visual Q&A by using LLaVA as the base large visual model. It accepts two inputs: a prompt and an image. It outputs the answer to the prompt about the image. + +## ๐Ÿš€1. Start Microservice with Python (Option 1) + +### 1.1 Install Requirements + +```bash +pip install -r requirements.txt +``` + +### 1.2 Start LLaVA Service/Test + +- Xeon CPU + +```bash +# Start LLaVA service +cd llava/ +nohup python llava_server.py --device=cpu & +# Wait until the server is up +# Test +python check_llava_server.py +``` + +- Gaudi2 HPU + +```bash +pip install optimum[habana] +``` + +```bash +cd llava/ +# Start LLaVA service +nohup python llava_server.py & +# Test +python check_llava_server.py +``` + +### 1.3 Start Image To Text Service/Test + +```bash +cd .. +# Start the OPEA Microservice +python lvm.py +# Test +python check_lvm.py +``` + +## ๐Ÿš€2. Start Microservice with Docker (Option 2) + +### 2.1 Build Images + +#### 2.1.1 LLaVA Server Image + +- Xeon CPU + +```bash +cd ../.. +docker build -t opea/llava:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/lvms/llava/Dockerfile . +``` + +- Gaudi2 HPU + +```bash +cd ../.. +docker build -t opea/llava:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/lvms/llava/Dockerfile_hpu . +``` + +#### 2.1.2 LVM Service Image + +```bash +cd ../.. +docker build -t opea/lvm:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/lvms/Dockerfile . +``` + +### 2.2 Start LLaVA and LVM Service + +#### 2.2.1 Start LLaVA server + +- Xeon + +```bash +docker run -p 8399:8399 -e http_proxy=$http_proxy --ipc=host -e https_proxy=$https_proxy opea/llava:latest +``` + +- Gaudi2 HPU + +```bash +docker run -p 8399:8399 --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy opea/llava:latest +``` + +#### 2.2.2 Start LVM service + +```bash +ip_address=$(hostname -I | awk '{print $1}') + +docker run -p 9399:9399 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e LVM_ENDPOINT=http://$ip_address:8399 opea/lvm:latest +``` + +#### 2.2.3 Test + +```bash +# Use curl/python + +# curl +http_proxy="" curl http://localhost:9399/v1/lvm -XPOST -d '{"image": "iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAAFUlEQVR42mP8/5+hnoEIwDiqkL4KAcT9GO0U4BxoAAAAAElFTkSuQmCC", "prompt":"What is this?"}' -H 'Content-Type: application/json' + +# python +python check_lvm.py +``` diff --git a/comps/lvms/lvm_tgi.py b/comps/lvms/lvm_tgi.py new file mode 100644 index 000000000..9492b4eaf --- /dev/null +++ b/comps/lvms/lvm_tgi.py @@ -0,0 +1,97 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import os +import time + +from fastapi.responses import StreamingResponse +from huggingface_hub import AsyncInferenceClient + +from comps import ( + CustomLogger, + LVMDoc, + ServiceType, + TextDoc, + opea_microservices, + register_microservice, + register_statistics, + statistics_dict, +) + +logger = CustomLogger("lvm_tgi") +logflag = os.getenv("LOGFLAG", False) + + +@register_microservice( + name="opea_service@lvm_tgi", + service_type=ServiceType.LVM, + endpoint="/v1/lvm", + host="0.0.0.0", + port=9399, + input_datatype=LVMDoc, + 
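+    # LVMDoc carries the base64-encoded image, the text prompt, and the generation settings
+    # (max_new_tokens, temperature, top_k, top_p, repetition_penalty, streaming); the service
+    # replies with a TextDoc, or with an SSE stream when streaming is enabled.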
output_datatype=TextDoc, +) +@register_statistics(names=["opea_service@lvm_tgi"]) +async def lvm(request: LVMDoc): + if logflag: + logger.info(request) + start = time.time() + stream_gen_time = [] + img_b64_str = request.image + prompt = request.prompt + max_new_tokens = request.max_new_tokens + streaming = request.streaming + repetition_penalty = request.repetition_penalty + temperature = request.temperature + top_k = request.top_k + top_p = request.top_p + + image = f"data:image/png;base64,{img_b64_str}" + image_prompt = f"![]({image})\n{prompt}\nASSISTANT:" + + if streaming: + + async def stream_generator(): + chat_response = "" + text_generation = await lvm_client.text_generation( + prompt=image_prompt, + stream=streaming, + max_new_tokens=max_new_tokens, + repetition_penalty=repetition_penalty, + temperature=temperature, + top_k=top_k, + top_p=top_p, + ) + async for text in text_generation: + stream_gen_time.append(time.time() - start) + chat_response += text + chunk_repr = repr(text.encode("utf-8")) + if logflag: + logger.info(f"[llm - chat_stream] chunk:{chunk_repr}") + yield f"data: {chunk_repr}\n\n" + if logflag: + logger.info(f"[llm - chat_stream] stream response: {chat_response}") + statistics_dict["opea_service@lvm_tgi"].append_latency(stream_gen_time[-1], stream_gen_time[0]) + yield "data: [DONE]\n\n" + + return StreamingResponse(stream_generator(), media_type="text/event-stream") + else: + generated_str = await lvm_client.text_generation( + image_prompt, + max_new_tokens=max_new_tokens, + repetition_penalty=repetition_penalty, + temperature=temperature, + top_k=top_k, + top_p=top_p, + ) + statistics_dict["opea_service@lvm_tgi"].append_latency(time.time() - start, None) + if logflag: + logger.info(generated_str) + return TextDoc(text=generated_str) + + +if __name__ == "__main__": + lvm_endpoint = os.getenv("LVM_ENDPOINT", "http://localhost:8399") + lvm_client = AsyncInferenceClient(lvm_endpoint) + logger.info("[LVM] LVM initialized.") + opea_microservices["opea_service@lvm_tgi"].start() diff --git a/comps/lvms/requirements.txt b/comps/lvms/requirements.txt index 3651937bd..556dfb0c1 100644 --- a/comps/lvms/requirements.txt +++ b/comps/lvms/requirements.txt @@ -1,6 +1,7 @@ datasets docarray[full] fastapi +huggingface_hub opentelemetry-api opentelemetry-exporter-otlp opentelemetry-sdk diff --git a/comps/lvms/transformers/Dockerfile b/comps/lvms/transformers/Dockerfile index 0f6a74ff4..e088224f2 100644 --- a/comps/lvms/transformers/Dockerfile +++ b/comps/lvms/transformers/Dockerfile @@ -2,17 +2,20 @@ # SPDX-License-Identifier: Apache-2.0 FROM python:3.11-slim - +RUN useradd -m -s /bin/bash user && \ + mkdir -p /home/user && \ + chown -R user /home/user/ +USER user # Set environment variables ENV LANG=en_US.UTF-8 -COPY comps /home/comps +COPY comps /home/user/comps RUN pip install --no-cache-dir --upgrade pip && \ - pip install --no-cache-dir -r /home/comps/lvms/requirements.txt + pip install --no-cache-dir -r /home/user/comps/lvms/requirements.txt -ENV PYTHONPATH=$PYTHONPATH:/home +ENV PYTHONPATH=$PYTHONPATH:/home/user -WORKDIR /home/comps/lvms +WORKDIR /home/user/comps/lvms ENTRYPOINT ["python", "lvm.py"] diff --git a/comps/lvms/transformers/llava/Dockerfile b/comps/lvms/transformers/llava/Dockerfile index efd2b1d45..07d5cc41d 100644 --- a/comps/lvms/transformers/llava/Dockerfile +++ b/comps/lvms/transformers/llava/Dockerfile @@ -2,18 +2,21 @@ # SPDX-License-Identifier: Apache-2.0 FROM python:3.11-slim - +RUN useradd -m -s /bin/bash user && \ + mkdir -p /home/user && \ + chown 
-R user /home/user/ +USER user # Set environment variables ENV LANG=en_US.UTF-8 ENV PYTHONPATH=/home/user:/usr/lib/habanalabs/:/optimum-habana -COPY comps /home/comps +COPY comps /home/user/comps RUN pip install --no-cache-dir --upgrade pip && \ - pip install --no-cache-dir -r /home/comps/lvms/requirements.txt + pip install --no-cache-dir -r /home/user/comps/lvms/requirements.txt -ENV PYTHONPATH=$PYTHONPATH:/home +ENV PYTHONPATH=$PYTHONPATH:/home/user -WORKDIR /home/comps/lvms/llava +WORKDIR /home/user/comps/lvms/llava ENTRYPOINT ["python", "llava_server.py", "--device", "cpu"] \ No newline at end of file diff --git a/comps/lvms/transformers/llava/Dockerfile_hpu b/comps/lvms/transformers/llava/Dockerfile_hpu index bb2bf0676..58e69e043 100644 --- a/comps/lvms/transformers/llava/Dockerfile_hpu +++ b/comps/lvms/transformers/llava/Dockerfile_hpu @@ -2,22 +2,27 @@ # SPDX-License-Identifier: Apache-2.0 # HABANA environment -FROM vault.habana.ai/gaudi-docker/1.16.1/ubuntu22.04/habanalabs/pytorch-installer-2.2.2:latest AS hpu -RUN rm -rf /etc/ssh/ssh_host* +# FROM vault.habana.ai/gaudi-docker/1.16.1/ubuntu22.04/habanalabs/pytorch-installer-2.2.2:latest as hpu +FROM opea/habanalabs:1.16.1-pytorch-installer-2.2.2 as hpu +RUN useradd -m -s /bin/bash user && \ + mkdir -p /home/user && \ + chown -R user /home/user/ +RUN rm -rf /etc/ssh/ssh_host* +USER user # Set environment variables ENV LANG=en_US.UTF-8 ENV PYTHONPATH=/home/user:/usr/lib/habanalabs/:/optimum-habana -COPY comps /home/comps +COPY comps /home/user/comps # Install requirements and optimum habana RUN pip install --no-cache-dir --upgrade pip && \ - pip install --no-cache-dir -r /home/comps/lvms/requirements.txt && \ + pip install --no-cache-dir -r /home/user/comps/lvms/requirements.txt && \ pip install optimum[habana] -ENV PYTHONPATH=$PYTHONPATH:/home +ENV PYTHONPATH=$PYTHONPATH:/home/user -WORKDIR /home/comps/lvms/llava +WORKDIR /home/user/comps/lvms/llava -ENTRYPOINT ["python", "llava_server.py"] \ No newline at end of file +ENTRYPOINT ["python", "llava_server.py"] diff --git a/comps/lvms/transformers/lvm.py b/comps/lvms/transformers/lvm.py index a60f6813f..4ae900aae 100644 --- a/comps/lvms/transformers/lvm.py +++ b/comps/lvms/transformers/lvm.py @@ -9,6 +9,7 @@ import requests from comps import ( + CustomLogger, LVMDoc, ServiceType, TextDoc, @@ -18,6 +19,9 @@ statistics_dict, ) +logger = CustomLogger("lvm") +logflag = os.getenv("LOGFLAG", False) + @register_microservice( name="opea_service@lvm", @@ -30,6 +34,8 @@ ) @register_statistics(names=["opea_service@lvm"]) async def lvm(request: LVMDoc): + if logflag: + logger.info(request) start = time.time() img_b64_str = request.image prompt = request.prompt @@ -41,11 +47,14 @@ async def lvm(request: LVMDoc): response = requests.post(url=f"{lvm_endpoint}/generate", data=json.dumps(inputs), proxies={"http": None}) statistics_dict["opea_service@lvm"].append_latency(time.time() - start, None) - return TextDoc(text=response.json()["text"]) + result = response.json()["text"] + if logflag: + logger.info(result) + return TextDoc(text=result) if __name__ == "__main__": lvm_endpoint = os.getenv("LVM_ENDPOINT", "http://localhost:8399") - print("[LVM] LVM initialized.") + logger.info("[LVM] LVM initialized.") opea_microservices["opea_service@lvm"].start() diff --git a/comps/lvms/video-llama/Dockerfile b/comps/lvms/video-llama/Dockerfile new file mode 100644 index 000000000..b172a217a --- /dev/null +++ b/comps/lvms/video-llama/Dockerfile @@ -0,0 +1,18 @@ +# Copyright (C) 2024 Intel Corporation +# 
SPDX-License-Identifier: Apache-2.0 + +FROM python:3.11-slim + +# Set environment variables +ENV LANG=en_US.UTF-8 + +COPY comps /home/comps + +RUN pip install --no-cache-dir --upgrade pip && \ + pip install --no-cache-dir -r /home/comps/lvms/video-llama/requirements.txt + +ENV PYTHONPATH=$PYTHONPATH:/home + +WORKDIR /home/comps/lvms/video-llama + +ENTRYPOINT ["python", "lvm.py"] \ No newline at end of file diff --git a/comps/lvms/video-llama/README.md b/comps/lvms/video-llama/README.md new file mode 100644 index 000000000..43ec0bd18 --- /dev/null +++ b/comps/lvms/video-llama/README.md @@ -0,0 +1,70 @@ +# LVM Microservice + +This is a Docker-based microservice that runs Video-Llama as a Large Vision Model (LVM). It utilizes Llama-2-7b-chat-hf for conversations based on video dialogues. It supports Intel Xeon CPUs. + +# ๐Ÿš€1. Start Microservice with Docker + +## 1.1 Build Images + +```bash +cd GenAIComps +# Video-Llama Server Image +docker build --no-cache -t opea/video-llama-lvm-server:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/lvms/video-llama/server/docker/Dockerfile . +# LVM Service Image +docker build --no-cache -t opea/lvm-video-llama:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/lvms/video-llama/Dockerfile . +``` + +## 1.2 Start Video-Llama and LVM Services + +For the very first run, please follow the steps below: + +```bash +# prepare environment variables +export ip_address=$(hostname -I | awk '{print $1}') +export no_proxy=$no_proxy,${ip_address} +export LVM_ENDPOINT=http://${ip_address}:9009 +# Start service +docker compose -f comps/lvms/video-llama/docker_compose.yaml up -d +# it should take about 1.5 hours for the model to download in the video-llama server, assuming a maximum download speed of 100 Mbps +until docker logs video-llama-lvm-server 2>&1 | grep -q "Uvicorn running on"; do + sleep 5m +done +``` + +If you've run the microservice before, it's recommended to keep the downloaded model so it won't be redownloaded each time you run it. To achieve this, you need to modify the following configuration: + +```yaml +# comps/lvms/video-llama/docker_compose.yaml +services: + lvm-video-llama: + ... + environment: + llm_download: "False" # avoid download +``` + +# โœ… 2. Test + +```bash +# use curl +export ip_address=$(hostname -I | awk '{print $1}') +## check video-llama +http_proxy="" curl -X POST "http://${ip_address}:9009/generate?video_url=https%3A%2F%2Fgithub.com%2FDAMO-NLP-SG%2FVideo-LLaMA%2Fraw%2Fmain%2Fexamples%2Fsilence_girl.mp4&start=0.0&duration=9&prompt=What%20is%20the%20person%20doing%3F&max_new_tokens=150" -H "accept: */*" -d '' + +## check lvm +http_proxy="" curl -X POST http://${ip_address}:9000/v1/lvm -d '{"video_url":"https://github.com/DAMO-NLP-SG/Video-LLaMA/raw/main/examples/silence_girl.mp4","chunk_start": 0,"chunk_duration": 9,"prompt":"What is the person doing?","max_new_tokens": 150}' -H 'Content-Type: application/json' + +# or use python +export ip_address=$(hostname -I | awk '{print $1}') +python comps/lvms/video-llama/check_lvm.py +``` + +# โ™ป๏ธ 3. Clean + +```bash +# remove the container +cid=$(docker ps -aq --filter "name=video-llama") +if [[ ! 
-z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi +# remove the model volume (suggest to keep this to avoid download for each run) +if docker volume ls | grep -q video-llama-model; then docker volume rm video-llama_video-llama-model; fi + +``` diff --git a/comps/lvms/video-llama/check_lvm.py b/comps/lvms/video-llama/check_lvm.py new file mode 100644 index 000000000..fcf6f6aee --- /dev/null +++ b/comps/lvms/video-llama/check_lvm.py @@ -0,0 +1,50 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import datetime +import json +import os + +import requests + +ip_address = os.getenv("ip_address") +####### video-llama request ######## +print("video-llama request") +api_url = f"http://${ip_address}:9009/generate" +content = { + "video_url": "https://github.com/DAMO-NLP-SG/Video-LLaMA/raw/main/examples/silence_girl.mp4", + "start": 0.0, + "duration": 9, + "prompt": "What is the person doing?", + "max_new_tokens": 150, +} + +start = datetime.datetime.now() +with requests.post(api_url, params=content, stream=True) as response: + for chunk in response.iter_content(chunk_size=8192): + if chunk: + print(chunk.decode("utf-8"), end="", flush=True) # Flush to ensure immediate output + +end = datetime.datetime.now() +print(f"\nTotal time: {end - start}") + +####### lvm request ######## +print("lvm request") +api_url = f"http://${ip_address}:9000/v1/lvm" +headers = {"Content-Type": "application/json"} +data = { + "video_url": "https://github.com/DAMO-NLP-SG/Video-LLaMA/raw/main/examples/silence_girl.mp4", + "chunk_start": 0, + "chunk_duration": 9, + "prompt": "what is the person doing", + "max_new_tokens": 150, +} + +start = datetime.datetime.now() +with requests.post(api_url, headers=headers, data=json.dumps(data), stream=True) as response: + for chunk in response.iter_content(chunk_size=8192): + if chunk: + print(chunk.decode("utf-8"), end="", flush=True) # Flush to ensure immediate output + +end = datetime.datetime.now() +print(f"\nTotal time: {end - start}") diff --git a/comps/lvms/video-llama/docker_compose.yaml b/comps/lvms/video-llama/docker_compose.yaml new file mode 100644 index 000000000..54aace84e --- /dev/null +++ b/comps/lvms/video-llama/docker_compose.yaml @@ -0,0 +1,40 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +version: "3" +services: + lvm-video-llama: + image: opea/video-llama-lvm-server:latest + container_name: video-llama-lvm-server + ports: + - "9009:9009" + ipc: host + environment: + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + no_proxy: ${no_proxy} + llm_download: "True" + volumes: + - "/home/$USER/.cache:/home/user/.cache" # RECOMMENDED: use local cache to avoid download + - video-llama-model:/home/user/model + restart: unless-stopped + + lvm: + image: opea/lvm-video-llama:latest + container_name: lvm-video-llama + ports: + - "9000:9000" + ipc: host + environment: + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + no_proxy: ${no_proxy} + LVM_ENDPOINT: ${LVM_ENDPOINT} + restart: unless-stopped + depends_on: + - lvm-video-llama +networks: + default: + driver: bridge +volumes: + video-llama-model: diff --git a/comps/lvms/video-llama/lvm.py b/comps/lvms/video-llama/lvm.py new file mode 100644 index 000000000..1cbfcd5e1 --- /dev/null +++ b/comps/lvms/video-llama/lvm.py @@ -0,0 +1,80 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + + +# import json +import logging +import os + +import requests +from fastapi import HTTPException +from 
fastapi.responses import StreamingResponse + +from comps import LVMVideoDoc, ServiceType, opea_microservices, register_microservice, register_statistics + +# import time + + +logging.basicConfig(level=logging.INFO) + + +@register_microservice( + name="opea_service@lvm", + service_type=ServiceType.LVM, + endpoint="/v1/lvm", + host="0.0.0.0", + port=9000, + input_datatype=LVMVideoDoc, + output_datatype=StreamingResponse, +) +@register_statistics(names=["opea_service@lvm"]) +async def lvm(input: LVMVideoDoc): + """This function handles the LVM microservice, which generates text based on a video URL, start time, duration, prompt, and maximum new tokens. + + Parameters: + input (LVMVideoDoc): The input containing the video URL, start time, duration, prompt, and maximum new tokens. + + Returns: + StreamingResponse: A streaming response containing the generated text in text/event-stream format, or a JSON error response if the upstream API responds with an error. + """ + logging.info("[lvm] Received input") + + video_url = input.video_url + chunk_start = input.chunk_start + chunk_duration = input.chunk_duration + prompt = input.prompt + max_new_tokens = input.max_new_tokens + + params = { + "video_url": video_url, + "start": chunk_start, + "duration": chunk_duration, + "prompt": prompt, + "max_new_tokens": max_new_tokens, + } + logging.info(f"[lvm] Params: {params}") + + response = requests.post(url=f"{lvm_endpoint}/generate", params=params, proxies={"http": None}, stream=True) + logging.info(f"[lvm] Response status code: {response.status_code}") + if response.status_code == 200: + + def streamer(): + yield f"{{'video_url': '{video_url}', 'chunk_start': {chunk_start}, 'chunk_duration': {chunk_duration}}}\n".encode( + "utf-8" + ) + for chunk in response.iter_content(chunk_size=8192): + if chunk: + yield chunk + logging.info(f"[llm - chat_stream] Streaming: {chunk}") + logging.info("[llm - chat_stream] stream response finished") + + return StreamingResponse(streamer(), media_type="text/event-stream") + else: + logging.error(f"[lvm] Error: {response.text}") + raise HTTPException(status_code=500, detail="The upstream API responded with an error.") + + +if __name__ == "__main__": + lvm_endpoint = os.getenv("LVM_ENDPOINT") + + opea_microservices["opea_service@lvm"].start() diff --git a/comps/lvms/video-llama/requirements.txt b/comps/lvms/video-llama/requirements.txt new file mode 100644 index 000000000..c7cc250eb --- /dev/null +++ b/comps/lvms/video-llama/requirements.txt @@ -0,0 +1,11 @@ +datasets +docarray +fastapi +opentelemetry-api +opentelemetry-exporter-otlp +opentelemetry-sdk +Pillow +prometheus-fastapi-instrumentator +pydub +shortuuid +uvicorn diff --git a/comps/lvms/video-llama/server/data/silence_girl.mp4 b/comps/lvms/video-llama/server/data/silence_girl.mp4 new file mode 100644 index 000000000..ad98e90f6 Binary files /dev/null and b/comps/lvms/video-llama/server/data/silence_girl.mp4 differ diff --git a/comps/lvms/video-llama/server/docker/Dockerfile b/comps/lvms/video-llama/server/docker/Dockerfile new file mode 100644 index 000000000..1152aa84c --- /dev/null +++ b/comps/lvms/video-llama/server/docker/Dockerfile @@ -0,0 +1,38 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +FROM python:3.9-slim + +ENV LANG=C.UTF-8 + +RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ + git git-lfs && \ + git lfs install + +RUN useradd -m -s /bin/bash user && \ + mkdir -p /home/user && \ + chown -R user:user /home/user/ +RUN mkdir 
/home/user/model && chown user:user -R /home/user/model + +USER user + +COPY --chown=user:user comps /home/user/comps +WORKDIR /home/user/comps/lvms/video-llama/server + +RUN pip install --no-cache-dir --upgrade pip && \ + pip install --no-cache-dir -r /home/user/comps/lvms/video-llama/server/requirements.txt + +ARG VIDEO_LLAMA_REPO=https://github.com/DAMO-NLP-SG/Video-LLaMA.git +ARG VIDEO_LLAMA_COMMIT=0adb19e +RUN tar -xvf video-llama.patch.tar && \ + git clone ${VIDEO_LLAMA_REPO} Video-LLaMA && \ + cd Video-LLaMA && git checkout ${VIDEO_LLAMA_COMMIT} && \ + git apply --whitespace=fix ../video-llama.patch && \ + mv video_llama ../ && \ + cd ../ && rm -rf Video-LLaMA + + +ENV PYTHONPATH=/home/user + + +ENTRYPOINT ["bash", "start.sh"] \ No newline at end of file diff --git a/comps/lvms/video-llama/server/docker/docker_compose_vllama.yaml b/comps/lvms/video-llama/server/docker/docker_compose_vllama.yaml new file mode 100644 index 000000000..17d38e076 --- /dev/null +++ b/comps/lvms/video-llama/server/docker/docker_compose_vllama.yaml @@ -0,0 +1,25 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +version: "3" +services: + lvm-video-llama: + image: opea/video-llama-lvm-server:latest + container_name: video-llama-lvm-server + ports: + - "9009:9009" + ipc: host + environment: + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + no_proxy: ${no_proxy} + llm_download: "True" + volumes: + - "/home/$USER/.cache:/home/user/.cache" # RECOMMENDED: use cache to avoid download + - video-llama-model:/home/user/model + restart: unless-stopped +networks: + default: + driver: bridge +volumes: + video-llama-model: diff --git a/comps/lvms/video-llama/server/extract_vl_embedding.py b/comps/lvms/video-llama/server/extract_vl_embedding.py new file mode 100644 index 000000000..304b5472f --- /dev/null +++ b/comps/lvms/video-llama/server/extract_vl_embedding.py @@ -0,0 +1,41 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import argparse +import random + +import numpy as np +import torch +import torch.backends.cudnn as cudnn +from video_llama.common.config import Config +from video_llama.common.dist_utils import get_rank +from video_llama.common.registry import registry + + +class VLEmbeddingExtractor(object): + """Docstring for VLEmbeddingExtractor.""" + + def __init__(self, cfg_path, model_type): + super(VLEmbeddingExtractor, self).__init__() + args = argparse.Namespace(**{"cfg_path": cfg_path, "model_type": model_type, "options": []}) + self.cfg = Config(args) + self.setup_seeds() + model_config = self.cfg.model_cfg + print("vis_processor vit_precision:", model_config.get("vit_precision", "fp16")) + if model_config.get("vit_precision", "fp16") == "fp16": + print("WARNING! FP16 not currently supported. 
Switching to FP32") + model_config["vit_precision"] = "fp32" + model_cls = registry.get_model_class(model_config.arch) + self.model = model_cls.from_config(model_config).to("cpu") + self.model.eval() + + def setup_seeds(self): + seed = self.cfg.run_cfg.seed + get_rank() + + print("Seed: ", seed) + random.seed(seed) + np.random.seed(seed) + torch.manual_seed(seed) + + cudnn.benchmark = False + cudnn.deterministic = True diff --git a/comps/lvms/video-llama/server/requirements.txt b/comps/lvms/video-llama/server/requirements.txt new file mode 100644 index 000000000..afbac6004 --- /dev/null +++ b/comps/lvms/video-llama/server/requirements.txt @@ -0,0 +1,36 @@ +# OPEA +beautifulsoup4 + +# microservice +decord +docarray +einops +faiss-cpu +fastapi +ftfy +iopath +langchain==0.2.9 +langchain-community==0.2.1 +langchain-core==0.2.21 +numpy +omegaconf +opencv-python-headless +opentelemetry-api +opentelemetry-exporter-otlp +opentelemetry-sdk +pandas +Pillow +prometheus_fastapi_instrumentator +pytorchvideo +sentence-transformers==3.0.1 +sentencepiece +shortuuid +timm +torch==1.13.1 --index-url https://download.pytorch.org/whl/cpu +torchaudio==0.13.1 --index-url https://download.pytorch.org/whl/cpu +torchvision==0.14.1 --index-url https://download.pytorch.org/whl/cpu +transformers +uvicorn +validators +webdataset +werkzeug diff --git a/comps/lvms/video-llama/server/server.py b/comps/lvms/video-llama/server/server.py new file mode 100644 index 000000000..20841732c --- /dev/null +++ b/comps/lvms/video-llama/server/server.py @@ -0,0 +1,286 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +"""Stand-alone video llama FastAPI Server.""" + +import argparse +import logging +import os +import re +from threading import Thread +from urllib.parse import urlparse + +import decord +import requests +import uvicorn +import validators +from extract_vl_embedding import VLEmbeddingExtractor as VL +from fastapi import FastAPI, Query +from fastapi.middleware.cors import CORSMiddleware +from fastapi.responses import JSONResponse, Response, StreamingResponse +from langchain_community.embeddings import HuggingFaceEmbeddings +from langchain_community.vectorstores import FAISS +from pydantic import BaseModel, Field +from transformers import TextIteratorStreamer, set_seed +from video_llama.common.registry import registry +from video_llama.conversation.conversation_video import Chat +from werkzeug.utils import secure_filename + +# Initialize decord bridge and seed +decord.bridge.set_bridge("torch") +set_seed(22) + +# Setup logging +logging.basicConfig(level=logging.INFO) + +# Define global variables +context_db = None +streamer = None +chat = None +VIDEO_DIR = "/home/user/comps/lvms/video-llama/server/data" +CFG_PATH = "video_llama_config/video_llama_eval_only_vl.yaml" +MODEL_TYPE = "llama_v2" + +os.makedirs(VIDEO_DIR, exist_ok=True) + +# Initialize FastAPI app +app = FastAPI() + +app.add_middleware( + CORSMiddleware, + allow_origins=["*"], + allow_credentials=True, + allow_methods=["*"], + allow_headers=["*"], +) + + +# Pydantic models for request validation +class videoInfo(BaseModel): + video_path: str = Field(..., description="URL of the video to be processed, support remote") + start_time: float = Field(..., descrciption="video clip start time in seconds", example=0.0) + duration: float = Field(..., description="video clip duration in seconds", example=10.0) + + +class GenerateRequest(BaseModel): + start_time: float = Field(..., descrciption="video clip start time in seconds", example=0.0) + 
duration: float = Field(..., description="video clip duration in seconds", example=10.0) + prompt: str = Field(..., description="Query for Video-LLama", example="What is the man doing?") + max_new_tokens: int = Field(default=512, description="Maximum number of tokens to generate", example=512) # + + +# Function to construct instructions context +def construct_instructions(): + instructions = [ + """ Identify the person [with specific features / seen at a specific location / performing a specific action] in the provided data based on the video content. + Describe in detail the relevant actions of the individuals mentioned in the question. + Provide full details of their actions being performed and roles. Focus on the individual and the actions being performed. + Exclude information about their age and items on the shelf that are not directly observable. + Do not mention items on the shelf that are not visible. \ + Exclude information about the background and surrounding details. + Ensure all information is distinct, accurate, and directly observable. + Do not repeat actions of individuals and do not mention anything about other persons not visible in the video. + Mention actions and roles once only. + """, + """Analyze the provided data to recognize and describe the activities performed by individuals. + Specify the type of activity and any relevant contextual details, + Do not give repetitions, always give distinct and accurate information only.""", + """Determine the interactions between individuals and items in the provided data. + Describe the nature of the interaction between individuals and the items involved. + Provide full details of their relevant actions and roles. Focus on the individuals and the action being performed by them. + Exclude information about their age and items on the shelf that are not directly observable. + Exclude information about the background and surrounding details. + Ensure all information is distinct, accurate, and directly observable. + Do not repeat actions of individuals and do not mention anything about other persons not visible in the video. + Do not mention items on the shelf that are not observable. \ + """, + """Analyze the provided data to answer queries based on specific time intervals. + Provide detailed information corresponding to the specified time frames, + Do not give repetitions, always give distinct and accurate information only.""", + """Identify individuals based on their appearance as described in the provided data. + Provide details about their identity and actions, + Do not give repetitions, always give distinct and accurate information only.""", + """Answer questions related to events and activities that occurred on a specific day. 
+ Provide a detailed account of the events, + Do not give repetitions, always give distinct and accurate information only.""", + ] + HFembeddings = HuggingFaceEmbeddings(model_kwargs={"device": "cpu"}) + context = FAISS.from_texts(instructions, HFembeddings) + return context + + +# Helper functions for chat and inference +def get_context(query, context): + context = context.similarity_search(query) + return [i.page_content for i in context] + + +def chat_reset(chat_state, img_list): + logging.info("-" * 30) + logging.info("resetting chatState") + if chat_state is not None: + chat_state.messages = [] + if img_list is not None: + img_list = [] + return chat_state, img_list + + +def inference(chat: Chat, streamer, video: videoInfo, instruction: str, max_new_tokens: int): + logging.info("Video-Llama generation begin.") + video_path = video.video_path + start_time = video.start_time + duration = video.duration + + chat.upload_video_without_audio(video_path, start_time, duration) + chat.ask("" + instruction) + chat.answer( + max_new_tokens=max_new_tokens, + num_beams=1, + min_length=1, + top_p=0.9, + repetition_penalty=1.0, + length_penalty=1, + temperature=0.02, + max_length=2000, + keep_conv_hist=True, + streamer=streamer, + ) + if "similar video" not in instruction: + logging.info("Resetting the chat history") + chat.clear() + logging.info("Video-Llama generation done, remove video.") + os.remove(video_path) + + +def stream_res(video, instruction, max_new_tokens): + logging.debug("Start to stream...") + thread = Thread(target=inference, args=(chat, streamer, video, instruction, max_new_tokens)) + thread.start() + for text in streamer: + yield text + + +def is_local_file(url): + """Returns True if url is a local file, False otherwise.""" + return not url.startswith("http://") and not url.startswith("https://") + + +def is_valid_url(url): + # Validate the URL's structure + validation = validators.url(url) + if not validation: + logging.error("URL is invalid") + return False + + # Parse the URL to components + parsed_url = urlparse(url) + + # Check the scheme + if parsed_url.scheme not in ["http", "https"]: + logging.error("URL scheme is invalid") + return False + + # Check for "../" in the path + if "../" in parsed_url.path: + logging.error("URL contains '../', which is not allowed") + return False + + # Check that the path only contains one "." 
for the file extension + if parsed_url.path.count(".") != 1: + logging.error("URL path does not meet the requirement of having only one '.'") + return False + + # If all checks pass, the URL is valid + logging.info("URL is valid") + return True + + +def is_valid_video(filename): + if re.match(r"^[a-zA-Z0-9-_]+\.(mp4)$", filename, re.IGNORECASE): + return secure_filename(filename) + else: + return False + + +@app.get("/health") +async def health() -> Response: + """Health check.""" + return Response(status_code=200) + + +@app.post("/generate", response_class=StreamingResponse) +async def generate( + video_url: str = Query(..., description="remote URL of the video to be processed"), + start: float = Query(..., description="video clip start time in seconds", examples=0.0), + duration: float = Query(..., description="video clip duration in seconds", examples=10.0), + prompt: str = Query(..., description="Query for Video-LLama", examples="What is the man doing?"), + max_new_tokens: int = Query(150, description="Maximum number of tokens to generate", examples=150), +) -> StreamingResponse: + + if video_url.lower().endswith(".mp4"): + logging.info(f"Format check passed, the file '{video_url}' is an MP4 file.") + else: + logging.info(f"Format check failed, the file '{video_url}' is not an MP4 file.") + return JSONResponse(status_code=500, content={"message": "Invalid file type. Only mp4 videos are allowed."}) + + if is_local_file(video_url): + # validate the video name + if is_valid_video(video_url): + secure_video_name = is_valid_video(video_url) # only support video name without path + else: + return JSONResponse(status_code=500, content={"message": "Invalid file name."}) + + video_path = os.path.join(VIDEO_DIR, secure_video_name) + if os.path.exists(video_path): + logging.info(f"File found: {video_path}") + else: + logging.error(f"File not found: {video_path}") + return JSONResponse( + status_code=404, content={"message": "File not found. 
Only local files under data folder are allowed."} + ) + else: + # validate the remote URL + if not is_valid_url(video_url): + return JSONResponse(status_code=500, content={"message": "Invalid URL."}) + else: + parsed_url = urlparse(video_url) + video_path = os.path.join(VIDEO_DIR, os.path.basename(parsed_url.path)) + try: + response = requests.get(video_url, stream=True) + if response.status_code == 200: + with open(video_path, "wb") as file: + for chunk in response.iter_content(chunk_size=1024): + if chunk: # filter out keep-alive new chunks + file.write(chunk) + logging.info(f"File downloaded: {video_path}") + else: + logging.info(f"Error downloading file: {response.status_code}") + return JSONResponse(status_code=500, content={"message": "Error downloading file."}) + except Exception as e: + logging.info(f"Error downloading file: {response.status_code}") + return JSONResponse(status_code=500, content={"message": "Error downloading file."}) + + video_info = videoInfo(start_time=start, duration=duration, video_path=video_path) + + # format context and instruction + instruction = f"{get_context(prompt,context_db)[0]}: {prompt}" + + return StreamingResponse(stream_res(video_info, instruction, max_new_tokens)) + + +# Main entry point +parser = argparse.ArgumentParser() +parser.add_argument("--host", type=str, default="0.0.0.0") +parser.add_argument("--port", type=int, default=9009) +args = parser.parse_args() + +context_db = construct_instructions() +video_llama = VL(cfg_path=CFG_PATH, model_type=MODEL_TYPE) +tokenizer = video_llama.model.llama_tokenizer +streamer = TextIteratorStreamer(tokenizer, skip_prompt=True) + +vis_processor_cfg = video_llama.cfg.datasets_cfg.webvid.vis_processor.train +vis_processor = registry.get_processor_class(vis_processor_cfg.name).from_config(vis_processor_cfg) + +chat = Chat(video_llama.model, vis_processor, device="cpu") + +uvicorn.run(app, host=args.host, port=args.port) diff --git a/comps/lvms/video-llama/server/start.sh b/comps/lvms/video-llama/server/start.sh new file mode 100644 index 000000000..f016ad1a8 --- /dev/null +++ b/comps/lvms/video-llama/server/start.sh @@ -0,0 +1,23 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# /bin/bash +# Download models +MODEL_REPO=https://huggingface.co/DAMO-NLP-SG/Video-LLaMA-2-7B-Finetuned +llm_download=${llm_download} +echo "llm_download: ${llm_download}" +if [ "$llm_download" = "True" ]; then + # clean if exists + rm -rf /home/user/model/Video-LLaMA-2-7B-Finetuned + + echo "Please wait for model download..." 
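+    # Note: the git-lfs clone below downloads the full Video-LLaMA-2-7B-Finetuned model repository,
+    # which the README above estimates at roughly 1.5 hours on a 100 Mbps link.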
+ git lfs install && git clone ${MODEL_REPO} /home/user/model/Video-LLaMA-2-7B-Finetuned + # rm Video-LLaMA-2-7B-Finetuned/AL*.pth Video-LLaMA-2-7B-Finetuned/imagebind_huge.pth +elif [ "$llm_download" = "False" ]; then + echo "No model download" +else + echo "llm_download should be True or False" + exit 1 +fi + +python server.py diff --git a/comps/lvms/video-llama/server/video-llama.patch.tar b/comps/lvms/video-llama/server/video-llama.patch.tar new file mode 100644 index 000000000..7e9826f11 Binary files /dev/null and b/comps/lvms/video-llama/server/video-llama.patch.tar differ diff --git a/comps/lvms/video-llama/server/video_llama_config/video_llama_eval_only_vl.yaml b/comps/lvms/video-llama/server/video_llama_config/video_llama_eval_only_vl.yaml new file mode 100644 index 000000000..3b239fe0f --- /dev/null +++ b/comps/lvms/video-llama/server/video_llama_config/video_llama_eval_only_vl.yaml @@ -0,0 +1,39 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +model: + arch: video_llama + model_type: pretrain_llama_v2 # pretrain_llama_v2 pretrain_vicuna + freeze_vit: True + freeze_qformer: True + max_txt_len: 256 #512 + end_sym: "###" + low_resource: False + + frozen_llama_proj: True + + llama_model: "/home/user/model/Video-LLaMA-2-7B-Finetuned/llama-2-7b-chat-hf" + ckpt: "/home/user/model/Video-LLaMA-2-7B-Finetuned/VL_LLaMA_2_7B_Finetuned.pth" + + equip_audio_branch: False # whether equips the audio branch + fusion_head_layers: 2 + max_frame_pos: 32 #cannot be changed - frozen with training model + fusion_header_type: "seqTransf" + +datasets: + webvid: + vis_processor: + train: + name: "alpro_video_eval" + n_frms: 32 #8 + image_size: 224 + text_processor: + train: + name: "blip_caption" + +run: + task: video_text_pretrain + seed: 10 + input_video_dir: "data/testset-raw" + input_questions_json: "data/testset-raw/testset_small.json" + output_dir: "output/origFT_videollama_testset_small_results_only_vl" diff --git a/comps/nginx/README.md b/comps/nginx/README.md new file mode 100644 index 000000000..cae340331 --- /dev/null +++ b/comps/nginx/README.md @@ -0,0 +1,76 @@ +# Nginx for Microservice Forwarding + +[Nginx](https://nginx.org/en/) serves as a versatile tool in the realm of web services, functioning as an HTTP and reverse proxy server, and a generic TCP/UDP proxy server. + +In GenAIComps, we utilize nginx to streamline our network services. We provide an nginx Docker container, which is essential for deploying [OPEA](https://github.com/opea-project) microservices, mega services, and managing endpoint and port forwarding for frontend services. Our use of Docker to launch nginx ensures a flexible and reliable service deployment, optimizing our infrastructure to meet diverse operational demands. + +## ๐Ÿš€1. Build Docker Image + +```bash +cd ../.. +docker build -t opea/nginx:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/nginx/docker/Dockerfile . +``` + +## ๐Ÿš€2. Environment Settings + +To use Nginx for service forwarding, users need to setup environment variables first. The variables set here will be substituted in `nginx.conf.template`. + +For example, if you want to use Nginx to forward the frontend, backend services of a [ChatQnA](https://github.com/opea-project/GenAIExamples/tree/main/ChatQnA) example, setup environment variables as below. 
+ +```bash +export FRONTEND_SERVICE_IP=${your_frontend_service_ip} +export FRONTEND_SERVICE_PORT=5173 +export BACKEND_SERVICE_NAME=chatqna +export BACKEND_SERVICE_IP=${your_backend_service_ip} +export BACKEND_SERVICE_PORT=8888 +export NGINX_PORT=${your_nginx_port} +``` + +Nginx will expose `80` as the default port. You can choose other available ports as `${your_nginx_port}` for Nginx docker. + +For other examples, change the variable above following the corresponding READMEs. + +If you want to forward other services like `dataprep` using Nginx, add the code below in `nginx.conf.template` and setup the right parameters for it. Notice that the `${dataprep_service_endpoint}` need to be the form of `/v1/xxx/xxx`. + +```bash +location ${dataprep_service_endpoint} { + proxy_pass http://${dataprep_service_ip}:${dataprep_service_port}; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; +} +``` + +## ๐Ÿš€3. Start Nginx Service + +### 3.1 Start with CLI (Option 1) + +```bash +docker run -d --name opea-nginx -p ${NGINX_PORT}:80 \ + -e FRONTEND_SERVICE_IP=${FRONTEND_SERVICE_IP} \ + -e FRONTEND_SERVICE_PORT=${FRONTEND_SERVICE_PORT} \ + -e BACKEND_SERVICE_NAME=${BACKEND_SERVICE_NAME} \ + -e BACKEND_SERVICE_IP=${BACKEND_SERVICE_IP} \ + -e BACKEND_SERVICE_PORT=${BACKEND_SERVICE_PORT} \ + opea/nginx:latest +``` + +### 3.2 Start with Docker Compose (Option 2) + +```bash +cd docker +docker compose -f docker_compose.yaml up -d +``` + +## ๐Ÿš€4. Consume Forwarded Service + +To consume the backend service, use the curl command as below (this is a ChatQnA service example): + +```bash +curl http://${your_nginx_ip}:${your_nginx_port}/v1/chatqna \ + -H "Content-Type: application/json" \ + -d '{"messages": "What is Deep Learning?"}' +``` + +For the frontend service, open the following URL in your browser: `http://${your_nginx_ip}:${your_nginx_port}`. 
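+
+To confirm that the environment variables were substituted into `nginx.conf.template` correctly, you can inspect the rendered configuration inside the running container and ask Nginx to validate it. The quick check below is a minimal sketch assuming the container was started with the name `opea-nginx` as in section 3.1; the startup script renders the template to `/etc/nginx/conf.d/default.conf` via `envsubst`.
+
+```bash
+# print the configuration rendered from nginx.conf.template by envsubst
+docker exec opea-nginx cat /etc/nginx/conf.d/default.conf
+
+# ask nginx inside the container to validate the rendered configuration
+docker exec opea-nginx nginx -t
+```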
diff --git a/comps/nginx/docker/Dockerfile b/comps/nginx/docker/Dockerfile new file mode 100644 index 000000000..447d3946a --- /dev/null +++ b/comps/nginx/docker/Dockerfile @@ -0,0 +1,20 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + + +FROM nginx:alpine + +RUN apk add --no-cache gettext + +COPY comps/nginx/docker/nginx.conf.template /etc/nginx/nginx.conf.template + +ENV FRONTEND_SERVICE_IP=localhost +ENV FRONTEND_SERVICE_PORT=5173 +ENV BACKEND_SERVICE_NAME=chatqna +ENV BACKEND_SERVICE_IP=localhost +ENV BACKEND_SERVICE_PORT=8888 + +COPY comps/nginx/docker/start-nginx.sh /usr/local/bin/start-nginx.sh +RUN chmod +x /usr/local/bin/start-nginx.sh + +CMD ["/usr/local/bin/start-nginx.sh"] diff --git a/comps/nginx/docker/docker-compose.yaml b/comps/nginx/docker/docker-compose.yaml new file mode 100644 index 000000000..757c75ca7 --- /dev/null +++ b/comps/nginx/docker/docker-compose.yaml @@ -0,0 +1,24 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +services: + opea-nginx-server: + image: opea/nginx:latest + container_name: opea-nginx-server + ports: + - "${NGINX_PORT:-80}:80" + environment: + - no_proxy=${no_proxy} + - https_proxy=${https_proxy} + - http_proxy=${http_proxy} + - FRONTEND_SERVICE_IP=${FRONTEND_SERVICE_IP} + - FRONTEND_SERVICE_PORT=${FRONTEND_SERVICE_PORT} + - BACKEND_SERVICE_NAME=${BACKEND_SERVICE_NAME} + - BACKEND_SERVICE_IP=${BACKEND_SERVICE_IP} + - BACKEND_SERVICE_PORT=${BACKEND_SERVICE_PORT} + ipc: host + restart: always + +networks: + default: + driver: bridge diff --git a/comps/nginx/docker/nginx.conf.template b/comps/nginx/docker/nginx.conf.template new file mode 100644 index 000000000..5ba7223fc --- /dev/null +++ b/comps/nginx/docker/nginx.conf.template @@ -0,0 +1,28 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + + +server { + listen 80; + listen [::]:80; + + location /home { + alias /usr/share/nginx/html/index.html; + } + + location / { + proxy_pass http://${FRONTEND_SERVICE_IP}:${FRONTEND_SERVICE_PORT}; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + } + + location /v1/${BACKEND_SERVICE_NAME} { + proxy_pass http://${BACKEND_SERVICE_IP}:${BACKEND_SERVICE_PORT}; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + } +} diff --git a/comps/nginx/docker/start-nginx.sh b/comps/nginx/docker/start-nginx.sh new file mode 100644 index 000000000..2a2a4d242 --- /dev/null +++ b/comps/nginx/docker/start-nginx.sh @@ -0,0 +1,6 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +#!/bin/sh +envsubst '${FRONTEND_SERVICE_IP} ${FRONTEND_SERVICE_PORT} ${BACKEND_SERVICE_NAME} ${BACKEND_SERVICE_IP} ${BACKEND_SERVICE_PORT}' < /etc/nginx/nginx.conf.template > /etc/nginx/conf.d/default.conf +nginx -g 'daemon off;' diff --git a/comps/prompt_registry/mongo/README.md b/comps/prompt_registry/mongo/README.md index 799fec7ca..86baaaf27 100644 --- a/comps/prompt_registry/mongo/README.md +++ b/comps/prompt_registry/mongo/README.md @@ -21,16 +21,16 @@ Start document preparation microservice for Milvus with below command. 
python prompt.py ``` -# ๐Ÿš€Start Microservice with Docker +## ๐Ÿš€Start Microservice with Docker -## Build Docker Image +### Build Docker Image ```bash cd ~/GenAIComps docker build -t opea/promptregistry-mongo-server:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/prompt_registry/mongo/docker/Dockerfile . ``` -## Run Docker with CLI +### Run Docker with CLI 1. Run mongoDB image @@ -41,10 +41,10 @@ docker run -d -p 27017:27017 --name=mongo mongo:latest 2. Run prompt_registry service ```bash -docker run -d --name="promptregistry-mongo-server" -p 6012:6012 -p 6013:6013 -p 6014:6014 -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e no_proxy=$no_proxy -e MONGO_HOST=${MONGO_HOST} -e MONGO_PORT=${MONGO_PORT} -e DB_NAME=${DB_NAME} -e COLLECTION_NAME=${COLLECTION_NAME} opea/promptregistry-mongo-server:latest +docker run -d --name="promptregistry-mongo-server" -p 6012:6012 -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e no_proxy=$no_proxy -e MONGO_HOST=${MONGO_HOST} -e MONGO_PORT=${MONGO_PORT} -e DB_NAME=${DB_NAME} -e COLLECTION_NAME=${COLLECTION_NAME} opea/promptregistry-mongo-server:latest ``` -## Invoke Microservice +### Invoke Microservice Once prompt_registry service is up and running, users can access the database by using API endpoint below. Each API serves different purpose and return appropriate response. @@ -64,7 +64,7 @@ curl -X 'POST' \ ```bash curl -X 'POST' \ - http://{host_ip}:6013/v1/prompt/get \ + http://{host_ip}:6012/v1/prompt/get \ -H 'accept: application/json' \ -H 'Content-Type: application/json' \ -d '{ @@ -73,7 +73,7 @@ curl -X 'POST' \ ```bash curl -X 'POST' \ - http://{host_ip}:6013/v1/prompt/get \ + http://{host_ip}:6012/v1/prompt/get \ -H 'accept: application/json' \ -H 'Content-Type: application/json' \ -d '{ @@ -84,7 +84,7 @@ curl -X 'POST' \ ```bash curl -X 'POST' \ - http://{host_ip}:6013/v1/prompt/get \ + http://{host_ip}:6012/v1/prompt/get \ -H 'accept: application/json' \ -H 'Content-Type: application/json' \ -d '{ @@ -95,7 +95,7 @@ curl -X 'POST' \ ```bash curl -X 'POST' \ - http://{host_ip}:6014/v1/prompt/delete \ + http://{host_ip}:6012/v1/prompt/delete \ -H 'accept: application/json' \ -H 'Content-Type: application/json' \ -d '{ diff --git a/comps/prompt_registry/mongo/docker/Dockerfile b/comps/prompt_registry/mongo/docker/Dockerfile index db2e9c59d..a2845430c 100644 --- a/comps/prompt_registry/mongo/docker/Dockerfile +++ b/comps/prompt_registry/mongo/docker/Dockerfile @@ -8,8 +8,7 @@ ENV LANG=C.UTF-8 RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ build-essential \ libgl1-mesa-glx \ - libjemalloc-dev \ - vim + libjemalloc-dev RUN useradd -m -s /bin/bash user && \ mkdir -p /home/user && \ @@ -28,4 +27,4 @@ ENV PYTHONPATH=$PYTHONPATH:/home/user WORKDIR /home/user/comps/prompt_registry/mongo -ENTRYPOINT ["python", "prompt.py"] \ No newline at end of file +ENTRYPOINT ["python", "prompt.py"] diff --git a/comps/prompt_registry/mongo/docker/docker-compose-prompt-registry-mongo.yaml b/comps/prompt_registry/mongo/docker/docker-compose-prompt-registry-mongo.yaml index 23db61c89..f6cb68831 100644 --- a/comps/prompt_registry/mongo/docker/docker-compose-prompt-registry-mongo.yaml +++ b/comps/prompt_registry/mongo/docker/docker-compose-prompt-registry-mongo.yaml @@ -19,8 +19,6 @@ services: container_name: promptregistry-mongo-server ports: - "6012:6012" - - "6013:6013" - - "6014:6014" ipc: host environment: http_proxy: ${http_proxy} @@ -28,6 +26,7 @@ services: no_proxy: ${no_proxy} 
MONGO_HOST: ${MONGO_HOST} MONGO_PORT: ${MONGO_PORT} + DB_NAME: ${DB_NAME} COLLECTION_NAME: ${COLLECTION_NAME} restart: unless-stopped diff --git a/comps/prompt_registry/mongo/prompt.py b/comps/prompt_registry/mongo/prompt.py index 4a3f52bc1..fa54ea0d3 100644 --- a/comps/prompt_registry/mongo/prompt.py +++ b/comps/prompt_registry/mongo/prompt.py @@ -1,12 +1,17 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import os from typing import Optional from mongo_store import PromptStore from pydantic import BaseModel +from comps import CustomLogger from comps.cores.mega.micro_service import opea_microservices, register_microservice +logger = CustomLogger("prompt_mongo") +logflag = os.getenv("LOGFLAG", False) + class PromptCreate(BaseModel): """This class represents the data model for creating and storing a new prompt in the database. @@ -34,7 +39,7 @@ class PromptId(BaseModel): @register_microservice( - name="opea_service@prompt_mongo_create", + name="opea_service@prompt_mongo", endpoint="/v1/prompt/create", host="0.0.0.0", input_datatype=PromptCreate, @@ -49,24 +54,27 @@ async def create_prompt(prompt: PromptCreate): Returns: JSON (PromptResponse): PromptResponse class object, None otherwise. """ + if logflag: + logger.info(prompt) try: prompt_store = PromptStore(prompt.user) prompt_store.initialize_storage() response = await prompt_store.save_prompt(prompt) - + if logflag: + logger.info(response) return response except Exception as e: - print(f"An error occurred: {str(e)}") + logger.info(f"An error occurred: {str(e)}") return None @register_microservice( - name="opea_service@prompt_mongo_get", + name="opea_service@prompt_mongo", endpoint="/v1/prompt/get", host="0.0.0.0", input_datatype=PromptId, - port=6013, + port=6012, ) async def get_prompt(prompt: PromptId): """Retrieves prompt from prompt store based on provided PromptId or user. @@ -77,6 +85,8 @@ async def get_prompt(prompt: PromptId): Returns: JSON: Retrieved prompt data if successful, None otherwise. """ + if logflag: + logger.info(prompt) try: prompt_store = PromptStore(prompt.user) prompt_store.initialize_storage() @@ -86,20 +96,21 @@ async def get_prompt(prompt: PromptId): response = await prompt_store.prompt_search(prompt.prompt_text) else: response = await prompt_store.get_all_prompt_of_user() - + if logflag: + logger.info(response) return response except Exception as e: - print(f"An error occurred: {str(e)}") + logger.info(f"An error occurred: {str(e)}") return None @register_microservice( - name="opea_service@prompt_mongo_delete", + name="opea_service@prompt_mongo", endpoint="/v1/prompt/delete", host="0.0.0.0", input_datatype=PromptId, - port=6014, + port=6012, ) async def delete_prompt(prompt: PromptId): """Delete a prompt from prompt store by given PromptId. @@ -110,6 +121,8 @@ async def delete_prompt(prompt: PromptId): Returns: Result of deletion if successful, None otherwise. 
""" + if logflag: + logger.info(prompt) try: prompt_store = PromptStore(prompt.user) prompt_store.initialize_storage() @@ -117,14 +130,14 @@ async def delete_prompt(prompt: PromptId): raise Exception("Prompt id is required.") else: response = await prompt_store.delete_prompt(prompt.prompt_id) + if logflag: + logger.info(response) return response except Exception as e: - print(f"An error occurred: {str(e)}") + logger.info(f"An error occurred: {str(e)}") return None if __name__ == "__main__": - opea_microservices["opea_service@prompt_mongo_get"].start() - opea_microservices["opea_service@prompt_mongo_create"].start() - opea_microservices["opea_service@prompt_mongo_delete"].start() + opea_microservices["opea_service@prompt_mongo"].start() diff --git a/comps/ragas/tgi/Dockerfile b/comps/ragas/tgi/Dockerfile index 55d4229a0..f55d8de0e 100644 --- a/comps/ragas/tgi/Dockerfile +++ b/comps/ragas/tgi/Dockerfile @@ -5,8 +5,7 @@ FROM langchain/langchain:latest RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ libgl1-mesa-glx \ - libjemalloc-dev \ - vim + libjemalloc-dev RUN useradd -m -s /bin/bash user && \ mkdir -p /home/user && \ diff --git a/comps/ragas/tgi/llm.py b/comps/ragas/tgi/llm.py index f31c66657..0b67164a4 100644 --- a/comps/ragas/tgi/llm.py +++ b/comps/ragas/tgi/llm.py @@ -1,86 +1,99 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -import os - -from datasets import Dataset -from langchain_community.embeddings import ( - HuggingFaceBgeEmbeddings, - HuggingFaceEmbeddings, - HuggingFaceHubEmbeddings, - HuggingFaceInstructEmbeddings, -) -from langchain_community.llms import HuggingFaceEndpoint -from langsmith import traceable -from ragas import evaluate -from ragas.metrics import answer_relevancy, context_precision, context_recall, faithfulness - -from comps import GeneratedDoc, RAGASParams, RAGASScores, ServiceType, opea_microservices, register_microservice - -tei_embedding_endpoint = os.getenv("TEI_ENDPOINT") -EMBED_MODEL = os.getenv("EMBED_MODEL", "BAAI/bge-base-en-v1.5") - - -@register_microservice( - name="opea_service@ragas_tgi_llm", - service_type=ServiceType.RAGAS, - endpoint="/v1/ragas", - host="0.0.0.0", - port=9050, - input_datatype=RAGASParams, - output_datatype=RAGASScores, -) -@traceable(run_type="llm") -def llm_generate(input: RAGASParams): - llm_endpoint = os.getenv("TGI_LLM_ENDPOINT", "http://localhost:8080") - - # Create vectorstore - if tei_embedding_endpoint: - # create embeddings using TEI endpoint service - embedder = HuggingFaceHubEmbeddings(model=tei_embedding_endpoint) - else: - # create embeddings using local embedding model - embedder = HuggingFaceBgeEmbeddings(model_name=EMBED_MODEL) - - llm = HuggingFaceEndpoint( - endpoint_url=llm_endpoint, - max_new_tokens=input.max_new_tokens, - top_k=input.top_k, - top_p=input.top_p, - typical_p=input.typical_p, - temperature=input.temperature, - repetition_penalty=input.repetition_penalty, - streaming=input.streaming, - timeout=600, - ) - - data_collections = { - "question": input.questions, - "answer": input.answers, - "docs": input.docs, - "ground_truth": input.groundtruths, - } - dataset = Dataset.from_dict(data_collections) - - score = evaluate( - dataset, - metrics=[answer_relevancy, faithfulness, context_recall, context_precision], - llm=llm, - embeddings=embedder, - ) - df = score.to_pandas() - answer_relevancy_average = df["answer_relevancy"][:].mean() - faithfulness_average = df["faithfulness"][:].mean() - context_recall_average = 
df["context_recall"][:].mean() - context_precision_average = df["context_precision"][:].mean() - - return RAGASScores( - answer_relevancy=answer_relevancy_average, - faithfulness=faithfulness_average, - context_recallL=context_recall_average, - context_precision=context_precision_average, - ) - - -if __name__ == "__main__": - opea_microservices["opea_service@llm_tgi"].start() +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import os + +from datasets import Dataset +from langchain_community.embeddings import ( + HuggingFaceBgeEmbeddings, + HuggingFaceEmbeddings, + HuggingFaceHubEmbeddings, + HuggingFaceInstructEmbeddings, +) +from langchain_community.llms import HuggingFaceEndpoint +from ragas import evaluate +from ragas.metrics import answer_relevancy, context_precision, context_recall, faithfulness + +from comps import ( + CustomLogger, + GeneratedDoc, + RAGASParams, + RAGASScores, + ServiceType, + opea_microservices, + register_microservice, +) + +logger = CustomLogger("ragas_tgi_llm") +logflag = os.getenv("LOGFLAG", False) + +tei_embedding_endpoint = os.getenv("TEI_ENDPOINT") +EMBED_MODEL = os.getenv("EMBED_MODEL", "BAAI/bge-base-en-v1.5") + + +@register_microservice( + name="opea_service@ragas_tgi_llm", + service_type=ServiceType.RAGAS, + endpoint="/v1/ragas", + host="0.0.0.0", + port=9050, + input_datatype=RAGASParams, + output_datatype=RAGASScores, +) +def llm_generate(input: RAGASParams): + if logflag: + logger.info(input) + llm_endpoint = os.getenv("TGI_LLM_ENDPOINT", "http://localhost:8080") + + # Create vectorstore + if tei_embedding_endpoint: + # create embeddings using TEI endpoint service + embedder = HuggingFaceHubEmbeddings(model=tei_embedding_endpoint) + else: + # create embeddings using local embedding model + embedder = HuggingFaceBgeEmbeddings(model_name=EMBED_MODEL) + + llm = HuggingFaceEndpoint( + endpoint_url=llm_endpoint, + max_new_tokens=input.max_new_tokens, + top_k=input.top_k, + top_p=input.top_p, + typical_p=input.typical_p, + temperature=input.temperature, + repetition_penalty=input.repetition_penalty, + streaming=input.streaming, + timeout=600, + ) + + data_collections = { + "question": input.questions, + "answer": input.answers, + "docs": input.docs, + "ground_truth": input.groundtruths, + } + dataset = Dataset.from_dict(data_collections) + + score = evaluate( + dataset, + metrics=[answer_relevancy, faithfulness, context_recall, context_precision], + llm=llm, + embeddings=embedder, + ) + df = score.to_pandas() + answer_relevancy_average = df["answer_relevancy"][:].mean() + faithfulness_average = df["faithfulness"][:].mean() + context_recall_average = df["context_recall"][:].mean() + context_precision_average = df["context_precision"][:].mean() + result = RAGASScores( + answer_relevancy=answer_relevancy_average, + faithfulness=faithfulness_average, + context_recallL=context_recall_average, + context_precision=context_precision_average, + ) + if logflag: + logger.info(result) + return result + + +if __name__ == "__main__": + opea_microservices["opea_service@llm_tgi"].start() diff --git a/comps/ragas/tgi/requirements.txt b/comps/ragas/tgi/requirements.txt index 3fa49150e..7839ef15d 100644 --- a/comps/ragas/tgi/requirements.txt +++ b/comps/ragas/tgi/requirements.txt @@ -1,14 +1,13 @@ -datasets -docarray[full] -fastapi -huggingface_hub -langchain==0.1.16 -langsmith -opentelemetry-api -opentelemetry-exporter-otlp -opentelemetry-sdk -prometheus-fastapi-instrumentator -ragas -shortuuid -transformers -uvicorn +datasets +docarray[full] 
+fastapi +huggingface_hub +langchain==0.1.16 +opentelemetry-api +opentelemetry-exporter-otlp +opentelemetry-sdk +prometheus-fastapi-instrumentator +ragas +shortuuid +transformers +uvicorn diff --git a/comps/reranks/README.md b/comps/reranks/README.md index ecec38272..9b5dc9042 100644 --- a/comps/reranks/README.md +++ b/comps/reranks/README.md @@ -2,17 +2,17 @@ The Reranking Microservice, fueled by reranking models, stands as a straightforward yet immensely potent tool for semantic search. When provided with a query and a collection of documents, reranking swiftly indexes the documents based on their semantic relevance to the query, arranging them from most to least pertinent. This microservice significantly enhances overall accuracy. In a text retrieval system, either a dense embedding model or a sparse lexical search index is often employed to retrieve relevant text documents based on the input. However, a reranking model can further refine this process by rearranging potential candidates into a final, optimized order. -# ๐Ÿš€1. Start Microservice with Python (Option 1) +## ๐Ÿš€1. Start Microservice with Python (Option 1) To start the Reranking microservice, you must first install the required python packages. -## 1.1 Install Requirements +### 1.1 Install Requirements ```bash pip install -r requirements.txt ``` -## 1.2 Start TEI Service +### 1.2 Start TEI Service ```bash export HF_TOKEN=${your_hf_api_token} @@ -25,7 +25,7 @@ volume=$PWD/data docker run -d -p 6060:80 -v $volume:/data -e http_proxy=$http_proxy -e https_proxy=$https_proxy --pull always ghcr.io/huggingface/text-embeddings-inference:cpu-1.2 --model-id $RERANK_MODEL_ID --revision $revision --hf-api-token $HF_TOKEN ``` -## 1.3 Verify the TEI Service +### 1.3 Verify the TEI Service ```bash curl 127.0.0.1:6060/rerank \ @@ -34,18 +34,18 @@ curl 127.0.0.1:6060/rerank \ -H 'Content-Type: application/json' ``` -## 1.4 Start Reranking Service with Python Script +### 1.4 Start Reranking Service with Python Script ```bash export TEI_RERANKING_ENDPOINT="http://${your_ip}:6060" python reranking_tei_xeon.py ``` -# ๐Ÿš€2. Start Microservice with Docker (Option 2) +## ๐Ÿš€2. Start Microservice with Docker (Option 2) If you start an Reranking microservice with docker, the `docker_compose_reranking.yaml` file will automatically start a TEI service with docker. -## 2.1 Setup Environment Variables +### 2.1 Setup Environment Variables ```bash export HF_TOKEN=${your_hf_api_token} @@ -55,7 +55,7 @@ export LANGCHAIN_PROJECT="opea/reranks" export TEI_RERANKING_ENDPOINT="http://${your_ip}:8808" ``` -## 2.2 Build Docker Image +### 2.2 Build Docker Image ```bash cd ../../ @@ -69,22 +69,22 @@ To start a docker container, you have two options: You can choose one as needed. -## 2.3 Run Docker with CLI (Option A) +### 2.3 Run Docker with CLI (Option A) ```bash docker run -d --name="reranking-tei-server" -p 8000:8000 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e TEI_RERANKING_ENDPOINT=$TEI_RERANKING_ENDPOINT -e HF_TOKEN=$HF_TOKEN -e LANGCHAIN_API_KEY=$LANGCHAIN_API_KEY opea/reranking-tei:latest ``` -## 2.4 Run Docker with Docker Compose (Option B) +### 2.4 Run Docker with Docker Compose (Option B) ```bash cd langchain/docker docker compose -f docker_compose_reranking.yaml up -d ``` -# ๐Ÿš€3. Consume Reranking Service +## ๐Ÿš€3. 
Consume Reranking Service -## 3.1 Check Service Status +### 3.1 Check Service Status ```bash curl http://localhost:8000/v1/health_check \ @@ -92,7 +92,7 @@ curl http://localhost:8000/v1/health_check \ -H 'Content-Type: application/json' ``` -## 3.2 Consume Reranking Service +### 3.2 Consume Reranking Service ```bash curl http://localhost:8000/v1/reranking \ diff --git a/comps/reranks/fastrag/README.md b/comps/reranks/fastrag/README.md index fac481860..025799d05 100644 --- a/comps/reranks/fastrag/README.md +++ b/comps/reranks/fastrag/README.md @@ -2,17 +2,17 @@ The Reranking Microservice, fueled by reranking models, stands as a straightforward yet immensely potent tool for semantic search. When provided with a query and a collection of documents, reranking swiftly indexes the documents based on their semantic relevance to the query, arranging them from most to least pertinent. This microservice significantly enhances overall accuracy. In a text retrieval system, either a dense embedding model or a sparse lexical search index is often employed to retrieve relevant text documents based on the input. However, a reranking model can further refine this process by rearranging potential candidates into a final, optimized order. -# ๐Ÿš€1. Start Microservice with Python (Option 1) +## ๐Ÿš€1. Start Microservice with Python (Option 1) To start the Reranking microservice, you must first install the required python packages. -## 1.1 Install Requirements +### 1.1 Install Requirements ```bash pip install -r requirements.txt ``` -## 1.2 Install fastRAG +### 1.2 Install fastRAG ```bash git clone https://github.com/IntelLabs/fastRAG.git @@ -21,37 +21,37 @@ pip install . pip install .[intel] ``` -## 1.3 Start Reranking Service with Python Script +### 1.3 Start Reranking Service with Python Script ```bash export EMBED_MODEL="Intel/bge-small-en-v1.5-rag-int8-static" python local_reranking.py ``` -# ๐Ÿš€2. Start Microservice with Docker (Option 2) +## ๐Ÿš€2. Start Microservice with Docker (Option 2) -## 2.1 Setup Environment Variables +### 2.1 Setup Environment Variables ```bash export EMBED_MODEL="Intel/bge-small-en-v1.5-rag-int8-static" ``` -## 2.2 Build Docker Image +### 2.2 Build Docker Image ```bash cd ../../ docker build -t opea/reranking-fastrag:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/reranks/fastrag/docker/Dockerfile . ``` -## 2.3 Run Docker +### 2.3 Run Docker ```bash docker run -d --name="reranking-fastrag-server" -p 8000:8000 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e EMBED_MODEL=$EMBED_MODEL opea/reranking-fastrag:latest ``` -# ๐Ÿš€3. Consume Reranking Service +## ๐Ÿš€3. 
Consume Reranking Service -## 3.1 Check Service Status +### 3.1 Check Service Status ```bash curl http://localhost:8000/v1/health_check \ @@ -59,7 +59,7 @@ curl http://localhost:8000/v1/health_check \ -H 'Content-Type: application/json' ``` -## 3.2 Consume Reranking Service +### 3.2 Consume Reranking Service ```bash curl http://localhost:8000/v1/reranking \ diff --git a/comps/reranks/fastrag/docker/Dockerfile b/comps/reranks/fastrag/docker/Dockerfile index 8372e33b7..2bf8e02b5 100644 --- a/comps/reranks/fastrag/docker/Dockerfile +++ b/comps/reranks/fastrag/docker/Dockerfile @@ -1,4 +1,3 @@ - # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 @@ -7,10 +6,9 @@ FROM python:3.10-slim ENV LANG=C.UTF-8 RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ + git \ libgl1-mesa-glx \ - libjemalloc-dev \ - vim \ - git + libjemalloc-dev RUN useradd -m -s /bin/bash user && \ mkdir -p /home/user && \ @@ -33,4 +31,3 @@ ENV PYTHONPATH=$PYTHONPH:/home/user WORKDIR /home/user/comps/reranks/fastrag ENTRYPOINT ["python", "local_reranking.py"] - diff --git a/comps/reranks/fastrag/local_reranking.py b/comps/reranks/fastrag/local_reranking.py index f11ebc1f9..d6f33193c 100644 --- a/comps/reranks/fastrag/local_reranking.py +++ b/comps/reranks/fastrag/local_reranking.py @@ -1,14 +1,19 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import os + from config import RANKER_MODEL from fastrag.rankers import IPEXBiEncoderSimilarityRanker from haystack import Document -from langsmith import traceable +from comps import CustomLogger from comps.cores.mega.micro_service import ServiceType, opea_microservices, register_microservice from comps.cores.proto.docarray import RerankedDoc, SearchedDoc, TextDoc +logger = CustomLogger("local_reranking") +logflag = os.getenv("LOGFLAG", False) + @register_microservice( name="opea_service@local_reranking", @@ -19,14 +24,17 @@ input_datatype=SearchedDoc, output_datatype=RerankedDoc, ) -@traceable(run_type="llm") def reranking(input: SearchedDoc) -> RerankedDoc: + if logflag: + logger.info(input) documents = [] for i, d in enumerate(input.retrieved_docs): documents.append(Document(content=d.text, id=(i + 1))) sorted_documents = reranker_model.run(input.initial_query, documents)["documents"] ranked_documents = [TextDoc(id=doc.id, text=doc.content) for doc in sorted_documents] res = RerankedDoc(initial_query=input.initial_query, reranked_docs=ranked_documents) + if logflag: + logger.info(res) return res diff --git a/comps/reranks/fastrag/requirements.txt b/comps/reranks/fastrag/requirements.txt index c4ded91c8..e8166a77a 100644 --- a/comps/reranks/fastrag/requirements.txt +++ b/comps/reranks/fastrag/requirements.txt @@ -2,7 +2,6 @@ docarray[full] fastapi haystack-ai langchain -langsmith opentelemetry-api opentelemetry-exporter-otlp opentelemetry-sdk diff --git a/comps/reranks/langchain-mosec/README.md b/comps/reranks/langchain-mosec/README.md index d67cf78b0..cd7e36ce0 100644 --- a/comps/reranks/langchain-mosec/README.md +++ b/comps/reranks/langchain-mosec/README.md @@ -1,29 +1,29 @@ # build reranking Mosec endpoint docker image ``` -docker build --build-arg http_proxy=$http_proxy --build-arg https_proxy=$https_proxy -t reranking-langchain-mosec:latest -f comps/reranks/langchain-mosec/mosec-docker/Dockerfile . +docker build --build-arg http_proxy=$http_proxy --build-arg https_proxy=$https_proxy -t opea/reranking-langchain-mosec-endpoint:latest -f comps/reranks/langchain-mosec/mosec-docker/Dockerfile . 
``` -# build reranking microservice docker image +## build reranking microservice docker image ``` docker build --build-arg http_proxy=$http_proxy --build-arg https_proxy=$https_proxy -t opea/reranking-langchain-mosec:latest -f comps/reranks/langchain-mosec/docker/Dockerfile . ``` -# launch Mosec endpoint docker container +## launch Mosec endpoint docker container ``` -docker run -d --name="reranking-langchain-mosec-endpoint" -p 6001:8000 reranking-langchain-mosec:latest +docker run -d --name="reranking-langchain-mosec-endpoint" -p 6001:8000 opea/reranking-langchain-mosec-endpoint:latest ``` -# launch embedding microservice docker container +## launch embedding microservice docker container ``` export MOSEC_RERANKING_ENDPOINT=http://127.0.0.1:6001 docker run -d --name="reranking-langchain-mosec-server" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p 6000:8000 --ipc=host -e MOSEC_RERANKING_ENDPOINT=$MOSEC_RERANKING_ENDPOINT opea/reranking-langchain-mosec:latest ``` -# run client test +## run client test ``` curl http://localhost:6000/v1/reranking \ diff --git a/comps/reranks/langchain-mosec/docker/Dockerfile b/comps/reranks/langchain-mosec/docker/Dockerfile index 9a678dc4a..7f3714e60 100644 --- a/comps/reranks/langchain-mosec/docker/Dockerfile +++ b/comps/reranks/langchain-mosec/docker/Dockerfile @@ -1,4 +1,3 @@ - # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 @@ -6,8 +5,7 @@ FROM langchain/langchain:latest RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ libgl1-mesa-glx \ - libjemalloc-dev \ - vim + libjemalloc-dev RUN useradd -m -s /bin/bash user && \ mkdir -p /home/user && \ @@ -25,4 +23,3 @@ ENV PYTHONPATH=$PYTHONPATH:/home/user WORKDIR /home/user/comps/reranks/langchain-mosec ENTRYPOINT ["python", "reranking_mosec_xeon.py"] - diff --git a/comps/reranks/langchain-mosec/mosec-docker/Dockerfile b/comps/reranks/langchain-mosec/mosec-docker/Dockerfile index 0c634fb90..dcf38aee5 100644 --- a/comps/reranks/langchain-mosec/mosec-docker/Dockerfile +++ b/comps/reranks/langchain-mosec/mosec-docker/Dockerfile @@ -2,22 +2,26 @@ # SPDX-License-Identifier: Apache-2.0 From ubuntu:22.04 +RUN useradd -m -s /bin/bash user && \ + mkdir -p /home/user && \ + chown -R user /home/user/ ARG DEBIAN_FRONTEND=noninteractive ENV GLIBC_TUNABLES glibc.cpu.x86_shstk=permissive -COPY comps /root/comps +COPY comps /home/user/comps RUN apt update && apt install -y python3 python3-pip + RUN pip3 install torch==2.2.2 torchvision --trusted-host download.pytorch.org --index-url https://download.pytorch.org/whl/cpu RUN pip3 install intel-extension-for-pytorch==2.2.0 RUN pip3 install transformers sentence-transformers RUN pip3 install llmspec mosec -RUN cd /root/ && export HF_ENDPOINT=https://hf-mirror.com && huggingface-cli download --resume-download BAAI/bge-reranker-large --local-dir /root/bge-reranker-large - -ENV EMB_MODEL="/root/bge-reranker-large/" +RUN cd /home/user/ && export HF_ENDPOINT=https://hf-mirror.com && huggingface-cli download --resume-download BAAI/bge-reranker-large --local-dir /home/user/bge-reranker-large +USER user +ENV EMB_MODEL="/home/user/bge-reranker-large/" -WORKDIR /root/comps/reranks/langchain-mosec/mosec-docker +WORKDIR /home/user/comps/reranks/langchain-mosec/mosec-docker CMD ["python3", "server-ipex.py"] diff --git a/comps/reranks/langchain-mosec/mosec-docker/server-ipex.py b/comps/reranks/langchain-mosec/mosec-docker/server-ipex.py index cd81fbf33..c7127c6ea 100644 --- 
a/comps/reranks/langchain-mosec/mosec-docker/server-ipex.py +++ b/comps/reranks/langchain-mosec/mosec-docker/server-ipex.py @@ -16,7 +16,7 @@ from torch.utils.data import DataLoader from tqdm.autonotebook import tqdm, trange -DEFAULT_MODEL = "/root/bge-reranker-large" +DEFAULT_MODEL = "/home/user/bge-reranker-large" class MyCrossEncoder(CrossEncoder): diff --git a/comps/reranks/langchain-mosec/reranking_mosec_xeon.py b/comps/reranks/langchain-mosec/reranking_mosec_xeon.py index 4640433af..1f222beb3 100644 --- a/comps/reranks/langchain-mosec/reranking_mosec_xeon.py +++ b/comps/reranks/langchain-mosec/reranking_mosec_xeon.py @@ -22,9 +22,9 @@ import requests from langchain_core.prompts import ChatPromptTemplate -from langsmith import traceable from comps import ( + CustomLogger, LLMParamsDoc, SearchedDoc, ServiceType, @@ -34,6 +34,9 @@ statistics_dict, ) +logger = CustomLogger("reranking_mosec_xeon") +logflag = os.getenv("LOGFLAG", False) + @register_microservice( name="opea_service@reranking_mosec_xeon", @@ -44,10 +47,10 @@ input_datatype=SearchedDoc, output_datatype=LLMParamsDoc, ) -@traceable(run_type="llm") @register_statistics(names=["opea_service@reranking_mosec_xeon"]) def reranking(input: SearchedDoc) -> LLMParamsDoc: - print("reranking input: ", input) + if logflag: + logger.info("reranking input: ", input) start = time.time() if input.retrieved_docs: docs = [doc.text for doc in input.retrieved_docs] @@ -69,8 +72,12 @@ def reranking(input: SearchedDoc) -> LLMParamsDoc: prompt = ChatPromptTemplate.from_template(template) final_prompt = prompt.format(context=doc.text, question=input.initial_query) statistics_dict["opea_service@reranking_mosec_xeon"].append_latency(time.time() - start, None) + if logflag: + logger.info(final_prompt.strip()) return LLMParamsDoc(query=final_prompt.strip()) else: + if logflag: + logger.info(input.initial_query) return LLMParamsDoc(query=input.initial_query) diff --git a/comps/reranks/requirements.txt b/comps/reranks/requirements.txt index 67503038f..7260862a3 100644 --- a/comps/reranks/requirements.txt +++ b/comps/reranks/requirements.txt @@ -2,7 +2,6 @@ aiohttp docarray[full] fastapi httpx -langsmith opentelemetry-api opentelemetry-exporter-otlp opentelemetry-sdk diff --git a/comps/reranks/tei/docker/Dockerfile b/comps/reranks/tei/docker/Dockerfile index 851fbfd58..2a8e7959f 100644 --- a/comps/reranks/tei/docker/Dockerfile +++ b/comps/reranks/tei/docker/Dockerfile @@ -1,4 +1,3 @@ - # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 @@ -10,8 +9,7 @@ ARG ARCH="cpu" RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ libgl1-mesa-glx \ - libjemalloc-dev \ - vim + libjemalloc-dev RUN useradd -m -s /bin/bash user && \ mkdir -p /home/user && \ @@ -30,4 +28,3 @@ ENV PYTHONPATH=$PYTHONPATH:/home/user WORKDIR /home/user/comps/reranks/tei ENTRYPOINT ["python", "reranking_tei.py"] - diff --git a/comps/reranks/tei/local_reranking.py b/comps/reranks/tei/local_reranking.py index d0fa8a79c..284cca7e6 100644 --- a/comps/reranks/tei/local_reranking.py +++ b/comps/reranks/tei/local_reranking.py @@ -1,10 +1,22 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -from langsmith import traceable +import os + from sentence_transformers import CrossEncoder -from comps import RerankedDoc, SearchedDoc, ServiceType, TextDoc, opea_microservices, register_microservice +from comps import ( + CustomLogger, + RerankedDoc, + SearchedDoc, + ServiceType, + TextDoc, + opea_microservices, + 
register_microservice, +) + +logger = CustomLogger("local_reranking") +logflag = os.getenv("LOGFLAG", False) @register_microservice( @@ -16,12 +28,15 @@ input_datatype=SearchedDoc, output_datatype=RerankedDoc, ) -@traceable(run_type="llm") def reranking(input: SearchedDoc) -> RerankedDoc: + if logflag: + logger.info(input) query_and_docs = [(input.initial_query, doc.text) for doc in input.retrieved_docs] scores = reranker_model.predict(query_and_docs) first_passage = sorted(list(zip(input.retrieved_docs, scores)), key=lambda x: x[1], reverse=True)[0][0] res = RerankedDoc(initial_query=input.initial_query, reranked_docs=[first_passage]) + if logflag: + logger.info(res) return res diff --git a/comps/reranks/tei/reranking_tei.py b/comps/reranks/tei/reranking_tei.py index 1beaa83f7..cb423cf83 100644 --- a/comps/reranks/tei/reranking_tei.py +++ b/comps/reranks/tei/reranking_tei.py @@ -6,11 +6,12 @@ import os import re import time +from typing import Union import requests -from langsmith import traceable from comps import ( + CustomLogger, LLMParamsDoc, SearchedDoc, ServiceType, @@ -19,6 +20,15 @@ register_statistics, statistics_dict, ) +from comps.cores.proto.api_protocol import ( + ChatCompletionRequest, + RerankingRequest, + RerankingResponse, + RerankingResponseData, +) + +logger = CustomLogger("reranking_tgi_gaudi") +logflag = os.getenv("LOGFLAG", False) @register_microservice( @@ -30,44 +40,54 @@ input_datatype=SearchedDoc, output_datatype=LLMParamsDoc, ) -@traceable(run_type="llm") @register_statistics(names=["opea_service@reranking_tgi_gaudi"]) -def reranking(input: SearchedDoc) -> LLMParamsDoc: +def reranking( + input: Union[SearchedDoc, RerankingRequest, ChatCompletionRequest] +) -> Union[LLMParamsDoc, RerankingResponse, ChatCompletionRequest]: + if logflag: + logger.info(input) start = time.time() + reranking_results = [] if input.retrieved_docs: docs = [doc.text for doc in input.retrieved_docs] url = tei_reranking_endpoint + "/rerank" - data = {"query": input.initial_query, "texts": docs} + if isinstance(input, SearchedDoc): + query = input.initial_query + else: + # for RerankingRequest, ChatCompletionRequest + query = input.input + data = {"query": query, "texts": docs} headers = {"Content-Type": "application/json"} response = requests.post(url, data=json.dumps(data), headers=headers) response_data = response.json() - best_response_list = heapq.nlargest(input.top_n, response_data, key=lambda x: x["score"]) - context_str = "" - for best_response in best_response_list: - context_str = context_str + " " + input.retrieved_docs[best_response["index"]].text - if context_str and len(re.findall("[\u4E00-\u9FFF]", context_str)) / len(context_str) >= 0.3: - # chinese context - template = """ -### ไฝ ๅฐ†ๆ‰ฎๆผ”ไธ€ไธชไนไบŽๅŠฉไบบใ€ๅฐŠ้‡ไป–ไบบๅนถ่ฏšๅฎž็š„ๅŠฉๆ‰‹๏ผŒไฝ ็š„็›ฎๆ ‡ๆ˜ฏๅธฎๅŠฉ็”จๆˆท่งฃ็ญ”้—ฎ้ข˜ใ€‚ๆœ‰ๆ•ˆๅœฐๅˆฉ็”จๆฅ่‡ชๆœฌๅœฐ็Ÿฅ่ฏ†ๅบ“็š„ๆœ็ดข็ป“ๆžœใ€‚็กฎไฟไฝ ็š„ๅ›ž็ญ”ไธญๅชๅŒ…ๅซ็›ธๅ…ณไฟกๆฏใ€‚ๅฆ‚ๆžœไฝ ไธ็กฎๅฎš้—ฎ้ข˜็š„็ญ”ๆกˆ๏ผŒ่ฏท้ฟๅ…ๅˆ†ไบซไธๅ‡†็กฎ็š„ไฟกๆฏใ€‚ -### ๆœ็ดข็ป“ๆžœ๏ผš{context} -### ้—ฎ้ข˜๏ผš{question} -### ๅ›ž็ญ”๏ผš -""" - else: - template = """ -### You are a helpful, respectful and honest assistant to help the user with questions. \ -Please refer to the search results obtained from the local knowledge base. \ -But be careful to not incorporate the information that you think is not relevant to the question. \ -If you don't know the answer to a question, please don't share false information. 
\ -### Search results: {context} \n -### Question: {question} \n -### Answer: -""" - final_prompt = template.format(context=context_str, question=input.initial_query) - statistics_dict["opea_service@reranking_tgi_gaudi"].append_latency(time.time() - start, None) - return LLMParamsDoc(query=final_prompt.strip()) + + for best_response in response_data[: input.top_n]: + reranking_results.append( + {"text": input.retrieved_docs[best_response["index"]].text, "score": best_response["score"]} + ) + + statistics_dict["opea_service@reranking_tgi_gaudi"].append_latency(time.time() - start, None) + if isinstance(input, SearchedDoc): + result = [doc["text"] for doc in reranking_results] + if logflag: + logger.info(result) + return LLMParamsDoc(query=input.initial_query, documents=result) else: - return LLMParamsDoc(query=input.initial_query) + reranking_docs = [] + for doc in reranking_results: + reranking_docs.append(RerankingResponseData(text=doc["text"], score=doc["score"])) + if isinstance(input, RerankingRequest): + result = RerankingResponse(reranked_docs=reranking_docs) + if logflag: + logger.info(result) + return result + + if isinstance(input, ChatCompletionRequest): + input.reranked_docs = reranking_docs + input.documents = [doc["text"] for doc in reranking_results] + if logflag: + logger.info(input) + return input if __name__ == "__main__": diff --git a/comps/reranks/video-rag-qna/README.md b/comps/reranks/video-rag-qna/README.md new file mode 100644 index 000000000..9edfe4118 --- /dev/null +++ b/comps/reranks/video-rag-qna/README.md @@ -0,0 +1,62 @@ +# Rerank Microservice + +This is a Docker-based microservice that do result rerank for VideoRAGQnA use case. Local rerank is used rather than rerank model. + +For the `VideoRAGQnA` usecase, during the data preparation phase, frames are extracted from videos and stored in a vector database. To identify the most relevant video, we count the occurrences of each video source among the retrieved data with rerank function `get_top_doc`. This sorts the video as a descending list of names, ranked by their degree of match with the query. Then we could send the `top_n` videos to the downstream LVM. + +# ๐Ÿš€1. Start Microservice with Docker + +## 1.1 Build Images + +```bash +cd GenAIComps +docker build --no-cache -t opea/reranking-videoragqna:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/reranks/video-rag-qna/docker/Dockerfile . +``` + +## 1.2 Start Rerank Service + +```bash +docker compose -f comps/reranks/video-rag-qna/docker/docker_compose_reranking.yaml up -d +# wait until ready +until docker logs reranking-videoragqna-server 2>&1 | grep -q "Uvicorn running on"; do + sleep 2 +done +``` + +Available configuration by environment variable: + +- CHUNK_DURATION: target chunk duration, should be aligned with VideoRAGQnA dataprep. Default 10s. + +# โœ… 2. 
Test + +```bash +export ip_address=$(hostname -I | awk '{print $1}') +curl -X 'POST' \ +"http://${ip_address}:8000/v1/reranking" \ +-H 'accept: application/json' \ +-H 'Content-Type: application/json' \ +-d '{ + "retrieved_docs": [{"doc": [{"text": "this is the retrieved text"}]}], + "initial_query": "this is the query", + "top_n": 1, + "metadata": [ + {"other_key": "value", "video":"top_video_name", "timestamp":"20"}, + {"other_key": "value", "video":"second_video_name", "timestamp":"40"}, + {"other_key": "value", "video":"top_video_name", "timestamp":"20"} + ] +}' +``` + +The result should be: + +```bash +{"id":"random number","video_url":"http://0.0.0.0:6005/top_video_name","chunk_start":20.0,"chunk_duration":10.0,"prompt":"this is the query","max_new_tokens":512} +``` + +# โ™ป๏ธ 3. Clean + +```bash +# remove the container +cid=$(docker ps -aq --filter "name=reranking-videoragqna-server") +if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi +``` diff --git a/comps/reranks/video-rag-qna/docker/Dockerfile b/comps/reranks/video-rag-qna/docker/Dockerfile new file mode 100644 index 000000000..617f47b6a --- /dev/null +++ b/comps/reranks/video-rag-qna/docker/Dockerfile @@ -0,0 +1,24 @@ + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +FROM python:3.11-slim + +ENV LANG=C.UTF-8 + +RUN useradd -m -s /bin/bash user && \ + mkdir -p /home/user && \ + chown -R user /home/user/ + +USER user + +COPY comps /home/user/comps + +RUN pip install --no-cache-dir --upgrade pip && \ + pip install --no-cache-dir -r /home/user/comps/reranks/video-rag-qna/requirements.txt + +ENV PYTHONPATH=$PYTHONPATH:/home/user + +WORKDIR /home/user/comps/reranks/video-rag-qna + +ENTRYPOINT ["python", "local_reranking.py"] \ No newline at end of file diff --git a/comps/reranks/video-rag-qna/docker/docker_compose_reranking.yaml b/comps/reranks/video-rag-qna/docker/docker_compose_reranking.yaml new file mode 100644 index 000000000..d819f331a --- /dev/null +++ b/comps/reranks/video-rag-qna/docker/docker_compose_reranking.yaml @@ -0,0 +1,21 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +services: + reranking: + image: opea/reranking-videoragqna:latest + container_name: reranking-videoragqna-server + ports: + - "8000:8000" + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + CHUNK_DURATION: ${CHUNK_DURATION} + FILE_SERVER_ENDPOINT: ${FILE_SERVER_ENDPOINT} + restart: unless-stopped + +networks: + default: + driver: bridge diff --git a/comps/reranks/video-rag-qna/local_reranking.py b/comps/reranks/video-rag-qna/local_reranking.py new file mode 100644 index 000000000..3a3043ca8 --- /dev/null +++ b/comps/reranks/video-rag-qna/local_reranking.py @@ -0,0 +1,89 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import logging +import os +import time + +from comps import ( + LVMVideoDoc, + SearchedMultimodalDoc, + ServiceType, + opea_microservices, + register_microservice, + register_statistics, + statistics_dict, +) + +chunk_duration = os.getenv("CHUNK_DURATION", "10") or "10" +chunk_duration = float(chunk_duration) if chunk_duration.isdigit() else 10.0 + +file_server_endpoint = os.getenv("FILE_SERVER_ENDPOINT") or "http://0.0.0.0:6005" + +logging.basicConfig( + level=logging.INFO, format="%(levelname)s: [%(asctime)s] %(message)s", datefmt="%d/%m/%Y %I:%M:%S" +) + + +def get_top_doc(top_n, videos) -> list: + hit_score = {} + if videos is None: + return 
None + for video_name in videos: + try: + if video_name not in hit_score.keys(): + hit_score[video_name] = 0 + hit_score[video_name] += 1 + except KeyError as r: + logging.info(f"no video name {r}") + + x = dict(sorted(hit_score.items(), key=lambda item: -item[1])) # sorted dict of video name and score + top_n_names = list(x.keys())[:top_n] + logging.info(f"top docs = {x}") + logging.info(f"top n docs names = {top_n_names}") + + return top_n_names + + +def find_timestamp_from_video(metadata_list, video): + return next( + (metadata["timestamp"] for metadata in metadata_list if metadata["video"] == video), + None, + ) + + +@register_microservice( + name="opea_service@reranking_visual_rag", + service_type=ServiceType.RERANK, + endpoint="/v1/reranking", + host="0.0.0.0", + port=8000, + input_datatype=SearchedMultimodalDoc, + output_datatype=LVMVideoDoc, +) +@register_statistics(names=["opea_service@reranking_visual_rag"]) +def reranking(input: SearchedMultimodalDoc) -> LVMVideoDoc: + start = time.time() + + # get top video name from metadata + video_names = [meta["video"] for meta in input.metadata] + top_video_names = get_top_doc(input.top_n, video_names) + + # only use the first top video + timestamp = find_timestamp_from_video(input.metadata, top_video_names[0]) + video_url = f"{file_server_endpoint.rstrip('/')}/{top_video_names[0]}" + + result = LVMVideoDoc( + video_url=video_url, + prompt=input.initial_query, + chunk_start=timestamp, + chunk_duration=float(chunk_duration), + max_new_tokens=512, + ) + statistics_dict["opea_service@reranking_visual_rag"].append_latency(time.time() - start, None) + + return result + + +if __name__ == "__main__": + opea_microservices["opea_service@reranking_visual_rag"].start() diff --git a/comps/reranks/video-rag-qna/requirements.txt b/comps/reranks/video-rag-qna/requirements.txt new file mode 100644 index 000000000..c7cc250eb --- /dev/null +++ b/comps/reranks/video-rag-qna/requirements.txt @@ -0,0 +1,11 @@ +datasets +docarray +fastapi +opentelemetry-api +opentelemetry-exporter-otlp +opentelemetry-sdk +Pillow +prometheus-fastapi-instrumentator +pydub +shortuuid +uvicorn diff --git a/comps/retrievers/haystack/qdrant/README.md b/comps/retrievers/haystack/qdrant/README.md index 70d2845ed..a7653e8ce 100644 --- a/comps/retrievers/haystack/qdrant/README.md +++ b/comps/retrievers/haystack/qdrant/README.md @@ -1,54 +1,59 @@ # Retriever Microservice with Qdrant -# ๐Ÿš€Start Microservice with Python +## 1. ๐Ÿš€Start Microservice with Python (Option 1) -## Install Requirements +### 1.1 Install Requirements ```bash pip install -r requirements.txt ``` -## Start Qdrant Server +### 1.2 Start Qdrant Server Please refer to this [readme](../../../vectorstores/langchain/qdrant/README.md). -## Setup Environment Variables +### 1.3 Setup Environment Variables ```bash -export http_proxy=${your_http_proxy} -export https_proxy=${your_https_proxy} export QDRANT_HOST=${your_qdrant_host_ip} export QDRANT_PORT=6333 export EMBED_DIMENSION=${your_embedding_dimension} export INDEX_NAME=${your_index_name} -export TEI_EMBEDDING_ENDPOINT=${your_tei_endpoint} ``` -## Start Retriever Service +### 1.4 Start Retriever Service ```bash export TEI_EMBEDDING_ENDPOINT="http://${your_ip}:6060" python haystack/qdrant/retriever_qdrant.py ``` -# ๐Ÿš€Start Microservice with Docker +## 2. 
๐Ÿš€Start Microservice with Docker (Option 2) -## Build Docker Image +### 2.1 Setup Environment Variables + +```bash +export QDRANT_HOST=${your_qdrant_host_ip} +export QDRANT_PORT=6333 +export TEI_EMBEDDING_ENDPOINT="http://${your_ip}:6060" +``` + +### 2.2 Build Docker Image ```bash cd ../../ docker build -t opea/retriever-qdrant:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/haystack/qdrant/docker/Dockerfile . ``` -## Run Docker with CLI +### 2.3 Run Docker with CLI ```bash -docker run -d --name="retriever-qdrant-server" -p 7000:7000 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e TEI_EMBEDDING_ENDPOINT=${your_tei_endpoint} -e QDRANT_HOST=${your_qdrant_host_ip} -e QDRANT_PORT=${your_qdrant_port} opea/retriever-qdrant:latest +docker run -d --name="retriever-qdrant-server" -p 7000:7000 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e TEI_EMBEDDING_ENDPOINT=$TEI_EMBEDDING_ENDPOINT -e QDRANT_HOST=$QDRANT_HOST -e QDRANT_PORT=$QDRANT_PORT opea/retriever-qdrant:latest ``` -# ๐Ÿš€3. Consume Retriever Service +## ๐Ÿš€3. Consume Retriever Service -## 3.1 Check Service Status +### 3.1 Check Service Status ```bash curl http://${your_ip}:7000/v1/health_check \ @@ -56,7 +61,7 @@ curl http://${your_ip}:7000/v1/health_check \ -H 'Content-Type: application/json' ``` -## 3.2 Consume Embedding Service +### 3.2 Consume Embedding Service To consume the Retriever Microservice, you can generate a mock embedding vector of length 768 with Python. diff --git a/comps/retrievers/haystack/qdrant/docker/Dockerfile b/comps/retrievers/haystack/qdrant/docker/Dockerfile index e9916c8db..eaeba479a 100644 --- a/comps/retrievers/haystack/qdrant/docker/Dockerfile +++ b/comps/retrievers/haystack/qdrant/docker/Dockerfile @@ -1,4 +1,3 @@ - # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 @@ -6,8 +5,7 @@ FROM python:3.11-slim RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ libgl1-mesa-glx \ - libjemalloc-dev \ - vim + libjemalloc-dev RUN useradd -m -s /bin/bash user && \ mkdir -p /home/user && \ @@ -24,4 +22,4 @@ ENV PYTHONPATH=$PYTHONPATH:/home/user WORKDIR /home/user/comps/retrievers/haystack/qdrant -ENTRYPOINT ["python", "retriever_qdrant.py"] \ No newline at end of file +ENTRYPOINT ["python", "retriever_qdrant.py"] diff --git a/comps/retrievers/haystack/qdrant/requirements.txt b/comps/retrievers/haystack/qdrant/requirements.txt index 9b99c00fb..24458a6fb 100644 --- a/comps/retrievers/haystack/qdrant/requirements.txt +++ b/comps/retrievers/haystack/qdrant/requirements.txt @@ -3,7 +3,6 @@ easyocr fastapi haystack-ai==2.2.4 langchain_community -langsmith opentelemetry-api opentelemetry-exporter-otlp opentelemetry-sdk diff --git a/comps/retrievers/haystack/qdrant/retriever_qdrant.py b/comps/retrievers/haystack/qdrant/retriever_qdrant.py index d57232563..aee2e6fe1 100644 --- a/comps/retrievers/haystack/qdrant/retriever_qdrant.py +++ b/comps/retrievers/haystack/qdrant/retriever_qdrant.py @@ -1,13 +1,17 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import os + from haystack.components.embedders import HuggingFaceTEITextEmbedder, SentenceTransformersTextEmbedder from haystack_integrations.components.retrievers.qdrant import QdrantEmbeddingRetriever from haystack_integrations.document_stores.qdrant import QdrantDocumentStore -from langsmith import traceable from qdrant_config import EMBED_DIMENSION, EMBED_ENDPOINT, EMBED_MODEL, INDEX_NAME, 
QDRANT_HOST, QDRANT_PORT -from comps import EmbedDoc, SearchedDoc, ServiceType, TextDoc, opea_microservices, register_microservice +from comps import CustomLogger, EmbedDoc, SearchedDoc, ServiceType, TextDoc, opea_microservices, register_microservice + +logger = CustomLogger("retriever_qdrant") +logflag = os.getenv("LOGFLAG", False) # Create a pipeline for querying a Qdrant document store @@ -28,11 +32,14 @@ def initialize_qdrant_retriever() -> QdrantEmbeddingRetriever: host="0.0.0.0", port=7000, ) -@traceable(run_type="retriever") def retrieve(input: EmbedDoc) -> SearchedDoc: + if logflag: + logger.info(input) search_res = retriever.run(query_embedding=input.embedding)["documents"] - searched_docs = [TextDoc(text=r.content) for r in search_res] + searched_docs = [TextDoc(text=r.content) for r in search_res if r.content] result = SearchedDoc(retrieved_docs=searched_docs, initial_query=input.text) + if logflag: + logger.info(result) return result diff --git a/comps/retrievers/langchain/README.md b/comps/retrievers/langchain/README.md index 3de5cab21..300fbc099 100644 --- a/comps/retrievers/langchain/README.md +++ b/comps/retrievers/langchain/README.md @@ -6,14 +6,14 @@ The service primarily utilizes similarity measures in vector space to rapidly re Overall, this microservice provides robust backend support for applications requiring efficient similarity searches, playing a vital role in scenarios such as recommendation systems, information retrieval, or any other context where precise measurement of document similarity is crucial. -# Retriever Microservice with Redis +## Retriever Microservice with Redis For details, please refer to this [readme](redis/README.md) -# Retriever Microservice with Milvus +## Retriever Microservice with Milvus For details, please refer to this [readme](milvus/README.md) -# Retriever Microservice with PGVector +## Retriever Microservice with PGVector For details, please refer to this [readme](pgvector/README.md) diff --git a/comps/retrievers/langchain/milvus/README.md b/comps/retrievers/langchain/milvus/README.md index d1bbc80da..6b9fe2045 100644 --- a/comps/retrievers/langchain/milvus/README.md +++ b/comps/retrievers/langchain/milvus/README.md @@ -1,18 +1,18 @@ # Retriever Microservice with Milvus -# ๐Ÿš€Start Microservice with Python +## ๐Ÿš€Start Microservice with Python -## Install Requirements +### Install Requirements ```bash pip install -r requirements.txt ``` -## Start Milvus Server +### Start Milvus Server Please refer to this [readme](../../../vectorstores/langchain/milvus/README.md). -## Setup Environment Variables +### Setup Environment Variables ```bash export no_proxy=${your_no_proxy} @@ -24,31 +24,31 @@ export COLLECTION_NAME=${your_collection_name} export MOSEC_EMBEDDING_ENDPOINT=${your_emdding_endpoint} ``` -## Start Retriever Service +### Start Retriever Service ```bash export MOSEC_EMBEDDING_ENDPOINT="http://${your_ip}:6060" python langchain/retriever_redis.py ``` -# ๐Ÿš€Start Microservice with Docker +## ๐Ÿš€Start Microservice with Docker -## Build Docker Image +### Build Docker Image ```bash cd ../../ docker build -t opea/retriever-milvus:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/langchain/milvus/docker/Dockerfile . 
``` -## Run Docker with CLI +### Run Docker with CLI ```bash docker run -d --name="retriever-milvus-server" -p 7000:7000 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e MOSEC_EMBEDDING_ENDPOINT=${your_emdding_endpoint} -e MILVUS=${your_milvus_host_ip} opea/retriever-milvus:latest ``` -# ๐Ÿš€3. Consume Retriever Service +## ๐Ÿš€3. Consume Retriever Service -## 3.1 Check Service Status +### 3.1 Check Service Status ```bash curl http://${your_ip}:7000/v1/health_check \ @@ -56,7 +56,7 @@ curl http://${your_ip}:7000/v1/health_check \ -H 'Content-Type: application/json' ``` -## 3.2 Consume Embedding Service +### 3.2 Consume Embedding Service To consume the Retriever Microservice, you can generate a mock embedding vector of length 768 with Python. diff --git a/comps/retrievers/langchain/milvus/config.py b/comps/retrievers/langchain/milvus/config.py index dcbe167b5..b7e5ec420 100644 --- a/comps/retrievers/langchain/milvus/config.py +++ b/comps/retrievers/langchain/milvus/config.py @@ -16,4 +16,4 @@ MOSEC_EMBEDDING_ENDPOINT = os.environ.get("MOSEC_EMBEDDING_ENDPOINT", "") os.environ["OPENAI_API_BASE"] = MOSEC_EMBEDDING_ENDPOINT os.environ["OPENAI_API_KEY"] = "Dummy key" -MODEL_ID = "/root/bce-embedding-base_v1" +MODEL_ID = "/home/user/bce-embedding-base_v1" diff --git a/comps/retrievers/langchain/milvus/docker/Dockerfile b/comps/retrievers/langchain/milvus/docker/Dockerfile index 99c977e6f..233a0ec13 100644 --- a/comps/retrievers/langchain/milvus/docker/Dockerfile +++ b/comps/retrievers/langchain/milvus/docker/Dockerfile @@ -1,4 +1,3 @@ - # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 @@ -11,8 +10,7 @@ ARG ARCH="cpu" RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ build-essential \ libgl1-mesa-glx \ - libjemalloc-dev \ - vim + libjemalloc-dev RUN useradd -m -s /bin/bash user && \ mkdir -p /home/user && \ @@ -31,4 +29,3 @@ ENV PYTHONPATH=$PYTHONPATH:/home/user WORKDIR /home/user/comps/retrievers/langchain/milvus ENTRYPOINT ["python", "retriever_milvus.py"] - diff --git a/comps/retrievers/langchain/milvus/retriever_milvus.py b/comps/retrievers/langchain/milvus/retriever_milvus.py index 1625eed0a..fb8fb64b2 100644 --- a/comps/retrievers/langchain/milvus/retriever_milvus.py +++ b/comps/retrievers/langchain/milvus/retriever_milvus.py @@ -15,11 +15,11 @@ MODEL_ID, MOSEC_EMBEDDING_ENDPOINT, ) -from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceHubEmbeddings, OpenAIEmbeddings +from langchain_community.embeddings import HuggingFaceBgeEmbeddings, OpenAIEmbeddings from langchain_milvus.vectorstores import Milvus -from langsmith import traceable from comps import ( + CustomLogger, EmbedDoc, SearchedDoc, ServiceType, @@ -30,6 +30,9 @@ statistics_dict, ) +logger = CustomLogger("retriever_milvus") +logflag = os.getenv("LOGFLAG", False) + class MosecEmbeddings(OpenAIEmbeddings): def _get_len_safe_embeddings( @@ -63,9 +66,10 @@ def empty_embedding() -> List[float]: host="0.0.0.0", port=7000, ) -@traceable(run_type="retriever") @register_statistics(names=["opea_service@retriever_milvus"]) def retrieve(input: EmbedDoc) -> SearchedDoc: + if logflag: + logger.info(input) vector_db = Milvus( embeddings, connection_args={"host": MILVUS_HOST, "port": MILVUS_PORT}, @@ -94,6 +98,8 @@ def retrieve(input: EmbedDoc) -> SearchedDoc: searched_docs.append(TextDoc(text=r.page_content)) result = SearchedDoc(retrieved_docs=searched_docs, initial_query=input.text) 
statistics_dict["opea_service@retriever_milvus"].append_latency(time.time() - start, None) + if logflag: + logger.info(result) return result diff --git a/comps/retrievers/langchain/pathway/README.md b/comps/retrievers/langchain/pathway/README.md new file mode 100644 index 000000000..6f8e953f0 --- /dev/null +++ b/comps/retrievers/langchain/pathway/README.md @@ -0,0 +1,104 @@ +# Retriever Microservice with Pathway + +## ๐Ÿš€Start Microservices + +### With the Docker CLI + +We suggest using `docker compose` to run this app, refer to [`docker compose`](#with-the-docker-compose) section below. + +If you prefer to run them separately, refer to this section. + +#### (Optionally) Start the TEI (embedder) service separately + +> Note that Docker compose will start this service as well, this step is thus optional. + +```bash +export LANGCHAIN_TRACING_V2=true +export LANGCHAIN_API_KEY=${your_langchain_api_key} +export LANGCHAIN_PROJECT="opea/retriever" +model=BAAI/bge-base-en-v1.5 +revision=refs/pr/4 +# TEI_EMBEDDING_ENDPOINT="http://${your_ip}:6060" # if you want to use the hosted embedding service, example: "http://127.0.0.1:6060" + +# then run: +docker run -p 6060:80 -e http_proxy=$http_proxy -e https_proxy=$https_proxy --pull always ghcr.io/huggingface/text-embeddings-inference:cpu-1.2 --model-id $model --revision $revision +``` + +Health check the embedding service with: + +```bash +curl 127.0.0.1:6060/embed -X POST -d '{"inputs":"What is Deep Learning?"}' -H 'Content-Type: application/json' +``` + +If the model supports re-ranking, you can also use: + +```bash +curl 127.0.0.1:6060/rerank -X POST -d '{"query":"What is Deep Learning?", "texts": ["Deep Learning is not...", "Deep learning is..."]}' -H 'Content-Type: application/json' +``` + +#### Start Retriever Service + +Retriever service queries the Pathway vector store on incoming requests. +Make sure that Pathway vector store is already running, [see Pathway vector store here](../../../vectorstores/langchain/pathway/README.md). + +Retriever service expects the Pathway host and port variables to connect to the vector DB. Set the Pathway vector store environment variables. + +```bash +export PATHWAY_HOST=0.0.0.0 +export PATHWAY_PORT=8666 +``` + +```bash +# make sure you are in the root folder of the repo +docker build -t opea/retriever-pathway:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/langchain/pathway/docker/Dockerfile . + +docker run -p 7000:7000 -e PATHWAY_HOST=${PATHWAY_HOST} -e PATHWAY_PORT=${PATHWAY_PORT} -e http_proxy=$http_proxy -e https_proxy=$https_proxy --network="host" opea/retriever-pathway:latest +``` + +### With the Docker compose + +First, set the env variables: + +```bash +export PATHWAY_HOST=0.0.0.0 +export PATHWAY_PORT=8666 +export LANGCHAIN_TRACING_V2=true +export LANGCHAIN_API_KEY=${your_langchain_api_key} +export LANGCHAIN_PROJECT="opea/retriever" +model=BAAI/bge-base-en-v1.5 +revision=refs/pr/4 +# TEI_EMBEDDING_ENDPOINT="http://${your_ip}:6060" # if you want to use the hosted embedding service, example: "http://127.0.0.1:6060" +``` + +Text embeddings inference service expects the `RETRIEVE_MODEL_ID` variable to be set. + +```bash +export RETRIEVE_MODEL_ID=BAAI/bge-base-en-v1.5 +``` + +Note that following docker compose sets the `network_mode: host` in retriever image to allow local vector store connection. 
+This will start both the embedding and retriever services: + +```bash +cd comps/retrievers/langchain/pathway/docker + +docker compose -f docker_compose_retriever.yaml build +docker compose -f docker_compose_retriever.yaml up + +# shut down the containers +docker compose -f docker_compose_retriever.yaml down +``` + +Make sure the retriever service is working as expected: + +```bash +curl http://0.0.0.0:7000/v1/health_check -X GET -H 'Content-Type: application/json' +``` + +Send an example query: + +```bash +exm_embeddings=$(python -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)") + +curl http://0.0.0.0:7000/v1/retrieval -X POST -d "{\"text\":\"What is the revenue of Nike in 2023?\",\"embedding\":${exm_embeddings}}" -H 'Content-Type: application/json' +``` diff --git a/comps/retrievers/langchain/pathway/docker/Dockerfile b/comps/retrievers/langchain/pathway/docker/Dockerfile new file mode 100644 index 000000000..b70c01f0e --- /dev/null +++ b/comps/retrievers/langchain/pathway/docker/Dockerfile @@ -0,0 +1,30 @@ + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +FROM langchain/langchain:latest + +ARG ARCH="cpu" + +RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ + libgl1-mesa-glx \ + libjemalloc-dev \ + vim + +RUN useradd -m -s /bin/bash user && \ + mkdir -p /home/user && \ + chown -R user /home/user/ + +COPY comps /home/user/comps + +USER user + +RUN pip install --no-cache-dir --upgrade pip && \ + if [ ${ARCH} = "cpu" ]; then pip install torch torchvision --index-url https://download.pytorch.org/whl/cpu; fi && \ + pip install --no-cache-dir -r /home/user/comps/retrievers/langchain/pathway/requirements.txt + +ENV PYTHONPATH=$PYTHONPATH:/home/user + +WORKDIR /home/user/comps/retrievers/langchain/pathway + +ENTRYPOINT ["bash", "entrypoint.sh"] diff --git a/comps/retrievers/langchain/pathway/docker/docker_compose_retriever.yaml b/comps/retrievers/langchain/pathway/docker/docker_compose_retriever.yaml new file mode 100644 index 000000000..b2b9383d6 --- /dev/null +++ b/comps/retrievers/langchain/pathway/docker/docker_compose_retriever.yaml @@ -0,0 +1,35 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +version: "3.8" + +services: + tei_xeon_service: + image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.2 + container_name: tei-xeon-server + ports: + - "6060:80" + volumes: + - "./data:/data" + shm_size: 1g + command: --model-id ${RETRIEVE_MODEL_ID} + retriever: + image: opea/retriever-pathway:latest + container_name: retriever-pathway-server + ports: + - "7000:7000" + ipc: host + network_mode: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + PATHWAY_HOST: ${PATHWAY_HOST} + PATHWAY_PORT: ${PATHWAY_PORT} + TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} + LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY} + restart: unless-stopped + +networks: + default: + driver: bridge diff --git a/comps/retrievers/langchain/pathway/entrypoint.sh b/comps/retrievers/langchain/pathway/entrypoint.sh new file mode 100644 index 000000000..f5c8fc151 --- /dev/null +++ b/comps/retrievers/langchain/pathway/entrypoint.sh @@ -0,0 +1,6 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +pip --no-cache-dir install -r requirements-runtime.txt + +python retriever_pathway.py diff --git a/comps/retrievers/langchain/pathway/requirements-runtime.txt
b/comps/retrievers/langchain/pathway/requirements-runtime.txt new file mode 100644 index 000000000..53d49066d --- /dev/null +++ b/comps/retrievers/langchain/pathway/requirements-runtime.txt @@ -0,0 +1 @@ +langsmith diff --git a/comps/retrievers/langchain/pathway/requirements.txt b/comps/retrievers/langchain/pathway/requirements.txt new file mode 100644 index 000000000..98fe20fd1 --- /dev/null +++ b/comps/retrievers/langchain/pathway/requirements.txt @@ -0,0 +1,12 @@ +docarray[full] +fastapi +frontend==0.0.3 +huggingface_hub +langchain_community == 0.2.0 +opentelemetry-api +opentelemetry-exporter-otlp +opentelemetry-sdk +pathway +prometheus-fastapi-instrumentator +sentence_transformers +shortuuid diff --git a/comps/retrievers/langchain/pathway/retriever_pathway.py b/comps/retrievers/langchain/pathway/retriever_pathway.py new file mode 100644 index 000000000..72b7babaa --- /dev/null +++ b/comps/retrievers/langchain/pathway/retriever_pathway.py @@ -0,0 +1,52 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import os +import time + +from langchain_community.vectorstores import PathwayVectorClient +from langsmith import traceable + +from comps import ( + EmbedDoc, + SearchedDoc, + ServiceType, + TextDoc, + opea_microservices, + register_microservice, + register_statistics, + statistics_dict, +) + +host = os.getenv("PATHWAY_HOST", "127.0.0.1") +port = int(os.getenv("PATHWAY_PORT", 8666)) + +EMBED_MODEL = os.getenv("EMBED_MODEL", "BAAI/bge-base-en-v1.5") + +tei_embedding_endpoint = os.getenv("TEI_EMBEDDING_ENDPOINT") + + +@register_microservice( + name="opea_service@retriever_pathway", + service_type=ServiceType.RETRIEVER, + endpoint="/v1/retrieval", + host="0.0.0.0", + port=7000, +) +@traceable(run_type="retriever") +@register_statistics(names=["opea_service@retriever_pathway"]) +def retrieve(input: EmbedDoc) -> SearchedDoc: + start = time.time() + documents = pw_client.similarity_search(input.text, input.fetch_k) + + docs = [TextDoc(text=r.page_content) for r in documents] + + time_spent = time.time() - start + statistics_dict["opea_service@retriever_pathway"].append_latency(time_spent, None) # noqa: E501 + return SearchedDoc(retrieved_docs=docs, initial_query=input.text) + + +if __name__ == "__main__": + # Create the vectorstore client + pw_client = PathwayVectorClient(host=host, port=port) + opea_microservices["opea_service@retriever_pathway"].start() diff --git a/comps/retrievers/langchain/pgvector/README.md b/comps/retrievers/langchain/pgvector/README.md index a0febb7fb..4914e8cb4 100644 --- a/comps/retrievers/langchain/pgvector/README.md +++ b/comps/retrievers/langchain/pgvector/README.md @@ -6,17 +6,17 @@ The service primarily utilizes similarity measures in vector space to rapidly re Overall, this microservice provides robust backend support for applications requiring efficient similarity searches, playing a vital role in scenarios such as recommendation systems, information retrieval, or any other context where precise measurement of document similarity is crucial. -# ๐Ÿš€1. Start Microservice with Python (Option 1) +## ๐Ÿš€1. Start Microservice with Python (Option 1) To start the retriever microservice, you must first install the required python packages. 
-## 1.1 Install Requirements +### 1.1 Install Requirements ```bash pip install -r requirements.txt ``` -## 1.2 Start TEI Service +### 1.2 Start TEI Service ```bash export LANGCHAIN_TRACING_V2=true @@ -28,7 +28,7 @@ volume=$PWD/data docker run -d -p 6060:80 -v $volume:/data -e http_proxy=$http_proxy -e https_proxy=$https_proxy --pull always ghcr.io/huggingface/text-embeddings-inference:cpu-1.2 --model-id $model --revision $revision ``` -## 1.3 Verify the TEI Service +### 1.3 Verify the TEI Service ```bash curl 127.0.0.1:6060/rerank \ @@ -37,7 +37,7 @@ curl 127.0.0.1:6060/rerank \ -H 'Content-Type: application/json' ``` -## 1.4 Setup VectorDB Service +### 1.4 Setup VectorDB Service You need to setup your own VectorDB service (PGvector in this example), and ingest your knowledge documents into the vector database. @@ -52,16 +52,16 @@ export POSTGRES_DB=vectordb docker run --name vectorstore-postgres -e POSTGRES_USER=${POSTGRES_USER} -e POSTGRES_HOST_AUTH_METHOD=trust -e POSTGRES_DB=${POSTGRES_DB} -e POSTGRES_PASSWORD=${POSTGRES_PASSWORD} -d -v ./init.sql:/docker-entrypoint-initdb.d/init.sql -p 5432:5432 pgvector/pgvector:0.7.0-pg16 ``` -## 1.5 Start Retriever Service +### 1.5 Start Retriever Service ```bash export TEI_EMBEDDING_ENDPOINT="http://${your_ip}:6060" python retriever_pgvector.py ``` -# ๐Ÿš€2. Start Microservice with Docker (Option 2) +## ๐Ÿš€2. Start Microservice with Docker (Option 2) -## 2.1 Setup Environment Variables +### 2.1 Setup Environment Variables ```bash export RETRIEVE_MODEL_ID="BAAI/bge-base-en-v1.5" @@ -73,7 +73,7 @@ export LANGCHAIN_API_KEY=${your_langchain_api_key} export LANGCHAIN_PROJECT="opea/retrievers" ``` -## 2.2 Build Docker Image +### 2.2 Build Docker Image ```bash cd comps/retrievers/langchain/pgvector/docker @@ -87,22 +87,22 @@ To start a docker container, you have two options: You can choose one as needed. -## 2.3 Run Docker with CLI (Option A) +### 2.3 Run Docker with CLI (Option A) ```bash docker run -d --name="retriever-pgvector" -p 7000:7000 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e PG_CONNECTION_STRING=$PG_CONNECTION_STRING -e INDEX_NAME=$INDEX_NAME -e TEI_ENDPOINT=$TEI_ENDPOINT opea/retriever-pgvector:latest ``` -## 2.4 Run Docker with Docker Compose (Option B) +### 2.4 Run Docker with Docker Compose (Option B) ```bash cd comps/retrievers/langchain/pgvector/docker docker compose -f docker_compose_retriever.yaml up -d ``` -# ๐Ÿš€3. Consume Retriever Service +## ๐Ÿš€3. Consume Retriever Service -## 3.1 Check Service Status +### 3.1 Check Service Status ```bash curl http://localhost:7000/v1/health_check \ @@ -110,7 +110,7 @@ curl http://localhost:7000/v1/health_check \ -H 'Content-Type: application/json' ``` -## 3.2 Consume Embedding Service +### 3.2 Consume Embedding Service To consume the Retriever Microservice, you can generate a mock embedding vector of length 768 with Python. 
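As a minimal sketch of that step (assuming the retriever started above is listening on `localhost:7000` and the `requests` package is installed; adjust host and port to your deployment):

```python
import random

import requests

# Generate a mock 768-dimensional embedding (the output size of BAAI/bge-base-en-v1.5).
mock_embedding = [random.uniform(-1, 1) for _ in range(768)]

payload = {
    "text": "What is the revenue of Nike in 2023?",
    "embedding": mock_embedding,
}

# Query the retriever microservice started earlier in this README.
response = requests.post("http://localhost:7000/v1/retrieval", json=payload)
response.raise_for_status()

# The response carries the retrieved documents together with the initial query.
print(response.json()["retrieved_docs"])
```

A real client would replace the mock vector with the embedding produced by the TEI service from section 1.2.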
diff --git a/comps/retrievers/langchain/pgvector/docker/Dockerfile b/comps/retrievers/langchain/pgvector/docker/Dockerfile index 0b935d7a6..84122d629 100644 --- a/comps/retrievers/langchain/pgvector/docker/Dockerfile +++ b/comps/retrievers/langchain/pgvector/docker/Dockerfile @@ -1,4 +1,3 @@ - # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 @@ -6,8 +5,7 @@ FROM langchain/langchain:latest RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ libgl1-mesa-glx \ - libjemalloc-dev \ - vim + libjemalloc-dev RUN useradd -m -s /bin/bash user && \ mkdir -p /home/user && \ diff --git a/comps/retrievers/langchain/pgvector/requirements.txt b/comps/retrievers/langchain/pgvector/requirements.txt index 18609d361..d3d95dee9 100644 --- a/comps/retrievers/langchain/pgvector/requirements.txt +++ b/comps/retrievers/langchain/pgvector/requirements.txt @@ -2,7 +2,6 @@ docarray[full] easyocr fastapi langchain_community -langsmith opentelemetry-api opentelemetry-exporter-otlp opentelemetry-sdk diff --git a/comps/retrievers/langchain/pgvector/retriever_pgvector.py b/comps/retrievers/langchain/pgvector/retriever_pgvector.py index 2fba1f1c0..d33a9f197 100644 --- a/comps/retrievers/langchain/pgvector/retriever_pgvector.py +++ b/comps/retrievers/langchain/pgvector/retriever_pgvector.py @@ -7,9 +7,9 @@ from config import EMBED_MODEL, INDEX_NAME, PG_CONNECTION_STRING, PORT from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceHubEmbeddings from langchain_community.vectorstores import PGVector -from langsmith import traceable from comps import ( + CustomLogger, EmbedDoc, SearchedDoc, ServiceType, @@ -20,6 +20,9 @@ statistics_dict, ) +logger = CustomLogger("retriever_pgvector") +logflag = os.getenv("LOGFLAG", False) + tei_embedding_endpoint = os.getenv("TEI_EMBEDDING_ENDPOINT") @@ -30,9 +33,10 @@ host="0.0.0.0", port=PORT, ) -@traceable(run_type="retriever") @register_statistics(names=["opea_service@retriever_pgvector"]) def retrieve(input: EmbedDoc) -> SearchedDoc: + if logflag: + logger.info(input) start = time.time() search_res = vector_db.similarity_search_by_vector(embedding=input.embedding) searched_docs = [] @@ -40,6 +44,8 @@ def retrieve(input: EmbedDoc) -> SearchedDoc: searched_docs.append(TextDoc(text=r.page_content)) result = SearchedDoc(retrieved_docs=searched_docs, initial_query=input.text) statistics_dict["opea_service@retriever_pgvector"].append_latency(time.time() - start, None) + if logflag: + logger.info(result) return result diff --git a/comps/retrievers/langchain/pinecone/config.py b/comps/retrievers/langchain/pinecone/config.py index e6e62db6c..cd7f9e508 100644 --- a/comps/retrievers/langchain/pinecone/config.py +++ b/comps/retrievers/langchain/pinecone/config.py @@ -8,9 +8,9 @@ # Pinecone configuration PINECONE_API_KEY = os.getenv("PINECONE_API_KEY", "xxx_xxx") -PINECONE_INDEX_NAME = int(os.getenv("PINECONE_INDEX_NAME", "langchain-test")) +PINECONE_INDEX_NAME = os.getenv("PINECONE_INDEX_NAME", "langchain-test") # LLM/Embedding endpoints TGI_LLM_ENDPOINT = os.getenv("TGI_LLM_ENDPOINT", "http://localhost:8080") TGI_LLM_ENDPOINT_NO_RAG = os.getenv("TGI_LLM_ENDPOINT_NO_RAG", "http://localhost:8081") -TEI_EMBEDDING_ENDPOINT = os.getenv("TEI_ENDPOINT") +TEI_EMBEDDING_ENDPOINT = os.getenv("TEI_EMBEDDING_ENDPOINT") diff --git a/comps/retrievers/langchain/pinecone/docker/Dockerfile b/comps/retrievers/langchain/pinecone/docker/Dockerfile index 7eedfab10..6d36c0f55 100644 --- 
a/comps/retrievers/langchain/pinecone/docker/Dockerfile +++ b/comps/retrievers/langchain/pinecone/docker/Dockerfile @@ -1,13 +1,13 @@ - # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 FROM langchain/langchain:latest +ARG ARCH="cpu" + RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ libgl1-mesa-glx \ - libjemalloc-dev \ - vim + libjemalloc-dev RUN useradd -m -s /bin/bash user && \ mkdir -p /home/user && \ @@ -15,15 +15,15 @@ RUN useradd -m -s /bin/bash user && \ COPY comps /home/user/comps -RUN chmod +x /home/user/comps/retrievers/langchain/pinecone/run.sh - USER user RUN pip install --no-cache-dir --upgrade pip && \ + if [ ${ARCH} = "cpu" ]; then pip install torch torchvision --index-url https://download.pytorch.org/whl/cpu; fi && \ pip install --no-cache-dir -r /home/user/comps/retrievers/langchain/pinecone/requirements.txt + ENV PYTHONPATH=$PYTHONPATH:/home/user WORKDIR /home/user/comps/retrievers/langchain/pinecone -ENTRYPOINT ["/home/user/comps/retrievers/langchain/pinecone/run.sh"] +ENTRYPOINT ["python", "retriever_pinecone.py"] diff --git a/comps/retrievers/langchain/pinecone/docker/docker_compose_retriever.yaml b/comps/retrievers/langchain/pinecone/docker/docker_compose_retriever.yaml index f9aac5b0b..3c0f7cef2 100644 --- a/comps/retrievers/langchain/pinecone/docker/docker_compose_retriever.yaml +++ b/comps/retrievers/langchain/pinecone/docker/docker_compose_retriever.yaml @@ -24,7 +24,9 @@ services: https_proxy: ${https_proxy} PINECONE_API_KEY: ${PINECONE_API_KEY} INDEX_NAME: ${PINECONE_INDEX_NAME} + PINECONE_INDEX_NAME: ${PINECONE_INDEX_NAME} LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY} + TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} restart: unless-stopped networks: diff --git a/comps/retrievers/langchain/pinecone/ingest.py b/comps/retrievers/langchain/pinecone/ingest.py deleted file mode 100644 index e17b5ebf5..000000000 --- a/comps/retrievers/langchain/pinecone/ingest.py +++ /dev/null @@ -1,100 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -# - -import io -import os - -import numpy as np -from config import EMBED_MODEL, INDEX_NAME, PINECONE_API_KEY -from langchain.text_splitter import RecursiveCharacterTextSplitter -from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceEmbeddings, HuggingFaceHubEmbeddings -from langchain_community.vectorstores import Pinecone -from PIL import Image - -tei_embedding_endpoint = os.getenv("TEI_EMBEDDING_ENDPOINT") - -if os.getenv("PINECONE_API_KEY", None) is None: - raise Exception("Missing `PINECONE_API_KEY` environment variable.") - - -def pdf_loader(file_path): - try: - import easyocr - import fitz - except ImportError: - raise ImportError( - "`PyMuPDF` or 'easyocr' package is not found, please install it with " - "`pip install pymupdf or pip install easyocr.`" - ) - - doc = fitz.open(file_path) - reader = easyocr.Reader(["en"]) - result = "" - for i in range(doc.page_count): - page = doc.load_page(i) - pagetext = page.get_text().strip() - if pagetext: - result = result + pagetext - if len(doc.get_page_images(i)) > 0: - for img in doc.get_page_images(i): - if img: - pageimg = "" - xref = img[0] - img_data = doc.extract_image(xref) - img_bytes = img_data["image"] - pil_image = Image.open(io.BytesIO(img_bytes)) - img = np.array(pil_image) - img_result = reader.readtext(img, paragraph=True, detail=0) - pageimg = pageimg + ", ".join(img_result).strip() - if 
pageimg.endswith("!") or pageimg.endswith("?") or pageimg.endswith("."): - pass - else: - pageimg = pageimg + "." - result = result + pageimg - return result - - -def ingest_documents(): - """Ingest PDF to Pinecone from the data/ directory that - contains Edgar 10k filings data for Nike.""" - # Load list of pdfs - company_name = "Nike" - data_path = "../data/" - doc_path = [os.path.join(data_path, file) for file in os.listdir(data_path)][0] - - print("Parsing 10k filing doc for NIKE", doc_path) - - text_splitter = RecursiveCharacterTextSplitter(chunk_size=1500, chunk_overlap=100, add_start_index=True) - content = pdf_loader(doc_path) - chunks = text_splitter.split_text(content) - - print("Done preprocessing. Created ", len(chunks), " chunks of the original pdf") - # Create vectorstore - if tei_embedding_endpoint: - # create embeddings using TEI endpoint service - embedder = HuggingFaceHubEmbeddings(model=tei_embedding_endpoint) - else: - # create embeddings using local embedding model - embedder = HuggingFaceBgeEmbeddings(model_name=EMBED_MODEL) - - # Batch size - batch_size = 32 - num_chunks = len(chunks) - for i in range(0, num_chunks, batch_size): - batch_chunks = chunks[i : i + batch_size] - batch_texts = [f"Company: {company_name}. " + chunk for chunk in batch_chunks] - - _ = Pinecone.from_texts( - texts=batch_texts, - embedding=embedder, - index_name=INDEX_NAME, - ) - print(f"Processed batch {i//batch_size + 1}/{(num_chunks-1)//batch_size + 1}") - - -if __name__ == "__main__": - ingest_documents() diff --git a/comps/retrievers/langchain/pinecone/requirements.txt b/comps/retrievers/langchain/pinecone/requirements.txt new file mode 100644 index 000000000..7536f591c --- /dev/null +++ b/comps/retrievers/langchain/pinecone/requirements.txt @@ -0,0 +1,21 @@ +beautifulsoup4 +docarray[full] +easyocr +fastapi +huggingface_hub +langchain +langchain-community +langchain-pinecone +numpy +opentelemetry-api +opentelemetry-exporter-otlp +opentelemetry-sdk +pandas +Pillow +pinecone-client +prometheus_fastapi_instrumentator +pymupdf +python-docx +sentence_transformers +shortuuid +uvicorn diff --git a/comps/retrievers/langchain/pinecone/retriever_pinecone.py b/comps/retrievers/langchain/pinecone/retriever_pinecone.py new file mode 100644 index 000000000..9bc2da893 --- /dev/null +++ b/comps/retrievers/langchain/pinecone/retriever_pinecone.py @@ -0,0 +1,93 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import os +import time + +from config import EMBED_MODEL, PINECONE_API_KEY, PINECONE_INDEX_NAME +from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceHubEmbeddings +from langchain_pinecone import PineconeVectorStore +from pinecone import Pinecone, ServerlessSpec + +from comps import ( + CustomLogger, + EmbedDoc, + SearchedDoc, + ServiceType, + TextDoc, + opea_microservices, + register_microservice, + register_statistics, + statistics_dict, +) + +logger = CustomLogger("retriever_pinecone") +logflag = os.getenv("LOGFLAG", False) + +tei_embedding_endpoint = os.getenv("TEI_EMBEDDING_ENDPOINT") + + +@register_microservice( + name="opea_service@retriever_pinecone", + service_type=ServiceType.RETRIEVER, + endpoint="/v1/retrieval", + host="0.0.0.0", + port=7000, +) +@register_statistics(names=["opea_service@retriever_pinecone"]) +def retrieve(input: EmbedDoc) -> SearchedDoc: + if logflag: + logger.info(input) + start = time.time() + + pc = Pinecone(api_key=PINECONE_API_KEY) + + index = pc.Index(PINECONE_INDEX_NAME) + if logflag: + 
logger.info(index.describe_index_stats()["total_vector_count"]) + # check if the Pinecone index has data + if index.describe_index_stats()["total_vector_count"] == 0: + result = SearchedDoc(retrieved_docs=[], initial_query=input.text) + statistics_dict["opea_service@retriever_pinecone"].append_latency(time.time() - start, None) + if logflag: + logger.info(result) + return result + + search_res = vector_db.max_marginal_relevance_search(query=input.text, k=input.k, fetch_k=input.fetch_k) + # if the Pinecone index has data, perform the search + if input.search_type == "similarity": + docs_and_similarities = vector_db.similarity_search_by_vector_with_score(embedding=input.embedding, k=input.k) + search_res = [doc for doc, _ in docs_and_similarities] + elif input.search_type == "similarity_distance_threshold": + if input.distance_threshold is None: + raise ValueError("distance_threshold must be provided for " + "similarity_distance_threshold retriever") + docs_and_similarities = vector_db.similarity_search_by_vector_with_score(embedding=input.embedding, k=input.k) + search_res = [doc for doc, similarity in docs_and_similarities if similarity > input.distance_threshold] + elif input.search_type == "similarity_score_threshold": + docs_and_similarities = vector_db.similarity_search_by_vector_with_score(query=input.text, k=input.k) + search_res = [doc for doc, similarity in docs_and_similarities if similarity > input.score_threshold] + elif input.search_type == "mmr": + search_res = vector_db.max_marginal_relevance_search( + query=input.text, k=input.k, fetch_k=input.fetch_k, lambda_mult=input.lambda_mult + ) + searched_docs = [] + for r in search_res: + searched_docs.append(TextDoc(text=r.page_content)) + result = SearchedDoc(retrieved_docs=searched_docs, initial_query=input.text) + statistics_dict["opea_service@retriever_pinecone"].append_latency(time.time() - start, None) + if logflag: + logger.info(result) + return result + + +if __name__ == "__main__": + # Create vectorstore + if tei_embedding_endpoint: + # create embeddings using TEI endpoint service + embeddings = HuggingFaceHubEmbeddings(model=tei_embedding_endpoint) + else: + # create embeddings using local embedding model + embeddings = HuggingFaceBgeEmbeddings(model_name=EMBED_MODEL) + + vector_db = PineconeVectorStore(embedding=embeddings, index_name=PINECONE_INDEX_NAME) + opea_microservices["opea_service@retriever_pinecone"].start() diff --git a/comps/retrievers/langchain/pinecone/run.sh b/comps/retrievers/langchain/pinecone/run.sh deleted file mode 100644 index ba658360b..000000000 --- a/comps/retrievers/langchain/pinecone/run.sh +++ /dev/null @@ -1,9 +0,0 @@ -#!/bin/sh - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -cd /home/user/comps/retrievers/langchain/pinecone -python ingest.py - -python retriever_pinecone.py diff --git a/comps/retrievers/langchain/redis/README.md b/comps/retrievers/langchain/redis/README.md index 5330e7870..17c37ed4c 100644 --- a/comps/retrievers/langchain/redis/README.md +++ b/comps/retrievers/langchain/redis/README.md @@ -6,17 +6,17 @@ The service primarily utilizes similarity measures in vector space to rapidly re Overall, this microservice provides robust backend support for applications requiring efficient similarity searches, playing a vital role in scenarios such as recommendation systems, information retrieval, or any other context where precise measurement of document similarity is crucial. -# ๐Ÿš€1. Start Microservice with Python (Option 1) +## ๐Ÿš€1. 
Start Microservice with Python (Option 1) To start the retriever microservice, you must first install the required python packages. -## 1.1 Install Requirements +### 1.1 Install Requirements ```bash pip install -r requirements.txt ``` -## 1.2 Start TEI Service +### 1.2 Start TEI Service ```bash export LANGCHAIN_TRACING_V2=true @@ -28,7 +28,7 @@ volume=$PWD/data docker run -d -p 6060:80 -v $volume:/data -e http_proxy=$http_proxy -e https_proxy=$https_proxy --pull always ghcr.io/huggingface/text-embeddings-inference:cpu-1.2 --model-id $model --revision $revision ``` -## 1.3 Verify the TEI Service +### 1.3 Verify the TEI Service ```bash curl 127.0.0.1:6060/rerank \ @@ -37,7 +37,7 @@ curl 127.0.0.1:6060/rerank \ -H 'Content-Type: application/json' ``` -## 1.4 Setup VectorDB Service +### 1.4 Setup VectorDB Service You need to setup your own VectorDB service (Redis in this example), and ingest your knowledge documents into the vector database. @@ -48,16 +48,16 @@ Remember to ingest data into it manually. docker run -d --name="redis-vector-db" -p 6379:6379 -p 8001:8001 redis/redis-stack:7.2.0-v9 ``` -## 1.5 Start Retriever Service +### 1.5 Start Retriever Service ```bash export TEI_EMBEDDING_ENDPOINT="http://${your_ip}:6060" python langchain/retriever_redis.py ``` -# ๐Ÿš€2. Start Microservice with Docker (Option 2) +## ๐Ÿš€2. Start Microservice with Docker (Option 2) -## 2.1 Setup Environment Variables +### 2.1 Setup Environment Variables ```bash export RETRIEVE_MODEL_ID="BAAI/bge-base-en-v1.5" @@ -69,7 +69,7 @@ export LANGCHAIN_API_KEY=${your_langchain_api_key} export LANGCHAIN_PROJECT="opea/retrievers" ``` -## 2.2 Build Docker Image +### 2.2 Build Docker Image ```bash cd ../../ @@ -83,22 +83,22 @@ To start a docker container, you have two options: You can choose one as needed. -## 2.3 Run Docker with CLI (Option A) +### 2.3 Run Docker with CLI (Option A) ```bash docker run -d --name="retriever-redis-server" -p 7000:7000 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e REDIS_URL=$REDIS_URL -e INDEX_NAME=$INDEX_NAME -e TEI_EMBEDDING_ENDPOINT=$TEI_EMBEDDING_ENDPOINT opea/retriever-redis:latest ``` -## 2.4 Run Docker with Docker Compose (Option B) +### 2.4 Run Docker with Docker Compose (Option B) ```bash cd langchain/docker docker compose -f docker_compose_retriever.yaml up -d ``` -# ๐Ÿš€3. Consume Retriever Service +## ๐Ÿš€3. Consume Retriever Service -## 3.1 Check Service Status +### 3.1 Check Service Status ```bash curl http://localhost:7000/v1/health_check \ @@ -106,7 +106,7 @@ curl http://localhost:7000/v1/health_check \ -H 'Content-Type: application/json' ``` -## 3.2 Consume Embedding Service +### 3.2 Consume Embedding Service To consume the Retriever Microservice, you can generate a mock embedding vector of length 768 with Python. 
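Beyond a plain similarity query, `retriever_redis.py` (shown later in this diff) also honors the optional search fields of `EmbedDoc`. A hedged sketch of an MMR-style request, again assuming the service listens on `localhost:7000`:

```python
import random

import requests

embedding = [random.uniform(-1, 1) for _ in range(768)]

payload = {
    "text": "What is Deep Learning?",
    "embedding": embedding,
    "search_type": "mmr",  # maximal marginal relevance instead of plain similarity
    "k": 4,                # number of documents to return
    "fetch_k": 20,         # candidate pool size considered by MMR
    "lambda_mult": 0.5,    # relevance/diversity trade-off
}

response = requests.post("http://localhost:7000/v1/retrieval", json=payload)
print(response.json()["retrieved_docs"])
```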
diff --git a/comps/retrievers/langchain/redis/docker/Dockerfile b/comps/retrievers/langchain/redis/docker/Dockerfile index e3d519910..1993e5fd9 100644 --- a/comps/retrievers/langchain/redis/docker/Dockerfile +++ b/comps/retrievers/langchain/redis/docker/Dockerfile @@ -1,4 +1,3 @@ - # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 @@ -8,8 +7,7 @@ ARG ARCH="cpu" RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ libgl1-mesa-glx \ - libjemalloc-dev \ - vim + libjemalloc-dev RUN useradd -m -s /bin/bash user && \ mkdir -p /home/user && \ diff --git a/comps/retrievers/langchain/redis/requirements.txt b/comps/retrievers/langchain/redis/requirements.txt index 3720190d3..c68c3d274 100644 --- a/comps/retrievers/langchain/redis/requirements.txt +++ b/comps/retrievers/langchain/redis/requirements.txt @@ -2,7 +2,6 @@ docarray[full] easyocr fastapi langchain_community -langsmith opentelemetry-api opentelemetry-exporter-otlp opentelemetry-sdk diff --git a/comps/retrievers/langchain/redis/retriever_redis.py b/comps/retrievers/langchain/redis/retriever_redis.py index dc4ed01d4..b4c901cb3 100644 --- a/comps/retrievers/langchain/redis/retriever_redis.py +++ b/comps/retrievers/langchain/redis/retriever_redis.py @@ -3,13 +3,14 @@ import os import time +from typing import Union from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceHubEmbeddings from langchain_community.vectorstores import Redis -from langsmith import traceable from redis_config import EMBED_MODEL, INDEX_NAME, REDIS_URL from comps import ( + CustomLogger, EmbedDoc, SearchedDoc, ServiceType, @@ -19,6 +20,15 @@ register_statistics, statistics_dict, ) +from comps.cores.proto.api_protocol import ( + ChatCompletionRequest, + RetrievalRequest, + RetrievalResponse, + RetrievalResponseData, +) + +logger = CustomLogger("retriever_redis") +logflag = os.getenv("LOGFLAG", False) tei_embedding_endpoint = os.getenv("TEI_EMBEDDING_ENDPOINT") @@ -30,39 +40,62 @@ host="0.0.0.0", port=7000, ) -@traceable(run_type="retriever") @register_statistics(names=["opea_service@retriever_redis"]) -def retrieve(input: EmbedDoc) -> SearchedDoc: +def retrieve( + input: Union[EmbedDoc, RetrievalRequest, ChatCompletionRequest] +) -> Union[SearchedDoc, RetrievalResponse, ChatCompletionRequest]: + if logflag: + logger.info(input) start = time.time() # check if the Redis index has data if vector_db.client.keys() == []: - result = SearchedDoc(retrieved_docs=[], initial_query=input.text) - statistics_dict["opea_service@retriever_redis"].append_latency(time.time() - start, None) - return result + search_res = [] + else: + if isinstance(input, EmbedDoc): + query = input.text + else: + # for RetrievalRequest, ChatCompletionRequest + query = input.input + # if the Redis index has data, perform the search + if input.search_type == "similarity": + search_res = vector_db.similarity_search_by_vector(embedding=input.embedding, k=input.k) + elif input.search_type == "similarity_distance_threshold": + if input.distance_threshold is None: + raise ValueError("distance_threshold must be provided for " + "similarity_distance_threshold retriever") + search_res = vector_db.similarity_search_by_vector( + embedding=input.embedding, k=input.k, distance_threshold=input.distance_threshold + ) + elif input.search_type == "similarity_score_threshold": + docs_and_similarities = vector_db.similarity_search_with_relevance_scores( + query=input.text, k=input.k, score_threshold=input.score_threshold + ) + search_res = [doc for 
doc, _ in docs_and_similarities] + elif input.search_type == "mmr": + search_res = vector_db.max_marginal_relevance_search( + query=input.text, k=input.k, fetch_k=input.fetch_k, lambda_mult=input.lambda_mult + ) + else: + raise ValueError(f"{input.search_type} not valid") + + # return different response format + retrieved_docs = [] + if isinstance(input, EmbedDoc): + for r in search_res: + retrieved_docs.append(TextDoc(text=r.page_content)) + result = SearchedDoc(retrieved_docs=retrieved_docs, initial_query=input.text) + else: + for r in search_res: + retrieved_docs.append(RetrievalResponseData(text=r.page_content, metadata=r.metadata)) + if isinstance(input, RetrievalRequest): + result = RetrievalResponse(retrieved_docs=retrieved_docs) + elif isinstance(input, ChatCompletionRequest): + input.retrieved_docs = retrieved_docs + input.documents = [doc.text for doc in retrieved_docs] + result = input - # if the Redis index has data, perform the search - if input.search_type == "similarity": - search_res = vector_db.similarity_search_by_vector(embedding=input.embedding, k=input.k) - elif input.search_type == "similarity_distance_threshold": - if input.distance_threshold is None: - raise ValueError("distance_threshold must be provided for " + "similarity_distance_threshold retriever") - search_res = vector_db.similarity_search_by_vector( - embedding=input.embedding, k=input.k, distance_threshold=input.distance_threshold - ) - elif input.search_type == "similarity_score_threshold": - docs_and_similarities = vector_db.similarity_search_with_relevance_scores( - query=input.text, k=input.k, score_threshold=input.score_threshold - ) - search_res = [doc for doc, _ in docs_and_similarities] - elif input.search_type == "mmr": - search_res = vector_db.max_marginal_relevance_search( - query=input.text, k=input.k, fetch_k=input.fetch_k, lambda_mult=input.lambda_mult - ) - searched_docs = [] - for r in search_res: - searched_docs.append(TextDoc(text=r.page_content)) - result = SearchedDoc(retrieved_docs=searched_docs, initial_query=input.text) statistics_dict["opea_service@retriever_redis"].append_latency(time.time() - start, None) + if logflag: + logger.info(result) return result diff --git a/comps/retrievers/llamaindex/README.md b/comps/retrievers/llamaindex/README.md index 3f6db8899..28203160c 100644 --- a/comps/retrievers/llamaindex/README.md +++ b/comps/retrievers/llamaindex/README.md @@ -6,17 +6,17 @@ The service primarily utilizes similarity measures in vector space to rapidly re Overall, this microservice provides robust backend support for applications requiring efficient similarity searches, playing a vital role in scenarios such as recommendation systems, information retrieval, or any other context where precise measurement of document similarity is crucial. -# ๐Ÿš€1. Start Microservice with Python (Option 1) +## ๐Ÿš€1. Start Microservice with Python (Option 1) To start the retriever microservice, you must first install the required python packages. -## 1.1 Install Requirements +### 1.1 Install Requirements ```bash pip install -r requirements.txt ``` -## 1.2 Setup VectorDB Service +### 1.2 Setup VectorDB Service You need to setup your own VectorDB service (Redis in this example), and ingest your knowledge documents into the vector database. @@ -29,15 +29,15 @@ docker run -d --name="redis-vector-db" -p 6379:6379 -p 8001:8001 redis/redis-sta And then ingest data into the Redis VectorDB using the methods described in the dataprep microservice. 
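For illustration only, a minimal ingestion sketch via that dataprep microservice follows; the host, port (`6007`), route (`/v1/dataprep`), multipart field name, and file name are assumptions to be checked against the dataprep README of your deployment:

```python
import requests

# Hypothetical endpoint; verify against the dataprep microservice README.
DATAPREP_URL = "http://localhost:6007/v1/dataprep"

# Hypothetical document to ingest into the Redis vector store.
with open("nke-10k-2023.pdf", "rb") as f:
    response = requests.post(DATAPREP_URL, files={"files": ("nke-10k-2023.pdf", f)})

print(response.status_code, response.text)
```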
-## 1.3 Start Retriever Service +### 1.3 Start Retriever Service ```bash python retriever_redis.py ``` -# ๐Ÿš€2. Start Microservice with Docker (Option 2) +## ๐Ÿš€2. Start Microservice with Docker (Option 2) -## 2.1 Setup Environment Variables +### 2.1 Setup Environment Variables ```bash export REDIS_URL="redis://${your_ip}:6379" @@ -47,7 +47,7 @@ export LANGCHAIN_API_KEY=${your_langchain_api_key} export LANGCHAIN_PROJECT="opea/retrievers" ``` -## 2.2 Build Docker Image +### 2.2 Build Docker Image ```bash cd ../../ @@ -61,22 +61,22 @@ To start a docker container, you have two options: You can choose one as needed. -## 2.3 Run Docker with CLI (Option A) +### 2.3 Run Docker with CLI (Option A) ```bash docker run -d --name="retriever-redis-server" -p 7000:7000 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e REDIS_URL=$REDIS_URL -e INDEX_NAME=$INDEX_NAME opea/retriever-redis:latest ``` -## 2.4 Run Docker with Docker Compose (Option B) +### 2.4 Run Docker with Docker Compose (Option B) ```bash cd llamaindex/docker docker compose -f docker_compose_retriever.yaml up -d ``` -# ๐Ÿš€3. Consume Retriever Service +## ๐Ÿš€3. Consume Retriever Service -## 3.1 Check Service Status +### 3.1 Check Service Status ```bash curl http://localhost:7000/v1/health_check \ @@ -84,7 +84,7 @@ curl http://localhost:7000/v1/health_check \ -H 'Content-Type: application/json' ``` -## 3.2 Consume Retriever Service +### 3.2 Consume Retriever Service To consume the Retriever Microservice, you can generate a mock embedding vector of length 768 with Python. diff --git a/comps/retrievers/llamaindex/docker/Dockerfile b/comps/retrievers/llamaindex/docker/Dockerfile index 7d9cd64be..4b022718a 100644 --- a/comps/retrievers/llamaindex/docker/Dockerfile +++ b/comps/retrievers/llamaindex/docker/Dockerfile @@ -1,13 +1,12 @@ - # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 FROM ubuntu:22.04 RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ + python3-pip \ libgl1-mesa-glx \ - libjemalloc-dev \ - vim + libjemalloc-dev RUN useradd -m -s /bin/bash user && \ mkdir -p /home/user && \ diff --git a/comps/retrievers/llamaindex/requirements.txt b/comps/retrievers/llamaindex/requirements.txt index 236ea9af8..83228c259 100644 --- a/comps/retrievers/llamaindex/requirements.txt +++ b/comps/retrievers/llamaindex/requirements.txt @@ -3,7 +3,6 @@ docarray[full] easyocr fastapi httpx -langsmith llama-index-vector-stores-redis llama_index opentelemetry-api diff --git a/comps/retrievers/llamaindex/retriever_redis.py b/comps/retrievers/llamaindex/retriever_redis.py index 4999a7235..8c20e36c9 100644 --- a/comps/retrievers/llamaindex/retriever_redis.py +++ b/comps/retrievers/llamaindex/retriever_redis.py @@ -3,12 +3,14 @@ import os -from langsmith import traceable from llama_index.core.vector_stores.types import VectorStoreQuery from llama_index.vector_stores.redis import RedisVectorStore from redis_config import INDEX_NAME, REDIS_URL -from comps import EmbedDoc, SearchedDoc, ServiceType, TextDoc, opea_microservices, register_microservice +from comps import CustomLogger, EmbedDoc, SearchedDoc, ServiceType, TextDoc, opea_microservices, register_microservice + +logger = CustomLogger("retriever_redis") +logflag = os.getenv("LOGFLAG", False) tei_embedding_endpoint = os.getenv("TEI_EMBEDDING_ENDPOINT") @@ -20,14 +22,17 @@ host="0.0.0.0", port=7000, ) -@traceable(run_type="retriever") def retrieve(input: EmbedDoc) -> SearchedDoc: + if logflag: + logger.info(input) 
vector_store_query = VectorStoreQuery(query_embedding=input.embedding) search_res = vector_store.query(query=vector_store_query) searched_docs = [] for node, id, similarity in zip(search_res.nodes, search_res.ids, search_res.similarities): searched_docs.append(TextDoc(text=node.get_content())) result = SearchedDoc(retrieved_docs=searched_docs, initial_query=input.text) + if logflag: + logger.info(result) return result diff --git a/comps/tts/Dockerfile b/comps/tts/Dockerfile index 73272567d..9ceaef634 100644 --- a/comps/tts/Dockerfile +++ b/comps/tts/Dockerfile @@ -2,16 +2,25 @@ # SPDX-License-Identifier: Apache-2.0 FROM python:3.11-slim - +RUN useradd -m -s /bin/bash user && \ + mkdir -p /home/user && \ + chown -R user /home/user/ +USER user ENV LANG=C.UTF-8 +ARG ARCH=cpu -COPY comps /home/comps +COPY comps /home/user/comps RUN pip install --no-cache-dir --upgrade pip && \ - pip install --no-cache-dir -r /home/comps/tts/requirements.txt + if [ "${ARCH}" = "cpu" ]; then \ + pip install torch torchvision --index-url https://download.pytorch.org/whl/cpu ; \ + pip install --no-cache-dir --extra-index-url https://download.pytorch.org/whl/cpu -r /home/user/comps/tts/requirements.txt ; \ + else \ + pip install --no-cache-dir -r /home/user/comps/tts/requirements.txt ; \ + fi -ENV PYTHONPATH=$PYTHONPATH:/home +ENV PYTHONPATH=$PYTHONPATH:/home/user -WORKDIR /home/comps/tts +WORKDIR /home/user/comps/tts ENTRYPOINT ["python", "tts.py"] \ No newline at end of file diff --git a/comps/tts/README.md b/comps/tts/README.md index 52ab6c92b..5045d5870 100644 --- a/comps/tts/README.md +++ b/comps/tts/README.md @@ -30,13 +30,13 @@ python tts.py curl http://localhost:9088/v1/audio/speech -XPOST -d '{"text": "Who are you?"}' -H 'Content-Type: application/json' ``` -# ๐Ÿš€2. Start Microservice with Docker (Option 2) +## ๐Ÿš€2. Start Microservice with Docker (Option 2) Alternatively, you can start the TTS microservice with Docker. -## 2.1 Build Images +### 2.1 Build Images -### 2.1.1 Whisper Server Image +#### 2.1.1 Whisper Server Image - Xeon CPU @@ -52,15 +52,15 @@ cd ../.. docker build -t opea/speecht5-gaudi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/tts/speecht5/Dockerfile_hpu . ``` -### 2.1.2 TTS Service Image +#### 2.1.2 TTS Service Image ```bash docker build -t opea/tts:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/tts/Dockerfile . 
``` -## 2.2 Start SpeechT5 and TTS Service +### 2.2 Start SpeechT5 and TTS Service -### 2.2.1 Start SpeechT5 Server +#### 2.2.1 Start SpeechT5 Server - Xeon @@ -74,7 +74,7 @@ docker run -p 7055:7055 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$htt docker run -p 7055:7055 --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy opea/speecht5-gaudi:latest ``` -### 2.2.2 Start TTS service +#### 2.2.2 Start TTS service ```bash ip_address=$(hostname -I | awk '{print $1}') @@ -82,7 +82,7 @@ ip_address=$(hostname -I | awk '{print $1}') docker run -p 9088:9088 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e TTS_ENDPOINT=http://$ip_address:7055 opea/tts:latest ``` -### 2.2.3 Test +#### 2.2.3 Test ```bash # curl diff --git a/comps/tts/requirements.txt b/comps/tts/requirements.txt index 7f9363676..d5a2ec6cb 100644 --- a/comps/tts/requirements.txt +++ b/comps/tts/requirements.txt @@ -4,11 +4,9 @@ fastapi opentelemetry-api opentelemetry-exporter-otlp opentelemetry-sdk -optimum[habana] prometheus-fastapi-instrumentator sentencepiece shortuuid soundfile -torch transformers uvicorn diff --git a/comps/tts/speecht5/Dockerfile b/comps/tts/speecht5/Dockerfile index e4afd07db..7dafd9ea2 100644 --- a/comps/tts/speecht5/Dockerfile +++ b/comps/tts/speecht5/Dockerfile @@ -2,23 +2,34 @@ # SPDX-License-Identifier: Apache-2.0 FROM python:3.11-slim +RUN useradd -m -s /bin/bash user && \ + mkdir -p /home/user && \ + chown -R user /home/user/ # Set environment variables ENV LANG=en_US.UTF-8 ENV PYTHONPATH=/home/user +ARG ARCH=cpu # Install system dependencies RUN apt-get update \ && apt-get install -y ffmpeg \ && apt-get install -y curl -COPY comps /home/comps +COPY --chown=user:user comps /home/user/comps + +USER user RUN pip install --no-cache-dir --upgrade pip && \ - pip install --no-cache-dir -r /home/comps/tts/requirements.txt + if [ "${ARCH}" = "cpu" ]; then \ + pip install torch torchvision --index-url https://download.pytorch.org/whl/cpu ; \ + pip install --no-cache-dir --extra-index-url https://download.pytorch.org/whl/cpu -r /home/user/comps/tts/requirements.txt ; \ + else \ + pip install --no-cache-dir -r /home/user/comps/tts/requirements.txt ; \ + fi -ENV PYTHONPATH=$PYTHONPATH:/home +ENV PYTHONPATH=$PYTHONPATH:/home/user -WORKDIR /home/comps/tts/speecht5 +WORKDIR /home/user/comps/tts/speecht5 ENTRYPOINT ["python", "speecht5_server.py", "--device", "cpu"] \ No newline at end of file diff --git a/comps/tts/speecht5/Dockerfile_hpu b/comps/tts/speecht5/Dockerfile_hpu index 8f889b86a..46de4ecd9 100644 --- a/comps/tts/speecht5/Dockerfile_hpu +++ b/comps/tts/speecht5/Dockerfile_hpu @@ -2,28 +2,34 @@ # SPDX-License-Identifier: Apache-2.0 # HABANA environment -FROM vault.habana.ai/gaudi-docker/1.16.1/ubuntu22.04/habanalabs/pytorch-installer-2.2.2:latest AS hpu - +# FROM vault.habana.ai/gaudi-docker/1.16.1/ubuntu22.04/habanalabs/pytorch-installer-2.2.2:latest as hpu +FROM opea/habanalabs:1.16.1-pytorch-installer-2.2.2 as hpu +RUN useradd -m -s /bin/bash user && \ + mkdir -p /home/user && \ + chown -R user /home/user/ RUN rm -rf /etc/ssh/ssh_host* # Set environment variables ENV LANG=en_US.UTF-8 ENV PYTHONPATH=/home/user:/usr/lib/habanalabs/:/optimum-habana +ENV LD_LIBRARY_PATH=/lib/x86_64-linux-gnu:$LD_LIBRARY_PATH # Install system dependencies RUN apt-get update \ && apt-get install -y ffmpeg \ && apt-get install -y curl -COPY comps /home/comps +COPY --chown=user:user 
comps /home/user/comps + +USER user # Install requirements and optimum habana RUN pip install --no-cache-dir --upgrade pip && \ - pip install --no-cache-dir -r /home/comps/tts/requirements.txt && \ + pip install --no-cache-dir -r /home/user/comps/tts/requirements.txt && \ pip install optimum[habana] -ENV PYTHONPATH=$PYTHONPATH:/home +ENV PYTHONPATH=$PYTHONPATH:/home/user -WORKDIR /home/comps/tts/speecht5 +WORKDIR /home/user/comps/tts/speecht5 -ENTRYPOINT ["python", "speecht5_server.py", "--device", "hpu"] \ No newline at end of file +ENTRYPOINT ["python", "speecht5_server.py", "--device", "hpu"] diff --git a/comps/tts/speecht5/speecht5_model.py b/comps/tts/speecht5/speecht5_model.py index b79a45adc..8a800d68d 100644 --- a/comps/tts/speecht5/speecht5_model.py +++ b/comps/tts/speecht5/speecht5_model.py @@ -69,7 +69,14 @@ def split_long_text_into_batch(self, text, batch_length=128): cur_end = idx idx += 1 # deal with the last sequence - res.append(text[cur_start:idx]) + if cur_start < len(text): + last_chunk = text[cur_start:] + last_punc_idx = max([last_chunk.rfind(punc) for punc in hitted_ends[:-1]]) # exclude " " + if last_punc_idx != -1: + last_chunk = last_chunk[: last_punc_idx + 1] + res.append(last_chunk[: last_punc_idx + 1]) + else: + res.append(last_chunk) res = [i + "." for i in res] # avoid unexpected end of sequence return res diff --git a/comps/tts/tts.py b/comps/tts/tts.py index 6c6bad232..050a1bbd5 100644 --- a/comps/tts/tts.py +++ b/comps/tts/tts.py @@ -9,6 +9,7 @@ from comps import ( Base64ByteStrDoc, + CustomLogger, ServiceType, TextDoc, opea_microservices, @@ -17,6 +18,9 @@ statistics_dict, ) +logger = CustomLogger("tts") +logflag = os.getenv("LOGFLAG", False) + @register_microservice( name="opea_service@tts", @@ -29,16 +33,21 @@ ) @register_statistics(names=["opea_service@tts"]) async def text_to_audio(input: TextDoc): + if logflag: + logger.info(input) start = time.time() text = input.text inputs = {"text": text} response = requests.post(url=f"{tts_endpoint}/v1/tts", data=json.dumps(inputs), proxies={"http": None}) statistics_dict["opea_service@tts"].append_latency(time.time() - start, None) - return Base64ByteStrDoc(byte_str=response.json()["tts_result"]) + result = Base64ByteStrDoc(byte_str=response.json()["tts_result"]) + if logflag: + logger.info(result) + return result if __name__ == "__main__": tts_endpoint = os.getenv("TTS_ENDPOINT", "http://localhost:7055") - print("[tts - router] TTS initialized.") + logger.info("[tts - router] TTS initialized.") opea_microservices["opea_service@tts"].start() diff --git a/comps/vectorstores/README.md b/comps/vectorstores/README.md index 6dd35ee0b..492ef970f 100644 --- a/comps/vectorstores/README.md +++ b/comps/vectorstores/README.md @@ -2,18 +2,22 @@ The Vectorstores Microservice provides convenient way to start various vector database servers. 
-# Vectorstores Microservice with Redis +## Vectorstores Microservice with Redis For details, please refer to this [readme](langchain/redis/README.md) -# Vectorstores Microservice with Qdrant +## Vectorstores Microservice with Qdrant For details, please refer to this [readme](langchain/qdrant/README.md) -# Vectorstores Microservice with PGVector +## Vectorstores Microservice with PGVector For details, please refer to this [readme](langchain/pgvector/README.md) -# Vectorstores Microservice with Pinecone +## Vectorstores Microservice with Pinecone For details, please refer to this [readme](langchain/pinecone/README.md) + +## Vectorstores Microservice with Pathway + +For details, please refer to this [readme](langchain/pathway/README.md) diff --git a/comps/vectorstores/langchain/chroma/README.md b/comps/vectorstores/langchain/chroma/README.md index d7399b8fb..155593010 100644 --- a/comps/vectorstores/langchain/chroma/README.md +++ b/comps/vectorstores/langchain/chroma/README.md @@ -2,9 +2,9 @@ Chroma is a AI-native open-source vector database focused on developer productivity and happiness. Chroma is licensed under Apache 2.0. Chroma runs in various modes, we can deploy it as a server running your local machine or in the cloud. -# Getting Started +## Getting Started -## Start Chroma Server +### Start Chroma Server To start the Chroma server on your local machine, follow these steps: @@ -14,11 +14,11 @@ cd chroma docker compose up -d ``` -## Start Log Output +### Start Log Output Upon starting the server, you should see log outputs similar to the following: -```log +``` server-1 | Starting 'uvicorn chromadb.app:app' with args: --workers 1 --host 0.0.0.0 --port 8000 --proxy-headers --log-config chromadb/log_config.yml --timeout-keep-alive 30 server-1 | INFO: [02-08-2024 07:03:19] Set chroma_server_nofile to 65536 server-1 | INFO: [02-08-2024 07:03:19] Anonymized telemetry enabled. See https://docs.trychroma.com/telemetry for more information. diff --git a/comps/vectorstores/langchain/lancedb/README.md b/comps/vectorstores/langchain/lancedb/README.md new file mode 100644 index 000000000..bfe01585c --- /dev/null +++ b/comps/vectorstores/langchain/lancedb/README.md @@ -0,0 +1,139 @@ +# LanceDB + +LanceDB is an embedded vector database for AI applications. It is open source and distributed with an Apache-2.0 license. + +LanceDB datasets are persisted to disk and can be shared in Python. 
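The examples in this README follow a JS-style `vectordb` client. As an editor's sketch only, persisting and querying a small dataset with the synchronous `lancedb` Python package (not part of this README, and assuming a recent `lancedb` release) could look like:

```python
import lancedb

# Connect to (or create) an on-disk LanceDB database.
db = lancedb.connect("./lancedb-data")

# Create a table from a few toy rows; real applications would store model embeddings.
table = db.create_table(
    "vectors",
    data=[
        {"vector": [0.0, 0.1, 0.2, 0.3], "text": "Hello world", "id": 1},
        {"vector": [0.9, 0.8, 0.7, 0.6], "text": "Bye bye", "id": 2},
    ],
)

# Vector search; the same on-disk dataset can later be reopened with db.open_table("vectors").
hits = table.search([0.0, 0.1, 0.2, 0.3]).limit(1).to_list()
print(hits[0]["text"])
```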
+ +## Setup + +```bash +npm install -S vectordb +``` + +## Usage + +### Create a new index from texts + +```python +import os +import tempfile +from langchain.vectorstores import LanceDB +from langchain.embeddings.openai import OpenAIEmbeddings +from vectordb import connect + + +async def run(): + dir = tempfile.mkdtemp(prefix="lancedb-") + db = await connect(dir) + table = await db.create_table("vectors", [{"vector": [0] * 1536, "text": "sample", "id": 1}]) + + vector_store = await LanceDB.from_texts( + ["Hello world", "Bye bye", "hello nice world"], + [{"id": 2}, {"id": 1}, {"id": 3}], + OpenAIEmbeddings(), + table=table, + ) + + result_one = await vector_store.similarity_search("hello world", 1) + print(result_one) + # [ Document(page_content='hello nice world', metadata={'id': 3}) ] + + +# Run the function +import asyncio + +asyncio.run(run()) +``` + +API Reference: + +- `LanceDB` from `@langchain/community/vectorstores/lancedb` +- `OpenAIEmbeddings` from `@langchain/openai` + +### Create a new index from a loader + +```python +import os +import tempfile +from langchain.vectorstores import LanceDB +from langchain.embeddings.openai import OpenAIEmbeddings +from langchain.document_loaders.fs import TextLoader +from vectordb import connect + +# Create docs with a loader +loader = TextLoader("src/document_loaders/example_data/example.txt") +docs = loader.load() + + +async def run(): + dir = tempfile.mkdtemp(prefix="lancedb-") + db = await connect(dir) + table = await db.create_table("vectors", [{"vector": [0] * 1536, "text": "sample", "source": "a"}]) + + vector_store = await LanceDB.from_documents(docs, OpenAIEmbeddings(), table=table) + + result_one = await vector_store.similarity_search("hello world", 1) + print(result_one) + # [ + # Document(page_content='Foo\nBar\nBaz\n\n', metadata={'source': 'src/document_loaders/example_data/example.txt'}) + # ] + + +# Run the function +import asyncio + +asyncio.run(run()) +``` + +API Reference: + +- `LanceDB` from `@langchain/community/vectorstores/lancedb` +- `OpenAIEmbeddings` from `@langchain/openai` +- `TextLoader` from `langchain/document_loaders/fs/text` + +### Open an existing dataset + +```python +import os +import tempfile +from langchain.vectorstores import LanceDB +from langchain.embeddings.openai import OpenAIEmbeddings +from vectordb import connect + + +async def run(): + uri = await create_test_db() + db = await connect(uri) + table = await db.open_table("vectors") + + vector_store = LanceDB(OpenAIEmbeddings(), table=table) + + result_one = await vector_store.similarity_search("hello world", 1) + print(result_one) + # [ Document(page_content='Hello world', metadata={'id': 1}) ] + + +async def create_test_db(): + dir = tempfile.mkdtemp(prefix="lancedb-") + db = await connect(dir) + await db.create_table( + "vectors", + [ + {"vector": [0] * 1536, "text": "Hello world", "id": 1}, + {"vector": [0] * 1536, "text": "Bye bye", "id": 2}, + {"vector": [0] * 1536, "text": "hello nice world", "id": 3}, + ], + ) + return dir + + +# Run the function +import asyncio + +asyncio.run(run()) +``` + +API Reference: + +- `LanceDB` from `@langchain/community/vectorstores/lancedb` +- `OpenAIEmbeddings` from `@langchain/openai` diff --git a/comps/vectorstores/langchain/milvus/README.md b/comps/vectorstores/langchain/milvus/README.md index d02508351..b0f19caf4 100644 --- a/comps/vectorstores/langchain/milvus/README.md +++ b/comps/vectorstores/langchain/milvus/README.md @@ -6,7 +6,7 @@ Configure your Milvus instance to suit your application scenarios by 
adjusting c Customized the path to store data, default is /volumes ```bash -export DOCKER_VOLUME_DIRECTORY=./your_path +export DOCKER_VOLUME_DIRECTORY=${your_path} ``` ## 2. Run Milvus service diff --git a/comps/vectorstores/langchain/milvus/docker-compose.yml b/comps/vectorstores/langchain/milvus/docker-compose.yml index 125463752..d6c39d0f0 100644 --- a/comps/vectorstores/langchain/milvus/docker-compose.yml +++ b/comps/vectorstores/langchain/milvus/docker-compose.yml @@ -7,10 +7,6 @@ services: etcd: container_name: milvus-etcd image: quay.io/coreos/etcd:v3.5.5 - deploy: - resources: - limits: - cpus: "0.5" environment: - ETCD_AUTO_COMPACTION_MODE=revision - ETCD_AUTO_COMPACTION_RETENTION=1000 @@ -28,10 +24,6 @@ services: minio: container_name: milvus-minio image: minio/minio:RELEASE.2023-03-20T20-16-18Z - deploy: - resources: - limits: - cpus: "0.5" environment: MINIO_ACCESS_KEY: minioadmin MINIO_SECRET_KEY: minioadmin @@ -49,31 +41,25 @@ services: standalone: container_name: milvus-standalone - image: milvusdb/milvus:latest - deploy: - resources: - limits: - cpus: "8" - memory: 32G + image: milvusdb/milvus:v2.4.6 command: ["milvus", "run", "standalone"] security_opt: - seccomp:unconfined environment: ETCD_ENDPOINTS: etcd:2379 MINIO_ADDRESS: minio:9000 - DNNL_ENABLE: 0 volumes: - - ./milvus.yaml:/milvus/configs/milvus.yaml + - ${DOCKER_VOLUME_DIRECTORY:-.}/milvus.yaml:/milvus/configs/milvus.yaml - ${DOCKER_VOLUME_DIRECTORY:-.}/volumes/milvus:/var/lib/milvus healthcheck: - test: ["CMD", "curl", "-f", "http://localhost:9092/healthz"] + test: ["CMD", "curl", "-f", "http://localhost:9091/healthz"] interval: 30s start_period: 90s timeout: 20s retries: 3 ports: - "19530:19530" - - "9092:9092" + - "9091:9091" depends_on: - "etcd" - "minio" diff --git a/comps/vectorstores/langchain/milvus/milvus.yaml b/comps/vectorstores/langchain/milvus/milvus.yaml index de29dfe3d..b9f22cb3d 100644 --- a/comps/vectorstores/langchain/milvus/milvus.yaml +++ b/comps/vectorstores/langchain/milvus/milvus.yaml @@ -105,7 +105,9 @@ minio: region: # Specify minio storage system location region useVirtualHost: false # Whether use virtual host mode for bucket requestTimeoutMs: 10000 # minio timeout for request time in milliseconds - listObjectsMaxKeys: 0 # The maximum number of objects requested per batch in minio ListObjects rpc, 0 means using oss client by default, decrease these configuration if ListObjects timeout + # The maximum number of objects requested per batch in minio ListObjects rpc, + # 0 means using oss client by default, decrease these configuration if ListObjects timeout + listObjectsMaxKeys: 0 # Milvus supports four MQ: rocksmq(based on RockDB), natsmq(embedded nats-server), Pulsar and Kafka. # You can change your mq by setting mq.type field. @@ -120,6 +122,10 @@ mq: pursuitLag: 10 # time tick lag threshold to enter pursuit mode, in seconds pursuitBufferSize: 8388608 # pursuit mode buffer size in bytes mqBufSize: 16 # MQ client consumer buffer length + dispatcher: + mergeCheckInterval: 1 # the interval time(in seconds) for dispatcher to check whether to merge + targetBufSize: 16 # the length of channel buffer for targe + maxTolerantLag: 3 # Default value: "3", the timeout(in seconds) that target sends msgPack # Related configuration of pulsar, used to manage Milvus logs of recent mutation operations, output streaming log, and provide log publish-subscribe services. 
pulsar: @@ -182,7 +188,7 @@ natsmq: # Related configuration of rootCoord, used to handle data definition language (DDL) and data control language (DCL) requests rootCoord: dmlChannelNum: 16 # The number of dml channels created at system startup - maxPartitionNum: 4096 # Maximum number of partitions in a collection + maxPartitionNum: 1024 # Maximum number of partitions in a collection minSegmentSizeToEnableIndex: 1024 # It's a threshold. When the segment size is less than this value, the segment will not be indexed enableActiveStandby: false maxDatabaseNum: 64 # Maximum number of database @@ -200,7 +206,6 @@ rootCoord: proxy: timeTickInterval: 200 # ms, the interval that proxy synchronize the time tick healthCheckTimeout: 3000 # ms, the interval that to do component healthy check - healthCheckTimetout: 3000 # ms, the interval that to do component healthy check msgStream: timeTick: bufSize: 512 @@ -217,6 +222,7 @@ proxy: ginLogging: true ginLogSkipPaths: / # skip url path for gin log maxTaskNum: 1024 # max task number of proxy task queue + mustUsePartitionKey: false # switch for whether proxy must use partition key for the collection accessLog: enable: false # if use access log minioEnable: false # if upload sealed access log file to minio @@ -244,7 +250,7 @@ proxy: port: # high-level restful api acceptTypeAllowInt64: true # high-level restful api, whether http client can deal with int64 enablePprof: true # Whether to enable pprof middleware on the metrics port - ip: 0.0.0.0 # if not specified, use the first unicastable address + ip: # if not specified, use the first unicastable address port: 19530 internalPort: 19529 grpc: @@ -282,6 +288,8 @@ queryCoord: channelTaskTimeout: 60000 # 1 minute segmentTaskTimeout: 120000 # 2 minute distPullInterval: 500 + collectionObserverInterval: 200 + checkExecutedFlagInterval: 100 heartbeatAvailableInterval: 10000 # 10s, Only QueryNodes which fetched heartbeats within the duration are available loadTimeoutSeconds: 600 distRequestTimeout: 5000 # the request timeout for querycoord fetching data distribution from querynodes, in milliseconds @@ -298,6 +306,7 @@ queryCoord: checkNodeSessionInterval: 60 # the interval(in seconds) of check querynode cluster session gracefulStopTimeout: 5 # seconds. force stop node without graceful stop enableStoppingBalance: true # whether enable stopping balance + channelExclusiveNodeFactor: 4 # the least node number for enable channel's exclusive mode cleanExcludeSegmentInterval: 60 # the time duration of clean pipeline exclude segment which used for filter invalid data, in seconds ip: # if not specified, use the first unicastable address port: 19531 @@ -320,6 +329,7 @@ queryNode: nprobe: 16 # nprobe to search small index, based on your accuracy requirement, must smaller than nlist memExpansionRate: 1.15 # extra memory needed by building interim index buildParallelRate: 0.5 # the ratio of building interim index parallel matched with cpu num + knowhereScoreConsistency: false # Enable knowhere strong consistency score computation logic loadMemoryUsageFactor: 1 # The multiply factor of calculating the memory usage while loading segments enableDisk: false # enable querynode load disk index, and search on disk index maxDiskUsagePercentage: 95 @@ -327,17 +337,22 @@ queryNode: enabled: true memoryLimit: 2147483648 # 2 GB, 2 * 1024 *1024 *1024 readAheadPolicy: willneed # The read ahead policy of chunk cache, options: `normal, random, sequential, willneed, dontneed` - # options: async, sync, off. + # options: async, sync, disable. 
# Specifies the necessity for warming up the chunk cache. - # 1. If set to "sync" or "async," the original vector data will be synchronously/asynchronously loaded into the + # 1. If set to "sync" or "async" the original vector data will be synchronously/asynchronously loaded into the # chunk cache during the load process. This approach has the potential to substantially reduce query/search latency # for a specific duration post-load, albeit accompanied by a concurrent increase in disk usage; - # 2. If set to "off," original vector data will only be loaded into the chunk cache during search/query. - warmup: async + # 2. If set to "disable" original vector data will only be loaded into the chunk cache during search/query. + warmup: disable mmap: mmapEnabled: false # Enable mmap for loading data - mmapEnabled: false # Enable mmap for loading data - lazyloadEnabled: false # Enable lazyload for loading data + lazyload: + enabled: false # Enable lazyload for loading data + waitTimeout: 30000 # max wait timeout duration in milliseconds before start to do lazyload search and retrieve + requestResourceTimeout: 5000 # max timeout in milliseconds for waiting request resource for lazy load, 5s by default + requestResourceRetryInterval: 2000 # retry interval in milliseconds for waiting request resource for lazy load, 2s by default + maxRetryTimes: 1 # max retry times for lazy load, 1 by default + maxEvictPerRetry: 1 # max evict count for lazy load, 1 by default grouping: enabled: true maxNQ: 1000 @@ -403,9 +418,11 @@ indexNode: dataCoord: channel: watchTimeoutInterval: 300 # Timeout on watching channels (in seconds). Datanode tickler update watch progress will reset timeout timer. + balanceWithRpc: true # Whether to enable balance with RPC, default to use etcd watch + legacyVersionWithoutRPCWatch: 2.4.1 # Datanodes <= this version are considered as legacy nodes, which doesn't have rpc based watch(). This is only used during rolling upgrade where legacy nodes won't get new channels balanceSilentDuration: 300 # The duration after which the channel manager start background channel balancing balanceInterval: 360 # The interval with which the channel manager check dml channel balance status - checkInterval: 10 # The interval in seconds with which the channel manager advances channel states + checkInterval: 1 # The interval in seconds with which the channel manager advances channel states notifyChannelOperationTimeout: 5 # Timeout notifing channel operations (in seconds). segment: maxSize: 1024 # Maximum size of a segment in MB @@ -485,7 +502,7 @@ dataNode: coldTime: 60 # Turn on skip mode after there are only timetick msg for x seconds segment: insertBufSize: 16777216 # Max buffer size to flush for a single segment. - deleteBufBytes: 67108864 # Max buffer size in bytes to flush del for a single channel, default as 16MB + deleteBufBytes: 16777216 # Max buffer size in bytes to flush del for a single channel, default as 16MB syncPeriod: 600 # The period to sync segments if buffer is not empty. memory: forceSyncEnable: true # Set true to force sync if memory usage is too high @@ -536,8 +553,6 @@ log: grpc: log: level: WARNING - serverMaxSendSize: 536870912 - serverMaxRecvSize: 268435456 gracefulStopTimeout: 10 # second, time to wait graceful stop finish client: compressionEnabled: false @@ -550,8 +565,6 @@ grpc: minResetInterval: 1000 maxCancelError: 32 minSessionCheckInterval: 200 - clientMaxSendSize: 268435456 - clientMaxRecvSize: 536870912 # Configure the proxy tls enable. 
tls: @@ -560,18 +573,6 @@ tls: caPemPath: configs/cert/ca.pem common: - chanNamePrefix: - cluster: by-dev - rootCoordTimeTick: rootcoord-timetick - rootCoordStatistics: rootcoord-statistics - rootCoordDml: rootcoord-dml - replicateMsg: replicate-msg - queryTimeTick: queryTimeTick - dataCoordTimeTick: datacoord-timetick-channel - dataCoordSegmentInfo: segment-info-channel - subNamePrefix: - dataCoordSubNamePrefix: dataCoord - dataNodeSubNamePrefix: dataNode defaultPartitionName: _default # default partition name for a collection defaultIndexName: _default_idx # default index name entityExpiration: -1 # Entity expiration in seconds, CAUTION -1 means never expire @@ -617,7 +618,7 @@ common: ttMsgEnabled: true # Whether the instance disable sending ts messages traceLogMode: 0 # trace request info bloomFilterSize: 100000 # bloom filter initial size - maxBloomFalsePositive: 0.05 # max false positive rate for bloom filter + maxBloomFalsePositive: 0.001 # max false positive rate for bloom filter # QuotaConfig, configurations of Milvus quota and limits. # By default, we enable: @@ -631,7 +632,7 @@ common: # 4. DQL result rate protection; # If necessary, you can also manually force to deny RW requests. quotaAndLimits: - enabled: false # `true` to enable quota and limits, `false` to disable. + enabled: true # `true` to enable quota and limits, `false` to disable. # quotaCenterCollectInterval is the time interval that quotaCenter # collects metrics from Proxies, Query cluster and Data cluster. # seconds, (0 ~ 65536) @@ -649,10 +650,10 @@ quotaAndLimits: db: max: -1 # qps of db level, default no limit, rate for CreateIndex, DropIndex flushRate: - enabled: false + enabled: true max: -1 # qps, default no limit, rate for flush collection: - max: -1 # qps, default no limit, rate for flush at collection level. + max: 0.1 # qps, default no limit, rate for flush at collection level. db: max: -1 # qps of db level, default no limit, rate for flush compactionRate: @@ -719,6 +720,7 @@ quotaAndLimits: limits: maxCollectionNum: 65536 maxCollectionNumPerDB: 65536 + maxInsertSize: -1 # maximum size of a single insert request, in bytes, -1 means no limit maxResourceGroupNumOfQueryNode: 1024 # maximum number of resource groups of query nodes limitWriting: # forceDeny false means dml requests are allowed (except for some @@ -786,8 +788,8 @@ quotaAndLimits: trace: # trace exporter type, default is stdout, - # optional values: ['stdout', 'jaeger', 'otlp'] - exporter: stdout + # optional values: ['noop','stdout', 'jaeger', 'otlp'] + exporter: noop # fraction of traceID based sampler, # optional values: [0, 1] # Fractions >= 1 will always sample. Fractions < 0 are treated as zero. 
diff --git a/comps/vectorstores/langchain/pathway/Dockerfile b/comps/vectorstores/langchain/pathway/Dockerfile
new file mode 100644
index 000000000..31cd06a82
--- /dev/null
+++ b/comps/vectorstores/langchain/pathway/Dockerfile
@@ -0,0 +1,25 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+FROM pathwaycom/pathway:0.13.2-slim
+
+ENV DOCKER_BUILDKIT=1
+ENV PYTHONUNBUFFERED=1
+
+RUN apt-get update && apt-get install -y \
+    poppler-utils \
+    libreoffice \
+    libmagic-dev \
+    && rm -rf /var/lib/apt/lists/*
+
+WORKDIR /app
+
+COPY requirements.txt /app/
+
+RUN pip install --no-cache-dir -r requirements.txt
+
+COPY vectorstore_pathway.py /app/
+
+
+CMD ["python", "vectorstore_pathway.py"]
+
diff --git a/comps/vectorstores/langchain/pathway/README.md b/comps/vectorstores/langchain/pathway/README.md
new file mode 100644
index 000000000..fb0be0152
--- /dev/null
+++ b/comps/vectorstores/langchain/pathway/README.md
@@ -0,0 +1,84 @@
+# Start the Pathway Vector DB Server
+
+Set the environment variables for Pathway and the embedding model.
+
+> Note: If you are using `TEI_EMBEDDING_ENDPOINT`, make sure the embedding service is already running.
+> See the instructions [here](../../../retrievers/langchain/pathway/README.md).
+
+```bash
+export PATHWAY_HOST=0.0.0.0
+export PATHWAY_PORT=8666
+# TEI_EMBEDDING_ENDPOINT="http://${your_ip}:6060" # uncomment if you want to use the hosted embedding service, example: "http://127.0.0.1:6060"
+```
+
+## Configuration
+
+### Setting up the Pathway data sources
+
+Pathway can listen to many sources simultaneously, such as local files, S3 folders, cloud storage, and any data stream. Whenever a new file is added or an existing file is modified, Pathway parses, chunks, and indexes the documents in real time.
+
+See [pathway-io](https://pathway.com/developers/api-docs/pathway-io) for more information.
+
+You can easily connect to the data inside the folder with the Pathway file system connector. The data will automatically be updated by Pathway whenever the content of the folder changes. In this example, we create a single data source that reads the files under the `./data` folder.
+
+You can manage your data sources by configuring `data_sources` in `vectorstore_pathway.py`.
+
+```python
+import pathway as pw
+
+data = pw.io.fs.read(
+    "./data",
+    format="binary",
+    mode="streaming",
+    with_metadata=True,
+)  # This creates a Pathway connector that tracks
+# all the files in the ./data directory
+
+data_sources = [data]
+```
+
+### Other configs (parser, splitter and embedder)
+
+The Pathway vector store handles the ingestion and processing of the documents.
+It lets you configure the parser, the splitter, and the embedder.
+Whenever a file is added or modified in one of the sources, Pathway will automatically ingest it.
+
+By default, the `ParseUnstructured` parser, the `langchain.text_splitter.CharacterTextSplitter` splitter and the `BAAI/bge-base-en-v1.5` embedder are used.
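+
+The following is a minimal sketch of how these components are wired into the vector store server. It mirrors `vectorstore_pathway.py` in this directory, which swaps the default splitter for a `TokenTextSplitter`; the chunk size, overlap, and model name below are illustrative values, not required settings.
+
+```python
+import pathway as pw
+from langchain import text_splitter
+from langchain_community.embeddings import HuggingFaceBgeEmbeddings
+from pathway.xpacks.llm.parsers import ParseUnstructured
+from pathway.xpacks.llm.vector_store import VectorStoreServer
+
+# The data source defined above: every file under ./data is tracked.
+data = pw.io.fs.read("./data", format="binary", mode="streaming", with_metadata=True)
+data_sources = [data]
+
+# Swap in your own splitter / embedder here.
+splitter = text_splitter.TokenTextSplitter(chunk_size=450, chunk_overlap=50)
+embeddings = HuggingFaceBgeEmbeddings(model_name="BAAI/bge-base-en-v1.5")
+
+server = VectorStoreServer.from_langchain_components(
+    *data_sources,
+    embedder=embeddings,
+    parser=ParseUnstructured(),
+    splitter=splitter,
+)
+```
+
+The full service in `vectorstore_pathway.py` then starts this server with `server.run_server(host, port=port, with_cache=True, cache_backend=pw.persistence.Backend.filesystem("./Cache"))`.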
+ +For more information, see the relevant Pathway docs: + +- [Vector store docs](https://pathway.com/developers/api-docs/pathway-xpacks-llm/vectorstore) +- [parsers docs](https://pathway.com/developers/api-docs/pathway-xpacks-llm/parsers) +- [splitters docs](https://pathway.com/developers/api-docs/pathway-xpacks-llm/splitters) +- [embedders docs](https://pathway.com/developers/api-docs/pathway-xpacks-llm/embedders) + +## Building and running + +Build the Docker and run the Pathway Vector Store: + +```bash +cd comps/vectorstores/langchain/pathway + +docker build --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -t opea/vectorstore-pathway:latest . + +# with locally loaded model, you may add `EMBED_MODEL` env variable to configure the model. +docker run -e PATHWAY_HOST=${PATHWAY_HOST} -e PATHWAY_PORT=${PATHWAY_PORT} -e http_proxy=$http_proxy -e https_proxy=$https_proxy -v ./data:/app/data -p ${PATHWAY_PORT}:${PATHWAY_PORT} opea/vectorstore-pathway:latest + +# with the hosted embedder (network argument is needed for the vector server to reach to the embedding service) +docker run -e PATHWAY_HOST=${PATHWAY_HOST} -e PATHWAY_PORT=${PATHWAY_PORT} -e TEI_EMBEDDING_ENDPOINT=${TEI_EMBEDDING_ENDPOINT} -e http_proxy=$http_proxy -e https_proxy=$https_proxy -v ./data:/app/data -p ${PATHWAY_PORT}:${PATHWAY_PORT} --network="host" opea/vectorstore-pathway:latest +``` + +## Health check the vector store + +Wait until the server finishes indexing the docs, and send the following request to check it. + +```bash +curl -X 'POST' \ + "http://$PATHWAY_HOST:$PATHWAY_PORT/v1/statistics" \ + -H 'accept: */*' \ + -H 'Content-Type: application/json' +``` + +This should respond with something like: + +> `{"file_count": 1, "last_indexed": 1724325093, "last_modified": 1724317365}` diff --git a/comps/vectorstores/langchain/pathway/data/nke-10k-2023.pdf b/comps/vectorstores/langchain/pathway/data/nke-10k-2023.pdf new file mode 100644 index 000000000..6ade8863e Binary files /dev/null and b/comps/vectorstores/langchain/pathway/data/nke-10k-2023.pdf differ diff --git a/comps/vectorstores/langchain/pathway/requirements.txt b/comps/vectorstores/langchain/pathway/requirements.txt new file mode 100644 index 000000000..3d88eddf6 --- /dev/null +++ b/comps/vectorstores/langchain/pathway/requirements.txt @@ -0,0 +1,4 @@ +langchain_openai +pathway[xpack-llm] >= 0.14.1 +sentence_transformers +unstructured[all-docs] >= 0.10.28,<0.15 diff --git a/comps/vectorstores/langchain/pathway/vectorstore_pathway.py b/comps/vectorstores/langchain/pathway/vectorstore_pathway.py new file mode 100644 index 000000000..c6cac04b7 --- /dev/null +++ b/comps/vectorstores/langchain/pathway/vectorstore_pathway.py @@ -0,0 +1,62 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import logging +import os + +import pathway as pw +from langchain import text_splitter +from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceHubEmbeddings +from pathway.xpacks.llm.parsers import ParseUnstructured +from pathway.xpacks.llm.vector_store import VectorStoreServer + +logging.basicConfig( + level=logging.INFO, + format="%(asctime)s %(name)s %(levelname)s %(message)s", + datefmt="%Y-%m-%d %H:%M:%S", +) + +# This creates a Pathway connector that tracks all the files in the `data/` directory. +# Additions and modifications will be reflected on the index automatically. 
+ +data = pw.io.fs.read( + "./data", + format="binary", + mode="streaming", + with_metadata=True, +) + +data_sources = [data] + +splitter = text_splitter.TokenTextSplitter(chunk_size=450, chunk_overlap=50) + +host = os.getenv("PATHWAY_HOST", "127.0.0.1") +port = int(os.getenv("PATHWAY_PORT", 8666)) + +EMBED_MODEL = os.getenv("EMBED_MODEL", "BAAI/bge-base-en-v1.5") + +tei_embedding_endpoint = os.getenv("TEI_EMBEDDING_ENDPOINT") + +if __name__ == "__main__": + # Create vectorstore + if tei_embedding_endpoint: + # create embeddings using TEI endpoint service + logging.info(f"Initializing the embedder from tei_embedding_endpoint: {tei_embedding_endpoint}") + embeddings = HuggingFaceHubEmbeddings(model=tei_embedding_endpoint) + else: + # create embeddings using local embedding model + embeddings = HuggingFaceBgeEmbeddings(model_name=EMBED_MODEL) + + server = VectorStoreServer.from_langchain_components( + *data_sources, + embedder=embeddings, + parser=ParseUnstructured(), + splitter=splitter, + ) + + server.run_server( + host, + port=port, + with_cache=True, + cache_backend=pw.persistence.Backend.filesystem("./Cache"), + ) diff --git a/comps/version.py b/comps/version.py index 6ae7d54de..3f8ffef29 100644 --- a/comps/version.py +++ b/comps/version.py @@ -3,4 +3,4 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -__version__ = "0.8" +__version__ = "1.0" diff --git a/comps/web_retrievers/langchain/chroma/README.md b/comps/web_retrievers/langchain/chroma/README.md index 47a308837..7a2083448 100644 --- a/comps/web_retrievers/langchain/chroma/README.md +++ b/comps/web_retrievers/langchain/chroma/README.md @@ -2,16 +2,16 @@ The Web Retriever Microservice is designed to efficiently search web pages relevant to the prompt, save them into the VectorDB, and retrieve the matched documents with the highest similarity. The retrieved documents will be used as context in the prompt to LLMs. Different from the normal RAG process, a web retriever can leverage advanced search engines for more diverse demands, such as real-time news, verifiable sources, and diverse sources. -# Start Microservice with Docker +## Start Microservice with Docker -## Build Docker Image +### Build Docker Image ```bash cd ../../ docker build -t opea/web-retriever-chroma:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/web_retrievers/langchain/chroma/docker/Dockerfile . 
``` -## Start TEI Service +### Start TEI Service ```bash model=BAAI/bge-base-en-v1.5 @@ -20,7 +20,7 @@ volume=$PWD/data docker run -d -p 6060:80 -v $volume:/data -e http_proxy=$http_proxy -e https_proxy=$https_proxy --pull always ghcr.io/huggingface/text-embeddings-inference:cpu-1.2 --model-id $model --revision $revision ``` -## Start Web Retriever Service +### Start Web Retriever Service ```bash # set TEI endpoint @@ -32,10 +32,10 @@ export GOOGLE_CSE_ID=xxx ``` ```bash -docker run -d --name="web-retriever-chroma-server" -p 7078:7077 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e no_proxy=$no_proxy -e TEI_EMBEDDING_ENDPOINT=$TEI_EMBEDDING_ENDPOINT opea/web-retriever-chroma:latest +docker run -d --name="web-retriever-chroma-server" -p 7077:7077 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e no_proxy=$no_proxy -e TEI_EMBEDDING_ENDPOINT=$TEI_EMBEDDING_ENDPOINT -e GOOGLE_API_KEY=$GOOGLE_API_KEY -e GOOGLE_CSE_ID=$GOOGLE_CSE_ID opea/web-retriever-chroma:latest ``` -## Consume Web Retriever Service +### Consume Web Retriever Service To consume the Web Retriever Microservice, you can generate a mock embedding vector of length 768 with Python. @@ -43,8 +43,8 @@ To consume the Web Retriever Microservice, you can generate a mock embedding vec # Test your_embedding=$(python -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)") -curl http://${your_ip}:7077/v1/web_retrieval \ +http_proxy= curl http://${your_ip}:7077/v1/web_retrieval \ -X POST \ - -d "{\"text\":\"What is OPEA?\",\"embedding\":${your_embedding}}" \ + -d "{\"text\":\"What is black myth wukong?\",\"embedding\":${your_embedding}}" \ -H 'Content-Type: application/json' ``` diff --git a/comps/web_retrievers/langchain/chroma/docker/Dockerfile b/comps/web_retrievers/langchain/chroma/docker/Dockerfile index a6c3d80d5..a5a65bbd5 100644 --- a/comps/web_retrievers/langchain/chroma/docker/Dockerfile +++ b/comps/web_retrievers/langchain/chroma/docker/Dockerfile @@ -5,10 +5,15 @@ FROM langchain/langchain:latest ARG ARCH="cpu" # Set this to "cpu" or "gpu" +RUN useradd -m -s /bin/bash user && \ + mkdir -p /home/user && \ + chown -R user /home/user/ + RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ libgl1-mesa-glx \ - libjemalloc-dev \ - vim + libjemalloc-dev + +USER user COPY comps /home/user/comps diff --git a/comps/web_retrievers/langchain/chroma/retriever_chroma.py b/comps/web_retrievers/langchain/chroma/retriever_chroma.py index 3fbd1b755..53d9f8c36 100644 --- a/comps/web_retrievers/langchain/chroma/retriever_chroma.py +++ b/comps/web_retrievers/langchain/chroma/retriever_chroma.py @@ -12,6 +12,7 @@ from langchain_huggingface import HuggingFaceEndpointEmbeddings from comps import ( + CustomLogger, EmbedDoc, SearchedDoc, ServiceType, @@ -22,6 +23,9 @@ statistics_dict, ) +logger = CustomLogger("web_retriever_chroma") +logflag = os.getenv("LOGFLAG", False) + tei_embedding_endpoint = os.getenv("TEI_EMBEDDING_ENDPOINT") @@ -37,7 +41,8 @@ def retrieve_htmls(all_urls): def parse_htmls(docs): - print("Indexing new urls...") + if logflag: + logger.info("Indexing new urls...") html2text = Html2TextTransformer() docs = list(html2text.transform_documents(docs)) @@ -59,6 +64,8 @@ def dump_docs(docs): ) @register_statistics(names=["opea_service@web_retriever_chroma", "opea_service@search"]) def web_retrieve(input: EmbedDoc) -> SearchedDoc: + if logflag: + logger.info(input) start = time.time() query = input.text embedding = input.embedding @@ 
-70,10 +77,12 @@ def web_retrieve(input: EmbedDoc) -> SearchedDoc: if res.get("link", None): urls_to_look.append(res["link"]) urls = list(set(urls_to_look)) - print(f"urls: {urls}") + if logflag: + logger.info(f"urls: {urls}") docs = retrieve_htmls(urls) docs = parse_htmls(docs) - print(docs) + if logflag: + logger.info(docs) # Remove duplicated docs unique_documents_dict = {(doc.page_content, tuple(sorted(doc.metadata.items()))): doc for doc in docs} unique_documents = list(unique_documents_dict.values()) @@ -101,6 +110,8 @@ def web_retrieve(input: EmbedDoc) -> SearchedDoc: # For Now history is banned if vector_db.get()["ids"]: vector_db.delete(vector_db.get()["ids"]) + if logflag: + logger.info(result) return result diff --git a/requirements.txt b/requirements.txt index 53bfbf8d4..ef12b2fc4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,6 +5,7 @@ httpx opentelemetry-api opentelemetry-exporter-otlp opentelemetry-sdk +Pillow prometheus-fastapi-instrumentator pyyaml requests diff --git a/tests/test_agent_langchain.sh b/tests/test_agent_langchain.sh index ad9aae145..6bd97f8d2 100644 --- a/tests/test_agent_langchain.sh +++ b/tests/test_agent_langchain.sh @@ -7,36 +7,37 @@ WORKPATH=$(dirname "$PWD") LOG_PATH="$WORKPATH/tests" ip_address=$(hostname -I | awk '{print $1}') +tgi_port=8080 +tgi_volume=$WORKPATH/data + +export model=mistralai/Mistral-7B-Instruct-v0.3 +export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN} function build_docker_images() { echo "Building the docker images" cd $WORKPATH echo $WORKPATH - docker build -t opea/comps-agent-langchain:latest -f comps/agent/langchain/docker/Dockerfile . - + docker build --no-cache -t opea/comps-agent-langchain:comps -f comps/agent/langchain/docker/Dockerfile . + if [ $? -ne 0 ]; then + echo "opea/comps-agent-langchain built fail" + exit 1 + else + echo "opea/comps-agent-langchain built successful" + fi } -function start_service() { +function start_tgi_service() { # redis endpoint - export model=meta-llama/Meta-Llama-3-8B-Instruct - export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN} echo "token is ${HF_TOKEN}" #single card echo "start tgi gaudi service" - docker run -d --runtime=habana --name "comps-tgi-gaudi-service" -p 8080:80 -v ./data:/data -e HF_TOKEN=$HF_TOKEN -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host ghcr.io/huggingface/tgi-gaudi:latest --model-id $model --max-input-tokens 4096 --max-total-tokens 8092 - sleep 5s - docker logs comps-tgi-gaudi-service - - echo "Starting agent microservice" - docker run -d --runtime=runc --name="comps-langchain-agent-endpoint" -v $WORKPATH/comps/agent/langchain/tools:/home/user/comps/agent/langchain/tools -p 9090:9090 --ipc=host -e HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} -e model=${model} -e strategy=react -e llm_endpoint_url=http://${ip_address}:8080 -e llm_engine=tgi -e recursion_limit=5 -e require_human_feedback=false -e tools=/home/user/comps/agent/langchain/tools/custom_tools.yaml opea/comps-agent-langchain:latest + docker run -d --runtime=habana --name "test-comps-tgi-gaudi-service" -p $tgi_port:80 -v $tgi_volume:/data -e HF_TOKEN=$HF_TOKEN -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host ghcr.io/huggingface/tgi-gaudi:latest --model-id $model --max-input-tokens 4096 --max-total-tokens 8092 sleep 5s - docker logs comps-langchain-agent-endpoint - echo "Waiting tgi gaudi ready" n=0 until [[ "$n" -ge 100 ]] || [[ $ready == true ]]; do - docker logs comps-tgi-gaudi-service 
> ${WORKPATH}/tests/tgi-gaudi-service.log + docker logs test-comps-tgi-gaudi-service &> ${LOG_PATH}/tgi-gaudi-service.log n=$((n+1)) if grep -q Connected ${WORKPATH}/tests/tgi-gaudi-service.log; then break @@ -44,10 +45,45 @@ function start_service() { sleep 5s done sleep 5s - docker logs comps-tgi-gaudi-service echo "Service started successfully" } +function start_react_langchain_agent_service() { + echo "Starting react_langchain agent microservice" + docker run -d --runtime=runc --name="comps-agent-endpoint" -v $WORKPATH/comps/agent/langchain/tools:/home/user/comps/agent/langchain/tools -p 5042:9090 --ipc=host -e HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} -e model=${model} -e strategy=react_langchain -e llm_endpoint_url=http://${ip_address}:${tgi_port} -e llm_engine=tgi -e recursion_limit=10 -e require_human_feedback=false -e tools=/home/user/comps/agent/langchain/tools/custom_tools.yaml opea/comps-agent-langchain:comps + sleep 5s + + docker logs comps-agent-endpoint + echo "Service started successfully" +} + + +function start_react_langgraph_agent_service() { + echo "Starting react_langgraph agent microservice" + docker run -d --runtime=runc --name="comps-agent-endpoint" -v $WORKPATH/comps/agent/langchain/tools:/home/user/comps/agent/langchain/tools -p 5042:9090 --ipc=host -e HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} -e model=${model} -e strategy=react_langgraph -e llm_endpoint_url=http://${ip_address}:${tgi_port} -e llm_engine=tgi -e recursion_limit=10 -e require_human_feedback=false -e tools=/home/user/comps/agent/langchain/tools/custom_tools.yaml opea/comps-agent-langchain:comps + sleep 5s + docker logs comps-agent-endpoint + echo "Service started successfully" +} + +function start_react_langgraph_agent_service_openai() { + echo "Starting react_langgraph agent microservice" + docker run -d --runtime=runc --name="comps-agent-endpoint" -v $WORKPATH/comps/agent/langchain/tools:/home/user/comps/agent/langchain/tools -p 5042:9090 --ipc=host -e model=gpt-4o-mini-2024-07-18 -e strategy=react_langgraph -e llm_engine=openai -e OPENAI_API_KEY=${OPENAI_API_KEY} -e recursion_limit=10 -e require_human_feedback=false -e tools=/home/user/comps/agent/langchain/tools/custom_tools.yaml opea/comps-agent-langchain:comps + sleep 5s + docker logs comps-agent-endpoint + echo "Service started successfully" +} + + +function start_ragagent_agent_service() { + echo "Starting rag agent microservice" + docker run -d --runtime=runc --name="comps-agent-endpoint" -v $WORKPATH/comps/agent/langchain/tools:/home/user/comps/agent/langchain/tools -p 5042:9090 --ipc=host -e HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} -e model=${model} -e strategy=rag_agent -e llm_endpoint_url=http://${ip_address}:${tgi_port} -e llm_engine=tgi -e recursion_limit=10 -e require_human_feedback=false -e tools=/home/user/comps/agent/langchain/tools/custom_tools.yaml opea/comps-agent-langchain:comps + sleep 5s + docker logs comps-agent-endpoint + echo "Service started successfully" +} + + function validate() { local CONTENT="$1" local EXPECTED_RESULT="$2" @@ -63,14 +99,29 @@ function validate() { } function validate_microservice() { - echo "Testing agent service" - local CONTENT=$(curl http://${ip_address}:9090/v1/chat/completions -X POST -H "Content-Type: application/json" -d '{ + echo "Testing agent service - chat completion API" + local CONTENT=$(http_proxy="" curl http://${ip_address}:5042/v1/chat/completions -X POST -H "Content-Type: application/json" -d '{ "query": "What is Intel OPEA project?" 
- }' | tee ${LOG_PATH}/test-agent-langchain.log) + }') local EXIT_CODE=$(validate "$CONTENT" "OPEA" "test-agent-langchain") echo "$EXIT_CODE" local EXIT_CODE="${EXIT_CODE:0-1}" echo "return value is $EXIT_CODE" + if [ "$EXIT_CODE" == "1" ]; then + docker logs test-comps-tgi-gaudi-service &> ${LOG_PATH}/test-comps-tgi-gaudi-service.log + docker logs comps-agent-endpoint &> ${LOG_PATH}/test-comps-langchain-agent-endpoint.log + exit 1 + fi +} + +function validate_assistant_api() { + cd $WORKPATH + echo "Testing agent service - assistant api" + local CONTENT=$(python3 comps/agent/langchain/test_assistant_api.py --ip_addr ${ip_address} --ext_port 5042 --assistants_api_test --query 'What is Intel OPEA project?' 2>&1 | tee ${LOG_PATH}/test-agent-langchain-assistantsapi.log) + local EXIT_CODE=$(validate "$CONTENT" "OPEA" "test-agent-langchain-assistantsapi") + echo "$EXIT_CODE" + local EXIT_CODE="${EXIT_CODE:0-1}" + echo "return value is $EXIT_CODE" if [ "$EXIT_CODE" == "1" ]; then docker logs comps-tgi-gaudi-service &> ${LOG_PATH}/test-comps-tgi-gaudi-service.log docker logs comps-langchain-agent-endpoint &> ${LOG_PATH}/test-comps-langchain-agent-endpoint.log @@ -78,28 +129,57 @@ function validate_microservice() { fi } -function stop_docker() { - cid=$(docker ps -aq --filter "name=comps-tgi-gaudi-service") +function stop_tgi_docker() { + cid=$(docker ps -aq --filter "name=test-comps-tgi-gaudi-service") echo "Stopping the docker containers "${cid} if [[ ! -z "$cid" ]]; then docker rm $cid -f && sleep 1s; fi - cid=$(docker ps -aq --filter "name=comps-langchain-agent-endpoint") + echo "Docker containers stopped successfully" +} + +function stop_agent_docker() { + cid=$(docker ps -aq --filter "name=comps-agent-endpoint") echo "Stopping the docker containers "${cid} if [[ ! -z "$cid" ]]; then docker rm $cid -f && sleep 1s; fi echo "Docker containers stopped successfully" } +function stop_docker() { + stop_tgi_docker + stop_agent_docker +} + function main() { stop_docker - build_docker_images - start_service + start_tgi_service + + # test rag agent + start_ragagent_agent_service + echo "=============Testing RAG Agent=============" validate_microservice + stop_agent_docker + echo "=============================================" + + # test react_langchain + start_react_langchain_agent_service + echo "=============Testing ReAct Langchain=============" + validate_microservice + validate_assistant_api + stop_agent_docker + echo "=============================================" + + # # test react_langgraph + ## For now need OpenAI llms for react_langgraph + # start_react_langgraph_agent_service_openai + # echo "===========Testing ReAct Langgraph (OpenAI LLM)=============" + # validate_microservice + # stop_agent_docker + # echo "=============================================" stop_docker echo y | docker system prune 2>&1 > /dev/null - } main diff --git a/tests/test_asr_whisper.sh b/tests/test_asr.sh similarity index 64% rename from tests/test_asr_whisper.sh rename to tests/test_asr.sh index 5e6e4a8c8..7334a18f1 100644 --- a/tests/test_asr_whisper.sh +++ b/tests/test_asr.sh @@ -2,7 +2,7 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -set -xe +set -x WORKPATH=$(dirname "$PWD") ip_address=$(hostname -I | awk '{print $1}') @@ -10,23 +10,37 @@ ip_address=$(hostname -I | awk '{print $1}') function build_docker_images() { cd $WORKPATH echo $(pwd) - docker build -t opea/whisper:latest -f comps/asr/whisper/Dockerfile . - docker build -t opea/asr:latest -f comps/asr/Dockerfile . 
+ docker build --no-cache -t opea/whisper:comps -f comps/asr/whisper/Dockerfile . + if [ $? -ne 0 ]; then + echo "opea/whisper built fail" + exit 1 + else + echo "opea/whisper built successful" + fi + docker build --no-cache -t opea/asr:comps -f comps/asr/Dockerfile . + if [ $? -ne 0 ]; then + echo "opea/asr built fail" + exit 1 + else + echo "opea/asr built successful" + fi } function start_service() { unset http_proxy - docker run -d --name="test-comps-asr-whisper" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p 7066:7066 --ipc=host opea/whisper:latest - docker run -d --name="test-comps-asr" -e ASR_ENDPOINT=http://$ip_address:7066 -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p 9099:9099 --ipc=host opea/asr:latest + docker run -d --name="test-comps-asr-whisper" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p 7066:7066 --ipc=host opea/whisper:comps + docker run -d --name="test-comps-asr" -e ASR_ENDPOINT=http://$ip_address:7066 -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p 9089:9099 --ipc=host opea/asr:comps sleep 3m } function validate_microservice() { - result=$(http_proxy="" curl http://localhost:9099/v1/audio/transcriptions -XPOST -d '{"byte_str": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA"}' -H 'Content-Type: application/json') + result=$(http_proxy="" curl http://localhost:9089/v1/audio/transcriptions -XPOST -d '{"byte_str": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA"}' -H 'Content-Type: application/json') if [[ $result == *"you"* ]]; then echo "Result correct." else echo "Result wrong." + docker logs test-comps-asr-whisper + docker logs test-comps-asr exit 1 fi diff --git a/tests/test_chathistory_mongo.sh b/tests/test_chathistory_mongo.sh index 1e60a59c9..c821fc05e 100755 --- a/tests/test_chathistory_mongo.sh +++ b/tests/test_chathistory_mongo.sh @@ -2,7 +2,7 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -set -xe +set -x WORKPATH=$(dirname "$PWD") ip_address=$(hostname -I | awk '{print $1}') @@ -18,11 +18,17 @@ function build_docker_images() { docker run -d -p 27017:27017 --name=test-comps-mongo mongo:latest docker build --no-cache -t opea/chathistory-mongo-server:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/chathistory/mongo/docker/Dockerfile . + if [ $? -ne 0 ]; then + echo "opea/chathistory-mongo-server built fail" + exit 1 + else + echo "opea/chathistory-mongo-server built successful" + fi } function start_service() { - docker run -d --name="test-comps-chathistory-mongo-server" -p 6013:6013 -p 6012:6012 -p 6014:6014 -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e no_proxy=$no_proxy -e MONGO_HOST=${MONGO_HOST} -e MONGO_PORT=${MONGO_PORT} -e DB_NAME=${DB_NAME} -e COLLECTION_NAME=${COLLECTION_NAME} opea/chathistory-mongo-server:comps + docker run -d --name="test-comps-chathistory-mongo-server" -p 6012:6012 -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e no_proxy=$no_proxy -e MONGO_HOST=${MONGO_HOST} -e MONGO_PORT=${MONGO_PORT} -e DB_NAME=${DB_NAME} -e COLLECTION_NAME=${COLLECTION_NAME} opea/chathistory-mongo-server:comps sleep 10s } @@ -42,6 +48,7 @@ function validate_microservice() { echo "Result correct." else echo "Result wrong." 
+ docker logs test-comps-chathistory-mongo-server exit 1 fi diff --git a/tests/test_dataprep_milvus.sh b/tests/test_dataprep_milvus.sh index e379882d5..d9d9b6cab 100644 --- a/tests/test_dataprep_milvus.sh +++ b/tests/test_dataprep_milvus.sh @@ -13,24 +13,34 @@ function build_docker_images() { echo $(pwd) # langchain mosec embedding image docker build --no-cache -t opea/langchain-mosec:comps --build-arg http_proxy=$http_proxy --build-arg https_proxy=$https_proxy -f comps/embeddings/langchain-mosec/mosec-docker/Dockerfile . + if [ $? -ne 0 ]; then + echo "opea/langchain-mosec built fail" + exit 1 + else + echo "opea/langchain-mosec built successful" + fi # dataprep milvus image docker build --no-cache -t opea/dataprep-milvus:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/milvus/docker/Dockerfile . + if [ $? -ne 0 ]; then + echo "opea/dataprep-milvus built fail" + exit 1 + else + echo "opea/dataprep-milvus built successful" + fi } function start_service() { # start milvus vector db mkdir $WORKPATH/milvus cd $WORKPATH/milvus - wget https://raw.githubusercontent.com/milvus-io/milvus/v2.4.6/configs/milvus.yaml - wget https://github.com/milvus-io/milvus/releases/download/v2.4.6/milvus-standalone-docker-compose.yml -O docker-compose.yml + wget https://raw.githubusercontent.com/milvus-io/milvus/v2.4.9/configs/milvus.yaml + wget https://github.com/milvus-io/milvus/releases/download/v2.4.9/milvus-standalone-docker-compose.yml -O docker-compose.yml sed '/- \${DOCKER_VOLUME_DIRECTORY:-\.}\/volumes\/milvus:\/var\/lib\/milvus/a \ \ \ \ \ \ - \${DOCKER_VOLUME_DIRECTORY:-\.}\/milvus.yaml:\/milvus\/configs\/milvus.yaml' -i docker-compose.yml docker compose up -d # set service ports mosec_embedding_port=5021 dataprep_service_port=5022 - dataprep_file_service_port=5023 - dataprep_del_service_port=5024 # start mosec embedding service docker run -d --name="test-comps-dataprep-milvus-mosec-server" -p $mosec_embedding_port:8000 -e http_proxy=$http_proxy -e https_proxy=$https_proxy opea/langchain-mosec:comps @@ -38,69 +48,89 @@ function start_service() { # start dataprep service MOSEC_EMBEDDING_ENDPOINT="http://${ip_address}:${mosec_embedding_port}" MILVUS=${ip_address} - docker run -d --name="test-comps-dataprep-milvus-server" -p ${dataprep_service_port}:6010 -p ${dataprep_file_service_port}:6011 -p ${dataprep_del_service_port}:6012 -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e no_proxy=$no_proxy -e MOSEC_EMBEDDING_ENDPOINT=${MOSEC_EMBEDDING_ENDPOINT} -e MILVUS=${MILVUS} --ipc=host opea/dataprep-milvus:comps + docker run -d --name="test-comps-dataprep-milvus-server" -p ${dataprep_service_port}:6010 -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e no_proxy=$no_proxy -e MOSEC_EMBEDDING_ENDPOINT=${MOSEC_EMBEDDING_ENDPOINT} -e MILVUS=${MILVUS} -e LOGFLAG=true --ipc=host opea/dataprep-milvus:comps sleep 1m } -function validate_microservice() { - cd $LOG_PATH - - # test /v1/dataprep - dataprep_service_port=5022 - URL="http://${ip_address}:$dataprep_service_port/v1/dataprep" - echo "Deep learning is a subset of machine learning that utilizes neural networks with multiple layers to analyze various levels of abstract data representations. It enables computers to identify patterns and make decisions with minimal human intervention by learning from large amounts of data." 
> $LOG_PATH/dataprep_file.txt - HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -F 'files=@./dataprep_file.txt' -H 'Content-Type: multipart/form-data' "$URL") - if [ "$HTTP_STATUS" -eq 200 ]; then - echo "[ dataprep ] HTTP status is 200. Checking content..." - cp ./dataprep_file.txt ./dataprep_file2.txt - local CONTENT=$(curl -s -X POST -F 'files=@./dataprep_file2.txt' -H 'Content-Type: multipart/form-data' "$URL" | tee ${LOG_PATH}/dataprep.log) - - if echo "$CONTENT" | grep -q "Data preparation succeeded"; then - echo "[ dataprep ] Content is as expected." - else - echo "[ dataprep ] Content does not match the expected result: $CONTENT" - docker logs test-comps-dataprep-milvus-server >> ${LOG_PATH}/dataprep.log - exit 1 - fi +function validate_service() { + local URL="$1" + local EXPECTED_RESULT="$2" + local SERVICE_NAME="$3" + local DOCKER_NAME="$4" + local INPUT_DATA="$5" + + if [[ $SERVICE_NAME == *"dataprep_upload_file"* ]]; then + cd $LOG_PATH + HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -F 'files=@./dataprep_file.txt' -H 'Content-Type: multipart/form-data' "$URL") + elif [[ $SERVICE_NAME == *"dataprep_upload_link"* ]]; then + HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -F 'link_list=["https://www.ces.tech/"]' -F "chunk_size=500" "$URL") + elif [[ $SERVICE_NAME == *"dataprep_get"* ]]; then + HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -H 'Content-Type: application/json' "$URL") + elif [[ $SERVICE_NAME == *"dataprep_del"* ]]; then + HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -d '{"file_path": "all"}' -H 'Content-Type: application/json' "$URL") else - echo "[ dataprep ] HTTP status is not 200. Received status was $HTTP_STATUS" - docker logs test-comps-dataprep-milvus-server >> ${LOG_PATH}/dataprep.log - exit 1 + HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL") fi + HTTP_STATUS=$(echo $HTTP_RESPONSE | tr -d '\n' | sed -e 's/.*HTTPSTATUS://') + RESPONSE_BODY=$(echo $HTTP_RESPONSE | sed -e 's/HTTPSTATUS\:.*//g') - # test /v1/dataprep/get_file - dataprep_file_service_port=5023 - URL="http://${ip_address}:$dataprep_file_service_port/v1/dataprep/get_file" - HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -H 'Content-Type: application/json' "$URL") - if [ "$HTTP_STATUS" -eq 200 ]; then - echo "[ dataprep - file ] HTTP status is 200. Checking content..." - local CONTENT=$(curl -s -X POST -H 'Content-Type: application/json' "$URL" | tee ${LOG_PATH}/dataprep_file.log) - - if echo "$CONTENT" | grep -q '{"name":'; then - echo "[ dataprep - file ] Content is as expected." - else - echo "[ dataprep - file ] Content does not match the expected result: $CONTENT" - docker logs test-comps-dataprep-milvus-server >> ${LOG_PATH}/dataprep_file.log - exit 1 + docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log + + # check response status + if [ "$HTTP_STATUS" -ne "200" ]; then + echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" + ##################### + if [[ $SERVICE_NAME == *"dataprep_upload_link"* ]]; then + docker logs test-comps-dataprep-milvus-mosec-server >> ${LOG_PATH}/mosec-embedding.log fi + exit 1 else - echo "[ dataprep - file ] HTTP status is not 200. Received status was $HTTP_STATUS" - docker logs test-comps-dataprep-milvus-server >> ${LOG_PATH}/dataprep_file.log + echo "[ $SERVICE_NAME ] HTTP status is 200. 
Checking content..." + fi + # check response body + if [[ "$RESPONSE_BODY" != *"$EXPECTED_RESULT"* ]]; then + echo "[ $SERVICE_NAME ] Content does not match the expected result: $RESPONSE_BODY" exit 1 + else + echo "[ $SERVICE_NAME ] Content is as expected." fi + sleep 5s +} + +function validate_microservice() { + cd $LOG_PATH + dataprep_service_port=5022 + # test /v1/dataprep/delete_file - dataprep_del_service_port=5024 - URL="http://${ip_address}:$dataprep_del_service_port/v1/dataprep/delete_file" - HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -d '{"file_path": "dataprep_file.txt"}' -H 'Content-Type: application/json' "$URL") - if [ "$HTTP_STATUS" -eq 200 ]; then - echo "[ dataprep - del ] HTTP status is 200." - docker logs test-comps-dataprep-milvus-server >> ${LOG_PATH}/dataprep_del.log - else - echo "[ dataprep - del ] HTTP status is not 200. Received status was $HTTP_STATUS" - docker logs test-comps-dataprep-milvus-server >> ${LOG_PATH}/dataprep_del.log - exit 1 - fi + validate_service \ + "http://${ip_address}:${dataprep_service_port}/v1/dataprep/delete_file" \ + '{"status":true}' \ + "dataprep_del" \ + "test-comps-dataprep-milvus-server" + + # test /v1/dataprep upload file + echo "Deep learning is a subset of machine learning that utilizes neural networks with multiple layers to analyze various levels of abstract data representations. It enables computers to identify patterns and make decisions with minimal human intervention by learning from large amounts of data." > $LOG_PATH/dataprep_file.txt + validate_service \ + "http://${ip_address}:${dataprep_service_port}/v1/dataprep" \ + "Data preparation succeeded" \ + "dataprep_upload_file" \ + "test-comps-dataprep-milvus-server" + + # test /v1/dataprep upload link + validate_service \ + "http://${ip_address}:${dataprep_service_port}/v1/dataprep" \ + "Data preparation succeeded" \ + "dataprep_upload_link" \ + "test-comps-dataprep-milvus-server" + + # test /v1/dataprep/get_file + validate_service \ + "http://${ip_address}:${dataprep_service_port}/v1/dataprep/get_file" \ + '{"name":' \ + "dataprep_get" \ + "test-comps-dataprep-milvus-server" + } function stop_docker() { diff --git a/tests/test_dataprep_pgvector.sh b/tests/test_dataprep_pgvector.sh index c9daba9fc..ca5649fe9 100755 --- a/tests/test_dataprep_pgvector.sh +++ b/tests/test_dataprep_pgvector.sh @@ -2,10 +2,13 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -set -xe +set -x WORKPATH=$(dirname "$PWD") +LOG_PATH="$WORKPATH/tests" ip_address=$(hostname -I | awk '{print $1}') +dataprep_service_port=5013 + function build_docker_images() { cd $WORKPATH @@ -13,7 +16,13 @@ function build_docker_images() { docker pull pgvector/pgvector:0.7.0-pg16 # build dataprep image for pgvector - docker build -t opea/dataprep-pgvector:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f $WORKPATH/comps/dataprep/pgvector/langchain/docker/Dockerfile . + docker build --no-cache -t opea/dataprep-pgvector:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f $WORKPATH/comps/dataprep/pgvector/langchain/docker/Dockerfile . + if [ $? 
-ne 0 ]; then + echo "opea/dataprep-pgvector built fail" + exit 1 + else + echo "opea/dataprep-pgvector built successful" + fi } function start_service() { @@ -21,27 +30,78 @@ function start_service() { export POSTGRES_PASSWORD=testpwd export POSTGRES_DB=vectordb - docker run --name vectorstore-postgres -e POSTGRES_USER=${POSTGRES_USER} -e POSTGRES_HOST_AUTH_METHOD=trust -e POSTGRES_DB=${POSTGRES_DB} -e POSTGRES_PASSWORD=${POSTGRES_PASSWORD} -p 5432:5432 -d -v $WORKPATH/comps/vectorstores/langchain/pgvector/init.sql:/docker-entrypoint-initdb.d/init.sql pgvector/pgvector:0.7.0-pg16 + docker run --name test-comps-vectorstore-postgres -e POSTGRES_USER=${POSTGRES_USER} -e POSTGRES_HOST_AUTH_METHOD=trust -e POSTGRES_DB=${POSTGRES_DB} -e POSTGRES_PASSWORD=${POSTGRES_PASSWORD} -p 5432:5432 -d -v $WORKPATH/comps/vectorstores/langchain/pgvector/init.sql:/docker-entrypoint-initdb.d/init.sql pgvector/pgvector:0.7.0-pg16 sleep 10s - docker run -d --name="dataprep-pgvector" -p 6007:6007 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e PG_CONNECTION_STRING=postgresql+psycopg2://${POSTGRES_USER}:${POSTGRES_PASSWORD}@$ip_address:5432/${POSTGRES_DB} opea/dataprep-pgvector:latest + docker run -d --name="test-comps-dataprep-pgvector" -p ${dataprep_service_port}:6007 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e PG_CONNECTION_STRING=postgresql+psycopg2://${POSTGRES_USER}:${POSTGRES_PASSWORD}@$ip_address:5432/${POSTGRES_DB} opea/dataprep-pgvector:comps sleep 3m } function validate_microservice() { - URL="http://$ip_address:6007/v1/dataprep" - echo 'The OPEA platform includes: Detailed framework of composable building blocks for state-of-the-art generative AI systems including LLMs, data stores, and prompt engines' > ./dataprep_file.txt - curl --noproxy $ip_address --location --request POST \ - --form 'files=@./dataprep_file.txt' $URL + cd $LOG_PATH + + # test /v1/dataprep + URL="http://${ip_address}:$dataprep_service_port/v1/dataprep" + echo "Deep learning is a subset of machine learning that utilizes neural networks with multiple layers to analyze various levels of abstract data representations. It enables computers to identify patterns and make decisions with minimal human intervention by learning from large amounts of data." > $LOG_PATH/dataprep_file.txt + HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -F 'files=@./dataprep_file.txt' -H 'Content-Type: multipart/form-data' "$URL") + if [ "$HTTP_STATUS" -eq 200 ]; then + echo "[ dataprep ] HTTP status is 200. Checking content..." + cp ./dataprep_file.txt ./dataprep_file2.txt + local CONTENT=$(curl -s -X POST -F 'files=@./dataprep_file2.txt' -H 'Content-Type: multipart/form-data' "$URL" | tee ${LOG_PATH}/dataprep.log) + + if echo "$CONTENT" | grep -q "Data preparation succeeded"; then + echo "[ dataprep ] Content is as expected." + else + echo "[ dataprep ] Content does not match the expected result: $CONTENT" + docker logs test-comps-dataprep-pgvector >> ${LOG_PATH}/dataprep.log + exit 1 + fi + else + echo "[ dataprep ] HTTP status is not 200. Received status was $HTTP_STATUS" + docker logs test-comps-dataprep-pgvector >> ${LOG_PATH}/dataprep.log + exit 1 + fi + + # test /v1/dataprep/get_file + URL="http://${ip_address}:$dataprep_service_port/v1/dataprep/get_file" + HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -H 'Content-Type: application/json' "$URL") + if [ "$HTTP_STATUS" -eq 200 ]; then + echo "[ dataprep - file ] HTTP status is 200. Checking content..." 
+ local CONTENT=$(curl -s -X POST -H 'Content-Type: application/json' "$URL" | tee ${LOG_PATH}/dataprep_file.log) + + if echo "$CONTENT" | grep -q '{"name":'; then + echo "[ dataprep - file ] Content is as expected." + else + echo "[ dataprep - file ] Content does not match the expected result: $CONTENT" + docker logs test-comps-dataprep-pgvector >> ${LOG_PATH}/dataprep_file.log + exit 1 + fi + else + echo "[ dataprep - file ] HTTP status is not 200. Received status was $HTTP_STATUS" + docker logs test-comps-dataprep-pgvector >> ${LOG_PATH}/dataprep_file.log + exit 1 + fi + + # test /v1/dataprep/delete_file + URL="http://${ip_address}:$dataprep_service_port/v1/dataprep/delete_file" + HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -d '{"file_path": "dataprep_file.txt"}' -H 'Content-Type: application/json' "$URL") + if [ "$HTTP_STATUS" -eq 200 ]; then + echo "[ dataprep - del ] HTTP status is 200." + docker logs test-comps-dataprep-pgvector >> ${LOG_PATH}/dataprep_del.log + else + echo "[ dataprep - del ] HTTP status is not 200. Received status was $HTTP_STATUS" + docker logs test-comps-dataprep-pgvector >> ${LOG_PATH}/dataprep_del.log + exit 1 + fi } function stop_docker() { - cid=$(docker ps -aq --filter "name=vectorstore-postgres*") + cid=$(docker ps -aq --filter "name=test-comps-vectorstore-postgres*") if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi - cid=$(docker ps -aq --filter "name=dataprep-pgvector*") + cid=$(docker ps -aq --filter "name=test-comps-dataprep-pgvector*") if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi } diff --git a/tests/test_dataprep_pinecone.sh b/tests/test_dataprep_pinecone.sh new file mode 100755 index 000000000..4d0f64fac --- /dev/null +++ b/tests/test_dataprep_pinecone.sh @@ -0,0 +1,78 @@ +#!/bin/bash +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +set -x + +WORKPATH=$(dirname "$PWD") +ip_address=$(hostname -I | awk '{print $1}') +function build_docker_images() { + cd $WORKPATH + + # build dataprep image for pinecone + docker build --no-cache -t opea/dataprep-pinecone:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f $WORKPATH/comps/dataprep/pinecone/docker/Dockerfile . + if [ $? -ne 0 ]; then + echo "opea/dataprep-pinecone built fail" + exit 1 + else + echo "opea/dataprep-pinecone built successful" + fi +} + +function start_service() { + export PINECONE_API_KEY=$PINECONE_KEY + export PINECONE_INDEX_NAME="test-index" + export HUGGINGFACEHUB_API_TOKEN=$HF_TOKEN + + docker run -d --name="test-comps-dataprep-pinecone" -p 5039:6007 -p 5040:6008 -p 5041:6009 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e no_proxy=$no_proxy -e PINECONE_API_KEY=$PINECONE_API_KEY -e PINECONE_INDEX_NAME=$PINECONE_INDEX_NAME opea/dataprep-pinecone:comps + + sleep 1m +} + +function validate_microservice() { + URL="http://$ip_address:5039/v1/dataprep" + echo 'The OPEA platform includes: Detailed framework of composable building blocks for state-of-the-art generative AI systems including LLMs, data stores, and prompt engines' > ./dataprep_file.txt + result=$(curl --noproxy $ip_address --location --request POST \ + --form 'files=@./dataprep_file.txt' $URL) + if [[ $result == *"200"* ]]; then + echo "Result correct." + else + echo "Result wrong. 
Received was $result" + docker logs test-comps-dataprep-pinecone + exit 1 + fi + DELETE_URL="http://$ip_address:5041/v1/dataprep/delete_file" + result=$(curl --noproxy $ip_address --location --request POST \ + -d '{"file_path": "all"}' -H 'Content-Type: application/json' $DELETE_URL) + if [[ $result == *"true"* ]]; then + echo "Result correct." + else + echo "Result wrong. Received was $result" + docker logs test-comps-dataprep-pinecone + exit 1 + fi +} + +function stop_docker() { + cid=$(docker ps -aq --filter "name=vectorstore-pinecone*") + if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi + + cid=$(docker ps -aq --filter "name=test-comps-dataprep-pinecone*") + if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi +} + +function main() { + + stop_docker + + build_docker_images + start_service + + validate_microservice + + stop_docker + echo y | docker system prune + +} + +main diff --git a/tests/test_dataprep_qdrant_langchain.sh b/tests/test_dataprep_qdrant_langchain.sh new file mode 100644 index 000000000..632d7e06f --- /dev/null +++ b/tests/test_dataprep_qdrant_langchain.sh @@ -0,0 +1,128 @@ +#!/bin/bash +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +set -x + +WORKPATH=$(dirname "$PWD") +LOG_PATH="$WORKPATH/tests" +ip_address=$(hostname -I | awk '{print $1}') + +function build_docker_images() { + cd $WORKPATH + + # dataprep qdrant image + docker build --no-cache -t opea/dataprep-qdrant:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/qdrant/docker/Dockerfile . + if [ $? -ne 0 ]; then + echo "opea/dataprep-qdrant built fail" + exit 1 + else + echo "opea/dataprep-qdrant built successful" + fi +} + +function start_service() { + QDRANT_PORT=6360 + docker run -d --name="test-comps-dataprep-qdrant-langchain" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p $QDRANT_PORT:6333 -p 6334:6334 --ipc=host qdrant/qdrant + tei_embedding_port=6361 + model="BAAI/bge-base-en-v1.5" + docker run -d --name="test-comps-dataprep-qdrant-langchain-tei" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p $tei_embedding_port:80 -v ./data:/data --pull always ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 --model-id $model + dataprep_service_port=6362 + TEI_EMBEDDING_ENDPOINT="http://${ip_address}:${tei_embedding_port}" + COLLECTION_NAME="rag-qdrant" + docker run -d --name="test-comps-dataprep-qdrant-langchain-server" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e QDRANT_HOST=$ip_address -e QDRANT_PORT=$QDRANT_PORT -e COLLECTION_NAME=$COLLECTION_NAME -e TEI_ENDPOINT=$TEI_EMBEDDING_ENDPOINT -p ${dataprep_service_port}:6007 --ipc=host opea/dataprep-qdrant:comps + sleep 1m +} + +function validate_services() { + local URL="$1" + local EXPECTED_RESULT="$2" + local SERVICE_NAME="$3" + local DOCKER_NAME="$4" + local INPUT_DATA="$5" + + if [[ $SERVICE_NAME == *"dataprep_upload_file"* ]]; then + cd $LOG_PATH + HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -F 'files=@./dataprep_file.txt' -H 'Content-Type: multipart/form-data' "$URL") + elif [[ $SERVICE_NAME == *"dataprep_upload_link"* ]]; then + HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -F 'link_list=["https://www.ces.tech/"]' "$URL") + else + HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL") + fi + HTTP_STATUS=$(echo $HTTP_RESPONSE | tr -d '\n' | sed -e 
's/.*HTTPSTATUS://') + RESPONSE_BODY=$(echo $HTTP_RESPONSE | sed -e 's/HTTPSTATUS\:.*//g') + + docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log + + # check response status + if [ "$HTTP_STATUS" -ne "200" ]; then + echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" + docker logs test-comps-dataprep-qdrant-langchain + docker logs test-comps-dataprep-qdrant-langchain-tei + docker logs test-comps-dataprep-qdrant-langchain-server + exit 1 + else + echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." + fi + # check response body + if [[ "$RESPONSE_BODY" != *"$EXPECTED_RESULT"* ]]; then + echo "[ $SERVICE_NAME ] Content does not match the expected result: $RESPONSE_BODY" + docker logs test-comps-dataprep-qdrant-langchain + docker logs test-comps-dataprep-qdrant-langchain-tei + docker logs test-comps-dataprep-qdrant-langchain-server + exit 1 + else + echo "[ $SERVICE_NAME ] Content is as expected." + fi + + sleep 1s +} + +function validate_microservice() { + # tei for embedding service + validate_services \ + "${ip_address}:6361/embed" \ + "[[" \ + "tei_embedding" \ + "test-comps-dataprep-qdrant-langchain-tei" \ + '{"inputs":"What is Deep Learning?"}' + + # dataprep upload file + echo "Deep learning is a subset of machine learning that utilizes neural networks with multiple layers to analyze various levels of abstract data representations. It enables computers to identify patterns and make decisions with minimal human intervention by learning from large amounts of data." > $LOG_PATH/dataprep_file.txt + validate_services \ + "${ip_address}:6362/v1/dataprep" \ + "Data preparation succeeded" \ + "dataprep_upload_file" \ + "test-comps-dataprep-qdrant-langchain-server" + + # dataprep upload link + validate_services \ + "${ip_address}:6362/v1/dataprep" \ + "Data preparation succeeded" \ + "dataprep_upload_link" \ + "test-comps-dataprep-qdrant-langchain-server" + +} + +function stop_docker() { + cid=$(docker ps -aq --filter "name=test-comps-dataprep-qdrant-langchain*") + if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi + + rm $LOG_PATH/dataprep_file.txt +} + +function main() { + + stop_docker + + build_docker_images + start_service + + validate_microservice + + stop_docker + echo y | docker system prune + +} + +main diff --git a/tests/test_dataprep_redis_langchain.sh b/tests/test_dataprep_redis_langchain.sh index 760e7aa3d..82c25b9d3 100644 --- a/tests/test_dataprep_redis_langchain.sh +++ b/tests/test_dataprep_redis_langchain.sh @@ -12,16 +12,20 @@ function build_docker_images() { cd $WORKPATH echo $(pwd) docker build --no-cache -t opea/dataprep-redis:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/redis/langchain/docker/Dockerfile . + if [ $? 
-ne 0 ]; then + echo "opea/dataprep-redis built fail" + exit 1 + else + echo "opea/dataprep-redis built successful" + fi } function start_service() { REDIS_PORT=6380 docker run -d --name="test-comps-dataprep-redis-langchain" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p $REDIS_PORT:6379 -p 8002:8001 --ipc=host redis/redis-stack:7.2.0-v9 dataprep_service_port=5013 - dataprep_file_service_port=5016 - dataprep_del_service_port=5020 REDIS_URL="redis://${ip_address}:${REDIS_PORT}" - docker run -d --name="test-comps-dataprep-redis-langchain-server" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e REDIS_URL=$REDIS_URL -e REDIS_HOST=$ip_address -e REDIS_PORT=$REDIS_PORT -p ${dataprep_service_port}:6007 -p ${dataprep_file_service_port}:6008 -p ${dataprep_del_service_port}:6009 --ipc=host opea/dataprep-redis:comps + docker run -d --name="test-comps-dataprep-redis-langchain-server" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e REDIS_URL=$REDIS_URL -e REDIS_HOST=$ip_address -e REDIS_PORT=$REDIS_PORT -p ${dataprep_service_port}:6007 --ipc=host opea/dataprep-redis:comps sleep 1m } @@ -35,16 +39,17 @@ function validate_microservice() { HTTP_STATUS=$(echo $HTTP_RESPONSE | tr -d '\n' | sed -e 's/.*HTTPSTATUS://') RESPONSE_BODY=$(echo $HTTP_RESPONSE | sed -e 's/HTTPSTATUS\:.*//g') SERVICE_NAME="dataprep - upload - file" - docker logs test-comps-dataprep-redis-langchain-server >> ${LOG_PATH}/dataprep_upload_file.log if [ "$HTTP_STATUS" -ne "200" ]; then echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" + docker logs test-comps-dataprep-redis-langchain-server >> ${LOG_PATH}/dataprep_upload_file.log exit 1 else echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." fi if [[ "$RESPONSE_BODY" != *"Data preparation succeeded"* ]]; then echo "[ $SERVICE_NAME ] Content does not match the expected result: $RESPONSE_BODY" + docker logs test-comps-dataprep-redis-langchain-server >> ${LOG_PATH}/dataprep_upload_file.log exit 1 else echo "[ $SERVICE_NAME ] Content is as expected." @@ -56,55 +61,56 @@ function validate_microservice() { HTTP_STATUS=$(echo $HTTP_RESPONSE | tr -d '\n' | sed -e 's/.*HTTPSTATUS://') RESPONSE_BODY=$(echo $HTTP_RESPONSE | sed -e 's/HTTPSTATUS\:.*//g') SERVICE_NAME="dataprep - upload - link" - docker logs test-comps-dataprep-redis-langchain-server >> ${LOG_PATH}/dataprep_upload_link.log + if [ "$HTTP_STATUS" -ne "200" ]; then echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" + docker logs test-comps-dataprep-redis-langchain-server >> ${LOG_PATH}/dataprep_upload_link.log exit 1 else echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." fi if [[ "$RESPONSE_BODY" != *"Data preparation succeeded"* ]]; then echo "[ $SERVICE_NAME ] Content does not match the expected result: $RESPONSE_BODY" + docker logs test-comps-dataprep-redis-langchain-server >> ${LOG_PATH}/dataprep_upload_link.log exit 1 else echo "[ $SERVICE_NAME ] Content is as expected." 
fi # test /v1/dataprep/get_file - dataprep_file_service_port=5016 - URL="http://${ip_address}:$dataprep_file_service_port/v1/dataprep/get_file" + URL="http://${ip_address}:$dataprep_service_port/v1/dataprep/get_file" HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST "$URL") HTTP_STATUS=$(echo $HTTP_RESPONSE | tr -d '\n' | sed -e 's/.*HTTPSTATUS://') RESPONSE_BODY=$(echo $HTTP_RESPONSE | sed -e 's/HTTPSTATUS\:.*//g') SERVICE_NAME="dataprep - get" - docker logs test-comps-dataprep-redis-langchain-server >> ${LOG_PATH}/dataprep_file.log if [ "$HTTP_STATUS" -ne "200" ]; then echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" + docker logs test-comps-dataprep-redis-langchain-server >> ${LOG_PATH}/dataprep_file.log exit 1 else echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." fi if [[ "$RESPONSE_BODY" != *'{"name":'* ]]; then echo "[ $SERVICE_NAME ] Content does not match the expected result: $RESPONSE_BODY" + docker logs test-comps-dataprep-redis-langchain-server >> ${LOG_PATH}/dataprep_file.log exit 1 else echo "[ $SERVICE_NAME ] Content is as expected." fi # test /v1/dataprep/delete_file - dataprep_del_service_port=5020 - URL="http://${ip_address}:$dataprep_del_service_port/v1/dataprep/delete_file" + URL="http://${ip_address}:$dataprep_service_port/v1/dataprep/delete_file" HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -d '{"file_path": "dataprep_file.txt"}' -H 'Content-Type: application/json' "$URL") HTTP_STATUS=$(echo $HTTP_RESPONSE | tr -d '\n' | sed -e 's/.*HTTPSTATUS://') RESPONSE_BODY=$(echo $HTTP_RESPONSE | sed -e 's/HTTPSTATUS\:.*//g') SERVICE_NAME="dataprep - del" - docker logs test-comps-dataprep-redis-langchain-server >> ${LOG_PATH}/dataprep_del.log # check response status if [ "$HTTP_STATUS" -ne "200" ]; then echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" + docker logs test-comps-dataprep-redis-langchain-server >> ${LOG_PATH}/dataprep_del.log exit 1 else echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." @@ -112,6 +118,7 @@ function validate_microservice() { # check response body if [[ "$RESPONSE_BODY" != *'{"status":true}'* ]]; then echo "[ $SERVICE_NAME ] Content does not match the expected result: $RESPONSE_BODY" + docker logs test-comps-dataprep-redis-langchain-server >> ${LOG_PATH}/dataprep_del.log exit 1 else echo "[ $SERVICE_NAME ] Content is as expected." diff --git a/tests/test_dataprep_redis_langchain_ray.sh b/tests/test_dataprep_redis_langchain_ray.sh index f377d9ef5..9b1303d90 100644 --- a/tests/test_dataprep_redis_langchain_ray.sh +++ b/tests/test_dataprep_redis_langchain_ray.sh @@ -11,20 +11,25 @@ ip_address=$(hostname -I | awk '{print $1}') function build_docker_images() { echo "Building the docker images" cd $WORKPATH - docker build -t opea/dataprep-on-ray-redis:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/redis/langchain_ray/docker/Dockerfile . - echo "Docker image built successfully" + docker build --no-cache -t opea/dataprep-on-ray-redis:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/redis/langchain_ray/docker/Dockerfile . + if [ $? 
-ne 0 ]; then + echo "opea/dataprep-on-ray-redis built fail" + exit 1 + else + echo "opea/dataprep-on-ray-redis built successful" + fi } function start_service() { echo "Starting redis microservice" # redis endpoint - docker run -d --name="test-comps-dataprep-redis-ray" --runtime=runc -p 6382:6379 -p 8004:8001 redis/redis-stack:7.2.0-v9 + docker run -d --name="test-comps-dataprep-redis-ray" --runtime=runc -p 5038:6379 -p 8004:8001 redis/redis-stack:7.2.0-v9 # dataprep-redis-server endpoint - export REDIS_URL="redis://${ip_address}:6382" + export REDIS_URL="redis://${ip_address}:5038" export INDEX_NAME="rag-redis" echo "Starting dataprep-redis-server" - docker run -d --name="test-comps-dataprep-redis-ray-server" --runtime=runc -p 6009:6007 -p 6010:6008 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e REDIS_URL=$REDIS_URL -e INDEX_NAME=$INDEX_NAME -e TEI_ENDPOINT=$TEI_ENDPOINT -e TIMEOUT_SECONDS=600 opea/dataprep-on-ray-redis:latest + docker run -d --name="test-comps-dataprep-redis-ray-server" --runtime=runc -p 5037:6007 -p 6010:6008 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e REDIS_URL=$REDIS_URL -e INDEX_NAME=$INDEX_NAME -e TEI_ENDPOINT=$TEI_ENDPOINT -e TIMEOUT_SECONDS=600 opea/dataprep-on-ray-redis:comps sleep 10 echo "Service started successfully" @@ -33,7 +38,7 @@ function start_service() { function validate_microservice() { cd $LOG_PATH - dataprep_service_port=6009 + dataprep_service_port=5037 export URL="http://${ip_address}:$dataprep_service_port/v1/dataprep" echo "Starting validating the microservice" diff --git a/tests/test_dataprep_redis_llama_index.sh b/tests/test_dataprep_redis_llama_index.sh index db959d821..a7d20160b 100644 --- a/tests/test_dataprep_redis_llama_index.sh +++ b/tests/test_dataprep_redis_llama_index.sh @@ -12,6 +12,12 @@ function build_docker_images() { cd $WORKPATH echo $(pwd) docker build --no-cache -t opea/dataprep-redis-llama-index:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/redis/llama_index/docker/Dockerfile . + if [ $? -ne 0 ]; then + echo "opea/dataprep-redis-llama-index built fail" + exit 1 + else + echo "opea/dataprep-redis-llama-index built successful" + fi } function start_service() { diff --git a/tests/test_dataprep_redis_multimodal_langchain.sh b/tests/test_dataprep_redis_multimodal_langchain.sh new file mode 100644 index 000000000..e5a75f860 --- /dev/null +++ b/tests/test_dataprep_redis_multimodal_langchain.sh @@ -0,0 +1,278 @@ +#!/bin/bash +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +set -x + +WORKPATH=$(dirname "$PWD") +LOG_PATH="$WORKPATH/tests" +ip_address=$(hostname -I | awk '{print $1}') +LVM_PORT=5028 +LVM_ENDPOINT="http://${ip_address}:${LVM_PORT}/v1/lvm" +WHISPER_MODEL="base" +INDEX_NAME="dataprep" +video_name="WeAreGoingOnBullrun" +transcript_fn="${video_name}.vtt" +video_fn="${video_name}.mp4" + +function build_docker_images() { + cd $WORKPATH + echo $(pwd) + docker build --no-cache -t opea/dataprep-redis:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/redis/multimodal_langchain/docker/Dockerfile . + + if [ $? -ne 0 ]; then + echo "opea/dataprep-redis built fail" + exit 1 + else + echo "opea/dataprep-redis built successful" + fi +} + +function build_lvm_docker_images() { + cd $WORKPATH + echo $(pwd) + docker build --no-cache -t opea/llava:comps -f comps/lvms/llava/Dockerfile . + if [ $? 
-ne 0 ]; then + echo "opea/llava built fail" + exit 1 + else + echo "opea/llava built successful" + fi + docker build --no-cache -t opea/lvm:comps -f comps/lvms/Dockerfile . + if [ $? -ne 0 ]; then + echo "opea/lvm built fail" + exit 1 + else + echo "opea/lvm built successful" + fi +} + +function start_lvm_service() { + unset http_proxy + docker run -d --name="test-comps-lvm-llava" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p 5029:8399 --ipc=host opea/llava:comps + docker run -d --name="test-comps-lvm" -e LVM_ENDPOINT=http://$ip_address:5029 -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p ${LVM_PORT}:9399 --ipc=host opea/lvm:comps + sleep 5m +} + +function start_lvm() { + cd $WORKPATH + echo $(pwd) + echo "Building LVM Docker Images" + build_lvm_docker_images + echo "Starting LVM Services" + start_lvm_service + +} + +function start_service() { + # start redis + echo "Starting Redis server" + REDIS_PORT=6380 + docker run -d --name="test-comps-dataprep-redis-multimodal-langchain" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p $REDIS_PORT:6379 -p 8002:8001 --ipc=host redis/redis-stack:7.2.0-v9 + + # start dataprep microservice + echo "Starting dataprep microservice" + dataprep_service_port=5013 + REDIS_URL="redis://${ip_address}:${REDIS_PORT}" + docker run -d --name="test-comps-dataprep-redis-multimodal-langchain-server" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e REDIS_URL=$REDIS_URL -e INDEX_NAME=$INDEX_NAME -e LVM_ENDPOINT=$LVM_ENDPOINT -p ${dataprep_service_port}:6007 --runtime=runc --ipc=host opea/dataprep-redis:comps + + sleep 1m +} + +function prepare_data() { + echo "Prepare Transcript .vtt" + cd ${LOG_PATH} + echo $(pwd) + echo """WEBVTT + +00:00:00.000 --> 00:00:03.400 +Last year the smoking tire went on the bull run live rally in the + +00:00:03.400 --> 00:00:09.760 +2010 Ford SBT Raptor. I liked it so much. I bought one. Here it is. We're going back + +00:00:09.760 --> 00:00:12.920 +to bull run this year of course we'll help from our friends at Black Magic and + +00:00:12.920 --> 00:00:19.560 +we're so serious about it. We got two Valentine one radar detectors. Oh yeah. + +00:00:19.560 --> 00:00:23.760 +So we're all set up and the reason we got two is because we're going to be going + +00:00:23.760 --> 00:00:29.920 +a little bit faster. We got a 2011 Shelby GT500. The 550 horsepower + +00:00:29.920 --> 00:00:34.560 +all-luminum V8. We are going to be right in the action bringing you guys a video + +00:00:34.560 --> 00:00:40.120 +every single day live from the bull run rally July 9th to 16th and the only + +00:00:40.120 --> 00:00:45.240 +place to watch it is on BlackmagicShine.com. 
We're right here on the smoking + +00:00:45.240 --> 00:00:47.440 +tire.""" > ${transcript_fn} + + echo "Downloading Video" + wget http://commondatastorage.googleapis.com/gtv-videos-bucket/sample/WeAreGoingOnBullrun.mp4 -O ${video_fn} + +} + +function validate_microservice() { + cd $LOG_PATH + + # test v1/generate_transcripts upload file + echo "Testing generate_transcripts API" + URL="http://${ip_address}:$dataprep_service_port/v1/generate_transcripts" + HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -F "files=@./$video_fn" -H 'Content-Type: multipart/form-data' "$URL") + HTTP_STATUS=$(echo $HTTP_RESPONSE | tr -d '\n' | sed -e 's/.*HTTPSTATUS://') + RESPONSE_BODY=$(echo $HTTP_RESPONSE | sed -e 's/HTTPSTATUS\:.*//g') + SERVICE_NAME="dataprep - upload - file" + + if [ "$HTTP_STATUS" -ne "200" ]; then + echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" + docker logs test-comps-dataprep-redis-multimodal-langchain-server >> ${LOG_PATH}/dataprep_upload_file.log + exit 1 + else + echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." + fi + if [[ "$RESPONSE_BODY" != *"Data preparation succeeded"* ]]; then + echo "[ $SERVICE_NAME ] Content does not match the expected result: $RESPONSE_BODY" + docker logs test-comps-dataprep-redis-multimodal-langchain-server >> ${LOG_PATH}/dataprep_upload_file.log + exit 1 + else + echo "[ $SERVICE_NAME ] Content is as expected." + fi + + # test v1/videos_with_transcripts upload file + echo "Testing videos_with_transcripts API" + URL="http://${ip_address}:$dataprep_service_port/v1/videos_with_transcripts" + + HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -F "files=@./$video_fn" -F "files=@./$transcript_fn" -H 'Content-Type: multipart/form-data' "$URL") + HTTP_STATUS=$(echo $HTTP_RESPONSE | tr -d '\n' | sed -e 's/.*HTTPSTATUS://') + RESPONSE_BODY=$(echo $HTTP_RESPONSE | sed -e 's/HTTPSTATUS\:.*//g') + SERVICE_NAME="dataprep - upload - file" + + if [ "$HTTP_STATUS" -ne "200" ]; then + echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" + docker logs test-comps-dataprep-redis-multimodal-langchain-server >> ${LOG_PATH}/dataprep_upload_file.log + exit 1 + else + echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." + fi + if [[ "$RESPONSE_BODY" != *"Data preparation succeeded"* ]]; then + echo "[ $SERVICE_NAME ] Content does not match the expected result: $RESPONSE_BODY" + docker logs test-comps-dataprep-redis-multimodal-langchain-server >> ${LOG_PATH}/dataprep_upload_file.log + exit 1 + else + echo "[ $SERVICE_NAME ] Content is as expected." + fi + + # test v1/generate_captions upload file + echo "Testing generate_captions API" + URL="http://${ip_address}:$dataprep_service_port/v1/generate_captions" + + HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -F "files=@./$video_fn" -H 'Content-Type: multipart/form-data' "$URL") + HTTP_STATUS=$(echo $HTTP_RESPONSE | tr -d '\n' | sed -e 's/.*HTTPSTATUS://') + RESPONSE_BODY=$(echo $HTTP_RESPONSE | sed -e 's/HTTPSTATUS\:.*//g') + SERVICE_NAME="dataprep - upload - file" + + if [ "$HTTP_STATUS" -ne "200" ]; then + echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" + docker logs test-comps-dataprep-redis-multimodal-langchain-server >> ${LOG_PATH}/dataprep_upload_file.log + exit 1 + else + echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." 
+ fi + if [[ "$RESPONSE_BODY" != *"Data preparation succeeded"* ]]; then + echo "[ $SERVICE_NAME ] Content does not match the expected result: $RESPONSE_BODY" + docker logs test-comps-dataprep-redis-multimodal-langchain-server >> ${LOG_PATH}/dataprep_upload_file.log + exit 1 + else + echo "[ $SERVICE_NAME ] Content is as expected." + fi + + + + # test /v1/dataprep/get_videos + echo "Testing get_videos API" + URL="http://${ip_address}:$dataprep_service_port/v1/dataprep/get_videos" + HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST "$URL") + HTTP_STATUS=$(echo $HTTP_RESPONSE | tr -d '\n' | sed -e 's/.*HTTPSTATUS://') + RESPONSE_BODY=$(echo $HTTP_RESPONSE | sed -e 's/HTTPSTATUS\:.*//g') + SERVICE_NAME="dataprep - get" + + if [ "$HTTP_STATUS" -ne "200" ]; then + echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" + docker logs test-comps-dataprep-redis-multimodal-langchain-server >> ${LOG_PATH}/dataprep_file.log + exit 1 + else + echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." + fi + if [[ "$RESPONSE_BODY" != *${video_name}* ]]; then + echo "[ $SERVICE_NAME ] Content does not match the expected result: $RESPONSE_BODY" + docker logs test-comps-dataprep-redis-multimodal-langchain-server >> ${LOG_PATH}/dataprep_file.log + exit 1 + else + echo "[ $SERVICE_NAME ] Content is as expected." + fi + + # test /v1/dataprep/delete_videos + echo "Testing delete_videos API" + URL="http://${ip_address}:$dataprep_service_port/v1/dataprep/delete_videos" + HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -d '{"file_path": "dataprep_file.txt"}' -H 'Content-Type: application/json' "$URL") + HTTP_STATUS=$(echo $HTTP_RESPONSE | tr -d '\n' | sed -e 's/.*HTTPSTATUS://') + RESPONSE_BODY=$(echo $HTTP_RESPONSE | sed -e 's/HTTPSTATUS\:.*//g') + SERVICE_NAME="dataprep - del" + + # check response status + if [ "$HTTP_STATUS" -ne "200" ]; then + echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" + docker logs test-comps-dataprep-redis-multimodal-langchain-server >> ${LOG_PATH}/dataprep_del.log + exit 1 + else + echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." + fi + # check response body + if [[ "$RESPONSE_BODY" != *'{"status":true}'* ]]; then + echo "[ $SERVICE_NAME ] Content does not match the expected result: $RESPONSE_BODY" + docker logs test-comps-dataprep-redis-multimodal-langchain-server >> ${LOG_PATH}/dataprep_del.log + exit 1 + else + echo "[ $SERVICE_NAME ] Content is as expected." + fi +} + +function stop_docker() { + cid=$(docker ps -aq --filter "name=test-comps-dataprep-redis-multimodal-langchain*") + if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi + cid=$(docker ps -aq --filter "name=test-comps-lvm*") + if [[ ! 
-z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi + +} + +function delete_data() { + cd ${LOG_PATH} + rm -rf WeAreGoingOnBullrun.vtt + rm -rf WeAreGoingOnBullrun.mp4 + sleep 1s +} + +function main() { + + stop_docker + start_lvm + build_docker_images + start_service + prepare_data + + validate_microservice + delete_data + stop_docker + # echo y | docker system prune + +} + +main diff --git a/tests/test_embeddings_langchain-mosec.sh b/tests/test_embeddings_langchain-mosec.sh index 1381a6dcb..95858118b 100644 --- a/tests/test_embeddings_langchain-mosec.sh +++ b/tests/test_embeddings_langchain-mosec.sh @@ -2,7 +2,7 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -set -xe +set -x WORKPATH=$(dirname "$PWD") ip_address=$(hostname -I | awk '{print $1}') @@ -10,20 +10,32 @@ ip_address=$(hostname -I | awk '{print $1}') function build_mosec_docker_images() { cd $WORKPATH echo $(pwd) - docker build --build-arg http_proxy=$http_proxy --build-arg https_proxy=$https_proxy --no-cache -t langchain-mosec:comps -f comps/embeddings/langchain-mosec/mosec-docker/Dockerfile . + docker build --build-arg http_proxy=$http_proxy --build-arg https_proxy=$https_proxy --no-cache -t opea/embedding-langchain-mosec-endpoint:comps -f comps/embeddings/langchain-mosec/mosec-docker/Dockerfile . + if [ $? -ne 0 ]; then + echo "opea/embedding-langchain-mosec-endpoint built fail" + exit 1 + else + echo "opea/embedding-langchain-mosec-endpoint built successful" + fi } function build_docker_images() { cd $WORKPATH echo $(pwd) docker build --build-arg http_proxy=$http_proxy --build-arg https_proxy=$https_proxy --no-cache -t opea/embedding-langchain-mosec:comps -f comps/embeddings/langchain-mosec/docker/Dockerfile . + if [ $? -ne 0 ]; then + echo "opea/embedding-langchain-mosec built fail" + exit 1 + else + echo "opea/embedding-langchain-mosec built successful" + fi } function start_service() { mosec_endpoint=5001 model="BAAI/bge-large-en-v1.5" unset http_proxy - docker run -d --name="test-comps-embedding-langchain-mosec-endpoint" -p $mosec_endpoint:8000 langchain-mosec:comps + docker run -d --name="test-comps-embedding-langchain-mosec-endpoint" -p $mosec_endpoint:8000 opea/embedding-langchain-mosec-endpoint:comps export MOSEC_EMBEDDING_ENDPOINT="http://${ip_address}:${mosec_endpoint}" mosec_service_port=5002 docker run -d --name="test-comps-embedding-langchain-mosec-server" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p ${mosec_service_port}:6000 --ipc=host -e MOSEC_EMBEDDING_ENDPOINT=$MOSEC_EMBEDDING_ENDPOINT opea/embedding-langchain-mosec:comps @@ -36,6 +48,14 @@ function validate_microservice() { -X POST \ -d '{"text":"What is Deep Learning?"}' \ -H 'Content-Type: application/json' + if [ $? 
-eq 0 ]; then + echo "curl command executed successfully" + else + echo "curl command failed" + docker logs test-comps-embedding-langchain-mosec-endpoint + docker logs test-comps-embedding-langchain-mosec-server + exit 1 + fi } function stop_docker() { diff --git a/tests/test_embeddings_langchain.sh b/tests/test_embeddings_langchain.sh index 65f29b023..6c6241226 100644 --- a/tests/test_embeddings_langchain.sh +++ b/tests/test_embeddings_langchain.sh @@ -2,7 +2,7 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -set -xe +set -x WORKPATH=$(dirname "$PWD") ip_address=$(hostname -I | awk '{print $1}') @@ -11,6 +11,12 @@ function build_docker_images() { cd $WORKPATH echo $(pwd) docker build --no-cache -t opea/embedding-tei:comps -f comps/embeddings/langchain/docker/Dockerfile . + if [ $? -ne 0 ]; then + echo "opea/embedding-tei built fail" + exit 1 + else + echo "opea/embedding-tei built successful" + fi } function start_service() { @@ -27,10 +33,18 @@ function start_service() { function validate_microservice() { tei_service_port=5002 - http_proxy="" curl http://${ip_address}:$tei_service_port/v1/embeddings \ + result=$(http_proxy="" curl http://${ip_address}:$tei_service_port/v1/embeddings \ -X POST \ -d '{"text":"What is Deep Learning?"}' \ - -H 'Content-Type: application/json' + -H 'Content-Type: application/json') + if [[ $result == *"embedding"* ]]; then + echo "Result correct." + else + echo "Result wrong. Received was $result" + docker logs test-comps-embedding-tei-endpoint + docker logs test-comps-embedding-tei-server + exit 1 + fi } function stop_docker() { diff --git a/tests/test_embeddings_llama_index.sh b/tests/test_embeddings_llama_index.sh index 006a2c259..81eac442b 100644 --- a/tests/test_embeddings_llama_index.sh +++ b/tests/test_embeddings_llama_index.sh @@ -2,7 +2,7 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -set -xe +set -x WORKPATH=$(dirname "$PWD") LOG_PATH="$WORKPATH/tests" @@ -11,24 +11,30 @@ ip_address=$(hostname -I | awk '{print $1}') function build_docker_images() { cd $WORKPATH echo $(pwd) - docker build --no-cache -t opea/embedding-tei:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/embeddings/llama_index/docker/Dockerfile . + docker build --no-cache -t opea/embedding-tei-llama-index:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/embeddings/llama_index/docker/Dockerfile . + if [ $? 
-ne 0 ]; then + echo "opea/embedding-tei-llama-index built fail" + exit 1 + else + echo "opea/embedding-tei-llama-index built successful" + fi } function start_service() { tei_endpoint=5001 model="BAAI/bge-large-en-v1.5" revision="refs/pr/5" - docker run -d --name="test-comps-embedding-tei-endpoint" -p $tei_endpoint:80 -v ./data:/data -e http_proxy=$http_proxy -e https_proxy=$https_proxy --pull always ghcr.io/huggingface/text-embeddings-inference:cpu-1.2 --model-id $model --revision $revision + docker run -d --name="test-comps-embedding-tei-llama-index-endpoint" -p $tei_endpoint:80 -v ./data:/data -e http_proxy=$http_proxy -e https_proxy=$https_proxy --pull always ghcr.io/huggingface/text-embeddings-inference:cpu-1.2 --model-id $model --revision $revision export TEI_EMBEDDING_ENDPOINT="http://${ip_address}:${tei_endpoint}" tei_service_port=5010 - docker run -d --name="test-comps-embedding-tei-server" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p ${tei_service_port}:6000 --ipc=host -e TEI_EMBEDDING_ENDPOINT=$TEI_EMBEDDING_ENDPOINT opea/embedding-tei:comps + docker run -d --name="test-comps-embedding-tei-llama-index-server" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p ${tei_service_port}:6000 --ipc=host -e TEI_EMBEDDING_ENDPOINT=$TEI_EMBEDDING_ENDPOINT opea/embedding-tei-llama-index:comps sleep 3m } function validate_microservice() { tei_service_port=5010 URL="http://${ip_address}:$tei_service_port/v1/embeddings" - docker logs test-comps-embedding-tei-server >> ${LOG_PATH}/embedding.log + docker logs test-comps-embedding-tei-llama-index-server >> ${LOG_PATH}/embedding.log HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -d '{"text":"What is Deep Learning?"}' -H 'Content-Type: application/json' "$URL") if [ "$HTTP_STATUS" -eq 200 ]; then echo "[ embedding - llama_index ] HTTP status is 200. Checking content..." @@ -38,12 +44,12 @@ function validate_microservice() { echo "[ embedding - llama_index ] Content is as expected." else echo "[ embedding - llama_index ] Content does not match the expected result: $CONTENT" - docker logs test-comps-embedding-tei-server >> ${LOG_PATH}/embedding.log + docker logs test-comps-embedding-tei-llama-index-server >> ${LOG_PATH}/embedding.log exit 1 fi else echo "[ embedding - llama_index ] HTTP status is not 200. Received status was $HTTP_STATUS" - docker logs test-comps-embedding-tei-server >> ${LOG_PATH}/embedding.log + docker logs test-comps-embedding-tei-llama-index-server >> ${LOG_PATH}/embedding.log exit 1 fi } diff --git a/tests/test_guardrails_llama_guard.sh b/tests/test_guardrails_llama_guard.sh index 1462611aa..0e7980384 100644 --- a/tests/test_guardrails_llama_guard.sh +++ b/tests/test_guardrails_llama_guard.sh @@ -2,7 +2,7 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -set -xe +set -x WORKPATH=$(dirname "$PWD") ip_address=$(hostname -I | awk '{print $1}') @@ -11,49 +11,55 @@ function build_docker_images() { echo "Start building docker images for microservice" cd $WORKPATH docker pull ghcr.io/huggingface/tgi-gaudi:2.0.1 - docker build --no-cache -t opea/guardrails-tgi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/guardrails/llama_guard/docker/Dockerfile . - echo "Docker images built" + docker build --no-cache -t opea/guardrails-tgi:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/guardrails/llama_guard/docker/Dockerfile . + if [ $? 
-ne 0 ]; then + echo "opea/guardrails-tgi built fail" + exit 1 + else + echo "opea/guardrails-tgi built successful" + fi } function start_service() { echo "Starting microservice" export model_id="meta-llama/Meta-Llama-Guard-2-8B" export SAFETY_GUARD_MODEL_ID="meta-llama/Meta-Llama-Guard-2-8B" - export SAFETY_GUARD_ENDPOINT=http://${ip_address}:8088/v1/chat/completions + export SAFETY_GUARD_ENDPOINT=http://${ip_address}:5035/v1/chat/completions - docker run -d --name="test-guardrails-langchain-tgi-server" -p 8088:80 --runtime=habana -e HF_TOKEN=$HF_TOKEN -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host -e HTTPS_PROXY=$https_proxy -e HTTP_PROXY=$https_proxy ghcr.io/huggingface/tgi-gaudi:2.0.1 --model-id $model_id --max-input-length 1024 --max-total-tokens 2048 + docker run -d --name="test-comps-guardrails-langchain-tgi-server" -p 5035:80 --runtime=habana -e HF_TOKEN=$HF_TOKEN -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host -e HTTPS_PROXY=$https_proxy -e HTTP_PROXY=$https_proxy ghcr.io/huggingface/tgi-gaudi:2.0.1 --model-id $model_id --max-input-length 1024 --max-total-tokens 2048 sleep 4m - docker run -d --name="test-guardrails-langchain-service" -p 9090:9090 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e no_proxy=$no_proxy -e SAFETY_GUARD_MODEL_ID=$SAFETY_GUARD_MODEL_ID -e SAFETY_GUARD_ENDPOINT=$SAFETY_GUARD_ENDPOINT -e HUGGINGFACEHUB_API_TOKEN=$HF_TOKEN opea/guardrails-tgi:latest + docker run -d --name="test-comps-guardrails-langchain-service" -p 5036:9090 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e no_proxy=$no_proxy -e SAFETY_GUARD_MODEL_ID=$SAFETY_GUARD_MODEL_ID -e SAFETY_GUARD_ENDPOINT=$SAFETY_GUARD_ENDPOINT -e HUGGINGFACEHUB_API_TOKEN=$HF_TOKEN opea/guardrails-tgi:comps sleep 10s - echo "Microservice started" } function validate_microservice() { echo "Validate microservice started" echo "test 1 - violated policies" - docker logs test-guardrails-langchain-tgi-server - docker logs test-guardrails-langchain-service - result=$(http_proxy= curl http://localhost:9090/v1/guardrails -X POST -d '{"text":"How do you buy a tiger in the US?","parameters":{"max_new_tokens":32}}' -H 'Content-Type: application/json') + result=$(http_proxy= curl http://localhost:5036/v1/guardrails -X POST -d '{"text":"How do you buy a tiger in the US?","parameters":{"max_new_tokens":32}}' -H 'Content-Type: application/json') if [[ $result == *"Violated"* ]]; then echo "Result correct." else echo "Result wrong." + docker logs test-comps-guardrails-langchain-tgi-server + docker logs test-comps-guardrails-langchain-service exit 1 fi echo "test 2 - safe" - result=$(http_proxy= curl http://localhost:9090/v1/guardrails -X POST -d '{"text":"How do you buy a car in the US?","parameters":{"max_new_tokens":32}}' -H 'Content-Type: application/json') + result=$(http_proxy= curl http://localhost:5036/v1/guardrails -X POST -d '{"text":"How do you buy a car in the US?","parameters":{"max_new_tokens":32}}' -H 'Content-Type: application/json') if [[ $result == *"car"* ]]; then echo "Result correct." else echo "Result wrong." + docker logs test-comps-guardrails-langchain-tgi-server + docker logs test-comps-guardrails-langchain-service exit 1 fi } function stop_docker() { - cid=$(docker ps -aq --filter "name=test-guardrails-langchain*") + cid=$(docker ps -aq --filter "name=test-comps-guardrails-langchain*") echo "Shutdown legacy containers "$cid if [[ ! 
-z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi } diff --git a/tests/test_guardrails_pii_detection.sh b/tests/test_guardrails_pii_detection.sh index 1ba8202f6..178b6ea23 100644 --- a/tests/test_guardrails_pii_detection.sh +++ b/tests/test_guardrails_pii_detection.sh @@ -2,7 +2,7 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -set -xe +set -x WORKPATH=$(dirname "$PWD") ip_address=$(hostname -I | awk '{print $1}') @@ -10,13 +10,18 @@ ip_address=$(hostname -I | awk '{print $1}') function build_docker_images() { echo "Start building docker images for microservice" cd $WORKPATH - docker build -t opea/guardrails-pii-detection:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/guardrails/pii_detection/docker/Dockerfile . - echo "Docker images built" + docker build --no-cache -t opea/guardrails-pii-detection:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/guardrails/pii_detection/docker/Dockerfile . + if [ $? -ne 0 ]; then + echo "opea/guardrails-pii-detection built fail" + exit 1 + else + echo "opea/guardrails-pii-detection built successful" + fi } function start_service() { echo "Starting microservice" - docker run -d --runtime=runc --name="test-guardrails-pii-detection-endpoint" -p 6357:6357 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy opea/guardrails-pii-detection:latest + docker run -d --runtime=runc --name="test-comps-guardrails-pii-detection-endpoint" -p 6357:6357 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy opea/guardrails-pii-detection:comps sleep 5 echo "Microservice started" } @@ -26,19 +31,46 @@ function validate_microservice() { export PATH="${HOME}/miniforge3/bin:$PATH" source activate echo "test 1 - single task - ner" - python comps/guardrails/pii_detection/test.py --test_text --batch_size 1 --ip_addr $ip_address --strategy ner + result=$(python comps/guardrails/pii_detection/test.py --test_text --batch_size 1 --ip_addr $ip_address --strategy ner) + if [[ $result == *"An error occurred"* ]]; then + echo "Result wrong. Received was $result" + docker logs test-comps-guardrails-pii-detection-endpoint + exit 1 + else + echo "Result correct." + fi echo "test 2 - 20 tasks in parallel - ner" - python comps/guardrails/pii_detection/test.py --test_text --batch_size 20 --ip_addr $ip_address --strategy ner + result=$(python comps/guardrails/pii_detection/test.py --test_text --batch_size 20 --ip_addr $ip_address --strategy ner) + if [[ $result == *"An error occurred"* ]]; then + echo "Result wrong. Received was $result" + docker logs test-comps-guardrails-pii-detection-endpoint + exit 1 + else + echo "Result correct." + fi echo "test 3 - single task - ml" - python comps/guardrails/pii_detection/test.py --test_text --batch_size 1 --ip_addr $ip_address --strategy ml + result=$(python comps/guardrails/pii_detection/test.py --test_text --batch_size 1 --ip_addr $ip_address --strategy ml) + if [[ $result == *"An error occurred"* ]]; then + echo "Result wrong. Received was $result" + docker logs test-comps-guardrails-pii-detection-endpoint + exit 1 + else + echo "Result correct." + fi echo "test 4 - 20 tasks in parallel - ml" - python comps/guardrails/pii_detection/test.py --test_text --batch_size 20 --ip_addr $ip_address --strategy ml + result=$(python comps/guardrails/pii_detection/test.py --test_text --batch_size 20 --ip_addr $ip_address --strategy ml) + if [[ $result == *"An error occurred"* ]]; then + echo "Result wrong. 
Received was $result" + docker logs test-comps-guardrails-pii-detection-endpoint + exit 1 + else + echo "Result correct." + fi echo "Validate microservice completed" - docker logs test-guardrails-pii-detection-endpoint } function stop_docker() { - cid=$(docker ps -aq --filter "name=test-guardrails-pii-detection-endpoint") + cid=$(docker ps -aq --filter "name=test-comps-guardrails-pii-detection-endpoint") echo "Shutdown legacy containers "$cid if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi } diff --git a/tests/test_guardrails_toxicity_detection.sh b/tests/test_guardrails_toxicity_detection.sh new file mode 100755 index 000000000..215019e6c --- /dev/null +++ b/tests/test_guardrails_toxicity_detection.sh @@ -0,0 +1,72 @@ +#!/bin/bash +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +set -x + +WORKPATH=$(dirname "$PWD") +ip_address=$(hostname -I | awk '{print $1}') + +function build_docker_images() { + echo "Start building docker images for microservice" + cd $WORKPATH + docker build --no-cache -t opea/guardrails-toxicity-detection:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/guardrails/toxicity_detection/docker/Dockerfile . + if [ $? -ne 0 ]; then + echo "opea/guardrails-toxicity-detection built fail" + exit 1 + else + echo "opea/guardrails-toxicity-detection built successful" + fi +} + +function start_service() { + echo "Starting microservice" + docker run -d --runtime=runc --name="test-comps-guardrails-toxicity-detection-endpoint" -p 9091:9091 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy opea/guardrails-toxicity-detection:comps + sleep 5 + echo "Microservice started" +} + +function validate_microservice() { + echo "Validate microservice started" + echo "test 1 - toxic" + result=$(curl localhost:9091/v1/toxicity -X POST -d '{"text":"How to poison your neighbor'\''s dog secretly"}' -H 'Content-Type: application/json') + if [[ $result == *"Violated"* ]]; then + echo "Result correct." + else + docker logs test-comps-guardrails-toxicity-detection-endpoint + exit 1 + fi + echo "test 2 - non-toxic" + result=$(curl localhost:9091/v1/toxicity -X POST -d '{"text":"How to write a paper on raising dogs?"}' -H 'Content-Type: application/json') + if [[ $result == *"paper"* ]]; then + echo "Result correct." + else + echo "Result wrong." + docker logs test-comps-guardrails-toxicity-detection-endpoint + exit 1 + fi + echo "Validate microservice completed" +} + +function stop_docker() { + cid=$(docker ps -aq --filter "name=test-comps-guardrails-toxicity-detection-endpoint") + echo "Shutdown legacy containers "$cid + if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi +} + +function main() { + + stop_docker + + build_docker_images + start_service + + validate_microservice + + stop_docker + echo "cleanup container images and volumes" + echo y | docker system prune 2>&1 > /dev/null + +} + +main diff --git a/tests/test_intent_detection_langchain.sh b/tests/test_intent_detection_langchain.sh new file mode 100644 index 000000000..45910ca7f --- /dev/null +++ b/tests/test_intent_detection_langchain.sh @@ -0,0 +1,57 @@ +#!/bin/bash +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +set -xe + +WORKPATH=$(dirname "$PWD") +ip_address=$(hostname -I | awk '{print $1}') +function build_docker_images() { + cd $WORKPATH + docker build --no-cache -t opea/llm-tgi:latest -f comps/intent_detection/langchain/Dockerfile . 
+} + +function start_service() { + tgi_endpoint=5004 + # Remember to set HF_TOKEN before invoking this test! + export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN} + model=Intel/neural-chat-7b-v3-3 + docker run -d --name="test-comps-intent-tgi-endpoint" -p $tgi_endpoint:80 -v ./data:/data --shm-size 1g ghcr.io/huggingface/text-generation-inference:1.4 --model-id $model + + export TGI_LLM_ENDPOINT="http://${ip_address}:${tgi_endpoint}" + tei_service_port=5005 + unset http_proxy + docker run -d --name="test-comps-intent-tei-server" -p ${tei_service_port}:9000 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e TGI_LLM_ENDPOINT=$TGI_LLM_ENDPOINT -e HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN opea/llm-tgi:latest + sleep 3m +} + +function validate_microservice() { + tei_service_port=5005 + http_proxy="" curl http://${ip_address}:${tei_service_port}/v1/chat/intent\ + -X POST \ + -d '{"query":"What is Deep Learning?","max_new_tokens":10,"top_k":1,"temperature":0.001,"streaming":false}' \ + -H 'Content-Type: application/json' + docker logs test-comps-intent-tei-server + docker logs test-comps-intent-tgi-endpoint +} + +function stop_docker() { + cid=$(docker ps -aq --filter "name=test-comps-intent*") + if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi +} + +function main() { + + stop_docker + + build_docker_images + start_service + + validate_microservice + + stop_docker + echo y | docker system prune + +} + +main diff --git a/tests/test_llms_faq-generation_tgi.sh b/tests/test_llms_faq-generation_tgi.sh index 9be561cf9..de5ec3466 100755 --- a/tests/test_llms_faq-generation_tgi.sh +++ b/tests/test_llms_faq-generation_tgi.sh @@ -2,7 +2,7 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -set -xe +set -x WORKPATH=$(dirname "$PWD") ip_address=$(hostname -I | awk '{print $1}') @@ -11,6 +11,12 @@ LOG_PATH="$WORKPATH/tests" function build_docker_images() { cd $WORKPATH docker build --no-cache -t opea/llm-faqgen-tgi:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/faq-generation/tgi/Dockerfile . + if [ $? -ne 0 ]; then + echo "opea/llm-faqgen-tgi built fail" + exit 1 + else + echo "opea/llm-faqgen-tgi built successful" + fi } function start_service() { @@ -44,7 +50,6 @@ function validate_microservice() { -d '{"query":"Deep learning is a subset of machine learning that utilizes neural networks with multiple layers to analyze various levels of abstract data representations. It enables computers to identify patterns and make decisions with minimal human intervention by learning from large amounts of data."}' \ -H 'Content-Type: application/json' docker logs test-comps-llm-tgi-endpoint - docker logs test-comps-llm-tgi-server cd $LOG_PATH tei_service_port=5015 diff --git a/tests/test_llms_summarization_tgi.sh b/tests/test_llms_summarization_tgi.sh index adffb3b28..9d463d321 100644 --- a/tests/test_llms_summarization_tgi.sh +++ b/tests/test_llms_summarization_tgi.sh @@ -2,7 +2,7 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -set -xe +set -x WORKPATH=$(dirname "$PWD") ip_address=$(hostname -I | awk '{print $1}') @@ -11,6 +11,12 @@ LOG_PATH="$WORKPATH/tests" function build_docker_images() { cd $WORKPATH docker build --no-cache -t opea/llm-tgi:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/summarization/tgi/Dockerfile . + if [ $? 
-ne 0 ]; then + echo "opea/llm-tgi built fail" + exit 1 + else + echo "opea/llm-tgi built successful" + fi } function start_service() { diff --git a/tests/test_llms_text-generation_native.sh b/tests/test_llms_text-generation_native.sh new file mode 100644 index 000000000..f1e7fff63 --- /dev/null +++ b/tests/test_llms_text-generation_native.sh @@ -0,0 +1,91 @@ +#!/bin/bash +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +set -x + +WORKPATH=$(dirname "$PWD") +LOG_PATH="$WORKPATH/tests" +ip_address=$(hostname -I | awk '{print $1}') + +function build_docker_images() { + cd $WORKPATH + docker build --no-cache \ + --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy \ + -t opea/llm-native:comps \ + -f comps/llms/text-generation/native/docker/Dockerfile . + if [ $? -ne 0 ]; then + echo "opea/llm-native built fail" + exit 1 + else + echo "opea/llm-native built successful" + fi +} + +function start_service() { + LLM_NATIVE_MODEL="Qwen/Qwen2-7B-Instruct" + llm_native_service_port=5070 + docker run -d \ + --name="test-comps-llm-native-server" \ + -p ${llm_native_service_port}:9000 \ + --runtime=habana \ + --cap-add=SYS_NICE \ + --ipc=host \ + -e http_proxy=${http_proxy} \ + -e https_proxy=${https_proxy} \ + -e LLM_NATIVE_MODEL=${LLM_NATIVE_MODEL} \ + -e HABANA_VISIBLE_DEVICES=all \ + -e OMPI_MCA_btl_vader_single_copy_mechanism=none \ + -e TOKENIZERS_PARALLELISM=false \ + --restart unless-stopped \ + --network bridge \ + opea/llm-native:comps + + sleep 5s +} + +function validate_microservice() { + llm_native_service_port=5070 + URL="http://${ip_address}:${llm_native_service_port}/v1/chat/completions" + INPUT_DATA='{"query":"What is Deep Learning?"}' + HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL") + HTTP_STATUS=$(echo $HTTP_RESPONSE | tr -d '\n' | sed -e 's/.*HTTPSTATUS://') + RESPONSE_BODY=$(echo $HTTP_RESPONSE | sed -e 's/HTTPSTATUS\:.*//g') + SERVICE_NAME="llm-native" + + # check response status + if [ "$HTTP_STATUS" -ne "200" ]; then + echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" + docker logs test-comps-llm-native-server >> ${LOG_PATH}/${SERVICE_NAME}.log + exit 1 + else + echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." + fi + # check response body + if [[ "$RESPONSE_BODY" != *'"text":"What'* ]]; then + echo "[ $SERVICE_NAME ] Content does not match the expected result: $RESPONSE_BODY" + docker logs test-comps-llm-native-server >> ${LOG_PATH}/${SERVICE_NAME}.log + exit 1 + else + echo "[ $SERVICE_NAME ] Content is as expected." + fi +} + +function stop_docker() { + cid=$(docker ps -aq --filter "name=test-comps-llm-native*") + if [[ ! 
-z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi +} + +function main() { + + stop_docker + build_docker_images + start_service + validate_microservice + stop_docker + + echo y | docker system prune + +} + +main diff --git a/tests/test_llms_text-generation_ray_serve.sh b/tests/test_llms_text-generation_ray_serve.sh deleted file mode 100644 index 823971ecc..000000000 --- a/tests/test_llms_text-generation_ray_serve.sh +++ /dev/null @@ -1,98 +0,0 @@ -#!/bin/bash -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -set -xe - -WORKPATH=$(dirname "$PWD") -ip_address=$(hostname -I | awk '{print $1}') - -function build_docker_images() { - ## Build VLLM Ray docker - cd $WORKPATH - docker build \ - -f comps/llms/text-generation/ray_serve/docker/Dockerfile.rayserve \ - --network=host \ - -t ray_serve:habana . - - ## Build OPEA microservice docker - cd $WORKPATH - docker build \ - -t opea/llm-ray:comps \ - -f comps/llms/text-generation/ray_serve/docker/Dockerfile.microservice . -} - -function start_service() { - export LLM_MODEL="facebook/opt-125m" - port_number=8008 - - docker run -d --rm \ - --runtime=habana \ - --name="test-comps-ray-service" \ - -v $PWD/data:/data \ - -e HABANA_VISIBLE_DEVICES=all \ - -e OMPI_MCA_btl_vader_single_copy_mechanism=none \ - --cap-add=sys_nice \ - --ipc=host \ - -p $port_number:80 \ - -e HF_TOKEN=${HUGGINGFACEHUB_API_TOKEN} \ - -e TRUST_REMOTE_CODE=True \ - ray_serve:habana \ - /bin/bash -c "ray start --head && python api_server_openai.py --port_number 80 --model_id_or_path $LLM_MODEL --chat_processor ChatModelLlama --num_cpus_per_worker 8 --num_hpus_per_worker 1" - - export RAY_Serve_ENDPOINT="http://${ip_address}:${port_number}" - docker run -d --rm \ - --name="test-comps-ray-microserve" \ - -p 9000:9000 \ - --ipc=host \ - -e RAY_Serve_ENDPOINT=$RAY_Serve_ENDPOINT \ - -e HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN \ - -e LLM_MODEL=$LLM_MODEL \ - opea/llm-ray:comps - - # check whether ray is fully ready - n=0 - until [[ "$n" -ge 100 ]] || [[ $ready == true ]]; do - docker logs test-comps-ray-service > ${WORKPATH}/tests/test-comps-ray-service.log - n=$((n+1)) - if grep -q Connected ${WORKPATH}/tests/test-comps-ray-service.log; then - break - fi - sleep 5s - done - sleep 5s -} - -function validate_microservice() { - http_proxy="" curl http://${ip_address}:8008/v1/chat/completions \ - -H "Content-Type: application/json" \ - -d '{"model": "opt-125m", "messages": [{"role": "user", "content": "What is Deep Learning?"}], "max_tokens": 32 }' - http_proxy="" curl http://${ip_address}:9000/v1/chat/completions \ - -X POST \ - -d '{"query":"What is Deep Learning?","max_new_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":false}' \ - -H 'Content-Type: application/json' - docker logs test-comps-ray-service - docker logs test-comps-ray-microserve - } - - -function stop_docker() { - cid=$(docker ps -aq --filter "name=test-comps-ray*") - if [[ ! 
-z "$cid" ]]; then docker rm $cid -f && sleep 1s; fi -} - -function main() { - - stop_docker - - build_docker_images - start_service - - validate_microservice - - stop_docker - # echo y | docker system prune - -} - -main diff --git a/tests/test_llms_text-generation_tgi.sh b/tests/test_llms_text-generation_tgi.sh index 6b6c17c19..b2956b12b 100644 --- a/tests/test_llms_text-generation_tgi.sh +++ b/tests/test_llms_text-generation_tgi.sh @@ -2,7 +2,7 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -set -xe +set -x WORKPATH=$(dirname "$PWD") ip_address=$(hostname -I | awk '{print $1}') @@ -10,6 +10,12 @@ ip_address=$(hostname -I | awk '{print $1}') function build_docker_images() { cd $WORKPATH docker build --no-cache --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -t opea/llm-tgi:comps -f comps/llms/text-generation/tgi/Dockerfile . + if [ $? -ne 0 ]; then + echo "opea/llm-tgi built fail" + exit 1 + else + echo "opea/llm-tgi built successful" + fi } function start_service() { @@ -39,12 +45,18 @@ function start_service() { function validate_microservice() { tei_service_port=5005 - http_proxy="" curl http://${ip_address}:${tei_service_port}/v1/chat/completions \ + result=$(http_proxy="" curl http://${ip_address}:${tei_service_port}/v1/chat/completions \ -X POST \ -d '{"query":"What is Deep Learning?", "max_new_tokens": 128}' \ - -H 'Content-Type: application/json' - docker logs test-comps-llm-tgi-endpoint - docker logs test-comps-llm-tgi-server + -H 'Content-Type: application/json') + if [[ $result == *"DONE"* ]]; then + echo "Result correct." + else + echo "Result wrong. Received was $result" + docker logs test-comps-llm-tgi-endpoint + docker logs test-comps-llm-tgi-server + exit 1 + fi } function stop_docker() { diff --git a/tests/test_llms_text-generation_vllm-openvino.sh b/tests/test_llms_text-generation_vllm-openvino.sh new file mode 100755 index 000000000..ac57b29d8 --- /dev/null +++ b/tests/test_llms_text-generation_vllm-openvino.sh @@ -0,0 +1,126 @@ +#!/bin/bash +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +set -x + +WORKPATH="$( cd "$( dirname "$0" )" && pwd )" + +# Define variables +port=5033 +HF_CACHE_DIR=$HOME/.cache/huggingface +DOCKER_IMAGE="vllm-openvino:comps" +CONTAINER_NAME="test-comps-vllm-openvino-container" + +function build_container() { + cd $WORKPATH + git clone https://github.com/vllm-project/vllm.git vllm-openvino + cd ./vllm-openvino/ + docker build --no-cache -t $DOCKER_IMAGE \ + -f Dockerfile.openvino \ + . \ + --build-arg https_proxy=$https_proxy \ + --build-arg http_proxy=$http_proxy + if [ $? 
-ne 0 ]; then + echo "vllm-openvino built fail" + exit 1 + else + echo "vllm-openvino built successful" + fi + cd $WORKPATH + rm -rf vllm-openvino +} + +# Function to start Docker container +start_container() { + + docker run -d --rm --name=$CONTAINER_NAME \ + -p $port:$port \ + --ipc=host \ + -e HTTPS_PROXY=$https_proxy \ + -e HTTP_PROXY=$https_proxy \ + -v $HF_CACHE_DIR:/root/.cache/huggingface \ + vllm-openvino:comps /bin/bash -c "\ + cd / && \ + export VLLM_CPU_KVCACHE_SPACE=50 && \ + python3 -m vllm.entrypoints.openai.api_server \ + --model \"Intel/neural-chat-7b-v3-3\" \ + --host 0.0.0.0 \ + --port $port" + + # check whether service is fully ready + n=0 + until [[ "$n" -ge 300 ]]; do + docker logs $CONTAINER_NAME > /tmp/$CONTAINER_NAME.log 2>&1 + n=$((n+1)) + if grep -q "Uvicorn running on" /tmp/$CONTAINER_NAME.log; then + break + fi + sleep 3s + done + +} + +# Cleanup Function +cleanup() { + # Stop and remove Docker container and images + cid=$(docker ps -aq --filter "name=$CONTAINER_NAME") + if [[ ! -z "$cid" ]]; then docker stop $cid || docker rm $cid && sleep 1s; fi + docker rmi -f $DOCKER_IMAGE + rm /tmp/$CONTAINER_NAME.log +} + +# Function to test API endpoint +function test_api_endpoint { + local endpoint="$1" + local expected_status="$2" + + # Make the HTTP request + if test "$1" = "v1/completions" + then + local response=$(curl "http://localhost:$port/$endpoint" \ + -H "Content-Type: application/json" \ + -d '{ + "model": "Intel/neural-chat-7b-v3-3", + "prompt": "What is the key advantage of Openvino framework", + "max_tokens": 300, + "temperature": 0.7 + }' \ + --write-out '%{http_code}' \ + --silent \ + --output /dev/null) + else + local response=$(curl "http://localhost:$port/$endpoint" \ + --write-out '%{http_code}' \ + --silent \ + --output /dev/null) + fi + + # Assert the response status code + if [[ "$response" -eq "$expected_status" ]]; then + echo "PASS: $endpoint returned expected status code: $expected_status" + else + echo "FAIL: $endpoint returned unexpected status code: $response (expected: $expected_status)" + docker logs $CONTAINER_NAME + exit 1 + fi +} +# Main function +main() { + + build_container + start_container + + # Sleep to allow the container to start up fully + sleep 10 + # Test the /v1/models API + test_api_endpoint "v1/models" 200 + + # Test the /v1/completions API + test_api_endpoint "v1/completions" 200 + + cleanup +} + +# Call main function +main diff --git a/tests/test_llms_text-generation_vllm-ray.sh b/tests/test_llms_text-generation_vllm-ray.sh index 8ecb487e9..41433b27f 100644 --- a/tests/test_llms_text-generation_vllm-ray.sh +++ b/tests/test_llms_text-generation_vllm-ray.sh @@ -2,7 +2,7 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -set -xe +set -x WORKPATH=$(dirname "$PWD") ip_address=$(hostname -I | awk '{print $1}') @@ -12,18 +12,30 @@ function build_docker_images() { cd $WORKPATH docker build \ -f comps/llms/text-generation/vllm-ray/docker/Dockerfile.vllmray \ - -t vllm_ray:habana --network=host . + --no-cache -t opea/vllm_ray-habana:comps --network=host . + if [ $? -ne 0 ]; then + echo "opea/vllm_ray-habana built fail" + exit 1 + else + echo "opea/vllm_ray-habana built successful" + fi ## Build OPEA microservice docker cd $WORKPATH docker build \ - -t opea/llm-vllm-ray:comps \ + --no-cache -t opea/llm-vllm-ray:comps \ -f comps/llms/text-generation/vllm-ray/docker/Dockerfile.microservice . + if [ $? 
-ne 0 ]; then + echo "opea/llm-vllm-ray built fail" + exit 1 + else + echo "opea/llm-vllm-ray built successful" + fi } function start_service() { export LLM_MODEL="facebook/opt-125m" - port_number=8006 + port_number=5031 docker run -d --rm \ --name="test-comps-vllm-ray-service" \ --runtime=habana \ @@ -34,13 +46,13 @@ function start_service() { --ipc=host \ -e HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN \ -p $port_number:8000 \ - vllm_ray:habana \ + opea/vllm_ray-habana:comps \ /bin/bash -c "ray start --head && python vllm_ray_openai.py --port_number 8000 --model_id_or_path $LLM_MODEL --tensor_parallel_size 2 --enforce_eager False" export vLLM_RAY_ENDPOINT="http://${ip_address}:${port_number}" docker run -d --rm\ --name="test-comps-vllm-ray-microservice" \ - -p 9000:9000 \ + -p 5032:9000 \ --ipc=host \ -e vLLM_RAY_ENDPOINT=$vLLM_RAY_ENDPOINT \ -e HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN \ @@ -61,15 +73,29 @@ function start_service() { } function validate_microservice() { - http_proxy="" curl http://${ip_address}:8006/v1/chat/completions \ + result=$(http_proxy="" curl http://${ip_address}:5031/v1/chat/completions \ -H "Content-Type: application/json" \ - -d '{"model": "facebook/opt-125m", "messages": [{"role": "user", "content": "How are you?"}]}' - http_proxy="" curl http://${ip_address}:9000/v1/chat/completions \ + -d '{"model": "facebook/opt-125m", "messages": [{"role": "user", "content": "How are you?"}]}') + if [[ $result == *"message"* ]]; then + echo "Result correct." + else + echo "Result wrong. Received was $result" + docker logs test-comps-vllm-ray-service + docker logs test-comps-vllm-ray-microservice + exit 1 + fi + result=$(http_proxy="" curl http://${ip_address}:5032/v1/chat/completions \ -X POST \ -d '{"query":"What is Deep Learning?","max_new_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":false}' \ - -H 'Content-Type: application/json' - docker logs test-comps-vllm-ray-service - docker logs test-comps-vllm-ray-microservice + -H 'Content-Type: application/json') + if [[ $result == *"text"* ]]; then + echo "Result correct." + else + echo "Result wrong. Received was $result" + docker logs test-comps-vllm-ray-service + docker logs test-comps-vllm-ray-microservice + exit 1 + fi } function stop_docker() { diff --git a/tests/test_llms_text-generation_vllm.sh b/tests/test_llms_text-generation_vllm.sh index c5e7faa4b..0210f5075 100644 --- a/tests/test_llms_text-generation_vllm.sh +++ b/tests/test_llms_text-generation_vllm.sh @@ -2,7 +2,7 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -set -xe +set -x WORKPATH=$(dirname "$PWD") ip_address=$(hostname -I | awk '{print $1}') @@ -12,19 +12,31 @@ function build_docker_images() { cd $WORKPATH/comps/llms/text-generation/vllm docker build \ -f docker/Dockerfile.hpu \ - -t vllm:hpu \ + --no-cache -t opea/vllm-hpu:comps \ --shm-size=128g . + if [ $? -ne 0 ]; then + echo "opea/vllm-hpu built fail" + exit 1 + else + echo "opea/vllm-hpu built successful" + fi ## Build OPEA microservice docker cd $WORKPATH docker build \ - -t opea/llm-vllm:comps \ + --no-cache -t opea/llm-vllm:comps \ -f comps/llms/text-generation/vllm/docker/Dockerfile.microservice . + if [ $? 
-ne 0 ]; then + echo "opea/llm-vllm built fail" + exit 1 + else + echo "opea/llm-vllm built successful" + fi } function start_service() { export LLM_MODEL="facebook/opt-125m" - port_number=8008 + port_number=5025 docker run -d --rm \ --runtime=habana \ --name="test-comps-vllm-service" \ @@ -35,13 +47,13 @@ function start_service() { --cap-add=sys_nice \ --ipc=host \ -e HF_TOKEN=${HUGGINGFACEHUB_API_TOKEN} \ - vllm:hpu \ + opea/vllm-hpu:comps \ /bin/bash -c "export VLLM_CPU_KVCACHE_SPACE=40 && python3 -m vllm.entrypoints.openai.api_server --enforce-eager --model $LLM_MODEL --tensor-parallel-size 1 --host 0.0.0.0 --port 80 --block-size 128 --max-num-seqs 256 --max-seq_len-to-capture 2048" export vLLM_ENDPOINT="http://${ip_address}:${port_number}" docker run -d --rm \ --name="test-comps-vllm-microservice" \ - -p 9000:9000 \ + -p 5030:9000 \ --ipc=host \ -e vLLM_ENDPOINT=$vLLM_ENDPOINT \ -e HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN \ @@ -62,21 +74,35 @@ function start_service() { } function validate_microservice() { - http_proxy="" curl http://${ip_address}:8008/v1/completions \ + result=$(http_proxy="" curl http://${ip_address}:5025/v1/completions \ -H "Content-Type: application/json" \ -d '{ "model": "facebook/opt-125m", "prompt": "What is Deep Learning?", "max_tokens": 32, "temperature": 0 - }' - http_proxy="" curl http://${ip_address}:9000/v1/chat/completions \ + }') + if [[ $result == *"text"* ]]; then + echo "Result correct." + else + echo "Result wrong. Received was $result" + docker logs test-comps-vllm-service + docker logs test-comps-vllm-microservice + exit 1 + fi + result=$(http_proxy="" curl http://${ip_address}:5030/v1/chat/completions \ -X POST \ -d '{"query":"What is Deep Learning?","max_new_tokens":17,"top_p":0.95,"temperature":0.01,"streaming":false}' \ - -H 'Content-Type: application/json' - docker logs test-comps-vllm-service - docker logs test-comps-vllm-microservice - } + -H 'Content-Type: application/json') + if [[ $result == *"text"* ]]; then + echo "Result correct." + else + echo "Result wrong. Received was $result" + docker logs test-comps-vllm-service + docker logs test-comps-vllm-microservice + exit 1 + fi +} function stop_docker() { cid=$(docker ps -aq --filter "name=test-comps-vllm*") diff --git a/tests/test_lvms_llava.sh b/tests/test_lvms_llava.sh index da7c740a9..08f138e2f 100644 --- a/tests/test_lvms_llava.sh +++ b/tests/test_lvms_llava.sh @@ -2,7 +2,7 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -set -xe +set -x WORKPATH=$(dirname "$PWD") ip_address=$(hostname -I | awk '{print $1}') @@ -10,23 +10,38 @@ ip_address=$(hostname -I | awk '{print $1}') function build_docker_images() { cd $WORKPATH echo $(pwd) - docker build -t opea/llava:latest -f comps/lvms/llava/Dockerfile . - docker build --no-cache -t opea/lvm:latest -f comps/lvms/Dockerfile . + docker build --no-cache -t opea/llava:comps -f comps/lvms/llava/Dockerfile . + if [ $? -ne 0 ]; then + echo "opea/llava built fail" + exit 1 + else + echo "opea/llava built successful" + fi + docker build --no-cache -t opea/lvm:comps -f comps/lvms/Dockerfile . + if [ $? 
-ne 0 ]; then + echo "opea/lvm built fail" + exit 1 + else + echo "opea/lvm built successful" + fi } function start_service() { unset http_proxy - docker run -d --name="test-comps-lvm-llava" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p 8399:8399 --ipc=host opea/llava:latest - docker run -d --name="test-comps-lvm" -e LVM_ENDPOINT=http://$ip_address:8399 -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p 9399:9399 --ipc=host opea/lvm:latest + docker run -d --name="test-comps-lvm-llava" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p 5029:8399 --ipc=host opea/llava:comps + docker run -d --name="test-comps-lvm" -e LVM_ENDPOINT=http://$ip_address:5029 -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p 5028:9399 --ipc=host opea/lvm:comps sleep 8m } function validate_microservice() { - result=$(http_proxy="" curl http://localhost:9399/v1/lvm -XPOST -d '{"image": "iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAAFUlEQVR42mP8/5+hnoEIwDiqkL4KAcT9GO0U4BxoAAAAAElFTkSuQmCC", "prompt":"What is this?"}' -H 'Content-Type: application/json') + + result=$(http_proxy="" curl http://localhost:5028/v1/lvm -XPOST -d '{"image": "iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAAFUlEQVR42mP8/5+hnoEIwDiqkL4KAcT9GO0U4BxoAAAAAElFTkSuQmCC", "prompt":"What is this?"}' -H 'Content-Type: application/json') if [[ $result == *"yellow"* ]]; then echo "Result correct." else echo "Result wrong." + docker logs test-comps-lvm-llava + docker logs test-comps-lvm exit 1 fi diff --git a/tests/test_lvms_tgi_llava_next.sh b/tests/test_lvms_tgi_llava_next.sh new file mode 100644 index 000000000..c9b28f6d5 --- /dev/null +++ b/tests/test_lvms_tgi_llava_next.sh @@ -0,0 +1,72 @@ +#!/bin/bash +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +set -x + +WORKPATH=$(dirname "$PWD") +ip_address=$(hostname -I | awk '{print $1}') + +function build_docker_images() { + cd $WORKPATH + echo $(pwd) + git clone https://github.com/yuanwu2017/tgi-gaudi.git && cd tgi-gaudi && git checkout v2.0.4 + docker build --no-cache -t opea/llava-tgi:comps . + if [ $? -ne 0 ]; then + echo "opea/llava-tgi built fail" + exit 1 + else + echo "opea/llava-tgi built successful" + fi + + cd .. + docker build --no-cache -t opea/lvm-tgi:comps -f comps/lvms/Dockerfile_tgi . + if [ $? -ne 0 ]; then + echo "opea/lvm-tgi built fail" + exit 1 + else + echo "opea/lvm-tgi built successful" + fi +} + +function start_service() { + unset http_proxy + model="llava-hf/llava-v1.6-mistral-7b-hf" + docker run -d --name="test-comps-lvm-llava-tgi" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p 5027:80 --runtime=habana -e PT_HPU_ENABLE_LAZY_COLLECTIVES=true -e SKIP_TOKENIZER_IN_TGI=true -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host opea/llava-tgi:comps --model-id $model --max-input-tokens 4096 --max-total-tokens 8192 + docker run -d --name="test-comps-lvm-tgi" -e LVM_ENDPOINT=http://$ip_address:5027 -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p 5028:9399 --ipc=host opea/lvm-tgi:comps + sleep 3m +} + +function validate_microservice() { + result=$(http_proxy="" curl http://localhost:5028/v1/lvm -XPOST -d '{"image": "iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAAFUlEQVR42mP8/5+hnoEIwDiqkL4KAcT9GO0U4BxoAAAAAElFTkSuQmCC", "prompt":"What is this?"}' -H 'Content-Type: application/json') + if [[ $result == *"yellow"* ]]; then + echo "Result correct." + else + echo "Result wrong." 
+        docker logs test-comps-lvm-llava-tgi
+        docker logs test-comps-lvm-tgi
+        exit 1
+    fi
+
+}
+
+function stop_docker() {
+    cid=$(docker ps -aq --filter "name=test-comps-lvm*")
+    if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi
+}
+
+function main() {
+
+    stop_docker
+
+    build_docker_images
+    start_service
+
+    validate_microservice
+
+    stop_docker
+    echo y | docker system prune
+
+}
+
+main
diff --git a/tests/test_lvms_video-llama.sh b/tests/test_lvms_video-llama.sh
new file mode 100755
index 000000000..a9dcbf3a7
--- /dev/null
+++ b/tests/test_lvms_video-llama.sh
@@ -0,0 +1,95 @@
+#!/bin/bash
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+set -xe
+
+WORKPATH=$(dirname "$PWD")
+ip_address=$(hostname -I | awk '{print $1}')
+
+function build_docker_images() {
+    cd $WORKPATH
+    echo $(pwd)
+    docker build --no-cache -t opea/video-llama-lvm-server:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/lvms/video-llama/server/docker/Dockerfile .
+    if [ $? -ne 0 ]; then
+        echo "opea/video-llama-lvm-server built fail"
+        exit 1
+    else
+        echo "opea/video-llama-lvm-server built successful"
+    fi
+    docker build --no-cache -t opea/lvm-video-llama:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/lvms/video-llama/Dockerfile .
+    if [ $? -ne 0 ]; then
+        echo "opea/lvm-video-llama built fail"
+        exit 1
+    else
+        echo "opea/lvm-video-llama built successful"
+    fi
+
+}
+
+function start_service() {
+    cd $WORKPATH
+    unset http_proxy
+    export LVM_ENDPOINT=http://$ip_address:5030
+
+    docker run -d --name="test-comps-lvm-video-llama" -p 5030:9009 \
+        --ipc=host \
+        -e http_proxy=$http_proxy \
+        -e https_proxy=$https_proxy \
+        -e no_proxy=$no_proxy \
+        -e llm_download="True" \
+        -v "/home/$USER/.cache:/home/user/.cache" \
+        -v video-llama-model:/home/user/model \
+        opea/video-llama-lvm-server:latest
+
+    docker run -d --name="test-comps-lvm" -p 5031:9000 \
+        --ipc=host \
+        -e http_proxy=$http_proxy \
+        -e https_proxy=$https_proxy \
+        -e no_proxy=$no_proxy \
+        -e LVM_ENDPOINT=$LVM_ENDPOINT \
+        opea/lvm-video-llama:latest
+
+    echo "Waiting for the LVM service to start"
+    until docker logs test-comps-lvm 2>&1 | grep -q "Uvicorn running on"; do
+        sleep 5
+    done
+
+    echo "Waiting for the Video-Llama service to start, downloading model..."
+    until docker logs test-comps-lvm-video-llama 2>&1 | grep -q "Uvicorn running on"; do
+        sleep 5m
+    done
+}
+
+function validate_microservice() {
+    result=$(http_proxy="" curl http://localhost:5031/v1/lvm -X POST -d '{"video_url":"silence_girl.mp4","chunk_start": 0,"chunk_duration": 7,"prompt":"What is the person doing?","max_new_tokens": 50}' -H 'Content-Type: application/json')
+    if [[ $result == *"silence"* ]]; then
+        echo "Result correct."
+    else
+        echo "Result wrong."
+        exit 1
+    fi
+}
+
+function stop_docker() {
+    cid=$(docker ps -aq --filter "name=test-comps-lvm*")
+    if [[ !
-z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi + if docker volume ls | grep -q video-llama-model; then docker volume rm video-llama-model; fi + +} + +function main() { + + stop_docker + + build_docker_images + start_service + + validate_microservice + + stop_docker + echo y | docker system prune + +} + +main diff --git a/tests/test_multimodal_embeddings_langchain_cpu.sh b/tests/test_multimodal_embeddings_langchain_cpu.sh new file mode 100644 index 000000000..77a7b6d99 --- /dev/null +++ b/tests/test_multimodal_embeddings_langchain_cpu.sh @@ -0,0 +1,111 @@ +#!/bin/bash +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +set -x + +WORKPATH=$(dirname "$PWD") +ip_address=$(hostname -I | awk '{print $1}') +export your_mmei_port=8089 +export EMBEDDER_PORT=$your_mmei_port +export MMEI_EMBEDDING_ENDPOINT="http://$ip_address:$your_mmei_port/v1/encode" +export your_embedding_port_microservice=6609 +export MM_EMBEDDING_PORT_MICROSERVICE=$your_embedding_port_microservice +unset http_proxy + +function build_mmei_docker_images() { + cd $WORKPATH + echo $(pwd) + docker build --no-cache -t opea/bridgetower-embedder:latest --build-arg EMBEDDER_PORT=$EMBEDDER_PORT --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/embeddings/multimodal_embeddings/bridgetower/docker/Dockerfile . + + if [ $? -ne 0 ]; then + echo "opea/bridgetower-embedder built fail" + exit 1 + else + echo "opea/bridgetower-embedder built successful" + fi +} + +function build_embedding_service_images() { + cd $WORKPATH + echo $(pwd) + docker build --no-cache -t opea/embedding-multimodal:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/embeddings/multimodal_embeddings/multimodal_langchain/docker/Dockerfile . + + if [ $? -ne 0 ]; then + echo "opea/embedding-multimodal built fail" + exit 1 + else + echo "opea/embedding-multimodal built successful" + fi +} + +function build_docker_images() { + build_mmei_docker_images + build_embedding_service_images +} + +function start_service() { + cd $WORKPATH + cd comps/embeddings/multimodal_embeddings/bridgetower/docker/ + docker compose -f docker_compose_bridgetower_embedding_endpoint.yaml up -d + cd $WORKPATH + cd comps/embeddings/multimodal_embeddings/multimodal_langchain/docker/ + docker compose -f docker_compose_multimodal_embedding.yaml up -d + sleep 2m +} +function validate_microservice_text_embedding() { + result=$(http_proxy="" curl http://${ip_address}:$MM_EMBEDDING_PORT_MICROSERVICE/v1/embeddings \ + -X POST \ + -H "Content-Type: application/json" \ + -d '{"text" : "This is some sample text."}') + + if [[ $result == *"embedding"* ]]; then + echo "Result correct." + else + echo "Result wrong. Received was $result" + docker logs bridgetower-embedding-server + docker logs embedding-multimodal-server + exit 1 + fi +} + +function validate_microservice_image_text_pair_embedding() { + result=$(http_proxy="" curl http://${ip_address}:$MM_EMBEDDING_PORT_MICROSERVICE/v1/embeddings \ + -X POST \ + -H "Content-Type: application/json" \ + -d '{"text": {"text" : "This is some sample text."}, "image" : {"url": "https://github.com/docarray/docarray/blob/main/tests/toydata/image-data/apple.png?raw=true"}}') + + if [[ $result == *"embedding"* ]]; then + echo "Result correct." + else + echo "Result wrong. 
Received was $result" + docker logs bridgetower-embedding-server + docker logs embedding-multimodal-server + exit 1 + fi +} + +function validate_microservice() { + validate_microservice_text_embedding + validate_microservice_image_text_pair_embedding +} + +function stop_docker() { + cid=$(docker ps -aq --filter "name=bridgetower-embedding-server" --filter "name=embedding-multimodal-server") + if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi +} + +function main() { + + stop_docker + + build_docker_images + start_service + + validate_microservice + + stop_docker + echo y | docker system prune +} + +main diff --git a/tests/test_multimodal_embeddings_langchain_hpu.sh b/tests/test_multimodal_embeddings_langchain_hpu.sh new file mode 100644 index 000000000..50c789c7d --- /dev/null +++ b/tests/test_multimodal_embeddings_langchain_hpu.sh @@ -0,0 +1,111 @@ +#!/bin/bash +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +set -x + +WORKPATH=$(dirname "$PWD") +ip_address=$(hostname -I | awk '{print $1}') +export your_mmei_port=8089 +export EMBEDDER_PORT=$your_mmei_port +export MMEI_EMBEDDING_ENDPOINT="http://$ip_address:$your_mmei_port/v1/encode" +export your_embedding_port_microservice=6609 +export MM_EMBEDDING_PORT_MICROSERVICE=$your_embedding_port_microservice +unset http_proxy + +function build_mmei_docker_images() { + cd $WORKPATH + echo $(pwd) + docker build --no-cache -t opea/bridgetower-embedder:latest --build-arg EMBEDDER_PORT=$EMBEDDER_PORT --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/embeddings/multimodal_embeddings/bridgetower/docker/Dockerfile_hpu . + + if [ $? -ne 0 ]; then + echo "opea/bridgetower-embedder built fail" + exit 1 + else + echo "opea/bridgetower-embedder built successful" + fi +} + +function build_embedding_service_images() { + cd $WORKPATH + echo $(pwd) + docker build --no-cache -t opea/embedding-multimodal:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/embeddings/multimodal_embeddings/multimodal_langchain/docker/Dockerfile . + + if [ $? -ne 0 ]; then + echo "opea/embedding-multimodal built fail" + exit 1 + else + echo "opea/embedding-multimodal built successful" + fi +} + +function build_docker_images() { + build_mmei_docker_images + build_embedding_service_images +} + +function start_service() { + cd $WORKPATH + cd comps/embeddings/multimodal_embeddings/bridgetower/docker/ + docker compose -f docker_compose_bridgetower_embedding_endpoint.yaml up -d + cd $WORKPATH + cd comps/embeddings/multimodal_embeddings/multimodal_langchain/docker/ + docker compose -f docker_compose_multimodal_embedding.yaml up -d + sleep 2m +} +function validate_microservice_text_embedding() { + result=$(http_proxy="" curl http://${ip_address}:$MM_EMBEDDING_PORT_MICROSERVICE/v1/embeddings \ + -X POST \ + -H "Content-Type: application/json" \ + -d '{"text" : "This is some sample text."}') + + if [[ $result == *"embedding"* ]]; then + echo "Result correct." + else + echo "Result wrong. 
Received was $result" + docker logs bridgetower-embedding-server + docker logs embedding-multimodal-server + exit 1 + fi +} + +function validate_microservice_image_text_pair_embedding() { + result=$(http_proxy="" curl http://${ip_address}:$MM_EMBEDDING_PORT_MICROSERVICE/v1/embeddings \ + -X POST \ + -H "Content-Type: application/json" \ + -d '{"text": {"text" : "This is some sample text."}, "image" : {"url": "https://github.com/docarray/docarray/blob/main/tests/toydata/image-data/apple.png?raw=true"}}') + + if [[ $result == *"embedding"* ]]; then + echo "Result correct." + else + echo "Result wrong. Received was $result" + docker logs bridgetower-embedding-server + docker logs embedding-multimodal-server + exit 1 + fi +} + +function validate_microservice() { + validate_microservice_text_embedding + validate_microservice_image_text_pair_embedding +} + +function stop_docker() { + cid=$(docker ps -aq --filter "name=bridgetower-embedding-server" --filter "name=embedding-multimodal-server") + if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi +} + +function main() { + + stop_docker + + build_docker_images + start_service + + validate_microservice + + stop_docker + echo y | docker system prune +} + +main diff --git a/tests/test_nginx.sh b/tests/test_nginx.sh new file mode 100644 index 000000000..626c6974a --- /dev/null +++ b/tests/test_nginx.sh @@ -0,0 +1,77 @@ +#!/bin/bash +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +set -x + +WORKPATH=$(dirname "$PWD") +LOG_PATH="$WORKPATH/tests" +ip_address=$(hostname -I | awk '{print $1}') + +function build_docker_images() { + cd $WORKPATH + docker build --no-cache --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -t opea/nginx:comps -f comps/nginx/docker/Dockerfile . + if [ $? -ne 0 ]; then + echo "opea/nginx built fail" + exit 1 + else + echo "opea/nginx built successful" + fi +} + +function start_service() { + export NGINX_PORT=80 + + # Start Docker Containers + docker run -d --name test-comps-nginx-server -p 80:80 opea/nginx:comps + + sleep 5s +} + +function validate_service() { + NGINX_PORT=80 + URL="http://${ip_address}:${NGINX_PORT}/home" + DOCKER_NAME="test-comps-nginx-server" + SERVICE_NAME="nginx" + EXPECTED_RESULT="Welcome to nginx!" + + HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" "$URL") + HTTP_STATUS=$(echo $HTTP_RESPONSE | tr -d '\n' | sed -e 's/.*HTTPSTATUS://') + RESPONSE_BODY=$(echo $HTTP_RESPONSE | sed -e 's/HTTPSTATUS\:.*//g') + + docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log + + # check response status + if [ "$HTTP_STATUS" -ne "200" ]; then + echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" + exit 1 + else + echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." + fi + # check response body + if [[ "$RESPONSE_BODY" != *"$EXPECTED_RESULT"* ]]; then + echo "[ $SERVICE_NAME ] Content does not match the expected result: $RESPONSE_BODY" + exit 1 + else + echo "[ $SERVICE_NAME ] Content is as expected." + fi +} + +function stop_docker() { + cid=$(docker ps -aq --filter "name=test-comps-nginx*") + if [[ ! 
-z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi +} + +function main() { + + stop_docker + build_docker_images + start_service + + validate_service + + echo y | docker system prune + +} + +main diff --git a/tests/test_prompt_registry_mongo.sh b/tests/test_prompt_registry_mongo.sh index bdf5d907c..b5d976999 100644 --- a/tests/test_prompt_registry_mongo.sh +++ b/tests/test_prompt_registry_mongo.sh @@ -2,7 +2,7 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -set -xe +set -x WORKPATH=$(dirname "$PWD") ip_address=$(hostname -I | awk '{print $1}') @@ -17,12 +17,18 @@ function build_docker_images() { echo $(pwd) docker run -d -p 27017:27017 --name=test-comps-mongo mongo:latest - docker build --no-cache -t opea/promptregistry-mongo-server:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/prompt_registry/mongo/docker/Dockerfile . + docker build --no-cache -t opea/promptregistry-mongo-server:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/prompt_registry/mongo/docker/Dockerfile . + if [ $? -ne 0 ]; then + echo "opea/promptregistry-mongo-server built fail" + exit 1 + else + echo "opea/promptregistry-mongo-server built successful" + fi } function start_service() { - docker run -d --name="test-comps-promptregistry-mongo-server" -p 6012:6012 -p 6013:6013 -p 6014:6014 -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e no_proxy=$no_proxy -e MONGO_HOST=${MONGO_HOST} -e MONGO_PORT=${MONGO_PORT} -e DB_NAME=${DB_NAME} -e COLLECTION_NAME=${COLLECTION_NAME} opea/promptregistry-mongo-server:latest + docker run -d --name="test-comps-promptregistry-mongo-server" -p 6012:6012 -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e no_proxy=$no_proxy -e MONGO_HOST=${MONGO_HOST} -e MONGO_PORT=${MONGO_PORT} -e DB_NAME=${DB_NAME} -e COLLECTION_NAME=${COLLECTION_NAME} opea/promptregistry-mongo-server:comps sleep 10s } @@ -40,6 +46,7 @@ function validate_microservice() { echo "Correct result." else echo "Incorrect result." + docker logs test-comps-promptregistry-mongo-server exit 1 fi diff --git a/tests/test_reranks_fastrag.sh b/tests/test_reranks_fastrag.sh index d423d19d5..7b0575523 100644 --- a/tests/test_reranks_fastrag.sh +++ b/tests/test_reranks_fastrag.sh @@ -2,30 +2,42 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -set -xe +set -x WORKPATH=$(dirname "$PWD") ip_address=$(hostname -I | awk '{print $1}') function build_docker_images() { cd $WORKPATH docker build --no-cache --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -t opea/reranking-fastrag:comps -f comps/reranks/fastrag/docker/Dockerfile . + if [ $? 
-ne 0 ]; then + echo "opea/reranking-fastrag built fail" + exit 1 + else + echo "opea/reranking-fastrag built successful" + fi } function start_service() { export EMBED_MODEL="Intel/bge-small-en-v1.5-rag-int8-static" - fastrag_service_port=8000 + fastrag_service_port=5020 unset http_proxy docker run -d --name="test-comps-reranking-fastrag-server" -p ${fastrag_service_port}:8000 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e EMBED_MODEL=$EMBED_MODEL opea/reranking-fastrag:comps sleep 3m } function validate_microservice() { - fastrag_service_port=8000 - http_proxy="" curl http://${ip_address}:${fastrag_service_port}/v1/reranking\ + fastrag_service_port=5020 + result=$(http_proxy="" curl http://${ip_address}:${fastrag_service_port}/v1/reranking\ -X POST \ -d '{"initial_query":"What is Deep Learning?", "retrieved_docs": [{"text":"Deep Learning is not..."}, {"text":"Deep learning is..."}]}' \ - -H 'Content-Type: application/json' - docker logs test-comps-reranking-fastrag-server + -H 'Content-Type: application/json') + if [[ $result == *"reranked_docs"* ]]; then + echo "Result correct." + else + echo "Result wrong. Received was $result" + docker logs test-comps-reranking-fastrag-server + exit 1 + fi } function stop_docker() { diff --git a/tests/test_reranks_langchain-mosec.sh b/tests/test_reranks_langchain-mosec.sh index 899db5122..d34957a4c 100644 --- a/tests/test_reranks_langchain-mosec.sh +++ b/tests/test_reranks_langchain-mosec.sh @@ -2,7 +2,7 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -set -xe +set -x WORKPATH=$(dirname "$PWD") ip_address=$(hostname -I | awk '{print $1}') @@ -10,20 +10,32 @@ ip_address=$(hostname -I | awk '{print $1}') function build_mosec_docker_images() { cd $WORKPATH echo $(pwd) - docker build --build-arg http_proxy=$http_proxy --build-arg https_proxy=$https_proxy --no-cache -t reranking-langchain-mosec:comps -f comps/reranks/langchain-mosec/mosec-docker/Dockerfile . + docker build --build-arg http_proxy=$http_proxy --build-arg https_proxy=$https_proxy --no-cache -t opea/reranking-langchain-mosec-endpoint:comps -f comps/reranks/langchain-mosec/mosec-docker/Dockerfile . + if [ $? -ne 0 ]; then + echo "opea/reranking-langchain-mosec-endpoint built fail" + exit 1 + else + echo "opea/reranking-langchain-mosec-endpoint built successful" + fi } function build_docker_images() { cd $WORKPATH echo $(pwd) docker build --build-arg http_proxy=$http_proxy --build-arg https_proxy=$https_proxy --no-cache -t opea/reranking-langchain-mosec:comps -f comps/reranks/langchain-mosec/docker/Dockerfile . + if [ $? 
-ne 0 ]; then + echo "opea/reranking-langchain-mosec built fail" + exit 1 + else + echo "opea/reranking-langchain-mosec built successful" + fi } function start_service() { mosec_endpoint=5006 model="BAAI/bge-reranker-large" unset http_proxy - docker run -d --name="test-comps-reranking-langchain-mosec-endpoint" -p $mosec_endpoint:8000 reranking-langchain-mosec:comps + docker run -d --name="test-comps-reranking-langchain-mosec-endpoint" -p $mosec_endpoint:8000 opea/reranking-langchain-mosec-endpoint:comps export MOSEC_RERANKING_ENDPOINT="http://${ip_address}:${mosec_endpoint}" mosec_service_port=5007 docker run -d --name="test-comps-reranking-langchain-mosec-server" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p ${mosec_service_port}:8000 --ipc=host -e MOSEC_RERANKING_ENDPOINT=$MOSEC_RERANKING_ENDPOINT opea/reranking-langchain-mosec:comps @@ -32,12 +44,18 @@ function start_service() { function validate_microservice() { mosec_service_port=5007 - http_proxy="" curl http://${ip_address}:${mosec_service_port}/v1/reranking\ + result=$(http_proxy="" curl http://${ip_address}:${mosec_service_port}/v1/reranking\ -X POST \ -d '{"initial_query":"What is Deep Learning?", "retrieved_docs": [{"text":"Deep Learning is not..."}, {"text":"Deep learning is..."}]}' \ - -H 'Content-Type: application/json' - docker logs test-comps-reranking-langchain-mosec-server - docker logs test-comps-reranking-langchain-mosec-endpoint + -H 'Content-Type: application/json') + if [[ $result == *"Human"* ]]; then + echo "Result correct." + else + echo "Result wrong. Received was $result" + docker logs test-comps-reranking-langchain-mosec-endpoint + docker logs test-comps-reranking-langchain-mosec-server + exit 1 + fi } function stop_docker() { diff --git a/tests/test_reranks_tei.sh b/tests/test_reranks_tei.sh index 0777e7e4d..0b146d81e 100644 --- a/tests/test_reranks_tei.sh +++ b/tests/test_reranks_tei.sh @@ -2,13 +2,19 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -set -xe +set -x WORKPATH=$(dirname "$PWD") ip_address=$(hostname -I | awk '{print $1}') function build_docker_images() { cd $WORKPATH docker build --no-cache -t opea/reranking-tei:comps -f comps/reranks/tei/docker/Dockerfile . + if [ $? -ne 0 ]; then + echo "opea/reranking-tei built fail" + exit 1 + else + echo "opea/reranking-tei built successful" + fi } function start_service() { @@ -34,7 +40,7 @@ function validate_microservice() { -d '{"initial_query":"What is Deep Learning?", "retrieved_docs": [{"text":"Deep Learning is not..."}, {"text":"Deep learning is..."}]}' \ -H 'Content-Type: application/json') - if echo "$CONTENT" | grep -q "### Search results:"; then + if echo "$CONTENT" | grep -q "documents"; then echo "Content is as expected." else echo "Content does not match the expected result: $CONTENT" diff --git a/tests/test_reranks_video-rag-qna.sh b/tests/test_reranks_video-rag-qna.sh new file mode 100755 index 000000000..cf4d0c5c8 --- /dev/null +++ b/tests/test_reranks_video-rag-qna.sh @@ -0,0 +1,78 @@ +#!/bin/bash +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +set -xe + +WORKPATH=$(dirname "$PWD") +ip_address=$(hostname -I | awk '{print $1}') + +function build_docker_images() { + cd $WORKPATH + docker build --no-cache -t opea/reranking-videoragqna:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/reranks/video-rag-qna/docker/Dockerfile . 
+} + +function start_service() { + docker run -d --name "test-comps-reranking-videoragqna-server" \ + -p 5037:8000 \ + --ipc=host \ + -e no_proxy=${no_proxy} \ + -e http_proxy=${http_proxy} \ + -e https_proxy=${https_proxy} \ + -e CHUNK_DURATION=${CHUNK_DURATION} \ + -e FILE_SERVER_ENDPOINT=${FILE_SERVER_ENDPOINT} \ + opea/reranking-videoragqna:latest + + + until docker logs test-comps-reranking-videoragqna-server 2>&1 | grep -q "Uvicorn running on"; do + sleep 2 + done +} + +function validate_microservice() { + result=$(\ + http_proxy="" \ + curl -X 'POST' \ + "http://${ip_address}:5037/v1/reranking" \ + -H 'accept: application/json' \ + -H 'Content-Type: application/json' \ + -d '{ + "retrieved_docs": [ + {"doc": [{"text": "this is the retrieved text"}]} + ], + "initial_query": "this is the query", + "top_n": 1, + "metadata": [ + {"other_key": "value", "video":"top_video_name", "timestamp":"20"}, + {"other_key": "value", "video":"second_video_name", "timestamp":"40"}, + {"other_key": "value", "video":"top_video_name", "timestamp":"20"} + ] + }') + if [[ $result == *"this is the query"* ]]; then + echo "Result correct." + else + echo "Result wrong." + exit 1 + fi +} + +function stop_docker() { + cid=$(docker ps -aq --filter "name=test-comps-reranking*") + if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi +} + +function main() { + + stop_docker + + build_docker_images + start_service + + validate_microservice + + stop_docker + echo y | docker system prune + +} + +main diff --git a/tests/test_retrievers_haystack_qdrant.sh b/tests/test_retrievers_haystack_qdrant.sh index b1f8a02e8..4fdfb13d6 100644 --- a/tests/test_retrievers_haystack_qdrant.sh +++ b/tests/test_retrievers_haystack_qdrant.sh @@ -2,13 +2,19 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -set -xe +set -x WORKPATH=$(dirname "$PWD") ip_address=$(hostname -I | awk '{print $1}') function build_docker_images() { cd $WORKPATH docker build --no-cache -t opea/retriever-qdrant:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/haystack/qdrant/docker/Dockerfile . + if [ $? -ne 0 ]; then + echo "opea/retriever-qdrant built fail" + exit 1 + else + echo "opea/retriever-qdrant built successful" + fi } function start_service() { @@ -39,12 +45,18 @@ function validate_microservice() { export PATH="${HOME}/miniforge3/bin:$PATH" source activate test_embedding=$(python -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)") - http_proxy='' curl http://${ip_address}:$retriever_port/v1/retrieval \ + result=$(http_proxy='' curl http://${ip_address}:$retriever_port/v1/retrieval \ -X POST \ -d "{\"text\":\"test\",\"embedding\":${test_embedding}}" \ - -H 'Content-Type: application/json' - docker logs test-comps-retriever-qdrant-server - docker logs test-comps-retriever-tei-endpoint + -H 'Content-Type: application/json') + if [[ $result == *"retrieved_docs"* ]]; then + echo "Result correct." + else + echo "Result wrong. 
Received was $result" + docker logs test-comps-retriever-qdrant-server + docker logs test-comps-retriever-tei-endpoint + exit 1 + fi } function stop_docker() { diff --git a/tests/test_retrievers_langchain_pathway.sh b/tests/test_retrievers_langchain_pathway.sh new file mode 100644 index 000000000..4db471b56 --- /dev/null +++ b/tests/test_retrievers_langchain_pathway.sh @@ -0,0 +1,103 @@ +#!/bin/bash +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +set -x + +WORKPATH=$(dirname "$PWD") +LOG_PATH="$WORKPATH/tests" +ip_address=$(hostname -I | awk '{print $1}') +function build_docker_images() { + cd $WORKPATH + + cd comps/vectorstores/langchain/pathway + + docker build --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -t opea/vectorstore-pathway:comps . + + cd $WORKPATH + + docker build --no-cache -t opea/retriever-pathway:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/langchain/pathway/docker/Dockerfile . + if [ $? -ne 0 ]; then + echo "opea/retriever-pathway built fail" + exit 1 + else + echo "opea/retriever-pathway built successful" + fi +} + +function start_service() { + cd $WORKPATH + + # tei endpoint + tei_endpoint=5008 + model="BAAI/bge-base-en-v1.5" + docker run -d --name="test-comps-retriever-tei-endpoint" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p $tei_endpoint:80 -v ./data:/data --pull always ghcr.io/huggingface/text-embeddings-inference:cpu-1.2 --model-id $model + + sleep 30s + export TEI_EMBEDDING_ENDPOINT="http://${ip_address}:${tei_endpoint}" + + result=$(http_proxy='' + curl $TEI_EMBEDDING_ENDPOINT -X POST -d '{"inputs":"Hey,"}' -H 'Content-Type: application/json') + + echo "embed_result:" + echo $result + + sleep 30s + + # pathway + export PATHWAY_HOST="0.0.0.0" + export PATHWAY_PORT=5432 + + docker run -d --name="test-comps-vectorstore-pathway" -e PATHWAY_HOST=${PATHWAY_HOST} -e PATHWAY_PORT=${PATHWAY_PORT} -e TEI_EMBEDDING_ENDPOINT=${TEI_EMBEDDING_ENDPOINT} -e http_proxy=$http_proxy -e https_proxy=$https_proxy -v $WORKPATH/comps/vectorstores/langchain/pathway/README.md:/app/data/README.md -p ${PATHWAY_PORT}:${PATHWAY_PORT} --network="host" opea/vectorstore-pathway:comps + + sleep 45s + + export PATHWAY_HOST=$ip_address # needed in order to reach to vector store + + docker run -d --name="test-comps-retriever-pathway" -p 5009:7000 -e PATHWAY_HOST=${PATHWAY_HOST} -e PATHWAY_PORT=${PATHWAY_PORT} -e http_proxy=$http_proxy -e https_proxy=$https_proxy opea/retriever-pathway:comps + + sleep 10s +} + +function validate_microservice() { + retriever_port=5009 + export PATH="${HOME}/miniforge3/bin:$PATH" + + test_embedding=$(python -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)") + + result=$(http_proxy='' + curl http://${ip_address}:$retriever_port/v1/retrieval \ + -X POST \ + -d "{\"text\":\"test\",\"embedding\":${test_embedding}}" \ + -H 'Content-Type: application/json') + if [[ $result == *"retrieved_docs"* ]]; then + echo "Result correct." + else + echo "Result wrong. Received was $result" + docker logs test-comps-vectorstore-pathway >> ${LOG_PATH}/vectorstore-pathway.log + docker logs test-comps-retriever-tei-endpoint >> ${LOG_PATH}/tei-endpoint.log + docker logs test-comps-retriever-pathway >> ${LOG_PATH}/retriever-pathway.log + exit 1 + fi +} + +function stop_docker() { + cid=$(docker ps -aq --filter "name=test-comps*") + if [[ ! 
-z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi +} + +function main() { + + stop_docker + + build_docker_images + start_service + + validate_microservice + + stop_docker + echo y | docker system prune + +} + +main diff --git a/tests/test_retrievers_langchain_pgvector.sh b/tests/test_retrievers_langchain_pgvector.sh index 4c5b08963..41295eb5e 100755 --- a/tests/test_retrievers_langchain_pgvector.sh +++ b/tests/test_retrievers_langchain_pgvector.sh @@ -2,13 +2,19 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -set -xe +set -x WORKPATH=$(dirname "$PWD") ip_address=$(hostname -I | awk '{print $1}') function build_docker_images() { cd $WORKPATH docker build --no-cache -t opea/retriever-pgvector:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/langchain/pgvector/docker/Dockerfile . + if [ $? -ne 0 ]; then + echo "opea/retriever-pgvector built fail" + exit 1 + else + echo "opea/retriever-pgvector built successful" + fi } function start_service() { @@ -17,7 +23,7 @@ function start_service() { export POSTGRES_PASSWORD=testpwd export POSTGRES_DB=vectordb - docker run --name test-vectorstore-postgres -e POSTGRES_USER=${POSTGRES_USER} -e POSTGRES_HOST_AUTH_METHOD=trust -e POSTGRES_DB=${POSTGRES_DB} -e POSTGRES_PASSWORD=${POSTGRES_PASSWORD} -d -v $WORKPATH/comps/vectorstores/langchain/pgvector/init.sql:/docker-entrypoint-initdb.d/init.sql -p 5432:5432 pgvector/pgvector:0.7.0-pg16 + docker run --name test-comps-vectorstore-postgres -e POSTGRES_USER=${POSTGRES_USER} -e POSTGRES_HOST_AUTH_METHOD=trust -e POSTGRES_DB=${POSTGRES_DB} -e POSTGRES_PASSWORD=${POSTGRES_PASSWORD} -d -v $WORKPATH/comps/vectorstores/langchain/pgvector/init.sql:/docker-entrypoint-initdb.d/init.sql -p 5432:5432 pgvector/pgvector:0.7.0-pg16 sleep 10s # tei endpoint @@ -28,21 +34,27 @@ function start_service() { export TEI_EMBEDDING_ENDPOINT="http://${ip_address}:${tei_endpoint}" # pgvector retriever - docker run -d --name="test-retriever-pgvector" -p 7000:7000 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e PG_CONNECTION_STRING=postgresql+psycopg2://${POSTGRES_USER}:${POSTGRES_PASSWORD}@$ip_address:5432/${POSTGRES_DB} -e INDEX_NAME=$INDEX_NAME -e TEI_ENDPOINT=$TEI_ENDPOINT opea/retriever-pgvector:comps + docker run -d --name="test-retriever-pgvector" -p 5003:7000 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e PG_CONNECTION_STRING=postgresql+psycopg2://${POSTGRES_USER}:${POSTGRES_PASSWORD}@$ip_address:5432/${POSTGRES_DB} -e INDEX_NAME=$INDEX_NAME -e TEI_ENDPOINT=$TEI_ENDPOINT opea/retriever-pgvector:comps sleep 3m } function validate_microservice() { - retriever_port=7000 + retriever_port=5003 test_embedding="[0.3212316218862614, 0.05284697028105079, 0.792736615029739, -0.01450667589035648, -0.7358454555705813, -0.5159104761926909, 0.3535153166047822, -0.6465310827905328, -0.3260418169245214, 0.5427377177268364, 0.839674125021304, 0.27459120894125255, -0.9833857616143291, 0.4763752586395751, 0.7048355150785723, 0.4935209825796325, -0.09655411499027178, -0.5739389241976944, 0.34450497876796815, -0.03401327136919208, -0.8247080270670755, -0.9430721851019634, 0.4702688485035773, 0.3872526674852217, -0.13436894777006136, 0.27166203983338266, 0.7724679346611174, 0.49524109590526666, 0.9810730976435518, 0.2143402533230332, 0.35235793217357947, -0.3199320624935764, -0.3535996110405917, 0.1982603781951089, -0.37547349902996063, -0.6148649695355071, 0.388521078627599, 0.7073360849235228, 
0.1768845283243352, -0.38289339223361885, 0.36390326284734775, -0.4790146416310761, -0.5412301982310956, 0.33793186533237507, -0.7028178009236765, -0.6850965350085609, -0.519584428926227, 0.07610032557230206, 0.8173990245819258, 0.6620078274633294, 0.9159029345791101, -0.6353085978752564, 0.5816911666251467, -0.03007583916355916, 0.7405029634324471, 0.43720248036100817, -0.8588961125219283, -0.5267610831146254, 0.17242810571201828, -0.5958637989986995, -0.9424146892733949, 0.593549429279222, -0.6516554787902789, -0.5666971591678356, -0.942676397097636, -0.7754876202156127, 0.4981071621118629, 0.3479716647812874, -0.20905562164787628, -0.01239748867059931, -0.39282697259470645, -0.682776727276128, 0.8490471472078613, 0.9407846472878745, 0.38429459825058054, -0.6217288222979798, 0.7017039943902317, 0.2666859825508645, -0.8350624589077213, -0.6844099142855995, 0.7150220289787632, 0.6172753342426756, 0.3411977212235433, -0.6885106120374, -0.9063819220399785, -0.8409372842391187, -0.8297926800281972, -0.7209991962325382, -0.10750064217958677, 0.3293914797165298, -0.7839812511866298, 0.3413595850264284, 0.9251256529601857, -0.7129635996889019, 0.2032168270911272, -0.744174955251268, 0.7691350055313244, -0.20065548721684312, 0.8869269473893813, -0.02043469943990095, 0.6747773545635596, -0.08840723444251264, 0.29835753335664084, -0.06410433319206965, 0.6915278973312651, 0.35470936730145075, -0.8143883316077478, 0.3700125242841532, 0.21752822647915626, -0.8620510146349405, -0.9872766671960136, -0.4418160577207253, -0.22054594310628928, -0.12301077500821433, -0.32532691454130314, -0.13151154223491113, -0.11476973253362455, -0.6347877217496254, -0.7764229239974911, 0.8494414471799672, -0.8096141861298036, -0.126108099532108, -0.3910538453811505, 0.7416491690145808, -0.9147820237179922, -0.09053536925720418, 0.6536341825563443, 0.655602583013402, 0.1757558598054938, -0.2501459855449637, 0.23414048418314914, -0.2944157385030681, 0.9386472406881659, -0.18806566910431344, -0.29109490690006345, -0.06582041104197667, -0.24458043176038613, 0.22893907834264082, -0.6322528508563678, -0.7885667746432836, 0.10383516801892911, 0.25661930212021256, 0.48395546864077654, 0.25074187080653787, 0.7878158493705165, 0.23874513474134984, -0.18963037155323526, 0.6768315857746809, 0.5323731821887652, 0.23324330999046516, -0.738289178845237, 0.8231931441360549, -0.5243106029457096, 0.21804967641989204, 0.3707592922049536, 0.1970890658467559, 0.6290401053696923, -0.6193312718716564, 0.4319818453521995, -0.4373242547587233, -0.20412719166280646, -0.868724458613944, -0.9426457085574942, 0.7688331784589177, 0.8429476319014946, -0.6928872166553237, -0.3089062124196522, -0.4951601658025162, -0.20786350848417157, -0.1834098357401246, 0.6258630377921288, -0.25204085881527294, -0.6433661815891194, 0.24194250996512046, 0.7945180851525879, 0.6730215739979015, 0.45995755232419877, 0.27685945410814927, 0.7529674957244883, -0.4439881981193141, 0.38722277085649703, 0.4225851985441007, 0.5151867308566294, 0.8592936274009735, -0.5577167356519221, -0.22541015002223674, 0.7872403040580904, -0.12895843621078895, 0.5887160803674254, -0.6121486933005933, -0.45190497189987, 0.5882515994898736, -0.20915972333667443, 0.6412544240387859, -0.9812292190679823, 0.23598351448404986, -0.01874477123769469, -0.5571884049798792, -0.21717058226127106, -0.8566428604555374, -0.7698283820683764, -0.7788953845967042, -0.9695043602118194, 0.2531642774513472, 0.24476771264255004, 0.799177428779027, 0.15892099361251932, 0.2675472976400166, 0.7977537791258142, 
0.5682082238828539, -0.45861936031507833, 0.976812562932188, 0.7074171102968665, -0.255345769250928, -0.8903371790301657, 0.7704811965386686, 0.7499406836491052, 0.015867022798163433, 0.023343856172087563, -0.8985882333056163, 0.967943518200411, 0.6738003473613683, 0.500027753964835, -0.25086930359627546, 0.8192342987623937, -0.5553572601867272, -0.5869387659256808, 0.8105241617485164, 0.26722188191476604, -0.3958252448602495, -0.5045071968072412, -0.28738102025143886, 0.9466985876572256, 0.7491954841518662, -0.05398806963889902, 0.5602374066760636, -0.7105267600964871, 0.9183176656578995, -0.7484524873628995, -0.9707740622635459, -0.835248467210193, -0.6698976002755301, -0.9157167347077453, 0.8385470752014215, -0.8484323571440642, 0.1488482374866753, 0.3535389435893035, 0.40201643606217297, -0.39307181109310174, -0.651228451786785, 0.9707155460374848, 0.7578035730666239, -0.916880505891617, 0.7976566483403702, 0.4769359186496589, -0.9056872532891009, 0.5018227509242583, 0.06634988131602104, -0.38876676686204537, -0.20473802582321277, 0.5980365889203325, -0.34935300908506206, 0.5873905336860825, -0.8339160527604776, 0.2903116937984762, -0.9254374424169307, 0.6580958452134436, 0.15246698154103022, -0.6646130474515959, 0.8207084174685697, 0.06879769054023499, 0.6856796611464853, 0.7434402148947985, -0.07417300955086725, -0.37981881059511857, 0.7945700979382095, 0.9465476443316254, 0.7045891367557522, -0.21374560717812052, 0.09707043886320443, 0.40542472035097754, -0.21295063208183063, -0.3638798039778244, 0.27259830494730597, -0.9679565648433712, 0.574009198040323, 0.5453104171463734, 0.4226578254247848, 0.8135241112071945, -0.9913587704531821, -0.5117490950168377, 0.31240764840477486, 0.05726091394767008, -0.44352035546239654, 0.973651830312322, -0.30089019754641044, -0.38110683211990515, 0.12746451891554633, -0.44142668003974683, -0.6085743100333996, 0.6897705314589502, 0.9941017194163115, 0.22931154106427631, -0.38393397164902865, -0.487276417971108, 0.9823011016539693, -0.525188403356583, 0.20472304461076174, -0.549309125745228, 0.8391439613819196, -0.29947371410247614, -0.9587993477785177, 0.49169643064876745, -0.8450431739492874, 0.4992908092405386, 0.8214166011949593, 0.3514461197612715, 0.7052749449063302, -0.456428137096097, -0.21613329759075817, -0.4240696515484821, -0.6072280877366947, -0.19019911975234938, 0.03207563995916485, 0.7832264288656379, -0.9848532944591397, 0.2814057130788894, 0.860398099217986, -0.5757789213121853, -0.6403226820347003, 0.6276892831123779, 0.6966115314942829, -0.5964071917752842, 0.44624318175630373, 0.7747997483259705, -0.5274892594576506, -0.00345488047657061, 0.39694784159551255, -0.32018146543784254, 0.7503113292041483, 0.2279567107684024, -0.6993797573511833, 0.07551046336599065, 0.34912828888955083, 0.4590408940147299, 0.25454507513086266, -0.30882522463970363, -0.4080889783776509, -0.3123706885833979, -0.8906352519220135, -0.8139972234039548, -0.08828963608894047, 0.14503312886836617, -0.3714118896544083, 0.3827783378301277, 0.5438460044018558, 0.5097760438462526, 0.15715247575456592, 0.7656929283612122, 0.2920396353744734, 0.2373440190759446, 0.9526910643357105, 0.1250822784239567, 0.8541819063485603, -0.12747895073713877, 0.5735382473541981, -0.5032516001742902, 0.7413632640531032, -0.7276977107465363, 0.843580565716205, 0.7018464054348241, 0.5586022744519274, 0.8087171435922904, -0.21245941454116735, -0.948838383837346, -0.33122336674310726, -0.6044852681843789, 0.9537863293189539, 0.2536799406315282, -0.6165803849255769, 0.7101896753682724, 
-0.7295247078012181, -0.7614076971639918, -0.26355996174665797, 0.2821572530049805, -0.31435759840484767, 0.4606279529588946, -0.6454718015595133, 0.29204230021467015, -0.9773214517280517, 0.9018006022750058, 0.41864735598581615, -0.6362219585524242, 0.6393270283675747, 0.8775458814947836, -0.8151570635893794, 0.3439568607968999, 0.29709851503999474, -0.757078876496533, 0.5012539900859203, 0.9894088580102554, -0.7830638861580885, -0.2991021462567893, 0.106227593453466, 0.475717480159388, -0.8190837445165258, 0.7235860704831878, 0.7463245164230621, -0.5005231847044065, 0.6040314499611552, 0.6735380082955229, -0.5547291176872893, -0.9090102518914822, 0.13079236830880614, 0.30122136258272514, -0.6417236467561747, 0.2630310905704383, -0.37163926901056077, 0.20821525595060142, 0.058213575984825905, -0.7186424501121726, 0.7186917038077467, 0.20368227867764155, 0.7957158871869667, -0.8553769107478018, 0.8475526085456688, -0.929286319233819, -0.4084410910607217, -0.18451194893213185, -0.2629665470348457, 0.36380699955097695, 0.2762298083541519, 0.8264334555626198, -0.022207373606218495, -0.32224911623004626, -0.18947254078026798, 0.33627343422225175, 0.6906306880901341, -0.5248865356053838, -0.8976978225060646, -0.9198989266658277, -0.9045058048590318, -0.43074279628622225, 0.9599523380525761, 0.16694571818827875, 0.08638717900194992, 0.24369341180939874, -0.29293980835779454, 0.13980998987643733, -0.9103052978285509, 0.9109674748745353, -0.6189652187256851, -0.30507868365416413, -0.4232217216255978, 0.34784431052206877, -0.8235167119697908, 0.1565512568825982, -0.11476153735499195, -0.5476852944817927, -0.9695366885614041, 0.31387227761880165, -0.8460727492314095, 0.5313339961520958, 0.5605009436841186, 0.04504755045556719, -0.10937916620725119, -0.40867992424849797, -0.9148814576758182, 0.41260731002228, 0.6535850987782705, -0.3956136730481463, 0.03633719317271722, -0.26520169024611917, -0.39307279913859916, 0.8389708129910836, -0.10965192030153337, -0.8114479506343715, 0.6624055258346568, -0.12364857684372677, -0.3391386034226034, 0.5064344415363975, 0.4222558794792024, -0.8920802019539475, 0.8403881748708741, -0.5144930020007417, -0.3961429483392995, -0.9112376538340263, 0.5369991550001529, 0.4099994212177125, 0.8971702224538953, -0.07250674251100442, -0.4123232887614461, -0.4122138364547645, 0.30115503935936516, 0.9140832812087094, -0.37996517983025035, 0.45766194212423583, 0.8778668278803266, -0.871373882496363, 0.9061603981794313, -0.4815792838295849, -0.3540250825062252, 0.47058280496548677, 0.6353307464139133, -0.9084299203157564, 0.32569503818833767, -0.5917177728092791, 0.017982667746413883, -0.39657854384311597, 0.30240291420731147, -0.8789617636583977, 0.398601970442066, -0.9537566407528597, -0.7326801366509474, 0.6394091009367926, -0.24018952260048332, -0.4410443985541457, -0.715250103875068, -0.9531170489995859, 0.8907413230296786, -0.6270483513933209, -0.1278281545077713, 0.6205668124687644, -0.5880492136441298, 0.8458960227498347, 0.5156432304509859, -0.41522707199863196, -0.9971627462302537, 0.967570980171752, -0.1258013547750596, -0.3920054384667395, -0.7579953976551077, -0.5047276085442098, -0.742917134758996, 0.307776046578512, 0.33240724082891204, -0.12439712701067074, 0.8297068611891512, 0.9092972699438713, -0.5553533790744807, -0.9327632085647035, 0.4797798607215402, -0.6407284323825371, 0.23503537288803233, 0.7356444783186646, 0.550461677629142, -0.8859356421536595, -0.06157466053719496, 0.2628024780598055, -0.14515603184459613, -0.9382781600128365, -0.9076306357777459, 
-0.5661586668239169, -0.5778188698610502, -0.343591139945177, -0.9957519288956789, 3.652203366399931e-05, -0.2850434941249338, 0.9450784913510459, -0.7344049612004591, 0.3966551077940945, 0.9820403785569927, 0.7132254472780228, 0.04475455308790677, 0.7149662286904288, 0.30640286803677386, -0.11825818002978239, 0.9475071024012094, -0.4020573255284672, -0.25210492474829316, -0.9864930649895771, -0.3662338670933165, 0.6528806547589174, 0.23157758222346203, -0.5707934304014186, -0.12462852967839688, 0.1912875382350001, 0.9111205883142817, -0.7227638014501978, -0.36537014763125186, -0.37380198030841805, 0.4707867786085871, -0.5824192322860218, -0.47547092650542666, 0.7836345381645189, 0.7843678847969751, 0.6754328587362883, -0.6670404362153401, 0.7372872996570987, -0.8333262364813818, -0.41971949504499273, -0.7600660277081586, 0.22809249636551576, -0.8923092554006928, -0.28910705230462663, 0.17556387278264474, -0.3120642961908995, -0.08857040909612457, 0.9736924099705169, -0.6425732085916924, 0.5667862783362607, -0.45242262118684295, -0.3366537122702131, -0.21042580668493605, -0.969230642055972, -0.6986186588663355, -0.5420629464988849, 0.8012632695329027, 0.10364503122371205, -0.8288649738571241, -0.7488901002163446, -0.2086447971105505, 0.24528530567671103, -0.1194706644737491, -0.4487125509839567, 0.19757079065420702, 0.9701391397770309, 0.6918580324259651, -0.6609864495230626, -0.5767397650124655, 0.13274852903677803, 0.45790899492650117, 0.6156249211932037, -0.5400854790245104, -0.4871335994554471, -0.37124459518957686, -0.9740961061020355, 0.8132186161153883, 0.5432742278375737, -0.7555629992450097, -0.3626273029276168, 0.3273351801156006, 0.2950481130490956, 0.5899713501222568, 0.1290258276325824, 0.14809153246329188, -0.8527458869128903, -0.45135237009997664, -0.78966354981686, -0.9869505409499153, 0.5440922045096472, -0.5065478252374527, 0.8914118613097968, -0.7009799840752231, -0.37720301784400667, -0.1990418958793818, 0.07895118490326825, 0.43246496862820827, 0.06871630683294172, 0.04584623777009278, -0.34229499350310455, 0.9387219959330184, -0.5381844165951264, 0.4794422861285379, 0.8534951958829573, 0.5734335942167272, -0.85412829706822, -0.7352963908032732, -0.12895000820916747, -0.22552570725823173, -0.5976878733463429, -0.32791035485443487, 0.7202059113861725, 0.39099290295132905, 0.30525825694263764, -0.2266469266742548, -0.03379388729241706, -0.5954645444941691, -0.02422270847921526, 0.2367051711225363, 0.0254309367030352, -0.8571941247598263, 0.6036464885617703, 0.780145197998714, -0.18486284139078912, -0.4861368589284454, -0.2789831003703762, -0.695370188724934, 0.20748300875047643, 0.613995882433769, -0.20040817194169125, 0.8373240273873666, 0.6138944053316708, -0.7863205352137852, -0.7823411702718377, 0.79906295867358, -0.5467331800231525, -0.6344655458958364, -0.9818941753091346, 0.5525644258030062, 0.6262889073747209, 0.9963129049354384, -0.6272737000603017, -0.2716262931036606, 0.2096677033434846, -0.6982262682600213, -0.5674210473085657, 0.24902399542030595, -0.5657568018493333, 0.08618618872017958, 0.5489764282591345, -0.8941510222698827, 0.41351613826944567, -0.5112980841262675, 0.4470615015729351, -0.20725162805621333, -0.08479642143543553, -0.1278591923549064, -0.4999896814124227, 0.9888904679503661, -0.048462424602504495, -0.7019088972627803, 0.24200967459107448, -0.07080934919496995, -0.7205222066189325, 0.8569714457890816, -0.16535406501060956, -0.6995151061411666, -0.002471197183836038, 0.36657456718336245, -0.21418945415378254, 0.8960422717208372, 
-0.8112144998402944, 0.3367368342692487, -0.1409734233274329, 0.9270438056838188, 0.6449085435355675, -0.42063510394970094, -0.5514753035609532, -0.7824719546926855, 0.27064161179409774, 0.7610801292513893, 0.041332375564573365, -0.4938906089444197, 0.6565606828711339, -0.8175201877660032, -0.7145428710506601, 0.5266689558422335, -0.36373337569732045, -0.4295940430516798, 0.6614123405581125, -0.5795867768963181, 0.09683447902632913, -0.7233160622088481, -0.035259383881968365, 0.44407987368431834, 0.5080824859277744, -0.025605597564321236, -0.33746311986945, 0.8643101724003239, -0.6590382567793307, 0.11251953056040387, -0.5283365207737802, 0.8881578952123139, -0.9796498715072419, -0.8206325632112821, -0.5431772730915239, -0.09628735573638458, 0.8509192593020449, 0.6468967965920123, -0.5886852895684587, -0.25974684548008664, 0.4474352123365879, -0.2199845691372495, 0.7554317108927318, 0.9809450136647395, -0.9430090133566618, 0.23635288316941683]" - http_proxy='' + result=$(http_proxy='' curl http://${ip_address}:$retriever_port/v1/retrieval \ -X POST \ -d "{\"text\":\"test\",\"embedding\":${test_embedding}}" \ - -H 'Content-Type: application/json' - docker logs test-vectorstore-postgres - docker logs test-comps-retriever-tei-endpoint + -H 'Content-Type: application/json') + if [[ $result == *"retrieved_docs"* ]]; then + echo "Result correct." + else + echo "Result wrong. Received was $result" + docker logs test-comps-vectorstore-postgres + docker logs test-comps-retriever-tei-endpoint + exit 1 + fi } function stop_docker() { @@ -51,7 +63,7 @@ function stop_docker() { docker stop $cid_retrievers && docker rm $cid_retrievers && sleep 1s fi - cid_redis=$(docker ps -aq --filter "name=test-vectorstore-postgres") + cid_redis=$(docker ps -aq --filter "name=test-comps-vectorstore-postgres") if [[ ! -z "$cid_redis" ]]; then docker stop $cid_redis && docker rm $cid_redis && sleep 1s fi diff --git a/tests/test_retrievers_langchain_pinecone.sh b/tests/test_retrievers_langchain_pinecone.sh new file mode 100755 index 000000000..d370fa92a --- /dev/null +++ b/tests/test_retrievers_langchain_pinecone.sh @@ -0,0 +1,83 @@ +#!/bin/bash +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +set -x + +WORKPATH=$(dirname "$PWD") +ip_address=$(hostname -I | awk '{print $1}') +function build_docker_images() { + cd $WORKPATH + docker build --no-cache -t opea/retriever-pinecone:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/langchain/pinecone/docker/Dockerfile . + if [ $? 
-ne 0 ]; then + echo "opea/retriever-pinecone built fail" + exit 1 + else + echo "opea/retriever-pinecone built successful" + fi +} + +function start_service() { + + # tei endpoint + tei_endpoint=5008 + model="BAAI/bge-base-en-v1.5" + docker run -d --name="test-comps-retriever-tei-endpoint" -e HTTPS_PROXY=$https_proxy -e HTTP_PROXY=$https_proxy -p $tei_endpoint:80 -v ./data:/data --pull always ghcr.io/huggingface/text-embeddings-inference:cpu-1.2 --model-id $model + sleep 30s + export TEI_EMBEDDING_ENDPOINT="http://${ip_address}:${tei_endpoint}" + + # pinecone retriever + export PINECONE_API_KEY=$PINECONE_KEY + export PINECONE_INDEX_NAME="langchain-test" + export HUGGINGFACEHUB_API_TOKEN=$HF_TOKEN + retriever_port=5009 + unset http_proxy + docker run -d --name="test-comps-retriever-pinecone-server" -p ${retriever_port}:7000 --ipc=host -e TEI_EMBEDDING_ENDPOINT=$TEI_EMBEDDING_ENDPOINT -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e PINECONE_API_KEY=$PINECONE_API_KEY -e PINECONE_INDEX_NAME=$PINECONE_INDEX_NAME -e INDEX_NAME=$PINECONE_INDEX_NAME opea/retriever-pinecone:comps + + sleep 2m +} + +function validate_microservice() { + retriever_port=5009 + test_embedding="[0.3212316218862614, 0.05284697028105079, 0.792736615029739, -0.01450667589035648, -0.7358454555705813, -0.5159104761926909, 0.3535153166047822, -0.6465310827905328, -0.3260418169245214, 0.5427377177268364, 0.839674125021304, 0.27459120894125255, -0.9833857616143291, 0.4763752586395751, 0.7048355150785723, 0.4935209825796325, -0.09655411499027178, -0.5739389241976944, 0.34450497876796815, -0.03401327136919208, -0.8247080270670755, -0.9430721851019634, 0.4702688485035773, 0.3872526674852217, -0.13436894777006136, 0.27166203983338266, 0.7724679346611174, 0.49524109590526666, 0.9810730976435518, 0.2143402533230332, 0.35235793217357947, -0.3199320624935764, -0.3535996110405917, 0.1982603781951089, -0.37547349902996063, -0.6148649695355071, 0.388521078627599, 0.7073360849235228, 0.1768845283243352, -0.38289339223361885, 0.36390326284734775, -0.4790146416310761, -0.5412301982310956, 0.33793186533237507, -0.7028178009236765, -0.6850965350085609, -0.519584428926227, 0.07610032557230206, 0.8173990245819258, 0.6620078274633294, 0.9159029345791101, -0.6353085978752564, 0.5816911666251467, -0.03007583916355916, 0.7405029634324471, 0.43720248036100817, -0.8588961125219283, -0.5267610831146254, 0.17242810571201828, -0.5958637989986995, -0.9424146892733949, 0.593549429279222, -0.6516554787902789, -0.5666971591678356, -0.942676397097636, -0.7754876202156127, 0.4981071621118629, 0.3479716647812874, -0.20905562164787628, -0.01239748867059931, -0.39282697259470645, -0.682776727276128, 0.8490471472078613, 0.9407846472878745, 0.38429459825058054, -0.6217288222979798, 0.7017039943902317, 0.2666859825508645, -0.8350624589077213, -0.6844099142855995, 0.7150220289787632, 0.6172753342426756, 0.3411977212235433, -0.6885106120374, -0.9063819220399785, -0.8409372842391187, -0.8297926800281972, -0.7209991962325382, -0.10750064217958677, 0.3293914797165298, -0.7839812511866298, 0.3413595850264284, 0.9251256529601857, -0.7129635996889019, 0.2032168270911272, -0.744174955251268, 0.7691350055313244, -0.20065548721684312, 0.8869269473893813, -0.02043469943990095, 0.6747773545635596, -0.08840723444251264, 0.29835753335664084, -0.06410433319206965, 0.6915278973312651, 0.35470936730145075, -0.8143883316077478, 0.3700125242841532, 0.21752822647915626, -0.8620510146349405, -0.9872766671960136, -0.4418160577207253, -0.22054594310628928, -0.12301077500821433, 
-0.32532691454130314, -0.13151154223491113, -0.11476973253362455, -0.6347877217496254, -0.7764229239974911, 0.8494414471799672, -0.8096141861298036, -0.126108099532108, -0.3910538453811505, 0.7416491690145808, -0.9147820237179922, -0.09053536925720418, 0.6536341825563443, 0.655602583013402, 0.1757558598054938, -0.2501459855449637, 0.23414048418314914, -0.2944157385030681, 0.9386472406881659, -0.18806566910431344, -0.29109490690006345, -0.06582041104197667, -0.24458043176038613, 0.22893907834264082, -0.6322528508563678, -0.7885667746432836, 0.10383516801892911, 0.25661930212021256, 0.48395546864077654, 0.25074187080653787, 0.7878158493705165, 0.23874513474134984, -0.18963037155323526, 0.6768315857746809, 0.5323731821887652, 0.23324330999046516, -0.738289178845237, 0.8231931441360549, -0.5243106029457096, 0.21804967641989204, 0.3707592922049536, 0.1970890658467559, 0.6290401053696923, -0.6193312718716564, 0.4319818453521995, -0.4373242547587233, -0.20412719166280646, -0.868724458613944, -0.9426457085574942, 0.7688331784589177, 0.8429476319014946, -0.6928872166553237, -0.3089062124196522, -0.4951601658025162, -0.20786350848417157, -0.1834098357401246, 0.6258630377921288, -0.25204085881527294, -0.6433661815891194, 0.24194250996512046, 0.7945180851525879, 0.6730215739979015, 0.45995755232419877, 0.27685945410814927, 0.7529674957244883, -0.4439881981193141, 0.38722277085649703, 0.4225851985441007, 0.5151867308566294, 0.8592936274009735, -0.5577167356519221, -0.22541015002223674, 0.7872403040580904, -0.12895843621078895, 0.5887160803674254, -0.6121486933005933, -0.45190497189987, 0.5882515994898736, -0.20915972333667443, 0.6412544240387859, -0.9812292190679823, 0.23598351448404986, -0.01874477123769469, -0.5571884049798792, -0.21717058226127106, -0.8566428604555374, -0.7698283820683764, -0.7788953845967042, -0.9695043602118194, 0.2531642774513472, 0.24476771264255004, 0.799177428779027, 0.15892099361251932, 0.2675472976400166, 0.7977537791258142, 0.5682082238828539, -0.45861936031507833, 0.976812562932188, 0.7074171102968665, -0.255345769250928, -0.8903371790301657, 0.7704811965386686, 0.7499406836491052, 0.015867022798163433, 0.023343856172087563, -0.8985882333056163, 0.967943518200411, 0.6738003473613683, 0.500027753964835, -0.25086930359627546, 0.8192342987623937, -0.5553572601867272, -0.5869387659256808, 0.8105241617485164, 0.26722188191476604, -0.3958252448602495, -0.5045071968072412, -0.28738102025143886, 0.9466985876572256, 0.7491954841518662, -0.05398806963889902, 0.5602374066760636, -0.7105267600964871, 0.9183176656578995, -0.7484524873628995, -0.9707740622635459, -0.835248467210193, -0.6698976002755301, -0.9157167347077453, 0.8385470752014215, -0.8484323571440642, 0.1488482374866753, 0.3535389435893035, 0.40201643606217297, -0.39307181109310174, -0.651228451786785, 0.9707155460374848, 0.7578035730666239, -0.916880505891617, 0.7976566483403702, 0.4769359186496589, -0.9056872532891009, 0.5018227509242583, 0.06634988131602104, -0.38876676686204537, -0.20473802582321277, 0.5980365889203325, -0.34935300908506206, 0.5873905336860825, -0.8339160527604776, 0.2903116937984762, -0.9254374424169307, 0.6580958452134436, 0.15246698154103022, -0.6646130474515959, 0.8207084174685697, 0.06879769054023499, 0.6856796611464853, 0.7434402148947985, -0.07417300955086725, -0.37981881059511857, 0.7945700979382095, 0.9465476443316254, 0.7045891367557522, -0.21374560717812052, 0.09707043886320443, 0.40542472035097754, -0.21295063208183063, -0.3638798039778244, 0.27259830494730597, -0.9679565648433712, 
0.574009198040323, 0.5453104171463734, 0.4226578254247848, 0.8135241112071945, -0.9913587704531821, -0.5117490950168377, 0.31240764840477486, 0.05726091394767008, -0.44352035546239654, 0.973651830312322, -0.30089019754641044, -0.38110683211990515, 0.12746451891554633, -0.44142668003974683, -0.6085743100333996, 0.6897705314589502, 0.9941017194163115, 0.22931154106427631, -0.38393397164902865, -0.487276417971108, 0.9823011016539693, -0.525188403356583, 0.20472304461076174, -0.549309125745228, 0.8391439613819196, -0.29947371410247614, -0.9587993477785177, 0.49169643064876745, -0.8450431739492874, 0.4992908092405386, 0.8214166011949593, 0.3514461197612715, 0.7052749449063302, -0.456428137096097, -0.21613329759075817, -0.4240696515484821, -0.6072280877366947, -0.19019911975234938, 0.03207563995916485, 0.7832264288656379, -0.9848532944591397, 0.2814057130788894, 0.860398099217986, -0.5757789213121853, -0.6403226820347003, 0.6276892831123779, 0.6966115314942829, -0.5964071917752842, 0.44624318175630373, 0.7747997483259705, -0.5274892594576506, -0.00345488047657061, 0.39694784159551255, -0.32018146543784254, 0.7503113292041483, 0.2279567107684024, -0.6993797573511833, 0.07551046336599065, 0.34912828888955083, 0.4590408940147299, 0.25454507513086266, -0.30882522463970363, -0.4080889783776509, -0.3123706885833979, -0.8906352519220135, -0.8139972234039548, -0.08828963608894047, 0.14503312886836617, -0.3714118896544083, 0.3827783378301277, 0.5438460044018558, 0.5097760438462526, 0.15715247575456592, 0.7656929283612122, 0.2920396353744734, 0.2373440190759446, 0.9526910643357105, 0.1250822784239567, 0.8541819063485603, -0.12747895073713877, 0.5735382473541981, -0.5032516001742902, 0.7413632640531032, -0.7276977107465363, 0.843580565716205, 0.7018464054348241, 0.5586022744519274, 0.8087171435922904, -0.21245941454116735, -0.948838383837346, -0.33122336674310726, -0.6044852681843789, 0.9537863293189539, 0.2536799406315282, -0.6165803849255769, 0.7101896753682724, -0.7295247078012181, -0.7614076971639918, -0.26355996174665797, 0.2821572530049805, -0.31435759840484767, 0.4606279529588946, -0.6454718015595133, 0.29204230021467015, -0.9773214517280517, 0.9018006022750058, 0.41864735598581615, -0.6362219585524242, 0.6393270283675747, 0.8775458814947836, -0.8151570635893794, 0.3439568607968999, 0.29709851503999474, -0.757078876496533, 0.5012539900859203, 0.9894088580102554, -0.7830638861580885, -0.2991021462567893, 0.106227593453466, 0.475717480159388, -0.8190837445165258, 0.7235860704831878, 0.7463245164230621, -0.5005231847044065, 0.6040314499611552, 0.6735380082955229, -0.5547291176872893, -0.9090102518914822, 0.13079236830880614, 0.30122136258272514, -0.6417236467561747, 0.2630310905704383, -0.37163926901056077, 0.20821525595060142, 0.058213575984825905, -0.7186424501121726, 0.7186917038077467, 0.20368227867764155, 0.7957158871869667, -0.8553769107478018, 0.8475526085456688, -0.929286319233819, -0.4084410910607217, -0.18451194893213185, -0.2629665470348457, 0.36380699955097695, 0.2762298083541519, 0.8264334555626198, -0.022207373606218495, -0.32224911623004626, -0.18947254078026798, 0.33627343422225175, 0.6906306880901341, -0.5248865356053838, -0.8976978225060646, -0.9198989266658277, -0.9045058048590318, -0.43074279628622225, 0.9599523380525761, 0.16694571818827875, 0.08638717900194992, 0.24369341180939874, -0.29293980835779454, 0.13980998987643733, -0.9103052978285509, 0.9109674748745353, -0.6189652187256851, -0.30507868365416413, -0.4232217216255978, 0.34784431052206877, -0.8235167119697908, 
0.1565512568825982, -0.11476153735499195, -0.5476852944817927, -0.9695366885614041, 0.31387227761880165, -0.8460727492314095, 0.5313339961520958, 0.5605009436841186, 0.04504755045556719, -0.10937916620725119, -0.40867992424849797, -0.9148814576758182, 0.41260731002228, 0.6535850987782705, -0.3956136730481463, 0.03633719317271722, -0.26520169024611917, -0.39307279913859916, 0.8389708129910836, -0.10965192030153337, -0.8114479506343715, 0.6624055258346568, -0.12364857684372677, -0.3391386034226034, 0.5064344415363975, 0.4222558794792024, -0.8920802019539475, 0.8403881748708741, -0.5144930020007417, -0.3961429483392995, -0.9112376538340263, 0.5369991550001529, 0.4099994212177125, 0.8971702224538953, -0.07250674251100442, -0.4123232887614461, -0.4122138364547645, 0.30115503935936516, 0.9140832812087094, -0.37996517983025035, 0.45766194212423583, 0.8778668278803266, -0.871373882496363, 0.9061603981794313, -0.4815792838295849, -0.3540250825062252, 0.47058280496548677, 0.6353307464139133, -0.9084299203157564, 0.32569503818833767, -0.5917177728092791, 0.017982667746413883, -0.39657854384311597, 0.30240291420731147, -0.8789617636583977, 0.398601970442066, -0.9537566407528597, -0.7326801366509474, 0.6394091009367926, -0.24018952260048332, -0.4410443985541457, -0.715250103875068, -0.9531170489995859, 0.8907413230296786, -0.6270483513933209, -0.1278281545077713, 0.6205668124687644, -0.5880492136441298, 0.8458960227498347, 0.5156432304509859, -0.41522707199863196, -0.9971627462302537, 0.967570980171752, -0.1258013547750596, -0.3920054384667395, -0.7579953976551077, -0.5047276085442098, -0.742917134758996, 0.307776046578512, 0.33240724082891204, -0.12439712701067074, 0.8297068611891512, 0.9092972699438713, -0.5553533790744807, -0.9327632085647035, 0.4797798607215402, -0.6407284323825371, 0.23503537288803233, 0.7356444783186646, 0.550461677629142, -0.8859356421536595, -0.06157466053719496, 0.2628024780598055, -0.14515603184459613, -0.9382781600128365, -0.9076306357777459, -0.5661586668239169, -0.5778188698610502, -0.343591139945177, -0.9957519288956789, 3.652203366399931e-05, -0.2850434941249338, 0.9450784913510459, -0.7344049612004591, 0.3966551077940945, 0.9820403785569927, 0.7132254472780228, 0.04475455308790677, 0.7149662286904288, 0.30640286803677386, -0.11825818002978239, 0.9475071024012094, -0.4020573255284672, -0.25210492474829316, -0.9864930649895771, -0.3662338670933165, 0.6528806547589174, 0.23157758222346203, -0.5707934304014186, -0.12462852967839688, 0.1912875382350001, 0.9111205883142817, -0.7227638014501978, -0.36537014763125186, -0.37380198030841805, 0.4707867786085871, -0.5824192322860218, -0.47547092650542666, 0.7836345381645189, 0.7843678847969751, 0.6754328587362883, -0.6670404362153401, 0.7372872996570987, -0.8333262364813818, -0.41971949504499273, -0.7600660277081586, 0.22809249636551576, -0.8923092554006928, -0.28910705230462663, 0.17556387278264474, -0.3120642961908995, -0.08857040909612457, 0.9736924099705169, -0.6425732085916924, 0.5667862783362607, -0.45242262118684295, -0.3366537122702131, -0.21042580668493605, -0.969230642055972, -0.6986186588663355, -0.5420629464988849, 0.8012632695329027, 0.10364503122371205, -0.8288649738571241, -0.7488901002163446, -0.2086447971105505, 0.24528530567671103, -0.1194706644737491, -0.4487125509839567, 0.19757079065420702, 0.9701391397770309, 0.6918580324259651, -0.6609864495230626, -0.5767397650124655, 0.13274852903677803, 0.45790899492650117, 0.6156249211932037, -0.5400854790245104, -0.4871335994554471, -0.37124459518957686, 
-0.9740961061020355, 0.8132186161153883, 0.5432742278375737, -0.7555629992450097, -0.3626273029276168, 0.3273351801156006, 0.2950481130490956, 0.5899713501222568, 0.1290258276325824, 0.14809153246329188, -0.8527458869128903, -0.45135237009997664, -0.78966354981686, -0.9869505409499153, 0.5440922045096472, -0.5065478252374527, 0.8914118613097968, -0.7009799840752231, -0.37720301784400667, -0.1990418958793818, 0.07895118490326825, 0.43246496862820827, 0.06871630683294172, 0.04584623777009278, -0.34229499350310455, 0.9387219959330184, -0.5381844165951264, 0.4794422861285379, 0.8534951958829573, 0.5734335942167272, -0.85412829706822, -0.7352963908032732, -0.12895000820916747, -0.22552570725823173, -0.5976878733463429, -0.32791035485443487, 0.7202059113861725, 0.39099290295132905, 0.30525825694263764, -0.2266469266742548, -0.03379388729241706, -0.5954645444941691, -0.02422270847921526, 0.2367051711225363, 0.0254309367030352, -0.8571941247598263, 0.6036464885617703, 0.780145197998714, -0.18486284139078912, -0.4861368589284454, -0.2789831003703762, -0.695370188724934, 0.20748300875047643, 0.613995882433769, -0.20040817194169125, 0.8373240273873666, 0.6138944053316708, -0.7863205352137852, -0.7823411702718377, 0.79906295867358, -0.5467331800231525, -0.6344655458958364, -0.9818941753091346, 0.5525644258030062, 0.6262889073747209, 0.9963129049354384, -0.6272737000603017, -0.2716262931036606, 0.2096677033434846, -0.6982262682600213, -0.5674210473085657, 0.24902399542030595, -0.5657568018493333, 0.08618618872017958, 0.5489764282591345, -0.8941510222698827, 0.41351613826944567, -0.5112980841262675, 0.4470615015729351, -0.20725162805621333, -0.08479642143543553, -0.1278591923549064, -0.4999896814124227, 0.9888904679503661, -0.048462424602504495, -0.7019088972627803, 0.24200967459107448, -0.07080934919496995, -0.7205222066189325, 0.8569714457890816, -0.16535406501060956, -0.6995151061411666, -0.002471197183836038, 0.36657456718336245, -0.21418945415378254, 0.8960422717208372, -0.8112144998402944, 0.3367368342692487, -0.1409734233274329, 0.9270438056838188, 0.6449085435355675, -0.42063510394970094, -0.5514753035609532, -0.7824719546926855, 0.27064161179409774, 0.7610801292513893, 0.041332375564573365, -0.4938906089444197, 0.6565606828711339, -0.8175201877660032, -0.7145428710506601, 0.5266689558422335, -0.36373337569732045, -0.4295940430516798, 0.6614123405581125, -0.5795867768963181, 0.09683447902632913, -0.7233160622088481, -0.035259383881968365, 0.44407987368431834, 0.5080824859277744, -0.025605597564321236, -0.33746311986945, 0.8643101724003239, -0.6590382567793307, 0.11251953056040387, -0.5283365207737802, 0.8881578952123139, -0.9796498715072419, -0.8206325632112821, -0.5431772730915239, -0.09628735573638458, 0.8509192593020449, 0.6468967965920123, -0.5886852895684587, -0.25974684548008664, 0.4474352123365879, -0.2199845691372495, 0.7554317108927318, 0.9809450136647395, -0.9430090133566618, 0.23635288316941683]" + result=$(http_proxy='' curl --noproxy $ip_address http://${ip_address}:$retriever_port/v1/retrieval \ + -X POST \ + -d "{\"text\":\"test\",\"embedding\":${test_embedding}}" \ + -H 'Content-Type: application/json') + if [[ $result == *"retrieved_docs"* ]]; then + echo "Result correct." + else + echo "Result wrong. Received was $result" + docker logs test-comps-retriever-pinecone-server + docker logs test-comps-retriever-tei-endpoint + exit 1 + fi +} + +function stop_docker() { + cid_retrievers=$(docker ps -aq --filter "name=test-comps-retrievers*") + if [[ ! 
-z "$cid_retrievers" ]]; then + docker stop $cid_retrievers && docker rm $cid_retrievers && sleep 1s + fi + + cid_pinecone=$(docker ps -aq --filter "name=test-pinecone-vector-db") + if [[ ! -z "$cid_pinecone" ]]; then + docker stop $cid_pinecone && docker rm $cid_pinecone && sleep 1s + fi +} + +function main() { + + stop_docker + + build_docker_images + start_service + + validate_microservice + + stop_docker + echo y | docker system prune + +} + +main diff --git a/tests/test_retrievers_langchain_redis.sh b/tests/test_retrievers_langchain_redis.sh index 00ad4c761..9d367ce25 100644 --- a/tests/test_retrievers_langchain_redis.sh +++ b/tests/test_retrievers_langchain_redis.sh @@ -2,7 +2,7 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -set -xe +set -x WORKPATH=$(dirname "$PWD") LOG_PATH="$WORKPATH/tests" @@ -11,6 +11,12 @@ ip_address=$(hostname -I | awk '{print $1}') function build_docker_images() { cd $WORKPATH docker build --no-cache -t opea/retriever-redis:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/langchain/redis/docker/Dockerfile . + if [ $? -ne 0 ]; then + echo "opea/retriever-redis built fail" + exit 1 + else + echo "opea/retriever-redis built successful" + fi } function start_service() { diff --git a/tests/test_tts_speecht5.sh b/tests/test_tts.sh similarity index 56% rename from tests/test_tts_speecht5.sh rename to tests/test_tts.sh index d9426bdff..e4039d956 100644 --- a/tests/test_tts_speecht5.sh +++ b/tests/test_tts.sh @@ -2,7 +2,7 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -set -xe +set -x WORKPATH=$(dirname "$PWD") ip_address=$(hostname -I | awk '{print $1}') @@ -10,23 +10,37 @@ ip_address=$(hostname -I | awk '{print $1}') function build_docker_images() { cd $WORKPATH echo $(pwd) - docker build -t opea/speecht5:latest -f comps/tts/speecht5/Dockerfile . - docker build -t opea/tts:latest -f comps/tts/Dockerfile . + docker build --no-cache -t opea/speecht5:comps -f comps/tts/speecht5/Dockerfile . + if [ $? -ne 0 ]; then + echo "opea/speecht5 built fail" + exit 1 + else + echo "opea/speecht5 built successful" + fi + docker build --no-cache -t opea/tts:comps -f comps/tts/Dockerfile . + if [ $? -ne 0 ]; then + echo "opea/tts built fail" + exit 1 + else + echo "opea/tts built successful" + fi } function start_service() { unset http_proxy - docker run -d --name="test-comps-tts-speecht5" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p 7055:7055 --ipc=host opea/speecht5:latest - docker run -d --name="test-comps-tts" -e TTS_ENDPOINT=http://$ip_address:7055 -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p 9088:9088 --ipc=host opea/tts:latest + docker run -d --name="test-comps-tts-speecht5" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p 5017:7055 --ipc=host opea/speecht5:comps + docker run -d --name="test-comps-tts" -e TTS_ENDPOINT=http://$ip_address:5017 -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p 5016:9088 --ipc=host opea/tts:comps sleep 3m } function validate_microservice() { - result=$(http_proxy="" curl http://localhost:9088/v1/audio/speech -XPOST -d '{"text": "Who are you?"}' -H 'Content-Type: application/json') + result=$(http_proxy="" curl http://localhost:5016/v1/audio/speech -XPOST -d '{"text": "Who are you?"}' -H 'Content-Type: application/json') if [[ $result == *"Ukl"* ]]; then echo "Result correct." else echo "Result wrong." 
+ docker logs test-comps-tts-speecht5 + docker logs test-comps-tts exit 1 fi diff --git a/tests/test_vectorstores_langchain_milvus.sh b/tests/test_vectorstores_langchain_milvus.sh new file mode 100644 index 000000000..86124baa5 --- /dev/null +++ b/tests/test_vectorstores_langchain_milvus.sh @@ -0,0 +1,80 @@ +#!/bin/bash +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +set -x + +WORKPATH=$(dirname "$PWD") +LOG_PATH="$WORKPATH/tests" +ip_address=$(hostname -I | awk '{print $1}') + + +function start_service() { + cd $WORKPATH/comps/vectorstores/langchain/milvus + rm -rf volumes/ + + docker compose up -d + + sleep 60s +} + +function validate_vectorstore() { + PORT="19530" + COLLECTION_NAME="test_col" + + # test create collection + echo "[ test create ] creating collection.." + create_response=$(curl -X POST "http://$ip_address:$PORT/v1/vector/collections/create" -H "accept: application/json" -H "Content-Type: application/json" -d "{ \"collectionName\": \"$COLLECTION_NAME\", \"dbName\": \"default\", \"dimension\": 2, \"metricType\": \"L2\", \"primaryField\": \"id\", \"vectorField\": \"vector\"}") + echo $create_response >> ${LOG_PATH}/milvus_create_col.log + if [[ $(echo $create_response | grep '{"code":200') ]]; then + echo "[ test create ] create collection succeed" + else + echo "[ test create ] create collection failed" + docker logs milvus-standalone + exit 1 + fi + + # test insert data + echo "[ test insert ] inserting data.." + insert_response=$(curl -X POST "http://$ip_address:$PORT/v1/vector/insert" -H "accept: application/json" -H "Content-Type: application/json" -d "{ \"collectionName\": \"$COLLECTION_NAME\", \"data\": [{\"vector\":[1,2]}] }") + echo $insert_response >> ${LOG_PATH}/milvus_insert_data.log + if [[ $(echo $insert_response | grep '{"code":200,"data":{"insertCount":1') ]]; then + echo "[ test insert ] insert data succeed" + else + echo "[ test insert ] insert data failed" + docker logs milvus-standalone + exit 1 + fi + + # test search data + echo "[ test search ] searching data.." + search_response=$(curl -X POST "http://$ip_address:$PORT/v1/vector/search" -H "accept: application/json" -H "Content-Type: application/json" -d "{ \"collectionName\": \"$COLLECTION_NAME\", \"vector\":[1,2] }") + echo $search_response>> ${LOG_PATH}/milvus_search_data.log + if [[ $(echo $search_response | grep '{"code":200,"data":') ]]; then + echo "[ test search ] search data succeed" + else + echo "[ test search ] search data failed" + docker logs milvus-standalone + exit 1 + fi +} + +function stop_docker() { + cid=$(docker ps -aq --filter "name=milvus-*") + if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi +} + +function main() { + + stop_docker + + start_service + + validate_vectorstore + + stop_docker + echo y | docker system prune + +} + +main diff --git a/tests/test_vectorstores_langchain_pathway.sh b/tests/test_vectorstores_langchain_pathway.sh new file mode 100644 index 000000000..0b1eab5ad --- /dev/null +++ b/tests/test_vectorstores_langchain_pathway.sh @@ -0,0 +1,96 @@ +#!/bin/bash +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +set -x + +WORKPATH=$(dirname "$PWD") +LOG_PATH="$WORKPATH/tests" +ip_address=$(hostname -I | awk '{print $1}') +function build_docker_images() { + cd $WORKPATH + + cd comps/vectorstores/langchain/pathway + + docker build --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -t opea/vectorstore-pathway:comps . + + cd $WORKPATH + + if [ $? 
-ne 0 ]; then + echo "opea/retriever-pathway built fail" + exit 1 + else + echo "opea/retriever-pathway built successful" + fi +} + +function start_service() { + cd $WORKPATH + + # tei endpoint + tei_endpoint=5008 + model="BAAI/bge-base-en-v1.5" + docker run -d --name="test-comps-retriever-tei-endpoint" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p $tei_endpoint:80 -v ./data:/data --pull always ghcr.io/huggingface/text-embeddings-inference:cpu-1.2 --model-id $model + + sleep 30s + export TEI_EMBEDDING_ENDPOINT="http://${ip_address}:${tei_endpoint}" + + result=$(http_proxy='' + curl $TEI_EMBEDDING_ENDPOINT -X POST -d '{"inputs":"Hey,"}' -H 'Content-Type: application/json') + + echo "embed_result:" + echo $result + + sleep 30s + + # pathway + export PATHWAY_HOST="0.0.0.0" + export PATHWAY_PORT=5432 + + docker run -d --name="test-comps-vectorstore-pathway" -e PATHWAY_HOST=${PATHWAY_HOST} -e PATHWAY_PORT=${PATHWAY_PORT} -e TEI_EMBEDDING_ENDPOINT=${TEI_EMBEDDING_ENDPOINT} -e http_proxy=$http_proxy -e https_proxy=$https_proxy -v $WORKPATH/comps/vectorstores/langchain/pathway/README.md:/app/data/README.md -p ${PATHWAY_PORT}:${PATHWAY_PORT} --network="host" opea/vectorstore-pathway:comps + + sleep 45s + + export PATHWAY_HOST=$ip_address # needed in order to reach to vector store + + sleep 10s +} + +function validate_microservice() { + export PATH="${HOME}/miniforge3/bin:$PATH" + + result=$(http_proxy='' + curl http://${PATHWAY_HOST}:$PATHWAY_PORT/v1/retrieve \ + -X POST \ + -d "{\"query\":\"test\",\"k\":3}" \ + -H 'Content-Type: application/json') + if [[ $result == *"Pathway"* ]]; then + echo "Result correct." + else + echo "Result wrong. Received was $result" + docker logs test-comps-vectorstore-pathway >> ${LOG_PATH}/vectorstore-pathway.log + docker logs test-comps-retriever-tei-endpoint >> ${LOG_PATH}/tei-endpoint.log + exit 1 + fi +} + +function stop_docker() { + cid=$(docker ps -aq --filter "name=test-comps*") + if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi +} + +function main() { + + stop_docker + + build_docker_images + start_service + + validate_microservice + + stop_docker + echo y | docker system prune + +} + +main diff --git a/tests/test_vectorstores_pgvector.sh b/tests/test_vectorstores_pgvector.sh deleted file mode 100755 index 1b43a6930..000000000 --- a/tests/test_vectorstores_pgvector.sh +++ /dev/null @@ -1,63 +0,0 @@ -#!/bin/bash -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -set -xe - -WORKPATH=$(dirname "$PWD") -ip_address=$(hostname -I | awk '{print $1}') -function build_docker_images() { - cd $WORKPATH - - # piull pgvector image - docker pull pgvector/pgvector:0.7.0-pg16 - - # build dataprep image for pgvector - docker build -t opea/dataprep-pgvector:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f $WORKPATH/comps/dataprep/pgvector/langchain/docker/Dockerfile . 
-} - -function start_service() { - export POSTGRES_USER=testuser - export POSTGRES_PASSWORD=testpwd - export POSTGRES_DB=vectordb - - docker run --name vectorstore-postgres -e POSTGRES_USER=${POSTGRES_USER} -e POSTGRES_HOST_AUTH_METHOD=trust -e POSTGRES_DB=${POSTGRES_DB} -e POSTGRES_PASSWORD=${POSTGRES_PASSWORD} -p 5432:5432 -d -v $WORKPATH/comps/vectorstores/langchain/pgvector/init.sql:/docker-entrypoint-initdb.d/init.sql pgvector/pgvector:0.7.0-pg16 - - sleep 10s - - docker run -d --name="dataprep-pgvector" -p 6007:6007 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e PG_CONNECTION_STRING=postgresql+psycopg2://${POSTGRES_USER}:${POSTGRES_PASSWORD}@$ip_address:5432/${POSTGRES_DB} opea/dataprep-pgvector:latest -} - -function validate_microservice() { - url="http://$ip_address:6007/v1/dataprep" - touch $WORKPATH/tests/test.txt - echo 'The OPEA platform includes: Detailed framework of composable building blocks for state-of-the-art generative AI systems including LLMs, data stores, and prompt engines' > $WORKPATH/tests/test.txt - - curl --location --request POST "${url}" \ - --form 'files=@"'${WORKPATH}'/tests/test.txt"' \ - --proxy http://proxy-chain.intel.com:912 -} - -function stop_docker() { - cid=$(docker ps -aq --filter "name=vectorstore-postgres*") - if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi - - cid=$(docker ps -aq --filter "name=dataprep-pgvector*") - if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi -} - -function main() { - - stop_docker - - build_docker_images - start_service - - validate_microservice - - #stop_docker - #echo y | docker system prune - -} - -main diff --git a/tests/test_web_retrievers_langchain_chroma.sh b/tests/test_web_retrievers_langchain_chroma.sh index d1e2c3ed5..c9e20aa08 100644 --- a/tests/test_web_retrievers_langchain_chroma.sh +++ b/tests/test_web_retrievers_langchain_chroma.sh @@ -2,13 +2,19 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -set -xe +set -x WORKPATH=$(dirname "$PWD") ip_address=$(hostname -I | awk '{print $1}') function build_docker_images() { cd $WORKPATH docker build --no-cache -t opea/web-retriever-chroma:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/web_retrievers/langchain/chroma/docker/Dockerfile . + if [ $? -ne 0 ]; then + echo "opea/web-retriever-chroma built fail" + exit 1 + else + echo "opea/web-retriever-chroma built successful" + fi } function start_service() { @@ -31,11 +37,18 @@ function validate_microservice() { retriever_port=5019 export PATH="${HOME}/miniforge3/bin:$PATH" test_embedding=$(python -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)") - http_proxy='' curl http://${ip_address}:$retriever_port/v1/web_retrieval \ + result=$(http_proxy='' curl http://${ip_address}:$retriever_port/v1/web_retrieval \ -X POST \ -d "{\"text\":\"What is OPEA?\",\"embedding\":${test_embedding}}" \ - -H 'Content-Type: application/json' - docker logs test-comps-web-retriever-tei-endpoint + -H 'Content-Type: application/json') + if [[ $result == *"title"* ]]; then + echo "Result correct." + else + echo "Result wrong. 
Received status was $result" + docker logs test-comps-web-retriever-tei-endpoint + docker logs test-comps-web-retriever-chroma-server + exit 1 + fi } function stop_docker() { diff --git a/tests/test_workflow_chatqna.py b/tests/test_workflow_chatqna.py deleted file mode 100644 index a2ea0f2d0..000000000 --- a/tests/test_workflow_chatqna.py +++ /dev/null @@ -1,80 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -# - -import asyncio -import os - -from comps import ChatQnAGateway, MicroService, ServiceOrchestrator, ServiceType - -MEGA_SERVICE_HOST_IP = os.getenv("MEGA_SERVICE_HOST_IP", "0.0.0.0") -MEGA_SERVICE_PORT = os.getenv("MEGA_SERVICE_PORT", 8888) -EMBEDDING_SERVICE_HOST_IP = os.getenv("EMBEDDING_SERVICE_HOST_IP", "0.0.0.0") -EMBEDDING_SERVICE_PORT = os.getenv("EMBEDDING_SERVICE_PORT", 6000) -RETRIEVER_SERVICE_HOST_IP = os.getenv("RETRIEVER_SERVICE_HOST_IP", "0.0.0.0") -RETRIEVER_SERVICE_PORT = os.getenv("RETRIEVER_SERVICE_PORT", 7000) -RERANK_SERVICE_HOST_IP = os.getenv("RERANK_SERVICE_HOST_IP", "0.0.0.0") -RERANK_SERVICE_PORT = os.getenv("RERANK_SERVICE_PORT", 8000) -LLM_SERVICE_HOST_IP = os.getenv("LLM_SERVICE_HOST_IP", "0.0.0.0") -LLM_SERVICE_PORT = os.getenv("LLM_SERVICE_PORT", 9000) - - -class ChatQnAService: - def __init__(self, host="0.0.0.0", port=8000): - self.host = host - self.port = port - self.megaservice = ServiceOrchestrator() - - def add_remote_service(self): - embedding = MicroService( - name="embedding", - host=EMBEDDING_SERVICE_HOST_IP, - port=EMBEDDING_SERVICE_PORT, - endpoint="/v1/embeddings", - use_remote_service=True, - service_type=ServiceType.EMBEDDING, - ) - retriever = MicroService( - name="retriever", - host=RETRIEVER_SERVICE_HOST_IP, - port=RETRIEVER_SERVICE_PORT, - endpoint="/v1/retrieval", - use_remote_service=True, - service_type=ServiceType.RETRIEVER, - ) - rerank = MicroService( - name="rerank", - host=RERANK_SERVICE_HOST_IP, - port=RERANK_SERVICE_PORT, - endpoint="/v1/reranking", - use_remote_service=True, - service_type=ServiceType.RERANK, - ) - llm = MicroService( - name="llm", - host=LLM_SERVICE_HOST_IP, - port=LLM_SERVICE_PORT, - endpoint="/v1/chat/completions", - use_remote_service=True, - service_type=ServiceType.LLM, - ) - self.megaservice.add(embedding).add(retriever).add(rerank).add(llm) - self.megaservice.flow_to(embedding, retriever) - self.megaservice.flow_to(retriever, rerank) - self.megaservice.flow_to(rerank, llm) - self.gateway = ChatQnAGateway(megaservice=self.megaservice, host="0.0.0.0", port=self.port) - - async def schedule(self): - result_dict, runtime_graph = await self.megaservice.schedule( - initial_inputs={"text": "What is the revenue of Nike in 2023?"} - ) - print(result_dict) - - -if __name__ == "__main__": - chatqna = ChatQnAService(host=MEGA_SERVICE_HOST_IP, port=MEGA_SERVICE_PORT) - chatqna.add_remote_service() - asyncio.run(chatqna.schedule())
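
Note on the pinecone retriever test above: the query embedding passed to /v1/retrieval is a hard-coded 768-float literal, which matches the 768-dimensional output of BAAI/bge-base-en-v1.5, the model served by the TEI endpoint. The same check can be written with a generated embedding, as test_web_retrievers_langchain_chroma.sh already does later in this patch; a minimal sketch, assuming the retriever is still published on host port 5009 and that ip_address is set as in the script:

# Sketch: generate the 768-dim query embedding instead of hard-coding it
# (768 matches BAAI/bge-base-en-v1.5).
retriever_port=5009
test_embedding=$(python3 -c "import random; print([random.uniform(-1, 1) for _ in range(768)])")
result=$(http_proxy='' curl --noproxy $ip_address http://${ip_address}:${retriever_port}/v1/retrieval \
    -X POST \
    -d "{\"text\":\"test\",\"embedding\":${test_embedding}}" \
    -H 'Content-Type: application/json')
if [[ $result == *"retrieved_docs"* ]]; then
    echo "Result correct."
else
    echo "Result wrong. Received was $result"
    exit 1
fi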
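
The new build_docker_images functions all follow the same pattern: run docker build --no-cache, then test $? and exit on failure. That explicit check is what stops a test early now that the scripts use set -x instead of set -xe. A minimal sketch of the pattern as a reusable helper; the function name build_and_check is hypothetical and not part of the patch:

# Hypothetical helper wrapping the repeated build-then-check pattern.
# $1 is the image tag; remaining arguments are passed straight to docker build.
function build_and_check() {
    local image=$1; shift
    docker build --no-cache -t "$image" "$@"
    if [ $? -ne 0 ]; then          # must be tested immediately after docker build
        echo "$image build failed"
        exit 1
    else
        echo "$image built successfully"
    fi
}

# Usage, mirroring the redis retriever build above:
# build_and_check opea/retriever-redis:comps \
#     --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy \
#     -f comps/retrievers/langchain/redis/docker/Dockerfile .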
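
The TTS check above accepts the response when it contains "Ukl" because Base64-encoding a RIFF/WAV stream always begins with "UklGR", so the substring match is effectively asserting that audio bytes came back from /v1/audio/speech. A quick, self-contained demonstration of that encoding relationship:

# Why the test greps for "Ukl": the Base64 encoding of the RIFF magic bytes
# that open every WAV file starts with "Ukl".
printf 'RIFF' | base64          # prints UklGRg==
printf 'UklGRg==' | base64 -d   # prints RIFF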
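
The Milvus vector-store test drives three REST calls (create collection, insert, search) with the same curl/grep/log/exit shape. A sketch that factors that shape into one helper; check_milvus_call is a hypothetical name, while the endpoints and the '"code":200' success marker are taken from the script above:

# Hypothetical helper for the Milvus REST checks: POST a JSON body, log the
# raw response, and fail the test on anything other than a code:200 reply.
function check_milvus_call() {
    local name=$1 path=$2 body=$3
    local resp
    resp=$(curl -s -X POST "http://$ip_address:$PORT$path" \
        -H "accept: application/json" -H "Content-Type: application/json" -d "$body")
    echo "$resp" >> "${LOG_PATH}/milvus_${name}.log"
    if [[ $resp == *'"code":200'* ]]; then
        echo "[ test $name ] succeeded"
    else
        echo "[ test $name ] failed"
        docker logs milvus-standalone
        exit 1
    fi
}

# Usage, mirroring the insert step above:
# check_milvus_call insert "/v1/vector/insert" \
#     "{ \"collectionName\": \"$COLLECTION_NAME\", \"data\": [{\"vector\":[1,2]}] }"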
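
One detail worth flagging in the pathway vector-store test: the exit-status check runs after cd $WORKPATH, so [ $? -ne 0 ] reports the result of the cd rather than the docker build, and the failure message names opea/retriever-pathway even though the image being built is opea/vectorstore-pathway:comps. A corrected sketch of that block, keeping the rest of the script unchanged:

cd $WORKPATH/comps/vectorstores/langchain/pathway
docker build --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy \
    -t opea/vectorstore-pathway:comps .
if [ $? -ne 0 ]; then              # checked before any further cd
    echo "opea/vectorstore-pathway build failed"
    exit 1
else
    echo "opea/vectorstore-pathway built successfully"
fi
cd $WORKPATH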