Refactor DocSum example (#1286)
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Spycsh and pre-commit-ci[bot] authored Dec 26, 2024
1 parent 6b6a08d commit a01729a
Showing 16 changed files with 145 additions and 1,143 deletions.
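To review the change set outside the web UI, you can inspect the commit with plain git; a minimal sketch, assuming the repository containing the DocSum example is cloned locally and this commit is present in your history:

```bash
# Inspect this commit locally; the commit hash is taken from the page above.
git show --stat a01729a
# View the full diff of one of the touched files:
git show a01729a -- DocSum/Dockerfile
```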
3 changes: 2 additions & 1 deletion DocSum/Dockerfile
@@ -6,7 +6,8 @@ FROM python:3.11-slim
RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \
libgl1-mesa-glx \
libjemalloc-dev \
git
git \
ffmpeg

RUN useradd -m -s /bin/bash user && \
mkdir -p /home/user && \
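The only functional change to the image here is the addition of `ffmpeg`, presumably for the audio/video handling that the removed dataprep services used to provide. A quick sanity check after building the image per the README is to confirm the binary is present; this is a sketch, and the image name and tag are assumptions, not taken from this commit:

```bash
# Confirm ffmpeg is available inside the built DocSum image
# (image name/tag are assumptions, not taken from this commit).
docker run --rm --entrypoint ffmpeg opea/docsum:latest -version
```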
31 changes: 2 additions & 29 deletions DocSum/docker_compose/amd/gpu/rocm/compose.yaml
@@ -70,34 +70,6 @@ services:
https_proxy: ${https_proxy}
restart: unless-stopped

dataprep-audio2text:
image: ${REGISTRY:-opea}/dataprep-audio2text:${TAG:-latest}
container_name: dataprep-audio2text-service
ports:
- "9099:9099"
ipc: host
environment:
A2T_ENDPOINT: ${A2T_ENDPOINT}

dataprep-video2audio:
image: ${REGISTRY:-opea}/dataprep-video2audio:${TAG:-latest}
container_name: dataprep-video2audio-service
ports:
- "7078:7078"
ipc: host
environment:
V2A_ENDPOINT: ${V2A_ENDPOINT}

dataprep-multimedia2text:
image: ${REGISTRY:-opea}/dataprep-multimedia2text:${TAG:-latest}
container_name: dataprep-multimedia2text
ports:
- "7079:7079"
ipc: host
environment:
V2A_ENDPOINT: ${V2A_ENDPOINT}
A2T_ENDPOINT: ${A2T_ENDPOINT}

docsum-backend-server:
image: ${REGISTRY:-opea}/docsum:${TAG:-latest}
container_name: docsum-backend-server
@@ -111,8 +83,9 @@ services:
- https_proxy=${https_proxy}
- http_proxy=${http_proxy}
- MEGA_SERVICE_HOST_IP=${HOST_IP}
- DATA_SERVICE_HOST_IP=${DATA_SERVICE_HOST_IP}
- LLM_SERVICE_HOST_IP=${HOST_IP}
- ASR_SERVICE_HOST_IP=${ASR_SERVICE_HOST_IP}

ipc: host
restart: always
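With the dataprep-* services dropped from this compose file, the ROCm stack can be brought up with just the remaining services; a minimal sketch, run from the directory containing this compose.yaml after setting the environment via set_env.sh:

```bash
# Start the trimmed stack and verify that no dataprep-* containers are created.
source set_env.sh
docker compose -f compose.yaml up -d
docker compose -f compose.yaml ps   # the dataprep-* services removed here should not appear
```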

8 changes: 0 additions & 8 deletions DocSum/docker_compose/amd/gpu/rocm/set_env.sh
@@ -15,11 +15,3 @@ export DOCSUM_LLM_SERVER_PORT="9000"
export DOCSUM_BACKEND_SERVER_PORT="8888"
export DOCSUM_FRONTEND_PORT="5173"
export BACKEND_SERVICE_ENDPOINT="http://${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum"
export V2A_SERVICE_HOST_IP=${host_ip}
export V2A_ENDPOINT=http://$host_ip:7078
export A2T_ENDPOINT=http://$host_ip:7066
export A2T_SERVICE_HOST_IP=${host_ip}
export A2T_SERVICE_PORT=9099
export DATA_ENDPOINT=http://$host_ip:7079
export DATA_SERVICE_HOST_IP=${host_ip}
export DATA_SERVICE_PORT=7079
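After this cleanup only the DocSum-specific variables remain; a quick check that the removed multimedia2text endpoints are no longer exported (a sketch, run in a fresh shell from the ROCm compose directory):

```bash
# Source the trimmed environment and confirm the removed variables are gone
# while the backend endpoint is still set.
source set_env.sh
echo "BACKEND_SERVICE_ENDPOINT=${BACKEND_SERVICE_ENDPOINT}"
env | grep -E '^(V2A|A2T|DATA)_' || echo "no leftover multimedia2text variables"
```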
63 changes: 3 additions & 60 deletions DocSum/docker_compose/intel/cpu/xeon/README.md
@@ -29,30 +29,6 @@ The Whisper Service converts audio files to text. Follow these steps to build and run the service:
docker build -t opea/whisper:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/whisper/dependency/Dockerfile .
```
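To smoke-test the Whisper image on its own before wiring up the full stack, you can run it directly; this is a sketch, and the 7066 port mapping is an assumption based on the ASR endpoint configuration used elsewhere in this example:

```bash
# Run the Whisper service standalone (port mapping is an assumption).
docker run -d --name whisper-server \
  -p 7066:7066 \
  -e http_proxy=$http_proxy -e https_proxy=$https_proxy \
  opea/whisper:latest
docker logs -f whisper-server   # watch until the service reports it is ready
# Stop and remove this container before bringing up the full compose stack.
```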

#### Audio to text Service

The Audio to text Service is another service for converting audio to text. Follow these steps to build and run the service:

```bash
docker build -t opea/dataprep-audio2text:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/multimedia2text/audio2text/Dockerfile .
```

#### Video to Audio Service

The Video to Audio Service extracts audio from video files. Follow these steps to build and run the service:

```bash
docker build -t opea/dataprep-video2audio:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/multimedia2text/video2audio/Dockerfile .
```

#### Multimedia to Text Service

The Multimedia to Text Service transforms multimedia data to text data. Follow these steps to build and run the service:

```bash
docker build -t opea/dataprep-multimedia2text:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/multimedia2text/Dockerfile .
```

### 2. Build MegaService Docker Image

To construct the Mega Service, we utilize the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline within the `docsum.py` Python script. Build the MegaService Docker image with the command below:
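The exact command is collapsed in this diff view; as a rough sketch of what the build typically looks like for this example (the working directory and image tag are assumptions, not taken from this commit):

```bash
# Sketch only: build the DocSum MegaService image from the example directory.
cd GenAIExamples/DocSum        # path is an assumption; use wherever the DocSum example lives
docker build -t opea/docsum:latest \
  --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy \
  -f Dockerfile .
```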
@@ -149,9 +125,6 @@ You will have the following Docker Images:
2. `opea/docsum:latest`
3. `opea/llm-docsum-tgi:latest`
4. `opea/whisper:latest`
5. `opea/dataprep-audio2text:latest`
6. `opea/dataprep-multimedia2text:latest`
7. `opea/dataprep-video2audio:latest`

### Validate Microservices

@@ -188,37 +161,7 @@ You will have the following Docker Images:
{"asr_result":"you"}
```

4. Audio2Text Microservice

```bash
curl http://${host_ip}:9099/v1/audio/transcriptions \
-X POST \
-d '{"byte_str":"UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA"}' \
-H 'Content-Type: application/json'
```

Expected output:

```bash
{"downstream_black_list":[],"id":"--> this will be different id number for each run <--","query":"you"}
```

5. Multimedia to text Microservice

```bash
curl http://${host_ip}:7079/v1/multimedia2text \
-X POST \
-d '{"audio":"UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA"}' \
-H 'Content-Type: application/json'
```

Expected output:

```bash
{"downstream_black_list":[],"id":"--> this will be different id number for each run <--","query":"you"}
```

6. MegaService
4. MegaService

Text:

@@ -257,7 +200,7 @@ You will have the following Docker Images:
-F "stream=true"
```

> Audio and Video file uploads are not supported in docsum with curl request, please use the Gradio-UI.
> Audio and video file uploads are not supported in DocSum with a curl request; please use the Gradio UI. You can still pass a base64 string of the audio or video file as follows:
Audio:

@@ -291,7 +234,7 @@ You will have the following Docker Images:
-F "stream=true"
```

7. MegaService with long context
5. MegaService with long context

If you want to deal with long context, you can set the following parameters and select a suitable summary type.

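The collapsed audio/video examples above post a base64-encoded file to the DocSum endpoint; the payload itself can be produced with standard tooling. A minimal sketch (GNU coreutils `base64`; the file name is a placeholder):

```bash
# Produce a single-line base64 string from a local audio or video file; this is
# the string passed in the collapsed curl examples above.
base64 -w 0 sample.mp4 > sample.mp4.b64
wc -c sample.mp4.b64   # sanity check that the payload is non-empty
```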
33 changes: 1 addition & 32 deletions DocSum/docker_compose/intel/cpu/xeon/compose.yaml
@@ -50,52 +50,21 @@ services:
https_proxy: ${https_proxy}
restart: unless-stopped

dataprep-audio2text:
image: ${REGISTRY:-opea}/dataprep-audio2text:${TAG:-latest}
container_name: dataprep-audio2text-server
ports:
- "9099:9099"
ipc: host
environment:
A2T_ENDPOINT: ${A2T_ENDPOINT}

dataprep-video2audio:
image: ${REGISTRY:-opea}/dataprep-video2audio:${TAG:-latest}
container_name: dataprep-video2audio-server
ports:
- "7078:7078"
ipc: host
environment:
V2A_ENDPOINT: ${V2A_ENDPOINT}

dataprep-multimedia2text:
image: ${REGISTRY:-opea}/dataprep-multimedia2text:${TAG:-latest}
container_name: dataprep-multimedia2text
ports:
- "7079:7079"
ipc: host
environment:
V2A_ENDPOINT: ${V2A_ENDPOINT}
A2T_ENDPOINT: ${A2T_ENDPOINT}

docsum-xeon-backend-server:
image: ${REGISTRY:-opea}/docsum:${TAG:-latest}
container_name: docsum-xeon-backend-server
depends_on:
- tgi-server
- llm-docsum-tgi
- dataprep-multimedia2text
- dataprep-video2audio
- dataprep-audio2text
ports:
- "8888:8888"
environment:
- no_proxy=${no_proxy}
- https_proxy=${https_proxy}
- http_proxy=${http_proxy}
- MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP}
- DATA_SERVICE_HOST_IP=${DATA_SERVICE_HOST_IP}
- LLM_SERVICE_HOST_IP=${LLM_SERVICE_HOST_IP}
- ASR_SERVICE_HOST_IP=${ASR_SERVICE_HOST_IP}
ipc: host
restart: always

59 changes: 5 additions & 54 deletions DocSum/docker_compose/intel/hpu/gaudi/README.md
@@ -13,28 +13,12 @@ git clone https://github.com/opea-project/GenAIComps.git
cd GenAIComps
```

#### Audio to text Service
#### Whisper Service

The Audio to text Service is another service for converting audio to text. Follow these steps to build and run the service:
The Whisper Service converts audio files to text. Follow these steps to build and run the service:

```bash
docker build -t opea/dataprep-audio2text:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/multimedia2text/audio2text/Dockerfile .
```

#### Video to Audio Service

The Video to Audio Service extracts audio from video files. Follow these steps to build and run the service:

```bash
docker build -t opea/dataprep-video2audio:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/multimedia2text/video2audio/Dockerfile .
```

#### Multimedia to Text Service

The Multimedia to Text Service transforms multimedia data to text data. Follow these steps to build and run the service:

```bash
docker build -t opea/dataprep-multimedia2text:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/multimedia2text/Dockerfile .
docker build -t opea/whisper:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/whisper/dependency/Dockerfile .
```

### 2. Build MegaService Docker Image
@@ -133,9 +117,6 @@ You will have the following Docker Images:
2. `opea/docsum:latest`
3. `opea/llm-docsum-tgi:latest`
4. `opea/whisper:latest`
5. `opea/dataprep-audio2text:latest`
6. `opea/dataprep-multimedia2text:latest`
7. `opea/dataprep-video2audio:latest`

### Validate Microservices

@@ -172,37 +153,7 @@ You will have the following Docker Images:
{"asr_result":"you"}
```

4. Audio2Text Microservice

```bash
curl http://${host_ip}:9199/v1/audio/transcriptions \
-X POST \
-d '{"byte_str":"UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA"}' \
-H 'Content-Type: application/json'
```

Expected output:

```bash
{"downstream_black_list":[],"id":"--> this will be different id number for each run <--","query":"you"}
```

5. Multimedia to text Microservice

```bash
curl http://${host_ip}:7079/v1/multimedia2text \
-X POST \
-d '{"audio":"UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA"}' \
-H 'Content-Type: application/json'
```

Expected output:

```bash
{"downstream_black_list":[],"id":"--> this will be different id number for each run <--","query":"you"}
```

6. MegaService
4. MegaService

Text:

@@ -274,7 +225,7 @@ You will have the following Docker Images:
-F "stream=True"
```

7. MegaService with long context
5. MegaService with long context

If you want to deal with long context, you can set the following parameters and select a suitable summary type.

33 changes: 1 addition & 32 deletions DocSum/docker_compose/intel/hpu/gaudi/compose.yaml
@@ -62,52 +62,21 @@ services:
- SYS_NICE
restart: unless-stopped

dataprep-audio2text:
image: ${REGISTRY:-opea}/dataprep-audio2text:${TAG:-latest}
container_name: dataprep-audio2text-server
ports:
- "9199:9099"
ipc: host
environment:
A2T_ENDPOINT: ${A2T_ENDPOINT}

dataprep-video2audio:
image: ${REGISTRY:-opea}/dataprep-video2audio:${TAG:-latest}
container_name: dataprep-video2audio-server
ports:
- "7078:7078"
ipc: host
environment:
V2A_ENDPOINT: ${V2A_ENDPOINT}

dataprep-multimedia2text:
image: ${REGISTRY:-opea}/dataprep-multimedia2text:${TAG:-latest}
container_name: dataprep-multimedia2text
ports:
- "7079:7079"
ipc: host
environment:
V2A_ENDPOINT: ${V2A_ENDPOINT}
A2T_ENDPOINT: ${A2T_ENDPOINT}

docsum-gaudi-backend-server:
image: ${REGISTRY:-opea}/docsum:${TAG:-latest}
container_name: docsum-gaudi-backend-server
depends_on:
- tgi-server
- llm-docsum-tgi
- dataprep-multimedia2text
- dataprep-video2audio
- dataprep-audio2text
ports:
- "8888:8888"
environment:
- no_proxy=${no_proxy}
- https_proxy=${https_proxy}
- http_proxy=${http_proxy}
- MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP}
- DATA_SERVICE_HOST_IP=${DATA_SERVICE_HOST_IP}
- LLM_SERVICE_HOST_IP=${LLM_SERVICE_HOST_IP}
- ASR_SERVICE_HOST_IP=${ASR_SERVICE_HOST_IP}

ipc: host
restart: always
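To confirm that the trimmed environment is what actually reaches the Gaudi backend container, you can render the resolved compose configuration; a sketch (the set_env.sh path is an assumption):

```bash
# Render the fully resolved compose file and inspect the backend service;
# DATA_SERVICE_HOST_IP should no longer appear in its environment.
source ../../../set_env.sh   # assumed location of the shared set_env.sh
docker compose -f compose.yaml config | grep -A 15 'docsum-gaudi-backend-server:'
```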
14 changes: 3 additions & 11 deletions DocSum/docker_compose/set_env.sh
@@ -13,15 +13,7 @@ export no_proxy="${no_proxy},${host_ip}"
export TGI_LLM_ENDPOINT="http://${host_ip}:8008"
export MEGA_SERVICE_HOST_IP=${host_ip}
export LLM_SERVICE_HOST_IP=${host_ip}
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/docsum"

export V2A_SERVICE_HOST_IP=${host_ip}
export V2A_ENDPOINT=http://$host_ip:7078
export ASR_SERVICE_HOST_IP=${host_ip}
export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"

export A2T_ENDPOINT=http://$host_ip:7066
export A2T_SERVICE_HOST_IP=${host_ip}
export A2T_SERVICE_PORT=9099

export DATA_ENDPOINT=http://$host_ip:7079
export DATA_SERVICE_HOST_IP=${host_ip}
export DATA_SERVICE_PORT=7079
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/docsum"
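A quick way to confirm the consolidated script exports what the compose files expect (a sketch; it assumes `host_ip` is set by the portion of the script not shown in this hunk):

```bash
# Source the shared environment script and spot-check the key variables.
source DocSum/docker_compose/set_env.sh
echo "LLM_MODEL_ID=${LLM_MODEL_ID}"
echo "BACKEND_SERVICE_ENDPOINT=${BACKEND_SERVICE_ENDPOINT}"
```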