
Add Dockerfile and client.py; deploy to EC2 on AWS via Github Actions #52

Open · wants to merge 38 commits into base: master

Changes from 5 commits (38 commits total)

Commits
f8f21f6
Add Dockerfile; client.py; deploy.py; deploy_requirements.txt; docker…
abrichr Oct 29, 2024
0657932
documentation
abrichr Oct 29, 2024
7b8d20c
add workflow file
abrichr Oct 29, 2024
71892c3
improve logging
abrichr Oct 29, 2024
4fdd738
feat(deploy): Add Dockerfile, client.py, deploy.py
abrichr Oct 29, 2024
31c6289
remove autogenerated .github/workflows/docker-build-ec2.yml
abrichr Oct 29, 2024
6c55805
restore requirements.txt
abrichr Oct 29, 2024
f31f8ae
add workflow file
abrichr Oct 29, 2024
1e6fa93
append timestamps to filenames; return bboxes
abrichr Oct 29, 2024
b439503
add missing json import
abrichr Oct 29, 2024
be7866f
return json with label_coordinates and parsed_content_list
abrichr Oct 29, 2024
faa075e
remove conflicting container if it exists
abrichr Oct 29, 2024
db9b1ee
NumpyEncoder
abrichr Oct 29, 2024
706f521
save raw result data
abrichr Oct 29, 2024
a1b85c5
improve documentation
abrichr Oct 29, 2024
803d0bf
feat(client): save bounding boxes
abrichr Oct 29, 2024
88f7246
Add functionality to save bounding boxes
abrichr Oct 29, 2024
754d0b7
update README
abrichr Oct 29, 2024
af8c9da
improve README
abrichr Oct 29, 2024
4fdb813
add deploy section to README
abrichr Oct 29, 2024
3520928
improve documentation
abrichr Oct 30, 2024
64bdbaa
add usage to Dockerfile documentation
abrichr Oct 30, 2024
9cce7d7
Improve deploy.py documentation
abrichr Oct 30, 2024
54b8b47
add client.predict and documentation
abrichr Oct 30, 2024
169dd20
Merge branch 'master' into feat/deploy
abrichr Nov 1, 2024
b8b952c
undo changes to gradio_demo.py
abrichr Nov 1, 2024
201af0f
Add JSON output formatting to process function; return label_coordinates
abrichr Nov 1, 2024
a411848
Update Dockerfile documentation
abrichr Nov 1, 2024
b706744
Merge branch 'master' into feat/deploy
abrichr Nov 1, 2024
9ad451a
remove superfluous print
abrichr Nov 1, 2024
9f2dc91
more terse
abrichr Nov 1, 2024
76d6110
parsed_content
abrichr Nov 1, 2024
16a4d7a
simplify dependencies; update AMI; ssh non_interactive; replace nvidi…
abrichr Nov 12, 2024
10b08e6
add workflow file
abrichr Nov 12, 2024
2b91690
Revert "add workflow file"
abrichr Nov 12, 2024
fcb23b8
update documentation
abrichr Nov 12, 2024
cd69e3a
Update README.md: replace nvidia-docker with docker
abrichr Nov 13, 2024
f2f8ba8
Merge branch 'master' into feat/deploy
abrichr Dec 24, 2024
41 changes: 41 additions & 0 deletions .github/workflows/docker-build-ec2.yml
@@ -0,0 +1,41 @@
# Autogenerated via deploy.py, do not edit!

name: Docker Build on EC2 Instance for OmniParser

on:
  push:
    branches:
      - feat/deploy

jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v2

      - name: SSH and Execute Build on EC2
        uses: appleboy/ssh-action@master
        with:
          command_timeout: "60m"
          host: 44.198.58.162
          username: ubuntu
          key: ${{ secrets.SSH_PRIVATE_KEY }}
          script: |
            rm -rf OmniParser || true
            git clone https://github.com/OpenAdaptAI/OmniParser
            cd OmniParser
            git checkout feat/deploy
            git pull

            # Stop and remove any existing containers
            sudo docker stop omniparser-container || true
            sudo docker rm omniparser-container || true

            # Build the Docker image
            sudo nvidia-docker build -t omniparser .

            # Run the Docker container on the specified port
            sudo docker run -d -p 7861:7861 --gpus all --name omniparser-container omniparser
5 changes: 5 additions & 0 deletions .gitignore
@@ -3,3 +3,8 @@ weights/icon_caption_florence
weights/icon_detect/
.gradio
__pycache__
*.swp
.env
.env.*
venv/
*.pem
95 changes: 95 additions & 0 deletions Dockerfile
@@ -0,0 +1,95 @@
# Dockerfile for OmniParser with GPU support and OpenGL libraries
#
# This Dockerfile is intended to create an environment with NVIDIA CUDA
# support and the necessary dependencies to run the OmniParser project.
# The configuration is designed to support applications that rely on
# Python 3.12, OpenCV, Hugging Face transformers, and Gradio. Additionally,
# it includes steps to pull large files from Git LFS and a script to
# convert model weights from .safetensor to .pt format. The container
# runs a Gradio server by default, exposed on port 7861.
#
# Base image: nvidia/cuda:12.3.1-devel-ubuntu22.04
#
# Key features:
# - System dependencies for OpenGL to support graphical libraries.
# - Miniconda for Python 3.12, allowing for environment management.
# - Git Large File Storage (LFS) setup for handling large model files.
# - Requirement file installation, including specific versions of
# OpenCV and Hugging Face Hub.
# - Entrypoint script execution with Gradio server configuration for
# external access.

FROM nvidia/cuda:12.3.1-devel-ubuntu22.04

# Install system dependencies with explicit OpenGL libraries
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \
    git \
    git-lfs \
    wget \
    libgl1 \
    libglib2.0-0 \
    libsm6 \
    libxext6 \
    libxrender1 \
    libglu1-mesa \
    python3-opencv \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/* \
    && git lfs install

# Install Miniconda for Python 3.12
RUN wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh && \
    bash miniconda.sh -b -p /opt/conda && \
    rm miniconda.sh
ENV PATH="/opt/conda/bin:$PATH"

# Create a Conda environment with Python 3.12 and set it as the default
RUN conda create -n omni python=3.12 && \
    echo "source activate omni" > ~/.bashrc
ENV CONDA_DEFAULT_ENV=omni
ENV PATH="/opt/conda/envs/omni/bin:$PATH"

# Set the working directory in the container
WORKDIR /usr/src/app

# Copy project files (this includes requirements.txt)
COPY . .

# Initialize Git LFS and pull LFS files
RUN git lfs install && \
    git lfs pull

# Install dependencies from requirements.txt, pinning opencv-python-headless
RUN . /opt/conda/etc/profile.d/conda.sh && conda activate omni && \
    pip uninstall -y opencv-python opencv-python-headless && \
    pip install --no-cache-dir opencv-python-headless==4.8.1.78 && \
    pip install -r requirements.txt && \
    pip install huggingface_hub

# Run download.py to fetch model weights and convert safetensors to .pt format
RUN . /opt/conda/etc/profile.d/conda.sh && conda activate omni && \
    python download.py && \
    echo "Contents of weights directory:" && \
    ls -lR weights && \
    python weights/convert_safetensor_to_pt.py

# Expose the default Gradio port
EXPOSE 7861

# Configure Gradio to be accessible externally
ENV GRADIO_SERVER_NAME="0.0.0.0"

# Copy and set permissions for entrypoint script
COPY entrypoint.sh /usr/src/app/entrypoint.sh
RUN chmod +x /usr/src/app/entrypoint.sh

# To debug, keep the container running
# CMD ["tail", "-f", "/dev/null"]

# Set the entrypoint
ENTRYPOINT ["/usr/src/app/entrypoint.sh"]
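For local testing, the image can be built and run the same way the CI workflow does on EC2. This is a sketch assuming a host with Docker and the NVIDIA container toolkit installed; the port and container name mirror the workflow script above.

```shell
# Build the image from the repository root (where the Dockerfile lives)
docker build -t omniparser .

# Run detached with GPU access, exposing the Gradio server on port 7861
docker run -d -p 7861:7861 --gpus all --name omniparser-container omniparser
```

Once the container is up, the Gradio endpoint is reachable at `http://localhost:7861` and can be exercised with `client.py`.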
94 changes: 94 additions & 0 deletions client.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
"""
This module provides a command-line interface to interact with the OmniParser Gradio server.

Usage:
python client.py "http://<server_ip>:7861" "path/to/image.jpg"
"""

import base64
import mimetypes
import os
import shutil

import fire
from gradio_client import Client
from loguru import logger
from PIL import Image


def predict(server_url: str, image_path: str, box_threshold: float = 0.05, iou_threshold: float = 0.1):
    """
    Makes a prediction using the OmniParser Gradio client with the provided server URL and image.

    Args:
        server_url (str): The URL of the OmniParser Gradio server.
        image_path (str): Path to the image file to be processed.
        box_threshold (float): Box threshold value (default: 0.05).
        iou_threshold (float): IOU threshold value (default: 0.1).
    """
    client = Client(server_url)

    # Load and encode the image
    with open(image_path, "rb") as image_file:
        encoded_image = base64.b64encode(image_file.read()).decode("utf-8")

    # Guess the MIME type from the file extension; fall back to PNG
    mime_type = mimetypes.guess_type(image_path)[0] or "image/png"

    # Prepare the image input in the format expected by the server
    image_input = {
        "path": None,
        "url": f"data:{mime_type};base64,{encoded_image}",
        "size": None,
        "orig_name": image_path,
        "mime_type": mime_type,
        "is_stream": False,
        "meta": {},
    }

    # Make the prediction
    try:
        result = client.predict(
            image_input,    # image input as dictionary
            box_threshold,  # box_threshold
            iou_threshold,  # iou_threshold
            api_name="/process",
        )

        # Process and log the results
        output_image, parsed_content = result

        logger.info("Prediction completed successfully")
        logger.info(f"Parsed content:\n{parsed_content}")

        # Save the output image
        output_image_path = "output_image.png"
        if isinstance(output_image, dict) and "url" in output_image:
            # Handle a base64-encoded image carried in a dict
            img_data = base64.b64decode(output_image["url"].split(",")[1])
            with open(output_image_path, "wb") as f:
                f.write(img_data)
        elif isinstance(output_image, str):
            if output_image.startswith("data:image"):
                # Handle a base64-encoded image string
                img_data = base64.b64decode(output_image.split(",")[1])
                with open(output_image_path, "wb") as f:
                    f.write(img_data)
            elif os.path.exists(output_image):
                # Handle a file path returned by the server
                shutil.copy(output_image, output_image_path)
            else:
                logger.warning(f"Unexpected output_image format: {output_image}")
        elif isinstance(output_image, Image.Image):
            output_image.save(output_image_path)
        else:
            logger.warning(f"Unexpected output_image type: {type(output_image)}")
            # Coerce to str before slicing; arbitrary objects are not subscriptable
            logger.warning(f"Output image content: {str(output_image)[:100]}...")

        if os.path.exists(output_image_path):
            logger.info(f"Output image saved to: {output_image_path}")
        else:
            logger.warning(f"Failed to save output image to: {output_image_path}")

    except Exception as e:
        logger.error(f"An error occurred: {str(e)}")
        logger.exception("Traceback:")


if __name__ == "__main__":
    fire.Fire(predict)
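The client wraps the upload as a base64 data URI and decodes data-URI responses from the server. That round trip can be sketched standalone without a running server; the helper names below are illustrative, not part of the PR.

```python
import base64


def to_data_uri(data: bytes, mime: str = "image/png") -> str:
    """Encode raw bytes as a base64 data URI, as client.py does for uploads."""
    return f"data:{mime};base64," + base64.b64encode(data).decode("utf-8")


def from_data_uri(uri: str) -> bytes:
    """Decode the payload of a data URI, mirroring the output-image handling."""
    return base64.b64decode(uri.split(",", 1)[1])


# Round trip: the decoded payload matches the original bytes exactly
payload = b"\x89PNG\r\n\x1a\n"
uri = to_data_uri(payload)
assert uri.startswith("data:image/png;base64,")
assert from_data_uri(uri) == payload
```

The same `split(",", 1)` decode is what the client applies to `data:image/...` strings returned by the `/process` endpoint.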
