Merge pull request #1 from mobiusml/init_app
Initial version
movchan74 authored Nov 1, 2023
2 parents 7104273 + 656eec4 commit 98203a6
Showing 31 changed files with 1,145 additions and 1 deletion.
2 changes: 2 additions & 0 deletions .devcontainer/Dockerfile
@@ -0,0 +1,2 @@
FROM nvidia/cuda:11.8.0-cudnn8-devel-ubuntu22.04
RUN apt-get update && apt-get install -y libgl1 libglib2.0-0
36 changes: 36 additions & 0 deletions .devcontainer/devcontainer.json
@@ -0,0 +1,36 @@
{
    "name": "Ubuntu",
    "build": {
        "dockerfile": "Dockerfile"
    },
    "features": {
        "ghcr.io/devcontainers/features/python:1": {
            "installTools": true,
            "version": "3.10"
        },
        "ghcr.io/devcontainers-contrib/features/poetry:2": {
            "version": "latest"
        }
    },
    "hostRequirements": {
        "gpu": "optional"
    },
    "mounts": [
        "source=/nas,target=/nas,type=bind",
        "source=/nas2,target=/nas2,type=bind"
    ],

    "postCreateCommand": "sh ${containerWorkspaceFolder}/install.sh",
    "postStartCommand": "git config --global --add safe.directory ${containerWorkspaceFolder}",
    "customizations": {
        "vscode": {
            "extensions": [
                "ms-python.black-formatter",
                "ms-python.python",
                "ms-python.mypy-type-checker",
                "ms-toolsai.jupyter"
            ]
        }
    }

}
3 changes: 3 additions & 0 deletions .gitmodules
@@ -0,0 +1,3 @@
[submodule "mobius-pipeline"]
	path = mobius-pipeline
	url = ../mobius-pipeline.git
6 changes: 6 additions & 0 deletions .vscode/settings.json
@@ -0,0 +1,6 @@
{
    "[python]": {
        "editor.defaultFormatter": "ms-python.black-formatter"
    },
    "python.formatting.provider": "none"
}
33 changes: 33 additions & 0 deletions Dockerfile
@@ -0,0 +1,33 @@
# Use NVIDIA CUDA as base image
FROM nvidia/cuda:11.8.0-cudnn8-devel-ubuntu22.04

# Set working directory
WORKDIR /app

# Set DEBIAN_FRONTEND to noninteractive to suppress apt prompts during the build
ENV DEBIAN_FRONTEND=noninteractive

# Install required libraries, tools, and Python3
RUN apt-get update && apt-get install -y libgl1 libglib2.0-0 curl git python3.10 python3.10-dev python3-pip python3.10-venv

# Install poetry
RUN curl -sSL https://install.python-poetry.org | python3 -

# Update PATH
RUN echo 'export PATH="/root/.local/bin:$PATH"' >> /root/.bashrc
ENV PATH="/root/.local/bin:$PATH"

# Copy project files into the container
COPY . /app

# Install the package with poetry
RUN sh install.sh

# Disable buffering for stdout and stderr to get the logs in real time
ENV PYTHONUNBUFFERED=1

# Expose the desired port
EXPOSE 8000

# Set the command to run the SDK when the container starts
CMD ["poetry", "run", "serve", "run", "--port", "8000", "--host", "0.0.0.0", "aana.main:server"]
85 changes: 84 additions & 1 deletion README.md
@@ -1 +1,84 @@
# aana_sdk
# Aana

Aana is a multi-model SDK for deploying and serving machine learning models.

## Installation

1. Clone this repository.
2. Update submodules.

```bash
git submodule update --init --recursive
```

3. Install additional libraries.

```bash
apt update && apt install -y libgl1
```

4. Install the package with poetry.

It will install the package and all dependencies in a virtual environment.

```bash
sh install.sh
```

5. Run the SDK.

```bash
CUDA_VISIBLE_DEVICES=0 poetry run serve run --port 8000 --host 0.0.0.0 aana.main:server
```

The first run might take a while because the models will be downloaded from Google Drive and cached.

Once you see `Deployed Serve app successfully.` in the logs, the server is ready to accept requests.

You can adjust the port and the `CUDA_VISIBLE_DEVICES` environment variable as needed.

The server will be available at http://localhost:8000.

The documentation will be available at http://localhost:8000/docs and http://localhost:8000/redoc.

For HuggingFace Transformers, set the `HF_AUTH` environment variable to your HuggingFace API token.

6. Send a request to the server.

You can find examples in the [demo notebook](notebooks/demo.ipynb).
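
If you prefer not to open the notebook, a minimal sketch of a request to the `/llm/generate` endpoint (defined in `aana/api/request_handler.py`) is shown below; the prompt and the sampling parameter names are illustrative assumptions, not a definitive schema.

```python
import requests

response = requests.post(
    "http://localhost:8000/llm/generate",
    json={
        "prompt": "Explain what Aana does in one sentence.",
        # Assumed sampling parameter names, for illustration only.
        "sampling_params": {"temperature": 0.7, "max_tokens": 128},
    },
)
response.raise_for_status()
print(response.json())  # pipeline output plus an "execution_time" field
```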

## Run with Docker

1. Clone this repository.

2. Update submodules.

```bash
git submodule update --init --recursive
```

3. Build the Docker image.

```bash
docker build -t aana:0.1.0 .
```

4. Run the Docker container.

```bash
docker run --rm --init -p 8000:8000 --gpus all -e CUDA_VISIBLE_DEVICES=0 -v aana_cache:/root/.aana -v aana_hf_cache:/root/.cache/huggingface --name aana_instance aana:0.1.0
```

The first run might take a while because the models will be downloaded from Google Drive and cached. The models will be stored in the `aana_cache` volume, and the HuggingFace models in the `aana_hf_cache` volume. If you want to remove the cached models, remove the corresponding volumes.

Once you see `Deployed Serve app successfully.` in the logs, the server is ready to accept requests.

You can adjust the port and `--gpus` parameters as needed.

The server will be available at http://localhost:8000.

The documentation will be available at http://localhost:8000/docs and http://localhost:8000/redoc.

5. Send a request to the server.

You can find examples in the [demo notebook](notebooks/demo.ipynb).
Empty file added aana/__init__.py
Empty file.
Empty file added aana/api/__init__.py
Empty file.
122 changes: 122 additions & 0 deletions aana/api/app.py
@@ -0,0 +1,122 @@
import traceback
from typing import Union
from fastapi import FastAPI, Request
from fastapi.responses import JSONResponse
from mobius_pipeline.exceptions import BaseException
from pydantic import ValidationError
from ray.exceptions import RayTaskError


app = FastAPI()


@app.exception_handler(ValidationError)
async def validation_exception_handler(request: Request, exc: ValidationError):
"""
This handler is used to handle pydantic validation errors
Args:
request (Request): The request object
exc (ValidationError): The validation error
Returns:
JSONResponse: JSON response with the error details
"""
# TODO: Structure the error response so that it is consistent with the other error responses
return JSONResponse(
status_code=422,
content={"detail": exc.errors()},
)


def custom_exception_handler(
    request: Request, exc_raw: Union[BaseException, RayTaskError]
):
    """
    This handler is used to handle custom exceptions raised in the application.
    BaseException is the base exception for all the exceptions
    from the Mobius Pipeline and Aana application.
    Sometimes custom exceptions are wrapped into a RayTaskError, so we need to handle that as well.
    Args:
        request (Request): The request object
        exc_raw (Union[BaseException, RayTaskError]): The exception raised
    Returns:
        JSONResponse: JSON response with the error details. The response contains the following fields:
            error: The name of the exception class.
            message: The message of the exception.
            data: The additional data returned by the exception that can be used to identify the error (e.g. image path, url, model name etc.)
            stacktrace: The stacktrace of the exception.
    """
    # a BaseException can be wrapped into a RayTaskError
    if isinstance(exc_raw, RayTaskError):
        # str(exc_raw) returns the whole stack trace
        # when the exception is a RayTaskError,
        # so let's use it to get the stack trace
        stacktrace = str(exc_raw)
        # get the original exception
        exc: BaseException = exc_raw.cause
        assert isinstance(exc, BaseException)
    else:
        # if it is not a RayTaskError,
        # we need to format the stack trace ourselves
        stacktrace = traceback.format_exc()
        exc = exc_raw
    # get the data from the exception;
    # can be used to return additional info
    # like image path, url, model name etc.
    data = exc.get_data()
    # get the name of the exception class;
    # can be used to identify the type of the error
    error = exc.__class__.__name__
    # get the message of the exception
    message = str(exc)
    return JSONResponse(
        status_code=400,
        content={
            "error": error,
            "message": message,
            "data": data,
            "stacktrace": stacktrace,
        },
    )


@app.exception_handler(BaseException)
async def pipeline_exception_handler(request: Request, exc: BaseException):
"""
This handler is used to handle exceptions raised by the Mobius Pipeline and Aana application.
Args:
request (Request): The request object
exc (BaseException): The exception raised
Returns:
JSONResponse: JSON response with the error details
"""
return custom_exception_handler(request, exc)


@app.exception_handler(RayTaskError)
async def ray_task_error_handler(request: Request, exc: RayTaskError):
"""
This handler is used to handle RayTaskError exceptions.
Args:
request (Request): The request object
exc (RayTaskError): The exception raised
Returns:
JSONResponse: JSON response with the error details. The response contains the following fields:
error: The name of the exception class.
message: The message of the exception.
stacktrace: The stacktrace of the exception.
"""
error = exc.__class__.__name__
stacktrace = traceback.format_exc()

return JSONResponse(
status_code=400,
content={"error": error, "message": str(exc), "stacktrace": stacktrace},
)
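
From the client side, the error body these handlers return can be consumed directly. A minimal sketch (the endpoint and payload follow the hypothetical README example and are illustrative):

```python
import requests

# Hypothetical client-side handling of the structured error body
# produced by the handlers above.
response = requests.post(
    "http://localhost:8000/llm/generate",
    json={"prompt": "test", "sampling_params": {}},
)
if response.status_code == 400:
    body = response.json()
    # Fields set by custom_exception_handler: error, message, data, stacktrace
    print(f"{body['error']}: {body['message']}")
    print("data:", body["data"])
```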
76 changes: 76 additions & 0 deletions aana/api/request_handler.py
@@ -0,0 +1,76 @@
from typing import Dict, List, Tuple
from ray import serve

from mobius_pipeline.pipeline import Pipeline

from aana.api.app import app
from aana.api.responses import AanaJSONResponse
from aana.configs.pipeline import nodes
from aana.models.pydantic.llm_request import LLMRequest


async def run_pipeline(
    pipeline: Pipeline, data: Dict, required_outputs: List[str]
) -> Tuple[Dict, Dict[str, float]]:
    """
    This function is used to run a Mobius Pipeline.
    It creates a container from the data, runs the pipeline and returns the output.
    Args:
        pipeline (Pipeline): The pipeline to run.
        data (dict): The data to create the container from.
        required_outputs (List[str]): The required outputs of the pipeline.
    Returns:
        tuple[dict, dict[str, float]]: The output of the pipeline and the execution time of the pipeline.
    """

    # create a container from the data
    container = pipeline.parse_dict(data)

    # run the pipeline
    output, execution_time = await pipeline.run(
        container, required_outputs, return_execution_time=True
    )
    return output, execution_time


@serve.deployment(route_prefix="/", num_replicas=1, ray_actor_options={"num_cpus": 0.1})
@serve.ingress(app)
class RequestHandler:
"""This class is used to handle requests to the Aana application."""

def __init__(self, deployments: Dict):
"""
Args:
deployments (Dict): The dictionary of deployments.
It is passed to the context to the pipeline so the pipeline can access the deployments handles.
"""
self.context = {
"deployments": deployments,
}
self.pipeline = Pipeline(nodes, self.context)

@app.post("/llm/generate")
async def generate_llm(self, llm_request: LLMRequest) -> AanaJSONResponse:
"""
The endpoint for running the LLM.
It is running the pipeline with the given prompt and sampling parameters.
This is here as an example and will be replace with automatic endpoint generation.
Args:
llm_request (LLMRequest): The LLM request. It contains the prompt and sampling parameters.
Returns:
AanaJSONResponse: The response containing the output of the pipeline and the execution time.
"""
prompt = llm_request.prompt
sampling_params = llm_request.sampling_params

output, execution_time = await run_pipeline(
self.pipeline,
{"prompt": prompt, "sampling_params": sampling_params},
["vllm_llama2_7b_chat_output"],
)
output["execution_time"] = execution_time
return AanaJSONResponse(content=output)
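
The Dockerfile's CMD points Ray Serve at `aana.main:server`. That module is not part of this diff; as a hedged sketch, it would bind the deployment so `serve run` can deploy it. The empty `deployments` dict below is a placeholder:

```python
# aana/main.py -- hypothetical sketch, not shown in this diff.
from aana.api.request_handler import RequestHandler

# Placeholder: in the real app this dict would hold the model
# deployment handles that the pipeline nodes expect.
deployments = {}

# Bind the deployment so `serve run aana.main:server` can deploy it.
server = RequestHandler.bind(deployments=deployments)
```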
26 changes: 26 additions & 0 deletions aana/api/responses.py
@@ -0,0 +1,26 @@
from typing import Any, Optional
from fastapi.responses import JSONResponse
import orjson


class AanaJSONResponse(JSONResponse):
"""
A JSON response class that uses orjson to serialize data.
It has additional support for numpy arrays.
"""

media_type = "application/json"
option = None

def __init__(self, option: Optional[int] = orjson.OPT_SERIALIZE_NUMPY, **kwargs):
"""
Initialize the response class with the orjson option.
"""
self.option = option
super().__init__(**kwargs)

def render(self, content: Any) -> bytes:
"""
Override the render method to use orjson.dumps instead of json.dumps.
"""
return orjson.dumps(content, option=self.option)
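
A brief usage sketch of `AanaJSONResponse` (the array content is illustrative): with the default `orjson.OPT_SERIALIZE_NUMPY` option, numpy arrays in the content are serialized as plain JSON lists.

```python
import numpy as np

from aana.api.responses import AanaJSONResponse

# The default option (orjson.OPT_SERIALIZE_NUMPY) lets orjson turn
# the numpy array into a plain JSON list during rendering.
response = AanaJSONResponse(content={"embedding": np.array([0.1, 0.2, 0.3])})
print(response.body)  # b'{"embedding":[0.1,0.2,0.3]}'
```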
Empty file added aana/configs/__init__.py
Empty file.