-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #1 from mobiusml/init_app
Initial version
- Loading branch information
Showing
31 changed files
with
1,145 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
# Base image for the development container: CUDA 11.8 + cuDNN 8 (devel) on Ubuntu 22.04
FROM nvidia/cuda:11.8.0-cudnn8-devel-ubuntu22.04

# Refresh the package index and install the system libraries libgl1 and libglib2.0-0
RUN apt-get update \
    && apt-get install -y libgl1 libglib2.0-0
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
{ | ||
"name": "Ubuntu", | ||
"build": { | ||
"dockerfile": "Dockerfile" | ||
}, | ||
"features": { | ||
"ghcr.io/devcontainers/features/python:1": { | ||
"installTools": true, | ||
"version": "3.10" | ||
}, | ||
"ghcr.io/devcontainers-contrib/features/poetry:2": { | ||
"version": "latest" | ||
} | ||
}, | ||
"hostRequirements": { | ||
"gpu": "optional" | ||
}, | ||
"mounts": [ | ||
"source=/nas,target=/nas,type=bind", | ||
"source=/nas2,target=/nas2,type=bind" | ||
], | ||
|
||
"postCreateCommand": "sh ${containerWorkspaceFolder}/install.sh", | ||
"postStartCommand": "git config --global --add safe.directory ${containerWorkspaceFolder}", | ||
"customizations": { | ||
"vscode": { | ||
"extensions": [ | ||
"ms-python.black-formatter", | ||
"ms-python.python", | ||
"ms-python.mypy-type-checker", | ||
"ms-toolsai.jupyter" | ||
] | ||
} | ||
} | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
[submodule "mobius-pipeline"] | ||
path = mobius-pipeline | ||
url = ../mobius-pipeline.git |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
{ | ||
"[python]": { | ||
"editor.defaultFormatter": "ms-python.black-formatter" | ||
}, | ||
"python.formatting.provider": "none" | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
# Use NVIDIA CUDA as base image
FROM nvidia/cuda:11.8.0-cudnn8-devel-ubuntu22.04

# Set working directory
WORKDIR /app

# Make debconf non-interactive so package installation never waits on a prompt.
# The frontend name debconf recognises is "noninteractive" (no hyphen); any other
# spelling is rejected and debconf falls back to an interactive frontend.
ENV DEBIAN_FRONTEND=noninteractive

# Install required libraries, tools, and Python3
RUN apt-get update && apt-get install -y libgl1 libglib2.0-0 curl git python3.10 python3.10-dev python3-pip python3.10-venv

# Install poetry
RUN curl -sSL https://install.python-poetry.org | python3 -

# Update PATH (both for login shells via .bashrc and for subsequent build steps / the CMD)
RUN echo 'export PATH="/root/.local/bin:$PATH"' >> /root/.bashrc
ENV PATH="/root/.local/bin:$PATH"

# Copy project files into the container
COPY . /app

# Install the package with poetry
RUN sh install.sh

# Disable buffering for stdout and stderr to get the logs in real time
ENV PYTHONUNBUFFERED=1

# Expose the desired port
EXPOSE 8000

# Set the command to run the SDK when the container starts
CMD ["poetry", "run", "serve", "run", "--port", "8000", "--host", "0.0.0.0", "aana.main:server"]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1,84 @@ | ||
# aana_sdk | ||
# Aana | ||
|
||
Aana is a multi-model SDK for deploying and serving machine learning models. | ||
|
||
## Installation | ||
|
||
1. Clone this repository. | ||
2. Update submodules. | ||
|
||
```bash | ||
git submodule update --init --recursive | ||
``` | ||
|
||
3. Install additional libraries. | ||
|
||
```bash | ||
apt update && apt install -y libgl1 | ||
``` | ||
|
||
4. Install the package with poetry. | ||
|
||
It will install the package and all dependencies in a virtual environment. | ||
|
||
```bash | ||
sh install.sh | ||
``` | ||
|
||
5. Run the SDK. | ||
|
||
```bash | ||
CUDA_VISIBLE_DEVICES=0 poetry run serve run --port 8000 --host 0.0.0.0 aana.main:server | ||
``` | ||
|
||
The first run might take a while because the models will be downloaded from Google Drive and cached. | ||
|
||
Once you see `Deployed Serve app successfully.` in the logs, the server is ready to accept requests. | ||
|
||
You can change the port and the `CUDA_VISIBLE_DEVICES` environment variable to suit your needs. | ||
|
||
The server will be available at http://localhost:8000. | ||
|
||
The documentation will be available at http://localhost:8000/docs and http://localhost:8000/redoc. | ||
|
||
For HuggingFace Transformers, you need to set the `HF_AUTH` environment variable to your HuggingFace API token. | ||
|
||
6. Send a request to the server. | ||
|
||
You can find examples in the [demo notebook](notebooks/demo.ipynb). | ||
|
||
## Run with Docker | ||
|
||
1. Clone this repository. | ||
|
||
2. Update submodules. | ||
|
||
```bash | ||
git submodule update --init --recursive | ||
``` | ||
|
||
3. Build the Docker image. | ||
|
||
```bash | ||
docker build -t aana:0.1.0 . | ||
``` | ||
|
||
4. Run the Docker container. | ||
|
||
```bash | ||
docker run --rm --init -p 8000:8000 --gpus all -e CUDA_VISIBLE_DEVICES=0 -v aana_cache:/root/.aana -v aana_hf_cache:/root/.cache/huggingface --name aana_instance aana:0.1.0 | ||
``` | ||
|
||
The first run might take a while because the models will be downloaded from Google Drive and cached. The models will be stored in the `aana_cache` volume. The HuggingFace models will be stored in the `aana_hf_cache` volume. If you want to remove the cached models, remove the volume. | ||
|
||
Once you see `Deployed Serve app successfully.` in the logs, the server is ready to accept requests. | ||
|
||
You can change the port and the `--gpus` parameter to suit your needs. | ||
|
||
The server will be available at http://localhost:8000. | ||
|
||
The documentation will be available at http://localhost:8000/docs and http://localhost:8000/redoc. | ||
|
||
5. Send a request to the server. | ||
|
||
You can find examples in the [demo notebook](notebooks/demo.ipynb). |
Empty file.
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,122 @@ | ||
import traceback | ||
from typing import Union | ||
from fastapi import FastAPI, Request | ||
from fastapi.responses import JSONResponse | ||
from mobius_pipeline.exceptions import BaseException | ||
from pydantic import ValidationError | ||
from ray.exceptions import RayTaskError | ||
|
||
|
||
app = FastAPI() | ||
|
||
|
||
@app.exception_handler(ValidationError)
async def validation_exception_handler(request: Request, exc: ValidationError):
    """
    This handler is used to handle pydantic validation errors.

    Args:
        request (Request): The request object
        exc (ValidationError): The validation error

    Returns:
        JSONResponse: JSON response with status code 422 and the error details
            under the "detail" key.
    """
    # Local import keeps this handler self-contained; fastapi is already a
    # dependency of this module.
    from fastapi.encoders import jsonable_encoder

    # TODO: Structure the error response so that it is consistent with the other error responses
    # The entries returned by exc.errors() can carry values that the standard
    # JSON encoder rejects, which would make the error handler itself fail.
    # jsonable_encoder converts them to JSON-compatible types first.
    return JSONResponse(
        status_code=422,
        content={"detail": jsonable_encoder(exc.errors())},
    )
|
||
|
||
def custom_exception_handler(
    request: Request, exc_raw: Union[BaseException, RayTaskError]
):
    """
    This handler is used to handle custom exceptions raised in the application.

    BaseException is the base exception for all the exceptions
    from the Mobius Pipeline and Aana application.
    Sometimes custom exceptions are wrapped into RayTaskError so we need to handle that as well.

    Args:
        request (Request): The request object
        exc_raw (Union[BaseException, RayTaskError]): The exception raised

    Returns:
        JSONResponse: JSON response (status code 400) with the error details.
            The response contains the following fields:
            error: The name of the exception class.
            message: The message of the exception.
            data: The additional data returned by the exception that can be used
                to identify the error (e.g. image path, url, model name etc.)
            stacktrace: The stacktrace of the exception.

    Raises:
        TypeError: If a RayTaskError wraps something other than a pipeline
            BaseException (it would not provide get_data()).
    """
    # a BaseException can be wrapped into a RayTaskError
    if isinstance(exc_raw, RayTaskError):
        # str() of a RayTaskError returns the whole stack trace,
        # so use it as the stacktrace of the response
        stacktrace = str(exc_raw)
        # get the original exception
        exc = exc_raw.cause
        # Explicit check instead of `assert`: asserts are stripped under
        # `python -O`, which would turn a violated invariant into an
        # unrelated AttributeError on get_data() below.
        if not isinstance(exc, BaseException):
            raise TypeError(
                "RayTaskError cause must be a pipeline BaseException, "
                f"got {type(exc).__name__}"
            )
    else:
        # Format the trace from the exception object itself rather than from
        # the interpreter's "currently handled" exception, so the result is
        # correct even when this function is called outside an except block.
        stacktrace = "".join(
            traceback.format_exception(type(exc_raw), exc_raw, exc_raw.__traceback__)
        )
        exc = exc_raw
    # additional info attached to the exception (image path, url, model name etc.)
    data = exc.get_data()
    # class name of the exception, identifies the type of the error
    error = exc.__class__.__name__
    # message of the exception
    message = str(exc)
    return JSONResponse(
        status_code=400,
        content={
            "error": error,
            "message": message,
            "data": data,
            "stacktrace": stacktrace,
        },
    )
|
||
|
||
@app.exception_handler(BaseException)
async def pipeline_exception_handler(request: Request, exc: BaseException):
    """
    Handle exceptions raised by the Mobius Pipeline and Aana application.

    The work is delegated to custom_exception_handler.

    Args:
        request (Request): The request object
        exc (BaseException): The exception raised

    Returns:
        JSONResponse: JSON response with the error details
    """
    error_response = custom_exception_handler(request, exc)
    return error_response
|
||
|
||
@app.exception_handler(RayTaskError)
async def ray_task_error_handler(request: Request, exc: RayTaskError):
    """
    This handler is used to handle RayTaskError exceptions.

    Args:
        request (Request): The request object
        exc (RayTaskError): The exception raised

    Returns:
        JSONResponse: JSON response (status code 400) with the error details.
            The response contains the following fields:
            error: The name of the exception class.
            message: The message of the exception.
            stacktrace: The stacktrace of the exception.
    """
    error = exc.__class__.__name__
    # Format the trace from the exception object itself instead of relying on
    # the interpreter's "currently handled" exception, so the stacktrace is
    # correct even when the handler runs outside an except block.
    stacktrace = "".join(
        traceback.format_exception(type(exc), exc, exc.__traceback__)
    )

    return JSONResponse(
        status_code=400,
        content={"error": error, "message": str(exc), "stacktrace": stacktrace},
    )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
from typing import Dict, List, Tuple | ||
from ray import serve | ||
|
||
from mobius_pipeline.pipeline import Pipeline | ||
|
||
from aana.api.app import app | ||
from aana.api.responses import AanaJSONResponse | ||
from aana.configs.pipeline import nodes | ||
from aana.models.pydantic.llm_request import LLMRequest | ||
|
||
|
||
async def run_pipeline(
    pipeline: Pipeline, data: Dict, required_outputs: List[str]
) -> Tuple[Dict, Dict[str, float]]:
    """
    Run a Mobius Pipeline on the given data.

    A container is built from the data with pipeline.parse_dict, then the
    pipeline is awaited with return_execution_time=True.

    Args:
        pipeline (Pipeline): The pipeline to run.
        data (dict): The data to create the container from.
        required_outputs (List[str]): The required outputs of the pipeline.

    Returns:
        tuple[dict, dict[str, float]]: The output of the pipeline and the execution time of the pipeline.
    """
    # build the input container first, then execute the pipeline on it
    input_container = pipeline.parse_dict(data)
    result = await pipeline.run(
        input_container, required_outputs, return_execution_time=True
    )
    # pipeline.run is expected to yield exactly (output, execution_time)
    pipeline_output, timings = result
    return pipeline_output, timings
|
||
|
||
@serve.deployment(route_prefix="/", num_replicas=1, ray_actor_options={"num_cpus": 0.1})
@serve.ingress(app)
class RequestHandler:
    """Ray Serve ingress deployment that handles requests to the Aana application."""

    def __init__(self, deployments: Dict):
        """
        Build the pipeline context and the pipeline itself.

        Args:
            deployments (Dict): The dictionary of deployments.
                It is stored in the context handed to the pipeline so the
                pipeline can access the deployment handles.
        """
        self.context = dict(deployments=deployments)
        self.pipeline = Pipeline(nodes, self.context)

    @app.post("/llm/generate")
    async def generate_llm(self, llm_request: LLMRequest) -> AanaJSONResponse:
        """
        The endpoint for running the LLM.

        It runs the pipeline with the given prompt and sampling parameters.
        This is here as an example and will be replaced with automatic endpoint generation.

        Args:
            llm_request (LLMRequest): The LLM request. It contains the prompt and sampling parameters.

        Returns:
            AanaJSONResponse: The response containing the output of the pipeline and the execution time.
        """
        pipeline_input = {
            "prompt": llm_request.prompt,
            "sampling_params": llm_request.sampling_params,
        }
        requested_outputs = ["vllm_llama2_7b_chat_output"]

        output, execution_time = await run_pipeline(
            self.pipeline, pipeline_input, requested_outputs
        )
        # attach the timings to the pipeline output before serializing it
        output["execution_time"] = execution_time
        return AanaJSONResponse(content=output)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
from typing import Any, Optional | ||
from fastapi.responses import JSONResponse | ||
import orjson | ||
|
||
|
||
class AanaJSONResponse(JSONResponse):
    """
    JSON response that serializes its content with orjson instead of json.

    The default orjson option (OPT_SERIALIZE_NUMPY) adds support for numpy arrays.
    """

    media_type = "application/json"
    # class-level fallback; each instance overrides it in __init__
    option = None

    def __init__(self, option: Optional[int] = orjson.OPT_SERIALIZE_NUMPY, **kwargs):
        """
        Store the orjson option and initialize the underlying JSONResponse.

        Args:
            option (Optional[int]): The option passed to orjson.dumps.
            **kwargs: Forwarded unchanged to JSONResponse.__init__.
        """
        # NOTE(review): the option is set before calling the base constructor,
        # presumably because the base class renders the content during
        # __init__ and render() reads self.option. Keep this order.
        self.option = option
        super().__init__(**kwargs)

    def render(self, content: Any) -> bytes:
        """
        Serialize the content with orjson.dumps using the stored option.

        Args:
            content (Any): The content to serialize.

        Returns:
            bytes: The serialized JSON.
        """
        serialized = orjson.dumps(content, option=self.option)
        return serialized
Empty file.
Oops, something went wrong.