Skip to content

Commit

Permalink
feat(client): support dynamic host URLs and upgrade ONNXRuntime (#18)
Browse files Browse the repository at this point in the history
* fix: remove FocoosEnvHostUrl

* feat: update onnxruntime to 1.20.1

* fix: use github runners instead of self hosted
  • Loading branch information
CuriousDolphin authored Nov 29, 2024
1 parent f2e6eb1 commit 5e26911
Show file tree
Hide file tree
Showing 15 changed files with 92 additions and 692 deletions.
7 changes: 3 additions & 4 deletions .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ on:
workflow_dispatch:
jobs:
release:
runs-on: actions-runner-cpu
runs-on: ubuntu-22.04
permissions:
contents: write
steps:
Expand All @@ -33,11 +33,10 @@ jobs:
file_pattern: pyproject.toml
# Step 4: Wait for the commit to be synchronized on GitHub
- name: Wait for commit sync
run: sleep 15
run: sleep 10
- uses: actions/checkout@v4
# Step 5: Create and tag the new version
- name: Tag version

id: tag_version
uses: mathieudutour/[email protected]
with:
Expand All @@ -49,4 +48,4 @@ jobs:
tag: ${{ steps.tag_version.outputs.new_tag }}
name: Release ${{ steps.tag_version.outputs.new_tag }}
body: ${{ steps.tag_version.outputs.changelog }}
generateReleaseNotes: true
generateReleaseNotes: true
4 changes: 2 additions & 2 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ env:
AWS_REGION: eu-west-1
jobs:
Run-test:
runs-on: actions-runner-cpu
runs-on: ubuntu-22.04
permissions:
id-token: write # This is required for requesting the JWT
contents: read
Expand All @@ -24,7 +24,7 @@ jobs:
python-version: "3.12"
cache: "pip"
- name: Install dependencies
run: make install-dev
run: pip install .[dev]
- name: Run test
run: make test
- name: Pytest coverage comment
Expand Down
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -88,4 +88,4 @@ ipython_config.py
notebooks/.data
.venv
/data
tests/junit.xml
tests/junit.xml
2 changes: 1 addition & 1 deletion .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
{
"python.analysis.typeCheckingMode": "off",
"python.analysis.typeCheckingMode": "basic",
"python.analysis.autoImportCompletions": true
}
4 changes: 1 addition & 3 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,10 @@
install:
@pip install . --no-cache-dir
install-dev:
@pip install -e ".[inference,dev]" --no-cache-dir
@pip install -e ".[dev]" --no-cache-dir

install-pre-commit:
@pre-commit install


lint:
@isort . --profile=black
@black .
Expand Down
17 changes: 15 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,16 +17,29 @@

# Focoos SDK


## Requirements
For **local inference on GPU**, ensure that you have CUDA 12 and cuDNN 9 installed, as they are required by the GPU build of onnxruntime version 1.20.1.

To install cuDNN 9:
```bash
apt-get -y install cudnn9-cuda-12
```

To perform inference using TensorRT, ensure you have TensorRT version 10.5 installed.

# Install

```bash
pip install .
```

- local inference (CPU, COREML): `pip install .[inference]`
- local inference with GPU (CUDA): `pip install .[inference-gpu]`
- local inference (CPU, COREML): `pip install .`
- local inference with GPU (CUDA): `pip install .[gpu]`


## 🤖 Cloud Inference

```python
from focoos import Focoos

Expand Down
4 changes: 2 additions & 2 deletions focoos/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,11 @@

from pydantic_settings import BaseSettings

from focoos.ports import FocoosEnvHostUrl, RuntimeTypes
from focoos.ports import PROD_API_URL, RuntimeTypes


class FocoosConfig(BaseSettings):
focoos_api_key: Optional[str] = None
default_host_url: FocoosEnvHostUrl = FocoosEnvHostUrl.PROD
default_host_url: str = PROD_API_URL
runtime_type: RuntimeTypes = RuntimeTypes.ONNX_CUDA32
warmup_iter: int = 2
12 changes: 2 additions & 10 deletions focoos/focoos.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,13 @@
import os
from typing import Optional, Union

import requests
from supervision import Detections
from tqdm import tqdm

from focoos.config import FocoosConfig
from focoos.local_model import LocalModel
from focoos.ports import (
DatasetMetadata,
FocoosEnvHostUrl,
ModelMetadata,
ModelPreview,
ModelStatus,
)
from focoos.ports import DatasetMetadata, ModelMetadata, ModelPreview, ModelStatus
from focoos.remote_model import RemoteModel
from focoos.utils.logger import get_logger, setup_logging
from focoos.utils.logger import setup_logging
from focoos.utils.system import HttpClient

logger = setup_logging()
Expand Down
11 changes: 4 additions & 7 deletions focoos/ports.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@
r"^s3://" r"(?P<bucket>[a-zA-Z0-9.-]+)/" r"(?P<path>.+(\.tar\.gz|\.zip)?)$"
)

DEV_API_URL = "https://api.dev.focoos.ai/v0"
PROD_API_URL = "https://api.focoos.ai/v0"
LOCAL_API_URL = "http://localhost:8501/v0"


class FocoosBaseModel(BaseModel):
@classmethod
Expand All @@ -23,12 +27,6 @@ def from_json(cls, data: Union[str, dict]):
return cls.model_validate(data_dict)


class FocoosEnvHostUrl(str, Enum):
DEV = "https://api.dev.focoos.ai/v0"
PROD = "https://api.focoos.ai/v0"
LOCAL = "http://localhost:8501/v0"


class DeploymentMode(str, Enum):
LOCAL = "local"
REMOTE = "remote"
Expand Down Expand Up @@ -236,7 +234,6 @@ class LatencyMetrics:

class RuntimeTypes(str, Enum):
ONNX_CUDA32 = "onnx_cuda32"
ONNX_CUDA16 = "onnx_cuda16"
ONNX_TRT32 = "onnx_trt32"
ONNX_TRT16 = "onnx_trt16"
ONNX_CPU = "onnx_cpu"
Expand Down
10 changes: 3 additions & 7 deletions focoos/runtime.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,8 +150,8 @@ def __init__(
)
)
elif opts.coreml and "CoreMLExecutionProvider" in available_providers:
# # options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
providers.append("CoreMLExecutionProvider")
# # options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
providers.append("CoreMLExecutionProvider")
else:
binding = None

Expand Down Expand Up @@ -270,7 +270,7 @@ def benchmark(self, iterations=20, size=640) -> LatencyMetrics:
max=round(durations.max(), 3),
min=round(durations.min(), 3),
std=round(durations.std(), 3),
im_size=size,
im_size=size[0],
device="",
)
self.logger.info(f"🔥 FPS: {metrics.fps}")
Expand All @@ -287,10 +287,6 @@ def get_runtime(
opts = OnnxEngineOpts(
cuda=True, verbose=False, fp16=False, warmup_iter=warmup_iter
)
elif runtime_type == RuntimeTypes.ONNX_CUDA16:
opts = OnnxEngineOpts(
cuda=True, verbose=False, fp16=True, warmup_iter=warmup_iter
)
elif runtime_type == RuntimeTypes.ONNX_TRT32:
opts = OnnxEngineOpts(
cuda=False, verbose=False, trt=True, fp16=False, warmup_iter=warmup_iter
Expand Down
8 changes: 3 additions & 5 deletions gradio/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,15 @@
from dotenv import load_dotenv

import gradio as gr
from focoos import Focoos, FocoosEnvHostUrl
from focoos import Focoos

load_dotenv()
ASSETS_DIR = os.path.dirname(os.path.abspath(__file__)) + "/assets"

focoos_models = []
focoos = Focoos(api_key=os.getenv("FOCOOS_API_KEY"), host_url=FocoosEnvHostUrl.PROD)
focoos = Focoos(api_key=os.getenv("FOCOOS_API_KEY"))
focoos_models = [
model["ref"]
for model in focoos.list_focoos_models()
if model["status"] == "DEPLOYED"
model.ref for model in focoos.list_focoos_models() if model.status == "DEPLOYED"
]
loaded_models = {}
image_examples = [
Expand Down
Loading

0 comments on commit 5e26911

Please sign in to comment.