feat(client): support dynamic host URLs and upgrade ONNXRuntime (#18)

* fix: remove FocoosEnvHostUrl
* feat: update onnxruntime to 1.20.1
* fix: use GitHub runners instead of self-hosted
FocoosAI · Nov 29, 2024 · 5e26911 · 5e26911
1 parent f2e6eb1
commit 5e26911
Show file tree

Hide file tree

Showing 15 changed files with 92 additions and 692 deletions.
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
@@ -6,7 +6,7 @@ on:
   workflow_dispatch:
 jobs:
   release:
-    runs-on: actions-runner-cpu
+    runs-on: ubuntu-22.04
     permissions:
       contents: write
     steps:
@@ -33,11 +33,10 @@ jobs:
           file_pattern: pyproject.toml
       # Step 4: Wait for the commit to be synchronized on GitHub
       - name: Wait for commit sync
-        run: sleep 15
+        run: sleep 10
       - uses: actions/checkout@v4
       # Step 5: Create and tag the new version
       - name: Tag version
-
         id: tag_version
         uses: mathieudutour/github-tag-action@v6.2
         with:
@@ -49,4 +48,4 @@ jobs:
           tag: ${{ steps.tag_version.outputs.new_tag }}
           name: Release ${{ steps.tag_version.outputs.new_tag }}
           body: ${{ steps.tag_version.outputs.changelog }}
-          generateReleaseNotes: true
+          generateReleaseNotes: true
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
@@ -7,7 +7,7 @@ env:
   AWS_REGION: eu-west-1
 jobs:
   Run-test:
-    runs-on: actions-runner-cpu
+    runs-on: ubuntu-22.04
     permissions:
       id-token: write # This is required for requesting the JWT
       contents: read
@@ -24,7 +24,7 @@ jobs:
           python-version: "3.12"
           cache: "pip"
       - name: Install dependencies
-        run: make install-dev
+        run: pip install .[dev]
       - name: Run test
         run: make test
       - name: Pytest coverage comment

diff --git a/.gitignore b/.gitignore
@@ -88,4 +88,4 @@ ipython_config.py
 notebooks/.data
 .venv
 /data
-tests/junit.xml
+tests/junit.xml
diff --git a/.vscode/settings.json b/.vscode/settings.json
@@ -1,4 +1,4 @@
 {
-    "python.analysis.typeCheckingMode": "off",
+    "python.analysis.typeCheckingMode": "basic",
     "python.analysis.autoImportCompletions": true
 }
diff --git a/Makefile b/Makefile
@@ -3,12 +3,10 @@
 install:
 	@pip install . --no-cache-dir
 install-dev:
-	@pip install -e ".[inference,dev]" --no-cache-dir
+	@pip install -e ".[dev]" --no-cache-dir
 
 install-pre-commit:
 	@pre-commit install
-
-
 lint:
 	@isort . --profile=black
 	@black .

diff --git a/README.md b/README.md
@@ -17,16 +17,29 @@
 
 # Focoos SDK
 
+
+## Requirements
+For **local inference on GPU**, ensure that CUDA 12 and cuDNN 9 are installed; the GPU build of onnxruntime 1.20.1 requires them. CPU and CoreML inference do not need CUDA.
+
+To install cuDNN 9:
+```bash
+apt-get -y install cudnn9-cuda-12
+```
+
+To perform inference using TensorRT, ensure you have TensorRT version 10.5 installed.
+
 # Install
 
 ```bash
 pip install .
 ```
 
-- local inference (CPU, COREML): `pip install .[inference]`
-- local inference with GPU (CUDA): `pip install .[inference-gpu]`
+- local inference (CPU, COREML): `pip install .`
+- local inference with GPU (CUDA): `pip install .[gpu]`
+
 
 ## 🤖 Cloud Inference
+
 ```python
 from focoos import Focoos
 

diff --git a/focoos/config.py b/focoos/config.py
@@ -2,11 +2,11 @@
 
 from pydantic_settings import BaseSettings
 
-from focoos.ports import FocoosEnvHostUrl, RuntimeTypes
+from focoos.ports import PROD_API_URL, RuntimeTypes
 
 
 class FocoosConfig(BaseSettings):
     focoos_api_key: Optional[str] = None
-    default_host_url: FocoosEnvHostUrl = FocoosEnvHostUrl.PROD
+    default_host_url: str = PROD_API_URL
     runtime_type: RuntimeTypes = RuntimeTypes.ONNX_CUDA32
     warmup_iter: int = 2
diff --git a/focoos/focoos.py b/focoos/focoos.py
@@ -1,21 +1,13 @@
 import os
 from typing import Optional, Union
 
-import requests
-from supervision import Detections
 from tqdm import tqdm
 
 from focoos.config import FocoosConfig
 from focoos.local_model import LocalModel
-from focoos.ports import (
-    DatasetMetadata,
-    FocoosEnvHostUrl,
-    ModelMetadata,
-    ModelPreview,
-    ModelStatus,
-)
+from focoos.ports import DatasetMetadata, ModelMetadata, ModelPreview, ModelStatus
 from focoos.remote_model import RemoteModel
-from focoos.utils.logger import get_logger, setup_logging
+from focoos.utils.logger import setup_logging
 from focoos.utils.system import HttpClient
 
 logger = setup_logging()

diff --git a/focoos/ports.py b/focoos/ports.py
@@ -11,6 +11,10 @@
     r"^s3://" r"(?P<bucket>[a-zA-Z0-9.-]+)/" r"(?P<path>.+(\.tar\.gz|\.zip)?)$"
 )
 
+DEV_API_URL = "https://api.dev.focoos.ai/v0"
+PROD_API_URL = "https://api.focoos.ai/v0"
+LOCAL_API_URL = "http://localhost:8501/v0"
+
 
 class FocoosBaseModel(BaseModel):
     @classmethod
@@ -23,12 +27,6 @@ def from_json(cls, data: Union[str, dict]):
         return cls.model_validate(data_dict)
 
 
-class FocoosEnvHostUrl(str, Enum):
-    DEV = "https://api.dev.focoos.ai/v0"
-    PROD = "https://api.focoos.ai/v0"
-    LOCAL = "http://localhost:8501/v0"
-
-
 class DeploymentMode(str, Enum):
     LOCAL = "local"
     REMOTE = "remote"
@@ -236,7 +234,6 @@ class LatencyMetrics:
 
 class RuntimeTypes(str, Enum):
     ONNX_CUDA32 = "onnx_cuda32"
-    ONNX_CUDA16 = "onnx_cuda16"
     ONNX_TRT32 = "onnx_trt32"
     ONNX_TRT16 = "onnx_trt16"
     ONNX_CPU = "onnx_cpu"

diff --git a/focoos/runtime.py b/focoos/runtime.py
@@ -150,8 +150,8 @@ def __init__(
                 )
             )
         elif opts.coreml and "CoreMLExecutionProvider" in available_providers:
-        #     # options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
-              providers.append("CoreMLExecutionProvider")
+            #     # options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
+            providers.append("CoreMLExecutionProvider")
         else:
             binding = None
 
@@ -270,7 +270,7 @@ def benchmark(self, iterations=20, size=640) -> LatencyMetrics:
             max=round(durations.max(), 3),
             min=round(durations.min(), 3),
             std=round(durations.std(), 3),
-            im_size=size,
+            im_size=size[0],
             device="",
         )
         self.logger.info(f"🔥 FPS: {metrics.fps}")
@@ -287,10 +287,6 @@ def get_runtime(
         opts = OnnxEngineOpts(
             cuda=True, verbose=False, fp16=False, warmup_iter=warmup_iter
         )
-    elif runtime_type == RuntimeTypes.ONNX_CUDA16:
-        opts = OnnxEngineOpts(
-            cuda=True, verbose=False, fp16=True, warmup_iter=warmup_iter
-        )
     elif runtime_type == RuntimeTypes.ONNX_TRT32:
         opts = OnnxEngineOpts(
             cuda=False, verbose=False, trt=True, fp16=False, warmup_iter=warmup_iter

diff --git a/gradio/app.py b/gradio/app.py
@@ -4,17 +4,15 @@
 from dotenv import load_dotenv
 
 import gradio as gr
-from focoos import Focoos, FocoosEnvHostUrl
+from focoos import Focoos
 
 load_dotenv()
 ASSETS_DIR = os.path.dirname(os.path.abspath(__file__)) + "/assets"
 
 focoos_models = []
-focoos = Focoos(api_key=os.getenv("FOCOOS_API_KEY"), host_url=FocoosEnvHostUrl.PROD)
+focoos = Focoos(api_key=os.getenv("FOCOOS_API_KEY"))
 focoos_models = [
-    model["ref"]
-    for model in focoos.list_focoos_models()
-    if model["status"] == "DEPLOYED"
+    model.ref for model in focoos.list_focoos_models() if model.status == "DEPLOYED"
 ]
 loaded_models = {}
 image_examples = [