diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 7264b7f..001c6c6 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -2,7 +2,7 @@ files: "" repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v2.3.0 + rev: v5.0.0 hooks: - id: check-ast - id: check-toml @@ -47,6 +47,6 @@ repos: hooks: - id: nbstripout - repo: https://github.com/hadialqattan/pycln # remove unused import - rev: v2.3.0 + rev: v2.4.0 hooks: - id: pycln diff --git a/README.md b/README.md index 57a2fe3..f9a4c86 100644 --- a/README.md +++ b/README.md @@ -18,6 +18,8 @@ ## Requirements +### CUDA 12 + For **local inference**, ensure that you have CUDA 12 and cuDNN 9 installed, as they are required for onnxruntime version 1.20.1. To install cuDNN 9: @@ -26,8 +28,14 @@ To install cuDNN 9: apt-get -y install cudnn9-cuda-12 ``` +### (Optional) TensorRT + To perform inference using TensorRT, ensure you have TensorRT version 10.5 installed. +```bash +sudo apt-get install tensorrt +``` + # Install Nvidia GPU: diff --git a/notebooks/playground.ipynb b/notebooks/playground.ipynb index 0d54d04..817d75c 100644 --- a/notebooks/playground.ipynb +++ b/notebooks/playground.ipynb @@ -79,28 +79,48 @@ "We will load a model, deploy it locally, and then run inference on a sample image.\n" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Available Runtime Types" + ] + }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "import os\n", + "from focoos.ports import RuntimeTypes\n", "\n", - "# os.environ[\"RUNTIME_TYPE\"] = \"onnx_trt16\"\n", - "from focoos import Focoos, DEV_API_URL\n", - "from focoos.config import FOCOOS_CONFIG\n", + "for runtime_type in RuntimeTypes:\n", + " print(runtime_type)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### OnnxRuntime With CUDA (focoos_object365)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": 
[], + "source": [ + "from focoos import Focoos\n", "import os\n", "from pprint import pprint\n", "from supervision import plot_image\n", "\n", - "print(FOCOOS_CONFIG)\n", "focoos = Focoos(\n", " api_key=os.getenv(\"FOCOOS_API_KEY\"),\n", - " host_url=DEV_API_URL,\n", ")\n", "image_path = \"./assets/ade_val_034.jpg\"\n", - "model_ref = \"focoos_rtdetr\"\n", + "model_ref = \"focoos_object365\"\n", "\n", "\n", "model = focoos.get_local_model(model_ref)\n", @@ -115,6 +135,47 @@ "plot_image(preview)" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### OnnxRuntime With TensorRT (FP16) (focoos_object365)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "\n", + "from focoos import Focoos\n", + "import os\n", + "from pprint import pprint\n", + "from supervision import plot_image\n", + "\n", + "from focoos.ports import RuntimeTypes\n", + "\n", + "focoos = Focoos(\n", + " api_key=os.getenv(\"FOCOOS_API_KEY\"),\n", + ")\n", + "image_path = \"./assets/ade_val_034.jpg\"\n", + "model_ref = \"focoos_object365\"\n", + "\n", + "\n", + "model = focoos.get_local_model(model_ref, runtime_type=RuntimeTypes.ONNX_TRT16)\n", + "\n", + "latency = model.benchmark(iterations=10, size=640)\n", + "pprint(latency)\n", + "# pprint(latency)\n", + "output, preview = model.infer(image_path, threshold=0.3, annotate=True)\n", + "pprint(output.detections)\n", + "pprint(output.latency)\n", + "\n", + "plot_image(preview)" + ] + }, { "cell_type": "markdown", "metadata": {},