diff --git a/Makefile b/Makefile index e462050a..9269a3a0 100644 --- a/Makefile +++ b/Makefile @@ -67,7 +67,7 @@ fvt: ginkgo -v -procs=2 --fail-fast fvt/predictor fvt/scaleToZero fvt/storage fvt/hpa --timeout=50m fvt-stable: - ginkgo -v -procs=2 --progress --fail-fast fvt/predictor fvt/scaleToZero fvt/storage --timeout=50m + ginkgo -v -procs=2 --fail-fast fvt/predictor fvt/scaleToZero fvt/storage fvt/hpa --timeout=50m .PHONY: codegen-fvt ## Regenerate grpc code stubs for FVT diff --git a/main.go b/main.go index ce2cfcf0..055d5831 100644 --- a/main.go +++ b/main.go @@ -290,8 +290,8 @@ func main() { enablePprof := os.Getenv(EnablePprof) if enablePprof != "" { - // Enable PPROF - setupLog.Info("Started PPROF HTTP server", "host","","port","9999") + // Enable PPROF + setupLog.Info("Started PPROF HTTP server", "host", "", "port", "9999") go func() { var username string var password string diff --git a/opendatahub/docs/generate-manifests.md b/opendatahub/docs/generate-manifests.md index 963bf5ed..b5667efe 100644 --- a/opendatahub/docs/generate-manifests.md +++ b/opendatahub/docs/generate-manifests.md @@ -1,21 +1,22 @@ # How to use gen scripts There are 3 gen scripts to support generating new manifests. + - [gen_odh_model_manifests.sh](../scripts/gen_odh_model_manifests.sh) - [gen_odh_modelmesh_manifests.sh](../scripts/gen_odh_modelmesh_manifests.sh) - [gen_copy_new_manifests.sh](../scripts/gen_copy_new_manifests.h) To simplify, here's how the script works. -First, the above scripts generates new manifests and compares it with existing manifest without touching the existing manifests. If there are any differences, [gen_copy_new_manifests.sh](../scripts/gen_copy_new_manifests.h) copies the new manifests under the odh-manifests folder,then runs the fvt test, and if there are any problems, you have to manually modify the new manifests to make them work. If you modify the new manifests, you must also update odh-manifests/model-mesh_template or model-mesh_template_stable. 
+First, the above scripts generate new manifests and compare them with the existing manifests without touching the existing manifests. If there are any differences, [gen_copy_new_manifests.sh](../scripts/gen_copy_new_manifests.sh) copies the new manifests under the odh-manifests folder, then runs the fvt test, and if there are any problems, you have to manually modify the new manifests to make them work. If you modify the new manifests, you must also update odh-manifests/model-mesh_template or model-mesh_template_stable. ## Common The temporary folder name is stored in this file (opendatahub/scripts/.temp_new_modelmesh_manifests). If this file exist, the folder name will be reused or a new file will be recreated. `gen_odh_model_manifests.sh` and `gen_odh_modelmesh_manifests.sh` have an option(-n, --create-new-dir) to delete the file to recreate. -~~~ +``` cat opendatahub/scripts/.temp_new_modelmesh_manifests modelmesh-20230608061686254480 -~~~ +``` ## [gen_odh_model_manifests.sh](../scripts/gen_odh_model_manifests.sh) @@ -23,7 +24,7 @@ This script clones a specific branch of the odh-model-controller repository and **Script Usage** -~~~ +``` $ opendatahub/scripts/gen_odh_model_manifests.sh --help usage: opendatahub/scripts/gen_odh_model_manifests.sh [flags] @@ -33,28 +34,31 @@ Flags: -n, --create-new-dir (optional) Use a new directory. By default, it uses the existing directory if it exists (default false). Generate odh-manifest for odh-modelmesh-controller -~~~ +``` **Use Cases - main branch** Create a new temp folder name and generate a new odh-model-controller with main branch. Customize the manifests with `opendatahub/odh-manifests/model-mesh_template` -~~~ + +``` opendatahub/scripts/gen_odh_model_manifests.sh -n -~~~ +``` **Use Cases - stable branch** Create a new temp folder name and generate a new odh-model-controller with custom branch. 
Customize the manifests with `opendatahub/odh-manifests/model-mesh_template_stable` -~~~ + +``` opendatahub/scripts/gen_odh_model_manifests.sh -p -b release-v0.11.0-alpha -n -~~~ +``` **Use Cases - custom branch** Create a new temp folder name and generate a new odh-model-controller with custom branch. Customize the manifests with `opendatahub/odh-manifests/model-mesh_template` -~~~ + +``` opendatahub/scripts/gen_odh_model_manifests.sh -b release-v0.11.0-alpha -n -~~~ +``` ## [gen_odh_modelmesh_manifests.sh](../scripts/gen_odh_modelmesh_manifests.sh) @@ -62,7 +66,7 @@ This script clones a specific branch of the odh-modelmesh-controller repository **Script Usage** -~~~ +``` $ opendatahub/scripts/gen_odh_modelmesh_manifests.sh --help usage: opendatahub/scripts/gen_odh_modelmesh_manifests.sh [flags] @@ -73,38 +77,42 @@ Flags: -c, --copy-current-config-dir (optional) Use a current config directory to compare. By default, it uses the existing config directory instead of cloning git repository (default false). Generate odh-manifest for odh-modelmesh-controller -~~~ +``` **Use Cases - main branch** Create a new temp folder name and generate a new odh-modelmesh-controller with main branch. Customize the manifests with `opendatahub/odh-manifests/model-mesh_template` -~~~ + +``` opendatahub/scripts/gen_odh_modelmesh_manifests.sh -n -~~~ +``` **Use Cases - stable branch** Create a new temp folder name and generate a new odh-modelmesh-controller with custom branch. Customize the manifests with `opendatahub/odh-manifests/model-mesh_template_stable` -~~~ + +``` opendatahub/scripts/gen_odh_modelmesh_manifests.sh -p -b release-v0.11.0-alpha -n -~~~ +``` **Use Cases - custom branch** Create a new temp folder name and generate a new odh-modelmesh-controller with custom branch. 
Customize the manifests with `opendatahub/odh-manifests/model-mesh_template` -~~~ + +``` opendatahub/scripts/gen_odh_modelmesh_manifests.sh -b release-v0.11.0-alpha -n -~~~ +``` ## [gen_copy_new_manifests.sh](../scripts/gen_copy_new_manifests.h) This script does the following: + - Move `opendatahub/odh-manifests/model-mesh` to `opendatahub/odh-manifests/model-mesh-ori` - Copy `/tmp/modelmesh-XXXX` to `opendatahub/odh-manifests/model-mesh` or `opendatahub/odh-manifests/model-mesh_stable` - + **Script Usage** -~~~ +``` $ opendatahub/scripts/gen_copy_new_manifests.sh --help usage: opendatahub/scripts/gen_copy_new_manifests.sh [flags] @@ -112,18 +120,20 @@ Flags: -p, --stable-manifests (optional) Use stable manifests. By default, it will use the latest manifests (default false). Copy the generated new odh-manifest to opendatahub/odh-manifests/model-mesh,model-mesh_stable -~~~ +``` **Use Cases - main branch** Move `opendatahub/odh-manifests/model-mesh` to `opendatahub/odh-manifests/model-mesh_ori` and move `/tmp/modelmesh-XXX` to `opendatahub/odh-manifests/model-mesh`. -~~~ + +``` opendatahub/scripts/gen_copy_new_manifests.sh -~~~ +``` **Use Cases - stable branch** Move `opendatahub/odh-manifests/model-mesh_stable` to `opendatahub/odh-manifests/model-mesh_stable_ori` and move `/tmp/modelmesh-XXX` to `opendatahub/odh-manifests/model-mesh_stable`. 
-~~~ -opendatahub/scripts/gen_copy_new_manifests.sh -p -~~~ + +``` +opendatahub/scripts/gen_copy_new_manifests.sh -p +``` diff --git a/opendatahub/odh-manifests/model-mesh/base/params.env b/opendatahub/odh-manifests/model-mesh/base/params.env index 84af5752..c2f7c5f4 100644 --- a/opendatahub/odh-manifests/model-mesh/base/params.env +++ b/opendatahub/odh-manifests/model-mesh/base/params.env @@ -1,7 +1,7 @@ monitoring-namespace=opendatahub -odh-mm-rest-proxy=quay.io/opendatahub/rest-proxy:v0.11.0-rc0 -odh-modelmesh-runtime-adapter=quay.io/opendatahub/modelmesh-runtime-adapter:v0.11.0-rc0 -odh-modelmesh=quay.io/opendatahub/modelmesh:v0.11.0-rc0 +odh-mm-rest-proxy=quay.io/opendatahub/rest-proxy:fast +odh-modelmesh-runtime-adapter=quay.io/opendatahub/modelmesh-runtime-adapter:fast +odh-modelmesh=quay.io/opendatahub/modelmesh:fast odh-openvino=quay.io/opendatahub/openvino_model_server:2022.3-release -odh-modelmesh-controller=quay.io/opendatahub/modelmesh-controller:v0.11.0-rc0 -odh-model-controller=quay.io/opendatahub/odh-model-controller:v0.11.0-rc0 +odh-modelmesh-controller=quay.io/opendatahub/modelmesh-controller:fast +odh-model-controller=quay.io/opendatahub/odh-model-controller:fast diff --git a/opendatahub/odh-manifests/model-mesh/odh-model-controller/rbac/kserve_prometheus_clusterrole.yaml b/opendatahub/odh-manifests/model-mesh/odh-model-controller/rbac/kserve_prometheus_clusterrole.yaml index 180d0e98..50e78230 100644 --- a/opendatahub/odh-manifests/model-mesh/odh-model-controller/rbac/kserve_prometheus_clusterrole.yaml +++ b/opendatahub/odh-manifests/model-mesh/odh-model-controller/rbac/kserve_prometheus_clusterrole.yaml @@ -3,13 +3,13 @@ kind: ClusterRole metadata: name: kserve-prometheus-k8s rules: -- apiGroups: - - "" - resources: - - services - - endpoints - - pods - verbs: - - get - - list - - watch + - apiGroups: + - "" + resources: + - services + - endpoints + - pods + verbs: + - get + - list + - watch diff --git 
a/opendatahub/odh-manifests/model-mesh/odh-model-controller/rbac/role.yaml b/opendatahub/odh-manifests/model-mesh/odh-model-controller/rbac/role.yaml index e24b76eb..46f45d50 100644 --- a/opendatahub/odh-manifests/model-mesh/odh-model-controller/rbac/role.yaml +++ b/opendatahub/odh-manifests/model-mesh/odh-model-controller/rbac/role.yaml @@ -4,204 +4,204 @@ kind: ClusterRole metadata: name: odh-model-controller-role rules: -- apiGroups: - - "" - resources: - - configmaps - - endpoints - - namespaces - - pods - - secrets - - serviceaccounts - - services - verbs: - - create - - get - - list - - patch - - update - - watch -- apiGroups: - - maistra.io - resources: - - servicemeshcontrolplanes - verbs: - - create - - get - - list - - patch - - update - - use - - watch -- apiGroups: - - maistra.io - resources: - - servicemeshmemberrolls - verbs: - - create - - delete - - get - - list - - patch - - update - - watch -- apiGroups: - - maistra.io - resources: - - servicemeshmembers - verbs: - - create - - delete - - get - - list - - patch - - update - - watch -- apiGroups: - - maistra.io - resources: - - servicemeshmembers/finalizers - verbs: - - create - - delete - - get - - list - - patch - - update - - watch -- apiGroups: - - monitoring.coreos.com - resources: - - podmonitors - - servicemonitors - verbs: - - create - - delete - - get - - list - - patch - - update - - watch -- apiGroups: - - networking.istio.io - resources: - - virtualservices - verbs: - - create - - delete - - get - - list - - patch - - update - - watch -- apiGroups: - - networking.istio.io - resources: - - virtualservices/finalizers - verbs: - - create - - delete - - get - - list - - patch - - update - - watch -- apiGroups: - - networking.k8s.io - resources: - - networkpolicies - verbs: - - create - - delete - - get - - list - - patch - - update - - watch -- apiGroups: - - rbac.authorization.k8s.io - resources: - - clusterrolebindings - - rolebindings - verbs: - - create - - delete - - get - - list - - 
patch - - update - - watch -- apiGroups: - - route.openshift.io - resources: - - routes - verbs: - - create - - get - - list - - patch - - update - - watch -- apiGroups: - - security.istio.io - resources: - - peerauthentications - verbs: - - create - - delete - - get - - list - - patch - - update - - watch -- apiGroups: - - serving.kserve.io - resources: - - inferenceservices - verbs: - - get - - list - - watch -- apiGroups: - - serving.kserve.io - resources: - - inferenceservices/finalizers - verbs: - - get - - list - - update - - watch -- apiGroups: - - serving.kserve.io - resources: - - servingruntimes - verbs: - - create - - get - - list - - update - - watch -- apiGroups: - - serving.kserve.io - resources: - - servingruntimes/finalizers - verbs: - - create - - delete - - get - - list - - patch - - update - - watch -- apiGroups: - - telemetry.istio.io - resources: - - telemetries - verbs: - - create - - delete - - get - - list - - patch - - update - - watch + - apiGroups: + - "" + resources: + - configmaps + - endpoints + - namespaces + - pods + - secrets + - serviceaccounts + - services + verbs: + - create + - get + - list + - patch + - update + - watch + - apiGroups: + - maistra.io + resources: + - servicemeshcontrolplanes + verbs: + - create + - get + - list + - patch + - update + - use + - watch + - apiGroups: + - maistra.io + resources: + - servicemeshmemberrolls + verbs: + - create + - delete + - get + - list + - patch + - update + - watch + - apiGroups: + - maistra.io + resources: + - servicemeshmembers + verbs: + - create + - delete + - get + - list + - patch + - update + - watch + - apiGroups: + - maistra.io + resources: + - servicemeshmembers/finalizers + verbs: + - create + - delete + - get + - list + - patch + - update + - watch + - apiGroups: + - monitoring.coreos.com + resources: + - podmonitors + - servicemonitors + verbs: + - create + - delete + - get + - list + - patch + - update + - watch + - apiGroups: + - networking.istio.io + resources: + - 
virtualservices + verbs: + - create + - delete + - get + - list + - patch + - update + - watch + - apiGroups: + - networking.istio.io + resources: + - virtualservices/finalizers + verbs: + - create + - delete + - get + - list + - patch + - update + - watch + - apiGroups: + - networking.k8s.io + resources: + - networkpolicies + verbs: + - create + - delete + - get + - list + - patch + - update + - watch + - apiGroups: + - rbac.authorization.k8s.io + resources: + - clusterrolebindings + - rolebindings + verbs: + - create + - delete + - get + - list + - patch + - update + - watch + - apiGroups: + - route.openshift.io + resources: + - routes + verbs: + - create + - get + - list + - patch + - update + - watch + - apiGroups: + - security.istio.io + resources: + - peerauthentications + verbs: + - create + - delete + - get + - list + - patch + - update + - watch + - apiGroups: + - serving.kserve.io + resources: + - inferenceservices + verbs: + - get + - list + - watch + - apiGroups: + - serving.kserve.io + resources: + - inferenceservices/finalizers + verbs: + - get + - list + - update + - watch + - apiGroups: + - serving.kserve.io + resources: + - servingruntimes + verbs: + - create + - get + - list + - update + - watch + - apiGroups: + - serving.kserve.io + resources: + - servingruntimes/finalizers + verbs: + - create + - delete + - get + - list + - patch + - update + - watch + - apiGroups: + - telemetry.istio.io + resources: + - telemetries + verbs: + - create + - delete + - get + - list + - patch + - update + - watch diff --git a/opendatahub/odh-manifests/model-mesh_stable/odh-model-controller/rbac/kserve_prometheus_clusterrole.yaml b/opendatahub/odh-manifests/model-mesh_stable/odh-model-controller/rbac/kserve_prometheus_clusterrole.yaml new file mode 100644 index 00000000..50e78230 --- /dev/null +++ b/opendatahub/odh-manifests/model-mesh_stable/odh-model-controller/rbac/kserve_prometheus_clusterrole.yaml @@ -0,0 +1,15 @@ +apiVersion: rbac.authorization.k8s.io/v1 
+kind: ClusterRole +metadata: + name: kserve-prometheus-k8s +rules: + - apiGroups: + - "" + resources: + - services + - endpoints + - pods + verbs: + - get + - list + - watch diff --git a/opendatahub/odh-manifests/model-mesh_stable/odh-modelmesh-controller/dashboard/ModelMeshMetricsDashboard.json b/opendatahub/odh-manifests/model-mesh_stable/odh-modelmesh-controller/dashboard/ModelMeshMetricsDashboard.json new file mode 100644 index 00000000..6f1e5636 --- /dev/null +++ b/opendatahub/odh-manifests/model-mesh_stable/odh-modelmesh-controller/dashboard/ModelMeshMetricsDashboard.json @@ -0,0 +1,4017 @@ +{ + "__inputs": [ + { + "name": "DS_PROMETHEUS", + "label": "prometheus", + "description": "", + "type": "datasource", + "pluginId": "prometheus", + "pluginName": "Prometheus" + } + ], + "__elements": {}, + "__requires": [ + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "9.3.2" + }, + { + "type": "datasource", + "id": "prometheus", + "name": "Prometheus", + "version": "1.0.0" + }, + { + "type": "panel", + "id": "timeseries", + "name": "Time series", + "version": "" + } + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "datasource", + "uid": "grafana" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": null, + "links": [], + "liveNow": false, + "panels": [ + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 81, + "panels": [], + "title": "Global Metrics", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + 
"axisLabel": "", + "axisPlacement": "auto", + "axisSoftMin": 86400, + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 6, + "gradientMode": "none", + "hideFrom": { + "graph": false, + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 3, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Age at Eviction" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#F2CC0C", + "mode": "fixed" + } + }, + { + "id": "custom.drawStyle", + "value": "points" + } + ] + } + ] + }, + "gridPos": { + "h": 7, + "w": 10, + "x": 0, + "y": 1 + }, + "id": 82, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "max(modelmesh_instance_lru_age_seconds{namespace=\"$namespace\",pod=~\"$servicename-.*\"})", + "interval": "", + "legendFormat": "LRU Age", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "max(rate(modelmesh_age_at_eviction_milliseconds_sum{namespace=\"$namespace\",pod=~\"$servicename-.*\"}[$__rate_interval])/rate(modelmesh_age_at_eviction_milliseconds_count{namespace=\"$namespace\",pod=~\"$servicename-.*\"}[$__rate_interval]))/1000", + "hide": false, + "interval": "", + "legendFormat": 
"Age at Eviction", + "range": true, + "refId": "B" + } + ], + "title": "Global LRU age", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMin": 0, + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 21, + "gradientMode": "none", + "hideFrom": { + "graph": false, + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 3, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "deckbytes" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "triton capacity" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "dark-red", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "mlserver capacity" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "dark-blue", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "triton usage" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "super-light-red", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "mlserver usage" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "super-light-blue", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "total usage" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": 
"super-light-green", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "total capacity" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "dark-green", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 7, + "w": 10, + "x": 10, + "y": 1 + }, + "id": 83, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(modelmesh_instance_used_bytes{namespace=\"$namespace\",pod=~\"$servicename-.*\"}/1024)", + "interval": "", + "legendFormat": "total usage", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(modelmesh_instance_capacity_bytes{namespace=\"$namespace\",pod=~\"$servicename-.*\"}/1024)", + "hide": false, + "interval": "", + "legendFormat": "total capacity", + "range": true, + "refId": "B" + } + ], + "title": "Cluster capacity and utilization", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 5, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": 
[], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 4, + "x": 20, + "y": 1 + }, + "id": 73, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "0": "P", + "1": "1", + "2": "8", + "3": "0", + "4": "9", + "5": "F", + "6": "7", + "7": "C", + "8": "D", + "9": "0", + "10": "C", + "11": "7", + "12": "5", + "13": "A", + "14": "C", + "15": "F", + "16": "3" + }, + "exemplar": true, + "expr": "count(container_memory_usage_bytes{namespace=\"$namespace\",pod=~\"$servicename-.*\",container=\"$mm_container\"})", + "interval": "", + "legendFormat": "Count", + "refId": "A" + } + ], + "title": "Number of Pods", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMin": 0, + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "graph": false, + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 3, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "locale" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "models with load failure" + }, + "properties": [ 
+ { + "id": "color", + "value": { + "fixedColor": "red", + "mode": "palette-classic" + } + } + ] + } + ] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 8 + }, + "id": 84, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "max(modelmesh_models_managed_total{namespace=\"$namespace\",pod=~\"$servicename-.*\"})", + "interval": "", + "legendFormat": "Managed", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "max(modelmesh_models_loaded_total{namespace=\"$namespace\",pod=~\"$servicename-.*\"})", + "hide": false, + "interval": "", + "legendFormat": "Loaded", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "max(modelmesh_models_with_failure_total{namespace=\"$namespace\",pod=~\"$servicename-.*\"})", + "hide": false, + "interval": "", + "legendFormat": "Failed", + "range": true, + "refId": "C" + } + ], + "title": "Model Counts", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + 
"spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 8 + }, + "id": 71, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "0": "P", + "1": "1", + "2": "8", + "3": "0", + "4": "9", + "5": "F", + "6": "7", + "7": "C", + "8": "D", + "9": "0", + "10": "C", + "11": "7", + "12": "5", + "13": "A", + "14": "C", + "15": "F", + "16": "3" + }, + "exemplar": true, + "expr": "label_replace(modelmesh_instance_models_total{namespace=\"$namespace\",pod=~\"$servicename-.*\"}, \"short_podname\", \"$1\", \"pod\", \"$servicename-(.*)\")", + "interval": "", + "legendFormat": "{{short_podname}}", + "refId": "A" + } + ], + "title": "Model Counts per Pod", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Number of processed request per second", + "axisPlacement": "left", + "axisSoftMin": 0, + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 21, + "gradientMode": "none", + "hideFrom": { + "graph": false, + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": 
"green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 15 + }, + "id": 86, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "8.1.5", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(rate(modelmesh_api_request_milliseconds_count{namespace=\"$namespace\",pod=~\"$servicename-.*\"}[$__rate_interval]))", + "hide": false, + "interval": "", + "legendFormat": "External API", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(rate(modelmesh_invoke_model_milliseconds_count{namespace=\"$namespace\",pod=~\"$servicename-.*\"}[$__rate_interval]))", + "hide": false, + "interval": "", + "legendFormat": "Internal API", + "range": true, + "refId": "B" + } + ], + "title": "Inference API Request Rate", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ 
+ { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "decbytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 15 + }, + "id": 87, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "avg(rate(modelmesh_request_size_bytes_sum{namespace=\"$namespace\",pod=~\"$servicename-.*\"}[$__rate_interval]))/avg(rate(modelmesh_request_size_bytes_count{namespace=\"$namespace\",pod=~\"$servicename-.*\"}[$__rate_interval]))", + "interval": "", + "legendFormat": "Request Size", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "exemplar": true, + "expr": "avg(rate(modelmesh_response_size_bytes_sum{namespace=\"$namespace\",pod=~\"$servicename-.*\"}[$__rate_interval]))/avg(rate(modelmesh_response_size_bytes_count{namespace=\"$namespace\",pod=~\"$servicename-.*\"}[$__rate_interval]))", + "hide": false, + "interval": "", + "legendFormat": "Response Size", + "refId": "B" + } + ], + "title": "Average Inference Request and Response Sizes", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Number of processed request per second", + "axisPlacement": "left", + "axisSoftMin": 0, + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 21, + "gradientMode": "none", + "hideFrom": { + "graph": false, + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": 
{ + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 22 + }, + "id": 62, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "8.1.5", + "targets": [ + { + "datasource": { + "0": "P", + "1": "1", + "2": "8", + "3": "0", + "4": "9", + "5": "F", + "6": "7", + "7": "C", + "8": "D", + "9": "0", + "10": "C", + "11": "7", + "12": "5", + "13": "A", + "14": "C", + "15": "F", + "16": "3" + }, + "exemplar": true, + "expr": "label_replace(sum by (pod)(rate(modelmesh_api_request_milliseconds_count{namespace=\"$namespace\",pod=~\"$servicename-.*\"}[1m])), \"short_podname\", \"$1\", \"pod\", \"$servicename-(.*)\")", + "hide": false, + "interval": "", + "legendFormat": "{{short_podname}}", + "refId": "A" + } + ], + "title": "External Inference API Request Rate Per Pod", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 8, + "gradientMode": "none", + "hideFrom": { + "graph": false, + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "smooth", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": 
"A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 22 + }, + "id": 57, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "8.1.5", + "targets": [ + { + "datasource": { + "0": "P", + "1": "1", + "2": "8", + "3": "0", + "4": "9", + "5": "F", + "6": "7", + "7": "C", + "8": "D", + "9": "0", + "10": "C", + "11": "7", + "12": "5", + "13": "A", + "14": "C", + "15": "F", + "16": "3" + }, + "exemplar": true, + "expr": "label_replace(rate(modelmesh_api_request_milliseconds_sum{namespace=\"$namespace\",pod=~\"$servicename-.*\",code=\"OK\"}[$__rate_interval])/rate(modelmesh_api_request_milliseconds_count{namespace=\"$namespace\",pod=~\"$servicename-.*\",code=\"OK\"}[$__rate_interval]), \"short_podname\", \"$1\", \"pod\", \"$servicename-(.*)\")", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{short_podname}}", + "refId": "A" + } + ], + "title": "External Inference API Response Times by Pod (excluding errors)", + "transformations": [], + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Number of processed request per second", + "axisPlacement": "left", + "axisSoftMin": 0, + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 21, + "gradientMode": "none", + "hideFrom": { + "graph": false, + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, 
+ "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 8, + "x": 0, + "y": 30 + }, + "id": 63, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "8.1.5", + "targets": [ + { + "datasource": { + "0": "P", + "1": "1", + "2": "8", + "3": "0", + "4": "9", + "5": "F", + "6": "7", + "7": "C", + "8": "D", + "9": "0", + "10": "C", + "11": "7", + "12": "5", + "13": "A", + "14": "C", + "15": "F", + "16": "3" + }, + "exemplar": true, + "expr": "label_replace(rate(modelmesh_invoke_model_milliseconds_count{namespace=\"$namespace\",pod=~\"$servicename-.*\"}[$__rate_interval]), \"short_podname\", \"$1\", \"pod\", \"$servicename-(.*)\")", + "hide": false, + "interval": "", + "legendFormat": "{{short_podname}}", + "refId": "A" + } + ], + "title": "Internal Inference API Requests Rate Per Pod", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": 
"none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 8, + "x": 8, + "y": 30 + }, + "id": 58, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "8.4.7", + "targets": [ + { + "datasource": { + "0": "P", + "1": "1", + "2": "8", + "3": "0", + "4": "9", + "5": "F", + "6": "7", + "7": "C", + "8": "D", + "9": "0", + "10": "C", + "11": "7", + "12": "5", + "13": "A", + "14": "C", + "15": "F", + "16": "3" + }, + "exemplar": true, + "expr": "label_replace(\n rate(modelmesh_invoke_model_milliseconds_sum{namespace=\"$namespace\",pod=~\"$servicename-.*\",code=\"OK\"}[$__rate_interval]) /\n rate(modelmesh_invoke_model_milliseconds_count{namespace=\"$namespace\",pod=~\"$servicename-.*\",code=\"OK\"}[$__rate_interval]),\n \"short_podname\", \"$1\", \"pod\", \"$servicename-(.*)\")", + "interval": "", + "legendFormat": "{{short_podname}}", + "refId": "A" + } + ], + "title": "Internal Inference API Response Time By Pod (excluding errors)", + "transformations": [], + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 8, + "gradientMode": "none", + "hideFrom": { + "graph": false, + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": 
"never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 8, + "x": 16, + "y": 30 + }, + "id": 59, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "8.1.5", + "targets": [ + { + "datasource": { + "0": "P", + "1": "1", + "2": "8", + "3": "0", + "4": "9", + "5": "F", + "6": "7", + "7": "C", + "8": "D", + "9": "0", + "10": "C", + "11": "7", + "12": "5", + "13": "A", + "14": "C", + "15": "F", + "16": "3" + }, + "exemplar": true, + "expr": "label_replace(rate(modelmesh_req_queue_delay_milliseconds_sum{namespace=\"$namespace\",pod=~\"$servicename-.*\"}[5m])/rate(modelmesh_req_queue_delay_milliseconds_count{namespace=\"$namespace\",pod=~\"$servicename-.*\"}[5m]), \"short_podname\", \"$1\", \"pod\", \"$servicename-(.*)\")\n", + "interval": "", + "legendFormat": "{{short_podname}}", + "refId": "A" + } + ], + "title": "Inference API Queue Delay Time By Pod", + "transformations": [], + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Number of missed cache models", + "axisPlacement": "auto", + "axisSoftMin": 0, + "barAlignment": 0, + "drawStyle": "bars", + "fillOpacity": 15, + "gradientMode": "none", + "hideFrom": { + "graph": false, + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": 
"linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Cache misses" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "orange", + "mode": "fixed" + } + } + ] + }, + { + "__systemRef": "hideSeriesFrom", + "matcher": { + "id": "byNames", + "options": { + "mode": "exclude", + "names": ["Cache Misses"], + "prefix": "All except:", + "readOnly": true + } + }, + "properties": [ + { + "id": "custom.hideFrom", + "value": { + "legend": false, + "tooltip": false, + "viz": true + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 0, + "y": 39 + }, + "id": 8, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(increase(modelmesh_cache_miss_milliseconds_count{namespace=\"$namespace\",pod=~\"$servicename-.*\"}[$__rate_interval]))", + "interval": "10m", + "legendFormat": "Cache Misses", + "range": true, + "refId": "A" + } + ], + "title": "Cache Misses (per 10min)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Cache misses as percentage of requests", + "axisPlacement": "auto", + "axisSoftMin": 0, + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 15, + "gradientMode": "none", + "hideFrom": { + "graph": false, + 
"legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Cache misses" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "orange", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 9, + "x": 8, + "y": 39 + }, + "id": 74, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(increase(modelmesh_cache_miss_milliseconds_count{namespace=\"$namespace\",pod=~\"$servicename-.*\"}[$__rate_interval]))/sum(increase(modelmesh_api_request_milliseconds_count{namespace=\"$namespace\",pod=~\"$servicename-.*\"}[$__rate_interval]))", + "hide": false, + "interval": "20m", + "legendFormat": "Cache Miss Rate", + "range": true, + "refId": "A" + } + ], + "title": "Cache Miss Rate", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Average request delay due to cache miss", + "axisPlacement": "auto", + "axisSoftMin": 0, + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "graph": false, + 
"legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 7, + "x": 17, + "y": 39 + }, + "id": 46, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "exemplar": true, + "expr": "avg(rate(modelmesh_cache_miss_milliseconds_sum{namespace=\"$namespace\",pod=~\"$servicename-.*\"}[5m]))/avg(rate(modelmesh_cache_miss_milliseconds_count{namespace=\"$namespace\",pod=~\"$servicename-.*\"}[5m]))", + "interval": "", + "legendFormat": "Cache miss delay", + "refId": "A" + } + ], + "title": "Cache Miss Delay (average)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Number of model loading per second", + "axisPlacement": "auto", + "axisSoftMax": 5, + "axisSoftMin": -5, + "barAlignment": 0, + "drawStyle": "bars", + "fillOpacity": 29, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + 
"thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Model Unloads" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "semi-dark-yellow", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Model Evictions" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "light-orange", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Load Failures" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "light-red", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 10, + "w": 9, + "x": 0, + "y": 47 + }, + "id": 88, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(increase(modelmesh_loadmodel_milliseconds_count{namespace=\"$namespace\",pod=~\"$servicename-.*\"}[$__rate_interval]))", + "format": "time_series", + "interval": "5m", + "intervalFactor": 1, + "legendFormat": "Model Loads", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "-sum(increase(modelmesh_unloadmodel_milliseconds_count{namespace=\"$namespace\",pod=~\"$servicename-.*\"}[$__rate_interval]))", + "hide": false, + "interval": "5m", + "intervalFactor": 1, + "legendFormat": "Model Unloads", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "exemplar": true, + "expr": 
"-sum(increase(modelmesh_age_at_eviction_milliseconds_count{namespace=\"$namespace\",pod=~\"$servicename-.*\"}[$__rate_interval]))", + "hide": false, + "interval": "5m", + "intervalFactor": 1, + "legendFormat": "Model Evictions", + "range": true, + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(increase(modelmesh_loadmodel_failure{namespace=\"$namespace\",pod=~\"$servicename-.*\"}[$__rate_interval]))", + "hide": false, + "interval": "5m", + "intervalFactor": 1, + "legendFormat": "Load Failures", + "range": true, + "refId": "D" + } + ], + "title": "Model Loads/Unloads (per 5min)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "graph": false, + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "decbytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 7, + "x": 9, + "y": 47 + }, + "id": 89, + "options": { + "graph": {}, + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "7.5.11", + "targets": [ + { + "datasource": { + "type": 
"prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "avg(rate(modelmesh_loaded_model_size_bytes_sum{namespace=\"$namespace\",pod=~\"$servicename-.*\"}[$__rate_interval]))/avg(rate(modelmesh_loaded_model_size_bytes_count{namespace=\"$namespace\",pod=~\"$servicename-.*\"}[$__rate_interval]))", + "hide": false, + "interval": "", + "legendFormat": "Loaded Model Size", + "range": true, + "refId": "A" + } + ], + "title": "Loaded Model Sizes", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 8, + "gradientMode": "none", + "hideFrom": { + "graph": false, + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 0, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 47 + }, + "id": 90, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "8.1.5", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "exemplar": true, + "expr": 
"avg(rate(modelmesh_loadmodel_milliseconds_sum{namespace=\"$namespace\",pod=~\"$servicename-.*\"}[$__rate_interval]))/avg(rate(modelmesh_loadmodel_milliseconds_count{namespace=\"$namespace\",pod=~\"$servicename-.*\"}[$__rate_interval]))", + "interval": "", + "legendFormat": "Loading Time", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "avg(rate(modelmesh_model_sizing_milliseconds_sum{namespace=\"$namespace\",pod=~\"$servicename-.*\"}[$__rate_interval]))/avg(rate(modelmesh_model_sizing_milliseconds_count{namespace=\"$namespace\",pod=~\"$servicename-.*\"}[$__rate_interval]))", + "hide": false, + "interval": "", + "legendFormat": "Sizing Time", + "range": true, + "refId": "B" + } + ], + "title": "Model Loading Times", + "transformations": [], + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 57 + }, + "id": 79, + "panels": [], + "title": "Deployment Metrics", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMin": 0, + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 21, + "gradientMode": "none", + "hideFrom": { + "graph": false, + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 3, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": 
"deckbytes" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "triton capacity" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "dark-red", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "mlserver capacity" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "dark-blue", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "triton usage" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "super-light-red", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "mlserver usage" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "super-light-blue", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "total usage" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "super-light-green", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "total capacity" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "dark-green", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 7, + "w": 10, + "x": 0, + "y": 58 + }, + "id": 16, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": " sum by (deployment) ( \n\tlabel_replace(\n sum by (pod) (modelmesh_instance_used_bytes{namespace=\"$namespace\",pod=~\"$servicename-$runtime-.*\"}/1024),\n \"deployment\",\n \"$2 usage\",\n \"pod\",\n \"(modelmesh-serving)-(.*?)-(.*)\"\n )\n )", + "hide": false, + "legendFormat": "__auto", + "range": true, + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": 
"code", + "expr": "sum by (deployment) ( \n\tlabel_replace(\n sum by (pod) (modelmesh_instance_capacity_bytes{namespace=\"$namespace\",pod=~\"servicename-$runtime-.*\"}/1024),\n \"deployment\",\n \"$2 capacity\",\n \"pod\",\n \"(modelmesh-serving)-(.*?)-(.*)\"\n )\n)", + "hide": false, + "legendFormat": "__auto", + "range": true, + "refId": "D" + } + ], + "title": "Cluster capacity and utilization", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMin": 0, + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "graph": false, + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 3, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "locale" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "models with load failure" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "red", + "mode": "palette-classic" + } + } + ] + } + ] + }, + "gridPos": { + "h": 7, + "w": 10, + "x": 10, + "y": 58 + }, + "id": 45, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum by (deployment) ( 
\n\tlabel_replace(\n sum by (pod) (modelmesh_models_with_failure_total{namespace=\"$namespace\",pod=~\"$servicename-$runtime-.*\"}) ,\n \"deployment\",\n \"$2-failed models\",\n \"pod\",\n \"(modelmesh-serving)-(.*?)-(.*)\"\n ))", + "hide": false, + "interval": "", + "legendFormat": "__auto", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "sum by (deployment) ( \n\tlabel_replace(\n sum by (pod) (modelmesh_instance_models_total{namespace=\"$namespace\",pod=~\"$servicename-$runtime-.*\"}),\n \"deployment\",\n \"$2-loaded models\",\n \"pod\",\n \"(modelmesh-serving)-(.*?)-(.*)\"\n )\n)", + "hide": false, + "interval": "", + "legendFormat": "__auto", + "range": true, + "refId": "B" + } + ], + "title": "Model Counts", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 5, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 4, + "x": 20, + "y": 58 + }, + "id": 91, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + 
"sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum by (deployment) (\n label_replace(\n count by (pod) (container_memory_usage_bytes{namespace=\"$namespace\",pod=~\"$servicename-$runtime-.*\",container=\"$mm_container\"}),\n \"deployment\",\n \"$2\",\n \"pod\",\n \"(modelmesh-serving)-(.*?)-(.*)\"\n )\n)\n", + "interval": "", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Number of Pods", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Number of processed request per second", + "axisPlacement": "left", + "axisSoftMin": 0, + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 21, + "gradientMode": "none", + "hideFrom": { + "graph": false, + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 65 + }, + "id": 92, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "8.1.5", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum by 
(deployment) ( \n\tlabel_replace(\n sum by (pod) (rate(modelmesh_invoke_model_milliseconds_count{namespace=\"$namespace\",pod=~\"$servicename-$runtime-.*\"}[$__rate_interval])),\n \"deployment\",\n \"$2 external request\",\n \"pod\",\n \"(modelmesh-serving)-(.*?)-(.*)\"\n ))", + "hide": false, + "interval": "", + "legendFormat": "__auto", + "range": true, + "refId": "B" + } + ], + "title": "Inference API Request Rate", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "decbytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 65 + }, + "id": 65, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": " avg by (deployment) (\n label_replace(\n avg by (pod) (\n rate(\n modelmesh_request_size_bytes_sum{namespace=\"$namespace\",pod=~\"$servicename-$runtime-.*\"}[$__rate_interval]\n )\n ),\n \"deployment\",\n \"$2 request size\",\n \"pod\",\n 
\"(modelmesh-serving)-(.*?)-(.*)\"\n )\n )\n/\n avg by (deployment) (\n label_replace(\n avg by (pod) (\n rate(\n modelmesh_request_size_bytes_count{namespace=\"$namespace\",pod=~\"$servicename-$runtime-.*\"}[$__rate_interval]\n )\n ),\n \"deployment\",\n \"$2 request size\",\n \"pod\",\n \"(modelmesh-serving)-(.*?)-(.*)\"\n )\n )", + "hide": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": " avg by (deployment) (\n label_replace(\n avg by (pod) (\n rate(\n modelmesh_response_size_bytes_sum{namespace=\"$namespace\",pod=~\"$servicename-$runtime-.*\"}[$__rate_interval]\n )\n ),\n \"deployment\",\n \"$2 response size\",\n \"pod\",\n \"(modelmesh-serving)-(.*?)-(.*)\"\n )\n )\n/\n avg by (deployment) (\n label_replace(\n avg by (pod) (\n rate(\n modelmesh_response_size_bytes_count{namespace=\"$namespace\",pod=~\"$servicename-$runtime-.*\"}[$__rate_interval]\n )\n ),\n \"deployment\",\n \"$2 response size\",\n \"pod\",\n \"(modelmesh-serving)-(.*?)-(.*)\"\n )\n )", + "hide": false, + "legendFormat": "__auto", + "range": true, + "refId": "B" + } + ], + "title": "Average Inference Request and Response Sizes", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Number of model loading per second", + "axisPlacement": "auto", + "axisSoftMax": 5, + "axisSoftMin": -5, + "barAlignment": 0, + "drawStyle": "bars", + "fillOpacity": 29, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + 
"mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Model Unloads" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "semi-dark-yellow", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Model Evictions" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "light-orange", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Load Failures" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "light-red", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 10, + "w": 9, + "x": 0, + "y": 72 + }, + "id": 22, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "sum by (deployment) ( \n\tlabel_replace(\nsum by (pod) (increase(modelmesh_loadmodel_milliseconds_count{namespace=\"$namespace\",pod=~\"$servicename-$runtime-.*\"}[$__rate_interval])),\n \"deployment\",\n \"$2 loads\",\n \"pod\",\n \"(modelmesh-serving)-(.*?)-(.*)\"\n )\n)", + "hide": false, + "interval": "5m", + "legendFormat": "__auto", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "-sum by (deployment) ( \n\tlabel_replace(\nsum by (pod) (increase(modelmesh_unloadmodel_milliseconds_count{namespace=\"$namespace\",pod=~\"$servicename-$runtime-.*\"}[$__rate_interval])),\n \"deployment\",\n \"$2 unloads\",\n \"pod\",\n \"(modelmesh-serving)-(.*?)-(.*)\"\n )\n)", + "hide": false, + 
"interval": "5m", + "legendFormat": "__auto", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "-sum by (deployment) ( \n\tlabel_replace(\nsum by (pod) (increase(modelmesh_age_at_eviction_milliseconds_count{namespace=\"$namespace\",pod=~\"$servicename-$runtime-.*\"}[$__rate_interval])),\n \"deployment\",\n \"$2 evictions\",\n \"pod\",\n \"(modelmesh-serving)-(.*?)-(.*)\"\n )\n)", + "hide": false, + "interval": "5m", + "legendFormat": "__auto", + "range": true, + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "sum by (deployment) ( \n\tlabel_replace(\nsum by (pod) (increase(modelmesh_loadmodel_failure{namespace=\"$namespace\",pod=~\"$servicename-$runtime-.*\"}[$__rate_interval])),\n \"deployment\",\n \"$2 failures\",\n \"pod\",\n \"(modelmesh-serving)-(.*?)-(.*)\"\n )\n)", + "hide": false, + "interval": "5m", + "legendFormat": "__auto", + "range": true, + "refId": "D" + } + ], + "title": "Model Loads/Unloads (per 5min)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "graph": false, + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + 
"value": 80 + } + ] + }, + "unit": "decbytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 7, + "x": 9, + "y": 72 + }, + "id": 61, + "options": { + "graph": {}, + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "7.5.11", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "avg by (deployment) (\n label_replace(\n avg by (pod) (\n rate(\n modelmesh_loaded_model_size_bytes_sum{namespace=\"$namespace\",pod=~\"$servicename-$runtime-.*\"}[$__rate_interval]\n )\n ),\n \"deployment\",\n \"$2 model size\",\n \"pod\",\n \"(modelmesh-serving)-(.*?)-(.*)\"\n )\n)\n\n/\n\navg by (deployment) (\n label_replace(\n avg by (pod) (\n rate(\n modelmesh_loaded_model_size_bytes_count{namespace=\"$namespace\",pod=~\"$servicename-$runtime-.*\"}[$__rate_interval]\n )\n ),\n \"deployment\",\n \"$2 model size\",\n \"pod\",\n \"(modelmesh-serving)-(.*?)-(.*)\"\n )\n)\n\n", + "hide": false, + "legendFormat": "__auto", + "range": true, + "refId": "B" + } + ], + "title": "Loaded Model Sizes", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 8, + "gradientMode": "none", + "hideFrom": { + "graph": false, + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } +
}, + "decimals": 0, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 72 + }, + "id": 47, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "8.1.5", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": " avg by (deployment) (\n label_replace(\n avg by (pod) (\n rate(\n modelmesh_loadmodel_milliseconds_sum{namespace=\"$namespace\",pod=~\"$servicename-$runtime-.*\"}[$__rate_interval]\n )\n ),\n \"deployment\",\n \"$2 loading time\",\n \"pod\",\n \"(modelmesh-serving)-(.*?)-(.*)\"\n )\n )\n/\n avg by (deployment) (\n label_replace(\n avg by (pod) (\n rate(\n modelmesh_loadmodel_milliseconds_count{namespace=\"$namespace\",pod=~\"$servicename-$runtime-.*\"}[$__rate_interval]\n )\n ),\n \"deployment\",\n \"$2 loading time\",\n \"pod\",\n \"(modelmesh-serving)-(.*?)-(.*)\"\n )\n )", + "hide": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": " avg by (deployment) (\n label_replace(\n avg by (pod) (\n rate(\n modelmesh_model_sizing_milliseconds_sum{namespace=\"$namespace\",pod=~\"$servicename-$runtime-.*\"}[$__rate_interval]\n )\n ),\n \"deployment\",\n \"$2 sizing time\",\n \"pod\",\n \"(modelmesh-serving)-(.*?)-(.*)\"\n )\n )\n/\n avg by (deployment) (\n label_replace(\n avg by (pod) (\n rate(\n modelmesh_model_sizing_milliseconds_count{namespace=\"$namespace\",pod=~\"$servicename-$runtime-.*\"}[$__rate_interval]\n )\n ),\n \"deployment\",\n \"$2 sizing time\",\n \"pod\",\n \"(modelmesh-serving)-(.*?)-(.*)\"\n )\n 
)", + "hide": false, + "legendFormat": "__auto", + "range": true, + "refId": "B" + } + ], + "title": "Model Loading Times", + "transformations": [], + "type": "timeseries" + }, + { + "collapsed": true, + "datasource": { + "type": "datasource", + "uid": "grafana" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 82 + }, + "id": 69, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 5, + "gradientMode": "none", + "hideFrom": { + "graph": false, + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 2, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 3, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "core" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "CPU Requests" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "super-light-blue", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Allocation" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "super-light-blue", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 11, + "w": 12, + "x": 0, + "y": 27 + }, + "id": 48, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": 
"prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "exemplar": true, + "expr": "label_replace(rate(container_cpu_usage_seconds_total{namespace=\"$namespace\",pod=~\"$servicename-.*\", container=\"$mm_container\"}[$__rate_interval]), \"short_podname\", \"$1\", \"pod\",\"$servicename-(.*)\")", + "interval": "", + "legendFormat": "{{short_podname}}", + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "exemplar": true, + "expr": "avg(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{namespace=\"$namespace\",pod=~\"$servicename-.*\",container=\"$mm_container\"})", + "hide": false, + "interval": "", + "legendFormat": "Allocation", + "refId": "B" + } + ], + "title": "ModelMesh Container CPU Usage", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "graph": false, + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "decmbytes" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Memory Requests" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "super-light-blue", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 11, + "w": 12, + "x": 12, + "y": 27 + }, + 
"id": 50, + "options": { + "graph": {}, + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "7.5.11", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "exemplar": true, + "expr": "label_replace(container_memory_usage_bytes{namespace=\"$namespace\",pod=~\"$servicename-.*\", container=\"$mm_container\"}/1024/1024,\"short_podname\", \"$1\", \"pod\",\"$servicename-(.*)\")", + "interval": "", + "legendFormat": "{{short_podname}}", + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "exemplar": true, + "expr": "avg(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{namespace=\"$namespace\",container=\"$mm_container\",pod=~\"$servicename-.*\"}/1024/1024)", + "hide": false, + "interval": "", + "legendFormat": "Allocation", + "refId": "B" + } + ], + "title": "Model Mesh Container Memory Usage", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 2, + "gradientMode": "none", + "hideFrom": { + "graph": false, + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 2, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", +
"spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 2, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "core" + }, + "overrides": [] + }, + "gridPos": { + "h": 11, + "w": 12, + "x": 0, + "y": 38 + }, + "id": 33, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "exemplar": false, + "expr": "rate(container_cpu_usage_seconds_total{namespace=\"$namespace\",pod=~\"$servicename-.*\",container!=\"$mm_container\",container!=\"\"}[$__rate_interval])", + "hide": false, + "interval": "", + "legendFormat": "{{container}}", + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "exemplar": true, + "expr": "cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{namespace=\"$namespace\",pod=~\"$servicename-.*\",container!=\"$mm_container\",container!=\"\"}", + "hide": false, + "interval": "", + "legendFormat": "{{container}}-alloc", + "refId": "B" + } + ], + "title": "Serving Runtime Container CPU Usage", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", +
"axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "graph": false, + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "decmbytes" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Memory Requests" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "super-light-blue", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 11, + "w": 12, + "x": 12, + "y": 38 + }, + "id": 77, + "options": { + "graph": {}, + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "7.5.11", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "exemplar": true, + "expr": "label_replace(container_memory_usage_bytes{namespace=\"$namespace\",pod=~\"$servicename-.*\",container!=\"$mm_container\",container!=\"\"}/1024/1024,\"short_podname\", \"$1\", \"pod\",\"$servicename-(.*)\")", + "interval": "", + "legendFormat": "{{short_podname}}", + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "exemplar": true, + "expr": "avg(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{namespace=\"$namespace\",container!=\"$mm_container\",container!=\"\",pod=~\"$servicename-.*\"}/1024/1024)", + "hide": false, + "interval": "", + "legendFormat": "Allocation", + "refId": "B" + } + ], 
+ "title": "Serving Runtime Container Memory Usage", + "type": "timeseries" + } + ], + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "grafana" + }, + "refId": "A" + } + ], + "title": "Container Resource Utilization", + "type": "row" + } + ], + "refresh": false, + "schemaVersion": 37, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "current": { + "selected": false, + "text": "my-namespace", + "value": "my-namespace" + }, + "hide": 0, + "name": "namespace", + "options": [ + { + "selected": true, + "text": "my-namespace", + "value": "my-namespace" + } + ], + "query": "my-namespace", + "skipUrlSync": false, + "type": "textbox" + }, + { + "current": { + "selected": false, + "text": "my-service-name", + "value": "my-service-name" + }, + "hide": 0, + "name": "servicename", + "options": [ + { + "selected": true, + "text": "my-service-name", + "value": "my-service-name" + } + ], + "query": "my-service-name", + "skipUrlSync": false, + "type": "textbox" + }, + { + "current": { + "selected": true, + "text": "mm-runtime", + "value": "mm-runtime" + }, + "hide": 0, + "includeAll": false, + "label": "mm container", + "multi": false, + "name": "mm_container", + "options": [ + { + "selected": false, + "text": "mm", + "value": "mm" + }, + { + "selected": true, + "text": "mm-runtime", + "value": "mm-runtime" + }, + { + "selected": false, + "text": "modelmesh-runtime", + "value": "modelmesh-runtime" + } + ], + "query": "mm,mm-runtime,modelmesh-runtime", + "queryValue": "", + "skipUrlSync": false, + "type": "custom" + }, + { + "current": { + "selected": true, + "text": ["mlserver"], + "value": ["mlserver"] + }, + "hide": 0, + "includeAll": false, + "label": "runtime", + "multi": true, + "name": "runtime", + "options": [ + { + "selected": true, + "text": "mlserver", + "value": "mlserver" + }, + { + "selected": false, + "text": "triton", + "value": "triton" + }, + { + "selected": false, + "text": "ovms", + "value": "ovms" + }, + { + "selected": 
false, + "text": "torchserve", + "value": "torchserve" + } + ], + "query": "mlserver,triton,ovms,torchserve", + "queryValue": "", + "skipUrlSync": false, + "type": "custom" + } + ] + }, + "time": { + "from": "now-2d", + "to": "now" + }, + "timepicker": {}, + "timezone": "browser", + "title": "ModelMesh Dashboard", + "uid": "vMm_rt-7z-new", + "version": 1, + "weekStart": "" +} diff --git a/opendatahub/odh-manifests/model-mesh_stable/odh-modelmesh-controller/prometheus/servicemonitor.yaml b/opendatahub/odh-manifests/model-mesh_stable/odh-modelmesh-controller/prometheus/servicemonitor.yaml new file mode 100644 index 00000000..87c9e827 --- /dev/null +++ b/opendatahub/odh-manifests/model-mesh_stable/odh-modelmesh-controller/prometheus/servicemonitor.yaml @@ -0,0 +1,20 @@ +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + labels: + modelmesh-service: modelmesh-serving + name: modelmesh-service-monitor + namespace: monitoring +spec: + endpoints: + - path: /metrics + port: "prometheus" + scheme: "https" + tlsConfig: + insecureSkipVerify: true + selector: + matchLabels: + modelmesh-service: modelmesh-serving + namespaceSelector: + matchNames: + - modelmesh-serving diff --git a/opendatahub/odh-manifests/model-mesh_stable/odh-modelmesh-controller/rbac/common/networkpolicy-webhook.yaml b/opendatahub/odh-manifests/model-mesh_stable/odh-modelmesh-controller/rbac/common/networkpolicy-webhook.yaml new file mode 100644 index 00000000..8c337ac2 --- /dev/null +++ b/opendatahub/odh-manifests/model-mesh_stable/odh-modelmesh-controller/rbac/common/networkpolicy-webhook.yaml @@ -0,0 +1,29 @@ +# Copyright 2021 IBM Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: modelmesh-webhook +spec: + podSelector: + matchLabels: + app.kubernetes.io/managed-by: modelmesh-controller + control-plane: modelmesh-controller + ingress: + # exposed for webhook + - ports: + - port: 9443 + protocol: TCP + policyTypes: + - Ingress diff --git a/opendatahub/odh-manifests/model-mesh_stable/odh-modelmesh-controller/runtimes/mlserver-1.x.yaml b/opendatahub/odh-manifests/model-mesh_stable/odh-modelmesh-controller/runtimes/mlserver-1.x.yaml new file mode 100644 index 00000000..5acf187a --- /dev/null +++ b/opendatahub/odh-manifests/model-mesh_stable/odh-modelmesh-controller/runtimes/mlserver-1.x.yaml @@ -0,0 +1,75 @@ +# Copyright 2021 IBM Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+apiVersion: serving.kserve.io/v1alpha1 +kind: ClusterServingRuntime +metadata: + name: mlserver-1.x + labels: + name: modelmesh-serving-mlserver-1.x-SR +spec: + supportedModelFormats: + - name: sklearn + version: "0" # v0.23.1 + autoSelect: true + - name: xgboost + version: "1" # v1.1.1 + autoSelect: true + - name: lightgbm + version: "3" # v3.2.1 + autoSelect: true + + protocolVersions: + - grpc-v2 + multiModel: true + + grpcEndpoint: "port:8085" + grpcDataEndpoint: "port:8001" + + containers: + - name: mlserver + image: mlserver-1:replace + env: + - name: MLSERVER_MODELS_DIR + value: "/models/_mlserver_models/" + - name: MLSERVER_GRPC_PORT + value: "8001" + # default value for HTTP port is 8080 which conflicts with MMesh's + # Litelinks port + - name: MLSERVER_HTTP_PORT + value: "8002" + - name: MLSERVER_LOAD_MODELS_AT_STARTUP + value: "false" + # Set a dummy model name via environment so that MLServer doesn't + # error on a RepositoryIndex call when no models exist + - name: MLSERVER_MODEL_NAME + value: dummy-model-fixme + # Set server addr to localhost to ensure MLServer only listen inside the pod + - name: MLSERVER_HOST + value: "127.0.0.1" + # Increase gRPC max message size to support larger payloads + # Unlimited because it will be restricted at the model mesh layer + - name: MLSERVER_GRPC_MAX_MESSAGE_LENGTH + value: "-1" + resources: + requests: + cpu: 500m + memory: 1Gi + limits: + cpu: "5" + memory: 1Gi + builtInAdapter: + serverType: mlserver + runtimeManagementPort: 8001 + memBufferBytes: 134217728 + modelLoadingTimeoutMillis: 90000 diff --git a/opendatahub/odh-manifests/model-mesh_stable/odh-modelmesh-controller/webhook/kustomization.yaml b/opendatahub/odh-manifests/model-mesh_stable/odh-modelmesh-controller/webhook/kustomization.yaml new file mode 100644 index 00000000..a189c7e5 --- /dev/null +++ b/opendatahub/odh-manifests/model-mesh_stable/odh-modelmesh-controller/webhook/kustomization.yaml @@ -0,0 +1,21 @@ +# Copyright 2021 IBM Corporation +# +# 
Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +--- +resources: + - manifests.yaml + - service.yaml +configurations: + - kustomizeconfig.yaml +commonAnnotations: + service.beta.openshift.io/inject-cabundle: "true" diff --git a/opendatahub/odh-manifests/model-mesh_stable/odh-modelmesh-controller/webhook/kustomizeconfig.yaml b/opendatahub/odh-manifests/model-mesh_stable/odh-modelmesh-controller/webhook/kustomizeconfig.yaml new file mode 100644 index 00000000..487da1e6 --- /dev/null +++ b/opendatahub/odh-manifests/model-mesh_stable/odh-modelmesh-controller/webhook/kustomizeconfig.yaml @@ -0,0 +1,31 @@ +# Copyright 2021 IBM Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# the following config is for teaching kustomize where to look at when substituting vars. +# It requires kustomize v2.1.0 or newer to work properly. 
+nameReference: + - kind: Service + version: v1 + fieldSpecs: + - kind: ValidatingWebhookConfiguration + group: admissionregistration.k8s.io + path: webhooks/clientConfig/service/name + +namespace: + - kind: ValidatingWebhookConfiguration + group: admissionregistration.k8s.io + path: webhooks/clientConfig/service/namespace + create: true + +varReference: + - path: metadata/annotations diff --git a/opendatahub/odh-manifests/model-mesh_stable/odh-modelmesh-controller/webhook/manifests.yaml b/opendatahub/odh-manifests/model-mesh_stable/odh-modelmesh-controller/webhook/manifests.yaml new file mode 100644 index 00000000..28debc1d --- /dev/null +++ b/opendatahub/odh-manifests/model-mesh_stable/odh-modelmesh-controller/webhook/manifests.yaml @@ -0,0 +1,40 @@ +# Copyright 2021 IBM Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+apiVersion: admissionregistration.k8s.io/v1 +kind: ValidatingWebhookConfiguration +metadata: + name: modelmesh-servingruntime.serving.kserve.io +webhooks: + - admissionReviewVersions: + - v1 + clientConfig: + caBundle: Cg== + service: + name: modelmesh-webhook-server-service + path: /validate-serving-modelmesh-io-v1alpha1-servingruntime + port: 9443 + failurePolicy: Fail + name: servingruntime.modelmesh-webhook-server.default + rules: + - apiGroups: + - serving.kserve.io + apiVersions: + - v1alpha1 + operations: + - CREATE + - UPDATE + resources: + - clusterservingruntimes + - servingruntimes + sideEffects: None diff --git a/opendatahub/odh-manifests/model-mesh_stable/odh-modelmesh-controller/webhook/service.yaml b/opendatahub/odh-manifests/model-mesh_stable/odh-modelmesh-controller/webhook/service.yaml new file mode 100644 index 00000000..3957d853 --- /dev/null +++ b/opendatahub/odh-manifests/model-mesh_stable/odh-modelmesh-controller/webhook/service.yaml @@ -0,0 +1,26 @@ +# Copyright 2021 IBM Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+apiVersion: v1 +kind: Service +metadata: + name: modelmesh-webhook-server-service + annotations: + service.beta.openshift.io/serving-cert-secret-name: modelmesh-webhook-server-cert +spec: + ports: + - port: 9443 + protocol: TCP + targetPort: webhook + selector: + control-plane: modelmesh-controller diff --git a/opendatahub/odh-manifests/model-mesh_templates/base/params.env b/opendatahub/odh-manifests/model-mesh_templates/base/params.env index 84af5752..c2f7c5f4 100644 --- a/opendatahub/odh-manifests/model-mesh_templates/base/params.env +++ b/opendatahub/odh-manifests/model-mesh_templates/base/params.env @@ -1,7 +1,7 @@ monitoring-namespace=opendatahub -odh-mm-rest-proxy=quay.io/opendatahub/rest-proxy:v0.11.0-rc0 -odh-modelmesh-runtime-adapter=quay.io/opendatahub/modelmesh-runtime-adapter:v0.11.0-rc0 -odh-modelmesh=quay.io/opendatahub/modelmesh:v0.11.0-rc0 +odh-mm-rest-proxy=quay.io/opendatahub/rest-proxy:fast +odh-modelmesh-runtime-adapter=quay.io/opendatahub/modelmesh-runtime-adapter:fast +odh-modelmesh=quay.io/opendatahub/modelmesh:fast odh-openvino=quay.io/opendatahub/openvino_model_server:2022.3-release -odh-modelmesh-controller=quay.io/opendatahub/modelmesh-controller:v0.11.0-rc0 -odh-model-controller=quay.io/opendatahub/odh-model-controller:v0.11.0-rc0 +odh-modelmesh-controller=quay.io/opendatahub/modelmesh-controller:fast +odh-model-controller=quay.io/opendatahub/odh-model-controller:fast diff --git a/opendatahub/odh-manifests/model-mesh_templates_stable/base/params.env b/opendatahub/odh-manifests/model-mesh_templates_stable/base/params.env index 30d7d266..c90a63ed 100644 --- a/opendatahub/odh-manifests/model-mesh_templates_stable/base/params.env +++ b/opendatahub/odh-manifests/model-mesh_templates_stable/base/params.env @@ -1,7 +1,7 @@ monitoring-namespace=opendatahub -odh-mm-rest-proxy=quay.io/opendatahub/rest-proxy:v0.10.0 -odh-modelmesh-runtime-adapter=quay.io/opendatahub/modelmesh-runtime-adapter:v0.11.0-alpha 
-odh-modelmesh=quay.io/opendatahub/modelmesh:v0.11.0-alpha -odh-openvino=quay.io/opendatahub/openvino_model_server:2022.3-gpu -odh-modelmesh-controller=quay.io/opendatahub/modelmesh-controller:v0.11.0-alpha -odh-model-controller=quay.io/opendatahub/odh-model-controller:v0.11.0-alpha +odh-mm-rest-proxy=quay.io/opendatahub/rest-proxy:v0.11.0 +odh-modelmesh-runtime-adapter=quay.io/opendatahub/modelmesh-runtime-adapter:v0.11.0 +odh-modelmesh=quay.io/opendatahub/modelmesh:v0.11.0 +odh-openvino=quay.io/opendatahub/openvino_model_server:2022.3-release +odh-modelmesh-controller=quay.io/opendatahub/modelmesh-controller:v0.11.0 +odh-model-controller=quay.io/opendatahub/odh-model-controller:v0.11.0 diff --git a/opendatahub/scripts/gen-manifests/odh_modelmesh_manifests_stable.sh b/opendatahub/scripts/gen-manifests/odh_modelmesh_manifests_stable.sh index 104a5e10..ef27bb59 100755 --- a/opendatahub/scripts/gen-manifests/odh_modelmesh_manifests_stable.sh +++ b/opendatahub/scripts/gen-manifests/odh_modelmesh_manifests_stable.sh @@ -37,3 +37,30 @@ echo -n ".. Add trustAI option into config-defaults.yaml" yq eval '."payloadProcessors" = ""' -i ${MODELMESH_CONTROLLER_DIR}/default/config-defaults.yaml echo -e "\r ✓" +echo -n ".. Remove CertManager related from default/kustomization.yaml" +# Drop every line that mentions certmanager. +sed -i '/certmanager/d' "${MODELMESH_CONTROLLER_DIR}/default/kustomization.yaml" + +# Delete everything from the top-level vars key up to the line just before configMapGenerator. +# grep stops at the first match so each lookup yields a single line number. +varsStartLine=$(grep -n -m1 '^vars:' "${MODELMESH_CONTROLLER_DIR}/default/kustomization.yaml" | cut -d':' -f1) +configMapGeneratorStartLine=$(grep -n -m1 '^configMapGenerator:' "${MODELMESH_CONTROLLER_DIR}/default/kustomization.yaml" | cut -d':' -f1) +configMapGeneratorBeforeLine=$((configMapGeneratorStartLine-1)) +sed -i "${varsStartLine},${configMapGeneratorBeforeLine}d" "${MODELMESH_CONTROLLER_DIR}/default/kustomization.yaml" + +# remove webhookcainjection_patch.yaml +sed -i '/webhookcainjection_patch.yaml/d' "${MODELMESH_CONTROLLER_DIR}/default/kustomization.yaml" +echo -e "\r ✓" + +echo -n ".. 
Add serving-cert-secret-name to webhook/service.yaml" +yq eval '.metadata.annotations."service.beta.openshift.io/serving-cert-secret-name"="modelmesh-webhook-server-cert"' -i "${MODELMESH_CONTROLLER_DIR}/webhook/service.yaml" +echo -e "\r ✓" + +echo -n ".. Add inject-cabundle into webhook/kustomization.yaml" +yq eval '.commonAnnotations += {"service.beta.openshift.io/inject-cabundle": "true"}' -i "${MODELMESH_CONTROLLER_DIR}/webhook/kustomization.yaml" +# Finish this step's progress line before the next step prints its own. +echo -e "\r ✓" + +echo -n ".. Remove namespace " +sed -i '/namespace/d' "${MODELMESH_CONTROLLER_DIR}/webhook/service.yaml" +echo -e "\r ✓" diff --git a/opendatahub/scripts/manifests/params.env b/opendatahub/scripts/manifests/params.env index 84af5752..c90a63ed 100644 --- a/opendatahub/scripts/manifests/params.env +++ b/opendatahub/scripts/manifests/params.env @@ -1,7 +1,7 @@ monitoring-namespace=opendatahub -odh-mm-rest-proxy=quay.io/opendatahub/rest-proxy:v0.11.0-rc0 -odh-modelmesh-runtime-adapter=quay.io/opendatahub/modelmesh-runtime-adapter:v0.11.0-rc0 -odh-modelmesh=quay.io/opendatahub/modelmesh:v0.11.0-rc0 +odh-mm-rest-proxy=quay.io/opendatahub/rest-proxy:v0.11.0 +odh-modelmesh-runtime-adapter=quay.io/opendatahub/modelmesh-runtime-adapter:v0.11.0 +odh-modelmesh=quay.io/opendatahub/modelmesh:v0.11.0 odh-openvino=quay.io/opendatahub/openvino_model_server:2022.3-release -odh-modelmesh-controller=quay.io/opendatahub/modelmesh-controller:v0.11.0-rc0 -odh-model-controller=quay.io/opendatahub/odh-model-controller:v0.11.0-rc0 +odh-modelmesh-controller=quay.io/opendatahub/modelmesh-controller:v0.11.0 +odh-model-controller=quay.io/opendatahub/odh-model-controller:v0.11.0 diff --git a/opendatahub/scripts/manifests/runtimes/kustomization.yaml b/opendatahub/scripts/manifests/runtimes/kustomization.yaml index 2ea7ce81..e89f335d 100644 --- a/opendatahub/scripts/manifests/runtimes/kustomization.yaml +++ b/opendatahub/scripts/manifests/runtimes/kustomization.yaml @@ -12,27 +12,24 @@ # See the License for the specific language governing permissions 
and # limitations under the License. resources: -- triton-2.x.yaml -- mlserver-1.x.yaml -- ovms-1.x.yaml -- torchserve-0.x.yaml - - - + - triton-2.x.yaml + - mlserver-1.x.yaml + - ovms-1.x.yaml + - torchserve-0.x.yaml images: -- name: tritonserver-2 - newName: nvcr.io/nvidia/tritonserver - newTag: 23.04-py3 -- name: mlserver-1 - newName: seldonio/mlserver - newTag: 1.3.2 -- name: ovms-1 - newName: openvino/model_server - newTag: "2022.3" -- name: torchserve-0 - newName: pytorch/torchserve - newTag: 0.7.1-cpu + - name: tritonserver-2 + newName: nvcr.io/nvidia/tritonserver + newTag: 23.04-py3 + - name: mlserver-1 + newName: seldonio/mlserver + newTag: 1.3.2 + - name: ovms-1 + newName: openvino/model_server + newTag: "2022.3" + - name: torchserve-0 + newName: pytorch/torchserve + newTag: 0.7.1-cpu apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization diff --git a/scripts/download-images-on-nodes.sh b/scripts/download-images-on-nodes.sh index ce5084a5..6e778761 100755 --- a/scripts/download-images-on-nodes.sh +++ b/scripts/download-images-on-nodes.sh @@ -2,9 +2,9 @@ TRITON_SERVER=nvcr.io/nvidia/tritonserver:23.04-py3 ML_SERVER=seldonio/mlserver:1.3.2 OPENVINO=openvino/model_server:2022.3 TORCHSERVE=pytorch/torchserve:0.7.1-cpu -MODELMESH=kserve/modelmesh:v0.11.0-rc0 -MODELMESH_RUNTIME=kserve/modelmesh-runtime-adapter:v0.11.0-rc0 -REST_PROXY=kserve/rest-proxy:v0.11.0-rc0 +MODELMESH=kserve/modelmesh:v0.11.0 +MODELMESH_RUNTIME=kserve/modelmesh-runtime-adapter:v0.11.0 +REST_PROXY=kserve/rest-proxy:v0.11.0 # TODO - automation # TRITON_SERVER_IMG=nvcr.io/nvidia/tritonserver