From da1c8c80a5fe2e742c7df99abf62fcad032a18b5 Mon Sep 17 00:00:00 2001 From: MoritzWeber Date: Fri, 17 May 2024 22:34:00 +0200 Subject: [PATCH] feat(monitoring): Filter active sessions for session variables Filter active sessions by tool, tool version, connection method and session type. The values are available as annotations on the Kubernetes service, deployment and pod. Prometheus relabels the annotations to labels. In Grafana, a dropdown menu allows filtering. In addition, the dashboard shows sessions with failed activity monitoring. --- .../capellacollab/core/database/migration.py | 3 + .../capellacollab/sessions/operators/k8s.py | 11 +- backend/capellacollab/sessions/routes.py | 16 +- .../k8s_operator/test_session_k8s_operator.py | 1 + .../sessions/test_session_environment.py | 9 +- .../tool-details/tool-details.component.ts | 7 +- helm/config/grafana/active-sessions.json | 150 ++++++++++++++++-- .../prometheus/prometheus.configmap.yaml | 73 +++++++-- .../prometheus/prometheus.deployment.yaml | 6 +- 9 files changed, 238 insertions(+), 38 deletions(-) diff --git a/backend/capellacollab/core/database/migration.py b/backend/capellacollab/core/database/migration.py index 7e9b6120f..0c7134465 100644 --- a/backend/capellacollab/core/database/migration.py +++ b/backend/capellacollab/core/database/migration.py @@ -133,6 +133,7 @@ def get_eclipse_session_configuration() -> ( connection=tools_models.ToolSessionConnection( methods=[ tools_models.GuacamoleConnectionMethod( + id="guacamole", name="Classic (Guacamole)", description=( "Old connection method using Guacamole. " @@ -143,6 +144,7 @@ def get_eclipse_session_configuration() -> ( environment={"CONNECTION_METHOD": "xrdp"}, ), tools_models.HTTPConnectionMethod( + id="xpra", name="Experimental (Xpra)", description=( "Experimental connection method using Xpra. 
" @@ -280,6 +282,7 @@ def create_jupyter_tool(db: orm.Session) -> tools_models.DatabaseTool: connection=tools_models.ToolSessionConnection( methods=[ tools_models.HTTPConnectionMethod( + id="jupyter-direct", name="Direct Jupyter connection (Browser)", description="The only available connection method for Jupyter.", ports=tools_models.HTTPPorts(http=8888, metrics=9118), diff --git a/backend/capellacollab/sessions/operators/k8s.py b/backend/capellacollab/sessions/operators/k8s.py index f71d71389..9ede0ebff 100644 --- a/backend/capellacollab/sessions/operators/k8s.py +++ b/backend/capellacollab/sessions/operators/k8s.py @@ -98,6 +98,7 @@ def start_session( ports: dict[str, int], volumes: list[models.Volume], init_volumes: list[models.Volume], + annotations: dict[str, str], prometheus_path="/metrics", prometheus_port=9118, ) -> Session: @@ -123,6 +124,7 @@ def start_session( volumes=volumes, init_volumes=init_volumes, tool_resources=tool.config.resources, + annotations=annotations, ) self._create_disruption_budget( @@ -136,6 +138,7 @@ def start_session( ports=ports, prometheus_path=prometheus_path, prometheus_port=prometheus_port, + annotations=annotations, ) log.info( @@ -447,6 +450,7 @@ def _create_deployment( volumes: list[models.Volume], init_volumes: list[models.Volume], tool_resources: tools_models.Resources, + annotations: dict[str, str], ) -> client.V1Deployment: k8s_volumes, k8s_volume_mounts = self._map_volumes_to_k8s_volumes( volumes @@ -530,14 +534,15 @@ def _create_deployment( deployment: client.V1Deployment = client.V1Deployment( kind="Deployment", api_version="apps/v1", - metadata=client.V1ObjectMeta(name=name), + metadata=client.V1ObjectMeta(name=name, annotations=annotations), spec=client.V1DeploymentSpec( replicas=1, strategy=client.V1DeploymentStrategy(type="Recreate"), selector=client.V1LabelSelector(match_labels={"app": name}), template=client.V1PodTemplateSpec( metadata=client.V1ObjectMeta( - labels={"app": name, "workload": "session"} + 
labels={"app": name, "workload": "session"}, + annotations=annotations, ), spec=client.V1PodSpec( automount_service_account_token=False, @@ -627,6 +632,7 @@ def _create_service( ports: dict[str, int], prometheus_path: str, prometheus_port: int, + annotations: dict[str, str], ) -> client.V1Service: service: client.V1Service = client.V1Service( kind="Service", @@ -638,6 +644,7 @@ def _create_service( "prometheus.io/scrape": "true", "prometheus.io/path": prometheus_path, "prometheus.io/port": f"{prometheus_port}", + **annotations, }, ), spec=client.V1ServiceSpec( diff --git a/backend/capellacollab/sessions/routes.py b/backend/capellacollab/sessions/routes.py index b9eb49750..fa5f88294 100644 --- a/backend/capellacollab/sessions/routes.py +++ b/backend/capellacollab/sessions/routes.py @@ -139,11 +139,24 @@ def request_session( docker_image = util.get_docker_image(version, body.session_type) + annotations: dict[str, str] = { + "capellacollab/owner-name": user.name, + "capellacollab/owner-id": str(user.id), + "capellacollab/tool-name": tool.name, + "capellacollab/tool-id": str(tool.id), + "capellacollab/tool-version-name": version.name, + "capellacollab/tool-version-id": str(version.id), + "capellacollab/session-type": body.session_type.value, + "capellacollab/session-id": session_id, + "capellacollab/connection-method-id": connection_method.id, + "capellacollab/connection-method-name": connection_method.name, + } + session = operator.start_session( session_id=session_id, image=docker_image, username=user.name, - session_type=models.SessionType.PERSISTENT, + session_type=body.session_type, tool=tool, version=version, environment=environment, @@ -151,6 +164,7 @@ def request_session( ports=connection_method.ports.model_dump(), volumes=volumes, init_volumes=init_volumes, + annotations=annotations, prometheus_path=tool.config.monitoring.prometheus.path, prometheus_port=connection_method.ports.metrics, ) diff --git 
a/backend/tests/sessions/k8s_operator/test_session_k8s_operator.py b/backend/tests/sessions/k8s_operator/test_session_k8s_operator.py index b5b3de493..a6af69f75 100644 --- a/backend/tests/sessions/k8s_operator/test_session_k8s_operator.py +++ b/backend/tests/sessions/k8s_operator/test_session_k8s_operator.py @@ -73,6 +73,7 @@ def create_namespaced_pod_disruption_budget(namespace, budget): ports={"rdp": 3389}, volumes=[], init_volumes=[], + annotations={}, ) assert deployment_counter == 1 diff --git a/backend/tests/sessions/test_session_environment.py b/backend/tests/sessions/test_session_environment.py index c0074f673..8df44b934 100644 --- a/backend/tests/sessions/test_session_environment.py +++ b/backend/tests/sessions/test_session_environment.py @@ -70,11 +70,6 @@ def fixture_patch_irrelevant_request_session_calls( monkeypatch: pytest.MonkeyPatch, tool: tools_models.DatabaseTool, ): - monkeypatch.setattr( - tools_injectables, - "get_existing_tool_version", - lambda *args, **kwargs: None, - ) monkeypatch.setattr( sessions_util, "get_connection_method", @@ -112,7 +107,9 @@ def fixture_patch_irrelevant_request_session_calls( ) -@pytest.mark.usefixtures("patch_irrelevant_request_session_calls") +@pytest.mark.usefixtures( + "patch_irrelevant_request_session_calls", "tool_version" +) def test_environment_behaviour( monkeypatch: pytest.MonkeyPatch, operator: MockOperator, diff --git a/frontend/src/app/settings/core/tools-settings/tool-details/tool-details.component.ts b/frontend/src/app/settings/core/tools-settings/tool-details/tool-details.component.ts index 0aca83820..09088c3ef 100644 --- a/frontend/src/app/settings/core/tools-settings/tool-details/tool-details.component.ts +++ b/frontend/src/app/settings/core/tools-settings/tool-details/tool-details.component.ts @@ -13,7 +13,7 @@ import { filter, map, mergeMap, tap } from 'rxjs'; import { BreadcrumbsService } from 'src/app/general/breadcrumbs/breadcrumbs.service'; import { EditorComponent } from 
'src/app/helpers/editor/editor.component'; import { ToastService } from 'src/app/helpers/toast/toast.service'; -import { CreateToolInput, Tool, ToolsService } from 'src/app/openapi'; +import { Tool, ToolsService } from 'src/app/openapi'; import { ApiDocumentationComponent } from '../../../../general/api-documentation/api-documentation.component'; import { EditorComponent as EditorComponent_1 } from '../../../../helpers/editor/editor.component'; import { ToolWrapperService } from '../tool.service'; @@ -66,9 +66,10 @@ export class ToolDetailsComponent { }); } - submitValue(value: CreateToolInput): void { + submitValue(value: Tool): void { + const { id, ...valueWithoutID } = value; // eslint-disable-line @typescript-eslint/no-unused-vars this.toolsService - .updateTool(this.selectedTool!.id, value) + .updateTool(this.selectedTool!.id, valueWithoutID) .pipe( tap((tool) => { this.toastService.showSuccess( diff --git a/helm/config/grafana/active-sessions.json b/helm/config/grafana/active-sessions.json index 42490c8d0..8e2bf7300 100644 --- a/helm/config/grafana/active-sessions.json +++ b/helm/config/grafana/active-sessions.json @@ -80,8 +80,8 @@ "type": "prometheus", "uid": "PBFA97CFB590B2093" }, - "editorMode": "builder", - "expr": "sum(count by(instance) (idletime_minutes))", + "editorMode": "code", + "expr": "sum(count(up{connection_method_name=~\"$connection_method\", tool_version_name=~\"$tool_version\", tool_name=~\"$tool\", session_type=~\"$session_type\", job=\"sessions\"})) OR on() vector(0)", "legendFormat": "__auto", "range": true, "refId": "A" @@ -147,7 +147,38 @@ ] } }, - "overrides": [] + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Sessions with scraping errors" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "semi-dark-red", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Number of sessions" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": 
"semi-dark-blue", + "mode": "fixed" + } + } + ] + } + ] }, "gridPos": { "h": 7, @@ -161,7 +192,7 @@ "calcs": [], "displayMode": "list", "placement": "bottom", - "showLegend": false + "showLegend": true }, "tooltip": { "mode": "single", @@ -175,11 +206,25 @@ "type": "prometheus", "uid": "PBFA97CFB590B2093" }, - "editorMode": "builder", - "expr": "sum(count by(instance) (idletime_minutes))", - "legendFormat": "__auto", + "editorMode": "code", + "expr": "sum(count(up{connection_method_name=~\"$connection_method\", tool_version_name=~\"$tool_version\", session_type=~\"$session_type\", tool_name=~\"$tool\", job=\"sessions\"})) OR on() vector(0)", + "hide": false, + "legendFormat": "Number of sessions", "range": true, "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "(count(up{connection_method_name=~\"$connection_method\", tool_version_name=~\"$tool_version\", tool_name=~\"$tool\", session_type=~\"$session_type\", job=\"sessions\"}) == 0) OR vector(0)", + "hide": false, + "instant": false, + "legendFormat": "Sessions with scraping errors", + "range": true, + "refId": "B" } ], "title": "Active Sessions (instances)", @@ -272,8 +317,8 @@ "uid": "PBFA97CFB590B2093" }, "editorMode": "builder", - "expr": "idletime_minutes", - "legendFormat": "{{app}}", + "expr": "idletime_minutes{tool_name=~\"$tool\", tool_version_name=~\"$tool_version\", connection_method_name=~\"$connection_method\", session_type=~\"$session_type\"}", + "legendFormat": "{{session_id}} ({{tool_name}} {{tool_version_name}} with {{connection_method_name}})", "range": true, "refId": "A" } @@ -287,7 +332,90 @@ "style": "dark", "tags": [], "templating": { - "list": [] + "list": [ + { + "current": { + "selected": false, + "text": "7", + "value": "7" + }, + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "definition": "up{job=\"sessions\"}", + "hide": 0, + "includeAll": false, + "label": "Tool and 
Version", + "multi": true, + "name": "tool_version_id", + "options": [], + "query": { + "query": "up{job=\"sessions\"}", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 2, + "regex": "/tool_version_name_full=\"(?[^\"]+)|tool_version_id=\"(?[^\"]+)/g", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": false, + "text": [""], + "value": [""] + }, + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "definition": "up{job=\"sessions\", tool_version_id~=\"$tool_version_id\"}", + "description": "", + "hide": 0, + "includeAll": false, + "label": "Connection Method", + "multi": true, + "name": "connection_method_id", + "options": [], + "query": { + "query": "up{job=\"sessions\", tool_version_id~=\"$tool_version_id\"}", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "/connection_method_name=\"(?[^\"]+)|connection_method_id=\"(?[^\"]+)/g", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": false, + "text": [""], + "value": [""] + }, + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "definition": "label_values(up{job=\"sessions\", connection_method_id~=},session_type)", + "hide": 0, + "includeAll": false, + "label": "Session Type", + "multi": true, + "name": "session_type", + "options": [], + "query": { + "query": "label_values(up{job=\"sessions\", connection_method_id~=},session_type)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + } + ] }, "time": { "from": "now-6h", @@ -297,6 +425,6 @@ "timezone": "", "title": "Active sessions", "uid": "0kK_I7T4k", - "version": 1, + "version": 4, "weekStart": "" } diff --git a/helm/templates/prometheus/prometheus.configmap.yaml b/helm/templates/prometheus/prometheus.configmap.yaml index 53755b67e..a7d2431e5 100644 --- 
a/helm/templates/prometheus/prometheus.configmap.yaml +++ b/helm/templates/prometheus/prometheus.configmap.yaml @@ -40,11 +40,11 @@ data: metrics_path: /prometheus/metrics static_configs: - targets: ["localhost:9090"] - - job_name: "kubernetes-services" + - job_name: "sessions" kubernetes_sd_configs: - role: service namespaces: - own_namespace: true + own_namespace: false names: - {{ .Values.backend.k8sSessionNamespace }} relabel_configs: @@ -65,15 +65,64 @@ data: regex: ([^:]+)(?::\d+)?;(\d+) replacement: $1:$2 target_label: __address__ - # Transfer app label from kubernetes (name of the deployment) - - source_labels: [__meta_kubernetes_service_label_app] - action: replace - target_label: app - # Set label kubernetes_namespace - - source_labels: [__meta_kubernetes_namespace] + + # Convert annotation `capellacollab/tool-name` to label `tool-name` + - source_labels: [__meta_kubernetes_service_annotation_capellacollab_tool_name] + target_label: tool_name + + # Convert annotation `capellacollab/tool-id` to label `tool-id` + - source_labels: [__meta_kubernetes_service_annotation_capellacollab_tool_id] + target_label: tool_id + + # Convert annotation `capellacollab/tool-version-name` to label `tool-version-name` + - source_labels: [__meta_kubernetes_service_annotation_capellacollab_tool_version_name] + target_label: tool_version_name + + # Convert annotation `capellacollab/tool-version-id` to label `tool-version-id` + - source_labels: [__meta_kubernetes_service_annotation_capellacollab_tool_version_id] + target_label: tool_version_id + + # Convert annotation `capellacollab/session-type` to label `session-type` + - source_labels: [__meta_kubernetes_service_annotation_capellacollab_session_type] + target_label: session_type + + # Convert annotation `capellacollab/session-id` to label `session-id` + - source_labels: [__meta_kubernetes_service_annotation_capellacollab_session_id] + target_label: session_id + + # Convert annotation `capellacollab/connection-method-id` to 
label `connection-method-id` + - source_labels: [__meta_kubernetes_service_annotation_capellacollab_connection_method_id] + target_label: connection_method_id + + # Convert annotation `capellacollab/connection-method-name` to label `connection-method-name` + - source_labels: [__meta_kubernetes_service_annotation_capellacollab_connection_method_name] + target_label: connection_method_name + + # Convert annotation `capellacollab/tool-name` and `capellacollab/tool-version-name` to label `tool_version_name_full` + - source_labels: [__meta_kubernetes_service_annotation_capellacollab_tool_name, __meta_kubernetes_service_annotation_capellacollab_tool_version_name] + separator: " " + target_label: tool_version_name_full + - job_name: "management-portal" + kubernetes_sd_configs: + - role: service + namespaces: + own_namespace: true + relabel_configs: + # Scrape pods with annotation prometheus.io/scrape = "true" + - source_labels: + [__meta_kubernetes_service_annotation_prometheus_io_scrape] + action: keep + regex: true + # Scrape pods on path specified by prometheus.io/path + - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path] action: replace - target_label: kubernetes_namespace - # Set label kubernetes_pod_name - - source_labels: [__meta_kubernetes_service_name] + target_label: __metrics_path__ + regex: (.+) + # Scrape pods on port specified by prometheus.io/port + - source_labels: + [__address__, __meta_kubernetes_service_annotation_prometheus_io_port] action: replace - target_label: kubernetes_service_name + regex: ([^:]+)(?::\d+)?;(\d+) + replacement: $1:$2 + target_label: __address__ + # Transfer app label from kubernetes (name of the deployment) diff --git a/helm/templates/prometheus/prometheus.deployment.yaml b/helm/templates/prometheus/prometheus.deployment.yaml index 7652de499..97df812aa 100644 --- a/helm/templates/prometheus/prometheus.deployment.yaml +++ b/helm/templates/prometheus/prometheus.deployment.yaml @@ -40,11 +40,11 @@ spec: 
resources: {{ if .Values.development }} limits: - cpu: "0.1" - memory: 50Mi + cpu: "0.2" + memory: 200Mi requests: cpu: "0.05" - memory: 5Mi + memory: 50Mi {{ else }} requests: cpu: 500m