From 538ec92c24052c41e1b8b7e5d89e27b7496df53a Mon Sep 17 00:00:00 2001 From: Soham Dasgupta Date: Fri, 30 Jun 2023 11:41:36 -0700 Subject: [PATCH 01/10] Removing duplicate alerts from ci recommended alerts --- .../templates/ci_recommended_alerts.json | 38 ------------------- 1 file changed, 38 deletions(-) diff --git a/mixins/kubernetes/rules/recording_and_alerting_rules/templates/ci_recommended_alerts.json b/mixins/kubernetes/rules/recording_and_alerting_rules/templates/ci_recommended_alerts.json index 85a5263e0..0c0f8a615 100644 --- a/mixins/kubernetes/rules/recording_and_alerting_rules/templates/ci_recommended_alerts.json +++ b/mixins/kubernetes/rules/recording_and_alerting_rules/templates/ci_recommended_alerts.json @@ -46,44 +46,6 @@ "enabled": true, "interval": "PT5M", "rules": [ - { - "alert": "Average CPU usage per container is greater than 95%", - "expression": "sum (rate(container_cpu_usage_seconds_total{image!=\"\", container!=\"POD\"}[5m])) by (pod,cluster,container,namespace) / sum(container_spec_cpu_quota{image!=\"\", container!=\"POD\"}/container_spec_cpu_period{image!=\"\", container!=\"POD\"}) by (pod,cluster,container,namespace) > .95", - "for": "PT5M", - "annotations": { - "description": "Average CPU usage per container is greater than 95%" - }, - "enabled": true, - "severity": 4, - "resolveConfiguration": { - "autoResolved": true, - "timeToResolve": "PT15M" - }, - "actions": [ - { - "actionGroupId": "[parameters('actionGroupResourceId')]" - } - ] - }, - { - "alert": "Average Memory usage per container is greater than 95%.", - "expression": "(container_memory_working_set_bytes{container!=\"\", image!=\"\", container!=\"POD\"} / on(namespace,cluster,pod,container) group_left kube_pod_container_resource_limits{resource=\"memory\", node!=\"\"}) > .95 ", - "for": "PT10M", - "annotations": { - "description": "Average Memory usage per container is greater than 95%" - }, - "enabled": true, - "severity": 4, - "resolveConfiguration": { - "autoResolved": 
true, - "timeToResolve": "PT10M" - }, - "actions": [ - { - "actionGroupId": "[parameters('actionGroupResourceId')]" - } - ] - }, { "alert": "Number of OOM killed containers is greater than 0", "expression": "sum by (cluster,container,namespace)(kube_pod_container_status_last_terminated_reason{reason=\"OOMKilled\"}) > 0", From 428070693aa5a58e504563047b39011eb3753b2e Mon Sep 17 00:00:00 2001 From: Soham Dasgupta Date: Tue, 18 Jul 2023 10:34:20 -0700 Subject: [PATCH 02/10] Remove test branch --- .pipelines/azure-pipeline-build.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.pipelines/azure-pipeline-build.yml b/.pipelines/azure-pipeline-build.yml index 674b36be2..13354c152 100644 --- a/.pipelines/azure-pipeline-build.yml +++ b/.pipelines/azure-pipeline-build.yml @@ -2,7 +2,6 @@ trigger: branches: include: - main - - incpubsizesoham pr: autoCancel: true branches: From 12ad6c4f46cbb67644c885c70dae40c129521d7b Mon Sep 17 00:00:00 2001 From: Soham Dasgupta Date: Thu, 10 Aug 2023 09:45:50 -0700 Subject: [PATCH 03/10] Remove preview keyword from policy readme --- AddonPolicyTemplate/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/AddonPolicyTemplate/README.md b/AddonPolicyTemplate/README.md index a68cbd34c..84392e8f5 100644 --- a/AddonPolicyTemplate/README.md +++ b/AddonPolicyTemplate/README.md @@ -1,6 +1,6 @@ You can create the policy definition using a command like : -```az policy definition create --name "(Preview) Prometheus Metrics addon" --display-name "(Preview) Prometheus Metrics addon" --mode Indexed --metadata version=1.0.0 category=Kubernetes --rules .\AddonPolicyMetricsProfile.rules.json --params .\AddonPolicyMetricsProfile.parameters.json``` +```az policy definition create --name "Prometheus Metrics addon" --display-name "Prometheus Metrics addon" --mode Indexed --metadata version=1.0.0 category=Kubernetes --rules .\AddonPolicyMetricsProfile.rules.json --params .\AddonPolicyMetricsProfile.parameters.json``` **NOTE** From 
9c44459dec12818b5461d13f9bae4b1968426b52 Mon Sep 17 00:00:00 2001 From: Soham Dasgupta Date: Fri, 22 Sep 2023 17:45:29 -0700 Subject: [PATCH 04/10] Add cicd and prod near ring cluster monitoring for managed prometheus --- AddonBicepTemplate/README.md | 2 +- internal/alerts/ci-dev-aks-mac-eus.json | 209 +- internal/alerts/ci-dev-aks-wcus.json | 459 - internal/alerts/ci-dev-arc-wcus.json | 191 + internal/alerts/ci-prod-aks-mac-weu.json | 192 +- internal/alerts/ci-prod-aks-msi-eus2.json | 275 - internal/alerts/ci-prod-arc-wcus.json | 191 + internal/alerts/ci_prod_aks_eus.json | 459 - .../monitoring-metrics-prod-aks-eus2euap.json | 296 + .../monitoring-metrics-prod-aks-wcus.json | 296 + internal/monitoring/README.md | 37 +- .../dashboards/ci-dev-aks-mac-eus-db.json | 1631 --- .../dashboards/ci-dev-aks-wcus-db.json | 1634 --- .../dashboards/ci-prod-aks-eus-db.json | 1625 --- .../dashboards/ci-prod-aks-mac-weu-db.json | 1552 --- internal/monitoring/dashboards/cicd-db.json | 8848 +++++++++++++++++ .../dashboards/prod-near-ring-db.json | 4432 +++++++++ 17 files changed, 14321 insertions(+), 8008 deletions(-) delete mode 100644 internal/alerts/ci-dev-aks-wcus.json create mode 100644 internal/alerts/ci-dev-arc-wcus.json delete mode 100644 internal/alerts/ci-prod-aks-msi-eus2.json create mode 100644 internal/alerts/ci-prod-arc-wcus.json delete mode 100644 internal/alerts/ci_prod_aks_eus.json create mode 100644 internal/alerts/monitoring-metrics-prod-aks-eus2euap.json create mode 100644 internal/alerts/monitoring-metrics-prod-aks-wcus.json delete mode 100644 internal/monitoring/dashboards/ci-dev-aks-mac-eus-db.json delete mode 100644 internal/monitoring/dashboards/ci-dev-aks-wcus-db.json delete mode 100644 internal/monitoring/dashboards/ci-prod-aks-eus-db.json delete mode 100644 internal/monitoring/dashboards/ci-prod-aks-mac-weu-db.json create mode 100644 internal/monitoring/dashboards/cicd-db.json create mode 100644 internal/monitoring/dashboards/prod-near-ring-db.json diff 
--git a/AddonBicepTemplate/README.md b/AddonBicepTemplate/README.md index 84c21dccf..85af60539 100644 --- a/AddonBicepTemplate/README.md +++ b/AddonBicepTemplate/README.md @@ -5,7 +5,7 @@ You can deploy the templates using a command like : In order to deploy community alerts and ci recommended alerts through template, deploy using command like: -```az deployment group create -g -n --template-file .\AzureMonitorAlertsProfileParameters.json --parameters .\AzureMonitorAlertsProfileParameters.json``` +```az deployment group create -g -n --template-file .\AzureMonitorAlertsProfile.bicep --parameters .\AzureMonitorAlertsProfileParameters.json``` **NOTE** diff --git a/internal/alerts/ci-dev-aks-mac-eus.json b/internal/alerts/ci-dev-aks-mac-eus.json index 3c91fa6a8..776f93d18 100644 --- a/internal/alerts/ci-dev-aks-mac-eus.json +++ b/internal/alerts/ci-dev-aks-mac-eus.json @@ -10,9 +10,10 @@ "apiVersion": "2023-03-01", "location": "eastus", "properties": { - "description": "rule group for cluster /subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourceGroups/ci-dev-aks-mac-eus-rg/providers/Microsoft.ContainerService/managedClusters/ci-dev-aks-mac-eus in MAC: /subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourceGroups/ci-dev-aks-mac-eus-rg/providers/microsoft.monitor/accounts/ci-dev-aks-eus-mac", + "description": "rule group for cluster ci-dev-aks-mac-eus in MAC: ci-dev-aks-eus-mac", "scopes": [ - "/subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourceGroups/ci-dev-aks-mac-eus-rg/providers/microsoft.monitor/accounts/ci-dev-aks-eus-mac" + "/subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourceGroups/ci-dev-aks-mac-eus-rg/providers/microsoft.monitor/accounts/ci-dev-aks-eus-mac", + "/subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourceGroups/ci-dev-aks-mac-eus-rg/providers/Microsoft.ContainerService/managedClusters/ci-dev-aks-mac-eus" ], "rules": [ { @@ -32,9 +33,7 @@ }, "actions": [ { - "ActionProperties": { - "Icm.Enabled": "True" - } + 
"actionGroupId": "/subscriptions/13d371f9-5a39-46d5-8e1b-60158c49db84/resourceGroups/ContainerInsightsPrometheusCollector-Prod/providers/microsoft.insights/actiongroups/AMA-PrometheusCollectorAlertGroup" } ] }, @@ -55,9 +54,7 @@ }, "actions": [ { - "ActionProperties": { - "Icm.Enabled": "True" - } + "actionGroupId": "/subscriptions/13d371f9-5a39-46d5-8e1b-60158c49db84/resourceGroups/ContainerInsightsPrometheusCollector-Prod/providers/microsoft.insights/actiongroups/AMA-PrometheusCollectorAlertGroup" } ] }, @@ -78,9 +75,7 @@ }, "actions": [ { - "ActionProperties": { - "Icm.Enabled": "True" - } + "actionGroupId": "/subscriptions/13d371f9-5a39-46d5-8e1b-60158c49db84/resourceGroups/ContainerInsightsPrometheusCollector-Prod/providers/microsoft.insights/actiongroups/AMA-PrometheusCollectorAlertGroup" } ] }, @@ -101,9 +96,7 @@ }, "actions": [ { - "ActionProperties": { - "Icm.Enabled": "True" - } + "actionGroupId": "/subscriptions/13d371f9-5a39-46d5-8e1b-60158c49db84/resourceGroups/ContainerInsightsPrometheusCollector-Prod/providers/microsoft.insights/actiongroups/AMA-PrometheusCollectorAlertGroup" } ] }, @@ -124,9 +117,7 @@ }, "actions": [ { - "ActionProperties": { - "Icm.Enabled": "True" - } + "actionGroupId": "/subscriptions/13d371f9-5a39-46d5-8e1b-60158c49db84/resourceGroups/ContainerInsightsPrometheusCollector-Prod/providers/microsoft.insights/actiongroups/AMA-PrometheusCollectorAlertGroup" } ] }, @@ -147,9 +138,7 @@ }, "actions": [ { - "ActionProperties": { - "Icm.Enabled": "True" - } + "actionGroupId": "/subscriptions/13d371f9-5a39-46d5-8e1b-60158c49db84/resourceGroups/ContainerInsightsPrometheusCollector-Prod/providers/microsoft.insights/actiongroups/AMA-PrometheusCollectorAlertGroup" } ] }, @@ -170,9 +159,7 @@ }, "actions": [ { - "ActionProperties": { - "Icm.Enabled": "True" - } + "actionGroupId": 
"/subscriptions/13d371f9-5a39-46d5-8e1b-60158c49db84/resourceGroups/ContainerInsightsPrometheusCollector-Prod/providers/microsoft.insights/actiongroups/AMA-PrometheusCollectorAlertGroup" } ] }, @@ -193,9 +180,7 @@ }, "actions": [ { - "ActionProperties": { - "Icm.Enabled": "True" - } + "actionGroupId": "/subscriptions/13d371f9-5a39-46d5-8e1b-60158c49db84/resourceGroups/ContainerInsightsPrometheusCollector-Prod/providers/microsoft.insights/actiongroups/AMA-PrometheusCollectorAlertGroup" } ] }, @@ -216,9 +201,7 @@ }, "actions": [ { - "ActionProperties": { - "Icm.Enabled": "True" - } + "actionGroupId": "/subscriptions/13d371f9-5a39-46d5-8e1b-60158c49db84/resourceGroups/ContainerInsightsPrometheusCollector-Prod/providers/microsoft.insights/actiongroups/AMA-PrometheusCollectorAlertGroup" } ] }, @@ -239,15 +222,13 @@ }, "actions": [ { - "ActionProperties": { - "Icm.Enabled": "True" - } + "actionGroupId": "/subscriptions/13d371f9-5a39-46d5-8e1b-60158c49db84/resourceGroups/ContainerInsightsPrometheusCollector-Prod/providers/microsoft.insights/actiongroups/AMA-PrometheusCollectorAlertGroup" } ] }, { "alert": "CPU usage % greater than 90 for prometheus-collector containers on cluster ci-dev-aks-mac-eus", - "expression": "sum(sum by (cluster, namespace, pod, container) ( rate(container_cpu_usage_seconds_total{job=\"cadvisor\", image!=\"\", namespace=\"monitoring\", cluster=\"ci-dev-aks-mac-eus\", container=\"prometheus-collector\"}[5m]) ) * on (cluster, namespace, pod) group_left(node) topk by (cluster, namespace, pod) ( 1, max by(cluster, namespace, pod, node) (kube_pod_info{node!=\"\", namespace=\"monitoring\", cluster=\"ci-dev-aks-mac-eus\"}) )) by (container, pod) > 0.9", + "expression": "sum(sum by (cluster, namespace, pod, container) ( rate(container_cpu_usage_seconds_total{job=\"cadvisor\", image!=\"\", namespace=\"kube-system\", cluster=\"ci-dev-aks-mac-eus\", container=\"prometheus-collector\"}[5m]) ) * on (cluster, namespace, pod) group_left(node) topk by (cluster, 
namespace, pod) ( 1, max by(cluster, namespace, pod, node) (kube_pod_info{node!=\"\", namespace=\"kube-system\", cluster=\"ci-dev-aks-mac-eus\"}) )) by (container, pod) > 0.9", "for": "PT3M", "labels": { "cluster": "ci-dev-aks-mac-eus" @@ -262,15 +243,13 @@ }, "actions": [ { - "ActionProperties": { - "Icm.Enabled": "True" - } + "actionGroupId": "/subscriptions/13d371f9-5a39-46d5-8e1b-60158c49db84/resourceGroups/ContainerInsightsPrometheusCollector-Prod/providers/microsoft.insights/actiongroups/AMA-PrometheusCollectorAlertGroup" } ] }, { - "alert": "CPU usage % greater than 5 for prometheus-collector containers on cluster ci-dev-aks-mac-eus", - "expression": "sum(sum by (cluster, namespace, pod, container) ( rate(container_cpu_usage_seconds_total{job=\"cadvisor\", image!=\"\", namespace=\"monitoring\", cluster=\"ci-dev-aks-mac-eus\", container=\"prometheus-collector\"}[5m]) ) * on (cluster, namespace, pod) group_left(node) topk by (cluster, namespace, pod) ( 1, max by(cluster, namespace, pod, node) (kube_pod_info{node!=\"\", namespace=\"monitoring\", cluster=\"ci-dev-aks-mac-eus\"}) )) by (container, pod) > 0.5", + "alert": "CPU usage % greater than 50 for prometheus-collector containers on cluster ci-dev-aks-mac-eus", + "expression": "sum(sum by (cluster, namespace, pod, container) ( rate(container_cpu_usage_seconds_total{job=\"cadvisor\", image!=\"\", namespace=\"kube-system\", cluster=\"ci-dev-aks-mac-eus\", container=\"prometheus-collector\"}[5m]) ) * on (cluster, namespace, pod) group_left(node) topk by (cluster, namespace, pod) ( 1, max by(cluster, namespace, pod, node) (kube_pod_info{node!=\"\", namespace=\"kube-system\", cluster=\"ci-dev-aks-mac-eus\"}) )) by (container, pod) > 0.5", "for": "PT3M", "labels": { "cluster": "ci-dev-aks-mac-eus" @@ -285,21 +264,19 @@ }, "actions": [ { - "ActionProperties": { - "Icm.Enabled": "True" - } + "actionGroupId": 
"/subscriptions/13d371f9-5a39-46d5-8e1b-60158c49db84/resourceGroups/ContainerInsightsPrometheusCollector-Prod/providers/microsoft.insights/actiongroups/AMA-PrometheusCollectorAlertGroup" } ] }, { - "alert": "Memory usage % greater than 90 for prometheus-collector containers on cluster ci-dev-aks-mac-eus", - "expression": "(sum(container_memory_working_set_bytes{cluster=\"ci-dev-aks-mac-eus\", namespace=\"monitoring\", container=\"prometheus-collector\", image!=\"\"}) by (container, pod) / sum(kube_pod_container_resource_requests{cluster=\"ci-dev-aks-mac-eus\", namespace=\"monitoring\", container=\"prometheus-collector\", resource=\"memory\"}) by (container, pod)) > 0.9", + "alert": "Memory usage is high for prometheus-collector containers on cluster ci-dev-aks-mac-eus", + "expression": "(sum(container_memory_working_set_bytes{cluster=\"ci-dev-aks-mac-eus\", namespace=\"kube-system\", container=\"prometheus-collector\", image!=\"\"}) by (container, pod) / sum(kube_pod_container_resource_requests{cluster=\"ci-dev-aks-mac-eus\", namespace=\"kube-system\", container=\"prometheus-collector\", resource=\"memory\"}) by (container, pod)) > 1.9", "for": "PT3M", "labels": { "cluster": "ci-dev-aks-mac-eus" }, "annotations": { - "description": "Memory usage % greater than 90 for prometheus-collector containers on cluster ci-dev-aks-mac-eus" + "description": "Memory usage is high for prometheus-collector containers on cluster ci-dev-aks-mac-eus" }, "severity": 4, "resolveConfiguration": { @@ -308,147 +285,7 @@ }, "actions": [ { - "ActionProperties": { - "Icm.Enabled": "True" - } - } - ] - }, - { - "alert": "Build over build alert - CPU usage % exceeded for replicaset on cluster ci-dev-aks-mac-eus", - "expression": "(sum ( node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"ci-dev-aks-mac-eus\", namespace=\"kube-system\", container!=\"\", pod=~\"ama-metrics.*\"}) - sum ( 
node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"ci-dev-aks-mac-eus\", namespace=\"kube-system\", pod=~\"ama-metrics-node.*\"}) - sum ( node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"ci-dev-aks-mac-eus\", namespace=\"kube-system\", pod=~\"ama-metrics-ksm.*\"}))/ sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"ci-dev-aks-mac-eus\", namespace=\"kube-system\", resource=\"cpu\"})> 0.00025", - "for": "PT15M", - "labels": { - "cluster": "ci-dev-aks-mac-eus" - }, - "annotations": { - "description": "Build over build alert - CPU usage % exceeded for replicaset on cluster ci-dev-aks-mac-eus" - }, - "severity": 4, - "resolveConfiguration": { - "autoResolved": true, - "timeToResolve": "PT10M" - }, - "actions": [ - { - "ActionProperties": { - "Icm.Enabled": "True" - } - } - ] - }, - { - "alert": "Build over build alert - CPU usage % exceeded for daemonset on cluster ci-dev-aks-mac-eus", - "expression": "sum ( node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"ci-dev-aks-mac-eus\", namespace=\"kube-system\", container!=\"\", pod=~\"ama-metrics-node.*\"}) / sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"ci-dev-aks-mac-eus\", namespace=\"kube-system\", resource=\"cpu\"}) > 0.0008", - "for": "PT15M", - "labels": { - "cluster": "ci-dev-aks-mac-eus" - }, - "annotations": { - "description": "Build over build alert - CPU usage % exceeded for daemonset on cluster ci-dev-aks-mac-eus" - }, - "severity": 4, - "resolveConfiguration": { - "autoResolved": true, - "timeToResolve": "PT10M" - }, - "actions": [ - { - "ActionProperties": { - "Icm.Enabled": "True" - } - } - ] - }, - { - "alert": "Build over build alert - CPU usage % exceeded for k-s-m on cluster ci-dev-aks-mac-eus", - "expression": "sum ( node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"ci-dev-aks-mac-eus\", namespace=\"kube-system\", 
container!=\"\", pod=~\"ama-metrics-ksm.*\"})/ sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"ci-dev-aks-mac-eus\", namespace=\"kube-system\", resource=\"cpu\"}) > 0.00001183", - "for": "PT15M", - "labels": { - "cluster": "ci-dev-aks-mac-eus" - }, - "annotations": { - "description": "Build over build alert - CPU usage % exceeded for kube state metrics on cluster ci-dev-aks-mac-eus" - }, - "severity": 4, - "resolveConfiguration": { - "autoResolved": true, - "timeToResolve": "PT10M" - }, - "actions": [ - { - "ActionProperties": { - "Icm.Enabled": "True" - } - } - ] - }, - { - "alert": "Build over build alert - Memory usage % exceeded for k-s-m on cluster ci-dev-aks-mac-eus", - "expression": "sum(container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"ci-dev-aks-mac-eus\", namespace=\"kube-system\",container!=\"\", image!=\"\", pod=~\"ama-metrics-ksm.*\"}) / sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"ci-dev-aks-mac-eus\", namespace=\"kube-system\", resource=\"memory\"}) > 0.00023", - "for": "PT15M", - "labels": { - "cluster": "ci-dev-aks-mac-eus" - }, - "annotations": { - "description": "Build over build alert - Memory usage % exceeded for kube state metrics on cluster ci-dev-aks-mac-eus" - }, - "severity": 4, - "resolveConfiguration": { - "autoResolved": true, - "timeToResolve": "PT10M" - }, - "actions": [ - { - "ActionProperties": { - "Icm.Enabled": "True" - } - } - ] - }, - { - "alert": "Build over build alert - Memory usage % exceeded for replicaset on cluster ci-dev-aks-mac-eus", - "expression": "(sum(container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"ci-dev-aks-mac-eus\", namespace=\"kube-system\",container!=\"\", image!=\"\", pod=~\"ama-metrics.*\"}) - sum(container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"ci-dev-aks-mac-eus\", namespace=\"kube-system\",container!=\"\", image!=\"\", pod=~\"ama-metrics-node.*\"}) - 
sum(container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"ci-dev-aks-mac-eus\", namespace=\"kube-system\",container!=\"\", image!=\"\", pod=~\"ama-metrics-ksm.*\"})) / sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"ci-dev-aks-mac-eus\", namespace=\"kube-system\", resource=\"memory\"}) > 0.00225", - "for": "PT15M", - "labels": { - "cluster": "ci-dev-aks-mac-eus" - }, - "annotations": { - "description": "Build over build alert - Memory usage % exceeded for replicaset on cluster ci-dev-aks-mac-eus" - }, - "severity": 4, - "resolveConfiguration": { - "autoResolved": true, - "timeToResolve": "PT10M" - }, - "actions": [ - { - "ActionProperties": { - "Icm.Enabled": "True" - } - } - ] - }, - { - "alert": "Build over build alert - Memory usage % exceeded for daemonset on cluster ci-dev-aks-mac-eus", - "expression": "sum(container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"ci-dev-aks-mac-eus\", namespace=\"kube-system\",container!=\"\", image!=\"\", pod=~\"ama-metrics-node.*\"}) / sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"ci-dev-aks-mac-eus\", namespace=\"kube-system\", resource=\"memory\"}) > 0.0062", - "for": "PT15M", - "labels": { - "cluster": "ci-dev-aks-mac-eus" - }, - "annotations": { - "description": "Build over build alert - Memory usage % exceeded for daemonset on cluster ci-dev-aks-mac-eus" - }, - "severity": 4, - "resolveConfiguration": { - "autoResolved": true, - "timeToResolve": "PT10M" - }, - "actions": [ - { - "ActionProperties": { - "Icm.Enabled": "True" - } + "actionGroupId": "/subscriptions/13d371f9-5a39-46d5-8e1b-60158c49db84/resourceGroups/ContainerInsightsPrometheusCollector-Prod/providers/microsoft.insights/actiongroups/AMA-PrometheusCollectorAlertGroup" } ] } @@ -456,4 +293,4 @@ } } ] -} \ No newline at end of file +} diff --git a/internal/alerts/ci-dev-aks-wcus.json b/internal/alerts/ci-dev-aks-wcus.json deleted file mode 100644 index 3d439ac85..000000000 --- 
a/internal/alerts/ci-dev-aks-wcus.json +++ /dev/null @@ -1,459 +0,0 @@ -{ - "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#", - "contentVersion": "1.0.0.0", - "parameters": {}, - "variables": {}, - "resources": [ - { - "name": "containerinsights_dev_wcus_alerts", - "type": "Microsoft.AlertsManagement/prometheusRuleGroups", - "apiVersion": "2023-03-01", - "location": "westcentralus", - "properties": { - "description": "rule group for cluster /subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourceGroups/ci-dev-aks-wcus-rg/providers/Microsoft.ContainerService/managedClusters/ci-dev-aks-wcus in MAC: /subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourceGroups/ci-dev-aks-wcus-rg/providers/microsoft.monitor/accounts/ci-dev-aks-wcus-mac", - "scopes": [ - "/subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourceGroups/ci-dev-aks-wcus-rg/providers/microsoft.monitor/accounts/ci-dev-aks-wcus-mac" - ], - "rules": [ - { - "alert": "Amd64 metric missing in cluster ci-dev-aks-wcus", - "expression": "absent(node_uname_info{cluster=\"ci-dev-aks-wcus\", machine=\"x86_64\"}) == 1 or node_uname_info{cluster=\"ci-dev-aks-wcus\", machine=\"x86_64\"} == 0", - "for": "PT3M", - "labels": { - "cluster": "ci-dev-aks-wcus" - }, - "annotations": { - "description": "Amd64 metric missing in cluster ci-dev-aks-wcus" - }, - "severity": 4, - "resolveConfiguration": { - "autoResolved": true, - "timeToResolve": "PT10M" - }, - "actions": [ - { - "ActionProperties": { - "Icm.Enabled": "True" - } - } - ] - }, - { - "alert": "up metric missing for target = node in cluster ci-dev-aks-wcus", - "expression": "absent(up{cluster=\"ci-dev-aks-wcus\", job=\"node\"}) == 1 or up{cluster=\"ci-dev-aks-wcus\", job=\"node\"} == 0", - "for": "PT3M", - "labels": { - "cluster": "ci-dev-aks-wcus" - }, - "annotations": { - "description": "up metric is not flowing for target = node in cluster ci-dev-aks-wcus" - }, - "severity": 4, - "resolveConfiguration": { - 
"autoResolved": true, - "timeToResolve": "PT10M" - }, - "actions": [ - { - "ActionProperties": { - "Icm.Enabled": "True" - } - } - ] - }, - { - "alert": "up metric missing for target = kubelet in cluster ci-dev-aks-wcus", - "expression": "absent(up{cluster=\"ci-dev-aks-wcus\", job=\"kubelet\"}) == 1 or up{cluster=\"ci-dev-aks-wcus\", job=\"kubelet\"} == 0", - "for": "PT3M", - "labels": { - "cluster": "ci-dev-aks-wcus" - }, - "annotations": { - "description": "up metric is not flowing for target = kubelet in cluster ci-dev-aks-wcus" - }, - "severity": 4, - "resolveConfiguration": { - "autoResolved": true, - "timeToResolve": "PT10M" - }, - "actions": [ - { - "ActionProperties": { - "Icm.Enabled": "True" - } - } - ] - }, - { - "alert": "up metric missing for target = windows-exporter in cluster ci-dev-aks-wcus", - "expression": "up{cluster=\"ci-dev-aks-wcus\", job=\"windows-exporter\"} == 0", - "for": "PT3M", - "labels": { - "cluster": "ci-dev-aks-wcus" - }, - "annotations": { - "description": "up metric is not flowing for target = windows-exporter in cluster ci-dev-aks-wcus" - }, - "severity": 4, - "resolveConfiguration": { - "autoResolved": true, - "timeToResolve": "PT10M" - }, - "actions": [ - { - "ActionProperties": { - "Icm.Enabled": "True" - } - } - ] - }, - { - "alert": "up metric missing for target = kube-proxy in cluster ci-dev-aks-wcus", - "expression": "absent(up{cluster=\"ci-dev-aks-wcus\", job=\"kube-proxy\"}) == 1 or up{cluster=\"ci-dev-aks-wcus\", job=\"kube-proxy\"} == 0", - "for": "PT3M", - "labels": { - "cluster": "ci-dev-aks-wcus" - }, - "annotations": { - "description": "up metric is not flowing for target = kube-proxy in cluster ci-dev-aks-wcus" - }, - "severity": 4, - "resolveConfiguration": { - "autoResolved": true, - "timeToResolve": "PT10M" - }, - "actions": [ - { - "ActionProperties": { - "Icm.Enabled": "True" - } - } - ] - }, - { - "alert": "up metric missing for target = kube-apiserver in cluster ci-dev-aks-wcus", - "expression": 
"absent(up{cluster=\"ci-dev-aks-wcus\", job=\"kube-apiserver\"}) == 1 or up{cluster=\"ci-dev-aks-wcus\", job=\"kube-apiserver\"} == 0", - "for": "PT3M", - "labels": { - "cluster": "ci-dev-aks-wcus" - }, - "annotations": { - "description": "up metric is not flowing for target = kube-apiserver in cluster ci-dev-aks-wcus" - }, - "severity": 4, - "resolveConfiguration": { - "autoResolved": true, - "timeToResolve": "PT10M" - }, - "actions": [ - { - "ActionProperties": { - "Icm.Enabled": "True" - } - } - ] - }, - { - "alert": "up metric missing for target = kube-proxy-windows in cluster ci-dev-aks-wcus", - "expression": "up{cluster=\"ci-dev-aks-wcus\", job=\"kube-proxy-windows\"} == 0", - "for": "PT3M", - "labels": { - "cluster": "ci-dev-aks-wcus" - }, - "annotations": { - "description": "up metric is not flowing for target = kube-proxy-windows in cluster ci-dev-aks-wcus" - }, - "severity": 4, - "resolveConfiguration": { - "autoResolved": true, - "timeToResolve": "PT10M" - }, - "actions": [ - { - "ActionProperties": { - "Icm.Enabled": "True" - } - } - ] - }, - { - "alert": "up metric missing for target = kube-state-metrics in cluster ci-dev-aks-wcus", - "expression": "absent(up{cluster=\"ci-dev-aks-wcus\", job=\"kube-state-metrics\"}) == 1 or up{cluster=\"ci-dev-aks-wcus\", job=\"kube-state-metrics\"} == 0", - "for": "PT3M", - "labels": { - "cluster": "ci-dev-aks-wcus" - }, - "annotations": { - "description": "up metric is not flowing for target = kube-state-metrics in cluster ci-dev-aks-wcus" - }, - "severity": 4, - "resolveConfiguration": { - "autoResolved": true, - "timeToResolve": "PT10M" - }, - "actions": [ - { - "ActionProperties": { - "Icm.Enabled": "True" - } - } - ] - }, - { - "alert": "up metric missing for target = cadvisor in cluster ci-dev-aks-wcus", - "expression": "absent(up{cluster=\"ci-dev-aks-wcus\", job=\"cadvisor\"}) == 1 or up{cluster=\"ci-dev-aks-wcus\", job=\"cadvisor\"} == 0", - "for": "PT3M", - "labels": { - "cluster": "ci-dev-aks-wcus" - }, - 
"annotations": { - "description": "up metric is not flowing for target = cadvisor in cluster ci-dev-aks-wcus" - }, - "severity": 4, - "resolveConfiguration": { - "autoResolved": true, - "timeToResolve": "PT10M" - }, - "actions": [ - { - "ActionProperties": { - "Icm.Enabled": "True" - } - } - ] - }, - { - "alert": "up metric missing for target = kube-dns in cluster ci-dev-aks-wcus", - "expression": "absent(up{cluster=\"ci-dev-aks-wcus\", job=\"kube-dns\"}) == 1 or up{cluster=\"ci-dev-aks-wcus\", job=\"kube-dns\"} == 0", - "for": "PT3M", - "labels": { - "cluster": "ci-dev-aks-wcus" - }, - "annotations": { - "description": "up metric is not flowing for target = kube-dns in cluster ci-dev-aks-wcus" - }, - "severity": 4, - "resolveConfiguration": { - "autoResolved": true, - "timeToResolve": "PT10M" - }, - "actions": [ - { - "ActionProperties": { - "Icm.Enabled": "True" - } - } - ] - }, - { - "alert": "CPU usage % greater than 90 for prometheus-collector containers on cluster ci-dev-aks-wcus", - "expression": "sum(sum by (cluster, namespace, pod, container) ( rate(container_cpu_usage_seconds_total{job=\"cadvisor\", image!=\"\", namespace=\"monitoring\", cluster=\"ci-dev-aks-wcus\", container=\"prometheus-collector\"}[5m]) ) * on (cluster, namespace, pod) group_left(node) topk by (cluster, namespace, pod) ( 1, max by(cluster, namespace, pod, node) (kube_pod_info{node!=\"\", namespace=\"monitoring\", cluster=\"ci-dev-aks-wcus\"}) )) by (container, pod) > 0.9", - "for": "PT3M", - "labels": { - "cluster": "ci-dev-aks-wcus" - }, - "annotations": { - "description": "CPU usage greater than 90% for prometheus-collector on cluster ci-dev-aks-wcus" - }, - "severity": 4, - "resolveConfiguration": { - "autoResolved": true, - "timeToResolve": "PT10M" - }, - "actions": [ - { - "ActionProperties": { - "Icm.Enabled": "True" - } - } - ] - }, - { - "alert": "CPU usage % greater than 5 for prometheus-collector containers on cluster ci-dev-aks-wcus", - "expression": "sum(sum by (cluster, 
namespace, pod, container) ( rate(container_cpu_usage_seconds_total{job=\"cadvisor\", image!=\"\", namespace=\"monitoring\", cluster=\"ci-dev-aks-wcus\", container=\"prometheus-collector\"}[5m]) ) * on (cluster, namespace, pod) group_left(node) topk by (cluster, namespace, pod) ( 1, max by(cluster, namespace, pod, node) (kube_pod_info{node!=\"\", namespace=\"monitoring\", cluster=\"ci-dev-aks-wcus\"}) )) by (container, pod) > 0.5", - "for": "PT3M", - "labels": { - "cluster": "ci-dev-aks-wcus" - }, - "annotations": { - "description": "CPU usage greater than 5% for prometheus-collector on cluster ci-dev-aks-wcus" - }, - "severity": 4, - "resolveConfiguration": { - "autoResolved": true, - "timeToResolve": "PT10M" - }, - "actions": [ - { - "ActionProperties": { - "Icm.Enabled": "True" - } - } - ] - }, - { - "alert": "Memory usage % greater than 90 for prometheus-collector containers on cluster ci-dev-aks-wcus", - "expression": "(sum(container_memory_working_set_bytes{cluster=\"ci-dev-aks-wcus\", namespace=\"monitoring\", container=\"prometheus-collector\", image!=\"\"}) by (container, pod) / sum(kube_pod_container_resource_requests{cluster=\"ci-dev-aks-wcus\", namespace=\"monitoring\", container=\"prometheus-collector\", resource=\"memory\"}) by (container, pod)) > 0.9", - "for": "PT3M", - "labels": { - "cluster": "ci-dev-aks-wcus" - }, - "annotations": { - "description": "Memory usage % greater than 90 for prometheus-collector containers on cluster ci-dev-aks-wcus" - }, - "severity": 4, - "resolveConfiguration": { - "autoResolved": true, - "timeToResolve": "PT10M" - }, - "actions": [ - { - "ActionProperties": { - "Icm.Enabled": "True" - } - } - ] - }, - { - "alert": "Build over build alert - CPU usage % exceeded for replicaset on cluster ci-dev-aks-wcus", - "expression": "sum(sum by (cluster, namespace, pod, container) ( irate(container_cpu_usage_seconds_total{image!=\"\",cluster=\"ci-dev-aks-wcus\", pod=~\".*prometheus-collector.*\"}[5m])) * on (cluster, namespace, 
pod) group_left(node) topk by (cluster, namespace, pod) ( 1, max by(cluster, namespace, pod, node) (kube_pod_info{node!=\"\"}))) - sum(sum by (cluster, namespace, pod, container) ( irate(container_cpu_usage_seconds_total{job=\"cadvisor\", image!=\"\",cluster=\"ci-dev-aks-wcus\", pod=~\".*prometheus-collector-node.*\"}[5m])) * on (cluster, namespace, pod) group_left(node) topk by (cluster, namespace, pod) ( 1, max by(cluster, namespace, pod, node) (kube_pod_info{node!=\"\"}))) / sum(kube_pod_container_resource_limits{cluster=\"ci-dev-aks-wcus\", namespace=\"monitoring\", resource=\"cpu\"}) > 0.103", - "for": "PT15M", - "labels": { - "cluster": "ci-dev-aks-wcus" - }, - "annotations": { - "description": "Build over build alert - CPU usage % exceeded for replicaset on cluster ci-dev-aks-wcus" - }, - "severity": 4, - "resolveConfiguration": { - "autoResolved": true, - "timeToResolve": "PT10M" - }, - "actions": [ - { - "ActionProperties": { - "Icm.Enabled": "True" - } - } - ] - }, - { - "alert": "Build over build alert - CPU usage % exceeded for daemonset on cluster ci-dev-aks-wcus", - "expression": "sum(sum by (cluster, namespace, pod, container) ( irate(container_cpu_usage_seconds_total{image!=\"\",cluster=\"ci-dev-aks-wcus\", pod=~\".*prometheus-collector-node.*\"}[5m])) * on (cluster, namespace, pod) group_left(node) topk by (cluster, namespace, pod) ( 1, max by(cluster, namespace, pod, node) (kube_pod_info{node!=\"\"}))) / sum(kube_pod_container_resource_limits{cluster=\"ci-dev-aks-wcus\", namespace=\"monitoring\", resource=\"cpu\"}) > 0.0078", - "for": "PT15M", - "labels": { - "cluster": "ci-dev-aks-wcus" - }, - "annotations": { - "description": "Build over build alert - CPU usage % exceeded for daemonset on cluster ci-dev-aks-wcus" - }, - "severity": 4, - "resolveConfiguration": { - "autoResolved": true, - "timeToResolve": "PT10M" - }, - "actions": [ - { - "ActionProperties": { - "Icm.Enabled": "True" - } - } - ] - }, - { - "alert": "Build over build alert - CPU 
usage % exceeded for k-s-m on cluster ci-dev-aks-wcus", - "expression": "sum(sum by (cluster, namespace, pod, container) ( irate(container_cpu_usage_seconds_total{ image!=\"\",cluster=\"ci-dev-aks-wcus\", pod=~\".*kube-state-metrics.*\"}[5m])) * on (cluster, namespace, pod) group_left(node) topk by (cluster, namespace, pod) ( 1, max by(cluster, namespace, pod, node) (kube_pod_info{node!=\"\"}))) / sum(kube_pod_container_resource_limits{cluster=\"ci-dev-aks-wcus\", namespace=\"monitoring\", resource=\"cpu\"}) > 0.00014", - "for": "PT15M", - "labels": { - "cluster": "ci-dev-aks-wcus" - }, - "annotations": { - "description": "Build over build alert - CPU usage % exceeded for kube state metrics on cluster ci-dev-aks-wcus" - }, - "severity": 4, - "resolveConfiguration": { - "autoResolved": true, - "timeToResolve": "PT10M" - }, - "actions": [ - { - "ActionProperties": { - "Icm.Enabled": "True" - } - } - ] - }, - { - "alert": "Build over build alert - Memory usage % exceeded for k-s-m on cluster ci-dev-aks-wcus", - "expression": "sum(container_memory_working_set_bytes{cluster=\"ci-dev-aks-wcus\", namespace=\"monitoring\",container!=\"\", image!=\"\", pod=~\".*kube-state-metrics.*\"}) / sum(kube_pod_container_resource_limits{cluster=\"ci-dev-aks-wcus\", namespace=\"monitoring\", resource=\"memory\"}) > 0.002", - "for": "PT15M", - "labels": { - "cluster": "ci-dev-aks-wcus" - }, - "annotations": { - "description": "Build over build alert - Memory usage % exceeded for kube state metrics on cluster ci-dev-aks-wcus" - }, - "severity": 4, - "resolveConfiguration": { - "autoResolved": true, - "timeToResolve": "PT10M" - }, - "actions": [ - { - "ActionProperties": { - "Icm.Enabled": "True" - } - } - ] - }, - { - "alert": "Build over build alert - Memory usage % exceeded for replicaset on cluster ci-dev-aks-wcus", - "expression": "(sum(container_memory_working_set_bytes{cluster=\"ci-dev-aks-wcus\", namespace=\"monitoring\",container!=\"\", image!=\"\", 
pod=~\".*prometheus-collector.*\"}) - sum(container_memory_working_set_bytes{cluster=\"ci-dev-aks-wcus\", namespace=\"monitoring\",container!=\"\", image!=\"\", pod=~\".*prometheus-collector-node.*\"})) / sum(kube_pod_container_resource_limits{cluster=\"ci-dev-aks-wcus\", namespace=\"monitoring\", resource=\"memory\"}) > 0.0157", - "for": "PT15M", - "labels": { - "cluster": "ci-dev-aks-wcus" - }, - "annotations": { - "description": "Build over build alert - Memory usage % exceeded for replicaset on cluster ci-dev-aks-wcus" - }, - "severity": 4, - "resolveConfiguration": { - "autoResolved": true, - "timeToResolve": "PT10M" - }, - "actions": [ - { - "ActionProperties": { - "Icm.Enabled": "True" - } - } - ] - }, - { - "alert": "Build over build alert - Memory usage % exceeded for daemonset on cluster ci-dev-aks-wcus", - "expression": "sum(container_memory_working_set_bytes{cluster=\"ci-dev-aks-wcus\", namespace=\"monitoring\",container!=\"\", image!=\"\", pod=~\".*prometheus-collector-node.*\"}) / sum(kube_pod_container_resource_limits{cluster=\"ci-dev-aks-wcus\", namespace=\"monitoring\", resource=\"memory\"}) > 0.033", - "for": "PT15M", - "labels": { - "cluster": "ci-dev-aks-wcus" - }, - "annotations": { - "description": "Build over build alert - Memory usage % exceeded for daemonset on cluster ci-dev-aks-wcus" - }, - "severity": 4, - "resolveConfiguration": { - "autoResolved": true, - "timeToResolve": "PT10M" - }, - "actions": [ - { - "ActionProperties": { - "Icm.Enabled": "True" - } - } - ] - } - ] - } - } - ] -} \ No newline at end of file diff --git a/internal/alerts/ci-dev-arc-wcus.json b/internal/alerts/ci-dev-arc-wcus.json new file mode 100644 index 000000000..eaa360657 --- /dev/null +++ b/internal/alerts/ci-dev-arc-wcus.json @@ -0,0 +1,191 @@ +{ + "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#", + "contentVersion": "1.0.0.0", + "parameters": {}, + "variables": {}, + "resources": [ + { + "name": 
"containerinsights_dev_mac_eus_alerts", + "type": "Microsoft.AlertsManagement/prometheusRuleGroups", + "apiVersion": "2023-03-01", + "location": "eastus", + "properties": { + "description": "rule group for cluster ci-dev-arc-wcus in MAC: ci-dev-arc-amw", + "scopes": [ + "/subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourceGroups/ci-dev-arc-wcus/providers/microsoft.monitor/accounts/ci-dev-arc-wcus", + "/subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourceGroups/ci-dev-arc-wcus/providers/microsoft.monitor/accounts/ci-dev-arc-amw" + ], + "rules": [ + { + "alert": "Amd64 metric missing in cluster ci-dev-arc-wcus", + "expression": "absent(node_uname_info{cluster=\"ci-dev-arc-wcus\", machine=\"x86_64\"}) == 1 or node_uname_info{cluster=\"ci-dev-arc-wcus\", machine=\"x86_64\"} == 0", + "for": "PT3M", + "labels": { + "cluster": "ci-dev-arc-wcus" + }, + "annotations": { + "description": "Amd64 metric missing in cluster ci-dev-arc-wcus" + }, + "severity": 4, + "resolveConfiguration": { + "autoResolved": true, + "timeToResolve": "PT10M" + }, + "actions": [ + { + "actionGroupId": "/subscriptions/13d371f9-5a39-46d5-8e1b-60158c49db84/resourceGroups/ContainerInsightsPrometheusCollector-Prod/providers/microsoft.insights/actiongroups/AMA-PrometheusCollectorAlertGroup" + } + ] + }, + { + "alert": "up metric missing for target = node in cluster ci-dev-arc-wcus", + "expression": "absent(up{cluster=\"ci-dev-arc-wcus\", job=\"node\"}) == 1 or up{cluster=\"ci-dev-arc-wcus\", job=\"node\"} == 0", + "for": "PT3M", + "labels": { + "cluster": "ci-dev-arc-wcus" + }, + "annotations": { + "description": "up metric is not flowing for target = node in cluster ci-dev-arc-wcus" + }, + "severity": 4, + "resolveConfiguration": { + "autoResolved": true, + "timeToResolve": "PT10M" + }, + "actions": [ + { + "actionGroupId": 
"/subscriptions/13d371f9-5a39-46d5-8e1b-60158c49db84/resourceGroups/ContainerInsightsPrometheusCollector-Prod/providers/microsoft.insights/actiongroups/AMA-PrometheusCollectorAlertGroup" + } + ] + }, + { + "alert": "up metric missing for target = kubelet in cluster ci-dev-arc-wcus", + "expression": "absent(up{cluster=\"ci-dev-arc-wcus\", job=\"kubelet\"}) == 1 or up{cluster=\"ci-dev-arc-wcus\", job=\"kubelet\"} == 0", + "for": "PT3M", + "labels": { + "cluster": "ci-dev-arc-wcus" + }, + "annotations": { + "description": "up metric is not flowing for target = kubelet in cluster ci-dev-arc-wcus" + }, + "severity": 4, + "resolveConfiguration": { + "autoResolved": true, + "timeToResolve": "PT10M" + }, + "actions": [ + { + "actionGroupId": "/subscriptions/13d371f9-5a39-46d5-8e1b-60158c49db84/resourceGroups/ContainerInsightsPrometheusCollector-Prod/providers/microsoft.insights/actiongroups/AMA-PrometheusCollectorAlertGroup" + } + ] + }, + { + "alert": "up metric missing for target = kube-state-metrics in cluster ci-dev-arc-wcus", + "expression": "absent(up{cluster=\"ci-dev-arc-wcus\", job=\"kube-state-metrics\"}) == 1 or up{cluster=\"ci-dev-arc-wcus\", job=\"kube-state-metrics\"} == 0", + "for": "PT3M", + "labels": { + "cluster": "ci-dev-arc-wcus" + }, + "annotations": { + "description": "up metric is not flowing for target = kube-state-metrics in cluster ci-dev-arc-wcus" + }, + "severity": 4, + "resolveConfiguration": { + "autoResolved": true, + "timeToResolve": "PT10M" + }, + "actions": [ + { + "actionGroupId": "/subscriptions/13d371f9-5a39-46d5-8e1b-60158c49db84/resourceGroups/ContainerInsightsPrometheusCollector-Prod/providers/microsoft.insights/actiongroups/AMA-PrometheusCollectorAlertGroup" + } + ] + }, + { + "alert": "up metric missing for target = cadvisor in cluster ci-dev-arc-wcus", + "expression": "absent(up{cluster=\"ci-dev-arc-wcus\", job=\"cadvisor\"}) == 1 or up{cluster=\"ci-dev-arc-wcus\", job=\"cadvisor\"} == 0", + "for": "PT3M", + "labels": { + 
"cluster": "ci-dev-arc-wcus" + }, + "annotations": { + "description": "up metric is not flowing for target = cadvisor in cluster ci-dev-arc-wcus" + }, + "severity": 4, + "resolveConfiguration": { + "autoResolved": true, + "timeToResolve": "PT10M" + }, + "actions": [ + { + "actionGroupId": "/subscriptions/13d371f9-5a39-46d5-8e1b-60158c49db84/resourceGroups/ContainerInsightsPrometheusCollector-Prod/providers/microsoft.insights/actiongroups/AMA-PrometheusCollectorAlertGroup" + } + ] + }, + { + "alert": "CPU usage % greater than 90 for prometheus-collector containers on cluster ci-dev-arc-wcus", + "expression": "sum(sum by (cluster, namespace, pod, container) ( rate(container_cpu_usage_seconds_total{job=\"cadvisor\", image!=\"\", namespace=\"kube-system\", cluster=\"ci-dev-arc-wcus\", container=\"prometheus-collector\"}[5m]) ) * on (cluster, namespace, pod) group_left(node) topk by (cluster, namespace, pod) ( 1, max by(cluster, namespace, pod, node) (kube_pod_info{node!=\"\", namespace=\"kube-system\", cluster=\"ci-dev-arc-wcus\"}) )) by (container, pod) > 0.9", + "for": "PT3M", + "labels": { + "cluster": "ci-dev-arc-wcus" + }, + "annotations": { + "description": "CPU usage greater than 90% for prometheus-collector on cluster ci-dev-arc-wcus" + }, + "severity": 4, + "resolveConfiguration": { + "autoResolved": true, + "timeToResolve": "PT10M" + }, + "actions": [ + { + "actionGroupId": "/subscriptions/13d371f9-5a39-46d5-8e1b-60158c49db84/resourceGroups/ContainerInsightsPrometheusCollector-Prod/providers/microsoft.insights/actiongroups/AMA-PrometheusCollectorAlertGroup" + } + ] + }, + { + "alert": "CPU usage % greater than 50 for prometheus-collector containers on cluster ci-dev-arc-wcus", + "expression": "sum(sum by (cluster, namespace, pod, container) ( rate(container_cpu_usage_seconds_total{job=\"cadvisor\", image!=\"\", namespace=\"kube-system\", cluster=\"ci-dev-arc-wcus\", container=\"prometheus-collector\"}[5m]) ) * on (cluster, namespace, pod) group_left(node) 
topk by (cluster, namespace, pod) ( 1, max by(cluster, namespace, pod, node) (kube_pod_info{node!=\"\", namespace=\"kube-system\", cluster=\"ci-dev-arc-wcus\"}) )) by (container, pod) > 0.5", + "for": "PT3M", + "labels": { + "cluster": "ci-dev-arc-wcus" + }, + "annotations": { + "description": "CPU usage greater than 50% for prometheus-collector on cluster ci-dev-arc-wcus" + }, + "severity": 4, + "resolveConfiguration": { + "autoResolved": true, + "timeToResolve": "PT10M" + }, + "actions": [ + { + "actionGroupId": "/subscriptions/13d371f9-5a39-46d5-8e1b-60158c49db84/resourceGroups/ContainerInsightsPrometheusCollector-Prod/providers/microsoft.insights/actiongroups/AMA-PrometheusCollectorAlertGroup" + } + ] + }, + { + "alert": "Memory usage is high for prometheus-collector containers on cluster ci-dev-arc-wcus", + "expression": "(sum(container_memory_working_set_bytes{cluster=\"ci-dev-arc-wcus\", namespace=\"kube-system\", container=\"prometheus-collector\", image!=\"\"}) by (container, pod) / sum(kube_pod_container_resource_requests{cluster=\"ci-dev-arc-wcus\", namespace=\"kube-system\", container=\"prometheus-collector\", resource=\"memory\"}) by (container, pod)) > 1.9", + "for": "PT3M", + "labels": { + "cluster": "ci-dev-arc-wcus" + }, + "annotations": { + "description": "Memory usage is high for prometheus-collector containers on cluster ci-dev-arc-wcus" + }, + "severity": 4, + "resolveConfiguration": { + "autoResolved": true, + "timeToResolve": "PT10M" + }, + "actions": [ + { + "actionGroupId": "/subscriptions/13d371f9-5a39-46d5-8e1b-60158c49db84/resourceGroups/ContainerInsightsPrometheusCollector-Prod/providers/microsoft.insights/actiongroups/AMA-PrometheusCollectorAlertGroup" + } + ] + } + ] + } + } + ] +} diff --git a/internal/alerts/ci-prod-aks-mac-weu.json b/internal/alerts/ci-prod-aks-mac-weu.json index 4d8a119eb..0d8b4e735 100644 --- a/internal/alerts/ci-prod-aks-mac-weu.json +++ b/internal/alerts/ci-prod-aks-mac-weu.json @@ -10,9 +10,10 @@ "apiVersion": 
"2023-03-01", "location": "westeurope", "properties": { - "description": "rule group for cluster /subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourcegroups/ci-prod-aks-mac-weu-rg/providers/microsoft.containerservice/managedclusters/ci-prod-aks-mac-weu in MAC: /subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourceGroups/ci-prod-aks-mac-weu-rg/providers/Microsoft.Monitor/accounts/ci-prod-aks-weu-mac", + "description": "rule group for cluster ci-prod-aks-mac-weu in MAC: ci-prod-aks-weu-mac", "scopes": [ - "/subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourceGroups/ci-prod-aks-mac-weu-rg/providers/Microsoft.Monitor/accounts/ci-prod-aks-weu-mac" + "/subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourceGroups/ci-prod-aks-mac-weu-rg/providers/Microsoft.Monitor/accounts/ci-prod-aks-weu-mac", + "/subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourceGroups/ci-prod-aks-mac-weu-rg/providers/Microsoft.ContainerService/managedClusters/ci-prod-aks-mac-weu" ], "rules": [ { @@ -32,9 +33,7 @@ }, "actions": [ { - "ActionProperties": { - "Icm.Enabled": "True" - } + "actionGroupId": "/subscriptions/13d371f9-5a39-46d5-8e1b-60158c49db84/resourceGroups/ContainerInsightsPrometheusCollector-Prod/providers/microsoft.insights/actiongroups/AMA-PrometheusCollectorAlertGroup" } ] }, @@ -55,9 +54,7 @@ }, "actions": [ { - "ActionProperties": { - "Icm.Enabled": "True" - } + "actionGroupId": "/subscriptions/13d371f9-5a39-46d5-8e1b-60158c49db84/resourceGroups/ContainerInsightsPrometheusCollector-Prod/providers/microsoft.insights/actiongroups/AMA-PrometheusCollectorAlertGroup" } ] }, @@ -78,9 +75,7 @@ }, "actions": [ { - "ActionProperties": { - "Icm.Enabled": "True" - } + "actionGroupId": "/subscriptions/13d371f9-5a39-46d5-8e1b-60158c49db84/resourceGroups/ContainerInsightsPrometheusCollector-Prod/providers/microsoft.insights/actiongroups/AMA-PrometheusCollectorAlertGroup" } ] }, @@ -101,9 +96,7 @@ }, "actions": [ { - "ActionProperties": { - "Icm.Enabled": "True" - } + 
"actionGroupId": "/subscriptions/13d371f9-5a39-46d5-8e1b-60158c49db84/resourceGroups/ContainerInsightsPrometheusCollector-Prod/providers/microsoft.insights/actiongroups/AMA-PrometheusCollectorAlertGroup" } ] }, @@ -124,9 +117,7 @@ }, "actions": [ { - "ActionProperties": { - "Icm.Enabled": "True" - } + "actionGroupId": "/subscriptions/13d371f9-5a39-46d5-8e1b-60158c49db84/resourceGroups/ContainerInsightsPrometheusCollector-Prod/providers/microsoft.insights/actiongroups/AMA-PrometheusCollectorAlertGroup" } ] }, @@ -147,9 +138,7 @@ }, "actions": [ { - "ActionProperties": { - "Icm.Enabled": "True" - } + "actionGroupId": "/subscriptions/13d371f9-5a39-46d5-8e1b-60158c49db84/resourceGroups/ContainerInsightsPrometheusCollector-Prod/providers/microsoft.insights/actiongroups/AMA-PrometheusCollectorAlertGroup" } ] }, @@ -170,9 +159,7 @@ }, "actions": [ { - "ActionProperties": { - "Icm.Enabled": "True" - } + "actionGroupId": "/subscriptions/13d371f9-5a39-46d5-8e1b-60158c49db84/resourceGroups/ContainerInsightsPrometheusCollector-Prod/providers/microsoft.insights/actiongroups/AMA-PrometheusCollectorAlertGroup" } ] }, @@ -193,9 +180,7 @@ }, "actions": [ { - "ActionProperties": { - "Icm.Enabled": "True" - } + "actionGroupId": "/subscriptions/13d371f9-5a39-46d5-8e1b-60158c49db84/resourceGroups/ContainerInsightsPrometheusCollector-Prod/providers/microsoft.insights/actiongroups/AMA-PrometheusCollectorAlertGroup" } ] }, @@ -216,9 +201,7 @@ }, "actions": [ { - "ActionProperties": { - "Icm.Enabled": "True" - } + "actionGroupId": "/subscriptions/13d371f9-5a39-46d5-8e1b-60158c49db84/resourceGroups/ContainerInsightsPrometheusCollector-Prod/providers/microsoft.insights/actiongroups/AMA-PrometheusCollectorAlertGroup" } ] }, @@ -239,15 +222,13 @@ }, "actions": [ { - "ActionProperties": { - "Icm.Enabled": "True" - } + "actionGroupId": 
"/subscriptions/13d371f9-5a39-46d5-8e1b-60158c49db84/resourceGroups/ContainerInsightsPrometheusCollector-Prod/providers/microsoft.insights/actiongroups/AMA-PrometheusCollectorAlertGroup" } ] }, { "alert": "CPU usage % greater than 90 for prometheus-collector containers on cluster /subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourcegroups/ci-prod-aks-mac-weu-rg/providers/microsoft.containerservice/managedclusters/ci-prod-aks-mac-weu", - "expression": "sum(sum by (cluster, namespace, pod, container) ( rate(container_cpu_usage_seconds_total{job=\"cadvisor\", image!=\"\", namespace=\"monitoring\", cluster=\"/subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourcegroups/ci-prod-aks-mac-weu-rg/providers/microsoft.containerservice/managedclusters/ci-prod-aks-mac-weu\", container=\"prometheus-collector\"}[5m]) ) * on (cluster, namespace, pod) group_left(node) topk by (cluster, namespace, pod) ( 1, max by(cluster, namespace, pod, node) (kube_pod_info{node!=\"\", namespace=\"monitoring\", cluster=\"/subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourcegroups/ci-prod-aks-mac-weu-rg/providers/microsoft.containerservice/managedclusters/ci-prod-aks-mac-weu\"}) )) by (container, pod) > 0.9", + "expression": "sum(sum by (cluster, namespace, pod, container) ( rate(container_cpu_usage_seconds_total{job=\"cadvisor\", image!=\"\", namespace=\"kube-system\", cluster=\"ci-prod-aks-mac-weu\", container=\"prometheus-collector\"}[5m]) ) * on (cluster, namespace, pod) group_left(node) topk by (cluster, namespace, pod) ( 1, max by(cluster, namespace, pod, node) (kube_pod_info{node!=\"\", namespace=\"kube-system\", cluster=\"ci-prod-aks-mac-weu\"}) )) by (container, pod) > 0.9", "for": "PT3M", "labels": { "cluster": "/subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourcegroups/ci-prod-aks-mac-weu-rg/providers/microsoft.containerservice/managedclusters/ci-prod-aks-mac-weu" @@ -262,136 +243,19 @@ }, "actions": [ { - "ActionProperties": { - "Icm.Enabled": "True" - } + 
"actionGroupId": "/subscriptions/13d371f9-5a39-46d5-8e1b-60158c49db84/resourceGroups/ContainerInsightsPrometheusCollector-Prod/providers/microsoft.insights/actiongroups/AMA-PrometheusCollectorAlertGroup" } ] }, { - "alert": "Memory usage % greater than 90 for prometheus-collector containers on cluster /subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourcegroups/ci-prod-aks-mac-weu-rg/providers/microsoft.containerservice/managedclusters/ci-prod-aks-mac-weu", - "expression": "(sum(container_memory_working_set_bytes{cluster=\"/subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourcegroups/ci-prod-aks-mac-weu-rg/providers/microsoft.containerservice/managedclusters/ci-prod-aks-mac-weu\", namespace=\"monitoring\", container=\"prometheus-collector\", image!=\"\"}) by (container, pod) / sum(kube_pod_container_resource_requests{cluster=\"/subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourcegroups/ci-prod-aks-mac-weu-rg/providers/microsoft.containerservice/managedclusters/ci-prod-aks-mac-weu\", namespace=\"monitoring\", container=\"prometheus-collector\", resource=\"memory\"}) by (container, pod)) > 0.9", + "alert": "CPU usage % greater than 50 for prometheus-collector containers on cluster ci-prod-aks-mac-weu", + "expression": "sum(sum by (cluster, namespace, pod, container) ( rate(container_cpu_usage_seconds_total{job=\"cadvisor\", image!=\"\", namespace=\"kube-system\", cluster=\"ci-prod-aks-mac-weu\", container=\"prometheus-collector\"}[5m]) ) * on (cluster, namespace, pod) group_left(node) topk by (cluster, namespace, pod) ( 1, max by(cluster, namespace, pod, node) (kube_pod_info{node!=\"\", namespace=\"kube-system\", cluster=\"ci-prod-aks-mac-weu\"}) )) by (container, pod) > 0.5", "for": "PT3M", "labels": { - "cluster": "/subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourcegroups/ci-prod-aks-mac-weu-rg/providers/microsoft.containerservice/managedclusters/ci-prod-aks-mac-weu" - }, - "annotations": { - "description": "Memory usage % greater than 90 
for prometheus-collector containers on cluster /subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourcegroups/ci-prod-aks-mac-weu-rg/providers/microsoft.containerservice/managedclusters/ci-prod-aks-mac-weu" - }, - "severity": 4, - "resolveConfiguration": { - "autoResolved": true, - "timeToResolve": "PT10M" - }, - "actions": [ - { - "ActionProperties": { - "Icm.Enabled": "True" - } - } - ] - }, - { - "alert": "Build over build alert - CPU usage % exceeded for ama-metrics replicaset on cluster /subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourcegroups/ci-prod-aks-mac-weu-rg/providers/microsoft.containerservice/managedclusters/ci-prod-aks-mac-weu", - "expression": "(sum ( node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"ci-prod-aks-mac-weu\", namespace=\"kube-system\", container!=\"\", pod=~\"ama-metrics.*\"}) - sum ( node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"ci-prod-aks-mac-weu\", namespace=\"kube-system\", pod=~\"ama-metrics-node.*\"}) - sum ( node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"ci-prod-aks-mac-weu\", namespace=\"kube-system\", pod=~\"ama-metrics-ksm.*\"}))/ sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"ci-prod-aks-mac-weu\", namespace=\"kube-system\", resource=\"cpu\"}) > 0.000200", - "for": "PT15M", - "labels": { - "cluster": "/subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourcegroups/ci-prod-aks-mac-weu-rg/providers/microsoft.containerservice/managedclusters/ci-prod-aks-mac-weu" - }, - "annotations": { - "description": "CPU usage % exceeded for ama-metrics replicaset on cluster /subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourcegroups/ci-prod-aks-mac-weu-rg/providers/microsoft.containerservice/managedclusters/ci-prod-aks-mac-weu" - }, - "severity": 4, - "resolveConfiguration": { - "autoResolved": true, - "timeToResolve": "PT10M" - }, - "actions": [ - { - "ActionProperties": { - 
"Icm.Enabled": "True" - } - } - ] - }, - { - "alert": "Build over build alert - CPU usage % exceeded for ama-metrics daemonset on cluster /subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourcegroups/ci-prod-aks-mac-weu-rg/providers/microsoft.containerservice/managedclusters/ci-prod-aks-mac-weu", - "expression": "sum ( node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"ci-prod-aks-mac-weu\", namespace=\"kube-system\", container!=\"\", pod=~\"ama-metrics-node.*\"}) / sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"ci-prod-aks-mac-weu\", namespace=\"kube-system\", resource=\"cpu\"}) > 0.000510", - "for": "PT15M", - "labels": { - "cluster": "/subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourcegroups/ci-prod-aks-mac-weu-rg/providers/microsoft.containerservice/managedclusters/ci-prod-aks-mac-weu" - }, - "annotations": { - "description": "CPU usage % exceeded for ama-metrics daemonset on cluster /subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourcegroups/ci-prod-aks-mac-weu-rg/providers/microsoft.containerservice/managedclusters/ci-prod-aks-mac-weu" - }, - "severity": 4, - "resolveConfiguration": { - "autoResolved": true, - "timeToResolve": "PT10M" - }, - "actions": [ - { - "ActionProperties": { - "Icm.Enabled": "True" - } - } - ] - }, - { - "alert": "Build over build alert - CPU usage % exceeded for ama-metrics kube state metrics on cluster /subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourcegroups/ci-prod-aks-mac-weu-rg/providers/microsoft.containerservice/managedclusters/ci-prod-aks-mac-weu", - "expression": "sum ( node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"ci-prod-aks-mac-weu\", namespace=\"kube-system\", container!=\"\", pod=~\"ama-metrics-ksm.*\"})/ sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"ci-prod-aks-mac-weu\", namespace=\"kube-system\", resource=\"cpu\"}) > 0.00000953", - "for": "PT15M", - "labels": { 
- "cluster": "/subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourcegroups/ci-prod-aks-mac-weu-rg/providers/microsoft.containerservice/managedclusters/ci-prod-aks-mac-weu" - }, - "annotations": { - "description": " CPU usage % exceeded for ama-metrics kube state metrics on cluster /subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourcegroups/ci-prod-aks-mac-weu-rg/providers/microsoft.containerservice/managedclusters/ci-prod-aks-mac-weu" - }, - "severity": 4, - "resolveConfiguration": { - "autoResolved": true, - "timeToResolve": "PT10M" - }, - "actions": [ - { - "ActionProperties": { - "Icm.Enabled": "True" - } - } - ] - }, - { - "alert": "Build over build alert - Memory usage % exceeded for ama-metrics kube state metrics on cluster /subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourcegroups/ci-prod-aks-mac-weu-rg/providers/microsoft.containerservice/managedclusters/ci-prod-aks-mac-weu", - "expression": "sum(container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"ci-prod-aks-mac-weu\", namespace=\"kube-system\",container!=\"\", image!=\"\", pod=~\"ama-metrics-ksm.*\"}) / sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"ci-prod-aks-mac-weu\", namespace=\"kube-system\", resource=\"memory\"}) > 0.000230", - "for": "PT15M", - "labels": { - "cluster": "/subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourcegroups/ci-prod-aks-mac-weu-rg/providers/microsoft.containerservice/managedclusters/ci-prod-aks-mac-weu" - }, - "annotations": { - "description": " Memory usage % exceeded for ama-metrics kube state metrics on cluster /subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourcegroups/ci-prod-aks-mac-weu-rg/providers/microsoft.containerservice/managedclusters/ci-prod-aks-mac-weu" - }, - "severity": 4, - "resolveConfiguration": { - "autoResolved": true, - "timeToResolve": "PT10M" - }, - "actions": [ - { - "ActionProperties": { - "Icm.Enabled": "True" - } - } - ] - }, - { - "alert": "Build over build alert - Memory 
usage % exceeded for ama-metrics daemonset on cluster /subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourcegroups/ci-prod-aks-mac-weu-rg/providers/microsoft.containerservice/managedclusters/ci-prod-aks-mac-weu", - "expression": "sum(container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"ci-prod-aks-mac-weu\", namespace=\"kube-system\",container!=\"\", image!=\"\", pod=~\"ama-metrics-node.*\"}) / sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"ci-prod-aks-mac-weu\", namespace=\"kube-system\", resource=\"memory\"}) > 0.00740", - "for": "PT15M", - "labels": { - "cluster": "/subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourcegroups/ci-prod-aks-mac-weu-rg/providers/microsoft.containerservice/managedclusters/ci-prod-aks-mac-weu" + "cluster": "ci-prod-aks-mac-weu" }, "annotations": { - "description": "Memory usage % exceeded for ama-metrics daemonset on cluster /subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourcegroups/ci-prod-aks-mac-weu-rg/providers/microsoft.containerservice/managedclusters/ci-prod-aks-mac-weu" + "description": "CPU usage greater than 50% for prometheus-collector on cluster ci-prod-aks-mac-weu" }, "severity": 4, "resolveConfiguration": { @@ -400,21 +264,19 @@ }, "actions": [ { - "ActionProperties": { - "Icm.Enabled": "True" - } + "actionGroupId": "/subscriptions/13d371f9-5a39-46d5-8e1b-60158c49db84/resourceGroups/ContainerInsightsPrometheusCollector-Prod/providers/microsoft.insights/actiongroups/AMA-PrometheusCollectorAlertGroup" } ] }, { - "alert": "Build over build alert - Memory usage % exceeded for ama-metrics replicaset on cluster /subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourcegroups/ci-prod-aks-mac-weu-rg/providers/microsoft.containerservice/managedclusters/ci-prod-aks-mac-weu", - "expression": "(sum(container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"ci-prod-aks-mac-weu\", namespace=\"kube-system\",container!=\"\", image!=\"\", pod=~\"ama-metrics.*\"}) - 
sum(container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"ci-prod-aks-mac-weu\", namespace=\"kube-system\",container!=\"\", image!=\"\", pod=~\"ama-metrics-node.*\"}) - sum(container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"ci-prod-aks-mac-weu\", namespace=\"kube-system\",container!=\"\", image!=\"\", pod=~\"ama-metrics-ksm.*\"})) / sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"ci-prod-aks-mac-weu\", namespace=\"kube-system\", resource=\"memory\"}) > 0.00265", - "for": "PT15M", + "alert": "Memory usage is high for prometheus-collector containers on cluster /subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourcegroups/ci-prod-aks-mac-weu-rg/providers/microsoft.containerservice/managedclusters/ci-prod-aks-mac-weu", + "expression": "(sum(container_memory_working_set_bytes{cluster=\"ci-prod-aks-mac-weu\", namespace=\"kube-system\", container=\"prometheus-collector\", image!=\"\"}) by (container, pod) / sum(kube_pod_container_resource_requests{cluster=\"ci-prod-aks-mac-weu\", namespace=\"kube-system\", container=\"prometheus-collector\", resource=\"memory\"}) by (container, pod)) > 1.9", + "for": "PT3M", "labels": { "cluster": "/subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourcegroups/ci-prod-aks-mac-weu-rg/providers/microsoft.containerservice/managedclusters/ci-prod-aks-mac-weu" }, "annotations": { - "description": "Memory usage % exceeded for ama-metrics replicaset on cluster /subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourcegroups/ci-prod-aks-mac-weu-rg/providers/microsoft.containerservice/managedclusters/ci-prod-aks-mac-weu" + "description": "Memory usage is high for prometheus-collector containers on cluster /subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourcegroups/ci-prod-aks-mac-weu-rg/providers/microsoft.containerservice/managedclusters/ci-prod-aks-mac-weu" }, "severity": 4, "resolveConfiguration": { @@ -423,9 +285,7 @@ }, "actions": [ { - "ActionProperties": { - "Icm.Enabled": 
"True" - } + "actionGroupId": "/subscriptions/13d371f9-5a39-46d5-8e1b-60158c49db84/resourceGroups/ContainerInsightsPrometheusCollector-Prod/providers/microsoft.insights/actiongroups/AMA-PrometheusCollectorAlertGroup" } ] } @@ -433,4 +293,4 @@ } } ] -} \ No newline at end of file +} diff --git a/internal/alerts/ci-prod-aks-msi-eus2.json b/internal/alerts/ci-prod-aks-msi-eus2.json deleted file mode 100644 index c545bc91c..000000000 --- a/internal/alerts/ci-prod-aks-msi-eus2.json +++ /dev/null @@ -1,275 +0,0 @@ -{ - "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#", - "contentVersion": "1.0.0.0", - "parameters": {}, - "variables": {}, - "resources": [ - { - "name": "containerinsightsprodclusteraccount_alerts_2", - "type": "Microsoft.AlertsManagement/prometheusRuleGroups", - "apiVersion": "2023-03-01", - "location": "eastus2euap", - "properties": { - "description": "rule group for cluster ci-prod-aks-msi-eus2 in containerinsightsprodclusteraccount", - "scopes": [ - "/subscriptions/ad2f1a83-caac-4e21-9d2a-9ca3f87105e2/resourcegroups/divyaj-test/providers/microsoft.monitor/accounts/containerinsightsprodclusteraccounteus2euap" - ], - "rules": [ - { - "alert": "up metric missing for target = node in cluster ci-prod-aks-msi-eus2", - "expression": "absent(up{cluster=\"ci-prod-aks-msi-eus2\", job=\"node\"}) == 1", - "for": "PT3M", - "labels": { - "cluster": "ci-prod-aks-msi-eus2" - }, - "annotations": { - "description": "up metric is not flowing for target = node in cluster ci-prod-aks-msi-eus2" - }, - "severity": 4, - "resolveConfiguration": { - "autoResolved": true, - "timeToResolve": "PT10M" - }, - "actions": [ - { - "ActionProperties": { - "Icm.Enabled": "True" - } - } - ] - }, - { - "alert": "up metric missing for target = kubelet in cluster ci-prod-aks-msi-eus2", - "expression": "absent(up{cluster=\"ci-prod-aks-msi-eus2\", job=\"kubelet\"}) == 1", - "for": "PT3M", - "labels": { - "cluster": "ci-prod-aks-msi-eus2" - }, - 
"annotations": { - "description": "up metric is not flowing for target = kubelet in cluster ci-prod-aks-msi-eus2" - }, - "severity": 4, - "resolveConfiguration": { - "autoResolved": true, - "timeToResolve": "PT10M" - }, - "actions": [ - { - "ActionProperties": { - "Icm.Enabled": "True" - } - } - ] - }, - { - "alert": "up metric missing for target = windows-exporter in cluster ci-prod-aks-msi-eus2", - "expression": "absent(up{cluster=\"ci-prod-aks-msi-eus2\", job=\"windows-exporter\"}) == 1", - "for": "PT3M", - "labels": { - "cluster": "ci-prod-aks-msi-eus2" - }, - "annotations": { - "description": "up metric is not flowing for target = windows-exporter in cluster ci-prod-aks-msi-eus2" - }, - "severity": 4, - "resolveConfiguration": { - "autoResolved": true, - "timeToResolve": "PT10M" - }, - "actions": [ - { - "ActionProperties": { - "Icm.Enabled": "True" - } - } - ] - }, - { - "alert": "up metric missing for target = kube-proxy in cluster ci-prod-aks-msi-eus2", - "expression": "absent(up{cluster=\"ci-prod-aks-msi-eus2\", job=\"kube-proxy\"}) == 1", - "for": "PT3M", - "labels": { - "cluster": "ci-prod-aks-msi-eus2" - }, - "annotations": { - "description": "up metric is not flowing for target = kube-proxy in cluster ci-prod-aks-msi-eus2" - }, - "severity": 4, - "resolveConfiguration": { - "autoResolved": true, - "timeToResolve": "PT10M" - }, - "actions": [ - { - "ActionProperties": { - "Icm.Enabled": "True" - } - } - ] - }, - { - "alert": "up metric missing for target = kube-apiserver in cluster ci-prod-aks-msi-eus2", - "expression": "absent(up{cluster=\"ci-prod-aks-msi-eus2\", job=\"kube-apiserver\"}) == 1", - "for": "PT3M", - "labels": { - "cluster": "ci-prod-aks-msi-eus2" - }, - "annotations": { - "description": "up metric is not flowing for target = kube-apiserver in cluster ci-prod-aks-msi-eus2" - }, - "severity": 4, - "resolveConfiguration": { - "autoResolved": true, - "timeToResolve": "PT10M" - }, - "actions": [ - { - "ActionProperties": { - "Icm.Enabled": 
"True" - } - } - ] - }, - { - "alert": "up metric missing for target = kube-proxy-windows in cluster ci-prod-aks-msi-eus2", - "expression": "absent(up{cluster=\"ci-prod-aks-msi-eus2\", job=\"kube-proxy-windows\"}) == 1", - "for": "PT3M", - "labels": { - "cluster": "ci-prod-aks-msi-eus2" - }, - "annotations": { - "description": "up metric is not flowing for target = kube-proxy-windows in cluster ci-prod-aks-msi-eus2" - }, - "severity": 4, - "resolveConfiguration": { - "autoResolved": true, - "timeToResolve": "PT10M" - }, - "actions": [ - { - "ActionProperties": { - "Icm.Enabled": "True" - } - } - ] - }, - { - "alert": "up metric missing for target = kube-state-metrics in cluster ci-prod-aks-msi-eus2", - "expression": "absent(up{cluster=\"ci-prod-aks-msi-eus2\", job=\"kube-state-metrics\"}) == 1", - "for": "PT3M", - "labels": { - "cluster": "ci-prod-aks-msi-eus2" - }, - "annotations": { - "description": "up metric is not flowing for target = kube-proxy-windows in cluster ci-prod-aks-msi-eus2" - }, - "severity": 4, - "resolveConfiguration": { - "autoResolved": true, - "timeToResolve": "PT10M" - }, - "actions": [ - { - "ActionProperties": { - "Icm.Enabled": "True" - } - } - ] - }, - { - "alert": "up metric missing for target = cadvisor in cluster ci-prod-aks-msi-eus2", - "expression": "absent(up{cluster=\"ci-prod-aks-msi-eus2\", job=\"cadvisor\"}) == 1", - "for": "PT3M", - "labels": { - "cluster": "ci-prod-aks-msi-eus2" - }, - "annotations": { - "description": "up metric is not flowing for target = cadvisor in cluster ci-prod-aks-msi-eus2" - }, - "severity": 4, - "resolveConfiguration": { - "autoResolved": true, - "timeToResolve": "PT10M" - }, - "actions": [ - { - "ActionProperties": { - "Icm.Enabled": "True" - } - } - ] - }, - { - "alert": "up metric missing for target = kube-dns in cluster ci-prod-aks-msi-eus2", - "expression": "absent(up{cluster=\"ci-prod-aks-msi-eus2\", job=\"kube-dns\"}) == 1", - "for": "PT3M", - "labels": { - "cluster": "ci-prod-aks-msi-eus2" - 
}, - "annotations": { - "description": "up metric is not flowing for target = kube-dns in cluster ci-prod-aks-msi-eus2" - }, - "severity": 4, - "resolveConfiguration": { - "autoResolved": true, - "timeToResolve": "PT10M" - }, - "actions": [ - { - "ActionProperties": { - "Icm.Enabled": "True" - } - } - ] - }, - { - "alert": "CPU usage % greater than 90 for prometheus-collector containers on cluster ci-prod-aks-msi-eus2", - "expression": "sum(sum by (cluster, namespace, pod, container) ( rate(container_cpu_usage_seconds_total{job=\"cadvisor\", image!=\"\", namespace=\"monitoring\", cluster=\"ci-prod-aks-msi-eus2\", container=\"prometheus-collector\"}[5m]) ) * on (cluster, namespace, pod) group_left(node) topk by (cluster, namespace, pod) ( 1, max by(cluster, namespace, pod, node) (kube_pod_info{node!=\"\", namespace=\"monitoring\", cluster=\"ci-prod-aks-msi-eus2\"}) )) by (container, pod) > 0.9", - "for": "PT3M", - "labels": { - "cluster": "ci-prod-aks-msi-eus2" - }, - "annotations": { - "description": "CPU usage greater than 90% for prometheus-collector on cluster ci-prod-aks-msi-eus2" - }, - "severity": 4, - "resolveConfiguration": { - "autoResolved": true, - "timeToResolve": "PT10M" - }, - "actions": [ - { - "ActionProperties": { - "Icm.Enabled": "True" - } - } - ] - }, - { - "alert": "Memory usage % greater than 90 for prometheus-collector containers on cluster ci-prod-aks-msi-eus2", - "expression": "(sum(container_memory_working_set_bytes{cluster=\"ci-prod-aks-msi-eus2\", namespace=\"monitoring\", container=\"prometheus-collector\", image!=\"\"}) by (container, pod) / sum(kube_pod_container_resource_requests{cluster=\"ci-prod-aks-msi-eus2\", namespace=\"monitoring\", container=\"prometheus-collector\", resource=\"memory\"}) by (container, pod)) > 0.9", - "for": "PT3M", - "labels": { - "cluster": "ci-prod-aks-msi-eus2" - }, - "annotations": { - "description": "Memory usage % greater than 90 for prometheus-collector containers on cluster ci-prod-aks-msi-eus2" - }, 
- "severity": 4, - "resolveConfiguration": { - "autoResolved": true, - "timeToResolve": "PT10M" - }, - "actions": [ - { - "ActionProperties": { - "Icm.Enabled": "True" - } - } - ] - } - ] - } - } - ] -} \ No newline at end of file diff --git a/internal/alerts/ci-prod-arc-wcus.json b/internal/alerts/ci-prod-arc-wcus.json new file mode 100644 index 000000000..443eb296d --- /dev/null +++ b/internal/alerts/ci-prod-arc-wcus.json @@ -0,0 +1,191 @@ +{ + "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#", + "contentVersion": "1.0.0.0", + "parameters": {}, + "variables": {}, + "resources": [ + { + "name": "containerinsights_dev_mac_eus_alerts", + "type": "Microsoft.AlertsManagement/prometheusRuleGroups", + "apiVersion": "2023-03-01", + "location": "eastus", + "properties": { + "description": "rule group for cluster ci-prod-arc-wcus in MAC: ci-prod-arc-wcus", + "scopes": [ + "/subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourceGroups/ci-prod-arc-wcus/providers/microsoft.monitor/accounts/ci-prod-arc-wcus", + "/subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourceGroups/ci-prod-arc-wcus/providers/Microsoft.ContainerService/managedClusters/ci-prod-arc-wcus" + ], + "rules": [ + { + "alert": "Amd64 metric missing in cluster ci-prod-arc-wcus", + "expression": "absent(node_uname_info{cluster=\"ci-prod-arc-wcus\", machine=\"x86_64\"}) == 1 or node_uname_info{cluster=\"ci-prod-arc-wcus\", machine=\"x86_64\"} == 0", + "for": "PT3M", + "labels": { + "cluster": "ci-prod-arc-wcus" + }, + "annotations": { + "description": "Amd64 metric missing in cluster ci-prod-arc-wcus" + }, + "severity": 4, + "resolveConfiguration": { + "autoResolved": true, + "timeToResolve": "PT10M" + }, + "actions": [ + { + "actionGroupId": "/subscriptions/13d371f9-5a39-46d5-8e1b-60158c49db84/resourceGroups/ContainerInsightsPrometheusCollector-Prod/providers/microsoft.insights/actiongroups/AMA-PrometheusCollectorAlertGroup" + } + ] + }, + { + "alert": "up
metric missing for target = node in cluster ci-prod-arc-wcus", + "expression": "absent(up{cluster=\"ci-prod-arc-wcus\", job=\"node\"}) == 1 or up{cluster=\"ci-prod-arc-wcus\", job=\"node\"} == 0", + "for": "PT3M", + "labels": { + "cluster": "ci-prod-arc-wcus" + }, + "annotations": { + "description": "up metric is not flowing for target = node in cluster ci-prod-arc-wcus" + }, + "severity": 4, + "resolveConfiguration": { + "autoResolved": true, + "timeToResolve": "PT10M" + }, + "actions": [ + { + "actionGroupId": "/subscriptions/13d371f9-5a39-46d5-8e1b-60158c49db84/resourceGroups/ContainerInsightsPrometheusCollector-Prod/providers/microsoft.insights/actiongroups/AMA-PrometheusCollectorAlertGroup" + } + ] + }, + { + "alert": "up metric missing for target = kubelet in cluster ci-prod-arc-wcus", + "expression": "absent(up{cluster=\"ci-prod-arc-wcus\", job=\"kubelet\"}) == 1 or up{cluster=\"ci-prod-arc-wcus\", job=\"kubelet\"} == 0", + "for": "PT3M", + "labels": { + "cluster": "ci-prod-arc-wcus" + }, + "annotations": { + "description": "up metric is not flowing for target = kubelet in cluster ci-prod-arc-wcus" + }, + "severity": 4, + "resolveConfiguration": { + "autoResolved": true, + "timeToResolve": "PT10M" + }, + "actions": [ + { + "actionGroupId": "/subscriptions/13d371f9-5a39-46d5-8e1b-60158c49db84/resourceGroups/ContainerInsightsPrometheusCollector-Prod/providers/microsoft.insights/actiongroups/AMA-PrometheusCollectorAlertGroup" + } + ] + }, + { + "alert": "up metric missing for target = kube-state-metrics in cluster ci-prod-arc-wcus", + "expression": "absent(up{cluster=\"ci-prod-arc-wcus\", job=\"kube-state-metrics\"}) == 1 or up{cluster=\"ci-prod-arc-wcus\", job=\"kube-state-metrics\"} == 0", + "for": "PT3M", + "labels": { + "cluster": "ci-prod-arc-wcus" + }, + "annotations": { + "description": "up metric is not flowing for target = kube-state-metrics in cluster ci-prod-arc-wcus" + }, + "severity": 4, + "resolveConfiguration": { + "autoResolved": true, + 
"timeToResolve": "PT10M" + }, + "actions": [ + { + "actionGroupId": "/subscriptions/13d371f9-5a39-46d5-8e1b-60158c49db84/resourceGroups/ContainerInsightsPrometheusCollector-Prod/providers/microsoft.insights/actiongroups/AMA-PrometheusCollectorAlertGroup" + } + ] + }, + { + "alert": "up metric missing for target = cadvisor in cluster ci-prod-arc-wcus", + "expression": "absent(up{cluster=\"ci-prod-arc-wcus\", job=\"cadvisor\"}) == 1 or up{cluster=\"ci-prod-arc-wcus\", job=\"cadvisor\"} == 0", + "for": "PT3M", + "labels": { + "cluster": "ci-prod-arc-wcus" + }, + "annotations": { + "description": "up metric is not flowing for target = cadvisor in cluster ci-prod-arc-wcus" + }, + "severity": 4, + "resolveConfiguration": { + "autoResolved": true, + "timeToResolve": "PT10M" + }, + "actions": [ + { + "actionGroupId": "/subscriptions/13d371f9-5a39-46d5-8e1b-60158c49db84/resourceGroups/ContainerInsightsPrometheusCollector-Prod/providers/microsoft.insights/actiongroups/AMA-PrometheusCollectorAlertGroup" + } + ] + }, + { + "alert": "CPU usage % greater than 90 for prometheus-collector containers on cluster ci-prod-arc-wcus", + "expression": "sum(sum by (cluster, namespace, pod, container) ( rate(container_cpu_usage_seconds_total{job=\"cadvisor\", image!=\"\", namespace=\"kube-system\", cluster=\"ci-prod-arc-wcus\", container=\"prometheus-collector\"}[5m]) ) * on (cluster, namespace, pod) group_left(node) topk by (cluster, namespace, pod) ( 1, max by(cluster, namespace, pod, node) (kube_pod_info{node!=\"\", namespace=\"kube-system\", cluster=\"ci-prod-arc-wcus\"}) )) by (container, pod) > 0.9", + "for": "PT3M", + "labels": { + "cluster": "ci-prod-arc-wcus" + }, + "annotations": { + "description": "CPU usage greater than 90% for prometheus-collector on cluster ci-prod-arc-wcus" + }, + "severity": 4, + "resolveConfiguration": { + "autoResolved": true, + "timeToResolve": "PT10M" + }, + "actions": [ + { + "actionGroupId": 
"/subscriptions/13d371f9-5a39-46d5-8e1b-60158c49db84/resourceGroups/ContainerInsightsPrometheusCollector-Prod/providers/microsoft.insights/actiongroups/AMA-PrometheusCollectorAlertGroup" + } + ] + }, + { + "alert": "CPU usage % greater than 50 for prometheus-collector containers on cluster ci-prod-arc-wcus", + "expression": "sum(sum by (cluster, namespace, pod, container) ( rate(container_cpu_usage_seconds_total{job=\"cadvisor\", image!=\"\", namespace=\"kube-system\", cluster=\"ci-prod-arc-wcus\", container=\"prometheus-collector\"}[5m]) ) * on (cluster, namespace, pod) group_left(node) topk by (cluster, namespace, pod) ( 1, max by(cluster, namespace, pod, node) (kube_pod_info{node!=\"\", namespace=\"kube-system\", cluster=\"ci-prod-arc-wcus\"}) )) by (container, pod) > 0.5", + "for": "PT3M", + "labels": { + "cluster": "ci-prod-arc-wcus" + }, + "annotations": { + "description": "CPU usage greater than 50% for prometheus-collector on cluster ci-prod-arc-wcus" + }, + "severity": 4, + "resolveConfiguration": { + "autoResolved": true, + "timeToResolve": "PT10M" + }, + "actions": [ + { + "actionGroupId": "/subscriptions/13d371f9-5a39-46d5-8e1b-60158c49db84/resourceGroups/ContainerInsightsPrometheusCollector-Prod/providers/microsoft.insights/actiongroups/AMA-PrometheusCollectorAlertGroup" + } + ] + }, + { + "alert": "Memory usage is high for prometheus-collector containers on cluster ci-prod-arc-wcus", + "expression": "(sum(container_memory_working_set_bytes{cluster=\"ci-prod-arc-wcus\", namespace=\"kube-system\", container=\"prometheus-collector\", image!=\"\"}) by (container, pod) / sum(kube_pod_container_resource_requests{cluster=\"ci-prod-arc-wcus\", namespace=\"kube-system\", container=\"prometheus-collector\", resource=\"memory\"}) by (container, pod)) > 1.9", + "for": "PT3M", + "labels": { + "cluster": "ci-prod-arc-wcus" + }, + "annotations": { + "description": "Memory usage is high for prometheus-collector containers on cluster ci-prod-arc-wcus" + }, +
"severity": 4, + "resolveConfiguration": { + "autoResolved": true, + "timeToResolve": "PT10M" + }, + "actions": [ + { + "actionGroupId": "/subscriptions/13d371f9-5a39-46d5-8e1b-60158c49db84/resourceGroups/ContainerInsightsPrometheusCollector-Prod/providers/microsoft.insights/actiongroups/AMA-PrometheusCollectorAlertGroup" + } + ] + } + ] + } + } + ] +} diff --git a/internal/alerts/ci_prod_aks_eus.json b/internal/alerts/ci_prod_aks_eus.json deleted file mode 100644 index e6ffb3056..000000000 --- a/internal/alerts/ci_prod_aks_eus.json +++ /dev/null @@ -1,459 +0,0 @@ -{ - "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#", - "contentVersion": "1.0.0.0", - "parameters": {}, - "variables": {}, - "resources": [ - { - "name": "containerinsights_prod_eus_alerts", - "type": "Microsoft.AlertsManagement/prometheusRuleGroups", - "apiVersion": "2023-03-01", - "location": "eastus", - "properties": { - "description": "rule group for cluster /subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourceGroups/ci-prod-aks-eus-rg/providers/Microsoft.ContainerService/managedClusters/ci-prod-aks-eus in MAC: /subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourceGroups/ci-prod-aks-eus-rg/providers/microsoft.monitor/accounts/ci-prod-aks-eus-mac", - "scopes": [ - "/subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourceGroups/ci-prod-aks-eus-rg/providers/microsoft.monitor/accounts/ci-prod-aks-eus-mac" - ], - "rules": [ - { - "alert": "Amd64 metric missing in cluster ci-prod-aks-eus", - "expression": "absent(node_uname_info{cluster=\"ci-prod-aks-eus\", machine=\"x86_64\"}) == 1 or node_uname_info{cluster=\"ci-prod-aks-eus\", machine=\"x86_64\"} == 0", - "for": "PT3M", - "labels": { - "cluster": "ci-prod-aks-eus" - }, - "annotations": { - "description": "Amd64 metric missing in cluster ci-prod-aks-eus" - }, - "severity": 4, - "resolveConfiguration": { - "autoResolved": true, - "timeToResolve": "PT10M" - }, - "actions": [ - { - 
"ActionProperties": { - "Icm.Enabled": "True" - } - } - ] - }, - { - "alert": "up metric missing for target = node in cluster ci-prod-aks-eus", - "expression": "absent(up{cluster=\"ci-prod-aks-eus\", job=\"node\"}) == 1 or up{cluster=\"ci-prod-aks-eus\", job=\"node\"} == 0", - "for": "PT3M", - "labels": { - "cluster": "ci-prod-aks-eus" - }, - "annotations": { - "description": "up metric is not flowing for target = node in cluster ci-prod-aks-eus" - }, - "severity": 4, - "resolveConfiguration": { - "autoResolved": true, - "timeToResolve": "PT10M" - }, - "actions": [ - { - "ActionProperties": { - "Icm.Enabled": "True" - } - } - ] - }, - { - "alert": "up metric missing for target = kubelet in cluster ci-prod-aks-eus", - "expression": "absent(up{cluster=\"ci-prod-aks-eus\", job=\"kubelet\"}) == 1 or up{cluster=\"ci-prod-aks-eus\", job=\"kubelet\"} == 0", - "for": "PT3M", - "labels": { - "cluster": "ci-prod-aks-eus" - }, - "annotations": { - "description": "up metric is not flowing for target = kubelet in cluster ci-prod-aks-eus" - }, - "severity": 4, - "resolveConfiguration": { - "autoResolved": true, - "timeToResolve": "PT10M" - }, - "actions": [ - { - "ActionProperties": { - "Icm.Enabled": "True" - } - } - ] - }, - { - "alert": "up metric missing for target = windows-exporter in cluster ci-prod-aks-eus", - "expression": "up{cluster=\"ci-prod-aks-eus\", job=\"windows-exporter\"} == 0", - "for": "PT3M", - "labels": { - "cluster": "ci-prod-aks-eus" - }, - "annotations": { - "description": "up metric is not flowing for target = windows-exporter in cluster ci-prod-aks-eus" - }, - "severity": 4, - "resolveConfiguration": { - "autoResolved": true, - "timeToResolve": "PT10M" - }, - "actions": [ - { - "ActionProperties": { - "Icm.Enabled": "True" - } - } - ] - }, - { - "alert": "up metric missing for target = kube-proxy in cluster ci-prod-aks-eus", - "expression": "absent(up{cluster=\"ci-prod-aks-eus\", job=\"kube-proxy\"}) == 1 or up{cluster=\"ci-prod-aks-eus\", 
job=\"kube-proxy\"} == 0", - "for": "PT3M", - "labels": { - "cluster": "ci-prod-aks-eus" - }, - "annotations": { - "description": "up metric is not flowing for target = kube-proxy in cluster ci-prod-aks-eus" - }, - "severity": 4, - "resolveConfiguration": { - "autoResolved": true, - "timeToResolve": "PT10M" - }, - "actions": [ - { - "ActionProperties": { - "Icm.Enabled": "True" - } - } - ] - }, - { - "alert": "up metric missing for target = kube-apiserver in cluster ci-prod-aks-eus", - "expression": "absent(up{cluster=\"ci-prod-aks-eus\", job=\"kube-apiserver\"}) == 1 or up{cluster=\"ci-prod-aks-eus\", job=\"kube-apiserver\"} == 0", - "for": "PT3M", - "labels": { - "cluster": "ci-prod-aks-eus" - }, - "annotations": { - "description": "up metric is not flowing for target = kube-apiserver in cluster ci-prod-aks-eus" - }, - "severity": 4, - "resolveConfiguration": { - "autoResolved": true, - "timeToResolve": "PT10M" - }, - "actions": [ - { - "ActionProperties": { - "Icm.Enabled": "True" - } - } - ] - }, - { - "alert": "up metric missing for target = kube-proxy-windows in cluster ci-prod-aks-eus", - "expression": "up{cluster=\"ci-prod-aks-eus\", job=\"kube-proxy-windows\"} == 0", - "for": "PT3M", - "labels": { - "cluster": "ci-prod-aks-eus" - }, - "annotations": { - "description": "up metric is not flowing for target = kube-proxy-windows in cluster ci-prod-aks-eus" - }, - "severity": 4, - "resolveConfiguration": { - "autoResolved": true, - "timeToResolve": "PT10M" - }, - "actions": [ - { - "ActionProperties": { - "Icm.Enabled": "True" - } - } - ] - }, - { - "alert": "up metric missing for target = kube-state-metrics in cluster ci-prod-aks-eus", - "expression": "absent(up{cluster=\"ci-prod-aks-eus\", job=\"kube-state-metrics\"}) == 1 or up{cluster=\"ci-prod-aks-eus\", job=\"kube-state-metrics\"} == 0", - "for": "PT3M", - "labels": { - "cluster": "ci-prod-aks-eus" - }, - "annotations": { - "description": "up metric is not flowing for target = kube-state-metrics in 
cluster ci-prod-aks-eus" - }, - "severity": 4, - "resolveConfiguration": { - "autoResolved": true, - "timeToResolve": "PT10M" - }, - "actions": [ - { - "ActionProperties": { - "Icm.Enabled": "True" - } - } - ] - }, - { - "alert": "up metric missing for target = cadvisor in cluster ci-prod-aks-eus", - "expression": "absent(up{cluster=\"ci-prod-aks-eus\", job=\"cadvisor\"}) == 1 or up{cluster=\"ci-prod-aks-eus\", job=\"cadvisor\"} == 0", - "for": "PT3M", - "labels": { - "cluster": "ci-prod-aks-eus" - }, - "annotations": { - "description": "up metric is not flowing for target = cadvisor in cluster ci-prod-aks-eus" - }, - "severity": 4, - "resolveConfiguration": { - "autoResolved": true, - "timeToResolve": "PT10M" - }, - "actions": [ - { - "ActionProperties": { - "Icm.Enabled": "True" - } - } - ] - }, - { - "alert": "up metric missing for target = kube-dns in cluster ci-prod-aks-eus", - "expression": "absent(up{cluster=\"ci-prod-aks-eus\", job=\"kube-dns\"}) == 1 or up{cluster=\"ci-prod-aks-eus\", job=\"kube-dns\"} == 0", - "for": "PT3M", - "labels": { - "cluster": "ci-prod-aks-eus" - }, - "annotations": { - "description": "up metric is not flowing for target = kube-dns in cluster ci-prod-aks-eus" - }, - "severity": 4, - "resolveConfiguration": { - "autoResolved": true, - "timeToResolve": "PT10M" - }, - "actions": [ - { - "ActionProperties": { - "Icm.Enabled": "True" - } - } - ] - }, - { - "alert": "CPU usage % greater than 90 for prometheus-collector containers on cluster ci-prod-aks-eus", - "expression": "sum(sum by (cluster, namespace, pod, container) ( rate(container_cpu_usage_seconds_total{job=\"cadvisor\", image!=\"\", namespace=\"monitoring\", cluster=\"ci-prod-aks-eus\", container=\"prometheus-collector\"}[5m]) ) * on (cluster, namespace, pod) group_left(node) topk by (cluster, namespace, pod) ( 1, max by(cluster, namespace, pod, node) (kube_pod_info{node!=\"\", namespace=\"monitoring\", cluster=\"ci-prod-aks-eus\"}) )) by (container, pod) > 0.9", - "for": 
"PT3M", - "labels": { - "cluster": "ci-prod-aks-eus" - }, - "annotations": { - "description": "CPU usage greater than 90% for prometheus-collector on cluster ci-prod-aks-eus" - }, - "severity": 4, - "resolveConfiguration": { - "autoResolved": true, - "timeToResolve": "PT10M" - }, - "actions": [ - { - "ActionProperties": { - "Icm.Enabled": "True" - } - } - ] - }, - { - "alert": "CPU usage % greater than 5 for prometheus-collector containers on cluster ci-prod-aks-eus", - "expression": "sum(sum by (cluster, namespace, pod, container) ( rate(container_cpu_usage_seconds_total{job=\"cadvisor\", image!=\"\", namespace=\"monitoring\", cluster=\"ci-prod-aks-eus\", container=\"prometheus-collector\"}[5m]) ) * on (cluster, namespace, pod) group_left(node) topk by (cluster, namespace, pod) ( 1, max by(cluster, namespace, pod, node) (kube_pod_info{node!=\"\", namespace=\"monitoring\", cluster=\"ci-prod-aks-eus\"}) )) by (container, pod) > 0.5", - "for": "PT3M", - "labels": { - "cluster": "ci-prod-aks-eus" - }, - "annotations": { - "description": "CPU usage greater than 5% for prometheus-collector on cluster ci-prod-aks-eus" - }, - "severity": 4, - "resolveConfiguration": { - "autoResolved": true, - "timeToResolve": "PT10M" - }, - "actions": [ - { - "ActionProperties": { - "Icm.Enabled": "True" - } - } - ] - }, - { - "alert": "Memory usage % greater than 90 for prometheus-collector containers on cluster ci-prod-aks-eus", - "expression": "(sum(container_memory_working_set_bytes{cluster=\"ci-prod-aks-eus\", namespace=\"monitoring\", container=\"prometheus-collector\", image!=\"\"}) by (container, pod) / sum(kube_pod_container_resource_requests{cluster=\"ci-prod-aks-eus\", namespace=\"monitoring\", container=\"prometheus-collector\", resource=\"memory\"}) by (container, pod)) > 0.9", - "for": "PT3M", - "labels": { - "cluster": "ci-prod-aks-eus" - }, - "annotations": { - "description": "Memory usage % greater than 90 for prometheus-collector containers on cluster ci-prod-aks-eus" - 
}, - "severity": 4, - "resolveConfiguration": { - "autoResolved": true, - "timeToResolve": "PT10M" - }, - "actions": [ - { - "ActionProperties": { - "Icm.Enabled": "True" - } - } - ] - }, - { - "alert": "Build over build alert - CPU usage % exceeded for replicaset on cluster ci-prod-aks-eus", - "expression": "sum(sum by (cluster, namespace, pod, container) ( irate(container_cpu_usage_seconds_total{image!=\"\",cluster=\"ci-prod-aks-eus\", pod=~\".*prometheus-collector.*\"}[5m])) * on (cluster, namespace, pod) group_left(node) topk by (cluster, namespace, pod) ( 1, max by(cluster, namespace, pod, node) (kube_pod_info{node!=\"\"}))) - sum(sum by (cluster, namespace, pod, container) ( irate(container_cpu_usage_seconds_total{job=\"cadvisor\", image!=\"\",cluster=\"ci-prod-aks-eus\", pod=~\".*prometheus-collector-node.*\"}[5m])) * on (cluster, namespace, pod) group_left(node) topk by (cluster, namespace, pod) ( 1, max by(cluster, namespace, pod, node) (kube_pod_info{node!=\"\"}))) / sum(kube_pod_container_resource_limits{cluster=\"ci-prod-aks-eus\", namespace=\"monitoring\", resource=\"cpu\"}) > 0.28", - "for": "PT15M", - "labels": { - "cluster": "ci-prod-aks-eus" - }, - "annotations": { - "description": "Build over build alert - CPU usage % exceeded for replicaset on cluster ci-prod-aks-eus" - }, - "severity": 4, - "resolveConfiguration": { - "autoResolved": true, - "timeToResolve": "PT10M" - }, - "actions": [ - { - "ActionProperties": { - "Icm.Enabled": "True" - } - } - ] - }, - { - "alert": "Build over build alert - CPU usage % exceeded for daemonset on cluster ci-prod-aks-eus", - "expression": "sum(sum by (cluster, namespace, pod, container) ( irate(container_cpu_usage_seconds_total{image!=\"\",cluster=\"ci-prod-aks-eus\", pod=~\".*prometheus-collector-node.*\"}[5m])) * on (cluster, namespace, pod) group_left(node) topk by (cluster, namespace, pod) ( 1, max by(cluster, namespace, pod, node) (kube_pod_info{node!=\"\"}))) / 
sum(kube_pod_container_resource_limits{cluster=\"ci-prod-aks-eus\", namespace=\"monitoring\", resource=\"cpu\"}) > 0.0105", - "for": "PT15M", - "labels": { - "cluster": "ci-prod-aks-eus" - }, - "annotations": { - "description": "Build over build alert - CPU usage % exceeded for daemonset on cluster ci-prod-aks-eus" - }, - "severity": 4, - "resolveConfiguration": { - "autoResolved": true, - "timeToResolve": "PT10M" - }, - "actions": [ - { - "ActionProperties": { - "Icm.Enabled": "True" - } - } - ] - }, - { - "alert": "Build over build alert - CPU usage % exceeded for k-s-m on cluster ci-prod-aks-eus", - "expression": "sum(sum by (cluster, namespace, pod, container) ( irate(container_cpu_usage_seconds_total{ image!=\"\",cluster=\"ci-prod-aks-eus\", pod=~\".*kube-state-metrics.*\"}[5m])) * on (cluster, namespace, pod) group_left(node) topk by (cluster, namespace, pod) ( 1, max by(cluster, namespace, pod, node) (kube_pod_info{node!=\"\"}))) / sum(kube_pod_container_resource_limits{cluster=\"ci-prod-aks-eus\", namespace=\"monitoring\", resource=\"cpu\"}) > 0.000238", - "for": "PT15M", - "labels": { - "cluster": "ci-prod-aks-eus" - }, - "annotations": { - "description": "Build over build alert - CPU usage % exceeded for kube state metrics on cluster ci-prod-aks-eus" - }, - "severity": 4, - "resolveConfiguration": { - "autoResolved": true, - "timeToResolve": "PT10M" - }, - "actions": [ - { - "ActionProperties": { - "Icm.Enabled": "True" - } - } - ] - }, - { - "alert": "Build over build alert - Memory usage % exceeded for k-s-m on cluster ci-prod-aks-eus", - "expression": "sum(container_memory_working_set_bytes{cluster=\"ci-prod-aks-eus\", namespace=\"monitoring\",container!=\"\", image!=\"\", pod=~\".*kube-state-metrics.*\"}) / sum(kube_pod_container_resource_limits{cluster=\"ci-prod-aks-eus\", namespace=\"monitoring\", resource=\"memory\"}) > 0.0022", - "for": "PT15M", - "labels": { - "cluster": "ci-prod-aks-eus" - }, - "annotations": { - "description": "Build over build 
alert - Memory usage % exceeded for kube state metrics on cluster ci-prod-aks-eus" - }, - "severity": 4, - "resolveConfiguration": { - "autoResolved": true, - "timeToResolve": "PT10M" - }, - "actions": [ - { - "ActionProperties": { - "Icm.Enabled": "True" - } - } - ] - }, - { - "alert": "Build over build alert - Memory usage % exceeded for replicaset on cluster ci-prod-aks-eus", - "expression": "(sum(container_memory_working_set_bytes{cluster=\"ci-prod-aks-eus\", namespace=\"monitoring\",container!=\"\", image!=\"\", pod=~\".*prometheus-collector.*\"}) - sum(container_memory_working_set_bytes{cluster=\"ci-prod-aks-eus\", namespace=\"monitoring\",container!=\"\", image!=\"\", pod=~\".*prometheus-collector-node.*\"})) / sum(kube_pod_container_resource_limits{cluster=\"ci-prod-aks-eus\", namespace=\"monitoring\", resource=\"memory\"}) > 0.023", - "for": "PT15M", - "labels": { - "cluster": "ci-prod-aks-eus" - }, - "annotations": { - "description": "Build over build alert - Memory usage % exceeded for replicaset on cluster ci-prod-aks-eus" - }, - "severity": 4, - "resolveConfiguration": { - "autoResolved": true, - "timeToResolve": "PT10M" - }, - "actions": [ - { - "ActionProperties": { - "Icm.Enabled": "True" - } - } - ] - }, - { - "alert": "Build over build alert - Memory usage % exceeded for daemonset on cluster ci-prod-aks-eus", - "expression": "sum(container_memory_working_set_bytes{cluster=\"ci-prod-aks-eus\", namespace=\"monitoring\",container!=\"\", image!=\"\", pod=~\".*prometheus-collector-node.*\"}) / sum(kube_pod_container_resource_limits{cluster=\"ci-prod-aks-eus\", namespace=\"monitoring\", resource=\"memory\"}) > 0.0445", - "for": "PT15M", - "labels": { - "cluster": "ci-prod-aks-eus" - }, - "annotations": { - "description": "Build over build alert - Memory usage % exceeded for daemonset on cluster ci-prod-aks-eus" - }, - "severity": 4, - "resolveConfiguration": { - "autoResolved": true, - "timeToResolve": "PT10M" - }, - "actions": [ - { - 
"ActionProperties": { - "Icm.Enabled": "True" - } - } - ] - } - ] - } - } - ] -} \ No newline at end of file diff --git a/internal/alerts/monitoring-metrics-prod-aks-eus2euap.json b/internal/alerts/monitoring-metrics-prod-aks-eus2euap.json new file mode 100644 index 000000000..8791e5b06 --- /dev/null +++ b/internal/alerts/monitoring-metrics-prod-aks-eus2euap.json @@ -0,0 +1,296 @@ +{ + "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#", + "contentVersion": "1.0.0.0", + "parameters": {}, + "variables": {}, + "resources": [ + { + "name": "containerinsights_dev_mac_eus_alerts", + "type": "Microsoft.AlertsManagement/prometheusRuleGroups", + "apiVersion": "2023-03-01", + "location": "eastus", + "properties": { + "description": "rule group for cluster monitoring-metrics-prod-aks-eus2euap in MAC: monitoring-metrics-amw-eus2euap", + "scopes": [ + "/subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourceGroups/monitoring-metrics-amw/providers/microsoft.monitor/accounts/monitoring-metrics-amw-eus2euap", + "/subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourceGroups/monitoring-metrics-prod-aks/providers/Microsoft.ContainerService/managedClusters/monitoring-metrics-prod-aks-eus2euap" + ], + "rules": [ + { + "alert": "Amd64 metric missing in cluster monitoring-metrics-prod-aks-eus2euap", + "expression": "absent(node_uname_info{cluster=\"monitoring-metrics-prod-aks-eus2euap\", machine=\"x86_64\"}) == 1 or node_uname_info{cluster=\"monitoring-metrics-prod-aks-eus2euap\", machine=\"x86_64\"} == 0", + "for": "PT3M", + "labels": { + "cluster": "monitoring-metrics-prod-aks-eus2euap" + }, + "annotations": { + "description": "Amd64 metric missing in cluster monitoring-metrics-prod-aks-eus2euap" + }, + "severity": 4, + "resolveConfiguration": { + "autoResolved": true, + "timeToResolve": "PT10M" + }, + "actions": [ + { + "actionGroupId": 
"/subscriptions/13d371f9-5a39-46d5-8e1b-60158c49db84/resourceGroups/ContainerInsightsPrometheusCollector-Prod/providers/microsoft.insights/actiongroups/AMA-PrometheusCollectorAlertGroup" + } + ] + }, + { + "alert": "up metric missing for target = node in cluster monitoring-metrics-prod-aks-eus2euap", + "expression": "absent(up{cluster=\"monitoring-metrics-prod-aks-eus2euap\", job=\"node\"}) == 1 or up{cluster=\"monitoring-metrics-prod-aks-eus2euap\", job=\"node\"} == 0", + "for": "PT3M", + "labels": { + "cluster": "monitoring-metrics-prod-aks-eus2euap" + }, + "annotations": { + "description": "up metric is not flowing for target = node in cluster monitoring-metrics-prod-aks-eus2euap" + }, + "severity": 4, + "resolveConfiguration": { + "autoResolved": true, + "timeToResolve": "PT10M" + }, + "actions": [ + { + "actionGroupId": "/subscriptions/13d371f9-5a39-46d5-8e1b-60158c49db84/resourceGroups/ContainerInsightsPrometheusCollector-Prod/providers/microsoft.insights/actiongroups/AMA-PrometheusCollectorAlertGroup" + } + ] + }, + { + "alert": "up metric missing for target = kubelet in cluster monitoring-metrics-prod-aks-eus2euap", + "expression": "absent(up{cluster=\"monitoring-metrics-prod-aks-eus2euap\", job=\"kubelet\"}) == 1 or up{cluster=\"monitoring-metrics-prod-aks-eus2euap\", job=\"kubelet\"} == 0", + "for": "PT3M", + "labels": { + "cluster": "monitoring-metrics-prod-aks-eus2euap" + }, + "annotations": { + "description": "up metric is not flowing for target = kubelet in cluster monitoring-metrics-prod-aks-eus2euap" + }, + "severity": 4, + "resolveConfiguration": { + "autoResolved": true, + "timeToResolve": "PT10M" + }, + "actions": [ + { + "actionGroupId": "/subscriptions/13d371f9-5a39-46d5-8e1b-60158c49db84/resourceGroups/ContainerInsightsPrometheusCollector-Prod/providers/microsoft.insights/actiongroups/AMA-PrometheusCollectorAlertGroup" + } + ] + }, + { + "alert": "up metric missing for target = windows-exporter in cluster monitoring-metrics-prod-aks-eus2euap", 
+ "expression": "absent(up{cluster=\"monitoring-metrics-prod-aks-eus2euap\", job=\"windows-exporter\"}) == 1 or up{cluster=\"monitoring-metrics-prod-aks-eus2euap\", job=\"windows-exporter\"} == 0", + "for": "PT3M", + "labels": { + "cluster": "monitoring-metrics-prod-aks-eus2euap" + }, + "annotations": { + "description": "up metric is not flowing for target = windows-exporter in cluster monitoring-metrics-prod-aks-eus2euap" + }, + "severity": 4, + "resolveConfiguration": { + "autoResolved": true, + "timeToResolve": "PT10M" + }, + "actions": [ + { + "actionGroupId": "/subscriptions/13d371f9-5a39-46d5-8e1b-60158c49db84/resourceGroups/ContainerInsightsPrometheusCollector-Prod/providers/microsoft.insights/actiongroups/AMA-PrometheusCollectorAlertGroup" + } + ] + }, + { + "alert": "up metric missing for target = kube-proxy in cluster monitoring-metrics-prod-aks-eus2euap", + "expression": "absent(up{cluster=\"monitoring-metrics-prod-aks-eus2euap\", job=\"kube-proxy\"}) == 1 or up{cluster=\"monitoring-metrics-prod-aks-eus2euap\", job=\"kube-proxy\"} == 0", + "for": "PT3M", + "labels": { + "cluster": "monitoring-metrics-prod-aks-eus2euap" + }, + "annotations": { + "description": "up metric is not flowing for target = kube-proxy in cluster monitoring-metrics-prod-aks-eus2euap" + }, + "severity": 4, + "resolveConfiguration": { + "autoResolved": true, + "timeToResolve": "PT10M" + }, + "actions": [ + { + "actionGroupId": "/subscriptions/13d371f9-5a39-46d5-8e1b-60158c49db84/resourceGroups/ContainerInsightsPrometheusCollector-Prod/providers/microsoft.insights/actiongroups/AMA-PrometheusCollectorAlertGroup" + } + ] + }, + { + "alert": "up metric missing for target = kube-apiserver in cluster monitoring-metrics-prod-aks-eus2euap", + "expression": "absent(up{cluster=\"monitoring-metrics-prod-aks-eus2euap\", job=\"kube-apiserver\"}) == 1 or up{cluster=\"monitoring-metrics-prod-aks-eus2euap\", job=\"kube-apiserver\"} == 0", + "for": "PT3M", + "labels": { + "cluster": 
"monitoring-metrics-prod-aks-eus2euap" + }, + "annotations": { + "description": "up metric is not flowing for target = kube-apiserver in cluster monitoring-metrics-prod-aks-eus2euap" + }, + "severity": 4, + "resolveConfiguration": { + "autoResolved": true, + "timeToResolve": "PT10M" + }, + "actions": [ + { + "actionGroupId": "/subscriptions/13d371f9-5a39-46d5-8e1b-60158c49db84/resourceGroups/ContainerInsightsPrometheusCollector-Prod/providers/microsoft.insights/actiongroups/AMA-PrometheusCollectorAlertGroup" + } + ] + }, + { + "alert": "up metric missing for target = kube-proxy-windows in cluster monitoring-metrics-prod-aks-eus2euap", + "expression": "absent(up{cluster=\"monitoring-metrics-prod-aks-eus2euap\", job=\"kube-proxy-windows\"}) == 1 or up{cluster=\"monitoring-metrics-prod-aks-eus2euap\", job=\"kube-proxy-windows\"} == 0", + "for": "PT3M", + "labels": { + "cluster": "monitoring-metrics-prod-aks-eus2euap" + }, + "annotations": { + "description": "up metric is not flowing for target = kube-proxy-windows in cluster monitoring-metrics-prod-aks-eus2euap" + }, + "severity": 4, + "resolveConfiguration": { + "autoResolved": true, + "timeToResolve": "PT10M" + }, + "actions": [ + { + "actionGroupId": "/subscriptions/13d371f9-5a39-46d5-8e1b-60158c49db84/resourceGroups/ContainerInsightsPrometheusCollector-Prod/providers/microsoft.insights/actiongroups/AMA-PrometheusCollectorAlertGroup" + } + ] + }, + { + "alert": "up metric missing for target = kube-state-metrics in cluster monitoring-metrics-prod-aks-eus2euap", + "expression": "absent(up{cluster=\"monitoring-metrics-prod-aks-eus2euap\", job=\"kube-state-metrics\"}) == 1 or up{cluster=\"monitoring-metrics-prod-aks-eus2euap\", job=\"kube-state-metrics\"} == 0", + "for": "PT3M", + "labels": { + "cluster": "monitoring-metrics-prod-aks-eus2euap" + }, + "annotations": { + "description": "up metric is not flowing for target = kube-state-metrics in cluster monitoring-metrics-prod-aks-eus2euap" + }, + "severity": 4, + 
"resolveConfiguration": { + "autoResolved": true, + "timeToResolve": "PT10M" + }, + "actions": [ + { + "actionGroupId": "/subscriptions/13d371f9-5a39-46d5-8e1b-60158c49db84/resourceGroups/ContainerInsightsPrometheusCollector-Prod/providers/microsoft.insights/actiongroups/AMA-PrometheusCollectorAlertGroup" + } + ] + }, + { + "alert": "up metric missing for target = cadvisor in cluster monitoring-metrics-prod-aks-eus2euap", + "expression": "absent(up{cluster=\"monitoring-metrics-prod-aks-eus2euap\", job=\"cadvisor\"}) == 1 or up{cluster=\"monitoring-metrics-prod-aks-eus2euap\", job=\"cadvisor\"} == 0", + "for": "PT3M", + "labels": { + "cluster": "monitoring-metrics-prod-aks-eus2euap" + }, + "annotations": { + "description": "up metric is not flowing for target = cadvisor in cluster monitoring-metrics-prod-aks-eus2euap" + }, + "severity": 4, + "resolveConfiguration": { + "autoResolved": true, + "timeToResolve": "PT10M" + }, + "actions": [ + { + "actionGroupId": "/subscriptions/13d371f9-5a39-46d5-8e1b-60158c49db84/resourceGroups/ContainerInsightsPrometheusCollector-Prod/providers/microsoft.insights/actiongroups/AMA-PrometheusCollectorAlertGroup" + } + ] + }, + { + "alert": "up metric missing for target = kube-dns in cluster monitoring-metrics-prod-aks-eus2euap", + "expression": "absent(up{cluster=\"monitoring-metrics-prod-aks-eus2euap\", job=\"kube-dns\"}) == 1 or up{cluster=\"monitoring-metrics-prod-aks-eus2euap\", job=\"kube-dns\"} == 0", + "for": "PT3M", + "labels": { + "cluster": "monitoring-metrics-prod-aks-eus2euap" + }, + "annotations": { + "description": "up metric is not flowing for target = kube-dns in cluster monitoring-metrics-prod-aks-eus2euap" + }, + "severity": 4, + "resolveConfiguration": { + "autoResolved": true, + "timeToResolve": "PT10M" + }, + "actions": [ + { + "actionGroupId": 
"/subscriptions/13d371f9-5a39-46d5-8e1b-60158c49db84/resourceGroups/ContainerInsightsPrometheusCollector-Prod/providers/microsoft.insights/actiongroups/AMA-PrometheusCollectorAlertGroup" + } + ] + }, + { + "alert": "CPU usage % greater than 90 for prometheus-collector containers on cluster monitoring-metrics-prod-aks-eus2euap", + "expression": "sum(sum by (cluster, namespace, pod, container) ( rate(container_cpu_usage_seconds_total{job=\"cadvisor\", image!=\"\", namespace=\"kube-system\", cluster=\"monitoring-metrics-prod-aks-eus2euap\", container=\"prometheus-collector\"}[5m]) ) * on (cluster, namespace, pod) group_left(node) topk by (cluster, namespace, pod) ( 1, max by(cluster, namespace, pod, node) (kube_pod_info{node!=\"\", namespace=\"kube-system\", cluster=\"monitoring-metrics-prod-aks-eus2euap\"}) )) by (container, pod) > 0.9", + "for": "PT3M", + "labels": { + "cluster": "monitoring-metrics-prod-aks-eus2euap" + }, + "annotations": { + "description": "CPU usage greater than 90% for prometheus-collector on cluster monitoring-metrics-prod-aks-eus2euap" + }, + "severity": 4, + "resolveConfiguration": { + "autoResolved": true, + "timeToResolve": "PT10M" + }, + "actions": [ + { + "actionGroupId": "/subscriptions/13d371f9-5a39-46d5-8e1b-60158c49db84/resourceGroups/ContainerInsightsPrometheusCollector-Prod/providers/microsoft.insights/actiongroups/AMA-PrometheusCollectorAlertGroup" + } + ] + }, + { + "alert": "CPU usage % greater than 50 for prometheus-collector containers on cluster monitoring-metrics-prod-aks-eus2euap", + "expression": "sum(sum by (cluster, namespace, pod, container) ( rate(container_cpu_usage_seconds_total{job=\"cadvisor\", image!=\"\", namespace=\"kube-system\", cluster=\"monitoring-metrics-prod-aks-eus2euap\", container=\"prometheus-collector\"}[5m]) ) * on (cluster, namespace, pod) group_left(node) topk by (cluster, namespace, pod) ( 1, max by(cluster, namespace, pod, node) (kube_pod_info{node!=\"\", namespace=\"kube-system\", 
cluster=\"monitoring-metrics-prod-aks-eus2euap\"}) )) by (container, pod) > 0.5", + "for": "PT3M", + "labels": { + "cluster": "monitoring-metrics-prod-aks-eus2euap" + }, + "annotations": { + "description": "CPU usage greater than 50% for prometheus-collector on cluster monitoring-metrics-prod-aks-eus2euap" + }, + "severity": 4, + "resolveConfiguration": { + "autoResolved": true, + "timeToResolve": "PT10M" + }, + "actions": [ + { + "actionGroupId": "/subscriptions/13d371f9-5a39-46d5-8e1b-60158c49db84/resourceGroups/ContainerInsightsPrometheusCollector-Prod/providers/microsoft.insights/actiongroups/AMA-PrometheusCollectorAlertGroup" + } + ] + }, + { + "alert": "Memory usage is high for prometheus-collector containers on cluster monitoring-metrics-prod-aks-eus2euap", + "expression": "(sum(container_memory_working_set_bytes{cluster=\"monitoring-metrics-prod-aks-eus2euap\", namespace=\"kube-system\", container=\"prometheus-collector\", image!=\"\"}) by (container, pod) / sum(kube_pod_container_resource_requests{cluster=\"monitoring-metrics-prod-aks-eus2euap\", namespace=\"kube-system\", container=\"prometheus-collector\", resource=\"memory\"}) by (container, pod)) > 1.9", + "for": "PT3M", + "labels": { + "cluster": "monitoring-metrics-prod-aks-eus2euap" + }, + "annotations": { + "description": "Memory usage is high for prometheus-collector containers on cluster monitoring-metrics-prod-aks-eus2euap" + }, + "severity": 4, + "resolveConfiguration": { + "autoResolved": true, + "timeToResolve": "PT10M" + }, + "actions": [ + { + "actionGroupId": "/subscriptions/13d371f9-5a39-46d5-8e1b-60158c49db84/resourceGroups/ContainerInsightsPrometheusCollector-Prod/providers/microsoft.insights/actiongroups/AMA-PrometheusCollectorAlertGroup" + } + ] + } + ] + } + } + ] +} diff --git a/internal/alerts/monitoring-metrics-prod-aks-wcus.json b/internal/alerts/monitoring-metrics-prod-aks-wcus.json new file mode 100644 index 000000000..feff2e6ae --- /dev/null +++ 
b/internal/alerts/monitoring-metrics-prod-aks-wcus.json @@ -0,0 +1,296 @@ +{ + "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#", + "contentVersion": "1.0.0.0", + "parameters": {}, + "variables": {}, + "resources": [ + { + "name": "containerinsights_dev_mac_eus_alerts", + "type": "Microsoft.AlertsManagement/prometheusRuleGroups", + "apiVersion": "2023-03-01", + "location": "eastus", + "properties": { + "description": "rule group for cluster monitoring-metrics-prod-aks-wcus in MAC: monitoring-metrics-amw-wcus", + "scopes": [ + "/subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourceGroups/monitoring-metrics-amw/providers/microsoft.monitor/accounts/monitoring-metrics-amw-wcus", + "/subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourceGroups/monitoring-metrics-prod-aks/providers/Microsoft.ContainerService/managedClusters/monitoring-metrics-prod-aks-wcus" + ], + "rules": [ + { + "alert": "Amd64 metric missing in cluster monitoring-metrics-prod-aks-wcus", + "expression": "absent(node_uname_info{cluster=\"monitoring-metrics-prod-aks-wcus\", machine=\"x86_64\"}) == 1 or node_uname_info{cluster=\"monitoring-metrics-prod-aks-wcus\", machine=\"x86_64\"} == 0", + "for": "PT3M", + "labels": { + "cluster": "monitoring-metrics-prod-aks-wcus" + }, + "annotations": { + "description": "Amd64 metric missing in cluster monitoring-metrics-prod-aks-wcus" + }, + "severity": 4, + "resolveConfiguration": { + "autoResolved": true, + "timeToResolve": "PT10M" + }, + "actions": [ + { + "actionGroupId": "/subscriptions/13d371f9-5a39-46d5-8e1b-60158c49db84/resourceGroups/ContainerInsightsPrometheusCollector-Prod/providers/microsoft.insights/actiongroups/AMA-PrometheusCollectorAlertGroup" + } + ] + }, + { + "alert": "up metric missing for target = node in cluster monitoring-metrics-prod-aks-wcus", + "expression": "absent(up{cluster=\"monitoring-metrics-prod-aks-wcus\", job=\"node\"}) == 1 or up{cluster=\"monitoring-metrics-prod-aks-wcus\", 
job=\"node\"} == 0", + "for": "PT3M", + "labels": { + "cluster": "monitoring-metrics-prod-aks-wcus" + }, + "annotations": { + "description": "up metric is not flowing for target = node in cluster monitoring-metrics-prod-aks-wcus" + }, + "severity": 4, + "resolveConfiguration": { + "autoResolved": true, + "timeToResolve": "PT10M" + }, + "actions": [ + { + "actionGroupId": "/subscriptions/13d371f9-5a39-46d5-8e1b-60158c49db84/resourceGroups/ContainerInsightsPrometheusCollector-Prod/providers/microsoft.insights/actiongroups/AMA-PrometheusCollectorAlertGroup" + } + ] + }, + { + "alert": "up metric missing for target = kubelet in cluster monitoring-metrics-prod-aks-wcus", + "expression": "absent(up{cluster=\"monitoring-metrics-prod-aks-wcus\", job=\"kubelet\"}) == 1 or up{cluster=\"monitoring-metrics-prod-aks-wcus\", job=\"kubelet\"} == 0", + "for": "PT3M", + "labels": { + "cluster": "monitoring-metrics-prod-aks-wcus" + }, + "annotations": { + "description": "up metric is not flowing for target = kubelet in cluster monitoring-metrics-prod-aks-wcus" + }, + "severity": 4, + "resolveConfiguration": { + "autoResolved": true, + "timeToResolve": "PT10M" + }, + "actions": [ + { + "actionGroupId": "/subscriptions/13d371f9-5a39-46d5-8e1b-60158c49db84/resourceGroups/ContainerInsightsPrometheusCollector-Prod/providers/microsoft.insights/actiongroups/AMA-PrometheusCollectorAlertGroup" + } + ] + }, + { + "alert": "up metric missing for target = windows-exporter in cluster monitoring-metrics-prod-aks-wcus", + "expression": "absent(up{cluster=\"monitoring-metrics-prod-aks-wcus\", job=\"windows-exporter\"}) == 1 or up{cluster=\"monitoring-metrics-prod-aks-wcus\", job=\"windows-exporter\"} == 0", + "for": "PT3M", + "labels": { + "cluster": "monitoring-metrics-prod-aks-wcus" + }, + "annotations": { + "description": "up metric is not flowing for target = windows-exporter in cluster monitoring-metrics-prod-aks-wcus" + }, + "severity": 4, + "resolveConfiguration": { + "autoResolved": true, + 
"timeToResolve": "PT10M" + }, + "actions": [ + { + "actionGroupId": "/subscriptions/13d371f9-5a39-46d5-8e1b-60158c49db84/resourceGroups/ContainerInsightsPrometheusCollector-Prod/providers/microsoft.insights/actiongroups/AMA-PrometheusCollectorAlertGroup" + } + ] + }, + { + "alert": "up metric missing for target = kube-proxy in cluster monitoring-metrics-prod-aks-wcus", + "expression": "absent(up{cluster=\"monitoring-metrics-prod-aks-wcus\", job=\"kube-proxy\"}) == 1 or up{cluster=\"monitoring-metrics-prod-aks-wcus\", job=\"kube-proxy\"} == 0", + "for": "PT3M", + "labels": { + "cluster": "monitoring-metrics-prod-aks-wcus" + }, + "annotations": { + "description": "up metric is not flowing for target = kube-proxy in cluster monitoring-metrics-prod-aks-wcus" + }, + "severity": 4, + "resolveConfiguration": { + "autoResolved": true, + "timeToResolve": "PT10M" + }, + "actions": [ + { + "actionGroupId": "/subscriptions/13d371f9-5a39-46d5-8e1b-60158c49db84/resourceGroups/ContainerInsightsPrometheusCollector-Prod/providers/microsoft.insights/actiongroups/AMA-PrometheusCollectorAlertGroup" + } + ] + }, + { + "alert": "up metric missing for target = kube-apiserver in cluster monitoring-metrics-prod-aks-wcus", + "expression": "absent(up{cluster=\"monitoring-metrics-prod-aks-wcus\", job=\"kube-apiserver\"}) == 1 or up{cluster=\"monitoring-metrics-prod-aks-wcus\", job=\"kube-apiserver\"} == 0", + "for": "PT3M", + "labels": { + "cluster": "monitoring-metrics-prod-aks-wcus" + }, + "annotations": { + "description": "up metric is not flowing for target = kube-apiserver in cluster monitoring-metrics-prod-aks-wcus" + }, + "severity": 4, + "resolveConfiguration": { + "autoResolved": true, + "timeToResolve": "PT10M" + }, + "actions": [ + { + "actionGroupId": "/subscriptions/13d371f9-5a39-46d5-8e1b-60158c49db84/resourceGroups/ContainerInsightsPrometheusCollector-Prod/providers/microsoft.insights/actiongroups/AMA-PrometheusCollectorAlertGroup" + } + ] + }, + { + "alert": "up metric missing 
for target = kube-proxy-windows in cluster monitoring-metrics-prod-aks-wcus", + "expression": "absent(up{cluster=\"monitoring-metrics-prod-aks-wcus\", job=\"kube-proxy-windows\"}) == 1 or up{cluster=\"monitoring-metrics-prod-aks-wcus\", job=\"kube-proxy-windows\"} == 0", + "for": "PT3M", + "labels": { + "cluster": "monitoring-metrics-prod-aks-wcus" + }, + "annotations": { + "description": "up metric is not flowing for target = kube-proxy-windows in cluster monitoring-metrics-prod-aks-wcus" + }, + "severity": 4, + "resolveConfiguration": { + "autoResolved": true, + "timeToResolve": "PT10M" + }, + "actions": [ + { + "actionGroupId": "/subscriptions/13d371f9-5a39-46d5-8e1b-60158c49db84/resourceGroups/ContainerInsightsPrometheusCollector-Prod/providers/microsoft.insights/actiongroups/AMA-PrometheusCollectorAlertGroup" + } + ] + }, + { + "alert": "up metric missing for target = kube-state-metrics in cluster monitoring-metrics-prod-aks-wcus", + "expression": "absent(up{cluster=\"monitoring-metrics-prod-aks-wcus\", job=\"kube-state-metrics\"}) == 1 or up{cluster=\"monitoring-metrics-prod-aks-wcus\", job=\"kube-state-metrics\"} == 0", + "for": "PT3M", + "labels": { + "cluster": "monitoring-metrics-prod-aks-wcus" + }, + "annotations": { + "description": "up metric is not flowing for target = kube-state-metrics in cluster monitoring-metrics-prod-aks-wcus" + }, + "severity": 4, + "resolveConfiguration": { + "autoResolved": true, + "timeToResolve": "PT10M" + }, + "actions": [ + { + "actionGroupId": "/subscriptions/13d371f9-5a39-46d5-8e1b-60158c49db84/resourceGroups/ContainerInsightsPrometheusCollector-Prod/providers/microsoft.insights/actiongroups/AMA-PrometheusCollectorAlertGroup" + } + ] + }, + { + "alert": "up metric missing for target = cadvisor in cluster monitoring-metrics-prod-aks-wcus", + "expression": "absent(up{cluster=\"monitoring-metrics-prod-aks-wcus\", job=\"cadvisor\"}) == 1 or up{cluster=\"monitoring-metrics-prod-aks-wcus\", job=\"cadvisor\"} == 0", + "for": 
"PT3M", + "labels": { + "cluster": "monitoring-metrics-prod-aks-wcus" + }, + "annotations": { + "description": "up metric is not flowing for target = cadvisor in cluster monitoring-metrics-prod-aks-wcus" + }, + "severity": 4, + "resolveConfiguration": { + "autoResolved": true, + "timeToResolve": "PT10M" + }, + "actions": [ + { + "actionGroupId": "/subscriptions/13d371f9-5a39-46d5-8e1b-60158c49db84/resourceGroups/ContainerInsightsPrometheusCollector-Prod/providers/microsoft.insights/actiongroups/AMA-PrometheusCollectorAlertGroup" + } + ] + }, + { + "alert": "up metric missing for target = kube-dns in cluster monitoring-metrics-prod-aks-wcus", + "expression": "absent(up{cluster=\"monitoring-metrics-prod-aks-wcus\", job=\"kube-dns\"}) == 1 or up{cluster=\"monitoring-metrics-prod-aks-wcus\", job=\"kube-dns\"} == 0", + "for": "PT3M", + "labels": { + "cluster": "monitoring-metrics-prod-aks-wcus" + }, + "annotations": { + "description": "up metric is not flowing for target = kube-dns in cluster monitoring-metrics-prod-aks-wcus" + }, + "severity": 4, + "resolveConfiguration": { + "autoResolved": true, + "timeToResolve": "PT10M" + }, + "actions": [ + { + "actionGroupId": "/subscriptions/13d371f9-5a39-46d5-8e1b-60158c49db84/resourceGroups/ContainerInsightsPrometheusCollector-Prod/providers/microsoft.insights/actiongroups/AMA-PrometheusCollectorAlertGroup" + } + ] + }, + { + "alert": "CPU usage % greater than 90 for prometheus-collector containers on cluster monitoring-metrics-prod-aks-wcus", + "expression": "sum(sum by (cluster, namespace, pod, container) ( rate(container_cpu_usage_seconds_total{job=\"cadvisor\", image!=\"\", namespace=\"kube-system\", cluster=\"monitoring-metrics-prod-aks-wcus\", container=\"prometheus-collector\"}[5m]) ) * on (cluster, namespace, pod) group_left(node) topk by (cluster, namespace, pod) ( 1, max by(cluster, namespace, pod, node) (kube_pod_info{node!=\"\", namespace=\"kube-system\", cluster=\"monitoring-metrics-prod-aks-wcus\"}) )) by 
(container, pod) > 0.9", + "for": "PT3M", + "labels": { + "cluster": "monitoring-metrics-prod-aks-wcus" + }, + "annotations": { + "description": "CPU usage greater than 90% for prometheus-collector on cluster monitoring-metrics-prod-aks-wcus" + }, + "severity": 4, + "resolveConfiguration": { + "autoResolved": true, + "timeToResolve": "PT10M" + }, + "actions": [ + { + "actionGroupId": "/subscriptions/13d371f9-5a39-46d5-8e1b-60158c49db84/resourceGroups/ContainerInsightsPrometheusCollector-Prod/providers/microsoft.insights/actiongroups/AMA-PrometheusCollectorAlertGroup" + } + ] + }, + { + "alert": "CPU usage % greater than 50 for prometheus-collector containers on cluster monitoring-metrics-prod-aks-wcus", + "expression": "sum(sum by (cluster, namespace, pod, container) ( rate(container_cpu_usage_seconds_total{job=\"cadvisor\", image!=\"\", namespace=\"kube-system\", cluster=\"monitoring-metrics-prod-aks-wcus\", container=\"prometheus-collector\"}[5m]) ) * on (cluster, namespace, pod) group_left(node) topk by (cluster, namespace, pod) ( 1, max by(cluster, namespace, pod, node) (kube_pod_info{node!=\"\", namespace=\"kube-system\", cluster=\"monitoring-metrics-prod-aks-wcus\"}) )) by (container, pod) > 0.5", + "for": "PT3M", + "labels": { + "cluster": "monitoring-metrics-prod-aks-wcus" + }, + "annotations": { + "description": "CPU usage greater than 50% for prometheus-collector on cluster monitoring-metrics-prod-aks-wcus" + }, + "severity": 4, + "resolveConfiguration": { + "autoResolved": true, + "timeToResolve": "PT10M" + }, + "actions": [ + { + "actionGroupId": "/subscriptions/13d371f9-5a39-46d5-8e1b-60158c49db84/resourceGroups/ContainerInsightsPrometheusCollector-Prod/providers/microsoft.insights/actiongroups/AMA-PrometheusCollectorAlertGroup" + } + ] + }, + { + "alert": "Memory usage is high for prometheus-collector containers on cluster monitoring-metrics-prod-aks-wcus", + "expression": 
"(sum(container_memory_working_set_bytes{cluster=\"monitoring-metrics-prod-aks-wcus\", namespace=\"kube-system\", container=\"prometheus-collector\", image!=\"\"}) by (container, pod) / sum(kube_pod_container_resource_requests{cluster=\"monitoring-metrics-prod-aks-wcus\", namespace=\"kube-system\", container=\"prometheus-collector\", resource=\"memory\"}) by (container, pod)) > 1.9", + "for": "PT3M", + "labels": { + "cluster": "monitoring-metrics-prod-aks-wcus" + }, + "annotations": { + "description": "Memory usage is high for prometheus-collector containers on cluster monitoring-metrics-prod-aks-wcus" + }, + "severity": 4, + "resolveConfiguration": { + "autoResolved": true, + "timeToResolve": "PT10M" + }, + "actions": [ + { + "actionGroupId": "/subscriptions/13d371f9-5a39-46d5-8e1b-60158c49db84/resourceGroups/ContainerInsightsPrometheusCollector-Prod/providers/microsoft.insights/actiongroups/AMA-PrometheusCollectorAlertGroup" + } + ] + } + ] + } + } + ] +} diff --git a/internal/monitoring/README.md b/internal/monitoring/README.md index 87c9d0f0e..7c50ef28e 100644 --- a/internal/monitoring/README.md +++ b/internal/monitoring/README.md @@ -1,32 +1,29 @@ -### **This wiki contains links of all the resources related to alerts and dashboards of the CI CD clusters** +### **This wiki contains links of all the resources related to alerts and dashboards of the CI CD and prod monitoring near ring clusters** -**Links to all AKS clusters used in CI CD** +Below is the linking of the AKS cluster to Azure Monitor Workspace to Grafana for cicd and prod monitoring clusters: -Below are the links to all the AKS clusters used in CI CD. -We have 2 dev clusters and 2 prod clusters. 
+ci/cd clusters (cluster --> amw --> grafana) +============================================ -* ci-prod-aks-eus - [link](https://ms.portal.azure.com/#@microsoft.onmicrosoft.com/resource/subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourceGroups/ci-prod-aks-eus-rg/providers/Microsoft.ContainerService/managedClusters/ci-prod-aks-eus/overview) +[AKS] +dev=[ci-dev-aks-mac-eus](https://ms.portal.azure.com/#@microsoft.onmicrosoft.com/resource/subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourceGroups/ci-dev-aks-mac-eus-rg/providers/Microsoft.ContainerService/managedClusters/ci-dev-aks-mac-eus/overview) --> [ci-dev-aks-eus-mac](https://ms.portal.azure.com/#@microsoft.onmicrosoft.com/resource/subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourceGroups/ci-dev-aks-mac-eus-rg/providers/microsoft.monitor/accounts/ci-dev-aks-eus-mac/resourceOverviewId) --> [cicd-graf-metrics-wcus](https://ms.portal.azure.com/#@microsoft.onmicrosoft.com/resource/subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourceGroups/ci-prod-arc-wcus/providers/Microsoft.Dashboard/grafana/cicd-graf-metrics-wcus/overview) -* ci-prod-aks-mac-weu - [link](https://ms.portal.azure.com/#@microsoft.onmicrosoft.com/resource/subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourceGroups/ci-prod-aks-mac-weu-rg/providers/Microsoft.ContainerService/managedClusters/ci-prod-aks-mac-weu/overview) +prod=[ci-prod-aks-mac-weu](https://ms.portal.azure.com/#@microsoft.onmicrosoft.com/resource/subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourceGroups/ci-prod-aks-mac-weu-rg/providers/Microsoft.ContainerService/managedClusters/ci-prod-aks-mac-weu/overview) --> [ci-prod-aks-weu-mac](https://ms.portal.azure.com/#@microsoft.onmicrosoft.com/resource/subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourceGroups/ci-prod-aks-mac-weu-rg/providers/microsoft.monitor/accounts/ci-prod-aks-weu-mac/resourceOverviewId) --> 
[cicd-graf-metrics-wcus](https://ms.portal.azure.com/#@microsoft.onmicrosoft.com/resource/subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourceGroups/ci-prod-arc-wcus/providers/Microsoft.Dashboard/grafana/cicd-graf-metrics-wcus/overview) -* ci-dev-aks-mac-eus - [link](https://ms.portal.azure.com/#@microsoft.onmicrosoft.com/resource/subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourceGroups/ci-dev-aks-mac-eus-rg/providers/Microsoft.ContainerService/managedClusters/ci-dev-aks-mac-eus/overview) +[ARC] +dev=[ci-dev-arc-wcus](https://ms.portal.azure.com/#@microsoft.onmicrosoft.com/resource/subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourceGroups/ci-dev-arc-wcus/providers/Microsoft.ContainerService/managedClusters/ci-dev-arc-wcus/overview) --> [ci-dev-arc-amw](https://ms.portal.azure.com/#@microsoft.onmicrosoft.com/resource/subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourceGroups/ci-dev-arc-wcus/providers/microsoft.monitor/accounts/ci-dev-arc-amw/resourceOverviewId) --> [cicd-graf-metrics-wcus](https://ms.portal.azure.com/#@microsoft.onmicrosoft.com/resource/subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourceGroups/ci-prod-arc-wcus/providers/Microsoft.Dashboard/grafana/cicd-graf-metrics-wcus/overview) -* ci-dev-aks-wcus - [link](https://ms.portal.azure.com/#@microsoft.onmicrosoft.com/resource/subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourceGroups/ci-dev-aks-wcus-rg/providers/Microsoft.ContainerService/managedClusters/ci-dev-aks-wcus/overview) +prod=[ci-prod-arc-wcus](https://ms.portal.azure.com/#@microsoft.onmicrosoft.com/resource/subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourceGroups/ci-prod-arc-wcus/providers/Microsoft.ContainerService/managedClusters/ci-prod-arc-wcus/overview)--> 
[ci-prod-arc-wcus](https://ms.portal.azure.com/#@microsoft.onmicrosoft.com/resource/subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourceGroups/ci-prod-arc-wcus/providers/microsoft.monitor/accounts/ci-prod-arc-wcus/resourceOverviewId) --> [cicd-graf-metrics-wcus](https://ms.portal.azure.com/#@microsoft.onmicrosoft.com/resource/subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourceGroups/ci-prod-arc-wcus/providers/Microsoft.Dashboard/grafana/cicd-graf-metrics-wcus/overview) -**Links of Grafana Instances** -* ci-prod-aks-eus - [link](https://ci-prod-aks-eus-graf-dueya5aadyd2ghef.eus.grafana.azure.com) +canary/prod monitoring clusters (cluster --> amw -->grafana) +=========================================================== -* ci-prod-aks-mac-weu - [link](https://ci-prod-aks-weu-graf-fffvdrhqgkg6dxgm.weu.grafana.azure.com) +[monitoring-metrics-prod-aks-eus2euap](https://ms.portal.azure.com/#@microsoft.onmicrosoft.com/resource/subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourceGroups/monitoring-metrics-prod-aks/providers/Microsoft.ContainerService/managedClusters/monitoring-metrics-prod-aks-eus2euap/overview) --> [monitoring-metrics-amw-eus2euap](https://ms.portal.azure.com/#@microsoft.onmicrosoft.com/resource/subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourceGroups/monitoring-metrics-amw/providers/microsoft.monitor/accounts/monitoring-metrics-amw-eus2euap/resourceOverviewId) --> [monitoring-grafana-metrics-westus](https://ms.portal.azure.com/#@microsoft.onmicrosoft.com/resource/subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourceGroups/monitoring-metrics-prod-aks/providers/microsoft.dashboard/grafana/mon-graf-metric-westus/overview) +[monitoring-metrics-prod-aks-wcus](https://ms.portal.azure.com/#@microsoft.onmicrosoft.com/resource/subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourceGroups/monitoring-metrics-prod-aks/providers/Microsoft.ContainerService/managedClusters/monitoring-metrics-prod-aks-wcus/overview) --> 
[monitoring-metrics-amw-wcus](https://ms.portal.azure.com/#@microsoft.onmicrosoft.com/resource/subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourceGroups/monitoring-metrics-amw/providers/microsoft.monitor/accounts/monitoring-metrics-amw-wcus/resourceOverviewId) --> [monitoring-grafana-metrics-westus](https://ms.portal.azure.com/#@microsoft.onmicrosoft.com/resource/subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourceGroups/monitoring-metrics-prod-aks/providers/microsoft.dashboard/grafana/mon-graf-metric-westus/overview) -* ci-dev-aks-mac-eus - [link](https://ci-dev-aks-eus-graf-aje3bpf7d3ctc9h6.eus.grafana.azure.com) -* ci-dev-aks-wcus - [link](https://ci-dev-aks-wcus-graf-acfcb0a6emefghgn.wcus.grafana.azure.com) +**Dashboard for CI CD and prod monitoring clusters** -**Dashboard fo CI CD alerts** -* ci-prod-aks-eus - [link](https://ci-prod-aks-eus-graf-dueya5aadyd2ghef.eus.grafana.azure.com/d/dmzAODN4k/cpu-and-memory-utilization-k-s-m-replicaset-and-daemonset?orgId=1) +* CICD - [link](https://cicd-graf-metrics-wcus-dkechtfecuadeuaw.wcus.grafana.azure.com/d/gp9556IVy/cpu-and-memory-utilization-k-s-m-replicaset-and-daemonset?orgId=1) -* ci-prod-aks-mac-weu - [link](https://ci-prod-aks-weu-graf-fffvdrhqgkg6dxgm.weu.grafana.azure.com/d/gp9556IVz/cpu-and-memory-utilization-k-s-m-replicaset-and-daemonset?orgId=1) - -* ci-dev-aks-mac-eus - [link](https://ci-dev-aks-eus-graf-aje3bpf7d3ctc9h6.eus.grafana.azure.com/d/gp9556IVz/cpu-and-memory-utilization-k-s-m-replicaset-and-daemonset?orgId=1&from=1667414623523&to=1667457823523) - -* ci-dev-aks-wcus - [link](https://ci-dev-aks-wcus-graf-acfcb0a6emefghgn.wcus.grafana.azure.com/d/dmzAODN4k/cpu-and-memory-utilization-k-s-m-replicaset-and-daemonset?orgId=1) +* Prod near ring - [link](https://mon-graf-metric-westus-f5hvdcaxc3hjdcdm.wus.grafana.azure.com/d/gp9556IVy/cpu-and-memory-utilization-k-s-m-replicaset-and-daemonset?orgId=1) diff --git a/internal/monitoring/dashboards/ci-dev-aks-mac-eus-db.json 
b/internal/monitoring/dashboards/ci-dev-aks-mac-eus-db.json deleted file mode 100644 index fe3bd4c72..000000000 --- a/internal/monitoring/dashboards/ci-dev-aks-mac-eus-db.json +++ /dev/null @@ -1,1631 +0,0 @@ -{ - "annotations": { - "list": [ - { - "builtIn": 1, - "datasource": { - "type": "grafana", - "uid": "-- Grafana --" - }, - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "target": { - "limit": 100, - "matchAny": false, - "tags": [], - "type": "dashboard" - }, - "type": "dashboard" - } - ] - }, - "editable": true, - "fiscalYearStartMonth": 0, - "graphTooltip": 0, - "id": 58, - "links": [], - "liveNow": false, - "panels": [ - { - "datasource": { - "type": "prometheus", - "uid": "ci-dev-aks-eus-mac" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 0 - }, - "id": 34, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "ci-dev-aks-eus-mac" - }, - "editorMode": "code", - "expr": 
"absent(node_uname_info{cluster=\"ci-dev-aks-mac-eus\", machine=\"x86_64\"}) == 1 or node_uname_info{cluster=\"ci-dev-aks-mac-eus\", machine=\"x86_64\"} == 0\r\n\r\n", - "legendFormat": "__auto", - "range": true, - "refId": "A" - } - ], - "title": "Amd64 metric missing", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "ci-dev-aks-eus-mac" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [ - { - "__systemRef": "hideSeriesFrom", - "matcher": { - "id": "byNames", - "options": { - "mode": "exclude", - "names": [ - "{__name__=\"node_uname_info\", cluster=\"ci-dev-aks-mac-eus\", domainname=\"(none)\", instance=\"10.240.0.115:9100\", job=\"custom-scrape-job-node\", machine=\"x86_64\", metrics_path=\"/metrics\", nodename=\"aks-agentpool-86683822-vmss000001\", release=\"5.4.0-1091-azure\", sysname=\"linux\", version=\"#96~18.04.1-ubuntu smp tue aug 30 19:15:32 utc 2022\"}" - ], - "prefix": "All except:", - "readOnly": true - } - }, - "properties": [ - { - "id": "custom.hideFrom", - "value": { - "legend": false, - "tooltip": false, - "viz": true - } - } - ] - } - ] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 0 - }, - "id": 32, - "options": { - "legend": { - "calcs": [], - 
"displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "ci-dev-aks-eus-mac" - }, - "editorMode": "code", - "expr": "absent(node_uname_info{cluster=\"ci-dev-aks-mac-eus\", machine=\"aarch64\"}) == 1 or node_uname_info{cluster=\"ci-dev-aks-mac-eus\", machine=\"aarch64\"} == 0\r\n\r\n", - "legendFormat": "__auto", - "range": true, - "refId": "A" - } - ], - "title": "Aarch64 metric missing ", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "ci-dev-aks-eus-mac" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "line" - } - }, - "mappings": [], - "max": 0.00025, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 0.00025 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 8 - }, - "id": 6, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "ci-dev-aks-eus-mac" - }, - "editorMode": "code", - "expr": "(sum ( node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"ci-dev-aks-mac-eus\", 
namespace=\"kube-system\", container!=\"\", pod=~\"ama-metrics.*\"}) - sum ( node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"ci-dev-aks-mac-eus\", namespace=\"kube-system\", pod=~\"ama-metrics-node.*\"}) - sum ( node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"ci-dev-aks-mac-eus\", namespace=\"kube-system\", pod=~\"ama-metrics-ksm.*\"}))/ sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"ci-dev-aks-mac-eus\", namespace=\"kube-system\", resource=\"cpu\"})", - "legendFormat": "__auto", - "range": true, - "refId": "A" - } - ], - "title": "Replicaset CPU %", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "ci-dev-aks-eus-mac" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "line" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 0.00023 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 8 - }, - "id": 8, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "ci-dev-aks-eus-mac" - }, - "editorMode": "code", - "expr": 
"sum(container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"ci-dev-aks-mac-eus\", namespace=\"kube-system\",container!=\"\", image!=\"\", pod=~\"ama-metrics-ksm.*\"}) / sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"ci-dev-aks-mac-eus\", namespace=\"kube-system\", resource=\"memory\"})", - "format": "time_series", - "range": true, - "refId": "A" - } - ], - "title": "K-S-M Memory%", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "ci-dev-aks-eus-mac" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "line" - } - }, - "mappings": [], - "max": 0.0008, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 0.0008 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 16 - }, - "id": 4, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "ci-dev-aks-eus-mac" - }, - "expr": "sum ( node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"ci-dev-aks-mac-eus\", namespace=\"kube-system\", container!=\"\", pod=~\"ama-metrics-node.*\"}) / sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\", 
cluster=\"ci-dev-aks-mac-eus\", namespace=\"kube-system\", resource=\"cpu\"}) ", - "refId": "A" - } - ], - "title": "Daemonset CPU%", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "ci-dev-aks-eus-mac" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "line" - } - }, - "mappings": [], - "max": 0.0074, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 0.0062 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 16 - }, - "id": 10, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "ci-dev-aks-eus-mac" - }, - "expr": "sum(container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"ci-dev-aks-mac-eus\", namespace=\"kube-system\",container!=\"\", image!=\"\", pod=~\"ama-metrics-node.*\"}) / sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"ci-dev-aks-mac-eus\", namespace=\"kube-system\", resource=\"memory\"})", - "refId": "A" - } - ], - "title": "Daemonset Memory%", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "ci-dev-aks-eus-mac" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": 
"palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "line" - } - }, - "mappings": [], - "max": 0.00001183, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 0.00000953 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 9, - "w": 12, - "x": 0, - "y": 24 - }, - "id": 2, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "ci-dev-aks-eus-mac" - }, - "expr": "sum ( node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"ci-dev-aks-mac-eus\", namespace=\"kube-system\", container!=\"\", pod=~\"ama-metrics-ksm.*\"})/ sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"ci-dev-aks-mac-eus\", namespace=\"kube-system\", resource=\"cpu\"})", - "refId": "A" - } - ], - "title": "K-S-M CPU%", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "ci-dev-aks-eus-mac" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - 
"lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "line" - } - }, - "mappings": [], - "max": 0.00265, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 0.00225 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 24 - }, - "id": 12, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "ci-dev-aks-eus-mac" - }, - "expr": "(sum(container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"ci-dev-aks-mac-eus\", namespace=\"kube-system\",container!=\"\", image!=\"\", pod=~\"ama-metrics.*\"}) - sum(container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"ci-dev-aks-mac-eus\", namespace=\"kube-system\",container!=\"\", image!=\"\", pod=~\"ama-metrics-node.*\"}) - sum(container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"ci-dev-aks-mac-eus\", namespace=\"kube-system\",container!=\"\", image!=\"\", pod=~\"ama-metrics-ksm.*\"})) / sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"ci-dev-aks-mac-eus\", namespace=\"kube-system\", resource=\"memory\"})", - "refId": "A" - } - ], - "title": "Replicaset Memory%", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "ci-dev-aks-eus-mac" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - 
"tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 32 - }, - "id": 28, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "ci-dev-aks-eus-mac" - }, - "editorMode": "code", - "expr": "absent(up{cluster=\"ci-dev-aks-mac-eus\", job=\"cadvisor\"}) == 1 or up{cluster=\"ci-dev-aks-mac-eus\", job=\"cadvisor\"} == 0", - "range": true, - "refId": "A" - } - ], - "title": "up metric missing for target = cadvisor in cluster ci-dev-aks-mac-eus", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "ci-dev-aks-eus-mac" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] 
- } - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 33 - }, - "id": 30, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "ci-dev-aks-eus-mac" - }, - "editorMode": "code", - "expr": " up{cluster=\"ci-dev-aks-mac-eus\", job=\"kube-dns\"} == 0", - "range": true, - "refId": "A" - } - ], - "title": "up metric missing for target = kube-dns in cluster ci-dev-aks-mac-eus", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "ci-dev-aks-eus-mac" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 40 - }, - "id": 24, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "ci-dev-aks-eus-mac" - }, - "editorMode": "code", - "expr": " up{cluster=\"ci-dev-aks-mac-eus\", job=\"kube-proxy-windows\"} == 0", - "range": true, - "refId": 
"A" - } - ], - "title": "up metric missing for target = kube-proxy-windows in cluster ci-dev-aks-mac-eus", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "ci-dev-aks-eus-mac" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [ - { - "__systemRef": "hideSeriesFrom", - "matcher": { - "id": "byNames", - "options": { - "mode": "exclude", - "names": [ - "Up{cluster=\"ci-dev-aks-mac-eus\", instance=\"ama-metrics-ksm.kube-system.svc.cluster.local:8080\", job=\"kube-state-metrics\"}" - ], - "prefix": "All except:", - "readOnly": true - } - }, - "properties": [ - { - "id": "custom.hideFrom", - "value": { - "legend": false, - "tooltip": false, - "viz": true - } - } - ] - } - ] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 41 - }, - "id": 26, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "ci-dev-aks-eus-mac" - }, - "expr": "absent(up{cluster=\"ci-dev-aks-mac-eus\", job=\"kube-state-metrics\"}) == 1 or up{cluster=\"ci-dev-aks-mac-eus\", job=\"kube-state-metrics\"} == 0", - "refId": "A" - } - 
], - "title": "up metric missing for target = kube-state-metrics in cluster ci-dev-aks-mac-eus", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "ci-dev-aks-eus-mac" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 48 - }, - "id": 20, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "ci-dev-aks-eus-mac" - }, - "editorMode": "code", - "expr": " up{cluster=\"ci-dev-aks-mac-eus\", job=\"kube-proxy\"} == 0", - "range": true, - "refId": "A" - } - ], - "title": "up metric missing for target = kube-proxy in cluster ci-dev-aks-mac-eus", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "ci-dev-aks-eus-mac" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - 
"legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 49 - }, - "id": 22, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "ci-dev-aks-eus-mac" - }, - "editorMode": "code", - "expr": " up{cluster=\"ci-dev-aks-mac-eus\", job=\"kube-apiserver\"} == 0", - "range": true, - "refId": "A" - } - ], - "title": "up metric missing for target = kube-apiserver in cluster ci-prod-aks-mac-weu", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "ci-dev-aks-eus-mac" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - 
"gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 56 - }, - "id": 16, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "ci-dev-aks-eus-mac" - }, - "editorMode": "code", - "expr": " up{cluster=\"ci-dev-aks-mac-eus\", job=\"kubelet \"} == 0", - "range": true, - "refId": "A" - } - ], - "title": "up metric missing for target = kubelet in cluster ci-dev-aks-mac-eus", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "ci-dev-aks-eus-mac" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 57 - }, - "id": 18, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "ci-dev-aks-eus-mac" - }, - "editorMode": "code", - "expr": " up{cluster=\"ci-dev-aks-mac-eus\", job=\"windows-exporter\"} == 0", - "range": true, - "refId": "A" - } - ], - "title": "up metric 
missing for target = windows-exporter in cluster ci-dev-aks-mac-eus", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "ci-dev-aks-eus-mac" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 65 - }, - "id": 14, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "ci-dev-aks-eus-mac" - }, - "expr": "absent(up{cluster=\"ci-dev-aks-mac-eus\", job=\"node\"}) == 1 or up{cluster=\"ci-dev-aks-mac-eus\", job=\"node\"} == 0", - "refId": "A" - } - ], - "title": "up metric missing for target = node in cluster ci-dev-aks-mac-eus", - "type": "timeseries" - } - ], - "refresh": false, - "schemaVersion": 37, - "style": "dark", - "tags": [], - "templating": { - "list": [] - }, - "time": { - "from": "now-12h", - "to": "now" - }, - "timepicker": {}, - "timezone": "", - "title": "CPU and Memory utilization k-s-m, replicaset and daemonset", - "uid": "gp9556IVz", - "version": 15, - "weekStart": "" -} \ No newline at end of file diff --git 
a/internal/monitoring/dashboards/ci-dev-aks-wcus-db.json b/internal/monitoring/dashboards/ci-dev-aks-wcus-db.json deleted file mode 100644 index 8852bc26d..000000000 --- a/internal/monitoring/dashboards/ci-dev-aks-wcus-db.json +++ /dev/null @@ -1,1634 +0,0 @@ -{ - "annotations": { - "list": [ - { - "builtIn": 1, - "datasource": { - "type": "grafana", - "uid": "-- Grafana --" - }, - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "target": { - "limit": 100, - "matchAny": false, - "tags": [], - "type": "dashboard" - }, - "type": "dashboard" - } - ] - }, - "editable": true, - "fiscalYearStartMonth": 0, - "graphTooltip": 0, - "id": 58, - "links": [], - "liveNow": false, - "panels": [ - { - "datasource": { - "type": "prometheus", - "uid": "PpvC3iv4k" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [ - { - "__systemRef": "hideSeriesFrom", - "matcher": { - "id": "byNames", - "options": { - "mode": "exclude", - "names": [ - "{__name__=\"node_uname_info\", cluster=\"ci-dev-aks-wcus\", domainname=\"(none)\", instance=\"aks-lrnm-38217791-vmss000004\", job=\"node\", machine=\"x86_64\", metrics_path=\"/metrics\", nodename=\"aks-lrnm-38217791-vmss000004\", 
release=\"5.4.0-1091-azure\", sysname=\"linux\", version=\"#96~18.04.1-ubuntu smp tue aug 30 19:15:32 utc 2022\"}" - ], - "prefix": "All except:", - "readOnly": true - } - }, - "properties": [ - { - "id": "custom.hideFrom", - "value": { - "legend": false, - "tooltip": false, - "viz": true - } - } - ] - } - ] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 0 - }, - "id": 32, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PpvC3iv4k" - }, - "editorMode": "code", - "expr": "absent(node_uname_info{cluster=\"ci-dev-aks-wcus\", machine=\"x86_64\"}) == 1 or node_uname_info{cluster=\"ci-dev-aks-wcus\", machine=\"x86_64\"} == 0", - "legendFormat": "__auto", - "range": true, - "refId": "A" - } - ], - "title": "Amd64 metric missing", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "PpvC3iv4k" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 0 - }, - "id": 34, - "options": { - "legend": { - "calcs": [], - 
"displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PpvC3iv4k" - }, - "editorMode": "code", - "expr": "absent(node_uname_info{cluster=\"ci-dev-aks-wcus\", machine=\"aarch64\"}) == 1 or node_uname_info{cluster=\"ci-dev-aks-wcus\", machine=\"aarch64\"} == 0", - "legendFormat": "__auto", - "range": true, - "refId": "A" - } - ], - "title": "aarch64 metric missing", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "PpvC3iv4k" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "line" - } - }, - "mappings": [], - "max": 0.00014, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 0.00014 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 8 - }, - "id": 6, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PpvC3iv4k" - }, - "expr": "sum(sum by (cluster, namespace, pod, container) ( irate(container_cpu_usage_seconds_total{ image!=\"\",cluster=\"ci-dev-aks-wcus\", pod=~\".*kube-state-metrics.*\"}[5m])) * on (cluster, 
namespace, pod) group_left(node) topk by (cluster, namespace, pod) ( 1, max by(cluster, namespace, pod, node) (kube_pod_info{node!=\"\"}))) / sum(kube_pod_container_resource_limits{cluster=\"ci-dev-aks-wcus\", namespace=\"monitoring\", resource=\"cpu\"})", - "refId": "A" - } - ], - "title": "K-S-M CPU%", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "PpvC3iv4k" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "line" - } - }, - "mappings": [], - "max": 0.033, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 0.033 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 8 - }, - "id": 8, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PpvC3iv4k" - }, - "expr": "sum(container_memory_working_set_bytes{cluster=\"ci-dev-aks-wcus\", namespace=\"monitoring\",container!=\"\", image!=\"\", pod=~\".*prometheus-collector-node.*\"}) / sum(kube_pod_container_resource_limits{cluster=\"ci-dev-aks-wcus\", namespace=\"monitoring\", resource=\"memory\"})", - "refId": "A" - } - ], - "title": "Daemonset Memory%", - "type": "timeseries" - }, - { - "datasource": { - "type": 
"prometheus", - "uid": "PpvC3iv4k" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "line" - } - }, - "mappings": [], - "max": 0.0078, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 0.0078 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 16 - }, - "id": 4, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PpvC3iv4k" - }, - "editorMode": "code", - "expr": "sum(sum by (cluster, namespace, pod, container) ( irate(container_cpu_usage_seconds_total{image!=\"\",cluster=\"ci-dev-aks-wcus\", pod=~\".*prometheus-collector-node.*\"}[5m])) * on (cluster, namespace, pod) group_left(node) topk by (cluster, namespace, pod) ( 1, max by(cluster, namespace, pod, node) (kube_pod_info{node!=\"\"}))) / sum(kube_pod_container_resource_limits{cluster=\"ci-dev-aks-wcus\", namespace=\"monitoring\", resource=\"cpu\"})", - "range": true, - "refId": "A" - } - ], - "title": "Daemonset CPU%", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "PpvC3iv4k" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, 
- "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "line" - } - }, - "mappings": [], - "max": 0.002, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 0.002 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 16 - }, - "id": 10, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PpvC3iv4k" - }, - "editorMode": "code", - "expr": "sum(container_memory_working_set_bytes{cluster=\"ci-dev-aks-wcus\", namespace=\"monitoring\",container!=\"\", image!=\"\", pod=~\".*kube-state-metrics.*\"}) / sum(kube_pod_container_resource_limits{cluster=\"ci-dev-aks-wcus\", namespace=\"monitoring\", resource=\"memory\"})", - "range": true, - "refId": "A" - } - ], - "title": "K-S-M Memory%", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "PpvC3iv4k" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - 
"type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "line" - } - }, - "mappings": [], - "max": 0.103, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 0.103 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 24 - }, - "id": 2, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PpvC3iv4k" - }, - "expr": "sum(sum by (cluster, namespace, pod, container) ( irate(container_cpu_usage_seconds_total{image!=\"\",cluster=\"ci-dev-aks-wcus\", pod=~\".*prometheus-collector.*\"}[5m])) * on (cluster, namespace, pod) group_left(node) topk by (cluster, namespace, pod) ( 1, max by(cluster, namespace, pod, node) (kube_pod_info{node!=\"\"}))) - sum(sum by (cluster, namespace, pod, container) ( irate(container_cpu_usage_seconds_total{job=\"cadvisor\", image!=\"\",cluster=\"ci-dev-aks-wcus\", pod=~\".*prometheus-collector-node.*\"}[5m])) * on (cluster, namespace, pod) group_left(node) topk by (cluster, namespace, pod) ( 1, max by(cluster, namespace, pod, node) (kube_pod_info{node!=\"\"}))) / sum(kube_pod_container_resource_limits{cluster=\"ci-dev-aks-wcus\", namespace=\"monitoring\", resource=\"cpu\"})", - "refId": "A" - } - ], - "title": "Replicaset CPU %", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "PpvC3iv4k" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": 
false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "line" - } - }, - "mappings": [], - "max": 0.0157, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 0.0157 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 24 - }, - "id": 12, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PpvC3iv4k" - }, - "expr": "(sum(container_memory_working_set_bytes{cluster=\"ci-dev-aks-wcus\", namespace=\"monitoring\",container!=\"\", image!=\"\", pod=~\".*prometheus-collector.*\"}) - sum(container_memory_working_set_bytes{cluster=\"ci-dev-aks-wcus\", namespace=\"monitoring\",container!=\"\", image!=\"\", pod=~\".*prometheus-collector-node.*\"})) / sum(kube_pod_container_resource_limits{cluster=\"ci-dev-aks-wcus\", namespace=\"monitoring\", resource=\"memory\"})", - "refId": "A" - } - ], - "title": "Replicaset Memory%", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "PpvC3iv4k" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - 
"spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [ - { - "__systemRef": "hideSeriesFrom", - "matcher": { - "id": "byNames", - "options": { - "mode": "exclude", - "names": [ - "Up{cluster=\"ci-dev-aks-wcus\", instance=\"10.240.0.61:9153\", job=\"kube-dns\", pod=\"coredns-59b6bf8b4f-p7c67\"}" - ], - "prefix": "All except:", - "readOnly": true - } - }, - "properties": [ - { - "id": "custom.hideFrom", - "value": { - "legend": false, - "tooltip": false, - "viz": true - } - } - ] - } - ] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 32 - }, - "id": 14, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PpvC3iv4k" - }, - "editorMode": "code", - "expr": "absent(up{cluster=\"ci-dev-aks-wcus\", job=\"node\"}) == 1 or up{cluster=\"ci-dev-aks-wcus\", job=\"node\"} == 0\r\n", - "format": "time_series", - "range": true, - "refId": "A" - } - ], - "title": "up metric missing for target = node", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "PpvC3iv4k" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - 
"stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 32 - }, - "id": 22, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PpvC3iv4k" - }, - "expr": "absent(up{cluster=\"ci-dev-aks-wcus\", job=\"kube-apiserver\"}) == 1 or up{cluster=\"ci-dev-aks-wcus\", job=\"kube-apiserver\"} == 0\r\n", - "refId": "A" - } - ], - "title": "up metric missing for target = kube-apiserver ", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "PpvC3iv4k" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 40 - }, - "id": 20, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - 
"mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PpvC3iv4k" - }, - "expr": "absent(up{cluster=\"ci-dev-aks-wcus\", job=\"kube-proxy\"}) == 1 or up{cluster=\"ci-dev-aks-wcus\", job=\"kube-proxy\"} == 0\r\n", - "refId": "A" - } - ], - "title": "up metric missing for target = kube-proxy ", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "PpvC3iv4k" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 40 - }, - "id": 16, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PpvC3iv4k" - }, - "editorMode": "code", - "expr": "absent(up{cluster=\"ci-dev-aks-wcus\", job=\"kubelet\"}) == 1 or up{cluster=\"ci-dev-aks-wcus\", job=\"kubelet\"} == 0\r\n", - "range": true, - "refId": "A" - } - ], - "title": "up metric missing for target = kubelet", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "PpvC3iv4k" - }, - "fieldConfig": { - 
"defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 48 - }, - "id": 24, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PpvC3iv4k" - }, - "editorMode": "code", - "expr": "absent(up{cluster=\"ci-dev-aks-wcus\", job=\"kube-proxy-windows\"}) == 1 or up{cluster=\"ci-dev-aks-wcus\", job=\"kube-proxy-windows\"} == 0\r\n", - "range": true, - "refId": "A" - } - ], - "title": "up metric missing for target = kube-proxy-windows", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "PpvC3iv4k" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - 
"showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 48 - }, - "id": 18, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PpvC3iv4k" - }, - "editorMode": "code", - "expr": "absent(up{cluster=\"ci-dev-aks-wcus\", job=\"windows-exporter\"}) == 1 or up{cluster=\"ci-dev-aks-wcus\", job=\"windows-exporter\"} == 0\r\n\r\n", - "range": true, - "refId": "A" - } - ], - "title": "up metric missing for target = windows-exporter", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "PpvC3iv4k" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 56 - }, - "id": 26, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - 
"placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PpvC3iv4k" - }, - "expr": "\r\nabsent(up{cluster=\"ci-dev-aks-wcus\", job=\"kube-state-metrics\"}) == 1 or up{cluster=\"ci-dev-aks-wcus\", job=\"kube-state-metrics\"} == 0\r\n", - "refId": "A" - } - ], - "title": "up metric missing for target = kube-state-metrics ", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "PpvC3iv4k" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 56 - }, - "id": 28, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PpvC3iv4k" - }, - "expr": "absent(up{cluster=\"ci-dev-aks-wcus\", job=\"cadvisor\"}) == 1 or up{cluster=\"ci-dev-aks-wcus\", job=\"cadvisor\"} == 0\r\n", - "refId": "A" - } - ], - "title": "up metric missing for target = cadvisor", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": 
"PpvC3iv4k" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 64 - }, - "id": 30, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PpvC3iv4k" - }, - "editorMode": "code", - "expr": "\r\nabsent(up{cluster=\"ci-dev-aks-wcus\", job=\"kube-dns\"}) == 1 or up{cluster=\"ci-dev-aks-wcus\", job=\"kube-dns\"} == 0\r\n", - "range": true, - "refId": "A" - } - ], - "title": "up metric missing for target = kube-dns ", - "type": "timeseries" - } - ], - "refresh": false, - "schemaVersion": 37, - "style": "dark", - "tags": [], - "templating": { - "list": [] - }, - "time": { - "from": "now-6h", - "to": "now" - }, - "timepicker": {}, - "timezone": "", - "title": "CPU and Memory utilization k-s-m, replicaset and daemonset", - "uid": "dmzAODN4k", - "version": 10, - "weekStart": "" -} \ No newline at end of file diff --git a/internal/monitoring/dashboards/ci-prod-aks-eus-db.json b/internal/monitoring/dashboards/ci-prod-aks-eus-db.json deleted file mode 100644 index 6adf58d64..000000000 --- 
a/internal/monitoring/dashboards/ci-prod-aks-eus-db.json +++ /dev/null @@ -1,1625 +0,0 @@ -{ - "annotations": { - "list": [ - { - "builtIn": 1, - "datasource": { - "type": "grafana", - "uid": "-- Grafana --" - }, - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "target": { - "limit": 100, - "matchAny": false, - "tags": [], - "type": "dashboard" - }, - "type": "dashboard" - } - ] - }, - "editable": true, - "fiscalYearStartMonth": 0, - "graphTooltip": 0, - "id": 58, - "links": [], - "liveNow": false, - "panels": [ - { - "datasource": { - "type": "prometheus", - "uid": "PB5CBmv4z" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 0 - }, - "id": 32, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PB5CBmv4z" - }, - "editorMode": "code", - "expr": "absent(node_uname_info{cluster=\"ci-prod-aks-eus\", machine=\"x86_64\"}) == 1 or node_uname_info{cluster=\"ci-prod-aks-eus\", machine=\"x86_64\"} == 0", - 
"legendFormat": "__auto", - "range": true, - "refId": "A" - } - ], - "title": "Amd64 metric missing", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "PB5CBmv4z" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 0 - }, - "id": 34, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PB5CBmv4z" - }, - "editorMode": "code", - "expr": "absent(node_uname_info{cluster=\"ci-prod-aks-eus\", machine=\"aarch64\"}) == 1 or node_uname_info{cluster=\"ci-prod-aks-eus\", machine=\"aarch64\"} == 0", - "legendFormat": "__auto", - "range": true, - "refId": "A" - } - ], - "title": "aarch64 metric missing", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "PB5CBmv4z" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", 
- "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "line" - } - }, - "mappings": [], - "max": 0.000238, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 0.000238 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 8 - }, - "id": 6, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PB5CBmv4z" - }, - "editorMode": "code", - "expr": "sum(sum by (cluster, namespace, pod, container) ( irate(container_cpu_usage_seconds_total{ image!=\"\",cluster=\"ci-prod-aks-eus\", pod=~\".*kube-state-metrics.*\"}[5m])) * on (cluster, namespace, pod) group_left(node) topk by (cluster, namespace, pod) ( 1, max by(cluster, namespace, pod, node) (kube_pod_info{node!=\"\"}))) / sum(kube_pod_container_resource_limits{cluster=\"ci-prod-aks-eus\", namespace=\"monitoring\", resource=\"cpu\"})", - "range": true, - "refId": "A" - } - ], - "title": "K-S-M CPU%", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "PB5CBmv4z" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 
1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "line" - } - }, - "mappings": [], - "max": 0.04, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 0.04 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 8 - }, - "id": 8, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PB5CBmv4z" - }, - "editorMode": "code", - "expr": "sum(container_memory_working_set_bytes{cluster=\"ci-prod-aks-eus\", namespace=\"monitoring\",container!=\"\", image!=\"\", pod=~\".*prometheus-collector-node.*\"}) / sum(kube_pod_container_resource_limits{cluster=\"ci-prod-aks-eus\", namespace=\"monitoring\", resource=\"memory\"})", - "range": true, - "refId": "A" - } - ], - "title": "Daemonset Memory%", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "PB5CBmv4z" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "line" - } - }, - "mappings": [], - "max": 0.0105, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": 
"green", - "value": null - }, - { - "color": "red", - "value": 0.0105 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 16 - }, - "id": 4, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PB5CBmv4z" - }, - "editorMode": "code", - "expr": "sum(sum by (cluster, namespace, pod, container) ( irate(container_cpu_usage_seconds_total{image!=\"\",cluster=\"ci-prod-aks-eus\", pod=~\".*prometheus-collector-node.*\"}[5m])) * on (cluster, namespace, pod) group_left(node) topk by (cluster, namespace, pod) ( 1, max by(cluster, namespace, pod, node) (kube_pod_info{node!=\"\"}))) / sum(kube_pod_container_resource_limits{cluster=\"ci-prod-aks-eus\", namespace=\"monitoring\", resource=\"cpu\"})", - "range": true, - "refId": "A" - } - ], - "title": "Daemonset CPU%", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "PB5CBmv4z" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "line" - } - }, - "mappings": [], - "max": 0.0022, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 0.0022 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 
16 - }, - "id": 10, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PB5CBmv4z" - }, - "editorMode": "code", - "expr": "sum(container_memory_working_set_bytes{cluster=\"ci-prod-aks-eus\", namespace=\"monitoring\",container!=\"\", image!=\"\", pod=~\".*kube-state-metrics.*\"}) / sum(kube_pod_container_resource_limits{cluster=\"ci-prod-aks-eus\", namespace=\"monitoring\", resource=\"memory\"})", - "range": true, - "refId": "A" - } - ], - "title": "K-S-M Memory%", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "PB5CBmv4z" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "line" - } - }, - "mappings": [], - "max": 0.28, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 0.28 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 24 - }, - "id": 2, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PB5CBmv4z" - }, - "editorMode": "code", - "expr": "sum(sum by 
(cluster, namespace, pod, container) ( irate(container_cpu_usage_seconds_total{image!=\"\",cluster=\"ci-prod-aks-eus\", pod=~\".*prometheus-collector.*\"}[5m])) * on (cluster, namespace, pod) group_left(node) topk by (cluster, namespace, pod) ( 1, max by(cluster, namespace, pod, node) (kube_pod_info{node!=\"\"}))) - sum(sum by (cluster, namespace, pod, container) ( irate(container_cpu_usage_seconds_total{job=\"cadvisor\", image!=\"\",cluster=\"ci-prod-aks-eus\", pod=~\".*prometheus-collector-node.*\"}[5m])) * on (cluster, namespace, pod) group_left(node) topk by (cluster, namespace, pod) ( 1, max by(cluster, namespace, pod, node) (kube_pod_info{node!=\"\"}))) / sum(kube_pod_container_resource_limits{cluster=\"ci-prod-aks-eus\", namespace=\"monitoring\", resource=\"cpu\"})", - "range": true, - "refId": "A" - } - ], - "title": "Replicaset CPU %", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "PB5CBmv4z" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "line" - } - }, - "mappings": [], - "max": 0.023, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 0.023 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 24 - }, - "id": 12, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - 
"tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PB5CBmv4z" - }, - "editorMode": "code", - "expr": "(sum(container_memory_working_set_bytes{cluster=\"ci-prod-aks-eus\", namespace=\"monitoring\",container!=\"\", image!=\"\", pod=~\".*prometheus-collector.*\"}) - sum(container_memory_working_set_bytes{cluster=\"ci-prod-aks-eus\", namespace=\"monitoring\",container!=\"\", image!=\"\", pod=~\".*prometheus-collector-node.*\"})) / sum(kube_pod_container_resource_limits{cluster=\"ci-prod-aks-eus\", namespace=\"monitoring\", resource=\"memory\"})", - "range": true, - "refId": "A" - } - ], - "title": "Replicaset Memory%", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "PB5CBmv4z" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [ - { - "__systemRef": "hideSeriesFrom", - "matcher": { - "id": "byNames", - "options": { - "mode": "exclude", - "names": [ - "Up{cluster=\"cimon-aks-wcus\", instance=\"10.240.0.61:9153\", job=\"kube-dns\", pod=\"coredns-59b6bf8b4f-p7c67\"}" - ], - "prefix": "All except:", - "readOnly": true - } - }, - "properties": [ - { - "id": "custom.hideFrom", - "value": { - "legend": false, - 
"tooltip": false, - "viz": true - } - } - ] - } - ] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 32 - }, - "id": 14, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PB5CBmv4z" - }, - "editorMode": "code", - "expr": "absent(up{cluster=\"ci-prod-aks-eus\", job=\"node\"}) == 1 or up{cluster=\"ci-prod-aks-eus\", job=\"node\"} == 0\r\n", - "format": "time_series", - "range": true, - "refId": "A" - } - ], - "title": "up metric missing for target = node", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "PB5CBmv4z" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 32 - }, - "id": 22, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PB5CBmv4z" - }, - "editorMode": "code", - "expr": 
"absent(up{cluster=\"ci-prod-aks-eus\", job=\"kube-apiserver\"}) == 1 or up{cluster=\"ci-prod-aks-eus\", job=\"kube-apiserver\"} == 0\r\n", - "range": true, - "refId": "A" - } - ], - "title": "up metric missing for target = kube-apiserver ", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "PB5CBmv4z" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 40 - }, - "id": 20, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PB5CBmv4z" - }, - "editorMode": "code", - "expr": "absent(up{cluster=\"ci-prod-aks-eus\", job=\"kube-proxy\"}) == 1 or up{cluster=\"ci-prod-aks-eus\", job=\"kube-proxy\"} == 0\r\n", - "range": true, - "refId": "A" - } - ], - "title": "up metric missing for target = kube-proxy ", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "PB5CBmv4z" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - 
"axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 40 - }, - "id": 16, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PB5CBmv4z" - }, - "editorMode": "code", - "expr": "absent(up{cluster=\"ci-prod-aks-eus\", job=\"kubelet\"}) == 1 or up{cluster=\"ci-prod-aks-eus\", job=\"kubelet\"} == 0\r\n", - "range": true, - "refId": "A" - } - ], - "title": "up metric missing for target = kubelet", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "PB5CBmv4z" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - 
"mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 48 - }, - "id": 24, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PB5CBmv4z" - }, - "editorMode": "code", - "expr": " up{cluster=\"ci-prod-aks-eus\", job=\"kube-proxy-windows\"} == 0", - "range": true, - "refId": "A" - } - ], - "title": "up metric missing for target = kube-proxy-windows", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus-mdm" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 48 - }, - "id": 18, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus-mdm" - 
}, - "editorMode": "code", - "expr": " up{cluster=\"ci-prod-aks-eus\", job=\"windows-exporter\"} == 0 \r\n", - "range": true, - "refId": "A" - } - ], - "title": "up metric missing for target = windows-exporter", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "PB5CBmv4z" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 56 - }, - "id": 26, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PB5CBmv4z" - }, - "editorMode": "code", - "expr": "\r\nabsent(up{cluster=\"ci-prod-aks-eus\", job=\"kube-state-metrics\"}) == 1 or up{cluster=\"ci-prod-aks-eus\", job=\"kube-state-metrics\"} == 0\r\n", - "range": true, - "refId": "A" - } - ], - "title": "up metric missing for target = kube-state-metrics ", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "PB5CBmv4z" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", 
- "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 56 - }, - "id": 28, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PB5CBmv4z" - }, - "editorMode": "code", - "expr": "absent(up{cluster=\"ci-prod-aks-eus\", job=\"cadvisor\"}) == 1 or up{cluster=\"ci-prod-aks-eus\", job=\"cadvisor\"} == 0\r\n", - "range": true, - "refId": "A" - } - ], - "title": "up metric missing for target = cadvisor", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "PB5CBmv4z" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - 
"thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 64 - }, - "id": 30, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PB5CBmv4z" - }, - "editorMode": "code", - "expr": "\r\nabsent(up{cluster=\"ci-prod-aks-eus\", job=\"kube-dns\"}) == 1 or up{cluster=\"ci-prod-aks-eus\", job=\"kube-dns\"} == 0\r\n", - "range": true, - "refId": "A" - } - ], - "title": "up metric missing for target = kube-dns ", - "type": "timeseries" - } - ], - "refresh": false, - "schemaVersion": 37, - "style": "dark", - "tags": [], - "templating": { - "list": [] - }, - "time": { - "from": "now-6h", - "to": "now" - }, - "timepicker": {}, - "timezone": "", - "title": "CPU and Memory utilization k-s-m, replicaset and daemonset", - "uid": "dmzAODN4k", - "version": 45, - "weekStart": "" -} \ No newline at end of file diff --git a/internal/monitoring/dashboards/ci-prod-aks-mac-weu-db.json b/internal/monitoring/dashboards/ci-prod-aks-mac-weu-db.json deleted file mode 100644 index f5dd9758b..000000000 --- a/internal/monitoring/dashboards/ci-prod-aks-mac-weu-db.json +++ /dev/null @@ -1,1552 +0,0 @@ -{ - "annotations": { - "list": [ - { - "builtIn": 1, - "datasource": { - "type": "grafana", - "uid": "-- Grafana --" - }, - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "target": { - "limit": 100, - "matchAny": false, - "tags": [], - "type": "dashboard" - }, - "type": "dashboard" - } - ] - }, - "editable": true, - "fiscalYearStartMonth": 0, - "graphTooltip": 0, - "id": 58, - "links": [], - "liveNow": false, - "panels": [ - { - "datasource": { - "type": "prometheus", - "uid": "ci-prod-aks-weu-mac" 
- }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 0 - }, - "id": 32, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "ci-prod-aks-weu-mac" - }, - "editorMode": "code", - "expr": "absent(node_uname_info{cluster=\"ci-prod-aks-mac-weu\", machine=\"x86_64\"}) == 1 or node_uname_info{cluster=\"ci-prod-aks-mac-weu\", machine=\"x86_64\"} == 0", - "legendFormat": "__auto", - "range": true, - "refId": "A" - } - ], - "title": "Amd64 metric missing ", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "ci-prod-aks-weu-mac" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, 
- "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 0 - }, - "id": 34, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "ci-prod-aks-weu-mac" - }, - "editorMode": "code", - "expr": "absent(node_uname_info{cluster=\"ci-prod-aks-mac-weu\", machine=\"aarch64\"}) == 1 or node_uname_info{cluster=\"ci-prod-aks-mac-weu\", machine=\"aarch64\"} == 0", - "legendFormat": "__auto", - "range": true, - "refId": "A" - } - ], - "title": "aarch64 metric missing", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "ci-prod-aks-weu-mac" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "line" - } - }, - "mappings": [], - "max": 0.0002, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 0.0002 - } - ] - } - }, - 
"overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 8 - }, - "id": 6, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "ci-prod-aks-weu-mac" - }, - "expr": "(sum ( node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"ci-prod-aks-mac-weu\", namespace=\"kube-system\", container!=\"\", pod=~\"ama-metrics.*\"}) - sum ( node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"ci-prod-aks-mac-weu\", namespace=\"kube-system\", pod=~\"ama-metrics-node.*\"}) - sum ( node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"ci-prod-aks-mac-weu\", namespace=\"kube-system\", pod=~\"ama-metrics-ksm.*\"}))/ sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"ci-prod-aks-mac-weu\", namespace=\"kube-system\", resource=\"cpu\"})", - "refId": "A" - } - ], - "title": "Replicaset CPU %", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "ci-prod-aks-weu-mac" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "line" - } - }, - "mappings": [], - "max": 0.00023, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": 
"red", - "value": 0.00023 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 8 - }, - "id": 8, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "ci-prod-aks-weu-mac" - }, - "expr": "sum(container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"ci-prod-aks-mac-weu\", namespace=\"kube-system\",container!=\"\", image!=\"\", pod=~\"ama-metrics-ksm.*\"}) / sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"ci-prod-aks-mac-weu\", namespace=\"kube-system\", resource=\"memory\"})", - "refId": "A" - } - ], - "title": "K-S-M Memory%", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "ci-prod-aks-weu-mac" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "line" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 0.00071 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 16 - }, - "id": 4, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - 
"datasource": { - "type": "prometheus", - "uid": "ci-prod-aks-weu-mac" - }, - "expr": "sum ( node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"ci-prod-aks-mac-weu\", namespace=\"kube-system\", container!=\"\", pod=~\"ama-metrics-node.*\"}) / sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"ci-prod-aks-mac-weu\", namespace=\"kube-system\", resource=\"cpu\"}) ", - "refId": "A" - } - ], - "title": "Daemonset CPU%", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "ci-prod-aks-weu-mac" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "line" - } - }, - "mappings": [], - "max": 0.0074, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 0.0074 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 16 - }, - "id": 10, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "ci-prod-aks-weu-mac" - }, - "expr": "sum(container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"ci-prod-aks-mac-weu\", namespace=\"kube-system\",container!=\"\", image!=\"\", pod=~\"ama-metrics-node.*\"}) / 
sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"ci-prod-aks-mac-weu\", namespace=\"kube-system\", resource=\"memory\"})", - "refId": "A" - } - ], - "title": "Daemonset Memory%", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "ci-prod-aks-weu-mac" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "line" - } - }, - "mappings": [], - "max": 0.00000953, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 0.00000953 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 9, - "w": 12, - "x": 0, - "y": 24 - }, - "id": 2, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "ci-prod-aks-weu-mac" - }, - "editorMode": "code", - "expr": "sum ( node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"ci-prod-aks-mac-weu\", namespace=\"kube-system\", container!=\"\", pod=~\"ama-metrics-ksm.*\"})/ sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"ci-prod-aks-mac-weu\", namespace=\"kube-system\", resource=\"cpu\"})", - "legendFormat": "__auto", - "range": true, - "refId": "A" - } - ], - "title": "K-S-M CPU%", - "type": "timeseries" - }, - { - 
"datasource": { - "type": "prometheus", - "uid": "ci-prod-aks-weu-mac" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "line" - } - }, - "mappings": [], - "max": 0.00265, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 0.00265 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 24 - }, - "id": 12, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "ci-prod-aks-weu-mac" - }, - "expr": "(sum(container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"ci-prod-aks-mac-weu\", namespace=\"kube-system\",container!=\"\", image!=\"\", pod=~\"ama-metrics.*\"}) - sum(container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"ci-prod-aks-mac-weu\", namespace=\"kube-system\",container!=\"\", image!=\"\", pod=~\"ama-metrics-node.*\"}) - sum(container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"ci-prod-aks-mac-weu\", namespace=\"kube-system\",container!=\"\", image!=\"\", pod=~\"ama-metrics-ksm.*\"})) / sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"ci-prod-aks-mac-weu\", namespace=\"kube-system\", resource=\"memory\"})", - "refId": "A" - } - ], - "title": "Replicaset 
Memory%", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "ci-prod-aks-weu-mac" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 32 - }, - "id": 28, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "ci-prod-aks-weu-mac" - }, - "expr": "absent(up{cluster=\"ci-prod-aks-mac-weu\", job=\"cadvisor\"}) == 1 or up{cluster=\"ci-prod-aks-mac-weu\", job=\"cadvisor\"} == 0", - "refId": "A" - } - ], - "title": "up metric missing for target = cadvisor in cluster ci-prod-aks-mac-weu", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "ci-prod-aks-weu-mac" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - 
"lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 33 - }, - "id": 30, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "ci-prod-aks-weu-mac" - }, - "expr": "absent(up{cluster=\"ci-prod-aks-mac-weu\", job=\"kube-dns\"}) == 1 or up{cluster=\"ci-prod-aks-mac-weu\", job=\"kube-dns\"} == 0", - "refId": "A" - } - ], - "title": "up metric missing for target = kube-dns in cluster ci-prod-aks-mac-weu", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "ci-prod-aks-weu-mac" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 
12, - "y": 40 - }, - "id": 24, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "ci-prod-aks-weu-mac" - }, - "expr": "absent(up{cluster=\"ci-prod-aks-mac-weu\", job=\"kube-proxy-windows\"}) == 1 or up{cluster=\"ci-prod-aks-mac-weu\", job=\"kube-proxy-windows\"} == 0", - "refId": "A" - } - ], - "title": "up metric missing for target = kube-proxy-windows in cluster ci-prod-aks-mac-weu", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "ci-prod-aks-weu-mac" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 41 - }, - "id": 26, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "ci-prod-aks-weu-mac" - }, - "expr": "absent(up{cluster=\"ci-prod-aks-mac-weu\", job=\"kube-state-metrics\"}) == 1 or up{cluster=\"ci-prod-aks-mac-weu\", job=\"kube-state-metrics\"} == 0", - "refId": "A" - } - ], - "title": "up 
metric missing for target = kube-state-metrics in cluster ci-prod-aks-mac-weu", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "ci-prod-aks-weu-mac" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 48 - }, - "id": 20, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "ci-prod-aks-weu-mac" - }, - "expr": "absent(up{cluster=\"ci-prod-aks-mac-weu\", job=\"kube-proxy\"}) == 1 or up{cluster=\"ci-prod-aks-mac-weu\", job=\"kube-proxy\"} == 0", - "refId": "A" - } - ], - "title": "up metric missing for target = kube-proxy in cluster ci-prod-aks-mac-weu", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "ci-prod-aks-weu-mac" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - 
"lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 49 - }, - "id": 22, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "ci-prod-aks-weu-mac" - }, - "expr": "absent(up{cluster=\"ci-prod-aks-mac-weu\", job=\"kube-apiserver\"}) == 1 or up{cluster=\"ci-prod-aks-mac-weu\", job=\"kube-apiserver\"} == 0", - "refId": "A" - } - ], - "title": "up metric missing for target = kube-apiserver in cluster ci-prod-aks-mac-weu", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "ci-prod-aks-weu-mac" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 56 - }, - "id": 16, - "options": { - "legend": { - "calcs": 
[], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "ci-prod-aks-weu-mac" - }, - "expr": "absent(up{cluster=\"ci-prod-aks-mac-weu\", job=\"kubelet\"}) == 1 or up{cluster=\"ci-prod-aks-mac-weu\", job=\"kubelet\"} == 0", - "refId": "A" - } - ], - "title": "up metric missing for target = kubelet in cluster ci-prod-aks-mac-weu", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "ci-prod-aks-weu-mac" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 57 - }, - "id": 18, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "ci-prod-aks-weu-mac" - }, - "expr": "absent(up{cluster=\"ci-prod-aks-mac-weu\", job=\"windows-exporter\"}) == 1 or up{cluster=\"ci-prod-aks-mac-weu\", job=\"windows-exporter\"} == 0", - "refId": "A" - } - ], - "title": "up metric missing for target = windows-exporter in cluster ci-prod-aks-mac-weu", - "type": "timeseries" - }, - 
{ - "datasource": { - "type": "prometheus", - "uid": "ci-prod-aks-weu-mac" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 65 - }, - "id": 14, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "ci-prod-aks-weu-mac" - }, - "expr": "absent(up{cluster=\"ci-prod-aks-mac-weu\", job=\"node\"}) == 1 or up{cluster=\"ci-prod-aks-mac-weu\", job=\"node\"} == 0", - "refId": "A" - } - ], - "title": "up metric missing for target = node in cluster ci-prod-aks-mac-weu", - "type": "timeseries" - } - ], - "refresh": false, - "schemaVersion": 37, - "style": "dark", - "tags": [], - "templating": { - "list": [] - }, - "time": { - "from": "now-6h", - "to": "now" - }, - "timepicker": {}, - "timezone": "", - "title": "CPU and Memory utilization k-s-m, replicaset and daemonset", - "uid": "gp9556IVz", - "version": 25, - "weekStart": "" -} \ No newline at end of file diff --git a/internal/monitoring/dashboards/cicd-db.json b/internal/monitoring/dashboards/cicd-db.json new file mode 100644 index 000000000..c77058c88 --- /dev/null +++ 
b/internal/monitoring/dashboards/cicd-db.json @@ -0,0 +1,8848 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": 42, + "links": [], + "liveNow": false, + "panels": [ + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 84, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "ci-prod-arc-wcus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "line" + } + }, + "mappings": [], + "max": 0.00095, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 1 + }, + "id": 43, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "ci-prod-arc-wcus" + }, + "editorMode": "code", + "expr": "(sum ( 
node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"ci-prod-arc-wcus\", namespace=\"kube-system\", container!=\"\", pod=~\"ama-metrics.*\"}) - sum ( node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"ci-prod-arc-wcus\", namespace=\"kube-system\", pod=~\"ama-metrics-node.*\"}) - sum ( node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"ci-prod-arc-wcus\", namespace=\"kube-system\", pod=~\"ama-metrics-ksm.*\"}))/ sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"ci-prod-arc-wcus\", namespace=\"kube-system\", resource=\"cpu\"})", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Replicaset CPU %", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "ci-prod-arc-wcus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 1 + }, + "id": 36, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "ci-prod-arc-wcus" + }, + 
"editorMode": "code", + "expr": "sum ( node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"ci-prod-arc-wcus\", namespace=\"kube-system\", container!=\"\", pod=~\"ama-metrics.*\"}) - sum ( node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"ci-prod-arc-wcus\", namespace=\"kube-system\", pod=~\"ama-metrics-node.*\"}) - sum ( node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"ci-prod-arc-wcus\", namespace=\"kube-system\", pod=~\"ama-metrics-ksm.*\"})", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Replicaset CPU usage", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "ci-prod-arc-wcus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "line" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 9 + }, + "id": 44, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "ci-prod-arc-wcus" + }, + "expr": "sum ( 
node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"ci-prod-arc-wcus\", namespace=\"kube-system\", container!=\"\", pod=~\"ama-metrics-node.*\"}) / sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"ci-prod-arc-wcus\", namespace=\"kube-system\", resource=\"cpu\"}) ", + "refId": "A" + } + ], + "title": "Daemonset CPU%", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "ci-prod-arc-wcus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 9 + }, + "id": 37, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "ci-prod-arc-wcus" + }, + "editorMode": "code", + "expr": "sum ( node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"ci-prod-arc-wcus\", namespace=\"kube-system\", container!=\"\", pod=~\"ama-metrics-node.*\"}) ", + "range": true, + "refId": "A" + } + ], + "title": "Daemonset CPU", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": 
"ci-prod-arc-wcus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "line" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 17 + }, + "id": 45, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "ci-prod-arc-wcus" + }, + "expr": "sum ( node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"ci-prod-arc-wcus\", namespace=\"kube-system\", container!=\"\", pod=~\"ama-metrics-ksm.*\"})/ sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"ci-prod-arc-wcus\", namespace=\"kube-system\", resource=\"cpu\"})", + "refId": "A" + } + ], + "title": "K-S-M CPU%", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "ci-prod-arc-wcus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": 
false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 17 + }, + "id": 38, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "ci-prod-arc-wcus" + }, + "editorMode": "code", + "expr": "sum ( node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"ci-prod-arc-wcus\", namespace=\"kube-system\", container!=\"\", pod=~\"ama-metrics-ksm.*\"})", + "range": true, + "refId": "A" + } + ], + "title": "K-S-M CPU", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "ci-prod-arc-wcus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "line" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 0.00023 + } + ] + } + }, + "overrides": [] + }, + 
"gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 26 + }, + "id": 46, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "ci-prod-arc-wcus" + }, + "editorMode": "code", + "expr": "sum(container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"ci-prod-arc-wcus\", namespace=\"kube-system\",container!=\"\", image!=\"\", pod=~\"ama-metrics-ksm.*\"}) / sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"ci-prod-arc-wcus\", namespace=\"kube-system\", resource=\"memory\"})", + "format": "time_series", + "range": true, + "refId": "A" + } + ], + "title": "K-S-M Memory%", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "ci-prod-arc-wcus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 26 + }, + "id": 39, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": 
"prometheus", + "uid": "ci-prod-arc-wcus" + }, + "editorMode": "code", + "expr": "sum(container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"ci-prod-arc-wcus\", namespace=\"kube-system\",container!=\"\", image!=\"\", pod=~\"ama-metrics-ksm.*\"}) ", + "format": "time_series", + "range": true, + "refId": "A" + } + ], + "title": "K-S-M Memory", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "ci-prod-arc-wcus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "line" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 34 + }, + "id": 47, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "ci-prod-arc-wcus" + }, + "expr": "sum(container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"ci-prod-arc-wcus\", namespace=\"kube-system\",container!=\"\", image!=\"\", pod=~\"ama-metrics-node.*\"}) / sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"ci-prod-arc-wcus\", namespace=\"kube-system\", resource=\"memory\"})", + "refId": "A" + } + ], + "title": "Daemonset Memory%", + "type": "timeseries" 
+ }, + { + "datasource": { + "type": "prometheus", + "uid": "ci-prod-arc-wcus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 34 + }, + "id": 40, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "ci-prod-arc-wcus" + }, + "editorMode": "code", + "expr": "sum(container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"ci-prod-arc-wcus\", namespace=\"kube-system\",container!=\"\", image!=\"\", pod=~\"ama-metrics-node.*\"})", + "range": true, + "refId": "A" + } + ], + "title": "Daemonset Memory", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "ci-prod-arc-wcus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + 
"lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "line" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 42 + }, + "id": 48, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "ci-prod-arc-wcus" + }, + "expr": "(sum(container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"ci-prod-arc-wcus\", namespace=\"kube-system\",container!=\"\", image!=\"\", pod=~\"ama-metrics.*\"}) - sum(container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"ci-prod-arc-wcus\", namespace=\"kube-system\",container!=\"\", image!=\"\", pod=~\"ama-metrics-node.*\"}) - sum(container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"ci-prod-arc-wcus\", namespace=\"kube-system\",container!=\"\", image!=\"\", pod=~\"ama-metrics-ksm.*\"})) / sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"ci-prod-arc-wcus\", namespace=\"kube-system\", resource=\"memory\"})", + "refId": "A" + } + ], + "title": "Replicaset Memory%", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "ci-prod-arc-wcus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + 
"pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 42 + }, + "id": 41, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "ci-prod-arc-wcus" + }, + "editorMode": "code", + "expr": "(sum(container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"ci-prod-arc-wcus\", namespace=\"kube-system\",container!=\"\", image!=\"\", pod=~\"ama-metrics.*\"}) - sum(container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"ci-prod-arc-wcus\", namespace=\"kube-system\",container!=\"\", image!=\"\", pod=~\"ama-metrics-node.*\"}) - sum(container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"ci-prod-arc-wcus\", namespace=\"kube-system\",container!=\"\", image!=\"\", pod=~\"ama-metrics-ksm.*\"})) ", + "range": true, + "refId": "A" + } + ], + "title": "Replicaset Memory", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "ci-prod-arc-wcus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + 
"group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 50 + }, + "id": 49, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "ci-prod-arc-wcus" + }, + "editorMode": "code", + "expr": " up{cluster=\"ci-prod-arc-wcus\", job=\"kube-dns\"} == 0", + "range": true, + "refId": "A" + } + ], + "title": "up metric missing for target = kube-dns in cluster ci-prod-arc-wcus", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "ci-prod-arc-wcus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 50 + }, + "id": 50, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + 
"sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "ci-prod-arc-wcus" + }, + "editorMode": "code", + "expr": "absent(up{cluster=\"ci-prod-arc-wcus\", job=\"cadvisor\"}) == 1 or up{cluster=\"ci-prod-arc-wcus\", job=\"cadvisor\"} == 0", + "range": true, + "refId": "A" + } + ], + "title": "up metric missing for target = cadvisor in cluster ci-prod-arc-wcus", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "ci-prod-arc-wcus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ + { + "__systemRef": "hideSeriesFrom", + "matcher": { + "id": "byNames", + "options": { + "mode": "exclude", + "names": [ + "Up{cluster=\"ci-prod-arc-wcus\", instance=\"ama-metrics-ksm.kube-system.svc.cluster.local:8080\", job=\"kube-state-metrics\"}" + ], + "prefix": "All except:", + "readOnly": true + } + }, + "properties": [ + { + "id": "custom.hideFrom", + "value": { + "legend": false, + "tooltip": false, + "viz": true + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 58 + }, + "id": 51, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": 
"single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "ci-prod-arc-wcus" + }, + "expr": "absent(up{cluster=\"ci-prod-arc-wcus\", job=\"kube-state-metrics\"}) == 1 or up{cluster=\"ci-prod-arc-wcus\", job=\"kube-state-metrics\"} == 0", + "refId": "A" + } + ], + "title": "up metric missing for target = kube-state-metrics in cluster ci-prod-arc-wcus", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "ci-prod-arc-wcus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 58 + }, + "id": 52, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "ci-prod-arc-wcus" + }, + "editorMode": "code", + "expr": " up{cluster=\"ci-prod-arc-wcus\", job=\"kube-proxy-windows\"} == 0", + "range": true, + "refId": "A" + } + ], + "title": "up metric missing for target = kube-proxy-windows in cluster ci-prod-arc-wcus", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": 
"ci-prod-arc-wcus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 66 + }, + "id": 53, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "ci-prod-arc-wcus" + }, + "editorMode": "code", + "expr": " up{cluster=\"ci-prod-arc-wcus\", job=\"kube-apiserver\"} == 0", + "range": true, + "refId": "A" + } + ], + "title": "up metric missing for target = kube-apiserver in cluster ci-prod-arc-wcus", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "ci-prod-arc-wcus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": 
"linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 66 + }, + "id": 54, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "ci-prod-arc-wcus" + }, + "editorMode": "code", + "expr": " up{cluster=\"ci-prod-arc-wcus\", job=\"kube-proxy\"} == 0", + "range": true, + "refId": "A" + } + ], + "title": "up metric missing for target = kube-proxy in cluster ci-prod-arc-wcus", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "ci-prod-arc-wcus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 74 + }, + "id": 55, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + 
"placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "ci-prod-arc-wcus" + }, + "editorMode": "code", + "expr": " up{cluster=\"ci-prod-arc-wcus\", job=\"windows-exporter\"} == 0", + "range": true, + "refId": "A" + } + ], + "title": "up metric missing for target = windows-exporter in cluster ci-prod-arc-wcus", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "ci-prod-arc-wcus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 74 + }, + "id": 56, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "ci-prod-arc-wcus" + }, + "editorMode": "code", + "expr": " up{cluster=\"ci-prod-arc-wcus\", job=\"kubelet \"} == 0", + "range": true, + "refId": "A" + } + ], + "title": "up metric missing for target = kubelet in cluster ci-prod-arc-wcus", + "type": "timeseries" + }, + { + "datasource": { + "type": 
"prometheus", + "uid": "ci-prod-arc-wcus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 82 + }, + "id": 57, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "ci-prod-arc-wcus" + }, + "expr": "absent(up{cluster=\"ci-prod-arc-wcus\", job=\"node\"}) == 1 or up{cluster=\"ci-prod-arc-wcus\", job=\"node\"} == 0", + "refId": "A" + } + ], + "title": "up metric missing for target = node in cluster ci-prod-arc-wcus", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "ci-prod-arc-wcus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + 
"scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ + { + "__systemRef": "hideSeriesFrom", + "matcher": { + "id": "byNames", + "options": { + "mode": "exclude", + "names": [ + "{__name__=\"node_uname_info\", cluster=\"ci-prod-arc-wcus\", domainname=\"(none)\", instance=\"10.240.0.115:9100\", job=\"custom-scrape-job-node\", machine=\"x86_64\", metrics_path=\"/metrics\", nodename=\"aks-agentpool-86683822-vmss000001\", release=\"5.4.0-1091-azure\", sysname=\"linux\", version=\"#96~18.04.1-ubuntu smp tue aug 30 19:15:32 utc 2022\"}" + ], + "prefix": "All except:", + "readOnly": true + } + }, + "properties": [ + { + "id": "custom.hideFrom", + "value": { + "legend": false, + "tooltip": false, + "viz": true + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 82 + }, + "id": 58, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "ci-prod-arc-wcus" + }, + "editorMode": "code", + "expr": "absent(node_uname_info{cluster=\"ci-prod-arc-wcus\", machine=\"aarch64\"}) == 1 or node_uname_info{cluster=\"ci-prod-arc-wcus\", machine=\"aarch64\"} == 0\r\n\r\n", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Aarch64 metric missing ", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "ci-prod-arc-wcus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + 
"axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 90 + }, + "id": 59, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "ci-prod-arc-wcus" + }, + "editorMode": "code", + "expr": "absent(node_uname_info{cluster=\"ci-prod-arc-wcus\", machine=\"x86_64\"}) == 1 or node_uname_info{cluster=\"ci-prod-arc-wcus\", machine=\"x86_64\"} == 0\r\n\r\n", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Amd64 metric missing", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "ci-prod-arc-wcus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + 
"thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 90 + }, + "id": 132, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "ci-prod-arc-wcus" + }, + "editorMode": "code", + "expr": "count (alerts{alertstate=\"firing\"}) by (alertname, cluster)", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Firing alerts", + "type": "timeseries" + } + ], + "title": "cluster = ci-prod-arc-wcus", + "type": "row" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 1 + }, + "id": 35, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "ci-dev-arc-amw" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "line" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 2 + }, + "id": 109, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + 
"placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "ci-dev-arc-amw" + }, + "editorMode": "code", + "expr": "(sum ( node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"ci-dev-arc-wcus\", namespace=\"kube-system\", container!=\"\", pod=~\"ama-metrics.*\"}) - sum ( node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"ci-dev-arc-wcus\", namespace=\"kube-system\", pod=~\"ama-metrics-node.*\"}) - sum ( node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"ci-dev-arc-wcus\", namespace=\"kube-system\", pod=~\"ama-metrics-ksm.*\"}))/ sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"ci-dev-arc-wcus\", namespace=\"kube-system\", resource=\"cpu\"})", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Replicaset CPU %", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "ci-dev-arc-amw" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 2 + }, + "id": 110, + "options": { + "legend": { + "calcs": [], + 
"displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "ci-dev-arc-amw" + }, + "editorMode": "code", + "expr": "sum ( node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"ci-dev-arc-wcus\", namespace=\"kube-system\", container!=\"\", pod=~\"ama-metrics.*\"}) - sum ( node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"ci-dev-arc-wcus\", namespace=\"kube-system\", pod=~\"ama-metrics-node.*\"}) - sum ( node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"ci-dev-arc-wcus\", namespace=\"kube-system\", pod=~\"ama-metrics-ksm.*\"})", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Replicaset CPU usage", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "ci-dev-arc-amw" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "line" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 10 + }, + "id": 111, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + 
"sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "ci-dev-arc-amw" + }, + "expr": "sum ( node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"ci-dev-arc-wcus\", namespace=\"kube-system\", container!=\"\", pod=~\"ama-metrics-node.*\"}) / sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"ci-dev-arc-wcus\", namespace=\"kube-system\", resource=\"cpu\"}) ", + "refId": "A" + } + ], + "title": "Daemonset CPU%", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "ci-dev-arc-amw" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 10 + }, + "id": 112, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "ci-dev-arc-amw" + }, + "editorMode": "code", + "expr": "sum ( node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"ci-dev-arc-wcus\", namespace=\"kube-system\", container!=\"\", pod=~\"ama-metrics-node.*\"}) ", + "range": true, + "refId": "A" + } + 
], + "title": "Daemonset CPU", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "ci-dev-arc-amw" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "line" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 18 + }, + "id": 113, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "ci-dev-arc-amw" + }, + "expr": "sum ( node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"ci-dev-arc-wcus\", namespace=\"kube-system\", container!=\"\", pod=~\"ama-metrics-ksm.*\"})/ sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"ci-dev-arc-wcus\", namespace=\"kube-system\", resource=\"cpu\"})", + "refId": "A" + } + ], + "title": "K-S-M CPU%", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "ci-dev-arc-amw" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + 
"fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 18 + }, + "id": 114, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "ci-dev-arc-amw" + }, + "editorMode": "code", + "expr": "sum ( node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"ci-dev-arc-wcus\", namespace=\"kube-system\", container!=\"\", pod=~\"ama-metrics-ksm.*\"})", + "range": true, + "refId": "A" + } + ], + "title": "K-S-M CPU", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "ci-dev-arc-amw" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "line" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + 
"value": null + }, + { + "color": "red", + "value": 0.00023 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 27 + }, + "id": 115, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "ci-dev-arc-amw" + }, + "editorMode": "code", + "expr": "sum(container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"ci-dev-arc-wcus\", namespace=\"kube-system\",container!=\"\", image!=\"\", pod=~\"ama-metrics-ksm.*\"}) / sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"ci-dev-arc-wcus\", namespace=\"kube-system\", resource=\"memory\"})", + "format": "time_series", + "range": true, + "refId": "A" + } + ], + "title": "K-S-M Memory%", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "ci-dev-arc-amw" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 27 + }, + "id": 116, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + 
"mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "ci-dev-arc-amw" + }, + "editorMode": "code", + "expr": "sum(container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"ci-dev-arc-wcus\", namespace=\"kube-system\",container!=\"\", image!=\"\", pod=~\"ama-metrics-ksm.*\"}) ", + "format": "time_series", + "range": true, + "refId": "A" + } + ], + "title": "K-S-M Memory", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "ci-dev-arc-amw" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "line" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 35 + }, + "id": 117, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "ci-dev-arc-amw" + }, + "expr": "sum(container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"ci-dev-arc-wcus\", namespace=\"kube-system\",container!=\"\", image!=\"\", pod=~\"ama-metrics-node.*\"}) / sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"ci-dev-arc-wcus\", namespace=\"kube-system\", 
resource=\"memory\"})", + "refId": "A" + } + ], + "title": "Daemonset Memory%", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "ci-dev-arc-amw" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 35 + }, + "id": 118, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "ci-dev-arc-amw" + }, + "editorMode": "code", + "expr": "sum(container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"ci-dev-arc-wcus\", namespace=\"kube-system\",container!=\"\", image!=\"\", pod=~\"ama-metrics-node.*\"})", + "range": true, + "refId": "A" + } + ], + "title": "Daemonset Memory", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "ci-dev-arc-amw" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": 
{ + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "line" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 43 + }, + "id": 119, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "ci-dev-arc-amw" + }, + "expr": "(sum(container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"ci-dev-arc-wcus\", namespace=\"kube-system\",container!=\"\", image!=\"\", pod=~\"ama-metrics.*\"}) - sum(container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"ci-dev-arc-wcus\", namespace=\"kube-system\",container!=\"\", image!=\"\", pod=~\"ama-metrics-node.*\"}) - sum(container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"ci-dev-arc-wcus\", namespace=\"kube-system\",container!=\"\", image!=\"\", pod=~\"ama-metrics-ksm.*\"})) / sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"ci-dev-arc-wcus\", namespace=\"kube-system\", resource=\"memory\"})", + "refId": "A" + } + ], + "title": "Replicaset Memory%", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "ci-dev-arc-amw" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": 
false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 43 + }, + "id": 120, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "ci-dev-arc-amw" + }, + "editorMode": "code", + "expr": "(sum(container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"ci-dev-arc-wcus\", namespace=\"kube-system\",container!=\"\", image!=\"\", pod=~\"ama-metrics.*\"}) - sum(container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"ci-dev-arc-wcus\", namespace=\"kube-system\",container!=\"\", image!=\"\", pod=~\"ama-metrics-node.*\"}) - sum(container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"ci-dev-arc-wcus\", namespace=\"kube-system\",container!=\"\", image!=\"\", pod=~\"ama-metrics-ksm.*\"})) ", + "range": true, + "refId": "A" + } + ], + "title": "Replicaset Memory", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "ci-dev-arc-amw" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" 
+ }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 51 + }, + "id": 121, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "ci-dev-arc-amw" + }, + "editorMode": "code", + "expr": " up{cluster=\"ci-dev-arc-wcus\", job=\"kube-dns\"} == 0", + "range": true, + "refId": "A" + } + ], + "title": "up metric missing for target = kube-dns in cluster ci-dev-arc-wcus", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "ci-dev-arc-amw" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 51 + }, + "id": 122, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": 
"bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "ci-dev-arc-amw" + }, + "editorMode": "code", + "expr": "absent(up{cluster=\"ci-dev-arc-wcus\", job=\"cadvisor\"}) == 1 or up{cluster=\"ci-dev-arc-wcus\", job=\"cadvisor\"} == 0", + "range": true, + "refId": "A" + } + ], + "title": "up metric missing for target = cadvisor in cluster ci-dev-arc-wcus", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "ci-dev-arc-amw" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ + { + "__systemRef": "hideSeriesFrom", + "matcher": { + "id": "byNames", + "options": { + "mode": "exclude", + "names": [ + "Up{cluster=\"ci-dev-arc-wcus\", instance=\"ama-metrics-ksm.kube-system.svc.cluster.local:8080\", job=\"kube-state-metrics\"}" + ], + "prefix": "All except:", + "readOnly": true + } + }, + "properties": [ + { + "id": "custom.hideFrom", + "value": { + "legend": false, + "tooltip": false, + "viz": true + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 59 + }, + "id": 123, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": 
"bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "ci-dev-arc-amw" + }, + "expr": "absent(up{cluster=\"ci-dev-arc-wcus\", job=\"kube-state-metrics\"}) == 1 or up{cluster=\"ci-dev-arc-wcus\", job=\"kube-state-metrics\"} == 0", + "refId": "A" + } + ], + "title": "up metric missing for target = kube-state-metrics in cluster ci-dev-arc-wcus", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "ci-dev-arc-amw" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 59 + }, + "id": 124, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "ci-dev-arc-amw" + }, + "editorMode": "code", + "expr": " up{cluster=\"ci-dev-arc-wcus\", job=\"kube-proxy-windows\"} == 0", + "range": true, + "refId": "A" + } + ], + "title": "up metric missing for target = kube-proxy-windows in cluster ci-dev-arc-wcus", + "type": "timeseries" + }, + { + 
"datasource": { + "type": "prometheus", + "uid": "ci-dev-arc-amw" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 67 + }, + "id": 125, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "ci-dev-arc-amw" + }, + "editorMode": "code", + "expr": " up{cluster=\"ci-dev-arc-wcus\", job=\"kube-apiserver\"} == 0", + "range": true, + "refId": "A" + } + ], + "title": "up metric missing for target = kube-apiserver in cluster ci-dev-arc-wcus", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "ci-dev-arc-amw" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 
5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 67 + }, + "id": 126, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "ci-dev-arc-amw" + }, + "editorMode": "code", + "expr": " up{cluster=\"ci-dev-arc-wcus\", job=\"kube-proxy\"} == 0", + "range": true, + "refId": "A" + } + ], + "title": "up metric missing for target = kube-proxy in cluster ci-dev-arc-wcus", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "ci-dev-arc-amw" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 75 + }, + "id": 127, + "options": { + "legend": { + "calcs": 
[], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "ci-dev-arc-amw" + }, + "editorMode": "code", + "expr": " up{cluster=\"ci-dev-arc-wcus\", job=\"windows-exporter\"} == 0", + "range": true, + "refId": "A" + } + ], + "title": "up metric missing for target = windows-exporter in cluster ci-dev-arc-wcus", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "ci-dev-arc-amw" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 75 + }, + "id": 128, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "ci-dev-arc-amw" + }, + "editorMode": "code", + "expr": " up{cluster=\"ci-dev-arc-wcus\", job=\"kubelet\"} == 0", + "range": true, + "refId": "A" + } + ], + "title": "up metric missing for target = kubelet in cluster ci-dev-arc-wcus", + "type": "timeseries" + }, + { + "datasource":
{ + "type": "prometheus", + "uid": "ci-dev-arc-amw" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 83 + }, + "id": 129, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "ci-dev-arc-amw" + }, + "expr": "absent(up{cluster=\"ci-dev-arc-wcus\", job=\"node\"}) == 1 or up{cluster=\"ci-dev-arc-wcus\", job=\"node\"} == 0", + "refId": "A" + } + ], + "title": "up metric missing for target = node in cluster ci-dev-arc-wcus", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "ci-dev-arc-amw" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + 
"scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ + { + "__systemRef": "hideSeriesFrom", + "matcher": { + "id": "byNames", + "options": { + "mode": "exclude", + "names": [ + "{__name__=\"node_uname_info\", cluster=\"ci-dev-arc-wcus\", domainname=\"(none)\", instance=\"10.240.0.115:9100\", job=\"custom-scrape-job-node\", machine=\"x86_64\", metrics_path=\"/metrics\", nodename=\"aks-agentpool-86683822-vmss000001\", release=\"5.4.0-1091-azure\", sysname=\"linux\", version=\"#96~18.04.1-ubuntu smp tue aug 30 19:15:32 utc 2022\"}" + ], + "prefix": "All except:", + "readOnly": true + } + }, + "properties": [ + { + "id": "custom.hideFrom", + "value": { + "legend": false, + "tooltip": false, + "viz": true + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 83 + }, + "id": 130, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "ci-dev-arc-amw" + }, + "editorMode": "code", + "expr": "absent(node_uname_info{cluster=\"ci-dev-arc-wcus\", machine=\"aarch64\"}) == 1 or node_uname_info{cluster=\"ci-dev-arc-wcus\", machine=\"aarch64\"} == 0\r\n\r\n", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Aarch64 metric missing ", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "ci-dev-arc-amw" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + 
"axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 91 + }, + "id": 131, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "ci-dev-arc-amw" + }, + "editorMode": "code", + "expr": "absent(node_uname_info{cluster=\"ci-dev-arc-wcus\", machine=\"x86_64\"}) == 1 or node_uname_info{cluster=\"ci-dev-arc-wcus\", machine=\"x86_64\"} == 0\r\n\r\n", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Amd64 metric missing", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "ci-dev-arc-amw" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + 
"thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 91 + }, + "id": 133, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "ci-dev-arc-amw" + }, + "editorMode": "code", + "expr": "count (alerts{alertstate=\"firing\"}) by (alertname, cluster)", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Firing alerts", + "type": "timeseries" + } + ], + "title": "Cluster = ci-dev-arc-wcus", + "type": "row" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 2 + }, + "id": 60, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "ci-prod-aks-weu-mac" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "line" + } + }, + "mappings": [], + "max": 0.00095, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 3 + }, + "id": 86, + "options": { + "legend": { + "calcs": [], + "displayMode": 
"list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "ci-prod-aks-weu-mac" + }, + "editorMode": "code", + "expr": "(sum ( node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"ci-prod-aks-mac-weu\", namespace=\"kube-system\", container!=\"\", pod=~\"ama-metrics.*\"}) - sum ( node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"ci-prod-aks-mac-weu\", namespace=\"kube-system\", pod=~\"ama-metrics-node.*\"}) - sum ( node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"ci-prod-aks-mac-weu\", namespace=\"kube-system\", pod=~\"ama-metrics-ksm.*\"}))/ sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"ci-prod-aks-mac-weu\", namespace=\"kube-system\", resource=\"cpu\"})", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Replicaset CPU %", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "ci-prod-aks-weu-mac" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 3 + }, + "id": 87, + "options": 
{ + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "ci-prod-aks-weu-mac" + }, + "editorMode": "code", + "expr": "sum ( node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"ci-prod-aks-mac-weu\", namespace=\"kube-system\", container!=\"\", pod=~\"ama-metrics.*\"}) - sum ( node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"ci-prod-aks-mac-weu\", namespace=\"kube-system\", pod=~\"ama-metrics-node.*\"}) - sum ( node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"ci-prod-aks-mac-weu\", namespace=\"kube-system\", pod=~\"ama-metrics-ksm.*\"})", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Replicaset CPU usage", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "ci-prod-aks-weu-mac" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "line" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 11 + }, + "id": 88, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + 
"showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "ci-prod-aks-weu-mac" + }, + "expr": "sum ( node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"ci-prod-aks-mac-weu\", namespace=\"kube-system\", container!=\"\", pod=~\"ama-metrics-node.*\"}) / sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"ci-prod-aks-mac-weu\", namespace=\"kube-system\", resource=\"cpu\"}) ", + "refId": "A" + } + ], + "title": "Daemonset CPU%", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "ci-prod-aks-weu-mac" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 11 + }, + "id": 89, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "ci-prod-aks-weu-mac" + }, + "editorMode": "code", + "expr": "sum ( node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"ci-prod-aks-mac-weu\", namespace=\"kube-system\", 
container!=\"\", pod=~\"ama-metrics-node.*\"}) ", + "range": true, + "refId": "A" + } + ], + "title": "Daemonset CPU", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "ci-prod-aks-weu-mac" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "line" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 19 + }, + "id": 90, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "ci-prod-aks-weu-mac" + }, + "expr": "sum ( node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"ci-prod-aks-mac-weu\", namespace=\"kube-system\", container!=\"\", pod=~\"ama-metrics-ksm.*\"})/ sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"ci-prod-aks-mac-weu\", namespace=\"kube-system\", resource=\"cpu\"})", + "refId": "A" + } + ], + "title": "K-S-M CPU%", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "ci-prod-aks-weu-mac" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + 
"axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 19 + }, + "id": 91, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "ci-prod-aks-weu-mac" + }, + "editorMode": "code", + "expr": "sum ( node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"ci-prod-aks-mac-weu\", namespace=\"kube-system\", container!=\"\", pod=~\"ama-metrics-ksm.*\"})", + "range": true, + "refId": "A" + } + ], + "title": "K-S-M CPU", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "ci-prod-aks-weu-mac" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + 
"thresholdsStyle": { + "mode": "line" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 0.00023 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 28 + }, + "id": 92, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "ci-prod-aks-weu-mac" + }, + "editorMode": "code", + "expr": "sum(container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"ci-prod-aks-mac-weu\", namespace=\"kube-system\",container!=\"\", image!=\"\", pod=~\"ama-metrics-ksm.*\"}) / sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"ci-prod-aks-mac-weu\", namespace=\"kube-system\", resource=\"memory\"})", + "format": "time_series", + "range": true, + "refId": "A" + } + ], + "title": "K-S-M Memory%", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "ci-prod-aks-weu-mac" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 28 
+ }, + "id": 93, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "ci-prod-aks-weu-mac" + }, + "editorMode": "code", + "expr": "sum(container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"ci-prod-aks-mac-weu\", namespace=\"kube-system\",container!=\"\", image!=\"\", pod=~\"ama-metrics-ksm.*\"}) ", + "format": "time_series", + "range": true, + "refId": "A" + } + ], + "title": "K-S-M Memory", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "ci-prod-aks-weu-mac" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "line" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 36 + }, + "id": 94, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "ci-prod-aks-weu-mac" + }, + "expr": "sum(container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"ci-prod-aks-mac-weu\", namespace=\"kube-system\",container!=\"\", 
image!=\"\", pod=~\"ama-metrics-node.*\"}) / sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"ci-prod-aks-mac-weu\", namespace=\"kube-system\", resource=\"memory\"})", + "refId": "A" + } + ], + "title": "Daemonset Memory%", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "ci-prod-aks-weu-mac" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 36 + }, + "id": 95, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "ci-prod-aks-weu-mac" + }, + "editorMode": "code", + "expr": "sum(container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"ci-prod-aks-mac-weu\", namespace=\"kube-system\",container!=\"\", image!=\"\", pod=~\"ama-metrics-node.*\"})", + "range": true, + "refId": "A" + } + ], + "title": "Daemonset Memory", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "ci-prod-aks-weu-mac" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + 
"axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "line" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 44 + }, + "id": 96, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "ci-prod-aks-weu-mac" + }, + "expr": "(sum(container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"ci-prod-aks-mac-weu\", namespace=\"kube-system\",container!=\"\", image!=\"\", pod=~\"ama-metrics.*\"}) - sum(container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"ci-prod-aks-mac-weu\", namespace=\"kube-system\",container!=\"\", image!=\"\", pod=~\"ama-metrics-node.*\"}) - sum(container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"ci-prod-aks-mac-weu\", namespace=\"kube-system\",container!=\"\", image!=\"\", pod=~\"ama-metrics-ksm.*\"})) / sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"ci-prod-aks-mac-weu\", namespace=\"kube-system\", resource=\"memory\"})", + "refId": "A" + } + ], + "title": "Replicaset Memory%", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "ci-prod-aks-weu-mac" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + 
"axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 44 + }, + "id": 97, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "ci-prod-aks-weu-mac" + }, + "editorMode": "code", + "expr": "(sum(container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"ci-prod-aks-mac-weu\", namespace=\"kube-system\",container!=\"\", image!=\"\", pod=~\"ama-metrics.*\"}) - sum(container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"ci-prod-aks-mac-weu\", namespace=\"kube-system\",container!=\"\", image!=\"\", pod=~\"ama-metrics-node.*\"}) - sum(container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"ci-prod-aks-mac-weu\", namespace=\"kube-system\",container!=\"\", image!=\"\", pod=~\"ama-metrics-ksm.*\"})) ", + "range": true, + "refId": "A" + } + ], + "title": "Replicaset Memory", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "ci-prod-aks-weu-mac" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 
0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 52 + }, + "id": 98, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "ci-prod-aks-weu-mac" + }, + "editorMode": "code", + "expr": " up{cluster=\"ci-prod-aks-mac-weu\", job=\"kube-dns\"} == 0", + "range": true, + "refId": "A" + } + ], + "title": "up metric missing for target = kube-dns in cluster ci-prod-aks-mac-weu", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "ci-prod-aks-weu-mac" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + 
"color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 52 + }, + "id": 99, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "ci-prod-aks-weu-mac" + }, + "editorMode": "code", + "expr": "absent(up{cluster=\"ci-prod-aks-mac-weu\", job=\"cadvisor\"}) == 1 or up{cluster=\"ci-prod-aks-mac-weu\", job=\"cadvisor\"} == 0", + "range": true, + "refId": "A" + } + ], + "title": "up metric missing for target = cadvisor in cluster ci-prod-aks-mac-weu", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "ci-prod-aks-weu-mac" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ + { + "__systemRef": "hideSeriesFrom", + "matcher": { + "id": "byNames", + "options": { + "mode": "exclude", + "names": [ + "Up{cluster=\"ci-prod-aks-mac-weu\", instance=\"ama-metrics-ksm.kube-system.svc.cluster.local:8080\", job=\"kube-state-metrics\"}" + ], + "prefix": "All except:", + "readOnly": true + } + }, + 
"properties": [ + { + "id": "custom.hideFrom", + "value": { + "legend": false, + "tooltip": false, + "viz": true + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 60 + }, + "id": 100, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "ci-prod-aks-weu-mac" + }, + "expr": "absent(up{cluster=\"ci-prod-aks-mac-weu\", job=\"kube-state-metrics\"}) == 1 or up{cluster=\"ci-prod-aks-mac-weu\", job=\"kube-state-metrics\"} == 0", + "refId": "A" + } + ], + "title": "up metric missing for target = kube-state-metrics in cluster ci-prod-aks-mac-weu", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "ci-prod-aks-weu-mac" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 60 + }, + "id": 101, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + 
"type": "prometheus", + "uid": "ci-prod-aks-weu-mac" + }, + "editorMode": "code", + "expr": " up{cluster=\"ci-prod-aks-mac-weu\", job=\"kube-proxy-windows\"} == 0", + "range": true, + "refId": "A" + } + ], + "title": "up metric missing for target = kube-proxy-windows in cluster ci-prod-aks-mac-weu", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "ci-prod-aks-weu-mac" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 68 + }, + "id": 102, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "ci-prod-aks-weu-mac" + }, + "editorMode": "code", + "expr": " up{cluster=\"ci-prod-aks-mac-weu\", job=\"kube-apiserver\"} == 0", + "range": true, + "refId": "A" + } + ], + "title": "up metric missing for target = kube-apiserver in cluster ci-prod-aks-mac-weu", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "ci-prod-aks-weu-mac" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": 
"palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 68 + }, + "id": 103, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "ci-prod-aks-weu-mac" + }, + "editorMode": "code", + "expr": " up{cluster=\"ci-prod-aks-mac-weu\", job=\"kube-proxy\"} == 0", + "range": true, + "refId": "A" + } + ], + "title": "up metric missing for target = kube-proxy in cluster ci-prod-aks-mac-weu", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "ci-prod-aks-weu-mac" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + 
"group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 76 + }, + "id": 104, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "ci-prod-aks-weu-mac" + }, + "editorMode": "code", + "expr": " up{cluster=\"ci-prod-aks-mac-weu\", job=\"windows-exporter\"} == 0", + "range": true, + "refId": "A" + } + ], + "title": "up metric missing for target = windows-exporter in cluster ci-prod-aks-mac-weu", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "ci-prod-aks-weu-mac" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 76 + }, + "id": 105, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + 
"tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "ci-prod-aks-weu-mac" + }, + "editorMode": "code", + "expr": " up{cluster=\"ci-prod-aks-mac-weu\", job=\"kubelet\"} == 0", + "range": true, + "refId": "A" + } + ], + "title": "up metric missing for target = kubelet in cluster ci-prod-aks-mac-weu", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "ci-prod-aks-weu-mac" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 84 + }, + "id": 106, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "ci-prod-aks-weu-mac" + }, + "expr": "absent(up{cluster=\"ci-prod-aks-mac-weu\", job=\"node\"}) == 1 or up{cluster=\"ci-prod-aks-mac-weu\", job=\"node\"} == 0", + "refId": "A" + } + ], + "title": "up metric missing for target = node in cluster ci-prod-aks-mac-weu", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid":
"ci-prod-aks-weu-mac" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ + { + "__systemRef": "hideSeriesFrom", + "matcher": { + "id": "byNames", + "options": { + "mode": "exclude", + "names": [ + "{__name__=\"node_uname_info\", cluster=\"ci-prod-aks-mac-weu\", domainname=\"(none)\", instance=\"10.240.0.115:9100\", job=\"custom-scrape-job-node\", machine=\"x86_64\", metrics_path=\"/metrics\", nodename=\"aks-agentpool-86683822-vmss000001\", release=\"5.4.0-1091-azure\", sysname=\"linux\", version=\"#96~18.04.1-ubuntu smp tue aug 30 19:15:32 utc 2022\"}" + ], + "prefix": "All except:", + "readOnly": true + } + }, + "properties": [ + { + "id": "custom.hideFrom", + "value": { + "legend": false, + "tooltip": false, + "viz": true + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 84 + }, + "id": 107, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "ci-prod-aks-weu-mac" + }, + "editorMode": "code", + "expr": "absent(node_uname_info{cluster=\"ci-prod-aks-mac-weu\", machine=\"aarch64\"}) == 1 
or node_uname_info{cluster=\"ci-prod-aks-mac-weu\", machine=\"aarch64\"} == 0", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Aarch64 metric missing", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "ci-prod-aks-weu-mac" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 92 + }, + "id": 108, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "ci-prod-aks-weu-mac" + }, + "editorMode": "code", + "expr": "absent(node_uname_info{cluster=\"ci-prod-aks-mac-weu\", machine=\"x86_64\"}) == 1 or node_uname_info{cluster=\"ci-prod-aks-mac-weu\", machine=\"x86_64\"} == 0", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Amd64 metric missing", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "ci-prod-aks-weu-mac" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { +
"axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 92 + }, + "id": 134, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "ci-prod-aks-weu-mac" + }, + "editorMode": "code", + "expr": "count (alerts{alertstate=\"firing\"}) by (alertname, cluster)", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Firing alerts", + "type": "timeseries" + } + ], + "title": "Cluster = ci-prod-aks-mac-weu", + "type": "row" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 3 + }, + "id": 85, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "ci-dev-aks-eus-mac" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + 
"scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "line" + } + }, + "mappings": [], + "max": 0.00095, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 4 + }, + "id": 61, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "ci-dev-aks-eus-mac" + }, + "editorMode": "code", + "expr": "(sum ( node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"ci-dev-aks-mac-eus\", namespace=\"kube-system\", container!=\"\", pod=~\"ama-metrics.*\"}) - sum ( node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"ci-dev-aks-mac-eus\", namespace=\"kube-system\", pod=~\"ama-metrics-node.*\"}) - sum ( node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"ci-dev-aks-mac-eus\", namespace=\"kube-system\", pod=~\"ama-metrics-ksm.*\"}))/ sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"ci-dev-aks-mac-eus\", namespace=\"kube-system\", resource=\"cpu\"})", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Replicaset CPU %", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "ci-dev-aks-eus-mac" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + 
"lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 4 + }, + "id": 62, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "ci-dev-aks-eus-mac" + }, + "editorMode": "code", + "expr": "sum ( node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"ci-dev-aks-mac-eus\", namespace=\"kube-system\", container!=\"\", pod=~\"ama-metrics.*\"}) - sum ( node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"ci-dev-aks-mac-eus\", namespace=\"kube-system\", pod=~\"ama-metrics-node.*\"}) - sum ( node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"ci-dev-aks-mac-eus\", namespace=\"kube-system\", pod=~\"ama-metrics-ksm.*\"})", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Replicaset CPU usage", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "ci-dev-aks-eus-mac" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": 
"linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "line" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 12 + }, + "id": 63, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "ci-dev-aks-eus-mac" + }, + "expr": "sum ( node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"ci-dev-aks-mac-eus\", namespace=\"kube-system\", container!=\"\", pod=~\"ama-metrics-node.*\"}) / sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"ci-dev-aks-mac-eus\", namespace=\"kube-system\", resource=\"cpu\"}) ", + "refId": "A" + } + ], + "title": "Daemonset CPU%", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "ci-dev-aks-eus-mac" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + 
"y": 12 + }, + "id": 64, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "ci-dev-aks-eus-mac" + }, + "editorMode": "code", + "expr": "sum ( node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"ci-dev-aks-mac-eus\", namespace=\"kube-system\", container!=\"\", pod=~\"ama-metrics-node.*\"}) ", + "range": true, + "refId": "A" + } + ], + "title": "Daemonset CPU", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "ci-dev-aks-eus-mac" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "line" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 20 + }, + "id": 65, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "ci-dev-aks-eus-mac" + }, + "expr": "sum ( node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"ci-dev-aks-mac-eus\", namespace=\"kube-system\", container!=\"\", 
pod=~\"ama-metrics-ksm.*\"})/ sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"ci-dev-aks-mac-eus\", namespace=\"kube-system\", resource=\"cpu\"})", + "refId": "A" + } + ], + "title": "K-S-M CPU%", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "ci-dev-aks-eus-mac" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 20 + }, + "id": 66, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "ci-dev-aks-eus-mac" + }, + "editorMode": "code", + "expr": "sum ( node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"ci-dev-aks-mac-eus\", namespace=\"kube-system\", container!=\"\", pod=~\"ama-metrics-ksm.*\"})", + "range": true, + "refId": "A" + } + ], + "title": "K-S-M CPU", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "ci-dev-aks-eus-mac" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + 
"axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "line" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 0.00023 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 29 + }, + "id": 67, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "ci-dev-aks-eus-mac" + }, + "editorMode": "code", + "expr": "sum(container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"ci-dev-aks-mac-eus\", namespace=\"kube-system\",container!=\"\", image!=\"\", pod=~\"ama-metrics-ksm.*\"}) / sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"ci-dev-aks-mac-eus\", namespace=\"kube-system\", resource=\"memory\"})", + "format": "time_series", + "range": true, + "refId": "A" + } + ], + "title": "K-S-M Memory%", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "ci-dev-aks-eus-mac" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + 
"lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 29 + }, + "id": 68, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "ci-dev-aks-eus-mac" + }, + "editorMode": "code", + "expr": "sum(container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"ci-dev-aks-mac-eus\", namespace=\"kube-system\",container!=\"\", image!=\"\", pod=~\"ama-metrics-ksm.*\"}) ", + "format": "time_series", + "range": true, + "refId": "A" + } + ], + "title": "K-S-M Memory", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "ci-dev-aks-eus-mac" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "line" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + 
"x": 0, + "y": 37 + }, + "id": 69, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "ci-dev-aks-eus-mac" + }, + "expr": "sum(container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"ci-dev-aks-mac-eus\", namespace=\"kube-system\",container!=\"\", image!=\"\", pod=~\"ama-metrics-node.*\"}) / sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"ci-dev-aks-mac-eus\", namespace=\"kube-system\", resource=\"memory\"})", + "refId": "A" + } + ], + "title": "Daemonset Memory%", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "ci-dev-aks-eus-mac" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 37 + }, + "id": 70, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "ci-dev-aks-eus-mac" + }, + "editorMode": "code", + "expr": 
"sum(container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"ci-dev-aks-mac-eus\", namespace=\"kube-system\",container!=\"\", image!=\"\", pod=~\"ama-metrics-node.*\"})", + "range": true, + "refId": "A" + } + ], + "title": "Daemonset Memory", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "ci-dev-aks-eus-mac" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "line" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 45 + }, + "id": 71, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "ci-dev-aks-eus-mac" + }, + "expr": "(sum(container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"ci-dev-aks-mac-eus\", namespace=\"kube-system\",container!=\"\", image!=\"\", pod=~\"ama-metrics.*\"}) - sum(container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"ci-dev-aks-mac-eus\", namespace=\"kube-system\",container!=\"\", image!=\"\", pod=~\"ama-metrics-node.*\"}) - sum(container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"ci-dev-aks-mac-eus\", namespace=\"kube-system\",container!=\"\", image!=\"\", 
pod=~\"ama-metrics-ksm.*\"})) / sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"ci-dev-aks-mac-eus\", namespace=\"kube-system\", resource=\"memory\"})", + "refId": "A" + } + ], + "title": "Replicaset Memory%", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "ci-dev-aks-eus-mac" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 45 + }, + "id": 72, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "ci-dev-aks-eus-mac" + }, + "editorMode": "code", + "expr": "(sum(container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"ci-dev-aks-mac-eus\", namespace=\"kube-system\",container!=\"\", image!=\"\", pod=~\"ama-metrics.*\"}) - sum(container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"ci-dev-aks-mac-eus\", namespace=\"kube-system\",container!=\"\", image!=\"\", pod=~\"ama-metrics-node.*\"}) - sum(container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"ci-dev-aks-mac-eus\", namespace=\"kube-system\",container!=\"\", 
image!=\"\", pod=~\"ama-metrics-ksm.*\"})) ", + "range": true, + "refId": "A" + } + ], + "title": "Replicaset Memory", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "ci-dev-aks-eus-mac" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 53 + }, + "id": 73, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "ci-dev-aks-eus-mac" + }, + "editorMode": "code", + "expr": " up{cluster=\"ci-dev-aks-mac-eus\", job=\"kube-dns\"} == 0", + "range": true, + "refId": "A" + } + ], + "title": "up metric missing for target = kube-dns in cluster ci-dev-aks-mac-eus", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "ci-dev-aks-eus-mac" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + 
"gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 53 + }, + "id": 74, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "ci-dev-aks-eus-mac" + }, + "editorMode": "code", + "expr": "absent(up{cluster=\"ci-dev-aks-mac-eus\", job=\"cadvisor\"}) == 1 or up{cluster=\"ci-dev-aks-mac-eus\", job=\"cadvisor\"} == 0", + "range": true, + "refId": "A" + } + ], + "title": "up metric missing for target = cadvisor in cluster ci-dev-aks-mac-eus", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "ci-dev-aks-eus-mac" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + 
"steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ + { + "__systemRef": "hideSeriesFrom", + "matcher": { + "id": "byNames", + "options": { + "mode": "exclude", + "names": [ + "Up{cluster=\"ci-dev-aks-mac-eus\", instance=\"ama-metrics-ksm.kube-system.svc.cluster.local:8080\", job=\"kube-state-metrics\"}" + ], + "prefix": "All except:", + "readOnly": true + } + }, + "properties": [ + { + "id": "custom.hideFrom", + "value": { + "legend": false, + "tooltip": false, + "viz": true + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 61 + }, + "id": 75, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "ci-dev-aks-eus-mac" + }, + "expr": "absent(up{cluster=\"ci-dev-aks-mac-eus\", job=\"kube-state-metrics\"}) == 1 or up{cluster=\"ci-dev-aks-mac-eus\", job=\"kube-state-metrics\"} == 0", + "refId": "A" + } + ], + "title": "up metric missing for target = kube-state-metrics in cluster ci-dev-aks-mac-eus", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "ci-dev-aks-eus-mac" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": 
"absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 61 + }, + "id": 76, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "ci-dev-aks-eus-mac" + }, + "editorMode": "code", + "expr": " up{cluster=\"ci-dev-aks-mac-eus\", job=\"kube-proxy-windows\"} == 0", + "range": true, + "refId": "A" + } + ], + "title": "up metric missing for target = kube-proxy-windows in cluster ci-dev-aks-mac-eus", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "ci-dev-aks-eus-mac" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 69 + }, + "id": 77, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": 
"ci-dev-aks-eus-mac" + }, + "editorMode": "code", + "expr": " up{cluster=\"ci-dev-aks-mac-eus\", job=\"kube-apiserver\"} == 0", + "range": true, + "refId": "A" + } + ], + "title": "up metric missing for target = kube-apiserver in cluster ci-prod-aks-mac-weu", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "ci-dev-aks-eus-mac" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 69 + }, + "id": 78, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "ci-dev-aks-eus-mac" + }, + "editorMode": "code", + "expr": " up{cluster=\"ci-dev-aks-mac-eus\", job=\"kube-proxy\"} == 0", + "range": true, + "refId": "A" + } + ], + "title": "up metric missing for target = kube-proxy in cluster ci-dev-aks-mac-eus", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "ci-dev-aks-eus-mac" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + 
"axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 77 + }, + "id": 79, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "ci-dev-aks-eus-mac" + }, + "editorMode": "code", + "expr": " up{cluster=\"ci-dev-aks-mac-eus\", job=\"windows-exporter\"} == 0", + "range": true, + "refId": "A" + } + ], + "title": "up metric missing for target = windows-exporter in cluster ci-dev-aks-mac-eus", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "ci-dev-aks-eus-mac" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + 
"mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 77 + }, + "id": 80, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "ci-dev-aks-eus-mac" + }, + "editorMode": "code", + "expr": " up{cluster=\"ci-dev-aks-mac-eus\", job=\"kubelet \"} == 0", + "range": true, + "refId": "A" + } + ], + "title": "up metric missing for target = kubelet in cluster ci-dev-aks-mac-eus", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "ci-dev-aks-eus-mac" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 85 + }, + "id": 81, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + 
"datasource": { + "type": "prometheus", + "uid": "ci-dev-aks-eus-mac" + }, + "expr": "absent(up{cluster=\"ci-dev-aks-mac-eus\", job=\"node\"}) == 1 or up{cluster=\"ci-dev-aks-mac-eus\", job=\"node\"} == 0", + "refId": "A" + } + ], + "title": "up metric missing for target = node in cluster ci-dev-aks-mac-eus", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "ci-dev-aks-eus-mac" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ + { + "__systemRef": "hideSeriesFrom", + "matcher": { + "id": "byNames", + "options": { + "mode": "exclude", + "names": [ + "{__name__=\"node_uname_info\", cluster=\"ci-dev-aks-mac-eus\", domainname=\"(none)\", instance=\"10.240.0.115:9100\", job=\"custom-scrape-job-node\", machine=\"x86_64\", metrics_path=\"/metrics\", nodename=\"aks-agentpool-86683822-vmss000001\", release=\"5.4.0-1091-azure\", sysname=\"linux\", version=\"#96~18.04.1-ubuntu smp tue aug 30 19:15:32 utc 2022\"}" + ], + "prefix": "All except:", + "readOnly": true + } + }, + "properties": [ + { + "id": "custom.hideFrom", + "value": { + "legend": false, + "tooltip": false, + "viz": true + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 85 + }, + "id": 82, + 
"options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "ci-dev-aks-eus-mac" + }, + "editorMode": "code", + "expr": "absent(node_uname_info{cluster=\"ci-dev-aks-mac-eus\", machine=\"aarch64\"}) == 1 or node_uname_info{cluster=\"ci-dev-aks-mac-eus\", machine=\"aarch64\"} == 0\r\n\r\n", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Aarch64 metric missing ", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "ci-dev-aks-eus-mac" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 93 + }, + "id": 83, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "ci-dev-aks-eus-mac" + }, + "editorMode": "code", + "expr": "absent(node_uname_info{cluster=\"ci-dev-aks-mac-eus\", machine=\"x86_64\"}) == 1 or 
node_uname_info{cluster=\"ci-dev-aks-mac-eus\", machine=\"x86_64\"} == 0\r\n\r\n", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Amd64 metric missing", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "ci-dev-aks-eus-mac" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 93 + }, + "id": 135, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "ci-dev-aks-eus-mac" + }, + "editorMode": "code", + "expr": "count (alerts{alertstate=\"firing\"}) by (alertname, cluster)", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Firing alerts", + "type": "timeseries" + } + ], + "title": "Cluster = ci-dev-aks-mac-eus", + "type": "row" + } + ], + "refresh": "", + "schemaVersion": 38, + "style": "dark", + "tags": [], + "templating": { + "list": [] + }, + "time": { + "from": "now-12h", + "to": "now" + }, + "timepicker": {}, + "timezone": "", + "title": "CPU and Memory 
utilization k-s-m, replicaset and daemonset", + "uid": "gp9556IVy", + "version": 33, + "weekStart": "" + } diff --git a/internal/monitoring/dashboards/prod-near-ring-db.json b/internal/monitoring/dashboards/prod-near-ring-db.json new file mode 100644 index 000000000..230b7efb3 --- /dev/null +++ b/internal/monitoring/dashboards/prod-near-ring-db.json @@ -0,0 +1,4432 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": 44, + "links": [], + "liveNow": false, + "panels": [ + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 35, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "monitoring-metrics-amw-wcus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "line" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 1 + }, + "id": 109, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": 
"bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "monitoring-metrics-amw-wcus" + }, + "editorMode": "code", + "expr": "(sum ( node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"monitoring-metrics-prod-aks-wcus\", namespace=\"kube-system\", container!=\"\", pod=~\"ama-metrics.*\"}) - sum ( node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"monitoring-metrics-prod-aks-wcus\", namespace=\"kube-system\", pod=~\"ama-metrics-node.*\"}) - sum ( node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"monitoring-metrics-prod-aks-wcus\", namespace=\"kube-system\", pod=~\"ama-metrics-ksm.*\"}))/ sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"monitoring-metrics-prod-aks-wcus\", namespace=\"kube-system\", resource=\"cpu\"})", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Replicaset CPU %", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "monitoring-metrics-amw-wcus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + 
"x": 12, + "y": 1 + }, + "id": 110, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "monitoring-metrics-amw-wcus" + }, + "editorMode": "code", + "expr": "sum ( node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"monitoring-metrics-prod-aks-wcus\", namespace=\"kube-system\", container!=\"\", pod=~\"ama-metrics.*\"}) - sum ( node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"monitoring-metrics-prod-aks-wcus\", namespace=\"kube-system\", pod=~\"ama-metrics-node.*\"}) - sum ( node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"monitoring-metrics-prod-aks-wcus\", namespace=\"kube-system\", pod=~\"ama-metrics-ksm.*\"})", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Replicaset CPU usage", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "monitoring-metrics-amw-wcus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "line" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 9 + }, + "id": 111, + 
"options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "monitoring-metrics-amw-wcus" + }, + "expr": "sum ( node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"monitoring-metrics-prod-aks-wcus\", namespace=\"kube-system\", container!=\"\", pod=~\"ama-metrics-node.*\"}) / sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"monitoring-metrics-prod-aks-wcus\", namespace=\"kube-system\", resource=\"cpu\"}) ", + "refId": "A" + } + ], + "title": "Daemonset CPU%", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "monitoring-metrics-amw-wcus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 9 + }, + "id": 112, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "monitoring-metrics-amw-wcus" + }, + "editorMode": "code", + 
"expr": "sum ( node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"monitoring-metrics-prod-aks-wcus\", namespace=\"kube-system\", container!=\"\", pod=~\"ama-metrics-node.*\"}) ", + "range": true, + "refId": "A" + } + ], + "title": "Daemonset CPU", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "monitoring-metrics-amw-wcus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "line" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 17 + }, + "id": 113, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "monitoring-metrics-amw-wcus" + }, + "expr": "sum ( node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"monitoring-metrics-prod-aks-wcus\", namespace=\"kube-system\", container!=\"\", pod=~\"ama-metrics-ksm.*\"})/ sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"monitoring-metrics-prod-aks-wcus\", namespace=\"kube-system\", resource=\"cpu\"})", + "refId": "A" + } + ], + "title": "K-S-M CPU%", + "type": "timeseries" + }, + { + 
"datasource": { + "type": "prometheus", + "uid": "monitoring-metrics-amw-wcus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 17 + }, + "id": 114, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "monitoring-metrics-amw-wcus" + }, + "editorMode": "code", + "expr": "sum ( node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"monitoring-metrics-prod-aks-wcus\", namespace=\"kube-system\", container!=\"\", pod=~\"ama-metrics-ksm.*\"})", + "range": true, + "refId": "A" + } + ], + "title": "K-S-M CPU", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "monitoring-metrics-amw-wcus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false 
+ }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "line" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 0.00023 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 26 + }, + "id": 115, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "monitoring-metrics-amw-wcus" + }, + "editorMode": "code", + "expr": "sum(container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"monitoring-metrics-prod-aks-wcus\", namespace=\"kube-system\",container!=\"\", image!=\"\", pod=~\"ama-metrics-ksm.*\"}) / sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"monitoring-metrics-prod-aks-wcus\", namespace=\"kube-system\", resource=\"memory\"})", + "format": "time_series", + "range": true, + "refId": "A" + } + ], + "title": "K-S-M Memory%", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "monitoring-metrics-amw-wcus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": 
"none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 26 + }, + "id": 116, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "monitoring-metrics-amw-wcus" + }, + "editorMode": "code", + "expr": "sum(container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"monitoring-metrics-prod-aks-wcus\", namespace=\"kube-system\",container!=\"\", image!=\"\", pod=~\"ama-metrics-ksm.*\"}) ", + "format": "time_series", + "range": true, + "refId": "A" + } + ], + "title": "K-S-M Memory", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "monitoring-metrics-amw-wcus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "line" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 34 + }, + "id": 117, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { 
+ "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "monitoring-metrics-amw-wcus" + }, + "expr": "sum(container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"monitoring-metrics-prod-aks-wcus\", namespace=\"kube-system\",container!=\"\", image!=\"\", pod=~\"ama-metrics-node.*\"}) / sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"monitoring-metrics-prod-aks-wcus\", namespace=\"kube-system\", resource=\"memory\"})", + "refId": "A" + } + ], + "title": "Daemonset Memory%", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "monitoring-metrics-amw-wcus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 34 + }, + "id": 118, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "monitoring-metrics-amw-wcus" + }, + "editorMode": "code", + "expr": "sum(container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"monitoring-metrics-prod-aks-wcus\", 
namespace=\"kube-system\",container!=\"\", image!=\"\", pod=~\"ama-metrics-node.*\"})", + "range": true, + "refId": "A" + } + ], + "title": "Daemonset Memory", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "monitoring-metrics-amw-wcus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "line" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 42 + }, + "id": 119, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "monitoring-metrics-amw-wcus" + }, + "expr": "(sum(container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"monitoring-metrics-prod-aks-wcus\", namespace=\"kube-system\",container!=\"\", image!=\"\", pod=~\"ama-metrics.*\"}) - sum(container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"monitoring-metrics-prod-aks-wcus\", namespace=\"kube-system\",container!=\"\", image!=\"\", pod=~\"ama-metrics-node.*\"}) - sum(container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"monitoring-metrics-prod-aks-wcus\", namespace=\"kube-system\",container!=\"\", image!=\"\", pod=~\"ama-metrics-ksm.*\"})) / 
sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"monitoring-metrics-prod-aks-wcus\", namespace=\"kube-system\", resource=\"memory\"})", + "refId": "A" + } + ], + "title": "Replicaset Memory%", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "monitoring-metrics-amw-wcus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 42 + }, + "id": 120, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "monitoring-metrics-amw-wcus" + }, + "editorMode": "code", + "expr": "(sum(container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"monitoring-metrics-prod-aks-wcus\", namespace=\"kube-system\",container!=\"\", image!=\"\", pod=~\"ama-metrics.*\"}) - sum(container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"monitoring-metrics-prod-aks-wcus\", namespace=\"kube-system\",container!=\"\", image!=\"\", pod=~\"ama-metrics-node.*\"}) - sum(container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"monitoring-metrics-prod-aks-wcus\", 
namespace=\"kube-system\",container!=\"\", image!=\"\", pod=~\"ama-metrics-ksm.*\"})) ", + "range": true, + "refId": "A" + } + ], + "title": "Replicaset Memory", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "monitoring-metrics-amw-wcus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 50 + }, + "id": 121, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "monitoring-metrics-amw-wcus" + }, + "editorMode": "code", + "expr": " up{cluster=\"monitoring-metrics-prod-aks-wcus\", job=\"kube-dns\"} == 0", + "range": true, + "refId": "A" + } + ], + "title": "up metric missing for target = kube-dns in cluster monitoring-metrics-prod-aks-wcus", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "monitoring-metrics-amw-wcus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + 
"axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 50 + }, + "id": 122, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "monitoring-metrics-amw-wcus" + }, + "editorMode": "code", + "expr": "absent(up{cluster=\"monitoring-metrics-prod-aks-wcus\", job=\"cadvisor\"}) == 1 or up{cluster=\"monitoring-metrics-prod-aks-wcus\", job=\"cadvisor\"} == 0", + "range": true, + "refId": "A" + } + ], + "title": "up metric missing for target = cadvisor in cluster monitoring-metrics-prod-aks-wcus", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "monitoring-metrics-amw-wcus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { 
+ "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ + { + "__systemRef": "hideSeriesFrom", + "matcher": { + "id": "byNames", + "options": { + "mode": "exclude", + "names": [ + "Up{cluster=\"monitoring-metrics-prod-aks-wcus\", instance=\"ama-metrics-ksm.kube-system.svc.cluster.local:8080\", job=\"kube-state-metrics\"}" + ], + "prefix": "All except:", + "readOnly": true + } + }, + "properties": [ + { + "id": "custom.hideFrom", + "value": { + "legend": false, + "tooltip": false, + "viz": true + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 58 + }, + "id": 123, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "monitoring-metrics-amw-wcus" + }, + "expr": "absent(up{cluster=\"monitoring-metrics-prod-aks-wcus\", job=\"kube-state-metrics\"}) == 1 or up{cluster=\"monitoring-metrics-prod-aks-wcus\", job=\"kube-state-metrics\"} == 0", + "refId": "A" + } + ], + "title": "up metric missing for target = kube-state-metrics in cluster monitoring-metrics-prod-aks-wcus", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "monitoring-metrics-amw-wcus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + 
"type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 58 + }, + "id": 124, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "monitoring-metrics-amw-wcus" + }, + "editorMode": "code", + "expr": " up{cluster=\"monitoring-metrics-prod-aks-wcus\", job=\"kube-proxy-windows\"} == 0", + "range": true, + "refId": "A" + } + ], + "title": "up metric missing for target = kube-proxy-windows in cluster monitoring-metrics-prod-aks-wcus", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "monitoring-metrics-amw-wcus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 66 + }, + "id": 
125, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "monitoring-metrics-amw-wcus" + }, + "editorMode": "code", + "expr": " up{cluster=\"monitoring-metrics-prod-aks-wcus\", job=\"kube-apiserver\"} == 0", + "range": true, + "refId": "A" + } + ], + "title": "up metric missing for target = kube-apiserver in cluster monitoring-metrics-prod-aks-wcus", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "monitoring-metrics-amw-wcus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 66 + }, + "id": 126, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "monitoring-metrics-amw-wcus" + }, + "editorMode": "code", + "expr": " up{cluster=\"monitoring-metrics-prod-aks-wcus\", job=\"kube-proxy\"} == 0", + "range": true, + "refId": "A" + } 
+ ], + "title": "up metric missing for target = kube-proxy in cluster monitoring-metrics-prod-aks-wcus", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "monitoring-metrics-amw-wcus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 74 + }, + "id": 127, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "monitoring-metrics-amw-wcus" + }, + "editorMode": "code", + "expr": " up{cluster=\"monitoring-metrics-prod-aks-wcus\", job=\"windows-exporter\"} == 0", + "range": true, + "refId": "A" + } + ], + "title": "up metric missing for target = windows-exporter in cluster monitoring-metrics-prod-aks-wcus", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "monitoring-metrics-amw-wcus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + 
"barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 74 + }, + "id": 128, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "monitoring-metrics-amw-wcus" + }, + "editorMode": "code", + "expr": " up{cluster=\"monitoring-metrics-prod-aks-wcus\", job=\"kubelet\"} == 0", + "range": true, + "refId": "A" + } + ], + "title": "up metric missing for target = kubelet in cluster monitoring-metrics-prod-aks-wcus", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "monitoring-metrics-amw-wcus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], +
"thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 82 + }, + "id": 129, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "monitoring-metrics-amw-wcus" + }, + "expr": "absent(up{cluster=\"monitoring-metrics-prod-aks-wcus\", job=\"node\"}) == 1 or up{cluster=\"monitoring-metrics-prod-aks-wcus\", job=\"node\"} == 0", + "refId": "A" + } + ], + "title": "up metric missing for target = node in cluster monitoring-metrics-prod-aks-wcus", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "monitoring-metrics-amw-wcus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ + { + "__systemRef": "hideSeriesFrom", + "matcher": { + "id": "byNames", + "options": { + "mode": "exclude", + "names": [ + "{__name__=\"node_uname_info\", cluster=\"monitoring-metrics-prod-aks-wcus\", domainname=\"(none)\", instance=\"10.240.0.115:9100\", 
job=\"custom-scrape-job-node\", machine=\"x86_64\", metrics_path=\"/metrics\", nodename=\"aks-agentpool-86683822-vmss000001\", release=\"5.4.0-1091-azure\", sysname=\"linux\", version=\"#96~18.04.1-ubuntu smp tue aug 30 19:15:32 utc 2022\"}" + ], + "prefix": "All except:", + "readOnly": true + } + }, + "properties": [ + { + "id": "custom.hideFrom", + "value": { + "legend": false, + "tooltip": false, + "viz": true + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 82 + }, + "id": 130, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "monitoring-metrics-amw-wcus" + }, + "editorMode": "code", + "expr": "absent(node_uname_info{cluster=\"monitoring-metrics-prod-aks-wcus\", machine=\"aarch64\"}) == 1 or node_uname_info{cluster=\"monitoring-metrics-prod-aks-wcus\", machine=\"aarch64\"} == 0\r\n\r\n", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Aarch64 metric missing ", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "monitoring-metrics-amw-wcus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null 
+ }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 90 + }, + "id": 131, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "monitoring-metrics-amw-wcus" + }, + "editorMode": "code", + "expr": "absent(node_uname_info{cluster=\"monitoring-metrics-prod-aks-wcus\", machine=\"x86_64\"}) == 1 or node_uname_info{cluster=\"monitoring-metrics-prod-aks-wcus\", machine=\"x86_64\"} == 0\r\n\r\n", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Amd64 metric missing", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "monitoring-metrics-amw-wcus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 90 + }, + "id": 132, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + 
"datasource": { + "type": "prometheus", + "uid": "monitoring-metrics-amw-wcus" + }, + "editorMode": "code", + "expr": "count (alerts{alertstate=\"firing\"}) by (alertname, cluster)", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Firing alerts", + "type": "timeseries" + } + ], + "title": "Cluster = monitoring-metrics-prod-aks-wcus", + "type": "row" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 1 + }, + "id": 84, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "monitoring-metrics-amw-eus2euap" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "line" + } + }, + "mappings": [], + "max": 0.00095, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 2 + }, + "id": 43, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "monitoring-metrics-amw-eus2euap" + }, + "editorMode": "code", + "expr": "(sum ( node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"monitoring-metrics-prod-aks-eus2euap\", namespace=\"kube-system\", container!=\"\", pod=~\"ama-metrics.*\"}) - sum ( 
node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"monitoring-metrics-prod-aks-eus2euap\", namespace=\"kube-system\", pod=~\"ama-metrics-node.*\"}) - sum ( node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"monitoring-metrics-prod-aks-eus2euap\", namespace=\"kube-system\", pod=~\"ama-metrics-ksm.*\"}))/ sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"monitoring-metrics-prod-aks-eus2euap\", namespace=\"kube-system\", resource=\"cpu\"})", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Replicaset CPU %", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "monitoring-metrics-amw-eus2euap" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 2 + }, + "id": 36, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "monitoring-metrics-amw-eus2euap" + }, + "editorMode": "code", + "expr": "sum ( 
node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"monitoring-metrics-prod-aks-eus2euap\", namespace=\"kube-system\", container!=\"\", pod=~\"ama-metrics.*\"}) - sum ( node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"monitoring-metrics-prod-aks-eus2euap\", namespace=\"kube-system\", pod=~\"ama-metrics-node.*\"}) - sum ( node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"monitoring-metrics-prod-aks-eus2euap\", namespace=\"kube-system\", pod=~\"ama-metrics-ksm.*\"})", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Replicaset CPU usage", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "monitoring-metrics-amw-eus2euap" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "line" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 10 + }, + "id": 44, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "monitoring-metrics-amw-eus2euap" + }, + "expr": "sum ( 
node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"monitoring-metrics-prod-aks-eus2euap\", namespace=\"kube-system\", container!=\"\", pod=~\"ama-metrics-node.*\"}) / sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"monitoring-metrics-prod-aks-eus2euap\", namespace=\"kube-system\", resource=\"cpu\"}) ", + "refId": "A" + } + ], + "title": "Daemonset CPU%", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "monitoring-metrics-amw-eus2euap" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 10 + }, + "id": 37, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "monitoring-metrics-amw-eus2euap" + }, + "editorMode": "code", + "expr": "sum ( node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"monitoring-metrics-prod-aks-eus2euap\", namespace=\"kube-system\", container!=\"\", pod=~\"ama-metrics-node.*\"}) ", + "range": true, + "refId": "A" + } + ], + "title": "Daemonset CPU", + 
"type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "monitoring-metrics-amw-eus2euap" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "line" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 18 + }, + "id": 45, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "monitoring-metrics-amw-eus2euap" + }, + "expr": "sum ( node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"monitoring-metrics-prod-aks-eus2euap\", namespace=\"kube-system\", container!=\"\", pod=~\"ama-metrics-ksm.*\"})/ sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"monitoring-metrics-prod-aks-eus2euap\", namespace=\"kube-system\", resource=\"cpu\"})", + "refId": "A" + } + ], + "title": "K-S-M CPU%", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "monitoring-metrics-amw-eus2euap" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + 
"axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 18 + }, + "id": 38, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "monitoring-metrics-amw-eus2euap" + }, + "editorMode": "code", + "expr": "sum ( node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"monitoring-metrics-prod-aks-eus2euap\", namespace=\"kube-system\", container!=\"\", pod=~\"ama-metrics-ksm.*\"})", + "range": true, + "refId": "A" + } + ], + "title": "K-S-M CPU", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "monitoring-metrics-amw-eus2euap" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { 
+ "mode": "line" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 0.00023 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 27 + }, + "id": 46, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "monitoring-metrics-amw-eus2euap" + }, + "editorMode": "code", + "expr": "sum(container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"monitoring-metrics-prod-aks-eus2euap\", namespace=\"kube-system\",container!=\"\", image!=\"\", pod=~\"ama-metrics-ksm.*\"}) / sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"monitoring-metrics-prod-aks-eus2euap\", namespace=\"kube-system\", resource=\"memory\"})", + "format": "time_series", + "range": true, + "refId": "A" + } + ], + "title": "K-S-M Memory%", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "monitoring-metrics-amw-eus2euap" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + 
"h": 8, + "w": 12, + "x": 12, + "y": 27 + }, + "id": 39, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "monitoring-metrics-amw-eus2euap" + }, + "editorMode": "code", + "expr": "sum(container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"monitoring-metrics-prod-aks-eus2euap\", namespace=\"kube-system\",container!=\"\", image!=\"\", pod=~\"ama-metrics-ksm.*\"}) ", + "format": "time_series", + "range": true, + "refId": "A" + } + ], + "title": "K-S-M Memory", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "monitoring-metrics-amw-eus2euap" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "line" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 35 + }, + "id": 47, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "monitoring-metrics-amw-eus2euap" + }, + "expr": "sum(container_memory_working_set_bytes{job=\"cadvisor\", 
cluster=\"monitoring-metrics-prod-aks-eus2euap\", namespace=\"kube-system\",container!=\"\", image!=\"\", pod=~\"ama-metrics-node.*\"}) / sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"monitoring-metrics-prod-aks-eus2euap\", namespace=\"kube-system\", resource=\"memory\"})", + "refId": "A" + } + ], + "title": "Daemonset Memory%", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "monitoring-metrics-amw-eus2euap" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 35 + }, + "id": 40, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "monitoring-metrics-amw-eus2euap" + }, + "editorMode": "code", + "expr": "sum(container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"monitoring-metrics-prod-aks-eus2euap\", namespace=\"kube-system\",container!=\"\", image!=\"\", pod=~\"ama-metrics-node.*\"})", + "range": true, + "refId": "A" + } + ], + "title": "Daemonset Memory", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": 
"monitoring-metrics-amw-eus2euap" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "line" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 43 + }, + "id": 48, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "monitoring-metrics-amw-eus2euap" + }, + "expr": "(sum(container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"monitoring-metrics-prod-aks-eus2euap\", namespace=\"kube-system\",container!=\"\", image!=\"\", pod=~\"ama-metrics.*\"}) - sum(container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"monitoring-metrics-prod-aks-eus2euap\", namespace=\"kube-system\",container!=\"\", image!=\"\", pod=~\"ama-metrics-node.*\"}) - sum(container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"monitoring-metrics-prod-aks-eus2euap\", namespace=\"kube-system\",container!=\"\", image!=\"\", pod=~\"ama-metrics-ksm.*\"})) / sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"monitoring-metrics-prod-aks-eus2euap\", namespace=\"kube-system\", resource=\"memory\"})", + "refId": "A" + } + ], + "title": "Replicaset Memory%", + "type": 
"timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "monitoring-metrics-amw-eus2euap" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 43 + }, + "id": 41, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "monitoring-metrics-amw-eus2euap" + }, + "editorMode": "code", + "expr": "(sum(container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"monitoring-metrics-prod-aks-eus2euap\", namespace=\"kube-system\",container!=\"\", image!=\"\", pod=~\"ama-metrics.*\"}) - sum(container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"monitoring-metrics-prod-aks-eus2euap\", namespace=\"kube-system\",container!=\"\", image!=\"\", pod=~\"ama-metrics-node.*\"}) - sum(container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"monitoring-metrics-prod-aks-eus2euap\", namespace=\"kube-system\",container!=\"\", image!=\"\", pod=~\"ama-metrics-ksm.*\"})) ", + "range": true, + "refId": "A" + } + ], + "title": "Replicaset Memory", + "type": "timeseries" + }, + { + "datasource": { + "type": 
"prometheus", + "uid": "monitoring-metrics-amw-eus2euap" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 51 + }, + "id": 49, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "monitoring-metrics-amw-eus2euap" + }, + "editorMode": "code", + "expr": " up{cluster=\"monitoring-metrics-prod-aks-eus2euap\", job=\"kube-dns\"} == 0", + "range": true, + "refId": "A" + } + ], + "title": "up metric missing for target = kube-dns in cluster monitoring-metrics-amw-eus2euap", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "monitoring-metrics-amw-eus2euap" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + 
"lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 51 + }, + "id": 50, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "monitoring-metrics-amw-eus2euap" + }, + "editorMode": "code", + "expr": "absent(up{cluster=\"monitoring-metrics-prod-aks-eus2euap\", job=\"cadvisor\"}) == 1 or up{cluster=\"monitoring-metrics-prod-aks-eus2euap\", job=\"cadvisor\"} == 0", + "range": true, + "refId": "A" + } + ], + "title": "up metric missing for target = cadvisor in cluster monitoring-metrics-amw-eus2euap", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "monitoring-metrics-amw-eus2euap" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + 
] + } + }, + "overrides": [ + { + "__systemRef": "hideSeriesFrom", + "matcher": { + "id": "byNames", + "options": { + "mode": "exclude", + "names": [ + "Up{cluster=\"monitoring-metrics-prod-aks-eus2euap\", instance=\"ama-metrics-ksm.kube-system.svc.cluster.local:8080\", job=\"kube-state-metrics\"}" + ], + "prefix": "All except:", + "readOnly": true + } + }, + "properties": [ + { + "id": "custom.hideFrom", + "value": { + "legend": false, + "tooltip": false, + "viz": true + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 59 + }, + "id": 51, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "monitoring-metrics-amw-eus2euap" + }, + "expr": "absent(up{cluster=\"monitoring-metrics-prod-aks-eus2euap\", job=\"kube-state-metrics\"}) == 1 or up{cluster=\"monitoring-metrics-prod-aks-eus2euap\", job=\"kube-state-metrics\"} == 0", + "refId": "A" + } + ], + "title": "up metric missing for target = kube-state-metrics in cluster monitoring-metrics-amw-eus2euap", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "monitoring-metrics-amw-eus2euap" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + 
"steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 59 + }, + "id": 52, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "monitoring-metrics-amw-eus2euap" + }, + "editorMode": "code", + "expr": " up{cluster=\"monitoring-metrics-prod-aks-eus2euap\", job=\"kube-proxy-windows\"} == 0", + "range": true, + "refId": "A" + } + ], + "title": "up metric missing for target = kube-proxy-windows in cluster monitoring-metrics-amw-eus2euap", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "monitoring-metrics-amw-eus2euap" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 67 + }, + "id": 53, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": 
"monitoring-metrics-amw-eus2euap" + }, + "editorMode": "code", + "expr": " up{cluster=\"monitoring-metrics-prod-aks-eus2euap\", job=\"kube-apiserver\"} == 0", + "range": true, + "refId": "A" + } + ], + "title": "up metric missing for target = kube-apiserver in cluster monitoring-metrics-amw-eus2euap", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "monitoring-metrics-amw-eus2euap" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 67 + }, + "id": 54, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "monitoring-metrics-amw-eus2euap" + }, + "editorMode": "code", + "expr": " up{cluster=\"monitoring-metrics-prod-aks-eus2euap\", job=\"kube-proxy\"} == 0", + "range": true, + "refId": "A" + } + ], + "title": "up metric missing for target = kube-proxy in cluster monitoring-metrics-amw-eus2euap", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "monitoring-metrics-amw-eus2euap" + }, + "fieldConfig": { + 
"defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 75 + }, + "id": 55, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "monitoring-metrics-amw-eus2euap" + }, + "editorMode": "code", + "expr": " up{cluster=\"monitoring-metrics-prod-aks-eus2euap\", job=\"windows-exporter\"} == 0", + "range": true, + "refId": "A" + } + ], + "title": "up metric missing for target = windows-exporter in cluster monitoring-metrics-amw-eus2euap", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "monitoring-metrics-amw-eus2euap" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + 
"type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 75 + }, + "id": 56, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "monitoring-metrics-amw-eus2euap" + }, + "editorMode": "code", + "expr": " up{cluster=\"monitoring-metrics-prod-aks-eus2euap\", job=\"kubelet\"} == 0", + "range": true, + "refId": "A" + } + ], + "title": "up metric missing for target = kubelet in cluster monitoring-metrics-amw-eus2euap", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "monitoring-metrics-amw-eus2euap" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 83 + }, + "id": 57, + "options": { + "legend": { + "calcs": 
[], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "monitoring-metrics-amw-eus2euap" + }, + "expr": "absent(up{cluster=\"monitoring-metrics-prod-aks-eus2euap\", job=\"node\"}) == 1 or up{cluster=\"monitoring-metrics-prod-aks-eus2euap\", job=\"node\"} == 0", + "refId": "A" + } + ], + "title": "up metric missing for target = node in cluster monitoring-metrics-amw-eus2euap", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "monitoring-metrics-amw-eus2euap" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ + { + "__systemRef": "hideSeriesFrom", + "matcher": { + "id": "byNames", + "options": { + "mode": "exclude", + "names": [ + "{__name__=\"node_uname_info\", cluster=\"monitoring-metrics-prod-aks-eus2euap\", domainname=\"(none)\", instance=\"10.240.0.115:9100\", job=\"custom-scrape-job-node\", machine=\"x86_64\", metrics_path=\"/metrics\", nodename=\"aks-agentpool-86683822-vmss000001\", release=\"5.4.0-1091-azure\", sysname=\"linux\", version=\"#96~18.04.1-ubuntu smp tue aug 30 19:15:32 utc 2022\"}" + ], + "prefix": "All except:", + 
"readOnly": true + } + }, + "properties": [ + { + "id": "custom.hideFrom", + "value": { + "legend": false, + "tooltip": false, + "viz": true + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 83 + }, + "id": 58, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "monitoring-metrics-amw-eus2euap" + }, + "editorMode": "code", + "expr": "absent(node_uname_info{cluster=\"monitoring-metrics-prod-aks-eus2euap\", machine=\"aarch64\"}) == 1 or node_uname_info{cluster=\"monitoring-metrics-prod-aks-eus2euap\", machine=\"aarch64\"} == 0\r\n\r\n", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Aarch64 metric missing ", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "monitoring-metrics-amw-eus2euap" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 91 + }, + "id": 59, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + 
"tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "monitoring-metrics-amw-eus2euap" + }, + "editorMode": "code", + "expr": "absent(node_uname_info{cluster=\"monitoring-metrics-prod-aks-eus2euap\", machine=\"x86_64\"}) == 1 or node_uname_info{cluster=\"monitoring-metrics-prod-aks-eus2euap\", machine=\"x86_64\"} == 0\r\n\r\n", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Amd64 metric missing", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "monitoring-metrics-amw-eus2euap" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 91 + }, + "id": 133, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "monitoring-metrics-amw-eus2euap" + }, + "editorMode": "code", + "expr": "count (alerts{alertstate=\"firing\"}) by (alertname, cluster)", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Firing alerts", + 
"type": "timeseries" + } + ], + "title": "cluster = monitoring-metrics-amw-eus2euap", + "type": "row" + } + ], + "refresh": "", + "schemaVersion": 38, + "style": "dark", + "tags": [], + "templating": { + "list": [] + }, + "time": { + "from": "now-12h", + "to": "now" + }, + "timepicker": {}, + "timezone": "", + "title": "CPU and Memory utilization k-s-m, replicaset and daemonset", + "uid": "gp9556IVy", + "version": 11, + "weekStart": "" + } From 86fff89eeb6ae877bf15129c1e3851b1d9aed55d Mon Sep 17 00:00:00 2001 From: Soham Dasgupta Date: Tue, 26 Sep 2023 20:06:18 -0700 Subject: [PATCH 05/10] Make single file for alerts and update dashboard to include unit in memory --- internal/alerts/ci-dev-arc-wcus.json | 191 ----------- internal/alerts/ci-prod-aks-mac-weu.json | 296 ------------------ internal/alerts/ci-prod-arc-wcus.json | 191 ----------- internal/alerts/cluster-scopes.txt | 24 ++ ...c-eus.json => example-alert-template.json} | 26 +- .../monitoring-metrics-prod-aks-eus2euap.json | 296 ------------------ .../monitoring-metrics-prod-aks-wcus.json | 296 ------------------ internal/monitoring/dashboards/cicd-db.json | 104 +++--- .../dashboards/prod-near-ring-db.json | 62 ++-- 9 files changed, 121 insertions(+), 1365 deletions(-) delete mode 100644 internal/alerts/ci-dev-arc-wcus.json delete mode 100644 internal/alerts/ci-prod-aks-mac-weu.json delete mode 100644 internal/alerts/ci-prod-arc-wcus.json create mode 100644 internal/alerts/cluster-scopes.txt rename internal/alerts/{ci-dev-aks-mac-eus.json => example-alert-template.json} (84%) delete mode 100644 internal/alerts/monitoring-metrics-prod-aks-eus2euap.json delete mode 100644 internal/alerts/monitoring-metrics-prod-aks-wcus.json diff --git a/internal/alerts/ci-dev-arc-wcus.json b/internal/alerts/ci-dev-arc-wcus.json deleted file mode 100644 index eaa360657..000000000 --- a/internal/alerts/ci-dev-arc-wcus.json +++ /dev/null @@ -1,191 +0,0 @@ -{ - "$schema": 
"https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#", - "contentVersion": "1.0.0.0", - "parameters": {}, - "variables": {}, - "resources": [ - { - "name": "containerinsights_dev_mac_eus_alerts", - "type": "Microsoft.AlertsManagement/prometheusRuleGroups", - "apiVersion": "2023-03-01", - "location": "eastus", - "properties": { - "description": "rule group for cluster ci-dev-arc-wcus in MAC: ci-dev-arc-amw", - "scopes": [ - "/subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourceGroups/ci-dev-arc-wcus/providers/microsoft.monitor/accounts/ci-dev-arc-wcus", - "/subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourceGroups/ci-dev-arc-wcus/providers/microsoft.monitor/accounts/ci-dev-arc-amw" - ], - "rules": [ - { - "alert": "Amd64 metric missing in cluster ci-dev-arc-wcus", - "expression": "absent(node_uname_info{cluster=\"ci-dev-arc-wcus\", machine=\"x86_64\"}) == 1 or node_uname_info{cluster=\"ci-dev-arc-wcus\", machine=\"x86_64\"} == 0", - "for": "PT3M", - "labels": { - "cluster": "ci-dev-arc-wcus" - }, - "annotations": { - "description": "Amd64 metric missing in cluster ci-dev-arc-wcus" - }, - "severity": 4, - "resolveConfiguration": { - "autoResolved": true, - "timeToResolve": "PT10M" - }, - "actions": [ - { - "actionGroupId": "/subscriptions/13d371f9-5a39-46d5-8e1b-60158c49db84/resourceGroups/ContainerInsightsPrometheusCollector-Prod/providers/microsoft.insights/actiongroups/AMA-PrometheusCollectorAlertGroup" - } - ] - }, - { - "alert": "up metric missing for target = node in cluster ci-dev-arc-wcus", - "expression": "absent(up{cluster=\"ci-dev-arc-wcus\", job=\"node\"}) == 1 or up{cluster=\"ci-dev-arc-wcus\", job=\"node\"} == 0", - "for": "PT3M", - "labels": { - "cluster": "ci-dev-arc-wcus" - }, - "annotations": { - "description": "up metric is not flowing for target = node in cluster ci-dev-arc-wcus" - }, - "severity": 4, - "resolveConfiguration": { - "autoResolved": true, - "timeToResolve": "PT10M" - }, - "actions": [ - { - 
"actionGroupId": "/subscriptions/13d371f9-5a39-46d5-8e1b-60158c49db84/resourceGroups/ContainerInsightsPrometheusCollector-Prod/providers/microsoft.insights/actiongroups/AMA-PrometheusCollectorAlertGroup" - } - ] - }, - { - "alert": "up metric missing for target = kubelet in cluster ci-dev-arc-wcus", - "expression": "absent(up{cluster=\"ci-dev-arc-wcus\", job=\"kubelet\"}) == 1 or up{cluster=\"ci-dev-arc-wcus\", job=\"kubelet\"} == 0", - "for": "PT3M", - "labels": { - "cluster": "ci-dev-arc-wcus" - }, - "annotations": { - "description": "up metric is not flowing for target = kubelet in cluster ci-dev-arc-wcus" - }, - "severity": 4, - "resolveConfiguration": { - "autoResolved": true, - "timeToResolve": "PT10M" - }, - "actions": [ - { - "actionGroupId": "/subscriptions/13d371f9-5a39-46d5-8e1b-60158c49db84/resourceGroups/ContainerInsightsPrometheusCollector-Prod/providers/microsoft.insights/actiongroups/AMA-PrometheusCollectorAlertGroup" - } - ] - }, - { - "alert": "up metric missing for target = kube-state-metrics in cluster ci-dev-arc-wcus", - "expression": "absent(up{cluster=\"ci-dev-arc-wcus\", job=\"kube-state-metrics\"}) == 1 or up{cluster=\"ci-dev-arc-wcus\", job=\"kube-state-metrics\"} == 0", - "for": "PT3M", - "labels": { - "cluster": "ci-dev-arc-wcus" - }, - "annotations": { - "description": "up metric is not flowing for target = kube-state-metrics in cluster ci-dev-arc-wcus" - }, - "severity": 4, - "resolveConfiguration": { - "autoResolved": true, - "timeToResolve": "PT10M" - }, - "actions": [ - { - "actionGroupId": "/subscriptions/13d371f9-5a39-46d5-8e1b-60158c49db84/resourceGroups/ContainerInsightsPrometheusCollector-Prod/providers/microsoft.insights/actiongroups/AMA-PrometheusCollectorAlertGroup" - } - ] - }, - { - "alert": "up metric missing for target = cadvisor in cluster ci-dev-arc-wcus", - "expression": "absent(up{cluster=\"ci-dev-arc-wcus\", job=\"cadvisor\"}) == 1 or up{cluster=\"ci-dev-arc-wcus\", job=\"cadvisor\"} == 0", - "for": "PT3M", - 
"labels": { - "cluster": "ci-dev-arc-wcus" - }, - "annotations": { - "description": "up metric is not flowing for target = cadvisor in cluster ci-dev-arc-wcus" - }, - "severity": 4, - "resolveConfiguration": { - "autoResolved": true, - "timeToResolve": "PT10M" - }, - "actions": [ - { - "actionGroupId": "/subscriptions/13d371f9-5a39-46d5-8e1b-60158c49db84/resourceGroups/ContainerInsightsPrometheusCollector-Prod/providers/microsoft.insights/actiongroups/AMA-PrometheusCollectorAlertGroup" - } - ] - }, - { - "alert": "CPU usage % greater than 90 for prometheus-collector containers on cluster ci-dev-arc-wcus", - "expression": "sum(sum by (cluster, namespace, pod, container) ( rate(container_cpu_usage_seconds_total{job=\"cadvisor\", image!=\"\", namespace=\"kube-system\", cluster=\"ci-dev-arc-wcus\", container=\"prometheus-collector\"}[5m]) ) * on (cluster, namespace, pod) group_left(node) topk by (cluster, namespace, pod) ( 1, max by(cluster, namespace, pod, node) (kube_pod_info{node!=\"\", namespace=\"kube-system\", cluster=\"ci-dev-arc-wcus\"}) )) by (container, pod) > 0.9", - "for": "PT3M", - "labels": { - "cluster": "ci-dev-arc-wcus" - }, - "annotations": { - "description": "CPU usage greater than 90% for prometheus-collector on cluster ci-dev-arc-wcus" - }, - "severity": 4, - "resolveConfiguration": { - "autoResolved": true, - "timeToResolve": "PT10M" - }, - "actions": [ - { - "actionGroupId": "/subscriptions/13d371f9-5a39-46d5-8e1b-60158c49db84/resourceGroups/ContainerInsightsPrometheusCollector-Prod/providers/microsoft.insights/actiongroups/AMA-PrometheusCollectorAlertGroup" - } - ] - }, - { - "alert": "CPU usage % greater than 50 for prometheus-collector containers on cluster ci-dev-arc-wcus", - "expression": "sum(sum by (cluster, namespace, pod, container) ( rate(container_cpu_usage_seconds_total{job=\"cadvisor\", image!=\"\", namespace=\"kube-system\", cluster=\"ci-dev-arc-wcus\", container=\"prometheus-collector\"}[5m]) ) * on (cluster, namespace, pod) 
group_left(node) topk by (cluster, namespace, pod) ( 1, max by(cluster, namespace, pod, node) (kube_pod_info{node!=\"\", namespace=\"kube-system\", cluster=\"ci-dev-arc-wcus\"}) )) by (container, pod) > 0.5", - "for": "PT3M", - "labels": { - "cluster": "ci-dev-arc-wcus" - }, - "annotations": { - "description": "CPU usage greater than 5% for prometheus-collector on cluster ci-dev-arc-wcus" - }, - "severity": 4, - "resolveConfiguration": { - "autoResolved": true, - "timeToResolve": "PT10M" - }, - "actions": [ - { - "actionGroupId": "/subscriptions/13d371f9-5a39-46d5-8e1b-60158c49db84/resourceGroups/ContainerInsightsPrometheusCollector-Prod/providers/microsoft.insights/actiongroups/AMA-PrometheusCollectorAlertGroup" - } - ] - }, - { - "alert": "Memory usage is high for prometheus-collector containers on cluster ci-dev-arc-wcus", - "expression": "(sum(container_memory_working_set_bytes{cluster=\"ci-dev-arc-wcus\", namespace=\"kube-system\", container=\"prometheus-collector\", image!=\"\"}) by (container, pod) / sum(kube_pod_container_resource_requests{cluster=\"ci-dev-arc-wcus\", namespace=\"kube-system\", container=\"prometheus-collector\", resource=\"memory\"}) by (container, pod)) > 1.9", - "for": "PT3M", - "labels": { - "cluster": "ci-dev-arc-wcus" - }, - "annotations": { - "description": "Memory usage is high for prometheus-collector containers on cluster ci-dev-arc-wcus" - }, - "severity": 4, - "resolveConfiguration": { - "autoResolved": true, - "timeToResolve": "PT10M" - }, - "actions": [ - { - "actionGroupId": "/subscriptions/13d371f9-5a39-46d5-8e1b-60158c49db84/resourceGroups/ContainerInsightsPrometheusCollector-Prod/providers/microsoft.insights/actiongroups/AMA-PrometheusCollectorAlertGroup" - } - ] - } - ] - } - } - ] -} diff --git a/internal/alerts/ci-prod-aks-mac-weu.json b/internal/alerts/ci-prod-aks-mac-weu.json deleted file mode 100644 index 0d8b4e735..000000000 --- a/internal/alerts/ci-prod-aks-mac-weu.json +++ /dev/null @@ -1,296 +0,0 @@ -{ - 
"$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#", - "contentVersion": "1.0.0.0", - "parameters": {}, - "variables": {}, - "resources": [ - { - "name": "containerinsights_prod_mac_weu_alerts", - "type": "Microsoft.AlertsManagement/prometheusRuleGroups", - "apiVersion": "2023-03-01", - "location": "westeurope", - "properties": { - "description": "rule group for cluster ci-prod-aks-mac-weu in MAC: ci-prod-aks-weu-mac", - "scopes": [ - "/subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourceGroups/ci-prod-aks-mac-weu-rg/providers/Microsoft.Monitor/accounts/ci-prod-aks-weu-mac", - "/subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourceGroups/ci-prod-aks-mac-weu-rg/providers/Microsoft.ContainerService/managedClusters/ci-prod-aks-mac-weu" - ], - "rules": [ - { - "alert": "Amd64 metric missing in cluster ci-prod-aks-mac-weu", - "expression": "absent(node_uname_info{cluster=\"ci-prod-aks-mac-weu\", machine=\"x86_64\"}) == 1 or node_uname_info{cluster=\"ci-prod-aks-mac-weu\", machine=\"x86_64\"} == 0", - "for": "PT3M", - "labels": { - "cluster": "ci-prod-aks-mac-weu" - }, - "annotations": { - "description": "Amd64 metric missing in cluster ci-prod-aks-mac-weu" - }, - "severity": 4, - "resolveConfiguration": { - "autoResolved": true, - "timeToResolve": "PT10M" - }, - "actions": [ - { - "actionGroupId": "/subscriptions/13d371f9-5a39-46d5-8e1b-60158c49db84/resourceGroups/ContainerInsightsPrometheusCollector-Prod/providers/microsoft.insights/actiongroups/AMA-PrometheusCollectorAlertGroup" - } - ] - }, - { - "alert": "up metric missing for target = node in cluster /subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourcegroups/ci-prod-aks-mac-weu-rg/providers/microsoft.containerservice/managedclusters/ci-prod-aks-mac-weu", - "expression": "absent(up{cluster=\"ci-prod-aks-mac-weu\", job=\"node\"}) == 1 or up{cluster=\"ci-prod-aks-mac-weu\", job=\"node\"} == 0", - "for": "PT3M", - "labels": { - "cluster": 
"/subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourcegroups/ci-prod-aks-mac-weu-rg/providers/microsoft.containerservice/managedclusters/ci-prod-aks-mac-weu" - }, - "annotations": { - "description": "up metric is not flowing for target = node in cluster /subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourcegroups/ci-prod-aks-mac-weu-rg/providers/microsoft.containerservice/managedclusters/ci-prod-aks-mac-weu" - }, - "severity": 4, - "resolveConfiguration": { - "autoResolved": true, - "timeToResolve": "PT10M" - }, - "actions": [ - { - "actionGroupId": "/subscriptions/13d371f9-5a39-46d5-8e1b-60158c49db84/resourceGroups/ContainerInsightsPrometheusCollector-Prod/providers/microsoft.insights/actiongroups/AMA-PrometheusCollectorAlertGroup" - } - ] - }, - { - "alert": "up metric missing for target = kubelet in cluster /subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourcegroups/ci-prod-aks-mac-weu-rg/providers/microsoft.containerservice/managedclusters/ci-prod-aks-mac-weu", - "expression": "absent(up{cluster=\"ci-prod-aks-mac-weu\", job=\"kubelet\"}) == 1 or up{cluster=\"ci-prod-aks-mac-weu\", job=\"kubelet\"} == 0", - "for": "PT3M", - "labels": { - "cluster": "/subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourcegroups/ci-prod-aks-mac-weu-rg/providers/microsoft.containerservice/managedclusters/ci-prod-aks-mac-weu" - }, - "annotations": { - "description": "up metric is not flowing for target = kubelet in cluster /subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourcegroups/ci-prod-aks-mac-weu-rg/providers/microsoft.containerservice/managedclusters/ci-prod-aks-mac-weu" - }, - "severity": 4, - "resolveConfiguration": { - "autoResolved": true, - "timeToResolve": "PT10M" - }, - "actions": [ - { - "actionGroupId": "/subscriptions/13d371f9-5a39-46d5-8e1b-60158c49db84/resourceGroups/ContainerInsightsPrometheusCollector-Prod/providers/microsoft.insights/actiongroups/AMA-PrometheusCollectorAlertGroup" - } - ] - }, - { - "alert": "up metric missing for 
target = windows-exporter in cluster /subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourcegroups/ci-prod-aks-mac-weu-rg/providers/microsoft.containerservice/managedclusters/ci-prod-aks-mac-weu", - "expression": "absent(up{cluster=\"ci-prod-aks-mac-weu\", job=\"windows-exporter\"}) == 1 or up{cluster=\"ci-prod-aks-mac-weu\", job=\"windows-exporter\"} == 0", - "for": "PT3M", - "labels": { - "cluster": "/subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourcegroups/ci-prod-aks-mac-weu-rg/providers/microsoft.containerservice/managedclusters/ci-prod-aks-mac-weu" - }, - "annotations": { - "description": "up metric is not flowing for target = windows-exporter in cluster /subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourcegroups/ci-prod-aks-mac-weu-rg/providers/microsoft.containerservice/managedclusters/ci-prod-aks-mac-weu" - }, - "severity": 4, - "resolveConfiguration": { - "autoResolved": true, - "timeToResolve": "PT10M" - }, - "actions": [ - { - "actionGroupId": "/subscriptions/13d371f9-5a39-46d5-8e1b-60158c49db84/resourceGroups/ContainerInsightsPrometheusCollector-Prod/providers/microsoft.insights/actiongroups/AMA-PrometheusCollectorAlertGroup" - } - ] - }, - { - "alert": "up metric missing for target = kube-proxy in cluster /subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourcegroups/ci-prod-aks-mac-weu-rg/providers/microsoft.containerservice/managedclusters/ci-prod-aks-mac-weu", - "expression": "absent(up{cluster=\"ci-prod-aks-mac-weu\", job=\"kube-proxy\"}) == 1 or up{cluster=\"ci-prod-aks-mac-weu\", job=\"kube-proxy\"} == 0", - "for": "PT3M", - "labels": { - "cluster": "/subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourcegroups/ci-prod-aks-mac-weu-rg/providers/microsoft.containerservice/managedclusters/ci-prod-aks-mac-weu" - }, - "annotations": { - "description": "up metric is not flowing for target = kube-proxy in cluster 
/subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourcegroups/ci-prod-aks-mac-weu-rg/providers/microsoft.containerservice/managedclusters/ci-prod-aks-mac-weu" - }, - "severity": 4, - "resolveConfiguration": { - "autoResolved": true, - "timeToResolve": "PT10M" - }, - "actions": [ - { - "actionGroupId": "/subscriptions/13d371f9-5a39-46d5-8e1b-60158c49db84/resourceGroups/ContainerInsightsPrometheusCollector-Prod/providers/microsoft.insights/actiongroups/AMA-PrometheusCollectorAlertGroup" - } - ] - }, - { - "alert": "up metric missing for target = kube-apiserver in cluster /subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourcegroups/ci-prod-aks-mac-weu-rg/providers/microsoft.containerservice/managedclusters/ci-prod-aks-mac-weu", - "expression": "absent(up{cluster=\"ci-prod-aks-mac-weu\", job=\"kube-apiserver\"}) == 1 or up{cluster=\"ci-prod-aks-mac-weu\", job=\"kube-apiserver\"} == 0", - "for": "PT3M", - "labels": { - "cluster": "/subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourcegroups/ci-prod-aks-mac-weu-rg/providers/microsoft.containerservice/managedclusters/ci-prod-aks-mac-weu" - }, - "annotations": { - "description": "up metric is not flowing for target = kube-apiserver in cluster /subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourcegroups/ci-prod-aks-mac-weu-rg/providers/microsoft.containerservice/managedclusters/ci-prod-aks-mac-weu" - }, - "severity": 4, - "resolveConfiguration": { - "autoResolved": true, - "timeToResolve": "PT10M" - }, - "actions": [ - { - "actionGroupId": "/subscriptions/13d371f9-5a39-46d5-8e1b-60158c49db84/resourceGroups/ContainerInsightsPrometheusCollector-Prod/providers/microsoft.insights/actiongroups/AMA-PrometheusCollectorAlertGroup" - } - ] - }, - { - "alert": "up metric missing for target = kube-proxy-windows in cluster /subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourcegroups/ci-prod-aks-mac-weu-rg/providers/microsoft.containerservice/managedclusters/ci-prod-aks-mac-weu", - "expression": 
"absent(up{cluster=\"ci-prod-aks-mac-weu\", job=\"kube-proxy-windows\"}) == 1 or up{cluster=\"ci-prod-aks-mac-weu\", job=\"kube-proxy-windows\"} == 0", - "for": "PT3M", - "labels": { - "cluster": "/subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourcegroups/ci-prod-aks-mac-weu-rg/providers/microsoft.containerservice/managedclusters/ci-prod-aks-mac-weu" - }, - "annotations": { - "description": "up metric is not flowing for target = kube-proxy-windows in cluster /subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourcegroups/ci-prod-aks-mac-weu-rg/providers/microsoft.containerservice/managedclusters/ci-prod-aks-mac-weu" - }, - "severity": 4, - "resolveConfiguration": { - "autoResolved": true, - "timeToResolve": "PT10M" - }, - "actions": [ - { - "actionGroupId": "/subscriptions/13d371f9-5a39-46d5-8e1b-60158c49db84/resourceGroups/ContainerInsightsPrometheusCollector-Prod/providers/microsoft.insights/actiongroups/AMA-PrometheusCollectorAlertGroup" - } - ] - }, - { - "alert": "up metric missing for target = kube-state-metrics in cluster /subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourcegroups/ci-prod-aks-mac-weu-rg/providers/microsoft.containerservice/managedclusters/ci-prod-aks-mac-weu", - "expression": "absent(up{cluster=\"ci-prod-aks-mac-weu\", job=\"kube-state-metrics\"}) == 1 or up{cluster=\"ci-prod-aks-mac-weu\", job=\"kube-state-metrics\"} == 0", - "for": "PT3M", - "labels": { - "cluster": "/subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourcegroups/ci-prod-aks-mac-weu-rg/providers/microsoft.containerservice/managedclusters/ci-prod-aks-mac-weu" - }, - "annotations": { - "description": "up metric is not flowing for target = kube-proxy-windows in cluster /subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourcegroups/ci-prod-aks-mac-weu-rg/providers/microsoft.containerservice/managedclusters/ci-prod-aks-mac-weu" - }, - "severity": 4, - "resolveConfiguration": { - "autoResolved": true, - "timeToResolve": "PT10M" - }, - "actions": [ - { - 
"actionGroupId": "/subscriptions/13d371f9-5a39-46d5-8e1b-60158c49db84/resourceGroups/ContainerInsightsPrometheusCollector-Prod/providers/microsoft.insights/actiongroups/AMA-PrometheusCollectorAlertGroup" - } - ] - }, - { - "alert": "up metric missing for target = cadvisor in cluster /subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourcegroups/ci-prod-aks-mac-weu-rg/providers/microsoft.containerservice/managedclusters/ci-prod-aks-mac-weu", - "expression": "absent(up{cluster=\"ci-prod-aks-mac-weu\", job=\"cadvisor\"}) == 1 or up{cluster=\"ci-prod-aks-mac-weu\", job=\"cadvisor\"} == 0", - "for": "PT3M", - "labels": { - "cluster": "/subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourcegroups/ci-prod-aks-mac-weu-rg/providers/microsoft.containerservice/managedclusters/ci-prod-aks-mac-weu" - }, - "annotations": { - "description": "up metric is not flowing for target = cadvisor in cluster /subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourcegroups/ci-prod-aks-mac-weu-rg/providers/microsoft.containerservice/managedclusters/ci-prod-aks-mac-weu" - }, - "severity": 4, - "resolveConfiguration": { - "autoResolved": true, - "timeToResolve": "PT10M" - }, - "actions": [ - { - "actionGroupId": "/subscriptions/13d371f9-5a39-46d5-8e1b-60158c49db84/resourceGroups/ContainerInsightsPrometheusCollector-Prod/providers/microsoft.insights/actiongroups/AMA-PrometheusCollectorAlertGroup" - } - ] - }, - { - "alert": "up metric missing for target = kube-dns in cluster /subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourcegroups/ci-prod-aks-mac-weu-rg/providers/microsoft.containerservice/managedclusters/ci-prod-aks-mac-weu", - "expression": "absent(up{cluster=\"ci-prod-aks-mac-weu\", job=\"kube-dns\"}) == 1 or up{cluster=\"ci-prod-aks-mac-weu\", job=\"kube-dns\"} == 0", - "for": "PT3M", - "labels": { - "cluster": 
"/subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourcegroups/ci-prod-aks-mac-weu-rg/providers/microsoft.containerservice/managedclusters/ci-prod-aks-mac-weu" - }, - "annotations": { - "description": "up metric is not flowing for target = kube-dns in cluster /subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourcegroups/ci-prod-aks-mac-weu-rg/providers/microsoft.containerservice/managedclusters/ci-prod-aks-mac-weu" - }, - "severity": 4, - "resolveConfiguration": { - "autoResolved": true, - "timeToResolve": "PT10M" - }, - "actions": [ - { - "actionGroupId": "/subscriptions/13d371f9-5a39-46d5-8e1b-60158c49db84/resourceGroups/ContainerInsightsPrometheusCollector-Prod/providers/microsoft.insights/actiongroups/AMA-PrometheusCollectorAlertGroup" - } - ] - }, - { - "alert": "CPU usage % greater than 90 for prometheus-collector containers on cluster /subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourcegroups/ci-prod-aks-mac-weu-rg/providers/microsoft.containerservice/managedclusters/ci-prod-aks-mac-weu", - "expression": "sum(sum by (cluster, namespace, pod, container) ( rate(container_cpu_usage_seconds_total{job=\"cadvisor\", image!=\"\", namespace=\"kube-system\", cluster=\"ci-prod-aks-mac-weu\", container=\"prometheus-collector\"}[5m]) ) * on (cluster, namespace, pod) group_left(node) topk by (cluster, namespace, pod) ( 1, max by(cluster, namespace, pod, node) (kube_pod_info{node!=\"\", namespace=\"kube-system\", cluster=\"ci-prod-aks-mac-weu\"}) )) by (container, pod) > 0.9", - "for": "PT3M", - "labels": { - "cluster": "/subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourcegroups/ci-prod-aks-mac-weu-rg/providers/microsoft.containerservice/managedclusters/ci-prod-aks-mac-weu" - }, - "annotations": { - "description": "CPU usage greater than 90% for prometheus-collector on cluster /subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourcegroups/ci-prod-aks-mac-weu-rg/providers/microsoft.containerservice/managedclusters/ci-prod-aks-mac-weu" - }, - 
"severity": 4, - "resolveConfiguration": { - "autoResolved": true, - "timeToResolve": "PT10M" - }, - "actions": [ - { - "actionGroupId": "/subscriptions/13d371f9-5a39-46d5-8e1b-60158c49db84/resourceGroups/ContainerInsightsPrometheusCollector-Prod/providers/microsoft.insights/actiongroups/AMA-PrometheusCollectorAlertGroup" - } - ] - }, - { - "alert": "CPU usage % greater than 50 for prometheus-collector containers on cluster ci-prod-aks-mac-weu", - "expression": "sum(sum by (cluster, namespace, pod, container) ( rate(container_cpu_usage_seconds_total{job=\"cadvisor\", image!=\"\", namespace=\"kube-system\", cluster=\"ci-prod-aks-mac-weu\", container=\"prometheus-collector\"}[5m]) ) * on (cluster, namespace, pod) group_left(node) topk by (cluster, namespace, pod) ( 1, max by(cluster, namespace, pod, node) (kube_pod_info{node!=\"\", namespace=\"kube-system\", cluster=\"ci-prod-aks-mac-weu\"}) )) by (container, pod) > 0.5", - "for": "PT3M", - "labels": { - "cluster": "ci-prod-aks-mac-weu" - }, - "annotations": { - "description": "CPU usage greater than 5% for prometheus-collector on cluster ci-prod-aks-mac-weu" - }, - "severity": 4, - "resolveConfiguration": { - "autoResolved": true, - "timeToResolve": "PT10M" - }, - "actions": [ - { - "actionGroupId": "/subscriptions/13d371f9-5a39-46d5-8e1b-60158c49db84/resourceGroups/ContainerInsightsPrometheusCollector-Prod/providers/microsoft.insights/actiongroups/AMA-PrometheusCollectorAlertGroup" - } - ] - }, - { - "alert": "Memory usage is high for prometheus-collector containers on cluster /subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourcegroups/ci-prod-aks-mac-weu-rg/providers/microsoft.containerservice/managedclusters/ci-prod-aks-mac-weu", - "expression": "(sum(container_memory_working_set_bytes{cluster=\"ci-prod-aks-mac-weu\", namespace=\"kube-system\", container=\"prometheus-collector\", image!=\"\"}) by (container, pod) / sum(kube_pod_container_resource_requests{cluster=\"ci-prod-aks-mac-weu\", 
namespace=\"kube-system\", container=\"prometheus-collector\", resource=\"memory\"}) by (container, pod)) > 1.9", - "for": "PT3M", - "labels": { - "cluster": "/subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourcegroups/ci-prod-aks-mac-weu-rg/providers/microsoft.containerservice/managedclusters/ci-prod-aks-mac-weu" - }, - "annotations": { - "description": "Memory usage is high for prometheus-collector containers on cluster /subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourcegroups/ci-prod-aks-mac-weu-rg/providers/microsoft.containerservice/managedclusters/ci-prod-aks-mac-weu" - }, - "severity": 4, - "resolveConfiguration": { - "autoResolved": true, - "timeToResolve": "PT10M" - }, - "actions": [ - { - "actionGroupId": "/subscriptions/13d371f9-5a39-46d5-8e1b-60158c49db84/resourceGroups/ContainerInsightsPrometheusCollector-Prod/providers/microsoft.insights/actiongroups/AMA-PrometheusCollectorAlertGroup" - } - ] - } - ] - } - } - ] -} diff --git a/internal/alerts/ci-prod-arc-wcus.json b/internal/alerts/ci-prod-arc-wcus.json deleted file mode 100644 index 443eb296d..000000000 --- a/internal/alerts/ci-prod-arc-wcus.json +++ /dev/null @@ -1,191 +0,0 @@ -{ - "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#", - "contentVersion": "1.0.0.0", - "parameters": {}, - "variables": {}, - "resources": [ - { - "name": "containerinsights_dev_mac_eus_alerts", - "type": "Microsoft.AlertsManagement/prometheusRuleGroups", - "apiVersion": "2023-03-01", - "location": "eastus", - "properties": { - "description": "rule group for cluster ci-prod-arc-wcus in MAC: ci-dev-aks-eus-mac", - "scopes": [ - "/subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourceGroups/ci-prod-arc-wcus/providers/microsoft.monitor/accounts/ci-prod-arc-wcus", - "/subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourceGroups/ci-prod-arc-wcus/providers/Microsoft.ContainerService/managedClusters/ci-prod-arc-wcus" - ], - "rules": [ - { - "alert": "Amd64 metric 
missing in cluster ci-prod-arc-wcus", - "expression": "absent(node_uname_info{cluster=\"ci-prod-arc-wcus\", machine=\"x86_64\"}) == 1 or node_uname_info{cluster=\"ci-prod-arc-wcus\", machine=\"x86_64\"} == 0", - "for": "PT3M", - "labels": { - "cluster": "ci-prod-arc-wcus" - }, - "annotations": { - "description": "Amd64 metric missing in cluster ci-prod-arc-wcus" - }, - "severity": 4, - "resolveConfiguration": { - "autoResolved": true, - "timeToResolve": "PT10M" - }, - "actions": [ - { - "actionGroupId": "/subscriptions/13d371f9-5a39-46d5-8e1b-60158c49db84/resourceGroups/ContainerInsightsPrometheusCollector-Prod/providers/microsoft.insights/actiongroups/AMA-PrometheusCollectorAlertGroup" - } - ] - }, - { - "alert": "up metric missing for target = node in cluster ci-prod-arc-wcus", - "expression": "absent(up{cluster=\"ci-prod-arc-wcus\", job=\"node\"}) == 1 or up{cluster=\"ci-prod-arc-wcus\", job=\"node\"} == 0", - "for": "PT3M", - "labels": { - "cluster": "ci-prod-arc-wcus" - }, - "annotations": { - "description": "up metric is not flowing for target = node in cluster ci-prod-arc-wcus" - }, - "severity": 4, - "resolveConfiguration": { - "autoResolved": true, - "timeToResolve": "PT10M" - }, - "actions": [ - { - "actionGroupId": "/subscriptions/13d371f9-5a39-46d5-8e1b-60158c49db84/resourceGroups/ContainerInsightsPrometheusCollector-Prod/providers/microsoft.insights/actiongroups/AMA-PrometheusCollectorAlertGroup" - } - ] - }, - { - "alert": "up metric missing for target = kubelet in cluster ci-prod-arc-wcus", - "expression": "absent(up{cluster=\"ci-prod-arc-wcus\", job=\"kubelet\"}) == 1 or up{cluster=\"ci-prod-arc-wcus\", job=\"kubelet\"} == 0", - "for": "PT3M", - "labels": { - "cluster": "ci-prod-arc-wcus" - }, - "annotations": { - "description": "up metric is not flowing for target = kubelet in cluster ci-prod-arc-wcus" - }, - "severity": 4, - "resolveConfiguration": { - "autoResolved": true, - "timeToResolve": "PT10M" - }, - "actions": [ - { - "actionGroupId": 
"/subscriptions/13d371f9-5a39-46d5-8e1b-60158c49db84/resourceGroups/ContainerInsightsPrometheusCollector-Prod/providers/microsoft.insights/actiongroups/AMA-PrometheusCollectorAlertGroup" - } - ] - }, - { - "alert": "up metric missing for target = kube-state-metrics in cluster ci-prod-arc-wcus", - "expression": "absent(up{cluster=\"ci-prod-arc-wcus\", job=\"kube-state-metrics\"}) == 1 or up{cluster=\"ci-prod-arc-wcus\", job=\"kube-state-metrics\"} == 0", - "for": "PT3M", - "labels": { - "cluster": "ci-prod-arc-wcus" - }, - "annotations": { - "description": "up metric is not flowing for target = kube-state-metrics in cluster ci-prod-arc-wcus" - }, - "severity": 4, - "resolveConfiguration": { - "autoResolved": true, - "timeToResolve": "PT10M" - }, - "actions": [ - { - "actionGroupId": "/subscriptions/13d371f9-5a39-46d5-8e1b-60158c49db84/resourceGroups/ContainerInsightsPrometheusCollector-Prod/providers/microsoft.insights/actiongroups/AMA-PrometheusCollectorAlertGroup" - } - ] - }, - { - "alert": "up metric missing for target = cadvisor in cluster ci-prod-arc-wcus", - "expression": "absent(up{cluster=\"ci-prod-arc-wcus\", job=\"cadvisor\"}) == 1 or up{cluster=\"ci-prod-arc-wcus\", job=\"cadvisor\"} == 0", - "for": "PT3M", - "labels": { - "cluster": "ci-prod-arc-wcus" - }, - "annotations": { - "description": "up metric is not flowing for target = cadvisor in cluster ci-prod-arc-wcus" - }, - "severity": 4, - "resolveConfiguration": { - "autoResolved": true, - "timeToResolve": "PT10M" - }, - "actions": [ - { - "actionGroupId": "/subscriptions/13d371f9-5a39-46d5-8e1b-60158c49db84/resourceGroups/ContainerInsightsPrometheusCollector-Prod/providers/microsoft.insights/actiongroups/AMA-PrometheusCollectorAlertGroup" - } - ] - }, - { - "alert": "CPU usage % greater than 90 for prometheus-collector containers on cluster ci-prod-arc-wcus", - "expression": "sum(sum by (cluster, namespace, pod, container) ( rate(container_cpu_usage_seconds_total{job=\"cadvisor\", image!=\"\", 
namespace=\"kube-system\", cluster=\"ci-prod-arc-wcus\", container=\"prometheus-collector\"}[5m]) ) * on (cluster, namespace, pod) group_left(node) topk by (cluster, namespace, pod) ( 1, max by(cluster, namespace, pod, node) (kube_pod_info{node!=\"\", namespace=\"kube-system\", cluster=\"ci-prod-arc-wcus\"}) )) by (container, pod) > 0.9", - "for": "PT3M", - "labels": { - "cluster": "ci-prod-arc-wcus" - }, - "annotations": { - "description": "CPU usage greater than 90% for prometheus-collector on cluster ci-prod-arc-wcus" - }, - "severity": 4, - "resolveConfiguration": { - "autoResolved": true, - "timeToResolve": "PT10M" - }, - "actions": [ - { - "actionGroupId": "/subscriptions/13d371f9-5a39-46d5-8e1b-60158c49db84/resourceGroups/ContainerInsightsPrometheusCollector-Prod/providers/microsoft.insights/actiongroups/AMA-PrometheusCollectorAlertGroup" - } - ] - }, - { - "alert": "CPU usage % greater than 50 for prometheus-collector containers on cluster ci-prod-arc-wcus", - "expression": "sum(sum by (cluster, namespace, pod, container) ( rate(container_cpu_usage_seconds_total{job=\"cadvisor\", image!=\"\", namespace=\"kube-system\", cluster=\"ci-prod-arc-wcus\", container=\"prometheus-collector\"}[5m]) ) * on (cluster, namespace, pod) group_left(node) topk by (cluster, namespace, pod) ( 1, max by(cluster, namespace, pod, node) (kube_pod_info{node!=\"\", namespace=\"kube-system\", cluster=\"ci-prod-arc-wcus\"}) )) by (container, pod) > 0.5", - "for": "PT3M", - "labels": { - "cluster": "ci-prod-arc-wcus" - }, - "annotations": { - "description": "CPU usage greater than 5% for prometheus-collector on cluster ci-prod-arc-wcus" - }, - "severity": 4, - "resolveConfiguration": { - "autoResolved": true, - "timeToResolve": "PT10M" - }, - "actions": [ - { - "actionGroupId": "/subscriptions/13d371f9-5a39-46d5-8e1b-60158c49db84/resourceGroups/ContainerInsightsPrometheusCollector-Prod/providers/microsoft.insights/actiongroups/AMA-PrometheusCollectorAlertGroup" - } - ] - }, - { - 
"alert": "Memory usage is high for prometheus-collector containers on cluster ci-prod-arc-wcus", - "expression": "(sum(container_memory_working_set_bytes{cluster=\"ci-prod-arc-wcus\", namespace=\"kube-system\", container=\"prometheus-collector\", image!=\"\"}) by (container, pod) / sum(kube_pod_container_resource_requests{cluster=\"ci-prod-arc-wcus\", namespace=\"kube-system\", container=\"prometheus-collector\", resource=\"memory\"}) by (container, pod)) > 1.9", - "for": "PT3M", - "labels": { - "cluster": "ci-prod-arc-wcus" - }, - "annotations": { - "description": "Memory usage is high for prometheus-collector containers on cluster ci-prod-arc-wcus" - }, - "severity": 4, - "resolveConfiguration": { - "autoResolved": true, - "timeToResolve": "PT10M" - }, - "actions": [ - { - "actionGroupId": "/subscriptions/13d371f9-5a39-46d5-8e1b-60158c49db84/resourceGroups/ContainerInsightsPrometheusCollector-Prod/providers/microsoft.insights/actiongroups/AMA-PrometheusCollectorAlertGroup" - } - ] - } - ] - } - } - ] -} diff --git a/internal/alerts/cluster-scopes.txt b/internal/alerts/cluster-scopes.txt new file mode 100644 index 000000000..222796c34 --- /dev/null +++ b/internal/alerts/cluster-scopes.txt @@ -0,0 +1,24 @@ +Here are the cluster parameters which need to be updated in example-alert-template.json before deploying the alerts templates for each cluster. Please +update the "scopes" field in the alerts template with the cluster id and AMW id from the list below depending on the cluster. +Update the labels field with the cluster name below. Update the location according to the cluster. Update the alert name accordingly. 
+ +Cluster name Cluster id + + +ci-dev-aks-mac-eus /subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourceGroups/ci-dev-aks-mac-eus-rg/providers/Microsoft.ContainerService/managedClusters/ci-dev-aks-mac-eus +ci-dev-arc-wcus /subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourceGroups/ci-dev-arc-wcus/providers/Microsoft.ContainerService/managedClusters/ci-dev-arc-wcus +ci-prod-aks-mac-weu /subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourceGroups/ci-prod-aks-mac-weu-rg/providers/Microsoft.ContainerService/managedClusters/ci-prod-aks-mac-weu +ci-prod-arc-wcus /subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourceGroups/ci-prod-arc-wcus/providers/Microsoft.ContainerService/managedClusters/ci-prod-arc-wcus +monitoring-metrics-prod-aks-eus2euap /subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourceGroups/monitoring-metrics-prod-aks/providers/Microsoft.ContainerService/managedClusters/monitoring-metrics-prod-aks-eus2euap +monitoring-metrics-prod-aks-wcus /subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourceGroups/monitoring-metrics-prod-aks/providers/Microsoft.ContainerService/managedClusters/monitoring-metrics-prod-aks-wcus + + +Azure Monitor Workspace Location +/subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourceGroups/ci-dev-aks-mac-eus-rg/providers/microsoft.monitor/accounts/ci-dev-aks-eus-mac eastus +/subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourceGroups/ci-dev-arc-wcus/providers/microsoft.monitor/accounts/ci-dev-arc-amw westcentralus +/subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourceGroups/ci-prod-aks-mac-weu-rg/providers/Microsoft.Monitor/accounts/ci-prod-aks-weu-mac westeurope +/subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourceGroups/ci-prod-arc-wcus/providers/microsoft.monitor/accounts/ci-prod-arc-wcus westcentralus +/subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourceGroups/monitoring-metrics-amw/providers/microsoft.monitor/accounts/monitoring-metrics-amw-eus2euap eastus2euap 
+/subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourceGroups/monitoring-metrics-amw/providers/microsoft.monitor/accounts/monitoring-metrics-amw-wcus westcentralus + + diff --git a/internal/alerts/ci-dev-aks-mac-eus.json b/internal/alerts/example-alert-template.json similarity index 84% rename from internal/alerts/ci-dev-aks-mac-eus.json rename to internal/alerts/example-alert-template.json index 776f93d18..8407781c9 100644 --- a/internal/alerts/ci-dev-aks-mac-eus.json +++ b/internal/alerts/example-alert-template.json @@ -18,7 +18,7 @@ "rules": [ { "alert": "Amd64 metric missing in cluster ci-dev-aks-mac-eus", - "expression": "absent(node_uname_info{cluster=\"ci-dev-aks-mac-eus\", machine=\"x86_64\"}) == 1 or node_uname_info{cluster=\"ci-dev-aks-mac-eus\", machine=\"x86_64\"} == 0", + "expression": "absent(node_uname_info{machine=\"x86_64\"}) == 1 or node_uname_info{machine=\"x86_64\"} == 0", "for": "PT3M", "labels": { "cluster": "ci-dev-aks-mac-eus" @@ -39,7 +39,7 @@ }, { "alert": "up metric missing for target = node in cluster ci-dev-aks-mac-eus", - "expression": "absent(up{cluster=\"ci-dev-aks-mac-eus\", job=\"node\"}) == 1 or up{cluster=\"ci-dev-aks-mac-eus\", job=\"node\"} == 0", + "expression": "absent(up{job=\"node\"}) == 1 or up{job=\"node\"} == 0", "for": "PT3M", "labels": { "cluster": "ci-dev-aks-mac-eus" @@ -60,7 +60,7 @@ }, { "alert": "up metric missing for target = kubelet in cluster ci-dev-aks-mac-eus", - "expression": "absent(up{cluster=\"ci-dev-aks-mac-eus\", job=\"kubelet\"}) == 1 or up{cluster=\"ci-dev-aks-mac-eus\", job=\"kubelet\"} == 0", + "expression": "absent(up{job=\"kubelet\"}) == 1 or up{job=\"kubelet\"} == 0", "for": "PT3M", "labels": { "cluster": "ci-dev-aks-mac-eus" @@ -81,7 +81,7 @@ }, { "alert": "up metric missing for target = windows-exporter in cluster ci-dev-aks-mac-eus", - "expression": "absent(up{cluster=\"ci-dev-aks-mac-eus\", job=\"windows-exporter\"}) == 1 or up{cluster=\"ci-dev-aks-mac-eus\", job=\"windows-exporter\"} 
== 0", + "expression": "absent(up{job=\"windows-exporter\"}) == 1 or up{job=\"windows-exporter\"} == 0", "for": "PT3M", "labels": { "cluster": "ci-dev-aks-mac-eus" @@ -102,7 +102,7 @@ }, { "alert": "up metric missing for target = kube-proxy in cluster ci-dev-aks-mac-eus", - "expression": "absent(up{cluster=\"ci-dev-aks-mac-eus\", job=\"kube-proxy\"}) == 1 or up{cluster=\"ci-dev-aks-mac-eus\", job=\"kube-proxy\"} == 0", + "expression": "absent(up{job=\"kube-proxy\"}) == 1 or up{job=\"kube-proxy\"} == 0", "for": "PT3M", "labels": { "cluster": "ci-dev-aks-mac-eus" @@ -123,7 +123,7 @@ }, { "alert": "up metric missing for target = kube-apiserver in cluster ci-dev-aks-mac-eus", - "expression": "absent(up{cluster=\"ci-dev-aks-mac-eus\", job=\"kube-apiserver\"}) == 1 or up{cluster=\"ci-dev-aks-mac-eus\", job=\"kube-apiserver\"} == 0", + "expression": "absent(up{job=\"kube-apiserver\"}) == 1 or up{job=\"kube-apiserver\"} == 0", "for": "PT3M", "labels": { "cluster": "ci-dev-aks-mac-eus" @@ -144,7 +144,7 @@ }, { "alert": "up metric missing for target = kube-proxy-windows in cluster ci-dev-aks-mac-eus", - "expression": "absent(up{cluster=\"ci-dev-aks-mac-eus\", job=\"kube-proxy-windows\"}) == 1 or up{cluster=\"ci-dev-aks-mac-eus\", job=\"kube-proxy-windows\"} == 0", + "expression": "absent(up{job=\"kube-proxy-windows\"}) == 1 or up{job=\"kube-proxy-windows\"} == 0", "for": "PT3M", "labels": { "cluster": "ci-dev-aks-mac-eus" @@ -165,7 +165,7 @@ }, { "alert": "up metric missing for target = kube-state-metrics in cluster ci-dev-aks-mac-eus", - "expression": "absent(up{cluster=\"ci-dev-aks-mac-eus\", job=\"kube-state-metrics\"}) == 1 or up{cluster=\"ci-dev-aks-mac-eus\", job=\"kube-state-metrics\"} == 0", + "expression": "absent(up{job=\"kube-state-metrics\"}) == 1 or up{job=\"kube-state-metrics\"} == 0", "for": "PT3M", "labels": { "cluster": "ci-dev-aks-mac-eus" @@ -186,7 +186,7 @@ }, { "alert": "up metric missing for target = cadvisor in cluster ci-dev-aks-mac-eus", - 
"expression": "absent(up{cluster=\"ci-dev-aks-mac-eus\", job=\"cadvisor\"}) == 1 or up{cluster=\"ci-dev-aks-mac-eus\", job=\"cadvisor\"} == 0", + "expression": "absent(up{job=\"cadvisor\"}) == 1 or up{job=\"cadvisor\"} == 0", "for": "PT3M", "labels": { "cluster": "ci-dev-aks-mac-eus" @@ -207,7 +207,7 @@ }, { "alert": "up metric missing for target = kube-dns in cluster ci-dev-aks-mac-eus", - "expression": "absent(up{cluster=\"ci-dev-aks-mac-eus\", job=\"kube-dns\"}) == 1 or up{cluster=\"ci-dev-aks-mac-eus\", job=\"kube-dns\"} == 0", + "expression": "absent(up{job=\"kube-dns\"}) == 1 or up{job=\"kube-dns\"} == 0", "for": "PT3M", "labels": { "cluster": "ci-dev-aks-mac-eus" @@ -228,7 +228,7 @@ }, { "alert": "CPU usage % greater than 90 for prometheus-collector containers on cluster ci-dev-aks-mac-eus", - "expression": "sum(sum by (cluster, namespace, pod, container) ( rate(container_cpu_usage_seconds_total{job=\"cadvisor\", image!=\"\", namespace=\"kube-system\", cluster=\"ci-dev-aks-mac-eus\", container=\"prometheus-collector\"}[5m]) ) * on (cluster, namespace, pod) group_left(node) topk by (cluster, namespace, pod) ( 1, max by(cluster, namespace, pod, node) (kube_pod_info{node!=\"\", namespace=\"kube-system\", cluster=\"ci-dev-aks-mac-eus\"}) )) by (container, pod) > 0.9", + "expression": "sum(sum by (cluster, namespace, pod, container) ( rate(container_cpu_usage_seconds_total{job=\"cadvisor\", image!=\"\", namespace=\"kube-system\", container=\"prometheus-collector\"}[5m]) ) * on (cluster, namespace, pod) group_left(node) topk by (cluster, namespace, pod) ( 1, max by(cluster, namespace, pod, node) (kube_pod_info{node!=\"\", namespace=\"kube-system\"}) )) by (container, pod) > 0.9", "for": "PT3M", "labels": { "cluster": "ci-dev-aks-mac-eus" @@ -249,7 +249,7 @@ }, { "alert": "CPU usage % greater than 50 for prometheus-collector containers on cluster ci-dev-aks-mac-eus", - "expression": "sum(sum by (cluster, namespace, pod, container) ( 
rate(container_cpu_usage_seconds_total{job=\"cadvisor\", image!=\"\", namespace=\"kube-system\", cluster=\"ci-dev-aks-mac-eus\", container=\"prometheus-collector\"}[5m]) ) * on (cluster, namespace, pod) group_left(node) topk by (cluster, namespace, pod) ( 1, max by(cluster, namespace, pod, node) (kube_pod_info{node!=\"\", namespace=\"kube-system\", cluster=\"ci-dev-aks-mac-eus\"}) )) by (container, pod) > 0.5", + "expression": "sum(sum by (cluster, namespace, pod, container) ( rate(container_cpu_usage_seconds_total{job=\"cadvisor\", image!=\"\", namespace=\"kube-system\", container=\"prometheus-collector\"}[5m]) ) * on (cluster, namespace, pod) group_left(node) topk by (cluster, namespace, pod) ( 1, max by(cluster, namespace, pod, node) (kube_pod_info{node!=\"\", namespace=\"kube-system\"}) )) by (container, pod) > 0.5", "for": "PT3M", "labels": { "cluster": "ci-dev-aks-mac-eus" @@ -270,7 +270,7 @@ }, { "alert": "Memory usage is high for prometheus-collector containers on cluster ci-dev-aks-mac-eus", - "expression": "(sum(container_memory_working_set_bytes{cluster=\"ci-dev-aks-mac-eus\", namespace=\"kube-system\", container=\"prometheus-collector\", image!=\"\"}) by (container, pod) / sum(kube_pod_container_resource_requests{cluster=\"ci-dev-aks-mac-eus\", namespace=\"kube-system\", container=\"prometheus-collector\", resource=\"memory\"}) by (container, pod)) > 1.9", + "expression": "(sum(container_memory_working_set_bytes{namespace=\"kube-system\", container=\"prometheus-collector\", image!=\"\"}) by (container, pod) / sum(kube_pod_container_resource_requests{namespace=\"kube-system\", container=\"prometheus-collector\", resource=\"memory\"}) by (container, pod)) > 1.9", "for": "PT3M", "labels": { "cluster": "ci-dev-aks-mac-eus" diff --git a/internal/alerts/monitoring-metrics-prod-aks-eus2euap.json b/internal/alerts/monitoring-metrics-prod-aks-eus2euap.json deleted file mode 100644 index 8791e5b06..000000000 --- 
a/internal/alerts/monitoring-metrics-prod-aks-eus2euap.json +++ /dev/null @@ -1,296 +0,0 @@ -{ - "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#", - "contentVersion": "1.0.0.0", - "parameters": {}, - "variables": {}, - "resources": [ - { - "name": "containerinsights_dev_mac_eus_alerts", - "type": "Microsoft.AlertsManagement/prometheusRuleGroups", - "apiVersion": "2023-03-01", - "location": "eastus", - "properties": { - "description": "rule group for cluster monitoring-metrics-prod-aks-eus2euap in MAC: monitoring-metrics-amw-eus2euap", - "scopes": [ - "/subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourceGroups/monitoring-metrics-amw/providers/microsoft.monitor/accounts/monitoring-metrics-amw-eus2euap", - "/subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourceGroups/monitoring-metrics-prod-aks/providers/Microsoft.ContainerService/managedClusters/monitoring-metrics-prod-aks-eus2euap" - ], - "rules": [ - { - "alert": "Amd64 metric missing in cluster monitoring-metrics-prod-aks-eus2euap", - "expression": "absent(node_uname_info{cluster=\"monitoring-metrics-prod-aks-eus2euap\", machine=\"x86_64\"}) == 1 or node_uname_info{cluster=\"monitoring-metrics-prod-aks-eus2euap\", machine=\"x86_64\"} == 0", - "for": "PT3M", - "labels": { - "cluster": "monitoring-metrics-prod-aks-eus2euap" - }, - "annotations": { - "description": "Amd64 metric missing in cluster monitoring-metrics-prod-aks-eus2euap" - }, - "severity": 4, - "resolveConfiguration": { - "autoResolved": true, - "timeToResolve": "PT10M" - }, - "actions": [ - { - "actionGroupId": "/subscriptions/13d371f9-5a39-46d5-8e1b-60158c49db84/resourceGroups/ContainerInsightsPrometheusCollector-Prod/providers/microsoft.insights/actiongroups/AMA-PrometheusCollectorAlertGroup" - } - ] - }, - { - "alert": "up metric missing for target = node in cluster monitoring-metrics-prod-aks-eus2euap", - "expression": "absent(up{cluster=\"monitoring-metrics-prod-aks-eus2euap\", 
job=\"node\"}) == 1 or up{cluster=\"monitoring-metrics-prod-aks-eus2euap\", job=\"node\"} == 0", - "for": "PT3M", - "labels": { - "cluster": "monitoring-metrics-prod-aks-eus2euap" - }, - "annotations": { - "description": "up metric is not flowing for target = node in cluster monitoring-metrics-prod-aks-eus2euap" - }, - "severity": 4, - "resolveConfiguration": { - "autoResolved": true, - "timeToResolve": "PT10M" - }, - "actions": [ - { - "actionGroupId": "/subscriptions/13d371f9-5a39-46d5-8e1b-60158c49db84/resourceGroups/ContainerInsightsPrometheusCollector-Prod/providers/microsoft.insights/actiongroups/AMA-PrometheusCollectorAlertGroup" - } - ] - }, - { - "alert": "up metric missing for target = kubelet in cluster monitoring-metrics-prod-aks-eus2euap", - "expression": "absent(up{cluster=\"monitoring-metrics-prod-aks-eus2euap\", job=\"kubelet\"}) == 1 or up{cluster=\"monitoring-metrics-prod-aks-eus2euap\", job=\"kubelet\"} == 0", - "for": "PT3M", - "labels": { - "cluster": "monitoring-metrics-prod-aks-eus2euap" - }, - "annotations": { - "description": "up metric is not flowing for target = kubelet in cluster monitoring-metrics-prod-aks-eus2euap" - }, - "severity": 4, - "resolveConfiguration": { - "autoResolved": true, - "timeToResolve": "PT10M" - }, - "actions": [ - { - "actionGroupId": "/subscriptions/13d371f9-5a39-46d5-8e1b-60158c49db84/resourceGroups/ContainerInsightsPrometheusCollector-Prod/providers/microsoft.insights/actiongroups/AMA-PrometheusCollectorAlertGroup" - } - ] - }, - { - "alert": "up metric missing for target = windows-exporter in cluster monitoring-metrics-prod-aks-eus2euap", - "expression": "absent(up{cluster=\"monitoring-metrics-prod-aks-eus2euap\", job=\"windows-exporter\"}) == 1 or up{cluster=\"monitoring-metrics-prod-aks-eus2euap\", job=\"windows-exporter\"} == 0", - "for": "PT3M", - "labels": { - "cluster": "monitoring-metrics-prod-aks-eus2euap" - }, - "annotations": { - "description": "up metric is not flowing for target = windows-exporter 
in cluster monitoring-metrics-prod-aks-eus2euap" - }, - "severity": 4, - "resolveConfiguration": { - "autoResolved": true, - "timeToResolve": "PT10M" - }, - "actions": [ - { - "actionGroupId": "/subscriptions/13d371f9-5a39-46d5-8e1b-60158c49db84/resourceGroups/ContainerInsightsPrometheusCollector-Prod/providers/microsoft.insights/actiongroups/AMA-PrometheusCollectorAlertGroup" - } - ] - }, - { - "alert": "up metric missing for target = kube-proxy in cluster monitoring-metrics-prod-aks-eus2euap", - "expression": "absent(up{cluster=\"monitoring-metrics-prod-aks-eus2euap\", job=\"kube-proxy\"}) == 1 or up{cluster=\"monitoring-metrics-prod-aks-eus2euap\", job=\"kube-proxy\"} == 0", - "for": "PT3M", - "labels": { - "cluster": "monitoring-metrics-prod-aks-eus2euap" - }, - "annotations": { - "description": "up metric is not flowing for target = kube-proxy in cluster monitoring-metrics-prod-aks-eus2euap" - }, - "severity": 4, - "resolveConfiguration": { - "autoResolved": true, - "timeToResolve": "PT10M" - }, - "actions": [ - { - "actionGroupId": "/subscriptions/13d371f9-5a39-46d5-8e1b-60158c49db84/resourceGroups/ContainerInsightsPrometheusCollector-Prod/providers/microsoft.insights/actiongroups/AMA-PrometheusCollectorAlertGroup" - } - ] - }, - { - "alert": "up metric missing for target = kube-apiserver in cluster monitoring-metrics-prod-aks-eus2euap", - "expression": "absent(up{cluster=\"monitoring-metrics-prod-aks-eus2euap\", job=\"kube-apiserver\"}) == 1 or up{cluster=\"monitoring-metrics-prod-aks-eus2euap\", job=\"kube-apiserver\"} == 0", - "for": "PT3M", - "labels": { - "cluster": "monitoring-metrics-prod-aks-eus2euap" - }, - "annotations": { - "description": "up metric is not flowing for target = kube-apiserver in cluster monitoring-metrics-prod-aks-eus2euap" - }, - "severity": 4, - "resolveConfiguration": { - "autoResolved": true, - "timeToResolve": "PT10M" - }, - "actions": [ - { - "actionGroupId": 
"/subscriptions/13d371f9-5a39-46d5-8e1b-60158c49db84/resourceGroups/ContainerInsightsPrometheusCollector-Prod/providers/microsoft.insights/actiongroups/AMA-PrometheusCollectorAlertGroup" - } - ] - }, - { - "alert": "up metric missing for target = kube-proxy-windows in cluster monitoring-metrics-prod-aks-eus2euap", - "expression": "absent(up{cluster=\"monitoring-metrics-prod-aks-eus2euap\", job=\"kube-proxy-windows\"}) == 1 or up{cluster=\"monitoring-metrics-prod-aks-eus2euap\", job=\"kube-proxy-windows\"} == 0", - "for": "PT3M", - "labels": { - "cluster": "monitoring-metrics-prod-aks-eus2euap" - }, - "annotations": { - "description": "up metric is not flowing for target = kube-proxy-windows in cluster monitoring-metrics-prod-aks-eus2euap" - }, - "severity": 4, - "resolveConfiguration": { - "autoResolved": true, - "timeToResolve": "PT10M" - }, - "actions": [ - { - "actionGroupId": "/subscriptions/13d371f9-5a39-46d5-8e1b-60158c49db84/resourceGroups/ContainerInsightsPrometheusCollector-Prod/providers/microsoft.insights/actiongroups/AMA-PrometheusCollectorAlertGroup" - } - ] - }, - { - "alert": "up metric missing for target = kube-state-metrics in cluster monitoring-metrics-prod-aks-eus2euap", - "expression": "absent(up{cluster=\"monitoring-metrics-prod-aks-eus2euap\", job=\"kube-state-metrics\"}) == 1 or up{cluster=\"monitoring-metrics-prod-aks-eus2euap\", job=\"kube-state-metrics\"} == 0", - "for": "PT3M", - "labels": { - "cluster": "monitoring-metrics-prod-aks-eus2euap" - }, - "annotations": { - "description": "up metric is not flowing for target = kube-state-metrics in cluster monitoring-metrics-prod-aks-eus2euap" - }, - "severity": 4, - "resolveConfiguration": { - "autoResolved": true, - "timeToResolve": "PT10M" - }, - "actions": [ - { - "actionGroupId": "/subscriptions/13d371f9-5a39-46d5-8e1b-60158c49db84/resourceGroups/ContainerInsightsPrometheusCollector-Prod/providers/microsoft.insights/actiongroups/AMA-PrometheusCollectorAlertGroup" - } - ] - }, - { - 
"alert": "up metric missing for target = cadvisor in cluster monitoring-metrics-prod-aks-eus2euap", - "expression": "absent(up{cluster=\"monitoring-metrics-prod-aks-eus2euap\", job=\"cadvisor\"}) == 1 or up{cluster=\"monitoring-metrics-prod-aks-eus2euap\", job=\"cadvisor\"} == 0", - "for": "PT3M", - "labels": { - "cluster": "monitoring-metrics-prod-aks-eus2euap" - }, - "annotations": { - "description": "up metric is not flowing for target = cadvisor in cluster monitoring-metrics-prod-aks-eus2euap" - }, - "severity": 4, - "resolveConfiguration": { - "autoResolved": true, - "timeToResolve": "PT10M" - }, - "actions": [ - { - "actionGroupId": "/subscriptions/13d371f9-5a39-46d5-8e1b-60158c49db84/resourceGroups/ContainerInsightsPrometheusCollector-Prod/providers/microsoft.insights/actiongroups/AMA-PrometheusCollectorAlertGroup" - } - ] - }, - { - "alert": "up metric missing for target = kube-dns in cluster monitoring-metrics-prod-aks-eus2euap", - "expression": "absent(up{cluster=\"monitoring-metrics-prod-aks-eus2euap\", job=\"kube-dns\"}) == 1 or up{cluster=\"monitoring-metrics-prod-aks-eus2euap\", job=\"kube-dns\"} == 0", - "for": "PT3M", - "labels": { - "cluster": "monitoring-metrics-prod-aks-eus2euap" - }, - "annotations": { - "description": "up metric is not flowing for target = kube-dns in cluster monitoring-metrics-prod-aks-eus2euap" - }, - "severity": 4, - "resolveConfiguration": { - "autoResolved": true, - "timeToResolve": "PT10M" - }, - "actions": [ - { - "actionGroupId": "/subscriptions/13d371f9-5a39-46d5-8e1b-60158c49db84/resourceGroups/ContainerInsightsPrometheusCollector-Prod/providers/microsoft.insights/actiongroups/AMA-PrometheusCollectorAlertGroup" - } - ] - }, - { - "alert": "CPU usage % greater than 90 for prometheus-collector containers on cluster monitoring-metrics-prod-aks-eus2euap", - "expression": "sum(sum by (cluster, namespace, pod, container) ( rate(container_cpu_usage_seconds_total{job=\"cadvisor\", image!=\"\", namespace=\"kube-system\", 
cluster=\"monitoring-metrics-prod-aks-eus2euap\", container=\"prometheus-collector\"}[5m]) ) * on (cluster, namespace, pod) group_left(node) topk by (cluster, namespace, pod) ( 1, max by(cluster, namespace, pod, node) (kube_pod_info{node!=\"\", namespace=\"kube-system\", cluster=\"monitoring-metrics-prod-aks-eus2euap\"}) )) by (container, pod) > 0.9", - "for": "PT3M", - "labels": { - "cluster": "monitoring-metrics-prod-aks-eus2euap" - }, - "annotations": { - "description": "CPU usage greater than 90% for prometheus-collector on cluster monitoring-metrics-prod-aks-eus2euap" - }, - "severity": 4, - "resolveConfiguration": { - "autoResolved": true, - "timeToResolve": "PT10M" - }, - "actions": [ - { - "actionGroupId": "/subscriptions/13d371f9-5a39-46d5-8e1b-60158c49db84/resourceGroups/ContainerInsightsPrometheusCollector-Prod/providers/microsoft.insights/actiongroups/AMA-PrometheusCollectorAlertGroup" - } - ] - }, - { - "alert": "CPU usage % greater than 50 for prometheus-collector containers on cluster monitoring-metrics-prod-aks-eus2euap", - "expression": "sum(sum by (cluster, namespace, pod, container) ( rate(container_cpu_usage_seconds_total{job=\"cadvisor\", image!=\"\", namespace=\"kube-system\", cluster=\"monitoring-metrics-prod-aks-eus2euap\", container=\"prometheus-collector\"}[5m]) ) * on (cluster, namespace, pod) group_left(node) topk by (cluster, namespace, pod) ( 1, max by(cluster, namespace, pod, node) (kube_pod_info{node!=\"\", namespace=\"kube-system\", cluster=\"monitoring-metrics-prod-aks-eus2euap\"}) )) by (container, pod) > 0.5", - "for": "PT3M", - "labels": { - "cluster": "monitoring-metrics-prod-aks-eus2euap" - }, - "annotations": { - "description": "CPU usage greater than 5% for prometheus-collector on cluster monitoring-metrics-prod-aks-eus2euap" - }, - "severity": 4, - "resolveConfiguration": { - "autoResolved": true, - "timeToResolve": "PT10M" - }, - "actions": [ - { - "actionGroupId": 
"/subscriptions/13d371f9-5a39-46d5-8e1b-60158c49db84/resourceGroups/ContainerInsightsPrometheusCollector-Prod/providers/microsoft.insights/actiongroups/AMA-PrometheusCollectorAlertGroup" - } - ] - }, - { - "alert": "Memory usage is high for prometheus-collector containers on cluster monitoring-metrics-prod-aks-eus2euap", - "expression": "(sum(container_memory_working_set_bytes{cluster=\"monitoring-metrics-prod-aks-eus2euap\", namespace=\"kube-system\", container=\"prometheus-collector\", image!=\"\"}) by (container, pod) / sum(kube_pod_container_resource_requests{cluster=\"monitoring-metrics-prod-aks-eus2euap\", namespace=\"kube-system\", container=\"prometheus-collector\", resource=\"memory\"}) by (container, pod)) > 1.9", - "for": "PT3M", - "labels": { - "cluster": "monitoring-metrics-prod-aks-eus2euap" - }, - "annotations": { - "description": "Memory usage is high for prometheus-collector containers on cluster monitoring-metrics-prod-aks-eus2euap" - }, - "severity": 4, - "resolveConfiguration": { - "autoResolved": true, - "timeToResolve": "PT10M" - }, - "actions": [ - { - "actionGroupId": "/subscriptions/13d371f9-5a39-46d5-8e1b-60158c49db84/resourceGroups/ContainerInsightsPrometheusCollector-Prod/providers/microsoft.insights/actiongroups/AMA-PrometheusCollectorAlertGroup" - } - ] - } - ] - } - } - ] -} diff --git a/internal/alerts/monitoring-metrics-prod-aks-wcus.json b/internal/alerts/monitoring-metrics-prod-aks-wcus.json deleted file mode 100644 index feff2e6ae..000000000 --- a/internal/alerts/monitoring-metrics-prod-aks-wcus.json +++ /dev/null @@ -1,296 +0,0 @@ -{ - "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#", - "contentVersion": "1.0.0.0", - "parameters": {}, - "variables": {}, - "resources": [ - { - "name": "containerinsights_dev_mac_eus_alerts", - "type": "Microsoft.AlertsManagement/prometheusRuleGroups", - "apiVersion": "2023-03-01", - "location": "eastus", - "properties": { - "description": "rule group for 
cluster monitoring-metrics-prod-aks-wcus in MAC: monitoring-metrics-amw-wcus", - "scopes": [ - "/subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourceGroups/monitoring-metrics-amw/providers/microsoft.monitor/accounts/monitoring-metrics-amw-wcus", - "/subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourceGroups/monitoring-metrics-prod-aks/providers/Microsoft.ContainerService/managedClusters/monitoring-metrics-prod-aks-wcus" - ], - "rules": [ - { - "alert": "Amd64 metric missing in cluster monitoring-metrics-prod-aks-wcus", - "expression": "absent(node_uname_info{cluster=\"monitoring-metrics-prod-aks-wcus\", machine=\"x86_64\"}) == 1 or node_uname_info{cluster=\"monitoring-metrics-prod-aks-wcus\", machine=\"x86_64\"} == 0", - "for": "PT3M", - "labels": { - "cluster": "monitoring-metrics-prod-aks-wcus" - }, - "annotations": { - "description": "Amd64 metric missing in cluster monitoring-metrics-prod-aks-wcus" - }, - "severity": 4, - "resolveConfiguration": { - "autoResolved": true, - "timeToResolve": "PT10M" - }, - "actions": [ - { - "actionGroupId": "/subscriptions/13d371f9-5a39-46d5-8e1b-60158c49db84/resourceGroups/ContainerInsightsPrometheusCollector-Prod/providers/microsoft.insights/actiongroups/AMA-PrometheusCollectorAlertGroup" - } - ] - }, - { - "alert": "up metric missing for target = node in cluster monitoring-metrics-prod-aks-wcus", - "expression": "absent(up{cluster=\"monitoring-metrics-prod-aks-wcus\", job=\"node\"}) == 1 or up{cluster=\"monitoring-metrics-prod-aks-wcus\", job=\"node\"} == 0", - "for": "PT3M", - "labels": { - "cluster": "monitoring-metrics-prod-aks-wcus" - }, - "annotations": { - "description": "up metric is not flowing for target = node in cluster monitoring-metrics-prod-aks-wcus" - }, - "severity": 4, - "resolveConfiguration": { - "autoResolved": true, - "timeToResolve": "PT10M" - }, - "actions": [ - { - "actionGroupId": 
"/subscriptions/13d371f9-5a39-46d5-8e1b-60158c49db84/resourceGroups/ContainerInsightsPrometheusCollector-Prod/providers/microsoft.insights/actiongroups/AMA-PrometheusCollectorAlertGroup" - } - ] - }, - { - "alert": "up metric missing for target = kubelet in cluster monitoring-metrics-prod-aks-wcus", - "expression": "absent(up{cluster=\"monitoring-metrics-prod-aks-wcus\", job=\"kubelet\"}) == 1 or up{cluster=\"monitoring-metrics-prod-aks-wcus\", job=\"kubelet\"} == 0", - "for": "PT3M", - "labels": { - "cluster": "monitoring-metrics-prod-aks-wcus" - }, - "annotations": { - "description": "up metric is not flowing for target = kubelet in cluster monitoring-metrics-prod-aks-wcus" - }, - "severity": 4, - "resolveConfiguration": { - "autoResolved": true, - "timeToResolve": "PT10M" - }, - "actions": [ - { - "actionGroupId": "/subscriptions/13d371f9-5a39-46d5-8e1b-60158c49db84/resourceGroups/ContainerInsightsPrometheusCollector-Prod/providers/microsoft.insights/actiongroups/AMA-PrometheusCollectorAlertGroup" - } - ] - }, - { - "alert": "up metric missing for target = windows-exporter in cluster monitoring-metrics-prod-aks-wcus", - "expression": "absent(up{cluster=\"monitoring-metrics-prod-aks-wcus\", job=\"windows-exporter\"}) == 1 or up{cluster=\"monitoring-metrics-prod-aks-wcus\", job=\"windows-exporter\"} == 0", - "for": "PT3M", - "labels": { - "cluster": "monitoring-metrics-prod-aks-wcus" - }, - "annotations": { - "description": "up metric is not flowing for target = windows-exporter in cluster monitoring-metrics-prod-aks-wcus" - }, - "severity": 4, - "resolveConfiguration": { - "autoResolved": true, - "timeToResolve": "PT10M" - }, - "actions": [ - { - "actionGroupId": "/subscriptions/13d371f9-5a39-46d5-8e1b-60158c49db84/resourceGroups/ContainerInsightsPrometheusCollector-Prod/providers/microsoft.insights/actiongroups/AMA-PrometheusCollectorAlertGroup" - } - ] - }, - { - "alert": "up metric missing for target = kube-proxy in cluster monitoring-metrics-prod-aks-wcus", - 
"expression": "absent(up{cluster=\"monitoring-metrics-prod-aks-wcus\", job=\"kube-proxy\"}) == 1 or up{cluster=\"monitoring-metrics-prod-aks-wcus\", job=\"kube-proxy\"} == 0", - "for": "PT3M", - "labels": { - "cluster": "monitoring-metrics-prod-aks-wcus" - }, - "annotations": { - "description": "up metric is not flowing for target = kube-proxy in cluster monitoring-metrics-prod-aks-wcus" - }, - "severity": 4, - "resolveConfiguration": { - "autoResolved": true, - "timeToResolve": "PT10M" - }, - "actions": [ - { - "actionGroupId": "/subscriptions/13d371f9-5a39-46d5-8e1b-60158c49db84/resourceGroups/ContainerInsightsPrometheusCollector-Prod/providers/microsoft.insights/actiongroups/AMA-PrometheusCollectorAlertGroup" - } - ] - }, - { - "alert": "up metric missing for target = kube-apiserver in cluster monitoring-metrics-prod-aks-wcus", - "expression": "absent(up{cluster=\"monitoring-metrics-prod-aks-wcus\", job=\"kube-apiserver\"}) == 1 or up{cluster=\"monitoring-metrics-prod-aks-wcus\", job=\"kube-apiserver\"} == 0", - "for": "PT3M", - "labels": { - "cluster": "monitoring-metrics-prod-aks-wcus" - }, - "annotations": { - "description": "up metric is not flowing for target = kube-apiserver in cluster monitoring-metrics-prod-aks-wcus" - }, - "severity": 4, - "resolveConfiguration": { - "autoResolved": true, - "timeToResolve": "PT10M" - }, - "actions": [ - { - "actionGroupId": "/subscriptions/13d371f9-5a39-46d5-8e1b-60158c49db84/resourceGroups/ContainerInsightsPrometheusCollector-Prod/providers/microsoft.insights/actiongroups/AMA-PrometheusCollectorAlertGroup" - } - ] - }, - { - "alert": "up metric missing for target = kube-proxy-windows in cluster monitoring-metrics-prod-aks-wcus", - "expression": "absent(up{cluster=\"monitoring-metrics-prod-aks-wcus\", job=\"kube-proxy-windows\"}) == 1 or up{cluster=\"monitoring-metrics-prod-aks-wcus\", job=\"kube-proxy-windows\"} == 0", - "for": "PT3M", - "labels": { - "cluster": "monitoring-metrics-prod-aks-wcus" - }, - "annotations": 
{ - "description": "up metric is not flowing for target = kube-proxy-windows in cluster monitoring-metrics-prod-aks-wcus" - }, - "severity": 4, - "resolveConfiguration": { - "autoResolved": true, - "timeToResolve": "PT10M" - }, - "actions": [ - { - "actionGroupId": "/subscriptions/13d371f9-5a39-46d5-8e1b-60158c49db84/resourceGroups/ContainerInsightsPrometheusCollector-Prod/providers/microsoft.insights/actiongroups/AMA-PrometheusCollectorAlertGroup" - } - ] - }, - { - "alert": "up metric missing for target = kube-state-metrics in cluster monitoring-metrics-prod-aks-wcus", - "expression": "absent(up{cluster=\"monitoring-metrics-prod-aks-wcus\", job=\"kube-state-metrics\"}) == 1 or up{cluster=\"monitoring-metrics-prod-aks-wcus\", job=\"kube-state-metrics\"} == 0", - "for": "PT3M", - "labels": { - "cluster": "monitoring-metrics-prod-aks-wcus" - }, - "annotations": { - "description": "up metric is not flowing for target = kube-state-metrics in cluster monitoring-metrics-prod-aks-wcus" - }, - "severity": 4, - "resolveConfiguration": { - "autoResolved": true, - "timeToResolve": "PT10M" - }, - "actions": [ - { - "actionGroupId": "/subscriptions/13d371f9-5a39-46d5-8e1b-60158c49db84/resourceGroups/ContainerInsightsPrometheusCollector-Prod/providers/microsoft.insights/actiongroups/AMA-PrometheusCollectorAlertGroup" - } - ] - }, - { - "alert": "up metric missing for target = cadvisor in cluster monitoring-metrics-prod-aks-wcus", - "expression": "absent(up{cluster=\"monitoring-metrics-prod-aks-wcus\", job=\"cadvisor\"}) == 1 or up{cluster=\"monitoring-metrics-prod-aks-wcus\", job=\"cadvisor\"} == 0", - "for": "PT3M", - "labels": { - "cluster": "monitoring-metrics-prod-aks-wcus" - }, - "annotations": { - "description": "up metric is not flowing for target = cadvisor in cluster monitoring-metrics-prod-aks-wcus" - }, - "severity": 4, - "resolveConfiguration": { - "autoResolved": true, - "timeToResolve": "PT10M" - }, - "actions": [ - { - "actionGroupId": 
"/subscriptions/13d371f9-5a39-46d5-8e1b-60158c49db84/resourceGroups/ContainerInsightsPrometheusCollector-Prod/providers/microsoft.insights/actiongroups/AMA-PrometheusCollectorAlertGroup" - } - ] - }, - { - "alert": "up metric missing for target = kube-dns in cluster monitoring-metrics-prod-aks-wcus", - "expression": "absent(up{cluster=\"monitoring-metrics-prod-aks-wcus\", job=\"kube-dns\"}) == 1 or up{cluster=\"monitoring-metrics-prod-aks-wcus\", job=\"kube-dns\"} == 0", - "for": "PT3M", - "labels": { - "cluster": "monitoring-metrics-prod-aks-wcus" - }, - "annotations": { - "description": "up metric is not flowing for target = kube-dns in cluster monitoring-metrics-prod-aks-wcus" - }, - "severity": 4, - "resolveConfiguration": { - "autoResolved": true, - "timeToResolve": "PT10M" - }, - "actions": [ - { - "actionGroupId": "/subscriptions/13d371f9-5a39-46d5-8e1b-60158c49db84/resourceGroups/ContainerInsightsPrometheusCollector-Prod/providers/microsoft.insights/actiongroups/AMA-PrometheusCollectorAlertGroup" - } - ] - }, - { - "alert": "CPU usage % greater than 90 for prometheus-collector containers on cluster monitoring-metrics-prod-aks-wcus", - "expression": "sum(sum by (cluster, namespace, pod, container) ( rate(container_cpu_usage_seconds_total{job=\"cadvisor\", image!=\"\", namespace=\"kube-system\", cluster=\"monitoring-metrics-prod-aks-wcus\", container=\"prometheus-collector\"}[5m]) ) * on (cluster, namespace, pod) group_left(node) topk by (cluster, namespace, pod) ( 1, max by(cluster, namespace, pod, node) (kube_pod_info{node!=\"\", namespace=\"kube-system\", cluster=\"monitoring-metrics-prod-aks-wcus\"}) )) by (container, pod) > 0.9", - "for": "PT3M", - "labels": { - "cluster": "monitoring-metrics-prod-aks-wcus" - }, - "annotations": { - "description": "CPU usage greater than 90% for prometheus-collector on cluster monitoring-metrics-prod-aks-wcus" - }, - "severity": 4, - "resolveConfiguration": { - "autoResolved": true, - "timeToResolve": "PT10M" - }, - 
"actions": [ - { - "actionGroupId": "/subscriptions/13d371f9-5a39-46d5-8e1b-60158c49db84/resourceGroups/ContainerInsightsPrometheusCollector-Prod/providers/microsoft.insights/actiongroups/AMA-PrometheusCollectorAlertGroup" - } - ] - }, - { - "alert": "CPU usage % greater than 50 for prometheus-collector containers on cluster monitoring-metrics-prod-aks-wcus", - "expression": "sum(sum by (cluster, namespace, pod, container) ( rate(container_cpu_usage_seconds_total{job=\"cadvisor\", image!=\"\", namespace=\"kube-system\", cluster=\"monitoring-metrics-prod-aks-wcus\", container=\"prometheus-collector\"}[5m]) ) * on (cluster, namespace, pod) group_left(node) topk by (cluster, namespace, pod) ( 1, max by(cluster, namespace, pod, node) (kube_pod_info{node!=\"\", namespace=\"kube-system\", cluster=\"monitoring-metrics-prod-aks-wcus\"}) )) by (container, pod) > 0.5", - "for": "PT3M", - "labels": { - "cluster": "monitoring-metrics-prod-aks-wcus" - }, - "annotations": { - "description": "CPU usage greater than 5% for prometheus-collector on cluster monitoring-metrics-prod-aks-wcus" - }, - "severity": 4, - "resolveConfiguration": { - "autoResolved": true, - "timeToResolve": "PT10M" - }, - "actions": [ - { - "actionGroupId": "/subscriptions/13d371f9-5a39-46d5-8e1b-60158c49db84/resourceGroups/ContainerInsightsPrometheusCollector-Prod/providers/microsoft.insights/actiongroups/AMA-PrometheusCollectorAlertGroup" - } - ] - }, - { - "alert": "Memory usage is high for prometheus-collector containers on cluster monitoring-metrics-prod-aks-wcus", - "expression": "(sum(container_memory_working_set_bytes{cluster=\"monitoring-metrics-prod-aks-wcus\", namespace=\"kube-system\", container=\"prometheus-collector\", image!=\"\"}) by (container, pod) / sum(kube_pod_container_resource_requests{cluster=\"monitoring-metrics-prod-aks-wcus\", namespace=\"kube-system\", container=\"prometheus-collector\", resource=\"memory\"}) by (container, pod)) > 1.9", - "for": "PT3M", - "labels": { - "cluster": 
"monitoring-metrics-prod-aks-wcus" - }, - "annotations": { - "description": "Memory usage is high for prometheus-collector containers on cluster monitoring-metrics-prod-aks-wcus" - }, - "severity": 4, - "resolveConfiguration": { - "autoResolved": true, - "timeToResolve": "PT10M" - }, - "actions": [ - { - "actionGroupId": "/subscriptions/13d371f9-5a39-46d5-8e1b-60158c49db84/resourceGroups/ContainerInsightsPrometheusCollector-Prod/providers/microsoft.insights/actiongroups/AMA-PrometheusCollectorAlertGroup" - } - ] - } - ] - } - } - ] -} diff --git a/internal/monitoring/dashboards/cicd-db.json b/internal/monitoring/dashboards/cicd-db.json index c77058c88..547914803 100644 --- a/internal/monitoring/dashboards/cicd-db.json +++ b/internal/monitoring/dashboards/cicd-db.json @@ -700,7 +700,8 @@ "value": null } ] - } + }, + "unit": "decbytes" }, "overrides": [] }, @@ -873,7 +874,8 @@ "value": null } ] - } + }, + "unit": "decbytes" }, "overrides": [] }, @@ -1045,7 +1047,8 @@ "value": null } ] - } + }, + "unit": "decbytes" }, "overrides": [] }, @@ -1697,8 +1700,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -1788,8 +1790,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -1879,8 +1880,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -1968,8 +1968,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -2085,8 +2084,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -2177,8 +2175,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -2899,7 +2896,8 @@ "value": null } ] - } + }, + "unit": "decbytes" }, "overrides": [] }, @@ -3072,7 +3070,8 @@ "value": null } ] - } + }, + "unit": "decbytes" 
}, "overrides": [] }, @@ -3244,7 +3243,8 @@ "value": null } ] - } + }, + "unit": "decbytes" }, "overrides": [] }, @@ -3896,8 +3896,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -3987,8 +3986,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -4078,8 +4076,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -4167,8 +4164,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -4284,8 +4280,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -4376,8 +4371,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -5099,7 +5093,8 @@ "value": null } ] - } + }, + "unit": "decbytes" }, "overrides": [] }, @@ -5272,7 +5267,8 @@ "value": null } ] - } + }, + "unit": "decbytes" }, "overrides": [] }, @@ -5444,7 +5440,8 @@ "value": null } ] - } + }, + "unit": "decbytes" }, "overrides": [] }, @@ -5709,8 +5706,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -5823,8 +5819,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -5914,8 +5909,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -6005,8 +5999,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -6096,8 +6089,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -6187,8 +6179,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -6278,8 +6269,7 @@ "mode": 
"absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -6367,8 +6357,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -6484,8 +6473,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -6576,8 +6564,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -7299,7 +7286,8 @@ "value": null } ] - } + }, + "unit": "decbytes" }, "overrides": [] }, @@ -7472,7 +7460,8 @@ "value": null } ] - } + }, + "unit": "decbytes" }, "overrides": [] }, @@ -7644,7 +7633,8 @@ "value": null } ] - } + }, + "unit": "decbytes" }, "overrides": [] }, @@ -8843,6 +8833,6 @@ "timezone": "", "title": "CPU and Memory utilization k-s-m, replicaset and daemonset", "uid": "gp9556IVy", - "version": 33, + "version": 38, "weekStart": "" } diff --git a/internal/monitoring/dashboards/prod-near-ring-db.json b/internal/monitoring/dashboards/prod-near-ring-db.json index 230b7efb3..a96f5f673 100644 --- a/internal/monitoring/dashboards/prod-near-ring-db.json +++ b/internal/monitoring/dashboards/prod-near-ring-db.json @@ -699,7 +699,8 @@ "value": null } ] - } + }, + "unit": "decbytes" }, "overrides": [] }, @@ -872,7 +873,8 @@ "value": null } ] - } + }, + "unit": "decbytes" }, "overrides": [] }, @@ -1044,7 +1046,8 @@ "value": null } ] - } + }, + "unit": "decbytes" }, "overrides": [] }, @@ -1878,8 +1881,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -1967,8 +1969,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -2084,8 +2085,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -2176,8 +2176,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": 
"green" }, { "color": "red", @@ -2899,7 +2898,8 @@ "value": null } ] - } + }, + "unit": "decbytes" }, "overrides": [] }, @@ -2983,7 +2983,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null } ] } @@ -3067,10 +3068,12 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null } ] - } + }, + "unit": "decbytes" }, "overrides": [] }, @@ -3153,7 +3156,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null } ] } @@ -3237,10 +3241,12 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null } ] - } + }, + "unit": "decbytes" }, "overrides": [] }, @@ -3323,7 +3329,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -3413,7 +3420,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -3503,7 +3511,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -3616,7 +3625,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -3706,7 +3716,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -3796,7 +3807,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -4427,6 +4439,6 @@ "timezone": "", "title": "CPU and Memory utilization k-s-m, replicaset and daemonset", "uid": "gp9556IVy", - "version": 11, + "version": 13, "weekStart": "" } From f599e2bf39d70f4f908f49fd47bddb62c7275973 Mon Sep 17 00:00:00 2001 From: Soham Dasgupta Date: Fri, 29 Sep 2023 13:11:22 -0700 Subject: [PATCH 06/10] Replace label for cluster name with clusterName property --- internal/alerts/cluster-scopes.txt | 2 +- internal/alerts/example-alert-template.json | 41 +-------------------- 2 files changed, 
3 insertions(+), 40 deletions(-) diff --git a/internal/alerts/cluster-scopes.txt b/internal/alerts/cluster-scopes.txt index 222796c34..d9b5629ae 100644 --- a/internal/alerts/cluster-scopes.txt +++ b/internal/alerts/cluster-scopes.txt @@ -1,6 +1,6 @@ Here are the cluster parameters which need to be updated in example-alert-template.json before deploying the alerts templates for each cluster. Please update the "scopes" field in the alerts template with the cluster id and AMW id from the list below depending on the cluster. -Update the labels field with the cluster name below. Update the location according to the cluster. Update the alert name accordingly. +Update the clusterName field with the cluster name below. Update the location according to the cluster. Update the alert name accordingly. Cluster name Cluster id diff --git a/internal/alerts/example-alert-template.json b/internal/alerts/example-alert-template.json index 8407781c9..ce865c59d 100644 --- a/internal/alerts/example-alert-template.json +++ b/internal/alerts/example-alert-template.json @@ -15,14 +15,13 @@ "/subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourceGroups/ci-dev-aks-mac-eus-rg/providers/microsoft.monitor/accounts/ci-dev-aks-eus-mac", "/subscriptions/9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb/resourceGroups/ci-dev-aks-mac-eus-rg/providers/Microsoft.ContainerService/managedClusters/ci-dev-aks-mac-eus" ], + "enabled": true, + "clusterName": "ci-dev-aks-mac-eus", "rules": [ { "alert": "Amd64 metric missing in cluster ci-dev-aks-mac-eus", "expression": "absent(node_uname_info{machine=\"x86_64\"}) == 1 or node_uname_info{machine=\"x86_64\"} == 0", "for": "PT3M", - "labels": { - "cluster": "ci-dev-aks-mac-eus" - }, "annotations": { "description": "Amd64 metric missing in cluster ci-dev-aks-mac-eus" }, @@ -41,9 +40,6 @@ "alert": "up metric missing for target = node in cluster ci-dev-aks-mac-eus", "expression": "absent(up{job=\"node\"}) == 1 or up{job=\"node\"} == 0", "for": "PT3M", - "labels": { - 
"cluster": "ci-dev-aks-mac-eus" - }, "annotations": { "description": "up metric is not flowing for target = node in cluster ci-dev-aks-mac-eus" }, @@ -62,9 +58,6 @@ "alert": "up metric missing for target = kubelet in cluster ci-dev-aks-mac-eus", "expression": "absent(up{job=\"kubelet\"}) == 1 or up{job=\"kubelet\"} == 0", "for": "PT3M", - "labels": { - "cluster": "ci-dev-aks-mac-eus" - }, "annotations": { "description": "up metric is not flowing for target = kubelet in cluster ci-dev-aks-mac-eus" }, @@ -83,9 +76,6 @@ "alert": "up metric missing for target = windows-exporter in cluster ci-dev-aks-mac-eus", "expression": "absent(up{job=\"windows-exporter\"}) == 1 or up{job=\"windows-exporter\"} == 0", "for": "PT3M", - "labels": { - "cluster": "ci-dev-aks-mac-eus" - }, "annotations": { "description": "up metric is not flowing for target = windows-exporter in cluster ci-dev-aks-mac-eus" }, @@ -104,9 +94,6 @@ "alert": "up metric missing for target = kube-proxy in cluster ci-dev-aks-mac-eus", "expression": "absent(up{job=\"kube-proxy\"}) == 1 or up{job=\"kube-proxy\"} == 0", "for": "PT3M", - "labels": { - "cluster": "ci-dev-aks-mac-eus" - }, "annotations": { "description": "up metric is not flowing for target = kube-proxy in cluster ci-dev-aks-mac-eus" }, @@ -125,9 +112,6 @@ "alert": "up metric missing for target = kube-apiserver in cluster ci-dev-aks-mac-eus", "expression": "absent(up{job=\"kube-apiserver\"}) == 1 or up{job=\"kube-apiserver\"} == 0", "for": "PT3M", - "labels": { - "cluster": "ci-dev-aks-mac-eus" - }, "annotations": { "description": "up metric is not flowing for target = kube-apiserver in cluster ci-dev-aks-mac-eus" }, @@ -146,9 +130,6 @@ "alert": "up metric missing for target = kube-proxy-windows in cluster ci-dev-aks-mac-eus", "expression": "absent(up{job=\"kube-proxy-windows\"}) == 1 or up{job=\"kube-proxy-windows\"} == 0", "for": "PT3M", - "labels": { - "cluster": "ci-dev-aks-mac-eus" - }, "annotations": { "description": "up metric is not flowing for 
target = kube-proxy-windows in cluster ci-dev-aks-mac-eus" }, @@ -167,9 +148,6 @@ "alert": "up metric missing for target = kube-state-metrics in cluster ci-dev-aks-mac-eus", "expression": "absent(up{job=\"kube-state-metrics\"}) == 1 or up{job=\"kube-state-metrics\"} == 0", "for": "PT3M", - "labels": { - "cluster": "ci-dev-aks-mac-eus" - }, "annotations": { "description": "up metric is not flowing for target = kube-state-metrics in cluster ci-dev-aks-mac-eus" }, @@ -188,9 +166,6 @@ "alert": "up metric missing for target = cadvisor in cluster ci-dev-aks-mac-eus", "expression": "absent(up{job=\"cadvisor\"}) == 1 or up{job=\"cadvisor\"} == 0", "for": "PT3M", - "labels": { - "cluster": "ci-dev-aks-mac-eus" - }, "annotations": { "description": "up metric is not flowing for target = cadvisor in cluster ci-dev-aks-mac-eus" }, @@ -209,9 +184,6 @@ "alert": "up metric missing for target = kube-dns in cluster ci-dev-aks-mac-eus", "expression": "absent(up{job=\"kube-dns\"}) == 1 or up{job=\"kube-dns\"} == 0", "for": "PT3M", - "labels": { - "cluster": "ci-dev-aks-mac-eus" - }, "annotations": { "description": "up metric is not flowing for target = kube-dns in cluster ci-dev-aks-mac-eus" }, @@ -230,9 +202,6 @@ "alert": "CPU usage % greater than 90 for prometheus-collector containers on cluster ci-dev-aks-mac-eus", "expression": "sum(sum by (cluster, namespace, pod, container) ( rate(container_cpu_usage_seconds_total{job=\"cadvisor\", image!=\"\", namespace=\"kube-system\", container=\"prometheus-collector\"}[5m]) ) * on (cluster, namespace, pod) group_left(node) topk by (cluster, namespace, pod) ( 1, max by(cluster, namespace, pod, node) (kube_pod_info{node!=\"\", namespace=\"kube-system\"}) )) by (container, pod) > 0.9", "for": "PT3M", - "labels": { - "cluster": "ci-dev-aks-mac-eus" - }, "annotations": { "description": "CPU usage greater than 90% for prometheus-collector on cluster ci-dev-aks-mac-eus" }, @@ -251,9 +220,6 @@ "alert": "CPU usage % greater than 50 for 
prometheus-collector containers on cluster ci-dev-aks-mac-eus", "expression": "sum(sum by (cluster, namespace, pod, container) ( rate(container_cpu_usage_seconds_total{job=\"cadvisor\", image!=\"\", namespace=\"kube-system\", container=\"prometheus-collector\"}[5m]) ) * on (cluster, namespace, pod) group_left(node) topk by (cluster, namespace, pod) ( 1, max by(cluster, namespace, pod, node) (kube_pod_info{node!=\"\", namespace=\"kube-system\"}) )) by (container, pod) > 0.5", "for": "PT3M", - "labels": { - "cluster": "ci-dev-aks-mac-eus" - }, "annotations": { "description": "CPU usage greater than 5% for prometheus-collector on cluster ci-dev-aks-mac-eus" }, @@ -272,9 +238,6 @@ "alert": "Memory usage is high for prometheus-collector containers on cluster ci-dev-aks-mac-eus", "expression": "(sum(container_memory_working_set_bytes{namespace=\"kube-system\", container=\"prometheus-collector\", image!=\"\"}) by (container, pod) / sum(kube_pod_container_resource_requests{namespace=\"kube-system\", container=\"prometheus-collector\", resource=\"memory\"}) by (container, pod)) > 1.9", "for": "PT3M", - "labels": { - "cluster": "ci-dev-aks-mac-eus" - }, "annotations": { "description": "Memory usage is high for prometheus-collector containers on cluster ci-dev-aks-mac-eus" }, From 9f2ea9e5ca4795cfd7bebbad3a7c75abfa2a145c Mon Sep 17 00:00:00 2001 From: Soham Dasgupta Date: Fri, 29 Sep 2023 15:02:27 -0700 Subject: [PATCH 07/10] update helm version to pass build --- .pipelines/azure-pipeline-build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pipelines/azure-pipeline-build.yml b/.pipelines/azure-pipeline-build.yml index ebee81b37..6ef84b926 100644 --- a/.pipelines/azure-pipeline-build.yml +++ b/.pipelines/azure-pipeline-build.yml @@ -600,7 +600,7 @@ jobs: - task: HelmInstaller@1 displayName: 'Build: install Helm version' inputs: - helmVersionToInstall: latest + helmVersionToInstall: 3.12.3 - bash: | envsubst < 
$(Build.SourcesDirectory)/otelcollector/deploy/chart/prometheus-collector/Chart-template.yaml > $(Build.SourcesDirectory)/otelcollector/deploy/chart/prometheus-collector/Chart.yaml && envsubst < $(Build.SourcesDirectory)/otelcollector/deploy/chart/prometheus-collector/values-template.yaml > $(Build.SourcesDirectory)/otelcollector/deploy/chart/prometheus-collector/values.yaml From acaa2a330f947284391e9bfc0e014270b8457005 Mon Sep 17 00:00:00 2001 From: Soham Dasgupta Date: Fri, 29 Sep 2023 15:08:28 -0700 Subject: [PATCH 08/10] update helm version --- .pipelines/azure-pipeline-build.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.pipelines/azure-pipeline-build.yml b/.pipelines/azure-pipeline-build.yml index 6ef84b926..9288c4a1c 100644 --- a/.pipelines/azure-pipeline-build.yml +++ b/.pipelines/azure-pipeline-build.yml @@ -643,7 +643,7 @@ jobs: - task: HelmInstaller@1 displayName: 'Build: install Helm version' inputs: - helmVersionToInstall: latest + helmVersionToInstall: 3.12.3 - bash: | export HELM_CHART_NAME=$ARC_HELM_CHART_NAME @@ -789,7 +789,7 @@ jobs: - task: HelmInstaller@1 displayName: Install Helm version inputs: - helmVersionToInstall: latest + helmVersionToInstall: 3.12.3 - bash: | for i in 1 2 3 4 5 6 7 8 9 10 do From 3f71be52355b81738a13d81023afba6bd807b6b3 Mon Sep 17 00:00:00 2001 From: Soham Dasgupta Date: Fri, 29 Sep 2023 16:22:41 -0700 Subject: [PATCH 09/10] revert helm version update for non-Arc --- .pipelines/azure-pipeline-build.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.pipelines/azure-pipeline-build.yml b/.pipelines/azure-pipeline-build.yml index 9288c4a1c..ceb5afbb2 100644 --- a/.pipelines/azure-pipeline-build.yml +++ b/.pipelines/azure-pipeline-build.yml @@ -600,7 +600,7 @@ jobs: - task: HelmInstaller@1 displayName: 'Build: install Helm version' inputs: - helmVersionToInstall: 3.12.3 + helmVersionToInstall: latest - bash: | envsubst < 
$(Build.SourcesDirectory)/otelcollector/deploy/chart/prometheus-collector/Chart-template.yaml > $(Build.SourcesDirectory)/otelcollector/deploy/chart/prometheus-collector/Chart.yaml && envsubst < $(Build.SourcesDirectory)/otelcollector/deploy/chart/prometheus-collector/values-template.yaml > $(Build.SourcesDirectory)/otelcollector/deploy/chart/prometheus-collector/values.yaml @@ -789,7 +789,7 @@ jobs: - task: HelmInstaller@1 displayName: Install Helm version inputs: - helmVersionToInstall: 3.12.3 + helmVersionToInstall: latest - bash: | for i in 1 2 3 4 5 6 7 8 9 10 do From a162732878b48a650da3b82e0a7a9d8968e29684 Mon Sep 17 00:00:00 2001 From: Soham Dasgupta Date: Fri, 29 Sep 2023 17:47:27 -0700 Subject: [PATCH 10/10] update helm for multiarch --- .pipelines/azure-pipeline-build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pipelines/azure-pipeline-build.yml b/.pipelines/azure-pipeline-build.yml index ceb5afbb2..f3717fb0d 100644 --- a/.pipelines/azure-pipeline-build.yml +++ b/.pipelines/azure-pipeline-build.yml @@ -600,7 +600,7 @@ jobs: - task: HelmInstaller@1 displayName: 'Build: install Helm version' inputs: - helmVersionToInstall: latest + helmVersionToInstall: 3.12.3 - bash: | envsubst < $(Build.SourcesDirectory)/otelcollector/deploy/chart/prometheus-collector/Chart-template.yaml > $(Build.SourcesDirectory)/otelcollector/deploy/chart/prometheus-collector/Chart.yaml && envsubst < $(Build.SourcesDirectory)/otelcollector/deploy/chart/prometheus-collector/values-template.yaml > $(Build.SourcesDirectory)/otelcollector/deploy/chart/prometheus-collector/values.yaml