From 9fdc2f3756ff32d6aa629470fd6dac10fce08940 Mon Sep 17 00:00:00 2001
From: ada mancini
Date: Wed, 24 Jul 2024 13:29:05 -0400
Subject: [PATCH 1/4] add a textAnalyze job for checking etcdserver database size alarm

---
 in-cluster/default.yaml | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/in-cluster/default.yaml b/in-cluster/default.yaml
index 2835f6b..eaa4689 100644
--- a/in-cluster/default.yaml
+++ b/in-cluster/default.yaml
@@ -371,6 +371,19 @@ spec:
           - pass:
               message: Goldpinger can communicate properly
         regexGroups: '"OK": ?(?P\w+)'
+    - textAnalyze:
+        checkName: Etcdserver Database Size Exceeded
+        exclude: ""
+        ignoreIfNoFiles: true
+        fileName: *
+        regex: '(etcdserver)?.*mvcc.*database space exceeded'
+        outcomes:
+          - fail:
+              when: "true"
+              message: etcdserver database has grown too large. See: https://community.replicated.com/t/kubernetes-cluster-is-down-and-reporting-etcdserver-mvcc-database-size-exceeded/1428
+          - pass:
+              when: "false"
+              message: etcdserver database is not too large
     - nodeResources:
         checkName: Node status check
         outcomes:

From 510a257721a109e1997aeab9f899e067f98b6cbd Mon Sep 17 00:00:00 2001
From: ada mancini
Date: Fri, 2 Aug 2024 13:47:29 -0400
Subject: [PATCH 2/4] fix syntax

don't use `:` in bare strings in YAML
---
 in-cluster/default.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/in-cluster/default.yaml b/in-cluster/default.yaml
index eaa4689..73314d0 100644
--- a/in-cluster/default.yaml
+++ b/in-cluster/default.yaml
@@ -380,7 +380,7 @@ spec:
         outcomes:
           - fail:
               when: "true"
-              message: etcdserver database has grown too large. See: https://community.replicated.com/t/kubernetes-cluster-is-down-and-reporting-etcdserver-mvcc-database-size-exceeded/1428
+              message: etcdserver database has grown too large. See https://community.replicated.com/t/kubernetes-cluster-is-down-and-reporting-etcdserver-mvcc-database-size-exceeded/1428
           - pass:
               when: "false"
               message: etcdserver database is not too large

From 78a441a9f01d6fbd1905e14abb81117d2e3cbbd3 Mon Sep 17 00:00:00 2001
From: ada mancini
Date: Fri, 2 Aug 2024 13:50:45 -0400
Subject: [PATCH 3/4] fix syntax

---
 in-cluster/default.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/in-cluster/default.yaml b/in-cluster/default.yaml
index 73314d0..db8fb69 100644
--- a/in-cluster/default.yaml
+++ b/in-cluster/default.yaml
@@ -375,7 +375,7 @@ spec:
         checkName: Etcdserver Database Size Exceeded
         exclude: ""
         ignoreIfNoFiles: true
-        fileName: *
+        fileName: "*"
         regex: '(etcdserver)?.*mvcc.*database space exceeded'
         outcomes:
           - fail:

From bb3182678d541fe1217a7cd8453a9f8abc51cd16 Mon Sep 17 00:00:00 2001
From: ada mancini
Date: Wed, 7 Aug 2024 11:26:32 -0400
Subject: [PATCH 4/4] fix syntax yet again

---
 in-cluster/default.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/in-cluster/default.yaml b/in-cluster/default.yaml
index db8fb69..d38a55c 100644
--- a/in-cluster/default.yaml
+++ b/in-cluster/default.yaml
@@ -380,7 +380,7 @@ spec:
         outcomes:
           - fail:
               when: "true"
-              message: etcdserver database has grown too large. See https://community.replicated.com/t/kubernetes-cluster-is-down-and-reporting-etcdserver-mvcc-database-size-exceeded/1428
+              message: "etcdserver database has grown too large. See https://community.replicated.com/t/kubernetes-cluster-is-down-and-reporting-etcdserver-mvcc-database-size-exceeded/1428"
           - pass:
               when: "false"
               message: etcdserver database is not too large