From fd42f0e07f6428765263d4a15555a513509f2c23 Mon Sep 17 00:00:00 2001 From: Grace Wehner Date: Tue, 28 May 2024 18:57:45 -0700 Subject: [PATCH] Test: fix flaky test timeouts (#897) --- .pipelines/azure-pipeline-build.yml | 2 +- .../test/containerstatus/container_status_test.go | 2 +- otelcollector/test/livenessprobe/liveness_test.go | 12 ++++++------ .../test/prometheusui/prometheus_ui_test.go | 9 +++++++-- .../test/querymetrics/query_metrics_test.go | 10 +++++----- 5 files changed, 20 insertions(+), 15 deletions(-) diff --git a/.pipelines/azure-pipeline-build.yml b/.pipelines/azure-pipeline-build.yml index 45eb9a6da..879e8b532 100644 --- a/.pipelines/azure-pipeline-build.yml +++ b/.pipelines/azure-pipeline-build.yml @@ -1638,7 +1638,7 @@ stages: displayName: "Apply TestKube CRs, scrape configs and pod/service monitors" - bash: | - sleep 300 + sleep 360 exit 0 displayName: "Wait for cluster to be ready" diff --git a/otelcollector/test/containerstatus/container_status_test.go b/otelcollector/test/containerstatus/container_status_test.go index da2a8bc5b..9653c94b7 100644 --- a/otelcollector/test/containerstatus/container_status_test.go +++ b/otelcollector/test/containerstatus/container_status_test.go @@ -106,7 +106,7 @@ var _ = DescribeTable("All processes are running", "MonAgentCore", }, Label(utils.WindowsLabel), - FlakeAttempts(1), + FlakeAttempts(3), ), ) diff --git a/otelcollector/test/livenessprobe/liveness_test.go b/otelcollector/test/livenessprobe/liveness_test.go index 1f5faf876..de5f275d5 100644 --- a/otelcollector/test/livenessprobe/liveness_test.go +++ b/otelcollector/test/livenessprobe/liveness_test.go @@ -57,13 +57,13 @@ var _ = Describe("When the daemonset prometheus-collector container liveness pro time.Sleep(180 * time.Second) }, Entry("otelcollector is not running, the container should restart", "kube-system", "dsName", "ama-metrics-node", "prometheus-collector", - "OpenTelemetryCollector is not running", "otelcollector", int64(180), FlakeAttempts(1), + "OpenTelemetryCollector is not running", "otelcollector", int64(180), FlakeAttempts(2), ), Entry("MetricsExtension is not running, the container should restart", "kube-system", "dsName", "ama-metrics-node", "prometheus-collector", - "Metrics Extension is not running (configuration exists)", "MetricsExtension", int64(180), FlakeAttempts(1), + "Metrics Extension is not running (configuration exists)", "MetricsExtension", int64(180), FlakeAttempts(2), ), Entry("mdsd is not running, the container should restart", "kube-system", "dsName", "ama-metrics-node", "prometheus-collector", - "mdsd is not running (configuration exists)", "mdsd -a -A -e", int64(180), FlakeAttempts(1), + "mdsd is not running (configuration exists)", "mdsd -a -A -e", int64(180), FlakeAttempts(2), ), ) @@ -89,13 +89,13 @@ var _ = Describe("When the windows prometheus-collector container liveness probe time.Sleep(240 * time.Second) }, Entry("otelcollector is not running, the container should restart", "kube-system", "dsName", "ama-metrics-win-node", "prometheus-collector", - "", "otelcollector", int64(300), FlakeAttempts(1), + "", "otelcollector", int64(300), FlakeAttempts(2), ), Entry("MetricsExtension is not running, the container should restart", "kube-system", "dsName", "ama-metrics-win-node", "prometheus-collector", - "", "MetricsExtension.Native", int64(300), FlakeAttempts(1), + "", "MetricsExtension.Native", int64(300), FlakeAttempts(2), ), Entry("mdsd is not running, the container should restart", "kube-system", "dsName", "ama-metrics-win-node", "prometheus-collector", - "", "MonAgentLauncher", int64(300), FlakeAttempts(1), + "", "MonAgentLauncher", int64(300), FlakeAttempts(2), ), ) diff --git a/otelcollector/test/prometheusui/prometheus_ui_test.go b/otelcollector/test/prometheusui/prometheus_ui_test.go index e0231e956..ccc487d33 100644 --- a/otelcollector/test/prometheusui/prometheus_ui_test.go +++ b/otelcollector/test/prometheusui/prometheus_ui_test.go @@ -64,6 +64,7 @@ var _ = DescribeTable("The Prometheus UI API should return the scrape pools", */ var _ = DescribeTable("The Prometheus UI API should return a valid config", func(namespace string, controllerLabelName string, controllerLabelValue string, containerName string, isLinux bool) { + time.Sleep(30 * time.Second) var apiResponse utils.APIResponse err := utils.QueryPromUIFromPod(K8sClient, Cfg, namespace, controllerLabelName, controllerLabelValue, containerName, "/api/v1/status/config", isLinux, &apiResponse) Expect(err).NotTo(HaveOccurred()) @@ -88,6 +89,8 @@ var _ = DescribeTable("The Prometheus UI API should return a valid config", */ var _ = DescribeTable("The Prometheus UI API should return the targets", func(namespace string, controllerLabelName string, controllerLabelValue string, containerName string, isLinux bool) { + time.Sleep(60 * time.Second) + var apiResponse utils.APIResponse err := utils.QueryPromUIFromPod(K8sClient, Cfg, namespace, controllerLabelName, controllerLabelValue, containerName, "/api/v1/targets", isLinux, &apiResponse) Expect(err).NotTo(HaveOccurred()) @@ -115,6 +118,8 @@ var _ = DescribeTable("The Prometheus UI API should return the targets", */ var _ = DescribeTable("The Prometheus UI API should return the targets metadata", func(namespace string, controllerLabelName string, controllerLabelValue string, containerName string, isLinux bool) { + time.Sleep(90 * time.Second) + var apiResponse utils.APIResponse queryPath := "/api/v1/targets/metadata?match_target=\\{job=\\\"prometheus_ref_app\\\"\\}" if !isLinux { @@ -148,7 +153,7 @@ var _ = DescribeTable("The Prometheus UI API should return the targets metadata" var _ = DescribeTable("The Prometheus UI should return the /metrics data", func(namespace string, controllerLabelName string, controllerLabelValue string, containerName string, isLinux bool) { - time.Sleep(60 * time.Second) + time.Sleep(120 * time.Second) pods, err := utils.GetPodsWithLabel(K8sClient, namespace, controllerLabelName, controllerLabelValue) Expect(err).NotTo(HaveOccurred()) @@ -181,7 +186,7 @@ var _ = DescribeTable("The Prometheus UI API should return the targets metadata" var _ = DescribeTable("The Prometheus UI should return a 200 for its UI pages", func(namespace string, controllerLabelName string, controllerLabelValue string, containerName string, isLinux bool, uiPaths []string) { - time.Sleep(90 * time.Second) + time.Sleep(180 * time.Second) pods, err := utils.GetPodsWithLabel(K8sClient, namespace, controllerLabelName, controllerLabelValue) Expect(err).NotTo(HaveOccurred()) diff --git a/otelcollector/test/querymetrics/query_metrics_test.go b/otelcollector/test/querymetrics/query_metrics_test.go index 8afe57d59..1d5770a2b 100644 --- a/otelcollector/test/querymetrics/query_metrics_test.go +++ b/otelcollector/test/querymetrics/query_metrics_test.go @@ -24,8 +24,8 @@ var _ = Describe("Query Metrics Test Suite", func() { Expect(warnings).To(BeEmpty()) vectorResult, ok := result.(model.Vector) - Expect(ok).To(BeTrue(), "result should be of type model.Vector") - Expect(vectorResult).NotTo(BeEmpty(), "result should not be empty") + Expect(ok).To(BeTrue(), "result should be of type model.Vector for metric %s", metric) + Expect(vectorResult).NotTo(BeEmpty(), "Metric %s is missing", metric) found := false noClusterLabel := false @@ -200,9 +200,9 @@ var _ = Describe("Query Metrics Test Suite", func() { "kube_node_status_condition", // "kube_node_spec_taint", "kube_pod_container_info", - "kube_persistentvolumeclaim_access_mode", - "kube_persistentvolumeclaim_labels", - "kube_persistentvolume_status_phase", + // "kube_persistentvolumeclaim_access_mode", + // "kube_persistentvolumeclaim_labels", + // "kube_persistentvolume_status_phase", }), )