From a48b245f6ca416469926d0e845105288b02c185e Mon Sep 17 00:00:00 2001 From: Yotam loewenbach Date: Mon, 27 Nov 2023 12:48:34 +0200 Subject: [PATCH 01/40] update deps --- go.mod | 5 +++-- go.sum | 10 ++++++---- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/go.mod b/go.mod index dfc7dfb..c8bb477 100644 --- a/go.mod +++ b/go.mod @@ -3,7 +3,8 @@ module github.com/logzio/prometheus-alerts-migrator go 1.19 require ( - github.com/logzio/logzio_terraform_client v1.19.0 + github.com/logzio/logzio_terraform_client v1.20.0 + github.com/prometheus/alertmanager v0.26.0 github.com/prometheus/common v0.44.0 github.com/prometheus/prometheus v0.47.2 github.com/stretchr/testify v1.8.4 @@ -21,7 +22,7 @@ require ( github.com/AzureAD/microsoft-authentication-library-for-go v1.0.0 // indirect github.com/alecthomas/units v0.0.0-20211218093645-b94a6e3cc137 // indirect github.com/avast/retry-go v3.0.0+incompatible // indirect - github.com/aws/aws-sdk-go v1.44.302 // indirect + github.com/aws/aws-sdk-go v1.44.317 // indirect github.com/beorn7/perks v1.0.1 // indirect github.com/cespare/xxhash/v2 v2.2.0 // indirect github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect diff --git a/go.sum b/go.sum index 50fac99..b499265 100644 --- a/go.sum +++ b/go.sum @@ -62,8 +62,8 @@ github.com/armon/go-metrics v0.4.1 h1:hR91U9KYmb6bLBYLQjyM+3j+rcd/UhE+G78SFnF8gJ github.com/avast/retry-go v3.0.0+incompatible h1:4SOWQ7Qs+oroOTQOYnAHqelpCO0biHSxpiH9JdtuBj0= github.com/avast/retry-go v3.0.0+incompatible/go.mod h1:XtSnn+n/sHqQIpZ10K1qAevBhOOCWBLXXy3hyiqqBrY= github.com/aws/aws-sdk-go v1.38.35/go.mod h1:hcU610XS61/+aQV88ixoOzUoG7v3b31pl2zKMmprdro= -github.com/aws/aws-sdk-go v1.44.302 h1:ST3ko6GrJKn3Xi+nAvxjG3uk/V1pW8KC52WLeIxqqNk= -github.com/aws/aws-sdk-go v1.44.302/go.mod h1:aVsgQcEevwlmQ7qHE9I3h+dtQgpqhFB+i8Phjh7fkwI= +github.com/aws/aws-sdk-go v1.44.317 h1:+8XWrLmGMwPPXSRSLPzhgcGnzJ2mYkgkrcB9C/GnSOU= +github.com/aws/aws-sdk-go v1.44.317/go.mod h1:aVsgQcEevwlmQ7qHE9I3h+dtQgpqhFB+i8Phjh7fkwI= github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q= github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+CedLV8= github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= @@ -264,8 +264,8 @@ github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc= github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw= github.com/linode/linodego v1.19.0 h1:n4WJrcr9+30e9JGZ6DI0nZbm5SdAj1kSwvvt/998YUw= -github.com/logzio/logzio_terraform_client v1.19.0 h1:PV6q/ezMtzljjVDq0rEWiG7M5CxCdu7csh6ndOztZSI= -github.com/logzio/logzio_terraform_client v1.19.0/go.mod h1:hEQixCq9RPpvyzWerxIWKf0SYgangyWpPeogN7nytC0= +github.com/logzio/logzio_terraform_client v1.20.0 h1:0eynfD4nDB5H7pNwsodWeff6fh4Ccd7Cj8DGaWwRnyU= +github.com/logzio/logzio_terraform_client v1.20.0/go.mod h1:hEQixCq9RPpvyzWerxIWKf0SYgangyWpPeogN7nytC0= github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0= github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc= github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA= @@ -304,6 +304,8 @@ github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINE github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/pmezard/go-difflib 
v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U= github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/prometheus/alertmanager v0.26.0 h1:uOMJWfIwJguc3NaM3appWNbbrh6G/OjvaHMk22aBBYc= +github.com/prometheus/alertmanager v0.26.0/go.mod h1:rVcnARltVjavgVaNnmevxK7kOn7IZavyf0KNgHkbEpU= github.com/prometheus/client_golang v0.9.1/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw= github.com/prometheus/client_golang v1.0.0/go.mod h1:db9x61etRT2tGnBNRi70OPL5FsnadC4Ky3P0J6CfImo= github.com/prometheus/client_golang v1.7.1/go.mod h1:PY5Wy2awLA44sXw4AOSfFBetzPP4j5+D6mVACh+pe2M= From 66dcf3d6c5140b475df57e160cb85e74c87b3432 Mon Sep 17 00:00:00 2001 From: Yotam loewenbach Date: Mon, 27 Nov 2023 12:50:01 +0200 Subject: [PATCH 02/40] move `utils.go` -> `common.go` --- controller/utils.go => common/common.go | 38 ++++++++++++++----------- 1 file changed, 22 insertions(+), 16 deletions(-) rename controller/utils.go => common/common.go (73%) diff --git a/controller/utils.go b/common/common.go similarity index 73% rename from controller/utils.go rename to common/common.go index 681c98d..1e8423d 100644 --- a/controller/utils.go +++ b/common/common.go @@ -1,24 +1,30 @@ -package controller +package common import ( "fmt" + "github.com/logzio/logzio_terraform_client/grafana_alerts" + "github.com/prometheus/prometheus/model/rulefmt" + corev1 "k8s.io/api/core/v1" + "k8s.io/client-go/rest" + "k8s.io/client-go/tools/clientcmd" + "k8s.io/client-go/util/homedir" "math/rand" "os" "path/filepath" "reflect" "strconv" "time" +) - "github.com/logzio/logzio_terraform_client/grafana_alerts" - "github.com/prometheus/prometheus/model/rulefmt" - corev1 "k8s.io/api/core/v1" - "k8s.io/client-go/rest" - "k8s.io/client-go/tools/clientcmd" - "k8s.io/client-go/util/homedir" +const ( + LetterBytes = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" + letterIdxBits = 6 // 6 bits to represent a letter index + letterIdxMask = 1<>= letterIdxBits @@ -39,8 +45,8 @@ func generateRandomString(n int) string { return string(b) } -// parseDuration turns a duration string (example: 5m, 1h) into an int64 value -func parseDuration(durationStr string) (int64, error) { +// ParseDuration turns a duration string (example: 5m, 1h) into an int64 value +func ParseDuration(durationStr string) (int64, error) { // Check if the string is empty if durationStr == "" { return 0, fmt.Errorf("duration string is empty") @@ -62,7 +68,7 @@ func parseDuration(durationStr string) (int64, error) { return int64(duration), nil } -func createNameStub(cm *corev1.ConfigMap) string { +func CreateNameStub(cm *corev1.ConfigMap) string { name := cm.GetObjectMeta().GetName() namespace := cm.GetObjectMeta().GetNamespace() @@ -71,7 +77,7 @@ func createNameStub(cm *corev1.ConfigMap) string { // isAlertEqual compares two AlertRule objects for equality. // You should expand this function to compare all relevant fields of AlertRule. -func isAlertEqual(rule rulefmt.RuleNode, grafanaRule grafana_alerts.GrafanaAlertRule) bool { +func IsAlertEqual(rule rulefmt.RuleNode, grafanaRule grafana_alerts.GrafanaAlertRule) bool { // Start with name comparison; if these don't match, they're definitely not equal. 
if rule.Alert.Value != grafanaRule.Title { return false @@ -82,7 +88,7 @@ func isAlertEqual(rule rulefmt.RuleNode, grafanaRule grafana_alerts.GrafanaAlert if !reflect.DeepEqual(rule.Annotations, grafanaRule.Annotations) { return false } - forAtt, _ := parseDuration(rule.For.String()) + forAtt, _ := ParseDuration(rule.For.String()) if forAtt != grafanaRule.For { return false } From 653e82a0116f48765ad3268110c3d416dc1e0785 Mon Sep 17 00:00:00 2001 From: Yotam loewenbach Date: Mon, 27 Nov 2023 12:50:23 +0200 Subject: [PATCH 03/40] create `LogzioGrafanaAlertsClient` struct --- controller/controller.go | 324 ++++--------------- logzio_alerts_client/logzio_alerts_client.go | 269 +++++++++++++++ 2 files changed, 330 insertions(+), 263 deletions(-) create mode 100644 logzio_alerts_client/logzio_alerts_client.go diff --git a/controller/controller.go b/controller/controller.go index dad7991..8e24316 100644 --- a/controller/controller.go +++ b/controller/controller.go @@ -2,13 +2,12 @@ package controller import ( "context" - "encoding/json" "fmt" + "github.com/logzio/prometheus-alerts-migrator/common" + "github.com/logzio/prometheus-alerts-migrator/logzio_alerts_client" "time" "github.com/logzio/logzio_terraform_client/grafana_alerts" - "github.com/logzio/logzio_terraform_client/grafana_datasources" - "github.com/logzio/logzio_terraform_client/grafana_folders" "github.com/prometheus/prometheus/model/rulefmt" _ "github.com/prometheus/prometheus/promql/parser" "gopkg.in/yaml.v3" @@ -30,55 +29,10 @@ import ( ) const ( - alertFolder = "prometheus-alerts" controllerAgentName = "logzio-prometheus-alerts-migrator-controller" ErrInvalidKey = "InvalidKey" - letterBytes = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" - letterIdxBits = 6 // 6 bits to represent a letter index - letterIdxMask = 1< 0 { - c.writeRules(toAdd, folderUid) + c.logzioGrafanaAlertsClient.WriteRules(toAdd, folderUid) } if len(toUpdate) > 0 { - c.updateRules(toUpdate, logzioRulesMap, folderUid) + c.logzioGrafanaAlertsClient.UpdateRules(toUpdate, logzioRulesMap, folderUid) } if len(toDelete) > 0 { - c.deleteRules(toDelete, folderUid) + c.logzioGrafanaAlertsClient.DeleteRules(toDelete, folderUid) } return nil } -// deleteRules deletes the rules from logz.io -func (c *Controller) deleteRules(rulesToDelete []grafana_alerts.GrafanaAlertRule, folderUid string) { - for _, rule := range rulesToDelete { - err := c.logzioAlertClient.DeleteGrafanaAlertRule(rule.Uid) - if err != nil { - klog.Warningf("Error deleting rule: %s - %s", rule.Title, err.Error()) - } - } -} - -// updateRules updates the rules in logz.io -func (c *Controller) updateRules(rulesToUpdate []rulefmt.RuleNode, logzioRulesMap map[string]grafana_alerts.GrafanaAlertRule, folderUid string) { - for _, rule := range rulesToUpdate { - // Retrieve the existing GrafanaAlertRule to get the Uid. - existingRule := logzioRulesMap[rule.Alert.Value] - alert, err := c.generateGrafanaAlert(rule, folderUid) - if err != nil { - klog.Warning(err) - continue // Skip this rule and continue with the next - } - // Set the Uid from the existing rule. 
- alert.Uid = existingRule.Uid - err = c.logzioAlertClient.UpdateGrafanaAlertRule(alert) - if err != nil { - klog.Warningf("Error updating rule: %s - %s", alert.Title, err.Error()) - } - } -} - -// writeRules writes the rules to logz.io -func (c *Controller) writeRules(rulesToWrite []rulefmt.RuleNode, folderUid string) { - for _, rule := range rulesToWrite { - alert, err := c.generateGrafanaAlert(rule, folderUid) - if err != nil { - klog.Warning(err) - } - _, err = c.logzioAlertClient.CreateGrafanaAlertRule(alert) - if err != nil { - klog.Warning("Error writing rule:", alert.Title, err.Error()) - } - } -} - -// generateGrafanaAlert generates a GrafanaAlertRule from a Prometheus rule -func (c *Controller) generateGrafanaAlert(rule rulefmt.RuleNode, folderUid string) (grafana_alerts.GrafanaAlertRule, error) { - // Create promql query to return time series data for the expression. - promqlQuery := PrometheusQueryModel{ - Expr: rule.Expr.Value, - Hide: false, - RefId: refIdA, - } - // Use the ToJSON method to marshal the Query struct. - promqlModel, err := promqlQuery.ToJSON() - if err != nil { - return grafana_alerts.GrafanaAlertRule{}, err - } - queryA := grafana_alerts.GrafanaAlertQuery{ - DatasourceUid: c.rulesDataSource, - Model: promqlModel, - RefId: refIdA, - QueryType: queryType, - RelativeTimeRange: grafana_alerts.RelativeTimeRangeObj{ - From: 300, - To: 0, - }, - } - // Create reduce query to return the reduced last value of the time series data. - reduceQuery := ReduceQueryModel{ - DataSource: map[string]string{ - "type": expressionString, - "uid": expressionString, - }, - Expression: refIdA, - Hide: false, - RefId: refIdB, - Reducer: "last", - Type: "reduce", - } - reduceModel, err := reduceQuery.ToJSON() - if err != nil { - return grafana_alerts.GrafanaAlertRule{}, err - } - queryB := grafana_alerts.GrafanaAlertQuery{ - DatasourceUid: expressionString, - Model: reduceModel, - RefId: refIdB, - QueryType: "", - RelativeTimeRange: grafana_alerts.RelativeTimeRangeObj{ - From: 300, - To: 0, - }, - } - duration, err := parseDuration(rule.For.String()) - if err != nil { - return grafana_alerts.GrafanaAlertRule{}, err - } - - // Create the GrafanaAlertRule, we are alerting on the reduced last value of the time series data (query b). 
- grafanaAlert := grafana_alerts.GrafanaAlertRule{ - Annotations: rule.Annotations, - Condition: refIdB, - Data: []*grafana_alerts.GrafanaAlertQuery{&queryA, &queryB}, - FolderUID: folderUid, - NoDataState: grafana_alerts.NoDataOk, - ExecErrState: grafana_alerts.ErrOK, - Labels: rule.Labels, - OrgID: 1, - RuleGroup: rule.Alert.Value, - Title: rule.Alert.Value, - For: duration, - } - return grafanaAlert, nil -} - // enqueueConfigMap get the cm on the workqueue func (c *Controller) enqueueConfigMap(obj interface{}) { var key string @@ -497,50 +322,10 @@ func (c *Controller) getClusterAlertRules(mapList *corev1.ConfigMapList) *[]rule return &finalRules } -// getLogzioGrafanaAlerts builds a list of rules from all logz.io -func (c *Controller) getLogzioGrafanaAlerts(folderUid string) ([]grafana_alerts.GrafanaAlertRule, error) { - alertRules, ListLogzioRulesErr := c.logzioAlertClient.ListGrafanaAlertRules() - if ListLogzioRulesErr != nil { - return nil, ListLogzioRulesErr - } - // find all alerts inside prometheus alerts folder - var alertsInFolder []grafana_alerts.GrafanaAlertRule - for _, rule := range alertRules { - if rule.FolderUID == folderUid { - alertsInFolder = append(alertsInFolder, rule) - } - } - return alertsInFolder, nil -} - -// findOrCreatePrometheusAlertsFolder tries to find the prometheus alerts folder in logz.io, if it does not exist it creates it. -func (c *Controller) findOrCreatePrometheusAlertsFolder() (string, error) { - folders, err := c.logzioFolderClient.ListGrafanaFolders() - if err != nil { - return "", err - } - envFolderTitle := fmt.Sprintf("%s-%s", c.envId, alertFolder) - // try to find the prometheus alerts folder - for _, folder := range folders { - if folder.Title == envFolderTitle { - return folder.Uid, nil - } - } - // if not found, create the prometheus alerts folder - grafanaFolder, err := c.logzioFolderClient.CreateGrafanaFolder(grafana_folders.CreateUpdateFolder{ - Uid: fmt.Sprintf("%s-%s", envFolderTitle, generateRandomString(randomStringLength)), - Title: envFolderTitle, - }) - if err != nil { - return "", err - } - return grafanaFolder.Uid, nil -} - // extractValues extracts the rules from the configmap, and validates them func (c *Controller) extractValues(cm *corev1.ConfigMap) []rulefmt.RuleNode { - fallbackNameStub := createNameStub(cm) + fallbackNameStub := common.CreateNameStub(cm) var toalRules []rulefmt.RuleNode @@ -604,7 +389,7 @@ func (c *Controller) compareAlertRules(k8sRulesMap map[string]rulefmt.RuleNode, if !exists { // Alert doesn't exist in Logz.io, needs to be added. toAdd = append(toAdd, k8sRule) - } else if !isAlertEqual(k8sRule, logzioRule) { + } else if !common.IsAlertEqual(k8sRule, logzioRule) { // Alert exists but differs, needs to be updated. 
toUpdate = append(toUpdate, k8sRule) } @@ -629,11 +414,24 @@ func (c *Controller) isRuleConfigMap(cm *corev1.ConfigMap) bool { annotations := cm.GetObjectMeta().GetAnnotations() for key := range annotations { - if key == *c.interestingAnnotation { + if key == *c.rulesAnnotation { return true } } + return false +} +// isAlertManagerConfigMap checks if the configmap is a rule configmap +func (c *Controller) isAlertManagerConfigMap(cm *corev1.ConfigMap) bool { + if cm == nil { + return false + } + annotations := cm.GetObjectMeta().GetAnnotations() + for key := range annotations { + if key == *c.alertManagerAnnotation { + return true + } + } return false } @@ -645,8 +443,8 @@ func (c *Controller) haveConfigMapsChanged(mapList *corev1.ConfigMapList) bool { return false } for _, cm := range mapList.Items { - if c.isRuleConfigMap(&cm) { - stub := createNameStub(&cm) + if c.isRuleConfigMap(&cm) || c.isAlertManagerConfigMap(&cm) { + stub := common.CreateNameStub(&cm) val, ok := c.resourceVersionMap[stub] if !ok { // new configmap diff --git a/logzio_alerts_client/logzio_alerts_client.go b/logzio_alerts_client/logzio_alerts_client.go new file mode 100644 index 0000000..df28230 --- /dev/null +++ b/logzio_alerts_client/logzio_alerts_client.go @@ -0,0 +1,269 @@ +package logzio_alerts_client + +import ( + "encoding/json" + "fmt" + "github.com/logzio/logzio_terraform_client/grafana_alerts" + "github.com/logzio/logzio_terraform_client/grafana_contact_points" + "github.com/logzio/logzio_terraform_client/grafana_datasources" + "github.com/logzio/logzio_terraform_client/grafana_folders" + "github.com/logzio/logzio_terraform_client/grafana_notification_policies" + "github.com/logzio/prometheus-alerts-migrator/common" + "github.com/prometheus/prometheus/model/rulefmt" + "k8s.io/klog" +) + +const ( + refIdA = "A" + refIdB = "B" + expressionString = "__expr__" + queryType = "query" + alertFolder = "prometheus-alerts" + randomStringLength = 5 +) + +// ReduceQueryModel represents a reduce query for time series data +type ReduceQueryModel struct { + DataSource map[string]string `json:"datasource"` + Expression string `json:"expression"` + Hide bool `json:"hide"` + RefId string `json:"refId"` + Reducer string `json:"reducer"` + Type string `json:"type"` +} + +// ToJSON marshals the Query model into a JSON byte slice +func (r ReduceQueryModel) ToJSON() (json.RawMessage, error) { + marshaled, err := json.Marshal(r) + if err != nil { + return nil, err + } + return marshaled, nil +} + +// PrometheusQueryModel represents a Prometheus query. 
+type PrometheusQueryModel struct { + Expr string `json:"expr"` + Hide bool `json:"hide"` + RefId string `json:"refId"` +} + +// ToJSON marshals the Query into a JSON byte slice +func (p PrometheusQueryModel) ToJSON() (json.RawMessage, error) { + marshaled, err := json.Marshal(p) + if err != nil { + return nil, err + } + return marshaled, nil +} + +type LogzioGrafanaAlertsClient struct { + logzioAlertClient *grafana_alerts.GrafanaAlertClient + logzioFolderClient *grafana_folders.GrafanaFolderClient + logzioDataSourceClient *grafana_datasources.GrafanaDatasourceClient + logzioContactPointClient *grafana_contact_points.GrafanaContactPointClient + logzioNotificationPolicyClient *grafana_notification_policies.GrafanaNotificationPolicyClient + rulesDataSource string + envId string +} + +func NewLogzioGrafanaAlertsClient(logzioApiToken string, logzioApiUrl string, rulesDs string, envId string) *LogzioGrafanaAlertsClient { + logzioAlertClient, err := grafana_alerts.New(logzioApiToken, logzioApiUrl) + if err != nil { + klog.Errorf("Failed to create logzio alert client: %v", err) + return nil + } + logzioFolderClient, err := grafana_folders.New(logzioApiToken, logzioApiUrl) + if err != nil { + klog.Errorf("Failed to create logzio folder client: %v", err) + return nil + } + logzioDataSourceClient, err := grafana_datasources.New(logzioApiToken, logzioApiUrl) + if err != nil { + klog.Errorf("Failed to create logzio datasource client: %v", err) + return nil + } + logzioContactPointClient, err := grafana_contact_points.New(logzioApiToken, logzioApiUrl) + if err != nil { + klog.Errorf("Failed to create logzio contact point client: %v", err) + return nil + } + logzioNotificationPolicyClient, err := grafana_notification_policies.New(logzioApiToken, logzioApiUrl) + if err != nil { + klog.Errorf("Failed to create logzio notification policy client: %v", err) + return nil + } + // get datasource uid and validate value and type + rulesDsData, err := logzioDataSourceClient.GetForAccount(rulesDs) + if err != nil || rulesDsData.Uid == "" { + klog.Errorf("Failed to get datasource uid: %v", err) + return nil + } + if rulesDsData.Type != "prometheus" { + klog.Errorf("Datasource type is not prometheus: %v", err) + return nil + } + return &LogzioGrafanaAlertsClient{ + logzioAlertClient: logzioAlertClient, + logzioFolderClient: logzioFolderClient, + logzioDataSourceClient: logzioDataSourceClient, + logzioContactPointClient: logzioContactPointClient, + logzioNotificationPolicyClient: logzioNotificationPolicyClient, + rulesDataSource: rulesDsData.Uid, + envId: envId, + } +} + +// DeleteRules deletes the rules from logz.io +func (l *LogzioGrafanaAlertsClient) DeleteRules(rulesToDelete []grafana_alerts.GrafanaAlertRule, folderUid string) { + for _, rule := range rulesToDelete { + err := l.logzioAlertClient.DeleteGrafanaAlertRule(rule.Uid) + if err != nil { + klog.Warningf("Error deleting rule: %s - %s", rule.Title, err.Error()) + } + } +} + +// UpdateRules updates the rules in logz.io +func (l *LogzioGrafanaAlertsClient) UpdateRules(rulesToUpdate []rulefmt.RuleNode, logzioRulesMap map[string]grafana_alerts.GrafanaAlertRule, folderUid string) { + for _, rule := range rulesToUpdate { + // Retrieve the existing GrafanaAlertRule to get the Uid. + existingRule := logzioRulesMap[rule.Alert.Value] + alert, err := l.generateGrafanaAlert(rule, folderUid) + if err != nil { + klog.Warning(err) + continue // Skip this rule and continue with the next + } + // Set the Uid from the existing rule. 
+ alert.Uid = existingRule.Uid + err = l.logzioAlertClient.UpdateGrafanaAlertRule(alert) + if err != nil { + klog.Warningf("Error updating rule: %s - %s", alert.Title, err.Error()) + } + } +} + +// WriteRules writes the rules to logz.io +func (l *LogzioGrafanaAlertsClient) WriteRules(rulesToWrite []rulefmt.RuleNode, folderUid string) { + for _, rule := range rulesToWrite { + alert, err := l.generateGrafanaAlert(rule, folderUid) + if err != nil { + klog.Warning(err) + } + _, err = l.logzioAlertClient.CreateGrafanaAlertRule(alert) + if err != nil { + klog.Warning("Error writing rule:", alert.Title, err.Error()) + } + } +} + +// generateGrafanaAlert generates a GrafanaAlertRule from a Prometheus rule +func (l *LogzioGrafanaAlertsClient) generateGrafanaAlert(rule rulefmt.RuleNode, folderUid string) (grafana_alerts.GrafanaAlertRule, error) { + // Create promql query to return time series data for the expression. + promqlQuery := PrometheusQueryModel{ + Expr: rule.Expr.Value, + Hide: false, + RefId: refIdA, + } + // Use the ToJSON method to marshal the Query struct. + promqlModel, err := promqlQuery.ToJSON() + if err != nil { + return grafana_alerts.GrafanaAlertRule{}, err + } + queryA := grafana_alerts.GrafanaAlertQuery{ + DatasourceUid: l.rulesDataSource, + Model: promqlModel, + RefId: refIdA, + QueryType: queryType, + RelativeTimeRange: grafana_alerts.RelativeTimeRangeObj{ + From: 300, + To: 0, + }, + } + // Create reduce query to return the reduced last value of the time series data. + reduceQuery := ReduceQueryModel{ + DataSource: map[string]string{ + "type": expressionString, + "uid": expressionString, + }, + Expression: refIdA, + Hide: false, + RefId: refIdB, + Reducer: "last", + Type: "reduce", + } + reduceModel, err := reduceQuery.ToJSON() + if err != nil { + return grafana_alerts.GrafanaAlertRule{}, err + } + queryB := grafana_alerts.GrafanaAlertQuery{ + DatasourceUid: expressionString, + Model: reduceModel, + RefId: refIdB, + QueryType: "", + RelativeTimeRange: grafana_alerts.RelativeTimeRangeObj{ + From: 300, + To: 0, + }, + } + duration, err := common.ParseDuration(rule.For.String()) + if err != nil { + return grafana_alerts.GrafanaAlertRule{}, err + } + + // Create the GrafanaAlertRule, we are alerting on the reduced last value of the time series data (query B). + grafanaAlert := grafana_alerts.GrafanaAlertRule{ + Annotations: rule.Annotations, + Condition: refIdB, + Data: []*grafana_alerts.GrafanaAlertQuery{&queryA, &queryB}, + FolderUID: folderUid, + NoDataState: grafana_alerts.NoDataOk, + ExecErrState: grafana_alerts.ErrOK, + Labels: rule.Labels, + OrgID: 1, + RuleGroup: rule.Alert.Value, + Title: rule.Alert.Value, + For: duration, + } + return grafanaAlert, nil +} + +// GetLogzioGrafanaAlerts builds a list of rules from all logz.io +func (l *LogzioGrafanaAlertsClient) GetLogzioGrafanaAlerts(folderUid string) ([]grafana_alerts.GrafanaAlertRule, error) { + alertRules, ListLogzioRulesErr := l.logzioAlertClient.ListGrafanaAlertRules() + if ListLogzioRulesErr != nil { + return nil, ListLogzioRulesErr + } + // find all alerts inside prometheus alerts folder + var alertsInFolder []grafana_alerts.GrafanaAlertRule + for _, rule := range alertRules { + if rule.FolderUID == folderUid { + alertsInFolder = append(alertsInFolder, rule) + } + } + return alertsInFolder, nil +} + +// FindOrCreatePrometheusAlertsFolder tries to find the prometheus alerts folder in logz.io, if it does not exist it creates it. 
+func (l *LogzioGrafanaAlertsClient) FindOrCreatePrometheusAlertsFolder() (string, error) { + folders, err := l.logzioFolderClient.ListGrafanaFolders() + if err != nil { + return "", err + } + envFolderTitle := fmt.Sprintf("%s-%s", l.envId, alertFolder) + // try to find the prometheus alerts folder + for _, folder := range folders { + if folder.Title == envFolderTitle { + return folder.Uid, nil + } + } + // if not found, create the prometheus alerts folder + grafanaFolder, err := l.logzioFolderClient.CreateGrafanaFolder(grafana_folders.CreateUpdateFolder{ + Uid: fmt.Sprintf("%s-%s", envFolderTitle, common.GenerateRandomString(randomStringLength)), + Title: envFolderTitle, + }) + if err != nil { + return "", err + } + return grafanaFolder.Uid, nil +} From 242314e0cb55a3aa410fe02af2b306db780200b0 Mon Sep 17 00:00:00 2001 From: Yotam loewenbach Date: Mon, 27 Nov 2023 12:53:21 +0200 Subject: [PATCH 04/40] upgrade to `klog/v2` --- controller/controller.go | 2 +- go.mod | 5 ++--- go.sum | 9 ++++----- logzio_alerts_client/logzio_alerts_client.go | 2 +- 4 files changed, 8 insertions(+), 10 deletions(-) diff --git a/controller/controller.go b/controller/controller.go index 8e24316..86db929 100644 --- a/controller/controller.go +++ b/controller/controller.go @@ -18,7 +18,7 @@ import ( "k8s.io/client-go/tools/cache" "k8s.io/client-go/tools/record" "k8s.io/client-go/util/workqueue" - "k8s.io/klog" + "k8s.io/klog/v2" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" diff --git a/go.mod b/go.mod index c8bb477..dc1a17a 100644 --- a/go.mod +++ b/go.mod @@ -12,7 +12,7 @@ require ( k8s.io/api v0.28.3 k8s.io/apimachinery v0.28.3 k8s.io/client-go v0.28.3 - k8s.io/klog v1.0.0 + k8s.io/klog/v2 v2.110.1 ) require ( @@ -31,7 +31,7 @@ require ( github.com/emicklei/go-restful/v3 v3.10.2 // indirect github.com/go-kit/log v0.2.1 // indirect github.com/go-logfmt/logfmt v0.6.0 // indirect - github.com/go-logr/logr v1.2.4 // indirect + github.com/go-logr/logr v1.3.0 // indirect github.com/go-logr/stdr v1.2.2 // indirect github.com/go-openapi/jsonpointer v0.20.0 // indirect github.com/go-openapi/jsonreference v0.20.2 // indirect @@ -86,7 +86,6 @@ require ( google.golang.org/protobuf v1.31.0 // indirect gopkg.in/inf.v0 v0.9.1 // indirect gopkg.in/yaml.v2 v2.4.0 // indirect - k8s.io/klog/v2 v2.100.1 // indirect k8s.io/kube-openapi v0.0.0-20230717233707-2695361300d9 // indirect k8s.io/utils v0.0.0-20230711102312-30195339c3c7 // indirect sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect diff --git a/go.sum b/go.sum index b499265..b3a69c7 100644 --- a/go.sum +++ b/go.sum @@ -117,10 +117,9 @@ github.com/go-logfmt/logfmt v0.5.0/go.mod h1:wCYkCAKZfumFQihp8CzCvQ3paCTfi41vtzG github.com/go-logfmt/logfmt v0.6.0 h1:wGYYu3uicYdqXVgoYbvnkrPVXkuLM1p1ifugDMEdRi4= github.com/go-logfmt/logfmt v0.6.0/go.mod h1:WYhtIu8zTZfxdn5+rREduYbwxfcBr/Vr6KEVveWlfTs= github.com/go-logr/logr v0.1.0/go.mod h1:ixOQHD9gLJUVQQ2ZOR7zLEifBX6tGkNJF4QyIY7sIas= -github.com/go-logr/logr v1.2.0/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= -github.com/go-logr/logr v1.2.4 h1:g01GSCwiDw2xSZfjJ2/T9M+S6pFdcNtFYsp+Y43HYDQ= -github.com/go-logr/logr v1.2.4/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= +github.com/go-logr/logr v1.3.0 h1:2y3SDp0ZXuc6/cjLSZ+Q3ir+QB9T/iG5yYRXqsagWSY= +github.com/go-logr/logr v1.3.0/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= github.com/go-logr/stdr v1.2.2 
h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= github.com/go-openapi/jsonpointer v0.19.6/go.mod h1:osyAmYz/mB/C3I+WsTTSgw1ONzaLJoLCyoi6/zppojs= @@ -707,8 +706,8 @@ k8s.io/client-go v0.28.3 h1:2OqNb72ZuTZPKCl+4gTKvqao0AMOl9f3o2ijbAj3LI4= k8s.io/client-go v0.28.3/go.mod h1:LTykbBp9gsA7SwqirlCXBWtK0guzfhpoW4qSm7i9dxo= k8s.io/klog v1.0.0 h1:Pt+yjF5aB1xDSVbau4VsWe+dQNzA0qv1LlXdC2dF6Q8= k8s.io/klog v1.0.0/go.mod h1:4Bi6QPql/J/LkTDqv7R/cd3hPo4k2DG6Ptcz060Ez5I= -k8s.io/klog/v2 v2.100.1 h1:7WCHKK6K8fNhTqfBhISHQ97KrnJNFZMcQvKp7gP/tmg= -k8s.io/klog/v2 v2.100.1/go.mod h1:y1WjHnz7Dj687irZUWR/WLkLc5N1YHtjLdmgWjndZn0= +k8s.io/klog/v2 v2.110.1 h1:U/Af64HJf7FcwMcXyKm2RPM22WZzyR7OSpYj5tg3cL0= +k8s.io/klog/v2 v2.110.1/go.mod h1:YGtd1984u+GgbuZ7e08/yBuAfKLSO0+uR1Fhi6ExXjo= k8s.io/kube-openapi v0.0.0-20230717233707-2695361300d9 h1:LyMgNKD2P8Wn1iAwQU5OhxCKlKJy0sHc+PcDwFB24dQ= k8s.io/kube-openapi v0.0.0-20230717233707-2695361300d9/go.mod h1:wZK2AVp1uHCp4VamDVgBP2COHZjqD1T68Rf0CM3YjSM= k8s.io/utils v0.0.0-20230711102312-30195339c3c7 h1:ZgnF1KZsYxWIifwSNZFZgNtWE89WI5yiP5WwlfDoIyc= diff --git a/logzio_alerts_client/logzio_alerts_client.go b/logzio_alerts_client/logzio_alerts_client.go index df28230..f353c1c 100644 --- a/logzio_alerts_client/logzio_alerts_client.go +++ b/logzio_alerts_client/logzio_alerts_client.go @@ -10,7 +10,7 @@ import ( "github.com/logzio/logzio_terraform_client/grafana_notification_policies" "github.com/logzio/prometheus-alerts-migrator/common" "github.com/prometheus/prometheus/model/rulefmt" - "k8s.io/klog" + "k8s.io/klog/v2" ) const ( From b8544571b1b15a557588f1b9156a4bf2ab724998 Mon Sep 17 00:00:00 2001 From: Yotam loewenbach Date: Mon, 27 Nov 2023 13:34:05 +0200 Subject: [PATCH 05/40] go mod tidy --- go.sum | 3 --- 1 file changed, 3 deletions(-) diff --git a/go.sum b/go.sum index b3a69c7..404fc03 100644 --- a/go.sum +++ b/go.sum @@ -116,7 +116,6 @@ github.com/go-logfmt/logfmt v0.4.0/go.mod h1:3RMwSq7FuexP4Kalkev3ejPJsZTpXXBr9+V github.com/go-logfmt/logfmt v0.5.0/go.mod h1:wCYkCAKZfumFQihp8CzCvQ3paCTfi41vtzG1KdI/P7A= github.com/go-logfmt/logfmt v0.6.0 h1:wGYYu3uicYdqXVgoYbvnkrPVXkuLM1p1ifugDMEdRi4= github.com/go-logfmt/logfmt v0.6.0/go.mod h1:WYhtIu8zTZfxdn5+rREduYbwxfcBr/Vr6KEVveWlfTs= -github.com/go-logr/logr v0.1.0/go.mod h1:ixOQHD9gLJUVQQ2ZOR7zLEifBX6tGkNJF4QyIY7sIas= github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= github.com/go-logr/logr v1.3.0 h1:2y3SDp0ZXuc6/cjLSZ+Q3ir+QB9T/iG5yYRXqsagWSY= github.com/go-logr/logr v1.3.0/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= @@ -704,8 +703,6 @@ k8s.io/apimachinery v0.28.3 h1:B1wYx8txOaCQG0HmYF6nbpU8dg6HvA06x5tEffvOe7A= k8s.io/apimachinery v0.28.3/go.mod h1:uQTKmIqs+rAYaq+DFaoD2X7pcjLOqbQX2AOiO0nIpb8= k8s.io/client-go v0.28.3 h1:2OqNb72ZuTZPKCl+4gTKvqao0AMOl9f3o2ijbAj3LI4= k8s.io/client-go v0.28.3/go.mod h1:LTykbBp9gsA7SwqirlCXBWtK0guzfhpoW4qSm7i9dxo= -k8s.io/klog v1.0.0 h1:Pt+yjF5aB1xDSVbau4VsWe+dQNzA0qv1LlXdC2dF6Q8= -k8s.io/klog v1.0.0/go.mod h1:4Bi6QPql/J/LkTDqv7R/cd3hPo4k2DG6Ptcz060Ez5I= k8s.io/klog/v2 v2.110.1 h1:U/Af64HJf7FcwMcXyKm2RPM22WZzyR7OSpYj5tg3cL0= k8s.io/klog/v2 v2.110.1/go.mod h1:YGtd1984u+GgbuZ7e08/yBuAfKLSO0+uR1Fhi6ExXjo= k8s.io/kube-openapi v0.0.0-20230717233707-2695361300d9 h1:LyMgNKD2P8Wn1iAwQU5OhxCKlKJy0sHc+PcDwFB24dQ= From 0ec93d40ca371bc680a69e99cc7633449a5cecba Mon Sep 17 00:00:00 2001 From: Yotam loewenbach Date: Mon, 27 Nov 2023 13:34:20 +0200 Subject: [PATCH 06/40] add 
`AlertManagerAnnotation` to config --- main.go | 54 ++++++++++++++++++++++++++++++++---------------------- 1 file changed, 32 insertions(+), 22 deletions(-) diff --git a/main.go b/main.go index bee5ea4..8cdf25a 100644 --- a/main.go +++ b/main.go @@ -2,13 +2,14 @@ package main import ( "flag" + "github.com/logzio/prometheus-alerts-migrator/common" "os" "strconv" "time" "k8s.io/client-go/informers" "k8s.io/client-go/kubernetes" - "k8s.io/klog" + "k8s.io/klog/v2" "github.com/logzio/prometheus-alerts-migrator/controller" "github.com/logzio/prometheus-alerts-migrator/pkg/signals" @@ -16,19 +17,21 @@ import ( // Config holds all the configuration needed for the application to run. type Config struct { - Annotation string - LogzioAPIToken string - LogzioAPIURL string - RulesDS string - EnvID string - WorkerCount int + RulesAnnotation string + AlertManagerAnnotation string + LogzioAPIToken string + LogzioAPIURL string + RulesDS string + EnvID string + WorkerCount int } // NewConfig creates a Config struct, populating it with values from command-line flags and environment variables. func NewConfig() *Config { // Define flags helpFlag := flag.Bool("help", false, "Display help") - configmapAnnotation := flag.String("annotation", "prometheus.io/kube-rules", "Annotation that states that this configmap contains prometheus rules") + rulesConfigmapAnnotation := flag.String("rules-annotation", "prometheus.io/kube-rules", "Annotation that states that this configmap contains prometheus rules") + alertManagerConfigmapAnnotation := flag.String("alertmanager-annotation", "prometheus.io/kube-alertmanager", "Annotation that states that this configmap contains alertmanager configuration") logzioAPITokenFlag := flag.String("logzio-api-token", "", "LOGZIO API token") logzioAPIURLFlag := flag.String("logzio-api-url", "https://api.logz.io", "LOGZIO API URL") rulesDSFlag := flag.String("rules-ds", "", "name of the data source for the alert rules") @@ -56,9 +59,14 @@ func NewConfig() *Config { klog.Fatal("No rules data source provided") } // Annotation must be provided either by flag or environment variable - annotation := getEnvWithFallback("CONFIGMAP_ANNOTATION", *configmapAnnotation) - if annotation == "" { - klog.Fatal("No ConfigMap annotation provided") + rulesAnnotation := getEnvWithFallback("RULES_CONFIGMAP_ANNOTATION", *rulesConfigmapAnnotation) + if rulesAnnotation == "" { + klog.Fatal("No rules configmap annotation provided") + } + // Annotation must be provided either by flag or environment variable + alertManagerAnnotation := getEnvWithFallback("ALERTMANAGER_CONFIGMAP_ANNOTATION", *alertManagerConfigmapAnnotation) + if alertManagerAnnotation == "" { + klog.Fatal("No alert manager configmap annotation provided") } workerCountStr := getEnvWithFallback("WORKERS_COOUNT", strconv.Itoa(*workerCountFlag)) workerCount, err := strconv.Atoi(workerCountStr) @@ -67,12 +75,13 @@ func NewConfig() *Config { } return &Config{ - Annotation: annotation, - LogzioAPIToken: logzioAPIToken, - LogzioAPIURL: logzioAPIURL, - RulesDS: rulesDS, - EnvID: envID, - WorkerCount: workerCount, + RulesAnnotation: rulesAnnotation, + AlertManagerAnnotation: alertManagerAnnotation, + LogzioAPIToken: logzioAPIToken, + LogzioAPIURL: logzioAPIURL, + RulesDS: rulesDS, + EnvID: envID, + WorkerCount: workerCount, } } @@ -88,7 +97,8 @@ func main() { config := NewConfig() klog.Info("Rule Updater starting.\n") - klog.Infof("ConfigMap annotation: %s\n", config.Annotation) + klog.Infof("Rules configMap annotation: %s\n", config.RulesAnnotation) + 
klog.Infof("AlertManager configMap annotation: %s\n", config.AlertManagerAnnotation) klog.Infof("Environment ID: %s\n", config.EnvID) klog.Infof("Logzio api url: %s\n", config.LogzioAPIURL) klog.Infof("Logzio rules data source: %s\n", config.RulesDS) @@ -97,7 +107,7 @@ func main() { // set up signals so we handle the first shutdown signal gracefully stopCh := signals.SetupSignalHandler() - cfg, err := controller.GetConfig() + cfg, err := common.GetConfig() if err != nil { klog.Fatalf("Error getting Kubernetes config: %s", err) } @@ -109,13 +119,13 @@ func main() { kubeInformerFactory := informers.NewSharedInformerFactory(kubeClient, time.Second*30) - c := controller.NewController(kubeClient, kubeInformerFactory.Core().V1().ConfigMaps(), &config.Annotation, config.LogzioAPIToken, config.LogzioAPIURL, config.RulesDS, config.EnvID) - if c == nil { + ctl := controller.NewController(kubeClient, kubeInformerFactory.Core().V1().ConfigMaps(), &config.RulesAnnotation, &config.AlertManagerAnnotation, config.LogzioAPIToken, config.LogzioAPIURL, config.RulesDS, config.EnvID) + if ctl == nil { klog.Fatal("Error creating controller") } kubeInformerFactory.Start(stopCh) - if err = c.Run(config.WorkerCount, stopCh); err != nil { + if err = ctl.Run(config.WorkerCount, stopCh); err != nil { klog.Fatalf("Error running controller: %s", err) } } From 276c40afbbf8f142cd88207aaa1704dfa1492cbe Mon Sep 17 00:00:00 2001 From: Yotam loewenbach Date: Mon, 27 Nov 2023 13:34:43 +0200 Subject: [PATCH 07/40] modify unit tests --- common/common.go | 3 +- common/common_test.go | 199 +++++++++++++ controller/controller_test.go | 269 +----------------- .../logzio_alerts_client_test.go | 81 ++++++ 4 files changed, 291 insertions(+), 261 deletions(-) create mode 100644 common/common_test.go create mode 100644 logzio_alerts_client/logzio_alerts_client_test.go diff --git a/common/common.go b/common/common.go index 1e8423d..efc1bca 100644 --- a/common/common.go +++ b/common/common.go @@ -75,8 +75,7 @@ func CreateNameStub(cm *corev1.ConfigMap) string { return fmt.Sprintf("%s-%s", namespace, name) } -// isAlertEqual compares two AlertRule objects for equality. -// You should expand this function to compare all relevant fields of AlertRule. +// IsAlertEqual isAlertEqual compares two AlertRule objects for equality. func IsAlertEqual(rule rulefmt.RuleNode, grafanaRule grafana_alerts.GrafanaAlertRule) bool { // Start with name comparison; if these don't match, they're definitely not equal. 
if rule.Alert.Value != grafanaRule.Title { diff --git a/common/common_test.go b/common/common_test.go new file mode 100644 index 0000000..e5fc18f --- /dev/null +++ b/common/common_test.go @@ -0,0 +1,199 @@ +package common + +import ( + "github.com/logzio/logzio_terraform_client/grafana_alerts" + "github.com/prometheus/common/model" + "github.com/prometheus/prometheus/model/rulefmt" + "gopkg.in/yaml.v3" + v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "strings" + "testing" + "time" +) + +func TestGenerateRandomString(t *testing.T) { + testCases := []struct { + name string + length int + }{ + {"length 10", 10}, + {"length 0", 0}, + {"negative length", -1}, + {"large length", 1000}, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + result := GenerateRandomString(tc.length) + + if len(result) != tc.length && tc.length >= 0 { + t.Errorf("Expected string of length %d, got string of length %d", tc.length, len(result)) + } + + for _, char := range result { + if !strings.Contains(LetterBytes, string(char)) { + t.Errorf("generateRandomString() produced a string with invalid character: %v", char) + } + } + + if tc.length > 0 { + otherResult := GenerateRandomString(tc.length) + if result == otherResult { + t.Errorf("generateRandomString() does not seem to produce random strings") + } + } + }) + } +} + +func TestParseDuration(t *testing.T) { + tests := []struct { + input string + expected int64 + err bool + }{ + {"", 0, true}, + {"123", 123 * int64(time.Second), false}, + {"1h", int64(time.Hour), false}, + {"invalid", 0, true}, + } + + for _, test := range tests { + duration, err := ParseDuration(test.input) + if test.err && err == nil { + t.Errorf("Expected error for input %s", test.input) + } + if !test.err && duration != test.expected { + t.Errorf("Expected %d, got %d for input %s", test.expected, duration, test.input) + } + } +} + +func TestCreateNameStub(t *testing.T) { + cm := &v1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-name", + Namespace: "test-namespace", + }, + } + expected := "test-namespace-test-name" + stub := CreateNameStub(cm) + if stub != expected { + t.Errorf("Expected %s, got %s", expected, stub) + } +} + +func TestIsAlertEqual(t *testing.T) { + // dummy time duration + tenMinutes, _ := model.ParseDuration("10m") + tenMinutesNs := int64(10 * time.Minute) + fiveMinutes, _ := model.ParseDuration("5m") + + // dummy expression nodes + exprNode := yaml.Node{Value: "metric > 0.5"} + exprQuery := []*grafana_alerts.GrafanaAlertQuery{{Model: map[string]interface{}{"expr": "metric > 0.5"}}} + differentExprQuery := []*grafana_alerts.GrafanaAlertQuery{{Model: map[string]interface{}{"expr": "metric > 0.7"}}} + + testCases := []struct { + name string + rule rulefmt.RuleNode + grafanaRule grafana_alerts.GrafanaAlertRule + expected bool + }{ + { + name: "same rules", + rule: rulefmt.RuleNode{ + Alert: yaml.Node{Value: "SameName"}, + Expr: exprNode, + For: tenMinutes, + Labels: map[string]string{"severity": "critical"}, + Annotations: map[string]string{"summary": "High CPU usage"}, + }, + grafanaRule: grafana_alerts.GrafanaAlertRule{ + Title: "SameName", + Data: exprQuery, + For: tenMinutesNs, + Labels: map[string]string{"severity": "critical"}, + Annotations: map[string]string{"summary": "High CPU usage"}, + }, + expected: true, + }, + { + name: "different titles", + rule: rulefmt.RuleNode{ + Alert: yaml.Node{Value: "AlertName1"}, + Expr: exprNode, + For: tenMinutes, + }, + grafanaRule: grafana_alerts.GrafanaAlertRule{ + Title: 
"AlertName2", + Data: exprQuery, + For: tenMinutesNs, + }, + expected: false, + }, + { + name: "different labels", + rule: rulefmt.RuleNode{ + Alert: yaml.Node{Value: "SameName"}, + Expr: exprNode, + Labels: map[string]string{"severity": "warning"}, + }, + grafanaRule: grafana_alerts.GrafanaAlertRule{ + Title: "SameName", + Labels: map[string]string{"severity": "critical"}, + Data: exprQuery, + }, + expected: false, + }, + { + name: "different annotations", + rule: rulefmt.RuleNode{ + Alert: yaml.Node{Value: "SameName"}, + Expr: exprNode, + Annotations: map[string]string{"description": "CPU usage is high"}, + }, + grafanaRule: grafana_alerts.GrafanaAlertRule{ + Title: "SameName", + Annotations: map[string]string{"description": "Disk usage is high"}, + Data: exprQuery, + }, + expected: false, + }, + { + name: "different expressions", + rule: rulefmt.RuleNode{ + Alert: yaml.Node{Value: "SameName"}, + Expr: exprNode, + }, + grafanaRule: grafana_alerts.GrafanaAlertRule{ + Title: "SameName", + Data: differentExprQuery, + }, + expected: false, + }, + { + name: "different durations", + rule: rulefmt.RuleNode{ + Alert: yaml.Node{Value: "SameName"}, + Expr: exprNode, + For: fiveMinutes, + }, + grafanaRule: grafana_alerts.GrafanaAlertRule{ + Title: "SameName", + Data: exprQuery, + For: tenMinutesNs, + }, + expected: false, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + if got := IsAlertEqual(tc.rule, tc.grafanaRule); got != tc.expected { + t.Errorf("isAlertEqual() for test case %q = %v, want %v", tc.name, got, tc.expected) + } + }) + } +} diff --git a/controller/controller_test.go b/controller/controller_test.go index d1c9b0d..f88d413 100644 --- a/controller/controller_test.go +++ b/controller/controller_test.go @@ -1,27 +1,24 @@ package controller import ( - "os" - "reflect" - "strings" - "testing" - "time" - "github.com/logzio/logzio_terraform_client/grafana_alerts" - "github.com/prometheus/common/model" + "github.com/logzio/prometheus-alerts-migrator/common" "github.com/prometheus/prometheus/model/rulefmt" "gopkg.in/yaml.v3" v1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/client-go/informers" "k8s.io/client-go/kubernetes" - "k8s.io/klog" + "k8s.io/klog/v2" + "os" + "reflect" + "testing" ) const annotation = "test-annotation" func generateTestController() *Controller { - cfg, err := GetConfig() + cfg, err := common.GetConfig() if err != nil { klog.Fatalf("Error getting Kubernetes config: %s", err) } @@ -34,258 +31,12 @@ func generateTestController() *Controller { logzioAPIToken := os.Getenv("LOGZIO_API_TOKEN") rulesDS := os.Getenv("RULES_DS") kubeInformerFactory := informers.NewSharedInformerFactory(kubeClient, 0) - annotation := "test-annotation" - c := NewController(kubeClient, kubeInformerFactory.Core().V1().ConfigMaps(), &annotation, logzioAPIToken, logzioUrl, rulesDS, "integration-test") + rulesAnnotation := "test-annotation" + alertManagerAnnotation := "am-test-annotation" + c := NewController(kubeClient, kubeInformerFactory.Core().V1().ConfigMaps(), &rulesAnnotation, &alertManagerAnnotation, logzioAPIToken, logzioUrl, rulesDS, "integration-test") return c } -func TestGenerateRandomString(t *testing.T) { - testCases := []struct { - name string - length int - }{ - {"length 10", 10}, - {"length 0", 0}, - {"negative length", -1}, - {"large length", 1000}, - } - - for _, tc := range testCases { - t.Run(tc.name, func(t *testing.T) { - result := generateRandomString(tc.length) - - if len(result) != tc.length && tc.length >= 0 { - 
t.Errorf("Expected string of length %d, got string of length %d", tc.length, len(result)) - } - - for _, char := range result { - if !strings.Contains(letterBytes, string(char)) { - t.Errorf("generateRandomString() produced a string with invalid character: %v", char) - } - } - - if tc.length > 0 { - otherResult := generateRandomString(tc.length) - if result == otherResult { - t.Errorf("generateRandomString() does not seem to produce random strings") - } - } - }) - } -} - -func TestParseDuration(t *testing.T) { - tests := []struct { - input string - expected int64 - err bool - }{ - {"", 0, true}, - {"123", 123 * int64(time.Second), false}, - {"1h", int64(time.Hour), false}, - {"invalid", 0, true}, - } - - for _, test := range tests { - duration, err := parseDuration(test.input) - if test.err && err == nil { - t.Errorf("Expected error for input %s", test.input) - } - if !test.err && duration != test.expected { - t.Errorf("Expected %d, got %d for input %s", test.expected, duration, test.input) - } - } -} - -func TestCreateNameStub(t *testing.T) { - cm := &v1.ConfigMap{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-name", - Namespace: "test-namespace", - }, - } - expected := "test-namespace-test-name" - stub := createNameStub(cm) - if stub != expected { - t.Errorf("Expected %s, got %s", expected, stub) - } -} - -func TestIsAlertEqual(t *testing.T) { - // dummy time duration - tenMinutes, _ := model.ParseDuration("10m") - tenMinutesNs := int64(10 * time.Minute) - fiveMinutes, _ := model.ParseDuration("5m") - - // dummy expression nodes - exprNode := yaml.Node{Value: "metric > 0.5"} - exprQuery := []*grafana_alerts.GrafanaAlertQuery{{Model: map[string]interface{}{"expr": "metric > 0.5"}}} - differentExprQuery := []*grafana_alerts.GrafanaAlertQuery{{Model: map[string]interface{}{"expr": "metric > 0.7"}}} - - testCases := []struct { - name string - rule rulefmt.RuleNode - grafanaRule grafana_alerts.GrafanaAlertRule - expected bool - }{ - { - name: "same rules", - rule: rulefmt.RuleNode{ - Alert: yaml.Node{Value: "SameName"}, - Expr: exprNode, - For: tenMinutes, - Labels: map[string]string{"severity": "critical"}, - Annotations: map[string]string{"summary": "High CPU usage"}, - }, - grafanaRule: grafana_alerts.GrafanaAlertRule{ - Title: "SameName", - Data: exprQuery, - For: tenMinutesNs, - Labels: map[string]string{"severity": "critical"}, - Annotations: map[string]string{"summary": "High CPU usage"}, - }, - expected: true, - }, - { - name: "different titles", - rule: rulefmt.RuleNode{ - Alert: yaml.Node{Value: "AlertName1"}, - Expr: exprNode, - For: tenMinutes, - }, - grafanaRule: grafana_alerts.GrafanaAlertRule{ - Title: "AlertName2", - Data: exprQuery, - For: tenMinutesNs, - }, - expected: false, - }, - { - name: "different labels", - rule: rulefmt.RuleNode{ - Alert: yaml.Node{Value: "SameName"}, - Expr: exprNode, - Labels: map[string]string{"severity": "warning"}, - }, - grafanaRule: grafana_alerts.GrafanaAlertRule{ - Title: "SameName", - Labels: map[string]string{"severity": "critical"}, - Data: exprQuery, - }, - expected: false, - }, - { - name: "different annotations", - rule: rulefmt.RuleNode{ - Alert: yaml.Node{Value: "SameName"}, - Expr: exprNode, - Annotations: map[string]string{"description": "CPU usage is high"}, - }, - grafanaRule: grafana_alerts.GrafanaAlertRule{ - Title: "SameName", - Annotations: map[string]string{"description": "Disk usage is high"}, - Data: exprQuery, - }, - expected: false, - }, - { - name: "different expressions", - rule: rulefmt.RuleNode{ - Alert: 
yaml.Node{Value: "SameName"}, - Expr: exprNode, - }, - grafanaRule: grafana_alerts.GrafanaAlertRule{ - Title: "SameName", - Data: differentExprQuery, - }, - expected: false, - }, - { - name: "different durations", - rule: rulefmt.RuleNode{ - Alert: yaml.Node{Value: "SameName"}, - Expr: exprNode, - For: fiveMinutes, - }, - grafanaRule: grafana_alerts.GrafanaAlertRule{ - Title: "SameName", - Data: exprQuery, - For: tenMinutesNs, - }, - expected: false, - }, - } - - for _, tc := range testCases { - t.Run(tc.name, func(t *testing.T) { - if got := isAlertEqual(tc.rule, tc.grafanaRule); got != tc.expected { - t.Errorf("isAlertEqual() for test case %q = %v, want %v", tc.name, got, tc.expected) - } - }) - } -} - -func TestGenerateGrafanaAlert(t *testing.T) { - ctrl := generateTestController() - // Define common rule parts for reuse in test cases - baseRule := rulefmt.RuleNode{ - Alert: yaml.Node{Value: "TestAlert"}, - Expr: yaml.Node{Value: "up == 1"}, - For: model.Duration(5 * time.Minute), - Labels: map[string]string{"severity": "critical"}, - Annotations: map[string]string{"description": "Instance is down"}, - } - baseFolderUid := "folder123" - - // Test cases - testCases := []struct { - name string - rule rulefmt.RuleNode - folderUid string - wantErr bool - }{ - { - name: "valid conversion with annotations and labels", - rule: baseRule, // Already has annotations and labels - folderUid: baseFolderUid, - wantErr: false, - }, - } - - for _, tc := range testCases { - t.Run(tc.name, func(t *testing.T) { - alertRule, err := ctrl.generateGrafanaAlert(tc.rule, tc.folderUid) - - // Check for unexpected errors or lack thereof - if (err != nil) != tc.wantErr { - t.Errorf("generateGrafanaAlert() error = %v, wantErr %v", err, tc.wantErr) - return // Skip further checks if there's an unexpected error - } - if !tc.wantErr { - // Validate Title - if alertRule.Title != tc.rule.Alert.Value { - t.Errorf("generateGrafanaAlert() Title = %v, want %v", alertRule.Title, tc.rule.Alert.Value) - } - - // Validate FolderUID - if alertRule.FolderUID != tc.folderUid { - t.Errorf("generateGrafanaAlert() FolderUID = %v, want %v", alertRule.FolderUID, tc.folderUid) - } - - // Validate Labels - if !reflect.DeepEqual(alertRule.Labels, tc.rule.Labels) { - t.Errorf("generateGrafanaAlert() Labels = %v, want %v", alertRule.Labels, tc.rule.Labels) - } - - // Validate Annotations - if !reflect.DeepEqual(alertRule.Annotations, tc.rule.Annotations) { - t.Errorf("generateGrafanaAlert() Annotations = %v, want %v", alertRule.Annotations, tc.rule.Annotations) - } - } - }) - } -} - func TestExtractValues(t *testing.T) { c := generateTestController() // Define test cases @@ -411,7 +162,7 @@ func TestHaveConfigMapsChanged(t *testing.T) { }, }, } - c.resourceVersionMap[createNameStub(&knownConfigMap)] = "12345" + c.resourceVersionMap[common.CreateNameStub(&knownConfigMap)] = "12345" testCases := []struct { name string diff --git a/logzio_alerts_client/logzio_alerts_client_test.go b/logzio_alerts_client/logzio_alerts_client_test.go new file mode 100644 index 0000000..766d147 --- /dev/null +++ b/logzio_alerts_client/logzio_alerts_client_test.go @@ -0,0 +1,81 @@ +package logzio_alerts_client + +import ( + "github.com/prometheus/common/model" + "github.com/prometheus/prometheus/model/rulefmt" + "gopkg.in/yaml.v3" + "os" + "reflect" + "testing" + "time" +) + +func generateTestLogzioGrafanaAlertsClient() *LogzioGrafanaAlertsClient { + logzioUrl := os.Getenv("LOGZIO_API_URL") + logzioAPIToken := os.Getenv("LOGZIO_API_TOKEN") + rulesDS := 
os.Getenv("RULES_DS") + logzioGrafanaAlertsClient := NewLogzioGrafanaAlertsClient(logzioUrl, logzioAPIToken, rulesDS, "integration-test") + return logzioGrafanaAlertsClient + +} + +func TestGenerateGrafanaAlert(t *testing.T) { + cl := generateTestLogzioGrafanaAlertsClient() + // Define common rule parts for reuse in test cases + baseRule := rulefmt.RuleNode{ + Alert: yaml.Node{Value: "TestAlert"}, + Expr: yaml.Node{Value: "up == 1"}, + For: model.Duration(5 * time.Minute), + Labels: map[string]string{"severity": "critical"}, + Annotations: map[string]string{"description": "Instance is down"}, + } + baseFolderUid := "folder123" + + // Test cases + testCases := []struct { + name string + rule rulefmt.RuleNode + folderUid string + wantErr bool + }{ + { + name: "valid conversion with annotations and labels", + rule: baseRule, // Already has annotations and labels + folderUid: baseFolderUid, + wantErr: false, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + alertRule, err := cl.generateGrafanaAlert(tc.rule, tc.folderUid) + + // Check for unexpected errors or lack thereof + if (err != nil) != tc.wantErr { + t.Errorf("generateGrafanaAlert() error = %v, wantErr %v", err, tc.wantErr) + return // Skip further checks if there's an unexpected error + } + if !tc.wantErr { + // Validate Title + if alertRule.Title != tc.rule.Alert.Value { + t.Errorf("generateGrafanaAlert() Title = %v, want %v", alertRule.Title, tc.rule.Alert.Value) + } + + // Validate FolderUID + if alertRule.FolderUID != tc.folderUid { + t.Errorf("generateGrafanaAlert() FolderUID = %v, want %v", alertRule.FolderUID, tc.folderUid) + } + + // Validate Labels + if !reflect.DeepEqual(alertRule.Labels, tc.rule.Labels) { + t.Errorf("generateGrafanaAlert() Labels = %v, want %v", alertRule.Labels, tc.rule.Labels) + } + + // Validate Annotations + if !reflect.DeepEqual(alertRule.Annotations, tc.rule.Annotations) { + t.Errorf("generateGrafanaAlert() Annotations = %v, want %v", alertRule.Annotations, tc.rule.Annotations) + } + } + }) + } +} From 83c9f57e05ca6dbc305195eac485d0e20d14fda1 Mon Sep 17 00:00:00 2001 From: Yotam loewenbach Date: Mon, 27 Nov 2023 13:38:19 +0200 Subject: [PATCH 08/40] modify e2e test --- controller/controller_e2e_test.go | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/controller/controller_e2e_test.go b/controller/controller_e2e_test.go index ede69d5..6100fb7 100644 --- a/controller/controller_e2e_test.go +++ b/controller/controller_e2e_test.go @@ -3,6 +3,7 @@ package controller import ( "context" "fmt" + "github.com/logzio/prometheus-alerts-migrator/common" "github.com/logzio/prometheus-alerts-migrator/pkg/signals" "github.com/stretchr/testify/assert" "io/ioutil" @@ -63,22 +64,22 @@ func cleanupTestCluster(clientset *kubernetes.Clientset, namespace string, confi } func cleanupLogzioAlerts(ctl Controller) { - folderUid, err := ctl.findOrCreatePrometheusAlertsFolder() + folderUid, err := ctl.logzioGrafanaAlertsClient.FindOrCreatePrometheusAlertsFolder() if err != nil { log.Fatalf("Failed to get logzio alerts folder uid: %v", err) } - logzioAlerts, err := ctl.getLogzioGrafanaAlerts(folderUid) + logzioAlerts, err := ctl.logzioGrafanaAlertsClient.GetLogzioGrafanaAlerts(folderUid) if err != nil { log.Fatalf("Failed to get logzio alerts: %v", err) } // defer cleanup - ctl.deleteRules(logzioAlerts, folderUid) + ctl.logzioGrafanaAlertsClient.DeleteRules(logzioAlerts, folderUid) } // TestControllerE2E is the main function that runs the end-to-end test func 
TestControllerE2E(t *testing.T) { // Setup the test environment - config, err := GetConfig() + config, err := common.GetConfig() if err != nil { t.Fatalf("Failed to get Kubernetes config: %v", err) } @@ -90,12 +91,13 @@ func TestControllerE2E(t *testing.T) { logzioUrl := os.Getenv("LOGZIO_API_URL") logzioAPIToken := os.Getenv("LOGZIO_API_TOKEN") rulesDS := os.Getenv("RULES_DS") - anno := os.Getenv("CONFIGMAP_ANNOTATION") + rulesAnnotation := os.Getenv("RULES_CONFIGMAP_ANNOTATION") + alertManagerAnnotation := os.Getenv("ALERTMANAGER_CONFIGMAP_ANNOTATION") kubeInformerFactory := informers.NewSharedInformerFactory(clientset, time.Second*30) // set up signals so we handle the first shutdown signal gracefully stopCh := signals.SetupSignalHandler() // Instantiate the controller - ctrl := NewController(clientset, kubeInformerFactory.Core().V1().ConfigMaps(), &anno, logzioAPIToken, logzioUrl, rulesDS, "integration-test") + ctrl := NewController(clientset, kubeInformerFactory.Core().V1().ConfigMaps(), &rulesAnnotation, &alertManagerAnnotation, logzioAPIToken, logzioUrl, rulesDS, "integration-test") // defer cleanup defer cleanupLogzioAlerts(*ctrl) @@ -115,11 +117,11 @@ func TestControllerE2E(t *testing.T) { }() t.Log("going to sleep") time.Sleep(time.Second * 10) - folderUid, err := ctrl.findOrCreatePrometheusAlertsFolder() + folderUid, err := ctrl.logzioGrafanaAlertsClient.FindOrCreatePrometheusAlertsFolder() if err != nil { t.Fatalf("Failed to get logzio alerts folder uid: %v", err) } - logzioAlerts, err := ctrl.getLogzioGrafanaAlerts(folderUid) + logzioAlerts, err := ctrl.logzioGrafanaAlertsClient.GetLogzioGrafanaAlerts(folderUid) if err != nil { t.Fatalf("Failed to get logzio alerts: %v", err) } From a8e19b43a6b8333558db3fbe4e9788c31e17196c Mon Sep 17 00:00:00 2001 From: Yotam loewenbach Date: Mon, 27 Nov 2023 13:47:52 +0200 Subject: [PATCH 09/40] docs --- README.md | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index c66ade9..610b961 100644 --- a/README.md +++ b/README.md @@ -12,14 +12,15 @@ Before running this software, ensure you have: Configure the application using the following environment variables: -| Environment Variable | Description | Default Value | -|------------------------|------------------------------------------------------------------------------------|----------------------------| -| `LOGZIO_API_TOKEN` | The API token for your Logz.io account. | `None` | -| `LOGZIO_API_URL` | The URL endpoint for the Logz.io API. | `https://api.logz.io` | -| `CONFIGMAP_ANNOTATION` | The specific annotation the controller should look for in Prometheus alert rules. | `prometheus.io/kube-rules` | -| `RULES_DS` | The metrics data source name in logz.io for the Prometheus rules. | `None` | -| `ENV_ID` | Environment identifier, usually cluster name. | `my-env` | -| `WORKER_COUNT` | The number of workers to process the alerts. | `2` | +| Environment Variable | Description | Default Value | +|-------------------------------------|---------------------------------------------------------------------------------------------------|-----------------------------------| +| `LOGZIO_API_TOKEN` | The API token for your Logz.io account. | `None` | +| `LOGZIO_API_URL` | The URL endpoint for the Logz.io API. | `https://api.logz.io` | +| `RULES_CONFIGMAP_ANNOTATION` | The specific annotation the controller should look for in Prometheus alert rules. 
| `prometheus.io/kube-rules` | +| `ALERTMANAGER_CONFIGMAP_ANNOTATION` | The specific annotation the controller should look for in Prometheus alert manager configuration. | `prometheus.io/kube-alertmanager` | +| `RULES_DS` | The metrics data source name in logz.io for the Prometheus rules. | `None` | +| `ENV_ID` | Environment identifier, usually cluster name. | `my-env` | +| `WORKER_COUNT` | The number of workers to process the alerts. | `2` | Please ensure to set all necessary environment variables before running the application. From 6cc63dd011223040c0c021f9f10fcaa9a4ed351b Mon Sep 17 00:00:00 2001 From: Yotam loewenbach Date: Mon, 27 Nov 2023 13:48:05 +0200 Subject: [PATCH 10/40] add annotation to workflow --- .github/workflows/test.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index fb3a02c..f6761f1 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -12,7 +12,8 @@ jobs: LOGZIO_API_URL: https://api.logz.io LOGZIO_API_TOKEN: ${{ secrets.LOGZIO_API_TOKEN }} RULES_DS: ${{ secrets.RULES_DS }} - CONFIGMAP_ANNOTATION: prometheus.io/kube-rules + RULES_CONFIGMAP_ANNOTATION: prometheus.io/kube-rules + ALERTMANAGER_CONFIGMAP_ANNOTATION: prometheus.io/kube-alertmanager steps: - name: Set up Go From c43e192af355384969dee7848bc1d355631f0b79 Mon Sep 17 00:00:00 2001 From: Yotam loewenbach Date: Mon, 27 Nov 2023 17:01:51 +0200 Subject: [PATCH 11/40] Add `GetLogzioGrafanaContactPoints`+`GetLogzioGrafanaNotificationPolicies` --- logzio_alerts_client/logzio_alerts_client.go | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/logzio_alerts_client/logzio_alerts_client.go b/logzio_alerts_client/logzio_alerts_client.go index f353c1c..604472f 100644 --- a/logzio_alerts_client/logzio_alerts_client.go +++ b/logzio_alerts_client/logzio_alerts_client.go @@ -228,6 +228,23 @@ func (l *LogzioGrafanaAlertsClient) generateGrafanaAlert(rule rulefmt.RuleNode, return grafanaAlert, nil } +func (l *LogzioGrafanaAlertsClient) GetLogzioGrafanaContactPoints() ([]grafana_contact_points.GrafanaContactPoint, error) { + contactPoints, err := l.logzioContactPointClient.GetAllGrafanaContactPoints() + if err != nil { + return nil, err + } + return contactPoints, nil +} + +func (l *LogzioGrafanaAlertsClient) GetLogzioGrafanaNotificationPolicies() (grafana_notification_policies.GrafanaNotificationPolicyTree, error) { + notificationPolicies, err := l.logzioNotificationPolicyClient.GetGrafanaNotificationPolicyTree() + if err != nil { + return grafana_notification_policies.GrafanaNotificationPolicyTree{}, err + } + return notificationPolicies, nil + +} + // GetLogzioGrafanaAlerts builds a list of rules from all logz.io func (l *LogzioGrafanaAlertsClient) GetLogzioGrafanaAlerts(folderUid string) ([]grafana_alerts.GrafanaAlertRule, error) { alertRules, ListLogzioRulesErr := l.logzioAlertClient.ListGrafanaAlertRules() From f67245c54acad30e992382d68dcb7abe39256603 Mon Sep 17 00:00:00 2001 From: Yotam loewenbach Date: Mon, 27 Nov 2023 17:02:32 +0200 Subject: [PATCH 12/40] Add `processAlertManagerConfigMaps()` --- controller/controller.go | 91 +++++++++++++++++++++++++++++++++++++--- 1 file changed, 86 insertions(+), 5 deletions(-) diff --git a/controller/controller.go b/controller/controller.go index 86db929..bc410e1 100644 --- a/controller/controller.go +++ b/controller/controller.go @@ -3,11 +3,11 @@ package controller import ( "context" "fmt" + "github.com/logzio/logzio_terraform_client/grafana_alerts" + 
"github.com/logzio/logzio_terraform_client/grafana_contact_points" "github.com/logzio/prometheus-alerts-migrator/common" "github.com/logzio/prometheus-alerts-migrator/logzio_alerts_client" - "time" - - "github.com/logzio/logzio_terraform_client/grafana_alerts" + alert_manager_config "github.com/prometheus/alertmanager/config" "github.com/prometheus/prometheus/model/rulefmt" _ "github.com/prometheus/prometheus/promql/parser" "gopkg.in/yaml.v3" @@ -19,6 +19,7 @@ import ( "k8s.io/client-go/tools/record" "k8s.io/client-go/util/workqueue" "k8s.io/klog/v2" + "time" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -239,13 +240,93 @@ func (c *Controller) syncHandler(key string) error { } if c.haveConfigMapsChanged(cmList) { - // handle alert manager config process + return c.processAlertManagerConfigMaps(cmList) } } return nil } +func (c *Controller) processAlertManagerConfigMaps(cmList *corev1.ConfigMapList) error { + // get contact points and notification policies from logz.io for comparison + logzioContactPoints, err := c.logzioGrafanaAlertsClient.GetLogzioGrafanaContactPoints() + if err != nil { + utilruntime.HandleError(err) + return err + } + logzioNotificationPolicies, err := c.logzioGrafanaAlertsClient.GetLogzioGrafanaNotificationPolicies() + if err != nil { + utilruntime.HandleError(err) + return err + } + receivers, routes := c.getClusterReceiversAndRoutes(cmList) + klog.Info(routes, logzioNotificationPolicies) + // Creating maps for efficient lookups + contactPointsMap := make(map[string]grafana_contact_points.GrafanaContactPoint) + for _, contactPoint := range logzioContactPoints { + contactPointsMap[contactPoint.Name] = contactPoint + } + receiversMap := make(map[string]alert_manager_config.Receiver) + for _, receiver := range receivers { + receiversMap[receiver.Name] = receiver + } + + return nil +} + +func (c *Controller) processContactPoints(contactPointsMap map[string]grafana_contact_points.GrafanaContactPoint, logzioContactPoints map[string]grafana_contact_points.GrafanaContactPoint) { + toAdd, toUpdate, toDelete := c.compareContactPoints(contactPointsMap, logzioContactPoints) + klog.Infof("Contact points summary: to add: %d, to update: %d, to delete: %d", len(toAdd), len(toUpdate), len(toDelete)) + if len(toAdd) > 0 { + // TODO handle + } + if len(toUpdate) > 0 { + // TODO handle + } + if len(toDelete) > 0 { + // TODO handle + } +} + +func (c *Controller) compareContactPoints(contactPointsMap map[string]grafana_contact_points.GrafanaContactPoint, logzioContactPoints map[string]grafana_contact_points.GrafanaContactPoint) (toAdd, toUpdate, toDelete []grafana_contact_points.GrafanaContactPoint) { + for _, contactPoint := range logzioContactPoints { + if _, ok := contactPointsMap[contactPoint.Name]; !ok { + toAdd = append(toAdd, contactPoint) + } else { + // TODO deep comparison + if contactPointsMap[contactPoint.Name].Name != contactPoint.Name { + toUpdate = append(toUpdate, contactPoint) + } + } + } + for _, contactPoint := range contactPointsMap { + if _, ok := logzioContactPoints[contactPoint.Name]; !ok { + toDelete = append(toDelete, contactPoint) + } + } + return toAdd, toUpdate, toDelete +} + +func (c *Controller) getClusterReceiversAndRoutes(cmList *corev1.ConfigMapList) ([]alert_manager_config.Receiver, []*alert_manager_config.Route) { + var routes []*alert_manager_config.Route + var receivers []alert_manager_config.Receiver + for _, cm := range cmList.Items { + if c.isAlertManagerConfigMap(&cm) { + for _, value := range cm.Data { + 
alertManagerConfig, err := alert_manager_config.Load(value) + if err != nil { + // TODO add descriptive error + klog.Error() + return nil, nil + } + routes = append(routes, alertManagerConfig.Route.Routes...) + receivers = append(receivers, alertManagerConfig.Receivers...) + } + } + } + return receivers, routes +} + // getConfigMap returns the ConfigMap with the specified name in the specified namespace, or nil if no such ConfigMap exists. func (c *Controller) getConfigMap(namespace, name string) (*corev1.ConfigMap, error) { configmap, err := c.configmapsLister.ConfigMaps(namespace).Get(name) @@ -256,7 +337,7 @@ func (c *Controller) getConfigMap(namespace, name string) (*corev1.ConfigMap, er return configmap, err } -// processConfigMapsChanges gets the state of alert rules from both cluster configmaps and logz.io, compares the rules and decide what crud operations to perform +// processRulesConfigMaps gets the state of alert rules from both cluster configmaps and logz.io, compares the rules and decide what crud operations to perform func (c *Controller) processRulesConfigMaps(mapList *corev1.ConfigMapList) error { alertRules := c.getClusterAlertRules(mapList) folderUid, err := c.logzioGrafanaAlertsClient.FindOrCreatePrometheusAlertsFolder() From c85388effe5c81a375a4a9a16992a325087bf1a0 Mon Sep 17 00:00:00 2001 From: Yotam loewenbach Date: Wed, 29 Nov 2023 12:33:38 +0200 Subject: [PATCH 13/40] add `processContactPoints()` --- common/common.go | 13 ++++++++++++- controller/controller.go | 34 +++++++++++++++++++++------------- 2 files changed, 33 insertions(+), 14 deletions(-) diff --git a/common/common.go b/common/common.go index efc1bca..cbed0a2 100644 --- a/common/common.go +++ b/common/common.go @@ -3,6 +3,8 @@ package common import ( "fmt" "github.com/logzio/logzio_terraform_client/grafana_alerts" + "github.com/logzio/logzio_terraform_client/grafana_contact_points" + alert_manager_config "github.com/prometheus/alertmanager/config" "github.com/prometheus/prometheus/model/rulefmt" corev1 "k8s.io/api/core/v1" "k8s.io/client-go/rest" @@ -75,7 +77,7 @@ func CreateNameStub(cm *corev1.ConfigMap) string { return fmt.Sprintf("%s-%s", namespace, name) } -// IsAlertEqual isAlertEqual compares two AlertRule objects for equality. +// IsAlertEqual compares two AlertRule objects for equality. func IsAlertEqual(rule rulefmt.RuleNode, grafanaRule grafana_alerts.GrafanaAlertRule) bool { // Start with name comparison; if these don't match, they're definitely not equal. if rule.Alert.Value != grafanaRule.Title { @@ -97,6 +99,15 @@ func IsAlertEqual(rule rulefmt.RuleNode, grafanaRule grafana_alerts.GrafanaAlert return true } +// IsContactPointEqual compares two ContactPoint objects for equality. 
+func IsContactPointEqual(cp1 alert_manager_config.Receiver, cp2 grafana_contact_points.GrafanaContactPoint) bool { + if cp1.Name != cp2.Name { + return false + } + // TODO deep comparison + return true +} + // GetConfig returns a Kubernetes config func GetConfig() (*rest.Config, error) { var config *rest.Config diff --git a/controller/controller.go b/controller/controller.go index bc410e1..4c9ed01 100644 --- a/controller/controller.go +++ b/controller/controller.go @@ -260,7 +260,10 @@ func (c *Controller) processAlertManagerConfigMaps(cmList *corev1.ConfigMapList) return err } receivers, routes := c.getClusterReceiversAndRoutes(cmList) + + // TODO remove redundant log klog.Info(routes, logzioNotificationPolicies) + // Creating maps for efficient lookups contactPointsMap := make(map[string]grafana_contact_points.GrafanaContactPoint) for _, contactPoint := range logzioContactPoints { @@ -270,12 +273,13 @@ func (c *Controller) processAlertManagerConfigMaps(cmList *corev1.ConfigMapList) for _, receiver := range receivers { receiversMap[receiver.Name] = receiver } + c.processContactPoints(receiversMap, contactPointsMap) return nil } -func (c *Controller) processContactPoints(contactPointsMap map[string]grafana_contact_points.GrafanaContactPoint, logzioContactPoints map[string]grafana_contact_points.GrafanaContactPoint) { - toAdd, toUpdate, toDelete := c.compareContactPoints(contactPointsMap, logzioContactPoints) +func (c *Controller) processContactPoints(receiversMap map[string]alert_manager_config.Receiver, logzioContactPoints map[string]grafana_contact_points.GrafanaContactPoint) { + toAdd, toUpdate, toDelete := c.compareContactPoints(receiversMap, logzioContactPoints) klog.Infof("Contact points summary: to add: %d, to update: %d, to delete: %d", len(toAdd), len(toUpdate), len(toDelete)) if len(toAdd) > 0 { // TODO handle @@ -288,19 +292,23 @@ func (c *Controller) processContactPoints(contactPointsMap map[string]grafana_co } } -func (c *Controller) compareContactPoints(contactPointsMap map[string]grafana_contact_points.GrafanaContactPoint, logzioContactPoints map[string]grafana_contact_points.GrafanaContactPoint) (toAdd, toUpdate, toDelete []grafana_contact_points.GrafanaContactPoint) { - for _, contactPoint := range logzioContactPoints { - if _, ok := contactPointsMap[contactPoint.Name]; !ok { - toAdd = append(toAdd, contactPoint) - } else { - // TODO deep comparison - if contactPointsMap[contactPoint.Name].Name != contactPoint.Name { - toUpdate = append(toUpdate, contactPoint) - } +// compareContactPoints +func (c *Controller) compareContactPoints(receiversMap map[string]alert_manager_config.Receiver, logzioContactPoints map[string]grafana_contact_points.GrafanaContactPoint) (toAdd, toUpdate []alert_manager_config.Receiver, toDelete []grafana_contact_points.GrafanaContactPoint) { + // Determine rules to add or update. + for receiverName, receiver := range receiversMap { + contactPoint, exists := logzioContactPoints[receiverName] + if !exists { + // Contact point doesn't exist in Logz.io, needs to be added. + toAdd = append(toAdd, receiver) + } else if !common.IsContactPointEqual(receiver, contactPoint) { + // Contact point exists but differs, needs to be updated. + toUpdate = append(toUpdate, receiver) } } - for _, contactPoint := range contactPointsMap { - if _, ok := logzioContactPoints[contactPoint.Name]; !ok { + // Determine contact points from logzio to delete. 
+ for contactPointName, contactPoint := range logzioContactPoints { + if _, exists := receiversMap[contactPointName]; !exists { + // Contact point exists in Logz.io but not in alert manager, needs to be deleted. toDelete = append(toDelete, contactPoint) } } From e545032bdd883e2923204e4319dc2aaf8c7bdb9a Mon Sep 17 00:00:00 2001 From: Yotam loewenbach Date: Sun, 10 Dec 2023 17:25:27 +0700 Subject: [PATCH 14/40] handle contact points --- common/common.go | 16 +- controller/controller.go | 200 ++++++++++--------- logzio_alerts_client/logzio_alerts_client.go | 102 ++++++++++ 3 files changed, 217 insertions(+), 101 deletions(-) diff --git a/common/common.go b/common/common.go index cbed0a2..e274bb5 100644 --- a/common/common.go +++ b/common/common.go @@ -3,8 +3,6 @@ package common import ( "fmt" "github.com/logzio/logzio_terraform_client/grafana_alerts" - "github.com/logzio/logzio_terraform_client/grafana_contact_points" - alert_manager_config "github.com/prometheus/alertmanager/config" "github.com/prometheus/prometheus/model/rulefmt" corev1 "k8s.io/api/core/v1" "k8s.io/client-go/rest" @@ -22,7 +20,10 @@ const ( LetterBytes = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" letterIdxBits = 6 // 6 bits to represent a letter index letterIdxMask = 1< 0 { - // TODO handle +func (c *Controller) getClusterReceiversAndRoutes(configmap *corev1.ConfigMap) (receivers []alert_manager_config.Receiver, routes []*alert_manager_config.Route) { + if c.isAlertManagerConfigMap(configmap) { + for _, value := range configmap.Data { + alertManagerConfig, err := alert_manager_config.Load(value) + if err != nil { + utilruntime.HandleError(fmt.Errorf("unable to load alert manager config; %s", err)) + return nil, nil + } + // Add prefix to distinguish between alert manager imported from alert manager and logz.io custom contact points + stub := common.CreateNameStub(configmap) + for _, receiver := range alertManagerConfig.Receivers { + receiver.Name = fmt.Sprintf("%s-%s-%s", c.envId, stub, receiver.Name) + receivers = append(receivers, receiver) + + } + for _, route := range alertManagerConfig.Route.Routes { + route.Receiver = fmt.Sprintf("%s-%s-%s", c.envId, stub, route.Receiver) + routes = append(routes, route) + } + // setting the `AlertManagerGlobalConfig` context for logzio grafana alerts client + c.logzioGrafanaAlertsClient.AlertManagerGlobalConfig = alertManagerConfig.Global + } + } + return receivers, routes +} + +func (c *Controller) processContactPoints(receiversMap map[string]alert_manager_config.Receiver, logzioContactPoints []grafana_contact_points.GrafanaContactPoint) { + contactPointsToAdd, contactPointsToUpdate, contactPointsToDelete := c.compareContactPoints(receiversMap, logzioContactPoints) + klog.Infof("Contact points summary: to add: %d, to update: %d, to delete: %d", len(contactPointsToAdd), len(contactPointsToUpdate), len(contactPointsToDelete)) + if len(contactPointsToAdd) > 0 { + c.logzioGrafanaAlertsClient.WriteContactPoints(contactPointsToAdd) } - if len(toUpdate) > 0 { - // TODO handle + if len(contactPointsToUpdate) > 0 { + c.logzioGrafanaAlertsClient.UpdateContactPoints(contactPointsToUpdate, logzioContactPoints) } - if len(toDelete) > 0 { - // TODO handle + if len(contactPointsToDelete) > 0 { + c.logzioGrafanaAlertsClient.DeleteContactPoints(contactPointsToDelete) } } // compareContactPoints -func (c *Controller) compareContactPoints(receiversMap map[string]alert_manager_config.Receiver, logzioContactPoints map[string]grafana_contact_points.GrafanaContactPoint) (toAdd, toUpdate 
[]alert_manager_config.Receiver, toDelete []grafana_contact_points.GrafanaContactPoint) { - // Determine rules to add or update. +func (c *Controller) compareContactPoints(receiversMap map[string]alert_manager_config.Receiver, logzioContactPoints []grafana_contact_points.GrafanaContactPoint) (contactPointsToAdd, contactPointsToUpdate []alert_manager_config.Receiver, contactPointsToDelete []grafana_contact_points.GrafanaContactPoint) { + // Initialize a map with slices as values for Logz.io contact points + existingContactPoints := make(map[string][]grafana_contact_points.GrafanaContactPoint) + for _, contactPoint := range logzioContactPoints { + existingContactPoints[contactPoint.Name] = append(existingContactPoints[contactPoint.Name], contactPoint) + } + // Iterate over receivers to find which ones to add or update for receiverName, receiver := range receiversMap { - contactPoint, exists := logzioContactPoints[receiverName] + _, exists := existingContactPoints[receiverName] if !exists { - // Contact point doesn't exist in Logz.io, needs to be added. - toAdd = append(toAdd, receiver) - } else if !common.IsContactPointEqual(receiver, contactPoint) { - // Contact point exists but differs, needs to be updated. - toUpdate = append(toUpdate, receiver) - } - } - // Determine contact points from logzio to delete. - for contactPointName, contactPoint := range logzioContactPoints { - if _, exists := receiversMap[contactPointName]; !exists { - // Contact point exists in Logz.io but not in alert manager, needs to be deleted. - toDelete = append(toDelete, contactPoint) + // If the receiver does not exist in Logz.io contact points, add it + contactPointsToAdd = append(contactPointsToAdd, receiver) + } else { + // If the receiver exists in Logz.io contact points, override with the alert manager receiver state + contactPointsToUpdate = append(contactPointsToUpdate, receiver) } } - return toAdd, toUpdate, toDelete -} + // Iterate over Logz.io contact points to find which ones to delete + for _, contactPoints := range existingContactPoints { + for _, contactPoint := range contactPoints { + if _, exists := receiversMap[contactPoint.Name]; !exists { + // If the Logz.io contact point does not exist among the receivers, delete it -func (c *Controller) getClusterReceiversAndRoutes(cmList *corev1.ConfigMapList) ([]alert_manager_config.Receiver, []*alert_manager_config.Route) { - var routes []*alert_manager_config.Route - var receivers []alert_manager_config.Receiver - for _, cm := range cmList.Items { - if c.isAlertManagerConfigMap(&cm) { - for _, value := range cm.Data { - alertManagerConfig, err := alert_manager_config.Load(value) - if err != nil { - // TODO add descriptive error - klog.Error() - return nil, nil - } - routes = append(routes, alertManagerConfig.Route.Routes...) - receivers = append(receivers, alertManagerConfig.Receivers...) + contactPointsToDelete = append(contactPointsToDelete, contactPoint) } } } - return receivers, routes -} -// getConfigMap returns the ConfigMap with the specified name in the specified namespace, or nil if no such ConfigMap exists. 
-func (c *Controller) getConfigMap(namespace, name string) (*corev1.ConfigMap, error) { - configmap, err := c.configmapsLister.ConfigMaps(namespace).Get(name) - if errors.IsNotFound(err) { - utilruntime.HandleError(fmt.Errorf("configmap '%s' in work queue no longer exists", name)) - return nil, nil - } - return configmap, err + return contactPointsToAdd, contactPointsToUpdate, contactPointsToDelete } // processRulesConfigMaps gets the state of alert rules from both cluster configmaps and logz.io, compares the rules and decide what crud operations to perform @@ -369,31 +399,23 @@ func (c *Controller) processRulesConfigMaps(mapList *corev1.ConfigMapList) error for _, alert := range logzioAlertRules { logzioRulesMap[alert.Title] = alert } - toAdd, toUpdate, toDelete := c.compareAlertRules(rulesMap, logzioRulesMap) - klog.Infof("Alert rules summary: to add: %d, to update: %d, to delete: %d", len(toAdd), len(toUpdate), len(toDelete)) - - if len(toAdd) > 0 { - c.logzioGrafanaAlertsClient.WriteRules(toAdd, folderUid) - } - if len(toUpdate) > 0 { - c.logzioGrafanaAlertsClient.UpdateRules(toUpdate, logzioRulesMap, folderUid) - } - if len(toDelete) > 0 { - c.logzioGrafanaAlertsClient.DeleteRules(toDelete, folderUid) - } - + c.processAlertRules(rulesMap, logzioRulesMap, folderUid) return nil } -// enqueueConfigMap get the cm on the workqueue -func (c *Controller) enqueueConfigMap(obj interface{}) { - var key string - var err error - if key, err = cache.MetaNamespaceKeyFunc(obj); err != nil { - utilruntime.HandleError(err) - return +func (c *Controller) processAlertRules(rulesMap map[string]rulefmt.RuleNode, logzioRulesMap map[string]grafana_alerts.GrafanaAlertRule, folderUid string) { + rulesToAdd, rulesToUpdate, rulesToDelete := c.compareAlertRules(rulesMap, logzioRulesMap) + klog.Infof("Alert rules summary: to add: %d, to update: %d, to delete: %d", len(rulesToAdd), len(rulesToUpdate), len(rulesToDelete)) + + if len(rulesToAdd) > 0 { + c.logzioGrafanaAlertsClient.WriteRules(rulesToAdd, folderUid) + } + if len(rulesToUpdate) > 0 { + c.logzioGrafanaAlertsClient.UpdateRules(rulesToUpdate, logzioRulesMap, folderUid) + } + if len(rulesToDelete) > 0 { + c.logzioGrafanaAlertsClient.DeleteRules(rulesToDelete, folderUid) } - c.workqueue.Add(key) } // getClusterAlertRules builds a list of rules from all the configmaps in the cluster @@ -471,16 +493,16 @@ func (c *Controller) extractRules(value string) (error, rulefmt.RuleNode) { // compareAlertRules compares the rules from Kubernetes with those in Logz.io. // It returns three slices of rulefmt.RuleNode and grafana_alerts.GrafanaAlertRule indicating which rules to add, update, or delete. -func (c *Controller) compareAlertRules(k8sRulesMap map[string]rulefmt.RuleNode, logzioRulesMap map[string]grafana_alerts.GrafanaAlertRule) (toAdd, toUpdate []rulefmt.RuleNode, toDelete []grafana_alerts.GrafanaAlertRule) { +func (c *Controller) compareAlertRules(k8sRulesMap map[string]rulefmt.RuleNode, logzioRulesMap map[string]grafana_alerts.GrafanaAlertRule) (rulesToAdd, rulesToUpdate []rulefmt.RuleNode, rulesToDelete []grafana_alerts.GrafanaAlertRule) { // Determine rules to add or update. for alertName, k8sRule := range k8sRulesMap { logzioRule, exists := logzioRulesMap[alertName] if !exists { // Alert doesn't exist in Logz.io, needs to be added. - toAdd = append(toAdd, k8sRule) + rulesToAdd = append(rulesToAdd, k8sRule) } else if !common.IsAlertEqual(k8sRule, logzioRule) { // Alert exists but differs, needs to be updated. 
- toUpdate = append(toUpdate, k8sRule) + rulesToUpdate = append(rulesToUpdate, k8sRule) } } @@ -488,11 +510,11 @@ func (c *Controller) compareAlertRules(k8sRulesMap map[string]rulefmt.RuleNode, for alertName := range logzioRulesMap { if _, exists := k8sRulesMap[alertName]; !exists { // Alert is in Logz.io but not in Kubernetes, needs to be deleted. - toDelete = append(toDelete, logzioRulesMap[alertName]) + rulesToDelete = append(rulesToDelete, logzioRulesMap[alertName]) } } - return toAdd, toUpdate, toDelete + return rulesToAdd, rulesToUpdate, rulesToDelete } // isRuleConfigMap checks if the configmap is a rule configmap diff --git a/logzio_alerts_client/logzio_alerts_client.go b/logzio_alerts_client/logzio_alerts_client.go index 604472f..24ce85e 100644 --- a/logzio_alerts_client/logzio_alerts_client.go +++ b/logzio_alerts_client/logzio_alerts_client.go @@ -9,6 +9,7 @@ import ( "github.com/logzio/logzio_terraform_client/grafana_folders" "github.com/logzio/logzio_terraform_client/grafana_notification_policies" "github.com/logzio/prometheus-alerts-migrator/common" + alert_manager_config "github.com/prometheus/alertmanager/config" "github.com/prometheus/prometheus/model/rulefmt" "k8s.io/klog/v2" ) @@ -58,6 +59,7 @@ func (p PrometheusQueryModel) ToJSON() (json.RawMessage, error) { } type LogzioGrafanaAlertsClient struct { + AlertManagerGlobalConfig *alert_manager_config.GlobalConfig logzioAlertClient *grafana_alerts.GrafanaAlertClient logzioFolderClient *grafana_folders.GrafanaFolderClient logzioDataSourceClient *grafana_datasources.GrafanaDatasourceClient @@ -114,6 +116,106 @@ func NewLogzioGrafanaAlertsClient(logzioApiToken string, logzioApiUrl string, ru } } +// WriteContactPoints writes the contact points to logz.io +func (l *LogzioGrafanaAlertsClient) WriteContactPoints(contactPointsToWrite []alert_manager_config.Receiver) { + for _, contactPoint := range contactPointsToWrite { + contactPointsList := l.generateGrafanaContactPoint(contactPoint) + for _, cp := range contactPointsList { + _, err := l.logzioContactPointClient.CreateGrafanaContactPoint(cp) + if err != nil { + klog.Warningf("Failed to create contact point: %v", err) + } + } + } +} + +// DeleteContactPoints deletes the contact points from logz.io +func (l *LogzioGrafanaAlertsClient) DeleteContactPoints(contactPointsToDelete []grafana_contact_points.GrafanaContactPoint) { + for _, contactPoint := range contactPointsToDelete { + err := l.logzioContactPointClient.DeleteGrafanaContactPoint(contactPoint.Uid) + if err != nil { + klog.Warningf("Failed to delete contact point: %v", err) + } + } +} + +// UpdateContactPoints updates the contact points in logz.io +func (l *LogzioGrafanaAlertsClient) UpdateContactPoints(contactPointsToUpdate []alert_manager_config.Receiver, contactPointsMap []grafana_contact_points.GrafanaContactPoint) { + for _, contactPoint := range contactPointsToUpdate { + contactPointsList := l.generateGrafanaContactPoint(contactPoint) + for _, cp := range contactPointsList { + for _, logzioContactPoint := range contactPointsMap { + if logzioContactPoint.Name == cp.Name { + cp.Uid = logzioContactPoint.Uid + err := l.logzioContactPointClient.UpdateContactPoint(cp) + if err != nil { + klog.Warningf("Failed to update contact point: %v", err) + } + } + } + } + } +} + +// generateGrafanaContactPoint generates a GrafanaContactPoint from a alert_manager_config.Receiver +func (l *LogzioGrafanaAlertsClient) generateGrafanaContactPoint(receiver alert_manager_config.Receiver) (contactPointsList 
[]grafana_contact_points.GrafanaContactPoint) { + // check for email type configs + for _, emailConfig := range receiver.EmailConfigs { + contactPoint := grafana_contact_points.GrafanaContactPoint{ + Name: receiver.Name, + Type: common.TypeEmail, + Uid: common.GenerateRandomString(9), + DisableResolveMessage: false, + Settings: map[string]interface{}{ + "addresses": emailConfig.To, + "message": emailConfig.HTML, + "singleEmail": true, + }, + } + contactPointsList = append(contactPointsList, contactPoint) + } + // check for slack type configs + for _, slackConfig := range receiver.SlackConfigs { + var url string + if slackConfig.APIURL.String() != "" { + url = slackConfig.APIURL.String() + } else { + url = l.AlertManagerGlobalConfig.SlackAPIURL.String() + } + contactPoint := grafana_contact_points.GrafanaContactPoint{ + Name: receiver.Name, + Type: common.TypeSlack, + Uid: common.GenerateRandomString(9), + DisableResolveMessage: false, + Settings: map[string]interface{}{ + "url": url, + "recipient": slackConfig.Channel, + "text": slackConfig.Text, + "title": slackConfig.Title, + "username": slackConfig.Username, + }, + } + contactPointsList = append(contactPointsList, contactPoint) + } + // check for pagerduty type configs + for _, pagerdutyConfig := range receiver.PagerdutyConfigs { + contactPoint := grafana_contact_points.GrafanaContactPoint{ + Name: receiver.Name, + Type: common.TypePagerDuty, + Uid: common.GenerateRandomString(9), + DisableResolveMessage: false, + Settings: map[string]interface{}{ + "integrationKey": pagerdutyConfig.ServiceKey, + "description": pagerdutyConfig.Description, + "client": pagerdutyConfig.Client, + "clientUrl": pagerdutyConfig.ClientURL, + }, + } + contactPointsList = append(contactPointsList, contactPoint) + } + return contactPointsList +} + // DeleteRules deletes the rules from logz.io func (l *LogzioGrafanaAlertsClient) DeleteRules(rulesToDelete []grafana_alerts.GrafanaAlertRule, folderUid string) { for _, rule := range rulesToDelete { From 9c1656a87151bda3ca6cf8a082ebca08177406f2 Mon Sep 17 00:00:00 2001 From: Yotam loewenbach Date: Sun, 10 Dec 2023 17:31:49 +0700 Subject: [PATCH 15/40] change `GetLogzioManagedGrafanaContactPoints()` --- controller/controller.go | 9 ++------- logzio_alerts_client/logzio_alerts_client.go | 12 ++++++++++-- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/controller/controller.go b/controller/controller.go index 40787d7..3e7b160 100644 --- a/controller/controller.go +++ b/controller/controller.go @@ -271,7 +271,7 @@ func (c *Controller) getConfigMap(namespace, name string) (*corev1.ConfigMap, er func (c *Controller) processAlertManagerConfigMaps(configmap *corev1.ConfigMap) error { // get contact points and notification policies from logz.io for comparison - logzioContactPoints, err := c.logzioGrafanaAlertsClient.GetLogzioGrafanaContactPoints() + logzioContactPoints, err := c.logzioGrafanaAlertsClient.GetLogzioManagedGrafanaContactPoints() if err != nil { utilruntime.HandleError(err) return err @@ -284,17 +284,12 @@ func (c *Controller) processAlertManagerConfigMaps(configmap *corev1.ConfigMap) // get receivers and routes from alert manager configmap receivers, routes := c.getClusterReceiversAndRoutes(configmap) - - //contactPointsMap := make(map[string]grafana_contact_points.GrafanaContactPoint) - //for _, contactPoint := range logzioContactPoints { - // contactPointsMap[contactPoint.Name] = contactPoint - //} // Creating maps for efficient lookups - receiversMap := 
make(map[string]alert_manager_config.Receiver) for _, receiver := range receivers { receiversMap[receiver.Name] = receiver } + c.processContactPoints(receiversMap, logzioContactPoints) // TODO remove redundant log diff --git a/logzio_alerts_client/logzio_alerts_client.go b/logzio_alerts_client/logzio_alerts_client.go index 24ce85e..fb73e0f 100644 --- a/logzio_alerts_client/logzio_alerts_client.go +++ b/logzio_alerts_client/logzio_alerts_client.go @@ -12,6 +12,7 @@ import ( alert_manager_config "github.com/prometheus/alertmanager/config" "github.com/prometheus/prometheus/model/rulefmt" "k8s.io/klog/v2" + "strings" ) const ( @@ -330,12 +331,19 @@ func (l *LogzioGrafanaAlertsClient) generateGrafanaAlert(rule rulefmt.RuleNode, return grafanaAlert, nil } -func (l *LogzioGrafanaAlertsClient) GetLogzioGrafanaContactPoints() ([]grafana_contact_points.GrafanaContactPoint, error) { +func (l *LogzioGrafanaAlertsClient) GetLogzioManagedGrafanaContactPoints() ([]grafana_contact_points.GrafanaContactPoint, error) { contactPoints, err := l.logzioContactPointClient.GetAllGrafanaContactPoints() if err != nil { return nil, err } - return contactPoints, nil + var managedContactPoints []grafana_contact_points.GrafanaContactPoint + for _, contactPoint := range contactPoints { + // check if the contact point name contains the env id to determine if it is a managed contact point + if strings.Contains(contactPoint.Name, l.envId) { + managedContactPoints = append(managedContactPoints, contactPoint) + } + } + return managedContactPoints, nil } func (l *LogzioGrafanaAlertsClient) GetLogzioGrafanaNotificationPolicies() (grafana_notification_policies.GrafanaNotificationPolicyTree, error) { From 4b3c94b56679eb16ab93adcd1fca39f2c4d9d3b8 Mon Sep 17 00:00:00 2001 From: Yotam loewenbach Date: Wed, 13 Dec 2023 14:15:09 +0700 Subject: [PATCH 16/40] handle notification policy tree --- controller/controller.go | 28 +++--- logzio_alerts_client/logzio_alerts_client.go | 92 ++++++++++++++++++++ 2 files changed, 105 insertions(+), 15 deletions(-) diff --git a/controller/controller.go b/controller/controller.go index 3e7b160..2d01a24 100644 --- a/controller/controller.go +++ b/controller/controller.go @@ -270,20 +270,15 @@ func (c *Controller) getConfigMap(namespace, name string) (*corev1.ConfigMap, er } func (c *Controller) processAlertManagerConfigMaps(configmap *corev1.ConfigMap) error { - // get contact points and notification policies from logz.io for comparison + // get contact points from logz.io for comparison logzioContactPoints, err := c.logzioGrafanaAlertsClient.GetLogzioManagedGrafanaContactPoints() if err != nil { utilruntime.HandleError(err) return err } - logzioNotificationPolicies, err := c.logzioGrafanaAlertsClient.GetLogzioGrafanaNotificationPolicies() - if err != nil { - utilruntime.HandleError(err) - return err - } // get receivers and routes from alert manager configmap - receivers, routes := c.getClusterReceiversAndRoutes(configmap) + receivers, routeTree := c.getClusterReceiversAndRoutes(configmap) // Creating maps for efficient lookups receiversMap := make(map[string]alert_manager_config.Receiver) for _, receiver := range receivers { @@ -292,19 +287,21 @@ func (c *Controller) processAlertManagerConfigMaps(configmap *corev1.ConfigMap) c.processContactPoints(receiversMap, logzioContactPoints) - // TODO remove redundant log - klog.Info(routes, logzioNotificationPolicies) + // Handle the notification policies after contact points are processed, to prevent missing contact points at logzio + 
c.logzioGrafanaAlertsClient.SetNotificationPolicyTree(routeTree) return nil } -func (c *Controller) getClusterReceiversAndRoutes(configmap *corev1.ConfigMap) (receivers []alert_manager_config.Receiver, routes []*alert_manager_config.Route) { +func (c *Controller) getClusterReceiversAndRoutes(configmap *corev1.ConfigMap) ([]alert_manager_config.Receiver, *alert_manager_config.Route) { + var receivers []alert_manager_config.Receiver + var routeTree alert_manager_config.Route if c.isAlertManagerConfigMap(configmap) { for _, value := range configmap.Data { alertManagerConfig, err := alert_manager_config.Load(value) if err != nil { utilruntime.HandleError(fmt.Errorf("unable to load alert manager config; %s", err)) - return nil, nil + return nil, &alert_manager_config.Route{} } // Add prefix to distinguish between alert manager imported from alert manager and logz.io custom contact points stub := common.CreateNameStub(configmap) @@ -313,15 +310,17 @@ func (c *Controller) getClusterReceiversAndRoutes(configmap *corev1.ConfigMap) ( receivers = append(receivers, receiver) } - for _, route := range alertManagerConfig.Route.Routes { + // Add prefix to routes to match with contact points + routeTree = *alertManagerConfig.Route + routeTree.Receiver = fmt.Sprintf("%s-%s-%s", c.envId, stub, routeTree.Receiver) + for _, route := range routeTree.Routes { route.Receiver = fmt.Sprintf("%s-%s-%s", c.envId, stub, route.Receiver) - routes = append(routes, route) } // setting the `AlertManagerGlobalConfig` context for logzio grafana alerts client c.logzioGrafanaAlertsClient.AlertManagerGlobalConfig = alertManagerConfig.Global } } - return receivers, routes + return receivers, &routeTree } func (c *Controller) processContactPoints(receiversMap map[string]alert_manager_config.Receiver, logzioContactPoints []grafana_contact_points.GrafanaContactPoint) { @@ -361,7 +360,6 @@ func (c *Controller) compareContactPoints(receiversMap map[string]alert_manager_ for _, contactPoint := range contactPoints { if _, exists := receiversMap[contactPoint.Name]; !exists { // If the Logz.io contact point does not exist among the receivers, delete it - contactPointsToDelete = append(contactPointsToDelete, contactPoint) } } diff --git a/logzio_alerts_client/logzio_alerts_client.go b/logzio_alerts_client/logzio_alerts_client.go index fb73e0f..ae378be 100644 --- a/logzio_alerts_client/logzio_alerts_client.go +++ b/logzio_alerts_client/logzio_alerts_client.go @@ -11,7 +11,9 @@ import ( "github.com/logzio/prometheus-alerts-migrator/common" alert_manager_config "github.com/prometheus/alertmanager/config" "github.com/prometheus/prometheus/model/rulefmt" + utilruntime "k8s.io/apimachinery/pkg/util/runtime" "k8s.io/klog/v2" + "regexp" "strings" ) @@ -117,6 +119,96 @@ func NewLogzioGrafanaAlertsClient(logzioApiToken string, logzioApiUrl string, ru } } +// SetNotificationPolicyTree converts route tree to grafana notification policy tree and writes it to logz.io +func (l *LogzioGrafanaAlertsClient) SetNotificationPolicyTree(routeTree *alert_manager_config.Route) { + // getting logzio contact points to ensure it exists for the notification policy tree + logzioContactPoints, err := l.GetLogzioManagedGrafanaContactPoints() + if err != nil { + klog.Errorf("Failed to get logz.io managed contact points: %v", err) + return + } + // create contact points map for efficient lookup + existingContactPoints := make(map[string]bool) + for _, contactPoint := range logzioContactPoints { + existingContactPoints[contactPoint.Name] = true + } + 
notificationPolicyTree := l.convertRouteTreeToNotificationPolicyTree(routeTree, existingContactPoints) + err = l.logzioNotificationPolicyClient.SetupGrafanaNotificationPolicyTree(notificationPolicyTree) + if err != nil { + klog.Errorf("Failed to create notification policy tree: %v", err) + } +} + +func (l *LogzioGrafanaAlertsClient) convertRouteTreeToNotificationPolicyTree(routeTree *alert_manager_config.Route, existingContactPoints map[string]bool) (notificationPolicyTree grafana_notification_policies.GrafanaNotificationPolicyTree) { + // checking for empty values to avoid nil pointer errors + if routeTree.GroupByStr != nil { + notificationPolicyTree.GroupBy = routeTree.GroupByStr + } + if routeTree.GroupInterval != nil { + notificationPolicyTree.GroupInterval = routeTree.GroupInterval.String() + } + if routeTree.GroupWait != nil { + notificationPolicyTree.GroupWait = routeTree.GroupWait.String() + } + if routeTree.RepeatInterval != nil { + notificationPolicyTree.RepeatInterval = routeTree.RepeatInterval.String() + } + notificationPolicyTree.Receiver = routeTree.Receiver + for _, childRoute := range routeTree.Routes { + // check if the receiver of the child route exists in `existingContactPoints` + if _, ok := existingContactPoints[childRoute.Receiver]; ok { + notificationPolicy := l.generateGrafanaNotificationPolicy(childRoute) + notificationPolicyTree.Routes = append(notificationPolicyTree.Routes, notificationPolicy) + } + } + return notificationPolicyTree +} + +// generateGrafanaNotificationPolicy generates a GrafanaNotificationPolicy from a alert_manager_config.Route +func (l *LogzioGrafanaAlertsClient) generateGrafanaNotificationPolicy(route *alert_manager_config.Route) (notificationPolicy grafana_notification_policies.GrafanaNotificationPolicy) { + // checking for empty values to avoid nil pointer errors + if route.GroupInterval != nil { + notificationPolicy.GroupInterval = route.GroupInterval.String() + } + if route.GroupWait != nil { + notificationPolicy.GroupWait = route.GroupWait.String() + } + if route.RepeatInterval != nil { + notificationPolicy.RepeatInterval = route.RepeatInterval.String() + } + if route.GroupByStr != nil { + notificationPolicy.GroupBy = route.GroupByStr + } + notificationPolicy.Receiver = route.Receiver + routeMatchersYaml, err := route.Matchers.MarshalYAML() + if err != nil { + utilruntime.HandleError(err) + return grafana_notification_policies.GrafanaNotificationPolicy{} + } + // converting the route matchers to the Grafana format + routeMatchersList := routeMatchersYaml.([]string) + grafanaObjMatchers := grafana_notification_policies.MatchersObj{} + for _, routeMatcher := range routeMatchersList { + // we split the route matcher by the regex (=|~|=|!=) to convert it to the Grafana format + regex := regexp.MustCompile(`(=|~=?|!=)`) + parts := regex.FindStringSubmatchIndex(routeMatcher) + if len(parts) > 0 { + // Extracting the key, operator, and value + key := routeMatcher[:parts[0]] + operator := routeMatcher[parts[0]:parts[1]] + value := routeMatcher[parts[1]:] + grafanaObjMatchers = append(grafanaObjMatchers, grafana_notification_policies.MatcherObj{key, operator, value}) + } + } + notificationPolicy.ObjectMatchers = grafanaObjMatchers + // repeat the process for nested policies + for _, childRoute := range route.Routes { + childNotificationPolicy := l.generateGrafanaNotificationPolicy(childRoute) + notificationPolicy.Routes = append(notificationPolicy.Routes, childNotificationPolicy) + } + return notificationPolicy +} + // WriteContactPoints writes 
the contact points to logz.io func (l *LogzioGrafanaAlertsClient) WriteContactPoints(contactPointsToWrite []alert_manager_config.Receiver) { for _, contactPoint := range contactPointsToWrite { From 5f52fa8b898ed6bb89d02317dda7b006d838ca4c Mon Sep 17 00:00:00 2001 From: Yotam loewenbach Date: Wed, 13 Dec 2023 14:15:48 +0700 Subject: [PATCH 17/40] update before adding new contact points --- controller/controller.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/controller/controller.go b/controller/controller.go index 2d01a24..915114b 100644 --- a/controller/controller.go +++ b/controller/controller.go @@ -326,12 +326,12 @@ func (c *Controller) getClusterReceiversAndRoutes(configmap *corev1.ConfigMap) ( func (c *Controller) processContactPoints(receiversMap map[string]alert_manager_config.Receiver, logzioContactPoints []grafana_contact_points.GrafanaContactPoint) { contactPointsToAdd, contactPointsToUpdate, contactPointsToDelete := c.compareContactPoints(receiversMap, logzioContactPoints) klog.Infof("Contact points summary: to add: %d, to update: %d, to delete: %d", len(contactPointsToAdd), len(contactPointsToUpdate), len(contactPointsToDelete)) - if len(contactPointsToAdd) > 0 { - c.logzioGrafanaAlertsClient.WriteContactPoints(contactPointsToAdd) - } if len(contactPointsToUpdate) > 0 { c.logzioGrafanaAlertsClient.UpdateContactPoints(contactPointsToUpdate, logzioContactPoints) } + if len(contactPointsToAdd) > 0 { + c.logzioGrafanaAlertsClient.WriteContactPoints(contactPointsToAdd) + } if len(contactPointsToDelete) > 0 { c.logzioGrafanaAlertsClient.DeleteContactPoints(contactPointsToDelete) } From cdd22833e2afb324ef493fb27c4b3aba6f25cedd Mon Sep 17 00:00:00 2001 From: Yotam loewenbach Date: Wed, 13 Dec 2023 15:01:54 +0700 Subject: [PATCH 18/40] Add `ignoreSlackText` + `ignoreSlackTitle` --- common/common.go | 100 +++++++++++++++++++ controller/controller.go | 15 +-- logzio_alerts_client/logzio_alerts_client.go | 15 ++- main.go | 93 ++--------------- 4 files changed, 126 insertions(+), 97 deletions(-) diff --git a/common/common.go b/common/common.go index e274bb5..324a978 100644 --- a/common/common.go +++ b/common/common.go @@ -1,6 +1,7 @@ package common import ( + "flag" "fmt" "github.com/logzio/logzio_terraform_client/grafana_alerts" "github.com/prometheus/prometheus/model/rulefmt" @@ -8,6 +9,7 @@ import ( "k8s.io/client-go/rest" "k8s.io/client-go/tools/clientcmd" "k8s.io/client-go/util/homedir" + "k8s.io/klog/v2" "math/rand" "os" "path/filepath" @@ -26,6 +28,91 @@ const ( TypePagerDuty = "pagerduty" // # of letter indices fitting in 63 bits ) +// NewConfig creates a Config struct, populating it with values from command-line flags and environment variables. 
+func NewConfig() *Config { + // Define flags + helpFlag := flag.Bool("help", false, "Display help") + rulesConfigmapAnnotation := flag.String("rules-annotation", "prometheus.io/kube-rules", "Annotation that states that this configmap contains prometheus rules") + alertManagerConfigmapAnnotation := flag.String("alertmanager-annotation", "prometheus.io/kube-alertmanager", "Annotation that states that this configmap contains alertmanager configuration") + logzioAPITokenFlag := flag.String("logzio-api-token", "", "LOGZIO API token") + logzioAPIURLFlag := flag.String("logzio-api-url", "https://api.logz.io", "LOGZIO API URL") + rulesDSFlag := flag.String("rules-ds", "", "name of the data source for the alert rules") + envIDFlag := flag.String("env-id", "my-env", "environment identifier, usually cluster name") + workerCountFlag := flag.Int("workers", 2, "The number of workers to process the alerts") + ignoreSlackTextFlag := flag.Bool("ignore-slack-text", false, "Ignore slack text field") + ignoreSlackTitleFlag := flag.Bool("ignore-slack-title", false, "Ignore slack title field") + + // Parse the flags + flag.Parse() + + if *helpFlag { + flag.PrintDefaults() + os.Exit(0) + } + + // Environment variables have lower precedence than flags + logzioAPIURL := getEnvWithFallback("LOGZIO_API_URL", *logzioAPIURLFlag) + envID := getEnvWithFallback("ENV_ID", *envIDFlag) + + ignoreSlackText := getEnvWithFallback("IGNORE_SLACK_TEXT", strconv.FormatBool(*ignoreSlackTextFlag)) + ignoreSlackTextBool, err := strconv.ParseBool(ignoreSlackText) + if err != nil { + klog.Fatal("Invalid value for IGNORE_SLACK_TEXT") + } + + ignoreSlackTitle := getEnvWithFallback("IGNORE_SLACK_TITLE", strconv.FormatBool(*ignoreSlackTitleFlag)) + ignoreSlackTitleBool, err := strconv.ParseBool(ignoreSlackTitle) + if err != nil { + klog.Fatal("Invalid value for IGNORE_SLACK_TITLE") + } + + // api token is mandatory + logzioAPIToken := getEnvWithFallback("LOGZIO_API_TOKEN", *logzioAPITokenFlag) + if logzioAPIToken == "" { + klog.Fatal("No logzio api token provided") + } + rulesDS := getEnvWithFallback("RULES_DS", *rulesDSFlag) + if rulesDS == "" { + klog.Fatal("No rules data source provided") + } + // Annotation must be provided either by flag or environment variable + rulesAnnotation := getEnvWithFallback("RULES_CONFIGMAP_ANNOTATION", *rulesConfigmapAnnotation) + if rulesAnnotation == "" { + klog.Fatal("No rules configmap annotation provided") + } + // Annotation must be provided either by flag or environment variable + alertManagerAnnotation := getEnvWithFallback("ALERTMANAGER_CONFIGMAP_ANNOTATION", *alertManagerConfigmapAnnotation) + if alertManagerAnnotation == "" { + klog.Fatal("No alert manager configmap annotation provided") + } + workerCountStr := getEnvWithFallback("WORKERS_COOUNT", strconv.Itoa(*workerCountFlag)) + workerCount, err := strconv.Atoi(workerCountStr) + + if err != nil { + workerCount = 2 // default value + } + + return &Config{ + RulesAnnotation: rulesAnnotation, + AlertManagerAnnotation: alertManagerAnnotation, + LogzioAPIToken: logzioAPIToken, + LogzioAPIURL: logzioAPIURL, + RulesDS: rulesDS, + EnvID: envID, + WorkerCount: workerCount, + IgnoreSlackText: ignoreSlackTextBool, + IgnoreSlackTitle: ignoreSlackTitleBool, + } +} + +// getEnvWithFallback tries to get the value from an environment variable and falls back to the given default value if not found. 
+func getEnvWithFallback(envName, defaultValue string) string { + if value, exists := os.LookupEnv(envName); exists { + return value + } + return defaultValue +} + // GenerateRandomString borrowed from here https://stackoverflow.com/questions/22892120/how-to-generate-a-random-string-of-a-fixed-length-in-go func GenerateRandomString(n int) string { if n <= 0 { @@ -119,3 +206,16 @@ func GetConfig() (*rest.Config, error) { return config, nil } + +// Config holds all the configuration needed for the application to run. +type Config struct { + RulesAnnotation string + AlertManagerAnnotation string + LogzioAPIToken string + LogzioAPIURL string + RulesDS string + EnvID string + WorkerCount int + IgnoreSlackText bool + IgnoreSlackTitle bool +} diff --git a/controller/controller.go b/controller/controller.go index 915114b..6808219 100644 --- a/controller/controller.go +++ b/controller/controller.go @@ -70,12 +70,7 @@ type MultiRuleGroups struct { func NewController( kubeclientset *kubernetes.Clientset, configmapInformer corev1informers.ConfigMapInformer, - rulesAnnotation *string, - alertManagerAnnotation *string, - logzioApiToken string, - logzioApiUrl string, - rulesDs string, - envId string, + config common.Config, ) *Controller { utilruntime.Must(scheme.AddToScheme(scheme.Scheme)) @@ -84,7 +79,7 @@ func NewController( eventBroadcaster.StartLogging(klog.Infof) eventBroadcaster.StartRecordingToSink(&typedcorev1.EventSinkImpl{Interface: kubeclientset.CoreV1().Events("")}) recorder := eventBroadcaster.NewRecorder(scheme.Scheme, corev1.EventSource{Component: controllerAgentName}) - logzioGrafanaAlertsClient := logzio_alerts_client.NewLogzioGrafanaAlertsClient(logzioApiToken, logzioApiUrl, rulesDs, envId) + logzioGrafanaAlertsClient := logzio_alerts_client.NewLogzioGrafanaAlertsClient(config.LogzioAPIToken, config.LogzioAPIURL, config.RulesDS, config.EnvID, config.IgnoreSlackText, config.IgnoreSlackTitle) if logzioGrafanaAlertsClient == nil { klog.Errorf("Failed to create logzio grafana alerts client") return nil @@ -96,11 +91,11 @@ func NewController( configmapsSynced: configmapInformer.Informer().HasSynced, workqueue: workqueue.NewRateLimitingQueue(workqueue.DefaultControllerRateLimiter()), recorder: recorder, - rulesAnnotation: rulesAnnotation, - alertManagerAnnotation: alertManagerAnnotation, + rulesAnnotation: &config.RulesAnnotation, + alertManagerAnnotation: &config.AlertManagerAnnotation, resourceVersionMap: make(map[string]string), logzioGrafanaAlertsClient: logzioGrafanaAlertsClient, - envId: envId, + envId: config.EnvID, } controller.configmapEventRecorderFunc = controller.recordEventOnConfigMap diff --git a/logzio_alerts_client/logzio_alerts_client.go b/logzio_alerts_client/logzio_alerts_client.go index ae378be..17cfcc4 100644 --- a/logzio_alerts_client/logzio_alerts_client.go +++ b/logzio_alerts_client/logzio_alerts_client.go @@ -70,9 +70,11 @@ type LogzioGrafanaAlertsClient struct { logzioNotificationPolicyClient *grafana_notification_policies.GrafanaNotificationPolicyClient rulesDataSource string envId string + ignoreSlackText bool + ignoreSlackTitle bool } -func NewLogzioGrafanaAlertsClient(logzioApiToken string, logzioApiUrl string, rulesDs string, envId string) *LogzioGrafanaAlertsClient { +func NewLogzioGrafanaAlertsClient(logzioApiToken string, logzioApiUrl string, rulesDs string, envId string, ignoreSlackText bool, ignoreSlackTitle bool) *LogzioGrafanaAlertsClient { logzioAlertClient, err := grafana_alerts.New(logzioApiToken, logzioApiUrl) if err != nil { klog.Errorf("Failed to 
create logzio alert client: %v", err) @@ -116,6 +118,8 @@ func NewLogzioGrafanaAlertsClient(logzioApiToken string, logzioApiUrl string, ru logzioNotificationPolicyClient: logzioNotificationPolicyClient, rulesDataSource: rulesDsData.Uid, envId: envId, + ignoreSlackText: ignoreSlackText, + ignoreSlackTitle: ignoreSlackTitle, } } @@ -283,11 +287,16 @@ func (l *LogzioGrafanaAlertsClient) generateGrafanaContactPoint(receiver alert_m Settings: map[string]interface{}{ "url": url, "recipient": slackConfig.Channel, - "text": slackConfig.Text, - "title": slackConfig.Title, "username": slackConfig.Username, }, } + // Adding title and text fields based on program flags + if !l.ignoreSlackTitle { + contactPoint.Settings["title"] = slackConfig.Title + } + if !l.ignoreSlackText { + contactPoint.Settings["text"] = slackConfig.Text + } contactPointsList = append(contactPointsList, contactPoint) } // check for pagerduty type configs diff --git a/main.go b/main.go index 8cdf25a..93cdeee 100644 --- a/main.go +++ b/main.go @@ -1,10 +1,7 @@ package main import ( - "flag" "github.com/logzio/prometheus-alerts-migrator/common" - "os" - "strconv" "time" "k8s.io/client-go/informers" @@ -15,86 +12,8 @@ import ( "github.com/logzio/prometheus-alerts-migrator/pkg/signals" ) -// Config holds all the configuration needed for the application to run. -type Config struct { - RulesAnnotation string - AlertManagerAnnotation string - LogzioAPIToken string - LogzioAPIURL string - RulesDS string - EnvID string - WorkerCount int -} - -// NewConfig creates a Config struct, populating it with values from command-line flags and environment variables. -func NewConfig() *Config { - // Define flags - helpFlag := flag.Bool("help", false, "Display help") - rulesConfigmapAnnotation := flag.String("rules-annotation", "prometheus.io/kube-rules", "Annotation that states that this configmap contains prometheus rules") - alertManagerConfigmapAnnotation := flag.String("alertmanager-annotation", "prometheus.io/kube-alertmanager", "Annotation that states that this configmap contains alertmanager configuration") - logzioAPITokenFlag := flag.String("logzio-api-token", "", "LOGZIO API token") - logzioAPIURLFlag := flag.String("logzio-api-url", "https://api.logz.io", "LOGZIO API URL") - rulesDSFlag := flag.String("rules-ds", "", "name of the data source for the alert rules") - envIDFlag := flag.String("env-id", "my-env", "environment identifier, usually cluster name") - workerCountFlag := flag.Int("workers", 2, "The number of workers to process the alerts") - - // Parse the flags - flag.Parse() - - if *helpFlag { - flag.PrintDefaults() - os.Exit(0) - } - - // Environment variables have lower precedence than flags - logzioAPIURL := getEnvWithFallback("LOGZIO_API_URL", *logzioAPIURLFlag) - envID := getEnvWithFallback("ENV_ID", *envIDFlag) - // api token is mandatory - logzioAPIToken := getEnvWithFallback("LOGZIO_API_TOKEN", *logzioAPITokenFlag) - if logzioAPIToken == "" { - klog.Fatal("No logzio api token provided") - } - rulesDS := getEnvWithFallback("RULES_DS", *rulesDSFlag) - if rulesDS == "" { - klog.Fatal("No rules data source provided") - } - // Annotation must be provided either by flag or environment variable - rulesAnnotation := getEnvWithFallback("RULES_CONFIGMAP_ANNOTATION", *rulesConfigmapAnnotation) - if rulesAnnotation == "" { - klog.Fatal("No rules configmap annotation provided") - } - // Annotation must be provided either by flag or environment variable - alertManagerAnnotation := getEnvWithFallback("ALERTMANAGER_CONFIGMAP_ANNOTATION", 
*alertManagerConfigmapAnnotation) - if alertManagerAnnotation == "" { - klog.Fatal("No alert manager configmap annotation provided") - } - workerCountStr := getEnvWithFallback("WORKERS_COOUNT", strconv.Itoa(*workerCountFlag)) - workerCount, err := strconv.Atoi(workerCountStr) - if err != nil { - workerCount = 2 // default value - } - - return &Config{ - RulesAnnotation: rulesAnnotation, - AlertManagerAnnotation: alertManagerAnnotation, - LogzioAPIToken: logzioAPIToken, - LogzioAPIURL: logzioAPIURL, - RulesDS: rulesDS, - EnvID: envID, - WorkerCount: workerCount, - } -} - -// getEnvWithFallback tries to get the value from an environment variable and falls back to the given default value if not found. -func getEnvWithFallback(envName, defaultValue string) string { - if value, exists := os.LookupEnv(envName); exists { - return value - } - return defaultValue -} - func main() { - config := NewConfig() + config := common.NewConfig() klog.Info("Rule Updater starting.\n") klog.Infof("Rules configMap annotation: %s\n", config.RulesAnnotation) @@ -103,6 +22,12 @@ func main() { klog.Infof("Logzio api url: %s\n", config.LogzioAPIURL) klog.Infof("Logzio rules data source: %s\n", config.RulesDS) klog.Infof("Number of workers: %d\n", config.WorkerCount) + if config.IgnoreSlackText == true { + klog.Info("Slack text field will be ignored") + } + if config.IgnoreSlackTitle == true { + klog.Info("Slack title field will be ignored") + } // set up signals so we handle the first shutdown signal gracefully stopCh := signals.SetupSignalHandler() @@ -118,8 +43,8 @@ func main() { } kubeInformerFactory := informers.NewSharedInformerFactory(kubeClient, time.Second*30) - - ctl := controller.NewController(kubeClient, kubeInformerFactory.Core().V1().ConfigMaps(), &config.RulesAnnotation, &config.AlertManagerAnnotation, config.LogzioAPIToken, config.LogzioAPIURL, config.RulesDS, config.EnvID) + ctl := controller.NewController(kubeClient, kubeInformerFactory.Core().V1().ConfigMaps(), *config) + //ctl := controller.NewController(kubeClient, kubeInformerFactory.Core().V1().ConfigMaps(), &config.RulesAnnotation, &config.AlertManagerAnnotation, config.LogzioAPIToken, config.LogzioAPIURL, config.RulesDS, config.EnvID) if ctl == nil { klog.Fatal("Error creating controller") } From 23e2ffd0bc5962a9c97af44ceca3801672131e34 Mon Sep 17 00:00:00 2001 From: Yotam loewenbach Date: Wed, 13 Dec 2023 18:12:55 +0700 Subject: [PATCH 19/40] remove comments --- main.go | 1 - 1 file changed, 1 deletion(-) diff --git a/main.go b/main.go index 93cdeee..d842464 100644 --- a/main.go +++ b/main.go @@ -44,7 +44,6 @@ func main() { kubeInformerFactory := informers.NewSharedInformerFactory(kubeClient, time.Second*30) ctl := controller.NewController(kubeClient, kubeInformerFactory.Core().V1().ConfigMaps(), *config) - //ctl := controller.NewController(kubeClient, kubeInformerFactory.Core().V1().ConfigMaps(), &config.RulesAnnotation, &config.AlertManagerAnnotation, config.LogzioAPIToken, config.LogzioAPIURL, config.RulesDS, config.EnvID) if ctl == nil { klog.Fatal("Error creating controller") } From a6868bcccd67c9055eb2af10e628b74a6f5f645a Mon Sep 17 00:00:00 2001 From: Yotam loewenbach Date: Wed, 13 Dec 2023 18:13:47 +0700 Subject: [PATCH 20/40] use `common.NewConfig()` in tests --- controller/controller_e2e_test.go | 9 ++------- controller/controller_test.go | 9 ++------- logzio_alerts_client/logzio_alerts_client_test.go | 8 +++----- 3 files changed, 7 insertions(+), 19 deletions(-) diff --git a/controller/controller_e2e_test.go 
b/controller/controller_e2e_test.go index 6100fb7..b953ced 100644 --- a/controller/controller_e2e_test.go +++ b/controller/controller_e2e_test.go @@ -13,7 +13,6 @@ import ( "k8s.io/client-go/kubernetes" "k8s.io/client-go/kubernetes/scheme" "log" - "os" "testing" "time" ) @@ -88,16 +87,12 @@ func TestControllerE2E(t *testing.T) { if err != nil { t.Fatalf("Failed to create Kubernetes clientset: %v", err) } - logzioUrl := os.Getenv("LOGZIO_API_URL") - logzioAPIToken := os.Getenv("LOGZIO_API_TOKEN") - rulesDS := os.Getenv("RULES_DS") - rulesAnnotation := os.Getenv("RULES_CONFIGMAP_ANNOTATION") - alertManagerAnnotation := os.Getenv("ALERTMANAGER_CONFIGMAP_ANNOTATION") + ctlConfig := common.NewConfig() kubeInformerFactory := informers.NewSharedInformerFactory(clientset, time.Second*30) // set up signals so we handle the first shutdown signal gracefully stopCh := signals.SetupSignalHandler() // Instantiate the controller - ctrl := NewController(clientset, kubeInformerFactory.Core().V1().ConfigMaps(), &rulesAnnotation, &alertManagerAnnotation, logzioAPIToken, logzioUrl, rulesDS, "integration-test") + ctrl := NewController(clientset, kubeInformerFactory.Core().V1().ConfigMaps(), *ctlConfig) // defer cleanup defer cleanupLogzioAlerts(*ctrl) diff --git a/controller/controller_test.go b/controller/controller_test.go index f88d413..dc48373 100644 --- a/controller/controller_test.go +++ b/controller/controller_test.go @@ -10,7 +10,6 @@ import ( "k8s.io/client-go/informers" "k8s.io/client-go/kubernetes" "k8s.io/klog/v2" - "os" "reflect" "testing" ) @@ -27,13 +26,9 @@ func generateTestController() *Controller { if err != nil { klog.Fatalf("Error building kubernetes clientset: %s", err) } - logzioUrl := os.Getenv("LOGZIO_API_URL") - logzioAPIToken := os.Getenv("LOGZIO_API_TOKEN") - rulesDS := os.Getenv("RULES_DS") + ctlConfig := common.NewConfig() kubeInformerFactory := informers.NewSharedInformerFactory(kubeClient, 0) - rulesAnnotation := "test-annotation" - alertManagerAnnotation := "am-test-annotation" - c := NewController(kubeClient, kubeInformerFactory.Core().V1().ConfigMaps(), &rulesAnnotation, &alertManagerAnnotation, logzioAPIToken, logzioUrl, rulesDS, "integration-test") + c := NewController(kubeClient, kubeInformerFactory.Core().V1().ConfigMaps(), *ctlConfig) return c } diff --git a/logzio_alerts_client/logzio_alerts_client_test.go b/logzio_alerts_client/logzio_alerts_client_test.go index 766d147..1c5fa1b 100644 --- a/logzio_alerts_client/logzio_alerts_client_test.go +++ b/logzio_alerts_client/logzio_alerts_client_test.go @@ -1,20 +1,18 @@ package logzio_alerts_client import ( + "github.com/logzio/prometheus-alerts-migrator/common" "github.com/prometheus/common/model" "github.com/prometheus/prometheus/model/rulefmt" "gopkg.in/yaml.v3" - "os" "reflect" "testing" "time" ) func generateTestLogzioGrafanaAlertsClient() *LogzioGrafanaAlertsClient { - logzioUrl := os.Getenv("LOGZIO_API_URL") - logzioAPIToken := os.Getenv("LOGZIO_API_TOKEN") - rulesDS := os.Getenv("RULES_DS") - logzioGrafanaAlertsClient := NewLogzioGrafanaAlertsClient(logzioUrl, logzioAPIToken, rulesDS, "integration-test") + ctlConfig := common.NewConfig() + logzioGrafanaAlertsClient := NewLogzioGrafanaAlertsClient(ctlConfig.LogzioAPIToken, ctlConfig.LogzioAPIURL, ctlConfig.RulesDS, ctlConfig.EnvID, ctlConfig.IgnoreSlackTitle, ctlConfig.IgnoreSlackTitle) return logzioGrafanaAlertsClient } From 60c10e6e841c3826a19dc9e5a4cf478fb18c317f Mon Sep 17 00:00:00 2001 From: Yotam loewenbach Date: Wed, 13 Dec 2023 18:15:38 +0700 Subject: [PATCH 
21/40] typo --- common/common.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common/common.go b/common/common.go index 324a978..83c4ee7 100644 --- a/common/common.go +++ b/common/common.go @@ -85,7 +85,7 @@ func NewConfig() *Config { if alertManagerAnnotation == "" { klog.Fatal("No alert manager configmap annotation provided") } - workerCountStr := getEnvWithFallback("WORKERS_COOUNT", strconv.Itoa(*workerCountFlag)) + workerCountStr := getEnvWithFallback("WORKERS_COUNT", strconv.Itoa(*workerCountFlag)) workerCount, err := strconv.Atoi(workerCountStr) if err != nil { From c295ee4cd89b2d088222e19b5e1cc38e1012ede3 Mon Sep 17 00:00:00 2001 From: Yotam loewenbach Date: Thu, 14 Dec 2023 13:07:51 +0700 Subject: [PATCH 22/40] fix comment --- common/common.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common/common.go b/common/common.go index 83c4ee7..6f9a42f 100644 --- a/common/common.go +++ b/common/common.go @@ -50,7 +50,7 @@ func NewConfig() *Config { os.Exit(0) } - // Environment variables have lower precedence than flags + // Environment variables have higher precedence than flags logzioAPIURL := getEnvWithFallback("LOGZIO_API_URL", *logzioAPIURLFlag) envID := getEnvWithFallback("ENV_ID", *envIDFlag) From 019f4771b3a46fe62f8d2534bcdd3f2bb33cf268 Mon Sep 17 00:00:00 2001 From: Yotam loewenbach Date: Thu, 14 Dec 2023 13:12:53 +0700 Subject: [PATCH 23/40] docs + changelog --- README.md | 6 ++++++ common/common.go | 4 ++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 610b961..26bb916 100644 --- a/README.md +++ b/README.md @@ -21,6 +21,8 @@ Configure the application using the following environment variables: | `RULES_DS` | The metrics data source name in logz.io for the Prometheus rules. | `None` | | `ENV_ID` | Environment identifier, usually cluster name. | `my-env` | | `WORKER_COUNT` | The number of workers to process the alerts. | `2` | +| `IGNORE_SLACK_TEXT` | Ignore slack contact points `text` field. | `false` | +| `IGNORE_SLACK_TITLE` | Ignore slack contact points `title` field. | `false` | Please ensure to set all necessary environment variables before running the application.
@@ -62,6 +64,10 @@ data: - Deploy the configmap to your cluster `kubectl apply -f .yml` ## Changelog +- v1.0.3 + - Handle Prometheus alert manager configuration file + - Add CRUD operations for contact points and notification policies + - Add `IGNORE_SLACK_TEXT` and `IGNORE_SLACK_TITLE` flags - v1.0.2 - Add `reduce` query to alerts (grafana alerts can evaluate alerts only from reduced data) - v1.0.1 diff --git a/common/common.go b/common/common.go index 6f9a42f..f4f2278 100644 --- a/common/common.go +++ b/common/common.go @@ -39,8 +39,8 @@ func NewConfig() *Config { rulesDSFlag := flag.String("rules-ds", "", "name of the data source for the alert rules") envIDFlag := flag.String("env-id", "my-env", "environment identifier, usually cluster name") workerCountFlag := flag.Int("workers", 2, "The number of workers to process the alerts") - ignoreSlackTextFlag := flag.Bool("ignore-slack-text", false, "Ignore slack text field") - ignoreSlackTitleFlag := flag.Bool("ignore-slack-title", false, "Ignore slack title field") + ignoreSlackTextFlag := flag.Bool("ignore-slack-text", false, "Ignore slack contact points text field") + ignoreSlackTitleFlag := flag.Bool("ignore-slack-title", false, "Ignore slack contact points title field") // Parse the flags flag.Parse() From 65a58fddc2cd5f9f83739cbbda44b72e03453597 Mon Sep 17 00:00:00 2001 From: Yotam loewenbach Date: Thu, 14 Dec 2023 13:17:41 +0700 Subject: [PATCH 24/40] alert-manager configmap example --- README.md | 63 ++++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 60 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 26bb916..6359f06 100644 --- a/README.md +++ b/README.md @@ -33,12 +33,12 @@ To start using the controller: 2. Navigate to the project directory. 3. Run the controller `make run-local`. -### ConfigMap Format +### ConfigMap format The controller is designed to process ConfigMaps containing Prometheus alert rules. These ConfigMaps must be annotated with a specific key that matches the value of the `ANNOTATION` environment variable for the controller to process them. -### Example ConfigMap +### Example rules configMap -Below is an example of how a ConfigMap should be structured: +Below is an example of how a rules configMap should be structured: ```yaml apiVersion: v1 @@ -63,6 +63,63 @@ data: - Replace `prometheus.io/kube-rules` with the actual annotation you use to identify relevant ConfigMaps. The data section should contain your Prometheus alert rules in YAML format. - Deploy the configmap to your cluster `kubectl apply -f .yml` +### Example alert manager configMap + +Below is an example of how a alert manager ConfigMap should be structured: + +```yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: logzio-rules + namespace: monitoring + annotations: + prometheus.io/kube-alertmanager: "true" +data: + all_instances_down_otel_collector: | + global: + # Global configurations, adjust these to your SMTP server details + smtp_smarthost: 'smtp.example.com:587' + smtp_from: 'alertmanager@example.com' + smtp_auth_username: 'alertmanager' + smtp_auth_password: 'password' + # The root route on which each incoming alert enters. + route: + receiver: 'default-receiver' + group_by: ['alertname', 'env'] + group_wait: 30s + group_interval: 5m + repeat_interval: 1h + # Child routes + routes: + - match: + env: production + receiver: 'slack-production' + continue: true + - match: + env: staging + receiver: 'slack-staging' + continue: true + + # Receivers defines ways to send notifications about alerts. 
+ receivers: + - name: 'default-receiver' + email_configs: + - to: 'alerts@example.com' + - name: 'slack-production' + slack_configs: + - api_url: 'https://hooks.slack.com/services/T00000000/B00000000/XXXXXXXXXXXXXXXXXXXXXXXX' + channel: '#prod-alerts' + - name: 'slack-staging' + slack_configs: + - api_url: 'https://hooks.slack.com/services/T00000000/B11111111/YYYYYYYYYYYYYYYYYYYYYYYY' + channel: '#staging-alerts' + +``` +- Replace `prometheus.io/kube-alertmanager` with the actual annotation you use to identify relevant ConfigMaps. The data section should contain your Prometheus alert rules in YAML format. +- Deploy the configmap to your cluster `kubectl apply -f .yml` + + ## Changelog - v1.0.3 - Handle Prometheus alert manager configuration file From fc5f4d3f37c0e7d6b96455f31c7ac1537228622c Mon Sep 17 00:00:00 2001 From: Yotam loewenbach Date: Thu, 14 Dec 2023 13:34:30 +0700 Subject: [PATCH 25/40] docs --- README.md | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 6359f06..41e24bd 100644 --- a/README.md +++ b/README.md @@ -8,6 +8,13 @@ Before running this software, ensure you have: - Access to a Kubernetes cluster - Logz.io account with API access +## Supported contact point types +- `Email` +- `Slack` +- `Pagerduty` + +More types will be supported in the future, If you have a specific request please post an issue with your request + ## Configuration Configure the application using the following environment variables: @@ -34,7 +41,7 @@ To start using the controller: 3. Run the controller `make run-local`. ### ConfigMap format -The controller is designed to process ConfigMaps containing Prometheus alert rules. These ConfigMaps must be annotated with a specific key that matches the value of the `ANNOTATION` environment variable for the controller to process them. +The controller is designed to process ConfigMaps containing Prometheus alert rules and promethium alert manager configuration. These ConfigMaps must be annotated with a specific key that matches the value of the `RULES_CONFIGMAP_ANNOTATION` or `ALERTMANAGER_CONFIGMAP_ANNOTATION` environment variables for the controller to process them. ### Example rules configMap From 7467aafd3f328e9e834a00b94ccca5c5ccccf982 Mon Sep 17 00:00:00 2001 From: Yotam loewenbach Date: Mon, 18 Dec 2023 16:54:38 +0700 Subject: [PATCH 26/40] Improve tests --- common/common.go | 47 +++++++--- ...e_test.go => controller_e2e_rules_test.go} | 9 +- controller/controller_test.go | 4 +- logzio_alerts_client/logzio_alerts_client.go | 5 ++ .../logzio_alerts_client_test.go | 85 +++++++++++++++++++ testdata/cm.yml | 6 +- 6 files changed, 135 insertions(+), 21 deletions(-) rename controller/{controller_e2e_test.go => controller_e2e_rules_test.go} (96%) diff --git a/common/common.go b/common/common.go index f4f2278..b7eaaff 100644 --- a/common/common.go +++ b/common/common.go @@ -28,20 +28,45 @@ const ( TypePagerDuty = "pagerduty" // # of letter indices fitting in 63 bits ) +var ( + helpFlag, ignoreSlackTextFlag, ignoreSlackTitleFlag *bool + logzioAPITokenFlag, rulesConfigmapAnnotation, alertManagerConfigmapAnnotation, logzioAPIURLFlag, rulesDSFlag, envIDFlag *string + workerCountFlag *int +) + // NewConfig creates a Config struct, populating it with values from command-line flags and environment variables. 
func NewConfig() *Config { // Define flags - helpFlag := flag.Bool("help", false, "Display help") - rulesConfigmapAnnotation := flag.String("rules-annotation", "prometheus.io/kube-rules", "Annotation that states that this configmap contains prometheus rules") - alertManagerConfigmapAnnotation := flag.String("alertmanager-annotation", "prometheus.io/kube-alertmanager", "Annotation that states that this configmap contains alertmanager configuration") - logzioAPITokenFlag := flag.String("logzio-api-token", "", "LOGZIO API token") - logzioAPIURLFlag := flag.String("logzio-api-url", "https://api.logz.io", "LOGZIO API URL") - rulesDSFlag := flag.String("rules-ds", "", "name of the data source for the alert rules") - envIDFlag := flag.String("env-id", "my-env", "environment identifier, usually cluster name") - workerCountFlag := flag.Int("workers", 2, "The number of workers to process the alerts") - ignoreSlackTextFlag := flag.Bool("ignore-slack-text", false, "Ignore slack contact points text field") - ignoreSlackTitleFlag := flag.Bool("ignore-slack-title", false, "Ignore slack contact points title field") - + if flag.Lookup("help") == nil { + helpFlag = flag.Bool("help", false, "Display help") + } + if flag.Lookup("rules-annotation") == nil { + rulesConfigmapAnnotation = flag.String("rules-annotation", "prometheus.io/kube-rules", "Annotation that states that this configmap contains prometheus rules") + } + if flag.Lookup("alertmanager-annotation") == nil { + alertManagerConfigmapAnnotation = flag.String("alertmanager-annotation", "prometheus.io/kube-alertmanager", "Annotation that states that this configmap contains alertmanager configuration") + } + if flag.Lookup("logzio-api-token") == nil { + logzioAPITokenFlag = flag.String("logzio-api-token", "", "LOGZIO API token") + } + if flag.Lookup("logzio-api-url") == nil { + logzioAPIURLFlag = flag.String("logzio-api-url", "https://api.logz.io", "LOGZIO API URL") + } + if flag.Lookup("rules-ds") == nil { + rulesDSFlag = flag.String("rules-ds", "", "name of the data source for the alert rules") + } + if flag.Lookup("env-id") == nil { + envIDFlag = flag.String("env-id", "my-env", "environment identifier, usually cluster name") + } + if flag.Lookup("workers") == nil { + workerCountFlag = flag.Int("workers", 2, "The number of workers to process the alerts") + } + if flag.Lookup("ignore-slack-text") == nil { + ignoreSlackTextFlag = flag.Bool("ignore-slack-text", false, "Ignore slack contact points text field") + } + if flag.Lookup("ignore-slack-title") == nil { + ignoreSlackTitleFlag = flag.Bool("ignore-slack-title", false, "Ignore slack contact points title field") + } // Parse the flags flag.Parse() diff --git a/controller/controller_e2e_test.go b/controller/controller_e2e_rules_test.go similarity index 96% rename from controller/controller_e2e_test.go rename to controller/controller_e2e_rules_test.go index b953ced..1f5e33c 100644 --- a/controller/controller_e2e_test.go +++ b/controller/controller_e2e_rules_test.go @@ -6,13 +6,13 @@ import ( "github.com/logzio/prometheus-alerts-migrator/common" "github.com/logzio/prometheus-alerts-migrator/pkg/signals" "github.com/stretchr/testify/assert" - "io/ioutil" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/client-go/informers" "k8s.io/client-go/kubernetes" "k8s.io/client-go/kubernetes/scheme" "log" + "os" "testing" "time" ) @@ -23,7 +23,7 @@ const testNamespace = "alert-migrator-test" func deployConfigMaps(clientset *kubernetes.Clientset, configs ...string) error { for _, 
config := range configs { // Read the YAML file content - yamlContent, err := ioutil.ReadFile(config) + yamlContent, err := os.ReadFile(config) if err != nil { return fmt.Errorf("failed to read YAML file %s: %v", config, err) } @@ -71,12 +71,11 @@ func cleanupLogzioAlerts(ctl Controller) { if err != nil { log.Fatalf("Failed to get logzio alerts: %v", err) } - // defer cleanup ctl.logzioGrafanaAlertsClient.DeleteRules(logzioAlerts, folderUid) } // TestControllerE2E is the main function that runs the end-to-end test -func TestControllerE2E(t *testing.T) { +func TestControllerRulesE2E(t *testing.T) { // Setup the test environment config, err := common.GetConfig() if err != nil { @@ -111,7 +110,7 @@ func TestControllerE2E(t *testing.T) { } }() t.Log("going to sleep") - time.Sleep(time.Second * 10) + time.Sleep(time.Second * 5) folderUid, err := ctrl.logzioGrafanaAlertsClient.FindOrCreatePrometheusAlertsFolder() if err != nil { t.Fatalf("Failed to get logzio alerts folder uid: %v", err) diff --git a/controller/controller_test.go b/controller/controller_test.go index dc48373..1c17ca3 100644 --- a/controller/controller_test.go +++ b/controller/controller_test.go @@ -14,7 +14,7 @@ import ( "testing" ) -const annotation = "test-annotation" +const annotation = "prometheus.io/kube-rules" func generateTestController() *Controller { cfg, err := common.GetConfig() @@ -127,7 +127,7 @@ func TestIsRuleConfigMap(t *testing.T) { configMap: &v1.ConfigMap{ ObjectMeta: metav1.ObjectMeta{ Annotations: map[string]string{ - "test-annotation": "true", + annotation: "true", }, }, }, diff --git a/logzio_alerts_client/logzio_alerts_client.go b/logzio_alerts_client/logzio_alerts_client.go index 17cfcc4..12bf302 100644 --- a/logzio_alerts_client/logzio_alerts_client.go +++ b/logzio_alerts_client/logzio_alerts_client.go @@ -363,6 +363,11 @@ func (l *LogzioGrafanaAlertsClient) WriteRules(rulesToWrite []rulefmt.RuleNode, // generateGrafanaAlert generates a GrafanaAlertRule from a Prometheus rule func (l *LogzioGrafanaAlertsClient) generateGrafanaAlert(rule rulefmt.RuleNode, folderUid string) (grafana_alerts.GrafanaAlertRule, error) { + // validate the rule + validationErrs := rule.Validate() + if len(validationErrs) > 0 { + return grafana_alerts.GrafanaAlertRule{}, fmt.Errorf("invalid rule: %v", validationErrs) + } // Create promql query to return time series data for the expression. 
promqlQuery := PrometheusQueryModel{ Expr: rule.Expr.Value, diff --git a/logzio_alerts_client/logzio_alerts_client_test.go b/logzio_alerts_client/logzio_alerts_client_test.go index 1c5fa1b..ad011b4 100644 --- a/logzio_alerts_client/logzio_alerts_client_test.go +++ b/logzio_alerts_client/logzio_alerts_client_test.go @@ -2,9 +2,12 @@ package logzio_alerts_client import ( "github.com/logzio/prometheus-alerts-migrator/common" + "github.com/prometheus/alertmanager/config" "github.com/prometheus/common/model" "github.com/prometheus/prometheus/model/rulefmt" + "github.com/stretchr/testify/assert" "gopkg.in/yaml.v3" + "net/url" "reflect" "testing" "time" @@ -27,6 +30,13 @@ func TestGenerateGrafanaAlert(t *testing.T) { Labels: map[string]string{"severity": "critical"}, Annotations: map[string]string{"description": "Instance is down"}, } + invalidRule := rulefmt.RuleNode{ + Alert: yaml.Node{Value: "TestAlertInvalid"}, + Expr: yaml.Node{Value: "up as== 1sadsa"}, + For: model.Duration(5 * time.Minute), + Labels: map[string]string{"severity": "critical"}, + Annotations: map[string]string{"description": "Instance is down"}, + } baseFolderUid := "folder123" // Test cases @@ -42,6 +52,12 @@ func TestGenerateGrafanaAlert(t *testing.T) { folderUid: baseFolderUid, wantErr: false, }, + { + name: "invalid rule", + rule: invalidRule, + folderUid: baseFolderUid, + wantErr: true, + }, } for _, tc := range testCases { @@ -77,3 +93,72 @@ func TestGenerateGrafanaAlert(t *testing.T) { }) } } + +func TestGenerateGrafanaContactPoint(t *testing.T) { + client := generateTestLogzioGrafanaAlertsClient() + testCases := []struct { + name string + receiver config.Receiver + expectedLength int + expectedType string + }{ + { + name: "Email Configuration", + receiver: config.Receiver{ + EmailConfigs: []*config.EmailConfig{ + { + To: "test@example.com", + }, + { + To: "test2@example.com", + }, + }, + }, + expectedLength: 2, + expectedType: common.TypeEmail, + }, + { + name: "Slack Configuration", + receiver: config.Receiver{ + SlackConfigs: []*config.SlackConfig{ + { + Channel: "#test", + APIURL: &config.SecretURL{ + URL: &url.URL{ + Scheme: "https", + Host: "api.slack.com", + Path: "/api/chat.postMessage", + }, + }, + }, + }, + }, + expectedLength: 1, + expectedType: common.TypeSlack, + }, + { + name: "Pagerduty Configuration", + receiver: config.Receiver{ + PagerdutyConfigs: []*config.PagerdutyConfig{ + { + ServiceKey: "test", + }, + }, + }, + expectedLength: 1, + expectedType: common.TypePagerDuty, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + contactPoints := client.generateGrafanaContactPoint(tc.receiver) + assert.Len(t, contactPoints, tc.expectedLength, "Incorrect number of contact points generated") + // Assert the type of contact point + if tc.expectedLength > 0 { + assert.Equal(t, tc.expectedType, contactPoints[0].Type, "Incorrect type of contact point") + // Add more assertions to check other fields like settings, name, etc. 
+ } + }) + } +} diff --git a/testdata/cm.yml b/testdata/cm.yml index f8011ad..838441c 100644 --- a/testdata/cm.yml +++ b/testdata/cm.yml @@ -59,9 +59,9 @@ data: annotations: description: "The Splunk OpenTelemetry collector is failing to export spans with the following protocol {% raw %}{{ $labels.exporter }}{% endraw %}" causes: "Service is most likely unhealthy" - all_instances_down_otel_collector_yotams: | + all_instances_down_otel_collector: | alert: Opentelemetry_Collector_Downq - expr: sum(up{app="opentelemetry-collectord", job="kubernetes-pods"}) == 0 or absent(up{app="opentelemetry-collector", job="kubernetes-pods"}) > 0 + expr: sum(up{app="opentelemetry-collectordsd", job="kubernetes-pods"}) == 1 or absent(up{app="opentelemetry-collector", job="kubernetes-pods"}) > 0 for: 5m labels: team: "sre" @@ -71,7 +71,7 @@ data: causes: "Service is most likely down or fails healthchecks" all_instances_down_splunk_collectors: | alert: Splunk_Collector_Down - expr: sum(up{app="splunk-otel-collector", job="kubernetes-pods"}) == 0 or absent(up{app="splunk-otel-collector", job="kubernetes-pods"}) > 0 + expr: sum(up{app="splunk-otel-collectorsd", job="kubernetes-pods"}) == 1 or absent(up{app="splunk-otel-collector", job="kubernetes-pods"}) > 0 for: 5m labels: team: "sre" From 0c3f1812d9a2b014f8eea6f65a45a9cd5e5bf6f9 Mon Sep 17 00:00:00 2001 From: Yotam loewenbach Date: Tue, 19 Dec 2023 12:40:39 +0700 Subject: [PATCH 27/40] increase wait time --- controller/controller_e2e_rules_test.go | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/controller/controller_e2e_rules_test.go b/controller/controller_e2e_rules_test.go index 1f5e33c..6f33f2a 100644 --- a/controller/controller_e2e_rules_test.go +++ b/controller/controller_e2e_rules_test.go @@ -4,7 +4,6 @@ import ( "context" "fmt" "github.com/logzio/prometheus-alerts-migrator/common" - "github.com/logzio/prometheus-alerts-migrator/pkg/signals" "github.com/stretchr/testify/assert" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -88,8 +87,6 @@ func TestControllerRulesE2E(t *testing.T) { } ctlConfig := common.NewConfig() kubeInformerFactory := informers.NewSharedInformerFactory(clientset, time.Second*30) - // set up signals so we handle the first shutdown signal gracefully - stopCh := signals.SetupSignalHandler() // Instantiate the controller ctrl := NewController(clientset, kubeInformerFactory.Core().V1().ConfigMaps(), *ctlConfig) @@ -97,7 +94,7 @@ func TestControllerRulesE2E(t *testing.T) { defer cleanupLogzioAlerts(*ctrl) defer cleanupTestCluster(clientset, testNamespace, "opentelemetry-rules", "infrastructure-rules") - kubeInformerFactory.Start(stopCh) + //kubeInformerFactory.Start(stopCh) err = deployConfigMaps(clientset, "../testdata/cm.yml", "../testdata/cm2.yml") if err != nil { t.Fatalf("Failed to deploy ConfigMaps: %v", err) @@ -110,7 +107,7 @@ func TestControllerRulesE2E(t *testing.T) { } }() t.Log("going to sleep") - time.Sleep(time.Second * 5) + time.Sleep(time.Second * 10) folderUid, err := ctrl.logzioGrafanaAlertsClient.FindOrCreatePrometheusAlertsFolder() if err != nil { t.Fatalf("Failed to get logzio alerts folder uid: %v", err) @@ -119,6 +116,10 @@ func TestControllerRulesE2E(t *testing.T) { if err != nil { t.Fatalf("Failed to get logzio alerts: %v", err) } + t.Log("logzio alert rules:") + for i, alert := range logzioAlerts { + t.Logf("%d: %v", i, alert.Title) + } assert.Equal(t, 14, len(logzioAlerts)) } From c76118a5ec36292d5218faec6749825b1a741642 Mon Sep 17 00:00:00 2001 From: 
Yotam loewenbach Date: Tue, 19 Dec 2023 12:41:25 +0700 Subject: [PATCH 28/40] Add contact points e2e test --- controller/controller_e2e_cp_test.go | 70 ++++++ testdata/alert_manager.yaml | 305 +++++++++++++++++++++++++++ 2 files changed, 375 insertions(+) create mode 100644 controller/controller_e2e_cp_test.go create mode 100644 testdata/alert_manager.yaml diff --git a/controller/controller_e2e_cp_test.go b/controller/controller_e2e_cp_test.go new file mode 100644 index 0000000..e321c6f --- /dev/null +++ b/controller/controller_e2e_cp_test.go @@ -0,0 +1,70 @@ +package controller + +import ( + "github.com/logzio/prometheus-alerts-migrator/common" + "github.com/logzio/prometheus-alerts-migrator/pkg/signals" + "github.com/stretchr/testify/assert" + "k8s.io/client-go/informers" + "k8s.io/client-go/kubernetes" + "log" + "testing" + "time" +) + +var stopCh = signals.SetupSignalHandler() + +func cleanupLogzioContactPoints(ctl Controller) { + contactPoints, err := ctl.logzioGrafanaAlertsClient.GetLogzioManagedGrafanaContactPoints() + if err != nil { + log.Fatalf("Failed to get logzio contact points: %v", err) + } + ctl.logzioGrafanaAlertsClient.DeleteContactPoints(contactPoints) +} + +// TestControllerE2E is the main function that runs the end-to-end test +func TestControllerContactPointsE2E(t *testing.T) { + // Setup the test environment + config, err := common.GetConfig() + if err != nil { + t.Fatalf("Failed to get Kubernetes config: %v", err) + } + + clientset, err := kubernetes.NewForConfig(config) + if err != nil { + t.Fatalf("Failed to create Kubernetes clientset: %v", err) + } + ctlConfig := common.NewConfig() + kubeInformerFactory := informers.NewSharedInformerFactory(clientset, time.Second*30) + // set up signals so we handle the first shutdown signal gracefully + // Instantiate the controller + ctrl := NewController(clientset, kubeInformerFactory.Core().V1().ConfigMaps(), *ctlConfig) + + kubeInformerFactory.Start(stopCh) + + // test contact points + defer cleanupLogzioContactPoints(*ctrl) + defer cleanupTestCluster(clientset, testNamespace, "alert-manager") + err = deployConfigMaps(clientset, "../testdata/alert_manager.yaml") + if err != nil { + t.Fatalf("Failed to deploy ConfigMaps: %v", err) + } + go func() { + runErr := ctrl.Run(1, stopCh) + if runErr != nil { + t.Errorf("Failed to run controller: %v", runErr) + return + } + }() + t.Log("going to sleep") + time.Sleep(time.Second * 10) + logzioContactPoints, err := ctrl.logzioGrafanaAlertsClient.GetLogzioManagedGrafanaContactPoints() + t.Log("logzio contact points:") + for i, contactPoint := range logzioContactPoints { + t.Logf("%d: %v", i, contactPoint.Name) + } + if err != nil { + t.Fatalf("Failed to get logzio contact points: %v", err) + } + assert.Equal(t, 12, len(logzioContactPoints)) + +} diff --git a/testdata/alert_manager.yaml b/testdata/alert_manager.yaml new file mode 100644 index 0000000..bc46f9f --- /dev/null +++ b/testdata/alert_manager.yaml @@ -0,0 +1,305 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: alert-manager + namespace: alert-migrator-test + labels: + app: prometheus + annotations: + prometheus.io/kube-alertmanager: "true" +data: + alert_manager: | + global: + resolve_timeout: 5m + http_config: + follow_redirects: true + enable_http2: true + smtp_from: alertmanager@logzio.com + smtp_hello: localhost + smtp_require_tls: false + slack_api_url: https://api.slack.com/ + pagerduty_url: https://events.pagerduty.com/v2/enqueue + opsgenie_api_url: https://api.opsgenie.com/ + wechat_api_url: 
https://qyapi.weixin.qq.com/cgi-bin/ + victorops_api_url: https://alert.victorops.com/integrations/generic/20131114/alert/ + telegram_api_url: https://api.telegram.org + webex_api_url: https://webexapis.com/v1/messages + route: + receiver: lost-alerts-slack + group_by: + - alertname + - hostname + - service + continue: false + routes: [] + receivers: + - name: lost-alerts-slack + slack_configs: + - send_resolved: true + http_config: + follow_redirects: true + enable_http2: true + api_url: https://api.slack.com/ + channel: '#eng-sre-lost-alerts' + username: AlertManagerd (tooling-test) + color: '{{ if eq .Status "firing" }}danger{{ else }}good{{ end }}' + title: '{{ template "default.title" . }}' + title_link: '{{ template "slack.default.titlelink" . }}' + pretext: '{{ template "slack.default.pretext" . }}' + text: '{{ template "default.text" . }}' + short_fields: false + footer: '{{ template "slack.default.footer" . }}' + fallback: '{{ template "slack.default.fallback" . }}' + callback_id: '{{ template "slack.default.callbackid" . }}' + icon_emoji: '{{ template "slack.default.iconemoji" . }}' + icon_url: '{{ template "slack.default.iconurl" . }}' + link_names: false + + - send_resolved: true + http_config: + follow_redirects: true + enable_http2: true + api_url: https://api.slack.com/ + channel: '#eng-sre-lost-alertsto' + username: AlertManagers (tooling-test) + color: '{{ if eq .Status "firing" }}danger{{ else }}good{{ end }}' + title: '{{ template "default.title" . }}' + title_link: '{{ template "slack.default.titlelink" . }}' + pretext: '{{ template "slack.default.pretext" . }}' + text: '{{ template "default.text" . }}' + short_fields: false + footer: '{{ template "slack.default.footer" . }}' + fallback: '{{ template "slack.default.fallback" . }}' + callback_id: '{{ template "slack.default.callbackid" . }}' + icon_emoji: '{{ template "slack.default.iconemoji" . }}' + icon_url: '{{ template "slack.default.iconurl" . }}' + link_names: false + + - name: ada-disaster-channels + slack_configs: + - send_resolved: true + http_config: + follow_redirects: true + enable_http2: true + api_url: https://api.slack.com/ + channel: '#eng-ada-alerts' + username: AlertManager (tooling-test) + color: '{{ if eq .Status "firing" }}danger{{ else }}good{{ end }}' + title: '{{ template "default.title" . }}' + title_link: '{{ template "slack.default.titlelink" . }}' + pretext: '{{ template "slack.default.pretext" . }}' + text: '{{ template "default.text" . }}' + short_fields: false + footer: '{{ template "slack.default.footer" . }}' + fallback: '{{ template "slack.default.fallback" . }}' + callback_id: '{{ template "slack.default.callbackid" . }}' + icon_emoji: '{{ template "slack.default.iconemoji" . }}' + icon_url: '{{ template "slack.default.iconurl" . }}' + link_names: false + - name: ada-major-channels + slack_configs: + - send_resolved: true + http_config: + follow_redirects: true + enable_http2: true + api_url: https://api.slack.com/ + channel: '#eng-ada-alerts' + username: AlertManager (tooling-test) + color: '{{ if eq .Status "firing" }}danger{{ else }}good{{ end }}' + title: '{{ template "default.title" . }}' + title_link: '{{ template "slack.default.titlelink" . }}' + pretext: '{{ template "slack.default.pretext" . }}' + text: '{{ template "default.text" . }}' + short_fields: false + footer: '{{ template "slack.default.footer" . }}' + fallback: '{{ template "slack.default.fallback" . }}' + callback_id: '{{ template "slack.default.callbackid" . 
}}' + icon_emoji: '{{ template "slack.default.iconemoji" . }}' + icon_url: '{{ template "slack.default.iconurl" . }}' + link_names: false + - name: ada-testlab-channels + slack_configs: + - send_resolved: true + http_config: + follow_redirects: true + enable_http2: true + api_url: https://api.slack.com/ + channel: '#eng-ada-lab-alerts' + username: AlertManager (tooling-test) + color: '{{ if eq .Status "firing" }}danger{{ else }}good{{ end }}' + title: '{{ template "default.title" . }}' + title_link: '{{ template "slack.default.titlelink" . }}' + pretext: '{{ template "slack.default.pretext" . }}' + text: '{{ template "default.text" . }}' + short_fields: false + footer: '{{ template "slack.default.footer" . }}' + fallback: '{{ template "slack.default.fallback" . }}' + callback_id: '{{ template "slack.default.callbackid" . }}' + icon_emoji: '{{ template "slack.default.iconemoji" . }}' + icon_url: '{{ template "slack.default.iconurl" . }}' + link_names: false + - name: athlone-disaster-channels + slack_configs: + - send_resolved: true + http_config: + follow_redirects: true + enable_http2: true + api_url: https://api.slack.com/ + channel: '#eng-athlone-alerts' + username: AlertManager (tooling-test) + color: '{{ if eq .Status "firing" }}danger{{ else }}good{{ end }}' + title: '{{ template "default.title" . }}' + title_link: '{{ template "slack.default.titlelink" . }}' + pretext: '{{ template "slack.default.pretext" . }}' + text: '{{ template "default.text" . }}' + short_fields: false + footer: '{{ template "slack.default.footer" . }}' + fallback: '{{ template "slack.default.fallback" . }}' + callback_id: '{{ template "slack.default.callbackid" . }}' + icon_emoji: '{{ template "slack.default.iconemoji" . }}' + icon_url: '{{ template "slack.default.iconurl" . }}' + link_names: false + - name: ops-disaster-channels + pagerduty_configs: + - send_resolved: true + http_config: + follow_redirects: true + enable_http2: true + service_key: + url: https://events.pagerduty.com/v2/enqueue + client: '{{ template "pagerduty.default.client" . }}' + client_url: '{{ template "pagerduty.default.clientURL" . }}' + description: '{{ template "pagerduty.default.description" .}}' + details: + firing: '{{ template "pagerduty.default.instances" .Alerts.Firing }}' + num_firing: '{{ .Alerts.Firing | len }}' + num_resolved: '{{ .Alerts.Resolved | len }}' + resolved: '{{ template "pagerduty.default.instances" .Alerts.Resolved }}' + source: '{{ template "pagerduty.default.client" . }}' + slack_configs: + - send_resolved: true + http_config: + follow_redirects: true + enable_http2: true + api_url: https://api.slack.com/ + channel: '#eng-sysops-alerts' + username: AlertManager (tooling-test) + color: '{{ if eq .Status "firing" }}danger{{ else }}good{{ end }}' + title: '{{ template "default.title" . }}' + title_link: '{{ template "slack.default.titlelink" . }}' + pretext: '{{ template "slack.default.pretext" . }}' + text: '{{ template "default.text" . }}' + short_fields: false + footer: '{{ template "slack.default.footer" . }}' + fallback: '{{ template "slack.default.fallback" . }}' + callback_id: '{{ template "slack.default.callbackid" . }}' + icon_emoji: '{{ template "slack.default.iconemoji" . }}' + icon_url: '{{ template "slack.default.iconurl" . 
}}' + link_names: false + - name: ops-major-channels + slack_configs: + - send_resolved: true + http_config: + follow_redirects: true + enable_http2: true + api_url: https://api.slack.com/ + channel: '#eng-sysops-alerts' + username: AlertManager (tooling-test) + color: '{{ if eq .Status "firing" }}danger{{ else }}good{{ end }}' + title: '{{ template "default.title" . }}' + title_link: '{{ template "slack.default.titlelink" . }}' + pretext: '{{ template "slack.default.pretext" . }}' + text: '{{ template "default.text" . }}' + short_fields: false + footer: '{{ template "slack.default.footer" . }}' + fallback: '{{ template "slack.default.fallback" . }}' + callback_id: '{{ template "slack.default.callbackid" . }}' + icon_emoji: '{{ template "slack.default.iconemoji" . }}' + icon_url: '{{ template "slack.default.iconurl" . }}' + link_names: false + - name: ops-quarantine-channels + slack_configs: + - send_resolved: true + http_config: + follow_redirects: true + enable_http2: true + api_url: https://api.slack.com/ + channel: '#eng-ops-alerts-spam' + username: AlertManager (tooling-test) + color: '{{ if eq .Status "firing" }}danger{{ else }}good{{ end }}' + title: '{{ template "default.title" . }}' + title_link: '{{ template "slack.default.titlelink" . }}' + pretext: '{{ template "slack.default.pretext" . }}' + text: '{{ template "default.text" . }}' + short_fields: false + footer: '{{ template "slack.default.footer" . }}' + fallback: '{{ template "slack.default.fallback" . }}' + callback_id: '{{ template "slack.default.callbackid" . }}' + icon_emoji: '{{ template "slack.default.iconemoji" . }}' + icon_url: '{{ template "slack.default.iconurl" . }}' + link_names: false + - name: ops-testlab-channelss + slack_configs: + - send_resolved: true + http_config: + follow_redirects: true + enable_http2: true + api_url: https://api.slack.com/ + channel: '#eng-sysops-alerts' + username: AlertManager (tooling-test222) + color: '{{ if eq .Status "firing" }}danger{{ else }}good{{ end }}' + title: '{{ template "default.title" . }}' + title_link: '{{ template "slack.default.titlelink" . }}' + pretext: '{{ template "slack.default.pretext" . }}' + text: '{{ template "default.text" . }}' + short_fields: false + footer: '{{ template "slack.default.footer" . }}' + fallback: '{{ template "slack.default.fallback" . }}' + callback_id: '{{ template "slack.default.callbackid" . }}' + icon_emoji: '{{ template "slack.default.iconemoji" . }}' + icon_url: '{{ template "slack.default.iconurl" . }}' + link_names: false + - name: kube-system-alerts + slack_configs: + - send_resolved: true + http_config: + follow_redirects: true + enable_http2: true + api_url: https://api.slack.com/ + channel: '#eng-sysops-alerts' + username: AlertManager (tooling-test) + color: '{{ if eq .Status "firing" }}danger{{ else }}good{{ end }}' + title: '{{ template "default.title" . }}' + title_link: '{{ template "slack.default.titlelink" . }}' + pretext: '{{ template "slack.default.pretext" . }}' + text: '{{ template "default.text" . }}' + short_fields: false + footer: '{{ template "slack.default.footer" . }}' + fallback: '{{ template "slack.default.fallback" . }}' + callback_id: '{{ template "slack.default.callbackid" . }}' + icon_emoji: '{{ template "slack.default.iconemoji" . }}' + icon_url: '{{ template "slack.default.iconurl" . 
}}' + link_names: false + - name: optimus-disaster-channels + slack_configs: + - send_resolved: true + http_config: + follow_redirects: true + enable_http2: true + api_url: https://api.slack.com/ + channel: '#eng-optimus-alerts' + username: AlertManager (tooling-test) + color: '{{ if eq .Status "firing" }}danger{{ else }}good{{ end }}' + title: '{{ template "default.title" . }}' + title_link: '{{ template "slack.default.titlelink" . }}' + pretext: '{{ template "slack.default.pretext" . }}' + text: '{{ template "default.text" . }}' + short_fields: false + footer: '{{ template "slack.default.footer" . }}' + fallback: '{{ template "slack.default.fallback" . }}' + callback_id: '{{ template "slack.default.callbackid" . }}' + icon_emoji: '{{ template "slack.default.iconemoji" . }}' + icon_url: '{{ template "slack.default.iconurl" . }}' + link_names: false + templates: [] + From d4531d2e969ce12aa616690471debffa460a371a Mon Sep 17 00:00:00 2001 From: Yotam loewenbach Date: Tue, 19 Dec 2023 12:48:27 +0700 Subject: [PATCH 29/40] verbose logs in `go test` --- .github/workflows/test.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index f6761f1..cd7f1bd 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -43,5 +43,5 @@ jobs: run: go mod download - name: Run tests - run: go test ./... -cover + run: go test -v ./... -cover From 85eba26d803d2410a32ac5b26e1192c263f6b358 Mon Sep 17 00:00:00 2001 From: Yotam loewenbach Date: Tue, 19 Dec 2023 13:03:15 +0700 Subject: [PATCH 30/40] Add coverage report --- .github/workflows/test.yaml | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index cd7f1bd..402ea05 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -43,5 +43,11 @@ jobs: run: go mod download - name: Run tests - run: go test -v ./... -cover + run: go test -v ./... -coverprofile=coverage.out + + - name: Upload coverage profile + uses: actions/upload-artifact@v3 + with: + name: coverage-report + path: coverage.out From 604fd882a2fe89590562b0210e056f0c0e993ecf Mon Sep 17 00:00:00 2001 From: Yotam loewenbach Date: Tue, 19 Dec 2023 13:08:54 +0700 Subject: [PATCH 31/40] Print coverage --- .github/workflows/test.yaml | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 402ea05..7d8b460 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -45,9 +45,7 @@ jobs: - name: Run tests run: go test -v ./... 
-coverprofile=coverage.out - - name: Upload coverage profile - uses: actions/upload-artifact@v3 - with: - name: coverage-report - path: coverage.out + - name: Extract coverage percentage + run: go tool cover -func=coverage.out | grep total | awk '{print $3}' + From e9689fd1068296b5a87e2c44b9f2067ed3830153 Mon Sep 17 00:00:00 2001 From: Yotam loewenbach Date: Tue, 19 Dec 2023 14:11:20 +0700 Subject: [PATCH 32/40] Add `ResetNotificationPolicyTree()` --- controller/controller.go | 2 +- logzio_alerts_client/logzio_alerts_client.go | 29 ++++++++++++++------ 2 files changed, 22 insertions(+), 9 deletions(-) diff --git a/controller/controller.go b/controller/controller.go index 6808219..b3a9685 100644 --- a/controller/controller.go +++ b/controller/controller.go @@ -283,7 +283,7 @@ func (c *Controller) processAlertManagerConfigMaps(configmap *corev1.ConfigMap) c.processContactPoints(receiversMap, logzioContactPoints) // Handle the notification policies after contact points are processed, to prevent missing contact points at logzio - c.logzioGrafanaAlertsClient.SetNotificationPolicyTree(routeTree) + c.logzioGrafanaAlertsClient.SetNotificationPolicyTreeFromRouteTree(routeTree) return nil } diff --git a/logzio_alerts_client/logzio_alerts_client.go b/logzio_alerts_client/logzio_alerts_client.go index 12bf302..9502275 100644 --- a/logzio_alerts_client/logzio_alerts_client.go +++ b/logzio_alerts_client/logzio_alerts_client.go @@ -18,12 +18,13 @@ import ( ) const ( - refIdA = "A" - refIdB = "B" - expressionString = "__expr__" - queryType = "query" - alertFolder = "prometheus-alerts" - randomStringLength = 5 + refIdA = "A" + refIdB = "B" + expressionString = "__expr__" + queryType = "query" + alertFolder = "prometheus-alerts" + randomStringLength = 5 + grafanaDefaultReceiver = "default-email" ) // ReduceQueryModel represents a reduce query for time series data @@ -123,8 +124,20 @@ func NewLogzioGrafanaAlertsClient(logzioApiToken string, logzioApiUrl string, ru } } -// SetNotificationPolicyTree converts route tree to grafana notification policy tree and writes it to logz.io -func (l *LogzioGrafanaAlertsClient) SetNotificationPolicyTree(routeTree *alert_manager_config.Route) { +func (l *LogzioGrafanaAlertsClient) ResetNotificationPolicyTree() error { + defaultGrafanaNotificationPolicy := grafana_notification_policies.GrafanaNotificationPolicyTree{ + Receiver: grafanaDefaultReceiver, + Routes: []grafana_notification_policies.GrafanaNotificationPolicy{}, + } + err := l.logzioNotificationPolicyClient.SetupGrafanaNotificationPolicyTree(defaultGrafanaNotificationPolicy) + if err != nil { + return err + } + return nil +} + +// SetNotificationPolicyTreeFromRouteTree converts route tree to grafana notification policy tree and writes it to logz.io +func (l *LogzioGrafanaAlertsClient) SetNotificationPolicyTreeFromRouteTree(routeTree *alert_manager_config.Route) { // getting logzio contact points to ensure it exists for the notification policy tree logzioContactPoints, err := l.GetLogzioManagedGrafanaContactPoints() if err != nil { From 7ff7a8093b4081e3d413cadf86faf6a586e91cae Mon Sep 17 00:00:00 2001 From: Yotam loewenbach Date: Tue, 19 Dec 2023 14:42:27 +0700 Subject: [PATCH 33/40] Add `controller_e2e_np_test.go` --- controller/controller_e2e_cp_test.go | 2 +- controller/controller_e2e_np_test.go | 72 +++++++++ controller/controller_e2e_rules_test.go | 1 - ...yaml => alert_manager_contact_points.yaml} | 0 .../alert_manager_notification_policies.yaml | 137 ++++++++++++++++++ 5 files changed, 210 insertions(+), 2 
deletions(-) create mode 100644 controller/controller_e2e_np_test.go rename testdata/{alert_manager.yaml => alert_manager_contact_points.yaml} (100%) create mode 100644 testdata/alert_manager_notification_policies.yaml diff --git a/controller/controller_e2e_cp_test.go b/controller/controller_e2e_cp_test.go index e321c6f..f1240e1 100644 --- a/controller/controller_e2e_cp_test.go +++ b/controller/controller_e2e_cp_test.go @@ -44,7 +44,7 @@ func TestControllerContactPointsE2E(t *testing.T) { // test contact points defer cleanupLogzioContactPoints(*ctrl) defer cleanupTestCluster(clientset, testNamespace, "alert-manager") - err = deployConfigMaps(clientset, "../testdata/alert_manager.yaml") + err = deployConfigMaps(clientset, "../testdata/alert_manager_contact_points.yaml") if err != nil { t.Fatalf("Failed to deploy ConfigMaps: %v", err) } diff --git a/controller/controller_e2e_np_test.go b/controller/controller_e2e_np_test.go new file mode 100644 index 0000000..a55b254 --- /dev/null +++ b/controller/controller_e2e_np_test.go @@ -0,0 +1,72 @@ +package controller + +import ( + "github.com/logzio/prometheus-alerts-migrator/common" + "github.com/stretchr/testify/assert" + "k8s.io/client-go/informers" + "k8s.io/client-go/kubernetes" + "k8s.io/klog/v2" + "testing" + "time" +) + +func cleanupLogzioNotificationPolicies(ctl Controller) { + err := ctl.logzioGrafanaAlertsClient.ResetNotificationPolicyTree() + if err != nil { + klog.Error(err) + } +} + +// TestControllerE2E is the main function that runs the end-to-end test +func TestControllerNotificationPoliciesE2E(t *testing.T) { + // Setup the test environment + config, err := common.GetConfig() + if err != nil { + t.Fatalf("Failed to get Kubernetes config: %v", err) + } + + clientset, err := kubernetes.NewForConfig(config) + if err != nil { + t.Fatalf("Failed to create Kubernetes clientset: %v", err) + } + ctlConfig := common.NewConfig() + kubeInformerFactory := informers.NewSharedInformerFactory(clientset, time.Second*30) + // Instantiate the controller + ctrl := NewController(clientset, kubeInformerFactory.Core().V1().ConfigMaps(), *ctlConfig) + // cleanup before starting the test to start in a clean env + cleanupLogzioNotificationPolicies(*ctrl) + cleanupLogzioContactPoints(*ctrl) + // test contact points + defer cleanupLogzioNotificationPolicies(*ctrl) + defer cleanupLogzioContactPoints(*ctrl) + defer cleanupTestCluster(clientset, testNamespace, "alert-manager-np") + err = deployConfigMaps(clientset, "../testdata/alert_manager_notification_policies.yaml") + if err != nil { + t.Fatalf("Failed to deploy ConfigMaps: %v", err) + } + go func() { + runErr := ctrl.Run(1, stopCh) + if runErr != nil { + t.Errorf("Failed to run controller: %v", runErr) + return + } + }() + t.Log("going to sleep") + time.Sleep(time.Second * 10) + logzioContactPoints, err := ctrl.logzioGrafanaAlertsClient.GetLogzioManagedGrafanaContactPoints() + t.Log("logzio contact points:") + for i, contactPoint := range logzioContactPoints { + t.Logf("%d: %v", i, contactPoint.Name) + } + if err != nil { + t.Fatalf("Failed to get logzio contact points: %v", err) + } + assert.Equal(t, 8, len(logzioContactPoints)) + logzioNotificationPolicyTree, err := ctrl.logzioGrafanaAlertsClient.GetLogzioGrafanaNotificationPolicies() + assert.Equal(t, "my-env-alert-migrator-test-alert-manager-np-default-email", logzioNotificationPolicyTree.Receiver) + t.Log("logzio routes:") + for i, route := range logzioNotificationPolicyTree.Routes { + t.Logf("route %d: %v", i, route.Receiver) + } + assert.Equal(t, 7, 
len(logzioNotificationPolicyTree.Routes)) +} diff --git a/controller/controller_e2e_rules_test.go b/controller/controller_e2e_rules_test.go index 6f33f2a..6638bae 100644 --- a/controller/controller_e2e_rules_test.go +++ b/controller/controller_e2e_rules_test.go @@ -94,7 +94,6 @@ func TestControllerRulesE2E(t *testing.T) { defer cleanupLogzioAlerts(*ctrl) defer cleanupTestCluster(clientset, testNamespace, "opentelemetry-rules", "infrastructure-rules") - //kubeInformerFactory.Start(stopCh) err = deployConfigMaps(clientset, "../testdata/cm.yml", "../testdata/cm2.yml") if err != nil { t.Fatalf("Failed to deploy ConfigMaps: %v", err) diff --git a/testdata/alert_manager.yaml b/testdata/alert_manager_contact_points.yaml similarity index 100% rename from testdata/alert_manager.yaml rename to testdata/alert_manager_contact_points.yaml diff --git a/testdata/alert_manager_notification_policies.yaml b/testdata/alert_manager_notification_policies.yaml new file mode 100644 index 0000000..62f433b --- /dev/null +++ b/testdata/alert_manager_notification_policies.yaml @@ -0,0 +1,137 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: alert-manager-np + namespace: alert-migrator-test + annotations: + prometheus.io/kube-alertmanager: "true" +data: + all_instances_down_otel_collector: | + global: + smtp_smarthost: 'smtp.example.com:587' + smtp_from: 'alertmanager@example.com' + smtp_auth_username: 'alertmanager' + smtp_auth_password: 'password' + route: + receiver: 'default-email' + group_by: ['alertname', 'env'] + group_wait: 30s + group_interval: 5m + repeat_interval: 1h + routes: + - matchers: + - quarantine="true" + - team="ops" + group_by: + - alertname + - hostname + - instance + - device + - node + receiver: 'slack-quarantine-ops' + continue: true + + - matchers: + - severity="critical" + - team="dev" + group_by: + - alertname + - hostname + - instance + - device + - node + receiver: 'pagerduty-critical-dev' + continue: true + + - matchers: + - environment="staging" + group_by: + - alertname + - hostname + - instance + - device + - node + receiver: 'email-staging' + continue: true + + - matchers: + - environment="production" + - severity="warning" + group_by: + - alertname + - hostname + - instance + - device + - node + receiver: 'slack-prod-warning' + continue: true + + - matchers: + - team="network" + group_by: + - alertname + - hostname + - instance + - device + - node + receiver: 'slack-network-team' + continue: true + + - matchers: + - environment="qa" + group_by: + - alertname + - hostname + - instance + - device + - node + receiver: 'slack-qa-alerts' + continue: true + + - matchers: + - service="database" + group_by: + - alertname + - hostname + - instance + - device + - node + receiver: 'email-database-service' + continue: true + + receivers: + - name: 'default-email' + email_configs: + - to: 'alerts@example.com' + + - name: 'slack-quarantine-ops' + slack_configs: + - api_url: 'https://hooks.slack.com/services/T00000000/B00000000' + channel: '#quarantine-ops-alerts' + + - name: 'pagerduty-critical-dev' + pagerduty_configs: + - service_key: 'YOUR_PAGERDUTY_SERVICE_KEY' + + - name: 'email-staging' + email_configs: + - to: 'staging-alerts@example.com' + + - name: 'slack-prod-warning' + slack_configs: + - api_url: 'https://hooks.slack.com/services/T00000000/B11111111' + channel: '#prod-warning-alerts' + + - name: 'slack-network-team' + slack_configs: + - api_url: 'https://hooks.slack.com/services/T00000000/B22222222' + channel: '#network-team-alerts' + + - name: 'slack-qa-alerts' + slack_configs: 
+ - api_url: 'https://hooks.slack.com/services/T00000000/B33333333' + channel: '#qa-alerts' + + - name: 'email-database-service' + email_configs: + - to: 'database-service-alerts@example.com' From 0f54c64143480418ac564506372abaa154ae1e52 Mon Sep 17 00:00:00 2001 From: Yotam loewenbach Date: Tue, 19 Dec 2023 14:57:00 +0700 Subject: [PATCH 34/40] cleanupLogzioContactPoints() after cleanupLogzioNotificationPolicies() --- controller/controller_e2e_cp_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/controller/controller_e2e_cp_test.go b/controller/controller_e2e_cp_test.go index f1240e1..29025d0 100644 --- a/controller/controller_e2e_cp_test.go +++ b/controller/controller_e2e_cp_test.go @@ -65,6 +65,6 @@ func TestControllerContactPointsE2E(t *testing.T) { if err != nil { t.Fatalf("Failed to get logzio contact points: %v", err) } - assert.Equal(t, 12, len(logzioContactPoints)) + assert.Equal(t, 13, len(logzioContactPoints)) } From 8ebc9e28fb075dcf1962172913838d788219f77f Mon Sep 17 00:00:00 2001 From: Yotam loewenbach Date: Tue, 19 Dec 2023 14:57:12 +0700 Subject: [PATCH 35/40] fix e2e --- controller/controller_e2e_np_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/controller/controller_e2e_np_test.go b/controller/controller_e2e_np_test.go index a55b254..78fb51c 100644 --- a/controller/controller_e2e_np_test.go +++ b/controller/controller_e2e_np_test.go @@ -15,6 +15,7 @@ func cleanupLogzioNotificationPolicies(ctl Controller) { if err != nil { klog.Error(err) } + cleanupLogzioContactPoints(ctl) } // TestControllerE2E is the main function that runs the end-to-end test @@ -38,7 +39,6 @@ func TestControllerNotificationPoliciesE2E(t *testing.T) { cleanupLogzioContactPoints(*ctrl) // test contact points defer cleanupLogzioNotificationPolicies(*ctrl) - defer cleanupLogzioContactPoints(*ctrl) defer cleanupTestCluster(clientset, testNamespace, "alert-manager-np") err = deployConfigMaps(clientset, "../testdata/alert_manager_notification_policies.yaml") if err != nil { From 6312a094c4941c156379f6a326125d81cc3523b7 Mon Sep 17 00:00:00 2001 From: Yotam loewenbach Date: Wed, 20 Dec 2023 13:06:03 +0700 Subject: [PATCH 36/40] handling `match` operators (old prometheus) --- logzio_alerts_client/logzio_alerts_client.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/logzio_alerts_client/logzio_alerts_client.go b/logzio_alerts_client/logzio_alerts_client.go index 9502275..ba2059c 100644 --- a/logzio_alerts_client/logzio_alerts_client.go +++ b/logzio_alerts_client/logzio_alerts_client.go @@ -217,6 +217,10 @@ func (l *LogzioGrafanaAlertsClient) generateGrafanaNotificationPolicy(route *ale grafanaObjMatchers = append(grafanaObjMatchers, grafana_notification_policies.MatcherObj{key, operator, value}) } } + // handling `match` operators although it's deprecated to support users with old prometheus versions + for key, value := range route.Match { + grafanaObjMatchers = append(grafanaObjMatchers, grafana_notification_policies.MatcherObj{key, "=", value}) + } notificationPolicy.ObjectMatchers = grafanaObjMatchers // repeat the process for nested policies for _, childRoute := range route.Routes { From 7997a993a1d6a505ba07d5dbe9ec56ab2e216962 Mon Sep 17 00:00:00 2001 From: Yotam loewenbach Date: Wed, 20 Dec 2023 13:06:16 +0700 Subject: [PATCH 37/40] logging --- controller/controller.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/controller/controller.go b/controller/controller.go index b3a9685..a1dfe90 100644 --- a/controller/controller.go +++ 
b/controller/controller.go @@ -315,12 +315,12 @@ func (c *Controller) getClusterReceiversAndRoutes(configmap *corev1.ConfigMap) ( c.logzioGrafanaAlertsClient.AlertManagerGlobalConfig = alertManagerConfig.Global } } + klog.Infof("Found %d receivers and %d routes, in %s", len(receivers), len(routeTree.Routes), configmap.Name) return receivers, &routeTree } func (c *Controller) processContactPoints(receiversMap map[string]alert_manager_config.Receiver, logzioContactPoints []grafana_contact_points.GrafanaContactPoint) { contactPointsToAdd, contactPointsToUpdate, contactPointsToDelete := c.compareContactPoints(receiversMap, logzioContactPoints) - klog.Infof("Contact points summary: to add: %d, to update: %d, to delete: %d", len(contactPointsToAdd), len(contactPointsToUpdate), len(contactPointsToDelete)) if len(contactPointsToUpdate) > 0 { c.logzioGrafanaAlertsClient.UpdateContactPoints(contactPointsToUpdate, logzioContactPoints) } From 5d60f96038cdd981982a6e7f1c5ae07418fb1bb6 Mon Sep 17 00:00:00 2001 From: Yotam loewenbach Date: Wed, 20 Dec 2023 15:27:28 +0700 Subject: [PATCH 38/40] Fail when unable to load alm config --- controller/controller.go | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/controller/controller.go b/controller/controller.go index a1dfe90..0ccaef0 100644 --- a/controller/controller.go +++ b/controller/controller.go @@ -273,7 +273,10 @@ func (c *Controller) processAlertManagerConfigMaps(configmap *corev1.ConfigMap) } // get receivers and routes from alert manager configmap - receivers, routeTree := c.getClusterReceiversAndRoutes(configmap) + receivers, routeTree, err := c.getClusterReceiversAndRoutes(configmap) + if err != nil { + return err + } // Creating maps for efficient lookups receiversMap := make(map[string]alert_manager_config.Receiver) for _, receiver := range receivers { @@ -288,7 +291,7 @@ func (c *Controller) processAlertManagerConfigMaps(configmap *corev1.ConfigMap) return nil } -func (c *Controller) getClusterReceiversAndRoutes(configmap *corev1.ConfigMap) ([]alert_manager_config.Receiver, *alert_manager_config.Route) { +func (c *Controller) getClusterReceiversAndRoutes(configmap *corev1.ConfigMap) ([]alert_manager_config.Receiver, *alert_manager_config.Route, error) { var receivers []alert_manager_config.Receiver var routeTree alert_manager_config.Route if c.isAlertManagerConfigMap(configmap) { @@ -296,7 +299,7 @@ func (c *Controller) getClusterReceiversAndRoutes(configmap *corev1.ConfigMap) ( alertManagerConfig, err := alert_manager_config.Load(value) if err != nil { utilruntime.HandleError(fmt.Errorf("unable to load alert manager config; %s", err)) - return nil, &alert_manager_config.Route{} + return nil, &alert_manager_config.Route{}, err } // Add prefix to distinguish between alert manager imported from alert manager and logz.io custom contact points stub := common.CreateNameStub(configmap) @@ -316,7 +319,7 @@ func (c *Controller) getClusterReceiversAndRoutes(configmap *corev1.ConfigMap) ( } } klog.Infof("Found %d receivers and %d routes, in %s", len(receivers), len(routeTree.Routes), configmap.Name) - return receivers, &routeTree + return receivers, &routeTree, nil } func (c *Controller) processContactPoints(receiversMap map[string]alert_manager_config.Receiver, logzioContactPoints []grafana_contact_points.GrafanaContactPoint) { From 8098e20b210e5f5499f9033daa565d29be8317a6 Mon Sep 17 00:00:00 2001 From: Yotam loewenbach Date: Wed, 20 Dec 2023 16:03:00 +0700 Subject: [PATCH 39/40] change contact points and notification 
 policies logic

---
 controller/controller.go | 34 ++++++++++++++++++++--------------
 1 file changed, 20 insertions(+), 14 deletions(-)

diff --git a/controller/controller.go b/controller/controller.go
index 0ccaef0..a7d77e6 100644
--- a/controller/controller.go
+++ b/controller/controller.go
@@ -283,11 +283,30 @@ func (c *Controller) processAlertManagerConfigMaps(configmap *corev1.ConfigMap)
 		receiversMap[receiver.Name] = receiver
 	}
-	c.processContactPoints(receiversMap, logzioContactPoints)
+	/*
+		Processing logic:
+		1. compare contact points with logz.io managed contact points
+		2. if contact point is not found at logz.io, add it
+		3. if contact point is found at logz.io, update it
+		4. handle setting new notification policy tree after contact points are processed, to prevent missing contact points at logzio
+		5. delete contact points from logz.io that are not found in the alert manager configmap
+		Note: `name` field is the identifier for contact points, when a user changes the name of a contact point, it will delete the old one and create a new one, so we handle deletion after setting the new notification policy tree to avoid deleting contact points that are in use
+	*/
+	contactPointsToAdd, contactPointsToUpdate, contactPointsToDelete := c.compareContactPoints(receiversMap, logzioContactPoints)
+	if len(contactPointsToUpdate) > 0 {
+		c.logzioGrafanaAlertsClient.UpdateContactPoints(contactPointsToUpdate, logzioContactPoints)
+	}
+	if len(contactPointsToAdd) > 0 {
+		c.logzioGrafanaAlertsClient.WriteContactPoints(contactPointsToAdd)
+	}
 	// Handle the notification policies after contact points are processed, to prevent missing contact points at logzio
 	c.logzioGrafanaAlertsClient.SetNotificationPolicyTreeFromRouteTree(routeTree)
+	if len(contactPointsToDelete) > 0 {
+		c.logzioGrafanaAlertsClient.DeleteContactPoints(contactPointsToDelete)
+	}
+
 	return nil
 }
@@ -322,19 +341,6 @@ func (c *Controller) getClusterReceiversAndRoutes(configmap *corev1.ConfigMap) (
 	return receivers, &routeTree, nil
 }

-func (c *Controller) processContactPoints(receiversMap map[string]alert_manager_config.Receiver, logzioContactPoints []grafana_contact_points.GrafanaContactPoint) {
-	contactPointsToAdd, contactPointsToUpdate, contactPointsToDelete := c.compareContactPoints(receiversMap, logzioContactPoints)
-	if len(contactPointsToUpdate) > 0 {
-		c.logzioGrafanaAlertsClient.UpdateContactPoints(contactPointsToUpdate, logzioContactPoints)
-	}
-	if len(contactPointsToAdd) > 0 {
-		c.logzioGrafanaAlertsClient.WriteContactPoints(contactPointsToAdd)
-	}
-	if len(contactPointsToDelete) > 0 {
-		c.logzioGrafanaAlertsClient.DeleteContactPoints(contactPointsToDelete)
-	}
-}
-
 // compareContactPoints
 func (c *Controller) compareContactPoints(receiversMap map[string]alert_manager_config.Receiver, logzioContactPoints []grafana_contact_points.GrafanaContactPoint) (contactPointsToAdd, contactPointsToUpdate []alert_manager_config.Receiver, contactPointsToDelete []grafana_contact_points.GrafanaContactPoint) {
 	// Initialize a map with slices as values for Logz.io contact points

From c10df41333dab879e73efa6e535287e7b7b55907 Mon Sep 17 00:00:00 2001
From: Yotam loewenbach
Date: Wed, 20 Dec 2023 16:08:38 +0700
Subject: [PATCH 40/40] docs

---
 README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 41e24bd..f5bdea3 100644
--- a/README.md
+++ b/README.md
@@ -115,11 +115,11 @@ data:
           - to: 'alerts@example.com'
       - name: 'slack-production'
         slack_configs:
-          - api_url: 'https://hooks.slack.com/services/T00000000/B00000000/XXXXXXXXXXXXXXXXXXXXXXXX'
+          - api_url: 'https://hooks.slack.com/services/T00000000/B00000000/'
             channel: '#prod-alerts'
       - name: 'slack-staging'
         slack_configs:
-          - api_url: 'https://hooks.slack.com/services/T00000000/B11111111/YYYYYYYYYYYYYYYYYYYYYYYY'
+          - api_url: 'https://hooks.slack.com/services/T00000000/B11111111/'
             channel: '#staging-alerts'
 ```
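Illustrative note (not part of the diffs above): given the `match` handling added in PATCH 36, a route written in the older Alertmanager `match` style is expected to be mapped to Grafana object matchers with the `=` operator, the same way the newer `matchers` syntax is. A minimal, hypothetical route fragment showing both forms, reusing the receiver names from the README example:

```yaml
# hypothetical fragment for illustration only - receiver names reuse the README example above
route:
  receiver: 'slack-staging'
  routes:
    # deprecated `match`: every key/value pair becomes an object matcher with `=`
    - match:
        severity: 'critical'
      receiver: 'slack-production'
    # equivalent newer `matchers` syntax (also supports !=, =~, !~)
    - matchers:
        - severity = "warning"
      receiver: 'slack-staging'
```

Both forms end up as `ObjectMatchers` on the generated notification policy; `match` pairs are always translated with `=`, so simple equality-based routes from older Prometheus/Alertmanager setups migrate without changes.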