diff --git a/cmd/collectors/ems/ems_test.go b/cmd/collectors/ems/ems_test.go index 513578114..783633c8d 100644 --- a/cmd/collectors/ems/ems_test.go +++ b/cmd/collectors/ems/ems_test.go @@ -17,8 +17,8 @@ import ( // Bookend EMS testing: Simulated bookend issuing ems "wafl.vvol.offline" and ems "hm.alert.raised" with alert_id value as "RaidLeftBehindAggrAlert" var issuingEmsNames = []string{"wafl.vvol.offline", "hm.alert.raised"} -// Default labels per ems is 5, "hm.alert.raised" ems has 10 labels and "wafl.vvol.offline" has 4 labels, total instance labels would be 24 -const expectedInstanceLabelCount = 24 +// Default labels per ems is 5, "hm.alert.raised" ems has 11 labels and "wafl.vvol.offline" has 4 labels, total instance labels would be 25 +const expectedInstanceLabelCount = 25 // Auto resolve EMS testing: Simulated bookend issuing ems "LUN.offline" and ems "monitor.fan.critical" var autoresolveEmsNames = []string{"LUN.offline", "monitor.fan.critical"} diff --git a/cmd/tools/generate/counter.go b/cmd/tools/generate/counter.go index c581dbb1e..421d5a06d 100644 --- a/cmd/tools/generate/counter.go +++ b/cmd/tools/generate/counter.go @@ -160,6 +160,7 @@ type Counter struct { Name string `yaml:"Name"` Description string `yaml:"Description"` APIs []MetricDef `yaml:"APIs"` + Labels []string `yaml:"Labels"` } func (c Counter) Header() string { @@ -209,15 +210,15 @@ func searchDescriptionSwagger(objName string, ontapCounterName string) string { } // processRestCounters parse rest and restperf templates -func processRestCounters(client *rest.Client) map[string]Counter { - restPerfCounters := visitRestTemplates("conf/restperf", client, func(path string, client *rest.Client) map[string]Counter { +func processRestCounters(dir string, client *rest.Client) map[string]Counter { + restPerfCounters := visitRestTemplates(filepath.Join(dir, "conf", "restperf"), client, func(path string, client *rest.Client) map[string]Counter { if _, ok := excludePerfTemplates[filepath.Base(path)]; 
ok { return nil } return processRestPerfCounters(path, client) }) - restCounters := visitRestTemplates("conf/rest", client, func(path string, client *rest.Client) map[string]Counter { // revive:disable-line:unused-parameter + restCounters := visitRestTemplates(filepath.Join(dir, "conf", "rest"), client, func(path string, client *rest.Client) map[string]Counter { // revive:disable-line:unused-parameter return processRestConfigCounters(path) }) @@ -228,11 +229,11 @@ func processRestCounters(client *rest.Client) map[string]Counter { } // processZapiCounters parse zapi and zapiperf templates -func processZapiCounters(client *zapi.Client) map[string]Counter { - zapiCounters := visitZapiTemplates("conf/zapi/cdot", client, func(path string, client *zapi.Client) map[string]Counter { // revive:disable-line:unused-parameter +func processZapiCounters(dir string, client *zapi.Client) map[string]Counter { + zapiCounters := visitZapiTemplates(filepath.Join(dir, "conf", "zapi", "cdot"), client, func(path string, client *zapi.Client) map[string]Counter { // revive:disable-line:unused-parameter return processZapiConfigCounters(path) }) - zapiPerfCounters := visitZapiTemplates("conf/zapiperf/cdot", client, func(path string, client *zapi.Client) map[string]Counter { + zapiPerfCounters := visitZapiTemplates(filepath.Join(dir, "conf", "zapiperf", "cdot"), client, func(path string, client *zapi.Client) map[string]Counter { if _, ok := excludePerfTemplates[filepath.Base(path)]; ok { return nil } @@ -302,6 +303,8 @@ func processRestConfigCounters(path string) map[string]Counter { var ( counters = make(map[string]Counter) ) + var metricLabels []string + var labels []string t, err := tree.ImportYaml(path) if t == nil || err != nil { fmt.Printf("Unable to import template file %s. 
File is invalid or empty err=%s\n", path, err) @@ -320,7 +323,11 @@ func processRestConfigCounters(path string) map[string]Counter { } if templateCounters != nil { - processCounters(templateCounters.GetAllChildContentS(), &model, path, model.Query, counters) + metricLabels, labels = getAllExportedLabels(t, templateCounters.GetAllChildContentS()) + processCounters(templateCounters.GetAllChildContentS(), &model, path, model.Query, counters, metricLabels) + // This is for object_labels metrics + harvestName := model.Object + "_" + "labels" + counters[harvestName] = Counter{Name: harvestName, Labels: labels} } endpoints := t.GetChildS("endpoints") @@ -332,7 +339,7 @@ func processRestConfigCounters(path string) map[string]Counter { query = line.GetContentS() } if line.GetNameS() == "counters" { - processCounters(line.GetAllChildContentS(), &model, path, query, counters) + processCounters(line.GetAllChildContentS(), &model, path, query, counters, metricLabels) } } } @@ -357,7 +364,7 @@ func processRestConfigCounters(path string) map[string]Counter { return counters } -func processCounters(counterContents []string, model *template2.Model, path, query string, counters map[string]Counter) { +func processCounters(counterContents []string, model *template2.Model, path, query string, counters map[string]Counter, metricLabels []string) { for _, c := range counterContents { if c == "" { continue @@ -380,6 +387,7 @@ func processCounters(counterContents []string, model *template2.Model, path, que ONTAPCounter: name, }, }, + Labels: metricLabels, } counters[harvestName] = co @@ -469,6 +477,7 @@ func processZAPIPerfCounters(path string, client *zapi.Client) map[string]Counte } } + metricLabels, _ := getAllExportedLabels(t, templateCounters.GetAllChildContentS()) for _, c := range templateCounters.GetAllChildContentS() { if c != "" { name, display, m, _ := util.ParseMetric(c) @@ -496,6 +505,7 @@ func processZAPIPerfCounters(path string, client *zapi.Client) map[string]Counte 
BaseCounter: zapiBaseCounterMap[name], }, }, + Labels: metricLabels, } counters[harvestName] = co @@ -557,6 +567,8 @@ func processZapiConfigCounters(path string) map[string]Counter { var ( counters = make(map[string]Counter) ) + var metricLabels []string + var labels []string t, err := tree.ImportYaml(path) if t == nil || err != nil { fmt.Printf("Unable to import template file %s. File is invalid or empty\n", path) @@ -577,7 +589,10 @@ func processZapiConfigCounters(path string) map[string]Counter { } zc := make(map[string]string) - + metricLabels, labels = getAllExportedLabels(t, templateCounters.GetAllChildContentS()) + // This is for object_labels metrics + harvestName := model.Object + "_" + "labels" + counters[harvestName] = Counter{Name: harvestName, Labels: labels} for _, c := range templateCounters.GetChildren() { parseZapiCounters(c, []string{}, model.Object, zc) } @@ -596,6 +611,7 @@ func processZapiConfigCounters(path string) map[string]Counter { ONTAPCounter: v, }, }, + Labels: metricLabels, } counters[k] = co @@ -875,6 +891,7 @@ func processRestPerfCounters(path string, client *rest.Client) map[string]Counte } counterMap := make(map[string]string) counterMapNoPrefix := make(map[string]string) + metricLabels, _ := getAllExportedLabels(t, templateCounters.GetAllChildContentS()) for _, c := range templateCounters.GetAllChildContentS() { if c != "" { name, display, m, _ := util.ParseMetric(c) @@ -934,6 +951,7 @@ func processRestPerfCounters(path string, client *rest.Client) map[string]Counte BaseCounter: r.Get("denominator.name").String(), }, }, + Labels: metricLabels, } counters[c.Name] = c @@ -994,8 +1012,8 @@ func addAggregatedCounter(c *Counter, metric plugin.DerivedMetric, withPrefix st } } -func processExternalCounters(counters map[string]Counter) map[string]Counter { - dat, err := os.ReadFile("cmd/tools/generate/counter.yaml") +func processExternalCounters(dir string, counters map[string]Counter) map[string]Counter { + dat, err := 
os.ReadFile(filepath.Join(dir, "cmd", "tools", "generate", "counter.yaml")) if err != nil { fmt.Printf("error while reading file %v", err) return nil @@ -1057,3 +1075,31 @@ func findAPI(apis []MetricDef, other MetricDef) []int { } return indices } + +func getAllExportedLabels(t *node.Node, counterContents []string) ([]string, []string) { + metricLabels := make([]string, 0) + labels := make([]string, 0) + if exportOptions := t.GetChildS("export_options"); exportOptions != nil { + if iAllLabels := exportOptions.GetChildS("include_all_labels"); iAllLabels != nil { + if iAllLabels.GetContentS() == "true" { + for _, c := range counterContents { + if c == "" { + continue + } + if _, display, m, _ := util.ParseMetric(c); m == "key" || m == "label" { + metricLabels = append(metricLabels, display) + } + } + return metricLabels, metricLabels + } + } + + if iKeys := exportOptions.GetChildS("instance_keys"); iKeys != nil { + metricLabels = append(metricLabels, iKeys.GetAllChildContentS()...) + } + if iLabels := exportOptions.GetChildS("instance_labels"); iLabels != nil { + labels = append(labels, iLabels.GetAllChildContentS()...) + } + } + return metricLabels, append(labels, metricLabels...) 
+} diff --git a/cmd/tools/generate/generate.go b/cmd/tools/generate/generate.go index 57335d413..670c44075 100644 --- a/cmd/tools/generate/generate.go +++ b/cmd/tools/generate/generate.go @@ -136,13 +136,13 @@ func doDockerCompose(cmd *cobra.Command, _ []string) { func doGenerateMetrics(cmd *cobra.Command, _ []string) { addRootOptions(cmd) - counters, cluster := generateMetrics() + counters, cluster := BuildMetrics("", "", opts.Poller) generateCounterTemplate(counters, cluster.Version) } func doDescription(cmd *cobra.Command, _ []string) { addRootOptions(cmd) - counters, _ := generateMetrics() + counters, _ := BuildMetrics("", "", opts.Poller) grafana.VisitDashboards( []string{"grafana/dashboards/cmode"}, func(path string, data []byte) { @@ -553,20 +553,26 @@ func writeAdminSystemd(configFp string) { println(color.Colorize("✓", color.Green) + " HTTP SD file: " + harvestAdminService + " created") } -func generateMetrics() (map[string]Counter, rest.Cluster) { +func BuildMetrics(dir, configPath, pollerName string) (map[string]Counter, rest.Cluster) { var ( - poller *conf.Poller - err error - restClient *rest.Client - zapiClient *zapi.Client + poller *conf.Poller + err error + restClient *rest.Client + zapiClient *zapi.Client + harvestYmlPath string ) - _, err = conf.LoadHarvestConfig(opts.configPath) + if opts.configPath != "" { + harvestYmlPath = filepath.Join(dir, opts.configPath) + } else { + harvestYmlPath = filepath.Join(dir, configPath) + } + _, err = conf.LoadHarvestConfig(harvestYmlPath) if err != nil { logErrAndExit(err) } - if poller, _, err = rest.GetPollerAndAddr(opts.Poller); err != nil { + if poller, _, err = rest.GetPollerAndAddr(pollerName); err != nil { logErrAndExit(err) } @@ -587,10 +593,10 @@ func generateMetrics() (map[string]Counter, rest.Cluster) { } swaggerBytes = readSwaggerJSON() - restCounters := processRestCounters(restClient) - zapiCounters := processZapiCounters(zapiClient) + restCounters := processRestCounters(dir, restClient) + 
zapiCounters := processZapiCounters(dir, zapiClient) counters := mergeCounters(restCounters, zapiCounters) - counters = processExternalCounters(counters) + counters = processExternalCounters(dir, counters) return counters, restClient.Cluster() } @@ -670,11 +676,11 @@ func init() { fFlags := fullCmd.PersistentFlags() flags := metricCmd.PersistentFlags() - flags.StringVarP(&opts.Poller, "poller", "p", "sar", "name of poller, e.g. 10.193.48.154") + flags.StringVarP(&opts.Poller, "poller", "p", "dc1", "name of poller, e.g. 10.193.48.154") _ = metricCmd.MarkPersistentFlagRequired("poller") flag := descCmd.PersistentFlags() - flag.StringVarP(&opts.Poller, "poller", "p", "sar", "name of poller, e.g. 10.193.48.154") + flag.StringVarP(&opts.Poller, "poller", "p", "dc1", "name of poller, e.g. 10.193.48.154") _ = descCmd.MarkPersistentFlagRequired("poller") dFlags.IntVarP(&opts.loglevel, "loglevel", "l", 2, diff --git a/conf/ems/9.6.0/ems.yaml b/conf/ems/9.6.0/ems.yaml index e2204c6e6..c8b1014d4 100644 --- a/conf/ems/9.6.0/ems.yaml +++ b/conf/ems/9.6.0/ems.yaml @@ -138,18 +138,21 @@ events: - name: Nblade.vscanVirusDetected exports: - - parameters.SID => sid - - parameters.clientIp => client_ip - - parameters.filePath => file_path - - parameters.object_type => object_type - - parameters.object_uuid => object_uuid - - parameters.vscanServerIp => vscan_server_ip - - parameters.vserverName => svm + - parameters.SID => sid + - parameters.clientIp => client_ip + - parameters.filePath => file_path + - parameters.object_type => object_type + - parameters.object_uuid => object_uuid + - parameters.vscanServerIp => vscan_server_ip + - parameters.vserverName => svm + - parameters.vscanEngineStatus => vscanEngineStatus + - parameters.vscanEngineResultString => vscanEngineResultString - name: arl.netra.ca.check.failed exports: - parameters.aggr_uuid => aggr_uuid - parameters.vol => volume + - parameters.reason => reason - name: arw.volume.state matches: @@ -241,6 +244,7 @@ events: - 
parameters.volUuid => volume_uuid - parameters.vserverName => svm - parameters.vserverUuid => svm_uuid + - parameters.subject => subject - name: callhome.battery.low exports: @@ -250,6 +254,9 @@ events: resolve_after: 672h - name: callhome.hainterconnect.down + exports: + - parameters.subject => subject + - parameters.reason => reason - name: callhome.sp.hbt.missed @@ -265,6 +272,7 @@ events: - parameters.object_uuid => object_uuid - parameters.operation => operation - parameters.shareName => share + - parameters.errMsg => errMsg - name: cloud.aws.iamNotInitialized exports: @@ -278,9 +286,12 @@ events: - name: disk.outOfService exports: - - parameters.diskName => disk - - parameters.powerOnHours => power_on_hours - - parameters.serialno => serial_no + - parameters.diskName => disk + - parameters.powerOnHours => power_on_hours + - parameters.serialno => serial_no + - parameters.reason => reason + - parameters.glistEntries => glistEntries + - parameters.disk_information => disk_information - name: diskShelf.psu.added exports: @@ -312,6 +323,7 @@ events: exports: - parameters.aggr_uuid => aggr_uuid - parameters.vol => volume + - parameters.reason => reason - name: hm.alert.raised matches: @@ -328,6 +340,7 @@ events: - parameters.possible_effect => possible_effect - parameters.suppress => suppress - parameters.suppressor => suppressor + - parameters.detailed_info => detailed_info resolve_when_ems: - name: hm.alert.cleared resolve_after: 672h @@ -347,6 +360,7 @@ events: - parameters.possible_effect => possible_effect - parameters.suppress => suppress - parameters.suppressor => suppressor + - parameters.detailed_info => detailed_info resolve_when_ems: - name: hm.alert.cleared resolve_after: 672h @@ -366,6 +380,7 @@ events: - parameters.possible_effect => possible_effect - parameters.suppress => suppress - parameters.suppressor => suppressor + - parameters.detailed_info => detailed_info resolve_when_ems: - name: hm.alert.cleared resolve_after: 672h @@ -385,6 +400,7 @@ 
events: - parameters.possible_effect => possible_effect - parameters.suppress => suppress - parameters.suppressor => suppressor + - parameters.detailed_info => detailed_info resolve_when_ems: - name: hm.alert.cleared resolve_after: 672h @@ -404,6 +420,7 @@ events: - parameters.possible_effect => possible_effect - parameters.suppress => suppress - parameters.suppressor => suppressor + - parameters.detailed_info => detailed_info resolve_when_ems: - name: hm.alert.cleared resolve_after: 672h @@ -423,6 +440,7 @@ events: - parameters.possible_effect => possible_effect - parameters.suppress => suppress - parameters.suppressor => suppressor + - parameters.detailed_info => detailed_info resolve_when_ems: - name: hm.alert.cleared resolve_after: 672h @@ -442,6 +460,7 @@ events: - parameters.possible_effect => possible_effect - parameters.suppress => suppress - parameters.suppressor => suppressor + - parameters.detailed_info => detailed_info resolve_when_ems: - name: hm.alert.cleared resolve_after: 672h @@ -461,6 +480,7 @@ events: - parameters.possible_effect => possible_effect - parameters.suppress => suppress - parameters.suppressor => suppressor + - parameters.detailed_info => detailed_info resolve_when_ems: - name: hm.alert.cleared resolve_after: 672h @@ -480,6 +500,7 @@ events: - parameters.possible_effect => possible_effect - parameters.suppress => suppress - parameters.suppressor => suppressor + - parameters.detailed_info => detailed_info resolve_when_ems: - name: hm.alert.cleared resolve_after: 672h @@ -499,6 +520,7 @@ events: - parameters.possible_effect => possible_effect - parameters.suppress => suppress - parameters.suppressor => suppressor + - parameters.detailed_info => detailed_info resolve_when_ems: - name: hm.alert.cleared resolve_after: 672h @@ -518,6 +540,7 @@ events: - parameters.possible_effect => possible_effect - parameters.suppress => suppress - parameters.suppressor => suppressor + - parameters.detailed_info => detailed_info resolve_when_ems: - name: 
hm.alert.cleared resolve_after: 672h @@ -537,6 +560,7 @@ events: - parameters.possible_effect => possible_effect - parameters.suppress => suppress - parameters.suppressor => suppressor + - parameters.detailed_info => detailed_info resolve_when_ems: - name: hm.alert.cleared resolve_after: 672h @@ -556,6 +580,7 @@ events: - parameters.possible_effect => possible_effect - parameters.suppress => suppress - parameters.suppressor => suppressor + - parameters.detailed_info => detailed_info resolve_when_ems: - name: hm.alert.cleared resolve_after: 672h @@ -575,6 +600,7 @@ events: - parameters.possible_effect => possible_effect - parameters.suppress => suppress - parameters.suppressor => suppressor + - parameters.detailed_info => detailed_info resolve_when_ems: - name: hm.alert.cleared resolve_after: 672h @@ -594,6 +620,7 @@ events: - parameters.possible_effect => possible_effect - parameters.suppress => suppress - parameters.suppressor => suppressor + - parameters.detailed_info => detailed_info resolve_when_ems: - name: hm.alert.cleared resolve_after: 672h @@ -613,6 +640,7 @@ events: - parameters.possible_effect => possible_effect - parameters.suppress => suppress - parameters.suppressor => suppressor + - parameters.detailed_info => detailed_info resolve_when_ems: - name: hm.alert.cleared resolve_after: 672h @@ -632,6 +660,7 @@ events: - parameters.possible_effect => possible_effect - parameters.suppress => suppress - parameters.suppressor => suppressor + - parameters.detailed_info => detailed_info resolve_when_ems: - name: hm.alert.cleared resolve_after: 672h @@ -651,6 +680,7 @@ events: - parameters.possible_effect => possible_effect - parameters.suppress => suppress - parameters.suppressor => suppressor + - parameters.detailed_info => detailed_info resolve_when_ems: - name: hm.alert.cleared resolve_after: 672h @@ -712,6 +742,7 @@ events: exports: - ^^parameters.node_uuid => node_uuid - parameters.configname => config + - parameters.reason => reason resolve_when_ems: - 
name: object.store.available resolve_after: 672h @@ -769,11 +800,17 @@ events: exports: - ^^parameters.prodChannel => prod_channel - parameters.fanNumber => fan_number + - parameters.typeText => typeText + - parameters.errorMsg => errorMsg + - parameters.errorText => errorText + - parameters.locationText => locationText resolve_when_ems: - name: ses.status.fanInfo resolve_after: 672h - name: sk.panic + exports: + - parameters.reason => reason - name: sm.mediator.added exports: @@ -861,6 +898,7 @@ events: - ^^parameters.relationship_id => relationship_id - parameters.dstpath => dst_path - parameters.srcpath => src_path + - parameters.error_msg => error_msg resolve_when_ems: - name: sms.status.in.sync resolve_after: 672h @@ -870,6 +908,7 @@ events: - parameters.cg_relationship_id => cg_relationship_id - parameters.dstcgpath => dst_cg_path - parameters.srccgpath => src_cg_path + - parameters.error_msg => error_msg - name: sp.ipmi.lost.shutdown exports: diff --git a/container/prometheus/alert_rules.yml b/container/prometheus/alert_rules.yml index d4bba70f5..4285a404f 100644 --- a/container/prometheus/alert_rules.yml +++ b/container/prometheus/alert_rules.yml @@ -6,7 +6,7 @@ groups: # Alert for any instance that is unreachable for >5 minutes. 
- alert: InstanceDown - expr: up == 0 + expr: up{} == 0 for: 5m labels: severity: "critical" @@ -16,7 +16,7 @@ groups: # Alert for any instance that has a volume used percentage > 90% - alert: Volume Used Percentage Breach - expr: volume_size_used_percent > 90 + expr: volume_size_used_percent{} > 90 for: 5m labels: severity: "critical" @@ -58,7 +58,7 @@ groups: # Alert for node nfs latency - alert: Node nfs latency is high - expr: node_nfs_latency > 5000 + expr: node_nfs_latency{} > 5000 for: 5m labels: severity: "critical" @@ -68,7 +68,7 @@ groups: # Snapmirror lag time is high - alert: Snapmirror lag time is high - expr: snapmirror_lag_time > 3600 + expr: snapmirror_lag_time{} > 3600 for: 1m labels: severity: "critical" @@ -105,7 +105,7 @@ groups: # Certificates expiring within 1 month - alert: Certificates expiring within 1 month - expr: 0 < (security_certificate_expiry_time{} - time()) < (30*24*3600) + expr: 0 < ((security_certificate_expiry_time{} * on (uuid) group_left (name, expiry_time) security_certificate_labels{}) - time()) < (30*24*3600) for: 1m labels: severity: "warning" @@ -115,7 +115,7 @@ groups: # Certificates expired - alert: Certificates expired - expr: (security_certificate_expiry_time{} - time()) < 0 + expr: ((security_certificate_expiry_time{} * on (uuid) group_left (name, expiry_time) security_certificate_labels{}) - time()) < 0 labels: severity: "critical" annotations: diff --git a/integration/test/alert_rule_test.go b/integration/test/alert_rule_test.go new file mode 100644 index 000000000..2b41c4106 --- /dev/null +++ b/integration/test/alert_rule_test.go @@ -0,0 +1,195 @@ +package main + +import ( + "fmt" + "github.com/Netapp/harvest-automation/test/utils" + "github.com/netapp/harvest/v2/cmd/tools/generate" + "github.com/netapp/harvest/v2/pkg/tree" + "github.com/netapp/harvest/v2/pkg/tree/node" + "github.com/netapp/harvest/v2/pkg/util" + "path/filepath" + "regexp" + "slices" + "strings" + "testing" +) + +type AlertRule struct { + name 
string + exprs []string + labels []string +} + +var labelRegex = regexp.MustCompile(`\{{.*?}}`) +var pluginGeneratedMetric = map[string][]string{ + "change_log": {"svm", "state", "type", "anti_ransomware_state", "object", "node", "location", "healthy", "volume", "style", "aggr", "status"}, +} +var exceptionMetrics = []string{ + "up", + "node_nfs_latency", // need to check why it was skipped in restperf +} + +func TestAlertRules(t *testing.T) { + utils.SkipIfMissing(t, utils.Regression) + metrics, _ := generate.BuildMetrics("../..", "integration/test/harvest.yml", "dc1") + for pluginMetric, pluginLabels := range pluginGeneratedMetric { + metrics[pluginMetric] = generate.Counter{Name: pluginMetric, Labels: pluginLabels} + } + + alertRules := GetAllAlertRules("../../container/prometheus/", "alert_rules.yml", false) + for _, alertRule := range alertRules { + for _, label := range alertRule.labels { + found := false + for _, expr := range alertRule.exprs { + if !slices.Contains(exceptionMetrics, expr) { + metricCounters := metrics[expr] + if slices.Contains(metricCounters.Labels, label) { + found = true + break + } + } else { + found = true + } + } + if !found { + t.Errorf("%s is not available in %s metric", label, alertRule.exprs) + } + } + } +} + +func TestEmsAlertRules(t *testing.T) { + utils.SkipIfMissing(t, utils.Regression) + templateEmsLabels := getEmsLabels("../../conf/ems/9.6.0/ems.yaml") + emsAlertRules := GetAllAlertRules("../../container/prometheus/", "ems_alert_rules.yml", true) + for _, alertRule := range emsAlertRules { + for _, ems := range alertRule.exprs { + emsLabels := templateEmsLabels[ems] + for _, label := range alertRule.labels { + if !slices.Contains(emsLabels, label) { + t.Errorf("%s is not available in %s ems", label, ems) + } + } + } + } +} + +func GetAllAlertRules(dir string, fileName string, isEms bool) []AlertRule { + alertRules := make([]AlertRule, 0) + alertNames := make([]string, 0) + exprList := make([]string, 0) + summaryList := 
make([]string, 0) + alertRulesFilePath := filepath.Join(dir, fileName) + data, err := tree.ImportYaml(alertRulesFilePath) + if err != nil { + panic(err) + } + + for _, v := range data.GetChildS("groups").GetChildren() { + if v.GetNameS() == "rules" { + for _, a := range v.GetChildren() { + switch a.GetNameS() { + case "alert": + alertname := a.GetContentS() + alertNames = append(alertNames, alertname) + case "expr": + alertexp := a.GetContentS() + exprList = append(exprList, alertexp) + case "annotations": + alertSummary := a.GetChildS("summary") + summaryList = append(summaryList, alertSummary.GetContentS()) + } + } + } + } + + for i := range alertNames { + alertRules = append(alertRules, AlertRule{name: alertNames[i], exprs: getAllExpressions(exprList[i], isEms), labels: getAllLabels(summaryList[i])}) + } + return alertRules +} + +func getAllExpressions(expression string, isEms bool) []string { + filtered := make([]string, 0) + var all []string + if isEms { + all = FindEms(expression, "{", "}") + } else { + all = FindStringBetweenTwoChar(expression, "{", "(") + } + for _, counter := range all { + if counter == "" { + continue + } + filtered = append(filtered, counter) + } + return filtered +} + +func FindEms(stringValue string, startChar string, endChar string) []string { + var emsSlice = make([]string, 0) + var firstSet, counterArray []string + if firstSet = strings.Split(stringValue, startChar); len(firstSet) < 2 { + return emsSlice + } + actualString := strings.TrimSpace(firstSet[1]) + if counterArray = strings.Split(actualString, endChar); len(counterArray) < 2 { + return emsSlice + } + ems := strings.TrimSpace(counterArray[0]) + if ems != "" { + counterArray = strings.Split(ems, "=") + emsSlice = append(emsSlice, strings.ReplaceAll(counterArray[1], "\"", "")) + } + return emsSlice +} + +func getAllLabels(summary string) []string { + var labels []string + labelSlice := labelRegex.FindAllString(summary, -1) + for _, label := range labelSlice { + label = 
strings.Trim(label, "{") + label = strings.Trim(label, "}") + label = strings.TrimSpace(label) + if strings.HasPrefix(label, "$labels") { + labels = append(labels, strings.Split(label, ".")[1]) + } + } + return labels +} + +func getEmsLabels(path string) map[string][]string { + var ( + emsLabels = make(map[string][]string) + ) + emsNames := make([]string, 0) + labels := make([]string, 0) + data, err := tree.ImportYaml(path) + if data == nil || err != nil { + fmt.Printf("Unable to import template file %s. File is invalid or empty err=%s\n", path, err) + return nil + } + + for _, e := range data.GetChildS("events").GetChildren() { + emsNames = append(emsNames, e.GetChildContentS("name")) + labels = append(labels, parseEmsLabels(e.GetChildS("exports"))) + } + + for i := range emsNames { + emsLabels[emsNames[i]] = strings.Split(labels[i], ",") + } + return emsLabels +} + +func parseEmsLabels(exports *node.Node) string { + var labels []string + if exports != nil { + for _, export := range exports.GetAllChildContentS() { + name, _, _, _ := util.ParseMetric(export) + if strings.HasPrefix(name, "parameters") { + labels = append(labels, strings.Split(name, ".")[1]) + } + } + } + return strings.Join(labels, ",") +} diff --git a/integration/test/dashboard_json_test.go b/integration/test/dashboard_json_test.go index 39a1c32c3..a5582a974 100644 --- a/integration/test/dashboard_json_test.go +++ b/integration/test/dashboard_json_test.go @@ -24,6 +24,8 @@ const ( var restDataCollectors = []string{"Rest"} +var isStringAlphabetic = regexp.MustCompile(`^[a-zA-Z0-9_]*$`).MatchString + var fileSet []string // zapiCounterMap are additional counters, above and beyond the ones from counterMap, which should be excluded from Zapi @@ -398,11 +400,12 @@ func GetAllJsons(dir string) []string { func FindStringBetweenTwoChar(stringValue string, startChar string, endChar string) []string { var counters = make([]string, 0) - var isStringAlphabetic = 
regexp.MustCompile(`^[a-zA-Z0-9_]*$`).MatchString firstSet := strings.Split(stringValue, startChar) for _, actualString := range firstSet { counterArray := strings.Split(actualString, endChar) switch { + case strings.Contains(actualString, ")"): // check for a metric that follows a closing parenthesis, such as: on (uuid) group_left (name) metric{} + counterArray = strings.Split(actualString, ")") case strings.Contains(actualString, "+"): // check for inner expression such as top: counterArray = strings.Split(actualString, "+") case strings.Contains(actualString, "/"): // check for inner expression such as top: @@ -410,7 +413,6 @@ func FindStringBetweenTwoChar(stringValue string, startChar string, endChar stri case strings.Contains(actualString, ","): // check for inner expression such as top: counterArray = strings.Split(actualString, ",") } - counter := strings.TrimSpace(counterArray[len(counterArray)-1]) counterArray = strings.Split(counter, endChar) counter = strings.TrimSpace(counterArray[len(counterArray)-1])