provide a way to check what the invalid metrics are matching

Signed-off-by: Augustin Husson <[email protected]>
perses · Nov 18, 2024 · 1703b60 · 1703b60
1 parent c6f3147
commit 1703b60
Show file tree

Hide file tree

Showing 6 changed files with 198 additions and 12 deletions.
diff --git a/database/database.go b/database/database.go
@@ -15,19 +15,25 @@ package database
 
 import (
 	"encoding/json"
+	"fmt"
 	"os"
+	"regexp"
+	"strings"
 	"sync"
 	"time"
 
 	"github.com/perses/metrics-usage/config"
 	v1 "github.com/perses/metrics-usage/pkg/api/v1"
+	"github.com/perses/perses/pkg/model/api/v1/common"
 	"github.com/sirupsen/logrus"
 )
 
+var replaceVariableRegexp = regexp.MustCompile(`\$\{[a-zA-Z0-9_:]+}`)
+
 type Database interface {
 	GetMetric(name string) *v1.Metric
 	ListMetrics() map[string]*v1.Metric
-	ListInvalidMetrics() map[string]*v1.Metric
+	ListInvalidMetrics() map[string]*v1.InvalidMetrics
 	ListPendingUsage() map[string]*v1.MetricUsage
 	EnqueueMetricList(metrics []string)
 	EnqueueInvalidMetricsUsage(usages map[string]*v1.MetricUsage)
@@ -38,7 +44,7 @@ type Database interface {
 func New(cfg config.Database) Database {
 	d := &db{
 		metrics:                  make(map[string]*v1.Metric),
-		invalidMetrics:           make(map[string]*v1.Metric),
+		invalidMetrics:           make(map[string]*v1.InvalidMetrics),
 		usage:                    make(map[string]*v1.MetricUsage),
 		usageQueue:               make(chan map[string]*v1.MetricUsage, 250),
 		invalidMetricsUsageQueue: make(chan map[string]*v1.MetricUsage, 250),
@@ -66,7 +72,7 @@ type db struct {
 	// This struct is our "database".
 	metrics map[string]*v1.Metric
 	// invalidMetrics is the list of metric name that likely contains a variable or a regexp and as such cannot be a valid metric name.
-	invalidMetrics map[string]*v1.Metric
+	invalidMetrics map[string]*v1.InvalidMetrics
 	// usage is a buffer in case the metric name has not yet been collected
 	usage map[string]*v1.MetricUsage
 	// metricsQueue is the channel that should be used to send and receive the list of metric name to keep in memory.
@@ -110,7 +116,7 @@ func (d *db) ListMetrics() map[string]*v1.Metric {
 	return d.metrics
 }
 
-func (d *db) ListInvalidMetrics() map[string]*v1.Metric {
+func (d *db) ListInvalidMetrics() map[string]*v1.InvalidMetrics {
 	d.invalidMetricsUsageMutex.Lock()
 	defer d.invalidMetricsUsageMutex.Unlock()
 	return d.invalidMetrics
@@ -147,6 +153,7 @@ func (d *db) watchMetricsQueue() {
 				d.metrics[metricName] = &v1.Metric{
 					Labels: make(v1.Set[string]),
 				}
+				d.matchValidMetric(metricName)
 				// Since it's a new metric, potentially we already have a usage stored in the buffer.
 				if usage, usageExists := d.usage[metricName]; usageExists {
 					// TODO at some point we need to erase the usage map because it will cause a memory leak
@@ -164,8 +171,11 @@ func (d *db) watchInvalidMetricsUsageQueue() {
 		d.invalidMetricsUsageMutex.Lock()
 		for metricName, usage := range data {
 			if _, ok := d.invalidMetrics[metricName]; !ok {
-				d.invalidMetrics[metricName] = &v1.Metric{
-					Usage: usage,
+				re, matchingMetrics := d.matchInvalidMetric(metricName)
+				d.invalidMetrics[metricName] = &v1.InvalidMetrics{
+					Usage:           usage,
+					MatchingMetrics: matchingMetrics,
+					MatchingRegexp:  re,
 				}
 			} else {
 				d.invalidMetrics[metricName].Usage = mergeUsage(d.invalidMetrics[metricName].Usage, usage)
@@ -247,15 +257,94 @@ func (d *db) readMetricsInJSONFile() error {
 	return json.Unmarshal(data, &d.metrics)
 }
 
+// matchInvalidMetric turns invalidMetric into a regexp and gathers every metric name
+// currently stored in d.metrics that this regexp matches.
+// It returns (nil, nil) when the name cannot be compiled (the error is logged)
+// or when generateRegexp does not produce a regexp for it.
+// NOTE(review): watchInvalidMetricsUsageQueue calls this while holding invalidMetricsUsageMutex,
+// and this method then takes metricsMutex. matchValidMetric takes invalidMetricsUsageMutex;
+// confirm its caller does not already hold metricsMutex, otherwise the lock order is inverted.
+func (d *db) matchInvalidMetric(invalidMetric string) (*common.Regexp, v1.Set[string]) {
+	pattern, compileErr := generateRegexp(invalidMetric)
+	switch {
+	case compileErr != nil:
+		logrus.WithError(compileErr).Errorf("unable to compile the invalid metric name %q into a regexp", invalidMetric)
+		return nil, nil
+	case pattern == nil:
+		// Nothing useful can be matched with this name.
+		return nil, nil
+	}
+	d.metricsMutex.Lock()
+	defer d.metricsMutex.Unlock()
+	matches := v1.NewSet[string]()
+	for name := range d.metrics {
+		if !pattern.MatchString(name) {
+			continue
+		}
+		matches.Add(name)
+	}
+	return pattern, matches
+}
+
+func (d *db) matchValidMetric(validMetric string) {
+	d.invalidMetricsUsageMutex.Lock()
+	defer d.invalidMetricsUsageMutex.Unlock()
+	for metricName, invalidMetric := range d.invalidMetrics {
+		re := invalidMetric.MatchingRegexp
+		if re == nil {
+			var err error
+			re, err = generateRegexp(metricName)
+			if err != nil {
+				logrus.WithError(err).Errorf("unable to compile the invalid metric name %q into a regexp", metricName)
+				continue
+			}
+			invalidMetric.MatchingRegexp = re
+			if re == nil {
+				continue
+			}
+		}
+		if re.MatchString(validMetric) {
+			matchingMetrics := invalidMetric.MatchingMetrics
+			if matchingMetrics == nil {
+				matchingMetrics = v1.NewSet[string]()
+				invalidMetric.MatchingMetrics = matchingMetrics
+			}
+			matchingMetrics.Add(validMetric)
+		}
+	}
+}
+
 func mergeUsage(old, new *v1.MetricUsage) *v1.MetricUsage {
 	if old == nil {
 		return new
 	}
 	if new == nil {
 		return old
 	}
-	old.Dashboards.Merge(new.Dashboards)
-	old.AlertRules.Merge(new.AlertRules)
-	old.RecordingRules.Merge(new.RecordingRules)
+	v1.MergeSet(old.Dashboards, new.Dashboards)
+	v1.MergeSet(old.AlertRules, new.AlertRules)
+	v1.MergeSet(old.RecordingRules, new.RecordingRules)
 	return old
 }
+
+// GenerateRegexp is taking an invalid metric name,
+// will replace every variable by a pattern and then returning a regepx if the final string is not just equal to .*.
+func generateRegexp(invalidMetricName string) (*common.Regexp, error) {
+	// The first step is to replace every variable by a single special char.
+	// We are using a special single char because it will be easier to find if these chars are continuous
+	// or if there are other characters in between.
+	s := replaceVariableRegexp.ReplaceAllString(invalidMetricName, "#")
+	s = strings.ReplaceAll(s, ".+", "#")
+	s = strings.ReplaceAll(s, ".*", "#")
+	if s == "#" || len(s) == 0 {
+		// This means the metric name is just a variable and as such can match all metric.
+		// So it's basically impossible to know what this invalid metric name is covering/matching.
+		return nil, nil
+	}
+	// The next step is to contact every continuous special char '#' to a single one.
+	compileString := fmt.Sprintf("%c", s[0])
+	expr := []rune(s)
+	for i := 1; i < len(expr); i++ {
+		if expr[i-1] == '#' && expr[i-1] == expr[i] {
+			continue
+		}
+		compileString += string(expr[i])
+	}
+	if compileString == "#" {
+		return nil, nil
+	}
+	compileString = strings.ReplaceAll(compileString, "#", ".+")
+	re, err := common.NewRegexp(fmt.Sprintf("^%s$", compileString))
+	return &re, err
+}
diff --git a/database/database_test.go b/database/database_test.go
@@ -0,0 +1,63 @@
+// Copyright 2024 The Perses Authors
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package database
+
+import (
+	"regexp"
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+)
+
+// TestGenerateRegexp checks which invalid metric names produce a regexp and what that regexp matches.
+// generateRegexp returns a *common.Regexp anchored with ^ and $, so it cannot be compared with
+// assert.Equal against a *regexp.Regexp. The test therefore compares nil-ness and the matching
+// behaviour of both regexps on a few sample metric names.
+func TestGenerateRegexp(t *testing.T) {
+	tests := []struct {
+		title         string
+		invalidMetric string
+		// result is the expected (anchored) pattern, nil when no regexp must be generated.
+		result *regexp.Regexp
+		// samples are metric names on which the generated regexp and result must agree.
+		samples []string
+	}{
+		{
+			title:         "metric equal to a variable",
+			invalidMetric: "${metric}",
+			result:        nil,
+		},
+		{
+			title:         "metric with variable a suffix",
+			invalidMetric: "otelcol_exporter_enqueue_failed_log_records${suffix}",
+			result:        regexp.MustCompile(`^otelcol_exporter_enqueue_failed_log_records.+$`),
+			samples: []string{
+				"otelcol_exporter_enqueue_failed_log_records_total",
+				"otelcol_exporter_enqueue_failed_log_records",
+				"prefix_otelcol_exporter_enqueue_failed_log_records_total",
+			},
+		},
+		{
+			title:         "metric with multiple variable 1",
+			invalidMetric: "${foo}${bar}${john}${doe}",
+			result:        nil,
+		},
+		{
+			title:         "metric with multiple variable 2",
+			invalidMetric: "prefix_${foo}${bar}:collection_${collection}_suffix:${john}${doe}",
+			result:        regexp.MustCompile(`^prefix_.+:collection_.+_suffix:.+$`),
+			samples: []string{
+				"prefix_a:collection_b_suffix:c",
+				"prefix_:collection_b_suffix:c",
+				"xprefix_a:collection_b_suffix:c",
+			},
+		},
+		{
+			title:         "metric no variable",
+			invalidMetric: "otelcol_receiver_.+",
+			result:        regexp.MustCompile(`^otelcol_receiver_.+$`),
+			samples: []string{
+				"otelcol_receiver_accepted_spans",
+				"otelcol_receiver_",
+				"otelcol_exporter_sent_spans",
+			},
+		},
+	}
+
+	for _, test := range tests {
+		t.Run(test.title, func(t *testing.T) {
+			re, err := generateRegexp(test.invalidMetric)
+			assert.NoError(t, err)
+			if test.result == nil {
+				assert.Nil(t, re)
+				return
+			}
+			if !assert.NotNil(t, re) {
+				return
+			}
+			for _, sample := range test.samples {
+				assert.Equal(t, test.result.MatchString(sample), re.MatchString(sample), "sample %q", sample)
+			}
+		})
+	}
+}
diff --git a/pkg/analyze/parser/parser.go b/pkg/analyze/parser/parser.go
@@ -39,7 +39,7 @@ func (p *parser) parse(expr string) modelAPIV1.Set[string] {
 			p.currentMetric += string(char)
 			continue
 		}
-		if char == '(' || char == ')' || char == '"' || char == '=' || char == '!' || char == ',' {
+		if isInvalidMetricChar(char) {
 			// then it was not a metric name and we need to drop it
 			p.currentMetric = ""
 			continue
@@ -83,3 +83,7 @@ func isWhitespace(ch rune) bool {
 func isValidMetricChar(ch rune) bool {
 	return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || (ch >= '0' && ch <= '9') || ch == '_' || ch == ':'
 }
+
+// isInvalidMetricChar reports whether ch is one of the characters that can never be part of
+// a metric name: ( ) " = ! , - + / ~
+// The parser uses it to drop the candidate it was accumulating.
+func isInvalidMetricChar(ch rune) bool {
+	switch ch {
+	case '(', ')', '"', '=', '!', ',', '-', '+', '/', '~':
+		return true
+	default:
+		return false
+	}
+}
diff --git a/pkg/analyze/parser/parser_test.go b/pkg/analyze/parser/parser_test.go
@@ -14,6 +14,7 @@
 package parser
 
 import (
+	"slices"
 	"testing"
 
 	"github.com/stretchr/testify/assert"
@@ -40,11 +41,18 @@ func TestExtractMetricNameWithVariable(t *testing.T) {
 			expr:   "rate(tomcat_requestprocessor_received_bytes{$onlyAddsExporter,phase=~\"$phase\",instance=~\"$instance\"}[5m])",
 			result: []string{"tomcat_requestprocessor_received_bytes"},
 		},
+		{
+			title:  "complex query",
+			expr:   "sum by (wow,${grouping:csv}) (label_replace( region_appinstance_witcher_schooltype:ninja_sarutobi_response_time_nanoseconds:rate2m_wow{prometheus=~\"ninja\", region=~\"$region\", app_instance=~\"$app_instance\", stack=~\"$stack\", witcher=~\"$witcher\"} / 1000000 / region_appinstance_witcher_schooltype:ninja_sarutobi_response_event_total:rate2m_wow{prometheus=~\"ninja\", region=~\"$region\", app_instance=~\"$app_instance\", stack=~\"$stack\", witcher=~\"$witcher\"}, \"wow\", \"wow\", \"\",\"\")) $wow true",
+			result: []string{"region_appinstance_witcher_schooltype:ninja_sarutobi_response_event_total:rate2m_wow", "region_appinstance_witcher_schooltype:ninja_sarutobi_response_time_nanoseconds:rate2m_wow"},
+		},
 	}
 	for _, test := range tests {
 		t.Run(test.title, func(t *testing.T) {
 			result := ExtractMetricNameWithVariable(test.expr)
-			assert.Equal(t, test.result, result.TransformAsSlice())
+			r := result.TransformAsSlice()
+			slices.Sort(r)
+			assert.Equal(t, test.result, r)
 		})
 	}
 }
diff --git a/pkg/api/v1/metric_usage.go b/pkg/api/v1/metric_usage.go
@@ -13,7 +13,11 @@
 
 package v1
 
-import "encoding/json"
+import (
+	"encoding/json"
+
+	"github.com/perses/perses/pkg/model/api/v1/common"
+)
 
 type Set[T comparable] map[T]struct{}
 
@@ -25,6 +29,17 @@ func NewSet[T comparable](vals ...T) Set[T] {
 	return s
 }
 
+// MergeSet merges new into old and returns the resulting set.
+// A nil new leaves old as it is; a nil old makes new the result (the very same set, not a copy).
+// Otherwise old is modified in place through Merge and returned,
+// so callers must use the returned value rather than rely on old being updated.
+func MergeSet[T comparable](old, new Set[T]) Set[T] {
+	switch {
+	case new == nil:
+		return old
+	case old == nil:
+		return new
+	default:
+		old.Merge(new)
+		return old
+	}
+}
+
 func (s Set[T]) Add(vals ...T) {
 	for _, v := range vals {
 		s[v] = struct{}{}
@@ -100,3 +115,9 @@ type Metric struct {
 	Labels Set[string]  `json:"labels,omitempty"`
 	Usage  *MetricUsage `json:"usage,omitempty"`
 }
+
+// InvalidMetrics holds what is known about a metric name that cannot be a valid metric name,
+// likely because it contains a variable or a regexp.
+type InvalidMetrics struct {
+	// Usage lists where the invalid metric name is used.
+	Usage           *MetricUsage   `json:"usage,omitempty"`
+	// MatchingMetrics is the set of valid metric names matched by MatchingRegexp.
+	MatchingMetrics Set[string]    `json:"matchingMetrics,omitempty"`
+	// MatchingRegexp is the regexp generated from the invalid metric name.
+	// It is nil when no regexp could be derived from the name.
+	MatchingRegexp  *common.Regexp `json:"matchingRegexp,omitempty"`
+}
diff --git a/source/metric/metric.go b/source/metric/metric.go
@@ -61,6 +61,7 @@ func (c *metricCollector) Execute(ctx context.Context, _ context.CancelFunc) err
 	}
 	// Finally, send the metric collected to the database; db will take care to store these data properly
 	if len(result) > 0 {
+		logrus.Infof("saving %d metrics", len(result))
 		c.db.EnqueueMetricList(result)
 	}
 	return nil