Skip to content

Commit

Permalink
provide a way to check what the invalid metrics are matching
Browse files Browse the repository at this point in the history
Signed-off-by: Augustin Husson <[email protected]>
  • Loading branch information
Nexucis committed Nov 18, 2024
1 parent c6f3147 commit 1703b60
Show file tree
Hide file tree
Showing 6 changed files with 198 additions and 12 deletions.
107 changes: 98 additions & 9 deletions database/database.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,19 +15,25 @@ package database

import (
"encoding/json"
"fmt"
"os"
"regexp"
"strings"
"sync"
"time"

"github.com/perses/metrics-usage/config"
v1 "github.com/perses/metrics-usage/pkg/api/v1"
"github.com/perses/perses/pkg/model/api/v1/common"
"github.com/sirupsen/logrus"
)

// replaceVariableRegexp matches a braced variable reference of the form ${name},
// where name is a non-empty run of ASCII letters, digits, '_' or ':'.
// generateRegexp uses it to find the variables embedded in an invalid metric name.
var replaceVariableRegexp = regexp.MustCompile(`\$\{[a-zA-Z0-9_:]+}`)

type Database interface {
GetMetric(name string) *v1.Metric
ListMetrics() map[string]*v1.Metric
ListInvalidMetrics() map[string]*v1.Metric
ListInvalidMetrics() map[string]*v1.InvalidMetrics
ListPendingUsage() map[string]*v1.MetricUsage
EnqueueMetricList(metrics []string)
EnqueueInvalidMetricsUsage(usages map[string]*v1.MetricUsage)
Expand All @@ -38,7 +44,7 @@ type Database interface {
func New(cfg config.Database) Database {
d := &db{
metrics: make(map[string]*v1.Metric),
invalidMetrics: make(map[string]*v1.Metric),
invalidMetrics: make(map[string]*v1.InvalidMetrics),
usage: make(map[string]*v1.MetricUsage),
usageQueue: make(chan map[string]*v1.MetricUsage, 250),
invalidMetricsUsageQueue: make(chan map[string]*v1.MetricUsage, 250),
Expand Down Expand Up @@ -66,7 +72,7 @@ type db struct {
// This struct is our "database".
metrics map[string]*v1.Metric
// invalidMetrics holds the metric names that likely contain a variable or a regexp and as such cannot be valid metric names.
invalidMetrics map[string]*v1.Metric
invalidMetrics map[string]*v1.InvalidMetrics
// usage is a buffer in case the metric name has not yet been collected
usage map[string]*v1.MetricUsage
// metricsQueue is the channel that should be used to send and receive the list of metric name to keep in memory.
Expand Down Expand Up @@ -110,7 +116,7 @@ func (d *db) ListMetrics() map[string]*v1.Metric {
return d.metrics
}

func (d *db) ListInvalidMetrics() map[string]*v1.Metric {
func (d *db) ListInvalidMetrics() map[string]*v1.InvalidMetrics {
d.invalidMetricsUsageMutex.Lock()
defer d.invalidMetricsUsageMutex.Unlock()
return d.invalidMetrics
Expand Down Expand Up @@ -147,6 +153,7 @@ func (d *db) watchMetricsQueue() {
d.metrics[metricName] = &v1.Metric{
Labels: make(v1.Set[string]),
}
d.matchValidMetric(metricName)
// Since it's a new metric, potentially we already have a usage stored in the buffer.
if usage, usageExists := d.usage[metricName]; usageExists {
// TODO at some point we need to erase the usage map because it will cause a memory leak
Expand All @@ -164,8 +171,11 @@ func (d *db) watchInvalidMetricsUsageQueue() {
d.invalidMetricsUsageMutex.Lock()
for metricName, usage := range data {
if _, ok := d.invalidMetrics[metricName]; !ok {
d.invalidMetrics[metricName] = &v1.Metric{
Usage: usage,
re, matchingMetrics := d.matchInvalidMetric(metricName)
d.invalidMetrics[metricName] = &v1.InvalidMetrics{
Usage: usage,
MatchingMetrics: matchingMetrics,
MatchingRegexp: re,
}
} else {
d.invalidMetrics[metricName].Usage = mergeUsage(d.invalidMetrics[metricName].Usage, usage)
Expand Down Expand Up @@ -247,15 +257,94 @@ func (d *db) readMetricsInJSONFile() error {
return json.Unmarshal(data, &d.metrics)
}

// matchInvalidMetric turns the given invalid metric name into a regexp and collects
// every known metric name that the regexp matches.
//
// It returns (nil, nil) when the name cannot be compiled (the error is logged) or when
// generateRegexp yields no regexp for it. Otherwise, it returns the regexp together
// with the set of matching metric names; the set may be empty.
//
// The metrics map is read under metricsMutex.
// NOTE(review): the visible caller holds invalidMetricsUsageMutex when calling this method,
// while matchValidMetric takes invalidMetricsUsageMutex and is presumably called with
// metricsMutex held — confirm the lock ordering cannot deadlock.
func (d *db) matchInvalidMetric(invalidMetric string) (*common.Regexp, v1.Set[string]) {
	pattern, compileErr := generateRegexp(invalidMetric)
	switch {
	case compileErr != nil:
		logrus.WithError(compileErr).Errorf("unable to compile the invalid metric name %q into a regexp", invalidMetric)
		return nil, nil
	case pattern == nil:
		return nil, nil
	}

	matched := v1.NewSet[string]()
	d.metricsMutex.Lock()
	defer d.metricsMutex.Unlock()
	for name := range d.metrics {
		if !pattern.MatchString(name) {
			continue
		}
		matched.Add(name)
	}
	return pattern, matched
}

// matchValidMetric records validMetric in the MatchingMetrics set of every invalid metric
// whose regexp matches it.
//
// For an invalid metric that has no regexp yet, the regexp is generated from its name and
// stored on the entry. Entries whose name fails to compile are logged and skipped; entries
// for which no regexp can be generated are skipped as well.
// The whole pass runs under invalidMetricsUsageMutex.
func (d *db) matchValidMetric(validMetric string) {
	d.invalidMetricsUsageMutex.Lock()
	defer d.invalidMetricsUsageMutex.Unlock()

	for name, entry := range d.invalidMetrics {
		if entry.MatchingRegexp == nil {
			generated, err := generateRegexp(name)
			if err != nil {
				logrus.WithError(err).Errorf("unable to compile the invalid metric name %q into a regexp", name)
				continue
			}
			entry.MatchingRegexp = generated
		}

		pattern := entry.MatchingRegexp
		if pattern == nil || !pattern.MatchString(validMetric) {
			continue
		}

		// Lazily create the set: entries may have been stored without one.
		if entry.MatchingMetrics == nil {
			entry.MatchingMetrics = v1.NewSet[string]()
		}
		entry.MatchingMetrics.Add(validMetric)
	}
}

// mergeUsage folds the usage carried by new into old and returns the merged usage.
//
// If one of the two usages is nil, the other one is returned untouched.
// Otherwise, old is updated in place and returned.
func mergeUsage(old, new *v1.MetricUsage) *v1.MetricUsage {
	if old == nil {
		return new
	}
	if new == nil {
		return old
	}
	// v1.MergeSet returns the new set when the old one is nil, so its result must be
	// assigned back: dropping it would silently lose the incoming usage, and merging
	// directly into a nil set is not safe. Note that in this case old ends up sharing
	// the set with new.
	old.Dashboards = v1.MergeSet(old.Dashboards, new.Dashboards)
	old.AlertRules = v1.MergeSet(old.AlertRules, new.AlertRules)
	old.RecordingRules = v1.MergeSet(old.RecordingRules, new.RecordingRules)
	return old
}

// generateRegexp takes an invalid metric name, replaces every variable (${name}) and
// every wildcard (.+ or .*) it contains by the pattern .+, and compiles the result into
// an anchored regexp (^...$).
//
// It returns (nil, nil) when the name is empty or only made of variables/wildcards:
// such a name can match any metric, so it is impossible to know what it is covering.
// It returns (nil, err) when the resulting expression cannot be compiled.
func generateRegexp(invalidMetricName string) (*common.Regexp, error) {
	// The first step is to replace every variable and wildcard by a single special char.
	// Using a single char makes it easy to detect whether these placeholders are contiguous
	// or whether there are other characters in between.
	s := replaceVariableRegexp.ReplaceAllString(invalidMetricName, "#")
	s = strings.ReplaceAll(s, ".+", "#")
	s = strings.ReplaceAll(s, ".*", "#")

	// The next step is to collapse every run of contiguous '#' into a single one.
	// Iterating with range keeps multi-byte characters intact, including the first one.
	var collapsed strings.Builder
	collapsed.Grow(len(s))
	previousIsPlaceholder := false
	for _, r := range s {
		isPlaceholder := r == '#'
		if isPlaceholder && previousIsPlaceholder {
			continue
		}
		collapsed.WriteRune(r)
		previousIsPlaceholder = isPlaceholder
	}

	compileString := collapsed.String()
	if len(compileString) == 0 || compileString == "#" {
		// This means the metric name is just a variable (or a sequence of variables)
		// and as such can match all metrics.
		return nil, nil
	}

	compileString = strings.ReplaceAll(compileString, "#", ".+")
	re, err := common.NewRegexp(fmt.Sprintf("^%s$", compileString))
	if err != nil {
		// Do not hand out a pointer to a regexp that failed to compile.
		return nil, err
	}
	return &re, nil
}
63 changes: 63 additions & 0 deletions database/database_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
// Copyright 2024 The Perses Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package database

import (
"regexp"
"testing"

"github.com/stretchr/testify/assert"
)

// TestGenerateRegexp checks which invalid metric names produce a regexp and what that
// regexp matches.
//
// generateRegexp returns a *common.Regexp built from an anchored expression, so it cannot
// be compared for equality with a *regexp.Regexp. Instead, each case provides a reference
// pattern and a list of samples: the generated regexp must accept and reject exactly the
// same samples as the reference pattern.
func TestGenerateRegexp(t *testing.T) {
	tests := []struct {
		title         string
		invalidMetric string
		// result is the reference pattern; nil means that no regexp must be generated.
		result *regexp.Regexp
		// samples are the metric names evaluated against both regexps.
		samples []string
	}{
		{
			title:         "metric equal to a variable",
			invalidMetric: "${metric}",
			result:        nil,
		},
		{
			title:         "metric with variable a suffix",
			invalidMetric: "otelcol_exporter_enqueue_failed_log_records${suffix}",
			result:        regexp.MustCompile(`^otelcol_exporter_enqueue_failed_log_records.+$`),
			samples: []string{
				"otelcol_exporter_enqueue_failed_log_records_total",
				"otelcol_exporter_enqueue_failed_log_records",
				"prefix_otelcol_exporter_enqueue_failed_log_records_total",
			},
		},
		{
			title:         "metric with multiple variable 1",
			invalidMetric: "${foo}${bar}${john}${doe}",
			result:        nil,
		},
		{
			title:         "metric with multiple variable 2",
			invalidMetric: "prefix_${foo}${bar}:collection_${collection}_suffix:${john}${doe}",
			result:        regexp.MustCompile(`^prefix_.+:collection_.+_suffix:.+$`),
			samples: []string{
				"prefix_a:collection_b_suffix:c",
				"prefix_:collection_b_suffix:c",
				"prefix_a:collection_b_suffix:",
				"other_prefix_a:collection_b_suffix:c",
			},
		},
		{
			title:         "metric no variable",
			invalidMetric: "otelcol_receiver_.+",
			result:        regexp.MustCompile(`^otelcol_receiver_.+$`),
			samples: []string{
				"otelcol_receiver_accepted_spans",
				"otelcol_receiver_",
				"x_otelcol_receiver_accepted_spans",
			},
		},
	}

	for _, test := range tests {
		t.Run(test.title, func(t *testing.T) {
			re, err := generateRegexp(test.invalidMetric)
			assert.NoError(t, err)
			if test.result == nil {
				assert.Nil(t, re)
				return
			}
			if !assert.NotNil(t, re) {
				return
			}
			for _, sample := range test.samples {
				assert.Equalf(t, test.result.MatchString(sample), re.MatchString(sample), "unexpected match result for sample %q", sample)
			}
		})
	}
}
6 changes: 5 additions & 1 deletion pkg/analyze/parser/parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ func (p *parser) parse(expr string) modelAPIV1.Set[string] {
p.currentMetric += string(char)
continue
}
if char == '(' || char == ')' || char == '"' || char == '=' || char == '!' || char == ',' {
if isInvalidMetricChar(char) {
// then it was not a metric name and we need to drop it
p.currentMetric = ""
continue
Expand Down Expand Up @@ -83,3 +83,7 @@ func isWhitespace(ch rune) bool {
// isValidMetricChar reports whether ch may appear in a metric name:
// an ASCII letter, an ASCII digit, an underscore or a colon.
func isValidMetricChar(ch rune) bool {
	switch {
	case 'a' <= ch && ch <= 'z':
		return true
	case 'A' <= ch && ch <= 'Z':
		return true
	case '0' <= ch && ch <= '9':
		return true
	default:
		return ch == '_' || ch == ':'
	}
}

// isInvalidMetricChar reports whether ch is one of the characters that cannot belong to
// a metric name: ( ) " = ! , - + / ~
func isInvalidMetricChar(ch rune) bool {
	switch ch {
	case '(', ')', '"', '=', '!', ',', '-', '+', '/', '~':
		return true
	default:
		return false
	}
}
10 changes: 9 additions & 1 deletion pkg/analyze/parser/parser_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
package parser

import (
"slices"
"testing"

"github.com/stretchr/testify/assert"
Expand All @@ -40,11 +41,18 @@ func TestExtractMetricNameWithVariable(t *testing.T) {
expr: "rate(tomcat_requestprocessor_received_bytes{$onlyAddsExporter,phase=~\"$phase\",instance=~\"$instance\"}[5m])",
result: []string{"tomcat_requestprocessor_received_bytes"},
},
{
title: "complex query",
expr: "sum by (wow,${grouping:csv}) (label_replace( region_appinstance_witcher_schooltype:ninja_sarutobi_response_time_nanoseconds:rate2m_wow{prometheus=~\"ninja\", region=~\"$region\", app_instance=~\"$app_instance\", stack=~\"$stack\", witcher=~\"$witcher\"} / 1000000 / region_appinstance_witcher_schooltype:ninja_sarutobi_response_event_total:rate2m_wow{prometheus=~\"ninja\", region=~\"$region\", app_instance=~\"$app_instance\", stack=~\"$stack\", witcher=~\"$witcher\"}, \"wow\", \"wow\", \"\",\"\")) $wow true",
result: []string{"region_appinstance_witcher_schooltype:ninja_sarutobi_response_event_total:rate2m_wow", "region_appinstance_witcher_schooltype:ninja_sarutobi_response_time_nanoseconds:rate2m_wow"},
},
}
for _, test := range tests {
t.Run(test.title, func(t *testing.T) {
result := ExtractMetricNameWithVariable(test.expr)
assert.Equal(t, test.result, result.TransformAsSlice())
r := result.TransformAsSlice()
slices.Sort(r)
assert.Equal(t, test.result, r)
})
}
}
23 changes: 22 additions & 1 deletion pkg/api/v1/metric_usage.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,11 @@

package v1

import "encoding/json"
import (
"encoding/json"

"github.com/perses/perses/pkg/model/api/v1/common"
)

type Set[T comparable] map[T]struct{}

Expand All @@ -25,6 +29,17 @@ func NewSet[T comparable](vals ...T) Set[T] {
return s
}

// MergeSet merges the set new into the set old and returns the resulting set.
//
// When new is nil, old is returned as is. When old is nil, new itself is returned
// (no copy is made). Otherwise, old is updated in place through Merge and returned.
// Because the result is not always old, callers must use the returned value.
func MergeSet[T comparable](old, new Set[T]) Set[T] {
	switch {
	case new == nil:
		return old
	case old == nil:
		return new
	default:
		old.Merge(new)
		return old
	}
}

func (s Set[T]) Add(vals ...T) {
for _, v := range vals {
s[v] = struct{}{}
Expand Down Expand Up @@ -100,3 +115,9 @@ type Metric struct {
Labels Set[string] `json:"labels,omitempty"`
Usage *MetricUsage `json:"usage,omitempty"`
}

// InvalidMetrics describes a metric name that is not a valid metric name
// (it likely contains a variable or a regexp), where it is used, and which
// known metrics it matches.
type InvalidMetrics struct {
// Usage is where the invalid metric name is used.
Usage *MetricUsage `json:"usage,omitempty"`
// MatchingMetrics is the set of known metric names matched by MatchingRegexp.
MatchingMetrics Set[string] `json:"matchingMetrics,omitempty"`
// MatchingRegexp is the regexp generated from the invalid metric name.
// It is nil when no regexp could be generated.
MatchingRegexp *common.Regexp `json:"matchingRegexp,omitempty"`
}
1 change: 1 addition & 0 deletions source/metric/metric.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ func (c *metricCollector) Execute(ctx context.Context, _ context.CancelFunc) err
}
// Finally, send the metric collected to the database; db will take care to store these data properly
if len(result) > 0 {
logrus.Infof("saving %d metrics", len(result))
c.db.EnqueueMetricList(result)
}
return nil
Expand Down

0 comments on commit 1703b60

Please sign in to comment.