Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add feature that can disable traffic generator metrics. #807

Merged
merged 3 commits into from
Oct 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions tools/traffic/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -48,4 +48,6 @@ run2: build2
TRAFFIC_GENERATOR_NUM_WORKERS=4 \
TRAFFIC_GENERATOR_CHAIN_RPC=http://localhost:8545 \
TRAFFIC_GENERATOR_NUM_RETRIES=2 \
TRAFFIC_GENERATOR_METRICS_FUZZY_BLACKLIST=_returned_chunk,recombination_success \
TRAFFIC_GENERATOR_METRICS_BLACKLIST=get_status_CONFIRMED \
./bin/server2
3 changes: 3 additions & 0 deletions tools/traffic/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,9 @@ func NewConfig(ctx *cli.Context) (*Config, error) {
EigenDAServiceManager: retrieverConfig.EigenDAServiceManagerAddr,
SignerPrivateKey: ctx.String(SignerPrivateKeyFlag.Name),
CustomQuorums: customQuorumsUint8,

MetricsBlacklist: ctx.StringSlice(MetricsBlacklistFlag.Name),
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why do we need both flags?
could we just use MetricsFuzzyBlacklist? not sure what use cases there are where we want exact matching vs. fuzzy matching (fuzzy matching can cover all use cases we need for exact matching)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fuzzy matching is needed to handle metric labels that contain DA node IDs. Exact matching is needed to disable metrics where one metric is a substring of another. For example, there are two metrics labeled read and read_success. Without exact matching, we couldn't disable the read metric without also disabling the read_success metric.

The only straight forward way to avoid having two lists would be to interpret each entry as a regex. In your opinion, would you prefer to have a single list of regex blacklist arguments, or the current pattern?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see. That makes sense. I think the current pattern is simpler in that case

MetricsFuzzyBlacklist: ctx.StringSlice(MetricsFuzzyBlacklistFlag.Name),
},
}

Expand Down
16 changes: 16 additions & 0 deletions tools/traffic/config/flags.go
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,20 @@ var (
EnvVar: common.PrefixEnvVar(envPrefix, "INSTANCE_LAUNCH_INTERVAL"),
}

MetricsBlacklistFlag = cli.StringSliceFlag{
Name: common.PrefixFlag(FlagPrefix, "metrics-blacklist"),
Usage: "Any metric with a label exactly matching this string will not be sent to the metrics server.",
Required: false,
EnvVar: common.PrefixEnvVar(envPrefix, "METRICS_BLACKLIST"),
}

MetricsFuzzyBlacklistFlag = cli.StringSliceFlag{
Name: common.PrefixFlag(FlagPrefix, "metrics-fuzzy-blacklist"),
Usage: "Any metric that contains any string in this list will not be sent to the metrics server.",
Required: false,
EnvVar: common.PrefixEnvVar(envPrefix, "METRICS_FUZZY_BLACKLIST"),
}

/* Configuration for the blob writer. */

NumWriteInstancesFlag = cli.UintFlag{
Expand Down Expand Up @@ -238,6 +252,8 @@ var optionalFlags = []cli.Flag{
GetBlobStatusTimeoutFlag,
WriteTimeoutFlag,
VerificationChannelCapacityFlag,
MetricsBlacklistFlag,
MetricsFuzzyBlacklistFlag,
}

// Flags contains the list of configuration options available to the binary.
Expand Down
10 changes: 9 additions & 1 deletion tools/traffic/config/worker_config.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ package config

import "time"

// Config configures the traffic generator workers.
// WorkerConfig configures the traffic generator workers.
type WorkerConfig struct {
// The number of worker threads that generate write traffic.
NumWriteInstances uint
Expand Down Expand Up @@ -42,4 +42,12 @@ type WorkerConfig struct {
SignerPrivateKey string
// Custom quorum numbers to use for the traffic generator.
CustomQuorums []uint8

// Any metric with a label exactly matching one of the strings in this list will not be sent to the metrics server.
MetricsBlacklist []string

// Any metric that contains any string in this list will not be sent to the metrics server. For example,
// including the string "_returned_chunk" will cause all metrics in the form of
// "operator_fb390a64122db3957fb220c3c42d5f71e97ab0c995da4e1e5cc3261602dac527_returned_chunk" to be omitted.
MetricsFuzzyBlacklist []string
}
6 changes: 5 additions & 1 deletion tools/traffic/generator_v2.go
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,11 @@ func NewTrafficGeneratorV2(config *config.Config) (*Generator, error) {
ctx, cancel := context.WithCancel(context.Background())
waitGroup := sync.WaitGroup{}

generatorMetrics := metrics.NewMetrics(config.MetricsHTTPPort, logger)
generatorMetrics := metrics.NewMetrics(
config.MetricsHTTPPort,
logger,
config.WorkerConfig.MetricsBlacklist,
config.WorkerConfig.MetricsFuzzyBlacklist)

blobTable := table.NewBlobStore()

Expand Down
7 changes: 6 additions & 1 deletion tools/traffic/metrics/count_metric.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,19 @@ type CountMetric interface {
type countMetric struct {
metrics *metrics
description string
// disabled specifies whether the metrics should behave as a no-op
disabled bool
}

// Increment increments the count of a type of event.
func (metric *countMetric) Increment() {
if metric.disabled {
return
}
metric.metrics.count.WithLabelValues(metric.description).Inc()
}

// NewCountMetric creates a new prometheus collector for counting metrics.
// buildCounterCollector creates a new prometheus collector for counting metrics.
func buildCounterCollector(namespace string, registry *prometheus.Registry) *prometheus.CounterVec {
return promauto.With(registry).NewCounterVec(
prometheus.CounterOpts{
Expand Down
7 changes: 6 additions & 1 deletion tools/traffic/metrics/gauge_metric.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,19 @@ type GaugeMetric interface {
type gaugeMetric struct {
metrics *metrics
description string
// disabled specifies whether the metrics should behave as a no-op
disabled bool
}

// Set sets the value of a gauge metric.
func (metric *gaugeMetric) Set(value float64) {
if metric.disabled {
return
}
metric.metrics.gauge.WithLabelValues(metric.description).Set(value)
}

// NewGaugeMetric creates a collector for gauge metrics.
// buildGaugeCollector creates a collector for gauge metrics.
func buildGaugeCollector(namespace string, registry *prometheus.Registry) *prometheus.GaugeVec {
return promauto.With(registry).NewGaugeVec(
prometheus.GaugeOpts{
Expand Down
7 changes: 6 additions & 1 deletion tools/traffic/metrics/latency_metric.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,19 @@ type LatencyMetric interface {
type latencyMetric struct {
metrics *metrics
description string
// disabled specifies whether the metrics should behave as a no-op
disabled bool
}

// ReportLatency reports the latency of an operation.
func (metric *latencyMetric) ReportLatency(latency time.Duration) {
if metric.disabled {
return
}
metric.metrics.latency.WithLabelValues(metric.description).Observe(latency.Seconds())
}

// NewLatencyMetric creates a new prometheus collector for latency metrics.
// buildLatencyCollector creates a new prometheus collector for latency metrics.
func buildLatencyCollector(namespace string, registry *prometheus.Registry) *prometheus.SummaryVec {
return promauto.With(registry).NewSummaryVec(
prometheus.SummaryOpts{
Expand Down
50 changes: 43 additions & 7 deletions tools/traffic/metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"github.com/prometheus/client_golang/prometheus/collectors"
"github.com/prometheus/client_golang/prometheus/promhttp"
"net/http"
"strings"
)

// Metrics allows the creation of metrics for the traffic generator.
Expand All @@ -31,22 +32,39 @@ type metrics struct {

httpPort string
logger logging.Logger

metricsBlacklist []string
metricsFuzzyBlacklist []string
}

// NewMetrics creates a new Metrics instance.
func NewMetrics(httpPort string, logger logging.Logger) Metrics {
func NewMetrics(
httpPort string,
logger logging.Logger,
metricsBlacklist []string,
metricsFuzzyBlacklist []string) Metrics {

namespace := "eigenda_generator"
reg := prometheus.NewRegistry()
reg.MustRegister(collectors.NewProcessCollector(collectors.ProcessCollectorOpts{}))
reg.MustRegister(collectors.NewGoCollector())

if metricsBlacklist == nil {
metricsBlacklist = []string{}
}
if metricsFuzzyBlacklist == nil {
metricsFuzzyBlacklist = []string{}
}
Comment on lines +52 to +57
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: I don't think these are needed


metrics := &metrics{
count: buildCounterCollector(namespace, reg),
latency: buildLatencyCollector(namespace, reg),
gauge: buildGaugeCollector(namespace, reg),
registry: reg,
httpPort: httpPort,
logger: logger.With("component", "GeneratorMetrics"),
count: buildCounterCollector(namespace, reg),
latency: buildLatencyCollector(namespace, reg),
gauge: buildGaugeCollector(namespace, reg),
registry: reg,
httpPort: httpPort,
logger: logger.With("component", "GeneratorMetrics"),
metricsBlacklist: metricsBlacklist,
metricsFuzzyBlacklist: metricsFuzzyBlacklist,
}
return metrics
}
Expand All @@ -71,6 +89,7 @@ func (metrics *metrics) NewLatencyMetric(description string) LatencyMetric {
return &latencyMetric{
metrics: metrics,
description: description,
disabled: metrics.isBlacklisted(description),
}
}

Expand All @@ -79,6 +98,7 @@ func (metrics *metrics) NewCountMetric(description string) CountMetric {
return &countMetric{
metrics: metrics,
description: description,
disabled: metrics.isBlacklisted(description),
}
}

Expand All @@ -87,5 +107,21 @@ func (metrics *metrics) NewGaugeMetric(description string) GaugeMetric {
return &gaugeMetric{
metrics: metrics,
description: description,
disabled: metrics.isBlacklisted(description),
}
}

// isBlacklisted returns true if the metric name is blacklisted.
func (metrics *metrics) isBlacklisted(metricName string) bool {
for _, blacklisted := range metrics.metricsBlacklist {
if metricName == blacklisted {
return true
}
}
for _, blacklisted := range metrics.metricsFuzzyBlacklist {
if strings.Contains(metricName, blacklisted) {
return true
}
}
return false
}
Loading