From b0624f0ec7841f71abb578ffeeb2b71896297d7b Mon Sep 17 00:00:00 2001 From: Bryan Boreham Date: Fri, 13 Dec 2024 15:07:05 +0000 Subject: [PATCH 1/5] OTLP: add CLI flag for 'quiet zero' So that we can update all ingesters before enabling this in distributors. --- pkg/api/api.go | 2 +- pkg/distributor/distributor.go | 4 ++++ pkg/distributor/otel.go | 6 ++++-- pkg/distributor/otel_test.go | 14 +++++++------- pkg/distributor/push_test.go | 2 +- 5 files changed, 17 insertions(+), 11 deletions(-) diff --git a/pkg/api/api.go b/pkg/api/api.go index e2f6da5735..eeab16cd67 100644 --- a/pkg/api/api.go +++ b/pkg/api/api.go @@ -266,7 +266,7 @@ func (a *API) RegisterDistributor(d *distributor.Distributor, pushConfig distrib ), true, false, "POST") a.RegisterRoute(OTLPPushEndpoint, distributor.OTLPHandler( pushConfig.MaxOTLPRequestSize, d.RequestBufferPool, a.sourceIPs, limits, pushConfig.OTelResourceAttributePromotionConfig, - pushConfig.RetryConfig, d.PushWithMiddlewares, d.PushMetrics, reg, a.logger, + pushConfig.RetryConfig, pushConfig.EnableStartTimeQuietZero, d.PushWithMiddlewares, d.PushMetrics, reg, a.logger, ), true, false, "POST") a.indexPage.AddLinks(defaultWeight, "Distributor", []IndexPageLink{ diff --git a/pkg/distributor/distributor.go b/pkg/distributor/distributor.go index 7bf589f7bc..a6874b4371 100644 --- a/pkg/distributor/distributor.go +++ b/pkg/distributor/distributor.go @@ -249,6 +249,9 @@ type Config struct { // OTelResourceAttributePromotionConfig allows for specializing OTel resource attribute promotion. OTelResourceAttributePromotionConfig OTelResourceAttributePromotionConfig `yaml:"-"` + + // Change the implementation of Otel startTime from a real zero to a special NaN value. + EnableStartTimeQuietZero bool `yaml:"start_time_quiet_zero" category:"advanced"` } // PushWrapper wraps around a push. It is similar to middleware.Interface. @@ -267,6 +270,7 @@ func (cfg *Config) RegisterFlags(f *flag.FlagSet, logger log.Logger) { f.DurationVar(&cfg.RemoteTimeout, "distributor.remote-timeout", 2*time.Second, "Timeout for downstream ingesters.") f.BoolVar(&cfg.WriteRequestsBufferPoolingEnabled, "distributor.write-requests-buffer-pooling-enabled", true, "Enable pooling of buffers used for marshaling write requests.") f.IntVar(&cfg.ReusableIngesterPushWorkers, "distributor.reusable-ingester-push-workers", 2000, "Number of pre-allocated workers used to forward push requests to the ingesters. If 0, no workers will be used and a new goroutine will be spawned for each ingester push request. If not enough workers available, new goroutine will be spawned. (Note: this is a performance optimization, not a limiting feature.)") + f.BoolVar(&cfg.EnableStartTimeQuietZero, "distributor.otel-start-time-quiet-zero", false, "Change the implementation of Otel startTime from a real zero to a special NaN value.") cfg.DefaultLimits.RegisterFlags(f) } diff --git a/pkg/distributor/otel.go b/pkg/distributor/otel.go index 79f02bb1bb..a6788b9136 100644 --- a/pkg/distributor/otel.go +++ b/pkg/distributor/otel.go @@ -62,6 +62,7 @@ func OTLPHandler( limits OTLPHandlerLimits, resourceAttributePromotionConfig OTelResourceAttributePromotionConfig, retryCfg RetryConfig, + enableStartTimeQuietZero bool, push PushFunc, pushMetrics *PushMetrics, reg prometheus.Registerer, @@ -181,7 +182,7 @@ func OTLPHandler( pushMetrics.ObserveUncompressedBodySize(tenantID, float64(uncompressedBodySize)) var metrics []mimirpb.PreallocTimeseries - metrics, err = otelMetricsToTimeseries(ctx, tenantID, addSuffixes, enableCTZeroIngestion, promoteResourceAttributes, keepIdentifyingResourceAttributes, discardedDueToOtelParseError, spanLogger, otlpReq.Metrics()) + metrics, err = otelMetricsToTimeseries(ctx, tenantID, addSuffixes, enableCTZeroIngestion, enableStartTimeQuietZero, promoteResourceAttributes, keepIdentifyingResourceAttributes, discardedDueToOtelParseError, spanLogger, otlpReq.Metrics()) if err != nil { return err } @@ -410,11 +411,12 @@ func otelMetricsToMetadata(addSuffixes bool, md pmetric.Metrics) []*mimirpb.Metr return metadata } -func otelMetricsToTimeseries(ctx context.Context, tenantID string, addSuffixes, enableCTZeroIngestion bool, promoteResourceAttributes []string, keepIdentifyingResourceAttributes bool, discardedDueToOtelParseError *prometheus.CounterVec, logger log.Logger, md pmetric.Metrics) ([]mimirpb.PreallocTimeseries, error) { +func otelMetricsToTimeseries(ctx context.Context, tenantID string, addSuffixes, enableCTZeroIngestion, enableStartTimeQuietZero bool, promoteResourceAttributes []string, keepIdentifyingResourceAttributes bool, discardedDueToOtelParseError *prometheus.CounterVec, logger log.Logger, md pmetric.Metrics) ([]mimirpb.PreallocTimeseries, error) { converter := otlp.NewMimirConverter() _, errs := converter.FromMetrics(ctx, md, otlp.Settings{ AddMetricSuffixes: addSuffixes, EnableCreatedTimestampZeroIngestion: enableCTZeroIngestion, + EnableStartTimeQuietZero: enableStartTimeQuietZero, PromoteResourceAttributes: promoteResourceAttributes, KeepIdentifyingResourceAttributes: keepIdentifyingResourceAttributes, }, utillog.SlogFromGoKit(logger)) diff --git a/pkg/distributor/otel_test.go b/pkg/distributor/otel_test.go index c381660a76..e976789412 100644 --- a/pkg/distributor/otel_test.go +++ b/pkg/distributor/otel_test.go @@ -283,7 +283,7 @@ func TestOTelMetricsToTimeSeries(t *testing.T) { for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { mimirTS, err := otelMetricsToTimeseries( - context.Background(), tenantID, true, false, tc.promoteResourceAttributes, tc.keepIdentifyingResourceAttributes, discardedDueToOTelParseError, log.NewNopLogger(), md, + context.Background(), tenantID, true, false, false, tc.promoteResourceAttributes, tc.keepIdentifyingResourceAttributes, discardedDueToOTelParseError, log.NewNopLogger(), md, ) require.NoError(t, err) require.Len(t, mimirTS, 2) @@ -351,7 +351,7 @@ func BenchmarkOTLPHandler(b *testing.B) { validation.NewMockTenantLimits(map[string]*validation.Limits{}), ) require.NoError(b, err) - handler := OTLPHandler(100000, nil, nil, limits, nil, RetryConfig{}, pushFunc, nil, nil, log.NewNopLogger()) + handler := OTLPHandler(100000, nil, nil, limits, nil, RetryConfig{}, false, pushFunc, nil, nil, log.NewNopLogger()) b.Run("protobuf", func(b *testing.B) { req := createOTLPProtoRequest(b, exportReq, "") @@ -750,7 +750,7 @@ func TestHandlerOTLPPush(t *testing.T) { logs := &concurrency.SyncBuffer{} retryConfig := RetryConfig{Enabled: true, MinBackoff: 5 * time.Second, MaxBackoff: 5 * time.Second} - handler := OTLPHandler(tt.maxMsgSize, nil, nil, limits, tt.resourceAttributePromotionConfig, retryConfig, pusher, nil, nil, level.NewFilter(log.NewLogfmtLogger(logs), level.AllowInfo())) + handler := OTLPHandler(tt.maxMsgSize, nil, nil, limits, tt.resourceAttributePromotionConfig, retryConfig, false, pusher, nil, nil, level.NewFilter(log.NewLogfmtLogger(logs), level.AllowInfo())) resp := httptest.NewRecorder() handler.ServeHTTP(resp, req) @@ -823,7 +823,7 @@ func TestHandler_otlpDroppedMetricsPanic(t *testing.T) { req := createOTLPProtoRequest(t, pmetricotlp.NewExportRequestFromMetrics(md), "") resp := httptest.NewRecorder() - handler := OTLPHandler(100000, nil, nil, limits, nil, RetryConfig{}, func(_ context.Context, pushReq *Request) error { + handler := OTLPHandler(100000, nil, nil, limits, nil, RetryConfig{}, false, func(_ context.Context, pushReq *Request) error { request, err := pushReq.WriteRequest() assert.NoError(t, err) assert.Len(t, request.Timeseries, 3) @@ -869,7 +869,7 @@ func TestHandler_otlpDroppedMetricsPanic2(t *testing.T) { req := createOTLPProtoRequest(t, pmetricotlp.NewExportRequestFromMetrics(md), "") resp := httptest.NewRecorder() - handler := OTLPHandler(100000, nil, nil, limits, nil, RetryConfig{}, func(_ context.Context, pushReq *Request) error { + handler := OTLPHandler(100000, nil, nil, limits, nil, RetryConfig{}, false, func(_ context.Context, pushReq *Request) error { request, err := pushReq.WriteRequest() t.Cleanup(pushReq.CleanUp) require.NoError(t, err) @@ -895,7 +895,7 @@ func TestHandler_otlpDroppedMetricsPanic2(t *testing.T) { req = createOTLPProtoRequest(t, pmetricotlp.NewExportRequestFromMetrics(md), "") resp = httptest.NewRecorder() - handler = OTLPHandler(100000, nil, nil, limits, nil, RetryConfig{}, func(_ context.Context, pushReq *Request) error { + handler = OTLPHandler(100000, nil, nil, limits, nil, RetryConfig{}, false, func(_ context.Context, pushReq *Request) error { request, err := pushReq.WriteRequest() t.Cleanup(pushReq.CleanUp) require.NoError(t, err) @@ -923,7 +923,7 @@ func TestHandler_otlpWriteRequestTooBigWithCompression(t *testing.T) { resp := httptest.NewRecorder() - handler := OTLPHandler(140, nil, nil, nil, nil, RetryConfig{}, readBodyPushFunc(t), nil, nil, log.NewNopLogger()) + handler := OTLPHandler(140, nil, nil, nil, nil, RetryConfig{}, false, readBodyPushFunc(t), nil, nil, log.NewNopLogger()) handler.ServeHTTP(resp, req) assert.Equal(t, http.StatusRequestEntityTooLarge, resp.Code) body, err := io.ReadAll(resp.Body) diff --git a/pkg/distributor/push_test.go b/pkg/distributor/push_test.go index 58e4704b2b..5d23c23d35 100644 --- a/pkg/distributor/push_test.go +++ b/pkg/distributor/push_test.go @@ -1183,7 +1183,7 @@ func TestOTLPPushHandlerErrorsAreReportedCorrectlyViaHttpgrpc(t *testing.T) { return nil } - h := OTLPHandler(200, util.NewBufferPool(0), nil, otlpLimitsMock{}, nil, RetryConfig{}, push, newPushMetrics(reg), reg, log.NewNopLogger()) + h := OTLPHandler(200, util.NewBufferPool(0), nil, otlpLimitsMock{}, nil, RetryConfig{}, false, push, newPushMetrics(reg), reg, log.NewNopLogger()) srv.HTTP.Handle("/otlp", h) // start the server From a2d35b4ba673326b400f425093a31545cf59599e Mon Sep 17 00:00:00 2001 From: Bryan Boreham Date: Fri, 13 Dec 2024 15:13:23 +0000 Subject: [PATCH 2/5] Use mimir-prometheus from jvp/reimplement-quiet-zeros branch --- go.mod | 2 +- go.sum | 4 ++-- .../prometheus/prometheus/model/value/value.go | 3 +++ .../otlptranslator/prometheusremotewrite/helper.go | 12 ++++++++---- .../prometheusremotewrite/metrics_to_prw.go | 1 + .../prometheus/prometheus/tsdb/head_append.go | 11 ++++++++++- .../prometheus/prometheus/tsdb/head_wal.go | 7 +++++++ vendor/modules.txt | 4 ++-- 8 files changed, 34 insertions(+), 10 deletions(-) diff --git a/go.mod b/go.mod index 555bbc18a0..8dbc2543bc 100644 --- a/go.mod +++ b/go.mod @@ -285,7 +285,7 @@ require ( ) // Using a fork of Prometheus with Mimir-specific changes. -replace github.com/prometheus/prometheus => github.com/grafana/mimir-prometheus v0.0.0-20241210170917-0a0a41616520 +replace github.com/prometheus/prometheus => github.com/grafana/mimir-prometheus v0.0.0-20241213145946-1e9d95a2e3cd // Replace memberlist with our fork which includes some fixes that haven't been // merged upstream yet: diff --git a/go.sum b/go.sum index 44c2383392..5c91f457bd 100644 --- a/go.sum +++ b/go.sum @@ -1279,8 +1279,8 @@ github.com/grafana/gomemcache v0.0.0-20241016125027-0a5bcc5aef40 h1:1TeKhyS+pvzO github.com/grafana/gomemcache v0.0.0-20241016125027-0a5bcc5aef40/go.mod h1:IGRj8oOoxwJbHBYl1+OhS9UjQR0dv6SQOep7HqmtyFU= github.com/grafana/memberlist v0.3.1-0.20220714140823-09ffed8adbbe h1:yIXAAbLswn7VNWBIvM71O2QsgfgW9fRXZNR0DXe6pDU= github.com/grafana/memberlist v0.3.1-0.20220714140823-09ffed8adbbe/go.mod h1:MS2lj3INKhZjWNqd3N0m3J+Jxf3DAOnAH9VT3Sh9MUE= -github.com/grafana/mimir-prometheus v0.0.0-20241210170917-0a0a41616520 h1:FADazl5oVYBARbfVMtLkPQ9IfIwhiE9lrPrKNPOHBV4= -github.com/grafana/mimir-prometheus v0.0.0-20241210170917-0a0a41616520/go.mod h1:NpYc1U0eC7m6xUh3t3Pq565KxaIc08Oaquiu71dEMi8= +github.com/grafana/mimir-prometheus v0.0.0-20241213145946-1e9d95a2e3cd h1:udN4x69ZecueBBm8cCZFtWsRmUMW/UiBLg0xBWxu9kc= +github.com/grafana/mimir-prometheus v0.0.0-20241213145946-1e9d95a2e3cd/go.mod h1:NpYc1U0eC7m6xUh3t3Pq565KxaIc08Oaquiu71dEMi8= github.com/grafana/opentracing-contrib-go-stdlib v0.0.0-20230509071955-f410e79da956 h1:em1oddjXL8c1tL0iFdtVtPloq2hRPen2MJQKoAWpxu0= github.com/grafana/opentracing-contrib-go-stdlib v0.0.0-20230509071955-f410e79da956/go.mod h1:qtI1ogk+2JhVPIXVc6q+NHziSmy2W5GbdQZFUHADCBU= github.com/grafana/prometheus-alertmanager v0.25.1-0.20240930132144-b5e64e81e8d3 h1:6D2gGAwyQBElSrp3E+9lSr7k8gLuP3Aiy20rweLWeBw= diff --git a/vendor/github.com/prometheus/prometheus/model/value/value.go b/vendor/github.com/prometheus/prometheus/model/value/value.go index 655ce852d5..d3dd9b996f 100644 --- a/vendor/github.com/prometheus/prometheus/model/value/value.go +++ b/vendor/github.com/prometheus/prometheus/model/value/value.go @@ -26,6 +26,9 @@ const ( // complicated values in the future. It is 2 rather than 1 to make // it easier to distinguish from the NormalNaN by a human when debugging. StaleNaN uint64 = 0x7ff0000000000002 + + // QuietZeroNaN signals TSDB to add a zero, but do nothing if there is already a value at that timestamp. + QuietZeroNaN uint64 = 0x7ff0000000000003 ) // IsStaleNaN returns true when the provided NaN value is a stale marker. diff --git a/vendor/github.com/prometheus/prometheus/storage/remote/otlptranslator/prometheusremotewrite/helper.go b/vendor/github.com/prometheus/prometheus/storage/remote/otlptranslator/prometheusremotewrite/helper.go index 1f9c8b6570..f4fc44bf48 100644 --- a/vendor/github.com/prometheus/prometheus/storage/remote/otlptranslator/prometheusremotewrite/helper.go +++ b/vendor/github.com/prometheus/prometheus/storage/remote/otlptranslator/prometheusremotewrite/helper.go @@ -595,9 +595,10 @@ const defaultIntervalForStartTimestamps = int64(300_000) // handleStartTime adds a zero sample at startTs only if startTs is within validIntervalForStartTimestamps of the sample timestamp. // The reason for doing this is that PRW v1 doesn't support Created Timestamps. After switching to PRW v2's direct CT support, // make use of its direct support fort Created Timestamps instead. +// See https://github.com/prometheus/prometheus/issues/14600 for context. // See https://opentelemetry.io/docs/specs/otel/metrics/data-model/#resets-and-gaps to know more about how OTel handles // resets for cumulative metrics. -func (c *PrometheusConverter) handleStartTime(startTs, ts int64, labels []prompb.Label, settings Settings, typ string, value float64, logger *slog.Logger) { +func (c *PrometheusConverter) handleStartTime(startTs, ts int64, labels []prompb.Label, settings Settings, typ string, val float64, logger *slog.Logger) { if !settings.EnableCreatedTimestampZeroIngestion { return } @@ -619,10 +620,13 @@ func (c *PrometheusConverter) handleStartTime(startTs, ts int64, labels []prompb return } - logger.Debug("adding zero value at start_ts", "type", typ, "labels", labelsStringer(labels), "start_ts", startTs, "sample_ts", ts, "sample_value", value) + logger.Debug("adding zero value at start_ts", "type", typ, "labels", labelsStringer(labels), "start_ts", startTs, "sample_ts", ts, "sample_value", val) - // See https://github.com/prometheus/prometheus/issues/14600 for context. - c.addSample(&prompb.Sample{Timestamp: startTs}, labels) + var createdTimeValue float64 + if settings.EnableStartTimeQuietZero { + createdTimeValue = math.Float64frombits(value.QuietZeroNaN) + } + c.addSample(&prompb.Sample{Timestamp: startTs, Value: createdTimeValue}, labels) } // handleHistogramStartTime similar to the method above but for native histograms.. diff --git a/vendor/github.com/prometheus/prometheus/storage/remote/otlptranslator/prometheusremotewrite/metrics_to_prw.go b/vendor/github.com/prometheus/prometheus/storage/remote/otlptranslator/prometheusremotewrite/metrics_to_prw.go index 7f0cc04a10..65fd080047 100644 --- a/vendor/github.com/prometheus/prometheus/storage/remote/otlptranslator/prometheusremotewrite/metrics_to_prw.go +++ b/vendor/github.com/prometheus/prometheus/storage/remote/otlptranslator/prometheusremotewrite/metrics_to_prw.go @@ -47,6 +47,7 @@ type Settings struct { // Mimir specifics. EnableCreatedTimestampZeroIngestion bool + EnableStartTimeQuietZero bool ValidIntervalCreatedTimestampZeroIngestion time.Duration } diff --git a/vendor/github.com/prometheus/prometheus/tsdb/head_append.go b/vendor/github.com/prometheus/prometheus/tsdb/head_append.go index b64607a417..a4def2bc91 100644 --- a/vendor/github.com/prometheus/prometheus/tsdb/head_append.go +++ b/vendor/github.com/prometheus/prometheus/tsdb/head_append.go @@ -497,7 +497,7 @@ func (s *memSeries) appendable(t int64, v float64, headMaxt, minValidTime, oooTi if s.lastHistogramValue != nil || s.lastFloatHistogramValue != nil { return false, 0, storage.NewDuplicateHistogramToFloatErr(t, v) } - if math.Float64bits(s.lastValue) != math.Float64bits(v) { + if math.Float64bits(s.lastValue) != math.Float64bits(v) && math.Float64bits(v) != value.QuietZeroNaN { return false, 0, storage.NewDuplicateFloatErr(t, s.lastValue, v) } // Sample is identical (ts + value) with most current (highest ts) sample in sampleBuf. @@ -505,6 +505,10 @@ func (s *memSeries) appendable(t int64, v float64, headMaxt, minValidTime, oooTi } } + if math.Float64bits(v) == value.QuietZeroNaN { // Say it's allowed; it will be dropped later in commitSamples. + return true, 0, nil + } + // The sample cannot go in the in-order chunk. Check if it can go in the out-of-order chunk. if oooTimeWindow > 0 && t >= headMaxt-oooTimeWindow { return true, headMaxt - t, nil @@ -1144,6 +1148,8 @@ func (a *headAppender) commitSamples(acc *appenderCommitContext) { switch { case err != nil: // Do nothing here. + case oooSample && math.Float64bits(s.V) == value.QuietZeroNaN: + // No-op: we don't store quiet zeros out-of-order. case oooSample: // Sample is OOO and OOO handling is enabled // and the delta is within the OOO tolerance. @@ -1190,6 +1196,9 @@ func (a *headAppender) commitSamples(acc *appenderCommitContext) { acc.floatsAppended-- } default: + if math.Float64bits(s.V) == value.QuietZeroNaN { + s.V = 0 // Note that this is modifying the copy which is what will be appended but the WAL got the NaN already. + } ok, chunkCreated = series.append(s.T, s.V, a.appendID, acc.appendChunkOpts) if ok { if s.T < acc.inOrderMint { diff --git a/vendor/github.com/prometheus/prometheus/tsdb/head_wal.go b/vendor/github.com/prometheus/prometheus/tsdb/head_wal.go index b1f3abd154..5b1a868837 100644 --- a/vendor/github.com/prometheus/prometheus/tsdb/head_wal.go +++ b/vendor/github.com/prometheus/prometheus/tsdb/head_wal.go @@ -30,6 +30,7 @@ import ( "github.com/prometheus/prometheus/model/histogram" "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/model/metadata" + "github.com/prometheus/prometheus/model/value" "github.com/prometheus/prometheus/storage" "github.com/prometheus/prometheus/tsdb/chunkenc" "github.com/prometheus/prometheus/tsdb/chunks" @@ -589,6 +590,9 @@ func (wp *walSubsetProcessor) processWALSamples(h *Head, mmappedChunks, oooMmapp if s.T <= ms.mmMaxTime { continue } + if math.Float64bits(s.V) == value.QuietZeroNaN { + s.V = 0 + } if _, chunkCreated := ms.append(s.T, s.V, 0, appendChunkOpts); chunkCreated { h.metrics.chunksCreated.Inc() h.metrics.chunks.Inc() @@ -989,6 +993,9 @@ func (wp *wblSubsetProcessor) processWBLSamples(h *Head) (unknownRefs, unknownHi unknownRefs++ continue } + if math.Float64bits(s.V) == value.QuietZeroNaN { + continue + } ok, chunkCreated, _ := ms.insert(s.T, s.V, nil, nil, h.chunkDiskMapper, oooCapMax, h.logger) if chunkCreated { h.metrics.chunksCreated.Inc() diff --git a/vendor/modules.txt b/vendor/modules.txt index 20a6a3279f..14bd8700ba 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -1017,7 +1017,7 @@ github.com/prometheus/exporter-toolkit/web github.com/prometheus/procfs github.com/prometheus/procfs/internal/fs github.com/prometheus/procfs/internal/util -# github.com/prometheus/prometheus v1.99.0 => github.com/grafana/mimir-prometheus v0.0.0-20241210170917-0a0a41616520 +# github.com/prometheus/prometheus v1.99.0 => github.com/grafana/mimir-prometheus v0.0.0-20241213145946-1e9d95a2e3cd ## explicit; go 1.22.0 github.com/prometheus/prometheus/config github.com/prometheus/prometheus/discovery @@ -1688,7 +1688,7 @@ sigs.k8s.io/kustomize/kyaml/yaml/walk sigs.k8s.io/yaml sigs.k8s.io/yaml/goyaml.v2 sigs.k8s.io/yaml/goyaml.v3 -# github.com/prometheus/prometheus => github.com/grafana/mimir-prometheus v0.0.0-20241210170917-0a0a41616520 +# github.com/prometheus/prometheus => github.com/grafana/mimir-prometheus v0.0.0-20241213145946-1e9d95a2e3cd # github.com/hashicorp/memberlist => github.com/grafana/memberlist v0.3.1-0.20220714140823-09ffed8adbbe # gopkg.in/yaml.v3 => github.com/colega/go-yaml-yaml v0.0.0-20220720105220-255a8d16d094 # github.com/grafana/regexp => github.com/grafana/regexp v0.0.0-20240531075221-3685f1377d7b From b34d5f52dc04a0be591c7719ac96bc6287f5daa3 Mon Sep 17 00:00:00 2001 From: Bryan Boreham Date: Fri, 13 Dec 2024 15:31:38 +0000 Subject: [PATCH 3/5] make generate-otlp --- pkg/distributor/otlp/helper_generated.go | 12 ++++++++---- pkg/distributor/otlp/metrics_to_prw_generated.go | 1 + 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/pkg/distributor/otlp/helper_generated.go b/pkg/distributor/otlp/helper_generated.go index 58e1720b63..7516c4268b 100644 --- a/pkg/distributor/otlp/helper_generated.go +++ b/pkg/distributor/otlp/helper_generated.go @@ -597,9 +597,10 @@ const defaultIntervalForStartTimestamps = int64(300_000) // handleStartTime adds a zero sample at startTs only if startTs is within validIntervalForStartTimestamps of the sample timestamp. // The reason for doing this is that PRW v1 doesn't support Created Timestamps. After switching to PRW v2's direct CT support, // make use of its direct support fort Created Timestamps instead. +// See https://github.com/prometheus/prometheus/issues/14600 for context. // See https://opentelemetry.io/docs/specs/otel/metrics/data-model/#resets-and-gaps to know more about how OTel handles // resets for cumulative metrics. -func (c *MimirConverter) handleStartTime(startTs, ts int64, labels []mimirpb.LabelAdapter, settings Settings, typ string, value float64, logger *slog.Logger) { +func (c *MimirConverter) handleStartTime(startTs, ts int64, labels []mimirpb.LabelAdapter, settings Settings, typ string, val float64, logger *slog.Logger) { if !settings.EnableCreatedTimestampZeroIngestion { return } @@ -621,10 +622,13 @@ func (c *MimirConverter) handleStartTime(startTs, ts int64, labels []mimirpb.Lab return } - logger.Debug("adding zero value at start_ts", "type", typ, "labels", labelsStringer(labels), "start_ts", startTs, "sample_ts", ts, "sample_value", value) + logger.Debug("adding zero value at start_ts", "type", typ, "labels", labelsStringer(labels), "start_ts", startTs, "sample_ts", ts, "sample_value", val) - // See https://github.com/prometheus/prometheus/issues/14600 for context. - c.addSample(&mimirpb.Sample{TimestampMs: startTs}, labels) + var createdTimeValue float64 + if settings.EnableStartTimeQuietZero { + createdTimeValue = math.Float64frombits(value.QuietZeroNaN) + } + c.addSample(&mimirpb.Sample{TimestampMs: startTs, Value: createdTimeValue}, labels) } // handleHistogramStartTime similar to the method above but for native histograms.. diff --git a/pkg/distributor/otlp/metrics_to_prw_generated.go b/pkg/distributor/otlp/metrics_to_prw_generated.go index 5eb1391dad..e9fdbbc896 100644 --- a/pkg/distributor/otlp/metrics_to_prw_generated.go +++ b/pkg/distributor/otlp/metrics_to_prw_generated.go @@ -50,6 +50,7 @@ type Settings struct { // Mimir specifics. EnableCreatedTimestampZeroIngestion bool + EnableStartTimeQuietZero bool ValidIntervalCreatedTimestampZeroIngestion time.Duration } From 7f9b9033dee9baa4aad9654b475de0988ca2ef41 Mon Sep 17 00:00:00 2001 From: Bryan Boreham Date: Fri, 13 Dec 2024 18:41:12 +0000 Subject: [PATCH 4/5] make doc --- .../mimir/configure/configuration-parameters/index.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/docs/sources/mimir/configure/configuration-parameters/index.md b/docs/sources/mimir/configure/configuration-parameters/index.md index f9a147e7bc..9a0d007e9f 100644 --- a/docs/sources/mimir/configure/configuration-parameters/index.md +++ b/docs/sources/mimir/configure/configuration-parameters/index.md @@ -956,6 +956,11 @@ instance_limits: # limiting feature.) # CLI flag: -distributor.reusable-ingester-push-workers [reusable_ingester_push_workers: | default = 2000] + +# (advanced) Change the implementation of Otel startTime from a real zero to a +# special NaN value. +# CLI flag: -distributor.otel-start-time-quiet-zero +[start_time_quiet_zero: | default = false] ``` ### ingester From 8de646b6eaeabdedad54682622c895f03e4fde0a Mon Sep 17 00:00:00 2001 From: Bryan Boreham Date: Fri, 13 Dec 2024 19:02:43 +0000 Subject: [PATCH 5/5] make reference-help --- cmd/mimir/config-descriptor.json | 11 +++++++++++ cmd/mimir/help-all.txt.tmpl | 2 ++ 2 files changed, 13 insertions(+) diff --git a/cmd/mimir/config-descriptor.json b/cmd/mimir/config-descriptor.json index 3ac7f5c294..5fbc925d9c 100644 --- a/cmd/mimir/config-descriptor.json +++ b/cmd/mimir/config-descriptor.json @@ -1690,6 +1690,17 @@ "fieldFlag": "distributor.reusable-ingester-push-workers", "fieldType": "int", "fieldCategory": "advanced" + }, + { + "kind": "field", + "name": "start_time_quiet_zero", + "required": false, + "desc": "Change the implementation of Otel startTime from a real zero to a special NaN value.", + "fieldValue": null, + "fieldDefaultValue": false, + "fieldFlag": "distributor.otel-start-time-quiet-zero", + "fieldType": "boolean", + "fieldCategory": "advanced" } ], "fieldValue": null, diff --git a/cmd/mimir/help-all.txt.tmpl b/cmd/mimir/help-all.txt.tmpl index 08bc71314d..45cb816731 100644 --- a/cmd/mimir/help-all.txt.tmpl +++ b/cmd/mimir/help-all.txt.tmpl @@ -1393,6 +1393,8 @@ Usage of ./cmd/mimir/mimir: Whether to enable automatic suffixes to names of metrics ingested through OTLP. -distributor.otel-promote-resource-attributes comma-separated-list-of-strings [experimental] Optionally specify OTel resource attributes to promote to labels. + -distributor.otel-start-time-quiet-zero + Change the implementation of Otel startTime from a real zero to a special NaN value. -distributor.remote-timeout duration Timeout for downstream ingesters. (default 2s) -distributor.request-burst-size int