From 4ec69dc0cb88182bb6e6fb8054e8db4d6086200d Mon Sep 17 00:00:00 2001 From: Jian Xiao <99709935+jianoaix@users.noreply.github.com> Date: Wed, 27 Nov 2024 15:10:13 -0800 Subject: [PATCH] Add latency metric for dispersal/retrieval with blob size breakdown (#939) --- disperser/apiserver/server.go | 6 ++++++ disperser/metrics.go | 10 ++++++++++ 2 files changed, 16 insertions(+) diff --git a/disperser/apiserver/server.go b/disperser/apiserver/server.go index 831b0e635..b0163a942 100644 --- a/disperser/apiserver/server.go +++ b/disperser/apiserver/server.go @@ -269,6 +269,8 @@ func (s *DispersalServer) disperseBlob(ctx context.Context, blob *core.Blob, aut })) defer timer.ObserveDuration() + dispersalStart := time.Now() + securityParams := blob.RequestHeader.SecurityParams securityParamsStrings := make([]string, len(securityParams)) for i, sp := range securityParams { @@ -319,6 +321,7 @@ func (s *DispersalServer) disperseBlob(ctx context.Context, blob *core.Blob, aut for _, param := range securityParams { s.metrics.HandleSuccessfulRequest(fmt.Sprintf("%d", param.QuorumID), blobSize, apiMethodName) } + s.metrics.BlobLatency.WithLabelValues(apiMethodName, dispcommon.BlobSizeBucket(blobSize)).Set(float64(time.Since(dispersalStart).Milliseconds())) return &pb.DisperseBlobReply{ Result: pb.BlobStatus_PROCESSING, @@ -701,6 +704,8 @@ func (s *DispersalServer) RetrieveBlob(ctx context.Context, req *pb.RetrieveBlob })) defer timer.ObserveDuration() + retrievalStart := time.Now() + origin, err := common.GetClientAddress(ctx, s.rateConfig.ClientIPHeader, 2, true) if err != nil { s.metrics.HandleInvalidArgRpcRequest("RetrieveBlob") @@ -807,6 +812,7 @@ func (s *DispersalServer) RetrieveBlob(ctx context.Context, req *pb.RetrieveBlob } s.metrics.HandleSuccessfulRpcRequest("RetrieveBlob") s.metrics.HandleSuccessfulRequest("", len(data), "RetrieveBlob") + s.metrics.BlobLatency.WithLabelValues("RetrieveBlob", dispcommon.BlobSizeBucket(len(data))).Set(float64(time.Since(retrievalStart).Milliseconds())) s.logger.Debug("fetched blob content", "batchHeaderHash", req.BatchHeaderHash, "blobIndex", req.BlobIndex, "data size (bytes)", len(data), "duration", time.Since(stageTimer).String()) diff --git a/disperser/metrics.go b/disperser/metrics.go index 6a762344d..43a5aef99 100644 --- a/disperser/metrics.go +++ b/disperser/metrics.go @@ -24,6 +24,7 @@ type Metrics struct { NumBlobRequests *prometheus.CounterVec NumRpcRequests *prometheus.CounterVec BlobSize *prometheus.GaugeVec + BlobLatency *prometheus.GaugeVec Latency *prometheus.SummaryVec httpPort string @@ -78,6 +79,15 @@ func NewMetrics(reg *prometheus.Registry, httpPort string, logger logging.Logger }, []string{"method"}, ), + BlobLatency: promauto.With(reg).NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: namespace, + Name: "blob_latency_ms", + Help: "blob dispersal or retrieval latency by size", + }, + []string{"method", "size_bucket"}, + ), + registry: reg, httpPort: httpPort, logger: logger.With("component", "DisperserMetrics"),