Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement OpenTelemetry integration - client/server and explicit token latency #296

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
119 changes: 115 additions & 4 deletions microservices-connector/cmd/router/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,18 @@ import (

mcv1alpha3 "github.com/opea-project/GenAIInfra/microservices-connector/api/v1alpha3"
flag "github.com/spf13/pflag"

// Prometheus and opentelemetry imports
"github.com/prometheus/client_golang/prometheus/promhttp"

"go.opentelemetry.io/otel"
"go.opentelemetry.io/otel/exporters/prometheus"
api "go.opentelemetry.io/otel/metric"
sdkmetric "go.opentelemetry.io/otel/sdk/metric"

"go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp"

"go.opentelemetry.io/otel/metric"
)

const (
Expand All @@ -60,8 +72,8 @@ var (
TLSHandshakeTimeout: time.Minute,
ExpectContinueTimeout: 30 * time.Second,
}
callClient = &http.Client{
Transport: transport,
callClient = http.Client{
Transport: otelhttp.NewTransport(transport),
Timeout: 30 * time.Second,
}
)
Expand All @@ -80,6 +92,69 @@ type ReadCloser struct {
*bytes.Reader
}

// firstTokenLatencyMeasure is the histogram mcGraphHandler feeds with the
// duration, in milliseconds, of the first Read from the pipeline response
// body. It is created in init.
var firstTokenLatencyMeasure metric.Float64Histogram

// nextTokenLatencyMeasure is the histogram mcGraphHandler feeds with the
// duration, in milliseconds, of every Read from the pipeline response body
// after the first one. It is created in init.
var nextTokenLatencyMeasure metric.Float64Histogram

// allTokenLatencyMeasure is the histogram mcGraphHandler feeds once the
// response read loop has finished, with the milliseconds elapsed since just
// before the request body was read. It is created in init.
var allTokenLatencyMeasure metric.Float64Histogram

// pipelineLatencyMeasure is the histogram mcGraphHandler feeds when routeStep
// returns, with the milliseconds elapsed since just before the request body
// was read. It is created in init.
var pipelineLatencyMeasure metric.Float64Histogram
Comment on lines +95 to +100
Copy link
Contributor

@eero-t eero-t Sep 30, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IMHO this would look nicer as:

Suggested change
var (
firstTokenLatencyMeasure metric.Float64Histogram
nextTokenLatencyMeasure metric.Float64Histogram
allTokenLatencyMeasure metric.Float64Histogram
pipelineLatencyMeasure metric.Float64Histogram
)
type latencyMeasureT struct {
firstToken, nextToken, allTokens, pipeline metric.Float64Histogram
}
var latencyMeasure latencyMeasureT


func init() {

// The exporter embeds a default OpenTelemetry Reader and
// implements prometheus.Collector, allowing it to be used as
// both a Reader and Collector.
exporter, err := prometheus.New()
if err != nil {
log.Error(err, "metrics: cannot init prometheus collector")
}
provider := sdkmetric.NewMeterProvider(sdkmetric.WithReader(exporter))
otel.SetMeterProvider(provider)

// ppalucki: Own metrics definition below
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Typos:

Suggested change
// ppalucki: Own metrics defintion bellow
// ppalucki: Own metrics definition below

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nice catch!

const meterName = "entrag-telemetry"
meter := provider.Meter(meterName)

// Every latency histogram shares one set of bucket boundaries, expressed in
// milliseconds. The top boundary is 16384 (2^14), which continues the
// doubling sequence; the earlier value 16364 was a typo.
latencyBuckets := metric.WithExplicitBucketBoundaries(1, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384)

// newLatencyHistogram creates one millisecond histogram on meter. A
// registration failure is logged with failureMsg and whatever instrument the
// meter returned is still handed back, so initialization continues exactly
// as it did when each histogram was registered inline.
newLatencyHistogram := func(name, description, failureMsg string) metric.Float64Histogram {
	histogram, err := meter.Float64Histogram(
		name,
		metric.WithUnit("ms"),
		metric.WithDescription(description),
		latencyBuckets,
	)
	if err != nil {
		log.Error(err, failureMsg)
	}
	return histogram
}

firstTokenLatencyMeasure = newLatencyHistogram(
	"llm.first.token.latency",
	"Measures the duration of first token generation.",
	"metrics: cannot register first token histogram measure",
)
nextTokenLatencyMeasure = newLatencyHistogram(
	"llm.next.token.latency",
	"Measures the duration of generating all but first tokens.",
	"metrics: cannot register next token histogram measure",
)
allTokenLatencyMeasure = newLatencyHistogram(
	"llm.all.token.latency",
	"Measures the duration to generate response with all tokens.",
	"metrics: cannot register all token histogram measure",
)
pipelineLatencyMeasure = newLatencyHistogram(
	"llm.pipeline.latency",
	"Measures the duration to going through pipeline steps until first token is being generated (including read data time from client).",
	"metrics: cannot register pipeline histogram measure",
)
Comment on lines +118 to +155
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There's a bit of duplication. Maybe this could iterate over slice like (untested code):

metrics := []struct {
  metric *metric.Float64Histogram
  name string
  desc string
}{
   { &latencyMeasure.firstToken, "llm.first.token.latency", "..." },
   ...
}

for _, item := range metrics {
  *item.metric, err = meter.Float64Histogram(
    item.name,
    metric.WithUnit("ms"),
    metric.WithDescription(item.desc),
    api.WithExplicitBucketBoundaries(1, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16364),
  )
  if err != nil {
    log.Errorf(err, "metrics: cannot register '%s' histogram measure", item.name)
  }
}

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nice catch!

}

func (ReadCloser) Close() error {
// Typically, you would release resources here, but for bytes.Reader, there's nothing to do.
return nil
Expand Down Expand Up @@ -536,6 +611,7 @@ func mcGraphHandler(w http.ResponseWriter, req *http.Request) {
go func() {
defer close(done)

allTokensStartTime := time.Now()
inputBytes, err := io.ReadAll(req.Body)
if err != nil {
log.Error(err, "failed to read request body")
Expand All @@ -544,6 +620,9 @@ func mcGraphHandler(w http.ResponseWriter, req *http.Request) {
}

responseBody, statusCode, err := routeStep(defaultNodeName, *mcGraph, inputBytes, inputBytes, req.Header)

pipelineLatencyMeasure.Record(ctx, float64(time.Since(allTokensStartTime))/float64(time.Millisecond))

if err != nil {
log.Error(err, "failed to process request")
w.Header().Set("Content-Type", "application/json")
Expand All @@ -561,9 +640,22 @@ func mcGraphHandler(w http.ResponseWriter, req *http.Request) {
}()

w.Header().Set("Content-Type", "application/json")
firstTokenCollected := false
buffer := make([]byte, BufferSize)
for {

// measure time of reading another portion of response
tokenStartTime := time.Now()
n, err := responseBody.Read(buffer)
elapsedTimeMilisecond := float64(time.Since(tokenStartTime)) / float64(time.Millisecond)

if !firstTokenCollected {
firstTokenCollected = true
firstTokenLatencyMeasure.Record(ctx, elapsedTimeMilisecond)
} else {
nextTokenLatencyMeasure.Record(ctx, elapsedTimeMilisecond)
}

if err != nil && err != io.EOF {
log.Error(err, "failed to read from response body")
http.Error(w, "failed to read from response body", http.StatusInternalServerError)
Expand All @@ -586,6 +678,10 @@ func mcGraphHandler(w http.ResponseWriter, req *http.Request) {
return
}
}

allTokensElapsedTimeMilisecond := float64(time.Since(allTokensStartTime)) / float64(time.Millisecond)
allTokenLatencyMeasure.Record(ctx, allTokensElapsedTimeMilisecond)

}()

select {
Expand Down Expand Up @@ -729,8 +825,23 @@ func handleMultipartError(writer *multipart.Writer, err error) {

func initializeRoutes() *http.ServeMux {
mux := http.NewServeMux()
mux.HandleFunc("/", mcGraphHandler)
mux.HandleFunc("/dataprep", mcDataHandler)

// Wrap connector handlers with otelhttp wrappers
// "http.server.request.size" - Int64Counter - "Measures the size of HTTP request messages" (Incoming request bytes total)
// "http.server.response.size" - Int64Counter - "Measures the size of HTTP response messages" (Incoming response bytes total)
// "http.server.duration" - Float64histogram "Measures the duration of inbound HTTP requests." (Incoming end to end duration, milliseconds)
handleFunc := func(pattern string, handlerFunc func(http.ResponseWriter, *http.Request), operation string) {
handler := otelhttp.NewHandler(otelhttp.WithRouteTag(pattern, http.HandlerFunc(handlerFunc)), operation)
mux.Handle(pattern, handler)
}

handleFunc("/", mcGraphHandler, "mcGraphHandler")
handleFunc("/dataprep", mcDataHandler, "mcDataHandler")

promHandler := promhttp.Handler()
handleFunc("/metrics", promHandler.ServeHTTP, "metrics")
log.Info("Metrics exposed on /metrics.")

return mux
}

Expand Down
37 changes: 23 additions & 14 deletions microservices-connector/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,15 @@ require (
github.com/onsi/ginkgo/v2 v2.14.0
github.com/onsi/gomega v1.30.0
github.com/pkg/errors v0.9.1
github.com/prometheus/client_golang v1.19.1
github.com/spf13/pflag v1.0.5
github.com/stretchr/testify v1.8.4
github.com/stretchr/testify v1.9.0
github.com/tidwall/gjson v1.17.1
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.49.0
go.opentelemetry.io/otel v1.28.0
go.opentelemetry.io/otel/exporters/prometheus v0.50.0
go.opentelemetry.io/otel/metric v1.28.0
go.opentelemetry.io/otel/sdk/metric v1.28.0
k8s.io/api v0.29.2
k8s.io/apimachinery v0.29.2
k8s.io/client-go v0.29.2
Expand All @@ -18,13 +24,15 @@ require (

require (
github.com/beorn7/perks v1.0.1 // indirect
github.com/cespare/xxhash/v2 v2.2.0 // indirect
github.com/cespare/xxhash/v2 v2.3.0 // indirect
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/emicklei/go-restful/v3 v3.11.0 // indirect
github.com/evanphx/json-patch v4.12.0+incompatible // indirect
github.com/evanphx/json-patch/v5 v5.9.0 // indirect
github.com/felixge/httpsnoop v1.0.4 // indirect
github.com/fsnotify/fsnotify v1.7.0 // indirect
github.com/go-logr/logr v1.4.1 // indirect
github.com/go-logr/logr v1.4.2 // indirect
github.com/go-logr/stdr v1.2.2 // indirect
github.com/go-logr/zapr v1.3.0 // indirect
github.com/go-openapi/jsonpointer v0.19.6 // indirect
github.com/go-openapi/jsonreference v0.20.2 // indirect
Expand All @@ -46,24 +54,25 @@ require (
github.com/modern-go/reflect2 v1.0.2 // indirect
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
github.com/prometheus/client_golang v1.19.1 // indirect
github.com/prometheus/client_model v0.6.0 // indirect
github.com/prometheus/common v0.53.0 // indirect
github.com/prometheus/procfs v0.12.0 // indirect
github.com/prometheus/client_model v0.6.1 // indirect
github.com/prometheus/common v0.55.0 // indirect
github.com/prometheus/procfs v0.15.1 // indirect
github.com/tidwall/match v1.1.1 // indirect
github.com/tidwall/pretty v1.2.0 // indirect
go.opentelemetry.io/otel/sdk v1.28.0 // indirect
go.opentelemetry.io/otel/trace v1.28.0 // indirect
go.uber.org/multierr v1.11.0 // indirect
go.uber.org/zap v1.27.0 // indirect
golang.org/x/exp v0.0.0-20220722155223-a9213eeb770e // indirect
golang.org/x/net v0.25.0 // indirect
golang.org/x/oauth2 v0.20.0 // indirect
golang.org/x/sys v0.20.0 // indirect
golang.org/x/term v0.20.0 // indirect
golang.org/x/text v0.15.0 // indirect
golang.org/x/net v0.26.0 // indirect
golang.org/x/oauth2 v0.21.0 // indirect
golang.org/x/sys v0.21.0 // indirect
golang.org/x/term v0.21.0 // indirect
golang.org/x/text v0.16.0 // indirect
golang.org/x/time v0.5.0 // indirect
golang.org/x/tools v0.21.0 // indirect
golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d // indirect
gomodules.xyz/jsonpatch/v2 v2.4.0 // indirect
google.golang.org/protobuf v1.34.1 // indirect
google.golang.org/protobuf v1.34.2 // indirect
gopkg.in/inf.v0 v0.9.1 // indirect
gopkg.in/yaml.v2 v2.4.0 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
Expand Down
Loading