Skip to content

Commit

Permalink
open telemetry PoC - client/server and explict token latency
Browse files Browse the repository at this point in the history
Small issues and also collect stats from /metrics router
remove examples + add allTokenLatency metric
Fix description of histograms
pipeline latency histogram before token measurments
  • Loading branch information
ppalucki committed Sep 2, 2024
1 parent c0d2ba6 commit 735d3be
Show file tree
Hide file tree
Showing 3 changed files with 192 additions and 44 deletions.
128 changes: 124 additions & 4 deletions microservices-connector/cmd/router/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,18 @@ import (

mcv1alpha3 "github.com/opea-project/GenAIInfra/microservices-connector/api/v1alpha3"
flag "github.com/spf13/pflag"

// Prometheus and opentelemetry imports
"github.com/prometheus/client_golang/prometheus/promhttp"

"go.opentelemetry.io/otel"
"go.opentelemetry.io/otel/exporters/prometheus"
api "go.opentelemetry.io/otel/metric"
sdkmetric "go.opentelemetry.io/otel/sdk/metric"

"go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp"

"go.opentelemetry.io/otel/metric"
)

var (
Expand Down Expand Up @@ -72,6 +84,69 @@ type ReadCloser struct {
*bytes.Reader
}

var (
firstTokenLatencyMeasure metric.Float64Histogram
nextTokenLatencyMeasure metric.Float64Histogram
allTokenLatencyMeasure metric.Float64Histogram
pipelineLatencyMeasure metric.Float64Histogram
)

func init() {

// The exporter embeds a default OpenTelemetry Reader and
// implements prometheus.Collector, allowing it to be used as
// both a Reader and Collector.
exporter, err := prometheus.New()
if err != nil {
log.Error(err, "metrics: cannot init prometheus collector")
}
provider := sdkmetric.NewMeterProvider(sdkmetric.WithReader(exporter))
otel.SetMeterProvider(provider)

// ppalucki: Own metrics defintion bellow
const meterName = "entrag-telemetry"
meter := provider.Meter(meterName)

firstTokenLatencyMeasure, err = meter.Float64Histogram(
"llm.first.token.latency",
metric.WithUnit("ms"),
metric.WithDescription("Measures the duration of first token generation."),
api.WithExplicitBucketBoundaries(1, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16364),
)
if err != nil {
log.Error(err, "metrics: cannot register first token histogram measure")
}
nextTokenLatencyMeasure, err = meter.Float64Histogram(
"llm.next.token.latency",
metric.WithUnit("ms"),
metric.WithDescription("Measures the duration of generating all but first tokens."),
api.WithExplicitBucketBoundaries(1, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16364),
)
if err != nil {
log.Error(err, "metrics: cannot register next token histogram measure")
}

allTokenLatencyMeasure, err = meter.Float64Histogram(
"llm.all.token.latency",
metric.WithUnit("ms"),
metric.WithDescription("Measures the duration to generate response with all tokens."),
api.WithExplicitBucketBoundaries(1, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16364),
)
if err != nil {
log.Error(err, "metrics: cannot register all token histogram measure")
}

pipelineLatencyMeasure, err = meter.Float64Histogram(
"llm.pipeline.latency",
metric.WithUnit("ms"),
metric.WithDescription("Measures the duration to going through pipeline steps until first token is being generated (including read data time from client)."),
api.WithExplicitBucketBoundaries(1, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16364),
)
if err != nil {
log.Error(err, "metrics: cannot register pipeline histogram measure")
}
}

func (ReadCloser) Close() error {
// Typically, you would release resources here, but for bytes.Reader, there's nothing to do.
return nil
Expand Down Expand Up @@ -173,7 +248,15 @@ func callService(
if val := req.Header.Get("Content-Type"); val == "" {
req.Header.Add("Content-Type", "application/json")
}
resp, err := http.DefaultClient.Do(req)

// Wrap default Go Http client with opentelemetry transport to provide extra metrics/pass context/create spans:
// - "http.client.request.size" - "Measures the size of HTTP request messages." // Outgoing request bytes total
// - "http.client.response.size" - "Measures the size of HTTP response messages." // Outgoing response bytes total
// - "http.client.duration" - "Measures the duration of outbound HTTP requests." // Outgoing end to end duration, milliseconds
// TODO/traces: span will be created with "client" as default, as middleware it possbile shouldbe "internal"
client := http.Client{Transport: otelhttp.NewTransport(http.DefaultTransport)}

resp, err := client.Do(req)

if err != nil {
log.Error(err, "An error has occurred while calling service", "service", serviceUrl)
Expand Down Expand Up @@ -517,13 +600,15 @@ func routeStep(nodeName string,
}

func mcGraphHandler(w http.ResponseWriter, req *http.Request) {
ctx, cancel := context.WithTimeout(req.Context(), time.Minute)

ctx, cancel := context.WithTimeout(req.Context(), 1*time.Minute)
defer cancel()

done := make(chan struct{})
go func() {
defer close(done)

allTokensStartTime := time.Now()
inputBytes, err := io.ReadAll(req.Body)
if err != nil {
log.Error(err, "failed to read request body")
Expand All @@ -532,6 +617,9 @@ func mcGraphHandler(w http.ResponseWriter, req *http.Request) {
}

responseBody, statusCode, err := routeStep(defaultNodeName, *mcGraph, inputBytes, inputBytes, req.Header)

pipelineLatencyMeasure.Record(ctx, float64(time.Since(allTokensStartTime))/float64(time.Millisecond))

if err != nil {
log.Error(err, "failed to process request")
w.Header().Set("Content-Type", "application/json")
Expand All @@ -549,9 +637,22 @@ func mcGraphHandler(w http.ResponseWriter, req *http.Request) {
}()

w.Header().Set("Content-Type", "application/json")
firstTokenCollected := false
buffer := make([]byte, BufferSize)
for {

// measure time of reading another portion of response
tokenStartTime := time.Now()
n, err := responseBody.Read(buffer)
elapsedTimeMilisecond := float64(time.Since(tokenStartTime)) / float64(time.Millisecond)

if !firstTokenCollected {
firstTokenCollected = true
firstTokenLatencyMeasure.Record(ctx, elapsedTimeMilisecond)
} else {
nextTokenLatencyMeasure.Record(ctx, elapsedTimeMilisecond)
}

if err != nil && err != io.EOF {
log.Error(err, "failed to read from response body")
http.Error(w, "failed to read from response body", http.StatusInternalServerError)
Expand Down Expand Up @@ -583,6 +684,10 @@ func mcGraphHandler(w http.ResponseWriter, req *http.Request) {
return
}
}

allTokensElapsedTimeMilisecond := float64(time.Since(allTokensStartTime)) / float64(time.Millisecond)
allTokenLatencyMeasure.Record(ctx, allTokensElapsedTimeMilisecond)

}()

select {
Expand Down Expand Up @@ -726,8 +831,23 @@ func handleMultipartError(writer *multipart.Writer, err error) {

func initializeRoutes() *http.ServeMux {
mux := http.NewServeMux()
mux.HandleFunc("/", mcGraphHandler)
mux.HandleFunc("/dataprep", mcDataHandler)

// Wrap connector handlers with otelhttp wrappers
// "http.server.request.size" - Int64Counter - "Measures the size of HTTP request messages" (Incoming request bytes total)
// "http.server.response.size" - Int64Counter - "Measures the size of HTTP response messages" (Incoming response bytes total)
// "http.server.duration" - Float64histogram "Measures the duration of inbound HTTP requests." (Incoming end to end duration, milliseconds)
handleFunc := func(pattern string, handlerFunc func(http.ResponseWriter, *http.Request), operation string) {
handler := otelhttp.NewHandler(otelhttp.WithRouteTag(pattern, http.HandlerFunc(handlerFunc)), operation)
mux.Handle(pattern, handler)
}

handleFunc("/", mcGraphHandler, "mcGraphHandler")
handleFunc("/dataprep", mcDataHandler, "mcDataHandler")

promHandler := promhttp.Handler()
handleFunc("/metrics", promHandler.ServeHTTP, "metrics")
log.Info("Metrics exposed on /metrics.")

return mux
}

Expand Down
37 changes: 23 additions & 14 deletions microservices-connector/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,15 @@ require (
github.com/onsi/ginkgo/v2 v2.14.0
github.com/onsi/gomega v1.30.0
github.com/pkg/errors v0.9.1
github.com/prometheus/client_golang v1.19.1
github.com/spf13/pflag v1.0.5
github.com/stretchr/testify v1.8.4
github.com/stretchr/testify v1.9.0
github.com/tidwall/gjson v1.17.1
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.49.0
go.opentelemetry.io/otel v1.28.0
go.opentelemetry.io/otel/exporters/prometheus v0.50.0
go.opentelemetry.io/otel/metric v1.28.0
go.opentelemetry.io/otel/sdk/metric v1.28.0
k8s.io/api v0.29.2
k8s.io/apimachinery v0.29.2
k8s.io/client-go v0.29.2
Expand All @@ -18,12 +24,14 @@ require (

require (
github.com/beorn7/perks v1.0.1 // indirect
github.com/cespare/xxhash/v2 v2.2.0 // indirect
github.com/cespare/xxhash/v2 v2.3.0 // indirect
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/emicklei/go-restful/v3 v3.11.0 // indirect
github.com/evanphx/json-patch/v5 v5.9.0 // indirect
github.com/felixge/httpsnoop v1.0.4 // indirect
github.com/fsnotify/fsnotify v1.7.0 // indirect
github.com/go-logr/logr v1.4.1 // indirect
github.com/go-logr/logr v1.4.2 // indirect
github.com/go-logr/stdr v1.2.2 // indirect
github.com/go-logr/zapr v1.3.0 // indirect
github.com/go-openapi/jsonpointer v0.19.6 // indirect
github.com/go-openapi/jsonreference v0.20.2 // indirect
Expand All @@ -45,24 +53,25 @@ require (
github.com/modern-go/reflect2 v1.0.2 // indirect
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
github.com/prometheus/client_golang v1.19.1 // indirect
github.com/prometheus/client_model v0.6.0 // indirect
github.com/prometheus/common v0.53.0 // indirect
github.com/prometheus/procfs v0.12.0 // indirect
github.com/prometheus/client_model v0.6.1 // indirect
github.com/prometheus/common v0.55.0 // indirect
github.com/prometheus/procfs v0.15.1 // indirect
github.com/tidwall/match v1.1.1 // indirect
github.com/tidwall/pretty v1.2.0 // indirect
go.opentelemetry.io/otel/sdk v1.28.0 // indirect
go.opentelemetry.io/otel/trace v1.28.0 // indirect
go.uber.org/multierr v1.11.0 // indirect
go.uber.org/zap v1.27.0 // indirect
golang.org/x/exp v0.0.0-20220722155223-a9213eeb770e // indirect
golang.org/x/net v0.25.0 // indirect
golang.org/x/oauth2 v0.20.0 // indirect
golang.org/x/sys v0.20.0 // indirect
golang.org/x/term v0.20.0 // indirect
golang.org/x/text v0.15.0 // indirect
golang.org/x/net v0.26.0 // indirect
golang.org/x/oauth2 v0.21.0 // indirect
golang.org/x/sys v0.21.0 // indirect
golang.org/x/term v0.21.0 // indirect
golang.org/x/text v0.16.0 // indirect
golang.org/x/time v0.5.0 // indirect
golang.org/x/tools v0.21.0 // indirect
golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d // indirect
gomodules.xyz/jsonpatch/v2 v2.4.0 // indirect
google.golang.org/protobuf v1.34.1 // indirect
google.golang.org/protobuf v1.34.2 // indirect
gopkg.in/inf.v0 v0.9.1 // indirect
gopkg.in/yaml.v2 v2.4.0 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
Expand Down
Loading

0 comments on commit 735d3be

Please sign in to comment.