diff --git a/controller/konnect/ops/ops.go b/controller/konnect/ops/ops.go index 770991cd..b64a40de 100644 --- a/controller/konnect/ops/ops.go +++ b/controller/konnect/ops/ops.go @@ -16,6 +16,7 @@ import ( "github.com/kong/gateway-operator/controller/konnect/constraints" sdkops "github.com/kong/gateway-operator/controller/konnect/ops/sdk" "github.com/kong/gateway-operator/controller/pkg/log" + "github.com/kong/gateway-operator/internal/metrics" "github.com/kong/gateway-operator/pkg/consts" k8sutils "github.com/kong/gateway-operator/pkg/utils/kubernetes" @@ -37,6 +38,34 @@ const ( DeleteOp Op = "delete" ) +// EntityTypeName is the name of a Konnect entity type, used to distinguish operations on different types of entities in the Prometheus metrics. +type EntityTypeName string + +const ( + // Entity type names used in metrics. + // REVIEW: Should we use the path inside the API as the type names? These are not very consistent. + EntityTypeControlPlane EntityTypeName = "control_planes" + EntityTypeService EntityTypeName = "services" + EntityTypeRoute EntityTypeName = "routes" + EntityTypeConsumer EntityTypeName = "consumers" + EntityTypeConsumerGroup EntityTypeName = "consumer_groups" + EntityTypePlugin EntityTypeName = "plugins" + EntityTypeUpstream EntityTypeName = "upstreams" + EntityTypeTarget EntityTypeName = "targets" + EntityTypeBasicAuthCredential EntityTypeName = "basic_auth_credentials" + EntityTypeAPIKeyCredential EntityTypeName = "api_key_credentials" + EntityTypeACLCredential EntityTypeName = "acl_credentials" + EntityTypeJWTCredential EntityTypeName = "jwt_credentials" + EntityTypeHMACCredential EntityTypeName = "hmac_credentials" + EntityTypeCACertificate EntityTypeName = "ca_certificates" + EntityTypeCertificate EntityTypeName = "certificates" + EntityTypeSNI EntityTypeName = "snis" + EntityTypeKey EntityTypeName = "keys" + EntityTypeKeySet EntityTypeName = "key_sets" + EntityTypeVault EntityTypeName = "vaults" + EntityTypeDataPlaneCertificate
EntityTypeName = "data_plane_certificates" +) + // Create creates a Konnect entity. func Create[ T constraints.SupportedKonnectEntityType, @@ -45,52 +74,76 @@ func Create[ ctx context.Context, sdk sdkops.SDKWrapper, cl client.Client, + metricRecorder metrics.Recorder, e TEnt, ) (*T, error) { var ( err error start = time.Now() + + entityType EntityTypeName + statusCode int ) switch ent := any(e).(type) { case *konnectv1alpha1.KonnectGatewayControlPlane: + entityType = EntityTypeControlPlane err = createControlPlane(ctx, sdk.GetControlPlaneSDK(), sdk.GetControlPlaneGroupSDK(), cl, ent) case *configurationv1alpha1.KongService: + entityType = EntityTypeService err = createService(ctx, sdk.GetServicesSDK(), ent) case *configurationv1alpha1.KongRoute: + entityType = EntityTypeRoute err = createRoute(ctx, sdk.GetRoutesSDK(), ent) case *configurationv1.KongConsumer: + entityType = EntityTypeConsumer err = createConsumer(ctx, sdk.GetConsumersSDK(), sdk.GetConsumerGroupsSDK(), cl, ent) case *configurationv1beta1.KongConsumerGroup: + entityType = EntityTypeConsumerGroup err = createConsumerGroup(ctx, sdk.GetConsumerGroupsSDK(), ent) case *configurationv1alpha1.KongPluginBinding: + entityType = EntityTypePlugin err = createPlugin(ctx, cl, sdk.GetPluginSDK(), ent) case *configurationv1alpha1.KongUpstream: + entityType = EntityTypeUpstream err = createUpstream(ctx, sdk.GetUpstreamsSDK(), ent) case *configurationv1alpha1.KongCredentialBasicAuth: + entityType = EntityTypeBasicAuthCredential err = createKongCredentialBasicAuth(ctx, sdk.GetBasicAuthCredentialsSDK(), ent) case *configurationv1alpha1.KongCredentialAPIKey: + entityType = EntityTypeAPIKeyCredential err = createKongCredentialAPIKey(ctx, sdk.GetAPIKeyCredentialsSDK(), ent) case *configurationv1alpha1.KongCredentialACL: + entityType = EntityTypeACLCredential err = createKongCredentialACL(ctx, sdk.GetACLCredentialsSDK(), ent) case *configurationv1alpha1.KongCredentialJWT: + entityType = EntityTypeJWTCredential err = 
createKongCredentialJWT(ctx, sdk.GetJWTCredentialsSDK(), ent) case *configurationv1alpha1.KongCredentialHMAC: + entityType = EntityTypeHMACCredential err = createKongCredentialHMAC(ctx, sdk.GetHMACCredentialsSDK(), ent) case *configurationv1alpha1.KongCACertificate: + entityType = EntityTypeCACertificate err = createCACertificate(ctx, sdk.GetCACertificatesSDK(), ent) case *configurationv1alpha1.KongCertificate: + entityType = EntityTypeCertificate err = createCertificate(ctx, sdk.GetCertificatesSDK(), ent) case *configurationv1alpha1.KongTarget: + entityType = EntityTypeTarget err = createTarget(ctx, sdk.GetTargetsSDK(), ent) case *configurationv1alpha1.KongVault: + entityType = EntityTypeVault err = createVault(ctx, sdk.GetVaultSDK(), ent) case *configurationv1alpha1.KongKey: + entityType = EntityTypeKey err = createKey(ctx, sdk.GetKeysSDK(), ent) case *configurationv1alpha1.KongKeySet: + entityType = EntityTypeKeySet err = createKeySet(ctx, sdk.GetKeySetsSDK(), ent) case *configurationv1alpha1.KongSNI: + entityType = EntityTypeSNI err = createSNI(ctx, sdk.GetSNIsSDK(), ent) case *configurationv1alpha1.KongDataPlaneClientCertificate: + entityType = EntityTypeDataPlaneCertificate err = createKongDataPlaneClientCertificate(ctx, sdk.GetDataPlaneCertificatesSDK(), ent) // --------------------------------------------------------------------- // TODO: add other Konnect types @@ -162,6 +215,7 @@ func Create[ } case errors.As(err, &errSDK): + statusCode = errSDK.StatusCode SetKonnectEntityProgrammedConditionFalse(e, consts.KonnectEntitiesFailedToCreateReason, errSDK.Error()) case errors.As(err, &errRelationsFailed): e.SetKonnectID(errRelationsFailed.KonnectID) @@ -172,6 +226,20 @@ func Create[ SetKonnectEntityProgrammedCondition(e) } + if err != nil { + metricRecorder.RecordKonnectEntityOperationFailure( + metrics.KonnectEntityOperationCreate, + string(entityType), + time.Since(start), + statusCode, + ) + } else { + metricRecorder.RecordKonnectEntityOperationSuccess( + 
metrics.KonnectEntityOperationCreate, + string(entityType), + time.Since(start), + ) + } logOpComplete(ctx, start, CreateOp, e, err) return e, IgnoreUnrecoverableAPIErr(err, loggerForEntity(ctx, e, CreateOp)) @@ -182,7 +250,7 @@ func Create[ func Delete[ T constraints.SupportedKonnectEntityType, TEnt constraints.EntityType[T], -](ctx context.Context, sdk sdkops.SDKWrapper, cl client.Client, ent TEnt) error { +](ctx context.Context, sdk sdkops.SDKWrapper, cl client.Client, metricRecorder metrics.Recorder, ent TEnt) error { if ent.GetKonnectStatus().GetKonnectID() == "" { cond, ok := k8sutils.GetCondition(konnectv1alpha1.KonnectEntityProgrammedConditionType, ent) if ok && cond.Status == metav1.ConditionTrue { @@ -197,47 +265,70 @@ func Delete[ var ( err error start = time.Now() + + entityType EntityTypeName + statusCode int ) switch ent := any(ent).(type) { case *konnectv1alpha1.KonnectGatewayControlPlane: + entityType = EntityTypeControlPlane err = deleteControlPlane(ctx, sdk.GetControlPlaneSDK(), ent) case *configurationv1alpha1.KongService: + entityType = EntityTypeService err = deleteService(ctx, sdk.GetServicesSDK(), ent) case *configurationv1alpha1.KongRoute: + entityType = EntityTypeRoute err = deleteRoute(ctx, sdk.GetRoutesSDK(), ent) case *configurationv1.KongConsumer: + entityType = EntityTypeConsumer err = deleteConsumer(ctx, sdk.GetConsumersSDK(), ent) case *configurationv1beta1.KongConsumerGroup: + entityType = EntityTypeConsumerGroup err = deleteConsumerGroup(ctx, sdk.GetConsumerGroupsSDK(), ent) case *configurationv1alpha1.KongPluginBinding: + entityType = EntityTypePlugin err = deletePlugin(ctx, sdk.GetPluginSDK(), ent) case *configurationv1alpha1.KongUpstream: + entityType = EntityTypeUpstream err = deleteUpstream(ctx, sdk.GetUpstreamsSDK(), ent) case *configurationv1alpha1.KongCredentialBasicAuth: + entityType = EntityTypeBasicAuthCredential err = deleteKongCredentialBasicAuth(ctx, sdk.GetBasicAuthCredentialsSDK(), ent) case 
*configurationv1alpha1.KongCredentialAPIKey: + entityType = EntityTypeAPIKeyCredential err = deleteKongCredentialAPIKey(ctx, sdk.GetAPIKeyCredentialsSDK(), ent) case *configurationv1alpha1.KongCredentialACL: + entityType = EntityTypeACLCredential err = deleteKongCredentialACL(ctx, sdk.GetACLCredentialsSDK(), ent) case *configurationv1alpha1.KongCredentialJWT: + entityType = EntityTypeJWTCredential err = deleteKongCredentialJWT(ctx, sdk.GetJWTCredentialsSDK(), ent) case *configurationv1alpha1.KongCredentialHMAC: + entityType = EntityTypeHMACCredential err = deleteKongCredentialHMAC(ctx, sdk.GetHMACCredentialsSDK(), ent) case *configurationv1alpha1.KongCACertificate: + entityType = EntityTypeCACertificate err = deleteCACertificate(ctx, sdk.GetCACertificatesSDK(), ent) case *configurationv1alpha1.KongCertificate: + entityType = EntityTypeCertificate err = deleteCertificate(ctx, sdk.GetCertificatesSDK(), ent) case *configurationv1alpha1.KongTarget: + entityType = EntityTypeTarget err = deleteTarget(ctx, sdk.GetTargetsSDK(), ent) case *configurationv1alpha1.KongVault: + entityType = EntityTypeVault err = deleteVault(ctx, sdk.GetVaultSDK(), ent) case *configurationv1alpha1.KongKey: + entityType = EntityTypeKey err = deleteKey(ctx, sdk.GetKeysSDK(), ent) case *configurationv1alpha1.KongKeySet: + entityType = EntityTypeKeySet err = deleteKeySet(ctx, sdk.GetKeySetsSDK(), ent) case *configurationv1alpha1.KongSNI: + entityType = EntityTypeSNI err = deleteSNI(ctx, sdk.GetSNIsSDK(), ent) case *configurationv1alpha1.KongDataPlaneClientCertificate: + entityType = EntityTypeDataPlaneCertificate err = deleteKongDataPlaneClientCertificate(ctx, sdk.GetDataPlaneCertificatesSDK(), ent) // --------------------------------------------------------------------- // TODO: add other Konnect types @@ -245,6 +336,24 @@ func Delete[ return fmt.Errorf("unsupported entity type %T", ent) } + if err != nil { + var errSDK sdkkonnecterrs.SDKError + if errors.As(err, &errSDK) { + statusCode = 
errSDK.StatusCode + } + metricRecorder.RecordKonnectEntityOperationFailure( + metrics.KonnectEntityOperationDelete, + string(entityType), + time.Since(start), + statusCode, + ) + } else { + metricRecorder.RecordKonnectEntityOperationSuccess( + metrics.KonnectEntityOperationDelete, + string(entityType), + time.Since(start), + ) + } logOpComplete(ctx, start, DeleteOp, ent, err) return err @@ -297,6 +406,7 @@ func Update[ sdk sdkops.SDKWrapper, syncPeriod time.Duration, cl client.Client, + metricRecorder metrics.Recorder, e TEnt, ) (ctrl.Result, error) { now := time.Now() @@ -312,45 +422,70 @@ func Update[ ) } - var err error + var ( + err error + + entityType EntityTypeName + statusCode int + start = time.Now() + ) switch ent := any(e).(type) { case *konnectv1alpha1.KonnectGatewayControlPlane: + entityType = EntityTypeControlPlane err = updateControlPlane(ctx, sdk.GetControlPlaneSDK(), sdk.GetControlPlaneGroupSDK(), cl, ent) case *configurationv1alpha1.KongService: + entityType = EntityTypeService err = updateService(ctx, sdk.GetServicesSDK(), ent) case *configurationv1alpha1.KongRoute: + entityType = EntityTypeRoute err = updateRoute(ctx, sdk.GetRoutesSDK(), ent) case *configurationv1.KongConsumer: + entityType = EntityTypeConsumer err = updateConsumer(ctx, sdk.GetConsumersSDK(), sdk.GetConsumerGroupsSDK(), cl, ent) case *configurationv1beta1.KongConsumerGroup: + entityType = EntityTypeConsumerGroup err = updateConsumerGroup(ctx, sdk.GetConsumerGroupsSDK(), ent) case *configurationv1alpha1.KongPluginBinding: + entityType = EntityTypePlugin err = updatePlugin(ctx, sdk.GetPluginSDK(), cl, ent) case *configurationv1alpha1.KongUpstream: + entityType = EntityTypeUpstream err = updateUpstream(ctx, sdk.GetUpstreamsSDK(), ent) case *configurationv1alpha1.KongCredentialBasicAuth: + entityType = EntityTypeBasicAuthCredential err = updateKongCredentialBasicAuth(ctx, sdk.GetBasicAuthCredentialsSDK(), ent) case *configurationv1alpha1.KongCredentialAPIKey: + entityType = 
EntityTypeAPIKeyCredential err = updateKongCredentialAPIKey(ctx, sdk.GetAPIKeyCredentialsSDK(), ent) case *configurationv1alpha1.KongCredentialACL: + entityType = EntityTypeACLCredential err = updateKongCredentialACL(ctx, sdk.GetACLCredentialsSDK(), ent) case *configurationv1alpha1.KongCredentialJWT: + entityType = EntityTypeJWTCredential err = updateKongCredentialJWT(ctx, sdk.GetJWTCredentialsSDK(), ent) case *configurationv1alpha1.KongCredentialHMAC: + entityType = EntityTypeHMACCredential err = updateKongCredentialHMAC(ctx, sdk.GetHMACCredentialsSDK(), ent) case *configurationv1alpha1.KongCACertificate: + entityType = EntityTypeCACertificate err = updateCACertificate(ctx, sdk.GetCACertificatesSDK(), ent) case *configurationv1alpha1.KongCertificate: + entityType = EntityTypeCertificate err = updateCertificate(ctx, sdk.GetCertificatesSDK(), ent) case *configurationv1alpha1.KongTarget: + entityType = EntityTypeTarget err = updateTarget(ctx, sdk.GetTargetsSDK(), ent) case *configurationv1alpha1.KongVault: + entityType = EntityTypeVault err = updateVault(ctx, sdk.GetVaultSDK(), ent) case *configurationv1alpha1.KongKey: + entityType = EntityTypeKey err = updateKey(ctx, sdk.GetKeysSDK(), ent) case *configurationv1alpha1.KongKeySet: + entityType = EntityTypeKeySet err = updateKeySet(ctx, sdk.GetKeySetsSDK(), ent) case *configurationv1alpha1.KongSNI: + entityType = EntityTypeSNI err = updateSNI(ctx, sdk.GetSNIsSDK(), ent) case *configurationv1alpha1.KongDataPlaneClientCertificate: - err = nil // DataPlaneCertificates are immutable. + entityType, err = EntityTypeDataPlaneCertificate, nil // DataPlaneCertificates are immutable; only the metric label is set.
@@ -367,6 +502,7 @@ func Update[ ) switch { case errors.As(err, &errSDK): + statusCode = errSDK.StatusCode SetKonnectEntityProgrammedConditionFalse(e, consts.KonnectEntitiesFailedToUpdateReason, errSDK.Body) case errors.As(err, &errRelationsFailed): e.SetKonnectID(errRelationsFailed.KonnectID) @@ -377,7 +513,21 @@ func Update[ SetKonnectEntityProgrammedCondition(e) } - logOpComplete(ctx, now, UpdateOp, e, err) + if err != nil { + metricRecorder.RecordKonnectEntityOperationFailure( + metrics.KonnectEntityOperationUpdate, + string(entityType), + time.Since(start), + statusCode, + ) + } else { + metricRecorder.RecordKonnectEntityOperationSuccess( + metrics.KonnectEntityOperationUpdate, + string(entityType), + time.Since(start), + ) + } + logOpComplete(ctx, start, UpdateOp, e, err) return ctrl.Result{}, IgnoreUnrecoverableAPIErr(err, loggerForEntity(ctx, e, UpdateOp)) } diff --git a/controller/konnect/ops/ops_controlplane_test.go b/controller/konnect/ops/ops_controlplane_test.go index 1006a194..406cfeca 100644 --- a/controller/konnect/ops/ops_controlplane_test.go +++ b/controller/konnect/ops/ops_controlplane_test.go @@ -21,6 +21,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client/fake" sdkmocks "github.com/kong/gateway-operator/controller/konnect/ops/sdk/mocks" + "github.com/kong/gateway-operator/internal/metrics" "github.com/kong/gateway-operator/modules/manager/scheme" "github.com/kong/gateway-operator/pkg/consts" @@ -619,7 +620,7 @@ func TestCreateAndUpdateControlPlane_KubernetesMetadataConsistency(t *testing.T) ID: "12345", }, }, nil) - _, err := Create(ctx, sdk.SDK, fakeClient, cp) + _, err := Create(ctx, sdk.SDK, fakeClient, &metrics.MockRecorder{}, cp) require.NoError(t, err) t.Log("Triggering UpdateControlPlane with expected labels") @@ -633,7 +634,7 @@ func TestCreateAndUpdateControlPlane_KubernetesMetadataConsistency(t *testing.T) ID: "12345", }, }, nil) - _, err = Update(ctx, sdk.SDK, 0, fakeClient, cp) + _, err = Update(ctx, sdk.SDK, 0, fakeClient, 
&metrics.MockRecorder{}, cp) require.NoError(t, err) } diff --git a/controller/konnect/ops/ops_test.go b/controller/konnect/ops/ops_test.go index 058003d4..4b36318b 100644 --- a/controller/konnect/ops/ops_test.go +++ b/controller/konnect/ops/ops_test.go @@ -17,6 +17,7 @@ import ( "github.com/kong/gateway-operator/controller/konnect/constraints" sdkmocks "github.com/kong/gateway-operator/controller/konnect/ops/sdk/mocks" + "github.com/kong/gateway-operator/internal/metrics" "github.com/kong/gateway-operator/modules/manager/scheme" "github.com/kong/gateway-operator/pkg/consts" @@ -237,7 +238,7 @@ func testCreate[ sdk = tc.sdkFunc(t, sdk) } - _, err := Create(context.Background(), sdk, fakeClient, tc.entity) + _, err := Create(context.Background(), sdk, fakeClient, &metrics.MockRecorder{}, tc.entity) if tc.expectedErrorContains != "" { require.ErrorContains(t, err, tc.expectedErrorContains) } else { @@ -345,7 +346,7 @@ func testDelete[ sdk = tc.sdkFunc(t, sdk) } - err := Delete(context.Background(), sdk, fakeClient, tc.entity) + err := Delete(context.Background(), sdk, fakeClient, &metrics.MockRecorder{}, tc.entity) if tc.expectedError != "" { require.ErrorContains(t, err, tc.expectedError) return diff --git a/controller/konnect/reconciler_generic.go b/controller/konnect/reconciler_generic.go index 71afcda8..59269dfd 100644 --- a/controller/konnect/reconciler_generic.go +++ b/controller/konnect/reconciler_generic.go @@ -22,6 +22,7 @@ import ( sdkops "github.com/kong/gateway-operator/controller/konnect/ops/sdk" "github.com/kong/gateway-operator/controller/pkg/log" "github.com/kong/gateway-operator/controller/pkg/patch" + "github.com/kong/gateway-operator/internal/metrics" "github.com/kong/gateway-operator/pkg/consts" k8sutils "github.com/kong/gateway-operator/pkg/utils/kubernetes" @@ -45,6 +46,8 @@ type KonnectEntityReconciler[T constraints.SupportedKonnectEntityType, TEnt cons Client client.Client SyncPeriod time.Duration MaxConcurrentReconciles uint + + MetricRecoder 
metrics.Recorder } // KonnectEntityReconcilerOption is a functional option for the KonnectEntityReconciler. @@ -71,6 +74,15 @@ func WithKonnectMaxConcurrentReconciles[T constraints.SupportedKonnectEntityType } } +// WithMetricRecorder sets the metric recorder used to record metrics of Konnect entity operations performed by the reconciler. +func WithMetricRecorder[T constraints.SupportedKonnectEntityType, TEnt constraints.EntityType[T]]( + metricRecorder metrics.Recorder, +) KonnectEntityReconcilerOption[T, TEnt] { + return func(r *KonnectEntityReconciler[T, TEnt]) { + r.MetricRecoder = metricRecorder + } +} + // NewKonnectEntityReconciler returns a new KonnectEntityReconciler for the given // Konnect entity type. func NewKonnectEntityReconciler[ @@ -88,6 +100,7 @@ func NewKonnectEntityReconciler[ Client: client, SyncPeriod: consts.DefaultKonnectSyncPeriod, MaxConcurrentReconciles: consts.DefaultKonnectMaxConcurrentReconciles, + MetricRecoder: &metrics.MockRecorder{}, } for _, opt := range opts { opt(r) @@ -469,7 +482,7 @@ func (r *KonnectEntityReconciler[T, TEnt]) Reconcile( } if controllerutil.RemoveFinalizer(ent, KonnectCleanupFinalizer) { - if err := ops.Delete[T, TEnt](ctx, sdk, r.Client, ent); err != nil { + if err := ops.Delete[T, TEnt](ctx, sdk, r.Client, r.MetricRecoder, ent); err != nil { if res, errStatus := patch.StatusWithCondition( ctx, r.Client, ent, konnectv1alpha1.KonnectEntityProgrammedConditionType, @@ -500,7 +513,7 @@ func (r *KonnectEntityReconciler[T, TEnt]) Reconcile( // https://github.com/kubernetes/kubernetes/blob/master/pkg/controller/controller_utils.go if status := ent.GetKonnectStatus(); status == nil || status.GetKonnectID() == "" { obj := ent.DeepCopyObject().(client.Object) - _, err := ops.Create[T, TEnt](ctx, sdk, r.Client, ent) + _, err := ops.Create[T, TEnt](ctx, sdk, r.Client, r.MetricRecoder, ent) // TODO: this is actually not 100% error prone because when status // update fails we don't store the Konnect ID and hence the reconciler @@ -557,7 
+570,7 @@ func (r *KonnectEntityReconciler[T, TEnt]) Reconcile( return ctrl.Result{}, nil } - if res, err := ops.Update[T, TEnt](ctx, sdk, r.SyncPeriod, r.Client, ent); err != nil { + if res, err := ops.Update[T, TEnt](ctx, sdk, r.SyncPeriod, r.Client, r.MetricRecoder, ent); err != nil { setServerURLAndOrgID(ent, serverURL, apiAuth.Status.OrganizationID) if errUpd := r.Client.Status().Update(ctx, ent); errUpd != nil { if k8serrors.IsConflict(errUpd) { diff --git a/internal/metrics/prometheus.go b/internal/metrics/prometheus.go new file mode 100644 index 00000000..c4ea9e2f --- /dev/null +++ b/internal/metrics/prometheus.go @@ -0,0 +1,161 @@ +package metrics + +import ( + "fmt" + "strconv" + "time" + + "github.com/prometheus/client_golang/prometheus" + ctrlmetrics "sigs.k8s.io/controller-runtime/pkg/metrics" +) + +// Recorder is the interface for recording metrics on a certain operation. +type Recorder interface { + // REVIEW: add URL of Konnect API as a label? + RecordKonnectEntityOperationSuccess(operationType KonnectEntityOperation, entityType string, duration time.Duration) + RecordKonnectEntityOperationFailure(operationType KonnectEntityOperation, entityType string, duration time.Duration, statusCode int) +} + +// KonnectEntityOperation is the operation type recorded as a metric label value for a Konnect entity operation. +type KonnectEntityOperation string + +const ( + KonnectEntityOperationTypeKey = "operation_type" + KonnectEntityOperationCreate KonnectEntityOperation = "create" + KonnectEntityOperationUpdate KonnectEntityOperation = "update" + KonnectEnttiyOperationUpdate = KonnectEntityOperationUpdate // Misspelled alias kept for backward compatibility; prefer KonnectEntityOperationUpdate. + KonnectEntityOperationDelete KonnectEntityOperation = "delete" + + KonnectEntityTypeKey = "entity_type" + + SuccessKey = "success" + SuccessTrue = "true" + SuccessFalse = "false" + + StatusCodeKey = "status_code" +) + +// metric names for konnect entity operations. 
+const ( + // REVIEW: define a Namespace `gateway_operator` for creating prometheus metrics here? 
+ MetricNameKonnectEntityOperationCount = "gateway_operator_konnect_entity_operation_count" + MetricNameKonnectEntityOperationDuration = "gateway_operator_konnect_entity_operation_duration" +) + +var ( + konnectEntityOperationCount = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Name: MetricNameKonnectEntityOperationCount, + Help: fmt.Sprintf( + "Count of successful/failed entity operations in Konnect. "+ + "`%s` describes the operation type (`%s`, `%s`, or `%s`). "+ + "`%s` describes the type of the operated entity. "+ + "`%s` describes whether the operation is successful (`%s`) or not (`%s`). "+ + "`%s` is populated in case of `%s=\"%s\"` and describes the status code returned from Konnect API.", + KonnectEntityOperationTypeKey, KonnectEntityOperationCreate, KonnectEntityOperationUpdate, KonnectEntityOperationDelete, + KonnectEntityTypeKey, + SuccessKey, SuccessTrue, SuccessFalse, + StatusCodeKey, SuccessKey, SuccessFalse, + ), + }, + []string{KonnectEntityOperationTypeKey, KonnectEntityTypeKey, SuccessKey, StatusCodeKey}, + ) + + konnectEntityOperationDuration = prometheus.NewHistogramVec( + prometheus.HistogramOpts{ + Name: MetricNameKonnectEntityOperationDuration, + Help: fmt.Sprintf( + "How long did the Konnect entity operation take in seconds. "+ + "`%s` describes the operation type (`%s`, `%s`, or `%s`). "+ + "`%s` describes the type of the operated entity. "+ + "`%s` describes whether the operation is successful (`%s`) or not (`%s`). "+ + "`%s` is populated in case of `%s=\"%s\"` and describes the status code returned from Konnect API.", + KonnectEntityOperationTypeKey, KonnectEntityOperationCreate, KonnectEntityOperationUpdate, KonnectEntityOperationDelete, + KonnectEntityTypeKey, + SuccessKey, SuccessTrue, SuccessFalse, + StatusCodeKey, SuccessKey, SuccessFalse, + ), + // Duration range from 0.1s to 10min (600s).
+ Buckets: prometheus.ExponentialBucketsRange(0.1, 600, 20), + }, + []string{KonnectEntityOperationTypeKey, KonnectEntityTypeKey, SuccessKey, StatusCodeKey}, + ) +) + +// GlobalCtrlRuntimeMetricsRecorder is a metrics recorder that uses a global Prometheus registry +// provided by the controller-runtime. Any instance of it will record metrics to the same registry. +// +// We want to expose Gateway operator's custom metrics on the same endpoint as controller-runtime's built-in +// ones. Because of that, we have to use its global registry as CR doesn't allow injecting a custom one. +// Upstream issue regarding this: https://github.com/kubernetes-sigs/controller-runtime/issues/210. +type GlobalCtrlRuntimeMetricsRecorder struct{} + +var _ Recorder = &GlobalCtrlRuntimeMetricsRecorder{} + +func NewGlobalCtrlRuntimeMetricsRecorder() *GlobalCtrlRuntimeMetricsRecorder { + return &GlobalCtrlRuntimeMetricsRecorder{} +} + +func (r *GlobalCtrlRuntimeMetricsRecorder) RecordKonnectEntityOperationSuccess( + operationType KonnectEntityOperation, entityType string, duration time.Duration) { + r.recordKonnectEntityOperationCount(operationType, entityType, true, 0) + r.recordKonnectEntityOperationDuration(operationType, entityType, true, 0, duration) +} + +func (r *GlobalCtrlRuntimeMetricsRecorder) RecordKonnectEntityOperationFailure( + operationType KonnectEntityOperation, entityType string, duration time.Duration, statusCode int) { + r.recordKonnectEntityOperationCount(operationType, entityType, false, statusCode) + r.recordKonnectEntityOperationDuration(operationType, entityType, false, statusCode, duration) +} + +func (r *GlobalCtrlRuntimeMetricsRecorder) recordKonnectEntityOperationCount( + operationType KonnectEntityOperation, entityType string, success bool, statusCode int, +) { + labels := konnectEntityOperationLabels(operationType, entityType, success, statusCode) + konnectEntityOperationCount.With(labels).Inc() +} + +func (r *GlobalCtrlRuntimeMetricsRecorder) 
recordKonnectEntityOperationDuration( + operationType KonnectEntityOperation, entityType string, success bool, statusCode int, duration time.Duration, +) { + labels := konnectEntityOperationLabels(operationType, entityType, success, statusCode) + konnectEntityOperationDuration.With(labels).Observe(duration.Seconds()) +} + +func konnectEntityOperationLabels(operationType KonnectEntityOperation, entityType string, success bool, statusCode int, +) prometheus.Labels { + labels := prometheus.Labels{ + KonnectEntityOperationTypeKey: string(operationType), + KonnectEntityTypeKey: entityType, + } + if success { + labels[SuccessKey] = SuccessTrue + labels[StatusCodeKey] = "" + } else { + labels[SuccessKey] = SuccessFalse + labels[StatusCodeKey] = strconv.Itoa(statusCode) + } + return labels +} + +func init() { + allMetrics := []prometheus.Collector{ + konnectEntityOperationCount, + konnectEntityOperationDuration, + } + for _, m := range allMetrics { + ctrlmetrics.Registry.MustRegister(m) + } +} + +type MockRecorder struct{} + +var _ Recorder = &MockRecorder{} + +func (m *MockRecorder) RecordKonnectEntityOperationSuccess( + operationType KonnectEntityOperation, entityType string, duration time.Duration) { +} + +func (m *MockRecorder) RecordKonnectEntityOperationFailure( + operationType KonnectEntityOperation, entityType string, duration time.Duration, statusCode int) { +} diff --git a/modules/manager/controller_setup.go b/modules/manager/controller_setup.go index 10b33ed2..3cda88b9 100644 --- a/modules/manager/controller_setup.go +++ b/modules/manager/controller_setup.go @@ -27,6 +27,7 @@ import ( sdkops "github.com/kong/gateway-operator/controller/konnect/ops/sdk" "github.com/kong/gateway-operator/controller/pkg/log" "github.com/kong/gateway-operator/controller/specialized" + "github.com/kong/gateway-operator/internal/metrics" "github.com/kong/gateway-operator/internal/utils/index" dataplanevalidator "github.com/kong/gateway-operator/internal/validation/dataplane" 
"github.com/kong/gateway-operator/pkg/consts" @@ -508,6 +509,9 @@ func SetupControllers(mgr manager.Manager, c *Config) (map[string]ControllerDef, return nil, err } + // REVIEW: Should we define the recorder here, or define it out of the section to allow setting custom metrics in other controllers? + metricRecorder := metrics.NewGlobalCtrlRuntimeMetricsRecorder() + sdkFactory := sdkops.NewSDKFactory() controllerFactory := konnectControllerFactory{ sdkFactory: sdkFactory, @@ -515,6 +519,7 @@ func SetupControllers(mgr manager.Manager, c *Config) (map[string]ControllerDef, client: mgr.GetClient(), syncPeriod: c.KonnectSyncPeriod, maxConcurrentReconciles: c.KonnectMaxConcurrentReconciles, + metricRecorder: metricRecorder, } konnectControllers := map[string]ControllerDef{ @@ -696,6 +701,7 @@ type konnectControllerFactory struct { client client.Client syncPeriod time.Duration maxConcurrentReconciles uint + metricRecorder metrics.Recorder } func newKonnectController[ @@ -710,6 +716,7 @@ func newKonnectController[ f.client, konnect.WithKonnectEntitySyncPeriod[T, TEnt](f.syncPeriod), konnect.WithKonnectMaxConcurrentReconciles[T, TEnt](f.maxConcurrentReconciles), + konnect.WithMetricRecorder[T, TEnt](f.metricRecorder), ), } }