From 843a8d1323738cdf98c3526bac3a6cd78de25adc Mon Sep 17 00:00:00 2001 From: Thibault Mange <22740367+thibaultmg@users.noreply.github.com> Date: Thu, 21 Sep 2023 11:23:44 +0200 Subject: [PATCH 01/32] init service_go Signed-off-by: Thibault Mange <22740367+thibaultmg@users.noreply.github.com> --- ...bservatorium-metrics-compact-template.yaml | 276 ++++++++++++++++++ ...bservatorium-metrics-compact-template.yaml | 276 ++++++++++++++++++ services_go/instances/rhobs/rhobs.go | 22 ++ services_go/observatorium/metrics.go | 116 ++++++++ services_go/observatorium/observatorium.go | 69 +++++ services_go/services.go | 15 + 6 files changed, 774 insertions(+) create mode 100755 resources/services_go/app-sre-stage-01/rhobs/observatorium-metrics-compact-template.yaml create mode 100755 resources/services_go/telemeter-prod-01/rhobs/observatorium-metrics-compact-template.yaml create mode 100644 services_go/instances/rhobs/rhobs.go create mode 100644 services_go/observatorium/metrics.go create mode 100644 services_go/observatorium/observatorium.go create mode 100644 services_go/services.go diff --git a/resources/services_go/app-sre-stage-01/rhobs/observatorium-metrics-compact-template.yaml b/resources/services_go/app-sre-stage-01/rhobs/observatorium-metrics-compact-template.yaml new file mode 100755 index 0000000000..665299282c --- /dev/null +++ b/resources/services_go/app-sre-stage-01/rhobs/observatorium-metrics-compact-template.yaml @@ -0,0 +1,276 @@ +apiVersion: template.openshift.io/v1 +kind: Template +metadata: + creationTimestamp: null + name: observatorium-metrics-compact +objects: +- apiVersion: v1 + data: + session_secret: c2VjcmV0 + kind: Secret + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: database-compactor + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-compact + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.3 + name: compact-proxy + namespace: rhobs +- apiVersion: v1 + kind: 
Service + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: database-compactor + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-compact + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.3 + name: observatorium-thanos-compact + namespace: rhobs + spec: + ports: + - name: http + port: 10902 + protocol: TCP + targetPort: 10902 + - name: https + port: 8443 + protocol: TCP + targetPort: 8443 + selector: + app.kubernetes.io/component: database-compactor + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-compact + app.kubernetes.io/part-of: observatorium + status: + loadBalancer: {} +- apiVersion: v1 + kind: ServiceAccount + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: database-compactor + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-compact + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.3 + name: observatorium-thanos-compact + namespace: rhobs +- apiVersion: monitoring.coreos.com/v1 + kind: ServiceMonitor + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: database-compactor + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-compact + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.3 + name: observatorium-thanos-compact + namespace: rhobs + spec: + endpoints: + - bearerTokenSecret: + key: "" + port: http + relabelings: + - action: replace + separator: / + sourceLabels: + - namespace + - pod + targetLabel: instance + namespaceSelector: + matchNames: + - openshift-monitoring + selector: + matchLabels: + app.kubernetes.io/component: database-compactor + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-compact + app.kubernetes.io/part-of: observatorium +- apiVersion: apps/v1 + kind: StatefulSet + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: 
database-compactor + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-compact + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.3 + name: observatorium-thanos-compact + namespace: rhobs + spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/component: database-compactor + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-compact + app.kubernetes.io/part-of: observatorium + serviceName: observatorium-thanos-compact + template: + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: database-compactor + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-compact + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.3 + namespace: rhobs + spec: + affinity: + podAntiAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - podAffinityTerm: + labelSelector: + matchExpressions: + - key: app.kubernetes.io/name + operator: In + values: + - thanos-compact + - key: app.kubernetes.io/instance + operator: In + values: + - observatorium + namespaces: + - observatorium + topologyKey: kubernetes.io/hostname + weight: 100 + containers: + - args: + - compact + - --compact.concurrency=1 + - --data-dir=/var/thanos/compactor + - --deduplication.replica-label=replica + - --delete-delay=24h0m0s + - --downsample.concurrency=1 + - --log.format=logfmt + - --log.level=warn + - --objstore.config=$(OBJSTORE_CONFIG) + - --retention.resolution-1h=8760h0m0s + - --retention.resolution-5m=8760h0m0s + - --retention.resolution-raw=8760h0m0s + - --wait + - --debug.max-compaction-level=3 + env: + - name: AWS_ACCESS_KEY_ID + valueFrom: + secretKeyRef: + key: aws_access_key_id + name: rhobs-thanos-s3 + - name: AWS_SECRET_ACCESS_KEY + valueFrom: + secretKeyRef: + key: aws_secret_access_key + name: rhobs-thanos-s3 + - name: OBJSTORE_CONFIG + valueFrom: + secretKeyRef: + key: thanos.yaml + name: rhobs-thanos-objectstorage + image: 
quay.io/thanos/thanos:v0.32.3 + imagePullPolicy: IfNotPresent + livenessProbe: + failureThreshold: 4 + httpGet: + path: /-/healthy + port: 10902 + periodSeconds: 30 + name: thanos + ports: + - containerPort: 10902 + name: http + protocol: TCP + readinessProbe: + failureThreshold: 20 + httpGet: + path: /-/healthy + port: 10902 + periodSeconds: 5 + resources: + limits: + memory: 5Gi + requests: + cpu: 200m + memory: 1Gi + terminationMessagePolicy: FallbackToLogsOnError + - args: + - -provider=openshift + - -https-address=:8443 + - -http-address= + - -email-domain=* + - -upstream=http://localhost:10902 + - -openshift-service-account=observatorium-thanos-compact + - '-openshift-sar={"resource": "namespaces", "verb": "get", "name": "rhobs", + "namespace": "rhobs"}' + - '-openshift-delegate-urls={"/": {"resource": "namespaces", "verb": "get", + "name": "rhobs", "namespace": "rhobs"}}' + - -tls-cert=/etc/tls/private/tls.crt + - -tls-key=/etc/tls/private/tls.key + - -client-secret-file=/var/run/secrets/kubernetes.io/serviceaccount/token + - -cookie-secret-file=/etc/proxy/secrets/session_secret + - -openshift-ca=/etc/pki/tls/cert.pem + - -openshift-ca=/var/run/secrets/kubernetes.io/serviceaccount/ca.crt + image: quay.io/openshift/origin-oauth-proxy:v4.8.0 + name: oauth-proxy + ports: + - containerPort: 8443 + name: https + protocol: TCP + resources: + limits: + cpu: 200m + memory: 200Mi + requests: + cpu: 100m + memory: 100Mi + terminationMessagePolicy: FallbackToLogsOnError + volumeMounts: + - mountPath: /etc/tls/private + name: compact-tls + readOnly: true + - mountPath: /etc/proxy/secrets + name: compact-proxy + readOnly: true + nodeSelector: + kubernetes.io/os: linux + securityContext: + fsGroup: 65534 + runAsUser: 65534 + serviceAccountName: observatorium-thanos-compact + terminationGracePeriodSeconds: 120 + volumes: + - name: compact-tls + secret: + secretName: compact-tls + - name: compact-proxy + secret: + secretName: compact-proxy + updateStrategy: {} + 
volumeClaimTemplates: + - metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: database-compactor + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-compact + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.3 + name: data + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 500Gi + storageClassName: gp2 + status: {} + status: + availableReplicas: 0 + replicas: 0 diff --git a/resources/services_go/telemeter-prod-01/rhobs/observatorium-metrics-compact-template.yaml b/resources/services_go/telemeter-prod-01/rhobs/observatorium-metrics-compact-template.yaml new file mode 100755 index 0000000000..665299282c --- /dev/null +++ b/resources/services_go/telemeter-prod-01/rhobs/observatorium-metrics-compact-template.yaml @@ -0,0 +1,276 @@ +apiVersion: template.openshift.io/v1 +kind: Template +metadata: + creationTimestamp: null + name: observatorium-metrics-compact +objects: +- apiVersion: v1 + data: + session_secret: c2VjcmV0 + kind: Secret + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: database-compactor + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-compact + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.3 + name: compact-proxy + namespace: rhobs +- apiVersion: v1 + kind: Service + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: database-compactor + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-compact + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.3 + name: observatorium-thanos-compact + namespace: rhobs + spec: + ports: + - name: http + port: 10902 + protocol: TCP + targetPort: 10902 + - name: https + port: 8443 + protocol: TCP + targetPort: 8443 + selector: + app.kubernetes.io/component: database-compactor + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-compact + 
app.kubernetes.io/part-of: observatorium + status: + loadBalancer: {} +- apiVersion: v1 + kind: ServiceAccount + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: database-compactor + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-compact + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.3 + name: observatorium-thanos-compact + namespace: rhobs +- apiVersion: monitoring.coreos.com/v1 + kind: ServiceMonitor + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: database-compactor + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-compact + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.3 + name: observatorium-thanos-compact + namespace: rhobs + spec: + endpoints: + - bearerTokenSecret: + key: "" + port: http + relabelings: + - action: replace + separator: / + sourceLabels: + - namespace + - pod + targetLabel: instance + namespaceSelector: + matchNames: + - openshift-monitoring + selector: + matchLabels: + app.kubernetes.io/component: database-compactor + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-compact + app.kubernetes.io/part-of: observatorium +- apiVersion: apps/v1 + kind: StatefulSet + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: database-compactor + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-compact + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.3 + name: observatorium-thanos-compact + namespace: rhobs + spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/component: database-compactor + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-compact + app.kubernetes.io/part-of: observatorium + serviceName: observatorium-thanos-compact + template: + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: database-compactor 
+ app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-compact + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.3 + namespace: rhobs + spec: + affinity: + podAntiAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - podAffinityTerm: + labelSelector: + matchExpressions: + - key: app.kubernetes.io/name + operator: In + values: + - thanos-compact + - key: app.kubernetes.io/instance + operator: In + values: + - observatorium + namespaces: + - observatorium + topologyKey: kubernetes.io/hostname + weight: 100 + containers: + - args: + - compact + - --compact.concurrency=1 + - --data-dir=/var/thanos/compactor + - --deduplication.replica-label=replica + - --delete-delay=24h0m0s + - --downsample.concurrency=1 + - --log.format=logfmt + - --log.level=warn + - --objstore.config=$(OBJSTORE_CONFIG) + - --retention.resolution-1h=8760h0m0s + - --retention.resolution-5m=8760h0m0s + - --retention.resolution-raw=8760h0m0s + - --wait + - --debug.max-compaction-level=3 + env: + - name: AWS_ACCESS_KEY_ID + valueFrom: + secretKeyRef: + key: aws_access_key_id + name: rhobs-thanos-s3 + - name: AWS_SECRET_ACCESS_KEY + valueFrom: + secretKeyRef: + key: aws_secret_access_key + name: rhobs-thanos-s3 + - name: OBJSTORE_CONFIG + valueFrom: + secretKeyRef: + key: thanos.yaml + name: rhobs-thanos-objectstorage + image: quay.io/thanos/thanos:v0.32.3 + imagePullPolicy: IfNotPresent + livenessProbe: + failureThreshold: 4 + httpGet: + path: /-/healthy + port: 10902 + periodSeconds: 30 + name: thanos + ports: + - containerPort: 10902 + name: http + protocol: TCP + readinessProbe: + failureThreshold: 20 + httpGet: + path: /-/healthy + port: 10902 + periodSeconds: 5 + resources: + limits: + memory: 5Gi + requests: + cpu: 200m + memory: 1Gi + terminationMessagePolicy: FallbackToLogsOnError + - args: + - -provider=openshift + - -https-address=:8443 + - -http-address= + - -email-domain=* + - -upstream=http://localhost:10902 + - 
-openshift-service-account=observatorium-thanos-compact + - '-openshift-sar={"resource": "namespaces", "verb": "get", "name": "rhobs", + "namespace": "rhobs"}' + - '-openshift-delegate-urls={"/": {"resource": "namespaces", "verb": "get", + "name": "rhobs", "namespace": "rhobs"}}' + - -tls-cert=/etc/tls/private/tls.crt + - -tls-key=/etc/tls/private/tls.key + - -client-secret-file=/var/run/secrets/kubernetes.io/serviceaccount/token + - -cookie-secret-file=/etc/proxy/secrets/session_secret + - -openshift-ca=/etc/pki/tls/cert.pem + - -openshift-ca=/var/run/secrets/kubernetes.io/serviceaccount/ca.crt + image: quay.io/openshift/origin-oauth-proxy:v4.8.0 + name: oauth-proxy + ports: + - containerPort: 8443 + name: https + protocol: TCP + resources: + limits: + cpu: 200m + memory: 200Mi + requests: + cpu: 100m + memory: 100Mi + terminationMessagePolicy: FallbackToLogsOnError + volumeMounts: + - mountPath: /etc/tls/private + name: compact-tls + readOnly: true + - mountPath: /etc/proxy/secrets + name: compact-proxy + readOnly: true + nodeSelector: + kubernetes.io/os: linux + securityContext: + fsGroup: 65534 + runAsUser: 65534 + serviceAccountName: observatorium-thanos-compact + terminationGracePeriodSeconds: 120 + volumes: + - name: compact-tls + secret: + secretName: compact-tls + - name: compact-proxy + secret: + secretName: compact-proxy + updateStrategy: {} + volumeClaimTemplates: + - metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: database-compactor + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-compact + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.3 + name: data + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 500Gi + storageClassName: gp2 + status: {} + status: + availableReplicas: 0 + replicas: 0 diff --git a/services_go/instances/rhobs/rhobs.go b/services_go/instances/rhobs/rhobs.go new file mode 100644 index 0000000000..31a91128a8 --- /dev/null 
+++ b/services_go/instances/rhobs/rhobs.go @@ -0,0 +1,22 @@ +package rhobs + +import ( + "github.com/rhobs/configuration/services_go/observatorium" +) + +func ClusterConfigs() []observatorium.InstanceConfiguration { + return []observatorium.InstanceConfiguration{ + { + Cluster: "app-sre-stage-01", + Namespace: "rhobs", + Instance: "rhobs", + Tenants: []observatorium.TenantInstanceConfiguration{}, + }, + { + Cluster: "telemeter-prod-01", + Namespace: "rhobs", + Instance: "rhobs", + Tenants: []observatorium.TenantInstanceConfiguration{}, + }, + } +} diff --git a/services_go/observatorium/metrics.go b/services_go/observatorium/metrics.go new file mode 100644 index 0000000000..1736f5a659 --- /dev/null +++ b/services_go/observatorium/metrics.go @@ -0,0 +1,116 @@ +package observatorium + +import ( + "fmt" + "time" + + "github.com/observatorium/observatorium/configuration_go/abstr/kubernetes/thanos/compactor" + "github.com/observatorium/observatorium/configuration_go/k8sutil" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" + "k8s.io/apimachinery/pkg/runtime" +) + +func makeCompactor(namespace string) *compactor.CompactorStatefulSet { + // K8s config + compactorSatefulset := compactor.NewCompactor() + compactorSatefulset.Image = thanosImage + compactorSatefulset.ImageTag = thanosImageTag + compactorSatefulset.Namespace = namespace + compactorSatefulset.Replicas = 1 + delete(compactorSatefulset.PodResources.Limits, corev1.ResourceCPU) // To be confirmed + compactorSatefulset.PodResources.Requests[corev1.ResourceCPU] = resource.MustParse("200m") + compactorSatefulset.PodResources.Requests[corev1.ResourceMemory] = resource.MustParse("1Gi") + compactorSatefulset.PodResources.Limits[corev1.ResourceMemory] = resource.MustParse("5Gi") + compactorSatefulset.VolumeType = "gp2" + compactorSatefulset.VolumeSize = "500Gi" + compactorSatefulset.Env = []corev1.EnvVar{ + k8sutil.NewEnvFromSecret("AWS_ACCESS_KEY_ID", "rhobs-thanos-s3", "aws_access_key_id"), + 
k8sutil.NewEnvFromSecret("AWS_SECRET_ACCESS_KEY", "rhobs-thanos-s3", "aws_secret_access_key"), + k8sutil.NewEnvFromSecret("OBJSTORE_CONFIG", "rhobs-thanos-objectstorage", "thanos.yaml"), + } + tlsSecret := "compact-tls" + compactorSatefulset.Sidecars = []k8sutil.ContainerProvider{makeOauthProxy(10902, namespace, compactorSatefulset.Name, tlsSecret)} + // compactorSatefulset.PostProcess = []k8sutil.ObjectProcessor{ + // serviceCertAnnotationModifier(tlsSecret), + // } + + // Compactor config + compactorSatefulset.Options.LogLevel = "warn" + compactorSatefulset.Options.RetentionResolutionRaw = 365 * 24 * time.Hour + compactorSatefulset.Options.RetentionResolution5m = 365 * 24 * time.Hour + compactorSatefulset.Options.RetentionResolution1h = 365 * 24 * time.Hour + compactorSatefulset.Options.DeleteDelay = 24 * time.Hour + compactorSatefulset.Options.CompactConcurrency = 1 + compactorSatefulset.Options.DownsampleConcurrency = 1 + compactorSatefulset.Options.DeduplicationReplicaLabel = "replica" + compactorSatefulset.Options.AddExtraOpts("--debug.max-compaction-level=3") + + return compactorSatefulset +} + +func serviceCertAnnotationModifier(secretName string) func(object runtime.Object) { + return func(object runtime.Object) { + if service, ok := object.(*corev1.Service); ok { + service.ObjectMeta.Annotations["service.beta.openshift.io/serving-cert-secret-name"] = secretName + } + } +} + +func makeOauthProxy(upstreamPort int32, namespace, serviceAccount, tlsSecret string) *k8sutil.Container { + proxyPort := int32(8443) + + return &k8sutil.Container{ + Name: "oauth-proxy", + Image: "quay.io/openshift/origin-oauth-proxy", + ImageTag: "v4.8.0", + Args: []string{ + "-provider=openshift", + fmt.Sprintf("-https-address=:%d", proxyPort), + "-http-address=", + "-email-domain=*", + fmt.Sprintf("-upstream=http://localhost:%d", upstreamPort), + fmt.Sprintf("-openshift-service-account=%s", serviceAccount), + fmt.Sprintf(`-openshift-sar={"resource": "namespaces", "verb": "get", 
"name": "%s", "namespace": "%s"}`, namespace, namespace), + fmt.Sprintf(`-openshift-delegate-urls={"/": {"resource": "namespaces", "verb": "get", "name": "%s", "namespace": "%s"}}`, namespace, namespace), + "-tls-cert=/etc/tls/private/tls.crt", + "-tls-key=/etc/tls/private/tls.key", + "-client-secret-file=/var/run/secrets/kubernetes.io/serviceaccount/token", + "-cookie-secret-file=/etc/proxy/secrets/session_secret", + "-openshift-ca=/etc/pki/tls/cert.pem", + "-openshift-ca=/var/run/secrets/kubernetes.io/serviceaccount/ca.crt", + }, + Resources: k8sutil.NewResourcesRequirements("100m", "200m", "100Mi", "200Mi"), + Ports: []corev1.ContainerPort{ + { + Name: "https", + ContainerPort: proxyPort, + Protocol: corev1.ProtocolTCP, + }, + }, + ServicePorts: []corev1.ServicePort{ + k8sutil.NewServicePort("https", int(proxyPort), int(proxyPort)), + }, + VolumeMounts: []corev1.VolumeMount{ + { + Name: "compact-tls", + MountPath: "/etc/tls/private", + ReadOnly: true, + }, + { + Name: "compact-proxy", + MountPath: "/etc/proxy/secrets", + ReadOnly: true, + }, + }, + Volumes: []corev1.Volume{ + k8sutil.NewPodVolumeFromSecret("compact-tls", tlsSecret), + k8sutil.NewPodVolumeFromSecret("compact-proxy", "compact-proxy"), + }, + Secrets: map[string]map[string][]byte{ + "compact-proxy": { + "session_secret": []byte("secret"), + }, + }, + } +} diff --git a/services_go/observatorium/observatorium.go b/services_go/observatorium/observatorium.go new file mode 100644 index 0000000000..7411af799d --- /dev/null +++ b/services_go/observatorium/observatorium.go @@ -0,0 +1,69 @@ +package observatorium + +// import "github.com/rhobs/configuration/services_go/components/thanos/compactor" + +import ( + "github.com/bwplotka/mimic" + "github.com/bwplotka/mimic/encoding" + "github.com/observatorium/api/rbac" + "github.com/observatorium/observatorium/configuration_go/abstr/kubernetes/thanos/compactor" + "github.com/observatorium/observatorium/configuration_go/k8sutil" + 
"github.com/observatorium/observatorium/configuration_go/openshift" + templatev1 "github.com/openshift/api/template/v1" + monv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +const ( + thanosImage = "quay.io/thanos/thanos" + thanosImageTag = "v0.32.3" + monitoringNamespace = "openshift-monitoring" +) + +type TenantInstanceConfiguration struct { + IngestRateLimit []struct{} + QueryRateLimit []struct{} + IngestHardTenant bool + Authorizers map[string]rbac.Authorizer + // Tenant *obs_api.tenant +} + +type InstanceConfiguration struct { + Cluster string + Instance string + Namespace string + Tenants []TenantInstanceConfiguration +} + +type Observatorium struct { + Cfg *InstanceConfiguration + Compactor *compactor.CompactorStatefulSet +} + +func NewObservatorium(cfg *InstanceConfiguration) *Observatorium { + + return &Observatorium{ + Cfg: cfg, + Compactor: makeCompactor(cfg.Namespace), + } +} + +func (o *Observatorium) Manifests(generator *mimic.Generator) { + compactorManifests := o.Compactor.Manifests() + postProcessManifests(compactorManifests) + + commonTemplateMeta := metav1.ObjectMeta{ + Name: "observatorium-metrics-compact", + } + compactorTemplate := openshift.WrapInTemplate("", compactorManifests, commonTemplateMeta, []templatev1.Parameter{}) + generator.With(o.Cfg.Cluster, o.Cfg.Instance).Add("observatorium-metrics-compact-template.yaml", encoding.GhodssYAML(compactorTemplate[""])) + +} + +func postProcessManifests(manifests k8sutil.ObjectMap) { + for _, manifest := range manifests { + if serviceMonitor, ok := manifest.(*monv1.ServiceMonitor); ok { + serviceMonitor.Spec.NamespaceSelector.MatchNames = []string{monitoringNamespace} + } + } +} diff --git a/services_go/services.go b/services_go/services.go new file mode 100644 index 0000000000..387cd6d41c --- /dev/null +++ b/services_go/services.go @@ -0,0 +1,15 @@ +package services + +import ( + "github.com/bwplotka/mimic" + 
"github.com/rhobs/configuration/services_go/instances/rhobs" + "github.com/rhobs/configuration/services_go/observatorium" +) + +func Generate(gen *mimic.Generator) { + rhobsConfigs := rhobs.ClusterConfigs() + for _, cfg := range rhobsConfigs { + observatorium := observatorium.NewObservatorium(&cfg) + observatorium.Manifests(gen) + } +} From 4447792456f1b4d601d5aca3c9c019bdd57370f6 Mon Sep 17 00:00:00 2001 From: Thibault Mange <22740367+thibaultmg@users.noreply.github.com> Date: Thu, 21 Sep 2023 11:24:16 +0200 Subject: [PATCH 02/32] wip Signed-off-by: Thibault Mange <22740367+thibaultmg@users.noreply.github.com> --- go.mod | 6 +++++- go.sum | 14 +++++++++----- mimic.go | 9 ++++++--- 3 files changed, 20 insertions(+), 9 deletions(-) diff --git a/go.mod b/go.mod index 08e79ed61a..f6e67bace1 100644 --- a/go.mod +++ b/go.mod @@ -1,6 +1,8 @@ module github.com/rhobs/configuration -go 1.19 +go 1.21 + +toolchain go1.21.1 require ( github.com/bwplotka/mimic v0.1.1-0.20220621130344-a6338e3b8238 @@ -78,3 +80,5 @@ require ( sigs.k8s.io/structured-merge-diff/v4 v4.3.0 // indirect sigs.k8s.io/yaml v1.3.0 // indirect ) + +replace github.com/observatorium/observatorium => ../observatorium diff --git a/go.sum b/go.sum index 203a080e46..ebeac84660 100644 --- a/go.sum +++ b/go.sum @@ -171,8 +171,9 @@ github.com/aws/aws-sdk-go v1.30.12/go.mod h1:5zCpMtNQVjRREroY7sYe8lOMRSxkhG6MZve github.com/aws/aws-sdk-go v1.34.28/go.mod h1:H7NKnBqNVzoTJpGfLrQkkD+ytBA93eiDYi/+8rV9s48= github.com/aws/aws-sdk-go v1.38.35/go.mod h1:hcU610XS61/+aQV88ixoOzUoG7v3b31pl2zKMmprdro= github.com/aws/aws-sdk-go v1.40.11/go.mod h1:585smgzpB/KqRA+K3y/NL/oYRqQvpNJYvLm+LY1U59Q= -github.com/aws/aws-sdk-go v1.42.31 h1:tSv/YzjrFlbSqWmov9quBxrSNXLPUjJI7nPEB57S1+M= github.com/aws/aws-sdk-go v1.42.31/go.mod h1:OGr6lGMAKGlG9CVrYnWYDKIyb829c6EVBRjxqjmPepc= +github.com/aws/aws-sdk-go v1.44.245 h1:KtY2s4q31/kn33AdV63R5t77mdxsI7rq3YT7Mgo805M= +github.com/aws/aws-sdk-go v1.44.245/go.mod 
h1:aVsgQcEevwlmQ7qHE9I3h+dtQgpqhFB+i8Phjh7fkwI= github.com/aws/aws-sdk-go-v2 v0.18.0/go.mod h1:JWVYvqSMppoMJC0x5wdwiImzgXTI9FuZwxzkQq9wy+g= github.com/benbjohnson/immutable v0.2.1/go.mod h1:uc6OHo6PN2++n98KHLxW8ef4W42ylHiQSENghE1ezxI= github.com/benbjohnson/tmpl v1.0.0/go.mod h1:igT620JFIi44B6awvU9IsDhR77IXWtFigTLil/RPdps= @@ -195,8 +196,8 @@ github.com/buger/jsonparser v0.0.0-20180808090653-f4dd9f5a6b44/go.mod h1:bbYlZJ7 github.com/bugsnag/bugsnag-go v0.0.0-20141110184014-b1d153021fcd/go.mod h1:2oa8nejYd4cQ/b0hMIopN0lCRxU0bueqREvZLWFrtK8= github.com/bugsnag/osext v0.0.0-20130617224835-0dd3f918b21b/go.mod h1:obH5gd0BsqsP2LwDJ9aOkm/6J86V6lyAXCoQWGw3K50= github.com/bugsnag/panicwrap v0.0.0-20151223152923-e2c28503fcd0/go.mod h1:D/8v3kj0zr8ZAKg1AQ6crr+5VwKN5eIywRkfhyM/+dE= -github.com/bwplotka/mimic v0.1.1-0.20220621130344-a6338e3b8238 h1:Fp7YvZafMgDnF/xGmczsMKzqlZvPFS05BJuC0AOqNoQ= -github.com/bwplotka/mimic v0.1.1-0.20220621130344-a6338e3b8238/go.mod h1:TT/FO4KJ2iOjxaBxrHmhGawOOgVGSMupSiiEgBQZpxE= +github.com/bwplotka/mimic v0.2.1-0.20230303101552-f705cca2f4a4 h1:z6ej4tVVkGgRXpdGB/p0qh1slebb/yI5TTYl3EFf4tw= +github.com/bwplotka/mimic v0.2.1-0.20230303101552-f705cca2f4a4/go.mod h1:TT/FO4KJ2iOjxaBxrHmhGawOOgVGSMupSiiEgBQZpxE= github.com/c-bata/go-prompt v0.2.2/go.mod h1:VzqtzE2ksDBcdln8G7mk2RX9QyGjH+OVqOCSiVIqS34= github.com/cactus/go-statsd-client/statsd v0.0.0-20191106001114-12b4e2b38748/go.mod h1:l/bIBLeOl9eX+wxJAzxS4TveKRtAqlyDpHjhkfO0MEI= github.com/casbin/casbin/v2 v2.1.2/go.mod h1:YcPU1XXisHhLzuxH9coDNf2FbKpjGlbCg3n9yuLkIJQ= @@ -381,6 +382,7 @@ github.com/eclipse/paho.mqtt.golang v1.2.0/go.mod h1:H9keYFcgq3Qr5OUJm/JZI/i6U7j github.com/edsrzf/mmap-go v1.0.0/go.mod h1:YO35OhQPt3KJa3ryjFM5Bs14WD66h8eGKpfaBNrHW5M= github.com/edsrzf/mmap-go v1.1.0/go.mod h1:19H/e8pUPLicwkyNgOykDXkJ9F0MHE+Z52B8EIth78Q= github.com/efficientgo/tools/core v0.0.0-20220225185207-fe763185946b h1:ZHiD4/yE4idlbqvAO6iYCOYRzOMRpxkW+FKasRA3tsQ= +github.com/efficientgo/tools/core 
v0.0.0-20220225185207-fe763185946b/go.mod h1:OmVcnJopJL8d3X3sSXTiypGoUSgFq1aDGmlrdi9dn/M= github.com/elazarl/goproxy v0.0.0-20170405201442-c4fc26588b6e/go.mod h1:/Zj4wYkgs4iZTTu3o/KG3Itv/qCCa8VVMlb3i9OVuzc= github.com/elazarl/goproxy v0.0.0-20180725130230-947c36da3153/go.mod h1:/Zj4wYkgs4iZTTu3o/KG3Itv/qCCa8VVMlb3i9OVuzc= github.com/emicklei/go-restful v0.0.0-20170410110728-ff4f55a20633/go.mod h1:otzb+WCGbkyDHkqmQmT5YD2WR4BBwUdeQoFo8l/7tVs= @@ -703,6 +705,8 @@ github.com/google/pprof v0.0.0-20210609004039-a478d1d731e9/go.mod h1:kpwsk12EmLe github.com/google/pprof v0.0.0-20210720184732-4bb14d4b1be1/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= github.com/google/pprof v0.0.0-20211214055906-6f57359322fd h1:1FjCyPC+syAzJ5/2S8fqdZK1R22vvA0J7JZKcuOIQ7Y= github.com/google/pprof v0.0.0-20211214055906-6f57359322fd/go.mod h1:KgnwoLYCZ8IQu3XUZ8Nc/bM9CCZFOyjUNOSygVozoDg= +github.com/google/pprof v0.0.0-20230406165453-00490a63f317 h1:hFhpt7CTmR3DX+b4R19ydQFtofxT0Sv3QsKNMVQYTMQ= +github.com/google/pprof v0.0.0-20230406165453-00490a63f317/go.mod h1:79YE0hCXdHag9sBkw2o+N/YnZtTkXi0UT9Nnixa5eYk= github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI= github.com/google/uuid v1.0.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/google/uuid v1.1.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= @@ -1039,6 +1043,8 @@ github.com/opencontainers/runtime-tools v0.0.0-20181011054405-1d69bd0f9c39/go.mo github.com/opencontainers/selinux v1.6.0/go.mod h1:VVGKuOLlE7v4PJyT6h7mNWvq1rzqiriPsEqVhc+svHE= github.com/opencontainers/selinux v1.8.0/go.mod h1:RScLhm78qiWa2gbVCcGkC7tCGdgk3ogry1nUQF8Evvo= github.com/opencontainers/selinux v1.8.2/go.mod h1:MUIHuUEvKB1wtJjQdOyYRgOnLD2xAPP8dBsCoU0KuF8= +github.com/openshift/api v3.9.0+incompatible h1:fJ/KsefYuZAjmrr3+5U9yZIZbTOpVkDDLDLFresAeYs= +github.com/openshift/api v3.9.0+incompatible/go.mod h1:dh9o4Fs58gpFXGSYfnVxGR9PnV53I8TW84pQaJDdGiY= github.com/opentracing-contrib/go-observer 
v0.0.0-20170622124052-a52f23424492/go.mod h1:Ngi6UdF0k5OKD5t5wlmGhe/EDKPoUM3BXZSSfIuJbis= github.com/opentracing-contrib/go-stdlib v0.0.0-20190519235532-cf7a6c988dc9/go.mod h1:PLldrQSroqzH70Xl+1DQcGnefIbqsKR7UDaiux3zV+w= github.com/opentracing-contrib/go-stdlib v1.0.0/go.mod h1:qtI1ogk+2JhVPIXVc6q+NHziSmy2W5GbdQZFUHADCBU= @@ -1221,8 +1227,6 @@ github.com/stretchr/objx v0.0.0-20180129172003-8a3f7159479f/go.mod h1:HFkY916IF+ github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.2.0/go.mod h1:qt09Ya8vawLte6SNmTgCsAVtYtaKzEcn8ATUoHMkEqE= -github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= -github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= github.com/stretchr/testify v0.0.0-20180303142811-b89eecf5ca5d/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= github.com/stretchr/testify v1.2.0/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= diff --git a/mimic.go b/mimic.go index 21ae4adaef..3f5a3ff3e0 100644 --- a/mimic.go +++ b/mimic.go @@ -2,7 +2,7 @@ package main import ( "github.com/bwplotka/mimic" - cfgobservatorium "github.com/rhobs/configuration/configuration/observatorium" + services "github.com/rhobs/configuration/services_go" ) func main() { @@ -10,7 +10,10 @@ func main() { defer gen.Generate() - cfgobservatorium.GenSLO(gen.With("observability", "prometheusrules", "pyrra"), gen.With("observability", "prometheusrules")) + // cfgobservatorium.GenSLO(gen.With("observability", "prometheusrules", "pyrra"), gen.With("observability", "prometheusrules")) + + // cfgobservatorium.GenerateRBAC(gen.With(".tmp", "tenants")) + + services.Generate(gen.With("services_go")) - cfgobservatorium.GenerateRBAC(gen.With(".tmp", "tenants")) } From 
16af0ee92d4011ab6efd280942ca570060fe89e0 Mon Sep 17 00:00:00 2001 From: Thibault Mange <22740367+thibaultmg@users.noreply.github.com> Date: Fri, 22 Sep 2023 10:44:35 +0200 Subject: [PATCH 03/32] add post processing Signed-off-by: Thibault Mange <22740367+thibaultmg@users.noreply.github.com> --- ...bservatorium-metrics-compact-template.yaml | 9 +- .../observatorium-metrics-store-template.yaml | 281 ++++++++++++++++++ ...bservatorium-metrics-compact-template.yaml | 9 +- .../observatorium-metrics-store-template.yaml | 281 ++++++++++++++++++ services_go/observatorium/metrics.go | 57 +++- services_go/observatorium/observatorium.go | 48 ++- services_go/observatorium/postprocess.go | 34 +++ 7 files changed, 687 insertions(+), 32 deletions(-) create mode 100755 resources/services_go/app-sre-stage-01/rhobs/observatorium-metrics-store-template.yaml create mode 100755 resources/services_go/telemeter-prod-01/rhobs/observatorium-metrics-store-template.yaml create mode 100644 services_go/observatorium/postprocess.go diff --git a/resources/services_go/app-sre-stage-01/rhobs/observatorium-metrics-compact-template.yaml b/resources/services_go/app-sre-stage-01/rhobs/observatorium-metrics-compact-template.yaml index 665299282c..f4f8936f2d 100755 --- a/resources/services_go/app-sre-stage-01/rhobs/observatorium-metrics-compact-template.yaml +++ b/resources/services_go/app-sre-stage-01/rhobs/observatorium-metrics-compact-template.yaml @@ -70,7 +70,7 @@ objects: app.kubernetes.io/part-of: observatorium app.kubernetes.io/version: v0.32.3 name: observatorium-thanos-compact - namespace: rhobs + namespace: openshift-monitoring spec: endpoints: - bearerTokenSecret: @@ -85,7 +85,7 @@ objects: targetLabel: instance namespaceSelector: matchNames: - - openshift-monitoring + - rhobs selector: matchLabels: app.kubernetes.io/component: database-compactor @@ -190,7 +190,7 @@ objects: readinessProbe: failureThreshold: 20 httpGet: - path: /-/healthy + path: /-/ready port: 10902 periodSeconds: 5 
resources: @@ -200,6 +200,9 @@ objects: cpu: 200m memory: 1Gi terminationMessagePolicy: FallbackToLogsOnError + volumeMounts: + - mountPath: /var/thanos/compactor + name: data - args: - -provider=openshift - -https-address=:8443 diff --git a/resources/services_go/app-sre-stage-01/rhobs/observatorium-metrics-store-template.yaml b/resources/services_go/app-sre-stage-01/rhobs/observatorium-metrics-store-template.yaml new file mode 100755 index 0000000000..c432022535 --- /dev/null +++ b/resources/services_go/app-sre-stage-01/rhobs/observatorium-metrics-store-template.yaml @@ -0,0 +1,281 @@ +apiVersion: template.openshift.io/v1 +kind: Template +metadata: + creationTimestamp: null + name: observatorium-metrics-store +objects: +- apiVersion: v1 + data: + session_secret: c2VjcmV0 + kind: Secret + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: object-store-gateway + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-store + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.3 + name: compact-proxy + namespace: rhobs +- apiVersion: v1 + kind: Service + metadata: + annotations: + service.beta.openshift.io/serving-cert-secret-name: compact-tls + creationTimestamp: null + labels: + app.kubernetes.io/component: object-store-gateway + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-store + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.3 + name: observatorium-thanos-store + namespace: rhobs + spec: + ports: + - name: http + port: 10902 + protocol: TCP + targetPort: 10902 + - name: https + port: 8443 + protocol: TCP + targetPort: 8443 + selector: + app.kubernetes.io/component: object-store-gateway + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-store + app.kubernetes.io/part-of: observatorium + status: + loadBalancer: {} +- apiVersion: v1 + kind: ServiceAccount + metadata: + creationTimestamp: null + labels: + 
app.kubernetes.io/component: object-store-gateway + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-store + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.3 + name: observatorium-thanos-store + namespace: rhobs +- apiVersion: monitoring.coreos.com/v1 + kind: ServiceMonitor + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: object-store-gateway + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-store + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.3 + name: observatorium-thanos-store + namespace: openshift-monitoring + spec: + endpoints: + - bearerTokenSecret: + key: "" + port: http + relabelings: + - action: replace + separator: / + sourceLabels: + - namespace + - pod + targetLabel: instance + namespaceSelector: + matchNames: + - rhobs + selector: + matchLabels: + app.kubernetes.io/component: object-store-gateway + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-store + app.kubernetes.io/part-of: observatorium +- apiVersion: apps/v1 + kind: StatefulSet + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: object-store-gateway + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-store + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.3 + name: observatorium-thanos-store + namespace: rhobs + spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/component: object-store-gateway + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-store + app.kubernetes.io/part-of: observatorium + serviceName: observatorium-thanos-store + template: + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: object-store-gateway + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-store + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.3 + 
namespace: rhobs + spec: + affinity: + podAntiAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - podAffinityTerm: + labelSelector: + matchExpressions: + - key: app.kubernetes.io/name + operator: In + values: + - thanos-store + - key: app.kubernetes.io/instance + operator: In + values: + - observatorium + namespaces: + - observatorium + topologyKey: kubernetes.io/hostname + weight: 100 + containers: + - args: + - store + - --chunk-pool-size=1TiB1016GiB + - --data-dir=/var/thanos/store + - --http-address=0.0.0.0:10902 + - --ignore-deletion-marks-delay=24h0m0s + - | + --index-cache.config=type: REDIS + config: + addr: rhobs-redis.rhobs.svc.cluster.local:6379 + - --log.format=logfmt + - --log.level=warn + - --max-time=8760h0m0s + - --objstore.config=$(OBJSTORE_CONFIG) + env: + - name: AWS_ACCESS_KEY_ID + valueFrom: + secretKeyRef: + key: aws_access_key_id + name: rhobs-thanos-s3 + - name: AWS_SECRET_ACCESS_KEY + valueFrom: + secretKeyRef: + key: aws_secret_access_key + name: rhobs-thanos-s3 + - name: OBJSTORE_CONFIG + valueFrom: + secretKeyRef: + key: thanos.yaml + name: rhobs-thanos-objectstorage + image: quay.io/thanos/thanos:v0.32.3 + imagePullPolicy: IfNotPresent + livenessProbe: + failureThreshold: 8 + httpGet: + path: /-/healthy + port: 10902 + periodSeconds: 30 + timeoutSeconds: 1 + name: thanos + ports: + - containerPort: 10902 + name: http + protocol: TCP + readinessProbe: + failureThreshold: 20 + httpGet: + path: /-/ready + port: 10902 + periodSeconds: 5 + resources: + limits: + memory: 5Gi + requests: + cpu: 200m + memory: 1Gi + terminationMessagePolicy: FallbackToLogsOnError + volumeMounts: + - mountPath: /var/thanos/store + name: data + - args: + - -provider=openshift + - -https-address=:8443 + - -http-address= + - -email-domain=* + - -upstream=http://localhost:10902 + - -openshift-service-account=observatorium-thanos-store + - '-openshift-sar={"resource": "namespaces", "verb": "get", "name": "rhobs", + "namespace": "rhobs"}' + - 
'-openshift-delegate-urls={"/": {"resource": "namespaces", "verb": "get", + "name": "rhobs", "namespace": "rhobs"}}' + - -tls-cert=/etc/tls/private/tls.crt + - -tls-key=/etc/tls/private/tls.key + - -client-secret-file=/var/run/secrets/kubernetes.io/serviceaccount/token + - -cookie-secret-file=/etc/proxy/secrets/session_secret + - -openshift-ca=/etc/pki/tls/cert.pem + - -openshift-ca=/var/run/secrets/kubernetes.io/serviceaccount/ca.crt + image: quay.io/openshift/origin-oauth-proxy:v4.8.0 + name: oauth-proxy + ports: + - containerPort: 8443 + name: https + protocol: TCP + resources: + limits: + cpu: 200m + memory: 200Mi + requests: + cpu: 100m + memory: 100Mi + terminationMessagePolicy: FallbackToLogsOnError + volumeMounts: + - mountPath: /etc/tls/private + name: compact-tls + readOnly: true + - mountPath: /etc/proxy/secrets + name: compact-proxy + readOnly: true + nodeSelector: + kubernetes.io/os: linux + securityContext: + fsGroup: 65534 + runAsUser: 65534 + serviceAccountName: observatorium-thanos-store + terminationGracePeriodSeconds: 120 + volumes: + - name: compact-tls + secret: + secretName: compact-tls + - name: compact-proxy + secret: + secretName: compact-proxy + updateStrategy: {} + volumeClaimTemplates: + - metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: object-store-gateway + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-store + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.3 + name: data + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 500Gi + storageClassName: gp2 + status: {} + status: + availableReplicas: 0 + replicas: 0 diff --git a/resources/services_go/telemeter-prod-01/rhobs/observatorium-metrics-compact-template.yaml b/resources/services_go/telemeter-prod-01/rhobs/observatorium-metrics-compact-template.yaml index 665299282c..f4f8936f2d 100755 --- 
a/resources/services_go/telemeter-prod-01/rhobs/observatorium-metrics-compact-template.yaml +++ b/resources/services_go/telemeter-prod-01/rhobs/observatorium-metrics-compact-template.yaml @@ -70,7 +70,7 @@ objects: app.kubernetes.io/part-of: observatorium app.kubernetes.io/version: v0.32.3 name: observatorium-thanos-compact - namespace: rhobs + namespace: openshift-monitoring spec: endpoints: - bearerTokenSecret: @@ -85,7 +85,7 @@ objects: targetLabel: instance namespaceSelector: matchNames: - - openshift-monitoring + - rhobs selector: matchLabels: app.kubernetes.io/component: database-compactor @@ -190,7 +190,7 @@ objects: readinessProbe: failureThreshold: 20 httpGet: - path: /-/healthy + path: /-/ready port: 10902 periodSeconds: 5 resources: @@ -200,6 +200,9 @@ objects: cpu: 200m memory: 1Gi terminationMessagePolicy: FallbackToLogsOnError + volumeMounts: + - mountPath: /var/thanos/compactor + name: data - args: - -provider=openshift - -https-address=:8443 diff --git a/resources/services_go/telemeter-prod-01/rhobs/observatorium-metrics-store-template.yaml b/resources/services_go/telemeter-prod-01/rhobs/observatorium-metrics-store-template.yaml new file mode 100755 index 0000000000..c432022535 --- /dev/null +++ b/resources/services_go/telemeter-prod-01/rhobs/observatorium-metrics-store-template.yaml @@ -0,0 +1,281 @@ +apiVersion: template.openshift.io/v1 +kind: Template +metadata: + creationTimestamp: null + name: observatorium-metrics-store +objects: +- apiVersion: v1 + data: + session_secret: c2VjcmV0 + kind: Secret + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: object-store-gateway + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-store + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.3 + name: compact-proxy + namespace: rhobs +- apiVersion: v1 + kind: Service + metadata: + annotations: + service.beta.openshift.io/serving-cert-secret-name: compact-tls + creationTimestamp: 
null + labels: + app.kubernetes.io/component: object-store-gateway + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-store + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.3 + name: observatorium-thanos-store + namespace: rhobs + spec: + ports: + - name: http + port: 10902 + protocol: TCP + targetPort: 10902 + - name: https + port: 8443 + protocol: TCP + targetPort: 8443 + selector: + app.kubernetes.io/component: object-store-gateway + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-store + app.kubernetes.io/part-of: observatorium + status: + loadBalancer: {} +- apiVersion: v1 + kind: ServiceAccount + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: object-store-gateway + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-store + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.3 + name: observatorium-thanos-store + namespace: rhobs +- apiVersion: monitoring.coreos.com/v1 + kind: ServiceMonitor + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: object-store-gateway + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-store + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.3 + name: observatorium-thanos-store + namespace: openshift-monitoring + spec: + endpoints: + - bearerTokenSecret: + key: "" + port: http + relabelings: + - action: replace + separator: / + sourceLabels: + - namespace + - pod + targetLabel: instance + namespaceSelector: + matchNames: + - rhobs + selector: + matchLabels: + app.kubernetes.io/component: object-store-gateway + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-store + app.kubernetes.io/part-of: observatorium +- apiVersion: apps/v1 + kind: StatefulSet + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: object-store-gateway + 
app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-store + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.3 + name: observatorium-thanos-store + namespace: rhobs + spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/component: object-store-gateway + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-store + app.kubernetes.io/part-of: observatorium + serviceName: observatorium-thanos-store + template: + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: object-store-gateway + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-store + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.3 + namespace: rhobs + spec: + affinity: + podAntiAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - podAffinityTerm: + labelSelector: + matchExpressions: + - key: app.kubernetes.io/name + operator: In + values: + - thanos-store + - key: app.kubernetes.io/instance + operator: In + values: + - observatorium + namespaces: + - observatorium + topologyKey: kubernetes.io/hostname + weight: 100 + containers: + - args: + - store + - --chunk-pool-size=1TiB1016GiB + - --data-dir=/var/thanos/store + - --http-address=0.0.0.0:10902 + - --ignore-deletion-marks-delay=24h0m0s + - | + --index-cache.config=type: REDIS + config: + addr: rhobs-redis.rhobs.svc.cluster.local:6379 + - --log.format=logfmt + - --log.level=warn + - --max-time=8760h0m0s + - --objstore.config=$(OBJSTORE_CONFIG) + env: + - name: AWS_ACCESS_KEY_ID + valueFrom: + secretKeyRef: + key: aws_access_key_id + name: rhobs-thanos-s3 + - name: AWS_SECRET_ACCESS_KEY + valueFrom: + secretKeyRef: + key: aws_secret_access_key + name: rhobs-thanos-s3 + - name: OBJSTORE_CONFIG + valueFrom: + secretKeyRef: + key: thanos.yaml + name: rhobs-thanos-objectstorage + image: quay.io/thanos/thanos:v0.32.3 + imagePullPolicy: IfNotPresent + livenessProbe: + failureThreshold: 8 + 
httpGet: + path: /-/healthy + port: 10902 + periodSeconds: 30 + timeoutSeconds: 1 + name: thanos + ports: + - containerPort: 10902 + name: http + protocol: TCP + readinessProbe: + failureThreshold: 20 + httpGet: + path: /-/ready + port: 10902 + periodSeconds: 5 + resources: + limits: + memory: 5Gi + requests: + cpu: 200m + memory: 1Gi + terminationMessagePolicy: FallbackToLogsOnError + volumeMounts: + - mountPath: /var/thanos/store + name: data + - args: + - -provider=openshift + - -https-address=:8443 + - -http-address= + - -email-domain=* + - -upstream=http://localhost:10902 + - -openshift-service-account=observatorium-thanos-store + - '-openshift-sar={"resource": "namespaces", "verb": "get", "name": "rhobs", + "namespace": "rhobs"}' + - '-openshift-delegate-urls={"/": {"resource": "namespaces", "verb": "get", + "name": "rhobs", "namespace": "rhobs"}}' + - -tls-cert=/etc/tls/private/tls.crt + - -tls-key=/etc/tls/private/tls.key + - -client-secret-file=/var/run/secrets/kubernetes.io/serviceaccount/token + - -cookie-secret-file=/etc/proxy/secrets/session_secret + - -openshift-ca=/etc/pki/tls/cert.pem + - -openshift-ca=/var/run/secrets/kubernetes.io/serviceaccount/ca.crt + image: quay.io/openshift/origin-oauth-proxy:v4.8.0 + name: oauth-proxy + ports: + - containerPort: 8443 + name: https + protocol: TCP + resources: + limits: + cpu: 200m + memory: 200Mi + requests: + cpu: 100m + memory: 100Mi + terminationMessagePolicy: FallbackToLogsOnError + volumeMounts: + - mountPath: /etc/tls/private + name: compact-tls + readOnly: true + - mountPath: /etc/proxy/secrets + name: compact-proxy + readOnly: true + nodeSelector: + kubernetes.io/os: linux + securityContext: + fsGroup: 65534 + runAsUser: 65534 + serviceAccountName: observatorium-thanos-store + terminationGracePeriodSeconds: 120 + volumes: + - name: compact-tls + secret: + secretName: compact-tls + - name: compact-proxy + secret: + secretName: compact-proxy + updateStrategy: {} + volumeClaimTemplates: + - metadata: + 
creationTimestamp: null + labels: + app.kubernetes.io/component: object-store-gateway + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-store + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.3 + name: data + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 500Gi + storageClassName: gp2 + status: {} + status: + availableReplicas: 0 + replicas: 0 diff --git a/services_go/observatorium/metrics.go b/services_go/observatorium/metrics.go index 1736f5a659..ac1855db44 100644 --- a/services_go/observatorium/metrics.go +++ b/services_go/observatorium/metrics.go @@ -2,16 +2,21 @@ package observatorium import ( "fmt" + "net" "time" "github.com/observatorium/observatorium/configuration_go/abstr/kubernetes/thanos/compactor" + "github.com/observatorium/observatorium/configuration_go/abstr/kubernetes/thanos/store" "github.com/observatorium/observatorium/configuration_go/k8sutil" + "github.com/observatorium/observatorium/configuration_go/schemas/thanos/cache" + "github.com/observatorium/observatorium/configuration_go/schemas/thanos/cache/redis" + "github.com/observatorium/observatorium/configuration_go/schemas/thanos/common" + "github.com/observatorium/observatorium/configuration_go/schemas/thanos/units" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" - "k8s.io/apimachinery/pkg/runtime" ) -func makeCompactor(namespace string) *compactor.CompactorStatefulSet { +func makeCompactor(namespace string) (*compactor.CompactorStatefulSet, PostProcessFunc) { // K8s config compactorSatefulset := compactor.NewCompactor() compactorSatefulset.Image = thanosImage @@ -31,9 +36,6 @@ func makeCompactor(namespace string) *compactor.CompactorStatefulSet { } tlsSecret := "compact-tls" compactorSatefulset.Sidecars = []k8sutil.ContainerProvider{makeOauthProxy(10902, namespace, compactorSatefulset.Name, tlsSecret)} - // compactorSatefulset.PostProcess = []k8sutil.ObjectProcessor{ - // 
serviceCertAnnotationModifier(tlsSecret), - // } // Compactor config compactorSatefulset.Options.LogLevel = "warn" @@ -46,15 +48,48 @@ func makeCompactor(namespace string) *compactor.CompactorStatefulSet { compactorSatefulset.Options.DeduplicationReplicaLabel = "replica" compactorSatefulset.Options.AddExtraOpts("--debug.max-compaction-level=3") - return compactorSatefulset + return compactorSatefulset, addServiceCertAnnotation(compactorSatefulset.CommonLabels[k8sutil.NameLabel], tlsSecret) } -func serviceCertAnnotationModifier(secretName string) func(object runtime.Object) { - return func(object runtime.Object) { - if service, ok := object.(*corev1.Service); ok { - service.ObjectMeta.Annotations["service.beta.openshift.io/serving-cert-secret-name"] = secretName - } +func makeStore(namespace string) (*store.StoreStatefulSet, PostProcessFunc) { + storeStatefulSet := store.NewStore() + storeStatefulSet.Image = thanosImage + storeStatefulSet.ImageTag = thanosImageTag + storeStatefulSet.Namespace = namespace + storeStatefulSet.Replicas = 1 + delete(storeStatefulSet.PodResources.Limits, corev1.ResourceCPU) // To be confirmed + storeStatefulSet.PodResources.Requests[corev1.ResourceCPU] = resource.MustParse("200m") + storeStatefulSet.PodResources.Requests[corev1.ResourceMemory] = resource.MustParse("1Gi") + storeStatefulSet.PodResources.Limits[corev1.ResourceMemory] = resource.MustParse("5Gi") + storeStatefulSet.VolumeType = "gp2" + storeStatefulSet.VolumeSize = "500Gi" + storeStatefulSet.Env = []corev1.EnvVar{ + k8sutil.NewEnvFromSecret("AWS_ACCESS_KEY_ID", "rhobs-thanos-s3", "aws_access_key_id"), + k8sutil.NewEnvFromSecret("AWS_SECRET_ACCESS_KEY", "rhobs-thanos-s3", "aws_secret_access_key"), + k8sutil.NewEnvFromSecret("OBJSTORE_CONFIG", "rhobs-thanos-objectstorage", "thanos.yaml"), } + tlsSecret := "store-tls" + storeStatefulSet.Sidecars = []k8sutil.ContainerProvider{makeOauthProxy(10902, namespace, storeStatefulSet.Name, tlsSecret)} + + // Store config + 
storeStatefulSet.Options.LogLevel = "warn" + storeStatefulSet.Options.LogFormat = "logfmt" + storeStatefulSet.Options.IgnoreDeletionMarksDelay = 24 * time.Hour + maxTime := time.Duration(365*24) * time.Hour + storeStatefulSet.Options.MaxTime = &common.TimeOrDurationValue{Dur: &maxTime} + storeStatefulSet.Options.ChunkPoolSize = 2040 * units.GiB + storeStatefulSet.Options.HttpAddress = &net.TCPAddr{Port: 10902, IP: net.ParseIP("0.0.0.0")} + // storeStatefulSet.Options.StoreGrpcDownloadedBytesLimit + // indexCacheCfg, err := yaml.Marshal(cache.NewConfig(redis.RedisClientConfig{ + // Addr: "rhobs-redis.rhobs.svc.cluster.local:6379", + // })) + // mimic.PanicOnErr(err) + + storeStatefulSet.Options.IndexCacheConfig = cache.NewConfig(redis.RedisClientConfig{ + Addr: "rhobs-redis.rhobs.svc.cluster.local:6379", + }) + + return storeStatefulSet, addServiceCertAnnotation(storeStatefulSet.CommonLabels[k8sutil.NameLabel], tlsSecret) } func makeOauthProxy(upstreamPort int32, namespace, serviceAccount, tlsSecret string) *k8sutil.Container { diff --git a/services_go/observatorium/observatorium.go b/services_go/observatorium/observatorium.go index 7411af799d..500d3a5871 100644 --- a/services_go/observatorium/observatorium.go +++ b/services_go/observatorium/observatorium.go @@ -7,11 +7,12 @@ import ( "github.com/bwplotka/mimic/encoding" "github.com/observatorium/api/rbac" "github.com/observatorium/observatorium/configuration_go/abstr/kubernetes/thanos/compactor" + "github.com/observatorium/observatorium/configuration_go/abstr/kubernetes/thanos/store" "github.com/observatorium/observatorium/configuration_go/k8sutil" "github.com/observatorium/observatorium/configuration_go/openshift" templatev1 "github.com/openshift/api/template/v1" - monv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" ) const ( @@ -35,35 +36,52 @@ type InstanceConfiguration struct { Tenants 
[]TenantInstanceConfiguration } +type PostProcessFunc func(obj runtime.Object) + type Observatorium struct { - Cfg *InstanceConfiguration - Compactor *compactor.CompactorStatefulSet + Cfg *InstanceConfiguration + Compactor *compactor.CompactorStatefulSet + Store *store.StoreStatefulSet + PostProcessFuncs []PostProcessFunc } func NewObservatorium(cfg *InstanceConfiguration) *Observatorium { + postProcessFuncs := []PostProcessFunc{updateServiceMonitorNamespace} + storeComponent, postProcess := makeStore(cfg.Namespace) + postProcessFuncs = append(postProcessFuncs, postProcess) + compactorComponent, postProcess := makeCompactor(cfg.Namespace) + postProcessFuncs = append(postProcessFuncs, postProcess) return &Observatorium{ - Cfg: cfg, - Compactor: makeCompactor(cfg.Namespace), + Cfg: cfg, + Compactor: compactorComponent, + Store: storeComponent, + PostProcessFuncs: postProcessFuncs, } } func (o *Observatorium) Manifests(generator *mimic.Generator) { - compactorManifests := o.Compactor.Manifests() - postProcessManifests(compactorManifests) - - commonTemplateMeta := metav1.ObjectMeta{ - Name: "observatorium-metrics-compact", + components := []struct { + name string + objects k8sutil.ObjectMap + }{ + {"observatorium-metrics-compact", o.Compactor.Manifests()}, + {"observatorium-metrics-store", o.Store.Manifests()}, } - compactorTemplate := openshift.WrapInTemplate("", compactorManifests, commonTemplateMeta, []templatev1.Parameter{}) - generator.With(o.Cfg.Cluster, o.Cfg.Instance).Add("observatorium-metrics-compact-template.yaml", encoding.GhodssYAML(compactorTemplate[""])) + for _, component := range components { + o.postProcess(component.objects) + template := openshift.WrapInTemplate("", component.objects, metav1.ObjectMeta{ + Name: component.name, + }, []templatev1.Parameter{}) + generator.With(o.Cfg.Cluster, o.Cfg.Instance).Add(component.name+"-template.yaml", encoding.GhodssYAML(template[""])) + } } -func postProcessManifests(manifests k8sutil.ObjectMap) { +func (o 
*Observatorium) postProcess(manifests k8sutil.ObjectMap) { for _, manifest := range manifests { - if serviceMonitor, ok := manifest.(*monv1.ServiceMonitor); ok { - serviceMonitor.Spec.NamespaceSelector.MatchNames = []string{monitoringNamespace} + for _, postProcessFunc := range o.PostProcessFuncs { + postProcessFunc(manifest) } } } diff --git a/services_go/observatorium/postprocess.go b/services_go/observatorium/postprocess.go new file mode 100644 index 0000000000..8ca3ec101b --- /dev/null +++ b/services_go/observatorium/postprocess.go @@ -0,0 +1,34 @@ +package observatorium + +import ( + "github.com/observatorium/observatorium/configuration_go/k8sutil" + monv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/runtime" +) + +func updateServiceMonitorNamespace(obj runtime.Object) { + if serviceMonitor, ok := obj.(*monv1.ServiceMonitor); ok { + serviceMonitor.ObjectMeta.Namespace = monitoringNamespace + } +} + +func addServiceCertAnnotation(nameLabelSelector, secretName string) func(object runtime.Object) { + return func(object runtime.Object) { + if service, ok := object.(*corev1.Service); ok { + if service.ObjectMeta.Labels == nil { + return + } + + if service.ObjectMeta.Labels[k8sutil.NameLabel] != nameLabelSelector { + return + } + + if service.ObjectMeta.Annotations == nil { + service.ObjectMeta.Annotations = map[string]string{} + } + + service.ObjectMeta.Annotations["service.beta.openshift.io/serving-cert-secret-name"] = secretName + } + } +} From 55933ca3d68e4ec020b9fb49eac206cde3f9927b Mon Sep 17 00:00:00 2001 From: Thibault Mange <22740367+thibaultmg@users.noreply.github.com> Date: Mon, 25 Sep 2023 17:12:11 +0200 Subject: [PATCH 04/32] add store template Signed-off-by: Thibault Mange <22740367+thibaultmg@users.noreply.github.com> --- ...bservatorium-metrics-compact-template.yaml | 2 + .../observatorium-metrics-store-template.yaml | 125 ++++++++------ 
...bservatorium-metrics-compact-template.yaml | 2 + .../observatorium-metrics-store-template.yaml | 133 +++++++++------ .../store-auto-shard-relabel-configMap.sh | 18 ++ services_go/observatorium/metrics.go | 161 ++++++++++-------- services_go/observatorium/observatorium.go | 7 +- services_go/observatorium/postprocess.go | 98 +++++++++-- services_go/observatorium/sidecars.go | 120 +++++++++++++ 9 files changed, 470 insertions(+), 196 deletions(-) create mode 100644 services_go/observatorium/assets/store-auto-shard-relabel-configMap.sh create mode 100644 services_go/observatorium/sidecars.go diff --git a/resources/services_go/app-sre-stage-01/rhobs/observatorium-metrics-compact-template.yaml b/resources/services_go/app-sre-stage-01/rhobs/observatorium-metrics-compact-template.yaml index f4f8936f2d..f13e272e60 100755 --- a/resources/services_go/app-sre-stage-01/rhobs/observatorium-metrics-compact-template.yaml +++ b/resources/services_go/app-sre-stage-01/rhobs/observatorium-metrics-compact-template.yaml @@ -21,6 +21,8 @@ objects: - apiVersion: v1 kind: Service metadata: + annotations: + service.alpha.openshift.io/serving-cert-secret-name: compact-tls creationTimestamp: null labels: app.kubernetes.io/component: database-compactor diff --git a/resources/services_go/app-sre-stage-01/rhobs/observatorium-metrics-store-template.yaml b/resources/services_go/app-sre-stage-01/rhobs/observatorium-metrics-store-template.yaml index c432022535..f589fb8adc 100755 --- a/resources/services_go/app-sre-stage-01/rhobs/observatorium-metrics-store-template.yaml +++ b/resources/services_go/app-sre-stage-01/rhobs/observatorium-metrics-store-template.yaml @@ -6,8 +6,15 @@ metadata: objects: - apiVersion: v1 data: - session_secret: c2VjcmV0 - kind: Secret + entrypoint.sh: "#!/bin/bash\n\n# Kubernetes replicas are named with the following + convention \"-\". 
\n# This parameter expansion removes + all characters until the last hyphen, capturing only the ordinal.\nexport ORDINAL_INDEX=${HOSTNAME##*-}\n\n# + Logging parameters\necho \"generating store hashmod config with ORDINAL_INDEX=${ORDINAL_INDEX} + THANOS_STORE_REPLICAS=${THANOS_STORE_REPLICAS} HOSTNAME=${HOSTNAME}\"\n\ncat + </tmp/config/hashmod-config.yaml\n- action: hashmod\n source_labels: + [\"__block_id\"]\n target_label: shard\n modulus: ${THANOS_STORE_REPLICAS}\n- + action: keep\n source_labels: [\"shard\"]\n regex: ${ORDINAL_INDEX}\nEOF\n" + kind: ConfigMap metadata: creationTimestamp: null labels: @@ -16,13 +23,11 @@ objects: app.kubernetes.io/name: thanos-store app.kubernetes.io/part-of: observatorium app.kubernetes.io/version: v0.32.3 - name: compact-proxy + name: hashmod-config-template namespace: rhobs - apiVersion: v1 kind: Service metadata: - annotations: - service.beta.openshift.io/serving-cert-secret-name: compact-tls creationTimestamp: null labels: app.kubernetes.io/component: object-store-gateway @@ -38,10 +43,6 @@ objects: port: 10902 protocol: TCP targetPort: 10902 - - name: https - port: 8443 - protocol: TCP - targetPort: 8443 selector: app.kubernetes.io/component: object-store-gateway app.kubernetes.io/instance: observatorium @@ -147,9 +148,7 @@ objects: containers: - args: - store - - --chunk-pool-size=1TiB1016GiB - --data-dir=/var/thanos/store - - --http-address=0.0.0.0:10902 - --ignore-deletion-marks-delay=24h0m0s - | --index-cache.config=type: REDIS @@ -157,8 +156,10 @@ objects: addr: rhobs-redis.rhobs.svc.cluster.local:6379 - --log.format=logfmt - --log.level=warn - - --max-time=8760h0m0s + - --max-time=-22h0m0s - --objstore.config=$(OBJSTORE_CONFIG) + - --selector.relabel-config-file=/tmp/config/hashmod-config.yaml + - --store.enable-index-header-lazy-reader=true env: - name: AWS_ACCESS_KEY_ID valueFrom: @@ -197,52 +198,74 @@ objects: periodSeconds: 5 resources: limits: - memory: 5Gi + memory: 80Gi requests: - cpu: 200m - memory: 1Gi + 
cpu: "4" + memory: 20Gi terminationMessagePolicy: FallbackToLogsOnError volumeMounts: - mountPath: /var/thanos/store name: data + - mountPath: /etc/config + name: hashmod-config - args: - - -provider=openshift - - -https-address=:8443 - - -http-address= - - -email-domain=* - - -upstream=http://localhost:10902 - - -openshift-service-account=observatorium-thanos-store - - '-openshift-sar={"resource": "namespaces", "verb": "get", "name": "rhobs", - "namespace": "rhobs"}' - - '-openshift-delegate-urls={"/": {"resource": "namespaces", "verb": "get", - "name": "rhobs", "namespace": "rhobs"}}' - - -tls-cert=/etc/tls/private/tls.crt - - -tls-key=/etc/tls/private/tls.key - - -client-secret-file=/var/run/secrets/kubernetes.io/serviceaccount/token - - -cookie-secret-file=/etc/proxy/secrets/session_secret - - -openshift-ca=/etc/pki/tls/cert.pem - - -openshift-ca=/var/run/secrets/kubernetes.io/serviceaccount/ca.crt - image: quay.io/openshift/origin-oauth-proxy:v4.8.0 - name: oauth-proxy + - --reporter.grpc.host-port=dns:///jaeger-collector-headless.observatorium-tools.svc:14250 + - --reporter.type=grpc + - --agent.tags=pod.namespace=$(NAMESPACE),pod.name=$(POD) + env: + - name: NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + - name: POD + valueFrom: + fieldRef: + fieldPath: metadata.name + image: quay.io/app-sre/jaegertracing-jaeger-agent:1.22.0 + livenessProbe: + failureThreshold: 5 + httpGet: + path: / + port: 14271 + name: jaeger-agent ports: - - containerPort: 8443 - name: https + - containerPort: 5778 + name: configs + protocol: TCP + - containerPort: 6831 + name: jaeger-thrift protocol: TCP + - containerPort: 14271 + name: metrics + protocol: TCP + readinessProbe: + httpGet: + path: / + port: 14271 + initialDelaySeconds: 1 resources: limits: - cpu: 200m - memory: 200Mi + cpu: 128m + memory: 128Mi requests: - cpu: 100m - memory: 100Mi + cpu: 32m + memory: 64Mi terminationMessagePolicy: FallbackToLogsOnError + initContainers: + - args: + - 
/tmp/entrypoint/entrypoint.sh + env: + - name: THANOS_STORE_REPLICAS + value: "1" + image: quay.io/app-sre/ubi8-ubi + imagePullPolicy: IfNotPresent + name: init-hashmod-file + resources: {} volumeMounts: - - mountPath: /etc/tls/private - name: compact-tls - readOnly: true - - mountPath: /etc/proxy/secrets - name: compact-proxy - readOnly: true + - mountPath: /tmp/entrypoint + name: hashmod-config-template + - mountPath: /etc/config + name: hashmod-config nodeSelector: kubernetes.io/os: linux securityContext: @@ -251,12 +274,12 @@ objects: serviceAccountName: observatorium-thanos-store terminationGracePeriodSeconds: 120 volumes: - - name: compact-tls - secret: - secretName: compact-tls - - name: compact-proxy - secret: - secretName: compact-proxy + - configMap: + defaultMode: 511 + name: thanos-store + name: hashmod-config-template + - emptyDir: {} + name: hashmod-config updateStrategy: {} volumeClaimTemplates: - metadata: diff --git a/resources/services_go/telemeter-prod-01/rhobs/observatorium-metrics-compact-template.yaml b/resources/services_go/telemeter-prod-01/rhobs/observatorium-metrics-compact-template.yaml index f4f8936f2d..f13e272e60 100755 --- a/resources/services_go/telemeter-prod-01/rhobs/observatorium-metrics-compact-template.yaml +++ b/resources/services_go/telemeter-prod-01/rhobs/observatorium-metrics-compact-template.yaml @@ -21,6 +21,8 @@ objects: - apiVersion: v1 kind: Service metadata: + annotations: + service.alpha.openshift.io/serving-cert-secret-name: compact-tls creationTimestamp: null labels: app.kubernetes.io/component: database-compactor diff --git a/resources/services_go/telemeter-prod-01/rhobs/observatorium-metrics-store-template.yaml b/resources/services_go/telemeter-prod-01/rhobs/observatorium-metrics-store-template.yaml index c432022535..bcca62e334 100755 --- a/resources/services_go/telemeter-prod-01/rhobs/observatorium-metrics-store-template.yaml +++ 
b/resources/services_go/telemeter-prod-01/rhobs/observatorium-metrics-store-template.yaml @@ -6,8 +6,15 @@ metadata: objects: - apiVersion: v1 data: - session_secret: c2VjcmV0 - kind: Secret + entrypoint.sh: "#!/bin/bash\n\n# Kubernetes replicas are named with the following + convention \"-\". \n# This parameter expansion removes + all characters until the last hyphen, capturing only the ordinal.\nexport ORDINAL_INDEX=${HOSTNAME##*-}\n\n# + Logging parameters\necho \"generating store hashmod config with ORDINAL_INDEX=${ORDINAL_INDEX} + THANOS_STORE_REPLICAS=${THANOS_STORE_REPLICAS} HOSTNAME=${HOSTNAME}\"\n\ncat + <<EOF >/tmp/config/hashmod-config.yaml\n- action: hashmod\n source_labels: + [\"__block_id\"]\n target_label: shard\n modulus: ${THANOS_STORE_REPLICAS}\n- + action: keep\n source_labels: [\"shard\"]\n regex: ${ORDINAL_INDEX}\nEOF\n" + kind: ConfigMap metadata: creationTimestamp: null labels: @@ -16,13 +23,11 @@ objects: app.kubernetes.io/name: thanos-store app.kubernetes.io/part-of: observatorium app.kubernetes.io/version: v0.32.3 - name: compact-proxy + name: hashmod-config-template namespace: rhobs - apiVersion: v1 kind: Service metadata: - annotations: - service.beta.openshift.io/serving-cert-secret-name: compact-tls creationTimestamp: null labels: app.kubernetes.io/component: object-store-gateway @@ -38,10 +43,6 @@ objects: port: 10902 protocol: TCP targetPort: 10902 - - name: https - port: 8443 - protocol: TCP - targetPort: 8443 selector: app.kubernetes.io/component: object-store-gateway app.kubernetes.io/instance: observatorium @@ -132,14 +133,14 @@ objects: - podAffinityTerm: labelSelector: matchExpressions: - - key: app.kubernetes.io/name - operator: In - values: - - thanos-store - key: app.kubernetes.io/instance operator: In values: - observatorium + - key: app.kubernetes.io/name + operator: In + values: + - thanos-store namespaces: - observatorium topologyKey: kubernetes.io/hostname @@ -147,9 +148,7 @@ objects: containers: - args: - store - 
--chunk-pool-size=1TiB1016GiB - --data-dir=/var/thanos/store - - --http-address=0.0.0.0:10902 - --ignore-deletion-marks-delay=24h0m0s - | --index-cache.config=type: REDIS @@ -157,8 +156,10 @@ objects: addr: rhobs-redis.rhobs.svc.cluster.local:6379 - --log.format=logfmt - --log.level=warn - - --max-time=8760h0m0s + - --max-time=-22h0m0s - --objstore.config=$(OBJSTORE_CONFIG) + - --selector.relabel-config-file=/tmp/config/hashmod-config.yaml + - --store.enable-index-header-lazy-reader=true env: - name: AWS_ACCESS_KEY_ID valueFrom: @@ -197,52 +198,74 @@ objects: periodSeconds: 5 resources: limits: - memory: 5Gi + memory: 80Gi requests: - cpu: 200m - memory: 1Gi + cpu: "4" + memory: 20Gi terminationMessagePolicy: FallbackToLogsOnError volumeMounts: - mountPath: /var/thanos/store name: data + - mountPath: /etc/config + name: hashmod-config - args: - - -provider=openshift - - -https-address=:8443 - - -http-address= - - -email-domain=* - - -upstream=http://localhost:10902 - - -openshift-service-account=observatorium-thanos-store - - '-openshift-sar={"resource": "namespaces", "verb": "get", "name": "rhobs", - "namespace": "rhobs"}' - - '-openshift-delegate-urls={"/": {"resource": "namespaces", "verb": "get", - "name": "rhobs", "namespace": "rhobs"}}' - - -tls-cert=/etc/tls/private/tls.crt - - -tls-key=/etc/tls/private/tls.key - - -client-secret-file=/var/run/secrets/kubernetes.io/serviceaccount/token - - -cookie-secret-file=/etc/proxy/secrets/session_secret - - -openshift-ca=/etc/pki/tls/cert.pem - - -openshift-ca=/var/run/secrets/kubernetes.io/serviceaccount/ca.crt - image: quay.io/openshift/origin-oauth-proxy:v4.8.0 - name: oauth-proxy + - --reporter.grpc.host-port=dns:///jaeger-collector-headless.observatorium-tools.svc:14250 + - --reporter.type=grpc + - --agent.tags=pod.namespace=$(NAMESPACE),pod.name=$(POD) + env: + - name: NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + - name: POD + valueFrom: + fieldRef: + fieldPath: metadata.name + image: 
quay.io/app-sre/jaegertracing-jaeger-agent:1.22.0 + livenessProbe: + failureThreshold: 5 + httpGet: + path: / + port: 14271 + name: jaeger-agent ports: - - containerPort: 8443 - name: https + - containerPort: 5778 + name: configs + protocol: TCP + - containerPort: 6831 + name: jaeger-thrift + protocol: TCP + - containerPort: 14271 + name: metrics protocol: TCP + readinessProbe: + httpGet: + path: / + port: 14271 + initialDelaySeconds: 1 resources: limits: - cpu: 200m - memory: 200Mi + cpu: 128m + memory: 128Mi requests: - cpu: 100m - memory: 100Mi + cpu: 32m + memory: 64Mi terminationMessagePolicy: FallbackToLogsOnError + initContainers: + - args: + - /tmp/entrypoint/entrypoint.sh + env: + - name: THANOS_STORE_REPLICAS + value: "1" + image: quay.io/app-sre/ubi8-ubi + imagePullPolicy: IfNotPresent + name: init-hashmod-file + resources: {} volumeMounts: - - mountPath: /etc/tls/private - name: compact-tls - readOnly: true - - mountPath: /etc/proxy/secrets - name: compact-proxy - readOnly: true + - mountPath: /tmp/entrypoint + name: hashmod-config-template + - mountPath: /etc/config + name: hashmod-config nodeSelector: kubernetes.io/os: linux securityContext: @@ -251,12 +274,12 @@ objects: serviceAccountName: observatorium-thanos-store terminationGracePeriodSeconds: 120 volumes: - - name: compact-tls - secret: - secretName: compact-tls - - name: compact-proxy - secret: - secretName: compact-proxy + - configMap: + defaultMode: 511 + name: thanos-store + name: hashmod-config-template + - emptyDir: {} + name: hashmod-config updateStrategy: {} volumeClaimTemplates: - metadata: diff --git a/services_go/observatorium/assets/store-auto-shard-relabel-configMap.sh b/services_go/observatorium/assets/store-auto-shard-relabel-configMap.sh new file mode 100644 index 0000000000..30604acd77 --- /dev/null +++ b/services_go/observatorium/assets/store-auto-shard-relabel-configMap.sh @@ -0,0 +1,18 @@ +#!/bin/bash + +# Kubernetes replicas are named with the following convention "-". 
+# This parameter expansion removes all characters until the last hyphen, capturing only the ordinal. +export ORDINAL_INDEX=${HOSTNAME##*-} + +# Logging parameters +echo "generating store hashmod config with ORDINAL_INDEX=${ORDINAL_INDEX} THANOS_STORE_REPLICAS=${THANOS_STORE_REPLICAS} HOSTNAME=${HOSTNAME}" + +cat <<EOF >/tmp/config/hashmod-config.yaml +- action: hashmod + source_labels: ["__block_id"] + target_label: shard + modulus: ${THANOS_STORE_REPLICAS} +- action: keep + source_labels: ["shard"] + regex: ${ORDINAL_INDEX} +EOF diff --git a/services_go/observatorium/metrics.go b/services_go/observatorium/metrics.go index ac1855db44..ab70858199 100644 --- a/services_go/observatorium/metrics.go +++ b/services_go/observatorium/metrics.go @@ -1,8 +1,8 @@ package observatorium import ( - "fmt" - "net" + _ "embed" + "strconv" "time" "github.com/observatorium/observatorium/configuration_go/abstr/kubernetes/thanos/compactor" @@ -11,12 +11,16 @@ import ( "github.com/observatorium/observatorium/configuration_go/schemas/thanos/cache" "github.com/observatorium/observatorium/configuration_go/schemas/thanos/cache/redis" "github.com/observatorium/observatorium/configuration_go/schemas/thanos/common" - "github.com/observatorium/observatorium/configuration_go/schemas/thanos/units" + trclient "github.com/observatorium/observatorium/configuration_go/schemas/thanos/tracing/client" + "github.com/observatorium/observatorium/configuration_go/schemas/thanos/tracing/jaeger" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" ) -func makeCompactor(namespace string) (*compactor.CompactorStatefulSet, PostProcessFunc) { +//go:embed assets/store-auto-shard-relabel-configMap.sh +var storeAutoShardRelabelConfigMap string + +func makeCompactor(namespace string) (*compactor.CompactorStatefulSet, []PostProcessFunc) { // K8s config compactorSatefulset := compactor.NewCompactor() compactorSatefulset.Image = thanosImage @@ -48,19 +52,26 @@ func makeCompactor(namespace string) 
(*compactor.CompactorStatefulSet, PostProce compactorSatefulset.Options.DeduplicationReplicaLabel = "replica" compactorSatefulset.Options.AddExtraOpts("--debug.max-compaction-level=3") - return compactorSatefulset, addServiceCertAnnotation(compactorSatefulset.CommonLabels[k8sutil.NameLabel], tlsSecret) + posProcessFuncs := []PostProcessFunc{ + addAnnotation("Service", compactorSatefulset.Name, servingCertSecretNameAnnotation, tlsSecret), + } + + return compactorSatefulset, posProcessFuncs + } -func makeStore(namespace string) (*store.StoreStatefulSet, PostProcessFunc) { +func makeStore(namespace string) (*store.StoreStatefulSet, []PostProcessFunc) { + // K8s config + replicas := int32(1) storeStatefulSet := store.NewStore() storeStatefulSet.Image = thanosImage storeStatefulSet.ImageTag = thanosImageTag storeStatefulSet.Namespace = namespace - storeStatefulSet.Replicas = 1 + storeStatefulSet.Replicas = replicas delete(storeStatefulSet.PodResources.Limits, corev1.ResourceCPU) // To be confirmed - storeStatefulSet.PodResources.Requests[corev1.ResourceCPU] = resource.MustParse("200m") - storeStatefulSet.PodResources.Requests[corev1.ResourceMemory] = resource.MustParse("1Gi") - storeStatefulSet.PodResources.Limits[corev1.ResourceMemory] = resource.MustParse("5Gi") + storeStatefulSet.PodResources.Requests[corev1.ResourceCPU] = resource.MustParse("4") + storeStatefulSet.PodResources.Requests[corev1.ResourceMemory] = resource.MustParse("20Gi") + storeStatefulSet.PodResources.Limits[corev1.ResourceMemory] = resource.MustParse("80Gi") storeStatefulSet.VolumeType = "gp2" storeStatefulSet.VolumeSize = "500Gi" storeStatefulSet.Env = []corev1.EnvVar{ @@ -68,84 +79,84 @@ func makeStore(namespace string) (*store.StoreStatefulSet, PostProcessFunc) { k8sutil.NewEnvFromSecret("AWS_SECRET_ACCESS_KEY", "rhobs-thanos-s3", "aws_secret_access_key"), k8sutil.NewEnvFromSecret("OBJSTORE_CONFIG", "rhobs-thanos-objectstorage", "thanos.yaml"), } - tlsSecret := "store-tls" - 
storeStatefulSet.Sidecars = []k8sutil.ContainerProvider{makeOauthProxy(10902, namespace, storeStatefulSet.Name, tlsSecret)} - - // Store config - storeStatefulSet.Options.LogLevel = "warn" - storeStatefulSet.Options.LogFormat = "logfmt" - storeStatefulSet.Options.IgnoreDeletionMarksDelay = 24 * time.Hour - maxTime := time.Duration(365*24) * time.Hour - storeStatefulSet.Options.MaxTime = &common.TimeOrDurationValue{Dur: &maxTime} - storeStatefulSet.Options.ChunkPoolSize = 2040 * units.GiB - storeStatefulSet.Options.HttpAddress = &net.TCPAddr{Port: 10902, IP: net.ParseIP("0.0.0.0")} - // storeStatefulSet.Options.StoreGrpcDownloadedBytesLimit - // indexCacheCfg, err := yaml.Marshal(cache.NewConfig(redis.RedisClientConfig{ - // Addr: "rhobs-redis.rhobs.svc.cluster.local:6379", - // })) - // mimic.PanicOnErr(err) - - storeStatefulSet.Options.IndexCacheConfig = cache.NewConfig(redis.RedisClientConfig{ - Addr: "rhobs-redis.rhobs.svc.cluster.local:6379", - }) - - return storeStatefulSet, addServiceCertAnnotation(storeStatefulSet.CommonLabels[k8sutil.NameLabel], tlsSecret) -} + storeStatefulSet.Sidecars = []k8sutil.ContainerProvider{makeJaegerAgent("observatorium-tools")} -func makeOauthProxy(upstreamPort int32, namespace, serviceAccount, tlsSecret string) *k8sutil.Container { - proxyPort := int32(8443) - - return &k8sutil.Container{ - Name: "oauth-proxy", - Image: "quay.io/openshift/origin-oauth-proxy", - ImageTag: "v4.8.0", + // Store auto-sharding using a configMap and an initContainer + // The configMap contains a script that will be executed by the initContainer + // The script generates the relabeling config based on the replica ordinal and the number of replicas + // The relabeling config is then written to a volume shared with the store container + storeStatefulSet.ConfigMaps["hashmod-config-template"] = map[string]string{ + "entrypoint.sh": storeAutoShardRelabelConfigMap, + } + initContainer := corev1.Container{ + Name: "init-hashmod-file", + Image: 
"quay.io/app-sre/ubi8-ubi", + ImagePullPolicy: corev1.PullIfNotPresent, Args: []string{ - "-provider=openshift", - fmt.Sprintf("-https-address=:%d", proxyPort), - "-http-address=", - "-email-domain=*", - fmt.Sprintf("-upstream=http://localhost:%d", upstreamPort), - fmt.Sprintf("-openshift-service-account=%s", serviceAccount), - fmt.Sprintf(`-openshift-sar={"resource": "namespaces", "verb": "get", "name": "%s", "namespace": "%s"}`, namespace, namespace), - fmt.Sprintf(`-openshift-delegate-urls={"/": {"resource": "namespaces", "verb": "get", "name": "%s", "namespace": "%s"}}`, namespace, namespace), - "-tls-cert=/etc/tls/private/tls.crt", - "-tls-key=/etc/tls/private/tls.key", - "-client-secret-file=/var/run/secrets/kubernetes.io/serviceaccount/token", - "-cookie-secret-file=/etc/proxy/secrets/session_secret", - "-openshift-ca=/etc/pki/tls/cert.pem", - "-openshift-ca=/var/run/secrets/kubernetes.io/serviceaccount/ca.crt", + "/tmp/entrypoint/entrypoint.sh", }, - Resources: k8sutil.NewResourcesRequirements("100m", "200m", "100Mi", "200Mi"), - Ports: []corev1.ContainerPort{ + Env: []corev1.EnvVar{ { - Name: "https", - ContainerPort: proxyPort, - Protocol: corev1.ProtocolTCP, + Name: "THANOS_STORE_REPLICAS", + Value: strconv.Itoa(int(replicas)), }, }, - ServicePorts: []corev1.ServicePort{ - k8sutil.NewServicePort("https", int(proxyPort), int(proxyPort)), - }, VolumeMounts: []corev1.VolumeMount{ { - Name: "compact-tls", - MountPath: "/etc/tls/private", - ReadOnly: true, + Name: "hashmod-config-template", + MountPath: "/tmp/entrypoint", }, { - Name: "compact-proxy", - MountPath: "/etc/proxy/secrets", - ReadOnly: true, + Name: "hashmod-config", + MountPath: "/etc/config", }, }, - Volumes: []corev1.Volume{ - k8sutil.NewPodVolumeFromSecret("compact-tls", tlsSecret), - k8sutil.NewPodVolumeFromSecret("compact-proxy", "compact-proxy"), - }, - Secrets: map[string]map[string][]byte{ - "compact-proxy": { - "session_secret": []byte("secret"), + } + defaultMode := int32(0777) + 
postProcessFuncs := []PostProcessFunc{ + addPodVolume(storeStatefulSet.Name, corev1.Volume{ + Name: "hashmod-config-template", + VolumeSource: corev1.VolumeSource{ + ConfigMap: &corev1.ConfigMapVolumeSource{ + LocalObjectReference: corev1.LocalObjectReference{ + Name: storeStatefulSet.CommonLabels[k8sutil.NameLabel], + }, + DefaultMode: &defaultMode, + }, + }, + }), + addPodVolume(storeStatefulSet.Name, corev1.Volume{ + Name: "hashmod-config", + VolumeSource: corev1.VolumeSource{ + EmptyDir: &corev1.EmptyDirVolumeSource{}, }, + }), + addContainerVolumeMount(storeStatefulSet.Name, corev1.VolumeMount{ + Name: "hashmod-config", + MountPath: "/etc/config", + }), + addPodInitContainer(storeStatefulSet.Name, initContainer), + } + + // Store config + storeStatefulSet.Options.LogLevel = common.LogLevelWarn + storeStatefulSet.Options.LogFormat = common.LogFormatLogfmt + storeStatefulSet.Options.IgnoreDeletionMarksDelay = 24 * time.Hour + maxTimeDur := time.Duration(-22) * time.Hour + storeStatefulSet.Options.MaxTime = &common.TimeOrDurationValue{Dur: &maxTimeDur} + storeStatefulSet.Options.SelectorRelabelConfigFile = "/tmp/config/hashmod-config.yaml" + storeStatefulSet.Options.TracingConfig = &trclient.TracingConfig{ + Type: trclient.Jaeger, + Config: jaeger.Config{ + SamplerParam: 2, + SamplerType: jaeger.SamplerTypeRateLimiting, + ServiceName: "thanos-store", }, } + storeStatefulSet.Options.StoreEnableIndexHeaderLazyReader = true // Enables parallel rolling update of store nodes. 
+ storeStatefulSet.Options.IndexCacheConfig = cache.NewConfig(redis.RedisClientConfig{ + Addr: "rhobs-redis.rhobs.svc.cluster.local:6379", + }) + + return storeStatefulSet, postProcessFuncs } diff --git a/services_go/observatorium/observatorium.go b/services_go/observatorium/observatorium.go index 500d3a5871..179f44dee4 100644 --- a/services_go/observatorium/observatorium.go +++ b/services_go/observatorium/observatorium.go @@ -29,11 +29,14 @@ type TenantInstanceConfiguration struct { // Tenant *obs_api.tenant } +// type ComponentsConfig struct { + type InstanceConfiguration struct { Cluster string Instance string Namespace string Tenants []TenantInstanceConfiguration + // Components } type PostProcessFunc func(obj runtime.Object) @@ -48,9 +51,9 @@ type Observatorium struct { func NewObservatorium(cfg *InstanceConfiguration) *Observatorium { postProcessFuncs := []PostProcessFunc{updateServiceMonitorNamespace} storeComponent, postProcess := makeStore(cfg.Namespace) - postProcessFuncs = append(postProcessFuncs, postProcess) + postProcessFuncs = append(postProcessFuncs, postProcess...) compactorComponent, postProcess := makeCompactor(cfg.Namespace) - postProcessFuncs = append(postProcessFuncs, postProcess) + postProcessFuncs = append(postProcessFuncs, postProcess...) 
return &Observatorium{ Cfg: cfg, diff --git a/services_go/observatorium/postprocess.go b/services_go/observatorium/postprocess.go index 8ca3ec101b..11e31232cc 100644 --- a/services_go/observatorium/postprocess.go +++ b/services_go/observatorium/postprocess.go @@ -1,34 +1,106 @@ package observatorium import ( - "github.com/observatorium/observatorium/configuration_go/k8sutil" monv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" + appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" ) +const ( + servingCertSecretNameAnnotation = "service.alpha.openshift.io/serving-cert-secret-name" +) + func updateServiceMonitorNamespace(obj runtime.Object) { if serviceMonitor, ok := obj.(*monv1.ServiceMonitor); ok { serviceMonitor.ObjectMeta.Namespace = monitoringNamespace } } -func addServiceCertAnnotation(nameLabelSelector, secretName string) func(object runtime.Object) { +func addAnnotation(objectType, objectName, key, value string) func(object runtime.Object) { return func(object runtime.Object) { - if service, ok := object.(*corev1.Service); ok { - if service.ObjectMeta.Labels == nil { - return - } + if object.GetObjectKind().GroupVersionKind().Kind != objectType { + return + } + + objectMeta, ok := object.(metav1.ObjectMetaAccessor) + if !ok { + return + } - if service.ObjectMeta.Labels[k8sutil.NameLabel] != nameLabelSelector { - return - } + if objectMeta.GetObjectMeta().GetName() != objectName { + return + } + + if objectMeta.GetObjectMeta().GetAnnotations() == nil { + objectMeta.GetObjectMeta().SetAnnotations(map[string]string{}) + } + + objectMeta.GetObjectMeta().GetAnnotations()[key] = value + } +} - if service.ObjectMeta.Annotations == nil { - service.ObjectMeta.Annotations = map[string]string{} - } +func addPodInitContainer(objectName string, container corev1.Container) func(object runtime.Object) { + return func(object runtime.Object) { + name, pod := 
getPodFromObject(object) + if pod == nil { + return + } - service.ObjectMeta.Annotations["service.beta.openshift.io/serving-cert-secret-name"] = secretName + if name != objectName { + return } + + pod.Spec.InitContainers = append(pod.Spec.InitContainers, container) } } + +func addPodVolume(objectName string, volume corev1.Volume) func(object runtime.Object) { + return func(object runtime.Object) { + name, pod := getPodFromObject(object) + + if pod == nil { + return + } + + if name != objectName { + return + } + + pod.Spec.Volumes = append(pod.Spec.Volumes, volume) + } +} + +func addContainerVolumeMount(objectName string, volumeMount corev1.VolumeMount) func(object runtime.Object) { + return func(object runtime.Object) { + name, pod := getPodFromObject(object) + + if pod == nil { + return + } + + if name != objectName { + return + } + + container := &pod.Spec.Containers[0] + + container.VolumeMounts = append(container.VolumeMounts, volumeMount) + } +} + +func getPodFromObject(object runtime.Object) (string, *corev1.PodTemplateSpec) { + switch object.GetObjectKind().GroupVersionKind().Kind { + case "Deployment": + if deployment, ok := object.(*appsv1.Deployment); ok { + return deployment.ObjectMeta.Name, &deployment.Spec.Template + } + case "StatefulSet": + if statefulSet, ok := object.(*appsv1.StatefulSet); ok { + return statefulSet.ObjectMeta.Name, &statefulSet.Spec.Template + } + } + + return "", nil +} diff --git a/services_go/observatorium/sidecars.go b/services_go/observatorium/sidecars.go new file mode 100644 index 0000000000..995c8134b0 --- /dev/null +++ b/services_go/observatorium/sidecars.go @@ -0,0 +1,120 @@ +package observatorium + +import ( + "fmt" + + "github.com/observatorium/observatorium/configuration_go/k8sutil" + corev1 "k8s.io/api/core/v1" +) + +func makeOauthProxy(upstreamPort int32, namespace, serviceAccount, tlsSecret string) *k8sutil.Container { + proxyPort := int32(8443) + + return &k8sutil.Container{ + Name: "oauth-proxy", + Image: 
"quay.io/openshift/origin-oauth-proxy", + ImageTag: "v4.8.0", + Args: []string{ + "-provider=openshift", + fmt.Sprintf("-https-address=:%d", proxyPort), + "-http-address=", + "-email-domain=*", + fmt.Sprintf("-upstream=http://localhost:%d", upstreamPort), + fmt.Sprintf("-openshift-service-account=%s", serviceAccount), + fmt.Sprintf(`-openshift-sar={"resource": "namespaces", "verb": "get", "name": "%s", "namespace": "%s"}`, namespace, namespace), + fmt.Sprintf(`-openshift-delegate-urls={"/": {"resource": "namespaces", "verb": "get", "name": "%s", "namespace": "%s"}}`, namespace, namespace), + "-tls-cert=/etc/tls/private/tls.crt", + "-tls-key=/etc/tls/private/tls.key", + "-client-secret-file=/var/run/secrets/kubernetes.io/serviceaccount/token", + "-cookie-secret-file=/etc/proxy/secrets/session_secret", + "-openshift-ca=/etc/pki/tls/cert.pem", + "-openshift-ca=/var/run/secrets/kubernetes.io/serviceaccount/ca.crt", + }, + Resources: k8sutil.NewResourcesRequirements("100m", "200m", "100Mi", "200Mi"), + Ports: []corev1.ContainerPort{ + { + Name: "https", + ContainerPort: proxyPort, + Protocol: corev1.ProtocolTCP, + }, + }, + ServicePorts: []corev1.ServicePort{ + k8sutil.NewServicePort("https", int(proxyPort), int(proxyPort)), + }, + VolumeMounts: []corev1.VolumeMount{ + { + Name: "compact-tls", + MountPath: "/etc/tls/private", + ReadOnly: true, + }, + { + Name: "compact-proxy", + MountPath: "/etc/proxy/secrets", + ReadOnly: true, + }, + }, + Volumes: []corev1.Volume{ + k8sutil.NewPodVolumeFromSecret("compact-tls", tlsSecret), + k8sutil.NewPodVolumeFromSecret("compact-proxy", "compact-proxy"), + }, + Secrets: map[string]map[string][]byte{ + "compact-proxy": { + "session_secret": []byte("secret"), + }, + }, + } +} + +func makeJaegerAgent(collectorNamespace string) *k8sutil.Container { + metricsPort := int32(14271) + livelinesProbe := k8sutil.NewProbe("/", int(metricsPort), k8sutil.ProbeConfig{FailureThreshold: 5}) + readinessProbe := k8sutil.NewProbe("/", int(metricsPort), 
k8sutil.ProbeConfig{InitialDelaySeconds: 1}) + return &k8sutil.Container{ + Name: "jaeger-agent", + Image: "quay.io/app-sre/jaegertracing-jaeger-agent", + ImageTag: "1.22.0", + Args: []string{ + fmt.Sprintf("--reporter.grpc.host-port=dns:///jaeger-collector-headless.%s.svc:14250", collectorNamespace), + "--reporter.type=grpc", + "--agent.tags=pod.namespace=$(NAMESPACE),pod.name=$(POD)", + }, + Resources: k8sutil.NewResourcesRequirements("32m", "128m", "64Mi", "128Mi"), + Ports: []corev1.ContainerPort{ + { + Name: "configs", + ContainerPort: 5778, + Protocol: corev1.ProtocolTCP, + }, + { + Name: "jaeger-thrift", + ContainerPort: 6831, + Protocol: corev1.ProtocolTCP, + }, + { + Name: "metrics", + ContainerPort: metricsPort, + Protocol: corev1.ProtocolTCP, + }, + }, + Env: []corev1.EnvVar{ + { + Name: "NAMESPACE", + ValueFrom: &corev1.EnvVarSource{ + FieldRef: &corev1.ObjectFieldSelector{ + FieldPath: "metadata.namespace", + }, + }, + }, + { + Name: "POD", + ValueFrom: &corev1.EnvVarSource{ + FieldRef: &corev1.ObjectFieldSelector{ + FieldPath: "metadata.name", + }, + }, + }, + }, + LivenessProbe: &livelinesProbe, + ReadinessProbe: &readinessProbe, + } +} From 87e0de7778f7c3f87c2633702d935ca1599f69d1 Mon Sep 17 00:00:00 2001 From: Thibault Mange <22740367+thibaultmg@users.noreply.github.com> Date: Mon, 25 Sep 2023 17:18:34 +0200 Subject: [PATCH 05/32] add replicas in instance config Signed-off-by: Thibault Mange <22740367+thibaultmg@users.noreply.github.com> --- services_go/instances/rhobs/rhobs.go | 18 ++++++++++-------- services_go/observatorium/metrics.go | 3 +-- services_go/observatorium/observatorium.go | 12 ++++++------ 3 files changed, 17 insertions(+), 16 deletions(-) diff --git a/services_go/instances/rhobs/rhobs.go b/services_go/instances/rhobs/rhobs.go index 31a91128a8..cfad9b4280 100644 --- a/services_go/instances/rhobs/rhobs.go +++ b/services_go/instances/rhobs/rhobs.go @@ -7,16 +7,18 @@ import ( func ClusterConfigs() []observatorium.InstanceConfiguration 
{ return []observatorium.InstanceConfiguration{ { - Cluster: "app-sre-stage-01", - Namespace: "rhobs", - Instance: "rhobs", - Tenants: []observatorium.TenantInstanceConfiguration{}, + Cluster: "app-sre-stage-01", + Namespace: "rhobs", + Instance: "rhobs", + Tenants: []observatorium.TenantInstanceConfiguration{}, + ThanosStoreReplicas: 2, }, { - Cluster: "telemeter-prod-01", - Namespace: "rhobs", - Instance: "rhobs", - Tenants: []observatorium.TenantInstanceConfiguration{}, + Cluster: "telemeter-prod-01", + Namespace: "rhobs", + Instance: "rhobs", + Tenants: []observatorium.TenantInstanceConfiguration{}, + ThanosStoreReplicas: 3, }, } } diff --git a/services_go/observatorium/metrics.go b/services_go/observatorium/metrics.go index ab70858199..a8d68dd6cb 100644 --- a/services_go/observatorium/metrics.go +++ b/services_go/observatorium/metrics.go @@ -60,9 +60,8 @@ func makeCompactor(namespace string) (*compactor.CompactorStatefulSet, []PostPro } -func makeStore(namespace string) (*store.StoreStatefulSet, []PostProcessFunc) { +func makeStore(namespace string, replicas int32) (*store.StoreStatefulSet, []PostProcessFunc) { // K8s config - replicas := int32(1) storeStatefulSet := store.NewStore() storeStatefulSet.Image = thanosImage storeStatefulSet.ImageTag = thanosImageTag diff --git a/services_go/observatorium/observatorium.go b/services_go/observatorium/observatorium.go index 179f44dee4..7b1ea3aee3 100644 --- a/services_go/observatorium/observatorium.go +++ b/services_go/observatorium/observatorium.go @@ -32,11 +32,11 @@ type TenantInstanceConfiguration struct { // type ComponentsConfig struct { type InstanceConfiguration struct { - Cluster string - Instance string - Namespace string - Tenants []TenantInstanceConfiguration - // Components + Cluster string + Instance string + Namespace string + Tenants []TenantInstanceConfiguration + ThanosStoreReplicas int32 } type PostProcessFunc func(obj runtime.Object) @@ -50,7 +50,7 @@ type Observatorium struct { func 
NewObservatorium(cfg *InstanceConfiguration) *Observatorium { postProcessFuncs := []PostProcessFunc{updateServiceMonitorNamespace} - storeComponent, postProcess := makeStore(cfg.Namespace) + storeComponent, postProcess := makeStore(cfg.Namespace, cfg.ThanosStoreReplicas) postProcessFuncs = append(postProcessFuncs, postProcess...) compactorComponent, postProcess := makeCompactor(cfg.Namespace) postProcessFuncs = append(postProcessFuncs, postProcess...) From 096004780f6dbc0c0cc6140046c7475f845deb6f Mon Sep 17 00:00:00 2001 From: Thibault Mange <22740367+thibaultmg@users.noreply.github.com> Date: Tue, 26 Sep 2023 10:25:15 +0200 Subject: [PATCH 06/32] update generated files Signed-off-by: Thibault Mange <22740367+thibaultmg@users.noreply.github.com> --- mimic.go | 3 ++- .../observatorium-metrics-compact-template.yaml | 12 +++++------- .../observatorium-metrics-store-template.yaml | 8 +++----- .../observatorium-metrics-compact-template.yaml | 4 +--- .../observatorium-metrics-store-template.yaml | 16 +++++++--------- 5 files changed, 18 insertions(+), 25 deletions(-) diff --git a/mimic.go b/mimic.go index 3f5a3ff3e0..4fa92ce3b4 100644 --- a/mimic.go +++ b/mimic.go @@ -2,6 +2,7 @@ package main import ( "github.com/bwplotka/mimic" + cfgobservatorium "github.com/rhobs/configuration/configuration/observatorium" services "github.com/rhobs/configuration/services_go" ) @@ -12,7 +13,7 @@ func main() { // cfgobservatorium.GenSLO(gen.With("observability", "prometheusrules", "pyrra"), gen.With("observability", "prometheusrules")) - // cfgobservatorium.GenerateRBAC(gen.With(".tmp", "tenants")) + cfgobservatorium.GenerateRBAC(gen.With(".tmp", "tenants")) services.Generate(gen.With("services_go")) diff --git a/resources/services_go/app-sre-stage-01/rhobs/observatorium-metrics-compact-template.yaml b/resources/services_go/app-sre-stage-01/rhobs/observatorium-metrics-compact-template.yaml index f13e272e60..1507b7b249 100755 --- 
a/resources/services_go/app-sre-stage-01/rhobs/observatorium-metrics-compact-template.yaml +++ b/resources/services_go/app-sre-stage-01/rhobs/observatorium-metrics-compact-template.yaml @@ -75,9 +75,7 @@ objects: namespace: openshift-monitoring spec: endpoints: - - bearerTokenSecret: - key: "" - port: http + - port: http relabelings: - action: replace separator: / @@ -132,14 +130,14 @@ objects: - podAffinityTerm: labelSelector: matchExpressions: - - key: app.kubernetes.io/name - operator: In - values: - - thanos-compact - key: app.kubernetes.io/instance operator: In values: - observatorium + - key: app.kubernetes.io/name + operator: In + values: + - thanos-compact namespaces: - observatorium topologyKey: kubernetes.io/hostname diff --git a/resources/services_go/app-sre-stage-01/rhobs/observatorium-metrics-store-template.yaml b/resources/services_go/app-sre-stage-01/rhobs/observatorium-metrics-store-template.yaml index f589fb8adc..fec3b04fc3 100755 --- a/resources/services_go/app-sre-stage-01/rhobs/observatorium-metrics-store-template.yaml +++ b/resources/services_go/app-sre-stage-01/rhobs/observatorium-metrics-store-template.yaml @@ -76,9 +76,7 @@ objects: namespace: openshift-monitoring spec: endpoints: - - bearerTokenSecret: - key: "" - port: http + - port: http relabelings: - action: replace separator: / @@ -108,7 +106,7 @@ objects: name: observatorium-thanos-store namespace: rhobs spec: - replicas: 1 + replicas: 2 selector: matchLabels: app.kubernetes.io/component: object-store-gateway @@ -256,7 +254,7 @@ objects: - /tmp/entrypoint/entrypoint.sh env: - name: THANOS_STORE_REPLICAS - value: "1" + value: "2" image: quay.io/app-sre/ubi8-ubi imagePullPolicy: IfNotPresent name: init-hashmod-file diff --git a/resources/services_go/telemeter-prod-01/rhobs/observatorium-metrics-compact-template.yaml b/resources/services_go/telemeter-prod-01/rhobs/observatorium-metrics-compact-template.yaml index f13e272e60..90c46fadb5 100755 --- 
a/resources/services_go/telemeter-prod-01/rhobs/observatorium-metrics-compact-template.yaml +++ b/resources/services_go/telemeter-prod-01/rhobs/observatorium-metrics-compact-template.yaml @@ -75,9 +75,7 @@ objects: namespace: openshift-monitoring spec: endpoints: - - bearerTokenSecret: - key: "" - port: http + - port: http relabelings: - action: replace separator: / diff --git a/resources/services_go/telemeter-prod-01/rhobs/observatorium-metrics-store-template.yaml b/resources/services_go/telemeter-prod-01/rhobs/observatorium-metrics-store-template.yaml index bcca62e334..722af14f0c 100755 --- a/resources/services_go/telemeter-prod-01/rhobs/observatorium-metrics-store-template.yaml +++ b/resources/services_go/telemeter-prod-01/rhobs/observatorium-metrics-store-template.yaml @@ -76,9 +76,7 @@ objects: namespace: openshift-monitoring spec: endpoints: - - bearerTokenSecret: - key: "" - port: http + - port: http relabelings: - action: replace separator: / @@ -108,7 +106,7 @@ objects: name: observatorium-thanos-store namespace: rhobs spec: - replicas: 1 + replicas: 3 selector: matchLabels: app.kubernetes.io/component: object-store-gateway @@ -133,14 +131,14 @@ objects: - podAffinityTerm: labelSelector: matchExpressions: - - key: app.kubernetes.io/instance - operator: In - values: - - observatorium - key: app.kubernetes.io/name operator: In values: - thanos-store + - key: app.kubernetes.io/instance + operator: In + values: + - observatorium namespaces: - observatorium topologyKey: kubernetes.io/hostname @@ -256,7 +254,7 @@ objects: - /tmp/entrypoint/entrypoint.sh env: - name: THANOS_STORE_REPLICAS - value: "1" + value: "3" image: quay.io/app-sre/ubi8-ubi imagePullPolicy: IfNotPresent name: init-hashmod-file From 3232889d6654151df3c83af62914bbfdc2b0cf17 Mon Sep 17 00:00:00 2001 From: Thibault Mange <22740367+thibaultmg@users.noreply.github.com> Date: Wed, 27 Sep 2023 11:16:02 +0200 Subject: [PATCH 07/32] change dir of generated tamplates Signed-off-by: Thibault Mange 
<22740367+thibaultmg@users.noreply.github.com> --- mimic.go | 4 ++-- .../rhobs/observatorium-metrics-compact-template.yaml | 0 .../rhobs/observatorium-metrics-store-template.yaml | 0 .../rhobs/observatorium-metrics-compact-template.yaml | 8 ++++---- .../rhobs/observatorium-metrics-store-template.yaml | 8 ++++---- 5 files changed, 10 insertions(+), 10 deletions(-) rename resources/{services_go/telemeter-prod-01 => services/app-sre-stage-01}/rhobs/observatorium-metrics-compact-template.yaml (100%) rename resources/{services_go => services}/app-sre-stage-01/rhobs/observatorium-metrics-store-template.yaml (100%) rename resources/{services_go/app-sre-stage-01 => services/telemeter-prod-01}/rhobs/observatorium-metrics-compact-template.yaml (100%) rename resources/{services_go => services}/telemeter-prod-01/rhobs/observatorium-metrics-store-template.yaml (100%) diff --git a/mimic.go b/mimic.go index 4fa92ce3b4..e87da5391e 100644 --- a/mimic.go +++ b/mimic.go @@ -11,10 +11,10 @@ func main() { defer gen.Generate() - // cfgobservatorium.GenSLO(gen.With("observability", "prometheusrules", "pyrra"), gen.With("observability", "prometheusrules")) + cfgobservatorium.GenSLO(gen.With("observability", "prometheusrules", "pyrra"), gen.With("observability", "prometheusrules")) cfgobservatorium.GenerateRBAC(gen.With(".tmp", "tenants")) - services.Generate(gen.With("services_go")) + services.Generate(gen.With("services")) } diff --git a/resources/services_go/telemeter-prod-01/rhobs/observatorium-metrics-compact-template.yaml b/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-compact-template.yaml similarity index 100% rename from resources/services_go/telemeter-prod-01/rhobs/observatorium-metrics-compact-template.yaml rename to resources/services/app-sre-stage-01/rhobs/observatorium-metrics-compact-template.yaml diff --git a/resources/services_go/app-sre-stage-01/rhobs/observatorium-metrics-store-template.yaml 
b/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-store-template.yaml similarity index 100% rename from resources/services_go/app-sre-stage-01/rhobs/observatorium-metrics-store-template.yaml rename to resources/services/app-sre-stage-01/rhobs/observatorium-metrics-store-template.yaml diff --git a/resources/services_go/app-sre-stage-01/rhobs/observatorium-metrics-compact-template.yaml b/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-compact-template.yaml similarity index 100% rename from resources/services_go/app-sre-stage-01/rhobs/observatorium-metrics-compact-template.yaml rename to resources/services/telemeter-prod-01/rhobs/observatorium-metrics-compact-template.yaml index 1507b7b249..90c46fadb5 100755 --- a/resources/services_go/app-sre-stage-01/rhobs/observatorium-metrics-compact-template.yaml +++ b/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-compact-template.yaml @@ -130,14 +130,14 @@ objects: - podAffinityTerm: labelSelector: matchExpressions: - - key: app.kubernetes.io/instance - operator: In - values: - - observatorium - key: app.kubernetes.io/name operator: In values: - thanos-compact + - key: app.kubernetes.io/instance + operator: In + values: + - observatorium namespaces: - observatorium topologyKey: kubernetes.io/hostname diff --git a/resources/services_go/telemeter-prod-01/rhobs/observatorium-metrics-store-template.yaml b/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-store-template.yaml similarity index 100% rename from resources/services_go/telemeter-prod-01/rhobs/observatorium-metrics-store-template.yaml rename to resources/services/telemeter-prod-01/rhobs/observatorium-metrics-store-template.yaml index 722af14f0c..df8c6b8962 100755 --- a/resources/services_go/telemeter-prod-01/rhobs/observatorium-metrics-store-template.yaml +++ b/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-store-template.yaml @@ -131,14 +131,14 @@ objects: - podAffinityTerm: labelSelector: 
matchExpressions: - - key: app.kubernetes.io/name - operator: In - values: - - thanos-store - key: app.kubernetes.io/instance operator: In values: - observatorium + - key: app.kubernetes.io/name + operator: In + values: + - thanos-store namespaces: - observatorium topologyKey: kubernetes.io/hostname From 68a5f86354eeca70217c9f489bba343986101cee Mon Sep 17 00:00:00 2001 From: Thibault Mange <22740367+thibaultmg@users.noreply.github.com> Date: Wed, 27 Sep 2023 14:01:19 +0200 Subject: [PATCH 08/32] update go.mod Signed-off-by: Thibault Mange <22740367+thibaultmg@users.noreply.github.com> --- go.mod | 11 +++++++---- go.sum | 27 +++++++++++++++++++++++---- 2 files changed, 30 insertions(+), 8 deletions(-) diff --git a/go.mod b/go.mod index f6e67bace1..6d43d5839d 100644 --- a/go.mod +++ b/go.mod @@ -5,10 +5,13 @@ go 1.21 toolchain go1.21.1 require ( - github.com/bwplotka/mimic v0.1.1-0.20220621130344-a6338e3b8238 - github.com/observatorium/api v0.1.3-0.20220621123450-69c5f2661d01 + github.com/bwplotka/mimic v0.2.1-0.20230303101552-f705cca2f4a4 + github.com/observatorium/api v0.1.3-0.20230711132510-96e8799ade44 + github.com/observatorium/observatorium v0.0.0-00010101000000-000000000000 + github.com/openshift/api v3.9.0+incompatible github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.68.0 github.com/pyrra-dev/pyrra v0.7.0 + k8s.io/api v0.28.2 k8s.io/apimachinery v0.28.2 ) @@ -52,6 +55,7 @@ require ( github.com/prometheus/prometheus v1.8.2-0.20220211202545-56e14463bccf // indirect github.com/rodaine/hclencoder v0.0.1 // indirect github.com/stretchr/testify v1.8.4 // indirect + github.com/thanos-io/thanos v0.32.2 // indirect go.uber.org/atomic v1.11.0 // indirect go.uber.org/goleak v1.2.1 // indirect go.uber.org/zap v1.26.0 // indirect @@ -69,7 +73,6 @@ require ( gopkg.in/inf.v0 v0.9.1 // indirect gopkg.in/yaml.v2 v2.4.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect - k8s.io/api v0.28.2 // indirect k8s.io/apiextensions-apiserver v0.28.2 // indirect 
k8s.io/client-go v0.28.2 // indirect k8s.io/klog/v2 v2.100.1 // indirect @@ -81,4 +84,4 @@ require ( sigs.k8s.io/yaml v1.3.0 // indirect ) -replace github.com/observatorium/observatorium => ../observatorium +replace github.com/observatorium/observatorium => github.com/thibaultmg/observatorium v0.0.0-20230922090626-0f7a82fd0910 diff --git a/go.sum b/go.sum index ebeac84660..ad7f29602d 100644 --- a/go.sum +++ b/go.sum @@ -381,6 +381,8 @@ github.com/eapache/queue v1.1.0/go.mod h1:6eCeP0CKFpHLu8blIFXhExK/dRa7WDZfr6jVFP github.com/eclipse/paho.mqtt.golang v1.2.0/go.mod h1:H9keYFcgq3Qr5OUJm/JZI/i6U7joQ8SYLhZwfeOo6Ts= github.com/edsrzf/mmap-go v1.0.0/go.mod h1:YO35OhQPt3KJa3ryjFM5Bs14WD66h8eGKpfaBNrHW5M= github.com/edsrzf/mmap-go v1.1.0/go.mod h1:19H/e8pUPLicwkyNgOykDXkJ9F0MHE+Z52B8EIth78Q= +github.com/efficientgo/core v1.0.0-rc.2 h1:7j62qHLnrZqO3V3UA0AqOGd5d5aXV3AX6m/NZBHp78I= +github.com/efficientgo/core v1.0.0-rc.2/go.mod h1:FfGdkzWarkuzOlY04VY+bGfb1lWrjaL6x/GLcQ4vJps= github.com/efficientgo/tools/core v0.0.0-20220225185207-fe763185946b h1:ZHiD4/yE4idlbqvAO6iYCOYRzOMRpxkW+FKasRA3tsQ= github.com/efficientgo/tools/core v0.0.0-20220225185207-fe763185946b/go.mod h1:OmVcnJopJL8d3X3sSXTiypGoUSgFq1aDGmlrdi9dn/M= github.com/elazarl/goproxy v0.0.0-20170405201442-c4fc26588b6e/go.mod h1:/Zj4wYkgs4iZTTu3o/KG3Itv/qCCa8VVMlb3i9OVuzc= @@ -404,6 +406,7 @@ github.com/evanphx/json-patch v4.2.0+incompatible/go.mod h1:50XU6AFN0ol/bzJsmQLi github.com/evanphx/json-patch v4.9.0+incompatible/go.mod h1:50XU6AFN0ol/bzJsmQLiYLvXMP4fmwYFNcr97nuDLSk= github.com/evanphx/json-patch v4.11.0+incompatible/go.mod h1:50XU6AFN0ol/bzJsmQLiYLvXMP4fmwYFNcr97nuDLSk= github.com/evanphx/json-patch v5.6.0+incompatible h1:jBYDEEiFBPxA0v50tFdvOzQQTCvpL6mnFh5mB2/l16U= +github.com/evanphx/json-patch v5.6.0+incompatible/go.mod h1:50XU6AFN0ol/bzJsmQLiYLvXMP4fmwYFNcr97nuDLSk= github.com/evanphx/json-patch/v5 v5.7.0 h1:nJqP7uwL84RJInrohHfW0Fx3awjbm8qZeFv0nW9SYGc= github.com/evanphx/json-patch/v5 v5.7.0/go.mod 
h1:VNkHZ/282BpEyt/tObQO8s5CMPmYYq14uClGH4abBuQ= github.com/fatih/color v1.7.0/go.mod h1:Zm6kSWBoL9eyXnKyktHP6abPY2pDugNf5KwzbycvMj4= @@ -458,6 +461,7 @@ github.com/go-logr/logr v1.2.0/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbV github.com/go-logr/logr v1.2.4 h1:g01GSCwiDw2xSZfjJ2/T9M+S6pFdcNtFYsp+Y43HYDQ= github.com/go-logr/logr v1.2.4/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= github.com/go-logr/zapr v1.2.4 h1:QHVo+6stLbfJmYGkQ7uGHUCu5hnAFAj6mDe6Ea0SeOo= +github.com/go-logr/zapr v1.2.4/go.mod h1:FyHWQIzQORZ0QVE1BtVHv3cKtNLuXsbNLtpuhNapBOA= github.com/go-openapi/analysis v0.0.0-20180825180245-b006789cd277/go.mod h1:k70tL6pCuVxPJOHXQ+wIac1FUrvNkHolPie/cLEU6hI= github.com/go-openapi/analysis v0.17.0/go.mod h1:IowGgpVeD0vNm45So8nr+IcQ3pxVtpRoBWb8PVZO0ik= github.com/go-openapi/analysis v0.18.0/go.mod h1:IowGgpVeD0vNm45So8nr+IcQ3pxVtpRoBWb8PVZO0ik= @@ -566,6 +570,7 @@ github.com/go-sql-driver/mysql v1.4.1/go.mod h1:zAC/RDZ24gD3HViQzih4MyKcchzm+sOG github.com/go-sql-driver/mysql v1.5.0/go.mod h1:DCzpHaOWr8IXmIStZouvnhqoel9Qv2LBy8hT2VhHyBg= github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY= github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEevZMzYi5KSi8KkcZtzBcTgAUUtapy0OI= +github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572/go.mod h1:9Pwr4B2jHnOSGXyyzV8ROjYa2ojvAY6HCGYYfMoC3Ls= github.com/go-zookeeper/zk v1.0.2/go.mod h1:nOB03cncLtlp4t+UAkGSV+9beXP/akpekBwL+UX1Qcw= github.com/gobuffalo/attrs v0.0.0-20190224210810-a9411de4debd/go.mod h1:4duuawTqi2wkkpB4ePgWMaai6/Kc6WEz83bhFwpHzj0= github.com/gobuffalo/depgen v0.0.0-20190329151759-d478694a28d3/go.mod h1:3STtPUQYuzV0gBVOY3vy6CfMm/ljR4pABfrTeHNLHUY= @@ -703,7 +708,6 @@ github.com/google/pprof v0.0.0-20210226084205-cbba55b83ad5/go.mod h1:kpwsk12EmLe github.com/google/pprof v0.0.0-20210601050228-01bbb1931b22/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= github.com/google/pprof v0.0.0-20210609004039-a478d1d731e9/go.mod 
h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= github.com/google/pprof v0.0.0-20210720184732-4bb14d4b1be1/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= -github.com/google/pprof v0.0.0-20211214055906-6f57359322fd h1:1FjCyPC+syAzJ5/2S8fqdZK1R22vvA0J7JZKcuOIQ7Y= github.com/google/pprof v0.0.0-20211214055906-6f57359322fd/go.mod h1:KgnwoLYCZ8IQu3XUZ8Nc/bM9CCZFOyjUNOSygVozoDg= github.com/google/pprof v0.0.0-20230406165453-00490a63f317 h1:hFhpt7CTmR3DX+b4R19ydQFtofxT0Sv3QsKNMVQYTMQ= github.com/google/pprof v0.0.0-20230406165453-00490a63f317/go.mod h1:79YE0hCXdHag9sBkw2o+N/YnZtTkXi0UT9Nnixa5eYk= @@ -804,6 +808,7 @@ github.com/imdario/mergo v0.3.10/go.mod h1:jmQim1M+e3UYxmgPu/WyfjB3N3VflVyUjjjwH github.com/imdario/mergo v0.3.11/go.mod h1:jmQim1M+e3UYxmgPu/WyfjB3N3VflVyUjjjwH0dnCYA= github.com/imdario/mergo v0.3.12/go.mod h1:jmQim1M+e3UYxmgPu/WyfjB3N3VflVyUjjjwH0dnCYA= github.com/imdario/mergo v0.3.16 h1:wwQJbIsHYGMUyLSPrEq1CT16AhnhNJQ51+4fdHUnCl4= +github.com/imdario/mergo v0.3.16/go.mod h1:WBLT9ZmE3lPoWsEzCh9LPo3TiwVN+ZKEjmz+hD27ysY= github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8= github.com/influxdata/flux v0.65.0/go.mod h1:BwN2XG2lMszOoquQaFdPET8FRQfrXiZsWmcMO9rkaVY= github.com/influxdata/flux v0.131.0/go.mod h1:CKvnYe6FHpTj/E0YGI7TcOZdGiYHoToOPSnoa12RtKI= @@ -879,6 +884,7 @@ github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORN github.com/kr/pretty v0.2.0/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= +github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= github.com/kr/pty v1.1.5/go.mod h1:9r2w37qlBe7rQ6e1fg1S/9xpWHSnaqNdHD3WcMdbPDA= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= 
@@ -992,8 +998,8 @@ github.com/nats-io/nuid v1.0.1/go.mod h1:19wcPz3Ph3q0Jbyiqsd0kePYG7A95tJPxeL+1OS github.com/ncw/swift v1.0.47/go.mod h1:23YIA4yWVnGwv2dQlN4bB7egfYX6YLn0Yo/S6zZO/ZM= github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno= github.com/nxadm/tail v1.4.4/go.mod h1:kenIhsEOeOJmVchQTgglprH7qJGnHDVpk1VPCcaMI8A= -github.com/observatorium/api v0.1.3-0.20220621123450-69c5f2661d01 h1:RH3c6jLZBEXvTbCOjKaRO+q177oGEA8h+yoeDLYOHlA= -github.com/observatorium/api v0.1.3-0.20220621123450-69c5f2661d01/go.mod h1:BvCKJmkIxb578L3fK3IH3jA6A6jWmIVB+HKbyTKEmrc= +github.com/observatorium/api v0.1.3-0.20230711132510-96e8799ade44 h1:QX1PSo1E9PdUbVJkA5FhZ1BA0GzDTfDLW3dbrGbjU5k= +github.com/observatorium/api v0.1.3-0.20230711132510-96e8799ade44/go.mod h1:xwDIn6xpTsymHor6ST57bJQm4FXjey31OfHyEKDFsdM= github.com/oklog/oklog v0.3.2/go.mod h1:FCV+B7mhrz4o+ueLpx+KqkyXRGMWOYEvfiXtdGtbWGs= github.com/oklog/run v1.0.0/go.mod h1:dlhp/R75TPv97u0XWUtDeV/lRKWPKSdTuV0TZvrmrQA= github.com/oklog/run v1.1.0/go.mod h1:sVPdnTZT1zYwAJeCMu2Th4T21pA3FPOQRfWjQlk7DVU= @@ -1009,8 +1015,9 @@ github.com/onsi/ginkgo v1.10.3/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+ github.com/onsi/ginkgo v1.11.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= github.com/onsi/ginkgo v1.12.1/go.mod h1:zj2OWP4+oCPe1qIXoGWkgMRwljMUYCdkwsT2108oapk= github.com/onsi/ginkgo v1.14.0/go.mod h1:iSB4RoI2tjJc9BBv4NKIKWKya62Rps+oPG/Lv9klQyY= -github.com/onsi/ginkgo v1.16.3 h1:3s86PZkI1ApJh6HFIzC1gXby/mIyZqfE5zxSvtoBSsM= +github.com/onsi/ginkgo v1.16.5 h1:8xi0RTUf59SOSfEtZMvwTvXYMzG4gV23XVHOZiXNtnE= github.com/onsi/ginkgo/v2 v2.11.0 h1:WgqUCUt/lT6yXoQ8Wef0fsNn5cAuMK7+KT9UFRz2tcU= +github.com/onsi/ginkgo/v2 v2.11.0/go.mod h1:ZhrRA5XmEE3x3rhlzamx/JJvujdZoJ2uvgI7kR0iZvM= github.com/onsi/gomega v0.0.0-20151007035656-2152b45fa28a/go.mod h1:C1qb7wdrVGGVU+Z6iS04AVkA3Q65CEZX59MT0QO5uiA= github.com/onsi/gomega v0.0.0-20170829124025-dcabb60a477c/go.mod 
h1:C1qb7wdrVGGVU+Z6iS04AVkA3Q65CEZX59MT0QO5uiA= github.com/onsi/gomega v1.4.3/go.mod h1:ex+gbHU/CVuBBDIJjb2X0qEXbFg53c61hWP/1CpauHY= @@ -1019,6 +1026,7 @@ github.com/onsi/gomega v1.7.1/go.mod h1:XdKZgCCFLUoM/7CFJVPcG8C1xQ1AJ0vpAezJrB7J github.com/onsi/gomega v1.10.1/go.mod h1:iN09h71vgCQne3DLsj+A5owkum+a2tYe+TOCB1ybHNo= github.com/onsi/gomega v1.10.3/go.mod h1:V9xEwhxec5O8UDM77eCW8vLymOMltsqPVYWrpDsH8xc= github.com/onsi/gomega v1.27.10 h1:naR28SdDFlqrG6kScpT8VWpu1xWY5nJRCF3XaYyBjhI= +github.com/onsi/gomega v1.27.10/go.mod h1:RsS8tutOdbdgzbPtzzATp12yT7kM5I5aElG3evPbQ0M= github.com/op/go-logging v0.0.0-20160315200505-970db520ece7/go.mod h1:HzydrMdWErDVzsI23lYNej1Htcns9BCg93Dk0bBINWk= github.com/opencontainers/go-digest v0.0.0-20170106003457-a6d0ee40d420/go.mod h1:cMLVZDEM3+U2I4VmLI6N8jQYUd2OVphdqWwCJHrFt2s= github.com/opencontainers/go-digest v0.0.0-20180430190053-c9281466c8b2/go.mod h1:cMLVZDEM3+U2I4VmLI6N8jQYUd2OVphdqWwCJHrFt2s= @@ -1166,6 +1174,7 @@ github.com/rogpeppe/go-internal v1.1.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFR github.com/rogpeppe/go-internal v1.2.2/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ= +github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog= github.com/rs/cors v1.6.0/go.mod h1:gFx+x8UowdsKA9AchylcLynDq+nNFfI8FkUZdN/jGCU= github.com/rs/cors v1.8.0/go.mod h1:EBwu+T5AvHOcXwvZIkQFjUN6s8Czyqw12GL/Y0tUyRM= github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= @@ -1227,6 +1236,8 @@ github.com/stretchr/objx v0.0.0-20180129172003-8a3f7159479f/go.mod h1:HFkY916IF+ github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.2.0/go.mod 
h1:qt09Ya8vawLte6SNmTgCsAVtYtaKzEcn8ATUoHMkEqE= +github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= +github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= github.com/stretchr/testify v0.0.0-20180303142811-b89eecf5ca5d/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= github.com/stretchr/testify v1.2.0/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= @@ -1244,6 +1255,10 @@ github.com/syndtr/gocapability v0.0.0-20170704070218-db04d3cc01c8/go.mod h1:hkRG github.com/syndtr/gocapability v0.0.0-20180916011248-d98352740cb2/go.mod h1:hkRG7XYTFWNJGYcbNJQlaLq0fg1yr4J4t/NcTQtrfww= github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635/go.mod h1:hkRG7XYTFWNJGYcbNJQlaLq0fg1yr4J4t/NcTQtrfww= github.com/tchap/go-patricia v2.2.6+incompatible/go.mod h1:bmLyhP68RS6kStMGxByiQ23RP/odRBOTVjwp2cDyi6I= +github.com/thanos-io/thanos v0.32.2 h1:W9vzOUdiIBKUc947IbTPAj4Lnv5r4MP8iXuUF1G1GkM= +github.com/thanos-io/thanos v0.32.2/go.mod h1:zfpzKCtqaqCy1D9/6ksZfL+U+KKt8mkcX6v3btuDHgg= +github.com/thibaultmg/observatorium v0.0.0-20230922090626-0f7a82fd0910 h1:HKkk7aBcPd4542+YTo9bJKS+IYCoGJTM8eOt/tM8+2c= +github.com/thibaultmg/observatorium v0.0.0-20230922090626-0f7a82fd0910/go.mod h1:Bfegly2FCsugSsgbEQ+2wGW14Hj6pH7yaE+vQA/dw2M= github.com/tidwall/pretty v1.0.0/go.mod h1:XNkn88O1ChpSDQmQeStsy+sBenx6DDtFZJxhVysOjyk= github.com/tinylib/msgp v1.0.2/go.mod h1:+d+yLhGm8mzTaHzB+wgMYrodPfmZrzkirds8fDWklFE= github.com/tinylib/msgp v1.1.0/go.mod h1:+d+yLhGm8mzTaHzB+wgMYrodPfmZrzkirds8fDWklFE= @@ -1344,6 +1359,7 @@ go.uber.org/multierr v1.3.0/go.mod h1:VgVr7evmIr6uPjLBxg28wmKNXyqE9akIJ5XnfpiKl+ go.uber.org/multierr v1.4.0/go.mod h1:VgVr7evmIr6uPjLBxg28wmKNXyqE9akIJ5XnfpiKl+4= go.uber.org/multierr v1.5.0/go.mod h1:FeouvMocqHpRaaGuG9EjoKcStLC43Zu/fmqdUMPcKYU= go.uber.org/multierr v1.11.0 
h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0= +go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y= go.uber.org/tools v0.0.0-20190618225709-2cfd321de3ee/go.mod h1:vJERXedbb3MVM5f9Ejo0C68/HhF8uaILCdgjnY+goOA= go.uber.org/zap v1.9.1/go.mod h1:vwi/ZaCAaUcBkycHslxD9B2zi4UTXhF60s6SWpuDF0Q= go.uber.org/zap v1.10.0/go.mod h1:vwi/ZaCAaUcBkycHslxD9B2zi4UTXhF60s6SWpuDF0Q= @@ -1536,6 +1552,7 @@ golang.org/x/sync v0.0.0-20201207232520-09787c993a3a/go.mod h1:RxMgew5VJxzue5/jJ golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.3.0 h1:ftCYgMx6zT/asHUrPw8BLLscYtGznsLAnjq5RH9P66E= +golang.org/x/sync v0.3.0/go.mod h1:FU7BRWz2tNW+3quACPkgCx/L+uEAv1htQ0V83Z9Rj+Y= golang.org/x/sys v0.0.0-20170830134202-bb24a47a89ea/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180823144017-11551d06cbcc/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= @@ -1770,6 +1787,7 @@ golang.org/x/tools v0.1.6-0.20210726203631-07bc1bf47fb2/go.mod h1:o0xws9oXOQQZyj golang.org/x/tools v0.1.9-0.20211209172050-90a85b2969be/go.mod h1:nABZi5QlRsZVlzPpHl034qft6wpY4eDcsTt5AaioBiU= golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= golang.org/x/tools v0.13.0 h1:Iey4qkscZuv0VvIt8E0neZjtPVQFSc870HQ448QgEmQ= +golang.org/x/tools v0.13.0/go.mod h1:HvlwmtVNQAhOuCjW7xxvovg8wbNq7LwfXh/k7wXUl58= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= @@ -2030,6 +2048,7 
@@ k8s.io/component-base v0.20.1/go.mod h1:guxkoJnNoh8LNrbtiQOlyp2Y2XFCZQmrcg2n/DeY k8s.io/component-base v0.20.4/go.mod h1:t4p9EdiagbVCJKrQ1RsA5/V4rFQNDfRlevJajlGwgjI= k8s.io/component-base v0.20.6/go.mod h1:6f1MPBAeI+mvuts3sIdtpjljHWBQ2cIy38oBIWMYnrM= k8s.io/component-base v0.28.2 h1:Yc1yU+6AQSlpJZyvehm/NkJBII72rzlEsd6MkBQ+G0E= +k8s.io/component-base v0.28.2/go.mod h1:4IuQPQviQCg3du4si8GpMrhAIegxpsgPngPRR/zWpzc= k8s.io/cri-api v0.17.3/go.mod h1:X1sbHmuXhwaHs9xxYffLqJogVsnI+f6cPRcgPel7ywM= k8s.io/cri-api v0.20.1/go.mod h1:2JRbKt+BFLTjtrILYVqQK5jqhI+XNdF6UiGMgczeBCI= k8s.io/cri-api v0.20.4/go.mod h1:2JRbKt+BFLTjtrILYVqQK5jqhI+XNdF6UiGMgczeBCI= From b2050798c4bcf7bf4c8d8f77e01a99be735a081d Mon Sep 17 00:00:00 2001 From: Thibault Mange <22740367+thibaultmg@users.noreply.github.com> Date: Wed, 27 Sep 2023 14:13:37 +0200 Subject: [PATCH 09/32] remove store cache Signed-off-by: Thibault Mange <22740367+thibaultmg@users.noreply.github.com> --- .../rhobs/observatorium-metrics-store-template.yaml | 4 ---- .../rhobs/observatorium-metrics-store-template.yaml | 12 ++++-------- services_go/observatorium/metrics.go | 5 ----- 3 files changed, 4 insertions(+), 17 deletions(-) diff --git a/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-store-template.yaml b/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-store-template.yaml index fec3b04fc3..050df86d2b 100755 --- a/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-store-template.yaml +++ b/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-store-template.yaml @@ -148,10 +148,6 @@ objects: - store - --data-dir=/var/thanos/store - --ignore-deletion-marks-delay=24h0m0s - - | - --index-cache.config=type: REDIS - config: - addr: rhobs-redis.rhobs.svc.cluster.local:6379 - --log.format=logfmt - --log.level=warn - --max-time=-22h0m0s diff --git a/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-store-template.yaml 
b/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-store-template.yaml index df8c6b8962..47c00ac5c3 100755 --- a/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-store-template.yaml +++ b/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-store-template.yaml @@ -131,14 +131,14 @@ objects: - podAffinityTerm: labelSelector: matchExpressions: - - key: app.kubernetes.io/instance - operator: In - values: - - observatorium - key: app.kubernetes.io/name operator: In values: - thanos-store + - key: app.kubernetes.io/instance + operator: In + values: + - observatorium namespaces: - observatorium topologyKey: kubernetes.io/hostname @@ -148,10 +148,6 @@ objects: - store - --data-dir=/var/thanos/store - --ignore-deletion-marks-delay=24h0m0s - - | - --index-cache.config=type: REDIS - config: - addr: rhobs-redis.rhobs.svc.cluster.local:6379 - --log.format=logfmt - --log.level=warn - --max-time=-22h0m0s diff --git a/services_go/observatorium/metrics.go b/services_go/observatorium/metrics.go index a8d68dd6cb..fa473d60e6 100644 --- a/services_go/observatorium/metrics.go +++ b/services_go/observatorium/metrics.go @@ -8,8 +8,6 @@ import ( "github.com/observatorium/observatorium/configuration_go/abstr/kubernetes/thanos/compactor" "github.com/observatorium/observatorium/configuration_go/abstr/kubernetes/thanos/store" "github.com/observatorium/observatorium/configuration_go/k8sutil" - "github.com/observatorium/observatorium/configuration_go/schemas/thanos/cache" - "github.com/observatorium/observatorium/configuration_go/schemas/thanos/cache/redis" "github.com/observatorium/observatorium/configuration_go/schemas/thanos/common" trclient "github.com/observatorium/observatorium/configuration_go/schemas/thanos/tracing/client" "github.com/observatorium/observatorium/configuration_go/schemas/thanos/tracing/jaeger" @@ -153,9 +151,6 @@ func makeStore(namespace string, replicas int32) (*store.StoreStatefulSet, []Pos }, } 
storeStatefulSet.Options.StoreEnableIndexHeaderLazyReader = true // Enables parallel rolling update of store nodes. - storeStatefulSet.Options.IndexCacheConfig = cache.NewConfig(redis.RedisClientConfig{ - Addr: "rhobs-redis.rhobs.svc.cluster.local:6379", - }) return storeStatefulSet, postProcessFuncs } From 2953d152dd22c6e6b3617921ddc64ba6ba9548b9 Mon Sep 17 00:00:00 2001 From: Thibault Mange <22740367+thibaultmg@users.noreply.github.com> Date: Wed, 27 Sep 2023 14:29:43 +0200 Subject: [PATCH 10/32] add some comments Signed-off-by: Thibault Mange <22740367+thibaultmg@users.noreply.github.com> --- mimic.go | 1 + services_go/observatorium/observatorium.go | 10 ++++++++-- services_go/observatorium/postprocess.go | 8 ++++++++ services_go/observatorium/sidecars.go | 2 ++ services_go/services.go | 1 + 5 files changed, 20 insertions(+), 2 deletions(-) diff --git a/mimic.go b/mimic.go index e87da5391e..15ab5012fd 100644 --- a/mimic.go +++ b/mimic.go @@ -15,6 +15,7 @@ func main() { cfgobservatorium.GenerateRBAC(gen.With(".tmp", "tenants")) + // Generate the manifests for all observatorium instances. services.Generate(gen.With("services")) } diff --git a/services_go/observatorium/observatorium.go b/services_go/observatorium/observatorium.go index 7b1ea3aee3..8030b03b49 100644 --- a/services_go/observatorium/observatorium.go +++ b/services_go/observatorium/observatorium.go @@ -21,6 +21,7 @@ const ( monitoringNamespace = "openshift-monitoring" ) +// TenantInstanceConfiguration is the configuration for a single tenant in an instance of observatorium. type TenantInstanceConfiguration struct { IngestRateLimit []struct{} QueryRateLimit []struct{} @@ -29,8 +30,7 @@ type TenantInstanceConfiguration struct { // Tenant *obs_api.tenant } -// type ComponentsConfig struct { - +// InstanceConfiguration is the configuration for an instance of observatorium. 
type InstanceConfiguration struct { Cluster string Instance string @@ -39,8 +39,11 @@ type InstanceConfiguration struct { ThanosStoreReplicas int32 } +// PostProcessFunc is a function that can be applied to a Kubernetes object after it has been generated. type PostProcessFunc func(obj runtime.Object) +// Observatorium is a representation of an instance of observatorium. +// It contains all the components that make up the instance. type Observatorium struct { Cfg *InstanceConfiguration Compactor *compactor.CompactorStatefulSet @@ -48,6 +51,7 @@ type Observatorium struct { PostProcessFuncs []PostProcessFunc } +// NewObservatorium creates a new instance of observatorium. func NewObservatorium(cfg *InstanceConfiguration) *Observatorium { postProcessFuncs := []PostProcessFunc{updateServiceMonitorNamespace} storeComponent, postProcess := makeStore(cfg.Namespace, cfg.ThanosStoreReplicas) @@ -63,6 +67,7 @@ func NewObservatorium(cfg *InstanceConfiguration) *Observatorium { } } +// Manifests generates the manifests for the instance of observatorium. func (o *Observatorium) Manifests(generator *mimic.Generator) { components := []struct { name string @@ -81,6 +86,7 @@ func (o *Observatorium) Manifests(generator *mimic.Generator) { } } +// postProcess applies all the post process functions to the manifests. func (o *Observatorium) postProcess(manifests k8sutil.ObjectMap) { for _, manifest := range manifests { for _, postProcessFunc := range o.PostProcessFuncs { diff --git a/services_go/observatorium/postprocess.go b/services_go/observatorium/postprocess.go index 11e31232cc..6374ed767d 100644 --- a/services_go/observatorium/postprocess.go +++ b/services_go/observatorium/postprocess.go @@ -12,12 +12,16 @@ const ( servingCertSecretNameAnnotation = "service.alpha.openshift.io/serving-cert-secret-name" ) +// updateServiceMonitorNamespace updates the namespace of all ServiceMonitor objects. 
+// This is useful when the ServiceMonitor must be deployed in a different namespace than observatorium. func updateServiceMonitorNamespace(obj runtime.Object) { if serviceMonitor, ok := obj.(*monv1.ServiceMonitor); ok { serviceMonitor.ObjectMeta.Namespace = monitoringNamespace } } +// addAnnotation adds an annotation to an object. +// The object is filtered by objectType and objectName. func addAnnotation(objectType, objectName, key, value string) func(object runtime.Object) { return func(object runtime.Object) { if object.GetObjectKind().GroupVersionKind().Kind != objectType { @@ -41,6 +45,7 @@ func addAnnotation(objectType, objectName, key, value string) func(object runtim } } +// addPodContainer adds an init container to a pod. func addPodInitContainer(objectName string, container corev1.Container) func(object runtime.Object) { return func(object runtime.Object) { name, pod := getPodFromObject(object) @@ -56,6 +61,7 @@ func addPodInitContainer(objectName string, container corev1.Container) func(obj } } +// addPodContainer adds a volume to a pod whose deployment/statefulset name is objectName. func addPodVolume(objectName string, volume corev1.Volume) func(object runtime.Object) { return func(object runtime.Object) { name, pod := getPodFromObject(object) @@ -72,6 +78,8 @@ func addPodVolume(objectName string, volume corev1.Volume) func(object runtime.O } } +// addContainerVolumeMount adds a volume mount to the main container whose deployment/statefulset name is objectName. +// The main container is the first container in the pod. 
func addContainerVolumeMount(objectName string, volumeMount corev1.VolumeMount) func(object runtime.Object) { return func(object runtime.Object) { name, pod := getPodFromObject(object) diff --git a/services_go/observatorium/sidecars.go b/services_go/observatorium/sidecars.go index 995c8134b0..74a9eafb38 100644 --- a/services_go/observatorium/sidecars.go +++ b/services_go/observatorium/sidecars.go @@ -7,6 +7,7 @@ import ( corev1 "k8s.io/api/core/v1" ) +// makeOauthProxy creates a container for the oauth-proxy sidecar. func makeOauthProxy(upstreamPort int32, namespace, serviceAccount, tlsSecret string) *k8sutil.Container { proxyPort := int32(8443) @@ -65,6 +66,7 @@ func makeOauthProxy(upstreamPort int32, namespace, serviceAccount, tlsSecret str } } +// makeJaegerAgent creates a container for the jaeger-agent sidecar. func makeJaegerAgent(collectorNamespace string) *k8sutil.Container { metricsPort := int32(14271) livelinesProbe := k8sutil.NewProbe("/", int(metricsPort), k8sutil.ProbeConfig{FailureThreshold: 5}) diff --git a/services_go/services.go b/services_go/services.go index 387cd6d41c..13d06c9dd6 100644 --- a/services_go/services.go +++ b/services_go/services.go @@ -6,6 +6,7 @@ import ( "github.com/rhobs/configuration/services_go/observatorium" ) +// Generate generates the manifests for all observatorium instances. 
func Generate(gen *mimic.Generator) { rhobsConfigs := rhobs.ClusterConfigs() for _, cfg := range rhobsConfigs { From dd0b4412c3ffb913d23921cfcf777c343d30d559 Mon Sep 17 00:00:00 2001 From: Thibault Mange <22740367+thibaultmg@users.noreply.github.com> Date: Wed, 27 Sep 2023 18:59:29 +0200 Subject: [PATCH 11/32] remove post process Signed-off-by: Thibault Mange <22740367+thibaultmg@users.noreply.github.com> --- ...bservatorium-metrics-compact-template.yaml | 5 +- .../observatorium-metrics-store-template.yaml | 7 +- ...bservatorium-metrics-compact-template.yaml | 5 +- .../observatorium-metrics-store-template.yaml | 7 +- services_go/observatorium/metrics.go | 102 +++++++++++----- services_go/observatorium/observatorium.go | 46 ++----- services_go/observatorium/postprocess.go | 114 ------------------ services_go/observatorium/sidecars.go | 2 +- 8 files changed, 93 insertions(+), 195 deletions(-) delete mode 100644 services_go/observatorium/postprocess.go diff --git a/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-compact-template.yaml b/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-compact-template.yaml index 90c46fadb5..3f57df5ea2 100755 --- a/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-compact-template.yaml +++ b/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-compact-template.yaml @@ -71,8 +71,9 @@ objects: app.kubernetes.io/name: thanos-compact app.kubernetes.io/part-of: observatorium app.kubernetes.io/version: v0.32.3 + prometheus: app-sre name: observatorium-thanos-compact - namespace: openshift-monitoring + namespace: openshift-customer-monitoring spec: endpoints: - port: http @@ -220,7 +221,7 @@ objects: - -cookie-secret-file=/etc/proxy/secrets/session_secret - -openshift-ca=/etc/pki/tls/cert.pem - -openshift-ca=/var/run/secrets/kubernetes.io/serviceaccount/ca.crt - image: quay.io/openshift/origin-oauth-proxy:v4.8.0 + image: quay.io/openshift/origin-oauth-proxy:v4.13.0 name: oauth-proxy 
ports: - containerPort: 8443 diff --git a/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-store-template.yaml b/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-store-template.yaml index 050df86d2b..198dfdd901 100755 --- a/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-store-template.yaml +++ b/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-store-template.yaml @@ -72,8 +72,9 @@ objects: app.kubernetes.io/name: thanos-store app.kubernetes.io/part-of: observatorium app.kubernetes.io/version: v0.32.3 + prometheus: app-sre name: observatorium-thanos-store - namespace: openshift-monitoring + namespace: openshift-customer-monitoring spec: endpoints: - port: http @@ -268,12 +269,12 @@ objects: serviceAccountName: observatorium-thanos-store terminationGracePeriodSeconds: 120 volumes: + - emptyDir: {} + name: hashmod-config - configMap: defaultMode: 511 name: thanos-store name: hashmod-config-template - - emptyDir: {} - name: hashmod-config updateStrategy: {} volumeClaimTemplates: - metadata: diff --git a/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-compact-template.yaml b/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-compact-template.yaml index 90c46fadb5..3f57df5ea2 100755 --- a/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-compact-template.yaml +++ b/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-compact-template.yaml @@ -71,8 +71,9 @@ objects: app.kubernetes.io/name: thanos-compact app.kubernetes.io/part-of: observatorium app.kubernetes.io/version: v0.32.3 + prometheus: app-sre name: observatorium-thanos-compact - namespace: openshift-monitoring + namespace: openshift-customer-monitoring spec: endpoints: - port: http @@ -220,7 +221,7 @@ objects: - -cookie-secret-file=/etc/proxy/secrets/session_secret - -openshift-ca=/etc/pki/tls/cert.pem - -openshift-ca=/var/run/secrets/kubernetes.io/serviceaccount/ca.crt - image: 
quay.io/openshift/origin-oauth-proxy:v4.8.0 + image: quay.io/openshift/origin-oauth-proxy:v4.13.0 name: oauth-proxy ports: - containerPort: 8443 diff --git a/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-store-template.yaml b/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-store-template.yaml index 47c00ac5c3..064ba2196c 100755 --- a/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-store-template.yaml +++ b/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-store-template.yaml @@ -72,8 +72,9 @@ objects: app.kubernetes.io/name: thanos-store app.kubernetes.io/part-of: observatorium app.kubernetes.io/version: v0.32.3 + prometheus: app-sre name: observatorium-thanos-store - namespace: openshift-monitoring + namespace: openshift-customer-monitoring spec: endpoints: - port: http @@ -268,12 +269,12 @@ objects: serviceAccountName: observatorium-thanos-store terminationGracePeriodSeconds: 120 volumes: + - emptyDir: {} + name: hashmod-config - configMap: defaultMode: 511 name: thanos-store name: hashmod-config-template - - emptyDir: {} - name: hashmod-config updateStrategy: {} volumeClaimTemplates: - metadata: diff --git a/services_go/observatorium/metrics.go b/services_go/observatorium/metrics.go index fa473d60e6..331e24c874 100644 --- a/services_go/observatorium/metrics.go +++ b/services_go/observatorium/metrics.go @@ -2,6 +2,8 @@ package observatorium import ( _ "embed" + "fmt" + "maps" "strconv" "time" @@ -11,14 +13,23 @@ import ( "github.com/observatorium/observatorium/configuration_go/schemas/thanos/common" trclient "github.com/observatorium/observatorium/configuration_go/schemas/thanos/tracing/client" "github.com/observatorium/observatorium/configuration_go/schemas/thanos/tracing/jaeger" + monv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" + appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" ) +const ( + thanosImage = 
"quay.io/thanos/thanos" + thanosImageTag = "v0.32.3" + monitoringNamespace = "openshift-customer-monitoring" + servingCertSecretNameAnnotation = "service.alpha.openshift.io/serving-cert-secret-name" +) + //go:embed assets/store-auto-shard-relabel-configMap.sh var storeAutoShardRelabelConfigMap string -func makeCompactor(namespace string) (*compactor.CompactorStatefulSet, []PostProcessFunc) { +func makeCompactor(namespace string) k8sutil.ObjectMap { // K8s config compactorSatefulset := compactor.NewCompactor() compactorSatefulset.Image = thanosImage @@ -50,15 +61,17 @@ func makeCompactor(namespace string) (*compactor.CompactorStatefulSet, []PostPro compactorSatefulset.Options.DeduplicationReplicaLabel = "replica" compactorSatefulset.Options.AddExtraOpts("--debug.max-compaction-level=3") - posProcessFuncs := []PostProcessFunc{ - addAnnotation("Service", compactorSatefulset.Name, servingCertSecretNameAnnotation, tlsSecret), - } + // Post process + manifests := compactorSatefulset.Manifests() + service := getObject[*corev1.Service](manifests) + service.ObjectMeta.Annotations[servingCertSecretNameAnnotation] = tlsSecret + postProcessServiceMonitor(getObject[*monv1.ServiceMonitor](manifests)) - return compactorSatefulset, posProcessFuncs + return manifests } -func makeStore(namespace string, replicas int32) (*store.StoreStatefulSet, []PostProcessFunc) { +func makeStore(namespace string, replicas int32) k8sutil.ObjectMap { // K8s config storeStatefulSet := store.NewStore() storeStatefulSet.Image = thanosImage @@ -109,31 +122,6 @@ func makeStore(namespace string, replicas int32) (*store.StoreStatefulSet, []Pos }, }, } - defaultMode := int32(0777) - postProcessFuncs := []PostProcessFunc{ - addPodVolume(storeStatefulSet.Name, corev1.Volume{ - Name: "hashmod-config-template", - VolumeSource: corev1.VolumeSource{ - ConfigMap: &corev1.ConfigMapVolumeSource{ - LocalObjectReference: corev1.LocalObjectReference{ - Name: storeStatefulSet.CommonLabels[k8sutil.NameLabel], - }, - 
DefaultMode: &defaultMode, - }, - }, - }), - addPodVolume(storeStatefulSet.Name, corev1.Volume{ - Name: "hashmod-config", - VolumeSource: corev1.VolumeSource{ - EmptyDir: &corev1.EmptyDirVolumeSource{}, - }, - }), - addContainerVolumeMount(storeStatefulSet.Name, corev1.VolumeMount{ - Name: "hashmod-config", - MountPath: "/etc/config", - }), - addPodInitContainer(storeStatefulSet.Name, initContainer), - } // Store config storeStatefulSet.Options.LogLevel = common.LogLevelWarn @@ -152,5 +140,55 @@ func makeStore(namespace string, replicas int32) (*store.StoreStatefulSet, []Pos } storeStatefulSet.Options.StoreEnableIndexHeaderLazyReader = true // Enables parallel rolling update of store nodes. - return storeStatefulSet, postProcessFuncs + // Post process + manifests := storeStatefulSet.Manifests() + postProcessServiceMonitor(getObject[*monv1.ServiceMonitor](manifests)) + statefulset := getObject[*appsv1.StatefulSet](manifests) + defaultMode := int32(0777) + statefulset.Spec.Template.Spec.Volumes = append(statefulset.Spec.Template.Spec.Volumes, corev1.Volume{ + Name: "hashmod-config", + VolumeSource: corev1.VolumeSource{ + EmptyDir: &corev1.EmptyDirVolumeSource{}, + }, + }, corev1.Volume{ + Name: "hashmod-config-template", + VolumeSource: corev1.VolumeSource{ + ConfigMap: &corev1.ConfigMapVolumeSource{ + LocalObjectReference: corev1.LocalObjectReference{ + Name: storeStatefulSet.CommonLabels[k8sutil.NameLabel], + }, + DefaultMode: &defaultMode, + }, + }, + }) + statefulset.Spec.Template.Spec.InitContainers = append(statefulset.Spec.Template.Spec.InitContainers, initContainer) + mainContainer := &statefulset.Spec.Template.Spec.Containers[0] + mainContainer.VolumeMounts = append(mainContainer.VolumeMounts, corev1.VolumeMount{ + Name: "hashmod-config", + MountPath: "/etc/config", + }) + + return manifests +} + +type KubeObject interface { + *corev1.Service | *appsv1.StatefulSet | *monv1.ServiceMonitor +} + +func getObject[T KubeObject](manifests k8sutil.ObjectMap) T { + 
for _, obj := range manifests { + if service, ok := obj.(T); ok { + return service + } + } + + panic(fmt.Sprintf("could not find object of type %T", *new(T))) +} + +func postProcessServiceMonitor(serviceMonitor *monv1.ServiceMonitor) { + serviceMonitor.ObjectMeta.Namespace = monitoringNamespace + // Same labels map is shared between all objects in the manifests. Need to clone it to avoid modifying all. + labels := maps.Clone(serviceMonitor.ObjectMeta.Labels) + labels["prometheus"] = "app-sre" + serviceMonitor.ObjectMeta.Labels = labels } diff --git a/services_go/observatorium/observatorium.go b/services_go/observatorium/observatorium.go index 8030b03b49..acc117f1d3 100644 --- a/services_go/observatorium/observatorium.go +++ b/services_go/observatorium/observatorium.go @@ -6,19 +6,10 @@ import ( "github.com/bwplotka/mimic" "github.com/bwplotka/mimic/encoding" "github.com/observatorium/api/rbac" - "github.com/observatorium/observatorium/configuration_go/abstr/kubernetes/thanos/compactor" - "github.com/observatorium/observatorium/configuration_go/abstr/kubernetes/thanos/store" "github.com/observatorium/observatorium/configuration_go/k8sutil" "github.com/observatorium/observatorium/configuration_go/openshift" templatev1 "github.com/openshift/api/template/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/runtime" -) - -const ( - thanosImage = "quay.io/thanos/thanos" - thanosImageTag = "v0.32.3" - monitoringNamespace = "openshift-monitoring" ) // TenantInstanceConfiguration is the configuration for a single tenant in an instance of observatorium. @@ -39,31 +30,20 @@ type InstanceConfiguration struct { ThanosStoreReplicas int32 } -// PostProcessFunc is a function that can be applied to a Kubernetes object after it has been generated. -type PostProcessFunc func(obj runtime.Object) - // Observatorium is a representation of an instance of observatorium. // It contains all the components that make up the instance. 
type Observatorium struct { - Cfg *InstanceConfiguration - Compactor *compactor.CompactorStatefulSet - Store *store.StoreStatefulSet - PostProcessFuncs []PostProcessFunc + Cfg *InstanceConfiguration + Compactor k8sutil.ObjectMap + Store k8sutil.ObjectMap } // NewObservatorium creates a new instance of observatorium. func NewObservatorium(cfg *InstanceConfiguration) *Observatorium { - postProcessFuncs := []PostProcessFunc{updateServiceMonitorNamespace} - storeComponent, postProcess := makeStore(cfg.Namespace, cfg.ThanosStoreReplicas) - postProcessFuncs = append(postProcessFuncs, postProcess...) - compactorComponent, postProcess := makeCompactor(cfg.Namespace) - postProcessFuncs = append(postProcessFuncs, postProcess...) - return &Observatorium{ - Cfg: cfg, - Compactor: compactorComponent, - Store: storeComponent, - PostProcessFuncs: postProcessFuncs, + Cfg: cfg, + Compactor: makeCompactor(cfg.Namespace), + Store: makeStore(cfg.Namespace, cfg.ThanosStoreReplicas), } } @@ -73,24 +53,14 @@ func (o *Observatorium) Manifests(generator *mimic.Generator) { name string objects k8sutil.ObjectMap }{ - {"observatorium-metrics-compact", o.Compactor.Manifests()}, - {"observatorium-metrics-store", o.Store.Manifests()}, + {"observatorium-metrics-compact", o.Compactor}, + {"observatorium-metrics-store", o.Store}, } for _, component := range components { - o.postProcess(component.objects) template := openshift.WrapInTemplate("", component.objects, metav1.ObjectMeta{ Name: component.name, }, []templatev1.Parameter{}) generator.With(o.Cfg.Cluster, o.Cfg.Instance).Add(component.name+"-template.yaml", encoding.GhodssYAML(template[""])) } } - -// postProcess applies all the post process functions to the manifests. 
-func (o *Observatorium) postProcess(manifests k8sutil.ObjectMap) { - for _, manifest := range manifests { - for _, postProcessFunc := range o.PostProcessFuncs { - postProcessFunc(manifest) - } - } -} diff --git a/services_go/observatorium/postprocess.go b/services_go/observatorium/postprocess.go deleted file mode 100644 index 6374ed767d..0000000000 --- a/services_go/observatorium/postprocess.go +++ /dev/null @@ -1,114 +0,0 @@ -package observatorium - -import ( - monv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" - appsv1 "k8s.io/api/apps/v1" - corev1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/runtime" -) - -const ( - servingCertSecretNameAnnotation = "service.alpha.openshift.io/serving-cert-secret-name" -) - -// updateServiceMonitorNamespace updates the namespace of all ServiceMonitor objects. -// This is useful when the ServiceMonitor must be deployed in a different namespace than observatorium. -func updateServiceMonitorNamespace(obj runtime.Object) { - if serviceMonitor, ok := obj.(*monv1.ServiceMonitor); ok { - serviceMonitor.ObjectMeta.Namespace = monitoringNamespace - } -} - -// addAnnotation adds an annotation to an object. -// The object is filtered by objectType and objectName. -func addAnnotation(objectType, objectName, key, value string) func(object runtime.Object) { - return func(object runtime.Object) { - if object.GetObjectKind().GroupVersionKind().Kind != objectType { - return - } - - objectMeta, ok := object.(metav1.ObjectMetaAccessor) - if !ok { - return - } - - if objectMeta.GetObjectMeta().GetName() != objectName { - return - } - - if objectMeta.GetObjectMeta().GetAnnotations() == nil { - objectMeta.GetObjectMeta().SetAnnotations(map[string]string{}) - } - - objectMeta.GetObjectMeta().GetAnnotations()[key] = value - } -} - -// addPodContainer adds an init container to a pod. 
-func addPodInitContainer(objectName string, container corev1.Container) func(object runtime.Object) { - return func(object runtime.Object) { - name, pod := getPodFromObject(object) - if pod == nil { - return - } - - if name != objectName { - return - } - - pod.Spec.InitContainers = append(pod.Spec.InitContainers, container) - } -} - -// addPodContainer adds a volume to a pod whose deployment/statefulset name is objectName. -func addPodVolume(objectName string, volume corev1.Volume) func(object runtime.Object) { - return func(object runtime.Object) { - name, pod := getPodFromObject(object) - - if pod == nil { - return - } - - if name != objectName { - return - } - - pod.Spec.Volumes = append(pod.Spec.Volumes, volume) - } -} - -// addContainerVolumeMount adds a volume mount to the main container whose deployment/statefulset name is objectName. -// The main container is the first container in the pod. -func addContainerVolumeMount(objectName string, volumeMount corev1.VolumeMount) func(object runtime.Object) { - return func(object runtime.Object) { - name, pod := getPodFromObject(object) - - if pod == nil { - return - } - - if name != objectName { - return - } - - container := &pod.Spec.Containers[0] - - container.VolumeMounts = append(container.VolumeMounts, volumeMount) - } -} - -func getPodFromObject(object runtime.Object) (string, *corev1.PodTemplateSpec) { - switch object.GetObjectKind().GroupVersionKind().Kind { - case "Deployment": - if deployment, ok := object.(*appsv1.Deployment); ok { - return deployment.ObjectMeta.Name, &deployment.Spec.Template - } - case "StatefulSet": - if statefulSet, ok := object.(*appsv1.StatefulSet); ok { - return statefulSet.ObjectMeta.Name, &statefulSet.Spec.Template - } - } - - return "", nil -} diff --git a/services_go/observatorium/sidecars.go b/services_go/observatorium/sidecars.go index 74a9eafb38..e35c3da17f 100644 --- a/services_go/observatorium/sidecars.go +++ b/services_go/observatorium/sidecars.go @@ -14,7 +14,7 @@ func 
makeOauthProxy(upstreamPort int32, namespace, serviceAccount, tlsSecret str return &k8sutil.Container{ Name: "oauth-proxy", Image: "quay.io/openshift/origin-oauth-proxy", - ImageTag: "v4.8.0", + ImageTag: "v4.13.0", Args: []string{ "-provider=openshift", fmt.Sprintf("-https-address=:%d", proxyPort), From 7d229f7c90ec689bcd1b83d74d837795eba8eb24 Mon Sep 17 00:00:00 2001 From: Thibault Mange <22740367+thibaultmg@users.noreply.github.com> Date: Thu, 28 Sep 2023 09:40:23 +0200 Subject: [PATCH 12/32] remove status section from yaml Signed-off-by: Thibault Mange <22740367+thibaultmg@users.noreply.github.com> --- ...bservatorium-metrics-compact-template.yaml | 3 -- .../observatorium-metrics-store-template.yaml | 3 -- ...bservatorium-metrics-compact-template.yaml | 3 -- .../observatorium-metrics-store-template.yaml | 3 -- services_go/observatorium/observatorium.go | 36 ++++++++++++++++++- 5 files changed, 35 insertions(+), 13 deletions(-) diff --git a/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-compact-template.yaml b/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-compact-template.yaml index 3f57df5ea2..acf4a392d9 100755 --- a/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-compact-template.yaml +++ b/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-compact-template.yaml @@ -275,6 +275,3 @@ objects: storage: 500Gi storageClassName: gp2 status: {} - status: - availableReplicas: 0 - replicas: 0 diff --git a/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-store-template.yaml b/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-store-template.yaml index 198dfdd901..e9d4f5f067 100755 --- a/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-store-template.yaml +++ b/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-store-template.yaml @@ -294,6 +294,3 @@ objects: storage: 500Gi storageClassName: gp2 status: {} - status: - availableReplicas: 0 - replicas: 0 
diff --git a/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-compact-template.yaml b/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-compact-template.yaml index 3f57df5ea2..acf4a392d9 100755 --- a/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-compact-template.yaml +++ b/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-compact-template.yaml @@ -275,6 +275,3 @@ objects: storage: 500Gi storageClassName: gp2 status: {} - status: - availableReplicas: 0 - replicas: 0 diff --git a/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-store-template.yaml b/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-store-template.yaml index 064ba2196c..fa3dae8464 100755 --- a/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-store-template.yaml +++ b/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-store-template.yaml @@ -294,6 +294,3 @@ objects: storage: 500Gi storageClassName: gp2 status: {} - status: - availableReplicas: 0 - replicas: 0 diff --git a/services_go/observatorium/observatorium.go b/services_go/observatorium/observatorium.go index acc117f1d3..8ae2b23941 100644 --- a/services_go/observatorium/observatorium.go +++ b/services_go/observatorium/observatorium.go @@ -3,6 +3,10 @@ package observatorium // import "github.com/rhobs/configuration/services_go/components/thanos/compactor" import ( + "bytes" + "io" + "regexp" + "github.com/bwplotka/mimic" "github.com/bwplotka/mimic/encoding" "github.com/observatorium/api/rbac" @@ -61,6 +65,36 @@ func (o *Observatorium) Manifests(generator *mimic.Generator) { template := openshift.WrapInTemplate("", component.objects, metav1.ObjectMeta{ Name: component.name, }, []templatev1.Parameter{}) - generator.With(o.Cfg.Cluster, o.Cfg.Instance).Add(component.name+"-template.yaml", encoding.GhodssYAML(template[""])) + generator.With(o.Cfg.Cluster, o.Cfg.Instance).Add(component.name+"-template.yaml", &customYAML{encoder: 
encoding.GhodssYAML(template[""])}) + } +} + +// customYAML is a YAML encoder wrapper that allows cleaning of the output. +type customYAML struct { + encoder encoding.Encoder + reader io.Reader +} + +func (c *customYAML) Read(p []byte) (n int, err error) { + if c.reader == nil { + ret, err := io.ReadAll(c.encoder) + if err != nil { + panic(err) + } + + c.reader = bytes.NewBuffer(c.clean(ret)) } + + return c.reader.Read(p) +} + +func (c *customYAML) EncodeComment(lines string) []byte { + return c.encoder.EncodeComment(lines) +} + +func (c *customYAML) clean(input []byte) []byte { + // Remove status section from manifests + re := regexp.MustCompile(`\s*status:\n\s*availableReplicas: 0\n\s*replicas: 0`) + ret := re.ReplaceAllString(string(input), "") + return []byte(ret) } From 68d1289bbe9962094d3bb533217cb7d886404901 Mon Sep 17 00:00:00 2001 From: Thibault Mange <22740367+thibaultmg@users.noreply.github.com> Date: Thu, 28 Sep 2023 12:15:57 +0200 Subject: [PATCH 13/32] fix anti affinity namespace Signed-off-by: Thibault Mange <22740367+thibaultmg@users.noreply.github.com> --- .../rhobs/observatorium-metrics-compact-template.yaml | 2 -- .../rhobs/observatorium-metrics-store-template.yaml | 2 -- .../rhobs/observatorium-metrics-compact-template.yaml | 2 -- .../rhobs/observatorium-metrics-store-template.yaml | 2 -- services_go/observatorium/metrics.go | 6 ++++-- 5 files changed, 4 insertions(+), 10 deletions(-) diff --git a/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-compact-template.yaml b/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-compact-template.yaml index acf4a392d9..edc513bc58 100755 --- a/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-compact-template.yaml +++ b/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-compact-template.yaml @@ -139,8 +139,6 @@ objects: operator: In values: - observatorium - namespaces: - - observatorium topologyKey: kubernetes.io/hostname weight: 100 containers: diff 
--git a/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-store-template.yaml b/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-store-template.yaml index e9d4f5f067..dfc8a88028 100755 --- a/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-store-template.yaml +++ b/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-store-template.yaml @@ -140,8 +140,6 @@ objects: operator: In values: - observatorium - namespaces: - - observatorium topologyKey: kubernetes.io/hostname weight: 100 containers: diff --git a/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-compact-template.yaml b/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-compact-template.yaml index acf4a392d9..edc513bc58 100755 --- a/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-compact-template.yaml +++ b/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-compact-template.yaml @@ -139,8 +139,6 @@ objects: operator: In values: - observatorium - namespaces: - - observatorium topologyKey: kubernetes.io/hostname weight: 100 containers: diff --git a/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-store-template.yaml b/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-store-template.yaml index fa3dae8464..8ad4b35d6a 100755 --- a/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-store-template.yaml +++ b/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-store-template.yaml @@ -140,8 +140,6 @@ objects: operator: In values: - observatorium - namespaces: - - observatorium topologyKey: kubernetes.io/hostname weight: 100 containers: diff --git a/services_go/observatorium/metrics.go b/services_go/observatorium/metrics.go index 331e24c874..7e89717d82 100644 --- a/services_go/observatorium/metrics.go +++ b/services_go/observatorium/metrics.go @@ -35,6 +35,7 @@ func makeCompactor(namespace string) k8sutil.ObjectMap { compactorSatefulset.Image = 
thanosImage compactorSatefulset.ImageTag = thanosImageTag compactorSatefulset.Namespace = namespace + compactorSatefulset.Affinity.PodAntiAffinity.PreferredDuringSchedulingIgnoredDuringExecution[0].PodAffinityTerm.Namespaces = []string{} compactorSatefulset.Replicas = 1 delete(compactorSatefulset.PodResources.Limits, corev1.ResourceCPU) // To be confirmed compactorSatefulset.PodResources.Requests[corev1.ResourceCPU] = resource.MustParse("200m") @@ -77,6 +78,7 @@ func makeStore(namespace string, replicas int32) k8sutil.ObjectMap { storeStatefulSet.Image = thanosImage storeStatefulSet.ImageTag = thanosImageTag storeStatefulSet.Namespace = namespace + storeStatefulSet.Affinity.PodAntiAffinity.PreferredDuringSchedulingIgnoredDuringExecution[0].PodAffinityTerm.Namespaces = []string{} storeStatefulSet.Replicas = replicas delete(storeStatefulSet.PodResources.Limits, corev1.ResourceCPU) // To be confirmed storeStatefulSet.PodResources.Requests[corev1.ResourceCPU] = resource.MustParse("4") @@ -171,11 +173,11 @@ func makeStore(namespace string, replicas int32) k8sutil.ObjectMap { return manifests } -type KubeObject interface { +type kubeObject interface { *corev1.Service | *appsv1.StatefulSet | *monv1.ServiceMonitor } -func getObject[T KubeObject](manifests k8sutil.ObjectMap) T { +func getObject[T kubeObject](manifests k8sutil.ObjectMap) T { for _, obj := range manifests { if service, ok := obj.(T); ok { return service From e211a99abb51f30d6ca76f85fa2498db01aef4f6 Mon Sep 17 00:00:00 2001 From: Thibault Mange <22740367+thibaultmg@users.noreply.github.com> Date: Thu, 28 Sep 2023 15:10:06 +0200 Subject: [PATCH 14/32] update tracing sidecar reporter host Signed-off-by: Thibault Mange <22740367+thibaultmg@users.noreply.github.com> --- services_go/observatorium/sidecars.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services_go/observatorium/sidecars.go b/services_go/observatorium/sidecars.go index e35c3da17f..051d738764 100644 --- 
a/services_go/observatorium/sidecars.go +++ b/services_go/observatorium/sidecars.go @@ -76,7 +76,7 @@ func makeJaegerAgent(collectorNamespace string) *k8sutil.Container { Image: "quay.io/app-sre/jaegertracing-jaeger-agent", ImageTag: "1.22.0", Args: []string{ - fmt.Sprintf("--reporter.grpc.host-port=dns:///jaeger-collector-headless.%s.svc:14250", collectorNamespace), + fmt.Sprintf("--reporter.grpc.host-port=dns:///otel-trace-writer-collector-headless.%s.svc:14250", collectorNamespace), "--reporter.type=grpc", "--agent.tags=pod.namespace=$(NAMESPACE),pod.name=$(POD)", }, From 5b4510f671109e7d16bbf5a48ccb0e959b72b51b Mon Sep 17 00:00:00 2001 From: Thibault Mange <22740367+thibaultmg@users.noreply.github.com> Date: Thu, 28 Sep 2023 15:13:46 +0200 Subject: [PATCH 15/32] add preManifests hooks Signed-off-by: Thibault Mange <22740367+thibaultmg@users.noreply.github.com> --- ...bservatorium-metrics-compact-template.yaml | 2 +- .../observatorium-metrics-store-template.yaml | 6 ++-- .../observatorium-metrics-store-template.yaml | 4 +-- services_go/instances/rhobs/rhobs.go | 35 +++++++++++++------ services_go/observatorium/metrics.go | 11 ++++-- services_go/observatorium/observatorium.go | 23 ++++++++---- 6 files changed, 56 insertions(+), 25 deletions(-) diff --git a/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-compact-template.yaml b/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-compact-template.yaml index edc513bc58..e96d1c3465 100755 --- a/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-compact-template.yaml +++ b/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-compact-template.yaml @@ -150,7 +150,7 @@ objects: - --delete-delay=24h0m0s - --downsample.concurrency=1 - --log.format=logfmt - - --log.level=warn + - --log.level=info - --objstore.config=$(OBJSTORE_CONFIG) - --retention.resolution-1h=8760h0m0s - --retention.resolution-5m=8760h0m0s diff --git 
a/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-store-template.yaml b/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-store-template.yaml index dfc8a88028..30f64e209c 100755 --- a/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-store-template.yaml +++ b/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-store-template.yaml @@ -148,7 +148,7 @@ objects: - --data-dir=/var/thanos/store - --ignore-deletion-marks-delay=24h0m0s - --log.format=logfmt - - --log.level=warn + - --log.level=info - --max-time=-22h0m0s - --objstore.config=$(OBJSTORE_CONFIG) - --selector.relabel-config-file=/tmp/config/hashmod-config.yaml @@ -202,7 +202,7 @@ objects: - mountPath: /etc/config name: hashmod-config - args: - - --reporter.grpc.host-port=dns:///jaeger-collector-headless.observatorium-tools.svc:14250 + - --reporter.grpc.host-port=dns:///otel-trace-writer-collector-headless.observatorium-tools.svc:14250 - --reporter.type=grpc - --agent.tags=pod.namespace=$(NAMESPACE),pod.name=$(POD) env: @@ -249,7 +249,7 @@ objects: - /tmp/entrypoint/entrypoint.sh env: - name: THANOS_STORE_REPLICAS - value: "2" + value: "1" image: quay.io/app-sre/ubi8-ubi imagePullPolicy: IfNotPresent name: init-hashmod-file diff --git a/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-store-template.yaml b/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-store-template.yaml index 8ad4b35d6a..b67874301b 100755 --- a/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-store-template.yaml +++ b/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-store-template.yaml @@ -202,7 +202,7 @@ objects: - mountPath: /etc/config name: hashmod-config - args: - - --reporter.grpc.host-port=dns:///jaeger-collector-headless.observatorium-tools.svc:14250 + - --reporter.grpc.host-port=dns:///otel-trace-writer-collector-headless.observatorium-tools.svc:14250 - --reporter.type=grpc - 
--agent.tags=pod.namespace=$(NAMESPACE),pod.name=$(POD) env: @@ -249,7 +249,7 @@ objects: - /tmp/entrypoint/entrypoint.sh env: - name: THANOS_STORE_REPLICAS - value: "3" + value: "1" image: quay.io/app-sre/ubi8-ubi imagePullPolicy: IfNotPresent name: init-hashmod-file diff --git a/services_go/instances/rhobs/rhobs.go b/services_go/instances/rhobs/rhobs.go index cfad9b4280..be0c86263b 100644 --- a/services_go/instances/rhobs/rhobs.go +++ b/services_go/instances/rhobs/rhobs.go @@ -1,24 +1,39 @@ package rhobs import ( + "github.com/observatorium/observatorium/configuration_go/abstr/kubernetes/thanos/compactor" + "github.com/observatorium/observatorium/configuration_go/abstr/kubernetes/thanos/store" + "github.com/observatorium/observatorium/configuration_go/schemas/thanos/common" "github.com/rhobs/configuration/services_go/observatorium" ) func ClusterConfigs() []observatorium.InstanceConfiguration { return []observatorium.InstanceConfiguration{ { - Cluster: "app-sre-stage-01", - Namespace: "rhobs", - Instance: "rhobs", - Tenants: []observatorium.TenantInstanceConfiguration{}, - ThanosStoreReplicas: 2, + Cluster: "app-sre-stage-01", + Namespace: "rhobs", + Instance: "rhobs", + Tenants: []observatorium.TenantInstanceConfiguration{}, + PreManifestsHooks: observatorium.PreManifestsHooks{ + ThanosStore: func(store *store.StoreStatefulSet) { + store.Replicas = 2 + store.Options.LogLevel = common.LogLevelInfo + }, + Compactor: func(compactor *compactor.CompactorStatefulSet) { + compactor.Options.LogLevel = common.LogLevelInfo + }, + }, }, { - Cluster: "telemeter-prod-01", - Namespace: "rhobs", - Instance: "rhobs", - Tenants: []observatorium.TenantInstanceConfiguration{}, - ThanosStoreReplicas: 3, + Cluster: "telemeter-prod-01", + Namespace: "rhobs", + Instance: "rhobs", + Tenants: []observatorium.TenantInstanceConfiguration{}, + PreManifestsHooks: observatorium.PreManifestsHooks{ + ThanosStore: func(store *store.StoreStatefulSet) { + store.Replicas = 3 + }, + }, }, } } diff 
--git a/services_go/observatorium/metrics.go b/services_go/observatorium/metrics.go index 7e89717d82..a43449bd15 100644 --- a/services_go/observatorium/metrics.go +++ b/services_go/observatorium/metrics.go @@ -29,7 +29,7 @@ const ( //go:embed assets/store-auto-shard-relabel-configMap.sh var storeAutoShardRelabelConfigMap string -func makeCompactor(namespace string) k8sutil.ObjectMap { +func makeCompactor(namespace string, preManifestsHook func(*compactor.CompactorStatefulSet)) k8sutil.ObjectMap { // K8s config compactorSatefulset := compactor.NewCompactor() compactorSatefulset.Image = thanosImage @@ -63,6 +63,9 @@ func makeCompactor(namespace string) k8sutil.ObjectMap { compactorSatefulset.Options.AddExtraOpts("--debug.max-compaction-level=3") // Post process + if preManifestsHook != nil { + preManifestsHook(compactorSatefulset) + } manifests := compactorSatefulset.Manifests() service := getObject[*corev1.Service](manifests) service.ObjectMeta.Annotations[servingCertSecretNameAnnotation] = tlsSecret @@ -72,8 +75,9 @@ func makeCompactor(namespace string) k8sutil.ObjectMap { } -func makeStore(namespace string, replicas int32) k8sutil.ObjectMap { +func makeStore(namespace string, preManifestHook func(*store.StoreStatefulSet)) k8sutil.ObjectMap { // K8s config + replicas := int32(1) storeStatefulSet := store.NewStore() storeStatefulSet.Image = thanosImage storeStatefulSet.ImageTag = thanosImageTag @@ -143,6 +147,9 @@ func makeStore(namespace string, replicas int32) k8sutil.ObjectMap { storeStatefulSet.Options.StoreEnableIndexHeaderLazyReader = true // Enables parallel rolling update of store nodes. 
// Post process + if preManifestHook != nil { + preManifestHook(storeStatefulSet) + } manifests := storeStatefulSet.Manifests() postProcessServiceMonitor(getObject[*monv1.ServiceMonitor](manifests)) statefulset := getObject[*appsv1.StatefulSet](manifests) diff --git a/services_go/observatorium/observatorium.go b/services_go/observatorium/observatorium.go index 8ae2b23941..2982b028b7 100644 --- a/services_go/observatorium/observatorium.go +++ b/services_go/observatorium/observatorium.go @@ -10,6 +10,8 @@ import ( "github.com/bwplotka/mimic" "github.com/bwplotka/mimic/encoding" "github.com/observatorium/api/rbac" + "github.com/observatorium/observatorium/configuration_go/abstr/kubernetes/thanos/compactor" + "github.com/observatorium/observatorium/configuration_go/abstr/kubernetes/thanos/store" "github.com/observatorium/observatorium/configuration_go/k8sutil" "github.com/observatorium/observatorium/configuration_go/openshift" templatev1 "github.com/openshift/api/template/v1" @@ -25,13 +27,20 @@ type TenantInstanceConfiguration struct { // Tenant *obs_api.tenant } +// PreManifestsHooks is a collection of hooks that can be used to modify the manifests before they are generated. +// This provides the instance configuration with the ability to customize each component deployed. +type PreManifestsHooks struct { + ThanosStore func(*store.StoreStatefulSet) + Compactor func(*compactor.CompactorStatefulSet) +} + // InstanceConfiguration is the configuration for an instance of observatorium. type InstanceConfiguration struct { - Cluster string - Instance string - Namespace string - Tenants []TenantInstanceConfiguration - ThanosStoreReplicas int32 + Cluster string + Instance string + Namespace string + Tenants []TenantInstanceConfiguration + PreManifestsHooks PreManifestsHooks } // Observatorium is a representation of an instance of observatorium. 
@@ -46,8 +55,8 @@ type Observatorium struct { func NewObservatorium(cfg *InstanceConfiguration) *Observatorium { return &Observatorium{ Cfg: cfg, - Compactor: makeCompactor(cfg.Namespace), - Store: makeStore(cfg.Namespace, cfg.ThanosStoreReplicas), + Compactor: makeCompactor(cfg.Namespace, cfg.PreManifestsHooks.Compactor), + Store: makeStore(cfg.Namespace, cfg.PreManifestsHooks.ThanosStore), } } From ee9c31909545e10dce499d547d035f6e6ace639b Mon Sep 17 00:00:00 2001 From: Thibault Mange <22740367+thibaultmg@users.noreply.github.com> Date: Thu, 28 Sep 2023 16:06:58 +0200 Subject: [PATCH 16/32] update store sharding Signed-off-by: Thibault Mange <22740367+thibaultmg@users.noreply.github.com> --- .../observatorium-metrics-store-template.yaml | 50 ++++++++++++++-- .../observatorium-metrics-store-template.yaml | 50 ++++++++++++++-- .../store-auto-shard-relabel-configMap.sh | 5 +- services_go/observatorium/metrics.go | 58 +++++++++++++++++-- services_go/observatorium/observatorium.go | 18 +++--- 5 files changed, 153 insertions(+), 28 deletions(-) mode change 100644 => 100755 services_go/observatorium/assets/store-auto-shard-relabel-configMap.sh diff --git a/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-store-template.yaml b/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-store-template.yaml index 30f64e209c..2b1adeca8b 100755 --- a/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-store-template.yaml +++ b/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-store-template.yaml @@ -4,13 +4,51 @@ metadata: creationTimestamp: null name: observatorium-metrics-store objects: +- metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: object-store-gateway + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-store + app.kubernetes.io/part-of: observatorium + name: list-pods + namespace: rhobs + rules: + - apiGroups: + - apps + resources: + - statefulsets + verbs: + - get + - 
list +- metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: object-store-gateway + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-store + app.kubernetes.io/part-of: observatorium + name: list-pods + namespace: rhobs + roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: list-pods + subjects: + - kind: ServiceAccount + name: observatorium-thanos-store + namespace: rhobs - apiVersion: v1 data: entrypoint.sh: "#!/bin/bash\n\n# Kubernetes replicas are named with the following convention \"-\". \n# This parameter expansion removes - all characters until the last hyphen, capturing only the ordinal.\nexport ORDINAL_INDEX=${HOSTNAME##*-}\n\n# - Logging parameters\necho \"generating store hashmod config with ORDINAL_INDEX=${ORDINAL_INDEX} - THANOS_STORE_REPLICAS=${THANOS_STORE_REPLICAS} HOSTNAME=${HOSTNAME}\"\n\ncat + all characters until the last hyphen, capturing only the ordinal.\nexport ORDINAL_INDEX=${HOSTNAME##*-}\n# + This parameter expansion removes all characters after the last hyphen, capturing + only the statefulset name.\nexport STATEFULSET_NAME=\"${HOSTNAME%-*}\"\nexport + THANOS_STORE_REPLICAS=$(kubectl get statefulset ${STATEFULSET_NAME} -n ${NAMESPACE} + -o=jsonpath='{.status.replicas}')\n\n# Logging parameters\necho \"generating + store hashmod config with ORDINAL_INDEX=${ORDINAL_INDEX} THANOS_STORE_REPLICAS=${STATEFULSET_NAME} + HOSTNAME=${HOSTNAME} NAMESPACE=${NAMESPACE} THANOS_STORE_REPLICAS=${THANOS_STORE_REPLICAS}\"\n\ncat </tmp/config/hashmod-config.yaml\n- action: hashmod\n source_labels: [\"__block_id\"]\n target_label: shard\n modulus: ${THANOS_STORE_REPLICAS}\n- action: keep\n source_labels: [\"shard\"]\n regex: ${ORDINAL_INDEX}\nEOF\n" @@ -248,8 +286,10 @@ objects: - args: - /tmp/entrypoint/entrypoint.sh env: - - name: THANOS_STORE_REPLICAS - value: "1" + - name: NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace image: quay.io/app-sre/ubi8-ubi imagePullPolicy: 
IfNotPresent name: init-hashmod-file diff --git a/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-store-template.yaml b/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-store-template.yaml index b67874301b..d3b4279a01 100755 --- a/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-store-template.yaml +++ b/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-store-template.yaml @@ -4,13 +4,51 @@ metadata: creationTimestamp: null name: observatorium-metrics-store objects: +- metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: object-store-gateway + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-store + app.kubernetes.io/part-of: observatorium + name: list-pods + namespace: rhobs + rules: + - apiGroups: + - apps + resources: + - statefulsets + verbs: + - get + - list +- metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: object-store-gateway + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-store + app.kubernetes.io/part-of: observatorium + name: list-pods + namespace: rhobs + roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: list-pods + subjects: + - kind: ServiceAccount + name: observatorium-thanos-store + namespace: rhobs - apiVersion: v1 data: entrypoint.sh: "#!/bin/bash\n\n# Kubernetes replicas are named with the following convention \"-\". 
\n# This parameter expansion removes - all characters until the last hyphen, capturing only the ordinal.\nexport ORDINAL_INDEX=${HOSTNAME##*-}\n\n# - Logging parameters\necho \"generating store hashmod config with ORDINAL_INDEX=${ORDINAL_INDEX} - THANOS_STORE_REPLICAS=${THANOS_STORE_REPLICAS} HOSTNAME=${HOSTNAME}\"\n\ncat + all characters until the last hyphen, capturing only the ordinal.\nexport ORDINAL_INDEX=${HOSTNAME##*-}\n# + This parameter expansion removes all characters after the last hyphen, capturing + only the statefulset name.\nexport STATEFULSET_NAME=\"${HOSTNAME%-*}\"\nexport + THANOS_STORE_REPLICAS=$(kubectl get statefulset ${STATEFULSET_NAME} -n ${NAMESPACE} + -o=jsonpath='{.status.replicas}')\n\n# Logging parameters\necho \"generating + store hashmod config with ORDINAL_INDEX=${ORDINAL_INDEX} THANOS_STORE_REPLICAS=${STATEFULSET_NAME} + HOSTNAME=${HOSTNAME} NAMESPACE=${NAMESPACE} THANOS_STORE_REPLICAS=${THANOS_STORE_REPLICAS}\"\n\ncat </tmp/config/hashmod-config.yaml\n- action: hashmod\n source_labels: [\"__block_id\"]\n target_label: shard\n modulus: ${THANOS_STORE_REPLICAS}\n- action: keep\n source_labels: [\"shard\"]\n regex: ${ORDINAL_INDEX}\nEOF\n" @@ -248,8 +286,10 @@ objects: - args: - /tmp/entrypoint/entrypoint.sh env: - - name: THANOS_STORE_REPLICAS - value: "1" + - name: NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace image: quay.io/app-sre/ubi8-ubi imagePullPolicy: IfNotPresent name: init-hashmod-file diff --git a/services_go/observatorium/assets/store-auto-shard-relabel-configMap.sh b/services_go/observatorium/assets/store-auto-shard-relabel-configMap.sh old mode 100644 new mode 100755 index 30604acd77..2a6e47ae93 --- a/services_go/observatorium/assets/store-auto-shard-relabel-configMap.sh +++ b/services_go/observatorium/assets/store-auto-shard-relabel-configMap.sh @@ -3,9 +3,12 @@ # Kubernetes replicas are named with the following convention "-". 
# This parameter expansion removes all characters until the last hyphen, capturing only the ordinal. export ORDINAL_INDEX=${HOSTNAME##*-} +# This parameter expansion removes all characters after the last hyphen, capturing only the statefulset name. +export STATEFULSET_NAME="${HOSTNAME%-*}" +export THANOS_STORE_REPLICAS=$(kubectl get statefulset ${STATEFULSET_NAME} -n ${NAMESPACE} -o=jsonpath='{.status.replicas}') # Logging parameters -echo "generating store hashmod config with ORDINAL_INDEX=${ORDINAL_INDEX} THANOS_STORE_REPLICAS=${THANOS_STORE_REPLICAS} HOSTNAME=${HOSTNAME}" +echo "generating store hashmod config with ORDINAL_INDEX=${ORDINAL_INDEX} THANOS_STORE_REPLICAS=${STATEFULSET_NAME} HOSTNAME=${HOSTNAME} NAMESPACE=${NAMESPACE} THANOS_STORE_REPLICAS=${THANOS_STORE_REPLICAS}" cat </tmp/config/hashmod-config.yaml - action: hashmod diff --git a/services_go/observatorium/metrics.go b/services_go/observatorium/metrics.go index a43449bd15..38293e2bc0 100644 --- a/services_go/observatorium/metrics.go +++ b/services_go/observatorium/metrics.go @@ -4,7 +4,6 @@ import ( _ "embed" "fmt" "maps" - "strconv" "time" "github.com/observatorium/observatorium/configuration_go/abstr/kubernetes/thanos/compactor" @@ -16,7 +15,9 @@ import ( monv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" + rbacv1 "k8s.io/api/rbac/v1" "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) const ( @@ -29,6 +30,7 @@ const ( //go:embed assets/store-auto-shard-relabel-configMap.sh var storeAutoShardRelabelConfigMap string +// makeCompactor creates a base compactor component that can be derived from using the preManifestsHook. 
func makeCompactor(namespace string, preManifestsHook func(*compactor.CompactorStatefulSet)) k8sutil.ObjectMap { // K8s config compactorSatefulset := compactor.NewCompactor() @@ -75,15 +77,15 @@ func makeCompactor(namespace string, preManifestsHook func(*compactor.CompactorS } +// makeStore creates a base store component that can be derived from using the preManifestsHook. func makeStore(namespace string, preManifestHook func(*store.StoreStatefulSet)) k8sutil.ObjectMap { // K8s config - replicas := int32(1) storeStatefulSet := store.NewStore() storeStatefulSet.Image = thanosImage storeStatefulSet.ImageTag = thanosImageTag storeStatefulSet.Namespace = namespace storeStatefulSet.Affinity.PodAntiAffinity.PreferredDuringSchedulingIgnoredDuringExecution[0].PodAffinityTerm.Namespaces = []string{} - storeStatefulSet.Replicas = replicas + storeStatefulSet.Replicas = 1 delete(storeStatefulSet.PodResources.Limits, corev1.ResourceCPU) // To be confirmed storeStatefulSet.PodResources.Requests[corev1.ResourceCPU] = resource.MustParse("4") storeStatefulSet.PodResources.Requests[corev1.ResourceMemory] = resource.MustParse("20Gi") @@ -113,8 +115,12 @@ func makeStore(namespace string, preManifestHook func(*store.StoreStatefulSet)) }, Env: []corev1.EnvVar{ { - Name: "THANOS_STORE_REPLICAS", - Value: strconv.Itoa(int(replicas)), + Name: "NAMESPACE", + ValueFrom: &corev1.EnvVarSource{ + FieldRef: &corev1.ObjectFieldSelector{ + FieldPath: "metadata.namespace", + }, + }, }, }, VolumeMounts: []corev1.VolumeMount{ @@ -177,11 +183,51 @@ func makeStore(namespace string, preManifestHook func(*store.StoreStatefulSet)) MountPath: "/etc/config", }) + // add rbac for reading the number of replicas from the statefulset in the initContainer + labels := maps.Clone(statefulset.ObjectMeta.Labels) + delete(labels, k8sutil.VersionLabel) + manifests["list-pods-rbac"] = &rbacv1.Role{ + // ObjectMeta: + ObjectMeta: metav1.ObjectMeta{ + Name: "list-pods", + Namespace: namespace, + Labels: labels, + }, + 
Rules: []rbacv1.PolicyRule{ + { + APIGroups: []string{"apps"}, + Resources: []string{"statefulsets"}, + Verbs: []string{"get", "list"}, + }, + }, + } + manifests["list-pods-rbac-binding"] = &rbacv1.RoleBinding{ + // ObjectMeta: + ObjectMeta: metav1.ObjectMeta{ + Name: "list-pods", + Namespace: namespace, + Labels: labels, + }, + Subjects: []rbacv1.Subject{ + { + + Kind: "ServiceAccount", + Name: statefulset.Spec.Template.Spec.ServiceAccountName, + Namespace: namespace, + }, + }, + RoleRef: rbacv1.RoleRef{ + Kind: "Role", + Name: "list-pods", + APIGroup: "rbac.authorization.k8s.io", + }, + } + return manifests } type kubeObject interface { - *corev1.Service | *appsv1.StatefulSet | *monv1.ServiceMonitor + *corev1.Service | *appsv1.StatefulSet | *monv1.ServiceMonitor | *corev1.ServiceAccount } func getObject[T kubeObject](manifests k8sutil.ObjectMap) T { diff --git a/services_go/observatorium/observatorium.go b/services_go/observatorium/observatorium.go index 2982b028b7..e426da23d6 100644 --- a/services_go/observatorium/observatorium.go +++ b/services_go/observatorium/observatorium.go @@ -43,20 +43,16 @@ type InstanceConfiguration struct { PreManifestsHooks PreManifestsHooks } -// Observatorium is a representation of an instance of observatorium. -// It contains all the components that make up the instance. +// Observatorium is an instance of observatorium. +// It contains the configuration for the instance and the ability to generate the manifests for the instance. type Observatorium struct { - Cfg *InstanceConfiguration - Compactor k8sutil.ObjectMap - Store k8sutil.ObjectMap + cfg *InstanceConfiguration } // NewObservatorium creates a new instance of observatorium. 
func NewObservatorium(cfg *InstanceConfiguration) *Observatorium { return &Observatorium{ - Cfg: cfg, - Compactor: makeCompactor(cfg.Namespace, cfg.PreManifestsHooks.Compactor), - Store: makeStore(cfg.Namespace, cfg.PreManifestsHooks.ThanosStore), + cfg: cfg, } } @@ -66,15 +62,15 @@ func (o *Observatorium) Manifests(generator *mimic.Generator) { name string objects k8sutil.ObjectMap }{ - {"observatorium-metrics-compact", o.Compactor}, - {"observatorium-metrics-store", o.Store}, + {"observatorium-metrics-compact", makeCompactor(o.cfg.Namespace, o.cfg.PreManifestsHooks.Compactor)}, + {"observatorium-metrics-store", makeStore(o.cfg.Namespace, o.cfg.PreManifestsHooks.ThanosStore)}, } for _, component := range components { template := openshift.WrapInTemplate("", component.objects, metav1.ObjectMeta{ Name: component.name, }, []templatev1.Parameter{}) - generator.With(o.Cfg.Cluster, o.Cfg.Instance).Add(component.name+"-template.yaml", &customYAML{encoder: encoding.GhodssYAML(template[""])}) + generator.With(o.cfg.Cluster, o.cfg.Instance).Add(component.name+"-template.yaml", &customYAML{encoder: encoding.GhodssYAML(template[""])}) } } From 12768483efea193ae88d32f7ca2964b241dc3344 Mon Sep 17 00:00:00 2001 From: Thibault Mange <22740367+thibaultmg@users.noreply.github.com> Date: Thu, 28 Sep 2023 16:52:53 +0200 Subject: [PATCH 17/32] add pdb Signed-off-by: Thibault Mange <22740367+thibaultmg@users.noreply.github.com> --- .../observatorium-metrics-store-template.yaml | 17 ++++++++++++++ .../observatorium-metrics-store-template.yaml | 17 ++++++++++++++ services_go/observatorium/metrics.go | 23 +++++++++++++++++-- services_go/observatorium/observatorium.go | 13 +++++++++-- 4 files changed, 66 insertions(+), 4 deletions(-) diff --git a/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-store-template.yaml b/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-store-template.yaml index 2b1adeca8b..6e7bd9e2a3 100755 --- 
a/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-store-template.yaml +++ b/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-store-template.yaml @@ -63,6 +63,23 @@ objects: app.kubernetes.io/version: v0.32.3 name: hashmod-config-template namespace: rhobs +- metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: object-store-gateway + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-store + app.kubernetes.io/part-of: observatorium + name: store + namespace: rhobs + spec: + maxUnavailable: 1 + selector: + matchLabels: + app.kubernetes.io/component: object-store-gateway + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-store + app.kubernetes.io/part-of: observatorium - apiVersion: v1 kind: Service metadata: diff --git a/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-store-template.yaml b/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-store-template.yaml index d3b4279a01..49bd3b5ab3 100755 --- a/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-store-template.yaml +++ b/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-store-template.yaml @@ -63,6 +63,23 @@ objects: app.kubernetes.io/version: v0.32.3 name: hashmod-config-template namespace: rhobs +- metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: object-store-gateway + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-store + app.kubernetes.io/part-of: observatorium + name: store + namespace: rhobs + spec: + maxUnavailable: 1 + selector: + matchLabels: + app.kubernetes.io/component: object-store-gateway + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-store + app.kubernetes.io/part-of: observatorium - apiVersion: v1 kind: Service metadata: diff --git a/services_go/observatorium/metrics.go b/services_go/observatorium/metrics.go index 38293e2bc0..71aa9aa021 100644 --- 
a/services_go/observatorium/metrics.go +++ b/services_go/observatorium/metrics.go @@ -15,9 +15,11 @@ import ( monv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" + policyv1 "k8s.io/api/policy/v1" rbacv1 "k8s.io/api/rbac/v1" "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/intstr" ) const ( @@ -187,7 +189,6 @@ func makeStore(namespace string, preManifestHook func(*store.StoreStatefulSet)) labels := maps.Clone(statefulset.ObjectMeta.Labels) delete(labels, k8sutil.VersionLabel) manifests["list-pods-rbac"] = &rbacv1.Role{ - // ObjectMeta: ObjectMeta: metav1.ObjectMeta{ Name: "list-pods", Namespace: namespace, @@ -202,7 +203,6 @@ func makeStore(namespace string, preManifestHook func(*store.StoreStatefulSet)) }, } manifests["list-pods-rbac-binding"] = &rbacv1.RoleBinding{ - // ObjectMeta: ObjectMeta: metav1.ObjectMeta{ Name: "list-pods", Namespace: namespace, @@ -223,6 +223,25 @@ func makeStore(namespace string, preManifestHook func(*store.StoreStatefulSet)) }, } + // Add pod disruption budget + manifests["store-pdb"] = &policyv1.PodDisruptionBudget{ + ObjectMeta: metav1.ObjectMeta{ + Name: "store", + Namespace: namespace, + Labels: labels, + }, + Spec: policyv1.PodDisruptionBudgetSpec{ + MaxUnavailable: &intstr.IntOrString{ + + Type: intstr.Int, + IntVal: 1, + }, + Selector: &metav1.LabelSelector{ + MatchLabels: labels, + }, + }, + } + return manifests } diff --git a/services_go/observatorium/observatorium.go b/services_go/observatorium/observatorium.go index e426da23d6..25c0394eff 100644 --- a/services_go/observatorium/observatorium.go +++ b/services_go/observatorium/observatorium.go @@ -99,7 +99,16 @@ func (c *customYAML) EncodeComment(lines string) []byte { func (c *customYAML) clean(input []byte) []byte { // Remove status section from manifests - re := regexp.MustCompile(`\s*status:\n\s*availableReplicas: 
0\n\s*replicas: 0`) - ret := re.ReplaceAllString(string(input), "") + re := []*regexp.Regexp{ + regexp.MustCompile(`\s*status:\n\s*availableReplicas: 0\n\s*replicas: 0`), + regexp.MustCompile(`\s*status:\n\s*currentHealthy: 0\n\s*desiredHealthy: 0\n\s*disruptionsAllowed: 0\n\s*expectedPods: 0`), + } + + ret := input + + for _, r := range re { + ret = r.ReplaceAll(ret, []byte{}) + } + return []byte(ret) } From a34478a6c42ade064d90d3c8b11c3ba556869256 Mon Sep 17 00:00:00 2001 From: Thibault Mange <22740367+thibaultmg@users.noreply.github.com> Date: Thu, 28 Sep 2023 17:08:38 +0200 Subject: [PATCH 18/32] set oauth proxy secret as param Signed-off-by: Thibault Mange <22740367+thibaultmg@users.noreply.github.com> --- .../observatorium-metrics-compact-template.yaml | 5 +++++ .../observatorium-metrics-compact-template.yaml | 5 +++++ services_go/observatorium/observatorium.go | 14 +++++++++++--- services_go/observatorium/sidecars.go | 1 + 4 files changed, 22 insertions(+), 3 deletions(-) diff --git a/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-compact-template.yaml b/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-compact-template.yaml index e96d1c3465..13dd15c574 100755 --- a/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-compact-template.yaml +++ b/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-compact-template.yaml @@ -216,6 +216,7 @@ objects: - -tls-cert=/etc/tls/private/tls.crt - -tls-key=/etc/tls/private/tls.key - -client-secret-file=/var/run/secrets/kubernetes.io/serviceaccount/token + - -cookie-secret=${OAUTH_PROXY_COOKIE_SECRET} - -cookie-secret-file=/etc/proxy/secrets/session_secret - -openshift-ca=/etc/pki/tls/cert.pem - -openshift-ca=/var/run/secrets/kubernetes.io/serviceaccount/ca.crt @@ -273,3 +274,7 @@ objects: storage: 500Gi storageClassName: gp2 status: {} +parameters: +- from: '[a-zA-Z0-9]{40}' + generate: expression + name: OAUTH_PROXY_COOKIE_SECRET diff --git 
a/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-compact-template.yaml b/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-compact-template.yaml index edc513bc58..29393dc14c 100755 --- a/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-compact-template.yaml +++ b/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-compact-template.yaml @@ -216,6 +216,7 @@ objects: - -tls-cert=/etc/tls/private/tls.crt - -tls-key=/etc/tls/private/tls.key - -client-secret-file=/var/run/secrets/kubernetes.io/serviceaccount/token + - -cookie-secret=${OAUTH_PROXY_COOKIE_SECRET} - -cookie-secret-file=/etc/proxy/secrets/session_secret - -openshift-ca=/etc/pki/tls/cert.pem - -openshift-ca=/var/run/secrets/kubernetes.io/serviceaccount/ca.crt @@ -273,3 +274,7 @@ objects: storage: 500Gi storageClassName: gp2 status: {} +parameters: +- from: '[a-zA-Z0-9]{40}' + generate: expression + name: OAUTH_PROXY_COOKIE_SECRET diff --git a/services_go/observatorium/observatorium.go b/services_go/observatorium/observatorium.go index 25c0394eff..9d66dd5d90 100644 --- a/services_go/observatorium/observatorium.go +++ b/services_go/observatorium/observatorium.go @@ -61,20 +61,28 @@ func (o *Observatorium) Manifests(generator *mimic.Generator) { components := []struct { name string objects k8sutil.ObjectMap + params []templatev1.Parameter }{ - {"observatorium-metrics-compact", makeCompactor(o.cfg.Namespace, o.cfg.PreManifestsHooks.Compactor)}, - {"observatorium-metrics-store", makeStore(o.cfg.Namespace, o.cfg.PreManifestsHooks.ThanosStore)}, + {"observatorium-metrics-compact", makeCompactor(o.cfg.Namespace, o.cfg.PreManifestsHooks.Compactor), []templatev1.Parameter{ + { + Name: "OAUTH_PROXY_COOKIE_SECRET", + Generate: "expression", + From: "[a-zA-Z0-9]{40}", + }, + }}, + {"observatorium-metrics-store", makeStore(o.cfg.Namespace, o.cfg.PreManifestsHooks.ThanosStore), []templatev1.Parameter{}}, } for _, component := range components { template := 
openshift.WrapInTemplate("", component.objects, metav1.ObjectMeta{ Name: component.name, - }, []templatev1.Parameter{}) + }, component.params) generator.With(o.cfg.Cluster, o.cfg.Instance).Add(component.name+"-template.yaml", &customYAML{encoder: encoding.GhodssYAML(template[""])}) } } // customYAML is a YAML encoder wrapper that allows cleaning of the output. +// Wihtout this, the manifests would contain a status section that is not needed. type customYAML struct { encoder encoding.Encoder reader io.Reader diff --git a/services_go/observatorium/sidecars.go b/services_go/observatorium/sidecars.go index 051d738764..3aab881241 100644 --- a/services_go/observatorium/sidecars.go +++ b/services_go/observatorium/sidecars.go @@ -27,6 +27,7 @@ func makeOauthProxy(upstreamPort int32, namespace, serviceAccount, tlsSecret str "-tls-cert=/etc/tls/private/tls.crt", "-tls-key=/etc/tls/private/tls.key", "-client-secret-file=/var/run/secrets/kubernetes.io/serviceaccount/token", + "-cookie-secret=${OAUTH_PROXY_COOKIE_SECRET}", // replaced by openshift template parameter "-cookie-secret-file=/etc/proxy/secrets/session_secret", "-openshift-ca=/etc/pki/tls/cert.pem", "-openshift-ca=/var/run/secrets/kubernetes.io/serviceaccount/ca.crt", From 09f7b643066b8d4965a549bda342f4ede8ab1dae Mon Sep 17 00:00:00 2001 From: Thibault Mange <22740367+thibaultmg@users.noreply.github.com> Date: Thu, 28 Sep 2023 17:18:51 +0200 Subject: [PATCH 19/32] fix type meta Signed-off-by: Thibault Mange <22740367+thibaultmg@users.noreply.github.com> --- ...bservatorium-metrics-compact-template.yaml | 19 +++++++++ .../observatorium-metrics-store-template.yaml | 14 +++++-- ...bservatorium-metrics-compact-template.yaml | 19 +++++++++ .../observatorium-metrics-store-template.yaml | 14 +++++-- services_go/observatorium/metrics.go | 39 ++++++++++++++++++- 5 files changed, 96 insertions(+), 9 deletions(-) diff --git a/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-compact-template.yaml 
b/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-compact-template.yaml index 13dd15c574..a53d05d8c5 100755 --- a/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-compact-template.yaml +++ b/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-compact-template.yaml @@ -274,6 +274,25 @@ objects: storage: 500Gi storageClassName: gp2 status: {} +- apiVersion: policy/v1 + kind: PodDisruptionBudget + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: database-compactor + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-compact + app.kubernetes.io/part-of: observatorium + name: observatorium-thanos-compact + namespace: rhobs + spec: + maxUnavailable: 1 + selector: + matchLabels: + app.kubernetes.io/component: database-compactor + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-compact + app.kubernetes.io/part-of: observatorium parameters: - from: '[a-zA-Z0-9]{40}' generate: expression diff --git a/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-store-template.yaml b/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-store-template.yaml index 6e7bd9e2a3..e3511f4e1d 100755 --- a/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-store-template.yaml +++ b/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-store-template.yaml @@ -4,7 +4,9 @@ metadata: creationTimestamp: null name: observatorium-metrics-store objects: -- metadata: +- apiVersion: rbac.authorization.k8s.io/v1 + kind: Role + metadata: creationTimestamp: null labels: app.kubernetes.io/component: object-store-gateway @@ -21,7 +23,9 @@ objects: verbs: - get - list -- metadata: +- apiVersion: rbac.authorization.k8s.io/v1 + kind: RoleBinding + metadata: creationTimestamp: null labels: app.kubernetes.io/component: object-store-gateway @@ -63,14 +67,16 @@ objects: app.kubernetes.io/version: v0.32.3 name: hashmod-config-template namespace: rhobs 
-- metadata: +- apiVersion: policy/v1 + kind: PodDisruptionBudget + metadata: creationTimestamp: null labels: app.kubernetes.io/component: object-store-gateway app.kubernetes.io/instance: observatorium app.kubernetes.io/name: thanos-store app.kubernetes.io/part-of: observatorium - name: store + name: observatorium-thanos-store namespace: rhobs spec: maxUnavailable: 1 diff --git a/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-compact-template.yaml b/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-compact-template.yaml index 29393dc14c..a45a6b8099 100755 --- a/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-compact-template.yaml +++ b/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-compact-template.yaml @@ -274,6 +274,25 @@ objects: storage: 500Gi storageClassName: gp2 status: {} +- apiVersion: policy/v1 + kind: PodDisruptionBudget + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: database-compactor + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-compact + app.kubernetes.io/part-of: observatorium + name: observatorium-thanos-compact + namespace: rhobs + spec: + maxUnavailable: 1 + selector: + matchLabels: + app.kubernetes.io/component: database-compactor + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-compact + app.kubernetes.io/part-of: observatorium parameters: - from: '[a-zA-Z0-9]{40}' generate: expression diff --git a/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-store-template.yaml b/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-store-template.yaml index 49bd3b5ab3..574e162e05 100755 --- a/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-store-template.yaml +++ b/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-store-template.yaml @@ -4,7 +4,9 @@ metadata: creationTimestamp: null name: observatorium-metrics-store objects: -- metadata: +- 
apiVersion: rbac.authorization.k8s.io/v1 + kind: Role + metadata: creationTimestamp: null labels: app.kubernetes.io/component: object-store-gateway @@ -21,7 +23,9 @@ objects: verbs: - get - list -- metadata: +- apiVersion: rbac.authorization.k8s.io/v1 + kind: RoleBinding + metadata: creationTimestamp: null labels: app.kubernetes.io/component: object-store-gateway @@ -63,14 +67,16 @@ objects: app.kubernetes.io/version: v0.32.3 name: hashmod-config-template namespace: rhobs -- metadata: +- apiVersion: policy/v1 + kind: PodDisruptionBudget + metadata: creationTimestamp: null labels: app.kubernetes.io/component: object-store-gateway app.kubernetes.io/instance: observatorium app.kubernetes.io/name: thanos-store app.kubernetes.io/part-of: observatorium - name: store + name: observatorium-thanos-store namespace: rhobs spec: maxUnavailable: 1 diff --git a/services_go/observatorium/metrics.go b/services_go/observatorium/metrics.go index 71aa9aa021..c3a8af34cd 100644 --- a/services_go/observatorium/metrics.go +++ b/services_go/observatorium/metrics.go @@ -75,6 +75,31 @@ func makeCompactor(namespace string, preManifestsHook func(*compactor.CompactorS service.ObjectMeta.Annotations[servingCertSecretNameAnnotation] = tlsSecret postProcessServiceMonitor(getObject[*monv1.ServiceMonitor](manifests)) + // Add pod disruption budget + labels := maps.Clone(getObject[*appsv1.StatefulSet](manifests).ObjectMeta.Labels) + delete(labels, k8sutil.VersionLabel) + manifests["store-pdb"] = &policyv1.PodDisruptionBudget{ + TypeMeta: metav1.TypeMeta{ + Kind: "PodDisruptionBudget", + APIVersion: policyv1.SchemeGroupVersion.String(), + }, + ObjectMeta: metav1.ObjectMeta{ + Name: compactorSatefulset.Name, + Namespace: namespace, + Labels: labels, + }, + Spec: policyv1.PodDisruptionBudgetSpec{ + MaxUnavailable: &intstr.IntOrString{ + + Type: intstr.Int, + IntVal: 1, + }, + Selector: &metav1.LabelSelector{ + MatchLabels: labels, + }, + }, + } + return manifests } @@ -189,6 +214,10 @@ func 
makeStore(namespace string, preManifestHook func(*store.StoreStatefulSet)) labels := maps.Clone(statefulset.ObjectMeta.Labels) delete(labels, k8sutil.VersionLabel) manifests["list-pods-rbac"] = &rbacv1.Role{ + TypeMeta: metav1.TypeMeta{ + Kind: "Role", + APIVersion: rbacv1.SchemeGroupVersion.String(), + }, ObjectMeta: metav1.ObjectMeta{ Name: "list-pods", Namespace: namespace, @@ -203,6 +232,10 @@ func makeStore(namespace string, preManifestHook func(*store.StoreStatefulSet)) }, } manifests["list-pods-rbac-binding"] = &rbacv1.RoleBinding{ + TypeMeta: metav1.TypeMeta{ + Kind: "RoleBinding", + APIVersion: rbacv1.SchemeGroupVersion.String(), + }, ObjectMeta: metav1.ObjectMeta{ Name: "list-pods", Namespace: namespace, @@ -225,8 +258,12 @@ func makeStore(namespace string, preManifestHook func(*store.StoreStatefulSet)) // Add pod disruption budget manifests["store-pdb"] = &policyv1.PodDisruptionBudget{ + TypeMeta: metav1.TypeMeta{ + Kind: "PodDisruptionBudget", + APIVersion: policyv1.SchemeGroupVersion.String(), + }, ObjectMeta: metav1.ObjectMeta{ - Name: "store", + Name: storeStatefulSet.Name, Namespace: namespace, Labels: labels, }, From 73a6ce430efdde5d0e8efccdfbcf39300bfc7299 Mon Sep 17 00:00:00 2001 From: Thibault Mange <22740367+thibaultmg@users.noreply.github.com> Date: Thu, 28 Sep 2023 17:55:48 +0200 Subject: [PATCH 20/32] remove oauth secret Signed-off-by: Thibault Mange <22740367+thibaultmg@users.noreply.github.com> --- ...bservatorium-metrics-compact-template.yaml | 22 ++++--------------- ...bservatorium-metrics-compact-template.yaml | 14 ------------ services_go/observatorium/sidecars.go | 5 ----- 3 files changed, 4 insertions(+), 37 deletions(-) diff --git a/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-compact-template.yaml b/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-compact-template.yaml index a53d05d8c5..f47e817568 100755 --- a/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-compact-template.yaml +++ 
b/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-compact-template.yaml @@ -4,20 +4,6 @@ metadata: creationTimestamp: null name: observatorium-metrics-compact objects: -- apiVersion: v1 - data: - session_secret: c2VjcmV0 - kind: Secret - metadata: - creationTimestamp: null - labels: - app.kubernetes.io/component: database-compactor - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: thanos-compact - app.kubernetes.io/part-of: observatorium - app.kubernetes.io/version: v0.32.3 - name: compact-proxy - namespace: rhobs - apiVersion: v1 kind: Service metadata: @@ -131,14 +117,14 @@ objects: - podAffinityTerm: labelSelector: matchExpressions: - - key: app.kubernetes.io/name - operator: In - values: - - thanos-compact - key: app.kubernetes.io/instance operator: In values: - observatorium + - key: app.kubernetes.io/name + operator: In + values: + - thanos-compact topologyKey: kubernetes.io/hostname weight: 100 containers: diff --git a/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-compact-template.yaml b/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-compact-template.yaml index a45a6b8099..df09678c9e 100755 --- a/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-compact-template.yaml +++ b/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-compact-template.yaml @@ -4,20 +4,6 @@ metadata: creationTimestamp: null name: observatorium-metrics-compact objects: -- apiVersion: v1 - data: - session_secret: c2VjcmV0 - kind: Secret - metadata: - creationTimestamp: null - labels: - app.kubernetes.io/component: database-compactor - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: thanos-compact - app.kubernetes.io/part-of: observatorium - app.kubernetes.io/version: v0.32.3 - name: compact-proxy - namespace: rhobs - apiVersion: v1 kind: Service metadata: diff --git a/services_go/observatorium/sidecars.go b/services_go/observatorium/sidecars.go index 
3aab881241..7e7a706571 100644 --- a/services_go/observatorium/sidecars.go +++ b/services_go/observatorium/sidecars.go @@ -59,11 +59,6 @@ func makeOauthProxy(upstreamPort int32, namespace, serviceAccount, tlsSecret str k8sutil.NewPodVolumeFromSecret("compact-tls", tlsSecret), k8sutil.NewPodVolumeFromSecret("compact-proxy", "compact-proxy"), }, - Secrets: map[string]map[string][]byte{ - "compact-proxy": { - "session_secret": []byte("secret"), - }, - }, } } From 9725b92fd8a1d7b3918b3f59756d8afd1a4ead03 Mon Sep 17 00:00:00 2001 From: Thibault Mange <22740367+thibaultmg@users.noreply.github.com> Date: Thu, 28 Sep 2023 17:58:48 +0200 Subject: [PATCH 21/32] remove cookie file config in oauth proxy Signed-off-by: Thibault Mange <22740367+thibaultmg@users.noreply.github.com> --- .../observatorium-metrics-compact-template.yaml | 15 ++++----------- .../observatorium-metrics-compact-template.yaml | 7 ------- services_go/observatorium/sidecars.go | 7 ------- 3 files changed, 4 insertions(+), 25 deletions(-) diff --git a/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-compact-template.yaml b/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-compact-template.yaml index f47e817568..5798829c2c 100755 --- a/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-compact-template.yaml +++ b/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-compact-template.yaml @@ -117,14 +117,14 @@ objects: - podAffinityTerm: labelSelector: matchExpressions: - - key: app.kubernetes.io/instance - operator: In - values: - - observatorium - key: app.kubernetes.io/name operator: In values: - thanos-compact + - key: app.kubernetes.io/instance + operator: In + values: + - observatorium topologyKey: kubernetes.io/hostname weight: 100 containers: @@ -203,7 +203,6 @@ objects: - -tls-key=/etc/tls/private/tls.key - -client-secret-file=/var/run/secrets/kubernetes.io/serviceaccount/token - -cookie-secret=${OAUTH_PROXY_COOKIE_SECRET} - - 
-cookie-secret-file=/etc/proxy/secrets/session_secret - -openshift-ca=/etc/pki/tls/cert.pem - -openshift-ca=/var/run/secrets/kubernetes.io/serviceaccount/ca.crt image: quay.io/openshift/origin-oauth-proxy:v4.13.0 @@ -224,9 +223,6 @@ objects: - mountPath: /etc/tls/private name: compact-tls readOnly: true - - mountPath: /etc/proxy/secrets - name: compact-proxy - readOnly: true nodeSelector: kubernetes.io/os: linux securityContext: @@ -238,9 +234,6 @@ objects: - name: compact-tls secret: secretName: compact-tls - - name: compact-proxy - secret: - secretName: compact-proxy updateStrategy: {} volumeClaimTemplates: - metadata: diff --git a/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-compact-template.yaml b/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-compact-template.yaml index df09678c9e..19759c203a 100755 --- a/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-compact-template.yaml +++ b/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-compact-template.yaml @@ -203,7 +203,6 @@ objects: - -tls-key=/etc/tls/private/tls.key - -client-secret-file=/var/run/secrets/kubernetes.io/serviceaccount/token - -cookie-secret=${OAUTH_PROXY_COOKIE_SECRET} - - -cookie-secret-file=/etc/proxy/secrets/session_secret - -openshift-ca=/etc/pki/tls/cert.pem - -openshift-ca=/var/run/secrets/kubernetes.io/serviceaccount/ca.crt image: quay.io/openshift/origin-oauth-proxy:v4.13.0 @@ -224,9 +223,6 @@ objects: - mountPath: /etc/tls/private name: compact-tls readOnly: true - - mountPath: /etc/proxy/secrets - name: compact-proxy - readOnly: true nodeSelector: kubernetes.io/os: linux securityContext: @@ -238,9 +234,6 @@ objects: - name: compact-tls secret: secretName: compact-tls - - name: compact-proxy - secret: - secretName: compact-proxy updateStrategy: {} volumeClaimTemplates: - metadata: diff --git a/services_go/observatorium/sidecars.go b/services_go/observatorium/sidecars.go index 7e7a706571..8fd69e9028 100644 --- 
a/services_go/observatorium/sidecars.go +++ b/services_go/observatorium/sidecars.go @@ -28,7 +28,6 @@ func makeOauthProxy(upstreamPort int32, namespace, serviceAccount, tlsSecret str "-tls-key=/etc/tls/private/tls.key", "-client-secret-file=/var/run/secrets/kubernetes.io/serviceaccount/token", "-cookie-secret=${OAUTH_PROXY_COOKIE_SECRET}", // replaced by openshift template parameter - "-cookie-secret-file=/etc/proxy/secrets/session_secret", "-openshift-ca=/etc/pki/tls/cert.pem", "-openshift-ca=/var/run/secrets/kubernetes.io/serviceaccount/ca.crt", }, @@ -49,15 +48,9 @@ func makeOauthProxy(upstreamPort int32, namespace, serviceAccount, tlsSecret str MountPath: "/etc/tls/private", ReadOnly: true, }, - { - Name: "compact-proxy", - MountPath: "/etc/proxy/secrets", - ReadOnly: true, - }, }, Volumes: []corev1.Volume{ k8sutil.NewPodVolumeFromSecret("compact-tls", tlsSecret), - k8sutil.NewPodVolumeFromSecret("compact-proxy", "compact-proxy"), }, } } From 27e2a59aa92d542f730d55ee75ab89952054425f Mon Sep 17 00:00:00 2001 From: Thibault Mange <22740367+thibaultmg@users.noreply.github.com> Date: Mon, 2 Oct 2023 15:24:06 +0200 Subject: [PATCH 22/32] upgrade observatorium dep for fixes Signed-off-by: Thibault Mange <22740367+thibaultmg@users.noreply.github.com> --- go.mod | 3 ++- go.sum | 2 ++ .../observatorium-metrics-compact-template.yaml | 8 ++++---- .../observatorium-metrics-store-template.yaml | 15 +++++++++++---- .../observatorium-metrics-compact-template.yaml | 8 ++++---- .../observatorium-metrics-store-template.yaml | 15 +++++++++++---- services_go/observatorium/metrics.go | 12 +++++------- 7 files changed, 39 insertions(+), 24 deletions(-) diff --git a/go.mod b/go.mod index 6d43d5839d..dccefed668 100644 --- a/go.mod +++ b/go.mod @@ -84,4 +84,5 @@ require ( sigs.k8s.io/yaml v1.3.0 // indirect ) -replace github.com/observatorium/observatorium => github.com/thibaultmg/observatorium v0.0.0-20230922090626-0f7a82fd0910 +// Delete when 
https://github.com/observatorium/observatorium/pull/543 is merged to main branch +replace github.com/observatorium/observatorium => github.com/thibaultmg/observatorium v0.0.0-20231002130052-3e0c7849c81f diff --git a/go.sum b/go.sum index ad7f29602d..e5a1462d7a 100644 --- a/go.sum +++ b/go.sum @@ -1259,6 +1259,8 @@ github.com/thanos-io/thanos v0.32.2 h1:W9vzOUdiIBKUc947IbTPAj4Lnv5r4MP8iXuUF1G1G github.com/thanos-io/thanos v0.32.2/go.mod h1:zfpzKCtqaqCy1D9/6ksZfL+U+KKt8mkcX6v3btuDHgg= github.com/thibaultmg/observatorium v0.0.0-20230922090626-0f7a82fd0910 h1:HKkk7aBcPd4542+YTo9bJKS+IYCoGJTM8eOt/tM8+2c= github.com/thibaultmg/observatorium v0.0.0-20230922090626-0f7a82fd0910/go.mod h1:Bfegly2FCsugSsgbEQ+2wGW14Hj6pH7yaE+vQA/dw2M= +github.com/thibaultmg/observatorium v0.0.0-20231002130052-3e0c7849c81f h1:bs2Q0quoMiEXncDLKSyzzj2K57zwE64aIebMLxWQ83s= +github.com/thibaultmg/observatorium v0.0.0-20231002130052-3e0c7849c81f/go.mod h1:Bfegly2FCsugSsgbEQ+2wGW14Hj6pH7yaE+vQA/dw2M= github.com/tidwall/pretty v1.0.0/go.mod h1:XNkn88O1ChpSDQmQeStsy+sBenx6DDtFZJxhVysOjyk= github.com/tinylib/msgp v1.0.2/go.mod h1:+d+yLhGm8mzTaHzB+wgMYrodPfmZrzkirds8fDWklFE= github.com/tinylib/msgp v1.1.0/go.mod h1:+d+yLhGm8mzTaHzB+wgMYrodPfmZrzkirds8fDWklFE= diff --git a/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-compact-template.yaml b/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-compact-template.yaml index 5798829c2c..4fa0f93116 100755 --- a/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-compact-template.yaml +++ b/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-compact-template.yaml @@ -117,14 +117,14 @@ objects: - podAffinityTerm: labelSelector: matchExpressions: - - key: app.kubernetes.io/name - operator: In - values: - - thanos-compact - key: app.kubernetes.io/instance operator: In values: - observatorium + - key: app.kubernetes.io/name + operator: In + values: + - thanos-compact topologyKey: kubernetes.io/hostname weight: 
100 containers: diff --git a/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-store-template.yaml b/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-store-template.yaml index e3511f4e1d..bfc9da284b 100755 --- a/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-store-template.yaml +++ b/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-store-template.yaml @@ -104,6 +104,10 @@ objects: port: 10902 protocol: TCP targetPort: 10902 + - name: grpc + port: 10901 + protocol: TCP + targetPort: 10901 selector: app.kubernetes.io/component: object-store-gateway app.kubernetes.io/instance: observatorium @@ -193,14 +197,14 @@ objects: - podAffinityTerm: labelSelector: matchExpressions: - - key: app.kubernetes.io/name - operator: In - values: - - thanos-store - key: app.kubernetes.io/instance operator: In values: - observatorium + - key: app.kubernetes.io/name + operator: In + values: + - thanos-store topologyKey: kubernetes.io/hostname weight: 100 containers: @@ -244,6 +248,9 @@ objects: - containerPort: 10902 name: http protocol: TCP + - containerPort: 10901 + name: grpc + protocol: TCP readinessProbe: failureThreshold: 20 httpGet: diff --git a/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-compact-template.yaml b/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-compact-template.yaml index 19759c203a..5762fd4abe 100755 --- a/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-compact-template.yaml +++ b/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-compact-template.yaml @@ -117,14 +117,14 @@ objects: - podAffinityTerm: labelSelector: matchExpressions: - - key: app.kubernetes.io/name - operator: In - values: - - thanos-compact - key: app.kubernetes.io/instance operator: In values: - observatorium + - key: app.kubernetes.io/name + operator: In + values: + - thanos-compact topologyKey: kubernetes.io/hostname weight: 100 containers: diff --git 
a/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-store-template.yaml b/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-store-template.yaml index 574e162e05..c920844310 100755 --- a/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-store-template.yaml +++ b/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-store-template.yaml @@ -104,6 +104,10 @@ objects: port: 10902 protocol: TCP targetPort: 10902 + - name: grpc + port: 10901 + protocol: TCP + targetPort: 10901 selector: app.kubernetes.io/component: object-store-gateway app.kubernetes.io/instance: observatorium @@ -193,14 +197,14 @@ objects: - podAffinityTerm: labelSelector: matchExpressions: - - key: app.kubernetes.io/name - operator: In - values: - - thanos-store - key: app.kubernetes.io/instance operator: In values: - observatorium + - key: app.kubernetes.io/name + operator: In + values: + - thanos-store topologyKey: kubernetes.io/hostname weight: 100 containers: @@ -244,6 +248,9 @@ objects: - containerPort: 10902 name: http protocol: TCP + - containerPort: 10901 + name: grpc + protocol: TCP readinessProbe: failureThreshold: 20 httpGet: diff --git a/services_go/observatorium/metrics.go b/services_go/observatorium/metrics.go index c3a8af34cd..989c9608e6 100644 --- a/services_go/observatorium/metrics.go +++ b/services_go/observatorium/metrics.go @@ -73,7 +73,7 @@ func makeCompactor(namespace string, preManifestsHook func(*compactor.CompactorS manifests := compactorSatefulset.Manifests() service := getObject[*corev1.Service](manifests) service.ObjectMeta.Annotations[servingCertSecretNameAnnotation] = tlsSecret - postProcessServiceMonitor(getObject[*monv1.ServiceMonitor](manifests)) + postProcessServiceMonitor(getObject[*monv1.ServiceMonitor](manifests), compactorSatefulset.Namespace) // Add pod disruption budget labels := maps.Clone(getObject[*appsv1.StatefulSet](manifests).ObjectMeta.Labels) @@ -184,7 +184,7 @@ func makeStore(namespace string, 
preManifestHook func(*store.StoreStatefulSet)) preManifestHook(storeStatefulSet) } manifests := storeStatefulSet.Manifests() - postProcessServiceMonitor(getObject[*monv1.ServiceMonitor](manifests)) + postProcessServiceMonitor(getObject[*monv1.ServiceMonitor](manifests), storeStatefulSet.Namespace) statefulset := getObject[*appsv1.StatefulSet](manifests) defaultMode := int32(0777) statefulset.Spec.Template.Spec.Volumes = append(statefulset.Spec.Template.Spec.Volumes, corev1.Volume{ @@ -296,10 +296,8 @@ func getObject[T kubeObject](manifests k8sutil.ObjectMap) T { panic(fmt.Sprintf("could not find object of type %T", *new(T))) } -func postProcessServiceMonitor(serviceMonitor *monv1.ServiceMonitor) { +func postProcessServiceMonitor(serviceMonitor *monv1.ServiceMonitor, namespaceSelector string) { serviceMonitor.ObjectMeta.Namespace = monitoringNamespace - // Same labels map is shared between all objects in the manifests. Need to clone it to avoid modifying all. - labels := maps.Clone(serviceMonitor.ObjectMeta.Labels) - labels["prometheus"] = "app-sre" - serviceMonitor.ObjectMeta.Labels = labels + serviceMonitor.Spec.NamespaceSelector.MatchNames = []string{namespaceSelector} + serviceMonitor.ObjectMeta.Labels["prometheus"] = "app-sre" } From 64d7e9da8f3e9bb7648585b9c63fe6f3a248bdd7 Mon Sep 17 00:00:00 2001 From: Thibault Mange <22740367+thibaultmg@users.noreply.github.com> Date: Tue, 3 Oct 2023 10:07:42 +0200 Subject: [PATCH 23/32] improve objstore config management Signed-off-by: Thibault Mange <22740367+thibaultmg@users.noreply.github.com> --- ...bservatorium-metrics-compact-template.yaml | 31 ++++++++-- .../observatorium-metrics-store-template.yaml | 31 ++++++++-- ...bservatorium-metrics-compact-template.yaml | 31 ++++++++-- .../observatorium-metrics-store-template.yaml | 31 ++++++++-- services_go/instances/rhobs/rhobs.go | 18 +++--- services_go/observatorium/metrics.go | 61 ++++++++++++++----- services_go/observatorium/observatorium.go | 5 +- 7 files changed, 164 
insertions(+), 44 deletions(-) diff --git a/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-compact-template.yaml b/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-compact-template.yaml index 4fa0f93116..fade9e162c 100755 --- a/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-compact-template.yaml +++ b/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-compact-template.yaml @@ -144,21 +144,42 @@ objects: - --wait - --debug.max-compaction-level=3 env: + - name: HOST_IP_ADDRESS + valueFrom: + fieldRef: + fieldPath: status.hostIP - name: AWS_ACCESS_KEY_ID valueFrom: secretKeyRef: key: aws_access_key_id - name: rhobs-thanos-s3 + name: telemeter-tenant-s3 - name: AWS_SECRET_ACCESS_KEY valueFrom: secretKeyRef: key: aws_secret_access_key - name: rhobs-thanos-s3 - - name: OBJSTORE_CONFIG + name: telemeter-tenant-s3 + - name: OBJ_STORE_BUCKET + valueFrom: + secretKeyRef: + key: bucket + name: telemeter-tenant-s3 + - name: OBJ_STORE_REGION valueFrom: secretKeyRef: - key: thanos.yaml - name: rhobs-thanos-objectstorage + key: aws_region + name: telemeter-tenant-s3 + - name: OBJ_STORE_ENDPOINT + valueFrom: + secretKeyRef: + key: endpoint + name: telemeter-tenant-s3 + - name: OBJSTORE_CONFIG + value: | + type: S3 + config: + bucket: $(OBJ_STORE_BUCKET) + endpoint: $(OBJ_STORE_ENDPOINT) + region: $(OBJ_STORE_REGION) image: quay.io/thanos/thanos:v0.32.3 imagePullPolicy: IfNotPresent livenessProbe: diff --git a/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-store-template.yaml b/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-store-template.yaml index bfc9da284b..402581e8dc 100755 --- a/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-store-template.yaml +++ b/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-store-template.yaml @@ -219,21 +219,42 @@ objects: - --selector.relabel-config-file=/tmp/config/hashmod-config.yaml - 
--store.enable-index-header-lazy-reader=true env: + - name: HOST_IP_ADDRESS + valueFrom: + fieldRef: + fieldPath: status.hostIP - name: AWS_ACCESS_KEY_ID valueFrom: secretKeyRef: key: aws_access_key_id - name: rhobs-thanos-s3 + name: telemeter-tenant-s3 - name: AWS_SECRET_ACCESS_KEY valueFrom: secretKeyRef: key: aws_secret_access_key - name: rhobs-thanos-s3 - - name: OBJSTORE_CONFIG + name: telemeter-tenant-s3 + - name: OBJ_STORE_BUCKET + valueFrom: + secretKeyRef: + key: bucket + name: telemeter-tenant-s3 + - name: OBJ_STORE_REGION + valueFrom: + secretKeyRef: + key: aws_region + name: telemeter-tenant-s3 + - name: OBJ_STORE_ENDPOINT valueFrom: secretKeyRef: - key: thanos.yaml - name: rhobs-thanos-objectstorage + key: endpoint + name: telemeter-tenant-s3 + - name: OBJSTORE_CONFIG + value: | + type: S3 + config: + bucket: $(OBJ_STORE_BUCKET) + endpoint: $(OBJ_STORE_ENDPOINT) + region: $(OBJ_STORE_REGION) image: quay.io/thanos/thanos:v0.32.3 imagePullPolicy: IfNotPresent livenessProbe: diff --git a/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-compact-template.yaml b/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-compact-template.yaml index 5762fd4abe..0063eebf07 100755 --- a/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-compact-template.yaml +++ b/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-compact-template.yaml @@ -144,21 +144,42 @@ objects: - --wait - --debug.max-compaction-level=3 env: + - name: HOST_IP_ADDRESS + valueFrom: + fieldRef: + fieldPath: status.hostIP - name: AWS_ACCESS_KEY_ID valueFrom: secretKeyRef: key: aws_access_key_id - name: rhobs-thanos-s3 + name: telemeter-tenant-s3 - name: AWS_SECRET_ACCESS_KEY valueFrom: secretKeyRef: key: aws_secret_access_key - name: rhobs-thanos-s3 - - name: OBJSTORE_CONFIG + name: telemeter-tenant-s3 + - name: OBJ_STORE_BUCKET + valueFrom: + secretKeyRef: + key: bucket + name: telemeter-tenant-s3 + - name: OBJ_STORE_REGION valueFrom: 
secretKeyRef: - key: thanos.yaml - name: rhobs-thanos-objectstorage + key: aws_region + name: telemeter-tenant-s3 + - name: OBJ_STORE_ENDPOINT + valueFrom: + secretKeyRef: + key: endpoint + name: telemeter-tenant-s3 + - name: OBJSTORE_CONFIG + value: | + type: S3 + config: + bucket: $(OBJ_STORE_BUCKET) + endpoint: $(OBJ_STORE_ENDPOINT) + region: $(OBJ_STORE_REGION) image: quay.io/thanos/thanos:v0.32.3 imagePullPolicy: IfNotPresent livenessProbe: diff --git a/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-store-template.yaml b/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-store-template.yaml index c920844310..ab4d481da6 100755 --- a/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-store-template.yaml +++ b/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-store-template.yaml @@ -219,21 +219,42 @@ objects: - --selector.relabel-config-file=/tmp/config/hashmod-config.yaml - --store.enable-index-header-lazy-reader=true env: + - name: HOST_IP_ADDRESS + valueFrom: + fieldRef: + fieldPath: status.hostIP - name: AWS_ACCESS_KEY_ID valueFrom: secretKeyRef: key: aws_access_key_id - name: rhobs-thanos-s3 + name: telemeter-tenant-s3 - name: AWS_SECRET_ACCESS_KEY valueFrom: secretKeyRef: key: aws_secret_access_key - name: rhobs-thanos-s3 - - name: OBJSTORE_CONFIG + name: telemeter-tenant-s3 + - name: OBJ_STORE_BUCKET + valueFrom: + secretKeyRef: + key: bucket + name: telemeter-tenant-s3 + - name: OBJ_STORE_REGION + valueFrom: + secretKeyRef: + key: aws_region + name: telemeter-tenant-s3 + - name: OBJ_STORE_ENDPOINT valueFrom: secretKeyRef: - key: thanos.yaml - name: rhobs-thanos-objectstorage + key: endpoint + name: telemeter-tenant-s3 + - name: OBJSTORE_CONFIG + value: | + type: S3 + config: + bucket: $(OBJ_STORE_BUCKET) + endpoint: $(OBJ_STORE_ENDPOINT) + region: $(OBJ_STORE_REGION) image: quay.io/thanos/thanos:v0.32.3 imagePullPolicy: IfNotPresent livenessProbe: diff --git 
a/services_go/instances/rhobs/rhobs.go b/services_go/instances/rhobs/rhobs.go index be0c86263b..bb9a631ceb 100644 --- a/services_go/instances/rhobs/rhobs.go +++ b/services_go/instances/rhobs/rhobs.go @@ -10,10 +10,11 @@ import ( func ClusterConfigs() []observatorium.InstanceConfiguration { return []observatorium.InstanceConfiguration{ { - Cluster: "app-sre-stage-01", - Namespace: "rhobs", - Instance: "rhobs", - Tenants: []observatorium.TenantInstanceConfiguration{}, + Cluster: "app-sre-stage-01", + Namespace: "rhobs", + Instance: "rhobs", + ObjStoreSecret: "telemeter-tenant-s3", + Tenants: []observatorium.TenantInstanceConfiguration{}, PreManifestsHooks: observatorium.PreManifestsHooks{ ThanosStore: func(store *store.StoreStatefulSet) { store.Replicas = 2 @@ -25,10 +26,11 @@ func ClusterConfigs() []observatorium.InstanceConfiguration { }, }, { - Cluster: "telemeter-prod-01", - Namespace: "rhobs", - Instance: "rhobs", - Tenants: []observatorium.TenantInstanceConfiguration{}, + Cluster: "telemeter-prod-01", + Namespace: "rhobs", + Instance: "rhobs", + ObjStoreSecret: "telemeter-tenant-s3", + Tenants: []observatorium.TenantInstanceConfiguration{}, PreManifestsHooks: observatorium.PreManifestsHooks{ ThanosStore: func(store *store.StoreStatefulSet) { store.Replicas = 3 diff --git a/services_go/observatorium/metrics.go b/services_go/observatorium/metrics.go index 989c9608e6..e6e8898ddf 100644 --- a/services_go/observatorium/metrics.go +++ b/services_go/observatorium/metrics.go @@ -10,9 +10,12 @@ import ( "github.com/observatorium/observatorium/configuration_go/abstr/kubernetes/thanos/store" "github.com/observatorium/observatorium/configuration_go/k8sutil" "github.com/observatorium/observatorium/configuration_go/schemas/thanos/common" + "github.com/observatorium/observatorium/configuration_go/schemas/thanos/objstore" + objstore3 "github.com/observatorium/observatorium/configuration_go/schemas/thanos/objstore/s3" trclient 
"github.com/observatorium/observatorium/configuration_go/schemas/thanos/tracing/client" "github.com/observatorium/observatorium/configuration_go/schemas/thanos/tracing/jaeger" monv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" + "gopkg.in/yaml.v3" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" policyv1 "k8s.io/api/policy/v1" @@ -33,7 +36,7 @@ const ( var storeAutoShardRelabelConfigMap string // makeCompactor creates a base compactor component that can be derived from using the preManifestsHook. -func makeCompactor(namespace string, preManifestsHook func(*compactor.CompactorStatefulSet)) k8sutil.ObjectMap { +func makeCompactor(namespace string, objstoreSecret string, preManifestsHook func(*compactor.CompactorStatefulSet)) k8sutil.ObjectMap { // K8s config compactorSatefulset := compactor.NewCompactor() compactorSatefulset.Image = thanosImage @@ -41,17 +44,14 @@ func makeCompactor(namespace string, preManifestsHook func(*compactor.CompactorS compactorSatefulset.Namespace = namespace compactorSatefulset.Affinity.PodAntiAffinity.PreferredDuringSchedulingIgnoredDuringExecution[0].PodAffinityTerm.Namespaces = []string{} compactorSatefulset.Replicas = 1 - delete(compactorSatefulset.PodResources.Limits, corev1.ResourceCPU) // To be confirmed + delete(compactorSatefulset.PodResources.Limits, corev1.ResourceCPU) compactorSatefulset.PodResources.Requests[corev1.ResourceCPU] = resource.MustParse("200m") compactorSatefulset.PodResources.Requests[corev1.ResourceMemory] = resource.MustParse("1Gi") compactorSatefulset.PodResources.Limits[corev1.ResourceMemory] = resource.MustParse("5Gi") compactorSatefulset.VolumeType = "gp2" compactorSatefulset.VolumeSize = "500Gi" - compactorSatefulset.Env = []corev1.EnvVar{ - k8sutil.NewEnvFromSecret("AWS_ACCESS_KEY_ID", "rhobs-thanos-s3", "aws_access_key_id"), - k8sutil.NewEnvFromSecret("AWS_SECRET_ACCESS_KEY", "rhobs-thanos-s3", "aws_secret_access_key"), - k8sutil.NewEnvFromSecret("OBJSTORE_CONFIG", 
"rhobs-thanos-objectstorage", "thanos.yaml"), - } + compactorSatefulset.Env = deleteObjStoreEnv(compactorSatefulset.Env) // delete the default objstore env vars + compactorSatefulset.Env = append(compactorSatefulset.Env, objStoreEnvVars(objstoreSecret)...) tlsSecret := "compact-tls" compactorSatefulset.Sidecars = []k8sutil.ContainerProvider{makeOauthProxy(10902, namespace, compactorSatefulset.Name, tlsSecret)} @@ -105,7 +105,7 @@ func makeCompactor(namespace string, preManifestsHook func(*compactor.CompactorS } // makeStore creates a base store component that can be derived from using the preManifestsHook. -func makeStore(namespace string, preManifestHook func(*store.StoreStatefulSet)) k8sutil.ObjectMap { +func makeStore(namespace string, objstoreSecret string, preManifestHook func(*store.StoreStatefulSet)) k8sutil.ObjectMap { // K8s config storeStatefulSet := store.NewStore() storeStatefulSet.Image = thanosImage @@ -113,17 +113,14 @@ func makeStore(namespace string, preManifestHook func(*store.StoreStatefulSet)) storeStatefulSet.Namespace = namespace storeStatefulSet.Affinity.PodAntiAffinity.PreferredDuringSchedulingIgnoredDuringExecution[0].PodAffinityTerm.Namespaces = []string{} storeStatefulSet.Replicas = 1 - delete(storeStatefulSet.PodResources.Limits, corev1.ResourceCPU) // To be confirmed + delete(storeStatefulSet.PodResources.Limits, corev1.ResourceCPU) storeStatefulSet.PodResources.Requests[corev1.ResourceCPU] = resource.MustParse("4") storeStatefulSet.PodResources.Requests[corev1.ResourceMemory] = resource.MustParse("20Gi") storeStatefulSet.PodResources.Limits[corev1.ResourceMemory] = resource.MustParse("80Gi") storeStatefulSet.VolumeType = "gp2" storeStatefulSet.VolumeSize = "500Gi" - storeStatefulSet.Env = []corev1.EnvVar{ - k8sutil.NewEnvFromSecret("AWS_ACCESS_KEY_ID", "rhobs-thanos-s3", "aws_access_key_id"), - k8sutil.NewEnvFromSecret("AWS_SECRET_ACCESS_KEY", "rhobs-thanos-s3", "aws_secret_access_key"), - k8sutil.NewEnvFromSecret("OBJSTORE_CONFIG", 
"rhobs-thanos-objectstorage", "thanos.yaml"), - } + storeStatefulSet.Env = deleteObjStoreEnv(storeStatefulSet.Env) // delete the default objstore env vars + storeStatefulSet.Env = append(storeStatefulSet.Env, objStoreEnvVars(objstoreSecret)...) storeStatefulSet.Sidecars = []k8sutil.ContainerProvider{makeJaegerAgent("observatorium-tools")} // Store auto-sharding using a configMap and an initContainer @@ -301,3 +298,39 @@ func postProcessServiceMonitor(serviceMonitor *monv1.ServiceMonitor, namespaceSe serviceMonitor.Spec.NamespaceSelector.MatchNames = []string{namespaceSelector} serviceMonitor.ObjectMeta.Labels["prometheus"] = "app-sre" } + +func deleteObjStoreEnv(objStoreEnv []corev1.EnvVar) []corev1.EnvVar { + for i, env := range objStoreEnv { + if env.Name == "OBJSTORE_CONFIG" { + return append(objStoreEnv[:i], objStoreEnv[i+1:]...) + } + } + + return objStoreEnv +} + +func objStoreEnvVars(objstoreSecret string) []corev1.EnvVar { + objStoreCfg, err := yaml.Marshal(objstore.BucketConfig{ + Type: objstore.S3, + Config: objstore3.Config{ + Bucket: "$(OBJ_STORE_BUCKET)", + Endpoint: "$(OBJ_STORE_ENDPOINT)", + Region: "$(OBJ_STORE_REGION)", + }, + }) + if err != nil { + panic(err) + } + + return []corev1.EnvVar{ + k8sutil.NewEnvFromSecret("AWS_ACCESS_KEY_ID", objstoreSecret, "aws_access_key_id"), + k8sutil.NewEnvFromSecret("AWS_SECRET_ACCESS_KEY", objstoreSecret, "aws_secret_access_key"), + k8sutil.NewEnvFromSecret("OBJ_STORE_BUCKET", objstoreSecret, "bucket"), + k8sutil.NewEnvFromSecret("OBJ_STORE_REGION", objstoreSecret, "aws_region"), + k8sutil.NewEnvFromSecret("OBJ_STORE_ENDPOINT", objstoreSecret, "endpoint"), + { + Name: "OBJSTORE_CONFIG", + Value: string(objStoreCfg), + }, + } +} diff --git a/services_go/observatorium/observatorium.go b/services_go/observatorium/observatorium.go index 9d66dd5d90..4606ee3810 100644 --- a/services_go/observatorium/observatorium.go +++ b/services_go/observatorium/observatorium.go @@ -39,6 +39,7 @@ type InstanceConfiguration struct { 
Cluster string Instance string Namespace string + ObjStoreSecret string Tenants []TenantInstanceConfiguration PreManifestsHooks PreManifestsHooks } @@ -63,14 +64,14 @@ func (o *Observatorium) Manifests(generator *mimic.Generator) { objects k8sutil.ObjectMap params []templatev1.Parameter }{ - {"observatorium-metrics-compact", makeCompactor(o.cfg.Namespace, o.cfg.PreManifestsHooks.Compactor), []templatev1.Parameter{ + {"observatorium-metrics-compact", makeCompactor(o.cfg.Namespace, o.cfg.ObjStoreSecret, o.cfg.PreManifestsHooks.Compactor), []templatev1.Parameter{ { Name: "OAUTH_PROXY_COOKIE_SECRET", Generate: "expression", From: "[a-zA-Z0-9]{40}", }, }}, - {"observatorium-metrics-store", makeStore(o.cfg.Namespace, o.cfg.PreManifestsHooks.ThanosStore), []templatev1.Parameter{}}, + {"observatorium-metrics-store", makeStore(o.cfg.Namespace, o.cfg.ObjStoreSecret, o.cfg.PreManifestsHooks.ThanosStore), []templatev1.Parameter{}}, } for _, component := range components { From 4398ae8271529ed4a256f7898e02edadefe1a7dc Mon Sep 17 00:00:00 2001 From: Thibault Mange <22740367+thibaultmg@users.noreply.github.com> Date: Tue, 3 Oct 2023 10:45:06 +0200 Subject: [PATCH 24/32] add route for compactor ui Signed-off-by: Thibault Mange <22740367+thibaultmg@users.noreply.github.com> --- go.mod | 2 +- ...bservatorium-metrics-compact-template.yaml | 25 +++++++++++++ ...bservatorium-metrics-compact-template.yaml | 25 +++++++++++++ services_go/observatorium/metrics.go | 35 +++++++++++++++++-- 4 files changed, 84 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index dccefed668..70e911a1e3 100644 --- a/go.mod +++ b/go.mod @@ -11,6 +11,7 @@ require ( github.com/openshift/api v3.9.0+incompatible github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.68.0 github.com/pyrra-dev/pyrra v0.7.0 + gopkg.in/yaml.v3 v3.0.1 k8s.io/api v0.28.2 k8s.io/apimachinery v0.28.2 ) @@ -72,7 +73,6 @@ require ( gopkg.in/alecthomas/kingpin.v2 v2.2.6 // indirect gopkg.in/inf.v0 v0.9.1 // 
indirect gopkg.in/yaml.v2 v2.4.0 // indirect - gopkg.in/yaml.v3 v3.0.1 // indirect k8s.io/apiextensions-apiserver v0.28.2 // indirect k8s.io/client-go v0.28.2 // indirect k8s.io/klog/v2 v2.100.1 // indirect diff --git a/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-compact-template.yaml b/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-compact-template.yaml index fade9e162c..325a133adf 100755 --- a/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-compact-template.yaml +++ b/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-compact-template.yaml @@ -274,6 +274,31 @@ objects: storage: 500Gi storageClassName: gp2 status: {} +- apiVersion: route.openshift.io/v1 + kind: Route + metadata: + annotations: + cert-manager.io/issuer-kind: ClusterIssuer + cert-manager.io/issuer-name: letsencrypt-prod-http + creationTimestamp: null + labels: + app.kubernetes.io/component: database-compactor + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-compact + app.kubernetes.io/part-of: observatorium + name: observatorium-thanos-compact + namespace: rhobs + spec: + host: "" + port: + targetPort: https + tls: + insecureEdgeTerminationPolicy: Redirect + termination: reencrypt + to: + kind: Service + name: observatorium-thanos-compact + weight: null - apiVersion: policy/v1 kind: PodDisruptionBudget metadata: diff --git a/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-compact-template.yaml b/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-compact-template.yaml index 0063eebf07..2b297a8805 100755 --- a/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-compact-template.yaml +++ b/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-compact-template.yaml @@ -274,6 +274,31 @@ objects: storage: 500Gi storageClassName: gp2 status: {} +- apiVersion: route.openshift.io/v1 + kind: Route + metadata: + annotations: + cert-manager.io/issuer-kind: 
ClusterIssuer + cert-manager.io/issuer-name: letsencrypt-prod-http + creationTimestamp: null + labels: + app.kubernetes.io/component: database-compactor + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-compact + app.kubernetes.io/part-of: observatorium + name: observatorium-thanos-compact + namespace: rhobs + spec: + host: "" + port: + targetPort: https + tls: + insecureEdgeTerminationPolicy: Redirect + termination: reencrypt + to: + kind: Service + name: observatorium-thanos-compact + weight: null - apiVersion: policy/v1 kind: PodDisruptionBudget metadata: diff --git a/services_go/observatorium/metrics.go b/services_go/observatorium/metrics.go index e6e8898ddf..4f516c64d8 100644 --- a/services_go/observatorium/metrics.go +++ b/services_go/observatorium/metrics.go @@ -14,6 +14,7 @@ import ( objstore3 "github.com/observatorium/observatorium/configuration_go/schemas/thanos/objstore/s3" trclient "github.com/observatorium/observatorium/configuration_go/schemas/thanos/tracing/client" "github.com/observatorium/observatorium/configuration_go/schemas/thanos/tracing/jaeger" + routev1 "github.com/openshift/api/route/v1" monv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" "gopkg.in/yaml.v3" appsv1 "k8s.io/api/apps/v1" @@ -36,7 +37,7 @@ const ( var storeAutoShardRelabelConfigMap string // makeCompactor creates a base compactor component that can be derived from using the preManifestsHook. 
-func makeCompactor(namespace string, objstoreSecret string, preManifestsHook func(*compactor.CompactorStatefulSet)) k8sutil.ObjectMap { +func makeCompactor(namespace, objstoreSecret string, preManifestsHook func(*compactor.CompactorStatefulSet)) k8sutil.ObjectMap { // K8s config compactorSatefulset := compactor.NewCompactor() compactorSatefulset.Image = thanosImage @@ -100,12 +101,42 @@ func makeCompactor(namespace string, objstoreSecret string, preManifestsHook fun }, } + // Add route for oauth-proxy + manifests["oauth-proxy-route"] = &routev1.Route{ + TypeMeta: metav1.TypeMeta{ + Kind: "Route", + APIVersion: routev1.SchemeGroupVersion.String(), + }, + ObjectMeta: metav1.ObjectMeta{ + Name: compactorSatefulset.Name, + Namespace: namespace, + Labels: labels, + Annotations: map[string]string{ + "cert-manager.io/issuer-kind": "ClusterIssuer", + "cert-manager.io/issuer-name": "letsencrypt-prod-http", + }, + }, + Spec: routev1.RouteSpec{ + Port: &routev1.RoutePort{ + TargetPort: intstr.FromString("https"), + }, + TLS: &routev1.TLSConfig{ + Termination: routev1.TLSTerminationReencrypt, + InsecureEdgeTerminationPolicy: routev1.InsecureEdgeTerminationPolicyRedirect, + }, + To: routev1.RouteTargetReference{ + Kind: "Service", + Name: compactorSatefulset.Name, + }, + }, + } + return manifests } // makeStore creates a base store component that can be derived from using the preManifestsHook. 
-func makeStore(namespace string, objstoreSecret string, preManifestHook func(*store.StoreStatefulSet)) k8sutil.ObjectMap { +func makeStore(namespace, objstoreSecret string, preManifestHook func(*store.StoreStatefulSet)) k8sutil.ObjectMap { // K8s config storeStatefulSet := store.NewStore() storeStatefulSet.Image = thanosImage From e62acfc9eea9289aeb232661113c270f1cbce36d Mon Sep 17 00:00:00 2001 From: Thibault Mange <22740367+thibaultmg@users.noreply.github.com> Date: Tue, 3 Oct 2023 10:52:49 +0200 Subject: [PATCH 25/32] fix Signed-off-by: Thibault Mange <22740367+thibaultmg@users.noreply.github.com> --- go.mod | 2 -- services_go/observatorium/observatorium.go | 1 + 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/go.mod b/go.mod index 70e911a1e3..32dc30cf2b 100644 --- a/go.mod +++ b/go.mod @@ -2,8 +2,6 @@ module github.com/rhobs/configuration go 1.21 -toolchain go1.21.1 - require ( github.com/bwplotka/mimic v0.2.1-0.20230303101552-f705cca2f4a4 github.com/observatorium/api v0.1.3-0.20230711132510-96e8799ade44 diff --git a/services_go/observatorium/observatorium.go b/services_go/observatorium/observatorium.go index 4606ee3810..6b83b350b3 100644 --- a/services_go/observatorium/observatorium.go +++ b/services_go/observatorium/observatorium.go @@ -111,6 +111,7 @@ func (c *customYAML) clean(input []byte) []byte { re := []*regexp.Regexp{ regexp.MustCompile(`\s*status:\n\s*availableReplicas: 0\n\s*replicas: 0`), regexp.MustCompile(`\s*status:\n\s*currentHealthy: 0\n\s*desiredHealthy: 0\n\s*disruptionsAllowed: 0\n\s*expectedPods: 0`), + regexp.MustCompile(`\s*status:\n\s*ingress: null`), } ret := input From 81fbaff9fcab8f9aae8849e2767d850d630dc56e Mon Sep 17 00:00:00 2001 From: Thibault Mange <22740367+thibaultmg@users.noreply.github.com> Date: Tue, 3 Oct 2023 10:58:21 +0200 Subject: [PATCH 26/32] upgrade go ci image Signed-off-by: Thibault Mange <22740367+thibaultmg@users.noreply.github.com> --- .circleci/config.yml | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 690122142a..b23350442b 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -2,7 +2,7 @@ version: 2.1 jobs: build: docker: - - image: cimg/go:1.19 + - image: cimg/go:1.21 steps: - checkout - run: make vendor_jsonnet From 9eda9755c483dcf8413a8c09c78649386cae40a9 Mon Sep 17 00:00:00 2001 From: Thibault Mange <22740367+thibaultmg@users.noreply.github.com> Date: Tue, 3 Oct 2023 11:03:44 +0200 Subject: [PATCH 27/32] go mod tidy Signed-off-by: Thibault Mange <22740367+thibaultmg@users.noreply.github.com> --- go.sum | 2 -- 1 file changed, 2 deletions(-) diff --git a/go.sum b/go.sum index e5a1462d7a..a8b0fc4a61 100644 --- a/go.sum +++ b/go.sum @@ -1257,8 +1257,6 @@ github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635/go.mod h1:hkRG github.com/tchap/go-patricia v2.2.6+incompatible/go.mod h1:bmLyhP68RS6kStMGxByiQ23RP/odRBOTVjwp2cDyi6I= github.com/thanos-io/thanos v0.32.2 h1:W9vzOUdiIBKUc947IbTPAj4Lnv5r4MP8iXuUF1G1GkM= github.com/thanos-io/thanos v0.32.2/go.mod h1:zfpzKCtqaqCy1D9/6ksZfL+U+KKt8mkcX6v3btuDHgg= -github.com/thibaultmg/observatorium v0.0.0-20230922090626-0f7a82fd0910 h1:HKkk7aBcPd4542+YTo9bJKS+IYCoGJTM8eOt/tM8+2c= -github.com/thibaultmg/observatorium v0.0.0-20230922090626-0f7a82fd0910/go.mod h1:Bfegly2FCsugSsgbEQ+2wGW14Hj6pH7yaE+vQA/dw2M= github.com/thibaultmg/observatorium v0.0.0-20231002130052-3e0c7849c81f h1:bs2Q0quoMiEXncDLKSyzzj2K57zwE64aIebMLxWQ83s= github.com/thibaultmg/observatorium v0.0.0-20231002130052-3e0c7849c81f/go.mod h1:Bfegly2FCsugSsgbEQ+2wGW14Hj6pH7yaE+vQA/dw2M= github.com/tidwall/pretty v1.0.0/go.mod h1:XNkn88O1ChpSDQmQeStsy+sBenx6DDtFZJxhVysOjyk= From 7e42b2505439542f413fc8e8598275fc8536c7b9 Mon Sep 17 00:00:00 2001 From: Thibault Mange <22740367+thibaultmg@users.noreply.github.com> Date: Tue, 3 Oct 2023 11:11:47 +0200 Subject: [PATCH 28/32] upgrade go ci Signed-off-by: Thibault Mange <22740367+thibaultmg@users.noreply.github.com> --- 
.github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 79bfbe472c..8196decb7e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -15,7 +15,7 @@ jobs: - name: Set up Go uses: actions/setup-go@v2 with: - go-version: 1.19 + go-version: 1.21 - name: Vendor run: make vendor_jsonnet - name: Build From 83888ae72a76e3297cc1f59d62e6ce1fe3fb0dfc Mon Sep 17 00:00:00 2001 From: Thibault Mange <22740367+thibaultmg@users.noreply.github.com> Date: Tue, 3 Oct 2023 16:48:54 +0200 Subject: [PATCH 29/32] add template parameters for support Signed-off-by: Thibault Mange <22740367+thibaultmg@users.noreply.github.com> --- ...bservatorium-metrics-compact-template.yaml | 12 ++-- .../observatorium-metrics-store-template.yaml | 13 ++-- ...bservatorium-metrics-compact-template.yaml | 12 ++-- .../observatorium-metrics-store-template.yaml | 13 ++-- services_go/observatorium/metrics.go | 71 +++++++++++++++++-- services_go/observatorium/observatorium.go | 67 ++--------------- services_go/observatorium/yaml.go | 67 +++++++++++++++++ 7 files changed, 169 insertions(+), 86 deletions(-) create mode 100644 services_go/observatorium/yaml.go diff --git a/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-compact-template.yaml b/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-compact-template.yaml index 325a133adf..3838675fa0 100755 --- a/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-compact-template.yaml +++ b/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-compact-template.yaml @@ -33,8 +33,6 @@ objects: app.kubernetes.io/instance: observatorium app.kubernetes.io/name: thanos-compact app.kubernetes.io/part-of: observatorium - status: - loadBalancer: {} - apiVersion: v1 kind: ServiceAccount metadata: @@ -92,7 +90,7 @@ objects: name: observatorium-thanos-compact namespace: rhobs spec: - replicas: 1 + replicas: ${THANOS_REPLICAS} 
selector: matchLabels: app.kubernetes.io/component: database-compactor @@ -136,7 +134,7 @@ objects: - --delete-delay=24h0m0s - --downsample.concurrency=1 - --log.format=logfmt - - --log.level=info + - --log.level=${THANOS_LOG_LEVEL} - --objstore.config=$(OBJSTORE_CONFIG) - --retention.resolution-1h=8760h0m0s - --retention.resolution-5m=8760h0m0s @@ -322,3 +320,9 @@ parameters: - from: '[a-zA-Z0-9]{40}' generate: expression name: OAUTH_PROXY_COOKIE_SECRET +- name: THANOS_LOG_LEVEL + required: true + value: info +- name: THANOS_REPLICAS + required: true + value: "1" diff --git a/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-store-template.yaml b/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-store-template.yaml index 402581e8dc..74804381ea 100755 --- a/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-store-template.yaml +++ b/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-store-template.yaml @@ -113,8 +113,6 @@ objects: app.kubernetes.io/instance: observatorium app.kubernetes.io/name: thanos-store app.kubernetes.io/part-of: observatorium - status: - loadBalancer: {} - apiVersion: v1 kind: ServiceAccount metadata: @@ -172,7 +170,7 @@ objects: name: observatorium-thanos-store namespace: rhobs spec: - replicas: 2 + replicas: ${THANOS_REPLICAS} selector: matchLabels: app.kubernetes.io/component: object-store-gateway @@ -213,7 +211,7 @@ objects: - --data-dir=/var/thanos/store - --ignore-deletion-marks-delay=24h0m0s - --log.format=logfmt - - --log.level=info + - --log.level=${THANOS_LOG_LEVEL} - --max-time=-22h0m0s - --objstore.config=$(OBJSTORE_CONFIG) - --selector.relabel-config-file=/tmp/config/hashmod-config.yaml @@ -383,3 +381,10 @@ objects: storage: 500Gi storageClassName: gp2 status: {} +parameters: +- name: THANOS_LOG_LEVEL + required: true + value: info +- name: THANOS_REPLICAS + required: true + value: "2" diff --git 
a/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-compact-template.yaml b/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-compact-template.yaml index 2b297a8805..564e1516b2 100755 --- a/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-compact-template.yaml +++ b/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-compact-template.yaml @@ -33,8 +33,6 @@ objects: app.kubernetes.io/instance: observatorium app.kubernetes.io/name: thanos-compact app.kubernetes.io/part-of: observatorium - status: - loadBalancer: {} - apiVersion: v1 kind: ServiceAccount metadata: @@ -92,7 +90,7 @@ objects: name: observatorium-thanos-compact namespace: rhobs spec: - replicas: 1 + replicas: ${THANOS_REPLICAS} selector: matchLabels: app.kubernetes.io/component: database-compactor @@ -136,7 +134,7 @@ objects: - --delete-delay=24h0m0s - --downsample.concurrency=1 - --log.format=logfmt - - --log.level=warn + - --log.level=${THANOS_LOG_LEVEL} - --objstore.config=$(OBJSTORE_CONFIG) - --retention.resolution-1h=8760h0m0s - --retention.resolution-5m=8760h0m0s @@ -322,3 +320,9 @@ parameters: - from: '[a-zA-Z0-9]{40}' generate: expression name: OAUTH_PROXY_COOKIE_SECRET +- name: THANOS_LOG_LEVEL + required: true + value: warn +- name: THANOS_REPLICAS + required: true + value: "1" diff --git a/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-store-template.yaml b/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-store-template.yaml index ab4d481da6..a3611e9bf7 100755 --- a/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-store-template.yaml +++ b/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-store-template.yaml @@ -113,8 +113,6 @@ objects: app.kubernetes.io/instance: observatorium app.kubernetes.io/name: thanos-store app.kubernetes.io/part-of: observatorium - status: - loadBalancer: {} - apiVersion: v1 kind: ServiceAccount metadata: @@ -172,7 +170,7 @@ objects: name: 
observatorium-thanos-store namespace: rhobs spec: - replicas: 3 + replicas: ${THANOS_REPLICAS} selector: matchLabels: app.kubernetes.io/component: object-store-gateway @@ -213,7 +211,7 @@ objects: - --data-dir=/var/thanos/store - --ignore-deletion-marks-delay=24h0m0s - --log.format=logfmt - - --log.level=warn + - --log.level=${THANOS_LOG_LEVEL} - --max-time=-22h0m0s - --objstore.config=$(OBJSTORE_CONFIG) - --selector.relabel-config-file=/tmp/config/hashmod-config.yaml @@ -383,3 +381,10 @@ objects: storage: 500Gi storageClassName: gp2 status: {} +parameters: +- name: THANOS_LOG_LEVEL + required: true + value: warn +- name: THANOS_REPLICAS + required: true + value: "3" diff --git a/services_go/observatorium/metrics.go b/services_go/observatorium/metrics.go index 4f516c64d8..914c082c3b 100644 --- a/services_go/observatorium/metrics.go +++ b/services_go/observatorium/metrics.go @@ -6,15 +6,18 @@ import ( "maps" "time" + "github.com/bwplotka/mimic/encoding" "github.com/observatorium/observatorium/configuration_go/abstr/kubernetes/thanos/compactor" "github.com/observatorium/observatorium/configuration_go/abstr/kubernetes/thanos/store" "github.com/observatorium/observatorium/configuration_go/k8sutil" + "github.com/observatorium/observatorium/configuration_go/openshift" "github.com/observatorium/observatorium/configuration_go/schemas/thanos/common" "github.com/observatorium/observatorium/configuration_go/schemas/thanos/objstore" objstore3 "github.com/observatorium/observatorium/configuration_go/schemas/thanos/objstore/s3" trclient "github.com/observatorium/observatorium/configuration_go/schemas/thanos/tracing/client" "github.com/observatorium/observatorium/configuration_go/schemas/thanos/tracing/jaeger" routev1 "github.com/openshift/api/route/v1" + templatev1 "github.com/openshift/api/template/v1" monv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" "gopkg.in/yaml.v3" appsv1 "k8s.io/api/apps/v1" @@ -37,7 +40,7 @@ const ( var 
storeAutoShardRelabelConfigMap string // makeCompactor creates a base compactor component that can be derived from using the preManifestsHook. -func makeCompactor(namespace, objstoreSecret string, preManifestsHook func(*compactor.CompactorStatefulSet)) k8sutil.ObjectMap { +func makeCompactor(namespace, objstoreSecret string, preManifestsHook func(*compactor.CompactorStatefulSet)) encoding.Encoder { // K8s config compactorSatefulset := compactor.NewCompactor() compactorSatefulset.Image = thanosImage @@ -57,7 +60,6 @@ func makeCompactor(namespace, objstoreSecret string, preManifestsHook func(*comp compactorSatefulset.Sidecars = []k8sutil.ContainerProvider{makeOauthProxy(10902, namespace, compactorSatefulset.Name, tlsSecret)} // Compactor config - compactorSatefulset.Options.LogLevel = "warn" compactorSatefulset.Options.RetentionResolutionRaw = 365 * 24 * time.Hour compactorSatefulset.Options.RetentionResolution5m = 365 * 24 * time.Hour compactorSatefulset.Options.RetentionResolution1h = 365 * 24 * time.Hour @@ -67,10 +69,14 @@ func makeCompactor(namespace, objstoreSecret string, preManifestsHook func(*comp compactorSatefulset.Options.DeduplicationReplicaLabel = "replica" compactorSatefulset.Options.AddExtraOpts("--debug.max-compaction-level=3") - // Post process + // Execute preManifestsHook if preManifestsHook != nil { preManifestsHook(compactorSatefulset) } + logLevel := string(compactorSatefulset.Options.LogLevel) // capture final log level for use in template + compactorSatefulset.Options.LogLevel = "${THANOS_LOG_LEVEL}" + + // Post process manifests := compactorSatefulset.Manifests() service := getObject[*corev1.Service](manifests) service.ObjectMeta.Annotations[servingCertSecretNameAnnotation] = tlsSecret @@ -131,12 +137,38 @@ func makeCompactor(namespace, objstoreSecret string, preManifestsHook func(*comp }, } - return manifests + // Wrap in template + template := openshift.WrapInTemplate("", manifests, metav1.ObjectMeta{ + Name: 
"observatorium-metrics-compact", + }, []templatev1.Parameter{ + { + Name: "OAUTH_PROXY_COOKIE_SECRET", + Generate: "expression", + From: "[a-zA-Z0-9]{40}", + }, + { + Name: "THANOS_LOG_LEVEL", + Value: logLevel, + Required: true, + }, + { + Name: "THANOS_REPLICAS", + Value: fmt.Sprintf("%d", compactorSatefulset.Replicas), + Required: true, + }, + }) + + // Adding a special encoder wrapper to replace the replicas value in the template with a template parameter + // As the replicas value is typed as an int, it cannot be replaced using the compactor config. + yamlDecoder := templateYAML{encoder: encoding.GhodssYAML(template[""])} + yamlDecoder.AddReplacement(fmt.Sprintf(`(?m)^(\s*replicas: )%d$`, compactorSatefulset.Replicas), "${1}$${THANOS_REPLICAS}") + + return &yamlDecoder } // makeStore creates a base store component that can be derived from using the preManifestsHook. -func makeStore(namespace, objstoreSecret string, preManifestHook func(*store.StoreStatefulSet)) k8sutil.ObjectMap { +func makeStore(namespace, objstoreSecret string, preManifestHook func(*store.StoreStatefulSet)) encoding.Encoder { // K8s config storeStatefulSet := store.NewStore() storeStatefulSet.Image = thanosImage @@ -207,10 +239,14 @@ func makeStore(namespace, objstoreSecret string, preManifestHook func(*store.Sto } storeStatefulSet.Options.StoreEnableIndexHeaderLazyReader = true // Enables parallel rolling update of store nodes. 
- // Post process + // Execute preManifestHook if preManifestHook != nil { preManifestHook(storeStatefulSet) } + logLevel := string(storeStatefulSet.Options.LogLevel) // capture final log level for use in template + storeStatefulSet.Options.LogLevel = "${THANOS_LOG_LEVEL}" + + // Post process manifests := storeStatefulSet.Manifests() postProcessServiceMonitor(getObject[*monv1.ServiceMonitor](manifests), storeStatefulSet.Namespace) statefulset := getObject[*appsv1.StatefulSet](manifests) @@ -307,7 +343,28 @@ func makeStore(namespace, objstoreSecret string, preManifestHook func(*store.Sto }, } - return manifests + // Wrap in template + template := openshift.WrapInTemplate("", manifests, metav1.ObjectMeta{ + Name: "observatorium-metrics-store", + }, []templatev1.Parameter{ + { + Name: "THANOS_LOG_LEVEL", + Value: logLevel, + Required: true, + }, + { + Name: "THANOS_REPLICAS", + Value: fmt.Sprintf("%d", storeStatefulSet.Replicas), + Required: true, + }, + }) + + yamlDecoder := templateYAML{encoder: encoding.GhodssYAML(template[""])} + // Adding a special encoder wrapper to replace the replicas value in the template with a template parameter + // As the replicas value is typed as an int, it cannot be replaced using the compactor config. 
+ yamlDecoder.AddReplacement(fmt.Sprintf(`(?m)^(\s*replicas: )%d$`, storeStatefulSet.Replicas), "${1}$${THANOS_REPLICAS}") + + return &yamlDecoder } type kubeObject interface { diff --git a/services_go/observatorium/observatorium.go b/services_go/observatorium/observatorium.go index 6b83b350b3..f7277aa439 100644 --- a/services_go/observatorium/observatorium.go +++ b/services_go/observatorium/observatorium.go @@ -3,19 +3,11 @@ package observatorium // import "github.com/rhobs/configuration/services_go/components/thanos/compactor" import ( - "bytes" - "io" - "regexp" - "github.com/bwplotka/mimic" "github.com/bwplotka/mimic/encoding" "github.com/observatorium/api/rbac" "github.com/observatorium/observatorium/configuration_go/abstr/kubernetes/thanos/compactor" "github.com/observatorium/observatorium/configuration_go/abstr/kubernetes/thanos/store" - "github.com/observatorium/observatorium/configuration_go/k8sutil" - "github.com/observatorium/observatorium/configuration_go/openshift" - templatev1 "github.com/openshift/api/template/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) // TenantInstanceConfiguration is the configuration for a single tenant in an instance of observatorium. 
@@ -61,64 +53,13 @@ func NewObservatorium(cfg *InstanceConfiguration) *Observatorium { func (o *Observatorium) Manifests(generator *mimic.Generator) { components := []struct { name string - objects k8sutil.ObjectMap - params []templatev1.Parameter + encoder encoding.Encoder }{ - {"observatorium-metrics-compact", makeCompactor(o.cfg.Namespace, o.cfg.ObjStoreSecret, o.cfg.PreManifestsHooks.Compactor), []templatev1.Parameter{ - { - Name: "OAUTH_PROXY_COOKIE_SECRET", - Generate: "expression", - From: "[a-zA-Z0-9]{40}", - }, - }}, - {"observatorium-metrics-store", makeStore(o.cfg.Namespace, o.cfg.ObjStoreSecret, o.cfg.PreManifestsHooks.ThanosStore), []templatev1.Parameter{}}, + {"observatorium-metrics-compact", makeCompactor(o.cfg.Namespace, o.cfg.ObjStoreSecret, o.cfg.PreManifestsHooks.Compactor)}, + {"observatorium-metrics-store", makeStore(o.cfg.Namespace, o.cfg.ObjStoreSecret, o.cfg.PreManifestsHooks.ThanosStore)}, } for _, component := range components { - template := openshift.WrapInTemplate("", component.objects, metav1.ObjectMeta{ - Name: component.name, - }, component.params) - generator.With(o.cfg.Cluster, o.cfg.Instance).Add(component.name+"-template.yaml", &customYAML{encoder: encoding.GhodssYAML(template[""])}) + generator.With(o.cfg.Cluster, o.cfg.Instance).Add(component.name+"-template.yaml", &statusRemoveEncoder{encoder: component.encoder}) } } - -// customYAML is a YAML encoder wrapper that allows cleaning of the output. -// Wihtout this, the manifests would contain a status section that is not needed. 
-type customYAML struct { - encoder encoding.Encoder - reader io.Reader -} - -func (c *customYAML) Read(p []byte) (n int, err error) { - if c.reader == nil { - ret, err := io.ReadAll(c.encoder) - if err != nil { - panic(err) - } - - c.reader = bytes.NewBuffer(c.clean(ret)) - } - - return c.reader.Read(p) -} - -func (c *customYAML) EncodeComment(lines string) []byte { - return c.encoder.EncodeComment(lines) -} - -func (c *customYAML) clean(input []byte) []byte { - // Remove status section from manifests - re := []*regexp.Regexp{ - regexp.MustCompile(`\s*status:\n\s*availableReplicas: 0\n\s*replicas: 0`), - regexp.MustCompile(`\s*status:\n\s*currentHealthy: 0\n\s*desiredHealthy: 0\n\s*disruptionsAllowed: 0\n\s*expectedPods: 0`), - regexp.MustCompile(`\s*status:\n\s*ingress: null`), - } - - ret := input - - for _, r := range re { - ret = r.ReplaceAll(ret, []byte{}) - } - - return []byte(ret) -} diff --git a/services_go/observatorium/yaml.go b/services_go/observatorium/yaml.go new file mode 100644 index 0000000000..967c2ac60d --- /dev/null +++ b/services_go/observatorium/yaml.go @@ -0,0 +1,67 @@ +package observatorium + +import ( + "bytes" + "io" + "regexp" + + "github.com/bwplotka/mimic/encoding" +) + +// statusRemoveEncoder is a YAML encoder wrapper that allows cleaning of the output. +// Wihtout this, the manifests would contain a status section that is not needed. 
+type statusRemoveEncoder struct { + encoder encoding.Encoder + reader io.Reader +} + +func (c *statusRemoveEncoder) Read(p []byte) (n int, err error) { + if c.reader == nil { + ret, err := io.ReadAll(c.encoder) + if err != nil { + panic(err) + } + + // Remove status sections from manifests + c.reader = bytes.NewBuffer(regexp.MustCompile(`(?m)^( {2})status:\n( {4}.*\n)+`).ReplaceAll(ret, []byte{})) + } + + return c.reader.Read(p) +} + +func (c *statusRemoveEncoder) EncodeComment(lines string) []byte { + return c.encoder.EncodeComment(lines) +} + +// templateYAML is a YAML encoder wrapper that allows templating of the output. +// This is used when the target value is not typed as a string in Go. +type templateYAML struct { + encoder encoding.Encoder + reader io.Reader + replacements [][]string // regexp, replace tuples +} + +func (c *templateYAML) Read(p []byte) (n int, err error) { + if c.reader == nil { + ret, err := io.ReadAll(c.encoder) + if err != nil { + panic(err) + } + + for _, r := range c.replacements { + ret = regexp.MustCompile(r[0]).ReplaceAll(ret, []byte(r[1])) + } + + c.reader = bytes.NewBuffer(ret) + } + + return c.reader.Read(p) +} + +func (c *templateYAML) EncodeComment(lines string) []byte { + return c.encoder.EncodeComment(lines) +} + +func (c *templateYAML) AddReplacement(reg, replace string) { + c.replacements = append(c.replacements, []string{reg, replace}) +} From 8ceeca844d1ed9618dd13446dfc669e9d8be8d62 Mon Sep 17 00:00:00 2001 From: Thibault Mange <22740367+thibaultmg@users.noreply.github.com> Date: Wed, 4 Oct 2023 10:29:58 +0200 Subject: [PATCH 30/32] multi tenant observatorium support Signed-off-by: Thibault Mange <22740367+thibaultmg@users.noreply.github.com> --- ...atorium-metrics-compact-rhel-template.yaml | 339 +++++++++++++++ ...orium-metrics-compact-shared-template.yaml | 339 +++++++++++++++ ...m-metrics-compact-telemeter-template.yaml} | 53 ++- ...rvatorium-metrics-store-rhel-template.yaml | 403 ++++++++++++++++++ 
...atorium-metrics-store-shared-template.yaml | 403 ++++++++++++++++++ ...ium-metrics-store-telemeter-template.yaml} | 59 ++- ...atorium-metrics-compact-rhel-template.yaml | 339 +++++++++++++++ ...orium-metrics-compact-shared-template.yaml | 339 +++++++++++++++ ...m-metrics-compact-telemeter-template.yaml} | 55 ++- ...rvatorium-metrics-store-rhel-template.yaml | 403 ++++++++++++++++++ ...atorium-metrics-store-shared-template.yaml | 403 ++++++++++++++++++ ...ium-metrics-store-telemeter-template.yaml} | 61 +-- services_go/instances/rhobs/rhobs.go | 71 ++- .../observatorium/{yaml.go => encoders.go} | 12 +- services_go/observatorium/metrics.go | 58 +-- services_go/observatorium/observatorium.go | 70 ++- services_go/services.go | 6 +- 17 files changed, 3231 insertions(+), 182 deletions(-) create mode 100755 resources/services/app-sre-stage-01/rhobs/observatorium-metrics-compact-rhel-template.yaml create mode 100755 resources/services/app-sre-stage-01/rhobs/observatorium-metrics-compact-shared-template.yaml rename resources/services/{telemeter-prod-01/rhobs/observatorium-metrics-compact-template.yaml => app-sre-stage-01/rhobs/observatorium-metrics-compact-telemeter-template.yaml} (87%) create mode 100755 resources/services/app-sre-stage-01/rhobs/observatorium-metrics-store-rhel-template.yaml create mode 100755 resources/services/app-sre-stage-01/rhobs/observatorium-metrics-store-shared-template.yaml rename resources/services/{telemeter-prod-01/rhobs/observatorium-metrics-store-template.yaml => app-sre-stage-01/rhobs/observatorium-metrics-store-telemeter-template.yaml} (88%) create mode 100755 resources/services/telemeter-prod-01/rhobs/observatorium-metrics-compact-rhel-template.yaml create mode 100755 resources/services/telemeter-prod-01/rhobs/observatorium-metrics-compact-shared-template.yaml rename resources/services/{app-sre-stage-01/rhobs/observatorium-metrics-compact-template.yaml => telemeter-prod-01/rhobs/observatorium-metrics-compact-telemeter-template.yaml} 
(87%) create mode 100755 resources/services/telemeter-prod-01/rhobs/observatorium-metrics-store-rhel-template.yaml create mode 100755 resources/services/telemeter-prod-01/rhobs/observatorium-metrics-store-shared-template.yaml rename resources/services/{app-sre-stage-01/rhobs/observatorium-metrics-store-template.yaml => telemeter-prod-01/rhobs/observatorium-metrics-store-telemeter-template.yaml} (88%) rename services_go/observatorium/{yaml.go => encoders.go} (76%) diff --git a/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-compact-rhel-template.yaml b/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-compact-rhel-template.yaml new file mode 100755 index 0000000000..67b2707d93 --- /dev/null +++ b/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-compact-rhel-template.yaml @@ -0,0 +1,339 @@ +apiVersion: template.openshift.io/v1 +kind: Template +metadata: + creationTimestamp: null + name: observatorium-thanos-compact-rhel +objects: +- apiVersion: v1 + kind: Service + metadata: + annotations: + service.alpha.openshift.io/serving-cert-secret-name: compact-tls-rhel + creationTimestamp: null + labels: + app.kubernetes.io/component: database-compactor + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-compact + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.4 + observatorium/tenant: rhel + name: observatorium-thanos-compact-rhel + namespace: rhobs + spec: + ports: + - name: http + port: 10902 + protocol: TCP + targetPort: 10902 + - name: https + port: 8443 + protocol: TCP + targetPort: 8443 + selector: + app.kubernetes.io/component: database-compactor + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-compact + app.kubernetes.io/part-of: observatorium + observatorium/tenant: rhel +- apiVersion: v1 + kind: ServiceAccount + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: database-compactor + app.kubernetes.io/instance: 
observatorium + app.kubernetes.io/name: thanos-compact + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.4 + observatorium/tenant: rhel + name: observatorium-thanos-compact-rhel + namespace: rhobs +- apiVersion: monitoring.coreos.com/v1 + kind: ServiceMonitor + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: database-compactor + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-compact + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.4 + observatorium/tenant: rhel + prometheus: app-sre + name: observatorium-thanos-compact-rhel + namespace: openshift-customer-monitoring + spec: + endpoints: + - port: http + relabelings: + - action: replace + separator: / + sourceLabels: + - namespace + - pod + targetLabel: instance + namespaceSelector: + matchNames: + - rhobs + selector: + matchLabels: + app.kubernetes.io/component: database-compactor + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-compact + app.kubernetes.io/part-of: observatorium + observatorium/tenant: rhel +- apiVersion: apps/v1 + kind: StatefulSet + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: database-compactor + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-compact + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.4 + observatorium/tenant: rhel + name: observatorium-thanos-compact-rhel + namespace: rhobs + spec: + replicas: ${THANOS_REPLICAS} + selector: + matchLabels: + app.kubernetes.io/component: database-compactor + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-compact + app.kubernetes.io/part-of: observatorium + observatorium/tenant: rhel + serviceName: observatorium-thanos-compact-rhel + template: + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: database-compactor + app.kubernetes.io/instance: observatorium + 
app.kubernetes.io/name: thanos-compact + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.4 + observatorium/tenant: rhel + namespace: rhobs + spec: + affinity: + podAntiAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - podAffinityTerm: + labelSelector: + matchExpressions: + - key: app.kubernetes.io/instance + operator: In + values: + - observatorium + - key: app.kubernetes.io/name + operator: In + values: + - thanos-compact + topologyKey: kubernetes.io/hostname + weight: 100 + containers: + - args: + - compact + - --compact.concurrency=1 + - --data-dir=/var/thanos/compactor + - --deduplication.replica-label=replica + - --delete-delay=24h0m0s + - --downsample.concurrency=1 + - --log.format=logfmt + - --log.level=${THANOS_LOG_LEVEL} + - --objstore.config=$(OBJSTORE_CONFIG) + - --retention.resolution-1h=8760h0m0s + - --retention.resolution-5m=8760h0m0s + - --retention.resolution-raw=8760h0m0s + - --wait + - --debug.max-compaction-level=3 + env: + - name: HOST_IP_ADDRESS + valueFrom: + fieldRef: + fieldPath: status.hostIP + - name: AWS_ACCESS_KEY_ID + valueFrom: + secretKeyRef: + key: aws_access_key_id + name: rhel-tenant-s3 + - name: AWS_SECRET_ACCESS_KEY + valueFrom: + secretKeyRef: + key: aws_secret_access_key + name: rhel-tenant-s3 + - name: OBJ_STORE_BUCKET + valueFrom: + secretKeyRef: + key: bucket + name: rhel-tenant-s3 + - name: OBJ_STORE_REGION + valueFrom: + secretKeyRef: + key: aws_region + name: rhel-tenant-s3 + - name: OBJ_STORE_ENDPOINT + valueFrom: + secretKeyRef: + key: endpoint + name: rhel-tenant-s3 + - name: OBJSTORE_CONFIG + value: | + type: S3 + config: + bucket: $(OBJ_STORE_BUCKET) + endpoint: $(OBJ_STORE_ENDPOINT) + region: $(OBJ_STORE_REGION) + image: quay.io/thanos/thanos:v0.32.4 + imagePullPolicy: IfNotPresent + livenessProbe: + failureThreshold: 4 + httpGet: + path: /-/healthy + port: 10902 + periodSeconds: 30 + name: thanos + ports: + - containerPort: 10902 + name: http + protocol: TCP + 
readinessProbe: + failureThreshold: 20 + httpGet: + path: /-/ready + port: 10902 + periodSeconds: 5 + resources: + limits: + memory: 5Gi + requests: + cpu: 200m + memory: 1Gi + terminationMessagePolicy: FallbackToLogsOnError + volumeMounts: + - mountPath: /var/thanos/compactor + name: data + - args: + - -provider=openshift + - -https-address=:8443 + - -http-address= + - -email-domain=* + - -upstream=http://localhost:10902 + - -openshift-service-account=observatorium-thanos-compact-rhel + - '-openshift-sar={"resource": "namespaces", "verb": "get", "name": "rhobs", + "namespace": "rhobs"}' + - '-openshift-delegate-urls={"/": {"resource": "namespaces", "verb": "get", + "name": "rhobs", "namespace": "rhobs"}}' + - -tls-cert=/etc/tls/private/tls.crt + - -tls-key=/etc/tls/private/tls.key + - -client-secret-file=/var/run/secrets/kubernetes.io/serviceaccount/token + - -cookie-secret=${OAUTH_PROXY_COOKIE_SECRET} + - -openshift-ca=/etc/pki/tls/cert.pem + - -openshift-ca=/var/run/secrets/kubernetes.io/serviceaccount/ca.crt + image: quay.io/openshift/origin-oauth-proxy:v4.13.0 + name: oauth-proxy + ports: + - containerPort: 8443 + name: https + protocol: TCP + resources: + limits: + cpu: 200m + memory: 200Mi + requests: + cpu: 100m + memory: 100Mi + terminationMessagePolicy: FallbackToLogsOnError + volumeMounts: + - mountPath: /etc/tls/private + name: compact-tls + readOnly: true + nodeSelector: + kubernetes.io/os: linux + securityContext: + fsGroup: 65534 + runAsUser: 65534 + serviceAccountName: observatorium-thanos-compact-rhel + terminationGracePeriodSeconds: 120 + volumes: + - name: compact-tls + secret: + secretName: compact-tls-rhel + updateStrategy: {} + volumeClaimTemplates: + - metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: database-compactor + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-compact + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.4 + observatorium/tenant: rhel + 
name: data + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 500Gi + storageClassName: gp2 +- apiVersion: route.openshift.io/v1 + kind: Route + metadata: + annotations: + cert-manager.io/issuer-kind: ClusterIssuer + cert-manager.io/issuer-name: letsencrypt-prod-http + creationTimestamp: null + labels: + app.kubernetes.io/component: database-compactor + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-compact + app.kubernetes.io/part-of: observatorium + observatorium/tenant: rhel + name: observatorium-thanos-compact-rhel + namespace: rhobs + spec: + host: "" + port: + targetPort: https + tls: + insecureEdgeTerminationPolicy: Redirect + termination: reencrypt + to: + kind: Service + name: observatorium-thanos-compact-rhel + weight: null +- apiVersion: policy/v1 + kind: PodDisruptionBudget + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: database-compactor + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-compact + app.kubernetes.io/part-of: observatorium + observatorium/tenant: rhel + name: observatorium-thanos-compact-rhel + namespace: rhobs + spec: + maxUnavailable: 1 + selector: + matchLabels: + app.kubernetes.io/component: database-compactor + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-compact + app.kubernetes.io/part-of: observatorium + observatorium/tenant: rhel +parameters: +- from: '[a-zA-Z0-9]{40}' + generate: expression + name: OAUTH_PROXY_COOKIE_SECRET +- name: THANOS_LOG_LEVEL + required: true + value: warn +- name: THANOS_REPLICAS + required: true + value: "1" diff --git a/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-compact-shared-template.yaml b/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-compact-shared-template.yaml new file mode 100755 index 0000000000..a959281af5 --- /dev/null +++ 
b/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-compact-shared-template.yaml @@ -0,0 +1,339 @@ +apiVersion: template.openshift.io/v1 +kind: Template +metadata: + creationTimestamp: null + name: observatorium-thanos-compact-shared +objects: +- apiVersion: v1 + kind: Service + metadata: + annotations: + service.alpha.openshift.io/serving-cert-secret-name: compact-tls-shared + creationTimestamp: null + labels: + app.kubernetes.io/component: database-compactor + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-compact + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.4 + observatorium/tenant: shared + name: observatorium-thanos-compact-shared + namespace: rhobs + spec: + ports: + - name: http + port: 10902 + protocol: TCP + targetPort: 10902 + - name: https + port: 8443 + protocol: TCP + targetPort: 8443 + selector: + app.kubernetes.io/component: database-compactor + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-compact + app.kubernetes.io/part-of: observatorium + observatorium/tenant: shared +- apiVersion: v1 + kind: ServiceAccount + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: database-compactor + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-compact + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.4 + observatorium/tenant: shared + name: observatorium-thanos-compact-shared + namespace: rhobs +- apiVersion: monitoring.coreos.com/v1 + kind: ServiceMonitor + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: database-compactor + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-compact + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.4 + observatorium/tenant: shared + prometheus: app-sre + name: observatorium-thanos-compact-shared + namespace: openshift-customer-monitoring + spec: + endpoints: + - port: 
http + relabelings: + - action: replace + separator: / + sourceLabels: + - namespace + - pod + targetLabel: instance + namespaceSelector: + matchNames: + - rhobs + selector: + matchLabels: + app.kubernetes.io/component: database-compactor + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-compact + app.kubernetes.io/part-of: observatorium + observatorium/tenant: shared +- apiVersion: apps/v1 + kind: StatefulSet + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: database-compactor + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-compact + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.4 + observatorium/tenant: shared + name: observatorium-thanos-compact-shared + namespace: rhobs + spec: + replicas: ${THANOS_REPLICAS} + selector: + matchLabels: + app.kubernetes.io/component: database-compactor + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-compact + app.kubernetes.io/part-of: observatorium + observatorium/tenant: shared + serviceName: observatorium-thanos-compact-shared + template: + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: database-compactor + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-compact + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.4 + observatorium/tenant: shared + namespace: rhobs + spec: + affinity: + podAntiAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - podAffinityTerm: + labelSelector: + matchExpressions: + - key: app.kubernetes.io/instance + operator: In + values: + - observatorium + - key: app.kubernetes.io/name + operator: In + values: + - thanos-compact + topologyKey: kubernetes.io/hostname + weight: 100 + containers: + - args: + - compact + - --compact.concurrency=1 + - --data-dir=/var/thanos/compactor + - --deduplication.replica-label=replica + - --delete-delay=24h0m0s + - 
--downsample.concurrency=1 + - --log.format=logfmt + - --log.level=${THANOS_LOG_LEVEL} + - --objstore.config=$(OBJSTORE_CONFIG) + - --retention.resolution-1h=8760h0m0s + - --retention.resolution-5m=8760h0m0s + - --retention.resolution-raw=8760h0m0s + - --wait + - --debug.max-compaction-level=3 + env: + - name: HOST_IP_ADDRESS + valueFrom: + fieldRef: + fieldPath: status.hostIP + - name: AWS_ACCESS_KEY_ID + valueFrom: + secretKeyRef: + key: aws_access_key_id + name: shared-tenant-s3 + - name: AWS_SECRET_ACCESS_KEY + valueFrom: + secretKeyRef: + key: aws_secret_access_key + name: shared-tenant-s3 + - name: OBJ_STORE_BUCKET + valueFrom: + secretKeyRef: + key: bucket + name: shared-tenant-s3 + - name: OBJ_STORE_REGION + valueFrom: + secretKeyRef: + key: aws_region + name: shared-tenant-s3 + - name: OBJ_STORE_ENDPOINT + valueFrom: + secretKeyRef: + key: endpoint + name: shared-tenant-s3 + - name: OBJSTORE_CONFIG + value: | + type: S3 + config: + bucket: $(OBJ_STORE_BUCKET) + endpoint: $(OBJ_STORE_ENDPOINT) + region: $(OBJ_STORE_REGION) + image: quay.io/thanos/thanos:v0.32.4 + imagePullPolicy: IfNotPresent + livenessProbe: + failureThreshold: 4 + httpGet: + path: /-/healthy + port: 10902 + periodSeconds: 30 + name: thanos + ports: + - containerPort: 10902 + name: http + protocol: TCP + readinessProbe: + failureThreshold: 20 + httpGet: + path: /-/ready + port: 10902 + periodSeconds: 5 + resources: + limits: + memory: 5Gi + requests: + cpu: 200m + memory: 1Gi + terminationMessagePolicy: FallbackToLogsOnError + volumeMounts: + - mountPath: /var/thanos/compactor + name: data + - args: + - -provider=openshift + - -https-address=:8443 + - -http-address= + - -email-domain=* + - -upstream=http://localhost:10902 + - -openshift-service-account=observatorium-thanos-compact-shared + - '-openshift-sar={"resource": "namespaces", "verb": "get", "name": "rhobs", + "namespace": "rhobs"}' + - '-openshift-delegate-urls={"/": {"resource": "namespaces", "verb": "get", + "name": "rhobs", 
"namespace": "rhobs"}}' + - -tls-cert=/etc/tls/private/tls.crt + - -tls-key=/etc/tls/private/tls.key + - -client-secret-file=/var/run/secrets/kubernetes.io/serviceaccount/token + - -cookie-secret=${OAUTH_PROXY_COOKIE_SECRET} + - -openshift-ca=/etc/pki/tls/cert.pem + - -openshift-ca=/var/run/secrets/kubernetes.io/serviceaccount/ca.crt + image: quay.io/openshift/origin-oauth-proxy:v4.13.0 + name: oauth-proxy + ports: + - containerPort: 8443 + name: https + protocol: TCP + resources: + limits: + cpu: 200m + memory: 200Mi + requests: + cpu: 100m + memory: 100Mi + terminationMessagePolicy: FallbackToLogsOnError + volumeMounts: + - mountPath: /etc/tls/private + name: compact-tls + readOnly: true + nodeSelector: + kubernetes.io/os: linux + securityContext: + fsGroup: 65534 + runAsUser: 65534 + serviceAccountName: observatorium-thanos-compact-shared + terminationGracePeriodSeconds: 120 + volumes: + - name: compact-tls + secret: + secretName: compact-tls-shared + updateStrategy: {} + volumeClaimTemplates: + - metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: database-compactor + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-compact + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.4 + observatorium/tenant: shared + name: data + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 500Gi + storageClassName: gp2 +- apiVersion: route.openshift.io/v1 + kind: Route + metadata: + annotations: + cert-manager.io/issuer-kind: ClusterIssuer + cert-manager.io/issuer-name: letsencrypt-prod-http + creationTimestamp: null + labels: + app.kubernetes.io/component: database-compactor + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-compact + app.kubernetes.io/part-of: observatorium + observatorium/tenant: shared + name: observatorium-thanos-compact-shared + namespace: rhobs + spec: + host: "" + port: + targetPort: https + tls: + insecureEdgeTerminationPolicy: 
Redirect + termination: reencrypt + to: + kind: Service + name: observatorium-thanos-compact-shared + weight: null +- apiVersion: policy/v1 + kind: PodDisruptionBudget + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: database-compactor + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-compact + app.kubernetes.io/part-of: observatorium + observatorium/tenant: shared + name: observatorium-thanos-compact-shared + namespace: rhobs + spec: + maxUnavailable: 1 + selector: + matchLabels: + app.kubernetes.io/component: database-compactor + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-compact + app.kubernetes.io/part-of: observatorium + observatorium/tenant: shared +parameters: +- from: '[a-zA-Z0-9]{40}' + generate: expression + name: OAUTH_PROXY_COOKIE_SECRET +- name: THANOS_LOG_LEVEL + required: true + value: warn +- name: THANOS_REPLICAS + required: true + value: "1" diff --git a/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-compact-template.yaml b/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-compact-telemeter-template.yaml similarity index 87% rename from resources/services/telemeter-prod-01/rhobs/observatorium-metrics-compact-template.yaml rename to resources/services/app-sre-stage-01/rhobs/observatorium-metrics-compact-telemeter-template.yaml index 564e1516b2..3af907cf3b 100755 --- a/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-compact-template.yaml +++ b/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-compact-telemeter-template.yaml @@ -2,21 +2,22 @@ apiVersion: template.openshift.io/v1 kind: Template metadata: creationTimestamp: null - name: observatorium-metrics-compact + name: observatorium-thanos-compact-telemeter objects: - apiVersion: v1 kind: Service metadata: annotations: - service.alpha.openshift.io/serving-cert-secret-name: compact-tls + service.alpha.openshift.io/serving-cert-secret-name: 
compact-tls-telemeter creationTimestamp: null labels: app.kubernetes.io/component: database-compactor app.kubernetes.io/instance: observatorium app.kubernetes.io/name: thanos-compact app.kubernetes.io/part-of: observatorium - app.kubernetes.io/version: v0.32.3 - name: observatorium-thanos-compact + app.kubernetes.io/version: v0.32.4 + observatorium/tenant: telemeter + name: observatorium-thanos-compact-telemeter namespace: rhobs spec: ports: @@ -33,6 +34,7 @@ objects: app.kubernetes.io/instance: observatorium app.kubernetes.io/name: thanos-compact app.kubernetes.io/part-of: observatorium + observatorium/tenant: telemeter - apiVersion: v1 kind: ServiceAccount metadata: @@ -42,8 +44,9 @@ objects: app.kubernetes.io/instance: observatorium app.kubernetes.io/name: thanos-compact app.kubernetes.io/part-of: observatorium - app.kubernetes.io/version: v0.32.3 - name: observatorium-thanos-compact + app.kubernetes.io/version: v0.32.4 + observatorium/tenant: telemeter + name: observatorium-thanos-compact-telemeter namespace: rhobs - apiVersion: monitoring.coreos.com/v1 kind: ServiceMonitor @@ -54,9 +57,10 @@ objects: app.kubernetes.io/instance: observatorium app.kubernetes.io/name: thanos-compact app.kubernetes.io/part-of: observatorium - app.kubernetes.io/version: v0.32.3 + app.kubernetes.io/version: v0.32.4 + observatorium/tenant: telemeter prometheus: app-sre - name: observatorium-thanos-compact + name: observatorium-thanos-compact-telemeter namespace: openshift-customer-monitoring spec: endpoints: @@ -77,6 +81,7 @@ objects: app.kubernetes.io/instance: observatorium app.kubernetes.io/name: thanos-compact app.kubernetes.io/part-of: observatorium + observatorium/tenant: telemeter - apiVersion: apps/v1 kind: StatefulSet metadata: @@ -86,8 +91,9 @@ objects: app.kubernetes.io/instance: observatorium app.kubernetes.io/name: thanos-compact app.kubernetes.io/part-of: observatorium - app.kubernetes.io/version: v0.32.3 - name: observatorium-thanos-compact + app.kubernetes.io/version: 
v0.32.4 + observatorium/tenant: telemeter + name: observatorium-thanos-compact-telemeter namespace: rhobs spec: replicas: ${THANOS_REPLICAS} @@ -97,7 +103,8 @@ objects: app.kubernetes.io/instance: observatorium app.kubernetes.io/name: thanos-compact app.kubernetes.io/part-of: observatorium - serviceName: observatorium-thanos-compact + observatorium/tenant: telemeter + serviceName: observatorium-thanos-compact-telemeter template: metadata: creationTimestamp: null @@ -106,7 +113,8 @@ objects: app.kubernetes.io/instance: observatorium app.kubernetes.io/name: thanos-compact app.kubernetes.io/part-of: observatorium - app.kubernetes.io/version: v0.32.3 + app.kubernetes.io/version: v0.32.4 + observatorium/tenant: telemeter namespace: rhobs spec: affinity: @@ -178,7 +186,7 @@ objects: bucket: $(OBJ_STORE_BUCKET) endpoint: $(OBJ_STORE_ENDPOINT) region: $(OBJ_STORE_REGION) - image: quay.io/thanos/thanos:v0.32.3 + image: quay.io/thanos/thanos:v0.32.4 imagePullPolicy: IfNotPresent livenessProbe: failureThreshold: 4 @@ -213,7 +221,7 @@ objects: - -http-address= - -email-domain=* - -upstream=http://localhost:10902 - - -openshift-service-account=observatorium-thanos-compact + - -openshift-service-account=observatorium-thanos-compact-telemeter - '-openshift-sar={"resource": "namespaces", "verb": "get", "name": "rhobs", "namespace": "rhobs"}' - '-openshift-delegate-urls={"/": {"resource": "namespaces", "verb": "get", @@ -247,12 +255,12 @@ objects: securityContext: fsGroup: 65534 runAsUser: 65534 - serviceAccountName: observatorium-thanos-compact + serviceAccountName: observatorium-thanos-compact-telemeter terminationGracePeriodSeconds: 120 volumes: - name: compact-tls secret: - secretName: compact-tls + secretName: compact-tls-telemeter updateStrategy: {} volumeClaimTemplates: - metadata: @@ -262,7 +270,8 @@ objects: app.kubernetes.io/instance: observatorium app.kubernetes.io/name: thanos-compact app.kubernetes.io/part-of: observatorium - app.kubernetes.io/version: v0.32.3 + 
app.kubernetes.io/version: v0.32.4 + observatorium/tenant: telemeter name: data spec: accessModes: @@ -271,7 +280,6 @@ objects: requests: storage: 500Gi storageClassName: gp2 - status: {} - apiVersion: route.openshift.io/v1 kind: Route metadata: @@ -284,7 +292,8 @@ objects: app.kubernetes.io/instance: observatorium app.kubernetes.io/name: thanos-compact app.kubernetes.io/part-of: observatorium - name: observatorium-thanos-compact + observatorium/tenant: telemeter + name: observatorium-thanos-compact-telemeter namespace: rhobs spec: host: "" @@ -295,7 +304,7 @@ objects: termination: reencrypt to: kind: Service - name: observatorium-thanos-compact + name: observatorium-thanos-compact-telemeter weight: null - apiVersion: policy/v1 kind: PodDisruptionBudget @@ -306,7 +315,8 @@ objects: app.kubernetes.io/instance: observatorium app.kubernetes.io/name: thanos-compact app.kubernetes.io/part-of: observatorium - name: observatorium-thanos-compact + observatorium/tenant: telemeter + name: observatorium-thanos-compact-telemeter namespace: rhobs spec: maxUnavailable: 1 @@ -316,6 +326,7 @@ objects: app.kubernetes.io/instance: observatorium app.kubernetes.io/name: thanos-compact app.kubernetes.io/part-of: observatorium + observatorium/tenant: telemeter parameters: - from: '[a-zA-Z0-9]{40}' generate: expression diff --git a/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-store-rhel-template.yaml b/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-store-rhel-template.yaml new file mode 100755 index 0000000000..25c60a4d28 --- /dev/null +++ b/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-store-rhel-template.yaml @@ -0,0 +1,403 @@ +apiVersion: template.openshift.io/v1 +kind: Template +metadata: + creationTimestamp: null + name: observatorium-thanos-store-rhel +objects: +- apiVersion: rbac.authorization.k8s.io/v1 + kind: Role + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: object-store-gateway + 
app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-store + app.kubernetes.io/part-of: observatorium + observatorium/tenant: rhel + name: list-pods-rhel + namespace: rhobs + rules: + - apiGroups: + - apps + resources: + - statefulsets + verbs: + - get + - list +- apiVersion: rbac.authorization.k8s.io/v1 + kind: RoleBinding + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: object-store-gateway + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-store + app.kubernetes.io/part-of: observatorium + observatorium/tenant: rhel + name: list-pods-rhel + namespace: rhobs + roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: list-pods-rhel + subjects: + - kind: ServiceAccount + name: observatorium-thanos-store-rhel + namespace: rhobs +- apiVersion: v1 + data: + entrypoint.sh: "#!/bin/bash\n\n# Kubernetes replicas are named with the following + convention \"-\". \n# This parameter expansion removes + all characters until the last hyphen, capturing only the ordinal.\nexport ORDINAL_INDEX=${HOSTNAME##*-}\n# + This parameter expansion removes all characters after the last hyphen, capturing + only the statefulset name.\nexport STATEFULSET_NAME=\"${HOSTNAME%-*}\"\nexport + THANOS_STORE_REPLICAS=$(kubectl get statefulset ${STATEFULSET_NAME} -n ${NAMESPACE} + -o=jsonpath='{.status.replicas}')\n\n# Logging parameters\necho \"generating + store hashmod config with ORDINAL_INDEX=${ORDINAL_INDEX} THANOS_STORE_REPLICAS=${STATEFULSET_NAME} + HOSTNAME=${HOSTNAME} NAMESPACE=${NAMESPACE} THANOS_STORE_REPLICAS=${THANOS_STORE_REPLICAS}\"\n\ncat + </tmp/config/hashmod-config.yaml\n- action: hashmod\n source_labels: + [\"__block_id\"]\n target_label: shard\n modulus: ${THANOS_STORE_REPLICAS}\n- + action: keep\n source_labels: [\"shard\"]\n regex: ${ORDINAL_INDEX}\nEOF\n" + kind: ConfigMap + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: object-store-gateway + 
app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-store + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.4 + observatorium/tenant: rhel + name: hashmod-config-template-rhel + namespace: rhobs +- apiVersion: policy/v1 + kind: PodDisruptionBudget + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: object-store-gateway + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-store + app.kubernetes.io/part-of: observatorium + observatorium/tenant: rhel + name: observatorium-thanos-store-rhel + namespace: rhobs + spec: + maxUnavailable: 1 + selector: + matchLabels: + app.kubernetes.io/component: object-store-gateway + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-store + app.kubernetes.io/part-of: observatorium + observatorium/tenant: rhel +- apiVersion: v1 + kind: Service + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: object-store-gateway + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-store + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.4 + observatorium/tenant: rhel + name: observatorium-thanos-store-rhel + namespace: rhobs + spec: + ports: + - name: http + port: 10902 + protocol: TCP + targetPort: 10902 + - name: grpc + port: 10901 + protocol: TCP + targetPort: 10901 + selector: + app.kubernetes.io/component: object-store-gateway + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-store + app.kubernetes.io/part-of: observatorium + observatorium/tenant: rhel +- apiVersion: v1 + kind: ServiceAccount + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: object-store-gateway + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-store + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.4 + observatorium/tenant: rhel + name: observatorium-thanos-store-rhel 
+ namespace: rhobs +- apiVersion: monitoring.coreos.com/v1 + kind: ServiceMonitor + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: object-store-gateway + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-store + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.4 + observatorium/tenant: rhel + prometheus: app-sre + name: observatorium-thanos-store-rhel + namespace: openshift-customer-monitoring + spec: + endpoints: + - port: http + relabelings: + - action: replace + separator: / + sourceLabels: + - namespace + - pod + targetLabel: instance + namespaceSelector: + matchNames: + - rhobs + selector: + matchLabels: + app.kubernetes.io/component: object-store-gateway + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-store + app.kubernetes.io/part-of: observatorium + observatorium/tenant: rhel +- apiVersion: apps/v1 + kind: StatefulSet + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: object-store-gateway + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-store + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.4 + observatorium/tenant: rhel + name: observatorium-thanos-store-rhel + namespace: rhobs + spec: + replicas: ${THANOS_REPLICAS} + selector: + matchLabels: + app.kubernetes.io/component: object-store-gateway + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-store + app.kubernetes.io/part-of: observatorium + observatorium/tenant: rhel + serviceName: observatorium-thanos-store-rhel + template: + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: object-store-gateway + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-store + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.4 + observatorium/tenant: rhel + namespace: rhobs + spec: + affinity: + podAntiAffinity: + 
preferredDuringSchedulingIgnoredDuringExecution: + - podAffinityTerm: + labelSelector: + matchExpressions: + - key: app.kubernetes.io/instance + operator: In + values: + - observatorium + - key: app.kubernetes.io/name + operator: In + values: + - thanos-store + topologyKey: kubernetes.io/hostname + weight: 100 + containers: + - args: + - store + - --data-dir=/var/thanos/store + - --ignore-deletion-marks-delay=24h0m0s + - --log.format=logfmt + - --log.level=${THANOS_LOG_LEVEL} + - --max-time=-22h0m0s + - --objstore.config=$(OBJSTORE_CONFIG) + - --selector.relabel-config-file=/tmp/config/hashmod-config.yaml + - --store.enable-index-header-lazy-reader=true + env: + - name: HOST_IP_ADDRESS + valueFrom: + fieldRef: + fieldPath: status.hostIP + - name: AWS_ACCESS_KEY_ID + valueFrom: + secretKeyRef: + key: aws_access_key_id + name: rhel-tenant-s3 + - name: AWS_SECRET_ACCESS_KEY + valueFrom: + secretKeyRef: + key: aws_secret_access_key + name: rhel-tenant-s3 + - name: OBJ_STORE_BUCKET + valueFrom: + secretKeyRef: + key: bucket + name: rhel-tenant-s3 + - name: OBJ_STORE_REGION + valueFrom: + secretKeyRef: + key: aws_region + name: rhel-tenant-s3 + - name: OBJ_STORE_ENDPOINT + valueFrom: + secretKeyRef: + key: endpoint + name: rhel-tenant-s3 + - name: OBJSTORE_CONFIG + value: | + type: S3 + config: + bucket: $(OBJ_STORE_BUCKET) + endpoint: $(OBJ_STORE_ENDPOINT) + region: $(OBJ_STORE_REGION) + image: quay.io/thanos/thanos:v0.32.4 + imagePullPolicy: IfNotPresent + livenessProbe: + failureThreshold: 8 + httpGet: + path: /-/healthy + port: 10902 + periodSeconds: 30 + timeoutSeconds: 1 + name: thanos + ports: + - containerPort: 10902 + name: http + protocol: TCP + - containerPort: 10901 + name: grpc + protocol: TCP + readinessProbe: + failureThreshold: 20 + httpGet: + path: /-/ready + port: 10902 + periodSeconds: 5 + resources: + limits: + memory: 80Gi + requests: + cpu: "4" + memory: 20Gi + terminationMessagePolicy: FallbackToLogsOnError + volumeMounts: + - mountPath: 
/var/thanos/store + name: data + - mountPath: /etc/config + name: hashmod-config + - args: + - --reporter.grpc.host-port=dns:///otel-trace-writer-collector-headless.observatorium-tools.svc:14250 + - --reporter.type=grpc + - --agent.tags=pod.namespace=$(NAMESPACE),pod.name=$(POD) + env: + - name: NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + - name: POD + valueFrom: + fieldRef: + fieldPath: metadata.name + image: quay.io/app-sre/jaegertracing-jaeger-agent:1.22.0 + livenessProbe: + failureThreshold: 5 + httpGet: + path: / + port: 14271 + name: jaeger-agent + ports: + - containerPort: 5778 + name: configs + protocol: TCP + - containerPort: 6831 + name: jaeger-thrift + protocol: TCP + - containerPort: 14271 + name: metrics + protocol: TCP + readinessProbe: + httpGet: + path: / + port: 14271 + initialDelaySeconds: 1 + resources: + limits: + cpu: 128m + memory: 128Mi + requests: + cpu: 32m + memory: 64Mi + terminationMessagePolicy: FallbackToLogsOnError + initContainers: + - args: + - /tmp/entrypoint/entrypoint.sh + env: + - name: NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + image: quay.io/app-sre/ubi8-ubi + imagePullPolicy: IfNotPresent + name: init-hashmod-file + resources: {} + volumeMounts: + - mountPath: /tmp/entrypoint + name: hashmod-config-template + - mountPath: /etc/config + name: hashmod-config + nodeSelector: + kubernetes.io/os: linux + securityContext: + fsGroup: 65534 + runAsUser: 65534 + serviceAccountName: observatorium-thanos-store-rhel + terminationGracePeriodSeconds: 120 + volumes: + - emptyDir: {} + name: hashmod-config + - configMap: + defaultMode: 511 + name: thanos-store + name: hashmod-config-template + updateStrategy: {} + volumeClaimTemplates: + - metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: object-store-gateway + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-store + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: 
v0.32.4 + observatorium/tenant: rhel + name: data + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 500Gi + storageClassName: gp2 +parameters: +- name: THANOS_LOG_LEVEL + required: true + value: warn +- name: THANOS_REPLICAS + required: true + value: "1" diff --git a/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-store-shared-template.yaml b/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-store-shared-template.yaml new file mode 100755 index 0000000000..94e588032a --- /dev/null +++ b/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-store-shared-template.yaml @@ -0,0 +1,403 @@ +apiVersion: template.openshift.io/v1 +kind: Template +metadata: + creationTimestamp: null + name: observatorium-thanos-store-shared +objects: +- apiVersion: rbac.authorization.k8s.io/v1 + kind: Role + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: object-store-gateway + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-store + app.kubernetes.io/part-of: observatorium + observatorium/tenant: shared + name: list-pods-shared + namespace: rhobs + rules: + - apiGroups: + - apps + resources: + - statefulsets + verbs: + - get + - list +- apiVersion: rbac.authorization.k8s.io/v1 + kind: RoleBinding + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: object-store-gateway + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-store + app.kubernetes.io/part-of: observatorium + observatorium/tenant: shared + name: list-pods-shared + namespace: rhobs + roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: list-pods-shared + subjects: + - kind: ServiceAccount + name: observatorium-thanos-store-shared + namespace: rhobs +- apiVersion: v1 + data: + entrypoint.sh: "#!/bin/bash\n\n# Kubernetes replicas are named with the following + convention \"-\". 
\n# This parameter expansion removes + all characters until the last hyphen, capturing only the ordinal.\nexport ORDINAL_INDEX=${HOSTNAME##*-}\n# + This parameter expansion removes all characters after the last hyphen, capturing + only the statefulset name.\nexport STATEFULSET_NAME=\"${HOSTNAME%-*}\"\nexport + THANOS_STORE_REPLICAS=$(kubectl get statefulset ${STATEFULSET_NAME} -n ${NAMESPACE} + -o=jsonpath='{.status.replicas}')\n\n# Logging parameters\necho \"generating + store hashmod config with ORDINAL_INDEX=${ORDINAL_INDEX} THANOS_STORE_REPLICAS=${STATEFULSET_NAME} + HOSTNAME=${HOSTNAME} NAMESPACE=${NAMESPACE} THANOS_STORE_REPLICAS=${THANOS_STORE_REPLICAS}\"\n\ncat + </tmp/config/hashmod-config.yaml\n- action: hashmod\n source_labels: + [\"__block_id\"]\n target_label: shard\n modulus: ${THANOS_STORE_REPLICAS}\n- + action: keep\n source_labels: [\"shard\"]\n regex: ${ORDINAL_INDEX}\nEOF\n" + kind: ConfigMap + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: object-store-gateway + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-store + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.4 + observatorium/tenant: shared + name: hashmod-config-template-shared + namespace: rhobs +- apiVersion: policy/v1 + kind: PodDisruptionBudget + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: object-store-gateway + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-store + app.kubernetes.io/part-of: observatorium + observatorium/tenant: shared + name: observatorium-thanos-store-shared + namespace: rhobs + spec: + maxUnavailable: 1 + selector: + matchLabels: + app.kubernetes.io/component: object-store-gateway + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-store + app.kubernetes.io/part-of: observatorium + observatorium/tenant: shared +- apiVersion: v1 + kind: Service + metadata: + creationTimestamp: null + labels: + 
app.kubernetes.io/component: object-store-gateway + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-store + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.4 + observatorium/tenant: shared + name: observatorium-thanos-store-shared + namespace: rhobs + spec: + ports: + - name: http + port: 10902 + protocol: TCP + targetPort: 10902 + - name: grpc + port: 10901 + protocol: TCP + targetPort: 10901 + selector: + app.kubernetes.io/component: object-store-gateway + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-store + app.kubernetes.io/part-of: observatorium + observatorium/tenant: shared +- apiVersion: v1 + kind: ServiceAccount + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: object-store-gateway + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-store + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.4 + observatorium/tenant: shared + name: observatorium-thanos-store-shared + namespace: rhobs +- apiVersion: monitoring.coreos.com/v1 + kind: ServiceMonitor + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: object-store-gateway + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-store + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.4 + observatorium/tenant: shared + prometheus: app-sre + name: observatorium-thanos-store-shared + namespace: openshift-customer-monitoring + spec: + endpoints: + - port: http + relabelings: + - action: replace + separator: / + sourceLabels: + - namespace + - pod + targetLabel: instance + namespaceSelector: + matchNames: + - rhobs + selector: + matchLabels: + app.kubernetes.io/component: object-store-gateway + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-store + app.kubernetes.io/part-of: observatorium + observatorium/tenant: shared +- apiVersion: apps/v1 + kind: 
StatefulSet + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: object-store-gateway + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-store + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.4 + observatorium/tenant: shared + name: observatorium-thanos-store-shared + namespace: rhobs + spec: + replicas: ${THANOS_REPLICAS} + selector: + matchLabels: + app.kubernetes.io/component: object-store-gateway + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-store + app.kubernetes.io/part-of: observatorium + observatorium/tenant: shared + serviceName: observatorium-thanos-store-shared + template: + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: object-store-gateway + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-store + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.4 + observatorium/tenant: shared + namespace: rhobs + spec: + affinity: + podAntiAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - podAffinityTerm: + labelSelector: + matchExpressions: + - key: app.kubernetes.io/instance + operator: In + values: + - observatorium + - key: app.kubernetes.io/name + operator: In + values: + - thanos-store + topologyKey: kubernetes.io/hostname + weight: 100 + containers: + - args: + - store + - --data-dir=/var/thanos/store + - --ignore-deletion-marks-delay=24h0m0s + - --log.format=logfmt + - --log.level=${THANOS_LOG_LEVEL} + - --max-time=-22h0m0s + - --objstore.config=$(OBJSTORE_CONFIG) + - --selector.relabel-config-file=/tmp/config/hashmod-config.yaml + - --store.enable-index-header-lazy-reader=true + env: + - name: HOST_IP_ADDRESS + valueFrom: + fieldRef: + fieldPath: status.hostIP + - name: AWS_ACCESS_KEY_ID + valueFrom: + secretKeyRef: + key: aws_access_key_id + name: shared-tenant-s3 + - name: AWS_SECRET_ACCESS_KEY + valueFrom: + secretKeyRef: + key: 
aws_secret_access_key + name: shared-tenant-s3 + - name: OBJ_STORE_BUCKET + valueFrom: + secretKeyRef: + key: bucket + name: shared-tenant-s3 + - name: OBJ_STORE_REGION + valueFrom: + secretKeyRef: + key: aws_region + name: shared-tenant-s3 + - name: OBJ_STORE_ENDPOINT + valueFrom: + secretKeyRef: + key: endpoint + name: shared-tenant-s3 + - name: OBJSTORE_CONFIG + value: | + type: S3 + config: + bucket: $(OBJ_STORE_BUCKET) + endpoint: $(OBJ_STORE_ENDPOINT) + region: $(OBJ_STORE_REGION) + image: quay.io/thanos/thanos:v0.32.4 + imagePullPolicy: IfNotPresent + livenessProbe: + failureThreshold: 8 + httpGet: + path: /-/healthy + port: 10902 + periodSeconds: 30 + timeoutSeconds: 1 + name: thanos + ports: + - containerPort: 10902 + name: http + protocol: TCP + - containerPort: 10901 + name: grpc + protocol: TCP + readinessProbe: + failureThreshold: 20 + httpGet: + path: /-/ready + port: 10902 + periodSeconds: 5 + resources: + limits: + memory: 80Gi + requests: + cpu: "4" + memory: 20Gi + terminationMessagePolicy: FallbackToLogsOnError + volumeMounts: + - mountPath: /var/thanos/store + name: data + - mountPath: /etc/config + name: hashmod-config + - args: + - --reporter.grpc.host-port=dns:///otel-trace-writer-collector-headless.observatorium-tools.svc:14250 + - --reporter.type=grpc + - --agent.tags=pod.namespace=$(NAMESPACE),pod.name=$(POD) + env: + - name: NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + - name: POD + valueFrom: + fieldRef: + fieldPath: metadata.name + image: quay.io/app-sre/jaegertracing-jaeger-agent:1.22.0 + livenessProbe: + failureThreshold: 5 + httpGet: + path: / + port: 14271 + name: jaeger-agent + ports: + - containerPort: 5778 + name: configs + protocol: TCP + - containerPort: 6831 + name: jaeger-thrift + protocol: TCP + - containerPort: 14271 + name: metrics + protocol: TCP + readinessProbe: + httpGet: + path: / + port: 14271 + initialDelaySeconds: 1 + resources: + limits: + cpu: 128m + memory: 128Mi + requests: + cpu: 32m + 
memory: 64Mi + terminationMessagePolicy: FallbackToLogsOnError + initContainers: + - args: + - /tmp/entrypoint/entrypoint.sh + env: + - name: NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + image: quay.io/app-sre/ubi8-ubi + imagePullPolicy: IfNotPresent + name: init-hashmod-file + resources: {} + volumeMounts: + - mountPath: /tmp/entrypoint + name: hashmod-config-template + - mountPath: /etc/config + name: hashmod-config + nodeSelector: + kubernetes.io/os: linux + securityContext: + fsGroup: 65534 + runAsUser: 65534 + serviceAccountName: observatorium-thanos-store-shared + terminationGracePeriodSeconds: 120 + volumes: + - emptyDir: {} + name: hashmod-config + - configMap: + defaultMode: 511 + name: thanos-store + name: hashmod-config-template + updateStrategy: {} + volumeClaimTemplates: + - metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: object-store-gateway + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-store + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.4 + observatorium/tenant: shared + name: data + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 500Gi + storageClassName: gp2 +parameters: +- name: THANOS_LOG_LEVEL + required: true + value: warn +- name: THANOS_REPLICAS + required: true + value: "1" diff --git a/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-store-template.yaml b/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-store-telemeter-template.yaml similarity index 88% rename from resources/services/telemeter-prod-01/rhobs/observatorium-metrics-store-template.yaml rename to resources/services/app-sre-stage-01/rhobs/observatorium-metrics-store-telemeter-template.yaml index a3611e9bf7..d25a78ab55 100755 --- a/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-store-template.yaml +++ 
b/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-store-telemeter-template.yaml @@ -2,7 +2,7 @@ apiVersion: template.openshift.io/v1 kind: Template metadata: creationTimestamp: null - name: observatorium-metrics-store + name: observatorium-thanos-store-telemeter objects: - apiVersion: rbac.authorization.k8s.io/v1 kind: Role @@ -13,7 +13,8 @@ objects: app.kubernetes.io/instance: observatorium app.kubernetes.io/name: thanos-store app.kubernetes.io/part-of: observatorium - name: list-pods + observatorium/tenant: telemeter + name: list-pods-telemeter namespace: rhobs rules: - apiGroups: @@ -32,15 +33,16 @@ objects: app.kubernetes.io/instance: observatorium app.kubernetes.io/name: thanos-store app.kubernetes.io/part-of: observatorium - name: list-pods + observatorium/tenant: telemeter + name: list-pods-telemeter namespace: rhobs roleRef: apiGroup: rbac.authorization.k8s.io kind: Role - name: list-pods + name: list-pods-telemeter subjects: - kind: ServiceAccount - name: observatorium-thanos-store + name: observatorium-thanos-store-telemeter namespace: rhobs - apiVersion: v1 data: @@ -64,8 +66,9 @@ objects: app.kubernetes.io/instance: observatorium app.kubernetes.io/name: thanos-store app.kubernetes.io/part-of: observatorium - app.kubernetes.io/version: v0.32.3 - name: hashmod-config-template + app.kubernetes.io/version: v0.32.4 + observatorium/tenant: telemeter + name: hashmod-config-template-telemeter namespace: rhobs - apiVersion: policy/v1 kind: PodDisruptionBudget @@ -76,7 +79,8 @@ objects: app.kubernetes.io/instance: observatorium app.kubernetes.io/name: thanos-store app.kubernetes.io/part-of: observatorium - name: observatorium-thanos-store + observatorium/tenant: telemeter + name: observatorium-thanos-store-telemeter namespace: rhobs spec: maxUnavailable: 1 @@ -86,6 +90,7 @@ objects: app.kubernetes.io/instance: observatorium app.kubernetes.io/name: thanos-store app.kubernetes.io/part-of: observatorium + observatorium/tenant: telemeter - apiVersion: 
v1 kind: Service metadata: @@ -95,8 +100,9 @@ objects: app.kubernetes.io/instance: observatorium app.kubernetes.io/name: thanos-store app.kubernetes.io/part-of: observatorium - app.kubernetes.io/version: v0.32.3 - name: observatorium-thanos-store + app.kubernetes.io/version: v0.32.4 + observatorium/tenant: telemeter + name: observatorium-thanos-store-telemeter namespace: rhobs spec: ports: @@ -113,6 +119,7 @@ objects: app.kubernetes.io/instance: observatorium app.kubernetes.io/name: thanos-store app.kubernetes.io/part-of: observatorium + observatorium/tenant: telemeter - apiVersion: v1 kind: ServiceAccount metadata: @@ -122,8 +129,9 @@ objects: app.kubernetes.io/instance: observatorium app.kubernetes.io/name: thanos-store app.kubernetes.io/part-of: observatorium - app.kubernetes.io/version: v0.32.3 - name: observatorium-thanos-store + app.kubernetes.io/version: v0.32.4 + observatorium/tenant: telemeter + name: observatorium-thanos-store-telemeter namespace: rhobs - apiVersion: monitoring.coreos.com/v1 kind: ServiceMonitor @@ -134,9 +142,10 @@ objects: app.kubernetes.io/instance: observatorium app.kubernetes.io/name: thanos-store app.kubernetes.io/part-of: observatorium - app.kubernetes.io/version: v0.32.3 + app.kubernetes.io/version: v0.32.4 + observatorium/tenant: telemeter prometheus: app-sre - name: observatorium-thanos-store + name: observatorium-thanos-store-telemeter namespace: openshift-customer-monitoring spec: endpoints: @@ -157,6 +166,7 @@ objects: app.kubernetes.io/instance: observatorium app.kubernetes.io/name: thanos-store app.kubernetes.io/part-of: observatorium + observatorium/tenant: telemeter - apiVersion: apps/v1 kind: StatefulSet metadata: @@ -166,8 +176,9 @@ objects: app.kubernetes.io/instance: observatorium app.kubernetes.io/name: thanos-store app.kubernetes.io/part-of: observatorium - app.kubernetes.io/version: v0.32.3 - name: observatorium-thanos-store + app.kubernetes.io/version: v0.32.4 + observatorium/tenant: telemeter + name: 
observatorium-thanos-store-telemeter namespace: rhobs spec: replicas: ${THANOS_REPLICAS} @@ -177,7 +188,8 @@ objects: app.kubernetes.io/instance: observatorium app.kubernetes.io/name: thanos-store app.kubernetes.io/part-of: observatorium - serviceName: observatorium-thanos-store + observatorium/tenant: telemeter + serviceName: observatorium-thanos-store-telemeter template: metadata: creationTimestamp: null @@ -186,7 +198,8 @@ objects: app.kubernetes.io/instance: observatorium app.kubernetes.io/name: thanos-store app.kubernetes.io/part-of: observatorium - app.kubernetes.io/version: v0.32.3 + app.kubernetes.io/version: v0.32.4 + observatorium/tenant: telemeter namespace: rhobs spec: affinity: @@ -253,7 +266,7 @@ objects: bucket: $(OBJ_STORE_BUCKET) endpoint: $(OBJ_STORE_ENDPOINT) region: $(OBJ_STORE_REGION) - image: quay.io/thanos/thanos:v0.32.3 + image: quay.io/thanos/thanos:v0.32.4 imagePullPolicy: IfNotPresent livenessProbe: failureThreshold: 8 @@ -353,7 +366,7 @@ objects: securityContext: fsGroup: 65534 runAsUser: 65534 - serviceAccountName: observatorium-thanos-store + serviceAccountName: observatorium-thanos-store-telemeter terminationGracePeriodSeconds: 120 volumes: - emptyDir: {} @@ -371,7 +384,8 @@ objects: app.kubernetes.io/instance: observatorium app.kubernetes.io/name: thanos-store app.kubernetes.io/part-of: observatorium - app.kubernetes.io/version: v0.32.3 + app.kubernetes.io/version: v0.32.4 + observatorium/tenant: telemeter name: data spec: accessModes: @@ -380,11 +394,10 @@ objects: requests: storage: 500Gi storageClassName: gp2 - status: {} parameters: - name: THANOS_LOG_LEVEL required: true value: warn - name: THANOS_REPLICAS required: true - value: "3" + value: "1" diff --git a/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-compact-rhel-template.yaml b/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-compact-rhel-template.yaml new file mode 100755 index 0000000000..67b2707d93 --- /dev/null +++ 
b/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-compact-rhel-template.yaml @@ -0,0 +1,339 @@ +apiVersion: template.openshift.io/v1 +kind: Template +metadata: + creationTimestamp: null + name: observatorium-thanos-compact-rhel +objects: +- apiVersion: v1 + kind: Service + metadata: + annotations: + service.alpha.openshift.io/serving-cert-secret-name: compact-tls-rhel + creationTimestamp: null + labels: + app.kubernetes.io/component: database-compactor + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-compact + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.4 + observatorium/tenant: rhel + name: observatorium-thanos-compact-rhel + namespace: rhobs + spec: + ports: + - name: http + port: 10902 + protocol: TCP + targetPort: 10902 + - name: https + port: 8443 + protocol: TCP + targetPort: 8443 + selector: + app.kubernetes.io/component: database-compactor + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-compact + app.kubernetes.io/part-of: observatorium + observatorium/tenant: rhel +- apiVersion: v1 + kind: ServiceAccount + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: database-compactor + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-compact + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.4 + observatorium/tenant: rhel + name: observatorium-thanos-compact-rhel + namespace: rhobs +- apiVersion: monitoring.coreos.com/v1 + kind: ServiceMonitor + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: database-compactor + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-compact + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.4 + observatorium/tenant: rhel + prometheus: app-sre + name: observatorium-thanos-compact-rhel + namespace: openshift-customer-monitoring + spec: + endpoints: + - port: http + 
relabelings: + - action: replace + separator: / + sourceLabels: + - namespace + - pod + targetLabel: instance + namespaceSelector: + matchNames: + - rhobs + selector: + matchLabels: + app.kubernetes.io/component: database-compactor + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-compact + app.kubernetes.io/part-of: observatorium + observatorium/tenant: rhel +- apiVersion: apps/v1 + kind: StatefulSet + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: database-compactor + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-compact + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.4 + observatorium/tenant: rhel + name: observatorium-thanos-compact-rhel + namespace: rhobs + spec: + replicas: ${THANOS_REPLICAS} + selector: + matchLabels: + app.kubernetes.io/component: database-compactor + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-compact + app.kubernetes.io/part-of: observatorium + observatorium/tenant: rhel + serviceName: observatorium-thanos-compact-rhel + template: + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: database-compactor + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-compact + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.4 + observatorium/tenant: rhel + namespace: rhobs + spec: + affinity: + podAntiAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - podAffinityTerm: + labelSelector: + matchExpressions: + - key: app.kubernetes.io/instance + operator: In + values: + - observatorium + - key: app.kubernetes.io/name + operator: In + values: + - thanos-compact + topologyKey: kubernetes.io/hostname + weight: 100 + containers: + - args: + - compact + - --compact.concurrency=1 + - --data-dir=/var/thanos/compactor + - --deduplication.replica-label=replica + - --delete-delay=24h0m0s + - --downsample.concurrency=1 + - 
--log.format=logfmt + - --log.level=${THANOS_LOG_LEVEL} + - --objstore.config=$(OBJSTORE_CONFIG) + - --retention.resolution-1h=8760h0m0s + - --retention.resolution-5m=8760h0m0s + - --retention.resolution-raw=8760h0m0s + - --wait + - --debug.max-compaction-level=3 + env: + - name: HOST_IP_ADDRESS + valueFrom: + fieldRef: + fieldPath: status.hostIP + - name: AWS_ACCESS_KEY_ID + valueFrom: + secretKeyRef: + key: aws_access_key_id + name: rhel-tenant-s3 + - name: AWS_SECRET_ACCESS_KEY + valueFrom: + secretKeyRef: + key: aws_secret_access_key + name: rhel-tenant-s3 + - name: OBJ_STORE_BUCKET + valueFrom: + secretKeyRef: + key: bucket + name: rhel-tenant-s3 + - name: OBJ_STORE_REGION + valueFrom: + secretKeyRef: + key: aws_region + name: rhel-tenant-s3 + - name: OBJ_STORE_ENDPOINT + valueFrom: + secretKeyRef: + key: endpoint + name: rhel-tenant-s3 + - name: OBJSTORE_CONFIG + value: | + type: S3 + config: + bucket: $(OBJ_STORE_BUCKET) + endpoint: $(OBJ_STORE_ENDPOINT) + region: $(OBJ_STORE_REGION) + image: quay.io/thanos/thanos:v0.32.4 + imagePullPolicy: IfNotPresent + livenessProbe: + failureThreshold: 4 + httpGet: + path: /-/healthy + port: 10902 + periodSeconds: 30 + name: thanos + ports: + - containerPort: 10902 + name: http + protocol: TCP + readinessProbe: + failureThreshold: 20 + httpGet: + path: /-/ready + port: 10902 + periodSeconds: 5 + resources: + limits: + memory: 5Gi + requests: + cpu: 200m + memory: 1Gi + terminationMessagePolicy: FallbackToLogsOnError + volumeMounts: + - mountPath: /var/thanos/compactor + name: data + - args: + - -provider=openshift + - -https-address=:8443 + - -http-address= + - -email-domain=* + - -upstream=http://localhost:10902 + - -openshift-service-account=observatorium-thanos-compact-rhel + - '-openshift-sar={"resource": "namespaces", "verb": "get", "name": "rhobs", + "namespace": "rhobs"}' + - '-openshift-delegate-urls={"/": {"resource": "namespaces", "verb": "get", + "name": "rhobs", "namespace": "rhobs"}}' + - 
-tls-cert=/etc/tls/private/tls.crt + - -tls-key=/etc/tls/private/tls.key + - -client-secret-file=/var/run/secrets/kubernetes.io/serviceaccount/token + - -cookie-secret=${OAUTH_PROXY_COOKIE_SECRET} + - -openshift-ca=/etc/pki/tls/cert.pem + - -openshift-ca=/var/run/secrets/kubernetes.io/serviceaccount/ca.crt + image: quay.io/openshift/origin-oauth-proxy:v4.13.0 + name: oauth-proxy + ports: + - containerPort: 8443 + name: https + protocol: TCP + resources: + limits: + cpu: 200m + memory: 200Mi + requests: + cpu: 100m + memory: 100Mi + terminationMessagePolicy: FallbackToLogsOnError + volumeMounts: + - mountPath: /etc/tls/private + name: compact-tls + readOnly: true + nodeSelector: + kubernetes.io/os: linux + securityContext: + fsGroup: 65534 + runAsUser: 65534 + serviceAccountName: observatorium-thanos-compact-rhel + terminationGracePeriodSeconds: 120 + volumes: + - name: compact-tls + secret: + secretName: compact-tls-rhel + updateStrategy: {} + volumeClaimTemplates: + - metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: database-compactor + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-compact + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.4 + observatorium/tenant: rhel + name: data + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 500Gi + storageClassName: gp2 +- apiVersion: route.openshift.io/v1 + kind: Route + metadata: + annotations: + cert-manager.io/issuer-kind: ClusterIssuer + cert-manager.io/issuer-name: letsencrypt-prod-http + creationTimestamp: null + labels: + app.kubernetes.io/component: database-compactor + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-compact + app.kubernetes.io/part-of: observatorium + observatorium/tenant: rhel + name: observatorium-thanos-compact-rhel + namespace: rhobs + spec: + host: "" + port: + targetPort: https + tls: + insecureEdgeTerminationPolicy: Redirect + termination: reencrypt + 
to: + kind: Service + name: observatorium-thanos-compact-rhel + weight: null +- apiVersion: policy/v1 + kind: PodDisruptionBudget + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: database-compactor + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-compact + app.kubernetes.io/part-of: observatorium + observatorium/tenant: rhel + name: observatorium-thanos-compact-rhel + namespace: rhobs + spec: + maxUnavailable: 1 + selector: + matchLabels: + app.kubernetes.io/component: database-compactor + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-compact + app.kubernetes.io/part-of: observatorium + observatorium/tenant: rhel +parameters: +- from: '[a-zA-Z0-9]{40}' + generate: expression + name: OAUTH_PROXY_COOKIE_SECRET +- name: THANOS_LOG_LEVEL + required: true + value: warn +- name: THANOS_REPLICAS + required: true + value: "1" diff --git a/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-compact-shared-template.yaml b/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-compact-shared-template.yaml new file mode 100755 index 0000000000..a959281af5 --- /dev/null +++ b/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-compact-shared-template.yaml @@ -0,0 +1,339 @@ +apiVersion: template.openshift.io/v1 +kind: Template +metadata: + creationTimestamp: null + name: observatorium-thanos-compact-shared +objects: +- apiVersion: v1 + kind: Service + metadata: + annotations: + service.alpha.openshift.io/serving-cert-secret-name: compact-tls-shared + creationTimestamp: null + labels: + app.kubernetes.io/component: database-compactor + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-compact + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.4 + observatorium/tenant: shared + name: observatorium-thanos-compact-shared + namespace: rhobs + spec: + ports: + - name: http + port: 10902 + protocol: TCP + 
targetPort: 10902 + - name: https + port: 8443 + protocol: TCP + targetPort: 8443 + selector: + app.kubernetes.io/component: database-compactor + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-compact + app.kubernetes.io/part-of: observatorium + observatorium/tenant: shared +- apiVersion: v1 + kind: ServiceAccount + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: database-compactor + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-compact + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.4 + observatorium/tenant: shared + name: observatorium-thanos-compact-shared + namespace: rhobs +- apiVersion: monitoring.coreos.com/v1 + kind: ServiceMonitor + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: database-compactor + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-compact + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.4 + observatorium/tenant: shared + prometheus: app-sre + name: observatorium-thanos-compact-shared + namespace: openshift-customer-monitoring + spec: + endpoints: + - port: http + relabelings: + - action: replace + separator: / + sourceLabels: + - namespace + - pod + targetLabel: instance + namespaceSelector: + matchNames: + - rhobs + selector: + matchLabels: + app.kubernetes.io/component: database-compactor + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-compact + app.kubernetes.io/part-of: observatorium + observatorium/tenant: shared +- apiVersion: apps/v1 + kind: StatefulSet + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: database-compactor + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-compact + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.4 + observatorium/tenant: shared + name: observatorium-thanos-compact-shared + namespace: rhobs + 
spec: + replicas: ${THANOS_REPLICAS} + selector: + matchLabels: + app.kubernetes.io/component: database-compactor + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-compact + app.kubernetes.io/part-of: observatorium + observatorium/tenant: shared + serviceName: observatorium-thanos-compact-shared + template: + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: database-compactor + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-compact + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.4 + observatorium/tenant: shared + namespace: rhobs + spec: + affinity: + podAntiAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - podAffinityTerm: + labelSelector: + matchExpressions: + - key: app.kubernetes.io/instance + operator: In + values: + - observatorium + - key: app.kubernetes.io/name + operator: In + values: + - thanos-compact + topologyKey: kubernetes.io/hostname + weight: 100 + containers: + - args: + - compact + - --compact.concurrency=1 + - --data-dir=/var/thanos/compactor + - --deduplication.replica-label=replica + - --delete-delay=24h0m0s + - --downsample.concurrency=1 + - --log.format=logfmt + - --log.level=${THANOS_LOG_LEVEL} + - --objstore.config=$(OBJSTORE_CONFIG) + - --retention.resolution-1h=8760h0m0s + - --retention.resolution-5m=8760h0m0s + - --retention.resolution-raw=8760h0m0s + - --wait + - --debug.max-compaction-level=3 + env: + - name: HOST_IP_ADDRESS + valueFrom: + fieldRef: + fieldPath: status.hostIP + - name: AWS_ACCESS_KEY_ID + valueFrom: + secretKeyRef: + key: aws_access_key_id + name: shared-tenant-s3 + - name: AWS_SECRET_ACCESS_KEY + valueFrom: + secretKeyRef: + key: aws_secret_access_key + name: shared-tenant-s3 + - name: OBJ_STORE_BUCKET + valueFrom: + secretKeyRef: + key: bucket + name: shared-tenant-s3 + - name: OBJ_STORE_REGION + valueFrom: + secretKeyRef: + key: aws_region + name: shared-tenant-s3 + - name: 
OBJ_STORE_ENDPOINT + valueFrom: + secretKeyRef: + key: endpoint + name: shared-tenant-s3 + - name: OBJSTORE_CONFIG + value: | + type: S3 + config: + bucket: $(OBJ_STORE_BUCKET) + endpoint: $(OBJ_STORE_ENDPOINT) + region: $(OBJ_STORE_REGION) + image: quay.io/thanos/thanos:v0.32.4 + imagePullPolicy: IfNotPresent + livenessProbe: + failureThreshold: 4 + httpGet: + path: /-/healthy + port: 10902 + periodSeconds: 30 + name: thanos + ports: + - containerPort: 10902 + name: http + protocol: TCP + readinessProbe: + failureThreshold: 20 + httpGet: + path: /-/ready + port: 10902 + periodSeconds: 5 + resources: + limits: + memory: 5Gi + requests: + cpu: 200m + memory: 1Gi + terminationMessagePolicy: FallbackToLogsOnError + volumeMounts: + - mountPath: /var/thanos/compactor + name: data + - args: + - -provider=openshift + - -https-address=:8443 + - -http-address= + - -email-domain=* + - -upstream=http://localhost:10902 + - -openshift-service-account=observatorium-thanos-compact-shared + - '-openshift-sar={"resource": "namespaces", "verb": "get", "name": "rhobs", + "namespace": "rhobs"}' + - '-openshift-delegate-urls={"/": {"resource": "namespaces", "verb": "get", + "name": "rhobs", "namespace": "rhobs"}}' + - -tls-cert=/etc/tls/private/tls.crt + - -tls-key=/etc/tls/private/tls.key + - -client-secret-file=/var/run/secrets/kubernetes.io/serviceaccount/token + - -cookie-secret=${OAUTH_PROXY_COOKIE_SECRET} + - -openshift-ca=/etc/pki/tls/cert.pem + - -openshift-ca=/var/run/secrets/kubernetes.io/serviceaccount/ca.crt + image: quay.io/openshift/origin-oauth-proxy:v4.13.0 + name: oauth-proxy + ports: + - containerPort: 8443 + name: https + protocol: TCP + resources: + limits: + cpu: 200m + memory: 200Mi + requests: + cpu: 100m + memory: 100Mi + terminationMessagePolicy: FallbackToLogsOnError + volumeMounts: + - mountPath: /etc/tls/private + name: compact-tls + readOnly: true + nodeSelector: + kubernetes.io/os: linux + securityContext: + fsGroup: 65534 + runAsUser: 65534 + 
serviceAccountName: observatorium-thanos-compact-shared + terminationGracePeriodSeconds: 120 + volumes: + - name: compact-tls + secret: + secretName: compact-tls-shared + updateStrategy: {} + volumeClaimTemplates: + - metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: database-compactor + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-compact + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.4 + observatorium/tenant: shared + name: data + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 500Gi + storageClassName: gp2 +- apiVersion: route.openshift.io/v1 + kind: Route + metadata: + annotations: + cert-manager.io/issuer-kind: ClusterIssuer + cert-manager.io/issuer-name: letsencrypt-prod-http + creationTimestamp: null + labels: + app.kubernetes.io/component: database-compactor + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-compact + app.kubernetes.io/part-of: observatorium + observatorium/tenant: shared + name: observatorium-thanos-compact-shared + namespace: rhobs + spec: + host: "" + port: + targetPort: https + tls: + insecureEdgeTerminationPolicy: Redirect + termination: reencrypt + to: + kind: Service + name: observatorium-thanos-compact-shared + weight: null +- apiVersion: policy/v1 + kind: PodDisruptionBudget + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: database-compactor + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-compact + app.kubernetes.io/part-of: observatorium + observatorium/tenant: shared + name: observatorium-thanos-compact-shared + namespace: rhobs + spec: + maxUnavailable: 1 + selector: + matchLabels: + app.kubernetes.io/component: database-compactor + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-compact + app.kubernetes.io/part-of: observatorium + observatorium/tenant: shared +parameters: +- from: '[a-zA-Z0-9]{40}' + 
generate: expression + name: OAUTH_PROXY_COOKIE_SECRET +- name: THANOS_LOG_LEVEL + required: true + value: warn +- name: THANOS_REPLICAS + required: true + value: "1" diff --git a/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-compact-template.yaml b/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-compact-telemeter-template.yaml similarity index 87% rename from resources/services/app-sre-stage-01/rhobs/observatorium-metrics-compact-template.yaml rename to resources/services/telemeter-prod-01/rhobs/observatorium-metrics-compact-telemeter-template.yaml index 3838675fa0..3af907cf3b 100755 --- a/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-compact-template.yaml +++ b/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-compact-telemeter-template.yaml @@ -2,21 +2,22 @@ apiVersion: template.openshift.io/v1 kind: Template metadata: creationTimestamp: null - name: observatorium-metrics-compact + name: observatorium-thanos-compact-telemeter objects: - apiVersion: v1 kind: Service metadata: annotations: - service.alpha.openshift.io/serving-cert-secret-name: compact-tls + service.alpha.openshift.io/serving-cert-secret-name: compact-tls-telemeter creationTimestamp: null labels: app.kubernetes.io/component: database-compactor app.kubernetes.io/instance: observatorium app.kubernetes.io/name: thanos-compact app.kubernetes.io/part-of: observatorium - app.kubernetes.io/version: v0.32.3 - name: observatorium-thanos-compact + app.kubernetes.io/version: v0.32.4 + observatorium/tenant: telemeter + name: observatorium-thanos-compact-telemeter namespace: rhobs spec: ports: @@ -33,6 +34,7 @@ objects: app.kubernetes.io/instance: observatorium app.kubernetes.io/name: thanos-compact app.kubernetes.io/part-of: observatorium + observatorium/tenant: telemeter - apiVersion: v1 kind: ServiceAccount metadata: @@ -42,8 +44,9 @@ objects: app.kubernetes.io/instance: observatorium app.kubernetes.io/name: thanos-compact 
app.kubernetes.io/part-of: observatorium - app.kubernetes.io/version: v0.32.3 - name: observatorium-thanos-compact + app.kubernetes.io/version: v0.32.4 + observatorium/tenant: telemeter + name: observatorium-thanos-compact-telemeter namespace: rhobs - apiVersion: monitoring.coreos.com/v1 kind: ServiceMonitor @@ -54,9 +57,10 @@ objects: app.kubernetes.io/instance: observatorium app.kubernetes.io/name: thanos-compact app.kubernetes.io/part-of: observatorium - app.kubernetes.io/version: v0.32.3 + app.kubernetes.io/version: v0.32.4 + observatorium/tenant: telemeter prometheus: app-sre - name: observatorium-thanos-compact + name: observatorium-thanos-compact-telemeter namespace: openshift-customer-monitoring spec: endpoints: @@ -77,6 +81,7 @@ objects: app.kubernetes.io/instance: observatorium app.kubernetes.io/name: thanos-compact app.kubernetes.io/part-of: observatorium + observatorium/tenant: telemeter - apiVersion: apps/v1 kind: StatefulSet metadata: @@ -86,8 +91,9 @@ objects: app.kubernetes.io/instance: observatorium app.kubernetes.io/name: thanos-compact app.kubernetes.io/part-of: observatorium - app.kubernetes.io/version: v0.32.3 - name: observatorium-thanos-compact + app.kubernetes.io/version: v0.32.4 + observatorium/tenant: telemeter + name: observatorium-thanos-compact-telemeter namespace: rhobs spec: replicas: ${THANOS_REPLICAS} @@ -97,7 +103,8 @@ objects: app.kubernetes.io/instance: observatorium app.kubernetes.io/name: thanos-compact app.kubernetes.io/part-of: observatorium - serviceName: observatorium-thanos-compact + observatorium/tenant: telemeter + serviceName: observatorium-thanos-compact-telemeter template: metadata: creationTimestamp: null @@ -106,7 +113,8 @@ objects: app.kubernetes.io/instance: observatorium app.kubernetes.io/name: thanos-compact app.kubernetes.io/part-of: observatorium - app.kubernetes.io/version: v0.32.3 + app.kubernetes.io/version: v0.32.4 + observatorium/tenant: telemeter namespace: rhobs spec: affinity: @@ -178,7 +186,7 @@ 
objects: bucket: $(OBJ_STORE_BUCKET) endpoint: $(OBJ_STORE_ENDPOINT) region: $(OBJ_STORE_REGION) - image: quay.io/thanos/thanos:v0.32.3 + image: quay.io/thanos/thanos:v0.32.4 imagePullPolicy: IfNotPresent livenessProbe: failureThreshold: 4 @@ -213,7 +221,7 @@ objects: - -http-address= - -email-domain=* - -upstream=http://localhost:10902 - - -openshift-service-account=observatorium-thanos-compact + - -openshift-service-account=observatorium-thanos-compact-telemeter - '-openshift-sar={"resource": "namespaces", "verb": "get", "name": "rhobs", "namespace": "rhobs"}' - '-openshift-delegate-urls={"/": {"resource": "namespaces", "verb": "get", @@ -247,12 +255,12 @@ objects: securityContext: fsGroup: 65534 runAsUser: 65534 - serviceAccountName: observatorium-thanos-compact + serviceAccountName: observatorium-thanos-compact-telemeter terminationGracePeriodSeconds: 120 volumes: - name: compact-tls secret: - secretName: compact-tls + secretName: compact-tls-telemeter updateStrategy: {} volumeClaimTemplates: - metadata: @@ -262,7 +270,8 @@ objects: app.kubernetes.io/instance: observatorium app.kubernetes.io/name: thanos-compact app.kubernetes.io/part-of: observatorium - app.kubernetes.io/version: v0.32.3 + app.kubernetes.io/version: v0.32.4 + observatorium/tenant: telemeter name: data spec: accessModes: @@ -271,7 +280,6 @@ objects: requests: storage: 500Gi storageClassName: gp2 - status: {} - apiVersion: route.openshift.io/v1 kind: Route metadata: @@ -284,7 +292,8 @@ objects: app.kubernetes.io/instance: observatorium app.kubernetes.io/name: thanos-compact app.kubernetes.io/part-of: observatorium - name: observatorium-thanos-compact + observatorium/tenant: telemeter + name: observatorium-thanos-compact-telemeter namespace: rhobs spec: host: "" @@ -295,7 +304,7 @@ objects: termination: reencrypt to: kind: Service - name: observatorium-thanos-compact + name: observatorium-thanos-compact-telemeter weight: null - apiVersion: policy/v1 kind: PodDisruptionBudget @@ -306,7 +315,8 @@ 
objects: app.kubernetes.io/instance: observatorium app.kubernetes.io/name: thanos-compact app.kubernetes.io/part-of: observatorium - name: observatorium-thanos-compact + observatorium/tenant: telemeter + name: observatorium-thanos-compact-telemeter namespace: rhobs spec: maxUnavailable: 1 @@ -316,13 +326,14 @@ objects: app.kubernetes.io/instance: observatorium app.kubernetes.io/name: thanos-compact app.kubernetes.io/part-of: observatorium + observatorium/tenant: telemeter parameters: - from: '[a-zA-Z0-9]{40}' generate: expression name: OAUTH_PROXY_COOKIE_SECRET - name: THANOS_LOG_LEVEL required: true - value: info + value: warn - name: THANOS_REPLICAS required: true value: "1" diff --git a/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-store-rhel-template.yaml b/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-store-rhel-template.yaml new file mode 100755 index 0000000000..25c60a4d28 --- /dev/null +++ b/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-store-rhel-template.yaml @@ -0,0 +1,403 @@ +apiVersion: template.openshift.io/v1 +kind: Template +metadata: + creationTimestamp: null + name: observatorium-thanos-store-rhel +objects: +- apiVersion: rbac.authorization.k8s.io/v1 + kind: Role + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: object-store-gateway + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-store + app.kubernetes.io/part-of: observatorium + observatorium/tenant: rhel + name: list-pods-rhel + namespace: rhobs + rules: + - apiGroups: + - apps + resources: + - statefulsets + verbs: + - get + - list +- apiVersion: rbac.authorization.k8s.io/v1 + kind: RoleBinding + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: object-store-gateway + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-store + app.kubernetes.io/part-of: observatorium + observatorium/tenant: rhel + name: list-pods-rhel + namespace: 
rhobs + roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: list-pods-rhel + subjects: + - kind: ServiceAccount + name: observatorium-thanos-store-rhel + namespace: rhobs +- apiVersion: v1 + data: + entrypoint.sh: "#!/bin/bash\n\n# Kubernetes replicas are named with the following + convention \"-\". \n# This parameter expansion removes + all characters until the last hyphen, capturing only the ordinal.\nexport ORDINAL_INDEX=${HOSTNAME##*-}\n# + This parameter expansion removes all characters after the last hyphen, capturing + only the statefulset name.\nexport STATEFULSET_NAME=\"${HOSTNAME%-*}\"\nexport + THANOS_STORE_REPLICAS=$(kubectl get statefulset ${STATEFULSET_NAME} -n ${NAMESPACE} + -o=jsonpath='{.status.replicas}')\n\n# Logging parameters\necho \"generating + store hashmod config with ORDINAL_INDEX=${ORDINAL_INDEX} THANOS_STORE_REPLICAS=${STATEFULSET_NAME} + HOSTNAME=${HOSTNAME} NAMESPACE=${NAMESPACE} THANOS_STORE_REPLICAS=${THANOS_STORE_REPLICAS}\"\n\ncat + </tmp/config/hashmod-config.yaml\n- action: hashmod\n source_labels: + [\"__block_id\"]\n target_label: shard\n modulus: ${THANOS_STORE_REPLICAS}\n- + action: keep\n source_labels: [\"shard\"]\n regex: ${ORDINAL_INDEX}\nEOF\n" + kind: ConfigMap + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: object-store-gateway + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-store + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.4 + observatorium/tenant: rhel + name: hashmod-config-template-rhel + namespace: rhobs +- apiVersion: policy/v1 + kind: PodDisruptionBudget + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: object-store-gateway + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-store + app.kubernetes.io/part-of: observatorium + observatorium/tenant: rhel + name: observatorium-thanos-store-rhel + namespace: rhobs + spec: + maxUnavailable: 1 + selector: + 
matchLabels: + app.kubernetes.io/component: object-store-gateway + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-store + app.kubernetes.io/part-of: observatorium + observatorium/tenant: rhel +- apiVersion: v1 + kind: Service + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: object-store-gateway + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-store + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.4 + observatorium/tenant: rhel + name: observatorium-thanos-store-rhel + namespace: rhobs + spec: + ports: + - name: http + port: 10902 + protocol: TCP + targetPort: 10902 + - name: grpc + port: 10901 + protocol: TCP + targetPort: 10901 + selector: + app.kubernetes.io/component: object-store-gateway + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-store + app.kubernetes.io/part-of: observatorium + observatorium/tenant: rhel +- apiVersion: v1 + kind: ServiceAccount + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: object-store-gateway + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-store + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.4 + observatorium/tenant: rhel + name: observatorium-thanos-store-rhel + namespace: rhobs +- apiVersion: monitoring.coreos.com/v1 + kind: ServiceMonitor + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: object-store-gateway + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-store + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.4 + observatorium/tenant: rhel + prometheus: app-sre + name: observatorium-thanos-store-rhel + namespace: openshift-customer-monitoring + spec: + endpoints: + - port: http + relabelings: + - action: replace + separator: / + sourceLabels: + - namespace + - pod + targetLabel: instance + namespaceSelector: + 
matchNames: + - rhobs + selector: + matchLabels: + app.kubernetes.io/component: object-store-gateway + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-store + app.kubernetes.io/part-of: observatorium + observatorium/tenant: rhel +- apiVersion: apps/v1 + kind: StatefulSet + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: object-store-gateway + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-store + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.4 + observatorium/tenant: rhel + name: observatorium-thanos-store-rhel + namespace: rhobs + spec: + replicas: ${THANOS_REPLICAS} + selector: + matchLabels: + app.kubernetes.io/component: object-store-gateway + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-store + app.kubernetes.io/part-of: observatorium + observatorium/tenant: rhel + serviceName: observatorium-thanos-store-rhel + template: + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: object-store-gateway + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-store + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.4 + observatorium/tenant: rhel + namespace: rhobs + spec: + affinity: + podAntiAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - podAffinityTerm: + labelSelector: + matchExpressions: + - key: app.kubernetes.io/instance + operator: In + values: + - observatorium + - key: app.kubernetes.io/name + operator: In + values: + - thanos-store + topologyKey: kubernetes.io/hostname + weight: 100 + containers: + - args: + - store + - --data-dir=/var/thanos/store + - --ignore-deletion-marks-delay=24h0m0s + - --log.format=logfmt + - --log.level=${THANOS_LOG_LEVEL} + - --max-time=-22h0m0s + - --objstore.config=$(OBJSTORE_CONFIG) + - --selector.relabel-config-file=/tmp/config/hashmod-config.yaml + - --store.enable-index-header-lazy-reader=true + 
env: + - name: HOST_IP_ADDRESS + valueFrom: + fieldRef: + fieldPath: status.hostIP + - name: AWS_ACCESS_KEY_ID + valueFrom: + secretKeyRef: + key: aws_access_key_id + name: rhel-tenant-s3 + - name: AWS_SECRET_ACCESS_KEY + valueFrom: + secretKeyRef: + key: aws_secret_access_key + name: rhel-tenant-s3 + - name: OBJ_STORE_BUCKET + valueFrom: + secretKeyRef: + key: bucket + name: rhel-tenant-s3 + - name: OBJ_STORE_REGION + valueFrom: + secretKeyRef: + key: aws_region + name: rhel-tenant-s3 + - name: OBJ_STORE_ENDPOINT + valueFrom: + secretKeyRef: + key: endpoint + name: rhel-tenant-s3 + - name: OBJSTORE_CONFIG + value: | + type: S3 + config: + bucket: $(OBJ_STORE_BUCKET) + endpoint: $(OBJ_STORE_ENDPOINT) + region: $(OBJ_STORE_REGION) + image: quay.io/thanos/thanos:v0.32.4 + imagePullPolicy: IfNotPresent + livenessProbe: + failureThreshold: 8 + httpGet: + path: /-/healthy + port: 10902 + periodSeconds: 30 + timeoutSeconds: 1 + name: thanos + ports: + - containerPort: 10902 + name: http + protocol: TCP + - containerPort: 10901 + name: grpc + protocol: TCP + readinessProbe: + failureThreshold: 20 + httpGet: + path: /-/ready + port: 10902 + periodSeconds: 5 + resources: + limits: + memory: 80Gi + requests: + cpu: "4" + memory: 20Gi + terminationMessagePolicy: FallbackToLogsOnError + volumeMounts: + - mountPath: /var/thanos/store + name: data + - mountPath: /etc/config + name: hashmod-config + - args: + - --reporter.grpc.host-port=dns:///otel-trace-writer-collector-headless.observatorium-tools.svc:14250 + - --reporter.type=grpc + - --agent.tags=pod.namespace=$(NAMESPACE),pod.name=$(POD) + env: + - name: NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + - name: POD + valueFrom: + fieldRef: + fieldPath: metadata.name + image: quay.io/app-sre/jaegertracing-jaeger-agent:1.22.0 + livenessProbe: + failureThreshold: 5 + httpGet: + path: / + port: 14271 + name: jaeger-agent + ports: + - containerPort: 5778 + name: configs + protocol: TCP + - containerPort: 6831 + 
name: jaeger-thrift + protocol: TCP + - containerPort: 14271 + name: metrics + protocol: TCP + readinessProbe: + httpGet: + path: / + port: 14271 + initialDelaySeconds: 1 + resources: + limits: + cpu: 128m + memory: 128Mi + requests: + cpu: 32m + memory: 64Mi + terminationMessagePolicy: FallbackToLogsOnError + initContainers: + - args: + - /tmp/entrypoint/entrypoint.sh + env: + - name: NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + image: quay.io/app-sre/ubi8-ubi + imagePullPolicy: IfNotPresent + name: init-hashmod-file + resources: {} + volumeMounts: + - mountPath: /tmp/entrypoint + name: hashmod-config-template + - mountPath: /etc/config + name: hashmod-config + nodeSelector: + kubernetes.io/os: linux + securityContext: + fsGroup: 65534 + runAsUser: 65534 + serviceAccountName: observatorium-thanos-store-rhel + terminationGracePeriodSeconds: 120 + volumes: + - emptyDir: {} + name: hashmod-config + - configMap: + defaultMode: 511 + name: thanos-store + name: hashmod-config-template + updateStrategy: {} + volumeClaimTemplates: + - metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: object-store-gateway + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-store + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.4 + observatorium/tenant: rhel + name: data + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 500Gi + storageClassName: gp2 +parameters: +- name: THANOS_LOG_LEVEL + required: true + value: warn +- name: THANOS_REPLICAS + required: true + value: "1" diff --git a/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-store-shared-template.yaml b/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-store-shared-template.yaml new file mode 100755 index 0000000000..94e588032a --- /dev/null +++ b/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-store-shared-template.yaml @@ -0,0 +1,403 @@ +apiVersion: 
template.openshift.io/v1 +kind: Template +metadata: + creationTimestamp: null + name: observatorium-thanos-store-shared +objects: +- apiVersion: rbac.authorization.k8s.io/v1 + kind: Role + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: object-store-gateway + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-store + app.kubernetes.io/part-of: observatorium + observatorium/tenant: shared + name: list-pods-shared + namespace: rhobs + rules: + - apiGroups: + - apps + resources: + - statefulsets + verbs: + - get + - list +- apiVersion: rbac.authorization.k8s.io/v1 + kind: RoleBinding + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: object-store-gateway + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-store + app.kubernetes.io/part-of: observatorium + observatorium/tenant: shared + name: list-pods-shared + namespace: rhobs + roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: list-pods-shared + subjects: + - kind: ServiceAccount + name: observatorium-thanos-store-shared + namespace: rhobs +- apiVersion: v1 + data: + entrypoint.sh: "#!/bin/bash\n\n# Kubernetes replicas are named with the following + convention \"-\". 
\n# This parameter expansion removes + all characters until the last hyphen, capturing only the ordinal.\nexport ORDINAL_INDEX=${HOSTNAME##*-}\n# + This parameter expansion removes all characters after the last hyphen, capturing + only the statefulset name.\nexport STATEFULSET_NAME=\"${HOSTNAME%-*}\"\nexport + THANOS_STORE_REPLICAS=$(kubectl get statefulset ${STATEFULSET_NAME} -n ${NAMESPACE} + -o=jsonpath='{.status.replicas}')\n\n# Logging parameters\necho \"generating + store hashmod config with ORDINAL_INDEX=${ORDINAL_INDEX} THANOS_STORE_REPLICAS=${STATEFULSET_NAME} + HOSTNAME=${HOSTNAME} NAMESPACE=${NAMESPACE} THANOS_STORE_REPLICAS=${THANOS_STORE_REPLICAS}\"\n\ncat + </tmp/config/hashmod-config.yaml\n- action: hashmod\n source_labels: + [\"__block_id\"]\n target_label: shard\n modulus: ${THANOS_STORE_REPLICAS}\n- + action: keep\n source_labels: [\"shard\"]\n regex: ${ORDINAL_INDEX}\nEOF\n" + kind: ConfigMap + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: object-store-gateway + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-store + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.4 + observatorium/tenant: shared + name: hashmod-config-template-shared + namespace: rhobs +- apiVersion: policy/v1 + kind: PodDisruptionBudget + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: object-store-gateway + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-store + app.kubernetes.io/part-of: observatorium + observatorium/tenant: shared + name: observatorium-thanos-store-shared + namespace: rhobs + spec: + maxUnavailable: 1 + selector: + matchLabels: + app.kubernetes.io/component: object-store-gateway + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-store + app.kubernetes.io/part-of: observatorium + observatorium/tenant: shared +- apiVersion: v1 + kind: Service + metadata: + creationTimestamp: null + labels: + 
app.kubernetes.io/component: object-store-gateway + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-store + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.4 + observatorium/tenant: shared + name: observatorium-thanos-store-shared + namespace: rhobs + spec: + ports: + - name: http + port: 10902 + protocol: TCP + targetPort: 10902 + - name: grpc + port: 10901 + protocol: TCP + targetPort: 10901 + selector: + app.kubernetes.io/component: object-store-gateway + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-store + app.kubernetes.io/part-of: observatorium + observatorium/tenant: shared +- apiVersion: v1 + kind: ServiceAccount + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: object-store-gateway + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-store + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.4 + observatorium/tenant: shared + name: observatorium-thanos-store-shared + namespace: rhobs +- apiVersion: monitoring.coreos.com/v1 + kind: ServiceMonitor + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: object-store-gateway + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-store + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.4 + observatorium/tenant: shared + prometheus: app-sre + name: observatorium-thanos-store-shared + namespace: openshift-customer-monitoring + spec: + endpoints: + - port: http + relabelings: + - action: replace + separator: / + sourceLabels: + - namespace + - pod + targetLabel: instance + namespaceSelector: + matchNames: + - rhobs + selector: + matchLabels: + app.kubernetes.io/component: object-store-gateway + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-store + app.kubernetes.io/part-of: observatorium + observatorium/tenant: shared +- apiVersion: apps/v1 + kind: 
StatefulSet + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: object-store-gateway + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-store + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.4 + observatorium/tenant: shared + name: observatorium-thanos-store-shared + namespace: rhobs + spec: + replicas: ${THANOS_REPLICAS} + selector: + matchLabels: + app.kubernetes.io/component: object-store-gateway + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-store + app.kubernetes.io/part-of: observatorium + observatorium/tenant: shared + serviceName: observatorium-thanos-store-shared + template: + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: object-store-gateway + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-store + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.4 + observatorium/tenant: shared + namespace: rhobs + spec: + affinity: + podAntiAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - podAffinityTerm: + labelSelector: + matchExpressions: + - key: app.kubernetes.io/instance + operator: In + values: + - observatorium + - key: app.kubernetes.io/name + operator: In + values: + - thanos-store + topologyKey: kubernetes.io/hostname + weight: 100 + containers: + - args: + - store + - --data-dir=/var/thanos/store + - --ignore-deletion-marks-delay=24h0m0s + - --log.format=logfmt + - --log.level=${THANOS_LOG_LEVEL} + - --max-time=-22h0m0s + - --objstore.config=$(OBJSTORE_CONFIG) + - --selector.relabel-config-file=/tmp/config/hashmod-config.yaml + - --store.enable-index-header-lazy-reader=true + env: + - name: HOST_IP_ADDRESS + valueFrom: + fieldRef: + fieldPath: status.hostIP + - name: AWS_ACCESS_KEY_ID + valueFrom: + secretKeyRef: + key: aws_access_key_id + name: shared-tenant-s3 + - name: AWS_SECRET_ACCESS_KEY + valueFrom: + secretKeyRef: + key: 
aws_secret_access_key + name: shared-tenant-s3 + - name: OBJ_STORE_BUCKET + valueFrom: + secretKeyRef: + key: bucket + name: shared-tenant-s3 + - name: OBJ_STORE_REGION + valueFrom: + secretKeyRef: + key: aws_region + name: shared-tenant-s3 + - name: OBJ_STORE_ENDPOINT + valueFrom: + secretKeyRef: + key: endpoint + name: shared-tenant-s3 + - name: OBJSTORE_CONFIG + value: | + type: S3 + config: + bucket: $(OBJ_STORE_BUCKET) + endpoint: $(OBJ_STORE_ENDPOINT) + region: $(OBJ_STORE_REGION) + image: quay.io/thanos/thanos:v0.32.4 + imagePullPolicy: IfNotPresent + livenessProbe: + failureThreshold: 8 + httpGet: + path: /-/healthy + port: 10902 + periodSeconds: 30 + timeoutSeconds: 1 + name: thanos + ports: + - containerPort: 10902 + name: http + protocol: TCP + - containerPort: 10901 + name: grpc + protocol: TCP + readinessProbe: + failureThreshold: 20 + httpGet: + path: /-/ready + port: 10902 + periodSeconds: 5 + resources: + limits: + memory: 80Gi + requests: + cpu: "4" + memory: 20Gi + terminationMessagePolicy: FallbackToLogsOnError + volumeMounts: + - mountPath: /var/thanos/store + name: data + - mountPath: /etc/config + name: hashmod-config + - args: + - --reporter.grpc.host-port=dns:///otel-trace-writer-collector-headless.observatorium-tools.svc:14250 + - --reporter.type=grpc + - --agent.tags=pod.namespace=$(NAMESPACE),pod.name=$(POD) + env: + - name: NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + - name: POD + valueFrom: + fieldRef: + fieldPath: metadata.name + image: quay.io/app-sre/jaegertracing-jaeger-agent:1.22.0 + livenessProbe: + failureThreshold: 5 + httpGet: + path: / + port: 14271 + name: jaeger-agent + ports: + - containerPort: 5778 + name: configs + protocol: TCP + - containerPort: 6831 + name: jaeger-thrift + protocol: TCP + - containerPort: 14271 + name: metrics + protocol: TCP + readinessProbe: + httpGet: + path: / + port: 14271 + initialDelaySeconds: 1 + resources: + limits: + cpu: 128m + memory: 128Mi + requests: + cpu: 32m + 
memory: 64Mi + terminationMessagePolicy: FallbackToLogsOnError + initContainers: + - args: + - /tmp/entrypoint/entrypoint.sh + env: + - name: NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + image: quay.io/app-sre/ubi8-ubi + imagePullPolicy: IfNotPresent + name: init-hashmod-file + resources: {} + volumeMounts: + - mountPath: /tmp/entrypoint + name: hashmod-config-template + - mountPath: /etc/config + name: hashmod-config + nodeSelector: + kubernetes.io/os: linux + securityContext: + fsGroup: 65534 + runAsUser: 65534 + serviceAccountName: observatorium-thanos-store-shared + terminationGracePeriodSeconds: 120 + volumes: + - emptyDir: {} + name: hashmod-config + - configMap: + defaultMode: 511 + name: thanos-store + name: hashmod-config-template + updateStrategy: {} + volumeClaimTemplates: + - metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: object-store-gateway + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-store + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.4 + observatorium/tenant: shared + name: data + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 500Gi + storageClassName: gp2 +parameters: +- name: THANOS_LOG_LEVEL + required: true + value: warn +- name: THANOS_REPLICAS + required: true + value: "1" diff --git a/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-store-template.yaml b/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-store-telemeter-template.yaml similarity index 88% rename from resources/services/app-sre-stage-01/rhobs/observatorium-metrics-store-template.yaml rename to resources/services/telemeter-prod-01/rhobs/observatorium-metrics-store-telemeter-template.yaml index 74804381ea..d25a78ab55 100755 --- a/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-store-template.yaml +++ 
b/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-store-telemeter-template.yaml @@ -2,7 +2,7 @@ apiVersion: template.openshift.io/v1 kind: Template metadata: creationTimestamp: null - name: observatorium-metrics-store + name: observatorium-thanos-store-telemeter objects: - apiVersion: rbac.authorization.k8s.io/v1 kind: Role @@ -13,7 +13,8 @@ objects: app.kubernetes.io/instance: observatorium app.kubernetes.io/name: thanos-store app.kubernetes.io/part-of: observatorium - name: list-pods + observatorium/tenant: telemeter + name: list-pods-telemeter namespace: rhobs rules: - apiGroups: @@ -32,15 +33,16 @@ objects: app.kubernetes.io/instance: observatorium app.kubernetes.io/name: thanos-store app.kubernetes.io/part-of: observatorium - name: list-pods + observatorium/tenant: telemeter + name: list-pods-telemeter namespace: rhobs roleRef: apiGroup: rbac.authorization.k8s.io kind: Role - name: list-pods + name: list-pods-telemeter subjects: - kind: ServiceAccount - name: observatorium-thanos-store + name: observatorium-thanos-store-telemeter namespace: rhobs - apiVersion: v1 data: @@ -64,8 +66,9 @@ objects: app.kubernetes.io/instance: observatorium app.kubernetes.io/name: thanos-store app.kubernetes.io/part-of: observatorium - app.kubernetes.io/version: v0.32.3 - name: hashmod-config-template + app.kubernetes.io/version: v0.32.4 + observatorium/tenant: telemeter + name: hashmod-config-template-telemeter namespace: rhobs - apiVersion: policy/v1 kind: PodDisruptionBudget @@ -76,7 +79,8 @@ objects: app.kubernetes.io/instance: observatorium app.kubernetes.io/name: thanos-store app.kubernetes.io/part-of: observatorium - name: observatorium-thanos-store + observatorium/tenant: telemeter + name: observatorium-thanos-store-telemeter namespace: rhobs spec: maxUnavailable: 1 @@ -86,6 +90,7 @@ objects: app.kubernetes.io/instance: observatorium app.kubernetes.io/name: thanos-store app.kubernetes.io/part-of: observatorium + observatorium/tenant: telemeter - 
apiVersion: v1 kind: Service metadata: @@ -95,8 +100,9 @@ objects: app.kubernetes.io/instance: observatorium app.kubernetes.io/name: thanos-store app.kubernetes.io/part-of: observatorium - app.kubernetes.io/version: v0.32.3 - name: observatorium-thanos-store + app.kubernetes.io/version: v0.32.4 + observatorium/tenant: telemeter + name: observatorium-thanos-store-telemeter namespace: rhobs spec: ports: @@ -113,6 +119,7 @@ objects: app.kubernetes.io/instance: observatorium app.kubernetes.io/name: thanos-store app.kubernetes.io/part-of: observatorium + observatorium/tenant: telemeter - apiVersion: v1 kind: ServiceAccount metadata: @@ -122,8 +129,9 @@ objects: app.kubernetes.io/instance: observatorium app.kubernetes.io/name: thanos-store app.kubernetes.io/part-of: observatorium - app.kubernetes.io/version: v0.32.3 - name: observatorium-thanos-store + app.kubernetes.io/version: v0.32.4 + observatorium/tenant: telemeter + name: observatorium-thanos-store-telemeter namespace: rhobs - apiVersion: monitoring.coreos.com/v1 kind: ServiceMonitor @@ -134,9 +142,10 @@ objects: app.kubernetes.io/instance: observatorium app.kubernetes.io/name: thanos-store app.kubernetes.io/part-of: observatorium - app.kubernetes.io/version: v0.32.3 + app.kubernetes.io/version: v0.32.4 + observatorium/tenant: telemeter prometheus: app-sre - name: observatorium-thanos-store + name: observatorium-thanos-store-telemeter namespace: openshift-customer-monitoring spec: endpoints: @@ -157,6 +166,7 @@ objects: app.kubernetes.io/instance: observatorium app.kubernetes.io/name: thanos-store app.kubernetes.io/part-of: observatorium + observatorium/tenant: telemeter - apiVersion: apps/v1 kind: StatefulSet metadata: @@ -166,8 +176,9 @@ objects: app.kubernetes.io/instance: observatorium app.kubernetes.io/name: thanos-store app.kubernetes.io/part-of: observatorium - app.kubernetes.io/version: v0.32.3 - name: observatorium-thanos-store + app.kubernetes.io/version: v0.32.4 + observatorium/tenant: telemeter + name: 
observatorium-thanos-store-telemeter namespace: rhobs spec: replicas: ${THANOS_REPLICAS} @@ -177,7 +188,8 @@ objects: app.kubernetes.io/instance: observatorium app.kubernetes.io/name: thanos-store app.kubernetes.io/part-of: observatorium - serviceName: observatorium-thanos-store + observatorium/tenant: telemeter + serviceName: observatorium-thanos-store-telemeter template: metadata: creationTimestamp: null @@ -186,7 +198,8 @@ objects: app.kubernetes.io/instance: observatorium app.kubernetes.io/name: thanos-store app.kubernetes.io/part-of: observatorium - app.kubernetes.io/version: v0.32.3 + app.kubernetes.io/version: v0.32.4 + observatorium/tenant: telemeter namespace: rhobs spec: affinity: @@ -253,7 +266,7 @@ objects: bucket: $(OBJ_STORE_BUCKET) endpoint: $(OBJ_STORE_ENDPOINT) region: $(OBJ_STORE_REGION) - image: quay.io/thanos/thanos:v0.32.3 + image: quay.io/thanos/thanos:v0.32.4 imagePullPolicy: IfNotPresent livenessProbe: failureThreshold: 8 @@ -353,7 +366,7 @@ objects: securityContext: fsGroup: 65534 runAsUser: 65534 - serviceAccountName: observatorium-thanos-store + serviceAccountName: observatorium-thanos-store-telemeter terminationGracePeriodSeconds: 120 volumes: - emptyDir: {} @@ -371,7 +384,8 @@ objects: app.kubernetes.io/instance: observatorium app.kubernetes.io/name: thanos-store app.kubernetes.io/part-of: observatorium - app.kubernetes.io/version: v0.32.3 + app.kubernetes.io/version: v0.32.4 + observatorium/tenant: telemeter name: data spec: accessModes: @@ -380,11 +394,10 @@ objects: requests: storage: 500Gi storageClassName: gp2 - status: {} parameters: - name: THANOS_LOG_LEVEL required: true - value: info + value: warn - name: THANOS_REPLICAS required: true - value: "2" + value: "1" diff --git a/services_go/instances/rhobs/rhobs.go b/services_go/instances/rhobs/rhobs.go index bb9a631ceb..2173cb2c7e 100644 --- a/services_go/instances/rhobs/rhobs.go +++ b/services_go/instances/rhobs/rhobs.go @@ -3,25 +3,42 @@ package rhobs import ( 
"github.com/observatorium/observatorium/configuration_go/abstr/kubernetes/thanos/compactor" "github.com/observatorium/observatorium/configuration_go/abstr/kubernetes/thanos/store" - "github.com/observatorium/observatorium/configuration_go/schemas/thanos/common" "github.com/rhobs/configuration/services_go/observatorium" ) -func ClusterConfigs() []observatorium.InstanceConfiguration { - return []observatorium.InstanceConfiguration{ +func ClusterConfigs() []observatorium.Observatorium { + return []observatorium.Observatorium{ { Cluster: "app-sre-stage-01", Namespace: "rhobs", Instance: "rhobs", - ObjStoreSecret: "telemeter-tenant-s3", - Tenants: []observatorium.TenantInstanceConfiguration{}, - PreManifestsHooks: observatorium.PreManifestsHooks{ - ThanosStore: func(store *store.StoreStatefulSet) { - store.Replicas = 2 - store.Options.LogLevel = common.LogLevelInfo + ThanosImageTag: "v0.32.4", + Stores: []observatorium.ThanosTenantConfig[store.StoreStatefulSet]{ + { + Tenant: "shared", + ObjStoreSecret: "shared-tenant-s3", }, - Compactor: func(compactor *compactor.CompactorStatefulSet) { - compactor.Options.LogLevel = common.LogLevelInfo + { + Tenant: "rhel", + ObjStoreSecret: "rhel-tenant-s3", + }, + { + Tenant: "telemeter", + ObjStoreSecret: "telemeter-tenant-s3", + }, + }, + Compactors: []observatorium.ThanosTenantConfig[compactor.CompactorStatefulSet]{ + { + Tenant: "shared", + ObjStoreSecret: "shared-tenant-s3", + }, + { + Tenant: "rhel", + ObjStoreSecret: "rhel-tenant-s3", + }, + { + Tenant: "telemeter", + ObjStoreSecret: "telemeter-tenant-s3", }, }, }, @@ -29,11 +46,33 @@ func ClusterConfigs() []observatorium.InstanceConfiguration { Cluster: "telemeter-prod-01", Namespace: "rhobs", Instance: "rhobs", - ObjStoreSecret: "telemeter-tenant-s3", - Tenants: []observatorium.TenantInstanceConfiguration{}, - PreManifestsHooks: observatorium.PreManifestsHooks{ - ThanosStore: func(store *store.StoreStatefulSet) { - store.Replicas = 3 + ThanosImageTag: "v0.32.4", + Stores: 
[]observatorium.ThanosTenantConfig[store.StoreStatefulSet]{ + { + Tenant: "shared", + ObjStoreSecret: "shared-tenant-s3", + }, + { + Tenant: "rhel", + ObjStoreSecret: "rhel-tenant-s3", + }, + { + Tenant: "telemeter", + ObjStoreSecret: "telemeter-tenant-s3", + }, + }, + Compactors: []observatorium.ThanosTenantConfig[compactor.CompactorStatefulSet]{ + { + Tenant: "shared", + ObjStoreSecret: "shared-tenant-s3", + }, + { + Tenant: "rhel", + ObjStoreSecret: "rhel-tenant-s3", + }, + { + Tenant: "telemeter", + ObjStoreSecret: "telemeter-tenant-s3", }, }, }, diff --git a/services_go/observatorium/yaml.go b/services_go/observatorium/encoders.go similarity index 76% rename from services_go/observatorium/yaml.go rename to services_go/observatorium/encoders.go index 967c2ac60d..1c349fd233 100644 --- a/services_go/observatorium/yaml.go +++ b/services_go/observatorium/encoders.go @@ -17,13 +17,15 @@ type statusRemoveEncoder struct { func (c *statusRemoveEncoder) Read(p []byte) (n int, err error) { if c.reader == nil { - ret, err := io.ReadAll(c.encoder) + yamlData, err := io.ReadAll(c.encoder) if err != nil { panic(err) } // Remove status sections from manifests - c.reader = bytes.NewBuffer(regexp.MustCompile(`(?m)^( {2})status:\n( {4}.*\n)+`).ReplaceAll(ret, []byte{})) + yamlData = regexp.MustCompile(`(?m)^( {2})status:\n( {4}.*\n)+`).ReplaceAll(yamlData, []byte{}) + yamlData = regexp.MustCompile(`(?m)^ +status: \{\}\n`).ReplaceAll(yamlData, []byte{}) + c.reader = bytes.NewBuffer(yamlData) } return c.reader.Read(p) @@ -43,16 +45,16 @@ type templateYAML struct { func (c *templateYAML) Read(p []byte) (n int, err error) { if c.reader == nil { - ret, err := io.ReadAll(c.encoder) + yamlData, err := io.ReadAll(c.encoder) if err != nil { panic(err) } for _, r := range c.replacements { - ret = regexp.MustCompile(r[0]).ReplaceAll(ret, []byte(r[1])) + yamlData = regexp.MustCompile(r[0]).ReplaceAll(yamlData, []byte(r[1])) } - c.reader = bytes.NewBuffer(ret) + c.reader = 
bytes.NewBuffer(yamlData) } return c.reader.Read(p) diff --git a/services_go/observatorium/metrics.go b/services_go/observatorium/metrics.go index 914c082c3b..8d3554d6bc 100644 --- a/services_go/observatorium/metrics.go +++ b/services_go/observatorium/metrics.go @@ -31,20 +31,22 @@ import ( const ( thanosImage = "quay.io/thanos/thanos" - thanosImageTag = "v0.32.3" monitoringNamespace = "openshift-customer-monitoring" servingCertSecretNameAnnotation = "service.alpha.openshift.io/serving-cert-secret-name" + tenantLabel = "observatorium/tenant" ) //go:embed assets/store-auto-shard-relabel-configMap.sh var storeAutoShardRelabelConfigMap string // makeCompactor creates a base compactor component that can be derived from using the preManifestsHook. -func makeCompactor(namespace, objstoreSecret string, preManifestsHook func(*compactor.CompactorStatefulSet)) encoding.Encoder { +func makeCompactor(namespace, imageTag string, cfg ThanosTenantConfig[compactor.CompactorStatefulSet]) encoding.Encoder { // K8s config compactorSatefulset := compactor.NewCompactor() + compactorSatefulset.Name = fmt.Sprintf("%s-%s", compactorSatefulset.Name, cfg.Tenant) + compactorSatefulset.CommonLabels[tenantLabel] = cfg.Tenant compactorSatefulset.Image = thanosImage - compactorSatefulset.ImageTag = thanosImageTag + compactorSatefulset.ImageTag = imageTag compactorSatefulset.Namespace = namespace compactorSatefulset.Affinity.PodAntiAffinity.PreferredDuringSchedulingIgnoredDuringExecution[0].PodAffinityTerm.Namespaces = []string{} compactorSatefulset.Replicas = 1 @@ -55,8 +57,8 @@ func makeCompactor(namespace, objstoreSecret string, preManifestsHook func(*comp compactorSatefulset.VolumeType = "gp2" compactorSatefulset.VolumeSize = "500Gi" compactorSatefulset.Env = deleteObjStoreEnv(compactorSatefulset.Env) // delete the default objstore env vars - compactorSatefulset.Env = append(compactorSatefulset.Env, objStoreEnvVars(objstoreSecret)...) 
- tlsSecret := "compact-tls" + compactorSatefulset.Env = append(compactorSatefulset.Env, objStoreEnvVars(cfg.ObjStoreSecret)...) + tlsSecret := "compact-tls-" + cfg.Tenant compactorSatefulset.Sidecars = []k8sutil.ContainerProvider{makeOauthProxy(10902, namespace, compactorSatefulset.Name, tlsSecret)} // Compactor config @@ -70,8 +72,8 @@ func makeCompactor(namespace, objstoreSecret string, preManifestsHook func(*comp compactorSatefulset.Options.AddExtraOpts("--debug.max-compaction-level=3") // Execute preManifestsHook - if preManifestsHook != nil { - preManifestsHook(compactorSatefulset) + if cfg.PreManifestsHook != nil { + cfg.PreManifestsHook(compactorSatefulset) } logLevel := string(compactorSatefulset.Options.LogLevel) // capture final log level for use in template compactorSatefulset.Options.LogLevel = "${THANOS_LOG_LEVEL}" @@ -139,7 +141,7 @@ func makeCompactor(namespace, objstoreSecret string, preManifestsHook func(*comp // Wrap in template template := openshift.WrapInTemplate("", manifests, metav1.ObjectMeta{ - Name: "observatorium-metrics-compact", + Name: compactorSatefulset.Name, }, []templatev1.Parameter{ { Name: "OAUTH_PROXY_COOKIE_SECRET", @@ -160,19 +162,21 @@ func makeCompactor(namespace, objstoreSecret string, preManifestsHook func(*comp // Adding a special encoder wrapper to replace the replicas value in the template with a template parameter // As the replicas value is typed as an int, it cannot be replaced using the compactor config. 
- yamlDecoder := templateYAML{encoder: encoding.GhodssYAML(template[""])} - yamlDecoder.AddReplacement(fmt.Sprintf(`(?m)^(\s*replicas: )%d$`, compactorSatefulset.Replicas), "${1}$${THANOS_REPLICAS}") + yamlEncoder := templateYAML{encoder: encoding.GhodssYAML(template[""])} + yamlEncoder.AddReplacement(fmt.Sprintf(`(?m)^(\s*replicas: )%d$`, compactorSatefulset.Replicas), "${1}$${THANOS_REPLICAS}") - return &yamlDecoder + return &yamlEncoder } // makeStore creates a base store component that can be derived from using the preManifestsHook. -func makeStore(namespace, objstoreSecret string, preManifestHook func(*store.StoreStatefulSet)) encoding.Encoder { +func makeStore(namespace, imageTag string, cfg ThanosTenantConfig[store.StoreStatefulSet]) encoding.Encoder { // K8s config storeStatefulSet := store.NewStore() + storeStatefulSet.Name = fmt.Sprintf("%s-%s", storeStatefulSet.Name, cfg.Tenant) + storeStatefulSet.CommonLabels[tenantLabel] = cfg.Tenant storeStatefulSet.Image = thanosImage - storeStatefulSet.ImageTag = thanosImageTag + storeStatefulSet.ImageTag = imageTag storeStatefulSet.Namespace = namespace storeStatefulSet.Affinity.PodAntiAffinity.PreferredDuringSchedulingIgnoredDuringExecution[0].PodAffinityTerm.Namespaces = []string{} storeStatefulSet.Replicas = 1 @@ -183,14 +187,14 @@ func makeStore(namespace, objstoreSecret string, preManifestHook func(*store.Sto storeStatefulSet.VolumeType = "gp2" storeStatefulSet.VolumeSize = "500Gi" storeStatefulSet.Env = deleteObjStoreEnv(storeStatefulSet.Env) // delete the default objstore env vars - storeStatefulSet.Env = append(storeStatefulSet.Env, objStoreEnvVars(objstoreSecret)...) + storeStatefulSet.Env = append(storeStatefulSet.Env, objStoreEnvVars(cfg.ObjStoreSecret)...) 
storeStatefulSet.Sidecars = []k8sutil.ContainerProvider{makeJaegerAgent("observatorium-tools")} // Store auto-sharding using a configMap and an initContainer // The configMap contains a script that will be executed by the initContainer // The script generates the relabeling config based on the replica ordinal and the number of replicas // The relabeling config is then written to a volume shared with the store container - storeStatefulSet.ConfigMaps["hashmod-config-template"] = map[string]string{ + storeStatefulSet.ConfigMaps[fmt.Sprintf("hashmod-config-template-%s", cfg.Tenant)] = map[string]string{ "entrypoint.sh": storeAutoShardRelabelConfigMap, } initContainer := corev1.Container{ @@ -240,8 +244,8 @@ func makeStore(namespace, objstoreSecret string, preManifestHook func(*store.Sto storeStatefulSet.Options.StoreEnableIndexHeaderLazyReader = true // Enables parallel rolling update of store nodes. // Execute preManifestHook - if preManifestHook != nil { - preManifestHook(storeStatefulSet) + if cfg.PreManifestsHook != nil { + cfg.PreManifestsHook(storeStatefulSet) } logLevel := string(storeStatefulSet.Options.LogLevel) // capture final log level for use in template storeStatefulSet.Options.LogLevel = "${THANOS_LOG_LEVEL}" @@ -251,6 +255,7 @@ func makeStore(namespace, objstoreSecret string, preManifestHook func(*store.Sto postProcessServiceMonitor(getObject[*monv1.ServiceMonitor](manifests), storeStatefulSet.Namespace) statefulset := getObject[*appsv1.StatefulSet](manifests) defaultMode := int32(0777) + // Add volumes and volume mounts for the initContainer statefulset.Spec.Template.Spec.Volumes = append(statefulset.Spec.Template.Spec.Volumes, corev1.Volume{ Name: "hashmod-config", VolumeSource: corev1.VolumeSource{ @@ -277,13 +282,13 @@ func makeStore(namespace, objstoreSecret string, preManifestHook func(*store.Sto // add rbac for reading the number of replicas from the statefulset in the initContainer labels := maps.Clone(statefulset.ObjectMeta.Labels) 
delete(labels, k8sutil.VersionLabel) - manifests["list-pods-rbac"] = &rbacv1.Role{ + listPodsRole := &rbacv1.Role{ TypeMeta: metav1.TypeMeta{ Kind: "Role", APIVersion: rbacv1.SchemeGroupVersion.String(), }, ObjectMeta: metav1.ObjectMeta{ - Name: "list-pods", + Name: fmt.Sprintf("list-pods-%s", cfg.Tenant), Namespace: namespace, Labels: labels, }, @@ -295,13 +300,16 @@ func makeStore(namespace, objstoreSecret string, preManifestHook func(*store.Sto }, }, } + + manifests["list-pods-rbac"] = listPodsRole + manifests["list-pods-rbac-binding"] = &rbacv1.RoleBinding{ TypeMeta: metav1.TypeMeta{ Kind: "RoleBinding", APIVersion: rbacv1.SchemeGroupVersion.String(), }, ObjectMeta: metav1.ObjectMeta{ - Name: "list-pods", + Name: fmt.Sprintf("list-pods-%s", cfg.Tenant), Namespace: namespace, Labels: labels, }, @@ -315,7 +323,7 @@ func makeStore(namespace, objstoreSecret string, preManifestHook func(*store.Sto }, RoleRef: rbacv1.RoleRef{ Kind: "Role", - Name: "list-pods", + Name: listPodsRole.Name, APIGroup: "rbac.authorization.k8s.io", }, } @@ -345,7 +353,7 @@ func makeStore(namespace, objstoreSecret string, preManifestHook func(*store.Sto // Wrap in template template := openshift.WrapInTemplate("", manifests, metav1.ObjectMeta{ - Name: "observatorium-metrics-store", + Name: storeStatefulSet.Name, }, []templatev1.Parameter{ { Name: "THANOS_LOG_LEVEL", @@ -359,12 +367,12 @@ func makeStore(namespace, objstoreSecret string, preManifestHook func(*store.Sto }, }) - yamlDecoder := templateYAML{encoder: encoding.GhodssYAML(template[""])} + yamlEncoder := templateYAML{encoder: encoding.GhodssYAML(template[""])} // Adding a special encoder wrapper to replace the replicas value in the template with a template parameter // As the replicas value is typed as an int, it cannot be replaced using the compactor config. 
- yamlDecoder.AddReplacement(fmt.Sprintf(`(?m)^(\s*replicas: )%d$`, storeStatefulSet.Replicas), "${1}$${THANOS_REPLICAS}") + yamlEncoder.AddReplacement(fmt.Sprintf(`(?m)^(\s*replicas: )%d$`, storeStatefulSet.Replicas), "${1}$${THANOS_REPLICAS}") - return &yamlDecoder + return &yamlEncoder } type kubeObject interface { diff --git a/services_go/observatorium/observatorium.go b/services_go/observatorium/observatorium.go index f7277aa439..255625533c 100644 --- a/services_go/observatorium/observatorium.go +++ b/services_go/observatorium/observatorium.go @@ -1,65 +1,51 @@ package observatorium -// import "github.com/rhobs/configuration/services_go/components/thanos/compactor" - import ( "github.com/bwplotka/mimic" "github.com/bwplotka/mimic/encoding" - "github.com/observatorium/api/rbac" "github.com/observatorium/observatorium/configuration_go/abstr/kubernetes/thanos/compactor" "github.com/observatorium/observatorium/configuration_go/abstr/kubernetes/thanos/store" ) // TenantInstanceConfiguration is the configuration for a single tenant in an instance of observatorium. -type TenantInstanceConfiguration struct { - IngestRateLimit []struct{} - QueryRateLimit []struct{} - IngestHardTenant bool - Authorizers map[string]rbac.Authorizer - // Tenant *obs_api.tenant -} - -// PreManifestsHooks is a collection of hooks that can be used to modify the manifests before they are generated. -// This provides the instance configuration with the ability to customize each component deployed. -type PreManifestsHooks struct { - ThanosStore func(*store.StoreStatefulSet) - Compactor func(*compactor.CompactorStatefulSet) -} - -// InstanceConfiguration is the configuration for an instance of observatorium. 
-type InstanceConfiguration struct { - Cluster string - Instance string - Namespace string - ObjStoreSecret string - Tenants []TenantInstanceConfiguration - PreManifestsHooks PreManifestsHooks +// type TenantInstanceConfiguration struct { +// IngestRateLimit []struct{} +// QueryRateLimit []struct{} +// IngestHardTenant bool +// Authorizers map[string]rbac.Authorizer +// // Tenant *obs_api.tenant +// } + +type ThanosTenantConfig[T compactor.CompactorStatefulSet | store.StoreStatefulSet] struct { + Tenant string + ObjStoreSecret string + PreManifestsHook func(*T) } // Observatorium is an instance of observatorium. // It contains the configuration for the instance and the ability to generate the manifests for the instance. type Observatorium struct { - cfg *InstanceConfiguration -} - -// NewObservatorium creates a new instance of observatorium. -func NewObservatorium(cfg *InstanceConfiguration) *Observatorium { - return &Observatorium{ - cfg: cfg, - } + Cluster string + Instance string + Namespace string + ThanosImageTag string + Stores []ThanosTenantConfig[store.StoreStatefulSet] + Compactors []ThanosTenantConfig[compactor.CompactorStatefulSet] } // Manifests generates the manifests for the instance of observatorium. 
func (o *Observatorium) Manifests(generator *mimic.Generator) { - components := []struct { - name string - encoder encoding.Encoder - }{ - {"observatorium-metrics-compact", makeCompactor(o.cfg.Namespace, o.cfg.ObjStoreSecret, o.cfg.PreManifestsHooks.Compactor)}, - {"observatorium-metrics-store", makeStore(o.cfg.Namespace, o.cfg.ObjStoreSecret, o.cfg.PreManifestsHooks.ThanosStore)}, + components := map[string]encoding.Encoder{} // filename -> yaml encoder + + for _, storeCfg := range o.Stores { + components["observatorium-metrics-store-"+storeCfg.Tenant] = makeStore(o.Namespace, o.ThanosImageTag, storeCfg) + } + + for _, compactorCfg := range o.Compactors { + components["observatorium-metrics-compact-"+compactorCfg.Tenant] = makeCompactor(o.Namespace, o.ThanosImageTag, compactorCfg) } - for _, component := range components { - generator.With(o.cfg.Cluster, o.cfg.Instance).Add(component.name+"-template.yaml", &statusRemoveEncoder{encoder: component.encoder}) + for name, encoder := range components { + generator.With(o.Cluster, o.Instance).Add(name+"-template.yaml", &statusRemoveEncoder{encoder: encoder}) } } diff --git a/services_go/services.go b/services_go/services.go index 13d06c9dd6..28990d7384 100644 --- a/services_go/services.go +++ b/services_go/services.go @@ -3,14 +3,12 @@ package services import ( "github.com/bwplotka/mimic" "github.com/rhobs/configuration/services_go/instances/rhobs" - "github.com/rhobs/configuration/services_go/observatorium" ) // Generate generates the manifests for all observatorium instances. 
func Generate(gen *mimic.Generator) { rhobsConfigs := rhobs.ClusterConfigs() - for _, cfg := range rhobsConfigs { - observatorium := observatorium.NewObservatorium(&cfg) - observatorium.Manifests(gen) + for _, obsCfg := range rhobsConfigs { + obsCfg.Manifests(gen) } } From 43b163dd10724cd713cc133a55aab9048e852cb4 Mon Sep 17 00:00:00 2001 From: Thibault Mange <22740367+thibaultmg@users.noreply.github.com> Date: Wed, 4 Oct 2023 14:54:49 +0200 Subject: [PATCH 31/32] add resources in template params Signed-off-by: Thibault Mange <22740367+thibaultmg@users.noreply.github.com> --- ...atorium-metrics-compact-rhel-template.yaml | 20 ++-- ...orium-metrics-compact-shared-template.yaml | 20 ++-- ...um-metrics-compact-telemeter-template.yaml | 20 ++-- ...rvatorium-metrics-store-rhel-template.yaml | 14 ++- ...atorium-metrics-store-shared-template.yaml | 14 ++- ...rium-metrics-store-telemeter-template.yaml | 14 ++- ...atorium-metrics-compact-rhel-template.yaml | 20 ++-- ...orium-metrics-compact-shared-template.yaml | 20 ++-- ...um-metrics-compact-telemeter-template.yaml | 20 ++-- ...rvatorium-metrics-store-rhel-template.yaml | 14 ++- ...atorium-metrics-store-shared-template.yaml | 14 ++- ...rium-metrics-store-telemeter-template.yaml | 14 ++- services_go/observatorium/encoders.go | 16 +++ services_go/observatorium/metrics.go | 107 +++++++++++------- services_go/observatorium/sidecars.go | 1 + 15 files changed, 207 insertions(+), 121 deletions(-) diff --git a/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-compact-rhel-template.yaml b/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-compact-rhel-template.yaml index 67b2707d93..1368cc857b 100755 --- a/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-compact-rhel-template.yaml +++ b/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-compact-rhel-template.yaml @@ -207,10 +207,10 @@ objects: periodSeconds: 5 resources: limits: - memory: 5Gi + memory: ${THANOS_MEMORY_LIMIT} 
requests: - cpu: 200m - memory: 1Gi + cpu: ${THANOS_CPU_REQUEST} + memory: ${THANOS_MEMORY_REQUEST} terminationMessagePolicy: FallbackToLogsOnError volumeMounts: - mountPath: /var/thanos/compactor @@ -328,12 +328,16 @@ objects: app.kubernetes.io/part-of: observatorium observatorium/tenant: rhel parameters: -- from: '[a-zA-Z0-9]{40}' - generate: expression - name: OAUTH_PROXY_COOKIE_SECRET - name: THANOS_LOG_LEVEL - required: true value: warn - name: THANOS_REPLICAS - required: true value: "1" +- name: THANOS_CPU_REQUEST + value: 200m +- name: THANOS_MEMORY_LIMIT + value: 5Gi +- name: THANOS_MEMORY_REQUEST + value: 1Gi +- from: '[a-zA-Z0-9]{40}' + generate: expression + name: OAUTH_PROXY_COOKIE_SECRET diff --git a/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-compact-shared-template.yaml b/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-compact-shared-template.yaml index a959281af5..88e0cb09fc 100755 --- a/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-compact-shared-template.yaml +++ b/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-compact-shared-template.yaml @@ -207,10 +207,10 @@ objects: periodSeconds: 5 resources: limits: - memory: 5Gi + memory: ${THANOS_MEMORY_LIMIT} requests: - cpu: 200m - memory: 1Gi + cpu: ${THANOS_CPU_REQUEST} + memory: ${THANOS_MEMORY_REQUEST} terminationMessagePolicy: FallbackToLogsOnError volumeMounts: - mountPath: /var/thanos/compactor @@ -328,12 +328,16 @@ objects: app.kubernetes.io/part-of: observatorium observatorium/tenant: shared parameters: -- from: '[a-zA-Z0-9]{40}' - generate: expression - name: OAUTH_PROXY_COOKIE_SECRET - name: THANOS_LOG_LEVEL - required: true value: warn - name: THANOS_REPLICAS - required: true value: "1" +- name: THANOS_CPU_REQUEST + value: 200m +- name: THANOS_MEMORY_LIMIT + value: 5Gi +- name: THANOS_MEMORY_REQUEST + value: 1Gi +- from: '[a-zA-Z0-9]{40}' + generate: expression + name: OAUTH_PROXY_COOKIE_SECRET diff --git 
a/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-compact-telemeter-template.yaml b/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-compact-telemeter-template.yaml index 3af907cf3b..2e1c775e5f 100755 --- a/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-compact-telemeter-template.yaml +++ b/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-compact-telemeter-template.yaml @@ -207,10 +207,10 @@ objects: periodSeconds: 5 resources: limits: - memory: 5Gi + memory: ${THANOS_MEMORY_LIMIT} requests: - cpu: 200m - memory: 1Gi + cpu: ${THANOS_CPU_REQUEST} + memory: ${THANOS_MEMORY_REQUEST} terminationMessagePolicy: FallbackToLogsOnError volumeMounts: - mountPath: /var/thanos/compactor @@ -328,12 +328,16 @@ objects: app.kubernetes.io/part-of: observatorium observatorium/tenant: telemeter parameters: -- from: '[a-zA-Z0-9]{40}' - generate: expression - name: OAUTH_PROXY_COOKIE_SECRET - name: THANOS_LOG_LEVEL - required: true value: warn - name: THANOS_REPLICAS - required: true value: "1" +- name: THANOS_CPU_REQUEST + value: 200m +- name: THANOS_MEMORY_LIMIT + value: 5Gi +- name: THANOS_MEMORY_REQUEST + value: 1Gi +- from: '[a-zA-Z0-9]{40}' + generate: expression + name: OAUTH_PROXY_COOKIE_SECRET diff --git a/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-store-rhel-template.yaml b/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-store-rhel-template.yaml index 25c60a4d28..cf67a69732 100755 --- a/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-store-rhel-template.yaml +++ b/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-store-rhel-template.yaml @@ -291,10 +291,10 @@ objects: periodSeconds: 5 resources: limits: - memory: 80Gi + memory: ${THANOS_MEMORY_LIMIT} requests: - cpu: "4" - memory: 20Gi + cpu: ${THANOS_CPU_REQUEST} + memory: ${THANOS_MEMORY_REQUEST} terminationMessagePolicy: FallbackToLogsOnError volumeMounts: - mountPath: /var/thanos/store @@ 
-396,8 +396,12 @@ objects: storageClassName: gp2 parameters: - name: THANOS_LOG_LEVEL - required: true value: warn - name: THANOS_REPLICAS - required: true value: "1" +- name: THANOS_CPU_REQUEST + value: "4" +- name: THANOS_MEMORY_LIMIT + value: 80Gi +- name: THANOS_MEMORY_REQUEST + value: 20Gi diff --git a/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-store-shared-template.yaml b/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-store-shared-template.yaml index 94e588032a..428c1b60d0 100755 --- a/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-store-shared-template.yaml +++ b/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-store-shared-template.yaml @@ -291,10 +291,10 @@ objects: periodSeconds: 5 resources: limits: - memory: 80Gi + memory: ${THANOS_MEMORY_LIMIT} requests: - cpu: "4" - memory: 20Gi + cpu: ${THANOS_CPU_REQUEST} + memory: ${THANOS_MEMORY_REQUEST} terminationMessagePolicy: FallbackToLogsOnError volumeMounts: - mountPath: /var/thanos/store @@ -396,8 +396,12 @@ objects: storageClassName: gp2 parameters: - name: THANOS_LOG_LEVEL - required: true value: warn - name: THANOS_REPLICAS - required: true value: "1" +- name: THANOS_CPU_REQUEST + value: "4" +- name: THANOS_MEMORY_LIMIT + value: 80Gi +- name: THANOS_MEMORY_REQUEST + value: 20Gi diff --git a/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-store-telemeter-template.yaml b/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-store-telemeter-template.yaml index d25a78ab55..bb07caaab7 100755 --- a/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-store-telemeter-template.yaml +++ b/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-store-telemeter-template.yaml @@ -291,10 +291,10 @@ objects: periodSeconds: 5 resources: limits: - memory: 80Gi + memory: ${THANOS_MEMORY_LIMIT} requests: - cpu: "4" - memory: 20Gi + cpu: ${THANOS_CPU_REQUEST} + memory: ${THANOS_MEMORY_REQUEST} 
terminationMessagePolicy: FallbackToLogsOnError volumeMounts: - mountPath: /var/thanos/store @@ -396,8 +396,12 @@ objects: storageClassName: gp2 parameters: - name: THANOS_LOG_LEVEL - required: true value: warn - name: THANOS_REPLICAS - required: true value: "1" +- name: THANOS_CPU_REQUEST + value: "4" +- name: THANOS_MEMORY_LIMIT + value: 80Gi +- name: THANOS_MEMORY_REQUEST + value: 20Gi diff --git a/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-compact-rhel-template.yaml b/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-compact-rhel-template.yaml index 67b2707d93..1368cc857b 100755 --- a/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-compact-rhel-template.yaml +++ b/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-compact-rhel-template.yaml @@ -207,10 +207,10 @@ objects: periodSeconds: 5 resources: limits: - memory: 5Gi + memory: ${THANOS_MEMORY_LIMIT} requests: - cpu: 200m - memory: 1Gi + cpu: ${THANOS_CPU_REQUEST} + memory: ${THANOS_MEMORY_REQUEST} terminationMessagePolicy: FallbackToLogsOnError volumeMounts: - mountPath: /var/thanos/compactor @@ -328,12 +328,16 @@ objects: app.kubernetes.io/part-of: observatorium observatorium/tenant: rhel parameters: -- from: '[a-zA-Z0-9]{40}' - generate: expression - name: OAUTH_PROXY_COOKIE_SECRET - name: THANOS_LOG_LEVEL - required: true value: warn - name: THANOS_REPLICAS - required: true value: "1" +- name: THANOS_CPU_REQUEST + value: 200m +- name: THANOS_MEMORY_LIMIT + value: 5Gi +- name: THANOS_MEMORY_REQUEST + value: 1Gi +- from: '[a-zA-Z0-9]{40}' + generate: expression + name: OAUTH_PROXY_COOKIE_SECRET diff --git a/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-compact-shared-template.yaml b/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-compact-shared-template.yaml index a959281af5..88e0cb09fc 100755 --- a/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-compact-shared-template.yaml +++ 
b/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-compact-shared-template.yaml @@ -207,10 +207,10 @@ objects: periodSeconds: 5 resources: limits: - memory: 5Gi + memory: ${THANOS_MEMORY_LIMIT} requests: - cpu: 200m - memory: 1Gi + cpu: ${THANOS_CPU_REQUEST} + memory: ${THANOS_MEMORY_REQUEST} terminationMessagePolicy: FallbackToLogsOnError volumeMounts: - mountPath: /var/thanos/compactor @@ -328,12 +328,16 @@ objects: app.kubernetes.io/part-of: observatorium observatorium/tenant: shared parameters: -- from: '[a-zA-Z0-9]{40}' - generate: expression - name: OAUTH_PROXY_COOKIE_SECRET - name: THANOS_LOG_LEVEL - required: true value: warn - name: THANOS_REPLICAS - required: true value: "1" +- name: THANOS_CPU_REQUEST + value: 200m +- name: THANOS_MEMORY_LIMIT + value: 5Gi +- name: THANOS_MEMORY_REQUEST + value: 1Gi +- from: '[a-zA-Z0-9]{40}' + generate: expression + name: OAUTH_PROXY_COOKIE_SECRET diff --git a/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-compact-telemeter-template.yaml b/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-compact-telemeter-template.yaml index 3af907cf3b..2e1c775e5f 100755 --- a/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-compact-telemeter-template.yaml +++ b/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-compact-telemeter-template.yaml @@ -207,10 +207,10 @@ objects: periodSeconds: 5 resources: limits: - memory: 5Gi + memory: ${THANOS_MEMORY_LIMIT} requests: - cpu: 200m - memory: 1Gi + cpu: ${THANOS_CPU_REQUEST} + memory: ${THANOS_MEMORY_REQUEST} terminationMessagePolicy: FallbackToLogsOnError volumeMounts: - mountPath: /var/thanos/compactor @@ -328,12 +328,16 @@ objects: app.kubernetes.io/part-of: observatorium observatorium/tenant: telemeter parameters: -- from: '[a-zA-Z0-9]{40}' - generate: expression - name: OAUTH_PROXY_COOKIE_SECRET - name: THANOS_LOG_LEVEL - required: true value: warn - name: THANOS_REPLICAS - required: true value: "1" +- name: 
THANOS_CPU_REQUEST + value: 200m +- name: THANOS_MEMORY_LIMIT + value: 5Gi +- name: THANOS_MEMORY_REQUEST + value: 1Gi +- from: '[a-zA-Z0-9]{40}' + generate: expression + name: OAUTH_PROXY_COOKIE_SECRET diff --git a/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-store-rhel-template.yaml b/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-store-rhel-template.yaml index 25c60a4d28..cf67a69732 100755 --- a/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-store-rhel-template.yaml +++ b/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-store-rhel-template.yaml @@ -291,10 +291,10 @@ objects: periodSeconds: 5 resources: limits: - memory: 80Gi + memory: ${THANOS_MEMORY_LIMIT} requests: - cpu: "4" - memory: 20Gi + cpu: ${THANOS_CPU_REQUEST} + memory: ${THANOS_MEMORY_REQUEST} terminationMessagePolicy: FallbackToLogsOnError volumeMounts: - mountPath: /var/thanos/store @@ -396,8 +396,12 @@ objects: storageClassName: gp2 parameters: - name: THANOS_LOG_LEVEL - required: true value: warn - name: THANOS_REPLICAS - required: true value: "1" +- name: THANOS_CPU_REQUEST + value: "4" +- name: THANOS_MEMORY_LIMIT + value: 80Gi +- name: THANOS_MEMORY_REQUEST + value: 20Gi diff --git a/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-store-shared-template.yaml b/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-store-shared-template.yaml index 94e588032a..428c1b60d0 100755 --- a/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-store-shared-template.yaml +++ b/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-store-shared-template.yaml @@ -291,10 +291,10 @@ objects: periodSeconds: 5 resources: limits: - memory: 80Gi + memory: ${THANOS_MEMORY_LIMIT} requests: - cpu: "4" - memory: 20Gi + cpu: ${THANOS_CPU_REQUEST} + memory: ${THANOS_MEMORY_REQUEST} terminationMessagePolicy: FallbackToLogsOnError volumeMounts: - mountPath: /var/thanos/store @@ -396,8 +396,12 @@ 
objects: storageClassName: gp2 parameters: - name: THANOS_LOG_LEVEL - required: true value: warn - name: THANOS_REPLICAS - required: true value: "1" +- name: THANOS_CPU_REQUEST + value: "4" +- name: THANOS_MEMORY_LIMIT + value: 80Gi +- name: THANOS_MEMORY_REQUEST + value: 20Gi diff --git a/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-store-telemeter-template.yaml b/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-store-telemeter-template.yaml index d25a78ab55..bb07caaab7 100755 --- a/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-store-telemeter-template.yaml +++ b/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-store-telemeter-template.yaml @@ -291,10 +291,10 @@ objects: periodSeconds: 5 resources: limits: - memory: 80Gi + memory: ${THANOS_MEMORY_LIMIT} requests: - cpu: "4" - memory: 20Gi + cpu: ${THANOS_CPU_REQUEST} + memory: ${THANOS_MEMORY_REQUEST} terminationMessagePolicy: FallbackToLogsOnError volumeMounts: - mountPath: /var/thanos/store @@ -396,8 +396,12 @@ objects: storageClassName: gp2 parameters: - name: THANOS_LOG_LEVEL - required: true value: warn - name: THANOS_REPLICAS - required: true value: "1" +- name: THANOS_CPU_REQUEST + value: "4" +- name: THANOS_MEMORY_LIMIT + value: 80Gi +- name: THANOS_MEMORY_REQUEST + value: 20Gi diff --git a/services_go/observatorium/encoders.go b/services_go/observatorium/encoders.go index 1c349fd233..c6eb900e46 100644 --- a/services_go/observatorium/encoders.go +++ b/services_go/observatorium/encoders.go @@ -43,6 +43,22 @@ type templateYAML struct { replacements [][]string // regexp, replace tuples } +func NewDefaultTemplateYAML(encoder encoding.Encoder) *templateYAML { + return &templateYAML{ + encoder: encoder, + replacements: [][]string{ + // (?s) is a flag that allows . to match newlines + // .*? 
is a non-greedy match of any character + // these matchers assume that the main container (thanos) is the first container in the pod + {`(?s)(containers:\n.*?limits:.*?memory: )\S+`, "${1}$${THANOS_MEMORY_LIMIT}"}, // replace memory limit + {`(?s)(containers:\n.*?requests:.*?memory: )\S+`, "${1}$${THANOS_MEMORY_REQUEST}"}, // replace memory request + {`(?s)(containers:\n.*?limits:.*?cpu: )\S+`, "${1}$${THANOS_CPU_REQUEST}"}, // replace cpu request + {`(?s)(kind: (Deployment|StatefulSet).*?replicas: )\d+`, "${1}$${THANOS_REPLICAS}"}, // replace replicas + {`(?s)(containers:\n.*?\s+--log\.level=)\w+`, "${1}$${THANOS_LOG_LEVEL}"}, // replace thanos log level + }, + } +} + func (c *templateYAML) Read(p []byte) (n int, err error) { if c.reader == nil { yamlData, err := io.ReadAll(c.encoder) diff --git a/services_go/observatorium/metrics.go b/services_go/observatorium/metrics.go index 8d3554d6bc..443f308e2e 100644 --- a/services_go/observatorium/metrics.go +++ b/services_go/observatorium/metrics.go @@ -75,8 +75,6 @@ func makeCompactor(namespace, imageTag string, cfg ThanosTenantConfig[compactor. if cfg.PreManifestsHook != nil { cfg.PreManifestsHook(compactorSatefulset) } - logLevel := string(compactorSatefulset.Options.LogLevel) // capture final log level for use in template - compactorSatefulset.Options.LogLevel = "${THANOS_LOG_LEVEL}" // Post process manifests := compactorSatefulset.Manifests() @@ -139,34 +137,26 @@ func makeCompactor(namespace, imageTag string, cfg ThanosTenantConfig[compactor. 
}, } - // Wrap in template + // Wrap in template, add parameters + defaultParams := defaultTemplateParams(defaultTemplateParamsConfig{ + LogLevel: string(compactorSatefulset.Options.LogLevel), + Replicas: compactorSatefulset.Replicas, + CPURequest: compactorSatefulset.PodResources.Requests[corev1.ResourceCPU], + MemoryLimit: compactorSatefulset.PodResources.Limits[corev1.ResourceMemory], + MemoryRequest: compactorSatefulset.PodResources.Requests[corev1.ResourceMemory], + }) template := openshift.WrapInTemplate("", manifests, metav1.ObjectMeta{ Name: compactorSatefulset.Name, - }, []templatev1.Parameter{ + }, append(defaultParams, []templatev1.Parameter{ { Name: "OAUTH_PROXY_COOKIE_SECRET", Generate: "expression", From: "[a-zA-Z0-9]{40}", }, - { - Name: "THANOS_LOG_LEVEL", - Value: logLevel, - Required: true, - }, - { - Name: "THANOS_REPLICAS", - Value: fmt.Sprintf("%d", compactorSatefulset.Replicas), - Required: true, - }, - }) - - // Adding a special encoder wrapper to replace the replicas value in the template with a template parameter - // As the replicas value is typed as an int, it cannot be replaced using the compactor config. - yamlEncoder := templateYAML{encoder: encoding.GhodssYAML(template[""])} - yamlEncoder.AddReplacement(fmt.Sprintf(`(?m)^(\s*replicas: )%d$`, compactorSatefulset.Replicas), "${1}$${THANOS_REPLICAS}") - - return &yamlEncoder + }...)) + // Adding a special encoder wrapper to replace the templated values in the template with their corresponding template parameter. + return NewDefaultTemplateYAML(encoding.GhodssYAML(template[""])) } // makeStore creates a base store component that can be derived from using the preManifestsHook. 
@@ -247,8 +237,6 @@ func makeStore(namespace, imageTag string, cfg ThanosTenantConfig[store.StoreSta if cfg.PreManifestsHook != nil { cfg.PreManifestsHook(storeStatefulSet) } - logLevel := string(storeStatefulSet.Options.LogLevel) // capture final log level for use in template - storeStatefulSet.Options.LogLevel = "${THANOS_LOG_LEVEL}" // Post process manifests := storeStatefulSet.Manifests() @@ -354,31 +342,24 @@ func makeStore(namespace, imageTag string, cfg ThanosTenantConfig[store.StoreSta // Wrap in template template := openshift.WrapInTemplate("", manifests, metav1.ObjectMeta{ Name: storeStatefulSet.Name, - }, []templatev1.Parameter{ - { - Name: "THANOS_LOG_LEVEL", - Value: logLevel, - Required: true, - }, - { - Name: "THANOS_REPLICAS", - Value: fmt.Sprintf("%d", storeStatefulSet.Replicas), - Required: true, - }, - }) - - yamlEncoder := templateYAML{encoder: encoding.GhodssYAML(template[""])} - // Adding a special encoder wrapper to replace the replicas value in the template with a template parameter - // As the replicas value is typed as an int, it cannot be replaced using the compactor config. - yamlEncoder.AddReplacement(fmt.Sprintf(`(?m)^(\s*replicas: )%d$`, storeStatefulSet.Replicas), "${1}$${THANOS_REPLICAS}") - - return &yamlEncoder + }, defaultTemplateParams(defaultTemplateParamsConfig{ + LogLevel: string(storeStatefulSet.Options.LogLevel), + Replicas: storeStatefulSet.Replicas, + CPURequest: storeStatefulSet.PodResources.Requests[corev1.ResourceCPU], + MemoryLimit: storeStatefulSet.PodResources.Limits[corev1.ResourceMemory], + MemoryRequest: storeStatefulSet.PodResources.Requests[corev1.ResourceMemory], + })) + + // Adding a special encoder wrapper to replace the templated values in the template with their corresponding template parameter. 
+ return NewDefaultTemplateYAML(encoding.GhodssYAML(template[""])) } type kubeObject interface { *corev1.Service | *appsv1.StatefulSet | *monv1.ServiceMonitor | *corev1.ServiceAccount } +// getObject returns the first object of type T from the given map of kubernetes objects. +// This helper can be used for doing post processing on the objects. func getObject[T kubeObject](manifests k8sutil.ObjectMap) T { for _, obj := range manifests { if service, ok := obj.(T); ok { @@ -389,12 +370,15 @@ func getObject[T kubeObject](manifests k8sutil.ObjectMap) T { panic(fmt.Sprintf("could not find object of type %T", *new(T))) } +// postProcessServiceMonitor updates the service monitor to work with the app-sre prometheus. func postProcessServiceMonitor(serviceMonitor *monv1.ServiceMonitor, namespaceSelector string) { serviceMonitor.ObjectMeta.Namespace = monitoringNamespace serviceMonitor.Spec.NamespaceSelector.MatchNames = []string{namespaceSelector} serviceMonitor.ObjectMeta.Labels["prometheus"] = "app-sre" } +// deleteObjStoreEnv deletes the objstore env var from the list of env vars. +// This env var is included by default by the observatorium config for each thanos component. func deleteObjStoreEnv(objStoreEnv []corev1.EnvVar) []corev1.EnvVar { for i, env := range objStoreEnv { if env.Name == "OBJSTORE_CONFIG" { @@ -405,6 +389,9 @@ func deleteObjStoreEnv(objStoreEnv []corev1.EnvVar) []corev1.EnvVar { return objStoreEnv } +// objStoreEnvVars returns the env vars required for the objstore config. +// Base env vars are taken from the s3 secret generated by app-interface. +// The objstore config env var is generated by aggregating the other env vars. 
func objStoreEnvVars(objstoreSecret string) []corev1.EnvVar { objStoreCfg, err := yaml.Marshal(objstore.BucketConfig{ Type: objstore.S3, @@ -430,3 +417,37 @@ func objStoreEnvVars(objstoreSecret string) []corev1.EnvVar { }, } } + +type defaultTemplateParamsConfig struct { + LogLevel string + Replicas int32 + CPURequest resource.Quantity + MemoryLimit resource.Quantity + MemoryRequest resource.Quantity +} + +// defaultTemplateParams returns the default template parameters for the thanos components. +func defaultTemplateParams(cfg defaultTemplateParamsConfig) []templatev1.Parameter { + return []templatev1.Parameter{ + { + Name: "THANOS_LOG_LEVEL", + Value: cfg.LogLevel, + }, + { + Name: "THANOS_REPLICAS", + Value: fmt.Sprintf("%d", cfg.Replicas), + }, + { + Name: "THANOS_CPU_REQUEST", + Value: cfg.CPURequest.String(), + }, + { + Name: "THANOS_MEMORY_LIMIT", + Value: cfg.MemoryLimit.String(), + }, + { + Name: "THANOS_MEMORY_REQUEST", + Value: cfg.MemoryRequest.String(), + }, + } +} diff --git a/services_go/observatorium/sidecars.go b/services_go/observatorium/sidecars.go index 8fd69e9028..e45a599776 100644 --- a/services_go/observatorium/sidecars.go +++ b/services_go/observatorium/sidecars.go @@ -8,6 +8,7 @@ import ( ) // makeOauthProxy creates a container for the oauth-proxy sidecar. +// It contains a template parameter OAUTH_PROXY_COOKIE_SECRET that must be added to the template parameters. 
func makeOauthProxy(upstreamPort int32, namespace, serviceAccount, tlsSecret string) *k8sutil.Container { proxyPort := int32(8443) From 54b03f470ed23d659d0c3a131cb89ac4e60afb27 Mon Sep 17 00:00:00 2001 From: Thibault Mange <22740367+thibaultmg@users.noreply.github.com> Date: Wed, 4 Oct 2023 15:04:43 +0200 Subject: [PATCH 32/32] add default tenant, infinite compactor retention Signed-off-by: Thibault Mange <22740367+thibaultmg@users.noreply.github.com> --- ...rium-metrics-compact-default-template.yaml | 340 +++++++++++++++ ...atorium-metrics-compact-rhel-template.yaml | 3 - ...um-metrics-compact-telemeter-template.yaml | 3 - ...torium-metrics-store-default-template.yaml | 407 ++++++++++++++++++ ...rium-metrics-compact-default-template.yaml | 340 +++++++++++++++ ...atorium-metrics-compact-rhel-template.yaml | 3 - ...um-metrics-compact-telemeter-template.yaml | 3 - ...torium-metrics-store-default-template.yaml | 407 ++++++++++++++++++ services_go/instances/rhobs/rhobs.go | 16 +- services_go/observatorium/metrics.go | 7 +- 10 files changed, 1506 insertions(+), 23 deletions(-) create mode 100755 resources/services/app-sre-stage-01/rhobs/observatorium-metrics-compact-default-template.yaml create mode 100755 resources/services/app-sre-stage-01/rhobs/observatorium-metrics-store-default-template.yaml create mode 100755 resources/services/telemeter-prod-01/rhobs/observatorium-metrics-compact-default-template.yaml create mode 100755 resources/services/telemeter-prod-01/rhobs/observatorium-metrics-store-default-template.yaml diff --git a/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-compact-default-template.yaml b/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-compact-default-template.yaml new file mode 100755 index 0000000000..793d65d315 --- /dev/null +++ b/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-compact-default-template.yaml @@ -0,0 +1,340 @@ +apiVersion: template.openshift.io/v1 +kind: Template +metadata: + 
creationTimestamp: null + name: observatorium-thanos-compact-default +objects: +- apiVersion: v1 + kind: Service + metadata: + annotations: + service.alpha.openshift.io/serving-cert-secret-name: compact-tls-default + creationTimestamp: null + labels: + app.kubernetes.io/component: database-compactor + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-compact + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.4 + observatorium/tenant: default + name: observatorium-thanos-compact-default + namespace: rhobs + spec: + ports: + - name: http + port: 10902 + protocol: TCP + targetPort: 10902 + - name: https + port: 8443 + protocol: TCP + targetPort: 8443 + selector: + app.kubernetes.io/component: database-compactor + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-compact + app.kubernetes.io/part-of: observatorium + observatorium/tenant: default +- apiVersion: v1 + kind: ServiceAccount + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: database-compactor + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-compact + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.4 + observatorium/tenant: default + name: observatorium-thanos-compact-default + namespace: rhobs +- apiVersion: monitoring.coreos.com/v1 + kind: ServiceMonitor + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: database-compactor + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-compact + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.4 + observatorium/tenant: default + prometheus: app-sre + name: observatorium-thanos-compact-default + namespace: openshift-customer-monitoring + spec: + endpoints: + - port: http + relabelings: + - action: replace + separator: / + sourceLabels: + - namespace + - pod + targetLabel: instance + namespaceSelector: + matchNames: + - rhobs + 
selector: + matchLabels: + app.kubernetes.io/component: database-compactor + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-compact + app.kubernetes.io/part-of: observatorium + observatorium/tenant: default +- apiVersion: apps/v1 + kind: StatefulSet + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: database-compactor + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-compact + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.4 + observatorium/tenant: default + name: observatorium-thanos-compact-default + namespace: rhobs + spec: + replicas: ${THANOS_REPLICAS} + selector: + matchLabels: + app.kubernetes.io/component: database-compactor + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-compact + app.kubernetes.io/part-of: observatorium + observatorium/tenant: default + serviceName: observatorium-thanos-compact-default + template: + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: database-compactor + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-compact + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.4 + observatorium/tenant: default + namespace: rhobs + spec: + affinity: + podAntiAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - podAffinityTerm: + labelSelector: + matchExpressions: + - key: app.kubernetes.io/instance + operator: In + values: + - observatorium + - key: app.kubernetes.io/name + operator: In + values: + - thanos-compact + topologyKey: kubernetes.io/hostname + weight: 100 + containers: + - args: + - compact + - --compact.concurrency=1 + - --data-dir=/var/thanos/compactor + - --deduplication.replica-label=replica + - --delete-delay=24h0m0s + - --downsample.concurrency=1 + - --log.format=logfmt + - --log.level=${THANOS_LOG_LEVEL} + - --objstore.config=$(OBJSTORE_CONFIG) + - --wait + - --debug.max-compaction-level=3 + 
env: + - name: HOST_IP_ADDRESS + valueFrom: + fieldRef: + fieldPath: status.hostIP + - name: AWS_ACCESS_KEY_ID + valueFrom: + secretKeyRef: + key: aws_access_key_id + name: default-tenant-s3 + - name: AWS_SECRET_ACCESS_KEY + valueFrom: + secretKeyRef: + key: aws_secret_access_key + name: default-tenant-s3 + - name: OBJ_STORE_BUCKET + valueFrom: + secretKeyRef: + key: bucket + name: default-tenant-s3 + - name: OBJ_STORE_REGION + valueFrom: + secretKeyRef: + key: aws_region + name: default-tenant-s3 + - name: OBJ_STORE_ENDPOINT + valueFrom: + secretKeyRef: + key: endpoint + name: default-tenant-s3 + - name: OBJSTORE_CONFIG + value: | + type: S3 + config: + bucket: $(OBJ_STORE_BUCKET) + endpoint: $(OBJ_STORE_ENDPOINT) + region: $(OBJ_STORE_REGION) + image: quay.io/thanos/thanos:v0.32.4 + imagePullPolicy: IfNotPresent + livenessProbe: + failureThreshold: 4 + httpGet: + path: /-/healthy + port: 10902 + periodSeconds: 30 + name: thanos + ports: + - containerPort: 10902 + name: http + protocol: TCP + readinessProbe: + failureThreshold: 20 + httpGet: + path: /-/ready + port: 10902 + periodSeconds: 5 + resources: + limits: + memory: ${THANOS_MEMORY_LIMIT} + requests: + cpu: ${THANOS_CPU_REQUEST} + memory: ${THANOS_MEMORY_REQUEST} + terminationMessagePolicy: FallbackToLogsOnError + volumeMounts: + - mountPath: /var/thanos/compactor + name: data + - args: + - -provider=openshift + - -https-address=:8443 + - -http-address= + - -email-domain=* + - -upstream=http://localhost:10902 + - -openshift-service-account=observatorium-thanos-compact-default + - '-openshift-sar={"resource": "namespaces", "verb": "get", "name": "rhobs", + "namespace": "rhobs"}' + - '-openshift-delegate-urls={"/": {"resource": "namespaces", "verb": "get", + "name": "rhobs", "namespace": "rhobs"}}' + - -tls-cert=/etc/tls/private/tls.crt + - -tls-key=/etc/tls/private/tls.key + - -client-secret-file=/var/run/secrets/kubernetes.io/serviceaccount/token + - -cookie-secret=${OAUTH_PROXY_COOKIE_SECRET} + - 
-openshift-ca=/etc/pki/tls/cert.pem + - -openshift-ca=/var/run/secrets/kubernetes.io/serviceaccount/ca.crt + image: quay.io/openshift/origin-oauth-proxy:v4.13.0 + name: oauth-proxy + ports: + - containerPort: 8443 + name: https + protocol: TCP + resources: + limits: + cpu: 200m + memory: 200Mi + requests: + cpu: 100m + memory: 100Mi + terminationMessagePolicy: FallbackToLogsOnError + volumeMounts: + - mountPath: /etc/tls/private + name: compact-tls + readOnly: true + nodeSelector: + kubernetes.io/os: linux + securityContext: + fsGroup: 65534 + runAsUser: 65534 + serviceAccountName: observatorium-thanos-compact-default + terminationGracePeriodSeconds: 120 + volumes: + - name: compact-tls + secret: + secretName: compact-tls-default + updateStrategy: {} + volumeClaimTemplates: + - metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: database-compactor + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-compact + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.4 + observatorium/tenant: default + name: data + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 500Gi + storageClassName: gp2 +- apiVersion: route.openshift.io/v1 + kind: Route + metadata: + annotations: + cert-manager.io/issuer-kind: ClusterIssuer + cert-manager.io/issuer-name: letsencrypt-prod-http + creationTimestamp: null + labels: + app.kubernetes.io/component: database-compactor + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-compact + app.kubernetes.io/part-of: observatorium + observatorium/tenant: default + name: observatorium-thanos-compact-default + namespace: rhobs + spec: + host: "" + port: + targetPort: https + tls: + insecureEdgeTerminationPolicy: Redirect + termination: reencrypt + to: + kind: Service + name: observatorium-thanos-compact-default + weight: null +- apiVersion: policy/v1 + kind: PodDisruptionBudget + metadata: + creationTimestamp: null + labels: + 
app.kubernetes.io/component: database-compactor + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-compact + app.kubernetes.io/part-of: observatorium + observatorium/tenant: default + name: observatorium-thanos-compact-default + namespace: rhobs + spec: + maxUnavailable: 1 + selector: + matchLabels: + app.kubernetes.io/component: database-compactor + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-compact + app.kubernetes.io/part-of: observatorium + observatorium/tenant: default +parameters: +- name: THANOS_LOG_LEVEL + value: warn +- name: THANOS_REPLICAS + value: "1" +- name: THANOS_CPU_REQUEST + value: 200m +- name: THANOS_MEMORY_LIMIT + value: 5Gi +- name: THANOS_MEMORY_REQUEST + value: 1Gi +- from: '[a-zA-Z0-9]{40}' + generate: expression + name: OAUTH_PROXY_COOKIE_SECRET diff --git a/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-compact-rhel-template.yaml b/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-compact-rhel-template.yaml index 1368cc857b..0afd164aff 100755 --- a/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-compact-rhel-template.yaml +++ b/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-compact-rhel-template.yaml @@ -144,9 +144,6 @@ objects: - --log.format=logfmt - --log.level=${THANOS_LOG_LEVEL} - --objstore.config=$(OBJSTORE_CONFIG) - - --retention.resolution-1h=8760h0m0s - - --retention.resolution-5m=8760h0m0s - - --retention.resolution-raw=8760h0m0s - --wait - --debug.max-compaction-level=3 env: diff --git a/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-compact-telemeter-template.yaml b/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-compact-telemeter-template.yaml index 2e1c775e5f..b93eb3fb24 100755 --- a/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-compact-telemeter-template.yaml +++ 
b/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-compact-telemeter-template.yaml @@ -144,9 +144,6 @@ objects: - --log.format=logfmt - --log.level=${THANOS_LOG_LEVEL} - --objstore.config=$(OBJSTORE_CONFIG) - - --retention.resolution-1h=8760h0m0s - - --retention.resolution-5m=8760h0m0s - - --retention.resolution-raw=8760h0m0s - --wait - --debug.max-compaction-level=3 env: diff --git a/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-store-default-template.yaml b/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-store-default-template.yaml new file mode 100755 index 0000000000..4f86d223cb --- /dev/null +++ b/resources/services/app-sre-stage-01/rhobs/observatorium-metrics-store-default-template.yaml @@ -0,0 +1,407 @@ +apiVersion: template.openshift.io/v1 +kind: Template +metadata: + creationTimestamp: null + name: observatorium-thanos-store-default +objects: +- apiVersion: rbac.authorization.k8s.io/v1 + kind: Role + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: object-store-gateway + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-store + app.kubernetes.io/part-of: observatorium + observatorium/tenant: default + name: list-pods-default + namespace: rhobs + rules: + - apiGroups: + - apps + resources: + - statefulsets + verbs: + - get + - list +- apiVersion: rbac.authorization.k8s.io/v1 + kind: RoleBinding + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: object-store-gateway + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-store + app.kubernetes.io/part-of: observatorium + observatorium/tenant: default + name: list-pods-default + namespace: rhobs + roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: list-pods-default + subjects: + - kind: ServiceAccount + name: observatorium-thanos-store-default + namespace: rhobs +- apiVersion: v1 + data: + entrypoint.sh: "#!/bin/bash\n\n# Kubernetes replicas 
are named with the following + convention \"-\". \n# This parameter expansion removes + all characters until the last hyphen, capturing only the ordinal.\nexport ORDINAL_INDEX=${HOSTNAME##*-}\n# + This parameter expansion removes all characters after the last hyphen, capturing + only the statefulset name.\nexport STATEFULSET_NAME=\"${HOSTNAME%-*}\"\nexport + THANOS_STORE_REPLICAS=$(kubectl get statefulset ${STATEFULSET_NAME} -n ${NAMESPACE} + -o=jsonpath='{.status.replicas}')\n\n# Logging parameters\necho \"generating + store hashmod config with ORDINAL_INDEX=${ORDINAL_INDEX} THANOS_STORE_REPLICAS=${STATEFULSET_NAME} + HOSTNAME=${HOSTNAME} NAMESPACE=${NAMESPACE} THANOS_STORE_REPLICAS=${THANOS_STORE_REPLICAS}\"\n\ncat + </tmp/config/hashmod-config.yaml\n- action: hashmod\n source_labels: + [\"__block_id\"]\n target_label: shard\n modulus: ${THANOS_STORE_REPLICAS}\n- + action: keep\n source_labels: [\"shard\"]\n regex: ${ORDINAL_INDEX}\nEOF\n" + kind: ConfigMap + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: object-store-gateway + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-store + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.4 + observatorium/tenant: default + name: hashmod-config-template-default + namespace: rhobs +- apiVersion: policy/v1 + kind: PodDisruptionBudget + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: object-store-gateway + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-store + app.kubernetes.io/part-of: observatorium + observatorium/tenant: default + name: observatorium-thanos-store-default + namespace: rhobs + spec: + maxUnavailable: 1 + selector: + matchLabels: + app.kubernetes.io/component: object-store-gateway + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-store + app.kubernetes.io/part-of: observatorium + observatorium/tenant: default +- apiVersion: v1 + kind: Service 
+ metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: object-store-gateway + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-store + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.4 + observatorium/tenant: default + name: observatorium-thanos-store-default + namespace: rhobs + spec: + ports: + - name: http + port: 10902 + protocol: TCP + targetPort: 10902 + - name: grpc + port: 10901 + protocol: TCP + targetPort: 10901 + selector: + app.kubernetes.io/component: object-store-gateway + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-store + app.kubernetes.io/part-of: observatorium + observatorium/tenant: default +- apiVersion: v1 + kind: ServiceAccount + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: object-store-gateway + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-store + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.4 + observatorium/tenant: default + name: observatorium-thanos-store-default + namespace: rhobs +- apiVersion: monitoring.coreos.com/v1 + kind: ServiceMonitor + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: object-store-gateway + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-store + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.4 + observatorium/tenant: default + prometheus: app-sre + name: observatorium-thanos-store-default + namespace: openshift-customer-monitoring + spec: + endpoints: + - port: http + relabelings: + - action: replace + separator: / + sourceLabels: + - namespace + - pod + targetLabel: instance + namespaceSelector: + matchNames: + - rhobs + selector: + matchLabels: + app.kubernetes.io/component: object-store-gateway + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-store + app.kubernetes.io/part-of: observatorium + 
observatorium/tenant: default +- apiVersion: apps/v1 + kind: StatefulSet + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: object-store-gateway + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-store + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.4 + observatorium/tenant: default + name: observatorium-thanos-store-default + namespace: rhobs + spec: + replicas: ${THANOS_REPLICAS} + selector: + matchLabels: + app.kubernetes.io/component: object-store-gateway + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-store + app.kubernetes.io/part-of: observatorium + observatorium/tenant: default + serviceName: observatorium-thanos-store-default + template: + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: object-store-gateway + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-store + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.4 + observatorium/tenant: default + namespace: rhobs + spec: + affinity: + podAntiAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - podAffinityTerm: + labelSelector: + matchExpressions: + - key: app.kubernetes.io/instance + operator: In + values: + - observatorium + - key: app.kubernetes.io/name + operator: In + values: + - thanos-store + topologyKey: kubernetes.io/hostname + weight: 100 + containers: + - args: + - store + - --data-dir=/var/thanos/store + - --ignore-deletion-marks-delay=24h0m0s + - --log.format=logfmt + - --log.level=${THANOS_LOG_LEVEL} + - --max-time=-22h0m0s + - --objstore.config=$(OBJSTORE_CONFIG) + - --selector.relabel-config-file=/tmp/config/hashmod-config.yaml + - --store.enable-index-header-lazy-reader=true + env: + - name: HOST_IP_ADDRESS + valueFrom: + fieldRef: + fieldPath: status.hostIP + - name: AWS_ACCESS_KEY_ID + valueFrom: + secretKeyRef: + key: aws_access_key_id + name: default-tenant-s3 + - name: 
AWS_SECRET_ACCESS_KEY + valueFrom: + secretKeyRef: + key: aws_secret_access_key + name: default-tenant-s3 + - name: OBJ_STORE_BUCKET + valueFrom: + secretKeyRef: + key: bucket + name: default-tenant-s3 + - name: OBJ_STORE_REGION + valueFrom: + secretKeyRef: + key: aws_region + name: default-tenant-s3 + - name: OBJ_STORE_ENDPOINT + valueFrom: + secretKeyRef: + key: endpoint + name: default-tenant-s3 + - name: OBJSTORE_CONFIG + value: | + type: S3 + config: + bucket: $(OBJ_STORE_BUCKET) + endpoint: $(OBJ_STORE_ENDPOINT) + region: $(OBJ_STORE_REGION) + image: quay.io/thanos/thanos:v0.32.4 + imagePullPolicy: IfNotPresent + livenessProbe: + failureThreshold: 8 + httpGet: + path: /-/healthy + port: 10902 + periodSeconds: 30 + timeoutSeconds: 1 + name: thanos + ports: + - containerPort: 10902 + name: http + protocol: TCP + - containerPort: 10901 + name: grpc + protocol: TCP + readinessProbe: + failureThreshold: 20 + httpGet: + path: /-/ready + port: 10902 + periodSeconds: 5 + resources: + limits: + memory: ${THANOS_MEMORY_LIMIT} + requests: + cpu: ${THANOS_CPU_REQUEST} + memory: ${THANOS_MEMORY_REQUEST} + terminationMessagePolicy: FallbackToLogsOnError + volumeMounts: + - mountPath: /var/thanos/store + name: data + - mountPath: /etc/config + name: hashmod-config + - args: + - --reporter.grpc.host-port=dns:///otel-trace-writer-collector-headless.observatorium-tools.svc:14250 + - --reporter.type=grpc + - --agent.tags=pod.namespace=$(NAMESPACE),pod.name=$(POD) + env: + - name: NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + - name: POD + valueFrom: + fieldRef: + fieldPath: metadata.name + image: quay.io/app-sre/jaegertracing-jaeger-agent:1.22.0 + livenessProbe: + failureThreshold: 5 + httpGet: + path: / + port: 14271 + name: jaeger-agent + ports: + - containerPort: 5778 + name: configs + protocol: TCP + - containerPort: 6831 + name: jaeger-thrift + protocol: TCP + - containerPort: 14271 + name: metrics + protocol: TCP + readinessProbe: + httpGet: + path: 
/ + port: 14271 + initialDelaySeconds: 1 + resources: + limits: + cpu: 128m + memory: 128Mi + requests: + cpu: 32m + memory: 64Mi + terminationMessagePolicy: FallbackToLogsOnError + initContainers: + - args: + - /tmp/entrypoint/entrypoint.sh + env: + - name: NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + image: quay.io/app-sre/ubi8-ubi + imagePullPolicy: IfNotPresent + name: init-hashmod-file + resources: {} + volumeMounts: + - mountPath: /tmp/entrypoint + name: hashmod-config-template + - mountPath: /etc/config + name: hashmod-config + nodeSelector: + kubernetes.io/os: linux + securityContext: + fsGroup: 65534 + runAsUser: 65534 + serviceAccountName: observatorium-thanos-store-default + terminationGracePeriodSeconds: 120 + volumes: + - emptyDir: {} + name: hashmod-config + - configMap: + defaultMode: 511 + name: thanos-store + name: hashmod-config-template + updateStrategy: {} + volumeClaimTemplates: + - metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: object-store-gateway + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-store + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.4 + observatorium/tenant: default + name: data + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 500Gi + storageClassName: gp2 +parameters: +- name: THANOS_LOG_LEVEL + value: warn +- name: THANOS_REPLICAS + value: "1" +- name: THANOS_CPU_REQUEST + value: "4" +- name: THANOS_MEMORY_LIMIT + value: 80Gi +- name: THANOS_MEMORY_REQUEST + value: 20Gi diff --git a/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-compact-default-template.yaml b/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-compact-default-template.yaml new file mode 100755 index 0000000000..793d65d315 --- /dev/null +++ b/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-compact-default-template.yaml @@ -0,0 +1,340 @@ +apiVersion: template.openshift.io/v1 
+kind: Template +metadata: + creationTimestamp: null + name: observatorium-thanos-compact-default +objects: +- apiVersion: v1 + kind: Service + metadata: + annotations: + service.alpha.openshift.io/serving-cert-secret-name: compact-tls-default + creationTimestamp: null + labels: + app.kubernetes.io/component: database-compactor + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-compact + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.4 + observatorium/tenant: default + name: observatorium-thanos-compact-default + namespace: rhobs + spec: + ports: + - name: http + port: 10902 + protocol: TCP + targetPort: 10902 + - name: https + port: 8443 + protocol: TCP + targetPort: 8443 + selector: + app.kubernetes.io/component: database-compactor + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-compact + app.kubernetes.io/part-of: observatorium + observatorium/tenant: default +- apiVersion: v1 + kind: ServiceAccount + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: database-compactor + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-compact + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.4 + observatorium/tenant: default + name: observatorium-thanos-compact-default + namespace: rhobs +- apiVersion: monitoring.coreos.com/v1 + kind: ServiceMonitor + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: database-compactor + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-compact + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.4 + observatorium/tenant: default + prometheus: app-sre + name: observatorium-thanos-compact-default + namespace: openshift-customer-monitoring + spec: + endpoints: + - port: http + relabelings: + - action: replace + separator: / + sourceLabels: + - namespace + - pod + targetLabel: instance + namespaceSelector: + 
matchNames: + - rhobs + selector: + matchLabels: + app.kubernetes.io/component: database-compactor + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-compact + app.kubernetes.io/part-of: observatorium + observatorium/tenant: default +- apiVersion: apps/v1 + kind: StatefulSet + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: database-compactor + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-compact + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.4 + observatorium/tenant: default + name: observatorium-thanos-compact-default + namespace: rhobs + spec: + replicas: ${THANOS_REPLICAS} + selector: + matchLabels: + app.kubernetes.io/component: database-compactor + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-compact + app.kubernetes.io/part-of: observatorium + observatorium/tenant: default + serviceName: observatorium-thanos-compact-default + template: + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: database-compactor + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-compact + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.4 + observatorium/tenant: default + namespace: rhobs + spec: + affinity: + podAntiAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - podAffinityTerm: + labelSelector: + matchExpressions: + - key: app.kubernetes.io/instance + operator: In + values: + - observatorium + - key: app.kubernetes.io/name + operator: In + values: + - thanos-compact + topologyKey: kubernetes.io/hostname + weight: 100 + containers: + - args: + - compact + - --compact.concurrency=1 + - --data-dir=/var/thanos/compactor + - --deduplication.replica-label=replica + - --delete-delay=24h0m0s + - --downsample.concurrency=1 + - --log.format=logfmt + - --log.level=${THANOS_LOG_LEVEL} + - --objstore.config=$(OBJSTORE_CONFIG) + - --wait + - 
--debug.max-compaction-level=3 + env: + - name: HOST_IP_ADDRESS + valueFrom: + fieldRef: + fieldPath: status.hostIP + - name: AWS_ACCESS_KEY_ID + valueFrom: + secretKeyRef: + key: aws_access_key_id + name: default-tenant-s3 + - name: AWS_SECRET_ACCESS_KEY + valueFrom: + secretKeyRef: + key: aws_secret_access_key + name: default-tenant-s3 + - name: OBJ_STORE_BUCKET + valueFrom: + secretKeyRef: + key: bucket + name: default-tenant-s3 + - name: OBJ_STORE_REGION + valueFrom: + secretKeyRef: + key: aws_region + name: default-tenant-s3 + - name: OBJ_STORE_ENDPOINT + valueFrom: + secretKeyRef: + key: endpoint + name: default-tenant-s3 + - name: OBJSTORE_CONFIG + value: | + type: S3 + config: + bucket: $(OBJ_STORE_BUCKET) + endpoint: $(OBJ_STORE_ENDPOINT) + region: $(OBJ_STORE_REGION) + image: quay.io/thanos/thanos:v0.32.4 + imagePullPolicy: IfNotPresent + livenessProbe: + failureThreshold: 4 + httpGet: + path: /-/healthy + port: 10902 + periodSeconds: 30 + name: thanos + ports: + - containerPort: 10902 + name: http + protocol: TCP + readinessProbe: + failureThreshold: 20 + httpGet: + path: /-/ready + port: 10902 + periodSeconds: 5 + resources: + limits: + memory: ${THANOS_MEMORY_LIMIT} + requests: + cpu: ${THANOS_CPU_REQUEST} + memory: ${THANOS_MEMORY_REQUEST} + terminationMessagePolicy: FallbackToLogsOnError + volumeMounts: + - mountPath: /var/thanos/compactor + name: data + - args: + - -provider=openshift + - -https-address=:8443 + - -http-address= + - -email-domain=* + - -upstream=http://localhost:10902 + - -openshift-service-account=observatorium-thanos-compact-default + - '-openshift-sar={"resource": "namespaces", "verb": "get", "name": "rhobs", + "namespace": "rhobs"}' + - '-openshift-delegate-urls={"/": {"resource": "namespaces", "verb": "get", + "name": "rhobs", "namespace": "rhobs"}}' + - -tls-cert=/etc/tls/private/tls.crt + - -tls-key=/etc/tls/private/tls.key + - -client-secret-file=/var/run/secrets/kubernetes.io/serviceaccount/token + - 
-cookie-secret=${OAUTH_PROXY_COOKIE_SECRET} + - -openshift-ca=/etc/pki/tls/cert.pem + - -openshift-ca=/var/run/secrets/kubernetes.io/serviceaccount/ca.crt + image: quay.io/openshift/origin-oauth-proxy:v4.13.0 + name: oauth-proxy + ports: + - containerPort: 8443 + name: https + protocol: TCP + resources: + limits: + cpu: 200m + memory: 200Mi + requests: + cpu: 100m + memory: 100Mi + terminationMessagePolicy: FallbackToLogsOnError + volumeMounts: + - mountPath: /etc/tls/private + name: compact-tls + readOnly: true + nodeSelector: + kubernetes.io/os: linux + securityContext: + fsGroup: 65534 + runAsUser: 65534 + serviceAccountName: observatorium-thanos-compact-default + terminationGracePeriodSeconds: 120 + volumes: + - name: compact-tls + secret: + secretName: compact-tls-default + updateStrategy: {} + volumeClaimTemplates: + - metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: database-compactor + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-compact + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.4 + observatorium/tenant: default + name: data + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 500Gi + storageClassName: gp2 +- apiVersion: route.openshift.io/v1 + kind: Route + metadata: + annotations: + cert-manager.io/issuer-kind: ClusterIssuer + cert-manager.io/issuer-name: letsencrypt-prod-http + creationTimestamp: null + labels: + app.kubernetes.io/component: database-compactor + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-compact + app.kubernetes.io/part-of: observatorium + observatorium/tenant: default + name: observatorium-thanos-compact-default + namespace: rhobs + spec: + host: "" + port: + targetPort: https + tls: + insecureEdgeTerminationPolicy: Redirect + termination: reencrypt + to: + kind: Service + name: observatorium-thanos-compact-default + weight: null +- apiVersion: policy/v1 + kind: PodDisruptionBudget + 
metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: database-compactor + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-compact + app.kubernetes.io/part-of: observatorium + observatorium/tenant: default + name: observatorium-thanos-compact-default + namespace: rhobs + spec: + maxUnavailable: 1 + selector: + matchLabels: + app.kubernetes.io/component: database-compactor + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-compact + app.kubernetes.io/part-of: observatorium + observatorium/tenant: default +parameters: +- name: THANOS_LOG_LEVEL + value: warn +- name: THANOS_REPLICAS + value: "1" +- name: THANOS_CPU_REQUEST + value: 200m +- name: THANOS_MEMORY_LIMIT + value: 5Gi +- name: THANOS_MEMORY_REQUEST + value: 1Gi +- from: '[a-zA-Z0-9]{40}' + generate: expression + name: OAUTH_PROXY_COOKIE_SECRET diff --git a/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-compact-rhel-template.yaml b/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-compact-rhel-template.yaml index 1368cc857b..0afd164aff 100755 --- a/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-compact-rhel-template.yaml +++ b/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-compact-rhel-template.yaml @@ -144,9 +144,6 @@ objects: - --log.format=logfmt - --log.level=${THANOS_LOG_LEVEL} - --objstore.config=$(OBJSTORE_CONFIG) - - --retention.resolution-1h=8760h0m0s - - --retention.resolution-5m=8760h0m0s - - --retention.resolution-raw=8760h0m0s - --wait - --debug.max-compaction-level=3 env: diff --git a/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-compact-telemeter-template.yaml b/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-compact-telemeter-template.yaml index 2e1c775e5f..b93eb3fb24 100755 --- a/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-compact-telemeter-template.yaml +++ 
b/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-compact-telemeter-template.yaml @@ -144,9 +144,6 @@ objects: - --log.format=logfmt - --log.level=${THANOS_LOG_LEVEL} - --objstore.config=$(OBJSTORE_CONFIG) - - --retention.resolution-1h=8760h0m0s - - --retention.resolution-5m=8760h0m0s - - --retention.resolution-raw=8760h0m0s - --wait - --debug.max-compaction-level=3 env: diff --git a/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-store-default-template.yaml b/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-store-default-template.yaml new file mode 100755 index 0000000000..4f86d223cb --- /dev/null +++ b/resources/services/telemeter-prod-01/rhobs/observatorium-metrics-store-default-template.yaml @@ -0,0 +1,407 @@ +apiVersion: template.openshift.io/v1 +kind: Template +metadata: + creationTimestamp: null + name: observatorium-thanos-store-default +objects: +- apiVersion: rbac.authorization.k8s.io/v1 + kind: Role + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: object-store-gateway + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-store + app.kubernetes.io/part-of: observatorium + observatorium/tenant: default + name: list-pods-default + namespace: rhobs + rules: + - apiGroups: + - apps + resources: + - statefulsets + verbs: + - get + - list +- apiVersion: rbac.authorization.k8s.io/v1 + kind: RoleBinding + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: object-store-gateway + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-store + app.kubernetes.io/part-of: observatorium + observatorium/tenant: default + name: list-pods-default + namespace: rhobs + roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: list-pods-default + subjects: + - kind: ServiceAccount + name: observatorium-thanos-store-default + namespace: rhobs +- apiVersion: v1 + data: + entrypoint.sh: "#!/bin/bash\n\n# Kubernetes 
replicas are named with the following + convention \"<statefulset-name>-<ordinal>\". \n# This parameter expansion removes + all characters until the last hyphen, capturing only the ordinal.\nexport ORDINAL_INDEX=${HOSTNAME##*-}\n# + This parameter expansion removes all characters after the last hyphen, capturing + only the statefulset name.\nexport STATEFULSET_NAME=\"${HOSTNAME%-*}\"\nexport + THANOS_STORE_REPLICAS=$(kubectl get statefulset ${STATEFULSET_NAME} -n ${NAMESPACE} + -o=jsonpath='{.status.replicas}')\n\n# Logging parameters\necho \"generating + store hashmod config with ORDINAL_INDEX=${ORDINAL_INDEX} THANOS_STORE_REPLICAS=${STATEFULSET_NAME} + HOSTNAME=${HOSTNAME} NAMESPACE=${NAMESPACE} THANOS_STORE_REPLICAS=${THANOS_STORE_REPLICAS}\"\n\ncat + <<EOF >/tmp/config/hashmod-config.yaml\n- action: hashmod\n source_labels: + [\"__block_id\"]\n target_label: shard\n modulus: ${THANOS_STORE_REPLICAS}\n- + action: keep\n source_labels: [\"shard\"]\n regex: ${ORDINAL_INDEX}\nEOF\n" + kind: ConfigMap + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: object-store-gateway + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-store + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.4 + observatorium/tenant: default + name: hashmod-config-template-default + namespace: rhobs +- apiVersion: policy/v1 + kind: PodDisruptionBudget + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: object-store-gateway + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-store + app.kubernetes.io/part-of: observatorium + observatorium/tenant: default + name: observatorium-thanos-store-default + namespace: rhobs + spec: + maxUnavailable: 1 + selector: + matchLabels: + app.kubernetes.io/component: object-store-gateway + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-store + app.kubernetes.io/part-of: observatorium + observatorium/tenant: default +- apiVersion: v1 + 
kind: Service + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: object-store-gateway + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-store + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.4 + observatorium/tenant: default + name: observatorium-thanos-store-default + namespace: rhobs + spec: + ports: + - name: http + port: 10902 + protocol: TCP + targetPort: 10902 + - name: grpc + port: 10901 + protocol: TCP + targetPort: 10901 + selector: + app.kubernetes.io/component: object-store-gateway + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-store + app.kubernetes.io/part-of: observatorium + observatorium/tenant: default +- apiVersion: v1 + kind: ServiceAccount + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: object-store-gateway + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-store + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.4 + observatorium/tenant: default + name: observatorium-thanos-store-default + namespace: rhobs +- apiVersion: monitoring.coreos.com/v1 + kind: ServiceMonitor + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: object-store-gateway + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-store + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.4 + observatorium/tenant: default + prometheus: app-sre + name: observatorium-thanos-store-default + namespace: openshift-customer-monitoring + spec: + endpoints: + - port: http + relabelings: + - action: replace + separator: / + sourceLabels: + - namespace + - pod + targetLabel: instance + namespaceSelector: + matchNames: + - rhobs + selector: + matchLabels: + app.kubernetes.io/component: object-store-gateway + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-store + app.kubernetes.io/part-of: 
observatorium + observatorium/tenant: default +- apiVersion: apps/v1 + kind: StatefulSet + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: object-store-gateway + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-store + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.4 + observatorium/tenant: default + name: observatorium-thanos-store-default + namespace: rhobs + spec: + replicas: ${THANOS_REPLICAS} + selector: + matchLabels: + app.kubernetes.io/component: object-store-gateway + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-store + app.kubernetes.io/part-of: observatorium + observatorium/tenant: default + serviceName: observatorium-thanos-store-default + template: + metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: object-store-gateway + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-store + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.4 + observatorium/tenant: default + namespace: rhobs + spec: + affinity: + podAntiAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - podAffinityTerm: + labelSelector: + matchExpressions: + - key: app.kubernetes.io/instance + operator: In + values: + - observatorium + - key: app.kubernetes.io/name + operator: In + values: + - thanos-store + topologyKey: kubernetes.io/hostname + weight: 100 + containers: + - args: + - store + - --data-dir=/var/thanos/store + - --ignore-deletion-marks-delay=24h0m0s + - --log.format=logfmt + - --log.level=${THANOS_LOG_LEVEL} + - --max-time=-22h0m0s + - --objstore.config=$(OBJSTORE_CONFIG) + - --selector.relabel-config-file=/tmp/config/hashmod-config.yaml + - --store.enable-index-header-lazy-reader=true + env: + - name: HOST_IP_ADDRESS + valueFrom: + fieldRef: + fieldPath: status.hostIP + - name: AWS_ACCESS_KEY_ID + valueFrom: + secretKeyRef: + key: aws_access_key_id + name: default-tenant-s3 + - 
name: AWS_SECRET_ACCESS_KEY + valueFrom: + secretKeyRef: + key: aws_secret_access_key + name: default-tenant-s3 + - name: OBJ_STORE_BUCKET + valueFrom: + secretKeyRef: + key: bucket + name: default-tenant-s3 + - name: OBJ_STORE_REGION + valueFrom: + secretKeyRef: + key: aws_region + name: default-tenant-s3 + - name: OBJ_STORE_ENDPOINT + valueFrom: + secretKeyRef: + key: endpoint + name: default-tenant-s3 + - name: OBJSTORE_CONFIG + value: | + type: S3 + config: + bucket: $(OBJ_STORE_BUCKET) + endpoint: $(OBJ_STORE_ENDPOINT) + region: $(OBJ_STORE_REGION) + image: quay.io/thanos/thanos:v0.32.4 + imagePullPolicy: IfNotPresent + livenessProbe: + failureThreshold: 8 + httpGet: + path: /-/healthy + port: 10902 + periodSeconds: 30 + timeoutSeconds: 1 + name: thanos + ports: + - containerPort: 10902 + name: http + protocol: TCP + - containerPort: 10901 + name: grpc + protocol: TCP + readinessProbe: + failureThreshold: 20 + httpGet: + path: /-/ready + port: 10902 + periodSeconds: 5 + resources: + limits: + memory: ${THANOS_MEMORY_LIMIT} + requests: + cpu: ${THANOS_CPU_REQUEST} + memory: ${THANOS_MEMORY_REQUEST} + terminationMessagePolicy: FallbackToLogsOnError + volumeMounts: + - mountPath: /var/thanos/store + name: data + - mountPath: /etc/config + name: hashmod-config + - args: + - --reporter.grpc.host-port=dns:///otel-trace-writer-collector-headless.observatorium-tools.svc:14250 + - --reporter.type=grpc + - --agent.tags=pod.namespace=$(NAMESPACE),pod.name=$(POD) + env: + - name: NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + - name: POD + valueFrom: + fieldRef: + fieldPath: metadata.name + image: quay.io/app-sre/jaegertracing-jaeger-agent:1.22.0 + livenessProbe: + failureThreshold: 5 + httpGet: + path: / + port: 14271 + name: jaeger-agent + ports: + - containerPort: 5778 + name: configs + protocol: TCP + - containerPort: 6831 + name: jaeger-thrift + protocol: TCP + - containerPort: 14271 + name: metrics + protocol: TCP + readinessProbe: + httpGet: + 
path: / + port: 14271 + initialDelaySeconds: 1 + resources: + limits: + cpu: 128m + memory: 128Mi + requests: + cpu: 32m + memory: 64Mi + terminationMessagePolicy: FallbackToLogsOnError + initContainers: + - args: + - /tmp/entrypoint/entrypoint.sh + env: + - name: NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + image: quay.io/app-sre/ubi8-ubi + imagePullPolicy: IfNotPresent + name: init-hashmod-file + resources: {} + volumeMounts: + - mountPath: /tmp/entrypoint + name: hashmod-config-template + - mountPath: /etc/config + name: hashmod-config + nodeSelector: + kubernetes.io/os: linux + securityContext: + fsGroup: 65534 + runAsUser: 65534 + serviceAccountName: observatorium-thanos-store-default + terminationGracePeriodSeconds: 120 + volumes: + - emptyDir: {} + name: hashmod-config + - configMap: + defaultMode: 511 + name: thanos-store + name: hashmod-config-template + updateStrategy: {} + volumeClaimTemplates: + - metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: object-store-gateway + app.kubernetes.io/instance: observatorium + app.kubernetes.io/name: thanos-store + app.kubernetes.io/part-of: observatorium + app.kubernetes.io/version: v0.32.4 + observatorium/tenant: default + name: data + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 500Gi + storageClassName: gp2 +parameters: +- name: THANOS_LOG_LEVEL + value: warn +- name: THANOS_REPLICAS + value: "1" +- name: THANOS_CPU_REQUEST + value: "4" +- name: THANOS_MEMORY_LIMIT + value: 80Gi +- name: THANOS_MEMORY_REQUEST + value: 20Gi diff --git a/services_go/instances/rhobs/rhobs.go b/services_go/instances/rhobs/rhobs.go index 2173cb2c7e..6fa4bb65f1 100644 --- a/services_go/instances/rhobs/rhobs.go +++ b/services_go/instances/rhobs/rhobs.go @@ -15,8 +15,8 @@ func ClusterConfigs() []observatorium.Observatorium { ThanosImageTag: "v0.32.4", Stores: []observatorium.ThanosTenantConfig[store.StoreStatefulSet]{ { - Tenant: "shared", - ObjStoreSecret: 
"shared-tenant-s3", + Tenant: "default", + ObjStoreSecret: "default-tenant-s3", }, { Tenant: "rhel", @@ -29,8 +29,8 @@ func ClusterConfigs() []observatorium.Observatorium { }, Compactors: []observatorium.ThanosTenantConfig[compactor.CompactorStatefulSet]{ { - Tenant: "shared", - ObjStoreSecret: "shared-tenant-s3", + Tenant: "default", + ObjStoreSecret: "default-tenant-s3", }, { Tenant: "rhel", @@ -49,8 +49,8 @@ func ClusterConfigs() []observatorium.Observatorium { ThanosImageTag: "v0.32.4", Stores: []observatorium.ThanosTenantConfig[store.StoreStatefulSet]{ { - Tenant: "shared", - ObjStoreSecret: "shared-tenant-s3", + Tenant: "default", + ObjStoreSecret: "default-tenant-s3", }, { Tenant: "rhel", @@ -63,8 +63,8 @@ func ClusterConfigs() []observatorium.Observatorium { }, Compactors: []observatorium.ThanosTenantConfig[compactor.CompactorStatefulSet]{ { - Tenant: "shared", - ObjStoreSecret: "shared-tenant-s3", + Tenant: "default", + ObjStoreSecret: "default-tenant-s3", }, { Tenant: "rhel", diff --git a/services_go/observatorium/metrics.go b/services_go/observatorium/metrics.go index 443f308e2e..9474781c93 100644 --- a/services_go/observatorium/metrics.go +++ b/services_go/observatorium/metrics.go @@ -62,9 +62,10 @@ func makeCompactor(namespace, imageTag string, cfg ThanosTenantConfig[compactor. 
compactorSatefulset.Sidecars = []k8sutil.ContainerProvider{makeOauthProxy(10902, namespace, compactorSatefulset.Name, tlsSecret)} // Compactor config - compactorSatefulset.Options.RetentionResolutionRaw = 365 * 24 * time.Hour - compactorSatefulset.Options.RetentionResolution5m = 365 * 24 * time.Hour - compactorSatefulset.Options.RetentionResolution1h = 365 * 24 * time.Hour + compactorSatefulset.Options.LogLevel = common.LogLevelWarn + compactorSatefulset.Options.RetentionResolutionRaw = 0 + compactorSatefulset.Options.RetentionResolution5m = 0 + compactorSatefulset.Options.RetentionResolution1h = 0 compactorSatefulset.Options.DeleteDelay = 24 * time.Hour compactorSatefulset.Options.CompactConcurrency = 1 compactorSatefulset.Options.DownsampleConcurrency = 1