From 620fa75eb593247a07c4dc39ea96fc6a059be111 Mon Sep 17 00:00:00 2001 From: Ahmed Mousa Date: Fri, 9 Jun 2023 11:26:03 +0200 Subject: [PATCH] feat: add k8s custom metrics collector (#1174) Collector that collects custom k8s metrics from custom.metrics.k8s.io/v1beta1/ and saves them in the bundle under the /metrics directory --- config/crds/troubleshoot.sh_collectors.yaml | 30 ++++ config/crds/troubleshoot.sh_preflights.yaml | 30 ++++ .../crds/troubleshoot.sh_supportbundles.yaml | 30 ++++ go.mod | 1 + go.sum | 2 + .../troubleshoot/v1beta2/collector_shared.go | 18 +++ .../v1beta2/zz_generated.deepcopy.go | 41 ++++++ pkg/collect/collector.go | 5 + pkg/collect/k8s_metrics.go | 130 ++++++++++++++++++ pkg/collect/k8s_metrics_test.go | 91 ++++++++++++ schemas/collector-troubleshoot-v1beta2.json | 35 +++++ schemas/preflight-troubleshoot-v1beta2.json | 35 +++++ .../supportbundle-troubleshoot-v1beta2.json | 35 +++++ 13 files changed, 483 insertions(+) create mode 100644 pkg/collect/k8s_metrics.go create mode 100644 pkg/collect/k8s_metrics_test.go diff --git a/config/crds/troubleshoot.sh_collectors.yaml b/config/crds/troubleshoot.sh_collectors.yaml index e5e2d85fa..c0db6d68b 100644 --- a/config/crds/troubleshoot.sh_collectors.yaml +++ b/config/crds/troubleshoot.sh_collectors.yaml @@ -245,6 +245,36 @@ spec: - image - namespace type: object + customMetrics: + properties: + collectorName: + type: string + exclude: + type: BoolString + metricRequests: + items: + description: MetricRequest the details of the MetricValuesList + to be retrieved + properties: + namespace: + description: Namespace for which to collect the metric + values, empty for non-namespaces resources. + type: string + objectName: + description: ObjectName for which to collect metric + values, all resources when empty. Note that for + namespaced resources a Namespace has to be supplied + regardless. 
+ type: string + resourceMetricName: + description: ResourceMetricName name of the MetricValueList + as per the APIResourceList from custom.metrics.k8s.io/v1beta1 + type: string + required: + - resourceMetricName + type: object + type: array + type: object data: properties: collectorName: diff --git a/config/crds/troubleshoot.sh_preflights.yaml b/config/crds/troubleshoot.sh_preflights.yaml index 8db660533..ac66e1044 100644 --- a/config/crds/troubleshoot.sh_preflights.yaml +++ b/config/crds/troubleshoot.sh_preflights.yaml @@ -1740,6 +1740,36 @@ spec: - image - namespace type: object + customMetrics: + properties: + collectorName: + type: string + exclude: + type: BoolString + metricRequests: + items: + description: MetricRequest the details of the MetricValuesList + to be retrieved + properties: + namespace: + description: Namespace for which to collect the metric + values, empty for non-namespaces resources. + type: string + objectName: + description: ObjectName for which to collect metric + values, all resources when empty. Note that for + namespaced resources a Namespace has to be supplied + regardless. 
+ type: string + resourceMetricName: + description: ResourceMetricName name of the MetricValueList + as per the APIResourceList from custom.metrics.k8s.io/v1beta1 + type: string + required: + - resourceMetricName + type: object + type: array + type: object data: properties: collectorName: diff --git a/config/crds/troubleshoot.sh_supportbundles.yaml b/config/crds/troubleshoot.sh_supportbundles.yaml index 774e0e6b2..50273871f 100644 --- a/config/crds/troubleshoot.sh_supportbundles.yaml +++ b/config/crds/troubleshoot.sh_supportbundles.yaml @@ -1771,6 +1771,36 @@ spec: - image - namespace type: object + customMetrics: + properties: + collectorName: + type: string + exclude: + type: BoolString + metricRequests: + items: + description: MetricRequest the details of the MetricValuesList + to be retrieved + properties: + namespace: + description: Namespace for which to collect the metric + values, empty for non-namespaces resources. + type: string + objectName: + description: ObjectName for which to collect metric + values, all resources when empty. Note that for + namespaced resources a Namespace has to be supplied + regardless. 
+ type: string + resourceMetricName: + description: ResourceMetricName name of the MetricValueList + as per the APIResourceList from custom.metrics.k8s.io/v1beta1 + type: string + required: + - resourceMetricName + type: object + type: array + type: object data: properties: collectorName: diff --git a/go.mod b/go.mod index 755c0153c..0818d7771 100644 --- a/go.mod +++ b/go.mod @@ -211,6 +211,7 @@ require ( gopkg.in/ini.v1 v1.67.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect k8s.io/kube-openapi v0.0.0-20230501164219-8b0f38b5fd1f // indirect + k8s.io/metrics v0.27.2 k8s.io/utils v0.0.0-20230406110748-d93618cff8a2 periph.io/x/host/v3 v3.8.2 sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect diff --git a/go.sum b/go.sum index aba361dff..1a8738c88 100644 --- a/go.sum +++ b/go.sum @@ -1476,6 +1476,8 @@ k8s.io/klog/v2 v2.100.1 h1:7WCHKK6K8fNhTqfBhISHQ97KrnJNFZMcQvKp7gP/tmg= k8s.io/klog/v2 v2.100.1/go.mod h1:y1WjHnz7Dj687irZUWR/WLkLc5N1YHtjLdmgWjndZn0= k8s.io/kube-openapi v0.0.0-20230501164219-8b0f38b5fd1f h1:2kWPakN3i/k81b0gvD5C5FJ2kxm1WrQFanWchyKuqGg= k8s.io/kube-openapi v0.0.0-20230501164219-8b0f38b5fd1f/go.mod h1:byini6yhqGC14c3ebc/QwanvYwhuMWF6yz2F8uwW8eg= +k8s.io/metrics v0.27.2 h1:TD6z3dhhN9bgg5YkbTh72bPiC1BsxipBLPBWyC3VQAU= +k8s.io/metrics v0.27.2/go.mod h1:v3OT7U0DBvoAzWVzGZWQhdV4qsRJWchzs/LeVN8bhW4= k8s.io/utils v0.0.0-20230406110748-d93618cff8a2 h1:qY1Ad8PODbnymg2pRbkyMT/ylpTrCM8P2RJ0yroCyIk= k8s.io/utils v0.0.0-20230406110748-d93618cff8a2/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= oras.land/oras-go v1.2.3 h1:v8PJl+gEAntI1pJ/LCrDgsuk+1PKVavVEPsYIHFE5uY= diff --git a/pkg/apis/troubleshoot/v1beta2/collector_shared.go b/pkg/apis/troubleshoot/v1beta2/collector_shared.go index 410775787..4c6d692fb 100644 --- a/pkg/apis/troubleshoot/v1beta2/collector_shared.go +++ b/pkg/apis/troubleshoot/v1beta2/collector_shared.go @@ -27,6 +27,23 @@ type ClusterResources struct { IgnoreRBAC bool `json:"ignoreRBAC,omitempty" yaml:"ignoreRBAC"` } +// 
MetricRequest the details of the MetricValuesList to be retrieved +type MetricRequest struct { + // Namespace for which to collect the metric values, empty for non-namespaces resources. + Namespace string `json:"namespace,omitempty" yaml:"namespace,omitempty"` + // ObjectName for which to collect metric values, all resources when empty. + // Note that for namespaced resources a Namespace has to be supplied regardless. + ObjectName string `json:"objectName,omitempty" yaml:"objectName,omitempty"` + // ResourceMetricName name of the MetricValueList as per the APIResourceList from + // custom.metrics.k8s.io/v1beta1 + ResourceMetricName string `json:"resourceMetricName" yaml:"resourceMetricName"` +} + +type CustomMetrics struct { + CollectorMeta `json:",inline" yaml:",inline"` + MetricRequests []MetricRequest `json:"metricRequests,omitempty" yaml:"metricRequests,omitempty"` +} + type Secret struct { CollectorMeta `json:",inline" yaml:",inline"` Name string `json:"name,omitempty" yaml:"name,omitempty"` @@ -231,6 +248,7 @@ type Collect struct { ClusterInfo *ClusterInfo `json:"clusterInfo,omitempty" yaml:"clusterInfo,omitempty"` ClusterResources *ClusterResources `json:"clusterResources,omitempty" yaml:"clusterResources,omitempty"` Secret *Secret `json:"secret,omitempty" yaml:"secret,omitempty"` + CustomMetrics *CustomMetrics `json:"customMetrics,omitempty" yaml:"customMetrics,omitempty"` ConfigMap *ConfigMap `json:"configMap,omitempty" yaml:"configMap,omitempty"` Logs *Logs `json:"logs,omitempty" yaml:"logs,omitempty"` Run *Run `json:"run,omitempty" yaml:"run,omitempty"` diff --git a/pkg/apis/troubleshoot/v1beta2/zz_generated.deepcopy.go b/pkg/apis/troubleshoot/v1beta2/zz_generated.deepcopy.go index 2e59398f4..41b749335 100644 --- a/pkg/apis/troubleshoot/v1beta2/zz_generated.deepcopy.go +++ b/pkg/apis/troubleshoot/v1beta2/zz_generated.deepcopy.go @@ -782,6 +782,11 @@ func (in *Collect) DeepCopyInto(out *Collect) { *out = new(Secret) (*in).DeepCopyInto(*out) } + if 
in.CustomMetrics != nil { + in, out := &in.CustomMetrics, &out.CustomMetrics + *out = new(CustomMetrics) + (*in).DeepCopyInto(*out) + } if in.ConfigMap != nil { in, out := &in.ConfigMap, &out.ConfigMap *out = new(ConfigMap) @@ -1158,6 +1163,27 @@ func (in *CopyFromHost) DeepCopy() *CopyFromHost { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *CustomMetrics) DeepCopyInto(out *CustomMetrics) { + *out = *in + in.CollectorMeta.DeepCopyInto(&out.CollectorMeta) + if in.MetricRequests != nil { + in, out := &in.MetricRequests, &out.MetricRequests + *out = make([]MetricRequest, len(*in)) + copy(*out, *in) + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new CustomMetrics. +func (in *CustomMetrics) DeepCopy() *CustomMetrics { + if in == nil { + return nil + } + out := new(CustomMetrics) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *CustomResourceDefinition) DeepCopyInto(out *CustomResourceDefinition) { *out = *in @@ -2738,6 +2764,21 @@ func (in *MemoryAnalyze) DeepCopy() *MemoryAnalyze { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *MetricRequest) DeepCopyInto(out *MetricRequest) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MetricRequest. +func (in *MetricRequest) DeepCopy() *MetricRequest { + if in == nil { + return nil + } + out := new(MetricRequest) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
func (in *NodeResourceFilters) DeepCopyInto(out *NodeResourceFilters) {
 	*out = *in
diff --git a/pkg/collect/collector.go b/pkg/collect/collector.go
index 2df153e72..d69a5df29 100644
--- a/pkg/collect/collector.go
+++ b/pkg/collect/collector.go
@@ -63,6 +63,8 @@ func GetCollector(collector *troubleshootv1beta2.Collect, bundlePath string, nam
 		return &CollectClusterInfo{collector.ClusterInfo, bundlePath, namespace, clientConfig, RBACErrors}, true
 	case collector.ClusterResources != nil:
 		return &CollectClusterResources{collector.ClusterResources, bundlePath, namespace, clientConfig, RBACErrors}, true
+	case collector.CustomMetrics != nil:
+		return &CollectMetrics{collector.CustomMetrics, bundlePath, clientConfig, client, ctx, RBACErrors}, true
 	case collector.Secret != nil:
 		return &CollectSecret{collector.Secret, bundlePath, namespace, clientConfig, client, ctx, RBACErrors}, true
 	case collector.ConfigMap != nil:
@@ -116,6 +118,9 @@ func getCollectorName(c interface{}) string {
 		collector = "cluster-info"
 	case *CollectClusterResources:
 		collector = "cluster-resources"
+	case *CollectMetrics:
+		collector = "custom-metrics"
+		name = v.Collector.CollectorName
 	case *CollectSecret:
 		collector = "secret"
 		name = v.Collector.CollectorName
diff --git a/pkg/collect/k8s_metrics.go b/pkg/collect/k8s_metrics.go
new file mode 100644
index 000000000..6276ab848
--- /dev/null
+++ b/pkg/collect/k8s_metrics.go
@@ -0,0 +1,130 @@
+package collect
+
+import (
+	"bytes"
+	"context"
+	"encoding/json"
+	"fmt"
+	"net/url"
+	"path/filepath"
+	"strings"
+
+	"github.com/pkg/errors"
+	troubleshootv1beta2 "github.com/replicatedhq/troubleshoot/pkg/apis/troubleshoot/v1beta2"
+	"k8s.io/client-go/kubernetes"
+	"k8s.io/client-go/rest"
+	"k8s.io/klog/v2"
+	"k8s.io/metrics/pkg/apis/custom_metrics"
+)
+
+const (
+	namespaceSingular = "namespace"
+	namespacePlural   = "namespaces"
+	urlBase           = "/apis/custom.metrics.k8s.io/v1beta1"
+	metricsErrorFile  = "metrics/errors.json"
+)
+
+// CollectMetrics queries the custom metrics API once per configured
+// MetricRequest and stores the returned values under metrics/ in the bundle.
+type CollectMetrics struct {
+	Collector    *troubleshootv1beta2.CustomMetrics
+	BundlePath   string
+	ClientConfig *rest.Config
+	Client       kubernetes.Interface
+	Context      context.Context
+	RBACErrors
+}
+
+// Title returns the collector's name as produced by getCollectorName.
+func (c *CollectMetrics) Title() string {
+	return getCollectorName(c)
+}
+
+// IsExcluded evaluates the Exclude setting of the collector spec.
+func (c *CollectMetrics) IsExcluded() (bool, error) {
+	return isExcluded(c.Collector.Exclude)
+}
+
+// Collect fetches every requested metric and groups the values by output file.
+// Per-request failures are gathered and saved to metrics/errors.json instead
+// of aborting the run, so the returned error is always nil.
+func (c *CollectMetrics) Collect(progressChan chan<- interface{}) (CollectorResult, error) {
+	output := NewResult()
+	resultLists := make(map[string][]custom_metrics.MetricValue)
+	errorsList := make([]string, 0)
+	for _, metricRequest := range c.Collector.MetricRequests {
+		klog.V(2).Infof("Getting metric values: %+v\n", metricRequest.ResourceMetricName)
+		endpoint, metricName, err := constructEndpoint(metricRequest)
+		if err != nil {
+			errorsList = append(errorsList, errors.Wrapf(err, "could not construct endpoint for %s", metricRequest.ResourceMetricName).Error())
+			continue
+		}
+		klog.V(2).Infof("Querying: %+v\n", endpoint)
+		response, err := c.Client.CoreV1().RESTClient().Get().AbsPath(endpoint).DoRaw(c.Context)
+		if err != nil {
+			errorsList = append(errorsList, errors.Wrapf(err, "could not query endpoint %s", endpoint).Error())
+			continue
+		}
+		metricsValues := custom_metrics.MetricValueList{}
+		if err := json.Unmarshal(response, &metricsValues); err != nil {
+			errorsList = append(errorsList, errors.Wrapf(err, "could not decode response from %s", endpoint).Error())
+			continue
+		}
+		// Output layout: metrics/<Kind>/<metricName>/<Namespace>.json for
+		// namespaced objects, metrics/<Kind>/<metricName>/<Name>.json otherwise.
+		for _, item := range metricsValues.Items {
+			fileName := item.DescribedObject.Name
+			if item.DescribedObject.Namespace != "" {
+				fileName = item.DescribedObject.Namespace
+			}
+			filePath := filepath.Join("metrics", item.DescribedObject.Kind, metricName, fmt.Sprintf("%s.json", fileName))
+			resultLists[filePath] = append(resultLists[filePath], item)
+		}
+	}
+
+	// Construct output.
+	for relativePath, list := range resultLists {
+		payload, err := json.MarshalIndent(list, "", " ")
+		if err != nil {
+			klog.V(2).Infof("Could not parse for: %+v\n", relativePath)
+			errorsList = append(errorsList, errors.Wrapf(err, "could not format readings for %s", relativePath).Error())
+			// Skip the file rather than saving an empty payload for it.
+			continue
+		}
+		output.SaveResult(c.BundlePath, relativePath, bytes.NewBuffer(payload))
+	}
+	errPayload := marshalErrors(errorsList)
+	output.SaveResult(c.BundlePath, metricsErrorFile, errPayload)
+	return output, nil
+}
+
+// constructEndpoint builds the custom metrics API path for metricRequest and
+// returns it together with the bare metric name. ResourceMetricName must have
+// the form "<resource>/<metric>"; any other shape yields an error.
+func constructEndpoint(metricRequest troubleshootv1beta2.MetricRequest) (string, string, error) {
+	metricNameComponents := strings.Split(metricRequest.ResourceMetricName, "/")
+	if len(metricNameComponents) != 2 {
+		return "", "", errors.Errorf("wrong metric name format %s", metricRequest.ResourceMetricName)
+	}
+	objectType := metricNameComponents[0]
+	// Namespace related metrics are grouped under singular format "namespace/"
+	// unlike other resources.
+	if objectType == namespacePlural {
+		objectType = namespaceSingular
+	}
+	metricName := metricNameComponents[1]
+	objectSelector := "*"
+	if metricRequest.ObjectName != "" {
+		objectSelector = metricRequest.ObjectName
+	}
+	segments := []string{objectType, objectSelector, metricName}
+	if metricRequest.Namespace != "" {
+		// Namespaced objects are addressed under namespaces/<namespace>/.
+		segments = append([]string{namespacePlural, metricRequest.Namespace}, segments...)
+	}
+	endpoint, err := url.JoinPath(urlBase, segments...)
+	if err != nil {
+		return "", "", errors.Wrap(err, "could not construct url")
+	}
+	return endpoint, metricName, nil
+}
diff --git a/pkg/collect/k8s_metrics_test.go b/pkg/collect/k8s_metrics_test.go
new file mode 100644
index 000000000..4f077c7cc
--- /dev/null
+++ b/pkg/collect/k8s_metrics_test.go
@@ -0,0 +1,91 @@
+package collect
+
+import (
+	"testing"
+
+	
"github.com/pkg/errors" + troubleshootv1beta2 "github.com/replicatedhq/troubleshoot/pkg/apis/troubleshoot/v1beta2" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestConstructEndpoint(t *testing.T) { + // Define test cases + testCases := []struct { + name string + metricRequest troubleshootv1beta2.MetricRequest + expectedEndpoint string + expectedMetric string + expectedError error + }{ + { + name: "Namespaced object with namespace and object name", + metricRequest: troubleshootv1beta2.MetricRequest{ + Namespace: "namespace", + ObjectName: "object", + ResourceMetricName: "pods/metric", + }, + expectedEndpoint: "/apis/custom.metrics.k8s.io/v1beta1/namespaces/namespace/pods/object/metric", + expectedMetric: "metric", + expectedError: nil, + }, + { + name: "Namespaced object with namespace and empty object name", + metricRequest: troubleshootv1beta2.MetricRequest{ + Namespace: "namespace", + ObjectName: "", + ResourceMetricName: "pods/metric", + }, + expectedEndpoint: "/apis/custom.metrics.k8s.io/v1beta1/namespaces/namespace/pods/*/metric", + expectedMetric: "metric", + expectedError: nil, + }, + { + name: "Non-namespaced object", + metricRequest: troubleshootv1beta2.MetricRequest{ + ResourceMetricName: "nodes/metric", + ObjectName: "object", + }, + expectedEndpoint: "/apis/custom.metrics.k8s.io/v1beta1/nodes/object/metric", + expectedMetric: "metric", + expectedError: nil, + }, + { + name: "Non-namespaced object with empty object name", + metricRequest: troubleshootv1beta2.MetricRequest{ + ResourceMetricName: "namespaces/metric", + ObjectName: "", + }, + expectedEndpoint: "/apis/custom.metrics.k8s.io/v1beta1/namespace/*/metric", + expectedMetric: "metric", + expectedError: nil, + }, + { + name: "Invalid metric name format", + metricRequest: troubleshootv1beta2.MetricRequest{ + ResourceMetricName: "invalid-metric-name", + ObjectName: "object", + }, + expectedEndpoint: "", + expectedMetric: "", + expectedError: 
errors.New("wrong metric name format"), + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + // Call the function under test + endpoint, metric, err := constructEndpoint(tc.metricRequest) + + // Verify the results + if tc.expectedError != nil { + require.Error(t, err) + assert.Contains(t, err.Error(), tc.expectedError.Error()) + } else { + require.NoError(t, err) + assert.Equal(t, tc.expectedEndpoint, endpoint) + assert.Equal(t, tc.expectedMetric, metric) + } + }) + } +} diff --git a/schemas/collector-troubleshoot-v1beta2.json b/schemas/collector-troubleshoot-v1beta2.json index acf1c40a6..718d49c50 100644 --- a/schemas/collector-troubleshoot-v1beta2.json +++ b/schemas/collector-troubleshoot-v1beta2.json @@ -335,6 +335,41 @@ } } }, + "customMetrics": { + "type": "object", + "properties": { + "collectorName": { + "type": "string" + }, + "exclude": { + "oneOf": [{"type": "string"},{"type": "boolean"}] + }, + "metricRequests": { + "type": "array", + "items": { + "description": "MetricRequest the details of the MetricValuesList to be retrieved", + "type": "object", + "required": [ + "resourceMetricName" + ], + "properties": { + "namespace": { + "description": "Namespace for which to collect the metric values, empty for non-namespaces resources.", + "type": "string" + }, + "objectName": { + "description": "ObjectName for which to collect metric values, all resources when empty. 
Note that for namespaced resources a Namespace has to be supplied regardless.", + "type": "string" + }, + "resourceMetricName": { + "description": "ResourceMetricName name of the MetricValueList as per the APIResourceList from custom.metrics.k8s.io/v1beta1", + "type": "string" + } + } + } + } + } + }, "data": { "type": "object", "required": [ diff --git a/schemas/preflight-troubleshoot-v1beta2.json b/schemas/preflight-troubleshoot-v1beta2.json index 07e498d60..bf6d466d2 100644 --- a/schemas/preflight-troubleshoot-v1beta2.json +++ b/schemas/preflight-troubleshoot-v1beta2.json @@ -2638,6 +2638,41 @@ } } }, + "customMetrics": { + "type": "object", + "properties": { + "collectorName": { + "type": "string" + }, + "exclude": { + "oneOf": [{"type": "string"},{"type": "boolean"}] + }, + "metricRequests": { + "type": "array", + "items": { + "description": "MetricRequest the details of the MetricValuesList to be retrieved", + "type": "object", + "required": [ + "resourceMetricName" + ], + "properties": { + "namespace": { + "description": "Namespace for which to collect the metric values, empty for non-namespaces resources.", + "type": "string" + }, + "objectName": { + "description": "ObjectName for which to collect metric values, all resources when empty. 
Note that for namespaced resources a Namespace has to be supplied regardless.", + "type": "string" + }, + "resourceMetricName": { + "description": "ResourceMetricName name of the MetricValueList as per the APIResourceList from custom.metrics.k8s.io/v1beta1", + "type": "string" + } + } + } + } + } + }, "data": { "type": "object", "required": [ diff --git a/schemas/supportbundle-troubleshoot-v1beta2.json b/schemas/supportbundle-troubleshoot-v1beta2.json index 49aff9060..de206ffad 100644 --- a/schemas/supportbundle-troubleshoot-v1beta2.json +++ b/schemas/supportbundle-troubleshoot-v1beta2.json @@ -2684,6 +2684,41 @@ } } }, + "customMetrics": { + "type": "object", + "properties": { + "collectorName": { + "type": "string" + }, + "exclude": { + "oneOf": [{"type": "string"},{"type": "boolean"}] + }, + "metricRequests": { + "type": "array", + "items": { + "description": "MetricRequest the details of the MetricValuesList to be retrieved", + "type": "object", + "required": [ + "resourceMetricName" + ], + "properties": { + "namespace": { + "description": "Namespace for which to collect the metric values, empty for non-namespaces resources.", + "type": "string" + }, + "objectName": { + "description": "ObjectName for which to collect metric values, all resources when empty. Note that for namespaced resources a Namespace has to be supplied regardless.", + "type": "string" + }, + "resourceMetricName": { + "description": "ResourceMetricName name of the MetricValueList as per the APIResourceList from custom.metrics.k8s.io/v1beta1", + "type": "string" + } + } + } + } + } + }, "data": { "type": "object", "required": [