From 819a9eb2026b7a3ba16d97c0f6fac76a4ef2267d Mon Sep 17 00:00:00 2001 From: Andrew Lavery Date: Thu, 19 Oct 2023 10:58:21 -0600 Subject: [PATCH] node usage metrics not being collected is not a fatal error --- pkg/helmvm/helmvm_node.go | 74 +++++++++++++++++--------------- pkg/helmvm/helmvm_nodes.go | 87 ++------------------------------------ 2 files changed, 43 insertions(+), 118 deletions(-) diff --git a/pkg/helmvm/helmvm_node.go b/pkg/helmvm/helmvm_node.go index 7805400c1d..7054e0cf26 100644 --- a/pkg/helmvm/helmvm_node.go +++ b/pkg/helmvm/helmvm_node.go @@ -31,7 +31,31 @@ func GetNode(ctx context.Context, client kubernetes.Interface, nodeName string) return nil, fmt.Errorf("failed to create metrics client: %w", err) } - nodePods, err := podsOnNode(ctx, client, nodeName) + return nodeMetrics(ctx, client, metricsClient, *node) +} + +func podsOnNode(ctx context.Context, client kubernetes.Interface, nodeName string) ([]corev1.Pod, error) { + namespaces, err := client.CoreV1().Namespaces().List(ctx, metav1.ListOptions{}) + if err != nil { + return nil, fmt.Errorf("list namespaces: %w", err) + } + + toReturn := []corev1.Pod{} + + for _, ns := range namespaces.Items { + nsPods, err := client.CoreV1().Pods(ns.Name).List(ctx, metav1.ListOptions{FieldSelector: fmt.Sprintf("spec.nodeName=%s", nodeName)}) + if err != nil { + return nil, fmt.Errorf("list pods on %s in namespace %s: %w", nodeName, ns.Name, err) + } + + toReturn = append(toReturn, nsPods.Items...) + } + return toReturn, nil +} + +// nodeMetrics takes a corev1.Node and gets metrics + status for that node +func nodeMetrics(ctx context.Context, client kubernetes.Interface, metricsClient *metricsv.Clientset, node corev1.Node) (*types.Node, error) { + nodePods, err := podsOnNode(ctx, client, node.Name) if err != nil { return nil, fmt.Errorf("pods per node: %w", err) } @@ -49,17 +73,18 @@ func GetNode(ctx context.Context, client kubernetes.Interface, nodeName string) podCapacity.Capacity = float64(node.Status.Capacity.Pods().Value()) - nodeMetrics, err := metricsClient.MetricsV1beta1().NodeMetricses().Get(ctx, node.Name, metav1.GetOptions{}) - if err != nil { - return nil, fmt.Errorf("list pod metrics: %w", err) - } - - if nodeMetrics.Usage.Memory() != nil { - memoryCapacity.Available = memoryCapacity.Capacity - float64(nodeMetrics.Usage.Memory().Value())/math.Pow(2, 30) - } + nodeUsageMetrics, err := metricsClient.MetricsV1beta1().NodeMetricses().Get(ctx, node.Name, metav1.GetOptions{}) + if err == nil { + if nodeUsageMetrics.Usage.Memory() != nil { + memoryCapacity.Available = memoryCapacity.Capacity - float64(nodeUsageMetrics.Usage.Memory().Value())/math.Pow(2, 30) + } - if nodeMetrics.Usage.Cpu() != nil { - cpuCapacity.Available = cpuCapacity.Capacity - nodeMetrics.Usage.Cpu().AsApproximateFloat64() + if nodeUsageMetrics.Usage.Cpu() != nil { + cpuCapacity.Available = cpuCapacity.Capacity - nodeUsageMetrics.Usage.Cpu().AsApproximateFloat64() + } + } else { + // if we can't get metrics, we'll do nothing for now + // in the future we may decide to retry or log a warning } podCapacity.Available = podCapacity.Capacity - float64(len(nodePods)) @@ -71,10 +96,10 @@ func GetNode(ctx context.Context, client kubernetes.Interface, nodeName string) return &types.Node{ Name: node.Name, - IsConnected: isConnected(*node), - IsReady: isReady(*node), - IsPrimaryNode: isPrimary(*node), - CanDelete: node.Spec.Unschedulable && !isConnected(*node), + IsConnected: isConnected(node), + IsReady: isReady(node), + IsPrimaryNode: isPrimary(node), + CanDelete: node.Spec.Unschedulable && !isConnected(node), KubeletVersion: node.Status.NodeInfo.KubeletVersion, CPU: cpuCapacity, Memory: memoryCapacity, @@ -84,22 +109,3 @@ func GetNode(ctx context.Context, client kubernetes.Interface, nodeName string) PodList: nodePods, }, nil } - -func podsOnNode(ctx context.Context, client kubernetes.Interface, nodeName string) ([]corev1.Pod, error) { - namespaces, err := client.CoreV1().Namespaces().List(ctx, metav1.ListOptions{}) - if err != nil { - return nil, fmt.Errorf("list namespaces: %w", err) - } - - toReturn := []corev1.Pod{} - - for _, ns := range namespaces.Items { - nsPods, err := client.CoreV1().Pods(ns.Name).List(ctx, metav1.ListOptions{FieldSelector: fmt.Sprintf("spec.nodeName=%s", nodeName)}) - if err != nil { - return nil, fmt.Errorf("list pods on %s in namespace %s: %w", nodeName, ns.Name, err) - } - - toReturn = append(toReturn, nsPods.Items...) - } - return toReturn, nil -} diff --git a/pkg/helmvm/helmvm_nodes.go b/pkg/helmvm/helmvm_nodes.go index f8bfefff4b..9396e6508c 100644 --- a/pkg/helmvm/helmvm_nodes.go +++ b/pkg/helmvm/helmvm_nodes.go @@ -2,10 +2,6 @@ package helmvm import ( "context" - "fmt" - "math" - "strconv" - "github.com/pkg/errors" "github.com/replicatedhq/kots/pkg/helmvm/types" "github.com/replicatedhq/kots/pkg/k8sutil" @@ -34,58 +30,13 @@ func GetNodes(ctx context.Context, client kubernetes.Interface) (*types.HelmVMNo toReturn := types.HelmVMNodes{} - nodePods, err := podsPerNode(ctx, client) - if err != nil { - return nil, errors.Wrap(err, "pods per node") - } - for _, node := range nodes.Items { - cpuCapacity := types.CapacityAvailable{} - memoryCapacity := types.CapacityAvailable{} - podCapacity := types.CapacityAvailable{} - - memoryCapacity.Capacity = float64(node.Status.Capacity.Memory().Value()) / math.Pow(2, 30) // capacity in GB - - cpuCapacity.Capacity, err = strconv.ParseFloat(node.Status.Capacity.Cpu().String(), 64) - if err != nil { - return nil, errors.Wrapf(err, "parse CPU capacity %q for node %s", node.Status.Capacity.Cpu().String(), node.Name) - } - - podCapacity.Capacity = float64(node.Status.Capacity.Pods().Value()) - - nodeMetrics, err := metricsClient.MetricsV1beta1().NodeMetricses().Get(ctx, node.Name, metav1.GetOptions{}) + nodeMet, err := nodeMetrics(ctx, client, metricsClient, node) if err != nil { - return nil, errors.Wrap(err, "list pod metrics") - } - - if nodeMetrics.Usage.Memory() != nil { - memoryCapacity.Available = memoryCapacity.Capacity - float64(nodeMetrics.Usage.Memory().Value())/math.Pow(2, 30) + return nil, errors.Wrap(err, "node metrics") } - if nodeMetrics.Usage.Cpu() != nil { - cpuCapacity.Available = cpuCapacity.Capacity - nodeMetrics.Usage.Cpu().AsApproximateFloat64() - } - - podCapacity.Available = podCapacity.Capacity - float64(nodePods[node.Name]) - - nodeLabelArray := []string{} - for k, v := range node.Labels { - nodeLabelArray = append(nodeLabelArray, fmt.Sprintf("%s:%s", k, v)) - } - - toReturn.Nodes = append(toReturn.Nodes, types.Node{ - Name: node.Name, - IsConnected: isConnected(node), - IsReady: isReady(node), - IsPrimaryNode: isPrimary(node), - CanDelete: node.Spec.Unschedulable && !isConnected(node), - KubeletVersion: node.Status.NodeInfo.KubeletVersion, - CPU: cpuCapacity, - Memory: memoryCapacity, - Pods: podCapacity, - Labels: nodeLabelArray, - Conditions: findNodeConditions(node.Status.Conditions), - }) + toReturn.Nodes = append(toReturn.Nodes, *nodeMet) } isHelmVM, err := IsHelmVM(client) @@ -122,38 +73,6 @@ func findNodeConditions(conditions []corev1.NodeCondition) types.NodeConditions return discoveredConditions } -// podsPerNode returns a map of node names to the number of pods, across all namespaces -func podsPerNode(ctx context.Context, client kubernetes.Interface) (map[string]int, error) { - namespaces, err := client.CoreV1().Namespaces().List(ctx, metav1.ListOptions{}) - if err != nil { - return nil, errors.Wrap(err, "list namespaces") - } - - toReturn := map[string]int{} - - for _, ns := range namespaces.Items { - nsPods, err := client.CoreV1().Pods(ns.Name).List(ctx, metav1.ListOptions{}) - if err != nil { - return nil, errors.Wrapf(err, "list pods in namespace %s", ns.Name) - } - - for _, pod := range nsPods.Items { - pod := pod - if pod.Spec.NodeName == "" { - continue - } - - if _, ok := toReturn[pod.Spec.NodeName]; !ok { - toReturn[pod.Spec.NodeName] = 0 - } - - toReturn[pod.Spec.NodeName]++ - } - } - - return toReturn, nil -} - func isConnected(node corev1.Node) bool { for _, taint := range node.Spec.Taints { if taint.Key == "node.kubernetes.io/unreachable" {