From 00d7c997da80ccc905d30ccfe0687f801d06e7b9 Mon Sep 17 00:00:00 2001 From: Michael Burman Date: Wed, 6 Nov 2024 17:05:23 -0800 Subject: [PATCH 1/3] A scheduler plugin imitator that tries to verify if proposed pods will be schedulable in the cluster --- go.mod | 5 ++ go.sum | 14 ++++++ pkg/scheduler/fit.go | 108 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 127 insertions(+) create mode 100644 pkg/scheduler/fit.go diff --git a/go.mod b/go.mod index 33ba1003..2e24d13e 100644 --- a/go.mod +++ b/go.mod @@ -26,11 +26,13 @@ require ( github.com/prometheus/client_golang v1.18.0 go.uber.org/zap v1.26.0 golang.org/x/mod v0.17.0 + k8s.io/kubernetes v1.29.2 k8s.io/utils v0.0.0-20230726121419-3b25d923346b ) require ( github.com/beorn7/perks v1.0.1 // indirect + github.com/blang/semver/v4 v4.0.0 // indirect github.com/cespare/xxhash/v2 v2.2.0 // indirect github.com/emicklei/go-restful/v3 v3.11.0 // indirect github.com/evanphx/json-patch v5.6.0+incompatible // indirect @@ -74,9 +76,12 @@ require ( gopkg.in/inf.v0 v0.9.1 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect k8s.io/apiextensions-apiserver v0.29.2 // indirect + k8s.io/apiserver v0.29.2 // indirect k8s.io/component-base v0.29.2 // indirect + k8s.io/component-helpers v0.29.2 // indirect k8s.io/klog/v2 v2.110.1 // indirect k8s.io/kube-openapi v0.0.0-20231010175941-2dd684a91f00 // indirect + k8s.io/kube-scheduler v0.29.2 // indirect sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect sigs.k8s.io/structured-merge-diff/v4 v4.4.1 // indirect sigs.k8s.io/yaml v1.4.0 // indirect diff --git a/go.sum b/go.sum index 0eab4311..9484a1e0 100644 --- a/go.sum +++ b/go.sum @@ -2,6 +2,8 @@ github.com/Jeffail/gabs/v2 v2.7.0 h1:Y2edYaTcE8ZpRsR2AtmPu5xQdFDIthFG0jYhu5PY8kg github.com/Jeffail/gabs/v2 v2.7.0/go.mod h1:dp5ocw1FvBBQYssgHsG7I1WYsiLRtkUaB1FEtSwvNUw= github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= +github.com/blang/semver/v4 v4.0.0 h1:1PFHFE6yCCTv8C1TeyNNarDzntLi7wMI5i/pzqYIsAM= +github.com/blang/semver/v4 v4.0.0/go.mod h1:IbckMUScFkM3pff0VJDNKRiT6TG/YpiHIM2yvyW5YoQ= github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44= github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= @@ -52,6 +54,8 @@ github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I= github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/imdario/mergo v0.3.15 h1:M8XP7IuFNsqUx6VPK2P9OSmsYsI/YFaGil0uD21V3dM= github.com/imdario/mergo v0.3.15/go.mod h1:WBLT9ZmE3lPoWsEzCh9LPo3TiwVN+ZKEjmz+hD27ysY= +github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= +github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY= github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y= github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= @@ -96,6 +100,8 @@ github.com/prometheus/procfs v0.12.0 h1:jluTpSng7V9hY0O2R9DzzJHYb2xULk9VTR1V1R/k github.com/prometheus/procfs v0.12.0/go.mod h1:pcuDEFsWDnvcgNzo4EEweacyhjeA9Zk3cnaOZAZEfOo= github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ= github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog= +github.com/spf13/cobra v1.7.0 h1:hyqWnYt1ZQShIddO5kBpj3vu05/++x6tJ6dg8EC572I= +github.com/spf13/cobra v1.7.0/go.mod h1:uLxZILRyS/50WlhOIKD7W6V5bgeIt+4sICxh6uRMrb0= github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= @@ -186,14 +192,22 @@ k8s.io/apiextensions-apiserver v0.29.2 h1:UK3xB5lOWSnhaCk0RFZ0LUacPZz9RY4wi/yt2I k8s.io/apiextensions-apiserver v0.29.2/go.mod h1:aLfYjpA5p3OwtqNXQFkhJ56TB+spV8Gc4wfMhUA3/b8= k8s.io/apimachinery v0.29.2 h1:EWGpfJ856oj11C52NRCHuU7rFDwxev48z+6DSlGNsV8= k8s.io/apimachinery v0.29.2/go.mod h1:6HVkd1FwxIagpYrHSwJlQqZI3G9LfYWRPAkUvLnXTKU= +k8s.io/apiserver v0.29.2 h1:+Z9S0dSNr+CjnVXQePG8TcBWHr3Q7BmAr7NraHvsMiQ= +k8s.io/apiserver v0.29.2/go.mod h1:B0LieKVoyU7ykQvPFm7XSdIHaCHSzCzQWPFa5bqbeMQ= k8s.io/client-go v0.29.2 h1:FEg85el1TeZp+/vYJM7hkDlSTFZ+c5nnK44DJ4FyoRg= k8s.io/client-go v0.29.2/go.mod h1:knlvFZE58VpqbQpJNbCbctTVXcd35mMyAAwBdpt4jrA= k8s.io/component-base v0.29.2 h1:lpiLyuvPA9yV1aQwGLENYyK7n/8t6l3nn3zAtFTJYe8= k8s.io/component-base v0.29.2/go.mod h1:BfB3SLrefbZXiBfbM+2H1dlat21Uewg/5qtKOl8degM= +k8s.io/component-helpers v0.29.2 h1:1kTIanIdqUVG2nW3e2ENVEaYbZKphqPgEdCmJvk71aw= +k8s.io/component-helpers v0.29.2/go.mod h1:gFc/p60rYtpD8UCcNfPCmbokHT2uy0yDpmr/KKUMNAw= k8s.io/klog/v2 v2.110.1 h1:U/Af64HJf7FcwMcXyKm2RPM22WZzyR7OSpYj5tg3cL0= k8s.io/klog/v2 v2.110.1/go.mod h1:YGtd1984u+GgbuZ7e08/yBuAfKLSO0+uR1Fhi6ExXjo= k8s.io/kube-openapi v0.0.0-20231010175941-2dd684a91f00 h1:aVUu9fTY98ivBPKR9Y5w/AuzbMm96cd3YHRTU83I780= k8s.io/kube-openapi v0.0.0-20231010175941-2dd684a91f00/go.mod h1:AsvuZPBlUDVuCdzJ87iajxtXuR9oktsTctW/R9wwouA= +k8s.io/kube-scheduler v0.29.2 h1:wcYkF18AUl4DPrQYpfeZQHCqQokTq7VSKaSb3d3lzOg= +k8s.io/kube-scheduler v0.29.2/go.mod h1:yb8xrwzrdUMFU1ot3BlhKszoCfwNJlEd7tLskXe2i/I= +k8s.io/kubernetes v1.29.2 h1:8hh1cntqdulanjQt7wSSSsJfBgOyx6fUdFWslvGL5m0= +k8s.io/kubernetes v1.29.2/go.mod h1:xZPKU0yO0CBbLTnbd+XGyRmmtmaVuJykDb8gNCkeeUE= k8s.io/utils v0.0.0-20230726121419-3b25d923346b h1:sgn3ZU783SCgtaSJjpcVVlRqd6GSnlTLKgpAAttJvpI= k8s.io/utils v0.0.0-20230726121419-3b25d923346b/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= sigs.k8s.io/controller-runtime v0.17.4 h1:AMf1E0+93/jLQ13fb76S6Atwqp24EQFCmNbG84GJxew= diff --git a/pkg/scheduler/fit.go b/pkg/scheduler/fit.go new file mode 100644 index 00000000..15cf32b9 --- /dev/null +++ b/pkg/scheduler/fit.go @@ -0,0 +1,108 @@ +package scheduler + +import ( + "context" + "errors" + + "k8s.io/kubernetes/pkg/scheduler/framework" + plfeature "k8s.io/kubernetes/pkg/scheduler/framework/plugins/feature" + "k8s.io/kubernetes/pkg/scheduler/framework/plugins/interpodaffinity" + "k8s.io/kubernetes/pkg/scheduler/framework/plugins/nodeaffinity" + "k8s.io/kubernetes/pkg/scheduler/framework/plugins/noderesources" + "k8s.io/kubernetes/pkg/scheduler/framework/plugins/nodeunschedulable" + + corev1 "k8s.io/api/core/v1" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +func createNodeInfos(ctx context.Context, cli client.Client) ([]*framework.NodeInfo, error) { + nodes := corev1.NodeList{} + if err := cli.List(ctx, &nodes); err != nil { + return nil, err + } + + nodeInfos := make([]*framework.NodeInfo, 0, len(nodes.Items)) + + for _, node := range nodes.Items { + nodeInfo := framework.NewNodeInfo() + nodeInfo.SetNode(&node) + pods := &corev1.PodList{} + if err := cli.List(ctx, pods, client.MatchingFields{"spec.nodeName": nodeInfo.Node().Name}); err != nil { + return nil, err + } + + for _, pod := range pods.Items { + nodeInfo.AddPod(&pod) + } + + nodeInfos = append(nodeInfos, nodeInfo) + } + + return nodeInfos, nil +} + +// WillTheyFit checks if the proposed pods will be schedulable in the cluster. The ProposedPods must have their +// labels, tolerations, affinities and resource limits set +func WillTheyFit(ctx context.Context, cli client.Client, proposedPods []*corev1.Pod) error { + usableNodes, err := createNodeInfos(ctx, cli) + if err != nil { + return err + } + + state := framework.NewCycleState() + + schedulablePlugin, err := nodeunschedulable.New(ctx, nil, nil) + if err != nil { + return err + } + + noderesourcesPlugin, err := noderesources.NewFit(ctx, nil, nil, plfeature.Features{}) + if err != nil { + return err + } + + nodeaffinityPlugin, err := nodeaffinity.New(ctx, nil, nil) + if err != nil { + return err + } + + interpodaffinityPlugin, err := interpodaffinity.New(ctx, nil, nil) + if err != nil { + return err + } + + plugins := []framework.FilterPlugin{ + schedulablePlugin.(framework.FilterPlugin), + noderesourcesPlugin.(framework.FilterPlugin), + nodeaffinityPlugin.(framework.FilterPlugin), + interpodaffinityPlugin.(framework.FilterPlugin), + } + +NextPod: + for _, pod := range proposedPods { + for _, node := range usableNodes { + podInfo, err := framework.NewPodInfo(pod) + if err != nil { + return err + } + + for _, plugin := range plugins { + status := plugin.Filter(ctx, state, podInfo.Pod, node) + if status.Code() == framework.Unschedulable { + continue + } + } + + node.AddPod(pod) + continue NextPod + // Now we need a way to check if the pod was never added to any node + // TODO Need more checks like affinities, inter-pod affinities, taints, tolerations + } + // Pod was never added to any node + return errors.New(framework.Unschedulable.String()) + } + + // Check node condition, will it accept something + // Need tolerations, taints, affinities.. + return nil +} From df740b6d72c549e66b3388862a4f3e43a6ff1fad Mon Sep 17 00:00:00 2001 From: Michael Burman Date: Wed, 6 Nov 2024 17:10:39 -0800 Subject: [PATCH 2/3] Add taint toleration plugin to checks --- pkg/scheduler/fit.go | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/pkg/scheduler/fit.go b/pkg/scheduler/fit.go index 15cf32b9..8b857417 100644 --- a/pkg/scheduler/fit.go +++ b/pkg/scheduler/fit.go @@ -10,6 +10,7 @@ import ( "k8s.io/kubernetes/pkg/scheduler/framework/plugins/nodeaffinity" "k8s.io/kubernetes/pkg/scheduler/framework/plugins/noderesources" "k8s.io/kubernetes/pkg/scheduler/framework/plugins/nodeunschedulable" + "k8s.io/kubernetes/pkg/scheduler/framework/plugins/tainttoleration" corev1 "k8s.io/api/core/v1" "sigs.k8s.io/controller-runtime/pkg/client" @@ -71,11 +72,17 @@ func WillTheyFit(ctx context.Context, cli client.Client, proposedPods []*corev1. return err } + tainttolerationPlugin, err := tainttoleration.New(ctx, nil, nil) + if err != nil { + return err + } + plugins := []framework.FilterPlugin{ schedulablePlugin.(framework.FilterPlugin), noderesourcesPlugin.(framework.FilterPlugin), nodeaffinityPlugin.(framework.FilterPlugin), interpodaffinityPlugin.(framework.FilterPlugin), + tainttolerationPlugin.(framework.FilterPlugin), } NextPod: @@ -95,14 +102,9 @@ NextPod: node.AddPod(pod) continue NextPod - // Now we need a way to check if the pod was never added to any node - // TODO Need more checks like affinities, inter-pod affinities, taints, tolerations } // Pod was never added to any node return errors.New(framework.Unschedulable.String()) } - - // Check node condition, will it accept something - // Need tolerations, taints, affinities.. return nil } From 4912ba38d41fbb0f939caeab3cb384df7472c86c Mon Sep 17 00:00:00 2001 From: Michael Burman Date: Thu, 7 Nov 2024 17:54:57 -0800 Subject: [PATCH 3/3] Remove podinternodeaffinity for now due to snapshot cache issue, add a small smoke test for the resources requirements check --- pkg/scheduler/fit.go | 36 +++++++++++----- pkg/scheduler/fit_test.go | 88 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 113 insertions(+), 11 deletions(-) create mode 100644 pkg/scheduler/fit_test.go diff --git a/pkg/scheduler/fit.go b/pkg/scheduler/fit.go index 8b857417..1fd42a0a 100644 --- a/pkg/scheduler/fit.go +++ b/pkg/scheduler/fit.go @@ -4,9 +4,11 @@ import ( "context" "errors" + "k8s.io/kubernetes/pkg/scheduler/apis/config" "k8s.io/kubernetes/pkg/scheduler/framework" plfeature "k8s.io/kubernetes/pkg/scheduler/framework/plugins/feature" - "k8s.io/kubernetes/pkg/scheduler/framework/plugins/interpodaffinity" + + // "k8s.io/kubernetes/pkg/scheduler/framework/plugins/interpodaffinity" "k8s.io/kubernetes/pkg/scheduler/framework/plugins/nodeaffinity" "k8s.io/kubernetes/pkg/scheduler/framework/plugins/noderesources" "k8s.io/kubernetes/pkg/scheduler/framework/plugins/nodeunschedulable" @@ -57,20 +59,20 @@ func WillTheyFit(ctx context.Context, cli client.Client, proposedPods []*corev1. return err } - noderesourcesPlugin, err := noderesources.NewFit(ctx, nil, nil, plfeature.Features{}) + noderesourcesPlugin, err := noderesources.NewFit(ctx, &config.NodeResourcesFitArgs{ScoringStrategy: defaultScoringStrategy}, nil, plfeature.Features{}) if err != nil { return err } - nodeaffinityPlugin, err := nodeaffinity.New(ctx, nil, nil) + nodeaffinityPlugin, err := nodeaffinity.New(ctx, &config.NodeAffinityArgs{}, nil) if err != nil { return err } - interpodaffinityPlugin, err := interpodaffinity.New(ctx, nil, nil) - if err != nil { - return err - } + // interpodaffinityPlugin, err := interpodaffinity.New(ctx, &config.InterPodAffinityArgs{}, snapshot) + // if err != nil { + // return err + // } tainttolerationPlugin, err := tainttoleration.New(ctx, nil, nil) if err != nil { @@ -78,15 +80,16 @@ func WillTheyFit(ctx context.Context, cli client.Client, proposedPods []*corev1. } plugins := []framework.FilterPlugin{ - schedulablePlugin.(framework.FilterPlugin), noderesourcesPlugin.(framework.FilterPlugin), + schedulablePlugin.(framework.FilterPlugin), nodeaffinityPlugin.(framework.FilterPlugin), - interpodaffinityPlugin.(framework.FilterPlugin), + // interpodaffinityPlugin.(framework.FilterPlugin), tainttolerationPlugin.(framework.FilterPlugin), } NextPod: for _, pod := range proposedPods { + NextNode: for _, node := range usableNodes { podInfo, err := framework.NewPodInfo(pod) if err != nil { @@ -94,9 +97,12 @@ NextPod: } for _, plugin := range plugins { + if prefilterPlugin, ok := plugin.(framework.PreFilterPlugin); ok { + prefilterPlugin.PreFilter(ctx, state, podInfo.Pod) + } status := plugin.Filter(ctx, state, podInfo.Pod, node) - if status.Code() == framework.Unschedulable { - continue + if status.Code() != framework.Success { + continue NextNode } } @@ -108,3 +114,11 @@ NextPod: } return nil } + +var defaultScoringStrategy = &config.ScoringStrategy{ + Type: config.LeastAllocated, + Resources: []config.ResourceSpec{ + {Name: "cpu", Weight: 1}, + {Name: "memory", Weight: 1}, + }, +} diff --git a/pkg/scheduler/fit_test.go b/pkg/scheduler/fit_test.go new file mode 100644 index 00000000..12a6ee72 --- /dev/null +++ b/pkg/scheduler/fit_test.go @@ -0,0 +1,88 @@ +package scheduler + +import ( + "context" + "strconv" + "testing" + + "github.com/stretchr/testify/require" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/client/fake" +) + +func TestResources(t *testing.T) { + require := require.New(t) + ctx := context.TODO() + cli := createClient() + require.NoError(cli.Create(ctx, makeNode("node1"))) + + pods := []*corev1.Pod{ + makePod("pod1"), + } + require.NoError(WillTheyFit(ctx, cli, pods)) + + // Lets add more pods than the node can handle + for i := 2; i < 20; i++ { + pods = append(pods, makePod("pod"+strconv.Itoa(i))) + } + require.Error(WillTheyFit(ctx, cli, pods)) +} + +func createClient() client.Client { + s := runtime.NewScheme() + s.AddKnownTypes(corev1.SchemeGroupVersion, &corev1.Node{}) + + nodeNameIndexer := client.IndexerFunc(func(obj client.Object) []string { + pod, ok := obj.(*corev1.Pod) + if !ok { + return nil + } + return []string{pod.Spec.NodeName} + }) + + fakeClient := fake.NewClientBuilder().WithIndex(&corev1.Pod{}, "spec.nodeName", nodeNameIndexer).Build() + return fakeClient +} + +func makeResources(milliCPU, memory, pods int64) corev1.ResourceList { + return corev1.ResourceList{ + corev1.ResourceCPU: *resource.NewMilliQuantity(milliCPU, resource.DecimalSI), + corev1.ResourceMemory: *resource.NewQuantity(memory, resource.BinarySI), + corev1.ResourcePods: *resource.NewQuantity(pods, resource.DecimalSI), + } +} + +func makeNode(name string) *corev1.Node { + n := &corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + }, + Spec: corev1.NodeSpec{ + Unschedulable: false, + }, + } + n.Status.Capacity, n.Status.Allocatable = makeResources(1000, 1000, 100), makeResources(1000, 1000, 100) + return n +} + +func makePod(name string) *corev1.Pod { + return &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + }, + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Resources: corev1.ResourceRequirements{ + Requests: makeResources(100, 100, 1), + Limits: makeResources(200, 200, 1), + }, + }, + }, + }, + } +}