Fix version upgrade kubelet support #16932
base: master
@@ -27,10 +27,13 @@ import (
 	"github.com/spf13/cobra"
 	"github.com/spf13/viper"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/client-go/kubernetes"
+	"k8s.io/client-go/tools/clientcmd"
 	"k8s.io/klog/v2"
 	"k8s.io/kops/cmd/kops/util"
 	"k8s.io/kops/pkg/apis/kops"
 	apisutil "k8s.io/kops/pkg/apis/kops/util"
 	"k8s.io/kops/pkg/assets"
 	"k8s.io/kops/pkg/commands/commandutils"
 	"k8s.io/kops/pkg/kubeconfig"
@@ -65,6 +68,9 @@ type UpdateClusterOptions struct {
 	SSHPublicKey string
 	RunTasksOptions fi.RunTasksOptions
 	AllowKopsDowngrade bool
+	// IgnoreVersionSkew bypasses the control plane version checks, which by default prevent
+	// non-control-plane instance groups from being updated to a version greater than the control plane's
+	IgnoreVersionSkew bool
 	// GetAssets is whether this is invoked from the CmdGetAssets.
 	GetAssets bool
@@ -93,6 +99,8 @@ func (o *UpdateClusterOptions) InitDefaults() {
 	o.Target = "direct"
 	o.SSHPublicKey = ""
 	o.OutDir = ""
+	// By default we enforce the version skew between control plane and worker nodes
+	o.IgnoreVersionSkew = false

 	// By default we export a kubecfg, but it doesn't have a static/eternal credential in it any more.
 	o.CreateKubecfg = true
@@ -142,6 +150,7 @@ func NewCmdUpdateCluster(f *util.Factory, out io.Writer) *cobra.Command {
 	cmd.RegisterFlagCompletionFunc("lifecycle-overrides", completeLifecycleOverrides)

 	cmd.Flags().BoolVar(&options.Prune, "prune", options.Prune, "Delete old revisions of cloud resources that were needed during an upgrade")
+	cmd.Flags().BoolVar(&options.IgnoreVersionSkew, "ignore-version-skew", options.IgnoreVersionSkew, "Setting this to true will force updating the kubernetes version on all instance groups, regardless of which control plane version is running")

 	return cmd
 }
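For reference, the new flag composes with a normal update invocation (illustrative cluster name):

kops update cluster --name example.k8s.local --ignore-version-skew --yes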
@@ -290,19 +299,29 @@ func RunUpdateCluster(ctx context.Context, f *util.Factory, out io.Writer, c *UpdateClusterOptions) (*UpdateClusterResults, error) {
 		return nil, err
 	}

+	minControlPlaneRunningVersion := cluster.Spec.KubernetesVersion
+	if !c.IgnoreVersionSkew {
+		minControlPlaneRunningVersion, err = checkControlPlaneRunningVersion(ctx, cluster.ObjectMeta.Name, minControlPlaneRunningVersion)
+		if err != nil {
+			klog.Warningf("error checking control plane running version: %v", err)
+		} else {
+			klog.Warningf("successfully checked control plane running version: %v", minControlPlaneRunningVersion)
+		}
+	}

Review comment: Nit: any reason to make this a warning (as opposed to Infof)?
Author reply: oh no... that was just for my testing. I fully intended to make it info or debug and then forgot :)

 	applyCmd := &cloudup.ApplyClusterCmd{
-		Cloud:              cloud,
-		Clientset:          clientset,
-		Cluster:            cluster,
-		DryRun:             isDryrun,
-		AllowKopsDowngrade: c.AllowKopsDowngrade,
-		RunTasksOptions:    &c.RunTasksOptions,
-		OutDir:             c.OutDir,
-		Phase:              phase,
-		TargetName:         targetName,
-		LifecycleOverrides: lifecycleOverrideMap,
-		GetAssets:          c.GetAssets,
-		DeletionProcessing: deletionProcessing,
+		Cloud:                      cloud,
+		Clientset:                  clientset,
+		Cluster:                    cluster,
+		DryRun:                     isDryrun,
+		AllowKopsDowngrade:         c.AllowKopsDowngrade,
+		RunTasksOptions:            &c.RunTasksOptions,
+		OutDir:                     c.OutDir,
+		Phase:                      phase,
+		TargetName:                 targetName,
+		LifecycleOverrides:         lifecycleOverrideMap,
+		GetAssets:                  c.GetAssets,
+		DeletionProcessing:         deletionProcessing,
+		ControlPlaneRunningVersion: minControlPlaneRunningVersion,
 	}

 	applyResults, err := applyCmd.Run(ctx)
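Illustrative walk-through (hypothetical versions, not taken from the PR itself): if cluster.Spec.KubernetesVersion is 1.30.0 but the control-plane nodes still report kubelet v1.29.6, minControlPlaneRunningVersion resolves to 1.29.6 and is handed to ApplyClusterCmd, so non-control-plane instance groups are held at 1.29.6 until the control plane has been rolled. Passing --ignore-version-skew skips the check and moves every instance group straight to 1.30.0.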
@@ -518,3 +537,38 @@ func completeLifecycleOverrides(cmd *cobra.Command, args []string, toComplete string) ([]string, cobra.ShellCompDirective) {
 	}
 	return completions, cobra.ShellCompDirectiveNoFileComp
 }
+
+func checkControlPlaneRunningVersion(ctx context.Context, clusterName string, version string) (string, error) {
+	configLoadingRules := clientcmd.NewDefaultClientConfigLoadingRules()
+	config, err := clientcmd.NewNonInteractiveDeferredLoadingClientConfig(
+		configLoadingRules,
+		&clientcmd.ConfigOverrides{CurrentContext: clusterName}).ClientConfig()
+	if err != nil {
+		return version, fmt.Errorf("cannot load kubecfg settings for %q: %v", clusterName, err)
+	}

Review comment: Nit: I think we should try to share this "get kubeconfig" code with the rolling-update logic (I think that also connects to the cluster). Not a blocker though.

+	k8sClient, err := kubernetes.NewForConfig(config)
+	if err != nil {
+		return version, fmt.Errorf("cannot build kubernetes api client for %q: %v", clusterName, err)
+	}
+
+	parsedVersion, err := apisutil.ParseKubernetesVersion(version)
+	if err != nil {
+		return version, fmt.Errorf("cannot parse kubernetes version %q: %v", version, err)
+	}
+	nodeList, err := k8sClient.CoreV1().Nodes().List(ctx, metav1.ListOptions{
+		LabelSelector: "node-role.kubernetes.io/control-plane",
+	})
+	if err != nil {
+		return version, fmt.Errorf("cannot list nodes in cluster %q: %v", clusterName, err)
+	}

Review comment: Ah - this is tricky. If the control plane is offline, we presumably don't want to update the nodes (?)
Author reply: If the control plane is offline, this is the least of our worries lol. But it might be an expected situation (e.g. creating a new cluster), so we just log the error and continue as normal.

+	for _, node := range nodeList.Items {
+		if apisutil.IsKubernetesGTE(node.Status.NodeInfo.KubeletVersion, *parsedVersion) {
+			version = node.Status.NodeInfo.KubeletVersion
+			parsedVersion, _ = apisutil.ParseKubernetesVersion(version)
+		}
+	}
+	return strings.TrimPrefix(version, "v"), nil
+}
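To make the selection loop concrete: apisutil.IsKubernetesGTE(v, parsed) reports whether parsed is at least v, so a node only replaces the tracked version when its kubelet is at or below the current candidate, and the function converges on the oldest control-plane kubelet. A minimal self-contained sketch of that reading (hypothetical version numbers; a toy parser stands in for apisutil):

package main

import (
	"fmt"
	"strconv"
	"strings"
)

// parse is a toy stand-in for apisutil.ParseKubernetesVersion: "v1.29.6" -> [1 29 6].
func parse(s string) [3]int {
	var v [3]int
	for i, p := range strings.SplitN(strings.TrimPrefix(s, "v"), ".", 3) {
		v[i], _ = strconv.Atoi(p)
	}
	return v
}

// gte reports a >= b, mirroring apisutil.IsKubernetesGTE(b, a).
func gte(a, b [3]int) bool {
	for i := range a {
		if a[i] != b[i] {
			return a[i] > b[i]
		}
	}
	return true
}

func main() {
	// The cluster spec wants 1.30.0, but one control-plane node still runs v1.29.6.
	version := "1.30.0"
	parsed := parse(version)
	for _, kubelet := range []string{"v1.30.0", "v1.29.6", "v1.30.0"} {
		// A node replaces the tracked version only when the tracked version is
		// already >= that node's kubelet, so the loop ends on the oldest kubelet.
		if gte(parsed, parse(kubelet)) {
			version = kubelet
			parsed = parse(kubelet)
		}
	}
	fmt.Println(strings.TrimPrefix(version, "v")) // prints: 1.29.6
}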
(Some generated files in this pull request are not rendered.)
@@ -42,22 +42,40 @@ type FileAssets struct {
 	// NodeUpAssets are the assets for downloading nodeup
 	NodeUpAssets map[architectures.Architecture]*assets.MirroredAsset

+	// AssetsSupportedKubelet are the file assets for the older kubelet version, used when the control plane has not yet been rolled out to the version in the ClusterSpec
+	AssetsSupportedKubelet map[architectures.Architecture][]*assets.MirroredAsset
+
 	Cluster *kops.Cluster
 }

 // AddFileAssets adds the file assets within the assetBuilder
 func (c *FileAssets) AddFileAssets(assetBuilder *assets.AssetBuilder) error {
+	var err error
+	c.Assets, err = c.addFileAssets(assetBuilder, c.Cluster.Spec.KubernetesVersion)
+	if err != nil {
+		return err
+	}
+	if len(assetBuilder.KubeletSupportedVersion) > 0 && assetBuilder.KubeletSupportedVersion != c.Cluster.Spec.KubernetesVersion {
+		c.AssetsSupportedKubelet, err = c.addFileAssets(assetBuilder, assetBuilder.KubeletSupportedVersion)

Review comment: Ah - great point - we need different assets. Although I'm starting to worry about complexity now...
Author reply: Yes, the complexity here worried me a bit as well when I dug into how to achieve this. The code isn't perfect, but I tried to do it without refactoring too much; if you have better ideas we can either improve this down the line or I can make adjustments :)

+		if err != nil {
+			return err
+		}
+	}
+	return nil
+}
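To show where the second asset set could plug in, here is a hypothetical consumer sketch (stand-in types and names, not code from this PR): a builder assembling assets for an instance group would prefer the held-back set for workers whenever it was built, while control-plane groups always get the ClusterSpec version.

package main

import "fmt"

// Minimal stand-ins for the kops types above; only the selection logic is the point of this sketch.
type arch string

type fileAssets struct {
	assets                 map[arch][]string // per-arch assets for the ClusterSpec version
	assetsSupportedKubelet map[arch][]string // held-back assets while the control plane lags
}

// assetsFor picks the held-back set for workers when one was built.
func (f *fileAssets) assetsFor(isControlPlane bool, a arch) []string {
	if !isControlPlane && f.assetsSupportedKubelet != nil {
		return f.assetsSupportedKubelet[a]
	}
	return f.assets[a]
}

func main() {
	f := &fileAssets{
		assets:                 map[arch][]string{"amd64": {"https://dl.k8s.io/release/v1.30.0/bin/linux/amd64/kubelet"}},
		assetsSupportedKubelet: map[arch][]string{"amd64": {"https://dl.k8s.io/release/v1.29.6/bin/linux/amd64/kubelet"}},
	}
	fmt.Println(f.assetsFor(true, "amd64"))  // control plane: spec version
	fmt.Println(f.assetsFor(false, "amd64")) // workers: held-back version
}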
+
+// addFileAssets adds the file assets within the assetBuilder for the given kubernetes version
+func (c *FileAssets) addFileAssets(assetBuilder *assets.AssetBuilder, version string) (map[architectures.Architecture][]*assets.MirroredAsset, error) {
 	var baseURL string
-	if components.IsBaseURL(c.Cluster.Spec.KubernetesVersion) {
-		baseURL = c.Cluster.Spec.KubernetesVersion
+	if components.IsBaseURL(version) {
+		baseURL = version
 	} else {
-		baseURL = "https://dl.k8s.io/release/v" + c.Cluster.Spec.KubernetesVersion
+		baseURL = "https://dl.k8s.io/release/v" + version
 	}

-	c.Assets = make(map[architectures.Architecture][]*assets.MirroredAsset)
+	assetsMap := make(map[architectures.Architecture][]*assets.MirroredAsset)
 	c.NodeUpAssets = make(map[architectures.Architecture]*assets.MirroredAsset)
 	for _, arch := range architectures.GetSupported() {
-		c.Assets[arch] = []*assets.MirroredAsset{}
+		assetsMap[arch] = []*assets.MirroredAsset{}

 		k8sAssetsNames := []string{
 			fmt.Sprintf("/bin/linux/%s/kubelet", arch),
@@ -71,18 +89,18 @@ func (c *FileAssets) AddFileAssets(assetBuilder *assets.AssetBuilder) error {
 		for _, an := range k8sAssetsNames {
 			k, err := url.Parse(baseURL)
 			if err != nil {
-				return err
+				return nil, err
 			}
 			k.Path = path.Join(k.Path, an)

 			asset, err := assetBuilder.RemapFile(k, nil)
 			if err != nil {
-				return err
+				return nil, err
 			}
-			c.Assets[arch] = append(c.Assets[arch], assets.BuildMirroredAsset(asset))
+			assetsMap[arch] = append(assetsMap[arch], assets.BuildMirroredAsset(asset))
 		}

-		kubernetesVersion, _ := util.ParseKubernetesVersion(c.Cluster.Spec.KubernetesVersion)
+		kubernetesVersion, _ := util.ParseKubernetesVersion(version)

 		cloudProvider := c.Cluster.GetCloudProvider()
 		if ok := model.UseExternalKubeletCredentialProvider(*kubernetesVersion, cloudProvider); ok {
@@ -95,7 +113,7 @@ func (c *FileAssets) AddFileAssets(assetBuilder *assets.AssetBuilder) error {
 			// VALID FOR 60 DAYS WE REALLY NEED TO MERGE https://github.com/kubernetes/cloud-provider-gcp/pull/601 and CUT A RELEASE
 			k, err := url.Parse(fmt.Sprintf("%s/linux-%s/v20231005-providersv0.27.1-65-g8fbe8d27", *binaryLocation, arch))
 			if err != nil {
-				return err
+				return nil, err
 			}

 			// TODO: Move these hashes to assetdata
@@ -105,14 +123,14 @@ func (c *FileAssets) AddFileAssets(assetBuilder *assets.AssetBuilder) error {
 			}
 			hash, err := hashing.FromString(hashes[arch])
 			if err != nil {
-				return fmt.Errorf("unable to parse auth-provider-gcp binary asset hash %q: %v", hashes[arch], err)
+				return nil, fmt.Errorf("unable to parse auth-provider-gcp binary asset hash %q: %v", hashes[arch], err)
 			}
 			asset, err := assetBuilder.RemapFile(k, hash)
 			if err != nil {
-				return err
+				return nil, err
 			}

-			c.Assets[arch] = append(c.Assets[arch], assets.BuildMirroredAsset(asset))
+			assetsMap[arch] = append(assetsMap[arch], assets.BuildMirroredAsset(asset))
 		case kops.CloudProviderAWS:
 			binaryLocation := c.Cluster.Spec.CloudProvider.AWS.BinariesLocation
 			if binaryLocation == nil {
@@ -121,65 +139,65 @@ func (c *FileAssets) AddFileAssets(assetBuilder *assets.AssetBuilder) error {

 			u, err := url.Parse(fmt.Sprintf("%s/linux/%s/ecr-credential-provider-linux-%s", *binaryLocation, arch, arch))
 			if err != nil {
-				return err
+				return nil, err
 			}
 			asset, err := assetBuilder.RemapFile(u, nil)
 			if err != nil {
-				return err
+				return nil, err
 			}
-			c.Assets[arch] = append(c.Assets[arch], assets.BuildMirroredAsset(asset))
+			assetsMap[arch] = append(assetsMap[arch], assets.BuildMirroredAsset(asset))
 		}

 		{
 			cniAsset, err := wellknownassets.FindCNIAssets(c.Cluster, assetBuilder, arch)
 			if err != nil {
-				return err
+				return nil, err
 			}
-			c.Assets[arch] = append(c.Assets[arch], assets.BuildMirroredAsset(cniAsset))
+			assetsMap[arch] = append(assetsMap[arch], assets.BuildMirroredAsset(cniAsset))
 		}

 		if c.Cluster.Spec.Containerd == nil || !c.Cluster.Spec.Containerd.SkipInstall {
 			containerdAsset, err := wellknownassets.FindContainerdAsset(c.Cluster, assetBuilder, arch)
 			if err != nil {
-				return err
+				return nil, err
 			}
 			if containerdAsset != nil {
-				c.Assets[arch] = append(c.Assets[arch], assets.BuildMirroredAsset(containerdAsset))
+				assetsMap[arch] = append(assetsMap[arch], assets.BuildMirroredAsset(containerdAsset))
 			}

 			runcAsset, err := wellknownassets.FindRuncAsset(c.Cluster, assetBuilder, arch)
 			if err != nil {
-				return err
+				return nil, err
 			}
 			if runcAsset != nil {
-				c.Assets[arch] = append(c.Assets[arch], assets.BuildMirroredAsset(runcAsset))
+				assetsMap[arch] = append(assetsMap[arch], assets.BuildMirroredAsset(runcAsset))
 			}
 			nerdctlAsset, err := wellknownassets.FindNerdctlAsset(c.Cluster, assetBuilder, arch)
 			if err != nil {
-				return err
+				return nil, err
 			}
 			if nerdctlAsset != nil {
-				c.Assets[arch] = append(c.Assets[arch], assets.BuildMirroredAsset(nerdctlAsset))
+				assetsMap[arch] = append(assetsMap[arch], assets.BuildMirroredAsset(nerdctlAsset))
 			}
 		}

 		crictlAsset, err := wellknownassets.FindCrictlAsset(c.Cluster, assetBuilder, arch)
 		if err != nil {
-			return err
+			return nil, err
 		}
 		if crictlAsset != nil {
-			c.Assets[arch] = append(c.Assets[arch], assets.BuildMirroredAsset(crictlAsset))
+			assetsMap[arch] = append(assetsMap[arch], assets.BuildMirroredAsset(crictlAsset))
 		}

 		asset, err := wellknownassets.NodeUpAsset(assetBuilder, arch)
 		if err != nil {
-			return err
+			return nil, err
 		}
 		c.NodeUpAssets[arch] = asset
 	}

-	return nil
+	return assetsMap, nil
 }

 // needsMounterAsset checks if we need the mounter program
Review comment: I think we should just return the error, unless there's some reason to expect this error to happen which we need to tolerate.
Author reply: Yes, it will error out for example if the cluster isn't up (e.g. new clusters, testing, etc.), in which case we need to tolerate it.