From ead4de5556c5185d1e6a25f6a48cd2490ecbd419 Mon Sep 17 00:00:00 2001
From: Richard Su
Date: Wed, 28 Aug 2024 17:03:38 -0500
Subject: [PATCH] OCPBUGS-38466: Allow controller to continue when
 assisted-service is unavailable in agent-based installs.

assisted-service runs on the bootstrap node in agent-based installs. The
bootstrap node reboots after the control plane is available. If the
assisted-installer-controller restarts after the bootstrap node reboots, or if
for some reason the controller is never able to contact assisted-service, the
controller loops waiting for assisted-service to become available, times out,
and exits. With compact clusters, because the controller has exited and is
unable to approve CSRs, the third control plane node cannot join the cluster,
causing the installation to fail.

If the invoker is agent-installer, this patch allows the controller to
continue running when assisted-service is offline instead of exiting.

Because assisted-service may be unavailable, HasValidvSphereCredentials has
been updated to also look at the install-config to determine whether
credentials were set. Because the username and password are redacted there,
only the server name is used to determine whether valid credentials were
provided.
---
 go.mod                                     |   2 +-
 .../assisted_installer_controller.go       |   6 +-
 .../assisted_installer_controller_test.go  |   2 +-
 .../mock_controller.go                     |  10 +-
 src/common/common.go                       | 121 +++++++---
 src/common/common_test.go                  | 206 +++++++++++-------
 src/installer/installer.go                 |   8 +-
 .../assisted_installer_main.go             |  71 ++++--
 8 files changed, 277 insertions(+), 149 deletions(-)

diff --git a/go.mod b/go.mod
index b8b0c68df..60392cfcb 100644
--- a/go.mod
+++ b/go.mod
@@ -39,6 +39,7 @@ require (
 	k8s.io/apimachinery v0.29.9
 	k8s.io/client-go v0.29.9
 	sigs.k8s.io/controller-runtime v0.17.2
+	sigs.k8s.io/yaml v1.4.0
 )
 
 require (
@@ -182,7 +183,6 @@ require (
 	k8s.io/utils v0.0.0-20240102154912-e7106e64919e // indirect
 	sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect
 	sigs.k8s.io/structured-merge-diff/v4 v4.4.1 // indirect
-	sigs.k8s.io/yaml v1.4.0 // indirect
 )
 
 replace (
diff --git a/src/assisted_installer_controller/assisted_installer_controller.go b/src/assisted_installer_controller/assisted_installer_controller.go
index e1620e301..12bc22ac6 100644
--- a/src/assisted_installer_controller/assisted_installer_controller.go
+++ b/src/assisted_installer_controller/assisted_installer_controller.go
@@ -114,7 +114,7 @@ type Controller interface {
 	UpdateNodeLabels(ctx context.Context, wg *sync.WaitGroup)
 	UpdateBMHs(ctx context.Context, wg *sync.WaitGroup)
 	UploadLogs(ctx context.Context, wg *sync.WaitGroup, invoker string)
-	SetReadyState() *models.Cluster
+	SetReadyState(waitTimeout time.Duration) *models.Cluster
 	GetStatus() *ControllerStatus
 }
@@ -1470,11 +1470,11 @@ func (c *controller) UploadLogs(ctx context.Context, wg *sync.WaitGroup, invoker
 	}
 }
 
-func (c *controller) SetReadyState() *models.Cluster {
+func (c controller) SetReadyState(waitTimeout time.Duration) *models.Cluster {
 	c.log.Infof("Start waiting to be ready")
 	var cluster *models.Cluster
 	var err error
-	_ = utils.WaitForPredicate(WaitTimeout, 1*time.Second, func() bool {
+	_ = utils.WaitForPredicate(waitTimeout, 1*time.Second, func() bool {
 		cluster, err = c.ic.GetCluster(context.TODO(), false)
 		if err != nil {
 			c.log.WithError(err).Warningf("Failed to connect to assisted service")
diff --git a/src/assisted_installer_controller/assisted_installer_controller_test.go
b/src/assisted_installer_controller/assisted_installer_controller_test.go index ca6b14e0e..3d7175619 100644 --- a/src/assisted_installer_controller/assisted_installer_controller_test.go +++ b/src/assisted_installer_controller/assisted_installer_controller_test.go @@ -338,7 +338,7 @@ var _ = Describe("installer HostRoleMaster role", func() { mockk8sclient.EXPECT().CreateEvent(assistedController.Namespace, common.AssistedControllerIsReadyEvent, gomock.Any(), common.AssistedControllerPrefix).Return(nil, fmt.Errorf("dummy")).Times(1) mockk8sclient.EXPECT().CreateEvent(assistedController.Namespace, common.AssistedControllerIsReadyEvent, gomock.Any(), common.AssistedControllerPrefix).Return(nil, nil).Times(1) - assistedController.SetReadyState() + assistedController.SetReadyState(WaitTimeout) Expect(assistedController.status.HasError()).Should(Equal(false)) }) diff --git a/src/assisted_installer_controller/mock_controller.go b/src/assisted_installer_controller/mock_controller.go index b60b537f1..114c90cf5 100644 --- a/src/assisted_installer_controller/mock_controller.go +++ b/src/assisted_installer_controller/mock_controller.go @@ -5,6 +5,7 @@ // // mockgen -source=assisted_installer_controller.go -package=assisted_installer_controller -destination=mock_controller.go // + // Package assisted_installer_controller is a generated GoMock package. package assisted_installer_controller @@ -12,6 +13,7 @@ import ( context "context" reflect "reflect" sync "sync" + time "time" models "github.com/openshift/assisted-service/models" gomock "go.uber.org/mock/gomock" @@ -79,17 +81,17 @@ func (mr *MockControllerMockRecorder) PostInstallConfigs(ctx, wg any) *gomock.Ca } // SetReadyState mocks base method. -func (m *MockController) SetReadyState() *models.Cluster { +func (m *MockController) SetReadyState(waitTimeout time.Duration) *models.Cluster { m.ctrl.T.Helper() - ret := m.ctrl.Call(m, "SetReadyState") + ret := m.ctrl.Call(m, "SetReadyState", waitTimeout) ret0, _ := ret[0].(*models.Cluster) return ret0 } // SetReadyState indicates an expected call of SetReadyState. -func (mr *MockControllerMockRecorder) SetReadyState() *gomock.Call { +func (mr *MockControllerMockRecorder) SetReadyState(waitTimeout any) *gomock.Call { mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "SetReadyState", reflect.TypeOf((*MockController)(nil).SetReadyState)) + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "SetReadyState", reflect.TypeOf((*MockController)(nil).SetReadyState), waitTimeout) } // UpdateBMHs mocks base method. diff --git a/src/common/common.go b/src/common/common.go index c202ee1e2..25b53dcf5 100644 --- a/src/common/common.go +++ b/src/common/common.go @@ -21,6 +21,7 @@ import ( "github.com/thoas/go-funk" "github.com/tidwall/gjson" v1 "k8s.io/api/core/v1" + "sigs.k8s.io/yaml" ) const ( @@ -33,6 +34,9 @@ const ( installConfigMapName = "openshift-install-manifests" installConfigMapNS = "openshift-config" installConfigMapAttribute = "invoker" + clusterConfigCMName = "cluster-config-v1" + clusterConfigCMNamespace = "kube-system" + clusterConfigCMAttribute = "install-config" InvokerAssisted = "assisted-service" InvokerAgent = "agent-installer" ) @@ -215,24 +219,23 @@ func HostMatchByNameOrIPAddress(node v1.Node, namesMap, IPAddressMap map[string] // Returns True when uninitialized taint must be removed. // This is required for some external platforms (e.g. VSphere, Nutanix) to proceed // with the installation using fake credentials. 
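As background for the change to RemoveUninitializedTaint below: removing the uninitialized taint amounts to filtering node.cloudprovider.kubernetes.io/uninitialized out of each node's taint list and updating the node, which the vSphere CCM would otherwise do once it has real credentials. A rough client-go sketch of that operation (standalone illustration only; the kubeconfig path is a placeholder and this is not code from this repository):

package main

import (
	"context"
	"fmt"

	corev1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/kubernetes"
	"k8s.io/client-go/tools/clientcmd"
)

const uninitializedTaintKey = "node.cloudprovider.kubernetes.io/uninitialized"

func main() {
	cfg, err := clientcmd.BuildConfigFromFlags("", "/path/to/kubeconfig") // placeholder path
	if err != nil {
		panic(err)
	}
	client, err := kubernetes.NewForConfig(cfg)
	if err != nil {
		panic(err)
	}
	nodes, err := client.CoreV1().Nodes().List(context.TODO(), metav1.ListOptions{})
	if err != nil {
		panic(err)
	}
	for i := range nodes.Items {
		node := &nodes.Items[i]
		kept := make([]corev1.Taint, 0, len(node.Spec.Taints))
		for _, taint := range node.Spec.Taints {
			if taint.Key != uninitializedTaintKey {
				kept = append(kept, taint)
			}
		}
		if len(kept) == len(node.Spec.Taints) {
			continue // nothing to remove on this node
		}
		node.Spec.Taints = kept
		if _, err := client.CoreV1().Nodes().Update(context.TODO(), node, metav1.UpdateOptions{}); err != nil {
			panic(err)
		}
		fmt.Printf("removed %s taint from node %s\n", uninitializedTaintKey, node.Name)
	}
}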
-func RemoveUninitializedTaint(platform *models.Platform, invoker string, hasValidCredentials bool, openshiftVersion string) bool { +func RemoveUninitializedTaint(ctx context.Context, ic inventory_client.InventoryClient, kc k8s_client.K8SClient, + log logrus.FieldLogger, platformType models.PlatformType, openshiftVersion, invoker string) bool { removeUninitializedTaintForPlatforms := [...]models.PlatformType{models.PlatformTypeNutanix, models.PlatformTypeVsphere} - version := semver.New(parseOpenshiftVersionIntoMajorMinorZOnly(openshiftVersion)) - if invoker == InvokerAgent && - *platform.Type == models.PlatformTypeVsphere && - version.Compare(*semver.New("4.15.0")) >= 0 && - hasValidCredentials { - // Starting with OpenShift 4.15, the agent-installer can pass valid credentials - // to vSphere. Do not remove the taint if: - // 1) invoker == agent-installer - // 2) platform == vSphere - // 3) OpenShift version >= 4.15 - // 4) and valid vSphere credentials were provided in install-config.yaml - // With valid credentials, the vSphere CCM will remove the uninitialized taints - // when it initializes the nodes. - return false + if invoker == InvokerAgent && platformType == models.PlatformTypeVsphere { + hasValidvSphereCredentials := HasValidvSphereCredentials(ctx, ic, kc, log) + if hasValidvSphereCredentials { + log.Infof("Has valid vSphere credentials: %v", hasValidvSphereCredentials) + } + version := semver.New(parseOpenshiftVersionIntoMajorMinorZOnly(openshiftVersion)) + if hasValidvSphereCredentials && version.Compare(*semver.New("4.15.0")) >= 0 { + // Starting with OpenShift 4.15, vSphere credentials can be used with ABI. + // In such cases, we should not remove the taints because the vSphere CCM will + // remove them after it initializes the nodes. + return false + } } - return platform != nil && funk.Contains(removeUninitializedTaintForPlatforms, *platform.Type) + return funk.Contains(removeUninitializedTaintForPlatforms, platformType) } // parseOpenshiftVersionIntoMajorMinorZOnly adds a .0 to verions that only specify @@ -248,35 +251,87 @@ func parseOpenshiftVersionIntoMajorMinorZOnly(version string) string { } // HasValidvSphereCredentials returns true if the the platform is -// vSphere and the install config overrides contains real +// vSphere and the install config or equivalent overrides contains real // credential values and not placeholder values. // Deprecated credential fields are not considered valid. 
-func HasValidvSphereCredentials(ctx context.Context, ic inventory_client.InventoryClient, log logrus.FieldLogger) bool { +func HasValidvSphereCredentials(ctx context.Context, ic inventory_client.InventoryClient, kc k8s_client.K8SClient, log logrus.FieldLogger) bool { cluster, callErr := ic.GetCluster(ctx, false) + if cluster != nil { + // assisted-service is available, check install config overrides for credentials + if cluster.Platform == nil || *cluster.Platform.Type != models.PlatformTypeVsphere { + return false + } + + if cluster.InstallConfigOverrides == "" { + return false + } + + username := gjson.Get(cluster.InstallConfigOverrides, `platform.vsphere.vcenters.0.user`).String() + password := gjson.Get(cluster.InstallConfigOverrides, `platform.vsphere.vcenters.0.password`).String() + server := gjson.Get(cluster.InstallConfigOverrides, `platform.vsphere.vcenters.0.server`).String() + + if username != "usernameplaceholder" && username != "" && + password != "passwordplaceholder" && password != "" && + server != "vcenterplaceholder" && server != "" { + return true + } + } else { + // With ABI, after the bootstrap node reboots, assisted-service becomes + // unavailable. If the controller restarts after the bootstrap node reboots, + // the cluster is unavailable. In this case, we instead check the + // install-config for credentials. + log.WithError(callErr).Warnf("error getting cluster, assisted-service was unavailable") - if callErr != nil { - log.WithError(callErr).Errorf("error getting cluster") - return false + installConfig, err := getInstallConfigYAML(kc, log) + if err != nil { + return false + } + icJSON, err := yaml.YAMLToJSON([]byte(installConfig)) + if err != nil { + log.Warnf("error parsing install config into json: %v", err) + return false + } + server := gjson.Get(string(icJSON), `platform.vsphere.vcenters.0.server`).String() + if server != "vcenterplaceholder" && server != "" { + return true + } } + return false +} - if cluster == nil || cluster.Platform == nil || *cluster.Platform.Type != models.PlatformTypeVsphere { - return false +func GetPlatformTypeFromInstallConfig(kc k8s_client.K8SClient, log logrus.FieldLogger) (models.PlatformType, error) { + installConfigYAML, err := getInstallConfigYAML(kc, log) + if err != nil { + return "", err } - if cluster.InstallConfigOverrides == "" { - return false + var data map[string]interface{} + err = yaml.Unmarshal([]byte(installConfigYAML), &data) + if err != nil { + return "", err } - username := gjson.Get(cluster.InstallConfigOverrides, `platform.vsphere.vcenters.0.user`).String() - password := gjson.Get(cluster.InstallConfigOverrides, `platform.vsphere.vcenters.0.password`).String() - server := gjson.Get(cluster.InstallConfigOverrides, `platform.vsphere.vcenters.0.server`).String() + platformMap, ok := data["platform"].(map[string]interface{}) + if !ok { + return "", errors.New("platform not found in install config") + } - if username != "usernameplaceholder" && username != "" && - password != "passwordplaceholder" && password != "" && - server != "vcenterplaceholder" && server != "" { - return true + platformType := "" + for key := range platformMap { + platformType = key + break } - return false + + return models.PlatformType(platformType), nil +} + +func getInstallConfigYAML(kc k8s_client.K8SClient, log logrus.FieldLogger) (string, error) { + clusterConfigCM, err := kc.GetConfigMap(clusterConfigCMNamespace, clusterConfigCMName) + if err != nil { + log.Warnf("error retrieving %v ConfigMap", clusterConfigCMName) + return "", 
err + } + return clusterConfigCM.Data[clusterConfigCMAttribute], nil } func GetInvoker(kc k8s_client.K8SClient, log logrus.FieldLogger) string { diff --git a/src/common/common_test.go b/src/common/common_test.go index ec20a2f9a..f8d6b55bf 100644 --- a/src/common/common_test.go +++ b/src/common/common_test.go @@ -13,7 +13,9 @@ import ( . "github.com/onsi/ginkgo" . "github.com/onsi/gomega" "github.com/openshift/assisted-installer/src/inventory_client" + "github.com/openshift/assisted-installer/src/k8s_client" "github.com/openshift/assisted-service/models" + "github.com/pkg/errors" "github.com/sirupsen/logrus" gomock "go.uber.org/mock/gomock" v1 "k8s.io/api/core/v1" @@ -28,9 +30,15 @@ var _ = Describe("verify common", func() { var ( l = logrus.New() mockbmclient *inventory_client.MockInventoryClient + mockkcclient *k8s_client.MockK8SClient ) - mockbmclient = inventory_client.NewMockInventoryClient(gomock.NewController(GinkgoT())) + l.SetOutput(io.Discard) + BeforeEach(func() { + mockbmclient = inventory_client.NewMockInventoryClient(gomock.NewController(GinkgoT())) + mockkcclient = k8s_client.NewMockK8SClient(gomock.NewController(GinkgoT())) + }) + Context("Verify SetConfiguringStatusForHosts", func() { It("test SetConfiguringStatusForHosts", func() { @@ -181,144 +189,180 @@ var _ = Describe("verify common", func() { }) }) + vSphereClusterWithValidCredentials := &models.Cluster{ + Platform: &models.Platform{Type: platformTypePtr(models.PlatformTypeVsphere)}, + InstallConfigOverrides: "{\"platform\":{\"vsphere\":{\"vcenters\":[{\"server\":\"server.openshift.com\",\"user\":\"some-user\",\"password\":\"some-password\",\"datacenters\":[\"datacenter\"]}],\"failureDomains\":[{\"name\":\"test-failure-baseDomain\",\"region\":\"changeme-region\",\"zone\":\"changeme-zone\",\"server\":\"server.openshift.com\",\"topology\":{\"datacenter\":\"datacenter\",\"computeCluster\":\"/datacenter/host/cluster\",\"networks\":[\"segment-a\"],\"datastore\":\"/datacenter/datastore/mystore\",\"resourcePool\":\"/datacenter/host/mycluster//Resources\",\"folder\":\"/datacenter/vm/folder\"}}]}}}", + } + vSphereClusterWithInvalidCredentials := &models.Cluster{ + Platform: &models.Platform{Type: platformTypePtr(models.PlatformTypeVsphere)}, + InstallConfigOverrides: "{\"platform\":{\"vsphere\":{\"username\":\"a-user\",\"password\":\"a-password\",\"vCenter\":\"a-server.com\"}}}", + } + Context("Verify RemoveUninitializedTaint", func() { - It("nil platform struct should not remove uninitialized taint", func() { - removeUninitializedTaint := RemoveUninitializedTaint(nil, InvokerAssisted, false, "4.14.0") + It("empty platform string should not remove uninitialized taint", func() { + removeUninitializedTaint := RemoveUninitializedTaint(context.TODO(), mockbmclient, mockkcclient, l, "", "", "") Expect(removeUninitializedTaint).To(BeFalse()) }) tests := []struct { PlatformType models.PlatformType - ExpectedRemoveUninitializedTaint bool Invoker string - HasValidvSphereCredentials bool VersionOpenshift string + Cluster *models.Cluster + ExpectedRemoveUninitializedTaint bool }{ { PlatformType: models.PlatformTypeNutanix, - Invoker: InvokerAssisted, - VersionOpenshift: "4.14.0", ExpectedRemoveUninitializedTaint: true, }, { PlatformType: models.PlatformTypeVsphere, - Invoker: InvokerAssisted, - HasValidvSphereCredentials: false, - VersionOpenshift: "4.14.0-rc0", + Cluster: vSphereClusterWithInvalidCredentials, ExpectedRemoveUninitializedTaint: true, }, + { + PlatformType: models.PlatformTypeNone, + ExpectedRemoveUninitializedTaint: 
false, + }, + { + PlatformType: models.PlatformTypeBaremetal, + ExpectedRemoveUninitializedTaint: false, + }, { PlatformType: models.PlatformTypeVsphere, Invoker: InvokerAgent, - HasValidvSphereCredentials: false, VersionOpenshift: "4.14.0-rc0", + Cluster: vSphereClusterWithInvalidCredentials, ExpectedRemoveUninitializedTaint: true, }, { PlatformType: models.PlatformTypeVsphere, Invoker: InvokerAgent, - HasValidvSphereCredentials: false, VersionOpenshift: "4.15.0", + Cluster: vSphereClusterWithInvalidCredentials, ExpectedRemoveUninitializedTaint: true, }, { PlatformType: models.PlatformTypeVsphere, Invoker: InvokerAgent, - HasValidvSphereCredentials: true, VersionOpenshift: "4.14.0", + Cluster: vSphereClusterWithValidCredentials, ExpectedRemoveUninitializedTaint: true, }, { PlatformType: models.PlatformTypeVsphere, Invoker: InvokerAgent, - HasValidvSphereCredentials: true, VersionOpenshift: "4.15", - ExpectedRemoveUninitializedTaint: false, - }, - { - PlatformType: models.PlatformTypeNone, - Invoker: InvokerAssisted, - VersionOpenshift: "4.14.0-rc10-z10", - ExpectedRemoveUninitializedTaint: false, - }, - { - PlatformType: models.PlatformTypeBaremetal, - Invoker: InvokerAssisted, - VersionOpenshift: "4.14.0", + Cluster: vSphereClusterWithValidCredentials, ExpectedRemoveUninitializedTaint: false, }, } - for i := range tests { - test := tests[i] - It(fmt.Sprintf("platform %v, invoker %v, hasValidCredentials %v, version %v, expected remove uninitialized taint = %v", test.PlatformType, test.Invoker, test.HasValidvSphereCredentials, test.VersionOpenshift, test.ExpectedRemoveUninitializedTaint), func() { - platform := &models.Platform{ - Type: &test.PlatformType, + for _, test := range tests { + test := test + It(fmt.Sprintf("platform %v, invoker %v, version %v, cluster %v, is expected to remove uninitialized taint = %v", test.PlatformType, test.Invoker, test.VersionOpenshift, test.Cluster, test.ExpectedRemoveUninitializedTaint), func() { + if test.PlatformType == models.PlatformTypeVsphere { + mockbmclient.EXPECT().GetCluster(gomock.Any(), false).Return(test.Cluster, nil).Times(1) } - removeUninitializedTaint := RemoveUninitializedTaint(platform, test.Invoker, - test.HasValidvSphereCredentials, test.VersionOpenshift) + removeUninitializedTaint := RemoveUninitializedTaint(context.TODO(), mockbmclient, mockkcclient, l, test.PlatformType, test.VersionOpenshift, test.Invoker) Expect(removeUninitializedTaint).To(Equal(test.ExpectedRemoveUninitializedTaint)) }) } }) Context("Verify HasValidvSphereCredentials", func() { - It("nil cluster struct should not cause nil pointer exception", func() { - mockbmclient.EXPECT().GetCluster(gomock.Any(), false).Return(nil, nil).Times(1) - hasValidvSphereCredentials := HasValidvSphereCredentials(context.TODO(), mockbmclient, l) - Expect(hasValidvSphereCredentials).To(BeFalse()) - }) - - tests := []struct { - TestName string - Cluster models.Cluster - HasValidvSphereCredentials bool - }{ - { - TestName: "has valid vSphere credentials should return true", - Cluster: models.Cluster{ - Platform: &models.Platform{Type: platformTypePtr(models.PlatformTypeVsphere)}, - InstallConfigOverrides: 
"{\"platform\":{\"vsphere\":{\"vcenters\":[{\"server\":\"server.openshift.com\",\"user\":\"some-user\",\"password\":\"some-password\",\"datacenters\":[\"datacenter\"]}],\"failureDomains\":[{\"name\":\"test-failure-baseDomain\",\"region\":\"changeme-region\",\"zone\":\"changeme-zone\",\"server\":\"server.openshift.com\",\"topology\":{\"datacenter\":\"datacenter\",\"computeCluster\":\"/datacenter/host/cluster\",\"networks\":[\"segment-a\"],\"datastore\":\"/datacenter/datastore/mystore\",\"resourcePool\":\"/datacenter/host/mycluster//Resources\",\"folder\":\"/datacenter/vm/folder\"}}]}}}", + Context("cluster is available from assisted-service", func() { + tests := []struct { + TestName string + Cluster *models.Cluster + HasValidvSphereCredentials bool + }{ + { + TestName: "has valid vSphere credentials should return true", + Cluster: vSphereClusterWithValidCredentials, + HasValidvSphereCredentials: true, }, - HasValidvSphereCredentials: true, - }, - { - TestName: "does not have valid vSphere credentials should return false", - Cluster: models.Cluster{ - Platform: &models.Platform{Type: platformTypePtr(models.PlatformTypeVsphere)}, - InstallConfigOverrides: "{\"platform\":{\"vsphere\":{\"username\":\"a-user\",\"password\":\"a-password\",\"vCenter\":\"a-server.com\"}}}", + { + TestName: "does not have valid vSphere credentials should return false", + Cluster: vSphereClusterWithInvalidCredentials, + HasValidvSphereCredentials: false, }, - HasValidvSphereCredentials: false, - }, - { - TestName: "deprecated credentials should return false", - Cluster: models.Cluster{ - Platform: &models.Platform{Type: platformTypePtr(models.PlatformTypeVsphere)}, - InstallConfigOverrides: "{\"platform\":{\"vsphere\":{\"vcenters\":[{\"server\":\"\",\"user\":\"usernameplaceholder\",\"password\":\"some-password\",\"datacenters\":[\"datacenter\"]}],\"failureDomains\":[{\"name\":\"test-failure-baseDomain\",\"region\":\"changeme-region\",\"zone\":\"changeme-zone\",\"server\":\"server.openshift.com\",\"topology\":{\"datacenter\":\"datacenter\",\"computeCluster\":\"/datacenter/host/cluster\",\"networks\":[\"segment-a\"],\"datastore\":\"/datacenter/datastore/mystore\",\"resourcePool\":\"/datacenter/host/mycluster//Resources\",\"folder\":\"/datacenter/vm/folder\"}}]}}}", + { + TestName: "deprecated credentials should return false", + Cluster: &models.Cluster{ + Platform: &models.Platform{Type: platformTypePtr(models.PlatformTypeVsphere)}, + InstallConfigOverrides: "{\"platform\":{\"vsphere\":{\"vcenters\":[{\"server\":\"\",\"user\":\"usernameplaceholder\",\"password\":\"some-password\",\"datacenters\":[\"datacenter\"]}],\"failureDomains\":[{\"name\":\"test-failure-baseDomain\",\"region\":\"changeme-region\",\"zone\":\"changeme-zone\",\"server\":\"server.openshift.com\",\"topology\":{\"datacenter\":\"datacenter\",\"computeCluster\":\"/datacenter/host/cluster\",\"networks\":[\"segment-a\"],\"datastore\":\"/datacenter/datastore/mystore\",\"resourcePool\":\"/datacenter/host/mycluster//Resources\",\"folder\":\"/datacenter/vm/folder\"}}]}}}", + }, + HasValidvSphereCredentials: false, }, - HasValidvSphereCredentials: false, - }, - { - TestName: "other platforms should return false", - Cluster: models.Cluster{ - Platform: &models.Platform{Type: platformTypePtr(models.PlatformTypeBaremetal)}, - InstallConfigOverrides: "{\"fips\":\"false\",\"platform\":{\"baremetal\":{}}}", + { + TestName: "other platforms should return false", + Cluster: &models.Cluster{ + Platform: &models.Platform{Type: 
platformTypePtr(models.PlatformTypeBaremetal)}, + InstallConfigOverrides: "{\"fips\":\"false\",\"platform\":{\"baremetal\":{}}}", + }, + HasValidvSphereCredentials: false, }, - HasValidvSphereCredentials: false, - }, - } + } - for i := range tests { - test := tests[i] - It(test.TestName, func() { - mockbmclient.EXPECT().GetCluster(gomock.Any(), false).Return(&test.Cluster, nil).Times(1) + for _, test := range tests { + test := test + It(test.TestName, func() { + mockbmclient.EXPECT().GetCluster(gomock.Any(), false).Return(test.Cluster, nil).Times(1) + hasValidvSphereCredentials := HasValidvSphereCredentials(context.TODO(), mockbmclient, mockkcclient, l) - hasValidvSphereCredentials := HasValidvSphereCredentials(context.TODO(), mockbmclient, l) + Expect(hasValidvSphereCredentials).To(Equal(test.HasValidvSphereCredentials)) + }) + } + }) - Expect(hasValidvSphereCredentials).To(Equal(test.HasValidvSphereCredentials)) - }) + Context("cluster is unavailable from assisted-service, install-config ConfigMap is used", func() { + configMapTests := []struct { + TestName string + ConfigMap *v1.ConfigMap + HasValidvSphereCredentials bool + }{ + { + TestName: "has server defined in install-config should return true", + ConfigMap: &v1.ConfigMap{ + Data: map[string]string{ + "install-config": "platform:\n vsphere:\n apiVIPs:\n - 192.168.111.5\n failureDomains:\n - name: generated-failure-domain\n region: generated-region\n server: server.openshift.com\n topology:\n computeCluster: compute-cluster\n datacenter: datacenter\n datastore: datastore\n folder: /my-folder\n networks:\n - test-network-1\n resourcePool: /datacenter/host/cluster/resource-pool\n zone: generated-zone\n ingressVIPs:\n - 192.168.111.4\n vcenters:\n - datacenters:\n - datacenter\n password: \"\"\n server: machine1.server.com\n user: \"\"\npublish: External\n", + }, + }, + HasValidvSphereCredentials: true, + }, + { + TestName: "has deprecated server value in install-config should return false", + ConfigMap: &v1.ConfigMap{ + Data: map[string]string{ + "install-config": "platform:\n vsphere:\n apiVIPs:\n - 192.168.111.5\n failureDomains:\n - name: generated-failure-domain\n region: generated-region\n server: vcenterplaceholder\n topology:\n computeCluster: compute-cluster\n datacenter: datacenter\n datastore: datastore\n folder: /my-folder\n networks:\n - test-network-1\n resourcePool: /datacenter/host/cluster/resource-pool\n zone: generated-zone\n ingressVIPs:\n - 192.168.111.4\n vcenters:\n - datacenters:\n - datacenter\n password: \"\"\n server: vcenterplaceholder\n user: \"\"\npublish: External\n", + }, + }, + HasValidvSphereCredentials: false, + }, + { + TestName: "username and password are redacted in install-config and are ignored, should return false", + ConfigMap: &v1.ConfigMap{ + Data: map[string]string{ + "install-config": "platform:\n vsphere:\n apiVIPs:\n - 192.168.111.5\n failureDomains:\n - name: generated-failure-domain\n region: generated-region\n server: vcenterplaceholder\n topology:\n computeCluster: compute-cluster\n datacenter: datacenter\n datastore: datastore\n folder: /my-folder\n networks:\n - test-network-1\n resourcePool: /datacenter/host/cluster/resource-pool\n zone: generated-zone\n ingressVIPs:\n - 192.168.111.4\n vcenters:\n - datacenters:\n - datacenter\n password: \"goodpassword\"\n server: vcenterplaceholder\n user: \"goodusername\"\npublish: External\n", + }, + }, + HasValidvSphereCredentials: false, + }, + } - } + for _, test := range configMapTests { + test := test + It(test.TestName, func() { + 
mockbmclient.EXPECT().GetCluster(gomock.Any(), false).Return(nil, errors.New("some error")).Times(1)
+					mockkcclient.EXPECT().GetConfigMap(gomock.Any(), gomock.Any()).Return(test.ConfigMap, nil).Times(1)
+
+					hasValidvSphereCredentials := HasValidvSphereCredentials(context.TODO(), mockbmclient, mockkcclient, l)
+
+					Expect(hasValidvSphereCredentials).To(Equal(test.HasValidvSphereCredentials))
+				})
+			}
+		})
 	})
 })
diff --git a/src/installer/installer.go b/src/installer/installer.go
index 1f23d7e83..06c15d894 100644
--- a/src/installer/installer.go
+++ b/src/installer/installer.go
@@ -501,7 +501,7 @@ func (i *installer) waitForControlPlane(ctx context.Context) error {
 	}
 	i.UpdateHostInstallProgress(models.HostStageWaitingForControlPlane, waitingForMastersStatusInfo)
 
-	hasValidvSphereCredentials := common.HasValidvSphereCredentials(ctx, i.inventoryClient, i.log)
+	hasValidvSphereCredentials := common.HasValidvSphereCredentials(ctx, i.inventoryClient, kc, i.log)
 	if hasValidvSphereCredentials {
 		i.log.Infof("Has valid vSphere credentials: %v", hasValidvSphereCredentials)
 	}
@@ -706,8 +706,10 @@ func (i *installer) waitForMasterNodes(ctx context.Context, minMasterNodes int,
 	// GetInvoker reads the openshift-install-manifests in the openshift-config namespace.
 	// This configmap exists after nodes start to appear in the cluster.
 	invoker := common.GetInvoker(kc, i.log)
-	removeUninitializedTaint := common.RemoveUninitializedTaint(platform, invoker,
-		hasValidvSphereCredentials, i.OpenshiftVersion)
+	removeUninitializedTaint := false
+	if platform != nil {
+		removeUninitializedTaint = common.RemoveUninitializedTaint(ctx, i.inventoryClient, kc, i.log, *platform.Type, i.OpenshiftVersion, invoker)
+	}
 	i.log.Infof("Remove uninitialized taint: %v", removeUninitializedTaint)
 	if removeUninitializedTaint {
 		for _, node := range nodes.Items {
diff --git a/src/main/assisted-installer-controller/assisted_installer_main.go b/src/main/assisted-installer-controller/assisted_installer_main.go
index 3bf51841a..a92f4c13a 100644
--- a/src/main/assisted-installer-controller/assisted_installer_main.go
+++ b/src/main/assisted-installer-controller/assisted_installer_main.go
@@ -137,22 +137,55 @@ func main() {
 		go assistedController.HackDNSAddressConflict(&wg)
 		wg.Add(1)
 	}
+	invoker := common.GetInvoker(kc, logger)
 
-	cluster := assistedController.SetReadyState()
-	if cluster == nil && invoker == common.InvokerAgent {
-		// When the agent-based installer installs a SNO cluster, assisted-service
-		// will never be reachable because it will not be running after the boostrap
-		// node reboots to become the SNO cluster. SetReadyState will never be
-		// able to get a connection to assisted-service in this circumstance.
-		//
-		// Instead of exiting with panic with "invalid memory address or nil pointer
-		// dereference" and having the controller restart because cluster is nil,
-		// log warning and exit 0. Otherwise the controller will keep restarting
-		// leaving the controller in a running state, even through the cluster
-		// has finished install.
- logger.Warnf("SetReadyState timed out fetching cluster from assisted-service, invoker = %v", invoker) - return + var cluster *models.Cluster + var platformType models.PlatformType + removeUninitializedTaint := false + if invoker == common.InvokerAgent { + if Options.ControllerConfig.HighAvailabilityMode == models.ClusterHighAvailabilityModeNone { + // When the agent-based installer installs a SNO cluster, assisted-service + // will never be reachable because it will not be running after the boostrap + // node reboots to become the SNO cluster. SetReadyState will never be + // able to get a connection to assisted-service in this circumstance. + // + // Instead of exiting with panic with "invalid memory address or nil pointer + // dereference" and having the controller restart because cluster is nil, + // log warning and exit 0. Otherwise the controller will keep restarting + // leaving the controller in a running state, even through the cluster + // has finished install. + logger.Warnf("cluster is SNO and invoker = %v, skipping assisted-installer-controller", invoker) + return + } + // With the agent-based installer, assisted-service runs on the bootstrap node. + // We do not need to wait for an extended amount of time to reach assisted-service + // because it should be running on the local network. + cluster = assistedController.SetReadyState(5 * time.Minute) + if cluster == nil { + // assisted-service may no longer be available after bootstrap node reboots + // and assisted-installer-controller is restarted. In this case, we try + // to determine the platform type by looking at the install-config stored + // in the cluster-config-v1 ConfigMap. + logger.Warnf("SetReadyState timed out fetching cluster from assisted-service, invoker = %v", invoker) + pt, err := common.GetPlatformTypeFromInstallConfig(kc, logger) + if err != nil { + logger.Warnf("error determining platform type from install-config: %v", err) + } else { + platformType = pt + logger.Infof("platform type from install-config: %v", platformType) + } + } + } else { + cluster = assistedController.SetReadyState(assistedinstallercontroller.WaitTimeout) + if cluster == nil { + logger.Fatal("Failed to retrieve cluster from assisted service") + } + } + if cluster != nil { + platformType = *cluster.Platform.Type } + removeUninitializedTaint = common.RemoveUninitializedTaint(mainContext, client, kc, logger, platformType, Options.ControllerConfig.OpenshiftVersion, invoker) + logger.Infof("Remove uninitialized taint: %v", removeUninitializedTaint) // While adding new routine don't miss to add wg.add(1) // without adding it will panic @@ -165,20 +198,12 @@ func main() { logger.Infof("Finished all") }() - hasValidvSphereCredentials := common.HasValidvSphereCredentials(mainContext, client, logger) - if hasValidvSphereCredentials { - logger.Infof("Has valid vSphere credentials: %v", hasValidvSphereCredentials) - } - removeUninitializedTaint := common.RemoveUninitializedTaint(cluster.Platform, - invoker, hasValidvSphereCredentials, cluster.OpenshiftVersion) - logger.Infof("Remove uninitialized taint: %v", removeUninitializedTaint) - go assistedController.WaitAndUpdateNodesStatus(mainContext, &wg, removeUninitializedTaint) wg.Add(1) go assistedController.PostInstallConfigs(mainContext, &wg) wg.Add(1) - if *cluster.Platform.Type == models.PlatformTypeBaremetal { + if cluster != nil && *cluster.Platform.Type == models.PlatformTypeBaremetal { go assistedController.UpdateBMHs(mainContext, &wg) wg.Add(1) } else {