From ee49db4e044c61cf538a08cae7cbb2482ab4f838 Mon Sep 17 00:00:00 2001
From: Naiming Shen
Date: Fri, 27 Dec 2024 17:44:45 -0800
Subject: [PATCH] Handle the review comments in PR 4407

- changed the function to retrieveDeviceNodeName() and call it only
  once at the start of domainmgr Run()
- removed the ctx.hvTypeKube and status.IsDNidNode checks in the
  domainmgr.go code; also removed status.DomainConfigDeleted. We now
  rely on the normal domain delete/cleanup workflow
- fixed a bug where the nodeName could contain an underscore, which is
  not allowed in Kubernetes names
- changed the zedmanager/handleclusterapp.go code to the PoC code base,
  and commented out one line for a later PR to handle
- implemented the scheme where, if kubevirt can not contact the
  kubernetes API-server or the cluster has not scheduled the POD/VMI
  yet, we now return the 'Unknown' status; a starting 'Unknown'
  timestamp is kept per application
- if the 'Unknown' status lasts longer than 5 minutes, it changes into
  the 'Halt' status reported back to domainmgr
- updated the 'zedkube.md' section 'Handle Domain Apps Status in
  domainmgr' to describe the above behavior

Signed-off-by: Naiming Shen
---
 pkg/pillar/cmd/domainmgr/domainmgr.go         |  86 ++----------
 pkg/pillar/cmd/zedagent/parseconfig.go        |  13 +-
 pkg/pillar/cmd/zedkube/applogs.go             |   2 +-
 pkg/pillar/cmd/zedmanager/handleclusterapp.go |  45 ++++--
 pkg/pillar/docs/zedkube.md                    |   8 +-
 pkg/pillar/hypervisor/kubevirt.go             | 128 +++++++++++-------
 pkg/pillar/types/domainmgrtypes.go            |   5 -
 7 files changed, 134 insertions(+), 153 deletions(-)

diff --git a/pkg/pillar/cmd/domainmgr/domainmgr.go b/pkg/pillar/cmd/domainmgr/domainmgr.go
index 502e9706d7..4d9dea3bcf 100644
--- a/pkg/pillar/cmd/domainmgr/domainmgr.go
+++ b/pkg/pillar/cmd/domainmgr/domainmgr.go
@@ -452,6 +452,12 @@ func Run(ps *pubsub.PubSub, loggerArg *logrus.Logger, logArg *base.LogObject, ar
 	}
 	log.Noticef("processed GCComplete")
 
+	// Get the EdgeNode info, needed for kubevirt clustering
+	err = domainCtx.retrieveDeviceNodeName()
+	if err != nil {
+		log.Fatal(err)
+	}
+
 	if !domainCtx.setInitialUsbAccess {
 		log.Functionf("GCComplete but not setInitialUsbAccess => first boot")
 		// Enable USB keyboard and storage
@@ -1102,13 +1108,6 @@ func maybeRetryBoot(ctx *domainContext, status *types.DomainStatus) {
 		return
 	}
 
-	err := ctx.retrieveNodeNameAndUUID()
-	if err != nil {
-		log.Errorf("maybeRetryBoot(%s) retrieveNodeNameAndUUID failed: %s",
-			status.Key(), err)
-		return
-	}
-
 	if status.Activated && status.BootFailed {
 		log.Functionf("maybeRetryBoot(%s) clearing bootFailed since Activated",
 			status.Key())
@@ -1365,7 +1364,6 @@ func handleCreate(ctx *domainContext, key string, config *types.DomainConfig) {
 		State:          types.INSTALLED,
 		VmConfig:       config.VmConfig,
 		Service:        config.Service,
-		IsDNidNode:     config.IsDNidNode,
 	}
 
 	status.VmConfig.CPUs = make([]int, 0)
@@ -1575,13 +1573,6 @@ func doActivate(ctx *domainContext, config types.DomainConfig,
 	log.Functionf("doActivate(%v) for %s",
 		config.UUIDandVersion, config.DisplayName)
 
-	err := ctx.retrieveNodeNameAndUUID()
-	if err != nil {
-		log.Errorf("doActivate(%s) retrieveNodeNameAndUUID failed: %s",
-			status.Key(), err)
-		return
-	}
-
 	if ctx.cpuPinningSupported {
 		if err := assignCPUs(ctx, &config, status); err != nil {
 			log.Warnf("failed to assign CPUs for %s", config.DisplayName)
@@ -1807,18 +1798,6 @@ func doActivateTail(ctx *domainContext, status *types.DomainStatus,
 		log.Errorf("domain start for %s: %s", status.DomainName, err)
 		status.SetErrorNow(err.Error())
 
-		// HvKube case
-		if ctx.hvTypeKube && !status.IsDNidNode {
-			
log.Noticef("doActivateTail(%v) we are not DNiD, skip delete app", status.DomainName) - return - } - - // Only send delete if DomainConfig is not deleted - // detail see the zedkube.md section 'Handling Domain Deletion in Domainmgr' - if ctx.hvTypeKube && !status.DomainConfigDeleted { - log.Noticef("doActivateTail(%v) DomainConfig exists, skip delete app", status.DomainName) - return - } // Delete if err := hyper.Task(status).Delete(status.DomainName); err != nil { log.Errorf("failed to delete domain: %s (%v)", status.DomainName, err) @@ -1849,17 +1828,6 @@ func doActivateTail(ctx *domainContext, status *types.DomainStatus, log.Errorf("doActivateTail(%v) failed for %s: %s", status.UUIDandVersion, status.DisplayName, err) - if ctx.hvTypeKube && !status.IsDNidNode { - log.Noticef("doActivateTail(%v) we are not DNiD, skip delete app", status.DomainName) - return - } - // Only send delete if DomainConfig is not deleted - // detail see the zedkube.md section 'Handling Domain Deletion in Domainmgr' - if ctx.hvTypeKube && !status.DomainConfigDeleted { - log.Noticef("doActivateTail(%v) DomainConfig exists, skip delete app", status.DomainName) - return - } - // Delete if err := hyper.Task(status).Delete(status.DomainName); err != nil { log.Errorf("failed to delete domain: %s (%v)", status.DomainName, err) @@ -1961,17 +1929,6 @@ func doInactivate(ctx *domainContext, status *types.DomainStatus, impatient bool } if status.DomainId != 0 { - if ctx.hvTypeKube && !status.IsDNidNode { - log.Noticef("doInactivate(%v) we are not DNiD, skip delete app", status.DomainName) - return - } - // Only send delete if DomainConfig is not deleted - // detail see the zedkube.md section 'Handling Domain Deletion in Domainmgr' - if ctx.hvTypeKube && !status.DomainConfigDeleted { - log.Noticef("doInactivate(%v) DomainConfig exists, skip delete app", status.DomainName) - return - } - if err := hyper.Task(status).Delete(status.DomainName); err != nil { log.Errorf("Failed to delete domain %s (%v)", status.DomainName, err) } else { @@ -2559,16 +2516,6 @@ func handleDelete(ctx *domainContext, key string, status *types.DomainStatus) { // No point in publishing metrics any more ctx.pubDomainMetric.Unpublish(status.Key()) - if ctx.hvTypeKube && !status.IsDNidNode { - log.Noticef("handleDelete(%v) we are not DNiD, skip delete app", status.DomainName) - return - } - - // set the DomainConfigDeleted for kubernetes to remove the domain - // detail see the zedkube.md section 'Handling Domain Deletion in Domainmgr' - status.DomainConfigDeleted = true - log.Noticef("handleDelete(%v) DomainConfigDeleted", status.DomainName) - err := hyper.Task(status).Delete(status.DomainName) if err != nil { log.Errorln(err) @@ -2617,10 +2564,6 @@ func DomainShutdown(ctx *domainContext, status types.DomainStatus, force bool) e // Stop the domain log.Functionf("Stopping domain - %s", status.DomainName) - if ctx.hvTypeKube && !status.IsDNidNode { - log.Noticef("DomainShutdown(%v) we are not DNiD, skip delete app", status.DomainName) - return nil - } err = hyper.Task(&status).Stop(status.DomainName, force) return err @@ -3710,15 +3653,14 @@ func lookupCapabilities(ctx *domainContext) (*types.Capabilities, error) { return &capabilities, nil } -func (ctx *domainContext) retrieveNodeNameAndUUID() error { - if ctx.nodeName == "" { - NodeInfo, err := ctx.subEdgeNodeInfo.Get("global") - if err != nil { - log.Errorf("retrieveNodeNameAndUUID: can't get edgeNodeInfo %v", err) - return err - } - enInfo := NodeInfo.(types.EdgeNodeInfo) - ctx.nodeName = 
strings.ToLower(enInfo.DeviceName)
+func (ctx *domainContext) retrieveDeviceNodeName() error {
+	NodeInfo, err := ctx.subEdgeNodeInfo.Get("global")
+	if err != nil {
+		log.Errorf("retrieveDeviceNodeName: can't get edgeNodeInfo %v", err)
+		return err
 	}
+	enInfo := NodeInfo.(types.EdgeNodeInfo)
+	ctx.nodeName = strings.ReplaceAll(strings.ToLower(enInfo.DeviceName), "_", "-")
+	log.Noticef("retrieveDeviceNodeName: devicename, NodeInfo %v", NodeInfo) // XXX
 	return nil
 }
diff --git a/pkg/pillar/cmd/zedagent/parseconfig.go b/pkg/pillar/cmd/zedagent/parseconfig.go
index 2c7d5e21a3..4914e9d632 100644
--- a/pkg/pillar/cmd/zedagent/parseconfig.go
+++ b/pkg/pillar/cmd/zedagent/parseconfig.go
@@ -3214,13 +3214,8 @@ func parseEdgeNodeClusterConfig(getconfigCtx *getconfigContext,
 	ctx := getconfigCtx.zedagentCtx
 	zcfgCluster := config.GetCluster()
 	if zcfgCluster == nil {
-		log.Functionf("parseEdgeNodeClusterConfig: No EdgeNodeClusterConfig, Unpublishing")
-		pub := ctx.pubEdgeNodeClusterConfig
-		items := pub.GetAll()
-		if len(items) > 0 {
-			log.Functionf("parseEdgeNodeClusterConfig: Unpublishing EdgeNodeClusterConfig")
-			ctx.pubEdgeNodeClusterConfig.Unpublish("global")
-		}
+		log.Functionf("parseEdgeNodeClusterConfig: Unpublishing EdgeNodeClusterConfig")
+		ctx.pubEdgeNodeClusterConfig.Unpublish("global")
 		return
 	}
 	ipAddr, ipNet, err := net.ParseCIDR(zcfgCluster.GetClusterIpPrefix())
@@ -3231,6 +3226,10 @@
 	ipNet.IP = ipAddr
 
 	joinServerIP := net.ParseIP(zcfgCluster.GetJoinServerIp())
+	if joinServerIP == nil {
+		log.Errorf("parseEdgeNodeClusterConfig: parse JoinServerIP failed")
+		return
+	}
 	var isJoinNode bool
 	// deduce the bootstrap node status from clusterIPPrefix and joinServerIP
 	if ipAddr.Equal(joinServerIP) { // deduce the bootstrap node status from
diff --git a/pkg/pillar/cmd/zedkube/applogs.go b/pkg/pillar/cmd/zedkube/applogs.go
index 1796a24fea..74f4c2ddb5 100644
--- a/pkg/pillar/cmd/zedkube/applogs.go
+++ b/pkg/pillar/cmd/zedkube/applogs.go
@@ -204,7 +204,7 @@ func (z *zedkube) getnodeNameAndUUID() error {
 			return err
 		}
 		enInfo := NodeInfo.(types.EdgeNodeInfo)
-		z.nodeName = strings.ToLower(enInfo.DeviceName)
+		z.nodeName = strings.ReplaceAll(strings.ToLower(enInfo.DeviceName), "_", "-")
 		z.nodeuuid = enInfo.DeviceID.String()
 	}
 	return nil
diff --git a/pkg/pillar/cmd/zedmanager/handleclusterapp.go b/pkg/pillar/cmd/zedmanager/handleclusterapp.go
index 83a94dfa67..8fcf0838f3 100644
--- a/pkg/pillar/cmd/zedmanager/handleclusterapp.go
+++ b/pkg/pillar/cmd/zedmanager/handleclusterapp.go
@@ -6,38 +6,55 @@ package zedmanager
 import "github.com/lf-edge/eve/pkg/pillar/types"
 
 func handleENClusterAppStatusCreate(ctxArg interface{}, key string, configArg interface{}) {
-	log.Functionf("handleENClusterAppStatusCreate(%s)", key)
+	log.Noticef("handleENClusterAppStatusCreate(%s)", key)
 	ctx := ctxArg.(*zedmanagerContext)
 	status := configArg.(types.ENClusterAppStatus)
 	handleENClusterAppStatusImpl(ctx, key, &status)
 }
 
 func handleENClusterAppStatusModify(ctxArg interface{}, key string, configArg interface{}, oldConfigArg interface{}) {
-	log.Functionf("handleENClusterAppStatusModify(%s)", key)
+	log.Noticef("handleENClusterAppStatusModify(%s)", key)
 	ctx := ctxArg.(*zedmanagerContext)
 	status := configArg.(types.ENClusterAppStatus)
 	handleENClusterAppStatusImpl(ctx, key, &status)
 }
 
 func handleENClusterAppStatusDelete(ctxArg interface{}, key string, configArg interface{}) {
-	log.Functionf("handleENClusterAppStatusDelete(%s)", key)
+	
log.Noticef("handleENClusterAppStatusDelete(%s)", key) ctx := ctxArg.(*zedmanagerContext) - //status := configArg.(types.ENClusterAppStatus) - handleENClusterAppStatusImpl(ctx, key, nil) + status := configArg.(types.ENClusterAppStatus) + handleENClusterAppStatusImpl(ctx, key, &status) } func handleENClusterAppStatusImpl(ctx *zedmanagerContext, key string, status *types.ENClusterAppStatus) { - log.Functionf("handleENClusterAppStatusImpl(%s) for app-status %v", key, status) - pub := ctx.pubAppInstanceStatus - items := pub.GetAll() - for _, st := range items { - aiStatus := st.(types.AppInstanceStatus) - if aiStatus.UUIDandVersion.UUID.String() == key { - log.Functionf("handleENClusterAppStatusImpl(%s) found ai status, update", key) + aiStatus := lookupAppInstanceStatus(ctx, key) + log.Noticef("handleENClusterAppStatusImpl(%s) for app-status %v aiStatus %v", key, status, aiStatus) + + if status.ScheduledOnThisNode { + if aiStatus == nil { + // This could happen if app failover to other node and failing back to this designated node. + // One scenario is node reboot. Kubernetes told us that app is scheduled on this node. + aiConfig := lookupAppInstanceConfig(ctx, key, false) + if aiConfig == nil { + log.Errorf("handleENClusterAppStatusImpl(%s) AppInstanceConfig missing for app", key) + return + } + // XXX this will be handled in later PR in clustering and zedmanager code + //handleCreateAppInstanceStatus(ctx, *aiConfig) + } else { + // Nothing to do, we already have aiStatus + log.Functionf("handleENClusterAppStatusImpl(%s) for app-status %v aiStatus %v", key, status, aiStatus) + return + } + } else { // not scheduled here. - updateAIStatusUUID(ctx, aiStatus.UUIDandVersion.UUID.String()) - break + // if aiStatus is not present, nothing to do + if aiStatus != nil { + // If I am not scheduled here, just unpublish the AIStatus. + // We probably had app running on this node earlier before failover. + unpublishAppInstanceStatus(ctx, aiStatus) } + } } diff --git a/pkg/pillar/docs/zedkube.md b/pkg/pillar/docs/zedkube.md index 822773ba2d..cbe08ab3e0 100644 --- a/pkg/pillar/docs/zedkube.md +++ b/pkg/pillar/docs/zedkube.md @@ -47,13 +47,9 @@ kubenodeop handles NodeDrainRequest objects which zedkube subscribes to, initiat ## Applications under Kubevirt Mode -### Handling Domain Deletion in Domainmgr +### Handle Domain Apps Status in domainmgr -In normal cases of EVE application launching and running, the domainmgr handles the configuration creation, starts the domain, and monitors the domain's running status. If the starting and monitoring status is not in the running state, then there is something wrong with the runtime process, and the domain is normally stopped and deleted by the domainmgr. Domainmgr keeps a timer, usually 10 minutes, to retry starting the domain again later. - -When the application is launched and managed in KubeVirt mode, the Kubernetes cluster is provisioned for this application, being a VMI (Virtual Machine Instance) replicaSet object or a Pod replicaSet object. It uses a declarative approach to manage the desired state of the applications. The configurations are saved in the Kubernetes database for the Kubernetes controller to use to ensure the objects eventually achieve the correct state if possible. Any particular VMI/Pod state of a domain may not be in working condition at the time when EVE domainmgr checks. 
In the domainmgr code running in KubeVirt mode, it normally skips the hyper.Task().Delete() or hyper.Task().Stop() in domainmgr.go, and lets the Kubernetes cluster have a chance to work its way to bring up the application to the running state.
-
-The exception to the above is in the case of the application itself being removed from the AppInstanceConfig, in which case, the DomainStatus of this application will be deleted, and we have a new boolean DomainConfigDeleted to be set if the DomainStatus is pending for deletion. When the DomainStatus of DomainConfigDeleted is set, the code in domainmgr will allow the Stop() or Delete() operations for Kubernetes to remove the replicaSet of the application.
+When the application is launched and managed in KubeVirt mode, the Kubernetes cluster is provisioned for this application, being a VMI (Virtual Machine Instance) replicaSet object or a Pod replicaSet object. It uses a declarative approach to manage the desired state of the applications. The configurations are saved in the Kubernetes database for the Kubernetes controller to use to ensure the objects eventually achieve the correct state if possible. Any particular VMI/Pod state of a domain may not be in working condition at the time when EVE domainmgr checks. In the domainmgr code running in KubeVirt mode, if it can not contact the Kubernetes API server to query the application, or if the application itself has not been started yet in the cluster, kubevirt.go will return the 'Unknown' status back. It keeps an 'Unknown' status starting timestamp per application. If the 'Unknown' status lasts longer than 5 minutes, the status functions in kubevirt.go will return the 'Halt' status back to domainmgr. The timestamp is cleared once the application status can be obtained from Kubernetes again.
 
 ## Kubernetes Node Draining
 
diff --git a/pkg/pillar/hypervisor/kubevirt.go b/pkg/pillar/hypervisor/kubevirt.go
index 41fae4a6b8..49e934d646 100644
--- a/pkg/pillar/hypervisor/kubevirt.go
+++ b/pkg/pillar/hypervisor/kubevirt.go
@@ -48,6 +48,7 @@
 	waitForPodCheckCounter = 5  // Check 5 times
 	waitForPodCheckTime    = 15 // Check every 15 seconds, don't wait for too long to cause watchdog
 	tolerateSec            = 30 // Pod/VMI reschedule delay after node unreachable seconds
+	unknownToHaltMinutes   = 5  // If VMI is unknown for 5 minutes, return halt state
 )
 
 // MetaDataType is a type for different Domain types
@@ -64,13 +65,14 @@ const (
 
 // VM instance meta data structure. 
type vmiMetaData struct { - repPod *appsv1.ReplicaSet // Handle to the replicaSetof pod - repVMI *v1.VirtualMachineInstanceReplicaSet // Handle to the replicaSet of VMI - domainID int // DomainID understood by domainmgr in EVE - mtype MetaDataType // switch on is ReplicaSet, Pod or is VMI - name string // Display-Name(all lower case) + first 5 bytes of domainName - cputotal uint64 // total CPU in NS so far - maxmem uint32 // total Max memory usage in bytes so far + repPod *appsv1.ReplicaSet // Handle to the replicaSetof pod + repVMI *v1.VirtualMachineInstanceReplicaSet // Handle to the replicaSet of VMI + domainID int // DomainID understood by domainmgr in EVE + mtype MetaDataType // switch on is ReplicaSet, Pod or is VMI + name string // Display-Name(all lower case) + first 5 bytes of domainName + cputotal uint64 // total CPU in NS so far + maxmem uint32 // total Max memory usage in bytes so far + startUnknownTime time.Time // time when the domain returned as unknown status } type kubevirtContext struct { @@ -88,12 +90,13 @@ type kubevirtContext struct { var stateMap = map[string]types.SwState{ "Paused": types.PAUSED, "Running": types.RUNNING, - "NonLocal": types.RUNNING, "shutdown": types.HALTING, "suspended": types.PAUSED, "Pending": types.PENDING, "Scheduling": types.SCHEDULING, "Failed": types.FAILED, + "Halting": types.HALTING, + "Unknown": types.UNKNOWN, } var excludedMetrics = map[string]struct{}{ @@ -512,7 +515,7 @@ func (ctx kubevirtContext) Start(domainName string) error { // Start the Pod ReplicaSet if vmis.mtype == IsMetaReplicaPod { - err := StartReplicaPodContiner(ctx, ctx.vmiList[domainName].repPod) + err := StartReplicaPodContiner(ctx, vmis) return err } else if vmis.mtype != IsMetaReplicaVMI { return logError("Start domain %s wrong type", domainName) @@ -544,7 +547,7 @@ func (ctx kubevirtContext) Start(domainName string) error { } logrus.Infof("Started Kubevirt domain replicaset %s, VMI replicaset %s", domainName, vmis.name) - err = waitForVMI(vmis.name, nodeName, true) + err = waitForVMI(vmis, nodeName, true) if err != nil { logrus.Errorf("couldn't start VMI %v", err) return err @@ -669,9 +672,9 @@ func (ctx kubevirtContext) Info(domainName string) (int, types.SwState, error) { return 0, types.HALTED, logError("info domain %s failed to get vmlist", domainName) } if vmis.mtype == IsMetaReplicaPod { - res, err = InfoReplicaSetContainer(ctx, vmis.name) + res, err = InfoReplicaSetContainer(ctx, vmis) } else { - res, err = getVMIStatus(vmis.name, nodeName) + res, err = getVMIStatus(vmis, nodeName) } if err != nil { return 0, types.BROKEN, logError("domain %s failed to get info: %v", domainName, err) @@ -680,10 +683,10 @@ func (ctx kubevirtContext) Info(domainName string) (int, types.SwState, error) { if effectiveDomainState, matched := stateMap[res]; !matched { return 0, types.BROKEN, logError("domain %s reported to be in unexpected state %s", domainName, res) } else { - if _, ok := ctx.vmiList[domainName]; !ok { + if _, ok := ctx.vmiList[domainName]; !ok { // domain is deleted return 0, types.HALTED, logError("domain %s is deleted", domainName) } - return ctx.vmiList[domainName].domainID, effectiveDomainState, nil + return ctx.vmiList[domainName].domainID, effectiveDomainState, err } } @@ -703,12 +706,12 @@ func (ctx kubevirtContext) Cleanup(domainName string) error { return logError("cleanup domain %s failed to get vmlist", domainName) } if vmis.mtype == IsMetaReplicaPod { - _, err = InfoReplicaSetContainer(ctx, vmis.name) + _, err = InfoReplicaSetContainer(ctx, vmis) if 
err == nil { err = ctx.Delete(domainName) } } else if vmis.mtype == IsMetaReplicaVMI { - err = waitForVMI(vmis.name, nodeName, false) + err = waitForVMI(vmis, nodeName, false) } else { err = logError("cleanup domain %s wrong type", domainName) } @@ -735,8 +738,9 @@ func convertToKubernetesFormat(b int) string { return fmt.Sprintf("%.1fYi", bf) } -func getVMIStatus(repVmiName, nodeName string) (string, error) { +func getVMIStatus(vmis *vmiMetaData, nodeName string) (string, error) { + repVmiName := vmis.name kubeconfig, err := kubeapi.GetKubeConfig() if err != nil { return "", logError("couldn't get the Kube Config: %v", err) @@ -751,14 +755,18 @@ func getVMIStatus(repVmiName, nodeName string) (string, error) { // List VMIs with a label selector that matches the replicaset name vmiList, err := virtClient.VirtualMachineInstance(kubeapi.EVEKubeNameSpace).List(context.Background(), &metav1.ListOptions{}) if err != nil { - return "", logError("getVMIStatus: domain %s failed to get VMI info %s", repVmiName, err) + retStatus, err2 := checkAndReturnStatus(vmis, true) + logError("getVMIStatus: domain %s failed to get VMI info %s, return %s", repVmiName, err, retStatus) + return retStatus, err2 } if len(vmiList.Items) == 0 { - return "", logError("getVMIStatus: No VMI found with the given replicaset name %s", repVmiName) + retStatus, err2 := checkAndReturnStatus(vmis, true) + logError("getVMIStatus: No VMI found with the given replicaset name %s, return %s", repVmiName, retStatus) + return retStatus, err2 } // Use the first VMI in the list - var foundNonlocal bool + var nonLocalStatus string var targetVMI *v1.VirtualMachineInstance for _, vmi := range vmiList.Items { if vmi.Status.NodeName == nodeName { @@ -768,22 +776,27 @@ func getVMIStatus(repVmiName, nodeName string) (string, error) { } } else { if vmi.GenerateName == repVmiName { - foundNonlocal = true + nonLocalStatus = fmt.Sprintf("%v", vmi.Status.Phase) } } } if targetVMI == nil { - if foundNonlocal { - return "NonLocal", nil + if nonLocalStatus != "" { + _, _ = checkAndReturnStatus(vmis, false) // reset the unknown timestamp + return nonLocalStatus, nil } - return "", logError("getVMIStatus: No VMI %s found with the given nodeName %s", repVmiName, nodeName) + retStatus, err2 := checkAndReturnStatus(vmis, true) + logError("getVMIStatus: No VMI %s found with the given nodeName %s, return %s", repVmiName, nodeName, retStatus) + return retStatus, err2 } res := fmt.Sprintf("%v", targetVMI.Status.Phase) + _, _ = checkAndReturnStatus(vmis, false) // reset the unknown timestamp return res, nil } // Inspired from kvm.go -func waitForVMI(vmiName, nodeName string, available bool) error { +func waitForVMI(vmis *vmiMetaData, nodeName string, available bool) error { + vmiName := vmis.name maxDelay := time.Minute * 5 // 5mins ?? 
lets keep it for now delay := time.Second var waited time.Duration @@ -795,7 +808,7 @@ func waitForVMI(vmiName, nodeName string, available bool) error { waited += delay } - state, err := getVMIStatus(vmiName, nodeName) + state, err := getVMIStatus(vmis, nodeName) if err != nil { if available { @@ -1237,7 +1250,8 @@ func setKubeToleration(timeOutSec int64) []k8sv1.Toleration { } // StartReplicaPodContiner starts the ReplicaSet pod -func StartReplicaPodContiner(ctx kubevirtContext, rep *appsv1.ReplicaSet) error { +func StartReplicaPodContiner(ctx kubevirtContext, vmis *vmiMetaData) error { + rep := vmis.repPod err := getConfig(&ctx) if err != nil { return err @@ -1261,7 +1275,7 @@ func StartReplicaPodContiner(ctx kubevirtContext, rep *appsv1.ReplicaSet) error logrus.Infof("StartReplicaPodContiner: Rep %s %s, result %v", rep.ObjectMeta.Name, opStr, result) - err = checkForReplicaPod(ctx, rep.ObjectMeta.Name) + err = checkForReplicaPod(ctx, vmis) if err != nil { logrus.Errorf("StartReplicaPodContiner: check for pod status error %v", err) return err @@ -1270,7 +1284,8 @@ func StartReplicaPodContiner(ctx kubevirtContext, rep *appsv1.ReplicaSet) error return nil } -func checkForReplicaPod(ctx kubevirtContext, repName string) error { +func checkForReplicaPod(ctx kubevirtContext, vmis *vmiMetaData) error { + repName := vmis.repPod.ObjectMeta.Name var i int var status string var err error @@ -1279,11 +1294,11 @@ func checkForReplicaPod(ctx kubevirtContext, repName string) error { logrus.Infof("checkForReplicaPod: check(%d) wait 15 sec, %v", i, repName) time.Sleep(15 * time.Second) - status, err = InfoReplicaSetContainer(ctx, repName) + status, err = InfoReplicaSetContainer(ctx, vmis) if err != nil { logrus.Infof("checkForReplicaPod: repName %s, %v", repName, err) } else { - if status == "Running" || status == "NonLocal" { + if status == "Running" { logrus.Infof("checkForReplicaPod: (%d) status %s, good", i, status) return nil } else { @@ -1299,8 +1314,9 @@ func checkForReplicaPod(ctx kubevirtContext, repName string) error { } // InfoReplicaSetContainer gets the status of the ReplicaSet pod -func InfoReplicaSetContainer(ctx kubevirtContext, repName string) (string, error) { +func InfoReplicaSetContainer(ctx kubevirtContext, vmis *vmiMetaData) (string, error) { + repName := vmis.repPod.ObjectMeta.Name err := getConfig(&ctx) if err != nil { return "", err @@ -1310,24 +1326,17 @@ func InfoReplicaSetContainer(ctx kubevirtContext, repName string) (string, error return "", logError("InfoReplicaSetContainer: couldn't get the pod Config: %v", err) } - nodeName, ok := ctx.nodeNameMap["nodename"] - if !ok { - return "", logError("Failed to get nodeName") - } pods, err := podclientset.CoreV1().Pods(kubeapi.EVEKubeNameSpace).List(context.TODO(), metav1.ListOptions{ LabelSelector: fmt.Sprintf("app=%s", repName), }) - if err != nil { - return "", logError("InfoReplicaSetContainer: couldn't get the pods: %v", err) + if err != nil || len(pods.Items) == 0 { + // we either can not talk to the kubernetes api-server or it can not find our pod + retStatus, err2 := checkAndReturnStatus(vmis, true) + logError("InfoReplicaSetContainer: couldn't get the pods: %v, return %s", err, retStatus) + return retStatus, err2 } - var foundNonlocal bool for _, pod := range pods.Items { - if nodeName != pod.Spec.NodeName { - foundNonlocal = true - logrus.Infof("InfoReplicaSetContainer: rep %s, nodeName %v differ w/ hostname", repName, pod.Spec.NodeName) - continue - } var res string // 
https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle/
@@ -1345,16 +1354,15 @@ func InfoReplicaSetContainer(ctx kubevirtContext, repName string) (string, error
 		default:
 			res = "Scheduling"
 		}
-		logrus.Infof("InfoReplicaSetContainer: rep %s, nodeName %v, status %s", pod.ObjectMeta.Name, pod.Spec.NodeName, res)
+		logrus.Infof("InfoReplicaSetContainer: rep %s, pod nodeName %v, status %s", pod.ObjectMeta.Name, pod.Spec.NodeName, res)
 		if pod.Status.Phase != k8sv1.PodRunning {
 			continue
 		}
+		_, _ = checkAndReturnStatus(vmis, false) // reset the unknown timestamp
 		return res, nil
 	}
-	if foundNonlocal {
-		return "NonLocal", nil
-	}
+
 	return "", logError("InfoReplicaSetContainer: pod not ready")
 }
@@ -1705,3 +1713,27 @@ func getMyNodeUUID(ctx *kubevirtContext, nodeName string) {
 		ctx.nodeNameMap["nodename"] = nodeName
 	}
 }
+
+// checkAndReturnStatus
+// when the passed-in gotUnknown is true, we failed to get the kubernetes pod; return 'Unknown' for
+// the status, and if the 'Unknown' status has lasted more than 5 minutes, return 'Halting' with an error
+// when gotUnknown is false, we reset the unknown timestamp
+// see the detailed description in the 'zedkube.md' section 'Handle Domain Apps Status in domainmgr'
+func checkAndReturnStatus(vmis *vmiMetaData, gotUnknown bool) (string, error) {
+	if gotUnknown {
+		if vmis.startUnknownTime.IsZero() { // first time, set the unknown timestamp
+			vmis.startUnknownTime = time.Now()
+			return "Unknown", nil
+		} else {
+			if time.Since(vmis.startUnknownTime) > unknownToHaltMinutes*time.Minute {
+				return "Halting", fmt.Errorf("unknown status for more than 5 minutes")
+			} else {
+				return "Unknown", nil
+			}
+		}
+	} else {
+		// we got the pod status, reset the unknown timestamp
+		vmis.startUnknownTime = time.Time{}
+	}
+	return "", nil
+}
diff --git a/pkg/pillar/types/domainmgrtypes.go b/pkg/pillar/types/domainmgrtypes.go
index a4f0dfb146..cdcb5c3d9f 100644
--- a/pkg/pillar/types/domainmgrtypes.go
+++ b/pkg/pillar/types/domainmgrtypes.go
@@ -327,11 +327,6 @@ type DomainStatus struct {
 	FmlCustomResolution string
 	// if this node is the DNiD of the App
 	IsDNidNode bool
-	// handle DomainConfig Delete
-	// for kubevirt EVE, App is configured into the kubernetes database,
-	// there is no need to delete the domain if the status check fails.
-	// But this flag is used to handle if the domain config is deleted.
-	DomainConfigDeleted bool
 	// the device name is used for kube node name
 	// Need to pass in from domainmgr to hypervisor context commands
 	NodeName string
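
Reviewer note (not part of the patch): a minimal standalone Go sketch of the Unknown-to-Halting behavior the patch introduces in checkAndReturnStatus(). The vmiMetaData here is a trimmed stand-in with only the timestamp field, so the timeout logic can be exercised in isolation; names and values mirror the patch, everything else is illustrative.

package main

import (
	"fmt"
	"time"
)

const unknownToHaltMinutes = 5 // same constant as added in kubevirt.go

// vmiMetaData is a stand-in holding only the field this sketch needs.
type vmiMetaData struct {
	startUnknownTime time.Time
}

// checkAndReturnStatus mirrors the patch logic: the first failure to query the
// cluster starts the 'Unknown' clock, failures past the limit become 'Halting',
// and a successful query resets the clock.
func checkAndReturnStatus(vmis *vmiMetaData, gotUnknown bool) (string, error) {
	if !gotUnknown {
		vmis.startUnknownTime = time.Time{} // got a real status, reset the clock
		return "", nil
	}
	if vmis.startUnknownTime.IsZero() {
		vmis.startUnknownTime = time.Now()
		return "Unknown", nil
	}
	if time.Since(vmis.startUnknownTime) > unknownToHaltMinutes*time.Minute {
		return "Halting", fmt.Errorf("unknown status for more than %d minutes", unknownToHaltMinutes)
	}
	return "Unknown", nil
}

func main() {
	vmis := &vmiMetaData{}

	s, _ := checkAndReturnStatus(vmis, true) // first API failure
	fmt.Println(s)                           // Unknown

	// Pretend the failure started six minutes ago.
	vmis.startUnknownTime = time.Now().Add(-6 * time.Minute)
	s, err := checkAndReturnStatus(vmis, true)
	fmt.Println(s, err) // Halting unknown status for more than 5 minutes

	// A successful query clears the timestamp again.
	_, _ = checkAndReturnStatus(vmis, false)
	fmt.Println(vmis.startUnknownTime.IsZero()) // true
}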