From 78273b8b8defc014bcea083e970b9ec09e99e00c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=AA=20de=20Souza=20Pinto?= Date: Thu, 16 Nov 2023 12:57:37 +0100 Subject: [PATCH] pillar/volumemgr: Kick watchdog inside for loops MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit createOrUpdateDiskMetrics() has a lot of for loops that can take more than the watchdog time (500s) to execute. This commit adds some kicks to the watchdog inside these loops. Signed-off-by: Renê de Souza Pinto --- pkg/pillar/cmd/volumemgr/handlediskmetrics.go | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/pkg/pillar/cmd/volumemgr/handlediskmetrics.go b/pkg/pillar/cmd/volumemgr/handlediskmetrics.go index a050f1c8a0..ce601bd5f9 100644 --- a/pkg/pillar/cmd/volumemgr/handlediskmetrics.go +++ b/pkg/pillar/cmd/volumemgr/handlediskmetrics.go @@ -97,7 +97,12 @@ func lookupAppDiskMetric(ctx *volumemgrContext, key string) *types.AppDiskMetric // diskMetricsTimerTask calculates and publishes disk metrics periodically func diskMetricsTimerTask(ctx *volumemgrContext, handleChannel chan interface{}) { log.Functionln("starting report diskMetricsTimerTask timer task") - createOrUpdateDiskMetrics(ctx) + + wdName := agentName + "metrics" + ctx.ps.StillRunning(wdName, warningTime, errorTime) + ctx.ps.RegisterFileWatchdog(wdName) + + createOrUpdateDiskMetrics(ctx, wdName) diskMetricInterval := time.Duration(ctx.globalConfig.GlobalValueInt(types.DiskScanMetricInterval)) * time.Second max := float64(diskMetricInterval) @@ -106,18 +111,15 @@ func diskMetricsTimerTask(ctx *volumemgrContext, handleChannel chan interface{}) // Return handle to caller handleChannel <- diskMetricTicker - wdName := agentName + "metrics" - // Run a periodic timer so we always update StillRunning stillRunning := time.NewTicker(25 * time.Second) ctx.ps.StillRunning(wdName, warningTime, errorTime) - ctx.ps.RegisterFileWatchdog(wdName) for { select { case 
<-diskMetricTicker.C: start := time.Now() - createOrUpdateDiskMetrics(ctx) + createOrUpdateDiskMetrics(ctx, wdName) ctx.ps.CheckMaxTimeTopic(wdName, "createOrUpdateDiskMetrics", start, warningTime, errorTime) @@ -128,13 +130,14 @@ func diskMetricsTimerTask(ctx *volumemgrContext, handleChannel chan interface{}) } // createOrUpdateDiskMetrics creates or updates metrics for all disks, mountpaths and volumeStatuses -func createOrUpdateDiskMetrics(ctx *volumemgrContext) { +func createOrUpdateDiskMetrics(ctx *volumemgrContext, wdName string) { log.Functionf("createOrUpdateDiskMetrics") var diskMetricList []*types.DiskMetric startPubTime := time.Now() disks := diskmetrics.FindDisksPartitions(log) for _, d := range disks { + ctx.ps.StillRunning(wdName, warningTime, errorTime) size, _ := diskmetrics.PartitionSize(log, d) log.Tracef("createOrUpdateDiskMetrics: Disk/partition %s size %d", d, size) var metric *types.DiskMetric @@ -161,6 +164,7 @@ func createOrUpdateDiskMetrics(ctx *volumemgrContext) { for _, path := range types.ReportDiskPaths { var u *types.UsageStat var err error + ctx.ps.StillRunning(wdName, warningTime, errorTime) if path == types.PersistDir { // dedicated handler for PersistDir as we have to use PersistType dependent calculations u, err = diskmetrics.PersistUsageStat(log) @@ -203,6 +207,7 @@ func createOrUpdateDiskMetrics(ctx *volumemgrContext) { log.Tracef("createOrUpdateDiskMetrics: persistUsage %d, elapse sec %v", persistUsage, time.Since(startPubTime).Seconds()) for _, path := range types.ReportDirPaths { + ctx.ps.StillRunning(wdName, warningTime, errorTime) usage, err := diskmetrics.DirUsage(log, path) log.Tracef("createOrUpdateDiskMetrics: ReportDirPath %s usage %d err %v", path, usage, err) if err != nil { @@ -225,6 +230,7 @@ func createOrUpdateDiskMetrics(ctx *volumemgrContext) { log.Tracef("createOrUpdateDiskMetrics: DirPaths in persist, elapse sec %v", time.Since(startPubTime).Seconds()) for _, path := range types.AppPersistPaths { + 
ctx.ps.StillRunning(wdName, warningTime, errorTime) usage, err := diskmetrics.DirUsage(log, path) log.Tracef("createOrUpdateDiskMetrics: AppPersistPath %s usage %d err %v", path, usage, err) if err != nil { @@ -246,6 +252,7 @@ func createOrUpdateDiskMetrics(ctx *volumemgrContext) { } publishDiskMetrics(ctx, diskMetricList...) for _, volumeStatus := range getAllVolumeStatus(ctx) { + ctx.ps.StillRunning(wdName, warningTime, errorTime) if err := createOrUpdateAppDiskMetrics(ctx, volumeStatus); err != nil { log.Errorf("CreateOrUpdateCommonDiskMetrics: exception while publishing diskmetric. %s", err.Error()) }