From 83a85279d17370ed27d6eb50f41b0ebd790e66d5 Mon Sep 17 00:00:00 2001
From: Chris Grindstaff
Date: Mon, 4 Nov 2024 03:32:16 -0500
Subject: [PATCH] feat: improve Harvest memory logging (#3244)

---
 cmd/poller/collector/metrics.go | 98 +++++++++++++++++++++++++++++++++
 cmd/poller/poller.go            | 54 ++++++------------
 2 files changed, 115 insertions(+), 37 deletions(-)
 create mode 100644 cmd/poller/collector/metrics.go

diff --git a/cmd/poller/collector/metrics.go b/cmd/poller/collector/metrics.go
new file mode 100644
index 000000000..f76bcc6ef
--- /dev/null
+++ b/cmd/poller/collector/metrics.go
@@ -0,0 +1,98 @@
+package collector
+
+import (
+	"github.com/netapp/harvest/v2/pkg/slogx"
+	"github.com/netapp/harvest/v2/pkg/util"
+	"github.com/shirou/gopsutil/v4/mem"
+	"github.com/shirou/gopsutil/v4/process"
+	"log/slog"
+	"os"
+	"runtime/metrics"
+)
+
+// MemMetrics is a snapshot of the poller's memory usage. All sizes are in
+// bytes. RSSBytes, VMSBytes, and SwapBytes describe the poller process,
+// PercentageRssUsed is RSSBytes as a percentage of the machine's total memory,
+// and the heap fields are read from the Go runtime (runtime/metrics).
+type MemMetrics struct {
+	RSSBytes          uint64
+	VMSBytes          uint64
+	SwapBytes         uint64
+	PercentageRssUsed float64
+	LiveHeapBytes     uint64
+	HeapSizeBytes     uint64
+	HeapGoalBytes     uint64
+}
+
+// MemoryMetrics collects Go runtime heap metrics and OS-level memory metrics
+// for the current process. Failures are logged, not returned: when an OS
+// lookup fails, the fields that could not be populated keep their zero value.
+func MemoryMetrics() MemMetrics {
+	var memStats MemMetrics
+
+	// Get runtime metrics
+	// See https://github.com/golang/go/blob/master/src/runtime/metrics/doc.go
+	keys := []string{
+		// Heap memory occupied by live objects that were marked by the previous GC.
+		"/gc/heap/live:bytes",
+		// Memory occupied by live objects and dead objects that have not
+		// yet been marked free by the garbage collector.
+		"/memory/classes/heap/objects:bytes",
+		// Heap size target for the end of the GC cycle.
+		"/gc/heap/goal:bytes",
+	}
+	sample := make([]metrics.Sample, len(keys))
+	for i := range keys {
+		sample[i].Name = keys[i]
+	}
+	metrics.Read(sample)
+
+	memStats.LiveHeapBytes = uint64SafeMetric(sample[0])
+	memStats.HeapSizeBytes = uint64SafeMetric(sample[1])
+	memStats.HeapGoalBytes = uint64SafeMetric(sample[2])
+
+	// Get OS memory metrics
+	pid := os.Getpid()
+	pid32, err := util.SafeConvertToInt32(pid)
+	if err != nil {
+		slog.Warn(err.Error(), slog.Int("pid", pid))
+		return memStats
+	}
+
+	proc, err := process.NewProcess(pid32)
+	if err != nil {
+		slog.Error("Failed to lookup process for poller", slogx.Err(err), slog.Int("pid", pid))
+		return memStats
+	}
+	memInfo, err := proc.MemoryInfo()
+	if err != nil {
+		slog.Error("Failed to get memory info for poller", slogx.Err(err), slog.Int("pid", pid))
+		return memStats
+	}
+
+	// Store the raw byte counts; callers convert to KB or MB as needed
+	memStats.RSSBytes = memInfo.RSS
+	memStats.VMSBytes = memInfo.VMS
+	memStats.SwapBytes = memInfo.Swap
+
+	// Calculate memory percentage
+	memory, err := mem.VirtualMemory()
+	if err != nil {
+		slog.Error("Failed to get memory for machine", slogx.Err(err), slog.Int("pid", pid))
+		return memStats
+	}
+
+	memStats.PercentageRssUsed = float64(memInfo.RSS) / float64(memory.Total) * 100
+
+	return memStats
+}
+
+// uint64SafeMetric returns the uint64 value of sample, or zero when the
+// sample is not a uint64. Value.Uint64 panics for any other kind, which
+// includes KindBad (metric unsupported by this Go runtime).
+func uint64SafeMetric(sample metrics.Sample) uint64 {
+	if sample.Value.Kind() != metrics.KindUint64 {
+		return 0
+	}
+	return sample.Value.Uint64()
+}
diff --git a/cmd/poller/poller.go b/cmd/poller/poller.go
index c0dfa40eb..7bcdf8f82 100644
--- a/cmd/poller/poller.go
+++ b/cmd/poller/poller.go
@@ -59,8 +59,6 @@ import (
 	"github.com/netapp/harvest/v2/pkg/tree/node"
 	"github.com/netapp/harvest/v2/pkg/util"
 	goversion "github.com/netapp/harvest/v2/third_party/go-version"
-	"github.com/shirou/gopsutil/v4/mem"
-	"github.com/shirou/gopsutil/v4/process"
 	"github.com/spf13/cobra"
 	"gopkg.in/yaml.v3"
 	"io"
@@ -1408,41 +1406,16 @@ func (p *Poller) mergeConfPath() {
 
 func (p *Poller) addMemoryMetadata() {
 
-	pid := os.Getpid()
-	pid32, err := util.SafeConvertToInt32(pid)
-	if err != nil {
-		slog.Warn(err.Error(), slog.Int("pid", pid))
-		return
-	}
-
-	proc, err := process.NewProcess(pid32)
-	if err != nil {
-		slog.Error("Failed to lookup process for poller", slogx.Err(err), slog.Int("pid", pid))
-		return
-	}
-	memInfo, err := proc.MemoryInfo()
-	if err != nil {
-		slog.Error("Failed to get memory info for poller", slogx.Err(err), slog.Int("pid", pid))
-		return
-	}
+	memMetrics := collector.MemoryMetrics()
 
 	// The unix poller used KB for memory so use the same here
-	_ = p.status.LazySetValueUint64("memory.rss", "host", memInfo.RSS/1024)
-	_ = p.status.LazySetValueUint64("memory.vms", "host", memInfo.VMS/1024)
-	_ = p.status.LazySetValueUint64("memory.swap", "host", memInfo.Swap/1024)
-
-	// Calculate memory percentage
-	memory, err := mem.VirtualMemory()
-	if err != nil {
-		slog.Error("Failed to get memory for machine", slogx.Err(err), slog.Int("pid", pid))
-		return
-	}
-
-	memPercentage := float64(memInfo.RSS) / float64(memory.Total) * 100
-	_ = p.status.LazySetValueFloat64("memory_percent", "host", memPercentage)
+	_ = p.status.LazySetValueUint64("memory.rss", "host", memMetrics.RSSBytes/1024)
+	_ = p.status.LazySetValueUint64("memory.vms", "host", memMetrics.VMSBytes/1024)
+	_ = p.status.LazySetValueUint64("memory.swap", "host", memMetrics.SwapBytes/1024)
+	_ = p.status.LazySetValueFloat64("memory_percent", "host", memMetrics.PercentageRssUsed)
 
 	// Update maxRssBytes
-	p.maxRssBytes = max(p.maxRssBytes, memInfo.RSS)
+	p.maxRssBytes = max(p.maxRssBytes, memMetrics.RSSBytes)
 }
 
 func (p *Poller) logPollerMetadata() (map[string]*matrix.Matrix, error) {
@@ -1451,19 +1424,26 @@ func (p *Poller) logPollerMetadata() (map[string]*matrix.Matrix, error) {
 		slog.Error("Failed to send Harvest version", slogx.Err(err))
 	}
 
-	rss, _ := p.status.LazyGetValueFloat64("memory.rss", "host")
 	remoteName := p.status.GetInstance("remote").GetLabel("name")
 	remoteVersion := p.status.GetInstance("remote").GetLabel("version")
 
+	memMetrics := collector.MemoryMetrics()
+	p.maxRssBytes = max(p.maxRssBytes, memMetrics.RSSBytes)
+
 	slog.Info(
 		"Metadata",
-		slog.Float64("rssMB", rss/1024),
-		slog.Uint64("maxRssMB", p.maxRssBytes/1024/1024),
-		slog.String("version", strings.TrimSpace(version.String())),
 		slog.Group("remote",
 			slog.String("name", remoteName),
 			slog.String("version", remoteVersion),
 		),
+		slog.String("version", strings.TrimSpace(version.String())),
+		slog.Group("mem",
+			slog.Uint64("liveHeapMB", memMetrics.LiveHeapBytes/1024/1024),
+			slog.Uint64("heapMB", memMetrics.HeapSizeBytes/1024/1024),
+			slog.Uint64("heapGoalMB", memMetrics.HeapGoalBytes/1024/1024),
+			slog.Uint64("rssMB", memMetrics.RSSBytes/1024/1024),
+			slog.Uint64("maxRssMB", p.maxRssBytes/1024/1024),
+		),
 		slog.Uint64("uptimeSeconds", uint64(time.Since(p.startTime).Seconds())),
 	)
 