From 766010a963872138fa75523d6ced5ae44b0e2633 Mon Sep 17 00:00:00 2001 From: Ryan Svihla <105286284+rsvihladremio@users.noreply.github.com> Date: Wed, 18 Dec 2024 17:32:18 +0100 Subject: [PATCH] adding cgroupsv2 pressure stats (#274) * cleaned up the cgroupsv2 output * output memory, cpu, and io pressure container status * get load avg stats from the proc filesystem --- CHANGELOG.md | 21 ++++++++++++++++++++- cmd/local/local.go | 32 +++++++++++++++++++++++++++++++- 2 files changed, 51 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b2cf1e4..f05d035 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,23 @@ # Changelog +## [3.2.8] - 2024-12-18 + +### Added + +* store some very lightweight container cpu, mem, and disk usage statistics +* store cgroup version + +### Fixed + +* guard against integer overflow when copying files using k8s go api +* guard against reading malformed headers when extracting tar + +### Changed + +* updated libraries and go version +* updated linter and security checker + + ## [3.2.7] - 2024-10-09 ### Added @@ -797,7 +815,8 @@ someone has added the PAT which is always available ### Added - able to capture logs, configuration and diagnostic data from Dremio clusters deployed on Kubernetes and on-prem - + +[3.2.8]: https://github.com/dremio/dremio-diagnostic-collector/compare/v3.2.7...v3.2.8 [3.2.7]: https://github.com/dremio/dremio-diagnostic-collector/compare/v3.2.6...v3.2.7 [3.2.6]: https://github.com/dremio/dremio-diagnostic-collector/compare/v3.2.5...v3.2.6 [3.2.5]: https://github.com/dremio/dremio-diagnostic-collector/compare/v3.2.4...v3.2.5 diff --git a/cmd/local/local.go b/cmd/local/local.go index 2c86534..5a7ec2d 100644 --- a/cmd/local/local.go +++ b/cmd/local/local.go @@ -559,7 +559,7 @@ func runCollectOSConfig(c *conf.CollectConf, hook shutdown.CancelHook) error { simplelog.Warningf("unable to write lsblk for os_info.txt due to error %v", err) } const s = `stat -fc %T /sys/fs/cgroup/` - _, err = 
w.Write([]byte(s)) + _, err = w.Write([]byte(fmt.Sprintf("___\n>>> %v\n", s))) if err != nil { simplelog.Warningf("unable to write %s header for os_info.txt due to error %v", s, err) } @@ -568,6 +568,36 @@ func runCollectOSConfig(c *conf.CollectConf, hook shutdown.CancelHook) error { simplelog.Warningf("unable to write %s for os_info.txt due to error %v", s, err) } + // this only retrieves cgroupv2 files and will fail on cgroup1 and of course on prem + cgroupFiles := []string{ + "memory.current", + "memory.swap.current", + "memory.pressure", + "cpu.pressure", + "io.pressure", + } + for _, cgroupFile := range cgroupFiles { + commandToExecute := fmt.Sprintf("cat /sys/fs/cgroup/%v", cgroupFile) + _, err = w.Write([]byte(fmt.Sprintf("___\n>>> %v\n", commandToExecute))) + if err != nil { + simplelog.Warningf("unable to write %s header for os_info.txt due to error %v", commandToExecute, err) + } + err = ddcio.Shell(hook, w, commandToExecute) + if err != nil { + simplelog.Warningf("unable to write %s for os_info.txt due to error %v", commandToExecute, err) + } + } + + loadCommand := "cat /proc/loadavg" + _, err = w.Write([]byte(fmt.Sprintf("___\n>>> %v\n", loadCommand))) + if err != nil { + simplelog.Warningf("unable to write %s header for os_info.txt due to error %v", loadCommand, err) + } + err = ddcio.Shell(hook, w, loadCommand) + if err != nil { + simplelog.Warningf("unable to write %s for os_info.txt due to error %v", loadCommand, err) + } + if c.DremioPID() > 0 { _, err = w.Write([]byte("___\n>>> ps eww\n")) if err != nil {