From 3a012b5bb1139b17d79203e03360cde5d3d3d951 Mon Sep 17 00:00:00 2001 From: mort Date: Fri, 8 Mar 2024 15:39:33 +0100 Subject: [PATCH] Implemented new features - extract raid member disk name. Modified smartctl.device param - now you can set it as sda, megaraid_disk_01, etc. Signed-off-by: Denys --- .gitignore | 1 + main.go | 51 +++++++++++++++++++++++++++++++++++++++------------ readjson.go | 34 +++++++++++++++++----------------- smartctl.go | 13 ++++++++++++- 4 files changed, 69 insertions(+), 30 deletions(-) diff --git a/.gitignore b/.gitignore index 930cf26..357e66f 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,7 @@ /.release /.tarballs debug/ +.idea/ Manifest smartctl_exporter diff --git a/main.go b/main.go index 0048098..7f55518 100644 --- a/main.go +++ b/main.go @@ -16,6 +16,7 @@ package main import ( "net/http" "os" + "strings" "sync" "time" @@ -32,11 +33,18 @@ import ( webflag "github.com/prometheus/exporter-toolkit/web/kingpinflag" ) +// Device +type Device struct { + Name string `json:"name"` + Info_Name string `json:"info_name"` + Type string `json:"type"` +} + // SMARTctlManagerCollector implements the Collector interface. type SMARTctlManagerCollector struct { CollectPeriod string CollectPeriodDuration time.Duration - Devices []string + Devices []Device logger log.Logger mutex sync.Mutex @@ -106,24 +114,43 @@ var ( ) // scanDevices uses smartctl to gather the list of available devices. 
-func scanDevices(logger log.Logger) []string { +func scanDevices(logger log.Logger) []Device { filter := newDeviceFilter(*smartctlDeviceExclude, *smartctlDeviceInclude) json := readSMARTctlDevices(logger) scanDevices := json.Get("devices").Array() - var scanDeviceResult []string + var scanDeviceResult []Device for _, d := range scanDevices { - deviceName := d.Get("name").String() + deviceName := extractDiskName(strings.TrimSpace(d.Get("info_name").String())) if filter.ignored(deviceName) { level.Info(logger).Log("msg", "Ignoring device", "name", deviceName) } else { level.Info(logger).Log("msg", "Found device", "name", deviceName) - scanDeviceResult = append(scanDeviceResult, deviceName) + device := Device{ + Name: d.Get("name").String(), + Info_Name: deviceName, + Type: d.Get("type").String(), + } + scanDeviceResult = append(scanDeviceResult, device) } } return scanDeviceResult } +func filterDevices(logger log.Logger, devices []Device, filters []string) []Device { + var filtered []Device + for _, d := range devices { + for _, filter := range filters { + level.Debug(logger).Log("msg", "filterDevices", "device", d.Info_Name, "filter", filter) + if strings.Contains(d.Info_Name, filter) { + filtered = append(filtered, d) + break + } + } + } + return filtered +} + func main() { metricsPath := kingpin.Flag( "web.telemetry-path", "Path under which to expose metrics", @@ -140,13 +167,13 @@ func main() { level.Info(logger).Log("msg", "Starting smartctl_exporter", "version", version.Info()) level.Info(logger).Log("msg", "Build context", "build_context", version.BuildContext()) - var devices []string + var devices []Device + devices = scanDevices(logger) + level.Info(logger).Log("msg", "Number of devices found", "count", len(devices)) if len(*smartctlDevices) > 0 { - devices = *smartctlDevices - } else { - level.Info(logger).Log("msg", "No devices specified, trying to load them automatically") - devices = scanDevices(logger) - level.Info(logger).Log("msg", "Number of 
devices found", "count", len(devices)) + level.Info(logger).Log("msg", "Devices specified", "devices", strings.Join(*smartctlDevices, ", ")) + devices = filterDevices(logger, devices, *smartctlDevices) + level.Info(logger).Log("msg", "Devices filtered", "count", len(devices)) } collector := SMARTctlManagerCollector{ @@ -154,7 +181,7 @@ func main() { logger: logger, } - if *smartctlRescanInterval >= 1*time.Second && len(*smartctlDevices) == 0 { + if *smartctlRescanInterval >= 1*time.Second { level.Info(logger).Log("msg", "Start background scan process") level.Info(logger).Log("msg", "Rescanning for devices every", "rescanInterval", *smartctlRescanInterval) go collector.RescanForDevices() diff --git a/readjson.go b/readjson.go index 77f2b2d..3f6b3ee 100644 --- a/readjson.go +++ b/readjson.go @@ -49,8 +49,8 @@ func parseJSON(data string) gjson.Result { } // Reading fake smartctl json -func readFakeSMARTctl(logger log.Logger, device string) gjson.Result { - s := strings.Split(device, "/") +func readFakeSMARTctl(logger log.Logger, device Device) gjson.Result { + s := strings.Split(device.Name, "/") filename := fmt.Sprintf("debug/%s.json", s[len(s)-1]) level.Debug(logger).Log("msg", "Read fake S.M.A.R.T. 
data from json", "filename", filename) jsonFile, err := os.ReadFile(filename) @@ -62,16 +62,16 @@ func readFakeSMARTctl(logger log.Logger, device string) gjson.Result { } // Get json from smartctl and parse it -func readSMARTctl(logger log.Logger, device string) (gjson.Result, bool) { +func readSMARTctl(logger log.Logger, device Device) (gjson.Result, bool) { start := time.Now() - out, err := exec.Command(*smartctlPath, "--json", "--info", "--health", "--attributes", "--tolerance=verypermissive", "--nocheck=standby", "--format=brief", "--log=error", device).Output() + out, err := exec.Command(*smartctlPath, "--json", "--info", "--health", "--attributes", "--tolerance=verypermissive", "--nocheck=standby", "--format=brief", "--log=error", device.Name, "-d", device.Type).Output() if err != nil { - level.Warn(logger).Log("msg", "S.M.A.R.T. output reading", "err", err, "device", device) + level.Warn(logger).Log("msg", "S.M.A.R.T. output reading", "err", err, "device", device.Info_Name) } json := parseJSON(string(out)) rcOk := resultCodeIsOk(logger, device, json.Get("smartctl.exit_status").Int()) jsonOk := jsonIsOk(logger, json) - level.Debug(logger).Log("msg", "Collected S.M.A.R.T. json data", "device", device, "duration", time.Since(start)) + level.Debug(logger).Log("msg", "Collected S.M.A.R.T. 
json data", "device", device.Info_Name, "duration", time.Since(start)) return json, rcOk && jsonOk } @@ -90,7 +90,7 @@ func readSMARTctlDevices(logger log.Logger) gjson.Result { } // Select json source and parse -func readData(logger log.Logger, device string) gjson.Result { +func readData(logger log.Logger, device Device) gjson.Result { if *smartctlFakeData { return readFakeSMARTctl(logger, device) } @@ -102,7 +102,7 @@ func readData(logger log.Logger, device string) gjson.Result { jsonCache.Store(device, JSONCache{JSON: json, LastCollect: time.Now()}) j, found := jsonCache.Load(device) if !found { - level.Warn(logger).Log("msg", "device not found", "device", device) + level.Warn(logger).Log("msg", "device not found", "device", device.Info_Name) } return j.(JSONCache).JSON } @@ -112,35 +112,35 @@ func readData(logger log.Logger, device string) gjson.Result { } // Parse smartctl return code -func resultCodeIsOk(logger log.Logger, device string, SMARTCtlResult int64) bool { +func resultCodeIsOk(logger log.Logger, device Device, SMARTCtlResult int64) bool { result := true if SMARTCtlResult > 0 { b := SMARTCtlResult if (b & 1) != 0 { - level.Error(logger).Log("msg", "Command line did not parse", "device", device) + level.Error(logger).Log("msg", "Command line did not parse", "device", device.Info_Name) result = false } if (b & (1 << 1)) != 0 { - level.Error(logger).Log("msg", "Device open failed, device did not return an IDENTIFY DEVICE structure, or device is in a low-power mode", "device", device) + level.Error(logger).Log("msg", "Device open failed, device did not return an IDENTIFY DEVICE structure, or device is in a low-power mode", "device", device.Info_Name) result = false } if (b & (1 << 2)) != 0 { - level.Warn(logger).Log("msg", "Some SMART or other ATA command to the disk failed, or there was a checksum error in a SMART data structure", "device", device) + level.Warn(logger).Log("msg", "Some SMART or other ATA command to the disk failed, or there was a 
checksum error in a SMART data structure", "device", device.Info_Name) } if (b & (1 << 3)) != 0 { - level.Warn(logger).Log("msg", "SMART status check returned 'DISK FAILING'", "device", device) + level.Warn(logger).Log("msg", "SMART status check returned 'DISK FAILING'", "device", device.Info_Name) } if (b & (1 << 4)) != 0 { - level.Warn(logger).Log("msg", "We found prefail Attributes <= threshold", "device", device) + level.Warn(logger).Log("msg", "We found prefail Attributes <= threshold", "device", device.Info_Name) } if (b & (1 << 5)) != 0 { - level.Warn(logger).Log("msg", "SMART status check returned 'DISK OK' but we found that some (usage or prefail) Attributes have been <= threshold at some time in the past", "device", device) + level.Warn(logger).Log("msg", "SMART status check returned 'DISK OK' but we found that some (usage or prefail) Attributes have been <= threshold at some time in the past", "device", device.Info_Name) } if (b & (1 << 6)) != 0 { - level.Warn(logger).Log("msg", "The device error log contains records of errors", "device", device) + level.Warn(logger).Log("msg", "The device error log contains records of errors", "device", device.Info_Name) } if (b & (1 << 7)) != 0 { - level.Warn(logger).Log("msg", "The device self-test log contains records of errors. [ATA only] Failed self-tests outdated by a newer successful extended self-test are ignored", "device", device) + level.Warn(logger).Log("msg", "The device self-test log contains records of errors. 
[ATA only] Failed self-tests outdated by a newer successful extended self-test are ignored", "device", device.Info_Name) } } return result diff --git a/smartctl.go b/smartctl.go index d308d63..af44f02 100644 --- a/smartctl.go +++ b/smartctl.go @@ -15,6 +15,7 @@ package main import ( "fmt" + "regexp" "strings" "github.com/go-kit/log" @@ -42,6 +43,16 @@ type SMARTctl struct { device SMARTDevice } +func extractDiskName(input string) string { + re := regexp.MustCompile(`^(?:/dev/\S+/\S+\s\[|/dev/|\[)(?:\s\[|)(?P<disk>[a-z0-9_]+)(?:\].*|)$`) + match := re.FindStringSubmatch(input) + + if len(match) > 0 { + return match[re.SubexpIndex("disk")] + } + return "" +} + // NewSMARTctl is smartctl constructor func NewSMARTctl(logger log.Logger, json gjson.Result, ch chan<- prometheus.Metric) SMARTctl { var model_name string @@ -60,7 +71,7 @@ func NewSMARTctl(logger log.Logger, json gjson.Result, ch chan<- prometheus.Metr json: json, logger: logger, device: SMARTDevice{ - device: strings.TrimPrefix(strings.TrimSpace(json.Get("device.name").String()), "/dev/"), + device: extractDiskName(strings.TrimSpace(json.Get("device.info_name").String())), serial: strings.TrimSpace(json.Get("serial_number").String()), family: strings.TrimSpace(GetStringIfExists(json, "model_family", "unknown")), model: strings.TrimSpace(model_name),