Skip to content

Commit

Permalink
Implemented a new feature: extracting the RAID member disk name.
Browse files Browse the repository at this point in the history
Modified the smartctl.device parameter: it can now be set to values such as sda, megaraid_disk_01, etc.

Signed-off-by: Denys <[email protected]>
  • Loading branch information
mort authored and zxzharmlesszxz committed Mar 8, 2024
1 parent 84d8cc3 commit 3a012b5
Show file tree
Hide file tree
Showing 4 changed files with 69 additions and 30 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
/.release
/.tarballs
debug/
.idea/

Manifest
smartctl_exporter
Expand Down
51 changes: 39 additions & 12 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ package main
import (
"net/http"
"os"
"strings"
"sync"
"time"

Expand All @@ -32,11 +33,18 @@ import (
webflag "github.com/prometheus/exporter-toolkit/web/kingpinflag"
)

// Device describes one entry of the "devices" array returned by the smartctl
// device scan, as collected by scanDevices.
// NOTE(review): Info_Name does not follow Go's MixedCaps convention (InfoName);
// renaming it would require updating every user of the field.
type Device struct {
// Name is the raw "name" value from the scan; it is passed to smartctl as
// the device argument when reading S.M.A.R.T. data.
Name string `json:"name"`
// Info_Name is the short disk name that extractDiskName derives from the
// scan's "info_name" value; it is used for include/exclude filtering and
// as the device identifier in log messages.
Info_Name string `json:"info_name"`
// Type is the raw "type" value from the scan; it is passed to smartctl
// via the "-d" option.
Type string `json:"type"`
}

// SMARTctlManagerCollector implements the Collector interface.
type SMARTctlManagerCollector struct {
CollectPeriod string
CollectPeriodDuration time.Duration
Devices []string
Devices []Device

logger log.Logger
mutex sync.Mutex
Expand Down Expand Up @@ -106,24 +114,43 @@ var (
)

// scanDevices uses smartctl to gather the list of available devices.
func scanDevices(logger log.Logger) []string {
func scanDevices(logger log.Logger) []Device {
filter := newDeviceFilter(*smartctlDeviceExclude, *smartctlDeviceInclude)

json := readSMARTctlDevices(logger)
scanDevices := json.Get("devices").Array()
var scanDeviceResult []string
var scanDeviceResult []Device
for _, d := range scanDevices {
deviceName := d.Get("name").String()
deviceName := extractDiskName(strings.TrimSpace(d.Get("info_name").String()))
if filter.ignored(deviceName) {
level.Info(logger).Log("msg", "Ignoring device", "name", deviceName)
} else {
level.Info(logger).Log("msg", "Found device", "name", deviceName)
scanDeviceResult = append(scanDeviceResult, deviceName)
device := Device{
Name: d.Get("name").String(),
Info_Name: deviceName,
Type: d.Get("type").String(),
}
scanDeviceResult = append(scanDeviceResult, device)
}
}
return scanDeviceResult
}

// filterDevices returns the subset of devices whose Info_Name contains at
// least one of the given filter strings (plain substring match). Devices keep
// their original order and each device is added at most once. Every comparison
// that is attempted is reported at debug level. The result is nil when nothing
// matches.
func filterDevices(logger log.Logger, devices []Device, filters []string) []Device {
	var kept []Device

nextDevice:
	for i := range devices {
		dev := devices[i]
		for _, pattern := range filters {
			level.Debug(logger).Log("msg", "filterDevices", "device", dev.Info_Name, "filter", pattern)
			if !strings.Contains(dev.Info_Name, pattern) {
				continue
			}
			// First matching filter wins; move on so the device is not
			// appended again for a later filter.
			kept = append(kept, dev)
			continue nextDevice
		}
	}

	return kept
}

func main() {
metricsPath := kingpin.Flag(
"web.telemetry-path", "Path under which to expose metrics",
Expand All @@ -140,21 +167,21 @@ func main() {
level.Info(logger).Log("msg", "Starting smartctl_exporter", "version", version.Info())
level.Info(logger).Log("msg", "Build context", "build_context", version.BuildContext())

var devices []string
var devices []Device
devices = scanDevices(logger)
level.Info(logger).Log("msg", "Number of devices found", "count", len(devices))
if len(*smartctlDevices) > 0 {
devices = *smartctlDevices
} else {
level.Info(logger).Log("msg", "No devices specified, trying to load them automatically")
devices = scanDevices(logger)
level.Info(logger).Log("msg", "Number of devices found", "count", len(devices))
level.Info(logger).Log("msg", "Devices specified", "devices", strings.Join(*smartctlDevices, ", "))
devices = filterDevices(logger, devices, *smartctlDevices)
level.Info(logger).Log("msg", "Devices filtered", "count", len(devices))
}

collector := SMARTctlManagerCollector{
Devices: devices,
logger: logger,
}

if *smartctlRescanInterval >= 1*time.Second && len(*smartctlDevices) == 0 {
if *smartctlRescanInterval >= 1*time.Second {
level.Info(logger).Log("msg", "Start background scan process")
level.Info(logger).Log("msg", "Rescanning for devices every", "rescanInterval", *smartctlRescanInterval)
go collector.RescanForDevices()
Expand Down
34 changes: 17 additions & 17 deletions readjson.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,8 @@ func parseJSON(data string) gjson.Result {
}

// Reading fake smartctl json
func readFakeSMARTctl(logger log.Logger, device string) gjson.Result {
s := strings.Split(device, "/")
func readFakeSMARTctl(logger log.Logger, device Device) gjson.Result {
s := strings.Split(device.Name, "/")
filename := fmt.Sprintf("debug/%s.json", s[len(s)-1])
level.Debug(logger).Log("msg", "Read fake S.M.A.R.T. data from json", "filename", filename)
jsonFile, err := os.ReadFile(filename)
Expand All @@ -62,16 +62,16 @@ func readFakeSMARTctl(logger log.Logger, device string) gjson.Result {
}

// Get json from smartctl and parse it
func readSMARTctl(logger log.Logger, device string) (gjson.Result, bool) {
func readSMARTctl(logger log.Logger, device Device) (gjson.Result, bool) {
start := time.Now()
out, err := exec.Command(*smartctlPath, "--json", "--info", "--health", "--attributes", "--tolerance=verypermissive", "--nocheck=standby", "--format=brief", "--log=error", device).Output()
out, err := exec.Command(*smartctlPath, "--json", "--info", "--health", "--attributes", "--tolerance=verypermissive", "--nocheck=standby", "--format=brief", "--log=error", device.Name, "-d", device.Type).Output()
if err != nil {
level.Warn(logger).Log("msg", "S.M.A.R.T. output reading", "err", err, "device", device)
level.Warn(logger).Log("msg", "S.M.A.R.T. output reading", "err", err, "device", device.Info_Name)
}
json := parseJSON(string(out))
rcOk := resultCodeIsOk(logger, device, json.Get("smartctl.exit_status").Int())
jsonOk := jsonIsOk(logger, json)
level.Debug(logger).Log("msg", "Collected S.M.A.R.T. json data", "device", device, "duration", time.Since(start))
level.Debug(logger).Log("msg", "Collected S.M.A.R.T. json data", "device", device.Info_Name, "duration", time.Since(start))
return json, rcOk && jsonOk
}

Expand All @@ -90,7 +90,7 @@ func readSMARTctlDevices(logger log.Logger) gjson.Result {
}

// Select json source and parse
func readData(logger log.Logger, device string) gjson.Result {
func readData(logger log.Logger, device Device) gjson.Result {
if *smartctlFakeData {
return readFakeSMARTctl(logger, device)
}
Expand All @@ -102,7 +102,7 @@ func readData(logger log.Logger, device string) gjson.Result {
jsonCache.Store(device, JSONCache{JSON: json, LastCollect: time.Now()})
j, found := jsonCache.Load(device)
if !found {
level.Warn(logger).Log("msg", "device not found", "device", device)
level.Warn(logger).Log("msg", "device not found", "device", device.Info_Name)
}
return j.(JSONCache).JSON
}
Expand All @@ -112,35 +112,35 @@ func readData(logger log.Logger, device string) gjson.Result {
}

// Parse smartctl return code
func resultCodeIsOk(logger log.Logger, device string, SMARTCtlResult int64) bool {
func resultCodeIsOk(logger log.Logger, device Device, SMARTCtlResult int64) bool {
result := true
if SMARTCtlResult > 0 {
b := SMARTCtlResult
if (b & 1) != 0 {
level.Error(logger).Log("msg", "Command line did not parse", "device", device)
level.Error(logger).Log("msg", "Command line did not parse", "device", device.Info_Name)
result = false
}
if (b & (1 << 1)) != 0 {
level.Error(logger).Log("msg", "Device open failed, device did not return an IDENTIFY DEVICE structure, or device is in a low-power mode", "device", device)
level.Error(logger).Log("msg", "Device open failed, device did not return an IDENTIFY DEVICE structure, or device is in a low-power mode", "device", device.Info_Name)
result = false
}
if (b & (1 << 2)) != 0 {
level.Warn(logger).Log("msg", "Some SMART or other ATA command to the disk failed, or there was a checksum error in a SMART data structure", "device", device)
level.Warn(logger).Log("msg", "Some SMART or other ATA command to the disk failed, or there was a checksum error in a SMART data structure", "device", device.Info_Name)
}
if (b & (1 << 3)) != 0 {
level.Warn(logger).Log("msg", "SMART status check returned 'DISK FAILING'", "device", device)
level.Warn(logger).Log("msg", "SMART status check returned 'DISK FAILING'", "device", device.Info_Name)
}
if (b & (1 << 4)) != 0 {
level.Warn(logger).Log("msg", "We found prefail Attributes <= threshold", "device", device)
level.Warn(logger).Log("msg", "We found prefail Attributes <= threshold", "device", device.Info_Name)
}
if (b & (1 << 5)) != 0 {
level.Warn(logger).Log("msg", "SMART status check returned 'DISK OK' but we found that some (usage or prefail) Attributes have been <= threshold at some time in the past", "device", device)
level.Warn(logger).Log("msg", "SMART status check returned 'DISK OK' but we found that some (usage or prefail) Attributes have been <= threshold at some time in the past", "device", device.Info_Name)
}
if (b & (1 << 6)) != 0 {
level.Warn(logger).Log("msg", "The device error log contains records of errors", "device", device)
level.Warn(logger).Log("msg", "The device error log contains records of errors", "device", device.Info_Name)
}
if (b & (1 << 7)) != 0 {
level.Warn(logger).Log("msg", "The device self-test log contains records of errors. [ATA only] Failed self-tests outdated by a newer successful extended self-test are ignored", "device", device)
level.Warn(logger).Log("msg", "The device self-test log contains records of errors. [ATA only] Failed self-tests outdated by a newer successful extended self-test are ignored", "device", device.Info_Name)
}
}
return result
Expand Down
13 changes: 12 additions & 1 deletion smartctl.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ package main

import (
"fmt"
"regexp"
"strings"

"github.com/go-kit/log"
Expand Down Expand Up @@ -42,6 +43,16 @@ type SMARTctl struct {
device SMARTDevice
}

// diskNameRegexp recognises the device descriptions handled by
// extractDiskName and captures the bare disk identifier in the named group
// "disk". It is compiled once at package initialisation instead of on every
// call.
var diskNameRegexp = regexp.MustCompile(`^(?:/dev/\S+/\S+\s\[|/dev/|\[)(?:\s\[|)(?P<disk>[a-z0-9_]+)(?:\].*|)$`)

// diskNameGroup is the index of the "disk" capture group in diskNameRegexp.
var diskNameGroup = diskNameRegexp.SubexpIndex("disk")

// extractDiskName returns the short disk identifier contained in input.
//
// Accepted forms are a plain device path ("/dev/sda" -> "sda"), a bracketed
// name ("[sda]" -> "sda"), and a bus path followed by a bracketed member name
// ("/dev/bus/0 [megaraid_disk_01]" -> "megaraid_disk_01"); anything after the
// closing bracket is ignored. The identifier may only consist of lowercase
// letters, digits and underscores.
//
// An empty string is returned when input matches none of these forms.
func extractDiskName(input string) string {
	match := diskNameRegexp.FindStringSubmatch(input)
	if match == nil {
		return ""
	}
	return match[diskNameGroup]
}

// NewSMARTctl is smartctl constructor
func NewSMARTctl(logger log.Logger, json gjson.Result, ch chan<- prometheus.Metric) SMARTctl {
var model_name string
Expand All @@ -60,7 +71,7 @@ func NewSMARTctl(logger log.Logger, json gjson.Result, ch chan<- prometheus.Metr
json: json,
logger: logger,
device: SMARTDevice{
device: strings.TrimPrefix(strings.TrimSpace(json.Get("device.name").String()), "/dev/"),
device: extractDiskName(strings.TrimSpace(json.Get("device.info_name").String())),
serial: strings.TrimSpace(json.Get("serial_number").String()),
family: strings.TrimSpace(GetStringIfExists(json, "model_family", "unknown")),
model: strings.TrimSpace(model_name),
Expand Down

0 comments on commit 3a012b5

Please sign in to comment.