Skip to content

Commit

Permalink
feat(inputs.ipmi_sensor): Collect additional commands (influxdata#15495)
Browse files Browse the repository at this point in the history
  • Loading branch information
powersj authored Jun 18, 2024
1 parent 784ede9 commit 6fb4276
Show file tree
Hide file tree
Showing 4 changed files with 301 additions and 65 deletions.
59 changes: 38 additions & 21 deletions plugins/inputs/ipmi_sensor/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,49 +44,66 @@ See the [CONFIGURATION.md][CONFIGURATION.md] for more details.
```toml @sample.conf
# Read metrics from the bare metal servers via IPMI
[[inputs.ipmi_sensor]]
## optionally specify the path to the ipmitool executable
## Specify the path to the ipmitool executable
# path = "/usr/bin/ipmitool"
##

## Use sudo
## Setting 'use_sudo' to true will make use of sudo to run ipmitool.
## Sudo must be configured to allow the telegraf user to run ipmitool
## without a password.
# use_sudo = false
##
## optionally force session privilege level. Can be CALLBACK, USER, OPERATOR, ADMINISTRATOR
# privilege = "ADMINISTRATOR"
##
## optionally specify one or more servers via a url matching

## Servers
## Specify one or more servers via a url. If no servers are specified, local
## machine sensor stats will be queried. Uses the format:
## [username[:password]@][protocol[(address)]]
## e.g.
## root:passwd@lan(127.0.0.1)
##
## if no servers are specified, local machine sensor stats will be queried
##
## e.g. root:passwd@lan(127.0.0.1)
# servers = ["USERID:PASSW0RD@lan(192.168.1.1)"]

## Recommended: use metric 'interval' that is a multiple of 'timeout' to avoid
## gaps or overlap in pulled data
interval = "30s"
## Session privilege level
## Choose from: CALLBACK, USER, OPERATOR, ADMINISTRATOR
# privilege = "ADMINISTRATOR"

## Timeout
## Timeout for the ipmitool command to complete.
# timeout = "20s"

## Timeout for the ipmitool command to complete. Default is 20 seconds.
timeout = "20s"
## Metric schema version
## See the plugin readme for more information on schema versioning.
# metric_version = 1

## Schema Version: (Optional, defaults to version 1)
metric_version = 2
## Sensors to collect
## Choose from:
## * sdr: default, collects sensor data records
## * chassis_power_status: collects the power status of the chassis
## * dcmi_power_reading: collects the power readings from the Data Center Management Interface
# sensors = ["sdr"]

## Hex key
## Optionally provide the hex key for the IPMI connection.
# hex_key = ""

## Cache
## If ipmitool should use a cache
## for me ipmitool runs about 2 to 10 times faster with cache enabled on HP G10 servers (when using ubuntu20.04)
## the cache file may not work well for you if some sensors come up late
## Using a cache can speed up collection times depending on your device.
# use_cache = false

## Path to the ipmitool cache file (defaults to OS temp dir)
## The provided path must exist and must be writable
# cache_path = ""
```

## Sensors

By default the plugin collects data via the `sdr` command and returns those
values. However, there are additional sensor options that can be called on:

- `chassis_power_status` - returns 0 or 1 depending on the output of
`chassis power status`
- `dcmi_power_reading` - Returns the watt values from `dcmi power reading`

These sensor options are not affected by the metric version.

## Metrics

Version 1 schema:
Expand Down
137 changes: 114 additions & 23 deletions plugins/inputs/ipmi_sensor/ipmi_sensor.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import (
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/config"
"github.com/influxdata/telegraf/internal"
"github.com/influxdata/telegraf/internal/choice"
"github.com/influxdata/telegraf/plugins/inputs"
)

Expand All @@ -31,21 +32,22 @@ var (
reV2ParseLine = regexp.MustCompile(`^(?P<name>[^|]*)\|[^|]+\|(?P<status_code>[^|]*)\|(?P<entity_id>[^|]*)\|(?:(?P<description>[^|]+))?`)
reV2ParseDescription = regexp.MustCompile(`^(?P<analogValue>-?[0-9.]+)\s(?P<analogUnit>.*)|(?P<status>.+)|^$`)
reV2ParseUnit = regexp.MustCompile(`^(?P<realAnalogUnit>[^,]+)(?:,\s*(?P<statusDesc>.*))?`)
dcmiPowerReading = regexp.MustCompile(`^(?P<name>[^|]*)\:(?P<value>.* Watts)?`)
)

// Ipmi stores the configuration values for the ipmi_sensor input plugin.
// Every option carries an explicit TOML key so the configuration names do not
// depend on how the Go field names are mapped.
type Ipmi struct {
	// Path is the path to the ipmitool executable.
	Path string `toml:"path"`
	// Privilege is the session privilege level
	// (CALLBACK, USER, OPERATOR or ADMINISTRATOR).
	Privilege string `toml:"privilege"`
	// HexKey is the optional hex key for the IPMI connection.
	HexKey string `toml:"hex_key"`
	// Servers lists the remote servers to query; when empty the local
	// machine is queried.
	Servers []string `toml:"servers"`
	// Sensors selects which commands to collect: "sdr",
	// "chassis_power_status" and/or "dcmi_power_reading".
	Sensors []string `toml:"sensors"`
	// Timeout bounds how long the ipmitool command may run.
	Timeout config.Duration `toml:"timeout"`
	// MetricVersion selects the metric schema version.
	MetricVersion int `toml:"metric_version"`
	// UseSudo runs ipmitool through sudo.
	UseSudo bool `toml:"use_sudo"`
	// UseCache makes ipmitool use a cache file.
	UseCache bool `toml:"use_cache"`
	// CachePath is the directory holding the ipmitool cache file.
	CachePath string `toml:"cache_path"`
	// Log is the plugin logger; it is not read from the configuration.
	Log telegraf.Logger `toml:"-"`
}

const cmd = "ipmitool"
Expand All @@ -66,6 +68,12 @@ func (m *Ipmi) Init() error {
if m.CachePath == "" {
m.CachePath = os.TempDir()
}
if len(m.Sensors) == 0 {
m.Sensors = []string{"sdr"}
}
if err := choice.CheckSlice(m.Sensors, []string{"sdr", "chassis_power_status", "dcmi_power_reading"}); err != nil {
return err
}

// Check parameters
if m.Path == "" {
Expand All @@ -87,32 +95,47 @@ func (m *Ipmi) Gather(acc telegraf.Accumulator) error {
wg.Add(1)
go func(a telegraf.Accumulator, s string) {
defer wg.Done()
err := m.parse(a, s)
if err != nil {
a.AddError(err)
for _, sensor := range m.Sensors {
a.AddError(m.parse(a, s, sensor))
}
}(acc, server)
}
wg.Wait()
} else {
err := m.parse(acc, "")
if err != nil {
return err
for _, sensor := range m.Sensors {
err := m.parse(acc, "", sensor)
if err != nil {
return err
}
}
}

return nil
}

func (m *Ipmi) parse(acc telegraf.Accumulator, server string) error {
func (m *Ipmi) parse(acc telegraf.Accumulator, server string, sensor string) error {
var command []string
switch sensor {
case "sdr":
command = append(command, "sdr")
case "chassis_power_status":
command = append(command, "chassis", "power", "status")
case "dcmi_power_reading":
command = append(command, "dcmi", "power", "reading")
default:
return fmt.Errorf("unknown sensor type %q", sensor)
}

opts := make([]string, 0)
hostname := ""
if server != "" {
conn := NewConnection(server, m.Privilege, m.HexKey)
hostname = conn.Hostname
opts = conn.options()
}
opts = append(opts, "sdr")

opts = append(opts, command...)

if m.UseCache {
cacheFile := filepath.Join(m.CachePath, server+"_ipmi_cache")
_, err := os.Stat(cacheFile)
Expand All @@ -134,7 +157,7 @@ func (m *Ipmi) parse(acc telegraf.Accumulator, server string) error {
}
opts = append(opts, "-S", cacheFile)
}
if m.MetricVersion == 2 {
if m.MetricVersion == 2 && sensor == "sdr" {
opts = append(opts, "elist")
}
name := m.Path
Expand All @@ -149,10 +172,78 @@ func (m *Ipmi) parse(acc telegraf.Accumulator, server string) error {
if err != nil {
return fmt.Errorf("failed to run command %q: %w - %s", strings.Join(sanitizeIPMICmd(cmd.Args), " "), err, string(out))
}
if m.MetricVersion == 2 {
return m.parseV2(acc, hostname, out, timestamp)

switch sensor {
case "sdr":
if m.MetricVersion == 2 {
return m.parseV2(acc, hostname, out, timestamp)
} else {
return m.parseV1(acc, hostname, out, timestamp)
}
case "chassis_power_status":
return m.parseChassisPowerStatus(acc, hostname, out, timestamp)
case "dcmi_power_reading":
return m.parseDCMIPowerReading(acc, hostname, out, timestamp)
}

return fmt.Errorf("unknown sensor type %q", sensor)
}

// parseChassisPowerStatus scans the output of the `chassis power status`
// command and adds one "ipmi_sensor" metric named "chassis_power_status" for
// every line reporting the power state: value 1 for "on", 0 for "off".
// Lines that report neither state are ignored.
//
// The "server" tag is only attached when hostname is non-empty, matching how
// the DCMI power reading parser tags its metrics.
//
// It returns any error encountered while scanning cmdOut.
func (m *Ipmi) parseChassisPowerStatus(acc telegraf.Accumulator, hostname string, cmdOut []byte, measuredAt time.Time) error {
	// each line will look something like
	//   Chassis Power is on
	//   Chassis Power is off
	scanner := bufio.NewScanner(bytes.NewReader(cmdOut))
	for scanner.Scan() {
		line := scanner.Text()

		var value int
		switch {
		case strings.Contains(line, "Chassis Power is on"):
			value = 1
		case strings.Contains(line, "Chassis Power is off"):
			value = 0
		default:
			// not a power status line
			continue
		}

		tags := map[string]string{"name": "chassis_power_status"}
		// tag the server if we have one
		if hostname != "" {
			tags["server"] = hostname
		}

		acc.AddFields("ipmi_sensor", map[string]interface{}{"value": value}, tags, measuredAt)
	}

	return scanner.Err()
}

// parseDCMIPowerReading scans the output of the `dcmi power reading` command
// and adds one "ipmi_sensor" metric per line that carries a "<number> <unit>"
// value. The text before the colon becomes the "name" tag and the unit becomes
// the "unit" tag (both passed through transform); the number becomes the
// "value" field. The "server" tag is only attached when hostname is non-empty.
//
// Lines that do not match the dcmiPowerReading pattern, that do not split into
// exactly a number and a unit, or whose number cannot be converted are skipped.
//
// It returns any error encountered while scanning cmdOut.
func (m *Ipmi) parseDCMIPowerReading(acc telegraf.Accumulator, hostname string, cmdOut []byte, measuredAt time.Time) error {
	// each line will look something like
	// Current Power Reading : 0.000
	scanner := bufio.NewScanner(bytes.NewReader(cmdOut))
	for scanner.Scan() {
		ipmiFields := m.extractFieldsFromRegex(dcmiPowerReading, scanner.Text())
		if len(ipmiFields) != 2 {
			continue
		}

		// Split on any run of whitespace so extra padding between the number
		// and the unit does not cause the reading to be dropped.
		valunit := strings.Fields(ipmiFields["value"])
		if len(valunit) != 2 {
			continue
		}

		value, err := aToFloat(valunit[0])
		if err != nil {
			continue
		}

		tags := map[string]string{
			"name": transform(ipmiFields["name"]),
			"unit": transform(valunit[1]),
		}

		// tag the server if we have one
		if hostname != "" {
			tags["server"] = hostname
		}

		acc.AddFields("ipmi_sensor", map[string]interface{}{"value": value}, tags, measuredAt)
	}

	return scanner.Err()
}

func (m *Ipmi) parseV1(acc telegraf.Accumulator, hostname string, cmdOut []byte, measuredAt time.Time) error {
Expand Down
Loading

0 comments on commit 6fb4276

Please sign in to comment.