Skip to content

Commit

Permalink
feat: Tolerate collector failures (#1769)
Browse files Browse the repository at this point in the history
Signed-off-by: Jan-Otto Kröpke <[email protected]>
  • Loading branch information
jkroepke authored Nov 25, 2024
1 parent fd76be3 commit 1a4c6c5
Show file tree
Hide file tree
Showing 121 changed files with 1,728 additions and 1,223 deletions.
5 changes: 0 additions & 5 deletions .golangci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ linters:
- exportloopref
- fatcontext
- funlen
- gochecknoglobals
- gocognit
- goconst
- gocyclo
Expand Down Expand Up @@ -88,7 +87,3 @@ issues:
- text: "don't use ALL_CAPS in Go names; use CamelCase"
linters:
- revive
- path: internal/perfdata/v1/
linters:
- godox
- stylecheck
13 changes: 13 additions & 0 deletions .run/all.run.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
<component name="ProjectRunConfigurationManager">
<configuration default="false" name="all" type="GoApplicationRunConfiguration" factoryName="Go Application" folderName="run">
<module name="windows_exporter" />
<working_directory value="$PROJECT_DIR$" />
<parameters value="--web.listen-address=127.0.0.1:9182 --log.level=debug --collectors.enabled=ad,adcs,adfs,cache,container,cpu,cpu_info,cs,dfsr,dhcp,diskdrive,dns,exchange,filetime,fsrmquota,hyperv,iis,license,logical_disk,logon,memory,mscluster,msmq,mssql,net,netframework,nps,os,pagefile,perfdata,physical_disk,printer,process,remote_fx,scheduled_task,service,smb,smbclient,smtp,system,tcp,terminal_services,textfile,thermalzone,time,udp,update,vmware" />
<sudo value="true" />
<kind value="PACKAGE" />
<package value="github.com/prometheus-community/windows_exporter/cmd/windows_exporter" />
<directory value="$PROJECT_DIR$" />
<filePath value="$PROJECT_DIR$/exporter.go" />
<method v="2" />
</configuration>
</component>
17 changes: 10 additions & 7 deletions cmd/windows_exporter/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ import (
"github.com/prometheus-community/windows_exporter/internal/httphandler"
"github.com/prometheus-community/windows_exporter/internal/log"
"github.com/prometheus-community/windows_exporter/internal/log/flag"
"github.com/prometheus-community/windows_exporter/internal/utils"
"github.com/prometheus-community/windows_exporter/pkg/collector"
"github.com/prometheus/common/version"
"github.com/prometheus/exporter-toolkit/web"
Expand All @@ -64,6 +65,8 @@ func main() {
}

func run() int {
startTime := time.Now()

app := kingpin.New("windows_exporter", "A metrics collector for Windows.")

var (
Expand Down Expand Up @@ -191,7 +194,7 @@ func run() int {

enabledCollectorList := expandEnabledCollectors(*enabledCollectors)
if err := collectors.Enable(enabledCollectorList); err != nil {
- logger.Error("Couldn't enable collectors",
+ logger.Error("couldn't enable collectors",
slog.Any("err", err),
)

Expand All @@ -200,11 +203,11 @@ func run() int {

// Initialize collectors before loading
if err = collectors.Build(logger); err != nil {
- logger.Error("Couldn't load collectors",
- slog.Any("err", err),
- )
-
- return 1
+ for _, err := range utils.SplitError(err) {
+ logger.Warn("couldn't initialize collector",
+ slog.Any("err", err),
+ )
+ }
}

logCurrentUser(logger)
Expand All @@ -228,7 +231,7 @@ func run() int {
mux.HandleFunc("GET /debug/pprof/trace", pprof.Trace)
}

- logger.Info("Starting windows_exporter",
+ logger.Info(fmt.Sprintf("starting windows_exporter in %s", time.Since(startTime)),
slog.String("version", version.Version),
slog.String("branch", version.Branch),
slog.String("revision", version.GetRevision()),
Expand Down
4 changes: 2 additions & 2 deletions internal/collector/ad/ad.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
package ad

import (
"errors"
"fmt"
"log/slog"

Expand All @@ -31,6 +30,7 @@ const Name = "ad"

type Config struct{}

//nolint:gochecknoglobals
var ConfigDefaults = Config{}

type Collector struct {
Expand Down Expand Up @@ -671,7 +671,7 @@ func (c *Collector) Collect(ch chan<- prometheus.Metric) error {
data, ok := perfData["NTDS"]

if !ok {
- return errors.New("perflib query for DirectoryServices (AD) returned empty result set")
+ return fmt.Errorf("failed to collect DirectoryServices (AD) metrics: %w", types.ErrNoData)
}

ch <- prometheus.MustNewConstMetric(
Expand Down
4 changes: 2 additions & 2 deletions internal/collector/adcs/adcs.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
package adcs

import (
"errors"
"fmt"
"log/slog"

Expand All @@ -32,6 +31,7 @@ const Name = "adcs"

type Config struct{}

//nolint:gochecknoglobals
var ConfigDefaults = Config{}

type Collector struct {
Expand Down Expand Up @@ -191,7 +191,7 @@ func (c *Collector) Collect(ch chan<- prometheus.Metric) error {
}

if len(perfData) == 0 {
- return errors.New("perflib query for Certification Authority (ADCS) returned empty result set")
+ return fmt.Errorf("failed to collect Certification Authority (ADCS) metrics: %w", types.ErrNoData)
}

for name, data := range perfData {
Expand Down
8 changes: 4 additions & 4 deletions internal/collector/adfs/adfs.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
package adfs

import (
"errors"
"fmt"
"log/slog"
"maps"
Expand All @@ -34,6 +33,7 @@ const Name = "adfs"

type Config struct{}

//nolint:gochecknoglobals
var ConfigDefaults = Config{}

type Collector struct {
Expand Down Expand Up @@ -160,7 +160,7 @@ func (c *Collector) Build(_ *slog.Logger, _ *mi.Session) error {
avgConfigDBQueryTime,
federationMetadataRequests,
})
- if err != nil && !errors.Is(err, perfdata.ErrNoData) {
+ if err != nil {
return fmt.Errorf("failed to create AD FS collector: %w", err)
}

Expand Down Expand Up @@ -435,13 +435,13 @@ func (c *Collector) Collect(ch chan<- prometheus.Metric) error {
instanceKey := slices.Collect(maps.Keys(data))

if len(instanceKey) == 0 {
- return errors.New("perflib query for ADFS returned empty result set")
+ return fmt.Errorf("failed to collect ADFS metrics: %w", types.ErrNoData)
}

adfsData, ok := data[instanceKey[0]]

if !ok {
- return errors.New("perflib query for ADFS returned empty result set")
+ return fmt.Errorf("failed to collect ADFS metrics: %w", types.ErrNoData)
}

ch <- prometheus.MustNewConstMetric(
Expand Down
4 changes: 2 additions & 2 deletions internal/collector/cache/cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
package cache

import (
"errors"
"fmt"
"log/slog"

Expand All @@ -31,6 +30,7 @@ const Name = "cache"

type Config struct{}

//nolint:gochecknoglobals
var ConfigDefaults = Config{}

// A Collector is a Prometheus Collector for Perflib Cache metrics.
Expand Down Expand Up @@ -322,7 +322,7 @@ func (c *Collector) Collect(ch chan<- prometheus.Metric) error {
cacheData, ok := data[perfdata.InstanceEmpty]

if !ok {
- return errors.New("perflib query for Cache returned empty result set")
+ return fmt.Errorf("failed to collect Cache metrics: %w", types.ErrNoData)
}

ch <- prometheus.MustNewConstMetric(
Expand Down
1 change: 1 addition & 0 deletions internal/collector/container/container.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ const Name = "container"

type Config struct{}

//nolint:gochecknoglobals
var ConfigDefaults = Config{}

// A Collector is a Prometheus Collector for containers metrics.
Expand Down
1 change: 1 addition & 0 deletions internal/collector/cpu/cpu.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ const Name = "cpu"

type Config struct{}

//nolint:gochecknoglobals
var ConfigDefaults = Config{}

type Collector struct {
Expand Down
6 changes: 6 additions & 0 deletions internal/collector/cpu_info/cpu_info.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ const Name = "cpu_info"

type Config struct{}

//nolint:gochecknoglobals
var ConfigDefaults = Config{}

// A Collector is a Prometheus Collector for a few WMI metrics in Win32_Processor.
Expand Down Expand Up @@ -147,6 +148,11 @@ func (c *Collector) Build(_ *slog.Logger, miSession *mi.Session) error {
nil,
)

var dst []miProcessor
if err := c.miSession.Query(&dst, mi.NamespaceRootCIMv2, c.miQuery); err != nil {
return fmt.Errorf("WMI query failed: %w", err)
}

return nil
}

Expand Down
1 change: 1 addition & 0 deletions internal/collector/cs/cs.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ const Name = "cs"

type Config struct{}

//nolint:gochecknoglobals
var ConfigDefaults = Config{}

// A Collector is a Prometheus Collector for WMI metrics.
Expand Down
2 changes: 1 addition & 1 deletion internal/collector/dfsr/const.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,5 +53,5 @@ const (
databaseLookupsTotal = "Database Lookups"
usnJournalRecordsReadTotal = "USN Journal Records Read"
usnJournalRecordsAcceptedTotal = "USN Journal Records Accepted"
- usnJournalUnreadPercentage = "USN Journal Records Unread Percentage"
+ usnJournalUnreadPercentage = "USN Journal Unread Percentage"
)
7 changes: 4 additions & 3 deletions internal/collector/dfsr/dfsr.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ type Config struct {
CollectorsEnabled []string `yaml:"collectors_enabled"`
}

//nolint:gochecknoglobals
var ConfigDefaults = Config{
CollectorsEnabled: []string{"connection", "folder", "volume"},
}
Expand Down Expand Up @@ -542,7 +543,7 @@ func (c *Collector) collectPDHConnection(ch chan<- prometheus.Metric) error {
}

if len(perfData) == 0 {
- return errors.New("perflib query for DFS Replication Connections returned empty result set")
+ return fmt.Errorf("failed to collect DFS Replication Connections metrics: %w", types.ErrNoData)
}

for name, connection := range perfData {
Expand Down Expand Up @@ -620,7 +621,7 @@ func (c *Collector) collectPDHFolder(ch chan<- prometheus.Metric) error {
}

if len(perfData) == 0 {
- return errors.New("perflib query for DFS Replicated Folders returned empty result set")
+ return fmt.Errorf("failed to collect DFS Replicated Folders metrics: %w", types.ErrNoData)
}

for name, folder := range perfData {
Expand Down Expand Up @@ -824,7 +825,7 @@ func (c *Collector) collectPDHVolume(ch chan<- prometheus.Metric) error {
}

if len(perfData) == 0 {
- return errors.New("perflib query for DFS Replication Volumes returned empty result set")
+ return fmt.Errorf("failed to collect DFS Replication Volumes metrics: %w", types.ErrNoData)
}

for name, volume := range perfData {
Expand Down
4 changes: 2 additions & 2 deletions internal/collector/dhcp/dhcp.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
package dhcp

import (
"errors"
"fmt"
"log/slog"

Expand All @@ -31,6 +30,7 @@ const Name = "dhcp"

type Config struct{}

//nolint:gochecknoglobals
var ConfigDefaults = Config{}

// A Collector is a Prometheus Collector perflib DHCP metrics.
Expand Down Expand Up @@ -288,7 +288,7 @@ func (c *Collector) Collect(ch chan<- prometheus.Metric) error {

data, ok := perfData[perfdata.InstanceEmpty]
if !ok {
- return errors.New("perflib query for DHCP Server returned empty result set")
+ return fmt.Errorf("failed to collect DHCP Server metrics: %w", types.ErrNoData)
}

ch <- prometheus.MustNewConstMetric(
Expand Down
7 changes: 7 additions & 0 deletions internal/collector/diskdrive/diskdrive.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ const Name = "diskdrive"

type Config struct{}

//nolint:gochecknoglobals
var ConfigDefaults = Config{}

// A Collector is a Prometheus Collector for a few WMI metrics in Win32_DiskDrive.
Expand Down Expand Up @@ -119,6 +120,11 @@ func (c *Collector) Build(_ *slog.Logger, miSession *mi.Session) error {
nil,
)

var dst []diskDrive
if err := c.miSession.Query(&dst, mi.NamespaceRootCIMv2, c.miQuery); err != nil {
return fmt.Errorf("WMI query failed: %w", err)
}

return nil
}

Expand All @@ -133,6 +139,7 @@ type diskDrive struct {
Availability uint16 `mi:"Availability"`
}

//nolint:gochecknoglobals
var (
allDiskStatus = []string{
"OK",
Expand Down
4 changes: 2 additions & 2 deletions internal/collector/dns/dns.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
package dns

import (
"errors"
"fmt"
"log/slog"

Expand All @@ -31,6 +30,7 @@ const Name = "dns"

type Config struct{}

//nolint:gochecknoglobals
var ConfigDefaults = Config{}

// A Collector is a Prometheus Collector for WMI Win32_PerfRawData_DNS_DNS metrics.
Expand Down Expand Up @@ -284,7 +284,7 @@ func (c *Collector) Collect(ch chan<- prometheus.Metric) error {

data, ok := perfData[perfdata.InstanceEmpty]
if !ok {
- return errors.New("perflib query for DNS returned empty result set")
+ return fmt.Errorf("failed to collect DNS metrics: %w", types.ErrNoData)
}

ch <- prometheus.MustNewConstMetric(
Expand Down
1 change: 1 addition & 0 deletions internal/collector/exchange/exchange.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ type Config struct {
CollectorsEnabled []string `yaml:"collectors_enabled"`
}

//nolint:gochecknoglobals
var ConfigDefaults = Config{
CollectorsEnabled: []string{
adAccessProcesses,
Expand Down
3 changes: 1 addition & 2 deletions internal/collector/exchange/exchange_active_sync.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
package exchange

import (
"errors"
"fmt"

"github.com/prometheus-community/windows_exporter/internal/perfdata"
Expand Down Expand Up @@ -73,7 +72,7 @@ func (c *Collector) collectActiveSync(ch chan<- prometheus.Metric) error {
}

if len(perfData) == 0 {
- return errors.New("perflib query for MSExchange ActiveSync returned empty result set")
+ return fmt.Errorf("failed to collect MSExchange ActiveSync metrics: %w", types.ErrNoData)
}

for _, data := range perfData {
Expand Down
Loading

0 comments on commit 1a4c6c5

Please sign in to comment.