From 31bcf424731f812b66809e59152455c2fca30b91 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan-Otto=20Kr=C3=B6pke?= Date: Thu, 14 Nov 2024 00:06:22 +0100 Subject: [PATCH] system: refactor collector (#1730) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Jan-Otto Kröpke --- exporter.go | 12 +-- internal/collector/cpu/cpu.go | 2 +- internal/collector/system/const.go | 11 +++ internal/collector/system/system.go | 78 ++++++++++--------- internal/collector/terminal_services/const.go | 36 ++++----- .../terminal_services/terminal_services.go | 72 ++++++++--------- internal/perfdata/perftypes/const.go | 4 +- internal/perfdata/v2/collector.go | 46 +++++------ internal/perfdata/v2/pdh.go | 2 +- 9 files changed, 138 insertions(+), 125 deletions(-) create mode 100644 internal/collector/system/const.go diff --git a/exporter.go b/exporter.go index 9dc55210e..305af58c2 100644 --- a/exporter.go +++ b/exporter.go @@ -179,6 +179,12 @@ func run() int { logger.Debug("Logging has Started") + if v, ok := os.LookupEnv("WINDOWS_EXPORTER_PERF_COUNTERS_ENGINE"); ok && v == "pdh" || *togglePDH == "pdh" { + logger.Info("Using performance data helper from PHD.dll for performance counter collection. This is in experimental state.") + + toggle.PHDEnabled = true + } + if *printCollectors { printCollectorsToStdout() @@ -221,12 +227,6 @@ func run() int { logger.Info("Enabled collectors: " + strings.Join(enabledCollectorList, ", ")) - if v, ok := os.LookupEnv("WINDOWS_EXPORTER_PERF_COUNTERS_ENGINE"); ok && v == "pdh" || *togglePDH == "pdh" { - logger.Info("Using performance data helper from PHD.dll for performance counter collection. This is in experimental state.") - - toggle.PHDEnabled = true - } - mux := http.NewServeMux() mux.Handle("GET /health", httphandler.NewHealthHandler()) mux.Handle("GET /version", httphandler.NewVersionHandler()) diff --git a/internal/collector/cpu/cpu.go b/internal/collector/cpu/cpu.go index 628d05dda..36e7104a4 100644 --- a/internal/collector/cpu/cpu.go +++ b/internal/collector/cpu/cpu.go @@ -109,7 +109,7 @@ func (c *Collector) Build(_ *slog.Logger, _ *mi.Session) error { var err error - c.perfDataCollector, err = perfdata.NewCollector(perfdata.V1, "Processor Information", perfdata.AllInstances, counters) + c.perfDataCollector, err = perfdata.NewCollector(perfdata.V2, "Processor Information", perfdata.AllInstances, counters) if err != nil { return fmt.Errorf("failed to create Processor Information collector: %w", err) } diff --git a/internal/collector/system/const.go b/internal/collector/system/const.go new file mode 100644 index 000000000..069ba842a --- /dev/null +++ b/internal/collector/system/const.go @@ -0,0 +1,11 @@ +package system + +const ( + ContextSwitchesPersec = "Context Switches/sec" + ExceptionDispatchesPersec = "Exception Dispatches/sec" + ProcessorQueueLength = "Processor Queue Length" + SystemCallsPersec = "System Calls/sec" + SystemUpTime = "System Up Time" + Processes = "Processes" + Threads = "Threads" +) diff --git a/internal/collector/system/system.go b/internal/collector/system/system.go index 4dd70ccaf..c6a7f4c39 100644 --- a/internal/collector/system/system.go +++ b/internal/collector/system/system.go @@ -4,11 +4,13 @@ package system import ( "errors" + "fmt" "log/slog" "github.com/alecthomas/kingpin/v2" "github.com/prometheus-community/windows_exporter/internal/mi" - v1 "github.com/prometheus-community/windows_exporter/internal/perfdata/v1" + "github.com/prometheus-community/windows_exporter/internal/perfdata" + "github.com/prometheus-community/windows_exporter/internal/perfdata/perftypes" "github.com/prometheus-community/windows_exporter/internal/types" "github.com/prometheus/client_golang/prometheus" ) @@ -23,6 +25,8 @@ var ConfigDefaults = Config{} type Collector struct { config Config + perfDataCollector perfdata.Collector + contextSwitchesTotal *prometheus.Desc exceptionDispatchesTotal *prometheus.Desc processorQueueLength *prometheus.Desc @@ -54,7 +58,7 @@ func (c *Collector) GetName() string { } func (c *Collector) GetPerfCounter(_ *slog.Logger) ([]string, error) { - return []string{"System"}, nil + return []string{}, nil } func (c *Collector) Close(_ *slog.Logger) error { @@ -62,6 +66,23 @@ func (c *Collector) Close(_ *slog.Logger) error { } func (c *Collector) Build(_ *slog.Logger, _ *mi.Session) error { + counters := []string{ + ContextSwitchesPersec, + ExceptionDispatchesPersec, + ProcessorQueueLength, + SystemCallsPersec, + SystemUpTime, + Processes, + Threads, + } + + var err error + + c.perfDataCollector, err = perfdata.NewCollector(perfdata.V2, "System", nil, counters) + if err != nil { + return fmt.Errorf("failed to create System collector: %w", err) + } + c.contextSwitchesTotal = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, Name, "context_switches_total"), "Total number of context switches (WMI source: PerfOS_System.ContextSwitchesPersec)", @@ -117,78 +138,59 @@ func (c *Collector) Build(_ *slog.Logger, _ *mi.Session) error { // Collect sends the metric values for each metric // to the provided prometheus Metric channel. -func (c *Collector) Collect(ctx *types.ScrapeContext, logger *slog.Logger, ch chan<- prometheus.Metric) error { - logger = logger.With(slog.String("collector", Name)) - if err := c.collect(ctx, logger, ch); err != nil { - logger.Error("failed collecting system metrics", - slog.Any("err", err), - ) - - return err +func (c *Collector) Collect(_ *types.ScrapeContext, _ *slog.Logger, ch chan<- prometheus.Metric) error { + if err := c.collect(ch); err != nil { + return fmt.Errorf("failed collecting system metrics: %w", err) } return nil } -// Win32_PerfRawData_PerfOS_System docs: -// - https://web.archive.org/web/20050830140516/http://msdn.microsoft.com/library/en-us/wmisdk/wmi/win32_perfrawdata_perfos_system.asp -type system struct { - ContextSwitchesPersec float64 `perflib:"Context Switches/sec"` - ExceptionDispatchesPersec float64 `perflib:"Exception Dispatches/sec"` - ProcessorQueueLength float64 `perflib:"Processor Queue Length"` - SystemCallsPersec float64 `perflib:"System Calls/sec"` - SystemUpTime float64 `perflib:"System Up Time"` - Processes float64 `perflib:"Processes"` - Threads float64 `perflib:"Threads"` -} - -func (c *Collector) collect(ctx *types.ScrapeContext, logger *slog.Logger, ch chan<- prometheus.Metric) error { - logger = logger.With(slog.String("collector", Name)) - - var dst []system - - if err := v1.UnmarshalObject(ctx.PerfObjects["System"], &dst, logger); err != nil { - return err +func (c *Collector) collect(ch chan<- prometheus.Metric) error { + perfData, err := c.perfDataCollector.Collect() + if err != nil { + return fmt.Errorf("failed to collect System metrics: %w", err) } - if len(dst) == 0 { - return errors.New("no data returned from Performance Counter") + data, ok := perfData[perftypes.EmptyInstance] + if !ok { + return errors.New("query for System returned empty result set") } ch <- prometheus.MustNewConstMetric( c.contextSwitchesTotal, prometheus.CounterValue, - dst[0].ContextSwitchesPersec, + data[ContextSwitchesPersec].FirstValue, ) ch <- prometheus.MustNewConstMetric( c.exceptionDispatchesTotal, prometheus.CounterValue, - dst[0].ExceptionDispatchesPersec, + data[ExceptionDispatchesPersec].FirstValue, ) ch <- prometheus.MustNewConstMetric( c.processorQueueLength, prometheus.GaugeValue, - dst[0].ProcessorQueueLength, + data[ProcessorQueueLength].FirstValue, ) ch <- prometheus.MustNewConstMetric( c.processes, prometheus.GaugeValue, - dst[0].Processes, + data[Processes].FirstValue, ) ch <- prometheus.MustNewConstMetric( c.systemCallsTotal, prometheus.CounterValue, - dst[0].SystemCallsPersec, + data[SystemCallsPersec].FirstValue, ) ch <- prometheus.MustNewConstMetric( c.systemUpTime, prometheus.GaugeValue, - dst[0].SystemUpTime, + data[SystemUpTime].FirstValue, ) ch <- prometheus.MustNewConstMetric( c.threads, prometheus.GaugeValue, - dst[0].Threads, + data[Threads].FirstValue, ) // Windows has no defined limit, and is based off available resources. This currently isn't calculated by WMI and is set to default value. diff --git a/internal/collector/terminal_services/const.go b/internal/collector/terminal_services/const.go index a4176ad21..656e93e94 100644 --- a/internal/collector/terminal_services/const.go +++ b/internal/collector/terminal_services/const.go @@ -1,23 +1,23 @@ package terminal_services const ( - HandleCount = "Handle Count" - PageFaultsPersec = "Page Faults/sec" - PageFileBytes = "Page File Bytes" - PageFileBytesPeak = "Page File Bytes Peak" - PercentPrivilegedTime = "% Privileged Time" - PercentProcessorTime = "% Processor Time" - PercentUserTime = "% User Time" - PoolNonpagedBytes = "Pool Nonpaged Bytes" - PoolPagedBytes = "Pool Paged Bytes" - PrivateBytes = "Private Bytes" - ThreadCount = "Thread Count" - VirtualBytes = "Virtual Bytes" - VirtualBytesPeak = "Virtual Bytes Peak" - WorkingSet = "Working Set" - WorkingSetPeak = "Working Set Peak" + handleCount = "Handle Count" + pageFaultsPersec = "Page Faults/sec" + pageFileBytes = "Page File Bytes" + pageFileBytesPeak = "Page File Bytes Peak" + percentPrivilegedTime = "% Privileged Time" + percentProcessorTime = "% Processor Time" + percentUserTime = "% User Time" + poolNonpagedBytes = "Pool Nonpaged Bytes" + poolPagedBytes = "Pool Paged Bytes" + privateBytes = "Private Bytes" + threadCount = "Thread Count" + virtualBytes = "Virtual Bytes" + virtualBytesPeak = "Virtual Bytes Peak" + workingSet = "Working Set" + workingSetPeak = "Working Set Peak" - SuccessfulConnections = "Successful Connections" - PendingConnections = "Pending Connections" - FailedConnections = "Failed Connections" + successfulConnections = "Successful Connections" + pendingConnections = "Pending Connections" + failedConnections = "Failed Connections" ) diff --git a/internal/collector/terminal_services/terminal_services.go b/internal/collector/terminal_services/terminal_services.go index c9b5e1b6d..5b3dbe4e2 100644 --- a/internal/collector/terminal_services/terminal_services.go +++ b/internal/collector/terminal_services/terminal_services.go @@ -126,21 +126,21 @@ func (c *Collector) Build(logger *slog.Logger, miSession *mi.Session) error { logger = logger.With(slog.String("collector", Name)) counters := []string{ - HandleCount, - PageFaultsPersec, - PageFileBytes, - PageFileBytesPeak, - PercentPrivilegedTime, - PercentProcessorTime, - PercentUserTime, - PoolNonpagedBytes, - PoolPagedBytes, - PrivateBytes, - ThreadCount, - VirtualBytes, - VirtualBytesPeak, - WorkingSet, - WorkingSetPeak, + handleCount, + pageFaultsPersec, + pageFileBytes, + pageFileBytesPeak, + percentPrivilegedTime, + percentProcessorTime, + percentUserTime, + poolNonpagedBytes, + poolPagedBytes, + privateBytes, + threadCount, + virtualBytes, + virtualBytesPeak, + workingSet, + workingSetPeak, } var err error @@ -154,9 +154,9 @@ func (c *Collector) Build(logger *slog.Logger, miSession *mi.Session) error { if c.connectionBrokerEnabled { counters = []string{ - SuccessfulConnections, - PendingConnections, - FailedConnections, + successfulConnections, + pendingConnections, + failedConnections, } var err error @@ -317,94 +317,94 @@ func (c *Collector) collectTSSessionCounters(ch chan<- prometheus.Metric) error ch <- prometheus.MustNewConstMetric( c.handleCount, prometheus.GaugeValue, - data[HandleCount].FirstValue, + data[handleCount].FirstValue, name, ) ch <- prometheus.MustNewConstMetric( c.pageFaultsPerSec, prometheus.CounterValue, - data[PageFaultsPersec].FirstValue, + data[pageFaultsPersec].FirstValue, name, ) ch <- prometheus.MustNewConstMetric( c.pageFileBytes, prometheus.GaugeValue, - data[PageFileBytes].FirstValue, + data[pageFileBytes].FirstValue, name, ) ch <- prometheus.MustNewConstMetric( c.pageFileBytesPeak, prometheus.GaugeValue, - data[PageFileBytesPeak].FirstValue, + data[pageFileBytesPeak].FirstValue, name, ) ch <- prometheus.MustNewConstMetric( c.percentCPUTime, prometheus.CounterValue, - data[PercentPrivilegedTime].FirstValue, + data[percentPrivilegedTime].FirstValue, name, "privileged", ) ch <- prometheus.MustNewConstMetric( c.percentCPUTime, prometheus.CounterValue, - data[PercentProcessorTime].FirstValue, + data[percentProcessorTime].FirstValue, name, "processor", ) ch <- prometheus.MustNewConstMetric( c.percentCPUTime, prometheus.CounterValue, - data[PercentUserTime].FirstValue, + data[percentUserTime].FirstValue, name, "user", ) ch <- prometheus.MustNewConstMetric( c.poolNonPagedBytes, prometheus.GaugeValue, - data[PoolNonpagedBytes].FirstValue, + data[poolNonpagedBytes].FirstValue, name, ) ch <- prometheus.MustNewConstMetric( c.poolPagedBytes, prometheus.GaugeValue, - data[PoolPagedBytes].FirstValue, + data[poolPagedBytes].FirstValue, name, ) ch <- prometheus.MustNewConstMetric( c.privateBytes, prometheus.GaugeValue, - data[PrivateBytes].FirstValue, + data[privateBytes].FirstValue, name, ) ch <- prometheus.MustNewConstMetric( c.threadCount, prometheus.GaugeValue, - data[ThreadCount].FirstValue, + data[threadCount].FirstValue, name, ) ch <- prometheus.MustNewConstMetric( c.virtualBytes, prometheus.GaugeValue, - data[VirtualBytes].FirstValue, + data[virtualBytes].FirstValue, name, ) ch <- prometheus.MustNewConstMetric( c.virtualBytesPeak, prometheus.GaugeValue, - data[VirtualBytesPeak].FirstValue, + data[virtualBytesPeak].FirstValue, name, ) ch <- prometheus.MustNewConstMetric( c.workingSet, prometheus.GaugeValue, - data[WorkingSet].FirstValue, + data[workingSet].FirstValue, name, ) ch <- prometheus.MustNewConstMetric( c.workingSetPeak, prometheus.GaugeValue, - data[WorkingSetPeak].FirstValue, + data[workingSetPeak].FirstValue, name, ) } @@ -426,21 +426,21 @@ func (c *Collector) collectCollectionBrokerPerformanceCounter(ch chan<- promethe ch <- prometheus.MustNewConstMetric( c.connectionBrokerPerformance, prometheus.CounterValue, - data[SuccessfulConnections].FirstValue, + data[successfulConnections].FirstValue, "Successful", ) ch <- prometheus.MustNewConstMetric( c.connectionBrokerPerformance, prometheus.CounterValue, - data[PendingConnections].FirstValue, + data[pendingConnections].FirstValue, "Pending", ) ch <- prometheus.MustNewConstMetric( c.connectionBrokerPerformance, prometheus.CounterValue, - data[FailedConnections].FirstValue, + data[failedConnections].FirstValue, "Failed", ) diff --git a/internal/perfdata/perftypes/const.go b/internal/perfdata/perftypes/const.go index 83679e881..8e0550f7e 100644 --- a/internal/perfdata/perftypes/const.go +++ b/internal/perfdata/perftypes/const.go @@ -4,8 +4,8 @@ import "github.com/prometheus/client_golang/prometheus" // Conversion factors. const ( - TicksToSecondScaleFactor = 1 / 1e7 - WindowsEpoch = 116444736000000000 + TicksToSecondScaleFactor = 1 / 1e7 + WindowsEpoch int64 = 116444736000000000 ) // Based on https://github.com/leoluk/perflib_exporter/blob/master/collector/mapper.go diff --git a/internal/perfdata/v2/collector.go b/internal/perfdata/v2/collector.go index cd929162b..f6be2eddb 100644 --- a/internal/perfdata/v2/collector.go +++ b/internal/perfdata/v2/collector.go @@ -24,7 +24,7 @@ type Counter struct { Desc string Instances map[string]pdhCounterHandle Type uint32 - Frequency float64 + Frequency int64 } func NewCollector(object string, instances []string, counters []string) (*Collector, error) { @@ -67,30 +67,30 @@ func NewCollector(object string, instances []string, counters []string) (*Collec counter.Instances[instance] = counterHandle - if counter.Type == 0 { - // Get the info with the current buffer size - bufLen := uint32(0) + if counter.Type != 0 { + continue + } - if ret := PdhGetCounterInfo(counterHandle, 1, &bufLen, nil); ret != PdhMoreData { - return nil, fmt.Errorf("PdhGetCounterInfo: %w", NewPdhError(ret)) - } + // Get the info with the current buffer size + bufLen := uint32(0) - buf := make([]byte, bufLen) - if ret := PdhGetCounterInfo(counterHandle, 1, &bufLen, &buf[0]); ret != ErrorSuccess { - return nil, fmt.Errorf("PdhGetCounterInfo: %w", NewPdhError(ret)) - } + if ret := PdhGetCounterInfo(counterHandle, 1, &bufLen, nil); ret != PdhMoreData { + return nil, fmt.Errorf("PdhGetCounterInfo: %w", NewPdhError(ret)) + } - ci := (*PdhCounterInfo)(unsafe.Pointer(&buf[0])) - counter.Type = ci.DwType - counter.Desc = windows.UTF16PtrToString(ci.SzExplainText) + buf := make([]byte, bufLen) + if ret := PdhGetCounterInfo(counterHandle, 1, &bufLen, &buf[0]); ret != ErrorSuccess { + return nil, fmt.Errorf("PdhGetCounterInfo: %w", NewPdhError(ret)) + } - frequency := float64(0) + ci := (*PdhCounterInfo)(unsafe.Pointer(&buf[0])) + counter.Type = ci.DwType + counter.Desc = windows.UTF16PtrToString(ci.SzExplainText) - if ret := PdhGetCounterTimeBase(counterHandle, &frequency); ret != ErrorSuccess { + if counter.Type == perftypes.PERF_ELAPSED_TIME { + if ret := PdhGetCounterTimeBase(counterHandle, &counter.Frequency); ret != ErrorSuccess { return nil, fmt.Errorf("PdhGetCounterTimeBase: %w", NewPdhError(ret)) } - - counter.Frequency = frequency } } @@ -153,7 +153,7 @@ func (c *Collector) Collect() (map[string]map[string]perftypes.CounterValues, er continue } - items := (*[1 << 20]PdhRawCounterItem)(unsafe.Pointer(&buf[0]))[:itemCount] + items := unsafe.Slice((*PdhRawCounterItem)(unsafe.Pointer(&buf[0])), itemCount) if data == nil { data = make(map[string]map[string]perftypes.CounterValues, itemCount) @@ -193,14 +193,14 @@ func (c *Collector) Collect() (map[string]map[string]perftypes.CounterValues, er switch counter.Type { case perftypes.PERF_ELAPSED_TIME: - values.FirstValue = float64(item.RawValue.FirstValue-perftypes.WindowsEpoch) / counter.Frequency - values.SecondValue = float64(item.RawValue.SecondValue-perftypes.WindowsEpoch) / counter.Frequency + values.FirstValue = float64((item.RawValue.FirstValue - perftypes.WindowsEpoch) / counter.Frequency) case perftypes.PERF_100NSEC_TIMER, perftypes.PERF_PRECISION_100NS_TIMER: values.FirstValue = float64(item.RawValue.FirstValue) * perftypes.TicksToSecondScaleFactor - values.SecondValue = float64(item.RawValue.SecondValue) * perftypes.TicksToSecondScaleFactor - default: + case perftypes.PERF_AVERAGE_BULK: values.FirstValue = float64(item.RawValue.FirstValue) values.SecondValue = float64(item.RawValue.SecondValue) + default: + values.FirstValue = float64(item.RawValue.FirstValue) } data[instanceName][counter.Name] = values diff --git a/internal/perfdata/v2/pdh.go b/internal/perfdata/v2/pdh.go index 171b37038..b28dc28d3 100644 --- a/internal/perfdata/v2/pdh.go +++ b/internal/perfdata/v2/pdh.go @@ -622,7 +622,7 @@ func PdhGetRawCounterArray(hCounter pdhCounterHandle, lpdwBufferSize *uint32, lp // // lpdwItemCount // Time base that specifies the number of performance values a counter samples per second. -func PdhGetCounterTimeBase(hCounter pdhCounterHandle, pTimeBase *float64) uint32 { +func PdhGetCounterTimeBase(hCounter pdhCounterHandle, pTimeBase *int64) uint32 { ret, _, _ := pdhPdhGetCounterTimeBase.Call( uintptr(hCounter), uintptr(unsafe.Pointer(pTimeBase)))