From 14910efd4f583ccce1979588402b2fb878f3791a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan-Otto=20Kr=C3=B6pke?= Date: Tue, 1 Oct 2024 23:23:23 +0200 Subject: [PATCH] updates: add windows update collector (#1652) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Jan-Otto Kröpke --- README.md | 5 +- docs/collector.update.md | 47 +++ pkg/collector/collector.go | 2 + pkg/collector/config.go | 3 + pkg/collector/map.go | 2 + pkg/collector/updates/updates.go | 405 ++++++++++++++++++++++++++ pkg/collector/updates/updates_test.go | 12 + 7 files changed, 474 insertions(+), 2 deletions(-) create mode 100644 docs/collector.update.md create mode 100644 pkg/collector/updates/updates.go create mode 100644 pkg/collector/updates/updates_test.go diff --git a/README.md b/README.md index c548efad1..7621305eb 100644 --- a/README.md +++ b/README.md @@ -47,10 +47,11 @@ Name | Description | Enabled by default [system](docs/collector.system.md) | System calls | ✓ [tcp](docs/collector.tcp.md) | TCP connections | [teradici_pcoip](docs/collector.teradici_pcoip.md) | [Teradici PCoIP](https://www.teradici.com/web-help/pcoip_wmi_specs/) session metrics | -[time](docs/collector.time.md) | Windows Time Service | -[thermalzone](docs/collector.thermalzone.md) | Thermal information [terminal_services](docs/collector.terminal_services.md) | Terminal services (RDS) [textfile](docs/collector.textfile.md) | Read prometheus metrics from a text file | +[thermalzone](docs/collector.thermalzone.md) | Thermal information | +[time](docs/collector.time.md) | Windows Time Service | +[updates](docs/collector.update.md) | Windows Update Service | [vmware_blast](docs/collector.vmware_blast.md) | VMware Blast session metrics | [vmware](docs/collector.vmware.md) | Performance counters installed by the Vmware Guest agent | diff --git a/docs/collector.update.md b/docs/collector.update.md new file mode 100644 index 000000000..6d975a60c --- /dev/null +++ 
b/docs/collector.update.md @@ -0,0 +1,47 @@ +# updates collector + +The updates collector exposes the Windows Update Service metrics. Note that the Windows Update Service must be running, else metric collection will fail. + +The Windows Update Service is responsible for managing the installation of updates for the operating system and other Microsoft software. The service can be configured to automatically download and install updates, or to notify the user when updates are available. + + +| | | +|---------------------|------------------------| +| Metric name prefix | `updates` | +| Data source | Windows Update Service | +| Enabled by default? | No | +## Flags + +### `--collector.updates.online` +Whether to search for updates online. If set to `false`, the collector will only list updates that are already found by the Windows Update Service. +Set to `true` to search for updates online, which will take longer to complete. + +### `--collector.updates.scrape-interval` +Define the interval of scraping Windows Update information. + +## Metrics + +| Name | Description | Type | Labels | +|--------------------------------|-----------------------------------------------|-------|-------------------------------| +| `windows_updates_pending_info` | Expose information for a single pending update item | gauge | `category`,`severity`,`title` | +| `windows_updates_scrape_query_duration_seconds` | Duration of the last scrape query to the Windows Update API | gauge | | +| `windows_updates_scrape_timestamp_seconds` | Timestamp of the last scrape | gauge | | + +### Example metrics +``` +# HELP windows_updates_pending_info Pending Windows Updates +# TYPE windows_updates_pending_info gauge +windows_updates_pending_info{category="Drivers",severity="",title="Intel Corporation - Bluetooth - 23.60.5.10"} 1 +# HELP windows_updates_scrape_query_duration_seconds Duration of the last scrape query to the Windows Update API +# TYPE windows_updates_scrape_query_duration_seconds gauge 
+windows_updates_scrape_query_duration_seconds 2.8161838 +# HELP windows_updates_scrape_timestamp_seconds Timestamp of the last scrape +# TYPE windows_updates_scrape_timestamp_seconds gauge +windows_updates_scrape_timestamp_seconds 1.727539734e+09 +``` + +## Useful queries +_This collector does not yet have any useful queries added, we would appreciate your help adding them!_ + +## Alerting examples +_This collector does not yet have alerting examples, we would appreciate your help adding them!_ diff --git a/pkg/collector/collector.go b/pkg/collector/collector.go index 0e883871c..fa38ebc92 100644 --- a/pkg/collector/collector.go +++ b/pkg/collector/collector.go @@ -55,6 +55,7 @@ import ( "github.com/prometheus-community/windows_exporter/pkg/collector/textfile" "github.com/prometheus-community/windows_exporter/pkg/collector/thermalzone" "github.com/prometheus-community/windows_exporter/pkg/collector/time" + "github.com/prometheus-community/windows_exporter/pkg/collector/updates" "github.com/prometheus-community/windows_exporter/pkg/collector/vmware" "github.com/prometheus-community/windows_exporter/pkg/collector/vmware_blast" "github.com/prometheus-community/windows_exporter/pkg/perflib" @@ -123,6 +124,7 @@ func NewWithConfig(config Config) *MetricCollectors { collectors[textfile.Name] = textfile.New(&config.Textfile) collectors[thermalzone.Name] = thermalzone.New(&config.Thermalzone) collectors[time.Name] = time.New(&config.Time) + collectors[updates.Name] = updates.New(&config.Updates) collectors[vmware.Name] = vmware.New(&config.Vmware) collectors[vmware_blast.Name] = vmware_blast.New(&config.VmwareBlast) diff --git a/pkg/collector/config.go b/pkg/collector/config.go index dda4d826c..5fa7c6e39 100644 --- a/pkg/collector/config.go +++ b/pkg/collector/config.go @@ -46,6 +46,7 @@ import ( "github.com/prometheus-community/windows_exporter/pkg/collector/textfile" "github.com/prometheus-community/windows_exporter/pkg/collector/thermalzone" 
"github.com/prometheus-community/windows_exporter/pkg/collector/time" + "github.com/prometheus-community/windows_exporter/pkg/collector/updates" "github.com/prometheus-community/windows_exporter/pkg/collector/vmware" "github.com/prometheus-community/windows_exporter/pkg/collector/vmware_blast" ) @@ -96,6 +97,7 @@ type Config struct { Textfile textfile.Config `yaml:"textfile"` Thermalzone thermalzone.Config `yaml:"thermalzone"` Time time.Config `yaml:"time"` + Updates updates.Config `yaml:"updates"` Vmware vmware.Config `yaml:"vmware"` VmwareBlast vmware_blast.Config `yaml:"vmware_blast"` } @@ -149,6 +151,7 @@ var ConfigDefaults = Config{ Textfile: textfile.ConfigDefaults, Thermalzone: thermalzone.ConfigDefaults, Time: time.ConfigDefaults, + Updates: updates.ConfigDefaults, Vmware: vmware.ConfigDefaults, VmwareBlast: vmware_blast.ConfigDefaults, } diff --git a/pkg/collector/map.go b/pkg/collector/map.go index 2a43e6335..2fe3bc51c 100644 --- a/pkg/collector/map.go +++ b/pkg/collector/map.go @@ -50,6 +50,7 @@ import ( "github.com/prometheus-community/windows_exporter/pkg/collector/textfile" "github.com/prometheus-community/windows_exporter/pkg/collector/thermalzone" "github.com/prometheus-community/windows_exporter/pkg/collector/time" + "github.com/prometheus-community/windows_exporter/pkg/collector/updates" "github.com/prometheus-community/windows_exporter/pkg/collector/vmware" "github.com/prometheus-community/windows_exporter/pkg/collector/vmware_blast" ) @@ -106,6 +107,7 @@ var BuildersWithFlags = map[string]BuilderWithFlags[Collector]{ textfile.Name: NewBuilderWithFlags(textfile.NewWithFlags), thermalzone.Name: NewBuilderWithFlags(thermalzone.NewWithFlags), time.Name: NewBuilderWithFlags(time.NewWithFlags), + updates.Name: NewBuilderWithFlags(updates.NewWithFlags), vmware.Name: NewBuilderWithFlags(vmware.NewWithFlags), vmware_blast.Name: NewBuilderWithFlags(vmware_blast.NewWithFlags), } diff --git a/pkg/collector/updates/updates.go 
b/pkg/collector/updates/updates.go
new file mode 100644
index 000000000..8997ee3d2
--- /dev/null
+++ b/pkg/collector/updates/updates.go
@@ -0,0 +1,482 @@
+//go:build windows
+
+// Package updates implements the windows_exporter collector that reports
+// pending Windows updates found through the Windows Update Agent COM API.
+package updates
+
+import (
+	"context"
+	"errors"
+	"fmt"
+	"log/slog"
+	"math"
+	"runtime"
+	"strconv"
+	"sync"
+	"time"
+
+	"github.com/alecthomas/kingpin/v2"
+	"github.com/go-ole/go-ole"
+	"github.com/go-ole/go-ole/oleutil"
+	"github.com/prometheus-community/windows_exporter/pkg/types"
+	"github.com/prometheus/client_golang/prometheus"
+	"github.com/yusufpapurcu/wmi"
+)
+
+// Name is the collector name used for flags, configuration and metric names.
+const Name = "updates"
+
+// Config holds the settings of the updates collector.
+//
+// The fields are exported because the YAML decoder cannot populate
+// unexported struct fields.
+type Config struct {
+	// Online is written to the update searcher's "Online" property.
+	Online bool `yaml:"online"`
+	// ScrapeInterval is the pause between two searches for pending updates.
+	ScrapeInterval time.Duration `yaml:"scrape_interval"`
+}
+
+// ConfigDefaults is the configuration used when none is supplied.
+var ConfigDefaults = Config{
+	Online:         false,
+	ScrapeInterval: 6 * time.Hour,
+}
+
+// ErrNoUpdates is returned by Collect while no successful search result is
+// available.
+var ErrNoUpdates = errors.New("no updates available")
+
+// Collector exposes the result of a periodic background search for pending
+// Windows updates.
+type Collector struct {
+	config Config
+
+	// mu guards metricsBuf, which is replaced by the background goroutine
+	// and read by Collect.
+	mu         sync.RWMutex
+	metricsBuf []prometheus.Metric
+
+	// cancel stops the background goroutine started by Build.
+	cancel context.CancelFunc
+
+	pendingUpdate        *prometheus.Desc
+	queryDurationSeconds *prometheus.Desc
+	lastScrapeMetric     *prometheus.Desc
+}
+
+// New returns a collector using config, or ConfigDefaults if config is nil.
+func New(config *Config) *Collector {
+	if config == nil {
+		config = &ConfigDefaults
+	}
+
+	return &Collector{
+		config: *config,
+	}
+}
+
+// NewWithFlags returns a collector whose configuration is bound to the
+// collector.updates.* command-line flags registered on app.
+func NewWithFlags(app *kingpin.Application) *Collector {
+	c := &Collector{
+		config: ConfigDefaults,
+	}
+
+	app.Flag(
+		"collector.updates.online",
+		"Whether to search for updates online.",
+	).Default(strconv.FormatBool(ConfigDefaults.Online)).BoolVar(&c.config.Online)
+
+	app.Flag(
+		"collector.updates.scrape-interval",
+		"Define the interval of scraping Windows Update information.",
+	).Default(ConfigDefaults.ScrapeInterval.String()).DurationVar(&c.config.ScrapeInterval)
+
+	return c
+}
+
+// Close stops the background goroutine started by Build. A search that is
+// already in progress is not interrupted; the goroutine exits after the
+// search has returned.
+func (c *Collector) Close(_ *slog.Logger) error {
+	if c.cancel != nil {
+		c.cancel()
+	}
+
+	return nil
+}
+
+// Build creates the metric descriptors and starts the background goroutine
+// that periodically searches for pending updates. It blocks until the
+// Windows Update session is initialized and returns the initialization
+// error, if any.
+func (c *Collector) Build(logger *slog.Logger, _ *wmi.Client) error {
+	logger = logger.With(slog.String("collector", Name))
+
+	logger.Info("update collector is in an experimental state! The configuration and metrics may change in future. Please report any issues.")
+
+	// A non-positive interval would make the background loop search without
+	// any pause.
+	if c.config.ScrapeInterval <= 0 {
+		logger.Warn("invalid scrape interval, using the default",
+			slog.Duration("default", ConfigDefaults.ScrapeInterval),
+		)
+
+		c.config.ScrapeInterval = ConfigDefaults.ScrapeInterval
+	}
+
+	// The descriptors are created before the goroutine is started because
+	// the goroutine uses them as soon as its first search has finished.
+	c.pendingUpdate = prometheus.NewDesc(
+		prometheus.BuildFQName(types.Namespace, Name, "pending_info"),
+		"Pending Windows Updates",
+		[]string{"category", "severity", "title"},
+		nil,
+	)
+
+	c.queryDurationSeconds = prometheus.NewDesc(
+		prometheus.BuildFQName(types.Namespace, Name, "scrape_query_duration_seconds"),
+		"Duration of the last scrape query to the Windows Update API",
+		nil,
+		nil,
+	)
+
+	c.lastScrapeMetric = prometheus.NewDesc(
+		prometheus.BuildFQName(types.Namespace, Name, "scrape_timestamp_seconds"),
+		"Timestamp of the last scrape",
+		nil,
+		nil,
+	)
+
+	ctx, cancel := context.WithCancel(context.Background())
+	c.cancel = cancel
+
+	initErrCh := make(chan error, 1)
+	go c.scheduleUpdateStatus(ctx, logger, initErrCh, c.config.Online)
+
+	if err := <-initErrCh; err != nil {
+		cancel()
+
+		return fmt.Errorf("failed to initialize Windows Update collector: %w", err)
+	}
+
+	return nil
+}
+
+// GetName returns the collector name.
+func (c *Collector) GetName() string { return Name }
+
+// GetPerfCounter returns an empty list of performance counters.
+func (c *Collector) GetPerfCounter(_ *slog.Logger) ([]string, error) {
+	return []string{}, nil
+}
+
+// Collect sends the metrics of the most recent successful search to ch. It
+// returns ErrNoUpdates while no such result is available.
+func (c *Collector) Collect(_ *types.ScrapeContext, _ *slog.Logger, ch chan<- prometheus.Metric) error {
+	c.mu.RLock()
+	defer c.mu.RUnlock()
+
+	if c.metricsBuf == nil {
+		return ErrNoUpdates
+	}
+
+	for _, m := range c.metricsBuf {
+		ch <- m
+	}
+
+	return nil
+}
+
+// scheduleUpdateStatus initializes a Windows Update session on a dedicated
+// OS thread and then refreshes c.metricsBuf every ScrapeInterval until ctx
+// is cancelled. The initialization result is reported through initErrCh:
+// an error is sent on failure, the channel is closed on success.
+func (c *Collector) scheduleUpdateStatus(ctx context.Context, logger *slog.Logger, initErrCh chan<- error, online bool) {
+	// The only way to run WMI queries in parallel while being thread-safe is to
+	// ensure the CoInitialize[Ex]() call is bound to its current OS thread.
+	// Otherwise, attempting to initialize and run parallel queries across
+	// goroutines will result in protected memory errors.
+	runtime.LockOSThread()
+	defer runtime.UnlockOSThread()
+
+	if err := ole.CoInitializeEx(0, ole.COINIT_MULTITHREADED); err != nil {
+		var oleCode *ole.OleError
+		if errors.As(err, &oleCode) && oleCode.Code() != ole.S_OK && oleCode.Code() != wmi.S_FALSE {
+			initErrCh <- fmt.Errorf("CoInitializeEx: %w", err)
+
+			return
+		}
+	}
+
+	defer ole.CoUninitialize()
+
+	// Create the Windows Update session COM object.
+	mus, err := oleutil.CreateObject("Microsoft.Update.Session")
+	if err != nil {
+		initErrCh <- fmt.Errorf("create Microsoft.Update.Session: %w", err)
+
+		return
+	}
+
+	defer mus.Release()
+
+	// Query the IDispatch interface of the object
+	musQueryInterface, err := mus.QueryInterface(ole.IID_IDispatch)
+	if err != nil {
+		initErrCh <- fmt.Errorf("IID_IDispatch: %w", err)
+
+		return
+	}
+
+	defer musQueryInterface.Release()
+
+	_, err = oleutil.PutProperty(musQueryInterface, "ClientApplicationID", "windows_exporter")
+	if err != nil {
+		initErrCh <- fmt.Errorf("put ClientApplicationID: %w", err)
+
+		return
+	}
+
+	// https://learn.microsoft.com/en-us/windows/win32/api/wuapi/nf-wuapi-iupdatesession-createupdatesearcher
+	us, err := oleutil.CallMethod(musQueryInterface, "CreateUpdateSearcher")
+	if err != nil {
+		initErrCh <- fmt.Errorf("create update searcher: %w", err)
+
+		return
+	}
+
+	// The searcher is released exactly once, through this IDispatch pointer.
+	// Clearing the VARIANT as well, or releasing a second pointer obtained
+	// from it, would release the same COM object more than once.
+	ush := us.ToIDispatch()
+	defer ush.Release()
+
+	_, err = oleutil.PutProperty(ush, "Online", online)
+	if err != nil {
+		initErrCh <- fmt.Errorf("put Online: %w", err)
+
+		return
+	}
+
+	// lets use the fast local-only query to check if WindowsUpdates service is enabled on the host
+	hc, err := oleutil.CallMethod(ush, "GetTotalHistoryCount")
+	if hc != nil {
+		// Only the success of the call matters; the count itself is unused.
+		_ = hc.Clear()
+	}
+
+	if err != nil {
+		initErrCh <- fmt.Errorf("windows updates service is disabled: %w", err)
+
+		return
+	}
+
+	close(initErrCh)
+
+	for {
+		metricsBuf, err := c.fetchUpdates(logger, ush)
+		if err != nil {
+			logger.Error("failed to fetch updates",
+				slog.Any("err", err),
+			)
+
+			// Drop the stale result so that Collect reports ErrNoUpdates.
+			metricsBuf = nil
+		}
+
+		c.mu.Lock()
+		c.metricsBuf = metricsBuf
+		c.mu.Unlock()
+
+		// Wait for the next interval after failures as well; retrying
+		// immediately would query the Windows Update API in a tight loop.
+		timer := time.NewTimer(c.config.ScrapeInterval)
+
+		select {
+		case <-ctx.Done():
+			timer.Stop()
+
+			return
+		case <-timer.C:
+		}
+	}
+}
+
+// fetchUpdates searches for updates that are neither installed nor hidden
+// and converts the result into metrics: the search duration, one
+// pending_info sample per update and the timestamp of the search.
+func (c *Collector) fetchUpdates(logger *slog.Logger, usd *ole.IDispatch) ([]prometheus.Metric, error) {
+	// The previous result size is used as the capacity estimate. No lock is
+	// needed: the calling goroutine is the only writer of c.metricsBuf.
+	metricsBuf := make([]prometheus.Metric, 0, len(c.metricsBuf))
+
+	timeStart := time.Now()
+
+	usr, err := oleutil.CallMethod(usd, "Search", "IsInstalled=0 and IsHidden=0")
+	if err != nil {
+		return nil, fmt.Errorf("search for updates: %w", err)
+	}
+
+	// Measure once so that the log line and the metric agree.
+	queryDuration := time.Since(timeStart)
+
+	logger.Debug(fmt.Sprintf("search for updates took %s", queryDuration))
+
+	metricsBuf = append(metricsBuf, prometheus.MustNewConstMetric(
+		c.queryDurationSeconds,
+		prometheus.GaugeValue,
+		queryDuration.Seconds(),
+	))
+
+	usrd := usr.ToIDispatch()
+	defer usrd.Release()
+
+	upd, err := oleutil.GetProperty(usrd, "Updates")
+	if err != nil {
+		return nil, fmt.Errorf("get updates: %w", err)
+	}
+
+	updd := upd.ToIDispatch()
+	defer updd.Release()
+
+	countUpdd, err := oleutil.GetProperty(updd, "Count")
+	if err != nil {
+		return nil, fmt.Errorf("get updates count: %w", err)
+	}
+
+	for i := range int(countUpdd.Val) {
+		update, err := c.getUpdateStatus(updd, i)
+		if err != nil {
+			logger.Error("failed to fetch pending Windows Update item",
+				slog.Any("err", err),
+			)
+
+			continue
+		}
+
+		metricsBuf = append(metricsBuf, prometheus.MustNewConstMetric(
+			c.pendingUpdate,
+			prometheus.GaugeValue,
+			1,
+			update.category,
+			update.severity,
+			update.title,
+		))
+	}
+
+	metricsBuf = append(metricsBuf, prometheus.MustNewConstMetric(
+		c.lastScrapeMetric,
+		prometheus.GaugeValue,
+		float64(time.Now().Unix()),
+	))
+
+	return metricsBuf, nil
+}
+
+// windowsUpdate holds the label values exported for one pending update.
+type windowsUpdate struct {
+	category string
+	severity string
+	title    string
+}
+
+// getUpdateStatus reads the category, MSRC severity and title of the update
+// at index item of the update collection updd.
+// Other available properties can be found here:
+// https://learn.microsoft.com/en-us/previous-versions/windows/desktop/aa386114(v=vs.85)
+func (c *Collector) getUpdateStatus(updd *ole.IDispatch, item int) (windowsUpdate, error) {
+	itemRaw, err := oleutil.GetProperty(updd, "Item", item)
+	if err != nil {
+		return windowsUpdate{}, fmt.Errorf("get update item: %w", err)
+	}
+
+	updateItem := itemRaw.ToIDispatch()
+	defer updateItem.Release()
+
+	severity, err := oleutil.GetProperty(updateItem, "MsrcSeverity")
+	if err != nil {
+		return windowsUpdate{}, fmt.Errorf("get MsrcSeverity: %w", err)
+	}
+
+	// Free the string held by the VARIANT once its value has been copied.
+	defer func() { _ = severity.Clear() }()
+
+	categoriesRaw, err := oleutil.GetProperty(updateItem, "Categories")
+	if err != nil {
+		return windowsUpdate{}, fmt.Errorf("get Categories: %w", err)
+	}
+
+	categories := categoriesRaw.ToIDispatch()
+	defer categories.Release()
+
+	categoryName, err := getUpdateCategory(categories)
+	if err != nil {
+		return windowsUpdate{}, fmt.Errorf("get Category: %w", err)
+	}
+
+	title, err := oleutil.GetProperty(updateItem, "Title")
+	if err != nil {
+		return windowsUpdate{}, fmt.Errorf("get Title: %w", err)
+	}
+
+	defer func() { _ = title.Clear() }()
+
+	return windowsUpdate{
+		category: categoryName,
+		severity: severity.ToString(),
+		title:    title.ToString(),
+	}, nil
+}
+
+// getUpdateCategory returns the name of the category with the lowest Order
+// value in categories, or an empty string if the collection is empty.
+func getUpdateCategory(categories *ole.IDispatch) (string, error) {
+	var categoryName string
+
+	categoryCount, err := oleutil.GetProperty(categories, "Count")
+	if err != nil {
+		return categoryName, fmt.Errorf("get Categories count: %w", err)
+	}
+
+	order := int64(math.MaxInt64)
+
+	for i := range categoryCount.Val {
+		// The closure scopes the deferred cleanup to one iteration.
+		err = func(i int64) error {
+			categoryRaw, err := oleutil.GetProperty(categories, "Item", i)
+			if err != nil {
+				return fmt.Errorf("get Category item: %w", err)
+			}
+
+			category := categoryRaw.ToIDispatch()
+			defer category.Release()
+
+			categoryNameRaw, err := oleutil.GetProperty(category, "Name")
+			if err != nil {
+				return fmt.Errorf("get Category item Name: %w", err)
+			}
+
+			defer func() { _ = categoryNameRaw.Clear() }()
+
+			orderRaw, err := oleutil.GetProperty(category, "Order")
+			if err != nil {
+				return fmt.Errorf("get Category item Order: %w", err)
+			}
+
+			if orderRaw.Val < order {
+				order = orderRaw.Val
+				categoryName = categoryNameRaw.ToString()
+			}
+
+			return nil
+		}(i)
+		if err != nil {
+			// The closure already names the failing step.
+			return "", err
+		}
+	}
+
+	return categoryName, nil
+}
diff --git a/pkg/collector/updates/updates_test.go b/pkg/collector/updates/updates_test.go
new file mode 100644
index 000000000..afc4b256d
--- /dev/null
+++ b/pkg/collector/updates/updates_test.go
@@ -0,0 +1,13 @@
+package updates_test
+
+import (
+	"testing"
+
+	"github.com/prometheus-community/windows_exporter/pkg/collector/updates"
+	"github.com/prometheus-community/windows_exporter/pkg/testutils"
+)
+
+// BenchmarkCollector passes the updates collector to the shared testutils benchmark helper.
+func BenchmarkCollector(b *testing.B) {
+	testutils.FuncBenchmarkCollector(b, updates.Name, updates.NewWithFlags)
+}