Skip to content

Commit

Permalink
Add Macos observability lib (#28)
Browse files Browse the repository at this point in the history
* Add gitignore to node-observ-lib

* Fix typo in node default filteringSelector

* Prep alert group names for macos

* Add macos-observ-lib

* Change overview dashboard:
show networkErrorsAndDroppedPerSec instead of networkErrorsPerSec for Linux/MacOS

* Add more alerts

* Move alerts to sep file

* Breaking: Update layout

To allow importing linux locally from macos

* Bring back NodeFilesystemAlmostOutOfFiles alert

* Show only errors when they occur

* Only show network interfaces that had traffic change at least once during selected dashboard interval
  • Loading branch information
v-zhuravlev authored Nov 28, 2023
1 parent 4a48f6b commit 94e744e
Show file tree
Hide file tree
Showing 473 changed files with 420 additions and 81,560 deletions.
Original file line number Diff line number Diff line change
@@ -1,3 +1,2 @@
.jekyll-cache
jsonnetfile.lock.json
vendor
12 changes: 6 additions & 6 deletions docs/node-observ-lib/jsonnetfile.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,20 +4,20 @@
{
"source": {
"git": {
"remote": "https://github.com/grafana/jsonnet-libs.git",
"subdir": "common-lib"
"remote": "https://github.com/grafana/grafonnet.git",
"subdir": "gen/grafonnet-v10.0.0"
}
},
"version": "master"
"version": "main"
},
{
"source": {
"git": {
"remote": "https://github.com/grafana/grafonnet.git",
"subdir": "gen/grafonnet-v10.0.0"
"remote": "https://github.com/grafana/jsonnet-libs.git",
"subdir": "common-lib"
}
},
"version": "main"
"version": "master"
},
{
"source": {
Expand Down
56 changes: 0 additions & 56 deletions docs/node-observ-lib/jsonnetfile.lock.json

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ You can use observ-lib to fill in monitoring-mixin structure:

```jsonnet
// mixin.libsonnet file
local nodelib = import 'node-observ-lib/main.libsonnet';
local nodelib = import 'node-observ-lib/linux/main.libsonnet';
local linux =
nodelib.new()
Expand Down Expand Up @@ -45,7 +45,7 @@ local linux =

```jsonnet
// mixin.libsonnet file
local nodelib = import 'node-observ-lib/main.libsonnet';
local nodelib = import 'node-observ-lib/linux/main.libsonnet';
local linux =
nodelib.new()
Expand Down Expand Up @@ -82,7 +82,7 @@ local linux =
// mixin.libsonnet file
local configOverride = import './overrides.libsonnet';
local nodelib = import 'node-observ-lib/main.libsonnet';
local nodelib = import 'node-observ-lib/linux/main.libsonnet';
local linux =
nodelib.new()
Expand All @@ -101,7 +101,7 @@ local linux =
```jsonnet
local g = import './g.libsonnet';
// mixin.libsonnet file
local nodelib = import 'node-observ-lib/main.libsonnet';
local nodelib = import 'node-observ-lib/linux/main.libsonnet';
local linux =
nodelib.new()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
new(this): {
groups: [
{
name: if this.config.uid == 'node' then 'node-exporter-filesystem' else this.config.uid + '-linux-filesystem-alerts',
name: if this.config.uid == 'node' then 'node-exporter-filesystem' else this.config.uid + '-filesystem-alerts',
rules: [
{
alert: 'NodeFilesystemSpaceFillingUp',
Expand Down Expand Up @@ -160,7 +160,7 @@
},
{
// defaults to 'node-exporter' for backward compatibility with old node-mixin
name: if this.config.uid == 'node' then 'node-exporter' else this.config.uid + '-linux-alerts',
name: if this.config.uid == 'node' then 'node-exporter' else this.config.uid + '-alerts',
rules: [
{
alert: 'NodeNetworkReceiveErrs',
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
local g = import './g.libsonnet';
local g = import '../g.libsonnet';
local commonlib = import 'common-lib/common/main.libsonnet';
{
new(this):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
// 'instanceLabels' - one or more labels that can be used to identify single entity of instances. In simple cases, can be 'instance' or 'pod'.
// 'uid' - UID to prefix all dashboards original uids

filteringSelector: std.get(self, 'nodeExporterSelector', default='"job="node"'),
filteringSelector: std.get(self, 'nodeExporterSelector', default='job="node"'),
groupLabels: ['job'],
instanceLabels: ['instance'],
dashboardNamePrefix: 'Node exporter / ',
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
local g = import './g.libsonnet';
local g = import '../g.libsonnet';
local logslib = import 'github.com/grafana/jsonnet-libs/logs-lib/logs/main.libsonnet';
{
local root = self,
Expand Down Expand Up @@ -59,7 +59,7 @@ local logslib = import 'github.com/grafana/jsonnet-libs/logs-lib/logs/main.libso
panels.diskUsage { gridPos+: { w: 12, h: 8 } },
g.panel.row.new('Network'),
panels.networkUsagePerSec { gridPos+: { w: 12, h: 8 } },
panels.networkErrorsPerSec { gridPos+: { w: 12, h: 8 } },
panels.networkErrorsAndDroppedPerSec { gridPos+: { w: 12, h: 8 } },
], 6, 2
)
)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
local g = import './g.libsonnet';
local g = import '../g.libsonnet';
local commonlib = import 'common-lib/common/main.libsonnet';
{
new(this):
Expand Down
File renamed without changes.
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
local g = import './g.libsonnet';
local g = import '../g.libsonnet';
local commonlib = import 'common-lib/common/main.libsonnet';
local utils = commonlib.utils;
{
Expand Down Expand Up @@ -680,12 +680,18 @@ local utils = commonlib.utils;
networkErrorsAndDroppedPerSec:
commonlib.panels.network.timeSeries.errors.new(
'Network errors and dropped packets',
targets=[
t.networkOutErrorsPerSec,
t.networkInErrorsPerSec,
t.networkOutDroppedPerSec,
t.networkInDroppedPerSec,
],
targets=std.map(
function(t) t
{
expr: t.expr + '>0',
},
[
t.networkOutErrorsPerSec,
t.networkInErrorsPerSec,
t.networkOutDroppedPerSec,
t.networkInDroppedPerSec,
]
),
description=|||
**Network errors**:
Expand All @@ -711,7 +717,7 @@ local utils = commonlib.utils;
targets=std.map(
function(t) t
{
expr: 'topk(25, ' + t.expr + ')>0.5',
expr: 'topk(25, ' + t.expr + ')>0',
legendFormat: '{{' + this.config.instanceLabels[0] + '}}: ' + std.get(t, 'legendFormat', '{{ nic }}'),
},
[
Expand Down Expand Up @@ -757,7 +763,7 @@ local utils = commonlib.utils;
+ commonlib.panels.network.timeSeries.errors.withNegateOutPackets(),
networkUsagePerSec:
commonlib.panels.network.timeSeries.traffic.new(
targets=[t.networkInBitPerSec, t.networkOutBitPerSec]
targets=[t.networkInBitPerSecFiltered, t.networkOutBitPerSecFiltered]
)
+ commonlib.panels.network.timeSeries.traffic.withNegateOutPackets(),
networkPacketsPerSec:
Expand Down
File renamed without changes.
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
local g = import './g.libsonnet';
local g = import '../g.libsonnet';
local prometheusQuery = g.query.prometheus;
local lokiQuery = g.query.loki;

Expand Down Expand Up @@ -696,6 +696,34 @@ local lokiQuery = g.query.loki;
'irate(node_network_receive_bytes_total{%(queriesSelector)s}[$__rate_interval])*8' % variables
)
+ prometheusQuery.withLegendFormat('{{ device }} received'),
// Transmit rate query: irate of node_network_transmit_bytes_total multiplied by 8
// (bytes -> bits), kept only for series whose counter increased over $__range.
// The `and` clause acts purely as a filter; the returned values come from the irate side.
networkOutBitPerSecFiltered:
prometheusQuery.new(
prometheusDatasource,
|||
irate(node_network_transmit_bytes_total{%(queriesSelector)s}[$__rate_interval])*8
# only show interfaces that had traffic change at least once during selected dashboard interval:
and
increase(
node_network_transmit_bytes_total{%(queriesSelector)s}[$__range]
) > 0
||| % variables
)
+ prometheusQuery.withLegendFormat('{{ device }} transmitted'),
// Receive rate query: irate of node_network_receive_bytes_total multiplied by 8
// (bytes -> bits), kept only for series whose counter increased over $__range.
// The `and` clause acts purely as a filter; the returned values come from the irate side.
networkInBitPerSecFiltered:
prometheusQuery.new(
prometheusDatasource,
|||
irate(node_network_receive_bytes_total{%(queriesSelector)s}[$__rate_interval])*8
# only show interfaces that had traffic change at least once during selected dashboard interval:
and
increase(
node_network_receive_bytes_total{%(queriesSelector)s}[$__range]
) > 0
||| % variables
)
+ prometheusQuery.withLegendFormat('{{ device }} received'),


networkOutErrorsPerSec:
prometheusQuery.new(
prometheusDatasource,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
// variables.libsonnet
local g = import './g.libsonnet';
local g = import '../g.libsonnet';
local var = g.dashboard.variable;
local commonlib = import 'common-lib/common/main.libsonnet';
local utils = commonlib.utils;
Expand Down
86 changes: 86 additions & 0 deletions docs/node-observ-lib/macos/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
# MacOS exporter observability lib

This jsonnet observability lib can be used to generate an observability package for node exporter (MacOS).

## Import

```sh
jb init
jb install https://github.com/grafana/node_exporter/docs/node-observ-lib
```

## Examples

### Example 1: Basic example

You can use observ-lib to fill in the monitoring-mixin structure:

```jsonnet
// mixin.libsonnet file
local macoslib = import 'node-observ-lib/macos/main.libsonnet';
local mac =
macoslib.new()
+ macoslib.withConfigMixin({
filteringSelector: 'job=~".*mac.*"',
groupLabels: ['job'],
instanceLabels: ['instance'],
dashboardNamePrefix: 'MacOS / ',
dashboardTags: ['macos-mixin'],
uid: 'darwin',
// enable loki logs
enableLokiLogs: true,
});
{
grafanaDashboards+:: mac.grafana.dashboards,
prometheusAlerts+:: mac.prometheus.alerts,
prometheusRules+:: mac.prometheus.recordingRules,
}
```
For more examples see [node-observ-lib/linux](../linux).

## Collectors used:

Grafana Agent or a combination of node_exporter/promtail can be used to collect the required data.

### Logs collection

Loki logs are used to populate the logs dashboard and also for annotations.

To use logs, you need to opt in by setting `enableLokiLogs: true` in the config.

See example above.

The following scrape snippet can be used in grafana-agent/promtail:

```yaml
- job_name: integrations/node_exporter_direct_scrape
static_configs:
- targets:
- localhost
labels:
__path__: /var/log/*.log
instance: '<your-instance-name>'
job: integrations/macos-node
pipeline_stages:
- multiline:
firstline: '^([\w]{3} )?[\w]{3} +[\d]+ [\d]+:[\d]+:[\d]+|[\w]{4}-[\w]{2}-[\w]{2} [\w]{2}:[\w]{2}:[\w]{2}(?:[+-][\w]{2})?'
- regex:
expression: '(?P<timestamp>([\w]{3} )?[\w]{3} +[\d]+ [\d]+:[\d]+:[\d]+|[\w]{4}-[\w]{2}-[\w]{2} [\w]{2}:[\w]{2}:[\w]{2}(?:[+-][\w]{2})?) (?P<hostname>\S+) (?P<sender>.+?)\[(?P<pid>\d+)\]:? (?P<message>(?s:.*))$'
- labels:
sender:
hostname:
pid:
- match:
selector: '{sender!="", pid!=""}'
stages:
- template:
source: message
template: '{{ .sender }}[{{ .pid }}]: {{ .message }}'
- labeldrop:
- pid
- output:
source: message
```
23 changes: 23 additions & 0 deletions docs/node-observ-lib/macos/alerts.libsonnet
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
{
  // Derives the MacOS alert groups from the parent library's alert groups.
  //
  // this:             library object; `this.config.alertsMacKeep` is the list of
  //                   alert names to retain.
  // parentPrometheus: object exposing `alerts.groups`, each group having a `name`
  //                   and a list of `rules` that carry an `alert` field.
  //
  // Every parent group is reduced to the rules named in `alertsMacKeep`;
  // groups left with no rules are then removed from the result.
  new(this, parentPrometheus):
    {
      local keepList = this.config.alertsMacKeep,

      // std.find returns the indexes at which the alert name occurs in keepList,
      // so a non-empty result means the rule must be kept.
      local isKept(rule) = std.length(std.find(rule.alert, keepList)) > 0,

      // Same groups as the parent, each carrying only its name and retained rules.
      local trimmedGroups = std.map(
        function(parentGroup) {
          name: parentGroup.name,
          rules: std.filter(isKept, parentGroup.rules),
        },
        parentPrometheus.alerts.groups
      ),

      groups: std.filter(
        function(candidate) std.length(candidate.rules) > 0,
        trimmedGroups
      ),
    },
}
Loading

0 comments on commit 94e744e

Please sign in to comment.