Skip to content

Commit

Permalink
collectors/mdadm_linux: support reshape status + expose remaining mds…
Browse files Browse the repository at this point in the history
…tats

Signed-off-by: Philipp Born <[email protected]>
  • Loading branch information
tamcore committed Oct 30, 2024
1 parent e6a9cfb commit cca725b
Show file tree
Hide file tree
Showing 3 changed files with 156 additions and 0 deletions.
89 changes: 89 additions & 0 deletions collector/fixtures/e2e-output.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1578,6 +1578,7 @@ node_md_blocks{device="md201"} 1.993728e+06
node_md_blocks{device="md219"} 7932
node_md_blocks{device="md3"} 5.853468288e+09
node_md_blocks{device="md4"} 4.883648e+06
node_md_blocks{device="md42"} 1.95338144e+09
node_md_blocks{device="md6"} 1.95310144e+08
node_md_blocks{device="md7"} 7.813735424e+09
node_md_blocks{device="md8"} 1.95310144e+08
Expand All @@ -1597,10 +1598,71 @@ node_md_blocks_synced{device="md201"} 114176
node_md_blocks_synced{device="md219"} 7932
node_md_blocks_synced{device="md3"} 5.853468288e+09
node_md_blocks_synced{device="md4"} 4.883648e+06
node_md_blocks_synced{device="md42"} 1.95338144e+09
node_md_blocks_synced{device="md6"} 1.6775552e+07
node_md_blocks_synced{device="md7"} 7.813735424e+09
node_md_blocks_synced{device="md8"} 1.6775552e+07
node_md_blocks_synced{device="md9"} 0
# HELP node_md_blocks_synced_pct Percentage of blocks synced on device.
# TYPE node_md_blocks_synced_pct gauge
node_md_blocks_synced_pct{device="md0"} 0
node_md_blocks_synced_pct{device="md00"} 0
node_md_blocks_synced_pct{device="md10"} 0
node_md_blocks_synced_pct{device="md101"} 0
node_md_blocks_synced_pct{device="md11"} 0
node_md_blocks_synced_pct{device="md12"} 0
node_md_blocks_synced_pct{device="md120"} 0
node_md_blocks_synced_pct{device="md126"} 0
node_md_blocks_synced_pct{device="md127"} 0
node_md_blocks_synced_pct{device="md201"} 5.7
node_md_blocks_synced_pct{device="md219"} 0
node_md_blocks_synced_pct{device="md3"} 0
node_md_blocks_synced_pct{device="md4"} 0
node_md_blocks_synced_pct{device="md42"} 0
node_md_blocks_synced_pct{device="md6"} 8.5
node_md_blocks_synced_pct{device="md7"} 0
node_md_blocks_synced_pct{device="md8"} 8.5
node_md_blocks_synced_pct{device="md9"} 0
# HELP node_md_blocks_synced_speed Estimated finishing time for current sync.
# TYPE node_md_blocks_synced_speed gauge
node_md_blocks_synced_speed{device="md0"} 0
node_md_blocks_synced_speed{device="md00"} 0
node_md_blocks_synced_speed{device="md10"} 0
node_md_blocks_synced_speed{device="md101"} 0
node_md_blocks_synced_speed{device="md11"} 0
node_md_blocks_synced_speed{device="md12"} 0
node_md_blocks_synced_speed{device="md120"} 0
node_md_blocks_synced_speed{device="md126"} 0
node_md_blocks_synced_speed{device="md127"} 0
node_md_blocks_synced_speed{device="md201"} 0.2
node_md_blocks_synced_speed{device="md219"} 0
node_md_blocks_synced_speed{device="md3"} 0
node_md_blocks_synced_speed{device="md4"} 0
node_md_blocks_synced_speed{device="md42"} 0
node_md_blocks_synced_speed{device="md6"} 17
node_md_blocks_synced_speed{device="md7"} 0
node_md_blocks_synced_speed{device="md8"} 17
node_md_blocks_synced_speed{device="md9"} 0
# HELP node_md_blocks_tobesynced Number of blocks on the device that need to be synced.
# TYPE node_md_blocks_tobesynced gauge
node_md_blocks_tobesynced{device="md0"} 248896
node_md_blocks_tobesynced{device="md00"} 4.186624e+06
node_md_blocks_tobesynced{device="md10"} 3.14159265e+08
node_md_blocks_tobesynced{device="md101"} 322560
node_md_blocks_tobesynced{device="md11"} 4.190208e+06
node_md_blocks_tobesynced{device="md12"} 3.886394368e+09
node_md_blocks_tobesynced{device="md120"} 2.095104e+06
node_md_blocks_tobesynced{device="md126"} 1.855870976e+09
node_md_blocks_tobesynced{device="md127"} 3.12319552e+08
node_md_blocks_tobesynced{device="md201"} 1.993728e+06
node_md_blocks_tobesynced{device="md219"} 7932
node_md_blocks_tobesynced{device="md3"} 5.853468288e+09
node_md_blocks_tobesynced{device="md4"} 4.883648e+06
node_md_blocks_tobesynced{device="md42"} 1.95338144e+09
node_md_blocks_tobesynced{device="md6"} 1.95310144e+08
node_md_blocks_tobesynced{device="md7"} 7.813735424e+09
node_md_blocks_tobesynced{device="md8"} 1.95310144e+08
node_md_blocks_tobesynced{device="md9"} 523968
# HELP node_md_disks Number of active/failed/spare disks of device.
# TYPE node_md_disks gauge
node_md_disks{device="md0",state="active"} 2
Expand Down Expand Up @@ -1642,6 +1704,9 @@ node_md_disks{device="md3",state="spare"} 2
node_md_disks{device="md4",state="active"} 0
node_md_disks{device="md4",state="failed"} 1
node_md_disks{device="md4",state="spare"} 1
node_md_disks{device="md42",state="active"} 2
node_md_disks{device="md42",state="failed"} 0
node_md_disks{device="md42",state="spare"} 1
node_md_disks{device="md6",state="active"} 1
node_md_disks{device="md6",state="failed"} 1
node_md_disks{device="md6",state="spare"} 1
Expand Down Expand Up @@ -1669,6 +1734,7 @@ node_md_disks_required{device="md201"} 2
node_md_disks_required{device="md219"} 0
node_md_disks_required{device="md3"} 8
node_md_disks_required{device="md4"} 0
node_md_disks_required{device="md42"} 3
node_md_disks_required{device="md6"} 2
node_md_disks_required{device="md7"} 4
node_md_disks_required{device="md8"} 2
Expand All @@ -1679,86 +1745,109 @@ node_md_state{device="md0",state="active"} 1
node_md_state{device="md0",state="check"} 0
node_md_state{device="md0",state="inactive"} 0
node_md_state{device="md0",state="recovering"} 0
node_md_state{device="md0",state="reshaping"} 0
node_md_state{device="md0",state="resync"} 0
node_md_state{device="md00",state="active"} 1
node_md_state{device="md00",state="check"} 0
node_md_state{device="md00",state="inactive"} 0
node_md_state{device="md00",state="recovering"} 0
node_md_state{device="md00",state="reshaping"} 0
node_md_state{device="md00",state="resync"} 0
node_md_state{device="md10",state="active"} 1
node_md_state{device="md10",state="check"} 0
node_md_state{device="md10",state="inactive"} 0
node_md_state{device="md10",state="recovering"} 0
node_md_state{device="md10",state="reshaping"} 0
node_md_state{device="md10",state="resync"} 0
node_md_state{device="md101",state="active"} 1
node_md_state{device="md101",state="check"} 0
node_md_state{device="md101",state="inactive"} 0
node_md_state{device="md101",state="recovering"} 0
node_md_state{device="md101",state="reshaping"} 0
node_md_state{device="md101",state="resync"} 0
node_md_state{device="md11",state="active"} 0
node_md_state{device="md11",state="check"} 0
node_md_state{device="md11",state="inactive"} 0
node_md_state{device="md11",state="recovering"} 0
node_md_state{device="md11",state="reshaping"} 0
node_md_state{device="md11",state="resync"} 1
node_md_state{device="md12",state="active"} 1
node_md_state{device="md12",state="check"} 0
node_md_state{device="md12",state="inactive"} 0
node_md_state{device="md12",state="recovering"} 0
node_md_state{device="md12",state="reshaping"} 0
node_md_state{device="md12",state="resync"} 0
node_md_state{device="md120",state="active"} 1
node_md_state{device="md120",state="check"} 0
node_md_state{device="md120",state="inactive"} 0
node_md_state{device="md120",state="recovering"} 0
node_md_state{device="md120",state="reshaping"} 0
node_md_state{device="md120",state="resync"} 0
node_md_state{device="md126",state="active"} 1
node_md_state{device="md126",state="check"} 0
node_md_state{device="md126",state="inactive"} 0
node_md_state{device="md126",state="recovering"} 0
node_md_state{device="md126",state="reshaping"} 0
node_md_state{device="md126",state="resync"} 0
node_md_state{device="md127",state="active"} 1
node_md_state{device="md127",state="check"} 0
node_md_state{device="md127",state="inactive"} 0
node_md_state{device="md127",state="recovering"} 0
node_md_state{device="md127",state="reshaping"} 0
node_md_state{device="md127",state="resync"} 0
node_md_state{device="md201",state="active"} 0
node_md_state{device="md201",state="check"} 1
node_md_state{device="md201",state="inactive"} 0
node_md_state{device="md201",state="recovering"} 0
node_md_state{device="md201",state="reshaping"} 0
node_md_state{device="md201",state="resync"} 0
node_md_state{device="md219",state="active"} 0
node_md_state{device="md219",state="check"} 0
node_md_state{device="md219",state="inactive"} 1
node_md_state{device="md219",state="recovering"} 0
node_md_state{device="md219",state="reshaping"} 0
node_md_state{device="md219",state="resync"} 0
node_md_state{device="md3",state="active"} 1
node_md_state{device="md3",state="check"} 0
node_md_state{device="md3",state="inactive"} 0
node_md_state{device="md3",state="recovering"} 0
node_md_state{device="md3",state="reshaping"} 0
node_md_state{device="md3",state="resync"} 0
node_md_state{device="md4",state="active"} 0
node_md_state{device="md4",state="check"} 0
node_md_state{device="md4",state="inactive"} 1
node_md_state{device="md4",state="recovering"} 0
node_md_state{device="md4",state="reshaping"} 0
node_md_state{device="md4",state="resync"} 0
node_md_state{device="md42",state="active"} 1
node_md_state{device="md42",state="check"} 0
node_md_state{device="md42",state="inactive"} 0
node_md_state{device="md42",state="recovering"} 0
node_md_state{device="md42",state="reshaping"} 0
node_md_state{device="md42",state="resync"} 0
node_md_state{device="md6",state="active"} 0
node_md_state{device="md6",state="check"} 0
node_md_state{device="md6",state="inactive"} 0
node_md_state{device="md6",state="recovering"} 1
node_md_state{device="md6",state="reshaping"} 0
node_md_state{device="md6",state="resync"} 0
node_md_state{device="md7",state="active"} 1
node_md_state{device="md7",state="check"} 0
node_md_state{device="md7",state="inactive"} 0
node_md_state{device="md7",state="recovering"} 0
node_md_state{device="md7",state="reshaping"} 0
node_md_state{device="md7",state="resync"} 0
node_md_state{device="md8",state="active"} 0
node_md_state{device="md8",state="check"} 0
node_md_state{device="md8",state="inactive"} 0
node_md_state{device="md8",state="recovering"} 0
node_md_state{device="md8",state="reshaping"} 0
node_md_state{device="md8",state="resync"} 1
node_md_state{device="md9",state="active"} 0
node_md_state{device="md9",state="check"} 0
node_md_state{device="md9",state="inactive"} 0
node_md_state{device="md9",state="recovering"} 0
node_md_state{device="md9",state="reshaping"} 0
node_md_state{device="md9",state="resync"} 1
# HELP node_memory_Active_anon_bytes Memory information field Active_anon_bytes.
# TYPE node_memory_Active_anon_bytes gauge
Expand Down
5 changes: 5 additions & 0 deletions collector/fixtures/proc/mdstat
Original file line number Diff line number Diff line change
Expand Up @@ -57,4 +57,9 @@ md120 : active linear sda1[1] sdb1[0]
md101 : active (read-only) raid0 sdb[2] sdd[1] sdc[0]
322560 blocks super 1.2 512k chunks

md42 : active raid5 sda1[3](S) sdd1[0] sde1[1]
1953381440 blocks super 1.2 level 5, 64k chunk, algorithm 2 [3/2] [UU_]
[===========>.........] reshape = 56.1% (1096879076/1953381440) finish=1868.1min speed=7640K/sec
bitmap: 4/15 pages [16KB], 65536KB chunk

unused devices: <none>
62 changes: 62 additions & 0 deletions collector/mdadm_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,12 @@ var (
[]string{"device"},
prometheus.Labels{"state": "recovering"},
)
// reshapingDesc describes the md "state" metric for the "reshaping" state.
// It carries one variable label, "device", and the constant label
// state="reshaping"; Update sends it as a gauge whose value is
// stateVals["reshaping"].
reshapingDesc = prometheus.NewDesc(
prometheus.BuildFQName(namespace, "md", "state"),
"Indicates the state of md-device.",
[]string{"device"},
prometheus.Labels{"state": "reshaping"},
)
resyncDesc = prometheus.NewDesc(
prometheus.BuildFQName(namespace, "md", "state"),
"Indicates the state of md-device.",
Expand Down Expand Up @@ -98,6 +104,31 @@ var (
[]string{"device"},
nil,
)

// blocksToBeSyncedDesc describes the md "blocks_tobesynced" metric, labelled
// by "device". Update sends it as a gauge holding mdStat.BlocksToBeSynced.
blocksToBeSyncedDesc = prometheus.NewDesc(
prometheus.BuildFQName(namespace, "md", "blocks_tobesynced"),
"Number of blocks on the device that need to be synced.",
[]string{"device"},
nil,
)
// blocksSyncedPctDesc describes the md "blocks_synced_pct" metric, labelled
// by "device". Update sends it as a gauge holding mdStat.BlocksSyncedPct.
blocksSyncedPctDesc = prometheus.NewDesc(
prometheus.BuildFQName(namespace, "md", "blocks_synced_pct"),
"Percentage of blocks synced on device.",
[]string{"device"},
nil,
)
// blocksSyncedFinishTimeDesc describes the md "blocks_synced_finish_time"
// metric, labelled by "device". Update sends it as a gauge holding
// mdStat.BlocksSyncedFinishTime.
//
// The metric name must differ from the one used by blocksSyncedSpeedDesc:
// two descriptors sharing a fully-qualified name but carrying different help
// strings cannot be gathered together, and the finish-time values would be
// exported under the speed metric.
// NOTE(review): /proc/mdstat prints this value as "finish=<n>min", so the
// unit is presumably minutes — confirm against the procfs parser.
blocksSyncedFinishTimeDesc = prometheus.NewDesc(
prometheus.BuildFQName(namespace, "md", "blocks_synced_finish_time"),
"Estimated finishing time for current sync.",
[]string{"device"},
nil,
)
// blocksSyncedSpeedDesc describes the md "blocks_synced_speed" metric,
// labelled by "device". Update sends it as a gauge holding
// mdStat.BlocksSyncedSpeed.
//
// The help text describes a speed, not a progress percentage; the percentage
// is exported separately via blocksSyncedPctDesc.
// NOTE(review): /proc/mdstat prints this value as "speed=<n>K/sec", so the
// unit is presumably kilobytes per second — confirm against the procfs parser.
blocksSyncedSpeedDesc = prometheus.NewDesc(
prometheus.BuildFQName(namespace, "md", "blocks_synced_speed"),
"Speed of current sync.",
[]string{"device"},
nil,
)
)

func (c *mdadmCollector) Update(ch chan<- prometheus.Metric) error {
Expand Down Expand Up @@ -166,6 +197,13 @@ func (c *mdadmCollector) Update(ch chan<- prometheus.Metric) error {
mdStat.Name,
)

ch <- prometheus.MustNewConstMetric(
reshapingDesc,
prometheus.GaugeValue,
stateVals["reshaping"],
mdStat.Name,
)

ch <- prometheus.MustNewConstMetric(
recoveringDesc,
prometheus.GaugeValue,
Expand Down Expand Up @@ -199,6 +237,30 @@ func (c *mdadmCollector) Update(ch chan<- prometheus.Metric) error {
float64(mdStat.BlocksSynced),
mdStat.Name,
)
ch <- prometheus.MustNewConstMetric(
blocksToBeSyncedDesc,
prometheus.GaugeValue,
float64(mdStat.BlocksToBeSynced),
mdStat.Name,
)
ch <- prometheus.MustNewConstMetric(
blocksSyncedPctDesc,
prometheus.GaugeValue,
float64(mdStat.BlocksSyncedPct),
mdStat.Name,
)
ch <- prometheus.MustNewConstMetric(
blocksSyncedFinishTimeDesc,
prometheus.GaugeValue,
float64(mdStat.BlocksSyncedFinishTime),
mdStat.Name,
)
ch <- prometheus.MustNewConstMetric(
blocksSyncedSpeedDesc,
prometheus.GaugeValue,
float64(mdStat.BlocksSyncedSpeed),
mdStat.Name,
)
}

return nil
Expand Down

0 comments on commit cca725b

Please sign in to comment.