-
Notifications
You must be signed in to change notification settings - Fork 6
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Expand metrics on pg_stat_replication to include lag expressed as time. #115
Open
ahjmorton
wants to merge
10
commits into
rnaveiras:main
Choose a base branch
from
ahjmorton:add-replication-lag-time
base: main
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
Changes from all commits
Commits
Show all changes
10 commits
Select commit
Hold shift + click to select a range
f70335a
Add additional metrics for replication lag as time
50baa5f
Add new metric documentation
103ac73
Correct date on change log
458791c
Drop Postgres 9 support from stat_replication
4aaf535
Re-work standby side metrics to be based around the `pg_stat_wal_rece…
bac46c7
Add replay bytes as well as seconds
f5c3d70
Have WAL sender lag default to zero
242f796
Merge branch 'main' into add-replication-lag-time
rnaveiras cbcc86c
Merge branch 'main' into add-replication-lag-time
rnaveiras 9cc3974
Merge branch 'main' into add-replication-lag-time
rnaveiras File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,99 @@ | ||
package collector | ||
|
||
import ( | ||
"context" | ||
|
||
pgx "github.com/jackc/pgx/v4" | ||
"github.com/prometheus/client_golang/prometheus" | ||
) | ||
|
||
// statWalReceiver is the scrape query run against the pg_stat_wal_receiver
// view. For every row of the view it selects:
//   - status, used as the metric label;
//   - postgres_wal_receiver_replay_bytes, the pg_wal_lsn_diff between
//     pg_last_wal_receive_lsn() and pg_last_wal_replay_lsn(), cast to float;
//   - postgres_wal_receiver_replay_lag, the seconds between now() and
//     pg_last_xact_replay_timestamp(), reported as 0 when the receive and
//     replay positions are equal.
//
// Rows whose replay lag evaluates to NULL are excluded by the outer SELECT.
const (
	// Scrape query
	statWalReceiver = `
WITH pg_wal_receiver AS (
SELECT status
, pg_wal_lsn_diff(pg_last_wal_receive_lsn(), pg_last_wal_replay_lsn())::float
as postgres_wal_receiver_replay_bytes
, (
CASE WHEN pg_last_wal_receive_lsn() = pg_last_wal_replay_lsn()
THEN 0
ELSE EXTRACT (EPOCH FROM now() - pg_last_xact_replay_timestamp())
END
) as postgres_wal_receiver_replay_lag
FROM pg_stat_wal_receiver
)
SELECT * FROM pg_wal_receiver WHERE postgres_wal_receiver_replay_lag IS NOT NULL /*postgres_exporter*/`
)
|
||
type statWalReceiverScraper struct { | ||
walReceiverReplayBytes *prometheus.Desc | ||
walReceiverReplayLag *prometheus.Desc | ||
} | ||
|
||
// NewStatWalReceiverScraper returns a new Scraper exposing postgres pg_stat_replication | ||
func NewWalReceiverScraper() Scraper { | ||
return &statWalReceiverScraper{ | ||
walReceiverReplayBytes: prometheus.NewDesc( | ||
"postgres_wal_receiver_replay_lag_bytes", | ||
"delay in standby wal replay bytes pg_wal_lsn_diff(pg_last_wal_receive_lsn(), pg_last_wal_replay_lsn())::float", | ||
[]string{"status"}, | ||
nil, | ||
), | ||
walReceiverReplayLag: prometheus.NewDesc( | ||
"postgres_wal_receiver_replay_lag_seconds", | ||
"delay in standby wal replay seconds EXTRACT (EPOCH FROM now() - pg_last_xact_replay_timestamp()", | ||
[]string{"status"}, | ||
nil, | ||
), | ||
} | ||
} | ||
|
||
func (c *statWalReceiverScraper) Name() string { | ||
return "StatWalReceiverScraperr" | ||
} | ||
|
||
func (c *statWalReceiverScraper) Scrape(ctx context.Context, conn *pgx.Conn, version Version, ch chan<- prometheus.Metric) error { | ||
var rows pgx.Rows | ||
var err error | ||
|
||
rows, err = conn.Query(ctx, statWalReceiver) | ||
|
||
if err != nil { | ||
return err | ||
} | ||
defer rows.Close() | ||
|
||
var status string | ||
var pgWalReceiverReplayBytes, pgWalReceiverReplayLag float64 | ||
|
||
for rows.Next() { | ||
|
||
if err := rows.Scan(&status, | ||
&pgWalReceiverReplayBytes, | ||
&pgWalReceiverReplayLag); err != nil { | ||
|
||
return err | ||
} | ||
// postgres_wal_receiver_replay_lag_bytes | ||
ch <- prometheus.MustNewConstMetric(c.walReceiverReplayBytes, | ||
prometheus.GaugeValue, | ||
pgWalReceiverReplayBytes, | ||
status) | ||
// postgres_wal_receiver_replay_lag_seconds | ||
ch <- prometheus.MustNewConstMetric(c.walReceiverReplayLag, | ||
prometheus.GaugeValue, | ||
pgWalReceiverReplayLag, | ||
status) | ||
} | ||
|
||
err = rows.Err() | ||
if err != nil { | ||
return err | ||
} | ||
|
||
return nil | ||
} |
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Where is
pg_log_location_diff
referenced? We aren't using it anywhere in this code, are we? Maybe a third-party library? It is definitely getting queried though. I tested the exporter with a streaming replication setup on my machine and I saw the following errors in the logs of the primary PostgreSQL instance: Even Google has no clue what
pg_log_location_diff
is!!