Skip to content

Commit

Permalink
Add a node_is_leader service to check for the leader states
Browse files Browse the repository at this point in the history
It's possible to check for any kind of leader or specifically for a
standby leader.
  • Loading branch information
blogh committed Aug 23, 2023
1 parent 8f96c37 commit 68b571d
Show file tree
Hide file tree
Showing 10 changed files with 246 additions and 0 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
* Add a new service `cluster_has_scheduled_action` to warn of any scheduled switchover or restart.
* Add options to `node_is_replica` to check specifically for a synchronous (`--is-sync`) or asynchronous node (`--is-async`).
* Add `standby-leader` as a valid leader type for `cluster_has_leader`.
* Add a new service `node_is_leader` to check if a node is a leader (which includes standby leader nodes).

### Fixed

Expand Down
25 changes: 25 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ Commands:
cluster_is_in_maintenance Check if the cluster is in maintenance...
cluster_node_count Count the number of nodes in the cluster.
node_is_alive Check if the node is alive ie patroni is...
node_is_leader Check if the node is a leader node.
node_is_pending_restart Check if the node is in pending restart...
node_is_primary Check if the node is the primary with the...
node_is_replica Check if the node is a running replica...
Expand Down Expand Up @@ -350,13 +351,37 @@ Options:
--help Show this message and exit.
```
### node_is_leader
```
Usage: check_patroni node_is_leader [OPTIONS]

Check if the node is a leader node.

This check applies to any kind of leaders including standby leaders. To
check explicitly for a standby leader use the `--is-standby-leader` option.

Check:
* `OK`: if the node is a leader.
* `CRITICAL:` otherwise

Perfdata: `is_leader` is 1 if the node is a leader node, 0 otherwise.

Options:
--is-standby-leader Check for a standby leader
--help Show this message and exit.
```
### node_is_primary
```
Usage: check_patroni node_is_primary [OPTIONS]

Check if the node is the primary with the leader lock.

This service is not valid for a standby leader, because this kind of node is
not a primary.

Check:
* `OK`: if the node is a primary with the leader lock.
* `CRITICAL:` otherwise
Expand Down
36 changes: 36 additions & 0 deletions check_patroni/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@
from .node import (
NodeIsAlive,
NodeIsAliveSummary,
NodeIsLeader,
NodeIsLeaderSummary,
NodeIsPendingRestart,
NodeIsPendingRestartSummary,
NodeIsPrimary,
Expand Down Expand Up @@ -470,6 +472,8 @@ def cluster_has_scheduled_action(ctx: click.Context) -> None:
def node_is_primary(ctx: click.Context) -> None:
"""Check if the node is the primary with the leader lock.
This service is not valid for a standby leader, because this kind of node is not a primary.
\b
Check:
* `OK`: if the node is a primary with the leader lock.
Expand All @@ -486,6 +490,38 @@ def node_is_primary(ctx: click.Context) -> None:
check.main(verbose=ctx.obj.verbose, timeout=ctx.obj.timeout)


@main.command(name="node_is_leader")
@click.option(
    "--is-standby-leader",
    "check_standby_leader",
    is_flag=True,
    default=False,
    help="Check for a standby leader",
)
@click.pass_context
@nagiosplugin.guarded
def node_is_leader(ctx: click.Context, check_standby_leader: bool) -> None:
    """Check if the node is a leader node.

    This check applies to any kind of leaders including standby leaders.
    To check explicitly for a standby leader use the `--is-standby-leader` option.

    \b
    Check:
    * `OK`: if the node is a leader.
    * `CRITICAL:` otherwise

    Perfdata: `is_leader` is 1 if the node is a leader node, 0 otherwise.
    """
    # NOTE: the docstring above is the command's user-facing help text (the
    # README section for this command mirrors it), so it is kept verbatim.
    # The `\b` marker keeps click from re-wrapping the "Check:" list.

    # Resource that produces the `is_leader` metric (1 or 0).
    resource = NodeIsLeader(ctx.obj.connection_info, check_standby_leader)
    # No warning range; the critical range "@0:0" matches a value of exactly 0,
    # so `is_leader=0` is reported as CRITICAL.
    context = nagiosplugin.ScalarContext("is_leader", None, "@0:0")
    # Summary wording depends on which kind of leader was requested.
    summary = NodeIsLeaderSummary(check_standby_leader)

    check = nagiosplugin.Check()
    check.add(resource, context, summary)
    check.main(verbose=ctx.obj.verbose, timeout=ctx.obj.timeout)


@main.command(name="node_is_replica")
@click.option("--max-lag", "max_lag", type=str, help="maximum allowed lag")
@click.option(
Expand Down
39 changes: 39 additions & 0 deletions check_patroni/node.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,45 @@ def problem(self: "NodeIsPrimarySummary", results: nagiosplugin.Result) -> str:
return "This node is not the primary with the leader lock."


class NodeIsLeader(PatroniResource):
    """Resource reporting whether the node answers as a leader.

    Depending on the flag given at construction time, either the ``leader`` or
    the ``standby-leader`` REST endpoint is queried.
    """

    def __init__(
        self: "NodeIsLeader",
        connection_info: ConnectionInfo,
        check_is_standby_leader: bool,
    ) -> None:
        """Remember the connection settings and the kind of leader to look for.

        Args:
            connection_info: passed unchanged to ``PatroniResource.__init__``.
            check_is_standby_leader: when true, ``probe`` queries the
                ``standby-leader`` endpoint instead of ``leader``.
        """
        super().__init__(connection_info)
        self.check_is_standby_leader = check_is_standby_leader

    def probe(self: "NodeIsLeader") -> Iterable[nagiosplugin.Metric]:
        """Query the API once and return a single ``is_leader`` metric.

        Returns:
            A one-element list holding ``is_leader`` set to 1 when the REST
            call completes without ``APIError``, and to 0 otherwise.
        """
        endpoint = "standby-leader" if self.check_is_standby_leader else "leader"

        try:
            self.rest_api(endpoint)
        except APIError:
            # Any APIError is treated as "this node is not the requested kind
            # of leader"; presumably rest_api raises it on a non-200 answer
            # (TODO confirm against PatroniResource.rest_api).
            is_leader = 0
        else:
            is_leader = 1
        return [nagiosplugin.Metric("is_leader", is_leader)]


class NodeIsLeaderSummary(nagiosplugin.Summary):
    """Status-line wording for the ``node_is_leader`` check."""

    def __init__(
        self: "NodeIsLeaderSummary",
        check_is_standby_leader: bool,
    ) -> None:
        """Pick the label used in the messages.

        Args:
            check_is_standby_leader: when true the messages talk about a
                "standby leader", otherwise about a "leader".
        """
        self.leader_kind = "standby leader" if check_is_standby_leader else "leader"

    def ok(self: "NodeIsLeaderSummary", results: nagiosplugin.Result) -> str:
        """Return the message for a successful check; ``results`` is not read."""
        return f"This node is a {self.leader_kind} node."

    @handle_unknown
    def problem(self: "NodeIsLeaderSummary", results: nagiosplugin.Result) -> str:
        """Return the message for a failed check.

        ``results`` is not read here; it may be used by the ``handle_unknown``
        decorator, which is defined outside this block.
        """
        return f"This node is not a {self.leader_kind} node."


class NodeIsReplica(PatroniResource):
def __init__(
self: "NodeIsReplica",
Expand Down
2 changes: 2 additions & 0 deletions docs/make_readme.sh
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,8 @@ readme "### node_is_alive"
helpme node_is_alive
readme "### node_is_pending_restart"
helpme node_is_pending_restart
readme "### node_is_leader"
helpme node_is_leader
readme "### node_is_primary"
helpme node_is_primary
readme "### node_is_replica"
Expand Down
26 changes: 26 additions & 0 deletions tests/json/node_is_leader_ko.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
{
"state": "running",
"postmaster_start_time": "2021-08-11 07:02:20.732 UTC",
"role": "master",
"server_version": 110012,
"cluster_unlocked": false,
"xlog": {
"location": 1174407088
},
"timeline": 58,
"replication": [
{
"usename": "replicator",
"application_name": "srv1",
"client_addr": "10.20.199.3",
"state": "streaming",
"sync_state": "async",
"sync_priority": 0
}
],
"database_system_identifier": "6965971025273547206",
"patroni": {
"version": "2.0.2",
"scope": "patroni-demo"
}
}
19 changes: 19 additions & 0 deletions tests/json/node_is_leader_ko_standby_leader.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
{
"state": "running",
"postmaster_start_time": "2023-08-23 14:30:50.201691+00:00",
"role": "standby_leader",
"server_version": 140009,
"xlog": {
"received_location": 889192448,
"replayed_location": 889192448,
"replayed_timestamp": null,
"paused": false
},
"timeline": 1,
"dcs_last_seen": 1692805971,
"database_system_identifier": "7270495803765492571",
"patroni": {
"version": "3.1.0",
"scope": "patroni-demo-sb"
}
}
26 changes: 26 additions & 0 deletions tests/json/node_is_leader_ok.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
{
"state": "running",
"postmaster_start_time": "2021-08-11 07:02:20.732 UTC",
"role": "master",
"server_version": 110012,
"cluster_unlocked": false,
"xlog": {
"location": 1174407088
},
"timeline": 58,
"replication": [
{
"usename": "replicator",
"application_name": "srv1",
"client_addr": "10.20.199.3",
"state": "streaming",
"sync_state": "async",
"sync_priority": 0
}
],
"database_system_identifier": "6965971025273547206",
"patroni": {
"version": "2.0.2",
"scope": "patroni-demo"
}
}
19 changes: 19 additions & 0 deletions tests/json/node_is_leader_ok_standby_leader.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
{
"state": "running",
"postmaster_start_time": "2023-08-23 14:30:50.201691+00:00",
"role": "standby_leader",
"server_version": 140009,
"xlog": {
"received_location": 889192448,
"replayed_location": 889192448,
"replayed_timestamp": null,
"paused": false
},
"timeline": 1,
"dcs_last_seen": 1692805971,
"database_system_identifier": "7270495803765492571",
"patroni": {
"version": "3.1.0",
"scope": "patroni-demo-sb"
}
}
53 changes: 53 additions & 0 deletions tests/test_node_is_leader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
from click.testing import CliRunner
from pytest_mock import MockerFixture

from check_patroni.cli import main

from .tools import my_mock


def test_node_is_leader_ok(mocker: MockerFixture, use_old_replica_state: bool) -> None:
    """The check exits 0 (OK) for a leader and for a standby leader.

    Both scenarios mock the API with a 200 status via ``my_mock`` and compare
    the exact status line, including the ``is_leader=1`` perfdata.
    ``use_old_replica_state`` is not referenced in the body; it is presumably a
    pytest fixture requested for its side effects (confirm in conftest).
    """
    runner = CliRunner()

    # Default mode: any kind of leader.
    my_mock(mocker, "node_is_leader_ok", 200)
    result = runner.invoke(main, ["-e", "https://10.20.199.3:8008", "node_is_leader"])
    assert result.exit_code == 0
    assert (
        result.stdout
        == "NODEISLEADER OK - This node is a leader node. | is_leader=1;;@0\n"
    )

    # Explicit standby-leader mode. The stray debug print of the output was
    # dropped: the assertions below already report it on failure.
    my_mock(mocker, "node_is_leader_ok_standby_leader", 200)
    result = runner.invoke(
        main,
        ["-e", "https://10.20.199.3:8008", "node_is_leader", "--is-standby-leader"],
    )
    assert result.exit_code == 0
    assert (
        result.stdout
        == "NODEISLEADER OK - This node is a standby leader node. | is_leader=1;;@0\n"
    )


def test_node_is_leader_ko(mocker: MockerFixture, use_old_replica_state: bool) -> None:
    """The check exits 2 (CRITICAL) when the mocked API answers with 503.

    Covers the default mode and the ``--is-standby-leader`` mode, comparing the
    exact status line including the ``is_leader=0`` perfdata.
    ``use_old_replica_state`` is not referenced in the body; it is presumably a
    pytest fixture requested for its side effects (confirm in conftest).
    """
    cli_runner = CliRunner()
    base_args = ["-e", "https://10.20.199.3:8008", "node_is_leader"]

    # Default mode: the node is not a leader of any kind.
    my_mock(mocker, "node_is_leader_ko", 503)
    outcome = cli_runner.invoke(main, base_args)
    expected_leader = (
        "NODEISLEADER CRITICAL - This node is not a leader node. | is_leader=0;;@0\n"
    )
    assert outcome.exit_code == 2
    assert outcome.stdout == expected_leader

    # Explicit standby-leader mode.
    my_mock(mocker, "node_is_leader_ko_standby_leader", 503)
    outcome = cli_runner.invoke(main, base_args + ["--is-standby-leader"])
    expected_standby = (
        "NODEISLEADER CRITICAL - This node is not a standby leader node. | is_leader=0;;@0\n"
    )
    assert outcome.exit_code == 2
    assert outcome.stdout == expected_standby

0 comments on commit 68b571d

Please sign in to comment.