From 1605fd990a89e66ec31cd6b63c79745e955c450f Mon Sep 17 00:00:00 2001 From: benoit Date: Tue, 15 Oct 2024 11:38:00 +0200 Subject: [PATCH] cluster_has_replica: account for standby leaders Previously, only leaders were taken into account. Fix #72 --- CHANGELOG.md | 4 +++ check_patroni/cluster.py | 2 +- ...luster_has_replica_standby_cluster_ok.json | 33 +++++++++++++++++++ tests/test_cluster_has_replica.py | 25 ++++++++++++++ 4 files changed, 63 insertions(+), 1 deletion(-) create mode 100644 tests/json/cluster_has_replica_standby_cluster_ok.json diff --git a/CHANGELOG.md b/CHANGELOG.md index 50bf9f6..451b949 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,10 @@ ## Unreleased +### Fixed + +* cluster_has_replica now properly accounts for standby leaders (#72, reported by @MLyssens) + ### Misc * Update the tests and the documentation to reflect that master is replaced by diff --git a/check_patroni/cluster.py b/check_patroni/cluster.py index 4598300..896203c 100644 --- a/check_patroni/cluster.py +++ b/check_patroni/cluster.py @@ -180,7 +180,7 @@ def debug_member(member: Any, health: str) -> None: # members because leader_tl will remain None. it's not # a big deal since having no leader is rare. 
for tmember in cluster_item_dict["members"]: - if tmember["role"] == "leader": + if tmember["role"] in ["leader", "standby_leader"]: leader_tl = int(tmember["timeline"]) break diff --git a/tests/json/cluster_has_replica_standby_cluster_ok.json b/tests/json/cluster_has_replica_standby_cluster_ok.json new file mode 100644 index 0000000..03ee45f --- /dev/null +++ b/tests/json/cluster_has_replica_standby_cluster_ok.json @@ -0,0 +1,33 @@ +{ + "members": [ + { + "name": "srv1", + "role": "standby_leader", + "state": "running", + "api_url": "https://10.20.199.3:8008/patroni", + "host": "10.20.199.3", + "port": 5432, + "timeline": 51 + }, + { + "name": "srv2", + "role": "replica", + "state": "in archive recovery", + "api_url": "https://10.20.199.4:8008/patroni", + "host": "10.20.199.4", + "port": 5432, + "timeline": 51, + "lag": 0 + }, + { + "name": "srv3", + "role": "sync_standby", + "state": "streaming", + "api_url": "https://10.20.199.5:8008/patroni", + "host": "10.20.199.5", + "port": 5432, + "timeline": 51, + "lag": 0 + } + ] +} diff --git a/tests/test_cluster_has_replica.py b/tests/test_cluster_has_replica.py index a6a88c0..78e809b 100644 --- a/tests/test_cluster_has_replica.py +++ b/tests/test_cluster_has_replica.py @@ -114,6 +114,31 @@ def test_cluster_has_replica_ok_with_count_thresholds_lag( assert result.exit_code == 0 +@pytest.fixture +def cluster_has_replica_standby_cluster_ok( + patroni_api: PatroniAPI, old_replica_state: bool, datadir: Path, tmp_path: Path +) -> Iterator[None]: + cluster_path: Union[str, Path] = "cluster_has_replica_standby_cluster_ok.json" + patroni_path = "cluster_has_replica_patroni_verion_3.1.0.json" + if old_replica_state: + cluster_path = cluster_api_set_replica_running(datadir / cluster_path, tmp_path) + patroni_path = "cluster_has_replica_patroni_verion_3.0.0.json" + with patroni_api.routes({"cluster": cluster_path, "patroni": patroni_path}): + yield None + + +@pytest.mark.usefixtures("cluster_has_replica_standby_cluster_ok") +def 
test_cluster_has_relica_standby_cluster_ok( + runner: CliRunner, patroni_api: PatroniAPI +) -> None: + result = runner.invoke(main, ["-e", patroni_api.endpoint, "cluster_has_replica"]) + assert ( + result.stdout + == "CLUSTERHASREPLICA OK - healthy_replica is 2 | healthy_replica=2 srv2_lag=0 srv2_sync=0 srv2_timeline=51 srv3_lag=0 srv3_sync=1 srv3_timeline=51 sync_replica=1 unhealthy_replica=0\n" + ) + assert result.exit_code == 0 + + @pytest.fixture def cluster_has_replica_ko( patroni_api: PatroniAPI, old_replica_state: bool, datadir: Path, tmp_path: Path