Skip to content

Commit

Permalink
fixup! CP-49634: Add alerting for Corosync upgrade
Browse files Browse the repository at this point in the history
  • Loading branch information
Vincent-lau committed Jul 11, 2024
1 parent 3263b67 commit 08ad258
Showing 1 changed file with 20 additions and 10 deletions.
30 changes: 20 additions & 10 deletions ocaml/xapi/xapi_clustering.ml
Original file line number Diff line number Diff line change
Expand Up @@ -540,6 +540,8 @@ module Watcher = struct
is an update *)
let cluster_change_interval = Mtime.Span.min

(* Flag guarding creation of the cluster stack watcher thread.
   [create_as_necessary] flips it from [false] to [true] with
   [Atomic.compare_and_set] just before spawning the thread that runs
   [watch_cluster_stack_version]; a caller that finds it already [true]
   only logs and does not spawn another thread. *)
let cluster_stack_watcher : bool Atomic.t = Atomic.make false

(* we handle unclean host joins and leaves in the watcher, i.e. hosts joining and leaving
due to network problems, power cuts, etc. Joins and leaves initiated by the
API will be handled in the API calls themselves, but they share the same code
Expand Down Expand Up @@ -574,8 +576,8 @@ module Watcher = struct
Atomic.set cluster_change_watcher false

let watch_cluster_stack_version ~__context ~host =
while !Daemon.enabled do
( match find_cluster_host ~__context ~host with
if !Daemon.enabled then
match find_cluster_host ~__context ~host with
| Some ch ->
let cluster_ref = Db.Cluster_host.get_cluster ~__context ~self:ch in
let cluster_rec =
Expand Down Expand Up @@ -605,20 +607,13 @@ module Watcher = struct
)
| None ->
debug "%s: No cluster host, no need to watch" __FUNCTION__
) ;
Ptime.Span.of_d_ps (7, 0L)
|> Option.get
|> Ptime.Span.to_float_s
|> Thread.delay
done ;
Mutex.execute mu (fun () -> cluster_stack_watcher := None)

(** [create_as_necessary] will create cluster watchers on the coordinator if they are not
already created.
There is no need to destroy them: once the clustering daemon is disabled,
these threads will exit as well. *)
let create_as_necessary ~__context ~host =
if Helpers.is_pool_master ~__context ~host then
if Helpers.is_pool_master ~__context ~host then (
if Xapi_cluster_helpers.cluster_health_enabled ~__context then
if Atomic.compare_and_set cluster_change_watcher false true then (
debug "%s: create watcher for corosync-notifyd on coordinator"
Expand All @@ -630,5 +625,20 @@ module Watcher = struct
before us, leave it to them *)
debug
"%s: not create watcher for corosync-notifyd as it already exists"
__FUNCTION__ ;

if Xapi_cluster_helpers.corosync3_enabled ~__context then
if Atomic.compare_and_set cluster_stack_watcher false true then (
debug
"%s: create cluster stack watcher for out-of-date cluster stack \
(corosync2)"
__FUNCTION__ ;
ignore
@@ Thread.create
(fun () -> watch_cluster_stack_version ~__context ~host)
()
) else
debug "%s: not create watcher for cluster stack as it already exists"
__FUNCTION__
)
end

0 comments on commit 08ad258

Please sign in to comment.