diff --git a/src/sbd-cluster.c b/src/sbd-cluster.c
index de99d0c..36f0d34 100644
--- a/src/sbd-cluster.c
+++ b/src/sbd-cluster.c
@@ -51,7 +51,18 @@ static crm_cluster_t cluster;
 static gboolean sbd_remote_check(gpointer user_data);
 static long unsigned int find_pacemaker_remote(void);
 static void sbd_membership_destroy(gpointer user_data);
+static bool wait_for_pacemaker_remote_lost = false;
 
+/* Queue SIG_EXITREQ to the parent process (getppid()). */
+static void signal_exitreq(void)
+{
+	/* sigqueue() takes the payload by value, so give it a defined one;
+	 * the value itself carries no information here. */
+	union sigval signal_value = { .sival_ptr = NULL };
+	pid_t ppid = getppid();
+
+	sigqueue(ppid, SIG_EXITREQ, signal_value);
+}
 
 #if SUPPORT_PLUGIN
 static void
@@ -459,6 +470,12 @@ sbd_remote_check(gpointer user_data)
 		set_servant_health(pcmk_health_online, LOG_INFO,
 				"Connected to Pacemaker Remote %lu", (long unsigned int)remoted_pid);
 	} else {
+		if (wait_for_pacemaker_remote_lost) {
+			/* SIGUSR2 was received earlier and the connection is now
+			 * gone: request exit rather than reporting unclean. */
+			signal_exitreq();
+			return true;
+		}
 		set_servant_health(pcmk_health_unclean, LOG_WARNING,
 				"Connection to Pacemaker Remote %lu lost", (long unsigned int)remoted_pid);
 	}
@@ -520,6 +537,18 @@ cluster_shutdown(int nsig)
 	clean_up(0);
 }
 
+/* SIGUSR2 handler (registered in servant_cluster()): from now on a lost
+ * Pacemaker Remote connection makes sbd_remote_check() request exit. */
+static void
+trigger_wait_for_pacemaker_remote_lost(int nsig)
+{
+	/* if we've never seen pacemaker_remoted, request exit immediately */
+	if ((remoted_pid <= 0) || !remote_node) {
+		signal_exitreq();
+	}
+	wait_for_pacemaker_remote_lost = true;
+}
+
 int
 servant_cluster(const char *diskname, int mode, const void* argp)
 {
@@ -539,6 +568,7 @@ servant_cluster(const char *diskname, int mode, const void* argp)
 
 	mainloop_add_signal(SIGTERM, cluster_shutdown);
 	mainloop_add_signal(SIGINT, cluster_shutdown);
+	mainloop_add_signal(SIGUSR2, trigger_wait_for_pacemaker_remote_lost);
 
 	g_main_run(mainloop);
 	g_main_destroy(mainloop);
diff --git a/src/sbd-inquisitor.c b/src/sbd-inquisitor.c
index 3991b3b..7c0ae00 100644
--- a/src/sbd-inquisitor.c
+++ b/src/sbd-inquisitor.c
@@ -177,14 +177,15 @@ void servants_start(void)
 	}
 }
 
-void servants_kill(void)
+/* Queue signal sig to every servant in the list whose pid is non-zero. */
+void servants_kill(int sig)
 {
 	struct servants_list_item *s;
-	union sigval svalue;
+	union sigval svalue = { .sival_ptr = NULL };
 
 	for (s = servants_leader; s; s = s->next) {
 		if (s->pid != 0)
-			sigqueue(s->pid, SIGKILL, svalue);
+			sigqueue(s->pid, sig, svalue);
 	}
 }
 
@@ -465,7 +466,7 @@ void inquisitor_child(void)
 		clock_gettime(CLOCK_MONOTONIC, &t_now);
 
 		if (sig == SIG_EXITREQ || sig == SIGTERM) {
-			servants_kill();
+			servants_kill(SIGKILL);
 			watchdog_close(true);
 			exiting = 1;
 		} else if (sig == SIGCHLD) {
@@ -523,6 +524,8 @@ void inquisitor_child(void)
 			if (exiting)
 				continue;
 			servants_start();
+		} else if (sig == SIGUSR2) {
+			servants_kill(SIGUSR2);
 		}
 
 		if (exiting) {
@@ -631,7 +634,7 @@ void inquisitor_child(void)
 				 */
 				cl_log(LOG_DEBUG, "Decoupling");
 				if (inquisitor_decouple() < 0) {
-					servants_kill();
+					servants_kill(SIGKILL);
 					exiting = 1;
 					continue;
 				} else {
@@ -647,7 +650,7 @@ void inquisitor_child(void)
 				/* We're still being watched by our
 				 * parent. We don't fence, but exit. */
 				cl_log(LOG_ERR, "SBD: Not enough votes to proceed. Aborting start-up.");
-				servants_kill();
+				servants_kill(SIGKILL);
 				exiting = 1;
 				continue;
 			}
diff --git a/src/sbd_remote.service.in b/src/sbd_remote.service.in
index e05f80e..51f94a4 100644
--- a/src/sbd_remote.service.in
+++ b/src/sbd_remote.service.in
@@ -10,7 +10,7 @@ Type=forking
 PIDFile=@localstatedir@/run/sbd.pid
 EnvironmentFile=-@CONFIGDIR@/sbd
 ExecStart=@sbindir@/sbd $SBD_OPTS -p @localstatedir@/run/sbd.pid watch
-ExecStop=@bindir@/kill -TERM $MAINPID
+ExecStop=@bindir@/kill -USR2 $MAINPID
 
 # Could this benefit from exit codes for restart?
 # Does this need to be set to msgwait * 1.2?