-
Notifications
You must be signed in to change notification settings - Fork 34
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Make startup of services more-robust in the face of PostgreSQL not be…
…ing ready. #1393
- Loading branch information
1 parent
b28f852
commit 271f8b7
Showing
7 changed files
with
187 additions
and
72 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
77 changes: 77 additions & 0 deletions
77
pscheduler-server/pscheduler-server/daemons/service-template.m4
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,77 @@ | ||
changequote(<<<,>>>)dnl | ||
changecom()dnl | ||
# | ||
# Systemd unit for __PROG__ | ||
# | ||
# Systemd version __SYSTEMD_VERSION__ was installed at build time. | ||
# | ||
# | ||
# Version of systemd installed by distros we support: | ||
# | ||
# EL7 219 | ||
# EL8 239 | ||
# EL9 252 | ||
# D10 241 | ||
# D11 247 | ||
# D12 252 | ||
# U20 245 | ||
# U22 249 | ||
|
||
[Unit] | ||
Description=pScheduler Server - __PROG__ | ||
# PartOf= propagates stops and restarts of the database service to this one; | ||
# starting it is handled by Wants= and After= below. | ||
PartOf=__PGSERVICE__ | ||
After=__PGSERVICE__ | ||
Wants=__PGSERVICE__ | ||
|
||
[Service] | ||
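# Systemd 240 added Type=exec, which does not consider the unit started | ||
# until the service binary has actually been executed. Older versions | ||
# fall back to Type=simple. | ||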
Type=ifelse(eval(__SYSTEMD_VERSION__ < 240),1,simple,exec) | ||
|
||
User=__PSUSER__ | ||
Group=__PSUSER__ | ||
|
||
PermissionsStartOnly=true | ||
LimitNOFILE=32768 | ||
LimitNPROC=32768 | ||
|
||
Restart=always | ||
RestartSec=15 | ||
|
||
# This is slightly longer than the database check below so failures | ||
# will be more apparent than just a timeout. | ||
TimeoutStartSec=130 | ||
|
||
# Wait for the database to become accessible. This is done because | ||
# the PostgreSQL service can appear up when it isn't ready to take | ||
# queries yet. That will cause this service to die. | ||
ExecStartPre=__DAEMONDIR__/wait-for-database --dsn @__DSN__ --dwell 120 --retry 5 | ||
|
||
# Create the run directory | ||
ExecStartPre=/bin/mkdir -p __RUNDIR__/__PROG__ | ||
ExecStartPre=/bin/chmod 755 __RUNDIR__/__PROG__ | ||
|
||
# Set up some temporary space and export its location | ||
ExecStartPre=/bin/mkdir -p __RUNDIR__/__PROG__/tmp | ||
ExecStartPre=/bin/chmod 700 __RUNDIR__/__PROG__/tmp | ||
Environment=TMPDIR=__RUNDIR__/__PROG__/tmp | ||
|
||
# Set ownership | ||
ExecStartPre=/bin/chown -R __PSUSER__:__PSUSER__ __RUNDIR__/__PROG__ | ||
|
||
# Generate options file | ||
ExecStartPre=/bin/sh -c "if [ -r __CONFIGDIR__/__PROG__.conf ]; then opts=$(sed -e 's/#.*$//' __CONFIGDIR__/__PROG__.conf); echo OPTIONS=$opts > __RUNDIR__/__PROG__/options; chown __PSUSER__:__PSUSER__ __RUNDIR__/__PROG__/options; fi" | ||
|
||
# Redirections | ||
StandardOutput=journal | ||
StandardError=journal | ||
|
||
# Start service | ||
EnvironmentFile=-__RUNDIR__/__PROG__/options | ||
ExecStart=__DAEMONDIR__/__PROG__ --dsn @__DSN__ $OPTIONS | ||
|
||
# Stop service | ||
ExecStopPost=/bin/rm -rf __RUNDIR__/__PROG__ | ||
|
||
[Install] | ||
WantedBy=multi-user.target |
44 changes: 0 additions & 44 deletions
44
pscheduler-server/pscheduler-server/daemons/service-template.raw
This file was deleted.
Oops, something went wrong.
86 changes: 86 additions & 0 deletions
86
pscheduler-server/pscheduler-server/daemons/wait-for-database
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,86 @@ | ||
#!/usr/bin/env python3 | ||
# | ||
# Check that the database is available for connections and working. | ||
# | ||
|
||
import datetime | ||
import optparse | ||
import pscheduler | ||
import psycopg2 | ||
import sys | ||
import time | ||
|
||
pscheduler.set_graceful_exit()

#
# Command-line arguments
#

opt_parser = optparse.OptionParser()

# Where to connect.  The value is handed as-is to the connection code.
opt_parser.add_option(
    "--dsn",
    dest="dsn",
    action="store",
    type="string",
    default="",
    help="Database connection string, prefix with @ to read from file",
)

# Overall time budget for getting a working connection.
opt_parser.add_option(
    "--dwell",
    dest="dwell",
    action="store",
    type="int",
    default=60,
    help="How long to keep trying to connect (seconds)",
)

# Pause between consecutive connection attempts.
opt_parser.add_option(
    "--retry",
    dest="retry",
    action="store",
    type="int",
    default=2,
    help="How often to try connecting (seconds)",
)

options, args = opt_parser.parse_args()
|
||
|
||
def try_database(dsn):
    """
    Probe the database once.

    Opens a connection using dsn, runs a trivial query and checks
    that exactly the expected single row comes back.

    Returns a (succeeded, message) tuple: (True, 'OK') when the probe
    worked, otherwise False and a description of what went wrong.
    """
    connection = None
    try:
        connection = pscheduler.PgConnection(dsn, name="check-database")
        result = list(connection.query('SELECT 12345'))
    except Exception as problem:
        # Any failure connecting or querying means "not ready yet."
        return (False, str(problem))
    finally:
        # Runs on success and failure alike so the connection never leaks.
        if connection is not None:
            connection.close()

    if result == [(12345,)]:
        return (True, 'OK')

    return (False, 'Got unexpected data: %s' % (result))
|
||
|
||
#
# Keep probing the database until it answers or the dwell time runs out.
#

attempts = 0

# The deadline is measured on the monotonic clock rather than the wall
# clock.  This program runs during service startup, when the system
# clock may still be stepped by time synchronization; a wall-clock
# deadline could end the wait early or stretch it indefinitely.
dwell_until = time.monotonic() + options.dwell

# Reported only if the loop below never runs (e.g., a dwell of zero);
# otherwise it is overwritten by the result of the latest attempt.
reason = 'Never tried to connect'

while time.monotonic() < dwell_until:

    status, reason = try_database(options.dsn)

    if status:
        print('Successfully connected to the database.', file=sys.stderr)
        # sys.exit() rather than the exit() builtin, which is injected
        # by the site module for interactive use and is not guaranteed
        # to exist.
        sys.exit(0)

    # Only the first failure is logged to avoid flooding the journal
    # while the database is still coming up.
    attempts += 1
    if attempts == 1:
        print('Failed first attempt connecting to the database:', reason, file=sys.stderr)

    time.sleep(options.retry)


# No dice.

print('Unable to connect within the dwell time. Last error:\n',
      reason, file=sys.stderr)

sys.exit(1)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters