Skip to content

Commit

Permalink
concourse-worker: add watchdog process
Browse files Browse the repository at this point in the history
  • Loading branch information
Steve Durrheimer committed May 29, 2020
1 parent fb0f13c commit ec468d4
Show file tree
Hide file tree
Showing 4 changed files with 31 additions and 0 deletions.
2 changes: 2 additions & 0 deletions defaults/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,8 @@ concourse_worker: no
concourse_worker_name: "{{ ansible_hostname }}"
concourse_worker_launcher_path: "{{ concourse_install_dir }}/concourse-worker"
concourse_retire_worker_path: "{{ concourse_install_dir }}/concourse-retire-worker"
# Watchdog timeout in seconds. Rendered as `WatchdogSec=` in the worker
# service unit; the watchdog script derives its keep-alive interval from it.
concourse_worker_watchdog_sec: 5
# Where the rendered watchdog script (templates/concourse-worker-watchdog.j2)
# is installed; the worker service unit runs it via `ExecStartPost=`.
concourse_worker_watchdog_path: "{{ concourse_install_dir }}/concourse-worker-watchdog"
concourse_work_dir: "{{ concourse_install_dir }}/work"
concourse_tsa_public_key_path: "{{ concourse_install_dir }}/host_key.pub"
concourse_tsa_worker_key_path: "{{ concourse_install_dir }}/worker_key"
Expand Down
2 changes: 2 additions & 0 deletions tasks/install-worker.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@
dest: "{{ concourse_worker_launcher_path }}"
- src: concourse-retire-worker.j2
dest: "{{ concourse_retire_worker_path }}"
- src: concourse-worker-watchdog.j2
dest: "{{ concourse_worker_watchdog_path }}"

- name: create worker service | concourse
template:
Expand Down
17 changes: 17 additions & 0 deletions templates/concourse-worker-watchdog.j2
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#!/bin/bash

# {{ ansible_managed }}
#
# Watchdog helper for the concourse worker service.
#
# Probes the local worker endpoint in a loop and, while the probe succeeds,
# sends systemd the "WATCHDOG=1" keep-alive. When the probe fails, no
# keep-alive is sent, so systemd's watchdog timeout for the unit can expire.

# Keep-alive interval in seconds: half of the configured watchdog timeout.
# The `int` filter keeps the arithmetic valid when the variable is supplied
# as a string or a float (e.g. through extra-vars).
interval=$(( {{ concourse_worker_watchdog_sec | int }} / 2 ))

# Integer division gives 0 for a timeout of 1 (or an unparsable value);
# never sleep for 0 seconds, which would turn the loop into a busy spin.
if (( interval < 1 )); then
  interval=1
fi

while true; do
  # --fail:       treat an HTTP error status as a failed probe, not a success
  #               (presumably 127.0.0.1:8888 is the worker health-check
  #               endpoint; confirm against the worker configuration).
  # --max-time:   bound a hung connection so the failure is reported below.
  # --output:     discard the response body instead of logging it every probe.
  if curl --silent --fail --max-time "$interval" --output /dev/null 127.0.0.1:8888; then
    /bin/systemd-notify "WATCHDOG=1"
    sleep "$interval"
  else
    echo "> $0: FAILURE"
    # Re-probe quickly so a recovered worker is noticed before the timeout.
    sleep 1
  fi
done
10 changes: 10 additions & 0 deletions templates/concourse-worker.service.j2
Original file line number Diff line number Diff line change
Expand Up @@ -17,5 +17,15 @@ TasksMax=infinity
Delegate=yes
KillMode=process

## Watchdog
# Watchdog timeout in seconds; the helper script started below is what sends
# the "WATCHDOG=1" keep-alive notifications.
WatchdogSec={{ concourse_worker_watchdog_sec }}
# The keep-alive comes from the helper script rather than the main worker
# process, so notifications are accepted from any process of this service.
NotifyAccess=all
# NOTE(review): the watchdog script loops forever, while ExecStartPost=
# commands are normally expected to exit; confirm that the start timeout of
# this unit accommodates a never-returning ExecStartPost= command.
ExecStartPost={{ concourse_worker_watchdog_path }}
# If the unit is started more than `StartLimitBurst` times within
# `StartLimitInterval`, force an immediate power-off of the machine.
StartLimitInterval=5min
StartLimitBurst=4
StartLimitAction=poweroff-force

[Install]
WantedBy=multi-user.target

0 comments on commit ec468d4

Please sign in to comment.