Skip to content

Commit

Permalink
Merge pull request #7 from cycloidio/fl-watch
Browse files Browse the repository at this point in the history
watchdog: restart worker if failing
  • Loading branch information
talset authored Jul 31, 2020
2 parents fb0f13c + 1b91a3a commit 3d69a40
Show file tree
Hide file tree
Showing 7 changed files with 73 additions and 4 deletions.
2 changes: 1 addition & 1 deletion defaults/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ concourse_group: "{{ concourse_user }}"
concourse_gid: "{{ concourse_uid }}"
concourse_force_restart: no
concourse_service_enabled: yes

concourse_service_watchdog_enabled: yes

# Concourse source variables

Expand Down
11 changes: 9 additions & 2 deletions tasks/install-worker.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,14 +24,21 @@
dest: "{{ concourse_worker_launcher_path }}"
- src: concourse-retire-worker.j2
dest: "{{ concourse_retire_worker_path }}"
- src: concourse-worker-watchdog.j2
dest: "{{ concourse_install_dir }}/concourse-worker-watchdog"

- name: create worker service | concourse
template:
src: concourse-worker.service.j2
dest: /etc/systemd/system/concourse-worker.service
src: "{{ item['src'] }}"
dest: "{{ item['dest'] }}"
owner: root
force: yes
become: yes
become_user: root
with_items:
- src: concourse-worker.service.j2
dest: /etc/systemd/system/concourse-worker.service
- src: concourse-worker-watchdog.service.j2
dest: /etc/systemd/system/concourse-worker-watchdog.service
notify:
- restart concourse worker
14 changes: 14 additions & 0 deletions tasks/start.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,13 @@
become: yes
when: concourse_worker

# Set whether the watchdog unit is enabled, driven by
# concourse_service_watchdog_enabled. Only applies on worker hosts.
- name: configure worker watchdog service | concourse
  become: yes
  service:
    enabled: "{{ concourse_service_watchdog_enabled }}"
    name: concourse-worker-watchdog
  when: concourse_worker

- name: start web service | concourse
service:
name: concourse-web
Expand All @@ -25,3 +32,10 @@
state: started
become: yes
when: concourse_worker and concourse_service_enabled

# Start the watchdog on worker hosts. All conditions below must hold (a list
# under `when` is AND-ed): besides the worker/service switches, the watchdog's
# own concourse_service_watchdog_enabled flag is honoured so that turning the
# watchdog off does not still start it on every run.
- name: start worker watchdog service | concourse
  service:
    name: concourse-worker-watchdog
    state: started
  become: yes
  when:
    - concourse_worker
    - concourse_service_enabled
    - concourse_service_watchdog_enabled
2 changes: 1 addition & 1 deletion templates/concourse-retire-worker.j2
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ export {{ key }}="{{ value }}"
# If $1 (the PID of the concourse worker) is provided, kill that process instead of making an API call.
# Mostly used by systemd because of a concourse compatibility issue: https://github.com/concourse/concourse/pull/3929

until ! curl --fail 127.0.0.1:7777/ping; do
until ! curl --silent --fail 127.0.0.1:7777/ping; do

if [[ -z "$1" ]]; then
{{ concourse_binary_path }} retire-worker \
Expand Down
32 changes: 32 additions & 0 deletions templates/concourse-worker-watchdog.j2
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
#!/bin/bash

# watchdog: poll the local endpoint at 127.0.0.1:8888 forever and restart the
# concourse-worker systemd unit once the probe keeps failing.
#
# Takes no arguments and never returns. Every 15 seconds one probe is made:
#   - success: the retry budget is reset;
#   - failure: one retry is consumed; when the budget is already exhausted
#     (i.e. on the 4th consecutive failure) the worker is restarted and the
#     budget is reset.
watchdog() {
  # Consecutive failed probes tolerated before a restart is triggered.
  local max_retry=3
  local retry=$max_retry

  while true; do
    # --fail       an HTTP error status counts as a failed probe, not only a
    #              refused connection (same flags as the retire-worker script)
    # --max-time   a hung connection must not block the watchdog forever
    # --output     keep the response body out of the service log
    if curl --silent --fail --max-time 10 --output /dev/null 127.0.0.1:8888; then
      echo "watchdog: concourse-worker healthcheck ok"
      retry=$max_retry
    elif [[ $retry -ne 0 ]]; then
      echo "retry $retry"
      retry=$((retry - 1))
    else
      echo "restart worker"
      /bin/systemctl restart concourse-worker
      retry=$max_retry
    fi
    sleep 15
  done
}

watchdog
15 changes: 15 additions & 0 deletions templates/concourse-worker-watchdog.service.j2
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# {{ ansible_managed }}
#
# Runs the concourse-worker-watchdog script as a long-running service,
# ordered after concourse-worker.service.

[Unit]
Description=concourse-worker-watchdog
Requires=network-online.target
# Requires= does not imply ordering, so the network target is listed in After=
# as well (as concourse-worker.service does).
After=network-online.target concourse-worker.service

[Service]
ExecStart={{ concourse_install_dir }}/concourse-worker-watchdog
ExecStop=/bin/kill $MAINPID
# No ExecReload= on purpose: the watchdog script installs no SIGHUP handler, so
# "kill -HUP" would terminate it, and systemd counts SIGHUP as a clean exit,
# meaning Restart=on-failure would not bring the watchdog back.
Restart=on-failure

[Install]
WantedBy=multi-user.target
1 change: 1 addition & 0 deletions templates/concourse-worker.service.j2
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
Description=concourse-worker
Requires=network-online.target
After=network-online.target
Before=concourse-worker-watchdog.service

[Service]
ExecStart={{ concourse_worker_launcher_path }}
Expand Down

0 comments on commit 3d69a40

Please sign in to comment.