Add a watchdog for the concourse worker: a polling script plus a systemd unit
that restarts concourse-worker after repeated failed health probes.
Note: the "index" blob ids below are carried over from the original patch.

diff --git a/defaults/main.yml b/defaults/main.yml
index 2caa8d6..e8d73d6 100644
--- a/defaults/main.yml
+++ b/defaults/main.yml
@@ -17,7 +17,8 @@ concourse_group: "{{ concourse_user }}"
 concourse_gid: "{{ concourse_uid }}"
 concourse_force_restart: no
 concourse_service_enabled: yes
-
+# Enable (and start) the concourse-worker-watchdog service
+concourse_service_watchdog_enabled: yes
 
 # Concourse source variables
 
diff --git a/tasks/install-worker.yml b/tasks/install-worker.yml
index 01e31c8..70fa48f 100644
--- a/tasks/install-worker.yml
+++ b/tasks/install-worker.yml
@@ -24,14 +24,22 @@
       dest: "{{ concourse_worker_launcher_path }}"
     - src: concourse-retire-worker.j2
       dest: "{{ concourse_retire_worker_path }}"
+    - src: concourse-worker-watchdog.j2
+      dest: "{{ concourse_install_dir }}/concourse-worker-watchdog"
 
 - name: create worker service | concourse
   template:
-    src: concourse-worker.service.j2
-    dest: /etc/systemd/system/concourse-worker.service
+    src: "{{ item['src'] }}"
+    dest: "{{ item['dest'] }}"
     owner: root
     force: yes
   become: yes
   become_user: root
+  # One unit file per entry: the worker itself and its watchdog.
+  with_items:
+    - src: concourse-worker.service.j2
+      dest: /etc/systemd/system/concourse-worker.service
+    - src: concourse-worker-watchdog.service.j2
+      dest: /etc/systemd/system/concourse-worker-watchdog.service
   notify:
     - restart concourse worker
diff --git a/tasks/start.yml b/tasks/start.yml
index d9c29df..3481b5f 100644
--- a/tasks/start.yml
+++ b/tasks/start.yml
@@ -12,6 +12,14 @@
   become: yes
   when: concourse_worker
 
+# Boot-time enablement of the watchdog follows concourse_service_watchdog_enabled.
+- name: configure worker watchdog service | concourse
+  service:
+    name: concourse-worker-watchdog
+    enabled: "{{ concourse_service_watchdog_enabled }}"
+  become: yes
+  when: concourse_worker
+
 - name: start web service | concourse
   service:
     name: concourse-web
@@ -25,3 +33,11 @@
     state: started
   become: yes
   when: concourse_worker and concourse_service_enabled
+
+# Start the watchdog only when services are started AND the watchdog is enabled.
+- name: start worker watchdog service | concourse
+  service:
+    name: concourse-worker-watchdog
+    state: started
+  become: yes
+  when: concourse_worker and concourse_service_enabled and concourse_service_watchdog_enabled
diff --git a/templates/concourse-retire-worker.j2 b/templates/concourse-retire-worker.j2
index 59509d4..e4ede41 100644
--- a/templates/concourse-retire-worker.j2
+++ b/templates/concourse-retire-worker.j2
@@ -9,7 +9,7 @@ export {{ key }}="{{ value }}"
 
 # If $1 PID of concourse worker is provided, do a kill instead of an api call
 # Mostly used by systemd for concourse compatiility issues https://github.com/concourse/concourse/pull/3929
-until ! curl --fail 127.0.0.1:7777/ping; do
+until ! curl --silent --fail 127.0.0.1:7777/ping; do
 
   if [[ -z "$1" ]]; then
     {{ concourse_binary_path }} retire-worker \
diff --git a/templates/concourse-worker-watchdog.j2 b/templates/concourse-worker-watchdog.j2
new file mode 100644
index 0000000..7a9a020
--- /dev/null
+++ b/templates/concourse-worker-watchdog.j2
@@ -0,0 +1,32 @@
+#!/bin/bash
+# Watchdog for the concourse worker: probes the worker's local health
+# endpoint and restarts the concourse-worker systemd unit once several
+# consecutive probes have failed.
+
+readonly HEALTH_URL="127.0.0.1:8888"
+readonly MAX_RETRIES=3     # failed probes tolerated before a restart
+readonly PROBE_INTERVAL=15 # seconds between probes
+readonly PROBE_TIMEOUT=10  # seconds before a hung probe counts as failed
+
+watchdog() {
+  local retries_left=$MAX_RETRIES
+
+  while true; do
+    # --fail maps HTTP error statuses (>= 400) to a non-zero exit code and
+    # --max-time stops a hung connection from stalling the loop forever.
+    if curl --silent --fail --max-time "$PROBE_TIMEOUT" "$HEALTH_URL"; then
+      echo "watchdog: concourse-worker healthcheck ok"
+      retries_left=$MAX_RETRIES
+    elif [[ $retries_left -ne 0 ]]; then
+      echo "retry $retries_left"
+      retries_left=$((retries_left - 1))
+    else
+      echo "restart worker"
+      /bin/systemctl restart concourse-worker
+      retries_left=$MAX_RETRIES
+    fi
+    sleep "$PROBE_INTERVAL"
+  done
+}
+
+watchdog
diff --git a/templates/concourse-worker-watchdog.service.j2 b/templates/concourse-worker-watchdog.service.j2
new file mode 100644
index 0000000..e79483b
--- /dev/null
+++ b/templates/concourse-worker-watchdog.service.j2
@@ -0,0 +1,16 @@
+# {{ ansible_managed }}
+
+[Unit]
+Description=concourse-worker-watchdog
+Requires=network-online.target
+After=network-online.target concourse-worker.service
+
+[Service]
+ExecStart={{ concourse_install_dir }}/concourse-worker-watchdog
+ExecStop=/bin/kill $MAINPID
+# No ExecReload on purpose: the script does not handle SIGHUP, so a reload
+# would terminate the watchdog instead of reloading it.
+Restart=on-failure
+
+[Install]
+WantedBy=multi-user.target
diff --git a/templates/concourse-worker.service.j2 b/templates/concourse-worker.service.j2
index 3d049db..8aff373 100644
--- a/templates/concourse-worker.service.j2
+++ b/templates/concourse-worker.service.j2
@@ -4,6 +4,7 @@
 Description=concourse-worker
 Requires=network-online.target
 After=network-online.target
+Before=concourse-worker-watchdog.service
 
 [Service]
 ExecStart={{ concourse_worker_launcher_path }}