From a5e6c89d8be8ede0b1306dcd46d68a75f712a506 Mon Sep 17 00:00:00 2001 From: Katharina Przybill <30441792+kathap@users.noreply.github.com> Date: Wed, 11 Dec 2024 16:44:48 +0100 Subject: [PATCH 1/3] Draining CC-api VMs should let local-worker jobs finish --- jobs/cloud_controller_ng/spec | 3 +++ jobs/cloud_controller_ng/templates/drain.sh.erb | 4 ---- jobs/cloud_controller_ng/templates/shutdown_drain.rb.erb | 7 +++++++ 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/jobs/cloud_controller_ng/spec b/jobs/cloud_controller_ng/spec index 46fc28ab07..452070ad5f 100644 --- a/jobs/cloud_controller_ng/spec +++ b/jobs/cloud_controller_ng/spec @@ -948,6 +948,9 @@ properties: cc.jobs.local.number_of_workers: default: 2 description: "Number of local cloud_controller_worker workers" + cc.jobs.local.local_worker_grace_period_seconds: + default: 300 + description: "The number of seconds to wait for each local cloud_controller_worker worker process to finish processing jobs before forcefully shutting it down" cc.thresholds.api.alert_if_above_mb: description: "The cc will alert if memory remains above this threshold for 3 monit cycles" diff --git a/jobs/cloud_controller_ng/templates/drain.sh.erb b/jobs/cloud_controller_ng/templates/drain.sh.erb index 11cd4c0b76..f2f1edc2b7 100755 --- a/jobs/cloud_controller_ng/templates/drain.sh.erb +++ b/jobs/cloud_controller_ng/templates/drain.sh.erb @@ -1,9 +1,5 @@ #!/usr/bin/env bash -for i in {1..<%=p("cc.jobs.local.number_of_workers")%>}; do - /var/vcap/jobs/bpm/bin/bpm stop cloud_controller_ng -p "local_worker_${i}" 1>&2 -done - /var/vcap/jobs/cloud_controller_ng/bin/shutdown_drain 1>&2 echo 0 # tell bosh not wait for anything diff --git a/jobs/cloud_controller_ng/templates/shutdown_drain.rb.erb b/jobs/cloud_controller_ng/templates/shutdown_drain.rb.erb index 77d60cc85b..5fd3cef2de 100644 --- a/jobs/cloud_controller_ng/templates/shutdown_drain.rb.erb +++ b/jobs/cloud_controller_ng/templates/shutdown_drain.rb.erb @@ -8,3 +8,10 @@ 
require 'cloud_controller/drain' @drain = VCAP::CloudController::Drain.new('/var/vcap/sys/log/cloud_controller_ng') @drain.shutdown_nginx('/var/vcap/sys/run/bpm/cloud_controller_ng/nginx.pid', <%= p("cc.nginx_drain_timeout") %>) @drain.shutdown_cc('/var/vcap/sys/run/bpm/cloud_controller_ng/cloud_controller_ng.pid') +@threads = [] +@grace_period_in_seconds = <%= p("cc.jobs.local.local_worker_grace_period_seconds") %> +(1..<%= p("cc.jobs.local.number_of_workers") %>).each do |i| + @threads << Thread.new { @drain.shutdown_delayed_worker("/var/vcap/sys/run/bpm/cloud_controller_ng/local_worker_#{i}.pid", @grace_period_in_seconds.to_i) } +end + +@threads.each(&:join) From ae69a9edc967f749d9d703a8a339e7df009beedc Mon Sep 17 00:00:00 2001 From: Katharina Przybill <30441792+kathap@users.noreply.github.com> Date: Thu, 12 Dec 2024 17:17:34 +0100 Subject: [PATCH 2/3] add test --- jobs/cloud_controller_ng/templates/shutdown_drain.rb.erb | 4 ++-- spec/cloud_controller_ng/drain_spec.rb | 7 +++++++ 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/jobs/cloud_controller_ng/templates/shutdown_drain.rb.erb b/jobs/cloud_controller_ng/templates/shutdown_drain.rb.erb index 5fd3cef2de..aa7c6eab86 100644 --- a/jobs/cloud_controller_ng/templates/shutdown_drain.rb.erb +++ b/jobs/cloud_controller_ng/templates/shutdown_drain.rb.erb @@ -9,9 +9,9 @@ require 'cloud_controller/drain' @drain.shutdown_nginx('/var/vcap/sys/run/bpm/cloud_controller_ng/nginx.pid', <%= p("cc.nginx_drain_timeout") %>) @drain.shutdown_cc('/var/vcap/sys/run/bpm/cloud_controller_ng/cloud_controller_ng.pid') @threads = [] -@grace_period_in_seconds = <%= p("cc.jobs.local.local_worker_grace_period_seconds") %> +@local_worker_grace_period_seconds = <%= p("cc.jobs.local.local_worker_grace_period_seconds") %> (1..<%= p("cc.jobs.local.number_of_workers") %>).each do |i| - @threads << Thread.new { @drain.shutdown_delayed_worker("/var/vcap/sys/run/bpm/cloud_controller_ng/local_worker_#{i}.pid", 
@grace_period_in_seconds.to_i) } + @threads << Thread.new { @drain.shutdown_delayed_worker("/var/vcap/sys/run/bpm/cloud_controller_ng/local_worker_#{i}.pid", @local_worker_grace_period_seconds.to_i) } end @threads.each(&:join) diff --git a/spec/cloud_controller_ng/drain_spec.rb b/spec/cloud_controller_ng/drain_spec.rb index 6bffff1cfd..520f5af449 100644 --- a/spec/cloud_controller_ng/drain_spec.rb +++ b/spec/cloud_controller_ng/drain_spec.rb @@ -19,6 +19,13 @@ module Test expect(rendered_file).to include("@drain.shutdown_nginx('/var/vcap/sys/run/bpm/cloud_controller_ng/nginx.pid', 30)") end + context "when 'local_worker_grace_period_seconds' is provided" do + it 'renders the provided value' do + rendered_file = template.render({ 'cc' => { 'jobs' => { 'local' => { 'local_worker_grace_period_seconds' => 600 } } } }, consumes: {}) + expect(rendered_file).to include('@local_worker_grace_period_seconds = 600') + end + end + context 'when nginx timeout is provided' do it 'renders the provided value' do rendered_file = template.render({ 'cc' => { 'nginx_drain_timeout' => 60 } }, consumes: {}) From bbf1da160d50a125d95d9d04769c47118e1f9ba7 Mon Sep 17 00:00:00 2001 From: Katharina Przybill <30441792+kathap@users.noreply.github.com> Date: Thu, 12 Dec 2024 17:22:14 +0100 Subject: [PATCH 3/3] add 2. 
test --- spec/cloud_controller_ng/drain_spec.rb | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/spec/cloud_controller_ng/drain_spec.rb b/spec/cloud_controller_ng/drain_spec.rb index 520f5af449..39b07b43fa 100644 --- a/spec/cloud_controller_ng/drain_spec.rb +++ b/spec/cloud_controller_ng/drain_spec.rb @@ -26,6 +26,13 @@ module Test end end + context "when 'local.number_of_workers' is provided" do + it 'renders the provided number of workers' do + rendered_file = template.render({ 'cc' => { 'jobs' => { 'local' => { 'number_of_workers' => 5 } } } }, consumes: {}) + expect(rendered_file).to include('(1..5).each do |i|') + end + end + context 'when nginx timeout is provided' do it 'renders the provided value' do rendered_file = template.render({ 'cc' => { 'nginx_drain_timeout' => 60 } }, consumes: {})