Skip to content

Commit

Permalink
Merge pull request #145 from harness/lifecycleTemplate
Browse files Browse the repository at this point in the history
fix: [PIPE-20874]: Adjust JFR shutdown and add a default shutdown hook
  • Loading branch information
jasonmcintosh authored Dec 11, 2024
2 parents 3ce93e7 + 8a095d7 commit 260d465
Show file tree
Hide file tree
Showing 3 changed files with 88 additions and 56 deletions.
2 changes: 1 addition & 1 deletion src/common/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ type: library
# This is the chart version. This version number should be incremented each time you make changes
# to the chart and its templates, including the app version.
# Versions are expected to follow Semantic Versioning (https://semver.org/)
version: 1.3.72
version: 1.3.73

# This is the version number of the application being deployed. This version number should be
# incremented each time you make changes to the application. Versions are not expected to
Expand Down
80 changes: 80 additions & 0 deletions src/common/templates/_deployment-lifecycle.tpl
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
{{/*
Renders the body of a container `lifecycle:` block. Exactly one of three variants is emitted:
  1. .Values.lifecycleHooks, rendered through harnesscommon.tplvalues.render, whenever that key is present
     (hasKey is used, so even an empty value overrides the defaults below).
  2. JFR hooks when .Values.global.jfr.enabled is true: postStart prepares the per-pod JFR dump directory and
     preStop captures JVM diagnostics (thread dump, heap histogram, native memory, JFR dump) before stopping the JVM.
  3. Otherwise a default preStop hook that sleeps, touches the `shutdown` file, and sleeps again.

The embedded scripts run under /bin/sh, so they are kept POSIX-compliant (no bash-only constructs).

USAGE:
{{- include "harnesscommon.v1.renderLifecycleHooks" (dict "ctx" $) }}
A JFR hook can be automatically added if JFR is enabled
*/}}
{{- define "harnesscommon.v1.renderLifecycleHooks" }}
{{- $ := .ctx }}
{{- if hasKey $.Values "lifecycleHooks" }}
{{ include "harnesscommon.tplvalues.render" (dict "value" $.Values.lifecycleHooks "context" $) }}
{{- else if $.Values.global.jfr.enabled }}
postStart:
  exec:
    command:
      - /bin/sh
      - -c
      - |
        mkdir -p ${JFR_DUMP_ROOT_LOCATION}/dumps/${SERVICE_NAME}/${ENV_TYPE}/jfr_dumps/${POD_NAME};
        ## -f/-n replace an existing link in place, so the hook stays idempotent when the container restarts and the
        ## link from a previous run is still present (a failing postStart hook would kill the container).
        ln -sfn ${JFR_DUMP_ROOT_LOCATION}/dumps/${SERVICE_NAME}/${ENV_TYPE}/jfr_dumps/${POD_NAME} ${JFR_DUMP_ROOT_LOCATION}/POD_NAME ;
preStop:
  exec:
    command:
      - /bin/sh
      - -c
      - |
        ## The pod is out of service at this point in Endpoints (nominally). Allow time before we start signalling
        ## process stops and termination via the shutdown file.
        sleep 30;
        ts=$(date '+%s');
        loc=${JFR_DUMP_ROOT_LOCATION}/dumps/${SERVICE_NAME}/${ENV_TYPE}/$ts/${POD_NAME};
        mkdir -p $loc; sleep 1; echo $ts > $loc/restart;
        echo $(date '+%s') > $loc/begin;
        PID=$(jps|grep -vi jps|awk '{ print $1}');

        #Copy GC log file
        cp mygclogfilename.gc $loc/;

        #Retry 10 times to take thread dump. Unsuccessful attempt has just 1 line with java process id in the output.
        #POSIX while loop: the bash-only `for ((...))` form is a syntax error when /bin/sh is dash or busybox ash.
        n=0;
        while [ "$n" -lt 10 ]; do
          jcmd $PID Thread.print -e > $loc/thread-dump-attempt-$n.txt;
          if [ $(wc -l < $loc/thread-dump-attempt-$n.txt) -gt 1 ]; then break; fi;
          n=$((n+1));
        done

        #10 retries to take heap histogram. Unsuccessful attempt has just 1 line with java process id in the output.
        n=0;
        while [ "$n" -lt 10 ]; do
          jcmd $PID GC.class_histogram -all > $loc/heap-histogram-attempt-$n.txt;
          if [ $(wc -l < $loc/heap-histogram-attempt-$n.txt) -gt 1 ]; then break; fi;
          n=$((n+1));
        done

        ## Dump native memory for analysis
        jcmd $PID VM.native_memory > $loc/native-memory-dump.txt;

        #Dump latest chunk of JFR recording
        jcmd $PID JFR.dump name=jfrRecording filename=${JFR_DUMP_ROOT_LOCATION}/dumps/${SERVICE_NAME}/${ENV_TYPE}/jfr_dumps/${POD_NAME}/container_termination_$(date +%Y_%m_%d_%H_%M_%S).jfr > $loc/jfr_done.txt

        echo $(date '+%s') > $loc/end

        ## Once JFR is caught, drop maintenance file & sleep final for grace period & complete termination
        touch shutdown;
        sleep 30;
        ## Send SIGTERM which starts JVM Shutdown hooks & upon completion will exit
        kill -15 $PID;
{{- else }}
{{/*
This is the default lifecycle hook that should be applied to services. It adds shutdown delays to allow connection
draining via maintenance checks on the shutdown file.
*/}}
preStop:
  exec:
    command:
      - /bin/sh
      - -c
      - |
        ## The pod is out of service at this point in Endpoints (nominally). Allow time before we stop processing async
        ## and trigger termination of processes via the shutdown file.
        sleep 30;
        touch shutdown;
        sleep 30;
{{- end }}
{{- end }}

62 changes: 7 additions & 55 deletions src/common/templates/_jfr-helper.tpl
Original file line number Diff line number Diff line change
@@ -1,3 +1,10 @@
{{/*
Note: the templates in this file also depend on a lifecycle hook being set. See _deployment-lifecycle.tpl for the
default hook, which SHOULD always be available. The templates here enable the volumes and ENV variables that the hook
reads. Since ONLY a single lifecycle hook is allowed, and a set of "sane" defaults on termination is needed, the JFR
hook code is maintained in _deployment-lifecycle.tpl rather than in this file.
*/}}
{{/*
USAGE:
{{- include "harnesscommon.jfr.v1.renderEnvironmentVars" (dict "ctx" $) }}
Expand All @@ -19,61 +26,6 @@ USAGE:
{{- end }}
{{- end }}
{{/*
Renders container lifecycle hooks: .Values.lifecycleHooks (via harnesscommon.tplvalues.render) when it is set to a
non-empty value; otherwise, when .Values.global.jfr.enabled is true, a postStart hook that creates the per-pod JFR
dump directory and a preStop hook that collects JVM diagnostics and then sends SIGTERM to the JVM process.
Renders nothing in any other case.
USAGE:
{{- include "harnesscommon.v1.renderLifecycleHooks" (dict "ctx" $) }}
*/}}
{{- define "harnesscommon.v1.renderLifecycleHooks" }}
{{- $ := .ctx }}
{{- if $.Values.lifecycleHooks }}
{{ include "harnesscommon.tplvalues.render" (dict "value" $.Values.lifecycleHooks "context" $) }}
{{- else if $.Values.global.jfr.enabled }}
postStart:
exec:
command:
- /bin/sh
- -c
- |
mkdir -p ${JFR_DUMP_ROOT_LOCATION}/dumps/${SERVICE_NAME}/${ENV_TYPE}/jfr_dumps/${POD_NAME};
ln -s ${JFR_DUMP_ROOT_LOCATION}/dumps/${SERVICE_NAME}/${ENV_TYPE}/jfr_dumps/${POD_NAME} ${JFR_DUMP_ROOT_LOCATION}/POD_NAME ;
preStop:
exec:
command:
- /bin/sh
- -c
- |
touch shutdown;
sleep 20;
ts=$(date '+%s');
loc=${JFR_DUMP_ROOT_LOCATION}/dumps/${SERVICE_NAME}/${ENV_TYPE}/$ts/${POD_NAME};
mkdir -p $loc; sleep 1; echo $ts > $loc/restart;
echo $(date '+%s') > $loc/begin;
PID=$(jps|grep -vi jps|awk '{ print $1}');
#Copy GC log file
cp mygclogfilename.gc $loc/;

#Retry 10 times to take thread dump. Unsuccessful attempt has just 1 line with java process id in the output.
for ((n=0;n<10;n++)); do
jcmd $PID Thread.print -e > $loc/thread-dump-attempt-$n.txt;
if [ $(wc -l < $loc/thread-dump-attempt-$n.txt) -gt 1 ]; then break; fi;
done

#10 retries to take heap histogram. Unsuccessful attempt has just 1 line with java process id in the output.
for ((n=0;n<10;n++)); do
jcmd $PID GC.class_histogram -all > $loc/heap-histogram-attempt-$n.txt;
if [ $(wc -l < $loc/heap-histogram-attempt-$n.txt) -gt 1 ]; then break; fi;
done

jcmd $PID VM.native_memory > $loc/native-memory-dump.txt;

#Dump latest chunk of JFR recording
jcmd $PID JFR.dump name=jfrRecording filename=${JFR_DUMP_ROOT_LOCATION}/dumps/${SERVICE_NAME}/${ENV_TYPE}/jfr_dumps/${POD_NAME}/container_termination_$(date +%Y_%m_%d_%H_%M_%S).jfr > $loc/jfr_done.txt

echo $(date '+%s') > $loc/end
kill -15 $PID;
{{- end }}
{{- end }}

{{/*
USAGE:
{{- include "harnesscommon.jfr.v1.volumes" (dict "ctx" $) }}
Expand Down

0 comments on commit 260d465

Please sign in to comment.