Skip to content

Commit

Permalink
rpm: improve process timing for safety (#764)
Browse files Browse the repository at this point in the history
Points

* FROM-package just leaves tmp files if it supports the features.
* TO-package trigger the features if there are those tmp files.
* Thus, can ensure that both FROM and TO support the feature.
* Make installing plugin and restarting the same condition.
* Disable `%systemd_postun_with_restart` completely to align
specifications with DEB.

Before

1. to-pre(2): Collect plugin-list.
2. Install TO-package
3. to-post(2): Install plugin.
4. from-preun(1): Do nothing.
5. Uninstall FROM-package
6. from-postun(1): Restart if need.

After

1. to-pre(2): Collect plugin-list.
2. Install TO-package
3. to-post(2): Do nothing.
4. from-preun(1):
   * Check auto or not.
   * Leave plugin-install flag and pid if need.
5. Uninstall FROM-package
6. from-postun(1): Disable `%systemd_postun_with_restart`.
7. to-posttrans: Install plugin and restart if need.

Signed-off-by: Daijiro Fukuda <[email protected]>
  • Loading branch information
daipom authored Dec 11, 2024
1 parent 262d113 commit 038b770
Show file tree
Hide file tree
Showing 5 changed files with 101 additions and 56 deletions.
18 changes: 12 additions & 6 deletions .github/workflows/yum.yml
Original file line number Diff line number Diff line change
Expand Up @@ -153,9 +153,12 @@ jobs:
- "update-to-next-version-service-status.sh disabled active"
- "update-to-next-version-service-status.sh disabled inactive"
- "update-to-next-version-with-auto-and-manual.sh"
- "update-to-next-major-version.sh auto"
- "update-to-next-major-version.sh manual"
- "update-to-next-major-version.sh etc"
- "update-to-next-major-version.sh auto active"
- "update-to-next-major-version.sh auto inactive"
- "update-to-next-major-version.sh manual active"
- "update-to-next-major-version.sh manual inactive"
- "update-to-next-major-version.sh etc active"
- "update-to-next-major-version.sh etc inactive"
- "update-without-data-lost.sh v5 v6"
- "update-without-data-lost.sh v6 v5"
include:
Expand Down Expand Up @@ -229,9 +232,12 @@ jobs:
- "update-to-next-version-service-status.sh disabled active"
- "update-to-next-version-service-status.sh disabled inactive"
- "update-to-next-version-with-auto-and-manual.sh"
- "update-to-next-major-version.sh auto"
- "update-to-next-major-version.sh manual"
- "update-to-next-major-version.sh etc"
- "update-to-next-major-version.sh auto active"
- "update-to-next-major-version.sh auto inactive"
- "update-to-next-major-version.sh manual active"
- "update-to-next-major-version.sh manual inactive"
- "update-to-next-major-version.sh etc active"
- "update-to-next-major-version.sh etc inactive"
- "update-without-data-lost.sh v5 v6"
- "update-without-data-lost.sh v6 v5"
include:
Expand Down
1 change: 1 addition & 0 deletions fluent-package/apt/systemd-test/downgrade-to-v5-lts.sh
Original file line number Diff line number Diff line change
Expand Up @@ -25,4 +25,5 @@ systemctl status --no-pager fluentd
systemctl status --no-pager td-agent

# Fluentd should be restarted.
# NOTE: Unlike RPM, the restart behavior depends on TO-side. So, it restarts.
test $main_pid -ne $(eval $(systemctl show fluentd --property=MainPID) && echo $MainPID)
106 changes: 59 additions & 47 deletions fluent-package/yum/fluent-package.spec.in
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@
%define v4migration_old_rotate_config_saved /tmp/@PACKAGE_DIR@/.old_rotate_config
%define v4migration_enabled_service /tmp/@PACKAGE_DIR@/.v4migration_enabled_service
%define local_base_plugins /tmp/@PACKAGE_DIR@/.local_base_plugins
%define install_plugins /tmp/@PACKAGE_DIR@/.install_plugins
%define pid_for_auto_restart /tmp/@PACKAGE_DIR@/.pid_for_auto_restart

# Omit the brp-python-bytecompile automagic because post hook for ffi fails on AmazonLinux 2.
%if %{_amazon_ver} == 2
Expand Down Expand Up @@ -160,6 +162,14 @@ mkdir -p %{buildroot}%{_sysconfdir}/@PACKAGE_DIR@/plugin
mkdir -p %{buildroot}/tmp/@PACKAGE_DIR@

%pre
# Make sure the previous tmp files for auto restart does not remain.
# Note:
# %preun (FROM-side) can create these files, but they will be removed in %posttrans (TO-side).
# This means that these files may not be removed depending on the version of TO-side.
# In the future, want to figure out a more secure way to manage tmp files...
rm -f %{install_plugins}
rm -f %{pid_for_auto_restart}

if ! getent group @COMPAT_SERVICE_NAME@ >/dev/null; then
if ! getent group @SERVICE_NAME@ >/dev/null; then
/usr/sbin/groupadd --system @SERVICE_NAME@
Expand Down Expand Up @@ -188,16 +198,35 @@ else
fi
fi
if [ $1 -eq 2 ]; then
. %{_sysconfdir}/sysconfig/@SERVICE_NAME@
echo "pre FLUENT_PACKAGE_SERVICE_RESTART: $FLUENT_PACKAGE_SERVICE_RESTART"
if [ "$FLUENT_PACKAGE_SERVICE_RESTART" = auto ]; then
# collect installed gems during upgrading
# Collect plugin-list.
# Note:
# This should be done in %preun(1) of FROM-side, but we have no choice but to do this here.
# %preun(1) of FROM-side is executed after TO-side installs the files and replaces embedded Ruby.
# So, this needs to be done before it.
if [ -e /usr/sbin/fluent-gem ]; then
/usr/sbin/fluent-gem list '^fluent-plugin-' --no-versions --no-verbose > %{local_base_plugins}
fi
fi

%preun
%systemd_preun @[email protected]
if [ $1 -eq 1 ]; then
# systemctl ... --property=MainPID --value is available since systemd 230 or later.
# thus for amazonlinux:2, it can not be used.
pid="$(systemctl show "@SERVICE_NAME@" --property=MainPID | cut -d'=' -f2)"
if [ "$pid" -eq 0 ]; then
echo "Do not use auto restart because the service is not active"
else
. %{_sysconfdir}/sysconfig/@SERVICE_NAME@
echo "FLUENT_PACKAGE_SERVICE_RESTART: $FLUENT_PACKAGE_SERVICE_RESTART"
if [ "$FLUENT_PACKAGE_SERVICE_RESTART" = auto ]; then
# Present that FROM-side wants auto installing plugins and restarting.
# Note: Wants to collect plugin-list here, but we need to do it in %pre (see comments in %pre).
touch %{install_plugins}
echo "$pid" > %{pid_for_auto_restart}
fi
fi
fi

%post
%systemd_post @[email protected]
Expand Down Expand Up @@ -291,51 +320,9 @@ if [ -f "%{_sysconfdir}/prelink.conf" ]; then
%{__sed} -i"" %{_sysconfdir}/prelink.conf -e "/\/opt\/td-agent\/bin\/ruby/d"
fi
fi
if [ $1 -eq 2 ]; then
# install missing plugins during upgrading package
if [ -f %{local_base_plugins} ]; then
local_current_plugins=$(/usr/sbin/fluent-gem list '^fluent-plugin-' --no-versions --no-verbose)
if ! grep --fixed-strings --line-regexp --invert-match "$local_current_plugins" %{local_base_plugins}; then
echo "No missing plugins to install"
else
if ! curl --fail --silent -O https://rubygems.org/specs.4.8.gz; then
echo "No network connectivity..."
else
grep --fixed-strings --line-regexp --invert-match "$local_current_plugins" %{local_base_plugins} | while read missing_gem
do
if ! /usr/sbin/fluent-gem install --no-document $missing_gem; then
echo "Can't install missing plugin automatically: please install $missing_gem manually."
fi
done
fi
fi
rm -f %{local_base_plugins}
fi
fi

%postun
if [ $1 -eq 1 ]; then
# Control service during upgrading
. %{_sysconfdir}/sysconfig/@SERVICE_NAME@
echo "postun FLUENT_PACKAGE_SERVICE_RESTART: $FLUENT_PACKAGE_SERVICE_RESTART"
if [ "$FLUENT_PACKAGE_SERVICE_RESTART" = "auto" ]; then
# systemctl ... --property=MainPID --value is available since systemd 230 or later.
# thus for amazonlinux:2, it can not be used.
pid=$(systemctl show fluentd --property=MainPID | cut -d'=' -f2)
if [ $pid -gt 0 ]; then
echo "Kick auto service upgrade mode to MainPID:$pid"
kill -USR2 $pid
else
# no running fluentd service
echo "Suppress auto service upgrade mode to MainPID:$pid"
fi
elif [ "$FLUENT_PACKAGE_SERVICE_RESTART" = "manual" ]; then
echo "No need to restart service in manual mode..."
else
# no support for upgrading without downtime
%systemd_postun_with_restart @[email protected]
fi
fi
# Disable systemd_postun_with_restart to manage restart on the package side.
if [ $1 -eq 0 ]; then
# Uninstall
# Without this uninstall conditional guard block ($1 -eq 0), symlink
Expand Down Expand Up @@ -400,6 +387,31 @@ if [ -f %{v4migration} ]; then
rm -f %{v4migration_with_restart}
fi
fi
if [ -f %{install_plugins} ] && [ -f %{local_base_plugins} ]; then
local_current_plugins=$(/usr/sbin/fluent-gem list '^fluent-plugin-' --no-versions --no-verbose)
if ! grep --fixed-strings --line-regexp --invert-match "$local_current_plugins" %{local_base_plugins}; then
echo "No missing plugins to install"
else
if ! curl --fail --silent -O https://rubygems.org/specs.4.8.gz; then
echo "No network connectivity..."
else
grep --fixed-strings --line-regexp --invert-match "$local_current_plugins" %{local_base_plugins} | while read missing_gem
do
if ! /usr/sbin/fluent-gem install --no-document $missing_gem; then
echo "Can't install missing plugin automatically: please install $missing_gem manually."
fi
done
fi
fi
fi
rm -f %{install_plugins}
rm -f %{local_base_plugins}
if [ -f %{pid_for_auto_restart} ]; then
pid=$(cat %{pid_for_auto_restart})
echo "Kick auto restart to MainPID:$pid"
kill -USR2 $pid
rm -f %{pid_for_auto_restart}
fi

%files
%doc README.md
Expand Down
25 changes: 23 additions & 2 deletions fluent-package/yum/systemd-test/downgrade-to-v5-lts.sh
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,10 @@ sudo $DNF remove -y fluent-package
sudo $DNF install -y \
/host/${distribution}/${DISTRIBUTION_VERSION}/x86_64/Packages/fluent-package-[0-9]*.rpm

# Customize the env file to prevent replacing by downgrade.
# Need this to test the case where some tmp files is left.
echo "FOO=foo" >> /etc/sysconfig/fluentd

sudo systemctl enable --now fluentd
systemctl status --no-pager fluentd
systemctl status --no-pager td-agent
Expand All @@ -42,5 +46,22 @@ systemctl is-enabled fluentd
systemctl status --no-pager fluentd
systemctl status --no-pager td-agent

# Fluentd should be restarted.
test $main_pid -ne $(eval $(systemctl show fluentd --property=MainPID) && echo $MainPID)
# Fluentd should NOT be restarted.
# NOTE: Unlike DEB, the restart behavior depends on FROM-side. So, it does not restart.
# (it should restarts only when triggering zerodowntime-restart).
test $main_pid -eq $(eval $(systemctl show fluentd --property=MainPID) && echo $MainPID)

# === Test: Remained tmp files should not affect to next upgrade ===
# (This happens when env file was customized but the FLUENT_PACKAGE_SERVICE_RESTART was still `auto`)

# Some tmp files remains, though it is not happy.
test -e /tmp/fluent/.install_plugins
test -e /tmp/fluent/.pid_for_auto_restart

sudo $DNF install -y \
/host/${distribution}/${DISTRIBUTION_VERSION}/x86_64/Packages/fluent-package-[0-9]*.rpm | tee upgrade.log

# zerodowntime-restart should NOT be triggered.
(! grep "Kick auto restart" upgrade.log)

# ======
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,16 @@ set -exu
. $(dirname $0)/commonvar.sh

service_restart=$1
status_before_update=$2 # active / inactive

# Install the current
package="/host/${distribution}/${DISTRIBUTION_VERSION}/x86_64/Packages/fluent-package-*.rpm"
sudo $DNF install -y $package

if [ "$status_before_update" = active ]; then
sudo systemctl start fluentd
fi

# Set FLUENT_PACKAGE_SERVICE_RESTART
sed -i "s/=auto/=$service_restart/" /etc/sysconfig/fluentd

Expand All @@ -22,7 +27,7 @@ package="/host/v6-test/${distribution}/${DISTRIBUTION_VERSION}/x86_64/Packages/f
sudo $DNF install -y $package

# Test: Check whether plugin/gem were installed during upgrading
if [ "$service_restart" = auto ]; then
if [ "$service_restart" = auto ] && [ "$status_before_update" = active ]; then
# plugin gem should be installed automatically
/opt/fluent/bin/fluent-gem list | grep fluent-plugin-concat
# Non fluent-plugin- prefix gem should not be installed automatically
Expand Down

0 comments on commit 038b770

Please sign in to comment.