Merge pull request #321 from UtkarshBhatthere/feature/prohibitScaledown #309
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
name: Tests | |
on: | |
- push | |
- pull_request | |
jobs: | |
# Job: builds the snap with snapcraft and uploads every *.snap file found in
# the workspace as the "snaps" artifact.
build-microceph: | |
name: Build microceph snap | |
runs-on: ubuntu-22.04 | |
env: | |
# Tells snapcraft to build inside LXD (presumably prepared by setup_lxd below).
SNAPCRAFT_BUILD_ENVIRONMENT: "lxd" | |
steps: | |
- name: Checkout code | |
uses: actions/checkout@v3 | |
with: | |
# 0 requests the full history instead of a shallow clone.
fetch-depth: 0 | |
# The helper script is copied to $HOME so later steps can call it as ~/actionutils.sh.
- name: Copy utils | |
run: cp tests/scripts/actionutils.sh $HOME | |
- name: Clear FORWARD firewall rules | |
run: ~/actionutils.sh cleaript | |
- name: Install dependencies | |
run: | |
~/actionutils.sh setup_lxd | |
sudo snap install snapcraft --classic | |
# Prints the installed snaps into the job log.
snap list | |
- name: Build snaps | |
run: snapcraft | |
- name: Upload snap artifact | |
# always() makes the upload run even when an earlier step failed.
if: always() | |
uses: actions/upload-artifact@v3 | |
with: | |
name: snaps | |
path: "*.snap" | |
retention-days: 5 | |
# Job: runs `make check-unit` in the microceph/ directory. It has no `needs:`
# entry, so it does not wait for the snap build.
unit-tests: | |
name: Run Unit tests | |
runs-on: ubuntu-22.04 | |
steps: | |
- name: Checkout code | |
uses: actions/checkout@v3 | |
with: | |
fetch-depth: 0 | |
- name: Install dependencies | |
run: | |
# The PPA is added with --no-update; the explicit apt-get update follows.
sudo add-apt-repository ppa:dqlite/dev -y --no-update | |
sudo apt-get update | |
sudo apt-get install -y libdqlite-dev | |
- name: Install Go (1.21) | |
uses: actions/setup-go@v5 | |
with: | |
go-version: 1.21.x | |
- name: Run unit-tests | |
run: | |
cd microceph | |
make check-unit | |
# Job: single-node scenario. It runs after build-microceph and consumes the
# "snaps" artifact that job uploaded.
single-system-tests: | |
name: Single node with encryption | |
runs-on: ubuntu-22.04 | |
needs: build-microceph | |
steps: | |
- name: Download snap | |
uses: actions/download-artifact@v3 | |
with: | |
name: snaps | |
path: /home/runner | |
- name: Checkout code | |
uses: actions/checkout@v3 | |
with: | |
fetch-depth: 0 | |
- name: Copy utils | |
run: cp tests/scripts/actionutils.sh $HOME | |
- name: Free disk | |
run: ~/actionutils.sh free_runner_disk | |
# Installs via the helper, then asserts the state of a cluster with no OSDs yet.
- name: Install and setup | |
run: | |
~/actionutils.sh install_microceph | |
set -uex | |
# Verify metadata.yaml | |
meta=/var/snap/microceph/current/conf/metadata.yaml | |
cat $meta | |
grep -q ceph-version $meta | |
# Verify health and auto crush rule | |
# Health must mention the "OSD count 0" warning and the crush rule list
# must contain microceph_auto_osd; either grep failing aborts the step.
sudo microceph.ceph health | grep -q "OSD count 0 < osd_pool_default_size 3" | |
sudo microceph.ceph osd crush rule ls | grep -F microceph_auto_osd | |
# Step: negative tests. Each `disk add` invocation below is expected to print
# a specific error; the step fails if the expected text is missing.
- name: Add OSD with failure | |
run: | |
set -eux | |
# Back a loop device with a 1G sparse file under /mnt, then expose it as
# /dev/sdi21 through a block node with major 7 and the loop device's minor.
loop_file="$(sudo mktemp -p /mnt XXXX.img)" | |
sudo truncate -s 1G "${loop_file}" | |
loop_dev="$(sudo losetup --show -f "${loop_file}")" | |
minor="${loop_dev##/dev/loop}" | |
sudo mknod -m 0660 "/dev/sdi21" b 7 "${minor}" | |
# errexit is turned off because `grep -c` exits non-zero when it counts 0
# matches. rc holds the match count, not an exit status.
set +e | |
rc=$(sudo microceph disk add --wipe "/dev/sdi21" --encrypt |& grep -c Failure) | |
if [[ $rc -eq 0 ]] ; then echo "FDE should fail without dmcrypt: $rc"; exit 1; fi | |
# actually creating additional loop devices is not required since this should fail in validation. | |
rc=$(sudo microceph disk add "/dev/sdi21" "/dev/sdi22" --wal-device "/dev/sdi23" |& grep -c "not supported for batch disk addition") | |
if [[ $rc -eq 0 ]] ; then echo "Batch disk addition should fail with wal device: $rc"; exit 1; fi | |
rc=$(sudo microceph disk add "/dev/sdi21" "/dev/sdi22" --db-device "/dev/sdi23" |& grep -c "not supported for batch disk addition") | |
if [[ $rc -eq 0 ]] ; then echo "Batch disk addition should fail with db device: $rc"; exit 1; fi | |
# The next two steps delegate entirely to actionutils.sh subcommands, whose
# bodies are not visible in this file.
- name: Add OSDs | |
run: ~/actionutils.sh add_encrypted_osds | |
- name: Enable RGW | |
run: ~/actionutils.sh enable_rgw | |
# Step: checks the single-node cluster status, restarts the snap, and checks
# again with retries. Fails when the status never matches after the restart.
- name: Run system tests
  run: |
    set -eux
    # List disks
    sudo microceph disk list
    # Show ceph's status
    sudo microceph.ceph status
    # Ceph status expectations for a single node cluster.
    # Arguments: $1 - text output of `ceph status`.
    # Outputs:   "ok" when the text shows 1 mon, an active mgr, 3 OSDs and
    #            1 rgw daemon; "fail" otherwise. The verdict is carried on
    #            stdout so callers can capture it without tripping errexit.
    test_single() {
      local status="$1"
      ( echo "$status" | grep -qF "mon: 1 daemons" ) || { echo fail ; return ; }
      ( echo "$status" | grep -qE "mgr: .*active, " ) || { echo fail ; return ; }
      ( echo "$status" | grep -qF "osd: 3 osds" ) || { echo fail ; return ; }
      ( echo "$status" | grep -qF "rgw: 1 daemon" ) || { echo fail ; return ; }
      echo ok
    }
    # Confirm ceph is healthy and services started
    res=$( test_single "$( sudo microceph.ceph status )" )
    [ "$res" = "ok" ] || { echo "single node status fails" ; exit 1 ; }
    # Check health after restart: up to 16 attempts, 15 seconds apart.
    sudo snap stop microceph
    sudo snap start microceph
    res=fail
    for i in $(seq 1 16); do
      status=$( sudo microceph.ceph status )
      echo "$status"
      res=$( test_single "$status" )
      if [ "$res" = "ok" ] ; then
        echo "Single tests pass"
        break
      else
        echo "Single tests fail, retry $i/16"
        sleep 15
      fi
    done
    # Without this check an exhausted retry loop would fall through and the
    # step could still pass on the pgrep below.
    [ "$res" = "ok" ] || { echo "single node status fails after restart" ; exit 1 ; }
    sleep 1
    pgrep ceph-osd || { echo "No ceph-osd process found" ; exit 1; }
# Step: delegates to the testrgw subcommand of the helper script copied to
# $HOME earlier in this job; the helper's body is not visible in this file.
- name: Exercise RGW | |
run: ~/actionutils.sh testrgw | |
# Step: sets and then resets the cluster_network config key, sampling the OSD
# service's ActiveEnterTimestampMonotonic before and after each change.
- name: Test Cluster Config
  run: |
    set -eux
    # prefsrc of the default IPv4 route, with whitespace stripped.
    cip=$(ip -4 -j route | jq -r '.[] | select(.dst | contains("default")) | .prefsrc' | tr -d '[:space:]')
    # pre config set timestamp for service age
    ts=$(sudo systemctl show --property ActiveEnterTimestampMonotonic snap.microceph.osd.service | cut -d= -f2)
    # set config
    sudo microceph cluster config set cluster_network "${cip}/8" --wait
    # post config set timestamp for service age
    ts2=$(sudo systemctl show --property ActiveEnterTimestampMonotonic snap.microceph.osd.service | cut -d= -f2)
    # Check config output
    output=$(sudo microceph cluster config get cluster_network | grep -cim1 'cluster_network')
    if [[ $output -lt 1 ]] ; then echo "config check failed: $output"; exit 1; fi
    # Check service restarted
    # The message reports the two timestamps actually compared. ts3 is not
    # assigned yet at this point, so referencing it here would abort with
    # "unbound variable" under `set -u` instead of printing the diagnostic.
    # NOTE(review): `-lt` also passes when the timestamps are equal (no restart
    # at all); confirm whether `-le` was intended.
    if [ "$ts2" -lt "$ts" ]; then echo "config check failed: TS1: $ts TS2: $ts2"; exit 1; fi
    # reset config
    sudo microceph cluster config reset cluster_network --wait
    # post config reset timestamp for service age
    ts3=$(sudo systemctl show --property ActiveEnterTimestampMonotonic snap.microceph.osd.service | cut -d= -f2)
    # Check service restarted
    if [ "$ts3" -lt "$ts2" ]; then echo "config check failed: TS2: $ts2 TS3: $ts3"; exit 1; fi
# Step: exercises `microceph pool set-rf` against the default size (empty
# pool name), a named pool, and the "*" wildcard, verifying each result.
- name: Test pool operations
  run: |
    # errexit must stay on: every `fgrep -x` below is an assertion. With
    # `set +e` only the final pipeline decided the step result, so earlier
    # mismatches went unnoticed.
    set -eux
    sudo microceph.ceph osd pool create mypool
    sudo microceph pool set-rf --size 1 ""
    sudo microceph.ceph config get osd.1 osd_pool_default_size | fgrep -x "1"
    sudo microceph pool set-rf --size 3 mypool
    sudo microceph.ceph osd pool get mypool size | fgrep -x "size: 3"
    sudo microceph pool set-rf --size 1 "*"
    sudo microceph.ceph osd pool get mypool size | fgrep -x "size: 1"
# Step: sets the log level to "warning" and expects get-level to print exactly "3".
- name: Test log operations | |
run: | |
# errexit is switched off here, so the explicit comparison below is the only
# thing that can fail this step.
set +e | |
sudo microceph log set-level warning | |
output=$(sudo microceph log get-level) | |
if [[ "$output" != "3" ]] ; then echo "incorrect log level: $output"; exit 1; fi | |
# Job: multi-node scenario driven through `lxc exec` against containers named
# node-wrk0..node-wrk3. It runs after build-microceph and consumes its
# "snaps" artifact.
multi-node-tests: | |
name: Multi node testing | |
runs-on: ubuntu-22.04 | |
needs: build-microceph | |
steps: | |
- name: Download snap | |
uses: actions/download-artifact@v3 | |
with: | |
name: snaps | |
path: /home/runner | |
- name: Checkout code | |
uses: actions/checkout@v3 | |
with: | |
fetch-depth: 0 | |
- name: Copy utils | |
run: cp tests/scripts/actionutils.sh $HOME | |
- name: Clear FORWARD firewall rules | |
run: ~/actionutils.sh cleaript | |
- name: Free disk | |
run: ~/actionutils.sh free_runner_disk | |
- name: Install dependencies | |
run: ~/actionutils.sh setup_lxd | |
- name: Create containers with loopback devices | |
run: ~/actionutils.sh create_containers | |
- name: Install local microceph snap | |
run: ~/actionutils.sh install_multinode | |
# NOTE(review): the meaning of the "custom" argument is defined inside
# actionutils.sh, which is not visible here.
- name: Bootstrap | |
run: ~/actionutils.sh bootstrap_head custom | |
- name: Setup cluster | |
run: ~/actionutils.sh cluster_nodes custom | |
- name: Verify config | |
run: ~/actionutils.sh test_ceph_conf | |
# Only node-wrk1 and node-wrk2 receive an OSD here; node-wrk0 gets one in the
# scale-up step that follows.
- name: Add 2 OSDs | |
run: | |
for c in node-wrk1 node-wrk2 ; do | |
~/actionutils.sh add_osd_to_node $c | |
done | |
lxc exec node-wrk0 -- sh -c "microceph.ceph -s" | |
# Step: expects osd_pool_default_crush_rule to read 1 with two OSDs and 2 once
# a third OSD has been added on node-wrk0.
- name: Test failure domain scale up | |
run: | |
set -uex | |
# We still have failure domain OSD | |
lxc exec node-wrk0 -- sh -c "sudo microceph.ceph config get mon osd_pool_default_crush_rule" | fgrep -x 1 | |
# Add a 3rd OSD, should switch to host failure domain | |
~/actionutils.sh add_osd_to_node node-wrk0 | |
~/actionutils.sh headexec wait_for_osds 3 | |
sleep 1 | |
lxc exec node-wrk0 -- sh -c "sudo microceph.ceph -s" | |
# Now default to host rule | |
lxc exec node-wrk0 -- sh -c "sudo microceph.ceph config get mon osd_pool_default_crush_rule" | fgrep -x 2 | |
- name: Test 3 osds present | |
run: | |
set -uex | |
lxc exec node-wrk0 -- sh -c "microceph.ceph -s" | egrep "osd: 3 osds: 3 up.*3 in" | |
# Step: the rule list must contain microceph_auto_host and the pool details
# must mention "crush_rule 2".
- name: Test crush rules | |
run: | |
set -uex | |
lxc exec node-wrk0 -- sh -c "microceph.ceph osd crush rule ls" | grep -F microceph_auto_host | |
lxc exec node-wrk0 -- sh -c "microceph.ceph osd pool ls detail" | grep -F "crush_rule 2" | |
- name: Add another OSD | |
run: | |
~/actionutils.sh add_osd_to_node node-wrk3 | |
~/actionutils.sh headexec wait_for_osds 4 | |
# Step: removes OSD 4 and expects the status to report 3 OSDs up and in again.
- name: Remove OSD again | |
run: | |
set -uex | |
lxc exec node-wrk0 -- sh -c "microceph disk remove 4" | |
lxc exec node-wrk0 -- sh -c "microceph.ceph -s" | egrep "osd: 3 osds: 3 up.*3 in" | |
# Step: delegates to the test_migration helper with node-wrk1 and node-wrk3 as
# arguments (helper body not visible here).
- name: Test migrate services | |
run: | |
set -uex | |
~/actionutils.sh test_migration node-wrk1 node-wrk3 | |
- name: Enable services on wrk1 | |
run: ~/actionutils.sh headexec enable_services node-wrk1 | |
# Step: removes OSD 3 with --prohibit-crush-scaledown and then checks that the
# microceph_auto_host rule is still listed.
- name: Remove OSD again but don't scaledown crush rule. | |
run: | |
set -uex | |
lxc exec node-wrk0 -- sh -c "microceph disk remove 3 --prohibit-crush-scaledown --bypass-safety-checks" | |
lxc exec node-wrk0 -- sh -c "microceph.ceph osd crush rule ls" | grep -F microceph_auto_host | |
- name: Re-Add OSD | |
run: | |
set -uex | |
~/actionutils.sh add_osd_to_node node-wrk0 | |
~/actionutils.sh headexec wait_for_osds 3 | |
# Step: after removing node-wrk3, `microceph status` must no longer list it.
- name: Test remove node wrk3 | |
run: | |
set -uex | |
~/actionutils.sh headexec remove_node node-wrk3 | |
if lxc exec node-wrk0 -- sh -c "microceph status" | grep -q "^- node-wrk3 " ; then | |
echo "Failed: node-wrk3 still present" | |
exit 1 | |
fi | |
output=$(lxc exec node-wrk0 -- sh -c "microceph.ceph -s") | |
# Either outcome is accepted: 3 mons, or 4 mons with node-wrk3 reported as
# out of quorum. The unquoted $output is word-split by echo, so the status
# text reaches grep as a single line.
(echo $output | fgrep "mon: 3 daemons") || | |
(echo $output | grep "mon: 4 daemons.*out of quorum: node-wrk3") | |
- name: Test client configurations | |
run: ~/actionutils.sh check_client_configs | |
# Job: single-host scenario that uses file-backed loop OSDs created by
# `microceph disk add loop,...`. It runs after build-microceph and consumes
# its "snaps" artifact.
loop-file-tests: | |
name: Test with loopback file OSDs | |
runs-on: ubuntu-22.04 | |
needs: build-microceph | |
steps: | |
- name: Download snap | |
uses: actions/download-artifact@v3 | |
with: | |
name: snaps | |
path: /home/runner | |
- name: Checkout code | |
uses: actions/checkout@v3 | |
with: | |
fetch-depth: 0 | |
- name: Copy utils | |
run: cp tests/scripts/actionutils.sh $HOME | |
- name: Clear FORWARD firewall rules | |
run: ~/actionutils.sh cleaript | |
- name: Free disk | |
run: ~/actionutils.sh free_runner_disk | |
- name: Install and setup | |
run: ~/actionutils.sh install_microceph | |
# Step: the spec "loop,1G,4" is followed by a wait for 4 OSDs.
- name: Add loopback file OSDs | |
run: | |
set -uex | |
sudo microceph disk add loop,1G,4 | |
~/actionutils.sh wait_for_osds 4 | |
sudo microceph.ceph -s | |
- name: Enable RGW | |
run: ~/actionutils.sh enable_rgw | |
- name: Exercise RGW | |
run: ~/actionutils.sh testrgw | |
# Step: removes osd.1 and waits until 3 OSDs remain.
- name: Remove OSD | |
run: | |
set -uex | |
sudo microceph disk remove osd.1 | |
~/actionutils.sh wait_for_osds 3 | |
sudo microceph.ceph -s | |
# Job: adds an OSD with separate WAL and DB devices on top of three loop-file
# OSDs. It runs after build-microceph and consumes its "snaps" artifact.
wal-db-tests: | |
name: Test WAL/DB device usage | |
runs-on: ubuntu-22.04 | |
needs: build-microceph | |
steps: | |
- name: Download snap | |
uses: actions/download-artifact@v3 | |
with: | |
name: snaps | |
path: /home/runner | |
- name: Checkout code | |
uses: actions/checkout@v3 | |
with: | |
fetch-depth: 0 | |
- name: Copy utils | |
run: cp tests/scripts/actionutils.sh $HOME | |
- name: Clear FORWARD firewall rules | |
run: ~/actionutils.sh cleaript | |
- name: Free disk | |
run: ~/actionutils.sh free_runner_disk | |
- name: Install and setup | |
run: ~/actionutils.sh install_microceph | |
- name: Add loopback file OSDs | |
run: | |
set -uex | |
sudo microceph disk add loop,1G,3 | |
~/actionutils.sh wait_for_osds 3 | |
sudo microceph.ceph -s | |
# Step: /dev/sdia is the data device, /dev/sdib the WAL device and /dev/sdic
# the DB device. Presumably create_loop_devices provides these nodes; verify
# against actionutils.sh.
- name: Add WAL/DB enabled OSD | |
run: | |
set -uex | |
~/actionutils.sh create_loop_devices | |
sudo microceph disk list | |
sudo microceph disk add /dev/sdia --wal-device /dev/sdib --db-device /dev/sdic | |
~/actionutils.sh wait_for_osds 4 | |
sudo microceph.ceph -s | |
# Step: fails when `microceph disk list` still shows a device that was handed
# to an OSD as its WAL or DB device.
- name: Check that disk list doesn't contain WAL/DB blockdevs
  run: |
    set -uex
    sudo microceph disk list
    # The WAL/DB devices passed to `disk add` in the previous step are
    # /dev/sdib and /dev/sdic. Matching on sdb/sdc could never hit those
    # paths, which made this check a no-op.
    for d in sdib sdic ; do
      if sudo microceph disk list | grep -q "/dev/$d" ; then
        echo "Error: found WAL/DB device: /dev/$d"
        exit 1
      fi
    done
# Both steps delegate to actionutils.sh subcommands (enable_rgw, testrgw);
# their bodies are not visible in this file.
- name: Enable RGW | |
run: ~/actionutils.sh enable_rgw | |
- name: Exercise RGW | |
run: ~/actionutils.sh testrgw | |
# Job: upgrade scenario. It installs quincy/stable from the store, builds a
# three-OSD cluster, exercises RGW, then installs the local build and
# exercises RGW again. It runs after build-microceph and consumes its "snaps"
# artifact.
upgrade-quincy-tests: | |
name: Test quincy upgrades | |
runs-on: ubuntu-22.04 | |
needs: build-microceph | |
steps: | |
- name: Download snap | |
uses: actions/download-artifact@v3 | |
with: | |
name: snaps | |
path: /home/runner | |
- name: Checkout code | |
uses: actions/checkout@v3 | |
with: | |
fetch-depth: 0 | |
- name: Copy utils | |
run: cp tests/scripts/actionutils.sh $HOME | |
- name: Clear FORWARD firewall rules | |
run: ~/actionutils.sh cleaript | |
- name: Free disk | |
run: ~/actionutils.sh free_runner_disk | |
- name: Install dependencies | |
run: ~/actionutils.sh setup_lxd | |
- name: Create containers with loopback devices | |
run: ~/actionutils.sh create_containers | |
- name: Install quincy stable from store | |
run: ~/actionutils.sh install_store quincy/stable | |
# Unlike the multi-node job, bootstrap_head and cluster_nodes are called
# without the "custom" argument here.
- name: Bootstrap | |
run: ~/actionutils.sh bootstrap_head | |
- name: Setup cluster | |
run: ~/actionutils.sh cluster_nodes | |
- name: Add 3 OSDs | |
run: | |
for c in node-wrk0 node-wrk1 node-wrk2 ; do | |
~/actionutils.sh add_osd_to_node $c | |
done | |
~/actionutils.sh headexec wait_for_osds 3 | |
- name: Enable RGW | |
run: ~/actionutils.sh headexec enable_rgw | |
- name: Exercise RGW | |
run: ~/actionutils.sh headexec testrgw | |
# From here on the locally built snap replaces the store version.
- name: Install local build | |
run: ~/actionutils.sh install_multinode | |
- name: Wait until 3 OSDs are up | |
run: ~/actionutils.sh headexec wait_for_osds 3 | |
- name: Exercise RGW again | |
run: ~/actionutils.sh headexec testrgw | |