From 3efe4207dd695e0f97e55350e54428ff01312177 Mon Sep 17 00:00:00 2001 From: Milan Lenco Date: Mon, 2 Oct 2023 17:05:00 +0200 Subject: [PATCH] Collect EVE info via console if ssh access is not working Signed-off-by: Milan Lenco --- .github/actions/collect-info/action.yml | 16 +++---- .github/workflows/eden.yml | 13 ++---- pkg/controller/functions.go | 1 + shell-scripts/collect-info-console.sh | 58 +++++++++++++++++++++++++ shell-scripts/collect-info-ssh.sh | 46 ++++++++++++++++++++ 5 files changed, 114 insertions(+), 20 deletions(-) create mode 100755 shell-scripts/collect-info-console.sh create mode 100755 shell-scripts/collect-info-ssh.sh diff --git a/.github/actions/collect-info/action.yml b/.github/actions/collect-info/action.yml index 289525008..a8e6f3839 100644 --- a/.github/actions/collect-info/action.yml +++ b/.github/actions/collect-info/action.yml @@ -1,20 +1,14 @@ name: 'Collect and store debug info' -description: 'Collect debug info using EVE script executed via ssh and store downloaded tarball under the specified file name' +description: 'Collect debug info using EVE script executed via ssh or console and store downloaded tarball under the specified file name' runs: using: 'composite' steps: - name: Collect info run: | - # Give EVE 5 minutes at most to enable ssh access (if tests failed early). - for i in $(seq 60); do ./eden eve ssh && break || sleep 5; done - ./eden sdn fwd eth0 22 --\ - ssh -o StrictHostKeyChecking=no -p FWD_PORT -i ./dist/default-certs/id_rsa root@FWD_IP collect-info.sh &&\ - ./eden sdn fwd eth0 22 --\ - scp -o StrictHostKeyChecking=no -P FWD_PORT -i ./dist/default-certs/id_rsa root@FWD_IP:/persist/eve-info-* . &&\ - # upload-artifact complains about colon in the file name - # make sure to update upload step if changing name - mv eve-info-* eve-info.tar.gz ||\ - echo "failed to collect info" + # Do not pollute console logs which are collected by publish-logs action. 
+ cp dist/default-eve.log dist/default-eve.log.backup || true + ./shell-scripts/collect-info-ssh.sh || ./shell-scripts/collect-info-console.sh 120 || echo "failed to collect info" + cp dist/default-eve.log.backup dist/default-eve.log || true shell: bash working-directory: "./eden" diff --git a/.github/workflows/eden.yml b/.github/workflows/eden.yml index 4b4e83dc1..04defea55 100644 --- a/.github/workflows/eden.yml +++ b/.github/workflows/eden.yml @@ -60,15 +60,10 @@ jobs: - name: Collect info if: ${{ failure() }} run: | - # Give EVE 5 minutes at most to enable ssh access (if tests failed early). - for i in $(seq 60); do ./eden eve ssh && break || sleep 5; done - ./eden sdn fwd eth0 22 --\ - ssh -o StrictHostKeyChecking=no -p FWD_PORT -i ./dist/default-certs/id_rsa root@FWD_IP collect-info.sh &&\ - ./eden sdn fwd eth0 22 --\ - scp -o StrictHostKeyChecking=no -P FWD_PORT -i ./dist/default-certs/id_rsa root@FWD_IP:/persist/eve-info-* . &&\ - # upload-artifact complains about colon in the file name - mv eve-info-* eve-info.tar.gz ||\ - echo "failed to collect info" + # Do not pollute console logs which are collected by publish-logs action. 
+ cp dist/default-eve.log dist/default-eve.log.backup || true + ./shell-scripts/collect-info-ssh.sh || ./shell-scripts/collect-info-console.sh 120 || echo "failed to collect info" + cp dist/default-eve.log.backup dist/default-eve.log || true - name: Collect logs if: ${{ always() }} run: | diff --git a/pkg/controller/functions.go b/pkg/controller/functions.go index 991d5f2ba..d528c0729 100644 --- a/pkg/controller/functions.go +++ b/pkg/controller/functions.go @@ -127,6 +127,7 @@ func (cloud *CloudCtx) OnBoardDev(node *device.Ctx) error { node.SetConfigItem("app.allow.vnc", "true") node.SetConfigItem("newlog.allow.fastupload", "true") node.SetConfigItem("timer.download.retry", "60") + node.SetConfigItem("debug.enable.console", "true") // TODO: allow to enable/disable: //node.SetConfigItem("network.fallback.any.eth", "disabled") log.Debugf("will apply devModel %s", node.GetDevModel()) diff --git a/shell-scripts/collect-info-console.sh b/shell-scripts/collect-info-console.sh new file mode 100755 index 000000000..8e8308d46 --- /dev/null +++ b/shell-scripts/collect-info-console.sh @@ -0,0 +1,58 @@ +#!/bin/sh + +# This script runs collect-info.sh on EVE VM and downloads produced tarball +# using only serial console. This is especially useful when networking +# on the virtualized EVE is not working and therefore collect-info-ssh.sh +# is unable to do the same via SSH tunnel. + +# Use output filename without colon, otherwise GitHub action "upload-artifact" complains. +OUTPUT="eve-info.tar.gz" + +# 20 seconds should be enough for collect-info.sh to prepare tarball with debug info +# if run locally on a solid machine. However, on GitHub runners, it can take up to 2 minutes +# to complete (which is what we set from GitHub actions). +WAIT_TIME="${1:-20}" + +# Switch to debug container where collect-info.sh is installed. 
+for i in $(seq 3); do + { + echo "eve verbose off"; echo "eve enter debug"; sleep 3; + echo "which collect-info.sh"; sleep 3 + } | telnet localhost 7777 | tee telnet.stdout + grep -q "/usr/bin/collect-info.sh" telnet.stdout && break + sleep 60 +done + +for i in $(seq 3); do + { + echo "rm -f /persist/eve-info*"; echo "/usr/bin/collect-info.sh"; + sleep $((WAIT_TIME+60*(i-1))) + } | telnet localhost 7777 | tee telnet.stdout + TGZNAME="$(sed -n "s/EVE info is collected '\(.*\)'/\1/p" telnet.stdout)" + [ -n "${TGZNAME}" ] && break +done + +if [ -z "${TGZNAME}" ]; then + echo "Failed to run collect-info.sh script" + exit 1 +fi + +for i in $(seq 3); do + { + # Filename does not fit on one console line, we have to use asterisk. + echo "echo \>\>\>\$(base64 -w 0 /persist/eve-info*)\<\<\<"; + # This is fairly quick even on GitHub runners - around 10 seconds, but depends + # on the tarball size. + sleep $((20+60*(i-1))) + } | telnet localhost 7777 | sed -n "s/.*>>>\(.*\)<<<.*/\1/p" | base64 -d > "${OUTPUT}" + [ -s "${OUTPUT}" ] && break + echo "Failed to receive eve-info tarball, retrying..." +done + +if [ ! -s "${OUTPUT}" ]; then + echo "Failed to receive eve-info" + exit 1 +fi + +FILESIZE="$(stat -c%s "$OUTPUT")" +echo "Received ${OUTPUT} with size ${FILESIZE}" \ No newline at end of file diff --git a/shell-scripts/collect-info-ssh.sh b/shell-scripts/collect-info-ssh.sh new file mode 100755 index 000000000..9e7730235 --- /dev/null +++ b/shell-scripts/collect-info-ssh.sh @@ -0,0 +1,46 @@ +#!/bin/sh + +# Use output filename without colon, otherwise GitHub action "upload-artifact" complains. 
+OUTPUT="eve-info.tar.gz" + +ssh() { + ./eden sdn fwd eth0 22 --\ + ssh -o StrictHostKeyChecking=no -p FWD_PORT -i ./dist/default-certs/id_rsa root@FWD_IP "$@" +} + +scp() { + ./eden sdn fwd eth0 22 --\ + scp -o StrictHostKeyChecking=no -P FWD_PORT -i ./dist/default-certs/id_rsa "root@FWD_IP:$1" "$2" +} + +if ./eden eve status | grep -q "no onboarded EVE"; then + echo "Cannot get eve-info via SSH from non-onboarded EVE VM" + exit 1 +fi + +# Give EVE 5 minutes at most to enable ssh access. +# This delay is typically needed if tests failed early. +for i in $(seq 60); do + ./eden eve ssh : && break || sleep 5 +done + +ssh collect-info.sh > ssh.stdout; SSH_RV=$?; cat ssh.stdout # keep ssh's own exit status; piping into tee would report tee's instead +if [ "${SSH_RV}" -ne 0 ]; then + echo "Failed to run collect-info.sh script" + exit 1 +fi + +TGZNAME="$(sed -n "s/EVE info is collected '\(.*\)'/\1/p" ssh.stdout)" +if [ -z "${TGZNAME}" ]; then + echo "Failed to parse eve-info tarball filename" + exit 1 +fi + +scp "${TGZNAME}" "${OUTPUT}" +if [ $? -ne 0 ]; then + echo "Failed to receive eve-info" + exit 1 +fi + +FILESIZE="$(stat -c%s "$OUTPUT")" +echo "Received ${OUTPUT} with size ${FILESIZE}" \ No newline at end of file