#!/bin/bash
# patch metadata: diff --git a/cronstatus.sh b/cronstatus.sh new file mode 100644 index 0000000..5647b07 --- /dev/null +++ b/cronstatus.sh @@ -0,0 +1,108 @@
# ======================================================================
#
# CRONSTATUS
#
# Reads all logfiles "*log" below $LOGDIR (files starting with "__" are
# skipped), prints a short status report per logfile and a final line
#   TOTALSTATUS: <n> cronjobs have an error
#
# A job is reported as failed if
#   - its returncode (SCRIPTRC) is not 0
#   - its ttl (SCRIPTTTL) is empty or shorter than the execution time
#   - its expire timestamp (JOBEXPIRE) is older than the start of this check
#
# ======================================================================

LOGDIR=/var/tmp/cronlogs

# timestamp of "now": a job whose JOBEXPIRE is older than this is outdated
typeset -i iMaxAge=$(date +%s)
typeset -i iErrJobs=0


# ----------------------------------------------------------------------
# FUNCTIONS
# ----------------------------------------------------------------------

# get a value from logfile (everything behind "=")
# param:  label
# global: $logfile
function getLogValue(){
  grep "^$1=" "$logfile" | cut -f 2- -d "="
}

# get a value from logfile as non negative integer.
# Only the first matching line and its first word are used. Anything that
# does not consist of digits only results in 0. The logfiles are in a
# directory that is writable for everybody, so their content must never
# reach an arithmetic evaluation unchecked.
# param:  label
# global: $logfile
function getLogInt(){
  local sValue
  sValue=$(getLogValue "$1" | head -1 | cut -f 1 -d " ")
  case "$sValue" in
    ''|*[!0-9]*) echo 0 ;;
    *)           echo $(( 10#$sValue )) ;;  # 10# - leading zeros are not octal
  esac
}

# print the status report of a single logfile
# param:   filename of the logfile
# returns: 0 if all checks were OK; 1 if a check failed
function showJobStatus(){
  local logfile=$1   # read by getLogValue and getLogInt
  local sPre=" "
  local sCmd sLastStart sTTL statusRc statusTtl statusExpire
  local -i iErr=0 iJobExpire rc iExectime iTTLmin iTTLsec iTTL

  sCmd=$(getLogValue SCRIPTNAME)
  sLastStart=$(getLogValue SCRIPTSTARTTIME)
  iJobExpire=$(getLogInt JOBEXPIRE)
  rc=$(getLogInt SCRIPTRC)
  iExectime=$(getLogInt SCRIPTEXECTIME)
  sTTL=$(getLogValue SCRIPTTTL | head -1)
  iTTLmin=$(getLogInt SCRIPTTTL)

  # ----- check return code
  statusRc='OK'
  if [ "$rc" -ne 0 ]; then
    iErr=$(( iErr + 1 ))
    statusRc='ERROR'
  fi

  # ----- check ttl value
  iTTLsec=$(( iTTLmin * 60 ))
  if [ -z "$sTTL" ]; then
    iErr=$(( iErr + 1 ))
    statusTtl="ERROR: ttl value is empty"
  elif [ "$iTTLsec" -lt "$iExectime" ]; then
    iErr=$(( iErr + 1 ))
    # show the ttl in minutes here - not the value scaled to hours or days
    statusTtl="ERROR: $iTTLmin min = $iTTLsec s - is too low; exec time is $iExectime s - set a higher TTL for this cronjob"
  else
    # human readable ttl in min/ hours/ days
    statusTtl="$iTTLmin min"
    iTTL=$iTTLmin
    if [ "$iTTL" -gt 60 ]; then
      iTTL=$(( iTTL / 60 ))
      statusTtl="$sTTL - $iTTL h"
      if [ "$iTTL" -gt 24 ]; then
        iTTL=$(( iTTL / 24 ))
        statusTtl="$sTTL - $iTTL d"
      fi
    fi
    statusTtl="$statusTtl OK"
  fi

  # ----- check expire
  statusExpire=$(date -d "@$iJobExpire" '+%Y-%m-%d %H:%M:%S')
  if [ "$iJobExpire" -lt "$iMaxAge" ]; then
    statusExpire="${statusExpire} ERROR"
    iErr=$(( iErr + 1 ))
  else
    statusExpire="${statusExpire} OK"
  fi

  # ----- OUTPUT
  echo
  echo "--- $logfile"

  echo "${sPre}${sCmd}"
  echo "${sPre}last start: ${sLastStart}"
  echo "${sPre}returncode: ${rc} ${statusRc}"
  echo "${sPre}duration: ${iExectime} s"
  echo "${sPre}ttl: ${statusTtl}"
  echo "${sPre}expires: ${iJobExpire} ${statusExpire}"

  if [ "$iErr" -gt 0 ]; then
    echo "${sPre}CHECK FAILED"
    return 1
  fi
  return 0
}

# loop over all logfiles (newest first) and print the total status
# global: $LOGDIR (read), $iErrJobs (written)
function cronstatus_main(){
  local logfile
  iErrJobs=0

  # The list is fed by process substitution and not by a pipe: a piped
  # "while" loop runs in a subshell and the error counter would be lost.
  while IFS= read -r logfile
  do
    showJobStatus "$logfile" || iErrJobs=$(( iErrJobs + 1 ))
  done < <(ls -1t -- "$LOGDIR"/*log | grep -Fv "/__")

  echo
  echo "TOTALSTATUS: $iErrJobs cronjobs have an error"
}

# ----------------------------------------------------------------------
# MAIN
# ----------------------------------------------------------------------

cronstatus_main

# patch metadata: diff --git a/cronwrapper.sh b/cronwrapper.sh new file mode 100644 index 0000000..da94341 --- /dev/null +++ b/cronwrapper.sh @@ -0,0 +1,214 @@
#!/bin/bash
# ------------------------------------------------------------
#
# CRONWRAPPER
#
# ------------------------------------------------------------
# What is this?
# It calls an arbitrary script.
# All output is written in a fixed syntax which makes it easier
# to parse the output.
#
# For MPC:
# 1) all output is written automatically into a logfile
#    (see $OUTFILE).
# 2) A cronjob must run on one server only - ttl was added.
#    A lockfile with an expire time is written.
#
# Usage:
#   {scriptname} [ttl] [script to call] [label]
#   ttl:    call interval of this script in cron - in minutes
#   script: script with complete path
#   label:  optional
#
# ------------------------------------------------------------
# 2002-02-06 ahahn V1.0
# 2002-07-15       stderr is written into the logfile too
# 2002-09-17 ahahn an email is sent if the script is not
#                  executable.
# 2003-04-05 ahahn show output of executed script
# 2004-03-26 ahahn added output with labels 2 grab infos from output
# 2006-01-01 ahahn disabled email
# 2009-05-01 ahahn MPC: no output on stdout at all - output goes to the log only
# 2009-05-04 ahahn test for execute permissions was disabled
# 2009-05-13 ahahn check: a cronjob must run on one server only
#                  This required a change of the parameter structure
# 2009-05-14 ahahn added sleep with the help of what_am_i
# 2009-05-18 ahahn more infos about locking and executing server in the output
# 2010-10-19 ahahn add JOBEXPIRE to output (to detect outdated cronjobs)
# 2012-04-03 ahahn source $0.cfg for custom variable values
# 2012-04-04 ahahn a running job uses a separate logfile
# 2012-04-05 ahahn TTL was added to the output
# 2012-04-13 ahahn added joblog
# 2013-05-15 axel.hahn@iml.unibe.ch FIRST IML VERSION
# 2013-07-xx axel.hahn@iml.unibe.ch lock expires after the TTL parameter value plus max. 1h
# 2013-08-07 axel.hahn@iml.unibe.ch strip html in the output
# 2017-10-13 axel.hahn@iml.unibe.ch use eval to execute multiple commands
# ------------------------------------------------------------

# helper function - appends all its arguments as one line to $OUTFILE
# The arguments are written as they are: no globbing, no word splitting.
function w() {
  printf '%s\n' "$*" >>"$OUTFILE"
}

# ------------------------------------------------------------
# CONFIG
# ------------------------------------------------------------
# load general configuration
# . `dirname $0`/config_allgemein.sh
line1="--------------------------------------------------------------------------------"

# ttl must be a plain number; a missing or invalid value is handled as 0
typeset -i TTL=0
bTtlValid=1
case "$1" in
  '')       ;;
  *[!0-9]*) bTtlValid=0 ;;
  *)        TTL=$(( 10#$1 )) ;;  # 10# - leading zeros are not octal
esac
CALLSCRIPT=$2
LABELSTR=$3
sMyHost=$(hostname)

if [ -z "${LABELSTR}" ]; then
  # default label: name of the called command without path and arguments.
  # Cut the arguments first - they may contain slashes too.
  sCmdWord=${CALLSCRIPT%% *}
  LABELSTR=${sCmdWord##*/}
fi
# the label must not contain underscores
LABELSTR=${LABELSTR//_/-}
TOUCHPART="_flag-${LABELSTR}_expire_"

LOGDIR="/var/tmp/cronlogs"
# WHATAMI=/data/srdrs/admin/bin/what_am_i
JOBBLOGBASE=${sMyHost}_joblog_

# . $0.cfg

FINALOUTFILE="$LOGDIR/${sMyHost}_${LABELSTR}.log"
JOBLOG="$LOGDIR/${JOBBLOGBASE}$(date +%a).done"
# OUTFILE="$LOGDIR/`hostname`_${LABELSTR}.log"
OUTFILE="$FINALOUTFILE.running"
typeset -i iStart=$(date +%s)


# ------------------------------------------------------------
# WRITE HEADER
# ------------------------------------------------------------
mkdir -p "$LOGDIR" 2>/dev/null
# NOTE(review): the log directory is writable for everybody. All values
# that are read back from it (names of lock files, log content) must be
# handled as untrusted input.
chmod 777 "$LOGDIR" 2>/dev/null
rm -f "$OUTFILE" 2>/dev/null
touch "$OUTFILE"
w "REM $line1"
w "REM CRON WRAPPER - ${sMyHost}"
# w REM `$WHATAMI`
w "REM $line1"

w "SCRIPTNAME=${CALLSCRIPT}"
w "SCRIPTTTL=${TTL}"
w "SCRIPTSTARTTIME=$(date "+%Y-%m-%d %H:%M:%S"), $iStart"
w "SCRIPTLABEL=${LABELSTR}"
if [ "$bTtlValid" -eq 0 ]; then
  w "REM WARNING: ttl value '$1' is not a number - using 0"
fi

if [ -z "${CALLSCRIPT}" ]; then
  w "REM STOP: no script was found. check syntax for $(basename "$0")"
  exit 1
fi
# ------------------------------------------------------------
# wait according to the number in the service;
# i.e. author-01 waits 0 sec; author-02 waits 1 sec
# ------------------------------------------------------------
# typeset -i sleep=`$WHATAMI | head -1 | sed "s#[a-zA-Z :]##g" | sed "s#--##g" | cut -f 2 -d "-"`-1
# if [ $sleep -lt 0 ]; then
#   sleep=0
# fi
#
# w REM sleep $sleep sec
# sleep $sleep


# ------------------------------------------------------------
# CHECK: runs this job on another machine?
# ------------------------------------------------------------
w "REM $line1"
# w REM check: runs this job on another machine?

# the lock expires after ttl plus a grace time of 1.5 x ttl (max. 60 min);
# a ttl of 0 writes the expire timestamp 0
typeset -i iExpDelta=$(( TTL * 3 / 2 ))
if [ "$iExpDelta" -gt 60 ]; then
  iExpDelta=60
fi
typeset -i iExpire=0
if [ "$TTL" -ne 0 ]; then
  iExpire=$(( $(date +%s) + TTL * 60 + iExpDelta * 60 ))
fi

# find the lock file "_flag-<label>_expire_<timestamp>_<hostname>";
# if there is more than one (it should not happen) the first match is used
TOUCHFILE=
for sCandidate in "${LOGDIR}"/*"${TOUCHPART}"*
do
  if [ -e "$sCandidate" ]; then
    TOUCHFILE=${sCandidate##*/}
    break
  fi
done

if [ -n "$TOUCHFILE" ]; then
  sRest=${TOUCHFILE#*"${TOUCHPART}"}
  # the timestamp comes from a filename: accept digits only
  sExpire=${sRest%%_*}
  case "$sExpire" in
    ''|*[!0-9]*) sExpire=0 ;;
  esac
  typeset -i expdate=$(( 10#$sExpire ))
  case "$sRest" in
    *_*) runserver=${sRest#*_} ;;
    *)   runserver= ;;
  esac

  w REM INFO: expires $expdate - $(date -d "@$expdate")
  typeset -i timeleft=$(( expdate - iStart ))
  w "REM INFO: job is locked for other servers for $timeleft more seconds"
  # compare the complete hostname; a substring match would mix up
  # hosts like "web1" and "web10"
  if [ "$sMyHost" != "$runserver" ]; then
    w "REM INFO: it locked up to $expdate by $runserver"
    if [ "$timeleft" -gt 0 ]; then
      w "REM STOP: job is locked."
      mv "$OUTFILE" "${FINALOUTFILE}"
      exit 2
    else
      w "REM INFO: OK, job is expired"
    fi
  else
    w "REM INFO: job was executed on the same machine and can be executed here again."
  fi
else
  w "REM OK, executing job the first time"
fi

# -- delete all touchfiles of this job
rm -f "${LOGDIR}"/*"${TOUCHPART}"* 2>/dev/null

# -- create touchfile for this server
touch "${LOGDIR}/${TOUCHPART}${iExpire}_${sMyHost}"
w "JOBEXPIRE=${iExpire}"
# w REM INFO: created touchfile ${TOUCHPART}${iExpire}_`hostname`
w "REM $line1"

# ------------------------------------------------------------
# MAIN
# ------------------------------------------------------------
rc=none
RETSTATUS="OK"

# temporary file for the output of the job; it is created by mktemp to
# prevent a predictable filename in /tmp
LOGFILE=$(mktemp /tmp/call_any_script_XXXXXX) || LOGFILE=/tmp/call_any_script_$$.log

# eval is wanted here: the command string from the crontab can contain
# several commands. It is quoted to hand it over to eval unchanged.
eval "${CALLSCRIPT}" >"${LOGFILE}" 2>&1
rc=$?
if [ "$rc" -ne 0 ]; then
  RETSTATUS="WARNING !!!"
fi


typeset -i iEnd=$(date +%s)
w "SCRIPTENDTIME=$(date "+%Y-%m-%d %H:%M:%S"), $iEnd"
iExectime=$(( iEnd - iStart ))
w "SCRIPTEXECTIME=$iExectime s"

w "SCRIPTRC=$rc"

# w "sending email..."
# cat "${LOGFILE}" | mail -s"${EMAIL_SUBJECT} - ${LABELSTR} - $RETSTATUS" "${EMAIL_TO}"
# w " rc=$?"
w "REM $line1"

# append the output of the job: html tags are removed and each line
# gets the prefix "SCRIPTOUT="
sed -e 's/<[^>]*>//g' -e 's#^#SCRIPTOUT=#' <"${LOGFILE}" >>"$OUTFILE"
w "REM $line1"

# write a log for execution of a cronjob
echo "job=${LABELSTR}:host=$(hostname):start=$iStart:end=$iEnd:exectime=$iExectime:ttl=${TTL}:rc=$rc" >>"$JOBLOG"
chmod 777 "$JOBLOG" 2>/dev/null
# remove joblogs that are older than 4 days
find "$LOGDIR" -name "${JOBBLOGBASE}*" -type f -mtime +4 -exec rm -f -- {} +

# ------------------------------------------------------------
# CLEANUP AND END
# ------------------------------------------------------------
rm -f "${LOGFILE}"
w "REM $0 finished at $(date)"
mv "$OUTFILE" "${FINALOUTFILE}"

# ------------------------------------------------------------
# EOF
# ------------------------------------------------------------

# patch metadata: diff --git a/inc_cronfunctions.sh b/inc_cronfunctions.sh new file mode 100644 index 0000000..c524e85 --- /dev/null +++ b/inc_cronfunctions.sh @@ -0,0 +1,49 @@

# handling of returncodes
#   rc    - returncode of the last checked command
#   rcAll - sum of all returncodes
typeset -i rc=0
typeset -i rcAll=0

# fetch the returncode of the command that was executed directly before,
# print it as "rc=<n>" and add it to rcAll
# It must be called directly after the command to check.
function fetchRc(){
  rc=$?
  echo "rc=$rc"
  rcAll=$(( rcAll + rc ))
}

# execute a command with shell trace (set -vx; the trace shows the
# returncode too) and add its returncode to rcAll
# params: command and its arguments
#         Several params are executed as they are, so arguments can contain
#         spaces. A single param is a complete command line that will be
#         split into words (this is the former behaviour).
function exec2() {

  set -vx
  if [ $# -eq 1 ]; then
    $1
  else
    "$@"
  fi
  rc=$?
  set +vx
  rcAll=$(( rcAll + rc ))
}


# get a list of directories from a remote server
# params: server
#         local target directory
#         list of directories (remote); the list is split at whitespaces,
#         so a directory name must not contain spaces
function getRemoteFiles(){

  local srvSource=$1
  local targetDir=$2
  shift 2

  local dirlist=$*
  local mydir

  echo "--- ${srvSource} - to $targetDir"
  mkdir -p "${targetDir}"

  # $dirlist is unquoted by intention: see the description of the params
  for mydir in $dirlist
  do
    echo -n "${mydir} "
    rsync -a "${srvSource}:${mydir}" "${targetDir}"
    fetchRc
  done
  echo

}

# print the sum of all returncodes and exit with it
# An exit status is taken modulo 256: a sum of 256 would end with status 0
# and look like a success. Higher values are limited to 255.
function quit(){
  echo
  echo beende mit Returncode $rcAll
  if [ "$rcAll" -gt 255 ]; then
    exit 255
  fi
  exit $rcAll
}

# patch metadata: diff --git a/readme.md b/readme.md new file mode 100644 index 0000000..f9089bf --- /dev/null +++ b/readme.md @@ -0,0 +1,71 @@
# +# Axels Cronwrapper
# +
# +Unix shell scripts to make it easier handling the status of cronjobs.
# +
# +GNU GPL 3.0
# +
# +
# +
# +## Introduction
# +
# +Default Unix and linux cronjobs are quite basic stuff.
Mostly you create +"simple, stupid" jobs without output ... that just run. Or should. + +If you use a cronjob you need to hide the output, otherwise the root user gets an +email. So if you generate output and have many cronjobs then you need a +convention how to name your log files. + +How do you check if a job was successful? Watching each log? + +How do you detect if the last job was successful but does not run anymore? + +## Cronwrapper + +### Installation + +Copy the 3 shell script files somewhere. I suggest /usr/local/bin/. +If you use ansible, puppet, ... use a file command to put them on all +your systems into the same directory. + +### Usage + +As an example ... if you have a cronjob like this: + +```bash +12 3 * * * root /usr/local/bin/my-database-dumper.sh +``` + +Using my wrapper +* you add the wrapper in front +* add a TTL (in minutes) as first param +* add the command as second param - if you use arguments, then you need to quote it +* optional: add a label for the output file as third param (it overrides the default naming convention of the log) + +The cronjob above needs to be rewritten like that: +```bash +12 3 * * * root /usr/local/bin/cronwrapper.sh 1440 /usr/local/bin/my-database-dumper.sh +``` + +### Advantages + +Just using a wrapper breaks tons of limits! + +* The wrapper fetches any output and creates a log file with the name of the started script + (remark: you can override the naming with the 3rd parameter). + Do not try to keep silent anymore: write as much output as you want, write output that lets you understand the execution! +* The wrapper logs by itself a few things: + * the started command + * starting time + * ending time + * ... and having these: the execution time + * the exit code of the command/ script; + This means: be strict like all commands are! 
Write your cronjob script so that it + ends with exit code 0 if it is successful and quits with non-zero if any + error occurs. + * all metadata and the output will be written in a log file with parsable + syntax! Just using grep and cut you could verify all your jobs. But there is + an additional check script too. + * The TTL value (parameter 1) generates a file with a timestamp. The check + script uses it to detect if a cronjob log is outdated. + + \ No newline at end of file