Skip to content

Commit

Permalink
Improved logging when plugins are cancelled for taking too long
Browse files Browse the repository at this point in the history
Fixes: freeipa#322

Signed-off-by: Sam Morris <[email protected]>
  • Loading branch information
yrro committed Mar 12, 2024
1 parent 3d85d43 commit 001e8d9
Showing 1 changed file with 40 additions and 10 deletions.
50 changes: 40 additions & 10 deletions src/ipahealthcheck/core/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,9 +45,19 @@ def find_plugins(name, registry):


def run_plugin(plugin, available=(), timeout=constants.DEFAULT_TIMEOUT):
plugin_name = F"{plugin.__class__.__module__}:{plugin.__class__.__name__}"
timed_out = []
def signal_handler(signum, frame):
raise TimeoutError('Request timed out')

# Our exception will likely be caught by code called by the plugin,
# which may not log the traceback, or may even catch the exception
# itself! So while we throw the exception in order to interrupt the
# plugin code, we also stash a copy of it so that we can log it after
# the plugin returns.
timed_out.append(TimeoutError(f"Health check {plugin_name} cancelled after after {timeout} sec"))
logger.error("--- %s ---", timed_out[0])
traceback.print_stack()
logger.error("--- The following messages were logged by the plugin after it was cancelled. They may not indicate the reason why the plugin took too long to run. ---")
raise timed_out[0]
# manually calculate duration when we create results of our own
start = datetime.now(tz=timezone.utc)
signal.signal(signal.SIGALRM, signal_handler)
Expand All @@ -58,16 +68,36 @@ def signal_handler(signum, frame):
# Treat no result as success, fudge start time
result = Result(plugin, constants.SUCCESS, start=start)
yield result
except TimeoutError as e:
yield Result(plugin, constants.ERROR, exception=str(e),
start=start)
except Exception as e:
logger.debug('Exception raised: %s', e)
logger.debug(traceback.format_exc())
yield Result(plugin, constants.CRITICAL, exception=str(e),
traceback=traceback.format_exc(),
start=start)
if e is TimeoutError:
# The plugin code _may_ have raised our original TimeoutError
# however we also have to deal with cases where it did not raise
# anything! We do that in the finally block below; we don't
# therefore need to do anything here.
pass
else:
# We've got no way to know whether the plugin's own exception was
# related to our TimeoutError; if it was then we will yield a
# result here based on the plugin's own exception, and _also_ later
# on in the finally block.
logger.log('Exception raised in health check %r: %s', plugin_name, e, level=constants.CRITICAL, exc_info=True)
yield Result(plugin, constants.CRITICAL, exception=str(e),
traceback=traceback.format_exc(),
start=start)
finally:
if timed_out:
logger.error("--- Increasing the timeout in /etc/ipahealthcheck/ipahealthcheck.conf may allow this healthcheck to complete before the deadline. ---")
# If the plugin already handled our TimeoutError properly (i.e,
# logged it and raised its own exception with a sensible error
# message) then there is no need to yield our own error result
# here; however we can't rely on the plugin doing so. It may have
# caught our TimeoutError and logged something misleading; in that
# case let's err on the side of caution and return an additional
# result.
yield Result(plugin, constants.ERROR, exception=str(timed_out[0]),
start=start)
else:
logging.debug("Plugin %s complete after %s sec", plugin_name, datetime.utcnow() - start)
signal.alarm(0)


Expand Down

0 comments on commit 001e8d9

Please sign in to comment.