Skip to content

Commit

Permalink
Surface slow interrupt warnings in the executor logs
Browse files Browse the repository at this point in the history
  • Loading branch information
bduffany committed Dec 20, 2024
1 parent b684cda commit 0ca1f25
Showing 1 changed file with 13 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -229,7 +229,8 @@ var (
vmIdx int
vmIdxMu sync.Mutex

fatalErrPattern = regexp.MustCompile(`\b` + fatalInitLogPrefix + `(.*)`)
fatalErrPattern = regexp.MustCompile(`\b` + fatalInitLogPrefix + `(.*)`)
slowInterruptWarningPattern = regexp.MustCompile(`hrtimer: interrupt took \d+ ns`)
)

func init() {
Expand Down Expand Up @@ -2081,6 +2082,12 @@ func (c *FirecrackerContainer) Exec(ctx context.Context, cmd *repb.Command, stdi
if err := c.parseSegFault(result); err != nil {
log.CtxWarningf(ctx, "Segfault occurred during task execution (recycled=%v) : %s", c.recycled, err)
}
// Slow hrtimer interrupts can happen during periods of high contention
// and may help explain action failures - surface these in the executor
// logs.
if warning := c.parseSlowInterruptWarning(); warning != "" {
log.CtxWarningf(ctx, "Slow interrupt warning reported in kernel logs: %q", warning)
}
}()

// Emit metrics to track time spent preparing VM to execute a command
Expand Down Expand Up @@ -2585,6 +2592,11 @@ func (c *FirecrackerContainer) parseSegFault(cmdResult *interfaces.CommandResult
return status.UnavailableErrorf("process hit a segfault:\n%s", tail)
}

func (c *FirecrackerContainer) parseSlowInterruptWarning() string {
tail := string(c.vmLog.Tail())
return slowInterruptWarningPattern.FindString(tail)
}

func (c *FirecrackerContainer) observeStageDuration(taskStage string, durationUsec int64) {
metrics.FirecrackerStageDurationUsec.With(prometheus.Labels{
metrics.Stage: taskStage,
Expand Down

0 comments on commit 0ca1f25

Please sign in to comment.