diff --git a/enterprise/server/remote_execution/container/container.go b/enterprise/server/remote_execution/container/container.go index 2f5c2ff6e40..b26594e3fa6 100644 --- a/enterprise/server/remote_execution/container/container.go +++ b/enterprise/server/remote_execution/container/container.go @@ -229,6 +229,16 @@ func (s *UsageStats) Reset() { s.peakMemoryUsageBytes = 0 } +// TODO: remove after debugging stats issue +func (s *UsageStats) Clone() *UsageStats { + clone := *s + if s.last != nil { + clone.last = s.last.CloneVT() + } + // Baseline PSI protos are readonly; no need to clone. + return &clone +} + // TaskStats returns the usage stats for an executed task. func (s *UsageStats) TaskStats() *repb.UsageStats { if s.last == nil { diff --git a/enterprise/server/remote_execution/containers/podman/podman.go b/enterprise/server/remote_execution/containers/podman/podman.go index 1edeac05715..922cacb289d 100644 --- a/enterprise/server/remote_execution/containers/podman/podman.go +++ b/enterprise/server/remote_execution/containers/podman/podman.go @@ -479,7 +479,32 @@ func (c *podmanCommandContainer) Create(ctx context.Context, workDir string) err return nil } +// As a precursor to https://github.com/buildbuddy-io/buildbuddy/pull/7126, see +// how much CPU usage has been incurred since we resumed the runner. Any usage +// here is being misattributed to the exec_duration, when the usage actually +// happened while downloading inputs and/or refreshing container credentials. +// TODO: remove after debugging CPU sizing issue +func (c *podmanCommandContainer) logCPUSinceUnpause(ctx context.Context) { + s := c.stats.Clone() + s.Reset() + cid, err := c.getCID(ctx) + if err != nil { + return + } + lifetimeStats, err := c.cgroupPaths.Stats(ctx, cid) + if err != nil { + return + } + s.Update(lifetimeStats) + cpuNanos := s.TaskStats().GetCpuNanos() + if cpuNanos > 0 { + log.CtxInfof(ctx, "%d CPU-nanos used between Unpause() and Exec()", cpuNanos) + } +} + func (c *podmanCommandContainer) Exec(ctx context.Context, cmd *repb.Command, stdio *interfaces.Stdio) *interfaces.CommandResult { + c.logCPUSinceUnpause(ctx) + // Reset usage stats since we're running a new task. Note: This throws away // any resource usage between the initial "Create" call and now, but that's // probably fine for our needs right now.