oci runtime: set CPU weight according to task size (#7827)
Behind a flag, sets the cgroup2 CPU weight so that when the executor
doesn't have spare CPU capacity, it throttles the CPU usage of each OCI
container in proportion to its estimated task size. This roughly
guarantees that each action gets at least the amount of CPU specified
in its task size.
bduffany authored Oct 30, 2024
1 parent d73f747 commit 68b433a
Showing 3 changed files with 83 additions and 10 deletions.
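
To make the change concrete before the diff: the task's estimated milliCPU is clamped into the OCI "shares" range, and crun then maps shares onto cgroup2 cpu.weight with a linear formula. Below is a minimal runnable sketch of that mapping; taskSizeToCPUWeight is an illustrative name (not a BuildBuddy API), while the clamp bounds and the formula come from the diff and the crun docs it links.

package main

import "fmt"

// Min/max of the OCI "shares" range (same values as the constants in the diff).
const (
	cpuSharesMin = 2
	cpuSharesMax = 262_144
)

// taskSizeToCPUWeight clamps a task's estimated milliCPU into the shares
// range, then applies crun's documented linear shares-to-cgroup2-weight
// mapping.
func taskSizeToCPUWeight(milliCPU int64) int64 {
	shares := min(max(milliCPU, cpuSharesMin), cpuSharesMax) // Go 1.21+ builtins
	return 1 + ((shares-2)*9999)/262142
}

func main() {
	// Under CPU contention, a 2.5-CPU task is weighted ~5x a 0.5-CPU task.
	fmt.Println(taskSizeToCPUWeight(500))     // 19
	fmt.Println(taskSizeToCPUWeight(2_500))   // 96
	fmt.Println(taskSizeToCPUWeight(300_000)) // clamped to max shares: 10000
}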
@@ -77,6 +77,7 @@ go_test(
"//enterprise/server/remote_execution/workspace",
"//enterprise/server/util/oci",
"//proto:remote_execution_go_proto",
"//proto:scheduler_go_proto",
"//proto:worker_go_proto",
"//server/interfaces",
"//server/testutil/testenv",
@@ -44,12 +44,13 @@ import (
 )
 
 var (
-	Runtime     = flag.String("executor.oci.runtime", "", "OCI runtime")
-	runtimeRoot = flag.String("executor.oci.runtime_root", "", "Root directory for storage of container state (see <runtime> --help for default)")
-	pidsLimit   = flag.Int64("executor.oci.pids_limit", 2048, "PID limit for OCI runtime. Set to -1 for unlimited PIDs.")
-	cpuLimit    = flag.Int("executor.oci.cpu_limit", 0, "Hard limit for CPU resources, expressed as CPU count. Default (0) is no limit.")
-	dns         = flag.String("executor.oci.dns", "8.8.8.8", "Specifies a custom DNS server for use inside OCI containers. If set to the empty string, mount /etc/resolv.conf from the host.")
-	netPoolSize = flag.Int("executor.oci.network_pool_size", 0, "Limit on the number of networks to be reused between containers. Setting to 0 disables pooling. Setting to -1 uses the recommended default.")
+	Runtime          = flag.String("executor.oci.runtime", "", "OCI runtime")
+	runtimeRoot      = flag.String("executor.oci.runtime_root", "", "Root directory for storage of container state (see <runtime> --help for default)")
+	pidsLimit        = flag.Int64("executor.oci.pids_limit", 2048, "PID limit for OCI runtime. Set to -1 for unlimited PIDs.")
+	cpuLimit         = flag.Int("executor.oci.cpu_limit", 0, "Hard limit for CPU resources, expressed as CPU count. Default (0) is no limit.")
+	cpuSharesEnabled = flag.Bool("executor.oci.cpu_shares_enabled", false, "Enable CPU weighting based on task size.")
+	dns              = flag.String("executor.oci.dns", "8.8.8.8", "Specifies a custom DNS server for use inside OCI containers. If set to the empty string, mount /etc/resolv.conf from the host.")
+	netPoolSize      = flag.Int("executor.oci.network_pool_size", 0, "Limit on the number of networks to be reused between containers. Setting to 0 disables pooling. Setting to -1 uses the recommended default.")
 )
 
 const (
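
Note: the new flag is opt-in and defaults to false, so existing deployments keep today's behavior. Assuming the executor's standard flag handling (an assumption; the invocation isn't shown in this diff), it would be enabled by passing --executor.oci.cpu_shares_enabled=true to the executor binary.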
@@ -69,6 +70,13 @@ const (

 	// Maximum length of overlayfs mount options string.
 	maxMntOptsLength = 4095
+
+	// The OCI spec only supports "shares" as units, and these are transformed
+	// to cgroup2 weights internally by crun using a simple linear mapping.
+	// These are the min/max values for "shares". See
+	// https://github.com/containers/crun/blob/main/crun.1.md#cpu-controller
+	cpuSharesMin = 2
+	cpuSharesMax = 262_144
 )
 
 //go:embed seccomp.json
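
Aside, derived from the crun documentation linked above and mirrored by the test helper at the bottom of this diff: the mapping is linear from the shares range [2, 262144] onto the cgroup2 cpu.weight range [1, 10000],

    weight = 1 + ((shares - 2) * 9999) / 262142

so cpuSharesMin maps to weight 1, cpuSharesMax maps to weight 10000, and everything in between truncates under integer division.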
@@ -194,6 +202,8 @@ func (p *provider) New(ctx context.Context, args *container.Init) (container.Com
 		networkEnabled: args.Props.DockerNetwork != "off",
 		user:           args.Props.DockerUser,
 		forceRoot:      args.Props.DockerForceRoot,
+
+		milliCPU: args.Task.GetSchedulingMetadata().GetTaskSize().GetEstimatedMilliCpu(),
 	}, nil
 }
 
@@ -218,6 +228,8 @@ type ociContainer struct {
 	networkEnabled bool
 	user           string
 	forceRoot      bool
+
+	milliCPU int64 // milliCPU allocation from task size
 }
 
 // Returns the OCI bundle directory for the container.
@@ -695,10 +707,20 @@ func (c *ociContainer) createSpec(ctx context.Context, cmd *repb.Command) (*spec
 	cpuSpecs := &specs.LinuxCPU{}
 	if *cpuLimit != 0 {
 		period := 100 * time.Millisecond
-		cpuSpecs = &specs.LinuxCPU{
-			Quota:  pointer(int64(*cpuLimit) * period.Microseconds()),
-			Period: pointer(uint64(period.Microseconds())),
-		}
+		cpuSpecs.Quota = pointer(int64(*cpuLimit) * period.Microseconds())
+		cpuSpecs.Period = pointer(uint64(period.Microseconds()))
 	}
 
+	if *cpuSharesEnabled {
+		// CPU shares are in the range [2, 262144] so milliCPU is an
+		// appropriate value here. Note: for cgroup2, crun internally maps these
+		// "share" units to CPU weight units, so if you look at the cpu.weight
+		// file, the value will be different than what is set here. See
+		// https://github.com/containers/crun/blob/main/crun.1.md#cpu-controller
+		cpuShares := c.milliCPU
+		cpuShares = min(cpuShares, cpuSharesMax)
+		cpuShares = max(cpuShares, cpuSharesMin)
+		cpuSpecs.Shares = pointer(uint64(cpuShares))
+	}
+
 	spec := specs.Spec{
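
A note on how the two knobs compose (standard cgroup2 semantics, not a claim from this diff): the quota/period pair driven by executor.oci.cpu_limit is a hard cap that applies even on an idle machine, while the shares/weight set above only arbitrates CPU between containers under contention. A small sketch of the quota computation, with hardCapMicros as a hypothetical helper name:

package main

import (
	"fmt"
	"time"
)

// hardCapMicros mirrors the diff's quota/period computation: allow cpuLimit
// CPUs' worth of runtime per 100ms scheduling period.
func hardCapMicros(cpuLimit int) (quota int64, period uint64) {
	p := 100 * time.Millisecond
	return int64(cpuLimit) * p.Microseconds(), uint64(p.Microseconds())
}

func main() {
	quota, period := hardCapMicros(4)
	// With executor.oci.cpu_limit=4: 400000us of CPU time per 100000us
	// period, i.e. at most 4 CPUs' worth of work even with no contention.
	fmt.Printf("quota=%dus period=%dus\n", quota, period)
}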
@@ -43,6 +43,7 @@ import (
"github.com/stretchr/testify/require"

repb "github.com/buildbuddy-io/buildbuddy/proto/remote_execution"
scpb "github.com/buildbuddy-io/buildbuddy/proto/scheduler"
wkpb "github.com/buildbuddy-io/buildbuddy/proto/worker"
containerregistry "github.com/google/go-containerregistry/pkg/v1"
)
@@ -1097,6 +1098,50 @@ func TestFileOwnership(t *testing.T) {
 	assert.Empty(t, string(res.Stderr))
 }
 
+func TestCPUShares(t *testing.T) {
+	setupNetworking(t)
+	image := manuallyProvisionedBusyboxImage(t)
+	ctx := context.Background()
+	env := testenv.GetTestEnv(t)
+	runtimeRoot := testfs.MakeTempDir(t)
+	flags.Set(t, "executor.oci.runtime_root", runtimeRoot)
+	// Enable CPU shares
+	flags.Set(t, "executor.oci.cpu_shares_enabled", true)
+	buildRoot := testfs.MakeTempDir(t)
+	cacheRoot := testfs.MakeTempDir(t)
+	provider, err := ociruntime.NewProvider(env, buildRoot, cacheRoot)
+	require.NoError(t, err)
+	wd := testfs.MakeDirAll(t, buildRoot, "work")
+	// Run a task requesting 2.5 CPU cores
+	c, err := provider.New(ctx, &container.Init{
+		Task: &repb.ScheduledTask{
+			SchedulingMetadata: &scpb.SchedulingMetadata{
+				TaskSize: &scpb.TaskSize{
+					EstimatedMilliCpu: 2_500,
+				},
+			},
+		},
+		Props: &platform.Properties{ContainerImage: image},
+	})
+	require.NoError(t, err)
+	t.Cleanup(func() {
+		err := c.Remove(ctx)
+		require.NoError(t, err)
+	})
+
+	// Read and assert on cgroup files
+	cmd := &repb.Command{Arguments: []string{"sh", "-c", `
+		cat /sys/fs/cgroup/cpu.weight
+	`}}
+	res := c.Run(ctx, cmd, wd, oci.Credentials{})
+
+	require.NoError(t, res.Error)
+	expectedCPUWeight := fmt.Sprintf("%d\n", ociCPUSharesToCgroup2Weight(2500))
+	assert.Equal(t, expectedCPUWeight, string(res.Stdout))
+	assert.Empty(t, string(res.Stderr))
+	assert.Equal(t, 0, res.ExitCode)
+}
+
 func TestPersistentWorker(t *testing.T) {
 	setupNetworking(t)
 
@@ -1297,3 +1342,8 @@ func hasMountPermissions(t *testing.T) bool {
 	require.NoError(t, err, "unmount")
 	return true
 }
+
+func ociCPUSharesToCgroup2Weight(shares int64) int64 {
+	// See https://github.com/containers/crun/blob/main/crun.1.md#cpu-controller
+	return (1 + ((shares-2)*9999)/262142)
+}
