diff --git a/libcontainer/configs/config.go b/libcontainer/configs/config.go index 22fe0f9b4c1..4436aaa0cfa 100644 --- a/libcontainer/configs/config.go +++ b/libcontainer/configs/config.go @@ -286,12 +286,6 @@ func ToSchedAttr(scheduler *Scheduler) (*unix.SchedAttr, error) { }, nil } -var IOPrioClassMapping = map[specs.IOPriorityClass]int{ - specs.IOPRIO_CLASS_RT: 1, - specs.IOPRIO_CLASS_BE: 2, - specs.IOPRIO_CLASS_IDLE: 3, -} - type IOPriority = specs.LinuxIOPriority type ( diff --git a/libcontainer/configs/validate/validator.go b/libcontainer/configs/validate/validator.go index 37ece0aebbd..7d17676ed18 100644 --- a/libcontainer/configs/validate/validator.go +++ b/libcontainer/configs/validate/validator.go @@ -406,5 +406,13 @@ func ioPriority(config *configs.Config) error { if priority < 0 || priority > 7 { return fmt.Errorf("invalid ioPriority.Priority: %d", priority) } + + switch class := config.IOPriority.Class; class { + case specs.IOPRIO_CLASS_RT, specs.IOPRIO_CLASS_BE, specs.IOPRIO_CLASS_IDLE: + // Valid class, do nothing. + default: + return fmt.Errorf("invalid ioPriority.Class: %q", class) + } + return nil } diff --git a/libcontainer/configs/validate/validator_test.go b/libcontainer/configs/validate/validator_test.go index b0b740a122d..d157feea5bc 100644 --- a/libcontainer/configs/validate/validator_test.go +++ b/libcontainer/configs/validate/validator_test.go @@ -847,15 +847,21 @@ func TestValidateIOPriority(t *testing.T) { testCases := []struct { isErr bool priority int + class specs.IOPriorityClass }{ - {isErr: false, priority: 0}, - {isErr: false, priority: 7}, - {isErr: true, priority: -1}, + {isErr: false, priority: 0, class: specs.IOPRIO_CLASS_IDLE}, + {isErr: false, priority: 7, class: specs.IOPRIO_CLASS_RT}, + {isErr: false, priority: 3, class: specs.IOPRIO_CLASS_BE}, + // Invalid priority. + {isErr: true, priority: -1, class: specs.IOPRIO_CLASS_BE}, + // Invalid class. + {isErr: true, priority: 3, class: specs.IOPriorityClass("IOPRIO_CLASS_WOW")}, } for _, tc := range testCases { ioPriroty := configs.IOPriority{ Priority: tc.priority, + Class: tc.class, } config := &configs.Config{ Rootfs: "/var", diff --git a/libcontainer/init_linux.go b/libcontainer/init_linux.go index d23153e9b3d..b218a6cb126 100644 --- a/libcontainer/init_linux.go +++ b/libcontainer/init_linux.go @@ -662,6 +662,9 @@ func setupRlimits(limits []configs.Rlimit, pid int) error { } func setupScheduler(config *configs.Config) error { + if config.Scheduler == nil { + return nil + } attr, err := configs.ToSchedAttr(config.Scheduler) if err != nil { return err @@ -675,6 +678,35 @@ func setupScheduler(config *configs.Config) error { return nil } +func setupIOPriority(config *configs.Config) error { + const ioprioWhoPgrp = 1 + + ioprio := config.IOPriority + if ioprio == nil { + return nil + } + class := 0 + switch ioprio.Class { + case specs.IOPRIO_CLASS_RT: + class = 1 + case specs.IOPRIO_CLASS_BE: + class = 2 + case specs.IOPRIO_CLASS_IDLE: + class = 3 + default: + return fmt.Errorf("invalid io priority class: %s", ioprio.Class) + } + + // Combine class and priority into a single value + // https://github.com/torvalds/linux/blob/v5.18/include/uapi/linux/ioprio.h#L5-L17 + iop := (class << 13) | ioprio.Priority + _, _, errno := unix.RawSyscall(unix.SYS_IOPRIO_SET, ioprioWhoPgrp, 0, uintptr(iop)) + if errno != 0 { + return fmt.Errorf("failed to set io priority: %w", errno) + } + return nil +} + func setupPersonality(config *configs.Config) error { return system.SetLinuxPersonality(config.Personality.Domain) } diff --git a/libcontainer/process_linux.go b/libcontainer/process_linux.go index dde6aecf13a..9b20f2d6695 100644 --- a/libcontainer/process_linux.go +++ b/libcontainer/process_linux.go @@ -166,12 +166,6 @@ type setnsProcess struct { func (p *setnsProcess) start() (retErr error) { defer p.comm.closeParent() - if p.process.IOPriority != nil { - if err := setIOPriority(p.process.IOPriority); err != nil { - return err - } - } - // get the "before" value of oom kill count oom, _ := p.manager.OOMKillCount() err := p.cmd.Start() @@ -908,21 +902,3 @@ func (p *Process) InitializeIO(rootuid, rootgid int) (i *IO, err error) { } return i, nil } - -func setIOPriority(ioprio *configs.IOPriority) error { - const ioprioWhoPgrp = 1 - - class, ok := configs.IOPrioClassMapping[ioprio.Class] - if !ok { - return fmt.Errorf("invalid io priority class: %s", ioprio.Class) - } - - // Combine class and priority into a single value - // https://github.com/torvalds/linux/blob/v5.18/include/uapi/linux/ioprio.h#L5-L17 - iop := (class << 13) | ioprio.Priority - _, _, errno := unix.RawSyscall(unix.SYS_IOPRIO_SET, ioprioWhoPgrp, 0, uintptr(iop)) - if errno != 0 { - return fmt.Errorf("failed to set io priority: %w", errno) - } - return nil -} diff --git a/libcontainer/setns_init_linux.go b/libcontainer/setns_init_linux.go index 92c6ef77030..462662a84ed 100644 --- a/libcontainer/setns_init_linux.go +++ b/libcontainer/setns_init_linux.go @@ -71,12 +71,13 @@ func (l *linuxSetnsInit) Init() error { unix.Umask(int(*l.config.Config.Umask)) } - if l.config.Config.Scheduler != nil { - if err := setupScheduler(l.config.Config); err != nil { - return err - } + if err := setupScheduler(l.config.Config); err != nil { + return err } + if err := setupIOPriority(l.config.Config); err != nil { + return err + } // Tell our parent that we're ready to exec. This must be done before the // Seccomp rules have been applied, because we need to be able to read and // write to a socket. diff --git a/libcontainer/standard_init_linux.go b/libcontainer/standard_init_linux.go index 9f7fa45d533..65444f38ae0 100644 --- a/libcontainer/standard_init_linux.go +++ b/libcontainer/standard_init_linux.go @@ -155,15 +155,12 @@ func (l *linuxStandardInit) Init() error { } } - if l.config.Config.Scheduler != nil { - if err := setupScheduler(l.config.Config); err != nil { - return err - } + if err := setupScheduler(l.config.Config); err != nil { + return err } - if l.config.Config.IOPriority != nil { - if err := setIOPriority(l.config.Config.IOPriority); err != nil { - return err - } + + if err := setupIOPriority(l.config.Config); err != nil { + return err } // Tell our parent that we're ready to exec. This must be done before the diff --git a/utils_linux.go b/utils_linux.go index feb6ef80c4a..eef78ea3845 100644 --- a/utils_linux.go +++ b/utils_linux.go @@ -55,6 +55,8 @@ func newProcess(p specs.Process) (*libcontainer.Process, error) { Label: p.SelinuxLabel, NoNewPrivileges: &p.NoNewPrivileges, AppArmorProfile: p.ApparmorProfile, + Scheduler: p.Scheduler, + IOPriority: p.IOPriority, } if p.ConsoleSize != nil { @@ -62,16 +64,6 @@ func newProcess(p specs.Process) (*libcontainer.Process, error) { lp.ConsoleHeight = uint16(p.ConsoleSize.Height) } - if p.Scheduler != nil { - s := *p.Scheduler - lp.Scheduler = &s - } - - if p.IOPriority != nil { - ioPriority := *p.IOPriority - lp.IOPriority = &ioPriority - } - if p.Capabilities != nil { lp.Capabilities = &configs.Capabilities{} lp.Capabilities.Bounding = p.Capabilities.Bounding