mirror of
https://github.com/opencontainers/runc.git
synced 2025-09-27 03:46:19 +08:00
Support process.scheduler
Spec: https://github.com/opencontainers/runtime-spec/pull/1188 Fix: https://github.com/opencontainers/runc/issues/3895 Co-authored-by: lifubang <lifubang@acmcoder.com> Signed-off-by: utam0k <k0ma@utam0k.jp> Signed-off-by: lifubang <lifubang@acmcoder.com>
This commit is contained in:
@@ -12,7 +12,6 @@ v1.0.0 | `SCMP_ARCH_PARISC64` | Unplanned, due to lack
|
||||
v1.0.2 | `.linux.personality` | [#3126](https://github.com/opencontainers/runc/pull/3126)
|
||||
v1.1.0 | `SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV` | [#3862](https://github.com/opencontainers/runc/pull/3862)
|
||||
v1.1.0 | rsvd hugetlb cgroup | TODO ([#3859](https://github.com/opencontainers/runc/issues/3859))
|
||||
v1.1.0 | `.process.scheduler` | TODO ([#3895](https://github.com/opencontainers/runc/issues/3895))
|
||||
v1.1.0 | `.process.ioPriority` | [#3783](https://github.com/opencontainers/runc/pull/3783)
|
||||
|
||||
|
||||
|
@@ -8,6 +8,7 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/sirupsen/logrus"
|
||||
"golang.org/x/sys/unix"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/devices"
|
||||
"github.com/opencontainers/runtime-spec/specs-go"
|
||||
@@ -219,6 +220,68 @@ type Config struct {
|
||||
|
||||
// TimeOffsets specifies the offset for supporting time namespaces.
|
||||
TimeOffsets map[string]specs.LinuxTimeOffset `json:"time_offsets,omitempty"`
|
||||
|
||||
// Scheduler represents the scheduling attributes for a process.
|
||||
Scheduler *Scheduler `json:"scheduler,omitempty"`
|
||||
}
|
||||
|
||||
// Scheduler describes the scheduling attributes of a process. It is an alias
// for specs.Scheduler from the OCI runtime spec and is based on the Linux
// sched_setattr(2) syscall.
type Scheduler = specs.Scheduler
|
||||
|
||||
// ToSchedAttr is to convert *configs.Scheduler to *unix.SchedAttr.
|
||||
func ToSchedAttr(scheduler *Scheduler) (*unix.SchedAttr, error) {
|
||||
var policy uint32
|
||||
switch scheduler.Policy {
|
||||
case specs.SchedOther:
|
||||
policy = 0
|
||||
case specs.SchedFIFO:
|
||||
policy = 1
|
||||
case specs.SchedRR:
|
||||
policy = 2
|
||||
case specs.SchedBatch:
|
||||
policy = 3
|
||||
case specs.SchedISO:
|
||||
policy = 4
|
||||
case specs.SchedIdle:
|
||||
policy = 5
|
||||
case specs.SchedDeadline:
|
||||
policy = 6
|
||||
default:
|
||||
return nil, fmt.Errorf("invalid scheduler policy: %s", scheduler.Policy)
|
||||
}
|
||||
|
||||
var flags uint64
|
||||
for _, flag := range scheduler.Flags {
|
||||
switch flag {
|
||||
case specs.SchedFlagResetOnFork:
|
||||
flags |= 0x01
|
||||
case specs.SchedFlagReclaim:
|
||||
flags |= 0x02
|
||||
case specs.SchedFlagDLOverrun:
|
||||
flags |= 0x04
|
||||
case specs.SchedFlagKeepPolicy:
|
||||
flags |= 0x08
|
||||
case specs.SchedFlagKeepParams:
|
||||
flags |= 0x10
|
||||
case specs.SchedFlagUtilClampMin:
|
||||
flags |= 0x20
|
||||
case specs.SchedFlagUtilClampMax:
|
||||
flags |= 0x40
|
||||
default:
|
||||
return nil, fmt.Errorf("invalid scheduler flag: %s", flag)
|
||||
}
|
||||
}
|
||||
|
||||
return &unix.SchedAttr{
|
||||
Size: unix.SizeofSchedAttr,
|
||||
Policy: policy,
|
||||
Flags: flags,
|
||||
Nice: scheduler.Nice,
|
||||
Priority: uint32(scheduler.Priority),
|
||||
Runtime: scheduler.Runtime,
|
||||
Deadline: scheduler.Deadline,
|
||||
Period: scheduler.Period,
|
||||
}, nil
|
||||
}
|
||||
|
||||
type (
|
||||
|
@@ -11,6 +11,7 @@ import (
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/opencontainers/runc/libcontainer/intelrdt"
|
||||
"github.com/opencontainers/runtime-spec/specs-go"
|
||||
selinux "github.com/opencontainers/selinux/go-selinux"
|
||||
"github.com/sirupsen/logrus"
|
||||
"golang.org/x/sys/unix"
|
||||
@@ -30,6 +31,7 @@ func Validate(config *configs.Config) error {
|
||||
intelrdtCheck,
|
||||
rootlessEUIDCheck,
|
||||
mountsStrict,
|
||||
scheduler,
|
||||
}
|
||||
for _, c := range checks {
|
||||
if err := c(config); err != nil {
|
||||
@@ -353,3 +355,24 @@ func isHostNetNS(path string) (bool, error) {
|
||||
|
||||
return (st1.Dev == st2.Dev) && (st1.Ino == st2.Ino), nil
|
||||
}
|
||||
|
||||
// scheduler is to validate scheduler configs according to https://man7.org/linux/man-pages/man2/sched_setattr.2.html
|
||||
func scheduler(config *configs.Config) error {
|
||||
s := config.Scheduler
|
||||
if s == nil {
|
||||
return nil
|
||||
}
|
||||
if s.Policy == "" {
|
||||
return errors.New("scheduler policy is required")
|
||||
}
|
||||
if s.Nice < -20 || s.Nice > 19 {
|
||||
return fmt.Errorf("invalid scheduler.nice: %d", s.Nice)
|
||||
}
|
||||
if s.Priority != 0 && (s.Policy != specs.SchedFIFO && s.Policy != specs.SchedRR) {
|
||||
return errors.New("scheduler.priority can only be specified for SchedFIFO or SchedRR policy")
|
||||
}
|
||||
if s.Policy != specs.SchedDeadline && (s.Runtime != 0 || s.Deadline != 0 || s.Period != 0) {
|
||||
return errors.New("scheduler runtime/deadline/period can only be specified for SchedDeadline policy")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
@@ -616,3 +616,53 @@ func TestValidateIDMapMounts(t *testing.T) {
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestValidateScheduler(t *testing.T) {
|
||||
testCases := []struct {
|
||||
isErr bool
|
||||
policy string
|
||||
niceValue int32
|
||||
priority int32
|
||||
runtime uint64
|
||||
deadline uint64
|
||||
period uint64
|
||||
}{
|
||||
{isErr: true, niceValue: 0},
|
||||
{isErr: false, policy: "SCHED_OTHER", niceValue: 19},
|
||||
{isErr: false, policy: "SCHED_OTHER", niceValue: -20},
|
||||
{isErr: true, policy: "SCHED_OTHER", niceValue: 20},
|
||||
{isErr: true, policy: "SCHED_OTHER", niceValue: -21},
|
||||
{isErr: true, policy: "SCHED_OTHER", priority: 100},
|
||||
{isErr: false, policy: "SCHED_FIFO", priority: 100},
|
||||
{isErr: true, policy: "SCHED_FIFO", runtime: 20},
|
||||
{isErr: true, policy: "SCHED_BATCH", deadline: 30},
|
||||
{isErr: true, policy: "SCHED_IDLE", period: 40},
|
||||
{isErr: true, policy: "SCHED_DEADLINE", priority: 100},
|
||||
{isErr: false, policy: "SCHED_DEADLINE", runtime: 200},
|
||||
{isErr: false, policy: "SCHED_DEADLINE", deadline: 300},
|
||||
{isErr: false, policy: "SCHED_DEADLINE", period: 400},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
scheduler := configs.Scheduler{
|
||||
Policy: specs.LinuxSchedulerPolicy(tc.policy),
|
||||
Nice: tc.niceValue,
|
||||
Priority: tc.priority,
|
||||
Runtime: tc.runtime,
|
||||
Deadline: tc.deadline,
|
||||
Period: tc.period,
|
||||
}
|
||||
config := &configs.Config{
|
||||
Rootfs: "/var",
|
||||
Scheduler: &scheduler,
|
||||
}
|
||||
|
||||
err := Validate(config)
|
||||
if tc.isErr && err == nil {
|
||||
t.Errorf("scheduler: %d, expected error, got nil", tc.niceValue)
|
||||
}
|
||||
if !tc.isErr && err != nil {
|
||||
t.Errorf("scheduler: %d, expected nil, got error %v", tc.niceValue, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@@ -640,6 +640,20 @@ func setupRlimits(limits []configs.Rlimit, pid int) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func setupScheduler(config *configs.Config) error {
|
||||
attr, err := configs.ToSchedAttr(config.Scheduler)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if err := unix.SchedSetAttr(0, attr, 0); err != nil {
|
||||
if errors.Is(err, unix.EPERM) && config.Cgroups.CpusetCpus != "" {
|
||||
return errors.New("process scheduler can't be used together with AllowedCPUs")
|
||||
}
|
||||
return fmt.Errorf("error setting scheduler: %w", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// signalAllProcesses freezes then iterates over all the processes inside the
|
||||
// manager's cgroups sending the signal s to them.
|
||||
func signalAllProcesses(m cgroups.Manager, s unix.Signal) error {
|
||||
|
@@ -95,6 +95,8 @@ type Process struct {
|
||||
//
|
||||
// For cgroup v2, the only key allowed is "".
|
||||
SubCgroupPaths map[string]string
|
||||
|
||||
Scheduler *configs.Scheduler
|
||||
}
|
||||
|
||||
// Wait waits for the process to exit.
|
||||
|
@@ -65,6 +65,12 @@ func (l *linuxSetnsInit) Init() error {
|
||||
unix.Umask(int(*l.config.Config.Umask))
|
||||
}
|
||||
|
||||
if l.config.Config.Scheduler != nil {
|
||||
if err := setupScheduler(l.config.Config); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
if err := selinux.SetExecLabel(l.config.ProcessLabel); err != nil {
|
||||
return err
|
||||
}
|
||||
|
@@ -494,6 +494,10 @@ func CreateLibcontainerConfig(opts *CreateOpts) (*configs.Config, error) {
|
||||
Ambient: spec.Process.Capabilities.Ambient,
|
||||
}
|
||||
}
|
||||
if spec.Process.Scheduler != nil {
|
||||
s := *spec.Process.Scheduler
|
||||
config.Scheduler = &s
|
||||
}
|
||||
}
|
||||
createHooks(spec, config)
|
||||
config.Version = specs.Version
|
||||
|
@@ -159,6 +159,13 @@ func (l *linuxStandardInit) Init() error {
|
||||
return &os.SyscallError{Syscall: "prctl(SET_NO_NEW_PRIVS)", Err: err}
|
||||
}
|
||||
}
|
||||
|
||||
if l.config.Config.Scheduler != nil {
|
||||
if err := setupScheduler(l.config.Config); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
// Tell our parent that we're ready to Execv. This must be done before the
|
||||
// Seccomp rules have been applied, because we need to be able to read and
|
||||
// write to a socket.
|
||||
|
34
tests/integration/scheduler.bats
Normal file
34
tests/integration/scheduler.bats
Normal file
@@ -0,0 +1,34 @@
|
||||
#!/usr/bin/env bats
|
||||
|
||||
load helpers
|
||||
|
||||
# Per-test setup hook: runs the "requires root" and "setup_debian" helpers
# loaded from helpers.
# NOTE(review): presumably this skips the test for non-root users and prepares
# a Debian-based bundle (the tests below rely on chrt being available inside
# the container) -- confirm against helpers.bash.
function setup() {
	requires root
	setup_debian
}
|
||||
|
||||
# Per-test teardown hook: delegates cleanup to the teardown_bundle helper.
function teardown() {
	teardown_bundle
}
|
||||
|
||||
# Starts a container configured with a SCHED_DEADLINE scheduler and verifies,
# via chrt, that the container's PID 1 runs with the requested policy and
# runtime/deadline/period parameters.
@test "scheduler is applied" {
	update_config ' .process.scheduler = {"policy": "SCHED_DEADLINE", "nice": 19, "priority": 0, "runtime": 42000, "deadline": 1000000, "period": 1000000, }'

	runc run -d --console-socket "$CONSOLE_SOCKET" test_scheduler
	[ "$status" -eq 0 ]

	# Query the scheduling attributes of PID 1 from inside the container.
	runc exec test_scheduler chrt -p 1
	[ "$status" -eq 0 ]

	# The first three output lines must report the configured policy,
	# priority and deadline parameters.
	[[ "${lines[0]}" == *"scheduling policy: SCHED_DEADLINE" ]]
	[[ "${lines[1]}" == *"priority: 0" ]]
	[[ "${lines[2]}" == *"runtime/deadline/period parameters: 42000/1000000/1000000" ]]
}
|
||||
|
||||
# Combines a cpuset restriction (cpus = "0") with a SCHED_DEADLINE scheduler
# and expects "runc run" to fail with exit status 1.
# NOTE(review): presumably this exercises the EPERM + cpuset error path in
# setupScheduler -- confirm.
@test "scheduler vs cpus" {
	update_config ' .linux.resources.cpu.cpus = "0"
| .process.scheduler = {"policy": "SCHED_DEADLINE", "nice": 19, "runtime": 42000, "deadline": 1000000, "period": 1000000, }'

	runc run -d --console-socket "$CONSOLE_SOCKET" test_scheduler
	[ "$status" -eq 1 ]
}
|
@@ -61,6 +61,11 @@ func newProcess(p specs.Process) (*libcontainer.Process, error) {
|
||||
lp.ConsoleHeight = uint16(p.ConsoleSize.Height)
|
||||
}
|
||||
|
||||
if p.Scheduler != nil {
|
||||
s := *p.Scheduler
|
||||
lp.Scheduler = &s
|
||||
}
|
||||
|
||||
if p.Capabilities != nil {
|
||||
lp.Capabilities = &configs.Capabilities{}
|
||||
lp.Capabilities.Bounding = p.Capabilities.Bounding
|
||||
|
Reference in New Issue
Block a user