libct: use manager.AddPid to add exec to cgroup

The main benefit is that, when the systemd cgroup driver is used, we
ask systemd to add the PID rather than doing it ourselves. This way,
a rootless exec PID can be added to a cgroup.

This requires newer opencontainers/cgroups and coreos/go-systemd.

Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
This commit is contained in:
Kir Kolyshkin
2025-07-25 17:34:35 -07:00
parent 5730a141f1
commit 37b5acc2d7
12 changed files with 124 additions and 25 deletions

2
go.mod
View File

@@ -14,7 +14,7 @@ require (
github.com/moby/sys/user v0.4.0 github.com/moby/sys/user v0.4.0
github.com/moby/sys/userns v0.1.0 github.com/moby/sys/userns v0.1.0
github.com/mrunalp/fileutils v0.5.1 github.com/mrunalp/fileutils v0.5.1
github.com/opencontainers/cgroups v0.0.4 github.com/opencontainers/cgroups v0.0.5
github.com/opencontainers/runtime-spec v1.2.2-0.20250818071321-383cadbf08c0 github.com/opencontainers/runtime-spec v1.2.2-0.20250818071321-383cadbf08c0
github.com/opencontainers/selinux v1.12.0 github.com/opencontainers/selinux v1.12.0
github.com/seccomp/libseccomp-golang v0.11.1 github.com/seccomp/libseccomp-golang v0.11.1

4
go.sum
View File

@@ -44,8 +44,8 @@ github.com/moby/sys/userns v0.1.0 h1:tVLXkFOxVu9A64/yh59slHVv9ahO9UIev4JZusOLG/g
github.com/moby/sys/userns v0.1.0/go.mod h1:IHUYgu/kao6N8YZlp9Cf444ySSvCmDlmzUcYfDHOl28= github.com/moby/sys/userns v0.1.0/go.mod h1:IHUYgu/kao6N8YZlp9Cf444ySSvCmDlmzUcYfDHOl28=
github.com/mrunalp/fileutils v0.5.1 h1:F+S7ZlNKnrwHfSwdlgNSkKo67ReVf8o9fel6C3dkm/Q= github.com/mrunalp/fileutils v0.5.1 h1:F+S7ZlNKnrwHfSwdlgNSkKo67ReVf8o9fel6C3dkm/Q=
github.com/mrunalp/fileutils v0.5.1/go.mod h1:M1WthSahJixYnrXQl/DFQuteStB1weuxD2QJNHXfbSQ= github.com/mrunalp/fileutils v0.5.1/go.mod h1:M1WthSahJixYnrXQl/DFQuteStB1weuxD2QJNHXfbSQ=
github.com/opencontainers/cgroups v0.0.4 h1:XVj8P/IHVms/j+7eh8ggdkTLAxjz84ZzuFyGoE28DR4= github.com/opencontainers/cgroups v0.0.5 h1:DRITAqcOnY0uSBzIpt1RYWLjh5DPDiqUs4fY6Y0ktls=
github.com/opencontainers/cgroups v0.0.4/go.mod h1:s8lktyhlGUqM7OSRL5P7eAW6Wb+kWPNvt4qvVfzA5vs= github.com/opencontainers/cgroups v0.0.5/go.mod h1:oWVzJsKK0gG9SCRBfTpnn16WcGEqDI8PAcpMGbqWxcs=
github.com/opencontainers/runtime-spec v1.2.2-0.20250818071321-383cadbf08c0 h1:RLn0YfUWkiqPGtgUANvJrcjIkCHGRl3jcz/c557M28M= github.com/opencontainers/runtime-spec v1.2.2-0.20250818071321-383cadbf08c0 h1:RLn0YfUWkiqPGtgUANvJrcjIkCHGRl3jcz/c557M28M=
github.com/opencontainers/runtime-spec v1.2.2-0.20250818071321-383cadbf08c0/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0= github.com/opencontainers/runtime-spec v1.2.2-0.20250818071321-383cadbf08c0/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0=
github.com/opencontainers/selinux v1.12.0 h1:6n5JV4Cf+4y0KNXW48TLj5DwfXpvWlxXplUkdTrmPb8= github.com/opencontainers/selinux v1.12.0 h1:6n5JV4Cf+4y0KNXW48TLj5DwfXpvWlxXplUkdTrmPb8=

View File

@@ -32,6 +32,10 @@ func (m *mockCgroupManager) Apply(pid int) error {
return nil return nil
} }
// AddPid is a no-op stub: the mock ignores both the sub-cgroup and the
// pid, and always reports success.
func (m *mockCgroupManager) AddPid(string, int) error {
	return nil
}
func (m *mockCgroupManager) Set(_ *cgroups.Resources) error { func (m *mockCgroupManager) Set(_ *cgroups.Resources) error {
return nil return nil
} }

View File

@@ -247,6 +247,18 @@ func (p *setnsProcess) setFinalCPUAffinity() error {
} }
func (p *setnsProcess) addIntoCgroupV1() error { func (p *setnsProcess) addIntoCgroupV1() error {
if sub, ok := p.process.SubCgroupPaths[""]; ok || len(p.process.SubCgroupPaths) == 0 {
// Either same sub-cgroup for all paths, or no sub-cgroup.
err := p.manager.AddPid(sub, p.pid())
if err != nil && !p.rootlessCgroups {
return fmt.Errorf("error adding pid %d to cgroups: %w", p.pid(), err)
}
return nil
}
// Per-controller sub-cgroup paths. Not supported by AddPid (or systemd),
// so we have to calculate and check all sub-cgroup paths, and write
// directly to cgroupfs.
paths := maps.Clone(p.manager.GetPaths()) paths := maps.Clone(p.manager.GetPaths())
for ctrl, sub := range p.process.SubCgroupPaths { for ctrl, sub := range p.process.SubCgroupPaths {
base, ok := paths[ctrl] base, ok := paths[ctrl]
@@ -255,7 +267,7 @@ func (p *setnsProcess) addIntoCgroupV1() error {
} }
cgPath := path.Join(base, sub) cgPath := path.Join(base, sub)
if !strings.HasPrefix(cgPath, base) { if !strings.HasPrefix(cgPath, base) {
return fmt.Errorf("%s is not a sub cgroup path", sub) return fmt.Errorf("bad sub cgroup path: %s", sub)
} }
paths[ctrl] = cgPath paths[ctrl] = cgPath
} }
@@ -270,18 +282,10 @@ func (p *setnsProcess) addIntoCgroupV1() error {
} }
func (p *setnsProcess) addIntoCgroupV2() error { func (p *setnsProcess) addIntoCgroupV2() error {
base := p.manager.Path("") sub := p.process.SubCgroupPaths[""]
sub := "" err := p.manager.AddPid(sub, p.pid())
if p.process.SubCgroupPaths != nil { if err != nil && !p.rootlessCgroups {
sub = p.process.SubCgroupPaths[""] // On cgroup v2 + nesting + domain controllers, adding to initial cgroup may fail with EBUSY.
}
cgPath := path.Join(base, sub)
if !strings.HasPrefix(cgPath, base) {
return fmt.Errorf("%s is not a sub cgroup path", sub)
}
if err := cgroups.WriteCgroupProc(cgPath, p.pid()); err != nil && !p.rootlessCgroups {
// On cgroup v2 + nesting + domain controllers, WriteCgroupProc may fail with EBUSY.
// https://github.com/opencontainers/runc/issues/2356#issuecomment-621277643 // https://github.com/opencontainers/runc/issues/2356#issuecomment-621277643
// Try to join the cgroup of InitProcessPid, unless sub-cgroup is explicitly set. // Try to join the cgroup of InitProcessPid, unless sub-cgroup is explicitly set.
if p.initProcessPid != 0 && sub == "" { if p.initProcessPid != 0 && sub == "" {
@@ -290,8 +294,8 @@ func (p *setnsProcess) addIntoCgroupV2() error {
if initCgErr == nil { if initCgErr == nil {
if initCgPath, ok := initCg[""]; ok { if initCgPath, ok := initCg[""]; ok {
initCgDirpath := filepath.Join(fs2.UnifiedMountpoint, initCgPath) initCgDirpath := filepath.Join(fs2.UnifiedMountpoint, initCgPath)
logrus.Debugf("adding pid %d to cgroup %s failed (%v), attempting to join %s", logrus.Debugf("adding pid %d to cgroup failed (%v), attempting to join %s",
p.pid(), cgPath, err, initCgDirpath) p.pid(), err, initCgDirpath)
// NOTE: initCgDirPath is not guaranteed to exist because we didn't pause the container. // NOTE: initCgDirPath is not guaranteed to exist because we didn't pause the container.
err = cgroups.WriteCgroupProc(initCgDirpath, p.pid()) err = cgroups.WriteCgroupProc(initCgDirpath, p.pid())
} }

View File

@@ -226,17 +226,17 @@ function check_exec_debug() {
# Check we can't join parent cgroup. # Check we can't join parent cgroup.
runc exec --cgroup ".." test_busybox cat /proc/self/cgroup runc exec --cgroup ".." test_busybox cat /proc/self/cgroup
[ "$status" -ne 0 ] [ "$status" -ne 0 ]
[[ "$output" == *" .. is not a sub cgroup path"* ]] [[ "$output" == *"bad sub cgroup path"* ]]
# Check we can't join non-existing subcgroup. # Check we can't join non-existing subcgroup.
runc exec --cgroup nonexistent test_busybox cat /proc/self/cgroup runc exec --cgroup nonexistent test_busybox cat /proc/self/cgroup
[ "$status" -ne 0 ] [ "$status" -ne 0 ]
[[ "$output" == *" adding pid "*"/nonexistent/cgroup.procs: no such file "* ]] [[ "$output" == *" adding pid "*"o such file or directory"* ]]
# Check we can't join non-existing subcgroup (for a particular controller). # Check we can't join non-existing subcgroup (for a particular controller).
runc exec --cgroup cpu:nonexistent test_busybox cat /proc/self/cgroup runc exec --cgroup cpu:nonexistent test_busybox cat /proc/self/cgroup
[ "$status" -ne 0 ] [ "$status" -ne 0 ]
[[ "$output" == *" adding pid "*"/nonexistent/cgroup.procs: no such file "* ]] [[ "$output" == *" adding pid "*"o such file or directory"* ]]
# Check we can't specify non-existent controller. # Check we can't specify non-existent controller.
runc exec --cgroup whaaat:/ test_busybox true runc exec --cgroup whaaat:/ test_busybox true
@@ -277,12 +277,12 @@ function check_exec_debug() {
# Check we can't join parent cgroup. # Check we can't join parent cgroup.
runc exec --cgroup ".." test_busybox cat /proc/self/cgroup runc exec --cgroup ".." test_busybox cat /proc/self/cgroup
[ "$status" -ne 0 ] [ "$status" -ne 0 ]
[[ "$output" == *" .. is not a sub cgroup path"* ]] [[ "$output" == *"bad sub cgroup path"* ]]
# Check we can't join non-existing subcgroup. # Check we can't join non-existing subcgroup.
runc exec --cgroup nonexistent test_busybox cat /proc/self/cgroup runc exec --cgroup nonexistent test_busybox cat /proc/self/cgroup
[ "$status" -ne 0 ] [ "$status" -ne 0 ]
[[ "$output" == *" adding pid "*"/nonexistent/cgroup.procs: no such file "* ]] [[ "$output" == *" adding pid "*"o such file or directory"* ]]
# Check we can join top-level cgroup (implicit). # Check we can join top-level cgroup (implicit).
runc exec test_busybox grep '^0::/$' /proc/self/cgroup runc exec test_busybox grep '^0::/$' /proc/self/cgroup
@@ -318,7 +318,7 @@ function check_exec_debug() {
# Check that --cgroup / disables the init cgroup fallback. # Check that --cgroup / disables the init cgroup fallback.
runc exec --cgroup / test_busybox true runc exec --cgroup / test_busybox true
[ "$status" -ne 0 ] [ "$status" -ne 0 ]
[[ "$output" == *" adding pid "*" to cgroups"*"/cgroup.procs: device or resource busy"* ]] [[ "$output" == *" adding pid "*" to cgroups"*"evice or resource busy"* ]]
# Check that explicit --cgroup foobar works. # Check that explicit --cgroup foobar works.
runc exec --cgroup foobar test_busybox grep '^0::/foobar$' /proc/self/cgroup runc exec --cgroup foobar test_busybox grep '^0::/foobar$' /proc/self/cgroup

View File

@@ -29,6 +29,11 @@ type Manager interface {
// can be used to merely create a cgroup. // can be used to merely create a cgroup.
Apply(pid int) error Apply(pid int) error
// AddPid adds a process with a given pid to an existing cgroup.
// The subcgroup argument is either empty, or the path of a cgroup
// nested under the manager's cgroup, given relative to it.
AddPid(subcgroup string, pid int) error
// GetPids returns the PIDs of all processes inside the cgroup. // GetPids returns the PIDs of all processes inside the cgroup.
GetPids() ([]int, error) GetPids() ([]int, error)

View File

@@ -4,6 +4,8 @@ import (
"errors" "errors"
"fmt" "fmt"
"os" "os"
"path"
"strings"
"sync" "sync"
"golang.org/x/sys/unix" "golang.org/x/sys/unix"
@@ -139,6 +141,33 @@ func (m *Manager) Apply(pid int) (retErr error) {
return retErr return retErr
} }
// AddPid adds a process with a given pid to an existing cgroup.
// The subcgroup argument is either empty, or the path of a cgroup
// nested under the manager's cgroup, given relative to it. It is
// applied to every path in m.paths.
//
// A subcgroup that resolves outside of one of those paths is rejected
// with a "bad sub cgroup path" error. If writing the pid fails with an
// error that isIgnorableError reports as ignorable (given c.Rootless)
// and the cgroup has no explicit Path, that path is skipped and
// cgroups.ErrRootless is returned after the remaining paths have been
// processed. Any other write error is returned immediately.
func (m *Manager) AddPid(subcgroup string, pid int) (retErr error) {
	m.mu.Lock()
	defer m.mu.Unlock()

	c := m.cgroups

	for _, dir := range m.paths {
		cgPath := path.Join(dir, subcgroup)
		// A bare prefix match would also accept a sibling such as
		// "<dir>-other" (reachable via "../<name>-other"), so whatever
		// follows dir must begin at a path separator.
		rest, ok := strings.CutPrefix(cgPath, dir)
		if !ok || (rest != "" && !strings.HasPrefix(rest, "/") && !strings.HasSuffix(dir, "/")) {
			return fmt.Errorf("bad sub cgroup path: %s", subcgroup)
		}

		if err := cgroups.WriteCgroupProc(cgPath, pid); err != nil {
			if isIgnorableError(c.Rootless, err) && c.Path == "" {
				// Remember the failure, but still try the other paths.
				retErr = cgroups.ErrRootless
				continue
			}
			return err
		}
	}

	return retErr
}
func (m *Manager) Destroy() error { func (m *Manager) Destroy() error {
m.mu.Lock() m.mu.Lock()
defer m.mu.Unlock() defer m.mu.Unlock()

View File

@@ -4,6 +4,7 @@ import (
"errors" "errors"
"fmt" "fmt"
"os" "os"
"path/filepath"
"strings" "strings"
"github.com/opencontainers/cgroups" "github.com/opencontainers/cgroups"
@@ -83,6 +84,18 @@ func (m *Manager) Apply(pid int) error {
return nil return nil
} }
// AddPid adds a process with a given pid to an existing cgroup.
// The subcgroup argument is either empty, or the path of a cgroup
// nested under the manager's cgroup, given relative to it.
//
// A subcgroup that resolves outside of m.dirPath is rejected with a
// "bad sub cgroup path" error; otherwise the result of
// cgroups.WriteCgroupProc is returned as is.
func (m *Manager) AddPid(subcgroup string, pid int) error {
	sep := string(filepath.Separator)
	cgPath := filepath.Join(m.dirPath, subcgroup)
	// A bare prefix match would also accept a sibling such as
	// "<dirPath>-other" (reachable via "../<name>-other"), so whatever
	// follows dirPath must begin at a path separator.
	rest, ok := strings.CutPrefix(cgPath, m.dirPath)
	if !ok || (rest != "" && !strings.HasPrefix(rest, sep) && !strings.HasSuffix(m.dirPath, sep)) {
		return fmt.Errorf("bad sub cgroup path: %s", subcgroup)
	}

	return cgroups.WriteCgroupProc(cgPath, pid)
}
func (m *Manager) GetPids() ([]int, error) { func (m *Manager) GetPids() ([]int, error) {
return cgroups.GetPids(m.dirPath) return cgroups.GetPids(m.dirPath)
} }

View File

@@ -6,6 +6,7 @@ import (
"fmt" "fmt"
"math" "math"
"os" "os"
"path"
"strconv" "strconv"
"strings" "strings"
"sync" "sync"
@@ -208,6 +209,20 @@ func stopUnit(cm *dbusConnManager, unitName string) error {
return nil return nil
} }
// addPid attaches the process with the given pid to the unit unitName
// through AttachProcessesToUnit, retrying via cm.retryOnDisconnect.
//
// The subcgroup is passed on in absolute form: a leading "/" is added
// when it is missing. A value that is not already in its path.Clean
// form (for example one containing ".." or a trailing slash) is
// rejected with a "bad sub cgroup path" error before any D-Bus call.
func addPid(cm *dbusConnManager, unitName, subcgroup string, pid int) error {
	cgPath := "/" + subcgroup
	if path.IsAbs(subcgroup) {
		cgPath = subcgroup
	}
	if path.Clean(cgPath) != cgPath {
		return fmt.Errorf("bad sub cgroup path: %s", subcgroup)
	}

	attach := func(conn *systemdDbus.Conn) error {
		pids := []uint32{uint32(pid)}
		return conn.AttachProcessesToUnit(context.TODO(), unitName, cgPath, pids)
	}
	return cm.retryOnDisconnect(attach)
}
func resetFailedUnit(cm *dbusConnManager, name string) error { func resetFailedUnit(cm *dbusConnManager, name string) error {
return cm.retryOnDisconnect(func(c *systemdDbus.Conn) error { return cm.retryOnDisconnect(func(c *systemdDbus.Conn) error {
return c.ResetFailedUnitContext(context.TODO(), name) return c.ResetFailedUnitContext(context.TODO(), name)

View File

@@ -215,6 +215,25 @@ func (m *LegacyManager) Apply(pid int) error {
return nil return nil
} }
// AddPid adds a process with a given pid to an existing cgroup.
// The subcgroup argument is either empty, or the path of a cgroup
// nested under the manager's cgroup, given relative to it.
//
// The pid is first attached to the unit via addPid, and then added
// again through a cgroupfs manager built from m.cgroups and m.paths.
// The first error encountered is returned unchanged.
func (m *LegacyManager) AddPid(subcgroup string, pid int) error {
	m.mu.Lock()
	defer m.mu.Unlock()

	unit := getUnitName(m.cgroups)
	if err := addPid(m.dbus, unit, subcgroup, pid); err != nil {
		return err
	}

	// Since systemd only joins controllers it knows, use cgroupfs for the rest.
	cgfs, err := fs.NewManager(m.cgroups, m.paths)
	if err == nil {
		err = cgfs.AddPid(subcgroup, pid)
	}
	return err
}
func (m *LegacyManager) Destroy() error { func (m *LegacyManager) Destroy() error {
m.mu.Lock() m.mu.Lock()
defer m.mu.Unlock() defer m.mu.Unlock()

View File

@@ -383,6 +383,16 @@ func cgroupFilesToChown() ([]string, error) {
return filesToChown, nil return filesToChown, nil
} }
// AddPid adds a process with a given pid to an existing cgroup.
// The subcgroup argument is either empty, or the path of a cgroup
// nested under the manager's cgroup, given relative to it.
//
// The work is delegated to addPid for the manager's unit while m.mu
// is held.
func (m *UnifiedManager) AddPid(subcgroup string, pid int) error {
	m.mu.Lock()
	defer m.mu.Unlock()

	unit := getUnitName(m.cgroups)
	return addPid(m.dbus, unit, subcgroup, pid)
}
func (m *UnifiedManager) Destroy() error { func (m *UnifiedManager) Destroy() error {
m.mu.Lock() m.mu.Lock()
defer m.mu.Unlock() defer m.mu.Unlock()

2
vendor/modules.txt vendored
View File

@@ -51,7 +51,7 @@ github.com/moby/sys/userns
# github.com/mrunalp/fileutils v0.5.1 # github.com/mrunalp/fileutils v0.5.1
## explicit; go 1.13 ## explicit; go 1.13
github.com/mrunalp/fileutils github.com/mrunalp/fileutils
# github.com/opencontainers/cgroups v0.0.4 # github.com/opencontainers/cgroups v0.0.5
## explicit; go 1.23.0 ## explicit; go 1.23.0
github.com/opencontainers/cgroups github.com/opencontainers/cgroups
github.com/opencontainers/cgroups/devices github.com/opencontainers/cgroups/devices