mirror of
https://github.com/opencontainers/runc.git
synced 2025-10-20 22:19:42 +08:00

Due to the fact that the init is implemented in Go (which seemingly randomly spawns new processes and loves eating memory), most cgroup configurations are required to have an arbitrary minimum dictated by the init. This confuses users and makes configuration more annoying than it should be. An example of this is pids.max, where Go spawns multiple processes that then cause init to violate the pids cgroup constraint before the container can even start. Solve this problem by setting the cgroup configurations as late as possible, to avoid hitting as many of the resources hogged by the Go init as possible. This has to be done before seccomp rules are applied, as the parent and child must synchronise in order for the parent to correctly set the configurations (and writes might be blocked by seccomp). Signed-off-by: Aleksa Sarai <asarai@suse.com>
120 lines
3.2 KiB
Go
120 lines
3.2 KiB
Go
// +build linux
|
|
|
|
package libcontainer
|
|
|
|
import (
|
|
"io"
|
|
"os"
|
|
"syscall"
|
|
|
|
"github.com/opencontainers/runc/libcontainer/apparmor"
|
|
"github.com/opencontainers/runc/libcontainer/configs"
|
|
"github.com/opencontainers/runc/libcontainer/label"
|
|
"github.com/opencontainers/runc/libcontainer/seccomp"
|
|
"github.com/opencontainers/runc/libcontainer/system"
|
|
)
|
|
|
|
type linuxStandardInit struct {
|
|
pipe io.ReadWriter
|
|
parentPid int
|
|
config *initConfig
|
|
}
|
|
|
|
func (l *linuxStandardInit) Init() error {
|
|
// join any namespaces via a path to the namespace fd if provided
|
|
if err := joinExistingNamespaces(l.config.Config.Namespaces); err != nil {
|
|
return err
|
|
}
|
|
var console *linuxConsole
|
|
if l.config.Console != "" {
|
|
console = newConsoleFromPath(l.config.Console)
|
|
if err := console.dupStdio(); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
if _, err := syscall.Setsid(); err != nil {
|
|
return err
|
|
}
|
|
if console != nil {
|
|
if err := system.Setctty(); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
if err := setupNetwork(l.config); err != nil {
|
|
return err
|
|
}
|
|
if err := setupRoute(l.config.Config); err != nil {
|
|
return err
|
|
}
|
|
if err := setupRlimits(l.config.Config); err != nil {
|
|
return err
|
|
}
|
|
if err := setOomScoreAdj(l.config.Config.OomScoreAdj); err != nil {
|
|
return err
|
|
}
|
|
label.Init()
|
|
// InitializeMountNamespace() can be executed only for a new mount namespace
|
|
if l.config.Config.Namespaces.Contains(configs.NEWNS) {
|
|
if err := setupRootfs(l.config.Config, console); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
if hostname := l.config.Config.Hostname; hostname != "" {
|
|
if err := syscall.Sethostname([]byte(hostname)); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
if err := apparmor.ApplyProfile(l.config.Config.AppArmorProfile); err != nil {
|
|
return err
|
|
}
|
|
if err := label.SetProcessLabel(l.config.Config.ProcessLabel); err != nil {
|
|
return err
|
|
}
|
|
|
|
for key, value := range l.config.Config.Sysctl {
|
|
if err := writeSystemProperty(key, value); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
for _, path := range l.config.Config.ReadonlyPaths {
|
|
if err := remountReadonly(path); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
for _, path := range l.config.Config.MaskPaths {
|
|
if err := maskFile(path); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
pdeath, err := system.GetParentDeathSignal()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
// Tell our parent that we're ready to Execv. This must be done before the
|
|
// Seccomp rules have been applied, because we need to be able to read and
|
|
// write to a socket.
|
|
if err := syncParentReady(l.pipe); err != nil {
|
|
return err
|
|
}
|
|
if l.config.Config.Seccomp != nil {
|
|
if err := seccomp.InitSeccomp(l.config.Config.Seccomp); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
if err := finalizeNamespace(l.config); err != nil {
|
|
return err
|
|
}
|
|
// finalizeNamespace can change user/group which clears the parent death
|
|
// signal, so we restore it here.
|
|
if err := pdeath.Restore(); err != nil {
|
|
return err
|
|
}
|
|
// compare the parent from the inital start of the init process and make sure that it did not change.
|
|
// if the parent changes that means it died and we were reparened to something else so we should
|
|
// just kill ourself and not cause problems for someone else.
|
|
if syscall.Getppid() != l.parentPid {
|
|
return syscall.Kill(syscall.Getpid(), syscall.SIGKILL)
|
|
}
|
|
return system.Execv(l.config.Args[0], l.config.Args[0:], os.Environ())
|
|
}
|