mirror of
https://github.com/opencontainers/runc.git
synced 2025-09-27 03:46:19 +08:00

This code is not in libcontainer, meaning it is only used by a short lived binary (runc start/run/exec). Unlike code in libcontainer (see CreateLibcontainerConfig), here we don't have to care about copying the structures supplied as input, meaning we can just reuse the pointers directly. Fixes:bfbd0305
("Add I/O priority") Fixes:770728e1
("Support `process.scheduler`") Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
439 lines
12 KiB
Go
439 lines
12 KiB
Go
package main
|
|
|
|
import (
|
|
"errors"
|
|
"fmt"
|
|
"net"
|
|
"os"
|
|
"path/filepath"
|
|
"strconv"
|
|
|
|
"github.com/coreos/go-systemd/v22/activation"
|
|
"github.com/opencontainers/runtime-spec/specs-go"
|
|
selinux "github.com/opencontainers/selinux/go-selinux"
|
|
"github.com/sirupsen/logrus"
|
|
"github.com/urfave/cli"
|
|
"golang.org/x/sys/unix"
|
|
|
|
"github.com/opencontainers/runc/libcontainer"
|
|
"github.com/opencontainers/runc/libcontainer/configs"
|
|
"github.com/opencontainers/runc/libcontainer/specconv"
|
|
"github.com/opencontainers/runc/libcontainer/system/kernelversion"
|
|
"github.com/opencontainers/runc/libcontainer/utils"
|
|
)
|
|
|
|
var errEmptyID = errors.New("container id cannot be empty")
|
|
|
|
// getContainer returns the specified container instance by loading it from
|
|
// a state directory (root).
|
|
func getContainer(context *cli.Context) (*libcontainer.Container, error) {
|
|
id := context.Args().First()
|
|
if id == "" {
|
|
return nil, errEmptyID
|
|
}
|
|
root := context.GlobalString("root")
|
|
return libcontainer.Load(root, id)
|
|
}
|
|
|
|
func getDefaultImagePath() string {
|
|
cwd, err := os.Getwd()
|
|
if err != nil {
|
|
panic(err)
|
|
}
|
|
return filepath.Join(cwd, "checkpoint")
|
|
}
|
|
|
|
// newProcess returns a new libcontainer Process with the arguments from the
|
|
// spec and stdio from the current process.
|
|
func newProcess(p specs.Process) (*libcontainer.Process, error) {
|
|
lp := &libcontainer.Process{
|
|
Args: p.Args,
|
|
Env: p.Env,
|
|
// TODO: fix libcontainer's API to better support uid/gid in a typesafe way.
|
|
User: fmt.Sprintf("%d:%d", p.User.UID, p.User.GID),
|
|
Cwd: p.Cwd,
|
|
Label: p.SelinuxLabel,
|
|
NoNewPrivileges: &p.NoNewPrivileges,
|
|
AppArmorProfile: p.ApparmorProfile,
|
|
Scheduler: p.Scheduler,
|
|
IOPriority: p.IOPriority,
|
|
}
|
|
|
|
if p.ConsoleSize != nil {
|
|
lp.ConsoleWidth = uint16(p.ConsoleSize.Width)
|
|
lp.ConsoleHeight = uint16(p.ConsoleSize.Height)
|
|
}
|
|
|
|
if p.Capabilities != nil {
|
|
lp.Capabilities = &configs.Capabilities{}
|
|
lp.Capabilities.Bounding = p.Capabilities.Bounding
|
|
lp.Capabilities.Effective = p.Capabilities.Effective
|
|
lp.Capabilities.Inheritable = p.Capabilities.Inheritable
|
|
lp.Capabilities.Permitted = p.Capabilities.Permitted
|
|
lp.Capabilities.Ambient = p.Capabilities.Ambient
|
|
}
|
|
for _, gid := range p.User.AdditionalGids {
|
|
lp.AdditionalGroups = append(lp.AdditionalGroups, strconv.FormatUint(uint64(gid), 10))
|
|
}
|
|
for _, rlimit := range p.Rlimits {
|
|
rl, err := createLibContainerRlimit(rlimit)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
lp.Rlimits = append(lp.Rlimits, rl)
|
|
}
|
|
return lp, nil
|
|
}
|
|
|
|
// setupIO modifies the given process config according to the options.
|
|
func setupIO(process *libcontainer.Process, rootuid, rootgid int, createTTY, detach bool, sockpath string) (*tty, error) {
|
|
if createTTY {
|
|
process.Stdin = nil
|
|
process.Stdout = nil
|
|
process.Stderr = nil
|
|
t := &tty{}
|
|
if !detach {
|
|
if err := t.initHostConsole(); err != nil {
|
|
return nil, err
|
|
}
|
|
parent, child, err := utils.NewSockPair("console")
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
process.ConsoleSocket = child
|
|
t.postStart = append(t.postStart, parent, child)
|
|
t.consoleC = make(chan error, 1)
|
|
go func() {
|
|
t.consoleC <- t.recvtty(parent)
|
|
}()
|
|
} else {
|
|
// the caller of runc will handle receiving the console master
|
|
conn, err := net.Dial("unix", sockpath)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
uc, ok := conn.(*net.UnixConn)
|
|
if !ok {
|
|
return nil, errors.New("casting to UnixConn failed")
|
|
}
|
|
t.postStart = append(t.postStart, uc)
|
|
socket, err := uc.File()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
t.postStart = append(t.postStart, socket)
|
|
process.ConsoleSocket = socket
|
|
}
|
|
return t, nil
|
|
}
|
|
// when runc will detach the caller provides the stdio to runc via runc's 0,1,2
|
|
// and the container's process inherits runc's stdio.
|
|
if detach {
|
|
inheritStdio(process)
|
|
return &tty{}, nil
|
|
}
|
|
return setupProcessPipes(process, rootuid, rootgid)
|
|
}
|
|
|
|
// createPidFile creates a file containing the PID,
|
|
// doing so atomically (via create and rename).
|
|
func createPidFile(path string, process *libcontainer.Process) error {
|
|
pid, err := process.Pid()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
var (
|
|
tmpDir = filepath.Dir(path)
|
|
tmpName = filepath.Join(tmpDir, "."+filepath.Base(path))
|
|
)
|
|
f, err := os.OpenFile(tmpName, os.O_RDWR|os.O_CREATE|os.O_EXCL|os.O_SYNC, 0o666)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
_, err = f.WriteString(strconv.Itoa(pid))
|
|
f.Close()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
return os.Rename(tmpName, path)
|
|
}
|
|
|
|
func createContainer(context *cli.Context, id string, spec *specs.Spec) (*libcontainer.Container, error) {
|
|
rootlessCg, err := shouldUseRootlessCgroupManager(context)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
config, err := specconv.CreateLibcontainerConfig(&specconv.CreateOpts{
|
|
CgroupName: id,
|
|
UseSystemdCgroup: context.GlobalBool("systemd-cgroup"),
|
|
NoPivotRoot: context.Bool("no-pivot"),
|
|
NoNewKeyring: context.Bool("no-new-keyring"),
|
|
Spec: spec,
|
|
RootlessEUID: os.Geteuid() != 0,
|
|
RootlessCgroups: rootlessCg,
|
|
})
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
root := context.GlobalString("root")
|
|
return libcontainer.Create(root, id, config)
|
|
}
|
|
|
|
type runner struct {
|
|
init bool
|
|
enableSubreaper bool
|
|
shouldDestroy bool
|
|
detach bool
|
|
listenFDs []*os.File
|
|
preserveFDs int
|
|
pidFile string
|
|
consoleSocket string
|
|
pidfdSocket string
|
|
container *libcontainer.Container
|
|
action CtAct
|
|
notifySocket *notifySocket
|
|
criuOpts *libcontainer.CriuOpts
|
|
subCgroupPaths map[string]string
|
|
}
|
|
|
|
func (r *runner) run(config *specs.Process) (int, error) {
|
|
var err error
|
|
defer func() {
|
|
if err != nil {
|
|
r.destroy()
|
|
}
|
|
}()
|
|
if err = r.checkTerminal(config); err != nil {
|
|
return -1, err
|
|
}
|
|
process, err := newProcess(*config)
|
|
if err != nil {
|
|
return -1, err
|
|
}
|
|
process.LogLevel = strconv.Itoa(int(logrus.GetLevel()))
|
|
// Populate the fields that come from runner.
|
|
process.Init = r.init
|
|
process.SubCgroupPaths = r.subCgroupPaths
|
|
if len(r.listenFDs) > 0 {
|
|
process.Env = append(process.Env, "LISTEN_FDS="+strconv.Itoa(len(r.listenFDs)), "LISTEN_PID=1")
|
|
process.ExtraFiles = append(process.ExtraFiles, r.listenFDs...)
|
|
}
|
|
baseFd := 3 + len(process.ExtraFiles)
|
|
procSelfFd, closer := utils.ProcThreadSelf("fd/")
|
|
defer closer()
|
|
for i := baseFd; i < baseFd+r.preserveFDs; i++ {
|
|
_, err = os.Stat(filepath.Join(procSelfFd, strconv.Itoa(i)))
|
|
if err != nil {
|
|
return -1, fmt.Errorf("unable to stat preserved-fd %d (of %d): %w", i-baseFd, r.preserveFDs, err)
|
|
}
|
|
process.ExtraFiles = append(process.ExtraFiles, os.NewFile(uintptr(i), "PreserveFD:"+strconv.Itoa(i)))
|
|
}
|
|
rootuid, err := r.container.Config().HostRootUID()
|
|
if err != nil {
|
|
return -1, err
|
|
}
|
|
rootgid, err := r.container.Config().HostRootGID()
|
|
if err != nil {
|
|
return -1, err
|
|
}
|
|
detach := r.detach || (r.action == CT_ACT_CREATE)
|
|
// Setting up IO is a two stage process. We need to modify process to deal
|
|
// with detaching containers, and then we get a tty after the container has
|
|
// started.
|
|
handler := newSignalHandler(r.enableSubreaper, r.notifySocket)
|
|
tty, err := setupIO(process, rootuid, rootgid, config.Terminal, detach, r.consoleSocket)
|
|
if err != nil {
|
|
return -1, err
|
|
}
|
|
defer tty.Close()
|
|
|
|
if r.pidfdSocket != "" {
|
|
connClose, err := setupPidfdSocket(process, r.pidfdSocket)
|
|
if err != nil {
|
|
return -1, err
|
|
}
|
|
defer connClose()
|
|
}
|
|
|
|
switch r.action {
|
|
case CT_ACT_CREATE:
|
|
err = r.container.Start(process)
|
|
case CT_ACT_RESTORE:
|
|
err = r.container.Restore(process, r.criuOpts)
|
|
case CT_ACT_RUN:
|
|
err = r.container.Run(process)
|
|
default:
|
|
panic("Unknown action")
|
|
}
|
|
if err != nil {
|
|
return -1, err
|
|
}
|
|
if err = tty.waitConsole(); err != nil {
|
|
r.terminate(process)
|
|
return -1, err
|
|
}
|
|
tty.ClosePostStart()
|
|
if r.pidFile != "" {
|
|
if err = createPidFile(r.pidFile, process); err != nil {
|
|
r.terminate(process)
|
|
return -1, err
|
|
}
|
|
}
|
|
status, err := handler.forward(process, tty, detach)
|
|
if err != nil {
|
|
r.terminate(process)
|
|
}
|
|
if detach {
|
|
return 0, nil
|
|
}
|
|
if err == nil {
|
|
r.destroy()
|
|
}
|
|
return status, err
|
|
}
|
|
|
|
func (r *runner) destroy() {
|
|
if r.shouldDestroy {
|
|
if err := r.container.Destroy(); err != nil {
|
|
logrus.Warn(err)
|
|
}
|
|
}
|
|
}
|
|
|
|
func (r *runner) terminate(p *libcontainer.Process) {
|
|
_ = p.Signal(unix.SIGKILL)
|
|
_, _ = p.Wait()
|
|
}
|
|
|
|
func (r *runner) checkTerminal(config *specs.Process) error {
|
|
detach := r.detach || (r.action == CT_ACT_CREATE)
|
|
// Check command-line for sanity.
|
|
if detach && config.Terminal && r.consoleSocket == "" {
|
|
return errors.New("cannot allocate tty if runc will detach without setting console socket")
|
|
}
|
|
if (!detach || !config.Terminal) && r.consoleSocket != "" {
|
|
return errors.New("cannot use console socket if runc will not detach or allocate tty")
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func validateProcessSpec(spec *specs.Process) error {
|
|
if spec == nil {
|
|
return errors.New("process property must not be empty")
|
|
}
|
|
if spec.Cwd == "" {
|
|
return errors.New("Cwd property must not be empty")
|
|
}
|
|
if !filepath.IsAbs(spec.Cwd) {
|
|
return errors.New("Cwd must be an absolute path")
|
|
}
|
|
if len(spec.Args) == 0 {
|
|
return errors.New("args must not be empty")
|
|
}
|
|
if spec.SelinuxLabel != "" && !selinux.GetEnabled() {
|
|
return errors.New("selinux label is specified in config, but selinux is disabled or not supported")
|
|
}
|
|
return nil
|
|
}
|
|
|
|
type CtAct uint8
|
|
|
|
const (
|
|
CT_ACT_CREATE CtAct = iota + 1
|
|
CT_ACT_RUN
|
|
CT_ACT_RESTORE
|
|
)
|
|
|
|
func startContainer(context *cli.Context, action CtAct, criuOpts *libcontainer.CriuOpts) (int, error) {
|
|
if err := revisePidFile(context); err != nil {
|
|
return -1, err
|
|
}
|
|
spec, err := setupSpec(context)
|
|
if err != nil {
|
|
return -1, err
|
|
}
|
|
|
|
id := context.Args().First()
|
|
if id == "" {
|
|
return -1, errEmptyID
|
|
}
|
|
|
|
notifySocket := newNotifySocket(context, os.Getenv("NOTIFY_SOCKET"), id)
|
|
if notifySocket != nil {
|
|
notifySocket.setupSpec(spec)
|
|
}
|
|
|
|
container, err := createContainer(context, id, spec)
|
|
if err != nil {
|
|
return -1, err
|
|
}
|
|
|
|
if notifySocket != nil {
|
|
if err := notifySocket.setupSocketDirectory(); err != nil {
|
|
return -1, err
|
|
}
|
|
if action == CT_ACT_RUN {
|
|
if err := notifySocket.bindSocket(); err != nil {
|
|
return -1, err
|
|
}
|
|
}
|
|
}
|
|
|
|
// Support on-demand socket activation by passing file descriptors into the container init process.
|
|
listenFDs := []*os.File{}
|
|
if os.Getenv("LISTEN_FDS") != "" {
|
|
listenFDs = activation.Files(false)
|
|
}
|
|
|
|
r := &runner{
|
|
enableSubreaper: !context.Bool("no-subreaper"),
|
|
shouldDestroy: !context.Bool("keep"),
|
|
container: container,
|
|
listenFDs: listenFDs,
|
|
notifySocket: notifySocket,
|
|
consoleSocket: context.String("console-socket"),
|
|
pidfdSocket: context.String("pidfd-socket"),
|
|
detach: context.Bool("detach"),
|
|
pidFile: context.String("pid-file"),
|
|
preserveFDs: context.Int("preserve-fds"),
|
|
action: action,
|
|
criuOpts: criuOpts,
|
|
init: true,
|
|
}
|
|
return r.run(spec.Process)
|
|
}
|
|
|
|
func setupPidfdSocket(process *libcontainer.Process, sockpath string) (_clean func(), _ error) {
|
|
linux530 := kernelversion.KernelVersion{Kernel: 5, Major: 3}
|
|
ok, err := kernelversion.GreaterEqualThan(linux530)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
if !ok {
|
|
return nil, fmt.Errorf("--pidfd-socket requires >= v5.3 kernel")
|
|
}
|
|
|
|
conn, err := net.Dial("unix", sockpath)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to dail %s: %w", sockpath, err)
|
|
}
|
|
|
|
uc, ok := conn.(*net.UnixConn)
|
|
if !ok {
|
|
conn.Close()
|
|
return nil, errors.New("failed to cast to UnixConn")
|
|
}
|
|
|
|
socket, err := uc.File()
|
|
if err != nil {
|
|
conn.Close()
|
|
return nil, fmt.Errorf("failed to dup socket: %w", err)
|
|
}
|
|
|
|
process.PidfdSocket = socket
|
|
return func() {
|
|
conn.Close()
|
|
}, nil
|
|
}
|