mirror of
https://github.com/opencontainers/runc.git
synced 2025-09-27 03:46:19 +08:00

This enables the support for the rootless container mode. There are many restrictions on what rootless containers can do, so many different runC commands have been disabled: * runc checkpoint * runc events * runc pause * runc ps * runc restore * runc resume * runc update The following commands work: * runc create * runc delete * runc exec * runc kill * runc list * runc run * runc spec * runc state In addition, any specification options that imply joining cgroups have also been disabled. This is due to support for unprivileged subtree management not being available from Linux upstream. Signed-off-by: Aleksa Sarai <asarai@suse.de>
379 lines
10 KiB
Go
379 lines
10 KiB
Go
// +build linux
|
|
|
|
package main
|
|
|
|
import (
|
|
"errors"
|
|
"fmt"
|
|
"net"
|
|
"os"
|
|
"path/filepath"
|
|
"strconv"
|
|
"syscall"
|
|
|
|
"github.com/Sirupsen/logrus"
|
|
"github.com/coreos/go-systemd/activation"
|
|
"github.com/opencontainers/runc/libcontainer"
|
|
"github.com/opencontainers/runc/libcontainer/cgroups/systemd"
|
|
"github.com/opencontainers/runc/libcontainer/configs"
|
|
"github.com/opencontainers/runc/libcontainer/specconv"
|
|
"github.com/opencontainers/runc/libcontainer/utils"
|
|
"github.com/opencontainers/runtime-spec/specs-go"
|
|
"github.com/urfave/cli"
|
|
)
|
|
|
|
var errEmptyID = errors.New("container id cannot be empty")
|
|
|
|
var container libcontainer.Container
|
|
|
|
// loadFactory returns the configured factory instance for execing containers.
|
|
func loadFactory(context *cli.Context) (libcontainer.Factory, error) {
|
|
root := context.GlobalString("root")
|
|
abs, err := filepath.Abs(root)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
cgroupManager := libcontainer.Cgroupfs
|
|
if context.GlobalBool("systemd-cgroup") {
|
|
if systemd.UseSystemd() {
|
|
cgroupManager = libcontainer.SystemdCgroups
|
|
} else {
|
|
return nil, fmt.Errorf("systemd cgroup flag passed, but systemd support for managing cgroups is not available")
|
|
}
|
|
}
|
|
return libcontainer.New(abs, cgroupManager, libcontainer.CriuPath(context.GlobalString("criu")))
|
|
}
|
|
|
|
// getContainer returns the specified container instance by loading it from state
|
|
// with the default factory.
|
|
func getContainer(context *cli.Context) (libcontainer.Container, error) {
|
|
id := context.Args().First()
|
|
if id == "" {
|
|
return nil, errEmptyID
|
|
}
|
|
factory, err := loadFactory(context)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return factory.Load(id)
|
|
}
|
|
|
|
func fatalf(t string, v ...interface{}) {
|
|
fatal(fmt.Errorf(t, v...))
|
|
}
|
|
|
|
func getDefaultImagePath(context *cli.Context) string {
|
|
cwd, err := os.Getwd()
|
|
if err != nil {
|
|
panic(err)
|
|
}
|
|
return filepath.Join(cwd, "checkpoint")
|
|
}
|
|
|
|
// newProcess returns a new libcontainer Process with the arguments from the
|
|
// spec and stdio from the current process.
|
|
func newProcess(p specs.Process) (*libcontainer.Process, error) {
|
|
lp := &libcontainer.Process{
|
|
Args: p.Args,
|
|
Env: p.Env,
|
|
// TODO: fix libcontainer's API to better support uid/gid in a typesafe way.
|
|
User: fmt.Sprintf("%d:%d", p.User.UID, p.User.GID),
|
|
Cwd: p.Cwd,
|
|
Label: p.SelinuxLabel,
|
|
NoNewPrivileges: &p.NoNewPrivileges,
|
|
AppArmorProfile: p.ApparmorProfile,
|
|
}
|
|
if p.Capabilities != nil {
|
|
lp.Capabilities = &configs.Capabilities{}
|
|
lp.Capabilities.Bounding = p.Capabilities.Bounding
|
|
lp.Capabilities.Effective = p.Capabilities.Effective
|
|
lp.Capabilities.Inheritable = p.Capabilities.Inheritable
|
|
lp.Capabilities.Permitted = p.Capabilities.Permitted
|
|
lp.Capabilities.Ambient = p.Capabilities.Ambient
|
|
}
|
|
for _, gid := range p.User.AdditionalGids {
|
|
lp.AdditionalGroups = append(lp.AdditionalGroups, strconv.FormatUint(uint64(gid), 10))
|
|
}
|
|
for _, rlimit := range p.Rlimits {
|
|
rl, err := createLibContainerRlimit(rlimit)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
lp.Rlimits = append(lp.Rlimits, rl)
|
|
}
|
|
return lp, nil
|
|
}
|
|
|
|
func destroy(container libcontainer.Container) {
|
|
if err := container.Destroy(); err != nil {
|
|
logrus.Error(err)
|
|
}
|
|
}
|
|
|
|
// setupIO modifies the given process config according to the options.
|
|
func setupIO(process *libcontainer.Process, rootuid, rootgid int, createTTY, detach bool, sockpath string) (*tty, error) {
|
|
if createTTY {
|
|
process.Stdin = nil
|
|
process.Stdout = nil
|
|
process.Stderr = nil
|
|
t := &tty{}
|
|
if !detach {
|
|
parent, child, err := utils.NewSockPair("console")
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
process.ConsoleSocket = child
|
|
t.postStart = append(t.postStart, parent, child)
|
|
t.consoleC = make(chan error, 1)
|
|
go func() {
|
|
if err := t.recvtty(process, parent); err != nil {
|
|
t.consoleC <- err
|
|
}
|
|
t.consoleC <- nil
|
|
}()
|
|
} else {
|
|
// the caller of runc will handle receiving the console master
|
|
conn, err := net.Dial("unix", sockpath)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
uc, ok := conn.(*net.UnixConn)
|
|
if !ok {
|
|
return nil, fmt.Errorf("casting to UnixConn failed")
|
|
}
|
|
t.postStart = append(t.postStart, uc)
|
|
socket, err := uc.File()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
t.postStart = append(t.postStart, socket)
|
|
process.ConsoleSocket = socket
|
|
}
|
|
return t, nil
|
|
}
|
|
// when runc will detach the caller provides the stdio to runc via runc's 0,1,2
|
|
// and the container's process inherits runc's stdio.
|
|
if detach {
|
|
if err := inheritStdio(process); err != nil {
|
|
return nil, err
|
|
}
|
|
return &tty{}, nil
|
|
}
|
|
return setupProcessPipes(process, rootuid, rootgid)
|
|
}
|
|
|
|
// createPidFile creates a file with the processes pid inside it atomically
|
|
// it creates a temp file with the paths filename + '.' infront of it
|
|
// then renames the file
|
|
func createPidFile(path string, process *libcontainer.Process) error {
|
|
pid, err := process.Pid()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
var (
|
|
tmpDir = filepath.Dir(path)
|
|
tmpName = filepath.Join(tmpDir, fmt.Sprintf(".%s", filepath.Base(path)))
|
|
)
|
|
f, err := os.OpenFile(tmpName, os.O_RDWR|os.O_CREATE|os.O_EXCL|os.O_SYNC, 0666)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
_, err = fmt.Fprintf(f, "%d", pid)
|
|
f.Close()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
return os.Rename(tmpName, path)
|
|
}
|
|
|
|
// XXX: Currently we autodetect rootless mode.
|
|
func isRootless() bool {
|
|
return os.Geteuid() != 0
|
|
}
|
|
|
|
func createContainer(context *cli.Context, id string, spec *specs.Spec) (libcontainer.Container, error) {
|
|
config, err := specconv.CreateLibcontainerConfig(&specconv.CreateOpts{
|
|
CgroupName: id,
|
|
UseSystemdCgroup: context.GlobalBool("systemd-cgroup"),
|
|
NoPivotRoot: context.Bool("no-pivot"),
|
|
NoNewKeyring: context.Bool("no-new-keyring"),
|
|
Spec: spec,
|
|
Rootless: isRootless(),
|
|
})
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
factory, err := loadFactory(context)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return factory.Create(id, config)
|
|
}
|
|
|
|
type runner struct {
|
|
enableSubreaper bool
|
|
shouldDestroy bool
|
|
detach bool
|
|
listenFDs []*os.File
|
|
preserveFDs int
|
|
pidFile string
|
|
consoleSocket string
|
|
container libcontainer.Container
|
|
create bool
|
|
notifySocket *notifySocket
|
|
}
|
|
|
|
func (r *runner) run(config *specs.Process) (int, error) {
|
|
if err := r.checkTerminal(config); err != nil {
|
|
r.destroy()
|
|
return -1, err
|
|
}
|
|
process, err := newProcess(*config)
|
|
if err != nil {
|
|
r.destroy()
|
|
return -1, err
|
|
}
|
|
if len(r.listenFDs) > 0 {
|
|
process.Env = append(process.Env, fmt.Sprintf("LISTEN_FDS=%d", len(r.listenFDs)), "LISTEN_PID=1")
|
|
process.ExtraFiles = append(process.ExtraFiles, r.listenFDs...)
|
|
}
|
|
baseFd := 3 + len(process.ExtraFiles)
|
|
for i := baseFd; i < baseFd+r.preserveFDs; i++ {
|
|
process.ExtraFiles = append(process.ExtraFiles, os.NewFile(uintptr(i), "PreserveFD:"+strconv.Itoa(i)))
|
|
}
|
|
rootuid, err := r.container.Config().HostUID()
|
|
if err != nil {
|
|
r.destroy()
|
|
return -1, err
|
|
}
|
|
rootgid, err := r.container.Config().HostGID()
|
|
if err != nil {
|
|
r.destroy()
|
|
return -1, err
|
|
}
|
|
var (
|
|
detach = r.detach || r.create
|
|
startFn = r.container.Start
|
|
)
|
|
if !r.create {
|
|
startFn = r.container.Run
|
|
}
|
|
// Setting up IO is a two stage process. We need to modify process to deal
|
|
// with detaching containers, and then we get a tty after the container has
|
|
// started.
|
|
handler := newSignalHandler(r.enableSubreaper, r.notifySocket)
|
|
tty, err := setupIO(process, rootuid, rootgid, config.Terminal, detach, r.consoleSocket)
|
|
if err != nil {
|
|
r.destroy()
|
|
return -1, err
|
|
}
|
|
defer tty.Close()
|
|
if err = startFn(process); err != nil {
|
|
r.destroy()
|
|
return -1, err
|
|
}
|
|
if err := tty.waitConsole(); err != nil {
|
|
r.terminate(process)
|
|
r.destroy()
|
|
return -1, err
|
|
}
|
|
if err = tty.ClosePostStart(); err != nil {
|
|
r.terminate(process)
|
|
r.destroy()
|
|
return -1, err
|
|
}
|
|
if r.pidFile != "" {
|
|
if err = createPidFile(r.pidFile, process); err != nil {
|
|
r.terminate(process)
|
|
r.destroy()
|
|
return -1, err
|
|
}
|
|
}
|
|
status, err := handler.forward(process, tty, detach)
|
|
if err != nil {
|
|
r.terminate(process)
|
|
}
|
|
if detach {
|
|
return 0, nil
|
|
}
|
|
r.destroy()
|
|
return status, err
|
|
}
|
|
|
|
func (r *runner) destroy() {
|
|
if r.shouldDestroy {
|
|
destroy(r.container)
|
|
}
|
|
}
|
|
|
|
func (r *runner) terminate(p *libcontainer.Process) {
|
|
_ = p.Signal(syscall.SIGKILL)
|
|
_, _ = p.Wait()
|
|
}
|
|
|
|
func (r *runner) checkTerminal(config *specs.Process) error {
|
|
detach := r.detach || r.create
|
|
// Check command-line for sanity.
|
|
if detach && config.Terminal && r.consoleSocket == "" {
|
|
return fmt.Errorf("cannot allocate tty if runc will detach without setting console socket")
|
|
}
|
|
if (!detach || !config.Terminal) && r.consoleSocket != "" {
|
|
return fmt.Errorf("cannot use console socket if runc will not detach or allocate tty")
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func validateProcessSpec(spec *specs.Process) error {
|
|
if spec.Cwd == "" {
|
|
return fmt.Errorf("Cwd property must not be empty")
|
|
}
|
|
if !filepath.IsAbs(spec.Cwd) {
|
|
return fmt.Errorf("Cwd must be an absolute path")
|
|
}
|
|
if len(spec.Args) == 0 {
|
|
return fmt.Errorf("args must not be empty")
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func startContainer(context *cli.Context, spec *specs.Spec, create bool) (int, error) {
|
|
id := context.Args().First()
|
|
if id == "" {
|
|
return -1, errEmptyID
|
|
}
|
|
|
|
notifySocket := newNotifySocket(context, os.Getenv("NOTIFY_SOCKET"), id)
|
|
if notifySocket != nil {
|
|
notifySocket.setupSpec(context, spec)
|
|
}
|
|
|
|
container, err := createContainer(context, id, spec)
|
|
if err != nil {
|
|
return -1, err
|
|
}
|
|
|
|
if notifySocket != nil {
|
|
notifySocket.setupSocket()
|
|
}
|
|
|
|
// Support on-demand socket activation by passing file descriptors into the container init process.
|
|
listenFDs := []*os.File{}
|
|
if os.Getenv("LISTEN_FDS") != "" {
|
|
listenFDs = activation.Files(false)
|
|
}
|
|
r := &runner{
|
|
enableSubreaper: !context.Bool("no-subreaper"),
|
|
shouldDestroy: true,
|
|
container: container,
|
|
listenFDs: listenFDs,
|
|
notifySocket: notifySocket,
|
|
consoleSocket: context.String("console-socket"),
|
|
detach: context.Bool("detach"),
|
|
pidFile: context.String("pid-file"),
|
|
preserveFDs: context.Int("preserve-fds"),
|
|
create: create,
|
|
}
|
|
return r.run(&spec.Process)
|
|
}
|