Files
runc/libcontainer/setns_init_linux.go
Kir Kolyshkin 201d60c51d runc run/start/exec: fix init log forwarding race
Sometimes debug.bats test cases are failing like this:

> not ok 27 global --debug to --log --log-format 'json'
> # (in test file tests/integration/debug.bats, line 77)
> #   `[[ "${output}" == *"child process in init()"* ]]' failed

It happens more when writing to disk.

This issue is caused by the fact that runc spawns a log forwarding goroutine
(ForwardLogs) but does not wait for it to finish, resulting in missing
debug lines from nsexec.

ForwardLogs itself, though, never finishes, because it reads from the
read end of a pipe whose write end is never closed. This is
especially true in the case of runc create, which spawns runc init and
exits; meanwhile runc init waits on the exec fifo for an arbitrarily long
time before doing execve.

So, to fix the failure described above, we need to:

 1. Make runc create/run/exec wait for ForwardLogs to finish;

 2. Make runc init close its log pipe file descriptor (i.e.
    the one whose value is passed in the _LIBCONTAINER_LOGPIPE
    environment variable).

This is exactly what this commit does:

 1. Amend ForwardLogs to return a channel, and wait for it in start().

 2. In runc init, save the log fd and close it as late as possible.

PS I have to admit I still do not understand why an explicit close of
log pipe fd is required in e.g. (*linuxSetnsInit).Init, right before
the execve which (thanks to CLOEXEC) closes the fd anyway.

Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
2021-03-25 19:18:55 -07:00

97 lines
2.9 KiB
Go

// +build linux
package libcontainer
import (
"os"
"runtime"
"github.com/opencontainers/runc/libcontainer/apparmor"
"github.com/opencontainers/runc/libcontainer/keys"
"github.com/opencontainers/runc/libcontainer/seccomp"
"github.com/opencontainers/runc/libcontainer/system"
"github.com/opencontainers/selinux/go-selinux"
"github.com/pkg/errors"
"golang.org/x/sys/unix"
)
// linuxSetnsInit performs the container's initialization for running a new process
// inside an existing container.
type linuxSetnsInit struct {
// pipe is not used by the code visible here.
// NOTE(review): presumably the sync pipe to the parent runc process — confirm.
pipe *os.File
// consoleSocket is handed to setupConsole when config.CreateConsole is set.
consoleSocket *os.File
// config holds the settings Init consults: process label, keyring and
// console flags, seccomp and AppArmor settings, and the Args to exec.
config *initConfig
// logFd is the log pipe file descriptor; Init closes it right before the
// exec so the parent's ForwardLogs can exit.
logFd int
}
// getSessionRingName returns the session keyring name that Init passes to
// keys.JoinSessionKeyring: the fixed "_ses." prefix followed by the
// container ID taken from the init config.
func (l *linuxSetnsInit) getSessionRingName() string {
	const sessionPrefix = "_ses."
	return sessionPrefix + l.config.ContainerId
}
// Init finishes setting up the current process and then replaces it with the
// configured command via system.Execv.
//
// The steps run in a fixed order, and the first failing step's error is
// returned:
//
//  1. join a fresh session keyring (unless NoNewKeyring is set);
//  2. set up the console and controlling terminal (if CreateConsole is set);
//  3. set no_new_privs (if NoNewPrivileges is set);
//  4. set the SELinux exec label;
//  5. apply seccomp early when NoNewPrivileges is not set;
//  6. finalizeNamespace and apply the AppArmor profile;
//  7. apply seccomp late when NoNewPrivileges is set;
//  8. close the log pipe fd and exec Args[0].
//
// The goroutine is locked to its OS thread for the duration of the call.
func (l *linuxSetnsInit) Init() error {
	runtime.LockOSThread()
	defer runtime.UnlockOSThread()

	cfg := l.config

	if !cfg.Config.NoNewKeyring {
		if labelErr := selinux.SetKeyLabel(cfg.ProcessLabel); labelErr != nil {
			return labelErr
		}
		// Reset the key label when Init returns; the result is discarded.
		defer selinux.SetKeyLabel("")

		// Do not inherit the parent's session keyring.
		//
		// Same justification as in standard_init_linux.go as to why we
		// don't bail on ENOSYS.
		//
		// TODO(cyphar): And we should have logging here too.
		_, joinErr := keys.JoinSessionKeyring(l.getSessionRingName())
		if joinErr != nil && errors.Cause(joinErr) != unix.ENOSYS {
			return errors.Wrap(joinErr, "join session keyring")
		}
	}

	if cfg.CreateConsole {
		if consoleErr := setupConsole(l.consoleSocket, cfg, false); consoleErr != nil {
			return consoleErr
		}
		if cttyErr := system.Setctty(); cttyErr != nil {
			return cttyErr
		}
	}

	if cfg.NoNewPrivileges {
		if prctlErr := unix.Prctl(unix.PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); prctlErr != nil {
			return prctlErr
		}
	}

	if labelErr := selinux.SetExecLabel(cfg.ProcessLabel); labelErr != nil {
		return labelErr
	}
	// Reset the exec label when Init returns; the result is discarded.
	defer selinux.SetExecLabel("")

	// Without NoNewPrivileges seccomp is a privileged operation, so it has to
	// be installed before capabilities are dropped; otherwise it is installed
	// as late as possible (see below) so that as few syscalls as possible
	// happen after it.
	if cfg.Config.Seccomp != nil && !cfg.NoNewPrivileges {
		if seccompErr := seccomp.InitSeccomp(cfg.Config.Seccomp); seccompErr != nil {
			return seccompErr
		}
	}

	if nsErr := finalizeNamespace(cfg); nsErr != nil {
		return nsErr
	}

	if aaErr := apparmor.ApplyProfile(cfg.AppArmorProfile); aaErr != nil {
		return aaErr
	}

	// Install seccomp as close to execve as possible, so that few syscalls
	// take place afterward (reducing the number of syscalls users need to
	// allow in their seccomp profiles).
	if cfg.Config.Seccomp != nil && cfg.NoNewPrivileges {
		if seccompErr := seccomp.InitSeccomp(cfg.Config.Seccomp); seccompErr != nil {
			return newSystemErrorWithCause(seccompErr, "init seccomp")
		}
	}

	// Close the log pipe fd so the parent's ForwardLogs can exit.
	if closeErr := unix.Close(l.logFd); closeErr != nil {
		return newSystemErrorWithCause(closeErr, "closing log pipe fd")
	}

	return system.Execv(cfg.Args[0], cfg.Args, os.Environ())
}