Files
runc/libcontainer/setns_init_linux.go
Daniel J Walsh cd96170c10 Need to setup labeling of kernel keyrings.
Work is ongoing in the kernel to support different kernel
keyrings per user namespace.  We want to allow SELinux to manage
kernel keyrings inside of the container.

Currently when runc creates the kernel keyring it gets the label which runc is
running with ususally `container_runtime_t`, with this change the kernel keyring
will be labeled with the container process label container_t:s0:C1,c2.

Container running as container_t:s0:c1,c2 can manage keyrings with the same label.

This change required a revendoring or the SELinux go bindings.

github.com/opencontainers/selinux.

Signed-off-by: Daniel J Walsh <dwalsh@redhat.com>
2019-03-13 17:57:30 -04:00

93 lines
2.7 KiB
Go

// +build linux
package libcontainer
import (
"fmt"
"os"
"runtime"
"github.com/opencontainers/runc/libcontainer/apparmor"
"github.com/opencontainers/runc/libcontainer/keys"
"github.com/opencontainers/runc/libcontainer/seccomp"
"github.com/opencontainers/runc/libcontainer/system"
"github.com/opencontainers/selinux/go-selinux/label"
"github.com/pkg/errors"
"golang.org/x/sys/unix"
)
// linuxSetnsInit performs the container's initialization for running a new process
// inside an existing container.
type linuxSetnsInit struct {
pipe *os.File
consoleSocket *os.File
config *initConfig
}
func (l *linuxSetnsInit) getSessionRingName() string {
return fmt.Sprintf("_ses.%s", l.config.ContainerId)
}
func (l *linuxSetnsInit) Init() error {
runtime.LockOSThread()
defer runtime.UnlockOSThread()
if !l.config.Config.NoNewKeyring {
if err := label.SetKeyLabel(l.config.ProcessLabel); err != nil {
return err
}
defer label.SetKeyLabel("")
// Do not inherit the parent's session keyring.
if _, err := keys.JoinSessionKeyring(l.getSessionRingName()); err != nil {
// Same justification as in standart_init_linux.go as to why we
// don't bail on ENOSYS.
//
// TODO(cyphar): And we should have logging here too.
if errors.Cause(err) != unix.ENOSYS {
return errors.Wrap(err, "join session keyring")
}
}
}
if l.config.CreateConsole {
if err := setupConsole(l.consoleSocket, l.config, false); err != nil {
return err
}
if err := system.Setctty(); err != nil {
return err
}
}
if l.config.NoNewPrivileges {
if err := unix.Prctl(unix.PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); err != nil {
return err
}
}
if err := label.SetProcessLabel(l.config.ProcessLabel); err != nil {
return err
}
defer label.SetProcessLabel("")
// Without NoNewPrivileges seccomp is a privileged operation, so we need to
// do this before dropping capabilities; otherwise do it as late as possible
// just before execve so as few syscalls take place after it as possible.
if l.config.Config.Seccomp != nil && !l.config.NoNewPrivileges {
if err := seccomp.InitSeccomp(l.config.Config.Seccomp); err != nil {
return err
}
}
if err := finalizeNamespace(l.config); err != nil {
return err
}
if err := apparmor.ApplyProfile(l.config.AppArmorProfile); err != nil {
return err
}
// Set seccomp as close to execve as possible, so as few syscalls take
// place afterward (reducing the amount of syscalls that users need to
// enable in their seccomp profiles).
if l.config.Config.Seccomp != nil && l.config.NoNewPrivileges {
if err := seccomp.InitSeccomp(l.config.Config.Seccomp); err != nil {
return newSystemErrorWithCause(err, "init seccomp")
}
}
return system.Execv(l.config.Args[0], l.config.Args[0:], os.Environ())
}