mirror of
https://github.com/opencontainers/runc.git
synced 2025-10-06 16:07:09 +08:00

Work is ongoing in the kernel to support different kernel keyrings per user namespace. We want to allow SELinux to manage kernel keyrings inside of the container. Currently when runc creates the kernel keyring it gets the label which runc is running with ususally `container_runtime_t`, with this change the kernel keyring will be labeled with the container process label container_t:s0:C1,c2. Container running as container_t:s0:c1,c2 can manage keyrings with the same label. This change required a revendoring or the SELinux go bindings. github.com/opencontainers/selinux. Signed-off-by: Daniel J Walsh <dwalsh@redhat.com>
93 lines
2.7 KiB
Go
93 lines
2.7 KiB
Go
// +build linux
|
|
|
|
package libcontainer
|
|
|
|
import (
|
|
"fmt"
|
|
"os"
|
|
"runtime"
|
|
|
|
"github.com/opencontainers/runc/libcontainer/apparmor"
|
|
"github.com/opencontainers/runc/libcontainer/keys"
|
|
"github.com/opencontainers/runc/libcontainer/seccomp"
|
|
"github.com/opencontainers/runc/libcontainer/system"
|
|
"github.com/opencontainers/selinux/go-selinux/label"
|
|
"github.com/pkg/errors"
|
|
|
|
"golang.org/x/sys/unix"
|
|
)
|
|
|
|
// linuxSetnsInit performs the container's initialization for running a new process
|
|
// inside an existing container.
|
|
type linuxSetnsInit struct {
|
|
pipe *os.File
|
|
consoleSocket *os.File
|
|
config *initConfig
|
|
}
|
|
|
|
func (l *linuxSetnsInit) getSessionRingName() string {
|
|
return fmt.Sprintf("_ses.%s", l.config.ContainerId)
|
|
}
|
|
|
|
func (l *linuxSetnsInit) Init() error {
|
|
runtime.LockOSThread()
|
|
defer runtime.UnlockOSThread()
|
|
|
|
if !l.config.Config.NoNewKeyring {
|
|
if err := label.SetKeyLabel(l.config.ProcessLabel); err != nil {
|
|
return err
|
|
}
|
|
defer label.SetKeyLabel("")
|
|
// Do not inherit the parent's session keyring.
|
|
if _, err := keys.JoinSessionKeyring(l.getSessionRingName()); err != nil {
|
|
// Same justification as in standart_init_linux.go as to why we
|
|
// don't bail on ENOSYS.
|
|
//
|
|
// TODO(cyphar): And we should have logging here too.
|
|
if errors.Cause(err) != unix.ENOSYS {
|
|
return errors.Wrap(err, "join session keyring")
|
|
}
|
|
}
|
|
}
|
|
if l.config.CreateConsole {
|
|
if err := setupConsole(l.consoleSocket, l.config, false); err != nil {
|
|
return err
|
|
}
|
|
if err := system.Setctty(); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
if l.config.NoNewPrivileges {
|
|
if err := unix.Prctl(unix.PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
if err := label.SetProcessLabel(l.config.ProcessLabel); err != nil {
|
|
return err
|
|
}
|
|
defer label.SetProcessLabel("")
|
|
// Without NoNewPrivileges seccomp is a privileged operation, so we need to
|
|
// do this before dropping capabilities; otherwise do it as late as possible
|
|
// just before execve so as few syscalls take place after it as possible.
|
|
if l.config.Config.Seccomp != nil && !l.config.NoNewPrivileges {
|
|
if err := seccomp.InitSeccomp(l.config.Config.Seccomp); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
if err := finalizeNamespace(l.config); err != nil {
|
|
return err
|
|
}
|
|
if err := apparmor.ApplyProfile(l.config.AppArmorProfile); err != nil {
|
|
return err
|
|
}
|
|
// Set seccomp as close to execve as possible, so as few syscalls take
|
|
// place afterward (reducing the amount of syscalls that users need to
|
|
// enable in their seccomp profiles).
|
|
if l.config.Config.Seccomp != nil && l.config.NoNewPrivileges {
|
|
if err := seccomp.InitSeccomp(l.config.Config.Seccomp); err != nil {
|
|
return newSystemErrorWithCause(err, "init seccomp")
|
|
}
|
|
}
|
|
return system.Execv(l.config.Args[0], l.config.Args[0:], os.Environ())
|
|
}
|