mirror of
				https://github.com/opencontainers/runc.git
				synced 2025-10-31 02:56:25 +08:00 
			
		
		
		
	 5586d7caa1
			
		
	
	5586d7caa1
	
	
	
		
			
			This was added by commit f2f16213e when runc-dmz was still a thing.
Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
		
	
		
			
				
	
	
		
			158 lines
		
	
	
		
			5.0 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
			
		
		
	
	
			158 lines
		
	
	
		
			5.0 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
| package libcontainer
 | |
| 
 | |
| import (
 | |
| 	"errors"
 | |
| 	"fmt"
 | |
| 	"os"
 | |
| 	"os/exec"
 | |
| 
 | |
| 	"github.com/opencontainers/selinux/go-selinux"
 | |
| 	"github.com/sirupsen/logrus"
 | |
| 	"golang.org/x/sys/unix"
 | |
| 
 | |
| 	"github.com/opencontainers/runc/libcontainer/apparmor"
 | |
| 	"github.com/opencontainers/runc/libcontainer/keys"
 | |
| 	"github.com/opencontainers/runc/libcontainer/seccomp"
 | |
| 	"github.com/opencontainers/runc/libcontainer/system"
 | |
| 	"github.com/opencontainers/runc/libcontainer/utils"
 | |
| )
 | |
| 
 | |
| // linuxSetnsInit performs the container's initialization for running a new process
 | |
| // inside an existing container.
 | |
| type linuxSetnsInit struct {
 | |
| 	pipe          *syncSocket
 | |
| 	consoleSocket *os.File
 | |
| 	pidfdSocket   *os.File
 | |
| 	config        *initConfig
 | |
| 	logPipe       *os.File
 | |
| }
 | |
| 
 | |
| func (l *linuxSetnsInit) getSessionRingName() string {
 | |
| 	return "_ses." + l.config.ContainerID
 | |
| }
 | |
| 
 | |
| func (l *linuxSetnsInit) Init() error {
 | |
| 	if !l.config.Config.NoNewKeyring {
 | |
| 		if err := selinux.SetKeyLabel(l.config.ProcessLabel); err != nil {
 | |
| 			return err
 | |
| 		}
 | |
| 		defer selinux.SetKeyLabel("") //nolint: errcheck
 | |
| 		// Do not inherit the parent's session keyring.
 | |
| 		if _, err := keys.JoinSessionKeyring(l.getSessionRingName()); err != nil {
 | |
| 			// Same justification as in standart_init_linux.go as to why we
 | |
| 			// don't bail on ENOSYS.
 | |
| 			//
 | |
| 			// TODO(cyphar): And we should have logging here too.
 | |
| 			if !errors.Is(err, unix.ENOSYS) {
 | |
| 				return fmt.Errorf("unable to join session keyring: %w", err)
 | |
| 			}
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	if l.config.CreateConsole {
 | |
| 		if err := setupConsole(l.consoleSocket, l.config, false); err != nil {
 | |
| 			return err
 | |
| 		}
 | |
| 		if err := system.Setctty(); err != nil {
 | |
| 			return err
 | |
| 		}
 | |
| 	}
 | |
| 	if l.pidfdSocket != nil {
 | |
| 		if err := setupPidfd(l.pidfdSocket, "setns"); err != nil {
 | |
| 			return fmt.Errorf("failed to setup pidfd: %w", err)
 | |
| 		}
 | |
| 	}
 | |
| 	if l.config.NoNewPrivileges {
 | |
| 		if err := unix.Prctl(unix.PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); err != nil {
 | |
| 			return err
 | |
| 		}
 | |
| 	}
 | |
| 	if l.config.Config.Umask != nil {
 | |
| 		unix.Umask(int(*l.config.Config.Umask))
 | |
| 	}
 | |
| 
 | |
| 	if l.config.Config.Scheduler != nil {
 | |
| 		if err := setupScheduler(l.config.Config); err != nil {
 | |
| 			return err
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	// Tell our parent that we're ready to exec. This must be done before the
 | |
| 	// Seccomp rules have been applied, because we need to be able to read and
 | |
| 	// write to a socket.
 | |
| 	if err := syncParentReady(l.pipe); err != nil {
 | |
| 		return fmt.Errorf("sync ready: %w", err)
 | |
| 	}
 | |
| 
 | |
| 	if err := selinux.SetExecLabel(l.config.ProcessLabel); err != nil {
 | |
| 		return err
 | |
| 	}
 | |
| 	defer selinux.SetExecLabel("") //nolint: errcheck
 | |
| 	// Without NoNewPrivileges seccomp is a privileged operation, so we need to
 | |
| 	// do this before dropping capabilities; otherwise do it as late as possible
 | |
| 	// just before execve so as few syscalls take place after it as possible.
 | |
| 	if l.config.Config.Seccomp != nil && !l.config.NoNewPrivileges {
 | |
| 		seccompFd, err := seccomp.InitSeccomp(l.config.Config.Seccomp)
 | |
| 		if err != nil {
 | |
| 			return err
 | |
| 		}
 | |
| 		if err := syncParentSeccomp(l.pipe, seccompFd); err != nil {
 | |
| 			return err
 | |
| 		}
 | |
| 	}
 | |
| 	if err := finalizeNamespace(l.config); err != nil {
 | |
| 		return err
 | |
| 	}
 | |
| 	if err := apparmor.ApplyProfile(l.config.AppArmorProfile); err != nil {
 | |
| 		return err
 | |
| 	}
 | |
| 	if l.config.Config.Personality != nil {
 | |
| 		if err := setupPersonality(l.config.Config); err != nil {
 | |
| 			return err
 | |
| 		}
 | |
| 	}
 | |
| 	// Check for the arg early to make sure it exists.
 | |
| 	name, err := exec.LookPath(l.config.Args[0])
 | |
| 	if err != nil {
 | |
| 		return err
 | |
| 	}
 | |
| 	// Set seccomp as close to execve as possible, so as few syscalls take
 | |
| 	// place afterward (reducing the amount of syscalls that users need to
 | |
| 	// enable in their seccomp profiles).
 | |
| 	if l.config.Config.Seccomp != nil && l.config.NoNewPrivileges {
 | |
| 		seccompFd, err := seccomp.InitSeccomp(l.config.Config.Seccomp)
 | |
| 		if err != nil {
 | |
| 			return fmt.Errorf("unable to init seccomp: %w", err)
 | |
| 		}
 | |
| 		if err := syncParentSeccomp(l.pipe, seccompFd); err != nil {
 | |
| 			return err
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	// Close the pipe to signal that we have completed our init.
 | |
| 	// Please keep this because we don't want to get a pipe write error if
 | |
| 	// there is an error from `execve` after all fds closed.
 | |
| 	_ = l.pipe.Close()
 | |
| 
 | |
| 	// Close the log pipe fd so the parent's ForwardLogs can exit.
 | |
| 	logrus.Debugf("setns_init: about to exec")
 | |
| 	if err := l.logPipe.Close(); err != nil {
 | |
| 		return fmt.Errorf("close log pipe: %w", err)
 | |
| 	}
 | |
| 
 | |
| 	// Close all file descriptors we are not passing to the container. This is
 | |
| 	// necessary because the execve target could use internal runc fds as the
 | |
| 	// execve path, potentially giving access to binary files from the host
 | |
| 	// (which can then be opened by container processes, leading to container
 | |
| 	// escapes). Note that because this operation will close any open file
 | |
| 	// descriptors that are referenced by (*os.File) handles from underneath
 | |
| 	// the Go runtime, we must not do any file operations after this point
 | |
| 	// (otherwise the (*os.File) finaliser could close the wrong file). See
 | |
| 	// CVE-2024-21626 for more information as to why this protection is
 | |
| 	// necessary.
 | |
| 	if err := utils.UnsafeCloseFrom(l.config.PassedFilesCount + 3); err != nil {
 | |
| 		return err
 | |
| 	}
 | |
| 	return system.Exec(name, l.config.Args, os.Environ())
 | |
| }
 |