mirror of
https://github.com/opencontainers/runc.git
synced 2025-12-24 11:50:58 +08:00
While we use SecureJoin to try to make all of our target paths inside the container safe, SecureJoin is not safe against an attacker that can change the path after we "resolve" it. os.MkdirAll can inadvertently follow symlinks and thus an attacker could end up tricking runc into creating empty directories on the host (note that the container doesn't get access to these directories, and the host just sees empty directories). However, this could potentially cause DoS issues by (for instance) creating a directory in a conf.d directory for a daemon that doesn't handle subdirectories properly. In addition, the handling for creating file bind-mounts did a plain open(O_CREAT) on the SecureJoin'd path, which is even more obviously unsafe (luckily we didn't use O_TRUNC, or this bug could've allowed an attacker to cause data loss...). Regardless of the symlink issue, opening an untrusted file could result in a DoS if the file is a hung tty or some other "nasty" file. We can use mknodat to safely create a regular file without opening anything anyway (O_CREAT|O_EXCL would also work but it makes the logic a bit more complicated, and we don't want to open the file for any particular reason anyway). libpathrs[1] is the long-term solution for these kinds of problems, but for now we can patch this particular issue by creating a more restricted MkdirAll that refuses to resolve symlinks and does the creation using file descriptors. This is loosely based on a more secure version that filepath-securejoin now has[2] and will be added to libpathrs soon[3]. [1]: https://github.com/openSUSE/libpathrs [2]: https://github.com/cyphar/filepath-securejoin/releases/tag/v0.3.0 [3]: https://github.com/openSUSE/libpathrs/issues/10 Fixes: CVE-2024-45310 Signed-off-by: Aleksa Sarai <cyphar@cyphar.com>
258 lines
6.7 KiB
Go
258 lines
6.7 KiB
Go
//go:build linux
|
|
|
|
package system
|
|
|
|
import (
|
|
"fmt"
|
|
"io"
|
|
"os"
|
|
"runtime"
|
|
"strconv"
|
|
"strings"
|
|
"syscall"
|
|
"unsafe"
|
|
|
|
"github.com/sirupsen/logrus"
|
|
"golang.org/x/sys/unix"
|
|
)
|
|
|
|
// ParentDeathSignal is a signal number as used with the PR_SET_PDEATHSIG and
// PR_GET_PDEATHSIG prctl(2) operations.
type ParentDeathSignal int
|
|
|
|
func (p ParentDeathSignal) Restore() error {
|
|
if p == 0 {
|
|
return nil
|
|
}
|
|
current, err := GetParentDeathSignal()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
if p == current {
|
|
return nil
|
|
}
|
|
return p.Set()
|
|
}
|
|
|
|
func (p ParentDeathSignal) Set() error {
|
|
return SetParentDeathSignal(uintptr(p))
|
|
}
|
|
|
|
func Exec(cmd string, args []string, env []string) error {
|
|
for {
|
|
err := unix.Exec(cmd, args, env)
|
|
if err != unix.EINTR {
|
|
return &os.PathError{Op: "exec", Path: cmd, Err: err}
|
|
}
|
|
}
|
|
}
|
|
|
|
func execveat(fd uintptr, pathname string, args []string, env []string, flags int) error {
|
|
pathnamep, err := syscall.BytePtrFromString(pathname)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
argvp, err := syscall.SlicePtrFromStrings(args)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
envp, err := syscall.SlicePtrFromStrings(env)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
_, _, errno := syscall.Syscall6(
|
|
unix.SYS_EXECVEAT,
|
|
fd,
|
|
uintptr(unsafe.Pointer(pathnamep)),
|
|
uintptr(unsafe.Pointer(&argvp[0])),
|
|
uintptr(unsafe.Pointer(&envp[0])),
|
|
uintptr(flags),
|
|
0,
|
|
)
|
|
return errno
|
|
}
|
|
|
|
func Fexecve(fd uintptr, args []string, env []string) error {
|
|
var err error
|
|
for {
|
|
err = execveat(fd, "", args, env, unix.AT_EMPTY_PATH)
|
|
if err != unix.EINTR { // nolint:errorlint // unix errors are bare
|
|
break
|
|
}
|
|
}
|
|
if err == unix.ENOSYS { // nolint:errorlint // unix errors are bare
|
|
// Fallback to classic /proc/self/fd/... exec.
|
|
return Exec("/proc/self/fd/"+strconv.Itoa(int(fd)), args, env)
|
|
}
|
|
return os.NewSyscallError("execveat", err)
|
|
}
|
|
|
|
func SetParentDeathSignal(sig uintptr) error {
|
|
if err := unix.Prctl(unix.PR_SET_PDEATHSIG, sig, 0, 0, 0); err != nil {
|
|
return err
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func GetParentDeathSignal() (ParentDeathSignal, error) {
|
|
var sig int
|
|
if err := unix.Prctl(unix.PR_GET_PDEATHSIG, uintptr(unsafe.Pointer(&sig)), 0, 0, 0); err != nil {
|
|
return -1, err
|
|
}
|
|
return ParentDeathSignal(sig), nil
|
|
}
|
|
|
|
func SetKeepCaps() error {
|
|
if err := unix.Prctl(unix.PR_SET_KEEPCAPS, 1, 0, 0, 0); err != nil {
|
|
return err
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func ClearKeepCaps() error {
|
|
if err := unix.Prctl(unix.PR_SET_KEEPCAPS, 0, 0, 0, 0); err != nil {
|
|
return err
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func Setctty() error {
|
|
if err := unix.IoctlSetInt(0, unix.TIOCSCTTY, 0); err != nil {
|
|
return err
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// SetSubreaper sets the value i as the subreaper setting for the calling process
|
|
func SetSubreaper(i int) error {
|
|
return unix.Prctl(unix.PR_SET_CHILD_SUBREAPER, uintptr(i), 0, 0, 0)
|
|
}
|
|
|
|
// GetSubreaper returns the subreaper setting for the calling process
|
|
func GetSubreaper() (int, error) {
|
|
var i uintptr
|
|
|
|
if err := unix.Prctl(unix.PR_GET_CHILD_SUBREAPER, uintptr(unsafe.Pointer(&i)), 0, 0, 0); err != nil {
|
|
return -1, err
|
|
}
|
|
|
|
return int(i), nil
|
|
}
|
|
|
|
func ExecutableMemfd(comment string, flags int) (*os.File, error) {
|
|
// Try to use MFD_EXEC first. On pre-6.3 kernels we get -EINVAL for this
|
|
// flag. On post-6.3 kernels, with vm.memfd_noexec=1 this ensures we get an
|
|
// executable memfd. For vm.memfd_noexec=2 this is a bit more complicated.
|
|
// The original vm.memfd_noexec=2 implementation incorrectly silently
|
|
// allowed MFD_EXEC[1] -- this should be fixed in 6.6. On 6.6 and newer
|
|
// kernels, we will get -EACCES if we try to use MFD_EXEC with
|
|
// vm.memfd_noexec=2 (for 6.3-6.5, -EINVAL was the intended return value).
|
|
//
|
|
// The upshot is we only need to retry without MFD_EXEC on -EINVAL because
|
|
// it just so happens that passing MFD_EXEC bypasses vm.memfd_noexec=2 on
|
|
// kernels where -EINVAL is actually a security denial.
|
|
memfd, err := unix.MemfdCreate(comment, flags|unix.MFD_EXEC)
|
|
if err == unix.EINVAL {
|
|
memfd, err = unix.MemfdCreate(comment, flags)
|
|
}
|
|
if err != nil {
|
|
if err == unix.EACCES {
|
|
logrus.Info("memfd_create(MFD_EXEC) failed, possibly due to vm.memfd_noexec=2 -- falling back to less secure O_TMPFILE")
|
|
}
|
|
err := os.NewSyscallError("memfd_create", err)
|
|
return nil, fmt.Errorf("failed to create executable memfd: %w", err)
|
|
}
|
|
return os.NewFile(uintptr(memfd), "/memfd:"+comment), nil
|
|
}
|
|
|
|
// Copy is like io.Copy except it uses sendfile(2) if the source and sink are
|
|
// both (*os.File) as an optimisation to make copies faster.
|
|
func Copy(dst io.Writer, src io.Reader) (copied int64, err error) {
|
|
dstFile, _ := dst.(*os.File)
|
|
srcFile, _ := src.(*os.File)
|
|
|
|
if dstFile != nil && srcFile != nil {
|
|
fi, err := srcFile.Stat()
|
|
if err != nil {
|
|
goto fallback
|
|
}
|
|
size := fi.Size()
|
|
for size > 0 {
|
|
n, err := unix.Sendfile(int(dstFile.Fd()), int(srcFile.Fd()), nil, int(size))
|
|
if n > 0 {
|
|
size -= int64(n)
|
|
copied += int64(n)
|
|
}
|
|
if err == unix.EINTR {
|
|
continue
|
|
}
|
|
if err != nil {
|
|
if copied == 0 {
|
|
// If we haven't copied anything so far, we can safely just
|
|
// fallback to io.Copy. We could always do the fallback but
|
|
// it's safer to error out in the case of a partial copy
|
|
// followed by an error (which should never happen).
|
|
goto fallback
|
|
}
|
|
return copied, fmt.Errorf("partial sendfile copy: %w", err)
|
|
}
|
|
}
|
|
return copied, nil
|
|
}
|
|
|
|
fallback:
|
|
return io.Copy(dst, src)
|
|
}
|
|
|
|
// SetLinuxPersonality sets the Linux execution personality. For more information see the personality syscall documentation.
|
|
// checkout getLinuxPersonalityFromStr() from libcontainer/specconv/spec_linux.go for type conversion.
|
|
func SetLinuxPersonality(personality int) error {
|
|
_, _, errno := unix.Syscall(unix.SYS_PERSONALITY, uintptr(personality), 0, 0)
|
|
if errno != 0 {
|
|
return &os.SyscallError{Syscall: "set_personality", Err: errno}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func prepareAt(dir *os.File, path string) (int, string) {
|
|
if dir == nil {
|
|
return unix.AT_FDCWD, path
|
|
}
|
|
|
|
// Rather than just filepath.Join-ing path here, do it manually so the
|
|
// error and handle correctly indicate cases like path=".." as being
|
|
// relative to the correct directory. The handle.Name() might end up being
|
|
// wrong but because this is (currently) only used in MkdirAllInRoot, that
|
|
// isn't a problem.
|
|
dirName := dir.Name()
|
|
if !strings.HasSuffix(dirName, "/") {
|
|
dirName += "/"
|
|
}
|
|
fullPath := dirName + path
|
|
|
|
return int(dir.Fd()), fullPath
|
|
}
|
|
|
|
func Openat(dir *os.File, path string, flags int, mode uint32) (*os.File, error) {
|
|
dirFd, fullPath := prepareAt(dir, path)
|
|
fd, err := unix.Openat(dirFd, path, flags, mode)
|
|
if err != nil {
|
|
return nil, &os.PathError{Op: "openat", Path: fullPath, Err: err}
|
|
}
|
|
runtime.KeepAlive(dir)
|
|
return os.NewFile(uintptr(fd), fullPath), nil
|
|
}
|
|
|
|
func Mkdirat(dir *os.File, path string, mode uint32) error {
|
|
dirFd, fullPath := prepareAt(dir, path)
|
|
err := unix.Mkdirat(dirFd, path, mode)
|
|
if err != nil {
|
|
err = &os.PathError{Op: "mkdirat", Path: fullPath, Err: err}
|
|
}
|
|
runtime.KeepAlive(dir)
|
|
return err
|
|
}
|