mirror of
https://github.com/opencontainers/runc.git
synced 2025-12-24 11:50:58 +08:00
rootfs: switch to fd-based handling of mountpoint targets
An attacker could race with us during mount configuration in order to trick us into mounting over an unexpected path. This would bypass checkProcMount() and would allow for security profiles to be left unapplied by mounting over /proc/self/attr/... (or even more serious outcomes such as killing the entire system by tricking runc into writing strings to /proc/sysrq-trigger). This is a larger issue with our current mount infrastructure, and the ideal solution would be to rewrite it all to be fd-based (which would also allow us to support the "new" mount API, which also avoids a bunch of other issues with mount(8)). However, such a rewrite is not really workable as a security fix, so this patch is a bit of a compromise approach to fix the issue while also moving us a bit towards that eventual end-goal. The core issue in CVE-2025-52881 is that we currently use the (insecure) SecureJoin to re-resolve mountpoint target paths multiple times during mounting. Rather than generating a string from createMountpoint(), we instead open an *os.File handle to the target mountpoint directly and then operate on that handle. This will make it easier to remove utils.WithProcfd() and rework mountViaFds() in the future. The only real issue we need to work around is that we need to re-open the mount target after doing the mount in order to get a handle to the mountpoint -- pathrs.Reopen() doesn't work in this case (it just re-opens the inode under the mountpoint) so we need to do a naive re-open using the full path. Note that if we used move_mount(2) this wouldn't be a problem because we would have a handle to the mountpoint itself. Note that this is still somewhat of a temporary solution -- ideally mountViaFds would use *os.File directly to let us avoid some other issues with using bare /proc/... paths, as well as also letting us more easily use the new mount API on modern kernels. Fixes: GHSA-cgrx-mc8f-2prm CVE-2025-52881 Co-developed-by: lifubang <lifubang@acmcoder.com> Signed-off-by: Aleksa Sarai <cyphar@cyphar.com>
This commit is contained in:
71
internal/pathrs/root_pathrslite.go
Normal file
71
internal/pathrs/root_pathrslite.go
Normal file
@@ -0,0 +1,71 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
/*
|
||||
* Copyright (C) 2024-2025 Aleksa Sarai <cyphar@cyphar.com>
|
||||
* Copyright (C) 2024-2025 SUSE LLC
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package pathrs
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
|
||||
"github.com/cyphar/filepath-securejoin/pathrs-lite"
|
||||
"golang.org/x/sys/unix"
|
||||
|
||||
"github.com/opencontainers/runc/internal/linux"
|
||||
)
|
||||
|
||||
// OpenInRoot opens the given path inside the root with the provided flags. It
|
||||
// is effectively shorthand for [securejoin.OpenInRoot] followed by
|
||||
// [securejoin.Reopen].
|
||||
func OpenInRoot(root, subpath string, flags int) (*os.File, error) {
|
||||
handle, err := pathrs.OpenInRoot(root, subpath)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer handle.Close()
|
||||
return pathrs.Reopen(handle, flags)
|
||||
}
|
||||
|
||||
// CreateInRoot creates a new file inside a root (as well as any missing parent
|
||||
// directories) and returns a handle to said file. This effectively has
|
||||
// open(O_CREAT|O_NOFOLLOW) semantics. If you want the creation to use O_EXCL,
|
||||
// include it in the passed flags. The fileMode argument uses unix.* mode bits,
|
||||
// *not* os.FileMode.
|
||||
func CreateInRoot(root, subpath string, flags int, fileMode uint32) (*os.File, error) {
|
||||
dir, filename := filepath.Split(subpath)
|
||||
if filepath.Join("/", filename) == "/" {
|
||||
return nil, fmt.Errorf("create in root subpath %q has bad trailing component %q", subpath, filename)
|
||||
}
|
||||
|
||||
dirFd, err := MkdirAllInRootOpen(root, dir, 0o755)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer dirFd.Close()
|
||||
|
||||
// We know that the filename does not have any "/" components, and that
|
||||
// dirFd is inside the root. O_NOFOLLOW will stop us from following
|
||||
// trailing symlinks, so this is safe to do. libpathrs's Root::create_file
|
||||
// works the same way.
|
||||
flags |= unix.O_CREAT | unix.O_NOFOLLOW
|
||||
fd, err := linux.Openat(int(dirFd.Fd()), filename, flags, fileMode)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return os.NewFile(uintptr(fd), root+"/"+subpath), nil
|
||||
}
|
||||
@@ -582,8 +582,12 @@ func (c *Container) prepareCriuRestoreMounts(mounts []*configs.Mount) error {
|
||||
if isOnTmpfs(m.Destination, mounts) {
|
||||
continue
|
||||
}
|
||||
if _, err := createMountpoint(c.config.Rootfs, mountEntry{Mount: m}); err != nil {
|
||||
return fmt.Errorf("create criu restore mountpoint for %s mount: %w", m.Destination, err)
|
||||
me := mountEntry{Mount: m}
|
||||
if err := me.createOpenMountpoint(c.config.Rootfs); err != nil {
|
||||
return fmt.Errorf("create criu restore mountpoint for %s mount: %w", me.Destination, err)
|
||||
}
|
||||
if me.dstFile != nil {
|
||||
defer me.dstFile.Close()
|
||||
}
|
||||
// If the mount point is a bind mount, we need to mount
|
||||
// it now so that runc can create the necessary mount
|
||||
@@ -595,13 +599,20 @@ func (c *Container) prepareCriuRestoreMounts(mounts []*configs.Mount) error {
|
||||
// because during initial container creation mounts are
|
||||
// set up in the order they are configured.
|
||||
if m.Device == "bind" {
|
||||
if err := utils.WithProcfd(c.config.Rootfs, m.Destination, func(dstFd string) error {
|
||||
if err := utils.WithProcfdFile(me.dstFile, func(dstFd string) error {
|
||||
return mountViaFds(m.Source, nil, m.Destination, dstFd, "", unix.MS_BIND|unix.MS_REC, "")
|
||||
}); err != nil {
|
||||
return err
|
||||
}
|
||||
umounts = append(umounts, m.Destination)
|
||||
}
|
||||
if me.dstFile != nil {
|
||||
// As this is being done in a loop, the defer earlier will be
|
||||
// delayed until all mountpoints are handled -- for a config with
|
||||
// many mountpoints this could result in a lot of open files. So we
|
||||
// opportunistically close the file as well as deferring it.
|
||||
_ = me.dstFile.Close()
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -13,6 +13,7 @@ import (
|
||||
"time"
|
||||
|
||||
securejoin "github.com/cyphar/filepath-securejoin"
|
||||
"github.com/cyphar/filepath-securejoin/pathrs-lite/procfs"
|
||||
"github.com/moby/sys/mountinfo"
|
||||
"github.com/moby/sys/userns"
|
||||
"github.com/mrunalp/fileutils"
|
||||
@@ -46,6 +47,7 @@ type mountConfig struct {
|
||||
type mountEntry struct {
|
||||
*configs.Mount
|
||||
srcFile *mountSource
|
||||
dstFile *os.File
|
||||
}
|
||||
|
||||
// srcName is only meant for error messages, it returns a "friendly" name.
|
||||
@@ -369,7 +371,7 @@ func mountCgroupV1(m mountEntry, c *mountConfig) error {
|
||||
}
|
||||
|
||||
func mountCgroupV2(m mountEntry, c *mountConfig) error {
|
||||
err := utils.WithProcfd(c.root, m.Destination, func(dstFd string) error {
|
||||
err := utils.WithProcfdFile(m.dstFile, func(dstFd string) error {
|
||||
return mountViaFds(m.Source, nil, m.Destination, dstFd, "cgroup2", uintptr(m.Flags), m.Data)
|
||||
})
|
||||
if err == nil || (!errors.Is(err, unix.EPERM) && !errors.Is(err, unix.EBUSY)) {
|
||||
@@ -398,14 +400,14 @@ func mountCgroupV2(m mountEntry, c *mountConfig) error {
|
||||
//
|
||||
// Mask `/sys/fs/cgroup` to ensure it is read-only, even when `/sys` is mounted
|
||||
// with `rbind,ro` (`runc spec --rootless` produces `rbind,ro` for `/sys`).
|
||||
err = utils.WithProcfd(c.root, m.Destination, func(procfd string) error {
|
||||
err = utils.WithProcfdFile(m.dstFile, func(procfd string) error {
|
||||
return maskPaths([]string{procfd}, c.label)
|
||||
})
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
func doTmpfsCopyUp(m mountEntry, rootfs, mountLabel string) (Err error) {
|
||||
func doTmpfsCopyUp(m mountEntry, mountLabel string) (Err error) {
|
||||
// Set up a scratch dir for the tmpfs on the host.
|
||||
tmpdir, err := prepareTmp("/tmp")
|
||||
if err != nil {
|
||||
@@ -418,13 +420,19 @@ func doTmpfsCopyUp(m mountEntry, rootfs, mountLabel string) (Err error) {
|
||||
}
|
||||
defer os.RemoveAll(tmpDir)
|
||||
|
||||
// Configure the *host* tmpdir as if it's the container mount. We change
|
||||
// m.Destination since we are going to mount *on the host*.
|
||||
oldDest := m.Destination
|
||||
m.Destination = tmpDir
|
||||
err = mountPropagate(m, "/", mountLabel)
|
||||
m.Destination = oldDest
|
||||
tmpDirFile, err := os.OpenFile(tmpDir, unix.O_DIRECTORY|unix.O_CLOEXEC, 0)
|
||||
if err != nil {
|
||||
return fmt.Errorf("tmpcopyup: %w", err)
|
||||
}
|
||||
defer tmpDirFile.Close()
|
||||
|
||||
// Configure the *host* tmpdir as if it's the container mount. We change
|
||||
// m.dstFile since we are going to mount *on the host*.
|
||||
hostMount := mountEntry{
|
||||
Mount: m.Mount,
|
||||
dstFile: tmpDirFile,
|
||||
}
|
||||
if err := hostMount.mountPropagate("/", mountLabel); err != nil {
|
||||
return err
|
||||
}
|
||||
defer func() {
|
||||
@@ -435,7 +443,7 @@ func doTmpfsCopyUp(m mountEntry, rootfs, mountLabel string) (Err error) {
|
||||
}
|
||||
}()
|
||||
|
||||
return utils.WithProcfd(rootfs, m.Destination, func(dstFd string) (Err error) {
|
||||
return utils.WithProcfdFile(m.dstFile, func(dstFd string) (Err error) {
|
||||
// Copy the container data to the host tmpdir. We append "/" to force
|
||||
// CopyDirectory to resolve the symlink rather than trying to copy the
|
||||
// symlink itself.
|
||||
@@ -497,72 +505,76 @@ func statfsToMountFlags(st unix.Statfs_t) int {
|
||||
|
||||
var errRootfsToFile = errors.New("config tries to change rootfs to file")
|
||||
|
||||
func createMountpoint(rootfs string, m mountEntry) (string, error) {
|
||||
dest, err := securejoin.SecureJoin(rootfs, m.Destination)
|
||||
func (m *mountEntry) createOpenMountpoint(rootfs string) (Err error) {
|
||||
unsafePath := utils.StripRoot(rootfs, m.Destination)
|
||||
dstFile, err := pathrs.OpenInRoot(rootfs, unsafePath, unix.O_PATH)
|
||||
defer func() {
|
||||
if dstFile != nil && Err != nil {
|
||||
_ = dstFile.Close()
|
||||
}
|
||||
}()
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
if err := checkProcMount(rootfs, dest, m); err != nil {
|
||||
return "", fmt.Errorf("check proc-safety of %s mount: %w", m.Destination, err)
|
||||
}
|
||||
|
||||
switch m.Device {
|
||||
case "bind":
|
||||
fi, _, err := m.srcStat()
|
||||
if err != nil {
|
||||
// Error out if the source of a bind mount does not exist as we
|
||||
// will be unable to bind anything to it.
|
||||
return "", err
|
||||
if !errors.Is(err, unix.ENOENT) {
|
||||
return fmt.Errorf("lookup mountpoint target: %w", err)
|
||||
}
|
||||
// If the original source is not a directory, make the target a file.
|
||||
if !fi.IsDir() {
|
||||
// Make sure we aren't tricked into trying to make the root a file.
|
||||
if rootfs == dest {
|
||||
return "", fmt.Errorf("%w: file bind mount over rootfs", errRootfsToFile)
|
||||
}
|
||||
// Make the parent directory.
|
||||
destDir, destBase := filepath.Split(dest)
|
||||
destDirFd, err := pathrs.MkdirAllInRootOpen(rootfs, destDir, 0o755)
|
||||
|
||||
// If the mountpoint doesn't already exist, we want to create a mountpoint
|
||||
// that makes sense for the source. For file bind-mounts this is an empty
|
||||
// file, for everything else it's a directory.
|
||||
dstIsFile := false
|
||||
if m.Device == "bind" {
|
||||
fi, _, err := m.srcStat()
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("make parent dir of file bind-mount: %w", err)
|
||||
// Error out if the source of a bind mount does not exist as we
|
||||
// will be unable to bind anything to it.
|
||||
return err
|
||||
}
|
||||
defer destDirFd.Close()
|
||||
// Make the target file. We want to avoid opening any file that is
|
||||
// already there because it could be a "bad" file like an invalid
|
||||
// device or hung tty that might cause a DoS, so we use mknodat.
|
||||
// destBase does not contain any "/" components, and mknodat does
|
||||
// not follow trailing symlinks, so we can safely just call mknodat
|
||||
// here.
|
||||
if err := unix.Mknodat(int(destDirFd.Fd()), destBase, unix.S_IFREG|0o644, 0); err != nil {
|
||||
// If we get EEXIST, there was already an inode there and
|
||||
// we can consider that a success.
|
||||
if !errors.Is(err, unix.EEXIST) {
|
||||
err = &os.PathError{Op: "mknod regular file", Path: dest, Err: err}
|
||||
return "", fmt.Errorf("create target of file bind-mount: %w", err)
|
||||
}
|
||||
}
|
||||
// Nothing left to do.
|
||||
return dest, nil
|
||||
dstIsFile = !fi.IsDir()
|
||||
}
|
||||
|
||||
case "tmpfs":
|
||||
if dstIsFile {
|
||||
dstFile, err = pathrs.CreateInRoot(rootfs, unsafePath, unix.O_CREAT|unix.O_EXCL|unix.O_NOFOLLOW, 0o644)
|
||||
} else {
|
||||
dstFile, err = pathrs.MkdirAllInRootOpen(rootfs, unsafePath, 0o755)
|
||||
}
|
||||
if err != nil {
|
||||
return fmt.Errorf("make mountpoint %q: %w", m.Destination, err)
|
||||
}
|
||||
}
|
||||
|
||||
if m.Device == "tmpfs" {
|
||||
// If the original target exists, copy the mode for the tmpfs mount.
|
||||
if stat, err := os.Stat(dest); err == nil {
|
||||
dt := fmt.Sprintf("mode=%04o", syscallMode(stat.Mode()))
|
||||
if m.Data != "" {
|
||||
dt = dt + "," + m.Data
|
||||
}
|
||||
m.Data = dt
|
||||
|
||||
// Nothing left to do.
|
||||
return dest, nil
|
||||
stat, err := dstFile.Stat()
|
||||
if err != nil {
|
||||
return fmt.Errorf("check tmpfs source mode: %w", err)
|
||||
}
|
||||
dt := fmt.Sprintf("mode=%04o", syscallMode(stat.Mode()))
|
||||
if m.Data != "" {
|
||||
dt = dt + "," + m.Data
|
||||
}
|
||||
m.Data = dt
|
||||
}
|
||||
|
||||
if err := pathrs.MkdirAllInRoot(rootfs, dest, 0o755); err != nil {
|
||||
return "", err
|
||||
dstFullPath, err := procfs.ProcSelfFdReadlink(dstFile)
|
||||
if err != nil {
|
||||
return fmt.Errorf("get mount destination real path: %w", err)
|
||||
}
|
||||
return dest, nil
|
||||
if !pathrs.IsLexicallyInRoot(rootfs, dstFullPath) {
|
||||
return fmt.Errorf("mountpoint %q is outside of rootfs %q", dstFullPath, rootfs)
|
||||
}
|
||||
if relPath, err := filepath.Rel(rootfs, dstFullPath); err != nil {
|
||||
return fmt.Errorf("get relative path of %q: %w", dstFullPath, err)
|
||||
} else if relPath == "." {
|
||||
return fmt.Errorf("mountpoint %q is on the top of rootfs %q", dstFullPath, rootfs)
|
||||
}
|
||||
// TODO: Make checkProcMount use dstFile directly to avoid the need to
|
||||
// operate on paths here.
|
||||
if err := checkProcMount(rootfs, dstFullPath, *m); err != nil {
|
||||
return fmt.Errorf("check proc-safety of %s mount: %w", m.Destination, err)
|
||||
}
|
||||
// Update mountEntry.
|
||||
m.dstFile = dstFile
|
||||
return nil
|
||||
}
|
||||
|
||||
func mountToRootfs(c *mountConfig, m mountEntry) error {
|
||||
@@ -592,36 +604,47 @@ func mountToRootfs(c *mountConfig, m mountEntry) error {
|
||||
} else if !fi.IsDir() {
|
||||
return fmt.Errorf("filesystem %q must be mounted on ordinary directory", m.Device)
|
||||
}
|
||||
if err := pathrs.MkdirAllInRoot(rootfs, dest, 0o755); err != nil {
|
||||
dstFile, err := pathrs.MkdirAllInRootOpen(rootfs, dest, 0o755)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// Selinux kernels do not support labeling of /proc or /sys.
|
||||
return mountPropagate(m, rootfs, "")
|
||||
defer dstFile.Close()
|
||||
// "proc" and "sys" mounts need special handling (without resolving the
|
||||
// destination) to avoid attacks.
|
||||
m.dstFile = dstFile
|
||||
return m.mountPropagate(rootfs, "")
|
||||
}
|
||||
|
||||
dest, err := createMountpoint(rootfs, m)
|
||||
if err != nil {
|
||||
mountLabel := c.label
|
||||
if err := m.createOpenMountpoint(rootfs); err != nil {
|
||||
return fmt.Errorf("create mountpoint for %s mount: %w", m.Destination, err)
|
||||
}
|
||||
mountLabel := c.label
|
||||
defer func() {
|
||||
if m.dstFile != nil {
|
||||
_ = m.dstFile.Close()
|
||||
m.dstFile = nil
|
||||
}
|
||||
}()
|
||||
|
||||
switch m.Device {
|
||||
case "mqueue":
|
||||
if err := mountPropagate(m, rootfs, ""); err != nil {
|
||||
if err := m.mountPropagate(rootfs, ""); err != nil {
|
||||
return err
|
||||
}
|
||||
return label.SetFileLabel(dest, mountLabel)
|
||||
return utils.WithProcfdFile(m.dstFile, func(dstFd string) error {
|
||||
return label.SetFileLabel(dstFd, mountLabel)
|
||||
})
|
||||
case "tmpfs":
|
||||
var err error
|
||||
if m.Extensions&configs.EXT_COPYUP == configs.EXT_COPYUP {
|
||||
err = doTmpfsCopyUp(m, rootfs, mountLabel)
|
||||
err = doTmpfsCopyUp(m, mountLabel)
|
||||
} else {
|
||||
err = mountPropagate(m, rootfs, mountLabel)
|
||||
err = m.mountPropagate(rootfs, mountLabel)
|
||||
}
|
||||
|
||||
return err
|
||||
case "bind":
|
||||
// open_tree()-related shenanigans are all handled in mountViaFds.
|
||||
if err := mountPropagate(m, rootfs, mountLabel); err != nil {
|
||||
if err := m.mountPropagate(rootfs, mountLabel); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
@@ -635,7 +658,7 @@ func mountToRootfs(c *mountConfig, m mountEntry) error {
|
||||
// contrast to mount(8)'s current behaviour, but is what users probably
|
||||
// expect. See <https://github.com/util-linux/util-linux/issues/2433>.
|
||||
if m.Flags & ^(unix.MS_BIND|unix.MS_REC|unix.MS_REMOUNT) != 0 || m.ClearedFlags != 0 {
|
||||
if err := utils.WithProcfd(rootfs, m.Destination, func(dstFd string) error {
|
||||
if err := utils.WithProcfdFile(m.dstFile, func(dstFd string) error {
|
||||
flags := m.Flags | unix.MS_BIND | unix.MS_REMOUNT
|
||||
// The runtime-spec says we SHOULD map to the relevant mount(8)
|
||||
// behaviour. However, it's not clear whether we want the
|
||||
@@ -736,14 +759,14 @@ func mountToRootfs(c *mountConfig, m mountEntry) error {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return setRecAttr(m.Mount, rootfs)
|
||||
return setRecAttr(m)
|
||||
case "cgroup":
|
||||
if cgroups.IsCgroup2UnifiedMode() {
|
||||
return mountCgroupV2(m, c)
|
||||
}
|
||||
return mountCgroupV1(m, c)
|
||||
default:
|
||||
return mountPropagate(m, rootfs, mountLabel)
|
||||
return m.mountPropagate(rootfs, mountLabel)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -939,11 +962,9 @@ func bindMountDeviceNode(destDir *os.File, destName string, node *devices.Device
|
||||
}
|
||||
defer dstFile.Close()
|
||||
|
||||
dstFd, closer := utils.ProcThreadSelfFd(dstFile.Fd())
|
||||
defer closer()
|
||||
|
||||
dstPath := filepath.Join(destDir.Name(), destName)
|
||||
return mountViaFds(node.Path, nil, dstPath, dstFd, "bind", unix.MS_BIND, "")
|
||||
return utils.WithProcfdFile(dstFile, func(dstFd string) error {
|
||||
return mountViaFds(node.Path, nil, dstFile.Name(), dstFd, "bind", unix.MS_BIND, "")
|
||||
})
|
||||
}
|
||||
|
||||
// Creates the device node in the rootfs of the container.
|
||||
@@ -1350,9 +1371,46 @@ func maskPaths(paths []string, mountLabel string) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func reopenAfterMount(rootfs string, f *os.File, flags int) (_ *os.File, Err error) {
|
||||
fullPath, err := procfs.ProcSelfFdReadlink(f)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("get full path: %w", err)
|
||||
}
|
||||
if !pathrs.IsLexicallyInRoot(rootfs, fullPath) {
|
||||
return nil, fmt.Errorf("mountpoint %q is outside of rootfs %q", fullPath, rootfs)
|
||||
}
|
||||
unsafePath := utils.StripRoot(rootfs, fullPath)
|
||||
reopened, err := pathrs.OpenInRoot(rootfs, unsafePath, flags)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("re-open mountpoint %q: %w", unsafePath, err)
|
||||
}
|
||||
defer func() {
|
||||
if Err != nil {
|
||||
_ = reopened.Close()
|
||||
}
|
||||
}()
|
||||
|
||||
// NOTE: The best we can do here is confirm that the new mountpoint handle
|
||||
// matches the original target handle, but an attacker could've swapped a
|
||||
// different path to replace it. In the worst case this could result in us
|
||||
// applying later vfsmount flags onto the wrong mount.
|
||||
//
|
||||
// This is far from ideal, but the only way of doing this in a race-free
|
||||
// way is to switch the new mount API (move_mount(2) does not require this
|
||||
// re-opening step, and thus no such races are possible).
|
||||
reopenedFullPath, err := procfs.ProcSelfFdReadlink(reopened)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("check full path of re-opened mountpoint: %w", err)
|
||||
}
|
||||
if reopenedFullPath != fullPath {
|
||||
return nil, fmt.Errorf("mountpoint %q was moved while re-opening", unsafePath)
|
||||
}
|
||||
return reopened, nil
|
||||
}
|
||||
|
||||
// Do the mount operation followed by additional mounts required to take care
|
||||
// of propagation flags. This will always be scoped inside the container rootfs.
|
||||
func mountPropagate(m mountEntry, rootfs string, mountLabel string) error {
|
||||
func (m *mountEntry) mountPropagate(rootfs string, mountLabel string) error {
|
||||
var (
|
||||
data = label.FormatMountLabel(m.Data, mountLabel)
|
||||
flags = m.Flags
|
||||
@@ -1365,19 +1423,30 @@ func mountPropagate(m mountEntry, rootfs string, mountLabel string) error {
|
||||
flags &= ^unix.MS_RDONLY
|
||||
}
|
||||
|
||||
// Because the destination is inside a container path which might be
|
||||
// mutating underneath us, we verify that we are actually going to mount
|
||||
// inside the container with WithProcfd() -- mounting through a procfd
|
||||
// mounts on the target.
|
||||
if err := utils.WithProcfd(rootfs, m.Destination, func(dstFd string) error {
|
||||
if err := utils.WithProcfdFile(m.dstFile, func(dstFd string) error {
|
||||
return mountViaFds(m.Source, m.srcFile, m.Destination, dstFd, m.Device, uintptr(flags), data)
|
||||
}); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// We need to re-open the mountpoint after doing the mount, in order for us
|
||||
// to operate on the new mount we just created. However, we cannot use
|
||||
// pathrs.Reopen because we need to re-resolve from the parent directory to
|
||||
// get a new handle to the top mount.
|
||||
//
|
||||
// TODO: Use move_mount(2) on newer kernels so that this is no longer
|
||||
// necessary on modern systems.
|
||||
newDstFile, err := reopenAfterMount(rootfs, m.dstFile, unix.O_PATH)
|
||||
if err != nil {
|
||||
return fmt.Errorf("reopen mountpoint after mount: %w", err)
|
||||
}
|
||||
_ = m.dstFile.Close()
|
||||
m.dstFile = newDstFile
|
||||
|
||||
// We have to apply mount propagation flags in a separate WithProcfd() call
|
||||
// because the previous call invalidates the passed procfd -- the mount
|
||||
// target needs to be re-opened.
|
||||
if err := utils.WithProcfd(rootfs, m.Destination, func(dstFd string) error {
|
||||
if err := utils.WithProcfdFile(m.dstFile, func(dstFd string) error {
|
||||
for _, pflag := range m.PropagationFlags {
|
||||
if err := mountViaFds("", nil, m.Destination, dstFd, "", uintptr(pflag), ""); err != nil {
|
||||
return err
|
||||
@@ -1390,11 +1459,11 @@ func mountPropagate(m mountEntry, rootfs string, mountLabel string) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func setRecAttr(m *configs.Mount, rootfs string) error {
|
||||
func setRecAttr(m mountEntry) error {
|
||||
if m.RecAttr == nil {
|
||||
return nil
|
||||
}
|
||||
return utils.WithProcfd(rootfs, m.Destination, func(procfd string) error {
|
||||
return utils.WithProcfdFile(m.dstFile, func(procfd string) error {
|
||||
return unix.MountSetattr(-1, procfd, unix.AT_RECURSIVE, m.RecAttr)
|
||||
})
|
||||
}
|
||||
|
||||
@@ -66,11 +66,11 @@ func CleanPath(path string) string {
|
||||
return path
|
||||
}
|
||||
|
||||
// stripRoot returns the passed path, stripping the root path if it was
|
||||
// StripRoot returns the passed path, stripping the root path if it was
|
||||
// (lexicially) inside it. Note that both passed paths will always be treated
|
||||
// as absolute, and the returned path will also always be absolute. In
|
||||
// addition, the paths are cleaned before stripping the root.
|
||||
func stripRoot(root, path string) string {
|
||||
func StripRoot(root, path string) string {
|
||||
// Make the paths clean and absolute.
|
||||
root, path = CleanPath("/"+root), CleanPath("/"+path)
|
||||
switch {
|
||||
|
||||
@@ -131,9 +131,9 @@ func TestStripRoot(t *testing.T) {
|
||||
{"/foo/bar", "foo/bar/baz/beef", "/baz/beef"},
|
||||
{"foo/bar", "foo/bar/baz/beets", "/baz/beets"},
|
||||
} {
|
||||
got := stripRoot(test.root, test.path)
|
||||
got := StripRoot(test.root, test.path)
|
||||
if got != test.out {
|
||||
t.Errorf("stripRoot(%q, %q) -- got %q, expected %q", test.root, test.path, got, test.out)
|
||||
t.Errorf("StripRoot(%q, %q) -- got %q, expected %q", test.root, test.path, got, test.out)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -153,8 +153,8 @@ func NewSockPair(name string) (parent, child *os.File, err error) {
|
||||
// the passed closure (the file handle will be freed once the closure returns).
|
||||
func WithProcfd(root, unsafePath string, fn func(procfd string) error) error {
|
||||
// Remove the root then forcefully resolve inside the root.
|
||||
unsafePath = stripRoot(root, unsafePath)
|
||||
path, err := securejoin.SecureJoin(root, unsafePath)
|
||||
unsafePath = StripRoot(root, unsafePath)
|
||||
fullPath, err := securejoin.SecureJoin(root, unsafePath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("resolving path inside rootfs failed: %w", err)
|
||||
}
|
||||
@@ -163,7 +163,7 @@ func WithProcfd(root, unsafePath string, fn func(procfd string) error) error {
|
||||
defer closer()
|
||||
|
||||
// Open the target path.
|
||||
fh, err := os.OpenFile(path, unix.O_PATH|unix.O_CLOEXEC, 0)
|
||||
fh, err := os.OpenFile(fullPath, unix.O_PATH|unix.O_CLOEXEC, 0)
|
||||
if err != nil {
|
||||
return fmt.Errorf("open o_path procfd: %w", err)
|
||||
}
|
||||
@@ -173,13 +173,24 @@ func WithProcfd(root, unsafePath string, fn func(procfd string) error) error {
|
||||
// Double-check the path is the one we expected.
|
||||
if realpath, err := os.Readlink(procfd); err != nil {
|
||||
return fmt.Errorf("procfd verification failed: %w", err)
|
||||
} else if realpath != path {
|
||||
} else if realpath != fullPath {
|
||||
return fmt.Errorf("possibly malicious path detected -- refusing to operate on %s", realpath)
|
||||
}
|
||||
|
||||
return fn(procfd)
|
||||
}
|
||||
|
||||
// WithProcfdFile is a very minimal wrapper around [ProcThreadSelfFd], intended
|
||||
// to make migrating from [WithProcfd] and [WithProcfdPath] usage easier. The
|
||||
// caller is responsible for making sure that the provided file handle is
|
||||
// actually safe to operate on.
|
||||
func WithProcfdFile(file *os.File, fn func(procfd string) error) error {
|
||||
fdpath, closer := ProcThreadSelfFd(file.Fd())
|
||||
defer closer()
|
||||
|
||||
return fn(fdpath)
|
||||
}
|
||||
|
||||
type ProcThreadSelfCloser func()
|
||||
|
||||
var (
|
||||
|
||||
Reference in New Issue
Block a user