mirror of
https://github.com/opencontainers/runc.git
synced 2025-10-08 17:00:13 +08:00
rootfs: add mount destination validation
Because the target of a mount is inside a container (which may be a volume that is shared with another container), there exists a race condition where the target of the mount may change to a path containing a symlink after we have sanitised the path -- resulting in us inadvertently mounting the path outside of the container. This is not immediately useful because we are in a mount namespace with MS_SLAVE mount propagation applied to "/", so we cannot mount on top of host paths in the host namespace. However, if any subsequent mountpoints in the configuration use a subdirectory of that host path as a source, those subsequent mounts will use an attacker-controlled source path (resolved within the host rootfs) -- allowing the bind-mounting of "/" into the container. While arguably configuration issues like this are not entirely within runc's threat model, within the context of Kubernetes (and possibly other container managers that provide semi-arbitrary container creation privileges to untrusted users) this is a legitimate issue. Since we cannot block mounting from the host into the container, we need to block the first stage of this attack (mounting onto a path outside the container). The long-term plan to solve this would be to migrate to libpathrs, but as a stop-gap we implement libpathrs-like path verification through readlink(/proc/self/fd/$n) and then do mount operations through the procfd once it's been verified to be inside the container. The target could move after we've checked it, but if it is inside the container then we can assume that it is safe for the same reason that libpathrs operations would be safe. A slight wrinkle is the "copyup" functionality we provide for tmpfs, which is the only case where we want to do a mount on the host filesystem. To facilitate this, I split out the copy-up functionality entirely so that the logic isn't interspersed with the regular tmpfs logic. In addition, all dependencies on m.Destination being overwritten have been removed since that pattern was just begging to be a source of more mount-target bugs (we do still have to modify m.Destination for tmpfs-copyup but we only do it temporarily). Fixes: CVE-2021-30465 Reported-by: Etienne Champetier <champetier.etienne@gmail.com> Co-authored-by: Noah Meyerhans <nmeyerha@amazon.com> Reviewed-by: Samuel Karp <skarp@amazon.com> Reviewed-by: Kir Kolyshkin <kolyshkin@gmail.com> (@kolyshkin) Reviewed-by: Akihiro Suda <akihiro.suda.cz@hco.ntt.co.jp> Signed-off-by: Aleksa Sarai <cyphar@cyphar.com>
This commit is contained in:
@@ -1217,7 +1217,6 @@ func (c *linuxContainer) makeCriuRestoreMountpoints(m *configs.Mount) error {
|
|||||||
if err := checkProcMount(c.config.Rootfs, dest, ""); err != nil {
|
if err := checkProcMount(c.config.Rootfs, dest, ""); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
m.Destination = dest
|
|
||||||
if err := os.MkdirAll(dest, 0755); err != nil {
|
if err := os.MkdirAll(dest, 0755); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
@@ -1257,13 +1256,16 @@ func (c *linuxContainer) prepareCriuRestoreMounts(mounts []*configs.Mount) error
|
|||||||
umounts := []string{}
|
umounts := []string{}
|
||||||
defer func() {
|
defer func() {
|
||||||
for _, u := range umounts {
|
for _, u := range umounts {
|
||||||
if e := unix.Unmount(u, unix.MNT_DETACH); e != nil {
|
_ = utils.WithProcfd(c.config.Rootfs, u, func(procfd string) error {
|
||||||
if e != unix.EINVAL {
|
if e := unix.Unmount(procfd, unix.MNT_DETACH); e != nil {
|
||||||
// Ignore EINVAL as it means 'target is not a mount point.'
|
if e != unix.EINVAL {
|
||||||
// It probably has already been unmounted.
|
// Ignore EINVAL as it means 'target is not a mount point.'
|
||||||
logrus.Warnf("Error during cleanup unmounting of %q (%v)", u, e)
|
// It probably has already been unmounted.
|
||||||
|
logrus.Warnf("Error during cleanup unmounting of %s (%s): %v", procfd, u, e)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
return nil
|
||||||
|
})
|
||||||
}
|
}
|
||||||
}()
|
}()
|
||||||
for _, m := range mounts {
|
for _, m := range mounts {
|
||||||
@@ -1281,8 +1283,13 @@ func (c *linuxContainer) prepareCriuRestoreMounts(mounts []*configs.Mount) error
|
|||||||
// because during initial container creation mounts are
|
// because during initial container creation mounts are
|
||||||
// set up in the order they are configured.
|
// set up in the order they are configured.
|
||||||
if m.Device == "bind" {
|
if m.Device == "bind" {
|
||||||
if err := unix.Mount(m.Source, m.Destination, "", unix.MS_BIND|unix.MS_REC, ""); err != nil {
|
if err := utils.WithProcfd(c.config.Rootfs, m.Destination, func(procfd string) error {
|
||||||
return errorsf.Wrapf(err, "unable to bind mount %q to %q", m.Source, m.Destination)
|
if err := unix.Mount(m.Source, procfd, "", unix.MS_BIND|unix.MS_REC, ""); err != nil {
|
||||||
|
return errorsf.Wrapf(err, "unable to bind mount %q to %q (through %q)", m.Source, m.Destination, procfd)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}); err != nil {
|
||||||
|
return err
|
||||||
}
|
}
|
||||||
umounts = append(umounts, m.Destination)
|
umounts = append(umounts, m.Destination)
|
||||||
}
|
}
|
||||||
|
@@ -25,6 +25,7 @@ import (
|
|||||||
libcontainerUtils "github.com/opencontainers/runc/libcontainer/utils"
|
libcontainerUtils "github.com/opencontainers/runc/libcontainer/utils"
|
||||||
"github.com/opencontainers/runtime-spec/specs-go"
|
"github.com/opencontainers/runtime-spec/specs-go"
|
||||||
"github.com/opencontainers/selinux/go-selinux/label"
|
"github.com/opencontainers/selinux/go-selinux/label"
|
||||||
|
"github.com/sirupsen/logrus"
|
||||||
"golang.org/x/sys/unix"
|
"golang.org/x/sys/unix"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -228,8 +229,6 @@ func prepareBindMount(m *configs.Mount, rootfs string) error {
|
|||||||
if err := checkProcMount(rootfs, dest, m.Source); err != nil {
|
if err := checkProcMount(rootfs, dest, m.Source); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
// update the mount with the correct dest after symlinks are resolved.
|
|
||||||
m.Destination = dest
|
|
||||||
if err := createIfNotExists(dest, stat.IsDir()); err != nil {
|
if err := createIfNotExists(dest, stat.IsDir()); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
@@ -266,18 +265,21 @@ func mountCgroupV1(m *configs.Mount, c *mountConfig) error {
|
|||||||
if err := os.MkdirAll(subsystemPath, 0755); err != nil {
|
if err := os.MkdirAll(subsystemPath, 0755); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
flags := defaultMountFlags
|
if err := utils.WithProcfd(c.root, b.Destination, func(procfd string) error {
|
||||||
if m.Flags&unix.MS_RDONLY != 0 {
|
flags := defaultMountFlags
|
||||||
flags = flags | unix.MS_RDONLY
|
if m.Flags&unix.MS_RDONLY != 0 {
|
||||||
}
|
flags = flags | unix.MS_RDONLY
|
||||||
cgroupmount := &configs.Mount{
|
}
|
||||||
Source: "cgroup",
|
var (
|
||||||
Device: "cgroup", // this is actually fstype
|
source = "cgroup"
|
||||||
Destination: subsystemPath,
|
data = filepath.Base(subsystemPath)
|
||||||
Flags: flags,
|
)
|
||||||
Data: filepath.Base(subsystemPath),
|
if data == "systemd" {
|
||||||
}
|
data = cgroups.CgroupNamePrefix + data
|
||||||
if err := mountNewCgroup(cgroupmount); err != nil {
|
source = "systemd"
|
||||||
|
}
|
||||||
|
return unix.Mount(source, procfd, "cgroup", uintptr(flags), data)
|
||||||
|
}); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
@@ -307,33 +309,79 @@ func mountCgroupV2(m *configs.Mount, c *mountConfig) error {
|
|||||||
if err := os.MkdirAll(dest, 0755); err != nil {
|
if err := os.MkdirAll(dest, 0755); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
if err := unix.Mount(m.Source, dest, "cgroup2", uintptr(m.Flags), m.Data); err != nil {
|
return utils.WithProcfd(c.root, m.Destination, func(procfd string) error {
|
||||||
// when we are in UserNS but CgroupNS is not unshared, we cannot mount cgroup2 (#2158)
|
if err := unix.Mount(m.Source, procfd, "cgroup2", uintptr(m.Flags), m.Data); err != nil {
|
||||||
if err == unix.EPERM || err == unix.EBUSY {
|
// when we are in UserNS but CgroupNS is not unshared, we cannot mount cgroup2 (#2158)
|
||||||
src := fs2.UnifiedMountpoint
|
if err == unix.EPERM || err == unix.EBUSY {
|
||||||
if c.cgroupns && c.cgroup2Path != "" {
|
src := fs2.UnifiedMountpoint
|
||||||
// Emulate cgroupns by bind-mounting
|
if c.cgroupns && c.cgroup2Path != "" {
|
||||||
// the container cgroup path rather than
|
// Emulate cgroupns by bind-mounting
|
||||||
// the whole /sys/fs/cgroup.
|
// the container cgroup path rather than
|
||||||
src = c.cgroup2Path
|
// the whole /sys/fs/cgroup.
|
||||||
}
|
src = c.cgroup2Path
|
||||||
err = unix.Mount(src, dest, "", uintptr(m.Flags)|unix.MS_BIND, "")
|
}
|
||||||
if err == unix.ENOENT && c.rootlessCgroups {
|
err = unix.Mount(src, procfd, "", uintptr(m.Flags)|unix.MS_BIND, "")
|
||||||
err = nil
|
if err == unix.ENOENT && c.rootlessCgroups {
|
||||||
|
err = nil
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
return nil
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func doTmpfsCopyUp(m *configs.Mount, rootfs, mountLabel string) (Err error) {
|
||||||
|
// Set up a scratch dir for the tmpfs on the host.
|
||||||
|
tmpdir, err := prepareTmp("/tmp")
|
||||||
|
if err != nil {
|
||||||
|
return newSystemErrorWithCause(err, "tmpcopyup: failed to setup tmpdir")
|
||||||
|
}
|
||||||
|
defer cleanupTmp(tmpdir)
|
||||||
|
tmpDir, err := ioutil.TempDir(tmpdir, "runctmpdir")
|
||||||
|
if err != nil {
|
||||||
|
return newSystemErrorWithCause(err, "tmpcopyup: failed to create tmpdir")
|
||||||
|
}
|
||||||
|
defer os.RemoveAll(tmpDir)
|
||||||
|
|
||||||
|
// Configure the *host* tmpdir as if it's the container mount. We change
|
||||||
|
// m.Destination since we are going to mount *on the host*.
|
||||||
|
oldDest := m.Destination
|
||||||
|
m.Destination = tmpDir
|
||||||
|
err = mountPropagate(m, "/", mountLabel)
|
||||||
|
m.Destination = oldDest
|
||||||
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
return nil
|
defer func() {
|
||||||
|
if Err != nil {
|
||||||
|
if err := unix.Unmount(tmpDir, unix.MNT_DETACH); err != nil {
|
||||||
|
logrus.Warnf("tmpcopyup: failed to unmount tmpdir on error: %v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
|
||||||
|
return utils.WithProcfd(rootfs, m.Destination, func(procfd string) (Err error) {
|
||||||
|
// Copy the container data to the host tmpdir. We append "/" to force
|
||||||
|
// CopyDirectory to resolve the symlink rather than trying to copy the
|
||||||
|
// symlink itself.
|
||||||
|
if err := fileutils.CopyDirectory(procfd+"/", tmpDir); err != nil {
|
||||||
|
return fmt.Errorf("tmpcopyup: failed to copy %s to %s (%s): %w", m.Destination, procfd, tmpDir, err)
|
||||||
|
}
|
||||||
|
// Now move the mount into the container.
|
||||||
|
if err := unix.Mount(tmpDir, procfd, "", unix.MS_MOVE, ""); err != nil {
|
||||||
|
return fmt.Errorf("tmpcopyup: failed to move mount %s to %s (%s): %w", tmpDir, procfd, m.Destination, err)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
func mountToRootfs(m *configs.Mount, c *mountConfig) error {
|
func mountToRootfs(m *configs.Mount, c *mountConfig) error {
|
||||||
rootfs := c.root
|
rootfs := c.root
|
||||||
mountLabel := c.label
|
mountLabel := c.label
|
||||||
dest := m.Destination
|
dest, err := securejoin.SecureJoin(rootfs, m.Destination)
|
||||||
if !strings.HasPrefix(dest, rootfs) {
|
if err != nil {
|
||||||
dest = filepath.Join(rootfs, dest)
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
switch m.Device {
|
switch m.Device {
|
||||||
@@ -364,53 +412,21 @@ func mountToRootfs(m *configs.Mount, c *mountConfig) error {
|
|||||||
}
|
}
|
||||||
return label.SetFileLabel(dest, mountLabel)
|
return label.SetFileLabel(dest, mountLabel)
|
||||||
case "tmpfs":
|
case "tmpfs":
|
||||||
copyUp := m.Extensions&configs.EXT_COPYUP == configs.EXT_COPYUP
|
|
||||||
tmpDir := ""
|
|
||||||
// dest might be an absolute symlink, so it needs
|
|
||||||
// to be resolved under rootfs.
|
|
||||||
dest, err := securejoin.SecureJoin(rootfs, m.Destination)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
m.Destination = dest
|
|
||||||
stat, err := os.Stat(dest)
|
stat, err := os.Stat(dest)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
if err := os.MkdirAll(dest, 0755); err != nil {
|
if err := os.MkdirAll(dest, 0755); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if copyUp {
|
|
||||||
tmpdir, err := prepareTmp("/tmp")
|
if m.Extensions&configs.EXT_COPYUP == configs.EXT_COPYUP {
|
||||||
if err != nil {
|
err = doTmpfsCopyUp(m, rootfs, mountLabel)
|
||||||
return newSystemErrorWithCause(err, "tmpcopyup: failed to setup tmpdir")
|
} else {
|
||||||
}
|
err = mountPropagate(m, rootfs, mountLabel)
|
||||||
defer cleanupTmp(tmpdir)
|
|
||||||
tmpDir, err = ioutil.TempDir(tmpdir, "runctmpdir")
|
|
||||||
if err != nil {
|
|
||||||
return newSystemErrorWithCause(err, "tmpcopyup: failed to create tmpdir")
|
|
||||||
}
|
|
||||||
defer os.RemoveAll(tmpDir)
|
|
||||||
m.Destination = tmpDir
|
|
||||||
}
|
}
|
||||||
if err := mountPropagate(m, rootfs, mountLabel); err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
if copyUp {
|
|
||||||
if err := fileutils.CopyDirectory(dest, tmpDir); err != nil {
|
|
||||||
errMsg := fmt.Errorf("tmpcopyup: failed to copy %s to %s: %v", dest, tmpDir, err)
|
|
||||||
if err1 := unix.Unmount(tmpDir, unix.MNT_DETACH); err1 != nil {
|
|
||||||
return newSystemErrorWithCausef(err1, "tmpcopyup: %v: failed to unmount", errMsg)
|
|
||||||
}
|
|
||||||
return errMsg
|
|
||||||
}
|
|
||||||
if err := unix.Mount(tmpDir, dest, "", unix.MS_MOVE, ""); err != nil {
|
|
||||||
errMsg := fmt.Errorf("tmpcopyup: failed to move mount %s to %s: %v", tmpDir, dest, err)
|
|
||||||
if err1 := unix.Unmount(tmpDir, unix.MNT_DETACH); err1 != nil {
|
|
||||||
return newSystemErrorWithCausef(err1, "tmpcopyup: %v: failed to unmount", errMsg)
|
|
||||||
}
|
|
||||||
return errMsg
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if stat != nil {
|
if stat != nil {
|
||||||
if err = os.Chmod(dest, stat.Mode()); err != nil {
|
if err = os.Chmod(dest, stat.Mode()); err != nil {
|
||||||
return err
|
return err
|
||||||
@@ -454,19 +470,9 @@ func mountToRootfs(m *configs.Mount, c *mountConfig) error {
|
|||||||
}
|
}
|
||||||
return mountCgroupV1(m, c)
|
return mountCgroupV1(m, c)
|
||||||
default:
|
default:
|
||||||
// ensure that the destination of the mount is resolved of symlinks at mount time because
|
|
||||||
// any previous mounts can invalidate the next mount's destination.
|
|
||||||
// this can happen when a user specifies mounts within other mounts to cause breakouts or other
|
|
||||||
// evil stuff to try to escape the container's rootfs.
|
|
||||||
var err error
|
|
||||||
if dest, err = securejoin.SecureJoin(rootfs, m.Destination); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
if err := checkProcMount(rootfs, dest, m.Source); err != nil {
|
if err := checkProcMount(rootfs, dest, m.Source); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
// update the mount with the correct dest after symlinks are resolved.
|
|
||||||
m.Destination = dest
|
|
||||||
if err := os.MkdirAll(dest, 0755); err != nil {
|
if err := os.MkdirAll(dest, 0755); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
@@ -649,7 +655,7 @@ func createDevices(config *configs.Config) error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func bindMountDeviceNode(dest string, node *devices.Device) error {
|
func bindMountDeviceNode(rootfs, dest string, node *devices.Device) error {
|
||||||
f, err := os.Create(dest)
|
f, err := os.Create(dest)
|
||||||
if err != nil && !os.IsExist(err) {
|
if err != nil && !os.IsExist(err) {
|
||||||
return err
|
return err
|
||||||
@@ -657,7 +663,9 @@ func bindMountDeviceNode(dest string, node *devices.Device) error {
|
|||||||
if f != nil {
|
if f != nil {
|
||||||
f.Close()
|
f.Close()
|
||||||
}
|
}
|
||||||
return unix.Mount(node.Path, dest, "bind", unix.MS_BIND, "")
|
return utils.WithProcfd(rootfs, dest, func(procfd string) error {
|
||||||
|
return unix.Mount(node.Path, procfd, "bind", unix.MS_BIND, "")
|
||||||
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
// Creates the device node in the rootfs of the container.
|
// Creates the device node in the rootfs of the container.
|
||||||
@@ -666,18 +674,21 @@ func createDeviceNode(rootfs string, node *devices.Device, bind bool) error {
|
|||||||
// The node only exists for cgroup reasons, ignore it here.
|
// The node only exists for cgroup reasons, ignore it here.
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
dest := filepath.Join(rootfs, node.Path)
|
dest, err := securejoin.SecureJoin(rootfs, node.Path)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
if err := os.MkdirAll(filepath.Dir(dest), 0755); err != nil {
|
if err := os.MkdirAll(filepath.Dir(dest), 0755); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
if bind {
|
if bind {
|
||||||
return bindMountDeviceNode(dest, node)
|
return bindMountDeviceNode(rootfs, dest, node)
|
||||||
}
|
}
|
||||||
if err := mknodDevice(dest, node); err != nil {
|
if err := mknodDevice(dest, node); err != nil {
|
||||||
if os.IsExist(err) {
|
if os.IsExist(err) {
|
||||||
return nil
|
return nil
|
||||||
} else if os.IsPermission(err) {
|
} else if os.IsPermission(err) {
|
||||||
return bindMountDeviceNode(dest, node)
|
return bindMountDeviceNode(rootfs, dest, node)
|
||||||
}
|
}
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
@@ -1024,61 +1035,47 @@ func writeSystemProperty(key, value string) error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func remount(m *configs.Mount, rootfs string) error {
|
func remount(m *configs.Mount, rootfs string) error {
|
||||||
var (
|
return utils.WithProcfd(rootfs, m.Destination, func(procfd string) error {
|
||||||
dest = m.Destination
|
return unix.Mount(m.Source, procfd, m.Device, uintptr(m.Flags|unix.MS_REMOUNT), "")
|
||||||
)
|
})
|
||||||
if !strings.HasPrefix(dest, rootfs) {
|
|
||||||
dest = filepath.Join(rootfs, dest)
|
|
||||||
}
|
|
||||||
return unix.Mount(m.Source, dest, m.Device, uintptr(m.Flags|unix.MS_REMOUNT), "")
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Do the mount operation followed by additional mounts required to take care
|
// Do the mount operation followed by additional mounts required to take care
|
||||||
// of propagation flags.
|
// of propagation flags. This will always be scoped inside the container rootfs.
|
||||||
func mountPropagate(m *configs.Mount, rootfs string, mountLabel string) error {
|
func mountPropagate(m *configs.Mount, rootfs string, mountLabel string) error {
|
||||||
var (
|
var (
|
||||||
dest = m.Destination
|
|
||||||
data = label.FormatMountLabel(m.Data, mountLabel)
|
data = label.FormatMountLabel(m.Data, mountLabel)
|
||||||
flags = m.Flags
|
flags = m.Flags
|
||||||
)
|
)
|
||||||
if libcontainerUtils.CleanPath(dest) == "/dev" {
|
// Delay mounting the filesystem read-only if we need to do further
|
||||||
|
// operations on it. We need to set up files in "/dev" and tmpfs mounts may
|
||||||
|
// need to be chmod-ed after mounting. The mount will be remounted ro later
|
||||||
|
// in finalizeRootfs() if necessary.
|
||||||
|
if libcontainerUtils.CleanPath(m.Destination) == "/dev" || m.Device == "tmpfs" {
|
||||||
flags &= ^unix.MS_RDONLY
|
flags &= ^unix.MS_RDONLY
|
||||||
}
|
}
|
||||||
|
|
||||||
// Mount it rw to allow chmod operation. A remount will be performed
|
// Because the destination is inside a container path which might be
|
||||||
// later to make it ro if set.
|
// mutating underneath us, we verify that we are actually going to mount
|
||||||
if m.Device == "tmpfs" {
|
// inside the container with WithProcfd() -- mounting through a procfd
|
||||||
flags &= ^unix.MS_RDONLY
|
// mounts on the target.
|
||||||
|
if err := utils.WithProcfd(rootfs, m.Destination, func(procfd string) error {
|
||||||
|
return unix.Mount(m.Source, procfd, m.Device, uintptr(flags), data)
|
||||||
|
}); err != nil {
|
||||||
|
return fmt.Errorf("mount through procfd: %w", err)
|
||||||
}
|
}
|
||||||
|
// We have to apply mount propagation flags in a separate WithProcfd() call
|
||||||
copyUp := m.Extensions&configs.EXT_COPYUP == configs.EXT_COPYUP
|
// because the previous call invalidates the passed procfd -- the mount
|
||||||
if !(copyUp || strings.HasPrefix(dest, rootfs)) {
|
// target needs to be re-opened.
|
||||||
dest = filepath.Join(rootfs, dest)
|
if err := utils.WithProcfd(rootfs, m.Destination, func(procfd string) error {
|
||||||
}
|
for _, pflag := range m.PropagationFlags {
|
||||||
|
if err := unix.Mount("", procfd, "", uintptr(pflag), ""); err != nil {
|
||||||
if err := unix.Mount(m.Source, dest, m.Device, uintptr(flags), data); err != nil {
|
return err
|
||||||
return err
|
}
|
||||||
}
|
|
||||||
|
|
||||||
for _, pflag := range m.PropagationFlags {
|
|
||||||
if err := unix.Mount("", dest, "", uintptr(pflag), ""); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
}
|
||||||
}
|
return nil
|
||||||
return nil
|
}); err != nil {
|
||||||
}
|
return fmt.Errorf("change mount propagation through procfd: %w", err)
|
||||||
|
|
||||||
func mountNewCgroup(m *configs.Mount) error {
|
|
||||||
var (
|
|
||||||
data = m.Data
|
|
||||||
source = m.Source
|
|
||||||
)
|
|
||||||
if data == "systemd" {
|
|
||||||
data = cgroups.CgroupNamePrefix + data
|
|
||||||
source = "systemd"
|
|
||||||
}
|
|
||||||
if err := unix.Mount(source, m.Destination, m.Device, uintptr(m.Flags), data); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
@@ -3,12 +3,15 @@ package utils
|
|||||||
import (
|
import (
|
||||||
"encoding/binary"
|
"encoding/binary"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"os"
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
"unsafe"
|
"unsafe"
|
||||||
|
|
||||||
|
"github.com/cyphar/filepath-securejoin"
|
||||||
"golang.org/x/sys/unix"
|
"golang.org/x/sys/unix"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -88,6 +91,57 @@ func CleanPath(path string) string {
|
|||||||
return filepath.Clean(path)
|
return filepath.Clean(path)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// stripRoot returns the passed path, stripping the root path if it was
|
||||||
|
// (lexicially) inside it. Note that both passed paths will always be treated
|
||||||
|
// as absolute, and the returned path will also always be absolute. In
|
||||||
|
// addition, the paths are cleaned before stripping the root.
|
||||||
|
func stripRoot(root, path string) string {
|
||||||
|
// Make the paths clean and absolute.
|
||||||
|
root, path = CleanPath("/"+root), CleanPath("/"+path)
|
||||||
|
switch {
|
||||||
|
case path == root:
|
||||||
|
path = "/"
|
||||||
|
case root == "/":
|
||||||
|
// do nothing
|
||||||
|
case strings.HasPrefix(path, root+"/"):
|
||||||
|
path = strings.TrimPrefix(path, root+"/")
|
||||||
|
}
|
||||||
|
return CleanPath("/" + path)
|
||||||
|
}
|
||||||
|
|
||||||
|
// WithProcfd runs the passed closure with a procfd path (/proc/self/fd/...)
|
||||||
|
// corresponding to the unsafePath resolved within the root. Before passing the
|
||||||
|
// fd, this path is verified to have been inside the root -- so operating on it
|
||||||
|
// through the passed fdpath should be safe. Do not access this path through
|
||||||
|
// the original path strings, and do not attempt to use the pathname outside of
|
||||||
|
// the passed closure (the file handle will be freed once the closure returns).
|
||||||
|
func WithProcfd(root, unsafePath string, fn func(procfd string) error) error {
|
||||||
|
// Remove the root then forcefully resolve inside the root.
|
||||||
|
unsafePath = stripRoot(root, unsafePath)
|
||||||
|
path, err := securejoin.SecureJoin(root, unsafePath)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("resolving path inside rootfs failed: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Open the target path.
|
||||||
|
fh, err := os.OpenFile(path, unix.O_PATH|unix.O_CLOEXEC, 0)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("open o_path procfd: %w", err)
|
||||||
|
}
|
||||||
|
defer fh.Close()
|
||||||
|
|
||||||
|
// Double-check the path is the one we expected.
|
||||||
|
procfd := "/proc/self/fd/" + strconv.Itoa(int(fh.Fd()))
|
||||||
|
if realpath, err := os.Readlink(procfd); err != nil {
|
||||||
|
return fmt.Errorf("procfd verification failed: %w", err)
|
||||||
|
} else if realpath != path {
|
||||||
|
return fmt.Errorf("possibly malicious path detected -- refusing to operate on %s", realpath)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Run the closure.
|
||||||
|
return fn(procfd)
|
||||||
|
}
|
||||||
|
|
||||||
// SearchLabels searches a list of key-value pairs for the provided key and
|
// SearchLabels searches a list of key-value pairs for the provided key and
|
||||||
// returns the corresponding value. The pairs must be separated with '='.
|
// returns the corresponding value. The pairs must be separated with '='.
|
||||||
func SearchLabels(labels []string, query string) string {
|
func SearchLabels(labels []string, query string) string {
|
||||||
|
@@ -143,3 +143,38 @@ func TestCleanPath(t *testing.T) {
|
|||||||
t.Errorf("expected to receive '/foo' and received %s", path)
|
t.Errorf("expected to receive '/foo' and received %s", path)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestStripRoot(t *testing.T) {
|
||||||
|
for _, test := range []struct {
|
||||||
|
root, path, out string
|
||||||
|
}{
|
||||||
|
// Works with multiple components.
|
||||||
|
{"/a/b", "/a/b/c", "/c"},
|
||||||
|
{"/hello/world", "/hello/world/the/quick-brown/fox", "/the/quick-brown/fox"},
|
||||||
|
// '/' must be a no-op.
|
||||||
|
{"/", "/a/b/c", "/a/b/c"},
|
||||||
|
// Must be the correct order.
|
||||||
|
{"/a/b", "/a/c/b", "/a/c/b"},
|
||||||
|
// Must be at start.
|
||||||
|
{"/abc/def", "/foo/abc/def/bar", "/foo/abc/def/bar"},
|
||||||
|
// Must be a lexical parent.
|
||||||
|
{"/foo/bar", "/foo/barSAMECOMPONENT", "/foo/barSAMECOMPONENT"},
|
||||||
|
// Must only strip the root once.
|
||||||
|
{"/foo/bar", "/foo/bar/foo/bar/baz", "/foo/bar/baz"},
|
||||||
|
// Deal with .. in a fairly sane way.
|
||||||
|
{"/foo/bar", "/foo/bar/../baz", "/foo/baz"},
|
||||||
|
{"/foo/bar", "../../../../../../foo/bar/baz", "/baz"},
|
||||||
|
{"/foo/bar", "/../../../../../../foo/bar/baz", "/baz"},
|
||||||
|
{"/foo/bar/../baz", "/foo/baz/bar", "/bar"},
|
||||||
|
{"/foo/bar/../baz", "/foo/baz/../bar/../baz/./foo", "/foo"},
|
||||||
|
// All paths are made absolute before stripping.
|
||||||
|
{"foo/bar", "/foo/bar/baz/bee", "/baz/bee"},
|
||||||
|
{"/foo/bar", "foo/bar/baz/beef", "/baz/beef"},
|
||||||
|
{"foo/bar", "foo/bar/baz/beets", "/baz/beets"},
|
||||||
|
} {
|
||||||
|
got := stripRoot(test.root, test.path)
|
||||||
|
if got != test.out {
|
||||||
|
t.Errorf("stripRoot(%q, %q) -- got %q, expected %q", test.root, test.path, got, test.out)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Reference in New Issue
Block a user