mirror of
https://github.com/opencontainers/runc.git
synced 2025-09-27 03:46:19 +08:00

When reading mount errors, it is quite hard to make sense of mount flags in their hex form. As this is the error path, the minor performance impact of constructing a string is probably not worth hyper-optimising. Signed-off-by: Aleksa Sarai <cyphar@cyphar.com>
340 lines
10 KiB
Go
340 lines
10 KiB
Go
package libcontainer
|
|
|
|
import (
|
|
"errors"
|
|
"fmt"
|
|
"io/fs"
|
|
"os"
|
|
"strconv"
|
|
"strings"
|
|
|
|
"github.com/sirupsen/logrus"
|
|
"golang.org/x/sys/unix"
|
|
|
|
"github.com/opencontainers/runc/libcontainer/configs"
|
|
"github.com/opencontainers/runc/libcontainer/internal/userns"
|
|
"github.com/opencontainers/runc/libcontainer/utils"
|
|
)
|
|
|
|
// mountSourceType indicates what type of file descriptor is being returned. It
// is used to tell rootfs_linux.go whether or not to use move_mount(2) to
// install the mount.
//
// The value is a plain string so that it can be serialised as-is (see the
// "type" JSON field of mountSource).
type mountSourceType string
|
|
|
|
// Known mountSourceType values.
const (
	// An open_tree(2)-style file descriptor that must be installed using
	// move_mount(2).
	mountSourceOpenTree mountSourceType = "open_tree"
	// A plain file descriptor that can be mounted through /proc/thread-self/fd.
	mountSourcePlain mountSourceType = "plain-open"
)
|
|
|
|
// mountSource pairs an open file that is to be used as a mount source with
// the mountSourceType describing how that file has to be installed.
type mountSource struct {
	// Type says how file must be installed; serialised under the "type" key.
	Type mountSourceType `json:"type"`
	// file is the open handle itself; it is excluded from JSON.
	file *os.File `json:"-"`
}
|
|
|
|
// mountError holds an error from a failed mount or unmount operation.
//
// Besides the underlying error it records the arguments of the failed call so
// that Error can render them as context.
type mountError struct {
	// op is the name of the failed operation ("mount", "move_mount" or
	// "unmount" for the helpers in this file).
	op string
	// source is the source path as given by the caller (may be empty).
	source string
	// srcFile is the file-descriptor-based source, if one was used.
	srcFile *mountSource
	// target is the destination path as given by the caller.
	target string
	// dstFd is the procfs fd path used instead of target, if any.
	dstFd string
	// flags are the flags passed to the failed operation.
	flags uintptr
	// data is the mount data string (may be empty).
	data string
	// err is the underlying error; it is exposed through Unwrap.
	err error
}
|
|
|
|
// int32plus is a collection of int types with >=32 bits.
//
// It is the type constraint of stringifyMountFlags. Only the listed types
// themselves are admitted (there is no ~ in the union), so named types
// derived from them do not satisfy it.
type int32plus interface {
	int | uint | int32 | uint32 | int64 | uint64 | uintptr
}
|
|
|
|
// stringifyMountFlags converts mount(2) flags to a string that you can use in
|
|
// error messages.
|
|
func stringifyMountFlags[Int int32plus](flags Int) string {
|
|
flagNames := []struct {
|
|
name string
|
|
bits Int
|
|
}{
|
|
{"MS_RDONLY", unix.MS_RDONLY},
|
|
{"MS_NOSUID", unix.MS_NOSUID},
|
|
{"MS_NODEV", unix.MS_NODEV},
|
|
{"MS_NOEXEC", unix.MS_NOEXEC},
|
|
{"MS_SYNCHRONOUS", unix.MS_SYNCHRONOUS},
|
|
{"MS_REMOUNT", unix.MS_REMOUNT},
|
|
{"MS_MANDLOCK", unix.MS_MANDLOCK},
|
|
{"MS_DIRSYNC", unix.MS_DIRSYNC},
|
|
{"MS_NOSYMFOLLOW", unix.MS_NOSYMFOLLOW},
|
|
// No (1 << 9) flag.
|
|
{"MS_NOATIME", unix.MS_NOATIME},
|
|
{"MS_NODIRATIME", unix.MS_NODIRATIME},
|
|
{"MS_BIND", unix.MS_BIND},
|
|
{"MS_MOVE", unix.MS_MOVE},
|
|
{"MS_REC", unix.MS_REC},
|
|
// MS_VERBOSE was deprecated and swapped to MS_SILENT.
|
|
{"MS_SILENT", unix.MS_SILENT},
|
|
{"MS_POSIXACL", unix.MS_POSIXACL},
|
|
{"MS_UNBINDABLE", unix.MS_UNBINDABLE},
|
|
{"MS_PRIVATE", unix.MS_PRIVATE},
|
|
{"MS_SLAVE", unix.MS_SLAVE},
|
|
{"MS_SHARED", unix.MS_SHARED},
|
|
{"MS_RELATIME", unix.MS_RELATIME},
|
|
// MS_KERNMOUNT (1 << 22) is internal to the kernel.
|
|
{"MS_I_VERSION", unix.MS_I_VERSION},
|
|
{"MS_STRICTATIME", unix.MS_STRICTATIME},
|
|
{"MS_LAZYTIME", unix.MS_LAZYTIME},
|
|
}
|
|
var (
|
|
flagSet []string
|
|
seenBits Int
|
|
)
|
|
for _, flag := range flagNames {
|
|
if flags&flag.bits == flag.bits {
|
|
seenBits |= flag.bits
|
|
flagSet = append(flagSet, flag.name)
|
|
}
|
|
}
|
|
// If there were any remaining flags specified we don't know the name of,
|
|
// just add them in an 0x... format.
|
|
if remaining := flags &^ seenBits; remaining != 0 {
|
|
flagSet = append(flagSet, "0x"+strconv.FormatUint(uint64(remaining), 16))
|
|
}
|
|
return strings.Join(flagSet, "|")
|
|
}
|
|
|
|
// Error provides a string error representation.
|
|
func (e *mountError) Error() string {
|
|
out := e.op + " "
|
|
|
|
if e.source != "" {
|
|
out += "src=" + e.source + ", "
|
|
if e.srcFile != nil {
|
|
out += "srcType=" + string(e.srcFile.Type) + ", "
|
|
out += "srcFd=" + strconv.Itoa(int(e.srcFile.file.Fd())) + ", "
|
|
}
|
|
}
|
|
out += "dst=" + e.target
|
|
if e.dstFd != "" {
|
|
out += ", dstFd=" + e.dstFd
|
|
}
|
|
|
|
if e.flags != uintptr(0) {
|
|
out += ", flags=" + stringifyMountFlags(e.flags)
|
|
}
|
|
if e.data != "" {
|
|
out += ", data=" + e.data
|
|
}
|
|
|
|
out += ": " + e.err.Error()
|
|
return out
|
|
}
|
|
|
|
// Unwrap returns the underlying error.
|
|
// This is a convention used by Go 1.13+ standard library.
|
|
func (e *mountError) Unwrap() error {
|
|
return e.err
|
|
}
|
|
|
|
// mount is a simple unix.Mount wrapper, returning an error with more context
|
|
// in case it failed.
|
|
func mount(source, target, fstype string, flags uintptr, data string) error {
|
|
return mountViaFds(source, nil, target, "", fstype, flags, data)
|
|
}
|
|
|
|
// mountViaFds is a unix.Mount wrapper which uses srcFile instead of source,
|
|
// and dstFd instead of target, unless those are empty.
|
|
//
|
|
// If srcFile is non-nil and flags does not contain MS_REMOUNT, mountViaFds
|
|
// will mount it according to the mountSourceType of the file descriptor.
|
|
//
|
|
// The dstFd argument, if non-empty, is expected to be in the form of a path to
|
|
// an opened file descriptor on procfs (i.e. "/proc/thread-self/fd/NN").
|
|
//
|
|
// If a file descriptor is used instead of a source or a target path, the
|
|
// corresponding path is only used to add context to an error in case the mount
|
|
// operation has failed.
|
|
func mountViaFds(source string, srcFile *mountSource, target, dstFd, fstype string, flags uintptr, data string) error {
|
|
// MS_REMOUNT and srcFile don't make sense together.
|
|
if srcFile != nil && flags&unix.MS_REMOUNT != 0 {
|
|
logrus.Debugf("mount source passed along with MS_REMOUNT -- ignoring srcFile")
|
|
srcFile = nil
|
|
}
|
|
dst := target
|
|
if dstFd != "" {
|
|
dst = dstFd
|
|
}
|
|
src := source
|
|
isMoveMount := srcFile != nil && srcFile.Type == mountSourceOpenTree
|
|
if srcFile != nil {
|
|
// If we're going to use the /proc/thread-self/... path for classic
|
|
// mount(2), we need to get a safe handle to /proc/thread-self. This
|
|
// isn't needed for move_mount(2) because in that case the path is just
|
|
// a dummy string used for error info.
|
|
srcFileFd := srcFile.file.Fd()
|
|
if isMoveMount {
|
|
src = "/proc/self/fd/" + strconv.Itoa(int(srcFileFd))
|
|
} else {
|
|
var closer utils.ProcThreadSelfCloser
|
|
src, closer = utils.ProcThreadSelfFd(srcFileFd)
|
|
defer closer()
|
|
}
|
|
}
|
|
|
|
var op string
|
|
var err error
|
|
if isMoveMount {
|
|
op = "move_mount"
|
|
err = unix.MoveMount(int(srcFile.file.Fd()), "",
|
|
unix.AT_FDCWD, dstFd,
|
|
unix.MOVE_MOUNT_F_EMPTY_PATH|unix.MOVE_MOUNT_T_SYMLINKS)
|
|
} else {
|
|
op = "mount"
|
|
err = unix.Mount(src, dst, fstype, flags, data)
|
|
}
|
|
if err != nil {
|
|
return &mountError{
|
|
op: op,
|
|
source: source,
|
|
srcFile: srcFile,
|
|
target: target,
|
|
dstFd: dstFd,
|
|
flags: flags,
|
|
data: data,
|
|
err: err,
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// unmount is a simple unix.Unmount wrapper.
|
|
func unmount(target string, flags int) error {
|
|
err := unix.Unmount(target, flags)
|
|
if err != nil {
|
|
return &mountError{
|
|
op: "unmount",
|
|
target: target,
|
|
flags: uintptr(flags),
|
|
err: err,
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// syscallMode returns the syscall-specific mode bits from Go's portable mode bits.
|
|
// Copy from https://cs.opensource.google/go/go/+/refs/tags/go1.20.7:src/os/file_posix.go;l=61-75
|
|
func syscallMode(i fs.FileMode) (o uint32) {
|
|
o |= uint32(i.Perm())
|
|
if i&fs.ModeSetuid != 0 {
|
|
o |= unix.S_ISUID
|
|
}
|
|
if i&fs.ModeSetgid != 0 {
|
|
o |= unix.S_ISGID
|
|
}
|
|
if i&fs.ModeSticky != 0 {
|
|
o |= unix.S_ISVTX
|
|
}
|
|
// No mapping for Go's ModeTemporary (plan9 only).
|
|
return
|
|
}
|
|
|
|
// mountFd creates a "mount source fd" (either through open_tree(2) or just
|
|
// open(O_PATH)) based on the provided configuration. This function must be
|
|
// called from within the container's mount namespace.
|
|
//
|
|
// In the case of idmapped mount configurations, the returned mount source will
|
|
// be an open_tree(2) file with MOUNT_ATTR_IDMAP applied. For other
|
|
// bind-mounts, it will be an O_PATH. If the type of mount cannot be handled,
|
|
// the returned mountSource will be nil, indicating that the container init
|
|
// process will need to do an old-fashioned mount(2) themselves.
|
|
//
|
|
// This helper is only intended to be used by goCreateMountSources.
|
|
func mountFd(nsHandles *userns.Handles, m *configs.Mount) (*mountSource, error) {
|
|
if !m.IsBind() {
|
|
return nil, errors.New("new mount api: only bind-mounts are supported")
|
|
}
|
|
if nsHandles == nil {
|
|
nsHandles = new(userns.Handles)
|
|
defer nsHandles.Release()
|
|
}
|
|
|
|
var mountFile *os.File
|
|
var sourceType mountSourceType
|
|
|
|
// Ideally, we would use OPEN_TREE_CLONE for everything, because we can
|
|
// be sure that the file descriptor cannot be used to escape outside of
|
|
// the mount root. Unfortunately, OPEN_TREE_CLONE is far more expensive
|
|
// than open(2) because it requires doing mounts inside a new anonymous
|
|
// mount namespace. So we use open(2) for standard bind-mounts, and
|
|
// OPEN_TREE_CLONE when we need to set mount attributes here.
|
|
//
|
|
// While passing open(2)'d paths from the host rootfs isn't exactly the
|
|
// safest thing in the world, the files will not survive across
|
|
// execve(2) and "runc init" is non-dumpable so it should not be
|
|
// possible for a malicious container process to gain access to the
|
|
// file descriptors. We also don't do any of this for "runc exec",
|
|
// lessening the risk even further.
|
|
if m.IsIDMapped() {
|
|
flags := uint(unix.OPEN_TREE_CLONE | unix.OPEN_TREE_CLOEXEC)
|
|
if m.Flags&unix.MS_REC == unix.MS_REC {
|
|
flags |= unix.AT_RECURSIVE
|
|
}
|
|
fd, err := unix.OpenTree(unix.AT_FDCWD, m.Source, flags)
|
|
if err != nil {
|
|
return nil, &os.PathError{Op: "open_tree(OPEN_TREE_CLONE)", Path: m.Source, Err: err}
|
|
}
|
|
mountFile = os.NewFile(uintptr(fd), m.Source)
|
|
sourceType = mountSourceOpenTree
|
|
|
|
// Configure the id mapping.
|
|
var usernsFile *os.File
|
|
if m.IDMapping.UserNSPath == "" {
|
|
usernsFile, err = nsHandles.Get(userns.Mapping{
|
|
UIDMappings: m.IDMapping.UIDMappings,
|
|
GIDMappings: m.IDMapping.GIDMappings,
|
|
})
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to create userns for %s id-mapping: %w", m.Source, err)
|
|
}
|
|
} else {
|
|
usernsFile, err = os.Open(m.IDMapping.UserNSPath)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to open existing userns for %s id-mapping: %w", m.Source, err)
|
|
}
|
|
}
|
|
defer usernsFile.Close()
|
|
|
|
setAttrFlags := uint(unix.AT_EMPTY_PATH)
|
|
// If the mount has "ridmap" set, we apply the configuration
|
|
// recursively. This allows you to create "rbind" mounts where only
|
|
// the top-level mount has an idmapping. I'm not sure why you'd
|
|
// want that, but still...
|
|
if m.IDMapping.Recursive {
|
|
setAttrFlags |= unix.AT_RECURSIVE
|
|
}
|
|
if err := unix.MountSetattr(int(mountFile.Fd()), "", setAttrFlags, &unix.MountAttr{
|
|
Attr_set: unix.MOUNT_ATTR_IDMAP,
|
|
Userns_fd: uint64(usernsFile.Fd()),
|
|
}); err != nil {
|
|
extraMsg := ""
|
|
if err == unix.EINVAL {
|
|
extraMsg = " (maybe the filesystem used doesn't support idmap mounts on this kernel?)"
|
|
}
|
|
|
|
return nil, fmt.Errorf("failed to set MOUNT_ATTR_IDMAP on %s: %w%s", m.Source, err, extraMsg)
|
|
}
|
|
} else {
|
|
var err error
|
|
mountFile, err = os.OpenFile(m.Source, unix.O_PATH|unix.O_CLOEXEC, 0)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
sourceType = mountSourcePlain
|
|
}
|
|
return &mountSource{
|
|
Type: sourceType,
|
|
file: mountFile,
|
|
}, nil
|
|
}
|