Files
runc/libcontainer/utils/utils_unix.go
Aleksa Sarai 8da42aaec2 sync: split init config (stream) and synchronisation (seqpacket) pipes
We have different requirements for the initial configuration and
initWaiter pipe (just send netlink and JSON blobs with no complicated
handling needed for message coalescing) and the packet-based
synchronisation pipe.

Tests with switching everything to SOCK_SEQPACKET lead to endless issues
with runc hanging on start-up because random things would try to do
short reads (which SOCK_SEQPACKET will not allow and the Go stdlib
explicitly treats as a streaming source), so splitting it was the only
reasonable solution. Even doing somewhat dodgy tricks such as adding a
Read() wrapper which actually calls ReadPacket() and makes it seem like
a stream source doesn't work -- and is a bit too magical.

One upside is that doing it this way makes the difference between the
modes clearer -- INITPIPE is still used for initWaiter syncrhonisation
but aside from that all other synchronisation is done by SYNCPIPE.

Signed-off-by: Aleksa Sarai <cyphar@cyphar.com>
2023-09-24 20:31:14 +08:00

101 lines
2.6 KiB
Go

//go:build !windows
// +build !windows
package utils
import (
"fmt"
"math"
"os"
"strconv"
"sync"
"golang.org/x/sys/unix"
)
// EnsureProcHandle returns whether or not the given file handle is on procfs.
func EnsureProcHandle(fh *os.File) error {
var buf unix.Statfs_t
if err := unix.Fstatfs(int(fh.Fd()), &buf); err != nil {
return fmt.Errorf("ensure %s is on procfs: %w", fh.Name(), err)
}
if buf.Type != unix.PROC_SUPER_MAGIC {
return fmt.Errorf("%s is not on procfs", fh.Name())
}
return nil
}
var (
haveCloseRangeCloexecBool bool
haveCloseRangeCloexecOnce sync.Once
)
func haveCloseRangeCloexec() bool {
haveCloseRangeCloexecOnce.Do(func() {
// Make sure we're not closing a random file descriptor.
tmpFd, err := unix.FcntlInt(0, unix.F_DUPFD_CLOEXEC, 0)
if err != nil {
return
}
defer unix.Close(tmpFd)
err = unix.CloseRange(uint(tmpFd), uint(tmpFd), unix.CLOSE_RANGE_CLOEXEC)
// Any error means we cannot use close_range(CLOSE_RANGE_CLOEXEC).
// -ENOSYS and -EINVAL ultimately mean we don't have support, but any
// other potential error would imply that even the most basic close
// operation wouldn't work.
haveCloseRangeCloexecBool = err == nil
})
return haveCloseRangeCloexecBool
}
// CloseExecFrom applies O_CLOEXEC to all file descriptors currently open for
// the process (except for those below the given fd value).
func CloseExecFrom(minFd int) error {
if haveCloseRangeCloexec() {
err := unix.CloseRange(uint(minFd), math.MaxUint, unix.CLOSE_RANGE_CLOEXEC)
return os.NewSyscallError("close_range", err)
}
fdDir, err := os.Open("/proc/self/fd")
if err != nil {
return err
}
defer fdDir.Close()
if err := EnsureProcHandle(fdDir); err != nil {
return err
}
fdList, err := fdDir.Readdirnames(-1)
if err != nil {
return err
}
for _, fdStr := range fdList {
fd, err := strconv.Atoi(fdStr)
// Ignore non-numeric file names.
if err != nil {
continue
}
// Ignore descriptors lower than our specified minimum.
if fd < minFd {
continue
}
// Intentionally ignore errors from unix.CloseOnExec -- the cases where
// this might fail are basically file descriptors that have already
// been closed (including and especially the one that was created when
// os.ReadDir did the "opendir" syscall).
unix.CloseOnExec(fd)
}
return nil
}
// NewSockPair returns a new SOCK_STREAM unix socket pair.
func NewSockPair(name string) (parent, child *os.File, err error) {
fds, err := unix.Socketpair(unix.AF_LOCAL, unix.SOCK_STREAM|unix.SOCK_CLOEXEC, 0)
if err != nil {
return nil, nil, err
}
return os.NewFile(uintptr(fds[1]), name+"-p"), os.NewFile(uintptr(fds[0]), name+"-c"), nil
}