diff --git a/internal/sys/sysctl_linux.go b/internal/sys/sysctl_linux.go
new file mode 100644
index 000000000..96876a55f
--- /dev/null
+++ b/internal/sys/sysctl_linux.go
@@ -0,0 +1,78 @@
+package sys
+
+import (
+	"fmt"
+	"io"
+	"os"
+	"strings"
+
+	"golang.org/x/sys/unix"
+
+	"github.com/cyphar/filepath-securejoin/pathrs-lite"
+	"github.com/cyphar/filepath-securejoin/pathrs-lite/procfs"
+)
+
+// procfsOpenRoot opens subpath relative to the root of the given procfs
+// handle and re-opens it with the requested open flags. The intermediate
+// handle returned by OpenRoot is closed before returning; the caller owns
+// the returned file and must close it.
+func procfsOpenRoot(proc *procfs.Handle, subpath string, flags int) (*os.File, error) {
+	handle, err := proc.OpenRoot(subpath)
+	if err != nil {
+		return nil, err
+	}
+	defer handle.Close()
+
+	return pathrs.Reopen(handle, flags)
+}
+
+// writeSysctl writes value to the sysctl named by key (dots in the key are
+// translated to path separators under "sys/") using the given procfs handle.
+// It is a separate function so that the sysctl file is closed as soon as the
+// write finishes, rather than when the caller's loop over all sysctls ends.
+func writeSysctl(proc *procfs.Handle, key, value string) error {
+	keyPath := strings.ReplaceAll(key, ".", "/")
+
+	sysctlFile, err := procfsOpenRoot(proc, "sys/"+keyPath, unix.O_WRONLY|unix.O_TRUNC|unix.O_CLOEXEC)
+	if err != nil {
+		return fmt.Errorf("open sysctl %s file: %w", key, err)
+	}
+	defer sysctlFile.Close()
+
+	n, err := io.WriteString(sysctlFile, value)
+	if n != len(value) && err == nil {
+		err = fmt.Errorf("short write to file (%d bytes != %d bytes)", n, len(value))
+	}
+	if err != nil {
+		return fmt.Errorf("failed to write sysctl %s = %q: %w", key, value, err)
+	}
+	return nil
+}
+
+// WriteSysctls sets the given sysctls to the requested values.
+//
+// Keys use the dotted sysctl form (for example "net.ipv4.ip_forward"). The
+// first failure aborts the operation and is returned; sysctls written before
+// the failure are not rolled back.
+func WriteSysctls(sysctls map[string]string) error {
+	// Nothing to write: do not open a procfs handle at all.
+	if len(sysctls) == 0 {
+		return nil
+	}
+
+	// We are going to write multiple sysctls, which require writing to an
+	// unmasked procfs which is not going to be cached. To avoid creating a new
+	// procfs instance for each one, just allocate one handle for all of them.
+	proc, err := procfs.OpenUnsafeProcRoot()
+	if err != nil {
+		return err
+	}
+	defer proc.Close()
+
+	for key, value := range sysctls {
+		if err := writeSysctl(proc, key, value); err != nil {
+			return err
+		}
+	}
+	return nil
+}
diff --git a/libcontainer/rootfs_linux.go b/libcontainer/rootfs_linux.go
index 8788f1d3f..398b40c3d 100644
--- a/libcontainer/rootfs_linux.go
+++ b/libcontainer/rootfs_linux.go
@@ -5,7 +5,6 @@ import (
 	"errors"
 	"fmt"
 	"os"
-	"path"
 	"path/filepath"
 	"runtime"
 	"strconv"
@@ -1351,13 +1350,6 @@ func maskPaths(paths []string, mountLabel string) error {
 	return nil
 }
 
-// writeSystemProperty writes the value to a path under /proc/sys as determined from the key.
-// For e.g. net.ipv4.ip_forward translated to /proc/sys/net/ipv4/ip_forward.
-func writeSystemProperty(key, value string) error {
-	keyPath := strings.ReplaceAll(key, ".", "/")
-	return os.WriteFile(path.Join("/proc/sys", keyPath), []byte(value), 0o644)
-}
-
 // Do the mount operation followed by additional mounts required to take care
 // of propagation flags. This will always be scoped inside the container rootfs.
 func mountPropagate(m mountEntry, rootfs string, mountLabel string) error {
diff --git a/libcontainer/standard_init_linux.go b/libcontainer/standard_init_linux.go
index d3e519593..570472bd4 100644
--- a/libcontainer/standard_init_linux.go
+++ b/libcontainer/standard_init_linux.go
@@ -13,6 +13,7 @@ import (
 
 	"github.com/opencontainers/runc/internal/linux"
 	"github.com/opencontainers/runc/internal/pathrs"
+	"github.com/opencontainers/runc/internal/sys"
 	"github.com/opencontainers/runc/libcontainer/apparmor"
 	"github.com/opencontainers/runc/libcontainer/configs"
 	"github.com/opencontainers/runc/libcontainer/keys"
@@ -132,10 +133,8 @@ func (l *linuxStandardInit) Init() error {
 		return fmt.Errorf("unable to apply apparmor profile: %w", err)
 	}
 
-	for key, value := range l.config.Config.Sysctl {
-		if err := writeSystemProperty(key, value); err != nil {
-			return err
-		}
+	if err := sys.WriteSysctls(l.config.Config.Sysctl); err != nil {
+		return err
 	}
 	for _, path := range l.config.Config.ReadonlyPaths {
 		if err := readonlyPath(path); err != nil {