mirror of
https://github.com/opencontainers/runc.git
synced 2025-10-08 17:00:13 +08:00

The idea is to remove the need for cloning the entire runc binary by
replacing the final execve() call of the container process with an
execve() call to a clone of a small C binary which just does an execve()
of its arguments.
This provides similar protection against CVE-2019-5736 but without
requiring a >10MB binary copy for each "runc init". When compiled with
musl, runc-dmz is 13kB (though unfortunately with glibc, it is 1.1MB
which is still quite large).
It should be noted that there is still a window where the container
processes could get access to the host runc binary, but because we set
ourselves as non-dumpable the container would need CAP_SYS_PTRACE (which
is not enabled by default in Docker) in order to get around the
proc_fd_access_allowed() checks. In addition, since Linux 4.10[1] the
kernel blocks access entirely for user namespaced containers in this
scenario. For those cases we cannot use runc-dmz, but most containers
won't have this issue.
This new runc-dmz binary can be opted out of at compile time by setting
the "runc_nodmz" buildtag, and at runtime by setting the RUNC_DMZ=legacy
environment variable. In both cases, runc will fall back to the classic
/proc/self/exe-based cloning trick. If /proc/self/exe is already a
sealed memfd (namely if the user is using contrib/cmd/memfd-bind to
create a persistent sealed memfd for runc), neither runc-dmz nor
/proc/self/exe cloning will be used because they are not necessary.
[1]: bfedb58925
Co-authored-by: lifubang <lifubang@acmcoder.com>
Signed-off-by: lifubang <lifubang@acmcoder.com>
[cyphar: address various review nits]
[cyphar: fix runc-dmz cross-compilation]
[cyphar: embed runc-dmz into runc binary and clone in Go code]
[cyphar: make runc-dmz optional, with fallback to /proc/self/exe cloning]
[cyphar: do not use runc-dmz when the container has certain privs]
Co-authored-by: Aleksa Sarai <cyphar@cyphar.com>
Signed-off-by: Aleksa Sarai <cyphar@cyphar.com>
95 lines
2.9 KiB
Go
95 lines
2.9 KiB
Go
/*
|
|
Copyright The containerd Authors.
|
|
|
|
Licensed under the Apache License, Version 2.0 (the "License");
|
|
you may not use this file except in compliance with the License.
|
|
You may obtain a copy of the License at
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
Unless required by applicable law or agreed to in writing, software
|
|
distributed under the License is distributed on an "AS IS" BASIS,
|
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
See the License for the specific language governing permissions and
|
|
limitations under the License.
|
|
|
|
File copied and customized based on
|
|
https://github.com/moby/moby/tree/v20.10.14/profiles/seccomp/kernel_linux.go
|
|
|
|
File copied from
|
|
https://github.com/containerd/containerd/blob/v1.7.5/contrib/seccomp/kernelversion/kernel_linux.go
|
|
*/
|
|
|
|
package kernelversion
|
|
|
|
import (
|
|
"bytes"
|
|
"fmt"
|
|
"sync"
|
|
|
|
"golang.org/x/sys/unix"
|
|
)
|
|
|
|
// KernelVersion identifies a kernel by the first two numeric components of
// its release string.
type KernelVersion struct {
	// Kernel is the leading version number (the "4" in "4.1.2-generic").
	Kernel uint64
	// Major is the major revision (the "1" in "4.1.2-generic").
	Major uint64
}

// String renders the version as "<kernel>.<major>". A version whose fields
// are both zero is rendered as the empty string.
func (k *KernelVersion) String() string {
	if k.Kernel == 0 && k.Major == 0 {
		return ""
	}
	return fmt.Sprintf("%d.%d", k.Kernel, k.Major)
}
|
|
|
|
var (
|
|
currentKernelVersion *KernelVersion
|
|
kernelVersionError error
|
|
once sync.Once
|
|
)
|
|
|
|
// getKernelVersion gets the current kernel version.
|
|
func getKernelVersion() (*KernelVersion, error) {
|
|
once.Do(func() {
|
|
var uts unix.Utsname
|
|
if err := unix.Uname(&uts); err != nil {
|
|
return
|
|
}
|
|
// Remove the \x00 from the release for Atoi to parse correctly
|
|
currentKernelVersion, kernelVersionError = parseRelease(string(uts.Release[:bytes.IndexByte(uts.Release[:], 0)]))
|
|
})
|
|
return currentKernelVersion, kernelVersionError
|
|
}
|
|
|
|
// parseRelease parses a string and creates a KernelVersion based on it.
|
|
func parseRelease(release string) (*KernelVersion, error) {
|
|
var version KernelVersion
|
|
|
|
// We're only make sure we get the "kernel" and "major revision". Sometimes we have
|
|
// 3.12.25-gentoo, but sometimes we just have 3.12-1-amd64.
|
|
_, err := fmt.Sscanf(release, "%d.%d", &version.Kernel, &version.Major)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to parse kernel version %q: %w", release, err)
|
|
}
|
|
return &version, nil
|
|
}
|
|
|
|
// GreaterEqualThan checks if the host's kernel version is greater than, or
|
|
// equal to the given kernel version v. Only "kernel version" and "major revision"
|
|
// can be specified (e.g., "3.12") and will be taken into account, which means
|
|
// that 3.12.25-gentoo and 3.12-1-amd64 are considered equal (kernel: 3, major: 12).
|
|
func GreaterEqualThan(minVersion KernelVersion) (bool, error) {
|
|
kv, err := getKernelVersion()
|
|
if err != nil {
|
|
return false, err
|
|
}
|
|
if kv.Kernel > minVersion.Kernel {
|
|
return true, nil
|
|
}
|
|
if kv.Kernel == minVersion.Kernel && kv.Major >= minVersion.Major {
|
|
return true, nil
|
|
}
|
|
return false, nil
|
|
}
|