mirror of
				https://github.com/opencontainers/runc.git
				synced 2025-11-01 03:22:38 +08:00 
			
		
		
		
	 dac4171746
			
		
	
	dac4171746
	
	
	
		
			
			The idea is to remove the need for cloning the entire runc binary by
replacing the final execve() call of the container process with an
execve() call to a clone of a small C binary which just does an execve()
of its arguments.
This provides similar protection against CVE-2019-5736 but without
requiring a >10MB binary copy for each "runc init". When compiled with
musl, runc-dmz is 13kB (though unfortunately with glibc, it is 1.1MB
which is still quite large).
It should be noted that there is still a window where the container
processes could get access to the host runc binary, but because we set
ourselves as non-dumpable the container would need CAP_SYS_PTRACE (which
is not enabled by default in Docker) in order to get around the
proc_fd_access_allowed() checks. In addition, since Linux 4.10[1] the
kernel blocks access entirely for user namespaced containers in this
scenario. For containers that could still get around these checks we
cannot use runc-dmz, but most containers won't have this issue.
This new runc-dmz binary can be opted out of at compile time by setting
the "runc_nodmz" buildtag, and at runtime by setting the RUNC_DMZ=legacy
environment variable. In both cases, runc will fall back to the classic
/proc/self/exe-based cloning trick. If /proc/self/exe is already a
sealed memfd (namely if the user is using contrib/cmd/memfd-bind to
create a persistent sealed memfd for runc), neither runc-dmz nor
/proc/self/exe cloning will be used because they are not necessary.
[1]: bfedb58925
Co-authored-by: lifubang <lifubang@acmcoder.com>
Signed-off-by: lifubang <lifubang@acmcoder.com>
[cyphar: address various review nits]
[cyphar: fix runc-dmz cross-compilation]
[cyphar: embed runc-dmz into runc binary and clone in Go code]
[cyphar: make runc-dmz optional, with fallback to /proc/self/exe cloning]
[cyphar: do not use runc-dmz when the container has certain privs]
Co-authored-by: Aleksa Sarai <cyphar@cyphar.com>
Signed-off-by: Aleksa Sarai <cyphar@cyphar.com>
		
	
		
			
				
	
	
		
			141 lines
		
	
	
		
			4.5 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
			
		
		
	
	
			141 lines
		
	
	
		
			4.5 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
| /*
 | |
|    Copyright The containerd Authors.
 | |
| 
 | |
|    Licensed under the Apache License, Version 2.0 (the "License");
 | |
|    you may not use this file except in compliance with the License.
 | |
|    You may obtain a copy of the License at
 | |
| 
 | |
|        http://www.apache.org/licenses/LICENSE-2.0
 | |
| 
 | |
|    Unless required by applicable law or agreed to in writing, software
 | |
|    distributed under the License is distributed on an "AS IS" BASIS,
 | |
|    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | |
|    See the License for the specific language governing permissions and
 | |
|    limitations under the License.
 | |
| 
 | |
|    File copied and customized based on
 | |
|    https://github.com/moby/moby/tree/v20.10.14/profiles/seccomp/kernel_linux_test.go
 | |
| */
 | |
| 
 | |
| package kernelversion
 | |
| 
 | |
| import (
 | |
| 	"fmt"
 | |
| 	"testing"
 | |
| )
 | |
| 
 | |
| func TestGetKernelVersion(t *testing.T) {
 | |
| 	version, err := getKernelVersion()
 | |
| 	if err != nil {
 | |
| 		t.Fatal(err)
 | |
| 	}
 | |
| 	if version == nil {
 | |
| 		t.Fatal("version is nil")
 | |
| 	}
 | |
| 	if version.Kernel == 0 {
 | |
| 		t.Fatal("no kernel version")
 | |
| 	}
 | |
| }
 | |
| 
 | |
| func TestParseRelease(t *testing.T) {
 | |
| 	tests := []struct {
 | |
| 		in          string
 | |
| 		out         KernelVersion
 | |
| 		expectedErr error
 | |
| 	}{
 | |
| 		{in: "3.8", out: KernelVersion{Kernel: 3, Major: 8}},
 | |
| 		{in: "3.8.0", out: KernelVersion{Kernel: 3, Major: 8}},
 | |
| 		{in: "3.8.0-19-generic", out: KernelVersion{Kernel: 3, Major: 8}},
 | |
| 		{in: "3.4.54.longterm-1", out: KernelVersion{Kernel: 3, Major: 4}},
 | |
| 		{in: "3.10.0-862.2.3.el7.x86_64", out: KernelVersion{Kernel: 3, Major: 10}},
 | |
| 		{in: "3.12.8tag", out: KernelVersion{Kernel: 3, Major: 12}},
 | |
| 		{in: "3.12-1-amd64", out: KernelVersion{Kernel: 3, Major: 12}},
 | |
| 		{in: "3.12foobar", out: KernelVersion{Kernel: 3, Major: 12}},
 | |
| 		{in: "99.999.999-19-generic", out: KernelVersion{Kernel: 99, Major: 999}},
 | |
| 		{in: "", expectedErr: fmt.Errorf(`failed to parse kernel version "": EOF`)},
 | |
| 		{in: "3", expectedErr: fmt.Errorf(`failed to parse kernel version "3": unexpected EOF`)},
 | |
| 		{in: "3.", expectedErr: fmt.Errorf(`failed to parse kernel version "3.": EOF`)},
 | |
| 		{in: "3a", expectedErr: fmt.Errorf(`failed to parse kernel version "3a": input does not match format`)},
 | |
| 		{in: "3.a", expectedErr: fmt.Errorf(`failed to parse kernel version "3.a": expected integer`)},
 | |
| 		{in: "a", expectedErr: fmt.Errorf(`failed to parse kernel version "a": expected integer`)},
 | |
| 		{in: "a.a", expectedErr: fmt.Errorf(`failed to parse kernel version "a.a": expected integer`)},
 | |
| 		{in: "a.a.a-a", expectedErr: fmt.Errorf(`failed to parse kernel version "a.a.a-a": expected integer`)},
 | |
| 		{in: "-3", expectedErr: fmt.Errorf(`failed to parse kernel version "-3": expected integer`)},
 | |
| 		{in: "-3.", expectedErr: fmt.Errorf(`failed to parse kernel version "-3.": expected integer`)},
 | |
| 		{in: "-3.8", expectedErr: fmt.Errorf(`failed to parse kernel version "-3.8": expected integer`)},
 | |
| 		{in: "-3.-8", expectedErr: fmt.Errorf(`failed to parse kernel version "-3.-8": expected integer`)},
 | |
| 		{in: "3.-8", expectedErr: fmt.Errorf(`failed to parse kernel version "3.-8": expected integer`)},
 | |
| 	}
 | |
| 	for _, tc := range tests {
 | |
| 		tc := tc
 | |
| 		t.Run(tc.in, func(t *testing.T) {
 | |
| 			version, err := parseRelease(tc.in)
 | |
| 			if tc.expectedErr != nil {
 | |
| 				if err == nil {
 | |
| 					t.Fatal("expected an error")
 | |
| 				}
 | |
| 				if err.Error() != tc.expectedErr.Error() {
 | |
| 					t.Fatalf("expected: %s, got: %s", tc.expectedErr, err)
 | |
| 				}
 | |
| 				return
 | |
| 			}
 | |
| 			if err != nil {
 | |
| 				t.Fatal("unexpected error:", err)
 | |
| 			}
 | |
| 			if version == nil {
 | |
| 				t.Fatal("version is nil")
 | |
| 			}
 | |
| 			if version.Kernel != tc.out.Kernel || version.Major != tc.out.Major {
 | |
| 				t.Fatalf("expected: %d.%d, got: %d.%d", tc.out.Kernel, tc.out.Major, version.Kernel, version.Major)
 | |
| 			}
 | |
| 		})
 | |
| 	}
 | |
| }
 | |
| 
 | |
| func TestGreaterEqualThan(t *testing.T) {
 | |
| 	// Get the current kernel version, so that we can make test relative to that
 | |
| 	v, err := getKernelVersion()
 | |
| 	if err != nil {
 | |
| 		t.Fatal(err)
 | |
| 	}
 | |
| 
 | |
| 	tests := []struct {
 | |
| 		doc      string
 | |
| 		in       KernelVersion
 | |
| 		expected bool
 | |
| 	}{
 | |
| 		{
 | |
| 			doc:      "same version",
 | |
| 			in:       KernelVersion{v.Kernel, v.Major},
 | |
| 			expected: true,
 | |
| 		},
 | |
| 		{
 | |
| 			doc:      "kernel minus one",
 | |
| 			in:       KernelVersion{v.Kernel - 1, v.Major},
 | |
| 			expected: true,
 | |
| 		},
 | |
| 		{
 | |
| 			doc:      "kernel plus one",
 | |
| 			in:       KernelVersion{v.Kernel + 1, v.Major},
 | |
| 			expected: false,
 | |
| 		},
 | |
| 		{
 | |
| 			doc:      "major plus one",
 | |
| 			in:       KernelVersion{v.Kernel, v.Major + 1},
 | |
| 			expected: false,
 | |
| 		},
 | |
| 	}
 | |
| 	for _, tc := range tests {
 | |
| 		tc := tc
 | |
| 		t.Run(tc.doc+": "+tc.in.String(), func(t *testing.T) {
 | |
| 			ok, err := GreaterEqualThan(tc.in)
 | |
| 			if err != nil {
 | |
| 				t.Fatal("unexpected error:", err)
 | |
| 			}
 | |
| 			if ok != tc.expected {
 | |
| 				t.Fatalf("expected: %v, got: %v", tc.expected, ok)
 | |
| 			}
 | |
| 		})
 | |
| 	}
 | |
| }
 |