contrib: add sample seccomp agent

Implement sample seccomp agent. It's also used in integration tests in
the following commit.

Instructions on how to use it are in contrib/cmd/seccompagent/README.md

Signed-off-by: Alban Crequy <alban@kinvolk.io>
Signed-off-by: Rodrigo Campos <rodrigo@kinvolk.io>
Co-authored-by: Rodrigo Campos <rodrigo@kinvolk.io>
This commit is contained in:
Alban Crequy
2020-09-15 21:45:28 +02:00
committed by Rodrigo Campos
parent c64aaf0e0b
commit e21a9ee813
6 changed files with 564 additions and 1 deletions

1
.gitignore vendored
View File

@@ -3,6 +3,7 @@ vendor/pkg
/runc-*
contrib/cmd/recvtty/recvtty
contrib/cmd/sd-helper/sd-helper
contrib/cmd/seccompagent/seccompagent
man/man8
release
Vagrantfile

View File

@@ -30,11 +30,14 @@ GO_BUILD_STATIC := CGO_ENABLED=1 $(GO) build -trimpath $(EXTRA_FLAGS) -tags "$(B
runc:
$(GO_BUILD) -o runc .
all: runc recvtty sd-helper
all: runc recvtty sd-helper seccompagent
recvtty sd-helper:
$(GO_BUILD) -o contrib/cmd/$@/$@ ./contrib/cmd/$@
seccompagent:
$(GO_BUILD) -o contrib/cmd/seccompagent/seccompagent ./contrib/cmd/seccompagent
static:
$(GO_BUILD_STATIC) -o runc .
$(GO_BUILD_STATIC) -o contrib/cmd/recvtty/recvtty ./contrib/cmd/recvtty

View File

@@ -0,0 +1,62 @@
# Seccomp Agent
## Warning
Please note that this is an example agent; as such, specially crafted messages
may cause it to misbehave. Please use it as an example only.
Also, this agent is used for integration tests. Be aware that changing the
behaviour can break the integration tests.
## Get started
Compile runc and seccompagent:
```bash
make all
```
Run the seccomp agent in the background:
```bash
sudo ./contrib/cmd/seccompagent/seccompagent &
```
Prepare a container:
```bash
mkdir container-seccomp-notify
cd container-seccomp-notify
mkdir rootfs
docker export $(docker create busybox) | tar -C rootfs -xvf -
```
Copy the example `config.json` file from the directory where this README.md is
to the container directory you prepared earlier (`container-seccomp-notify`).
This is a config.json as generated by `runc spec` at the time of writing, with
only the `args` and `seccomp` sections modified.
Then start the container:
```bash
runc run mycontainerid
```
The container will output something like this:
```bash
+ cd /dev/shm
+ mkdir test-dir
+ touch test-file
+ chmod 777 test-file
chmod: changing permissions of 'test-file': No medium found
+ ls -l /dev/shm
total 0
drwxr-xr-x 2 root root 40 Jul 21 14:09 test-dir-foo
-rw-r--r-- 1 root root 0 Jul 21 14:09 test-file
+ echo Note the agent added a suffix for the directory name and chmod fails
Note the agent added a suffix for the directory name and chmod fails
```
This simple example runs in /dev/shm only because, in the example config.json,
the root filesystem is read-only and /dev/shm is mounted as a tmpfs.
The agent makes all chmod calls fail with ENOMEDIUM, as the example output shows.
For mkdir, the agent adds a "-foo" suffix: the container runs "mkdir test-dir"
but the directory created is "test-dir-foo".

View File

@@ -0,0 +1,197 @@
{
"ociVersion": "1.0.2-dev",
"process": {
"terminal": true,
"user": {
"uid": 0,
"gid": 0
},
"args": [
"sh",
"-c",
"set -x; cd /dev/shm; mkdir test-dir; touch test-file; chmod 777 test-file; ls -l /dev/shm; echo \"Note the agent added a suffix for the directory name and chmod fails\" "
],
"env": [
"PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
"TERM=xterm"
],
"cwd": "/",
"capabilities": {
"bounding": [
"CAP_AUDIT_WRITE",
"CAP_KILL",
"CAP_NET_BIND_SERVICE"
],
"effective": [
"CAP_AUDIT_WRITE",
"CAP_KILL",
"CAP_NET_BIND_SERVICE"
],
"inheritable": [
"CAP_AUDIT_WRITE",
"CAP_KILL",
"CAP_NET_BIND_SERVICE"
],
"permitted": [
"CAP_AUDIT_WRITE",
"CAP_KILL",
"CAP_NET_BIND_SERVICE"
],
"ambient": [
"CAP_AUDIT_WRITE",
"CAP_KILL",
"CAP_NET_BIND_SERVICE"
]
},
"rlimits": [
{
"type": "RLIMIT_NOFILE",
"hard": 1024,
"soft": 1024
}
],
"noNewPrivileges": true
},
"root": {
"path": "rootfs",
"readonly": true
},
"hostname": "runc",
"mounts": [
{
"destination": "/proc",
"type": "proc",
"source": "proc"
},
{
"destination": "/dev",
"type": "tmpfs",
"source": "tmpfs",
"options": [
"nosuid",
"strictatime",
"mode=755",
"size=65536k"
]
},
{
"destination": "/dev/pts",
"type": "devpts",
"source": "devpts",
"options": [
"nosuid",
"noexec",
"newinstance",
"ptmxmode=0666",
"mode=0620",
"gid=5"
]
},
{
"destination": "/dev/shm",
"type": "tmpfs",
"source": "shm",
"options": [
"nosuid",
"noexec",
"nodev",
"mode=1777",
"size=65536k"
]
},
{
"destination": "/dev/mqueue",
"type": "mqueue",
"source": "mqueue",
"options": [
"nosuid",
"noexec",
"nodev"
]
},
{
"destination": "/sys",
"type": "sysfs",
"source": "sysfs",
"options": [
"nosuid",
"noexec",
"nodev",
"ro"
]
},
{
"destination": "/sys/fs/cgroup",
"type": "cgroup",
"source": "cgroup",
"options": [
"nosuid",
"noexec",
"nodev",
"relatime",
"ro"
]
}
],
"linux": {
"seccomp": {
"defaultAction": "SCMP_ACT_ALLOW",
"listenerPath": "/run/seccomp-agent.socket",
"listenerMetadata": "foo",
"architectures": [ "SCMP_ARCH_X86", "SCMP_ARCH_X32" ],
"syscalls": [
{
"names": [ "chmod", "fchmod", "fchmodat", "mkdir" ],
"action": "SCMP_ACT_NOTIFY"
}
]
},
"resources": {
"devices": [
{
"allow": false,
"access": "rwm"
}
]
},
"namespaces": [
{
"type": "pid"
},
{
"type": "network"
},
{
"type": "ipc"
},
{
"type": "uts"
},
{
"type": "mount"
},
{
"type": "cgroup"
}
],
"maskedPaths": [
"/proc/acpi",
"/proc/asound",
"/proc/kcore",
"/proc/keys",
"/proc/latency_stats",
"/proc/timer_list",
"/proc/timer_stats",
"/proc/sched_debug",
"/sys/firmware",
"/proc/scsi"
],
"readonlyPaths": [
"/proc/bus",
"/proc/fs",
"/proc/irq",
"/proc/sys",
"/proc/sysrq-trigger"
]
}
}

View File

@@ -0,0 +1,291 @@
// +build linux,cgo,seccomp
package main
import (
"bytes"
"encoding/json"
"errors"
"flag"
"fmt"
"io/ioutil"
"net"
"os"
"path/filepath"
"strings"
securejoin "github.com/cyphar/filepath-securejoin"
"github.com/opencontainers/runtime-spec/specs-go"
libseccomp "github.com/seccomp/libseccomp-golang"
"github.com/sirupsen/logrus"
"golang.org/x/sys/unix"
)
var (
// socketFile is the path of the unix socket the agent listens on.
// It is set from the -socketfile flag in main.
socketFile string
// pidFile is the path where main writes the agent's pid.
// It is set from the -pid-file flag; when empty, no pid file is written.
pidFile string
)
// closeStateFds closes every file descriptor contained in recvFds.
//
// It is used as best-effort cleanup on error paths, so errors returned by
// close are deliberately ignored.
func closeStateFds(recvFds []int) {
	for _, fd := range recvFds {
		// Close the received descriptor itself, not its position in the
		// slice: closing the index would hit the agent's own low-numbered
		// fds (stdin/stdout/stderr) and leak the received ones.
		_ = unix.Close(fd)
	}
}
// parseStateFds picks the seccomp fd out of recvFds and closes all the other
// received descriptors.
//
// stateFds is expected to have the layout of specs.ContainerProcessState.Fds,
// and recvFds to be the descriptors received in the same SCM_RIGHTS message,
// so that the name at position i in stateFds describes recvFds[i].
//
// An error is returned when the seccomp fd name is absent, appears more than
// once, or its position has no counterpart in recvFds. On error no descriptor
// is closed.
func parseStateFds(stateFds []string, recvFds []int) (uintptr, error) {
	// Remember where the seccomp fd name first shows up and how many times
	// it occurs; anything other than exactly one occurrence is malformed.
	seccompIdx := -1
	matches := 0
	for pos, fdName := range stateFds {
		if fdName != specs.SeccompFdName {
			continue
		}
		if matches == 0 {
			seccompIdx = pos
		}
		matches++
	}
	if matches != 1 {
		return 0, errors.New("seccomp fd not found or malformed containerProcessState.Fds")
	}

	if seccompIdx >= len(recvFds) {
		return 0, errors.New("seccomp fd index out of range")
	}

	// Everything except the seccomp fd is of no use to the agent.
	for pos, other := range recvFds {
		if pos != seccompIdx {
			unix.Close(other)
		}
	}

	return uintptr(recvFds[seccompIdx]), nil
}
// handleNewMessage receives a single message from the socket sockfd and
// extracts the seccomp fd and the metadata string from it.
//
// The message payload is decoded as a JSON specs.ContainerProcessState and
// the ancillary data is parsed as unix rights (file descriptors). It returns
// the seccomp fd selected by parseStateFds together with the Metadata field
// of the decoded state. If decoding the state or selecting the fd fails, the
// received descriptors are handed to closeStateFds before returning the error.
func handleNewMessage(sockfd int) (uintptr, string, error) {
	const maxNameLen = 4096

	// Ancillary buffer sized for one control message with a 4-byte payload.
	oobSpace := unix.CmsgSpace(4)
	oob := make([]byte, oobSpace)
	stateBuf := make([]byte, maxNameLen)

	n, oobn, _, _, err := unix.Recvmsg(sockfd, stateBuf, oob, 0)
	if err != nil {
		return 0, "", err
	}
	// A payload that fills the whole buffer may have been cut short, and the
	// ancillary data must be exactly the size that was reserved for it.
	if n >= maxNameLen || oobn != oobSpace {
		return 0, "", fmt.Errorf("recvfd: incorrect number of bytes read (n=%d oobn=%d)", n, oobn)
	}

	scms, err := unix.ParseSocketControlMessage(oob[:oobn])
	if err != nil {
		return 0, "", err
	}
	if count := len(scms); count != 1 {
		return 0, "", fmt.Errorf("recvfd: number of SCMs is not 1: %d", count)
	}

	fds, err := unix.ParseUnixRights(&scms[0])
	if err != nil {
		return 0, "", err
	}

	var state specs.ContainerProcessState
	if err := json.Unmarshal(stateBuf[:n], &state); err != nil {
		closeStateFds(fds)
		return 0, "", fmt.Errorf("cannot parse OCI state: %w", err)
	}

	seccompFd, err := parseStateFds(state.Fds, fds)
	if err != nil {
		closeStateFds(fds)
		return 0, "", err
	}

	return seccompFd, state.Metadata, nil
}
// readArgString reads a NUL-terminated string out of the memory of process
// pid, starting at address offset, by reading /proc/<pid>/mem.
//
// At most 4095 bytes of string data are returned: the read buffer is 4096
// bytes long and its last byte is always overwritten with a terminator.
func readArgString(pid uint32, offset int64) (string, error) {
	memPath := fmt.Sprintf("/proc/%d/mem", pid)
	memfd, err := unix.Open(memPath, unix.O_RDONLY, 0o777)
	if err != nil {
		return "", err
	}
	defer unix.Close(memfd)

	raw := make([]byte, 4096) // PATH_MAX
	if _, err := unix.Pread(memfd, raw, offset); err != nil {
		return "", err
	}

	// Guarantee a terminator exists so the search below always succeeds.
	raw[len(raw)-1] = 0
	end := bytes.IndexByte(raw, 0)

	return string(raw[:end]), nil
}
// runMkdirForContainer creates, on behalf of process pid, the directory
// "<fileName>-<metadata>" with the given mode.
//
// A fileName starting with "/" is resolved below /proc/<pid>/root/, any other
// fileName below /proc/<pid>/cwd/. The final path is built with
// securejoin.SecureJoin on that base.
//
// The caller is expected to have sanitized metadata beforehand so that it
// cannot move the new directory to a different location.
func runMkdirForContainer(pid uint32, fileName string, mode uint32, metadata string) error {
	var root string
	if strings.HasPrefix(fileName, "/") {
		// Absolute path: resolve against the process' root filesystem.
		root = fmt.Sprintf("/proc/%d/root/", pid)
	} else {
		// Relative path: resolve against the process' working directory.
		root = fmt.Sprintf("/proc/%d/cwd/", pid)
	}

	suffixed := fmt.Sprintf("%s-%s", fileName, metadata)

	target, err := securejoin.SecureJoin(root, suffixed)
	if err != nil {
		return err
	}

	return unix.Mkdir(target, mode)
}
// notifHandler receives seccomp notifications on fd and answers them, looping
// forever. fd is closed when the function returns.
//
// Handling per syscall:
//   - mkdir: the path argument is read from the calling process' memory and
//     the directory is created by the agent via runMkdirForContainer, with
//     metadata appended as a suffix. Failures are reported to the caller as
//     ENOSYS.
//   - chmod, fchmod, fchmodat: always fail with ENOMEDIUM.
//   - anything else: the kernel is told to continue the syscall unchanged.
//
// Notifications that cannot be received or decoded, or whose ID is no longer
// valid, are logged and skipped without sending a response.
func notifHandler(fd libseccomp.ScmpFd, metadata string) {
	defer unix.Close(int(fd))

	for {
		req, err := libseccomp.NotifReceive(fd)
		if err != nil {
			logrus.Errorf("Error in NotifReceive(): %s", err)
			continue
		}

		syscallName, err := req.Data.Syscall.GetName()
		if err != nil {
			logrus.Errorf("Error decoding syscall %v(): %s", req.Data.Syscall, err)
			continue
		}
		logrus.Debugf("Received syscall %q, pid %v, arch %q, args %+v", syscallName, req.Pid, req.Data.Arch, req.Data.Args)

		// Default answer: let the kernel run the syscall as-is.
		resp := &libseccomp.ScmpNotifResp{
			ID:    req.ID,
			Error: 0,
			Val:   0,
			Flags: libseccomp.NotifRespFlagContinue,
		}

		// TOCTOU check
		if err := libseccomp.NotifIDValid(fd, req.ID); err != nil {
			logrus.Errorf("TOCTOU check failed: req.ID is no longer valid: %s", err)
			continue
		}

		switch syscallName {
		case "mkdir":
			// The agent emulates mkdir itself, so the continue flag must be
			// cleared on every path of this case. The kernel rejects a
			// response that combines the continue flag with a non-zero
			// Error or Val, which would leave the caller blocked.
			resp.Flags = 0

			fileName, err := readArgString(req.Pid, int64(req.Data.Args[0]))
			if err != nil {
				logrus.Errorf("Cannot read argument: %s", err)
				resp.Error = int32(unix.ENOSYS)
				resp.Val = ^uint64(0) // -1
				break
			}

			logrus.Debugf("mkdir: %q", fileName)

			// TOCTOU check: the memory read above is only trustworthy if
			// the notification is still pending.
			if err := libseccomp.NotifIDValid(fd, req.ID); err != nil {
				logrus.Errorf("TOCTOU check failed: req.ID is no longer valid: %s", err)
				continue
			}

			if err := runMkdirForContainer(req.Pid, fileName, uint32(req.Data.Args[1]), metadata); err != nil {
				logrus.Errorf("Cannot run mkdir for %q: %s", fileName, err)
				resp.Error = int32(unix.ENOSYS)
				resp.Val = ^uint64(0) // -1
			}

		case "chmod", "fchmod", "fchmodat":
			resp.Error = int32(unix.ENOMEDIUM)
			resp.Val = ^uint64(0) // -1
			resp.Flags = 0
		}

		if err := libseccomp.NotifRespond(fd, resp); err != nil {
			logrus.Errorf("Error in notification response: %s", err)
		}
	}
}
// main runs the seccomp agent: it parses the command-line flags, removes any
// stale socket file, optionally writes a pid file, and then listens on the
// unix socket. For every accepted connection it reads one message with
// handleNewMessage and starts a notifHandler goroutine for the received
// seccomp fd.
func main() {
	flag.StringVar(&socketFile, "socketfile", "/run/seccomp-agent.socket", "Socket file")
	flag.StringVar(&pidFile, "pid-file", "", "Pid file")
	logrus.SetLevel(logrus.DebugLevel)

	// Only flags are accepted; positional arguments are an error.
	flag.Parse()
	if flag.NArg() > 0 {
		flag.PrintDefaults()
		logrus.Fatal("Invalid command")
	}

	// A missing socket file is fine, any other removal failure is fatal.
	rmErr := os.Remove(socketFile)
	if rmErr != nil && !errors.Is(rmErr, os.ErrNotExist) {
		logrus.Fatalf("Cannot cleanup socket file: %v", rmErr)
	}

	if pidFile != "" {
		contents := []byte(fmt.Sprintf("%d", os.Getpid()))
		if err := ioutil.WriteFile(pidFile, contents, 0o644); err != nil {
			logrus.Fatalf("Cannot write pid file: %v", err)
		}
	}

	logrus.Info("Waiting for seccomp file descriptors")
	listener, err := net.Listen("unix", socketFile)
	if err != nil {
		logrus.Fatalf("Cannot listen: %s", err)
	}
	defer listener.Close()

	for {
		conn, err := listener.Accept()
		if err != nil {
			logrus.Errorf("Cannot accept connection: %s", err)
			continue
		}

		// Get a file for the connection so its raw fd can be passed on;
		// the connection itself is no longer needed after that.
		sockFile, err := conn.(*net.UnixConn).File()
		conn.Close()
		if err != nil {
			logrus.Errorf("Cannot get socket: %v", err)
			continue
		}

		seccompFd, metadata, err := handleNewMessage(int(sockFile.Fd()))
		sockFile.Close()
		if err != nil {
			logrus.Errorf("Error receiving seccomp file descriptor: %v", err)
			continue
		}

		// The metadata is used as a file name suffix, so strings such as
		// "/../p" must not get through: they would place the file somewhere
		// other than expected. Keep only the last path element and, if that
		// still contains a separator, fall back to a known-safe value.
		suffix := filepath.Base(metadata)
		if strings.Contains(suffix, "/") {
			suffix = "agent-generated-suffix"
		}

		logrus.Infof("Received new seccomp fd: %v", seccompFd)
		go notifHandler(libseccomp.ScmpFd(seccompFd), suffix)
	}
}

View File

@@ -0,0 +1,9 @@
// +build !linux !cgo !seccomp
package main
import "fmt"
// main is the fallback entry point for builds whose constraints exclude the
// real agent; it only prints a notice explaining how to enable it.
func main() {
	const notice = "Not supported, to use this compile with build tag: seccomp."
	fmt.Printf("%s\n", notice)
}