/*
 * Patch summary (reviewer notes; the diff text below is unchanged).
 *
 * libcontainer/nsenter/log.h
 *   - Adds bailx(fmt, ...): writes "FATAL: " plus the formatted message to
 *     stderr when logfd < 0, otherwise calls write_log(FATAL, ...); either
 *     way it then calls exit(1).
 *   - bail(fmt, ...) is redefined as bailx(fmt ": %m", ...), so bail is the
 *     variant that appends the errno text and bailx is the one that does not.
 *
 * libcontainer/nsenter/nsexec.c
 *   - sane_kill() is moved above its new callers. It now returns void,
 *     returns early for pid <= 0, and saves and restores errno around kill().
 *   - New iobail(got, want, errmsg, pid1, pid2): sends SIGKILL to both pids
 *     through sane_kill(), then calls bail("%s", errmsg) when got < 0, or
 *     bailx() with a "(got %d of %d bytes)" suffix otherwise.
 *   - New xread() and xwrite(): each issues a single read() or write() and
 *     hands any return value other than nbytes to iobail(). Passing -1 as a
 *     pid means nothing is killed for that slot, because sane_kill() ignores
 *     pid <= 0.
 *   - The open-coded read/write length checks in the nl_parse and nsexec
 *     hunks are replaced by xread()/xwrite() calls.
 *   - A number of call sites switch from bail() to bailx(), which drops the
 *     ": %m" suffix from those messages. The setns failure message in
 *     __join_namespaces now formats strerror(saved_errno) explicitly.
 *   - stage1_pid and stage2_pid are reset to -1 when SYNC_CHILD_FINISH is
 *     received, so later sane_kill() calls on them do nothing.
 *   - The close() failure paths and the default arms of the stage-0 sync
 *     switches gain sane_kill() calls before bailing.
 *
 * NOTE(review): this copy of the patch appears to have lost its line breaks
 * (whole hunks sit on single lines). Presumably it has to be regenerated
 * from the original commit before it can be applied - TODO confirm.
 */
diff --git a/libcontainer/nsenter/log.h b/libcontainer/nsenter/log.h index 3e18de687..264e32667 100644 --- a/libcontainer/nsenter/log.h +++ b/libcontainer/nsenter/log.h @@ -26,15 +26,20 @@ bool log_enabled_for(int level); void write_log(int level, const char *format, ...) __attribute__((format(printf, 2, 3))); extern int logfd; -#define bail(fmt, ...) \ - do { \ - if (logfd < 0) \ - fprintf(stderr, "FATAL: " fmt ": %m\n", \ - ##__VA_ARGS__); \ - else \ - write_log(FATAL, fmt ": %m", ##__VA_ARGS__); \ - exit(1); \ + +/* bailx logs a message to logfd (or stderr, if logfd is not available) + * and terminates the program. + */ +#define bailx(fmt, ...) \ + do { \ + if (logfd < 0) \ + fprintf(stderr, "FATAL: " fmt "\n", ##__VA_ARGS__); \ + else \ + write_log(FATAL, fmt, ##__VA_ARGS__); \ + exit(1); \ } while(0) +/* bail is the same as bailx, except it also adds ": %m" (errno). */ +#define bail(fmt, ...) bailx(fmt ": %m", ##__VA_ARGS__) #endif /* NSENTER_LOG_H */ diff --git a/libcontainer/nsenter/nsexec.c b/libcontainer/nsenter/nsexec.c index 728e68bb0..59b785109 100644 --- a/libcontainer/nsenter/nsexec.c +++ b/libcontainer/nsenter/nsexec.c @@ -208,7 +208,7 @@ static int try_mapping_tool(const char *app, int pid, char *map, size_t map_len) * or programming issue. 
*/ if (!app) - bail("mapping tool not present"); + bailx("mapping tool not present"); child = fork(); if (child < 0) @@ -274,7 +274,7 @@ static void update_uidmap(const char *path, int pid, char *map, size_t map_len) bail("failed to update /proc/%d/uid_map", pid); write_log(DEBUG, "update /proc/%d/uid_map got -EPERM (trying %s)", pid, path); if (try_mapping_tool(path, pid, map, map_len)) - bail("failed to use newuid map on %d", pid); + bailx("failed to use newuid map on %d", pid); } } @@ -289,7 +289,7 @@ static void update_gidmap(const char *path, int pid, char *map, size_t map_len) bail("failed to update /proc/%d/gid_map", pid); write_log(DEBUG, "update /proc/%d/gid_map got -EPERM (trying %s)", pid, path); if (try_mapping_tool(path, pid, map, map_len)) - bail("failed to use newgid map on %d", pid); + bailx("failed to use newgid map on %d", pid); } } @@ -332,22 +332,59 @@ static uint8_t readint8(char *buf) return *(uint8_t *) buf; } +static inline void sane_kill(pid_t pid, int signum) +{ + if (pid <= 0) + return; + + int saved_errno = errno; + kill(pid, signum); + errno = saved_errno; +} + +__attribute__((noreturn)) +static void iobail(int got, int want, const char *errmsg, int pid1, int pid2) +{ + sane_kill(pid1, SIGKILL); + sane_kill(pid2, SIGKILL); + if (got < 0) + bail("%s", errmsg); + /* Short read or write. */ + bailx("%s (got %d of %d bytes)", errmsg, got, want); +} + +static void xread(int fd, void *buf, size_t nbytes, const char *errmsg, int pid1, int pid2) +{ + ssize_t len; + + len = read(fd, buf, nbytes); + if (len != nbytes) + iobail(len, nbytes, errmsg, pid1, pid2); +} + +static void xwrite(int fd, void *buf, size_t nbytes, const char *errmsg, int pid1, int pid2) +{ + ssize_t len; + + len = write(fd, buf, nbytes); + if (len != nbytes) + iobail(len, nbytes, errmsg, pid1, pid2); +} + static void nl_parse(int fd, struct nlconfig_t *config) { - size_t len, size; + size_t size; struct nlmsghdr hdr; char *data, *current; /* Retrieve the netlink header. 
*/ - len = read(fd, &hdr, NLMSG_HDRLEN); - if (len != NLMSG_HDRLEN) - bail("invalid netlink header length %zu", len); + xread(fd, &hdr, NLMSG_HDRLEN, "failed to read netlink header", -1, -1); if (hdr.nlmsg_type == NLMSG_ERROR) - bail("failed to read netlink message"); + bailx("failed to read netlink message"); if (hdr.nlmsg_type != INIT_MSG) - bail("unexpected msg type %d", hdr.nlmsg_type); + bailx("unexpected msg type %d", hdr.nlmsg_type); /* Retrieve data. */ size = NLMSG_PAYLOAD(&hdr, 0); @@ -355,9 +392,7 @@ static void nl_parse(int fd, struct nlconfig_t *config) if (!data) bail("failed to allocate %zu bytes of memory for nl_payload", size); - len = read(fd, data, size); - if (len != size) - bail("failed to read netlink payload, %zu != %zu", len, size); + xread(fd, data, size, "failed to read netlink payload", -1, -1); /* Parse the netlink payload. */ config->data = data; @@ -456,7 +491,7 @@ static int nstype(char *name) * without corresponding handling could result in broken behaviour) and * the rest of runc doesn't allow unknown namespace types anyway. */ - bail("unknown namespace type %s", name); + bailx("unknown namespace type %s", name); } static nsset_t __open_namespaces(char *nsspec, struct namespace_t **ns_list, size_t *ns_len) @@ -469,7 +504,7 @@ static nsset_t __open_namespaces(char *nsspec, struct namespace_t **ns_list, siz namespace = strtok_r(nsspec, ",", &saveptr); if (!namespace || !strlen(namespace) || !strlen(nsspec)) - bail("ns paths are empty"); + bailx("ns paths are empty"); do { int fd; @@ -485,7 +520,7 @@ static nsset_t __open_namespaces(char *nsspec, struct namespace_t **ns_list, siz /* Split 'ns:path'. */ path = strstr(namespace, ":"); if (!path) - bail("failed to parse %s", namespace); + bailx("failed to parse %s", namespace); *path++ = '\0'; fd = open(path, O_RDONLY); @@ -530,7 +565,7 @@ static nsset_t __join_namespaces(nsset_t allow, struct namespace_t *ns_list, siz /* Skip permission errors. 
*/ if (saved_errno == EPERM) continue; - bail("failed to setns into %s namespace", ns->type); + bailx("failed to setns into %s namespace: %s", ns->type, strerror(saved_errno)); } joined |= type; @@ -597,7 +632,7 @@ static void __close_namespaces(nsset_t to_join, nsset_t joined, struct namespace /* Make sure we joined the namespaces we planned to. */ if (failed_to_join) - bail("failed to join {%s} namespaces: %s", nsset_to_str(failed_to_join), strerror(EPERM)); + bailx("failed to join {%s} namespaces: %s", nsset_to_str(failed_to_join), strerror(EPERM)); free(ns_list); } @@ -637,14 +672,6 @@ void join_namespaces(char *nsspec) __close_namespaces(to_join, joined, ns_list, ns_len); } -static inline int sane_kill(pid_t pid, int signum) -{ - if (pid > 0) - return kill(pid, signum); - else - return 0; -} - void try_unshare(int flags, const char *msg) { write_log(DEBUG, "unshare %s", msg); @@ -842,8 +869,10 @@ void nsexec(void) bail("unable to spawn stage-1"); syncfd = sync_child_pipe[1]; - if (close(sync_child_pipe[0]) < 0) + if (close(sync_child_pipe[0]) < 0) { + sane_kill(stage1_pid, SIGKILL); bail("failed to close sync_child_pipe[0] fd"); + } /* * State machine for synchronisation with the children. 
We only @@ -854,8 +883,8 @@ void nsexec(void) while (!stage1_complete) { enum sync_t s; - if (read(syncfd, &s, sizeof(s)) != sizeof(s)) - bail("failed to sync with stage-1: next state"); + xread(syncfd, &s, sizeof(s), + "failed to sync with stage-1: next state", stage1_pid, stage2_pid); switch (s) { case SYNC_USERMAP_PLS: @@ -879,28 +908,21 @@ void nsexec(void) update_gidmap(config.gidmappath, stage1_pid, config.gidmap, config.gidmap_len); s = SYNC_USERMAP_ACK; - if (write(syncfd, &s, sizeof(s)) != sizeof(s)) { - sane_kill(stage1_pid, SIGKILL); - sane_kill(stage2_pid, SIGKILL); - bail("failed to sync with stage-1: write(SYNC_USERMAP_ACK)"); - } + xwrite(syncfd, &s, sizeof(s), + "failed to sync with stage-1: write(SYNC_USERMAP_ACK)", stage1_pid, -1); break; case SYNC_RECVPID_PLS: write_log(DEBUG, "stage-1 requested pid to be forwarded"); /* Get the stage-2 pid. */ - if (read(syncfd, &stage2_pid, sizeof(stage2_pid)) != sizeof(stage2_pid)) { - sane_kill(stage1_pid, SIGKILL); - bail("failed to sync with stage-1: read(stage2_pid)"); - } + xread(syncfd, &stage2_pid, sizeof(stage2_pid), + "failed to sync with stage-1: read(stage2_pid)", stage1_pid, -1); /* Send ACK. */ s = SYNC_RECVPID_ACK; - if (write(syncfd, &s, sizeof(s)) != sizeof(s)) { - sane_kill(stage1_pid, SIGKILL); - sane_kill(stage2_pid, SIGKILL); - bail("failed to sync with stage-1: write(SYNC_RECVPID_ACK)"); - } + xwrite(syncfd, &s, sizeof(s), + "failed to sync with stage-1: write(SYNC_RECVPID_ACK)", + stage1_pid, stage2_pid); /* * Send both the stage-1 and stage-2 pids back to runc. 
@@ -914,35 +936,38 @@ void nsexec(void) len = dprintf(pipenum, "{\"stage1_pid\":%d,\"stage2_pid\":%d}\n", stage1_pid, stage2_pid); - if (len < 0) { - sane_kill(stage1_pid, SIGKILL); - sane_kill(stage2_pid, SIGKILL); - bail("failed to sync with runc: write(pid-JSON)"); - } + if (len < 0) + iobail(len, len, + "failed to sync with runc: write(pid-JSON)", + stage1_pid, stage2_pid); break; case SYNC_TIMEOFFSETS_PLS: write_log(DEBUG, "stage-1 requested timens offsets to be configured"); update_timens_offsets(stage1_pid, config.timensoffset, config.timensoffset_len); s = SYNC_TIMEOFFSETS_ACK; - if (write(syncfd, &s, sizeof(s)) != sizeof(s)) { - sane_kill(stage1_pid, SIGKILL); - bail("failed to sync with child: write(SYNC_TIMEOFFSETS_ACK)"); - } + xwrite(syncfd, &s, sizeof(s), + "failed to sync with child: write(SYNC_TIMEOFFSETS_ACK)", + stage1_pid, -1); break; case SYNC_CHILD_FINISH: write_log(DEBUG, "stage-1 complete"); stage1_complete = true; + stage1_pid = -1; break; default: - bail("unexpected sync value: %u", s); + sane_kill(stage1_pid, SIGKILL); + sane_kill(stage2_pid, SIGKILL); + bailx("unexpected sync value: %u", s); } } write_log(DEBUG, "<- stage-1 synchronisation loop"); /* Now sync with grandchild. 
*/ syncfd = sync_grandchild_pipe[1]; - if (close(sync_grandchild_pipe[0]) < 0) + if (close(sync_grandchild_pipe[0]) < 0) { + sane_kill(stage2_pid, SIGKILL); bail("failed to close sync_grandchild_pipe[0] fd"); + } write_log(DEBUG, "-> stage-2 synchronisation loop"); stage2_complete = false; @@ -951,21 +976,20 @@ void nsexec(void) write_log(DEBUG, "signalling stage-2 to run"); s = SYNC_GRANDCHILD; - if (write(syncfd, &s, sizeof(s)) != sizeof(s)) { - sane_kill(stage2_pid, SIGKILL); - bail("failed to sync with child: write(SYNC_GRANDCHILD)"); - } + xwrite(syncfd, &s, sizeof(s), + "failed to sync with child: write(SYNC_GRANDCHILD)", -1, stage2_pid); - if (read(syncfd, &s, sizeof(s)) != sizeof(s)) - bail("failed to sync with child: next state"); + xread(syncfd, &s, sizeof(s), "failed to sync with child: next state", -1, stage2_pid); switch (s) { case SYNC_CHILD_FINISH: write_log(DEBUG, "stage-2 complete"); stage2_complete = true; + stage2_pid = -1; break; default: - bail("unexpected sync value: %u", s); + sane_kill(stage2_pid, SIGKILL); + bailx("unexpected sync value: %u", s); } } write_log(DEBUG, "<- stage-2 synchronisation loop"); @@ -1048,15 +1072,15 @@ void nsexec(void) */ write_log(DEBUG, "request stage-0 to map user namespace"); s = SYNC_USERMAP_PLS; - if (write(syncfd, &s, sizeof(s)) != sizeof(s)) - bail("failed to sync with parent: write(SYNC_USERMAP_PLS)"); + xwrite(syncfd, &s, sizeof(s), + "failed to sync with parent: write(SYNC_USERMAP_PLS)", -1, -1); /* ... wait for mapping ... */ write_log(DEBUG, "waiting stage-0 to complete the mapping of user namespace"); - if (read(syncfd, &s, sizeof(s)) != sizeof(s)) - bail("failed to sync with parent: read(SYNC_USERMAP_ACK)"); + xread(syncfd, &s, sizeof(s), + "failed to sync with parent: read(SYNC_USERMAP_ACK)", -1, -1); if (s != SYNC_USERMAP_ACK) - bail("failed to sync with parent: SYNC_USERMAP_ACK: got %u", s); + bailx("failed to sync with parent: SYNC_USERMAP_ACK: got %u", s); /* Revert temporary re-dumpable setting. 
*/ if (config.namespaces) { @@ -1086,13 +1110,13 @@ void nsexec(void) write_log(DEBUG, "request stage-0 to write timens offsets"); s = SYNC_TIMEOFFSETS_PLS; - if (write(syncfd, &s, sizeof(s)) != sizeof(s)) - bail("failed to sync with parent: write(SYNC_TIMEOFFSETS_PLS)"); + xwrite(syncfd, &s, sizeof(s), + "failed to sync with parent: write(SYNC_TIMEOFFSETS_PLS)", -1, -1); - if (read(syncfd, &s, sizeof(s)) != sizeof(s)) - bail("failed to sync with parent: read(SYNC_TIMEOFFSETS_ACK)"); + xread(syncfd, &s, sizeof(s), + "failed to sync with parent: read(SYNC_TIMEOFFSETS_ACK)", -1, -1); if (s != SYNC_TIMEOFFSETS_ACK) - bail("failed to sync with parent: SYNC_TIMEOFFSETS_ACK: got %u", s); + bailx("failed to sync with parent: SYNC_TIMEOFFSETS_ACK: got %u", s); } /* @@ -1112,31 +1136,23 @@ void nsexec(void) /* Send the child to our parent, which knows what it's doing. */ write_log(DEBUG, "request stage-0 to forward stage-2 pid (%d)", stage2_pid); s = SYNC_RECVPID_PLS; - if (write(syncfd, &s, sizeof(s)) != sizeof(s)) { - sane_kill(stage2_pid, SIGKILL); - bail("failed to sync with parent: write(SYNC_RECVPID_PLS)"); - } - if (write(syncfd, &stage2_pid, sizeof(stage2_pid)) != sizeof(stage2_pid)) { - sane_kill(stage2_pid, SIGKILL); - bail("failed to sync with parent: write(stage2_pid)"); - } + xwrite(syncfd, &s, sizeof(s), + "failed to sync with parent: write(SYNC_RECVPID_PLS)", -1, stage2_pid); + xwrite(syncfd, &stage2_pid, sizeof(stage2_pid), + "failed to sync with parent: write(stage2_pid)", -1, stage2_pid); /* ... wait for parent to get the pid ... 
*/ - if (read(syncfd, &s, sizeof(s)) != sizeof(s)) { - sane_kill(stage2_pid, SIGKILL); - bail("failed to sync with parent: read(SYNC_RECVPID_ACK)"); - } + xread(syncfd, &s, sizeof(s), + "failed to sync with parent: read(SYNC_RECVPID_ACK)", -1, stage2_pid); if (s != SYNC_RECVPID_ACK) { sane_kill(stage2_pid, SIGKILL); - bail("failed to sync with parent: SYNC_RECVPID_ACK: got %u", s); + bailx("failed to sync with parent: SYNC_RECVPID_ACK: got %u", s); } write_log(DEBUG, "signal completion to stage-0"); s = SYNC_CHILD_FINISH; - if (write(syncfd, &s, sizeof(s)) != sizeof(s)) { - sane_kill(stage2_pid, SIGKILL); - bail("failed to sync with parent: write(SYNC_CHILD_FINISH)"); - } + xwrite(syncfd, &s, sizeof(s), + "failed to sync with parent: write(SYNC_CHILD_FINISH)", -1, stage2_pid); /* Our work is done. [Stage 2: STAGE_INIT] is doing the rest of the work. */ write_log(DEBUG, "<~ nsexec stage-1"); @@ -1172,10 +1188,9 @@ void nsexec(void) prctl(PR_SET_NAME, (unsigned long)"runc:[2:INIT]", 0, 0, 0); write_log(DEBUG, "~> nsexec stage-2"); - if (read(syncfd, &s, sizeof(s)) != sizeof(s)) - bail("failed to sync with parent: read(SYNC_GRANDCHILD)"); + xread(syncfd, &s, sizeof(s), "failed to sync with parent: read(SYNC_GRANDCHILD)", -1, -1); if (s != SYNC_GRANDCHILD) - bail("failed to sync with parent: SYNC_GRANDCHILD: got %u", s); + bailx("failed to sync with parent: SYNC_GRANDCHILD: got %u", s); if (setsid() < 0) bail("setsid failed"); @@ -1193,8 +1208,7 @@ void nsexec(void) write_log(DEBUG, "signal completion to stage-0"); s = SYNC_CHILD_FINISH; - if (write(syncfd, &s, sizeof(s)) != sizeof(s)) - bail("failed to sync with parent: write(SYNC_CHILD_FINISH)"); + xwrite(syncfd, &s, sizeof(s), "failed to sync with parent: write(SYNC_CHILD_FINISH)", -1, -1); /* Close sync pipes. */ if (close(sync_grandchild_pipe[0]) < 0) @@ -1210,9 +1224,9 @@ void nsexec(void) } break; default: - bail("unexpected jump value"); + bailx("unexpected jump value"); } /* Should never be reached. 
*/ - bail("should never be reached"); + bailx("should never be reached"); }