diff options
Diffstat (limited to 'sandbox/main.c')
-rw-r--r-- | sandbox/main.c | 438 |
1 files changed, 298 insertions, 140 deletions
diff --git a/sandbox/main.c b/sandbox/main.c index 44b1742..2e225c6 100644 --- a/sandbox/main.c +++ b/sandbox/main.c @@ -1,8 +1,9 @@ #include "sandbox.h" +#include <getopt.h> static struct timespec start_timespec = { 0 }; -static noreturn void finish(enum poe_exit_reason reason, int status, const char *fmt, ...) +static _Noreturn void finish(enum poe_exit_reason reason, int status, const char *fmt, ...) { if (!start_timespec.tv_sec && !start_timespec.tv_nsec) bug("start_timespec not set?"); @@ -23,175 +24,332 @@ static noreturn void finish(enum poe_exit_reason reason, int status, const char exit(0); } -static void handle_stdout(int fd, int orig_fd) +static void handle_stdout(int in_fd, int fd) { - assert(PIPE_BUF % 4 == 0); - uint32_t buf[PIPE_BUF / 4 + 2]; - ssize_t n = read(fd, (char *)(buf + 2), PIPE_BUF); - if (n < 0) - bug("read from stdout/err pipe"); - buf[0] = (uint32_t)orig_fd; - buf[1] = (uint32_t)n; - if (write(STDOUT_FILENO, buf, n + 8) < 0) - bug("write to stdout failed"); + unsigned char buf[4 + 4 + PIPE_BUF]; + ssize_t n; + uint32_t un; + + n = read(in_fd, buf + 8, PIPE_BUF); + if (n > 0) { + buf[0] = (fd >> 0) & 0xff; + buf[1] = (fd >> 8) & 0xff; + buf[2] = (fd >> 16) & 0xff; + buf[3] = (fd >> 24) & 0xff; + + un = (uint32_t)n; + buf[4] = (un >> 0) & 0xff; + buf[5] = (un >> 8) & 0xff; + buf[6] = (un >> 16) & 0xff; + buf[7] = (un >> 24) & 0xff; + + if (write(1, buf, 8 + n) != 8 + n) + die("write failed"); + } } -static void handle_signal(pid_t mpid, struct signalfd_siginfo *si) +static void handle_signalfd(pid_t child_pid, int fd) { - if (si->ssi_signo == SIGINT || si->ssi_signo == SIGTERM || - si->ssi_signo == SIGHUP) - finish(POE_TIMEDOUT, -1, "Supervisor terminated"); - if (si->ssi_signo != SIGCHLD) - bug("unknown signal %d", si->ssi_signo); - + struct signalfd_siginfo si; int status; pid_t spid; - while ((spid = waitpid(-mpid, &status, WNOHANG | __WALL)) > 0) { - if (spid == mpid && WIFEXITED(status)) { + long syscalln; + + CHECK(read(fd, &si, 
sizeof(si) != sizeof(si))); + if (si.ssi_signo == SIGINT || + si.ssi_signo == SIGTERM || + si.ssi_signo == SIGHUP) + finish(POE_TIMEDOUT, -1, "Supervisor terminated"); + + CHECK(si.ssi_signo == SIGCHLD); + while ((spid = waitpid(-child_pid, &status, WNOHANG | __WALL)) > 0) { + if (spid == child_pid && WIFEXITED(status)) finish(POE_SUCCESS, WEXITSTATUS(status), NULL); - } else if (spid == mpid && WIFSIGNALED(status)) { + if (spid == child_pid && WIFSIGNALED(status)) { int sig = WTERMSIG(status); finish(POE_SIGNALED, -1, "Program terminated with signal %d (%s)", sig, strsignal(sig)); - } else if (WIFSTOPPED(status)) { - switch (status >> 16 & 0xff) { - case PTRACE_EVENT_SECCOMP: - errno = 0; - int syscalln = ptrace(PTRACE_PEEKUSER, spid, sizeof(long) * ORIG_RAX); - if (errno) - bug("ptrace(PTRACE_PEEKUSER) failed"); - char *name = poe_seccomp_syscall_resolve(syscalln); - finish(POE_SIGNALED, -1, "System call %s is blocked", name); - break; - case PTRACE_EVENT_CLONE: - case PTRACE_EVENT_FORK: - case PTRACE_EVENT_VFORK: - ptrace(PTRACE_CONT, spid, 0, 0); - break; - default: - ptrace(PTRACE_CONT, spid, 0, WSTOPSIG(status)); - break; - } + } + if (!WIFSTOPPED(status)) + continue; + + switch (status >> 16 & 0xff) { + case PTRACE_EVENT_SECCOMP: + errno = 0; + syscalln = ptrace(PTRACE_PEEKUSER, spid, 8 * ORIG_RAX); + if (errno) + die("ptrace(PTRACE_PEEKUSER) failed: %s", + strerror(errno)); + finish(POE_SIGNALED, -1, "System call %s is blocked", + poe_seccomp_syscall_resolve(syscalln)); + case PTRACE_EVENT_CLONE: + case PTRACE_EVENT_FORK: + case PTRACE_EVENT_VFORK: + ptrace(PTRACE_CONT, spid, 0, 0); + break; + default: + ptrace(PTRACE_CONT, spid, 0, WSTOPSIG(status)); + break; } } - if (spid < 0) { - if (errno == ECHILD) - bug("child dies too early (before raising SIGSTOP)"); - else - bug("waitpid failed"); - } + if (spid < 0) + die("waitpid failed: %s", strerror(errno)); } -int main(int argc, char *argv[]) +static int do_epoll_add(int epfd, int fd) { - if (argc < 5) - 
die("usage: runner basedir overlaydir sourcefile cmdl.."); + struct epoll_event event = { + .events = EPOLLRDHUP | EPOLLIN, + .data.fd = fd + }; - struct playground *pg = poe_playground_init(argv[1], argv[2]); - if (!pg) - die("playground init failed"); - if (poe_playground_init_command_line(pg, argv + 4, argv[3])) - die("copy program failed"); + return epoll_ctl(epfd, EPOLL_CTL_ADD, fd, &event); +} +static int do_spawn(struct playground *pg, struct cgroups *cg, char **cmdl) +{ int stdout_fd[2], stderr_fd[2], child_fd[2]; - if (pipe2(stdout_fd, O_DIRECT)) - bug("pipe2 failed"); - if (pipe2(stderr_fd, O_DIRECT)) - bug("pipe2 failed"); - if (pipe2(child_fd, O_DIRECT | O_CLOEXEC)) - bug("pipe2 failed"); + int pid; - // init cgroup: create root hierarchy and setup controllers - if (poe_cgroup_init()) - die("failed to init cgroup"); + if (pipe2(stdout_fd, O_DIRECT) || + pipe2(stderr_fd, O_DIRECT) || + pipe2(child_fd, O_DIRECT | O_CLOEXEC)) { + error("pipe2 failed"); + return -1; + } // TODO: CLONE_NEWUSER - pid_t pid = (pid_t)syscall(SYS_clone, SIGCHLD | CLONE_NEWIPC | CLONE_NEWNS | CLONE_NEWPID | CLONE_NEWUTS | CLONE_NEWNET, 0); - if (pid < 0) - bug("clone failed"); - if (!pid) { - poe_child_do(pg, stdout_fd, stderr_fd, child_fd); - bug("unreachable"); + pid = (int)syscall(__NR_clone, SIGCHLD | + CLONE_NEWIPC | CLONE_NEWNS | CLONE_NEWPID | + CLONE_NEWUTS | CLONE_NEWNET, 0); + if (pid < 0) { + error("clone failed"); + return -1; } + if (pid) { + char buf[PIPE_BUF]; + ssize_t n; + sigset_t mask; + int signal_fd, timer_fd, epoll_fd; + struct itimerspec itspec = { + /* FIXME: make it configurable */ + .it_value.tv_sec = POE_TIME_LIMIT + }; + + /* Close the write-side of fds */ + if (close(stdout_fd[1]) || close(stderr_fd[1]) || close(child_fd[1])) { + error("close failed"); + goto bailout; + } + + /* limit memory, cpu and processes */ + if (poe_cgroups_add(cg, pid)) + goto bailout; + + sigemptyset(&mask); + sigaddset(&mask, SIGCHLD); /* ptrace */ + sigaddset(&mask, 
SIGINT); + sigaddset(&mask, SIGTERM); + sigaddset(&mask, SIGHUP); + sigprocmask(SIG_BLOCK, &mask, NULL); + signal_fd = signalfd(-1, &mask, 0); + if (signal_fd < 0) { + error("signalfd failed: %s", strerror(errno)); + goto bailout; + } + + timer_fd = timerfd_create(CLOCK_MONOTONIC, 0); + if (timer_fd < 0 || + timerfd_settime(timer_fd, 0, &itspec, NULL)) { + error("timerfd failed: %s", strerror(errno)); + goto bailout; + } + + epoll_fd = epoll_create1(0); + if (epoll_fd < 0 || + do_epoll_add(epoll_fd, signal_fd) || + do_epoll_add(epoll_fd, timer_fd) || + do_epoll_add(epoll_fd, stdout_fd[0]) || + do_epoll_add(epoll_fd, stderr_fd[0])) { + error("epollfd failed: %s", strerror(errno)); + goto bailout; + } + + if (ptrace(PTRACE_SEIZE, pid, NULL, + PTRACE_O_TRACECLONE | + PTRACE_O_TRACEFORK | + PTRACE_O_TRACESECCOMP | + PTRACE_O_TRACEVFORK)) { + error("ptrace failed: %s", strerror(errno)); + goto bailout; + } + + n = read(child_fd[0], buf, sizeof(buf)); + if (n != 0) { + if (n < 0) + error("read from child failed: %s", strerror(errno)); + /* + * Something happened in the child process. Read the + * message and kill the process. + * + * NUL-terminated string is expected. 
+ */ + error("error from child: %s", buf); + goto bailout; + } else { + /* The write-side of the pipe is closed by exec() */ + if (clock_gettime(CLOCK_MONOTONIC, &start_timespec)) { + error("clock_gettime failed: %s", strerror(errno)); + goto bailout; + } + } + + while (true) { + struct epoll_event events[10], *ev; + int n; + + n = epoll_wait(epoll_fd, events, numberof(events), -1); + if (n < 0) { + error("epoll_wait failed: %s", strerror(errno)); + goto bailout; + } - if (close(stdout_fd[1]) || close(stderr_fd[1]) || close(child_fd[1])) - bug("close child write pipe failed"); - - int epoll_fd = epoll_create1(0); - if (epoll_fd < 0) - bug("epoll_create1 failed"); - - sigset_t mask; - sigemptyset(&mask); - sigaddset(&mask, SIGCHLD); - sigaddset(&mask, SIGINT); - sigaddset(&mask, SIGTERM); - sigaddset(&mask, SIGHUP); - sigprocmask(SIG_BLOCK, &mask, NULL); - int signal_fd = signalfd(-1, &mask, 0); - if (signal_fd < 0) - bug("signalfd failed"); - - int timer_fd = timerfd_create(CLOCK_MONOTONIC, 0); - if (timer_fd < 0) - bug("timerfd_create failed"); - if (timerfd_settime(timer_fd, 0, &(struct itimerspec) { .it_value.tv_sec = POE_TIME_LIMIT }, NULL)) - bug("timerfd_settime failed"); - -#define ADD(_fd__) do if (epoll_ctl(epoll_fd, EPOLL_CTL_ADD, _fd__, &(struct epoll_event) { .data.fd = _fd__, .events = EPOLLRDHUP|EPOLLIN })) \ - bug("EPOLL_CTL_ADD failed"); while (0) - ADD(signal_fd); - ADD(timer_fd); - ADD(child_fd[0]); - ADD(stdout_fd[0]); - ADD(stderr_fd[0]); - - if (ptrace(PTRACE_SEIZE, pid, NULL, - PTRACE_O_TRACECLONE | - PTRACE_O_TRACEFORK | - PTRACE_O_TRACESECCOMP | - PTRACE_O_TRACEVFORK)) - bug("ptrace failed"); - - if (poe_cgroup_add(pid)) - die("failed cgroup add"); - - while (true) { - struct epoll_event events[10]; - int n = epoll_wait(epoll_fd, events, sizeof(events) / sizeof(events[0]), -1); - if (n < 0) - bug("epoll_wait failed"); - - for (int i = 0; i < n; i++) { - struct epoll_event *ev = &events[i]; - if (ev->events & EPOLLIN) { + for (ev = events; ev 
< events + n; ev++) { + if (!(ev->events & EPOLLIN)) { + error("unknown event from epoll"); + goto bailout; + } if (ev->data.fd == stdout_fd[0]) { - handle_stdout(ev->data.fd, STDOUT_FILENO); + handle_stdout(ev->data.fd, 1); } else if (ev->data.fd == stderr_fd[0]) { - handle_stdout(ev->data.fd, STDERR_FILENO); + handle_stdout(ev->data.fd, 2); } else if (ev->data.fd == signal_fd) { - struct signalfd_siginfo si; - if (sizeof(si) != read(signal_fd, &si, sizeof(si))) - die("partial read signalfd"); - handle_signal(pid, &si); + handle_signalfd(pid, signal_fd); } else if (ev->data.fd == timer_fd) { finish(POE_TIMEDOUT, -1, NULL); - } else if (ev->data.fd == child_fd[0]) { - char buf[PIPE_BUF]; - ssize_t nx = read(child_fd[0], buf, sizeof(buf)); - if (nx > 0) // TODO - die("child err: %s", strndupa(buf, nx)); + } else { + error("event for unknown fd from epoll"); + goto bailout; } } - if (ev->events & EPOLLERR || ev->events & EPOLLHUP || ev->events & EPOLLRDHUP) { - // fd closed - close(ev->data.fd); - if (ev->data.fd == child_fd[0]) - // exec succeeded - if (clock_gettime(CLOCK_MONOTONIC, &start_timespec)) - bug("clock_gettime failed"); - } } + return 0; + +bailout: + (void)close(child_fd[0]); + /* Kill the child process(es) and report failure */ + kill(pid, SIGKILL); + while (waitpid(-1, NULL, __WALL) != -1); + return -1; + } else { + /* Close the read-size fds */ + CHECK(!close(stdout_fd[0])); + CHECK(!close(stderr_fd[0])); + CHECK(!close(child_fd[0])); + /* Replace stdout and stderr */ + CHECK(dup2(stdout_fd[1], 1) == 1); + CHECK(dup2(stderr_fd[1], 2) == 2); + /* Close the original write-side fds */ + CHECK(!close(stdout_fd[1])); + CHECK(!close(stderr_fd[1])); + + /* Setup environemnt and do exec() */ + poe_child_do(pg, child_fd[1]); } - bug("unreachable"); + UNREACHABLE(); +} + +static void print_usage(void) +{ + /* FIXME */ +} + +int main(int argc, char **argv) +{ + const char *basedir = NULL, *overlaydir = NULL, *tmpdir = NULL; + list(const char *) copyfiles = 
LIST_INIT; + char **commandline; + struct playground *pg = NULL; + struct cgroups *cg = NULL; + int ret = 1; + + while (1) { + int c, lindex; + static const struct option opts[] = { + { "help", no_argument, NULL, 'h' }, + { "basedir", required_argument, NULL, 'b' }, + { "overlaydir", required_argument, NULL, 'o' }, + { "tmpdir", required_argument, NULL, 't' }, + { "copy", required_argument, NULL, 'c' }, + { NULL, 0, NULL, 0 } + }; + + c = getopt_long(argc, argv, "hb:o:c:", opts, &lindex); + if (c == -1) + break; + switch (c) { + case 'h': + print_usage(); + return 0; + case 'b': + basedir = optarg; + break; + case 'o': + overlaydir = optarg; + break; + case 't': + tmpdir = optarg; + break; + case 'c': + list_append(&copyfiles, optarg); + break; + default: + print_usage(); + return 1; + } + } + + if (optind == argc) + die("commands not specified"); + commandline = argv + optind; + + if (!basedir) + die("option --basedir not specified"); + if (!tmpdir) + die("option --tmpdir not specified"); + + /* initialize playground */ + pg = poe_playground_init(basedir, overlaydir, tmpdir); + if (!pg) { + error("playground could not be setup"); + goto err; + } + if (poe_playground_copy_files(pg, copyfiles.len, copyfiles.ptr)) { + list_free0(&copyfiles); + error("files could not be copied to playground"); + goto err; + } + list_free0(&copyfiles); + + /* initialize cgroups */ + cg = poe_cgroups_init(tmpdir); + if (!cg) { + error("cgroups could not be setup"); + goto err; + } + + /* setup child process and limit privileges */ + if (do_spawn(pg, cg, commandline)) { + error("child process could not be started"); + goto err; + } + + ret = 0; + +err: + if (pg) + poe_playground_destroy(pg); + if (cg) + poe_cgroups_destroy(cg); + return ret; } |