diff options
-rw-r--r-- | sandbox/.gitignore | 3 | ||||
-rw-r--r-- | sandbox/Makefile | 8 | ||||
-rw-r--r-- | sandbox/playground.c | 51 | ||||
-rw-r--r-- | sandbox/sandbox.c | 183 | ||||
-rw-r--r-- | sandbox/sandbox.h | 52 | ||||
-rw-r--r-- | sandbox/seccomp.c | 100 |
6 files changed, 397 insertions, 0 deletions
diff --git a/sandbox/.gitignore b/sandbox/.gitignore new file mode 100644 index 0000000..7fe4e0c --- /dev/null +++ b/sandbox/.gitignore @@ -0,0 +1,3 @@ +/testbox +/sandbox +/*.o diff --git a/sandbox/Makefile b/sandbox/Makefile new file mode 100644 index 0000000..5141daa --- /dev/null +++ b/sandbox/Makefile @@ -0,0 +1,8 @@ +CFLAGS += -std=c11 -D_GNU_SOURCE -D_FORTIFY_SOURCE=2 -ggdb3 -O0 -fPIE -fstack-protector-all \ + -Wall -Wextra -Wshadow -Wmissing-declarations -Wpointer-arith +LDLIBS = -lseccomp +LDFLAGS += -pie -Wl,-z,relro,-z,now + +#all: sandbox safe_runner +sandbox: sandbox.c seccomp.c playground.c +safe_runner: safe_runner.c diff --git a/sandbox/playground.c b/sandbox/playground.c new file mode 100644 index 0000000..3ea009e --- /dev/null +++ b/sandbox/playground.c @@ -0,0 +1,51 @@ +#include "sandbox.h" + +static char *upperdir = NULL; +static char *workdir = NULL; +static char *mergeddir = NULL; + +char * +poe_init_playground(const char *base, const char *env) +{ + struct stat s; + if (stat(POE_TEMPORARY_BASE, &s) == -1) { + if (mkdir(POE_TEMPORARY_BASE, 0755) == -1) ERROR("failed to create temporary base"); + } + + workdir = strdup(POE_WORKDIR_TEMPLATE); + if (!workdir || !mkdtemp(workdir)) ERROR("failed to create workdir"); + if (chmod(workdir, 0755) == -1) ERROR("failed to chmod workdir"); + upperdir = strdup(POE_UPPERDIR_TEMPLATE); + if (!upperdir || !mkdtemp(upperdir)) ERROR("failed to create upperdir"); + if (chmod(upperdir, 0755) == -1) ERROR("failed to chmod upperdir"); + mergeddir = strdup(POE_MERGEDDIR_TEMPLATE); + if (!mergeddir || !mkdtemp(mergeddir)) ERROR("failed to create mergeddir"); + if (chmod(mergeddir, 0755) == -1) ERROR("failed to chmod mergeddir"); + + char *opts = NULL; + if (asprintf(&opts, "lowerdir=%s:%s,upperdir=%s,workdir=%s", env, base, upperdir, workdir) == -1) + ERROR("asprintf() failed"); + if (mount(NULL, mergeddir, "overlay", MS_NOSUID, opts) == -1) + ERROR("mount overlay failed"); + + return mergeddir; +} + +void +poe_destroy_playground() +{ + struct stat s; + if (mergeddir) { + umount(mergeddir); + if (rmdir(mergeddir) != -1) fprintf(stderr, "failed remove mergeddir"); + free(mergeddir); + } + if (workdir && stat(workdir, &s) != -1) { + if (rmdir(workdir) != -1) fprintf(stderr, "failed remove workdir"); + free(workdir); + } + if (upperdir && stat(upperdir, &s) != -1) { + fprintf(stderr, "remove upperdir"); + free(upperdir); + } +} diff --git a/sandbox/sandbox.c b/sandbox/sandbox.c new file mode 100644 index 0000000..6315102 --- /dev/null +++ b/sandbox/sandbox.c @@ -0,0 +1,183 @@ +#include "sandbox.h" + +static void +child(const char *root, char *cmd[]) +{ + pid_t pid = (pid_t)syscall(SYS_getpid); + assert(pid == 1); + //TODO: check FDs + // die if parent dies + if (prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0) == -1) ERROR("prctl(PR_SET_PDEATHSIG, SIGKILL) failed"); + + if (sethostname(POE_HOSTNAME, strlen(POE_HOSTNAME)) == -1) ERROR("sethostname() failed"); + if (mount(NULL, "/", NULL, MS_PRIVATE | MS_REC, NULL) == -1) ERROR("mount / failed"); + if (mount(root, root, "bind", MS_BIND | MS_REC, NULL) == -1) ERROR("bind root failed"); + if (chroot(root) == -1) ERROR("chroot() failed"); + // if (mount(NULL, "/proc", "proc", MS_NOSUID | MS_NOEXEC | MS_NODEV, NULL) == -1) ERROR("mount /proc failed"); + // if (mount(NULL, "/dev", "devtmpfs", MS_NOSUID | MS_NOEXEC, NULL) == -1) ERROR("mount /dev failed"); + // if (mount(NULL, "/dev/shm", "tmpfs", MS_NOSUID | MS_NODEV, NULL) == -1) ERROR("mount /dev/shm failed"); + if (mount(NULL, "/tmp", "tmpfs", MS_NOSUID | MS_NODEV, NULL) == -1) ERROR("mount /tmp failed"); + + struct passwd *pw = getpwnam(POE_USERNAME); + if (!pw) ERROR("getpwnam() failed"); + + if (mount(NULL, pw->pw_dir, "tmpfs", MS_NOSUID | MS_NODEV, NULL) == -1) ERROR("mount home failed"); + if (chdir(pw->pw_dir) == -1) ERROR("chdir() failed"); + if (setsid() == -1) ERROR("setsid() failed"); + if (initgroups(POE_USERNAME, pw->pw_gid) == -1) ERROR("initgroups() failed"); + if (setresgid(pw->pw_gid, pw->pw_gid, pw->pw_gid) == -1) ERROR("setresgid() failed"); + if (setresuid(pw->pw_uid, pw->pw_uid, pw->pw_uid) == -1) ERROR("setresuid() failed"); + + char path[] = "PATH=/usr/bin"; + char *env[] = {path, NULL, NULL, NULL, NULL}; + asprintf(env + 1, "HOME=%s", pw->pw_dir); + asprintf(env + 2, "USER=%s", POE_USERNAME); + asprintf(env + 3, "LOGNAME=%s", POE_USERNAME); + + // wait parent + if (kill(pid, SIGSTOP) == -1) ERROR("kill(self, SIGSTOP) failed"); + + if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) == -1) ERROR("ptctl(PR_SET_NO_NEW_PRIVS, 1) failed"); + poe_init_seccomp(SCMP_ACT_TRACE(0)); + + if (execvpe(cmd[0], cmd, env) == -1) err(EXIT_FAILURE, "execvpe() failed"); +} + +static inline long +get_arg(pid_t pid, int i) +{ + static const int regs[] = {RDI, RSI, RDX, R10, R8, R9}; + return ptrace(PTRACE_PEEKUSER, pid, sizeof(long) * (size_t)regs[i - 1]); +} + +static enum poe_handler_result +handle_syscall(pid_t pid, int syscalln) +{ + long arg1; + switch (syscalln) { + case SYS_write: + arg1 = get_arg(pid, 1); + if (arg1 == 1 || arg1 == 2) { + char *pp = (char *)get_arg(pid, 2); + int count = (int)get_arg(pid, 3); + char fd = (char)arg1; + write(1, (void *)&fd, sizeof(fd)); + write(1, (void *)&count, sizeof(count)); + for (int k = 0; k < count; k++, pp++) { + char d = (char)ptrace(PTRACE_PEEKDATA, pid, pp); + write(1, &d, 1); + } + ptrace(PTRACE_POKEUSER, pid, sizeof(long) * RAX, count); + return POE_HANDLED; + } + break; + default: + if (DEBUG) { + char *rule = seccomp_syscall_resolve_num_arch(SCMP_ARCH_NATIVE, syscalln); + if (!rule) ERROR("seccomp_syscall_resolve_num_arch() failed"); + fprintf(stderr, "syscall: %s\n", rule); + free(rule); + } + return POE_PROHIBITED; + } + return POE_ALLOWED; +} + +static void +result(uint32_t status, uint32_t signal) +{ + write(2, (void *)&status, sizeof(status)); + write(2, (void *)&signal, sizeof(signal)); + poe_destroy_playground(); + exit(0); +} + +static void +parent(const pid_t mpid, int sig_fd) +{ + long trace_flags = PTRACE_O_TRACECLONE | PTRACE_O_TRACEFORK | PTRACE_O_TRACESECCOMP | PTRACE_O_TRACEVFORK; + if (ptrace(PTRACE_SEIZE, mpid, NULL, trace_flags) == -1) ERROR("ptrace(PTRACE_SEIZE, ) failed"); + + while (true) { + struct signalfd_siginfo si; + ssize_t bytes_r = read(sig_fd, &si, sizeof(si)); + if (bytes_r == -1) ERROR("read(sig_fd, ) failed"); + if (si.ssi_signo != SIGCHLD) ERROR("parent: unexpected signal"); + + while (true) { + int status; + pid_t spid = waitpid(-mpid, &status, WNOHANG | __WALL); + if (spid == -1) ERROR("waitpid() failed"); + if (!spid) break; + + if (WIFEXITED(status) && spid == mpid) { + result(WEXITSTATUS(status), 0); + } else if (WIFSIGNALED(status)) { + result(0, WTERMSIG(status)); + } else if (WIFSTOPPED(status)) { + int e = status >> 16 & 0xff; + switch (e) { + case PTRACE_EVENT_SECCOMP: + errno = 0; + int syscalln = ptrace(PTRACE_PEEKUSER, spid, sizeof(long) * ORIG_RAX); + if (errno) ERROR("ptrace(PTRACE_PEEKUSER, ) failed"); + enum poe_handler_result ret = handle_syscall(spid, syscalln); + if (ret == POE_HANDLED) { + // cancel syscall + ptrace(PTRACE_POKEUSER, spid, sizeof(long) * ORIG_RAX, -1); + } else if (ret == POE_PROHIBITED) { + if (DEBUG) { + // implicitly prohibited syscall + kill(spid, SIGKILL); + char *rule = seccomp_syscall_resolve_num_arch(SCMP_ARCH_NATIVE, syscalln); + if (rule) fprintf(stderr, "#### prohibited syscall: %s ####", rule); + free(rule); + result(0, SIGSYS); + } + } + ptrace(PTRACE_CONT, spid, 0, 0); + break; + case PTRACE_EVENT_CLONE: + case PTRACE_EVENT_FORK: + case PTRACE_EVENT_VFORK: + ptrace(PTRACE_CONT, spid, 0, 0); + break; + default: + ptrace(PTRACE_CONT, spid, 0, WSTOPSIG(status)); + break; + } + } + } + } +} + +int +main(int argc, char *argv[]) +{ + if (argc < 4) { + ERROR("usage: %s baseroot envroot cmd...", program_invocation_short_name); + } + + const char *root = poe_init_playground(argv[1], argv[2]); + + sigset_t mask, omask; + sigemptyset(&mask); + sigaddset(&mask, SIGCHLD); + sigprocmask(SIG_BLOCK, &mask, &omask); + int sig_fd = signalfd(-1, &mask, SFD_CLOEXEC); + if (sig_fd == -1) ERROR("signalfd() failed"); + + // TODO: CLONE_NEWUSER? require CONFIG_USER_NS=y + pid_t pid = (pid_t)syscall(SYS_clone, SIGCHLD | CLONE_NEWIPC | CLONE_NEWNS | CLONE_NEWPID | CLONE_NEWUTS | CLONE_NEWNET, 0); + if (pid == -1) { + ERROR("clone() failed"); + } + if (pid == 0) { + sigprocmask(SIG_SETMASK, &omask, NULL); + child(root, argv + 3); + } else { + parent(pid, sig_fd); + } + + ERROR("unreachable"); +} diff --git a/sandbox/sandbox.h b/sandbox/sandbox.h new file mode 100644 index 0000000..88a99ca --- /dev/null +++ b/sandbox/sandbox.h @@ -0,0 +1,52 @@ +#ifndef __x86_64__ +# error unsupported +#endif + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <stdbool.h> +#include <unistd.h> +#include <errno.h> +#include <pwd.h> +#include <grp.h> +#include <sched.h> +#include <seccomp.h> +#include <assert.h> +#include <signal.h> +#include <sys/wait.h> +#include <sys/mount.h> +#include <sys/ptrace.h> +#include <sys/signalfd.h> +#include <sys/reg.h> +#include <sys/prctl.h> +#include <sys/syscall.h> +#include <sys/stat.h> +#include <sys/types.h> + +#define DEBUG true +#define POE_USERNAME "nobody" +#define POE_HOSTNAME "poe-sandbox" + +#define POE_LOWERDIR "/" +#define POE_TEMPORARY_BASE "/tmp/poe" +#define POE_UPPERDIR_TEMPLATE POE_TEMPORARY_BASE "/upperXXXXXX" +#define POE_WORKDIR_TEMPLATE POE_TEMPORARY_BASE "/workXXXXXX" +#define POE_MERGEDDIR_TEMPLATE POE_TEMPORARY_BASE "/mergedXXXXXX" + +#define ERROR(...) do {\ + fprintf(stderr, __VA_ARGS__);\ + if (syscall(SYS_getpid) != 1) poe_destroy_playground();\ + exit(1);\ +} while (false) + +enum poe_handler_result { + POE_PROHIBITED, + POE_HANDLED, + POE_ALLOWED +}; + +void poe_init_seccomp(uint32_t); + +char * poe_init_playground(const char *, const char *); +void poe_destroy_playground(); diff --git a/sandbox/seccomp.c b/sandbox/seccomp.c new file mode 100644 index 0000000..f5d0e37 --- /dev/null +++ b/sandbox/seccomp.c @@ -0,0 +1,100 @@ +#include "sandbox.h" + +struct syscall_rule { + int syscall; + uint32_t action; +}; +#define RULE(name, action) { SCMP_SYS(name), SCMP_ACT_##action } +static const struct syscall_rule syscall_rules[] = { + RULE(ptrace, ERRNO(EPERM)), + RULE(prctl, ERRNO(EPERM)), + RULE(execve, ALLOW), + RULE(clone, ALLOW), + RULE(vfork, ALLOW), + RULE(wait4, ALLOW), + RULE(dup, ALLOW), + RULE(dup2, ALLOW), + RULE(capget, ALLOW), + RULE(kill, ALLOW), + + // safe + RULE(futex, ALLOW), + RULE(exit, ALLOW), + RULE(pipe2, ALLOW), + RULE(brk, ALLOW), + RULE(mmap, ALLOW), + RULE(mprotect, ALLOW), + RULE(munmap, ALLOW), + RULE(mremap, ALLOW), + RULE(madvise, ALLOW), + RULE(rt_sigaction, ALLOW), + RULE(rt_sigprocmask, ALLOW), + RULE(rt_sigreturn, ALLOW), + RULE(nanosleep, ALLOW), + RULE(getrlimit, ALLOW), + RULE(poll, ALLOW), + RULE(exit_group, ALLOW), + RULE(getpid, ALLOW), + RULE(getuid, ALLOW), + RULE(getgid, ALLOW), + RULE(geteuid, ALLOW), + RULE(getegid, ALLOW), + RULE(getresuid, ALLOW), + RULE(getresgid, ALLOW), + RULE(gettimeofday, ALLOW), + RULE(clock_gettime, ALLOW), + RULE(set_tid_address, ALLOW), + RULE(getdents, ALLOW), + RULE(arch_prctl, ALLOW), + RULE(set_robust_list, ALLOW), + RULE(get_robust_list, ALLOW), + RULE(sigaltstack, ALLOW), + RULE(uname, ALLOW), // dummy? /proc/sys/kernel/*? + RULE(getcwd, ALLOW), + RULE(getppid, ALLOW), + RULE(getpgrp, ALLOW), + + // ???? + RULE(socket, ERRNO(ENOSYS)), + RULE(utimensat, ALLOW), + RULE(futimesat, ALLOW), + RULE(getxattr, ALLOW), + + // ??? + RULE(fadvise64, ALLOW), + RULE(readlink, ALLOW), + RULE(open, ALLOW), + RULE(openat, ALLOW), + RULE(stat, ALLOW), + RULE(close, ALLOW), + RULE(read, ALLOW), + RULE(readv, ALLOW), + RULE(pread64, ALLOW), + RULE(write, TRACE(1)), + RULE(writev, ALLOW), + RULE(pwrite64, ALLOW), + RULE(lstat, ALLOW), + RULE(fstat, ALLOW), + RULE(fcntl, ALLOW), + RULE(ioctl, ALLOW), + RULE(lseek, ALLOW), + RULE(access, ALLOW), +}; +static const int syscall_rules_count = sizeof(syscall_rules) / sizeof(struct syscall_rule); + +void +poe_init_seccomp(uint32_t act) +{ + scmp_filter_ctx ctx = seccomp_init(act); + if (!ctx) ERROR("seccomp_init() failed"); + + for (int i = 0; i < syscall_rules_count; i++) { + struct syscall_rule rule = syscall_rules[i]; + if (seccomp_rule_add(ctx, rule.action, rule.syscall, 0) < 0) ERROR("seccomp_rule_add() failed"); + } + + int rc = seccomp_load(ctx); + if (rc < 0) ERROR("seccomp_load() failed: %s", strerror(-rc)); + + seccomp_release(ctx); +} |